# Sectors and Vectors — UCLS inter-sector overlay generation (notebook export)

# %%
import json
import pandas as pd
from pathlib import Path

# All generated artifacts (CSV/JSON/markdown) are written under this directory.
base = Path("/mnt/data")

# -----------------------------
# Source: encode sector -> services/standards/anchors used (✓)
# Derived from the user's UCLS sector write-up in the conversation.
# -----------------------------

# The taxonomy merges Manufacturing & Supply Chain into one sector; "Supply
# Chain" is kept as an alias node for overlay clarity. Define the shared
# attributes once so the two entries cannot drift apart, and give each entry
# its own set copies so mutating one sector never silently changes the other.
_MFG_SUPPLY_CHAIN = {
    "services": {"IDaaS","OaaS","TaaS","ProvAaaS","InteropaaS","ACaaS","VaaS","DaaS","AnaaS","VizaaS"},
    "standards": {"GS1 EPCIS","RDF/JSON-LD","W3C VC"},
    "anchors": {"Batch Certificates","Cultures/Sequences"},
}

sectors = {
    "Defense & Security": {
        "services": {"NaaS","AaaS","RegaaS","IDaaS","ValaaS","OaaS","TaaS","RaaS","ProvAaaS","VaaS","InteropaaS","ACaaS","DaaS","AnaaS","VizaaS"},
        "standards": {"STIX/TAXII","RDF/JSON-LD","W3C VC"},
        "anchors": {"Sequences","Strains","Specimens","Digital Credentials"},
    },
    "Manufacturing & Supply Chain": {k: set(v) for k, v in _MFG_SUPPLY_CHAIN.items()},
    # Alias node with identical attributes (see note above).
    "Supply Chain": {k: set(v) for k, v in _MFG_SUPPLY_CHAIN.items()},
    "Smart Cities & Infrastructure": {
        "services": {"NaaS","IDaaS","OaaS","TaaS","ProvAaaS","InteropaaS","ACaaS","DaaS","AnaaS","VizaaS"},
        "standards": {"OGC","RDF","JSON-LD","W3C VC"},
        "anchors": {"Digital Certificates","Relevés/Specimens"},
    },
    "Energy Systems": {
        "services": {"IDaaS","OaaS","TaaS","ProvAaaS","VaaS","InteropaaS","ACaaS","DaaS","AnaaS","VizaaS"},
        "standards": {"RDF/OWL","CFIHOS","OPC UA","ISO 15926","W3C VC"},
        "anchors": {"Digital Credentials","Material Samples"},
    },
    "Climate & Geoscience": {
        "services": {"OaaS","TaaS","RaaS","ProvAaaS","InteropaaS","DaaS","AnaaS","VizaaS"},
        "standards": {"OGC","NetCDF","RDF","JSON-LD"},
        "anchors": {"Type Relevé","Specimens","Isotope Anchors"},
    },
}

# -----------------------------
# Helper: compute overlay edges between triplets using intersection metrics
# -----------------------------
def build_overlay(group_name, sector_names, sector_map=None):
    """Build overlay graph data (nodes + pairwise edges) for one sector group.

    Parameters
    ----------
    group_name : str
        Label stored on every edge; later used to partition output files.
    sector_names : list[str]
        Sector names (keys of *sector_map*) to include in this overlay.
    sector_map : dict | None
        Mapping of sector name -> {"services", "standards", "anchors"} sets.
        Defaults to the module-level ``sectors`` table, preserving the
        original call signature.

    Returns
    -------
    tuple[list[dict], list[dict]]
        ``(nodes, edges)``: one node per sector with its attributes sorted
        for stable output, and one edge per unordered sector pair carrying
        the shared-attribute lists, their counts, and a weighted
        ``overlap_score`` (shared services weigh 1, standards and anchors
        0.5 each).
    """
    if sector_map is None:
        sector_map = sectors  # module-level table defined above

    # One node per sector, attributes sorted so output is deterministic.
    nodes = [
        {
            "sector": name,
            "services": sorted(sector_map[name]["services"]),
            "standards": sorted(sector_map[name]["standards"]),
            "anchors": sorted(sector_map[name]["anchors"]),
        }
        for name in sector_names
    ]

    # One edge per unordered pair (i < j), annotated with shared attributes.
    edges = []
    for i, name_a in enumerate(sector_names):
        for name_b in sector_names[i + 1:]:
            a = sector_map[name_a]
            b = sector_map[name_b]
            shared_services = sorted(a["services"] & b["services"])
            shared_standards = sorted(a["standards"] & b["standards"])
            shared_anchors = sorted(a["anchors"] & b["anchors"])
            edges.append({
                "group": group_name,
                "source": name_a,
                "target": name_b,
                "shared_services": shared_services,
                "shared_services_count": len(shared_services),
                "shared_standards": shared_standards,
                "shared_standards_count": len(shared_standards),
                "shared_anchors": shared_anchors,
                "shared_anchors_count": len(shared_anchors),
                # Services dominate the score; standards/anchors count half.
                "overlap_score": len(shared_services)
                + 0.5 * len(shared_standards)
                + 0.5 * len(shared_anchors),
            })
    return nodes, edges

# Define overlay groups: each overlay compares its member sectors pairwise.
overlay_specs = {
    "Defense–Manufacturing–SupplyChain": ["Defense & Security","Manufacturing & Supply Chain","Supply Chain"],
    "SmartCities–Energy–Climate": ["Smart Cities & Infrastructure","Energy Systems","Climate & Geoscience"],
}

# Manifest describing every generated group; a "files" key is merged in later.
manifest = {
    "name": "UCLS Overlays",
    "version": "1.0",
    "generated": True,
    "groups": {},
}

all_edges = []
all_nodes = []

# Accumulate nodes/edges across groups, tagging each node with its group.
for grp, grp_members in overlay_specs.items():
    grp_nodes, grp_edges = build_overlay(grp, grp_members)
    all_nodes.extend({"group": grp, **node} for node in grp_nodes)
    all_edges.extend(grp_edges)
    manifest["groups"][grp] = {"members": grp_members, "edge_count": len(grp_edges)}

# -----------------------------
# Output CSV/JSON for each group + combined
# -----------------------------
def write_group_files(group, members):
    """Write one group's nodes/edges as CSV and JSON under *base*.

    *members* is unused here (rows are selected by their "group" tag) but is
    kept so existing call sites remain valid. Returns a dict mapping
    edge_csv/edge_json/node_csv/node_json to the written file paths (str).
    """
    group_nodes = [node for node in all_nodes if node["group"] == group]
    group_edges = [edge for edge in all_edges if edge["group"] == group]
    paths = {
        "edge_csv": base / f"overlay_{group}_edges.csv",
        "edge_json": base / f"overlay_{group}_edges.json",
        "node_csv": base / f"overlay_{group}_nodes.csv",
        "node_json": base / f"overlay_{group}_nodes.json",
    }
    pd.DataFrame(group_edges).to_csv(paths["edge_csv"], index=False)
    pd.DataFrame(group_nodes).to_csv(paths["node_csv"], index=False)
    paths["edge_json"].write_text(json.dumps(group_edges, indent=2))
    paths["node_json"].write_text(json.dumps(group_nodes, indent=2))
    return {key: str(path) for key, path in paths.items()}

# Per-group files, indexed by group name.
file_index = {grp: write_group_files(grp, mem) for grp, mem in overlay_specs.items()}

# Combined files spanning every overlay group.
edges_df = pd.DataFrame(all_edges)
nodes_df = pd.DataFrame(all_nodes)
edges_df.to_csv(base / "overlays_all_edges.csv", index=False)
nodes_df.to_csv(base / "overlays_all_nodes.csv", index=False)
(base / "overlays_manifest.json").write_text(
    json.dumps({**manifest, "files": file_index}, indent=2)
)

# -----------------------------
# Append to WordPress markdown if present, else create a small section
# -----------------------------
md_path = base / "ucls_directory_wordpress.md"
append_md = [
    "\n\n---\n\n## Inter-Sector Overlays (UCLS)\n",
    "> Auto-generated overlay graphs showing shared services, standards, and anchors.\n\n",
]

def links_for_group(group, files):
    """Render a markdown section linking one group's four overlay files."""
    heading = group.replace('–', ' — ')
    lines = [f"### {heading}\n"]
    for label, key in (
        ("Edges (CSV)", "edge_csv"),
        ("Edges (JSON)", "edge_json"),
        ("Nodes (CSV)", "node_csv"),
        ("Nodes (JSON)", "node_json"),
    ):
        lines.append(f"- **{label}:** {Path(files[key]).as_posix()}\n")
    return "".join(lines)

# One markdown section per group, then the combined-file links.
append_md.extend(links_for_group(grp, grp_files) for grp, grp_files in file_index.items())

append_md.append(
    "\n### Combined\n"
    f"- **All Overlays — Edges (CSV):** {(base / 'overlays_all_edges.csv').as_posix()}\n"
    f"- **All Overlays — Nodes (CSV):** {(base / 'overlays_all_nodes.csv').as_posix()}\n"
    f"- **Manifest (JSON):** {(base / 'overlays_manifest.json').as_posix()}\n"
)

if md_path.exists():
    # Existing WordPress export: append the overlay section to it.
    with md_path.open("a") as handle:
        handle.write("".join(append_md))
    updated = True
else:
    # No export yet: emit a standalone markdown doc with just the overlays.
    md_path.write_text("# UCLS Inter-Sector Overlays\n" + "".join(append_md))
    updated = False

# Quick summary of everything written; the bare name below surfaces it as the
# notebook cell's output value.
summary = {
    "markdown_appended": updated,
    "markdown_path": md_path.as_posix(),
    "overlay_files": file_index,
    "combined": {
        "edges_csv": (base / "overlays_all_edges.csv").as_posix(),
        "nodes_csv": (base / "overlays_all_nodes.csv").as_posix(),
        "manifest_json": (base / "overlays_manifest.json").as_posix(),
    },
}

summary
# Re-run generation after kernel reset

import json
import pandas as pd
from pathlib import Path

# Same output directory as the first run; everything below regenerates it.
base = Path("/mnt/data")

# Sector -> {services, standards, anchors} table (regenerated after reset).
# "Manufacturing & Supply Chain" and "Supply Chain" are intentionally
# identical — the taxonomy merges them, and the alias node is kept for
# overlay clarity. Define the shared attributes once and give each entry its
# own set copies so mutating one sector never silently changes the other.
_MFG_SUPPLY_CHAIN = {
    "services": {"IDaaS","OaaS","TaaS","ProvAaaS","InteropaaS","ACaaS","VaaS","DaaS","AnaaS","VizaaS"},
    "standards": {"GS1 EPCIS","RDF/JSON-LD","W3C VC"},
    "anchors": {"Batch Certificates","Cultures/Sequences"},
}

sectors = {
    "Defense & Security": {
        "services": {"NaaS","AaaS","RegaaS","IDaaS","ValaaS","OaaS","TaaS","RaaS","ProvAaaS","VaaS","InteropaaS","ACaaS","DaaS","AnaaS","VizaaS"},
        "standards": {"STIX/TAXII","RDF/JSON-LD","W3C VC"},
        "anchors": {"Sequences","Strains","Specimens","Digital Credentials"},
    },
    "Manufacturing & Supply Chain": {k: set(v) for k, v in _MFG_SUPPLY_CHAIN.items()},
    # Alias node with identical attributes (see note above).
    "Supply Chain": {k: set(v) for k, v in _MFG_SUPPLY_CHAIN.items()},
    "Smart Cities & Infrastructure": {
        "services": {"NaaS","IDaaS","OaaS","TaaS","ProvAaaS","InteropaaS","ACaaS","DaaS","AnaaS","VizaaS"},
        "standards": {"OGC","RDF","JSON-LD","W3C VC"},
        "anchors": {"Digital Certificates","Relevés/Specimens"},
    },
    "Energy Systems": {
        "services": {"IDaaS","OaaS","TaaS","ProvAaaS","VaaS","InteropaaS","ACaaS","DaaS","AnaaS","VizaaS"},
        "standards": {"RDF/OWL","CFIHOS","OPC UA","ISO 15926","W3C VC"},
        "anchors": {"Digital Credentials","Material Samples"},
    },
    "Climate & Geoscience": {
        "services": {"OaaS","TaaS","RaaS","ProvAaaS","InteropaaS","DaaS","AnaaS","VizaaS"},
        "standards": {"OGC","NetCDF","RDF","JSON-LD"},
        "anchors": {"Type Relevé","Specimens","Isotope Anchors"},
    },
}

def build_overlay(group_name, sector_names, sector_map=None):
    """Build overlay graph data (nodes + pairwise edges) for one sector group.

    Parameters
    ----------
    group_name : str
        Label stored on every edge; later used to partition output files.
    sector_names : list[str]
        Sector names (keys of *sector_map*) to include in this overlay.
    sector_map : dict | None
        Mapping of sector name -> {"services", "standards", "anchors"} sets.
        Defaults to the module-level ``sectors`` table, preserving the
        original call signature.

    Returns
    -------
    tuple[list[dict], list[dict]]
        ``(nodes, edges)``: one node per sector with sorted attributes, and
        one edge per unordered sector pair with shared-attribute lists,
        counts, and a weighted ``overlap_score`` (services weigh 1,
        standards and anchors 0.5 each).
    """
    if sector_map is None:
        sector_map = sectors  # module-level table defined above

    nodes = [
        {
            "sector": name,
            "services": sorted(sector_map[name]["services"]),
            "standards": sorted(sector_map[name]["standards"]),
            "anchors": sorted(sector_map[name]["anchors"]),
        }
        for name in sector_names
    ]

    edges = []
    # Unordered pairs (i < j): one edge per sector pair.
    for i, name_a in enumerate(sector_names):
        for name_b in sector_names[i + 1:]:
            a = sector_map[name_a]
            b = sector_map[name_b]
            shared_services = sorted(a["services"] & b["services"])
            shared_standards = sorted(a["standards"] & b["standards"])
            shared_anchors = sorted(a["anchors"] & b["anchors"])
            edges.append({
                "group": group_name,
                "source": name_a,
                "target": name_b,
                "shared_services": shared_services,
                "shared_services_count": len(shared_services),
                "shared_standards": shared_standards,
                "shared_standards_count": len(shared_standards),
                "shared_anchors": shared_anchors,
                "shared_anchors_count": len(shared_anchors),
                # Services dominate the score; standards/anchors count half.
                "overlap_score": len(shared_services)
                + 0.5 * len(shared_standards)
                + 0.5 * len(shared_anchors),
            })
    return nodes, edges

# Overlay groups: each overlay compares its member sectors pairwise.
overlay_specs = {
    "Defense–Manufacturing–SupplyChain": ["Defense & Security","Manufacturing & Supply Chain","Supply Chain"],
    "SmartCities–Energy–Climate": ["Smart Cities & Infrastructure","Energy Systems","Climate & Geoscience"],
}

# Manifest skeleton; per-group entries filled below, "files" merged in later.
manifest = {"name": "UCLS Overlays", "version": "1.0", "generated": True, "groups": {}}

all_edges = []
all_nodes = []

for grp, member_list in overlay_specs.items():
    grp_nodes, grp_edges = build_overlay(grp, member_list)
    for node in grp_nodes:
        all_nodes.append({"group": grp, **node})
    all_edges.extend(grp_edges)
    manifest["groups"][grp] = {"members": member_list, "edge_count": len(grp_edges)}

def write_group_files(group, members):
    """Persist one overlay group's nodes/edges as CSV and JSON under *base*.

    *members* is unused here (rows are selected by their "group" tag) but is
    kept so existing call sites remain valid. Returns a dict of path strings.
    """
    group_nodes = [node for node in all_nodes if node["group"] == group]
    group_edges = [edge for edge in all_edges if edge["group"] == group]
    stem = f"overlay_{group}"
    edge_csv = base / f"{stem}_edges.csv"
    edge_json = base / f"{stem}_edges.json"
    node_csv = base / f"{stem}_nodes.csv"
    node_json = base / f"{stem}_nodes.json"
    pd.DataFrame(group_edges).to_csv(edge_csv, index=False)
    pd.DataFrame(group_nodes).to_csv(node_csv, index=False)
    edge_json.write_text(json.dumps(group_edges, indent=2))
    node_json.write_text(json.dumps(group_nodes, indent=2))
    return {
        "edge_csv": str(edge_csv),
        "edge_json": str(edge_json),
        "node_csv": str(node_csv),
        "node_json": str(node_json),
    }

# Per-group files, indexed by group name.
file_index = {}
for grp, grp_members in overlay_specs.items():
    file_index[grp] = write_group_files(grp, grp_members)

# Combined outputs spanning every overlay group.
edges_df = pd.DataFrame(all_edges)
nodes_df = pd.DataFrame(all_nodes)
edges_df.to_csv(base / "overlays_all_edges.csv", index=False)
nodes_df.to_csv(base / "overlays_all_nodes.csv", index=False)
(base / "overlays_manifest.json").write_text(
    json.dumps({**manifest, "files": file_index}, indent=2)
)

# Append markdown links
md_path = base / "ucls_directory_wordpress.md"
append_md = [
    "\n\n---\n\n## Inter-Sector Overlays (UCLS)\n",
    "> Auto-generated overlay graphs showing shared services, standards, and anchors.\n\n",
]

# One markdown section per group: heading plus the four file links.
for grp, grp_files in file_index.items():
    append_md.extend([
        f"### {grp.replace('–',' — ')}\n",
        f"- **Edges (CSV):** {Path(grp_files['edge_csv']).as_posix()}\n",
        f"- **Edges (JSON):** {Path(grp_files['edge_json']).as_posix()}\n",
        f"- **Nodes (CSV):** {Path(grp_files['node_csv']).as_posix()}\n",
        f"- **Nodes (JSON):** {Path(grp_files['node_json']).as_posix()}\n\n",
    ])

append_md.append(
    "### Combined\n"
    f"- **All Overlays — Edges (CSV):** {(base / 'overlays_all_edges.csv').as_posix()}\n"
    f"- **All Overlays — Nodes (CSV):** {(base / 'overlays_all_nodes.csv').as_posix()}\n"
    f"- **Manifest (JSON):** {(base / 'overlays_manifest.json').as_posix()}\n"
)

if md_path.exists():
    # Existing WordPress markdown: append the overlay section.
    with md_path.open("a") as handle:
        handle.write("".join(append_md))
    action = "appended to existing WordPress MD"
else:
    # Fresh file containing only the overlay section.
    md_path.write_text("# UCLS Inter-Sector Overlays\n" + "".join(append_md))
    action = "created new overlay MD"

# Bare dict expression: surfaced as the notebook cell's output value.
{
    "action": action,
    "wordpress_md": md_path.as_posix(),
    "groups": file_index,
    "combined": {
        "edges_csv": (base / "overlays_all_edges.csv").as_posix(),
        "nodes_csv": (base / "overlays_all_nodes.csv").as_posix(),
        "manifest_json": (base / "overlays_manifest.json").as_posix(),
    },
}