# Run locally or in a notebook.
import unicodedata, json, datetime
def graphemes(s: str) -> list:
    """Split *s* into a list of one-character strings.

    This is a simple fallback, not real grapheme segmentation: iterating a
    Python ``str`` yields one item per Unicode code point, so a base letter
    followed by a combining mark is returned as two separate items. That is
    good enough for the demo output.
    """
    return list(s)
def ascii_map_with_fallbacks(s: str):
    """Normalize *s*, strip combining marks, and describe each character.

    Returns:
        A 3-tuple ``(nfc, stripped, out)`` where

        * ``nfc`` is *s* in NFC form,
        * ``stripped`` is the NFD form of ``nfc`` with every combining
          character removed (so ``"é"`` becomes ``"e"``),
        * ``out`` is a list with one dict per character of ``stripped``,
          holding ``"ch"``, ``"dec"`` (the code point) and ``"hex"``.
          Characters above code point 127 additionally carry
          ``"fallback": "?"``.
    """
    nfc = unicodedata.normalize("NFC", s)
    # Decompose so that accents become separate combining characters that
    # can be filtered out below.
    nfd = unicodedata.normalize("NFD", nfc)
    stripped = "".join(ch for ch in nfd if not unicodedata.combining(ch))

    out = []
    for ch in stripped:
        code = ord(ch)
        # NOTE: str.upper() also upper-cases the prefix, so values look like
        # "0X63" rather than "0x63"; kept as-is to preserve the output format.
        entry = {"ch": ch, "dec": code, "hex": hex(code).upper()}
        if code > 127:
            # Not representable in ASCII even after stripping accents.
            entry["fallback"] = "?"
        out.append(entry)
    return nfc, stripped, out
def sgi(units=True, etymon=True, scope=True):
    """Score the three checks and report whether all of them passed.

    Each flag is converted with ``int()`` and the three values are averaged.

    Returns:
        A 2-tuple ``(score, passed)``: the average rounded to two decimals,
        and ``True`` only when the unrounded average is exactly ``1.0``.
    """
    score = (int(units) + int(etymon) + int(scope)) / 3
    # Compare the unrounded value so a near-miss can never round up to a pass.
    return round(score, 2), score == 1.0
def provenance(input_text, normalized, ascii_list, sgi_score, sgi_pass, *, checks=None):
    """Build the provenance record for one pipeline run.

    Args:
        input_text: The raw input string.
        normalized: The normalized form of the input.
        ascii_list: The per-character list stored under ``"ascii_stream"``.
        sgi_score: Score stored under ``sgi.score``.
        sgi_pass: Pass flag stored under ``sgi.pass``.
        checks: Optional mapping recorded under ``sgi.checks``. When omitted,
            all three checks (``units``, ``etymon``, ``scope``) are recorded
            as ``True``, which is the original hard-coded behavior.

    Returns:
        A dict of plain values (strings, bools, lists, dicts) plus the
        arguments as given.
    """
    if checks is None:
        checks = {"units": True, "etymon": True, "scope": True}

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # time, then drop tzinfo so isoformat() emits no "+00:00" offset and the
    # "...Z" format stays unchanged.
    now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    return {
        "node_id": "STT-OS-Pipeline-v1",
        "timestamp": now_utc.isoformat() + "Z",
        "input_text": input_text,
        "normalized_text": normalized,
        "ascii_stream": ascii_list,
        "sgi": {
            "score": sgi_score,
            "pass": sgi_pass,
            # Copy so later mutation of the caller's mapping cannot alter the record.
            "checks": dict(checks),
        },
        "policy": {"halt_on_sgi_below": "1.00", "review_required_if": "<1.00"},
        "lineage": {
            "glossary_ref": "Glossary v1.2",
            "pages": [
                "Phase 5.O Ω – Extended Edition",
                "Unified Harmonics Audit (Final 10/10 Edition)",
                "Archival Mapping (Phases 1–5.O Ω)",
            ],
        },
    }
# Demo entry point: run the whole pipeline on one sample string and print
# every intermediate result, ending with the provenance record as JSON.
if __name__ == "__main__":
    sample = "café and colour I read today"
    print("INPUT:", sample)
    print("GRAPHEMES:", graphemes(sample))

    nfc, stripped, codes = ascii_map_with_fallbacks(sample)
    print("NFC:", nfc)
    print("ASCII/STRIPPED:", stripped)
    # Show only the first few entries; the full list is in the JSON below.
    print("CODES:", codes[:8], "... total:", len(codes))

    score, ok = sgi(True, True, True)
    print("SGI:", score, "PASS" if ok else "HALT")

    prov = provenance(sample, nfc, codes, score, ok)
    print(json.dumps(prov, indent=2))