DTC and SCI improvements

2026-05-25 14:22:32 +10:00
parent 62d1c3c876
commit 80819448cf
21 changed files with 13823 additions and 86 deletions
--- a/h8536/pseudocode.py
+++ b/h8536/pseudocode.py
@@ -0,0 +1,652 @@
+from __future__ import annotations
+
+import argparse
+import json
+import re
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+
+# Alias for a decoded JSON object; used for the decompiler payload and the
+# instruction/function records nested inside it.
+JsonObject = dict[str, Any]
+
+
+# Maps a conditional-branch mnemonic to the C-like flag expression that is
+# emitted inside `if (...) goto label;`. C, Z, N and V are the pseudocode flag
+# variables declared by _file_header(). Mnemonics that are not listed here are
+# rendered by _branch_or_jump_statement() as a cond_<mnemonic>() placeholder.
+BRANCH_CONDITIONS = {
+    "BRN": "0",
+    "BHI": "!C && !Z",
+    "BLS": "C || Z",
+    "BCC": "!C",
+    "BCS": "C",
+    "BNE": "!Z",
+    "BEQ": "Z",
+    "BVC": "!V",
+    "BVS": "V",
+    "BPL": "!N",
+    "BMI": "N",
+    "BGE": "N == V",
+    "BLT": "N != V",
+    "BGT": "!Z && (N == V)",
+    "BLE": "Z || (N != V)",
+}
+
+
+@dataclass(frozen=True)
+class PseudocodeOptions:
+    """Immutable switches that control what generate_pseudocode() emits."""
+
+    # Append the original assembly text to each statement's trailing comment.
+    include_asm: bool = True
+    # Put the instruction address (four hex digits) in the trailing comment.
+    include_addresses: bool = True
+    # Add a cycle summary to the trailing comment when the instruction has one.
+    include_cycles: bool = False
+    # Emit the extern register symbols and function prototypes section.
+    emit_declarations: bool = True
+    # Render only the first N functions; None renders all of them.
+    max_functions: int | None = None
+
+
+def generate_pseudocode(
+    payload: JsonObject,
+    *,
+    source_name: str = "",
+    options: PseudocodeOptions | None = None,
+) -> str:
+    """Render decompiler JSON as one C-like pseudocode document.
+
+    Args:
+        payload: Decoded decompiler output; the "instructions", "vectors" and
+            "call_graph" keys are read when present.
+        source_name: Optional origin name mentioned in the file header.
+        options: Rendering switches; defaults to ``PseudocodeOptions()``.
+
+    Returns:
+        The pseudocode text, terminated by exactly one newline.
+    """
+    settings = options or PseudocodeOptions()
+    instruction_list = list(payload.get("instructions", []))
+    labels = _collect_label_names(payload)
+
+    nodes = _function_nodes(payload, instruction_list, labels)
+    if settings.max_functions is not None:
+        nodes = nodes[: settings.max_functions]
+
+    output: list[str] = list(_file_header(source_name, payload))
+    if settings.emit_declarations:
+        output += _declarations(instruction_list, nodes, labels)
+
+    instruction_at = {int(item["address"]): item for item in instruction_list}
+    covered: set[int] = set()
+    for node in nodes:
+        body, owned = _render_function(node, instruction_at, labels, settings)
+        if not body:
+            continue
+        output += body
+        covered.update(owned)
+
+    # Anything no function claimed is still emitted, in one catch-all block.
+    leftovers = [addr for addr in sorted(instruction_at) if addr not in covered]
+    if leftovers:
+        output += _render_orphan_block(leftovers, instruction_at, labels, settings)
+
+    return "\n".join(output).rstrip() + "\n"
+
+
+def load_pseudocode_input(path: Path) -> JsonObject:
+    """Read *path* as UTF-8 JSON and check that it looks like decompiler output.
+
+    Raises:
+        ValueError: If the document is not an object with an "instructions" key.
+    """
+    payload = json.loads(path.read_text(encoding="utf-8"))
+    looks_valid = isinstance(payload, dict) and "instructions" in payload
+    if looks_valid:
+        return payload
+    raise ValueError(f"{path} does not look like h8536_decompiler JSON output")
+
+
+def write_pseudocode(input_path: Path, output_path: Path, options: PseudocodeOptions) -> None:
+    """Load decompiler JSON from *input_path* and write pseudocode to *output_path*.
+
+    Missing parent directories of *output_path* are created. The output file is
+    written as UTF-8.
+    """
+    payload = load_pseudocode_input(input_path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    rendered = generate_pseudocode(payload, source_name=str(input_path), options=options)
+    output_path.write_text(rendered, encoding="utf-8")
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Command-line entry point: parse arguments, write the pseudocode file.
+
+    Args:
+        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.
+
+    Returns:
+        Always 0; failures surface as exceptions or argparse exits.
+    """
+    cli = argparse.ArgumentParser(
+        description="Generate conservative C-like pseudocode from h8536_decompiler JSON output.",
+    )
+    cli.add_argument(
+        "input",
+        nargs="?",
+        type=Path,
+        default=Path("build/rom_decompiled.json"),
+        help="structured JSON emitted by h8536_decompiler.py",
+    )
+    cli.add_argument(
+        "--out",
+        type=Path,
+        default=Path("build/rom_pseudocode.c"),
+        help="pseudocode output path",
+    )
+    # Plain on/off switches, registered in the order they appear in --help.
+    switches = (
+        ("--no-asm", "omit original assembly from line comments"),
+        ("--no-addresses", "omit instruction addresses from line comments"),
+        ("--cycles", "include cycle estimates when present in JSON"),
+        ("--no-declarations", "omit register/function declarations"),
+    )
+    for flag, description in switches:
+        cli.add_argument(flag, action="store_true", help=description)
+    cli.add_argument("--max-functions", type=int, default=None, help="emit only the first N functions")
+    parsed = cli.parse_args(argv)
+
+    settings = PseudocodeOptions(
+        include_asm=not parsed.no_asm,
+        include_addresses=not parsed.no_addresses,
+        include_cycles=parsed.cycles,
+        emit_declarations=not parsed.no_declarations,
+        max_functions=parsed.max_functions,
+    )
+    write_pseudocode(parsed.input, parsed.out, settings)
+    print(f"wrote {parsed.out}")
+    return 0
+
+
+def _file_header(source_name: str, payload: JsonObject) -> list[str]:
+    """Build the banner comment and the fixed C prelude that open the output.
+
+    The banner reports how many vectors, call-graph nodes and instructions the
+    payload contains; *source_name* is mentioned only when it is non-empty.
+    """
+    counts = (
+        len(payload.get("vectors", [])),
+        len(payload.get("call_graph", {}).get("nodes", [])),
+        len(payload.get("instructions", [])),
+    )
+    origin = f" from {source_name}" if source_name else ""
+
+    banner = [
+        "/*",
+        f" * H8/536 C-like pseudocode{origin}",
+        " *",
+        " * This is a conservative structural translation of the decompiler JSON.",
+        " * Helpers such as set_flags_cmp8(), MEM8[], BIT(), C/Z/N/V, and",
+        " * return_from_interrupt() are pseudocode placeholders, not a runtime ABI.",
+        " *",
+        " * vectors: {}, functions: {}, instructions: {}".format(*counts),
+        " */",
+        "",
+    ]
+    # Types, memory arrays and CPU state referenced by the generated statements.
+    prelude = [
+        "#include <stdint.h>",
+        "",
+        "typedef uint8_t u8;",
+        "typedef uint16_t u16;",
+        "",
+        "#define BIT(n) (1u << (n))",
+        "extern volatile u8 MEM8[0x10000];",
+        "extern volatile u16 MEM16[0x10000];",
+        "",
+        "u16 R0, R1, R2, R3, R4, R5, R6, R7;",
+        "u16 SR;",
+        "u8 CCR, BR, EP, DP, TP;",
+        "int C, Z, N, V;",
+        "",
+    ]
+    return banner + prelude
+
+
+def _declarations(instructions: list[JsonObject], functions: list[JsonObject], labels: dict[int, str]) -> list[str]:
+    """Build the extern register symbols and the function prototype section.
+
+    Args:
+        instructions: Instruction records whose named references become
+            ``extern volatile`` symbols, sorted by address.
+        functions: Function nodes; one ``void name(void);`` prototype each.
+        labels: Address-to-label map consulted first for function names.
+
+    Returns:
+        Output lines; each non-empty section ends with a blank line.
+    """
+    lines: list[str] = []
+    registers = _referenced_io_registers(instructions)
+    if registers:
+        lines.append("/* H8/536 register field symbols used by this ROM. */")
+        for name, (address, width) in sorted(registers.items(), key=lambda item: item[1][0]):
+            c_type = "u16" if width == 16 else "u8"
+            lines.append(f"extern volatile {c_identifier(name) and c_type} {c_identifier(name)}; /* 0x{address:04X} */")
+        lines.append("")
+
+    if functions:
+        lines.append("/* Function entry points discovered from vectors and call targets. */")
+        for function in functions:
+            start = int(function["start"])
+            # Use the same fallback chain as _render_function() so a prototype
+            # can never be named differently from the definition it announces.
+            label = labels.get(start, str(function.get("label") or _label_for(start)))
+            lines.append(f"void {c_identifier(label)}(void);")
+        lines.append("")
+    return lines
+
+
+def _referenced_io_registers(instructions: list[JsonObject]) -> dict[str, tuple[int, int]]:
+    """Collect every named reference as ``name -> (address, widest access in bits)``.
+
+    The access width comes from the size suffix of the referencing mnemonic.
+    References without a name are ignored. The address stored is the one from
+    the most recent reference seen for that name.
+    """
+    found: dict[str, tuple[int, int]] = {}
+    for ins in instructions:
+        access_bits = _size_bits(_mnemonic_size(str(ins.get("mnemonic", ""))))
+        for ref in ins.get("references", []):
+            name = ref.get("name")
+            if name:
+                location = int(ref["address"])
+                previous = found.get(name)
+                known_bits = previous[1] if previous else 8
+                found[name] = (location, max(known_bits, access_bits))
+    return found
+
+
+def _collect_label_names(payload: JsonObject) -> dict[int, str]:
+    """Map addresses to C-safe label names gathered from the whole payload.
+
+    Three sources are consulted in order: vector targets, call-graph nodes
+    (which override a vector label at the same address) and instruction
+    targets (which only fill addresses that are still unnamed).
+    """
+    names: dict[int, str] = {}
+
+    for vector in payload.get("vectors", []):
+        address, text = vector.get("target"), vector.get("target_label")
+        if address is None or not text:
+            continue
+        names[int(address)] = c_identifier(str(text))
+
+    for node in payload.get("call_graph", {}).get("nodes", []):
+        entry = int(node["start"])
+        names[entry] = c_identifier(str(node.get("label") or _label_for(entry)))
+
+    for ins in payload.get("instructions", []):
+        for raw_target in ins.get("targets", []):
+            address = int(raw_target)
+            if address not in names:
+                names[address] = c_identifier(_label_for(address))
+
+    return names
+
+
+def _function_nodes(
+    payload: JsonObject,
+    instructions: list[JsonObject],
+    labels: dict[int, str],
+) -> list[JsonObject]:
+    """Return the function nodes to render, ordered by start address.
+
+    Call-graph nodes from the payload are copied and sorted. When there are
+    none, a single synthetic node spanning every instruction is returned, or
+    an empty list if there are no instructions either.
+    """
+    nodes = sorted(
+        (dict(node) for node in payload.get("call_graph", {}).get("nodes", [])),
+        key=lambda node: int(node["start"]),
+    )
+    if nodes:
+        return nodes
+
+    if not instructions:
+        return []
+    # Coerce before comparing: min()/max() over raw values would order string
+    # addresses lexicographically and could yield an inverted range.
+    addresses = [int(ins["address"]) for ins in instructions]
+    start, end = min(addresses), max(addresses)
+    return [
+        {
+            "start": start,
+            "end": end,
+            "label": labels.get(start) or _label_for(start),
+            "sources": [],
+            "instruction_count": len(instructions),
+            "calls": [],
+            "unresolved_calls": 0,
+        },
+    ]
+
+
+def _render_function(
+    function: JsonObject,
+    by_address: dict[int, JsonObject],
+    labels: dict[int, str],
+    opts: PseudocodeOptions,
+) -> tuple[list[str], set[int]]:
+    """Render one function node as a ``void name(void) { ... }`` block.
+
+    Returns:
+        The output lines and the set of instruction addresses they cover. Both
+        are empty when no instruction lies inside the node's start..end range.
+    """
+    entry = int(function["start"])
+    last = int(function.get("end", entry))
+    owned = [addr for addr in sorted(by_address) if entry <= addr <= last]
+    if not owned:
+        return [], set()
+
+    fallback_label = str(function.get("label") or _label_for(entry))
+    name = c_identifier(labels.get(entry, fallback_label))
+
+    # Label every local branch target and every address with a known name,
+    # except the entry itself: that one is already the function name.
+    labelled = {addr for addr in owned if addr in labels}
+    labelled |= _local_target_addresses(owned, by_address)
+    labelled.discard(entry)
+
+    out = [f"void {name}(void)", "{"]
+    sources = function.get("sources") or []
+    if sources:
+        joined = ", ".join(str(source) for source in sources)
+        out.append(f"    /* vector sources: {joined} */")
+
+    for addr in owned:
+        if addr in labelled:
+            out.append(f"{labels.get(addr, _label_for(addr))}:")
+        ins = by_address[addr]
+        out.append(f"    {_translate_instruction(ins, labels)}{_line_comment(ins, opts)}")
+
+    out.extend(["}", ""])
+    return out, set(owned)
+
+
+def _render_orphan_block(
+    addresses: list[int],
+    by_address: dict[int, JsonObject],
+    labels: dict[int, str],
+    opts: PseudocodeOptions,
+) -> list[str]:
+    """Render instructions that no function claimed as one catch-all block.
+
+    Every local branch target and every address with a known label gets a
+    label line in front of its statement.
+    """
+    labelled = {addr for addr in addresses if addr in labels}
+    labelled |= _local_target_addresses(addresses, by_address)
+
+    out = ["void unreached_or_unowned_code(void)", "{"]
+    for addr in addresses:
+        if addr in labelled:
+            out.append(f"{labels.get(addr, _label_for(addr))}:")
+        ins = by_address[addr]
+        out.append(f"    {_translate_instruction(ins, labels)}{_line_comment(ins, opts)}")
+    out.extend(["}", ""])
+    return out
+
+
+def _local_target_addresses(addresses: list[int], by_address: dict[int, JsonObject]) -> set[int]:
+    """Return the branch/call targets of *addresses* that land inside *addresses*."""
+    referenced = {
+        int(raw_target)
+        for address in addresses
+        for raw_target in by_address[address].get("targets", [])
+    }
+    # Keep only targets that belong to the same block.
+    return referenced & set(addresses)
+
+
+def _translate_instruction(ins: JsonObject, labels: dict[int, str]) -> str:
+    """Translate one instruction record into a single C-like statement.
+
+    The record's "kind" is checked first (return, rte, sleep, call, branch,
+    jump). Everything else is dispatched on the mnemonic with its size suffix
+    removed. Operands are read source first, destination last.
+
+    Args:
+        ins: Instruction record; "mnemonic", "operands", "kind" and "text" are
+            read, each with a default when missing.
+        labels: Address-to-label map used for call and branch targets.
+
+    Returns:
+        The statement text. A mnemonic/operand-count combination that matches
+        no rule becomes an ``asm_<mnemonic>("<text>");`` placeholder.
+    """
+    mnemonic = str(ins.get("mnemonic", ""))
+    operands = str(ins.get("operands", ""))
+    kind = str(ins.get("kind", "normal"))
+    ops = split_operands(operands)
+    base = _mnemonic_base(mnemonic)
+    size = _mnemonic_size(mnemonic)
+
+    # Control flow is classified by the decompiler ("kind"), not by mnemonic.
+    if kind == "return":
+        if ops:
+            return f"return_with_stack_adjust({_format_operand(ops[0], size)});"
+        return "return;"
+    if kind == "rte":
+        return "return_from_interrupt();"
+    if kind == "sleep":
+        return "sleep_until_interrupt();"
+    if kind == "call":
+        return _call_statement(ins, labels, ops)
+    if kind in {"branch", "jump"}:
+        return _branch_or_jump_statement(ins, labels, ops, base)
+
+    # A base that starts with "." is treated as a data directive.
+    if base.startswith("."):
+        return f"emit_data({_quoted(str(ins.get('text', mnemonic)))});"
+
+    # Moves: plain assignment with a cast to the operation size.
+    if base in {"MOV", "MOV:G", "MOV:I", "MOV:E", "MOV:L", "MOV:S", "MOV:F"} and len(ops) == 2:
+        source = _format_operand(ops[0], size)
+        dest = _format_operand(ops[1], size, lvalue=True)
+        return f"{dest} = {_cast(source, size)};"
+
+    if base in {"MOVFPE"} and len(ops) == 2:
+        source = _format_operand(ops[0], size)
+        dest = _format_operand(ops[1], size, lvalue=True)
+        return f"{dest} = read_eclock({source});"
+    if base in {"MOVTPE"} and len(ops) == 2:
+        source = _format_operand(ops[0], size)
+        dest = _format_operand(ops[1], size, lvalue=True)
+        return f"write_eclock({dest}, {source});"
+
+    # Arithmetic and logic with a direct C compound-assignment equivalent.
+    if base in {"ADD", "ADD:G", "ADD:Q", "ADDS"} and len(ops) == 2:
+        return _binary_update(ops, "+=", size)
+    if base in {"SUB", "SUBS"} and len(ops) == 2:
+        return _binary_update(ops, "-=", size)
+    if base == "OR" and len(ops) == 2:
+        return _binary_update(ops, "|=", size)
+    if base == "AND" and len(ops) == 2:
+        return _binary_update(ops, "&=", size)
+    if base == "XOR" and len(ops) == 2:
+        return _binary_update(ops, "^=", size)
+
+    # These are routed through a size-suffixed helper call (e.g. addx8)
+    # instead of a C operator.
+    if base in {"ADDX", "SUBX", "MULXU", "DIVXU"} and len(ops) == 2:
+        source = _format_operand(ops[0], size)
+        dest = _format_operand(ops[1], size, lvalue=True)
+        helper = _helper_name(base, size)
+        return f"{dest} = {helper}({dest}, {source});"
+
+    # Compare/test only update flags, so they become set_flags_* calls.
+    if base in {"CMP", "CMP:G", "CMP:I", "CMP:E"} and len(ops) == 2:
+        source = _format_operand(ops[0], size)
+        dest = _format_operand(ops[1], size)
+        return f"{_helper_name('set_flags_cmp', size)}({dest}, {source});"
+    if base == "TST" and len(ops) == 1:
+        return f"{_helper_name('set_flags_tst', size)}({_format_operand(ops[0], size)});"
+
+    # Single-operand read-modify-write operations.
+    if base == "CLR" and len(ops) == 1:
+        return f"{_format_operand(ops[0], size, lvalue=True)} = 0;"
+    if base == "NEG" and len(ops) == 1:
+        target = _format_operand(ops[0], size, lvalue=True)
+        return f"{target} = -{target};"
+    if base == "NOT" and len(ops) == 1:
+        target = _format_operand(ops[0], size, lvalue=True)
+        return f"{target} = ~{target};"
+
+    # Arithmetic and logical shifts are rendered identically, one bit each.
+    if base in {"SHAL", "SHLL"} and len(ops) == 1:
+        target = _format_operand(ops[0], size, lvalue=True)
+        return f"{target} <<= 1;"
+    if base in {"SHAR", "SHLR"} and len(ops) == 1:
+        target = _format_operand(ops[0], size, lvalue=True)
+        return f"{target} >>= 1;"
+    if base in {"ROTL", "ROTR", "ROTXL", "ROTXR"} and len(ops) == 1:
+        target = _format_operand(ops[0], size, lvalue=True)
+        return f"{target} = {_helper_name(base.lower(), size)}({target});"
+    if base == "SWAP" and len(ops) == 1:
+        target = _format_operand(ops[0], size, lvalue=True)
+        return f"{target} = swap_bytes({target});"
+    if base == "EXTU" and len(ops) == 1:
+        target = _format_operand(ops[0], size, lvalue=True)
+        return f"{target} = zero_extend8({target});"
+    if base == "EXTS" and len(ops) == 1:
+        target = _format_operand(ops[0], size, lvalue=True)
+        return f"{target} = sign_extend8({target});"
+
+    if base in {"BSET", "BCLR", "BNOT", "BTST"} and len(ops) == 2:
+        return _bit_statement(base, ops, size)
+
+    # Control-register transfers and logic are rendered like ordinary
+    # moves and compound assignments.
+    if base == "LDC" and len(ops) == 2:
+        source = _format_operand(ops[0], size)
+        dest = _format_operand(ops[1], size, lvalue=True)
+        return f"{dest} = {_cast(source, size)};"
+    if base == "STC" and len(ops) == 2:
+        source = _format_operand(ops[0], size)
+        dest = _format_operand(ops[1], size, lvalue=True)
+        return f"{dest} = {_cast(source, size)};"
+    if base == "ORC" and len(ops) == 2:
+        return _binary_update(ops, "|=", size)
+    if base == "ANDC" and len(ops) == 2:
+        return _binary_update(ops, "&=", size)
+    if base == "XORC" and len(ops) == 2:
+        return _binary_update(ops, "^=", size)
+
+    # Register-list and stack-frame operations. The register list is taken
+    # from the second operand for LDM and from the first for STM.
+    if base == "LDM" and len(ops) == 2:
+        return f"pop_registers({_register_list_argument(ops[1])});"
+    if base == "STM" and len(ops) == 2:
+        return f"push_registers({_register_list_argument(ops[0])});"
+    if base == "LINK" and len(ops) == 2:
+        return f"link_frame({_format_operand(ops[1], size)});"
+    if base == "UNLK":
+        return "unlink_frame();"
+    if base == "TRAPA" and ops:
+        return f"trap({_format_operand(ops[0], size)});"
+    if base == "TRAP/VS":
+        return "trap_vs();"
+    if base == "NOP":
+        return "/* nop */;"
+
+    # Nothing matched: keep the original text inside an opaque placeholder call.
+    return f"asm_{_safe_token(base)}({_quoted(str(ins.get('text') or mnemonic))});"
+
+
+def _branch_or_jump_statement(ins: JsonObject, labels: dict[int, str], ops: list[str], base: str) -> str:
+    """Render a branch or jump as ``goto`` (conditional where applicable).
+
+    Unconditional transfers without a resolved target become
+    ``goto_indirect(expr)``. Conditional ones fall back to the label
+    ``unknown_target``.
+    """
+    resolved = _target_label(ins, labels)
+
+    if base in {"BRA", "JMP", "PJMP"}:
+        if not resolved:
+            pointer = _format_operand(ops[0], "") if ops else "unknown_target"
+            return f"goto_indirect({pointer});"
+        return f"goto {resolved};"
+
+    destination = resolved or "unknown_target"
+    if base.startswith("SCB/"):
+        # SCB/<cond> Rn,label: the counter register is the first operand.
+        counter = _format_operand(ops[0], "") if ops else "R?"
+        suffix = base.partition("/")[2].lower()
+        predicate = f"scb_{suffix}({counter})"
+    else:
+        predicate = BRANCH_CONDITIONS.get(base, f"cond_{_safe_token(base)}()")
+    return f"if ({predicate}) goto {destination};"
+
+
+def _call_statement(ins: JsonObject, labels: dict[int, str], ops: list[str]) -> str:
+    """Render a call: direct when the target is known, else ``call_indirect``."""
+    callee = _target_label(ins, labels)
+    if not callee:
+        pointer = _format_operand(ops[0], "") if ops else "unknown_target"
+        return f"call_indirect({pointer});"
+    return f"{callee}();"
+
+
+def _target_label(ins: JsonObject, labels: dict[int, str]) -> str:
+    """Return the label of the instruction's first target, or "" if it has none."""
+    targets = ins.get("targets", [])
+    if not targets:
+        return ""
+    first = int(targets[0])
+    return labels.get(first, _label_for(first))
+
+
+def _binary_update(ops: list[str], operator: str, size: str) -> str:
+    """Render ``dest <operator> (cast)source;`` for a two-operand instruction."""
+    rhs = _cast(_format_operand(ops[0], size), size)
+    lhs = _format_operand(ops[1], size, lvalue=True)
+    return " ".join((lhs, operator, rhs)) + ";"
+
+
+def _bit_statement(base: str, ops: list[str], size: str) -> str:
+    """Render a bit instruction; the bit number is operand 0, the target operand 1.
+
+    BSET/BCLR/BNOT become mask updates through ``BIT()``. Any other mnemonic
+    is rendered as a ``set_flags_btst`` call.
+    """
+    bit = _format_operand(ops[0], size)
+    dest = _format_operand(ops[1], size, lvalue=True)
+
+    # Text placed between the destination and the BIT(...) mask.
+    mask_operators = {"BSET": "|= ", "BCLR": "&= ~", "BNOT": "^= "}
+    operator_text = mask_operators.get(base)
+    if operator_text is None:
+        return f"set_flags_btst({dest}, {bit});"
+    return f"{dest} {operator_text}BIT({bit});"
+
+
+def split_operands(operands: str) -> list[str]:
+    """Split an operand string on top-level commas.
+
+    Commas inside ``(...)`` or ``{...}`` do not split. Each piece is stripped
+    and empty pieces are dropped.
+    """
+    pieces: list[str] = []
+    current: list[str] = []
+    nesting = 0
+    for char in operands:
+        if char == "," and nesting == 0:
+            pieces.append("".join(current))
+            current = []
+            continue
+        if char in ("(", "{"):
+            nesting += 1
+        elif char in (")", "}") and nesting > 0:
+            # An unmatched closer is kept as text and never drives nesting negative.
+            nesting -= 1
+        current.append(char)
+    pieces.append("".join(current))
+
+    stripped = (piece.strip() for piece in pieces)
+    return [piece for piece in stripped if piece]
+
+
+def _format_operand(operand: str, size: str, *, lvalue: bool = False) -> str:
+    """Convert one assembly operand into a C-like expression.
+
+    ``H'..`` literals are rewritten to ``0x..`` first. Immediates lose their
+    ``#``. Memory operands become ``MEM8[...]``/``MEM16[...]`` according to
+    *size*. Other ``@name`` operands and bare symbols become C identifiers.
+    Anything unrecognised is returned unchanged. *lvalue* is accepted for
+    call-site readability and does not affect the result.
+    """
+    text = _replace_h_literals(operand.strip())
+    if text.startswith("#"):
+        return text[1:]
+
+    memory = _mem_name(size)
+
+    # @(disp, Rn): register plus displacement; a leading "-" becomes subtraction.
+    if text.startswith("@(") and text.endswith(")"):
+        fields = split_operands(text[2:-1])
+        if len(fields) == 2:
+            displacement, register = fields
+            if displacement.startswith("-"):
+                index = f"{register} - {displacement[1:]}"
+            else:
+                index = f"{register} + {displacement}"
+            return f"{memory}[{index}]"
+
+    if re.fullmatch(r"@-R[0-7]", text):
+        return f"{memory}[--{text[2:]}]"
+    if re.fullmatch(r"@R[0-7]\+", text):
+        return f"{memory}[{text[1:-1]}++]"
+    if re.fullmatch(r"@R[0-7]", text):
+        return f"{memory}[{text[1:]}]"
+
+    if text.startswith("@BR:"):
+        return f"{memory}[(BR << 8) | {text[4:]}]"
+    if text.startswith("@0x"):
+        return f"{memory}[{text[1:]}]"
+    if text.startswith("@"):
+        return c_identifier(text[1:])
+
+    if text.startswith("{") and text.endswith("}"):
+        return _register_list_argument(text)
+
+    # Bare symbols, including loc_XXXX labels, become C identifiers.
+    if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_/\?]*", text):
+        return c_identifier(text)
+    return text
+
+
+def _cast(expr: str, size: str) -> str:
+    """Wrap *expr* in a cast for size "B" or "W"; return it unchanged otherwise."""
+    c_type = {"B": "uint8_t", "W": "uint16_t"}.get(size)
+    return expr if c_type is None else f"({c_type})({expr})"
+
+
+def _line_comment(ins: JsonObject, opts: PseudocodeOptions) -> str:
+    """Build the trailing ``/* ... */`` comment for one statement.
+
+    Fragments appear in this order: address, assembly text, the record's own
+    comment, metadata comments, cycle summary. They are joined with "; ".
+    Returns "" when there is nothing to say.
+    """
+    fragments: list[str] = []
+    if opts.include_addresses:
+        fragments.append(format(int(ins["address"]), "04X"))
+    if opts.include_asm:
+        fragments.append(str(ins.get("text") or _instruction_text(ins)))
+
+    note = str(ins.get("comment") or "").strip()
+    if note:
+        fragments.append(note)
+    fragments += _metadata_comments(ins)
+
+    if opts.include_cycles and ins.get("cycles"):
+        fragments.append(_cycle_summary(ins["cycles"]))
+
+    if fragments:
+        body = "; ".join(map(_sanitize_comment, fragments))
+        return f" /* {body} */"
+    return ""
+
+
+def _metadata_comments(ins: JsonObject) -> list[str]:
+    """Collect extra comment fragments from an instruction's analysis metadata.
+
+    Two sources are read:
+
+    * ``ins["sci"]["inferences"]`` - the "comment" of every dict entry that
+      has a non-empty one.
+    * ``ins["peripheral_access"]`` - one "<register> <size> <direction> <byte>
+      TEMP access" line per dict entry that has both a register and a
+      direction. A missing size or byte is left out instead of being printed
+      as the text "None".
+
+    Missing, null or malformed containers contribute nothing.
+    """
+    comments: list[str] = []
+
+    sci = ins.get("sci")
+    if isinstance(sci, dict):
+        # "or ()" also covers an explicit JSON null.
+        for inference in sci.get("inferences") or ():
+            if isinstance(inference, dict) and inference.get("comment"):
+                comments.append(str(inference["comment"]))
+
+    for access in ins.get("peripheral_access") or ():
+        if not isinstance(access, dict):
+            continue
+        register = access.get("register")
+        direction = access.get("direction")
+        if not (register and direction):
+            continue
+        fields = (register, access.get("size"), direction, access.get("byte"))
+        described = " ".join(str(field) for field in fields if field is not None and field != "")
+        comments.append(f"{described} TEMP access")
+    return comments
+
+
+def _instruction_text(ins: JsonObject) -> str:
+    """Rebuild "<mnemonic> <operands>" for a record that carries no "text"."""
+    fields = (str(ins.get("mnemonic", "")), str(ins.get("operands", "")))
+    return " ".join(fields).strip()
+
+
+def _cycle_summary(cycles: JsonObject) -> str:
+    """Summarise a cycle record as ``cycles=N`` or ``cycles=NT/T nt/t``.
+
+    A record with neither a "cycles" key nor both branch keys yields
+    ``cycles=?``.
+    """
+    if "cycles" in cycles:
+        summary = "cycles={}".format(cycles["cycles"])
+    elif "taken" in cycles and "not_taken" in cycles:
+        summary = "cycles={}/{} nt/t".format(cycles["not_taken"], cycles["taken"])
+    else:
+        summary = "cycles=?"
+    return summary
+
+
+def _mnemonic_base(mnemonic: str) -> str:
+    """Return *mnemonic* without its trailing ``.<suffix>`` part.
+
+    Everything after the last dot is dropped ("MOV:G.B" -> "MOV:G"). A name
+    whose only dot is the leading one (a directive such as ".byte") is
+    returned unchanged: stripping it would leave an empty base, and the
+    directive would no longer be recognised by its leading dot.
+    """
+    head, dot, _suffix = mnemonic.rpartition(".")
+    return head if dot and head else mnemonic
+
+
+def _mnemonic_size(mnemonic: str) -> str:
+    """Return the operation size of *mnemonic*: "B", "W" or "" when unknown.
+
+    An explicit ".B"/".W" suffix wins. Without one, CMP:I counts as a word
+    and CMP:E as a byte operation.
+    """
+    _head, dot, suffix = mnemonic.rpartition(".")
+    if dot and suffix in ("B", "W"):
+        return suffix
+    # Mnemonic prefixes with an implied size.
+    for prefix, implied in (("CMP:I", "W"), ("CMP:E", "B")):
+        if mnemonic.startswith(prefix):
+            return implied
+    return ""
+
+
+def _size_bits(size: str) -> int:
+    """Return the width in bits for a size code: 16 for "W", otherwise 8."""
+    return {"W": 16}.get(size, 8)
+
+
+def _mem_name(size: str) -> str:
+    """Return the pseudocode memory array for a size code: MEM16 for "W", else MEM8."""
+    return {"W": "MEM16"}.get(size, "MEM8")
+
+
+def _helper_name(base: str, size: str) -> str:
+    """Build a helper name: the lower-cased token plus "8"/"16" for sizes B/W."""
+    if size == "B":
+        width = "8"
+    elif size == "W":
+        width = "16"
+    else:
+        width = ""
+    return _safe_token(base) + width
+
+
+def _register_list_argument(operand: str) -> str:
+    """Turn a ``{R0, R1}`` register list into a C argument list.
+
+    Returns the placeholder ``/* empty */`` when the list names no register.
+    """
+    body = operand.strip().strip("{}")
+    names: list[str] = []
+    for piece in body.split(","):
+        piece = piece.strip()
+        if piece:
+            names.append(c_identifier(piece))
+    if not names:
+        return "/* empty */"
+    return ", ".join(names)
+
+
+def _replace_h_literals(text: str) -> str:
+    """Rewrite every ``H'<hex>`` literal in *text* as ``0x<HEX>`` (upper-cased digits)."""
+
+    def as_c_hex(match: re.Match[str]) -> str:
+        digits = match.group(1)
+        return "0x" + digits.upper()
+
+    return re.sub(r"H'([0-9A-Fa-f]+)", as_c_hex, text)
+
+
+def c_identifier(name: str) -> str:
+    """Sanitise *name* into a C identifier.
+
+    Every run of characters other than ASCII letters and digits collapses to
+    one underscore, and underscores at either end are dropped. An empty
+    result becomes "unnamed". A leading digit gets an underscore prefix.
+    """
+    # "_" is outside the allowed class, so runs of underscores collapse here too.
+    collapsed = re.sub(r"[^0-9A-Za-z]+", "_", name.strip()).strip("_")
+    identifier = collapsed or "unnamed"
+    return "_" + identifier if identifier[0].isdigit() else identifier
+
+
+def _safe_token(text: str) -> str:
+    """Return *text* as a lower-case C identifier, for use in helper names."""
+    identifier = c_identifier(text)
+    return identifier.lower()
+
+
+def _label_for(address: int) -> str:
+    """Return the default label for *address*: ``loc_`` plus at least four upper-case hex digits."""
+    return "loc_" + format(address, "04X")
+
+
+def _quoted(text: str) -> str:
+    """Return *text* as a double-quoted, escaped string literal (JSON encoding)."""
+    literal = json.dumps(text)
+    return literal
+
+
+def _sanitize_comment(text: str) -> str:
+    """Make *text* safe inside a one-line ``/* ... */`` comment.
+
+    A comment terminator is split into "* /". Carriage returns and newlines
+    each become one space.
+    """
+    flattened = str(text).replace("*/", "* /")
+    for line_break in ("\r", "\n"):
+        flattened = flattened.replace(line_break, " ")
+    return flattened