h8-536-decoder/h8536/pseudocode.py

from __future__ import annotations

import argparse
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any

from .consistency import is_byte_immediate_to_word_destination


# Alias for a decoded JSON object (payload, instruction, vector, call-graph node, ...).
JsonObject = dict[str, Any]


# Maps a conditional-branch mnemonic base to the C-like flag expression that is
# emitted as the "branch taken" condition (see _branch_or_jump_statement and the
# do/while rendering in _render_structured_block).
BRANCH_CONDITIONS = {
    "BRN": "0",
    "BHI": "!C && !Z",
    "BLS": "C || Z",
    "BCC": "!C",
    "BCS": "C",
    "BNE": "!Z",
    "BEQ": "Z",
    "BVC": "!V",
    "BVS": "V",
    "BPL": "!N",
    "BMI": "N",
    "BGE": "N == V",
    "BLT": "N != V",
    "BGT": "!Z && (N == V)",
    "BLE": "Z || (N != V)",
}

# Logical complement of every BRANCH_CONDITIONS entry. A forward branch that
# skips a body is rendered as ``if (<negated condition>) { body }``, so the two
# tables must stay in sync key-for-key.
NEGATED_BRANCH_CONDITIONS = {
    "BRN": "1",
    "BHI": "C || Z",
    "BLS": "!C && !Z",
    "BCC": "C",
    "BCS": "!C",
    "BNE": "Z",
    "BEQ": "!Z",
    "BVC": "V",
    "BVS": "!V",
    "BPL": "N",
    "BMI": "!N",
    "BGE": "N != V",
    "BLT": "N == V",
    "BGT": "Z || (N != V)",
    "BLE": "!Z && (N == V)",
}

# Largest number of instructions a span may contain and still be rendered as a
# structured ``if`` body / ``do { } while`` body; longer spans stay label/goto.
_MAX_STRUCTURED_IF_BODY = 8
_MAX_STRUCTURED_LOOP_BODY = 24


@dataclass(frozen=True)
class PseudocodeOptions:
    """Immutable switches controlling what the pseudocode renderer emits."""

    include_asm: bool = True  # put the original assembly text in each line comment
    include_addresses: bool = True  # put the instruction address in each line comment
    include_cycles: bool = False  # append a cycle summary when the instruction carries one
    emit_declarations: bool = True  # emit extern/prototype declarations after the header
    max_functions: int | None = None  # render only the first N functions; None means all
    structured: bool = True  # recover if / do-while blocks instead of plain label/goto


@dataclass(frozen=True)
class _IfCandidate:
    """A forward conditional branch that can be rendered as a structured ``if``."""

    target_index: int  # index of the branch target in the block's address list; rendering resumes here
    target_address: int  # address of the branch target; its label is suppressed once structured
    condition: str  # negated branch condition, i.e. the condition under which the body runs
    instruction: JsonObject  # the conditional branch instruction itself (used for the line comment)


@dataclass(frozen=True)
class _LoopCandidate:
    """A backward conditional branch that can be rendered as ``do { ... } while``."""

    end_index: int  # index of the closing backward branch in the block's address list
    condition: str  # branch-taken condition, emitted as the ``while`` expression
    instruction: JsonObject  # the closing branch instruction (used for the line comment)


def generate_pseudocode(
    payload: JsonObject,
    *,
    source_name: str = "",
    options: PseudocodeOptions | None = None,
) -> str:
    """Render decompiler JSON as one C-like pseudocode translation unit.

    Args:
        payload: Decoded decompiler JSON. ``instructions`` is read here; the
            whole object is also handed to the label, function and header helpers.
        source_name: Optional input name mentioned in the file header.
        options: Rendering switches; defaults to ``PseudocodeOptions()``.

    Returns:
        The pseudocode text, terminated by exactly one newline.
    """
    from bisect import bisect_left, bisect_right

    opts = options or PseudocodeOptions()
    instructions = list(payload.get("instructions", []))
    label_names = _collect_label_names(payload)
    all_functions = _function_nodes(payload, instructions, label_names)
    functions = all_functions
    if opts.max_functions is not None:
        functions = all_functions[: opts.max_functions]

    lines: list[str] = []
    lines.extend(_file_header(source_name, payload))
    if opts.emit_declarations:
        lines.extend(_declarations(payload, instructions, functions, label_names))

    by_address = {int(ins["address"]): ins for ins in instructions}
    all_addresses = sorted(by_address)
    emitted: set[int] = set()
    for function in functions:
        function_lines, used_addresses = _render_function(function, by_address, label_names, opts)
        if function_lines:
            lines.extend(function_lines)
            emitted.update(used_addresses)

    # Functions cut off by max_functions still own their instructions. Without
    # this, every instruction of every skipped function would be dumped into the
    # "unreached_or_unowned_code" block, defeating the limit and mislabelling
    # owned code as orphaned.
    withheld: set[int] = set()
    for function in all_functions[len(functions) :]:
        start = int(function["start"])
        end = int(function.get("end", start))
        first = bisect_left(all_addresses, start)
        last = bisect_right(all_addresses, end)
        withheld.update(all_addresses[first:last])

    orphan_addresses = [
        address
        for address in all_addresses
        if address not in emitted and address not in withheld
    ]
    if orphan_addresses:
        lines.extend(_render_orphan_block(orphan_addresses, by_address, label_names, opts))

    return "\n".join(lines).rstrip() + "\n"


def load_pseudocode_input(path: Path) -> JsonObject:
    """Read *path* as UTF-8 JSON and return it if it is an object with ``instructions``.

    Raises:
        ValueError: The decoded document is not a dict or lacks an
            ``instructions`` key.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))
    if isinstance(payload, dict) and "instructions" in payload:
        return payload
    raise ValueError(f"{path} does not look like h8536_decompiler JSON output")


def write_pseudocode(input_path: Path, output_path: Path, options: PseudocodeOptions) -> None:
    """Load decompiler JSON from *input_path* and write its pseudocode to *output_path*.

    The output directory is created (with parents) before rendering starts.
    """
    payload = load_pseudocode_input(input_path)
    destination_dir = output_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    rendered = generate_pseudocode(payload, source_name=str(input_path), options=options)
    output_path.write_text(rendered, encoding="utf-8")


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: parse *argv*, write the pseudocode file, return 0."""
    parser = argparse.ArgumentParser(
        description="Generate conservative C-like pseudocode from h8536_decompiler JSON output.",
    )
    parser.add_argument(
        "input",
        nargs="?",
        type=Path,
        default=Path("build/rom_decompiled.json"),
        help="structured JSON emitted by h8536_decompiler.py",
    )
    parser.add_argument(
        "--out",
        type=Path,
        default=Path("build/rom_pseudocode.c"),
        help="pseudocode output path",
    )
    # Plain on/off switches, registered in the order they appear in --help.
    boolean_flags = (
        ("--no-asm", "omit original assembly from line comments"),
        ("--no-addresses", "omit instruction addresses from line comments"),
        ("--cycles", "include cycle estimates when present in JSON"),
        ("--no-declarations", "omit register/function declarations"),
        ("--no-structure", "preserve label/goto output without if/loop structuring"),
    )
    for flag, help_text in boolean_flags:
        parser.add_argument(flag, action="store_true", help=help_text)
    parser.add_argument("--max-functions", type=int, default=None, help="emit only the first N functions")
    parsed = parser.parse_args(argv)

    # The "--no-*" switches are negative on the command line but positive in the options.
    write_pseudocode(
        parsed.input,
        parsed.out,
        PseudocodeOptions(
            include_asm=not parsed.no_asm,
            include_addresses=not parsed.no_addresses,
            include_cycles=parsed.cycles,
            emit_declarations=not parsed.no_declarations,
            max_functions=parsed.max_functions,
            structured=not parsed.no_structure,
        ),
    )
    print(f"wrote {parsed.out}")
    return 0


def _file_header(source_name: str, payload: JsonObject) -> list[str]:
    vector_count = len(payload.get("vectors", []))
    function_count = len(payload.get("call_graph", {}).get("nodes", []))
    instruction_count = len(payload.get("instructions", []))
    source = f" from {source_name}" if source_name else ""
    return [
        "/*",
        f" * H8/536 C-like pseudocode{source}",
        " *",
        " * This is a conservative structural translation of the decompiler JSON.",
        " * Helpers such as set_flags_cmp8(), MEM8[], BIT(), C/Z/N/V, and",
        " * return_from_interrupt() are pseudocode placeholders, not a runtime ABI.",
        " *",
        f" * vectors: {vector_count}, functions: {function_count}, instructions: {instruction_count}",
        " */",
        "",
        "#include <stdint.h>",
        "",
        "typedef uint8_t u8;",
        "typedef uint16_t u16;",
        "",
        "#define BIT(n) (1u << (n))",
        "extern volatile u8 MEM8[0x10000];",
        "extern volatile u16 MEM16[0x10000];",
        "",
        "u16 R0, R1, R2, R3, R4, R5, R6, R7;",
        "u16 SR;",
        "u8 CCR, BR, EP, DP, TP;",
        "int C, Z, N, V;",
        "",
        "static inline u16 zero_extend8_to16(u8 value) { return (u16)value; }",
        "",
    ]


def _declarations(
    payload: JsonObject,
    instructions: list[JsonObject],
    functions: list[JsonObject],
    labels: dict[int, str],
) -> list[str]:
    """Build the declaration section: I/O registers, memory symbols, prototypes.

    Args:
        payload: Decoded decompiler JSON (source of the memory symbol table).
        instructions: All instructions; their named references become externs.
        functions: Function nodes that will be rendered; one prototype each.
        labels: Address-to-identifier map used to name the prototypes.

    Returns:
        Lines for each non-empty group, each group followed by a blank line.
    """
    lines: list[str] = []
    registers = _referenced_io_registers(instructions)
    if registers:
        lines.append("/* H8/536 register field symbols used by this ROM. */")
        # Emit in ascending address order.
        for name, (address, width) in sorted(registers.items(), key=lambda item: item[1][0]):
            c_type = "u16" if width == 16 else "u8"
            lines.append(f"extern volatile {c_type} {c_identifier(name)}; /* 0x{address:04X} */")
        lines.append("")

    memory_symbols = _referenced_memory_symbols(payload)
    if memory_symbols:
        lines.append("/* RAM/external symbols inferred from instruction references and data tables. */")
        for symbol in memory_symbols:
            c_type = "u16" if symbol.get("width") == "word" else "u8"
            width = symbol.get("width") or "unknown"
            # The symbol filter accepts entries without a "kind", so a plain
            # subscript here would raise KeyError; fall back like "width" does.
            kind = symbol.get("kind") or "unknown"
            lines.append(
                f"extern volatile {c_type} {c_identifier(str(symbol['name']))}; "
                f"/* 0x{int(symbol['address']):04X} {kind} {width} */"
            )
        lines.append("")

    if functions:
        lines.append("/* Function entry points discovered from vectors and call targets. */")
        for function in functions:
            start = int(function["start"])
            # Same fallback chain as _render_function so that a prototype and
            # its definition always carry the same name.
            label = labels.get(start, str(function.get("label") or _label_for(start)))
            lines.append(f"void {c_identifier(label)}(void);")
        lines.append("")
    return lines


def _referenced_io_registers(instructions: list[JsonObject]) -> dict[str, tuple[int, int]]:
    """Collect every named reference as ``name -> (address, widest access in bits)``.

    The width comes from the size of the referencing instruction's mnemonic and
    never drops below 8. When a name is referenced more than once the address of
    the last reference wins and the width is the maximum seen.

    Malformed references (non-dict entries, or entries without a name or an
    address) are skipped rather than raising, matching how the same list is
    guarded when line comments are built.
    """
    registers: dict[str, tuple[int, int]] = {}
    for ins in instructions:
        width = _size_bits(_mnemonic_size(str(ins.get("mnemonic", ""))))
        for ref in ins.get("references") or []:
            if not isinstance(ref, dict):
                continue
            name = ref.get("name")
            address = ref.get("address")
            if not name or address is None:
                continue
            previous = registers.get(name)
            previous_width = previous[1] if previous else 8
            registers[name] = (int(address), max(previous_width, width))
    return registers


def _referenced_memory_symbols(payload: JsonObject) -> list[JsonObject]:
    symbols = payload.get("symbols", {}).get("symbols", [])
    if not isinstance(symbols, list):
        return []
    memory_symbols: list[JsonObject] = []
    for symbol in symbols:
        if not isinstance(symbol, dict) or symbol.get("kind") == "register":
            continue
        if not symbol.get("name") or symbol.get("address") is None:
            continue
        memory_symbols.append(symbol)
    return sorted(memory_symbols, key=lambda symbol: int(symbol["address"]))


def _collect_label_names(payload: JsonObject) -> dict[int, str]:
    labels: dict[int, str] = {}
    for vector in payload.get("vectors", []):
        target = vector.get("target")
        label = vector.get("target_label")
        if target is not None and label:
            labels[int(target)] = c_identifier(str(label))
    for node in payload.get("call_graph", {}).get("nodes", []):
        start = int(node["start"])
        labels[start] = c_identifier(str(node.get("label") or _label_for(start)))
    for ins in payload.get("instructions", []):
        for target in ins.get("targets", []):
            labels.setdefault(int(target), c_identifier(_label_for(int(target))))
    return labels


def _function_nodes(
    payload: JsonObject,
    instructions: list[JsonObject],
    labels: dict[int, str],
) -> list[JsonObject]:
    nodes = [dict(node) for node in payload.get("call_graph", {}).get("nodes", [])]
    if nodes:
        nodes.sort(key=lambda node: int(node["start"]))
        return nodes

    if not instructions:
        return []
    start = int(min(ins["address"] for ins in instructions))
    end = int(max(ins["address"] for ins in instructions))
    return [
        {
            "start": start,
            "end": end,
            "label": labels.get(start, _label_for(start)),
            "sources": [],
            "instruction_count": len(instructions),
            "calls": [],
            "unresolved_calls": 0,
        },
    ]


def _render_function(
    function: JsonObject,
    by_address: dict[int, JsonObject],
    labels: dict[int, str],
    opts: PseudocodeOptions,
) -> tuple[list[str], set[int]]:
    start = int(function["start"])
    end = int(function.get("end", start))
    addresses = [address for address in sorted(by_address) if start <= address <= end]
    if not addresses:
        return [], set()

    name = c_identifier(labels.get(start, str(function.get("label") or _label_for(start))))
    local_targets = _local_target_addresses(addresses, by_address) | {
        address for address in addresses if address in labels
    }

    lines = [f"void {name}(void)", "{"]
    sources = function.get("sources") or []
    if sources:
        lines.append(f"    /* vector sources: {', '.join(str(source) for source in sources)} */")

    lines.extend(_render_instruction_block(addresses, by_address, labels, opts, local_targets, function_entry=start))

    lines.append("}")
    lines.append("")
    return lines, set(addresses)


def _render_orphan_block(
    addresses: list[int],
    by_address: dict[int, JsonObject],
    labels: dict[int, str],
    opts: PseudocodeOptions,
) -> list[str]:
    """Wrap instructions not claimed by any function in a catch-all definition."""
    branch_targets = _local_target_addresses(addresses, by_address)
    named_addresses = {address for address in addresses if address in labels}
    body = _render_instruction_block(
        addresses,
        by_address,
        labels,
        opts,
        branch_targets | named_addresses,
        function_entry=None,  # no entry point, so every target keeps its label
    )
    return ["void unreached_or_unowned_code(void)", "{", *body, "}", ""]


def _render_instruction_block(
    addresses: list[int],
    by_address: dict[int, JsonObject],
    labels: dict[int, str],
    opts: PseudocodeOptions,
    local_targets: set[int],
    *,
    function_entry: int | None,
) -> list[str]:
    """Render a function body, structured or linear depending on ``opts.structured``.

    Both modes start at indent level 1 with an empty set of suppressed labels.
    """
    if opts.structured:
        # Incoming edges are computed once for the whole body and shared by
        # every nested structured block.
        return _render_structured_block(
            addresses,
            by_address,
            labels,
            opts,
            local_targets,
            _incoming_local_targets(addresses, by_address),
            function_entry=function_entry,
            suppressed_labels=set(),
            indent=1,
        )

    return _render_linear_block(
        addresses,
        by_address,
        labels,
        opts,
        local_targets,
        function_entry=function_entry,
        suppressed_labels=set(),
        indent=1,
    )


def _render_structured_block(
    addresses: list[int],
    by_address: dict[int, JsonObject],
    labels: dict[int, str],
    opts: PseudocodeOptions,
    local_targets: set[int],
    incoming: dict[int, set[int]],
    *,
    function_entry: int | None,
    suppressed_labels: set[int],
    indent: int,
) -> list[str]:
    """Render *addresses*, folding recognised patterns into ``do/while`` and ``if``.

    At each position a loop candidate is tried first, then an ``if`` candidate;
    otherwise the single instruction is rendered linearly. Bodies are rendered
    by recursing one indent level deeper.

    Args:
        addresses: Instruction addresses of this (sub-)block, in order.
        by_address: Address-to-instruction lookup.
        labels: Address-to-identifier map.
        opts: Rendering switches.
        local_targets: Addresses that may need a label.
        incoming: For each address, the set of addresses branching to it.
        function_entry: Entry address, which never gets a label; may be None.
        suppressed_labels: Shared, mutated set of addresses whose labels became
            redundant because the branch to them was structured away.
        indent: Indent level for statements of this block.

    Returns:
        The rendered lines.
    """
    lines: list[str] = []
    # Indices are relative to this (possibly sliced) address list.
    address_to_index = {address: index for index, address in enumerate(addresses)}
    index = 0
    while index < len(addresses):
        loop = _loop_candidate_at(index, addresses, address_to_index, by_address, local_targets, incoming)
        if loop:
            start_address = addresses[index]
            # The loop head no longer needs a label: the only jump to it is the
            # closing branch, which becomes the ``while`` condition.
            suppressed_labels.add(start_address)
            lines.append(f"{_indent(indent)}do {{")
            lines.extend(
                _render_structured_block(
                    # Body excludes the closing branch at loop.end_index.
                    addresses[index : loop.end_index],
                    by_address,
                    labels,
                    opts,
                    local_targets,
                    incoming,
                    function_entry=function_entry,
                    suppressed_labels=suppressed_labels,
                    indent=indent + 1,
                )
            )
            lines.append(f"{_indent(indent)}}} while ({loop.condition});{_line_comment(loop.instruction, opts)}")
            # Resume after the closing branch.
            index = loop.end_index + 1
            continue

        if_candidate = _if_candidate_at(index, addresses, address_to_index, by_address, local_targets, incoming)
        if if_candidate:
            # The join point is only reached by this branch or by fall-through.
            suppressed_labels.add(if_candidate.target_address)
            lines.append(f"{_indent(indent)}if ({if_candidate.condition}) {{{_line_comment(if_candidate.instruction, opts)}")
            lines.extend(
                _render_structured_block(
                    # Body is everything strictly between the branch and its target.
                    addresses[index + 1 : if_candidate.target_index],
                    by_address,
                    labels,
                    opts,
                    local_targets,
                    incoming,
                    function_entry=function_entry,
                    suppressed_labels=suppressed_labels,
                    indent=indent + 1,
                )
            )
            lines.append(f"{_indent(indent)}}}")
            # Resume at the branch target itself, which is rendered normally.
            index = if_candidate.target_index
            continue

        address = addresses[index]
        lines.extend(
            _render_linear_block(
                [address],
                by_address,
                labels,
                opts,
                local_targets,
                function_entry=function_entry,
                suppressed_labels=suppressed_labels,
                indent=indent,
            )
        )
        index += 1
    return lines


def _render_linear_block(
    addresses: list[int],
    by_address: dict[int, JsonObject],
    labels: dict[int, str],
    opts: PseudocodeOptions,
    local_targets: set[int],
    *,
    function_entry: int | None,
    suppressed_labels: set[int],
    indent: int,
) -> list[str]:
    lines: list[str] = []
    for address in addresses:
        if _should_emit_label(address, local_targets, function_entry, suppressed_labels):
            lines.append(f"{_indent(max(indent - 1, 0))}{labels.get(address, _label_for(address))}:")
        ins = by_address[address]
        lines.append(f"{_indent(indent)}{_translate_instruction(ins, labels)}{_line_comment(ins, opts)}")
    return lines


def _if_candidate_at(
    index: int,
    addresses: list[int],
    address_to_index: dict[int, int],
    by_address: dict[int, JsonObject],
    local_targets: set[int],
    incoming: dict[int, set[int]],
) -> _IfCandidate | None:
    """Return an ``if`` candidate when ``addresses[index]`` is a simple forward skip.

    Requirements: a conditional branch with exactly one target that lies later
    in this block; a non-empty, bounded, straight-line body between branch and
    target; no body instruction is a labelled/branched-to address; and the
    target is reached by no branch other than this one.
    """
    branch_address = addresses[index]
    branch = by_address[branch_address]
    mnemonic = _conditional_branch_base(branch)
    if not mnemonic:
        return None

    join_address = _single_target_address(branch)
    is_forward_local = (
        join_address is not None
        and join_address > branch_address
        and join_address in address_to_index
    )
    if not is_forward_local:
        return None

    join_index = address_to_index[join_address]
    skipped = addresses[index + 1 : join_index]
    if not (0 < len(skipped) <= _MAX_STRUCTURED_IF_BODY):
        return None
    if not _is_straight_line_span(skipped, by_address):
        return None
    # Nothing may enter the body from elsewhere.
    if any(address in local_targets or incoming.get(address) for address in skipped):
        return None
    # The join point must belong to this branch alone.
    if incoming.get(join_address, set()) != {branch_address}:
        return None

    return _IfCandidate(
        target_index=join_index,
        target_address=join_address,
        condition=NEGATED_BRANCH_CONDITIONS[mnemonic],
        instruction=branch,
    )


def _loop_candidate_at(
    index: int,
    addresses: list[int],
    address_to_index: dict[int, int],
    by_address: dict[int, JsonObject],
    local_targets: set[int],
    incoming: dict[int, set[int]],
) -> _LoopCandidate | None:
    """Return a ``do/while`` candidate whose head is ``addresses[index]``.

    Scans forward (bounded by ``_MAX_STRUCTURED_LOOP_BODY``) for the first
    conditional branch back to the head. That branch either qualifies or ends
    the search: the body must be straight-line, nothing after the head up to
    and including the branch may be a labelled/branched-to address, and the
    head may only be reached by that branch.
    """
    head = addresses[index]
    scan_limit = min(len(addresses), index + _MAX_STRUCTURED_LOOP_BODY + 1)
    for tail_index in range(index + 1, scan_limit):
        tail_address = addresses[tail_index]
        tail = by_address[tail_address]
        mnemonic = _conditional_branch_base(tail)
        if not mnemonic:
            continue

        destination = _single_target_address(tail)
        if destination != head or destination not in address_to_index:
            continue

        body = addresses[index:tail_index]
        after_head = addresses[index + 1 : tail_index + 1]  # includes the closing branch
        disqualified = (
            not body
            or not _is_straight_line_span(body, by_address)
            or any(address in local_targets for address in after_head)
            or incoming.get(head, set()) != {tail_address}
            or any(incoming.get(address) for address in after_head)
        )
        if disqualified:
            return None

        return _LoopCandidate(
            end_index=tail_index,
            condition=BRANCH_CONDITIONS[mnemonic],
            instruction=tail,
        )
    return None


def _incoming_local_targets(addresses: list[int], by_address: dict[int, JsonObject]) -> dict[int, set[int]]:
    address_set = set(addresses)
    incoming: dict[int, set[int]] = {address: set() for address in addresses}
    for source in addresses:
        for target in by_address[source].get("targets", []):
            target_address = int(target)
            if target_address in address_set:
                incoming[target_address].add(source)
    return incoming


def _conditional_branch_base(ins: JsonObject) -> str | None:
    if str(ins.get("kind", "normal")) != "branch":
        return None
    base = _mnemonic_base(str(ins.get("mnemonic", "")))
    if base == "BRN" or base not in BRANCH_CONDITIONS:
        return None
    return base


def _single_target_address(ins: JsonObject) -> int | None:
    targets = ins.get("targets", [])
    if len(targets) != 1:
        return None
    return int(targets[0])


def _is_straight_line_span(addresses: list[int], by_address: dict[int, JsonObject]) -> bool:
    for address in addresses:
        kind = str(by_address[address].get("kind", "normal"))
        if kind in {"branch", "jump", "return", "rte"}:
            return False
    return True


def _should_emit_label(
    address: int,
    local_targets: set[int],
    function_entry: int | None,
    suppressed_labels: set[int],
) -> bool:
    return address in local_targets and address != function_entry and address not in suppressed_labels


def _indent(level: int) -> str:
    return "    " * level


def _local_target_addresses(addresses: list[int], by_address: dict[int, JsonObject]) -> set[int]:
    address_set = set(addresses)
    targets: set[int] = set()
    for address in addresses:
        for target in by_address[address].get("targets", []):
            target = int(target)
            if target in address_set:
                targets.add(target)
    return targets


def _translate_instruction(ins: JsonObject, labels: dict[int, str]) -> str:
    """Translate one decoded instruction into a single C-like statement.

    Dispatch is by ``ins["kind"]`` first (return, rte, sleep, call, branch/jump),
    then by the mnemonic base. The order of the checks matters: the first match
    wins. Anything unrecognised, or recognised with an unexpected operand
    count, falls through to an opaque ``asm_<mnemonic>("<text>")`` call.

    Args:
        ins: Instruction object; reads ``mnemonic``, ``operands``, ``kind``
            and ``text``.
        labels: Address-to-identifier map for call/branch targets.

    Returns:
        The statement text, without indentation or trailing comment.
    """
    mnemonic = str(ins.get("mnemonic", ""))
    operands = str(ins.get("operands", ""))
    kind = str(ins.get("kind", "normal"))
    ops = split_operands(operands)
    base = _mnemonic_base(mnemonic)
    size = _mnemonic_size(mnemonic)

    # --- Control flow, selected by instruction kind -------------------------
    if kind == "return":
        if ops:
            return f"return_with_stack_adjust({_format_operand(ops[0], size)});"
        return "return;"
    if kind == "rte":
        return "return_from_interrupt();"
    if kind == "sleep":
        return "sleep_until_interrupt();"
    if kind == "call":
        return _call_statement(ins, labels, ops)
    if kind in {"branch", "jump"}:
        return _branch_or_jump_statement(ins, labels, ops, base)

    # Mnemonics starting with "." are rendered as data emission, not as code.
    if base.startswith("."):
        return f"emit_data({_quoted(str(ins.get('text', mnemonic)))});"

    # --- Data movement (operands are "source, destination") -----------------
    if base in {"MOV", "MOV:G", "MOV:I", "MOV:E", "MOV:L", "MOV:S", "MOV:F"} and len(ops) == 2:
        source = _format_operand(ops[0], size)
        dest = _format_operand(ops[1], size, lvalue=True)
        if is_byte_immediate_to_word_destination(ins):
            return f"{dest} = zero_extend8_to16({source});"
        return f"{dest} = {_cast(source, size)};"

    if base in {"MOVFPE"} and len(ops) == 2:
        source = _format_operand(ops[0], size)
        dest = _format_operand(ops[1], size, lvalue=True)
        return f"{dest} = read_eclock({source});"
    if base in {"MOVTPE"} and len(ops) == 2:
        source = _format_operand(ops[0], size)
        dest = _format_operand(ops[1], size, lvalue=True)
        return f"write_eclock({dest}, {source});"

    # --- Arithmetic / logic rendered as compound assignment -----------------
    if base in {"ADD", "ADD:G", "ADD:Q", "ADDS"} and len(ops) == 2:
        return _binary_update(ops, "+=", size)
    if base in {"SUB", "SUBS"} and len(ops) == 2:
        return _binary_update(ops, "-=", size)
    if base == "OR" and len(ops) == 2:
        return _binary_update(ops, "|=", size)
    if base == "AND" and len(ops) == 2:
        return _binary_update(ops, "&=", size)
    if base == "XOR" and len(ops) == 2:
        return _binary_update(ops, "^=", size)

    # Operations without a plain C operator go through a named helper.
    if base in {"ADDX", "SUBX", "MULXU", "DIVXU"} and len(ops) == 2:
        source = _format_operand(ops[0], size)
        dest = _format_operand(ops[1], size, lvalue=True)
        helper = _helper_name(base, size)
        return f"{dest} = {helper}({dest}, {source});"

    # --- Flag-setting comparisons: helper receives (destination, source) ----
    if base in {"CMP", "CMP:G", "CMP:I", "CMP:E"} and len(ops) == 2:
        source = _format_operand(ops[0], size)
        dest = _format_operand(ops[1], size)
        return f"{_helper_name('set_flags_cmp', size)}({dest}, {source});"
    if base == "TST" and len(ops) == 1:
        return f"{_helper_name('set_flags_tst', size)}({_format_operand(ops[0], size)});"

    # --- Single-operand updates ---------------------------------------------
    if base == "CLR" and len(ops) == 1:
        return f"{_format_operand(ops[0], size, lvalue=True)} = 0;"
    if base == "NEG" and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} = -{target};"
    if base == "NOT" and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} = ~{target};"

    # NOTE(review): SHAR and SHLR both render as ">>= 1"; with the unsigned
    # u8/u16 declarations in the file header that reads as a logical shift.
    # Confirm whether SHAR should get a signed form.
    if base in {"SHAL", "SHLL"} and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} <<= 1;"
    if base in {"SHAR", "SHLR"} and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} >>= 1;"
    if base in {"ROTL", "ROTR", "ROTXL", "ROTXR"} and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} = {_helper_name(base.lower(), size)}({target});"
    if base == "SWAP" and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} = swap_bytes({target});"
    if base == "EXTU" and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} = zero_extend8({target});"
    if base == "EXTS" and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} = sign_extend8({target});"

    # --- Bit manipulation ---------------------------------------------------
    if base in {"BSET", "BCLR", "BNOT", "BTST"} and len(ops) == 2:
        return _bit_statement(base, ops, size)

    # --- Control-register access --------------------------------------------
    if base == "LDC" and len(ops) == 2:
        source = _format_operand(ops[0], size)
        dest = _format_operand(ops[1], size, lvalue=True)
        return f"{dest} = {_cast(source, size)};"
    if base == "STC" and len(ops) == 2:
        source = _format_operand(ops[0], size)
        dest = _format_operand(ops[1], size, lvalue=True)
        return f"{dest} = {_cast(source, size)};"
    if base == "ORC" and len(ops) == 2:
        return _binary_update(ops, "|=", size)
    if base == "ANDC" and len(ops) == 2:
        return _binary_update(ops, "&=", size)
    if base == "XORC" and len(ops) == 2:
        return _binary_update(ops, "^=", size)

    # --- Stack / frame / trap helpers ---------------------------------------
    # LDM takes the register list as its second operand, STM as its first.
    if base == "LDM" and len(ops) == 2:
        return f"pop_registers({_register_list_argument(ops[1])});"
    if base == "STM" and len(ops) == 2:
        return f"push_registers({_register_list_argument(ops[0])});"
    if base == "LINK" and len(ops) == 2:
        return f"link_frame({_format_operand(ops[1], size)});"
    if base == "UNLK":
        return "unlink_frame();"
    if base == "TRAPA" and ops:
        return f"trap({_format_operand(ops[0], size)});"
    if base == "TRAP/VS":
        return "trap_vs();"
    if base == "NOP":
        return "/* nop */;"

    # Fallback: keep the instruction visible as an opaque call.
    return f"asm_{_safe_token(base)}({_quoted(str(ins.get('text') or mnemonic))});"


def _branch_or_jump_statement(ins: JsonObject, labels: dict[int, str], ops: list[str], base: str) -> str:
    """Render a branch or jump as a ``goto`` statement (conditional where needed).

    Unconditional ``BRA``/``JMP``/``PJMP`` without a resolved target fall back
    to a table dispatch or a generic indirect goto. ``SCB/xx`` and conditional
    branches use ``unknown_target`` when no target is known.
    """
    label = _target_label(ins, labels)

    if base in {"BRA", "JMP", "PJMP"}:
        if label:
            return f"goto {label};"
        table_args = _indirect_table_call_args(ins)
        if table_args:
            return f"goto_indirect_table({table_args});"
        destination_expr = _format_operand(ops[0], "") if ops else "unknown_target"
        return f"goto_indirect({destination_expr});"

    if base.startswith("SCB/"):
        counter = _format_operand(ops[0], "") if ops else "R?"
        suffix = base.split("/", 1)[1].lower()
        return f"if (scb_{suffix}({counter})) goto {label or 'unknown_target'};"

    # Unknown mnemonics get a placeholder predicate named after them.
    predicate = BRANCH_CONDITIONS.get(base, f"cond_{_safe_token(base)}()")
    return f"if ({predicate}) goto {label or 'unknown_target'};"


def _call_statement(ins: JsonObject, labels: dict[int, str], ops: list[str]) -> str:
    """Render a call: direct by label, else table dispatch, else generic indirect."""
    callee = _target_label(ins, labels)
    if callee:
        return f"{callee}();"

    table_args = _indirect_table_call_args(ins)
    if table_args:
        return f"call_indirect_table({table_args});"

    if ops:
        destination_expr = _format_operand(ops[0], "")
    else:
        destination_expr = "unknown_target"
    return f"call_indirect({destination_expr});"


def _indirect_table_call_args(ins: JsonObject) -> str:
    indirect = ins.get("indirect_flow")
    if not isinstance(indirect, dict):
        return ""
    table = indirect.get("table")
    if not isinstance(table, dict) or table.get("base") is None:
        return ""
    base = int(table["base"])
    index_register = c_identifier(str(table.get("index_register") or "index"))
    target_register = c_identifier(str(table.get("target_register") or indirect.get("target_register") or "target"))
    return f"0x{base:04X}, {index_register}, {target_register}"


def _target_label(ins: JsonObject, labels: dict[int, str]) -> str:
    targets = ins.get("targets", [])
    if targets:
        target = int(targets[0])
        return labels.get(target, _label_for(target))
    return ""


def _binary_update(ops: list[str], operator: str, size: str) -> str:
    """Render ``source, dest`` operands as ``dest <operator> (cast)source;``."""
    right_hand_side = _cast(_format_operand(ops[0], size), size)
    left_hand_side = _format_operand(ops[1], size, lvalue=True)
    return f"{left_hand_side} {operator} {right_hand_side};"


def _bit_statement(base: str, ops: list[str], size: str) -> str:
    """Render a bit instruction; operands are ``bit, destination``.

    ``BSET``/``BCLR``/``BNOT`` become mask updates using ``BIT()``. Any other
    base (``BTST`` in practice) becomes a ``set_flags_btst`` call.
    """
    bit = _format_operand(ops[0], size)
    dest = _format_operand(ops[1], size, lvalue=True)
    mask = f"BIT({bit})"
    templates = {
        "BSET": "{dest} |= {mask};",
        "BCLR": "{dest} &= ~{mask};",
        "BNOT": "{dest} ^= {mask};",
    }
    template = templates.get(base)
    if template is None:
        return f"set_flags_btst({dest}, {bit});"
    return template.format(dest=dest, mask=mask)


def split_operands(operands: str) -> list[str]:
    """Split an operand string on top-level commas.

    Commas nested inside ``(...)`` or ``{...}`` do not split. Each piece is
    stripped and empty pieces are dropped.
    """
    if not operands:
        return []

    pieces: list[str] = []
    current: list[str] = []
    nesting = 0
    for char in operands:
        if char in "({":
            nesting += 1
        elif char in ")}" and nesting:
            nesting -= 1
        elif char == "," and nesting == 0:
            # Top-level separator: close the current piece and drop the comma.
            pieces.append("".join(current).strip())
            current = []
            continue
        current.append(char)
    pieces.append("".join(current).strip())
    return [piece for piece in pieces if piece]


def _format_operand(operand: str, size: str, *, lvalue: bool = False) -> str:
    """Convert one assembly operand into a C-like expression.

    Immediates lose their ``#``; memory operands become an index into the
    memory array selected by *size*; other ``@name`` operands, register lists
    and plain identifiers are mapped to C identifiers or helper arguments.
    Text matching none of the forms is returned as-is (after literal
    replacement). *lvalue* is accepted for call-site clarity and is not
    consulted here.
    """
    text = _replace_h_literals(operand.strip())

    if text.startswith("#"):
        return text[1:]

    # @(disp, Rn): displacement addressing; a negative disp becomes a subtraction.
    if text.startswith("@(") and text.endswith(")"):
        parts = split_operands(text[2:-1])
        if len(parts) == 2:
            displacement, register = parts
            if displacement.startswith("-"):
                index_expr = f"{register} - {displacement[1:]}"
            else:
                index_expr = f"{register} + {displacement}"
            return f"{_mem_name(size)}[{index_expr}]"

    if text.startswith("@"):
        if re.fullmatch(r"@-R[0-7]", text):
            return f"{_mem_name(size)}[--{text[2:]}]"
        if re.fullmatch(r"@R[0-7]\+", text):
            return f"{_mem_name(size)}[{text[1:-1]}++]"
        if re.fullmatch(r"@R[0-7]", text):
            return f"{_mem_name(size)}[{text[1:]}]"
        if text.startswith("@BR:"):
            return f"{_mem_name(size)}[(BR << 8) | {text[4:]}]"
        if text.startswith("@0x"):
            return f"{_mem_name(size)}[{text[1:]}]"
        # Any other "@something" is treated as a named location.
        return c_identifier(text[1:])

    if text.startswith("{") and text.endswith("}"):
        return _register_list_argument(text)
    # Covers plain names as well as loc_XXXX labels.
    if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_/\?]*", text):
        return c_identifier(text)
    return text


def _cast(expr: str, size: str) -> str:
    if size == "B":
        return f"(uint8_t)({expr})"
    if size == "W":
        return f"(uint16_t)({expr})"
    return expr


def _line_comment(ins: JsonObject, opts: PseudocodeOptions) -> str:
    """Build the trailing ``/* ... */`` comment for an instruction, or ``""``.

    Parts, in order and joined with ``"; "``: address, assembly text, the
    instruction's own comment, metadata comments, cycle summary. Address,
    assembly and cycles are controlled by *opts*.
    """
    pieces: list[str] = []
    if opts.include_addresses:
        pieces.append("{:04X}".format(int(ins["address"])))
    if opts.include_asm:
        pieces.append(str(ins.get("text") or _instruction_text(ins)))

    note = str(ins.get("comment") or "").strip()
    if note:
        pieces.append(note)
    pieces += _metadata_comments(ins)

    if opts.include_cycles and ins.get("cycles"):
        pieces.append(_cycle_summary(ins["cycles"]))

    if pieces:
        return " /* " + "; ".join(map(_sanitize_comment, pieces)) + " */"
    return ""


def _metadata_comments(ins: JsonObject) -> list[str]:
    """Collect the analysis annotations attached to one instruction record.

    The returned strings appear in a fixed order: SCI inference comments, SCI
    protocol events, serial reconstruction items, the byte-immediate/word
    destination note, board profile comment, indirect-flow summary, LCD text
    comment, LCD driver summaries, a ``dataflow ...`` line (at most four known
    values, followed by ``" ..."`` when more exist), a ``refs ...`` line, and
    one ``... TEMP access`` line per peripheral access.

    Entries that are not dicts, or that lack the text field being read, are
    skipped.  An empty list is returned when nothing applies.
    """
    comments: list[str] = []

    sci = ins.get("sci")
    if isinstance(sci, dict):
        for inference in sci.get("inferences", []):
            if isinstance(inference, dict) and inference.get("comment"):
                comments.append(str(inference["comment"]))

    for event in ins.get("sci_protocol", []):
        if isinstance(event, dict) and event.get("comment"):
            comments.append(str(event["comment"]))

    for item in ins.get("serial_reconstruction", []):
        if isinstance(item, dict) and item.get("comment"):
            comments.append(str(item["comment"]))

    if is_byte_immediate_to_word_destination(ins):
        comments.append("byte immediate zero-extended into word destination")

    board_profile = ins.get("board_profile")
    if isinstance(board_profile, dict) and board_profile.get("comment"):
        comments.append(str(board_profile["comment"]))

    indirect = ins.get("indirect_flow")
    if isinstance(indirect, dict) and indirect.get("summary"):
        comments.append(str(indirect["summary"]))

    lcd_text = ins.get("lcd_text")
    if isinstance(lcd_text, dict) and lcd_text.get("comment"):
        comments.append(str(lcd_text["comment"]))

    for lcd_item in ins.get("lcd_driver", []):
        if isinstance(lcd_item, dict) and lcd_item.get("summary"):
            comments.append(str(lcd_item["summary"]))

    dataflow = ins.get("dataflow")
    if isinstance(dataflow, dict):
        changes = dataflow.get("changes")
        if isinstance(changes, list):
            rendered = (
                _dataflow_change_comment(change)
                for change in changes
                if isinstance(change, dict)
            )
            # Changes whose value is not known render as "" and are dropped.
            known_changes = [text for text in rendered if text]
            if known_changes:
                # Cap the line at four values; flag truncation with " ...".
                suffix = " ..." if len(known_changes) > 4 else ""
                comments.append("dataflow " + ", ".join(known_changes[:4]) + suffix)

    refs: list[str] = []
    for ref in ins.get("references", []):
        if not isinstance(ref, dict):
            continue
        symbol = ref.get("symbol") or ref.get("name")
        if symbol:
            refs.append(str(symbol))
    if refs:
        comments.append("refs " + ", ".join(refs))

    for access in ins.get("peripheral_access", []):
        if not isinstance(access, dict):
            continue
        register = access.get("register")
        direction = access.get("direction")
        if not (register and direction):
            continue
        # "size" and "byte" are not guarded above, so leave out whichever is
        # absent rather than emitting the literal text "None" in the comment.
        # With all four fields present the output is unchanged.
        fields = (register, access.get("size"), direction, access.get("byte"))
        described = " ".join(str(field) for field in fields if field is not None)
        comments.append(f"{described} TEMP access")
    return comments


def _dataflow_change_comment(change: JsonObject) -> str:
    after = change.get("after")
    if not isinstance(after, dict) or not after.get("known"):
        return ""
    width = int(after.get("width", 16))
    value = int(after["value"])
    digits = 2 if width <= 8 else 4
    return f"{change['name']}=0x{value:0{digits}X}"


def _instruction_text(ins: JsonObject) -> str:
    mnemonic = str(ins.get("mnemonic", ""))
    operands = str(ins.get("operands", ""))
    return f"{mnemonic} {operands}".strip()


def _cycle_summary(cycles: JsonObject) -> str:
    if "cycles" in cycles:
        return f"cycles={cycles['cycles']}"
    if "not_taken" in cycles and "taken" in cycles:
        return f"cycles={cycles['not_taken']}/{cycles['taken']} nt/t"
    return "cycles=?"


def _mnemonic_base(mnemonic: str) -> str:
    return mnemonic.rsplit(".", 1)[0] if "." in mnemonic else mnemonic


def _mnemonic_size(mnemonic: str) -> str:
    suffix = mnemonic.rsplit(".", 1)[-1] if "." in mnemonic else ""
    if suffix in {"B", "W"}:
        return suffix
    if mnemonic.startswith("CMP:I"):
        return "W"
    if mnemonic.startswith("CMP:E"):
        return "B"
    return ""


def _size_bits(size: str) -> int:
    return 16 if size == "W" else 8


def _mem_name(size: str) -> str:
    return "MEM16" if size == "W" else "MEM8"


def _helper_name(base: str, size: str) -> str:
    """Build a helper name from *base* plus a width suffix.

    *base* is lowered to a safe token; ``"8"`` is appended for size ``B``,
    ``"16"`` for size ``W``, and nothing for any other size.
    """
    token = _safe_token(base)
    if size == "B":
        return token + "8"
    if size == "W":
        return token + "16"
    return token


def _register_list_argument(operand: str) -> str:
    """Turn a ``{R0,R1,...}`` register list into ``"R0, R1, ..."``.

    Surrounding whitespace and braces are removed, blank entries are ignored,
    and each name is converted to a C identifier.  A list with no entries
    yields the placeholder ``/* empty */``.
    """
    body = operand.strip().strip("{}")
    names: list[str] = []
    for chunk in body.split(","):
        entry = chunk.strip()
        if entry:
            names.append(c_identifier(entry))
    if not names:
        return "/* empty */"
    return ", ".join(names)


def _replace_h_literals(text: str) -> str:
    return re.sub(r"H'([0-9A-Fa-f]+)", lambda match: "0x" + match.group(1).upper(), text)


def c_identifier(name: str) -> str:
    """Convert *name* into a string usable as a C identifier.

    Characters outside ``[0-9A-Za-z_]`` become underscores, runs of
    underscores collapse to one, and leading/trailing underscores are removed.
    An empty result becomes ``"unnamed"``, and a result starting with a digit
    gets a leading underscore.
    """
    ident = re.sub(r"[^0-9A-Za-z_]", "_", name.strip())
    ident = re.sub(r"_+", "_", ident).strip("_") or "unnamed"
    return "_" + ident if ident[0].isdigit() else ident


def _safe_token(text: str) -> str:
    """Return *text* as a lower-case C identifier."""
    identifier = c_identifier(text)
    return identifier.lower()


def _label_for(address: int) -> str:
    return f"loc_{address:04X}"


def _quoted(text: str) -> str:
    return json.dumps(text)


def _sanitize_comment(text: str) -> str:
    return str(text).replace("*/", "* /").replace("\r", " ").replace("\n", " ")