from __future__ import annotations

import argparse
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any

from .consistency import is_byte_immediate_to_word_destination

JsonObject = dict[str, Any]

# C expression over the C/Z/N/V flag variables that is true when the branch is taken.
BRANCH_CONDITIONS = {
    "BRN": "0",
    "BHI": "!C && !Z",
    "BLS": "C || Z",
    "BCC": "!C",
    "BCS": "C",
    "BNE": "!Z",
    "BEQ": "Z",
    "BVC": "!V",
    "BVS": "V",
    "BPL": "!N",
    "BMI": "N",
    "BGE": "N == V",
    "BLT": "N != V",
    "BGT": "!Z && (N == V)",
    "BLE": "Z || (N != V)",
}

# Logical negation of BRANCH_CONDITIONS; used when a forward branch that skips
# a body is rewritten as ``if (<negated>) { body }``.
NEGATED_BRANCH_CONDITIONS = {
    "BRN": "1",
    "BHI": "C || Z",
    "BLS": "!C && !Z",
    "BCC": "C",
    "BCS": "!C",
    "BNE": "Z",
    "BEQ": "!Z",
    "BVC": "V",
    "BVS": "!V",
    "BPL": "N",
    "BMI": "!N",
    "BGE": "N != V",
    "BLT": "N == V",
    "BGT": "Z || (N != V)",
    "BLE": "!Z && (N == V)",
}

# Upper bounds (in instructions) on the spans considered for structuring.
_MAX_STRUCTURED_IF_BODY = 8
_MAX_STRUCTURED_LOOP_BODY = 24


@dataclass(frozen=True)
class PseudocodeOptions:
    """Switches controlling what generate_pseudocode() emits."""

    include_asm: bool = True  # append the original instruction text to line comments
    include_addresses: bool = True  # append the instruction address to line comments
    include_cycles: bool = False  # append a cycle summary when the instruction has one
    emit_declarations: bool = True  # emit extern/prototype declarations after the header
    max_functions: int | None = None  # keep only the first N functions when set
    structured: bool = True  # attempt if/do-while structuring instead of pure label/goto


@dataclass(frozen=True)
class _IfCandidate:
    """A forward conditional branch that can be rendered as an ``if`` block."""

    target_index: int
    target_address: int
    condition: str
    instruction: JsonObject


@dataclass(frozen=True)
class _LoopCandidate:
    """A backward conditional branch that can be rendered as a ``do/while`` loop."""

    end_index: int
    condition: str
    instruction: JsonObject


def generate_pseudocode(
    payload: JsonObject,
    *,
    source_name: str = "",
    options: PseudocodeOptions | None = None,
) -> str:
    """Render the decompiler JSON *payload* as C-like pseudocode text.

    Functions are rendered first (sorted by start address by _function_nodes);
    any instruction address not covered by a rendered function is then emitted
    in a trailing ``unreached_or_unowned_code`` block.

    Args:
        payload: Decoded JSON object; ``instructions``, ``vectors``,
            ``call_graph`` and ``symbols`` are read when present.
        source_name: Optional name mentioned in the header comment.
        options: Rendering switches; defaults to ``PseudocodeOptions()``.

    Returns:
        The generated text, always ending with exactly one newline.
    """
    opts = options or PseudocodeOptions()
    instructions = list(payload.get("instructions", []))
    label_names = _collect_label_names(payload)
    functions = _function_nodes(payload, instructions, label_names)
    if opts.max_functions is not None:
        functions = functions[: opts.max_functions]

    lines: list[str] = []
    lines.extend(_file_header(source_name, payload))
    if opts.emit_declarations:
        lines.extend(_declarations(payload, instructions, functions, label_names))

    by_address = {int(ins["address"]): ins for ins in instructions}
    all_addresses = sorted(by_address)
    emitted: set[int] = set()
    for function in functions:
        function_lines, used_addresses = _render_function(function, by_address, label_names, opts)
        if function_lines:
            lines.extend(function_lines)
            emitted.update(used_addresses)

    orphan_addresses = [address for address in all_addresses if address not in emitted]
    if orphan_addresses:
        lines.extend(_render_orphan_block(orphan_addresses, by_address, label_names, opts))

    return "\n".join(lines).rstrip() + "\n"


def load_pseudocode_input(path: Path) -> JsonObject:
    """Load *path* as JSON and check it is an object with an ``instructions`` key.

    Raises:
        ValueError: If the decoded document is not a dict or lacks ``instructions``.
    """
    with path.open("r", encoding="utf-8") as handle:
        payload = json.load(handle)
    if not isinstance(payload, dict) or "instructions" not in payload:
        raise ValueError(f"{path} does not look like h8536_decompiler JSON output")
    return payload


def write_pseudocode(input_path: Path, output_path: Path, options: PseudocodeOptions) -> None:
    """Generate pseudocode from *input_path* and write it to *output_path* as UTF-8.

    The parent directory of *output_path* is created when missing.
    """
    payload = load_pseudocode_input(input_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(
        generate_pseudocode(payload, source_name=str(input_path), options=options),
        encoding="utf-8",
    )


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: parse *argv*, write the pseudocode file, return 0."""
    parser = argparse.ArgumentParser(
        description="Generate conservative C-like pseudocode from h8536_decompiler JSON output.",
    )
    parser.add_argument(
        "input",
        nargs="?",
        type=Path,
        default=Path("build/rom_decompiled.json"),
        help="structured JSON emitted by h8536_decompiler.py",
    )
    parser.add_argument(
        "--out",
        type=Path,
        default=Path("build/rom_pseudocode.c"),
        help="pseudocode output path",
    )
    parser.add_argument("--no-asm", action="store_true", help="omit original assembly from line comments")
    parser.add_argument("--no-addresses", action="store_true", help="omit instruction addresses from line comments")
    parser.add_argument("--cycles", action="store_true", help="include cycle estimates when present in JSON")
    parser.add_argument("--no-declarations", action="store_true", help="omit register/function declarations")
    parser.add_argument("--no-structure", action="store_true", help="preserve label/goto output without if/loop structuring")
    parser.add_argument("--max-functions", type=int, default=None, help="emit only the first N functions")
    args = parser.parse_args(argv)

    options = PseudocodeOptions(
        include_asm=not args.no_asm,
        include_addresses=not args.no_addresses,
        include_cycles=args.cycles,
        emit_declarations=not args.no_declarations,
        max_functions=args.max_functions,
        structured=not args.no_structure,
    )
    write_pseudocode(args.input, args.out, options)
    print(f"wrote {args.out}")
    return 0


def _file_header(source_name: str, payload: JsonObject) -> list[str]:
    """Return the fixed preamble: banner comment, typedefs and global declarations."""
    vector_count = len(payload.get("vectors", []))
    function_count = len(payload.get("call_graph", {}).get("nodes", []))
    instruction_count = len(payload.get("instructions", []))
    source = f" from {source_name}" if source_name else ""
    return [
        "/*",
        f" * H8/536 C-like pseudocode{source}",
        " *",
        " * This is a conservative structural translation of the decompiler JSON.",
        " * Helpers such as set_flags_cmp8(), MEM8[], BIT(), C/Z/N/V, and",
        " * return_from_interrupt() are pseudocode placeholders, not a runtime ABI.",
        " *",
        f" * vectors: {vector_count}, functions: {function_count}, instructions: {instruction_count}",
        " */",
        "",
        # The typedefs below use uint8_t/uint16_t, which <stdint.h> declares.
        "#include <stdint.h>",
        "",
        "typedef uint8_t u8;",
        "typedef uint16_t u16;",
        "",
        "#define BIT(n) (1u << (n))",
        "extern volatile u8 MEM8[0x10000];",
        "extern volatile u16 MEM16[0x10000];",
        "",
        "u16 R0, R1, R2, R3, R4, R5, R6, R7;",
        "u16 SR;",
        "u8 CCR, BR, EP, DP, TP;",
        "int C, Z, N, V;",
        "",
        "static inline u16 zero_extend8_to16(u8 value) { return (u16)value; }",
        "",
    ]


def _declarations(
    payload: JsonObject,
    instructions: list[JsonObject],
    functions: list[JsonObject],
    labels: dict[int, str],
) -> list[str]:
    """Return extern declarations for named references and symbols, plus prototypes.

    Each of the three sections is emitted only when it has entries and is
    followed by one blank line.
    """
    lines: list[str] = []

    registers = _referenced_io_registers(instructions)
    if registers:
        lines.append("/* H8/536 register field symbols used by this ROM. */")
        for name, (address, width) in sorted(registers.items(), key=lambda item: item[1][0]):
            c_type = "u16" if width == 16 else "u8"
            lines.append(f"extern volatile {c_type} {c_identifier(name)}; /* 0x{address:04X} */")
        lines.append("")

    memory_symbols = _referenced_memory_symbols(payload)
    if memory_symbols:
        lines.append("/* RAM/external symbols inferred from instruction references and data tables. */")
        for symbol in memory_symbols:
            c_type = "u16" if symbol.get("width") == "word" else "u8"
            width = symbol.get("width") or "unknown"
            # _referenced_memory_symbols() lets symbols without "kind" through,
            # so fall back instead of raising KeyError.
            kind = symbol.get("kind") or "unknown"
            lines.append(
                f"extern volatile {c_type} {c_identifier(str(symbol['name']))}; "
                f"/* 0x{int(symbol['address']):04X} {kind} {width} */"
            )
        lines.append("")

    if functions:
        lines.append("/* Function entry points discovered from vectors and call targets. */")
        for function in functions:
            label = labels.get(int(function["start"]), str(function.get("label", "")))
            lines.append(f"void {c_identifier(label)}(void);")
        lines.append("")

    return lines


def _referenced_io_registers(instructions: list[JsonObject]) -> dict[str, tuple[int, int]]:
    """Map each named reference to ``(address, widest access width in bits)``.

    The width comes from the size suffix of the referencing instruction's
    mnemonic. Reference entries that are not dicts, have no name, or have no
    address are skipped.
    """
    registers: dict[str, tuple[int, int]] = {}
    for ins in instructions:
        width = _size_bits(_mnemonic_size(str(ins.get("mnemonic", ""))))
        for ref in ins.get("references", []):
            if not isinstance(ref, dict):
                continue
            name = ref.get("name")
            if not name or ref.get("address") is None:
                continue
            address = int(ref["address"])
            old = registers.get(name)
            old_width = old[1] if old else 8
            registers[name] = (address, max(old_width, width))
    return registers


def _referenced_memory_symbols(payload: JsonObject) -> list[JsonObject]:
    """Return named, addressed, non-register symbols from the payload, sorted by address."""
    symbols = payload.get("symbols", {}).get("symbols", [])
    if not isinstance(symbols, list):
        return []
    memory_symbols: list[JsonObject] = []
    for symbol in symbols:
        if not isinstance(symbol, dict) or symbol.get("kind") == "register":
            continue
        if not symbol.get("name") or symbol.get("address") is None:
            continue
        memory_symbols.append(symbol)
    return sorted(memory_symbols, key=lambda symbol: int(symbol["address"]))


def _collect_label_names(payload: JsonObject) -> dict[int, str]:
    """Build the address -> C label map.

    Vector target labels are recorded first, call-graph node labels overwrite
    them, and every remaining instruction target gets a generated ``loc_XXXX``
    name only if the address has no label yet.
    """
    labels: dict[int, str] = {}
    for vector in payload.get("vectors", []):
        target = vector.get("target")
        label = vector.get("target_label")
        if target is not None and label:
            labels[int(target)] = c_identifier(str(label))
    for node in payload.get("call_graph", {}).get("nodes", []):
        start = int(node["start"])
        labels[start] = c_identifier(str(node.get("label") or _label_for(start)))
    for ins in payload.get("instructions", []):
        for target in ins.get("targets", []):
            labels.setdefault(int(target), c_identifier(_label_for(int(target))))
    return labels


def _function_nodes(
    payload: JsonObject,
    instructions: list[JsonObject],
    labels: dict[int, str],
) -> list[JsonObject]:
    """Return call-graph nodes sorted by start address.

    When the payload has no nodes, a single synthetic node spanning all
    instructions is returned; with no instructions either, the result is empty.
    """
    nodes = [dict(node) for node in payload.get("call_graph", {}).get("nodes", [])]
    if nodes:
        nodes.sort(key=lambda node: int(node["start"]))
        return nodes
    if not instructions:
        return []
    start = int(min(ins["address"] for ins in instructions))
    end = int(max(ins["address"] for ins in instructions))
    return [
        {
            "start": start,
            "end": end,
            "label": labels.get(start, _label_for(start)),
            "sources": [],
            "instruction_count": len(instructions),
            "calls": [],
            "unresolved_calls": 0,
        },
    ]


def _render_function(
    function: JsonObject,
    by_address: dict[int, JsonObject],
    labels: dict[int, str],
    opts: PseudocodeOptions,
) -> tuple[list[str], set[int]]:
    """Render one function node.

    Returns:
        ``(lines, addresses)`` where *addresses* are the instruction addresses
        inside ``[start, end]``; both are empty when the range holds no
        instructions.
    """
    start = int(function["start"])
    end = int(function.get("end", start))
    addresses = [address for address in sorted(by_address) if start <= address <= end]
    if not addresses:
        return [], set()

    name = c_identifier(labels.get(start, str(function.get("label") or _label_for(start))))
    local_targets = _local_target_addresses(addresses, by_address) | {
        address for address in addresses if address in labels
    }
    lines = [f"void {name}(void)", "{"]
    sources = function.get("sources") or []
    if sources:
        lines.append(f" /* vector sources: {', '.join(str(source) for source in sources)} */")
    lines.extend(_render_instruction_block(addresses, by_address, labels, opts, local_targets, function_entry=start))
    lines.append("}")
    lines.append("")
    return lines, set(addresses)


def _render_orphan_block(
    addresses: list[int],
    by_address: dict[int, JsonObject],
    labels: dict[int, str],
    opts: PseudocodeOptions,
) -> list[str]:
    """Render instructions not covered by any function as one catch-all function."""
    lines = ["void unreached_or_unowned_code(void)", "{"]
    local_targets = _local_target_addresses(addresses, by_address) | {
        address for address in addresses if address in labels
    }
    lines.extend(_render_instruction_block(addresses, by_address, labels, opts, local_targets, function_entry=None))
    lines.append("}")
    lines.append("")
    return lines


def _render_instruction_block(
    addresses: list[int],
    by_address: dict[int, JsonObject],
    labels: dict[int, str],
    opts: PseudocodeOptions,
    local_targets: set[int],
    *,
    function_entry: int | None,
) -> list[str]:
    """Render a function body, structured or linear according to ``opts.structured``."""
    if not opts.structured:
        return _render_linear_block(
            addresses,
            by_address,
            labels,
            opts,
            local_targets,
            function_entry=function_entry,
            suppressed_labels=set(),
            indent=1,
        )
    incoming = _incoming_local_targets(addresses, by_address)
    suppressed_labels: set[int] = set()
    return _render_structured_block(
        addresses,
        by_address,
        labels,
        opts,
        local_targets,
        incoming,
        function_entry=function_entry,
        suppressed_labels=suppressed_labels,
        indent=1,
    )


def _render_structured_block(
    addresses: list[int],
    by_address: dict[int, JsonObject],
    labels: dict[int, str],
    opts: PseudocodeOptions,
    local_targets: set[int],
    incoming: dict[int, set[int]],
    *,
    function_entry: int | None,
    suppressed_labels: set[int],
    indent: int,
) -> list[str]:
    """Render *addresses*, folding simple patterns into ``do/while`` and ``if`` blocks.

    At each position a loop candidate is tried first, then an if candidate;
    anything else is rendered as a single linear instruction. Labels made
    redundant by structuring are added to *suppressed_labels* (mutated in place
    and shared with recursive calls).
    """
    lines: list[str] = []
    address_to_index = {address: index for index, address in enumerate(addresses)}
    index = 0
    while index < len(addresses):
        loop = _loop_candidate_at(index, addresses, address_to_index, by_address, local_targets, incoming)
        if loop:
            start_address = addresses[index]
            suppressed_labels.add(start_address)
            lines.append(f"{_indent(indent)}do {{")
            lines.extend(
                _render_structured_block(
                    addresses[index : loop.end_index],
                    by_address,
                    labels,
                    opts,
                    local_targets,
                    incoming,
                    function_entry=function_entry,
                    suppressed_labels=suppressed_labels,
                    indent=indent + 1,
                )
            )
            lines.append(f"{_indent(indent)}}} while ({loop.condition});{_line_comment(loop.instruction, opts)}")
            index = loop.end_index + 1
            continue

        if_candidate = _if_candidate_at(index, addresses, address_to_index, by_address, local_targets, incoming)
        if if_candidate:
            branch_address = addresses[index]
            # The branch instruction itself may be a jump target; its label
            # must survive or a goto elsewhere would reference a missing label.
            if _should_emit_label(branch_address, local_targets, function_entry, suppressed_labels):
                lines.append(
                    f"{_indent(max(indent - 1, 0))}{labels.get(branch_address, _label_for(branch_address))}:"
                )
            suppressed_labels.add(if_candidate.target_address)
            lines.append(f"{_indent(indent)}if ({if_candidate.condition}) {{{_line_comment(if_candidate.instruction, opts)}")
            lines.extend(
                _render_structured_block(
                    addresses[index + 1 : if_candidate.target_index],
                    by_address,
                    labels,
                    opts,
                    local_targets,
                    incoming,
                    function_entry=function_entry,
                    suppressed_labels=suppressed_labels,
                    indent=indent + 1,
                )
            )
            lines.append(f"{_indent(indent)}}}")
            index = if_candidate.target_index
            continue

        address = addresses[index]
        lines.extend(
            _render_linear_block(
                [address],
                by_address,
                labels,
                opts,
                local_targets,
                function_entry=function_entry,
                suppressed_labels=suppressed_labels,
                indent=indent,
            )
        )
        index += 1
    return lines


def _render_linear_block(
    addresses: list[int],
    by_address: dict[int, JsonObject],
    labels: dict[int, str],
    opts: PseudocodeOptions,
    local_targets: set[int],
    *,
    function_entry: int | None,
    suppressed_labels: set[int],
    indent: int,
) -> list[str]:
    """Render one statement per instruction, preceded by a label line where required."""
    lines: list[str] = []
    for address in addresses:
        if _should_emit_label(address, local_targets, function_entry, suppressed_labels):
            lines.append(f"{_indent(max(indent - 1, 0))}{labels.get(address, _label_for(address))}:")
        ins = by_address[address]
        lines.append(f"{_indent(indent)}{_translate_instruction(ins, labels)}{_line_comment(ins, opts)}")
    return lines


def _if_candidate_at(
    index: int,
    addresses: list[int],
    address_to_index: dict[int, int],
    by_address: dict[int, JsonObject],
    local_targets: set[int],
    incoming: dict[int, set[int]],
) -> _IfCandidate | None:
    """Return an if candidate when ``addresses[index]`` is a simple forward skip.

    Requirements: a conditional branch with exactly one target that lies later
    in *addresses*; a non-empty body of at most ``_MAX_STRUCTURED_IF_BODY``
    straight-line instructions, none of which is a label or local branch
    target; and the join address reached locally only from this branch.
    """
    address = addresses[index]
    ins = by_address[address]
    base = _conditional_branch_base(ins)
    if not base:
        return None
    target = _single_target_address(ins)
    if target is None or target <= address or target not in address_to_index:
        return None
    target_index = address_to_index[target]
    body_addresses = addresses[index + 1 : target_index]
    if not body_addresses or len(body_addresses) > _MAX_STRUCTURED_IF_BODY:
        return None
    if not _is_straight_line_span(body_addresses, by_address):
        return None
    if any(body_address in local_targets for body_address in body_addresses):
        return None
    if any(incoming.get(body_address) for body_address in body_addresses):
        return None
    if incoming.get(target, set()) != {address}:
        return None
    return _IfCandidate(
        target_index=target_index,
        target_address=target,
        # The branch skips the body when taken, so the body runs on the negation.
        condition=NEGATED_BRANCH_CONDITIONS[base],
        instruction=ins,
    )


def _loop_candidate_at(
    index: int,
    addresses: list[int],
    address_to_index: dict[int, int],
    by_address: dict[int, JsonObject],
    local_targets: set[int],
    incoming: dict[int, set[int]],
) -> _LoopCandidate | None:
    """Return a loop candidate when a later conditional branch jumps back to ``addresses[index]``.

    Only the first such branch within ``_MAX_STRUCTURED_LOOP_BODY`` instructions
    is considered. The body must be straight-line, the loop head must be
    reached locally only from the closing branch, and no address after the
    head (closing branch included) may be a label or local branch target.
    """
    start_address = addresses[index]
    max_end = min(len(addresses), index + _MAX_STRUCTURED_LOOP_BODY + 1)
    for end_index in range(index + 1, max_end):
        branch_address = addresses[end_index]
        branch = by_address[branch_address]
        base = _conditional_branch_base(branch)
        if not base:
            continue
        target = _single_target_address(branch)
        if target != start_address or target not in address_to_index:
            continue
        body_addresses = addresses[index:end_index]
        interior_addresses = addresses[index + 1 : end_index + 1]
        if not body_addresses or not _is_straight_line_span(body_addresses, by_address):
            return None
        if any(address in local_targets for address in interior_addresses):
            return None
        if incoming.get(start_address, set()) != {branch_address}:
            return None
        if any(incoming.get(address) for address in interior_addresses):
            return None
        return _LoopCandidate(
            end_index=end_index,
            condition=BRANCH_CONDITIONS[base],
            instruction=branch,
        )
    return None


def _incoming_local_targets(addresses: list[int], by_address: dict[int, JsonObject]) -> dict[int, set[int]]:
    """Map every address in *addresses* to the set of local addresses that target it."""
    address_set = set(addresses)
    incoming: dict[int, set[int]] = {address: set() for address in addresses}
    for source in addresses:
        for target in by_address[source].get("targets", []):
            target_address = int(target)
            if target_address in address_set:
                incoming[target_address].add(source)
    return incoming


def _conditional_branch_base(ins: JsonObject) -> str | None:
    """Return the mnemonic base of a structurable conditional branch, else None.

    ``BRN`` and mnemonics missing from BRANCH_CONDITIONS are rejected.
    """
    if str(ins.get("kind", "normal")) != "branch":
        return None
    base = _mnemonic_base(str(ins.get("mnemonic", "")))
    if base == "BRN" or base not in BRANCH_CONDITIONS:
        return None
    return base


def _single_target_address(ins: JsonObject) -> int | None:
    """Return the instruction's only target address, or None unless it has exactly one."""
    targets = ins.get("targets", [])
    if len(targets) != 1:
        return None
    return int(targets[0])


def _is_straight_line_span(addresses: list[int], by_address: dict[int, JsonObject]) -> bool:
    """Return True when no instruction in the span is a branch, jump, return or rte."""
    for address in addresses:
        kind = str(by_address[address].get("kind", "normal"))
        if kind in {"branch", "jump", "return", "rte"}:
            return False
    return True


def _should_emit_label(
    address: int,
    local_targets: set[int],
    function_entry: int | None,
    suppressed_labels: set[int],
) -> bool:
    """Return True when *address* is a target, is not the function entry, and is not suppressed."""
    return address in local_targets and address != function_entry and address not in suppressed_labels


def _indent(level: int) -> str:
    """Return the indentation prefix for nesting depth *level*."""
    return " " * level


def _local_target_addresses(addresses: list[int], by_address: dict[int, JsonObject]) -> set[int]:
    """Return the addresses within *addresses* that are targeted from within *addresses*."""
    address_set = set(addresses)
    targets: set[int] = set()
    for address in addresses:
        for target in by_address[address].get("targets", []):
            target = int(target)
            if target in address_set:
                targets.add(target)
    return targets


def _translate_instruction(ins: JsonObject, labels: dict[int, str]) -> str:
    """Translate one instruction into a single C-like statement.

    Dispatch is by ``kind`` first, then by mnemonic base and operand count.
    Anything unrecognised becomes ``asm_<mnemonic>("<text>");``.
    """
    mnemonic = str(ins.get("mnemonic", ""))
    operands = str(ins.get("operands", ""))
    kind = str(ins.get("kind", "normal"))
    ops = split_operands(operands)
    base = _mnemonic_base(mnemonic)
    size = _mnemonic_size(mnemonic)

    if kind == "return":
        if ops:
            return f"return_with_stack_adjust({_format_operand(ops[0], size)});"
        return "return;"
    if kind == "rte":
        return "return_from_interrupt();"
    if kind == "sleep":
        return "sleep_until_interrupt();"
    if kind == "call":
        return _call_statement(ins, labels, ops)
    if kind in {"branch", "jump"}:
        return _branch_or_jump_statement(ins, labels, ops, base)
    if base.startswith("."):
        return f"emit_data({_quoted(str(ins.get('text', mnemonic)))});"

    if base in {"MOV", "MOV:G", "MOV:I", "MOV:E", "MOV:L", "MOV:S", "MOV:F"} and len(ops) == 2:
        source = _format_operand(ops[0], size)
        dest = _format_operand(ops[1], size, lvalue=True)
        if is_byte_immediate_to_word_destination(ins):
            return f"{dest} = zero_extend8_to16({source});"
        return f"{dest} = {_cast(source, size)};"
    if base in {"MOVFPE"} and len(ops) == 2:
        source = _format_operand(ops[0], size)
        dest = _format_operand(ops[1], size, lvalue=True)
        return f"{dest} = read_eclock({source});"
    if base in {"MOVTPE"} and len(ops) == 2:
        source = _format_operand(ops[0], size)
        dest = _format_operand(ops[1], size, lvalue=True)
        return f"write_eclock({dest}, {source});"
    if base in {"ADD", "ADD:G", "ADD:Q", "ADDS"} and len(ops) == 2:
        return _binary_update(ops, "+=", size)
    if base in {"SUB", "SUBS"} and len(ops) == 2:
        return _binary_update(ops, "-=", size)
    if base == "OR" and len(ops) == 2:
        return _binary_update(ops, "|=", size)
    if base == "AND" and len(ops) == 2:
        return _binary_update(ops, "&=", size)
    if base == "XOR" and len(ops) == 2:
        return _binary_update(ops, "^=", size)
    if base in {"ADDX", "SUBX", "MULXU", "DIVXU"} and len(ops) == 2:
        source = _format_operand(ops[0], size)
        dest = _format_operand(ops[1], size, lvalue=True)
        helper = _helper_name(base, size)
        return f"{dest} = {helper}({dest}, {source});"
    if base in {"CMP", "CMP:G", "CMP:I", "CMP:E"} and len(ops) == 2:
        source = _format_operand(ops[0], size)
        dest = _format_operand(ops[1], size)
        return f"{_helper_name('set_flags_cmp', size)}({dest}, {source});"
    if base == "TST" and len(ops) == 1:
        return f"{_helper_name('set_flags_tst', size)}({_format_operand(ops[0], size)});"
    if base == "CLR" and len(ops) == 1:
        return f"{_format_operand(ops[0], size, lvalue=True)} = 0;"
    if base == "NEG" and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} = -{target};"
    if base == "NOT" and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} = ~{target};"
    if base in {"SHAL", "SHLL"} and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} <<= 1;"
    if base in {"SHAR", "SHLR"} and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} >>= 1;"
    if base in {"ROTL", "ROTR", "ROTXL", "ROTXR"} and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} = {_helper_name(base.lower(), size)}({target});"
    if base == "SWAP" and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} = swap_bytes({target});"
    if base == "EXTU" and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} = zero_extend8({target});"
    if base == "EXTS" and len(ops) == 1:
        target = _format_operand(ops[0], size, lvalue=True)
        return f"{target} = sign_extend8({target});"
    if base in {"BSET", "BCLR", "BNOT", "BTST"} and len(ops) == 2:
        return _bit_statement(base, ops, size)
    if base == "LDC" and len(ops) == 2:
        source = _format_operand(ops[0], size)
        dest = _format_operand(ops[1], size, lvalue=True)
        return f"{dest} = {_cast(source, size)};"
    if base == "STC" and len(ops) == 2:
        source = _format_operand(ops[0], size)
        dest = _format_operand(ops[1], size, lvalue=True)
        return f"{dest} = {_cast(source, size)};"
    if base == "ORC" and len(ops) == 2:
        return _binary_update(ops, "|=", size)
    if base == "ANDC" and len(ops) == 2:
        return _binary_update(ops, "&=", size)
    if base == "XORC" and len(ops) == 2:
        return _binary_update(ops, "^=", size)
    if base == "LDM" and len(ops) == 2:
        return f"pop_registers({_register_list_argument(ops[1])});"
    if base == "STM" and len(ops) == 2:
        return f"push_registers({_register_list_argument(ops[0])});"
    if base == "LINK" and len(ops) == 2:
        return f"link_frame({_format_operand(ops[1], size)});"
    if base == "UNLK":
        return "unlink_frame();"
    if base == "TRAPA" and ops:
        return f"trap({_format_operand(ops[0], size)});"
    if base == "TRAP/VS":
        return "trap_vs();"
    if base == "NOP":
        return "/* nop */;"
    return f"asm_{_safe_token(base)}({_quoted(str(ins.get('text') or mnemonic))});"


def _branch_or_jump_statement(ins: JsonObject, labels: dict[int, str], ops: list[str], base: str) -> str:
    """Render a branch or jump as a ``goto`` statement (conditional where applicable)."""
    target = _target_label(ins, labels)
    if base in {"BRA", "JMP", "PJMP"}:
        if target:
            return f"goto {target};"
        table_expr = _indirect_table_call_args(ins)
        if table_expr:
            return f"goto_indirect_table({table_expr});"
        expr = _format_operand(ops[0], "") if ops else "unknown_target"
        return f"goto_indirect({expr});"
    if base.startswith("SCB/"):
        register = _format_operand(ops[0], "") if ops else "R?"
        cond = base.split("/", 1)[1].lower()
        return f"if (scb_{cond}({register})) goto {target or 'unknown_target'};"
    condition = BRANCH_CONDITIONS.get(base, f"cond_{_safe_token(base)}()")
    return f"if ({condition}) goto {target or 'unknown_target'};"


def _call_statement(ins: JsonObject, labels: dict[int, str], ops: list[str]) -> str:
    """Render a call: direct when a target is known, otherwise table-based or indirect."""
    target = _target_label(ins, labels)
    if target:
        return f"{target}();"
    table_expr = _indirect_table_call_args(ins)
    if table_expr:
        return f"call_indirect_table({table_expr});"
    expr = _format_operand(ops[0], "") if ops else "unknown_target"
    return f"call_indirect({expr});"


def _indirect_table_call_args(ins: JsonObject) -> str:
    """Return ``"0xBASE, index_reg, target_reg"`` from ``indirect_flow.table``, or ``""``."""
    indirect = ins.get("indirect_flow")
    if not isinstance(indirect, dict):
        return ""
    table = indirect.get("table")
    if not isinstance(table, dict) or table.get("base") is None:
        return ""
    base = int(table["base"])
    index_register = c_identifier(str(table.get("index_register") or "index"))
    target_register = c_identifier(str(table.get("target_register") or indirect.get("target_register") or "target"))
    return f"0x{base:04X}, {index_register}, {target_register}"


def _target_label(ins: JsonObject, labels: dict[int, str]) -> str:
    """Return the label of the instruction's first target, or ``""`` when it has none."""
    targets = ins.get("targets", [])
    if targets:
        target = int(targets[0])
        return labels.get(target, _label_for(target))
    return ""


def _binary_update(ops: list[str], operator: str, size: str) -> str:
    """Render ``dest <operator> (cast)source;`` for a two-operand instruction."""
    source = _format_operand(ops[0], size)
    dest = _format_operand(ops[1], size, lvalue=True)
    return f"{dest} {operator} {_cast(source, size)};"


def _bit_statement(base: str, ops: list[str], size: str) -> str:
    """Render BSET/BCLR/BNOT as a masked update; any other base as ``set_flags_btst``."""
    bit = _format_operand(ops[0], size)
    dest = _format_operand(ops[1], size, lvalue=True)
    bit_expr = f"BIT({bit})"
    if base == "BSET":
        return f"{dest} |= {bit_expr};"
    if base == "BCLR":
        return f"{dest} &= ~{bit_expr};"
    if base == "BNOT":
        return f"{dest} ^= {bit_expr};"
    return f"set_flags_btst({dest}, {bit});"


def split_operands(operands: str) -> list[str]:
    """Split an operand string on commas that are outside ``()`` and ``{}`` groups.

    Parts are stripped and empty parts are dropped.
    """
    if not operands:
        return []
    parts: list[str] = []
    start = 0
    depth = 0
    for idx, char in enumerate(operands):
        if char in "({":
            depth += 1
        elif char in ")}" and depth:
            depth -= 1
        elif char == "," and depth == 0:
            parts.append(operands[start:idx].strip())
            start = idx + 1
    parts.append(operands[start:].strip())
    return [part for part in parts if part]


def _format_operand(operand: str, size: str, *, lvalue: bool = False) -> str:
    """Convert one assembly operand into a C-like expression.

    ``H'..`` literals become ``0x..``; ``#imm`` loses its ``#``; the ``@``
    addressing forms handled below become ``MEM8[...]``/``MEM16[...]`` accesses
    chosen by *size*; bare names are sanitised with c_identifier(). The
    *lvalue* flag is accepted for call-site readability and is not consulted.
    """
    op = _replace_h_literals(operand.strip())
    if op.startswith("#"):
        return op[1:]
    if op.startswith("@(") and op.endswith(")"):
        inner = op[2:-1]
        pieces = split_operands(inner)
        if len(pieces) == 2:
            disp, reg = pieces
            offset = f"{reg} - {disp[1:]}" if disp.startswith("-") else f"{reg} + {disp}"
            return f"{_mem_name(size)}[{offset}]"
    if re.fullmatch(r"@-R[0-7]", op):
        return f"{_mem_name(size)}[--{op[2:]}]"
    if re.fullmatch(r"@R[0-7]\+", op):
        return f"{_mem_name(size)}[{op[1:-1]}++]"
    if re.fullmatch(r"@R[0-7]", op):
        return f"{_mem_name(size)}[{op[1:]}]"
    if op.startswith("@BR:"):
        return f"{_mem_name(size)}[(BR << 8) | {op[4:]}]"
    if op.startswith("@0x"):
        return f"{_mem_name(size)}[{op[1:]}]"
    if op.startswith("@"):
        return c_identifier(op[1:])
    if op.startswith("{") and op.endswith("}"):
        return _register_list_argument(op)
    if re.fullmatch(r"loc_[0-9A-Fa-f]{4}", op):
        return c_identifier(op)
    if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_/\?]*", op):
        return c_identifier(op)
    return op


def _cast(expr: str, size: str) -> str:
    """Wrap *expr* in a ``uint8_t``/``uint16_t`` cast for size B/W; otherwise return it as is."""
    if size == "B":
        return f"(uint8_t)({expr})"
    if size == "W":
        return f"(uint16_t)({expr})"
    return expr


def _line_comment(ins: JsonObject, opts: PseudocodeOptions) -> str:
    """Build the trailing ``/* ... */`` comment for an instruction, or ``""`` if empty."""
    parts: list[str] = []
    if opts.include_addresses:
        parts.append(f"{int(ins['address']):04X}")
    if opts.include_asm:
        text = str(ins.get("text") or _instruction_text(ins))
        parts.append(text)
    comment = str(ins.get("comment") or "").strip()
    if comment:
        parts.append(comment)
    parts.extend(_metadata_comments(ins))
    if opts.include_cycles and ins.get("cycles"):
        parts.append(_cycle_summary(ins["cycles"]))
    if not parts:
        return ""
    return " /* " + "; ".join(_sanitize_comment(part) for part in parts) + " */"


def _metadata_comments(ins: JsonObject) -> list[str]:
    """Collect comment fragments from the optional analysis fields of an instruction."""
    comments: list[str] = []

    sci = ins.get("sci")
    if isinstance(sci, dict):
        for inference in sci.get("inferences", []):
            if isinstance(inference, dict) and inference.get("comment"):
                comments.append(str(inference["comment"]))
    for event in ins.get("sci_protocol", []):
        if isinstance(event, dict) and event.get("comment"):
            comments.append(str(event["comment"]))
    for item in ins.get("serial_reconstruction", []):
        if isinstance(item, dict) and item.get("comment"):
            comments.append(str(item["comment"]))

    if is_byte_immediate_to_word_destination(ins):
        comments.append("byte immediate zero-extended into word destination")

    board_profile = ins.get("board_profile")
    if isinstance(board_profile, dict) and board_profile.get("comment"):
        comments.append(str(board_profile["comment"]))
    indirect = ins.get("indirect_flow")
    if isinstance(indirect, dict) and indirect.get("summary"):
        comments.append(str(indirect["summary"]))
    lcd_text = ins.get("lcd_text")
    if isinstance(lcd_text, dict) and lcd_text.get("comment"):
        comments.append(str(lcd_text["comment"]))
    for lcd_item in ins.get("lcd_driver", []):
        if isinstance(lcd_item, dict) and lcd_item.get("summary"):
            comments.append(str(lcd_item["summary"]))

    dataflow = ins.get("dataflow")
    if isinstance(dataflow, dict):
        changes = dataflow.get("changes")
        if isinstance(changes, list):
            known_changes = [_dataflow_change_comment(change) for change in changes if isinstance(change, dict)]
            known_changes = [change for change in known_changes if change]
            if known_changes:
                # Show at most four changes; mark truncation with an ellipsis.
                suffix = " ..." if len(known_changes) > 4 else ""
                comments.append("dataflow " + ", ".join(known_changes[:4]) + suffix)

    refs = []
    for ref in ins.get("references", []):
        if not isinstance(ref, dict):
            continue
        symbol = ref.get("symbol") or ref.get("name")
        if symbol:
            refs.append(str(symbol))
    if refs:
        comments.append("refs " + ", ".join(refs))

    for access in ins.get("peripheral_access", []):
        if not isinstance(access, dict):
            continue
        register = access.get("register")
        direction = access.get("direction")
        size = access.get("size")
        byte = access.get("byte")
        if register and direction:
            comments.append(f"{register} {size} {direction} {byte} TEMP access")
    return comments


def _dataflow_change_comment(change: JsonObject) -> str:
    """Return ``name=0xVALUE`` for a change whose ``after`` state is known, else ``""``."""
    after = change.get("after")
    if not isinstance(after, dict) or not after.get("known"):
        return ""
    width = int(after.get("width", 16))
    value = int(after["value"])
    digits = 2 if width <= 8 else 4
    return f"{change['name']}=0x{value:0{digits}X}"


def _instruction_text(ins: JsonObject) -> str:
    """Return ``"<mnemonic> <operands>"`` with surrounding whitespace stripped."""
    mnemonic = str(ins.get("mnemonic", ""))
    operands = str(ins.get("operands", ""))
    return f"{mnemonic} {operands}".strip()


def _cycle_summary(cycles: JsonObject) -> str:
    """Format a cycle record as ``cycles=N``, ``cycles=NT/T nt/t`` or ``cycles=?``."""
    if "cycles" in cycles:
        return f"cycles={cycles['cycles']}"
    if "not_taken" in cycles and "taken" in cycles:
        return f"cycles={cycles['not_taken']}/{cycles['taken']} nt/t"
    return "cycles=?"


def _mnemonic_base(mnemonic: str) -> str:
    """Return *mnemonic* without its last ``.suffix``.

    A mnemonic whose only dot is the leading character (for example ``.byte``)
    is returned unchanged, so that _translate_instruction() can recognise it
    through its ``base.startswith(".")`` check.
    """
    head, dot, _suffix = mnemonic.rpartition(".")
    return head if dot and head else mnemonic


def _mnemonic_size(mnemonic: str) -> str:
    """Return ``"B"``, ``"W"`` or ``""`` for the operand size implied by *mnemonic*.

    An explicit ``.B``/``.W`` suffix wins; otherwise ``CMP:I`` maps to ``W``
    and ``CMP:E`` to ``B``.
    """
    suffix = mnemonic.rsplit(".", 1)[-1] if "." in mnemonic else ""
    if suffix in {"B", "W"}:
        return suffix
    if mnemonic.startswith("CMP:I"):
        return "W"
    if mnemonic.startswith("CMP:E"):
        return "B"
    return ""


def _size_bits(size: str) -> int:
    """Return 16 for size ``"W"`` and 8 for anything else."""
    return 16 if size == "W" else 8


def _mem_name(size: str) -> str:
    """Return the pseudo memory array name: ``MEM16`` for ``"W"``, else ``MEM8``."""
    return "MEM16" if size == "W" else "MEM8"


def _helper_name(base: str, size: str) -> str:
    """Return the lower-cased helper name for *base* with an ``8``/``16`` size suffix."""
    suffix = {"B": "8", "W": "16"}.get(size, "")
    return f"{_safe_token(base)}{suffix}"


def _register_list_argument(operand: str) -> str:
    """Turn ``{R0,R1}`` into ``R0, R1``; an empty list becomes ``/* empty */``."""
    inner = operand.strip().strip("{}")
    regs = [c_identifier(part.strip()) for part in inner.split(",") if part.strip()]
    return ", ".join(regs) if regs else "/* empty */"


def _replace_h_literals(text: str) -> str:
    """Rewrite every ``H'<hex>`` literal as ``0x<HEX>`` with upper-case digits."""
    return re.sub(r"H'([0-9A-Fa-f]+)", lambda match: "0x" + match.group(1).upper(), text)


def c_identifier(name: str) -> str:
    """Sanitise *name* into a C identifier.

    Characters outside ``[0-9A-Za-z_]`` become ``_``, runs of ``_`` collapse,
    leading/trailing ``_`` are removed, an empty result becomes ``unnamed``,
    and a leading digit gets a ``_`` prefix.
    """
    cleaned = re.sub(r"[^0-9A-Za-z_]", "_", name.strip())
    cleaned = re.sub(r"_+", "_", cleaned).strip("_")
    if not cleaned:
        cleaned = "unnamed"
    if cleaned[0].isdigit():
        cleaned = "_" + cleaned
    return cleaned


def _safe_token(text: str) -> str:
    """Return the lower-cased c_identifier() form of *text*."""
    return c_identifier(text).lower()


def _label_for(address: int) -> str:
    """Return the generated label ``loc_XXXX`` for *address*."""
    return f"loc_{address:04X}"


def _quoted(text: str) -> str:
    """Return *text* as a double-quoted, escaped string literal (via json.dumps)."""
    return json.dumps(text)


def _sanitize_comment(text: str) -> str:
    """Make *text* safe inside a C block comment: break ``*/`` and flatten newlines."""
    return str(text).replace("*/", "* /").replace("\r", " ").replace("\n", " ")