"""Trace direct callers of the report-queue routine in decompiler JSON output.

For every direct ``BSR``/``JSR`` to the queue function, a bounded backward
window of instructions is scanned to find where R2 and R3 were last written,
and the findings are rendered as JSON or as a plain-text report.
"""

from __future__ import annotations

import argparse
import json
import re
from collections.abc import Iterable, Mapping
from pathlib import Path
from typing import Any

from .formatting import h16, label_for

JsonObject = dict[str, Any]

DEFAULT_INPUT = Path("build/rom_decompiled.json")
DEFAULT_TEXT_OUTPUT = Path("build/rom_report_sources.txt")
DEFAULT_JSON_OUTPUT = Path("build/rom_report_sources.json")
QUEUE_FUNCTION = 0x3E54
REPORT_INDEX_OF_INTEREST = 0x0007

# Displacements recognised in ``@(-H'xxxx, Rn)`` operands, mapped to the
# candidate-table name reported in table hints.
_LOGICAL_TABLE_OFFSETS = {
    0x2000: "primary_value_table_candidate",
    0x1C00: "secondary_value_table_candidate",
    0x1800: "current_value_table_candidate",
    0x1400: "flag_table_candidate",
}


def load_report_source_input(path: Path) -> JsonObject:
    """Load decompiler JSON from *path*.

    Raises:
        ValueError: if the top-level value is not a dict containing an
            ``"instructions"`` key.
    """
    with path.open("r", encoding="utf-8") as handle:
        payload = json.load(handle)
    if not isinstance(payload, dict) or "instructions" not in payload:
        raise ValueError(f"{path} does not look like h8536_decompiler JSON output")
    return payload


def analyze_report_sources(
    payload: Mapping[str, Any],
    *,
    target: int = QUEUE_FUNCTION,
    report_index: int = REPORT_INDEX_OF_INTEREST,
    window: int = 16,
) -> JsonObject:
    """Analyse every direct call to *target* found in *payload*.

    Args:
        payload: decompiler output; ``"instructions"`` and, optionally,
            ``"call_graph"`` are read.
        target: address of the queue function whose callers are traced.
        report_index: R3 value that counts as a direct static hit.
        window: maximum number of prior instructions inspected per call.

    Returns:
        A JSON-serialisable dict with a summary, one record per call site
        and a list of caveats.
    """
    instructions = _instruction_sequence(payload.get("instructions"))
    functions = _function_ranges(payload)
    calls = [
        _analyze_call(instructions, index, functions, target, report_index, window)
        for index, ins in enumerate(instructions)
        if _is_direct_call_to(ins, target)
    ]
    # ``_analyze_call`` already evaluated the "bit7 set and R3 == report_index"
    # predicate; reuse it instead of spelling the same condition out again.
    direct_hits = [call for call in calls if call["can_directly_enqueue_report_index"]]
    dynamic_candidates = [
        call
        for call in calls
        if call["r2"].get("bit7") is not False
        and call["r3"].get("classification") in {"dynamic/table-derived", "unknown"}
    ]
    if direct_hits:
        conclusion = (
            f"At least one direct loc_3E54 caller statically loads report index {report_index:#06x} "
            "with R2.bit7 set before the queue call."
        )
        status = "direct_static_hit"
    else:
        conclusion = (
            f"No direct loc_3E54 caller in this JSON statically loads report index {report_index:#06x}. "
            f"{report_index:#06x} remains an observed runtime/capture value unless another indirect "
            "or table-dispatch path is proven."
        )
        status = "not_statically_proven"
    # The caveats used to hard-code 0x0007; derive it from the requested index
    # so a non-default ``report_index`` is described correctly.
    index_hex = f"0x{report_index:04X}"
    return {
        "kind": "report_source_trace",
        "queue_function": target,
        "queue_function_hex": h16(target),
        "report_index_of_interest": report_index,
        "report_index_of_interest_hex": index_hex,
        "summary": {
            "direct_call_count": len(calls),
            "direct_static_hit_count": len(direct_hits),
            "dynamic_or_unknown_candidate_count": len(dynamic_candidates),
            "status": status,
            "conclusion": conclusion,
        },
        "calls": calls,
        "caveats": [
            "This is a bounded local static trace, not an emulator run.",
            f"R3 values classified as dynamic/table-derived may still become {index_hex} at runtime.",
            f"Indirect dispatch, table handlers, interrupt interleavings, or callers absent from the JSON may still enqueue {index_hex}.",
            "The generic queue-to-TX path only emits queued entries; this tracer looks for direct report-index sources at loc_3E54 callers.",
        ],
    }


def format_text_report(analysis: Mapping[str, Any]) -> str:
    """Render *analysis* (as built by ``analyze_report_sources``) as text.

    Missing or wrongly typed sections are tolerated and fall back to
    defaults. The returned text always ends with exactly one newline.
    """
    summary = analysis.get("summary", {})
    if not isinstance(summary, Mapping):
        summary = {}
    queue_hex = analysis.get("queue_function_hex", h16(QUEUE_FUNCTION))
    # Label the counters with the index that was actually analysed rather
    # than a fixed "0x0007"; the default analysis yields identical text.
    index_hex = analysis.get("report_index_of_interest_hex", "0x0007")
    lines = [
        "H8/536 loc_3E54 Report Source Trace",
        "",
        f"Queue function: {queue_hex}",
        f"Report index of interest: {index_hex}",
        f"Direct callers: {summary.get('direct_call_count', 0)}",
        f"Direct static {index_hex} hits: {summary.get('direct_static_hit_count', 0)}",
        f"Dynamic/unknown candidates: {summary.get('dynamic_or_unknown_candidate_count', 0)}",
        "",
        f"Conclusion: {summary.get('conclusion', '')}",
        "",
        "Call sites:",
    ]
    for call in analysis.get("calls", []):
        if not isinstance(call, Mapping):
            continue
        r2 = call.get("r2", {}) if isinstance(call.get("r2"), Mapping) else {}
        r3 = call.get("r3", {}) if isinstance(call.get("r3"), Mapping) else {}
        r3_value = r3.get("value_hex") or ""
        lines.append(
            f"- {call.get('address_hex')} in {call.get('function_label')}: "
            f"R2.bit7={_format_bit(r2.get('bit7'))}, "
            f"R3={r3_value} ({r3.get('classification', 'unknown')}); "
            f"direct_{index_hex}={call.get('can_directly_enqueue_report_index')}"
        )
        for source_name, source in (("R2", r2), ("R3", r3)):
            evidence = source.get("evidence")
            if not isinstance(evidence, Mapping):
                continue
            text = evidence.get("instruction")
            if text:
                lines.append(f" {source_name} evidence: {evidence.get('address_hex')} {text}")
        table_hints = call.get("table_hints")
        if isinstance(table_hints, list) and table_hints:
            # Only the first four hints are shown to keep each entry short.
            hints = ", ".join(
                f"{item.get('address_hex')} {item.get('table')} via {item.get('operand')}"
                for item in table_hints[:4]
                if isinstance(item, Mapping)
            )
            lines.append(f" table/context hints: {hints}")
    lines.extend(["", "Caveats:"])
    lines.extend(f"- {caveat}" for caveat in analysis.get("caveats", []))
    return "\n".join(lines).rstrip() + "\n"


def write_report_sources(input_path: Path, output_path: Path, *, as_json: bool = False) -> JsonObject:
    """Analyse *input_path* and write the report to *output_path*.

    The parent directory of *output_path* is created if needed. Returns the
    analysis dict that was written.
    """
    analysis = analyze_report_sources(load_report_source_input(input_path))
    output_path.parent.mkdir(parents=True, exist_ok=True)
    if as_json:
        output_path.write_text(json.dumps(analysis, indent=2, sort_keys=True) + "\n", encoding="utf-8")
    else:
        output_path.write_text(format_text_report(analysis), encoding="utf-8")
    return analysis


def main(argv: list[str] | None = None, stdout: Any | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: argument list; ``None`` lets argparse read ``sys.argv``.
        stdout: stream for output; defaults to ``sys.stdout``.

    Returns:
        Always ``0``; failures surface as exceptions.
    """
    parser = argparse.ArgumentParser(
        description="Trace direct loc_3E54 report queue callers and their R2/R3 sources.",
    )
    parser.add_argument(
        "input",
        nargs="?",
        type=Path,
        default=DEFAULT_INPUT,
        help="structured JSON emitted by h8536_decompiler.py",
    )
    parser.add_argument("--json", action="store_true", help="emit structured JSON instead of readable text")
    parser.add_argument("--out", type=Path, default=None, help="write report to this path")
    parser.add_argument("--window", type=int, default=16, help="bounded backward instruction window per call")
    args = parser.parse_args(argv)

    stream = stdout
    if stream is None:
        import sys

        stream = sys.stdout

    analysis = analyze_report_sources(load_report_source_input(args.input), window=args.window)
    if args.json:
        rendered = json.dumps(analysis, indent=2, sort_keys=True) + "\n"
    else:
        rendered = format_text_report(analysis)
    # Compare against the argparse default explicitly instead of relying on
    # the truthiness of a Path object.
    if args.out is not None:
        args.out.parent.mkdir(parents=True, exist_ok=True)
        args.out.write_text(rendered, encoding="utf-8")
        print(f"wrote {args.out}", file=stream)
    else:
        print(rendered, end="", file=stream)
    return 0


def _analyze_call(
    instructions: list[JsonObject],
    call_index: int,
    functions: list[JsonObject],
    target: int,
    report_index: int,
    window: int,
) -> JsonObject:
    """Build the per-call-site record for ``instructions[call_index]``."""
    call = instructions[call_index]
    address = int(call["address"])
    function = _function_for_address(functions, address)
    local_window = _local_window(instructions, call_index, function, window)
    r2 = _resolve_register(local_window, "R2", call, width=8)
    r3 = _resolve_register(local_window, "R3", call, width=16)
    table_hints = _table_hints(local_window)
    can_enqueue = (
        r2.get("bit7") is True
        and r3.get("classification") == "constant"
        and r3.get("value") == report_index
    )
    # With an empty window the call's own address stands in as window start.
    window_start = int(local_window[0]["address"]) if local_window else address
    return {
        "address": address,
        "address_hex": h16(address),
        "instruction": _instruction_text(call),
        "target": target,
        "target_hex": h16(target),
        "function_start": function.get("start") if function else None,
        "function_start_hex": h16(int(function["start"])) if function else None,
        "function_label": function.get("label") if function else label_for(address),
        "dataflow_block": _dataflow_block(call),
        "window_instruction_count": len(local_window),
        "window_start": window_start,
        "window_start_hex": h16(window_start),
        "r2": r2,
        "r3": r3,
        "table_hints": table_hints,
        "can_directly_enqueue_report_index": can_enqueue,
        "assessment": _call_assessment(r2, r3, report_index),
    }


def _resolve_register(
    window: list[Mapping[str, Any]],
    register: str,
    call: Mapping[str, Any],
    *,
    width: int,
) -> JsonObject:
    """Describe where *register* got its value before *call*.

    The local instruction window is tried first, then the call's own
    ``dataflow`` record. Constant values are masked to *width* bits. For R2
    a ``bit7`` entry is added: a bool for constants, ``None`` for
    non-constants, and the string ``"unknown"`` when no evidence exists.
    """
    evidence = _trace_register(window, register, seen=set(), width=width)
    if evidence is None:
        evidence = _dataflow_before(call, register)
    if evidence is None:
        return {
            "register": register,
            "classification": "unknown",
            "value": None,
            "value_hex": None,
            "bit7": None if register != "R2" else "unknown",
            "evidence": None,
        }
    if evidence.get("classification") == "constant" and isinstance(evidence.get("value"), int):
        value = int(evidence["value"]) & ((1 << width) - 1)
        evidence["value"] = value
        evidence["value_hex"] = f"0x{value:04X}" if width > 8 else f"0x{value:02X}"
        if register == "R2":
            evidence["bit7"] = bool(value & 0x80)
    elif register == "R2":
        evidence["bit7"] = None
    evidence["register"] = register
    return evidence


def _trace_register(
    window: list[Mapping[str, Any]],
    register: str,
    *,
    seen: set[str],
    width: int,
) -> JsonObject | None:
    """Find the most recent writer of *register* in *window*.

    Args:
        window: instructions in address order; scanned newest to oldest.
        register: register name, matched case-insensitively.
        seen: registers already being traced in this chain. A register is
            never traced twice, so a copy chain that returns to an earlier
            register is reported as unresolved.
        width: bit width used to mask immediate values.

    Returns:
        A source record, or ``None`` when nothing in the window writes the
        register or it is already in *seen*.
    """
    register = register.upper()
    if register in seen:
        return None
    seen.add(register)
    mask = (1 << width) - 1
    # Walk by position so that the prefix handed to a nested trace is exactly
    # "everything before this instruction". The earlier ``window.index(ins)``
    # lookup matched by equality and could select a different, equal entry.
    for position in range(len(window) - 1, -1, -1):
        ins = window[position]
        source, destination = _source_destination_operands(str(ins.get("operands", "")))
        if destination.upper() != register:
            continue
        mnemonic = _mnemonic_root(str(ins.get("mnemonic", "")))
        # A non-MOV instruction whose destination is the register modifies it
        # in place. This test used to sit in the "different destination"
        # branch above, where it could never be true.
        if _mutates_register(ins, register):
            return _source_record("dynamic/table-derived", ins, reason=f"{mnemonic} mutates {register}")
        immediate = _parse_immediate(source)
        if immediate is not None:
            return _source_record("constant", ins, value=immediate & mask, reason="immediate load")
        source_register = _register_operand(source)
        if source_register:
            nested = _trace_register(window[:position], source_register, seen=seen, width=width)
            if nested is not None:
                nested = dict(nested)
                nested["via"] = _evidence_record(ins)
                return nested
            return _source_record(
                "dynamic/table-derived",
                ins,
                reason=f"copied from unresolved {source_register}",
            )
        if "@" in source or "@" in destination:
            # Any memory operand gets the same classification, whether or not
            # it matches a known table offset.
            return _source_record("dynamic/table-derived", ins, reason="memory/indexed source")
        return _source_record("unknown", ins, reason=f"unsupported writer {mnemonic}")
    return None


def _dataflow_before(call: Mapping[str, Any], register: str) -> JsonObject | None:
    """Read *register*'s pre-call value from the call's ``dataflow`` record.

    Returns a ``"constant"`` record when the value is flagged as known, an
    ``"unknown"`` record when it is flagged as not known, and ``None`` when
    the record has no usable entry for the register.
    """
    dataflow = call.get("dataflow")
    if not isinstance(dataflow, Mapping):
        return None
    changes = dataflow.get("changes")
    if not isinstance(changes, list):
        return None
    for change in changes:
        if (
            not isinstance(change, Mapping)
            or change.get("kind") != "register"
            or str(change.get("name", "")).upper() != register
        ):
            continue
        before = change.get("before")
        if not isinstance(before, Mapping):
            continue
        if before.get("known") is True and isinstance(before.get("value"), int):
            address = call.get("address")
            return {
                "classification": "constant",
                "value": int(before["value"]),
                "value_hex": before.get("hex"),
                "reason": "decompiler dataflow before call",
                "evidence": {
                    "address": address,
                    "address_hex": h16(int(address)) if isinstance(address, int) else None,
                    "instruction": before.get("source"),
                },
            }
        if before.get("known") is False:
            return {
                "classification": "unknown",
                "value": None,
                "value_hex": None,
                "reason": f"decompiler dataflow: {before.get('reason', 'unknown')}",
                "evidence": None,
            }
    return None


def _call_assessment(r2: Mapping[str, Any], r3: Mapping[str, Any], report_index: int) -> str:
    """Summarise one call site in a single sentence."""
    if r2.get("bit7") is False:
        return "R2.bit7 appears clear, so loc_3E54 would not enqueue on this local evidence."
    if r3.get("classification") == "constant":
        if r3.get("value") == report_index:
            return f"Direct static enqueue source for {report_index:#06x}."
        return f"Direct static enqueue source for {int(r3.get('value', 0)):#06x}, not {report_index:#06x}."
    return f"No static {report_index:#06x} constant here; R3 is {r3.get('classification', 'unknown')}."


def _local_window(
    instructions: list[JsonObject],
    call_index: int,
    function: Mapping[str, Any] | None,
    window: int,
) -> list[JsonObject]:
    """Collect up to *window* instructions that precede the call.

    The scan walks backwards from the call and stops at the first
    instruction outside *function*'s ``start``..``end`` range (when a
    function is given). Instructions whose dataflow block is known and
    differs from the call's known block are skipped but do not stop the
    scan. The result is in original (ascending) order.
    """
    anchor_block = _dataflow_block(instructions[call_index])
    collected: list[JsonObject] = []
    for candidate in reversed(instructions[:call_index]):
        if len(collected) >= window:
            break
        where = int(candidate["address"])
        if function:
            inside = int(function["start"]) <= where <= int(function["end"])
            if not inside:
                break
        candidate_block = _dataflow_block(candidate)
        both_known = anchor_block is not None and candidate_block is not None
        if both_known and candidate_block != anchor_block:
            continue
        collected.append(candidate)
    collected.reverse()
    return collected


def _table_hints(window: Iterable[Mapping[str, Any]]) -> list[JsonObject]:
    """Return one hint record per recognised table operand in *window*."""
    return [
        {
            "address": int(ins["address"]),
            "address_hex": h16(int(ins["address"])),
            "instruction": _instruction_text(ins),
            "operand": operand,
            "table": table,
        }
        for ins in window
        for operand, table in _table_operands(str(ins.get("operands", "")))
    ]


def _table_operand(operand: str) -> bool:
    """Tell whether *operand* contains at least one recognised table operand."""
    return len(_table_operands(operand)) > 0


# Matches displacement operands of the form ``@(-H'<hex>, Rn)``.
_TABLE_OPERAND_PATTERN = re.compile(r"@\(-H'([0-9A-Fa-f]+),\s*(R[0-7])\)")


def _table_operands(operands: str) -> list[tuple[str, str]]:
    """List ``(matched operand text, table name)`` for known table offsets."""
    found: list[tuple[str, str]] = []
    for hit in _TABLE_OPERAND_PATTERN.finditer(operands):
        displacement = int(hit.group(1), 16) & 0xFFFF
        name = _LOGICAL_TABLE_OFFSETS.get(displacement)
        if name:
            found.append((hit.group(0), name))
    return found


def _source_record(
    classification: str,
    ins: Mapping[str, Any],
    *,
    value: int | None = None,
    reason: str,
) -> JsonObject:
    """Build a register-source record backed by instruction *ins*."""
    if value is None:
        rendered = None
    else:
        rendered = f"0x{value:04X}"
    return {
        "classification": classification,
        "value": value,
        "value_hex": rendered,
        "reason": reason,
        "evidence": _evidence_record(ins),
    }


def _evidence_record(ins: Mapping[str, Any]) -> JsonObject:
    """Extract the address, text, mnemonic and operands of *ins*."""
    where = ins.get("address")
    where_hex = h16(int(where)) if isinstance(where, int) else None
    return {
        "address": where,
        "address_hex": where_hex,
        "instruction": _instruction_text(ins),
        "mnemonic": ins.get("mnemonic"),
        "operands": ins.get("operands"),
    }


def _is_direct_call_to(ins: Mapping[str, Any], target: int) -> bool:
    """Tell whether *ins* is a ``BSR``/``JSR`` listing *target* as a target."""
    root = _mnemonic_root(str(ins.get("mnemonic", "")))
    if root not in {"BSR", "JSR"}:
        return False
    return target in _targets(ins)


def _instruction_sequence(value: object) -> list[JsonObject]:
    """Normalise the ``instructions`` payload to an address-sorted list.

    A mapping contributes its values, a list is used as is, and anything
    else yields an empty result. Entries that are not dicts with an integer
    ``"address"`` are dropped.
    """
    candidates: Iterable[Any]
    if isinstance(value, Mapping):
        candidates = value.values()
    elif isinstance(value, list):
        candidates = value
    else:
        candidates = []
    usable = [
        entry
        for entry in candidates
        if isinstance(entry, dict) and isinstance(entry.get("address"), int)
    ]
    usable.sort(key=lambda entry: int(entry["address"]))
    return usable


def _function_ranges(payload: Mapping[str, Any]) -> list[JsonObject]:
    """Return ``start``/``end``/``label`` records from ``call_graph.nodes``.

    Nodes without integer ``start`` and ``end`` are ignored; the result is
    sorted by start address.
    """
    graph = payload.get("call_graph")
    nodes = graph.get("nodes") if isinstance(graph, Mapping) else None
    if not isinstance(nodes, list):
        return []
    spans: list[JsonObject] = []
    for node in nodes:
        if not isinstance(node, Mapping):
            continue
        first, last = node.get("start"), node.get("end")
        if not (isinstance(first, int) and isinstance(last, int)):
            continue
        # Fall back to a generated label only when the node has none.
        name = str(node.get("label") or label_for(first))
        spans.append({"start": first, "end": last, "label": name})
    spans.sort(key=lambda span: int(span["start"]))
    return spans


def _function_for_address(functions: list[JsonObject], address: int) -> JsonObject | None:
    """Return the first range whose inclusive bounds contain *address*."""
    return next(
        (span for span in functions if int(span["start"]) <= address <= int(span["end"])),
        None,
    )


def _dataflow_block(ins: Mapping[str, Any]) -> int | None:
    """Return ``ins["dataflow"]["block"]`` when it is an int, else ``None``."""
    info = ins.get("dataflow")
    if not isinstance(info, Mapping):
        return None
    block = info.get("block")
    if not isinstance(block, int):
        return None
    return int(block)


def _targets(ins: Mapping[str, Any]) -> list[int]:
    """Return the integer entries of ``ins["targets"]`` (empty if not a list)."""
    raw = ins.get("targets", [])
    if not isinstance(raw, list):
        return []
    return [int(entry) for entry in raw if isinstance(entry, int)]


def _instruction_text(ins: Mapping[str, Any]) -> str:
    """Return the instruction's ``text``, or ``"<mnemonic> <operands>"``."""
    explicit = ins.get("text")
    if isinstance(explicit, str) and explicit:
        return explicit
    operand_text = str(ins.get("operands", ""))
    return f"{ins.get('mnemonic', '')} {operand_text}".strip()


def _source_destination_operands(operands: str) -> tuple[str, str]:
    """Split an operand string into ``(source, destination)``.

    The split happens at the last comma that is outside any ``()``/``{}``
    group. Without such a comma the whole string is the destination and the
    source is empty.
    """
    nesting = 0
    last_comma: int | None = None
    for position, symbol in enumerate(operands):
        if symbol == "(" or symbol == "{":
            nesting += 1
        elif symbol == ")" or symbol == "}":
            # An unmatched closer is ignored rather than driving depth negative.
            if nesting:
                nesting -= 1
        elif symbol == "," and not nesting:
            last_comma = position
    if last_comma is None:
        return "", operands.strip()
    left = operands[:last_comma]
    right = operands[last_comma + 1 :]
    return left.strip(), right.strip()


def _parse_immediate(operand: str) -> int | None:
    """Parse a numeric operand into a 16-bit value.

    An optional leading ``#`` is dropped. ``H'``, ``0x`` and ``$`` prefixes
    select hexadecimal; anything else is read as decimal. Returns ``None``
    when the text is not a number.
    """
    text = operand.strip()
    if text.startswith("#"):
        text = text[1:].strip()
    upper = text.upper()
    if upper.startswith("H'"):
        digits, base = text[2:], 16
    elif upper.startswith("0X"):
        # int() accepts the "0x" prefix itself when the base is 16.
        digits, base = text, 16
    elif text.startswith("$"):
        digits, base = text[1:], 16
    else:
        digits, base = text, 10
    try:
        return int(digits, base) & 0xFFFF
    except ValueError:
        return None


def _register_operand(operand: str) -> str | None:
    """Return the upper-cased name if *operand* is exactly ``R0``..``R7``."""
    name = operand.strip().upper()
    if re.fullmatch(r"R[0-7]", name):
        return name
    return None


def _mutates_register(ins: Mapping[str, Any], register: str) -> bool:
    """Tell whether *ins* is a non-MOV instruction whose destination is *register*."""
    _, destination = _source_destination_operands(str(ins.get("operands", "")))
    root = _mnemonic_root(str(ins.get("mnemonic", "")))
    return destination.upper() == register.upper() and not root.startswith("MOV")


def _mnemonic_root(mnemonic: str) -> str:
    """Upper-case *mnemonic* and drop everything from its last ``.`` onwards."""
    cut = mnemonic.rfind(".")
    stem = mnemonic if cut < 0 else mnemonic[:cut]
    return stem.upper()


def _format_bit(value: Any) -> str:
    """Map ``True``/``False`` to ``"set"``/``"clear"``; anything else is ``"unknown"``."""
    # Identity checks on purpose: 1 and 0 must not be read as set/clear.
    if value is True:
        return "set"
    return "clear" if value is False else "unknown"


__all__ = [
    "analyze_report_sources",
    "format_text_report",
    "load_report_source_input",
    "main",
    "write_report_sources",
]


if __name__ == "__main__":
    raise SystemExit(main())