1
0

further digging and basic emulator

This commit is contained in:
Aiden
2026-05-25 17:42:58 +10:00
parent 07f48c76e0
commit b264037e82
11 changed files with 1628 additions and 3 deletions

View File

@@ -0,0 +1,548 @@
from __future__ import annotations
import argparse
import json
import re
from collections.abc import Iterable, Mapping
from pathlib import Path
from typing import Any
from .formatting import h16, label_for
# Loose alias for the JSON-shaped dictionaries this module reads and builds.
JsonObject = dict[str, Any]

# Default positional input of the CLI built in main().
DEFAULT_INPUT = Path("build/rom_decompiled.json")
# Default report destinations. NOTE(review): not referenced by the code shown
# here; presumably consumed by callers elsewhere -- verify.
DEFAULT_TEXT_OUTPUT = Path("build/rom_report_sources.txt")
DEFAULT_JSON_OUTPUT = Path("build/rom_report_sources.json")

# Default call target whose direct BSR/JSR callers analyze_report_sources() traces.
QUEUE_FUNCTION = 0x3E54
# Default R3 constant that analyze_report_sources() counts as a direct static hit.
REPORT_INDEX_OF_INTEREST = 0x0007

# Maps the hex displacement found in an "@(-H'xxxx, Rn)" operand to the label
# that _table_operands() reports for it. The "_candidate" suffixes suggest the
# table roles are provisional -- TODO confirm.
_LOGICAL_TABLE_OFFSETS = {
    0x2000: "primary_value_table_candidate",
    0x1C00: "secondary_value_table_candidate",
    0x1800: "current_value_table_candidate",
    0x1400: "flag_table_candidate",
}
def load_report_source_input(path: Path) -> JsonObject:
    """Read *path* as UTF-8 JSON and return it if it looks like decompiler output.

    The document is accepted only when its top level is a dict that contains
    an ``"instructions"`` key.

    Raises:
        ValueError: if the decoded document fails that shape check.
    """
    with path.open("r", encoding="utf-8") as source:
        document = json.load(source)

    has_expected_shape = isinstance(document, dict) and "instructions" in document
    if has_expected_shape:
        return document
    raise ValueError(f"{path} does not look like h8536_decompiler JSON output")
def analyze_report_sources(
    payload: Mapping[str, Any],
    *,
    target: int = QUEUE_FUNCTION,
    report_index: int = REPORT_INDEX_OF_INTEREST,
    window: int = 16,
) -> JsonObject:
    """Trace every direct BSR/JSR call to *target* and classify its R2/R3 sources.

    Args:
        payload: Decompiler JSON; its ``"instructions"`` and ``"call_graph"``
            entries are read.
        target: Address of the called function to look for.
        report_index: R3 constant that counts as a direct static hit.
        window: Maximum number of preceding instructions inspected per call.

    Returns:
        A ``report_source_trace`` dictionary with a ``summary``, the per-call
        records produced by ``_analyze_call`` and a list of caveats.
    """
    instructions = _instruction_sequence(payload.get("instructions"))
    functions = _function_ranges(payload)
    calls = [
        _analyze_call(instructions, index, functions, target, report_index, window)
        for index, ins in enumerate(instructions)
        if _is_direct_call_to(ins, target)
    ]

    # A direct hit needs R2 bit 7 proven set and R3 proven equal to the index.
    direct_hits = [
        call
        for call in calls
        if call["r2"].get("bit7") is True
        and call["r3"].get("classification") == "constant"
        and call["r3"].get("value") == report_index
    ]
    # Candidates are calls where bit 7 is not proven clear and R3 is not a constant.
    dynamic_candidates = [
        call
        for call in calls
        if call["r2"].get("bit7") is not False
        and call["r3"].get("classification") in {"dynamic/table-derived", "unknown"}
    ]

    # Derive the human-readable names from the arguments so the prose stays
    # truthful when a non-default target or report index is traced. With the
    # defaults these render as "loc_3E54" and "0x0007".
    target_label = f"loc_{target:04X}"
    index_hex = f"0x{report_index:04X}"

    if direct_hits:
        conclusion = (
            f"At least one direct {target_label} caller statically loads report index {report_index:#06x} "
            "with R2.bit7 set before the queue call."
        )
        status = "direct_static_hit"
    else:
        conclusion = (
            f"No direct {target_label} caller in this JSON statically loads report index {report_index:#06x}. "
            f"{report_index:#06x} remains an observed runtime/capture value unless another indirect "
            "or table-dispatch path is proven."
        )
        status = "not_statically_proven"

    return {
        "kind": "report_source_trace",
        "queue_function": target,
        "queue_function_hex": h16(target),
        "report_index_of_interest": report_index,
        "report_index_of_interest_hex": index_hex,
        "summary": {
            "direct_call_count": len(calls),
            "direct_static_hit_count": len(direct_hits),
            "dynamic_or_unknown_candidate_count": len(dynamic_candidates),
            "status": status,
            "conclusion": conclusion,
        },
        "calls": calls,
        "caveats": [
            "This is a bounded local static trace, not an emulator run.",
            f"R3 values classified as dynamic/table-derived may still become {index_hex} at runtime.",
            "Indirect dispatch, table handlers, interrupt interleavings, or callers absent from the JSON "
            f"may still enqueue {index_hex}.",
            "The generic queue-to-TX path only emits queued entries; this tracer looks for direct "
            f"report-index sources at {target_label} callers.",
        ],
    }
def format_text_report(analysis: Mapping[str, Any]) -> str:
    """Render the dictionary produced by ``analyze_report_sources`` as plain text.

    The report index and queue-function label shown in the headings are taken
    from *analysis* (falling back to ``0x0007`` / ``loc_3E54`` when absent), so
    the text matches the values that were actually traced.

    Args:
        analysis: A ``report_source_trace`` dictionary. Missing or malformed
            sections are treated as empty rather than raising.

    Returns:
        The report text, terminated by exactly one newline.
    """
    summary = analysis.get("summary", {})
    if not isinstance(summary, Mapping):
        summary = {}

    index_hex = analysis.get("report_index_of_interest_hex", "0x0007")
    queue_function = analysis.get("queue_function")
    target_label = f"loc_{queue_function:04X}" if isinstance(queue_function, int) else "loc_3E54"

    lines = [
        f"H8/536 {target_label} Report Source Trace",
        "",
        f"Queue function: {analysis.get('queue_function_hex', h16(QUEUE_FUNCTION))}",
        f"Report index of interest: {index_hex}",
        f"Direct callers: {summary.get('direct_call_count', 0)}",
        f"Direct static {index_hex} hits: {summary.get('direct_static_hit_count', 0)}",
        f"Dynamic/unknown candidates: {summary.get('dynamic_or_unknown_candidate_count', 0)}",
        "",
        f"Conclusion: {summary.get('conclusion', '')}",
        "",
        "Call sites:",
    ]

    for call in analysis.get("calls") or []:
        if not isinstance(call, Mapping):
            continue
        r2 = call.get("r2", {}) if isinstance(call.get("r2"), Mapping) else {}
        r3 = call.get("r3", {}) if isinstance(call.get("r3"), Mapping) else {}
        # Anything without a rendered constant is shown as a dynamic value.
        r3_value = r3.get("value_hex") or "<dynamic>"
        lines.append(
            f"- {call.get('address_hex')} in {call.get('function_label')}: "
            f"R2.bit7={_format_bit(r2.get('bit7'))}, "
            f"R3={r3_value} ({r3.get('classification', 'unknown')}); "
            f"direct_{index_hex}={call.get('can_directly_enqueue_report_index')}"
        )
        for source_name, source in (("R2", r2), ("R3", r3)):
            evidence = source.get("evidence")
            if not isinstance(evidence, Mapping):
                continue
            text = evidence.get("instruction")
            if text:
                lines.append(f" {source_name} evidence: {evidence.get('address_hex')} {text}")
        table_hints = call.get("table_hints")
        if isinstance(table_hints, list) and table_hints:
            # Only the first four hints are listed to keep each call site short.
            hints = ", ".join(
                f"{item.get('address_hex')} {item.get('table')} via {item.get('operand')}"
                for item in table_hints[:4]
                if isinstance(item, Mapping)
            )
            lines.append(f" table/context hints: {hints}")

    lines.extend(["", "Caveats:"])
    lines.extend(f"- {caveat}" for caveat in analysis.get("caveats") or [])
    return "\n".join(lines).rstrip() + "\n"
def write_report_sources(input_path: Path, output_path: Path, *, as_json: bool = False) -> JsonObject:
    """Analyze *input_path* with default settings and write the report to *output_path*.

    Missing parent directories of *output_path* are created. The report is
    written as sorted, indented JSON when *as_json* is true, otherwise as the
    text produced by ``format_text_report``.

    Returns:
        The analysis dictionary that was rendered.
    """
    analysis = analyze_report_sources(load_report_source_input(input_path))
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if not as_json:
        rendered = format_text_report(analysis)
    else:
        rendered = json.dumps(analysis, indent=2, sort_keys=True) + "\n"

    output_path.write_text(rendered, encoding="utf-8")
    return analysis
def main(argv: list[str] | None = None, stdout: Any | None = None) -> int:
    """Command-line entry point: analyze a decompiler JSON file and emit the report.

    Args:
        argv: Argument list handed to ``argparse``; ``None`` lets argparse use
            the process arguments.
        stdout: Stream used for printed output; ``sys.stdout`` when ``None``.

    Returns:
        ``0`` after the report has been printed or written.
    """
    cli = argparse.ArgumentParser(
        description="Trace direct loc_3E54 report queue callers and their R2/R3 sources.",
    )
    cli.add_argument(
        "input",
        nargs="?",
        type=Path,
        default=DEFAULT_INPUT,
        help="structured JSON emitted by h8536_decompiler.py",
    )
    cli.add_argument("--json", action="store_true", help="emit structured JSON instead of readable text")
    cli.add_argument("--out", type=Path, default=None, help="write report to this path")
    cli.add_argument("--window", type=int, default=16, help="bounded backward instruction window per call")
    options = cli.parse_args(argv)

    if stdout is None:
        import sys

        sink = sys.stdout
    else:
        sink = stdout

    analysis = analyze_report_sources(load_report_source_input(options.input), window=options.window)
    if options.json:
        rendered = json.dumps(analysis, indent=2, sort_keys=True) + "\n"
    else:
        rendered = format_text_report(analysis)

    if not options.out:
        # No output path: the report itself goes to the stream.
        print(rendered, end="", file=sink)
        return 0

    options.out.parent.mkdir(parents=True, exist_ok=True)
    options.out.write_text(rendered, encoding="utf-8")
    print(f"wrote {options.out}", file=sink)
    return 0
def _analyze_call(
    instructions: list[JsonObject],
    call_index: int,
    functions: list[JsonObject],
    target: int,
    report_index: int,
    window: int,
) -> JsonObject:
    """Build the per-call record for the call instruction at *call_index*.

    The record combines the call's location, the function range containing it
    (if any), the resolved R2 (8-bit) and R3 (16-bit) sources from the local
    backward window, table hints found in that window, and a textual
    assessment.
    """
    site = instructions[call_index]
    site_address = int(site["address"])
    owner = _function_for_address(functions, site_address)
    preceding = _local_window(instructions, call_index, owner, window)

    r2_source = _resolve_register(preceding, "R2", site, width=8)
    r3_source = _resolve_register(preceding, "R3", site, width=16)
    hints = _table_hints(preceding)

    is_direct_hit = (
        r2_source.get("bit7") is True
        and r3_source.get("classification") == "constant"
        and r3_source.get("value") == report_index
    )

    if owner:
        owner_start = owner.get("start")
        owner_start_hex = h16(int(owner["start"]))
        owner_label = owner.get("label")
    else:
        # No enclosing function range is known: fall back to a label derived from the address.
        owner_start = None
        owner_start_hex = None
        owner_label = label_for(site_address)

    # With an empty window the call's own address stands in for the window start.
    first_address = int(preceding[0]["address"]) if preceding else site_address

    return {
        "address": site_address,
        "address_hex": h16(site_address),
        "instruction": _instruction_text(site),
        "target": target,
        "target_hex": h16(target),
        "function_start": owner_start,
        "function_start_hex": owner_start_hex,
        "function_label": owner_label,
        "dataflow_block": _dataflow_block(site),
        "window_instruction_count": len(preceding),
        "window_start": first_address,
        "window_start_hex": h16(first_address),
        "r2": r2_source,
        "r3": r3_source,
        "table_hints": hints,
        "can_directly_enqueue_report_index": is_direct_hit,
        "assessment": _call_assessment(r2_source, r3_source, report_index),
    }
def _resolve_register(window: list[Mapping[str, Any]], register: str, call: Mapping[str, Any], *, width: int) -> JsonObject:
    """Return the best available source record for *register* before *call*.

    The local instruction window is traced first; if it yields nothing, the
    decompiler's own ``dataflow`` entry on the call is consulted. Constant
    values are masked to *width* bits and rendered as hex. For ``R2`` the
    record also carries ``bit7``, which is always one of ``True``, ``False``
    or ``None`` (``None`` meaning "not statically known").

    Args:
        window: Instructions preceding the call, oldest first.
        register: Register name, e.g. ``"R2"`` or ``"R3"``.
        call: The call instruction record.
        width: Register width in bits used for masking and hex formatting.
    """
    evidence = _trace_register(window, register, seen=set(), width=width)
    if evidence is None:
        evidence = _dataflow_before(call, register)
    if evidence is None:
        # Previously this path reported bit7 as the string "unknown" for R2,
        # while every other unknown state used None; keep the field tri-state.
        return {
            "register": register,
            "classification": "unknown",
            "value": None,
            "value_hex": None,
            "bit7": None,
            "evidence": None,
        }

    if evidence.get("classification") == "constant" and isinstance(evidence.get("value"), int):
        value = int(evidence["value"]) & ((1 << width) - 1)
        evidence["value"] = value
        # Four hex digits for registers wider than a byte, two otherwise.
        evidence["value_hex"] = f"0x{value:04X}" if width > 8 else f"0x{value:02X}"
        if register == "R2":
            evidence["bit7"] = bool(value & 0x80)
    elif register == "R2":
        evidence["bit7"] = None

    evidence["register"] = register
    return evidence
def _trace_register(
    window: list[Mapping[str, Any]],
    register: str,
    *,
    seen: set[str],
    width: int,
) -> JsonObject | None:
    """Walk *window* backwards to find the most recent writer of *register*.

    Args:
        window: Instructions to search, oldest first.
        register: Register name; compared case-insensitively.
        seen: Registers already being traced in this chain. It is mutated, and
            a register already present yields ``None`` so copy cycles stop.
        width: Bit width used to mask immediate constants.

    Returns:
        A source record for the nearest instruction whose destination operand
        is *register*, or ``None`` if the window contains no such instruction.

        * ``MOV`` with an immediate source -> ``"constant"``.
        * ``MOV`` from another register -> that register's record from the
          earlier part of the window, with the copy attached as ``"via"``;
          ``"dynamic/table-derived"`` if it cannot be resolved.
        * any source containing ``@`` -> ``"dynamic/table-derived"``.
        * anything else -> ``"unknown"``.
    """
    register = register.upper()
    if register in seen:
        return None
    seen.add(register)
    mask = (1 << width) - 1

    # Iterate by position so the "earlier instructions" slice below is exact.
    # Looking the instruction up again with list.index() is an equality search
    # that could select an earlier, equal record and costs O(n) per step.
    for position in range(len(window) - 1, -1, -1):
        ins = window[position]
        source, destination = _source_destination_operands(str(ins.get("operands", "")))
        if destination.upper() != register:
            # NOTE(review): the original consulted _mutates_register() here, but
            # that helper only reports True when the destination equals the
            # register, which this branch has already excluded, so the check
            # could never fire. Non-MOV writers are handled below as "unknown".
            continue

        mnemonic = _mnemonic_root(str(ins.get("mnemonic", "")))
        if mnemonic.startswith("MOV"):
            immediate = _parse_immediate(source)
            if immediate is not None:
                return _source_record("constant", ins, value=immediate & mask, reason="immediate load")

            source_register = _register_operand(source)
            if source_register:
                nested = _trace_register(window[:position], source_register, seen=seen, width=width)
                if nested is not None:
                    nested = dict(nested)
                    nested["via"] = _evidence_record(ins)
                    return nested
                return _source_record(
                    "dynamic/table-derived",
                    ins,
                    reason=f"copied from unresolved {source_register}",
                )

        if "@" in source or "@" in destination:
            # Memory and indexed sources get the same classification whether or
            # not the operand matches a known table offset.
            return _source_record("dynamic/table-derived", ins, reason="memory/indexed source")
        return _source_record("unknown", ins, reason=f"unsupported writer {mnemonic}")
    return None
def _dataflow_before(call: Mapping[str, Any], register: str) -> JsonObject | None:
dataflow = call.get("dataflow")
if not isinstance(dataflow, Mapping):
return None
changes = dataflow.get("changes")
if not isinstance(changes, list):
return None
for change in changes:
if not isinstance(change, Mapping) or change.get("kind") != "register" or str(change.get("name", "")).upper() != register:
continue
before = change.get("before")
if isinstance(before, Mapping) and before.get("known") is True and isinstance(before.get("value"), int):
return {
"classification": "constant",
"value": int(before["value"]),
"value_hex": before.get("hex"),
"reason": "decompiler dataflow before call",
"evidence": {
"address": call.get("address"),
"address_hex": h16(int(call["address"])) if isinstance(call.get("address"), int) else None,
"instruction": before.get("source"),
},
}
if isinstance(before, Mapping) and before.get("known") is False:
return {
"classification": "unknown",
"value": None,
"value_hex": None,
"reason": f"decompiler dataflow: {before.get('reason', 'unknown')}",
"evidence": None,
}
return None
def _call_assessment(r2: Mapping[str, Any], r3: Mapping[str, Any], report_index: int) -> str:
if r2.get("bit7") is False:
return "R2.bit7 appears clear, so loc_3E54 would not enqueue on this local evidence."
if r3.get("classification") == "constant":
if r3.get("value") == report_index:
return f"Direct static enqueue source for {report_index:#06x}."
return f"Direct static enqueue source for {int(r3.get('value', 0)):#06x}, not {report_index:#06x}."
return f"No static {report_index:#06x} constant here; R3 is {r3.get('classification', 'unknown')}."
def _local_window(
    instructions: list[JsonObject],
    call_index: int,
    function: Mapping[str, Any] | None,
    window: int,
) -> list[JsonObject]:
    """Collect up to *window* instructions that precede the call, oldest first.

    The scan moves backwards from the call and stops once *window*
    instructions are collected or an address falls outside *function*'s
    ``start``..``end`` range. Instructions whose dataflow block is known and
    differs from the call's known block are skipped without ending the scan.
    """
    call_block = _dataflow_block(instructions[call_index])
    collected: list[JsonObject] = []

    for position in range(call_index - 1, -1, -1):
        if len(collected) >= window:
            break
        candidate = instructions[position]
        address = int(candidate["address"])
        if function and (address < int(function["start"]) or address > int(function["end"])):
            break
        candidate_block = _dataflow_block(candidate)
        both_known = call_block is not None and candidate_block is not None
        if both_known and candidate_block != call_block:
            continue
        collected.append(candidate)

    collected.reverse()
    return collected
def _table_hints(window: Iterable[Mapping[str, Any]]) -> list[JsonObject]:
    """List every known-table operand found in *window*, in instruction order.

    Each hint records the instruction's address, its text, the matched operand
    and the table label reported by ``_table_operands``.
    """
    return [
        {
            "address": int(ins["address"]),
            "address_hex": h16(int(ins["address"])),
            "instruction": _instruction_text(ins),
            "operand": operand,
            "table": table,
        }
        for ins in window
        for operand, table in _table_operands(str(ins.get("operands", "")))
    ]
def _table_operand(operand: str) -> bool:
    """Return True when *operand* contains at least one known-table reference."""
    return len(_table_operands(operand)) > 0
def _table_operands(operands: str) -> list[tuple[str, str]]:
    """Find ``@(-H'xxxx, Rn)`` operands whose displacement names a known table.

    Returns:
        ``(matched operand text, table label)`` pairs in order of appearance.
        Displacements absent from ``_LOGICAL_TABLE_OFFSETS`` are ignored.
    """
    found: list[tuple[str, str]] = []
    for hit in re.finditer(r"@\(-H'([0-9A-Fa-f]+),\s*(R[0-7])\)", operands):
        displacement = int(hit.group(1), 16) & 0xFFFF
        label = _LOGICAL_TABLE_OFFSETS.get(displacement)
        if not label:
            continue
        found.append((hit.group(0), label))
    return found
def _source_record(
    classification: str,
    ins: Mapping[str, Any],
    *,
    value: int | None = None,
    reason: str,
) -> JsonObject:
    """Assemble a source record for *ins* with the given classification.

    ``value_hex`` is the four-digit uppercase hex form of *value*, or ``None``
    when no value is supplied; ``evidence`` describes the instruction itself.
    """
    rendered = None if value is None else f"0x{value:04X}"
    record: JsonObject = {
        "classification": classification,
        "value": value,
        "value_hex": rendered,
        "reason": reason,
    }
    record["evidence"] = _evidence_record(ins)
    return record
def _evidence_record(ins: Mapping[str, Any]) -> JsonObject:
    """Describe *ins* as evidence: address, hex address, text, mnemonic and operands.

    ``address_hex`` is ``None`` unless the instruction's address is an int.
    """
    raw_address = ins.get("address")
    if isinstance(raw_address, int):
        address_hex = h16(int(raw_address))
    else:
        address_hex = None
    return {
        "address": raw_address,
        "address_hex": address_hex,
        "instruction": _instruction_text(ins),
        "mnemonic": ins.get("mnemonic"),
        "operands": ins.get("operands"),
    }
def _is_direct_call_to(ins: Mapping[str, Any], target: int) -> bool:
    """Return True when *ins* is a ``BSR``/``JSR`` whose targets include *target*."""
    if _mnemonic_root(str(ins.get("mnemonic", ""))) not in {"BSR", "JSR"}:
        return False
    return target in _targets(ins)
def _instruction_sequence(value: object) -> list[JsonObject]:
if isinstance(value, Mapping):
values: Iterable[Any] = value.values()
elif isinstance(value, list):
values = value
else:
values = []
return sorted(
[item for item in values if isinstance(item, dict) and isinstance(item.get("address"), int)],
key=lambda item: int(item["address"]),
)
def _function_ranges(payload: Mapping[str, Any]) -> list[JsonObject]:
call_graph = payload.get("call_graph")
nodes = call_graph.get("nodes") if isinstance(call_graph, Mapping) else None
if not isinstance(nodes, list):
return []
ranges: list[JsonObject] = []
for node in nodes:
if not isinstance(node, Mapping):
continue
start = node.get("start")
end = node.get("end")
if isinstance(start, int) and isinstance(end, int):
ranges.append({"start": start, "end": end, "label": str(node.get("label") or label_for(start))})
return sorted(ranges, key=lambda item: int(item["start"]))
def _function_for_address(functions: list[JsonObject], address: int) -> JsonObject | None:
for function in functions:
if int(function["start"]) <= address <= int(function["end"]):
return function
return None
def _dataflow_block(ins: Mapping[str, Any]) -> int | None:
dataflow = ins.get("dataflow")
if isinstance(dataflow, Mapping) and isinstance(dataflow.get("block"), int):
return int(dataflow["block"])
return None
def _targets(ins: Mapping[str, Any]) -> list[int]:
targets = ins.get("targets", [])
return [int(target) for target in targets if isinstance(target, int)] if isinstance(targets, list) else []
def _instruction_text(ins: Mapping[str, Any]) -> str:
text = ins.get("text")
if isinstance(text, str) and text:
return text
operands = str(ins.get("operands", ""))
return f"{ins.get('mnemonic', '')} {operands}".strip()
def _source_destination_operands(operands: str) -> tuple[str, str]:
depth = 0
split_at: int | None = None
for index, char in enumerate(operands):
if char in "({":
depth += 1
elif char in ")}" and depth:
depth -= 1
elif char == "," and depth == 0:
split_at = index
if split_at is None:
operand = operands.strip()
return "", operand
return operands[:split_at].strip(), operands[split_at + 1 :].strip()
def _parse_immediate(operand: str) -> int | None:
text = operand.strip()
if text.startswith("#"):
text = text[1:].strip()
try:
if text.upper().startswith("H'"):
return int(text[2:], 16) & 0xFFFF
if text.upper().startswith("0X"):
return int(text, 16) & 0xFFFF
if text.startswith("$"):
return int(text[1:], 16) & 0xFFFF
return int(text, 10) & 0xFFFF
except ValueError:
return None
def _register_operand(operand: str) -> str | None:
text = operand.strip().upper()
return text if re.fullmatch(r"R[0-7]", text) else None
def _mutates_register(ins: Mapping[str, Any], register: str) -> bool:
    """Return True when *ins* writes *register* with something other than a ``MOV``.

    Only the destination operand is considered, compared case-insensitively.
    """
    _, written = _source_destination_operands(str(ins.get("operands", "")))
    if written.upper() == register.upper():
        root = _mnemonic_root(str(ins.get("mnemonic", "")))
        return not root.startswith("MOV")
    return False
def _mnemonic_root(mnemonic: str) -> str:
return mnemonic.rsplit(".", 1)[0].upper()
def _format_bit(value: Any) -> str:
if value is True:
return "set"
if value is False:
return "clear"
return "unknown"
# Names exported by ``from <module> import *``; the underscore-prefixed helpers
# above are intentionally left out.
__all__ = [
    "analyze_report_sources",
    "format_text_report",
    "load_report_source_input",
    "main",
    "write_report_sources",
]
# Run the CLI when executed as a script; main()'s return value becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())