549 lines
20 KiB
Python
549 lines
20 KiB
Python
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
from collections.abc import Iterable, Mapping
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from .formatting import h16, label_for
|
|
|
|
|
|
# Loose alias for the JSON-shaped dictionaries this module reads and emits.
JsonObject = dict[str, Any]

# Default decompiler input and default destinations for the two renderings.
DEFAULT_INPUT: Path = Path("build/rom_decompiled.json")
DEFAULT_TEXT_OUTPUT: Path = Path("build/rom_report_sources.txt")
DEFAULT_JSON_OUTPUT: Path = Path("build/rom_report_sources.json")

# Address of the queue routine whose direct callers are traced by default.
QUEUE_FUNCTION: int = 0x3E54
# R3 value (report index) the summary looks for by default.
REPORT_INDEX_OF_INTEREST: int = 0x0007

# Offsets recognised inside "@(-H'xxxx, Rn)" operands, mapped to the tentative
# table name that is reported as a context hint.
_LOGICAL_TABLE_OFFSETS: dict[int, str] = {
    0x2000: "primary_value_table_candidate",
    0x1C00: "secondary_value_table_candidate",
    0x1800: "current_value_table_candidate",
    0x1400: "flag_table_candidate",
}
|
|
|
|
|
|
def load_report_source_input(path: Path) -> JsonObject:
    """Read decompiler JSON from ``path`` and return its top-level object.

    Raises:
        ValueError: If the document is not a JSON object containing an
            ``"instructions"`` key.
    """
    document = json.loads(path.read_text(encoding="utf-8"))
    looks_like_decompiler_output = isinstance(document, dict) and "instructions" in document
    if looks_like_decompiler_output:
        return document
    raise ValueError(f"{path} does not look like h8536_decompiler JSON output")
|
|
|
|
|
|
def analyze_report_sources(
    payload: Mapping[str, Any],
    *,
    target: int = QUEUE_FUNCTION,
    report_index: int = REPORT_INDEX_OF_INTEREST,
    window: int = 16,
) -> JsonObject:
    """Trace every direct BSR/JSR to ``target`` and classify its R2/R3 inputs.

    Args:
        payload: Decompiler output; ``"instructions"`` and, when present,
            ``"call_graph"`` are read from it.
        target: Address of the queue routine whose callers are examined.
        report_index: R3 value that counts as a direct static hit.
        window: Maximum number of preceding instructions inspected per call.

    Returns:
        A JSON-serialisable report: per-call records under ``"calls"``,
        aggregate counts plus a conclusion under ``"summary"``, and a list of
        ``"caveats"``.
    """
    instructions = _instruction_sequence(payload.get("instructions"))
    functions = _function_ranges(payload)
    calls = [
        _analyze_call(instructions, index, functions, target, report_index, window)
        for index, ins in enumerate(instructions)
        if _is_direct_call_to(ins, target)
    ]
    # A direct hit needs R2.bit7 proven set AND R3 proven equal to the index.
    direct_hits = [
        call
        for call in calls
        if call["r2"].get("bit7") is True
        and call["r3"].get("classification") == "constant"
        and call["r3"].get("value") == report_index
    ]
    # Candidates are calls where bit7 is not proven clear and R3 is unresolved.
    dynamic_candidates = [
        call
        for call in calls
        if call["r2"].get("bit7") is not False
        and call["r3"].get("classification") in {"dynamic/table-derived", "unknown"}
    ]

    # The prose used to hard-code "loc_3E54" and "0x0007" even though both are
    # parameters. Derive them from the arguments so the text stays truthful for
    # non-default values; with the defaults the output is unchanged.
    target_label = f"loc_{target:04X}"
    index_hex = f"0x{report_index:04X}"

    if direct_hits:
        conclusion = (
            f"At least one direct {target_label} caller statically loads report index {report_index:#06x} "
            "with R2.bit7 set before the queue call."
        )
        status = "direct_static_hit"
    else:
        conclusion = (
            f"No direct {target_label} caller in this JSON statically loads report index {report_index:#06x}. "
            f"{report_index:#06x} remains an observed runtime/capture value unless another indirect "
            "or table-dispatch path is proven."
        )
        status = "not_statically_proven"

    return {
        "kind": "report_source_trace",
        "queue_function": target,
        "queue_function_hex": h16(target),
        "report_index_of_interest": report_index,
        "report_index_of_interest_hex": index_hex,
        "summary": {
            "direct_call_count": len(calls),
            "direct_static_hit_count": len(direct_hits),
            "dynamic_or_unknown_candidate_count": len(dynamic_candidates),
            "status": status,
            "conclusion": conclusion,
        },
        "calls": calls,
        "caveats": [
            "This is a bounded local static trace, not an emulator run.",
            f"R3 values classified as dynamic/table-derived may still become {index_hex} at runtime.",
            f"Indirect dispatch, table handlers, interrupt interleavings, or callers absent from the JSON may still enqueue {index_hex}.",
            f"The generic queue-to-TX path only emits queued entries; this tracer looks for direct report-index sources at {target_label} callers.",
        ],
    }
|
|
|
|
|
|
def format_text_report(analysis: Mapping[str, Any]) -> str:
    """Render an analysis mapping as a human-readable text report.

    Args:
        analysis: Mapping shaped like the result of ``analyze_report_sources``.
            Missing keys fall back to defaults; call entries that are not
            mappings are skipped.

    Returns:
        The report text, terminated by exactly one newline.
    """
    summary = analysis.get("summary", {})
    if not isinstance(summary, Mapping):
        # Tolerate a malformed summary instead of failing on ``.get``.
        summary = {}

    # Headings used to hard-code "loc_3E54" / "0x0007"; take them from the
    # analysis so reports for other targets or indices are labelled correctly.
    # The fallbacks reproduce the previous fixed text.
    queue_address = analysis.get("queue_function")
    queue_label = f"loc_{queue_address:04X}" if isinstance(queue_address, int) else "loc_3E54"
    index_hex = analysis.get("report_index_of_interest_hex", "0x0007")

    lines = [
        f"H8/536 {queue_label} Report Source Trace",
        "",
        f"Queue function: {analysis.get('queue_function_hex', h16(QUEUE_FUNCTION))}",
        f"Report index of interest: {index_hex}",
        f"Direct callers: {summary.get('direct_call_count', 0)}",
        f"Direct static {index_hex} hits: {summary.get('direct_static_hit_count', 0)}",
        f"Dynamic/unknown candidates: {summary.get('dynamic_or_unknown_candidate_count', 0)}",
        "",
        f"Conclusion: {summary.get('conclusion', '')}",
        "",
        "Call sites:",
    ]
    for call in analysis.get("calls") or []:
        if not isinstance(call, Mapping):
            continue
        r2 = call.get("r2", {}) if isinstance(call.get("r2"), Mapping) else {}
        r3 = call.get("r3", {}) if isinstance(call.get("r3"), Mapping) else {}
        r3_value = r3.get("value_hex") or "<dynamic>"
        lines.append(
            f"- {call.get('address_hex')} in {call.get('function_label')}: "
            f"R2.bit7={_format_bit(r2.get('bit7'))}, "
            f"R3={r3_value} ({r3.get('classification', 'unknown')}); "
            f"direct_{index_hex}={call.get('can_directly_enqueue_report_index')}"
        )
        for source_name, source in (("R2", r2), ("R3", r3)):
            evidence = source.get("evidence") if isinstance(source, Mapping) else None
            if not isinstance(evidence, Mapping):
                continue
            text = evidence.get("instruction")
            if text:
                lines.append(f" {source_name} evidence: {evidence.get('address_hex')} {text}")
        table_hints = call.get("table_hints")
        if isinstance(table_hints, list) and table_hints:
            # Only the first four hints are listed to keep each entry short.
            hints = ", ".join(
                f"{item.get('address_hex')} {item.get('table')} via {item.get('operand')}"
                for item in table_hints[:4]
                if isinstance(item, Mapping)
            )
            lines.append(f" table/context hints: {hints}")

    lines.extend(["", "Caveats:"])
    for caveat in analysis.get("caveats") or []:
        lines.append(f"- {caveat}")
    return "\n".join(lines).rstrip() + "\n"
|
|
|
|
|
|
def write_report_sources(input_path: Path, output_path: Path, *, as_json: bool = False) -> JsonObject:
    """Analyse ``input_path`` with default settings and write the report.

    The parent directory of ``output_path`` is created when missing. The file
    receives indented, key-sorted JSON when ``as_json`` is true and the text
    rendering otherwise. The analysis mapping is returned to the caller.
    """
    analysis = analyze_report_sources(load_report_source_input(input_path))
    output_path.parent.mkdir(parents=True, exist_ok=True)
    rendered = (
        json.dumps(analysis, indent=2, sort_keys=True) + "\n"
        if as_json
        else format_text_report(analysis)
    )
    output_path.write_text(rendered, encoding="utf-8")
    return analysis
|
|
|
|
|
|
def main(argv: list[str] | None = None, stdout: Any | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list handed to ``argparse``; ``None`` lets argparse
            read the process arguments.
        stdout: Stream that receives the report or the "wrote ..." notice;
            ``sys.stdout`` is used when omitted.

    Returns:
        Always ``0``.
    """
    import sys

    parser = argparse.ArgumentParser(
        description="Trace direct loc_3E54 report queue callers and their R2/R3 sources.",
    )
    parser.add_argument(
        "input",
        nargs="?",
        type=Path,
        default=DEFAULT_INPUT,
        help="structured JSON emitted by h8536_decompiler.py",
    )
    parser.add_argument("--json", action="store_true", help="emit structured JSON instead of readable text")
    parser.add_argument("--out", type=Path, default=None, help="write report to this path")
    parser.add_argument("--window", type=int, default=16, help="bounded backward instruction window per call")
    options = parser.parse_args(argv)

    stream = sys.stdout if stdout is None else stdout

    analysis = analyze_report_sources(load_report_source_input(options.input), window=options.window)
    if options.json:
        rendered = json.dumps(analysis, indent=2, sort_keys=True) + "\n"
    else:
        rendered = format_text_report(analysis)

    if not options.out:
        # No --out given: send the report itself to the stream.
        print(rendered, end="", file=stream)
        return 0

    options.out.parent.mkdir(parents=True, exist_ok=True)
    options.out.write_text(rendered, encoding="utf-8")
    print(f"wrote {options.out}", file=stream)
    return 0
|
|
|
|
|
|
def _analyze_call(
    instructions: list[JsonObject],
    call_index: int,
    functions: list[JsonObject],
    target: int,
    report_index: int,
    window: int,
) -> JsonObject:
    """Build the report record for the call instruction at ``call_index``.

    R2 is resolved as an 8-bit value and R3 as a 16-bit value from the local
    window that precedes the call. The record also carries table hints found
    in that window and a one-sentence assessment.
    """
    call = instructions[call_index]
    address = int(call["address"])
    function = _function_for_address(functions, address)
    local_window = _local_window(instructions, call_index, function, window)
    r2 = _resolve_register(local_window, "R2", call, width=8)
    r3 = _resolve_register(local_window, "R3", call, width=16)
    table_hints = _table_hints(local_window)

    if function:
        function_start = function.get("start")
        function_start_hex = h16(int(function["start"]))
        function_label = function.get("label")
    else:
        # No enclosing function is known: fall back to a label for the call address.
        function_start = None
        function_start_hex = None
        function_label = label_for(address)

    # With an empty window the call address itself marks the window start.
    window_start = int(local_window[0]["address"]) if local_window else address

    is_direct_hit = (
        r2.get("bit7") is True
        and r3.get("classification") == "constant"
        and r3.get("value") == report_index
    )

    return {
        "address": address,
        "address_hex": h16(address),
        "instruction": _instruction_text(call),
        "target": target,
        "target_hex": h16(target),
        "function_start": function_start,
        "function_start_hex": function_start_hex,
        "function_label": function_label,
        "dataflow_block": _dataflow_block(call),
        "window_instruction_count": len(local_window),
        "window_start": window_start,
        "window_start_hex": h16(window_start),
        "r2": r2,
        "r3": r3,
        "table_hints": table_hints,
        "can_directly_enqueue_report_index": is_direct_hit,
        "assessment": _call_assessment(r2, r3, report_index),
    }
|
|
|
|
|
|
def _resolve_register(window: list[Mapping[str, Any]], register: str, call: Mapping[str, Any], *, width: int) -> JsonObject:
    """Describe what is known about ``register`` just before ``call``.

    The local instruction window is traced first; when it contains no writer,
    the call's decompiler dataflow record is used instead. Constant values are
    masked to ``width`` bits. For ``"R2"`` a ``"bit7"`` entry is added.
    """
    is_r2 = register == "R2"
    record = _trace_register(window, register, seen=set(), width=width)
    if record is None:
        record = _dataflow_before(call, register)
    if record is None:
        # Nothing found at all. Note that bit7 is the string "unknown" for R2
        # here, whereas the non-constant branch below stores None.
        return {
            "register": register,
            "classification": "unknown",
            "value": None,
            "value_hex": None,
            "bit7": "unknown" if is_r2 else None,
            "evidence": None,
        }

    raw_value = record.get("value")
    if record.get("classification") == "constant" and isinstance(raw_value, int):
        masked = int(raw_value) & ((1 << width) - 1)
        record["value"] = masked
        # Two hex digits for 8-bit registers, four for anything wider.
        record["value_hex"] = f"0x{masked:04X}" if width > 8 else f"0x{masked:02X}"
        if is_r2:
            record["bit7"] = bool(masked & 0x80)
    elif is_r2:
        record["bit7"] = None
    record["register"] = register
    return record
|
|
|
|
|
|
def _trace_register(
    window: list[Mapping[str, Any]],
    register: str,
    *,
    seen: set[str],
    width: int,
) -> JsonObject | None:
    """Find the most recent writer of ``register`` in ``window`` and classify it.

    Args:
        window: Instructions preceding the call, oldest first.
        register: Register name; compared case-insensitively.
        seen: Registers already being traced. It is shared across the
            recursion so that register-to-register copy cycles terminate.
        width: Bit width used to mask immediate values.

    Returns:
        A source record for the last instruction whose destination operand is
        ``register``, or ``None`` when the window has no such instruction or
        ``register`` is already in ``seen``.
    """
    register = register.upper()
    if register in seen:
        return None
    seen.add(register)
    # Walk backwards by position. The previous ``window.index(ins)`` lookup
    # returned the first *equal* dict, which is the wrong slice boundary when
    # the window holds duplicate instruction records (and cost a linear scan).
    for position in range(len(window) - 1, -1, -1):
        ins = window[position]
        source, destination = _source_destination_operands(str(ins.get("operands", "")))
        if destination.upper() != register:
            # The old code also called ``_mutates_register`` here, but that
            # helper returns False whenever the destination differs, so the
            # branch could never fire. Non-MOV writers are handled by the
            # "unsupported writer" fallback below.
            continue
        mnemonic = _mnemonic_root(str(ins.get("mnemonic", "")))
        is_move = mnemonic.startswith("MOV")
        immediate = _parse_immediate(source)
        if is_move and immediate is not None:
            return _source_record("constant", ins, value=immediate & ((1 << width) - 1), reason="immediate load")
        source_register = _register_operand(source)
        if is_move and source_register:
            # Follow the copy, looking only at instructions before this one.
            nested = _trace_register(window[:position], source_register, seen=seen, width=width)
            if nested is not None:
                nested = dict(nested)
                nested["via"] = _evidence_record(ins)
                return nested
            return _source_record("dynamic/table-derived", ins, reason=f"copied from unresolved {source_register}")
        if "@" in source or "@" in destination:
            # Memory operands get the same label whether or not they match a
            # known table offset (the former ternary had identical arms).
            return _source_record("dynamic/table-derived", ins, reason="memory/indexed source")
        return _source_record("unknown", ins, reason=f"unsupported writer {mnemonic}")
    return None
|
|
|
|
|
|
def _dataflow_before(call: Mapping[str, Any], register: str) -> JsonObject | None:
    """Read the pre-call state of ``register`` from the call's dataflow record.

    Returns a "constant" record when a register change lists a known integer
    ``before`` value, an "unknown" record when ``before`` is explicitly marked
    not known, and ``None`` when no usable entry exists.
    """
    dataflow = call.get("dataflow")
    changes = dataflow.get("changes") if isinstance(dataflow, Mapping) else None
    if not isinstance(changes, list):
        return None

    for change in changes:
        if not isinstance(change, Mapping):
            continue
        if change.get("kind") != "register":
            continue
        if str(change.get("name", "")).upper() != register:
            continue
        before = change.get("before")
        if not isinstance(before, Mapping):
            continue
        known = before.get("known")
        if known is True and isinstance(before.get("value"), int):
            address = call.get("address")
            return {
                "classification": "constant",
                "value": int(before["value"]),
                "value_hex": before.get("hex"),
                "reason": "decompiler dataflow before call",
                "evidence": {
                    "address": address,
                    "address_hex": h16(int(address)) if isinstance(address, int) else None,
                    "instruction": before.get("source"),
                },
            }
        if known is False:
            return {
                "classification": "unknown",
                "value": None,
                "value_hex": None,
                "reason": f"decompiler dataflow: {before.get('reason', 'unknown')}",
                "evidence": None,
            }
    return None
|
|
|
|
|
|
def _call_assessment(r2: Mapping[str, Any], r3: Mapping[str, Any], report_index: int) -> str:
    """Summarise one call site in a single sentence.

    A proven-clear R2.bit7 takes precedence. Otherwise the sentence depends on
    whether R3 is a constant and whether that constant equals ``report_index``.
    """
    # NOTE(review): only ``bit7 is False`` is checked here, so an unresolved
    # bit7 with a matching constant R3 is still worded as a direct source.
    if r2.get("bit7") is False:
        return "R2.bit7 appears clear, so loc_3E54 would not enqueue on this local evidence."
    if r3.get("classification") != "constant":
        return f"No static {report_index:#06x} constant here; R3 is {r3.get('classification', 'unknown')}."
    if r3.get("value") == report_index:
        return f"Direct static enqueue source for {report_index:#06x}."
    return f"Direct static enqueue source for {int(r3.get('value', 0)):#06x}, not {report_index:#06x}."
|
|
|
|
|
|
def _local_window(
    instructions: list[JsonObject],
    call_index: int,
    function: Mapping[str, Any] | None,
    window: int,
) -> list[JsonObject]:
    """Collect up to ``window`` instructions that precede the call, oldest first.

    The backward scan stops once ``window`` instructions are collected or an
    address outside ``function`` is reached. Instructions whose dataflow block
    is known and differs from the call's known block are skipped; skipped
    instructions do not count towards ``window``.
    """
    call_block = _dataflow_block(instructions[call_index])
    collected: list[JsonObject] = []
    for candidate in instructions[:call_index][::-1]:
        if len(collected) >= window:
            break
        address = int(candidate["address"])
        if function and (address < int(function["start"]) or address > int(function["end"])):
            break
        candidate_block = _dataflow_block(candidate)
        if call_block is None or candidate_block is None or candidate_block == call_block:
            collected.append(candidate)
    # Gathered newest-first; callers expect program order.
    collected.reverse()
    return collected
|
|
|
|
|
|
def _table_hints(window: Iterable[Mapping[str, Any]]) -> list[JsonObject]:
    """List every known-table operand referenced by instructions in ``window``.

    One hint is produced per matching operand, in instruction order.
    """
    return [
        {
            "address": int(ins["address"]),
            "address_hex": h16(int(ins["address"])),
            "instruction": _instruction_text(ins),
            "operand": operand,
            "table": table,
        }
        for ins in window
        for operand, table in _table_operands(str(ins.get("operands", "")))
    ]
|
|
|
|
|
|
def _table_operand(operand: str) -> bool:
    """Return True when ``operand`` contains at least one known table reference."""
    return len(_table_operands(operand)) > 0
|
|
|
|
|
|
def _table_operands(operands: str) -> list[tuple[str, str]]:
    """Find ``@(-H'xxxx, Rn)`` operands whose offset is a known table offset.

    Returns ``(matched operand text, table name)`` pairs in order of
    appearance. Offsets are masked to 16 bits before the lookup.
    """
    candidates = (
        (hit.group(0), _LOGICAL_TABLE_OFFSETS.get(int(hit.group(1), 16) & 0xFFFF))
        for hit in re.finditer(r"@\(-H'([0-9A-Fa-f]+),\s*(R[0-7])\)", operands)
    )
    return [(text, table) for text, table in candidates if table]
|
|
|
|
|
|
def _source_record(
    classification: str,
    ins: Mapping[str, Any],
    *,
    value: int | None = None,
    reason: str,
) -> JsonObject:
    """Build a register-source record backed by instruction ``ins``.

    ``value_hex`` is the four-digit upper-case hex form of ``value``, or
    ``None`` when no value is given.
    """
    value_hex = None if value is None else f"0x{value:04X}"
    return {
        "classification": classification,
        "value": value,
        "value_hex": value_hex,
        "reason": reason,
        "evidence": _evidence_record(ins),
    }
|
|
|
|
|
|
def _evidence_record(ins: Mapping[str, Any]) -> JsonObject:
    """Extract the address, text, mnemonic and operands of ``ins`` as evidence."""
    address = ins.get("address")
    # Only integer addresses can be rendered as hex.
    address_hex = h16(int(address)) if isinstance(address, int) else None
    return {
        "address": address,
        "address_hex": address_hex,
        "instruction": _instruction_text(ins),
        "mnemonic": ins.get("mnemonic"),
        "operands": ins.get("operands"),
    }
|
|
|
|
|
|
def _is_direct_call_to(ins: Mapping[str, Any], target: int) -> bool:
    """Return True when ``ins`` is a BSR or JSR that lists ``target`` as a target."""
    if target not in _targets(ins):
        return False
    return _mnemonic_root(str(ins.get("mnemonic", ""))) in {"BSR", "JSR"}
|
|
|
|
|
|
def _instruction_sequence(value: object) -> list[JsonObject]:
    """Normalise the ``instructions`` payload into a list sorted by address.

    A mapping contributes its values and a list its items; any other input
    yields an empty list. Entries that are not dicts with an integer
    ``"address"`` are dropped.
    """
    if isinstance(value, Mapping):
        candidates: Iterable[Any] = value.values()
    elif isinstance(value, list):
        candidates = value
    else:
        candidates = []
    kept = [
        entry
        for entry in candidates
        if isinstance(entry, dict) and isinstance(entry.get("address"), int)
    ]
    # list.sort is stable, so entries sharing an address keep their input order.
    kept.sort(key=lambda entry: int(entry["address"]))
    return kept
|
|
|
|
|
|
def _function_ranges(payload: Mapping[str, Any]) -> list[JsonObject]:
    """Collect function ranges from ``payload["call_graph"]["nodes"]``.

    Only mapping nodes with integer ``start`` and ``end`` are kept. A node
    without a label gets one from ``label_for(start)``. The result is sorted
    by start address.
    """
    call_graph = payload.get("call_graph")
    if not isinstance(call_graph, Mapping):
        return []
    nodes = call_graph.get("nodes")
    if not isinstance(nodes, list):
        return []

    ranges: list[JsonObject] = []
    for node in nodes:
        if isinstance(node, Mapping):
            start, end = node.get("start"), node.get("end")
            if isinstance(start, int) and isinstance(end, int):
                label = node.get("label") or label_for(start)
                ranges.append({"start": start, "end": end, "label": str(label)})
    ranges.sort(key=lambda item: int(item["start"]))
    return ranges
|
|
|
|
|
|
def _function_for_address(functions: list[JsonObject], address: int) -> JsonObject | None:
    """Return the first range whose inclusive ``start``..``end`` span holds ``address``."""
    return next(
        (
            candidate
            for candidate in functions
            if int(candidate["start"]) <= address <= int(candidate["end"])
        ),
        None,
    )
|
|
|
|
|
|
def _dataflow_block(ins: Mapping[str, Any]) -> int | None:
    """Return the integer ``dataflow["block"]`` of ``ins``, or None if absent."""
    dataflow = ins.get("dataflow")
    if not isinstance(dataflow, Mapping):
        return None
    block_id = dataflow.get("block")
    return int(block_id) if isinstance(block_id, int) else None
|
|
|
|
|
|
def _targets(ins: Mapping[str, Any]) -> list[int]:
    """Return the integer entries of ``ins["targets"]``; non-list values give []."""
    raw = ins.get("targets", [])
    if not isinstance(raw, list):
        return []
    return [int(entry) for entry in raw if isinstance(entry, int)]
|
|
|
|
|
|
def _instruction_text(ins: Mapping[str, Any]) -> str:
    """Return the display text of ``ins``.

    A non-empty string under ``"text"`` is used as-is; otherwise the text is
    assembled from ``"mnemonic"`` and ``"operands"``.
    """
    text = ins.get("text")
    if not (isinstance(text, str) and text):
        operands = str(ins.get("operands", ""))
        return f"{ins.get('mnemonic', '')} {operands}".strip()
    return text
|
|
|
|
|
|
def _source_destination_operands(operands: str) -> tuple[str, str]:
    """Split an operand string into ``(source, destination)``.

    The split happens at the last comma that is not nested inside ``()`` or
    ``{}``. With no such comma the whole string is the destination and the
    source is empty. Both parts are stripped of surrounding whitespace.
    """
    nesting = 0
    last_comma = -1
    for position, symbol in enumerate(operands):
        if symbol == "(" or symbol == "{":
            nesting += 1
        elif symbol == ")" or symbol == "}":
            # An unmatched closer is ignored; nesting never goes negative.
            if nesting > 0:
                nesting -= 1
        elif symbol == "," and nesting == 0:
            last_comma = position
    if last_comma < 0:
        return "", operands.strip()
    return operands[:last_comma].strip(), operands[last_comma + 1 :].strip()
|
|
|
|
|
|
def _parse_immediate(operand: str) -> int | None:
    """Parse an immediate operand into an integer masked to 16 bits.

    An optional leading ``#`` is dropped. ``H'..``, ``0x..`` and ``$..`` are
    read as hexadecimal; anything else is tried as decimal. Text that does not
    parse yields ``None``.
    """
    cleaned = operand.strip()
    if cleaned.startswith("#"):
        cleaned = cleaned[1:].strip()

    upper = cleaned.upper()
    if upper.startswith("H'"):
        digits, base = cleaned[2:], 16
    elif upper.startswith("0X"):
        # int() accepts the "0x" prefix itself when the base is 16.
        digits, base = cleaned, 16
    elif cleaned.startswith("$"):
        digits, base = cleaned[1:], 16
    else:
        digits, base = cleaned, 10

    try:
        return int(digits, base) & 0xFFFF
    except ValueError:
        return None
|
|
|
|
|
|
def _register_operand(operand: str) -> str | None:
    """Return the upper-cased register name if ``operand`` is exactly R0-R7."""
    candidate = operand.strip().upper()
    if re.fullmatch(r"R[0-7]", candidate) is None:
        return None
    return candidate
|
|
|
|
|
|
def _mutates_register(ins: Mapping[str, Any], register: str) -> bool:
    """Return True when ``ins`` targets ``register`` with a non-MOV mnemonic.

    The register comparison is case-insensitive.
    """
    _, destination = _source_destination_operands(str(ins.get("operands", "")))
    writes_register = destination.upper() == register.upper()
    if not writes_register:
        return False
    mnemonic = _mnemonic_root(str(ins.get("mnemonic", "")))
    return not mnemonic.startswith("MOV")
|
|
|
|
|
|
def _mnemonic_root(mnemonic: str) -> str:
    """Upper-case ``mnemonic`` and drop everything from its last ``.`` onwards."""
    head, dot, _suffix = mnemonic.rpartition(".")
    root = head if dot else mnemonic
    return root.upper()
|
|
|
|
|
|
def _format_bit(value: Any) -> str:
    """Render a tri-state bit: ``True`` -> "set", ``False`` -> "clear", else "unknown".

    Identity checks are used, so ``1`` and ``0`` count as unknown.
    """
    if value is True:
        return "set"
    return "clear" if value is False else "unknown"
|
|
|
|
|
|
# Public API of this module; every other name is an internal helper.
__all__ = [
    "analyze_report_sources",
    "format_text_report",
    "load_report_source_input",
    "main",
    "write_report_sources",
]


# When run as a script, hand main()'s return value to the interpreter as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|