further digging and basic emulator
This commit is contained in:
548
h8536/report_source_trace.py
Normal file
548
h8536/report_source_trace.py
Normal file
@@ -0,0 +1,548 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
from collections.abc import Iterable, Mapping
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .formatting import h16, label_for
|
||||
|
||||
|
||||
# Loose alias for the JSON-shaped dictionaries passed around in this module.
JsonObject = dict[str, Any]

# Default decompiler input and the default text/JSON report locations.
DEFAULT_INPUT = Path("build") / "rom_decompiled.json"
DEFAULT_TEXT_OUTPUT = Path("build") / "rom_report_sources.txt"
DEFAULT_JSON_OUTPUT = Path("build") / "rom_report_sources.json"

# Default call target whose direct callers are traced.
QUEUE_FUNCTION = 0x3E54
# Default report index the trace looks for in R3.
REPORT_INDEX_OF_INTEREST = 0x0007

# Displacement magnitudes (matched against "@(-H'xxxx, Rn)" operands) mapped to
# tentative table names; the "_candidate" suffix marks them as unconfirmed.
_LOGICAL_TABLE_OFFSETS = {
    0x2000: "primary_value_table_candidate",
    0x1C00: "secondary_value_table_candidate",
    0x1800: "current_value_table_candidate",
    0x1400: "flag_table_candidate",
}
|
||||
|
||||
|
||||
def load_report_source_input(path: Path) -> JsonObject:
    """Read *path* as UTF-8 JSON and return the decoded payload.

    Raises:
        ValueError: if the decoded document is not a dict or has no
            ``"instructions"`` key (malformed JSON also surfaces as a
            ``ValueError`` subclass from the ``json`` module).
    """
    document = json.loads(path.read_text(encoding="utf-8"))
    looks_valid = isinstance(document, dict) and "instructions" in document
    if looks_valid:
        return document
    raise ValueError(f"{path} does not look like h8536_decompiler JSON output")
|
||||
|
||||
|
||||
def analyze_report_sources(
    payload: Mapping[str, Any],
    *,
    target: int = QUEUE_FUNCTION,
    report_index: int = REPORT_INDEX_OF_INTEREST,
    window: int = 16,
) -> JsonObject:
    """Trace every direct call to *target* and summarise its R2/R3 sources.

    Args:
        payload: Decoded decompiler JSON; its ``"instructions"`` and
            ``"call_graph"`` entries are consulted.
        target: Address of the call target whose direct callers are analysed.
        report_index: R3 value that counts as a "hit".
        window: Maximum number of preceding instructions inspected per call.

    Returns:
        A JSON-serialisable dict with the per-call records, a summary
        (counts, status, conclusion) and a list of caveats.
    """
    # The human-readable text used to hard-code "loc_3E54" and "0x0007" even
    # though both are parameters.  Derive it instead; the defaults render
    # exactly as before.
    target_label = f"loc_{target:04X}"
    index_hex = f"0x{report_index:04X}"

    instructions = _instruction_sequence(payload.get("instructions"))
    functions = _function_ranges(payload)
    calls = [
        _analyze_call(instructions, index, functions, target, report_index, window)
        for index, ins in enumerate(instructions)
        if _is_direct_call_to(ins, target)
    ]
    # A direct hit needs R2.bit7 known-set and R3 a constant equal to the index.
    direct_hits = [
        call
        for call in calls
        if call["r2"].get("bit7") is True
        and call["r3"].get("classification") == "constant"
        and call["r3"].get("value") == report_index
    ]
    # Anything whose R2.bit7 is not known-clear and whose R3 is not pinned down
    # remains a candidate that static analysis cannot rule out.
    dynamic_candidates = [
        call
        for call in calls
        if call["r2"].get("bit7") is not False
        and call["r3"].get("classification") in {"dynamic/table-derived", "unknown"}
    ]

    if direct_hits:
        conclusion = (
            f"At least one direct {target_label} caller statically loads report index {report_index:#06x} "
            "with R2.bit7 set before the queue call."
        )
        status = "direct_static_hit"
    else:
        conclusion = (
            f"No direct {target_label} caller in this JSON statically loads report index {report_index:#06x}. "
            f"{report_index:#06x} remains an observed runtime/capture value unless another indirect "
            "or table-dispatch path is proven."
        )
        status = "not_statically_proven"

    return {
        "kind": "report_source_trace",
        "queue_function": target,
        "queue_function_hex": h16(target),
        "report_index_of_interest": report_index,
        "report_index_of_interest_hex": index_hex,
        "summary": {
            "direct_call_count": len(calls),
            "direct_static_hit_count": len(direct_hits),
            "dynamic_or_unknown_candidate_count": len(dynamic_candidates),
            "status": status,
            "conclusion": conclusion,
        },
        "calls": calls,
        "caveats": [
            "This is a bounded local static trace, not an emulator run.",
            f"R3 values classified as dynamic/table-derived may still become {index_hex} at runtime.",
            "Indirect dispatch, table handlers, interrupt interleavings, or callers absent from the JSON "
            f"may still enqueue {index_hex}.",
            "The generic queue-to-TX path only emits queued entries; this tracer looks for direct "
            f"report-index sources at {target_label} callers.",
        ],
    }
|
||||
|
||||
|
||||
def format_text_report(analysis: Mapping[str, Any]) -> str:
    """Render an analysis mapping as a human-readable text report.

    Args:
        analysis: A mapping shaped like the result of
            ``analyze_report_sources``; missing keys fall back to defaults.

    Returns:
        The report text, terminated by exactly one newline.
    """
    summary = analysis.get("summary", {})
    # Labels used to be hard-coded to 0x0007 / loc_3E54; take them from the
    # analysis so non-default traces are labelled correctly.  The fallbacks
    # reproduce the historical text.
    index_hex = analysis.get("report_index_of_interest_hex", "0x0007")
    queue_function = analysis.get("queue_function")
    target_label = f"loc_{queue_function:04X}" if isinstance(queue_function, int) else "loc_3E54"

    lines = [
        f"H8/536 {target_label} Report Source Trace",
        "",
        f"Queue function: {analysis.get('queue_function_hex', h16(QUEUE_FUNCTION))}",
        f"Report index of interest: {index_hex}",
        f"Direct callers: {summary.get('direct_call_count', 0)}",
        f"Direct static {index_hex} hits: {summary.get('direct_static_hit_count', 0)}",
        f"Dynamic/unknown candidates: {summary.get('dynamic_or_unknown_candidate_count', 0)}",
        "",
        f"Conclusion: {summary.get('conclusion', '')}",
        "",
        "Call sites:",
    ]
    for call in analysis.get("calls", []):
        if not isinstance(call, Mapping):
            continue
        # Tolerate malformed records: a non-mapping r2/r3 is treated as empty.
        r2 = call.get("r2", {}) if isinstance(call.get("r2"), Mapping) else {}
        r3 = call.get("r3", {}) if isinstance(call.get("r3"), Mapping) else {}
        r3_value = r3.get("value_hex") or "<dynamic>"
        lines.append(
            f"- {call.get('address_hex')} in {call.get('function_label')}: "
            f"R2.bit7={_format_bit(r2.get('bit7'))}, "
            f"R3={r3_value} ({r3.get('classification', 'unknown')}); "
            f"direct_{index_hex}={call.get('can_directly_enqueue_report_index')}"
        )
        for source_name, source in (("R2", r2), ("R3", r3)):
            evidence = source.get("evidence")
            if not isinstance(evidence, Mapping):
                continue
            text = evidence.get("instruction")
            if text:
                lines.append(f"  {source_name} evidence: {evidence.get('address_hex')} {text}")
        table_hints = call.get("table_hints")
        if isinstance(table_hints, list) and table_hints:
            # Only the first four hints are shown to keep each entry short.
            hints = ", ".join(
                f"{item.get('address_hex')} {item.get('table')} via {item.get('operand')}"
                for item in table_hints[:4]
                if isinstance(item, Mapping)
            )
            # Skip the line when none of the inspected items was a mapping.
            if hints:
                lines.append(f"  table/context hints: {hints}")

    lines.extend(["", "Caveats:"])
    lines.extend(f"- {caveat}" for caveat in analysis.get("caveats", []))
    return "\n".join(lines).rstrip() + "\n"
|
||||
|
||||
|
||||
def write_report_sources(input_path: Path, output_path: Path, *, as_json: bool = False) -> JsonObject:
    """Analyse *input_path* with default settings and write the report.

    The parent directory of *output_path* is created if needed.  With
    ``as_json`` the analysis is written as sorted, indented JSON; otherwise
    the text report is written.  The analysis dict is returned either way.
    """
    payload = load_report_source_input(input_path)
    analysis = analyze_report_sources(payload)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    if not as_json:
        output_path.write_text(format_text_report(analysis), encoding="utf-8")
        return analysis
    serialized = json.dumps(analysis, indent=2, sort_keys=True) + "\n"
    output_path.write_text(serialized, encoding="utf-8")
    return analysis
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None, stdout: Any | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.
        stdout: Stream for output; ``None`` means ``sys.stdout``.

    Returns:
        Always ``0`` (argparse exits on its own for bad arguments).
    """
    cli = argparse.ArgumentParser(
        description="Trace direct loc_3E54 report queue callers and their R2/R3 sources.",
    )
    cli.add_argument(
        "input",
        nargs="?",
        type=Path,
        default=DEFAULT_INPUT,
        help="structured JSON emitted by h8536_decompiler.py",
    )
    cli.add_argument("--json", action="store_true", help="emit structured JSON instead of readable text")
    cli.add_argument("--out", type=Path, default=None, help="write report to this path")
    cli.add_argument("--window", type=int, default=16, help="bounded backward instruction window per call")
    options = cli.parse_args(argv)

    if stdout is None:
        import sys

        out_stream = sys.stdout
    else:
        out_stream = stdout

    payload = load_report_source_input(options.input)
    analysis = analyze_report_sources(payload, window=options.window)
    if options.json:
        rendered = json.dumps(analysis, indent=2, sort_keys=True) + "\n"
    else:
        rendered = format_text_report(analysis)

    if not options.out:
        # No output path: the report itself goes to the stream.
        print(rendered, end="", file=out_stream)
        return 0

    options.out.parent.mkdir(parents=True, exist_ok=True)
    options.out.write_text(rendered, encoding="utf-8")
    print(f"wrote {options.out}", file=out_stream)
    return 0
|
||||
|
||||
|
||||
def _analyze_call(
    instructions: list[JsonObject],
    call_index: int,
    functions: list[JsonObject],
    target: int,
    report_index: int,
    window: int,
) -> JsonObject:
    """Build the record for the call instruction at ``instructions[call_index]``.

    R2 is resolved as an 8-bit value and R3 as a 16-bit value from the bounded
    window of instructions preceding the call.
    """
    call_ins = instructions[call_index]
    call_address = int(call_ins["address"])
    owner = _function_for_address(functions, call_address)
    preceding = _local_window(instructions, call_index, owner, window)
    r2_record = _resolve_register(preceding, "R2", call_ins, width=8)
    r3_record = _resolve_register(preceding, "R3", call_ins, width=16)
    hints = _table_hints(preceding)

    # True only when bit 7 of R2 is known set and R3 is a constant equal to
    # the requested report index.
    is_direct_hit = (
        r2_record.get("bit7") is True
        and r3_record.get("classification") == "constant"
        and r3_record.get("value") == report_index
    )

    if owner:
        function_start = owner.get("start")
        function_start_hex = h16(int(owner["start"]))
        function_label = owner.get("label")
    else:
        function_start = None
        function_start_hex = None
        function_label = label_for(call_address)

    # An empty window is reported as starting at the call itself.
    window_start = int(preceding[0]["address"]) if preceding else call_address

    record: JsonObject = {}
    record["address"] = call_address
    record["address_hex"] = h16(call_address)
    record["instruction"] = _instruction_text(call_ins)
    record["target"] = target
    record["target_hex"] = h16(target)
    record["function_start"] = function_start
    record["function_start_hex"] = function_start_hex
    record["function_label"] = function_label
    record["dataflow_block"] = _dataflow_block(call_ins)
    record["window_instruction_count"] = len(preceding)
    record["window_start"] = window_start
    record["window_start_hex"] = h16(window_start)
    record["r2"] = r2_record
    record["r3"] = r3_record
    record["table_hints"] = hints
    record["can_directly_enqueue_report_index"] = is_direct_hit
    record["assessment"] = _call_assessment(r2_record, r3_record, report_index)
    return record
|
||||
|
||||
|
||||
def _resolve_register(window: list[Mapping[str, Any]], register: str, call: Mapping[str, Any], *, width: int) -> JsonObject:
    """Resolve *register* before *call* and normalise the resulting record.

    The local window trace is tried first and the call's own dataflow
    annotation second.  Constant values are masked to *width* bits and, for
    R2, a ``bit7`` flag is attached.
    """
    is_r2 = register == "R2"

    record = _trace_register(window, register, seen=set(), width=width)
    if record is None:
        record = _dataflow_before(call, register)

    if record is None:
        # NOTE(review): for R2 this branch yields the string "unknown", while a
        # resolved-but-non-constant R2 below gets None — confirm which one
        # downstream consumers expect.
        return {
            "register": register,
            "classification": "unknown",
            "value": None,
            "value_hex": None,
            "bit7": "unknown" if is_r2 else None,
            "evidence": None,
        }

    raw_value = record.get("value")
    if record.get("classification") == "constant" and isinstance(raw_value, int):
        masked = int(raw_value) & ((1 << width) - 1)
        record["value"] = masked
        # Two hex digits for byte-wide registers, four otherwise.
        record["value_hex"] = f"0x{masked:02X}" if width <= 8 else f"0x{masked:04X}"
        if is_r2:
            record["bit7"] = bool(masked & 0x80)
    elif is_r2:
        record["bit7"] = None

    record["register"] = register
    return record
|
||||
|
||||
|
||||
def _trace_register(
    window: list[Mapping[str, Any]],
    register: str,
    *,
    seen: set[str],
    width: int,
) -> JsonObject | None:
    """Find the most recent writer of *register* in *window* and classify it.

    The window is scanned backwards.  The first instruction whose destination
    operand is *register* decides the result:

    * a non-``MOV`` instruction -> ``"dynamic/table-derived"`` (it modifies
      the register in place);
    * ``MOV`` of an immediate -> ``"constant"`` masked to *width* bits;
    * ``MOV`` from another register -> that register is traced recursively
      in the instructions before the copy;
    * ``MOV`` from a memory operand (contains ``@``) ->
      ``"dynamic/table-derived"``;
    * anything else -> ``"unknown"``.

    Args:
        window: Instructions preceding the call, oldest first.
        register: Register name, case-insensitive.
        seen: Registers already being traced; stops copy cycles.  Mutated.
        width: Bit width used to mask constant values.

    Returns:
        A source record, or ``None`` when nothing in the window writes the
        register (or the register is already in *seen*).
    """
    register = register.upper()
    if register in seen:
        return None
    seen.add(register)

    mask = (1 << width) - 1
    # Walk by position so the recursive call can slice "everything before this
    # instruction" directly, instead of an equality-based list.index() lookup.
    for position in range(len(window) - 1, -1, -1):
        ins = window[position]
        source, destination = _source_destination_operands(str(ins.get("operands", "")))
        if destination.upper() != register:
            continue
        mnemonic = _mnemonic_root(str(ins.get("mnemonic", "")))

        # The mutation check used to sit inside the "destination differs"
        # branch, where it could never fire.  It belongs here: a non-MOV
        # instruction targeting the register modifies it in place.
        if _mutates_register(ins, register):
            return _source_record("dynamic/table-derived", ins, reason=f"{mnemonic} mutates {register}")

        # From here on the instruction is a MOV variant writing the register.
        immediate = _parse_immediate(source)
        if immediate is not None:
            return _source_record("constant", ins, value=immediate & mask, reason="immediate load")

        source_register = _register_operand(source)
        if source_register:
            nested = _trace_register(window[:position], source_register, seen=seen, width=width)
            if nested is None:
                return _source_record("dynamic/table-derived", ins, reason=f"copied from unresolved {source_register}")
            # Keep the original source record but note the copy it came through.
            nested = dict(nested)
            nested["via"] = _evidence_record(ins)
            return nested

        if "@" in source:
            return _source_record("dynamic/table-derived", ins, reason="memory/indexed source")
        return _source_record("unknown", ins, reason=f"unsupported writer {mnemonic}")
    return None
|
||||
|
||||
|
||||
def _dataflow_before(call: Mapping[str, Any], register: str) -> JsonObject | None:
    """Read *register*'s pre-call state from the call's ``dataflow.changes``.

    Returns a ``"constant"`` record when the first usable entry reports a known
    integer value, an ``"unknown"`` record when it reports ``known`` as
    ``False``, and ``None`` when no entry applies.
    """
    dataflow = call.get("dataflow")
    changes = dataflow.get("changes") if isinstance(dataflow, Mapping) else None
    if not isinstance(changes, list):
        return None

    for entry in changes:
        if not isinstance(entry, Mapping):
            continue
        if entry.get("kind") != "register":
            continue
        if str(entry.get("name", "")).upper() != register:
            continue
        before = entry.get("before")
        if not isinstance(before, Mapping):
            continue
        known = before.get("known")
        if known is True and isinstance(before.get("value"), int):
            call_address = call.get("address")
            address_hex = h16(int(call["address"])) if isinstance(call_address, int) else None
            return {
                "classification": "constant",
                "value": int(before["value"]),
                "value_hex": before.get("hex"),
                "reason": "decompiler dataflow before call",
                "evidence": {
                    "address": call_address,
                    "address_hex": address_hex,
                    "instruction": before.get("source"),
                },
            }
        if known is False:
            return {
                "classification": "unknown",
                "value": None,
                "value_hex": None,
                "reason": f"decompiler dataflow: {before.get('reason', 'unknown')}",
                "evidence": None,
            }
        # Neither conclusively known nor unknown: try the next entry.
    return None
|
||||
|
||||
|
||||
def _call_assessment(r2: Mapping[str, Any], r3: Mapping[str, Any], report_index: int) -> str:
    """Return a one-sentence verdict for a call site from its R2/R3 records."""
    wanted = f"{report_index:#06x}"
    if r2.get("bit7") is False:
        return "R2.bit7 appears clear, so loc_3E54 would not enqueue on this local evidence."
    if r3.get("classification") != "constant":
        return f"No static {wanted} constant here; R3 is {r3.get('classification', 'unknown')}."
    if r3.get("value") == report_index:
        return f"Direct static enqueue source for {wanted}."
    other = int(r3.get("value", 0))
    return f"Direct static enqueue source for {other:#06x}, not {wanted}."
|
||||
|
||||
|
||||
def _local_window(
    instructions: list[JsonObject],
    call_index: int,
    function: Mapping[str, Any] | None,
    window: int,
) -> list[JsonObject]:
    """Collect up to *window* instructions preceding the call, oldest first.

    The backward scan stops at the first instruction outside *function*'s
    address range (when a function is given).  Instructions whose dataflow
    block is known and differs from the call's known block are skipped, but
    the scan continues past them.
    """
    call_block = _dataflow_block(instructions[call_index])
    newest_first: list[JsonObject] = []
    for position in range(call_index - 1, -1, -1):
        if len(newest_first) >= window:
            break
        candidate = instructions[position]
        where = int(candidate["address"])
        if function:
            inside = int(function["start"]) <= where <= int(function["end"])
            if not inside:
                break
        candidate_block = _dataflow_block(candidate)
        same_or_unknown_block = call_block is None or candidate_block is None or candidate_block == call_block
        if same_or_unknown_block:
            newest_first.append(candidate)
    newest_first.reverse()
    return newest_first
|
||||
|
||||
|
||||
def _table_hints(window: Iterable[Mapping[str, Any]]) -> list[JsonObject]:
    """List every known-table operand found in *window*, in window order."""
    return [
        {
            "address": int(ins["address"]),
            "address_hex": h16(int(ins["address"])),
            "instruction": _instruction_text(ins),
            "operand": operand_text,
            "table": table_name,
        }
        for ins in window
        for operand_text, table_name in _table_operands(str(ins.get("operands", "")))
    ]
|
||||
|
||||
|
||||
def _table_operand(operand: str) -> bool:
    """Return ``True`` when *operand* contains at least one known-table reference."""
    return len(_table_operands(operand)) > 0
|
||||
|
||||
|
||||
def _table_operands(operands: str) -> list[tuple[str, str]]:
    """Find ``@(-H'xxxx, Rn)`` operands whose offset names a known table.

    Returns ``(matched operand text, table name)`` pairs in order of
    appearance; offsets missing from ``_LOGICAL_TABLE_OFFSETS`` are ignored.
    """
    found: list[tuple[str, str]] = []
    for hit in re.finditer(r"@\(-H'([0-9A-Fa-f]+),\s*(R[0-7])\)", operands):
        # Only the hex magnitude is used as the key, truncated to 16 bits.
        table_name = _LOGICAL_TABLE_OFFSETS.get(int(hit.group(1), 16) & 0xFFFF)
        if not table_name:
            continue
        found.append((hit.group(0), table_name))
    return found
|
||||
|
||||
|
||||
def _source_record(
    classification: str,
    ins: Mapping[str, Any],
    *,
    value: int | None = None,
    reason: str,
) -> JsonObject:
    """Bundle a classification, optional value, and evidence for *ins*.

    ``value_hex`` is rendered with four hex digits whenever a value is given.
    """
    if value is None:
        rendered_value = None
    else:
        rendered_value = f"0x{value:04X}"
    return dict(
        classification=classification,
        value=value,
        value_hex=rendered_value,
        reason=reason,
        evidence=_evidence_record(ins),
    )
|
||||
|
||||
|
||||
def _evidence_record(ins: Mapping[str, Any]) -> JsonObject:
    """Summarise *ins* (address, text, mnemonic, operands) as an evidence dict."""
    where = ins.get("address")
    where_hex = None
    if isinstance(where, int):
        where_hex = h16(int(where))
    return dict(
        address=where,
        address_hex=where_hex,
        instruction=_instruction_text(ins),
        mnemonic=ins.get("mnemonic"),
        operands=ins.get("operands"),
    )
|
||||
|
||||
|
||||
def _is_direct_call_to(ins: Mapping[str, Any], target: int) -> bool:
    """Return ``True`` for a ``BSR``/``JSR`` instruction that lists *target*."""
    root = _mnemonic_root(str(ins.get("mnemonic", "")))
    if root not in {"BSR", "JSR"}:
        return False
    return target in _targets(ins)
|
||||
|
||||
|
||||
def _instruction_sequence(value: object) -> list[JsonObject]:
    """Normalise the payload's instruction container into an address-sorted list.

    Accepts a mapping (its values are used) or a list; anything else yields
    an empty list.  Entries that are not dicts with an integer ``"address"``
    are dropped.
    """
    if isinstance(value, Mapping):
        candidates = list(value.values())
    elif isinstance(value, list):
        candidates = value
    else:
        candidates = []

    usable = []
    for entry in candidates:
        if isinstance(entry, dict) and isinstance(entry.get("address"), int):
            usable.append(entry)
    usable.sort(key=lambda entry: int(entry["address"]))
    return usable
|
||||
|
||||
|
||||
def _function_ranges(payload: Mapping[str, Any]) -> list[JsonObject]:
    """Extract ``{"start", "end", "label"}`` ranges from ``call_graph.nodes``.

    Nodes without integer ``start`` and ``end`` are skipped; a missing or empty
    label falls back to ``label_for(start)``.  The result is sorted by start.
    """
    graph = payload.get("call_graph")
    if not isinstance(graph, Mapping):
        return []
    nodes = graph.get("nodes")
    if not isinstance(nodes, list):
        return []

    collected: list[JsonObject] = []
    for node in nodes:
        if not isinstance(node, Mapping):
            continue
        first, last = node.get("start"), node.get("end")
        if not (isinstance(first, int) and isinstance(last, int)):
            continue
        name = str(node.get("label") or label_for(first))
        collected.append({"start": first, "end": last, "label": name})
    collected.sort(key=lambda entry: int(entry["start"]))
    return collected
|
||||
|
||||
|
||||
def _function_for_address(functions: list[JsonObject], address: int) -> JsonObject | None:
    """Return the first range whose inclusive ``start``..``end`` contains *address*."""
    containing = (
        candidate
        for candidate in functions
        if int(candidate["start"]) <= address <= int(candidate["end"])
    )
    return next(containing, None)
|
||||
|
||||
|
||||
def _dataflow_block(ins: Mapping[str, Any]) -> int | None:
    """Return the integer ``dataflow.block`` of *ins*, or ``None`` if absent."""
    dataflow = ins.get("dataflow")
    if not isinstance(dataflow, Mapping):
        return None
    block_id = dataflow.get("block")
    return int(dataflow["block"]) if isinstance(block_id, int) else None
|
||||
|
||||
|
||||
def _targets(ins: Mapping[str, Any]) -> list[int]:
    """Return the integer entries of ``ins["targets"]`` (empty if not a list)."""
    raw = ins.get("targets", [])
    if not isinstance(raw, list):
        return []
    return [int(entry) for entry in raw if isinstance(entry, int)]
|
||||
|
||||
|
||||
def _instruction_text(ins: Mapping[str, Any]) -> str:
    """Return the display text of *ins*.

    A non-empty string ``"text"`` field is used as-is; otherwise the text is
    rebuilt from ``"mnemonic"`` and ``"operands"``.
    """
    ready_made = ins.get("text")
    if not (isinstance(ready_made, str) and ready_made):
        operand_part = str(ins.get("operands", ""))
        return f"{ins.get('mnemonic', '')} {operand_part}".strip()
    return ready_made
|
||||
|
||||
|
||||
def _source_destination_operands(operands: str) -> tuple[str, str]:
    """Split an operand string into ``(source, destination)``.

    The split happens at the last comma that is not nested inside ``()`` or
    ``{}``.  Without such a comma the whole (stripped) string is the
    destination and the source is empty.
    """
    nesting = 0
    last_comma = -1
    for position, symbol in enumerate(operands):
        if symbol == "(" or symbol == "{":
            nesting += 1
        elif symbol == ")" or symbol == "}":
            # Unbalanced closers are ignored rather than driving depth negative.
            if nesting > 0:
                nesting -= 1
        elif symbol == "," and nesting == 0:
            last_comma = position

    if last_comma < 0:
        return "", operands.strip()
    left = operands[:last_comma]
    right = operands[last_comma + 1 :]
    return left.strip(), right.strip()
|
||||
|
||||
|
||||
def _parse_immediate(operand: str) -> int | None:
    """Parse *operand* as a numeric literal, truncated to 16 bits.

    An optional leading ``#`` is dropped.  ``H'..``, ``0x..`` and ``$..`` are
    read as hexadecimal, anything else as decimal.  Returns ``None`` when the
    text is not a number.
    """
    literal = operand.strip()
    if literal.startswith("#"):
        literal = literal[1:].strip()

    upper = literal.upper()
    if upper.startswith("H'"):
        digits, base = literal[2:], 16
    elif upper.startswith("0X"):
        # int() accepts the 0x prefix itself when the base is 16.
        digits, base = literal, 16
    elif literal.startswith("$"):
        digits, base = literal[1:], 16
    else:
        digits, base = literal, 10

    try:
        parsed = int(digits, base)
    except ValueError:
        return None
    return parsed & 0xFFFF
|
||||
|
||||
|
||||
def _register_operand(operand: str) -> str | None:
    """Return the upper-cased register name if *operand* is exactly ``R0``-``R7``."""
    candidate = operand.strip().upper()
    if re.fullmatch(r"R[0-7]", candidate) is None:
        return None
    return candidate
|
||||
|
||||
|
||||
def _mutates_register(ins: Mapping[str, Any], register: str) -> bool:
    """Return ``True`` when *ins* is a non-``MOV`` instruction targeting *register*.

    The comparison against the destination operand is case-insensitive.
    """
    _, destination = _source_destination_operands(str(ins.get("operands", "")))
    if destination.upper() != register.upper():
        return False
    root = _mnemonic_root(str(ins.get("mnemonic", "")))
    is_move = root.startswith("MOV")
    return not is_move
|
||||
|
||||
|
||||
def _mnemonic_root(mnemonic: str) -> str:
    """Upper-case *mnemonic* and drop the part after its last ``.``, if any."""
    head, dot, _suffix = mnemonic.rpartition(".")
    root = head if dot else mnemonic
    return root.upper()
|
||||
|
||||
|
||||
def _format_bit(value: Any) -> str:
    """Render a tri-state bit: ``True`` -> "set", ``False`` -> "clear", else "unknown".

    Identity checks are deliberate: truthy/falsy stand-ins such as ``1`` or
    ``0`` are reported as "unknown".
    """
    return "set" if value is True else "clear" if value is False else "unknown"
|
||||
|
||||
|
||||
# Public API of this module; the underscore-prefixed helpers stay private.
__all__ = [
    "analyze_report_sources",
    "format_text_report",
    "load_report_source_input",
    "main",
    "write_report_sources",
]


if __name__ == "__main__":
    # Run the CLI and use main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user