Data flow improvements in pseudo code generator
This commit is contained in:
148
h8536/indirect.py
Normal file
148
h8536/indirect.py
Normal file
@@ -0,0 +1,148 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from collections.abc import Mapping
|
||||
|
||||
from .formatting import h16, parse_int
|
||||
from .memory import region_for
|
||||
from .model import Instruction
|
||||
from .rom import Rom
|
||||
|
||||
|
||||
# Matches the full operand text of an indexed load written as
# "@(<base>,<index>),<dest>", where <index> and <dest> are registers R0-R7
# and <base> is any run of non-comma characters. Optional whitespace is
# accepted after each comma. Captures the groups "base", "index" and "dest".
INDEXED_WORD_LOAD_RE = re.compile(r"^@\((?P<base>[^,]+),\s*(?P<index>R[0-7])\),\s*(?P<dest>R[0-7])$")
|
||||
# Matches an operand that is exactly "@Rn" (n in 0-7) and captures the
# register name as the group "reg".
INDIRECT_FLOW_RE = re.compile(r"^@(?P<reg>R[0-7])$")
|
||||
|
||||
|
||||
def analyze_indirect_flow(
    rom: Rom,
    instructions: Mapping[int, Instruction],
    labels: Mapping[int, str] | None = None,
    *,
    max_entries: int = 128,
) -> dict[str, object]:
    """Collect every register-indirect call/jump and try to resolve its table.

    Instructions are visited in ascending key order. A site is recorded for
    each instruction whose ``kind`` is ``"call"`` or ``"jump"``, whose
    ``targets`` is empty, and whose operand text is a plain ``@Rn``. For each
    site the instruction immediately before it (in that same ordering) is
    handed to ``_table_from_previous_load`` to see whether it loaded the
    target register from a pointer table.

    Args:
        rom: ROM object forwarded unchanged to the table lookup.
        instructions: Decoded instructions keyed by address.
        labels: Optional address-to-label mapping; a falsy value is treated
            as an empty mapping.
        max_entries: Upper bound on table entries read per site.

    Returns:
        ``{"sites": [...]}`` where each site dict carries ``address``,
        ``instruction``, ``kind``, ``target_register``, ``confidence``
        (``"table_load"`` or ``"unknown"``), ``summary`` and, when a table
        was found, ``table``.
    """
    label_map = labels or {}
    in_order = [instructions[key] for key in sorted(instructions)]
    decoded_addresses = {item.address for item in in_order}
    found: list[dict[str, object]] = []

    # Pair each instruction with its predecessor; the first one has none.
    for prior, current in zip([None, *in_order], in_order):
        is_flow = current.kind in {"call", "jump"}
        if not is_flow or current.targets:
            continue

        register = _indirect_target_register(current.operands)
        if register is None:
            continue

        table = _table_from_previous_load(
            rom, prior, register, decoded_addresses, label_map, max_entries
        )
        record: dict[str, object] = {
            "address": current.address,
            "instruction": current.text,
            "kind": current.kind,
            "target_register": register,
            "confidence": "table_load" if table else "unknown",
        }
        if table:
            record["table"] = table
            record["summary"] = _site_summary(current, register, table)
        else:
            record["summary"] = f"{current.text} uses {register}; target not resolved"
        found.append(record)

    return {"sites": found}
|
||||
|
||||
|
||||
def indirect_comment_for_instruction(analysis: Mapping[str, object] | None, address: int) -> str:
    """Return the summary text of the first analysed site at ``address``.

    Only entries of ``analysis["sites"]`` that are mappings are considered;
    an entry without an ``"address"`` key is compared as ``-1``. An empty
    string is returned when ``analysis`` is falsy, when nothing matches, or
    when the matching site has no ``"summary"``.
    """
    if not analysis:
        return ""

    # Lazy scan so that only entries up to the first match are inspected.
    candidates = (
        entry
        for entry in analysis.get("sites", [])
        if isinstance(entry, Mapping) and int(entry.get("address", -1)) == address
    )
    hit = next(candidates, None)
    if hit is None:
        return ""
    return str(hit.get("summary", ""))
|
||||
|
||||
|
||||
def indirect_metadata_for_instruction(
    analysis: Mapping[str, object] | None,
    address: int,
) -> dict[str, object] | None:
    """Return the first analysed site dict recorded at ``address``.

    Only entries of ``analysis["sites"]`` that are ``dict`` instances are
    considered; an entry without an ``"address"`` key is compared as ``-1``.
    The stored dict itself (not a copy) is returned. ``None`` is returned
    when ``analysis`` is falsy or no entry matches.
    """
    if not analysis:
        return None

    return next(
        (
            entry
            for entry in analysis.get("sites", [])
            if isinstance(entry, dict) and int(entry.get("address", -1)) == address
        ),
        None,
    )
|
||||
|
||||
|
||||
def _indirect_target_register(operands: str) -> str | None:
    """Return the register named by a bare ``@Rn`` operand, else ``None``.

    Surrounding whitespace in ``operands`` is ignored before matching
    against ``INDIRECT_FLOW_RE``.
    """
    found = INDIRECT_FLOW_RE.match(operands.strip())
    if found is None:
        return None
    return found.group("reg")
|
||||
|
||||
|
||||
def _table_from_previous_load(
    rom: Rom,
    previous: Instruction | None,
    target_reg: str,
    known_code: set[int],
    labels: Mapping[int, str],
    max_entries: int,
) -> dict[str, object] | None:
    """Describe the pointer table that ``previous`` loaded ``target_reg`` from.

    ``None`` is returned when any of these holds:

    * ``previous`` is ``None`` or its mnemonic does not start with
      ``"MOV:G.W"``;
    * its operands do not match ``INDEXED_WORD_LOAD_RE`` or the destination
      register differs from ``target_reg``;
    * ``parse_int`` raises ``ValueError`` for the base operand;
    * ``rom.contains(base, 2)`` is false;
    * no entry could be collected.

    Otherwise up to ``max_entries`` two-byte slots are read starting at the
    base. Reading stops at a slot outside the ROM, at a value of ``0x0000``
    or ``0xFFFF``, or at the first implausible target. A target is plausible
    when it is in ``known_code`` or ``region_for`` reports kind
    ``"program"``. An implausible target is still recorded if it is the
    very first slot.

    Returns:
        A dict with ``base``, ``index_register``, ``target_register``,
        ``load_address``, ``load_instruction``, ``entry_size``,
        ``entry_count``, ``decoded_target_count`` and ``entries``, or
        ``None`` as described above.
    """
    if previous is None:
        return None
    if not previous.mnemonic.startswith("MOV:G.W"):
        return None

    load = INDEXED_WORD_LOAD_RE.match(previous.operands.strip())
    if load is None or load.group("dest") != target_reg:
        return None

    try:
        table_base = parse_int(load.group("base"))
    except ValueError:
        return None

    if not rom.contains(table_base, 2):
        return None

    rows: list[dict[str, object]] = []
    for slot in range(max_entries):
        slot_address = table_base + slot * 2
        if not rom.contains(slot_address, 2):
            break

        pointer = rom.u16(slot_address)
        if pointer in (0x0000, 0xFFFF):
            break

        region = region_for(pointer)
        already_decoded = pointer in known_code
        looks_like_code = already_decoded or region.kind == "program"

        # Once at least one row exists, an implausible pointer ends the table
        # without being recorded.
        if rows and not looks_like_code:
            break

        rows.append(
            {
                "index": slot,
                "entry_address": slot_address,
                "target": pointer,
                "target_label": labels.get(pointer),
                "target_region": region.name,
                "decoded_code": already_decoded,
            },
        )

        # An implausible first pointer is kept, but nothing after it is read.
        if not looks_like_code:
            break

    if not rows:
        return None

    decoded_total = len([row for row in rows if row["decoded_code"]])
    return {
        "base": table_base,
        "index_register": load.group("index"),
        "target_register": target_reg,
        "load_address": previous.address,
        "load_instruction": previous.text,
        "entry_size": 2,
        "entry_count": len(rows),
        "decoded_target_count": decoded_total,
        "entries": rows,
    }
|
||||
|
||||
|
||||
def _site_summary(ins: Instruction, target_reg: str, table: Mapping[str, object]) -> str:
    """Build the one-line description of a site whose table was resolved.

    Reads ``decoded_target_count``, ``entry_count``, ``base`` and
    ``index_register`` from ``table``; the base is rendered with ``h16``.
    """
    hit_count = int(table["decoded_target_count"])
    entry_total = int(table["entry_count"])
    origin = f"{ins.text} uses {target_reg} loaded from pointer table {h16(int(table['base']))} "
    tally = f"via {table['index_register']} ({hit_count}/{entry_total} decoded targets)"
    return origin + tally
|
||||
Reference in New Issue
Block a user