1
0
This commit is contained in:
Aiden
2026-05-25 13:47:13 +10:00
parent 46ccaf3e39
commit 5ad90ade49
12 changed files with 17974 additions and 883 deletions

View File

@@ -59,3 +59,103 @@ def collect_labels(instructions: Iterable[Instruction], vectors: dict[int, tuple
for target in ins.targets:
labels.setdefault(target, label_for(target))
return labels
def collect_function_entries(
    instructions: Iterable[Instruction],
    vectors: dict[int, tuple[str, int]],
) -> set[int]:
    """Return the set of addresses treated as function entry points.

    An address qualifies when it is the target stored in a vector entry, or
    when it appears in ``targets`` of an instruction whose ``kind`` is
    ``"call"``.  Targets of every other instruction kind are ignored.
    """
    vector_targets = {entry[1] for entry in vectors.values()}
    call_targets = {
        target
        for ins in instructions
        if ins.kind == "call"
        for target in ins.targets
    }
    return vector_targets | call_targets
def assign_functions(instructions: dict[int, Instruction], entries: set[int]) -> dict[int, int]:
    """Map each instruction address to the entry address of its function.

    Addresses are visited in ascending order.  Reaching an address listed in
    ``entries`` opens a function there (replacing any function still open).
    Every address visited while a function is open is attributed to it.  An
    instruction whose kind is ``"return"``, ``"rte"`` or ``"sleep"`` is still
    attributed, then closes the function, so the addresses that follow stay
    unowned until the next entry point.
    """
    terminators = {"return", "rte", "sleep"}
    owner_of: dict[int, int] = {}
    active: int | None = None
    for addr in sorted(instructions):
        if addr in entries:
            active = addr
        if active is None:
            # Not inside any known function: leave this address unowned.
            continue
        owner_of[addr] = active
        if instructions[addr].kind in terminators:
            active = None
    return owner_of
def build_functions(
    instructions: dict[int, Instruction],
    vectors: dict[int, tuple[str, int]],
    labels: dict[int, str],
) -> list[dict[str, object]]:
    """Summarise every detected function as a plain dictionary.

    Entry points come from ``collect_function_entries`` and the
    address-to-function mapping from ``assign_functions``.  Each record holds:

    * ``start`` / ``label`` - entry address and its label (``labels`` first,
      ``label_for`` as fallback);
    * ``sources`` - names of the vectors whose target is this entry;
    * ``instruction_count`` - number of owned instructions;
    * ``end`` - highest byte address covered by an owned instruction;
    * ``calls`` - distinct call targets in first-seen order;
    * ``unresolved_calls`` - count of call instructions with no targets.

    Returns the records ordered by ascending start address.
    """
    entry_points = collect_function_entries(instructions.values(), vectors)
    ownership = assign_functions(instructions, entry_points)

    sources_by_target: dict[int, list[str]] = {}
    for source_name, vector_target in vectors.values():
        sources_by_target.setdefault(vector_target, []).append(source_name)

    by_start: dict[int, dict[str, object]] = {}
    for address, start in ownership.items():
        ins = instructions[address]
        record = by_start.get(start)
        if record is None:
            record = {
                "start": start,
                "label": labels.get(start, label_for(start)),
                "sources": sources_by_target.get(start, []),
                "instruction_count": 0,
                "end": start,
                "calls": [],
                "unresolved_calls": 0,
            }
            by_start[start] = record

        record["instruction_count"] = int(record["instruction_count"]) + 1
        # A size of zero still counts as one byte.
        last_byte = ins.address + max(ins.size, 1) - 1
        if last_byte > int(record["end"]):
            record["end"] = last_byte

        if ins.kind != "call":
            continue
        if not ins.targets:
            record["unresolved_calls"] = int(record["unresolved_calls"]) + 1
            continue
        callees = record["calls"]
        assert isinstance(callees, list)
        for callee in ins.targets:
            if callee not in callees:
                callees.append(callee)

    return [by_start[start] for start in sorted(by_start)]
def build_call_graph(
    instructions: dict[int, Instruction],
    vectors: dict[int, tuple[str, int]],
    labels: dict[int, str],
) -> dict[str, object]:
    """Build a call graph as ``{"nodes": [...], "edges": [...]}``.

    Nodes are the records produced by ``build_functions``.  There is one edge
    per distinct (calling function, call target) pair.  Each edge carries the
    labels of both ends and the address of the first call instruction, in
    ``instructions`` iteration order, that produced the pair.  Call
    instructions without targets, or not owned by any function, add no edge.
    Edges are returned sorted by ``(from, to)``.
    """
    entry_points = collect_function_entries(instructions.values(), vectors)
    ownership = assign_functions(instructions, entry_points)
    nodes = build_functions(instructions, vectors, labels)

    # Keyed by (caller, callee) so only the first call site of a pair is kept.
    edge_by_pair: dict[tuple[int, int], dict[str, object]] = {}
    for ins in instructions.values():
        if ins.kind != "call" or not ins.targets:
            continue
        caller = ownership.get(ins.address)
        if caller is None:
            continue
        for callee in ins.targets:
            pair = (caller, callee)
            if pair in edge_by_pair:
                continue
            edge_by_pair[pair] = {
                "from": caller,
                "from_label": labels.get(caller, label_for(caller)),
                "to": callee,
                "to_label": labels.get(callee, label_for(callee)),
                "call_site": ins.address,
            }

    # Pairs are unique, so ordering by pair equals ordering by (from, to).
    edges = [edge_by_pair[pair] for pair in sorted(edge_by_pair)]
    return {"nodes": nodes, "edges": edges}

View File

@@ -3,10 +3,11 @@ from __future__ import annotations
import argparse
from pathlib import Path
from .analysis import collect_labels, linear_sweep, trace
from .analysis import build_call_graph, collect_labels, linear_sweep, trace
from .data_analysis import analyze_unreached_data
from .decoder import H8536Decoder
from .formatting import parse_int
from .render import format_listing, write_json
from .render import format_callgraph_dot, format_listing, write_json
from .rom import Rom
from .vectors import read_dtc_vectors_max, read_dtc_vectors_min, read_vectors_max, read_vectors_min
@@ -30,6 +31,7 @@ def main() -> int:
parser.add_argument("--entry", type=parse_int, action="append", default=[], help="extra entry point to trace")
parser.add_argument("--br", type=parse_int, default=None, help="optional BR value for @aa:8 short absolute operands")
parser.add_argument("--linear", action="store_true", help="linear-sweep the selected range instead of tracing from vectors")
parser.add_argument("--callgraph-dot", type=Path, default=None, help="optional Graphviz DOT call graph output")
args = parser.parse_args()
data = args.rom.read_bytes()
@@ -58,6 +60,8 @@ def main() -> int:
else:
instructions = trace(decoder, starts, args.start, end)
labels.update(collect_labels(instructions.values(), vectors))
data_candidates = analyze_unreached_data(rom, instructions, args.start, end)
call_graph = build_call_graph(instructions, vectors, labels)
args.out.parent.mkdir(parents=True, exist_ok=True)
args.out.write_text(
@@ -70,15 +74,29 @@ def main() -> int:
args.mode,
traced=not args.linear,
dtc_vectors=dtc_vectors,
data_candidates=data_candidates,
),
encoding="utf-8",
)
if args.json:
args.json.parent.mkdir(parents=True, exist_ok=True)
write_json(args.json, instructions, vectors, labels, dtc_vectors=dtc_vectors)
write_json(
args.json,
instructions,
vectors,
labels,
dtc_vectors=dtc_vectors,
data_candidates=data_candidates,
call_graph=call_graph,
)
if args.callgraph_dot:
args.callgraph_dot.parent.mkdir(parents=True, exist_ok=True)
args.callgraph_dot.write_text(format_callgraph_dot(call_graph), encoding="utf-8")
invalid = sum(1 for ins in instructions.values() if not ins.valid)
print(f"wrote {args.out} ({len(instructions)} items, {invalid} invalid/data bytes)")
if args.json:
print(f"wrote {args.json}")
if args.callgraph_dot:
print(f"wrote {args.callgraph_dot}")
return 0

126
h8536/data_analysis.py Normal file
View File

@@ -0,0 +1,126 @@
from __future__ import annotations
from .memory import region_for
from .model import Instruction
from .rom import Rom
# Byte values 0x20 through 0x7E inclusive; used to decide whether a byte
# belongs to a text run.
PRINTABLE_ASCII = set(range(0x20, 0x7F))
def _occupied_addresses(instructions: dict[int, Instruction]) -> set[int]:
    """Return every byte address covered by an entry of ``instructions``.

    Each entry covers ``max(ins.size, 1)`` consecutive addresses starting at
    ``ins.address``, so an entry with size zero still occupies one byte.
    """
    return {
        ins.address + offset
        for ins in instructions.values()
        for offset in range(max(ins.size, 1))
    }
def _unoccupied_ranges(start: int, end: int, occupied: set[int]) -> list[tuple[int, int]]:
    """List the maximal runs of addresses in ``[start, end)`` not in ``occupied``.

    Each run is returned as a half-open ``(run_start, run_end)`` pair, in
    ascending order.  An empty list is returned when ``start >= end`` or when
    every address in the window is occupied.
    """
    gaps: list[tuple[int, int]] = []
    cursor = start
    while cursor < end:
        if cursor in occupied:
            cursor += 1
            continue
        gap_start = cursor
        # Extend the gap until an occupied address or the window end.
        while cursor < end and cursor not in occupied:
            cursor += 1
        gaps.append((gap_start, cursor))
    return gaps
def _is_pointer_target(target: int, known_targets: set[int]) -> bool:
    """Decide whether a 16-bit word looks like a plausible pointer.

    ``0x0000`` and ``0xFFFF`` are always rejected.  Any other value is
    accepted when it is in ``known_targets``, or when ``region_for`` places
    it in a region whose kind is ``"ram"`` or ``"registers"``.
    """
    if target == 0x0000 or target == 0xFFFF:
        return False
    if target in known_targets:
        return True
    return region_for(target).kind in {"ram", "registers"}
def find_ascii_strings(
    rom: Rom,
    ranges: list[tuple[int, int]],
    min_length: int = 6,
    max_candidates: int = 200,
) -> list[dict[str, object]]:
    """Scan half-open address ranges for runs of printable ASCII bytes.

    A run is a maximal sequence of bytes in ``PRINTABLE_ASCII`` inside one
    range.  Runs of at least ``min_length`` bytes are reported with their
    ``address``, ``length``, decoded ``text`` and a ``terminated`` flag.  The
    flag is true only when the byte right after the run is still inside the
    range and equals zero.  Scanning stops as soon as ``max_candidates``
    records have been collected.
    """
    found: list[dict[str, object]] = []
    for range_start, range_end in ranges:
        pos = range_start
        while pos < range_end:
            if rom.u8(pos) not in PRINTABLE_ASCII:
                pos += 1
                continue

            run_start = pos
            run = bytearray()
            while pos < range_end:
                byte = rom.u8(pos)
                if byte not in PRINTABLE_ASCII:
                    break
                run.append(byte)
                pos += 1

            if len(run) >= min_length:
                has_nul = pos < range_end and rom.u8(pos) == 0
                found.append(
                    {
                        "address": run_start,
                        "length": len(run),
                        "text": run.decode("ascii", errors="replace"),
                        "terminated": has_nul,
                    },
                )
                if len(found) >= max_candidates:
                    return found
            # Step over the byte that ended the run (a no-op at range end).
            pos += 1
    return found
def find_pointer_tables(
    rom: Rom,
    ranges: list[tuple[int, int]],
    known_targets: set[int],
    min_entries: int = 3,
    max_candidates: int = 200,
) -> list[dict[str, object]]:
    """Scan half-open address ranges for runs of plausible 16-bit pointers.

    Scanning starts at the first even address of each range and moves in
    two-byte steps.  At each position consecutive words are read with
    ``rom.u16`` for as long as ``_is_pointer_target`` accepts them.  A run of
    at least ``min_entries`` words is reported and scanning resumes at the
    word that ended it; a shorter run advances the scan by one word.
    Scanning stops once ``max_candidates`` records have been collected.
    """
    tables: list[dict[str, object]] = []
    for range_start, range_end in ranges:
        # Round an odd start up to the next even address.
        base = range_start + (range_start % 2)
        while base + 2 <= range_end:
            run: list[int] = []
            probe = base
            while probe + 2 <= range_end:
                word = rom.u16(probe)
                if not _is_pointer_target(word, known_targets):
                    break
                run.append(word)
                probe += 2

            if len(run) < min_entries:
                base += 2
                continue

            tables.append(
                {
                    "address": base,
                    "entry_size": 2,
                    "count": len(run),
                    "targets": run,
                    "target_regions": [region_for(word).name for word in run],
                },
            )
            if len(tables) >= max_candidates:
                return tables
            base = probe
    return tables
def analyze_unreached_data(
    rom: Rom,
    instructions: dict[int, Instruction],
    start: int,
    end: int,
) -> dict[str, list[dict[str, object]]]:
    """Look for strings and pointer tables in bytes no instruction covers.

    The scan window is ``[max(start, 0x0100), min(end, rom.end))`` with the
    addresses covered by ``instructions`` removed.  Instruction start
    addresses serve as the known pointer targets.
    NOTE(review): the 0x0100 floor presumably skips the vector tables - confirm.

    Returns a dict with the keys ``"strings"`` and ``"pointer_tables"``.
    """
    covered = _occupied_addresses(instructions)
    instruction_starts = set(instructions)
    scan_start = max(start, 0x0100)
    scan_end = min(end, rom.end)
    gaps = _unoccupied_ranges(scan_start, scan_end, covered)

    strings = find_ascii_strings(rom, gaps)
    pointer_tables = find_pointer_tables(rom, gaps, instruction_starts)
    return {"strings": strings, "pointer_tables": pointer_tables}

View File

@@ -77,6 +77,113 @@ def _bitfield_values(address: int, value: int) -> str:
return " ".join(parts)
def _adcsr_semantics(value: int) -> str:
    """Describe an ADCSR value in words.

    Decodes bit 0x40 (ADI interrupt), 0x20 (start/halt), 0x10 (scan/single),
    0x08 (conversion time) and the low three bits (channel selection, whose
    meaning depends on the scan bit).
    """
    selector = value & 0x07
    if value & 0x10:
        mode = "scan"
        channels = ("AN0", "AN0-AN1", "AN0-AN2", "AN0-AN3", "AN4", "AN4-AN5", "AN4-AN6", "AN4-AN7")[selector]
    else:
        mode = "single"
        channels = ("AN0", "AN1", "AN2", "AN3", "AN4", "AN5", "AN6", "AN7")[selector]

    if value & 0x20:
        state = "start"
    else:
        state = "halt"
    if value & 0x08:
        conversion = "138-state max"
    else:
        conversion = "274-state max"
    if value & 0x40:
        interrupt = "ADI enabled"
    else:
        interrupt = "ADI disabled"
    return f"A/D {state}, {mode} {channels}, {conversion}, {interrupt}"
def _sci_smr_semantics(value: int) -> str:
    """Describe an SCI SMR value in words.

    Decodes bit 0x80 (sync/async), 0x40 (7/8-bit characters), 0x20 (parity
    on/off), 0x10 (odd/even, reported only when parity is on), 0x08 (stop
    bits) and the low two bits (clock divider).
    """
    if value & 0x80:
        mode = "sync"
    else:
        mode = "async"
    if value & 0x40:
        char_len = "7-bit"
    else:
        char_len = "8-bit"

    if not value & 0x20:
        parity = "no parity"
    elif value & 0x10:
        parity = "odd parity"
    else:
        parity = "even parity"

    if value & 0x08:
        stop = "2 stop"
    else:
        stop = "1 stop"
    clock = ("phi", "phi/4", "phi/16", "phi/64")[value & 0x03]
    return f"SCI {mode}, {char_len}, {parity}, {stop}, clock {clock}"
def _sci_scr_semantics(value: int) -> str:
    """Describe an SCI SCR value in words.

    Lists the enabled functions in the fixed order TX (0x20), RX (0x10),
    TXI (0x80), RXI/ERI (0x40), or ``none`` when no such bit is set.  The
    clock source follows from bit 0x02, and ``, SCK output`` is appended
    when bit 0x01 is set.
    """
    flag_names = (
        (0x20, "TX"),
        (0x10, "RX"),
        (0x80, "TXI"),
        (0x40, "RXI/ERI"),
    )
    active = [name for mask, name in flag_names if value & mask]
    summary = ",".join(active) if active else "none"

    clock = "external clock" if value & 0x02 else "internal clock"
    if value & 0x01:
        clock = clock + ", SCK output"
    return f"SCI enables {summary}, {clock}"
def _wcr_semantics(value: int) -> str:
    """Describe a WCR value: wait mode from bits 3-2, wait count from bits 1-0."""
    mode_index = (value >> 2) & 0x03
    count_index = value & 0x03
    mode = ("programmable wait", "no wait states", "pin wait", "pin auto-wait")[mode_index]
    count = ("0 waits", "1 wait", "2 waits", "3 waits")[count_index]
    return f"{mode}, {count}"
def _wdt_semantics(value: int) -> str:
    """Describe a value written to the watchdog timer register.

    Values up to 0xFF are decoded directly as TCSR data.  Larger values are
    split into a password (bits 15-8) and a data byte (bits 7-0): password
    0xA5 is reported as a TCSR write, 0x5A as a TCNT counter write, and any
    other password is reported verbatim.
    """
    if value <= 0xFF:
        return _wdt_tcsr_data_semantics(value)

    password = (value >> 8) & 0xFF
    data = value & 0xFF
    if password == 0xA5:
        return f"TCSR password H'A5, {_wdt_tcsr_data_semantics(data)}"
    if password == 0x5A:
        return f"TCNT password H'5A, counter write {h8(data)}"
    return f"watchdog password {h8(password)}, data {h8(data)}"
def _wdt_tcsr_data_semantics(value: int) -> str:
    """Describe the data byte of a watchdog TCSR value.

    Decodes bit 0x40 (watchdog versus interval mode), bit 0x20 (timer
    enabled) and the low three bits (clock divider).
    """
    dividers = ("phi/2", "phi/32", "phi/64", "phi/128", "phi/256", "phi/512", "phi/2048", "phi/4096")
    if value & 0x40:
        mode = "watchdog NMI"
    else:
        mode = "interval IRQ0"
    if value & 0x20:
        state = "enabled"
    else:
        state = "disabled"
    return f"WDT {state}, {mode}, clock {dividers[value & 0x07]}"
def _rstcsr_semantics(value: int) -> str:
    """Describe a value written to RSTCSR.

    Only values above 0xFF carry a password in bits 15-8.  Password 0xA5 is
    reported as clearing WRST with the data byte, and 0x5A reports the RES
    output state from data bit 0x40.  Everything else, including all values
    up to 0xFF, yields the generic description.
    """
    if value > 0xFF:
        password = (value >> 8) & 0xFF
        data = value & 0xFF
        if password == 0xA5:
            return f"RSTCSR password H'A5, clear WRST with data {h8(data)}"
        if password == 0x5A:
            if data & 0x40:
                rstoe = "RES output enabled"
            else:
                rstoe = "RES output disabled"
            return f"RSTCSR password H'5A, {rstoe}"
    return "RSTCSR status/control"
def _semantic_values(address: int, value: int) -> str:
    """Return a prose description of ``value`` written to register ``address``.

    A few registers are decoded inline; the rest are delegated to the
    per-register helper functions.  An address with no decoder yields an
    empty string.
    """
    # Registers decoded inline.
    if address == 0xFEFC:
        parts = (
            "bus release pins enabled" if value & 0x08 else "P12/P13 are I/O",
            "IRQ0 enabled" if value & 0x20 else "IRQ0 disabled",
            "IRQ1 enabled" if value & 0x40 else "IRQ1 disabled",
        )
        return f"{parts[0]}, {parts[1]}, {parts[2]}"
    if address == 0xFEFD:
        # The tuple index is the bit number of each function.
        pin_functions = ("SCI2 pins", "P9 PWM", "P6 PWM", "IRQ2", "IRQ3", "IRQ4", "IRQ5")
        active = [name for bit, name in enumerate(pin_functions) if value & (1 << bit)]
        if not active:
            return "alternate pin functions disabled"
        return "enabled " + ", ".join(active)
    if address == 0xFF11:
        if value & 0x80:
            return "on-chip RAM enabled"
        return "on-chip RAM disabled"
    if address == 0xFF12:
        return f"mode select bits MDS={value & 0x07}"

    # Registers with a dedicated decoder.
    if address == 0xFEE8:
        return _adcsr_semantics(value)
    if address == 0xFED8 or address == 0xFEF0:
        return _sci_smr_semantics(value)
    if address == 0xFEDA or address == 0xFEF2:
        return _sci_scr_semantics(value)
    if address == 0xFEEC:
        return _wdt_semantics(value)
    if address == 0xFF10:
        return _wcr_semantics(value)
    if address == 0xFF14:
        return _rstcsr_semantics(value)
    return ""
def write_comment(ea: EA, value: int | None) -> str:
if ea.address is None or ea.address not in IO_REGISTERS:
return ""
@@ -85,7 +192,9 @@ def write_comment(ea: EA, value: int | None) -> str:
return name
text = f"{name} = {h16(value) if value > 0xFF else h8(value)}"
fields = _bitfield_values(ea.address, value)
return f"{text} ({fields})" if fields else text
semantic = _semantic_values(ea.address, value)
details = "; ".join(part for part in (fields, semantic) if part)
return f"{text} ({details})" if details else text
def bit_comment(mnemonic: str, ea: EA, bit: int) -> str:

View File

@@ -23,7 +23,7 @@ class MemoryRegion:
MEMORY_REGIONS: tuple[MemoryRegion, ...] = (
MemoryRegion("exception_vectors", 0x0000, 0x009F, "vectors", "section 2 address space"),
MemoryRegion("dtc_vectors", 0x00A0, 0x00FF, "dtc_vectors", "section 2 address space"),
MemoryRegion("on_chip_rom", 0x0100, 0xF67F, "rom", "section 17 ROM"),
MemoryRegion("program_or_external", 0x0100, 0xF67F, "program", "section 2/17 mode-dependent ROM or external space"),
MemoryRegion("on_chip_ram", 0xF680, 0xFE7F, "ram", "section 16 RAM"),
MemoryRegion("register_field", 0xFE80, 0xFFFF, "registers", "appendix B register map"),
)

View File

@@ -4,11 +4,22 @@ import json
from pathlib import Path
from .formatting import h16, label_for
from .memory import MEMORY_REGIONS, region_for
from .model import Instruction
from .rom import Rom
from .tables import IO_REGISTERS
from .vectors import DtcVectorEntry
def _reference_comment(ins: Instruction) -> str:
    """Build a ``refs ...`` comment listing the addresses ``ins`` references.

    Each entry shows the register name from ``IO_REGISTERS`` (or the
    ``h16``-formatted address when there is none) followed by the name of
    the region that ``region_for`` reports.  Returns an empty string when
    the instruction has no references.
    """
    described: list[str] = []
    for address in ins.references:
        region_name = region_for(address).name
        display = IO_REGISTERS.get(address, h16(address))
        described.append(f"{display} in {region_name}")
    if not described:
        return ""
    return "refs " + ", ".join(described)
def format_listing(
rom_path: Path,
rom: Rom,
@@ -18,6 +29,7 @@ def format_listing(
mode: str,
traced: bool,
dtc_vectors: dict[int, DtcVectorEntry] | None = None,
data_candidates: dict[str, list[dict[str, object]]] | None = None,
) -> str:
lines: list[str] = []
lines.append("; H8/536 ROM disassembly")
@@ -30,6 +42,11 @@ def format_listing(
lines.append("; - H8/536 uses the H8/500 CPU instruction set.")
lines.append("; - In minimum mode the reset vector at H'0000-H'0001 is a 16-bit PC.")
lines.append("; - The register field is H'FE80-H'FFFF; names below come from appendix B.")
lines.append("; - @aa:8 short absolute operands use BR as the upper address byte.")
lines.append("")
lines.append("; Memory Map")
for region in MEMORY_REGIONS:
lines.append(f"; {h16(region.start)}-{h16(region.end)} {region.name:<18} {region.kind}")
lines.append("")
lines.append("; Vectors")
for vector_addr, (name, target) in sorted(vectors.items()):
@@ -43,6 +60,23 @@ def format_listing(
lines.append(f"; {h16(vector_addr)} {entry['source']:<24} -> {h16(target)}")
lines.append("")
if data_candidates:
strings = data_candidates.get("strings", [])
pointer_tables = data_candidates.get("pointer_tables", [])
if strings or pointer_tables:
lines.append("; Unreached Data Candidates")
for item in strings[:40]:
lines.append(
f"; string {h16(int(item['address'])):<8} len={item['length']:<3} {item['text']!r}",
)
for item in pointer_tables[:40]:
targets = ", ".join(h16(int(target)) for target in item["targets"][:8])
suffix = " ..." if int(item["count"]) > 8 else ""
lines.append(
f"; ptrtbl {h16(int(item['address'])):<8} count={item['count']:<3} -> {targets}{suffix}",
)
lines.append("")
for address in sorted(instructions):
ins = instructions[address]
if address in labels:
@@ -50,7 +84,8 @@ def format_listing(
lines.append(f"{labels[address]}:")
raw = " ".join(f"{byte:02X}" for byte in ins.raw)
padded_raw = raw.ljust(14)
comment = f" ; {ins.comment}" if ins.comment else ""
comment_parts = [part for part in (ins.comment, _reference_comment(ins) if not ins.comment else "") if part]
comment = f" ; {'; '.join(comment_parts)}" if comment_parts else ""
lines.append(f"{address:04X}: {padded_raw} {ins.text}{comment}")
lines.append("")
return "\n".join(lines)
@@ -62,6 +97,8 @@ def write_json(
vectors: dict[int, tuple[str, int]],
labels: dict[int, str],
dtc_vectors: dict[int, DtcVectorEntry] | None = None,
data_candidates: dict[str, list[dict[str, object]]] | None = None,
call_graph: dict[str, object] | None = None,
) -> None:
payload = {
"vectors": [
@@ -69,15 +106,37 @@ def write_json(
for addr, (name, target) in sorted(vectors.items())
],
"dtc_vectors": list((dtc_vectors or {}).values()),
"memory_regions": [
{
"name": region.name,
"start": region.start,
"end": region.end,
"kind": region.kind,
"manual": region.manual,
}
for region in MEMORY_REGIONS
],
"data_candidates": data_candidates or {"strings": [], "pointer_tables": []},
"call_graph": call_graph or {"nodes": [], "edges": []},
"instructions": [
{
"address": ins.address,
"address_region": region_for(ins.address).name,
"bytes": ins.raw.hex().upper(),
"text": ins.text,
"mnemonic": ins.mnemonic,
"operands": ins.operands,
"kind": ins.kind,
"targets": ins.targets,
"references": [
{
"address": address,
"name": IO_REGISTERS.get(address),
"region": region_for(address).name,
"kind": region_for(address).kind,
}
for address in ins.references
],
"comment": ins.comment,
"valid": ins.valid,
}
@@ -85,3 +144,16 @@ def write_json(
],
}
path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
def format_callgraph_dot(call_graph: dict[str, object]) -> str:
    """Render a call graph dictionary as Graphviz DOT text.

    Every entry of ``call_graph["nodes"]`` becomes a node keyed by its label
    and captioned with the label plus its ``h16``-formatted start address.
    Every entry of ``call_graph["edges"]`` becomes an edge captioned with
    the ``h16``-formatted call-site address.  Missing keys are treated as
    empty lists.  The returned text ends with a newline.
    """
    node_lines = [
        f' "{node["label"]}" [label="{node["label"]}\\n{h16(int(node["start"]))}"];'
        for node in call_graph.get("nodes", [])
    ]
    edge_lines = [
        f' "{edge["from_label"]}" -> "{edge["to_label"]}" [label="{h16(int(edge["call_site"]))}"];'
        for edge in call_graph.get("edges", [])
    ]
    document = [
        "digraph callgraph {",
        ' graph [rankdir="LR"];',
        *node_lines,
        *edge_lines,
        "}",
        "",  # makes the joined text end with a newline
    ]
    return "\n".join(document)

View File

@@ -229,6 +229,14 @@ IO_BITFIELDS: dict[int, dict[int, str]] = {
1: "CH1",
0: "CH0",
},
0xFEEC: {
7: "OVF",
6: "WT/IT",
5: "TME",
2: "CKS2",
1: "CKS1",
0: "CKS0",
},
0xFEF0: _SCI_SMR_BITS,
0xFEF2: _SCI_SCR_BITS,
0xFEF4: _SCI_SSR_BITS,
@@ -261,6 +269,14 @@ IO_BITFIELDS: dict[int, dict[int, str]] = {
1: "MDS1",
0: "MDS0",
},
0xFF14: {
7: "WRST",
6: "RSTOE",
},
0xFF15: {
7: "WRST",
6: "RSTOE",
},
}