1
0
This commit is contained in:
Aiden
2026-05-25 13:47:13 +10:00
parent 46ccaf3e39
commit 5ad90ade49
12 changed files with 17974 additions and 883 deletions

View File

@@ -6,10 +6,10 @@ This repo now includes a standalone Python helper for the H8/536 ROM image:
python h8536_decompiler.py ROM\M27C512@DIP28_1.BIN --out build\rom_decompiled.asm --json build\rom_decompiled.json
```
On this machine the Windows `python.exe` entry is a Microsoft Store launcher stub, so validation was run with WSL:
If you are using the repo-local venv:
```powershell
ubuntu.exe run python3 h8536_decompiler.py --out build/rom_decompiled.asm --json build/rom_decompiled.json
.\.venv\Scripts\python.exe h8536_decompiler.py --out build\rom_decompiled.asm --json build\rom_decompiled.json --callgraph-dot build\callgraph.dot
```
## What It Does
@@ -18,7 +18,13 @@ ubuntu.exe run python3 h8536_decompiler.py --out build/rom_decompiled.asm --json
- Reads the H8/536 minimum-mode vector table from the ROM.
- Recursively traces reachable code from reset, interrupt, and trap vectors.
- Emits labels for branch and call targets.
- Annotates H8/536 on-chip register accesses such as `P1DDR`, `SYSCR1`, `WCR`, and timer/SCI/A-D registers.
- Tracks `LDC.B #xx, BR` along traced control flow so later short absolute `@aa:8` operands can resolve automatically.
- Annotates H8/536 register accesses such as `P1DDR`, `SYSCR1`, `WCR`, watchdog, timer/SCI/A-D, and RAM-control registers.
- Decodes register bitfields and selected hardware semantics for setup writes.
- Emits memory-region metadata for vector, DTC, RAM, register-field, and mode-dependent program/external space.
- Parses the DTC vector table described by the manual.
- Scans unreached ROM ranges for ASCII strings and pointer-table candidates.
- Emits function summaries and a direct-call graph in JSON, with optional Graphviz DOT output.
- Handles the E-clock transfer instructions `MOVFPE` and `MOVTPE`.
The generated listing is written to:
@@ -44,6 +50,7 @@ python h8536_decompiler.py --help
- `--linear`: linear-sweep the selected range instead of tracing from vectors.
- `--start H'1000 --end H'D100`: constrain the decode range.
- `--br H'FE`: resolve short absolute `@aa:8` operands through a known base-register value.
- `--callgraph-dot build\callgraph.dot`: write a Graphviz DOT call graph.
## Code Layout
@@ -51,7 +58,9 @@ python h8536_decompiler.py --help
- `h8536/cli.py`: argument parsing and end-to-end orchestration.
- `h8536/decoder.py`: instruction and effective-address decoding.
- `h8536/tables.py`: manual-derived opcode/vector/register tables.
- `h8536/vectors.py`: exception vector parsing.
- `h8536/analysis.py`: recursive tracing, linear sweep, and labels.
- `h8536/vectors.py`: exception and DTC vector parsing.
- `h8536/analysis.py`: recursive tracing, linear sweep, labels, function grouping, and call graph analysis.
- `h8536/data_analysis.py`: unreached string and pointer-table candidate scans.
- `h8536/memory.py`: manual-derived memory-region tagging.
- `h8536/render.py`: assembly and JSON output.
- `h8536/model.py`, `h8536/rom.py`, `h8536/formatting.py`: shared data structures and helpers.

207
build/callgraph.dot Normal file
View File

@@ -0,0 +1,207 @@
digraph callgraph {
graph [rankdir="LR"];
"vec_reset_1000" [label="vec_reset_1000\nH'1000"];
"loc_10CE" [label="loc_10CE\nH'10CE"];
"loc_15E0" [label="loc_15E0\nH'15E0"];
"loc_1705" [label="loc_1705\nH'1705"];
"loc_174D" [label="loc_174D\nH'174D"];
"loc_1795" [label="loc_1795\nH'1795"];
"loc_17C9" [label="loc_17C9\nH'17C9"];
"loc_17FB" [label="loc_17FB\nH'17FB"];
"loc_182D" [label="loc_182D\nH'182D"];
"loc_1891" [label="loc_1891\nH'1891"];
"loc_18E7" [label="loc_18E7\nH'18E7"];
"loc_194A" [label="loc_194A\nH'194A"];
"loc_1979" [label="loc_1979\nH'1979"];
"loc_19A2" [label="loc_19A2\nH'19A2"];
"loc_19DB" [label="loc_19DB\nH'19DB"];
"loc_1A35" [label="loc_1A35\nH'1A35"];
"loc_1A8D" [label="loc_1A8D\nH'1A8D"];
"loc_1A9C" [label="loc_1A9C\nH'1A9C"];
"loc_1AE4" [label="loc_1AE4\nH'1AE4"];
"loc_1B0B" [label="loc_1B0B\nH'1B0B"];
"loc_1B2D" [label="loc_1B2D\nH'1B2D"];
"loc_1B44" [label="loc_1B44\nH'1B44"];
"loc_1B5B" [label="loc_1B5B\nH'1B5B"];
"loc_1B72" [label="loc_1B72\nH'1B72"];
"loc_1B89" [label="loc_1B89\nH'1B89"];
"loc_1BA0" [label="loc_1BA0\nH'1BA0"];
"loc_1BB6" [label="loc_1BB6\nH'1BB6"];
"loc_1BCC" [label="loc_1BCC\nH'1BCC"];
"loc_1BE2" [label="loc_1BE2\nH'1BE2"];
"loc_1BF8" [label="loc_1BF8\nH'1BF8"];
"loc_1C0E" [label="loc_1C0E\nH'1C0E"];
"loc_2127" [label="loc_2127\nH'2127"];
"loc_2650" [label="loc_2650\nH'2650"];
"loc_2806" [label="loc_2806\nH'2806"];
"loc_3930" [label="loc_3930\nH'3930"];
"loc_398A" [label="loc_398A\nH'398A"];
"loc_3995" [label="loc_3995\nH'3995"];
"loc_3A2E" [label="loc_3A2E\nH'3A2E"];
"vec_irq4_3AC7" [label="vec_irq4_3AC7\nH'3AC7"];
"vec_irq3_3C30" [label="vec_irq3_3C30\nH'3C30"];
"vec_ad_adi_3D99" [label="vec_ad_adi_3D99\nH'3D99"];
"loc_3E54" [label="loc_3E54\nH'3E54"];
"loc_3ECC" [label="loc_3ECC\nH'3ECC"];
"loc_3F28" [label="loc_3F28\nH'3F28"];
"loc_3F40" [label="loc_3F40\nH'3F40"];
"loc_3FD3" [label="loc_3FD3\nH'3FD3"];
"loc_3FEF" [label="loc_3FEF\nH'3FEF"];
"loc_400C" [label="loc_400C\nH'400C"];
"loc_4046" [label="loc_4046\nH'4046"];
"loc_4075" [label="loc_4075\nH'4075"];
"loc_4096" [label="loc_4096\nH'4096"];
"loc_40BB" [label="loc_40BB\nH'40BB"];
"loc_4217" [label="loc_4217\nH'4217"];
"loc_430C" [label="loc_430C\nH'430C"];
"loc_4324" [label="loc_4324\nH'4324"];
"loc_434C" [label="loc_434C\nH'434C"];
"vec_nmi_4393" [label="vec_nmi_4393\nH'4393"];
"loc_4394" [label="loc_4394\nH'4394"];
"loc_442F" [label="loc_442F\nH'442F"];
"loc_4457" [label="loc_4457\nH'4457"];
"loc_44F2" [label="loc_44F2\nH'44F2"];
"loc_451A" [label="loc_451A\nH'451A"];
"loc_45B5" [label="loc_45B5\nH'45B5"];
"loc_48EF" [label="loc_48EF\nH'48EF"];
"loc_48FA" [label="loc_48FA\nH'48FA"];
"loc_6206" [label="loc_6206\nH'6206"];
"loc_622B" [label="loc_622B\nH'622B"];
"loc_BA26" [label="loc_BA26\nH'BA26"];
"vec_sci1_txi_BA84" [label="vec_sci1_txi_BA84\nH'BA84"];
"loc_BAF2" [label="loc_BAF2\nH'BAF2"];
"vec_sci1_eri_BB57" [label="vec_sci1_eri_BB57\nH'BB57"];
"vec_sci1_rxi_BB67" [label="vec_sci1_rxi_BB67\nH'BB67"];
"loc_BBAB" [label="loc_BBAB\nH'BBAB"];
"loc_BE70" [label="loc_BE70\nH'BE70"];
"loc_BE9E" [label="loc_BE9E\nH'BE9E"];
"vec_frt1_ocia_BEEA" [label="vec_frt1_ocia_BEEA\nH'BEEA"];
"vec_frt2_ocia_BF23" [label="vec_frt2_ocia_BF23\nH'BF23"];
"vec_interval_timer_BFC4" [label="vec_interval_timer_BFC4\nH'BFC4"];
"loc_BFE0" [label="loc_BFE0\nH'BFE0"];
"loc_BFFE" [label="loc_BFFE\nH'BFFE"];
"loc_C010" [label="loc_C010\nH'C010"];
"loc_C039" [label="loc_C039\nH'C039"];
"loc_C06A" [label="loc_C06A\nH'C06A"];
"loc_C08B" [label="loc_C08B\nH'C08B"];
"loc_C0DB" [label="loc_C0DB\nH'C0DB"];
"loc_C10C" [label="loc_C10C\nH'C10C"];
"loc_C121" [label="loc_C121\nH'C121"];
"loc_C142" [label="loc_C142\nH'C142"];
"loc_10CE" -> "loc_3ECC" [label="H'10D4"];
"loc_15E0" -> "loc_1705" [label="H'1624"];
"loc_15E0" -> "loc_174D" [label="H'162D"];
"loc_15E0" -> "loc_1795" [label="H'1636"];
"loc_15E0" -> "loc_17C9" [label="H'1649"];
"loc_15E0" -> "loc_17FB" [label="H'1652"];
"loc_15E0" -> "loc_182D" [label="H'165B"];
"loc_15E0" -> "loc_1891" [label="H'1664"];
"loc_15E0" -> "loc_18E7" [label="H'166D"];
"loc_15E0" -> "loc_194A" [label="H'1676"];
"loc_15E0" -> "loc_1979" [label="H'167F"];
"loc_15E0" -> "loc_1B2D" [label="H'1692"];
"loc_15E0" -> "loc_1B44" [label="H'169B"];
"loc_15E0" -> "loc_1B5B" [label="H'16A4"];
"loc_15E0" -> "loc_1B72" [label="H'16C8"];
"loc_15E0" -> "loc_1B89" [label="H'16D1"];
"loc_15E0" -> "loc_1BA0" [label="H'16AD"];
"loc_15E0" -> "loc_1BB6" [label="H'16B6"];
"loc_15E0" -> "loc_1BCC" [label="H'16BF"];
"loc_15E0" -> "loc_1BE2" [label="H'16EC"];
"loc_15E0" -> "loc_1BF8" [label="H'16F5"];
"loc_15E0" -> "loc_2650" [label="H'15E0"];
"loc_15E0" -> "loc_3E54" [label="H'15F6"];
"loc_15E0" -> "loc_4394" [label="H'1605"];
"loc_15E0" -> "loc_4457" [label="H'160E"];
"loc_15E0" -> "loc_451A" [label="H'1617"];
"loc_1705" -> "loc_19A2" [label="H'1741"];
"loc_1705" -> "loc_48FA" [label="H'1731"];
"loc_174D" -> "loc_19A2" [label="H'1789"];
"loc_174D" -> "loc_48FA" [label="H'1779"];
"loc_1795" -> "loc_19A2" [label="H'17BD"];
"loc_1795" -> "loc_2127" [label="H'17A2"];
"loc_17C9" -> "loc_19A2" [label="H'17EF"];
"loc_17FB" -> "loc_19A2" [label="H'1821"];
"loc_182D" -> "loc_19A2" [label="H'1859"];
"loc_1891" -> "loc_19A2" [label="H'18AF"];
"loc_18E7" -> "loc_19A2" [label="H'1913"];
"loc_194A" -> "loc_19A2" [label="H'196A"];
"loc_1979" -> "loc_19DB" [label="H'1997"];
"loc_19DB" -> "loc_3E54" [label="H'1A11"];
"loc_1A35" -> "loc_1A8D" [label="H'1A43"];
"loc_1A35" -> "loc_3E54" [label="H'1A79"];
"loc_1A9C" -> "loc_48FA" [label="H'1AE0"];
"loc_1AE4" -> "loc_48FA" [label="H'1B07"];
"loc_1B0B" -> "loc_48FA" [label="H'1B29"];
"loc_1B2D" -> "loc_1C0E" [label="H'1B38"];
"loc_1B44" -> "loc_1C0E" [label="H'1B4F"];
"loc_1B5B" -> "loc_1C0E" [label="H'1B66"];
"loc_1B72" -> "loc_1C0E" [label="H'1B7D"];
"loc_1B89" -> "loc_1C0E" [label="H'1B94"];
"loc_1BA0" -> "loc_1C0E" [label="H'1BAB"];
"loc_1BB6" -> "loc_1C0E" [label="H'1BC1"];
"loc_1BCC" -> "loc_1C0E" [label="H'1BD7"];
"loc_1BE2" -> "loc_1C0E" [label="H'1BED"];
"loc_1BF8" -> "loc_1C0E" [label="H'1C03"];
"loc_2127" -> "loc_48FA" [label="H'2140"];
"loc_2650" -> "loc_3E54" [label="H'26A8"];
"loc_2806" -> "loc_6206" [label="H'282C"];
"loc_3E54" -> "loc_3FD3" [label="H'3E93"];
"loc_3ECC" -> "loc_3F28" [label="H'3F0E"];
"loc_3ECC" -> "loc_3F40" [label="H'3ED4"];
"loc_3F28" -> "loc_3F40" [label="H'3F36"];
"loc_3FD3" -> "loc_BAF2" [label="H'3FEB"];
"loc_3FEF" -> "loc_400C" [label="H'4003"];
"loc_400C" -> "loc_4075" [label="H'4040"];
"loc_400C" -> "loc_4217" [label="H'4042"];
"loc_4075" -> "loc_4096" [label="H'4093"];
"loc_40BB" -> "loc_BFE0" [label="H'4114"];
"loc_40BB" -> "loc_BFFE" [label="H'41E3"];
"loc_4217" -> "loc_3ECC" [label="H'42C6"];
"loc_4324" -> "loc_10CE" [label="H'4348"];
"loc_4324" -> "loc_3ECC" [label="H'432A"];
"loc_4394" -> "loc_19A2" [label="H'43CA"];
"loc_4394" -> "loc_1A35" [label="H'43D6"];
"loc_4394" -> "loc_1A9C" [label="H'43E2"];
"loc_4394" -> "loc_1AE4" [label="H'43EE"];
"loc_4394" -> "loc_1B0B" [label="H'43FA"];
"loc_4394" -> "loc_442F" [label="H'43CF"];
"loc_4394" -> "loc_48FA" [label="H'4408"];
"loc_4457" -> "loc_19A2" [label="H'448D"];
"loc_4457" -> "loc_1A35" [label="H'4499"];
"loc_4457" -> "loc_1A9C" [label="H'44A5"];
"loc_4457" -> "loc_1AE4" [label="H'44B1"];
"loc_4457" -> "loc_1B0B" [label="H'44BD"];
"loc_4457" -> "loc_44F2" [label="H'4492"];
"loc_4457" -> "loc_48FA" [label="H'44CB"];
"loc_451A" -> "loc_19A2" [label="H'4550"];
"loc_451A" -> "loc_1A35" [label="H'455C"];
"loc_451A" -> "loc_1A9C" [label="H'4568"];
"loc_451A" -> "loc_1AE4" [label="H'4574"];
"loc_451A" -> "loc_1B0B" [label="H'4580"];
"loc_451A" -> "loc_45B5" [label="H'4555"];
"loc_451A" -> "loc_48FA" [label="H'458E"];
"loc_48EF" -> "loc_48FA" [label="H'48F7"];
"loc_48FA" -> "loc_3E54" [label="H'4926"];
"loc_BAF2" -> "loc_6206" [label="H'BB0E"];
"loc_BAF2" -> "loc_BA26" [label="H'BB43"];
"loc_BBAB" -> "loc_622B" [label="H'BC01"];
"loc_BBAB" -> "loc_BA26" [label="H'BCFA"];
"loc_BBAB" -> "loc_BE70" [label="H'BC86"];
"loc_BBAB" -> "loc_BFE0" [label="H'BD5F"];
"loc_BE9E" -> "loc_BA26" [label="H'BED5"];
"vec_frt2_ocia_BF23" -> "loc_48EF" [label="H'BF68"];
"loc_BFE0" -> "loc_C010" [label="H'BFE7"];
"loc_BFE0" -> "loc_C039" [label="H'BFE9"];
"loc_BFFE" -> "loc_C039" [label="H'C003"];
"loc_C010" -> "loc_C06A" [label="H'C010"];
"loc_C010" -> "loc_C08B" [label="H'C01D"];
"loc_C010" -> "loc_C121" [label="H'C018"];
"loc_C010" -> "loc_C142" [label="H'C035"];
"loc_C039" -> "loc_C06A" [label="H'C039"];
"loc_C039" -> "loc_C08B" [label="H'C046"];
"loc_C039" -> "loc_C0DB" [label="H'C05B"];
"loc_C039" -> "loc_C10C" [label="H'C060"];
"loc_C039" -> "loc_C121" [label="H'C041"];
"loc_C039" -> "loc_C142" [label="H'C066"];
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -59,3 +59,103 @@ def collect_labels(instructions: Iterable[Instruction], vectors: dict[int, tuple
for target in ins.targets:
labels.setdefault(target, label_for(target))
return labels
def collect_function_entries(
    instructions: Iterable[Instruction],
    vectors: dict[int, tuple[str, int]],
) -> set[int]:
    """Return every address that should be treated as a function entry.

    An address qualifies when it is the target of a vector-table entry or
    one of the targets of an instruction whose ``kind`` is ``"call"``.

    Args:
        instructions: Decoded instructions to inspect for call targets.
        vectors: Vector table mapping a vector address to ``(name, target)``.

    Returns:
        The union of all vector targets and all direct call targets.
    """
    vector_targets = {handler for _, handler in vectors.values()}
    call_targets = {
        callee
        for item in instructions
        if item.kind == "call"
        for callee in item.targets
    }
    return vector_targets | call_targets
def assign_functions(instructions: dict[int, Instruction], entries: set[int]) -> dict[int, int]:
    """Map each instruction address to the entry address of its function.

    Instructions are walked in ascending address order. Reaching an address
    in ``entries`` opens a new function. An instruction whose ``kind`` is
    ``"return"``, ``"rte"`` or ``"sleep"`` is still attributed to the open
    function and then closes it. Instructions met while no function is open
    are left out of the result.

    Args:
        instructions: Decoded instructions keyed by address.
        entries: Addresses that start a function.

    Returns:
        Mapping of instruction address to owning function entry address.
    """
    terminators = ("return", "rte", "sleep")
    ownership: dict[int, int] = {}
    open_entry: int | None = None
    for addr in sorted(instructions):
        if addr in entries:
            open_entry = addr
        if open_entry is None:
            # Not inside any function; nothing to record or close.
            continue
        ownership[addr] = open_entry
        if instructions[addr].kind in terminators:
            open_entry = None
    return ownership
def build_functions(
    instructions: dict[int, Instruction],
    vectors: dict[int, tuple[str, int]],
    labels: dict[int, str],
) -> list[dict[str, object]]:
    """Summarise every detected function as a plain dict.

    Function entries come from ``collect_function_entries`` and instruction
    ownership from ``assign_functions``. Each summary carries the keys
    ``start``, ``label``, ``sources`` (names of the vectors pointing at the
    entry), ``instruction_count``, ``end`` (address of the last byte owned),
    ``calls`` (distinct direct call targets in first-seen order) and
    ``unresolved_calls`` (call instructions without any target).

    Args:
        instructions: Decoded instructions keyed by address.
        vectors: Vector table mapping a vector address to ``(name, target)``.
        labels: Known labels keyed by address; ``label_for`` fills the gaps.

    Returns:
        The function summaries ordered by start address.
    """
    entry_points = collect_function_entries(instructions.values(), vectors)
    owner_of = assign_functions(instructions, entry_points)

    # Several vectors may share one handler, so keep every vector name.
    names_by_target: dict[int, list[str]] = {}
    for vector_name, handler in vectors.values():
        names_by_target.setdefault(handler, []).append(vector_name)

    summaries: dict[int, dict[str, object]] = {}
    for address, owner in owner_of.items():
        item = instructions[address]
        summary = summaries.get(owner)
        if summary is None:
            summary = {
                "start": owner,
                "label": labels.get(owner, label_for(owner)),
                "sources": names_by_target.get(owner, []),
                "instruction_count": 0,
                "end": owner,
                "calls": [],
                "unresolved_calls": 0,
            }
            summaries[owner] = summary

        summary["instruction_count"] = int(summary["instruction_count"]) + 1
        # A zero-sized item still occupies one byte for range purposes.
        last_byte = item.address + max(item.size, 1) - 1
        summary["end"] = max(int(summary["end"]), last_byte)

        if item.kind != "call":
            continue
        if not item.targets:
            summary["unresolved_calls"] = int(summary["unresolved_calls"]) + 1
            continue
        callees = summary["calls"]
        assert isinstance(callees, list)
        for callee in item.targets:
            if callee not in callees:
                callees.append(callee)

    return [summaries[entry] for entry in sorted(summaries)]
def build_call_graph(
    instructions: dict[int, Instruction],
    vectors: dict[int, tuple[str, int]],
    labels: dict[int, str],
) -> dict[str, object]:
    """Build the direct-call graph between detected functions.

    Nodes are the summaries produced by ``build_functions``. One edge is
    emitted per distinct ``(caller entry, call target)`` pair; its
    ``call_site`` is the address of the first call instruction met for that
    pair while iterating ``instructions``. Call instructions without
    targets, or that no function owns, contribute no edge.

    Args:
        instructions: Decoded instructions keyed by address.
        vectors: Vector table mapping a vector address to ``(name, target)``.
        labels: Known labels keyed by address; ``label_for`` fills the gaps.

    Returns:
        ``{"nodes": [...], "edges": [...]}`` with edges ordered by
        ``(from, to)``.
    """
    entry_points = collect_function_entries(instructions.values(), vectors)
    owner_of = assign_functions(instructions, entry_points)
    nodes = build_functions(instructions, vectors, labels)

    # Keyed by (caller, callee) so only the first call site per pair is kept.
    edge_by_pair: dict[tuple[int, int], dict[str, object]] = {}
    for item in instructions.values():
        if item.kind != "call" or not item.targets:
            continue
        caller = owner_of.get(item.address)
        if caller is None:
            continue
        for callee in item.targets:
            pair = (caller, callee)
            if pair in edge_by_pair:
                continue
            edge_by_pair[pair] = {
                "from": caller,
                "from_label": labels.get(caller, label_for(caller)),
                "to": callee,
                "to_label": labels.get(callee, label_for(callee)),
                "call_site": item.address,
            }

    ordered_edges = [edge_by_pair[pair] for pair in sorted(edge_by_pair)]
    return {"nodes": nodes, "edges": ordered_edges}

View File

@@ -3,10 +3,11 @@ from __future__ import annotations
import argparse
from pathlib import Path
from .analysis import collect_labels, linear_sweep, trace
from .analysis import build_call_graph, collect_labels, linear_sweep, trace
from .data_analysis import analyze_unreached_data
from .decoder import H8536Decoder
from .formatting import parse_int
from .render import format_listing, write_json
from .render import format_callgraph_dot, format_listing, write_json
from .rom import Rom
from .vectors import read_dtc_vectors_max, read_dtc_vectors_min, read_vectors_max, read_vectors_min
@@ -30,6 +31,7 @@ def main() -> int:
parser.add_argument("--entry", type=parse_int, action="append", default=[], help="extra entry point to trace")
parser.add_argument("--br", type=parse_int, default=None, help="optional BR value for @aa:8 short absolute operands")
parser.add_argument("--linear", action="store_true", help="linear-sweep the selected range instead of tracing from vectors")
parser.add_argument("--callgraph-dot", type=Path, default=None, help="optional Graphviz DOT call graph output")
args = parser.parse_args()
data = args.rom.read_bytes()
@@ -58,6 +60,8 @@ def main() -> int:
else:
instructions = trace(decoder, starts, args.start, end)
labels.update(collect_labels(instructions.values(), vectors))
data_candidates = analyze_unreached_data(rom, instructions, args.start, end)
call_graph = build_call_graph(instructions, vectors, labels)
args.out.parent.mkdir(parents=True, exist_ok=True)
args.out.write_text(
@@ -70,15 +74,29 @@ def main() -> int:
args.mode,
traced=not args.linear,
dtc_vectors=dtc_vectors,
data_candidates=data_candidates,
),
encoding="utf-8",
)
if args.json:
args.json.parent.mkdir(parents=True, exist_ok=True)
write_json(args.json, instructions, vectors, labels, dtc_vectors=dtc_vectors)
write_json(
args.json,
instructions,
vectors,
labels,
dtc_vectors=dtc_vectors,
data_candidates=data_candidates,
call_graph=call_graph,
)
if args.callgraph_dot:
args.callgraph_dot.parent.mkdir(parents=True, exist_ok=True)
args.callgraph_dot.write_text(format_callgraph_dot(call_graph), encoding="utf-8")
invalid = sum(1 for ins in instructions.values() if not ins.valid)
print(f"wrote {args.out} ({len(instructions)} items, {invalid} invalid/data bytes)")
if args.json:
print(f"wrote {args.json}")
if args.callgraph_dot:
print(f"wrote {args.callgraph_dot}")
return 0

126
h8536/data_analysis.py Normal file
View File

@@ -0,0 +1,126 @@
from __future__ import annotations
from .memory import region_for
from .model import Instruction
from .rom import Rom
PRINTABLE_ASCII = set(range(0x20, 0x7F))
def _occupied_addresses(instructions: dict[int, Instruction]) -> set[int]:
occupied: set[int] = set()
for ins in instructions.values():
occupied.update(range(ins.address, ins.address + max(ins.size, 1)))
return occupied
def _unoccupied_ranges(start: int, end: int, occupied: set[int]) -> list[tuple[int, int]]:
ranges: list[tuple[int, int]] = []
run_start: int | None = None
for address in range(start, end):
if address in occupied:
if run_start is not None:
ranges.append((run_start, address))
run_start = None
continue
if run_start is None:
run_start = address
if run_start is not None:
ranges.append((run_start, end))
return ranges
def _is_pointer_target(target: int, known_targets: set[int]) -> bool:
if target in (0x0000, 0xFFFF):
return False
if target in known_targets:
return True
region = region_for(target)
return region.kind in {"ram", "registers"}
def find_ascii_strings(
    rom: Rom,
    ranges: list[tuple[int, int]],
    min_length: int = 6,
    max_candidates: int = 200,
) -> list[dict[str, object]]:
    """Scan address ranges for runs of printable ASCII bytes.

    Args:
        rom: Byte source; only ``rom.u8(address)`` is used.
        ranges: Half-open ``(start, end)`` address ranges to scan.
        min_length: Shortest run that is reported.
        max_candidates: Scanning stops once this many runs were collected.

    Returns:
        One dict per run with ``address``, ``length``, ``text`` and
        ``terminated`` (true when the byte right after the run lies inside
        the same range and is zero).
    """
    found: list[dict[str, object]] = []
    for range_start, range_end in ranges:
        cursor = range_start
        while cursor < range_end:
            run_start = cursor
            while cursor < range_end and rom.u8(cursor) in PRINTABLE_ASCII:
                cursor += 1
            run_length = cursor - run_start
            # A zero-length run means the byte at ``cursor`` is not printable.
            if run_length > 0 and run_length >= min_length:
                raw = bytearray(rom.u8(position) for position in range(run_start, cursor))
                found.append(
                    {
                        "address": run_start,
                        "length": len(raw),
                        "text": raw.decode("ascii", errors="replace"),
                        "terminated": cursor < range_end and rom.u8(cursor) == 0,
                    },
                )
                if len(found) >= max_candidates:
                    return found
            # Step over the byte that ended (or prevented) the run.
            cursor += 1
    return found
def find_pointer_tables(
    rom: Rom,
    ranges: list[tuple[int, int]],
    known_targets: set[int],
    min_entries: int = 3,
    max_candidates: int = 200,
) -> list[dict[str, object]]:
    """Scan address ranges for runs of 16-bit words that look like pointers.

    Scanning starts at the first even address of each range and moves in
    two-byte steps. A run of consecutive words accepted by
    ``_is_pointer_target`` is reported when it has at least ``min_entries``
    entries, and scanning then resumes right after it.

    Args:
        rom: Word source; only ``rom.u16(address)`` is used.
        ranges: Half-open ``(start, end)`` address ranges to scan.
        known_targets: Addresses always accepted as pointer targets.
        min_entries: Shortest run that is reported.
        max_candidates: Scanning stops once this many tables were collected.

    Returns:
        One dict per table with ``address``, ``entry_size`` (always 2),
        ``count``, ``targets`` and ``target_regions``.
    """
    tables: list[dict[str, object]] = []
    for range_start, range_end in ranges:
        # Round an odd start up to the next even address.
        table_start = range_start + (range_start % 2)
        while table_start + 2 <= range_end:
            targets: list[int] = []
            probe = table_start
            while probe + 2 <= range_end:
                word = rom.u16(probe)
                if not _is_pointer_target(word, known_targets):
                    break
                targets.append(word)
                probe += 2

            if len(targets) < min_entries:
                table_start += 2
                continue

            tables.append(
                {
                    "address": table_start,
                    "entry_size": 2,
                    "count": len(targets),
                    "targets": targets,
                    "target_regions": [region_for(word).name for word in targets],
                },
            )
            if len(tables) >= max_candidates:
                return tables
            table_start = probe
    return tables
def analyze_unreached_data(
    rom: Rom,
    instructions: dict[int, Instruction],
    start: int,
    end: int,
) -> dict[str, list[dict[str, object]]]:
    """Look for string and pointer-table candidates outside decoded code.

    The scan window is ``[start, end)`` clamped so that it never begins
    below ``H'0100`` and never extends past ``rom.end``. Bytes covered by
    a decoded instruction are excluded.

    Args:
        rom: ROM image to read from.
        instructions: Decoded instructions keyed by address; the keys also
            serve as the known pointer targets.
        start: Requested start of the scan window.
        end: Requested end of the scan window.

    Returns:
        ``{"strings": [...], "pointer_tables": [...]}``.
    """
    covered = _occupied_addresses(instructions)
    instruction_addresses = set(instructions)
    scan_start = max(start, 0x0100)
    scan_end = min(end, rom.end)
    gaps = _unoccupied_ranges(scan_start, scan_end, covered)
    strings = find_ascii_strings(rom, gaps)
    pointer_tables = find_pointer_tables(rom, gaps, instruction_addresses)
    return {"strings": strings, "pointer_tables": pointer_tables}

View File

@@ -77,6 +77,113 @@ def _bitfield_values(address: int, value: int) -> str:
return " ".join(parts)
def _adcsr_semantics(value: int) -> str:
channels_single = ["AN0", "AN1", "AN2", "AN3", "AN4", "AN5", "AN6", "AN7"]
channels_scan = ["AN0", "AN0-AN1", "AN0-AN2", "AN0-AN3", "AN4", "AN4-AN5", "AN4-AN6", "AN4-AN7"]
scan = bool(value & 0x10)
channels = channels_scan[value & 0x07] if scan else channels_single[value & 0x07]
mode = "scan" if scan else "single"
state = "start" if value & 0x20 else "halt"
interrupt = "ADI enabled" if value & 0x40 else "ADI disabled"
conversion = "138-state max" if value & 0x08 else "274-state max"
return f"A/D {state}, {mode} {channels}, {conversion}, {interrupt}"
def _sci_smr_semantics(value: int) -> str:
mode = "sync" if value & 0x80 else "async"
char_len = "7-bit" if value & 0x40 else "8-bit"
parity = "odd parity" if value & 0x10 else "even parity"
parity = parity if value & 0x20 else "no parity"
stop = "2 stop" if value & 0x08 else "1 stop"
clock = ["phi", "phi/4", "phi/16", "phi/64"][value & 0x03]
return f"SCI {mode}, {char_len}, {parity}, {stop}, clock {clock}"
def _sci_scr_semantics(value: int) -> str:
enabled: list[str] = []
if value & 0x20:
enabled.append("TX")
if value & 0x10:
enabled.append("RX")
if value & 0x80:
enabled.append("TXI")
if value & 0x40:
enabled.append("RXI/ERI")
clock = "external clock" if value & 0x02 else "internal clock"
if value & 0x01:
clock += ", SCK output"
return f"SCI enables {','.join(enabled) if enabled else 'none'}, {clock}"
def _wcr_semantics(value: int) -> str:
modes = ["programmable wait", "no wait states", "pin wait", "pin auto-wait"]
counts = ["0 waits", "1 wait", "2 waits", "3 waits"]
return f"{modes[(value >> 2) & 0x03]}, {counts[value & 0x03]}"
def _wdt_semantics(value: int) -> str:
    """Describe a watchdog-timer write value in words.

    A value that fits in one byte is decoded directly as TCSR data. A wider
    value is split into a password (bits 15-8) and data (bits 7-0):
    ``H'A5`` marks a TCSR write, ``H'5A`` a counter write, and any other
    password is reported verbatim.
    """
    if value <= 0xFF:
        return _wdt_tcsr_data_semantics(value)

    password = (value >> 8) & 0xFF
    data = value & 0xFF
    if password == 0xA5:
        return f"TCSR password H'A5, {_wdt_tcsr_data_semantics(data)}"
    elif password == 0x5A:
        return f"TCNT password H'5A, counter write {h8(data)}"
    else:
        return f"watchdog password {h8(password)}, data {h8(data)}"
def _wdt_tcsr_data_semantics(value: int) -> str:
clock = ["phi/2", "phi/32", "phi/64", "phi/128", "phi/256", "phi/512", "phi/2048", "phi/4096"][value & 0x07]
mode = "watchdog NMI" if value & 0x40 else "interval IRQ0"
state = "enabled" if value & 0x20 else "disabled"
return f"WDT {state}, {mode}, clock {clock}"
def _rstcsr_semantics(value: int) -> str:
if value <= 0xFF:
data = value
password = None
else:
password = (value >> 8) & 0xFF
data = value & 0xFF
if password == 0xA5:
return f"RSTCSR password H'A5, clear WRST with data {h8(data)}"
if password == 0x5A:
rstoe = "RES output enabled" if data & 0x40 else "RES output disabled"
return f"RSTCSR password H'5A, {rstoe}"
return "RSTCSR status/control"
def _semantic_values(address: int, value: int) -> str:
if address == 0xFEE8:
return _adcsr_semantics(value)
if address in (0xFED8, 0xFEF0):
return _sci_smr_semantics(value)
if address in (0xFEDA, 0xFEF2):
return _sci_scr_semantics(value)
if address == 0xFEFC:
brle = "bus release pins enabled" if value & 0x08 else "P12/P13 are I/O"
irq0 = "IRQ0 enabled" if value & 0x20 else "IRQ0 disabled"
irq1 = "IRQ1 enabled" if value & 0x40 else "IRQ1 disabled"
return f"{brle}, {irq0}, {irq1}"
if address == 0xFEFD:
enabled = [name for bit, name in ((0, "SCI2 pins"), (1, "P9 PWM"), (2, "P6 PWM"), (3, "IRQ2"), (4, "IRQ3"), (5, "IRQ4"), (6, "IRQ5")) if value & (1 << bit)]
return "enabled " + ", ".join(enabled) if enabled else "alternate pin functions disabled"
if address == 0xFEEC:
return _wdt_semantics(value)
if address == 0xFF10:
return _wcr_semantics(value)
if address == 0xFF11:
return "on-chip RAM enabled" if value & 0x80 else "on-chip RAM disabled"
if address == 0xFF12:
return f"mode select bits MDS={value & 0x07}"
if address == 0xFF14:
return _rstcsr_semantics(value)
return ""
def write_comment(ea: EA, value: int | None) -> str:
if ea.address is None or ea.address not in IO_REGISTERS:
return ""
@@ -85,7 +192,9 @@ def write_comment(ea: EA, value: int | None) -> str:
return name
text = f"{name} = {h16(value) if value > 0xFF else h8(value)}"
fields = _bitfield_values(ea.address, value)
return f"{text} ({fields})" if fields else text
semantic = _semantic_values(ea.address, value)
details = "; ".join(part for part in (fields, semantic) if part)
return f"{text} ({details})" if details else text
def bit_comment(mnemonic: str, ea: EA, bit: int) -> str:

View File

@@ -23,7 +23,7 @@ class MemoryRegion:
MEMORY_REGIONS: tuple[MemoryRegion, ...] = (
MemoryRegion("exception_vectors", 0x0000, 0x009F, "vectors", "section 2 address space"),
MemoryRegion("dtc_vectors", 0x00A0, 0x00FF, "dtc_vectors", "section 2 address space"),
MemoryRegion("on_chip_rom", 0x0100, 0xF67F, "rom", "section 17 ROM"),
MemoryRegion("program_or_external", 0x0100, 0xF67F, "program", "section 2/17 mode-dependent ROM or external space"),
MemoryRegion("on_chip_ram", 0xF680, 0xFE7F, "ram", "section 16 RAM"),
MemoryRegion("register_field", 0xFE80, 0xFFFF, "registers", "appendix B register map"),
)

View File

@@ -4,11 +4,22 @@ import json
from pathlib import Path
from .formatting import h16, label_for
from .memory import MEMORY_REGIONS, region_for
from .model import Instruction
from .rom import Rom
from .tables import IO_REGISTERS
from .vectors import DtcVectorEntry
def _reference_comment(ins: Instruction) -> str:
parts: list[str] = []
for address in ins.references:
region = region_for(address)
name = IO_REGISTERS.get(address, h16(address))
parts.append(f"{name} in {region.name}")
return "refs " + ", ".join(parts) if parts else ""
def format_listing(
rom_path: Path,
rom: Rom,
@@ -18,6 +29,7 @@ def format_listing(
mode: str,
traced: bool,
dtc_vectors: dict[int, DtcVectorEntry] | None = None,
data_candidates: dict[str, list[dict[str, object]]] | None = None,
) -> str:
lines: list[str] = []
lines.append("; H8/536 ROM disassembly")
@@ -30,6 +42,11 @@ def format_listing(
lines.append("; - H8/536 uses the H8/500 CPU instruction set.")
lines.append("; - In minimum mode the reset vector at H'0000-H'0001 is a 16-bit PC.")
lines.append("; - The register field is H'FE80-H'FFFF; names below come from appendix B.")
lines.append("; - @aa:8 short absolute operands use BR as the upper address byte.")
lines.append("")
lines.append("; Memory Map")
for region in MEMORY_REGIONS:
lines.append(f"; {h16(region.start)}-{h16(region.end)} {region.name:<18} {region.kind}")
lines.append("")
lines.append("; Vectors")
for vector_addr, (name, target) in sorted(vectors.items()):
@@ -43,6 +60,23 @@ def format_listing(
lines.append(f"; {h16(vector_addr)} {entry['source']:<24} -> {h16(target)}")
lines.append("")
if data_candidates:
strings = data_candidates.get("strings", [])
pointer_tables = data_candidates.get("pointer_tables", [])
if strings or pointer_tables:
lines.append("; Unreached Data Candidates")
for item in strings[:40]:
lines.append(
f"; string {h16(int(item['address'])):<8} len={item['length']:<3} {item['text']!r}",
)
for item in pointer_tables[:40]:
targets = ", ".join(h16(int(target)) for target in item["targets"][:8])
suffix = " ..." if int(item["count"]) > 8 else ""
lines.append(
f"; ptrtbl {h16(int(item['address'])):<8} count={item['count']:<3} -> {targets}{suffix}",
)
lines.append("")
for address in sorted(instructions):
ins = instructions[address]
if address in labels:
@@ -50,7 +84,8 @@ def format_listing(
lines.append(f"{labels[address]}:")
raw = " ".join(f"{byte:02X}" for byte in ins.raw)
padded_raw = raw.ljust(14)
comment = f" ; {ins.comment}" if ins.comment else ""
comment_parts = [part for part in (ins.comment, _reference_comment(ins) if not ins.comment else "") if part]
comment = f" ; {'; '.join(comment_parts)}" if comment_parts else ""
lines.append(f"{address:04X}: {padded_raw} {ins.text}{comment}")
lines.append("")
return "\n".join(lines)
@@ -62,6 +97,8 @@ def write_json(
vectors: dict[int, tuple[str, int]],
labels: dict[int, str],
dtc_vectors: dict[int, DtcVectorEntry] | None = None,
data_candidates: dict[str, list[dict[str, object]]] | None = None,
call_graph: dict[str, object] | None = None,
) -> None:
payload = {
"vectors": [
@@ -69,15 +106,37 @@ def write_json(
for addr, (name, target) in sorted(vectors.items())
],
"dtc_vectors": list((dtc_vectors or {}).values()),
"memory_regions": [
{
"name": region.name,
"start": region.start,
"end": region.end,
"kind": region.kind,
"manual": region.manual,
}
for region in MEMORY_REGIONS
],
"data_candidates": data_candidates or {"strings": [], "pointer_tables": []},
"call_graph": call_graph or {"nodes": [], "edges": []},
"instructions": [
{
"address": ins.address,
"address_region": region_for(ins.address).name,
"bytes": ins.raw.hex().upper(),
"text": ins.text,
"mnemonic": ins.mnemonic,
"operands": ins.operands,
"kind": ins.kind,
"targets": ins.targets,
"references": [
{
"address": address,
"name": IO_REGISTERS.get(address),
"region": region_for(address).name,
"kind": region_for(address).kind,
}
for address in ins.references
],
"comment": ins.comment,
"valid": ins.valid,
}
@@ -85,3 +144,16 @@ def write_json(
],
}
path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
def format_callgraph_dot(call_graph: dict[str, object]) -> str:
    """Render a call graph as Graphviz DOT text.

    Args:
        call_graph: Mapping with optional ``"nodes"`` (dicts carrying
            ``label`` and ``start``) and ``"edges"`` (dicts carrying
            ``from_label``, ``to_label`` and ``call_site``).

    Returns:
        A left-to-right ``digraph`` with one statement per node and edge,
        terminated by a newline. Edges are labelled with their call site.
    """
    out = ["digraph callgraph {", ' graph [rankdir="LR"];']
    for node in call_graph.get("nodes", []):
        name = node["label"]
        start = h16(int(node["start"]))
        # ``\\n`` emits a literal backslash-n, which DOT renders as a line break.
        out.append(f' "{name}" [label="{name}\\n{start}"];')
    for edge in call_graph.get("edges", []):
        caller = edge["from_label"]
        callee = edge["to_label"]
        site = h16(int(edge["call_site"]))
        out.append(f' "{caller}" -> "{callee}" [label="{site}"];')
    out.extend(["}", ""])
    return "\n".join(out)

View File

@@ -229,6 +229,14 @@ IO_BITFIELDS: dict[int, dict[int, str]] = {
1: "CH1",
0: "CH0",
},
0xFEEC: {
7: "OVF",
6: "WT/IT",
5: "TME",
2: "CKS2",
1: "CKS1",
0: "CKS0",
},
0xFEF0: _SCI_SMR_BITS,
0xFEF2: _SCI_SCR_BITS,
0xFEF4: _SCI_SSR_BITS,
@@ -261,6 +269,14 @@ IO_BITFIELDS: dict[int, dict[int, str]] = {
1: "MDS1",
0: "MDS0",
},
0xFF14: {
7: "WRST",
6: "RSTOE",
},
0xFF15: {
7: "WRST",
6: "RSTOE",
},
}

View File

@@ -0,0 +1,41 @@
import unittest
from h8536.analysis import build_call_graph
from h8536.data_analysis import analyze_unreached_data
from h8536.memory import region_for
from h8536.model import Instruction
from h8536.rom import Rom
class AnalysisOutputsTest(unittest.TestCase):
    """Checks for memory-region tagging, unreached-data scanning and the call graph."""

    def test_memory_regions_distinguish_ram_and_register_field(self):
        """Boundary addresses resolve to the expected region names."""
        expected_regions = (
            (0xF680, "on_chip_ram"),
            (0xFE80, "register_field"),
            (0x0100, "program_or_external"),
        )
        for address, region_name in expected_regions:
            self.assertEqual(region_for(address).name, region_name)

    def test_unreached_data_scan_finds_ascii_string_candidate(self):
        """A printable run placed in otherwise 0xFF-filled ROM is reported."""
        image = bytearray([0xFF] * 0x120)
        image[0x100:0x106] = b"HELLO!"

        candidates = analyze_unreached_data(Rom(bytes(image)), {}, 0, len(image))

        first_string = candidates["strings"][0]
        self.assertEqual(first_string["address"], 0x100)
        self.assertEqual(first_string["text"], "HELLO!")

    def test_call_graph_records_direct_calls_by_function_owner(self):
        """A JSR inside the reset handler yields an edge from its entry to the callee."""
        call = Instruction(0x0100, b"\x18\x02\x00", "JSR", "@loc_0200", kind="call", targets=[0x0200])
        caller_return = Instruction(0x0103, b"\x19", "RTS", kind="return", fallthrough=False)
        callee_return = Instruction(0x0200, b"\x19", "RTS", kind="return", fallthrough=False)
        instructions = {0x0100: call, 0x0103: caller_return, 0x0200: callee_return}
        vectors = {0x0000: ("reset", 0x0100)}
        labels = {0x0100: "vec_reset_0100", 0x0200: "loc_0200"}

        graph = build_call_graph(instructions, vectors, labels)

        first_edge = graph["edges"][0]
        self.assertEqual(first_edge["from"], 0x0100)
        self.assertEqual(first_edge["to"], 0x0200)
if __name__ == "__main__":
unittest.main()