1
0
Files
h8-536-decoder/h8536/data_analysis.py
2026-05-25 13:47:13 +10:00

127 lines
4.0 KiB
Python

from __future__ import annotations
from .memory import region_for
from .model import Instruction
from .rom import Rom
# Byte values treated as printable text: space (0x20) through tilde (0x7E).
PRINTABLE_ASCII = set(range(ord(" "), ord("~") + 1))
def _occupied_addresses(instructions: dict[int, Instruction]) -> set[int]:
    """Return every byte address covered by a decoded instruction.

    Each instruction claims the addresses from ``address`` up to (but not
    including) ``address + size``.  An instruction whose ``size`` is below 1
    still claims its own start address.
    """
    return {
        instruction.address + offset
        for instruction in instructions.values()
        for offset in range(max(instruction.size, 1))
    }
def _unoccupied_ranges(start: int, end: int, occupied: set[int]) -> list[tuple[int, int]]:
    """Split ``[start, end)`` into maximal runs of addresses not in *occupied*.

    Returns half-open ``(gap_start, gap_end)`` pairs in ascending order.  The
    result is empty when ``start >= end`` or when every address is occupied.
    """
    gaps: list[tuple[int, int]] = []
    cursor = start
    while cursor < end:
        if cursor in occupied:
            cursor += 1
            continue
        # Found the first free address of a gap; extend it as far as possible.
        gap_start = cursor
        while cursor < end and cursor not in occupied:
            cursor += 1
        gaps.append((gap_start, cursor))
    return gaps
def _is_pointer_target(target: int, known_targets: set[int]) -> bool:
    """Decide whether a word value looks like a plausible pointer.

    ``0x0000`` and ``0xFFFF`` are always rejected.  Any other value is
    accepted when it is one of *known_targets*, or when ``region_for`` maps
    it to a region whose ``kind`` is ``"ram"`` or ``"registers"``.
    """
    if target == 0x0000 or target == 0xFFFF:
        return False
    # The region lookup only happens for values that are not known targets.
    return target in known_targets or region_for(target).kind in {"ram", "registers"}
def find_ascii_strings(
    rom: Rom,
    ranges: list[tuple[int, int]],
    min_length: int = 6,
    max_candidates: int = 200,
) -> list[dict[str, object]]:
    """Collect runs of printable ASCII bytes found inside *ranges*.

    Every half-open ``(start, end)`` pair is scanned byte by byte with
    ``rom.u8``.  A maximal run of bytes that are all in ``PRINTABLE_ASCII``
    is reported when it is at least *min_length* bytes long.  Runs never
    extend past the end of the range they started in.

    Args:
        rom: Image providing ``u8(address)`` reads.
        ranges: Address ranges to scan.
        min_length: Shortest run worth reporting.
        max_candidates: Scanning stops as soon as this many runs are collected.

    Returns:
        One dict per run with the keys ``address`` (first byte of the run),
        ``length``, ``text`` and ``terminated``.  ``terminated`` is true when
        the byte right after the run is still inside the range and equals 0.
    """
    found: list[dict[str, object]] = []
    for range_start, range_end in ranges:
        cursor = range_start
        while cursor < range_end:
            run_start = cursor
            run = bytearray()
            while cursor < range_end:
                byte = rom.u8(cursor)
                if byte not in PRINTABLE_ASCII:
                    break
                run.append(byte)
                cursor += 1
            # An empty run means the byte at run_start was not printable.
            if run and len(run) >= min_length:
                followed_by_nul = cursor < range_end and rom.u8(cursor) == 0
                found.append(
                    {
                        "address": run_start,
                        "length": len(run),
                        "text": run.decode("ascii", errors="replace"),
                        "terminated": followed_by_nul,
                    },
                )
                if len(found) >= max_candidates:
                    return found
            # Step over the non-printable byte that stopped the run (or past
            # the end of the range, which ends the outer loop).
            cursor += 1
    return found
def find_pointer_tables(
    rom: Rom,
    ranges: list[tuple[int, int]],
    known_targets: set[int],
    min_entries: int = 3,
    max_candidates: int = 200,
) -> list[dict[str, object]]:
    """Find runs of consecutive 2-byte words that all look like pointers.

    Each half-open ``(start, end)`` pair in *ranges* is scanned on even
    addresses only (an odd start is rounded up).  A word read with
    ``rom.u16`` counts as a pointer when ``_is_pointer_target`` accepts it,
    and a run of at least *min_entries* such words is reported as one
    candidate table.  Runs never extend past the end of their range.

    Args:
        rom: Image providing ``u16(address)`` reads.
        ranges: Address ranges to scan.
        known_targets: Addresses accepted as pointer targets outright.
        min_entries: Minimum run length to report.  Values below 1 are
            treated as 1, because a table needs at least one entry.
        max_candidates: Scanning stops as soon as this many tables are
            collected.

    Returns:
        One dict per table with the keys ``address``, ``entry_size``
        (always 2), ``count``, ``targets`` and ``target_regions`` (the
        ``name`` of ``region_for(target)`` for every target).
    """
    # A zero-length "table" carries no information, and accepting one would
    # leave the scan position unchanged, so the same address would be reported
    # over and over until max_candidates is reached.
    required = max(min_entries, 1)
    tables: list[dict[str, object]] = []
    for range_start, range_end in ranges:
        # Round an odd start up to the next even address.
        table_start = range_start + (range_start % 2)
        while table_start + 2 <= range_end:
            targets: list[int] = []
            scan_end = table_start
            while scan_end + 2 <= range_end:
                word = rom.u16(scan_end)
                if not _is_pointer_target(word, known_targets):
                    break
                targets.append(word)
                scan_end += 2
            if len(targets) >= required:
                tables.append(
                    {
                        "address": table_start,
                        "entry_size": 2,
                        "count": len(targets),
                        "targets": targets,
                        "target_regions": [region_for(target).name for target in targets],
                    },
                )
                if len(tables) >= max_candidates:
                    return tables
            # The word at scan_end was rejected or lies outside the range, and
            # any run starting inside the words just scanned is shorter than
            # this one, so the next possible table starts after scan_end.
            table_start = scan_end + 2
    return tables
def analyze_unreached_data(
    rom: Rom,
    instructions: dict[int, Instruction],
    start: int,
    end: int,
) -> dict[str, list[dict[str, object]]]:
    """Look for strings and pointer tables in bytes no instruction covers.

    The scan window is ``[start, end)`` clamped so that it never begins below
    ``0x0100`` and never extends past ``rom.end``.  Addresses covered by any
    entry of *instructions* are removed from the window, and the remaining
    gaps are searched.  The keys of *instructions* serve as the known pointer
    targets.

    NOTE(review): the 0x0100 floor presumably skips a vector/header area at
    the bottom of the ROM — confirm against the memory map.

    Returns:
        A dict with the keys ``strings`` (from ``find_ascii_strings``) and
        ``pointer_tables`` (from ``find_pointer_tables``).
    """
    covered = _occupied_addresses(instructions)
    instruction_addresses = set(instructions)
    window_start = max(start, 0x0100)
    window_end = min(end, rom.end)
    gaps = _unoccupied_ranges(window_start, window_end, covered)
    strings = find_ascii_strings(rom, gaps)
    pointer_tables = find_pointer_tables(rom, gaps, instruction_addresses)
    return {
        "strings": strings,
        "pointer_tables": pointer_tables,
    }