from __future__ import annotations

from .memory import region_for
from .model import Instruction
from .rom import Rom

# Byte values 0x20 through 0x7E inclusive (space through tilde).
PRINTABLE_ASCII = set(range(0x20, 0x7F))


def _occupied_addresses(instructions: dict[int, Instruction]) -> set[int]:
    """Return every address covered by one of *instructions*.

    Each instruction claims ``size`` consecutive addresses starting at its
    ``address``. An instruction whose ``size`` is below one still claims its
    own address.
    """
    occupied: set[int] = set()
    for ins in instructions.values():
        occupied.update(range(ins.address, ins.address + max(ins.size, 1)))
    return occupied


def _unoccupied_ranges(
    start: int,
    end: int,
    occupied: set[int],
) -> list[tuple[int, int]]:
    """Split ``[start, end)`` into maximal runs of addresses not in *occupied*.

    Returns:
        Half-open ``(run_start, run_end)`` tuples in ascending order. The list
        is empty when ``start >= end`` or every address is occupied.
    """
    ranges: list[tuple[int, int]] = []
    run_start: int | None = None
    for address in range(start, end):
        if address in occupied:
            # An occupied address closes the run that was open, if any.
            if run_start is not None:
                ranges.append((run_start, address))
                run_start = None
            continue
        if run_start is None:
            run_start = address
    # A run still open at the end of the scan extends up to ``end``.
    if run_start is not None:
        ranges.append((run_start, end))
    return ranges


def _is_pointer_target(target: int, known_targets: set[int]) -> bool:
    """Return whether *target* is accepted as a pointer-table entry.

    ``0x0000`` and ``0xFFFF`` are always rejected (presumably because they are
    common filler values -- confirm). Any other value is accepted when it is in
    *known_targets* or when ``region_for(target).kind`` is ``"ram"`` or
    ``"registers"``.
    """
    if target in (0x0000, 0xFFFF):
        return False
    if target in known_targets:
        return True
    region = region_for(target)
    return region.kind in {"ram", "registers"}


def find_ascii_strings(
    rom: Rom,
    ranges: list[tuple[int, int]],
    min_length: int = 6,
    max_candidates: int = 200,
) -> list[dict[str, object]]:
    """Find runs of printable ASCII bytes inside *ranges*.

    Args:
        rom: Source of bytes; only ``rom.u8(address)`` is used.
        ranges: Half-open ``(start, end)`` address ranges to scan. A run never
            extends across the end of its range.
        min_length: Minimum number of printable bytes for a run to be reported.
        max_candidates: Upper bound on the number of results. Scanning stops as
            soon as it is reached; a value of zero or less yields no results.

    Returns:
        One dict per run with the keys ``"address"`` (first byte of the run),
        ``"length"``, ``"text"`` and ``"terminated"``. ``"terminated"`` is true
        when the byte right after the run lies inside the same range and is
        zero.
    """
    candidates: list[dict[str, object]] = []
    if max_candidates <= 0:
        # The cap is only checked after an append below, so a non-positive cap
        # has to be handled up front to really mean "no results".
        return candidates

    for start, end in ranges:
        address = start
        while address < end:
            if rom.u8(address) not in PRINTABLE_ASCII:
                address += 1
                continue

            text_start = address
            raw = bytearray()
            while address < end:
                byte = rom.u8(address)
                if byte not in PRINTABLE_ASCII:
                    break
                raw.append(byte)
                address += 1

            if len(raw) >= min_length:
                candidates.append(
                    {
                        "address": text_start,
                        "length": len(raw),
                        "text": raw.decode("ascii", errors="replace"),
                        "terminated": address < end and rom.u8(address) == 0,
                    },
                )
                if len(candidates) >= max_candidates:
                    return candidates

            # ``address`` now sits on the non-printable byte that ended the run
            # (or on ``end``); step over it.
            address += 1
    return candidates


def find_pointer_tables(
    rom: Rom,
    ranges: list[tuple[int, int]],
    known_targets: set[int],
    min_entries: int = 3,
    max_candidates: int = 200,
) -> list[dict[str, object]]:
    """Find runs of consecutive 16-bit values that all look like pointers.

    Scanning is restricted to even addresses: a range with an odd start is
    entered at ``start + 1`` and the cursor always advances by two.

    Args:
        rom: Source of words; only ``rom.u16(address)`` is used.
        ranges: Half-open ``(start, end)`` address ranges to scan. A table
            never extends across the end of its range.
        known_targets: Addresses that are always accepted as pointer targets
            (see ``_is_pointer_target``).
        min_entries: Minimum number of consecutive accepted words for a run to
            be reported. Values below one are treated as one.
        max_candidates: Upper bound on the number of results. Scanning stops as
            soon as it is reached; a value of zero or less yields no results.

    Returns:
        One dict per table with the keys ``"address"``, ``"entry_size"``
        (always 2), ``"count"``, ``"targets"`` and ``"target_regions"`` (the
        ``region_for(target).name`` of every target, in order).
    """
    candidates: list[dict[str, object]] = []
    if max_candidates <= 0:
        # The cap is only checked after an append below, so a non-positive cap
        # has to be handled up front to really mean "no results".
        return candidates

    # A table needs at least one entry. Without this floor an empty run would
    # qualify, and since the scan resumes at the end of the run it would never
    # move forward, emitting zero-length tables until the cap is hit.
    min_entries = max(min_entries, 1)

    for start, end in ranges:
        address = start if start % 2 == 0 else start + 1
        while address + 2 <= end:
            entries: list[int] = []
            cursor = address
            while cursor + 2 <= end:
                target = rom.u16(cursor)
                if not _is_pointer_target(target, known_targets):
                    break
                entries.append(target)
                cursor += 2

            if len(entries) < min_entries:
                address += 2
                continue

            candidates.append(
                {
                    "address": address,
                    "entry_size": 2,
                    "count": len(entries),
                    "targets": entries,
                    "target_regions": [
                        region_for(target).name for target in entries
                    ],
                },
            )
            if len(candidates) >= max_candidates:
                return candidates
            # Resume right after the table so its entries are not reported
            # again as a shorter, overlapping table.
            address = cursor
    return candidates


def analyze_unreached_data(
    rom: Rom,
    instructions: dict[int, Instruction],
    start: int,
    end: int,
) -> dict[str, list[dict[str, object]]]:
    """Scan the bytes not covered by any instruction for strings and tables.

    The scanned window is ``[max(start, 0x0100), min(end, rom.end))`` minus
    every address claimed by *instructions*. The keys of *instructions* are
    passed on as the known pointer targets (presumably they are instruction
    addresses -- confirm against the caller).

    Returns:
        A dict with ``"strings"`` (see ``find_ascii_strings``) and
        ``"pointer_tables"`` (see ``find_pointer_tables``), both produced with
        those functions' default thresholds.
    """
    occupied = _occupied_addresses(instructions)
    known_targets = set(instructions)
    ranges = _unoccupied_ranges(max(start, 0x0100), min(end, rom.end), occupied)
    return {
        "strings": find_ascii_strings(rom, ranges),
        "pointer_tables": find_pointer_tables(rom, ranges, known_targets),
    }