Updates
This commit is contained in:
126
h8536/data_analysis.py
Normal file
126
h8536/data_analysis.py
Normal file
@@ -0,0 +1,126 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from .memory import region_for
|
||||
from .model import Instruction
|
||||
from .rom import Rom
|
||||
|
||||
|
||||
# Byte values treated as printable text: space (0x20) through tilde (0x7E).
PRINTABLE_ASCII = set(range(ord(" "), ord("~") + 1))
|
||||
|
||||
|
||||
def _occupied_addresses(instructions: dict[int, Instruction]) -> set[int]:
    """Return every address covered by a decoded instruction.

    Each instruction contributes the addresses from ``ins.address`` up to
    ``ins.address + ins.size``; an instruction reporting a size below one
    still claims its own start address.
    """
    return {
        covered
        for ins in instructions.values()
        for covered in range(ins.address, ins.address + max(ins.size, 1))
    }
|
||||
|
||||
|
||||
def _unoccupied_ranges(start: int, end: int, occupied: set[int]) -> list[tuple[int, int]]:
    """List the maximal gaps in ``[start, end)`` that are not in *occupied*.

    Each gap is returned as a half-open ``(gap_start, gap_end)`` pair, in
    ascending address order. An empty list is returned when every address is
    occupied or when ``start >= end``.
    """
    gaps: list[tuple[int, int]] = []
    cursor = start
    while cursor < end:
        if cursor in occupied:
            cursor += 1
            continue
        # First free address of a gap: extend until an occupied one or the end.
        gap_start = cursor
        while cursor < end and cursor not in occupied:
            cursor += 1
        gaps.append((gap_start, cursor))
    return gaps
|
||||
|
||||
|
||||
def _is_pointer_target(target: int, known_targets: set[int]) -> bool:
    """Decide whether a 16-bit word looks like a plausible pointer.

    ``0x0000`` and ``0xFFFF`` are always rejected (presumably because they are
    common blank/fill values -- confirm). Any other value is accepted when it
    is one of *known_targets*, or when ``region_for`` reports a region whose
    ``kind`` is ``"ram"`` or ``"registers"``.
    """
    if target == 0x0000 or target == 0xFFFF:
        return False
    return target in known_targets or region_for(target).kind in {"ram", "registers"}
|
||||
|
||||
|
||||
def find_ascii_strings(
    rom: Rom,
    ranges: list[tuple[int, int]],
    min_length: int = 6,
    max_candidates: int = 200,
) -> list[dict[str, object]]:
    """Collect runs of printable ASCII bytes found inside *ranges*.

    Args:
        rom: Object exposing ``u8(address)``, used to read one byte.
        ranges: Half-open ``(start, end)`` address pairs to scan. A run never
            extends past the end of the range it started in.
        min_length: Shortest run, in bytes, that is reported.
        max_candidates: Upper bound on the number of results; scanning stops
            as soon as it is reached. Zero or a negative value yields ``[]``.

    Returns:
        One dict per reported run with the keys ``"address"`` (first byte of
        the run), ``"length"``, ``"text"`` and ``"terminated"`` (True when the
        byte directly after the run lies inside the same range and is 0).
    """
    candidates: list[dict[str, object]] = []
    # The cap is otherwise only checked after an append, which would let one
    # result through even when the caller asked for none.
    if max_candidates <= 0:
        return candidates

    for start, end in ranges:
        address = start
        while address < end:
            text_start = address
            raw = bytearray()
            # Byte that stopped the run; stays None if the range ended first.
            stopper: int | None = None
            while address < end:
                byte = rom.u8(address)
                if byte not in PRINTABLE_ASCII:
                    stopper = byte
                    break
                raw.append(byte)
                address += 1

            if raw and len(raw) >= min_length:
                candidates.append(
                    {
                        "address": text_start,
                        "length": len(raw),
                        "text": raw.decode("ascii", errors="replace"),
                        "terminated": stopper == 0,
                    },
                )
                if len(candidates) >= max_candidates:
                    return candidates

            # Step over the non-printable byte that ended (or preempted) the run.
            address += 1
    return candidates
|
||||
|
||||
|
||||
def find_pointer_tables(
    rom: Rom,
    ranges: list[tuple[int, int]],
    known_targets: set[int],
    min_entries: int = 3,
    max_candidates: int = 200,
) -> list[dict[str, object]]:
    """Find runs of consecutive 16-bit words that all look like pointers.

    Words are read with ``rom.u16`` at even addresses only, and each word is
    judged by ``_is_pointer_target``. A run never extends past the end of the
    range it started in.

    Args:
        rom: Object exposing ``u16(address)``, used to read one word.
        ranges: Half-open ``(start, end)`` address pairs to scan.
        known_targets: Addresses that always count as valid pointer targets.
        min_entries: Fewest consecutive pointers that form a table. Values
            below 1 are treated as 1, since an empty run is not a table.
        max_candidates: Upper bound on the number of results; scanning stops
            as soon as it is reached. Zero or a negative value yields ``[]``.

    Returns:
        One dict per table with the keys ``"address"``, ``"entry_size"``
        (always 2), ``"count"``, ``"targets"`` and ``"target_regions"`` (the
        ``name`` of ``region_for(target)`` for each target).
    """
    candidates: list[dict[str, object]] = []
    if max_candidates <= 0:
        return candidates

    # With a threshold of zero an empty run would be accepted without the scan
    # position ever advancing, flooding the result with empty tables.
    required = max(min_entries, 1)

    for start, end in ranges:
        # Round an odd range start up to the next even address.
        address = start + (start % 2)
        while address + 2 <= end:
            entries: list[int] = []
            cursor = address
            while cursor + 2 <= end:
                target = rom.u16(cursor)
                if not _is_pointer_target(target, known_targets):
                    break
                entries.append(target)
                cursor += 2

            if len(entries) >= required:
                candidates.append(
                    {
                        "address": address,
                        "entry_size": 2,
                        "count": len(entries),
                        "targets": entries,
                        "target_regions": [region_for(target).name for target in entries],
                    },
                )
                if len(candidates) >= max_candidates:
                    return candidates

            # The word at `cursor` was rejected (or the range is exhausted), and
            # any run starting between `address` and `cursor` is a shorter
            # suffix of this one, so resume just past the rejected word.
            address = cursor + 2
    return candidates
|
||||
|
||||
|
||||
def analyze_unreached_data(
    rom: Rom,
    instructions: dict[int, Instruction],
    start: int,
    end: int,
) -> dict[str, list[dict[str, object]]]:
    """Scan the bytes not covered by any instruction for strings and pointer tables.

    The scan window is ``[start, end)`` clamped to begin no lower than 0x0100
    and to stop no higher than ``rom.end``.
    NOTE(review): 0x0100 presumably skips a vector area at the bottom of the
    address space -- confirm against the memory map.

    Returns:
        A dict with ``"strings"`` (from ``find_ascii_strings``) and
        ``"pointer_tables"`` (from ``find_pointer_tables``, using the
        instruction start addresses as known targets).
    """
    code_bytes = _occupied_addresses(instructions)
    instruction_starts = set(instructions)
    window_start = max(start, 0x0100)
    window_end = min(end, rom.end)
    gaps = _unoccupied_ranges(window_start, window_end, code_bytes)

    strings = find_ascii_strings(rom, gaps)
    pointer_tables = find_pointer_tables(rom, gaps, instruction_starts)
    return {"strings": strings, "pointer_tables": pointer_tables}
|
||||
Reference in New Issue
Block a user