1
0
Files
h8-536-decoder/h8536/symbols.py

381 lines
12 KiB
Python

from __future__ import annotations
from collections.abc import Iterable, Mapping, Sequence
from dataclasses import dataclass, field
from .memory import MEMORY_REGIONS, MemoryRegion, region_for
from .model import Instruction
from .tables import IO_REGISTERS
# Mnemonic roots (the mnemonic with its trailing ".<suffix>" removed) grouped by
# how _direction_for_reference classifies a memory reference made through them.

# Every reference is reported as a plain "read".
READ_ONLY_ROOTS = {
    "BTST",
    "CMP:E",
    "CMP:G",
    "CMP:I",
    "MOVFPE",
    "TST",
}

# References are reported as "write"; "STC" additionally requires the reference
# to be unresolved or to sit in the last operand position.
WRITE_ONLY_ROOTS = {
    "CLR",
    "MOVTPE",
    "STC",
}

# Every reference is reported as "read_write".
READ_MODIFY_WRITE_ROOTS = {
    "ADD:Q",
    "BCLR", "BNOT", "BSET",
    "NEG", "NOT",
    "TAS",
}

# The last operand is reported as "read_write", any earlier operand as "read".
DESTINATION_UPDATE_ROOTS = {
    "ADD:G", "ADDS", "ADDX",
    "SUB", "SUBS", "SUBX",
    "AND", "OR", "XOR",
    "ROTL", "ROTR", "ROTXL", "ROTXR",
    "SHAL", "SHAR", "SHLL", "SHLR",
}

# The last operand is reported as "write", any earlier operand as "read".
MOV_ROOTS = {
    "MOV:E",
    "MOV:F",
    "MOV:G",
    "MOV:I",
    "MOV:L",
    "MOV:S",
}
@dataclass
class _Symbol:
address: int
name: str
region: str
kind: str
access_count: int = 0
read_count: int = 0
write_count: int = 0
unknown_count: int = 0
widths: set[str] = field(default_factory=set)
first_access: int | None = None
last_access: int | None = None
accesses: list[dict[str, object]] = field(default_factory=list)
xrefs: list[dict[str, object]] = field(default_factory=list)
def record_access(self, access: Mapping[str, object]) -> None:
instruction_address = int(access["instruction_address"])
direction = str(access["direction"])
width = access.get("width")
self.access_count += 1
if direction == "read":
self.read_count += 1
elif direction == "write":
self.write_count += 1
elif direction == "read_write":
self.read_count += 1
self.write_count += 1
else:
self.unknown_count += 1
if isinstance(width, str):
self.widths.add(width)
if self.first_access is None or instruction_address < self.first_access:
self.first_access = instruction_address
if self.last_access is None or instruction_address > self.last_access:
self.last_access = instruction_address
self.accesses.append(dict(access))
def record_xref(self, xref: Mapping[str, object]) -> None:
self.xrefs.append(dict(xref))
def discover_symbols(
    instructions: Mapping[int, Instruction] | Iterable[Instruction],
    regions: Sequence[MemoryRegion | Mapping[str, object]] | None = None,
    *,
    include_registers: bool = False,
    data_candidates: Mapping[str, object] | None = None,
) -> dict[str, object]:
    """Collect memory symbols from the references of decoded instructions.

    Instructions are visited in address order. Every reference they report is
    resolved to a region and, unless it is a register-region reference while
    ``include_registers`` is false, folded into one symbol per address.
    Pointer-table targets from ``data_candidates`` are attached afterwards as
    cross-references. ``MEMORY_REGIONS`` is used when ``regions`` is empty or
    not given.

    Returns a dictionary with two entries, both ordered by ascending address:
    ``"symbols"`` (a list of public symbol dictionaries) and ``"by_address"``
    (a mapping from address to symbol name).
    """
    region_table = tuple(regions or MEMORY_REGIONS)
    found: dict[int, _Symbol] = {}

    for instruction in _instruction_sequence(instructions):
        for access in instruction_accesses(instruction):
            target = int(access["address"])
            region = _region_for(target, region_table)
            if _skip_region(region, include_registers):
                continue
            entry = found.get(target)
            if entry is None:
                entry = found[target] = _new_symbol(target, region)
            entry.record_access(access)

    _record_data_candidate_xrefs(found, data_candidates, region_table, include_registers)

    ordered_addresses = sorted(found)
    return {
        "symbols": [_public_symbol(found[address]) for address in ordered_addresses],
        "by_address": {address: found[address].name for address in ordered_addresses},
    }
def instruction_accesses(ins: Instruction) -> list[dict[str, object]]:
    """Describe each memory reference of ``ins`` as one access record.

    Each record carries ``"address"``, ``"instruction_address"``,
    ``"instruction"``, ``"mnemonic"`` and ``"direction"``. ``"width"`` is added
    when the mnemonic has a recognised width suffix, and ``"operand"`` /
    ``"operand_index"`` when the reference could be tied to one operand.
    An instruction without references yields an empty list.
    """
    if not ins.references:
        return []

    targets = list(ins.references)
    operand_texts = _split_operands(ins.operands)
    width_name = _width_hint(ins.mnemonic)
    target_total = len(targets)

    def describe(target: object) -> dict[str, object]:
        # Build the record for one reference; key order matches the emitted payload.
        slot = _operand_index_for_reference(target, targets, operand_texts)
        direction = _direction_for_reference(ins.mnemonic, operand_texts, slot, target_total)
        record: dict[str, object] = {
            "address": target,
            "instruction_address": ins.address,
            "instruction": ins.text,
            "mnemonic": ins.mnemonic,
            "direction": direction,
        }
        if width_name is not None:
            record["width"] = width_name
        if slot is not None:
            record["operand"] = operand_texts[slot]
            record["operand_index"] = slot
        return record

    return [describe(target) for target in targets]
def symbol_for_address(analysis: Mapping[str, object] | None, address: int) -> str | None:
    """Look up the symbol name recorded for ``address`` in an analysis payload.

    ``analysis`` is expected to carry a ``"by_address"`` mapping from address
    to symbol name. Returns ``None`` when ``analysis`` is empty or ``None``,
    when ``"by_address"`` is missing or not a mapping, or when no non-empty
    name is stored for ``address``.

    The integer key is tried first. If it is absent, the decimal string form
    of the address is tried as well, because a payload that was serialized to
    JSON and loaded back has string keys.
    """
    if not analysis:
        return None
    by_address = analysis.get("by_address")
    if not isinstance(by_address, Mapping):
        return None
    symbol = by_address.get(address)
    if symbol is None:
        # JSON object keys are always strings, so 65280 comes back as "65280".
        symbol = by_address.get(str(address))
    return str(symbol) if symbol else None
def _instruction_sequence(
instructions: Mapping[int, Instruction] | Iterable[Instruction],
) -> list[Instruction]:
values = instructions.values() if isinstance(instructions, Mapping) else instructions
return sorted(values, key=lambda item: item.address)
def _new_symbol(address: int, region: MemoryRegion) -> _Symbol:
    """Create an empty ``_Symbol`` for ``address`` inside ``region``.

    The name and kind are derived from the region; all counters start at
    their dataclass defaults.
    """
    label = _symbol_name(address, region)
    region_name = region.name
    category = _symbol_kind(region)
    return _Symbol(address=address, name=label, region=region_name, kind=category)
def _public_symbol(symbol: _Symbol) -> dict[str, object]:
widths = sorted(symbol.widths, key=lambda item: ("byte", "word").index(item) if item in {"byte", "word"} else 99)
payload: dict[str, object] = {
"address": symbol.address,
"name": symbol.name,
"region": symbol.region,
"kind": symbol.kind,
"access_count": symbol.access_count,
"read_count": symbol.read_count,
"write_count": symbol.write_count,
"unknown_count": symbol.unknown_count,
"width_hints": widths,
"width": widths[0] if len(widths) == 1 else "mixed" if widths else None,
"first_access": symbol.first_access,
"last_access": symbol.last_access,
"accesses": sorted(symbol.accesses, key=lambda item: int(item["instruction_address"])),
}
if symbol.xrefs:
payload["xref_count"] = len(symbol.xrefs)
payload["xrefs"] = sorted(symbol.xrefs, key=lambda item: (str(item["source"]), int(item["address"])))
return payload
def _symbol_name(address: int, region: MemoryRegion) -> str:
    """Choose the display name for ``address`` based on the region kind.

    Register-region addresses use their ``IO_REGISTERS`` name when one exists
    and ``io_<hex>`` otherwise; RAM addresses become ``ram_<hex>``; everything
    else becomes ``mem_<hex>``.
    """
    kind = region.kind
    hex_text = _hex_address(address)
    if kind == "registers":
        return IO_REGISTERS.get(address, f"io_{hex_text}")
    prefix = "ram" if kind == "ram" else "mem"
    return f"{prefix}_{hex_text}"
def _symbol_kind(region: MemoryRegion) -> str:
if region.kind == "registers":
return "register"
if region.kind == "ram":
return "ram"
return "memory"
def _hex_address(address: int) -> str:
width = 4 if address <= 0xFFFF else 6
return f"{address:0{width}X}"
def _skip_region(region: MemoryRegion, include_registers: bool) -> bool:
return region.kind == "registers" and not include_registers
def _region_for(address: int, regions: Sequence[MemoryRegion | Mapping[str, object]]) -> MemoryRegion:
    """Find the first entry of ``regions`` that contains ``address``.

    Entries are coerced to ``MemoryRegion`` one at a time, in order, until one
    matches. When none matches, the lookup is delegated to ``region_for``.
    """
    candidates = (_coerce_region(entry) for entry in regions)
    match = next((candidate for candidate in candidates if candidate.contains(address)), None)
    if match is not None:
        return match
    return region_for(address)
def _coerce_region(item: MemoryRegion | Mapping[str, object]) -> MemoryRegion:
    """Return ``item`` as a ``MemoryRegion``.

    An existing ``MemoryRegion`` is returned unchanged. A mapping must provide
    ``"name"``, ``"start"``, ``"end"`` and ``"kind"``; ``"manual"`` is optional
    and defaults to an empty string. The values are passed to ``MemoryRegion``
    positionally in that order.
    """
    if not isinstance(item, MemoryRegion):
        name = str(item["name"])
        start = int(item["start"])
        end = int(item["end"])
        kind = str(item["kind"])
        manual = str(item.get("manual", ""))
        return MemoryRegion(name, start, end, kind, manual)
    return item
def _record_data_candidate_xrefs(
symbols: dict[int, _Symbol],
data_candidates: Mapping[str, object] | None,
regions: Sequence[MemoryRegion | Mapping[str, object]],
include_registers: bool,
) -> None:
if not data_candidates:
return
pointer_tables = data_candidates.get("pointer_tables")
if not isinstance(pointer_tables, Iterable):
return
for table in pointer_tables:
if not isinstance(table, Mapping):
continue
source_address = table.get("address")
targets = table.get("targets")
if not isinstance(source_address, int) or not isinstance(targets, Iterable):
continue
for target in targets:
if not isinstance(target, int):
continue
region = _region_for(target, regions)
if _skip_region(region, include_registers):
continue
symbol = symbols.setdefault(target, _new_symbol(target, region))
symbol.record_xref(
{
"source": "pointer_table",
"address": source_address,
"target": target,
},
)
def _direction_for_reference(
    mnemonic: str,
    operands: Sequence[str],
    operand_index: int | None,
    reference_count: int,
) -> str:
    """Classify a reference as ``"read"``, ``"write"``, ``"read_write"`` or ``"unknown"``.

    The decision is driven by the mnemonic root and, where the root alone is
    not enough, by whether the reference sits in the last operand.
    ``operand_index`` is ``None`` when the reference could not be tied to an
    operand. Roots outside the known groups are ``"unknown"``.
    """
    root = _mnemonic_root(mnemonic)
    last_index = len(operands) - 1 if operands else None
    unresolved = operand_index is None

    if root in READ_ONLY_ROOTS:
        return "read"

    if root in READ_MODIFY_WRITE_ROOTS:
        return "read_write"

    if root in WRITE_ONLY_ROOTS:
        # STC only counts as a write when the reference is unresolved or is the last operand.
        stc_elsewhere = root == "STC" and not unresolved and operand_index != last_index
        return "unknown" if stc_elsewhere else "write"

    if root == "LDC":
        # LDC only counts as a read when the reference is unresolved or is the first operand.
        return "read" if unresolved or operand_index == 0 else "unknown"

    if root in MOV_ROOTS:
        return _source_or_destination_direction(operands, operand_index, reference_count)

    if root in DESTINATION_UPDATE_ROOTS:
        if unresolved:
            return "unknown"
        if operand_index == last_index:
            return "read_write"
        return "read"

    return "unknown"
def _source_or_destination_direction(
operands: Sequence[str],
operand_index: int | None,
reference_count: int,
) -> str:
if not operands:
return "unknown"
destination_index = len(operands) - 1
if operand_index is not None:
return "write" if operand_index == destination_index else "read"
memory_indexes = [index for index, operand in enumerate(operands) if _is_memory_operand(operand)]
if reference_count == 1 and len(memory_indexes) == 1:
return "write" if memory_indexes[0] == destination_index else "read"
return "unknown"
def _operand_index_for_reference(
    address: int,
    refs: Sequence[int],
    operands: Sequence[str],
) -> int | None:
    """Work out which operand a referenced ``address`` belongs to.

    The operand text is consulted first: if exactly one operand mentions the
    address, that operand wins. Otherwise references are paired with memory
    operands by position, which is only attempted when there are as many
    references as memory operands. ``None`` means no operand could be chosen.
    """
    mentioning = [slot for slot, text in enumerate(operands) if _operand_mentions_address(text, address)]
    if len(mentioning) == 1:
        return mentioning[0]

    memory_slots = [slot for slot, text in enumerate(operands) if _is_memory_operand(text)]
    if len(refs) != len(memory_slots):
        return None
    if len(refs) == 1:
        # A single reference and a single memory operand pair up unconditionally.
        return memory_slots[0]
    try:
        position = refs.index(address)
    except ValueError:
        return None
    return memory_slots[position]
def _operand_mentions_address(operand: str, address: int) -> bool:
normalized = operand.upper()
if f"H'{address & 0xFFFF:04X}" in normalized:
return True
if address in IO_REGISTERS and IO_REGISTERS[address].upper() in normalized:
return True
return False
def _is_memory_operand(operand: str) -> bool:
return operand.strip().startswith("@")
def _split_operands(operands: str) -> list[str]:
parts: list[str] = []
current: list[str] = []
depth = 0
for char in operands:
if char in "({":
depth += 1
elif char in ")}" and depth:
depth -= 1
if char == "," and depth == 0:
parts.append("".join(current).strip())
current = []
continue
current.append(char)
if current or operands:
parts.append("".join(current).strip())
return [part for part in parts if part]
def _mnemonic_root(mnemonic: str) -> str:
return mnemonic.rsplit(".", 1)[0]
def _width_hint(mnemonic: str) -> str | None:
if mnemonic.endswith(".B"):
return "byte"
if mnemonic.endswith(".W"):
return "word"
return None