LCD decompile
This commit is contained in:
386
h8536/lcd_text.py
Normal file
386
h8536/lcd_text.py
Normal file
@@ -0,0 +1,386 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterable, Mapping
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
from .formatting import h16
|
||||
from .model import Instruction
|
||||
from .rom import Rom
|
||||
|
||||
|
||||
# Byte values accepted as display text by the scanners: 0x20 through 0x7E inclusive.
DISPLAY_PRINTABLE = set(range(0x20, 0x7F))
|
||||
# Non-alphanumeric bytes that `_display_char_score` still rates as plausible display text.
DISPLAY_PUNCTUATION = set(b" ./:-+,%()[]")
|
||||
# Search terms used by `analyze_lcd_text` when the caller does not pass `search_terms`.
DEFAULT_SEARCH_TERMS = ("CONNECT",)
|
||||
# First opcode byte of the range scanned as MOV:I.W; (opcode - this value) is used as the register number.
MOV_IW_FIRST_OPCODE = 0x58
|
||||
# Last opcode byte (inclusive) of the range scanned as MOV:I.W.
MOV_IW_LAST_OPCODE = 0x5F
|
||||
|
||||
|
||||
def analyze_lcd_text(
    rom: Rom,
    instructions: Mapping[int, Instruction] | Iterable[Instruction] | None = None,
    *,
    start: int | None = None,
    end: int | None = None,
    search_terms: Iterable[str] = DEFAULT_SEARCH_TERMS,
    max_candidates: int = 240,
) -> dict[str, object]:
    """Find likely fixed-width LCD/menu strings and their nearby raw xrefs.

    The firmware stores some display text as inline menu/script records rather
    than as plain null-terminated strings. This pass scans the ROM bytes
    directly, then correlates likely text fields with decoded and raw immediate
    address loads such as ``MOV:I.W #H'63D4, R0``.

    Args:
        rom: ROM image; ``rom.end`` bounds the scan and the object is handed to
            the scanning, xref and search helpers.
        instructions: Optional decoded instructions, either as a mapping (its
            values are used) or as any iterable. ``None`` disables the
            decoded-operand xrefs; raw MOV:I.W xrefs are still collected.
        start: First address to scan for text. ``None`` or a negative value
            means 0.
        end: Exclusive end of the text scan, clamped to ``rom.end``. ``None``
            means ``rom.end``.
        search_terms: Terms to look up in the ROM and among the candidates.
            A bare ``str`` is treated as a single term.
        max_candidates: Number of highest-scoring candidates to keep. Values
            below zero are treated as zero.

    Returns:
        A dict with the keys ``"strings"`` (kept candidates in address order,
        each annotated with ``"xrefs"``/``"xref_count"`` when references were
        found), ``"regions"``, ``"searches"`` (one entry per term) and
        ``"notes"``.
    """
    lower = 0 if start is None else max(0, start)
    upper = rom.end if end is None else min(rom.end, end)

    # A str is itself an iterable of one-character strings; iterating it would
    # search for each letter separately, which is never what the caller meant.
    terms = (search_terms,) if isinstance(search_terms, str) else tuple(search_terms)
    # A negative slice bound would drop candidates from the tail instead of
    # keeping none, so clamp the limit at zero.
    limit = max(0, max_candidates)

    candidates = _unique_candidates(
        [
            *_ff_terminated_candidates(rom, lower, upper),
            *_printable_run_candidates(rom, lower, upper),
        ],
    )
    # Keep the best-scoring candidates (ties broken by address), then present
    # the survivors in address order.
    candidates = sorted(candidates, key=lambda item: (-float(item["score"]), int(item["address"])))[:limit]
    candidates.sort(key=lambda item: int(item["address"]))

    instruction_list = _instruction_sequence(instructions)
    xrefs_by_address = _xref_map(rom, candidates, instruction_list)
    for candidate in candidates:
        xrefs = xrefs_by_address.get(int(candidate["address"]), [])
        if xrefs:
            candidate["xrefs"] = xrefs
            candidate["xref_count"] = len(xrefs)

    regions = _group_regions(candidates)
    searches = [_search_term(rom, candidates, term) for term in terms]
    return {
        "strings": candidates,
        "regions": regions,
        "searches": searches,
        "notes": [
            "LCD text scan is byte-oriented and conservative; strings may be inline script fields.",
            "Raw xrefs include MOV:I.W immediates to the string address and nearby record prefixes.",
        ],
    }
|
||||
|
||||
|
||||
def lcd_text_comment_for_instruction(analysis: Mapping[str, object] | None, address: int) -> str:
    """Return a listing comment for the instruction at ``address``, if any.

    Walks ``analysis["strings"]`` and returns a comment for the first
    candidate that has an xref whose ``"address"`` equals ``address``.
    Entries that are not mappings are ignored.

    Args:
        analysis: Result dict with a ``"strings"`` list, or ``None``/empty.
        address: Address of the instruction being annotated.

    Returns:
        ``"LCD text xref <addr> '<text>'"`` for the first match, otherwise
        an empty string.
    """
    if not analysis:
        return ""
    entries = (entry for entry in analysis.get("strings", []) if isinstance(entry, Mapping))
    for entry in entries:
        for ref in entry.get("xrefs", []):
            if not isinstance(ref, Mapping):
                continue
            if int(ref.get("address", -1)) != address:
                continue
            # Prefer the trimmed text, fall back to the raw text.
            label = str(entry.get("trimmed") or entry.get("text") or "").strip()
            return f"LCD text xref {h16(int(entry['address']))} {label!r}"
    return ""
|
||||
|
||||
|
||||
def _instruction_sequence(
    instructions: Mapping[int, Instruction] | Iterable[Instruction] | None,
) -> list[Instruction]:
    """Return the given instructions as a list ordered by ``.address``.

    A mapping contributes its values; any other iterable is consumed as is.
    ``None`` yields an empty list.
    """
    if instructions is None:
        return []
    if isinstance(instructions, Mapping):
        ordered = list(instructions.values())
    else:
        ordered = list(instructions)
    ordered.sort(key=lambda item: item.address)
    return ordered
|
||||
|
||||
|
||||
def _ff_terminated_candidates(rom: Rom, start: int, end: int) -> list[dict[str, object]]:
    """Collect printable runs in ``[start, end)`` that are followed by 0xFF bytes.

    Each maximal run of ``DISPLAY_PRINTABLE`` bytes is kept when at least one
    0xFF byte follows it directly and the run passes ``_looks_like_lcd_text``.
    The number of trailing 0xFF bytes is passed on as ``ff_count``.
    """
    found: list[dict[str, object]] = []
    pos = start
    while pos < end:
        if rom.u8(pos) in DISPLAY_PRINTABLE:
            run_start = pos
            run = bytearray()
            while pos < end and rom.u8(pos) in DISPLAY_PRINTABLE:
                run.append(rom.u8(pos))
                pos += 1
            # Measure the 0xFF padding that immediately follows the run.
            tail = pos
            while tail < end and rom.u8(tail) == 0xFF:
                tail += 1
            padding = tail - pos
            if padding and _looks_like_lcd_text(run):
                found.append(_candidate(run_start, run, "ff_terminated", ff_count=padding))
            # Resume after the padding, or after the byte that ended the run.
            pos = max(tail, pos + 1)
        else:
            pos += 1
    return found
|
||||
|
||||
|
||||
def _printable_run_candidates(rom: Rom, start: int, end: int) -> list[dict[str, object]]:
    """Collect every printable run in ``[start, end)`` that looks like display text.

    Unlike the 0xFF-terminated scan, no terminator is required and the long-run
    limit of ``_looks_like_lcd_text`` is lifted (``allow_long=True``).
    """
    found: list[dict[str, object]] = []
    pos = start
    while pos < end:
        if rom.u8(pos) in DISPLAY_PRINTABLE:
            run_start = pos
            run = bytearray()
            while pos < end and rom.u8(pos) in DISPLAY_PRINTABLE:
                run.append(rom.u8(pos))
                pos += 1
            if _looks_like_lcd_text(run, allow_long=True):
                found.append(_candidate(run_start, run, "printable_run", ff_count=0))
        # Step over the non-printable byte (the one tested above, or the one
        # that ended the run).
        pos += 1
    return found
|
||||
|
||||
|
||||
def _candidate(address: int, raw: bytearray, kind: str, *, ff_count: int) -> dict[str, object]:
    """Build the result record for one run of display bytes.

    Args:
        address: Address of the first byte of the run.
        raw: The bytes of the run.
        kind: Label of the scanner that produced the run.
        ff_count: Number of 0xFF bytes that followed the run (0 for none).

    Returns:
        A dict with address, length, decoded and stripped text, kind, rounded
        score and confidence label. ``"ff_terminators"`` is added when
        ``ff_count`` is non-zero and ``"segments"`` when the run is longer
        than 32 bytes.
    """
    decoded = raw.decode("ascii", errors="replace")
    rating = _display_score(raw, kind, ff_count)
    size = len(raw)
    record: dict[str, object] = {
        "address": address,
        "length": size,
        "text": decoded,
        "trimmed": decoded.strip(),
        "kind": kind,
        "score": round(rating, 3),
        # The label is derived from the unrounded score.
        "confidence": _confidence(rating),
    }
    if ff_count:
        record["ff_terminators"] = ff_count
    if size > 32:
        record["segments"] = _fixed_width_segments(decoded)
    return record
|
||||
|
||||
|
||||
def _unique_candidates(candidates: list[dict[str, object]]) -> list[dict[str, object]]:
    """Drop duplicate candidates that share the same address and length.

    For each ``(address, length)`` pair the entry with the strictly highest
    score wins; on a tie the earlier entry is kept. The result preserves the
    order in which each pair was first seen.
    """
    best: dict[tuple[int, int], dict[str, object]] = {}
    for entry in candidates:
        slot = (int(entry["address"]), int(entry["length"]))
        incumbent = best.get(slot)
        if incumbent is not None and not float(entry["score"]) > float(incumbent["score"]):
            continue
        best[slot] = entry
    return [*best.values()]
|
||||
|
||||
|
||||
def _looks_like_lcd_text(raw: bytearray, *, allow_long: bool = False) -> bool:
    """Decide whether a run of bytes plausibly is display text.

    The run must be at least 4 bytes long, at most 80 bytes unless
    ``allow_long`` is set, keep at least 2 bytes after stripping whitespace,
    and at least 78% of the stripped bytes must have a positive
    ``_display_char_score``.
    """
    size = len(raw)
    if size < 4 or (size > 80 and not allow_long):
        return False
    core = bytes(raw).strip()
    if len(core) < 2:
        return False
    plausible = [byte for byte in core if _display_char_score(byte) > 0]
    return len(plausible) / len(core) >= 0.78
|
||||
|
||||
|
||||
def _display_char_score(value: int) -> float:
    """Rate how plausible a single byte is as a display character.

    Returns 1.0 for ``A``-``Z`` and ``0``-``9``, 0.8 for a space, 0.7 for a
    member of ``DISPLAY_PUNCTUATION``, 0.35 for ``a``-``z`` and 0.0 for
    anything else.
    """
    is_upper = 0x41 <= value <= 0x5A
    is_digit = 0x30 <= value <= 0x39
    if is_upper or is_digit:
        return 1.0
    if value == 0x20:
        return 0.8
    if value in DISPLAY_PUNCTUATION:
        return 0.7
    return 0.35 if 0x61 <= value <= 0x7A else 0.0
|
||||
|
||||
|
||||
def _display_score(raw: bytearray, kind: str, ff_count: int) -> float:
    """Score a run of bytes as display text; higher is more plausible.

    The base is the mean ``_display_char_score`` of the whitespace-stripped
    bytes. Bonuses are added for certain run lengths (0.15), for up to three
    trailing 0xFF bytes (0.08 each) and for the ``"ff_terminated"`` kind
    (0.08). Runs longer than 40 bytes lose 0.2. The result never drops
    below 0.0, and a run that is empty after stripping scores 0.0.
    """
    core = bytes(raw).strip()
    if not core:
        return 0.0
    size = len(raw)
    total = sum(map(_display_char_score, core)) / len(core)
    if size in (8, 10, 16, 17, 18, 19, 20):
        total += 0.15
    total += min(ff_count, 3) * 0.08
    if kind == "ff_terminated":
        total += 0.08
    if size > 40:
        total -= 0.2
    return max(0.0, total)
|
||||
|
||||
|
||||
def _confidence(score: float) -> str:
    """Map a display score to ``"high"`` (>= 1.05), ``"medium"`` (>= 0.82) or ``"low"``."""
    for floor, label in ((1.05, "high"), (0.82, "medium")):
        if score >= floor:
            return label
    return "low"
|
||||
|
||||
|
||||
def _fixed_width_segments(text: str) -> list[dict[str, object]]:
    """Split ``text`` into fixed-width rows for each width that fits.

    Widths 10, 16, 18 and 20 are tried in that order. A width is used when
    the text is at least two rows long and at least two rows keep two or
    more characters after stripping. At most the first three accepted
    layouts are returned, each as ``{"width": w, "chunks": rows}``.
    """
    total = len(text)
    layouts: list[dict[str, object]] = []
    for width in (10, 16, 18, 20):
        if total >= 2 * width:
            rows = [text[offset : offset + width] for offset in range(0, total, width)]
            meaningful = sum(1 for row in rows if len(row.strip()) >= 2)
            if meaningful >= 2:
                layouts.append({"width": width, "chunks": rows})
    return layouts[:3]
|
||||
|
||||
|
||||
def _xref_map(
    rom: Rom,
    candidates: list[dict[str, object]],
    instructions: list[Instruction],
) -> dict[int, list[dict[str, object]]]:
    """Collect references to each candidate's address or its close neighbourhood.

    Every address within 4 bytes either side of a candidate (limited to
    0..0xFFFF) counts as a target for that candidate. When two candidates
    claim the same target, the one listed first keeps it.

    Returns:
        Candidate address -> list of xref records, containing only the
        candidates that received at least one reference.
    """
    bases = [int(item["address"]) for item in candidates]
    owner_by_target: dict[int, int] = {}
    for base in bases:
        first = max(base - 4, 0)
        last = min(base + 4, 0xFFFF)
        for target in range(first, last + 1):
            if target not in owner_by_target:
                owner_by_target[target] = base

    collected: dict[int, list[dict[str, object]]] = {base: [] for base in bases}
    _add_decoded_xrefs(collected, owner_by_target, instructions)
    _add_raw_mov_iw_xrefs(collected, owner_by_target, rom)
    return {base: refs for base, refs in collected.items() if refs}
|
||||
|
||||
|
||||
def _add_decoded_xrefs(
    xrefs: dict[int, list[dict[str, object]]],
    target_to_address: Mapping[int, int],
    instructions: list[Instruction],
) -> None:
    """Append an xref for every decoded instruction whose operands mention a target.

    A target is matched by testing whether its ``H'XXXX`` spelling (four
    upper-case hex digits) is ``in`` ``ins.operands``.
    NOTE(review): whether that is a substring or an element test depends on
    the type of ``Instruction.operands``, which is not visible here.

    Args:
        xrefs: Candidate address -> xref list; mutated in place. It must
            already hold a list for every candidate address that appears in
            ``target_to_address``.
        target_to_address: Target address -> owning candidate address.
        instructions: Decoded instructions to inspect.
    """
    # The needle depends only on the target, so format it once here instead
    # of once per (instruction, target) pair.
    needles = [
        (f"H'{target:04X}", target, candidate_address)
        for target, candidate_address in target_to_address.items()
    ]
    for ins in instructions:
        operands = ins.operands
        for needle, target, candidate_address in needles:
            if needle in operands:
                xrefs[candidate_address].append(
                    {
                        "address": ins.address,
                        "kind": "decoded_operand",
                        "target": target,
                        "delta": target - candidate_address,
                        "instruction": ins.text,
                    },
                )
|
||||
|
||||
|
||||
def _add_raw_mov_iw_xrefs(
    xrefs: dict[int, list[dict[str, object]]],
    target_to_address: Mapping[int, int],
    rom: Rom,
) -> None:
    """Append an xref for every raw byte pattern that looks like MOV:I.W to a target.

    Every offset of ``rom.data`` is tried, without regard to instruction
    boundaries: a byte in the MOV:I.W opcode range followed by a big-endian
    16-bit value that is a known target counts as a reference. When a BSR
    pattern directly follows, it is attached as ``"following_bsr"``.

    Args:
        xrefs: Candidate address -> xref list; mutated in place.
        target_to_address: Target address -> owning candidate address.
        rom: ROM image; only ``rom.data`` is read.
    """
    image = rom.data
    # Three bytes are needed at each offset: opcode plus 16-bit immediate.
    stop = max(len(image) - 2, 0)
    for offset in range(stop):
        opcode = image[offset]
        if opcode < MOV_IW_FIRST_OPCODE or opcode > MOV_IW_LAST_OPCODE:
            continue
        target = (image[offset + 1] << 8) | image[offset + 2]
        owner = target_to_address.get(target)
        if owner is None:
            continue
        register = f"R{opcode - MOV_IW_FIRST_OPCODE}"
        record: dict[str, object] = {
            "address": offset,
            "kind": "raw_mov_iw",
            "target": target,
            "delta": target - owner,
            "register": register,
            "instruction": f"MOV:I.W #{h16(target)}, {register}",
        }
        call = _following_bsr(image, offset + 3)
        if call:
            record["following_bsr"] = call
        xrefs[owner].append(record)
|
||||
|
||||
|
||||
def _following_bsr(data: bytes, address: int) -> dict[str, object] | None:
    """Decode a 3-byte BSR pattern at ``address``, if one is there.

    The pattern is the byte 0x1E followed by a signed big-endian 16-bit
    displacement. The branch target is taken relative to the byte after the
    pattern and wrapped to 16 bits.

    Returns:
        ``{"address", "target", "instruction"}`` for a match, or ``None`` when
        fewer than three bytes remain or the first byte is not 0x1E.
    """
    if address + 2 >= len(data):
        return None
    if data[address] != 0x1E:
        return None
    high, low = data[address + 1], data[address + 2]
    offset = (high << 8) | low
    if offset >= 0x8000:
        # Reinterpret the unsigned 16-bit value as two's complement.
        offset -= 0x10000
    destination = (address + 3 + offset) & 0xFFFF
    return {"address": address, "target": destination, "instruction": f"BSR {h16(destination)}"}
|
||||
|
||||
|
||||
def _group_regions(candidates: list[dict[str, object]]) -> list[dict[str, object]]:
    """Cluster candidates into regions of nearby text.

    Candidates are taken in the given order. A new group starts whenever a
    candidate begins more than 0x80 bytes after the end of the previous one.
    Each finished group is handed to ``_append_region``.
    """
    regions: list[dict[str, object]] = []
    bucket: list[dict[str, object]] = []
    last_end: int | None = None
    for item in candidates:
        begin = int(item["address"])
        size = int(item["length"])
        gap_too_wide = last_end is not None and begin - last_end > 0x80
        if bucket and gap_too_wide:
            _append_region(regions, bucket)
            bucket = []
        bucket.append(item)
        last_end = begin + size
    if bucket:
        _append_region(regions, bucket)
    return regions
|
||||
|
||||
|
||||
def _append_region(regions: list[dict[str, object]], candidates: list[dict[str, object]]) -> None:
    """Append a region summary for ``candidates`` to ``regions``.

    Groups with fewer than two candidates are ignored. The summary holds the
    first candidate's address as ``"start"``, the furthest end address as
    ``"end"``, the candidate count and the text of up to eight candidates
    (trimmed text preferred, raw text otherwise).
    """
    count = len(candidates)
    if count < 2:
        return
    first = candidates[0]
    ends = [int(item["address"]) + int(item["length"]) for item in candidates]
    previews = []
    for item in candidates[:8]:
        previews.append(str(item.get("trimmed") or item.get("text")))
    regions.append(
        {
            "start": int(first["address"]),
            "end": max(ends),
            "count": count,
            "samples": previews,
        },
    )
|
||||
|
||||
|
||||
def _search_term(rom: Rom, candidates: list[dict[str, object]], term: str) -> dict[str, object]:
    """Report where ``term`` occurs in the ROM and among the text candidates.

    Args:
        rom: ROM image; ``rom.data`` is searched for the literal term.
        candidates: Text candidates as produced by the scanners.
        term: Text to look for. Non-ASCII characters are dropped for the
            literal byte search.

    Returns:
        A dict with ``"term"``, ``"literal_hits"`` (offsets in ``rom.data``),
        ``"candidate_hits"`` (candidates whose text contains the term,
        ignoring case), ``"near_matches"`` and ``"status"``, which is
        ``"found"`` when either hit list is non-empty and ``"not_found"``
        otherwise.
    """
    raw = term.encode("ascii", errors="ignore")
    literal_hits = _literal_hits(rom.data, raw)
    folded = term.upper()
    # The empty string is a substring of every string, so an empty term would
    # otherwise list every candidate and report "found". The literal search
    # already yields nothing for an empty needle; keep the two consistent.
    candidate_hits = [
        {
            "address": int(candidate["address"]),
            "text": candidate["text"],
            "trimmed": candidate["trimmed"],
        }
        for candidate in candidates
        if folded and folded in str(candidate.get("text", "")).upper()
    ]
    near_matches = _near_matches(candidates, term)
    return {
        "term": term,
        "literal_hits": literal_hits,
        "candidate_hits": candidate_hits,
        "near_matches": near_matches,
        "status": "found" if literal_hits or candidate_hits else "not_found",
    }
|
||||
|
||||
|
||||
def _literal_hits(data: bytes, needle: bytes) -> list[int]:
    """Return every offset in ``data`` where ``needle`` occurs, ignoring ASCII case.

    Overlapping occurrences are all reported. An empty needle yields no hits.
    """
    if not needle:
        return []
    haystack = data.upper()
    pattern = needle.upper()
    positions: list[int] = []
    found = haystack.find(pattern)
    while found >= 0:
        positions.append(found)
        # Advance by one byte only, so overlapping matches are found too.
        found = haystack.find(pattern, found + 1)
    return positions
|
||||
|
||||
|
||||
def _near_matches(candidates: list[dict[str, object]], term: str) -> list[dict[str, object]]:
    """Return up to 12 candidates whose text is similar to ``term``.

    Both sides are reduced with ``_normalize_text`` before comparison. A
    candidate is kept when its ``SequenceMatcher`` ratio against the term is
    at least 0.34. Results are ordered by descending rounded ratio, then by
    address.
    """
    wanted = _normalize_text(term)
    scored: list[dict[str, object]] = []
    for item in candidates:
        label = item.get("trimmed") or item.get("text") or ""
        comparable = _normalize_text(str(label))
        if not comparable:
            continue
        similarity = SequenceMatcher(None, wanted, comparable).ratio()
        if not similarity >= 0.34:
            continue
        scored.append(
            {
                "address": int(item["address"]),
                "text": item["text"],
                "trimmed": item["trimmed"],
                "score": round(similarity, 3),
            },
        )
    ranked = sorted(scored, key=lambda entry: (-float(entry["score"]), int(entry["address"])))
    return ranked[:12]
|
||||
|
||||
|
||||
def _normalize_text(text: str) -> str:
    """Upper-case ``text`` and keep only the characters ``A``-``Z`` and ``0``-``9``."""
    keep = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    return "".join(symbol for symbol in text.upper() if symbol in keep)
|
||||
Reference in New Issue
Block a user