LCD decompile

2026-05-25 15:10:32 +10:00
parent 1d7f00e59c
commit cdfb811c28
15 changed files with 8836 additions and 19 deletions
--- a/h8536/lcd_text.py
+++ b/h8536/lcd_text.py
@@ -0,0 +1,386 @@
+from __future__ import annotations
+
+from collections.abc import Iterable, Mapping
+from difflib import SequenceMatcher
+
+from .formatting import h16
+from .model import Instruction
+from .rom import Rom
+
+
+# Byte values accepted as part of a text run: 0x20 (space) through 0x7E.
+DISPLAY_PRINTABLE = set(range(0x20, 0x7F))
+# Non-alphanumeric byte values that still score as plausible display text.
+DISPLAY_PUNCTUATION = set(b" ./:-+,%()[]")
+# Terms looked up when analyze_lcd_text() is called without search_terms.
+DEFAULT_SEARCH_TERMS = ("CONNECT",)
+# Inclusive opcode-byte range matched by the raw MOV:I.W scan; the offset from
+# the first opcode is used as the register number in the reported operand.
+MOV_IW_FIRST_OPCODE = 0x58
+MOV_IW_LAST_OPCODE = 0x5F
+
+
+def analyze_lcd_text(
+    rom: Rom,
+    instructions: Mapping[int, Instruction] | Iterable[Instruction] | None = None,
+    *,
+    start: int | None = None,
+    end: int | None = None,
+    search_terms: Iterable[str] = DEFAULT_SEARCH_TERMS,
+    max_candidates: int = 240,
+) -> dict[str, object]:
+    """Scan ROM bytes for probable LCD/menu text and attach nearby raw xrefs.
+
+    Two byte-level scans (printable runs followed by ``0xFF`` bytes, and plain
+    printable runs) are merged and de-duplicated. The ``max_candidates`` best
+    scoring entries are kept, reported in address order, and matched against
+    decoded instruction operands and raw ``MOV:I.W`` immediates.
+
+    Args:
+        rom: ROM image to scan.
+        instructions: Optional decoded instructions, as a mapping or an iterable.
+        start: Lower scan bound; negative values are clamped to 0.
+        end: Exclusive upper scan bound; clamped to ``rom.end``.
+        search_terms: Terms to look up in the ROM bytes and among the candidates.
+        max_candidates: Number of top-scoring candidates to retain.
+
+    Returns:
+        A dict with ``strings``, ``regions``, ``searches`` and ``notes`` entries.
+    """
+
+    scan_from = max(0, start) if start is not None else 0
+    scan_to = min(rom.end, end) if end is not None else rom.end
+
+    found = _unique_candidates(
+        _ff_terminated_candidates(rom, scan_from, scan_to)
+        + _printable_run_candidates(rom, scan_from, scan_to),
+    )
+    # Rank by score (ties go to the lower address), keep the best entries,
+    # then present the survivors in address order.
+    found.sort(key=lambda entry: (-float(entry["score"]), int(entry["address"])))
+    strings = found[:max_candidates]
+    strings.sort(key=lambda entry: int(entry["address"]))
+
+    decoded = _instruction_sequence(instructions)
+    xref_lookup = _xref_map(rom, strings, decoded)
+    for entry in strings:
+        refs = xref_lookup.get(int(entry["address"]), [])
+        if not refs:
+            continue
+        entry["xrefs"] = refs
+        entry["xref_count"] = len(refs)
+
+    report: dict[str, object] = {"strings": strings}
+    report["regions"] = _group_regions(strings)
+    report["searches"] = [_search_term(rom, strings, term) for term in search_terms]
+    report["notes"] = [
+        "LCD text scan is byte-oriented and conservative; strings may be inline script fields.",
+        "Raw xrefs include MOV:I.W immediates to the string address and nearby record prefixes.",
+    ]
+    return report
+
+
+def lcd_text_comment_for_instruction(analysis: Mapping[str, object] | None, address: int) -> str:
+    """Return a comment naming the LCD string referenced from ``address``.
+
+    Walks ``analysis["strings"]`` in order and returns a comment for the first
+    candidate that has an xref whose ``address`` equals ``address``. Entries
+    that are not mappings are skipped. Returns ``""`` when ``analysis`` is
+    empty or nothing matches.
+    """
+    if not analysis:
+        return ""
+    string_entries = analysis.get("strings", [])
+    for entry in string_entries:
+        if not isinstance(entry, Mapping):
+            continue
+        for ref in entry.get("xrefs", []):
+            if not isinstance(ref, Mapping):
+                continue
+            if int(ref.get("address", -1)) != address:
+                continue
+            # Prefer the trimmed text, fall back to the raw text.
+            label = entry.get("trimmed") or entry.get("text") or ""
+            shown = str(label).strip()
+            location = h16(int(entry["address"]))
+            return f"LCD text xref {location} {shown!r}"
+    return ""
+
+
+def _instruction_sequence(
+    instructions: Mapping[int, Instruction] | Iterable[Instruction] | None,
+) -> list[Instruction]:
+    """Return the given instructions as a new list ordered by ``address``.
+
+    Accepts ``None`` (yields an empty list), a mapping (its values are used),
+    or any iterable of instructions.
+    """
+    if instructions is None:
+        return []
+    if isinstance(instructions, Mapping):
+        pool = list(instructions.values())
+    else:
+        pool = list(instructions)
+    pool.sort(key=lambda item: item.address)
+    return pool
+
+
+def _ff_terminated_candidates(rom: Rom, start: int, end: int) -> list[dict[str, object]]:
+    """Collect printable runs in ``[start, end)`` that are followed by ``0xFF``.
+
+    Each maximal run of ``DISPLAY_PRINTABLE`` bytes is checked for at least one
+    trailing ``0xFF`` byte; runs that also pass ``_looks_like_lcd_text`` become
+    ``"ff_terminated"`` candidates carrying the number of ``0xFF`` bytes seen.
+    """
+    found: list[dict[str, object]] = []
+    pos = start
+    while pos < end:
+        run_start = pos
+        run = bytearray()
+        while pos < end:
+            value = rom.u8(pos)
+            if value not in DISPLAY_PRINTABLE:
+                break
+            run.append(value)
+            pos += 1
+        if not run:
+            # Not inside text: step over this byte.
+            pos += 1
+            continue
+        pad_end = pos
+        while pad_end < end and rom.u8(pad_end) == 0xFF:
+            pad_end += 1
+        padding = pad_end - pos
+        if padding and _looks_like_lcd_text(run):
+            found.append(_candidate(run_start, run, "ff_terminated", ff_count=padding))
+        # Resume after the padding, or after the byte that ended the run.
+        pos = pad_end if padding else pos + 1
+    return found
+
+
+def _printable_run_candidates(rom: Rom, start: int, end: int) -> list[dict[str, object]]:
+    """Collect every printable run in ``[start, end)`` that looks like LCD text.
+
+    Unlike the ``0xFF``-terminated scan, no terminator is required and runs
+    longer than 80 bytes are allowed. Matches are emitted as ``"printable_run"``
+    candidates with an ``ff_count`` of 0.
+    """
+    results: list[dict[str, object]] = []
+    cursor = start
+    while cursor < end:
+        first = cursor
+        collected = bytearray()
+        while cursor < end:
+            byte = rom.u8(cursor)
+            if byte not in DISPLAY_PRINTABLE:
+                break
+            collected.append(byte)
+            cursor += 1
+        if collected and _looks_like_lcd_text(collected, allow_long=True):
+            results.append(_candidate(first, collected, "printable_run", ff_count=0))
+        # Step over the byte that ended the run (or the non-text byte itself).
+        cursor += 1
+    return results
+
+
+def _candidate(address: int, raw: bytearray, kind: str, *, ff_count: int) -> dict[str, object]:
+    """Build the report record for one text run found at ``address``.
+
+    The record always holds the address, byte length, decoded and trimmed
+    text, kind, rounded score and confidence label. ``ff_terminators`` is
+    added when ``ff_count`` is non-zero, and ``segments`` when the run is
+    longer than 32 bytes.
+    """
+    decoded = raw.decode("ascii", errors="replace")
+    rating = _display_score(raw, kind, ff_count)
+    size = len(raw)
+    record: dict[str, object] = {
+        "address": address,
+        "length": size,
+        "text": decoded,
+        "trimmed": decoded.strip(),
+        "kind": kind,
+        "score": round(rating, 3),
+        "confidence": _confidence(rating),
+    }
+    if ff_count:
+        record["ff_terminators"] = ff_count
+    if size > 32:
+        record["segments"] = _fixed_width_segments(decoded)
+    return record
+
+
+def _unique_candidates(candidates: list[dict[str, object]]) -> list[dict[str, object]]:
+    """Drop duplicate candidates that share the same address and length.
+
+    For each ``(address, length)`` pair the strictly highest-scoring candidate
+    is kept; on a tie the earliest one wins. Output order follows the first
+    appearance of each pair.
+    """
+    best: dict[tuple[int, int], dict[str, object]] = {}
+    for item in candidates:
+        identity = (int(item["address"]), int(item["length"]))
+        incumbent = best.get(identity)
+        if incumbent is not None and not (float(item["score"]) > float(incumbent["score"])):
+            continue
+        best[identity] = item
+    return list(best.values())
+
+
+def _looks_like_lcd_text(raw: bytearray, *, allow_long: bool = False) -> bool:
+    """Decide whether a printable run is plausible display text.
+
+    Rejects runs shorter than 4 bytes, runs longer than 80 bytes unless
+    ``allow_long`` is set, and runs with fewer than 2 bytes left after
+    stripping whitespace. Otherwise at least 78% of the stripped bytes must
+    have a positive display-character score.
+    """
+    size = len(raw)
+    too_short = size < 4
+    too_long = size > 80 and not allow_long
+    if too_short or too_long:
+        return False
+    core = bytes(raw).strip()
+    if len(core) < 2:
+        return False
+    plausible = len([value for value in core if _display_char_score(value) > 0])
+    return plausible / len(core) >= 0.78
+
+
+def _display_char_score(value: int) -> float:
+    """Rate how display-like a single byte value is, from 0.0 to 1.0.
+
+    Uppercase letters and digits rate 1.0, space 0.8, bytes in
+    ``DISPLAY_PUNCTUATION`` 0.7, lowercase letters 0.35, anything else 0.0.
+    """
+    is_upper = 0x41 <= value <= 0x5A
+    is_digit = 0x30 <= value <= 0x39
+    if is_upper or is_digit:
+        return 1.0
+    # Space is tested before the punctuation set, so it rates 0.8 rather than 0.7.
+    if value == 0x20:
+        return 0.8
+    if value in DISPLAY_PUNCTUATION:
+        return 0.7
+    return 0.35 if 0x61 <= value <= 0x7A else 0.0
+
+
+def _display_score(raw: bytearray, kind: str, ff_count: int) -> float:
+    """Score a text run; higher means more likely to be display text.
+
+    Starts from the mean per-byte score of the stripped run, then adds a bonus
+    for certain raw lengths, a bonus per ``0xFF`` terminator (capped at three),
+    a bonus for ``"ff_terminated"`` runs, and subtracts a penalty for runs
+    longer than 40 bytes. The result is never negative; a run that is empty
+    after stripping scores 0.0.
+    """
+    core = bytes(raw).strip()
+    if len(core) == 0:
+        return 0.0
+    size = len(raw)
+    score = sum(_display_char_score(value) for value in core) / len(core)
+    score += 0.15 if size in (8, 10, 16, 17, 18, 19, 20) else 0.0
+    score += min(ff_count, 3) * 0.08
+    score += 0.08 if kind == "ff_terminated" else 0.0
+    score -= 0.2 if size > 40 else 0.0
+    return max(0.0, score)
+
+
+def _confidence(score: float) -> str:
+    """Map a display score to a ``"high"``, ``"medium"`` or ``"low"`` label."""
+    for threshold, label in ((1.05, "high"), (0.82, "medium")):
+        if score >= threshold:
+            return label
+    return "low"
+
+
+def _fixed_width_segments(text: str) -> list[dict[str, object]]:
+    """Split long text into fixed-width rows for a few candidate widths.
+
+    Widths 10, 16, 18 and 20 are tried in that order. A width is reported when
+    the text spans at least two full rows and at least two rows keep two or
+    more characters after stripping. At most the first three matching layouts
+    are returned, each as ``{"width": ..., "chunks": [...]}``.
+    """
+    total = len(text)
+    layouts: list[dict[str, object]] = []
+    for columns in (10, 16, 18, 20):
+        if total >= columns * 2:
+            rows = [text[offset : offset + columns] for offset in range(0, total, columns)]
+            meaningful = sum(1 for row in rows if len(row.strip()) >= 2)
+            if meaningful >= 2:
+                layouts.append({"width": columns, "chunks": rows})
+    del layouts[3:]
+    return layouts
+
+
+def _xref_map(
+    rom: Rom,
+    candidates: list[dict[str, object]],
+    instructions: list[Instruction],
+) -> dict[int, list[dict[str, object]]]:
+    """Collect xrefs for each candidate, keyed by candidate address.
+
+    Every address within 4 bytes either side of a candidate (inside
+    ``0..0xFFFF``) is watched as a possible reference target. When windows
+    overlap, the candidate listed first owns the shared target. Candidates
+    with no xrefs are left out of the result.
+    """
+    starts = [int(item["address"]) for item in candidates]
+
+    owner_of: dict[int, int] = {}
+    for base in starts:
+        for probe in range(base - 4, base + 5):
+            if probe < 0 or probe > 0xFFFF:
+                continue
+            if probe not in owner_of:
+                owner_of[probe] = base
+
+    collected: dict[int, list[dict[str, object]]] = {base: [] for base in starts}
+    _add_decoded_xrefs(collected, owner_of, instructions)
+    _add_raw_mov_iw_xrefs(collected, owner_of, rom)
+    return {base: refs for base, refs in collected.items() if refs}
+
+
+def _add_decoded_xrefs(
+    xrefs: dict[int, list[dict[str, object]]],
+    target_to_address: Mapping[int, int],
+    instructions: list[Instruction],
+) -> None:
+    """Append an xref for every decoded operand that mentions a watched target.
+
+    Args:
+        xrefs: Per-candidate xref lists, updated in place. It must already
+            hold a list for every candidate address in ``target_to_address``.
+        target_to_address: Watched target address -> owning candidate address.
+        instructions: Decoded instructions to inspect, in reporting order.
+
+    A target matches when its ``H'XXXX`` rendering is found in
+    ``ins.operands`` via ``in``.
+    NOTE(review): this presumes ``operands`` is operand text (substring
+    match); confirm against the ``Instruction`` model.
+    """
+    # Format each needle once per call rather than once per
+    # (instruction, target) pair; the pairing and its order are unchanged.
+    needles = [
+        (f"H'{target:04X}", target, candidate_address)
+        for target, candidate_address in target_to_address.items()
+    ]
+    if not needles:
+        return
+    for ins in instructions:
+        operands = ins.operands
+        for needle, target, candidate_address in needles:
+            if needle in operands:
+                xrefs[candidate_address].append(
+                    {
+                        "address": ins.address,
+                        "kind": "decoded_operand",
+                        "target": target,
+                        "delta": target - candidate_address,
+                        "instruction": ins.text,
+                    },
+                )
+
+def _add_raw_mov_iw_xrefs(
+    xrefs: dict[int, list[dict[str, object]]],
+    target_to_address: Mapping[int, int],
+    rom: Rom,
+) -> None:
+    """Scan ``rom.data`` byte by byte for ``MOV:I.W`` loads of watched targets.
+
+    Any byte in the ``MOV_IW_FIRST_OPCODE``..``MOV_IW_LAST_OPCODE`` range is
+    treated as an opcode whose next two bytes form a big-endian immediate.
+    When the immediate is a watched target, an xref is appended to the owning
+    candidate's list in ``xrefs``. A ``following_bsr`` entry is added when
+    ``_following_bsr`` recognises the bytes right after the load.
+    """
+    image = rom.data
+    # Stop early enough that both immediate bytes are always in range.
+    scan_stop = max(len(image) - 2, 0)
+    for offset in range(scan_stop):
+        opcode = image[offset]
+        if opcode < MOV_IW_FIRST_OPCODE or opcode > MOV_IW_LAST_OPCODE:
+            continue
+        immediate = (image[offset + 1] << 8) | image[offset + 2]
+        owner = target_to_address.get(immediate)
+        if owner is None:
+            continue
+        reg_name = f"R{opcode - MOV_IW_FIRST_OPCODE}"
+        record: dict[str, object] = {
+            "address": offset,
+            "kind": "raw_mov_iw",
+            "target": immediate,
+            "delta": immediate - owner,
+            "register": reg_name,
+            "instruction": f"MOV:I.W #{h16(immediate)}, {reg_name}",
+        }
+        trailing_call = _following_bsr(image, offset + 3)
+        if trailing_call:
+            record["following_bsr"] = trailing_call
+        xrefs[owner].append(record)
+
+
+def _following_bsr(data: bytes, address: int) -> dict[str, object] | None:
+    """Describe a three-byte ``0x1E`` branch at ``address``, if one is there.
+
+    Returns ``None`` unless three bytes are available and the first is
+    ``0x1E``. Otherwise the next two bytes are read as a signed big-endian
+    displacement, applied relative to the byte after the instruction and
+    wrapped to 16 bits, and reported as a ``BSR`` to that target.
+    """
+    last_needed = address + 2
+    if last_needed >= len(data):
+        return None
+    if data[address] != 0x1E:
+        return None
+    raw16 = (data[address + 1] << 8) | data[last_needed]
+    # Sign-extend the 16-bit displacement.
+    offset = raw16 - 0x10000 if raw16 & 0x8000 else raw16
+    destination = (address + 3 + offset) & 0xFFFF
+    return {
+        "address": address,
+        "target": destination,
+        "instruction": f"BSR {h16(destination)}",
+    }
+
+
+def _group_regions(candidates: list[dict[str, object]]) -> list[dict[str, object]]:
+    """Cluster address-ordered candidates into regions of nearby text.
+
+    A new cluster starts when a candidate begins more than 0x80 bytes past the
+    furthest end reached by the current cluster. Clusters with fewer than two
+    candidates are not reported.
+
+    Args:
+        candidates: Candidate records with ``address`` and ``length``,
+            expected in ascending address order.
+
+    Returns:
+        One dict per reported cluster with ``start``, ``end``, ``count`` and
+        up to eight ``samples``.
+    """
+    regions: list[dict[str, object]] = []
+    current: list[dict[str, object]] = []
+    furthest_end: int | None = None
+    for candidate in candidates:
+        address = int(candidate["address"])
+        candidate_end = address + int(candidate["length"])
+        if current and furthest_end is not None and address - furthest_end > 0x80:
+            _append_region(regions, current)
+            current = []
+            furthest_end = None
+        current.append(candidate)
+        # Track the furthest end, not the latest one: a short candidate nested
+        # inside a longer one must not pull the cluster's extent backwards and
+        # open a spurious gap before the next candidate.
+        furthest_end = candidate_end if furthest_end is None else max(furthest_end, candidate_end)
+    if current:
+        _append_region(regions, current)
+    return regions
+
+
+def _append_region(regions: list[dict[str, object]], candidates: list[dict[str, object]]) -> None:
+    """Append a summary of ``candidates`` to ``regions`` if it has 2+ entries.
+
+    The region starts at the first candidate's address and ends at the
+    furthest candidate end. Samples are the trimmed (or raw) text of the first
+    eight candidates.
+    """
+    if len(candidates) < 2:
+        return
+    start = int(candidates[0]["address"])
+    end = max(int(item["address"]) + int(item["length"]) for item in candidates)
+    regions.append(
+        {
+            "start": start,
+            "end": end,
+            "count": len(candidates),
+            "samples": [str(item.get("trimmed") or item.get("text")) for item in candidates[:8]],
+        },
+    )
+
+
+def _search_term(rom: Rom, candidates: list[dict[str, object]], term: str) -> dict[str, object]:
+    """Look up one search term in the raw ROM bytes and among the candidates.
+
+    Reports case-insensitive byte hits in ``rom.data``, candidates whose text
+    contains the term (case-insensitive), and fuzzy near matches. ``status``
+    is ``"found"`` when there is any byte hit or candidate hit, otherwise
+    ``"not_found"``.
+    """
+    needle = term.encode("ascii", errors="ignore")
+    byte_hits = _literal_hits(rom.data, needle)
+
+    wanted = term.upper()
+    text_hits: list[dict[str, object]] = []
+    for item in candidates:
+        if wanted not in str(item.get("text", "")).upper():
+            continue
+        text_hits.append(
+            {
+                "address": int(item["address"]),
+                "text": item["text"],
+                "trimmed": item["trimmed"],
+            },
+        )
+
+    fuzzy = _near_matches(candidates, term)
+    located = bool(byte_hits or text_hits)
+    return {
+        "term": term,
+        "literal_hits": byte_hits,
+        "candidate_hits": text_hits,
+        "near_matches": fuzzy,
+        "status": "found" if located else "not_found",
+    }
+
+
+def _literal_hits(data: bytes, needle: bytes) -> list[int]:
+    """Return every offset in ``data`` where ``needle`` occurs, ignoring ASCII case.
+
+    Overlapping occurrences are all reported, in ascending order. An empty
+    ``needle`` yields no hits.
+    """
+    if not needle:
+        return []
+    haystack = data.upper()
+    pattern = needle.upper()
+    positions: list[int] = []
+    found = haystack.find(pattern)
+    while found >= 0:
+        positions.append(found)
+        # Restart one byte later so overlapping matches are not skipped.
+        found = haystack.find(pattern, found + 1)
+    return positions
+
+
+def _near_matches(candidates: list[dict[str, object]], term: str) -> list[dict[str, object]]:
+    """Return up to 12 candidates whose text is loosely similar to ``term``.
+
+    Both sides are normalized with ``_normalize_text`` and compared with
+    ``SequenceMatcher``. Candidates with a ratio of at least 0.34 are kept and
+    ordered by descending ratio, then ascending address. Candidates whose
+    normalized text is empty are skipped.
+    """
+    wanted = _normalize_text(term)
+    scored: list[dict[str, object]] = []
+    for item in candidates:
+        label = _normalize_text(str(item.get("trimmed") or item.get("text") or ""))
+        if label:
+            similarity = SequenceMatcher(None, wanted, label).ratio()
+            if similarity >= 0.34:
+                scored.append(
+                    {
+                        "address": int(item["address"]),
+                        "text": item["text"],
+                        "trimmed": item["trimmed"],
+                        "score": round(similarity, 3),
+                    },
+                )
+    ranked = sorted(scored, key=lambda hit: (-float(hit["score"]), int(hit["address"])))
+    return ranked[:12]
+
+
+def _normalize_text(text: str) -> str:
+    """Uppercase ``text`` and keep only the characters ``A``-``Z`` and ``0``-``9``."""
+    kept: list[str] = []
+    for char in text.upper():
+        if ("A" <= char <= "Z") or ("0" <= char <= "9"):
+            kept.append(char)
+    return "".join(kept)