"""Heuristic discovery of LCD/menu text fields in a ROM image and their xrefs."""

from __future__ import annotations

from collections.abc import Iterable, Iterator, Mapping
from difflib import SequenceMatcher

from .formatting import h16
from .model import Instruction
from .rom import Rom

# Byte values accepted as part of a display-text run (0x20..0x7E inclusive).
DISPLAY_PRINTABLE = set(range(0x20, 0x7F))
# Punctuation that still counts as plausible display text when scoring.
DISPLAY_PUNCTUATION = set(b" ./:-+,%()[]")
DEFAULT_SEARCH_TERMS = ("CONNECT",)
# Opcode range scanned as ``MOV:I.W #imm16, Rn``; the register number is
# ``opcode - MOV_IW_FIRST_OPCODE``.
MOV_IW_FIRST_OPCODE = 0x58
MOV_IW_LAST_OPCODE = 0x5F


def analyze_lcd_text(
    rom: Rom,
    instructions: Mapping[int, Instruction] | Iterable[Instruction] | None = None,
    *,
    start: int | None = None,
    end: int | None = None,
    search_terms: Iterable[str] = DEFAULT_SEARCH_TERMS,
    max_candidates: int = 240,
) -> dict[str, object]:
    """Find likely fixed-width LCD/menu strings and their nearby raw xrefs.

    The firmware stores some display text as inline menu/script records rather
    than as plain null-terminated strings. This pass scans the ROM bytes
    directly, then correlates likely text fields with decoded and raw immediate
    address loads such as ``MOV:I.W #H'63D4, R0``.

    Args:
        rom: ROM image to scan.
        instructions: Optional decoded instructions (mapping or iterable) used
            for operand-based xrefs; ``None`` skips that correlation.
        start: Lower scan bound; ``None`` or a negative value means 0.
        end: Upper scan bound (exclusive); clamped to ``rom.end``.
        search_terms: Terms to look up in the ROM bytes and in the candidates.
        max_candidates: Number of highest-scoring candidates to keep. Values
            below zero are treated as zero.

    Returns:
        A dict with ``strings`` (candidates ordered by address), ``regions``,
        ``searches`` and ``notes``.
    """
    lower = 0 if start is None else max(0, start)
    upper = rom.end if end is None else min(rom.end, end)
    candidates = _unique_candidates(
        [
            *_ff_terminated_candidates(rom, lower, upper),
            *_printable_run_candidates(rom, lower, upper),
        ],
    )
    # A negative limit would otherwise slice from the end and silently drop the
    # lowest-scoring entries instead of keeping none.
    limit = max(0, max_candidates)
    candidates = sorted(candidates, key=lambda item: (-float(item["score"]), int(item["address"])))[:limit]
    # Keep the best-scoring candidates, then present them in address order.
    candidates.sort(key=lambda item: int(item["address"]))

    instruction_list = _instruction_sequence(instructions)
    xrefs_by_address = _xref_map(rom, candidates, instruction_list)
    for candidate in candidates:
        xrefs = xrefs_by_address.get(int(candidate["address"]), [])
        if xrefs:
            candidate["xrefs"] = xrefs
            candidate["xref_count"] = len(xrefs)

    regions = _group_regions(candidates)
    searches = [_search_term(rom, candidates, term) for term in search_terms]
    return {
        "strings": candidates,
        "regions": regions,
        "searches": searches,
        "notes": [
            "LCD text scan is byte-oriented and conservative; strings may be inline script fields.",
            "Raw xrefs include MOV:I.W immediates to the string address and nearby record prefixes.",
        ],
    }


def lcd_text_comment_for_instruction(analysis: Mapping[str, object] | None, address: int) -> str:
    """Return a comment for the instruction at ``address`` if it is a text xref.

    Walks ``analysis["strings"]`` and returns a comment for the first candidate
    that has an xref whose ``address`` equals ``address``. Returns ``""`` when
    ``analysis`` is empty or nothing matches.
    """
    if not analysis:
        return ""
    for candidate in analysis.get("strings", []):
        if not isinstance(candidate, Mapping):
            continue
        for xref in candidate.get("xrefs", []):
            if isinstance(xref, Mapping) and int(xref.get("address", -1)) == address:
                text = str(candidate.get("trimmed") or candidate.get("text") or "").strip()
                return f"LCD text xref {h16(int(candidate['address']))} {text!r}"
    return ""


def _instruction_sequence(
    instructions: Mapping[int, Instruction] | Iterable[Instruction] | None,
) -> list[Instruction]:
    """Normalize the accepted instruction containers to a list sorted by address."""
    if instructions is None:
        return []
    values = instructions.values() if isinstance(instructions, Mapping) else instructions
    return sorted(values, key=lambda ins: ins.address)


def _iter_printable_runs(rom: Rom, start: int, end: int) -> Iterator[tuple[int, bytearray]]:
    """Yield ``(run_start, raw_bytes)`` for each maximal printable run in ``[start, end)``."""
    address = start
    while address < end:
        if rom.u8(address) not in DISPLAY_PRINTABLE:
            address += 1
            continue
        text_start = address
        raw = bytearray()
        while address < end:
            value = rom.u8(address)
            if value not in DISPLAY_PRINTABLE:
                break
            raw.append(value)
            address += 1
        yield text_start, raw


def _ff_terminated_candidates(rom: Rom, start: int, end: int) -> list[dict[str, object]]:
    """Collect printable runs that are directly followed by one or more 0xFF bytes."""
    candidates: list[dict[str, object]] = []
    for text_start, raw in _iter_printable_runs(rom, start, end):
        cursor = text_start + len(raw)
        ff_count = 0
        while cursor < end and rom.u8(cursor) == 0xFF:
            ff_count += 1
            cursor += 1
        if ff_count and _looks_like_lcd_text(raw):
            candidates.append(_candidate(text_start, raw, "ff_terminated", ff_count=ff_count))
    return candidates


def _printable_run_candidates(rom: Rom, start: int, end: int) -> list[dict[str, object]]:
    """Collect printable runs of any length that pass the display-text filter."""
    return [
        _candidate(text_start, raw, "printable_run", ff_count=0)
        for text_start, raw in _iter_printable_runs(rom, start, end)
        if _looks_like_lcd_text(raw, allow_long=True)
    ]


def _candidate(address: int, raw: bytearray, kind: str, *, ff_count: int) -> dict[str, object]:
    """Build the candidate record (text, score, confidence) for one byte run.

    ``ff_terminators`` is only included when ``ff_count`` is non-zero, and
    ``segments`` only for runs longer than 32 bytes.
    """
    text = raw.decode("ascii", errors="replace")
    trimmed = text.strip()
    score = _display_score(raw, kind, ff_count)
    payload: dict[str, object] = {
        "address": address,
        "length": len(raw),
        "text": text,
        "trimmed": trimmed,
        "kind": kind,
        "score": round(score, 3),
        "confidence": _confidence(score),
    }
    if ff_count:
        payload["ff_terminators"] = ff_count
    if len(raw) > 32:
        payload["segments"] = _fixed_width_segments(text)
    return payload


def _unique_candidates(candidates: list[dict[str, object]]) -> list[dict[str, object]]:
    """Deduplicate by ``(address, length)``, keeping the higher-scoring record.

    On equal scores the first record seen is kept.
    """
    by_key: dict[tuple[int, int], dict[str, object]] = {}
    for candidate in candidates:
        key = (int(candidate["address"]), int(candidate["length"]))
        existing = by_key.get(key)
        if existing is None or float(candidate["score"]) > float(existing["score"]):
            by_key[key] = candidate
    return list(by_key.values())


def _looks_like_lcd_text(raw: bytearray, *, allow_long: bool = False) -> bool:
    """Return whether ``raw`` is plausibly display text.

    Requires at least 4 bytes (at most 80 unless ``allow_long``), at least 2
    bytes after stripping whitespace, and at least 78% of the stripped bytes to
    have a positive display score.
    """
    if len(raw) < 4:
        return False
    if len(raw) > 80 and not allow_long:
        return False
    trimmed = bytes(raw).strip()
    if len(trimmed) < 2:
        return False
    good = sum(1 for value in trimmed if _display_char_score(value) > 0)
    return good / max(len(trimmed), 1) >= 0.78


def _display_char_score(value: int) -> float:
    """Score one byte: upper-case letters and digits highest, then space,
    listed punctuation, lower-case letters; anything else scores 0."""
    if 0x41 <= value <= 0x5A or 0x30 <= value <= 0x39:
        return 1.0
    if value == 0x20:
        return 0.8
    if value in DISPLAY_PUNCTUATION:
        return 0.7
    if 0x61 <= value <= 0x7A:
        return 0.35
    return 0.0


def _display_score(raw: bytearray, kind: str, ff_count: int) -> float:
    """Combine the mean character score with length, terminator and kind bonuses.

    Runs longer than 40 bytes are penalized; the result is never negative.
    """
    trimmed = bytes(raw).strip()
    if not trimmed:
        return 0.0
    char_score = sum(_display_char_score(value) for value in trimmed) / len(trimmed)
    length_bonus = 0.15 if len(raw) in {8, 10, 16, 17, 18, 19, 20} else 0.0
    terminator_bonus = min(ff_count, 3) * 0.08
    kind_bonus = 0.08 if kind == "ff_terminated" else 0.0
    long_penalty = 0.2 if len(raw) > 40 else 0.0
    return max(0.0, char_score + length_bonus + terminator_bonus + kind_bonus - long_penalty)


def _confidence(score: float) -> str:
    """Bucket a score into ``"high"`` (>= 1.05), ``"medium"`` (>= 0.82) or ``"low"``."""
    if score >= 1.05:
        return "high"
    if score >= 0.82:
        return "medium"
    return "low"


def _fixed_width_segments(text: str) -> list[dict[str, object]]:
    """Split ``text`` at widths 10, 16, 18 and 20 and keep plausible splits.

    A width is kept when the text holds at least two full widths and at least
    two chunks have 2+ characters after stripping. At most three widths are
    returned.
    """
    segments: list[dict[str, object]] = []
    for width in (10, 16, 18, 20):
        if len(text) < width * 2:
            continue
        chunks = [text[index : index + width] for index in range(0, len(text), width)]
        useful = [chunk.strip() for chunk in chunks if len(chunk.strip()) >= 2]
        if len(useful) >= 2:
            segments.append({"width": width, "chunks": chunks})
    return segments[:3]


def _xref_map(
    rom: Rom,
    candidates: list[dict[str, object]],
    instructions: list[Instruction],
) -> dict[int, list[dict[str, object]]]:
    """Map each candidate address to the xrefs that load it or a nearby address.

    Any target within 4 bytes either side of a candidate address (and inside
    ``0..0xFFFF``) counts as a reference to that candidate. Where windows
    overlap, the candidate listed first claims the target. Candidates without
    xrefs are omitted from the result.
    """
    addresses = [int(candidate["address"]) for candidate in candidates]
    target_to_address: dict[int, int] = {}
    for address in addresses:
        for delta in range(-4, 5):
            target = address + delta
            if 0 <= target <= 0xFFFF:
                target_to_address.setdefault(target, address)

    xrefs: dict[int, list[dict[str, object]]] = {address: [] for address in addresses}
    _add_decoded_xrefs(xrefs, target_to_address, instructions)
    _add_raw_mov_iw_xrefs(xrefs, target_to_address, rom)
    return {address: refs for address, refs in xrefs.items() if refs}


def _add_decoded_xrefs(
    xrefs: dict[int, list[dict[str, object]]],
    target_to_address: Mapping[int, int],
    instructions: list[Instruction],
) -> None:
    """Append an xref for every decoded instruction whose operands mention a target.

    A target matches when its ``H'XXXX`` rendering occurs in ``ins.operands``.
    """
    if not instructions:
        return
    # The needle text depends only on the target, so format it once instead of
    # once per (instruction, target) pair.
    needles = [
        (f"H'{target:04X}", target, candidate_address)
        for target, candidate_address in target_to_address.items()
    ]
    for ins in instructions:
        operands = ins.operands
        for needle, target, candidate_address in needles:
            if needle in operands:
                xrefs[candidate_address].append(
                    {
                        "address": ins.address,
                        "kind": "decoded_operand",
                        "target": target,
                        "delta": target - candidate_address,
                        "instruction": ins.text,
                    },
                )


def _add_raw_mov_iw_xrefs(
    xrefs: dict[int, list[dict[str, object]]],
    target_to_address: Mapping[int, int],
    rom: Rom,
) -> None:
    """Scan ``rom.data`` for ``MOV:I.W``-shaped byte triples that load a target.

    Every offset is tried, so hits are raw byte-pattern matches rather than
    confirmed instructions. The reported ``address`` is the index into
    ``rom.data``.
    """
    data = rom.data
    for address in range(0, max(len(data) - 2, 0)):
        opcode = data[address]
        if not MOV_IW_FIRST_OPCODE <= opcode <= MOV_IW_LAST_OPCODE:
            continue
        # 16-bit immediate, high byte first.
        target = (data[address + 1] << 8) | data[address + 2]
        candidate_address = target_to_address.get(target)
        if candidate_address is None:
            continue
        register = f"R{opcode - MOV_IW_FIRST_OPCODE}"
        xref: dict[str, object] = {
            "address": address,
            "kind": "raw_mov_iw",
            "target": target,
            "delta": target - candidate_address,
            "register": register,
            "instruction": f"MOV:I.W #{h16(target)}, {register}",
        }
        bsr = _following_bsr(data, address + 3)
        if bsr:
            xref["following_bsr"] = bsr
        xrefs[candidate_address].append(xref)


def _following_bsr(data: bytes, address: int) -> dict[str, object] | None:
    """Decode a 3-byte ``0x1E`` + signed 16-bit displacement at ``address``.

    Returns ``None`` when the three bytes do not fit in ``data`` or the opcode
    byte is not ``0x1E``. The target is taken relative to the byte after the
    instruction and wrapped to 16 bits.
    """
    if address + 2 >= len(data) or data[address] != 0x1E:
        return None
    displacement = (data[address + 1] << 8) | data[address + 2]
    if displacement & 0x8000:
        displacement -= 0x10000
    target = (address + 3 + displacement) & 0xFFFF
    return {"address": address, "target": target, "instruction": f"BSR {h16(target)}"}


def _group_regions(candidates: list[dict[str, object]]) -> list[dict[str, object]]:
    """Group candidates into regions, in the order given.

    A new region starts when a candidate begins more than 0x80 bytes after the
    end of the previous one.
    """
    regions: list[dict[str, object]] = []
    current: list[dict[str, object]] = []
    previous_end: int | None = None
    for candidate in candidates:
        address = int(candidate["address"])
        length = int(candidate["length"])
        if current and previous_end is not None and address - previous_end > 0x80:
            _append_region(regions, current)
            current = []
        current.append(candidate)
        previous_end = address + length
    if current:
        _append_region(regions, current)
    return regions


def _append_region(regions: list[dict[str, object]], candidates: list[dict[str, object]]) -> None:
    """Append a region summary to ``regions``; groups of fewer than two are skipped."""
    if len(candidates) < 2:
        return
    start = int(candidates[0]["address"])
    end = max(int(item["address"]) + int(item["length"]) for item in candidates)
    regions.append(
        {
            "start": start,
            "end": end,
            "count": len(candidates),
            "samples": [str(item.get("trimmed") or item.get("text")) for item in candidates[:8]],
        },
    )


def _search_term(rom: Rom, candidates: list[dict[str, object]], term: str) -> dict[str, object]:
    """Look ``term`` up in the raw ROM bytes and in the candidate texts.

    Both lookups are case-insensitive. An empty term matches nothing, so its
    status is ``"not_found"``.
    """
    raw = term.encode("ascii", errors="ignore")
    literal_hits = _literal_hits(rom.data, raw)
    folded = term.upper()
    candidate_hits = [
        {
            "address": int(candidate["address"]),
            "text": candidate["text"],
            "trimmed": candidate["trimmed"],
        }
        for candidate in candidates
        # ``"" in text`` is always true; skip it so an empty term matches
        # nothing here either, as in ``_literal_hits``.
        if folded and folded in str(candidate.get("text", "")).upper()
    ]
    near_matches = _near_matches(candidates, term)
    return {
        "term": term,
        "literal_hits": literal_hits,
        "candidate_hits": candidate_hits,
        "near_matches": near_matches,
        "status": "found" if literal_hits or candidate_hits else "not_found",
    }


def _literal_hits(data: bytes, needle: bytes) -> list[int]:
    """Return every offset (overlapping included) where ``needle`` occurs in
    ``data``, compared case-insensitively; an empty needle yields ``[]``."""
    if not needle:
        return []
    hits: list[int] = []
    start = 0
    upper_data = data.upper()
    upper_needle = needle.upper()
    while True:
        index = upper_data.find(upper_needle, start)
        if index < 0:
            return hits
        hits.append(index)
        start = index + 1


def _near_matches(candidates: list[dict[str, object]], term: str) -> list[dict[str, object]]:
    """Return up to 12 candidates whose normalized text resembles ``term``.

    Similarity is ``SequenceMatcher.ratio()`` with a threshold of 0.34; results
    are ordered by descending ratio, then by address.
    """
    normalized_term = _normalize_text(term)
    if not normalized_term:
        # The ratio against an empty term is always 0.0, below the threshold.
        return []
    matches: list[dict[str, object]] = []
    for candidate in candidates:
        normalized = _normalize_text(str(candidate.get("trimmed") or candidate.get("text") or ""))
        if not normalized:
            continue
        ratio = SequenceMatcher(None, normalized_term, normalized).ratio()
        if ratio >= 0.34:
            matches.append(
                {
                    "address": int(candidate["address"]),
                    "text": candidate["text"],
                    "trimmed": candidate["trimmed"],
                    "score": round(ratio, 3),
                },
            )
    matches.sort(key=lambda item: (-float(item["score"]), int(item["address"])))
    return matches[:12]


def _normalize_text(text: str) -> str:
    """Upper-case ``text`` and keep only the characters ``A``-``Z`` and ``0``-``9``."""
    return "".join(char for char in text.upper() if "A" <= char <= "Z" or "0" <= char <= "9")