from __future__ import annotations

import argparse
import json
import re
from pathlib import Path
from typing import Any, Mapping

# Consistency checks over the structured JSON emitted by the decompiler.
#
# The only check implemented here reports word-sized MOV instructions whose
# immediate source was written as a byte-sized literal, so that pseudocode
# models the write as a full word rather than a one-byte store.

JsonObject = dict[str, Any]

# Mnemonic bases (text before the size suffix) that the width check inspects.
_WORD_MOV_BASES = frozenset({"MOV:G", "MOV"})
# Size suffixes recognised after the final "." of a mnemonic.
_SIZE_SUFFIXES = frozenset({"B", "W"})

# Accepted immediate spellings; compiled once instead of on every call.
_H_LITERAL_RE = re.compile(r"H'([0-9A-Fa-f]+)")
_X_LITERAL_RE = re.compile(r"0x([0-9A-Fa-f]+)")
_DECIMAL_LITERAL_RE = re.compile(r"\d+")


def analyze_decompiler_consistency(payload: Mapping[str, Any]) -> JsonObject:
    """Flag decompiler/pseudocode semantic cases that are easy to misread.

    Args:
        payload: Decompiler output; only the ``"instructions"`` entry is read.
            Anything other than a list of mappings is treated as "no
            instructions".

    Returns:
        A dict with ``"kind"``, a human-readable ``"summary"`` and the list of
        ``"checks"``, ordered by instruction address.
    """
    # The check builder always returns a populated dict, so no post-filtering
    # of the results is needed.
    width_checks = [
        _byte_immediate_word_write_check(ins)
        for ins in _instruction_sequence(payload.get("instructions"))
        if is_byte_immediate_to_word_destination(ins)
    ]
    return {
        "kind": "decompiler_pseudocode_consistency",
        "summary": _summary(width_checks),
        "checks": width_checks,
    }


def format_consistency_report(analysis: Mapping[str, Any]) -> str:
    """Render an analysis dict as a plain-text report.

    Args:
        analysis: Result of :func:`analyze_decompiler_consistency` (or any
            mapping with optional ``"summary"`` and ``"checks"`` entries).

    Returns:
        The report text, always terminated by exactly one newline. Entries of
        ``"checks"`` that are not mappings are skipped; a ``"checks"`` value
        that is not a list is ignored entirely.
    """
    lines = [
        "Decompiler/Pseudocode Consistency",
        str(analysis.get("summary") or "No checks emitted."),
        "",
    ]
    checks = analysis.get("checks")
    if isinstance(checks, list):
        for check in checks:
            if not isinstance(check, Mapping):
                continue
            lines.append(
                f"- {check.get('address_hex', '?')}: {check.get('instruction', '')} "
                f"[{check.get('status', 'info')}]"
            )
            summary = check.get("summary")
            if summary:
                lines.append(f" {summary}")
    # rstrip() drops the trailing blank separator line when no checks follow it.
    return "\n".join(lines).rstrip() + "\n"


def is_byte_immediate_to_word_destination(instruction: Mapping[str, Any]) -> bool:
    """Return True for a word-sized MOV whose source is a byte-sized immediate.

    The instruction must have a ``MOV``/``MOV:G`` mnemonic with a ``.W``
    suffix, exactly two operands, and a first operand of the form ``#literal``
    where the literal is at most two hex digits (or a decimal value that fits
    in one byte).
    """
    mnemonic = str(instruction.get("mnemonic") or "")
    if _mnemonic_base(mnemonic) not in _WORD_MOV_BASES:
        return False
    if _mnemonic_size(mnemonic) != "W":
        return False

    operands = _split_operands(str(instruction.get("operands") or ""))
    if len(operands) != 2:
        return False

    source = operands[0].strip()
    if not source.startswith("#"):
        return False

    literal = _immediate_literal_text(source[1:])
    # Hex literals longer than two digits were written as wider than a byte.
    return literal is not None and len(literal) <= 2


def _byte_immediate_word_write_check(instruction: Mapping[str, Any]) -> JsonObject:
    """Build the report entry for one byte-immediate-to-word MOV instruction.

    Intended for instructions accepted by
    :func:`is_byte_immediate_to_word_destination`. If the operand text is
    empty or the immediate cannot be parsed, the value field falls back to a
    descriptive placeholder instead of raising.
    """
    address = int(instruction.get("address") or 0)
    operands = _split_operands(str(instruction.get("operands") or ""))
    # Guard the index: an instruction without operands has no immediate.
    immediate = _immediate_value(operands[0]) if operands else None
    if immediate is not None:
        value_text = f"0x{immediate:04X}"
    else:
        value_text = "zero-extended byte"
    return {
        "kind": "byte_immediate_to_word_destination",
        "status": "requires_zero_extend8_to16_pseudocode",
        "address": address,
        "address_hex": _h16(address),
        # Prefer the decompiler's own rendering; rebuild it only when absent.
        "instruction": str(instruction.get("text") or _instruction_text(instruction)),
        "expected_pseudocode_hint": "zero_extend8_to16",
        "zero_extended_value_hex": value_text,
        "summary": (
            "Word-sized MOV with an 8-bit immediate writes a zero-extended word. "
            "Pseudocode should not model this as a one-byte write or preserve the old low byte."
        ),
    }


def _summary(width_checks: list[JsonObject]) -> str:
    """Return the one-line summary for the collected width checks."""
    if not width_checks:
        return "No byte-immediate-to-word destination cases found."
    return (
        f"{len(width_checks)} byte-immediate-to-word destination case(s) require "
        "explicit zero-extension in pseudocode."
    )


def _instruction_sequence(value: object) -> list[Mapping[str, Any]]:
    """Return the mapping entries of *value* sorted by ``"address"``.

    Non-list input yields an empty list; non-mapping entries are dropped. A
    missing or falsy address sorts as 0.
    """
    if not isinstance(value, list):
        return []
    instructions = [item for item in value if isinstance(item, Mapping)]
    return sorted(instructions, key=lambda item: int(item.get("address") or 0))


def _split_operands(operands: str) -> list[str]:
    """Split operand text on top-level commas.

    Commas nested inside ``(...)`` or ``{...}`` do not split. Each part is
    stripped and empty parts are discarded.
    """
    if not operands:
        return []
    parts: list[str] = []
    start = 0
    depth = 0
    for idx, char in enumerate(operands):
        if char in "({":
            depth += 1
        elif char in ")}" and depth:
            # Unbalanced closers at depth 0 are ignored rather than going negative.
            depth -= 1
        elif char == "," and depth == 0:
            parts.append(operands[start:idx].strip())
            start = idx + 1
    parts.append(operands[start:].strip())
    return [part for part in parts if part]


def _immediate_literal_text(text: str) -> str | None:
    """Return the hex-digit text of an immediate literal, or None.

    ``H'..`` and ``0x..`` literals return their digits unchanged (so the
    written width is preserved). Decimal literals are accepted only when the
    value fits in one byte and are returned as two uppercase hex digits.
    """
    stripped = text.strip()
    for pattern in (_H_LITERAL_RE, _X_LITERAL_RE):
        match = pattern.fullmatch(stripped)
        if match:
            return match.group(1)
    if _DECIMAL_LITERAL_RE.fullmatch(stripped):
        value = int(stripped, 10)
        if 0 <= value <= 0xFF:
            return f"{value:02X}"
    return None


def _immediate_value(operand: str) -> int | None:
    """Parse an immediate operand (optionally prefixed with ``#``) to an int.

    Returns None when the text is not a literal recognised by
    :func:`_immediate_literal_text`.
    """
    stripped = operand.strip()
    if stripped.startswith("#"):
        stripped = stripped[1:].strip()
    literal = _immediate_literal_text(stripped)
    return None if literal is None else int(literal, 16)


def _instruction_text(instruction: Mapping[str, Any]) -> str:
    """Rebuild ``"<mnemonic> <operands>"`` from the instruction's fields."""
    mnemonic = str(instruction.get("mnemonic") or "")
    operands = str(instruction.get("operands") or "")
    return f"{mnemonic} {operands}".strip()


def _mnemonic_base(mnemonic: str) -> str:
    """Return the mnemonic without its final ``.suffix``, if it has one."""
    return mnemonic.rsplit(".", 1)[0] if "." in mnemonic else mnemonic


def _mnemonic_size(mnemonic: str) -> str:
    """Return the ``B``/``W`` size suffix of a mnemonic, or ``""`` if none."""
    suffix = mnemonic.rsplit(".", 1)[-1] if "." in mnemonic else ""
    return suffix if suffix in _SIZE_SUFFIXES else ""


def _h16(value: int) -> str:
    """Format *value*, masked to 16 bits, as ``H'XXXX``."""
    return f"H'{value & 0xFFFF:04X}"


def load_consistency_input(path: Path) -> JsonObject:
    """Load decompiler JSON from *path*.

    Raises:
        ValueError: If the top-level value is not an object containing an
            ``"instructions"`` key.
    """
    with path.open("r", encoding="utf-8") as handle:
        payload = json.load(handle)
    if not isinstance(payload, dict) or "instructions" not in payload:
        raise ValueError(f"{path} does not look like h8536_decompiler JSON output")
    return payload


def write_consistency_report(input_path: Path, output_path: Path, *, json_output: bool = False) -> None:
    """Analyze *input_path* and write the report to *output_path*.

    Missing parent directories of *output_path* are created. With
    ``json_output=True`` the analysis dict is written as indented JSON;
    otherwise the plain-text report is written.
    """
    analysis = analyze_decompiler_consistency(load_consistency_input(input_path))
    output_path.parent.mkdir(parents=True, exist_ok=True)
    if json_output:
        rendered = json.dumps(analysis, indent=2)
    else:
        rendered = format_consistency_report(analysis)
    output_path.write_text(rendered, encoding="utf-8")


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point; returns the process exit status (0)."""
    parser = argparse.ArgumentParser(
        description="Report decompiler/pseudocode semantic consistency checks.",
    )
    parser.add_argument(
        "input",
        nargs="?",
        type=Path,
        default=Path("build/rom_decompiled.json"),
        help="structured JSON emitted by h8536_decompiler.py",
    )
    parser.add_argument(
        "--out",
        type=Path,
        default=Path("build/rom_consistency.txt"),
        help="consistency report output path",
    )
    parser.add_argument("--json", action="store_true", help="write JSON instead of text")
    args = parser.parse_args(argv)
    write_consistency_report(args.input, args.out, json_output=args.json)
    print(f"wrote {args.out}")
    return 0


__all__ = [
    "analyze_decompiler_consistency",
    "format_consistency_report",
    "is_byte_immediate_to_word_destination",
    "load_consistency_input",
    "main",
    "write_consistency_report",
]