1
0
Files
h8-536-decoder/h8536/consistency.py
2026-05-25 21:00:25 +10:00

212 lines
6.9 KiB
Python

from __future__ import annotations
import argparse
import json
import re
from pathlib import Path
from typing import Any, Mapping
# Alias used in this module's annotations for JSON-shaped dictionaries
# (string keys, arbitrary values).
JsonObject = dict[str, Any]
def analyze_decompiler_consistency(payload: Mapping[str, Any]) -> JsonObject:
    """Flag decompiler/pseudocode semantic cases that are easy to misread.

    Walks the entries found under ``payload["instructions"]`` in the order
    produced by ``_instruction_sequence`` and builds one check record for
    every instruction accepted by ``is_byte_immediate_to_word_destination``.

    Returns:
        A dict with the keys ``kind``, ``summary`` and ``checks``.
    """
    flagged: list[JsonObject] = []
    for instruction in _instruction_sequence(payload.get("instructions")):
        if not is_byte_immediate_to_word_destination(instruction):
            continue
        record = _byte_immediate_word_write_check(instruction)
        # Falsy records are dropped rather than reported.
        if record:
            flagged.append(record)

    return {
        "kind": "decompiler_pseudocode_consistency",
        "summary": _summary(flagged),
        "checks": flagged,
    }
def format_consistency_report(analysis: Mapping[str, Any]) -> str:
    """Render an analysis mapping as a plain-text report.

    The report opens with a fixed title and the analysis summary (or a
    placeholder when the summary is missing or empty). Every mapping found in
    the ``checks`` list contributes a bullet line, followed by its own summary
    when it has one. Entries that are not mappings are skipped, and a
    ``checks`` value that is not a list is ignored altogether.

    Returns:
        The report text, terminated by exactly one newline.
    """
    heading = str(analysis.get("summary") or "No checks emitted.")
    out = ["Decompiler/Pseudocode Consistency", heading, ""]

    entries = analysis.get("checks")
    if isinstance(entries, list):
        for entry in entries:
            if not isinstance(entry, Mapping):
                continue
            address = entry.get("address_hex", "?")
            text = entry.get("instruction", "")
            status = entry.get("status", "info")
            out.append(f"- {address}: {text} [{status}]")
            note = entry.get("summary")
            if note:
                out.append(f" {note}")

    # Trailing blank lines are trimmed before the final newline is added.
    return "\n".join(out).rstrip() + "\n"
def is_byte_immediate_to_word_destination(instruction: Mapping[str, Any]) -> bool:
    """Return True for a word-sized MOV whose source is a short immediate.

    An instruction qualifies when all of the following hold:

    * its mnemonic base is ``MOV`` or ``MOV:G`` and its size suffix is ``W``;
    * it has exactly two operands;
    * the first operand starts with ``#`` and the text after it is a literal
      recognised by ``_immediate_literal_text`` whose digit text is at most
      two characters long.
    """
    name = str(instruction.get("mnemonic") or "")
    is_word_mov = _mnemonic_base(name) in {"MOV:G", "MOV"} and _mnemonic_size(name) == "W"
    if not is_word_mov:
        return False

    parts = _split_operands(str(instruction.get("operands") or ""))
    if len(parts) != 2:
        return False

    first = parts[0].strip()
    if not first.startswith("#"):
        return False

    digits = _immediate_literal_text(first[1:])
    if digits is None:
        return False
    return len(digits) <= 2
def _byte_immediate_word_write_check(instruction: Mapping[str, Any]) -> JsonObject:
    """Build the check record for one flagged instruction.

    The record carries the instruction address (as an int and in ``H'``
    notation), the instruction text (the ``text`` field when present,
    otherwise one rebuilt from mnemonic and operands) and the immediate
    rendered as a four-digit hex word. When the immediate cannot be parsed
    the value field falls back to a descriptive placeholder.

    Raises:
        IndexError: if the instruction has no operands, because the first
            operand is read unconditionally.
    """
    address = int(instruction.get("address") or 0)

    first_operand = _split_operands(str(instruction.get("operands") or ""))[0]
    immediate = _immediate_value(first_operand)
    if immediate is None:
        value_text = "zero-extended byte"
    else:
        value_text = f"0x{immediate:04X}"

    address_hex = _h16(address)
    rendered = str(instruction.get("text") or _instruction_text(instruction))
    explanation = (
        "Word-sized MOV with an 8-bit immediate writes a zero-extended word. "
        "Pseudocode should not model this as a one-byte write or preserve the old low byte."
    )
    return {
        "kind": "byte_immediate_to_word_destination",
        "status": "requires_zero_extend8_to16_pseudocode",
        "address": address,
        "address_hex": address_hex,
        "instruction": rendered,
        "expected_pseudocode_hint": "zero_extend8_to16",
        "zero_extended_value_hex": value_text,
        "summary": explanation,
    }
def _summary(width_checks: list[JsonObject]) -> str:
    """Return the one-line summary for the collected width checks.

    Reports the number of checks when there are any, otherwise a fixed
    "none found" sentence.
    """
    if width_checks:
        count = len(width_checks)
        return (
            f"{count} byte-immediate-to-word destination case(s) require "
            "explicit zero-extension in pseudocode."
        )
    return "No byte-immediate-to-word destination cases found."
def _instruction_sequence(value: object) -> list[Mapping[str, Any]]:
    """Return the mapping entries of *value*, ordered by ascending address.

    Anything other than a list yields an empty result. Non-mapping list items
    are discarded. A missing or falsy ``address`` sorts as 0, and entries with
    equal addresses keep their input order.
    """
    if not isinstance(value, list):
        return []

    def address_of(entry: Mapping[str, Any]) -> int:
        return int(entry.get("address") or 0)

    entries = [entry for entry in value if isinstance(entry, Mapping)]
    entries.sort(key=address_of)
    return entries
def _split_operands(operands: str) -> list[str]:
    """Split an operand string on its top-level commas.

    Commas nested inside ``(...)`` or ``{...}`` stay within their piece. Each
    piece is stripped of surrounding whitespace and empty pieces are dropped.
    An unmatched closing bracket is kept as ordinary text.
    """
    if not operands:
        return []

    pieces: list[str] = []
    current: list[str] = []
    nesting = 0
    for ch in operands:
        if ch == "," and nesting == 0:
            # A separator at nesting level zero ends the current piece.
            pieces.append("".join(current).strip())
            current = []
            continue
        if ch in ("(", "{"):
            nesting += 1
        elif ch in (")", "}") and nesting > 0:
            nesting -= 1
        current.append(ch)
    pieces.append("".join(current).strip())

    return [piece for piece in pieces if piece]
def _immediate_literal_text(text: str) -> str | None:
    """Return the hex digit text of an immediate literal, or None.

    Recognised forms, after stripping surrounding whitespace:

    * ``H'<hex>`` and ``0x<hex>`` return the hex digits exactly as written;
    * a decimal number from 0 to 255 returns two uppercase hex digits.

    Anything else, including a decimal number above 255, returns None.
    """
    candidate = text.strip()

    for pattern in (r"H'([0-9A-Fa-f]+)", r"0x([0-9A-Fa-f]+)"):
        found = re.fullmatch(pattern, candidate)
        if found is not None:
            return found.group(1)

    if re.fullmatch(r"\d+", candidate) is None:
        return None
    number = int(candidate, 10)
    return f"{number:02X}" if 0 <= number <= 0xFF else None
def _immediate_value(operand: str) -> int | None:
    """Return the numeric value of an immediate operand, or None.

    A single leading ``#`` is optional. The remaining text is handed to
    ``_immediate_literal_text`` and its digit text is read as hexadecimal.
    """
    body = operand.strip().removeprefix("#").strip()
    digits = _immediate_literal_text(body)
    return None if digits is None else int(digits, 16)
def _instruction_text(instruction: Mapping[str, Any]) -> str:
    """Join the ``mnemonic`` and ``operands`` fields into one display string.

    Missing or falsy fields count as empty, and the result is stripped so a
    lone mnemonic or lone operand text carries no stray space.
    """
    fields = (str(instruction.get(key) or "") for key in ("mnemonic", "operands"))
    return " ".join(fields).strip()
def _mnemonic_base(mnemonic: str) -> str:
    """Return *mnemonic* without its last ``.suffix``, if it has one."""
    head, dot, _tail = mnemonic.rpartition(".")
    if not dot:
        # No dot at all: the mnemonic is returned unchanged.
        return mnemonic
    return head
def _mnemonic_size(mnemonic: str) -> str:
    """Return the size suffix ``"B"`` or ``"W"`` of *mnemonic*, else ``""``.

    Only the text after the last dot is considered, and it must match
    exactly (case-sensitive).
    """
    _head, dot, tail = mnemonic.rpartition(".")
    if not dot:
        return ""
    return tail if tail in {"B", "W"} else ""
def _h16(value: int) -> str:
    """Format *value*, masked to its low 16 bits, as ``H'XXXX``."""
    low_word = value & 0xFFFF
    return "H'{:04X}".format(low_word)
def load_consistency_input(path: Path) -> JsonObject:
    """Read *path* as UTF-8 JSON and return it if it has the expected shape.

    Raises:
        ValueError: if the top-level JSON value is not an object or has no
            ``instructions`` key.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))
    if isinstance(payload, dict) and "instructions" in payload:
        return payload
    raise ValueError(f"{path} does not look like h8536_decompiler JSON output")
def write_consistency_report(
    input_path: Path,
    output_path: Path,
    *,
    json_output: bool = False,
) -> None:
    """Analyze the JSON at *input_path* and write the result to *output_path*.

    The output's parent directory is created if needed. With *json_output*
    the analysis is written as indented JSON, otherwise as the text report.
    The file is written as UTF-8 in both cases.
    """
    analysis = analyze_decompiler_consistency(load_consistency_input(input_path))
    output_path.parent.mkdir(parents=True, exist_ok=True)
    rendered = (
        json.dumps(analysis, indent=2)
        if json_output
        else format_consistency_report(analysis)
    )
    output_path.write_text(rendered, encoding="utf-8")
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: parse arguments and write the report.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read the
            process arguments.

    Returns:
        0 once the report has been written.
    """
    cli = argparse.ArgumentParser(
        description="Report decompiler/pseudocode semantic consistency checks.",
    )
    default_input = Path("build/rom_decompiled.json")
    default_output = Path("build/rom_consistency.txt")
    cli.add_argument(
        "input",
        nargs="?",
        type=Path,
        default=default_input,
        help="structured JSON emitted by h8536_decompiler.py",
    )
    cli.add_argument("--out", type=Path, default=default_output, help="consistency report output path")
    cli.add_argument("--json", action="store_true", help="write JSON instead of text")

    options = cli.parse_args(argv)
    destination = options.out
    write_consistency_report(options.input, destination, json_output=options.json)
    print(f"wrote {destination}")
    return 0
# Public names of this module; underscore-prefixed helpers are left out.
__all__ = [
"analyze_decompiler_consistency",
"format_consistency_report",
"is_byte_immediate_to_word_destination",
"load_consistency_input",
"main",
"write_consistency_report",
]