212 lines
6.9 KiB
Python
212 lines
6.9 KiB
Python
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Any, Mapping
|
|
|
|
|
|
# Alias used in this module's annotations for the plain-dict records it builds and returns.
JsonObject = dict[str, Any]
|
|
|
|
|
|
def analyze_decompiler_consistency(payload: Mapping[str, Any]) -> JsonObject:
    """Flag decompiler/pseudocode semantic cases that are easy to misread.

    Walks ``payload["instructions"]`` (normalised by ``_instruction_sequence``),
    keeps the entries accepted by ``is_byte_immediate_to_word_destination`` and
    builds one check record per accepted entry.

    Returns:
        A dict with the keys ``kind``, ``summary`` and ``checks``.
    """
    findings: list[JsonObject] = []
    for instruction in _instruction_sequence(payload.get("instructions")):
        if not is_byte_immediate_to_word_destination(instruction):
            continue
        record = _byte_immediate_word_write_check(instruction)
        # Empty records are dropped so that only real findings are reported.
        if record:
            findings.append(record)

    return {
        "kind": "decompiler_pseudocode_consistency",
        "summary": _summary(findings),
        "checks": findings,
    }
|
|
|
|
|
|
def format_consistency_report(analysis: Mapping[str, Any]) -> str:
    """Render an analysis mapping as a plain-text report.

    The report starts with a fixed title and the analysis summary (or a
    placeholder when the summary is missing/empty). Each mapping found in
    ``analysis["checks"]`` then contributes a ``- address: instruction [status]``
    line, followed by its own summary line when it has one. Entries that are
    not mappings are ignored.

    Returns:
        The report text, right-stripped and terminated by exactly one newline.
    """
    header = [
        "Decompiler/Pseudocode Consistency",
        str(analysis.get("summary") or "No checks emitted."),
        "",
    ]

    body: list[str] = []
    entries = analysis.get("checks")
    if isinstance(entries, list):
        for entry in entries:
            if not isinstance(entry, Mapping):
                continue
            where = entry.get("address_hex", "?")
            what = entry.get("instruction", "")
            state = entry.get("status", "info")
            body.append(f"- {where}: {what} [{state}]")
            note = entry.get("summary")
            if note:
                body.append(f" {note}")

    # With no usable entries the trailing blank header line is stripped away.
    return "\n".join(header + body).rstrip() + "\n"
|
|
|
|
|
|
def is_byte_immediate_to_word_destination(instruction: Mapping[str, Any]) -> bool:
    """Tell whether an instruction is a word-sized MOV fed by a short immediate.

    All of the following must hold:

    * the mnemonic base is ``MOV`` or ``MOV:G`` and its size suffix is ``W``;
    * the operand text splits into exactly two operands;
    * the first operand starts with ``#``;
    * the literal text extracted from that immediate is at most two
      characters long.
    """
    name = str(instruction.get("mnemonic") or "")
    is_word_mov = _mnemonic_base(name) in {"MOV:G", "MOV"} and _mnemonic_size(name) == "W"
    if not is_word_mov:
        return False

    parts = _split_operands(str(instruction.get("operands") or ""))
    if len(parts) != 2:
        return False

    first = parts[0].strip()
    if first[:1] != "#":
        return False

    digits = _immediate_literal_text(first[1:])
    if digits is None:
        return False
    return len(digits) <= 2
|
|
|
|
|
|
def _byte_immediate_word_write_check(instruction: Mapping[str, Any]) -> JsonObject:
    """Build the check record emitted for one flagged instruction.

    Args:
        instruction: Mapping that may carry ``address``, ``operands``,
            ``text`` and ``mnemonic`` entries; missing or falsy values fall
            back to ``0`` / empty strings.

    Returns:
        A dict describing the finding. ``zero_extended_value_hex`` holds the
        immediate rendered as ``0x`` plus four hex digits when the first
        operand parses as an immediate, otherwise the text
        ``"zero-extended byte"``.
    """
    address = int(instruction.get("address") or 0)
    operands = _split_operands(str(instruction.get("operands") or ""))
    # An instruction without operands must reach the textual fallback below
    # instead of raising IndexError on the first-operand lookup.
    immediate = _immediate_value(operands[0]) if operands else None
    value_text = f"0x{immediate:04X}" if immediate is not None else "zero-extended byte"
    return {
        "kind": "byte_immediate_to_word_destination",
        "status": "requires_zero_extend8_to16_pseudocode",
        "address": address,
        "address_hex": _h16(address),
        # Prefer the pre-rendered text; rebuild it from mnemonic/operands otherwise.
        "instruction": str(instruction.get("text") or _instruction_text(instruction)),
        "expected_pseudocode_hint": "zero_extend8_to16",
        "zero_extended_value_hex": value_text,
        "summary": (
            "Word-sized MOV with an 8-bit immediate writes a zero-extended word. "
            "Pseudocode should not model this as a one-byte write or preserve the old low byte."
        ),
    }
|
|
|
|
|
|
def _summary(width_checks: list[JsonObject]) -> str:
|
|
if not width_checks:
|
|
return "No byte-immediate-to-word destination cases found."
|
|
return (
|
|
f"{len(width_checks)} byte-immediate-to-word destination case(s) require "
|
|
"explicit zero-extension in pseudocode."
|
|
)
|
|
|
|
|
|
def _instruction_sequence(value: object) -> list[Mapping[str, Any]]:
|
|
if not isinstance(value, list):
|
|
return []
|
|
instructions = [item for item in value if isinstance(item, Mapping)]
|
|
return sorted(instructions, key=lambda item: int(item.get("address") or 0))
|
|
|
|
|
|
def _split_operands(operands: str) -> list[str]:
|
|
if not operands:
|
|
return []
|
|
parts: list[str] = []
|
|
start = 0
|
|
depth = 0
|
|
for idx, char in enumerate(operands):
|
|
if char in "({":
|
|
depth += 1
|
|
elif char in ")}" and depth:
|
|
depth -= 1
|
|
elif char == "," and depth == 0:
|
|
parts.append(operands[start:idx].strip())
|
|
start = idx + 1
|
|
parts.append(operands[start:].strip())
|
|
return [part for part in parts if part]
|
|
|
|
|
|
def _immediate_literal_text(text: str) -> str | None:
|
|
stripped = text.strip()
|
|
h_match = re.fullmatch(r"H'([0-9A-Fa-f]+)", stripped)
|
|
if h_match:
|
|
return h_match.group(1)
|
|
x_match = re.fullmatch(r"0x([0-9A-Fa-f]+)", stripped)
|
|
if x_match:
|
|
return x_match.group(1)
|
|
decimal_match = re.fullmatch(r"\d+", stripped)
|
|
if decimal_match:
|
|
value = int(stripped, 10)
|
|
if 0 <= value <= 0xFF:
|
|
return f"{value:02X}"
|
|
return None
|
|
|
|
|
|
def _immediate_value(operand: str) -> int | None:
    """Parse an immediate operand into an integer.

    A leading ``#`` is removed when present. The remainder goes through
    ``_immediate_literal_text`` and the resulting digits are read as base 16.
    Returns ``None`` when no literal is recognised.
    """
    text = operand.strip()
    if text[:1] == "#":
        text = text[1:].strip()
    digits = _immediate_literal_text(text)
    return None if digits is None else int(digits, 16)
|
|
|
|
|
|
def _instruction_text(instruction: Mapping[str, Any]) -> str:
|
|
mnemonic = str(instruction.get("mnemonic") or "")
|
|
operands = str(instruction.get("operands") or "")
|
|
return f"{mnemonic} {operands}".strip()
|
|
|
|
|
|
def _mnemonic_base(mnemonic: str) -> str:
|
|
return mnemonic.rsplit(".", 1)[0] if "." in mnemonic else mnemonic
|
|
|
|
|
|
def _mnemonic_size(mnemonic: str) -> str:
|
|
suffix = mnemonic.rsplit(".", 1)[-1] if "." in mnemonic else ""
|
|
return suffix if suffix in {"B", "W"} else ""
|
|
|
|
|
|
def _h16(value: int) -> str:
|
|
return f"H'{value & 0xFFFF:04X}"
|
|
|
|
|
|
def load_consistency_input(path: Path) -> JsonObject:
    """Read a UTF-8 JSON file and return its top-level object.

    Raises:
        ValueError: if the document is not a JSON object or has no
            ``"instructions"`` key.
    """
    with path.open("r", encoding="utf-8") as stream:
        document = json.load(stream)

    if isinstance(document, dict) and "instructions" in document:
        return document
    raise ValueError(f"{path} does not look like h8536_decompiler JSON output")
|
|
|
|
|
|
def write_consistency_report(input_path: Path, output_path: Path, *, json_output: bool = False) -> None:
    """Analyse ``input_path`` and write the result to ``output_path``.

    Parent directories of ``output_path`` are created when missing. The file
    is written as UTF-8: indented JSON when ``json_output`` is true, the
    plain-text report otherwise.
    """
    analysis = analyze_decompiler_consistency(load_consistency_input(input_path))
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if json_output:
        rendered = json.dumps(analysis, indent=2)
    else:
        rendered = format_consistency_report(analysis)
    output_path.write_text(rendered, encoding="utf-8")
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: parse arguments, write the report, return 0.

    Args:
        argv: Argument list handed to ``argparse``; ``None`` lets argparse
            read the process arguments.
    """
    parser = argparse.ArgumentParser(
        description="Report decompiler/pseudocode semantic consistency checks.",
    )
    parser.add_argument(
        "input",
        nargs="?",
        type=Path,
        default=Path("build/rom_decompiled.json"),
        help="structured JSON emitted by h8536_decompiler.py",
    )
    parser.add_argument(
        "--out",
        type=Path,
        default=Path("build/rom_consistency.txt"),
        help="consistency report output path",
    )
    parser.add_argument("--json", action="store_true", help="write JSON instead of text")
    options = parser.parse_args(argv)

    destination = options.out
    write_consistency_report(options.input, destination, json_output=options.json)
    print(f"wrote {destination}")
    return 0
|
|
|
|
|
|
# Names exported by ``from <module> import *``.
__all__ = [
    "analyze_decompiler_consistency",
    "format_consistency_report",
    "is_byte_immediate_to_word_destination",
    "load_consistency_input",
    "main",
    "write_consistency_report",
]
|