emulator working
This commit is contained in:
211
h8536/consistency.py
Normal file
211
h8536/consistency.py
Normal file
@@ -0,0 +1,211 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Mapping
|
||||
|
||||
|
||||
# Alias for the JSON-style dictionaries (string keys, arbitrary values) that
# the functions in this module build and return.
JsonObject = dict[str, Any]
|
||||
|
||||
|
||||
def analyze_decompiler_consistency(payload: Mapping[str, Any]) -> JsonObject:
    """Flag decompiler/pseudocode semantic cases that are easy to misread.

    Walks the ``instructions`` entry of *payload* in address order and emits
    one check record for every word-sized MOV whose source is a byte
    immediate.

    Returns:
        A dict with the keys ``kind``, ``summary`` and ``checks``.
    """
    findings = []
    for entry in _instruction_sequence(payload.get("instructions")):
        if not is_byte_immediate_to_word_destination(entry):
            continue
        finding = _byte_immediate_word_write_check(entry)
        # Empty records are dropped, mirroring the original filter pass.
        if finding:
            findings.append(finding)

    report = {"kind": "decompiler_pseudocode_consistency"}
    report["summary"] = _summary(findings)
    report["checks"] = findings
    return report
|
||||
|
||||
|
||||
def format_consistency_report(analysis: Mapping[str, Any]) -> str:
    """Render an analysis dict as a plain-text report.

    The report starts with a fixed heading and the analysis summary (or a
    placeholder when there is none).  When ``checks`` is a non-empty list,
    each mapping entry contributes a bullet line and, if it carries a
    ``summary``, a follow-up detail line.  Non-mapping entries are skipped.

    Returns:
        The report text, always terminated by exactly one newline.
    """
    heading = "Decompiler/Pseudocode Consistency"
    overview = str(analysis.get("summary") or "No checks emitted.")
    rendered = [heading, overview, ""]

    entries = analysis.get("checks")
    if isinstance(entries, list):
        for entry in entries:
            if isinstance(entry, Mapping):
                location = entry.get('address_hex', '?')
                text = entry.get('instruction', '')
                status = entry.get('status', 'info')
                rendered.append(f"- {location}: {text} [{status}]")
                detail = entry.get("summary")
                if detail:
                    rendered.append(f" {detail}")

    # rstrip removes the blank separator line when no bullet followed it.
    return "\n".join(rendered).rstrip() + "\n"
|
||||
|
||||
|
||||
def is_byte_immediate_to_word_destination(instruction: Mapping[str, Any]) -> bool:
    """Return True for a word-sized MOV/MOV:G whose source is a byte immediate.

    The instruction must have a ``.W`` size suffix, exactly two operands, and
    a first operand of the form ``#<literal>`` where the literal is written
    with at most two hex digits (decimal literals up to 255 also count).
    """
    name = str(instruction.get("mnemonic") or "")
    is_word_move = _mnemonic_base(name) in ("MOV:G", "MOV") and _mnemonic_size(name) == "W"
    if not is_word_move:
        return False

    pieces = _split_operands(str(instruction.get("operands") or ""))
    if len(pieces) != 2:
        return False

    src = pieces[0].strip()
    if not src.startswith("#"):
        return False

    digits = _immediate_literal_text(src[1:])
    if digits is None:
        return False
    # The width is judged from how many hex digits the literal is written with.
    return len(digits) <= 2
|
||||
|
||||
|
||||
def _byte_immediate_word_write_check(instruction: Mapping[str, Any]) -> JsonObject:
    """Build the report entry for a word MOV whose source is a byte immediate.

    Args:
        instruction: Decoded instruction mapping.  Reads ``address``,
            ``operands`` and ``text``; when ``text`` is missing or empty the
            display text is rebuilt from ``mnemonic`` and ``operands``.

    Returns:
        A JSON-serializable dict describing the finding: its kind and status,
        the address (raw and ``H'xxxx`` formatted), the instruction text, the
        pseudocode hint, the zero-extended immediate and a human-readable
        summary.
    """
    address = int(instruction.get("address") or 0)
    operands = _split_operands(str(instruction.get("operands") or ""))
    # The first operand is the immediate source.  Guard the lookup so an
    # instruction with missing or empty operands falls back to the generic
    # description instead of raising IndexError.
    immediate = _immediate_value(operands[0]) if operands else None
    value_text = f"0x{immediate:04X}" if immediate is not None else "zero-extended byte"
    return {
        "kind": "byte_immediate_to_word_destination",
        "status": "requires_zero_extend8_to16_pseudocode",
        "address": address,
        "address_hex": _h16(address),
        "instruction": str(instruction.get("text") or _instruction_text(instruction)),
        "expected_pseudocode_hint": "zero_extend8_to16",
        "zero_extended_value_hex": value_text,
        "summary": (
            "Word-sized MOV with an 8-bit immediate writes a zero-extended word. "
            "Pseudocode should not model this as a one-byte write or preserve the old low byte."
        ),
    }
|
||||
|
||||
|
||||
def _summary(width_checks: list[JsonObject]) -> str:
|
||||
if not width_checks:
|
||||
return "No byte-immediate-to-word destination cases found."
|
||||
return (
|
||||
f"{len(width_checks)} byte-immediate-to-word destination case(s) require "
|
||||
"explicit zero-extension in pseudocode."
|
||||
)
|
||||
|
||||
|
||||
def _instruction_sequence(value: object) -> list[Mapping[str, Any]]:
|
||||
if not isinstance(value, list):
|
||||
return []
|
||||
instructions = [item for item in value if isinstance(item, Mapping)]
|
||||
return sorted(instructions, key=lambda item: int(item.get("address") or 0))
|
||||
|
||||
|
||||
def _split_operands(operands: str) -> list[str]:
|
||||
if not operands:
|
||||
return []
|
||||
parts: list[str] = []
|
||||
start = 0
|
||||
depth = 0
|
||||
for idx, char in enumerate(operands):
|
||||
if char in "({":
|
||||
depth += 1
|
||||
elif char in ")}" and depth:
|
||||
depth -= 1
|
||||
elif char == "," and depth == 0:
|
||||
parts.append(operands[start:idx].strip())
|
||||
start = idx + 1
|
||||
parts.append(operands[start:].strip())
|
||||
return [part for part in parts if part]
|
||||
|
||||
|
||||
def _immediate_literal_text(text: str) -> str | None:
|
||||
stripped = text.strip()
|
||||
h_match = re.fullmatch(r"H'([0-9A-Fa-f]+)", stripped)
|
||||
if h_match:
|
||||
return h_match.group(1)
|
||||
x_match = re.fullmatch(r"0x([0-9A-Fa-f]+)", stripped)
|
||||
if x_match:
|
||||
return x_match.group(1)
|
||||
decimal_match = re.fullmatch(r"\d+", stripped)
|
||||
if decimal_match:
|
||||
value = int(stripped, 10)
|
||||
if 0 <= value <= 0xFF:
|
||||
return f"{value:02X}"
|
||||
return None
|
||||
|
||||
|
||||
def _immediate_value(operand: str) -> int | None:
    """Return the numeric value of an immediate operand, or None.

    A single leading ``#`` is optional; the remaining text is parsed by
    ``_immediate_literal_text`` and its hex digits converted to an int.
    """
    body = operand.strip()
    body = body[1:].strip() if body.startswith("#") else body
    digits = _immediate_literal_text(body)
    return None if digits is None else int(digits, 16)
|
||||
|
||||
|
||||
def _instruction_text(instruction: Mapping[str, Any]) -> str:
|
||||
mnemonic = str(instruction.get("mnemonic") or "")
|
||||
operands = str(instruction.get("operands") or "")
|
||||
return f"{mnemonic} {operands}".strip()
|
||||
|
||||
|
||||
def _mnemonic_base(mnemonic: str) -> str:
|
||||
return mnemonic.rsplit(".", 1)[0] if "." in mnemonic else mnemonic
|
||||
|
||||
|
||||
def _mnemonic_size(mnemonic: str) -> str:
|
||||
suffix = mnemonic.rsplit(".", 1)[-1] if "." in mnemonic else ""
|
||||
return suffix if suffix in {"B", "W"} else ""
|
||||
|
||||
|
||||
def _h16(value: int) -> str:
|
||||
return f"H'{value & 0xFFFF:04X}"
|
||||
|
||||
|
||||
def load_consistency_input(path: Path) -> JsonObject:
    """Read the JSON document at *path* and return it as a dict.

    Raises:
        ValueError: If the top-level value is not an object or it has no
            ``instructions`` key.
    """
    with path.open("r", encoding="utf-8") as stream:
        document = json.load(stream)

    if isinstance(document, dict) and "instructions" in document:
        return document
    raise ValueError(f"{path} does not look like h8536_decompiler JSON output")
|
||||
|
||||
|
||||
def write_consistency_report(input_path: Path, output_path: Path, *, json_output: bool = False) -> None:
    """Analyze the JSON at *input_path* and write the result to *output_path*.

    The output's parent directory is created if needed.  With *json_output*
    the analysis is written as indented JSON; otherwise the plain-text report
    is written.  Both variants are encoded as UTF-8.
    """
    payload = load_consistency_input(input_path)
    analysis = analyze_decompiler_consistency(payload)

    # Create the directory before rendering, as the original ordering does.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if json_output:
        rendered = json.dumps(analysis, indent=2)
    else:
        rendered = format_consistency_report(analysis)
    output_path.write_text(rendered, encoding="utf-8")
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: parse arguments and write the report.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read the
            process arguments.

    Returns:
        Always 0 once the report has been written.
    """
    cli = argparse.ArgumentParser(
        description="Report decompiler/pseudocode semantic consistency checks.",
    )
    cli.add_argument(
        "input",
        nargs="?",
        type=Path,
        default=Path("build/rom_decompiled.json"),
        help="structured JSON emitted by h8536_decompiler.py",
    )
    cli.add_argument(
        "--out",
        type=Path,
        default=Path("build/rom_consistency.txt"),
        help="consistency report output path",
    )
    cli.add_argument("--json", action="store_true", help="write JSON instead of text")
    options = cli.parse_args(argv)

    source_path, report_path = options.input, options.out
    write_consistency_report(source_path, report_path, json_output=options.json)
    print(f"wrote {report_path}")
    return 0
|
||||
|
||||
|
||||
# Public API of this module; helpers prefixed with an underscore stay private.
__all__ = [
    "analyze_decompiler_consistency",
    "format_consistency_report",
    "is_byte_immediate_to_word_destination",
    "load_consistency_input",
    "main",
    "write_consistency_report",
]
|
||||
Reference in New Issue
Block a user