1
0

DTC and SCI improvements

This commit is contained in:
Aiden
2026-05-25 14:22:32 +10:00
parent 62d1c3c876
commit 80819448cf
21 changed files with 13823 additions and 86 deletions

652
h8536/pseudocode.py Normal file
View File

@@ -0,0 +1,652 @@
from __future__ import annotations
import argparse
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any
JsonObject = dict[str, Any]
# Conditional-branch mnemonic -> C-like expression over the C/Z/N/V flag
# variables; used as the `if (...)` condition when a branch is rendered.
BRANCH_CONDITIONS = {
    # Never taken.
    "BRN": "0",
    # Unsigned comparisons.
    "BHI": "!C && !Z",
    "BLS": "C || Z",
    "BCC": "!C",
    "BCS": "C",
    # Equality.
    "BNE": "!Z",
    "BEQ": "Z",
    # Overflow and sign.
    "BVC": "!V",
    "BVS": "V",
    "BPL": "!N",
    "BMI": "N",
    # Signed comparisons.
    "BGE": "N == V",
    "BLT": "N != V",
    "BGT": "!Z && (N == V)",
    "BLE": "Z || (N != V)",
}
@dataclass(frozen=True)
class PseudocodeOptions:
    """Immutable switches that control what the pseudocode renderer emits."""

    # Put the original assembly text into each statement's trailing comment.
    include_asm: bool = True
    # Put the instruction address into each statement's trailing comment.
    include_addresses: bool = True
    # Add a cycle summary to the trailing comment when the instruction has one.
    include_cycles: bool = False
    # Emit the register and function declaration section after the header.
    emit_declarations: bool = True
    # Render only the first N functions; None renders them all.
    max_functions: int | None = None
def generate_pseudocode(
    payload: JsonObject,
    *,
    source_name: str = "",
    options: PseudocodeOptions | None = None,
) -> str:
    """Render decompiler JSON as one C-like pseudocode document.

    Args:
        payload: Decompiler output; the "instructions", "vectors" and
            "call_graph" keys are read when present.
        source_name: Optional name shown in the file header.
        options: Rendering switches; defaults are used when omitted.

    Returns:
        The pseudocode text, terminated by exactly one newline.
    """
    opts = options if options else PseudocodeOptions()
    instructions = list(payload.get("instructions", []))
    label_names = _collect_label_names(payload)
    functions = _function_nodes(payload, instructions, label_names)
    limit = opts.max_functions
    if limit is not None:
        functions = functions[:limit]

    out: list[str] = [*_file_header(source_name, payload)]
    if opts.emit_declarations:
        out += _declarations(instructions, functions, label_names)

    by_address = {int(ins["address"]): ins for ins in instructions}
    covered: set[int] = set()
    for function in functions:
        body, used = _render_function(function, by_address, label_names, opts)
        if not body:
            continue
        out += body
        covered |= used

    # Anything no function claimed is still emitted, in address order.
    orphans = sorted(address for address in by_address if address not in covered)
    if orphans:
        out += _render_orphan_block(orphans, by_address, label_names, opts)
    return "\n".join(out).rstrip() + "\n"
def load_pseudocode_input(path: Path) -> JsonObject:
    """Read and minimally validate a decompiler JSON file.

    Args:
        path: Location of the UTF-8 encoded JSON document.

    Returns:
        The parsed top-level JSON object.

    Raises:
        ValueError: If the document is not an object with an
            "instructions" key.
    """
    with path.open("r", encoding="utf-8") as handle:
        payload = json.load(handle)
    looks_valid = isinstance(payload, dict) and "instructions" in payload
    if looks_valid:
        return payload
    raise ValueError(f"{path} does not look like h8536_decompiler JSON output")
def write_pseudocode(input_path: Path, output_path: Path, options: PseudocodeOptions) -> None:
    """Load decompiler JSON, render it, and write the result as UTF-8.

    Missing parent directories of ``output_path`` are created. The input path
    string is passed on as the source name for the generated header.
    """
    payload = load_pseudocode_input(input_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    rendered = generate_pseudocode(payload, source_name=str(input_path), options=options)
    output_path.write_text(rendered, encoding="utf-8")
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: parse arguments and write the pseudocode file.

    Args:
        argv: Argument list to parse; None lets argparse read sys.argv.

    Returns:
        0 after the output file has been written.
    """
    parser = argparse.ArgumentParser(
        description="Generate conservative C-like pseudocode from h8536_decompiler JSON output.",
    )
    parser.add_argument(
        "input",
        nargs="?",
        type=Path,
        default=Path("build/rom_decompiled.json"),
        help="structured JSON emitted by h8536_decompiler.py",
    )
    parser.add_argument(
        "--out",
        type=Path,
        default=Path("build/rom_pseudocode.c"),
        help="pseudocode output path",
    )
    # Plain boolean switches, registered in the order they appear in --help.
    switches = (
        ("--no-asm", "omit original assembly from line comments"),
        ("--no-addresses", "omit instruction addresses from line comments"),
        ("--cycles", "include cycle estimates when present in JSON"),
        ("--no-declarations", "omit register/function declarations"),
    )
    for flag, help_text in switches:
        parser.add_argument(flag, action="store_true", help=help_text)
    parser.add_argument("--max-functions", type=int, default=None, help="emit only the first N functions")

    args = parser.parse_args(argv)
    write_pseudocode(
        args.input,
        args.out,
        PseudocodeOptions(
            include_asm=not args.no_asm,
            include_addresses=not args.no_addresses,
            include_cycles=args.cycles,
            emit_declarations=not args.no_declarations,
            max_functions=args.max_functions,
        ),
    )
    print(f"wrote {args.out}")
    return 0
def _file_header(source_name: str, payload: JsonObject) -> list[str]:
    """Build the fixed preamble of the generated pseudocode file.

    The preamble is a banner comment (with vector, function and instruction
    counts taken from ``payload``) followed by typedefs, the BIT() macro and
    the register/flag variable declarations.

    Args:
        source_name: Name shown after "from" in the banner; omitted if empty.
        payload: Decompiler JSON; "vectors", "call_graph" and "instructions"
            are read for the counts.

    Returns:
        The header lines, ending with a blank line.
    """
    vector_count = len(payload.get("vectors", []))
    function_count = len(payload.get("call_graph", {}).get("nodes", []))
    instruction_count = len(payload.get("instructions", []))
    # The name lands inside a C block comment, so it gets the same treatment
    # as other comment text: a "*/" or newline in a path must not end or
    # split the banner.
    source = f" from {_sanitize_comment(source_name)}" if source_name else ""
    return [
        "/*",
        f" * H8/536 C-like pseudocode{source}",
        " *",
        " * This is a conservative structural translation of the decompiler JSON.",
        " * Helpers such as set_flags_cmp8(), MEM8[], BIT(), C/Z/N/V, and",
        " * return_from_interrupt() are pseudocode placeholders, not a runtime ABI.",
        " *",
        f" * vectors: {vector_count}, functions: {function_count}, instructions: {instruction_count}",
        " */",
        "",
        "#include <stdint.h>",
        "",
        "typedef uint8_t u8;",
        "typedef uint16_t u16;",
        "",
        "#define BIT(n) (1u << (n))",
        "extern volatile u8 MEM8[0x10000];",
        "extern volatile u16 MEM16[0x10000];",
        "",
        "u16 R0, R1, R2, R3, R4, R5, R6, R7;",
        "u16 SR;",
        "u8 CCR, BR, EP, DP, TP;",
        "int C, Z, N, V;",
        "",
    ]
def _declarations(instructions: list[JsonObject], functions: list[JsonObject], labels: dict[int, str]) -> list[str]:
    """Build the declaration section: referenced registers, then functions.

    Registers are listed in ascending address order as ``extern volatile``
    symbols; each function gets a ``void name(void);`` prototype. Either
    group is left out entirely when it would be empty.
    """
    out: list[str] = []

    registers = _referenced_io_registers(instructions)
    if registers:
        out.append("/* H8/536 register field symbols used by this ROM. */")
        by_addr = sorted(registers.items(), key=lambda item: item[1][0])
        for name, (address, width) in by_addr:
            c_type = "u8" if width != 16 else "u16"
            out.append(f"extern volatile {c_type} {c_identifier(name)}; /* 0x{address:04X} */")
        out.append("")

    if functions:
        out.append("/* Function entry points discovered from vectors and call targets. */")
        for function in functions:
            entry = int(function["start"])
            label = labels.get(entry, str(function.get("label", "")))
            out.append(f"void {c_identifier(label)}(void);")
        out.append("")

    return out
def _referenced_io_registers(instructions: list[JsonObject]) -> dict[str, tuple[int, int]]:
    """Collect named references as ``name -> (address, widest access in bits)``.

    References without a name are skipped. The address comes from the last
    reference seen for a name; the width is the largest seen, starting at 8.
    """
    found: dict[str, tuple[int, int]] = {}
    for ins in instructions:
        width = _size_bits(_mnemonic_size(str(ins.get("mnemonic", ""))))
        for ref in ins.get("references", []):
            name = ref.get("name")
            if name:
                address = int(ref["address"])
                _, known_width = found.get(name, (address, 8))
                found[name] = (address, max(known_width, width))
    return found
def _collect_label_names(payload: JsonObject) -> dict[int, str]:
    """Map addresses to C-safe label names.

    Vector targets are applied first, then call-graph nodes (which overwrite
    a vector name at the same address), and finally instruction targets,
    which only fill addresses that have no name yet.
    """
    names: dict[int, str] = {}

    for vector in payload.get("vectors", []):
        target, label = vector.get("target"), vector.get("target_label")
        if target is None or not label:
            continue
        names[int(target)] = c_identifier(str(label))

    for node in payload.get("call_graph", {}).get("nodes", []):
        start = int(node["start"])
        names[start] = c_identifier(str(node.get("label") or _label_for(start)))

    for ins in payload.get("instructions", []):
        for raw_target in ins.get("targets", []):
            address = int(raw_target)
            if address not in names:
                names[address] = c_identifier(_label_for(address))

    return names
def _function_nodes(
    payload: JsonObject,
    instructions: list[JsonObject],
    labels: dict[int, str],
) -> list[JsonObject]:
    """Return the functions to render, ordered by start address.

    Call-graph nodes are shallow-copied and sorted when there are any.
    Otherwise all instructions are wrapped in one synthetic function spanning
    the lowest to the highest address, or nothing if there are none.
    """
    copies = [dict(node) for node in payload.get("call_graph", {}).get("nodes", [])]
    if copies:
        return sorted(copies, key=lambda node: int(node["start"]))
    if not instructions:
        return []

    addresses = [ins["address"] for ins in instructions]
    first = int(min(addresses))
    last = int(max(addresses))
    synthetic: JsonObject = {
        "start": first,
        "end": last,
        "label": labels.get(first, _label_for(first)),
        "sources": [],
        "instruction_count": len(instructions),
        "calls": [],
        "unresolved_calls": 0,
    }
    return [synthetic]
def _render_function(
    function: JsonObject,
    by_address: dict[int, JsonObject],
    labels: dict[int, str],
    opts: PseudocodeOptions,
) -> tuple[list[str], set[int]]:
    """Render one function node as a C-like ``void name(void)`` body.

    Args:
        function: Node with "start" and optionally "end", "label", "sources".
        by_address: Every decoded instruction keyed by address.
        labels: Known label names keyed by address.
        opts: Rendering switches passed on to the line-comment builder.

    Returns:
        ``(lines, addresses)``: the rendered lines and the set of instruction
        addresses they cover. Both are empty when no instruction falls inside
        the inclusive ``start..end`` range.
    """
    start = int(function["start"])
    end = int(function.get("end", start))
    addresses = [address for address in sorted(by_address) if start <= address <= end]
    if not addresses:
        return [], set()

    name = c_identifier(labels.get(start, str(function.get("label") or _label_for(start))))
    # A label line is emitted for local branch targets and for any address
    # that already has a name.
    local_targets = _local_target_addresses(addresses, by_address) | {
        address for address in addresses if address in labels
    }

    lines = [f"void {name}(void)", "{"]
    sources = function.get("sources") or []
    if sources:
        # The source names come from the JSON and end up inside a C block
        # comment, so they are sanitized like every other comment fragment.
        joined = ", ".join(str(source) for source in sources)
        lines.append(f" /* vector sources: {_sanitize_comment(joined)} */")
    for address in addresses:
        # The entry address is already named by the function signature.
        if address in local_targets and address != start:
            lines.append(f"{labels.get(address, _label_for(address))}:")
        ins = by_address[address]
        statement = _translate_instruction(ins, labels)
        comment = _line_comment(ins, opts)
        lines.append(f" {statement}{comment}")
    lines.append("}")
    lines.append("")
    return lines, set(addresses)
def _render_orphan_block(
    addresses: list[int],
    by_address: dict[int, JsonObject],
    labels: dict[int, str],
    opts: PseudocodeOptions,
) -> list[str]:
    """Render instructions owned by no function as one catch-all function.

    Unlike a regular function body, a label line is emitted for the first
    address too when it is a branch target or has a known name.
    """
    named_here = {address for address in addresses if address in labels}
    label_points = _local_target_addresses(addresses, by_address) | named_here

    out = ["void unreached_or_unowned_code(void)", "{"]
    for address in addresses:
        if address in label_points:
            out.append(f"{labels.get(address, _label_for(address))}:")
        ins = by_address[address]
        out.append(f" {_translate_instruction(ins, labels)}{_line_comment(ins, opts)}")
    out += ["}", ""]
    return out
def _local_target_addresses(addresses: list[int], by_address: dict[int, JsonObject]) -> set[int]:
    """Return the branch/jump targets that land inside ``addresses`` itself."""
    owned = set(addresses)
    return {
        int(target)
        for address in addresses
        for target in by_address[address].get("targets", [])
        if int(target) in owned
    }
def _translate_instruction(ins: JsonObject, labels: dict[int, str]) -> str:
    """Translate one decoded instruction into a single C-like statement.

    The instruction's "kind" decides control-flow forms first (return, rte,
    sleep, call, branch/jump). After that the mnemonic base and operand count
    select a template. Anything unrecognised, including a known mnemonic with
    an unexpected operand count, becomes an ``asm_<mnemonic>("...")``
    placeholder carrying the original text.
    """
    mnemonic = str(ins.get("mnemonic", ""))
    kind = str(ins.get("kind", "normal"))
    ops = split_operands(str(ins.get("operands", "")))
    base = _mnemonic_base(mnemonic)
    size = _mnemonic_size(mnemonic)

    # --- control flow, decided by the instruction kind -------------------
    if kind == "return":
        if not ops:
            return "return;"
        return f"return_with_stack_adjust({_format_operand(ops[0], size)});"
    fixed_by_kind = {
        "rte": "return_from_interrupt();",
        "sleep": "sleep_until_interrupt();",
    }
    if kind in fixed_by_kind:
        return fixed_by_kind[kind]
    if kind == "call":
        return _call_statement(ins, labels, ops)
    if kind in ("branch", "jump"):
        return _branch_or_jump_statement(ins, labels, ops, base)

    # --- directives (base starts with a dot) ------------------------------
    if base.startswith("."):
        return f"emit_data({_quoted(str(ins.get('text', mnemonic)))});"

    # --- two-operand forms: source first, destination second --------------
    if len(ops) == 2:
        first, second = ops
        if base in {"MOV", "MOV:G", "MOV:I", "MOV:E", "MOV:L", "MOV:S", "MOV:F", "LDC", "STC"}:
            source = _format_operand(first, size)
            dest = _format_operand(second, size, lvalue=True)
            return f"{dest} = {_cast(source, size)};"
        compound_operator = {
            "ADD": "+=", "ADD:G": "+=", "ADD:Q": "+=", "ADDS": "+=",
            "SUB": "-=", "SUBS": "-=",
            "OR": "|=", "ORC": "|=",
            "AND": "&=", "ANDC": "&=",
            "XOR": "^=", "XORC": "^=",
        }.get(base)
        if compound_operator is not None:
            return _binary_update(ops, compound_operator, size)
        if base == "MOVFPE":
            source = _format_operand(first, size)
            dest = _format_operand(second, size, lvalue=True)
            return f"{dest} = read_eclock({source});"
        if base == "MOVTPE":
            source = _format_operand(first, size)
            dest = _format_operand(second, size, lvalue=True)
            return f"write_eclock({dest}, {source});"
        if base in {"ADDX", "SUBX", "MULXU", "DIVXU"}:
            source = _format_operand(first, size)
            dest = _format_operand(second, size, lvalue=True)
            return f"{dest} = {_helper_name(base, size)}({dest}, {source});"
        if base in {"CMP", "CMP:G", "CMP:I", "CMP:E"}:
            source = _format_operand(first, size)
            dest = _format_operand(second, size)
            return f"{_helper_name('set_flags_cmp', size)}({dest}, {source});"
        if base in {"BSET", "BCLR", "BNOT", "BTST"}:
            return _bit_statement(base, ops, size)
        if base == "LDM":
            return f"pop_registers({_register_list_argument(second)});"
        if base == "STM":
            return f"push_registers({_register_list_argument(first)});"
        if base == "LINK":
            return f"link_frame({_format_operand(second, size)});"

    # --- one-operand forms -------------------------------------------------
    elif len(ops) == 1:
        (only,) = ops
        if base == "TST":
            return f"{_helper_name('set_flags_tst', size)}({_format_operand(only, size)});"
        in_place = {
            "CLR": "{t} = 0;",
            "NEG": "{t} = -{t};",
            "NOT": "{t} = ~{t};",
            "SHAL": "{t} <<= 1;",
            "SHLL": "{t} <<= 1;",
            "SHAR": "{t} >>= 1;",
            "SHLR": "{t} >>= 1;",
            "SWAP": "{t} = swap_bytes({t});",
            "EXTU": "{t} = zero_extend8({t});",
            "EXTS": "{t} = sign_extend8({t});",
        }
        if base in in_place:
            return in_place[base].format(t=_format_operand(only, size, lvalue=True))
        if base in {"ROTL", "ROTR", "ROTXL", "ROTXR"}:
            target = _format_operand(only, size, lvalue=True)
            return f"{target} = {_helper_name(base.lower(), size)}({target});"

    # --- forms that do not depend on an exact operand count ----------------
    if base == "UNLK":
        return "unlink_frame();"
    if base == "TRAPA" and ops:
        return f"trap({_format_operand(ops[0], size)});"
    if base == "TRAP/VS":
        return "trap_vs();"
    if base == "NOP":
        return "/* nop */;"

    # Fallback: keep the original text visible rather than guess at semantics.
    return f"asm_{_safe_token(base)}({_quoted(str(ins.get('text') or mnemonic))});"
def _branch_or_jump_statement(ins: JsonObject, labels: dict[int, str], ops: list[str], base: str) -> str:
    """Render a branch or jump as a ``goto``, optionally guarded by a condition.

    BRA/JMP/PJMP become an unconditional ``goto`` (or ``goto_indirect(...)``
    when no target is known). ``SCB/<cond>`` is guarded by
    ``scb_<cond>(register)``. Every other mnemonic is guarded by its entry in
    BRANCH_CONDITIONS, or by a ``cond_<mnemonic>()`` placeholder.
    """
    target = _target_label(ins, labels)

    if base in {"BRA", "JMP", "PJMP"}:
        if target:
            return f"goto {target};"
        expr = _format_operand(ops[0], "") if ops else "unknown_target"
        return f"goto_indirect({expr});"

    destination = target or "unknown_target"
    if base.startswith("SCB/"):
        register = _format_operand(ops[0], "") if ops else "R?"
        cond = base.split("/", 1)[1].lower()
        return f"if (scb_{cond}({register})) goto {destination};"

    if base in BRANCH_CONDITIONS:
        condition = BRANCH_CONDITIONS[base]
    else:
        condition = f"cond_{_safe_token(base)}()"
    return f"if ({condition}) goto {destination};"
def _call_statement(ins: JsonObject, labels: dict[int, str], ops: list[str]) -> str:
    """Render a call: ``label();`` for a known target, else ``call_indirect(...)``."""
    target = _target_label(ins, labels)
    if not target:
        callee = _format_operand(ops[0], "") if ops else "unknown_target"
        return f"call_indirect({callee});"
    return f"{target}();"
def _target_label(ins: JsonObject, labels: dict[int, str]) -> str:
    """Return the label for the instruction's first target, or "" if it has none."""
    targets = ins.get("targets", [])
    if not targets:
        return ""
    first = int(targets[0])
    if first in labels:
        return labels[first]
    return _label_for(first)
def _binary_update(ops: list[str], operator: str, size: str) -> str:
    """Render ``dest <operator> (cast)source;`` for a source, destination pair."""
    source_op, dest_op = ops[0], ops[1]
    value = _cast(_format_operand(source_op, size), size)
    target = _format_operand(dest_op, size, lvalue=True)
    return f"{target} {operator} {value};"
def _bit_statement(base: str, ops: list[str], size: str) -> str:
    """Render a bit instruction as a BIT() mask update or a flag-test helper.

    BSET/BCLR/BNOT set, clear or toggle the bit in the destination. Any other
    base is rendered as ``set_flags_btst(dest, bit)``.
    """
    bit = _format_operand(ops[0], size)
    dest = _format_operand(ops[1], size, lvalue=True)
    mask = f"BIT({bit})"
    updates = {
        "BSET": f"{dest} |= {mask};",
        "BCLR": f"{dest} &= ~{mask};",
        "BNOT": f"{dest} ^= {mask};",
    }
    return updates.get(base, f"set_flags_btst({dest}, {bit});")
def split_operands(operands: str) -> list[str]:
    """Split an operand string on commas that are outside ``()`` and ``{}``.

    Pieces are stripped of surrounding whitespace and empty pieces dropped.
    An unmatched closing bracket is kept as text and does not affect nesting.
    """
    if not operands:
        return []

    pieces: list[str] = []
    current: list[str] = []
    nesting = 0
    for char in operands:
        if char == "," and nesting == 0:
            # Top-level separator: close the current piece.
            pieces.append("".join(current).strip())
            current = []
            continue
        if char in "({":
            nesting += 1
        elif char in ")}" and nesting:
            nesting -= 1
        current.append(char)
    pieces.append("".join(current).strip())
    return [piece for piece in pieces if piece]
def _format_operand(operand: str, size: str, *, lvalue: bool = False) -> str:
    """Convert one assembly operand into a C-like expression.

    ``H'..`` literals are rewritten to ``0x..`` first. Immediates lose their
    ``#``; the ``@`` addressing forms become MEM8/MEM16 accesses chosen by
    ``size``; other ``@name`` forms and bare names become C identifiers.
    Text matching none of the forms is returned unchanged. ``lvalue`` is
    accepted for call-site readability and does not change the result.
    """
    op = _replace_h_literals(operand.strip())
    if op.startswith("#"):
        return op[1:]

    memory = _mem_name(size)

    # @(disp, reg): displacement addressing, only when exactly two parts.
    if op.startswith("@(") and op.endswith(")"):
        pieces = split_operands(op[2:-1])
        if len(pieces) == 2:
            disp, reg = pieces
            if disp.startswith("-"):
                offset = f"{reg} - {disp[1:]}"
            else:
                offset = f"{reg} + {disp}"
            return f"{memory}[{offset}]"

    pre_decrement = re.fullmatch(r"@-(R[0-7])", op)
    if pre_decrement:
        return f"{memory}[--{pre_decrement.group(1)}]"
    post_increment = re.fullmatch(r"@(R[0-7])\+", op)
    if post_increment:
        return f"{memory}[{post_increment.group(1)}++]"
    indirect = re.fullmatch(r"@(R[0-7])", op)
    if indirect:
        return f"{memory}[{indirect.group(1)}]"

    if op.startswith("@BR:"):
        return f"{memory}[(BR << 8) | {op[len('@BR:'):]}]"
    if op.startswith("@0x"):
        return f"{memory}[{op[1:]}]"
    if op.startswith("@"):
        return c_identifier(op[1:])
    if op.startswith("{") and op.endswith("}"):
        return _register_list_argument(op)

    is_location = re.fullmatch(r"loc_[0-9A-Fa-f]{4}", op)
    is_name = re.fullmatch(r"[A-Za-z_][A-Za-z0-9_/\?]*", op)
    if is_location or is_name:
        return c_identifier(op)
    return op
def _cast(expr: str, size: str) -> str:
    """Wrap ``expr`` in a uint8_t/uint16_t cast for size "B"/"W"; else return it as is."""
    c_type = {"B": "uint8_t", "W": "uint16_t"}.get(size)
    if c_type is None:
        return expr
    return f"({c_type})({expr})"
def _line_comment(ins: JsonObject, opts: PseudocodeOptions) -> str:
    """Build the trailing ``/* ... */`` comment for one rendered statement.

    Fragments, in order: address, assembly text, the instruction's own
    comment, metadata comments, and a cycle summary. The address, assembly
    and cycle fragments depend on ``opts``. Returns "" when nothing is left
    to show.
    """
    fragments: list[str] = []
    if opts.include_addresses:
        fragments.append(f"{int(ins['address']):04X}")
    if opts.include_asm:
        fragments.append(str(ins.get("text") or _instruction_text(ins)))

    note = str(ins.get("comment") or "").strip()
    if note:
        fragments.append(note)
    fragments += _metadata_comments(ins)

    if opts.include_cycles and ins.get("cycles"):
        fragments.append(_cycle_summary(ins["cycles"]))

    if not fragments:
        return ""
    # Each fragment is sanitized so it cannot close the comment early.
    body = "; ".join(_sanitize_comment(fragment) for fragment in fragments)
    return " /* " + body + " */"
def _metadata_comments(ins: JsonObject) -> list[str]:
    """Collect extra comment fragments from an instruction's metadata.

    Two sources are read:

    * ``ins["sci"]["inferences"]``: the "comment" of every dict entry that
      has a non-empty one.
    * ``ins["peripheral_access"]``: for every dict entry with both a register
      and a direction, a ``"<register> <size> <direction> <byte> TEMP access"``
      fragment.

    Missing ``size``/``byte`` fields are left out of the fragment instead of
    appearing as the text "None", and a JSON ``null`` in place of either list
    is treated as empty.

    Returns:
        The fragments in the order found; possibly empty.
    """
    comments: list[str] = []

    sci = ins.get("sci")
    if isinstance(sci, dict):
        for inference in sci.get("inferences") or ():
            if isinstance(inference, dict) and inference.get("comment"):
                comments.append(str(inference["comment"]))

    for access in ins.get("peripheral_access") or ():
        if not isinstance(access, dict):
            continue
        register = access.get("register")
        direction = access.get("direction")
        if not (register and direction):
            continue
        fields = (register, access.get("size"), direction, access.get("byte"))
        # Drop only absent values; a legitimate 0 must still be shown.
        described = " ".join(str(field) for field in fields if field is not None and field != "")
        comments.append(f"{described} TEMP access")

    return comments
def _instruction_text(ins: JsonObject) -> str:
    """Rebuild "mnemonic operands" text from the instruction's own fields."""
    pieces = (str(ins.get("mnemonic", "")), str(ins.get("operands", "")))
    return " ".join(pieces).strip()
def _cycle_summary(cycles: JsonObject) -> str:
    """Format cycle metadata: a single count, a not-taken/taken pair, or "cycles=?"."""
    if "cycles" in cycles:
        return f"cycles={cycles['cycles']}"
    has_both_paths = all(key in cycles for key in ("not_taken", "taken"))
    if has_both_paths:
        return f"cycles={cycles['not_taken']}/{cycles['taken']} nt/t"
    return "cycles=?"
def _mnemonic_base(mnemonic: str) -> str:
    """Return the mnemonic without its trailing ``.suffix``.

    ``"MOV:G.B"`` gives ``"MOV:G"`` and ``".DATA.B"`` gives ``".DATA"``.
    When the only dot is the leading one (a directive such as ``".byte"``)
    there is no suffix to strip, so the mnemonic is returned whole. Stripping
    there would leave an empty base, which can no longer be recognised as a
    directive by its leading dot.
    """
    head, _, _ = mnemonic.rpartition(".")
    # An empty head means no dot at all, or only a leading dot.
    return head or mnemonic
def _mnemonic_size(mnemonic: str) -> str:
    """Return the operand size letter ("B" or "W") for a mnemonic, or "".

    An explicit ``.B``/``.W`` suffix wins. Without one, ``CMP:I`` counts as
    word-sized and ``CMP:E`` as byte-sized.
    """
    _, dot, suffix = mnemonic.rpartition(".")
    if dot and suffix in ("B", "W"):
        return suffix
    implied_sizes = (("CMP:I", "W"), ("CMP:E", "B"))
    for prefix, implied in implied_sizes:
        if mnemonic.startswith(prefix):
            return implied
    return ""
def _size_bits(size: str) -> int:
    """Return 16 for size "W" and 8 for anything else."""
    if size == "W":
        return 16
    return 8
def _mem_name(size: str) -> str:
    """Return the memory array name: "MEM16" for size "W", otherwise "MEM8"."""
    if size == "W":
        return "MEM16"
    return "MEM8"
def _helper_name(base: str, size: str) -> str:
    """Build a helper function name: lower-cased safe token plus "8", "16" or nothing."""
    if size == "B":
        width = "8"
    elif size == "W":
        width = "16"
    else:
        width = ""
    return _safe_token(base) + width
def _register_list_argument(operand: str) -> str:
    """Turn a ``{R0,R1}`` register list into a ``R0, R1`` argument list.

    Returns the placeholder comment ``/* empty */`` when no register is named.
    """
    names: list[str] = []
    for piece in operand.strip().strip("{}").split(","):
        piece = piece.strip()
        if piece:
            names.append(c_identifier(piece))
    if not names:
        return "/* empty */"
    return ", ".join(names)
def _replace_h_literals(text: str) -> str:
    """Rewrite every ``H'<hex>`` literal as ``0x<HEX>`` with upper-case digits."""

    def to_c_hex(match: re.Match[str]) -> str:
        digits = match.group(1)
        return "0x" + digits.upper()

    return re.sub(r"H'([0-9A-Fa-f]+)", to_c_hex, text)
def c_identifier(name: str) -> str:
    """Reduce arbitrary text to a C identifier made of ``[0-9A-Za-z_]``.

    Each run of other characters or underscores becomes one underscore, and
    leading/trailing underscores are removed. An empty result becomes
    "unnamed", and a result starting with a digit gets a leading underscore.
    """
    # Underscores count as separators here, so runs collapse in a single pass.
    collapsed = re.sub(r"[^0-9A-Za-z]+", "_", name.strip())
    identifier = collapsed.strip("_") or "unnamed"
    if identifier[0].isdigit():
        return "_" + identifier
    return identifier
def _safe_token(text: str) -> str:
    """Return the lower-cased C-identifier form of ``text``."""
    identifier = c_identifier(text)
    return identifier.lower()
def _label_for(address: int) -> str:
    """Return the default label for an address: "loc_" plus at least four upper-case hex digits."""
    return "loc_" + format(address, "04X")
def _quoted(text: str) -> str:
    """Return ``text`` as a double-quoted, escaped string literal (JSON encoding)."""
    literal = json.dumps(text)
    return literal
def _sanitize_comment(text: str) -> str:
    """Make text safe inside a one-line C block comment.

    The comment terminator is broken up with a space, and carriage returns
    and newlines each become a single space.
    """
    defused = str(text).replace("*/", "* /")
    return defused.translate({ord("\r"): " ", ord("\n"): " "})