1
0
Files
h8-536-decoder/h8536/serial_pseudocode.py
2026-05-25 16:05:45 +10:00

632 lines
22 KiB
Python

from __future__ import annotations
import argparse
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from .serial_semantics import analyze_serial_semantics
# Alias for a decoded JSON object (string keys, arbitrary values), used for the
# decompiler payload and for the nested records read from it.
JsonObject = dict[str, Any]
@dataclass(frozen=True)
class SerialPseudocodeOptions:
    """Immutable switches selecting which sections of the pseudocode are emitted.

    Every switch defaults to ``True``, so a default instance produces the
    fullest output the input metadata allows.
    """

    # Emit the candidate TX functions when a TX candidate is present.
    include_tx: bool = True
    # Emit the candidate RX functions when an RX candidate is present.
    include_rx: bool = True
    # Include evidence-address comments in the generated output.
    include_evidence: bool = True
    # Include the manual-reference comment block.
    include_manual: bool = True
    # Include the board-path comment block.
    include_board: bool = True
    # Run the serial semantics analysis and emit its section.
    include_semantics: bool = True
def generate_serial_pseudocode(
    payload: JsonObject,
    *,
    source_name: str = "",
    options: SerialPseudocodeOptions | None = None,
) -> str:
    """Render the focused SCI RX/TX pseudocode for a decompiler payload.

    Args:
        payload: Decoded decompiler JSON document.
        source_name: Optional label for the input, shown in the file header.
        options: Section switches; a default ``SerialPseudocodeOptions`` is
            used when omitted.

    Returns:
        The generated text, with trailing whitespace stripped and exactly one
        terminating newline.
    """
    settings = options or SerialPseudocodeOptions()
    tx = _find_candidate(payload, "candidate_sci1_tx_frame")
    rx = _find_candidate(payload, "candidate_sci1_rx_frame")
    semantics = analyze_serial_semantics(payload) if settings.include_semantics else None

    out: list[str] = list(_file_header(source_name, tx, rx))
    if settings.include_board:
        out += _board_comment_lines(payload)
    if settings.include_manual:
        out += _manual_reference_lines(payload)
    out += _declarations(tx, rx)
    if settings.include_semantics:
        out += _semantics_lines(semantics, settings)

    # Each direction is emitted only when it was requested AND a candidate exists.
    directions = (
        (settings.include_tx, tx, _tx_functions),
        (settings.include_rx, rx, _rx_functions),
    )
    wrote_any = False
    for requested, candidate, render in directions:
        if requested and candidate:
            out += render(candidate, settings)
            wrote_any = True

    if not wrote_any:
        out += [
            "/* No requested SCI serial reconstruction candidates were present in the JSON input. */",
            "",
        ]
    return "\n".join(out).rstrip() + "\n"
def load_serial_pseudocode_input(path: Path) -> JsonObject:
    """Read and sanity-check a decompiler JSON file.

    Args:
        path: Location of the UTF-8 encoded JSON document.

    Returns:
        The decoded top-level JSON object.

    Raises:
        ValueError: If the document is not an object or has no
            ``"instructions"`` key.
    """
    with path.open("r", encoding="utf-8") as stream:
        document = json.load(stream)
    looks_valid = isinstance(document, dict) and "instructions" in document
    if not looks_valid:
        raise ValueError(f"{path} does not look like h8536_decompiler JSON output")
    return document
def write_serial_pseudocode(
    input_path: Path,
    output_path: Path,
    options: SerialPseudocodeOptions,
) -> None:
    """Load decompiler JSON, render the pseudocode, and write it to disk.

    The parent directory of ``output_path`` is created if it is missing. The
    file is written as UTF-8.

    Args:
        input_path: Decompiler JSON file to read.
        output_path: Destination for the generated text.
        options: Section switches passed through to the generator.
    """
    payload = load_serial_pseudocode_input(input_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    rendered = generate_serial_pseudocode(
        payload,
        source_name=str(input_path),
        options=options,
    )
    output_path.write_text(rendered, encoding="utf-8")
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: parse arguments and write the pseudocode file.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` after the output file has been written.
    """
    cli = argparse.ArgumentParser(
        description="Generate focused C-like SCI RX/TX pseudocode from h8536_decompiler JSON output.",
    )
    cli.add_argument(
        "input",
        nargs="?",
        type=Path,
        default=Path("build/rom_decompiled.json"),
        help="structured JSON emitted by h8536_decompiler.py",
    )
    cli.add_argument(
        "--out",
        type=Path,
        default=Path("build/rom_serial_pseudocode.c"),
        help="focused serial pseudocode output path",
    )

    # --tx-only and --rx-only cannot be combined.
    direction = cli.add_mutually_exclusive_group()
    direction.add_argument("--tx-only", action="store_true", help="emit only the candidate TX path")
    direction.add_argument("--rx-only", action="store_true", help="emit only the candidate RX path")

    suppressors = (
        ("--no-evidence", "omit evidence-address comments"),
        ("--no-manual", "omit manual-reference comments"),
        ("--no-board", "omit board/MAX202 comments"),
        ("--no-semantics", "omit candidate command/field semantics"),
    )
    for flag, help_text in suppressors:
        cli.add_argument(flag, action="store_true", help=help_text)

    parsed = cli.parse_args(argv)

    # Each "only" flag works by switching the opposite direction off.
    chosen = SerialPseudocodeOptions(
        include_tx=not parsed.rx_only,
        include_rx=not parsed.tx_only,
        include_evidence=not parsed.no_evidence,
        include_manual=not parsed.no_manual,
        include_board=not parsed.no_board,
        include_semantics=not parsed.no_semantics,
    )
    write_serial_pseudocode(parsed.input, parsed.out, chosen)
    print(f"wrote {parsed.out}")
    return 0
def _file_header(
source_name: str,
tx_candidate: JsonObject | None,
rx_candidate: JsonObject | None,
) -> list[str]:
source = f" from {source_name}" if source_name else ""
lines = [
"/*",
f" * H8/536 focused SCI RX/TX pseudocode{source}",
" *",
" * This is a protocol-oriented reconstruction from decompiler JSON metadata.",
" * It is intentionally phrased as candidate behavior: it summarizes evidence",
" * from the ROM without claiming source-level intent or a proven packet format.",
" */",
"",
]
if tx_candidate or rx_candidate:
lines.append("/* Candidate summary:")
for candidate in (tx_candidate, rx_candidate):
if not candidate:
continue
lines.append(
" * - "
+ _candidate_label(candidate)
+ f": confidence {candidate.get('confidence', 'unknown')} "
+ f"({candidate.get('confidence_score', 'n/a')})"
)
reason = str(candidate.get("confidence_reason") or "").strip()
if reason:
lines.append(f" * reason: {_comment_text(reason)}")
caveat = str(candidate.get("caveat") or "").strip()
if caveat:
lines.append(f" * caveat: {_comment_text(caveat)}")
lines.append(" */")
lines.append("")
return lines
def _board_comment_lines(payload: JsonObject) -> list[str]:
board = payload.get("board_profile")
if not isinstance(board, dict):
return []
lines = ["/* Board path:"]
summary = str(board.get("summary") or "").strip()
if summary:
lines.append(f" * {summary}")
for trace in _mapping_items(board.get("traces")):
signal = trace.get("signal", "?")
h8_pin = trace.get("h8_pin", "?")
h8_name = trace.get("h8_pin_name", "?")
max202_pin = trace.get("max202_pin", "?")
evidence = str(trace.get("evidence") or "").strip()
lines.append(f" * - {signal}: H8 pin {h8_pin} {h8_name} <-> MAX202 pin {max202_pin}")
if evidence:
lines.append(f" * evidence: {_comment_text(evidence)}")
lines.append(" */")
lines.append("")
return lines
def _manual_reference_lines(payload: JsonObject) -> list[str]:
refs: list[str] = []
for section in ("sci_protocol", "board_profile"):
data = payload.get(section)
if isinstance(data, dict):
refs.extend(str(ref) for ref in data.get("manual_references", []) if ref)
refs = _dedupe(refs)
if not refs:
return []
lines = ["/* Manual anchors used by the decompiler metadata:"]
for ref in refs:
lines.append(f" * - {_comment_text(ref)}")
lines.append(" */")
lines.append("")
return lines
def _declarations(tx_candidate: JsonObject | None, rx_candidate: JsonObject | None) -> list[str]:
    """Emit the C includes, typedefs and register/buffer macros.

    The register macro prefix is the ``channel`` of the TX candidate, or of
    the RX candidate when there is no TX candidate, defaulting to ``SCI1``.
    SCR is placed one byte below TDR and SSR one byte below RDR.

    Args:
        tx_candidate: TX candidate record, or ``None`` when absent.
        rx_candidate: RX candidate record, or ``None`` when absent.

    Returns:
        Declaration lines; the TX and RX buffer macros are only emitted for
        candidates that are present.
    """
    primary = tx_candidate or rx_candidate or {}
    channel = str(primary.get("channel") or "SCI1")
    tdr = _int_field(tx_candidate, "tdr_address", 0xFEDB)
    rdr = _int_field(rx_candidate, "rdr_address", 0xFEDD)
    # A zero data-register address falls back to the fixed control/status address.
    scr = tdr - 1 if tdr else 0xFEDA
    ssr = rdr - 1 if rdr else 0xFEDC

    out = [
        "#include <stdbool.h>",
        "#include <stdint.h>",
        "",
        "typedef uint8_t u8;",
        "typedef uint16_t u16;",
        "",
        "extern volatile u8 MEM8[0x10000];",
        "",
    ]
    for suffix, address in (("SCR", scr), ("TDR", tdr), ("SSR", ssr), ("RDR", rdr)):
        out.append(f"#define {channel}_{suffix} MEM8[{_c_hex(address)}]")
    out.append("")

    bit_masks = (
        ("SCI_SCR_TIE", 0x80),
        ("SCI_SCR_RIE", 0x40),
        ("SCI_SCR_TE", 0x20),
        ("SCI_SCR_RE", 0x10),
        ("SCI_SSR_TDRE", 0x80),
        ("SCI_SSR_RDRF", 0x40),
        ("SCI_SSR_ORER", 0x20),
        ("SCI_SSR_FER", 0x10),
        ("SCI_SSR_PER", 0x08),
    )
    out.extend(f"#define {name} 0x{mask:02X}u" for name, mask in bit_masks)
    out.append("")

    if tx_candidate:
        tx_base = _int_field(tx_candidate, "buffer_start", 0xF858)
        tx_cursor = _int_field(tx_candidate, "tx_index_address", 0xF9C2)
        tx_len = _int_field(tx_candidate, "frame_length", 6)
        out += [
            f"#define TX_FRAME_LENGTH {tx_len}u",
            f"#define TX_FRAME(n) MEM8[(u16)({_c_hex(tx_base)} + (n))]",
            f"#define TX_INDEX MEM8[{_c_hex(tx_cursor)}]",
            "",
        ]

    if rx_candidate:
        capture_base = _int_field(rx_candidate, "capture_buffer_start", 0xF868)
        frame_base = _int_field(rx_candidate, "validation_buffer_start", 0xF860)
        rx_cursor = _int_field(rx_candidate, "rx_index_address", 0xF9C3)
        interbyte = _int_field(rx_candidate, "interbyte_timeout_address", 0xF9C1)
        completion = _int_field(rx_candidate, "complete_timer_address", 0xF9C5)
        rx_len = _int_field(rx_candidate, "frame_length", 6)
        out += [
            f"#define RX_FRAME_LENGTH {rx_len}u",
            f"#define RX_CAPTURE(n) MEM8[(u16)({_c_hex(capture_base)} + (n))]",
            f"#define RX_FRAME(n) MEM8[(u16)({_c_hex(frame_base)} + (n))]",
            f"#define RX_INDEX MEM8[{_c_hex(rx_cursor)}]",
            f"#define RX_INTERBYTE_TIMEOUT MEM8[{_c_hex(interbyte)}]",
            f"#define RX_COMPLETE_TIMER MEM8[{_c_hex(completion)}]",
            "",
        ]
    return out
def _semantics_lines(
analysis: JsonObject | None,
opts: SerialPseudocodeOptions,
) -> list[str]:
if not isinstance(analysis, dict):
return []
protocols = analysis.get("protocol_semantics")
if not isinstance(protocols, list) or not protocols:
return []
protocol = protocols[0]
if not isinstance(protocol, dict):
return []
lines: list[str] = ["/* Candidate Protocol Semantics"]
lines.append(
f" * confidence: {protocol.get('confidence', 'unknown')} "
f"({protocol.get('confidence_score', 'n/a')})",
)
caveat = str(protocol.get("caveat") or "").strip()
if caveat:
lines.append(f" * caveat: {_comment_text(caveat)}")
layout = protocol.get("byte_layout")
if isinstance(layout, list) and layout:
lines.append(" * byte layout:")
for item in layout:
if not isinstance(item, dict):
continue
offset = item.get("offset", "?")
name = item.get("name_candidate", "byte")
semantic = item.get("semantic", "")
confidence = item.get("confidence", "unknown")
lines.append(f" * - byte{offset}: {name} ({confidence}) - {_comment_text(str(semantic))}")
dispatch = protocol.get("command_dispatch")
if isinstance(dispatch, dict):
values = ", ".join(str(value) for value in dispatch.get("command_values_hex", []))
lines.append(
" * dispatch: command_low3 = RX_FRAME(0) & 0x07"
+ (f"; observed {values}" if values else ""),
)
if opts.include_evidence:
lines.append(f" * dispatch evidence: {_hex_join(dispatch.get('evidence_addresses_hex'))}")
index_decoder = protocol.get("index_decoder")
if isinstance(index_decoder, dict):
lines.append(
" * index decoder: RX[1:2] -> logical index via "
f"{index_decoder.get('label', 'loc_622B')} ({index_decoder.get('confidence', 'unknown')})",
)
commands = [item for item in protocol.get("commands", []) if isinstance(item, dict)]
if commands:
lines.append(" * command candidates:")
for command in commands:
value = command.get("command_value_hex", "??")
name = command.get("name_candidate", "unknown")
summary = _comment_text(str(command.get("summary") or ""))
handler = command.get("handler_start_hex") or "multiple"
responses = ", ".join(str(item) for item in command.get("response_candidates", [])) or "none"
lines.append(f" * - {value} {name}: {summary}; handler {handler}; responses {responses}")
lines.append(" */")
lines.append("")
lines.extend(
[
"static u8 sci1_rx_candidate_command(void)",
"{",
" return (u8)(RX_FRAME(0) & 0x07u);",
"}",
"",
"static u16 sci1_rx_candidate_value(void)",
"{",
" return (u16)(((u16)RX_FRAME(3) << 8) | RX_FRAME(4));",
"}",
"",
"static u16 sci1_rx_candidate_logical_index(void)",
"{",
" u8 page = RX_FRAME(1);",
" u8 offset = RX_FRAME(2);",
"",
" if (page == 0u && offset <= 0x7Fu) {",
" return offset;",
" }",
" if (page == 1u) {",
" return (u16)(0x0080u + offset);",
" }",
" if (page == 2u && offset <= 0x7Fu) {",
" return (u16)(0x0180u + offset);",
" }",
" return 0x01FFu;",
"}",
"",
"void sci1_process_candidate_protocol_command(void)",
"{",
" u8 command = sci1_rx_candidate_command();",
" u16 logical_index = sci1_rx_candidate_logical_index();",
" u16 value = sci1_rx_candidate_value();",
"",
" switch (command) {",
],
)
for command in commands:
value = command.get("command_value")
if not isinstance(value, int):
continue
name = _safe_identifier(str(command.get("name_candidate") or f"command_{value:02X}"))
summary = _comment_text(str(command.get("summary") or "candidate command semantics unknown"))
evidence = _hex_join(command.get("evidence_addresses_hex"))
lines.append(f" case 0x{value:02X}u:")
lines.append(f" /* {name}: {summary}")
if opts.include_evidence and evidence:
lines.append(f" * evidence: {evidence}")
lines.append(" */")
lines.append(f" candidate_{name}(logical_index, value);")
lines.append(" break;")
lines.extend(
[
" default:",
" candidate_unknown_command(command, logical_index, value);",
" break;",
" }",
"}",
"",
],
)
return lines
def _tx_functions(candidate: JsonObject, opts: SerialPseudocodeOptions) -> list[str]:
    """Emit the candidate TX checksum, frame-start and TXI handler pseudocode.

    The checksum XORs every byte before the last frame slot into the seed and
    stores the result in that last slot. The comment naming the bytes that
    were populated beforehand is derived from the frame length, so it stays
    correct when the metadata reports a length other than six.

    Args:
        candidate: TX candidate record (``frame_length`` and
            ``checksum_seed`` are read, defaulting to 6 and 0x5A).
        opts: Section switches forwarded to the evidence comment block.

    Returns:
        Output lines for the evidence comment and the three C functions.
    """
    length = _int_field(candidate, "frame_length", 6)
    seed = _int_field(candidate, "checksum_seed", 0x5A)
    # Index of the checksum slot, which is also the number of payload bytes.
    data_length = max(length - 1, 0)

    if data_length > 1:
        populated = f"TX_FRAME(0..{data_length - 1})"
    elif data_length == 1:
        populated = "TX_FRAME(0)"
    else:
        populated = "no payload bytes"

    lines = _candidate_comment_block("TX reconstruction evidence", candidate, opts)
    lines.extend(
        [
            "static u8 sci1_tx_candidate_checksum(void)",
            "{",
            f" u8 checksum = {_c_hex(seed, width=2)};",
        ],
    )
    lines.extend(f" checksum ^= TX_FRAME({index});" for index in range(data_length))
    lines.extend(
        [
            " return checksum;",
            "}",
            "",
            "void sci1_tx_start_candidate_frame(void)",
            "{",
            f" /* The ROM appears to have populated {populated} before this point. */",
            f" TX_FRAME({data_length}) = sci1_tx_candidate_checksum();",
            "",
            " while ((SCI1_SSR & SCI_SSR_TDRE) == 0u) {",
            " /* wait for transmit data register empty */",
            " }",
            "",
            " SCI1_TDR = TX_FRAME(0);",
            " TX_INDEX = 1u;",
            " SCI1_SSR &= (u8)~SCI_SSR_TDRE;",
            " SCI1_SCR |= SCI_SCR_TIE;",
            "}",
            "",
            "void sci1_txi_candidate_isr(void)",
            "{",
            " if (TX_INDEX < TX_FRAME_LENGTH) {",
            " SCI1_TDR = TX_FRAME(TX_INDEX);",
            " TX_INDEX = (u8)(TX_INDEX + 1u);",
            " SCI1_SSR &= (u8)~SCI_SSR_TDRE;",
            " }",
            "",
            " if (TX_INDEX >= TX_FRAME_LENGTH) {",
            " SCI1_SCR &= (u8)~SCI_SCR_TIE;",
            " }",
            "}",
            "",
        ],
    )
    return lines
def _rx_functions(candidate: JsonObject, opts: SerialPseudocodeOptions) -> list[str]:
    """Emit the candidate RX checksum, frame validation and ISR pseudocode.

    The generated checksum XORs every byte before the last frame slot into
    the seed; the validation function compares it against that last slot.

    Args:
        candidate: RX candidate record (``frame_length`` and
            ``checksum_seed`` are read, defaulting to 6 and 0x5A).
        opts: Section switches forwarded to the evidence comment block.

    Returns:
        Output lines for the evidence comment and the four C functions.
    """
    frame_len = _int_field(candidate, "frame_length", 6)
    xor_seed = _int_field(candidate, "checksum_seed", 0x5A)
    # Index of the checksum slot, which is also the number of payload bytes.
    checksum_slot = max(frame_len - 1, 0)

    out = _candidate_comment_block("RX reconstruction evidence", candidate, opts)
    out += [
        "static u8 sci1_rx_candidate_checksum(void)",
        "{",
        f" u8 checksum = {_c_hex(xor_seed, width=2)};",
    ]
    out += [f" checksum ^= RX_FRAME({position});" for position in range(checksum_slot)]
    out += [
        " return checksum;",
        "}",
        "",
        "bool sci1_process_rx_candidate_frame(void)",
        "{",
        " u8 i;",
        "",
        " if (RX_INDEX != RX_FRAME_LENGTH) {",
        " return false;",
        " }",
        "",
        " for (i = 0u; i < RX_FRAME_LENGTH; i++) {",
        " RX_FRAME(i) = RX_CAPTURE(i);",
        " }",
        "",
        f" if (sci1_rx_candidate_checksum() != RX_FRAME({checksum_slot})) " + "{",
        " RX_INDEX = 0u;",
        " return false;",
        " }",
        "",
        " RX_INDEX = 0u;",
        " return true;",
        "}",
        "",
        "bool sci1_rx_byte_received_candidate_isr(void)",
        "{",
        " u8 byte;",
        "",
        " SCI1_SSR &= (u8)~SCI_SSR_RDRF;",
        " byte = SCI1_RDR;",
        "",
        " if (RX_INTERBYTE_TIMEOUT == 0u) {",
        " RX_INDEX = 0u;",
        " }",
        "",
        " RX_INTERBYTE_TIMEOUT = 5u;",
        " RX_CAPTURE(RX_INDEX) = byte;",
        " RX_INDEX = (u8)(RX_INDEX + 1u);",
        "",
        " if (RX_INDEX == RX_FRAME_LENGTH) {",
        " RX_COMPLETE_TIMER = 0x14u;",
        " return sci1_process_rx_candidate_frame();",
        " }",
        "",
        " return false;",
        "}",
        "",
        "void sci1_rx_error_candidate_isr(void)",
        "{",
        " SCI1_SSR &= (u8)~(SCI_SSR_ORER | SCI_SSR_FER | SCI_SSR_PER);",
        " RX_INDEX = 0u;",
        "}",
        "",
    ]
    return out
def _candidate_comment_block(
title: str,
candidate: JsonObject,
opts: SerialPseudocodeOptions,
) -> list[str]:
lines = ["/*", f" * {title}"]
comment = str(candidate.get("comment") or candidate.get("short_comment") or "").strip()
if comment:
lines.append(f" * {_comment_text(comment)}")
formula = str(candidate.get("checksum_formula") or "").strip()
if formula:
lines.append(f" * checksum formula: {_comment_text(formula)}")
if opts.include_evidence:
evidence = candidate.get("evidence_addresses_hex")
if isinstance(evidence, dict):
lines.append(" * evidence addresses:")
for key in sorted(evidence):
addresses = ", ".join(str(item) for item in evidence.get(key, []))
lines.append(f" * - {key}: {addresses}")
lines.append(" */")
return lines
def _find_candidate(payload: JsonObject, kind: str) -> JsonObject | None:
serial = payload.get("serial_reconstruction")
if not isinstance(serial, dict):
return None
candidates = serial.get("candidates")
if not isinstance(candidates, list):
return None
for candidate in candidates:
if isinstance(candidate, dict) and candidate.get("kind") == kind:
return candidate
return None
def _candidate_label(candidate: JsonObject) -> str:
kind = str(candidate.get("kind") or "candidate")
channel = str(candidate.get("channel") or "SCI")
length = candidate.get("frame_length", "?")
if kind.endswith("_tx_frame"):
start = candidate.get("buffer_start_hex") or _h(_int_field(candidate, "buffer_start", 0))
end = candidate.get("buffer_end_hex") or _h(_int_field(candidate, "buffer_end", 0))
return f"{channel} TX {length}-byte frame at {start}-{end}"
if kind.endswith("_rx_frame"):
capture_start = candidate.get("capture_buffer_start_hex") or _h(
_int_field(candidate, "capture_buffer_start", 0),
)
capture_end = candidate.get("capture_buffer_end_hex") or _h(
_int_field(candidate, "capture_buffer_end", 0),
)
return f"{channel} RX {length}-byte frame captured at {capture_start}-{capture_end}"
return f"{channel} {kind}"
def _mapping_items(value: object) -> list[JsonObject]:
if not isinstance(value, list):
return []
return [item for item in value if isinstance(item, dict)]
def _int_field(candidate: JsonObject | None, key: str, default: int) -> int:
if not candidate:
return default
value = candidate.get(key)
if isinstance(value, bool):
return int(value)
if isinstance(value, int):
return value
return default
def _c_hex(value: int, *, width: int = 4) -> str:
return f"0x{value & 0xFFFF:0{width}X}u"
def _h(value: int) -> str:
return f"H'{value & 0xFFFF:04X}"
def _dedupe(items: list[str]) -> list[str]:
seen: set[str] = set()
output: list[str] = []
for item in items:
if item in seen:
continue
seen.add(item)
output.append(item)
return output
def _hex_join(value: object) -> str:
if not isinstance(value, list):
return ""
return ", ".join(str(item) for item in value)
def _safe_identifier(value: str) -> str:
cleaned = re.sub(r"[^0-9A-Za-z_]", "_", value.strip())
cleaned = re.sub(r"_+", "_", cleaned).strip("_")
if not cleaned:
return "unknown"
if cleaned[0].isdigit():
return "_" + cleaned
return cleaned
def _comment_text(text: str) -> str:
return text.replace("*/", "* /").replace("\r", " ").replace("\n", " ")