from __future__ import annotations import argparse import json import re from dataclasses import dataclass from pathlib import Path from typing import Any from .serial_semantics import analyze_serial_semantics JsonObject = dict[str, Any] @dataclass(frozen=True) class SerialPseudocodeOptions: include_tx: bool = True include_rx: bool = True include_evidence: bool = True include_manual: bool = True include_board: bool = True include_semantics: bool = True def generate_serial_pseudocode( payload: JsonObject, *, source_name: str = "", options: SerialPseudocodeOptions | None = None, ) -> str: opts = options or SerialPseudocodeOptions() tx_candidate = _find_candidate(payload, "candidate_sci1_tx_frame") rx_candidate = _find_candidate(payload, "candidate_sci1_rx_frame") serial_semantics = analyze_serial_semantics(payload) if opts.include_semantics else None lines: list[str] = [] lines.extend(_file_header(source_name, tx_candidate, rx_candidate)) if opts.include_board: lines.extend(_board_comment_lines(payload)) if opts.include_manual: lines.extend(_manual_reference_lines(payload)) lines.extend(_declarations(tx_candidate, rx_candidate)) if opts.include_semantics: lines.extend(_semantics_lines(serial_semantics, opts)) emitted = False if opts.include_tx and tx_candidate: lines.extend(_tx_functions(tx_candidate, opts)) emitted = True if opts.include_rx and rx_candidate: lines.extend(_rx_functions(rx_candidate, opts)) emitted = True if not emitted: lines.append("/* No requested SCI serial reconstruction candidates were present in the JSON input. */") lines.append("") return "\n".join(lines).rstrip() + "\n" def load_serial_pseudocode_input(path: Path) -> JsonObject: with path.open("r", encoding="utf-8") as handle: payload = json.load(handle) if not isinstance(payload, dict) or "instructions" not in payload: raise ValueError(f"{path} does not look like h8536_decompiler JSON output") return payload def write_serial_pseudocode( input_path: Path, output_path: Path, options: SerialPseudocodeOptions, ) -> None: payload = load_serial_pseudocode_input(input_path) output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text( generate_serial_pseudocode(payload, source_name=str(input_path), options=options), encoding="utf-8", ) def main(argv: list[str] | None = None) -> int: parser = argparse.ArgumentParser( description="Generate focused C-like SCI RX/TX pseudocode from h8536_decompiler JSON output.", ) parser.add_argument( "input", nargs="?", type=Path, default=Path("build/rom_decompiled.json"), help="structured JSON emitted by h8536_decompiler.py", ) parser.add_argument( "--out", type=Path, default=Path("build/rom_serial_pseudocode.c"), help="focused serial pseudocode output path", ) mode = parser.add_mutually_exclusive_group() mode.add_argument("--tx-only", action="store_true", help="emit only the candidate TX path") mode.add_argument("--rx-only", action="store_true", help="emit only the candidate RX path") parser.add_argument("--no-evidence", action="store_true", help="omit evidence-address comments") parser.add_argument("--no-manual", action="store_true", help="omit manual-reference comments") parser.add_argument("--no-board", action="store_true", help="omit board/MAX202 comments") parser.add_argument("--no-semantics", action="store_true", help="omit candidate command/field semantics") args = parser.parse_args(argv) options = SerialPseudocodeOptions( include_tx=not args.rx_only, include_rx=not args.tx_only, include_evidence=not args.no_evidence, include_manual=not args.no_manual, include_board=not args.no_board, include_semantics=not args.no_semantics, ) write_serial_pseudocode(args.input, args.out, options) print(f"wrote {args.out}") return 0 def _file_header( source_name: str, tx_candidate: JsonObject | None, rx_candidate: JsonObject | None, ) -> list[str]: source = f" from {source_name}" if source_name else "" lines = [ "/*", f" * H8/536 focused SCI RX/TX pseudocode{source}", " *", " * This is a protocol-oriented reconstruction from decompiler JSON metadata.", " * It is intentionally phrased as candidate behavior: it summarizes evidence", " * from the ROM without claiming source-level intent or a proven packet format.", " */", "", ] if tx_candidate or rx_candidate: lines.append("/* Candidate summary:") for candidate in (tx_candidate, rx_candidate): if not candidate: continue lines.append( " * - " + _candidate_label(candidate) + f": confidence {candidate.get('confidence', 'unknown')} " + f"({candidate.get('confidence_score', 'n/a')})" ) reason = str(candidate.get("confidence_reason") or "").strip() if reason: lines.append(f" * reason: {_comment_text(reason)}") caveat = str(candidate.get("caveat") or "").strip() if caveat: lines.append(f" * caveat: {_comment_text(caveat)}") lines.append(" */") lines.append("") return lines def _board_comment_lines(payload: JsonObject) -> list[str]: board = payload.get("board_profile") if not isinstance(board, dict): return [] lines = ["/* Board path:"] summary = str(board.get("summary") or "").strip() if summary: lines.append(f" * {summary}") for trace in _mapping_items(board.get("traces")): signal = trace.get("signal", "?") h8_pin = trace.get("h8_pin", "?") h8_name = trace.get("h8_pin_name", "?") max202_pin = trace.get("max202_pin", "?") evidence = str(trace.get("evidence") or "").strip() lines.append(f" * - {signal}: H8 pin {h8_pin} {h8_name} <-> MAX202 pin {max202_pin}") if evidence: lines.append(f" * evidence: {_comment_text(evidence)}") lines.append(" */") lines.append("") return lines def _manual_reference_lines(payload: JsonObject) -> list[str]: refs: list[str] = [] for section in ("sci_protocol", "board_profile"): data = payload.get(section) if isinstance(data, dict): refs.extend(str(ref) for ref in data.get("manual_references", []) if ref) refs = _dedupe(refs) if not refs: return [] lines = ["/* Manual anchors used by the decompiler metadata:"] for ref in refs: lines.append(f" * - {_comment_text(ref)}") lines.append(" */") lines.append("") return lines def _declarations(tx_candidate: JsonObject | None, rx_candidate: JsonObject | None) -> list[str]: candidate = tx_candidate or rx_candidate or {} channel = str(candidate.get("channel") or "SCI1") tdr = _int_field(tx_candidate, "tdr_address", 0xFEDB) rdr = _int_field(rx_candidate, "rdr_address", 0xFEDD) scr = tdr - 1 if tdr else 0xFEDA ssr = rdr - 1 if rdr else 0xFEDC lines = [ "#include ", "#include ", "", "typedef uint8_t u8;", "typedef uint16_t u16;", "", "extern volatile u8 MEM8[0x10000];", "", f"#define {channel}_SCR MEM8[{_c_hex(scr)}]", f"#define {channel}_TDR MEM8[{_c_hex(tdr)}]", f"#define {channel}_SSR MEM8[{_c_hex(ssr)}]", f"#define {channel}_RDR MEM8[{_c_hex(rdr)}]", "", "#define SCI_SCR_TIE 0x80u", "#define SCI_SCR_RIE 0x40u", "#define SCI_SCR_TE 0x20u", "#define SCI_SCR_RE 0x10u", "#define SCI_SSR_TDRE 0x80u", "#define SCI_SSR_RDRF 0x40u", "#define SCI_SSR_ORER 0x20u", "#define SCI_SSR_FER 0x10u", "#define SCI_SSR_PER 0x08u", "", ] if tx_candidate: tx_start = _int_field(tx_candidate, "buffer_start", 0xF858) tx_index = _int_field(tx_candidate, "tx_index_address", 0xF9C2) length = _int_field(tx_candidate, "frame_length", 6) lines.extend( [ f"#define TX_FRAME_LENGTH {length}u", f"#define TX_FRAME(n) MEM8[(u16)({_c_hex(tx_start)} + (n))]", f"#define TX_INDEX MEM8[{_c_hex(tx_index)}]", "", ], ) if rx_candidate: capture_start = _int_field(rx_candidate, "capture_buffer_start", 0xF868) frame_start = _int_field(rx_candidate, "validation_buffer_start", 0xF860) rx_index = _int_field(rx_candidate, "rx_index_address", 0xF9C3) timeout = _int_field(rx_candidate, "interbyte_timeout_address", 0xF9C1) complete = _int_field(rx_candidate, "complete_timer_address", 0xF9C5) length = _int_field(rx_candidate, "frame_length", 6) lines.extend( [ f"#define RX_FRAME_LENGTH {length}u", f"#define RX_CAPTURE(n) MEM8[(u16)({_c_hex(capture_start)} + (n))]", f"#define RX_FRAME(n) MEM8[(u16)({_c_hex(frame_start)} + (n))]", f"#define RX_INDEX MEM8[{_c_hex(rx_index)}]", f"#define RX_INTERBYTE_TIMEOUT MEM8[{_c_hex(timeout)}]", f"#define RX_COMPLETE_TIMER MEM8[{_c_hex(complete)}]", "", ], ) return lines def _semantics_lines( analysis: JsonObject | None, opts: SerialPseudocodeOptions, ) -> list[str]: if not isinstance(analysis, dict): return [] protocols = analysis.get("protocol_semantics") if not isinstance(protocols, list) or not protocols: return [] protocol = protocols[0] if not isinstance(protocol, dict): return [] lines: list[str] = ["/* Candidate Protocol Semantics"] lines.append( f" * confidence: {protocol.get('confidence', 'unknown')} " f"({protocol.get('confidence_score', 'n/a')})", ) caveat = str(protocol.get("caveat") or "").strip() if caveat: lines.append(f" * caveat: {_comment_text(caveat)}") layout = protocol.get("byte_layout") if isinstance(layout, list) and layout: lines.append(" * byte layout:") for item in layout: if not isinstance(item, dict): continue offset = item.get("offset", "?") name = item.get("name_candidate", "byte") semantic = item.get("semantic", "") confidence = item.get("confidence", "unknown") lines.append(f" * - byte{offset}: {name} ({confidence}) - {_comment_text(str(semantic))}") dispatch = protocol.get("command_dispatch") if isinstance(dispatch, dict): values = ", ".join(str(value) for value in dispatch.get("command_values_hex", [])) lines.append( " * dispatch: command_low3 = RX_FRAME(0) & 0x07" + (f"; observed {values}" if values else ""), ) if opts.include_evidence: lines.append(f" * dispatch evidence: {_hex_join(dispatch.get('evidence_addresses_hex'))}") index_decoder = protocol.get("index_decoder") if isinstance(index_decoder, dict): lines.append( " * index decoder: RX[1:2] -> logical index via " f"{index_decoder.get('label', 'loc_622B')} ({index_decoder.get('confidence', 'unknown')})", ) commands = [item for item in protocol.get("commands", []) if isinstance(item, dict)] if commands: lines.append(" * command candidates:") for command in commands: value = command.get("command_value_hex", "??") name = command.get("name_candidate", "unknown") summary = _comment_text(str(command.get("summary") or "")) handler = command.get("handler_start_hex") or "multiple" responses = ", ".join(str(item) for item in command.get("response_candidates", [])) or "none" lines.append(f" * - {value} {name}: {summary}; handler {handler}; responses {responses}") lines.append(" */") lines.append("") lines.extend( [ "static u8 sci1_rx_candidate_command(void)", "{", " return (u8)(RX_FRAME(0) & 0x07u);", "}", "", "static u16 sci1_rx_candidate_value(void)", "{", " return (u16)(((u16)RX_FRAME(3) << 8) | RX_FRAME(4));", "}", "", "static u16 sci1_rx_candidate_logical_index(void)", "{", " u8 page = RX_FRAME(1);", " u8 offset = RX_FRAME(2);", "", " if (page == 0u && offset <= 0x7Fu) {", " return offset;", " }", " if (page == 1u) {", " return (u16)(0x0080u + offset);", " }", " if (page == 2u && offset <= 0x7Fu) {", " return (u16)(0x0180u + offset);", " }", " return 0x01FFu;", "}", "", "void sci1_process_candidate_protocol_command(void)", "{", " u8 command = sci1_rx_candidate_command();", " u16 logical_index = sci1_rx_candidate_logical_index();", " u16 value = sci1_rx_candidate_value();", "", " switch (command) {", ], ) for command in commands: value = command.get("command_value") if not isinstance(value, int): continue name = _safe_identifier(str(command.get("name_candidate") or f"command_{value:02X}")) summary = _comment_text(str(command.get("summary") or "candidate command semantics unknown")) evidence = _hex_join(command.get("evidence_addresses_hex")) lines.append(f" case 0x{value:02X}u:") lines.append(f" /* {name}: {summary}") if opts.include_evidence and evidence: lines.append(f" * evidence: {evidence}") lines.append(" */") lines.append(f" candidate_{name}(logical_index, value);") lines.append(" break;") lines.extend( [ " default:", " candidate_unknown_command(command, logical_index, value);", " break;", " }", "}", "", ], ) return lines def _tx_functions(candidate: JsonObject, opts: SerialPseudocodeOptions) -> list[str]: length = _int_field(candidate, "frame_length", 6) seed = _int_field(candidate, "checksum_seed", 0x5A) data_length = max(length - 1, 0) lines = _candidate_comment_block("TX reconstruction evidence", candidate, opts) lines.extend( [ "static u8 sci1_tx_candidate_checksum(void)", "{", f" u8 checksum = {_c_hex(seed, width=2)};", ], ) for index in range(data_length): lines.append(f" checksum ^= TX_FRAME({index});") lines.extend( [ " return checksum;", "}", "", "void sci1_tx_start_candidate_frame(void)", "{", " /* The ROM appears to have populated TX_FRAME(0..4) before this point. */", f" TX_FRAME({data_length}) = sci1_tx_candidate_checksum();", "", f" while ((SCI1_SSR & SCI_SSR_TDRE) == 0u) {{", " /* wait for transmit data register empty */", " }", "", " SCI1_TDR = TX_FRAME(0);", " TX_INDEX = 1u;", " SCI1_SSR &= (u8)~SCI_SSR_TDRE;", " SCI1_SCR |= SCI_SCR_TIE;", "}", "", "void sci1_txi_candidate_isr(void)", "{", " if (TX_INDEX < TX_FRAME_LENGTH) {", " SCI1_TDR = TX_FRAME(TX_INDEX);", " TX_INDEX = (u8)(TX_INDEX + 1u);", " SCI1_SSR &= (u8)~SCI_SSR_TDRE;", " }", "", " if (TX_INDEX >= TX_FRAME_LENGTH) {", " SCI1_SCR &= (u8)~SCI_SCR_TIE;", " }", "}", "", ], ) return lines def _rx_functions(candidate: JsonObject, opts: SerialPseudocodeOptions) -> list[str]: length = _int_field(candidate, "frame_length", 6) seed = _int_field(candidate, "checksum_seed", 0x5A) data_length = max(length - 1, 0) lines = _candidate_comment_block("RX reconstruction evidence", candidate, opts) lines.extend( [ "static u8 sci1_rx_candidate_checksum(void)", "{", f" u8 checksum = {_c_hex(seed, width=2)};", ], ) for index in range(data_length): lines.append(f" checksum ^= RX_FRAME({index});") lines.extend( [ " return checksum;", "}", "", "bool sci1_process_rx_candidate_frame(void)", "{", " u8 i;", "", " if (RX_INDEX != RX_FRAME_LENGTH) {", " return false;", " }", "", " for (i = 0u; i < RX_FRAME_LENGTH; i++) {", " RX_FRAME(i) = RX_CAPTURE(i);", " }", "", f" if (sci1_rx_candidate_checksum() != RX_FRAME({data_length})) {{", " RX_INDEX = 0u;", " return false;", " }", "", " RX_INDEX = 0u;", " return true;", "}", "", "bool sci1_rx_byte_received_candidate_isr(void)", "{", " u8 byte;", "", " SCI1_SSR &= (u8)~SCI_SSR_RDRF;", " byte = SCI1_RDR;", "", " if (RX_INTERBYTE_TIMEOUT == 0u) {", " RX_INDEX = 0u;", " }", "", " RX_INTERBYTE_TIMEOUT = 5u;", " RX_CAPTURE(RX_INDEX) = byte;", " RX_INDEX = (u8)(RX_INDEX + 1u);", "", " if (RX_INDEX == RX_FRAME_LENGTH) {", " RX_COMPLETE_TIMER = 0x14u;", " return sci1_process_rx_candidate_frame();", " }", "", " return false;", "}", "", "void sci1_rx_error_candidate_isr(void)", "{", " SCI1_SSR &= (u8)~(SCI_SSR_ORER | SCI_SSR_FER | SCI_SSR_PER);", " RX_INDEX = 0u;", "}", "", ], ) return lines def _candidate_comment_block( title: str, candidate: JsonObject, opts: SerialPseudocodeOptions, ) -> list[str]: lines = ["/*", f" * {title}"] comment = str(candidate.get("comment") or candidate.get("short_comment") or "").strip() if comment: lines.append(f" * {_comment_text(comment)}") formula = str(candidate.get("checksum_formula") or "").strip() if formula: lines.append(f" * checksum formula: {_comment_text(formula)}") if opts.include_evidence: evidence = candidate.get("evidence_addresses_hex") if isinstance(evidence, dict): lines.append(" * evidence addresses:") for key in sorted(evidence): addresses = ", ".join(str(item) for item in evidence.get(key, [])) lines.append(f" * - {key}: {addresses}") lines.append(" */") return lines def _find_candidate(payload: JsonObject, kind: str) -> JsonObject | None: serial = payload.get("serial_reconstruction") if not isinstance(serial, dict): return None candidates = serial.get("candidates") if not isinstance(candidates, list): return None for candidate in candidates: if isinstance(candidate, dict) and candidate.get("kind") == kind: return candidate return None def _candidate_label(candidate: JsonObject) -> str: kind = str(candidate.get("kind") or "candidate") channel = str(candidate.get("channel") or "SCI") length = candidate.get("frame_length", "?") if kind.endswith("_tx_frame"): start = candidate.get("buffer_start_hex") or _h(_int_field(candidate, "buffer_start", 0)) end = candidate.get("buffer_end_hex") or _h(_int_field(candidate, "buffer_end", 0)) return f"{channel} TX {length}-byte frame at {start}-{end}" if kind.endswith("_rx_frame"): capture_start = candidate.get("capture_buffer_start_hex") or _h( _int_field(candidate, "capture_buffer_start", 0), ) capture_end = candidate.get("capture_buffer_end_hex") or _h( _int_field(candidate, "capture_buffer_end", 0), ) return f"{channel} RX {length}-byte frame captured at {capture_start}-{capture_end}" return f"{channel} {kind}" def _mapping_items(value: object) -> list[JsonObject]: if not isinstance(value, list): return [] return [item for item in value if isinstance(item, dict)] def _int_field(candidate: JsonObject | None, key: str, default: int) -> int: if not candidate: return default value = candidate.get(key) if isinstance(value, bool): return int(value) if isinstance(value, int): return value return default def _c_hex(value: int, *, width: int = 4) -> str: return f"0x{value & 0xFFFF:0{width}X}u" def _h(value: int) -> str: return f"H'{value & 0xFFFF:04X}" def _dedupe(items: list[str]) -> list[str]: seen: set[str] = set() output: list[str] = [] for item in items: if item in seen: continue seen.add(item) output.append(item) return output def _hex_join(value: object) -> str: if not isinstance(value, list): return "" return ", ".join(str(item) for item in value) def _safe_identifier(value: str) -> str: cleaned = re.sub(r"[^0-9A-Za-z_]", "_", value.strip()) cleaned = re.sub(r"_+", "_", cleaned).strip("_") if not cleaned: return "unknown" if cleaned[0].isdigit(): return "_" + cleaned return cleaned def _comment_text(text: str) -> str: return text.replace("*/", "* /").replace("\r", " ").replace("\n", " ")