1
0

serial improvements

This commit is contained in:
Aiden
2026-05-25 16:05:45 +10:00
parent c80ea695dc
commit 6ceed81765
8 changed files with 6578 additions and 2 deletions

View File

@@ -41,7 +41,8 @@ To generate a focused RX/TX serial-path pseudocode view from the reconstruction
- Tracks SCI setup writes and can infer baud rates from SMR/BRR when `--clock-hz` is supplied.
- Annotates SCI protocol actions such as TDRE waits, TDR writes, RDR reads, RX/TX interrupt enables, and receive-error clears.
- Reconstructs evidence-supported SCI1 serial frame candidates, including the apparent six-byte TX/RX units and XOR checksum seeded by `0x5A`.
- Generates a focused RX/TX serial-path pseudocode view from those serial reconstruction candidates.
- Infers candidate serial protocol semantics from validated frames, including `RX[0] & 0x07` command dispatch, likely index/value byte roles, and response staging through `F850-F854`.
- Generates a focused RX/TX serial-path pseudocode view from those serial reconstruction and protocol-semantic candidates.
- Adds a Sony RCP-TX7 board profile that ties H8/536 pin 66 `P95/TXD` and pin 67 `P96/RXD` to the MAX202 RS232 transceiver.
- Flags/manual-annotates TEMP-register access ordering for FRT and A/D 16-bit peripheral registers.
- Scans unreached ROM ranges for ASCII strings and pointer-table candidates.
@@ -108,6 +109,7 @@ python h8536_serial_pseudocode.py --help
- `--no-evidence`: omit evidence-address comments.
- `--no-manual`: omit manual-reference comments.
- `--no-board`: omit board/MAX202 comments.
- `--no-semantics`: omit candidate command/field semantics.
## Code Layout
@@ -130,6 +132,7 @@ python h8536_serial_pseudocode.py --help
- `h8536/sci.py`: SCI setup tracking and baud inference.
- `h8536/sci_protocol.py`: SCI transmit/receive/status semantic annotations.
- `h8536/serial_reconstruction.py`: cautious higher-level SCI frame reconstruction from decompiled evidence.
- `h8536/serial_semantics.py`: candidate command/field semantics inferred from serial frame use.
- `h8536/serial_pseudocode.py`: focused RX/TX protocol pseudocode generation from reconstruction metadata.
- `h8536/board_profile.py`: Sony RCP-TX7 board-trace annotations, including the MAX202 RS232 path.
- `h8536/peripheral_access.py`: FRT/A-D TEMP-register access analysis.

File diff suppressed because it is too large Load Diff

View File

@@ -89,6 +89,111 @@ extern volatile u8 MEM8[0x10000];
#define RX_INTERBYTE_TIMEOUT MEM8[0xF9C1u]
#define RX_COMPLETE_TIMER MEM8[0xF9C5u]
/* Candidate Protocol Semantics
* confidence: medium-high (0.9)
* caveat: Semantic names are candidates only. The analyzer reports byte roles, command values, dispatch targets, and response staging patterns observed in code; it does not prove source-level intent or protocol documentation.
* byte layout:
* - byte0: op_flags (medium-high) - low three bits select a command; upper bits are preserved or gated in some paths
* - byte1: addr_page_flags (medium) - candidate high/page byte for logical point/index; bit 7 is tested as a control flag
* - byte2: addr_offset (medium) - candidate low/offset byte for logical point/index
* - byte3: value_hi (medium) - candidate high byte of a word value
* - byte4: value_lo (medium) - candidate low byte of a word value
* - byte5: checksum (high) - 0x5A-seeded XOR of bytes 0..4
* dispatch: command_low3 = RX_FRAME(0) & 0x07; observed H'00, H'01, H'02, H'04, H'05, H'06, H'07
* dispatch evidence: H'BC08, H'BC0C, H'BC20, H'BC22, H'BC24, H'BC26, H'BC29, H'BC2B, H'BC2E, H'BC30, H'BC45, H'BC47, H'BC4A, H'BC4C, H'BC4F, H'BC51, H'BC54, H'BC56
* index decoder: RX[1:2] -> logical index via loc_622B (medium)
* command candidates:
* - H'00 set_value_acked: candidate write of RX[3:4] into primary/current tables, followed by a response; handler H'BC69; responses response_at_BCCD
* - H'01 read_value: candidate read from the primary table, followed by a response carrying the value; handler H'BCD7; responses response_at_BCFA
* - H'02 clear_or_abort: candidate clear/abort path with no immediate response builder; handler H'BD04; responses none
* - H'04 set_value_no_immediate_reply: candidate write/update path that stores a value without an immediate serial response; handler H'BD0E; responses none
* - H'05 ack_or_clear_pending: candidate pending/event acknowledgement path; handler H'BD80; responses none
* - H'06 set_secondary_value: candidate secondary-table value write path; handler H'BDDB; responses none
* - H'07 retransmit_or_error_reply: candidate retransmit/NAK-style path; error handling also builds command 0x07 responses; handler H'BE05; responses response_at_BE22
*/
/*
 * Candidate command selector: the low three bits of received frame byte 0.
 * The upper five bits of that byte are masked off and are not returned.
 */
static u8 sci1_rx_candidate_command(void)
{
    const u8 op_flags = RX_FRAME(0);

    return (u8)(op_flags & 0x07u);
}
static u16 sci1_rx_candidate_value(void)
{
return (u16)(((u16)RX_FRAME(3) << 8) | RX_FRAME(4));
}
/*
 * Candidate mapping from frame bytes 1 (page) and 2 (offset) to a logical
 * index:
 *
 *   page 0, offset 0x00-0x7F -> 0x0000-0x007F
 *   page 1, any offset       -> 0x0080-0x017F
 *   page 2, offset 0x00-0x7F -> 0x0180-0x01FF
 *
 * Every other page/offset pair yields 0x01FF. Note that page 2 with offset
 * 0x7F also evaluates to 0x01FF, so that pair cannot be told apart from the
 * fallback by the returned value alone.
 */
static u16 sci1_rx_candidate_logical_index(void)
{
    const u8 page = RX_FRAME(1);
    const u8 offset = RX_FRAME(2);

    switch (page) {
    case 0u:
        if (offset <= 0x7Fu) {
            return offset;
        }
        break;
    case 1u:
        return (u16)(0x0080u + offset);
    case 2u:
        if (offset <= 0x7Fu) {
            return (u16)(0x0180u + offset);
        }
        break;
    default:
        break;
    }

    return 0x01FFu;
}
/*
 * Route one received frame to its candidate command handler.
 *
 * The command, logical index, and value fields are all decoded before the
 * dispatch and handed to every handler. Command value 0x03 has no case of
 * its own, so it reaches candidate_unknown_command() together with anything
 * else that is not listed. Handler names are analyzer candidates, not
 * confirmed protocol terminology.
 */
void sci1_process_candidate_protocol_command(void)
{
    const u8 cmd = sci1_rx_candidate_command();
    const u16 index = sci1_rx_candidate_logical_index();
    const u16 word = sci1_rx_candidate_value();

    switch (cmd) {
    /* 0x00 set_value_acked: candidate write of RX[3:4] into primary/current
     * tables, followed by a response.
     * evidence: H'BC08, H'BC0C, H'BC20, H'BC22, H'BCB0, H'BCB9, H'BCC1,
     *           H'BCC9, H'BCB5, H'BCBD, H'BCC5, H'BCCD
     */
    case 0x00u:
        candidate_set_value_acked(index, word);
        return;

    /* 0x01 read_value: candidate read from the primary table, followed by a
     * response carrying the value.
     * evidence: H'BC08, H'BC0C, H'BC24, H'BC26, H'BCB0, H'BCB9, H'BCC1,
     *           H'BCC9, H'BCD7, H'BCE0, H'BCE8, H'BCF0, H'BCF6, H'BCB5,
     *           H'BCBD, H'BCC5, H'BCDC, H'BCE4, H'BCFA
     */
    case 0x01u:
        candidate_read_value(index, word);
        return;

    /* 0x02 clear_or_abort: candidate clear/abort path with no immediate
     * response builder.
     * evidence: H'BC08, H'BC0C, H'BC29, H'BC2B
     */
    case 0x02u:
        candidate_clear_or_abort(index, word);
        return;

    /* 0x04 set_value_no_immediate_reply: candidate write/update path that
     * stores a value without an immediate serial response.
     * evidence: H'BC08, H'BC0C, H'BC45, H'BC47
     */
    case 0x04u:
        candidate_set_value_no_immediate_reply(index, word);
        return;

    /* 0x05 ack_or_clear_pending: candidate pending/event acknowledgement
     * path.
     * evidence: H'BC08, H'BC0C, H'BC4A, H'BC4C
     */
    case 0x05u:
        candidate_ack_or_clear_pending(index, word);
        return;

    /* 0x06 set_secondary_value: candidate secondary-table value write path.
     * evidence: H'BC08, H'BC0C, H'BC4F, H'BC51
     */
    case 0x06u:
        candidate_set_secondary_value(index, word);
        return;

    /* 0x07 retransmit_or_error_reply: candidate retransmit/NAK-style path;
     * error handling also builds command 0x07 responses.
     * evidence: H'BC08, H'BC0C, H'BC2E, H'BC30, H'BC54, H'BC56, H'BE09,
     *           H'BE11, H'BE19, H'BE22
     */
    case 0x07u:
        candidate_retransmit_or_error_reply(index, word);
        return;

    default:
        candidate_unknown_command(cmd, index, word);
        return;
    }
}
/*
* TX reconstruction evidence
* candidate/evidence-supported SCI1 6-byte TX frame hypothesis using buffer H'F858-H'F85D with checksum byte H'F85D seeded by H'005A

View File

@@ -30,6 +30,7 @@ from .serial_reconstruction import (
serial_reconstruction_json_payload,
serial_reconstruction_metadata_for_instruction,
)
from .serial_semantics import analyze_serial_semantics
from .symbols import symbol_for_address
from .tables import IO_REGISTERS
from .timing import format_timing_summary
@@ -474,6 +475,7 @@ def write_json(
for ins in (instructions[addr] for addr in sorted(instructions))
],
}
payload["serial_semantics"] = analyze_serial_semantics(payload)
path.write_text(json.dumps(payload, indent=2), encoding="utf-8")

View File

@@ -2,10 +2,13 @@ from __future__ import annotations
import argparse
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from .serial_semantics import analyze_serial_semantics
JsonObject = dict[str, Any]
@@ -17,6 +20,7 @@ class SerialPseudocodeOptions:
include_evidence: bool = True
include_manual: bool = True
include_board: bool = True
include_semantics: bool = True
def generate_serial_pseudocode(
@@ -28,6 +32,7 @@ def generate_serial_pseudocode(
opts = options or SerialPseudocodeOptions()
tx_candidate = _find_candidate(payload, "candidate_sci1_tx_frame")
rx_candidate = _find_candidate(payload, "candidate_sci1_rx_frame")
serial_semantics = analyze_serial_semantics(payload) if opts.include_semantics else None
lines: list[str] = []
lines.extend(_file_header(source_name, tx_candidate, rx_candidate))
@@ -36,6 +41,8 @@ def generate_serial_pseudocode(
if opts.include_manual:
lines.extend(_manual_reference_lines(payload))
lines.extend(_declarations(tx_candidate, rx_candidate))
if opts.include_semantics:
lines.extend(_semantics_lines(serial_semantics, opts))
emitted = False
if opts.include_tx and tx_candidate:
@@ -96,6 +103,7 @@ def main(argv: list[str] | None = None) -> int:
parser.add_argument("--no-evidence", action="store_true", help="omit evidence-address comments")
parser.add_argument("--no-manual", action="store_true", help="omit manual-reference comments")
parser.add_argument("--no-board", action="store_true", help="omit board/MAX202 comments")
parser.add_argument("--no-semantics", action="store_true", help="omit candidate command/field semantics")
args = parser.parse_args(argv)
options = SerialPseudocodeOptions(
@@ -104,6 +112,7 @@ def main(argv: list[str] | None = None) -> int:
include_evidence=not args.no_evidence,
include_manual=not args.no_manual,
include_board=not args.no_board,
include_semantics=not args.no_semantics,
)
write_serial_pseudocode(args.input, args.out, options)
print(f"wrote {args.out}")
@@ -256,6 +265,135 @@ def _declarations(tx_candidate: JsonObject | None, rx_candidate: JsonObject | No
return lines
def _sequence_items(value: object) -> list[object]:
    """Return ``value`` as a list when it is a list or tuple, otherwise ``[]``."""
    if isinstance(value, (list, tuple)):
        return list(value)
    return []


def _semantics_lines(
    analysis: JsonObject | None,
    opts: SerialPseudocodeOptions,
) -> list[str]:
    """Render the candidate protocol-semantics section of the pseudocode.

    The section is a C block comment summarising the first entry of
    ``analysis["protocol_semantics"]`` (confidence, caveat, byte layout,
    dispatch, index decoder, command candidates), followed by three field
    accessor functions and a ``switch`` that dispatches on the command value.

    Args:
        analysis: Serial-semantics analysis mapping, or ``None``.
        opts: Generation options; only ``include_evidence`` is consulted here.

    Returns:
        The generated lines, or an empty list when ``analysis`` is not a dict
        or carries no usable first ``protocol_semantics`` entry.
    """
    if not isinstance(analysis, dict):
        return []
    protocols = analysis.get("protocol_semantics")
    if not isinstance(protocols, list) or not protocols:
        return []
    protocol = protocols[0]
    if not isinstance(protocol, dict):
        return []

    def note(value: object) -> str:
        # Everything interpolated into a C comment goes through _comment_text
        # so a stray "*/" or line break cannot terminate the comment early.
        return _comment_text(str(value))

    lines: list[str] = ["/* Candidate Protocol Semantics"]
    lines.append(
        f" * confidence: {note(protocol.get('confidence', 'unknown'))} "
        f"({note(protocol.get('confidence_score', 'n/a'))})",
    )
    caveat = str(protocol.get("caveat") or "").strip()
    if caveat:
        lines.append(f" * caveat: {_comment_text(caveat)}")

    layout = protocol.get("byte_layout")
    if isinstance(layout, list) and layout:
        lines.append(" * byte layout:")
        for item in layout:
            if not isinstance(item, dict):
                continue
            offset = note(item.get("offset", "?"))
            name = note(item.get("name_candidate", "byte"))
            semantic = note(item.get("semantic", ""))
            confidence = note(item.get("confidence", "unknown"))
            lines.append(f" * - byte{offset}: {name} ({confidence}) - {semantic}")

    dispatch = protocol.get("command_dispatch")
    if isinstance(dispatch, dict):
        # A missing or null value list simply drops the "observed" suffix.
        values = ", ".join(note(value) for value in _sequence_items(dispatch.get("command_values_hex")))
        lines.append(
            " * dispatch: command_low3 = RX_FRAME(0) & 0x07"
            + (f"; observed {values}" if values else ""),
        )
        # Same guard as the per-command evidence below: no empty evidence line.
        dispatch_evidence = note(_hex_join(dispatch.get("evidence_addresses_hex")))
        if opts.include_evidence and dispatch_evidence:
            lines.append(f" * dispatch evidence: {dispatch_evidence}")

    index_decoder = protocol.get("index_decoder")
    if isinstance(index_decoder, dict):
        lines.append(
            " * index decoder: RX[1:2] -> logical index via "
            f"{note(index_decoder.get('label', 'loc_622B'))} "
            f"({note(index_decoder.get('confidence', 'unknown'))})",
        )

    commands = [item for item in _sequence_items(protocol.get("commands")) if isinstance(item, dict)]
    if commands:
        lines.append(" * command candidates:")
        for command in commands:
            value = note(command.get("command_value_hex", "??"))
            name = note(command.get("name_candidate", "unknown"))
            summary = note(command.get("summary") or "")
            handler = note(command.get("handler_start_hex") or "multiple")
            responses = (
                ", ".join(note(item) for item in _sequence_items(command.get("response_candidates")))
                or "none"
            )
            lines.append(f" * - {value} {name}: {summary}; handler {handler}; responses {responses}")
    lines.append(" */")
    lines.append("")

    # Fixed accessor functions and the opening of the dispatch function.
    lines.extend(
        [
            "static u8 sci1_rx_candidate_command(void)",
            "{",
            " return (u8)(RX_FRAME(0) & 0x07u);",
            "}",
            "",
            "static u16 sci1_rx_candidate_value(void)",
            "{",
            " return (u16)(((u16)RX_FRAME(3) << 8) | RX_FRAME(4));",
            "}",
            "",
            "static u16 sci1_rx_candidate_logical_index(void)",
            "{",
            " u8 page = RX_FRAME(1);",
            " u8 offset = RX_FRAME(2);",
            "",
            " if (page == 0u && offset <= 0x7Fu) {",
            " return offset;",
            " }",
            " if (page == 1u) {",
            " return (u16)(0x0080u + offset);",
            " }",
            " if (page == 2u && offset <= 0x7Fu) {",
            " return (u16)(0x0180u + offset);",
            " }",
            " return 0x01FFu;",
            "}",
            "",
            "void sci1_process_candidate_protocol_command(void)",
            "{",
            " u8 command = sci1_rx_candidate_command();",
            " u16 logical_index = sci1_rx_candidate_logical_index();",
            " u16 value = sci1_rx_candidate_value();",
            "",
            " switch (command) {",
        ],
    )

    # One case per command that carries an integer command value.
    for command in commands:
        value = command.get("command_value")
        if not isinstance(value, int):
            continue
        name = _safe_identifier(str(command.get("name_candidate") or f"command_{value:02X}"))
        summary = note(command.get("summary") or "candidate command semantics unknown")
        evidence = note(_hex_join(command.get("evidence_addresses_hex")))
        lines.append(f" case 0x{value:02X}u:")
        lines.append(f" /* {name}: {summary}")
        if opts.include_evidence and evidence:
            lines.append(f" * evidence: {evidence}")
        lines.append(" */")
        lines.append(f" candidate_{name}(logical_index, value);")
        lines.append(" break;")

    lines.extend(
        [
            " default:",
            " candidate_unknown_command(command, logical_index, value);",
            " break;",
            " }",
            "}",
            "",
        ],
    )
    return lines
def _tx_functions(candidate: JsonObject, opts: SerialPseudocodeOptions) -> list[str]:
length = _int_field(candidate, "frame_length", 6)
seed = _int_field(candidate, "checksum_seed", 0x5A)
@@ -473,5 +611,21 @@ def _dedupe(items: list[str]) -> list[str]:
return output
def _hex_join(value: object) -> str:
if not isinstance(value, list):
return ""
return ", ".join(str(item) for item in value)
def _safe_identifier(value: str) -> str:
cleaned = re.sub(r"[^0-9A-Za-z_]", "_", value.strip())
cleaned = re.sub(r"_+", "_", cleaned).strip("_")
if not cleaned:
return "unknown"
if cleaned[0].isdigit():
return "_" + cleaned
return cleaned
def _comment_text(text: str) -> str:
return text.replace("*/", "* /").replace("\r", " ").replace("\n", " ")

1262
h8536/serial_semantics.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -97,6 +97,23 @@ def candidate_payload() -> dict:
}
def semantic_payload() -> dict:
    """Return ``candidate_payload()`` with a ten-instruction listing attached.

    The listing loads H'F860, masks it with H'07, compares against H'00 and
    H'01 with conditional branches, writes to H'F850/H'F851, and ends with a
    BSR to loc_BA26.
    """
    # Columns: address, mnemonic, operands, referenced addresses, targets.
    rows = [
        (0xBC08, "MOV:G.B", "@H'F860, R0", [0xF860], []),
        (0xBC0C, "AND.B", "#H'07, R0", [], []),
        (0xBC20, "CMP:E.B", "#H'00, R0", [], []),
        (0xBC22, "BEQ", "loc_BC69", [], [0xBC69]),
        (0xBC24, "CMP:E.B", "#H'01, R0", [], []),
        (0xBC26, "BEQ", "loc_BCD7", [], [0xBCD7]),
        (0xBCB0, "MOV:G.B", "#H'04, @H'F850", [0xF850], []),
        (0xBCB5, "MOV:G.B", "@H'F861, R0", [0xF861], []),
        (0xBCB9, "MOV:G.B", "R0, @H'F851", [0xF851], []),
        (0xBCCD, "BSR", "loc_BA26", [], [0xBA26]),
    ]
    payload = candidate_payload()
    payload["instructions"] = [
        {
            "address": address,
            "mnemonic": mnemonic,
            "operands": operands,
            "references": [{"address": referenced} for referenced in referenced_addresses],
            "targets": targets,
        }
        for address, mnemonic, operands, referenced_addresses, targets in rows
    ]
    return payload
class SerialPseudocodeTest(unittest.TestCase):
def test_generates_focused_tx_and_rx_candidate_paths(self):
text = generate_serial_pseudocode(candidate_payload(), source_name="rom.json")
@@ -117,6 +134,17 @@ class SerialPseudocodeTest(unittest.TestCase):
self.assertIn("return sci1_process_rx_candidate_frame();", text)
self.assertIn("rx_xor_checksum_validation: H'BBD6, H'BBEC", text)
def test_generates_candidate_protocol_semantics_switch(self):
    # The semantic payload should yield the header comment plus switch cases
    # for command values 0x00 and 0x01.
    rendered = generate_serial_pseudocode(semantic_payload())
    expected_fragments = (
        "Candidate Protocol Semantics",
        "byte0: op_flags",
        "dispatch: command_low3 = RX_FRAME(0) & 0x07",
        "case 0x00u:",
        "candidate_set_value_acked(logical_index, value);",
        "case 0x01u:",
        "candidate_read_value(logical_index, value);",
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, rendered)
def test_tx_only_option_omits_rx_functions(self):
text = generate_serial_pseudocode(
candidate_payload(),

View File

@@ -0,0 +1,160 @@
import unittest
from h8536.serial_semantics import analyze_serial_semantics
def reference(address: int) -> dict:
    """Build a one-key reference record for ``address``."""
    record = {}
    record["address"] = address
    return record
def instruction(
address: int,
mnemonic: str,
operands: str = "",
references: list[int] | None = None,
targets: list[int] | None = None,
) -> dict:
return {
"address": address,
"mnemonic": mnemonic,
"operands": operands,
"references": [reference(item) for item in (references or [])],
"targets": targets or [],
}
def base_payload(instructions: list[dict]) -> dict:
    """Wrap ``instructions`` in a payload with one SCI1 RX and one SCI1 TX frame candidate.

    The RX candidate validates H'F860-H'F865 and the TX candidate uses buffer
    H'F850-H'F855; both are six bytes long with a 0x5A checksum seed.
    """
    rx_frame = {
        "kind": "candidate_sci1_rx_frame",
        "channel": "SCI1",
        "frame_length": 6,
        "validation_buffer_start": 0xF860,
        "validation_buffer_end": 0xF865,
        "checksum_address": 0xF865,
        "checksum_seed": 0x5A,
        "confidence": "high",
    }
    tx_frame = {
        "kind": "candidate_sci1_tx_frame",
        "channel": "SCI1",
        "frame_length": 6,
        "buffer_start": 0xF850,
        "buffer_end": 0xF855,
        "checksum_address": 0xF855,
        "checksum_seed": 0x5A,
        "confidence": "high",
    }
    reconstruction = {"candidates": [rx_frame, tx_frame]}
    return {"serial_reconstruction": reconstruction, "instructions": instructions}
def only_semantics(testcase: unittest.TestCase, payload: dict) -> dict:
    """Analyze ``payload`` and return its single protocol-semantics entry.

    Asserts through ``testcase`` that the analysis kind is
    ``"serial_semantics"`` and that exactly one entry was produced.
    """
    analysis = analyze_serial_semantics(payload)
    testcase.assertEqual(analysis["kind"], "serial_semantics")
    protocols = analysis["protocol_semantics"]
    testcase.assertEqual(len(protocols), 1)
    return protocols[0]
class SerialSemanticsTest(unittest.TestCase):
    def test_detects_low_three_bit_command_dispatch(self):
        # Byte 0 is loaded, masked with H'07, then compared against three
        # values, each followed by a conditional branch.
        listing = [
            instruction(0xBA80, "MOV:G.B", "@H'F860, R0", [0xF860]),
            instruction(0xBA84, "AND.B", "#H'07, R0"),
            instruction(0xBA88, "CMP:E.B", "#H'00, R0"),
            instruction(0xBA8C, "BEQ", "loc_BAA0", targets=[0xBAA0]),
            instruction(0xBA90, "CMP:E.B", "#H'02, R0"),
            instruction(0xBA94, "BEQ", "loc_BAC0", targets=[0xBAC0]),
            instruction(0xBA98, "CMP:E.B", "#H'07, R0"),
            instruction(0xBA9C, "BEQ", "loc_BAE0", targets=[0xBAE0]),
        ]

        dispatch = only_semantics(self, base_payload(listing))["command_dispatch"]

        self.assertEqual(dispatch["source_address"], 0xF860)
        self.assertEqual(dispatch["source_field"], "byte0")
        self.assertEqual(dispatch["mask"], 0x07)
        self.assertEqual(dispatch["field"], "command_low3")
        observed_cases = {(case["value"], case["target"]) for case in dispatch["cases"]}
        self.assertEqual(
            observed_cases,
            {(0x00, 0xBAA0), (0x02, 0xBAC0), (0x07, 0xBAE0)},
        )
        for evidence_address in (0xBA80, 0xBA84):
            self.assertIn(evidence_address, dispatch["evidence_addresses"])

    def test_labels_likely_rx_fields_from_validation_buffer_offsets(self):
        # Word-sized loads at offsets 1 and 3 of the validation buffer should
        # label bytes 1-2 and bytes 3-4 as paired fields.
        listing = [
            instruction(0xBB00, "MOV:G.B", "@H'F860, R0", [0xF860]),
            instruction(0xBB04, "AND.B", "#H'07, R0"),
            instruction(0xBB08, "MOV:G.W", "@H'F861, R1", [0xF861]),
            instruction(0xBB0C, "MOV:G.W", "@H'F863, R2", [0xF863]),
        ]

        semantics = only_semantics(self, base_payload(listing))
        by_offset = {entry["offset"]: entry for entry in semantics["rx_fields"]}

        command_field = by_offset[0]
        self.assertEqual(command_field["name"], "command_low3")
        self.assertEqual(command_field["address"], 0xF860)
        self.assertEqual(command_field["mask"], 0x07)
        expected_names = (
            (1, "likely_id_or_index"),
            (2, "likely_id_or_index"),
            (3, "likely_value"),
            (4, "likely_value"),
        )
        for offset, expected_name in expected_names:
            self.assertEqual(by_offset[offset]["name"], expected_name)
        for offset in (1, 3):
            self.assertIn("candidate", by_offset[offset]["confidence"])

    def test_detects_response_builder_before_serial_send_call(self):
        # Five consecutive writes to H'F850-H'F854 followed by a BSR to the
        # send routine should be reported as one response builder.
        listing = [
            instruction(0xBC00, "MOV:G.B", "@H'F860, R0", [0xF860]),
            instruction(0xBC04, "MOV:G.B", "R0, @H'F850", [0xF850]),
            instruction(0xBC08, "MOV:G.B", "@H'F861, R1", [0xF861]),
            instruction(0xBC0C, "MOV:G.B", "R1, @H'F851", [0xF851]),
            instruction(0xBC10, "MOV:G.B", "@H'F862, R2", [0xF862]),
            instruction(0xBC14, "MOV:G.B", "R2, @H'F852", [0xF852]),
            instruction(0xBC18, "MOV:G.B", "#H'00, @H'F853", [0xF853]),
            instruction(0xBC1C, "MOV:G.B", "#H'01, @H'F854", [0xF854]),
            instruction(0xBC20, "BSR", "loc_BA26", targets=[0xBA26]),
        ]

        semantics = only_semantics(self, base_payload(listing))
        builder = semantics["response_builders"][0]

        self.assertEqual(builder["buffer_start"], 0xF850)
        self.assertEqual(builder["buffer_end"], 0xF854)
        self.assertEqual(builder["send_call_target"], 0xBA26)
        self.assertEqual(builder["call_address"], 0xBC20)
        written_addresses = [write["address"] for write in builder["writes"]]
        self.assertEqual(
            written_addresses,
            [0xF850, 0xF851, 0xF852, 0xF853, 0xF854],
        )

    def test_missing_serial_reconstruction_candidates_emit_no_protocol_semantics(self):
        # A dispatch-shaped listing without any frame candidates must not
        # produce protocol semantics.
        listing = [
            instruction(0xBA80, "MOV:G.B", "@H'F860, R0", [0xF860]),
            instruction(0xBA84, "AND.B", "#H'07, R0"),
            instruction(0xBA88, "CMP:E.B", "#H'00, R0"),
            instruction(0xBA8C, "BEQ", "loc_BAA0", targets=[0xBAA0]),
        ]
        payload = {
            "serial_reconstruction": {"candidates": []},
            "instructions": listing,
        }

        analysis = analyze_serial_semantics(payload)

        self.assertEqual(analysis["kind"], "serial_semantics")
        self.assertEqual(analysis["protocol_semantics"], [])
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()