More decompiling work

2026-05-25 17:32:00 +10:00
parent 56829b6e0b
commit 07f48c76e0
22 changed files with 9837 additions and 5 deletions
--- a/h8536/protocol_capture.py
+++ b/h8536/protocol_capture.py
@@ -0,0 +1,534 @@
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Iterable, Mapping, TextIO
+
+try:  # Keep this module useful even when copied away from the decompiler tree.
+    from . import protocol_trace as _protocol_trace
+except ImportError:  # pragma: no cover - exercised only outside package imports.
+    _protocol_trace = None
+
+
+# Checksum seed and frame length come from protocol_trace when that module was
+# importable and defines them; otherwise the literal defaults below are used
+# (getattr on None simply yields the default).
+CHECKSUM_SEED = getattr(_protocol_trace, "CHECKSUM_SEED", 0x5A)
+FRAME_LENGTH = getattr(_protocol_trace, "FRAME_LENGTH", 6)
+# One capture line: "<H:MM:SS[.fraction]> <RX|TX> <count> byte(s) <payload text>".
+# Matching is case-insensitive; the payload text is captured verbatim.
+CAPTURE_LINE_RE = re.compile(
+    r"^\s*(?P<time>\d{1,2}:\d{2}:\d{2}(?:\.\d{1,6})?)\s+"
+    r"(?P<direction>RX|TX)\s+"
+    r"(?P<count>\d+)\s+bytes?\s+"
+    r"(?P<byte_text>.*?)\s*$",
+    re.IGNORECASE,
+)
+# A payload byte is any stand-alone two-digit hex token (word-boundary delimited).
+HEX_BYTE_RE = re.compile(r"\b[0-9A-Fa-f]{2}\b")
+
+# (index, value) -> capture labels. Used only when protocol_trace does not
+# provide its own OBSERVED_TX_REPORT_CANDIDATES table.
+_FALLBACK_OBSERVED_TX_REPORT_CANDIDATES: dict[tuple[int, int], dict[str, str]] = {
+    (0x0000, 0x0080): {
+        "name_candidate": "heartbeat_alive_candidate",
+    },
+    (0x0015, 0x8000): {
+        "name_candidate": "call_button_candidate",
+        "state_candidate": "active",
+    },
+    (0x0015, 0x0000): {
+        "name_candidate": "call_button_candidate",
+        "state_candidate": "inactive",
+    },
+    (0x0007, 0x8000): {
+        "name_candidate": "cam_power_button_candidate",
+        "state_candidate": "active",
+    },
+}
+# Prefer the table shipped with protocol_trace; fall back to the local copy.
+OBSERVED_TX_REPORT_CANDIDATES = getattr(
+    _protocol_trace,
+    "OBSERVED_TX_REPORT_CANDIDATES",
+    _FALLBACK_OBSERVED_TX_REPORT_CANDIDATES,
+)
+
+
+@dataclass(frozen=True)
+class CaptureChunk:
+    """One parsed capture-log line, before any frame recombination."""
+
+    # Zero-based position among the lines that matched the capture pattern.
+    chunk_index: int
+    # Timestamp text exactly as it appeared on the line.
+    timestamp: str
+    # The same timestamp converted to milliseconds by _timestamp_ms.
+    timestamp_ms: int
+    # Direction as logged by the analyzer, lower-cased ("rx" or "tx").
+    analyzer_direction: str
+    # Direction from the device's point of view (see _device_direction).
+    device_direction: str
+    # Byte count announced on the line.
+    declared_count: int
+    # Byte values actually parsed; the length may differ from declared_count.
+    bytes: tuple[int, ...]
+    # The unmodified source line.
+    raw_line: str
+
+
+def checksum_for(frame_prefix: Iterable[int]) -> int:
+    """Return the 8-bit XOR checksum of *frame_prefix*.
+
+    When ``protocol_trace`` was imported and exposes ``checksum_for``, the
+    computation is delegated to it. Otherwise every byte (masked to 8 bits)
+    is XOR-ed into ``CHECKSUM_SEED``.
+    """
+    shared_available = _protocol_trace is not None and hasattr(_protocol_trace, "checksum_for")
+    if not shared_available:
+        accumulator = CHECKSUM_SEED
+        for octet in frame_prefix:
+            accumulator = accumulator ^ (octet & 0xFF)
+        return accumulator & 0xFF
+    return int(_protocol_trace.checksum_for(frame_prefix))
+
+
+def parse_capture_text(text: str) -> list[CaptureChunk]:
+    """Parse a capture log into one ``CaptureChunk`` per recognised line.
+
+    Lines that do not match ``CAPTURE_LINE_RE`` (including blank lines) are
+    skipped. A line whose parsed byte count differs from its declared count
+    is still kept: the mismatch is exposed through ``declared_count`` versus
+    ``len(bytes)`` instead of dropping capture evidence.
+
+    Args:
+        text: Full capture log text; may contain unrelated lines.
+
+    Returns:
+        Chunks in input order, with ``chunk_index`` counting matched lines only.
+    """
+    chunks: list[CaptureChunk] = []
+    for raw_line in text.splitlines():
+        # An empty or whitespace-only line can never satisfy the pattern, so a
+        # separate blank-line check is unnecessary.
+        match = CAPTURE_LINE_RE.match(raw_line.strip())
+        if match is None:
+            continue
+        timestamp = match.group("time")
+        analyzer_direction = match.group("direction").lower()
+        byte_values = tuple(
+            int(token, 16) for token in HEX_BYTE_RE.findall(match.group("byte_text"))
+        )
+        chunks.append(
+            CaptureChunk(
+                chunk_index=len(chunks),
+                timestamp=timestamp,
+                timestamp_ms=_timestamp_ms(timestamp),
+                analyzer_direction=analyzer_direction,
+                device_direction=_device_direction(analyzer_direction),
+                declared_count=int(match.group("count")),
+                bytes=byte_values,
+                raw_line=raw_line,
+            )
+        )
+    return chunks
+
+
+def analyze_capture_text(text: str) -> dict[str, Any]:
+    """Parse *text* as a capture log and return the analysis of its chunks."""
+    parsed_chunks = parse_capture_text(text)
+    return analyze_capture_chunks(parsed_chunks)
+
+
+def analyze_capture_chunks(chunks: Iterable[CaptureChunk]) -> dict[str, Any]:
+    """Build the analysis dictionary for already-parsed capture chunks.
+
+    The result bundles the chunk dictionaries, the recombined frames, groups
+    of byte-identical frames, gate/session hints and a description of the
+    checksum model, together with the matching counts.
+    """
+    materialized = list(chunks)
+    frames = _recombine_frames(materialized)
+    repeated = _repeated_groups(frames)
+    hints = _gate_session_hints(frames)
+
+    checksum_model = {
+        "algorithm": "xor",
+        "seed": CHECKSUM_SEED,
+        "seed_hex": _h8(CHECKSUM_SEED),
+        "covered_offsets": [0, 1, 2, 3, 4],
+        "checksum_offset": 5,
+    }
+
+    # Keys are inserted in a fixed order so non-sorted serialisations stay stable.
+    analysis: dict[str, Any] = {
+        "kind": "h8536_protocol_capture",
+        "frame_length": FRAME_LENGTH,
+        "checksum_model": checksum_model,
+    }
+    analysis["chunks"] = [_chunk_dict(entry) for entry in materialized]
+    analysis["chunk_count"] = len(materialized)
+    analysis["frames"] = frames
+    analysis["frame_count"] = len(frames)
+    analysis["repeated_groups"] = repeated
+    analysis["repeated_group_count"] = len(repeated)
+    analysis["gate_session_hints"] = hints
+    analysis["direction_note"] = (
+        "Capture RX is analyzer-perspective receive; these bytes are device-perspective TX."
+    )
+    return analysis
+
+
+def format_text_report(analysis: Mapping[str, Any]) -> str:
+    """Render an analysis dictionary as a newline-joined plain-text report.
+
+    The report holds a title and totals line, one line per frame, one line per
+    repeated group, and then whatever gate/session hints are present
+    (observed names, heartbeat cadence, transitions, interruptions, caveat).
+    """
+
+    def cadence_summary(stats: Mapping[str, Any]) -> str:
+        # Shared by repeated groups and the heartbeat line.
+        if stats.get("average") is None:
+            return "n/a"
+        return (
+            f"avg={stats['average']:.1f}ms "
+            f"min={stats['min']}ms max={stats['max']}ms"
+        )
+
+    totals = " ".join(
+        f"{label}={analysis.get(key, 0)}"
+        for label, key in (
+            ("chunks", "chunk_count"),
+            ("frames", "frame_count"),
+            ("repeated_groups", "repeated_group_count"),
+        )
+    )
+    out = ["H8/536 capture log", totals]
+
+    for frame in analysis.get("frames", []):
+        report = frame.get("report_candidate") or {}
+        known = report.get("observed_candidate") or {}
+        suffix = ""
+        if known.get("name_candidate"):
+            suffix = f" {known['name_candidate']}"
+            if known.get("state_candidate"):
+                suffix = suffix + f" state={known['state_candidate']}"
+        split_marker = ""
+        if frame.get("source_chunk_count", 0) > 1:
+            split_marker = " split"
+        fields = (
+            f"[{frame['frame_index']:04d}]",
+            f"{frame['timestamp']}",
+            f"{frame['analyzer_direction'].upper()}=>device:{frame['device_direction']}",
+            f"bytes={' '.join(frame['bytes_hex'])}",
+            f"checksum=ok{split_marker}",
+            f"index={report.get('index_hex')}",
+            f"value={report.get('value_hex')}{suffix}",
+        )
+        out.append(" ".join(fields))
+
+    for group in analysis.get("repeated_groups", []):
+        group_cadence = cadence_summary(group.get("cadence_ms") or {})
+        out.append(
+            f"group {group['bytes']} count={group['count']} "
+            f"span={group['span_ms']}ms cadence={group_cadence}"
+        )
+
+    hints = analysis.get("gate_session_hints") or {}
+
+    report_names = hints.get("observed_autonomous_report_names") or []
+    if report_names:
+        out.append("observed autonomous report candidates: " + ", ".join(report_names))
+
+    heartbeat = hints.get("heartbeat_cadence_ms") or {}
+    if heartbeat.get("count"):
+        heartbeat_cadence = cadence_summary(heartbeat)
+        out.append(f"heartbeat cadence count={heartbeat['count']} cadence={heartbeat_cadence}")
+
+    for change in hints.get("active_inactive_transitions", []):
+        out.append(
+            f"transition index={change['index_hex']} "
+            f"{change['from_state']}->{change['to_state']} "
+            f"{change['from_timestamp']}..{change['to_timestamp']}"
+        )
+
+    for gap in hints.get("heartbeat_interruptions", []):
+        culprits = ", ".join(entry["name_candidate"] for entry in gap.get("interrupted_by", []))
+        out.append(
+            f"heartbeat gap {gap['from_timestamp']}..{gap['to_timestamp']} "
+            f"gap={gap['gap_ms']}ms interrupted_by={culprits}"
+        )
+
+    if hints.get("caveat"):
+        out.append(f"caveat: {hints['caveat']}")
+    return "\n".join(out)
+
+
+def main(argv: list[str] | None = None, *, stdin: TextIO | None = None, stdout: TextIO | None = None) -> int:
+    """Command-line entry point: analyse a capture log and print the result.
+
+    Args:
+        argv: Argument list handed to ``argparse``.
+        stdin: Stream read when no path (or ``-``) is given; ``sys.stdin`` if falsy.
+        stdout: Stream the report is written to; ``sys.stdout`` if falsy.
+
+    Returns:
+        Always ``0``.
+    """
+    cli = argparse.ArgumentParser(
+        description="Analyze timestamped H8/536 serial capture logs and recombine 6-byte frames."
+    )
+    cli.add_argument("input", nargs="?", help="Capture log path. Use '-' or omit to read stdin.")
+    cli.add_argument("--json", action="store_true", help="Emit JSON instead of text.")
+    options = cli.parse_args(argv)
+
+    source = stdin or sys.stdin
+    sink = stdout or sys.stdout
+
+    read_from_stream = not options.input or options.input == "-"
+    if read_from_stream:
+        text = source.read()
+    else:
+        text = Path(options.input).read_text(encoding="utf-8")
+
+    analysis = analyze_capture_text(text)
+    if options.json:
+        json.dump(analysis, sink, indent=2, sort_keys=True)
+    else:
+        sink.write(format_text_report(analysis))
+    # Both output modes end with exactly one trailing newline.
+    sink.write("\n")
+    return 0
+
+
+def _recombine_frames(chunks: list[CaptureChunk]) -> list[dict[str, Any]]:
+    """Reassemble checksum-valid frames from chunk bytes, per analyzer direction.
+
+    Each analyzer direction has its own pending byte buffer, so interleaved
+    RX and TX chunks do not corrupt each other. After every appended byte the
+    buffer is drained of any frames that have become complete.
+    """
+    frames: list[dict[str, Any]] = []
+    pending_by_direction: dict[str, list[dict[str, Any]]] = {}
+    for chunk in chunks:
+        direction = chunk.analyzer_direction
+        if direction not in pending_by_direction:
+            pending_by_direction[direction] = []
+        pending = pending_by_direction[direction]
+        for position, octet in enumerate(chunk.bytes):
+            # Keep the originating chunk with every byte so frames can report their sources.
+            pending.append({"byte": octet, "chunk": chunk, "offset": position})
+            _drain_valid_frames(pending, frames)
+    return frames
+
+
+def _drain_valid_frames(stream: list[dict[str, Any]], frames: list[dict[str, Any]]) -> None:
+    """Move every checksum-valid frame from the front of *stream* into *frames*.
+
+    The earliest ``FRAME_LENGTH``-sized window whose sixth byte equals the
+    checksum of its first five is located; any bytes before it are discarded
+    and the window is emitted as a frame. This repeats until no valid window
+    remains, leaving the unmatched bytes in *stream* for later calls. Both
+    lists are mutated in place.
+    """
+
+    def window_is_valid(start: int) -> bool:
+        octets = [int(entry["byte"]) for entry in stream[start : start + FRAME_LENGTH]]
+        return checksum_for(octets[:5]) == octets[5]
+
+    while len(stream) >= FRAME_LENGTH:
+        last_start = len(stream) - FRAME_LENGTH
+        first_valid = next(
+            (start for start in range(last_start + 1) if window_is_valid(start)),
+            None,
+        )
+        if first_valid is None:
+            # Nothing decodable yet; keep the bytes until more data arrives.
+            return
+        # Drop the unaligned prefix (no-op when the frame is already at the front).
+        del stream[:first_valid]
+        frames.append(_frame_dict(len(frames), stream[:FRAME_LENGTH]))
+        del stream[:FRAME_LENGTH]
+
+
+def _frame_dict(frame_index: int, items: list[dict[str, Any]]) -> dict[str, Any]:
+    """Describe one checksum-valid frame built from buffered byte entries.
+
+    Timestamp and direction come from the chunk that contributed the first
+    byte. Because callers pass only frames that already passed the checksum
+    test, the expected and actual checksum are both reported as the sixth byte.
+    """
+    octets = [int(entry["byte"]) for entry in items]
+    origin: CaptureChunk = items[0]["chunk"]
+    contributing = sorted({entry["chunk"].chunk_index for entry in items})
+    trailer = octets[5]
+    trailer_hex = _h8(trailer)
+    checksum_info = {
+        "valid": True,
+        "expected": trailer,
+        "expected_hex": trailer_hex,
+        "actual": trailer,
+        "actual_hex": trailer_hex,
+    }
+    return {
+        "frame_index": frame_index,
+        "timestamp": origin.timestamp,
+        "timestamp_ms": origin.timestamp_ms,
+        "analyzer_direction": origin.analyzer_direction,
+        "device_direction": origin.device_direction,
+        "bytes": octets,
+        "bytes_hex": [_h8(octet) for octet in octets],
+        "checksum": checksum_info,
+        "source_chunk_indexes": contributing,
+        "source_chunk_count": len(contributing),
+        "report_candidate": _tx_report_candidate(octets),
+    }
+
+
+def _tx_report_candidate(frame: list[int]) -> dict[str, Any]:
+    """Interpret a frame as an index/value report and attach any known label.
+
+    Bytes 0-2 form a big-endian index and bytes 3-4 a big-endian value. The
+    ``(index, value)`` pair is looked up in ``OBSERVED_TX_REPORT_CANDIDATES``.
+    """
+    index = frame[2] | (frame[1] << 8) | (frame[0] << 16)
+    value = frame[4] | (frame[3] << 8)
+    known = OBSERVED_TX_REPORT_CANDIDATES.get((index, value))
+
+    if known:
+        confidence = "observed_candidate"
+        observed = dict(known)
+    else:
+        confidence = "unknown"
+        observed = None
+
+    # Indexes that fit in 16 bits use the short form; wider ones get six digits.
+    if index <= 0xFFFF:
+        index_hex = _h16(index)
+    else:
+        index_hex = f"0x{index:06X}"
+
+    return {
+        "encoding": "observed_tx_index_value_report_candidate",
+        "confidence": confidence,
+        "index": index,
+        "index_hex": index_hex,
+        "value": value,
+        "value_hex": _h16(value),
+        "observed_candidate": observed,
+        "caveat": "Observed TX report names are capture labels, not proven protocol facts.",
+    }
+
+
+def _repeated_groups(frames: list[Mapping[str, Any]]) -> list[dict[str, Any]]:
+    """Summarise byte-identical frames that occur at least twice.
+
+    Each group reports its members, first/last timestamps, total span and the
+    gaps between consecutive occurrences. Groups are ordered by descending
+    count, then by their hex byte string.
+    """
+    buckets: dict[tuple[int, ...], list[Mapping[str, Any]]] = {}
+    for frame in frames:
+        signature = tuple(frame["bytes"])
+        if signature in buckets:
+            buckets[signature].append(frame)
+        else:
+            buckets[signature] = [frame]
+
+    def summarize(signature: tuple[int, ...], members: list[Mapping[str, Any]]) -> dict[str, Any]:
+        stamps = [int(member["timestamp_ms"]) for member in members]
+        gaps = [later - earlier for earlier, later in zip(stamps, stamps[1:])]
+        if gaps:
+            mean_gap, shortest, longest = sum(gaps) / len(gaps), min(gaps), max(gaps)
+        else:
+            mean_gap = shortest = longest = None
+        return {
+            "bytes": " ".join(_h8(octet) for octet in signature),
+            "count": len(members),
+            "frame_indexes": [member["frame_index"] for member in members],
+            "first_timestamp": members[0]["timestamp"],
+            "last_timestamp": members[-1]["timestamp"],
+            "span_ms": stamps[-1] - stamps[0],
+            "cadence_ms": {
+                "samples": gaps,
+                "average": mean_gap,
+                "min": shortest,
+                "max": longest,
+            },
+        }
+
+    summaries = [
+        summarize(signature, members)
+        for signature, members in buckets.items()
+        if len(members) >= 2
+    ]
+    summaries.sort(key=lambda summary: (-int(summary["count"]), str(summary["bytes"])))
+    return summaries
+
+
+def _gate_session_hints(frames: list[Mapping[str, Any]]) -> dict[str, Any]:
+    """Derive capture-side hints from frames that carry a known report label.
+
+    Produces per-name report summaries, active/inactive transitions, heartbeat
+    cadence statistics and heartbeat interruptions, plus a fixed caveat and
+    evidence-scope marker.
+    """
+    observed = []
+    for frame in frames:
+        summary = _observed_report_frame(frame)
+        if summary is not None:
+            observed.append(summary)
+
+    grouped: dict[str, list[dict[str, Any]]] = {}
+    for entry in observed:
+        grouped.setdefault(str(entry["name_candidate"]), []).append(entry)
+
+    def describe(name: str, members: list[dict[str, Any]]) -> dict[str, Any]:
+        seen_states = {
+            str(member["state_candidate"])
+            for member in members
+            if member.get("state_candidate")
+        }
+        return {
+            "name_candidate": name,
+            "count": len(members),
+            "first_timestamp": members[0]["timestamp"],
+            "last_timestamp": members[-1]["timestamp"],
+            "frame_indexes": [member["frame_index"] for member in members],
+            "indexes_hex": sorted({str(member["index_hex"]) for member in members}),
+            "values_hex": sorted({str(member["value_hex"]) for member in members}),
+            "states": sorted(seen_states),
+        }
+
+    observed_reports = [describe(name, grouped[name]) for name in sorted(grouped)]
+
+    beats = [
+        entry for entry in observed if entry.get("name_candidate") == "heartbeat_alive_candidate"
+    ]
+    beat_times = [int(entry["timestamp_ms"]) for entry in beats]
+    beat_gaps = [later - earlier for earlier, later in zip(beat_times, beat_times[1:])]
+    if beat_gaps:
+        mean_gap, shortest, longest = sum(beat_gaps) / len(beat_gaps), min(beat_gaps), max(beat_gaps)
+    else:
+        mean_gap = shortest = longest = None
+
+    return {
+        "observed_autonomous_report_names": sorted(grouped),
+        "observed_reports": observed_reports,
+        "active_inactive_transitions": _active_inactive_transitions(observed),
+        "heartbeat_cadence_ms": {
+            "count": len(beats),
+            "samples": beat_gaps,
+            "average": mean_gap,
+            "min": shortest,
+            "max": longest,
+        },
+        "heartbeat_interruptions": _heartbeat_interruptions(observed),
+        "caveat": (
+            "Missing autonomous reports for other controls may reflect host/session gating "
+            "or capture timing, not proof that local control state did not change."
+        ),
+        "evidence_scope": "capture_side_observation_only",
+    }
+
+
+def _observed_report_frame(frame: Mapping[str, Any]) -> dict[str, Any] | None:
+    """Flatten a labelled frame into a summary dict, or return ``None``.
+
+    A frame counts as labelled when its report candidate carries an observed
+    candidate with a truthy ``name_candidate``.
+    """
+    report = frame.get("report_candidate") or {}
+    known = report.get("observed_candidate") or {}
+    label = known.get("name_candidate")
+    if not label:
+        return None
+
+    frame_keys = (
+        "frame_index",
+        "timestamp",
+        "timestamp_ms",
+        "analyzer_direction",
+        "device_direction",
+    )
+    report_keys = ("index", "index_hex", "value", "value_hex")
+
+    summary: dict[str, Any] = {key: frame.get(key) for key in frame_keys}
+    summary["name_candidate"] = label
+    summary["state_candidate"] = known.get("state_candidate")
+    for key in report_keys:
+        summary[key] = report.get(key)
+    return summary
+
+
+def _active_inactive_transitions(observed: list[Mapping[str, Any]]) -> list[dict[str, Any]]:
+    """List state flips between consecutive reports that share an index.
+
+    Only entries whose state is ``"active"`` or ``"inactive"`` and whose index
+    is an ``int`` are considered. Within each index, entries are ordered by
+    frame index and every change of state yields one transition record.
+    Results are ordered by index.
+    """
+    tracked_states = {"active", "inactive"}
+    per_index: dict[int, list[Mapping[str, Any]]] = {}
+    for entry in observed:
+        state = entry.get("state_candidate")
+        index = entry.get("index")
+        if state in tracked_states and isinstance(index, int):
+            per_index.setdefault(index, []).append(entry)
+
+    def frame_order(entry: Mapping[str, Any]) -> int:
+        return int(entry.get("frame_index") or 0)
+
+    transitions: list[dict[str, Any]] = []
+    for index in sorted(per_index):
+        ordered = sorted(per_index[index], key=frame_order)
+        for before, after in zip(ordered, ordered[1:]):
+            if before.get("state_candidate") == after.get("state_candidate"):
+                continue
+            transitions.append(
+                {
+                    "index": index,
+                    "index_hex": after.get("index_hex"),
+                    "name_candidate": after.get("name_candidate"),
+                    "from_state": before.get("state_candidate"),
+                    "to_state": after.get("state_candidate"),
+                    "from_timestamp": before.get("timestamp"),
+                    "to_timestamp": after.get("timestamp"),
+                    "from_frame_index": before.get("frame_index"),
+                    "to_frame_index": after.get("frame_index"),
+                }
+            )
+    return transitions
+
+
+def _heartbeat_interruptions(observed: list[Mapping[str, Any]]) -> list[dict[str, Any]]:
+    """Report every pair of consecutive heartbeats with other reports in between.
+
+    For each such pair the record holds both heartbeat frame indexes and
+    timestamps, the millisecond gap between them, and a summary of each
+    intervening report.
+    """
+    heartbeat_name = "heartbeat_alive_candidate"
+    intruder_keys = (
+        "frame_index",
+        "timestamp",
+        "name_candidate",
+        "state_candidate",
+        "index_hex",
+        "value_hex",
+    )
+
+    beat_positions = [
+        position
+        for position, entry in enumerate(observed)
+        if entry.get("name_candidate") == heartbeat_name
+    ]
+
+    found: list[dict[str, Any]] = []
+    for earlier, later in zip(beat_positions, beat_positions[1:]):
+        intruders = [
+            entry
+            for entry in observed[earlier + 1 : later]
+            if entry.get("name_candidate") != heartbeat_name
+        ]
+        if intruders:
+            opening = observed[earlier]
+            closing = observed[later]
+            # A missing or falsy timestamp_ms counts as 0.
+            gap_ms = int(closing.get("timestamp_ms") or 0) - int(opening.get("timestamp_ms") or 0)
+            found.append(
+                {
+                    "from_frame_index": opening.get("frame_index"),
+                    "to_frame_index": closing.get("frame_index"),
+                    "from_timestamp": opening.get("timestamp"),
+                    "to_timestamp": closing.get("timestamp"),
+                    "gap_ms": gap_ms,
+                    "interrupted_by": [
+                        {key: entry.get(key) for key in intruder_keys} for entry in intruders
+                    ],
+                }
+            )
+    return found
+
+
+def _chunk_dict(chunk: CaptureChunk) -> dict[str, Any]:
+    """Convert a ``CaptureChunk`` into a plain dictionary.
+
+    Adds the parsed byte count, a flag telling whether it matches the declared
+    count, and a hex rendering of every byte. ``raw_line`` is not included.
+    """
+    payload = list(chunk.bytes)
+    parsed_count = len(payload)
+    return {
+        "chunk_index": chunk.chunk_index,
+        "timestamp": chunk.timestamp,
+        "timestamp_ms": chunk.timestamp_ms,
+        "analyzer_direction": chunk.analyzer_direction,
+        "device_direction": chunk.device_direction,
+        "declared_count": chunk.declared_count,
+        "byte_count": parsed_count,
+        "count_matches": chunk.declared_count == parsed_count,
+        "bytes": payload,
+        "bytes_hex": [_h8(octet) for octet in payload],
+    }
+
+
+def _device_direction(analyzer_direction: str) -> str:
+    """Map an analyzer-side direction to the device-side one.
+
+    ``"rx"`` becomes ``"tx"`` and vice versa; anything else is ``"unknown"``.
+    """
+    flipped = {"rx": "tx", "tx": "rx"}
+    return flipped.get(analyzer_direction, "unknown")
+
+
+def _timestamp_ms(value: str) -> int:
+    """Convert ``H:MM:SS[.fraction]`` text into milliseconds.
+
+    The fraction is right-padded with zeros and truncated to three digits, so
+    ``.5`` means 500 ms and digits beyond the third are dropped.
+    """
+    clock, _, fraction = value.partition(".")
+    hours, minutes, seconds = map(int, clock.split(":"))
+    whole_seconds = hours * 3600 + minutes * 60 + seconds
+    if fraction:
+        millis = int(fraction.ljust(3, "0")[:3])
+    else:
+        millis = 0
+    return whole_seconds * 1000 + millis
+
+
+def _h8(value: int) -> str:
+    """Format the low 8 bits of *value* as ``0xNN`` (upper-case hex)."""
+    low_byte = value & 0xFF
+    return "0x" + format(low_byte, "02X")
+
+
+def _h16(value: int) -> str:
+    """Format the low 16 bits of *value* as ``0xNNNN`` (upper-case hex)."""
+    low_word = value & 0xFFFF
+    return "0x" + format(low_word, "04X")
+
+
+# Public API of this module; underscore-prefixed helpers are intentionally omitted.
+__all__ = [
+    "CaptureChunk",
+    "analyze_capture_chunks",
+    "analyze_capture_text",
+    "checksum_for",
+    "format_text_report",
+    "main",
+    "parse_capture_text",
+]