# h8-536-decoder/h8536/protocol_capture.py

from __future__ import annotations

import argparse
import json
import re
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Iterable, Mapping, TextIO

try:  # Keep this module useful even when copied away from the decompiler tree.
    from . import protocol_trace as _protocol_trace
except ImportError:  # pragma: no cover - exercised only outside package imports.
    _protocol_trace = None


# Checksum seed and frame length come from protocol_trace when it was imported;
# getattr() on None simply yields the literal fallbacks below.
CHECKSUM_SEED = getattr(_protocol_trace, "CHECKSUM_SEED", 0x5A)
FRAME_LENGTH = getattr(_protocol_trace, "FRAME_LENGTH", 6)
# One capture line: an H:MM:SS / HH:MM:SS time with an optional 1-6 digit
# fraction, a direction keyword (RX, TX or FRAME, case-insensitive), a decimal
# count optionally followed by "byte"/"bytes", then the remaining text, which
# is searched for hex byte tokens.
CAPTURE_LINE_RE = re.compile(
    r"^\s*(?P<time>\d{1,2}:\d{2}:\d{2}(?:\.\d{1,6})?)\s+"
    r"(?P<direction>RX|TX|FRAME)\s+"
    r"(?P<count>\d+)(?:\s+bytes?)?\s+"
    r"(?P<byte_text>.*?)\s*$",
    re.IGNORECASE,
)
# A standalone two-digit hexadecimal token (word boundaries on both sides).
HEX_BYTE_RE = re.compile(r"\b[0-9A-Fa-f]{2}\b")

# Built-in label table used when protocol_trace does not supply one.
# Keys are (index, value) pairs as decoded by _tx_report_candidate; values
# carry a "name_candidate" and, for some entries, a "state_candidate".
_FALLBACK_OBSERVED_TX_REPORT_CANDIDATES: dict[tuple[int, int], dict[str, str]] = {
    (0x0000, 0x0080): {
        "name_candidate": "heartbeat_alive_candidate",
    },
    (0x0015, 0x8000): {
        "name_candidate": "call_button_candidate",
        "state_candidate": "active",
    },
    (0x0015, 0x0000): {
        "name_candidate": "call_button_candidate",
        "state_candidate": "inactive",
    },
    (0x0007, 0x8000): {
        "name_candidate": "cam_power_button_candidate",
        "state_candidate": "active",
    },
}
# Prefer the table exported by protocol_trace; fall back to the local copy
# when the module is missing or does not define the attribute.
OBSERVED_TX_REPORT_CANDIDATES = getattr(
    _protocol_trace,
    "OBSERVED_TX_REPORT_CANDIDATES",
    _FALLBACK_OBSERVED_TX_REPORT_CANDIDATES,
)


@dataclass(frozen=True)
class CaptureChunk:
    """One parsed capture-log line: its timing, direction and byte payload."""

    # Zero-based position among the chunks produced by parse_capture_text.
    chunk_index: int
    # Time text exactly as matched on the capture line.
    timestamp: str
    # The same time converted by _timestamp_ms (hours/minutes/seconds to ms).
    timestamp_ms: int
    # "rx" or "tx" as logged by the analyzer; FRAME lines are stored as "rx".
    analyzer_direction: str
    # Direction from the device's point of view (see _device_direction).
    device_direction: str
    # Byte count stated on the line; may differ from len(bytes).
    declared_count: int
    # Hex byte tokens found on the line, as integers.
    bytes: tuple[int, ...]
    # The original, unstripped log line.
    raw_line: str


def checksum_for(frame_prefix: Iterable[int]) -> int:
    """Return the checksum byte for the bytes in *frame_prefix*.

    Delegates to ``protocol_trace.checksum_for`` when that module was imported
    and provides the function. Otherwise the low eight bits of every byte are
    XOR-ed into ``CHECKSUM_SEED`` and the result is masked to one byte.
    """
    use_fallback = _protocol_trace is None or not hasattr(_protocol_trace, "checksum_for")
    if not use_fallback:
        return int(_protocol_trace.checksum_for(frame_prefix))

    accumulator = CHECKSUM_SEED
    for octet in frame_prefix:
        accumulator = accumulator ^ (octet & 0xFF)
    return accumulator & 0xFF


def parse_capture_text(text: str) -> list[CaptureChunk]:
    """Parse capture-log *text* into a list of ``CaptureChunk`` records.

    Each line is stripped and matched against ``CAPTURE_LINE_RE``; lines that
    do not match (blank lines included) are skipped. Hex byte tokens are taken
    from the text following the count field.

    A chunk whose declared count differs from the number of bytes found is
    still returned, so no capture evidence is dropped; the mismatch remains
    visible by comparing ``declared_count`` with ``len(bytes)``.
    """
    chunks: list[CaptureChunk] = []
    for raw_line in text.splitlines():
        match = CAPTURE_LINE_RE.match(raw_line.strip())
        if match is None:
            continue
        timestamp = match.group("time")
        direction = match.group("direction").lower()
        # FRAME lines are recorded as analyzer-side receives.
        analyzer_direction = "rx" if direction == "frame" else direction
        byte_values = tuple(
            int(token, 16) for token in HEX_BYTE_RE.findall(match.group("byte_text"))
        )
        chunks.append(
            CaptureChunk(
                chunk_index=len(chunks),
                timestamp=timestamp,
                timestamp_ms=_timestamp_ms(timestamp),
                analyzer_direction=analyzer_direction,
                device_direction=_device_direction(analyzer_direction),
                declared_count=int(match.group("count")),
                bytes=byte_values,
                raw_line=raw_line,
            )
        )
    return chunks


def analyze_capture_text(text: str) -> dict[str, Any]:
    """Parse capture-log *text* and return the analysis of the parsed chunks."""
    parsed_chunks = parse_capture_text(text)
    return analyze_capture_chunks(parsed_chunks)


def analyze_capture_chunks(chunks: Iterable[CaptureChunk]) -> dict[str, Any]:
    """Build the full analysis dictionary for already-parsed capture chunks.

    The result bundles the serialised chunks, the recombined frames, groups of
    repeated frames, gate/session hints, and a description of the checksum
    model, together with the matching ``*_count`` fields.
    """
    materialized = list(chunks)
    recombined = _recombine_frames(materialized)
    repeated = _repeated_groups(recombined)
    hints = _gate_session_hints(recombined)

    checksum_model: dict[str, Any] = {
        "algorithm": "xor",
        "seed": CHECKSUM_SEED,
        "seed_hex": _h8(CHECKSUM_SEED),
        "covered_offsets": [0, 1, 2, 3, 4],
        "checksum_offset": 5,
    }

    analysis: dict[str, Any] = {
        "kind": "h8536_protocol_capture",
        "frame_length": FRAME_LENGTH,
        "checksum_model": checksum_model,
    }
    analysis["chunks"] = list(map(_chunk_dict, materialized))
    analysis["chunk_count"] = len(materialized)
    analysis["frames"] = recombined
    analysis["frame_count"] = len(recombined)
    analysis["repeated_groups"] = repeated
    analysis["repeated_group_count"] = len(repeated)
    analysis["gate_session_hints"] = hints
    analysis["direction_note"] = (
        "Capture RX is analyzer-perspective receive; these bytes are device-perspective TX."
    )
    return analysis


def _cadence_text(cadence: Mapping[str, Any]) -> str:
    """Render average/min/max cadence, or ``"n/a"`` when no average is recorded."""
    if cadence.get("average") is None:
        return "n/a"
    return (
        f"avg={cadence['average']:.1f}ms "
        f"min={cadence['min']}ms max={cadence['max']}ms"
    )


def format_text_report(analysis: Mapping[str, Any]) -> str:
    """Render an analysis mapping as a multi-line, human-readable report.

    The report has a two-line header, then one line per frame, one per
    repeated group, and the gate/session hints (observed names, heartbeat
    cadence, state transitions, heartbeat gaps, caveat). Every section is
    optional: a key that is missing, ``None`` or empty contributes no lines.

    Args:
        analysis: Mapping shaped like the result of ``analyze_capture_chunks``.

    Returns:
        The report text, lines joined with ``"\\n"`` and no trailing newline.
    """
    lines = [
        "H8/536 capture log",
        (
            f"chunks={analysis.get('chunk_count', 0)} "
            f"frames={analysis.get('frame_count', 0)} "
            f"repeated_groups={analysis.get('repeated_group_count', 0)}"
        ),
    ]

    # ``or []`` (rather than a .get default) also tolerates explicit None values.
    for frame in analysis.get("frames") or []:
        report = frame.get("report_candidate") or {}
        candidate = report.get("observed_candidate") or {}
        label = ""
        if candidate.get("name_candidate"):
            label = f" {candidate['name_candidate']}"
            if candidate.get("state_candidate"):
                label += f" state={candidate['state_candidate']}"
        # Flag frames whose bytes arrived in more than one capture chunk.
        split = " split" if frame.get("source_chunk_count", 0) > 1 else ""
        lines.append(
            f"[{frame['frame_index']:04d}] {frame['timestamp']} "
            f"{frame['analyzer_direction'].upper()}=>device:{frame['device_direction']} "
            f"bytes={' '.join(frame['bytes_hex'])} checksum=ok{split} "
            f"index={report.get('index_hex')} value={report.get('value_hex')}{label}"
        )

    for group in analysis.get("repeated_groups") or []:
        cadence_text = _cadence_text(group.get("cadence_ms") or {})
        lines.append(
            f"group {group['bytes']} count={group['count']} "
            f"span={group['span_ms']}ms cadence={cadence_text}"
        )

    hints = analysis.get("gate_session_hints") or {}

    names = hints.get("observed_autonomous_report_names") or []
    if names:
        lines.append("observed autonomous report candidates: " + ", ".join(names))

    heartbeat = hints.get("heartbeat_cadence_ms") or {}
    if heartbeat.get("count"):
        lines.append(
            f"heartbeat cadence count={heartbeat['count']} cadence={_cadence_text(heartbeat)}"
        )

    for transition in hints.get("active_inactive_transitions") or []:
        lines.append(
            f"transition index={transition['index_hex']} "
            f"{transition['from_state']}->{transition['to_state']} "
            f"{transition['from_timestamp']}..{transition['to_timestamp']}"
        )

    for interruption in hints.get("heartbeat_interruptions") or []:
        interrupted_names = ", ".join(
            item["name_candidate"] for item in interruption.get("interrupted_by") or []
        )
        lines.append(
            f"heartbeat gap {interruption['from_timestamp']}..{interruption['to_timestamp']} "
            f"gap={interruption['gap_ms']}ms interrupted_by={interrupted_names}"
        )

    if hints.get("caveat"):
        lines.append(f"caveat: {hints['caveat']}")
    return "\n".join(lines)


def main(argv: list[str] | None = None, *, stdin: TextIO | None = None, stdout: TextIO | None = None) -> int:
    """Command-line entry point: analyze a capture log and print the result.

    Args:
        argv: Argument list handed to argparse (``None`` lets argparse use the
            process arguments).
        stdin: Stream read when no input path (or ``-``) is given; defaults to
            ``sys.stdin``.
        stdout: Stream that receives the report; defaults to ``sys.stdout``.

    Returns:
        Always ``0``.
    """
    cli = argparse.ArgumentParser(
        description="Analyze timestamped H8/536 serial capture logs and recombine 6-byte frames."
    )
    cli.add_argument("input", nargs="?", help="Capture log path. Use '-' or omit to read stdin.")
    cli.add_argument("--json", action="store_true", help="Emit JSON instead of text.")
    options = cli.parse_args(argv)

    source = stdin or sys.stdin
    sink = stdout or sys.stdout

    # A missing/empty path or "-" selects the input stream instead of a file.
    if not options.input or options.input == "-":
        text = source.read()
    else:
        text = Path(options.input).read_text(encoding="utf-8")

    analysis = analyze_capture_text(text)
    if options.json:
        json.dump(analysis, sink, indent=2, sort_keys=True)
    else:
        sink.write(format_text_report(analysis))
    # Both output formats end with a single newline.
    sink.write("\n")
    return 0


def _recombine_frames(chunks: list[CaptureChunk]) -> list[dict[str, Any]]:
    """Reassemble frames from the byte streams carried by *chunks*.

    Bytes are accumulated in one buffer per analyzer direction, each byte
    tagged with its source chunk and offset. After every appended byte the
    buffer is handed to ``_drain_valid_frames``, which moves completed frames
    into the returned list.
    """
    per_direction: dict[str, list[dict[str, Any]]] = {}
    emitted: list[dict[str, Any]] = []
    for chunk in chunks:
        direction = chunk.analyzer_direction
        if direction not in per_direction:
            per_direction[direction] = []
        pending = per_direction[direction]
        for position, octet in enumerate(chunk.bytes):
            pending.append({"byte": octet, "chunk": chunk, "offset": position})
            _drain_valid_frames(pending, emitted)
    return emitted


def _drain_valid_frames(stream: list[dict[str, Any]], frames: list[dict[str, Any]]) -> None:
    """Move every checksum-valid frame at the front of *stream* into *frames*.

    The buffer is scanned for the first ``FRAME_LENGTH``-byte window whose
    sixth byte equals ``checksum_for`` of its first five. Bytes before that
    window are discarded as unsynchronised, the window is emitted as a frame,
    and scanning resumes behind it.

    When no complete window validates, the bytes that can no longer start a
    frame are dropped so later calls do not rescan them; only the trailing
    ``FRAME_LENGTH - 1`` bytes, which may still begin a frame once more data
    arrives, are kept.

    Args:
        stream: Per-direction buffer of ``{"byte", "chunk", "offset"}`` items;
            modified in place.
        frames: Output list; new frame dictionaries are appended in order.
    """
    while len(stream) >= FRAME_LENGTH:
        last_start = len(stream) - FRAME_LENGTH
        for start in range(last_start + 1):
            window = stream[start : start + FRAME_LENGTH]
            values = [int(item["byte"]) for item in window]
            if checksum_for(values[:5]) == values[5]:
                frames.append(_frame_dict(len(frames), window))
                # Drop the unsynchronised prefix together with the emitted frame.
                del stream[: start + FRAME_LENGTH]
                break
        else:
            # Every complete window failed the checksum: positions up to and
            # including last_start can never begin a valid frame, so forget them.
            del stream[: last_start + 1]
            break


def _frame_dict(frame_index: int, items: list[dict[str, Any]]) -> dict[str, Any]:
    """Describe one recombined frame as a plain dictionary.

    *items* are the tagged byte entries making up the frame. Timing and
    direction are taken from the chunk that supplied the first byte. The
    checksum entry always reports a valid checksum whose expected and actual
    value are both the frame's sixth byte.
    """
    octets = [int(entry["byte"]) for entry in items]
    origin: CaptureChunk = items[0]["chunk"]
    contributing = sorted({entry["chunk"].chunk_index for entry in items})
    checksum_byte = octets[5]
    checksum_hex = _h8(checksum_byte)

    checksum_info = {
        "valid": True,
        "expected": checksum_byte,
        "expected_hex": checksum_hex,
        "actual": checksum_byte,
        "actual_hex": checksum_hex,
    }
    return {
        "frame_index": frame_index,
        "timestamp": origin.timestamp,
        "timestamp_ms": origin.timestamp_ms,
        "analyzer_direction": origin.analyzer_direction,
        "device_direction": origin.device_direction,
        "bytes": octets,
        "bytes_hex": list(map(_h8, octets)),
        "checksum": checksum_info,
        "source_chunk_indexes": contributing,
        "source_chunk_count": len(contributing),
        "report_candidate": _tx_report_candidate(octets),
    }


def _tx_report_candidate(frame: list[int]) -> dict[str, Any]:
    """Decode a frame into an index/value report and attach any known label.

    The first three bytes form a big-endian index and the next two a
    big-endian value. The ``(index, value)`` pair is looked up in
    ``OBSERVED_TX_REPORT_CANDIDATES``; a hit is copied into
    ``observed_candidate`` and marks the confidence as ``observed_candidate``.
    """
    index = frame[2] | (frame[1] << 8) | (frame[0] << 16)
    value = frame[4] | (frame[3] << 8)

    known = OBSERVED_TX_REPORT_CANDIDATES.get((index, value))
    if known:
        confidence, observed = "observed_candidate", dict(known)
    else:
        confidence, observed = "unknown", None

    # Indexes that fit in 16 bits use the four-digit form; larger ones get six digits.
    index_hex = _h16(index) if index <= 0xFFFF else f"0x{index:06X}"

    return {
        "encoding": "observed_tx_index_value_report_candidate",
        "confidence": confidence,
        "index": index,
        "index_hex": index_hex,
        "value": value,
        "value_hex": _h16(value),
        "observed_candidate": observed,
        "caveat": "Observed TX report names are capture labels, not proven protocol facts.",
    }


def _repeated_groups(frames: list[Mapping[str, Any]]) -> list[dict[str, Any]]:
    """Summarise byte patterns that occur in more than one frame.

    Frames are grouped by their exact byte sequence. Each group with at least
    two members yields a summary with its count, member frame indexes, first
    and last timestamps, time span, and the deltas between consecutive
    members. Summaries are ordered by descending count, then by byte text.
    """
    buckets: dict[tuple[int, ...], list[Mapping[str, Any]]] = {}
    for frame in frames:
        signature = tuple(frame["bytes"])
        if signature in buckets:
            buckets[signature].append(frame)
        else:
            buckets[signature] = [frame]

    summaries: list[dict[str, Any]] = []
    for signature, occurrences in buckets.items():
        if len(occurrences) <= 1:
            continue
        moments = [int(occurrence["timestamp_ms"]) for occurrence in occurrences]
        gaps = [later - earlier for earlier, later in zip(moments, moments[1:])]
        cadence = {
            "samples": gaps,
            "average": (sum(gaps) / len(gaps)) if gaps else None,
            "min": min(gaps) if gaps else None,
            "max": max(gaps) if gaps else None,
        }
        summaries.append(
            {
                "bytes": " ".join(_h8(octet) for octet in signature),
                "count": len(occurrences),
                "frame_indexes": [occurrence["frame_index"] for occurrence in occurrences],
                "first_timestamp": occurrences[0]["timestamp"],
                "last_timestamp": occurrences[-1]["timestamp"],
                "span_ms": moments[-1] - moments[0],
                "cadence_ms": cadence,
            }
        )

    summaries.sort(key=lambda summary: (-int(summary["count"]), str(summary["bytes"])))
    return summaries


def _gate_session_hints(frames: list[Mapping[str, Any]]) -> dict[str, Any]:
    """Collect capture-side hints from the frames that carry a known label.

    Only frames for which ``_observed_report_frame`` returns a summary are
    considered. The result lists the observed label names, a per-name summary,
    active/inactive transitions, heartbeat cadence statistics, heartbeat
    interruptions, and a fixed caveat and evidence-scope marker.
    """
    heartbeat_name = "heartbeat_alive_candidate"

    observed: list[dict[str, Any]] = []
    for frame in frames:
        entry = _observed_report_frame(frame)
        if entry is not None:
            observed.append(entry)

    grouped: dict[str, list[dict[str, Any]]] = {}
    for entry in observed:
        grouped.setdefault(str(entry["name_candidate"]), []).append(entry)

    report_summaries: list[dict[str, Any]] = []
    for name in sorted(grouped):
        entries = grouped[name]
        states = {
            str(entry["state_candidate"])
            for entry in entries
            if entry.get("state_candidate")
        }
        report_summaries.append(
            {
                "name_candidate": name,
                "count": len(entries),
                "first_timestamp": entries[0]["timestamp"],
                "last_timestamp": entries[-1]["timestamp"],
                "frame_indexes": [entry["frame_index"] for entry in entries],
                "indexes_hex": sorted({str(entry["index_hex"]) for entry in entries}),
                "values_hex": sorted({str(entry["value_hex"]) for entry in entries}),
                "states": sorted(states),
            }
        )

    heartbeats = [entry for entry in observed if entry.get("name_candidate") == heartbeat_name]
    beat_times = [int(entry["timestamp_ms"]) for entry in heartbeats]
    beat_gaps = [later - earlier for earlier, later in zip(beat_times, beat_times[1:])]
    if beat_gaps:
        average_gap = sum(beat_gaps) / len(beat_gaps)
        shortest_gap = min(beat_gaps)
        longest_gap = max(beat_gaps)
    else:
        average_gap = shortest_gap = longest_gap = None

    return {
        "observed_autonomous_report_names": sorted(grouped),
        "observed_reports": report_summaries,
        "active_inactive_transitions": _active_inactive_transitions(observed),
        "heartbeat_cadence_ms": {
            "count": len(heartbeats),
            "samples": beat_gaps,
            "average": average_gap,
            "min": shortest_gap,
            "max": longest_gap,
        },
        "heartbeat_interruptions": _heartbeat_interruptions(observed),
        "caveat": (
            "Missing autonomous reports for other controls may reflect host/session gating "
            "or capture timing, not proof that local control state did not change."
        ),
        "evidence_scope": "capture_side_observation_only",
    }


def _observed_report_frame(frame: Mapping[str, Any]) -> dict[str, Any] | None:
    report = frame.get("report_candidate") or {}
    candidate = report.get("observed_candidate") or {}
    name = candidate.get("name_candidate")
    if not name:
        return None
    return {
        "frame_index": frame.get("frame_index"),
        "timestamp": frame.get("timestamp"),
        "timestamp_ms": frame.get("timestamp_ms"),
        "analyzer_direction": frame.get("analyzer_direction"),
        "device_direction": frame.get("device_direction"),
        "name_candidate": name,
        "state_candidate": candidate.get("state_candidate"),
        "index": report.get("index"),
        "index_hex": report.get("index_hex"),
        "value": report.get("value"),
        "value_hex": report.get("value_hex"),
    }


def _active_inactive_transitions(observed: list[Mapping[str, Any]]) -> list[dict[str, Any]]:
    by_index: dict[int, list[Mapping[str, Any]]] = {}
    for item in observed:
        state = item.get("state_candidate")
        index = item.get("index")
        if state not in {"active", "inactive"} or not isinstance(index, int):
            continue
        by_index.setdefault(index, []).append(item)

    transitions: list[dict[str, Any]] = []
    for index, members in sorted(by_index.items()):
        previous: Mapping[str, Any] | None = None
        for member in sorted(members, key=lambda item: int(item.get("frame_index") or 0)):
            if previous is not None and previous.get("state_candidate") != member.get("state_candidate"):
                transitions.append(
                    {
                        "index": index,
                        "index_hex": member.get("index_hex"),
                        "name_candidate": member.get("name_candidate"),
                        "from_state": previous.get("state_candidate"),
                        "to_state": member.get("state_candidate"),
                        "from_timestamp": previous.get("timestamp"),
                        "to_timestamp": member.get("timestamp"),
                        "from_frame_index": previous.get("frame_index"),
                        "to_frame_index": member.get("frame_index"),
                    }
                )
            previous = member
    return transitions


def _heartbeat_interruptions(observed: list[Mapping[str, Any]]) -> list[dict[str, Any]]:
    interruptions: list[dict[str, Any]] = []
    heartbeat_positions = [
        index
        for index, item in enumerate(observed)
        if item.get("name_candidate") == "heartbeat_alive_candidate"
    ]
    for left, right in zip(heartbeat_positions, heartbeat_positions[1:]):
        between = [
            item
            for item in observed[left + 1 : right]
            if item.get("name_candidate") != "heartbeat_alive_candidate"
        ]
        if not between:
            continue
        start = observed[left]
        end = observed[right]
        interruptions.append(
            {
                "from_frame_index": start.get("frame_index"),
                "to_frame_index": end.get("frame_index"),
                "from_timestamp": start.get("timestamp"),
                "to_timestamp": end.get("timestamp"),
                "gap_ms": int(end.get("timestamp_ms") or 0) - int(start.get("timestamp_ms") or 0),
                "interrupted_by": [
                    {
                        "frame_index": item.get("frame_index"),
                        "timestamp": item.get("timestamp"),
                        "name_candidate": item.get("name_candidate"),
                        "state_candidate": item.get("state_candidate"),
                        "index_hex": item.get("index_hex"),
                        "value_hex": item.get("value_hex"),
                    }
                    for item in between
                ],
            }
        )
    return interruptions


def _chunk_dict(chunk: CaptureChunk) -> dict[str, Any]:
    """Serialise a ``CaptureChunk`` into a plain dictionary.

    Besides the chunk's own fields (without the raw line), the result carries
    the actual byte count, whether it equals the declared count, and the bytes
    in both integer and hex-text form.
    """
    payload = list(chunk.bytes)
    actual_count = len(payload)
    return {
        "chunk_index": chunk.chunk_index,
        "timestamp": chunk.timestamp,
        "timestamp_ms": chunk.timestamp_ms,
        "analyzer_direction": chunk.analyzer_direction,
        "device_direction": chunk.device_direction,
        "declared_count": chunk.declared_count,
        "byte_count": actual_count,
        "count_matches": chunk.declared_count == actual_count,
        "bytes": payload,
        "bytes_hex": list(map(_h8, payload)),
    }


def _device_direction(analyzer_direction: str) -> str:
    if analyzer_direction == "rx":
        return "tx"
    if analyzer_direction == "tx":
        return "rx"
    return "unknown"


def _timestamp_ms(value: str) -> int:
    head, _, fraction = value.partition(".")
    hours, minutes, seconds = [int(part) for part in head.split(":")]
    millis = int((fraction + "000")[:3]) if fraction else 0
    return ((hours * 60 + minutes) * 60 + seconds) * 1000 + millis


def _h8(value: int) -> str:
    return f"0x{value & 0xFF:02X}"


def _h16(value: int) -> str:
    return f"0x{value & 0xFFFF:04X}"


# Public API of this module; underscore-prefixed helpers are intentionally omitted.
__all__ = [
    "CaptureChunk",
    "analyze_capture_chunks",
    "analyze_capture_text",
    "checksum_for",
    "format_text_report",
    "main",
    "parse_capture_text",
]