diff --git a/.gitignore b/.gitignore index 02f9372..69f0dc5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ compile_commands.json build .cache +__pycache__/ +*.pyc diff --git a/src/RtlUsbAdapter.cpp b/src/RtlUsbAdapter.cpp index 89c1b3e..b9243d6 100644 --- a/src/RtlUsbAdapter.cpp +++ b/src/RtlUsbAdapter.cpp @@ -212,6 +212,19 @@ void RtlUsbAdapter::ReadEFuseByte(uint16_t _offset, uint8_t *pbuf) { uint8_t readbyte; uint16_t retry; + /* Match the kernel `88XXau` driver's per-iteration EFUSE_TEST clear. + * Cold-init usbmon diff (2026-05-28, devourer-testrig VM kernel-side + * vs host devourer-side) shows the kernel does an RD-then-WR sequence + * at REG_EFUSE_TEST (0x0034) = 0x0000 (16-bit) BEFORE every EFUSE byte + * read, 312 times per init; devourer never touched 0x0034. We mirror + * the sequence so the EFUSE state machine sees identical wire shape + * across all 312 byte reads. Empirically harmless on its own (does + * NOT fix the RTL8814AU TX-on-air gate per a sniffer run with this + * patch + bulk-IN drainer enabled) but removes a known concrete + * wire-level divergence flagged by tools/usbmon_pcap_diff.py. */ + (void)rtw_read16(REG_EFUSE_TEST); + rtw_write16(REG_EFUSE_TEST, 0x0000); + /* Write Address */ rtw_write8(EFUSE_CTRL + 1, (uint8_t)(_offset & 0xff)); readbyte = rtw_read8(EFUSE_CTRL + 2); diff --git a/tests/test_urbscript_roundtrip.py b/tests/test_urbscript_roundtrip.py new file mode 100644 index 0000000..31648ee --- /dev/null +++ b/tests/test_urbscript_roundtrip.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +"""End-to-end smoke test: pcapng → urbscript emitter → C replay (dry-run). + +Synthesises a small pcapng with a mix of control writes, control reads, a +bulk OUT, and an interrupt IN. Runs tools/pcapng_to_urbscript.py to produce +a .urbs file, then runs build/usbmon_replay --dry-run on it and verifies +the emitted submit counts match the script. 
+ +Requires build/usbmon_replay to have been compiled +(cc -O2 -Wall -Wextra -o build/usbmon_replay tools/usbmon_replay.c). +""" + +from __future__ import annotations + +import os +import subprocess +import sys +import tempfile +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT)) + +from tests.test_usbmon_pcap_diff import ( # noqa: E402 + _build_pcap, + _bulk_out, + _ctrl_read, + _ctrl_write, + _interrupt_in, +) + + +def main() -> int: + replay_bin = ROOT / "build" / "usbmon_replay" + if not replay_bin.exists(): + print(f"FAIL: {replay_bin} not built", file=sys.stderr) + print(" cc -O2 -Wall -Wextra -o build/usbmon_replay tools/usbmon_replay.c", + file=sys.stderr) + return 1 + + with tempfile.TemporaryDirectory() as d: + tmp = Path(d) + pcap = tmp / "in.pcap" + urbs = tmp / "in.urbs" + + records = [] + # 3 control writes, 1 control read, 1 bulk OUT, 1 interrupt IN. + records += list(_ctrl_write(0x0100, b"\x01", 1_000_000, 1)) + records += list(_ctrl_write(0x0102, b"\xab\xcd", 1_000_500, 2)) + records += list(_ctrl_write(0x0C90, b"\x55\xaa\x55\xaa", 1_001_000, 3)) + records += list(_ctrl_read(0x0F00, b"\x12\x34", 1_001_500, 4)) + records += list(_bulk_out(0x02, b"hello world " * 8, 1_002_000, 5)) + records += list(_interrupt_in(0x83, b"\xc2\xc2\xc2", 1_002_500, 6)) + pcap.write_bytes(_build_pcap(records)) + + # Stage 1 — pcap → urbscript. + r = subprocess.run( + [sys.executable, str(ROOT / "tools" / "pcapng_to_urbscript.py"), + str(pcap), "-o", str(urbs)], + check=True, capture_output=True, text=True, + ) + assert urbs.exists(), "urbscript not emitted" + out = r.stdout + r.stderr + # 6 submits total. + assert "wrote 6 URB records" in out, f"unexpected stage-1 output:\n{out}" + print(f" stage 1 (pcap → urbs): ok") + + # Stage 2 — replay --dry-run. 
+ r = subprocess.run( + [str(replay_bin), "--device", "/dev/null", + "--urbs", str(urbs), "--dry-run", "-v"], + check=True, capture_output=True, text=True, + ) + out = r.stderr + # Expect 6 submits, 6 ok, by kind: ctrl=4 bulk=1 intr=1, in=2 out=4 + assert "6 submits, ok=6" in out, f"summary missing or wrong:\n{out}" + assert "ctrl=4 bulk=1 intr=1" in out, f"by-kind wrong:\n{out}" + assert "in=2 out=4" in out, f"by-dir wrong:\n{out}" + print(f" stage 2 (urbs → replay --dry-run): ok") + print("ALL OK") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/test_usbmon_pcap_diff.py b/tests/test_usbmon_pcap_diff.py new file mode 100644 index 0000000..6184dea --- /dev/null +++ b/tests/test_usbmon_pcap_diff.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python3 +"""Unit tests for tools/usbmon_pcap_diff.py. + +Synthesises a pcap stream of Linux usbmon (LINKTYPE_USB_LINUX_MMAPPED, 220) +records by hand-rolling the binary headers, writes it to a temp file, then +exercises the parser, aggregate, offload-probe, phase-split, and diff paths. + +Run: PYTHONPATH=. python3 tests/test_usbmon_pcap_diff.py +""" + +from __future__ import annotations + +import hashlib +import os +import struct +import sys +import tempfile +import warnings +from pathlib import Path + +# scapy emits a noisy "unknown LL type 220" warning every time it opens a +# pcap of LINKTYPE_USB_LINUX_MMAPPED — we parse the payload ourselves, so +# falling back to Raw packets is exactly what we want. Mute that line. +warnings.filterwarnings("ignore", message=".*unknown LL type.*") + +sys.path.insert(0, str(Path(__file__).resolve().parents[1])) +from tools.usbmon_pcap_diff import ( # noqa: E402 + PATH_A_LSSI_REG, + REALTEK_VENQT_READ, + REALTEK_VENQT_REQ, + REALTEK_VENQT_WRITE, + Urb, + USBMON_MMAPPED_HDR_SIZE, + _HDR_STRUCT, + aggregate, + diff, + find_phase_boundary, + offload_probe, + read_urbs, +) + +# pcap (classic, not pcapng) global + per-record header — easy to hand-roll. 
+# Magic 0xa1b2c3d4 (host byte order) with thiszone=0, sigfigs=0, snaplen=65535, +# network=220 (LINKTYPE_USB_LINUX_MMAPPED). +PCAP_GLOBAL = struct.pack( + " bytes: + """Build one 64-byte usbmon header.""" + assert len(setup) == 8 + epnum = (ep & 0x0F) | (DIR_IN_BIT if dir_in else 0) + ts_sec = ts_us // 1_000_000 + ts_usec = ts_us % 1_000_000 + return _HDR_STRUCT.pack( + urb_id, event, xfer, epnum, devnum, busnum, + flag_setup, ord(" "), + ts_sec, ts_usec, status, length, length, + setup, + 0, 0, 0, 0, + ) + + +def _record(payload: bytes, ts_us: int) -> bytes: + """Wrap one URB header+payload in a pcap-record header.""" + ts_sec = ts_us // 1_000_000 + ts_usec = ts_us % 1_000_000 + n = len(payload) + return struct.pack(" tuple[bytes, bytes]: + """One Realtek vendor write — emit Submit + Complete records. + Returns (submit_record, complete_record) ready to concat into the pcap.""" + setup = struct.pack( + " tuple[bytes, bytes]: + setup = struct.pack( + " tuple[bytes, bytes]: + submit_hdr = _hdr( + urb_id=urb_id, event=URB_SUBMIT, xfer=XFER_BULK, ep=ep, dir_in=False, + devnum=2, busnum=1, ts_us=ts_us, status=-115, length=len(data), + flag_setup=ord("-"), + ) + complete_hdr = _hdr( + urb_id=urb_id, event=URB_COMPLETE, xfer=XFER_BULK, ep=ep, dir_in=False, + devnum=2, busnum=1, ts_us=ts_us + 100, status=0, length=len(data), + flag_setup=ord("-"), + ) + return _record(submit_hdr + data, ts_us), _record(complete_hdr, ts_us + 100) + + +def _interrupt_in(ep: int, payload: bytes, ts_us: int, urb_id: int) -> tuple[bytes, bytes]: + submit_hdr = _hdr( + urb_id=urb_id, event=URB_SUBMIT, xfer=XFER_INTR, ep=ep, dir_in=True, + devnum=2, busnum=1, ts_us=ts_us, status=-115, length=len(payload), + flag_setup=ord("-"), + ) + complete_hdr = _hdr( + urb_id=urb_id, event=URB_COMPLETE, xfer=XFER_INTR, ep=ep, dir_in=True, + devnum=2, busnum=1, ts_us=ts_us + 50, status=0, length=len(payload), + flag_setup=ord("-"), + ) + return _record(submit_hdr, ts_us), _record(complete_hdr + payload, ts_us 
+ 50) + + +def _build_pcap(records: list[bytes]) -> bytes: + return PCAP_GLOBAL + b"".join(records) + + +# -------------------------------------------------------------------------- +# Tests +# -------------------------------------------------------------------------- + + +def test_basic_parse(tmp: Path) -> None: + rs = [] + rs += list(_ctrl_write(0x0100, b"\x05", 1_000_000, 1)) + rs += list(_ctrl_read(0x0100, b"\x05", 1_001_000, 2)) + rs += list(_bulk_out(0x02, b"\xff" * 100, 1_002_000, 3)) + rs += list(_interrupt_in(0x83, b"\x01\x02\x03", 1_003_000, 4)) + + cap = tmp / "basic.pcap" + cap.write_bytes(_build_pcap(rs)) + urbs = read_urbs(cap) + assert len(urbs) == 8, f"expected 8 URBs, got {len(urbs)}" + submits = [u for u in urbs if u.event == "S"] + assert len(submits) == 4 + # First submit is the control write — must have setup decoded. + write = submits[0] + assert write.bmRequestType == REALTEK_VENQT_WRITE + assert write.bRequest == REALTEK_VENQT_REQ + assert write.wValue == 0x0100 + assert write.wLength == 1 + assert write.payload == b"\x05", f"payload was {write.payload!r}" + # Read URB: payload arrives on the COMPLETE. + read_complete = [ + u for u in urbs + if u.event == "C" and u.xfer_type == 2 and u.dir_in + ][0] + assert read_complete.payload == b"\x05" + # Bulk OUT submit carries 100 bytes of 0xff. + bulk_submit = [u for u in urbs if u.xfer_type == 3 and u.event == "S"][0] + assert bulk_submit.payload == b"\xff" * 100 + # Interrupt IN complete carries the response payload — must be first-class. + intr_complete = [u for u in urbs if u.xfer_type == 1 and u.event == "C"][0] + assert intr_complete.payload == b"\x01\x02\x03" + print(f" test_basic_parse: ok ({len(urbs)} URBs)") + + +def test_offload_probe_per_write(tmp: Path) -> None: + # Simulate "devourer-style" capture: 2047 EP0 writes to path-A LSSI. 
+ rs = [] + for i in range(2047): + rs += list(_ctrl_write(PATH_A_LSSI_REG, b"\xde\xad\xbe\xef", + 1_000_000 + i * 200, 1000 + i)) + cap = tmp / "perwrite.pcap" + cap.write_bytes(_build_pcap(rs)) + urbs = read_urbs(cap) + probe = offload_probe(urbs) + assert probe["path_a_writes"] == 2047 + assert "PER-WRITE" in probe["verdict"], probe["verdict"] + print(f" test_offload_probe_per_write: ok (2047 path-A writes detected)") + + +def test_offload_probe_offloaded(tmp: Path) -> None: + # Simulate "kernel-FW-offload" capture: a handful of EP0 writes plus one + # bulk-OUT carrying a fake H2C command. + rs = [] + for i in range(10): + rs += list(_ctrl_write(PATH_A_LSSI_REG, b"\x11\x22\x33\x44", + 1_000_000 + i * 200, 2000 + i)) + rs += list(_bulk_out(0x03, b"\xa5" * 64, 1_100_000, 2100)) + cap = tmp / "offloaded.pcap" + cap.write_bytes(_build_pcap(rs)) + urbs = read_urbs(cap) + probe = offload_probe(urbs) + assert probe["path_a_writes"] == 10 + assert "OFFLOAD" in probe["verdict"], probe["verdict"] + assert probe["h2c_candidates"] >= 1 + print(f" test_offload_probe_offloaded: ok ({probe['path_a_writes']} writes, " + f"{probe['h2c_candidates']} H2C candidates)") + + +def test_phase_split_by_sentinel(tmp: Path) -> None: + # Sentinel: write 0xDEAD to 0x01C0 before init, 0xBEEF after init. + rs = [] + rs += list(_ctrl_write(0x01C0, b"\xad\xde", 1_000_000, 5000)) # init start + for i in range(20): + rs += list(_ctrl_write(0x0100 + i, b"\x42", 1_010_000 + i * 10, 5100 + i)) + rs += list(_ctrl_write(0x01C0, b"\xef\xbe", 1_020_000, 5200)) # init end + rs += list(_bulk_out(0x02, b"tx" * 50, 1_030_000, 5300)) + cap = tmp / "phase.pcap" + cap.write_bytes(_build_pcap(rs)) + urbs = read_urbs(cap) + boundary = find_phase_boundary(urbs, sentinel_reg=0x01C0) + # The init-end sentinel SUBMIT is at index 42 (21 control writes precede + # it, 2 records each); the boundary returned is the index AFTER it (43). 
+ print(f" test_phase_split_by_sentinel: boundary at {boundary}") + assert boundary > 0 and boundary < len(urbs) + # The tail starts with the sentinel write's COMPLETE record; the only + # xfer_type-3 records in it are the bulk-OUT Submit/Complete pair. + tail = urbs[boundary:] + bulk_in_tail = [u for u in tail if u.xfer_type == 3] + assert len(bulk_in_tail) == 2, f"expected 2 bulk records after boundary, got {len(bulk_in_tail)}" + + +def test_phase_split_by_gap(tmp: Path) -> None: + rs = [] + for i in range(10): + rs += list(_ctrl_write(0x0100 + i, b"\x10", 1_000_000 + i * 100, 6000 + i)) + # ~500 ms quiescent gap (last write submit at 1_000_900 us, bulk-OUT at 1_500_000 us). + rs += list(_bulk_out(0x02, b"x" * 40, 1_500_000, 6100)) + cap = tmp / "gap.pcap" + cap.write_bytes(_build_pcap(rs)) + urbs = read_urbs(cap) + boundary = find_phase_boundary(urbs, sentinel_reg=0x9999, min_gap_us=20_000) + print(f" test_phase_split_by_gap: boundary at {boundary}") + # boundary should land on the bulk-OUT submit itself (the first TX record). + assert urbs[boundary].xfer_type == 3 and urbs[boundary].event == "S" + + +def test_diff_finds_extra_urb(tmp: Path) -> None: + # Capture A: 5 writes. Capture B: same 5 + 1 extra in the middle. 
+ rs_a, rs_b = [], [] + for i in range(5): + rs_a += list(_ctrl_write(0x0100 + i, b"\x01", 1_000_000 + i * 100, 7000 + i)) + rs_b += list(_ctrl_write(0x0100 + i, b"\x01", 1_000_000 + i * 100, 8000 + i)) + if i == 2: + rs_b += list(_ctrl_write(0xC90, b"\xaa\xbb\xcc\xdd", + 1_000_250, 8500)) + cap_a = tmp / "a.pcap" + cap_b = tmp / "b.pcap" + cap_a.write_bytes(_build_pcap(rs_a)) + cap_b.write_bytes(_build_pcap(rs_b)) + a = read_urbs(cap_a) + b = read_urbs(cap_b) + res = diff(a, b) + assert res["deltas"], "expected at least one delta" + extra_b = [d for d in res["deltas"] if d["idx_a"] is None] + assert extra_b, f"expected an 'extra in B' delta, got {res['deltas']}" + print(f" test_diff_finds_extra_urb: ok ({len(res['deltas'])} deltas)") + + +def test_diff_finds_payload_divergence(tmp: Path) -> None: + rs_a, rs_b = [], [] + for i in range(5): + rs_a += list(_ctrl_write(0xC90, b"\xaa\xaa\xaa\xaa", + 1_000_000 + i * 100, 9000 + i)) + # Capture B: same registers, different payload on entry 2 — simulates a + # data-byte divergence that text usbmon would silently drop. + payload = b"\xbb\xbb\xbb\xbb" if i == 2 else b"\xaa\xaa\xaa\xaa" + rs_b += list(_ctrl_write(0xC90, payload, + 1_000_000 + i * 100, 9500 + i)) + cap_a = tmp / "a2.pcap" + cap_b = tmp / "b2.pcap" + cap_a.write_bytes(_build_pcap(rs_a)) + cap_b.write_bytes(_build_pcap(rs_b)) + a = read_urbs(cap_a) + b = read_urbs(cap_b) + res = diff(a, b) + # Each ctrl write emits 2 URBs (Submit + Complete), so the diverged + # payload sits at URB-index 4. The resync emits 2 deltas naming the + # B-side records the walker couldn't pair (B[4] = Submit-bb, B[5] = + # its Complete). SHA bb*4 = bfcd12432b17 — that string MUST surface + # in at least one delta, otherwise the payload divergence was + # silently dropped (the exact failure mode of the old text-format + # tool). 
+ sha_bb = hashlib.sha256(b"\xbb\xbb\xbb\xbb").hexdigest()[:12] + assert any(sha_bb in d["b"] for d in res["deltas"]), \ + f"expected delta naming SHA {sha_bb}, got {res['deltas']}" + print(f" test_diff_finds_payload_divergence: ok ({len(res['deltas'])} deltas, " + f"sha bb*4={sha_bb} surfaced)") + + +def test_aggregate_classifies_in_urbs(tmp: Path) -> None: + rs = [] + rs += list(_ctrl_write(0x100, b"\x01", 1_000_000, 10000)) + rs += list(_ctrl_read(0x100, b"\x01", 1_000_100, 10001)) + rs += list(_bulk_out(0x02, b"abc", 1_000_200, 10002)) + rs += list(_interrupt_in(0x83, b"\x99", 1_000_300, 10003)) + cap = tmp / "agg.pcap" + cap.write_bytes(_build_pcap(rs)) + urbs = read_urbs(cap) + agg = aggregate(urbs) + # 4 submits. + assert agg["submits"] == 4 + # 1 realtek write, 1 realtek read. + assert agg["realtek_writes"] == 1 + assert agg["realtek_reads"] == 1 + # 2 IN submits: the control read and the interrupt-IN are both IN-directed. + assert agg["in_urbs_submitted"] == 2, \ + f"expected 2 IN submits (ctrl read + intr in), got {agg['in_urbs_submitted']}" + print(f" test_aggregate_classifies_in_urbs: ok") + + +def main() -> int: + with tempfile.TemporaryDirectory() as d: + tmp = Path(d) + print("running tests...") + for fn in [ + test_basic_parse, + test_offload_probe_per_write, + test_offload_probe_offloaded, + test_phase_split_by_sentinel, + test_phase_split_by_gap, + test_diff_finds_extra_urb, + test_diff_finds_payload_divergence, + test_aggregate_classifies_in_urbs, + ]: + fn(tmp) + print("ALL OK") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/pcapng_to_urbscript.py b/tools/pcapng_to_urbscript.py new file mode 100644 index 0000000..689d357 --- /dev/null +++ b/tools/pcapng_to_urbscript.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python3 +"""Emit a binary URB script from a pcapng captured by tshark on usbmonN. + +The output is consumed by tools/usbmon_replay (compiled C binary) which +replays each URB verbatim via USBDEVFS_SUBMITURB. 
The two-stage pipeline +lets the heavy pcap parsing live in Python (already implemented in +tools/usbmon_pcap_diff.py) while the kernel-facing replay stays in C +where the ioctl interface is direct. + +What we emit: + + file header (12 bytes): + char magic[4] = "URBS" + u32 version = 1 (LE) + u32 urb_count (LE) + + per-URB record (24 + data_bytes bytes): + u8 xfer_type 1=INTR, 2=CTRL, 3=BULK (skip ISO; not used by Realtek) + u8 ep_with_dir bit 7 = IN, bits 3..0 = ep number + u16 reserved + u32 inter_urb_gap_us time since previous SUBMIT (0 for first; capped + at 1 second) + u8 setup[8] control setup packet; zero-filled for non-control + u32 transfer_length wire transfer size (for OUT: bytes to send; for + IN: bytes to expect/receive) + u32 data_bytes actual bytes following this header (0 for IN; equal + to transfer_length for OUT) + u8 data[data_bytes] OUT payload — IN URBs carry no on-disk data + because the data comes from the chip at replay + time + +Only SUBMIT records are emitted. Completion events are generated by the +chip+kernel during replay — replaying them would make no sense. + +Filtering: --busnum / --devnum restrict to a specific device. --skip-in +drops interrupt/bulk IN URBs that the kernel polls but the chip may not +strictly require for replay (use sparingly — IN URBs can be load-bearing if +the chip's state machine reacts to a C2H message the kernel reads). + +Usage: + sudo tshark -i usbmon4 -s 0 -w /tmp/k.pcapng # capture kernel side + # ... 
let modprobe 88XXau + airodump-ng run, then Ctrl-C tshark + python3 tools/pcapng_to_urbscript.py /tmp/k.pcapng --busnum 4 --devnum 2 \\ + -o /tmp/k.urbs + sudo ./build/usbmon_replay --device /dev/bus/usb/004/002 --urbs /tmp/k.urbs +""" + +from __future__ import annotations + +import argparse +import struct +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent)) +from usbmon_pcap_diff import ( # noqa: E402 + Urb, + XFER_LETTER, + read_urbs, +) + +MAGIC = b"URBS" +VERSION = 1 +HEADER_FMT = "<4sII" +HEADER_SIZE = struct.calcsize(HEADER_FMT) +RECORD_FIXED_FMT = " int: + """Write one URB SUBMIT record. Returns the timestamp written (for the + next iteration's inter-URB-gap computation).""" + + gap = max(0, urb.ts_us - prev_ts_us) if prev_ts_us else 0 + # cap at 1 second — the replay tool further caps at 100ms; this is just a + # defensive guard so a multi-day pcap doesn't emit ridiculous values. + gap = min(gap, 1_000_000) + + ep_with_dir = (urb.ep & 0x0F) | (0x80 if urb.dir_in else 0) + setup = urb.setup if (urb.xfer_type == 2 and urb.setup) else b"\x00" * 8 + if len(setup) != 8: + setup = (setup + b"\x00" * 8)[:8] + # OUT submits carry payload bytes; IN submits get an empty on-disk data + # area (the chip provides the bytes at replay time). transfer_length + # tells the replay tool how big a buffer to allocate either way. + if urb.dir_in: + data = b"" + transfer_length = urb.length + else: + data = urb.payload or b"" + transfer_length = len(data) + + fout.write(struct.pack( + RECORD_FIXED_FMT, + urb.xfer_type, ep_with_dir, 0, gap, setup, + transfer_length, len(data), + )) + fout.write(data) + return urb.ts_us + + +def emit( + urbs: list[Urb], + out_path: Path, + *, + skip_in: bool = False, +) -> dict: + """Walk the URB list, emit SUBMIT records to out_path. 
Returns stats.""" + + stats = { + "total_submits": 0, + "ctrl_out": 0, + "ctrl_in": 0, + "bulk_out": 0, + "bulk_in": 0, + "intr_in": 0, + "intr_out": 0, + "skipped": 0, + } + + submits = [u for u in urbs if u.event == "S"] + # Skip ISO — not used by Realtek and the replay tool doesn't support it. + submits = [u for u in submits if u.xfer_type != 0] + + # Optionally skip IN URBs (defensive — keep them by default). + if skip_in: + kept = [] + for u in submits: + if u.dir_in and u.xfer_type in (1, 3): + stats["skipped"] += 1 + continue + kept.append(u) + submits = kept + + # Reserve header space, write records, backpatch header with count. + with open(out_path, "wb") as fout: + fout.write(b"\x00" * HEADER_SIZE) + prev_ts = 0 + for u in submits: + prev_ts = _emit_record(prev_ts, u, fout) + stats["total_submits"] += 1 + key = { + (2, False): "ctrl_out", (2, True): "ctrl_in", + (3, False): "bulk_out", (3, True): "bulk_in", + (1, False): "intr_out", (1, True): "intr_in", + }.get((u.xfer_type, u.dir_in)) + if key: + stats[key] += 1 + # Backpatch real header. + fout.seek(0) + fout.write(struct.pack(HEADER_FMT, MAGIC, VERSION, stats["total_submits"])) + return stats + + +def main(argv: list[str] | None = None) -> int: + p = argparse.ArgumentParser( + description="Convert a usbmon pcapng to a binary URB script for replay.", + ) + p.add_argument("pcap", type=Path) + p.add_argument("-o", "--output", type=Path, required=True) + p.add_argument("--busnum", type=int, default=None, + help="Restrict to this USB bus.") + p.add_argument("--devnum", type=int, default=None, + help="Restrict to this device address on the bus.") + p.add_argument("--skip-in", action="store_true", + help="Drop interrupt/bulk IN URBs. 
Use only if you've " + "confirmed the chip doesn't act on C2H messages.") + args = p.parse_args(argv) + + urbs = read_urbs(args.pcap, busnum=args.busnum, devnum=args.devnum) + print(f"loaded {len(urbs)} URB events ({sum(1 for u in urbs if u.event == 'S')} submits)") + stats = emit(urbs, args.output, skip_in=args.skip_in) + print(f"wrote {stats['total_submits']} URB records to {args.output}") + for k in ("ctrl_out", "ctrl_in", "bulk_out", "bulk_in", "intr_in", "intr_out"): + if stats[k]: + print(f" {k}: {stats[k]}") + if stats["skipped"]: + print(f" skipped (IN, --skip-in): {stats['skipped']}") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/usbmon_diff.py b/tools/usbmon_diff.py index 1964691..c216070 100755 --- a/tools/usbmon_diff.py +++ b/tools/usbmon_diff.py @@ -1,10 +1,15 @@ #!/usr/bin/env python3 -"""Diff two usbmon captures by URB-level properties. +"""Diff two usbmon captures by URB-level properties — TEXT FORMAT, BULK ONLY. -Use to compare a libusb-routed TX path vs. a kernel-routed (qemu USB passthrough) -TX path on the same chip — both go through host xhci so host usbmon sees both. -The decisive question is what URB-level property the kernel preserves and the -libusb userspace path drops. +For control-transfer-aware diffs (EP0 vendor reads/writes, full payload bytes, +URB flags, IN URBs, chip-side C2H interrupts), use `tools/usbmon_pcap_diff.py` +instead. That tool consumes pcapng captured by tshark on usbmonN at full +fidelity and surfaces axes this text-format tool structurally cannot. + +Original purpose: compare a libusb-routed TX path vs. a kernel-routed (qemu USB +passthrough) TX path on the same chip — both go through host xhci so host +usbmon sees both. The decisive question is what URB-level property the kernel +preserves and the libusb userspace path drops. Format: usbmon text format (the `Nu` debugfs file on kernel 6.18 emits this). See Documentation/usb/usbmon.rst. 
Line layout: diff --git a/tools/usbmon_pcap_diff.py b/tools/usbmon_pcap_diff.py new file mode 100644 index 0000000..8ee2157 --- /dev/null +++ b/tools/usbmon_pcap_diff.py @@ -0,0 +1,590 @@ +#!/usr/bin/env python3 +"""Binary-fidelity diff for USB captures collected by tshark on usbmonN. + +Why this exists (the epistemic point that motivated it): + + If the URB sequence on the wire is truly identical between two captures and + one produces a result the other doesn't, the diff has a gap. Prior + `tools/usbmon_diff.py` (text-format usbmon, bulk-OUT only) cannot represent + EP0 control transfers, full payload bytes, URB flags, IN URBs, or chip-side + C2H interrupts. So "the wire matches" claims based on it were unfounded. + +This tool reads pcapng captured via + sudo modprobe usbmon + sudo tshark -i usbmonN -s 0 -w cap.pcapng +and surfaces every URB axis that the text-format tool drops: + + * Control transfer setup packet: bmRequestType, bRequest, wValue, wIndex, + wLength. For Realtek register writes this is bmRequestType=0x40, + bRequest=5, wValue={register address}, wIndex=0, wLength={1,2,4}. + * Full payload bytes (not truncated) + SHA-256 for fast equality checks. + * URB status (0=OK, -ESHUTDOWN, -ETIMEDOUT, -EPIPE...). + * Timestamps and inter-URB gaps at microsecond resolution. + * IN URBs (control reads, bulk-IN, interrupt-IN) — first-class diff units, + NOT filtered away. + +Capture format expected: pcap link-layer type 220 = LINKTYPE_USB_LINUX_MMAPPED +(64-byte usbmon header in front of each URB) or type 189 = LINKTYPE_USB_LINUX +(48-byte header). Both are produced by tshark on different kernels; both are +handled here. + +Modes: + default — full URB-stream diff between two captures. + --offload-probe COUNT — count Realtek control writes to path-A LSSI in a + single capture, classify as FW-offload vs per-write. 
This + answers the Phase-3 gate in the plan: if the kernel routes + the 2047-entry RF table through firmware (one bulk-OUT) + rather than 2047 EP0 control writes, the userspace driver's + wire shape is fundamentally divergent. + --phase-split — detect "init done, TX starts" boundary by largest inter-URB + gap, report per-phase aggregates. + --aggregate — histograms only (EP, xfer_type, control bRequest, register + address pages). + +Filtering: + --busnum N — limit to a specific USB bus (use `lsusb` to find). + --devnum N — limit to a specific device address on that bus. + +The defaults are no filter. tshark already filters by interface; if multiple +devices share a bus, --devnum is what you want. +""" + +from __future__ import annotations + +import argparse +import collections +import dataclasses +import hashlib +import struct +import sys +from pathlib import Path +from typing import Iterable, Iterator, Optional + +try: + import logging + from scapy.utils import PcapNgReader, PcapReader + from scapy.error import Scapy_Exception + # scapy logs "unknown LL type 220" every time we open a usbmon pcap. + # We do the framing ourselves and want the Raw-packet fallback, so + # raise its log level above WARNING. + logging.getLogger("scapy.runtime").setLevel(logging.ERROR) +except ImportError as e: # pragma: no cover + print(f"scapy is required: {e}", file=sys.stderr) + sys.exit(2) + + +# Linux usbmon binary headers — kernel/Documentation/usb/usbmon.rst +# LINKTYPE_USB_LINUX_MMAPPED (220) is the 64-byte form produced since ~3.x. +# LINKTYPE_USB_LINUX (189) is the older 48-byte form. +USBMON_MMAPPED_HDR_SIZE = 64 +USBMON_LEGACY_HDR_SIZE = 48 + +# Header format: id, type, xfer, epnum, devnum, busnum, flag_setup, flag_data, +# ts_sec, ts_usec, status, length, len_cap, setup[8], interval, start_frame, +# xfer_flags, ndesc. 
+_HDR_STRUCT = struct.Struct("device, vendor, recipient device +REALTEK_VENQT_READ = 0xC0 # bmRequestType: device->host, vendor, recipient device +REALTEK_VENQT_REQ = 0x05 # bRequest + +# Path-A LSSI register window — RTL8814AU jaguar baseband. +# Mirrors src/RadioManagementModule.cpp:308 (rA_LSSIWrite_Jaguar = 0xC90). +PATH_A_LSSI_REG = 0xC90 +PATH_A_LSSI_WINDOW = (0xC90, 0xCDF) + +# Standard errno strings for common URB completions on Linux. +ERRNO = { + 0: "OK", + -2: "-ENOENT", + -32: "-EPIPE", + -71: "-EPROTO", + -75: "-EOVERFLOW", + -84: "-EILSEQ", + -108: "-ESHUTDOWN", + -110: "-ETIMEDOUT", + -115: "-EINPROGRESS", + -121: "-EREMOTEIO", +} + + +@dataclasses.dataclass +class Urb: + """One usbmon record (a Submit, Complete, or Error event for a URB). + + A SUBMIT carries the host's intent (setup packet for control, the OUT + payload for bulk/interrupt-OUT). A COMPLETE carries the device's reply + (data for control/bulk/interrupt-IN, status code, actual length). + Pair S/C by (id) to get a full transaction. + """ + + id: int + event: str # 'S', 'C', 'E' + xfer_type: int # 0..3 — see XFER_TYPE + ep: int # endpoint number (low 4 bits) — direction in `dir_in` + dir_in: bool # True for device->host + devnum: int + busnum: int + ts_us: int # microseconds since epoch (sec*1e6 + usec) + status: int # 0 on success, errno negative otherwise + length: int # requested length + len_cap: int # captured payload length + setup: bytes # 8 bytes; valid only when xfer_type==CTRL and event=='S' + payload: bytes # actual captured URB data + + # Decoded fields for control transfers (setup unpacked). 
+ bmRequestType: Optional[int] = None + bRequest: Optional[int] = None + wValue: Optional[int] = None + wIndex: Optional[int] = None + wLength: Optional[int] = None + + @property + def payload_sha(self) -> str: + return hashlib.sha256(self.payload).hexdigest()[:12] if self.payload else "" + + def is_realtek_write(self) -> bool: + return ( + self.xfer_type == 2 + and self.event == "S" + and self.bmRequestType == REALTEK_VENQT_WRITE + and self.bRequest == REALTEK_VENQT_REQ + ) + + def is_realtek_read(self) -> bool: + return ( + self.xfer_type == 2 + and self.event == "S" + and self.bmRequestType == REALTEK_VENQT_READ + and self.bRequest == REALTEK_VENQT_REQ + ) + + def short(self) -> str: + kind = XFER_LETTER[self.xfer_type] + d = "i" if self.dir_in else "o" + s = f"{self.event} {kind}{d} dev={self.devnum} ep={self.ep:02x}" + if self.xfer_type == 2 and self.event == "S" and self.bmRequestType is not None: + s += ( + f" bmRT=0x{self.bmRequestType:02x} bReq={self.bRequest} " + f"wVal=0x{self.wValue:04x} wIdx=0x{self.wIndex:04x} " + f"wLen={self.wLength}" + ) + s += f" status={ERRNO.get(self.status, str(self.status))}" + s += f" len={self.length}/{self.len_cap}" + if self.payload: + s += f" sha={self.payload_sha}" + return s + + +def _decode_urb(raw: bytes) -> Optional[Urb]: + """Parse one pcap record (link-type 220) into a Urb. Returns None on + truncated records — defensive against torn captures and unknown link + types. Bytes past the 64-byte header are the URB payload (for OUT + SUBMITs and IN COMPLETIONs).""" + + if len(raw) < USBMON_MMAPPED_HDR_SIZE: + # Legacy 48-byte header — older kernels. Pad up so the unpack stays + # consistent, then ignore the trailing 16 bytes we synthesised. 
+ if len(raw) < USBMON_LEGACY_HDR_SIZE: + return None + raw = raw[:USBMON_LEGACY_HDR_SIZE] + b"\x00" * 16 + raw[USBMON_LEGACY_HDR_SIZE:] + + ( + urb_id, etype, xfer, epnum, devnum, busnum, flag_setup, flag_data, + ts_sec, ts_usec, status, length, len_cap, setup, + _interval, _start_frame, _xfer_flags, _ndesc, + ) = _HDR_STRUCT.unpack_from(raw, 0) + + payload = raw[USBMON_MMAPPED_HDR_SIZE:USBMON_MMAPPED_HDR_SIZE + len_cap] + + event = chr(etype) if etype in (ord("S"), ord("C"), ord("E")) else "?" + dir_in = bool(epnum & DIR_IN_BIT) + ep = epnum & 0x0F + ts_us = ts_sec * 1_000_000 + ts_usec + + urb = Urb( + id=urb_id, event=event, xfer_type=xfer, ep=ep, dir_in=dir_in, + devnum=devnum, busnum=busnum, ts_us=ts_us, status=status, + length=length, len_cap=len_cap, setup=setup, payload=payload, + ) + + # Decode control setup packet for every CTRL submit. Kernel-doc says the + # flag_setup byte should be ' ' (0x20) when the setup is captured, but + # current Linux kernels (verified 6.x) write 0 instead — so checking + # against any specific magic byte is fragile. For a control submit the + # setup bytes are valid by construction; decode unconditionally. + if xfer == 2 and event == "S": + (urb.bmRequestType, urb.bRequest, urb.wValue, urb.wIndex, + urb.wLength) = struct.unpack(" list[Urb]: + """Read every URB record from a pcap/pcapng. Optionally filter to a + single bus / device address.""" + + urbs: list[Urb] = [] + with _open_reader(path) as r: + for pkt in r: + # scapy returns a Packet whose raw bytes start with the link-layer + # header. For USB_LINUX_MMAPPED that's the 64-byte usbmon header. 
+ raw = bytes(pkt) + urb = _decode_urb(raw) + if urb is None: + continue + if busnum is not None and urb.busnum != busnum: + continue + if devnum is not None and urb.devnum != devnum: + continue + urbs.append(urb) + return urbs + + +# -------------------------------------------------------------------------- +# Phase split — locates the init/TX boundary. Devourer-side captures should +# carry a sentinel (a 0xBEEF payload written to a sentinel register marks the +# end of init); kernel-side captures rely on the deliberate quiescent gap the +# operator inserted between `modprobe` and `airodump-ng`, which is found as +# the largest gap between consecutive SUBMITs. +# -------------------------------------------------------------------------- + + +def find_phase_boundary( + urbs: list[Urb], *, sentinel_reg: Optional[int] = None, + min_gap_us: int = 20_000, +) -> int: + """Return the index AT WHICH the TX phase begins (urbs[idx:] is TX, + urbs[:idx] is init). If sentinel_reg is set, the first Realtek + control-write SUBMIT addressed to that register (wValue == sentinel_reg) + whose payload is 0xBEEF in little-endian byte order (bytes EF BE, + optionally zero-extended to 4 bytes) wins, and the index right after + that SUBMIT is returned. Otherwise the boundary is the SUBMIT that + follows the largest gap between consecutive SUBMITs, provided that gap + is at least min_gap_us. + Returns len(urbs) if no boundary detected (all-init capture).""" + + if sentinel_reg is not None: + for i, u in enumerate(urbs): + if u.is_realtek_write() and u.wValue == sentinel_reg \ + and u.payload == b"\xef\xbe": + return i + 1 + if u.is_realtek_write() and u.wValue == sentinel_reg \ + and u.payload == b"\xef\xbe\x00\x00": + return i + 1 + + submits = [u for u in urbs if u.event == "S"] + if len(submits) < 2: + return len(urbs) + gaps = [ + (submits[i + 1].ts_us - submits[i].ts_us, i + 1) + for i in range(len(submits) - 1) + ] + biggest, idx = max(gaps, key=lambda t: t[0]) + if biggest < min_gap_us: + return len(urbs) + # Translate the submits-list index back to the full urbs-list index. + # list.index matches by equality (Urb is a dataclass), so this yields the + # first record equal to target. + target = submits[idx] + return urbs.index(target) + + +# -------------------------------------------------------------------------- +# Offload probe — answers Phase-3 gate of the plan. 
Counts Realtek control +# writes whose wValue falls in the path-A LSSI window; if it's near 2047 +# the loader is doing per-entry writes (devourer behaviour), if it's <50 +# the table almost certainly went down via H2C FW offload. +# -------------------------------------------------------------------------- + + +def offload_probe( + urbs: list[Urb], *, + lssi_window: tuple[int, int] = PATH_A_LSSI_WINDOW, + threshold_offload: int = 50, + threshold_walk: int = 1500, +) -> dict: + """Classify a capture by counting per-entry path-A RF writes. Returns a + dict with counts and a verdict string.""" + + lo, hi = lssi_window + writes_in_window = sum( + 1 for u in urbs + if u.is_realtek_write() and lo <= (u.wValue or 0) <= hi + ) + # Any bulk-OUT that LOOKS like an H2C command frame (small first byte + # indicating a phydm offload opcode) — we don't know the cmd id yet, so + # surface a count of suspicious bulk-OUTs for manual inspection. + bulk_out_submits = [ + u for u in urbs + if u.event == "S" and u.xfer_type == 3 and not u.dir_in + ] + suspect_h2c = [ + u for u in bulk_out_submits + if 16 <= len(u.payload) <= 256 + ] + + if writes_in_window >= threshold_walk: + verdict = "PER-WRITE (devourer-like). 2047-entry RF table walked one EP0 control transfer at a time." + elif writes_in_window <= threshold_offload: + verdict = ( + "OFFLOAD-LIKELY. Path-A LSSI window has very few EP0 writes; " + "kernel almost certainly batched the RF table via an H2C bulk-OUT command. " + "Devourer does not implement PHYDM_PHY_PARAM_OFFLOAD — that is the divergence." + ) + else: + verdict = "MIXED / inconclusive — inspect manually." + + return { + "lssi_window": f"0x{lo:04x}..0x{hi:04x}", + "path_a_writes": writes_in_window, + "bulk_out_submits_total": len(bulk_out_submits), + "h2c_candidates": len(suspect_h2c), + "verdict": verdict, + } + + +# -------------------------------------------------------------------------- +# Aggregate summary — histograms, no diff. 
Useful to spot structural +# differences (e.g. one side has IN URBs the other side doesn't even read). +# -------------------------------------------------------------------------- + + +def aggregate(urbs: list[Urb]) -> dict: + submits = [u for u in urbs if u.event == "S"] + completes = [u for u in urbs if u.event == "C"] + by_kind = collections.Counter( + (XFER_TYPE.get(u.xfer_type, "?"), "IN" if u.dir_in else "OUT") + for u in submits + ) + realtek_writes = [u for u in submits if u.is_realtek_write()] + realtek_reads = [u for u in submits if u.is_realtek_read()] + write_pages = collections.Counter( + (u.wValue or 0) & 0xFF00 for u in realtek_writes + ) + in_urbs = [u for u in submits if u.dir_in] + statuses = collections.Counter( + ERRNO.get(u.status, str(u.status)) for u in completes + ) + return { + "submits": len(submits), + "completes": len(completes), + "by_kind": dict(by_kind), + "realtek_writes": len(realtek_writes), + "realtek_reads": len(realtek_reads), + "write_pages_top": write_pages.most_common(8), + "in_urbs_submitted": len(in_urbs), + "completion_status": dict(statuses), + } + + +# -------------------------------------------------------------------------- +# Diff — position-aligned URB comparison. Two captures, walked in parallel. +# Records every divergence and reports a normalized delta. +# -------------------------------------------------------------------------- + + +def _semantic_tuple(u: Urb) -> tuple: + """A tuple that should be equal across two captures of the SAME logical + operation. Drops absolute timestamps and URB IDs (kernel pointers); keeps + transfer kind, EP, direction, setup packet, payload hash, status.""" + return ( + u.event, + u.xfer_type, + u.ep, + u.dir_in, + u.bmRequestType, + u.bRequest, + u.wValue, + u.wIndex, + u.wLength, + u.payload_sha, + u.status, + ) + + +def diff( + a: list[Urb], b: list[Urb], *, + max_report: int = 50, +) -> dict: + """Walk both URB streams in parallel. 
At each step, compare semantic + tuples; on divergence record both sides and resynchronise by skipping + the side that's ahead. This is intentionally a simple LCS-free walk — + if the URB streams are truly equivalent the walk stays paired; if they + diverge structurally the report will surface that immediately.""" + + deltas = [] + i = j = 0 + while i < len(a) and j < len(b) and len(deltas) < max_report: + ta, tb = _semantic_tuple(a[i]), _semantic_tuple(b[j]) + if ta == tb: + i += 1 + j += 1 + continue + # Try to resynchronise: look ahead a small window on either side. + WINDOW = 8 + found = None + for k in range(1, WINDOW + 1): + if j + k < len(b) and _semantic_tuple(b[j + k]) == ta: + found = ("B_AHEAD", k) + break + if i + k < len(a) and _semantic_tuple(a[i + k]) == tb: + found = ("A_AHEAD", k) + break + if found is None: + deltas.append({ + "idx_a": i, "idx_b": j, + "a": a[i].short(), "b": b[j].short(), + }) + i += 1 + j += 1 + elif found[0] == "B_AHEAD": + # B has extra URBs at j..j+k-1 that A doesn't have. 
+ for k in range(found[1]): + deltas.append({ + "idx_a": None, "idx_b": j + k, + "a": "(missing)", "b": b[j + k].short(), + }) + if len(deltas) >= max_report: + break + j += found[1] + else: # A_AHEAD + for k in range(found[1]): + deltas.append({ + "idx_a": i + k, "idx_b": None, + "a": a[i + k].short(), "b": "(missing)", + }) + if len(deltas) >= max_report: + break + i += found[1] + return { + "a_total": len(a), + "b_total": len(b), + "consumed_a": i, + "consumed_b": j, + "deltas": deltas, + } + + +# -------------------------------------------------------------------------- +# Output helpers +# -------------------------------------------------------------------------- + + +def _print_aggregate(label: str, agg: dict) -> None: + print(f"=== {label} ===") + print(f" submits={agg['submits']} completes={agg['completes']}") + print(f" by_kind={agg['by_kind']}") + print(f" realtek_writes={agg['realtek_writes']} realtek_reads={agg['realtek_reads']}") + print(f" write_pages_top={agg['write_pages_top']}") + print(f" in_urbs_submitted={agg['in_urbs_submitted']}") + print(f" completion_status={agg['completion_status']}") + + +def _print_offload(label: str, probe: dict) -> None: + print(f"=== offload-probe: {label} ===") + print(f" path-A LSSI window: {probe['lssi_window']}") + print(f" EP0 writes targeting that window: {probe['path_a_writes']}") + print(f" bulk-OUT submits total: {probe['bulk_out_submits_total']}") + print(f" H2C-candidate bulk-OUTs (16..256B): {probe['h2c_candidates']}") + print(f" verdict: {probe['verdict']}") + + +def main(argv: Optional[list[str]] = None) -> int: + p = argparse.ArgumentParser( + description="Binary-fidelity USB capture diff (Linux usbmon pcapng).", + ) + p.add_argument("cap_a", type=Path, help="First capture (pcap or pcapng).") + p.add_argument("cap_b", type=Path, nargs="?", default=None, + help="Second capture; required unless --offload-probe is set.") + p.add_argument("--busnum", type=int, default=None, + help="Restrict to this USB 
bus.") + p.add_argument("--devnum-a", type=int, default=None, + help="Restrict cap_a to this device address.") + p.add_argument("--devnum-b", type=int, default=None, + help="Restrict cap_b to this device address.") + p.add_argument("--offload-probe", action="store_true", + help="Classify cap_a as FW-offload vs per-write for path-A RF table.") + p.add_argument("--phase-split", action="store_true", + help="Detect init/TX boundary and report per-phase aggregates.") + p.add_argument("--aggregate", action="store_true", + help="Print histograms only, skip the URB-by-URB diff.") + p.add_argument("--sentinel-reg", type=lambda s: int(s, 0), default=0x04FC, + help="Devourer sentinel register; default 0x04FC (REG_DUMMY).") + p.add_argument("--max-report", type=int, default=50, + help="Max divergences to print in default diff mode.") + args = p.parse_args(argv) + + if not args.cap_a.exists(): + print(f"capture not found: {args.cap_a}", file=sys.stderr) + return 2 + + urbs_a = read_urbs(args.cap_a, busnum=args.busnum, devnum=args.devnum_a) + print(f"loaded {len(urbs_a)} URBs from {args.cap_a}") + + if args.offload_probe: + probe = offload_probe(urbs_a) + _print_offload(str(args.cap_a), probe) + return 0 + + if args.cap_b is None or not args.cap_b.exists(): + print("cap_b required unless --offload-probe is set", file=sys.stderr) + return 2 + urbs_b = read_urbs(args.cap_b, busnum=args.busnum, devnum=args.devnum_b) + print(f"loaded {len(urbs_b)} URBs from {args.cap_b}") + + if args.aggregate: + _print_aggregate(str(args.cap_a), aggregate(urbs_a)) + _print_aggregate(str(args.cap_b), aggregate(urbs_b)) + return 0 + + if args.phase_split: + ba = find_phase_boundary(urbs_a, sentinel_reg=args.sentinel_reg) + bb = find_phase_boundary(urbs_b, sentinel_reg=args.sentinel_reg) + print(f"phase boundary: cap_a@{ba}/{len(urbs_a)} cap_b@{bb}/{len(urbs_b)}") + _print_aggregate(f"{args.cap_a} INIT", aggregate(urbs_a[:ba])) + _print_aggregate(f"{args.cap_a} TX", aggregate(urbs_a[ba:])) + 
_print_aggregate(f"{args.cap_b} INIT", aggregate(urbs_b[:bb])) + _print_aggregate(f"{args.cap_b} TX", aggregate(urbs_b[bb:])) + return 0 + + # Default: full diff. + result = diff(urbs_a, urbs_b, max_report=args.max_report) + print(f"a_total={result['a_total']} b_total={result['b_total']} " + f"consumed_a={result['consumed_a']} consumed_b={result['consumed_b']}") + if not result["deltas"]: + print("no divergence within first matched windows.") + else: + print(f"first {len(result['deltas'])} divergences:") + for d in result["deltas"]: + print(f" [a@{d['idx_a']}] {d['a']}") + print(f" [b@{d['idx_b']}] {d['b']}") + print() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/usbmon_replay.c b/tools/usbmon_replay.c new file mode 100644 index 0000000..003dff4 --- /dev/null +++ b/tools/usbmon_replay.c @@ -0,0 +1,406 @@ +/* usbmon_replay — replay a binary URB script verbatim via USBDEVFS. + * + * Why this exists: prior bisect work narrowed RTL8814AU's userspace TX + * gate to "path-A radio table application" by removing kernel-side init + * blocks. That's an indirect argument. The decisive test is: + * + * Feed the kernel's exact URB sequence into the chip from userspace. + * If on-air TX appears, the gate is at the URB layer (and we know + * which URBs devourer is missing or getting wrong). If TX still + * fails, the gate is below the URB layer (xhci scheduling, DMA + * coherency, IRQ-EP polling cadence) — no userspace fix exists + * without a kernel shim. + * + * Input: a binary URB script produced by tools/pcapng_to_urbscript.py + * from a tshark capture of usbmonN during a working kernel-driver + * session. + * + * What this tool does NOT do (deliberately): + * - It does NOT try to reproduce kernel scheduling jitter at sub-100µs + * granularity. Inter-URB gaps are capped at 100ms and floored at 0. + * - It does NOT clear halts between URBs. If a stall happens during + * replay, that is itself a divergence worth reporting. 
+ *   - It does NOT inject extra TX traffic at the end. The kernel capture
+ *     already contains the first TX URBs that produced on-air emission;
+ *     replay them as part of the script.
+ *
+ * Build: cc -O2 -Wall -Wextra -o build/usbmon_replay tools/usbmon_replay.c
+ *   (C source — it uses C11 _Static_assert, so build it with a C compiler;
+ *   or wire into CMake; standalone single-file for portability).
+ *
+ * Run:
+ *   sudo ./build/usbmon_replay \
+ *     --device /dev/bus/usb/004/003 \
+ *     --urbs /tmp/cap-kernel.urbs \
+ *     --interface 0
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* URB script format — keep in sync with tools/pcapng_to_urbscript.py. */
+#define URBSCRIPT_MAGIC "URBS"
+#define URBSCRIPT_VERSION 1u
+
+/* File header: appears once at the start of a URB script. */
+struct __attribute__((packed)) urbs_header {
+    char magic[4];      /* must equal URBSCRIPT_MAGIC (no terminator) */
+    uint32_t version;   /* must equal URBSCRIPT_VERSION */
+    uint32_t urb_count; /* number of records that follow */
+};
+
+/* Fixed-size part of one record; data_bytes of payload follow it on disk. */
+struct __attribute__((packed)) urbs_record_fixed {
+    uint8_t xfer_type;   /* 1=INTR, 2=CTRL, 3=BULK */
+    uint8_t ep_with_dir; /* bit 7 = IN, bits 3..0 = ep number */
+    uint16_t reserved;
+    uint32_t inter_urb_gap_us; /* delay before this URB (capped at replay time) */
+    uint8_t setup[8];          /* setup packet; copied into the buffer for CTRL only */
+    /* transfer_length: bytes to send (OUT) or expect (IN) on the wire.
+     * data_bytes: actual bytes following this header on disk. For OUT they
+     * are equal; for IN data_bytes is always 0 — the chip provides the
+     * bytes at replay time and we allocate transfer_length of buffer. */
+    uint32_t transfer_length;
+    uint32_t data_bytes;
+};
+/* Guards the on-disk layout: 1 + 1 + 2 + 4 + 8 + 4 + 4 bytes. */
+_Static_assert(sizeof(struct urbs_record_fixed) == 24, "record size drift");
+
+/* CLI-configurable. */
+static int g_iface = 0;
+static int g_max_gap_us = 100000; /* cap each inter-URB sleep at 100ms */
+static int g_dry_run = 0;
+static int g_verbose = 0;
+static int g_disconnect = 0; /* USBDEVFS_DISCONNECT before claim — for chips
+                                bound by a kernel driver we want kicked out. */
+
+/* Per-xfer-type counters.
*/ +struct stats { + unsigned submits; + unsigned ok; + unsigned err_submit; + unsigned err_reap; + unsigned err_status; + unsigned ctrl, bulk, intr, in_urbs, out_urbs; +}; + +static void sleep_us(unsigned us) { + if (us == 0) return; + if ((int)us > g_max_gap_us) us = g_max_gap_us; + struct timespec req = {.tv_sec = us / 1000000, .tv_nsec = (us % 1000000) * 1000}; + nanosleep(&req, NULL); +} + +static const char *xfer_name(uint8_t t) { + switch (t) { + case 1: return "INTR"; + case 2: return "CTRL"; + case 3: return "BULK"; + default: return "????"; + } +} + +/* Build a usbdevfs_urb for one record, submit it, reap the completion. */ +static int replay_one(int fd, const struct urbs_record_fixed *rec, + const uint8_t *data, struct stats *st, unsigned idx) { + uint8_t type; + switch (rec->xfer_type) { + case 1: type = USBDEVFS_URB_TYPE_INTERRUPT; break; + case 2: type = USBDEVFS_URB_TYPE_CONTROL; break; + case 3: type = USBDEVFS_URB_TYPE_BULK; break; + default: + fprintf(stderr, "[%u] unsupported xfer_type=%u, skipping\n", + idx, rec->xfer_type); + return -1; + } + + /* Buffer layout: + * CTRL: setup[8] || payload[transfer_length] + * buffer_length = 8 + transfer_length + * BULK/INTR: payload[transfer_length] + * buffer_length = transfer_length (min 1 for the kernel's sake) + * For OUT URBs the source bytes are at `data` (data_bytes == + * transfer_length). For IN URBs `data` is empty (data_bytes == 0) and + * the buffer is zero-filled — the chip fills it on completion. 
*/ + size_t buf_len; + if (type == USBDEVFS_URB_TYPE_CONTROL) { + buf_len = 8 + rec->transfer_length; + } else { + buf_len = rec->transfer_length; + if (buf_len == 0) buf_len = 1; /* libusb doesn't accept a null buffer */ + } + + uint8_t *buf = (uint8_t *)calloc(1, buf_len); + if (!buf) { + fprintf(stderr, "[%u] OOM allocating %zu byte buffer\n", idx, buf_len); + return -1; + } + int is_in = (rec->ep_with_dir & 0x80) != 0; + if (type == USBDEVFS_URB_TYPE_CONTROL) { + memcpy(buf, rec->setup, 8); + if (rec->data_bytes > 0 && data && !is_in) { + memcpy(buf + 8, data, rec->data_bytes); + } + } else if (rec->data_bytes > 0 && data && !is_in) { + memcpy(buf, data, rec->data_bytes); + } + + struct usbdevfs_urb urb; + memset(&urb, 0, sizeof(urb)); + urb.type = type; + urb.endpoint = rec->ep_with_dir; /* high bit already set for IN */ + urb.buffer = buf; + urb.buffer_length = (int)buf_len; + /* Match kernel rtl8814au TX-OUT behaviour where applicable: */ + if (type == USBDEVFS_URB_TYPE_BULK && !(rec->ep_with_dir & 0x80)) { + urb.flags = USBDEVFS_URB_ZERO_PACKET; + } + urb.usercontext = (void *)(uintptr_t)idx; + + st->submits++; + if (rec->ep_with_dir & 0x80) st->in_urbs++; else st->out_urbs++; + if (type == USBDEVFS_URB_TYPE_CONTROL) st->ctrl++; + if (type == USBDEVFS_URB_TYPE_BULK) st->bulk++; + if (type == USBDEVFS_URB_TYPE_INTERRUPT) st->intr++; + + if (g_dry_run) { + if (g_verbose) { + fprintf(stderr, "[%u] dry-run %s ep=0x%02x xfer_len=%u disk=%u\n", + idx, xfer_name(rec->xfer_type), rec->ep_with_dir, + rec->transfer_length, rec->data_bytes); + } + free(buf); + st->ok++; + return 0; + } + + if (ioctl(fd, USBDEVFS_SUBMITURB, &urb) < 0) { + int e = errno; + fprintf(stderr, "[%u] SUBMITURB %s ep=0x%02x failed: %s\n", + idx, xfer_name(rec->xfer_type), rec->ep_with_dir, strerror(e)); + st->err_submit++; + free(buf); + return -1; + } + + /* Reap. Block until completion. 
*/ + struct usbdevfs_urb *reaped = NULL; + if (ioctl(fd, USBDEVFS_REAPURB, &reaped) < 0) { + int e = errno; + fprintf(stderr, "[%u] REAPURB failed: %s\n", idx, strerror(e)); + st->err_reap++; + free(buf); + return -1; + } + if (reaped != &urb) { + fprintf(stderr, "[%u] reaped unexpected URB pointer\n", idx); + st->err_reap++; + free(buf); + return -1; + } + if (reaped->status != 0) { + /* Non-zero status: -EPIPE = stall, -ETIMEDOUT = timed out, -ENOENT = + * cancelled. We do NOT clear-halt here — a stall during replay is + * itself a divergence and we want it counted. */ + if (g_verbose) { + fprintf(stderr, "[%u] %s ep=0x%02x completed with status=%d actual=%d\n", + idx, xfer_name(rec->xfer_type), rec->ep_with_dir, + reaped->status, reaped->actual_length); + } + st->err_status++; + free(buf); + return 0; + } + st->ok++; + if (g_verbose) { + fprintf(stderr, "[%u] OK %s ep=0x%02x actual=%d\n", + idx, xfer_name(rec->xfer_type), rec->ep_with_dir, + reaped->actual_length); + } + free(buf); + return 0; +} + +static int parse_header(FILE *f, uint32_t *urb_count_out) { + struct urbs_header hdr; + if (fread(&hdr, sizeof(hdr), 1, f) != 1) { + fprintf(stderr, "failed to read URB script header\n"); + return -1; + } + if (memcmp(hdr.magic, URBSCRIPT_MAGIC, 4) != 0) { + fprintf(stderr, "URB script magic mismatch (got %.4s, want %s)\n", + hdr.magic, URBSCRIPT_MAGIC); + return -1; + } + if (hdr.version != URBSCRIPT_VERSION) { + fprintf(stderr, "URB script version %u != supported %u\n", + hdr.version, URBSCRIPT_VERSION); + return -1; + } + *urb_count_out = hdr.urb_count; + return 0; +} + +static int open_and_claim(const char *device) { + int fd = open(device, O_RDWR); + if (fd < 0) { + fprintf(stderr, "open(%s) failed: %s\n", device, strerror(errno)); + return -1; + } + if (g_disconnect) { + /* DISCONNECT_CLAIM yanks the kernel driver and claims the interface + * in one ioctl. Equivalent to detach_kernel_driver+claim. 
*/ + struct usbdevfs_disconnect_claim dc; + memset(&dc, 0, sizeof(dc)); + dc.interface = g_iface; + dc.flags = USBDEVFS_DISCONNECT_CLAIM_IF_DRIVER; + strncpy(dc.driver, "usbfs", sizeof(dc.driver) - 1); + if (ioctl(fd, USBDEVFS_DISCONNECT_CLAIM, &dc) < 0 && errno != ENODATA) { + int e = errno; + fprintf(stderr, + "DISCONNECT_CLAIM iface=%d failed: %s — falling back to plain CLAIM\n", + g_iface, strerror(e)); + /* Fall through to plain claim. */ + } else { + return fd; + } + } + unsigned ifnum = g_iface; + if (ioctl(fd, USBDEVFS_CLAIMINTERFACE, &ifnum) < 0) { + fprintf(stderr, "CLAIMINTERFACE %u failed: %s (try --disconnect)\n", + ifnum, strerror(errno)); + close(fd); + return -1; + } + return fd; +} + +static void release_and_close(int fd) { + if (fd < 0) return; + unsigned ifnum = g_iface; + if (ioctl(fd, USBDEVFS_RELEASEINTERFACE, &ifnum) < 0 && errno != ENOENT) { + fprintf(stderr, "RELEASEINTERFACE %u failed: %s\n", + ifnum, strerror(errno)); + } + close(fd); +} + +static void usage(const char *argv0) { + fprintf(stderr, + "usage: %s --device PATH --urbs FILE [--interface N] [--disconnect]\n" + " [--max-gap-us US] [--dry-run] [-v]\n" + "\n" + " --device PATH e.g. 
/dev/bus/usb/004/003\n" + " --urbs FILE URB script from tools/pcapng_to_urbscript.py\n" + " --interface N USB interface to claim (default 0)\n" + " --disconnect kick the kernel driver off the interface first\n" + " --max-gap-us US cap each inter-URB sleep at US (default 100000)\n" + " --dry-run parse the script but don't talk to the chip\n" + " -v verbose (one line per URB)\n", + argv0); +} + +int main(int argc, char **argv) { + const char *device = NULL; + const char *urbs_path = NULL; + static const struct option opts[] = { + {"device", required_argument, 0, 'd'}, + {"urbs", required_argument, 0, 'u'}, + {"interface", required_argument, 0, 'i'}, + {"disconnect", no_argument, 0, 'D'}, + {"max-gap-us", required_argument, 0, 'g'}, + {"dry-run", no_argument, 0, 'n'}, + {"help", no_argument, 0, 'h'}, + {0, 0, 0, 0}, + }; + int c; + while ((c = getopt_long(argc, argv, "d:u:i:Dg:nvh", opts, NULL)) != -1) { + switch (c) { + case 'd': device = optarg; break; + case 'u': urbs_path = optarg; break; + case 'i': g_iface = atoi(optarg); break; + case 'D': g_disconnect = 1; break; + case 'g': g_max_gap_us = atoi(optarg); break; + case 'n': g_dry_run = 1; break; + case 'v': g_verbose = 1; break; + case 'h': default: usage(argv[0]); return c == 'h' ? 0 : 2; + } + } + if (!device || !urbs_path) { + usage(argv[0]); + return 2; + } + + FILE *f = fopen(urbs_path, "rb"); + if (!f) { + fprintf(stderr, "open URB script %s: %s\n", urbs_path, strerror(errno)); + return 1; + } + uint32_t urb_count = 0; + if (parse_header(f, &urb_count) < 0) { + fclose(f); + return 1; + } + fprintf(stderr, "URB script: %u URBs\n", urb_count); + + int fd = -1; + if (!g_dry_run) { + fd = open_and_claim(device); + if (fd < 0) { + fclose(f); + return 1; + } + } + + struct stats st; + memset(&st, 0, sizeof(st)); + + /* Allocate per-URB. Data sizes are bounded by RF-table replay sizes (a + * few KB at most for any single transfer). 
If a single URB carries + * more than 64KB the chip and the script have other problems. */ + uint8_t *databuf = (uint8_t *)malloc(65536); + if (!databuf) { + fprintf(stderr, "OOM allocating record buffer\n"); + if (fd >= 0) release_and_close(fd); + fclose(f); + return 1; + } + + for (uint32_t i = 0; i < urb_count; i++) { + struct urbs_record_fixed rec; + if (fread(&rec, sizeof(rec), 1, f) != 1) { + fprintf(stderr, "[%u] short read in URB script\n", i); + break; + } + if (rec.data_bytes > 65536) { + fprintf(stderr, "[%u] data_bytes=%u exceeds buffer (skipping)\n", + i, rec.data_bytes); + if (fseek(f, rec.data_bytes, SEEK_CUR) != 0) break; + continue; + } + if (rec.data_bytes > 0 && fread(databuf, rec.data_bytes, 1, f) != 1) { + fprintf(stderr, "[%u] short read on data (%u bytes)\n", i, rec.data_bytes); + break; + } + sleep_us(rec.inter_urb_gap_us); + replay_one(fd, &rec, databuf, &st, i); + } + + free(databuf); + if (fd >= 0) release_and_close(fd); + fclose(f); + + fprintf(stderr, + "replay done: %u submits, ok=%u err_submit=%u err_reap=%u err_status=%u\n", + st.submits, st.ok, st.err_submit, st.err_reap, st.err_status); + fprintf(stderr, + " by kind: ctrl=%u bulk=%u intr=%u | by dir: in=%u out=%u\n", + st.ctrl, st.bulk, st.intr, st.in_urbs, st.out_urbs); + return (st.err_submit > 0 || st.err_reap > 0) ? 1 : 0; +} diff --git a/txdemo/main.cpp b/txdemo/main.cpp index 98e4105..4126913 100644 --- a/txdemo/main.cpp +++ b/txdemo/main.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -147,6 +148,111 @@ int main(int argc, char **argv) { rc = libusb_claim_interface(handle, 0); assert(rc == 0); + /* USB-wire sentinel writes — gated behind DEVOURER_USB_SENTINEL=1. Used by + * tools/usbmon_pcap_diff.py --phase-split to delimit the init phase in a + * devourer-side capture. We write to REG_DUMMY (0x04FC, documented as a + * no-side-effect scratch register in hal_com_reg.h:322) so the sentinel + * cannot perturb chip state. 
The 2-byte payload encodes the marker: + * 0xDEAD before init, 0xBEEF at init-done. Diff tool matches by + * wValue (== REG_DUMMY) AND payload (\\xad\\xde or \\xef\\xbe LE). */ + const bool sentinel_enabled = std::getenv("DEVOURER_USB_SENTINEL") != nullptr; + auto write_sentinel = [&](uint16_t marker, const char *label) { + if (!sentinel_enabled) return; + uint16_t payload = marker; + int srt = libusb_control_transfer( + handle, /*bmRequestType*/ 0x40, /*bRequest*/ 5, + /*wValue=REG_DUMMY*/ 0x04FC, /*wIndex*/ 0, + reinterpret_cast(&payload), sizeof(payload), + /*timeout_ms*/ 500); + logger->info("USB sentinel {}: REG_DUMMY <= 0x{:04x} rc={}", label, marker, srt); + }; + + write_sentinel(0xDEAD, "pre-init"); + + /* Optional interrupt-IN poller on EP 0x85 — gated by + * DEVOURER_POLL_INTR_IN=1. The 8814AU descriptor exposes an Interrupt IN + * endpoint at 0x85 (64-byte, bInterval=1) which carries C2H (chip-to-host) + * messages; the upstream aircrack-ng 8814au driver submits a perpetual URB + * on it under CONFIG_USB_INTERRUPT_IN_PIPE. Devourer currently never reads + * it, so the chip's C2H buffer fills and firmware may stall waiting for + * drainage — a candidate explanation for "bulk-OUT URBs complete OK but + * nothing reaches the air" (issue #36). This thread polls EP 0x85 until + * the process is killed; failures other than -ETIMEDOUT are logged once + * per N. */ + /* Optional bulk-IN drainer on EP 0x81 — gated by DEVOURER_DRAIN_BULK_IN=1. + * The kernel `88XXau` driver pre-arms 8 bulk-IN URBs of 32 KB each on + * EP 0x81 at the end of init, *before* the first TX. The RTL8814AU + * delivers TX-status reports back on the bulk-IN endpoint mixed with + * RX data; if the host never has IN URBs pending, the chip cannot + * deliver TX status and queues TX indefinitely — which fits the + * observed pathology exactly (bulk-OUT URBs complete OK at the libusb + * level but nothing reaches the air). 
Spawn a thread that pre-submits + * a small pool of bulk-IN URBs on EP 0x81 before TX begins and keeps + * a stream of them in flight. */ + std::atomic bulk_in_running{false}; + std::thread bulk_in_thread; + if (std::getenv("DEVOURER_DRAIN_BULK_IN")) { + bulk_in_running = true; + bulk_in_thread = std::thread([handle, &bulk_in_running, logger]() { + static constexpr int BUF_SIZE = 16 * 1024; + uint8_t buf[BUF_SIZE]; + uint64_t reads = 0; + while (bulk_in_running) { + int actual = 0; + int rc = libusb_bulk_transfer(handle, 0x81, buf, sizeof(buf), + &actual, 200 /* ms */); + if (rc == 0 && actual > 0) { + ++reads; + if (reads <= 5 || (reads % 200) == 0) { + logger->info("EP 0x81 IN #{}: {} bytes (head=0x{:02x}{:02x})", + reads, actual, buf[0], buf[1]); + } + } else if (rc != 0 && rc != LIBUSB_ERROR_TIMEOUT) { + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + } + } + }); + logger->info("DEVOURER_DRAIN_BULK_IN — EP 0x81 bulk-IN drainer running"); + } + + std::atomic intr_running{false}; + std::thread intr_in_thread; + if (std::getenv("DEVOURER_POLL_INTR_IN")) { + intr_running = true; + intr_in_thread = std::thread([handle, &intr_running, logger]() { + uint8_t buf[64]; + uint64_t reads = 0, errs = 0; + while (intr_running) { + int actual = 0; + int rc = libusb_interrupt_transfer(handle, 0x85, buf, sizeof(buf), + &actual, 100 /* ms */); + if (rc == 0 && actual > 0) { + ++reads; + if (reads <= 20 || (reads % 100) == 0) { + char hex[64 * 2 + 1] = {0}; + /* Explicit template arg so MSVC's `windows.h` `min` macro doesn't + * mangle this — same pattern as RtlUsbAdapter.cpp:435. 
*/ + int hex_len = std::min(actual, 32); + for (int k = 0; k < hex_len; ++k) { + static const char hd[] = "0123456789abcdef"; + hex[2*k] = hd[buf[k] >> 4]; + hex[2*k+1] = hd[buf[k] & 0xF]; + } + logger->info("EP 0x85 IN #{}: {} bytes, head={}", + reads, actual, hex); + } + } else if (rc != 0 && rc != LIBUSB_ERROR_TIMEOUT) { + ++errs; + if ((errs % 50) == 1) { + logger->error("EP 0x85 IN rc={} (#{})", rc, errs); + } + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + } + }); + logger->info("DEVOURER_POLL_INTR_IN — EP 0x85 interrupt-IN poller running"); + } + WiFiDriver wifi_driver{logger}; auto rtlDevice = wifi_driver.CreateRtlDevice(handle); @@ -185,6 +291,8 @@ int main(int argc, char **argv) { .ChannelOffset = 0, .ChannelWidth = CHANNEL_WIDTH_20}); + write_sentinel(0xBEEF, "post-init/pre-TX"); + sleep(5); std::thread usb_thread(usb_event_loop, logger, context);