From fbc7c95fbf574fced685809cac349e493745cd35 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 2 Jul 2026 11:00:13 +0000
Subject: [PATCH] =?UTF-8?q?M9=20finish=20(2/2):=20=C2=A714.9=20differentia?=
 =?UTF-8?q?l=20/=20reference-parser=20tester?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add tests/differential.rs: an independent, spec-derived reference segment
parser run alongside the production recover_segment classifier, with an
exact-match oracle on classification. This catches a recovery-classifier error
by construction (two implementations disagreeing) rather than probabilistically
— the class of bug the issue #26 sentinel hole was.

The reference calls no production parse code (not record::decode, not
recover_segment, not the segment.rs helpers): it re-derives the §5.2/§5.3/§8.2
constants and re-implements the length-bound check, the all-zero-header
sentinel rule (post #26), the CRC-validation ordering, the bounded
tail-vs-corruption forward scan, and the sealed-vs-active distinction from raw
bytes. It uses only the shared crc32c primitive (a dependency, not parse logic).

Inputs: a deterministic 184-case scenario matrix enumerating every arm (clean
runs, torn tails, interior corruption incl. the rec_type→0 vector, reserved
types, LSN gaps, physical truncation, within/beyond-scan-bound continuations,
len>max, non-1 bases; active + sealed) PLUS the Task-1 regrown corpora as raw
segment bodies (1666 inputs). Both parsers must return the identical SegClass
variant AND offsets/max_lsn. Green: 184 + 1666 agree, ~6 s.

Falsifiability shown: injecting the pre-#26 naive rec_type==0 ⇒ sentinel rule
into the reference makes the differential fire (production=Truncated vs
reference=Clean) on the torn-last-zero_rectype scenario and on real corpus
entries — i.e. it would have caught the sentinel bug. The injection was then
reverted.

The only src/ change is a thin #[cfg(feature="fuzzing")] recover_segment_classify
accessor in lib.rs (no public API widening, no second production parser). The
test is #![cfg(feature="fuzzing")], so default cargo build/test are unaffected
(zero release impact). CI: per-PR blocking `differential (§14.9)` job in ci.yml
(a divergence reds the PR, never gates H1). Docs: §14.9 implemented, §14.11
per-PR row, §14.12 D4/D5/D10/D11 rows, §14.13 DoD bullet.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01Rpbwt9JT56hQvVXiqTS131
---
 .github/workflows/ci.yml |  19 ++
 CLAUDE.md                |   1 +
 docs/wal_design_v6.md    |  15 +-
 src/lib.rs               |  63 +++++
 tests/differential.rs    | 597 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 689 insertions(+), 6 deletions(-)
 create mode 100644 tests/differential.rs

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a8ec6b5..ce0726d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -139,6 +139,25 @@ jobs:
         run: |
           cargo +nightly miri test --lib -- record:: crc:: lsn:: config::
 
+  differential:
+    # §14.9 — differential / reference-parser tester. An independent, spec-derived
+    # reference segment parser (`tests/differential.rs`) is run alongside the
+    # production `recover_segment` classifier over a deterministic scenario matrix
+    # AND the committed fuzz corpora; any divergence in classification is a real
+    # recovery-classifier bug and reds the PR (same posture as the fuzz smoke,
+    # never gates an H1 dispatch). Fast (~6 s over the minimized corpus), so it
+    # runs per-PR. Needs `--features fuzzing` for the `recover_segment_classify`
+    # accessor (test-only, zero release impact).
+    name: differential (§14.9)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install stable toolchain
+        uses: dtolnay/rust-toolchain@stable
+      - uses: Swatinem/rust-cache@v2
+      - name: differential reference-parser check
+        run: cargo test --features fuzzing --test differential
+
   dirfsync-presence:
     # M8 §14.4d Tier 1 (PRIMARY): the deterministic, FS-independent regression
     # guard for the roll-time directory fsync (§7.4 step 5). It straces the roll
diff --git a/CLAUDE.md b/CLAUDE.md
index c860e2d..0237a3c 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -109,6 +109,7 @@ The entire value of this component is **correct behavior under crashes and fault
 
 ## Project status (keep this updated)
 
+- **LATEST (2026-07-02): M9 finish task 2 — §14.9 differential / reference-parser tester LANDED, green here.** New `tests/differential.rs` (`#![cfg(feature="fuzzing")]`): an **independent** reference segment parser — re-deriving the §5.2/§5.3/§8.2 constants and re-implementing the length-bound check, the **all-zero-header sentinel** rule (post issue #26), the CRC-validation ordering, the bounded tail-vs-corruption forward scan, and the sealed-vs-active distinction from raw bytes, calling **no** production parse code (only the shared `crc32c` primitive) — run against production via a new `#[cfg(feature="fuzzing")]` `recover_segment_classify` accessor with an **exact-match** oracle on the `SegClass` variant *and* offsets/`max_lsn`. Inputs: a deterministic **184-case scenario matrix** (clean runs, torn tails, interior corruption incl. the `rec_type→0` vector, reserved types, LSN gaps, physical truncation, within/beyond-scan-bound continuations, `len>max`, non-1 bases; active + sealed) **plus** the Task-1 regrown corpora as raw segment bodies (**1666 inputs**) — catches a classifier error **by construction** (two implementations disagreeing), the class of bug the #26 sentinel hole was. **Green: 184 + 1666 agree, ~6 s.** **Falsifiability shown:** injecting the pre-#26 naive `rec_type==0 ⇒ sentinel` rule into the reference makes the differential fire (`production=Truncated … reference=Clean`) on both the `torn-last-zero_rectype` scenario and real corpus entries, then reverted. **CI:** per-PR blocking `differential (§14.9)` job in `ci.yml` (`cargo test --features fuzzing --test differential`; a divergence reds the PR, never gates H1). **`src/` change is only the feature-gated accessor** (no public API widening, no second production parser; default `cargo build`/`cargo test` unaffected — the test compiles to nothing without `fuzzing`). Docs: §14.9 implemented, §14.11 per-PR row, §14.12 D4/D5/D10/D11 rows, §14.13 DoD bullet. 
**After this: M9 finish follow-ups 1+2 (interior rec_type→0 permanent seed + coverage trust-model) then 3 (DoD audit); then M9 is software-complete.**
 - **LATEST (2026-07-01): M9 finish task 1 — F1–F4 corpus regrow + `cmin` on the post-sentinel-fix format, and the §14.13 fuzz gate's N pinned at 24 CPU-hours/target.** The sentinel fix (`2b198e7`, all-zero-header) changed how `rec_type==0, crc≠0` is classified (sentinel → `Invalid` → `TornMidLog`/torn-tail — a path that did not exist pre-fix), so the pre-fix coverage maps were stale and any §14.13 CPU-hours must be counted on the **current** format. Regrew each target (`cargo +nightly fuzz run <t> -max_total_time≈300` on top of the existing corpus) then `cargo fuzz cmin`'d to the coverage-preserving set: **recovery 174→316, structure 130→129, decode 17→40, model 321→348** minimized entries; per-target coverage rose (**recovery 780→892, structure 561→592, model 798→839**); **zero crashes** during the regrow (artifacts dirs empty). **Pinned N** — the previously vacuous "N CPU-hours" is now **≥ 24 CPU-hours per target (96 total) accumulated since `2b198e7`**, with the "since the last format change" clock made explicit (a format change resets it + mandates this regrow) — written into the §14.13 fuzz row, the `fuzz.yml` header + in-run `::warning::` banner, and a new **regrow-log** section in `fuzz/README.md` recording the `2b198e7` regrow as precedent. **Does NOT discharge the gate** — it stays CONTINGENT/OPEN pending the 24 h/target run on a dedicated runner; this only makes the contingent statement non-vacuous and honestly-anchored. No `src/` change (corpus + docs + one workflow comment/banner only). Lands as PR #1 of the two M9-finish PRs (task 2 = §14.9 differential harness reuses this regrown corpus).
 - **LATEST (2026-07-01): M9 — CI-matrix tidy-up LANDED (§14.11); this is the LAST M9 build slice. M9 is now feature-complete, with only the standing owner/dedicated-runner *observations* open.** **Docs-only reconciliation** (no `src/`, no workflow behavior change — the 8 workflows already implemented the intended matrix; the spec had simply not caught up). Rewrote §14.11 into a **faithful index** of what actually runs: the **Per-PR (blocking)** row now enumerates the real `ci.yml` jobs — `rustfmt+clippy`, `build+test` (§14.1 vectors + §14.2 reduced proptest + §14.7 zero-alloc + `bench --no-run` + the §14.6 `!Sync` trybuild compile-fail + dir-lock/`Send` tests), `MSRV 1.85`, `Miri (codec subset)`, **`fuzz smoke (F1/F2/F3/F4)` (blocking — a crash reds the PR)**, `§14.4d dir-fsync presence (strace)` — plus the paths-filtered per-PR gates (`m8-macos.yml` H4 Half A, `lazyfs.yml`, `m8.yml`); the **Nightly (scheduled)** row names each workflow with its staggered cron (`bench.yml` 03:17 / `fuzz.yml` 04:17 / `m8-dmflakey.yml` 04:23 / `soak.yml` 05:17, all CONTINGENT + dispatch), and honestly flags that full-iteration §14.2/§14.3 and the §14.9 differential are **not yet** automated as nightly (covered at PR granularity by the reduced proptest + M6 `model_oracle`). Added the **FS-matrix honesty note** (per plan Slice 9): the FS matrix is meaningful only for the durability/metadata-fault gates (LazyFS/H3/§14.4d); the byte-level codec/CRC/recovery-classifier logic the §14.5 fuzz + §14.6 Miri lanes exercise is **filesystem-independent**, so those lanes are **not** multiplied across the matrix — over-claiming an "all-FS fuzz matrix" would be dishonest. **Honest close-out:** no long-run gate is marked green — the §14.13 fuzz row (N-CPU-hours) and the multi-hour soak stay OPEN-pending a dedicated runner; Miri clean, `!Sync`, and zero-alloc rows are DONE. `cargo fmt --check` / `clippy --all-targets -D warnings` / `cargo test` unaffected (docs-only). 
**M9 remaining: only the F1–F4 N-CPU-hour + multi-hour-soak release-gate observations on owner/dedicated hardware — no more in-session build work.**
 - **LATEST (2026-06-29): M9 — soak / endurance LANDED (§14.10), short run green here; the multi-hour gate stays CONTINGENT.** New `tests/soak.rs` (`#[ignore]`, env-driven `WAL_SOAK_SECONDS` default 3 / `WAL_SOAK_SEED` / optional `WAL_SOAK_EVIDENCE`): drives a **single long-lived `Wal`** through a weighted randomized loop — append (boundary-biased 0/1/8/`max_record_size`/random) / commit (timed into an `hdrhistogram`) / `checkpoint(durable)` / process-crash-recover (drop+reopen) — over a 4 KiB-segment/256 B-record config so rolls/splits/checkpoints fire constantly. After **every** recover it re-checks the §14.3 refinement envelope against an independent in-memory oracle: **D1/D3** (recovered `durable_lsn` ≥ committed watermark), **D8** (oldest ≤ authorized `up_to`+1, monotone), **D2/D6** (dense byte-identical replay `oldest..=durable` via `reader_from(0)`). Four resource monitors with bounded-growth gates: **fd** (`/proc/self/fd`, `peak ≤ baseline+32`), **disk** (a deterministic **per-checkpoint floor** — the soak always checkpoints to `durable_lsn`, so every sealed segment is superseded and exactly the active segment must remain right after `checkpoint(durable)`; a reclaim-N−1-of-N leak reds on cycle 1, not after 16 segments accrue — backstopped by a `peak ≤ 16×segment_size` runaway ceiling), **RSS** (`/proc/self/statm`×pagesize, `≤ baseline+64 MiB`; §8.5 — recovery materializes no payloads), **commit p999 ≤ 2 s**. Deterministic seeded LCG (no RNG dep) ⇒ reproduces from the seed; oracle self-prunes below `max_ckpt`/`oldest` so it never trips its own RSS watch. 
**Short run green here: `WAL_SOAK_SECONDS=4` ⇒ ~29 k ops / ~6.5 k commits / ~2 k checkpoints / ~2 k recoveries, fd 6→6, disk floor==1 every checkpoint, RSS +0.5 MiB, p999 sub-ms–7 ms.** **Falsifiability shown (honest form, per review)**: injecting a per-cycle leak (`deletable_prefix_len` reclaims N−1 of N) reds the floor on cycle 1 (`found 2 *.wal files`), then reverted — the real leak class, not a sub-working-set threshold. Wrapper `scripts/m9/soak.sh` (release build, scrapes the one-line JSON summary, re-emits a §5 evidence ledger via `scripts/m8/evidence.sh`, loud SHORT-vs-gate framing) + `.github/workflows/soak.yml` (schedule 05:17 + dispatch, **NOT per-PR**, contingent banner, uploads the evidence artifact). **No `src/` change** (public API only). **Honest framing (same stopgap as LazyFS/bench):** a SHORT run proves the driver/monitors/oracle work but is **not** the gate — the §14.13 soak is a **multi-hour** run on a dedicated runner with zero regression/violation. `cargo test --test soak` compiles, `clippy --test soak`, `shellcheck scripts/m9/soak.sh`, `actionlint soak.yml` clean. **Remaining M9:** CI-matrix tidy-up (§14.11); the F1–F4 N-CPU-hour + multi-hour-soak release-gate observation on a dedicated runner.
diff --git a/docs/wal_design_v6.md b/docs/wal_design_v6.md
index a93d47e..1f4ac55 100644
--- a/docs/wal_design_v6.md
+++ b/docs/wal_design_v6.md
@@ -617,14 +617,16 @@ Construct the specific resurrection hazard: write records, induce a torn tail su
 ### 14.9 Differential / reference testing (optional, high value)
 A deliberately slow, obviously-correct **reference parser** (separate code path, possibly another language) run alongside the production parser on the fuzz corpus and model-harness outputs. Any divergence in classification (valid / torn-tail / fatal-corruption / truncation offset) is a bug.
 
+*(**M9 — IMPLEMENTED** as `tests/differential.rs` (needs `--features fuzzing`; per-PR `differential (§14.9)` job in `ci.yml`). An **independent** reference segment parser — re-deriving the §5.2/§5.3/§8.2 constants and re-implementing the length-bound check, the all-zero-header sentinel rule (post issue #26), the CRC-validation ordering, the bounded tail-vs-corruption forward scan, and the sealed-vs-active distinction from raw bytes, calling **no** production parse code (only the shared `crc32c` primitive) — is run against the production classifier via the `#[cfg(feature="fuzzing")]` `recover_segment_classify` accessor, with an **exact-match** oracle on the `SegClass` variant *and* its offsets/`max_lsn`. Inputs: a deterministic **scenario matrix** (184 cases) enumerating every arm — clean runs, torn tails, interior corruption incl. the `rec_type→0` vector, reserved types, LSN gaps, physical truncation, within/beyond-scan-bound continuations, `len>max`, non-1 bases — for both active and sealed segments; **plus** the committed Task-1 regrown corpora (`fuzz/corpus/{recovery,structure,decode,model}`, 1666 inputs) as raw segment bodies. This catches a classifier error **by construction** (two implementations disagreeing), the class of bug the issue-#26 sentinel hole was. **Falsifiability shown:** injecting the pre-#26 naive `rec_type==0 ⇒ sentinel` rule into the reference makes the differential fire (`production=Truncated … reference=Clean`) on both the `torn-last-zero_rectype` scenario and real corpus entries — i.e. it *would* have caught the sentinel bug — then reverted. **Scope note:** the corpus is consumed as raw segment bodies rather than by re-decoding each fuzz target's `arbitrary` envelope (which would duplicate the generators); the envelope-specific deep states are covered by the scenario matrix, and the corpus pass adds real-adversarial-bytes breadth. 
A thin `#[cfg(feature="fuzzing")]` accessor is the only `src/` addition — no public API widening, no second production parser.)*
+
 ### 14.10 Soak / endurance
 Multi-hour randomized workload with periodic injected crashes+recoveries and checkpoints. Monitor: invariant violations, fd leaks, **disk-space leaks** (unreclaimed segments), memory growth, latency drift.
 
 *(**M9 — IMPLEMENTED** as `tests/soak.rs` (`#[ignore]`, env-driven `WAL_SOAK_SECONDS`/`WAL_SOAK_SEED`), driven by `scripts/m9/soak.sh` and run nightly/dispatch in `soak.yml`. It drives a **single long-lived `Wal`** through a weighted randomized loop — append (boundary-biased sizes 0/1/8/`max_record_size`/random), commit (timed into an `hdrhistogram`), `checkpoint(durable)`, and process-crash-recover (drop + reopen) — over a 4 KiB-segment/256 B-record config so rolls/splits/checkpoints fire constantly. After **every** recover it re-checks the §14.3 refinement envelope (D1/D3 durable ≥ committed watermark, D8 oldest ≤ authorized `up_to`+1 and monotone, D2/D6 dense byte-identical replay `oldest..=durable`) against an independent in-memory oracle, so a *correctness* regression under sustained load reds the run too — not just a leak. The four monitors are sampled every 128 ops with bounded-growth gates: **fd** (`/proc/self/fd`, `peak ≤ baseline+32`), **disk** (a deterministic **per-checkpoint floor** — because the soak always checkpoints to exactly `durable_lsn`, every sealed segment is fully superseded, so right after `checkpoint(durable)` exactly the active segment must remain (§9); a checkpoint that reclaims N−1 of N sealed segments reds on the **first** bad cycle, not after 16 accumulate — backstopped by a `peak ≤ 16×segment_size` ceiling for gross runaway between checkpoints), **RSS** (`/proc/self/statm`, `≤ baseline+64 MiB`; recovery materializes no payloads, §8.5), and **commit-latency** (p999 ≤ 2 s ceiling). Deterministic (seeded LCG, no RNG dep) so a failure reproduces from `WAL_SOAK_SEED`. **Honest framing (same stopgap as LazyFS/dm-flakey/bench):** a SHORT run (the per-dispatch default 300 s, and the in-session smoke) proves the driver/monitors/oracle work but is **NOT** the gate — the §14.13 soak gate is a **multi-hour** run with zero regression/violation; a short green run is CONTINGENT, a failure is a real bug. 
Falsifiability shown (honest form): injecting a per-cycle leak (`deletable_prefix_len` reclaims N−1 of N) reds the **floor** assertion on cycle 1 (`found 2 *.wal files`) — the real leak class, not a sub-working-set threshold; the oracle's refinement checks are the same ones the M6 harness demonstrated falsifiable. No `src/` change — the driver uses only the public API.)*
 
 ### 14.11 CI matrix
-- **Per-PR (fast, blocking).** The always-on `ci.yml` jobs (`push:main` + `pull_request` + dispatch): **`rustfmt + clippy`** (`-D warnings`, all targets); **`build + test`** = §14.1 codec/CRC vectors, §14.2 (reduced proptest) round-trip/property suites, §14.7 **zero-alloc assertion** (enforced) + `cargo bench --no-run` (benches must compile, can't bitrot), and the §14.6 **`!Sync` compile-fail** (`tests/ui.rs` trybuild, stable-only) + **directory-lock / `Send`** runtime tests — all under `cargo test`; **`MSRV (1.85)`** `cargo check --all-targets --locked`; **`Miri (codec subset)`** (§14.6, `record`/`crc`/`lsn`/`config` under `-Zmiri-strict-provenance` — the whole Miri-executable surface, since the file-backed paths use foreign syscalls Miri can't run); **`fuzz smoke (F1/F2/F3/F4)`** (§14.5, short bounded `cargo fuzz run` per target — **BLOCKING: a reproducible crash is a real D1–D11 bug and reds the PR**, distinct from the nightly time-boxed lane and never gating an H1 dispatch); **`§14.4d dir-fsync presence (strace)`** (M8 Tier-1, FS-independent syscall-presence guard). Plus the **paths-filtered** per-PR durability gates that only fire when their sources change: **§14.8 H4 Half A** *(`m8-macos.yml` on `macos-latest` — a macOS-only `F_FULLFSYNC`-routing regression is invisible to Linux PR CI because the `cfg(macos)` path does not compile there; `dtruss` Half B stays owner-run per #19)*, the **§14.4b LazyFS** gate *(`lazyfs.yml`, informational)*, and the **§12 fsync-fault poison** gate *(`m8.yml`)*.
-- **Nightly (scheduled, staggered off the `:00` rush + `workflow_dispatch`).** **§14.7 benchmarks + gates** *(`bench.yml`, 03:17 UTC; **informational** on hosted runners until a controlled/pinned-governor runner makes the §14.7 thresholds enforceable — same stopgap as the LazyFS gate)*; **§14.5 fuzz (time-boxed F1–F4)** *(`fuzz.yml`, 04:17 UTC; CONTINGENT, NOT the N-CPU-hour §14.13 gate — a crash still reds it)*; **§14.8 H3-physical + §14.4d** *(`m8-dmflakey.yml`, 04:23 UTC + push-to-main; hosted ubuntu VMs reach `dm-flakey`, so these are real gates there — ext4 hard with a source-confirmed block-layer EIO (#16) and a `drop_writes` positive control (#17), xfs/btrfs informational, **best-effort + loud skip** if a runner lacks dm-flakey)*; **§14.10 soak** *(`soak.yml`, 05:17 UTC; a short hosted slice is a CONTINGENT smoke, not the multi-hour gate)*. *(Still to be wired as nightly, not yet automated: full-iteration §14.2/§14.3 and the §14.9 differential — currently the reduced per-PR proptest + the M6 `model_oracle` suite cover these at PR granularity.)*
+- **Per-PR (fast, blocking).** The always-on `ci.yml` jobs (`push:main` + `pull_request` + dispatch): **`rustfmt + clippy`** (`-D warnings`, all targets); **`build + test`** = §14.1 codec/CRC vectors, §14.2 (reduced proptest) round-trip/property suites, §14.7 **zero-alloc assertion** (enforced) + `cargo bench --no-run` (benches must compile, can't bitrot), and the §14.6 **`!Sync` compile-fail** (`tests/ui.rs` trybuild, stable-only) + **directory-lock / `Send`** runtime tests — all under `cargo test`; **`MSRV (1.85)`** `cargo check --all-targets --locked`; **`Miri (codec subset)`** (§14.6, `record`/`crc`/`lsn`/`config` under `-Zmiri-strict-provenance` — the whole Miri-executable surface, since the file-backed paths use foreign syscalls Miri can't run); **`fuzz smoke (F1/F2/F3/F4)`** (§14.5, short bounded `cargo fuzz run` per target — **BLOCKING: a reproducible crash is a real D1–D11 bug and reds the PR**, distinct from the nightly time-boxed lane and never gating an H1 dispatch); **`differential (§14.9)`** (`cargo test --features fuzzing --test differential` — an independent reference parser vs the production classifier over a 184-case scenario matrix + the committed corpora; **BLOCKING: a classification divergence is a real recovery-classifier bug**, ~6 s, never gates an H1 dispatch); **`§14.4d dir-fsync presence (strace)`** (M8 Tier-1, FS-independent syscall-presence guard). Plus the **paths-filtered** per-PR durability gates that only fire when their sources change: **§14.8 H4 Half A** *(`m8-macos.yml` on `macos-latest` — a macOS-only `F_FULLFSYNC`-routing regression is invisible to Linux PR CI because the `cfg(macos)` path does not compile there; `dtruss` Half B stays owner-run per #19)*, the **§14.4b LazyFS** gate *(`lazyfs.yml`, informational)*, and the **§12 fsync-fault poison** gate *(`m8.yml`)*.
+- **Nightly (scheduled, staggered off the `:00` rush + `workflow_dispatch`).** **§14.7 benchmarks + gates** *(`bench.yml`, 03:17 UTC; **informational** on hosted runners until a controlled/pinned-governor runner makes the §14.7 thresholds enforceable — same stopgap as the LazyFS gate)*; **§14.5 fuzz (time-boxed F1–F4)** *(`fuzz.yml`, 04:17 UTC; CONTINGENT, NOT the N-CPU-hour §14.13 gate — a crash still reds it)*; **§14.8 H3-physical + §14.4d** *(`m8-dmflakey.yml`, 04:23 UTC + push-to-main; hosted ubuntu VMs reach `dm-flakey`, so these are real gates there — ext4 hard with a source-confirmed block-layer EIO (#16) and a `drop_writes` positive control (#17), xfs/btrfs informational, **best-effort + loud skip** if a runner lacks dm-flakey)*; **§14.10 soak** *(`soak.yml`, 05:17 UTC; a short hosted slice is a CONTINGENT smoke, not the multi-hour gate)*. *(The §14.9 differential runs **per-PR** (`differential (§14.9)` job, ~6 s) rather than nightly. Still to be wired as nightly, not yet automated: full-iteration §14.2/§14.3 — currently the reduced per-PR proptest + the M6 `model_oracle` suite cover these at PR granularity.)*
 - **Pre-release / manual:** §14.8 H1 power-pull on target hardware, §14.10 soak *(M9: `soak.yml`, schedule + manual, **contingent** until a multi-hour run on a dedicated runner — a hosted nightly slice is a smoke, not the gate; same stopgap as the LazyFS/bench informational lanes; each run uploads its §5 soak-evidence artifact)*. *(The nightly §14.8 dm-flakey/macOS gates above also accept `workflow_dispatch`; a manual run posts its §5 evidence to the tracking issue as the human sign-off, while the nightly cron stays artifact-only and surfaces regressions as a red build.)*
 - **OS matrix:** Linux (primary — sole platform for the §14.8 hardware-durability gate), macOS (dev/correctness — exercises `F_FULLFSYNC`; unit/property/fuzz only, not §14.8). Windows is out of scope for v1 (§8.3).
 - **FS matrix (Linux):** ext4, xfs, btrfs (CoW), tmpfs (logic only — never durability claims). **Scope, honestly:** the FS matrix is meaningful *only* for the durability/metadata-fault gates whose outcome depends on filesystem journaling and cache semantics — §14.4b LazyFS, §14.8 H3-physical, and the §14.4d negative control (which reproduces behaviorally only on a journal-less FS — see §14.4d). The **byte-level logic** — the record codec, CRC, the recovery classifier/bounded scan, and everything the §14.5 fuzz targets and the §14.6 Miri subset exercise — is **filesystem-independent** (pure in-memory parsing over arbitrary bytes, or plain `pwrite`/`fdatasync` with no FS-specific assumption), so those lanes run once on the runner's default FS and are **not** multiplied across the matrix. Over-claiming an "all-FS fuzz matrix" would be dishonest; the fuzz/codec guarantees hold by construction regardless of FS.
@@ -636,14 +638,14 @@ Multi-hour randomized workload with periodic injected crashes+recoveries and che
 | D1 Durability on commit | §14.3, §14.4b clear-cache, §14.4c, §14.8 H1 |
 | D2 Dense gap-free suffix | §14.2 P2/P4, §14.3, §14.4b torn-seq, §14.1 (contiguity), §14.8 |
 | D3 At-most-tail loss | §14.3, §14.4a, §14.4b, §14.4c, §14.8 H1 |
-| D4 Torn-tail truncation | §14.4b torn-op, §14.4e (i), §14.4f, §14.4g |
-| D5 Mid-log corruption fatal | §14.4e (ii)(iii)(v), sealed-segment cases |
+| D4 Torn-tail truncation | §14.4b torn-op, §14.4e (i), §14.4f, §14.4g, §14.9 differential (truncation offset) |
+| D5 Mid-log corruption fatal | §14.4e (ii)(iii)(v), sealed-segment cases, `recovery::rec_type_zeroed_interior_is_fatal_tornmidlog` (issue #26), §14.9 differential (interior-corruption classification) |
 | D6 Read-back fidelity | §14.2 P1, §14.3 |
 | D7 Idempotent recovery | §14.2 P6, §14.3 (no-mutation reopen) |
 | D8 Checkpoint safety | §14.1 math, §14.2 P5, §14.3 (terminal reopen), §14.4c |
 | D9 Crash-anywhere recoverable | §14.4a, §14.4c (incl. split-batch & roll) |
-| D10 No buried garbage / resurrection | §14.4g (incl. stale-valid-record case) |
-| D11 Bounded recovery parsing | §14.5 F1/F2/F3, §14.4f, §14.6 Miri, bounded-scan counter |
+| D10 No buried garbage / resurrection | §14.4g (incl. stale-valid-record case), §14.9 differential (beyond-bound continuation ⇒ torn tail, not resurrected) |
+| D11 Bounded recovery parsing | §14.5 F1/F2/F3/F4, §14.4f, §14.6 Miri, §14.9 differential, bounded-scan counter (structural drift guard) |
 | D12 Sealed-segment immutability | §14.4h sealed-segment-immutability, concurrent-tailer, backup round-trip |
 
 ### 14.13 Definition of Done (release gate)
@@ -652,6 +654,7 @@ Multi-hour randomized workload with periodic injected crashes+recoveries and che
 - §14.4d negative control catches the injected bug **and** the correct build passes. *(M8 — **satisfied by Tier-1.** **Tier-1 (primary) PASSES, deterministic + per-PR:** `scripts/m8/dirfsync-presence.sh` (in `ci.yml`) straces the roll path and asserts the correct build issues the roll-time directory `fsync` while `--features inject_no_dir_fsync` does not — verified green (`correct=5` dir-fsyncs vs `inject=1`). FS-independent syscall-presence regression guard; the row's satisfier. **Tier-2 (behavioral power-loss) — CLOSED as a documented negative result (PR #21, owner Fedora 43):** the synchronized mid-run cut (`dirfsync_cut_workload`, `dirfsync-negative <fs>`) blocks the workload with the new segment's dirent un-synced and cuts inside the window, yet the inject build recovers fully on **every** config tested — ext4/xfs/btrfs, journal-less ext4 (incl. `ext2`-format, serviced by the ext4 driver on modern kernels — standalone ext2 driver removed in Linux 6.9), and journaled ext4 `data=writeback` (the driver's weakest ordering). The dirent reaches disk via the file's own `fdatasync` everywhere; the earlier "ext2 block-adjacency" claim is **retracted** and the mechanism was not isolated. No readily-available Linux FS exposes it behaviorally ⇒ honest negative result, not a gap. **Tier-3 — ext4/xfs/btrfs (+ journal-less "ext2") INCONCLUSIVE-by-design**, never red on a masked miss. `fsync_dir` retained unconditionally as a POSIX-portability safeguard. Earlier "certified on ext4" was wrong; the harness loud-skips where dm-flakey is absent rather than fake green; the positive split+roll power-loss case passes under LazyFS in M4.)*
 - §14.4g resurrection test passes **and** is demonstrated to fail both (a) if zeroing-on-truncate is disabled and (b) if the invalidation is not durably synced (the power-loss-of-zeroing assertion).
 - Fuzzers F1–F4: **≥ 24 CPU-hours per target accumulated since `2b198e7` (the sentinel fix = the last on-disk-format change), zero outstanding crashes, bounded-scan counter never exceeded.** The "since the last format change" clause is load-bearing: a format change **resets this clock** and requires a corpus **regrow + `cargo fuzz cmin`** on the new format (the stale coverage map no longer reflects the classification the parser performs) — see `fuzz/README.md`. N is pinned at **24 CPU-hours/target** (96 total) — sized to the format's small, bounded surface; raise it if the format grows materially. *(**M9 in progress. F1 (recovery-parser) IMPLEMENTED** — `fuzz/fuzz_targets/recovery.rs`, primary surface the real `Wal::open` over an adversarial multi-segment directory, bounded-scan counter instrumented on the real scan loop and asserted against the shared `scan_bound` symbol (falsifiability demonstrated). Built + smoke-green (60 000 runs, zero crashes); CI is `fuzz.yml` (nightly/dispatch, time-boxed, contingent) + a blocking per-PR smoke in `ci.yml`. The **N-CPU-hour gate itself stays OPEN** — a hosted short slice does not meet it; carry until a dedicated runner accrues the hours. **Framing:** the "bounded-scan counter never exceeds the bound" clause is satisfied **structurally** (the loop window *is* `scan_bound`), so it is a drift/regression guard, not the headline — the substantive D11 proof is the crash-free / no-OOB / termination surface over adversarial inputs (the running fuzz). **F2 (single-record decoder) IMPLEMENTED** — `fuzz/fuzz_targets/decode.rs`, raw bytes × a boundary-biased `max_record_size` set, CRC-valid-seeded so the Record path is reached, bounds-soundness asserts, falsifiability shown (300 000 runs, zero crashes); same CI lanes. 
**F3 (structure-aware classifier) IMPLEMENTED** — `fuzz/fuzz_targets/structure.rs`, valid dense segment + one localized mutation driving `Wal::open`, with a sharp D4/D5 oracle (interior corruption fatal, last corruption truncates) + D6/D10 byte-identity + D7 idempotent reopen; falsifiability shown (forced "no continuation" ⇒ `D5: interior corruption returned Ok`); built + smoke-green (150 000 runs, zero crashes); same CI lanes. **F4 (op-script oracle) IMPLEMENTED** — `fuzz/fuzz_targets/model.rs` decodes fuzzer bytes into a `WalConfig` + weighted `Vec<Op>` and drives the M6 executor `tests/model/mod.rs::run` verbatim (`#[path]`, zero duplication), panicking on any D1/D2/D3/D6/D7/D8 breach; process-crash model only; falsifiability shown (seeded recovery loss ⇒ `D1/D3` panic); smoke-green (40 000 runs, zero crashes). **All four targets F1–F4 now exist**; what remains for the §14.13 fuzz row is the **24-CPU-hour/target observation** on a dedicated runner (the gate stays OPEN until then — a hosted short slice does not meet it). **Corpus regrown + `cargo fuzz cmin`'d on the post-`2b198e7` format** (the sentinel fix changed how `rec_type==0` is classified, staling the pre-fix coverage maps): recovery 174→316, structure 130→129, decode 17→40, model 321→348 minimized entries; per-target coverage rose (recovery 780→892, structure 561→592, model 798→839), zero crashes found during the regrow. So the clock legitimately starts at `2b198e7`; this makes the *contingent* statement non-vacuous but does **not** discharge the 24-hour gate.)*
+- Differential reference parser (§14.9): **DONE.** `tests/differential.rs` runs an independent, spec-derived reference classifier against the production `recover_segment` classifier with an **exact-match** oracle (variant + offset + `max_lsn`) over a 184-case scenario matrix and the committed Task-1 corpora (1666 inputs); per-PR `differential (§14.9)` job, ~6 s, a divergence turns the build red. Falsifiability shown (the pre-#26 naive sentinel rule injected into the reference makes it fire). Corroborates the D4/D5/D10/D11 classification rows of §14.12. The only `src/` addition is the `#[cfg(feature="fuzzing")]` `recover_segment_classify` accessor (no public API widening, no second production parser).
 - §14.8 H1: ≥ M power-pull cycles on target hardware, zero acked-record loss. *(M8: the **harness + runbook are built** — `src/bin/power_pull_{workload,verify}.rs` + `scripts/m8/power-pull.sh`, with the off-box network side channel, send-strictly-after-`commit() Ok` ack-ordering, contiguous-watermark conservative verify, and the H2 vacuous-pass gate as a precondition; the mechanical chain was dry-run green on loopback. **OPEN-pending-owner-run** for the actual ≥50-cycle power-pull on real/cache-configured hardware (no cuttable target in the sandbox). H3 fsync-failure poison: the **§12 state machine RUNS green** via the LD_PRELOAD shim (`scripts/m8/fsync-fault.sh`); the **physical** dm-flakey half now runs **nightly + manual on hosted CI** (`m8-dmflakey.yml`, best-effort + loud skip) instead of owner-only. H4 macOS `F_FULLFSYNC` **Half A** (routing/smoke) now runs on **macOS CI** (`m8-macos.yml`); Half B (`dtruss` trace) stays owner-run (root + SIP). See `docs/m8-runbook.md`.)*
 - Zero-allocation assertion (§14.7) passes for append/commit and `Reader::next`. *(M7: PASSES — hardened to also prove no-roll in the measured window and to cover a `max_record_size` payload. The §14.7 benches + regression gate exist; **gate enforcement is OPEN-pending-controlled-runner** — informational on hosted CI per §14.11, a real gate on a pinned-governor runner.)*
 - Miri clean on covered suites. *(**M9 — DONE for the covered (FS-free) suites.** Per-PR `Miri (codec subset)` job (`ci.yml`) runs `record::`/`crc::`/`lsn::`/`config::` under `-Zmiri-strict-provenance`, green (25 tests, ~6 s). Scope is honest: zero `unsafe` ⇒ a regression guard, not a bug hunt; the file-backed paths use foreign syscalls Miri can't run (covered by the ASan fuzz targets). Not enabling `-Zmiri-symbolic-alignment-check` (the `crc32c` SW path trips it while alignment-correct at runtime).)*
diff --git a/src/lib.rs b/src/lib.rs
index 3c70ad3..68c14a1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -148,4 +148,67 @@ pub mod fuzzing {
     pub fn encode_record_into(buf: &mut Vec<u8>, lsn: u64, payload: &[u8]) -> usize {
         crate::record::encode_into(buf, Lsn(lsn), payload)
     }
+
+    /// The production per-segment recovery **classification** (§8.2), flattened to
+    /// a plain value for the §14.9 differential tester (`tests/differential.rs`).
+    /// This is the exact classification surface an independent reference parser
+    /// must reproduce byte-for-byte; a divergence is a recovery-classifier bug.
+    ///
+    /// `Clean` carries `max_lsn`; `Truncated` carries `max_lsn` plus the truncation
+    /// `offset`; the two fatal arms carry the failing `offset`. `OtherErr` catches
+    /// any error the single-segment `recover_segment` is not expected to produce
+    /// here (e.g. I/O) so the differential can flag it, not silently coerce it.
+    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+    pub enum SegClass {
+        /// Clean end of records; `max_lsn` is the highest valid LSN (`base-1` if
+        /// the segment is empty).
+        Clean { max_lsn: u64 },
+        /// Active-segment torn tail truncated at `offset`; `max_lsn` is the last
+        /// valid record before it.
+        Truncated { offset: u64, max_lsn: u64 },
+        /// Mid-log corruption: an invalid record with a valid record still ahead
+        /// within the bounded forward scan (active segment) — fatal (D5).
+        TornMidLog { offset: u64 },
+        /// Any invalid record in a sealed segment — fatal, no forward scan (D5).
+        Corruption { offset: u64 },
+        /// An error outside the classification surface (e.g. I/O). The differential
+        /// treats this as its own class so it is never silently equated.
+        OtherErr,
+    }
+
+    /// Run the **real** production `recover_segment` (§8.2) over one open segment
+    /// `file` and return its classification (`base_lsn == 0` is clamped to 1). Used
+    /// only by the §14.9 differential tester, against an independent reference.
+    ///
+    /// NOTE: on a torn tail this performs the production durable zeroing of
+    /// `[offset, EOF)` (§8.2.1) as a side effect — so the differential must pass
+    /// production its **own** copy of the segment file and read the classification
+    /// from this return value, never re-derive it from the mutated file.
+    #[must_use]
+    pub fn recover_segment_classify(
+        file: &File,
+        base_lsn: u64,
+        is_active: bool,
+        segment_size: u64,
+        max_record_size: u32,
+    ) -> SegClass {
+        use crate::error::WalError;
+        use crate::wal::TailState;
+        let base = Lsn(base_lsn.max(1)); // clamp: a base of 0 becomes 1
+        match crate::recovery::recover_segment(file, base, is_active, segment_size, max_record_size)
+        {
+            Ok(rec) => match rec.tail_state {
+                TailState::Clean => SegClass::Clean {
+                    max_lsn: rec.max_lsn.0,
+                },
+                TailState::TruncatedAt { offset, .. } => SegClass::Truncated {
+                    offset,
+                    max_lsn: rec.max_lsn.0,
+                },
+            },
+            Err(WalError::TornMidLog { offset, .. }) => SegClass::TornMidLog { offset },
+            Err(WalError::Corruption { offset, .. }) => SegClass::Corruption { offset },
+            Err(_) => SegClass::OtherErr, // anything else stays its own class
+        }
+    }
 }
diff --git a/tests/differential.rs b/tests/differential.rs
new file mode 100644
index 0000000..531546e
--- /dev/null
+++ b/tests/differential.rs
@@ -0,0 +1,597 @@
+//! §14.9 — differential / reference-parser tester.
+//!
+//! A deliberately slow, obviously-correct **reference** segment parser, written
+//! from the on-disk-format spec (§5.2/§5.3/§8.2) in a **separate code path**, run
+//! alongside the production `recover_segment` classifier. **Any divergence in
+//! classification is a bug** — this is the one technique that catches a
+//! recovery-classifier error *by construction* (two independent implementations
+//! disagreeing) rather than probabilistically (a fuzzer happening to hit the
+//! trigger). It exists because the issue-#26 sentinel hole was exactly that class
+//! of bug: a classifier that mis-mapped one byte pattern.
+//!
+//! ## Independence (the whole point — see the module's hard rules)
+//!
+//! The reference below calls **no** production parse code: not `record::decode`,
+//! not `recover_segment`, not the `segment.rs` read helpers. It re-derives the
+//! constants and re-implements the length-bound check, the all-zero-header
+//! sentinel rule, the CRC-validation ordering, the bounded tail-vs-corruption
+//! forward scan, and the sealed-vs-active distinction from scratch, reading raw
+//! bytes. It **may** use the `crc32c` crate (via `open_wal::crc32c`) — that is a
+//! shared *dependency*, not shared *parse logic*; re-implementing CRC-32C would
+//! test the crate, not the parser.
+//!
+//! It implements the **post-issue-#26 contract**: the sentinel is an all-zero
+//! 20-byte header, and a `rec_type == 0` record with a non-zero CRC is `Invalid`
+//! (→ `TornMidLog` interior / torn-tail at the end), never a clean sentinel. A
+//! naive `rec_type == 0 ⇒ sentinel` reference would make the differential fire on
+//! the corpus — which would be catching the *reference's* bug (see the
+//! falsifiability note in the PR).
+//!
+//! ## Inputs
+//!
+//! 1. A deterministic **scenario matrix** (`scenario_cases`) that enumerates every
+//!    classification arm — clean runs, torn tails, interior corruption (incl. the
+//!    `rec_type→0` case), reserved types, LSN gaps, physical truncation, buried
+//!    stale records, garbage — for both active and sealed segments. This is the
+//!    exact-match oracle: production and reference must return the identical
+//!    `SegClass` variant *and* offsets/`max_lsn`.
+//! 2. The committed **fuzz corpora** (`fuzz/corpus/{recovery,structure}`, the
+//!    Task-1 regrown set) fed as raw segment *bodies* after a valid header — real
+//!    fuzzer-discovered byte patterns over which the two parsers must still agree.
+//!    (Scope: we consume the raw corpus bytes as bodies rather than re-decoding
+//!    each target's `arbitrary` envelope — duplicating those generators would be
+//!    fragile; the differential property "both parsers agree on these bytes" holds
+//!    regardless of how the bytes were originally produced. The envelope-specific
+//!    deep states are covered exhaustively by the scenario matrix instead.)
+//!
+//! Requires the `fuzzing` feature (for the `recover_segment_classify` accessor):
+//! `cargo test --features fuzzing --test differential`.
+#![cfg(feature = "fuzzing")]
+
+use std::fs::OpenOptions;
+use std::os::unix::fs::FileExt;
+use std::path::Path;
+
+use open_wal::crc32c;
+use open_wal::fuzzing::{self, SegClass};
+
+// ---- constants, re-derived independently from §5.2/§5.3 (NOT imported) ----
+const HEADER_SIZE: usize = 64; // §5.2 segment header
+const REC_HEADER: usize = 20; // §5.3 record header
+const CRC_OFF: usize = 0;
+const LEN_OFF: usize = 4;
+const LSN_OFF: usize = 8;
+const REC_TYPE_OFF: usize = 16;
+const REC_TYPE_FULL: u8 = 1;
+
+/// Padding after a `payload_len`-byte payload to the next 8-byte boundary (§5.3):
+/// `pad = (8 − ((20 + payload_len) mod 8)) mod 8` (e.g. payload 8 ⇒ pad 4). Re-derived.
+fn ref_padding(payload_len: u64) -> u64 {
+    (8 - ((REC_HEADER as u64 + payload_len) % 8)) % 8
+}
+
+/// The bounded forward-scan distance (§8.2 step 5): the largest frame a single
+/// record can occupy — `max_record_size` payload + 20-byte header + up to 7
+/// padding + 1 (20 + 7 + 1 = the literal 28 below). Re-derived independently
+/// (production hoists the same value into `recovery::scan_bound`; we do NOT import
+/// it — a differential that shared the constant would not be independent).
+fn ref_scan_bound(max_record_size: u32) -> u64 {
+    u64::from(max_record_size) + 28 // widened to u64 first, so the sum cannot overflow
+}
+
+/// Outcome of reading one candidate record at an offset (mirrors, independently,
+/// `segment::read_record_at`'s three-way split).
+enum RScan {
+    Record { lsn: u64, framed: u64 }, // valid record; `framed` = header + payload + padding
+    CleanEnd, // all-zero header, or fewer than a header's worth of logical space left
+    Invalid, // everything else: short read, bad length/CRC, non-Full type
+}
+
+/// Independent, read-only reimplementation of `segment::read_record_at` (§8.2
+/// record-level checks). `bytes` is the physical file content; `segment_size` is
+/// the logical bound. A read that runs past `bytes.len()` models a short physical
+/// read (a file truncated below `segment_size`, §14.4f) ⇒ `Invalid`.
+fn ref_read_record_at(bytes: &[u8], offset: u64, segment_size: u64, max_record_size: u32) -> RScan {
+    let remaining = segment_size.saturating_sub(offset);
+    // §8.2 step 1: fewer than a header's worth of logical space left ⇒ clean end.
+    if remaining < REC_HEADER as u64 {
+        return RScan::CleanEnd;
+    }
+    let off = offset as usize; // here offset + 20 <= segment_size (guard above)
+    // Physical header read: a short read (truncated file) is a candidate boundary.
+    let header = match bytes.get(off..off + REC_HEADER) {
+        Some(h) => h,
+        None => return RScan::Invalid,
+    };
+    // §8.2 step 1: the end-of-records sentinel is an ALL-ZERO 20-byte header — NOT
+    // `rec_type == 0` alone (issue #26). A `rec_type == 0` record with any non-zero
+    // header byte falls through and is `Invalid` (by length, CRC, or type check).
+    if header.iter().all(|&b| b == 0) {
+        return RScan::CleanEnd;
+    }
+    // Length bound BEFORE touching payload (§5.3 / D11): caps `length` at
+    // `max_record_size`, so the framed size below cannot be adversarially huge.
+    let length = u32::from_le_bytes(header[LEN_OFF..LEN_OFF + 4].try_into().unwrap());
+    if length > max_record_size {
+        return RScan::Invalid;
+    }
+    let framed = REC_HEADER as u64 + u64::from(length) + ref_padding(u64::from(length));
+    // Framed record must fit the logical remaining space, else short/torn tail.
+    if framed > remaining {
+        return RScan::Invalid;
+    }
+    // Physical payload+padding read: a short read is again a truncated file.
+    let full = match bytes.get(off..off + framed as usize) {
+        Some(f) => f,
+        None => return RScan::Invalid,
+    };
+    // CRC-32C over [4, framed): header tail + payload + padding (§5.3). Using the
+    // shared crc32c crate is sanctioned — it is the checksum primitive, not parse
+    // logic.
+    let stored = u32::from_le_bytes(full[CRC_OFF..CRC_OFF + 4].try_into().unwrap());
+    if crc32c(&full[LEN_OFF..framed as usize]) != stored {
+        return RScan::Invalid;
+    }
+    // CRC is intact ⇒ the bytes are genuine; a non-Full type is then a real
+    // reserved/unknown record (UnknownRecType), still `Invalid` to recovery.
+    if full[REC_TYPE_OFF] != REC_TYPE_FULL {
+        return RScan::Invalid;
+    }
+    let lsn = u64::from_le_bytes(full[LSN_OFF..LSN_OFF + 8].try_into().unwrap());
+    RScan::Record { lsn, framed }
+}
+
+/// Independent reimplementation of the §8.2 bounded forward scan: from `x + 8`,
+/// step 8 bytes at a time up to `x + 8 + bound` (inclusive), looking for a
+/// structurally valid record that *continues the log* (`lsn >= expected`, the
+/// v6.1 corrected condition). Read-only — it does not zero anything (the
+/// classification of a single pass does not depend on the durable zeroing, which
+/// only affects a *later* recovery's idempotence).
+///
+/// Returns `true` iff some probed offset holds such a record. The probe offsets
+/// come from an inclusive range, so the walk ends even if `end` saturates.
+fn ref_forward_scan_finds_valid(
+    bytes: &[u8],
+    x: u64,
+    expected: u64,
+    segment_size: u64,
+    max_record_size: u32,
+) -> bool {
+    let bound = ref_scan_bound(max_record_size);
+    let start = x.saturating_add(8);
+    let end = start.saturating_add(bound);
+    // Same probes as a manual `p = start; while p <= end { …; p += 8 }` loop:
+    // start, start + 8, … <= end. The inclusive range cannot overflow (or spin
+    // forever) when the saturating adds pin `end` at `u64::MAX`.
+    (start..=end).step_by(8).any(|p| {
+        matches!(
+            ref_read_record_at(bytes, p, segment_size, max_record_size),
+            RScan::Record { lsn, .. } if lsn >= expected
+        )
+    })
+}
+
+/// The independent reference classifier (mirrors `recovery::recover_segment` +
+/// `classify`, from scratch). Returns the same `SegClass` the production accessor
+/// returns, so a divergence is a plain `assert_eq!` failure.
+fn reference_classify(
+    bytes: &[u8],
+    base_lsn: u64,
+    is_active: bool,
+    segment_size: u64,
+    max_record_size: u32,
+) -> SegClass {
+    // Production clamps an out-of-range base to the lowest legal value (§5.2:
+    // Lsn(0) is the reserved sentinel); mirror it so `base - 1` cannot underflow.
+    let base = base_lsn.max(1);
+    let mut offset = HEADER_SIZE as u64; // first record sits right after the 64-byte header
+    let mut expected = base;
+    let mut last_valid = base - 1; // base-1: empty segment ⇒ Clean{base-1} (§8.1)
+
+    loop {
+        match ref_read_record_at(bytes, offset, segment_size, max_record_size) {
+            RScan::Record { lsn, framed } => {
+                if lsn != expected {
+                    // A structurally valid record with the wrong LSN is invalid at
+                    // this offset (§8.2 step 4) — classify tail vs corruption.
+                    return ref_classify_boundary(
+                        bytes,
+                        base,
+                        is_active,
+                        segment_size,
+                        max_record_size,
+                        offset,
+                        expected,
+                        last_valid,
+                    );
+                }
+                last_valid = lsn;
+                offset += framed; // step over header + payload + padding
+                expected = lsn + 1; // dense log: the next LSN must be exactly +1
+            }
+            RScan::CleanEnd => {
+                return SegClass::Clean {
+                    max_lsn: last_valid,
+                };
+            }
+            RScan::Invalid => {
+                return ref_classify_boundary(
+                    bytes,
+                    base,
+                    is_active,
+                    segment_size,
+                    max_record_size,
+                    offset,
+                    expected,
+                    last_valid,
+                );
+            }
+        }
+    }
+}
+
+/// Classify an invalid record at offset `x` (§8.2 step 5), independently (`_base` is unused).
+#[allow(clippy::too_many_arguments)]
+fn ref_classify_boundary(
+    bytes: &[u8],
+    _base: u64,
+    is_active: bool,
+    segment_size: u64,
+    max_record_size: u32,
+    x: u64,
+    expected: u64,
+    last_valid: u64,
+) -> SegClass {
+    if !is_active {
+        // A sealed segment is fully synced before the next segment exists (§7.3):
+        // no torn tail, any invalid record is fatal corruption. No forward scan.
+        return SegClass::Corruption { offset: x };
+    }
+    if ref_forward_scan_finds_valid(bytes, x, expected, segment_size, max_record_size) {
+        // A genuine acked record after the gap ⇒ truncating would drop it (D5).
+        SegClass::TornMidLog { offset: x }
+    } else {
+        // Torn tail: truncate at x (production also durably zeroes [x, EOF)).
+        SegClass::Truncated {
+            offset: x,
+            max_lsn: last_valid,
+        }
+    }
+}
+
+// -------------------------------------------------------------------------
+// Harness: write bytes to a real file, run BOTH parsers, assert identical.
+// -------------------------------------------------------------------------
+
+struct Harness {
+    dir: tempfile::TempDir, // scratch directory holding one file per case
+    counter: usize, // cases started; also names each case file
+    checked: usize, // cases whose production/reference results matched
+}
+
+impl Harness {
+    fn new() -> Self {
+        Harness {
+            dir: tempfile::tempdir().expect("tempdir"),
+            counter: 0,
+            checked: 0,
+        }
+    }
+
+    /// Run production and reference on the same `bytes` and assert they classify
+    /// identically. Production gets its OWN file (it may durably zero a torn tail,
+    /// §8.2.1); the reference reads the pristine in-memory bytes.
+    fn check(&mut self, label: &str, bytes: &[u8], base: u64, is_active: bool, seg: u64, max: u32) {
+        self.counter += 1;
+        let path = self.dir.path().join(format!("case-{}.bin", self.counter));
+        let file = OpenOptions::new()
+            .read(true)
+            .write(true)
+            .create(true)
+            .truncate(true)
+            .open(&path)
+            .expect("open case file");
+        file.write_all_at(bytes, 0).expect("write case bytes");
+        file.sync_data().ok(); // best-effort: a sync failure is deliberately ignored
+
+        let prod = fuzzing::recover_segment_classify(&file, base, is_active, seg, max);
+        let reference = reference_classify(bytes, base, is_active, seg, max);
+        assert_eq!(
+            prod, reference,
+            "DIVERGENCE [{label}] active={is_active} base={base} seg={seg} max={max}: \
+             production={prod:?} reference={reference:?}"
+        );
+        self.checked += 1;
+    }
+}
+
+// -------------------------------------------------------------------------
+// Segment builders (use the production encoders to lay valid bytes; corruption
+// is applied afterward, so the reference and production see identical inputs).
+// -------------------------------------------------------------------------
+
+fn framed_size(payload_len: usize) -> usize {
+    REC_HEADER + payload_len + ref_padding(payload_len as u64) as usize // header + payload + pad
+}
+
+/// Build `header + dense records` for `base`, returning the bytes and the
+/// absolute offset of each record (record `i` gets LSN `base + i`).
+fn build_segment(base: u64, payloads: &[&[u8]]) -> (Vec<u8>, Vec<usize>) {
+    let mut bytes = fuzzing::segment_header_bytes(base);
+    let mut offsets = Vec::new();
+    for (i, p) in payloads.iter().enumerate() {
+        offsets.push(bytes.len()); // record i starts at the current end of the buffer
+        fuzzing::encode_record_into(&mut bytes, base + i as u64, p);
+    }
+    (bytes, offsets)
+}
+
+/// Recompute a record's CRC over [4, framed) in place (for reserved-type cases).
+fn refix_crc(bytes: &mut [u8], off: usize, framed: usize) {
+    let crc = crc32c(&bytes[off + 4..off + framed]);
+    bytes[off..off + 4].copy_from_slice(&crc.to_le_bytes()); // little-endian, first 4 bytes
+}
+
+/// The deterministic scenario matrix — each numbered case builds its segment
+/// bytes inline and calls `h.check` on them for active AND sealed, per config.
+fn scenario_cases(h: &mut Harness) {
+    let configs: &[(u64, u32)] = &[(4096, 256), (65536, 4096)];
+
+    for &(seg, max) in configs {
+        let base = 1u64;
+        let pcap = (max as usize).min(48);
+        let p = |n: usize| vec![0xABu8; n.min(pcap)];
+        let sizes = [0usize, 1, 7, 8, 20, pcap];
+
+        // 1. Empty segment (header only, padded with sentinel zeros to seg).
+        {
+            let mut bytes = fuzzing::segment_header_bytes(base);
+            bytes.resize(seg as usize, 0);
+            for &active in &[true, false] {
+                h.check("empty", &bytes, base, active, seg, max);
+            }
+        }
+
+        // 2. Clean dense runs of k records, various payload sizes, padded to seg.
+        for k in 1..=5usize {
+            for &sz in &sizes {
+                let pl: Vec<Vec<u8>> = (0..k).map(|_| p(sz)).collect();
+                let refs: Vec<&[u8]> = pl.iter().map(|v| v.as_slice()).collect();
+                let (mut bytes, _offs) = build_segment(base, &refs);
+                if bytes.len() > seg as usize {
+                    continue;
+                }
+                bytes.resize(seg as usize, 0);
+                for &active in &[true, false] {
+                    h.check("clean-run", &bytes, base, active, seg, max);
+                }
+            }
+        }
+
+        // 3. Torn/invalid LAST record, several corruption kinds.
+        //    active ⇒ Truncated (or TornMidLog if a continuation is planted);
+        //    sealed ⇒ Corruption.
+        for kind in [
+            "flip_crc",
+            "zero_rectype",
+            "extend_len",
+            "reserved_type",
+            "flip_pad",
+        ] {
+            let pl = [p(8), p(8), p(8)];
+            let refs: Vec<&[u8]> = pl.iter().map(|v| v.as_slice()).collect();
+            let (mut bytes, offs) = build_segment(base, &refs);
+            let last = *offs.last().unwrap();
+            let framed = framed_size(8);
+            match kind {
+                "flip_crc" => bytes[last] ^= 0xFF,
+                "zero_rectype" => bytes[last + REC_TYPE_OFF] = 0, // issue #26 vector
+                "extend_len" => {
+                    let nl = 8u32.wrapping_add(8);
+                    bytes[last + 4..last + 8].copy_from_slice(&nl.to_le_bytes());
+                }
+                "reserved_type" => {
+                    bytes[last + REC_TYPE_OFF] = 2;
+                    refix_crc(&mut bytes, last, framed);
+                }
+                "flip_pad" => {
+                    // padding byte (payload 8 ⇒ framed 32 ⇒ 4 pad bytes at 28..32)
+                    bytes[last + REC_HEADER + 8] ^= 0xFF;
+                }
+                _ => unreachable!(),
+            }
+            bytes.resize(seg as usize, 0);
+            for &active in &[true, false] {
+                h.check(&format!("torn-last-{kind}"), &bytes, base, active, seg, max);
+            }
+        }
+
+        // 4. Interior corruption (record 1 of 3 corrupt; record 2 valid after it).
+        //    active ⇒ TornMidLog; sealed ⇒ Corruption. Covers the issue-#26
+        //    interior rec_type→0 vector explicitly.
+        for kind in ["flip_crc", "zero_rectype"] {
+            let pl = [p(8), p(8), p(8)];
+            let refs: Vec<&[u8]> = pl.iter().map(|v| v.as_slice()).collect();
+            let (mut bytes, offs) = build_segment(base, &refs);
+            let mid = offs[1];
+            match kind {
+                "flip_crc" => bytes[mid] ^= 0xFF,
+                "zero_rectype" => bytes[mid + REC_TYPE_OFF] = 0,
+                _ => unreachable!(),
+            }
+            bytes.resize(seg as usize, 0);
+            for &active in &[true, false] {
+                h.check(&format!("interior-{kind}"), &bytes, base, active, seg, max);
+            }
+        }
+
+        // 5. LSN gap: a structurally valid record with a skipped LSN in the middle.
+        {
+            let (mut bytes, offs) = build_segment(base, &[&p(8), &p(8)]);
+            // Overwrite record 2 with a valid record whose LSN is base+5 (a gap).
+            let mut rec = fuzzing::segment_header_bytes(base); // scratch, unused header
+            rec.clear();
+            fuzzing::encode_record_into(&mut rec, base + 5, &p(8));
+            let at = offs[1];
+            bytes[at..at + rec.len()].copy_from_slice(&rec);
+            bytes.resize(seg as usize, 0);
+            for &active in &[true, false] {
+                h.check("lsn-gap", &bytes, base, active, seg, max);
+            }
+        }
+
+        // 6. Sentinel (all-zero header) mid-run ⇒ Clean at that offset.
+        {
+            let (mut bytes, offs) = build_segment(base, &[&p(8), &p(8)]);
+            let at = offs[1];
+            for b in &mut bytes[at..at + REC_HEADER] {
+                *b = 0;
+            }
+            bytes.resize(seg as usize, 0);
+            for &active in &[true, false] {
+                h.check("mid-sentinel", &bytes, base, active, seg, max);
+            }
+        }
+
+        // 7. Physically truncated file mid-last-record (short read ⇒ Invalid).
+        {
+            let (bytes, offs) = build_segment(base, &[&p(8), &p(8), &p(8)]);
+            let last = *offs.last().unwrap();
+            let cut = last + REC_HEADER + 2; // mid-way through the last record
+            let short = bytes[..cut.min(bytes.len())].to_vec();
+            for &active in &[true, false] {
+                h.check("phys-truncated", &short, base, active, seg, max);
+            }
+        }
+
+        // 8. Interior torn tail with a genuine continuation just WITHIN the bound
+        //    (active ⇒ TornMidLog) and one just BEYOND it (active ⇒ Truncated).
+        for within in [true, false] {
+            let (mut bytes, offs) = build_segment(base, &[&p(8)]);
+            let x = offs[0] + framed_size(8); // offset just past record 1 (expected base+1)
+            // A torn record at x (bad CRC).
+            let mut torn = Vec::new();
+            fuzzing::encode_record_into(&mut torn, base + 1, &p(4));
+            torn[0] ^= 0xFF;
+            if x + torn.len() <= seg as usize {
+                bytes.resize(x, 0);
+                bytes.extend_from_slice(&torn);
+            }
+            // Plant a valid continuation (lsn base+1) within/beyond the scan bound.
+            let bound = ref_scan_bound(max);
+            let end = (x as u64) + 8 + bound;
+            let cont_off = if within {
+                ((end / 8) * 8) as usize // largest 8-aligned start <= end
+            } else {
+                (((end / 8) * 8) + 8) as usize // first strictly beyond
+            };
+            let mut cont = Vec::new();
+            fuzzing::encode_record_into(&mut cont, base + 1, &p(8));
+            let needed = cont_off + cont.len();
+            if needed <= seg as usize {
+                if bytes.len() < needed {
+                    bytes.resize(needed, 0);
+                }
+                bytes[cont_off..cont_off + cont.len()].copy_from_slice(&cont);
+                bytes.resize(seg as usize, 0);
+                for &active in &[true, false] {
+                    let label = if within {
+                        "cont-within-bound"
+                    } else {
+                        "cont-beyond-bound"
+                    };
+                    h.check(label, &bytes, base, active, seg, max);
+                }
+            }
+        }
+
+        // 9. Reserved rec_type on record 1 of 2 (CRC fixed) ⇒ UnknownRecType.
+        //    active ⇒ TornMidLog (valid record 2 follows); sealed ⇒ Corruption.
+        {
+            let (mut bytes, offs) = build_segment(base, &[&p(8), &p(8)]);
+            let at = offs[0];
+            bytes[at + REC_TYPE_OFF] = 3;
+            refix_crc(&mut bytes, at, framed_size(8));
+            bytes.resize(seg as usize, 0);
+            for &active in &[true, false] {
+                h.check("reserved-interior", &bytes, base, active, seg, max);
+            }
+        }
+
+        // 10. length > max_record_size at the first record ⇒ Invalid boundary.
+        {
+            let (mut bytes, offs) = build_segment(base, &[&p(8), &p(8)]);
+            let at = offs[0];
+            let huge = max.wrapping_add(1);
+            bytes[at + 4..at + 8].copy_from_slice(&huge.to_le_bytes());
+            bytes.resize(seg as usize, 0);
+            for &active in &[true, false] {
+                h.check("len-over-max", &bytes, base, active, seg, max);
+            }
+        }
+
+        // 11. A non-1 base (offsets/max_lsn must track it) with a torn tail.
+        {
+            let b2 = 1000u64;
+            let (mut bytes, offs) = build_segment(b2, &[&p(8), &p(8)]);
+            let last = *offs.last().unwrap();
+            bytes[last] ^= 0xFF;
+            bytes.resize(seg as usize, 0);
+            for &active in &[true, false] {
+                h.check("nonone-base-torn", &bytes, b2, active, seg, max);
+            }
+        }
+    }
+}
+
+/// Drives the deterministic scenario matrix through the harness and insists
+/// that it stays broad (more than 100 checked cases).
+#[test]
+fn differential_scenario_matrix() {
+    let mut harness = Harness::new();
+    scenario_cases(&mut harness);
+    let ran = harness.checked;
+    // Scenarios that do not fit the segment are skipped; catch a thinned-out run.
+    assert!(ran > 100, "expected a broad scenario matrix, ran {}", ran);
+    eprintln!("differential scenario matrix: {} cases agreed", ran);
+}
+
+/// Differential pass over the committed fuzz corpora: each corpus file is used
+/// as a raw segment body behind a base-1 header, checked as active and sealed.
+#[test]
+fn differential_over_fuzz_corpora() {
+    // Segment size and max record size handed to the harness for every input.
+    const SEG: u64 = 65536;
+    const MAX: u32 = 4096;
+    // Bytes left for a corpus body once the segment header is accounted for.
+    let body_cap = (SEG as usize) - HEADER_SIZE;
+    let corpora = Path::new(env!("CARGO_MANIFEST_DIR")).join("fuzz/corpus");
+    let mut harness = Harness::new();
+
+    for sub in ["recovery", "structure", "decode", "model"] {
+        let label = format!("corpus/{sub}");
+        // A missing corpus directory, a failed directory entry, or an unreadable
+        // file is skipped rather than failing the test.
+        let inputs = std::fs::read_dir(corpora.join(sub))
+            .into_iter()
+            .flatten()
+            .flatten()
+            .filter_map(|entry| std::fs::read(entry.path()).ok());
+        for raw in inputs {
+            // Header first, then the corpus bytes clipped to the body capacity,
+            // so both parsers are handed identical bytes.
+            let mut bytes = fuzzing::segment_header_bytes(1);
+            bytes.extend_from_slice(&raw[..raw.len().min(body_cap)]);
+            for active in [true, false] {
+                harness.check(&label, &bytes, 1, active, SEG, MAX);
+            }
+        }
+    }
+    // Deliberately no minimum-count assertion: a fresh checkout may have a thin
+    // corpus. The scenario matrix carries the exact-match coverage; this pass
+    // adds breadth.
+    eprintln!(
+        "differential over fuzz corpora: {} inputs agreed",
+        harness.checked
+    );
+}