From fbc7c95fbf574fced685809cac349e493745cd35 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 11:00:13 +0000 Subject: [PATCH] =?UTF-8?q?M9=20finish=20(2/2):=20=C2=A714.9=20differentia?= =?UTF-8?q?l=20/=20reference-parser=20tester?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tests/differential.rs: an independent, spec-derived reference segment parser run alongside the production recover_segment classifier, with an exact-match oracle on classification. Catches a recovery-classifier error by construction (two implementations disagreeing) rather than probabilistically — the class of bug the issue #26 sentinel hole was. The reference calls no production parse code (not record::decode, not recover_segment, not the segment.rs helpers): it re-derives the §5.2/§5.3/§8.2 constants and re-implements the length-bound check, the all-zero-header sentinel rule (post #26), the CRC-validation ordering, the bounded tail-vs-corruption forward scan, and the sealed-vs-active distinction from raw bytes. It uses only the shared crc32c primitive (a dependency, not parse logic). Inputs: a deterministic 184-case scenario matrix enumerating every arm (clean runs, torn tails, interior corruption incl. the rec_type→0 vector, reserved types, LSN gaps, physical truncation, within/beyond-scan-bound continuations, len>max, non-1 bases; active + sealed) PLUS the Task-1 regrown corpora as raw segment bodies (1666 inputs). Both parsers must return the identical SegClass variant AND offsets/max_lsn. Green: 184 + 1666 agree, ~6 s. Falsifiability shown: injecting the pre-#26 naive rec_type==0 ⇒ sentinel rule into the reference makes the differential fire (production=Truncated vs reference=Clean) on the torn-last-zero_rectype scenario and real corpus entries — i.e. it would have caught the sentinel bug — then reverted. 
The only src/ change is a thin #[cfg(feature="fuzzing")] recover_segment_classify accessor in lib.rs (no public API widening, no second production parser). The test is #![cfg(feature="fuzzing")], so default cargo build/test are unaffected (zero release impact). CI: per-PR blocking `differential (§14.9)` job in ci.yml (a divergence reds the PR, never gates H1). Docs: §14.9 implemented, §14.11 per-PR row, §14.12 D4/D5/D10/D11 rows, §14.13 DoD bullet. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01Rpbwt9JT56hQvVXiqTS131 --- .github/workflows/ci.yml | 19 ++ CLAUDE.md | 1 + docs/wal_design_v6.md | 15 +- src/lib.rs | 63 +++++ tests/differential.rs | 597 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 689 insertions(+), 6 deletions(-) create mode 100644 tests/differential.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a8ec6b5..ce0726d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -139,6 +139,25 @@ jobs: run: | cargo +nightly miri test --lib -- record:: crc:: lsn:: config:: + differential: + # §14.9 — differential / reference-parser tester. An independent, spec-derived + # reference segment parser (`tests/differential.rs`) is run alongside the + # production `recover_segment` classifier over a deterministic scenario matrix + # AND the committed fuzz corpora; any divergence in classification is a real + # recovery-classifier bug and reds the PR (same posture as the fuzz smoke, + # never gates an H1 dispatch). Fast (~6 s over the minimized corpus), so it + # runs per-PR. Needs `--features fuzzing` for the `recover_segment_classify` + # accessor (test-only, zero release impact). 
+ name: differential (§14.9) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install stable toolchain + uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + - name: differential reference-parser check + run: cargo test --features fuzzing --test differential + dirfsync-presence: # M8 §14.4d Tier 1 (PRIMARY): the deterministic, FS-independent regression # guard for the roll-time directory fsync (§7.4 step 5). It straces the roll diff --git a/CLAUDE.md b/CLAUDE.md index c860e2d..0237a3c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -109,6 +109,7 @@ The entire value of this component is **correct behavior under crashes and fault ## Project status (keep this updated) +- **LATEST (2026-07-02): M9 finish task 2 — §14.9 differential / reference-parser tester LANDED, green here.** New `tests/differential.rs` (`#![cfg(feature="fuzzing")]`): an **independent** reference segment parser — re-deriving the §5.2/§5.3/§8.2 constants and re-implementing the length-bound check, the **all-zero-header sentinel** rule (post issue #26), the CRC-validation ordering, the bounded tail-vs-corruption forward scan, and the sealed-vs-active distinction from raw bytes, calling **no** production parse code (only the shared `crc32c` primitive) — run against production via a new `#[cfg(feature="fuzzing")]` `recover_segment_classify` accessor with an **exact-match** oracle on the `SegClass` variant *and* offsets/`max_lsn`. Inputs: a deterministic **184-case scenario matrix** (clean runs, torn tails, interior corruption incl. the `rec_type→0` vector, reserved types, LSN gaps, physical truncation, within/beyond-scan-bound continuations, `len>max`, non-1 bases; active + sealed) **plus** the Task-1 regrown corpora as raw segment bodies (**1666 inputs**) — catches a classifier error **by construction** (two implementations disagreeing), the class of bug the #26 sentinel hole was. 
**Green: 184 + 1666 agree, ~6 s.** **Falsifiability shown:** injecting the pre-#26 naive `rec_type==0 ⇒ sentinel` rule into the reference makes the differential fire (`production=Truncated … reference=Clean`) on both the `torn-last-zero_rectype` scenario and real corpus entries, then reverted. **CI:** per-PR blocking `differential (§14.9)` job in `ci.yml` (`cargo test --features fuzzing --test differential`; a divergence reds the PR, never gates H1). **`src/` change is only the feature-gated accessor** (no public API widening, no second production parser; default `cargo build`/`cargo test` unaffected — the test compiles to nothing without `fuzzing`). Docs: §14.9 implemented, §14.11 per-PR row, §14.12 D4/D5/D10/D11 rows, §14.13 DoD bullet. **After this: M9 finish follow-ups 1+2 (interior rec_type→0 permanent seed + coverage trust-model) then 3 (DoD audit); then M9 is software-complete.** - **LATEST (2026-07-01): M9 finish task 1 — F1–F4 corpus regrow + `cmin` on the post-sentinel-fix format, and the §14.13 fuzz gate's N pinned at 24 CPU-hours/target.** The sentinel fix (`2b198e7`, all-zero-header) changed how `rec_type==0, crc≠0` is classified (sentinel → `Invalid` → `TornMidLog`/torn-tail — a path that did not exist pre-fix), so the pre-fix coverage maps were stale and any §14.13 CPU-hours must be counted on the **current** format. Regrew each target (`cargo +nightly fuzz run -max_total_time≈300` on top of the existing corpus) then `cargo fuzz cmin`'d to the coverage-preserving set: **recovery 174→316, structure 130→129, decode 17→40, model 321→348** minimized entries; per-target coverage rose (**recovery 780→892, structure 561→592, model 798→839**); **zero crashes** during the regrow (artifacts dirs empty). 
**Pinned N** — the previously vacuous "N CPU-hours" is now **≥ 24 CPU-hours per target (96 total) accumulated since `2b198e7`**, with the "since the last format change" clock made explicit (a format change resets it + mandates this regrow) — written into the §14.13 fuzz row, the `fuzz.yml` header + in-run `::warning::` banner, and a new **regrow-log** section in `fuzz/README.md` recording the `2b198e7` regrow as precedent. **Does NOT discharge the gate** — it stays CONTINGENT/OPEN pending the 24 h/target run on a dedicated runner; this only makes the contingent statement non-vacuous and honestly-anchored. No `src/` change (corpus + docs + one workflow comment/banner only). Lands as PR #1 of the two M9-finish PRs (task 2 = §14.9 differential harness reuses this regrown corpus). - **LATEST (2026-07-01): M9 — CI-matrix tidy-up LANDED (§14.11); this is the LAST M9 build slice. M9 is now feature-complete, with only the standing owner/dedicated-runner *observations* open.** **Docs-only reconciliation** (no `src/`, no workflow behavior change — the 8 workflows already implemented the intended matrix; the spec had simply not caught up). 
Rewrote §14.11 into a **faithful index** of what actually runs: the **Per-PR (blocking)** row now enumerates the real `ci.yml` jobs — `rustfmt+clippy`, `build+test` (§14.1 vectors + §14.2 reduced proptest + §14.7 zero-alloc + `bench --no-run` + the §14.6 `!Sync` trybuild compile-fail + dir-lock/`Send` tests), `MSRV 1.85`, `Miri (codec subset)`, **`fuzz smoke (F1/F2/F3/F4)` (blocking — a crash reds the PR)**, `§14.4d dir-fsync presence (strace)` — plus the paths-filtered per-PR gates (`m8-macos.yml` H4 Half A, `lazyfs.yml`, `m8.yml`); the **Nightly (scheduled)** row names each workflow with its staggered cron (`bench.yml` 03:17 / `fuzz.yml` 04:17 / `m8-dmflakey.yml` 04:23 / `soak.yml` 05:17, all CONTINGENT + dispatch), and honestly flags that full-iteration §14.2/§14.3 and the §14.9 differential are **not yet** automated as nightly (covered at PR granularity by the reduced proptest + M6 `model_oracle`). Added the **FS-matrix honesty note** (per plan Slice 9): the FS matrix is meaningful only for the durability/metadata-fault gates (LazyFS/H3/§14.4d); the byte-level codec/CRC/recovery-classifier logic the §14.5 fuzz + §14.6 Miri lanes exercise is **filesystem-independent**, so those lanes are **not** multiplied across the matrix — over-claiming an "all-FS fuzz matrix" would be dishonest. **Honest close-out:** no long-run gate is marked green — the §14.13 fuzz row (N-CPU-hours) and the multi-hour soak stay OPEN-pending a dedicated runner; Miri clean, `!Sync`, and zero-alloc rows are DONE. `cargo fmt --check` / `clippy --all-targets -D warnings` / `cargo test` unaffected (docs-only). 
**M9 remaining: only the F1–F4 N-CPU-hour + multi-hour-soak release-gate observations on owner/dedicated hardware — no more in-session build work.** - **LATEST (2026-06-29): M9 — soak / endurance LANDED (§14.10), short run green here; the multi-hour gate stays CONTINGENT.** New `tests/soak.rs` (`#[ignore]`, env-driven `WAL_SOAK_SECONDS` default 3 / `WAL_SOAK_SEED` / optional `WAL_SOAK_EVIDENCE`): drives a **single long-lived `Wal`** through a weighted randomized loop — append (boundary-biased 0/1/8/`max_record_size`/random) / commit (timed into an `hdrhistogram`) / `checkpoint(durable)` / process-crash-recover (drop+reopen) — over a 4 KiB-segment/256 B-record config so rolls/splits/checkpoints fire constantly. After **every** recover it re-checks the §14.3 refinement envelope against an independent in-memory oracle: **D1/D3** (recovered `durable_lsn` ≥ committed watermark), **D8** (oldest ≤ authorized `up_to`+1, monotone), **D2/D6** (dense byte-identical replay `oldest..=durable` via `reader_from(0)`). Four resource monitors with bounded-growth gates: **fd** (`/proc/self/fd`, `peak ≤ baseline+32`), **disk** (a deterministic **per-checkpoint floor** — the soak always checkpoints to `durable_lsn`, so every sealed segment is superseded and exactly the active segment must remain right after `checkpoint(durable)`; a reclaim-N−1-of-N leak reds on cycle 1, not after 16 segments accrue — backstopped by a `peak ≤ 16×segment_size` runaway ceiling), **RSS** (`/proc/self/statm`×pagesize, `≤ baseline+64 MiB`; §8.5 — recovery materializes no payloads), **commit p999 ≤ 2 s**. Deterministic seeded LCG (no RNG dep) ⇒ reproduces from the seed; oracle self-prunes below `max_ckpt`/`oldest` so it never trips its own RSS watch. 
**Short run green here: `WAL_SOAK_SECONDS=4` ⇒ ~29 k ops / ~6.5 k commits / ~2 k checkpoints / ~2 k recoveries, fd 6→6, disk floor==1 every checkpoint, RSS +0.5 MiB, p999 sub-ms–7 ms.** **Falsifiability shown (honest form, per review)**: injecting a per-cycle leak (`deletable_prefix_len` reclaims N−1 of N) reds the floor on cycle 1 (`found 2 *.wal files`), then reverted — the real leak class, not a sub-working-set threshold. Wrapper `scripts/m9/soak.sh` (release build, scrapes the one-line JSON summary, re-emits a §5 evidence ledger via `scripts/m8/evidence.sh`, loud SHORT-vs-gate framing) + `.github/workflows/soak.yml` (schedule 05:17 + dispatch, **NOT per-PR**, contingent banner, uploads the evidence artifact). **No `src/` change** (public API only). **Honest framing (same stopgap as LazyFS/bench):** a SHORT run proves the driver/monitors/oracle work but is **not** the gate — the §14.13 soak is a **multi-hour** run on a dedicated runner with zero regression/violation. `cargo test --test soak` compiles, `clippy --test soak`, `shellcheck scripts/m9/soak.sh`, `actionlint soak.yml` clean. **Remaining M9:** CI-matrix tidy-up (§14.11); the F1–F4 N-CPU-hour + multi-hour-soak release-gate observation on a dedicated runner. diff --git a/docs/wal_design_v6.md b/docs/wal_design_v6.md index a93d47e..1f4ac55 100644 --- a/docs/wal_design_v6.md +++ b/docs/wal_design_v6.md @@ -617,14 +617,16 @@ Construct the specific resurrection hazard: write records, induce a torn tail su ### 14.9 Differential / reference testing (optional, high value) A deliberately slow, obviously-correct **reference parser** (separate code path, possibly another language) run alongside the production parser on the fuzz corpus and model-harness outputs. Any divergence in classification (valid / torn-tail / fatal-corruption / truncation offset) is a bug. +*(**M9 — IMPLEMENTED** as `tests/differential.rs` (needs `--features fuzzing`; per-PR `differential (§14.9)` job in `ci.yml`). 
An **independent** reference segment parser — re-deriving the §5.2/§5.3/§8.2 constants and re-implementing the length-bound check, the all-zero-header sentinel rule (post issue #26), the CRC-validation ordering, the bounded tail-vs-corruption forward scan, and the sealed-vs-active distinction from raw bytes, calling **no** production parse code (only the shared `crc32c` primitive) — is run against the production classifier via the `#[cfg(feature="fuzzing")]` `recover_segment_classify` accessor, with an **exact-match** oracle on the `SegClass` variant *and* its offsets/`max_lsn`. Inputs: a deterministic **scenario matrix** (184 cases) enumerating every arm — clean runs, torn tails, interior corruption incl. the `rec_type→0` vector, reserved types, LSN gaps, physical truncation, within/beyond-scan-bound continuations, `len>max`, non-1 bases — for both active and sealed segments; **plus** the committed Task-1 regrown corpora (`fuzz/corpus/{recovery,structure,decode,model}`, 1666 inputs) as raw segment bodies. This catches a classifier error **by construction** (two implementations disagreeing), the class of bug the issue-#26 sentinel hole was. **Falsifiability shown:** injecting the pre-#26 naive `rec_type==0 ⇒ sentinel` rule into the reference makes the differential fire (`production=Truncated … reference=Clean`) on both the `torn-last-zero_rectype` scenario and real corpus entries — i.e. it *would* have caught the sentinel bug — then reverted. **Scope note:** the corpus is consumed as raw segment bodies rather than by re-decoding each fuzz target's `arbitrary` envelope (which would duplicate the generators); the envelope-specific deep states are covered by the scenario matrix, and the corpus pass adds real-adversarial-bytes breadth. 
A thin `#[cfg(feature="fuzzing")]` accessor is the only `src/` addition — no public API widening, no second production parser.)* + ### 14.10 Soak / endurance Multi-hour randomized workload with periodic injected crashes+recoveries and checkpoints. Monitor: invariant violations, fd leaks, **disk-space leaks** (unreclaimed segments), memory growth, latency drift. *(**M9 — IMPLEMENTED** as `tests/soak.rs` (`#[ignore]`, env-driven `WAL_SOAK_SECONDS`/`WAL_SOAK_SEED`), driven by `scripts/m9/soak.sh` and run nightly/dispatch in `soak.yml`. It drives a **single long-lived `Wal`** through a weighted randomized loop — append (boundary-biased sizes 0/1/8/`max_record_size`/random), commit (timed into an `hdrhistogram`), `checkpoint(durable)`, and process-crash-recover (drop + reopen) — over a 4 KiB-segment/256 B-record config so rolls/splits/checkpoints fire constantly. After **every** recover it re-checks the §14.3 refinement envelope (D1/D3 durable ≥ committed watermark, D8 oldest ≤ authorized `up_to`+1 and monotone, D2/D6 dense byte-identical replay `oldest..=durable`) against an independent in-memory oracle, so a *correctness* regression under sustained load reds the run too — not just a leak. The four monitors are sampled every 128 ops with bounded-growth gates: **fd** (`/proc/self/fd`, `peak ≤ baseline+32`), **disk** (a deterministic **per-checkpoint floor** — because the soak always checkpoints to exactly `durable_lsn`, every sealed segment is fully superseded, so right after `checkpoint(durable)` exactly the active segment must remain (§9); a checkpoint that reclaims N−1 of N sealed segments reds on the **first** bad cycle, not after 16 accumulate — backstopped by a `peak ≤ 16×segment_size` ceiling for gross runaway between checkpoints), **RSS** (`/proc/self/statm`, `≤ baseline+64 MiB`; recovery materializes no payloads, §8.5), and **commit-latency** (p999 ≤ 2 s ceiling). Deterministic (seeded LCG, no RNG dep) so a failure reproduces from `WAL_SOAK_SEED`. 
**Honest framing (same stopgap as LazyFS/dm-flakey/bench):** a SHORT run (the per-dispatch default 300 s, and the in-session smoke) proves the driver/monitors/oracle work but is **NOT** the gate — the §14.13 soak gate is a **multi-hour** run with zero regression/violation; a short green run is CONTINGENT, a failure is a real bug. Falsifiability shown (honest form): injecting a per-cycle leak (`deletable_prefix_len` reclaims N−1 of N) reds the **floor** assertion on cycle 1 (`found 2 *.wal files`) — the real leak class, not a sub-working-set threshold; the oracle's refinement checks are the same ones the M6 harness demonstrated falsifiable. No `src/` change — the driver uses only the public API.)* ### 14.11 CI matrix -- **Per-PR (fast, blocking).** The always-on `ci.yml` jobs (`push:main` + `pull_request` + dispatch): **`rustfmt + clippy`** (`-D warnings`, all targets); **`build + test`** = §14.1 codec/CRC vectors, §14.2 (reduced proptest) round-trip/property suites, §14.7 **zero-alloc assertion** (enforced) + `cargo bench --no-run` (benches must compile, can't bitrot), and the §14.6 **`!Sync` compile-fail** (`tests/ui.rs` trybuild, stable-only) + **directory-lock / `Send`** runtime tests — all under `cargo test`; **`MSRV (1.85)`** `cargo check --all-targets --locked`; **`Miri (codec subset)`** (§14.6, `record`/`crc`/`lsn`/`config` under `-Zmiri-strict-provenance` — the whole Miri-executable surface, since the file-backed paths use foreign syscalls Miri can't run); **`fuzz smoke (F1/F2/F3/F4)`** (§14.5, short bounded `cargo fuzz run` per target — **BLOCKING: a reproducible crash is a real D1–D11 bug and reds the PR**, distinct from the nightly time-boxed lane and never gating an H1 dispatch); **`§14.4d dir-fsync presence (strace)`** (M8 Tier-1, FS-independent syscall-presence guard). 
Plus the **paths-filtered** per-PR durability gates that only fire when their sources change: **§14.8 H4 Half A** *(`m8-macos.yml` on `macos-latest` — a macOS-only `F_FULLFSYNC`-routing regression is invisible to Linux PR CI because the `cfg(macos)` path does not compile there; `dtruss` Half B stays owner-run per #19)*, the **§14.4b LazyFS** gate *(`lazyfs.yml`, informational)*, and the **§12 fsync-fault poison** gate *(`m8.yml`)*. -- **Nightly (scheduled, staggered off the `:00` rush + `workflow_dispatch`).** **§14.7 benchmarks + gates** *(`bench.yml`, 03:17 UTC; **informational** on hosted runners until a controlled/pinned-governor runner makes the §14.7 thresholds enforceable — same stopgap as the LazyFS gate)*; **§14.5 fuzz (time-boxed F1–F4)** *(`fuzz.yml`, 04:17 UTC; CONTINGENT, NOT the N-CPU-hour §14.13 gate — a crash still reds it)*; **§14.8 H3-physical + §14.4d** *(`m8-dmflakey.yml`, 04:23 UTC + push-to-main; hosted ubuntu VMs reach `dm-flakey`, so these are real gates there — ext4 hard with a source-confirmed block-layer EIO (#16) and a `drop_writes` positive control (#17), xfs/btrfs informational, **best-effort + loud skip** if a runner lacks dm-flakey)*; **§14.10 soak** *(`soak.yml`, 05:17 UTC; a short hosted slice is a CONTINGENT smoke, not the multi-hour gate)*. 
*(Still to be wired as nightly, not yet automated: full-iteration §14.2/§14.3 and the §14.9 differential — currently the reduced per-PR proptest + the M6 `model_oracle` suite cover these at PR granularity.)* +- **Per-PR (fast, blocking).** The always-on `ci.yml` jobs (`push:main` + `pull_request` + dispatch): **`rustfmt + clippy`** (`-D warnings`, all targets); **`build + test`** = §14.1 codec/CRC vectors, §14.2 (reduced proptest) round-trip/property suites, §14.7 **zero-alloc assertion** (enforced) + `cargo bench --no-run` (benches must compile, can't bitrot), and the §14.6 **`!Sync` compile-fail** (`tests/ui.rs` trybuild, stable-only) + **directory-lock / `Send`** runtime tests — all under `cargo test`; **`MSRV (1.85)`** `cargo check --all-targets --locked`; **`Miri (codec subset)`** (§14.6, `record`/`crc`/`lsn`/`config` under `-Zmiri-strict-provenance` — the whole Miri-executable surface, since the file-backed paths use foreign syscalls Miri can't run); **`fuzz smoke (F1/F2/F3/F4)`** (§14.5, short bounded `cargo fuzz run` per target — **BLOCKING: a reproducible crash is a real D1–D11 bug and reds the PR**, distinct from the nightly time-boxed lane and never gating an H1 dispatch); **`differential (§14.9)`** (`cargo test --features fuzzing --test differential` — an independent reference parser vs the production classifier over a 184-case scenario matrix + the committed corpora; **BLOCKING: a classification divergence is a real recovery-classifier bug**, ~6 s, never gates an H1 dispatch); **`§14.4d dir-fsync presence (strace)`** (M8 Tier-1, FS-independent syscall-presence guard). 
Plus the **paths-filtered** per-PR durability gates that only fire when their sources change: **§14.8 H4 Half A** *(`m8-macos.yml` on `macos-latest` — a macOS-only `F_FULLFSYNC`-routing regression is invisible to Linux PR CI because the `cfg(macos)` path does not compile there; `dtruss` Half B stays owner-run per #19)*, the **§14.4b LazyFS** gate *(`lazyfs.yml`, informational)*, and the **§12 fsync-fault poison** gate *(`m8.yml`)*. +- **Nightly (scheduled, staggered off the `:00` rush + `workflow_dispatch`).** **§14.7 benchmarks + gates** *(`bench.yml`, 03:17 UTC; **informational** on hosted runners until a controlled/pinned-governor runner makes the §14.7 thresholds enforceable — same stopgap as the LazyFS gate)*; **§14.5 fuzz (time-boxed F1–F4)** *(`fuzz.yml`, 04:17 UTC; CONTINGENT, NOT the N-CPU-hour §14.13 gate — a crash still reds it)*; **§14.8 H3-physical + §14.4d** *(`m8-dmflakey.yml`, 04:23 UTC + push-to-main; hosted ubuntu VMs reach `dm-flakey`, so these are real gates there — ext4 hard with a source-confirmed block-layer EIO (#16) and a `drop_writes` positive control (#17), xfs/btrfs informational, **best-effort + loud skip** if a runner lacks dm-flakey)*; **§14.10 soak** *(`soak.yml`, 05:17 UTC; a short hosted slice is a CONTINGENT smoke, not the multi-hour gate)*. *(The §14.9 differential runs **per-PR** (`differential (§14.9)` job, ~6 s) rather than nightly. Still to be wired as nightly, not yet automated: full-iteration §14.2/§14.3 — currently the reduced per-PR proptest + the M6 `model_oracle` suite cover these at PR granularity.)* - **Pre-release / manual:** §14.8 H1 power-pull on target hardware, §14.10 soak *(M9: `soak.yml`, schedule + manual, **contingent** until a multi-hour run on a dedicated runner — a hosted nightly slice is a smoke, not the gate; same stopgap as the LazyFS/bench informational lanes; each run uploads its §5 soak-evidence artifact)*. 
*(The nightly §14.8 dm-flakey/macOS gates above also accept `workflow_dispatch`; a manual run posts its §5 evidence to the tracking issue as the human sign-off, while the nightly cron stays artifact-only and surfaces regressions as a red build.)* - **OS matrix:** Linux (primary — sole platform for the §14.8 hardware-durability gate), macOS (dev/correctness — exercises `F_FULLFSYNC`; unit/property/fuzz only, not §14.8). Windows is out of scope for v1 (§8.3). - **FS matrix (Linux):** ext4, xfs, btrfs (CoW), tmpfs (logic only — never durability claims). **Scope, honestly:** the FS matrix is meaningful *only* for the durability/metadata-fault gates whose outcome depends on filesystem journaling and cache semantics — §14.4b LazyFS, §14.8 H3-physical, and the §14.4d negative control (which reproduces behaviorally only on a journal-less FS — see §14.4d). The **byte-level logic** — the record codec, CRC, the recovery classifier/bounded scan, and everything the §14.5 fuzz targets and the §14.6 Miri subset exercise — is **filesystem-independent** (pure in-memory parsing over arbitrary bytes, or plain `pwrite`/`fdatasync` with no FS-specific assumption), so those lanes run once on the runner's default FS and are **not** multiplied across the matrix. Over-claiming an "all-FS fuzz matrix" would be dishonest; the fuzz/codec guarantees hold by construction regardless of FS. 
@@ -636,14 +638,14 @@ Multi-hour randomized workload with periodic injected crashes+recoveries and che | D1 Durability on commit | §14.3, §14.4b clear-cache, §14.4c, §14.8 H1 | | D2 Dense gap-free suffix | §14.2 P2/P4, §14.3, §14.4b torn-seq, §14.1 (contiguity), §14.8 | | D3 At-most-tail loss | §14.3, §14.4a, §14.4b, §14.4c, §14.8 H1 | -| D4 Torn-tail truncation | §14.4b torn-op, §14.4e (i), §14.4f, §14.4g | -| D5 Mid-log corruption fatal | §14.4e (ii)(iii)(v), sealed-segment cases | +| D4 Torn-tail truncation | §14.4b torn-op, §14.4e (i), §14.4f, §14.4g, §14.9 differential (truncation offset) | +| D5 Mid-log corruption fatal | §14.4e (ii)(iii)(v), sealed-segment cases, `recovery::rec_type_zeroed_interior_is_fatal_tornmidlog` (issue #26), §14.9 differential (interior-corruption classification) | | D6 Read-back fidelity | §14.2 P1, §14.3 | | D7 Idempotent recovery | §14.2 P6, §14.3 (no-mutation reopen) | | D8 Checkpoint safety | §14.1 math, §14.2 P5, §14.3 (terminal reopen), §14.4c | | D9 Crash-anywhere recoverable | §14.4a, §14.4c (incl. split-batch & roll) | -| D10 No buried garbage / resurrection | §14.4g (incl. stale-valid-record case) | -| D11 Bounded recovery parsing | §14.5 F1/F2/F3, §14.4f, §14.6 Miri, bounded-scan counter | +| D10 No buried garbage / resurrection | §14.4g (incl. stale-valid-record case), §14.9 differential (beyond-bound continuation ⇒ torn tail, not resurrected) | +| D11 Bounded recovery parsing | §14.5 F1/F2/F3/F4, §14.4f, §14.6 Miri, §14.9 differential, bounded-scan counter (structural drift guard) | | D12 Sealed-segment immutability | §14.4h sealed-segment-immutability, concurrent-tailer, backup round-trip | ### 14.13 Definition of Done (release gate) @@ -652,6 +654,7 @@ Multi-hour randomized workload with periodic injected crashes+recoveries and che - §14.4d negative control catches the injected bug **and** the correct build passes. 
*(M8 — **satisfied by Tier-1.** **Tier-1 (primary) PASSES, deterministic + per-PR:** `scripts/m8/dirfsync-presence.sh` (in `ci.yml`) straces the roll path and asserts the correct build issues the roll-time directory `fsync` while `--features inject_no_dir_fsync` does not — verified green (`correct=5` dir-fsyncs vs `inject=1`). FS-independent syscall-presence regression guard; the row's satisfier. **Tier-2 (behavioral power-loss) — CLOSED as a documented negative result (PR #21, owner Fedora 43):** the synchronized mid-run cut (`dirfsync_cut_workload`, `dirfsync-negative `) blocks the workload with the new segment's dirent un-synced and cuts inside the window, yet the inject build recovers fully on **every** config tested — ext4/xfs/btrfs, journal-less ext4 (incl. `ext2`-format, serviced by the ext4 driver on modern kernels — standalone ext2 driver removed in Linux 6.9), and journaled ext4 `data=writeback` (the driver's weakest ordering). The dirent reaches disk via the file's own `fdatasync` everywhere; the earlier "ext2 block-adjacency" claim is **retracted** and the mechanism was not isolated. No readily-available Linux FS exposes it behaviorally ⇒ honest negative result, not a gap. **Tier-3 — ext4/xfs/btrfs (+ journal-less "ext2") INCONCLUSIVE-by-design**, never red on a masked miss. `fsync_dir` retained unconditionally as a POSIX-portability safeguard. Earlier "certified on ext4" was wrong; the harness loud-skips where dm-flakey is absent rather than fake green; the positive split+roll power-loss case passes under LazyFS in M4.)* - §14.4g resurrection test passes **and** is demonstrated to fail both (a) if zeroing-on-truncate is disabled and (b) if the invalidation is not durably synced (the power-loss-of-zeroing assertion). 
- Fuzzers F1–F4: **≥ 24 CPU-hours per target accumulated since `2b198e7` (the sentinel fix = the last on-disk-format change), zero outstanding crashes, bounded-scan counter never exceeded.** The "since the last format change" clause is load-bearing: a format change **resets this clock** and requires a corpus **regrow + `cargo fuzz cmin`** on the new format (the stale coverage map no longer reflects the classification the parser performs) — see `fuzz/README.md`. N is pinned at **24 CPU-hours/target** (96 total) — sized to the format's small, bounded surface; raise it if the format grows materially. *(**M9 in progress. F1 (recovery-parser) IMPLEMENTED** — `fuzz/fuzz_targets/recovery.rs`, primary surface the real `Wal::open` over an adversarial multi-segment directory, bounded-scan counter instrumented on the real scan loop and asserted against the shared `scan_bound` symbol (falsifiability demonstrated). Built + smoke-green (60 000 runs, zero crashes); CI is `fuzz.yml` (nightly/dispatch, time-boxed, contingent) + a blocking per-PR smoke in `ci.yml`. The **N-CPU-hour gate itself stays OPEN** — a hosted short slice does not meet it; carry until a dedicated runner accrues the hours. **Framing:** the "bounded-scan counter never exceeds the bound" clause is satisfied **structurally** (the loop window *is* `scan_bound`), so it is a drift/regression guard, not the headline — the substantive D11 proof is the crash-free / no-OOB / termination surface over adversarial inputs (the running fuzz). **F2 (single-record decoder) IMPLEMENTED** — `fuzz/fuzz_targets/decode.rs`, raw bytes × a boundary-biased `max_record_size` set, CRC-valid-seeded so the Record path is reached, bounds-soundness asserts, falsifiability shown (300 000 runs, zero crashes); same CI lanes. 
**F3 (structure-aware classifier) IMPLEMENTED** — `fuzz/fuzz_targets/structure.rs`, valid dense segment + one localized mutation driving `Wal::open`, with a sharp D4/D5 oracle (interior corruption fatal, last corruption truncates) + D6/D10 byte-identity + D7 idempotent reopen; falsifiability shown (forced "no continuation" ⇒ `D5: interior corruption returned Ok`); built + smoke-green (150 000 runs, zero crashes); same CI lanes. **F4 (op-script oracle) IMPLEMENTED** — `fuzz/fuzz_targets/model.rs` decodes fuzzer bytes into a `WalConfig` + weighted `Vec` and drives the M6 executor `tests/model/mod.rs::run` verbatim (`#[path]`, zero duplication), panicking on any D1/D2/D3/D6/D7/D8 breach; process-crash model only; falsifiability shown (seeded recovery loss ⇒ `D1/D3` panic); smoke-green (40 000 runs, zero crashes). **All four targets F1–F4 now exist**; what remains for the §14.13 fuzz row is the **24-CPU-hour/target observation** on a dedicated runner (the gate stays OPEN until then — a hosted short slice does not meet it). **Corpus regrown + `cargo fuzz cmin`'d on the post-`2b198e7` format** (the sentinel fix changed how `rec_type==0` is classified, staling the pre-fix coverage maps): recovery 174→316, structure 130→129, decode 17→40, model 321→348 minimized entries; per-target coverage rose (recovery 780→892, structure 561→592, model 798→839), zero crashes found during the regrow. So the clock legitimately starts at `2b198e7`; this makes the *contingent* statement non-vacuous but does **not** discharge the 24-hour gate.)* +- Differential reference parser (§14.9): **DONE.** `tests/differential.rs` runs an independent, spec-derived reference classifier against the production `recover_segment` classifier with an **exact-match** oracle (variant + offset + `max_lsn`) over a 184-case scenario matrix and the committed Task-1 corpora (1666 inputs); per-PR `differential (§14.9)` job, ~6 s, a divergence reds the build. 
Falsifiability shown (the pre-#26 naive sentinel rule injected into the reference makes it fire). Corroborates the D4/D5/D10/D11 classification rows of §14.12. Only `src/` addition is the `#[cfg(feature="fuzzing")]` `recover_segment_classify` accessor (no public API widening, no second production parser). - §14.8 H1: ≥ M power-pull cycles on target hardware, zero acked-record loss. *(M8: the **harness + runbook are built** — `src/bin/power_pull_{workload,verify}.rs` + `scripts/m8/power-pull.sh`, with the off-box network side channel, send-strictly-after-`commit() Ok` ack-ordering, contiguous-watermark conservative verify, and the H2 vacuous-pass gate as a precondition; the mechanical chain was dry-run green on loopback. **OPEN-pending-owner-run** for the actual ≥50-cycle power-pull on real/cache-configured hardware (no cuttable target in the sandbox). H3 fsync-failure poison: the **§12 state machine RUNS green** via the LD_PRELOAD shim (`scripts/m8/fsync-fault.sh`); the **physical** dm-flakey half now runs **nightly + manual on hosted CI** (`m8-dmflakey.yml`, best-effort + loud skip) instead of owner-only. H4 macOS `F_FULLFSYNC` **Half A** (routing/smoke) now runs on **macOS CI** (`m8-macos.yml`); Half B (`dtruss` trace) stays owner-run (root + SIP). See `docs/m8-runbook.md`.)* - Zero-allocation assertion (§14.7) passes for append/commit and `Reader::next`. *(M7: PASSES — hardened to also prove no-roll in the measured window and to cover a `max_record_size` payload. The §14.7 benches + regression gate exist; **gate enforcement is OPEN-pending-controlled-runner** — informational on hosted CI per §14.11, a real gate on a pinned-governor runner.)* - Miri clean on covered suites. *(**M9 — DONE for the covered (FS-free) suites.** Per-PR `Miri (codec subset)` job (`ci.yml`) runs `record::`/`crc::`/`lsn::`/`config::` under `-Zmiri-strict-provenance`, green (25 tests, ~6 s). 
Scope is honest: zero `unsafe` ⇒ a regression guard, not a bug hunt; the file-backed paths use foreign syscalls Miri can't run (covered by the ASan fuzz targets). Not enabling `-Zmiri-symbolic-alignment-check` (the `crc32c` SW path trips it while alignment-correct at runtime).)* diff --git a/src/lib.rs b/src/lib.rs index 3c70ad3..68c14a1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -148,4 +148,67 @@ pub mod fuzzing { pub fn encode_record_into(buf: &mut Vec<u8>, lsn: u64, payload: &[u8]) -> usize { crate::record::encode_into(buf, Lsn(lsn), payload) } + + /// The production per-segment recovery **classification** (§8.2), flattened to + /// a plain value for the §14.9 differential tester (`tests/differential.rs`). + /// This is the exact classification surface an independent reference parser + /// must reproduce byte-for-byte; a divergence is a recovery-classifier bug. + /// + /// `Truncated`/`Clean` carry `max_lsn` and the truncation `offset`; the two + /// fatal arms carry the failing `offset`. `OtherErr` catches any error the + /// single-segment `recover_segment` is not expected to produce here (e.g. an + /// I/O error) so the differential can flag it rather than silently coerce it. + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + pub enum SegClass { + /// Clean end of records; `max_lsn` is the highest valid LSN (`base-1` if + /// the segment is empty). + Clean { max_lsn: u64 }, + /// Active-segment torn tail truncated at `offset`; `max_lsn` is the last + /// valid record before it. + Truncated { offset: u64, max_lsn: u64 }, + /// Mid-log corruption: an invalid record with a valid record still ahead + /// within the bounded forward scan (active segment) — fatal (D5). + TornMidLog { offset: u64 }, + /// Any invalid record in a sealed segment — fatal, no forward scan (D5). + Corruption { offset: u64 }, + /// An error outside the classification surface (e.g. I/O). The differential + /// treats this as its own class so it is never silently equated.
+ OtherErr, + } + + /// Run the **real** production `recover_segment` (§8.2) over one open segment + /// `file` and return its classification. Used only by the §14.9 differential + /// tester to compare production against an independent reference parser. + /// + /// NOTE: on a torn tail this performs the production durable zeroing of + /// `[offset, EOF)` (§8.2.1) as a side effect — so the differential must pass + /// production its **own** copy of the segment file and read the classification + /// from this return value, never re-derive it from the mutated file. + #[must_use] + pub fn recover_segment_classify( + file: &File, + base_lsn: u64, + is_active: bool, + segment_size: u64, + max_record_size: u32, + ) -> SegClass { + use crate::error::WalError; + use crate::wal::TailState; + let base = Lsn(base_lsn.max(1)); + match crate::recovery::recover_segment(file, base, is_active, segment_size, max_record_size) + { + Ok(rec) => match rec.tail_state { + TailState::Clean => SegClass::Clean { + max_lsn: rec.max_lsn.0, + }, + TailState::TruncatedAt { offset, .. } => SegClass::Truncated { + offset, + max_lsn: rec.max_lsn.0, + }, + }, + Err(WalError::TornMidLog { offset, .. }) => SegClass::TornMidLog { offset }, + Err(WalError::Corruption { offset, .. }) => SegClass::Corruption { offset }, + Err(_) => SegClass::OtherErr, + } + } } diff --git a/tests/differential.rs b/tests/differential.rs new file mode 100644 index 0000000..531546e --- /dev/null +++ b/tests/differential.rs @@ -0,0 +1,597 @@ +//! §14.9 — differential / reference-parser tester. +//! +//! A deliberately slow, obviously-correct **reference** segment parser, written +//! from the on-disk-format spec (§5.2/§5.3/§8.2) in a **separate code path**, run +//! alongside the production `recover_segment` classifier. **Any divergence in +//! classification is a bug** — this is the one technique that catches a +//! recovery-classifier error *by construction* (two independent implementations +//! 
disagreeing) rather than probabilistically (a fuzzer happening to hit the +//! trigger). It exists because the issue-#26 sentinel hole was exactly that class +//! of bug: a classifier that mis-mapped one byte pattern. +//! +//! ## Independence (the whole point — see the module's hard rules) +//! +//! The reference below calls **no** production parse code: not `record::decode`, +//! not `recover_segment`, not the `segment.rs` read helpers. It re-derives the +//! constants and re-implements the length-bound check, the all-zero-header +//! sentinel rule, the CRC-validation ordering, the bounded tail-vs-corruption +//! forward scan, and the sealed-vs-active distinction from scratch, reading raw +//! bytes. It **may** use the `crc32c` crate (via `open_wal::crc32c`) — that is a +//! shared *dependency*, not shared *parse logic*; re-implementing CRC-32C would +//! test the crate, not the parser. +//! +//! It implements the **post-issue-#26 contract**: the sentinel is an all-zero +//! 20-byte header, and a `rec_type == 0` record with a non-zero CRC is `Invalid` +//! (→ `TornMidLog` interior / torn-tail at the end), never a clean sentinel. A +//! naive `rec_type == 0 ⇒ sentinel` reference would make the differential fire on +//! the corpus — which would be catching the *reference's* bug (see the +//! falsifiability note in the PR). +//! +//! ## Inputs +//! +//! 1. A deterministic **scenario matrix** (`scenario_cases`) that enumerates every +//! classification arm — clean runs, torn tails, interior corruption (incl. the +//! `rec_type→0` case), reserved types, LSN gaps, physical truncation, buried +//! stale records, garbage — for both active and sealed segments. This is the +//! exact-match oracle: production and reference must return the identical +//! `SegClass` variant *and* offsets/`max_lsn`. +//! 2. The committed **fuzz corpora** (`fuzz/corpus/{recovery,structure,decode,model}`, the +//! Task-1 regrown set) fed as raw segment *bodies* after a valid header — real +//!
fuzzer-discovered byte patterns over which the two parsers must still agree. +//! (Scope: we consume the raw corpus bytes as bodies rather than re-decoding +//! each target's `arbitrary` envelope — duplicating those generators would be +//! fragile; the differential property "both parsers agree on these bytes" holds +//! regardless of how the bytes were originally produced. The envelope-specific +//! deep states are covered exhaustively by the scenario matrix instead.) +//! +//! Requires the `fuzzing` feature (for the `recover_segment_classify` accessor): +//! `cargo test --features fuzzing --test differential`. +#![cfg(feature = "fuzzing")] + +use std::fs::OpenOptions; +use std::os::unix::fs::FileExt; +use std::path::Path; + +use open_wal::crc32c; +use open_wal::fuzzing::{self, SegClass}; + +// ---- constants, re-derived independently from §5.2/§5.3 (NOT imported) ---- +const HEADER_SIZE: usize = 64; // §5.2 segment header +const REC_HEADER: usize = 20; // §5.3 record header +const CRC_OFF: usize = 0; +const LEN_OFF: usize = 4; +const LSN_OFF: usize = 8; +const REC_TYPE_OFF: usize = 16; +const REC_TYPE_FULL: u8 = 1; + +/// Padding after a `payload_len`-byte payload to the next 8-byte boundary (§5.3): +/// `pad = (8 − ((20 + payload_len) mod 8)) mod 8`. Re-derived here. +fn ref_padding(payload_len: u64) -> u64 { + (8 - ((REC_HEADER as u64 + payload_len) % 8)) % 8 +} + +/// The bounded forward-scan distance (§8.2 step 5): the largest frame a single +/// record can occupy — `max_record_size` payload + 20-byte header + up to 7 +/// padding + 1. Re-derived independently (production hoists the same value into +/// `recovery::scan_bound`; we do NOT import it — a differential that shared the +/// constant would not be independent). +fn ref_scan_bound(max_record_size: u32) -> u64 { + u64::from(max_record_size) + 28 +} + +/// Outcome of reading one candidate record at an offset (mirrors, independently, +/// `segment::read_record_at`'s three-way split). 
+enum RScan { + Record { lsn: u64, framed: u64 }, + CleanEnd, + Invalid, +} + +/// Independent, read-only reimplementation of `segment::read_record_at` (§8.2 +/// record-level checks). `bytes` is the physical file content; `segment_size` is +/// the logical bound. A read that runs past `bytes.len()` models a short physical +/// read (a file truncated below `segment_size`, §14.4f) ⇒ `Invalid`. +fn ref_read_record_at(bytes: &[u8], offset: u64, segment_size: u64, max_record_size: u32) -> RScan { + let remaining = segment_size.saturating_sub(offset); + // §8.2 step 1: fewer than a header's worth of logical space left ⇒ clean end. + if remaining < REC_HEADER as u64 { + return RScan::CleanEnd; + } + let off = offset as usize; + // Physical header read: a short read (truncated file) is a candidate boundary. + let header = match bytes.get(off..off + REC_HEADER) { + Some(h) => h, + None => return RScan::Invalid, + }; + // §8.2 step 1: the end-of-records sentinel is an ALL-ZERO 20-byte header — NOT + // `rec_type == 0` alone (issue #26). A `rec_type == 0` record with any non-zero + // header byte falls through to the CRC check below and is `Invalid`. + if header.iter().all(|&b| b == 0) { + return RScan::CleanEnd; + } + // Length bound BEFORE touching payload (§5.3 / D11): caps `length` at + // `max_record_size`, so the framed size below cannot be adversarially huge. + let length = u32::from_le_bytes(header[LEN_OFF..LEN_OFF + 4].try_into().unwrap()); + if length > max_record_size { + return RScan::Invalid; + } + let framed = REC_HEADER as u64 + u64::from(length) + ref_padding(u64::from(length)); + // Framed record must fit the logical remaining space, else short/torn tail. + if framed > remaining { + return RScan::Invalid; + } + // Physical payload+padding read: a short read is again a truncated file. 
+ let full = match bytes.get(off..off + framed as usize) { + Some(f) => f, + None => return RScan::Invalid, + }; + // CRC-32C over [4, framed): header tail + payload + padding (§5.3). Using the + // shared crc32c crate is sanctioned — it is the checksum primitive, not parse + // logic. + let stored = u32::from_le_bytes(full[CRC_OFF..CRC_OFF + 4].try_into().unwrap()); + if crc32c(&full[LEN_OFF..framed as usize]) != stored { + return RScan::Invalid; + } + // CRC is intact ⇒ the bytes are genuine; a non-Full type is then a real + // reserved/unknown record (UnknownRecType), still `Invalid` to recovery. + if full[REC_TYPE_OFF] != REC_TYPE_FULL { + return RScan::Invalid; + } + let lsn = u64::from_le_bytes(full[LSN_OFF..LSN_OFF + 8].try_into().unwrap()); + RScan::Record { lsn, framed } +} + +/// Independent reimplementation of the §8.2 bounded forward scan: from `x + 8`, +/// step 8 bytes at a time up to `x + 8 + bound` (inclusive), looking for a +/// structurally valid record that *continues the log* (`lsn >= expected`, the +/// v6.1 corrected condition). Read-only — it does not zero anything (the +/// classification of a single pass does not depend on the durable zeroing, which +/// only affects a *later* recovery's idempotence). +fn ref_forward_scan_finds_valid( + bytes: &[u8], + x: u64, + expected: u64, + segment_size: u64, + max_record_size: u32, +) -> bool { + let bound = ref_scan_bound(max_record_size); + let start = x.saturating_add(8); + let end = start.saturating_add(bound); + let mut p = start; + while p <= end { + if let RScan::Record { lsn, .. } = + ref_read_record_at(bytes, p, segment_size, max_record_size) + { + if lsn >= expected { + return true; + } + } + p += 8; + } + false +} + +/// The independent reference classifier (mirrors `recovery::recover_segment` + +/// `classify`, from scratch). Returns the same `SegClass` the production accessor +/// returns, so a divergence is a plain `assert_eq!` failure. 
+fn reference_classify( + bytes: &[u8], + base_lsn: u64, + is_active: bool, + segment_size: u64, + max_record_size: u32, +) -> SegClass { + // Production clamps an out-of-range base to the lowest legal value (§5.2: + // Lsn(0) is the reserved sentinel); mirror it so `base - 1` cannot underflow. + let base = base_lsn.max(1); + let mut offset = HEADER_SIZE as u64; + let mut expected = base; + let mut last_valid = base - 1; // base-1: empty segment ⇒ Clean{base-1} (§8.1) + + loop { + match ref_read_record_at(bytes, offset, segment_size, max_record_size) { + RScan::Record { lsn, framed } => { + if lsn != expected { + // A structurally valid record with the wrong LSN is invalid at + // this offset (§8.2 step 4) — classify tail vs corruption. + return ref_classify_boundary( + bytes, + base, + is_active, + segment_size, + max_record_size, + offset, + expected, + last_valid, + ); + } + last_valid = lsn; + offset += framed; + expected = lsn + 1; + } + RScan::CleanEnd => { + return SegClass::Clean { + max_lsn: last_valid, + }; + } + RScan::Invalid => { + return ref_classify_boundary( + bytes, + base, + is_active, + segment_size, + max_record_size, + offset, + expected, + last_valid, + ); + } + } + } +} + +/// Classify an invalid record at offset `x` (§8.2 step 5), independently. +#[allow(clippy::too_many_arguments)] +fn ref_classify_boundary( + bytes: &[u8], + _base: u64, + is_active: bool, + segment_size: u64, + max_record_size: u32, + x: u64, + expected: u64, + last_valid: u64, +) -> SegClass { + if !is_active { + // A sealed segment is fully synced before the next segment exists (§7.3): + // no torn tail, any invalid record is fatal corruption. No forward scan. + return SegClass::Corruption { offset: x }; + } + if ref_forward_scan_finds_valid(bytes, x, expected, segment_size, max_record_size) { + // A genuine acked record after the gap ⇒ truncating would drop it (D5). 
+ SegClass::TornMidLog { offset: x } + } else { + // Torn tail: truncate at x (production also durably zeroes [x, EOF)). + SegClass::Truncated { + offset: x, + max_lsn: last_valid, + } + } +} + +// ------------------------------------------------------------------------- +// Harness: write bytes to a real file, run BOTH parsers, assert identical. +// ------------------------------------------------------------------------- + +struct Harness { + dir: tempfile::TempDir, + counter: usize, + checked: usize, +} + +impl Harness { + fn new() -> Self { + Harness { + dir: tempfile::tempdir().expect("tempdir"), + counter: 0, + checked: 0, + } + } + + /// Run production and reference on the same `bytes` and assert they classify + /// identically. Production gets its OWN file (it may durably zero a torn tail, + /// §8.2.1); the reference reads the pristine in-memory bytes. + fn check(&mut self, label: &str, bytes: &[u8], base: u64, is_active: bool, seg: u64, max: u32) { + self.counter += 1; + let path = self.dir.path().join(format!("case-{}.bin", self.counter)); + let file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open(&path) + .expect("open case file"); + file.write_all_at(bytes, 0).expect("write case bytes"); + file.sync_data().ok(); + + let prod = fuzzing::recover_segment_classify(&file, base, is_active, seg, max); + let reference = reference_classify(bytes, base, is_active, seg, max); + assert_eq!( + prod, reference, + "DIVERGENCE [{label}] active={is_active} base={base} seg={seg} max={max}: \ + production={prod:?} reference={reference:?}" + ); + self.checked += 1; + } +} + +// ------------------------------------------------------------------------- +// Segment builders (use the production encoders to lay valid bytes; corruption +// is applied afterward, so the reference and production see identical inputs). 
+// ------------------------------------------------------------------------- + +fn framed_size(payload_len: usize) -> usize { + REC_HEADER + payload_len + ref_padding(payload_len as u64) as usize +} + +/// Build `header + dense records` for `base`, returning the bytes and the +/// absolute offset of each record. +fn build_segment(base: u64, payloads: &[&[u8]]) -> (Vec<u8>, Vec<usize>) { + let mut bytes = fuzzing::segment_header_bytes(base); + let mut offsets = Vec::new(); + for (i, p) in payloads.iter().enumerate() { + offsets.push(bytes.len()); + fuzzing::encode_record_into(&mut bytes, base + i as u64, p); + } + (bytes, offsets) +} + +/// Recompute a record's CRC over [4, framed) in place (for reserved-type cases). +fn refix_crc(bytes: &mut [u8], off: usize, framed: usize) { + let crc = crc32c(&bytes[off + 4..off + framed]); + bytes[off..off + 4].copy_from_slice(&crc.to_le_bytes()); +} + +/// The deterministic scenario matrix — one closure per case that returns +/// `(label, bytes, base, seg, max)`; the harness runs each for active AND sealed. +fn scenario_cases(h: &mut Harness) { + let configs: &[(u64, u32)] = &[(4096, 256), (65536, 4096)]; + + for &(seg, max) in configs { + let base = 1u64; + let pcap = (max as usize).min(48); + let p = |n: usize| vec![0xABu8; n.min(pcap)]; + let sizes = [0usize, 1, 7, 8, 20, pcap]; + + // 1. Empty segment (header only, padded with sentinel zeros to seg). + { + let mut bytes = fuzzing::segment_header_bytes(base); + bytes.resize(seg as usize, 0); + for &active in &[true, false] { + h.check("empty", &bytes, base, active, seg, max); + } + } + + // 2. Clean dense runs of k records, various payload sizes, padded to seg.
+ for k in 1..=5usize { + for &sz in &sizes { + let pl: Vec<Vec<u8>> = (0..k).map(|_| p(sz)).collect(); + let refs: Vec<&[u8]> = pl.iter().map(|v| v.as_slice()).collect(); + let (mut bytes, _offs) = build_segment(base, &refs); + if bytes.len() > seg as usize { + continue; + } + bytes.resize(seg as usize, 0); + for &active in &[true, false] { + h.check("clean-run", &bytes, base, active, seg, max); + } + } + } + + // 3. Torn/invalid LAST record, several corruption kinds. + // active ⇒ Truncated (or TornMidLog if a continuation is planted); + // sealed ⇒ Corruption. + for kind in [ + "flip_crc", + "zero_rectype", + "extend_len", + "reserved_type", + "flip_pad", + ] { + let pl = [p(8), p(8), p(8)]; + let refs: Vec<&[u8]> = pl.iter().map(|v| v.as_slice()).collect(); + let (mut bytes, offs) = build_segment(base, &refs); + let last = *offs.last().unwrap(); + let framed = framed_size(8); + match kind { + "flip_crc" => bytes[last] ^= 0xFF, + "zero_rectype" => bytes[last + REC_TYPE_OFF] = 0, // issue #26 vector + "extend_len" => { + let nl = 8u32.wrapping_add(8); + bytes[last + 4..last + 8].copy_from_slice(&nl.to_le_bytes()); + } + "reserved_type" => { + bytes[last + REC_TYPE_OFF] = 2; + refix_crc(&mut bytes, last, framed); + } + "flip_pad" => { + // padding byte (payload 8 ⇒ framed 32 ⇒ 4 pad bytes at 28..32) + bytes[last + REC_HEADER + 8] ^= 0xFF; + } + _ => unreachable!(), + } + bytes.resize(seg as usize, 0); + for &active in &[true, false] { + h.check(&format!("torn-last-{kind}"), &bytes, base, active, seg, max); + } + } + + // 4. Interior corruption (record 1 of 3 corrupt; record 2 valid after it). + // active ⇒ TornMidLog; sealed ⇒ Corruption. Covers the issue-#26 + // interior rec_type→0 vector explicitly.
+ for kind in ["flip_crc", "zero_rectype"] { + let pl = [p(8), p(8), p(8)]; + let refs: Vec<&[u8]> = pl.iter().map(|v| v.as_slice()).collect(); + let (mut bytes, offs) = build_segment(base, &refs); + let mid = offs[1]; + match kind { + "flip_crc" => bytes[mid] ^= 0xFF, + "zero_rectype" => bytes[mid + REC_TYPE_OFF] = 0, + _ => unreachable!(), + } + bytes.resize(seg as usize, 0); + for &active in &[true, false] { + h.check(&format!("interior-{kind}"), &bytes, base, active, seg, max); + } + } + + // 5. LSN gap: a structurally valid record with a skipped LSN in the middle. + { + let (mut bytes, offs) = build_segment(base, &[&p(8), &p(8)]); + // Overwrite record 2 with a valid record whose LSN is base+5 (a gap). + let mut rec = fuzzing::segment_header_bytes(base); // scratch, unused header + rec.clear(); + fuzzing::encode_record_into(&mut rec, base + 5, &p(8)); + let at = offs[1]; + bytes[at..at + rec.len()].copy_from_slice(&rec); + bytes.resize(seg as usize, 0); + for &active in &[true, false] { + h.check("lsn-gap", &bytes, base, active, seg, max); + } + } + + // 6. Sentinel (all-zero header) mid-run ⇒ Clean at that offset. + { + let (mut bytes, offs) = build_segment(base, &[&p(8), &p(8)]); + let at = offs[1]; + for b in &mut bytes[at..at + REC_HEADER] { + *b = 0; + } + bytes.resize(seg as usize, 0); + for &active in &[true, false] { + h.check("mid-sentinel", &bytes, base, active, seg, max); + } + } + + // 7. Physically truncated file mid-last-record (short read ⇒ Invalid). + { + let (bytes, offs) = build_segment(base, &[&p(8), &p(8), &p(8)]); + let last = *offs.last().unwrap(); + let cut = last + REC_HEADER + 2; // mid-way through the last record + let short = bytes[..cut.min(bytes.len())].to_vec(); + for &active in &[true, false] { + h.check("phys-truncated", &short, base, active, seg, max); + } + } + + // 8. Interior torn tail with a genuine continuation just WITHIN the bound + // (active ⇒ TornMidLog) and one just BEYOND it (active ⇒ Truncated). 
+ for within in [true, false] { + let (mut bytes, offs) = build_segment(base, &[&p(8)]); + let x = offs[0] + framed_size(8); // offset just past record 1 (expected base+1) + // A torn record at x (bad CRC). + let mut torn = Vec::new(); + fuzzing::encode_record_into(&mut torn, base + 1, &p(4)); + torn[0] ^= 0xFF; + if x + torn.len() <= seg as usize { + bytes.resize(x, 0); + bytes.extend_from_slice(&torn); + } + // Plant a valid continuation (lsn base+1) within/beyond the scan bound. + let bound = ref_scan_bound(max); + let end = (x as u64) + 8 + bound; + let cont_off = if within { + ((end / 8) * 8) as usize // largest 8-aligned start <= end + } else { + (((end / 8) * 8) + 8) as usize // first strictly beyond + }; + let mut cont = Vec::new(); + fuzzing::encode_record_into(&mut cont, base + 1, &p(8)); + let needed = cont_off + cont.len(); + if needed <= seg as usize { + if bytes.len() < needed { + bytes.resize(needed, 0); + } + bytes[cont_off..cont_off + cont.len()].copy_from_slice(&cont); + bytes.resize(seg as usize, 0); + for &active in &[true, false] { + let label = if within { + "cont-within-bound" + } else { + "cont-beyond-bound" + }; + h.check(label, &bytes, base, active, seg, max); + } + } + } + + // 9. Reserved rec_type on record 1 of 2 (CRC fixed) ⇒ UnknownRecType. + // active ⇒ TornMidLog (valid record 2 follows); sealed ⇒ Corruption. + { + let (mut bytes, offs) = build_segment(base, &[&p(8), &p(8)]); + let at = offs[0]; + bytes[at + REC_TYPE_OFF] = 3; + refix_crc(&mut bytes, at, framed_size(8)); + bytes.resize(seg as usize, 0); + for &active in &[true, false] { + h.check("reserved-interior", &bytes, base, active, seg, max); + } + } + + // 10. length > max_record_size at the first record ⇒ Invalid boundary. 
+ { + let (mut bytes, offs) = build_segment(base, &[&p(8), &p(8)]); + let at = offs[0]; + let huge = max.wrapping_add(1); + bytes[at + 4..at + 8].copy_from_slice(&huge.to_le_bytes()); + bytes.resize(seg as usize, 0); + for &active in &[true, false] { + h.check("len-over-max", &bytes, base, active, seg, max); + } + } + + // 11. A non-1 base (offsets/max_lsn must track it) with a torn tail. + { + let b2 = 1000u64; + let (mut bytes, offs) = build_segment(b2, &[&p(8), &p(8)]); + let last = *offs.last().unwrap(); + bytes[last] ^= 0xFF; + bytes.resize(seg as usize, 0); + for &active in &[true, false] { + h.check("nonone-base-torn", &bytes, b2, active, seg, max); + } + } + } +} + +#[test] +fn differential_scenario_matrix() { + let mut h = Harness::new(); + scenario_cases(&mut h); + assert!( + h.checked > 100, + "expected a broad scenario matrix, ran {}", + h.checked + ); + eprintln!("differential scenario matrix: {} cases agreed", h.checked); +} + +#[test] +fn differential_over_fuzz_corpora() { + // The committed (Task-1 regrown) corpora, consumed as raw segment bodies. + // Config chosen so a body up to ~64 KiB fits after the header. + const SEG: u64 = 65536; + const MAX: u32 = 4096; + let mut h = Harness::new(); + + for sub in ["recovery", "structure", "decode", "model"] { + let dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("fuzz/corpus") + .join(sub); + let entries = match std::fs::read_dir(&dir) { + Ok(e) => e, + Err(_) => continue, // corpus dir absent in some checkouts — skip, not fail + }; + for entry in entries.flatten() { + let raw = match std::fs::read(entry.path()) { + Ok(b) => b, + Err(_) => continue, + }; + // Build a valid header + the corpus bytes as the record body, capped + // to the segment body capacity. Both parsers see identical bytes. 
+ let cap = (SEG as usize) - HEADER_SIZE; + let body = &raw[..raw.len().min(cap)]; + let mut bytes = fuzzing::segment_header_bytes(1); + bytes.extend_from_slice(body); + for &active in &[true, false] { + h.check(&format!("corpus/{sub}"), &bytes, 1, active, SEG, MAX); + } + } + } + eprintln!( + "differential over fuzz corpora: {} inputs agreed", + h.checked + ); + // Not asserting a minimum count: a fresh checkout may have a thin corpus. The + // scenario matrix carries the exact-match coverage; this pass adds breadth. +}