From 977c732151b970d88c3013154258f0d35c496fea Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 28 Jun 2026 12:14:22 +0000 Subject: [PATCH 1/2] =?UTF-8?q?M9=20F3:=20structure-aware=20tail-vs-corrup?= =?UTF-8?q?tion=20fuzz=20(=C2=A714.5,=20D4/D5/D6/D10)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Third M9 slice. F3 is the target with teeth on the recovery classifier: it builds a VALID dense single segment (the harness owns the correct CRCs via the `fuzzing` generators, so the deep classifier states a blind byte fuzzer never reaches are hit every run), applies ONE fuzzer-chosen localized mutation at a chosen record, then drives the real public `Wal::open`. - fuzz/fuzz_targets/structure.rs. Mutation menu: flip CRC / flip a CRC-covered body byte / zero rec_type (-> sentinel) / extend length / tamper padding / reserved rec_type + re-CRC (via the public crc32c). Sharp classifier oracle: * invalid INTERIOR record (a valid record still follows) => MUST be fatal TornMidLog/Corruption (D5 -- never silent truncation); * invalid LAST record => MUST truncate at its offset (D4); * rec_type->0 => sentinel / clean end. Plus: the surviving suffix is a dense, byte-identical prefix of the built records (D6/D10 -- nothing past the cut, no mutated/garbage bytes); an idempotent reopen presents a clean tail (D7, durable zeroing); the forward scan stays within scan_bound (D11). - Corpus minimized with `cargo fuzz cmin` (111 entries). - CI: `structure` added to fuzz.yml matrix; per-PR smoke in ci.yml now runs F1+F2+F3 (renamed "fuzz smoke (F1/F2/F3)"); a crash reds the PR. Falsifiability: forcing forward_scan_finds_valid to always report "no continuation" (the classic D5 bug -- mid-log corruption silently truncated) trips `D5: interior corruption returned Ok (silent truncation!)`; the forced change was then reverted. No `src/` change (git diff src/ empty). 
Built + smoke-green (150000 runs, exit 0, zero crashes, cov 561); cargo fmt --check, clippy --all-targets -D warnings, cargo test, actionlint all green. N-CPU-hour release gate stays OPEN. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01Rpbwt9JT56hQvVXiqTS131 --- .github/workflows/ci.yml | 15 +- .github/workflows/fuzz.yml | 4 +- CLAUDE.md | 1 + docs/wal_design_v6.md | 4 +- fuzz/Cargo.toml | 7 + fuzz/README.md | 3 +- .../042c3eab61390f6689c5e42aeb4ad0ff9f97340d | Bin 0 -> 27 bytes .../0610af0a0d967c9cef07077287855f4a51ab0e28 | Bin 0 -> 27 bytes .../087e7a2387fd0357f3d4a96af65aca5ccdacf37c | Bin 0 -> 38 bytes .../09d28000e92ae94fdd997b54a6632b469faac183 | Bin 0 -> 34 bytes .../0e298300aac04bafdc922ed58830adf44c90e599 | Bin 0 -> 46 bytes .../0e50a80f671fedb8356f70954b38d6cb0b055bd1 | Bin 0 -> 41 bytes .../11ecca55db4ea38e84a31f35fc7281412611d791 | Bin 0 -> 27 bytes .../130ce4fcec056d98ac7dca28edf11bc6aa318978 | Bin 0 -> 27 bytes .../155d9d534df0fecb8ddffaf77b57d13539a09c22 | Bin 0 -> 510 bytes .../15d9308cfbe361835dcdb293fd68ae825a34d1ea | Bin 0 -> 27 bytes .../177cce554b8b51dabe7ed14a40940bfed4f88ef7 | 1 + .../1a370de08947b72a112300539475a420f9e9add6 | Bin 0 -> 27 bytes .../1b119fbe420151de1687c1a544f857c2cd3ca537 | Bin 0 -> 27 bytes .../1d0a3fab5c4de4d391d1ecfa53cf622b05e3329c | Bin 0 -> 28 bytes .../2262c8329be7e42bbe0f27ae2f5efe6ed856c397 | Bin 0 -> 30 bytes .../226513999f92baac4000544d766b7e4f01fef052 | Bin 0 -> 176 bytes .../22e28352faa724c18e45d2144c43986a33ed005e | Bin 0 -> 193 bytes .../24873c3f1a2639529d3ea0f8a436270f0deb3505 | Bin 0 -> 30 bytes .../27b13a0e5d083250b99134f64e884ae43c92eb69 | Bin 0 -> 27 bytes .../28ca2827d34c1e7d53d7397cd620969d86976618 | Bin 0 -> 262 bytes .../2a43dc6238ace462de3f6f45ca40d86b05bfbc69 | 1 + .../322e52e5c88df0409bc11553cd2b1e9cc1918bcb | Bin 0 -> 30 bytes .../3406a81ea6530f38ec9723b612cb62ceb97a6e50 | Bin 0 -> 601 bytes .../3507bfe5491064e512095e65d76acb508ee248a0 | Bin 0 -> 27 bytes 
.../35118fa6bddd04d45f88474ca578fdd2ae94ba29 | Bin 0 -> 33 bytes .../36b0b457c1d5e8ec8b34bf041bfb495189472b33 | Bin 0 -> 27 bytes .../3700f023f5d096ca32d73cb31c66b23538f621de | Bin 0 -> 27 bytes .../3ab949789ff0abc1bc992a45b716e15b01e53227 | Bin 0 -> 138 bytes .../3b6c9dcb63ffb5ed9b4ceb43a4446d08ad1b8256 | Bin 0 -> 86 bytes .../3bd6faf14861911447e869474d909beed3844e0b | Bin 0 -> 29 bytes .../3dff0e013875cecdf76336625e71d971b3f9f1c0 | Bin 0 -> 27 bytes .../3ea0580f6a7c1a6f04c550765155309ee3ec239e | Bin 0 -> 29 bytes .../4275f9e684ea52876c457e2cd9c15de5c60a787c | Bin 0 -> 27 bytes .../447480bb65af95f4d00a8706b55e2384ce46efdc | Bin 0 -> 29 bytes .../467d6d9e020535f7d24a2f0b29df2ef4cf0ca7bd | Bin 0 -> 43 bytes .../49237d1e1749e661d21d92abd673e468b9560c7c | Bin 0 -> 68 bytes .../4969e681a3347db88b99b7dfc4bbe6993cd8ea6e | Bin 0 -> 46 bytes .../50acbbcece7605b738622515d15c353f66edd404 | Bin 0 -> 32 bytes .../52866320303072e5071cf58971044cb211ce7886 | Bin 0 -> 46 bytes .../52b4780aa46782439b6affca1e9857029a83875a | Bin 0 -> 27 bytes .../54794d10b78c852a2401d2e3d718448a9cf63969 | Bin 0 -> 27 bytes .../55d9b777feb74c9b3543adf197a197bbc8c99f1b | Bin 0 -> 51 bytes .../5ae8205f0209fbd5dde4d8d6d0bc0e25f41dda5d | Bin 0 -> 27 bytes .../5c970256d86d12c162554fff71682ec1f3804b60 | Bin 0 -> 28 bytes .../5eb73d7aee2f8b2b3194682ef92c21476f3a3dcf | Bin 0 -> 81 bytes .../6167d93bc6aed6fef01e4c1c9a0282558aee8d50 | Bin 0 -> 28 bytes .../62807f033d32099981eb3ce894ce464d0cbe4468 | Bin 0 -> 29 bytes .../64707a29d50aa040bf54bd165499f12b3c70ea1a | Bin 0 -> 30 bytes .../65d03999d7e33c6fde61911dd9aa580703bf67c2 | Bin 0 -> 84 bytes .../6814137aab2d2ade0be401a7598b3297f5f27f3a | Bin 0 -> 30 bytes .../69fa26bf3b8b698b33220c2116017e75eb8b25d6 | Bin 0 -> 81 bytes .../6b140356a217f605c361b1eb341e434180233493 | Bin 0 -> 27 bytes .../6c46bcba457bbe201897b0b15a538c114612b090 | Bin 0 -> 114 bytes .../7350b8cd28ebe4a1654b5b08335be83895f5e635 | Bin 0 -> 27 bytes .../739ceab5acd0bea54eabc185ff3b0fdfa2156fbf | 
Bin 0 -> 128 bytes .../73ed03711ab6c7f8326ca100b984f36963adba4c | Bin 0 -> 27 bytes .../75631e6904d2e59e63545e3df4ecdcc974b38190 | Bin 0 -> 34 bytes .../75ed8005ee00dc55bf226b02420942332fdb7d90 | Bin 0 -> 129 bytes .../76f7ff2587831bc416bccad4c554dcd4570fd8b4 | Bin 0 -> 64 bytes .../79b75c6d7fd89e7c949bde1d8f2635bb590a8e18 | Bin 0 -> 27 bytes .../7bb556e341b903591c498d2843197c357b238e10 | Bin 0 -> 27 bytes .../7bc0a73e7004b6eab650b2f5fbef8c55ab770c3f | Bin 0 -> 880 bytes .../7e8f9ac226738b178b92f41789f6cb57e2aea18f | Bin 0 -> 31 bytes .../80cb0ca0a7c5c91e1d77f8218d6531d6ac1841e4 | Bin 0 -> 334 bytes .../81bccba3c7bd64152057250241d946d6516187bd | Bin 0 -> 170 bytes .../82beaa5bb6a8a694e95ac4da97dcb12671fb1de1 | Bin 0 -> 125 bytes .../842abc768b13b4adf20a4072c207ae5d01396a43 | Bin 0 -> 37 bytes .../855051e3744afbca95669a252a521773686cf642 | Bin 0 -> 28 bytes .../8f550e5caadd1648fa92ea05c3d10b7a044e26aa | Bin 0 -> 85 bytes .../8ffb29c5f04b9b95c9d100788d5938c9da30492d | Bin 0 -> 29 bytes .../939e9708d7d2c978bf71ac1b0f0f3cea00fb07f7 | Bin 0 -> 27 bytes .../93fdb912ff393e72b2d4b41b2600316fee28c9f2 | Bin 0 -> 29 bytes .../95c32f32b7c04ad8b0d1abf7436f8b885908e3cd | Bin 0 -> 29 bytes .../96a43307bbd856c782c6a7dd4543416e0f5e215a | Bin 0 -> 27 bytes .../96e80dc54ac22a75ed44fb3d0d6f15dfc862c950 | Bin 0 -> 306 bytes .../970051bac6e1bd06e7e2f6c660c0645364b83462 | Bin 0 -> 46 bytes .../9764da1c4fe0ea5e7f1c74fa426e25bd988f227e | Bin 0 -> 30 bytes .../97cac2eb9be831bae35f997da7ed8251e115816a | Bin 0 -> 28 bytes .../99937979861509d74572670eb66b4321126fde80 | Bin 0 -> 100 bytes .../9d3f2d1e2f3ae5ae3fe4318a8478db6b12edf5f3 | Bin 0 -> 313 bytes .../9de5542255fe98abc2ff7c0df74b523f99d0f68a | Bin 0 -> 32 bytes .../a17fabc5f446207740cc91a1ab36545a09fa2ba8 | Bin 0 -> 58 bytes .../a23c2ee7aaa44db0d85d6854d2c6dc82a251f747 | Bin 0 -> 512 bytes .../a3c440ac8e42d4abf4182be76406df25a4363c48 | Bin 0 -> 74 bytes .../a3e6d0396be8359ae0825f2ba1a230b80dca2250 | Bin 0 -> 80 bytes 
.../a461aa184b16964471434c7722d1ea171182984f | Bin 0 -> 27 bytes .../a63ad1a5a86b0b99cbf7bd6a5b9d8a774f6dcab8 | Bin 0 -> 27 bytes .../ae13c6bdc21c067b764bf080c3ce545d857c5956 | Bin 0 -> 94 bytes .../b1610389177fa92ebe1c913b0c16195b3cbe70db | Bin 0 -> 33 bytes .../b2c7c60de0e6c20399d21d0523d34d61c921e69e | Bin 0 -> 27 bytes .../bbea107c5ab1993645f49ae2731089f4eed01a4d | Bin 0 -> 535 bytes .../bfae9d5068c322466034c012d63aedea14ee8b5a | Bin 0 -> 30 bytes .../bfc28fbe9fdd05f105b5e7ed1c5fa79b2ee9c290 | Bin 0 -> 187 bytes .../c443fab8b4ec0df114e0d3e6c12e70b6a34816fc | Bin 0 -> 31 bytes .../c4a7b71ff70378200b354b35b984c00e14fc482a | Bin 0 -> 62 bytes .../cc1ace770a7a022dccb17a780155747545399218 | Bin 0 -> 27 bytes .../d06c3ff8e3ea85766877e03cc169d7ee4e37f589 | Bin 0 -> 344 bytes .../d6c8aeed118282b3285aa39906dd304daef1277d | Bin 0 -> 33 bytes .../d8eaa8a080a625fb63a39905a5d410710a36883b | Bin 0 -> 54 bytes .../d9bfb773d3007e5025b7c301648d8315ca8e04e0 | Bin 0 -> 61 bytes .../dd405e296d5681e4bdb690f1f34597c67b62eab3 | Bin 0 -> 200 bytes .../dda48b05b3eae4676ab834c7eef007c36f6cd50a | Bin 0 -> 991 bytes .../de93e8d8347dcc923577843f0db08572f072f666 | Bin 0 -> 28 bytes .../e91eb76e19d6a3029756a9469c2952a52f5c1de9 | Bin 0 -> 28 bytes .../ea3dbd6575ffea244c156ef748b2808910347d1a | Bin 0 -> 215 bytes .../f458f65575c8a52555c9c8ef52b6fc78a5a2aa38 | Bin 0 -> 494 bytes .../f51ffeee1ce264a5a9e44a39c152d9434b050eb3 | Bin 0 -> 27 bytes .../f6004d89499154ec3e18efe7e6b25b1651258486 | Bin 0 -> 28 bytes .../faf1d3e5bfead395a1d3c1c8fb4611019b98926a | Bin 0 -> 27 bytes .../fc28fa89d863e289e8be5a2dd383edbb4303c714 | Bin 0 -> 174 bytes .../fc587f0f172a220dc2b252fba1fce12a7154d896 | Bin 0 -> 301 bytes fuzz/fuzz_targets/structure.rs | 284 ++++++++++++++++++ 118 files changed, 309 insertions(+), 11 deletions(-) create mode 100644 fuzz/corpus/structure/042c3eab61390f6689c5e42aeb4ad0ff9f97340d create mode 100644 fuzz/corpus/structure/0610af0a0d967c9cef07077287855f4a51ab0e28 create mode 100644 
fuzz/corpus/structure/087e7a2387fd0357f3d4a96af65aca5ccdacf37c create mode 100644 fuzz/corpus/structure/09d28000e92ae94fdd997b54a6632b469faac183 create mode 100644 fuzz/corpus/structure/0e298300aac04bafdc922ed58830adf44c90e599 create mode 100644 fuzz/corpus/structure/0e50a80f671fedb8356f70954b38d6cb0b055bd1 create mode 100644 fuzz/corpus/structure/11ecca55db4ea38e84a31f35fc7281412611d791 create mode 100644 fuzz/corpus/structure/130ce4fcec056d98ac7dca28edf11bc6aa318978 create mode 100644 fuzz/corpus/structure/155d9d534df0fecb8ddffaf77b57d13539a09c22 create mode 100644 fuzz/corpus/structure/15d9308cfbe361835dcdb293fd68ae825a34d1ea create mode 100644 fuzz/corpus/structure/177cce554b8b51dabe7ed14a40940bfed4f88ef7 create mode 100644 fuzz/corpus/structure/1a370de08947b72a112300539475a420f9e9add6 create mode 100644 fuzz/corpus/structure/1b119fbe420151de1687c1a544f857c2cd3ca537 create mode 100644 fuzz/corpus/structure/1d0a3fab5c4de4d391d1ecfa53cf622b05e3329c create mode 100644 fuzz/corpus/structure/2262c8329be7e42bbe0f27ae2f5efe6ed856c397 create mode 100644 fuzz/corpus/structure/226513999f92baac4000544d766b7e4f01fef052 create mode 100644 fuzz/corpus/structure/22e28352faa724c18e45d2144c43986a33ed005e create mode 100644 fuzz/corpus/structure/24873c3f1a2639529d3ea0f8a436270f0deb3505 create mode 100644 fuzz/corpus/structure/27b13a0e5d083250b99134f64e884ae43c92eb69 create mode 100644 fuzz/corpus/structure/28ca2827d34c1e7d53d7397cd620969d86976618 create mode 100644 fuzz/corpus/structure/2a43dc6238ace462de3f6f45ca40d86b05bfbc69 create mode 100644 fuzz/corpus/structure/322e52e5c88df0409bc11553cd2b1e9cc1918bcb create mode 100644 fuzz/corpus/structure/3406a81ea6530f38ec9723b612cb62ceb97a6e50 create mode 100644 fuzz/corpus/structure/3507bfe5491064e512095e65d76acb508ee248a0 create mode 100644 fuzz/corpus/structure/35118fa6bddd04d45f88474ca578fdd2ae94ba29 create mode 100644 fuzz/corpus/structure/36b0b457c1d5e8ec8b34bf041bfb495189472b33 create mode 100644 
fuzz/corpus/structure/3700f023f5d096ca32d73cb31c66b23538f621de create mode 100644 fuzz/corpus/structure/3ab949789ff0abc1bc992a45b716e15b01e53227 create mode 100644 fuzz/corpus/structure/3b6c9dcb63ffb5ed9b4ceb43a4446d08ad1b8256 create mode 100644 fuzz/corpus/structure/3bd6faf14861911447e869474d909beed3844e0b create mode 100644 fuzz/corpus/structure/3dff0e013875cecdf76336625e71d971b3f9f1c0 create mode 100644 fuzz/corpus/structure/3ea0580f6a7c1a6f04c550765155309ee3ec239e create mode 100644 fuzz/corpus/structure/4275f9e684ea52876c457e2cd9c15de5c60a787c create mode 100644 fuzz/corpus/structure/447480bb65af95f4d00a8706b55e2384ce46efdc create mode 100644 fuzz/corpus/structure/467d6d9e020535f7d24a2f0b29df2ef4cf0ca7bd create mode 100644 fuzz/corpus/structure/49237d1e1749e661d21d92abd673e468b9560c7c create mode 100644 fuzz/corpus/structure/4969e681a3347db88b99b7dfc4bbe6993cd8ea6e create mode 100644 fuzz/corpus/structure/50acbbcece7605b738622515d15c353f66edd404 create mode 100644 fuzz/corpus/structure/52866320303072e5071cf58971044cb211ce7886 create mode 100644 fuzz/corpus/structure/52b4780aa46782439b6affca1e9857029a83875a create mode 100644 fuzz/corpus/structure/54794d10b78c852a2401d2e3d718448a9cf63969 create mode 100644 fuzz/corpus/structure/55d9b777feb74c9b3543adf197a197bbc8c99f1b create mode 100644 fuzz/corpus/structure/5ae8205f0209fbd5dde4d8d6d0bc0e25f41dda5d create mode 100644 fuzz/corpus/structure/5c970256d86d12c162554fff71682ec1f3804b60 create mode 100644 fuzz/corpus/structure/5eb73d7aee2f8b2b3194682ef92c21476f3a3dcf create mode 100644 fuzz/corpus/structure/6167d93bc6aed6fef01e4c1c9a0282558aee8d50 create mode 100644 fuzz/corpus/structure/62807f033d32099981eb3ce894ce464d0cbe4468 create mode 100644 fuzz/corpus/structure/64707a29d50aa040bf54bd165499f12b3c70ea1a create mode 100644 fuzz/corpus/structure/65d03999d7e33c6fde61911dd9aa580703bf67c2 create mode 100644 fuzz/corpus/structure/6814137aab2d2ade0be401a7598b3297f5f27f3a create mode 100644 
fuzz/corpus/structure/69fa26bf3b8b698b33220c2116017e75eb8b25d6 create mode 100644 fuzz/corpus/structure/6b140356a217f605c361b1eb341e434180233493 create mode 100644 fuzz/corpus/structure/6c46bcba457bbe201897b0b15a538c114612b090 create mode 100644 fuzz/corpus/structure/7350b8cd28ebe4a1654b5b08335be83895f5e635 create mode 100644 fuzz/corpus/structure/739ceab5acd0bea54eabc185ff3b0fdfa2156fbf create mode 100644 fuzz/corpus/structure/73ed03711ab6c7f8326ca100b984f36963adba4c create mode 100644 fuzz/corpus/structure/75631e6904d2e59e63545e3df4ecdcc974b38190 create mode 100644 fuzz/corpus/structure/75ed8005ee00dc55bf226b02420942332fdb7d90 create mode 100644 fuzz/corpus/structure/76f7ff2587831bc416bccad4c554dcd4570fd8b4 create mode 100644 fuzz/corpus/structure/79b75c6d7fd89e7c949bde1d8f2635bb590a8e18 create mode 100644 fuzz/corpus/structure/7bb556e341b903591c498d2843197c357b238e10 create mode 100644 fuzz/corpus/structure/7bc0a73e7004b6eab650b2f5fbef8c55ab770c3f create mode 100644 fuzz/corpus/structure/7e8f9ac226738b178b92f41789f6cb57e2aea18f create mode 100644 fuzz/corpus/structure/80cb0ca0a7c5c91e1d77f8218d6531d6ac1841e4 create mode 100644 fuzz/corpus/structure/81bccba3c7bd64152057250241d946d6516187bd create mode 100644 fuzz/corpus/structure/82beaa5bb6a8a694e95ac4da97dcb12671fb1de1 create mode 100644 fuzz/corpus/structure/842abc768b13b4adf20a4072c207ae5d01396a43 create mode 100644 fuzz/corpus/structure/855051e3744afbca95669a252a521773686cf642 create mode 100644 fuzz/corpus/structure/8f550e5caadd1648fa92ea05c3d10b7a044e26aa create mode 100644 fuzz/corpus/structure/8ffb29c5f04b9b95c9d100788d5938c9da30492d create mode 100644 fuzz/corpus/structure/939e9708d7d2c978bf71ac1b0f0f3cea00fb07f7 create mode 100644 fuzz/corpus/structure/93fdb912ff393e72b2d4b41b2600316fee28c9f2 create mode 100644 fuzz/corpus/structure/95c32f32b7c04ad8b0d1abf7436f8b885908e3cd create mode 100644 fuzz/corpus/structure/96a43307bbd856c782c6a7dd4543416e0f5e215a create mode 100644 
fuzz/corpus/structure/96e80dc54ac22a75ed44fb3d0d6f15dfc862c950 create mode 100644 fuzz/corpus/structure/970051bac6e1bd06e7e2f6c660c0645364b83462 create mode 100644 fuzz/corpus/structure/9764da1c4fe0ea5e7f1c74fa426e25bd988f227e create mode 100644 fuzz/corpus/structure/97cac2eb9be831bae35f997da7ed8251e115816a create mode 100644 fuzz/corpus/structure/99937979861509d74572670eb66b4321126fde80 create mode 100644 fuzz/corpus/structure/9d3f2d1e2f3ae5ae3fe4318a8478db6b12edf5f3 create mode 100644 fuzz/corpus/structure/9de5542255fe98abc2ff7c0df74b523f99d0f68a create mode 100644 fuzz/corpus/structure/a17fabc5f446207740cc91a1ab36545a09fa2ba8 create mode 100644 fuzz/corpus/structure/a23c2ee7aaa44db0d85d6854d2c6dc82a251f747 create mode 100644 fuzz/corpus/structure/a3c440ac8e42d4abf4182be76406df25a4363c48 create mode 100644 fuzz/corpus/structure/a3e6d0396be8359ae0825f2ba1a230b80dca2250 create mode 100644 fuzz/corpus/structure/a461aa184b16964471434c7722d1ea171182984f create mode 100644 fuzz/corpus/structure/a63ad1a5a86b0b99cbf7bd6a5b9d8a774f6dcab8 create mode 100644 fuzz/corpus/structure/ae13c6bdc21c067b764bf080c3ce545d857c5956 create mode 100644 fuzz/corpus/structure/b1610389177fa92ebe1c913b0c16195b3cbe70db create mode 100644 fuzz/corpus/structure/b2c7c60de0e6c20399d21d0523d34d61c921e69e create mode 100644 fuzz/corpus/structure/bbea107c5ab1993645f49ae2731089f4eed01a4d create mode 100644 fuzz/corpus/structure/bfae9d5068c322466034c012d63aedea14ee8b5a create mode 100644 fuzz/corpus/structure/bfc28fbe9fdd05f105b5e7ed1c5fa79b2ee9c290 create mode 100644 fuzz/corpus/structure/c443fab8b4ec0df114e0d3e6c12e70b6a34816fc create mode 100644 fuzz/corpus/structure/c4a7b71ff70378200b354b35b984c00e14fc482a create mode 100644 fuzz/corpus/structure/cc1ace770a7a022dccb17a780155747545399218 create mode 100644 fuzz/corpus/structure/d06c3ff8e3ea85766877e03cc169d7ee4e37f589 create mode 100644 fuzz/corpus/structure/d6c8aeed118282b3285aa39906dd304daef1277d create mode 100644 
fuzz/corpus/structure/d8eaa8a080a625fb63a39905a5d410710a36883b create mode 100644 fuzz/corpus/structure/d9bfb773d3007e5025b7c301648d8315ca8e04e0 create mode 100644 fuzz/corpus/structure/dd405e296d5681e4bdb690f1f34597c67b62eab3 create mode 100644 fuzz/corpus/structure/dda48b05b3eae4676ab834c7eef007c36f6cd50a create mode 100644 fuzz/corpus/structure/de93e8d8347dcc923577843f0db08572f072f666 create mode 100644 fuzz/corpus/structure/e91eb76e19d6a3029756a9469c2952a52f5c1de9 create mode 100644 fuzz/corpus/structure/ea3dbd6575ffea244c156ef748b2808910347d1a create mode 100644 fuzz/corpus/structure/f458f65575c8a52555c9c8ef52b6fc78a5a2aa38 create mode 100644 fuzz/corpus/structure/f51ffeee1ce264a5a9e44a39c152d9434b050eb3 create mode 100644 fuzz/corpus/structure/f6004d89499154ec3e18efe7e6b25b1651258486 create mode 100644 fuzz/corpus/structure/faf1d3e5bfead395a1d3c1c8fb4611019b98926a create mode 100644 fuzz/corpus/structure/fc28fa89d863e289e8be5a2dd383edbb4303c714 create mode 100644 fuzz/corpus/structure/fc587f0f172a220dc2b252fba1fce12a7154d896 create mode 100644 fuzz/fuzz_targets/structure.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2c8cc48..8a384a8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -67,12 +67,13 @@ jobs: fuzz-smoke: # M9 (§14.5): a SHORT per-PR smoke of the fuzz targets (F1 recovery, F2 - # decoder). This lane is real and BLOCKING — a reproducible crash here is a - # genuine D11 bug and must red the PR (not advisory). It is distinct from the - # time-boxed nightly fuzz lane (fuzz.yml) and from any H1 sign-off: M9 fuzz - # findings gate M9-relevant PRs; they never red an H1 dispatch run. The full - # N-CPU-hour §14.13 release gate runs on a dedicated runner, not here. - name: fuzz smoke (F1/F2) + # decoder, F3 structure-aware classifier). This lane is real and BLOCKING — a + # reproducible crash here is a genuine D4/D5/D10/D11 bug and must red the PR + # (not advisory). 
It is distinct from the time-boxed nightly fuzz lane + # (fuzz.yml) and from any H1 sign-off: M9 fuzz findings gate M9-relevant PRs; + # they never red an H1 dispatch run. The full N-CPU-hour §14.13 release gate + # runs on a dedicated runner, not here. + name: fuzz smoke (F1/F2/F3) runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -98,6 +99,8 @@ jobs: run: cargo +nightly fuzz run recovery --target x86_64-unknown-linux-gnu -- -runs=20000 -rss_limit_mb=4096 - name: F2 decode smoke (bounded; a crash reds the PR) run: cargo +nightly fuzz run decode --target x86_64-unknown-linux-gnu -- -runs=40000 -rss_limit_mb=4096 + - name: F3 structure smoke (bounded; a crash reds the PR) + run: cargo +nightly fuzz run structure --target x86_64-unknown-linux-gnu -- -runs=8000 -rss_limit_mb=4096 dirfsync-presence: # M8 §14.4d Tier 1 (PRIMARY): the deterministic, FS-independent regression diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index eaa0b0c..68a16b5 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -35,8 +35,8 @@ jobs: strategy: fail-fast: false matrix: - # F3–F4 are appended here as their slices land. - target: [recovery, decode] + # F4 is appended here as its slice lands. + target: [recovery, decode, structure] steps: - uses: actions/checkout@v4 - name: Install nightly toolchain diff --git a/CLAUDE.md b/CLAUDE.md index 2e2fd41..d131424 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -109,6 +109,7 @@ The entire value of this component is **correct behavior under crashes and fault ## Project status (keep this updated) +- **LATEST (2026-06-28): M9 slice 3 — F3 structure-aware classifier fuzz LANDED, built + smoke-green here; gate stays OPEN.** New `fuzz/fuzz_targets/structure.rs` (third cargo-fuzz bin). 
Builds a **valid dense single segment** (the harness owns the correct CRCs via the `fuzzing` generators, so the deep classifier states a blind byte fuzzer never reaches are hit every run), applies ONE fuzzer-chosen **localized mutation** at a chosen record (flip CRC / flip a CRC-covered body byte / zero `rec_type`→sentinel / extend `length` / tamper padding / reserved `rec_type`+re-CRC via the public `crc32c`), then drives the **real public `Wal::open`**. **Sharp classifier oracle:** invalid **interior** record (a valid record still follows) ⇒ MUST be fatal `TornMidLog`/`Corruption` (**D5**, never silent truncation); invalid **last** record ⇒ MUST truncate at its offset (**D4**); `rec_type`→0 ⇒ sentinel/clean-end. Plus the surviving suffix is a dense **byte-identical** prefix of the built records (**D6/D10**), an **idempotent reopen** yields a clean tail (**D7**, durable zeroing), and the forward scan stays within `scan_bound` (**D11**). **Smoke-green: 150 000 runs, exit 0, zero crashes** (cov 561); corpus `cargo fuzz cmin`'d (111 entries). **Falsifiability shown**: forcing `forward_scan_finds_valid` to always report "no continuation" (the classic D5 bug — mid-log corruption silently truncated) trips `D5: interior corruption returned Ok (silent truncation!)`, then reverted. **No `src/` change** (`git diff src/` empty). CI: `structure` added to `fuzz.yml` matrix + the per-PR smoke in `ci.yml` (now `fuzz smoke (F1/F2/F3)`, a crash reds the M9 PR). `cargo fmt --check`, `clippy --all-targets -D warnings`, `cargo test`, `actionlint` green. **Still NOT done in M9:** F4 (op-script oracle); Miri; `!Sync` trybuild + dir-lock; loom; soak; CI-matrix tidy-up; the N-CPU-hour release-gate observation. - **LATEST (2026-06-28): M9 slice 2 — F2 single-record decoder fuzz LANDED, built + smoke-green here; gate stays OPEN.** New `fuzz/fuzz_targets/decode.rs` (second cargo-fuzz bin). 
The **raw fuzzer bytes are the decode buffer** (no `arbitrary` envelope — chosen after the struct-`Arbitrary` byte layout proved fragile; the corpus is now just record bytes), decoded against a boundary-biased `max_record_size` set `{0,1,7,8,64,4096,1<<20,u32::MAX}` so the length bound is hit from both sides for any record the buffer encodes — **including `max < payload`, which keeps the payload-bound assertion non-vacuous**. Asserts bounds-soundness on any returned record: `payload_len ≤ max`, `framed_len ≤ buf.len()`, `framed_len ≥ 20`, 8-aligned, `20 + payload_len ≤ framed_len` (D11, record level). Because a blind byte fuzzer essentially never synthesizes a CRC-valid frame, the corpus is **seeded with genuine CRC-valid records** (a Python `crc32c` generator self-checked against the canonical `0xE3069283` vector so it matches the `crc32c` crate) + `cargo fuzz cmin` (17 entries); **falsifiability shown**: disabling the decoder's length bound trips `payload_len 5 exceeds max_record_size 0` on a valid seed, then reverted. **Smoke-green: 300 000 runs, exit 0, zero crashes**; **no `src/` change** (`git diff src/` empty). CI: `decode` added to `fuzz.yml` matrix + the per-PR smoke in `ci.yml` (renamed `fuzz smoke (F1/F2)`, both targets, a crash reds the M9 PR). `cargo fmt --check`, `clippy --all-targets -D warnings`, record unit tests green. **Still NOT done in M9:** F3 (structure-aware), F4 (op-script oracle); Miri; `!Sync` trybuild + dir-lock; loom; soak; CI-matrix tidy-up; the N-CPU-hour release-gate observation. - **LATEST (2026-06-27): M9 started — F1 recovery-parser fuzz LANDED (slice 1 of M9), built + smoke-green here; the N-CPU-hour gate stays OPEN.** New `fuzzing` Cargo feature (zero-cost when off) gates a `#[doc(hidden)] pub mod fuzzing` in `src/lib.rs` (exposes the internal parse entry points for the cargo-fuzz targets) and the **bounded-scan instrumentation** in `src/recovery.rs`. 
Per the designer's load-bearing fix, the `max_record_size + 28` bound is hoisted into one `recovery::scan_bound(max_record_size)` symbol used by **both** the real `forward_scan_finds_valid` loop's window **and** the in-loop `assert!`/thread-local probe — so the gate measures **production**, not a harness copy, and the bound cannot drift. New `fuzz/` cargo-fuzz crate (libFuzzer + `arbitrary` + ASan; standalone, never published): `fuzz/fuzz_targets/recovery.rs` (F1). **Primary surface is the real public `Wal::open`** over an adversarial *directory* of segment files — fuzzer-controlled filenames + `base_lsn`s (out-of-order/duplicate/gapped/`0`/malformed-name), valid-header dense bodies and pure garbage — so filename-parse → discovery → sort → header validation → §8.4 incomplete-highest discard → cross-segment continuity → `recover_segment` are all in the blast radius (D11/D2/contiguity), with a secondary single-file `recover_segment` probe asserting the bound directly. **Built with `cargo +nightly fuzz build` and smoke-green: 60 000 runs, exit 0, zero crashes**, corpus = the fuzzer-grown, `cargo fuzz cmin`-minimized coverage-preserving set (`fuzz/corpus/recovery/`, ~174 entries reaching the multi-segment-continuity coverage that hand-authored entropy seeds miss — per the designer's review note). **Falsifiability shown** (§14.0.3): widening the scan loop past `scan_bound` trips the in-loop `assert!` (`distance 4128 > 4124`), then reverted. **Framing (designer note, do not over-read):** the bounded-scan counter holds **structurally** (the loop window *is* `scan_bound`, so `distance ≤ scan_bound` for every input) — it is a **drift/regression guard**, not the headline; the substantive D11 proof in F1 is the **crash-free / no-OOB / no-unbounded-alloc / termination** surface over adversarial inputs. 
CI: new `.github/workflows/fuzz.yml` (nightly + dispatch, time-boxed, loud "contingent, NOT the N-CPU-hour gate" banner, uploads corpus/artifacts) + a **blocking per-PR smoke** in `ci.yml` (a reproducible crash reds the M9 PR — flag #3; never reds an H1 *dispatch* run). `cargo test` (no feature + `--features fuzzing`, 84 lib + all integration), `cargo clippy --all-targets -D warnings` (both configs), `cargo fmt --check`, MSRV 1.85 (both configs), `cargo build` (no feature ⇒ zero release impact) all green; `actionlint` clean on both workflows. **F4's crash model (when it lands) is the process-crash state machine, not power loss** — flag #2. **Still NOT done in M9:** F2 (decoder), F3 (structure-aware), F4 (op-script oracle); Miri; `!Sync` trybuild + dir-lock; loom publish-barrier; soak; CI-matrix tidy-up; and the F1 N-CPU-hour release-gate observation on a dedicated runner. - **LATEST (2026-06-25, PRs #20 + #21 off `main`): dm-flakey CI now RUNS, H3-physical PASSES, §14.4d is three-tier.** PR #20 (`claude/m8-dmflakey-ci-fixes`) fixes the hosted dm-flakey gate: provision `linux-modules-extra-$(uname -r)` + `modprobe dm_flakey` (dm-flakey **is** reachable on hosted Azure runners — no self-hosted runner needed), `cmd_check` queries `dmsetup targets` **as root**, and dm table reloads use `dmsetup suspend --noflush --nolockfs` in **both** `flakey_fault` and `flakey_up` (a default suspend's lockfs **freeze** is a full fs-sync that either EIO'd through the erroring target — misread as a §12 violation — or persisted the un-synced data before the drop, defeating the §14.4d controls). **Result: H3-physical ext4 PASSES** (source-confirmed block-layer EIO → §12 poison; evidence on issue #16). 
PR #21 (`claude/m8-dirfsync-tiers`, stacked on #20) resolves §14.4d per the designer: **the dir-fsync omission is NOT reproducible on ext4/xfs/btrfs** — those journaling FSes transitively persist a new file's dir entry on the segment's own `fsync` (AFSNCE OSDI '14, §18), masking it; `fsync_dir` is kept as a portable-durability safeguard. Three tiers: **Tier-1 (PRIMARY, per-PR, deterministic) = `scripts/m8/dirfsync-presence.sh`** straces the roll path, asserts correct issues the roll-time dir-`fsync` while `inject_no_dir_fsync` does not — **RUN+green here** (`correct=5` vs `inject=1`), wired into `ci.yml`; **Tier-2 = behavioral power-loss via a synchronized mid-run cut** (`src/bin/dirfsync_cut_workload.rs` rolls once, acks a record into the new segment, blocks with the dirent dirty; harness activates `drop_writes` *before* kill/umount, fsck, remount, verify) — **CLOSED as a DOCUMENTED NEGATIVE RESULT (PR #21, owner Fedora 43):** the inject build recovers fully on EVERY config tested — ext4/xfs/btrfs, journal-less ext4 (incl. `ext2`-format), and the last attempt, journaled ext4 `data=writeback` (the ext4 driver's weakest ordering; `data=writeback` weakens data ordering, not the metadata/dirent). The dirent reaches disk transitively via the file's own `fdatasync` everywhere. **Mechanism correction:** the earlier "ext2 block-adjacency" claim is RETRACTED — dmesg shows `ext2`-format is serviced by the **ext4 driver journal-less** on modern kernels (standalone ext2 driver removed in Linux 6.9); mechanism not isolated. No readily-available Linux FS exposes it behaviorally ⇒ honest negative result, not a gap. Tier-1 strace carries the DoD; `fsync_dir` retained as a POSIX-portability safeguard. (Note: `data=writeback` requires a journal — NOT combinable with `-O ^has_journal`.); **Tier-3 = ext4/xfs/btrfs INCONCLUSIVE-by-design** (informational, never red on a masked miss, still red on a correct-build data loss). 
dm-flakey harness also got `wipefs`/zero-before-mkfs + `udevadm settle` + `dmsetup remove --retry/-f --deferred` (fixes the back-to-back "device busy"). Docs corrected (design §14.4d note + §14.13 row, runbook three-tier, this block). `shellcheck`+`cargo fmt --check` clean; the strace gate is self-verified green. **§14.4d behavioral (Tier-2) is now CLOSED as a documented negative result** (Tier-1 satisfies the DoD). **Still owner/CI to observe:** H1 power-pull. diff --git a/docs/wal_design_v6.md b/docs/wal_design_v6.md index 7685b98..54de2ca 100644 --- a/docs/wal_design_v6.md +++ b/docs/wal_design_v6.md @@ -567,7 +567,7 @@ Construct the specific resurrection hazard: write records, induce a torn tail su ### 14.5 Fuzzing (`cargo-fuzz` / libFuzzer + `arbitrary`) - **F1 Recovery-parser fuzz (highest priority).** Arbitrary bytes as a segment file / directory of segments. Parser MUST never panic, never read OOB (verify under ASan/Miri), never infinite-loop, never allocate unboundedly, and the **forward scan MUST stay within its bound** (assert via an instrumented counter). Always terminates with `Ok(suffix)` or clean `Err` (**D11**). *(**M9 — IMPLEMENTED** as `fuzz/fuzz_targets/recovery.rs` (cargo-fuzz + libFuzzer + `arbitrary`, ASan). Its **primary** surface is the real public `Wal::open` driven over an adversarial directory of segment files — fuzzer-controlled filenames + `base_lsn`s (out-of-order/duplicate/gapped/`0`/malformed-name), valid-header dense bodies and pure garbage — so discovery → sort → header validation → the §8.4 incomplete-highest discard → cross-segment continuity → `recover_segment` are all under test; a secondary single-file `recover_segment` probe asserts the bound directly. 
The **bounded-scan counter** is instrumented on the **real** `forward_scan_finds_valid` loop (feature `fuzzing`, compiled out of release) and asserted against `recovery::scan_bound(max_record_size)` — the **same** symbol that bounds the loop, so the two cannot drift; falsifiability shown by widening the loop past `scan_bound` and watching the in-loop `assert!` trip (`distance 4128 > 4124`), then reverting. Built + smoke-green here (60 000 runs, exit 0, no crash); the **N-CPU-hour release gate stays OPEN** (`fuzz.yml` nightly/dispatch + a blocking per-PR smoke in `ci.yml`). **Framing (do not over-read):** the bounded-scan counter holds **structurally** — the loop is `while p <= end` with `end = start + scan_bound(..)`, so `distance ≤ scan_bound` for *every* input; the `assert!` can only ever trip on a future change that **decouples the loop window from `scan_bound`**. So it is a **drift/regression guard**, not the headline D11 finding. The substantive D11 proof in F1 is the **no-panic / no-OOB / no-unbounded-alloc / termination** surface over adversarial inputs — the crash-free fuzzing (60 000 runs now, the N-CPU-hour gate later).)* - **F2 Decoder fuzz** — single-record decoder in isolation. *(**M9 — IMPLEMENTED** as `fuzz/fuzz_targets/decode.rs`. The raw fuzzer bytes are the decode buffer (no `arbitrary` envelope ⇒ the corpus is just record bytes), decoded against a boundary-biased set of `max_record_size` thresholds `{0,1,7,8,64,4096,1<<20,u32::MAX}` so the length bound is exercised from both sides for any record the buffer encodes — including `max < payload`, which is what keeps the payload-bound assertion non-vacuous. Asserts bounds-soundness on any returned record: `payload_len ≤ max`, `framed_len ≤ buf.len()`, `framed_len ≥ 20`, 8-aligned, `20 + payload_len ≤ framed_len` (D11, record level). 
Because a blind byte fuzzer essentially never synthesizes a CRC-valid frame, the corpus is **seeded with genuine CRC-valid records** (built from the canonical `crc32c` vector) so the Record path is actually reached; falsifiability shown by disabling the decoder's length bound ⇒ `payload_len 5 exceeds max_record_size 0` on a valid seed, then reverted. Built + smoke-green (300 000 runs, exit 0, no crash); the N-CPU-hour gate stays OPEN. Per-PR smoke in `ci.yml`, time-boxed nightly in `fuzz.yml`.)* -- **F3 Structure-aware fuzz** — `arbitrary`-generated mostly-valid segments with localized mutations (flip CRC, extend length, zero a region, tamper padding), driving the tail-vs-corruption classifier. +- **F3 Structure-aware fuzz** — `arbitrary`-generated mostly-valid segments with localized mutations (flip CRC, extend length, zero a region, tamper padding), driving the tail-vs-corruption classifier. *(**M9 — IMPLEMENTED** as `fuzz/fuzz_targets/structure.rs`. Builds a valid dense single segment (the harness owns the correct CRCs, so the deep classifier states a blind fuzzer can't reach are hit every run), applies ONE localized mutation at a fuzzer-chosen record (flip CRC / flip a CRC-covered body byte / zero `rec_type`→sentinel / extend `length` / tamper padding / reserved `rec_type`+re-CRC), then drives the real public `Wal::open`. **Sharp classifier oracle:** an invalid **interior** record (a valid record still follows) MUST be **fatal** (`TornMidLog`/`Corruption`) — **D5**, never silent truncation; an invalid **last** record MUST truncate at its offset — **D4**; a `rec_type`→0 mutation is a sentinel ⇒ clean end. Plus: the surviving suffix is a dense, **byte-identical** prefix of the built records (**D6/D10** — nothing past the cut, no mutated/garbage bytes), an **idempotent reopen** presents a clean tail (**D7**, durable zeroing), and the forward scan stays within `scan_bound` (**D11**). 
Falsifiability shown: forcing the forward scan to always report "no continuation" (the classic D5 bug — mid-log corruption silently truncated) trips `D5: interior corruption returned Ok`, then reverted. Built + smoke-green (150 000 runs, exit 0, no crash); the N-CPU-hour gate stays OPEN; per-PR smoke in `ci.yml`, nightly in `fuzz.yml`.)* - **F4 Operation-script fuzz** — drive the §14.3 oracle harness from fuzzer-provided op scripts. **DEFERRED to M9** (like F1–F3); the §14.3 in-tree proptest harness (M6, `tests/model_oracle.rs`) is the interim generative coverage, and its executor (`tests/model/mod.rs::run`) is already proptest-free so the F4 target reuses it verbatim. - Maintain a corpus; run continuously; release gate: N CPU-hours, zero new crashes. @@ -646,7 +646,7 @@ Multi-hour randomized workload with periodic injected crashes+recoveries and che - Every enumerated crash point in §14.4c (including split-batch and roll sub-cases) has a test. - §14.4d negative control catches the injected bug **and** the correct build passes. *(M8 — **satisfied by Tier-1.** **Tier-1 (primary) PASSES, deterministic + per-PR:** `scripts/m8/dirfsync-presence.sh` (in `ci.yml`) straces the roll path and asserts the correct build issues the roll-time directory `fsync` while `--features inject_no_dir_fsync` does not — verified green (`correct=5` dir-fsyncs vs `inject=1`). FS-independent syscall-presence regression guard; the row's satisfier. **Tier-2 (behavioral power-loss) — CLOSED as a documented negative result (PR #21, owner Fedora 43):** the synchronized mid-run cut (`dirfsync_cut_workload`, `dirfsync-negative `) blocks the workload with the new segment's dirent un-synced and cuts inside the window, yet the inject build recovers fully on **every** config tested — ext4/xfs/btrfs, journal-less ext4 (incl. `ext2`-format, serviced by the ext4 driver on modern kernels — standalone ext2 driver removed in Linux 6.9), and journaled ext4 `data=writeback` (the driver's weakest ordering). 
The dirent reaches disk via the file's own `fdatasync` everywhere; the earlier "ext2 block-adjacency" claim is **retracted** and the mechanism was not isolated. No readily-available Linux FS exposes it behaviorally ⇒ honest negative result, not a gap. **Tier-3 — ext4/xfs/btrfs (+ journal-less "ext2") INCONCLUSIVE-by-design**, never red on a masked miss. `fsync_dir` retained unconditionally as a POSIX-portability safeguard. Earlier "certified on ext4" was wrong; the harness loud-skips where dm-flakey is absent rather than fake green; the positive split+roll power-loss case passes under LazyFS in M4.)* - §14.4g resurrection test passes **and** is demonstrated to fail both (a) if zeroing-on-truncate is disabled and (b) if the invalidation is not durably synced (the power-loss-of-zeroing assertion). -- Fuzzers F1–F4: ≥ N CPU-hours since the last parser/format change, zero outstanding crashes; bounded-scan counter never exceeds the bound. *(**M9 in progress. F1 (recovery-parser) IMPLEMENTED** — `fuzz/fuzz_targets/recovery.rs`, primary surface the real `Wal::open` over an adversarial multi-segment directory, bounded-scan counter instrumented on the real scan loop and asserted against the shared `scan_bound` symbol (falsifiability demonstrated). Built + smoke-green (60 000 runs, zero crashes); CI is `fuzz.yml` (nightly/dispatch, time-boxed, contingent) + a blocking per-PR smoke in `ci.yml`. The **N-CPU-hour gate itself stays OPEN** — a hosted short slice does not meet it; carry until a dedicated runner accrues the hours. **Framing:** the "bounded-scan counter never exceeds the bound" clause is satisfied **structurally** (the loop window *is* `scan_bound`), so it is a drift/regression guard, not the headline — the substantive D11 proof is the crash-free / no-OOB / termination surface over adversarial inputs (the running fuzz). 
**F2 (single-record decoder) IMPLEMENTED** — `fuzz/fuzz_targets/decode.rs`, raw bytes × a boundary-biased `max_record_size` set, CRC-valid-seeded so the Record path is reached, bounds-soundness asserts, falsifiability shown (300 000 runs, zero crashes); same CI lanes. **F3/F4 still pending** within M9 (interim coverage as before: §14.5 F3 by the codec proptest; F4 by the M6 oracle harness whose `run(cfg, ops)` the cargo-fuzz target reuses verbatim).)* +- Fuzzers F1–F4: ≥ N CPU-hours since the last parser/format change, zero outstanding crashes; bounded-scan counter never exceeds the bound. *(**M9 in progress. F1 (recovery-parser) IMPLEMENTED** — `fuzz/fuzz_targets/recovery.rs`, primary surface the real `Wal::open` over an adversarial multi-segment directory, bounded-scan counter instrumented on the real scan loop and asserted against the shared `scan_bound` symbol (falsifiability demonstrated). Built + smoke-green (60 000 runs, zero crashes); CI is `fuzz.yml` (nightly/dispatch, time-boxed, contingent) + a blocking per-PR smoke in `ci.yml`. The **N-CPU-hour gate itself stays OPEN** — a hosted short slice does not meet it; carry until a dedicated runner accrues the hours. **Framing:** the "bounded-scan counter never exceeds the bound" clause is satisfied **structurally** (the loop window *is* `scan_bound`), so it is a drift/regression guard, not the headline — the substantive D11 proof is the crash-free / no-OOB / termination surface over adversarial inputs (the running fuzz). **F2 (single-record decoder) IMPLEMENTED** — `fuzz/fuzz_targets/decode.rs`, raw bytes × a boundary-biased `max_record_size` set, CRC-valid-seeded so the Record path is reached, bounds-soundness asserts, falsifiability shown (300 000 runs, zero crashes); same CI lanes. 
**F3 (structure-aware classifier) IMPLEMENTED** — `fuzz/fuzz_targets/structure.rs`, valid dense segment + one localized mutation driving `Wal::open`, with a sharp D4/D5 oracle (interior corruption fatal, last corruption truncates) + D6/D10 byte-identity + D7 idempotent reopen; falsifiability shown (forced "no continuation" ⇒ `D5: interior corruption returned Ok`); built + smoke-green (150 000 runs, zero crashes); same CI lanes. **F4 still pending** within M9 (interim coverage: the M6 oracle harness whose `run(cfg, ops)` the F4 cargo-fuzz target reuses verbatim).)* - §14.8 H1: ≥ M power-pull cycles on target hardware, zero acked-record loss. *(M8: the **harness + runbook are built** — `src/bin/power_pull_{workload,verify}.rs` + `scripts/m8/power-pull.sh`, with the off-box network side channel, send-strictly-after-`commit() Ok` ack-ordering, contiguous-watermark conservative verify, and the H2 vacuous-pass gate as a precondition; the mechanical chain was dry-run green on loopback. **OPEN-pending-owner-run** for the actual ≥50-cycle power-pull on real/cache-configured hardware (no cuttable target in the sandbox). H3 fsync-failure poison: the **§12 state machine RUNS green** via the LD_PRELOAD shim (`scripts/m8/fsync-fault.sh`); the **physical** dm-flakey half now runs **nightly + manual on hosted CI** (`m8-dmflakey.yml`, best-effort + loud skip) instead of owner-only. H4 macOS `F_FULLFSYNC` **Half A** (routing/smoke) now runs on **macOS CI** (`m8-macos.yml`); Half B (`dtruss` trace) stays owner-run (root + SIP). See `docs/m8-runbook.md`.)* - Zero-allocation assertion (§14.7) passes for append/commit and `Reader::next`. *(M7: PASSES — hardened to also prove no-roll in the measured window and to cover a `max_record_size` payload. The §14.7 benches + regression gate exist; **gate enforcement is OPEN-pending-controlled-runner** — informational on hosted CI per §14.11, a real gate on a pinned-governor runner.)* - Miri clean on covered suites. 
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index e0d3689..7fb6d42 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -45,4 +45,11 @@ name = "decode" path = "fuzz_targets/decode.rs" test = false doc = false +bench = false + +[[bin]] +name = "structure" +path = "fuzz_targets/structure.rs" +test = false +doc = false bench = false \ No newline at end of file diff --git a/fuzz/README.md b/fuzz/README.md index 31aca98..09efae5 100644 --- a/fuzz/README.md +++ b/fuzz/README.md @@ -11,8 +11,9 @@ tail-vs-corruption classifier (D4/D5/D10). |---|---|---| | `recovery` | F1 | A whole **directory of segment files** (adversarial filenames + `base_lsn`s) driven through the real public `Wal::open`, plus a secondary single-file `recover_segment` probe. Asserts the bounded forward scan never exceeds `scan_bound(max_record_size)`. | | `decode` | F2 | The **single-record decoder** in isolation: raw bytes as the decode buffer × a boundary-biased `max_record_size` set. Asserts bounds-soundness of any returned record (payload ≤ max, framed ≤ buf, ≥ 20, 8-aligned, header+payload ≤ framed). Corpus seeded with genuine CRC-valid frames so the Record path is reached. | +| `structure` | F3 | **Structure-aware classifier**: a valid dense segment + one localized mutation (flip CRC/body, zero `rec_type`, extend length, tamper padding, reserved type) driving the real `Wal::open`. Sharp oracle: interior corruption fatal (D5), last corruption truncates (D4), surviving suffix dense + byte-identical (D6/D10), idempotent reopen (D7). | -F3 (structure-aware) and F4 (op-script oracle) land in later slices. +F4 (op-script oracle) lands in a later slice. 
## Running diff --git a/fuzz/corpus/structure/042c3eab61390f6689c5e42aeb4ad0ff9f97340d b/fuzz/corpus/structure/042c3eab61390f6689c5e42aeb4ad0ff9f97340d new file mode 100644 index 0000000000000000000000000000000000000000..8df03d8abd4d6213df489fde7c40f813ada29256 GIT binary patch literal 27 gcmZSh|B!)!!QuaZhPsm{Pcnov$O5^HEDS(V0FSu{QUCw| literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/0610af0a0d967c9cef07077287855f4a51ab0e28 b/fuzz/corpus/structure/0610af0a0d967c9cef07077287855f4a51ab0e28 new file mode 100644 index 0000000000000000000000000000000000000000..2e34b841c385bf33861edd4a53360c66308eec2b GIT binary patch literal 27 dcmezW|G&!r|3Cl#XJlYlxNzZr2w(tG3;-L66Y&55 literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/087e7a2387fd0357f3d4a96af65aca5ccdacf37c b/fuzz/corpus/structure/087e7a2387fd0357f3d4a96af65aca5ccdacf37c new file mode 100644 index 0000000000000000000000000000000000000000..1e1048099d25ce3dab4c8db0458b6b1d54f40a4b GIT binary patch literal 38 kcmZQz00Krv5XtcWKLaBW$}*UPuo*K$IFRlSmu0XA07&cvng9R* literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/09d28000e92ae94fdd997b54a6632b469faac183 b/fuzz/corpus/structure/09d28000e92ae94fdd997b54a6632b469faac183 new file mode 100644 index 0000000000000000000000000000000000000000..f9a7438bb923931ed45e2cf23474881a1e8add57 GIT binary patch literal 34 ncmWN@F#!M|48*{^ml|iN7!)xuq`m3#EOEM3Q literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/0e298300aac04bafdc922ed58830adf44c90e599 b/fuzz/corpus/structure/0e298300aac04bafdc922ed58830adf44c90e599 new file mode 100644 index 0000000000000000000000000000000000000000..5a8e3ef023f3ca465c557ca0e414626a797298d2 GIT binary patch literal 46 ucma$4|DWOif4h@i-2eYag8-1Peo`HXPX3oWdGaJ!@+8YiyOWG38NvZ*lo;s% literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/0e50a80f671fedb8356f70954b38d6cb0b055bd1 b/fuzz/corpus/structure/0e50a80f671fedb8356f70954b38d6cb0b055bd1 new file mode 100644 index 
0000000000000000000000000000000000000000..493bed0e0aa62c73323225669d5701b673fc458b GIT binary patch literal 41 bcmezWpW*+1yOSp&B#1)g0i}TGBttj=MU)zb literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/11ecca55db4ea38e84a31f35fc7281412611d791 b/fuzz/corpus/structure/11ecca55db4ea38e84a31f35fc7281412611d791 new file mode 100644 index 0000000000000000000000000000000000000000..76c80a4bf314f36af408fdb5a28a66c4f079d89a GIT binary patch literal 27 ZcmezWKcPeA|No!=fk4r~R?Gkn7y#!b41xdv literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/130ce4fcec056d98ac7dca28edf11bc6aa318978 b/fuzz/corpus/structure/130ce4fcec056d98ac7dca28edf11bc6aa318978 new file mode 100644 index 0000000000000000000000000000000000000000..9ae5ac50e7b268a946ffee5ff03287f61bab66c7 GIT binary patch literal 27 acmZQzfPqQ>85n_3mf_?{1_tE`_7eaq3IxRf literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/155d9d534df0fecb8ddffaf77b57d13539a09c22 b/fuzz/corpus/structure/155d9d534df0fecb8ddffaf77b57d13539a09c22 new file mode 100644 index 0000000000000000000000000000000000000000..6df0bbf6c06062412b2b92ec190f88ddb56e67c9 GIT binary patch literal 510 zcmb_Zu?+$-40OX32q7dSDhd|(HK1k?7$5~D17sT4QIsiO3_x+%@jF52^%Yknm$T!X z;}{L4U@Rd7U=m_iy5_O3JvCe#bAmlyc#pc>k5ZbMpLC`TJqVY9nSg bKRO@X1epHKa6nka=FJe>k@Uh{1Qr7T7k%965|Nj6@o(QJ^ literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/28ca2827d34c1e7d53d7397cd620969d86976618 b/fuzz/corpus/structure/28ca2827d34c1e7d53d7397cd620969d86976618 new file mode 100644 index 0000000000000000000000000000000000000000..00fdf6bbb8af1b2186595172bfab435201120827 GIT binary patch literal 262 zcmezW_P@&i_y2#Zfv9@j?*IQ!o;*37A)G;$p`YQu-N};-|AFNHtGBPhz<6KewLi|NG1Ah~nl^CrzXi?DF6`s(V<=o!c!+g?+-*-s< bPz{4NW0@t{44-)yht{tdwx_<2$nEq3YFYpn^8bHc^-2I= C#xM2& literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/4969e681a3347db88b99b7dfc4bbe6993cd8ea6e b/fuzz/corpus/structure/4969e681a3347db88b99b7dfc4bbe6993cd8ea6e new file mode 100644 index 
0000000000000000000000000000000000000000..320b3d6425ce4d56d66ac4a216b53d5473018f6a GIT binary patch literal 46 ocmZQz00Krv5XtcWKLaBW$}*UnF*Ae%>3)VoUEPGDq^WiU5mW(a5a&T#VN1OP%S29W>& literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/52866320303072e5071cf58971044cb211ce7886 b/fuzz/corpus/structure/52866320303072e5071cf58971044cb211ce7886 new file mode 100644 index 0000000000000000000000000000000000000000..dd6954a83fb45f7ab4a586fb8e449293c2ac833f GIT binary patch literal 46 lcmezW`@hQn|0@>$2T}~*8Msd}FfcMKWn}pFf5CrDaR7s97-awe literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/52b4780aa46782439b6affca1e9857029a83875a b/fuzz/corpus/structure/52b4780aa46782439b6affca1e9857029a83875a new file mode 100644 index 0000000000000000000000000000000000000000..8c7122ba70f537c09ed07e42b40d84285380325c GIT binary patch literal 27 XcmZQzVCZIm0#yb^MliDhL{0zz688ba literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/54794d10b78c852a2401d2e3d718448a9cf63969 b/fuzz/corpus/structure/54794d10b78c852a2401d2e3d718448a9cf63969 new file mode 100644 index 0000000000000000000000000000000000000000..5f13df9fb27c51f8cf4f5ce36449bf231b87eebc GIT binary patch literal 27 gcmezW`@hP6AYf!uy%;x<;(v8Q}{d9 literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/6b140356a217f605c361b1eb341e434180233493 b/fuzz/corpus/structure/6b140356a217f605c361b1eb341e434180233493 new file mode 100644 index 0000000000000000000000000000000000000000..20a24ac2c1285c82e31018bdb1f5c31b0780823f GIT binary patch literal 27 ecmezWpW*+1yOSp&$|F80&;X4EO$;I*v42%p*85zF)U+^C!1LVmvn5zJ>88bsTgY5s` aK&8k!VCpwRz%~#CHVbSDhyybltP%i!aW$#{ literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/7350b8cd28ebe4a1654b5b08335be83895f5e635 b/fuzz/corpus/structure/7350b8cd28ebe4a1654b5b08335be83895f5e635 new file mode 100644 index 0000000000000000000000000000000000000000..c27d7fb4c86d0dccb9adf4a09342ebafd3fe15df GIT binary patch literal 27 acmezW9|%-H1O%x3|IWame)2y90|Nj-BoiM1 literal 0 HcmV?d00001 diff --git 
a/fuzz/corpus/structure/739ceab5acd0bea54eabc185ff3b0fdfa2156fbf b/fuzz/corpus/structure/739ceab5acd0bea54eabc185ff3b0fdfa2156fbf new file mode 100644 index 0000000000000000000000000000000000000000..43fa66b2e68ab8bdeced26de296b524f4723b905 GIT binary patch literal 128 zcmezW|M%Pf|5e_DD3$;3>$@4c|3g4Ml>U#dq8kX{atH>)|NnL;ySV>HgTVj)VD3pU VsjhzVB#70BPz+-2`2Sy<0RT)uWi$W) literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/73ed03711ab6c7f8326ca100b984f36963adba4c b/fuzz/corpus/structure/73ed03711ab6c7f8326ca100b984f36963adba4c new file mode 100644 index 0000000000000000000000000000000000000000..660c1c09b0c294f3f0359da2c06126672630c3ba GIT binary patch literal 27 XcmZQzfP<4KPcnovFvvQ9MPwNOD#HUU literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/75631e6904d2e59e63545e3df4ecdcc974b38190 b/fuzz/corpus/structure/75631e6904d2e59e63545e3df4ecdcc974b38190 new file mode 100644 index 0000000000000000000000000000000000000000..fd7b16f041f0bd03e371d01652c6fde9204321b2 GIT binary patch literal 34 icmZQz00Krv5XtcWKLaBJ<11N)lP4J%m>C!p!D0YYCI-F$ literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/75ed8005ee00dc55bf226b02420942332fdb7d90 b/fuzz/corpus/structure/75ed8005ee00dc55bf226b02420942332fdb7d90 new file mode 100644 index 0000000000000000000000000000000000000000..3df7417cc95c57feeb0503b760eefd787120ac3d GIT binary patch literal 129 zcmezW`@aeU!zBg~`2T;!!vFtO{xf`M;6Axno`HdpVJRcSxBpODmcd+snIW73r~`}2 m|6ssz6s%(bkh5YTP#;W>?0+y@1;{r8nZd}w-3c;Zdno{_#4q*$ literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/76f7ff2587831bc416bccad4c554dcd4570fd8b4 b/fuzz/corpus/structure/76f7ff2587831bc416bccad4c554dcd4570fd8b4 new file mode 100644 index 0000000000000000000000000000000000000000..35c8f5fe07ca603cc064c17816dde47ead3a57f8 GIT binary patch literal 64 rcma$4_n+nef4l#7Cr_Sa_zz@(DKIbgKUf5dI8d79q}@rzlMLYiS}Z5d literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/79b75c6d7fd89e7c949bde1d8f2635bb590a8e18 b/fuzz/corpus/structure/79b75c6d7fd89e7c949bde1d8f2635bb590a8e18 new file 
mode 100644 index 0000000000000000000000000000000000000000..43ea539c1ad81223514507efa877d156b0d5b2c4 GIT binary patch literal 27 XcmZQzU|?kUj|3PQ7#cug6D9xv%Bu~S literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/7bb556e341b903591c498d2843197c357b238e10 b/fuzz/corpus/structure/7bb556e341b903591c498d2843197c357b238e10 new file mode 100644 index 0000000000000000000000000000000000000000..c6ffec86e6efdd6a55f6bfac8619e976ce3c9248 GIT binary patch literal 27 ccmezW|G$dk|Ns9P9T^rbTnJ=90RsaA01I&vdjJ3c literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/7bc0a73e7004b6eab650b2f5fbef8c55ab770c3f b/fuzz/corpus/structure/7bc0a73e7004b6eab650b2f5fbef8c55ab770c3f new file mode 100644 index 0000000000000000000000000000000000000000..ce81111f8d3da3dacda681736ae75ad80e0191b8 GIT binary patch literal 880 zcmZ3yc=2KeAh>@0|J&=`-QE9z04RhEF5?0JRsO&K|NANwNMF7B-wzNGAk&EetROXurS`7*NW7cWN0Bq5kk7R3E; zK~%ji(DNrxo}A7Q&LGRs&+tFO?&L{^|Nrey{-?43tnm8};?&WQ21YYHAt^!<0YQ@F dl!TE&&;;x+U&dxBNED|M!0t5br;bV))L$eR8op15gG-oeEH`88bsT sgY18>J|G{$L)Q<}y%`P&s8HFw8LNv1)@@kA1L&dupdd4Q4GJe)0CBmOl>h($ literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/81bccba3c7bd64152057250241d946d6516187bd b/fuzz/corpus/structure/81bccba3c7bd64152057250241d946d6516187bd new file mode 100644 index 0000000000000000000000000000000000000000..e985e6e0b80c67292fe44a1d863a684dabffe5de GIT binary patch literal 170 zcmezW_P@&i_y2#Zfv9@j?*IQ!o;*37A)G;$p`YQu-N};-|AFNHs|fJl@IMgL{Xc#E l`t|N^pav8`*eI}akeyJVe)2y9$ZkxtU@HFa`2Sy<0RZ@gf9e1L literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/82beaa5bb6a8a694e95ac4da97dcb12671fb1de1 b/fuzz/corpus/structure/82beaa5bb6a8a694e95ac4da97dcb12671fb1de1 new file mode 100644 index 0000000000000000000000000000000000000000..d84be4dff6f30c1b8899c764c3467191ab64aa62 GIT binary patch literal 125 zcmezW_P@&i_y2!`D28wbS%!Xw|8^%&GW-XU|F0sze}n)3>;9jK>g%@ W2B2~EFf}k5Nqq~1z2pCXZ3X~Huw#+{ literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/842abc768b13b4adf20a4072c207ae5d01396a43 
b/fuzz/corpus/structure/842abc768b13b4adf20a4072c207ae5d01396a43 new file mode 100644 index 0000000000000000000000000000000000000000..3e9638a87329e2bed9e0e1f287bf9faba8b1661e GIT binary patch literal 37 fcmebH|DWMM1k{~831xj_0J6dvWEuJyK!^zdA9)v= literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/855051e3744afbca95669a252a521773686cf642 b/fuzz/corpus/structure/855051e3744afbca95669a252a521773686cf642 new file mode 100644 index 0000000000000000000000000000000000000000..3054013987f7aea3e92059ee52cc2d5217e2bf38 GIT binary patch literal 28 bcmZQzfPw%285n_3mf_?{28M73S&n`HG*$$= literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/8f550e5caadd1648fa92ea05c3d10b7a044e26aa b/fuzz/corpus/structure/8f550e5caadd1648fa92ea05c3d10b7a044e26aa new file mode 100644 index 0000000000000000000000000000000000000000..d30918958501ec71d1136a76085699ac0db2be91 GIT binary patch literal 85 hcmZQzSO^4*v4J_*1Tn{|x{CGcW)EZ2J`r literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/939e9708d7d2c978bf71ac1b0f0f3cea00fb07f7 b/fuzz/corpus/structure/939e9708d7d2c978bf71ac1b0f0f3cea00fb07f7 new file mode 100644 index 0000000000000000000000000000000000000000..d53820c95570edafa5e98142e0a149f95b7a800c GIT binary patch literal 27 ZcmZQzU|?i`00#yJRR%^7uK^@50RRcP0eJub literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/93fdb912ff393e72b2d4b41b2600316fee28c9f2 b/fuzz/corpus/structure/93fdb912ff393e72b2d4b41b2600316fee28c9f2 new file mode 100644 index 0000000000000000000000000000000000000000..5d4c2e6028bb5df2a2a77362742adea3d2be670e GIT binary patch literal 29 ZcmZQzKmxK1w*MIz{{R0E#10G$6974&2PFUi literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/95c32f32b7c04ad8b0d1abf7436f8b885908e3cd b/fuzz/corpus/structure/95c32f32b7c04ad8b0d1abf7436f8b885908e3cd new file mode 100644 index 0000000000000000000000000000000000000000..071e8db132abac07b017b2079171a381871b918a GIT binary patch literal 29 jcmZQzW?*1Y24V(=KPOKzg#Wi@VBlp?XZ+8|z%UsAO7R8` literal 0 HcmV?d00001 diff --git 
a/fuzz/corpus/structure/96a43307bbd856c782c6a7dd4543416e0f5e215a b/fuzz/corpus/structure/96a43307bbd856c782c6a7dd4543416e0f5e215a new file mode 100644 index 0000000000000000000000000000000000000000..8ca2f173a2be7b0b839b03c453e4e2b663739506 GIT binary patch literal 27 fcmZQzU|{(F|38rAU|6_t;dfOa^CXbxz`y_idMgMG literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/96e80dc54ac22a75ed44fb3d0d6f15dfc862c950 b/fuzz/corpus/structure/96e80dc54ac22a75ed44fb3d0d6f15dfc862c950 new file mode 100644 index 0000000000000000000000000000000000000000..82538f867cf6883c363564bcbfd95695eb68a512 GIT binary patch literal 306 zcmezW_P@&i_y2#Zfv9@j?*IQ!o;*37A)G;$p`XDJ0=%F!jOy+NDuaL@5bF1TyOSpw z{sYPXR}tX9AxO6F|LN=3uOsOJih(UugjonN13}g!m_XJ^Ac8pvVGURb$mLL=e)2y9 Z$OkC)z->Tyi9{1%#zW{G|Nm<<002i+;CTQ5 literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/970051bac6e1bd06e7e2f6c660c0645364b83462 b/fuzz/corpus/structure/970051bac6e1bd06e7e2f6c660c0645364b83462 new file mode 100644 index 0000000000000000000000000000000000000000..a0f16448cf4b1c8bfb9430bbbaaf314345b68319 GIT binary patch literal 46 ocmezW|NnnEhW{%6-vb$7pz^;`g+cw~e+Kp4{|$g5a6XU?0QkQijQ{`u literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/9764da1c4fe0ea5e7f1c74fa426e25bd988f227e b/fuzz/corpus/structure/9764da1c4fe0ea5e7f1c74fa426e25bd988f227e new file mode 100644 index 0000000000000000000000000000000000000000..b13eee3690b1376b8791a1d73ade0e393715155b GIT binary patch literal 30 hcmZQzW@BJbW&i<=|Nj|cIsO9yKZ82se?|s|Q~+fZ2v7h3 literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/97cac2eb9be831bae35f997da7ed8251e115816a b/fuzz/corpus/structure/97cac2eb9be831bae35f997da7ed8251e115816a new file mode 100644 index 0000000000000000000000000000000000000000..0d94ff35a8b85a9bca737b69044efa6b1b351399 GIT binary patch literal 28 acmZQzfB;zr+yDO^87BM(1E81#0|NkTF9~D- literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/99937979861509d74572670eb66b4321126fde80 b/fuzz/corpus/structure/99937979861509d74572670eb66b4321126fde80 new file 
mode 100644 index 0000000000000000000000000000000000000000..944963d4efd3655c7269a18ce18636baadfb7550 GIT binary patch literal 100 zcmZQz00Krv5XtcWKLaBW$}*U%0I?Y}LpX!%|KI;r{{LUG@IR1Z_|Cw65~!MCDI>$T O%}4;33`o~zmVN*+TqrvL literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/9d3f2d1e2f3ae5ae3fe4318a8478db6b12edf5f3 b/fuzz/corpus/structure/9d3f2d1e2f3ae5ae3fe4318a8478db6b12edf5f3 new file mode 100644 index 0000000000000000000000000000000000000000..2255fbad4979b7cc6eca81a016a31912ae0e0deb GIT binary patch literal 313 zcmZ3y7y_2pXCw3i=X403JeYO4Gddw5N(gWlpff6}Miu(^^_H^M!UaqcdUOF%!@+vSKurWtuZK z5FEj!SR6SmDzx?m)u*KTCd%Sm*A;^_%rK}54o{qtDmI?i{?K(RlLGB0AK7om`)Ag4 VNGj1S{m@75-QBORpf-C6A$J)Yg0}zw literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/a3c440ac8e42d4abf4182be76406df25a4363c48 b/fuzz/corpus/structure/a3c440ac8e42d4abf4182be76406df25a4363c48 new file mode 100644 index 0000000000000000000000000000000000000000..7cba6af3c24ced6d32ba8e32430280df71b7aeb8 GIT binary patch literal 74 lcmezW`@hQn|0@>$2T}~*8Msd}FfcMKWn}oanIJ&Z0|1blD(3(I literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/a3e6d0396be8359ae0825f2ba1a230b80dca2250 b/fuzz/corpus/structure/a3e6d0396be8359ae0825f2ba1a230b80dca2250 new file mode 100644 index 0000000000000000000000000000000000000000..d85b5b6a8745b91e8ea966bcd2fd40f8c29aa1da GIT binary patch literal 80 ncmezW|M%PfD*xX@srqgZ_>T*ypZw1NR18vusbt6h|Jn=y&LBhm literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/a461aa184b16964471434c7722d1ea171182984f b/fuzz/corpus/structure/a461aa184b16964471434c7722d1ea171182984f new file mode 100644 index 0000000000000000000000000000000000000000..56b96c29555991736781970831f3120127c93a6a GIT binary patch literal 27 RcmezWKcVA45>SD&830q67xn-E literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/a63ad1a5a86b0b99cbf7bd6a5b9d8a774f6dcab8 b/fuzz/corpus/structure/a63ad1a5a86b0b99cbf7bd6a5b9d8a774f6dcab8 new file mode 100644 index 
0000000000000000000000000000000000000000..6717fdff1cc45bfedb230e768862d76f14de29ad GIT binary patch literal 27 ZcmZQzSh#Q@0~7$6s{g-(IVVpt003SS2Xp`c literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/ae13c6bdc21c067b764bf080c3ce545d857c5956 b/fuzz/corpus/structure/ae13c6bdc21c067b764bf080c3ce545d857c5956 new file mode 100644 index 0000000000000000000000000000000000000000..d3a6a40f1fc446e42e47396b1e55ce7007aaae1c GIT binary patch literal 94 wcmezWpW*+1yOSp&B#?goAISgD@E1uCD2K^|>aYXJz}O&GIEZ)hBv>>Y0KNJ>AOHXW literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/b1610389177fa92ebe1c913b0c16195b3cbe70db b/fuzz/corpus/structure/b1610389177fa92ebe1c913b0c16195b3cbe70db new file mode 100644 index 0000000000000000000000000000000000000000..2ea6b292c3cc70523dacee6f1a00083c862a95ed GIT binary patch literal 33 icmZQzfB;zr+yDO^87BPy|6dn~dDZ^|#T^(JCIA4W1q)OF literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/b2c7c60de0e6c20399d21d0523d34d61c921e69e b/fuzz/corpus/structure/b2c7c60de0e6c20399d21d0523d34d61c921e69e new file mode 100644 index 0000000000000000000000000000000000000000..e69ddc62072333d66e344c2eeac0721dc4319acc GIT binary patch literal 27 gcmZQzU|{(F|38pqU|6_tq3Zwd3=Ah37+HW^0C+A3$^ZZW literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/bbea107c5ab1993645f49ae2731089f4eed01a4d b/fuzz/corpus/structure/bbea107c5ab1993645f49ae2731089f4eed01a4d new file mode 100644 index 0000000000000000000000000000000000000000..00389b4a62c5ebed25b91d4cdba733793acf86b9 GIT binary patch literal 535 zcmcgpF$w}P5S$>`+#8N-uC-J_@V;^1I|+i7jj!0Z*lS@iPBuC9#6|=|$YwS>vtb7T z`+y4v-gwv{pvGF)mO@m(6=oni^N^y@vY$2T}|$`a1*nNd^W+1{l4Rk>T6_1^?md;Nk#Wq#v39 literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/cc1ace770a7a022dccb17a780155747545399218 b/fuzz/corpus/structure/cc1ace770a7a022dccb17a780155747545399218 new file mode 100644 index 0000000000000000000000000000000000000000..ad90b4befebb9c4cb1113ddd3bc72b8798155d49 GIT binary patch literal 27 XcmZQzU|?hb0S6#tV1%$6Kmrp03|s+} literal 0 
HcmV?d00001 diff --git a/fuzz/corpus/structure/d06c3ff8e3ea85766877e03cc169d7ee4e37f589 b/fuzz/corpus/structure/d06c3ff8e3ea85766877e03cc169d7ee4e37f589 new file mode 100644 index 0000000000000000000000000000000000000000..afca2ab4e76a07c9613813b904a7667629beec16 GIT binary patch literal 344 zcmezW_P@&i_y2#Zfv9@j=^Wt$P%y}I8DH(8)h_w-tqsxHUj`X(+RWy literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/d6c8aeed118282b3285aa39906dd304daef1277d b/fuzz/corpus/structure/d6c8aeed118282b3285aa39906dd304daef1277d new file mode 100644 index 0000000000000000000000000000000000000000..745f2d6e50dfbeab5e928b8f582b0a2f532aff96 GIT binary patch literal 33 VcmZQz00E8*k7lK&&P~Mx$pEYs3!VS~ literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/d8eaa8a080a625fb63a39905a5d410710a36883b b/fuzz/corpus/structure/d8eaa8a080a625fb63a39905a5d410710a36883b new file mode 100644 index 0000000000000000000000000000000000000000..47e34f88ef3fe73b30dbdca56fd993c6dead2306 GIT binary patch literal 54 mcmexw%fMj700RFZK;{4cpa1_eLU|C%{?GsaFnOSeFarR}#V5A_ literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/d9bfb773d3007e5025b7c301648d8315ca8e04e0 b/fuzz/corpus/structure/d9bfb773d3007e5025b7c301648d8315ca8e04e0 new file mode 100644 index 0000000000000000000000000000000000000000..095f4cf065a9f11df388fe510c14339ac13b0a4c GIT binary patch literal 61 wcmezW|Nnm$1_p-zD*xXDnIOQc4kT3me`jD&KM589vQ|M!0t5br;bV))L$eR8op15gG-oeEH`88bsT ogY18>J|G{$gXx3m+6)KiCZnlP*}R!pmjI3Y4|1&8Ymf_V0Uw`PQ~&?~ literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/dda48b05b3eae4676ab834c7eef007c36f6cd50a b/fuzz/corpus/structure/dda48b05b3eae4676ab834c7eef007c36f6cd50a new file mode 100644 index 0000000000000000000000000000000000000000..13912823ad622ac046101f148cf8e348c0ca4460 GIT binary patch literal 991 zcmd^8K?=e^44fi(=^IKB`T$QA1pAGCuTlh0UiKAvi+6kIA~Zqe0;Re=zqXggmJZ1Sd~|c#U9fNNxoH-TI1r literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/f458f65575c8a52555c9c8ef52b6fc78a5a2aa38 
b/fuzz/corpus/structure/f458f65575c8a52555c9c8ef52b6fc78a5a2aa38 new file mode 100644 index 0000000000000000000000000000000000000000..96bc0859aff78870609ee8ad6a89f11cdc634448 GIT binary patch literal 494 zcmah^K?(vf49vw>h=?Ev!XESi`V75z@geI2dhqB2@|xhWl)hr|0ect|TuH%F=g>?u z2{U14CPmH|atHwmZIy@(V2u~X!OE@EWf6JMi57Gx97G8EA~~=(gC5VEziwF7VS^4{ z8s7-WtfrtYciS>#nV#8|tJ8_K@}GYWn5TrDnqGQH^#BjlWHVF$T;x%8)<1Q2jIm~D JU0e8QTHk8_aXbJ3 literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/f51ffeee1ce264a5a9e44a39c152d9434b050eb3 b/fuzz/corpus/structure/f51ffeee1ce264a5a9e44a39c152d9434b050eb3 new file mode 100644 index 0000000000000000000000000000000000000000..f43bf3116a311c64fc96344489818d07a2f19ee3 GIT binary patch literal 27 ZcmZQzfP<4KPcnov{Qu9Z4ib`O002BB1p)v7 literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/f6004d89499154ec3e18efe7e6b25b1651258486 b/fuzz/corpus/structure/f6004d89499154ec3e18efe7e6b25b1651258486 new file mode 100644 index 0000000000000000000000000000000000000000..9c8534f34c753705747aa527c1e9b1e8c019fcb8 GIT binary patch literal 28 bcmezW|G&zAAYf!*Sh;W^hy?)*41XB_Eo&3~ literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/faf1d3e5bfead395a1d3c1c8fb4611019b98926a b/fuzz/corpus/structure/faf1d3e5bfead395a1d3c1c8fb4611019b98926a new file mode 100644 index 0000000000000000000000000000000000000000..5690569941792a079c9a550bc4e79dd0159220e8 GIT binary patch literal 27 bcmZQ%`40gMbtg}rWC&-FW$0%B0a*qB&z}nT literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/fc28fa89d863e289e8be5a2dd383edbb4303c714 b/fuzz/corpus/structure/fc28fa89d863e289e8be5a2dd383edbb4303c714 new file mode 100644 index 0000000000000000000000000000000000000000..0ca20eb4a523da508a0bbb5d10b248350e57e9aa GIT binary patch literal 174 zcmexw%fVoX32vbZZA9fld0uFu-QC^)q2LFE{{8m?X|Npfa0DvJ$_W%F@ literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/fc587f0f172a220dc2b252fba1fce12a7154d896 b/fuzz/corpus/structure/fc587f0f172a220dc2b252fba1fce12a7154d896 new file mode 100644 index 
0000000000000000000000000000000000000000..8a3498632119e9ff47833b66e80c567e420b4976 GIT binary patch literal 301 zcmZ3y7y_|K?46+RU4FBy; zo@DqBB>!JUfd7Ud%j*80zJ48M^M6I`0HhEB7R!U!>L>p*fUJT#8X=9xi8!R}FJDGz ON3cMmARQ19APoSLK;V7= literal 0 HcmV?d00001 diff --git a/fuzz/fuzz_targets/structure.rs b/fuzz/fuzz_targets/structure.rs new file mode 100644 index 0000000..7e473af --- /dev/null +++ b/fuzz/fuzz_targets/structure.rs @@ -0,0 +1,284 @@ +//! F3 — structure-aware tail-vs-corruption fuzz (§14.5). +//! +//! Builds a **mostly-valid** dense single segment (proper header + CRC-correct +//! frames) from a fuzzer-chosen shape, applies ONE **localized mutation** at a +//! fuzzer-chosen record, then drives the real public `Wal::open` and checks the +//! recovery classifier's verdict. This is the target with teeth on the +//! tail-vs-corruption state machine: +//! +//! - **D4** torn tail: a corrupt **last** record truncates at its offset (and the +//! region is durably zeroed — verified by an idempotent reopen). +//! - **D5** mid-log corruption is **fatal**, never silently truncated: a corrupt +//! **interior** record (a valid record still follows) makes `open` error. +//! - **D6/D10** no resurrection / no garbage: the surviving suffix is a dense, +//! byte-identical prefix of the records we built — nothing past the cut, no +//! mutated bytes surfaced. +//! - **D11** bounded/total: never panics; the forward scan stays within +//! `scan_bound` (asserted around the production `open`). +//! +//! Because *this code* builds the valid frames (correct CRCs via the `fuzzing` +//! generators), the fuzzer only supplies the scenario shape — so the classifier +//! states a blind byte fuzzer can't reach are hit on every run. 
+ +#![no_main] + +use std::fs; + +use arbitrary::Arbitrary; +use libfuzzer_sys::fuzz_target; +use open_wal::{Lsn, TailState, Wal, WalConfig, WalError}; + +const HEADER_SIZE: usize = 64; +const RECORD_HEADER_SIZE: usize = 20; +const REC_TYPE_OFF: usize = 16; + +/// The localized mutation applied to one record. +#[derive(Arbitrary, Debug, Clone, Copy)] +enum Mutation { + /// Flip a byte of the 4-byte CRC field ⇒ invalid record. + FlipCrc, + /// Flip a CRC-covered body byte ⇒ invalid record. + FlipBody, + /// Zero the `rec_type` byte ⇒ a sentinel (clean end of records), NOT invalid. + ZeroRecType, + /// Enlarge the `length` field ⇒ CRC mismatch / overrun ⇒ invalid record. + ExtendLength, + /// Flip a padding byte (padding is inside CRC coverage) ⇒ invalid record. + TamperPadding, + /// Set `rec_type` to a reserved value AND fix the CRC ⇒ a CRC-valid record + /// the decoder rejects as `UnknownRecType` (invalid). + ReservedRecType, +} + +impl Mutation { + /// Whether this mutation makes the record *invalid* (a corruption boundary) + /// as opposed to turning it into a *sentinel* (clean end). + fn invalidates(self) -> bool { + !matches!(self, Mutation::ZeroRecType) + } +} + +#[derive(Arbitrary, Debug)] +struct Scenario { + base: u64, + seg_big: bool, + max_sel: u32, + payloads: Vec<Vec<u8>>, + mutate: bool, + mutation: Mutation, + target_sel: u32, + byte_sel: u32, + trailing_zeros: u8, +} + +/// Padding to the next 8-byte boundary for a `payload_len`-byte payload.
+fn pad_for(payload_len: usize) -> usize { + (8 - ((RECORD_HEADER_SIZE + payload_len) % 8)) % 8 +} +fn framed_size(payload_len: usize) -> usize { + RECORD_HEADER_SIZE + payload_len + pad_for(payload_len) +} + +fuzz_target!(|s: Scenario| { + // ---- config ---- + let seg: u64 = if s.seg_big { 65536 } else { 4096 }; + let max_hdr = (seg - 91) as u32; // §5.3: max_record_size + 91 <= segment_size + let max_record_size = s.max_sel % (max_hdr + 1); + let cfg = WalConfig { + segment_size: seg, + max_record_size, + }; + // base in [1, 1<<40] so base + n never overflows and the header accepts it. + let base = (s.base % (1u64 << 40)) + 1; + + // ---- build a valid dense segment: header + up to 6 records ---- + let payload_cap = (max_record_size as usize).min(64); + let mut bytes = open_wal::fuzzing::segment_header_bytes(base); + // (offset, framed, payload_len) per record, and the original (lsn, payload). + let mut recs: Vec<(usize, usize, usize)> = Vec::new(); + let mut origs: Vec<Vec<u8>> = Vec::new(); + for raw in s.payloads.iter().take(6) { + let plen = raw.len().min(payload_cap); + let off = bytes.len(); + if off + framed_size(plen) > seg as usize { + break; // keep the whole segment within segment_size (single segment) + } + let lsn = base + recs.len() as u64; + let framed = open_wal::fuzzing::encode_record_into(&mut bytes, lsn, &raw[..plen]); + recs.push((off, framed, plen)); + origs.push(raw[..plen].to_vec()); + } + let n = recs.len(); + + // ---- apply one localized mutation in the record region ---- + let mut mutated_index: Option<usize> = None; + if s.mutate && n > 0 { + let m = (s.target_sel as usize) % n; + let (off, framed, plen) = recs[m]; + apply_mutation(&mut bytes, s.mutation, off, framed, plen, s.byte_sel); + mutated_index = Some(m); + } + + // Optional trailing zero bytes (a partial sentinel region after the records).
+ bytes.extend(std::iter::repeat(0u8).take(s.trailing_zeros as usize)); + + // ---- materialize and run the REAL public recovery path ---- + let dir = match tempfile::tempdir() { + Ok(d) => d, + Err(_) => return, + }; + let path = dir.path().join(format!("{base:020}.wal")); + if fs::write(&path, &bytes).is_err() { + return; + } + + open_wal::fuzzing::scan_probe_reset(); + let res = Wal::open(dir.path(), cfg); + let peak = open_wal::fuzzing::scan_probe_peak(); + assert!( + peak <= open_wal::fuzzing::scan_bound(cfg.max_record_size), + "forward scan exceeded bound: peak {peak} > {}", + open_wal::fuzzing::scan_bound(cfg.max_record_size) + ); + + match res { + Ok((wal, report)) => { + assert_eq!(report.oldest_lsn, Lsn(base), "oldest must be the segment base"); + // durable is within the records we wrote (never invented). + assert!( + report.durable_lsn.0 + 1 >= base && report.durable_lsn.0 <= base + n as u64, + "durable_lsn {} out of range for base {base}, n {n}", + report.durable_lsn.0 + ); + + // Replay: dense run oldest..=durable, byte-identical to the prefix we + // built (D2/D6/D10 — nothing past the cut, no mutated/garbage bytes). + let replay = replay(&wal); + check_dense_prefix(&replay, base, report.durable_lsn.0, &origs); + + // Idempotence + durable zeroing (D7/D10): reopen must succeed, agree on + // the watermarks, and present a clean tail. + drop(wal); + let (_wal2, report2) = Wal::open(dir.path(), cfg).expect("reopen must succeed"); + assert_eq!(report2.durable_lsn, report.durable_lsn, "D7: durable changed on reopen"); + assert_eq!(report2.oldest_lsn, report.oldest_lsn, "D7: oldest changed on reopen"); + assert_eq!(report2.tail_state, TailState::Clean, "D7/D10: reopen tail not clean"); + + // ---- sharp classifier oracle ---- + if let Some(m) = mutated_index { + let (off_m, _, _) = recs[m]; + if s.mutation.invalidates() { + // An invalid record that returned Ok can ONLY be the last + // record (interior corruption is fatal — handled in Err arm). 
+ assert_eq!(m, n - 1, "D5: interior corruption returned Ok (silent truncation!)"); + assert_eq!( + report.durable_lsn.0, + base + m as u64 - 1, + "D4: torn-tail durable_lsn wrong" + ); + match report.tail_state { + TailState::TruncatedAt { segment_base, offset } => { + assert_eq!(segment_base, Lsn(base)); + assert_eq!(offset, off_m as u64, "D4: truncation offset wrong"); + } + TailState::Clean => panic!("D4: corrupt last record not reported as truncated"), + } + } else { + // ZeroRecType ⇒ sentinel at m ⇒ clean end there. + assert_eq!( + report.durable_lsn.0, + base + m as u64 - 1, + "sentinel at record {m}: durable_lsn wrong" + ); + assert_eq!(report.tail_state, TailState::Clean, "sentinel ⇒ clean tail"); + } + } + } + Err(e) => { + // The ONLY legitimate failure for a header-valid single segment is + // mid-log corruption: an invalid INTERIOR record with a valid record + // still after it (D5 — fatal, never silent). + assert!( + matches!( + e, + WalError::TornMidLog { .. } | WalError::Corruption { .. } + ), + "unexpected error kind: {e:?}" + ); + match mutated_index { + Some(m) if s.mutation.invalidates() && m < n - 1 => { /* expected D5 */ } + _ => panic!("D5: open errored without an interior corruption (m={mutated_index:?}, mutation={:?}, n={n})", s.mutation), + } + } + } +}); + +/// Apply `mutation` to record `m` (at absolute `off`, `framed` bytes, `plen` +/// payload) within the segment `bytes`. +fn apply_mutation(bytes: &mut [u8], mutation: Mutation, off: usize, framed: usize, plen: usize, sel: u32) { + let sel = sel as usize; + match mutation { + Mutation::FlipCrc => { + bytes[off + (sel % 4)] ^= 0xFF; + } + Mutation::FlipBody => { + // Any CRC-covered byte [4, framed). 
+ bytes[off + 4 + (sel % (framed - 4))] ^= 0xFF; + } + Mutation::ZeroRecType => { + bytes[off + REC_TYPE_OFF] = 0; + } + Mutation::ExtendLength => { + let new_len = (plen as u32).wrapping_add(8); + bytes[off + 4..off + 8].copy_from_slice(&new_len.to_le_bytes()); + } + Mutation::TamperPadding => { + let pad = framed - RECORD_HEADER_SIZE - plen; + if pad > 0 { + bytes[off + RECORD_HEADER_SIZE + plen + (sel % pad)] ^= 0xFF; + } else { + bytes[off + 4 + (sel % (framed - 4))] ^= 0xFF; + } + } + Mutation::ReservedRecType => { + bytes[off + REC_TYPE_OFF] = 2; // reserved type + let crc = open_wal::crc32c(&bytes[off + 4..off + framed]); + bytes[off..off + 4].copy_from_slice(&crc.to_le_bytes()); + } + } +} + +/// Replay the whole surviving log into `(lsn, payload)` pairs. +fn replay(wal: &Wal) -> Vec<(u64, Vec<u8>)> { + let mut r = match wal.reader_from(Lsn(0)) { + Ok(r) => r, + Err(_) => return Vec::new(), + }; + let mut out = Vec::new(); + while let Some(item) = r.next() { + match item { + Ok((lsn, payload)) => out.push((lsn.0, payload.to_vec())), + Err(_) => break, + } + } + out +} + +/// Assert the replay is a dense run `base..=durable` and byte-identical to the +/// records we built (`origs[lsn - base]`).
+fn check_dense_prefix(replay: &[(u64, Vec<u8>)], base: u64, durable: u64, origs: &[Vec<u8>]) { + if durable + 1 == base { + assert!(replay.is_empty(), "empty suffix expected but replay non-empty"); + return; + } + let expected_len = (durable - base + 1) as usize; + assert_eq!(replay.len(), expected_len, "replay length != dense suffix length"); + for (i, (lsn, payload)) in replay.iter().enumerate() { + assert_eq!(*lsn, base + i as u64, "D2: replay not dense at index {i}"); + assert_eq!( + payload, + &origs[i], + "D6/D10: replayed record {lsn} not byte-identical to the built record" + ); + } +} From cfa6a9612578937dba29bd88aae5a8ee03815628 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 28 Jun 2026 16:21:19 +0000 Subject: [PATCH 2/2] fix: recognize end-of-records sentinel by all-zero header (D3/D5, issue #26) F3 surfaced a real recovery-contract correctness bug. `record::decode` returned `Decoded::Sentinel` on `rec_type == 0` ALONE, before the CRC check, so a single-bit corruption of an interior record's `rec_type` byte (1->0) was mistaken for the end-of-records sentinel and silently dropped every acked record after it (durable_lsn rewound) -- violating D3 (no loss <= a returned durable_lsn) and D5 (mid-log corruption must be fatal, never silent). Fix (src/record.rs::decode): a sentinel is recognized only by a full all-zero 20-byte header (rec_type == 0 && buf[..20].iter().all(|&b| b == 0)), short-circuited on rec_type so the Full-record hot path stays a single byte-compare. A corrupt rec_type==0, crc!=0 record now falls through the existing ladder -> CRC fails -> Invalid(BadCrc) -> recovery::classify -> interior => fatal TornMidLog (D5) / tail => torn-tail truncate (D4). No new code path; D11 preserved (bounded before any payload touch). Genuine sentinels (pre-alloc zero region / 8.2.1 zeroing) are always all-zero, so nothing legitimate is lost.
Tests (every flip explained): - NEW recovery.rs regression tests rec_type_zeroed_{interior_is_fatal_tornmidlog, at_tail_is_torn_tail_and_zeroed}: interior => TornMidLog (D5); tail => TruncatedAt + clean/zeroed reopen (D4/D10). Both FAIL before the fix, pass after (demonstrated). - record.rs sentinel_header_detected FLIPPED and corrected to the honest contract (a real record with a zeroed rec_type => Invalid(BadCrc); arbitrary non-all-zero rec_type==0 header => Invalid; all-zero => Sentinel). NOTE: the issue's writeup expected the old 0xFF-filled fixture to be BadCrc, but its 0xFF length trips LengthTooLarge first; the fixture now uses a genuine record so it actually exercises the CRC-catches-rec_type mechanism (still Invalid, never Sentinel). Assertion not weakened. Blast radius walked (verified, not patched): - recovery.rs:103 CleanEnd arm -- the intended behavioral change. - segment.rs:259 -- a corrupt header is no longer Sentinel; falls through to the full-record decode -> Invalid (verified, no edit). segment.rs:246 untouched. - reader.rs:134 -- already treats CleanEnd|Invalid identically; reader semantics UNCHANGED (a corrupt rec_type==0 tail record shifts CleanEnd->Invalid, both end the live stream per 15.2). Decision stated, not changed as a side effect. F3 oracle updated in lockstep (fuzz/fuzz_targets/structure.rs): Mutation:: ZeroRecType is now invalidates()==true (a CRC-caught corruption, no longer a sentinel); the dead sentinel `else` arm removed. Re-smoked 80000 runs, exit 0, zero crashes, cov 589 (up from 561 -- ZeroRecType now exercises the corruption arms). Spec (docs/wal_design_v6.md): 8.2 step 1, 5.3 table row, 5.4, 4 D5 prose tightened to "all-zero header" + a v6 changelog bullet. cargo test (both configs, 21 ok-lines), clippy --all-targets -D warnings (both), cargo fmt --check, MSRV 1.85 (both) green. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01Rpbwt9JT56hQvVXiqTS131 --- CLAUDE.md | 1 + docs/wal_design_v6.md | 9 +-- .../04bda4c4ea9047c89d389177aa06575cb2fb4c4d | Bin 0 -> 38 bytes .../0610af0a0d967c9cef07077287855f4a51ab0e28 | Bin 27 -> 0 bytes .../09478ba4e544c4589dfbd9d159c363f5f145e4d2 | Bin 0 -> 43 bytes .../109c9d05973d28b461fb70f4350a0d82490588b9 | Bin 0 -> 1185 bytes ... 1fb3edf538d34a8d74a8766253ff470b838feb03} | Bin 176 -> 172 bytes .../215c5ea625176e284141f08fec1acd3fad3254c8 | Bin 0 -> 37 bytes .../24af5fd825fd2bd4f81acba819efe8fcf7df5f65 | Bin 0 -> 33 bytes .../28ca2827d34c1e7d53d7397cd620969d86976618 | Bin 262 -> 0 bytes .../2bf9dc65f0b9b3c8f1c311736993d46ed1ceff79 | Bin 0 -> 83 bytes .../35118fa6bddd04d45f88474ca578fdd2ae94ba29 | Bin 33 -> 0 bytes .../35223c9bae85cd63ae6d2eb5127ad6eec27f0527 | Bin 0 -> 59 bytes .../362ff47495006062081c676a631bc92a90c810bb | Bin 0 -> 35 bytes .../36b482b4fb83d758807099532433781f2f77e57c | Bin 0 -> 27 bytes .../38e49454f0d1312e2e9dd9a2c69b35374ce42c70 | Bin 0 -> 44 bytes .../39a2d254a5b9daf0386317cfc43c7eb2ca2db310 | Bin 0 -> 59 bytes .../3abfba06198b144fff0d2aa4c32329671d12cbbc | Bin 0 -> 52 bytes ... 4254570d53fc2335f29f0f3056a5d512849edb47} | Bin 125 -> 123 bytes .../467d6d9e020535f7d24a2f0b29df2ef4cf0ca7bd | Bin 43 -> 0 bytes .../4ccef8879380fd0548b1d1007c8400e97d3ebbfe | Bin 0 -> 46 bytes .../52b4780aa46782439b6affca1e9857029a83875a | Bin 27 -> 0 bytes .../54794d10b78c852a2401d2e3d718448a9cf63969 | Bin 27 -> 0 bytes .../54c3c3ba2da2a53f2d0ea0015b2fe5ccba1fb30e | Bin 0 -> 71 bytes .../5cf88d86539ce17238ea75173edaa2c77650473e | Bin 0 -> 31 bytes .../62807f033d32099981eb3ce894ce464d0cbe4468 | Bin 29 -> 0 bytes .../6814137aab2d2ade0be401a7598b3297f5f27f3a | Bin 30 -> 0 bytes .../6b92de82214d22fbb5b269a2aae231697260afbc | Bin 0 -> 34 bytes .../6c23c11e5320c7f8192c74c1f6856671529e3199 | Bin 0 -> 34 bytes ... 
75ed88dfa2c500f7ebc12e75dfa14b99d17f6ee4} | Bin 114 -> 111 bytes .../79fd5b9afb6816e4fdb7a67c0d1d5582cee0f460 | Bin 0 -> 969 bytes .../7bc0a73e7004b6eab650b2f5fbef8c55ab770c3f | Bin 880 -> 0 bytes .../81bccba3c7bd64152057250241d946d6516187bd | Bin 170 -> 0 bytes .../82a22b2e87172dcf93cf6ac40e52463432adb3a0 | Bin 0 -> 260 bytes .../842abc768b13b4adf20a4072c207ae5d01396a43 | Bin 37 -> 0 bytes .../8b0b6a4bf271f17c661a07c69407ea3470c6b9b6 | Bin 0 -> 34 bytes .../8f550e5caadd1648fa92ea05c3d10b7a044e26aa | Bin 85 -> 0 bytes .../8fd595850abeb23b7de9464da5b6b52a2663b085 | Bin 0 -> 27 bytes ... 9111ab2b59d72ae291bdd0bd255ebab9aaa0aa56} | Bin 187 -> 186 bytes .../93fdb912ff393e72b2d4b41b2600316fee28c9f2 | Bin 29 -> 0 bytes .../9616daa0fbcb1f29f9c546c9e48c2a6cc5b9005c | Bin 0 -> 28 bytes .../970051bac6e1bd06e7e2f6c660c0645364b83462 | Bin 46 -> 0 bytes .../97cac2eb9be831bae35f997da7ed8251e115816a | Bin 28 -> 0 bytes .../b9ec386e3da48c2a5e00bdd463567bd09e625af8 | Bin 0 -> 27 bytes .../c29d11743c93871e0a40689e72d809882323bbb9 | Bin 0 -> 474 bytes .../c3d72425b4fa3de912442fde4ba7b26b95407963 | Bin 0 -> 38 bytes .../c5a3ef9f3c1809402a5a24345ad0c74ef019c27d | Bin 0 -> 49 bytes .../c844a0d39907670da27d2bd72784657ebc992531 | Bin 0 -> 872 bytes .../d7c1b91c87d61e19e7e09dd7dddb752ade1a1b5c | Bin 0 -> 27 bytes .../d9bfb773d3007e5025b7c301648d8315ca8e04e0 | Bin 61 -> 0 bytes .../da6451883a122964a6d7fb4ac17f00516db98faf | Bin 0 -> 170 bytes .../db0c71e896a42fce0fb874f172834657ca913f79 | Bin 0 -> 45 bytes .../dd809dff780b60ad58856aa7a7451396b2e86514 | Bin 0 -> 115 bytes .../dda48b05b3eae4676ab834c7eef007c36f6cd50a | Bin 991 -> 0 bytes .../ded66cadcef6cacb716e92792e02d6595ebf1fd4 | Bin 0 -> 29 bytes .../df3ed3418db66f4ca4503f928b9892f6c8761c4d | Bin 0 -> 62 bytes .../ecd343337a14956d8fb63386fb192975171e16ba | Bin 0 -> 39 bytes .../eef7b2ef2ff224bc6a3401b4c6a4f8253ceac68d | Bin 0 -> 83 bytes .../ef7a3c9bfa28403ed1a8f487326de3a2db93c8c2 | Bin 0 -> 43 bytes 
.../f458f65575c8a52555c9c8ef52b6fc78a5a2aa38 | Bin 494 -> 0 bytes .../f4e97881545863c9244f4dee63e902bec633bc9b | Bin 0 -> 166 bytes .../f568c065526f5ed5e61fe82491024d46353f4b4a | Bin 0 -> 44 bytes .../fc28fa89d863e289e8be5a2dd383edbb4303c714 | Bin 174 -> 0 bytes fuzz/fuzz_targets/structure.rs | 54 +++++++++--------- src/record.rs | 48 ++++++++++++---- src/recovery.rs | 51 +++++++++++++++++ 66 files changed, 121 insertions(+), 42 deletions(-) create mode 100644 fuzz/corpus/structure/04bda4c4ea9047c89d389177aa06575cb2fb4c4d delete mode 100644 fuzz/corpus/structure/0610af0a0d967c9cef07077287855f4a51ab0e28 create mode 100644 fuzz/corpus/structure/09478ba4e544c4589dfbd9d159c363f5f145e4d2 create mode 100644 fuzz/corpus/structure/109c9d05973d28b461fb70f4350a0d82490588b9 rename fuzz/corpus/structure/{226513999f92baac4000544d766b7e4f01fef052 => 1fb3edf538d34a8d74a8766253ff470b838feb03} (72%) create mode 100644 fuzz/corpus/structure/215c5ea625176e284141f08fec1acd3fad3254c8 create mode 100644 fuzz/corpus/structure/24af5fd825fd2bd4f81acba819efe8fcf7df5f65 delete mode 100644 fuzz/corpus/structure/28ca2827d34c1e7d53d7397cd620969d86976618 create mode 100644 fuzz/corpus/structure/2bf9dc65f0b9b3c8f1c311736993d46ed1ceff79 delete mode 100644 fuzz/corpus/structure/35118fa6bddd04d45f88474ca578fdd2ae94ba29 create mode 100644 fuzz/corpus/structure/35223c9bae85cd63ae6d2eb5127ad6eec27f0527 create mode 100644 fuzz/corpus/structure/362ff47495006062081c676a631bc92a90c810bb create mode 100644 fuzz/corpus/structure/36b482b4fb83d758807099532433781f2f77e57c create mode 100644 fuzz/corpus/structure/38e49454f0d1312e2e9dd9a2c69b35374ce42c70 create mode 100644 fuzz/corpus/structure/39a2d254a5b9daf0386317cfc43c7eb2ca2db310 create mode 100644 fuzz/corpus/structure/3abfba06198b144fff0d2aa4c32329671d12cbbc rename fuzz/corpus/structure/{82beaa5bb6a8a694e95ac4da97dcb12671fb1de1 => 4254570d53fc2335f29f0f3056a5d512849edb47} (51%) delete mode 100644 
fuzz/corpus/structure/467d6d9e020535f7d24a2f0b29df2ef4cf0ca7bd create mode 100644 fuzz/corpus/structure/4ccef8879380fd0548b1d1007c8400e97d3ebbfe delete mode 100644 fuzz/corpus/structure/52b4780aa46782439b6affca1e9857029a83875a delete mode 100644 fuzz/corpus/structure/54794d10b78c852a2401d2e3d718448a9cf63969 create mode 100644 fuzz/corpus/structure/54c3c3ba2da2a53f2d0ea0015b2fe5ccba1fb30e create mode 100644 fuzz/corpus/structure/5cf88d86539ce17238ea75173edaa2c77650473e delete mode 100644 fuzz/corpus/structure/62807f033d32099981eb3ce894ce464d0cbe4468 delete mode 100644 fuzz/corpus/structure/6814137aab2d2ade0be401a7598b3297f5f27f3a create mode 100644 fuzz/corpus/structure/6b92de82214d22fbb5b269a2aae231697260afbc create mode 100644 fuzz/corpus/structure/6c23c11e5320c7f8192c74c1f6856671529e3199 rename fuzz/corpus/structure/{6c46bcba457bbe201897b0b15a538c114612b090 => 75ed88dfa2c500f7ebc12e75dfa14b99d17f6ee4} (56%) create mode 100644 fuzz/corpus/structure/79fd5b9afb6816e4fdb7a67c0d1d5582cee0f460 delete mode 100644 fuzz/corpus/structure/7bc0a73e7004b6eab650b2f5fbef8c55ab770c3f delete mode 100644 fuzz/corpus/structure/81bccba3c7bd64152057250241d946d6516187bd create mode 100644 fuzz/corpus/structure/82a22b2e87172dcf93cf6ac40e52463432adb3a0 delete mode 100644 fuzz/corpus/structure/842abc768b13b4adf20a4072c207ae5d01396a43 create mode 100644 fuzz/corpus/structure/8b0b6a4bf271f17c661a07c69407ea3470c6b9b6 delete mode 100644 fuzz/corpus/structure/8f550e5caadd1648fa92ea05c3d10b7a044e26aa create mode 100644 fuzz/corpus/structure/8fd595850abeb23b7de9464da5b6b52a2663b085 rename fuzz/corpus/structure/{bfc28fbe9fdd05f105b5e7ed1c5fa79b2ee9c290 => 9111ab2b59d72ae291bdd0bd255ebab9aaa0aa56} (68%) delete mode 100644 fuzz/corpus/structure/93fdb912ff393e72b2d4b41b2600316fee28c9f2 create mode 100644 fuzz/corpus/structure/9616daa0fbcb1f29f9c546c9e48c2a6cc5b9005c delete mode 100644 fuzz/corpus/structure/970051bac6e1bd06e7e2f6c660c0645364b83462 delete mode 100644 
fuzz/corpus/structure/97cac2eb9be831bae35f997da7ed8251e115816a create mode 100644 fuzz/corpus/structure/b9ec386e3da48c2a5e00bdd463567bd09e625af8 create mode 100644 fuzz/corpus/structure/c29d11743c93871e0a40689e72d809882323bbb9 create mode 100644 fuzz/corpus/structure/c3d72425b4fa3de912442fde4ba7b26b95407963 create mode 100644 fuzz/corpus/structure/c5a3ef9f3c1809402a5a24345ad0c74ef019c27d create mode 100644 fuzz/corpus/structure/c844a0d39907670da27d2bd72784657ebc992531 create mode 100644 fuzz/corpus/structure/d7c1b91c87d61e19e7e09dd7dddb752ade1a1b5c delete mode 100644 fuzz/corpus/structure/d9bfb773d3007e5025b7c301648d8315ca8e04e0 create mode 100644 fuzz/corpus/structure/da6451883a122964a6d7fb4ac17f00516db98faf create mode 100644 fuzz/corpus/structure/db0c71e896a42fce0fb874f172834657ca913f79 create mode 100644 fuzz/corpus/structure/dd809dff780b60ad58856aa7a7451396b2e86514 delete mode 100644 fuzz/corpus/structure/dda48b05b3eae4676ab834c7eef007c36f6cd50a create mode 100644 fuzz/corpus/structure/ded66cadcef6cacb716e92792e02d6595ebf1fd4 create mode 100644 fuzz/corpus/structure/df3ed3418db66f4ca4503f928b9892f6c8761c4d create mode 100644 fuzz/corpus/structure/ecd343337a14956d8fb63386fb192975171e16ba create mode 100644 fuzz/corpus/structure/eef7b2ef2ff224bc6a3401b4c6a4f8253ceac68d create mode 100644 fuzz/corpus/structure/ef7a3c9bfa28403ed1a8f487326de3a2db93c8c2 delete mode 100644 fuzz/corpus/structure/f458f65575c8a52555c9c8ef52b6fc78a5a2aa38 create mode 100644 fuzz/corpus/structure/f4e97881545863c9244f4dee63e902bec633bc9b create mode 100644 fuzz/corpus/structure/f568c065526f5ed5e61fe82491024d46353f4b4a delete mode 100644 fuzz/corpus/structure/fc28fa89d863e289e8be5a2dd383edbb4303c714 diff --git a/CLAUDE.md b/CLAUDE.md index d131424..9797596 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -109,6 +109,7 @@ The entire value of this component is **correct behavior under crashes and fault ## Project status (keep this updated) +- **LATEST (2026-06-28): CORRECTNESS FIX (gates F3) — 
closed a D3/D5 sentinel silent-truncation hole that F3 surfaced (issue #26).** `record::decode` returned `Decoded::Sentinel` on `rec_type == 0` **alone**, *before* the CRC check, so a single-bit corruption of an **interior** record's `rec_type` byte (`1`→`0`) was mistaken for the end-of-records sentinel and **silently dropped every acked record after it** (`durable_lsn` rewound) — violating **D3** (no loss ≤ a returned `durable_lsn`) and **D5** (mid-log corruption must be fatal). **Fix (one line in `src/record.rs::decode`):** a sentinel is recognized only by a full **all-zero 20-byte header** (`rec_type == REC_TYPE_SENTINEL && buf[..20].iter().all(|&b| b == 0)`, short-circuited on `rec_type` so the Full-record hot path stays a single byte-compare). A corrupt `rec_type==0, crc≠0` record now falls through the existing ladder → CRC fails → `Invalid(BadCrc)` → `classify` → interior ⇒ fatal `TornMidLog` (D5) / tail ⇒ torn-tail truncate (D4); **no new code path, D11 preserved**. Genuine sentinels (pre-alloc zero region / §8.2.1 zeroing) are always all-zero ⇒ nothing legitimate lost. **Tests:** new `recovery.rs` regression tests at **interior (D5)** and **tail (D4 + idempotent-zeroed reopen, D10)** — both **fail before the fix, pass after** (demonstrated); `record::sentinel_header_detected` fixture corrected to the honest contract (a real record with a zeroed `rec_type` ⇒ `Invalid(BadCrc)`; all-zero ⇒ `Sentinel`) — **note:** the designer's writeup said the old `0xFF`-filled fixture would be `BadCrc`, but that buffer's `0xFF` length trips `LengthTooLarge` first, so the fixture now uses a genuine record to actually exercise the CRC-catches-`rec_type` mechanism (still `Invalid`, never `Sentinel`). 
**Blast radius walked:** `segment.rs:259` falls through to the full-record decode → `Invalid` (verified, no edit); `reader.rs:134` already treats `CleanEnd|Invalid` identically ⇒ **reader semantics unchanged** (a corrupt `rec_type==0` tail record shifts `CleanEnd`→`Invalid`, both end the live stream per §15.2). **F3 oracle updated in lockstep:** `Mutation::ZeroRecType` is now `invalidates() == true` (a CRC-caught corruption, no longer a sentinel) — F3 re-smoked **80 000 runs, exit 0, zero crashes, cov 589** (up from 561, since ZeroRecType now exercises the corruption arms). **Spec:** §8.2 step 1, §5.3 table row, §5.4, §4 D5 prose tightened to "all-zero header" + v6 changelog bullet. `cargo test` (both configs), `clippy --all-targets -D warnings` (both), `cargo fmt --check`, MSRV 1.85 green. Ships on the F3 branch (PR #25). - **LATEST (2026-06-28): M9 slice 3 — F3 structure-aware classifier fuzz LANDED, built + smoke-green here; gate stays OPEN.** New `fuzz/fuzz_targets/structure.rs` (third cargo-fuzz bin). Builds a **valid dense single segment** (the harness owns the correct CRCs via the `fuzzing` generators, so the deep classifier states a blind byte fuzzer never reaches are hit every run), applies ONE fuzzer-chosen **localized mutation** at a chosen record (flip CRC / flip a CRC-covered body byte / zero `rec_type`→sentinel / extend `length` / tamper padding / reserved `rec_type`+re-CRC via the public `crc32c`), then drives the **real public `Wal::open`**. **Sharp classifier oracle:** invalid **interior** record (a valid record still follows) ⇒ MUST be fatal `TornMidLog`/`Corruption` (**D5**, never silent truncation); invalid **last** record ⇒ MUST truncate at its offset (**D4**); `rec_type`→0 ⇒ sentinel/clean-end. Plus the surviving suffix is a dense **byte-identical** prefix of the built records (**D6/D10**), an **idempotent reopen** yields a clean tail (**D7**, durable zeroing), and the forward scan stays within `scan_bound` (**D11**). 
**Smoke-green: 150 000 runs, exit 0, zero crashes** (cov 561); corpus `cargo fuzz cmin`'d (111 entries). **Falsifiability shown**: forcing `forward_scan_finds_valid` to always report "no continuation" (the classic D5 bug — mid-log corruption silently truncated) trips `D5: interior corruption returned Ok (silent truncation!)`, then reverted. **No `src/` change** (`git diff src/` empty). CI: `structure` added to `fuzz.yml` matrix + the per-PR smoke in `ci.yml` (now `fuzz smoke (F1/F2/F3)`, a crash reds the M9 PR). `cargo fmt --check`, `clippy --all-targets -D warnings`, `cargo test`, `actionlint` green. **Still NOT done in M9:** F4 (op-script oracle); Miri; `!Sync` trybuild + dir-lock; loom; soak; CI-matrix tidy-up; the N-CPU-hour release-gate observation. - **LATEST (2026-06-28): M9 slice 2 — F2 single-record decoder fuzz LANDED, built + smoke-green here; gate stays OPEN.** New `fuzz/fuzz_targets/decode.rs` (second cargo-fuzz bin). The **raw fuzzer bytes are the decode buffer** (no `arbitrary` envelope — chosen after the struct-`Arbitrary` byte layout proved fragile; the corpus is now just record bytes), decoded against a boundary-biased `max_record_size` set `{0,1,7,8,64,4096,1<<20,u32::MAX}` so the length bound is hit from both sides for any record the buffer encodes — **including `max < payload`, which keeps the payload-bound assertion non-vacuous**. Asserts bounds-soundness on any returned record: `payload_len ≤ max`, `framed_len ≤ buf.len()`, `framed_len ≥ 20`, 8-aligned, `20 + payload_len ≤ framed_len` (D11, record level). Because a blind byte fuzzer essentially never synthesizes a CRC-valid frame, the corpus is **seeded with genuine CRC-valid records** (a Python `crc32c` generator self-checked against the canonical `0xE3069283` vector so it matches the `crc32c` crate) + `cargo fuzz cmin` (17 entries); **falsifiability shown**: disabling the decoder's length bound trips `payload_len 5 exceeds max_record_size 0` on a valid seed, then reverted. 
**Smoke-green: 300 000 runs, exit 0, zero crashes**; **no `src/` change** (`git diff src/` empty). CI: `decode` added to `fuzz.yml` matrix + the per-PR smoke in `ci.yml` (renamed `fuzz smoke (F1/F2)`, both targets, a crash reds the M9 PR). `cargo fmt --check`, `clippy --all-targets -D warnings`, record unit tests green. **Still NOT done in M9:** F3 (structure-aware), F4 (op-script oracle); Miri; `!Sync` trybuild + dir-lock; loom; soak; CI-matrix tidy-up; the N-CPU-hour release-gate observation. - **LATEST (2026-06-27): M9 started — F1 recovery-parser fuzz LANDED (slice 1 of M9), built + smoke-green here; the N-CPU-hour gate stays OPEN.** New `fuzzing` Cargo feature (zero-cost when off) gates a `#[doc(hidden)] pub mod fuzzing` in `src/lib.rs` (exposes the internal parse entry points for the cargo-fuzz targets) and the **bounded-scan instrumentation** in `src/recovery.rs`. Per the designer's load-bearing fix, the `max_record_size + 28` bound is hoisted into one `recovery::scan_bound(max_record_size)` symbol used by **both** the real `forward_scan_finds_valid` loop's window **and** the in-loop `assert!`/thread-local probe — so the gate measures **production**, not a harness copy, and the bound cannot drift. New `fuzz/` cargo-fuzz crate (libFuzzer + `arbitrary` + ASan; standalone, never published): `fuzz/fuzz_targets/recovery.rs` (F1). **Primary surface is the real public `Wal::open`** over an adversarial *directory* of segment files — fuzzer-controlled filenames + `base_lsn`s (out-of-order/duplicate/gapped/`0`/malformed-name), valid-header dense bodies and pure garbage — so filename-parse → discovery → sort → header validation → §8.4 incomplete-highest discard → cross-segment continuity → `recover_segment` are all in the blast radius (D11/D2/contiguity), with a secondary single-file `recover_segment` probe asserting the bound directly. 
**Built with `cargo +nightly fuzz build` and smoke-green: 60 000 runs, exit 0, zero crashes**, corpus = the fuzzer-grown, `cargo fuzz cmin`-minimized coverage-preserving set (`fuzz/corpus/recovery/`, ~174 entries reaching the multi-segment-continuity coverage that hand-authored entropy seeds miss — per the designer's review note). **Falsifiability shown** (§14.0.3): widening the scan loop past `scan_bound` trips the in-loop `assert!` (`distance 4128 > 4124`), then reverted. **Framing (designer note, do not over-read):** the bounded-scan counter holds **structurally** (the loop window *is* `scan_bound`, so `distance ≤ scan_bound` for every input) — it is a **drift/regression guard**, not the headline; the substantive D11 proof in F1 is the **crash-free / no-OOB / no-unbounded-alloc / termination** surface over adversarial inputs. CI: new `.github/workflows/fuzz.yml` (nightly + dispatch, time-boxed, loud "contingent, NOT the N-CPU-hour gate" banner, uploads corpus/artifacts) + a **blocking per-PR smoke** in `ci.yml` (a reproducible crash reds the M9 PR — flag #3; never reds an H1 *dispatch* run). `cargo test` (no feature + `--features fuzzing`, 84 lib + all integration), `cargo clippy --all-targets -D warnings` (both configs), `cargo fmt --check`, MSRV 1.85 (both configs), `cargo build` (no feature ⇒ zero release impact) all green; `actionlint` clean on both workflows. **F4's crash model (when it lands) is the process-crash state machine, not power loss** — flag #2. **Still NOT done in M9:** F2 (decoder), F3 (structure-aware), F4 (op-script oracle); Miri; `!Sync` trybuild + dir-lock; loom publish-barrier; soak; CI-matrix tidy-up; and the F1 N-CPU-hour release-gate observation on a dedicated runner. 
diff --git a/docs/wal_design_v6.md b/docs/wal_design_v6.md index 54de2ca..82a7560 100644 --- a/docs/wal_design_v6.md +++ b/docs/wal_design_v6.md @@ -11,6 +11,7 @@ Normative corrections surfaced while implementing M3 (intra-segment recovery) and M4 (multi-segment), plus M6/M7 testing-status annotations; no contract section changes. +- **§8.2 step 1 / §5.3 / §5.4 / §4 D5 — the end-of-records sentinel is now recognized by an ALL-ZERO header, not `rec_type == 0` alone (closes a D3/D5 silent-truncation hole).** `record::decode` returned `Sentinel` on `rec_type == 0` *before* the CRC check, so a single-bit corruption of an **interior** record's `rec_type` byte (`1`→`0`) was mistaken for the sentinel and **silently dropped every acked record after it** (`durable_lsn` rewound) — violating **D3** (no loss ≤ a returned `durable_lsn`) and **D5** (mid-log corruption must be fatal). The fix recognizes a sentinel only on a full all-zero 20-byte header; a `rec_type == 0` record with a non-zero CRC now fails the CRC check and is classified as mid-log corruption (fatal `TornMidLog`) or, at the tail, a torn tail (D4). A genuine sentinel (pre-allocated zero region / §8.2.1 post-truncation zeroing) is always all-zero, so nothing legitimate is lost. **Found by the M9 F3 structure-aware fuzzer** (issue #26); regression-tested at interior (D5) and tail (D4/D10) positions. (Found and fixed during M9.) - **§6.2 — added an integrator note on transient `Locked` after a writer crash (additive clarification, no contract change).** A dead writer's `flock` is released during process teardown, which can lag the process's exit, so a crash-recovery reopen may briefly see a spurious `Locked`. The note tells integrators to tolerate a bounded transient `Locked` on the recovery reopen (short retry) and treat only a persistent `Locked` as a real concurrent-writer error — observable POSIX semantics, exercised by the §14.4a process-crash tests' bounded-retry reopen. 
(Surfaced while fixing an M7 CI flake; integrator guidance, not a behavior change.) - **§14.7 (performance & regression) — IMPLEMENTED in M7; the regression-gate CI *enforcement* is OPEN-pending-controlled-runner.** Added the M7 status block to §14.7, the per-PR/nightly split to §14.11, and the M7 note to the §14.13 zero-alloc DoD row. `benches/wal.rs` implements the four criterion groups (throughput / commit-latency / recovery / split-batch) over the public API against a real `fdatasync`; since criterion reports no arbitrary percentiles, the commit-latency tail (p50/p99/p999) comes from an `hdrhistogram` persisted to `target/perf/`. `tests/zero_alloc.rs` is hardened (proves no-roll in the measured window via segment-file count + `durable_lsn` advance; adds a `max_record_size` variant). `scripts/perf-gate.sh` implements the >10% throughput/median-time and >20% p999 thresholds (median, not the outlier-sensitive mean, from criterion `estimates.json`; p999 from the histogram JSON) and was shown to flag an injected regression. Per the line's own "pin CPU governor", enforcement is real on a controlled runner; on hosted CI the gate runs **informational** (`bench.yml`, `continue-on-error`) like the LazyFS gate — a stopgap, not a downgrade. No `src/` change; testing-status annotations only. (Added during M7 implementation.) @@ -140,7 +141,7 @@ Each invariant maps to tests in §14. All MUST hold on honest hardware (§8.3, - **D2 — Dense, gap-free surviving suffix.** At all times the durable content is a contiguous run of LSNs `P..=k`, where `P` is the `base_lsn` of the *oldest surviving segment* (`P = 1` until the first checkpoint). Recovery MUST never produce an *internal* gap (a missing LSN between `P` and `k`). Recovery does **not**, and by design **cannot**, distinguish an authorized prefix deletion (`P > 1` via checkpoint) from an unauthorized one; preventing unauthorized deletion is the integrator's responsibility, anchored by its durable snapshot. 
- **D3 — At-most-tail loss on crash.** A crash MAY lose only records appended but not yet covered by a returned `commit()`. It MUST NOT lose any record `≤` the last returned `durable_lsn`. - **D4 — Torn-tail truncation.** A partial/torn write at the physical tail MUST be detected (length bounds + CRC) and cleanly truncated; the truncated region MUST be invalidated per §8.2. A torn record MUST NOT be surfaced as valid. -- **D5 — Mid-log corruption is fatal, not silent.** A corrupt record that is *not* the tail (a structurally valid record with the correct next LSN exists after it, within the bounded scan window) MUST cause recovery to halt with a distinct, loud error. It MUST NOT be silently truncated (that would discard acknowledged data). +- **D5 — Mid-log corruption is fatal, not silent.** A corrupt record that is *not* the tail (a structurally valid record with the correct next LSN exists after it, within the bounded scan window) MUST cause recovery to halt with a distinct, loud error. It MUST NOT be silently truncated (that would discard acknowledged data). This is why the end-of-records sentinel is recognized only by an **all-zero header**, not `rec_type == 0` alone (§8.2 step 1): the CRC covers `rec_type`, so a corruption that zeroes an interior record's `rec_type` byte must fail the CRC and be classified as mid-log corruption, not mistaken for the sentinel. - **D6 — Read-back fidelity.** Replay MUST return exactly the records appended, in LSN order, byte-identical payloads. - **D7 — Idempotent recovery.** open→use→close→open→… converges; recovery is deterministic and repeated cycles do not change recovered content or tail state. - **D8 — Checkpoint safety.** `checkpoint(up_to)` MUST NOT remove any record with `lsn > up_to`, MUST NOT make any retained record unreadable, and MUST preserve D2 over the retained suffix. @@ -200,7 +201,7 @@ Records follow the segment header, contiguously. | 0 | 4 | `crc` | CRC-32C over bytes `[4, 4 + 16 + length + pad)` — i.e. 
the rest of the header, the payload, **and** the alignment padding | | 4 | 4 | `length` | `u32`, payload length | | 8 | 8 | `lsn` | `u64` | -| 16 | 1 | `rec_type` | `1 = Full`; `0` = zero/sentinel (never a real record); `2..` reserved for future fragmentation | +| 16 | 1 | `rec_type` | `1 = Full`; `0` = zero (never a real record; the end-of-records sentinel is an **all-zero header**, not `rec_type == 0` alone — §8.2 step 1, §5.4); `2..` reserved for future fragmentation | | 17 | 1 | `rflags` | reserved, MUST be 0 | | 18 | 2 | `reserved` | MUST be 0 | | 20 | `length` | `payload` | opaque caller bytes | @@ -219,7 +220,7 @@ Records follow the segment header, contiguously. ### 5.4 Pre-allocation and the zero region - On creation a segment is pre-allocated to `segment_size` (`fallocate` / `F_PREALLOCATE`), so the unwritten remainder is zero-filled. -- A `rec_type == 0` / all-zero record header during scan is the **end-of-records sentinel** for a partially-filled or cleanly-rolled segment. +- An **all-zero 20-byte record header** during scan is the **end-of-records sentinel** for a partially-filled or cleanly-rolled segment. It is recognized by the *whole* header being zero, **not** by `rec_type == 0` alone (a `rec_type == 0` header in which any other header byte is non-zero — typically a stale, non-zero CRC — is a corrupt record, classified per §8.2 step 5 — see §8.2 step 1). --- @@ -373,7 +374,7 @@ Runs in `open`, single-threaded, before any append. Scan each segment from offset 64, tracking `expected_next_lsn`. For each record: -1. If `< 20` bytes remain or `rec_type == 0` / all-zero header ⇒ **end of this segment's records.** +1.
If `< 20` bytes remain ⇒ **end of this segment's records.** Otherwise the end-of-records **sentinel** is an **all-zero 20-byte header** (`rec_type == 0` **and** the remaining header bytes — CRC, `length`, `lsn`, reserved — all zero); a header that matches the sentinel ⇒ **end of this segment's records.** A header with `rec_type == 0` that is **not** all-zero (typically because it still carries a **non-zero CRC**) is **not** a sentinel: it is an `Invalid` record subject to the step-5 tail-vs-corruption classification. (Rationale: the CRC covers `rec_type`, so a corruption of an interior record's `rec_type` to `0` MUST surface as fatal mid-log corruption — D5; recognizing the sentinel by `rec_type == 0` alone bypasses the CRC and is a D3/D5 silent-truncation hole. A genuine sentinel only ever arises from the pre-allocated zero region or the §8.2.1 post-truncation zeroing, both all-zero.) 2. Bound `length`: if `length > max_record_size` **or** `20 + length + pad > remaining_segment_bytes` ⇒ record invalid at this offset (candidate boundary; step 5). 3. Read payload + padding. Short read ⇒ invalid (step 5). 4. Compute CRC-32C over `[4, 4+16+length+pad)`; compare to `crc`. Check `lsn == expected_next_lsn`. Either mismatch ⇒ invalid (step 5).
diff --git a/fuzz/corpus/structure/04bda4c4ea9047c89d389177aa06575cb2fb4c4d b/fuzz/corpus/structure/04bda4c4ea9047c89d389177aa06575cb2fb4c4d new file mode 100644 index 0000000000000000000000000000000000000000..1bbc15ce4d0ee699fb4111c10be6d58e01ae0846 GIT binary patch literal 38 lcmezWKjBp0e;8meH)Cc9XZW@m2vTsy)UoF>Dz0D_4C=WtEYGON`kFER?^7^Eu4~UZaEx*NCRr)-;EmnY zCp=>hDX3egqSMW*m{G}mh7t==Rq1;#R@d16bRC{y#smZdY^EZ#bzu7QyxqJ^=Uvy) mWWC*e#)OJ9)oVDIL=%Py<$vkU4qJ9j%q|pntNEpYvAYM$!DN#F literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/226513999f92baac4000544d766b7e4f01fef052 b/fuzz/corpus/structure/1fb3edf538d34a8d74a8766253ff470b838feb03 similarity index 72% rename from fuzz/corpus/structure/226513999f92baac4000544d766b7e4f01fef052 rename to fuzz/corpus/structure/1fb3edf538d34a8d74a8766253ff470b838feb03 index bdd53a920743296fcec70dd440c06f7cec66baeb..f9ba2c3d73cea55ef96cf4e649ceba7d01da95d8 100644 GIT binary patch delta 9 QcmdnMxQ1~;@5G7n021>9y#N3J delta 9 QcmZ3(xPfs(@5D}J021E=xc~qF diff --git a/fuzz/corpus/structure/215c5ea625176e284141f08fec1acd3fad3254c8 b/fuzz/corpus/structure/215c5ea625176e284141f08fec1acd3fad3254c8 new file mode 100644 index 0000000000000000000000000000000000000000..d95d61778a786ba6340832d3d2cbc32605eaa893 GIT binary patch literal 37 icmezW9|%<}bqZNz* literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/24af5fd825fd2bd4f81acba819efe8fcf7df5f65 b/fuzz/corpus/structure/24af5fd825fd2bd4f81acba819efe8fcf7df5f65 new file mode 100644 index 0000000000000000000000000000000000000000..50e04991c245c280b338528719fc16d6b971b0a8 GIT binary patch literal 33 gcmezW9|%W;2jLqm literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/82beaa5bb6a8a694e95ac4da97dcb12671fb1de1 b/fuzz/corpus/structure/4254570d53fc2335f29f0f3056a5d512849edb47 similarity index 51% rename from fuzz/corpus/structure/82beaa5bb6a8a694e95ac4da97dcb12671fb1de1 rename to fuzz/corpus/structure/4254570d53fc2335f29f0f3056a5d512849edb47 index 
d84be4dff6f30c1b8899c764c3467191ab64aa62..eb078546f3f252d41ff236ada8e9889f4c0cf633 100644 GIT binary patch delta 17 Vcmb=eo{-Pe@*fO#{Qs}b003hU3u6EP delta 7 Ocmb=fosd5@Fh diff --git a/fuzz/corpus/structure/467d6d9e020535f7d24a2f0b29df2ef4cf0ca7bd b/fuzz/corpus/structure/467d6d9e020535f7d24a2f0b29df2ef4cf0ca7bd deleted file mode 100644 index f16c211550dc1f04d6c1ce0a7cd1a4212340ba58..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 43 rcmezWpW*+1yOSruB6Dj}z literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/62807f033d32099981eb3ce894ce464d0cbe4468 b/fuzz/corpus/structure/62807f033d32099981eb3ce894ce464d0cbe4468 deleted file mode 100644 index df302c607292390913ae55deffb9c8e589cd1122..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 29 kcmZQzU|{(F|Nn7@Ri79b7A{<<`u{rv!%3hZ1H%Lc0JKyJxBvhE diff --git a/fuzz/corpus/structure/6814137aab2d2ade0be401a7598b3297f5f27f3a b/fuzz/corpus/structure/6814137aab2d2ade0be401a7598b3297f5f27f3a deleted file mode 100644 index 9907544fce0fec601579e0c12b066646f7ac73a8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 30 gcmaF;|33pG+kYTXVOQs5U|6`2B@_Y{{%80P0N=C?fB*mh diff --git a/fuzz/corpus/structure/6b92de82214d22fbb5b269a2aae231697260afbc b/fuzz/corpus/structure/6b92de82214d22fbb5b269a2aae231697260afbc new file mode 100644 index 0000000000000000000000000000000000000000..9fa846377733994424c7f6574d0f046692b255e9 GIT binary patch literal 34 hcmZQzVBos{|33o*0|&#xg$uu{0+|OHxETWA006$g3fcew literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/6c23c11e5320c7f8192c74c1f6856671529e3199 b/fuzz/corpus/structure/6c23c11e5320c7f8192c74c1f6856671529e3199 new file mode 100644 index 0000000000000000000000000000000000000000..1c180878f0bb1e21139b42c6c3ca5521685c074e GIT binary patch literal 34 icmexw%fMj7!0`Y7f7Y7+{}~w6PyPpS7=aiJgc$%X$P?oL literal 0 HcmV?d00001 diff --git 
a/fuzz/corpus/structure/6c46bcba457bbe201897b0b15a538c114612b090 b/fuzz/corpus/structure/75ed88dfa2c500f7ebc12e75dfa14b99d17f6ee4 similarity index 56% rename from fuzz/corpus/structure/6c46bcba457bbe201897b0b15a538c114612b090 rename to fuzz/corpus/structure/75ed88dfa2c500f7ebc12e75dfa14b99d17f6ee4 index e7d6cba99f865c741e9018c865f1fa8706360b21..083d77830603e52a1254d71520a148a2e0971455 100644 GIT binary patch delta 22 ecmXTQpWrl+L!OVDVJRcSx6PYRGB7YQECm2jfd=*f delta 53 pcmc~Vn&4z`l7WGdVJRcSx6Kf+4MhEC_|Cu$6aD{R1wE$WimMBu2ym1B?MtV|;sm9ayyyUL8Gb$F%~2@DFTQyHE*DE)igc3#qTYaLD2 d*WH&~(D6)-9!@4vyxw@T*)6i{)pdX}hZn&!1m^$% literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/7bc0a73e7004b6eab650b2f5fbef8c55ab770c3f b/fuzz/corpus/structure/7bc0a73e7004b6eab650b2f5fbef8c55ab770c3f deleted file mode 100644 index ce81111f8d3da3dacda681736ae75ad80e0191b8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 880 zcmZ3yc=2KeAh>@0|J&=`-QE9z04RhEF5?0JRsO&K|NANwNMF7B-wzNGAk&EetROXurS`7*NW7cWN0Bq5kk7R3E; zK~%ji(DNrxo}A7Q&LGRs&+tFO?&L{^|Nrey{-?43tnm8};?&WQ21YYHAt^!<0YQ@F dl!TE&&;;x+U&dxBNEDmEa++YdEPINF2e23uT*(&8PLoL{ zp9wQFDRRb~LkLi4sYdJoTfFg^E!+nk7Lg~NX+;mhC^*`>39p?0yRm7*8Xdegz7vow zF1B1A_O+$jE3+#%r-k+0UPcjunN9`ElyK0a*9p>mzzucT!qo0ZZd5xxX|q#|t#ekj IfuE-F3%t2uN&o-= literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/c3d72425b4fa3de912442fde4ba7b26b95407963 b/fuzz/corpus/structure/c3d72425b4fa3de912442fde4ba7b26b95407963 new file mode 100644 index 0000000000000000000000000000000000000000..698f5fb14eb8fdca1b9582429d80cd2dd3a292dd GIT binary patch literal 38 scmexw{a@uj5HK>TFfc6SWB@X485sWme*6Ew&inuW8Mcdmzx+P~0G|UEVE_OC literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/c5a3ef9f3c1809402a5a24345ad0c74ef019c27d b/fuzz/corpus/structure/c5a3ef9f3c1809402a5a24345ad0c74ef019c27d new file mode 100644 index 0000000000000000000000000000000000000000..7ba25d8d56f4e3b25de9bc940840a410cfde8301 GIT binary patch literal 49 
qcmexw%fVm>0zdw<|NsAgJs4@0|J&=`-QE9z04RhEF5?0JRsO&K|NANwNMF7B-wzNGAk&EetROXurS`7*NW7cWN0Bq5kk7R3E; zK~%ji(DNrxo}A7Q&LGRs&+tFO?&L{^|Nrey{-?43tT6nC23+6(}|eMofx literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/db0c71e896a42fce0fb874f172834657ca913f79 b/fuzz/corpus/structure/db0c71e896a42fce0fb874f172834657ca913f79 new file mode 100644 index 0000000000000000000000000000000000000000..bdeb007addcd575959d604fa3fa11a42aad63273 GIT binary patch literal 45 scmWN_K@k8z2m`ULkh8+z;xtzg&%!ea&`4w;LCEIwj^j$%?g;930b4>4X#fBK literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/dd809dff780b60ad58856aa7a7451396b2e86514 b/fuzz/corpus/structure/dd809dff780b60ad58856aa7a7451396b2e86514 new file mode 100644 index 0000000000000000000000000000000000000000..83c5e68414183fc90961f24891e48e7f510bc7c4 GIT binary patch literal 115 zcmezWpW*+1mj5dM-`DGQ|NnpTRHQVIrjKtHz{-CIFSuDV24Kzm)V@hwk@JVDKLCB`LTf0);MsWH8E6WbX literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/f458f65575c8a52555c9c8ef52b6fc78a5a2aa38 b/fuzz/corpus/structure/f458f65575c8a52555c9c8ef52b6fc78a5a2aa38 deleted file mode 100644 index 96bc0859aff78870609ee8ad6a89f11cdc634448..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 494 zcmah^K?(vf49vw>h=?Ev!XESi`V75z@geI2dhqB2@|xhWl)hr|0ect|TuH%F=g>?u z2{U14CPmH|atHwmZIy@(V2u~X!OE@EWf6JMi57Gx97G8EA~~=(gC5VEziwF7VS^4{ z8s7-WtfrtYciS>#nV#8|tJ8_K@}GYWn5TrDnqGQH^#BjlWHVF$T;x%8)<1Q2jIm~D JU0e8QTHk8_aXbJ3 diff --git a/fuzz/corpus/structure/f4e97881545863c9244f4dee63e902bec633bc9b b/fuzz/corpus/structure/f4e97881545863c9244f4dee63e902bec633bc9b new file mode 100644 index 0000000000000000000000000000000000000000..c10dcfdac32372a6b90e2ba247e3027ad4f8aff5 GIT binary patch literal 166 zcmezW_P@&i_y2#Zfv9@j?*IQ!o;*37A)G;$p`YQu-N};-|AFNHs|fJl@IMgL{Xc#E t`t|N^pav9x%czqORUo^dK>g%@29TY|X22!?{|2er@&CUz0}BHK0{~qHdd~m= literal 0 HcmV?d00001 diff --git a/fuzz/corpus/structure/f568c065526f5ed5e61fe82491024d46353f4b4a 
b/fuzz/corpus/structure/f568c065526f5ed5e61fe82491024d46353f4b4a new file mode 100644 index 0000000000000000000000000000000000000000..559c1e2e80f2373a783ec98a2affae3eeb18ccf7 GIT binary patch literal 44 mcmezWpW*+1yOSpwzJth?X|Npfa0DvJ$_W%F@ diff --git a/fuzz/fuzz_targets/structure.rs b/fuzz/fuzz_targets/structure.rs index 7e473af..51eb956 100644 --- a/fuzz/fuzz_targets/structure.rs +++ b/fuzz/fuzz_targets/structure.rs @@ -39,7 +39,8 @@ enum Mutation { FlipCrc, /// Flip a CRC-covered body byte ⇒ invalid record. FlipBody, - /// Zero the `rec_type` byte ⇒ a sentinel (clean end of records), NOT invalid. + /// Zero the `rec_type` byte ⇒ a corruption the CRC catches (no longer a + /// sentinel; a genuine sentinel is an all-zero header — issue #26). Invalid. ZeroRecType, /// Enlarge the `length` field ⇒ CRC mismatch / overrun ⇒ invalid record. ExtendLength, @@ -51,10 +52,14 @@ enum Mutation { } impl Mutation { - /// Whether this mutation makes the record *invalid* (a corruption boundary) - /// as opposed to turning it into a *sentinel* (clean end). + /// Whether this mutation makes the record *invalid* (a corruption boundary). + /// After the issue #26 fix the sentinel is recognized only by an all-zero + /// header, so `ZeroRecType` (zeroing only the `rec_type` byte) leaves a CRC + /// mismatch the classifier catches — i.e. **every** mutation here now + /// invalidates the record. Kept as a method for the oracle's structure and any + /// future non-invalidating mutation. fn invalidates(self) -> bool { - !matches!(self, Mutation::ZeroRecType) + true } } @@ -165,32 +170,27 @@ fuzz_target!(|s: Scenario| { assert_eq!(report2.tail_state, TailState::Clean, "D7/D10: reopen tail not clean"); // ---- sharp classifier oracle ---- + // Every mutation in the menu invalidates the target record (post + // issue #26, `ZeroRecType` is a CRC-caught corruption too — see + // `Mutation::invalidates`). 
An invalid record that nonetheless returned + // Ok can ONLY be the LAST record (interior corruption is fatal and is + // handled in the Err arm); it must be a torn-tail truncation at its + // offset (D4/D5). if let Some(m) = mutated_index { let (off_m, _, _) = recs[m]; - if s.mutation.invalidates() { - // An invalid record that returned Ok can ONLY be the last - // record (interior corruption is fatal — handled in Err arm). - assert_eq!(m, n - 1, "D5: interior corruption returned Ok (silent truncation!)"); - assert_eq!( - report.durable_lsn.0, - base + m as u64 - 1, - "D4: torn-tail durable_lsn wrong" - ); - match report.tail_state { - TailState::TruncatedAt { segment_base, offset } => { - assert_eq!(segment_base, Lsn(base)); - assert_eq!(offset, off_m as u64, "D4: truncation offset wrong"); - } - TailState::Clean => panic!("D4: corrupt last record not reported as truncated"), + assert!(s.mutation.invalidates()); + assert_eq!(m, n - 1, "D5: interior corruption returned Ok (silent truncation!)"); + assert_eq!( + report.durable_lsn.0, + base + m as u64 - 1, + "D4: torn-tail durable_lsn wrong" + ); + match report.tail_state { + TailState::TruncatedAt { segment_base, offset } => { + assert_eq!(segment_base, Lsn(base)); + assert_eq!(offset, off_m as u64, "D4: truncation offset wrong"); } - } else { - // ZeroRecType ⇒ sentinel at m ⇒ clean end there. - assert_eq!( - report.durable_lsn.0, - base + m as u64 - 1, - "sentinel at record {m}: durable_lsn wrong" - ); - assert_eq!(report.tail_state, TailState::Clean, "sentinel ⇒ clean tail"); + TailState::Clean => panic!("D4: corrupt last record not reported as truncated"), } } } diff --git a/src/record.rs b/src/record.rs index 36e713d..6ef71ed 100644 --- a/src/record.rs +++ b/src/record.rs @@ -11,7 +11,7 @@ //! | 0 | 4 | `crc` | CRC-32C over `[4, 4 + 16 + length + pad)` — header tail, payload, **and** padding | //! | 4 | 4 | `length` | `u32` payload length | //! | 8 | 8 | `lsn` | `u64` | -//! 
| 16 | 1 | `rec_type` | `1` = Full; `0` = sentinel (never a real record); `2..` reserved | +//! | 16 | 1 | `rec_type` | `1` = Full; `0` = zero (never a real record; the sentinel is an **all-zero header**, not `rec_type == 0` alone — §8.2 step 1); `2..` reserved | //! | 17 | 1 | `rflags` | reserved, 0 | //! | 18 | 2 | `reserved` | 0 | //! | 20 | `length` | `payload` | opaque caller bytes | @@ -101,8 +101,10 @@ pub(crate) enum Decoded<'a> { /// advances its scan offset by this amount. framed_len: usize, }, - /// A `rec_type == 0` header: the end-of-records sentinel / pre-allocated zero - /// region (§5.4). Not a record. + /// An **all-zero 20-byte header**: the end-of-records sentinel / pre-allocated + /// zero region (§5.4 / §8.2 step 1). Recognized by the *whole* header being + /// zero, not `rec_type == 0` alone (a `rec_type == 0` header with a non-zero + /// CRC is a corrupt record — `Invalid`, issue #26). Not a record. Sentinel, /// Fewer bytes than a full header, or the framed record would overrun the /// slice. At a physical tail this is a short/torn write; the codec does not @@ -202,10 +204,17 @@ pub(crate) fn decode(buf: &[u8], max_record_size: u32) -> Decoded<'_> { return Decoded::Incomplete; } - // rec_type == 0 is the end-of-records sentinel regardless of the other - // (zeroed) header bytes (§8.2 step 1). + // The end-of-records sentinel is an ALL-ZERO 20-byte header (§8.2 step 1), + // not `rec_type == 0` alone. The CRC covers `rec_type`, so a corruption of an + // interior record's `rec_type` to 0 MUST fail the CRC and be classified as + // mid-log corruption (fatal `TornMidLog`, D5) — never mistaken for the + // sentinel, which would silently drop the following acked records (a D3/D5 + // hole; issue #26). A genuine sentinel only ever arises from the pre-allocated + // zero region or the §8.2.1 post-truncation zeroing, both all-zero, so this + // loses nothing legitimate. 
The `rec_type == 0` test short-circuits first, so + // the common Full-record path (`rec_type == 1`) stays a single byte compare. let rec_type = buf[REC_TYPE_OFF]; - if rec_type == REC_TYPE_SENTINEL { + if rec_type == REC_TYPE_SENTINEL && buf[..RECORD_HEADER_SIZE].iter().all(|&b| b == 0) { return Decoded::Sentinel; } @@ -397,12 +406,29 @@ mod tests { #[test] fn sentinel_header_detected() { - // rec_type == 0 ⇒ Sentinel, even with otherwise garbage bytes. - let mut buf = vec![0xFFu8; RECORD_HEADER_SIZE]; - buf[REC_TYPE_OFF] = REC_TYPE_SENTINEL; - assert!(matches!(decode(&buf, MAX), Decoded::Sentinel)); + // The sentinel is recognized only by an ALL-ZERO 20-byte header (§8.2 step + // 1 / issue #26), NOT `rec_type == 0` alone. The CRC covers `rec_type`, so + // a real record whose `rec_type` byte is zeroed (single-bit flip, CRC not + // recomputed) must decode as `Invalid(BadCrc)`, never a `Sentinel` — this + // is what keeps a `rec_type`→0 corruption of an interior record fatal (D5) + // instead of a silent truncation (D3). + let mut buf = Vec::new(); + encode_into(&mut buf, Lsn(1), &[0xAB, 0xCD]); + buf[REC_TYPE_OFF] = REC_TYPE_SENTINEL; // 1 → 0, leaving the stale (non-zero) CRC + assert!(matches!( + decode(&buf, MAX), + Decoded::Invalid(DecodeError::BadCrc) + )); + + // An arbitrary non-all-zero header with `rec_type == 0` is likewise NOT a + // sentinel (here the `0xFF` length trips the length bound first — still + // `Invalid`, never `Sentinel`). + let mut garbage = vec![0xFFu8; RECORD_HEADER_SIZE]; + garbage[REC_TYPE_OFF] = REC_TYPE_SENTINEL; + assert!(matches!(decode(&garbage, MAX), Decoded::Invalid(_))); - // The all-zero pre-allocated region is also a sentinel. + // A genuine all-zero header (the pre-allocated zero region / post-truncation + // zeroing) is the sentinel. 
assert!(matches!( decode(&[0u8; RECORD_HEADER_SIZE], MAX), Decoded::Sentinel diff --git a/src/recovery.rs b/src/recovery.rs index 0ad7c56..a225de1 100644 --- a/src/recovery.rs +++ b/src/recovery.rs @@ -611,6 +611,57 @@ mod tests { assert!(matches!(rec.tail_state, TailState::TruncatedAt { offset, .. } if offset == x)); } + #[test] + fn rec_type_zeroed_interior_is_fatal_tornmidlog() { + // issue #26 (D3/D5): a single-bit corruption of an INTERIOR record's + // `rec_type` byte (1→0) must NOT be mistaken for the end-of-records + // sentinel. The CRC covers `rec_type`, so the record fails the CRC check; + // with a valid record still after it, recovery is fatal `TornMidLog` — + // never a silent truncation that would drop the following acked records. + // (The sentinel is recognized only by an all-zero header, not `rec_type==0` + // alone — that short-circuit was the D3/D5 hole F3 found.) + let dir = tempfile::tempdir().unwrap(); + let file = fresh_segment(dir.path(), Lsn(1)); + write_dense(&file, Lsn(1), &[b"one", b"two", b"three"]); + // Offset of the SECOND record (LSN 2); record 3 stays valid after it. + let x = HEADER_SIZE + record::framed_size(3) as u64; + // Zero the rec_type byte WITHOUT recomputing the CRC ⇒ crc ≠ 0 (models the + // single-bit flip; the header is NOT all-zero, so it is not a sentinel). + file.write_all_at(&[0u8], x + 16).unwrap(); + file.sync_data().unwrap(); + + let err = recover_segment(&file, Lsn(1), true, SEGMENT_SIZE, MAX_RECORD_SIZE).unwrap_err(); + assert!( + matches!(err, WalError::TornMidLog { segment, offset } if segment == Lsn(1) && offset == x), + "interior rec_type→0 corruption must be fatal TornMidLog at {x}, got {err:?}" + ); + } + + #[test] + fn rec_type_zeroed_at_tail_is_torn_tail_and_zeroed() { + // issue #26 (D4/D10): the same `rec_type`→0 corruption on the LAST record + // is a torn tail — truncate at its offset, durable LSN at the prior record, + // and the region is durably zeroed so a reopen is clean and idempotent. 
+ let dir = tempfile::tempdir().unwrap(); + let file = fresh_segment(dir.path(), Lsn(1)); + write_dense(&file, Lsn(1), &[b"first", b"second"]); + // Offset of the SECOND (last) record (LSN 2). + let x = HEADER_SIZE + record::framed_size(5) as u64; + file.write_all_at(&[0u8], x + 16).unwrap(); + file.sync_data().unwrap(); + + let rec = recover_segment(&file, Lsn(1), true, SEGMENT_SIZE, MAX_RECORD_SIZE).unwrap(); + assert_eq!(rec.max_lsn, Lsn(1)); + assert_eq!(rec.write_offset, x); + assert!(matches!(rec.tail_state, TailState::TruncatedAt { offset, .. } if offset == x)); + + // D10: the tail was durably zeroed ⇒ a second recovery is clean and stable. + let again = recover_segment(&file, Lsn(1), true, SEGMENT_SIZE, MAX_RECORD_SIZE).unwrap(); + assert_eq!(again.max_lsn, Lsn(1)); + assert_eq!(again.write_offset, x); + assert_eq!(again.tail_state, TailState::Clean); + } + #[test] fn arbitrary_bytes_never_panic_and_terminate() { // Interim D11 coverage (the libFuzzer F1 target is M9): overwrite the