From 617b21673c827299d0c41381426e71efc567bf05 Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 29 May 2026 11:41:54 +0800 Subject: [PATCH 01/15] docs: design json encode fuzz runner --- .../2026-05-29-json-encode-fuzz-design.md | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 docs/superpowers/specs/2026-05-29-json-encode-fuzz-design.md diff --git a/docs/superpowers/specs/2026-05-29-json-encode-fuzz-design.md b/docs/superpowers/specs/2026-05-29-json-encode-fuzz-design.md new file mode 100644 index 0000000..f4eba76 --- /dev/null +++ b/docs/superpowers/specs/2026-05-29-json-encode-fuzz-design.md @@ -0,0 +1,119 @@ +# JSON Encode Fuzz Test Design + +## Goal + +Add a `make fuzz` entry point that runs a long-lived Lua-side fuzz test for the current JSON encode implementation. The test should stress `rapidjson.encode(value, { sort_keys = true })` with realistic Lua table shapes and verify that successful encodes produce valid, sorted, structurally correct JSON. + +This is a property and stress fuzz test, not a C/C++ coverage-guided fuzzer. The highest-value input surface is Lua values, especially tables with JSON-like API response shapes. + +## Command Interface + +Add a root `Makefile` target: + +```sh +make fuzz +make fuzz DURATION=3600 INTERVAL=5 WORKERS=1 SEED=123 +``` + +Defaults: + +- `DURATION=3600` +- `INTERVAL=5` +- `WORKERS=1` +- `SORT_KEYS=1` + +`make fuzz` should use one CPU core by default. `WORKERS` is reserved for a future multi-process mode, where each worker can run the same runner with a distinct seed. + +## Runner + +Add `tools/fuzz_encode.lua`. + +The runner will: + +- Parse duration, reporting interval, worker id, worker count, seed, and sort option from command-line arguments or environment variables. +- Initialize a deterministic pseudo-random generator from the seed. +- Repeatedly generate one Lua value plus expected metadata. +- Call `rapidjson.encode(value, { sort_keys = true })`. 
+- Validate every successful encode. +- Print progress every reporting interval. +- Stop after the configured duration or after the first validation failure. + +## Generated Cases + +Each fuzz case uses a named schema inspired by real JSON-producing systems: + +- LLM-style chat completion response +- GitHub-style issue or API response +- Twitter or Weibo-style feed response +- Paginated list response +- Nested metadata or configuration object + +Generators should produce Lua tables with controlled depth and size. They should include strings, numbers, booleans, `rapidjson.null`, arrays, objects, empty objects, and nested collections. They should avoid unsupported Lua values such as functions, threads, full userdata, and circular tables because this fuzz target is focused on successful encode quality. + +Each generated case should carry expected metadata such as: + +- Schema name +- Top-level kind +- Expected object key count at selected paths +- Expected sorted key order at selected object paths +- Expected array length at selected paths +- Selected scalar path/value assertions + +## Validation + +For every successful encode: + +- Decode the JSON with `rapidjson.decode` and ensure decoding succeeds. +- Check the decoded top-level kind. +- Check expected object field counts. +- Check expected array lengths. +- Check selected scalar path values. +- Check that keys emitted by `rapidjson.encode(..., { sort_keys = true })` appear in sorted order for tracked object paths. + +The key order check should inspect the encoded JSON for tracked objects. The generated metadata should keep those tracked objects small enough that order checks can be reliable without requiring a full JSON parser beyond `rapidjson.decode`. 
+ +## Reporting + +Every `INTERVAL` seconds, print a compact summary: + +- Elapsed seconds +- Total cases +- Successful cases +- Encode errors +- Validation failures +- Cases per second +- Seed +- Last case id + +On failure, print enough information to reproduce: + +- Seed +- Case id +- Worker id +- Schema name +- Failure reason +- Generated Lua value dump +- Encoded JSON, if available + +The process should exit non-zero on validation failure. + +## Multi-Core Path + +The first implementation runs one worker by default. The command interface should leave room for: + +```sh +make fuzz WORKERS=4 +``` + +A later implementation can start multiple Lua processes with worker-specific seeds, for example `SEED + worker_id`. Worker output can be prefixed with the worker id before adding any central aggregation. + +## Test Strategy + +Keep the fuzz runner separate from the normal `busted` suite because the default fuzz duration is long. Add focused unit coverage only for helper functions if the implementation grows complex enough to justify it. + +Manual verification for the first version: + +- Build or otherwise make `rapidjson.so` available. +- Run `make fuzz DURATION=5 INTERVAL=1 SEED=123`. +- Confirm it reports progress and exits successfully. +- Run with at least one intentionally broken assertion during development to confirm failures are reproducible and exit non-zero. 
From deacb38460a4029dc61e98464d0c9afe4056789b Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 29 May 2026 11:48:49 +0800 Subject: [PATCH 02/15] docs: plan json encode fuzz runner --- .../plans/2026-05-29-json-encode-fuzz.md | 1161 +++++++++++++++++ 1 file changed, 1161 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-29-json-encode-fuzz.md diff --git a/docs/superpowers/plans/2026-05-29-json-encode-fuzz.md b/docs/superpowers/plans/2026-05-29-json-encode-fuzz.md new file mode 100644 index 0000000..5523143 --- /dev/null +++ b/docs/superpowers/plans/2026-05-29-json-encode-fuzz.md @@ -0,0 +1,1161 @@ +# JSON Encode Fuzz Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Build a `make fuzz` entry point that runs a long-lived Lua-side fuzz test for `rapidjson.encode(value, { sort_keys = true })`. + +**Architecture:** Add a reusable Lua module for deterministic generation, metadata, validation, dumping, and summary formatting. Add a thin executable runner that wires that module to `rapidjson`, then expose it through a root `Makefile` target with configurable duration, interval, workers, seed, and sort behavior. + +**Tech Stack:** Lua 5.1-compatible code, LuaJIT/Lua, Busted specs, existing `rapidjson` Lua module, POSIX `make` and shell. + +--- + +## File Structure + +- Create `tools/fuzz_encode_lib.lua`: deterministic RNG, config parsing, real-world-shaped case generation, validation helpers, value dumping, and summary formatting. This is importable from Busted specs. +- Create `tools/fuzz_encode.lua`: command-line runner that requires `rapidjson` and `tools.fuzz_encode_lib`, executes the timed loop, prints progress, and exits non-zero on validation failure. 
+- Create `spec/fuzz_encode_lib_spec.lua`: fast Busted coverage for config parsing, deterministic generation, expected metadata, validation failures, and summary formatting. +- Create `Makefile`: `make fuzz` target with `DURATION`, `INTERVAL`, `WORKERS`, `SEED`, `SORT_KEYS`, and `LUA` variables. + +No C++ production code changes are needed. + +--- + +## Execution Preflight + +Before running specs that require `rapidjson`, make the module available from the worktree root. + +- [ ] **Step 1: Check whether `rapidjson` already loads** + +Run: + +```bash +luajit -e 'require("rapidjson"); print("rapidjson ok")' +``` + +Expected when already built: `rapidjson ok`. + +- [ ] **Step 2: Build `rapidjson.so` for local LuaJIT if the check fails** + +Run (this command targets macOS with Homebrew LuaJIT; on Linux, replace `-bundle -undefined dynamic_lookup -all_load` with `-shared` and point `-I` at the local LuaJIT headers): + +```bash +c++ -std=c++11 -g -Wall -fPIC \ + -I/opt/homebrew/include/luajit-2.1 \ + -Irapidjson/include \ + -bundle -undefined dynamic_lookup -all_load \ + src/Document.cpp src/Schema.cpp src/rapidjson.cpp src/values.cpp \ + -o rapidjson.so +``` + +Expected: compile succeeds. Warnings from `src/luax.hpp` about integer-to-double conversion are pre-existing on this checkout. 
+ +--- + +### Task 1: Testable Fuzz Library Skeleton + +**Files:** +- Create: `tools/fuzz_encode_lib.lua` +- Create: `spec/fuzz_encode_lib_spec.lua` + +- [ ] **Step 1: Write the failing tests for config parsing, deterministic RNG, and summary formatting** + +Create `spec/fuzz_encode_lib_spec.lua` with: + +```lua +require 'busted.runner'() + +describe('tools.fuzz_encode_lib', function() + local fuzz = require('tools.fuzz_encode_lib') + + describe('parse_config', function() + it('uses production defaults', function() + local cfg = fuzz.parse_config({}) + + assert.are.equal(3600, cfg.duration) + assert.are.equal(5, cfg.interval) + assert.are.equal(1, cfg.workers) + assert.are.equal(1, cfg.worker_id) + assert.are.equal(true, cfg.sort_keys) + assert.are.equal('number', type(cfg.seed)) + end) + + it('accepts numeric and boolean overrides', function() + local cfg = fuzz.parse_config({ + DURATION = '12', + INTERVAL = '3', + WORKERS = '2', + WORKER_ID = '2', + SEED = '99', + SORT_KEYS = '0', + }) + + assert.are.equal(12, cfg.duration) + assert.are.equal(3, cfg.interval) + assert.are.equal(2, cfg.workers) + assert.are.equal(2, cfg.worker_id) + assert.are.equal(99, cfg.seed) + assert.are.equal(false, cfg.sort_keys) + end) + end) + + describe('new_rng', function() + it('is deterministic for the same seed', function() + local a = fuzz.new_rng(123) + local b = fuzz.new_rng(123) + + assert.are.equal(a:int(1, 1000000), b:int(1, 1000000)) + assert.are.equal(a:int(1, 1000000), b:int(1, 1000000)) + assert.are.equal(a:bool(), b:bool()) + end) + end) + + describe('format_summary', function() + it('formats the progress counters', function() + local line = fuzz.format_summary({ + elapsed = 5, + total = 100, + encoded = 99, + encode_errors = 1, + validation_failures = 0, + rate = 20, + seed = 123, + last_case_id = 100, + worker_id = 1, + }) + + assert.matches('worker=1', line, 1, true) + assert.matches('elapsed=5s', line, 1, true) + assert.matches('total=100', line, 1, true) + 
assert.matches('encoded=99', line, 1, true) + assert.matches('encode_errors=1', line, 1, true) + assert.matches('validation_failures=0', line, 1, true) + assert.matches('rate=20.00/s', line, 1, true) + assert.matches('seed=123', line, 1, true) + assert.matches('last_case=100', line, 1, true) + end) + end) +end) +``` + +- [ ] **Step 2: Run the focused spec and verify it fails because the module does not exist** + +Run: + +```bash +/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua +``` + +Expected: FAIL or ERROR with `module 'tools.fuzz_encode_lib' not found`. + +- [ ] **Step 3: Implement the minimal library skeleton** + +Create `tools/fuzz_encode_lib.lua` with: + +```lua +local M = {} + +local DEFAULTS = { + duration = 3600, + interval = 5, + workers = 1, + worker_id = 1, + sort_keys = true, +} + +local function tonumber_or(value, default) + local parsed = tonumber(value) + if parsed == nil then + return default + end + return parsed +end + +local function normalize_seed(value) + local parsed = tonumber(value) + if parsed == nil then + parsed = os.time() + end + parsed = math.floor(parsed) + parsed = parsed % 2147483647 + if parsed <= 0 then + parsed = 1 + end + return parsed +end + +function M.parse_config(env) + env = env or {} + return { + duration = tonumber_or(env.DURATION, DEFAULTS.duration), + interval = tonumber_or(env.INTERVAL, DEFAULTS.interval), + workers = tonumber_or(env.WORKERS, DEFAULTS.workers), + worker_id = tonumber_or(env.WORKER_ID, DEFAULTS.worker_id), + seed = normalize_seed(env.SEED), + sort_keys = env.SORT_KEYS ~= '0', + } +end + +function M.new_rng(seed) + local state = normalize_seed(seed) + local rng = {} + + function rng:next() + local hi = math.floor(state / 127773) + local lo = state % 127773 + local test = 16807 * lo - 2836 * hi + if test <= 0 then + test = test + 2147483647 + end + state = test + return state / 2147483647 + end + + function rng:int(min, max) + return min + math.floor(self:next() * (max - min + 1)) 
+ end + + function rng:bool() + return self:int(0, 1) == 1 + end + + function rng:choice(values) + return values[self:int(1, #values)] + end + + return rng +end + +function M.format_summary(stats) + return string.format( + 'worker=%d elapsed=%ds total=%d encoded=%d encode_errors=%d validation_failures=%d rate=%.2f/s seed=%d last_case=%d', + stats.worker_id, + stats.elapsed, + stats.total, + stats.encoded, + stats.encode_errors, + stats.validation_failures, + stats.rate, + stats.seed, + stats.last_case_id + ) +end + +return M +``` + +- [ ] **Step 4: Run the focused spec and verify it passes** + +Run: + +```bash +/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua +``` + +Expected: PASS with all `tools.fuzz_encode_lib` examples successful. + +- [ ] **Step 5: Commit the skeleton** + +Run: + +```bash +git add tools/fuzz_encode_lib.lua spec/fuzz_encode_lib_spec.lua +git commit -m "test: add fuzz encode library skeleton" +``` + +--- + +### Task 2: Real-World Case Generators and Metadata + +**Files:** +- Modify: `tools/fuzz_encode_lib.lua` +- Modify: `spec/fuzz_encode_lib_spec.lua` + +- [ ] **Step 1: Add failing tests for generated schemas and expected metadata** + +Append these tests inside the top-level `describe('tools.fuzz_encode_lib', function()` block in `spec/fuzz_encode_lib_spec.lua`: + +```lua + describe('generate_case', function() + it('generates deterministic real-world schema cases with metadata', function() + local a = fuzz.generate_case(fuzz.new_rng(321), 1, { null = {} }) + local b = fuzz.generate_case(fuzz.new_rng(321), 1, { null = {} }) + + assert.are.same(a.value, b.value) + assert.are.same(a.expected, b.expected) + assert.are.equal('number', type(a.id)) + assert.are.equal('string', type(a.schema)) + assert.are.equal('object', a.expected.top_level_kind) + assert.is_true(#a.expected.objects >= 1) + assert.is_true(#a.expected.arrays >= 1) + assert.is_true(#a.expected.scalars >= 1) + end) + + it('cycles through the five supported schema 
families', function() + local rng = fuzz.new_rng(1) + local seen = {} + + for case_id = 1, 10 do + local case = fuzz.generate_case(rng, case_id, { null = {} }) + seen[case.schema] = true + end + + assert.is_true(seen.llm_response) + assert.is_true(seen.github_issue) + assert.is_true(seen.social_feed) + assert.is_true(seen.paginated_list) + assert.is_true(seen.metadata_config) + end) + end) +``` + +- [ ] **Step 2: Run the focused spec and verify it fails because `generate_case` is missing** + +Run: + +```bash +/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua +``` + +Expected: FAIL with `attempt to call field 'generate_case'`. + +- [ ] **Step 3: Implement generator helpers and schema generators** + +Add these helpers to `tools/fuzz_encode_lib.lua` before `return M`: + +```lua +local SCHEMAS = { + 'llm_response', + 'github_issue', + 'social_feed', + 'paginated_list', + 'metadata_config', +} + +local WORDS = { + 'alpha', 'bravo', 'charlie', 'delta', 'echo', 'foxtrot', + 'json', 'encode', 'rapid', 'lua', 'api', '模型', '微博', +} + +local function sentence(rng, min_words, max_words) + local parts = {} + for _ = 1, rng:int(min_words, max_words) do + parts[#parts + 1] = rng:choice(WORDS) + end + return table.concat(parts, ' ') +end + +local function string_keys(tbl) + local keys = {} + for key, _ in pairs(tbl) do + if type(key) == 'string' then + keys[#keys + 1] = key + end + end + table.sort(keys) + return keys +end + +local function path_value(root, path) + local current = root + for i = 1, #path do + current = current[path[i]] + end + return current +end + +local function track_object(expected, label, value, path, check_order) + expected.objects[#expected.objects + 1] = { + label = label, + path = path, + count = #string_keys(value), + keys = string_keys(value), + check_order = check_order == true, + } +end + +local function track_array(expected, label, value, path) + expected.arrays[#expected.arrays + 1] = { + label = label, + path = path, + length 
= #value, + } +end + +local function track_scalar(expected, label, value, path) + expected.scalars[#expected.scalars + 1] = { + label = label, + path = path, + value = path_value(value, path), + } +end + +local function base_expected(schema) + return { + schema = schema, + top_level_kind = 'object', + objects = {}, + arrays = {}, + scalars = {}, + } +end + +local function generate_llm_response(rng, case_id, rapidjson) + local value = { + id = 'chatcmpl-' .. case_id, + object = 'chat.completion', + created = 1700000000 + case_id, + model = rng:choice({ 'gpt-4.1', 'gpt-4o-mini', 'reasoner-small' }), + choices = { + { + index = 0, + finish_reason = rng:choice({ 'stop', 'length', 'tool_calls' }), + message = { + role = 'assistant', + content = sentence(rng, 4, 10), + }, + }, + }, + usage = { + prompt_tokens = rng:int(1, 2000), + completion_tokens = rng:int(1, 2000), + total_tokens = rng:int(2001, 5000), + }, + metadata = { + request_id = 'req_' .. rng:int(1000, 9999), + cached = rng:bool(), + trace = rapidjson.null, + }, + } + local expected = base_expected('llm_response') + track_object(expected, 'root', value, {}, true) + track_object(expected, 'usage', value.usage, { 'usage' }, true) + track_array(expected, 'choices', value.choices, { 'choices' }) + track_scalar(expected, 'model', value, { 'model' }) + track_scalar(expected, 'message_role', value, { 'choices', 1, 'message', 'role' }) + return value, expected +end + +local function generate_github_issue(rng, case_id, rapidjson) + local labels = {} + for i = 1, rng:int(1, 4) do + labels[i] = { + id = case_id * 100 + i, + name = rng:choice({ 'bug', 'feature', 'fuzz', 'help wanted' }), + color = rng:choice({ 'ff0000', '00ff00', '0052cc' }), + } + end + local value = { + id = case_id, + number = rng:int(1, 10000), + state = rng:choice({ 'open', 'closed' }), + title = sentence(rng, 3, 8), + body = sentence(rng, 8, 18), + user = { + login = 'user' .. 
rng:int(1, 999), + id = rng:int(1, 100000), + site_admin = false, + }, + labels = labels, + milestone = rapidjson.null, + reactions = { + ['+1'] = rng:int(0, 100), + ['-1'] = rng:int(0, 10), + confused = rng:int(0, 5), + heart = rng:int(0, 50), + }, + } + local expected = base_expected('github_issue') + track_object(expected, 'root', value, {}, true) + track_object(expected, 'user', value.user, { 'user' }, true) + track_object(expected, 'reactions', value.reactions, { 'reactions' }, true) + track_array(expected, 'labels', value.labels, { 'labels' }) + track_scalar(expected, 'state', value, { 'state' }) + return value, expected +end + +local function generate_social_feed(rng, case_id, rapidjson) + local posts = {} + for i = 1, rng:int(2, 5) do + posts[i] = { + id = 'post_' .. case_id .. '_' .. i, + text = sentence(rng, 5, 15), + reposts = rng:int(0, 1000), + likes = rng:int(0, 10000), + verified = rng:bool(), + reply_to = rapidjson.null, + } + end + local value = { + platform = rng:choice({ 'twitter', 'weibo' }), + cursor = 'cursor_' .. rng:int(1000, 9999), + has_more = rng:bool(), + posts = posts, + viewer = { + locale = rng:choice({ 'en-US', 'zh-CN', 'ja-JP' }), + safe_mode = rng:bool(), + }, + } + local expected = base_expected('social_feed') + track_object(expected, 'root', value, {}, true) + track_object(expected, 'viewer', value.viewer, { 'viewer' }, true) + track_array(expected, 'posts', value.posts, { 'posts' }) + track_scalar(expected, 'platform', value, { 'platform' }) + return value, expected +end + +local function generate_paginated_list(rng, case_id, rapidjson) + local items = {} + for i = 1, rng:int(1, 6) do + items[i] = { + id = case_id * 10 + i, + name = 'item_' .. rng:int(100, 999), + enabled = rng:bool(), + score = rng:int(0, 10000) / 100, + extra = rapidjson.null, + } + end + local value = { + page = rng:int(1, 50), + per_page = #items, + total = rng:int(#items, #items + 500), + items = items, + links = { + next = '/api/items?page=' .. 
rng:int(2, 99), + prev = rapidjson.null, + }, + } + local expected = base_expected('paginated_list') + track_object(expected, 'root', value, {}, true) + track_object(expected, 'links', value.links, { 'links' }, true) + track_array(expected, 'items', value.items, { 'items' }) + track_scalar(expected, 'per_page', value, { 'per_page' }) + return value, expected +end + +local function generate_metadata_config(rng, case_id, rapidjson) + local value = { + version = 'v' .. rng:int(1, 9) .. '.' .. rng:int(0, 9), + rollout = { + percent = rng:int(0, 100), + region = rng:choice({ 'us', 'sg', 'eu', 'cn' }), + enabled = rng:bool(), + }, + features = { + encode_fuzz = true, + sorted_json = true, + experimental = rng:bool(), + }, + owners = { + 'team-api', + 'team-runtime', + }, + annotations = { + case_id = case_id, + note = sentence(rng, 2, 6), + empty = rapidjson.null, + }, + } + local expected = base_expected('metadata_config') + track_object(expected, 'root', value, {}, true) + track_object(expected, 'rollout', value.rollout, { 'rollout' }, true) + track_object(expected, 'features', value.features, { 'features' }, true) + track_array(expected, 'owners', value.owners, { 'owners' }) + track_scalar(expected, 'version', value, { 'version' }) + return value, expected +end + +local GENERATORS = { + llm_response = generate_llm_response, + github_issue = generate_github_issue, + social_feed = generate_social_feed, + paginated_list = generate_paginated_list, + metadata_config = generate_metadata_config, +} + +function M.generate_case(rng, case_id, rapidjson) + local schema = SCHEMAS[((case_id - 1) % #SCHEMAS) + 1] + local value, expected = GENERATORS[schema](rng, case_id, rapidjson) + return { + id = case_id, + schema = schema, + value = value, + expected = expected, + } +end +``` + +- [ ] **Step 4: Run the focused spec and verify it passes** + +Run: + +```bash +/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit the generators** + +Run: + +```bash +git add tools/fuzz_encode_lib.lua spec/fuzz_encode_lib_spec.lua +git commit -m "feat: generate json encode fuzz cases" +``` + +--- + +### Task 3: Encode Result Validation and Failure Diagnostics + +**Files:** +- Modify: `tools/fuzz_encode_lib.lua` +- Modify: `spec/fuzz_encode_lib_spec.lua` + +- [ ] **Step 1: Add failing tests for validation and diagnostics** + +Append these tests inside the top-level `describe('tools.fuzz_encode_lib', function()` block: + +```lua + describe('validate_encoded_case', function() + local rapidjson = require('rapidjson') + + it('accepts a generated case encoded with sorted keys', function() + local case = fuzz.generate_case(fuzz.new_rng(77), 1, rapidjson) + local json = rapidjson.encode(case.value, { sort_keys = true }) + + local ok, err = fuzz.validate_encoded_case(rapidjson, case, json) + + assert.is_true(ok) + assert.is_nil(err) + end) + + it('rejects unsorted encoded object keys for tracked objects', function() + local case = { + id = 1, + schema = 'manual', + value = { b = 1, a = 2 }, + expected = { + top_level_kind = 'object', + objects = { + { + label = 'root', + path = {}, + count = 2, + keys = { 'a', 'b' }, + check_order = true, + }, + }, + arrays = {}, + scalars = {}, + }, + } + + local ok, err = fuzz.validate_encoded_case(rapidjson, case, '{"b":1,"a":2}') + + assert.is_false(ok) + assert.matches('key order', err, 1, true) + end) + + it('formats reproducible failure reports', function() + local report = fuzz.format_failure({ + seed = 123, + worker_id = 1, + case = { + id = 7, + schema = 'manual', + value = { b = 1, a = 2 }, + }, + json = '{"b":1,"a":2}', + reason = 'root key order mismatch', + }) + + assert.matches('seed=123', report, 1, true) + assert.matches('worker=1', report, 1, true) + assert.matches('case=7', report, 1, true) + assert.matches('schema=manual', report, 1, true) + assert.matches('root key order mismatch', report, 1, true) + 
assert.matches('json={"b":1,"a":2}', report, 1, true) + assert.matches('value={', report, 1, true) + end) + end) +``` + +- [ ] **Step 2: Run the focused spec and verify it fails because validation is missing** + +Run: + +```bash +/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua +``` + +Expected: FAIL with `attempt to call field 'validate_encoded_case'`. + +- [ ] **Step 3: Implement validation, path lookup, and value dump helpers** + +Add these functions to `tools/fuzz_encode_lib.lua` before `return M`: + +```lua +local function kind(value) + if type(value) ~= 'table' then + return type(value) + end + if #value > 0 then + return 'array' + end + return 'object' +end + +local function value_at_path(root, path) + local current = root + for i = 1, #path do + if type(current) ~= 'table' then + return nil + end + current = current[path[i]] + end + return current +end + +local function deep_equal(a, b) + if type(a) ~= type(b) then + return false + end + if type(a) ~= 'table' then + return a == b + end + for key, value in pairs(a) do + if not deep_equal(value, b[key]) then + return false + end + end + for key, _ in pairs(b) do + if a[key] == nil then + return false + end + end + return true +end + +local function key_token(key) + return '"' .. key .. '":' +end + +local function keys_are_sorted_in_json(json, keys) + local previous = 0 + for _, key in ipairs(keys) do + local pos = string.find(json, key_token(key), previous + 1, true) + if pos == nil then + return false, 'missing key "' .. key .. '"' + end + if pos < previous then + return false, 'key "' .. key .. 
'" appeared out of order' + end + previous = pos + end + return true +end + +local function dump_value(value, depth, seen) + depth = depth or 0 + seen = seen or {} + if type(value) == 'string' then + return string.format('%q', value) + end + if type(value) ~= 'table' then + return tostring(value) + end + if seen[value] then + return '{cycle}' + end + if depth >= 4 then + return '{...}' + end + seen[value] = true + local parts = {} + local keys = {} + for key, _ in pairs(value) do + keys[#keys + 1] = key + end + table.sort(keys, function(a, b) return tostring(a) < tostring(b) end) + for _, key in ipairs(keys) do + parts[#parts + 1] = '[' .. dump_value(key, depth + 1, seen) .. ']=' .. dump_value(value[key], depth + 1, seen) + end + seen[value] = nil + return '{' .. table.concat(parts, ',') .. '}' +end + +function M.validate_encoded_case(rapidjson, case, json) + local decoded, decode_err = rapidjson.decode(json) + if decoded == nil then + return false, 'decode failed: ' .. tostring(decode_err) + end + + if kind(decoded) ~= case.expected.top_level_kind then + return false, 'top-level kind mismatch: expected ' .. case.expected.top_level_kind .. ', got ' .. kind(decoded) + end + + for _, object in ipairs(case.expected.objects) do + local value = value_at_path(decoded, object.path) + if kind(value) ~= 'object' then + return false, object.label .. ' kind mismatch' + end + if #string_keys(value) ~= object.count then + return false, object.label .. ' field count mismatch' + end + if object.check_order then + local ok, reason = keys_are_sorted_in_json(json, object.keys) + if not ok then + return false, object.label .. ' key order mismatch: ' .. reason + end + end + end + + for _, array in ipairs(case.expected.arrays) do + local value = value_at_path(decoded, array.path) + if kind(value) ~= 'array' then + return false, array.label .. ' kind mismatch' + end + if #value ~= array.length then + return false, array.label .. 
' length mismatch' + end + end + + for _, scalar in ipairs(case.expected.scalars) do + local value = value_at_path(decoded, scalar.path) + if not deep_equal(value, scalar.value) then + return false, scalar.label .. ' scalar mismatch' + end + end + + return true +end + +function M.dump_value(value) + return dump_value(value) +end + +function M.format_failure(details) + local lines = { + 'FUZZ FAILURE', + 'seed=' .. tostring(details.seed), + 'worker=' .. tostring(details.worker_id), + 'case=' .. tostring(details.case.id), + 'schema=' .. tostring(details.case.schema), + 'reason=' .. tostring(details.reason), + 'value=' .. M.dump_value(details.case.value), + } + if details.json then + lines[#lines + 1] = 'json=' .. details.json + end + return table.concat(lines, '\n') +end +``` + +- [ ] **Step 4: Run the focused spec and verify it passes** + +Run: + +```bash +/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua +``` + +Expected: PASS. + +- [ ] **Step 5: Run the existing encode spec to catch regressions** + +Run: + +```bash +/Users/yuanshengwang/.luarocks/bin/busted spec/json_encode_spec.lua +``` + +Expected: PASS. 
+ +- [ ] **Step 6: Commit validation** + +Run: + +```bash +git add tools/fuzz_encode_lib.lua spec/fuzz_encode_lib_spec.lua +git commit -m "feat: validate json encode fuzz output" +``` + +--- + +### Task 4: Runner and `make fuzz` Entry Point + +**Files:** +- Create: `tools/fuzz_encode.lua` +- Create: `Makefile` +- Modify: `spec/fuzz_encode_lib_spec.lua` + +- [ ] **Step 1: Add failing tests for runner argument environment conversion** + +Append this test inside the top-level `describe('tools.fuzz_encode_lib', function()` block: + +```lua + describe('env_from_args', function() + it('turns KEY=VALUE args into config environment entries', function() + local env = fuzz.env_from_args({ + 'DURATION=2', + 'INTERVAL=1', + 'SEED=123', + 'WORKERS=1', + }) + + assert.are.equal('2', env.DURATION) + assert.are.equal('1', env.INTERVAL) + assert.are.equal('123', env.SEED) + assert.are.equal('1', env.WORKERS) + end) + end) +``` + +- [ ] **Step 2: Run the focused spec and verify it fails because `env_from_args` is missing** + +Run: + +```bash +/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua +``` + +Expected: FAIL with `attempt to call field 'env_from_args'`. + +- [ ] **Step 3: Implement `env_from_args`** + +Add this function to `tools/fuzz_encode_lib.lua` before `return M`: + +```lua +function M.env_from_args(args) + local env = {} + for _, arg in ipairs(args or {}) do + local key, value = string.match(arg, '^([%w_]+)=(.*)$') + if key then + env[key] = value + end + end + return env +end +``` + +- [ ] **Step 4: Run the focused spec and verify it passes** + +Run: + +```bash +/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Implement the fuzz runner** + +Create `tools/fuzz_encode.lua` with: + +```lua +local rapidjson = require('rapidjson') +local fuzz = require('tools.fuzz_encode_lib') + +local env = fuzz.env_from_args(arg) +for _, key in ipairs({ 'DURATION', 'INTERVAL', 'WORKERS', 'WORKER_ID', 'SEED', 'SORT_KEYS' }) do + if env[key] == nil then + env[key] = os.getenv(key) + end +end + +local cfg = fuzz.parse_config(env) +local rng = fuzz.new_rng(cfg.seed) +local started = os.time() +local next_report = started + cfg.interval +local deadline = started + cfg.duration +local stats = { + worker_id = cfg.worker_id, + elapsed = 0, + total = 0, + encoded = 0, + encode_errors = 0, + validation_failures = 0, + rate = 0, + seed = cfg.seed, + last_case_id = 0, +} + +local function update_stats(now) + stats.elapsed = now - started + if stats.elapsed <= 0 then + stats.rate = stats.total + else + stats.rate = stats.total / stats.elapsed + end +end + +while os.time() < deadline do + local case_id = stats.total + 1 + local case = fuzz.generate_case(rng, case_id, rapidjson) + local ok, json_or_err = pcall(rapidjson.encode, case.value, { sort_keys = cfg.sort_keys }) + + stats.total = stats.total + 1 + stats.last_case_id = case_id + + if ok then + stats.encoded = stats.encoded + 1 + local valid, reason = fuzz.validate_encoded_case(rapidjson, case, json_or_err) + if not valid then + stats.validation_failures = stats.validation_failures + 1 + update_stats(os.time()) + io.stderr:write(fuzz.format_failure({ + seed = cfg.seed, + worker_id = cfg.worker_id, + case = case, + json = json_or_err, + reason = reason, + }), '\n') + os.exit(1) + end + else + stats.encode_errors = stats.encode_errors + 1 + end + + local now = os.time() + if now >= next_report then + update_stats(now) + print(fuzz.format_summary(stats)) + next_report = now + cfg.interval + end +end + +update_stats(os.time()) +print(fuzz.format_summary(stats)) +``` + +- [ ] **Step 6: Implement `make fuzz`** + +Create `Makefile` with: 
+ +```make +.PHONY: fuzz + +LUA ?= lua +DURATION ?= 3600 +INTERVAL ?= 5 +WORKERS ?= 1 +SEED ?= $(shell date +%s) +SORT_KEYS ?= 1 + +fuzz: + @if [ "$(WORKERS)" = "1" ]; then \ + DURATION="$(DURATION)" \ + INTERVAL="$(INTERVAL)" \ + WORKERS="$(WORKERS)" \ + WORKER_ID="1" \ + SEED="$(SEED)" \ + SORT_KEYS="$(SORT_KEYS)" \ + $(LUA) tools/fuzz_encode.lua; \ + else \ + i=1; \ + pids=""; \ + while [ $$i -le "$(WORKERS)" ]; do \ + worker_seed=$$(expr "$(SEED)" + $$i - 1); \ + DURATION="$(DURATION)" \ + INTERVAL="$(INTERVAL)" \ + WORKERS="$(WORKERS)" \ + WORKER_ID="$$i" \ + SEED="$$worker_seed" \ + SORT_KEYS="$(SORT_KEYS)" \ + $(LUA) tools/fuzz_encode.lua & \ + pids="$$pids $$!"; \ + i=$$(expr $$i + 1); \ + done; \ + status=0; \ + for pid in $$pids; do \ + wait $$pid || status=$$?; \ + done; \ + exit $$status; \ + fi +``` + +- [ ] **Step 7: Run the focused specs** + +Run: + +```bash +/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua +``` + +Expected: PASS. + +- [ ] **Step 8: Run a short single-worker fuzz smoke test** + +Use LuaJIT if the local `rapidjson.so` was built for LuaJIT: + +```bash +make fuzz LUA=luajit DURATION=2 INTERVAL=1 WORKERS=1 SEED=123 +``` + +Expected: at least one progress line containing `worker=1`, `validation_failures=0`, `seed=123`, and exit code 0. + +- [ ] **Step 9: Run a short multi-worker fuzz smoke test** + +Run: + +```bash +make fuzz LUA=luajit DURATION=2 INTERVAL=1 WORKERS=2 SEED=123 +``` + +Expected: progress lines from `worker=1` and `worker=2`, both with `validation_failures=0`, and exit code 0. + +- [ ] **Step 10: Run the complete existing test suite** + +Run: + +```bash +/Users/yuanshengwang/.luarocks/bin/busted +``` + +Expected: all existing specs pass. 
+ +- [ ] **Step 11: Commit runner and Makefile** + +Run: + +```bash +git add Makefile tools/fuzz_encode.lua tools/fuzz_encode_lib.lua spec/fuzz_encode_lib_spec.lua +git commit -m "feat: add json encode fuzz runner" +``` + +--- + +## Final Verification + +- [ ] Run: + +```bash +git status --short +``` + +Expected: no uncommitted changes except intentional local build outputs ignored by Git. + +- [ ] Run: + +```bash +make fuzz LUA=luajit DURATION=5 INTERVAL=1 WORKERS=1 SEED=123 +``` + +Expected: repeated summaries for 5 seconds, `validation_failures=0`, exit code 0. + +- [ ] Run: + +```bash +/Users/yuanshengwang/.luarocks/bin/busted +``` + +Expected: complete suite passes. + +--- + +## Self-Review + +- Spec coverage: `make fuzz`, default single worker, configurable duration/interval/workers/seed/sort, realistic generators, encode with sort, validation, five-second summaries, failure diagnostics, and multi-worker path are all covered. +- Red-flag scan: no vague implementation tasks remain; every code-changing step includes concrete code. +- Type consistency: functions referenced by tests are defined in earlier or same-task implementation steps: `parse_config`, `new_rng`, `format_summary`, `generate_case`, `validate_encoded_case`, `format_failure`, and `env_from_args`. 
From 1f6cebd781f8da4bc1ef2ba664089536c851ee5e Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 29 May 2026 12:05:17 +0800 Subject: [PATCH 03/15] chore: ignore superpowers docs --- .gitignore | 1 + .../plans/2026-05-29-json-encode-fuzz.md | 1161 ----------------- .../2026-05-29-json-encode-fuzz-design.md | 119 -- 3 files changed, 1 insertion(+), 1280 deletions(-) delete mode 100644 docs/superpowers/plans/2026-05-29-json-encode-fuzz.md delete mode 100644 docs/superpowers/specs/2026-05-29-json-encode-fuzz-design.md diff --git a/.gitignore b/.gitignore index bf8360d..50bd38b 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ rapidjson/rapidjson.autopkg rapidjson/travis-doxygen.sh /cmake-build-debug /.idea +/docs/superpowers/ diff --git a/docs/superpowers/plans/2026-05-29-json-encode-fuzz.md b/docs/superpowers/plans/2026-05-29-json-encode-fuzz.md deleted file mode 100644 index 5523143..0000000 --- a/docs/superpowers/plans/2026-05-29-json-encode-fuzz.md +++ /dev/null @@ -1,1161 +0,0 @@ -# JSON Encode Fuzz Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Build a `make fuzz` entry point that runs a long-lived Lua-side fuzz test for `rapidjson.encode(value, { sort_keys = true })`. - -**Architecture:** Add a reusable Lua module for deterministic generation, metadata, validation, dumping, and summary formatting. Add a thin executable runner that wires that module to `rapidjson`, then expose it through a root `Makefile` target with configurable duration, interval, workers, seed, and sort behavior. - -**Tech Stack:** Lua 5.1-compatible code, LuaJIT/Lua, Busted specs, existing `rapidjson` Lua module, POSIX `make` and shell. 
- ---- - -## File Structure - -- Create `tools/fuzz_encode_lib.lua`: deterministic RNG, config parsing, real-world-shaped case generation, validation helpers, value dumping, and summary formatting. This is importable from Busted specs. -- Create `tools/fuzz_encode.lua`: command-line runner that requires `rapidjson` and `tools.fuzz_encode_lib`, executes the timed loop, prints progress, and exits non-zero on validation failure. -- Create `spec/fuzz_encode_lib_spec.lua`: fast Busted coverage for config parsing, deterministic generation, expected metadata, validation failures, and summary formatting. -- Create `Makefile`: `make fuzz` target with `DURATION`, `INTERVAL`, `WORKERS`, `SEED`, `SORT_KEYS`, and `LUA` variables. - -No C++ production code changes are needed. - ---- - -## Execution Preflight - -Before running specs that require `rapidjson`, make the module available from the worktree root. - -- [ ] **Step 1: Check whether `rapidjson` already loads** - -Run: - -```bash -luajit -e 'require("rapidjson"); print("rapidjson ok")' -``` - -Expected when already built: `rapidjson ok`. - -- [ ] **Step 2: Build `rapidjson.so` for local LuaJIT if the check fails** - -Run: - -```bash -c++ -std=c++11 -g -Wall -fPIC \ - -I/opt/homebrew/include/luajit-2.1 \ - -Irapidjson/include \ - -bundle -undefined dynamic_lookup -all_load \ - src/Document.cpp src/Schema.cpp src/rapidjson.cpp src/values.cpp \ - -o rapidjson.so -``` - -Expected: compile succeeds. Warnings from `src/luax.hpp` about integer-to-double conversion are pre-existing on this checkout. 
- ---- - -### Task 1: Testable Fuzz Library Skeleton - -**Files:** -- Create: `tools/fuzz_encode_lib.lua` -- Create: `spec/fuzz_encode_lib_spec.lua` - -- [ ] **Step 1: Write the failing tests for config parsing, deterministic RNG, and summary formatting** - -Create `spec/fuzz_encode_lib_spec.lua` with: - -```lua -require 'busted.runner'() - -describe('tools.fuzz_encode_lib', function() - local fuzz = require('tools.fuzz_encode_lib') - - describe('parse_config', function() - it('uses production defaults', function() - local cfg = fuzz.parse_config({}) - - assert.are.equal(3600, cfg.duration) - assert.are.equal(5, cfg.interval) - assert.are.equal(1, cfg.workers) - assert.are.equal(1, cfg.worker_id) - assert.are.equal(true, cfg.sort_keys) - assert.are.equal('number', type(cfg.seed)) - end) - - it('accepts numeric and boolean overrides', function() - local cfg = fuzz.parse_config({ - DURATION = '12', - INTERVAL = '3', - WORKERS = '2', - WORKER_ID = '2', - SEED = '99', - SORT_KEYS = '0', - }) - - assert.are.equal(12, cfg.duration) - assert.are.equal(3, cfg.interval) - assert.are.equal(2, cfg.workers) - assert.are.equal(2, cfg.worker_id) - assert.are.equal(99, cfg.seed) - assert.are.equal(false, cfg.sort_keys) - end) - end) - - describe('new_rng', function() - it('is deterministic for the same seed', function() - local a = fuzz.new_rng(123) - local b = fuzz.new_rng(123) - - assert.are.equal(a:int(1, 1000000), b:int(1, 1000000)) - assert.are.equal(a:int(1, 1000000), b:int(1, 1000000)) - assert.are.equal(a:bool(), b:bool()) - end) - end) - - describe('format_summary', function() - it('formats the progress counters', function() - local line = fuzz.format_summary({ - elapsed = 5, - total = 100, - encoded = 99, - encode_errors = 1, - validation_failures = 0, - rate = 20, - seed = 123, - last_case_id = 100, - worker_id = 1, - }) - - assert.matches('worker=1', line, 1, true) - assert.matches('elapsed=5s', line, 1, true) - assert.matches('total=100', line, 1, true) - 
assert.matches('encoded=99', line, 1, true) - assert.matches('encode_errors=1', line, 1, true) - assert.matches('validation_failures=0', line, 1, true) - assert.matches('rate=20.00/s', line, 1, true) - assert.matches('seed=123', line, 1, true) - assert.matches('last_case=100', line, 1, true) - end) - end) -end) -``` - -- [ ] **Step 2: Run the focused spec and verify it fails because the module does not exist** - -Run: - -```bash -/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua -``` - -Expected: FAIL or ERROR with `module 'tools.fuzz_encode_lib' not found`. - -- [ ] **Step 3: Implement the minimal library skeleton** - -Create `tools/fuzz_encode_lib.lua` with: - -```lua -local M = {} - -local DEFAULTS = { - duration = 3600, - interval = 5, - workers = 1, - worker_id = 1, - sort_keys = true, -} - -local function tonumber_or(value, default) - local parsed = tonumber(value) - if parsed == nil then - return default - end - return parsed -end - -local function normalize_seed(value) - local parsed = tonumber(value) - if parsed == nil then - parsed = os.time() - end - parsed = math.floor(parsed) - parsed = parsed % 2147483647 - if parsed <= 0 then - parsed = 1 - end - return parsed -end - -function M.parse_config(env) - env = env or {} - return { - duration = tonumber_or(env.DURATION, DEFAULTS.duration), - interval = tonumber_or(env.INTERVAL, DEFAULTS.interval), - workers = tonumber_or(env.WORKERS, DEFAULTS.workers), - worker_id = tonumber_or(env.WORKER_ID, DEFAULTS.worker_id), - seed = normalize_seed(env.SEED), - sort_keys = env.SORT_KEYS ~= '0', - } -end - -function M.new_rng(seed) - local state = normalize_seed(seed) - local rng = {} - - function rng:next() - local hi = math.floor(state / 127773) - local lo = state % 127773 - local test = 16807 * lo - 2836 * hi - if test <= 0 then - test = test + 2147483647 - end - state = test - return state / 2147483647 - end - - function rng:int(min, max) - return min + math.floor(self:next() * (max - min + 1)) 
- end - - function rng:bool() - return self:int(0, 1) == 1 - end - - function rng:choice(values) - return values[self:int(1, #values)] - end - - return rng -end - -function M.format_summary(stats) - return string.format( - 'worker=%d elapsed=%ds total=%d encoded=%d encode_errors=%d validation_failures=%d rate=%.2f/s seed=%d last_case=%d', - stats.worker_id, - stats.elapsed, - stats.total, - stats.encoded, - stats.encode_errors, - stats.validation_failures, - stats.rate, - stats.seed, - stats.last_case_id - ) -end - -return M -``` - -- [ ] **Step 4: Run the focused spec and verify it passes** - -Run: - -```bash -/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua -``` - -Expected: PASS with all `tools.fuzz_encode_lib` examples successful. - -- [ ] **Step 5: Commit the skeleton** - -Run: - -```bash -git add tools/fuzz_encode_lib.lua spec/fuzz_encode_lib_spec.lua -git commit -m "test: add fuzz encode library skeleton" -``` - ---- - -### Task 2: Real-World Case Generators and Metadata - -**Files:** -- Modify: `tools/fuzz_encode_lib.lua` -- Modify: `spec/fuzz_encode_lib_spec.lua` - -- [ ] **Step 1: Add failing tests for generated schemas and expected metadata** - -Append these tests inside the top-level `describe('tools.fuzz_encode_lib', function()` block in `spec/fuzz_encode_lib_spec.lua`: - -```lua - describe('generate_case', function() - it('generates deterministic real-world schema cases with metadata', function() - local a = fuzz.generate_case(fuzz.new_rng(321), 1, { null = {} }) - local b = fuzz.generate_case(fuzz.new_rng(321), 1, { null = {} }) - - assert.are.same(a.value, b.value) - assert.are.same(a.expected, b.expected) - assert.are.equal('number', type(a.id)) - assert.are.equal('string', type(a.schema)) - assert.are.equal('object', a.expected.top_level_kind) - assert.is_true(#a.expected.objects >= 1) - assert.is_true(#a.expected.arrays >= 1) - assert.is_true(#a.expected.scalars >= 1) - end) - - it('cycles through the five supported schema 
families', function() - local rng = fuzz.new_rng(1) - local seen = {} - - for case_id = 1, 10 do - local case = fuzz.generate_case(rng, case_id, { null = {} }) - seen[case.schema] = true - end - - assert.is_true(seen.llm_response) - assert.is_true(seen.github_issue) - assert.is_true(seen.social_feed) - assert.is_true(seen.paginated_list) - assert.is_true(seen.metadata_config) - end) - end) -``` - -- [ ] **Step 2: Run the focused spec and verify it fails because `generate_case` is missing** - -Run: - -```bash -/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua -``` - -Expected: FAIL with `attempt to call field 'generate_case'`. - -- [ ] **Step 3: Implement generator helpers and schema generators** - -Add these helpers to `tools/fuzz_encode_lib.lua` before `return M`: - -```lua -local SCHEMAS = { - 'llm_response', - 'github_issue', - 'social_feed', - 'paginated_list', - 'metadata_config', -} - -local WORDS = { - 'alpha', 'bravo', 'charlie', 'delta', 'echo', 'foxtrot', - 'json', 'encode', 'rapid', 'lua', 'api', '模型', '微博', -} - -local function sentence(rng, min_words, max_words) - local parts = {} - for _ = 1, rng:int(min_words, max_words) do - parts[#parts + 1] = rng:choice(WORDS) - end - return table.concat(parts, ' ') -end - -local function string_keys(tbl) - local keys = {} - for key, _ in pairs(tbl) do - if type(key) == 'string' then - keys[#keys + 1] = key - end - end - table.sort(keys) - return keys -end - -local function path_value(root, path) - local current = root - for i = 1, #path do - current = current[path[i]] - end - return current -end - -local function track_object(expected, label, value, path, check_order) - expected.objects[#expected.objects + 1] = { - label = label, - path = path, - count = #string_keys(value), - keys = string_keys(value), - check_order = check_order == true, - } -end - -local function track_array(expected, label, value, path) - expected.arrays[#expected.arrays + 1] = { - label = label, - path = path, - length 
= #value, - } -end - -local function track_scalar(expected, label, value, path) - expected.scalars[#expected.scalars + 1] = { - label = label, - path = path, - value = path_value(value, path), - } -end - -local function base_expected(schema) - return { - schema = schema, - top_level_kind = 'object', - objects = {}, - arrays = {}, - scalars = {}, - } -end - -local function generate_llm_response(rng, case_id, rapidjson) - local value = { - id = 'chatcmpl-' .. case_id, - object = 'chat.completion', - created = 1700000000 + case_id, - model = rng:choice({ 'gpt-4.1', 'gpt-4o-mini', 'reasoner-small' }), - choices = { - { - index = 0, - finish_reason = rng:choice({ 'stop', 'length', 'tool_calls' }), - message = { - role = 'assistant', - content = sentence(rng, 4, 10), - }, - }, - }, - usage = { - prompt_tokens = rng:int(1, 2000), - completion_tokens = rng:int(1, 2000), - total_tokens = rng:int(2001, 5000), - }, - metadata = { - request_id = 'req_' .. rng:int(1000, 9999), - cached = rng:bool(), - trace = rapidjson.null, - }, - } - local expected = base_expected('llm_response') - track_object(expected, 'root', value, {}, true) - track_object(expected, 'usage', value.usage, { 'usage' }, true) - track_array(expected, 'choices', value.choices, { 'choices' }) - track_scalar(expected, 'model', value, { 'model' }) - track_scalar(expected, 'message_role', value, { 'choices', 1, 'message', 'role' }) - return value, expected -end - -local function generate_github_issue(rng, case_id, rapidjson) - local labels = {} - for i = 1, rng:int(1, 4) do - labels[i] = { - id = case_id * 100 + i, - name = rng:choice({ 'bug', 'feature', 'fuzz', 'help wanted' }), - color = rng:choice({ 'ff0000', '00ff00', '0052cc' }), - } - end - local value = { - id = case_id, - number = rng:int(1, 10000), - state = rng:choice({ 'open', 'closed' }), - title = sentence(rng, 3, 8), - body = sentence(rng, 8, 18), - user = { - login = 'user' .. 
rng:int(1, 999), - id = rng:int(1, 100000), - site_admin = false, - }, - labels = labels, - milestone = rapidjson.null, - reactions = { - ['+1'] = rng:int(0, 100), - ['-1'] = rng:int(0, 10), - confused = rng:int(0, 5), - heart = rng:int(0, 50), - }, - } - local expected = base_expected('github_issue') - track_object(expected, 'root', value, {}, true) - track_object(expected, 'user', value.user, { 'user' }, true) - track_object(expected, 'reactions', value.reactions, { 'reactions' }, true) - track_array(expected, 'labels', value.labels, { 'labels' }) - track_scalar(expected, 'state', value, { 'state' }) - return value, expected -end - -local function generate_social_feed(rng, case_id, rapidjson) - local posts = {} - for i = 1, rng:int(2, 5) do - posts[i] = { - id = 'post_' .. case_id .. '_' .. i, - text = sentence(rng, 5, 15), - reposts = rng:int(0, 1000), - likes = rng:int(0, 10000), - verified = rng:bool(), - reply_to = rapidjson.null, - } - end - local value = { - platform = rng:choice({ 'twitter', 'weibo' }), - cursor = 'cursor_' .. rng:int(1000, 9999), - has_more = rng:bool(), - posts = posts, - viewer = { - locale = rng:choice({ 'en-US', 'zh-CN', 'ja-JP' }), - safe_mode = rng:bool(), - }, - } - local expected = base_expected('social_feed') - track_object(expected, 'root', value, {}, true) - track_object(expected, 'viewer', value.viewer, { 'viewer' }, true) - track_array(expected, 'posts', value.posts, { 'posts' }) - track_scalar(expected, 'platform', value, { 'platform' }) - return value, expected -end - -local function generate_paginated_list(rng, case_id, rapidjson) - local items = {} - for i = 1, rng:int(1, 6) do - items[i] = { - id = case_id * 10 + i, - name = 'item_' .. rng:int(100, 999), - enabled = rng:bool(), - score = rng:int(0, 10000) / 100, - extra = rapidjson.null, - } - end - local value = { - page = rng:int(1, 50), - per_page = #items, - total = rng:int(#items, #items + 500), - items = items, - links = { - next = '/api/items?page=' .. 
rng:int(2, 99), - prev = rapidjson.null, - }, - } - local expected = base_expected('paginated_list') - track_object(expected, 'root', value, {}, true) - track_object(expected, 'links', value.links, { 'links' }, true) - track_array(expected, 'items', value.items, { 'items' }) - track_scalar(expected, 'per_page', value, { 'per_page' }) - return value, expected -end - -local function generate_metadata_config(rng, case_id, rapidjson) - local value = { - version = 'v' .. rng:int(1, 9) .. '.' .. rng:int(0, 9), - rollout = { - percent = rng:int(0, 100), - region = rng:choice({ 'us', 'sg', 'eu', 'cn' }), - enabled = rng:bool(), - }, - features = { - encode_fuzz = true, - sorted_json = true, - experimental = rng:bool(), - }, - owners = { - 'team-api', - 'team-runtime', - }, - annotations = { - case_id = case_id, - note = sentence(rng, 2, 6), - empty = rapidjson.null, - }, - } - local expected = base_expected('metadata_config') - track_object(expected, 'root', value, {}, true) - track_object(expected, 'rollout', value.rollout, { 'rollout' }, true) - track_object(expected, 'features', value.features, { 'features' }, true) - track_array(expected, 'owners', value.owners, { 'owners' }) - track_scalar(expected, 'version', value, { 'version' }) - return value, expected -end - -local GENERATORS = { - llm_response = generate_llm_response, - github_issue = generate_github_issue, - social_feed = generate_social_feed, - paginated_list = generate_paginated_list, - metadata_config = generate_metadata_config, -} - -function M.generate_case(rng, case_id, rapidjson) - local schema = SCHEMAS[((case_id - 1) % #SCHEMAS) + 1] - local value, expected = GENERATORS[schema](rng, case_id, rapidjson) - return { - id = case_id, - schema = schema, - value = value, - expected = expected, - } -end -``` - -- [ ] **Step 4: Run the focused spec and verify it passes** - -Run: - -```bash -/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua -``` - -Expected: PASS. 
- -- [ ] **Step 5: Commit the generators** - -Run: - -```bash -git add tools/fuzz_encode_lib.lua spec/fuzz_encode_lib_spec.lua -git commit -m "feat: generate json encode fuzz cases" -``` - ---- - -### Task 3: Encode Result Validation and Failure Diagnostics - -**Files:** -- Modify: `tools/fuzz_encode_lib.lua` -- Modify: `spec/fuzz_encode_lib_spec.lua` - -- [ ] **Step 1: Add failing tests for validation and diagnostics** - -Append these tests inside the top-level `describe('tools.fuzz_encode_lib', function()` block: - -```lua - describe('validate_encoded_case', function() - local rapidjson = require('rapidjson') - - it('accepts a generated case encoded with sorted keys', function() - local case = fuzz.generate_case(fuzz.new_rng(77), 1, rapidjson) - local json = rapidjson.encode(case.value, { sort_keys = true }) - - local ok, err = fuzz.validate_encoded_case(rapidjson, case, json) - - assert.is_true(ok) - assert.is_nil(err) - end) - - it('rejects unsorted encoded object keys for tracked objects', function() - local case = { - id = 1, - schema = 'manual', - value = { b = 1, a = 2 }, - expected = { - top_level_kind = 'object', - objects = { - { - label = 'root', - path = {}, - count = 2, - keys = { 'a', 'b' }, - check_order = true, - }, - }, - arrays = {}, - scalars = {}, - }, - } - - local ok, err = fuzz.validate_encoded_case(rapidjson, case, '{"b":1,"a":2}') - - assert.is_false(ok) - assert.matches('key order', err, 1, true) - end) - - it('formats reproducible failure reports', function() - local report = fuzz.format_failure({ - seed = 123, - worker_id = 1, - case = { - id = 7, - schema = 'manual', - value = { b = 1, a = 2 }, - }, - json = '{"b":1,"a":2}', - reason = 'root key order mismatch', - }) - - assert.matches('seed=123', report, 1, true) - assert.matches('worker=1', report, 1, true) - assert.matches('case=7', report, 1, true) - assert.matches('schema=manual', report, 1, true) - assert.matches('root key order mismatch', report, 1, true) - 
assert.matches('json={"b":1,"a":2}', report, 1, true) - assert.matches('value={', report, 1, true) - end) - end) -``` - -- [ ] **Step 2: Run the focused spec and verify it fails because validation is missing** - -Run: - -```bash -/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua -``` - -Expected: FAIL with `attempt to call field 'validate_encoded_case'`. - -- [ ] **Step 3: Implement validation, path lookup, and value dump helpers** - -Add these functions to `tools/fuzz_encode_lib.lua` before `return M`: - -```lua -local function kind(value) - if type(value) ~= 'table' then - return type(value) - end - if #value > 0 then - return 'array' - end - return 'object' -end - -local function value_at_path(root, path) - local current = root - for i = 1, #path do - if type(current) ~= 'table' then - return nil - end - current = current[path[i]] - end - return current -end - -local function deep_equal(a, b) - if type(a) ~= type(b) then - return false - end - if type(a) ~= 'table' then - return a == b - end - for key, value in pairs(a) do - if not deep_equal(value, b[key]) then - return false - end - end - for key, _ in pairs(b) do - if a[key] == nil then - return false - end - end - return true -end - -local function key_token(key) - return '"' .. key .. '":' -end - -local function keys_are_sorted_in_json(json, keys) - local previous = 0 - for _, key in ipairs(keys) do - local pos = string.find(json, key_token(key), previous + 1, true) - if pos == nil then - return false, 'missing key "' .. key .. '"' - end - if pos < previous then - return false, 'key "' .. key .. 
'" appeared out of order' - end - previous = pos - end - return true -end - -local function dump_value(value, depth, seen) - depth = depth or 0 - seen = seen or {} - if type(value) == 'string' then - return string.format('%q', value) - end - if type(value) ~= 'table' then - return tostring(value) - end - if seen[value] then - return '' - end - if depth >= 4 then - return '{...}' - end - seen[value] = true - local parts = {} - local keys = {} - for key, _ in pairs(value) do - keys[#keys + 1] = key - end - table.sort(keys, function(a, b) return tostring(a) < tostring(b) end) - for _, key in ipairs(keys) do - parts[#parts + 1] = '[' .. dump_value(key, depth + 1, seen) .. ']=' .. dump_value(value[key], depth + 1, seen) - end - seen[value] = nil - return '{' .. table.concat(parts, ',') .. '}' -end - -function M.validate_encoded_case(rapidjson, case, json) - local decoded, decode_err = rapidjson.decode(json) - if decoded == nil then - return false, 'decode failed: ' .. tostring(decode_err) - end - - if kind(decoded) ~= case.expected.top_level_kind then - return false, 'top-level kind mismatch: expected ' .. case.expected.top_level_kind .. ', got ' .. kind(decoded) - end - - for _, object in ipairs(case.expected.objects) do - local value = value_at_path(decoded, object.path) - if kind(value) ~= 'object' then - return false, object.label .. ' kind mismatch' - end - if #string_keys(value) ~= object.count then - return false, object.label .. ' field count mismatch' - end - if object.check_order then - local ok, reason = keys_are_sorted_in_json(json, object.keys) - if not ok then - return false, object.label .. ' key order mismatch: ' .. reason - end - end - end - - for _, array in ipairs(case.expected.arrays) do - local value = value_at_path(decoded, array.path) - if kind(value) ~= 'array' then - return false, array.label .. ' kind mismatch' - end - if #value ~= array.length then - return false, array.label .. 
' length mismatch' - end - end - - for _, scalar in ipairs(case.expected.scalars) do - local value = value_at_path(decoded, scalar.path) - if not deep_equal(value, scalar.value) then - return false, scalar.label .. ' scalar mismatch' - end - end - - return true -end - -function M.dump_value(value) - return dump_value(value) -end - -function M.format_failure(details) - local lines = { - 'FUZZ FAILURE', - 'seed=' .. tostring(details.seed), - 'worker=' .. tostring(details.worker_id), - 'case=' .. tostring(details.case.id), - 'schema=' .. tostring(details.case.schema), - 'reason=' .. tostring(details.reason), - 'value=' .. M.dump_value(details.case.value), - } - if details.json then - lines[#lines + 1] = 'json=' .. details.json - end - return table.concat(lines, '\n') -end -``` - -- [ ] **Step 4: Run the focused spec and verify it passes** - -Run: - -```bash -/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua -``` - -Expected: PASS. - -- [ ] **Step 5: Run the existing encode spec to catch regressions** - -Run: - -```bash -/Users/yuanshengwang/.luarocks/bin/busted spec/json_encode_spec.lua -``` - -Expected: PASS. 
- -- [ ] **Step 6: Commit validation** - -Run: - -```bash -git add tools/fuzz_encode_lib.lua spec/fuzz_encode_lib_spec.lua -git commit -m "feat: validate json encode fuzz output" -``` - ---- - -### Task 4: Runner and `make fuzz` Entry Point - -**Files:** -- Create: `tools/fuzz_encode.lua` -- Create: `Makefile` -- Modify: `spec/fuzz_encode_lib_spec.lua` - -- [ ] **Step 1: Add failing tests for runner argument environment conversion** - -Append this test inside the top-level `describe('tools.fuzz_encode_lib', function()` block: - -```lua - describe('env_from_args', function() - it('turns KEY=VALUE args into config environment entries', function() - local env = fuzz.env_from_args({ - 'DURATION=2', - 'INTERVAL=1', - 'SEED=123', - 'WORKERS=1', - }) - - assert.are.equal('2', env.DURATION) - assert.are.equal('1', env.INTERVAL) - assert.are.equal('123', env.SEED) - assert.are.equal('1', env.WORKERS) - end) - end) -``` - -- [ ] **Step 2: Run the focused spec and verify it fails because `env_from_args` is missing** - -Run: - -```bash -/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua -``` - -Expected: FAIL with `attempt to call field 'env_from_args'`. - -- [ ] **Step 3: Implement `env_from_args`** - -Add this function to `tools/fuzz_encode_lib.lua` before `return M`: - -```lua -function M.env_from_args(args) - local env = {} - for _, arg in ipairs(args or {}) do - local key, value = string.match(arg, '^([%w_]+)=(.*)$') - if key then - env[key] = value - end - end - return env -end -``` - -- [ ] **Step 4: Run the focused spec and verify it passes** - -Run: - -```bash -/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua -``` - -Expected: PASS. 
- -- [ ] **Step 5: Implement the fuzz runner** - -Create `tools/fuzz_encode.lua` with: - -```lua -local rapidjson = require('rapidjson') -local fuzz = require('tools.fuzz_encode_lib') - -local env = fuzz.env_from_args(arg) -for _, key in ipairs({ 'DURATION', 'INTERVAL', 'WORKERS', 'WORKER_ID', 'SEED', 'SORT_KEYS' }) do - if env[key] == nil then - env[key] = os.getenv(key) - end -end - -local cfg = fuzz.parse_config(env) -local rng = fuzz.new_rng(cfg.seed) -local started = os.time() -local next_report = started + cfg.interval -local deadline = started + cfg.duration -local stats = { - worker_id = cfg.worker_id, - elapsed = 0, - total = 0, - encoded = 0, - encode_errors = 0, - validation_failures = 0, - rate = 0, - seed = cfg.seed, - last_case_id = 0, -} - -local function update_stats(now) - stats.elapsed = now - started - if stats.elapsed <= 0 then - stats.rate = stats.total - else - stats.rate = stats.total / stats.elapsed - end -end - -while os.time() < deadline do - local case_id = stats.total + 1 - local case = fuzz.generate_case(rng, case_id, rapidjson) - local ok, json_or_err = pcall(rapidjson.encode, case.value, { sort_keys = cfg.sort_keys }) - - stats.total = stats.total + 1 - stats.last_case_id = case_id - - if ok then - stats.encoded = stats.encoded + 1 - local valid, reason = fuzz.validate_encoded_case(rapidjson, case, json_or_err) - if not valid then - stats.validation_failures = stats.validation_failures + 1 - update_stats(os.time()) - io.stderr:write(fuzz.format_failure({ - seed = cfg.seed, - worker_id = cfg.worker_id, - case = case, - json = json_or_err, - reason = reason, - }), '\n') - os.exit(1) - end - else - stats.encode_errors = stats.encode_errors + 1 - end - - local now = os.time() - if now >= next_report then - update_stats(now) - print(fuzz.format_summary(stats)) - next_report = now + cfg.interval - end -end - -update_stats(os.time()) -print(fuzz.format_summary(stats)) -``` - -- [ ] **Step 6: Implement `make fuzz`** - -Create `Makefile` with: 
- -```make -.PHONY: fuzz - -LUA ?= lua -DURATION ?= 3600 -INTERVAL ?= 5 -WORKERS ?= 1 -SEED ?= $(shell date +%s) -SORT_KEYS ?= 1 - -fuzz: - @if [ "$(WORKERS)" = "1" ]; then \ - DURATION="$(DURATION)" \ - INTERVAL="$(INTERVAL)" \ - WORKERS="$(WORKERS)" \ - WORKER_ID="1" \ - SEED="$(SEED)" \ - SORT_KEYS="$(SORT_KEYS)" \ - $(LUA) tools/fuzz_encode.lua; \ - else \ - i=1; \ - pids=""; \ - while [ $$i -le "$(WORKERS)" ]; do \ - worker_seed=$$(expr "$(SEED)" + $$i - 1); \ - DURATION="$(DURATION)" \ - INTERVAL="$(INTERVAL)" \ - WORKERS="$(WORKERS)" \ - WORKER_ID="$$i" \ - SEED="$$worker_seed" \ - SORT_KEYS="$(SORT_KEYS)" \ - $(LUA) tools/fuzz_encode.lua & \ - pids="$$pids $$!"; \ - i=$$(expr $$i + 1); \ - done; \ - status=0; \ - for pid in $$pids; do \ - wait $$pid || status=$$?; \ - done; \ - exit $$status; \ - fi -``` - -- [ ] **Step 7: Run the focused specs** - -Run: - -```bash -/Users/yuanshengwang/.luarocks/bin/busted spec/fuzz_encode_lib_spec.lua -``` - -Expected: PASS. - -- [ ] **Step 8: Run a short single-worker fuzz smoke test** - -Use LuaJIT if the local `rapidjson.so` was built for LuaJIT: - -```bash -make fuzz LUA=luajit DURATION=2 INTERVAL=1 WORKERS=1 SEED=123 -``` - -Expected: at least one progress line containing `worker=1`, `validation_failures=0`, `seed=123`, and exit code 0. - -- [ ] **Step 9: Run a short multi-worker fuzz smoke test** - -Run: - -```bash -make fuzz LUA=luajit DURATION=2 INTERVAL=1 WORKERS=2 SEED=123 -``` - -Expected: progress lines from `worker=1` and `worker=2`, both with `validation_failures=0`, and exit code 0. - -- [ ] **Step 10: Run the complete existing test suite** - -Run: - -```bash -/Users/yuanshengwang/.luarocks/bin/busted -``` - -Expected: all existing specs pass. 
- -- [ ] **Step 11: Commit runner and Makefile** - -Run: - -```bash -git add Makefile tools/fuzz_encode.lua tools/fuzz_encode_lib.lua spec/fuzz_encode_lib_spec.lua -git commit -m "feat: add json encode fuzz runner" -``` - ---- - -## Final Verification - -- [ ] Run: - -```bash -git status --short -``` - -Expected: no uncommitted changes except intentional local build outputs ignored by Git. - -- [ ] Run: - -```bash -make fuzz LUA=luajit DURATION=5 INTERVAL=1 WORKERS=1 SEED=123 -``` - -Expected: repeated summaries for 5 seconds, `validation_failures=0`, exit code 0. - -- [ ] Run: - -```bash -/Users/yuanshengwang/.luarocks/bin/busted -``` - -Expected: complete suite passes. - ---- - -## Self-Review - -- Spec coverage: `make fuzz`, default single worker, configurable duration/interval/workers/seed/sort, realistic generators, encode with sort, validation, five-second summaries, failure diagnostics, and multi-worker path are all covered. -- Red-flag scan: no vague implementation tasks remain; every code-changing step includes concrete code. -- Type consistency: functions referenced by tests are defined in earlier or same-task implementation steps: `parse_config`, `new_rng`, `format_summary`, `generate_case`, `validate_encoded_case`, `format_failure`, and `env_from_args`. diff --git a/docs/superpowers/specs/2026-05-29-json-encode-fuzz-design.md b/docs/superpowers/specs/2026-05-29-json-encode-fuzz-design.md deleted file mode 100644 index f4eba76..0000000 --- a/docs/superpowers/specs/2026-05-29-json-encode-fuzz-design.md +++ /dev/null @@ -1,119 +0,0 @@ -# JSON Encode Fuzz Test Design - -## Goal - -Add a `make fuzz` entry point that runs a long-lived Lua-side fuzz test for the current JSON encode implementation. The test should stress `rapidjson.encode(value, { sort_keys = true })` with realistic Lua table shapes and verify that successful encodes produce valid, sorted, structurally correct JSON. - -This is a property and stress fuzz test, not a C/C++ coverage-guided fuzzer. 
The highest-value input surface is Lua values, especially tables with JSON-like API response shapes. - -## Command Interface - -Add a root `Makefile` target: - -```sh -make fuzz -make fuzz DURATION=3600 INTERVAL=5 WORKERS=1 SEED=123 -``` - -Defaults: - -- `DURATION=3600` -- `INTERVAL=5` -- `WORKERS=1` -- `SORT_KEYS=1` - -`make fuzz` should use one CPU core by default. `WORKERS` is reserved for a future multi-process mode, where each worker can run the same runner with a distinct seed. - -## Runner - -Add `tools/fuzz_encode.lua`. - -The runner will: - -- Parse duration, reporting interval, worker id, worker count, seed, and sort option from command-line arguments or environment variables. -- Initialize a deterministic pseudo-random generator from the seed. -- Repeatedly generate one Lua value plus expected metadata. -- Call `rapidjson.encode(value, { sort_keys = true })`. -- Validate every successful encode. -- Print progress every reporting interval. -- Stop after the configured duration or after the first validation failure. - -## Generated Cases - -Each fuzz case uses a named schema inspired by real JSON-producing systems: - -- LLM-style chat completion response -- GitHub-style issue or API response -- Twitter or Weibo-style feed response -- Paginated list response -- Nested metadata or configuration object - -Generators should produce Lua tables with controlled depth and size. They should include strings, numbers, booleans, `rapidjson.null`, arrays, objects, empty objects, and nested collections. They should avoid unsupported Lua values such as functions, threads, full userdata, and circular tables because this fuzz target is focused on successful encode quality. 
- -Each generated case should carry expected metadata such as: - -- Schema name -- Top-level kind -- Expected object key count at selected paths -- Expected sorted key order at selected object paths -- Expected array length at selected paths -- Selected scalar path/value assertions - -## Validation - -For every successful encode: - -- Decode the JSON with `rapidjson.decode` and ensure decoding succeeds. -- Check the decoded top-level kind. -- Check expected object field counts. -- Check expected array lengths. -- Check selected scalar path values. -- Check that keys emitted by `rapidjson.encode(..., { sort_keys = true })` appear in sorted order for tracked object paths. - -The key order check should inspect the encoded JSON for tracked objects. The generated metadata should keep those tracked objects small enough that order checks can be reliable without requiring a full JSON parser beyond `rapidjson.decode`. - -## Reporting - -Every `INTERVAL` seconds, print a compact summary: - -- Elapsed seconds -- Total cases -- Successful cases -- Encode errors -- Validation failures -- Cases per second -- Seed -- Last case id - -On failure, print enough information to reproduce: - -- Seed -- Case id -- Worker id -- Schema name -- Failure reason -- Generated Lua value dump -- Encoded JSON, if available - -The process should exit non-zero on validation failure. - -## Multi-Core Path - -The first implementation runs one worker by default. The command interface should leave room for: - -```sh -make fuzz WORKERS=4 -``` - -A later implementation can start multiple Lua processes with worker-specific seeds, for example `SEED + worker_id`. Worker output can be prefixed with the worker id before adding any central aggregation. - -## Test Strategy - -Keep the fuzz runner separate from the normal `busted` suite because the default fuzz duration is long. Add focused unit coverage only for helper functions if the implementation grows complex enough to justify it. 
- -Manual verification for the first version: - -- Build or otherwise make `rapidjson.so` available. -- Run `make fuzz DURATION=5 INTERVAL=1 SEED=123`. -- Confirm it reports progress and exits successfully. -- Run with at least one intentionally broken assertion during development to confirm failures are reproducible and exit non-zero. From 47a0ab19272ff865b4fab28e0c1109bad3664ff3 Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 29 May 2026 12:06:25 +0800 Subject: [PATCH 04/15] chore: ignore local build outputs --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 50bd38b..86ddc97 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,8 @@ CMakeFiles cmake_install.cmake json.so json.dll +rapidjson.so +rapidjson.so.dSYM/ deps/ /*.src.rock /rapidjson/test/ From 886cb3bc125e9c5ec1c71bd5a20b05a8826c1470 Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 29 May 2026 12:09:55 +0800 Subject: [PATCH 05/15] test: add fuzz encode library skeleton --- spec/fuzz_encode_lib_spec.lua | 73 ++++++++++++++++++++++++++++ tools/fuzz_encode_lib.lua | 89 +++++++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 spec/fuzz_encode_lib_spec.lua create mode 100644 tools/fuzz_encode_lib.lua diff --git a/spec/fuzz_encode_lib_spec.lua b/spec/fuzz_encode_lib_spec.lua new file mode 100644 index 0000000..c5e8454 --- /dev/null +++ b/spec/fuzz_encode_lib_spec.lua @@ -0,0 +1,73 @@ +require 'busted.runner'() + +describe('tools.fuzz_encode_lib', function() + local fuzz = require('tools.fuzz_encode_lib') + + describe('parse_config', function() + it('uses production defaults', function() + local cfg = fuzz.parse_config({}) + + assert.are.equal(3600, cfg.duration) + assert.are.equal(5, cfg.interval) + assert.are.equal(1, cfg.workers) + assert.are.equal(1, cfg.worker_id) + assert.are.equal(true, cfg.sort_keys) + assert.are.equal('number', type(cfg.seed)) + end) + + it('accepts numeric and boolean overrides', function() + 
local cfg = fuzz.parse_config({ + DURATION = '12', + INTERVAL = '3', + WORKERS = '2', + WORKER_ID = '2', + SEED = '99', + SORT_KEYS = '0', + }) + + assert.are.equal(12, cfg.duration) + assert.are.equal(3, cfg.interval) + assert.are.equal(2, cfg.workers) + assert.are.equal(2, cfg.worker_id) + assert.are.equal(99, cfg.seed) + assert.are.equal(false, cfg.sort_keys) + end) + end) + + describe('new_rng', function() + it('is deterministic for the same seed', function() + local a = fuzz.new_rng(123) + local b = fuzz.new_rng(123) + + assert.are.equal(a:int(1, 1000000), b:int(1, 1000000)) + assert.are.equal(a:int(1, 1000000), b:int(1, 1000000)) + assert.are.equal(a:bool(), b:bool()) + end) + end) + + describe('format_summary', function() + it('formats the progress counters', function() + local line = fuzz.format_summary({ + elapsed = 5, + total = 100, + encoded = 99, + encode_errors = 1, + validation_failures = 0, + rate = 20, + seed = 123, + last_case_id = 100, + worker_id = 1, + }) + + assert.matches('worker=1', line, 1, true) + assert.matches('elapsed=5s', line, 1, true) + assert.matches('total=100', line, 1, true) + assert.matches('encoded=99', line, 1, true) + assert.matches('encode_errors=1', line, 1, true) + assert.matches('validation_failures=0', line, 1, true) + assert.matches('rate=20.00/s', line, 1, true) + assert.matches('seed=123', line, 1, true) + assert.matches('last_case=100', line, 1, true) + end) + end) +end) diff --git a/tools/fuzz_encode_lib.lua b/tools/fuzz_encode_lib.lua new file mode 100644 index 0000000..2f5c220 --- /dev/null +++ b/tools/fuzz_encode_lib.lua @@ -0,0 +1,89 @@ +local M = {} + +local DEFAULTS = { + duration = 3600, + interval = 5, + workers = 1, + worker_id = 1, + sort_keys = true, +} + +local function tonumber_or(value, default) + local parsed = tonumber(value) + if parsed == nil then + return default + end + return parsed +end + +local function normalize_seed(value) + local parsed = tonumber(value) + if parsed == nil then + parsed = 
os.time() + end + parsed = math.floor(parsed) + parsed = parsed % 2147483647 + if parsed <= 0 then + parsed = 1 + end + return parsed +end + +function M.parse_config(env) + env = env or {} + return { + duration = tonumber_or(env.DURATION, DEFAULTS.duration), + interval = tonumber_or(env.INTERVAL, DEFAULTS.interval), + workers = tonumber_or(env.WORKERS, DEFAULTS.workers), + worker_id = tonumber_or(env.WORKER_ID, DEFAULTS.worker_id), + seed = normalize_seed(env.SEED), + sort_keys = env.SORT_KEYS ~= '0', + } +end + +function M.new_rng(seed) + local state = normalize_seed(seed) + local rng = {} + + function rng:next() + local hi = math.floor(state / 127773) + local lo = state % 127773 + local test = 16807 * lo - 2836 * hi + if test <= 0 then + test = test + 2147483647 + end + state = test + return state / 2147483647 + end + + function rng:int(min, max) + return min + math.floor(self:next() * (max - min + 1)) + end + + function rng:bool() + return self:int(0, 1) == 1 + end + + function rng:choice(values) + return values[self:int(1, #values)] + end + + return rng +end + +function M.format_summary(stats) + return string.format( + 'worker=%d elapsed=%ds total=%d encoded=%d encode_errors=%d validation_failures=%d rate=%.2f/s seed=%d last_case=%d', + stats.worker_id, + stats.elapsed, + stats.total, + stats.encoded, + stats.encode_errors, + stats.validation_failures, + stats.rate, + stats.seed, + stats.last_case_id + ) +end + +return M From 78507abc47d539f634b4103985a111b405823b21 Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 29 May 2026 12:15:53 +0800 Subject: [PATCH 06/15] fix: handle numeric sort keys override --- spec/fuzz_encode_lib_spec.lua | 6 ++++++ tools/fuzz_encode_lib.lua | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/spec/fuzz_encode_lib_spec.lua b/spec/fuzz_encode_lib_spec.lua index c5e8454..8c8411c 100644 --- a/spec/fuzz_encode_lib_spec.lua +++ b/spec/fuzz_encode_lib_spec.lua @@ -32,6 +32,12 @@ describe('tools.fuzz_encode_lib', 
function() assert.are.equal(99, cfg.seed) assert.are.equal(false, cfg.sort_keys) end) + + it('treats numeric zero as disabling sorted keys', function() + local cfg = fuzz.parse_config({ SORT_KEYS = 0 }) + + assert.are.equal(false, cfg.sort_keys) + end) end) describe('new_rng', function() diff --git a/tools/fuzz_encode_lib.lua b/tools/fuzz_encode_lib.lua index 2f5c220..72b7ac6 100644 --- a/tools/fuzz_encode_lib.lua +++ b/tools/fuzz_encode_lib.lua @@ -37,7 +37,7 @@ function M.parse_config(env) workers = tonumber_or(env.WORKERS, DEFAULTS.workers), worker_id = tonumber_or(env.WORKER_ID, DEFAULTS.worker_id), seed = normalize_seed(env.SEED), - sort_keys = env.SORT_KEYS ~= '0', + sort_keys = env.SORT_KEYS ~= '0' and env.SORT_KEYS ~= 0, } end From 5872048b8caf1042a7eba691932402b37620e954 Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 29 May 2026 12:27:40 +0800 Subject: [PATCH 07/15] feat: generate json encode fuzz cases --- spec/fuzz_encode_lib_spec.lua | 58 +++ tools/fuzz_encode_lib.lua | 655 ++++++++++++++++++++++++++++++++++ 2 files changed, 713 insertions(+) diff --git a/spec/fuzz_encode_lib_spec.lua b/spec/fuzz_encode_lib_spec.lua index 8c8411c..5cd40cd 100644 --- a/spec/fuzz_encode_lib_spec.lua +++ b/spec/fuzz_encode_lib_spec.lua @@ -76,4 +76,62 @@ describe('tools.fuzz_encode_lib', function() assert.matches('last_case=100', line, 1, true) end) end) + + describe('generate_case', function() + it('generates deterministic schema-guided cases with metadata', function() + local a = fuzz.generate_case(fuzz.new_rng(321), 1, { null = {} }) + local b = fuzz.generate_case(fuzz.new_rng(321), 1, { null = {} }) + + assert.are.same(a.value, b.value) + assert.are.same(a.expected, b.expected) + assert.are.equal('number', type(a.id)) + assert.are.equal('string', type(a.schema)) + assert.are.equal('schema_guided', a.kind) + assert.are.equal('object', a.expected.top_level_kind) + assert.are.equal('table', type(a.value.fuzz)) + assert.is_true(#a.expected.objects >= 1) + 
assert.is_true(#a.expected.arrays >= 1) + assert.is_true(#a.expected.scalars >= 1) + end) + + it('adds pure recursive random cases with nested objects and arrays', function() + local case = fuzz.generate_case(fuzz.new_rng(98765), 3, { null = {} }) + + assert.are.equal('recursive_random', case.kind) + assert.are.equal('recursive_random', case.schema) + assert.are.equal('table', type(case.value)) + assert.are.equal('table', type(case.expected.random)) + assert.is_true(case.expected.random.max_depth >= 3) + assert.is_true(case.expected.random.object_count >= 2) + assert.is_true(case.expected.random.array_count >= 1) + assert.is_true(#case.expected.objects >= case.expected.random.object_count) + assert.is_true(#case.expected.arrays >= case.expected.random.array_count) + end) + + it('runs pure recursive random cases at least as often as schema-guided cases', function() + local rng = fuzz.new_rng(1) + local seen = {} + local counts = { + schema_guided = 0, + recursive_random = 0, + } + + for case_id = 1, 30 do + local case = fuzz.generate_case(rng, case_id, { null = {} }) + counts[case.kind] = counts[case.kind] + 1 + if case.kind == 'schema_guided' then + seen[case.schema] = true + end + end + + assert.is_true(counts.recursive_random >= counts.schema_guided) + assert.are.equal(10, counts.schema_guided) + assert.are.equal(20, counts.recursive_random) + assert.is_true(seen.llm_response) + assert.is_true(seen.github_issue) + assert.is_true(seen.social_feed) + assert.is_true(seen.paginated_list) + assert.is_true(seen.metadata_config) + end) + end) end) diff --git a/tools/fuzz_encode_lib.lua b/tools/fuzz_encode_lib.lua index 72b7ac6..05324d4 100644 --- a/tools/fuzz_encode_lib.lua +++ b/tools/fuzz_encode_lib.lua @@ -86,4 +86,659 @@ function M.format_summary(stats) ) end +local SCHEMA_FAMILIES = { + 'llm_response', + 'github_issue', + 'social_feed', + 'paginated_list', + 'metadata_config', +} + +local CASE_KINDS = { + 'schema_guided', + 'recursive_random', + 'recursive_random', 
+} + +local EMPTY_ARRAY_MT = { __jsontype = 'array' } +local FALLBACK_NULL = {} + +local KEY_PARTS = { + 'alpha', + 'body', + 'cache', + 'delta', + 'edge', + 'flags', + 'group', + 'hint', + 'index', + 'job', + 'kind', + 'limit', + 'meta', + 'node', + 'option', + 'payload', + 'query', + 'result', + 'state', + 'token', +} + +local STRING_PARTS = { + 'adapter', + 'batch', + 'cursor', + 'draft', + 'event', + 'filter', + 'gateway', + 'header', + 'intent', + 'journal', + 'kernel', + 'ledger', + 'message', + 'notice', + 'offset', + 'profile', + 'record', + 'signal', + 'thread', + 'update', +} + +local function json_null(rapidjson) + if rapidjson and rapidjson.null ~= nil then + return rapidjson.null + end + return FALLBACK_NULL +end + +local function empty_json_array() + return setmetatable({}, EMPTY_ARRAY_MT) +end + +local function random_integer(rng) + return rng:int(-1000000, 1000000) +end + +local function random_float(rng) + local whole = rng:int(1, 100000) + local fraction = rng:int(1, 999) / 1000 + local sign = rng:bool() and 1 or -1 + return sign * (whole + fraction) +end + +local function random_string(rng) + local count = rng:int(1, 4) + local parts = {} + + for i = 1, count do + parts[#parts + 1] = rng:choice(STRING_PARTS) + end + + return table.concat(parts, '-') .. '-' .. tostring(rng:int(1, 9999)) +end + +local function random_key(rng) + return rng:choice(KEY_PARTS) .. '_' .. 
tostring(rng:int(1, 999)) +end + +local function random_scalar(rng, rapidjson) + local scalar_kind = rng:int(1, 6) + + if scalar_kind == 1 then + return random_string(rng) + elseif scalar_kind == 2 then + return random_integer(rng) + elseif scalar_kind == 3 then + return random_float(rng) + elseif scalar_kind == 4 then + return rng:bool() + elseif scalar_kind == 5 then + return json_null(rapidjson) + end + + return '' +end + +local generate_random_value + +local function unique_random_key(rng, object) + local key = random_key(rng) + + while object[key] ~= nil do + key = random_key(rng) + end + + return key +end + +local function generate_random_object(rng, rapidjson, depth, max_depth) + local object = {} + local width = rng:int(1, 5) + local forced_nested_index + + if depth < max_depth - 1 then + forced_nested_index = rng:int(1, width) + end + + for i = 1, width do + local key = unique_random_key(rng, object) + + if i == forced_nested_index then + object[key] = generate_random_value(rng, rapidjson, depth + 1, max_depth) + elseif depth >= max_depth - 1 or rng:int(1, 100) <= 45 then + object[key] = random_scalar(rng, rapidjson) + else + object[key] = generate_random_value(rng, rapidjson, depth + 1, max_depth) + end + end + + return object +end + +local function generate_random_array(rng, rapidjson, depth, max_depth) + local array = {} + local length = rng:int(1, 5) + local forced_nested_index + + if depth < max_depth - 1 then + forced_nested_index = rng:int(1, length) + end + + for i = 1, length do + if i == forced_nested_index then + array[i] = generate_random_value(rng, rapidjson, depth + 1, max_depth) + elseif depth >= max_depth - 1 or rng:int(1, 100) <= 45 then + array[i] = random_scalar(rng, rapidjson) + else + array[i] = generate_random_value(rng, rapidjson, depth + 1, max_depth) + end + end + + return array +end + +function generate_random_value(rng, rapidjson, depth, max_depth) + if depth >= max_depth then + return random_scalar(rng, rapidjson) + end + + if 
rng:bool() then + return generate_random_object(rng, rapidjson, depth, max_depth) + end + + return generate_random_array(rng, rapidjson, depth, max_depth) +end + +local function generate_random_payload(rng, rapidjson) + local max_depth = rng:int(3, 6) + local payload = generate_random_object(rng, rapidjson, 1, max_depth) + + payload.empty_object = {} + payload.empty_array = empty_json_array() + payload.scalar_samples = { + boolean = rng:bool(), + empty_string = '', + float = random_float(rng), + integer = random_integer(rng), + null_value = json_null(rapidjson), + string = random_string(rng), + } + + return payload +end + +local function string_keys(value) + local keys = {} + + for key in pairs(value) do + if type(key) == 'string' then + keys[#keys + 1] = key + end + end + + table.sort(keys) + return keys +end + +local function path_value(path) + if path == nil or path == '' then + return '$' + end + return path +end + +local function is_json_null(value, rapidjson) + return value == json_null(rapidjson) +end + +local function is_json_array(value) + local metatable = getmetatable(value) + if metatable and metatable.__jsontype == 'array' then + return true + end + + local count = 0 + local max_index = 0 + + for key in pairs(value) do + if type(key) ~= 'number' or key < 1 or key % 1 ~= 0 then + return false + end + + count = count + 1 + if key > max_index then + max_index = key + end + end + + return count > 0 and max_index == count +end + +local function scalar_metadata(value, rapidjson) + if is_json_null(value, rapidjson) then + return 'null' + end + + if value == '' then + return 'empty_string' + end + + if type(value) == 'number' and value == math.floor(value) then + return 'integer' + end + + if type(value) == 'number' then + return 'float' + end + + return type(value) +end + +local function track_object(expected, path, value) + local keys = string_keys(value) + + expected.objects[#expected.objects + 1] = { + path = path_value(path), + key_count = #keys, + keys = 
keys, + } +end + +local function track_array(expected, path, value) + expected.arrays[#expected.arrays + 1] = { + path = path_value(path), + length = #value, + } +end + +local function track_scalar(expected, path, value, rapidjson) + local kind = scalar_metadata(value, rapidjson) + local entry = { + path = path_value(path), + kind = kind, + } + + if kind ~= 'null' then + entry.value = value + end + + expected.scalars[#expected.scalars + 1] = entry +end + +local function base_expected(top_level_kind) + return { + top_level_kind = top_level_kind, + objects = {}, + arrays = {}, + scalars = {}, + } +end + +local function collect_random_metadata(expected, value, path, rapidjson) + local stats = { + max_depth = 0, + object_count = 0, + array_count = 0, + scalar_count = 0, + } + + local function visit(node, current_path, depth) + if depth > stats.max_depth then + stats.max_depth = depth + end + + if type(node) ~= 'table' or is_json_null(node, rapidjson) then + stats.scalar_count = stats.scalar_count + 1 + track_scalar(expected, current_path, node, rapidjson) + return + end + + if is_json_array(node) then + stats.array_count = stats.array_count + 1 + track_array(expected, current_path, node) + + for i = 1, #node do + visit(node[i], current_path .. '[' .. tostring(i) .. ']', depth + 1) + end + + return + end + + stats.object_count = stats.object_count + 1 + track_object(expected, current_path, node) + + local keys = string_keys(node) + for _, key in ipairs(keys) do + visit(node[key], current_path .. '.' .. 
key, depth + 1) + end + end + + visit(value, path_value(path), 1) + + if expected.random == nil then + expected.random = { + max_depth = 0, + object_count = 0, + array_count = 0, + scalar_count = 0, + } + end + + if stats.max_depth > expected.random.max_depth then + expected.random.max_depth = stats.max_depth + end + expected.random.object_count = expected.random.object_count + stats.object_count + expected.random.array_count = expected.random.array_count + stats.array_count + expected.random.scalar_count = expected.random.scalar_count + stats.scalar_count + + return stats +end + +local function build_llm_response(rng, rapidjson) + local value = { + id = 'chatcmpl-' .. tostring(rng:int(100000, 999999)), + object = 'chat.completion', + created = 1700000000 + rng:int(1, 100000), + model = 'fuzz-model-' .. tostring(rng:int(1, 7)), + choices = { + { + index = 0, + message = { + role = 'assistant', + content = random_string(rng), + }, + finish_reason = rng:choice({ 'stop', 'length', 'tool_calls' }), + }, + }, + usage = { + prompt_tokens = rng:int(1, 4096), + completion_tokens = rng:int(1, 4096), + total_tokens = 0, + }, + fuzz = generate_random_payload(rng, rapidjson), + } + value.usage.total_tokens = value.usage.prompt_tokens + value.usage.completion_tokens + + local expected = base_expected('object') + track_object(expected, '$', value) + track_array(expected, '$.choices', value.choices) + track_object(expected, '$.choices[1]', value.choices[1]) + track_object(expected, '$.choices[1].message', value.choices[1].message) + track_object(expected, '$.usage', value.usage) + track_scalar(expected, '$.id', value.id, rapidjson) + track_scalar(expected, '$.model', value.model, rapidjson) + track_scalar(expected, '$.choices[1].message.content', value.choices[1].message.content, rapidjson) + track_scalar(expected, '$.usage.total_tokens', value.usage.total_tokens, rapidjson) + collect_random_metadata(expected, value.fuzz, '$.fuzz', rapidjson) + + return value, expected +end + 
+local function build_github_issue(rng, rapidjson) + local value = { + number = rng:int(1, 25000), + title = 'Issue: ' .. random_string(rng), + state = rng:choice({ 'open', 'closed' }), + locked = rng:bool(), + user = { + login = 'user-' .. tostring(rng:int(1, 9999)), + id = rng:int(1, 999999), + type = rng:choice({ 'User', 'Bot' }), + }, + labels = { + { name = 'bug', color = 'd73a4a' }, + { name = 'fuzz', color = '5319e7' }, + }, + assignees = { + { + login = 'maintainer-' .. tostring(rng:int(1, 99)), + id = rng:int(1, 999999), + }, + }, + comments = rng:int(0, 1000), + reactions = { + total_count = rng:int(0, 1000), + plus_one = rng:int(0, 250), + heart = rng:int(0, 250), + }, + fuzz = generate_random_payload(rng, rapidjson), + } + + local expected = base_expected('object') + track_object(expected, '$', value) + track_object(expected, '$.user', value.user) + track_array(expected, '$.labels', value.labels) + track_object(expected, '$.labels[1]', value.labels[1]) + track_object(expected, '$.labels[2]', value.labels[2]) + track_array(expected, '$.assignees', value.assignees) + track_object(expected, '$.assignees[1]', value.assignees[1]) + track_object(expected, '$.reactions', value.reactions) + track_scalar(expected, '$.number', value.number, rapidjson) + track_scalar(expected, '$.title', value.title, rapidjson) + track_scalar(expected, '$.state', value.state, rapidjson) + track_scalar(expected, '$.locked', value.locked, rapidjson) + collect_random_metadata(expected, value.fuzz, '$.fuzz', rapidjson) + + return value, expected +end + +local function build_social_feed(rng, rapidjson) + local value = { + feed_id = 'feed-' .. tostring(rng:int(1000, 9999)), + generated_at = '2026-05-' .. tostring(rng:int(10, 29)) .. 'T12:00:00Z', + viewer = { + id = rng:int(1, 99999), + handle = 'viewer-' .. tostring(rng:int(1, 999)), + premium = rng:bool(), + }, + posts = { + { + id = 'post-' .. 
tostring(rng:int(1, 999999)), + body = random_string(rng), + author = { + handle = 'author-' .. tostring(rng:int(1, 999)), + verified = rng:bool(), + }, + media = { + { + type = 'image', + url = 'https://example.test/media/' .. tostring(rng:int(1, 9999)), + }, + }, + reactions = { + likes = rng:int(0, 10000), + reposts = rng:int(0, 10000), + }, + }, + }, + fuzz = generate_random_payload(rng, rapidjson), + } + + local expected = base_expected('object') + track_object(expected, '$', value) + track_object(expected, '$.viewer', value.viewer) + track_array(expected, '$.posts', value.posts) + track_object(expected, '$.posts[1]', value.posts[1]) + track_object(expected, '$.posts[1].author', value.posts[1].author) + track_array(expected, '$.posts[1].media', value.posts[1].media) + track_object(expected, '$.posts[1].media[1]', value.posts[1].media[1]) + track_object(expected, '$.posts[1].reactions', value.posts[1].reactions) + track_scalar(expected, '$.feed_id', value.feed_id, rapidjson) + track_scalar(expected, '$.posts[1].body', value.posts[1].body, rapidjson) + track_scalar(expected, '$.viewer.premium', value.viewer.premium, rapidjson) + collect_random_metadata(expected, value.fuzz, '$.fuzz', rapidjson) + + return value, expected +end + +local function build_paginated_list(rng, rapidjson) + local value = { + page = rng:int(1, 50), + per_page = rng:choice({ 10, 25, 50, 100 }), + total = rng:int(100, 10000), + has_next = rng:bool(), + links = { + self = '/v1/items?page=1', + next = '/v1/items?page=2', + previous = json_null(rapidjson), + }, + items = { + { + id = rng:int(1, 999999), + name = random_string(rng), + active = rng:bool(), + attributes = { + rank = rng:int(1, 100), + score = random_float(rng), + }, + }, + { + id = rng:int(1, 999999), + name = random_string(rng), + active = rng:bool(), + attributes = { + rank = rng:int(1, 100), + score = random_float(rng), + }, + }, + }, + fuzz = generate_random_payload(rng, rapidjson), + } + + local expected = 
base_expected('object') + track_object(expected, '$', value) + track_object(expected, '$.links', value.links) + track_array(expected, '$.items', value.items) + track_object(expected, '$.items[1]', value.items[1]) + track_object(expected, '$.items[1].attributes', value.items[1].attributes) + track_object(expected, '$.items[2]', value.items[2]) + track_object(expected, '$.items[2].attributes', value.items[2].attributes) + track_scalar(expected, '$.page', value.page, rapidjson) + track_scalar(expected, '$.per_page', value.per_page, rapidjson) + track_scalar(expected, '$.has_next', value.has_next, rapidjson) + track_scalar(expected, '$.links.previous', value.links.previous, rapidjson) + collect_random_metadata(expected, value.fuzz, '$.fuzz', rapidjson) + + return value, expected +end + +local function build_metadata_config(rng, rapidjson) + local value = { + version = 'v' .. tostring(rng:int(1, 9)) .. '.' .. tostring(rng:int(0, 20)), + environment = rng:choice({ 'dev', 'staging', 'prod' }), + flags = { + beta = rng:bool(), + strict = rng:bool(), + audit = rng:bool(), + }, + limits = { + requests_per_minute = rng:int(1, 10000), + burst = rng:int(1, 1000), + timeout_seconds = rng:int(1, 120), + }, + tags = { + 'json', + 'encode', + 'fuzz', + }, + rules = { + { + name = 'required-metadata', + enabled = true, + threshold = random_float(rng), + }, + { + name = 'optional-overrides', + enabled = rng:bool(), + threshold = random_float(rng), + }, + }, + fuzz = generate_random_payload(rng, rapidjson), + } + + local expected = base_expected('object') + track_object(expected, '$', value) + track_object(expected, '$.flags', value.flags) + track_object(expected, '$.limits', value.limits) + track_array(expected, '$.tags', value.tags) + track_array(expected, '$.rules', value.rules) + track_object(expected, '$.rules[1]', value.rules[1]) + track_object(expected, '$.rules[2]', value.rules[2]) + track_scalar(expected, '$.version', value.version, rapidjson) + track_scalar(expected, 
'$.environment', value.environment, rapidjson) + track_scalar(expected, '$.flags.strict', value.flags.strict, rapidjson) + track_scalar(expected, '$.limits.requests_per_minute', value.limits.requests_per_minute, rapidjson) + collect_random_metadata(expected, value.fuzz, '$.fuzz', rapidjson) + + return value, expected +end + +local SCHEMA_BUILDERS = { + llm_response = build_llm_response, + github_issue = build_github_issue, + social_feed = build_social_feed, + paginated_list = build_paginated_list, + metadata_config = build_metadata_config, +} + +function M.generate_case(rng, case_id, rapidjson) + case_id = case_id or 1 + rng = rng or M.new_rng(case_id) + + local kind = CASE_KINDS[((case_id - 1) % #CASE_KINDS) + 1] + + if kind == 'schema_guided' then + local schema_index = (math.floor((case_id - 1) / #CASE_KINDS) % #SCHEMA_FAMILIES) + 1 + local schema = SCHEMA_FAMILIES[schema_index] + local value, expected = SCHEMA_BUILDERS[schema](rng, rapidjson) + + return { + id = case_id, + kind = kind, + schema = schema, + value = value, + expected = expected, + } + end + + local value = generate_random_payload(rng, rapidjson) + value.case_id = case_id + + local expected = base_expected('object') + collect_random_metadata(expected, value, '$', rapidjson) + + return { + id = case_id, + kind = kind, + schema = 'recursive_random', + value = value, + expected = expected, + } +end + return M From e7f9e4c2b429f769586ec7900fe25597edaa6b83 Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 29 May 2026 12:40:51 +0800 Subject: [PATCH 08/15] fix: tighten fuzz generator metadata --- spec/fuzz_encode_lib_spec.lua | 44 +++++++++++++++++--- tools/fuzz_encode_lib.lua | 75 +++++++++++++++++++++++++---------- 2 files changed, 93 insertions(+), 26 deletions(-) diff --git a/spec/fuzz_encode_lib_spec.lua b/spec/fuzz_encode_lib_spec.lua index 5cd40cd..2fb6178 100644 --- a/spec/fuzz_encode_lib_spec.lua +++ b/spec/fuzz_encode_lib_spec.lua @@ -2,6 +2,7 @@ require 'busted.runner'() 
describe('tools.fuzz_encode_lib', function() local fuzz = require('tools.fuzz_encode_lib') + local rapidjson = require('rapidjson') describe('parse_config', function() it('uses production defaults', function() @@ -78,9 +79,9 @@ describe('tools.fuzz_encode_lib', function() end) describe('generate_case', function() - it('generates deterministic schema-guided cases with metadata', function() - local a = fuzz.generate_case(fuzz.new_rng(321), 1, { null = {} }) - local b = fuzz.generate_case(fuzz.new_rng(321), 1, { null = {} }) + it('generates deterministic schema-guided cases with selected metadata', function() + local a = fuzz.generate_case(fuzz.new_rng(321), 1, rapidjson) + local b = fuzz.generate_case(fuzz.new_rng(321), 1, rapidjson) assert.are.same(a.value, b.value) assert.are.same(a.expected, b.expected) @@ -95,11 +96,12 @@ describe('tools.fuzz_encode_lib', function() end) it('adds pure recursive random cases with nested objects and arrays', function() - local case = fuzz.generate_case(fuzz.new_rng(98765), 3, { null = {} }) + local case = fuzz.generate_case(fuzz.new_rng(98765), 3, rapidjson) assert.are.equal('recursive_random', case.kind) assert.are.equal('recursive_random', case.schema) assert.are.equal('table', type(case.value)) + assert.are.equal('table', type(case.value.random)) assert.are.equal('table', type(case.expected.random)) assert.is_true(case.expected.random.max_depth >= 3) assert.is_true(case.expected.random.object_count >= 2) @@ -108,6 +110,38 @@ describe('tools.fuzz_encode_lib', function() assert.is_true(#case.expected.arrays >= case.expected.random.array_count) end) + it('tracks recursive random arrays from the generated core', function() + local case = fuzz.generate_case(fuzz.new_rng(98765), 3, rapidjson) + local saw_core_array = false + + for _, entry in ipairs(case.expected.arrays) do + if entry.path:match('^%$%.random') then + saw_core_array = true + end + end + + assert.is_true(saw_core_array) + end) + + it('emits rapidjson null sentinels that 
round-trip as JSON null', function() + local case = fuzz.generate_case(fuzz.new_rng(100), 10, rapidjson) + + assert.are.equal('paginated_list', case.schema) + assert.are.equal(rapidjson.null, case.value.links.previous) + + local encoded = rapidjson.encode(case.value) + local decoded = rapidjson.decode(encoded) + + assert.matches('"previous":null', encoded, 1, true) + assert.are.equal(rapidjson.null, decoded.links.previous) + end) + + it('requires a real rapidjson null sentinel', function() + assert.has_error(function() + fuzz.generate_case(fuzz.new_rng(1), 1, {}) + end, 'generate_case requires rapidjson.null') + end) + it('runs pure recursive random cases at least as often as schema-guided cases', function() local rng = fuzz.new_rng(1) local seen = {} @@ -117,7 +151,7 @@ describe('tools.fuzz_encode_lib', function() } for case_id = 1, 30 do - local case = fuzz.generate_case(rng, case_id, { null = {} }) + local case = fuzz.generate_case(rng, case_id, rapidjson) counts[case.kind] = counts[case.kind] + 1 if case.kind == 'schema_guided' then seen[case.schema] = true diff --git a/tools/fuzz_encode_lib.lua b/tools/fuzz_encode_lib.lua index 05324d4..b9f45de 100644 --- a/tools/fuzz_encode_lib.lua +++ b/tools/fuzz_encode_lib.lua @@ -101,7 +101,6 @@ local CASE_KINDS = { } local EMPTY_ARRAY_MT = { __jsontype = 'array' } -local FALLBACK_NULL = {} local KEY_PARTS = { 'alpha', @@ -153,7 +152,7 @@ local function json_null(rapidjson) if rapidjson and rapidjson.null ~= nil then return rapidjson.null end - return FALLBACK_NULL + error('generate_case requires rapidjson.null', 0) end local function empty_json_array() @@ -276,20 +275,26 @@ end local function generate_random_payload(rng, rapidjson) local max_depth = rng:int(3, 6) - local payload = generate_random_object(rng, rapidjson, 1, max_depth) - - payload.empty_object = {} - payload.empty_array = empty_json_array() - payload.scalar_samples = { - boolean = rng:bool(), - empty_string = '', - float = random_float(rng), - integer = 
random_integer(rng), - null_value = json_null(rapidjson), - string = random_string(rng), - } + local random_core = generate_random_object(rng, rapidjson, 1, max_depth) + + random_core[unique_random_key(rng, random_core)] = + generate_random_array(rng, rapidjson, 2, max_depth) + random_core[unique_random_key(rng, random_core)] = + generate_random_object(rng, rapidjson, 2, max_depth) - return payload + return { + random = random_core, + empty_object = {}, + empty_array = empty_json_array(), + scalar_samples = { + boolean = rng:bool(), + empty_string = '', + float = random_float(rng), + integer = random_integer(rng), + null_value = json_null(rapidjson), + string = random_string(rng), + }, + } end local function string_keys(value) @@ -390,6 +395,8 @@ local function track_scalar(expected, path, value, rapidjson) expected.scalars[#expected.scalars + 1] = entry end +-- Schema shells track selected scalars only. +-- Recursive-core metadata is exhaustive for that generated core. local function base_expected(top_level_kind) return { top_level_kind = top_level_kind, @@ -459,6 +466,30 @@ local function collect_random_metadata(expected, value, path, rapidjson) return stats end +local function collect_payload_metadata(expected, payload, path, rapidjson) + track_object(expected, path, payload) + track_object(expected, path .. '.empty_object', payload.empty_object) + track_array(expected, path .. '.empty_array', payload.empty_array) + track_object(expected, path .. '.scalar_samples', payload.scalar_samples) + track_scalar(expected, path .. '.scalar_samples.boolean', payload.scalar_samples.boolean, rapidjson) + track_scalar( + expected, + path .. '.scalar_samples.empty_string', + payload.scalar_samples.empty_string, + rapidjson + ) + track_scalar(expected, path .. '.scalar_samples.float', payload.scalar_samples.float, rapidjson) + track_scalar(expected, path .. '.scalar_samples.integer', payload.scalar_samples.integer, rapidjson) + track_scalar( + expected, + path .. 
'.scalar_samples.null_value', + payload.scalar_samples.null_value, + rapidjson + ) + track_scalar(expected, path .. '.scalar_samples.string', payload.scalar_samples.string, rapidjson) + collect_random_metadata(expected, payload.random, path .. '.random', rapidjson) +end + local function build_llm_response(rng, rapidjson) local value = { id = 'chatcmpl-' .. tostring(rng:int(100000, 999999)), @@ -494,7 +525,7 @@ local function build_llm_response(rng, rapidjson) track_scalar(expected, '$.model', value.model, rapidjson) track_scalar(expected, '$.choices[1].message.content', value.choices[1].message.content, rapidjson) track_scalar(expected, '$.usage.total_tokens', value.usage.total_tokens, rapidjson) - collect_random_metadata(expected, value.fuzz, '$.fuzz', rapidjson) + collect_payload_metadata(expected, value.fuzz, '$.fuzz', rapidjson) return value, expected end @@ -542,7 +573,7 @@ local function build_github_issue(rng, rapidjson) track_scalar(expected, '$.title', value.title, rapidjson) track_scalar(expected, '$.state', value.state, rapidjson) track_scalar(expected, '$.locked', value.locked, rapidjson) - collect_random_metadata(expected, value.fuzz, '$.fuzz', rapidjson) + collect_payload_metadata(expected, value.fuzz, '$.fuzz', rapidjson) return value, expected end @@ -591,7 +622,7 @@ local function build_social_feed(rng, rapidjson) track_scalar(expected, '$.feed_id', value.feed_id, rapidjson) track_scalar(expected, '$.posts[1].body', value.posts[1].body, rapidjson) track_scalar(expected, '$.viewer.premium', value.viewer.premium, rapidjson) - collect_random_metadata(expected, value.fuzz, '$.fuzz', rapidjson) + collect_payload_metadata(expected, value.fuzz, '$.fuzz', rapidjson) return value, expected end @@ -642,7 +673,7 @@ local function build_paginated_list(rng, rapidjson) track_scalar(expected, '$.per_page', value.per_page, rapidjson) track_scalar(expected, '$.has_next', value.has_next, rapidjson) track_scalar(expected, '$.links.previous', value.links.previous, 
rapidjson) - collect_random_metadata(expected, value.fuzz, '$.fuzz', rapidjson) + collect_payload_metadata(expected, value.fuzz, '$.fuzz', rapidjson) return value, expected end @@ -693,7 +724,7 @@ local function build_metadata_config(rng, rapidjson) track_scalar(expected, '$.environment', value.environment, rapidjson) track_scalar(expected, '$.flags.strict', value.flags.strict, rapidjson) track_scalar(expected, '$.limits.requests_per_minute', value.limits.requests_per_minute, rapidjson) - collect_random_metadata(expected, value.fuzz, '$.fuzz', rapidjson) + collect_payload_metadata(expected, value.fuzz, '$.fuzz', rapidjson) return value, expected end @@ -709,6 +740,7 @@ local SCHEMA_BUILDERS = { function M.generate_case(rng, case_id, rapidjson) case_id = case_id or 1 rng = rng or M.new_rng(case_id) + json_null(rapidjson) local kind = CASE_KINDS[((case_id - 1) % #CASE_KINDS) + 1] @@ -730,7 +762,8 @@ function M.generate_case(rng, case_id, rapidjson) value.case_id = case_id local expected = base_expected('object') - collect_random_metadata(expected, value, '$', rapidjson) + track_scalar(expected, '$.case_id', value.case_id, rapidjson) + collect_payload_metadata(expected, value, '$', rapidjson) return { id = case_id, From f61898af594f4f57c7a41444dca82ca1524118f3 Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 29 May 2026 12:45:57 +0800 Subject: [PATCH 09/15] fix: reject fake rapidjson null sentinel --- spec/fuzz_encode_lib_spec.lua | 8 +++++++- tools/fuzz_encode_lib.lua | 10 +++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/spec/fuzz_encode_lib_spec.lua b/spec/fuzz_encode_lib_spec.lua index 2fb6178..4b9920d 100644 --- a/spec/fuzz_encode_lib_spec.lua +++ b/spec/fuzz_encode_lib_spec.lua @@ -139,7 +139,13 @@ describe('tools.fuzz_encode_lib', function() it('requires a real rapidjson null sentinel', function() assert.has_error(function() fuzz.generate_case(fuzz.new_rng(1), 1, {}) - end, 'generate_case requires rapidjson.null') + end, 
'rapidjson.null is required') + end) + + it('rejects fake table null sentinels', function() + assert.has_error(function() + fuzz.generate_case(fuzz.new_rng(1), 1, { null = {} }) + end, 'rapidjson.null is required') end) it('runs pure recursive random cases at least as often as schema-guided cases', function() diff --git a/tools/fuzz_encode_lib.lua b/tools/fuzz_encode_lib.lua index b9f45de..a4f7f52 100644 --- a/tools/fuzz_encode_lib.lua +++ b/tools/fuzz_encode_lib.lua @@ -149,10 +149,14 @@ local STRING_PARTS = { } local function json_null(rapidjson) - if rapidjson and rapidjson.null ~= nil then - return rapidjson.null + if rapidjson and type(rapidjson.encode) == 'function' and rapidjson.null ~= nil then + local ok, encoded = pcall(rapidjson.encode, rapidjson.null) + if ok and encoded == 'null' then + return rapidjson.null + end end - error('generate_case requires rapidjson.null', 0) + + error('rapidjson.null is required', 0) end local function empty_json_array() From 4501a83797d51bab10f749acadd7045d2b5cac21 Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 29 May 2026 13:00:36 +0800 Subject: [PATCH 10/15] feat: validate json encode fuzz output --- spec/fuzz_encode_lib_spec.lua | 88 ++++++++ tools/fuzz_encode_lib.lua | 380 ++++++++++++++++++++++++++++++++++ 2 files changed, 468 insertions(+) diff --git a/spec/fuzz_encode_lib_spec.lua b/spec/fuzz_encode_lib_spec.lua index 4b9920d..f16e52e 100644 --- a/spec/fuzz_encode_lib_spec.lua +++ b/spec/fuzz_encode_lib_spec.lua @@ -174,4 +174,92 @@ describe('tools.fuzz_encode_lib', function() assert.is_true(seen.metadata_config) end) end) + + describe('validate_encoded_case', function() + it('accepts a generated case encoded with sorted keys', function() + local case = fuzz.generate_case(fuzz.new_rng(77), 1, rapidjson) + local json = rapidjson.encode(case.value, { sort_keys = true }) + + local ok, err = fuzz.validate_encoded_case(rapidjson, case, json) + + assert.is_true(ok) + assert.is_nil(err) + end) + + it('rejects 
unsorted encoded object keys for tracked objects', function() + local case = { + id = 1, + kind = 'manual', + schema = 'manual', + value = { b = 1, a = 2 }, + expected = { + top_level_kind = 'object', + objects = { + { path = '$', key_count = 2, keys = { 'a', 'b' } }, + }, + arrays = {}, + scalars = {}, + }, + } + + local ok, err = fuzz.validate_encoded_case(rapidjson, case, '{"b":1,"a":2}') + + assert.is_false(ok) + assert.matches('key order', err, 1, true) + end) + + it('validates recursive_random core metadata after encode and decode', function() + local case = fuzz.generate_case(fuzz.new_rng(98765), 2, rapidjson) + local json = rapidjson.encode(case.value, { sort_keys = true }) + + assert.are.equal('recursive_random', case.kind) + assert.are.equal('recursive_random', case.schema) + assert.are.equal('table', type(case.expected.random)) + + local ok, err = fuzz.validate_encoded_case(rapidjson, case, json) + + assert.is_true(ok) + assert.is_nil(err) + end) + + it('returns decode diagnostics when JSON cannot be decoded', function() + local ok, err = fuzz.validate_encoded_case(rapidjson, { expected = {} }, '{"a":}') + + assert.is_false(ok) + assert.matches('decode failed:', err, 1, true) + end) + end) + + describe('format_failure', function() + it('is reproducible and includes fuzz failure diagnostics', function() + local case = { + id = 42, + kind = 'manual', + schema = 'manual_schema', + value = { b = 1, a = { true, rapidjson.null } }, + } + local details = { + seed = 12345, + worker_id = 2, + case = case, + reason = 'key order mismatch at $', + json = '{"b":1,"a":[true,null]}', + } + + local first = fuzz.format_failure(details) + local second = fuzz.format_failure(details) + + assert.are.equal(first, second) + assert.matches('FUZZ FAILURE', first, 1, true) + assert.matches('seed=12345', first, 1, true) + assert.matches('worker=2', first, 1, true) + assert.matches('case=42', first, 1, true) + assert.matches('kind=manual', first, 1, true) + 
assert.matches('schema=manual_schema', first, 1, true) + assert.matches('reason=key order mismatch at $', first, 1, true) + assert.matches('value=', first, 1, true) + assert.matches('"a"', first, 1, true) + assert.matches('json={"b":1,"a":[true,null]}', first, 1, true) + end) + end) end) diff --git a/tools/fuzz_encode_lib.lua b/tools/fuzz_encode_lib.lua index a4f7f52..c02a6af 100644 --- a/tools/fuzz_encode_lib.lua +++ b/tools/fuzz_encode_lib.lua @@ -368,6 +368,386 @@ local function scalar_metadata(value, rapidjson) return type(value) end +local function table_key_count(value) + local count = 0 + + for _ in pairs(value) do + count = count + 1 + end + + return count +end + +local function decoded_kind(value, rapidjson) + if type(value) == 'table' and not is_json_null(value, rapidjson) then + if is_json_array(value) then + return 'array' + end + + return 'object' + end + + return scalar_metadata(value, rapidjson) +end + +local function matches_expected_kind(value, expected_kind, rapidjson, expected_length) + if expected_kind == 'object' then + return type(value) == 'table' and not is_json_null(value, rapidjson) and not is_json_array(value) + end + + if expected_kind == 'array' then + if type(value) ~= 'table' or is_json_null(value, rapidjson) then + return false + end + + return is_json_array(value) or (expected_length == 0 and table_key_count(value) == 0) + end + + return scalar_metadata(value, rapidjson) == expected_kind +end + +local function lookup_path(value, path) + if path == '$' then + return true, value + end + + if type(path) ~= 'string' or path:sub(1, 1) ~= '$' then + return false, nil, 'invalid path: ' .. tostring(path) + end + + local current = value + local offset = 2 + + while offset <= #path do + local char = path:sub(offset, offset) + + if char == '.' then + offset = offset + 1 + + local start = offset + while offset <= #path do + local next_char = path:sub(offset, offset) + if next_char == '.' 
or next_char == '[' then + break + end + offset = offset + 1 + end + + if start == offset then + return false, nil, 'invalid path segment: ' .. path + end + + if type(current) ~= 'table' then + return false, nil, 'path not found: ' .. path + end + + current = current[path:sub(start, offset - 1)] + if current == nil then + return false, nil, 'path not found: ' .. path + end + elseif char == '[' then + local close = path:find(']', offset + 1, true) + if close == nil then + return false, nil, 'invalid path segment: ' .. path + end + + local index = tonumber(path:sub(offset + 1, close - 1)) + if index == nil or index < 1 or index % 1 ~= 0 then + return false, nil, 'invalid array index: ' .. path + end + + if type(current) ~= 'table' then + return false, nil, 'path not found: ' .. path + end + + current = current[index] + if current == nil then + return false, nil, 'path not found: ' .. path + end + + offset = close + 1 + else + return false, nil, 'invalid path segment: ' .. path + end + end + + return true, current +end + +local function format_keys(keys) + return table.concat(keys, ',') +end + +local function find_key_token(json, key, start, rapidjson) + local token = rapidjson.encode(key) + local offset = start + + while true do + local first, last = json:find(token, offset, true) + if first == nil then + return nil + end + + if json:sub(last + 1):match('^%s*:') then + return first + end + + offset = last + 1 + end +end + +local function validate_key_order(rapidjson, json, object_entry) + local offset = 1 + local previous_key + + for _, key in ipairs(object_entry.keys or {}) do + local position = find_key_token(json, key, offset, rapidjson) + if position == nil then + if previous_key == nil then + return false, string.format( + 'key order mismatch at %s: missing key %s', + object_entry.path, + key + ) + end + + return false, string.format( + 'key order mismatch at %s: expected key %s after %s', + object_entry.path, + key, + previous_key + ) + end + + previous_key = 
key + offset = position + 1 + end + + return true +end + +local function loaded_null(value, rapidjson) + if rapidjson and rapidjson.null ~= nil and value == rapidjson.null then + return true + end + + local loaded = package.loaded.rapidjson + return loaded ~= nil and loaded.null ~= nil and value == loaded.null +end + +local function dump_string(value) + local truncated = value + if #truncated > 120 then + truncated = truncated:sub(1, 117) .. '...' + end + + return string.format('%q', truncated) +end + +local function dump_value_inner(value, rapidjson, depth, seen) + if loaded_null(value, rapidjson) then + return 'null' + end + + local value_type = type(value) + if value_type == 'string' then + return dump_string(value) + end + if value_type == 'number' or value_type == 'boolean' or value_type == 'nil' then + return tostring(value) + end + if value_type ~= 'table' then + return '<' .. value_type .. ':' .. tostring(value) .. '>' + end + + if seen[value] then + return '' + end + if depth >= 5 then + return is_json_array(value) and '[...]' or '{...}' + end + + seen[value] = true + + local parts = {} + if is_json_array(value) then + local limit = math.min(#value, 12) + for index = 1, limit do + parts[#parts + 1] = dump_value_inner(value[index], rapidjson, depth + 1, seen) + end + if #value > limit then + parts[#parts + 1] = '...' + end + seen[value] = nil + return '[' .. table.concat(parts, ',') .. ']' + end + + local keys = string_keys(value) + local limit = math.min(#keys, 12) + for index = 1, limit do + local key = keys[index] + parts[#parts + 1] = + dump_string(key) .. '=' .. dump_value_inner(value[key], rapidjson, depth + 1, seen) + end + if #keys > limit then + parts[#parts + 1] = '...' + end + + seen[value] = nil + return '{' .. table.concat(parts, ',') .. 
'}' +end + +function M.dump_value(value) + return dump_value_inner(value, nil, 1, {}) +end + +function M.format_failure(details) + details = details or {} + + local case = details.case or {} + local case_id = details.case_id or case.id or '?' + local kind = details.kind or case.kind or '?' + local schema = details.schema or case.schema or '?' + local value = details.value + if value == nil then + value = case.value + end + + local lines = { + 'FUZZ FAILURE', + 'seed=' .. tostring(details.seed or '?'), + 'worker=' .. tostring(details.worker or details.worker_id or '?'), + 'case=' .. tostring(case_id), + 'kind=' .. tostring(kind), + 'schema=' .. tostring(schema), + 'reason=' .. tostring(details.reason or '?'), + 'value=' .. M.dump_value(value), + } + + if details.json ~= nil then + lines[#lines + 1] = 'json=' .. tostring(details.json) + end + + return table.concat(lines, '\n') +end + +function M.validate_encoded_case(rapidjson, case, json) + local ok, decoded, decode_err = pcall(rapidjson.decode, json) + if not ok then + return false, 'decode failed: ' .. tostring(decoded) + end + if decoded == nil then + return false, 'decode failed: ' .. 
tostring(decode_err or 'nil result') + end + + local expected = case.expected or {} + if expected.top_level_kind ~= nil and + not matches_expected_kind(decoded, expected.top_level_kind, rapidjson) then + return false, string.format( + 'top-level kind mismatch: expected %s got %s', + expected.top_level_kind, + decoded_kind(decoded, rapidjson) + ) + end + + for _, entry in ipairs(expected.objects or {}) do + local found, value, err = lookup_path(decoded, entry.path) + if not found then + return false, err + end + + if not matches_expected_kind(value, 'object', rapidjson) then + return false, string.format( + 'object kind mismatch at %s: got %s', + entry.path, + decoded_kind(value, rapidjson) + ) + end + + local actual_keys = string_keys(value) + if #actual_keys ~= entry.key_count then + return false, string.format( + 'object key count mismatch at %s: expected %d got %d', + entry.path, + entry.key_count, + #actual_keys + ) + end + + for index, key in ipairs(entry.keys or {}) do + if actual_keys[index] ~= key then + return false, string.format( + 'object keys mismatch at %s: expected %s got %s', + entry.path, + format_keys(entry.keys or {}), + format_keys(actual_keys) + ) + end + end + + local ordered, order_err = validate_key_order(rapidjson, json, entry) + if not ordered then + return false, order_err + end + end + + for _, entry in ipairs(expected.arrays or {}) do + local found, value, err = lookup_path(decoded, entry.path) + if not found then + return false, err + end + + if not matches_expected_kind(value, 'array', rapidjson, entry.length) then + return false, string.format( + 'array kind mismatch at %s: got %s', + entry.path, + decoded_kind(value, rapidjson) + ) + end + + if #value ~= entry.length then + return false, string.format( + 'array length mismatch at %s: expected %d got %d', + entry.path, + entry.length, + #value + ) + end + end + + for _, entry in ipairs(expected.scalars or {}) do + local found, value, err = lookup_path(decoded, entry.path) + if not 
found then + return false, err + end + + local actual_kind = scalar_metadata(value, rapidjson) + if actual_kind ~= entry.kind then + return false, string.format( + 'scalar kind mismatch at %s: expected %s got %s', + entry.path, + entry.kind, + actual_kind + ) + end + + if entry.kind == 'null' then + if value ~= json_null(rapidjson) then + return false, 'scalar value mismatch at ' .. entry.path .. ': expected null' + end + elseif value ~= entry.value then + return false, string.format( + 'scalar value mismatch at %s: expected %s got %s', + entry.path, + dump_value_inner(entry.value, rapidjson, 1, {}), + dump_value_inner(value, rapidjson, 1, {}) + ) + end + end + + return true, nil +end + local function track_object(expected, path, value) local keys = string_keys(value) From ca5b742ee3e1f271303b7c8cd06b8cc8e68a1f0f Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 29 May 2026 13:07:20 +0800 Subject: [PATCH 11/15] fix: validate nested object key order --- spec/fuzz_encode_lib_spec.lua | 22 ++ tools/fuzz_encode_lib.lua | 369 +++++++++++++++++++++++++++++++--- 2 files changed, 365 insertions(+), 26 deletions(-) diff --git a/spec/fuzz_encode_lib_spec.lua b/spec/fuzz_encode_lib_spec.lua index f16e52e..5325fdc 100644 --- a/spec/fuzz_encode_lib_spec.lua +++ b/spec/fuzz_encode_lib_spec.lua @@ -208,6 +208,28 @@ describe('tools.fuzz_encode_lib', function() assert.matches('key order', err, 1, true) end) + it('rejects unsorted nested object keys for tracked object paths', function() + local case = { + id = 2, + kind = 'manual', + schema = 'manual', + value = { a = { b = 1, a = 2 } }, + expected = { + top_level_kind = 'object', + objects = { + { path = '$.a', key_count = 2, keys = { 'a', 'b' } }, + }, + arrays = {}, + scalars = {}, + }, + } + + local ok, err = fuzz.validate_encoded_case(rapidjson, case, '{"a":{"b":1,"a":2}}') + + assert.is_false(ok) + assert.matches('key order', err, 1, true) + end) + it('validates recursive_random core metadata after encode and decode', 
function() local case = fuzz.generate_case(fuzz.new_rng(98765), 2, rapidjson) local json = rapidjson.encode(case.value, { sort_keys = true }) diff --git a/tools/fuzz_encode_lib.lua b/tools/fuzz_encode_lib.lua index c02a6af..9eb015b 100644 --- a/tools/fuzz_encode_lib.lua +++ b/tools/fuzz_encode_lib.lua @@ -478,49 +478,366 @@ local function format_keys(keys) return table.concat(keys, ',') end -local function find_key_token(json, key, start, rapidjson) +local function skip_json_ws(json, offset) + while offset <= #json do + local char = json:sub(offset, offset) + if char ~= ' ' and char ~= '\n' and char ~= '\r' and char ~= '\t' then + break + end + + offset = offset + 1 + end + + return offset +end + +local function scan_json_string(json, offset) + if json:sub(offset, offset) ~= '"' then + return nil, 'expected string at byte ' .. tostring(offset) + end + + offset = offset + 1 + while offset <= #json do + local char = json:sub(offset, offset) + if char == '"' then + return offset + end + if char == '\\' then + offset = offset + 2 + else + offset = offset + 1 + end + end + + return nil, 'unterminated string' +end + +local skip_json_value + +local function skip_json_object(json, offset) + offset = skip_json_ws(json, offset + 1) + if json:sub(offset, offset) == '}' then + return offset + 1 + end + + while offset <= #json do + local string_end, err = scan_json_string(json, offset) + if string_end == nil then + return nil, err + end + + offset = skip_json_ws(json, string_end + 1) + if json:sub(offset, offset) ~= ':' then + return nil, 'expected object member colon' + end + + offset = skip_json_value(json, offset + 1) + if offset == nil then + return nil, 'invalid object member value' + end + + offset = skip_json_ws(json, offset) + local char = json:sub(offset, offset) + if char == '}' then + return offset + 1 + end + if char ~= ',' then + return nil, 'expected object member separator' + end + + offset = skip_json_ws(json, offset + 1) + end + + return nil, 'unterminated 
object' +end + +local function skip_json_array(json, offset) + offset = skip_json_ws(json, offset + 1) + if json:sub(offset, offset) == ']' then + return offset + 1 + end + + while offset <= #json do + offset = skip_json_value(json, offset) + if offset == nil then + return nil, 'invalid array value' + end + + offset = skip_json_ws(json, offset) + local char = json:sub(offset, offset) + if char == ']' then + return offset + 1 + end + if char ~= ',' then + return nil, 'expected array separator' + end + + offset = skip_json_ws(json, offset + 1) + end + + return nil, 'unterminated array' +end + +skip_json_value = function(json, offset) + offset = skip_json_ws(json, offset) + + local char = json:sub(offset, offset) + if char == '"' then + local string_end, err = scan_json_string(json, offset) + if string_end == nil then + return nil, err + end + + return string_end + 1 + end + if char == '{' then + return skip_json_object(json, offset) + end + if char == '[' then + return skip_json_array(json, offset) + end + + while offset <= #json do + char = json:sub(offset, offset) + if char == ',' or char == '}' or char == ']' or char:match('%s') then + break + end + + offset = offset + 1 + end + + return offset +end + +local function json_object_members(json, object_start) + if json:sub(object_start, object_start) ~= '{' then + return nil, 'expected object at byte ' .. 
tostring(object_start) + end + + local members = {} + local offset = skip_json_ws(json, object_start + 1) + if json:sub(offset, offset) == '}' then + return members, offset + end + + while offset <= #json do + local key_start = offset + local key_end, err = scan_json_string(json, key_start) + if key_end == nil then + return nil, err + end + + offset = skip_json_ws(json, key_end + 1) + if json:sub(offset, offset) ~= ':' then + return nil, 'expected object member colon' + end + + local value_start = skip_json_ws(json, offset + 1) + members[#members + 1] = { + token = json:sub(key_start, key_end), + value_start = value_start, + } + + offset = skip_json_value(json, value_start) + if offset == nil then + return nil, 'invalid object member value' + end + + offset = skip_json_ws(json, offset) + local char = json:sub(offset, offset) + if char == '}' then + return members, offset + end + if char ~= ',' then + return nil, 'expected object member separator' + end + + offset = skip_json_ws(json, offset + 1) + end + + return nil, 'unterminated object' +end + +local function tokenize_path(path) + local steps = {} + + if path == '$' then + return true, steps + end + if type(path) ~= 'string' or path:sub(1, 1) ~= '$' then + return false, 'invalid path: ' .. tostring(path) + end + + local offset = 2 + while offset <= #path do + local char = path:sub(offset, offset) + + if char == '.' then + offset = offset + 1 + + local start = offset + while offset <= #path do + local next_char = path:sub(offset, offset) + if next_char == '.' or next_char == '[' then + break + end + offset = offset + 1 + end + + if start == offset then + return false, 'invalid path segment: ' .. path + end + + steps[#steps + 1] = { + kind = 'field', + key = path:sub(start, offset - 1), + } + elseif char == '[' then + local close = path:find(']', offset + 1, true) + if close == nil then + return false, 'invalid path segment: ' .. 
path + end + + local index = tonumber(path:sub(offset + 1, close - 1)) + if index == nil or index < 1 or index % 1 ~= 0 then + return false, 'invalid array index: ' .. path + end + + steps[#steps + 1] = { + kind = 'index', + index = index, + } + offset = close + 1 + else + return false, 'invalid path segment: ' .. path + end + end + + return true, steps +end + +local function json_object_member_value_start(rapidjson, json, object_start, key) + local members, err = json_object_members(json, object_start) + if members == nil then + return nil, err + end + local token = rapidjson.encode(key) - local offset = start + for _, member in ipairs(members) do + if member.token == token then + return member.value_start + end + end + + return nil, 'path not found' +end + +local function json_array_value_start(json, array_start, expected_index) + if json:sub(array_start, array_start) ~= '[' then + return nil, 'expected array at byte ' .. tostring(array_start) + end - while true do - local first, last = json:find(token, offset, true) - if first == nil then - return nil + local offset = skip_json_ws(json, array_start + 1) + if json:sub(offset, offset) == ']' then + return nil, 'path not found' + end + + local index = 1 + while offset <= #json do + local value_start = offset + local next_offset = skip_json_value(json, value_start) + if next_offset == nil then + return nil, 'invalid array value' end - if json:sub(last + 1):match('^%s*:') then - return first + if index == expected_index then + return value_start + end + + offset = skip_json_ws(json, next_offset) + local char = json:sub(offset, offset) + if char == ']' then + return nil, 'path not found' + end + if char ~= ',' then + return nil, 'expected array separator' end - offset = last + 1 + index = index + 1 + offset = skip_json_ws(json, offset + 1) end + + return nil, 'unterminated array' end -local function validate_key_order(rapidjson, json, object_entry) - local offset = 1 - local previous_key +local function 
json_value_start_for_path(rapidjson, json, path) + local ok, steps_or_err = tokenize_path(path) + if not ok then + return nil, steps_or_err + end - for _, key in ipairs(object_entry.keys or {}) do - local position = find_key_token(json, key, offset, rapidjson) - if position == nil then - if previous_key == nil then - return false, string.format( - 'key order mismatch at %s: missing key %s', - object_entry.path, - key - ) + local offset = skip_json_ws(json, 1) + for _, step in ipairs(steps_or_err) do + if step.kind == 'field' then + if json:sub(offset, offset) ~= '{' then + return nil, 'path not found' + end + + local value_start, err = + json_object_member_value_start(rapidjson, json, offset, step.key) + if value_start == nil then + return nil, err end + offset = skip_json_ws(json, value_start) + else + if json:sub(offset, offset) ~= '[' then + return nil, 'path not found' + end + + local value_start, err = json_array_value_start(json, offset, step.index) + if value_start == nil then + return nil, err + end + + offset = skip_json_ws(json, value_start) + end + end + + return offset +end + +local function validate_key_order(rapidjson, json, object_entry) + local object_start, err = json_value_start_for_path(rapidjson, json, object_entry.path) + if object_start == nil then + return false, string.format( + 'key order path lookup failed at %s: %s', + object_entry.path, + err + ) + end + + local members, parse_err = json_object_members(json, object_start) + if members == nil then + return false, string.format( + 'key order object lookup failed at %s: %s', + object_entry.path, + parse_err + ) + end + + for index, key in ipairs(object_entry.keys or {}) do + local member = members[index] + local token = rapidjson.encode(key) + + if member == nil or member.token ~= token then return false, string.format( - 'key order mismatch at %s: expected key %s after %s', + 'key order mismatch at %s: expected key %s at position %d', object_entry.path, key, - previous_key + index ) end - 
- previous_key = key - offset = position + 1 end return true From 1d0033315fdb419b4088c8a92f777da273d28b1a Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 29 May 2026 13:13:33 +0800 Subject: [PATCH 12/15] feat: add json encode fuzz runner --- Makefile | 32 +++++++++++++++ spec/fuzz_encode_lib_spec.lua | 16 ++++++++ tools/fuzz_encode.lua | 75 +++++++++++++++++++++++++++++++++++ tools/fuzz_encode_lib.lua | 11 +++++ 4 files changed, 134 insertions(+) create mode 100644 Makefile create mode 100644 tools/fuzz_encode.lua diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8494338 --- /dev/null +++ b/Makefile @@ -0,0 +1,32 @@ +.PHONY: fuzz + +LUA ?= lua +DURATION ?= 3600 +INTERVAL ?= 5 +WORKERS ?= 1 +SEED ?= $(shell date +%s) +SORT_KEYS ?= 1 + +fuzz: + @set -eu; \ + pids=""; \ + worker=1; \ + while [ "$$worker" -le "$(WORKERS)" ]; do \ + seed=$$(( $(SEED) + $$worker - 1 )); \ + DURATION="$(DURATION)" \ + INTERVAL="$(INTERVAL)" \ + WORKERS="$(WORKERS)" \ + WORKER_ID="$$worker" \ + SEED="$$seed" \ + SORT_KEYS="$(SORT_KEYS)" \ + "$(LUA)" tools/fuzz_encode.lua & \ + pids="$$pids $$!"; \ + worker=$$(( $$worker + 1 )); \ + done; \ + status=0; \ + for pid in $$pids; do \ + if ! 
wait "$$pid"; then \ + status=1; \ + fi; \ + done; \ + exit "$$status" diff --git a/spec/fuzz_encode_lib_spec.lua b/spec/fuzz_encode_lib_spec.lua index 5325fdc..208cefd 100644 --- a/spec/fuzz_encode_lib_spec.lua +++ b/spec/fuzz_encode_lib_spec.lua @@ -41,6 +41,22 @@ describe('tools.fuzz_encode_lib', function() end) end) + describe('env_from_args', function() + it('turns KEY=VALUE args into config environment entries', function() + local env = fuzz.env_from_args({ + 'DURATION=2', + 'INTERVAL=1', + 'SEED=123', + 'WORKERS=1', + }) + + assert.are.equal('2', env.DURATION) + assert.are.equal('1', env.INTERVAL) + assert.are.equal('123', env.SEED) + assert.are.equal('1', env.WORKERS) + end) + end) + describe('new_rng', function() it('is deterministic for the same seed', function() local a = fuzz.new_rng(123) diff --git a/tools/fuzz_encode.lua b/tools/fuzz_encode.lua new file mode 100644 index 0000000..8cdcf88 --- /dev/null +++ b/tools/fuzz_encode.lua @@ -0,0 +1,75 @@ +local rapidjson = require('rapidjson') +local fuzz = require('tools.fuzz_encode_lib') + +local env = fuzz.env_from_args(arg) +for _, key in ipairs({ 'DURATION', 'INTERVAL', 'WORKERS', 'WORKER_ID', 'SEED', 'SORT_KEYS' }) do + if env[key] == nil then + env[key] = os.getenv(key) + end +end + +local cfg = fuzz.parse_config(env) +local rng = fuzz.new_rng(cfg.seed) +local started = os.time() +local next_report = started + cfg.interval +local deadline = started + cfg.duration +local stats = { + worker_id = cfg.worker_id, + elapsed = 0, + total = 0, + encoded = 0, + encode_errors = 0, + validation_failures = 0, + rate = 0, + seed = cfg.seed, + last_case_id = 0, +} + +local function update_stats(now) + stats.elapsed = now - started + if stats.elapsed <= 0 then + stats.rate = stats.total + else + stats.rate = stats.total / stats.elapsed + end +end + +while os.time() < deadline do + local case_id = stats.total + 1 + local generated_case = fuzz.generate_case(rng, case_id, rapidjson) + local ok, json_or_err = 
pcall(rapidjson.encode, generated_case.value, { + sort_keys = cfg.sort_keys, + }) + + stats.total = stats.total + 1 + stats.last_case_id = case_id + + if ok then + stats.encoded = stats.encoded + 1 + local valid, reason = fuzz.validate_encoded_case(rapidjson, generated_case, json_or_err) + if not valid then + stats.validation_failures = stats.validation_failures + 1 + update_stats(os.time()) + io.stderr:write(fuzz.format_failure({ + seed = cfg.seed, + worker_id = cfg.worker_id, + case = generated_case, + json = json_or_err, + reason = reason, + }), '\n') + os.exit(1) + end + else + stats.encode_errors = stats.encode_errors + 1 + end + + local now = os.time() + if now >= next_report then + update_stats(now) + print(fuzz.format_summary(stats)) + next_report = now + cfg.interval + end +end + +update_stats(os.time()) +print(fuzz.format_summary(stats)) diff --git a/tools/fuzz_encode_lib.lua b/tools/fuzz_encode_lib.lua index 9eb015b..8e5ff33 100644 --- a/tools/fuzz_encode_lib.lua +++ b/tools/fuzz_encode_lib.lua @@ -1475,4 +1475,15 @@ function M.generate_case(rng, case_id, rapidjson) } end +function M.env_from_args(args) + local env = {} + for _, arg in ipairs(args or {}) do + local key, value = string.match(arg, '^([%w_]+)=(.*)$') + if key then + env[key] = value + end + end + return env +end + return M From 16f843b74644aa247312470e9d072c2be56fb3fe Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 29 May 2026 13:20:13 +0800 Subject: [PATCH 13/15] fix: fail fuzz run on encode errors --- Makefile | 44 ++++++++++++++++++++++++++++++++++--------- tools/fuzz_encode.lua | 22 ++++++++++++++++++++-- 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 8494338..2516651 100644 --- a/Makefile +++ b/Makefile @@ -8,24 +8,50 @@ SEED ?= $(shell date +%s) SORT_KEYS ?= 1 fuzz: - @set -eu; \ + @set -u; \ + tmpdir=$$(mktemp -d "$${TMPDIR:-/tmp}/lua-rapidjson-fuzz.XXXXXX"); \ pids=""; \ + cleanup() { rm -rf "$$tmpdir"; }; \ + stop_workers() { for 
pid in $$pids; do kill "$$pid" 2>/dev/null || true; done; cleanup; }; \ + trap cleanup EXIT; \ + trap stop_workers INT TERM; \ worker=1; \ while [ "$$worker" -le "$(WORKERS)" ]; do \ seed=$$(( $(SEED) + $$worker - 1 )); \ - DURATION="$(DURATION)" \ - INTERVAL="$(INTERVAL)" \ - WORKERS="$(WORKERS)" \ - WORKER_ID="$$worker" \ - SEED="$$seed" \ - SORT_KEYS="$(SORT_KEYS)" \ - "$(LUA)" tools/fuzz_encode.lua & \ + ( \ + DURATION="$(DURATION)" \ + INTERVAL="$(INTERVAL)" \ + WORKERS="$(WORKERS)" \ + WORKER_ID="$$worker" \ + SEED="$$seed" \ + SORT_KEYS="$(SORT_KEYS)" \ + "$(LUA)" tools/fuzz_encode.lua; \ + rc=$$?; \ + if [ "$$rc" -ne 0 ]; then \ + echo "$$rc" > "$$tmpdir/fail.$$worker"; \ + fi; \ + echo "$$rc" > "$$tmpdir/done.$$worker"; \ + ) & \ pids="$$pids $$!"; \ worker=$$(( $$worker + 1 )); \ done; \ status=0; \ + while :; do \ + if ls "$$tmpdir"/fail.* >/dev/null 2>&1; then \ + status=1; \ + for pid in $$pids; do \ + kill "$$pid" 2>/dev/null || true; \ + done; \ + break; \ + fi; \ + done_count=$$(ls "$$tmpdir"/done.* 2>/dev/null | wc -l | tr -d ' '); \ + if [ "$$done_count" -ge "$(WORKERS)" ]; then \ + break; \ + fi; \ + sleep 1; \ + done; \ for pid in $$pids; do \ - if ! wait "$$pid"; then \ + if ! 
wait "$$pid" 2>/dev/null; then \ status=1; \ fi; \ done; \ diff --git a/tools/fuzz_encode.lua b/tools/fuzz_encode.lua index 8cdcf88..fd4b640 100644 --- a/tools/fuzz_encode.lua +++ b/tools/fuzz_encode.lua @@ -24,6 +24,8 @@ local stats = { seed = cfg.seed, last_case_id = 0, } +local last_report_total = -1 +local last_report_elapsed = -1 local function update_stats(now) stats.elapsed = now - started @@ -34,6 +36,12 @@ local function update_stats(now) end end +local function print_summary() + print(fuzz.format_summary(stats)) + last_report_total = stats.total + last_report_elapsed = stats.elapsed +end + while os.time() < deadline do local case_id = stats.total + 1 local generated_case = fuzz.generate_case(rng, case_id, rapidjson) @@ -61,15 +69,25 @@ while os.time() < deadline do end else stats.encode_errors = stats.encode_errors + 1 + update_stats(os.time()) + io.stderr:write(fuzz.format_failure({ + seed = cfg.seed, + worker_id = cfg.worker_id, + case = generated_case, + reason = 'encode failed: ' .. 
tostring(json_or_err), + }), '\n') + os.exit(1) end local now = os.time() if now >= next_report then update_stats(now) - print(fuzz.format_summary(stats)) + print_summary() next_report = now + cfg.interval end end update_stats(os.time()) -print(fuzz.format_summary(stats)) +if stats.total ~= last_report_total or stats.elapsed ~= last_report_elapsed then + print_summary() +end From 6a2ab74596dafea535a7007f72766885234da68f Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 29 May 2026 13:31:23 +0800 Subject: [PATCH 14/15] fix: tolerate json float round-trip drift --- spec/fuzz_encode_lib_spec.lua | 28 ++++++++++++++++++++++++++++ tools/fuzz_encode_lib.lua | 16 ++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/spec/fuzz_encode_lib_spec.lua b/spec/fuzz_encode_lib_spec.lua index 208cefd..bd4fc36 100644 --- a/spec/fuzz_encode_lib_spec.lua +++ b/spec/fuzz_encode_lib_spec.lua @@ -260,6 +260,34 @@ describe('tools.fuzz_encode_lib', function() assert.is_nil(err) end) + it('accepts small floating point round-trip differences', function() + local case = { + id = 3, + kind = 'manual', + schema = 'manual', + value = { n = 12.931 }, + expected = { + top_level_kind = 'object', + objects = { + { path = '$', key_count = 1, keys = { 'n' } }, + }, + arrays = {}, + scalars = { + { path = '$.n', kind = 'float', value = 12.931 }, + }, + }, + } + + local ok, err = fuzz.validate_encoded_case( + rapidjson, + case, + '{"n":12.931000000000001}' + ) + + assert.is_true(ok) + assert.is_nil(err) + end) + it('returns decode diagnostics when JSON cannot be decoded', function() local ok, err = fuzz.validate_encoded_case(rapidjson, { expected = {} }, '{"a":}') diff --git a/tools/fuzz_encode_lib.lua b/tools/fuzz_encode_lib.lua index 8e5ff33..6ff23f5 100644 --- a/tools/fuzz_encode_lib.lua +++ b/tools/fuzz_encode_lib.lua @@ -948,6 +948,13 @@ function M.format_failure(details) return table.concat(lines, '\n') end +local function numbers_equal(expected, actual) + local delta = 
math.abs(expected - actual) + local scale = math.max(1, math.abs(expected), math.abs(actual)) + + return delta <= scale * 1e-12 +end + function M.validate_encoded_case(rapidjson, case, json) local ok, decoded, decode_err = pcall(rapidjson.decode, json) if not ok then @@ -1052,6 +1059,15 @@ function M.validate_encoded_case(rapidjson, case, json) if value ~= json_null(rapidjson) then return false, 'scalar value mismatch at ' .. entry.path .. ': expected null' end + elseif entry.kind == 'float' then + if not numbers_equal(entry.value, value) then + return false, string.format( + 'scalar value mismatch at %s: expected %s got %s', + entry.path, + dump_value_inner(entry.value, rapidjson, 1, {}), + dump_value_inner(value, rapidjson, 1, {}) + ) + end elseif value ~= entry.value then return false, string.format( 'scalar value mismatch at %s: expected %s got %s', From 51d17faf4b5be4dbe76dcae43e38fd2f7c7c5d92 Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 29 May 2026 14:00:20 +0800 Subject: [PATCH 15/15] feat: add time-based fuzz sample output --- Makefile | 4 ++ spec/fuzz_encode_lib_spec.lua | 50 ++++++++++++++++++ tools/fuzz_encode.lua | 56 +++++++++++++++++++- tools/fuzz_encode_lib.lua | 97 +++++++++++++++++++++++++++++++++++ 4 files changed, 206 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2516651..0b1eb13 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,8 @@ INTERVAL ?= 5 WORKERS ?= 1 SEED ?= $(shell date +%s) SORT_KEYS ?= 1 +SAMPLE_INTERVAL ?= 0 +SAMPLE_LIMIT ?= fuzz: @set -u; \ @@ -25,6 +27,8 @@ fuzz: WORKER_ID="$$worker" \ SEED="$$seed" \ SORT_KEYS="$(SORT_KEYS)" \ + SAMPLE_INTERVAL="$(SAMPLE_INTERVAL)" \ + SAMPLE_LIMIT="$(SAMPLE_LIMIT)" \ "$(LUA)" tools/fuzz_encode.lua; \ rc=$$?; \ if [ "$$rc" -ne 0 ]; then \ diff --git a/spec/fuzz_encode_lib_spec.lua b/spec/fuzz_encode_lib_spec.lua index bd4fc36..d0c0f4c 100644 --- a/spec/fuzz_encode_lib_spec.lua +++ b/spec/fuzz_encode_lib_spec.lua @@ -13,6 +13,8 @@ describe('tools.fuzz_encode_lib', 
function() assert.are.equal(1, cfg.workers) assert.are.equal(1, cfg.worker_id) assert.are.equal(true, cfg.sort_keys) + assert.are.equal(0, cfg.sample_interval) + assert.are.equal(0, cfg.sample_limit) assert.are.equal('number', type(cfg.seed)) end) @@ -24,6 +26,8 @@ describe('tools.fuzz_encode_lib', function() WORKER_ID = '2', SEED = '99', SORT_KEYS = '0', + SAMPLE_INTERVAL = '3', + SAMPLE_LIMIT = '10', }) assert.are.equal(12, cfg.duration) @@ -32,6 +36,15 @@ describe('tools.fuzz_encode_lib', function() assert.are.equal(2, cfg.worker_id) assert.are.equal(99, cfg.seed) assert.are.equal(false, cfg.sort_keys) + assert.are.equal(3, cfg.sample_interval) + assert.are.equal(10, cfg.sample_limit) + end) + + it('defaults time-based sampling to 10 samples when enabled', function() + local cfg = fuzz.parse_config({ SAMPLE_INTERVAL = '1' }) + + assert.are.equal(1, cfg.sample_interval) + assert.are.equal(10, cfg.sample_limit) end) it('treats numeric zero as disabling sorted keys', function() @@ -328,4 +341,41 @@ describe('tools.fuzz_encode_lib', function() assert.matches('json={"b":1,"a":[true,null]}', first, 1, true) end) end) + + describe('format_sample', function() + it('prints full sample data with aligned value columns', function() + local case = { + id = 7, + kind = 'manual', + schema = 'manual_schema', + value = { + root = { + child = { + leaf = { + value = 'deep-value', + }, + }, + }, + }, + } + + local sample = fuzz.format_sample({ + seed = 123, + worker_id = 1, + elapsed = 2, + case = case, + raw_json_unsorted = '{"root":{"child":{"leaf":{"value":"deep-value"}}}}', + encoded_json_sort_keys = '{"root":{"child":{"leaf":{"value":"deep-value"}}}}', + }) + + assert.matches('FUZZ SAMPLE', sample, 1, true) + assert.matches('case=7', sample, 1, true) + assert.matches('input_lua= {', sample, 1, true) + assert.matches('raw_json_unsorted= {', sample, 1, true) + assert.matches('encoded_json_sort_keys={', sample, 1, true) + assert.matches('deep-value', sample, 1, true) + 
assert.is_nil(sample:find('{...}', 1, true)) + assert.is_nil(sample:find('[...]', 1, true)) + end) + end) end) diff --git a/tools/fuzz_encode.lua b/tools/fuzz_encode.lua index fd4b640..50b93c5 100644 --- a/tools/fuzz_encode.lua +++ b/tools/fuzz_encode.lua @@ -2,7 +2,16 @@ local rapidjson = require('rapidjson') local fuzz = require('tools.fuzz_encode_lib') local env = fuzz.env_from_args(arg) -for _, key in ipairs({ 'DURATION', 'INTERVAL', 'WORKERS', 'WORKER_ID', 'SEED', 'SORT_KEYS' }) do +for _, key in ipairs({ + 'DURATION', + 'INTERVAL', + 'WORKERS', + 'WORKER_ID', + 'SEED', + 'SORT_KEYS', + 'SAMPLE_INTERVAL', + 'SAMPLE_LIMIT', +}) do if env[key] == nil then env[key] = os.getenv(key) end @@ -26,6 +35,8 @@ local stats = { } local last_report_total = -1 local last_report_elapsed = -1 +local sample_count = 0 +local next_sample_at = started local function update_stats(now) stats.elapsed = now - started @@ -42,6 +53,45 @@ local function print_summary() last_report_elapsed = stats.elapsed end +local function sample_limit_reached() + return cfg.sample_limit > 0 and sample_count >= cfg.sample_limit +end + +local function advance_next_sample_at(now) + repeat + next_sample_at = next_sample_at + cfg.sample_interval + until next_sample_at > now +end + +local function maybe_print_sample(generated_case, encoded_json, now) + if cfg.sample_interval <= 0 or sample_limit_reached() or now < next_sample_at then + return + end + + local raw_ok, raw_json_or_err = pcall(rapidjson.encode, generated_case.value, { + sort_keys = false, + }) + local sorted_ok, sorted_json_or_err = true, encoded_json + if not cfg.sort_keys then + sorted_ok, sorted_json_or_err = pcall(rapidjson.encode, generated_case.value, { + sort_keys = true, + }) + end + + sample_count = sample_count + 1 + update_stats(now) + print(fuzz.format_sample({ + rapidjson = rapidjson, + seed = cfg.seed, + worker_id = cfg.worker_id, + elapsed = stats.elapsed, + case = generated_case, + raw_json_unsorted = raw_ok and raw_json_or_err 
or (''), + encoded_json_sort_keys = sorted_ok and sorted_json_or_err or (''), + })) + advance_next_sample_at(now) +end + while os.time() < deadline do local case_id = stats.total + 1 local generated_case = fuzz.generate_case(rng, case_id, rapidjson) @@ -80,6 +130,10 @@ while os.time() < deadline do end local now = os.time() + if ok then + maybe_print_sample(generated_case, json_or_err, now) + end + if now >= next_report then update_stats(now) print_summary() diff --git a/tools/fuzz_encode_lib.lua b/tools/fuzz_encode_lib.lua index 6ff23f5..71a9f49 100644 --- a/tools/fuzz_encode_lib.lua +++ b/tools/fuzz_encode_lib.lua @@ -6,6 +6,8 @@ local DEFAULTS = { workers = 1, worker_id = 1, sort_keys = true, + sample_interval = 0, + sample_limit = 0, } local function tonumber_or(value, default) @@ -31,6 +33,12 @@ end function M.parse_config(env) env = env or {} + local sample_interval = tonumber_or(env.SAMPLE_INTERVAL, DEFAULTS.sample_interval) + local sample_limit = tonumber(env.SAMPLE_LIMIT) + if sample_limit == nil then + sample_limit = sample_interval > 0 and 10 or DEFAULTS.sample_limit + end + return { duration = tonumber_or(env.DURATION, DEFAULTS.duration), interval = tonumber_or(env.INTERVAL, DEFAULTS.interval), @@ -38,6 +46,8 @@ function M.parse_config(env) worker_id = tonumber_or(env.WORKER_ID, DEFAULTS.worker_id), seed = normalize_seed(env.SEED), sort_keys = env.SORT_KEYS ~= '0' and env.SORT_KEYS ~= 0, + sample_interval = sample_interval, + sample_limit = sample_limit, } end @@ -918,6 +928,93 @@ function M.dump_value(value) return dump_value_inner(value, nil, 1, {}) end +local function dump_full_string(value) + return string.format('%q', value) +end + +local dump_full_value_inner + +local function dump_full_table(value, rapidjson, seen) + if seen[value] then + return '' + end + + seen[value] = true + + local parts = {} + if is_json_array(value) then + for index = 1, #value do + parts[#parts + 1] = dump_full_value_inner(value[index], rapidjson, seen) + end + + 
seen[value] = nil + return '[' .. table.concat(parts, ',') .. ']' + end + + local keys = string_keys(value) + for _, key in ipairs(keys) do + parts[#parts + 1] = + dump_full_string(key) .. '=' .. dump_full_value_inner(value[key], rapidjson, seen) + end + + seen[value] = nil + return '{' .. table.concat(parts, ',') .. '}' +end + +function dump_full_value_inner(value, rapidjson, seen) + if loaded_null(value, rapidjson) then + return 'null' + end + + local value_type = type(value) + if value_type == 'string' then + return dump_full_string(value) + end + if value_type == 'number' or value_type == 'boolean' or value_type == 'nil' then + return tostring(value) + end + if value_type ~= 'table' then + return '<' .. value_type .. ':' .. tostring(value) .. '>' + end + + return dump_full_table(value, rapidjson, seen) +end + +function M.dump_full_value(value, rapidjson) + return dump_full_value_inner(value, rapidjson, {}) +end + +local SAMPLE_LABEL_WIDTH = #'encoded_json_sort_keys=' + +local function format_sample_value(label, value) + return label .. string.rep(' ', SAMPLE_LABEL_WIDTH - #label) .. tostring(value or '') +end + +function M.format_sample(details) + details = details or {} + + local case = details.case or {} + local value = details.value + if value == nil then + value = case.value + end + + local lines = { + 'FUZZ SAMPLE', + 'seed=' .. tostring(details.seed or '?'), + 'worker=' .. tostring(details.worker or details.worker_id or '?'), + 'elapsed=' .. tostring(details.elapsed or '?') .. 's', + 'case=' .. tostring(details.case_id or case.id or '?'), + 'kind=' .. tostring(details.kind or case.kind or '?'), + 'schema=' .. 
tostring(details.schema or case.schema or '?'), + format_sample_value('input_lua=', M.dump_full_value(value, details.rapidjson)), + format_sample_value('raw_json_unsorted=', details.raw_json_unsorted), + format_sample_value('encoded_json_sort_keys=', details.encoded_json_sort_keys), + } + + return table.concat(lines, '\n') +end + function M.format_failure(details) details = details or {}