From 2a21025be53c3d72e4eaef958bf604330829f6b7 Mon Sep 17 00:00:00 2001
From: Max Murshed <murshed@gmail.com>
Date: Sat, 30 May 2026 13:54:41 -0700
Subject: [PATCH] Add split, cached BigMath-vs-GMP benchmark suite
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

bench_vs_gmp.cpp regenerates random operands and reruns GMP on every
invocation. Split that into three cacheable stages so repeated BigMath
runs only pay for BigMath:

  1. gen_dataset  — materialize random operands once (deterministic seeds),
                    written as files + manifest.csv and zipped by the driver.
  2. run_gmp      — time GMP once per machine, record base-independent value
                    hashes (FNV-1a over the LE 32-bit word stream) for
                    correctness cross-check.
  3. run_bigmath  — time BigMath every run, verify mul/div against the cached
                    GMP hashes, save under an incrementing run id, and append
                    flagged rows to HISTORY.md.

run_benchmark.sh drives it, running only stale stages. The quick/default/full
profiles trade dataset size for coverage. The cache lives in .benchcache (gitignored);
CMake exposes bench_gen_dataset, bench_run_gmp and bench_run_bigmath for parity.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .gitignore                                    |  19 +-
 CMakeLists.txt                                |  19 ++
 tests/performance/benchsuite/HISTORY.md       |  19 ++
 tests/performance/benchsuite/README.md        |  49 +++
 tests/performance/benchsuite/bench_common.h   | 313 ++++++++++++++++++
 tests/performance/benchsuite/gen_dataset.cpp  |  51 +++
 tests/performance/benchsuite/run_benchmark.sh | 119 +++++++
 tests/performance/benchsuite/run_bigmath.cpp  | 203 ++++++++++++
 tests/performance/benchsuite/run_gmp.cpp      | 136 ++++++++
 tests/performance/benchsuite/workload.h       | 130 ++++++++
 10 files changed, 1048 insertions(+), 10 deletions(-)
 create mode 100644 tests/performance/benchsuite/HISTORY.md
 create mode 100644 tests/performance/benchsuite/README.md
 create mode 100644 tests/performance/benchsuite/bench_common.h
 create mode 100644 tests/performance/benchsuite/gen_dataset.cpp
 create mode 100755 tests/performance/benchsuite/run_benchmark.sh
 create mode 100644 tests/performance/benchsuite/run_bigmath.cpp
 create mode 100644 tests/performance/benchsuite/run_gmp.cpp
 create mode 100644 tests/performance/benchsuite/workload.h

diff --git a/.gitignore b/.gitignore
index fbacc7a..4805b9a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,11 +1,10 @@
-.DS_Store
-.vscode/*
-.vs/*
-a.out
-a.out.*/*
-*.class
-*.in
+build/
+*.o
+*.exe
 *.out
-*.ans
-*.csv
-/build
+*.gz
+*.zip
+*.zip.*
+*.gz.*
+.antigravitycli/
+.benchcache/
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0bf23d9..6c991a0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -106,10 +106,25 @@ if(BIGMATH_BUILD_TESTS)
     add_executable(divperf_simple tests/divperf_simple.cpp)
     target_link_libraries(divperf_simple PRIVATE bigmath::bigmath)
 
+    add_executable(regression_bench tests/performance/regression_bench.cpp)
+    target_link_libraries(regression_bench PRIVATE bigmath::bigmath)
+
     # BigDecimal performance bench
     add_executable(bigdecimal_perf tests/bigdecimal_perf.cpp)
     target_link_libraries(bigdecimal_perf PRIVATE bigmath::bigmath)
 
+    # Split benchmark suite — dataset gen + BigMath run (GMP run added below).
+    # Primary interface is tests/performance/benchsuite/run_benchmark.sh, which
+    # compiles these standalone; these targets exist for IDE/CI parity.
+    set(BENCHSUITE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/tests/performance/benchsuite)
+    add_executable(bench_gen_dataset ${BENCHSUITE_DIR}/gen_dataset.cpp)
+    target_link_libraries(bench_gen_dataset PRIVATE bigmath::bigmath)
+    target_include_directories(bench_gen_dataset PRIVATE ${BENCHSUITE_DIR})
+
+    add_executable(bench_run_bigmath ${BENCHSUITE_DIR}/run_bigmath.cpp)
+    target_link_libraries(bench_run_bigmath PRIVATE bigmath::bigmath)
+    target_include_directories(bench_run_bigmath PRIVATE ${BENCHSUITE_DIR})
+
     # GMP-comparison bench (requires libgmp)
     find_library(GMP_LIBRARY gmp)
     find_path(GMP_INCLUDE_DIR gmp.h)
@@ -121,6 +136,10 @@ if(BIGMATH_BUILD_TESTS)
         target_link_libraries(mfa_vs_gmp_check PRIVATE bigmath::bigmath ${GMP_LIBRARY})
         target_include_directories(mfa_vs_gmp_check PRIVATE ${GMP_INCLUDE_DIR})
         target_include_directories(bench_vs_gmp PRIVATE ${GMP_INCLUDE_DIR})
+
+        add_executable(bench_run_gmp ${BENCHSUITE_DIR}/run_gmp.cpp)
+        target_link_libraries(bench_run_gmp PRIVATE bigmath::bigmath ${GMP_LIBRARY})
+        target_include_directories(bench_run_gmp PRIVATE ${BENCHSUITE_DIR} ${GMP_INCLUDE_DIR})
     else()
         message(STATUS "GMP not found; bench_vs_gmp target skipped")
     endif()
diff --git a/tests/performance/benchsuite/HISTORY.md b/tests/performance/benchsuite/HISTORY.md
new file mode 100644
index 0000000..f855495
--- /dev/null
+++ b/tests/performance/benchsuite/HISTORY.md
@@ -0,0 +1,19 @@
+# BigMath vs GMP — historical comparison
+
+Selected large-size results, one block per run, appended by `run_bigmath`.
+Lower ratio is better (BigMath ms / GMP ms). Rows are the entries flagged
+`historical` in the dataset manifest.
+
+## run 1 — machine=Apple_M1_Max_arm64 profile=quick
+
+| op | size | BigMath ms | GMP ms | BM/GMP | check |
+|----|------|-----------:|-------:|-------:|:-----:|
+| mul | 10000x10000 digits | 0.093 | 0.030 | 3.12x | FAIL |
+| div | 40000x10000 digits | 1.186 | 0.232 | 5.10x | FAIL |
+
+## run 2 — machine=Apple_M1_Max_arm64 profile=quick
+
+| op | size | BigMath ms | GMP ms | BM/GMP | check |
+|----|------|-----------:|-------:|-------:|:-----:|
+| mul | 10000x10000 digits | 0.231 | 0.030 | 7.76x | ok |
+| div | 40000x10000 digits | 1.545 | 0.232 | 6.65x | ok |
diff --git a/tests/performance/benchsuite/README.md b/tests/performance/benchsuite/README.md
new file mode 100644
index 0000000..1b9486b
--- /dev/null
+++ b/tests/performance/benchsuite/README.md
@@ -0,0 +1,49 @@
+# Split BigMath-vs-GMP benchmark suite
+
+`bench_vs_gmp.cpp` regenerates random operands and reruns GMP on every
+invocation, which is slow. This suite splits that into three cacheable stages so
+repeated BigMath runs only pay for BigMath.
+
+## Usage
+
+```
+./run_benchmark.sh [profile] [--force-dataset] [--force-gmp]
+```
+
+`profile` = `quick` | `default` | `full` (default `default`). The driver runs
+only the stages that are stale.
+
+## Stages
+
+1. **Dataset generation** (`gen_dataset`) — materializes random operands once
+   from deterministic per-entry seeds, writes `manifest.csv` + one decimal file
+   per operand, and the driver zips it to `dataset_<profile>.zip`. Skipped if the
+   zip or an extracted copy already exists.
+2. **GMP run** (`run_gmp`) — times GMP on the dataset and records a
+   base-independent value hash for the exact-integer ops. Saved to
+   `gmp_<machine>_<profile>.csv` and reused across BigMath edits, since GMP
+   timings don't change. Runs once per machine per profile.
+3. **BigMath run** (`run_bigmath`) — times BigMath on the same dataset, verifies
+   mul/div against the cached GMP hashes, and prints BigMath/GMP ratios. Saved
+   under an incrementing run id (`bigmath_<machine>_<profile>_<id>.csv`); the
+   `historical`-flagged rows are appended to `HISTORY.md`.
+
+## Layout
+
+- Cache (gitignored): `$BENCH_DIR` (default `<repo>/.benchcache`)
+  - `dataset/<profile>/` + `dataset_<profile>.zip` — operands
+  - `results/` — per-machine GMP + per-run BigMath CSVs
+  - `bin/` — compiled stage binaries
+- Committed: `HISTORY.md` — long-lived comparison table
+
+## Notes
+
+- Operands are reproducible on any machine (fixed seeds), so the dataset is
+  machine-independent; only timings/results are machine-keyed.
+- The value hash is FNV-1a over the integer's little-endian 32-bit word stream
+  (BigMath limbs split into 32-bit words, low word first; `mpz_export` for GMP),
+  so correctness is checked without the slow decimal `ToString` path.
+- Workload tables live in `workload.h`; the on-disk `manifest.csv` is the source
+  of truth at run time.
+- CMake also exposes `bench_gen_dataset`, `bench_run_gmp`, `bench_run_bigmath`
+  for IDE/CI parity; the shell driver is the primary interface.
diff --git a/tests/performance/benchsuite/bench_common.h b/tests/performance/benchsuite/bench_common.h
new file mode 100644
index 0000000..ed278c7
--- /dev/null
+++ b/tests/performance/benchsuite/bench_common.h
@@ -0,0 +1,313 @@
+// Shared definitions for the split BigMath-vs-GMP benchmark suite.
+//
+// The suite is split into three independent stages so the slow parts can be
+// cached and reused:
+//   1. gen_dataset  — materialize random operands once, save to a directory
+//                     (the driver then zips it).
+//   2. run_gmp      — time GMP on the dataset once per machine, record answer
+//                     hashes for correctness cross-check.
+//   3. run_bigmath  — time BigMath on the same dataset every run, verify
+//                     against the cached GMP hashes, append to history.
+//
+// All three agree on the workload through the on-disk manifest (manifest.csv),
+// which gen_dataset writes and the other two read. The manifest — not this
+// header — is the source of truth at run time, so a dataset stays valid even
+// if the workload tables below later change.
+
+#ifndef BIGMATH_BENCH_COMMON_H
+#define BIGMATH_BENCH_COMMON_H
+
+#include <algorithm>
+#include <chrono>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <random>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace bench {
+
+// ─── operation taxonomy ────────────────────────────────────────────────────
+enum class Op {
+    Mul,        // BigInteger a * b                (2 operands)
+    Div,        // BigInteger a / b                (2 operands)
+    Parse,      // decimal string -> BigInteger    (1 operand)
+    ToStr,      // BigInteger -> decimal string    (1 operand)
+    DecAdd,     // BigDecimal a + b                (2 operands)
+    DecMul,     // BigDecimal a * b                (2 operands)
+    DecDiv,     // BigDecimal a / b @ scale        (2 operands)
+    DecParse,   // decimal string -> BigDecimal    (1 operand)
+    DecToStr,   // BigDecimal -> decimal string    (1 operand)
+};
+
+inline const char *OpName(Op op) {  // token used for this op in manifest.csv and the GMP result CSV
+    switch (op) {
+        case Op::Mul:      return "mul";
+        case Op::Div:      return "div";
+        case Op::Parse:    return "parse";
+        case Op::ToStr:    return "tostr";
+        case Op::DecAdd:   return "dec_add";
+        case Op::DecMul:   return "dec_mul";
+        case Op::DecDiv:   return "dec_div";
+        case Op::DecParse: return "dec_parse";
+        case Op::DecToStr: return "dec_tostr";
+    }
+    return "?";  // only reachable for a value outside the enumerators above
+}
+
+inline Op OpFromName(const std::string &s) {  // inverse of OpName(); the match is exact and case-sensitive
+    if (s == "mul")       return Op::Mul;
+    if (s == "div")       return Op::Div;
+    if (s == "parse")     return Op::Parse;
+    if (s == "tostr")     return Op::ToStr;
+    if (s == "dec_add")   return Op::DecAdd;
+    if (s == "dec_mul")   return Op::DecMul;
+    if (s == "dec_div")   return Op::DecDiv;
+    if (s == "dec_parse") return Op::DecParse;
+    if (s == "dec_tostr") return Op::DecToStr;
+    throw std::runtime_error("unknown op: " + s);  // an unrecognised token is an error, never a silent default
+}
+
+// Number of decimal-string operand files an entry of this op has (operand indices 0..count-1).
+inline int OperandCount(Op op) {
+    switch (op) {
+        case Op::Mul: case Op::Div:
+        case Op::DecAdd: case Op::DecMul: case Op::DecDiv:
+            return 2;
+        default:  // Parse, ToStr, DecParse, DecToStr
+            return 1;
+    }
+}
+
+// True only for Mul and Div, the ops that compare two big integers exactly, so
+// a value hash is worth recording. The decimal/float ops compare GMP's mpf
+// against BigDecimal, which round differently: they are timed, not hash-checked.
+inline bool HasAnswerHash(Op op) { return op == Op::Mul || op == Op::Div; }
+
+// ─── one benchmark entry ────────────────────────────────────────────────────
+struct Entry {
+    int  id        = 0;   // manifest id; also feeds OperandSeed() and names the operand files (e<id>_<k>.txt)
+    Op   op        = Op::Mul;
+    int  a_digits  = 0;   // operand 0 digits; integer-part digits for DecAdd/DecMul/DecDiv; total digits for DecParse/DecToStr
+    int  b_digits  = 0;   // operand 1 digits for Mul/Div; fractional-part digits for DecAdd/DecMul/DecDiv
+    int  scale     = 0;   // DecDiv target scale; 0 elsewhere
+    int  reps      = 1;   // timed repetitions; report the minimum
+    int  historical = 0;  // 1 => row is mirrored into the long-lived history table
+
+    // Cosmetic label rebuilt from the numeric fields (manifest stays compact).
+    std::string Label() const {  // written into the GMP result CSV, so it must never contain a comma
+        char buf[96];
+        switch (op) {
+            case Op::DecAdd: case Op::DecMul:
+                snprintf(buf, sizeof(buf), "%d.%d digits", a_digits, b_digits);
+                break;
+            case Op::DecDiv:
+                snprintf(buf, sizeof(buf), "%d.%d / %d dp", a_digits, b_digits, scale);
+                break;
+            default:  // integer ops and the one-operand decimal ops
+                if (b_digits > 0 && OperandCount(op) == 2)
+                    snprintf(buf, sizeof(buf), "%dx%d digits", a_digits, b_digits);
+                else
+                    snprintf(buf, sizeof(buf), "%d digits", a_digits);
+        }
+        return buf;
+    }
+};
+
+// ─── repetition policy (shared so GMP and BigMath agree) ────────────────────
+inline int IterationsForDigits(int digits) {  // repetition count shrinks as the operand size grows
+    if (digits <= 2000)   return 7;
+    if (digits <= 20000)  return 5;
+    if (digits <= 100000) return 3;
+    return 1;  // anything above 100000 digits gets a single timed run
+}
+
+// ─── deterministic operand generation ───────────────────────────────────────
+// Decimal digit string drawn from an mt19937_64 seeded with `seed`; the first digit is 1-9 (no leading zero).
+inline std::string GenerateRandomDigits(int length, uint64_t seed) {
+    std::mt19937_64 gen(seed);
+    std::uniform_int_distribution<int> dis(0, 9);  // NOTE(review): the distribution's algorithm is implementation-defined, so digits may differ between standard libraries — confirm before sharing datasets across toolchains
+    std::uniform_int_distribution<int> first_dis(1, 9);
+    std::string s;
+    s.reserve(length);
+    s += char('0' + first_dis(gen));  // appended unconditionally, so length 0 still yields one digit
+    for (int i = 1; i < length; ++i)
+        s += char('0' + dis(gen));
+    return s;
+}
+
+// Stable per-(entry, operand) seed: pure 64-bit integer mixing of the two indices
+// with fixed constants, so a given (id, operand_index) always maps to the same seed.
+inline uint64_t OperandSeed(int id, int operand_index) {
+    uint64_t h = 0x9E3779B97F4A7C15ull;
+    h ^= (uint64_t)id * 0xD1B54A32D192ED03ull;
+    h ^= (uint64_t)(operand_index + 1) * 0xCBF29CE484222325ull;  // +1 so operand 0 still contributes a non-zero term
+    h ^= h >> 29; h *= 0xBF58476D1CE4E5B9ull; h ^= h >> 32;  // final scramble: xor-shift, multiply, xor-shift
+    return h;
+}
+
+// Build the decimal-string operand for an entry slot, exactly as the dataset
+// generator does. Used only by gen_dataset; the run stages read files instead.
+inline std::string BuildOperandString(const Entry &e, int operand_index) {
+    uint64_t seed = OperandSeed(e.id, operand_index);
+    switch (e.op) {
+        case Op::Mul: case Op::Div:
+        case Op::Parse: case Op::ToStr:
+            return GenerateRandomDigits(operand_index == 0 ? e.a_digits : e.b_digits, seed);
+
+        case Op::DecAdd: case Op::DecMul: case Op::DecDiv: {
+            // a_digits = integer-part length, b_digits = fractional-part length; both operands share this shape, only the seed differs.
+            std::string ip = GenerateRandomDigits(e.a_digits, seed);
+            std::string fp = GenerateRandomDigits(e.b_digits, seed ^ 0xF00Dull);  // altered seed so the fraction is not a replay of the integer part
+            return ip + "." + fp;
+        }
+        case Op::DecParse: case Op::DecToStr: {
+            std::string s = GenerateRandomDigits(e.a_digits, seed);
+            s.insert(s.size() / 2, ".");  // decimal point goes at the midpoint of the generated digits
+            return s;
+        }
+    }
+    return "";  // only reachable for an Op value outside the enumerators
+}
+
+// ─── value hash (base-independent, cheap even at 200M digits) ────────────────
+// FNV-1a over the integer's little-endian 32-bit word stream. Each stage feeds
+// Word32() the value's 32-bit words from least to most significant: run_bigmath
+// derives them from the BigMath limbs (one or two words per limb, depending on
+// the limb base), and GMP produces the same stream via mpz_export(order=-1,
+// size=4, endian=-1). No decimal conversion, so the slow ToString path is avoided.
+struct Fnv64 {
+    uint64_t h = 0xCBF29CE484222325ull;
+    void Byte(uint8_t b) { h ^= b; h *= 0x100000001B3ull; }  // xor the byte in first, then multiply
+    void Word32(uint32_t w) {  // feeds the four bytes of w, least significant byte first
+        Byte((uint8_t)(w)); Byte((uint8_t)(w >> 8));
+        Byte((uint8_t)(w >> 16)); Byte((uint8_t)(w >> 24));
+    }
+    void Bytes(const uint8_t *p, size_t n) { for (size_t i = 0; i < n; ++i) Byte(p[i]); }
+};
+
+// ─── manifest I/O ───────────────────────────────────────────────────────────
+// CSV columns: id,op,a_digits,b_digits,scale,reps,historical (one header line, then one row per entry)
+inline std::string ManifestPath(const std::string &dir) { return dir + "/manifest.csv"; }
+inline std::string OperandPath(const std::string &dir, int id, int operand_index) {  // <dir>/e<id>_<operand_index>.txt
+    return dir + "/e" + std::to_string(id) + "_" + std::to_string(operand_index) + ".txt";
+}
+
+inline void WriteManifest(const std::string &dir, const std::vector<Entry> &entries) {  // throws std::runtime_error if the file cannot be opened
+    std::ofstream out(ManifestPath(dir));  // default open mode: an existing manifest is truncated
+    if (!out) throw std::runtime_error("cannot write manifest in " + dir);
+    out << "id,op,a_digits,b_digits,scale,reps,historical\n";
+    for (const Entry &e : entries) {
+        out << e.id << ',' << OpName(e.op) << ',' << e.a_digits << ',' << e.b_digits
+            << ',' << e.scale << ',' << e.reps << ',' << e.historical << '\n';
+    }
+}
+
+inline std::vector<Entry> ReadManifest(const std::string &dir) {  // entries in file order; throws on a missing file or malformed row
+    std::ifstream in(ManifestPath(dir));
+    if (!in) throw std::runtime_error("cannot read manifest in " + dir +
+                                      " (run gen_dataset first)");
+    std::vector<Entry> entries;
+    std::string line;
+    std::getline(in, line); // header (discarded without being checked)
+    while (std::getline(in, line)) {
+        if (line.empty()) continue;
+        std::vector<std::string> f;  // comma-separated fields of this row
+        size_t start = 0;
+        for (size_t i = 0; i <= line.size(); ++i) {  // i == size() flushes the final field
+            if (i == line.size() || line[i] == ',') {
+                f.push_back(line.substr(start, i - start));
+                start = i + 1;
+            }
+        }
+        if (f.size() < 7) throw std::runtime_error("bad manifest line: " + line);  // columns past the seventh are ignored
+        Entry e;
+        e.id         = std::stoi(f[0]);  // std::stoi throws if a field does not start with a number
+        e.op         = OpFromName(f[1]);
+        e.a_digits   = std::stoi(f[2]);
+        e.b_digits   = std::stoi(f[3]);
+        e.scale      = std::stoi(f[4]);
+        e.reps       = std::stoi(f[5]);
+        e.historical = std::stoi(f[6]);
+        entries.push_back(e);
+    }
+    return entries;
+}
+
+inline std::string LoadOperand(const std::string &dir, int id, int operand_index) {  // whole operand file as a string; throws if it cannot be opened
+    std::ifstream in(OperandPath(dir, id, operand_index), std::ios::binary);
+    if (!in) throw std::runtime_error("missing operand file: " +
+                                      OperandPath(dir, id, operand_index));
+    std::string s((std::istreambuf_iterator<char>(in)),
+                  std::istreambuf_iterator<char>());
+    while (!s.empty() && (s.back() == '\n' || s.back() == '\r')) s.pop_back();  // drop trailing CR/LF only
+    return s;
+}
+
+// ─── per-machine result CSV I/O ─────────────────────────────────────────────
+// GMP results: id,op,label,reps,gmp_ms,hash — only the last two columns are kept in memory.
+struct GmpResult { double gmp_ms = 0; std::string hash; };
+
+inline void WriteGmpResults(const std::string &path,
+                            const std::vector<Entry> &entries,
+                            const std::vector<GmpResult> &res,  // indexed in parallel with entries: needs one result per entry
+                            const std::string &header_comment) {
+    std::ofstream out(path);
+    if (!out) throw std::runtime_error("cannot write gmp results: " + path);
+    out << "# " << header_comment << "\n";  // '#' lines are skipped again by ReadGmpResults
+    out << "id,op,label,reps,gmp_ms,hash\n";
+    for (size_t i = 0; i < entries.size(); ++i) {
+        const Entry &e = entries[i];
+        out << e.id << ',' << OpName(e.op) << ',' << e.Label() << ',' << e.reps
+            << ',' << res[i].gmp_ms << ',' << res[i].hash << '\n';
+    }
+}
+
+inline std::vector<GmpResult> ReadGmpResults(const std::string &path) {  // rows in file order; throws std::runtime_error on a missing file or malformed row
+    std::ifstream in(path);
+    if (!in) throw std::runtime_error("cannot read gmp results: " + path +
+                                      " (run run_gmp first)");
+    std::vector<GmpResult> res;
+    std::string line;
+    while (std::getline(in, line)) {
+        if (line.empty() || line[0] == '#') continue;  // blank line or header comment
+        if (line.rfind("id,", 0) == 0) continue; // header
+        // split (label may contain spaces but never commas)
+        std::vector<std::string> f;
+        size_t start = 0;
+        for (size_t i = 0; i <= line.size(); ++i) {  // i == size() flushes the final field
+            if (i == line.size() || line[i] == ',') {
+                f.push_back(line.substr(start, i - start));
+                start = i + 1;
+            }
+        }
+        // Ids are not retained, so rows pair with entries by position only: dropping a bad row would shift all later ones.
+        if (f.size() < 6) throw std::runtime_error("bad gmp result line: " + line);
+        GmpResult r;
+        r.gmp_ms = std::stod(f[4]); r.hash = f[5];
+        res.push_back(r);
+    }
+    return res;
+}
+
+// ─── timing: minimum wall time in ms over `reps` calls of fn (at least one) ──
+template <typename F>
+inline double BestMs(F &&fn, int reps) {
+    using Clock = std::chrono::steady_clock;  // monotonic, so a wall-clock adjustment cannot skew a measurement
+    double best = 1e300;
+    for (int i = 0, runs = reps > 0 ? reps : 1; i < runs; ++i) {  // reps <= 0 still times one run instead of returning the sentinel
+        const auto t0 = Clock::now();
+        fn();
+        const double ms = std::chrono::duration<double, std::milli>(Clock::now() - t0).count();
+        if (ms < best) best = ms;  // keep the fastest, least-disturbed run
+    }
+    return best;
+}
+
+} // namespace bench
+
+#endif // BIGMATH_BENCH_COMMON_H
diff --git a/tests/performance/benchsuite/gen_dataset.cpp b/tests/performance/benchsuite/gen_dataset.cpp
new file mode 100644
index 0000000..b31c7fc
--- /dev/null
+++ b/tests/performance/benchsuite/gen_dataset.cpp
@@ -0,0 +1,51 @@
+// Stage 1: dataset generation.
+//
+//   gen_dataset <dataset_dir> <profile>
+//
+// Writes manifest.csv plus one decimal-string file per operand into
+// <dataset_dir>. The driver zips the directory afterward. Operands are fully
+// determined by (entry id, operand index) so the dataset is reproducible on any
+// machine — the GMP and BigMath stages both operate on these exact files.
+
+#include <cstdio>
+#include <string>
+#include <vector>
+
+#include "bench_common.h"
+#include "workload.h"
+
+using namespace bench;
+
+int main(int argc, char **argv) {  // exit codes: 0 ok, 2 bad usage, 1 operand file could not be written in full
+    if (argc < 3) {
+        fprintf(stderr, "usage: %s <dataset_dir> <profile>\n", argv[0]);
+        return 2;
+    }
+    std::string dir = argv[1];
+    std::string profile = argv[2];
+
+    std::vector<Entry> entries = BuildWorkload(profile);
+    fprintf(stderr, "[gen] profile=%s entries=%zu dir=%s\n",
+            profile.c_str(), entries.size(), dir.c_str());
+
+    size_t total_bytes = 0;
+    for (const Entry &e : entries) {
+        int n = OperandCount(e.op);
+        for (int k = 0; k < n; ++k) {
+            std::string s = BuildOperandString(e, k);
+            std::string path = OperandPath(dir, e.id, k);
+            FILE *f = fopen(path.c_str(), "wb");
+            if (!f) { fprintf(stderr, "[gen] cannot write %s\n", path.c_str()); return 1; }
+            const bool wrote_all = fwrite(s.data(), 1, s.size(), f) == s.size();  // a truncated operand would otherwise be cached and zipped
+            if (fclose(f) != 0 || !wrote_all) { fprintf(stderr, "[gen] short write %s\n", path.c_str()); return 1; }
+            total_bytes += s.size();
+        }
+        fprintf(stderr, "  e%-4d %-9s %-22s\n", e.id, OpName(e.op), e.Label().c_str());
+    }
+
+    // Manifest goes last: the driver treats manifest.csv as the "dataset ready" marker, so an aborted run must not leave one behind.
+    WriteManifest(dir, entries);
+    fprintf(stderr, "[gen] done: %zu entries, %.1f MB of operands\n",
+            entries.size(), total_bytes / 1e6);
+    return 0;
+}
diff --git a/tests/performance/benchsuite/run_benchmark.sh b/tests/performance/benchsuite/run_benchmark.sh
new file mode 100755
index 0000000..a967e25
--- /dev/null
+++ b/tests/performance/benchsuite/run_benchmark.sh
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+# Driver for the split BigMath-vs-GMP benchmark.
+#
+#   ./run_benchmark.sh [profile] [--force-dataset] [--force-gmp]
+#
+#   profile = quick | default | full   (default: "default")
+#
+# It runs only the stages that are stale:
+#   1. dataset   — generated + zipped once per profile (skipped if the zip or an
+#                  extracted copy already exists)
+#   2. GMP run   — run once per machine per profile (skipped if its result CSV
+#                  exists), since GMP timings don't change between BigMath edits
+#   3. BigMath   — always run; saved under an incrementing run id and the
+#                  flagged rows appended to HISTORY.md
+#
+# Cache lives in $BENCH_DIR (default <repo>/.benchcache); HISTORY.md is committed.
+set -euo pipefail  # stop on a failed command, an unset variable, or a failed pipeline stage
+
+# ── locations ────────────────────────────────────────────────────────────────
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO="$(cd "$SCRIPT_DIR/../../.." && pwd)"  # the suite sits three directories below the repo root
+PROFILE="default"
+FORCE_DATASET=0
+FORCE_GMP=0
+for arg in "$@"; do  # arguments may come in any order; a later profile word replaces an earlier one
+    case "$arg" in
+        quick|default|full) PROFILE="$arg" ;;
+        --force-dataset)    FORCE_DATASET=1 ;;
+        --force-gmp)        FORCE_GMP=1 ;;
+        *) echo "unknown arg: $arg" >&2; exit 2 ;;
+    esac
+done
+
+BENCH_DIR="${BENCH_DIR:-$REPO/.benchcache}"  # environment override, else the gitignored default
+BIN_DIR="$BENCH_DIR/bin"
+DATASET_ROOT="$BENCH_DIR/dataset"
+DATA_DIR="$DATASET_ROOT/$PROFILE"
+ZIP="$BENCH_DIR/dataset_$PROFILE.zip"
+RESULTS_DIR="$BENCH_DIR/results"
+HISTORY_MD="$SCRIPT_DIR/HISTORY.md"  # lives next to this script, outside the cache
+mkdir -p "$BIN_DIR" "$DATASET_ROOT" "$RESULTS_DIR"
+
+# ── machine identity (keys the per-machine result files) ─────────────────────
+if [[ "$(uname)" == "Darwin" ]]; then
+    CPU="$(sysctl -n machdep.cpu.brand_string 2>/dev/null || echo unknown)"
+else
+    CPU="$(grep -m1 'model name' /proc/cpuinfo 2>/dev/null | cut -d: -f2 || echo unknown)"
+fi
+MACHINE="$(printf '%s_%s' "$CPU" "$(uname -m)" | tr -cs 'A-Za-z0-9' '_' | sed 's/_*$//')"  # each non-alphanumeric run becomes one '_'; trailing '_' removed
+echo "[driver] machine=$MACHINE profile=$PROFILE bench_dir=$BENCH_DIR"
+
+# ── compiler setup ───────────────────────────────────────────────────────────
+CXX="${CXX:-c++}"  # honour $CXX from the environment
+CXXFLAGS="-std=c++20 -O3 -march=native -DNDEBUG -I$REPO/include -I$SCRIPT_DIR"
+GMP_PREFIX=""
+for p in /opt/homebrew /usr/local; do  # the first prefix that has gmp.h wins
+    [[ -f "$p/include/gmp.h" ]] && GMP_PREFIX="$p" && break
+done
+GMP_FLAGS="-lgmp"  # fallback: rely on the compiler's default search paths
+[[ -n "$GMP_PREFIX" ]] && GMP_FLAGS="-I$GMP_PREFIX/include -L$GMP_PREFIX/lib -lgmp"
+
+# The library is not header-only: Parse/ToString/operator*//, BigDecimal, etc.
+# are defined under src/ and bigdecimal/. run_bigmath must compile + link them
+# (gen_dataset and run_gmp reference no BigMath symbols, so they don't need this).
+LIB_SRCS="$(find "$REPO/src" -name '*.cpp'; echo "$REPO/bigdecimal/BigDecimal.cpp")"
+LIB_SRCS="$(echo $LIB_SRCS)"  # collapse newlines to spaces
+
+build() { # <out> <src> [extra-flags-string]
+    local out="$BIN_DIR/$1"; local src="$SCRIPT_DIR/$2"; local extra="${3:-}"
+    if [[ ! -x "$out" || "$src" -nt "$out" || "$SCRIPT_DIR/bench_common.h" -nt "$out" ]]; then
+        echo "[build] $1"
+        # $extra intentionally unquoted so multiple flags word-split.
+        $CXX $CXXFLAGS "$src" -o "$out" $extra
+    fi
+}
+
+# ── stage 1: dataset ─────────────────────────────────────────────────────────
+if [[ $FORCE_DATASET -eq 1 ]]; then rm -rf "$DATA_DIR" "$ZIP"; fi
+if [[ ! -f "$DATA_DIR/manifest.csv" ]]; then  # manifest.csv is the marker for "an extracted dataset exists"
+    if [[ -f "$ZIP" ]]; then
+        echo "[dataset] extracting cached $ZIP"
+        unzip -q -o "$ZIP" -d "$DATASET_ROOT"  # the archive stores <profile>/..., so this recreates $DATA_DIR
+    else
+        echo "[dataset] generating ($PROFILE)"
+        build gen_dataset gen_dataset.cpp
+        mkdir -p "$DATA_DIR"
+        "$BIN_DIR/gen_dataset" "$DATA_DIR" "$PROFILE"
+        echo "[dataset] zipping -> $ZIP"
+        ( cd "$DATASET_ROOT" && zip -q -r -1 "$ZIP" "$PROFILE" )  # subshell keeps the cd local; -1 = fastest compression
+    fi
+else
+    echo "[dataset] using existing $DATA_DIR"
+fi
+
+# ── stage 2: GMP reference (per machine; stale once the dataset is newer) ────
+GMP_CSV="$RESULTS_DIR/gmp_${MACHINE}_${PROFILE}.csv"
+if [[ $FORCE_GMP -eq 1 || "$DATA_DIR/manifest.csv" -nt "$GMP_CSV" ]]; then rm -f "$GMP_CSV"; fi  # rows pair with the manifest by position, so a regenerated dataset invalidates them
+if [[ ! -f "$GMP_CSV" ]]; then
+    echo "[gmp] running (no cached result for this machine/profile)"
+    build run_gmp run_gmp.cpp "$GMP_FLAGS"
+    "$BIN_DIR/run_gmp" "$DATA_DIR" "$GMP_CSV" "$MACHINE"
+else
+    echo "[gmp] using cached $GMP_CSV"
+fi
+
+# ── stage 3: BigMath (always; incrementing run id) ───────────────────────────
+RUN_ID=1
+shopt -s nullglob  # with no earlier runs the loop must see zero files, not the literal pattern
+for f in "$RESULTS_DIR"/bigmath_${MACHINE}_${PROFILE}_*.csv; do
+    n="${f##*_}"; n="${n%.csv}"  # the text between the last '_' and ".csv"
+    [[ "$n" =~ ^[0-9]+$ ]] && (( n >= RUN_ID )) && RUN_ID=$((n + 1))  # ends up as highest existing id + 1
+done
+shopt -u nullglob
+OUT_CSV="$RESULTS_DIR/bigmath_${MACHINE}_${PROFILE}_${RUN_ID}.csv"
+echo "[bigmath] run_id=$RUN_ID -> $OUT_CSV"
+build run_bigmath run_bigmath.cpp "$LIB_SRCS"
+"$BIN_DIR/run_bigmath" "$DATA_DIR" "$GMP_CSV" "$OUT_CSV" "$RUN_ID" "$MACHINE" "$PROFILE" "$HISTORY_MD"
+
+echo "[driver] done. results: $OUT_CSV   history: $HISTORY_MD"
diff --git a/tests/performance/benchsuite/run_bigmath.cpp b/tests/performance/benchsuite/run_bigmath.cpp
new file mode 100644
index 0000000..0504669
--- /dev/null
+++ b/tests/performance/benchsuite/run_bigmath.cpp
@@ -0,0 +1,203 @@
+// Stage 3: BigMath benchmark run.
+//
+//   run_bigmath <dataset_dir> <gmp_csv> <out_csv> <run_id> <machine> <profile> <history_md>
+//
+// Times BigMath on the dataset, verifies the exact-integer ops against the
+// cached GMP hashes, and reports BigMath/GMP ratios. Writes a per-run CSV
+// (incrementing run id) and appends the entries flagged `historical` in the
+// manifest to a long-lived comparison table.
+
+#include <cstdio>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "bench_common.h"
+
+#include "biginteger/BigInteger.h"
+#include "biginteger/ops/Operations.h"
+#include "biginteger/common/Builder.h"
+#include "biginteger/common/Parser.h"
+#include "bigdecimal/BigDecimal.h"
+
+using namespace bench;
+using namespace BigMath;
+
+// FNV-1a over v's limbs re-expressed as a little-endian 32-bit word stream, to match
+// HashMpz (mpz_export, size=4, order=-1, endian=-1). Each limb yields one word in a
+// base-2^32 build and two (low, then high) when Base() == Base2_64; trailing zero
+// words are dropped like GMP's trimmed export. Returns the digest as 16 hex digits.
+static std::string HashBigInteger(const BigInteger &v) {
+    const std::vector<DataT> &limbs = v.GetInteger();
+    const bool limb64 = (BigInteger::Base() == Base2_64);
+    std::vector<uint32_t> words;
+    words.reserve(limbs.size() * (limb64 ? 2 : 1));
+    for (DataT limb : limbs) {
+        words.push_back(static_cast<uint32_t>(limb));
+        // Widen before shifting: `limb >> 32` would be undefined were DataT only 32 bits wide.
+        if (limb64) words.push_back(static_cast<uint32_t>(static_cast<uint64_t>(limb) >> 32));
+    }
+    while (!words.empty() && words.back() == 0) words.pop_back();
+    Fnv64 h;
+    for (uint32_t w : words) h.Word32(w);
+    char out[20];
+    snprintf(out, sizeof(out), "%016llx", (unsigned long long)h.h);
+    return out;
+}
+
+struct Row {
+    Entry e;                     // manifest entry this row reports on
+    double bm_ms{0.0};           // BestMs() result for the BigMath operation
+    double gmp_ms{0.0};          // time copied from the cached GMP results
+    std::string correct{"n/a"};  // ok / FAIL / n/a
+};
+
+int main(int argc, char **argv) {
+    if (argc < 8) {
+        fprintf(stderr, "usage: %s <dataset_dir> <gmp_csv> <out_csv> <run_id> "
+                        "<machine> <profile> <history_md>\n", argv[0]);
+        return 2;
+    }
+    std::string dir = argv[1], gmp_csv = argv[2], out_csv = argv[3];
+    int run_id = std::stoi(argv[4]);
+    std::string machine = argv[5], profile = argv[6], history_md = argv[7];
+
+    std::vector<Entry> entries = ReadManifest(dir);
+    std::vector<GmpResult> gmp = ReadGmpResults(gmp_csv);
+    if (gmp.size() != entries.size()) {
+        fprintf(stderr, "[bigmath] gmp results (%zu) != manifest entries (%zu); "
+                        "regenerate the GMP run for this dataset\n",
+                gmp.size(), entries.size());
+        return 1;
+    }
+
+    printf("%-9s %-22s %14s %14s %10s %8s\n",
+           "op", "size", "BigMath ms", "GMP ms", "BM/GMP", "check");
+    printf("%-9s %-22s %14s %14s %10s %8s\n",
+           "----", "----", "----------", "------", "------", "-----");
+    fflush(stdout);
+
+    std::vector<Row> rows(entries.size());
+    int failures = 0;   // rows whose result hash differs from the cached GMP hash
+    int io_errors = 0;  // output files that could not be written
+    // BigMath/GMP time ratio; reported as 0 when the cached GMP time is not positive.
+    auto ratio_of = [](const Row &r) { return r.gmp_ms > 0 ? r.bm_ms / r.gmp_ms : 0.0; };
+
+    for (size_t i = 0; i < entries.size(); ++i) {
+        const Entry &e = entries[i];
+        Row &row = rows[i];
+        row.e = e;
+        row.gmp_ms = gmp[i].gmp_ms;  // GMP rows are paired with manifest rows by position
+        fprintf(stderr, "  e%-4d %-9s %-22s ...\n", e.id, OpName(e.op), e.Label().c_str());
+
+        switch (e.op) {
+            case Op::Mul: {
+                BigInteger a = Parse(LoadOperand(dir, e.id, 0).c_str());
+                BigInteger b = Parse(LoadOperand(dir, e.id, 1).c_str());
+                BigInteger c;
+                row.bm_ms = BestMs([&] { c = a * b; if (c.GetInteger().empty()) abort(); }, e.reps);
+                row.correct = (HashBigInteger(c) == gmp[i].hash) ? "ok" : "FAIL";
+                break;
+            }
+            case Op::Div: {
+                BigInteger a = Parse(LoadOperand(dir, e.id, 0).c_str());
+                BigInteger b = Parse(LoadOperand(dir, e.id, 1).c_str());
+                BigInteger q;
+                row.bm_ms = BestMs([&] { q = a / b; if (q.GetInteger().empty()) abort(); }, e.reps);
+                row.correct = (HashBigInteger(q) == gmp[i].hash) ? "ok" : "FAIL";
+                break;
+            }
+            case Op::Parse: {
+                std::string sa = LoadOperand(dir, e.id, 0);
+                row.bm_ms = BestMs([&] {
+                    BigInteger b = Parse(sa.c_str()); if (b.GetInteger().empty()) abort();
+                }, e.reps);
+                break;
+            }
+            case Op::ToStr: {
+                BigInteger b = Parse(LoadOperand(dir, e.id, 0).c_str());
+                row.bm_ms = BestMs([&] {
+                    std::string o = ToString(b); if (o.empty()) abort();
+                }, e.reps);
+                break;
+            }
+            case Op::DecAdd: case Op::DecMul: case Op::DecDiv: {
+                BigDecimal a = BigDecimal::FromString(LoadOperand(dir, e.id, 0));
+                BigDecimal b = BigDecimal::FromString(LoadOperand(dir, e.id, 1));
+                BigDecimal c;
+                if (e.op == Op::DecAdd)
+                    row.bm_ms = BestMs([&] { c = a + b; }, e.reps);
+                else if (e.op == Op::DecMul)
+                    row.bm_ms = BestMs([&] { c = a * b; }, e.reps);
+                else
+                    row.bm_ms = BestMs([&] { c = a.Divide(b, e.scale, RoundingMode::HALF_EVEN); }, e.reps);
+                if (c.Scale() < -1000000) abort();
+                break;
+            }
+            case Op::DecParse: {
+                std::string sa = LoadOperand(dir, e.id, 0);
+                row.bm_ms = BestMs([&] {
+                    BigDecimal b = BigDecimal::FromString(sa); if (b.Scale() < -1000000) abort();
+                }, e.reps);
+                break;
+            }
+            case Op::DecToStr: {
+                BigDecimal b = BigDecimal::FromString(LoadOperand(dir, e.id, 0));
+                row.bm_ms = BestMs([&] {
+                    std::string o = b.ToPlainString(); if (o.empty()) abort();
+                }, e.reps);
+                break;
+            }
+        }
+        if (row.correct == "FAIL") ++failures;
+
+        double ratio = ratio_of(row);
+        printf("%-9s %-22s %14.3f %14.3f %9.2fx %8s\n",
+               OpName(e.op), e.Label().c_str(), row.bm_ms, row.gmp_ms, ratio,
+               row.correct.c_str());
+        fflush(stdout);
+    }
+
+    // ── per-run CSV ──────────────────────────────────────────────────────────
+    {
+        std::ofstream out(out_csv);  // a failed open or write is detected at the flush below
+        out << "# run_id=" << run_id << " machine=" << machine
+            << " profile=" << profile << "\n";
+        out << "id,op,label,reps,bigmath_ms,gmp_ms,ratio,correct\n";
+        for (const Row &r : rows) {
+            out << r.e.id << ',' << OpName(r.e.op) << ',' << r.e.Label() << ','
+                << r.e.reps << ',' << r.bm_ms << ',' << r.gmp_ms << ','
+                << ratio_of(r) << ',' << r.correct << '\n';
+        }
+        if (out.flush()) fprintf(stderr, "[bigmath] wrote %s\n", out_csv.c_str());
+        else { ++io_errors; fprintf(stderr, "[bigmath] FAILED to write %s\n", out_csv.c_str()); }
+    }
+
+    // ── append flagged rows to the long-lived history table ──────────────────
+    {
+        bool fresh = !std::ifstream(history_md).good();
+        std::ofstream hist(history_md, std::ios::app);
+        if (fresh) {
+            hist << "# BigMath vs GMP — historical comparison\n\n"
+                 << "Selected large-size results, one block per run. Lower ratio is better "
+                 << "(BigMath ms / GMP ms).\n";
+        }
+        hist << "\n## run " << run_id << " — machine=" << machine
+             << " profile=" << profile << "\n\n"
+             << "| op | size | BigMath ms | GMP ms | BM/GMP | check |\n"
+             << "|----|------|-----------:|-------:|-------:|:-----:|\n";
+        for (const Row &r : rows) {
+            if (!r.e.historical) continue;
+            char line[256];
+            snprintf(line, sizeof(line), "| %s | %s | %.3f | %.3f | %.2fx | %s |\n",
+                     OpName(r.e.op), r.e.Label().c_str(), r.bm_ms, r.gmp_ms, ratio_of(r),
+                     r.correct.c_str());
+            hist << line;
+        }
+        if (hist.flush()) fprintf(stderr, "[bigmath] appended history -> %s\n", history_md.c_str());
+        else { ++io_errors; fprintf(stderr, "[bigmath] FAILED to append history -> %s\n", history_md.c_str()); }
+    }
+
+    if (failures) fprintf(stderr, "[bigmath] %d correctness FAILURE(s) vs GMP\n", failures);
+    return (failures || io_errors) ? 1 : 0;  // lost output is as fatal to the run as a wrong result
+}
diff --git a/tests/performance/benchsuite/run_gmp.cpp b/tests/performance/benchsuite/run_gmp.cpp
new file mode 100644
index 0000000..08c8da1
--- /dev/null
+++ b/tests/performance/benchsuite/run_gmp.cpp
@@ -0,0 +1,136 @@
+// Stage 2: GMP reference run (cached per machine).
+//
+//   run_gmp <dataset_dir> <out_csv> ["machine label"]
+//
+// Times GMP on every manifest entry and records, for the exact-integer ops, a
+// base-independent value hash so BigMath can be checked against it later. The
+// result CSV is keyed to this machine by the driver, so GMP only runs once per
+// machine per dataset.
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include <vector>
+
+#include <gmp.h>
+
+#include "bench_common.h"
+
+using namespace bench;
+
+// FNV-1a over the integer's LE 32-bit word stream, via mpz_export. Matches the
+// BigMath limb hash bit-for-bit. Returns the 64-bit digest as 16 hex digits.
+static std::string HashMpz(const mpz_t z) {
+    Fnv64 h;
+    // Export into a caller-owned buffer: memory returned by mpz_export(nullptr, ...) comes
+    // from GMP's current allocation function, which need not be malloc/free compatible.
+    size_t count = (mpz_sizeinbase(z, 2) + 31) / 32;  // 32-bit words needed (GMP manual idiom)
+    std::vector<uint32_t> words(count);
+    // order=-1 (LS word first), size=4, endian=-1 (LE within word), nails=0.
+    mpz_export(words.data(), &count, -1, 4, -1, 0, z);
+    if (count) h.Bytes((const uint8_t *)words.data(), count * 4);  // count comes back 0 for z == 0
+    char out[20];
+    snprintf(out, sizeof(out), "%016llx", (unsigned long long)h.h);
+    return out;
+}
+
+static unsigned long PrecBits(int digits) {
+    return 64 + static_cast<unsigned long>(3.322 * digits);  // 3.322 bits per decimal digit, plus 64 extra
+}
+
+int main(int argc, char **argv) {
+    if (argc < 3) {  // dataset_dir and out_csv are required; the machine label is optional
+        fprintf(stderr, "usage: %s <dataset_dir> <out_csv> [machine]\n", argv[0]);
+        return 2;
+    }
+    std::string dir = argv[1];
+    std::string out_csv = argv[2];
+    std::string machine = argc > 3 ? argv[3] : "unknown";
+
+    std::vector<Entry> entries = ReadManifest(dir);
+    std::vector<GmpResult> results(entries.size());  // results[i] belongs to entries[i]
+
+    fprintf(stderr, "[gmp] %s, %zu entries\n", gmp_version, entries.size());
+
+    for (size_t i = 0; i < entries.size(); ++i) {
+        const Entry &e = entries[i];
+        GmpResult &r = results[i];
+        r.hash = "-";  // placeholder; only the Mul and Div cases below overwrite it
+        fprintf(stderr, "  e%-4d %-9s %-22s ...\n", e.id, OpName(e.op), e.Label().c_str());
+
+        switch (e.op) {
+            case Op::Mul: {
+                std::string sa = LoadOperand(dir, e.id, 0), sb = LoadOperand(dir, e.id, 1);
+                mpz_t a, b, c; mpz_init_set_str(a, sa.c_str(), 10);  // operands parsed outside the timed lambda
+                mpz_init_set_str(b, sb.c_str(), 10); mpz_init(c);
+                r.gmp_ms = BestMs([&] { mpz_mul(c, a, b); }, e.reps);
+                r.hash = HashMpz(c);  // hash of the product
+                mpz_clear(a); mpz_clear(b); mpz_clear(c);
+                break;
+            }
+            case Op::Div: {
+                std::string sa = LoadOperand(dir, e.id, 0), sb = LoadOperand(dir, e.id, 1);
+                mpz_t a, b, q, rem; mpz_init_set_str(a, sa.c_str(), 10);
+                mpz_init_set_str(b, sb.c_str(), 10); mpz_init(q); mpz_init(rem);
+                r.gmp_ms = BestMs([&] { mpz_tdiv_qr(q, rem, a, b); }, e.reps);  // truncating division
+                r.hash = HashMpz(q);  // only the quotient is hashed; rem is computed but not recorded
+                mpz_clear(a); mpz_clear(b); mpz_clear(q); mpz_clear(rem);
+                break;
+            }
+            case Op::Parse: {
+                std::string sa = LoadOperand(dir, e.id, 0);
+                r.gmp_ms = BestMs([&] {  // init, parse and clear are all inside the timed lambda
+                    mpz_t g; mpz_init(g); mpz_set_str(g, sa.c_str(), 10); mpz_clear(g);
+                }, e.reps);
+                break;
+            }
+            case Op::ToStr: {
+                std::string sa = LoadOperand(dir, e.id, 0);
+                mpz_t g; mpz_init_set_str(g, sa.c_str(), 10);  // parsed once, outside the timed lambda
+                r.gmp_ms = BestMs([&] {
+                    char *o = mpz_get_str(nullptr, 10, g); if (!o) abort(); free(o);
+                }, e.reps);
+                mpz_clear(g);
+                break;
+            }
+            case Op::DecAdd: case Op::DecMul: case Op::DecDiv: {
+                std::string sa = LoadOperand(dir, e.id, 0), sb = LoadOperand(dir, e.id, 1);
+                unsigned long pb = PrecBits(e.a_digits + e.b_digits + e.scale);  // one precision for a, b and c
+                mpf_t a, b, c; mpf_init2(a, pb); mpf_init2(b, pb); mpf_init2(c, pb);
+                mpf_set_str(a, sa.c_str(), 10); mpf_set_str(b, sb.c_str(), 10);
+                if (e.op == Op::DecAdd)
+                    r.gmp_ms = BestMs([&] { mpf_add(c, a, b); }, e.reps);
+                else if (e.op == Op::DecMul)
+                    r.gmp_ms = BestMs([&] { mpf_mul(c, a, b); }, e.reps);
+                else
+                    r.gmp_ms = BestMs([&] { mpf_div(c, a, b); }, e.reps);  // e.scale enters only through pb
+                mpf_clear(a); mpf_clear(b); mpf_clear(c);  // no hash: r.hash stays "-" for decimal ops
+                break;
+            }
+            case Op::DecParse: {
+                std::string sa = LoadOperand(dir, e.id, 0);
+                unsigned long pb = PrecBits(e.a_digits);
+                r.gmp_ms = BestMs([&] {  // init, parse and clear are all inside the timed lambda
+                    mpf_t g; mpf_init2(g, pb); mpf_set_str(g, sa.c_str(), 10); mpf_clear(g);
+                }, e.reps);
+                break;
+            }
+            case Op::DecToStr: {
+                std::string sa = LoadOperand(dir, e.id, 0);
+                unsigned long pb = PrecBits(e.a_digits);
+                mpf_t g; mpf_init2(g, pb); mpf_set_str(g, sa.c_str(), 10);  // parsed outside the timed lambda
+                r.gmp_ms = BestMs([&] {
+                    mp_exp_t exp; char *o = mpf_get_str(nullptr, &exp, 10, 0, g);  // n_digits=0: GMP picks the count
+                    if (!o) abort(); free(o);
+                }, e.reps);
+                mpf_clear(g);
+                break;
+            }
+        }
+    }
+
+    std::string comment = "GMP " + std::string(gmp_version) + " | machine=" + machine;
+    WriteGmpResults(out_csv, entries, results, comment);  // all rows are written at once, after every entry ran
+    fprintf(stderr, "[gmp] wrote %s\n", out_csv.c_str());
+    return 0;
+}
diff --git a/tests/performance/benchsuite/workload.h b/tests/performance/benchsuite/workload.h
new file mode 100644
index 0000000..842d97c
--- /dev/null
+++ b/tests/performance/benchsuite/workload.h
@@ -0,0 +1,130 @@
+// The benchmark workload tables. Only gen_dataset includes this; once a dataset
+// is materialized the manifest is the source of truth for the run stages.
+//
+// Three profiles trade dataset size / wall time for coverage:
+//   quick   — seconds, tiny dataset; for testing the pipeline itself
+//   default — minutes, modest dataset; the everyday regression set
+//   full    — the historical 200M-digit sweep from bench_vs_gmp.cpp (gigabytes)
+
+#ifndef BIGMATH_BENCH_WORKLOAD_H
+#define BIGMATH_BENCH_WORKLOAD_H
+
+#include <string>
+#include <vector>
+
+#include "bench_common.h"
+
+namespace bench {
+
+inline int RepsFor(Op op, int a_digits, int b_digits) {
+    // Decimal ops are sized by the sum of both digit counts; integer ops by the larger one.
+    const bool decimal = op == Op::DecAdd || op == Op::DecMul || op == Op::DecDiv ||
+                         op == Op::DecParse || op == Op::DecToStr;
+    const int digits = decimal ? a_digits + b_digits : std::max(a_digits, b_digits);
+    const int suggested = IterationsForDigits(digits);
+    // To-string conversions of 100000 digits or more are timed exactly once.
+    return ((op == Op::ToStr || op == Op::DecToStr) && digits >= 100000) ? 1 : suggested;
+}
+
+struct Tables {
+    std::vector<std::pair<int, int>> mul_bal, mul_skew, div_bal, div_skew;  // (a_digits,b_digits) per case
+    std::vector<int> parse, tostr;                          // operand digit counts
+    std::vector<std::pair<int, int>> dec_simple;            // (int,frac) for add/mul/div
+    std::vector<std::tuple<int, int, int>> dec_div_scales;  // (int,frac,scale)
+    std::vector<int> dec_parse, dec_tostr;                  // operand digit counts
+    int hist_mul_min{0};  // mark balanced mul >= this as historical
+    int hist_div_min{0};  // mark skewed   div >= this as historical
+};
+
+inline Tables TablesFor(const std::string &profile) {
+    // Tables for "quick" or "full"; every other profile name selects the default set.
+    const auto pairs = [](const std::vector<int> &sizes, int k) {  // d -> (d, d / k)
+        std::vector<std::pair<int, int>> out;
+        for (int d : sizes) out.push_back({d, d / k});
+        return out;
+    };
+    Tables cfg;
+    if (profile == "quick") {
+        cfg.mul_bal = pairs({1000, 10000}, 1);
+        cfg.mul_skew = {{10000, 1000}};
+        cfg.div_bal = pairs({1000, 10000}, 1);
+        cfg.div_skew = {{40000, 10000}};
+        cfg.parse = {1000, 10000};
+        cfg.tostr = {1000, 10000};
+        cfg.dec_simple = pairs({100, 1000}, 10);
+        cfg.dec_div_scales = {{2000, 200, 0}, {2000, 200, 100}};
+        cfg.dec_parse = {100, 1000};
+        cfg.dec_tostr = {100, 1000};
+        cfg.hist_mul_min = 10000;
+        cfg.hist_div_min = 40000;
+    } else if (profile == "full") {
+        cfg.mul_bal = pairs({1000, 5000, 10000, 50000, 100000, 500000, 1000000, 2000000,
+                             5000000, 10000000, 20000000, 50000000, 100000000, 200000000}, 1);
+        cfg.mul_skew = {{100000, 10000}, {500000, 50000}, {1000000, 100000},
+                        {2000000, 200000}, {5000000, 500000}, {10000000, 1000000},
+                        {20000000, 2000000}, {50000000, 5000000}, {100000000, 10000000},
+                        {200000000, 20000000}};
+        cfg.div_bal = pairs({1000, 5000, 10000, 50000, 100000, 500000, 1000000, 5000000}, 1);
+        cfg.div_skew = {{40000, 10000}, {100000, 10000}, {200000, 50000}, {500000, 100000},
+                        {1000000, 200000}, {2000000, 500000}, {5000000, 1000000},
+                        {10000000, 2000000}, {20000000, 4000000}, {50000000, 10000000},
+                        {100000000, 20000000}, {200000000, 40000000}};
+        cfg.parse = {1000, 10000, 50000, 100000, 500000, 1000000, 2000000, 5000000,
+                     10000000, 20000000, 50000000};
+        cfg.tostr = {1000, 10000, 50000, 100000, 200000, 500000, 1000000, 2000000,
+                     5000000, 10000000, 20000000};
+        cfg.dec_simple = pairs({100, 1000, 5000, 20000}, 10);
+        for (int s : {0, 10, 100, 1000, 5000}) cfg.dec_div_scales.push_back({2000, 200, s});
+        cfg.dec_parse = {100, 1000, 10000, 50000};
+        cfg.dec_tostr = {100, 1000, 10000, 50000};
+        cfg.hist_mul_min = cfg.hist_div_min = 10000000;
+    } else { // default
+        cfg.mul_bal = pairs({1000, 10000, 100000, 1000000}, 1);
+        cfg.mul_skew = {{100000, 10000}, {1000000, 100000}};
+        cfg.div_bal = pairs({1000, 10000, 100000, 1000000}, 1);
+        cfg.div_skew = {{100000, 10000}, {1000000, 200000}};
+        cfg.parse = {1000, 10000, 100000, 1000000};
+        cfg.tostr = {1000, 10000, 100000, 1000000};
+        cfg.dec_simple = pairs({100, 1000, 5000, 20000}, 10);
+        for (int s : {0, 100, 1000}) cfg.dec_div_scales.push_back({2000, 200, s});
+        cfg.dec_parse = {100, 1000, 10000};
+        cfg.dec_tostr = {100, 1000, 10000};
+        cfg.hist_mul_min = cfg.hist_div_min = 1000000;
+    }
+    return cfg;
+}
+
+inline std::vector<Entry> BuildWorkload(const std::string &profile) {
+    // Expands the profile's size tables into entries whose ids count up from 0 in emission order.
+    const Tables sizes = TablesFor(profile);
+    std::vector<Entry> workload;
+    int next_id = 0;
+    const auto emit = [&](Op op, int a_digits, int b_digits, int scale, int historical) {
+        Entry entry;
+        entry.id = next_id++;
+        entry.op = op;
+        entry.a_digits = a_digits;
+        entry.b_digits = b_digits;
+        entry.scale = scale;
+        entry.reps = RepsFor(op, a_digits, b_digits);
+        entry.historical = historical;
+        workload.push_back(entry);
+    };
+
+    for (const auto &[a, b] : sizes.mul_bal)  emit(Op::Mul, a, b, 0, a >= sizes.hist_mul_min);
+    for (const auto &[a, b] : sizes.mul_skew) emit(Op::Mul, a, b, 0, 0);
+    for (const auto &[a, b] : sizes.div_bal)  emit(Op::Div, a, b, 0, 0);
+    for (const auto &[a, b] : sizes.div_skew) emit(Op::Div, a, b, 0, a >= sizes.hist_div_min);
+    for (const int digits : sizes.parse) emit(Op::Parse, digits, 0, 0, 0);
+    for (const int digits : sizes.tostr) emit(Op::ToStr, digits, 0, 0, 0);
+    for (const auto &[a, b] : sizes.dec_simple) { emit(Op::DecAdd, a, b, 0, 0); emit(Op::DecMul, a, b, 0, 0); }
+    for (const auto &[a, b] : sizes.dec_simple) emit(Op::DecDiv, a, b, b, 0);
+    for (const auto &[a, b, scale] : sizes.dec_div_scales) emit(Op::DecDiv, a, b, scale, 0);
+    for (const int digits : sizes.dec_parse) emit(Op::DecParse, digits, 0, 0, 0);
+    for (const int digits : sizes.dec_tostr) emit(Op::DecToStr, digits, 0, 0, 0);
+    return workload;
+}
+
+} // namespace bench
+
+#endif // BIGMATH_BENCH_WORKLOAD_H