Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 51 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,20 @@ name: build

on:
push:
branches: [master]
branches: [main]
pull_request:
branches: [master]
branches: [main]
workflow_dispatch:

# Builds and tests on Linux, macOS and Windows, each with a platform-specific
# optimization baseline. Release builds pick up the committed per-platform
# tuning profile (include/biginteger/build/platform/<os>-<arch>-<compiler>.h)
# automatically via PlatformConfig.h. The library needs a GCC/Clang toolchain
# (__int128, __builtin_*_overflow), so the Windows job uses MSYS2 + Clang, not
# MSVC.

jobs:
build:
unix:
name: ${{ matrix.os }} / ${{ matrix.cc }}
runs-on: ${{ matrix.os }}
timeout-minutes: 30
Expand Down Expand Up @@ -57,5 +65,44 @@ jobs:
run: ctest --output-on-failure --timeout 600

- name: Smoke-test calculator
run: echo '2^256 - 1' | ./build/calculator

# Windows build + test. The library needs a GCC/Clang toolchain, so this job
# runs every step inside an MSYS2 UCRT64 shell with Clang instead of MSVC.
windows:
  name: windows-latest / clang (msys2)
  runs-on: windows-latest
  timeout-minutes: 30
  defaults:
    run:
      # All `run:` steps execute in the MSYS2 shell installed below.
      shell: msys2 {0}
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Set up MSYS2 + Clang
      uses: msys2/setup-msys2@v2
      with:
        msystem: UCRT64
        update: true
        install: >-
          mingw-w64-ucrt-x86_64-clang
          mingw-w64-ucrt-x86_64-cmake
          mingw-w64-ucrt-x86_64-ninja

    - name: Configure
      env:
        CC: clang
        CXX: clang++
      # Configure only. The calculator smoke test must not run here: nothing
      # has been built yet, so it lives in its own step after Build/Test.
      run: |
        cmake -S . -B build -G Ninja \
          -DCMAKE_BUILD_TYPE=Release \
          "-DCMAKE_CXX_FLAGS_RELEASE=-O3 -march=x86-64-v3 -DNDEBUG"

    - name: Build
      run: cmake --build build -j

    - name: Test
      working-directory: build
      run: ctest --output-on-failure --timeout 600

    - name: Smoke-test calculator
      run: echo '2^256 - 1' | ./build/calculator.exe
143 changes: 143 additions & 0 deletions .github/workflows/tune.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
name: tune

# Per-platform dispatch-threshold tuning.
#
# Manual trigger only. For each OS/arch/compiler it builds and runs
# tests/performance/dispatch_tuner (Release, -march=native), which emits a
# profile header into include/biginteger/build/platform/<key>.h. The profiles
# are collected and a single PR is opened for review.
#
# CAVEAT: GitHub-hosted runners are shared VMs whose host CPU varies between
# runs, and the build uses -march=native. Values produced here reflect *that
# runner's* CPU, not a canonical chip — treat the PR as a baseline and prefer
# tuning on real target hardware for values you depend on.

on:
  workflow_dispatch:
    inputs:
      mode:
        description: "Tuner mode (full = wider size sweep, slower)"
        type: choice
        default: full
        options: [full, quick]

# Least privilege by default: the tune jobs only check out the repository and
# upload artifacts. Write scopes are granted to the open-pr job alone.
permissions:
  contents: read

jobs:
  # Linux (gcc, clang) and macOS (clang). `key` is the profile file name stem
  # and must match the <os>-<arch>-<compiler> naming used by PlatformConfig.h.
  tune-unix:
    name: tune ${{ matrix.key }}
    runs-on: ${{ matrix.os }}
    timeout-minutes: 60
    strategy:
      # One platform failing should not cancel the others.
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-latest
            key: linux-x86_64-gcc
            cc: gcc
            cxx: g++
          - os: ubuntu-latest
            key: linux-x86_64-clang
            cc: clang
            cxx: clang++
          - os: macos-latest
            key: macos-arm64-clang
            cc: clang
            cxx: clang++
    steps:
      - uses: actions/checkout@v4

      - name: Build tuner
        env:
          CC: ${{ matrix.cc }}
          CXX: ${{ matrix.cxx }}
        run: |
          cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
          cmake --build build --target dispatch_tuner -j

      - name: Run tuner
        env:
          # Pass the dispatch input through the environment rather than
          # interpolating it into the script text (avoids script injection).
          TUNER_MODE: ${{ inputs.mode }}
        run: |
          MODE_FLAG=""
          if [ "$TUNER_MODE" = "full" ]; then
            MODE_FLAG="--full"
          fi
          # $MODE_FLAG is intentionally unquoted so an empty value expands to
          # no argument at all.
          ./build/dispatch_tuner $MODE_FLAG --emit-header \
            "include/biginteger/build/platform/${{ matrix.key }}.h"

      - uses: actions/upload-artifact@v4
        with:
          name: profile-${{ matrix.key }}
          path: include/biginteger/build/platform/${{ matrix.key }}.h
          if-no-files-found: error

  # Windows via MSYS2 UCRT64 + Clang (same toolchain as the build workflow).
  tune-windows:
    name: tune windows-x86_64-clang
    runs-on: windows-latest
    timeout-minutes: 60
    defaults:
      run:
        shell: msys2 {0}
    steps:
      - uses: actions/checkout@v4

      - uses: msys2/setup-msys2@v2
        with:
          msystem: UCRT64
          update: true
          install: >-
            mingw-w64-ucrt-x86_64-clang
            mingw-w64-ucrt-x86_64-cmake
            mingw-w64-ucrt-x86_64-ninja

      - name: Build tuner
        env:
          CC: clang
          CXX: clang++
        run: |
          cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release
          cmake --build build --target dispatch_tuner -j

      - name: Run tuner
        env:
          # See tune-unix: the input reaches the shell only as an env var.
          TUNER_MODE: ${{ inputs.mode }}
        run: |
          MODE_FLAG=""
          if [ "$TUNER_MODE" = "full" ]; then
            MODE_FLAG="--full"
          fi
          ./build/dispatch_tuner.exe $MODE_FLAG --emit-header \
            "include/biginteger/build/platform/windows-x86_64-clang.h"

      - uses: actions/upload-artifact@v4
        with:
          name: profile-windows-x86_64-clang
          path: include/biginteger/build/platform/windows-x86_64-clang.h
          if-no-files-found: error

  # Gathers every profile-* artifact into the platform directory and opens (or
  # updates) one PR on the auto/platform-tuning branch.
  open-pr:
    name: Collect profiles and open PR
    needs: [tune-unix, tune-windows]
    runs-on: ubuntu-latest
    # Only this job pushes a branch and creates a pull request.
    # NOTE(review): the repository setting that allows GitHub Actions to create
    # pull requests presumably also has to be enabled — confirm.
    permissions:
      contents: write
      pull-requests: write
    steps:
      - uses: actions/checkout@v4

      - name: Download tuned profiles
        uses: actions/download-artifact@v4
        with:
          pattern: profile-*
          # Flatten all matching artifacts directly into `path`.
          merge-multiple: true
          path: include/biginteger/build/platform

      - name: Open pull request
        uses: peter-evans/create-pull-request@v6
        with:
          branch: auto/platform-tuning
          title: "Refresh platform tuning profiles"
          commit-message: "Regenerate per-platform dispatch threshold profiles"
          body: |
            Auto-generated by the **tune** workflow (mode: `${{ inputs.mode }}`).

            Each `include/biginteger/build/platform/<os>-<arch>-<compiler>.h` was
            produced by `dispatch_tuner` on its respective CI runner.

            ⚠️ CI runners are shared VMs with varying host CPUs and the build
            uses `-march=native`. Review the deltas before merging; for values
            you depend on, regenerate on real target hardware.
          add-paths: include/biginteger/build/platform/*.h
          delete-branch: true
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,8 @@ build/
*.gz.*
.antigravitycli/
.benchcache/

# Track build-config headers + platform tuning profiles (the broad build
# rule above also matches include/biginteger/build/).
!include/biginteger/build/
!include/biginteger/build/**
8 changes: 6 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,12 @@ if(BIGMATH_BUILD_TESTS)
add_executable(divperf_simple tests/divperf_simple.cpp)
target_link_libraries(divperf_simple PRIVATE bigmath::bigmath)

add_executable(regression_bench tests/performance/regression_bench.cpp)
target_link_libraries(regression_bench PRIVATE bigmath::bigmath)
# regression_bench.cpp is an ad-hoc, uncommitted scratch bench; only wire it
# up when it's actually present so a clean checkout (CI) still configures.
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tests/performance/regression_bench.cpp")
add_executable(regression_bench tests/performance/regression_bench.cpp)
target_link_libraries(regression_bench PRIVATE bigmath::bigmath)
endif()

# BigDecimal performance bench
add_executable(bigdecimal_perf tests/bigdecimal_perf.cpp)
Expand Down
65 changes: 65 additions & 0 deletions docs/PLATFORM_TUNING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Platform tuning

Dispatch thresholds (when multiplication switches Classic → Karatsuba → Toom →
NTT, when division switches Fast → Burnikel-Ziegler → Newton, etc.) have
crossover points that depend on the CPU and compiler. BigMath ships portable
defaults and lets each platform override them with a tuned profile.

## How resolution works

Every threshold macro is `#ifndef`-guarded, so the first definition wins.
Definitions are consulted in this order (the two headers are included by
`common/Constants.h`):

1. command-line `-D` overrides
2. `build/PlatformConfig.h` → the per-platform tuned profile, if one exists
3. `build/DispatchThresholds.h` → generic portable defaults

`PlatformConfig.h` auto-selects a profile with `__has_include`, keyed on the host
OS / arch / compiler:

```
include/biginteger/build/platform/<os>-<arch>-<compiler>.h
```

e.g. `linux-x86_64-gcc.h`, `macos-arm64-clang.h`, `windows-x86_64-clang.h`. If
the matching file isn't committed, the defaults apply unchanged — no profile is
required to build. A profile only `#define`s the macros it tuned, so it overrides
individual defaults without disabling the fallback for the rest.

Escape hatches: `-DBIGMATH_PLATFORM_OVERRIDE='"path/to/profile.h"'` forces a
specific profile; `-DBIGMATH_PLATFORM_NONE` disables auto-selection.

## Generating a profile

### On real hardware (recommended)

```sh
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release # adds -march=native
cmake --build build --target dispatch_tuner -j
./build/dispatch_tuner --full --emit-header \
include/biginteger/build/platform/<os>-<arch>-<compiler>.h
```

`--full` runs a wider size sweep (slower, more accurate). Drop it for a quick
pass. Commit the emitted file; `PlatformConfig.h` finds it automatically on the
next build.

### Via CI

The **tune** workflow (`.github/workflows/tune.yml`, manual `workflow_dispatch`)
runs the tuner across the CI matrix (Linux gcc/clang, macOS clang, Windows
clang) and opens a single PR with the regenerated profiles.

> ⚠️ **CI values are not canonical.** GitHub-hosted runners are shared VMs whose
> host CPU varies between runs, and the build uses `-march=native`. A profile
> generated on CI reflects *that runner's* CPU. Use the workflow for convenience
> and as a baseline; for values you rely on, tune on the deployment hardware.

## Adding a new platform

1. Add a matrix entry (and, if a new OS, a job) to `tune.yml` with a `key`
matching the `<os>-<arch>-<compiler>.h` naming.
2. Add the corresponding auto-selection branch to `PlatformConfig.h`.

The Windows path uses MSYS2 + Clang because the library requires a GCC/Clang
toolchain (`__int128`, `__builtin_*_overflow`); MSVC cannot compile it.
83 changes: 83 additions & 0 deletions include/biginteger/build/PlatformConfig.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#ifndef BIGMATH_PLATFORM_CONFIG
#define BIGMATH_PLATFORM_CONFIG

// Platform-specific tuned dispatch thresholds.
//
// Pulled in by common/Constants.h *before* build/DispatchThresholds.h. A tuned
// profile only #defines the threshold macros it measured (each #ifndef-guarded),
// so it wins over the generic defaults without disabling the fallback for any
// macro it did not set.
//
// Profiles live under include/biginteger/build/platform/ and are named
//
// <os>-<arch>-<compiler>.h e.g. linux-x86_64-gcc.h, macos-arm64-clang.h
//
// Selection is automatic: the branches below match the host OS/arch/compiler and
// include the matching profile *if it exists* (via __has_include). With no
// profile committed for the host, the defaults in DispatchThresholds.h apply
// unchanged — no profile is required to build.
//
// Generate a profile with the tuner (see docs/PLATFORM_TUNING.md and
// .github/workflows/tune.yml):
//
// dispatch_tuner --full --emit-header \
// include/biginteger/build/platform/<os>-<arch>-<compiler>.h
//
// Escape hatches:
// -DBIGMATH_PLATFORM_OVERRIDE='"path/to/profile.h"' force a specific profile
// -DBIGMATH_PLATFORM_NONE disable auto-selection

// Profile selection. Exactly one of three things happens:
//   1. BIGMATH_PLATFORM_OVERRIDE is defined -> include that header verbatim.
//   2. BIGMATH_PLATFORM_NONE is defined     -> include nothing.
//   3. otherwise                            -> auto-select by OS/arch/compiler.
#if defined(BIGMATH_PLATFORM_OVERRIDE)

// The macro must expand to a quoted (or angle-bracketed) header name.
#include BIGMATH_PLATFORM_OVERRIDE

#elif !defined(BIGMATH_PLATFORM_NONE)

// ─── architecture detection ───────────────────────────────────────────────────
// At most one of BIGMATH_PLATFORM_X86_64 / BIGMATH_PLATFORM_ARM64 is defined.
// On any other architecture neither is, so no branch below matches and no
// profile is included. These macros are not #undef'd in this block, so they
// remain visible to whatever includes this header.
#if defined(__x86_64__) || defined(_M_X64)
#define BIGMATH_PLATFORM_X86_64 1
#elif defined(__aarch64__) || defined(_M_ARM64)
#define BIGMATH_PLATFORM_ARM64 1
#endif

// ─── auto-selection (clang checked before gcc: clang also defines __GNUC__) ────
// Each branch includes its profile only if __has_include finds the file, so a
// matching platform with no committed profile includes nothing.
#if defined(__APPLE__) && defined(BIGMATH_PLATFORM_ARM64) && defined(__clang__)
#if __has_include("biginteger/build/platform/macos-arm64-clang.h")
#include "biginteger/build/platform/macos-arm64-clang.h"
#endif

#elif defined(__APPLE__) && defined(BIGMATH_PLATFORM_X86_64) && defined(__clang__)
#if __has_include("biginteger/build/platform/macos-x86_64-clang.h")
#include "biginteger/build/platform/macos-x86_64-clang.h"
#endif

// Windows: only an x86_64 + clang branch exists.
#elif defined(_WIN32) && defined(BIGMATH_PLATFORM_X86_64) && defined(__clang__)
#if __has_include("biginteger/build/platform/windows-x86_64-clang.h")
#include "biginteger/build/platform/windows-x86_64-clang.h"
#endif

#elif defined(__linux__) && defined(BIGMATH_PLATFORM_X86_64) && defined(__clang__)
#if __has_include("biginteger/build/platform/linux-x86_64-clang.h")
#include "biginteger/build/platform/linux-x86_64-clang.h"
#endif

// The explicit !defined(__clang__) keeps the gcc branches gcc-only even if the
// branch order above is ever changed.
#elif defined(__linux__) && defined(BIGMATH_PLATFORM_X86_64) && defined(__GNUC__) && !defined(__clang__)
#if __has_include("biginteger/build/platform/linux-x86_64-gcc.h")
#include "biginteger/build/platform/linux-x86_64-gcc.h"
#endif

#elif defined(__linux__) && defined(BIGMATH_PLATFORM_ARM64) && defined(__clang__)
#if __has_include("biginteger/build/platform/linux-arm64-clang.h")
#include "biginteger/build/platform/linux-arm64-clang.h"
#endif

#elif defined(__linux__) && defined(BIGMATH_PLATFORM_ARM64) && defined(__GNUC__) && !defined(__clang__)
#if __has_include("biginteger/build/platform/linux-arm64-gcc.h")
#include "biginteger/build/platform/linux-arm64-gcc.h"
#endif

// No #else: an unmatched OS/arch/compiler combination includes no profile.
#endif

#endif // selection

#endif
Loading
Loading