Skip to content

perf(stark): borrow trace rows in place for prover transition eval #103

perf(stark): borrow trace rows in place for prover transition eval

perf(stark): borrow trace rows in place for prover transition eval #103

Workflow file for this run

# Paired, interleaved (A/B/B/A) prover benchmark. Its purpose is to settle small
# (~1%) performance deltas that the cheap per-PR benchmark cannot confirm.
#
# Cost: both binaries are built and ~20 interleaved pairs are run, which keeps
# THE SINGLE BENCH SERVER BUSY FOR ~30-40 MIN. Because of that cost the workflow
# has no automatic trigger: the only event it listens to is a newly created
# comment, and the job below only proceeds for an explicit `/bench-abba` comment
# on a PR.
name: Bench ABBA tiebreaker
on:
  issue_comment:
    types:
      - created
# One ABBA run per PR; a re-trigger cancels the stale one. (The single self-hosted
# bench runner serializes across PRs on its own.)
#
# Workflow-level concurrency applies to EVERY run this trigger creates -- i.e. to
# every new comment on the PR -- and takes effect before the job-level `if` gets a
# chance to skip the run. With a group keyed on the PR number alone, an ordinary
# review comment posted while a benchmark is running would join the group, cancel
# the benchmark, and then skip its own job. To prevent that, only a genuine
# `/bench-abba` command (same predicate as the job's `if`) lands in the shared
# per-PR group; every other comment gets a unique group (its run id) and so can
# never cancel anything.
concurrency:
  group: >-
    bench-abba-${{ github.event.issue.number }}-${{
    github.event.issue.pull_request &&
    startsWith(github.event.comment.body, '/bench-abba') &&
    contains(fromJSON('["MEMBER","OWNER","COLLABORATOR"]'), github.event.comment.author_association) &&
    'cmd' || github.run_id }}
  cancel-in-progress: true
# Token scopes granted to every job in this workflow.
permissions:
  # Read-only repository access; the job checks the repo out and fetches the PR head.
  contents: read
  # The job posts PR comments through the issues API (`issues.createComment`) and
  # adds a reaction to the triggering comment.
  issues: write
  # NOTE(review): presumably required for commenting/reacting on pull requests
  # specifically -- confirm before narrowing.
  pull-requests: write
jobs:
  # Builds the CLI at the PR head and at origin/main, runs the interleaved A/B/B/A
  # benchmark on the dedicated bench runner, and posts the outcome back to the PR.
  abba:
    # Manual-only: a "/bench-abba" comment on a PR, from a repo member. Never auto.
    if: >-
      github.event.issue.pull_request &&
      startsWith(github.event.comment.body, '/bench-abba') &&
      contains(fromJSON('["MEMBER","OWNER","COLLABORATOR"]'), github.event.comment.author_association)
    runs-on: [self-hosted, bench]
    # Generous ceiling so a hang/OOM can't strand the single bench runner; the
    # workload itself is ~30-40 min at the default 20 pairs (clamped to <=40).
    timeout-minutes: 120
    steps:
      # React to the triggering comment and announce that the bench server is busy.
      - name: Acknowledge (react + occupancy notice)
        uses: actions/github-script@v7
        with:
          script: |
            await github.rest.reactions.createForIssueComment({
              owner: context.repo.owner, repo: context.repo.repo,
              comment_id: context.payload.comment.id, content: 'eyes'
            });
            await github.rest.issues.createComment({
              owner: context.repo.owner, repo: context.repo.repo,
              issue_number: context.issue.number,
              body: '⏳ **ABBA tiebreaker started** on the bench server (~30–40 min). The bench server is occupied until it finishes.'
            });

      # Outputs: `head_sha` (pinned PR head commit) and `pairs` (validated pair count).
      - name: Resolve PR head + pair count
        id: cfg
        env:
          GH_TOKEN: ${{ github.token }}
          PR_NUM: ${{ github.event.issue.number }}
          # Untrusted text: passed through the environment, never interpolated
          # into the script itself.
          COMMENT_BODY: ${{ github.event.comment.body }}
        run: |
          # Resolve the head SHA (not the branch name): pinning the commit works for
          # fork PRs too (the branch lives in the fork, not origin/) and avoids a
          # force-push race mid-run.
          HEAD_SHA=$(gh pr view "$PR_NUM" --repo "$GITHUB_REPOSITORY" --json headRefOid -q .headRefOid)
          echo "head_sha=$HEAD_SHA" >> "$GITHUB_OUTPUT"
          # Optional pair count, e.g. "/bench-abba 32"; default 20. Clamp to [2,40]
          # so a "/bench-abba 10000" can't monopolize the single bench server.
          # Only line 1 is parsed (the command must start the comment), so a
          # multi-line body can never yield a multi-line value. `[0-9][0-9]*` is
          # the portable spelling of GNU sed's `[0-9]\+`.
          N=$(printf '%s\n' "$COMMENT_BODY" | sed -n '1s|^/bench-abba[[:space:]]*\([0-9][0-9]*\).*|\1|p')
          N=${N:-20}
          # Fail CLOSED: accept N only when both range tests positively succeed.
          # A value `[` cannot parse (e.g. a 20-digit number that overflows) makes
          # the tests error out; it must fall back to the default rather than slip
          # through unclamped.
          if ! { [ "$N" -ge 2 ] && [ "$N" -le 40 ]; } 2>/dev/null; then
            echo "::warning::pair count $N out of range [2,40]; using 20"
            N=20
          fi
          # Normalize e.g. "08" -> "8" so later arithmetic can't misread it as octal.
          N=$((10#$N))
          echo "pairs=$N" >> "$GITHUB_OUTPUT"

      - name: Checkout (full history for ref resolution)
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # Makes the PR head commit available locally, including for PRs from forks.
      - name: Fetch PR head commit (works for fork PRs)
        env:
          PR_NUM: ${{ github.event.issue.number }}
        run: git fetch origin "pull/$PR_NUM/head" --quiet

      - name: Add cargo to PATH
        run: echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"

      - name: Run ABBA tiebreaker
        id: run
        env:
          HEAD_SHA: ${{ steps.cfg.outputs.head_sha }}
          PAIRS: ${{ steps.cfg.outputs.pairs }}
        run: |
          export SYSROOT_DIR="$HOME/.lambda-vm-sysroot"
          set -o pipefail
          # /tmp outlives a job on the self-hosted runner: drop logs left behind by
          # a previous run so nothing stale can be picked up later.
          rm -f /tmp/abba_out.txt /tmp/abba_result.txt
          # bench_abba.sh builds the cli at both refs (isolated worktree), runs the
          # interleaved pairs, and prints the paired-t CI + exact Wilcoxon test.
          # Pass the head SHA (pinned above) so fork PRs resolve.
          scripts/bench_abba.sh "$HEAD_SHA" origin/main "$PAIRS" 2>&1 | tee /tmp/abba_out.txt
          # Keep only the final result section for the PR comment.
          sed -n '/=== ABBA paired result/,$p' /tmp/abba_out.txt > /tmp/abba_result.txt

      # Always reports back to the PR, whatever happened to the steps above.
      - name: Post result
        if: always()
        uses: actions/github-script@v7
        env:
          HEAD_SHA: ${{ steps.cfg.outputs.head_sha }}
          PAIRS: ${{ steps.cfg.outputs.pairs }}
          OUTCOME: ${{ steps.run.outcome }}
        with:
          script: |
            const fs = require('fs');
            // Best-effort read: a missing or unreadable file yields ''.
            const read = (p) => { try { return fs.readFileSync(p, 'utf8').trim(); } catch { return ''; } };
            // Both values are empty when the cfg step itself failed.
            const head = (process.env.HEAD_SHA || '').slice(0, 10) || 'unknown';
            const pairs = process.env.PAIRS || '?';
            const outcome = process.env.OUTCOME;
            const runUrl = `${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`;
            let body = `## ABBA tiebreaker — \`${head}\` vs \`main\` (${pairs} pairs)\n\n`;
            if (outcome === 'success') {
              const res = read('/tmp/abba_result.txt') || read('/tmp/abba_out.txt');
              body += '```\n' + res + '\n```\n';
              body += '\n<sub>Drift-free interleaved A/B/B/A measurement. + = PR faster. ';
              body += 'Trust the verdict when paired-t and Wilcoxon agree.</sub>\n';
            } else if (outcome === 'failure') {
              const tail = read('/tmp/abba_out.txt').split('\n').slice(-30).join('\n');
              body += `❌ Run failed. Last log lines:\n\n` + '```\n' + tail + '\n```\n';
            } else if (outcome === 'cancelled') {
              body += `⚠️ Run cancelled before it finished; no result was produced. [Workflow run](${runUrl})\n`;
            } else {
              // The benchmark step never started (an earlier step failed or the job
              // was cancelled first). The log files are deliberately NOT read here:
              // anything under /tmp would be left over from a previous run on this
              // runner.
              body += `❌ The benchmark step did not run (an earlier step failed or the run was cancelled). See the [workflow run](${runUrl}).\n`;
            }
            await github.rest.issues.createComment({
              owner: context.repo.owner, repo: context.repo.repo,
              issue_number: context.issue.number, body
            });