.github/workflows/benchmark-backfill.yml (new file, 194 additions, 0 deletions)
name: Benchmarks (Backfill)

# This workflow runs up-to-date benchmarks on historical commits to populate
# CodSpeed with baseline data. It uses "Runtime Dependency Injection":
#
# 1. Checkout the historical commit (code under test)
# 2. Copy benchmarks/ from source branch (modern test harness)
# 3. Install project using historical pyproject.toml (correct runtime deps)
# 4. Extract benchmark deps from source branch's pyproject.toml (single source of truth)
# 5. Run pytest directly (bypasses missing Hatch env in old commits)
#
# See `benchmarks/README.md` for more information.
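#
# Example dispatch from the CLI (the SHAs are placeholders, not real commits):
#   gh workflow run benchmark-backfill.yml -f from_commit=<older-sha> -f to_commit=<newer-sha>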

on:
workflow_dispatch:
inputs:
from_commit:
description: 'Start commit SHA (older). Max 200 commits (or 128 with integration).'
required: true
type: string
to_commit:
description: 'End commit SHA (newer). Max 200 commits (or 128 with integration).'
required: false
default: 'HEAD'
type: string
benchmark_source:
description: 'Branch to copy benchmarks from'
required: false
default: 'main'
type: string
benchmark_set:
description: 'Benchmark set to run'
required: false
default: 'fast'
type: choice
options:
- fast
- full
include_integration:
description: 'Include integration benchmarks (uses Macro Runners)'
required: false
default: false
type: boolean

permissions:
contents: read
id-token: write

jobs:
prepare:
runs-on: ubuntu-latest
outputs:
commits: ${{ steps.get-commits.outputs.commits }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Get commits in range
id: get-commits
run: |
FROM="${{ inputs.from_commit }}"
TO="${{ inputs.to_commit }}"

# GitHub Actions has a hard limit of 256 jobs per matrix.
# When integration benchmarks are enabled, both component and integration
# jobs run on each commit (2 jobs per commit), so we halve the limit.
if [ "${{ inputs.include_integration }}" = "true" ]; then
MAX_COMMITS=128 # 128 commits × 2 jobs = 256 jobs max
else
MAX_COMMITS=200 # Leave room for future matrix expansion
fi

# Get the most recent commits in the range
# rev-list outputs newest first, head takes the N newest, tac reverses to chronological order
COMMITS=$(git rev-list "$FROM^..$TO" | head -n "$MAX_COMMITS" | tac)

TOTAL=$(git rev-list "$FROM^..$TO" | wc -l | tr -d ' ')
SELECTED=$(echo "$COMMITS" | wc -l | tr -d ' ')

if [ "$TOTAL" -gt "$MAX_COMMITS" ]; then
echo "::warning::Range contains $TOTAL commits, but only the $MAX_COMMITS most recent will be benchmarked (GitHub Actions limit is 256 jobs per matrix)."
fi

# Convert to JSON array
JSON_COMMITS=$(echo "$COMMITS" | jq -R -s -c 'split("\n") | map(select(length > 0))')
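# Example with placeholder SHAs:
#   printf 'abc123\ndef456\n' | jq -R -s -c 'split("\n") | map(select(length > 0))'
# prints ["abc123","def456"]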
echo "commits=$JSON_COMMITS" >> $GITHUB_OUTPUT
echo "Will benchmark $SELECTED commits (out of $TOTAL in range):"
echo "$COMMITS"

# Component benchmarks: CPU-bound, pure Python operations
# Uses CPU simulation on standard GitHub runners
backfill-component:
needs: prepare
runs-on: ubuntu-latest
timeout-minutes: 60
strategy:
fail-fast: false
max-parallel: 5 # Throttle to avoid overwhelming CodSpeed ingestion
matrix:
commit: ${{ fromJson(needs.prepare.outputs.commits) }}

steps:
- uses: actions/checkout@v4
with:
ref: ${{ matrix.commit }}
fetch-depth: 0

- name: Fetch benchmark assets from source branch
run: |
git fetch origin ${{ inputs.benchmark_source }}
rm -rf benchmarks/
git restore --source=origin/${{ inputs.benchmark_source }} --worktree benchmarks/
git show origin/${{ inputs.benchmark_source }}:pyproject.toml > /tmp/source_pyproject.toml

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"

- name: Install uv
uses: astral-sh/setup-uv@v4

- name: Install project (uses historical pyproject.toml)
run: uv pip install -e . --system

- name: Install benchmark dependencies (from source branch)
run: |
python benchmarks/scripts/extract_deps.py /tmp/source_pyproject.toml \
| uv pip install -r - --system

- name: Run component benchmarks with CodSpeed
uses: CodSpeedHQ/action@v4
with:
mode: simulation
run: |
if [ "${{ inputs.benchmark_set }}" = "fast" ]; then
pytest benchmarks/ --codspeed -m "not slow and not integration"
else
pytest benchmarks/ --codspeed -m "not integration"
fi

# Integration benchmarks: I/O-bound operations with network and file access
# Uses walltime on Macro Runners - only runs if include_integration is true
backfill-integration:
needs: prepare
if: ${{ inputs.include_integration }}
runs-on: codspeed-macro
timeout-minutes: 60
strategy:
fail-fast: false
max-parallel: 2 # Lower parallelism to conserve 600 min/month Macro Runner quota
matrix:
commit: ${{ fromJson(needs.prepare.outputs.commits) }}

steps:
- uses: actions/checkout@v4
with:
ref: ${{ matrix.commit }}
fetch-depth: 0

- name: Fetch benchmark assets from source branch
run: |
git fetch origin ${{ inputs.benchmark_source }}
rm -rf benchmarks/
git restore --source=origin/${{ inputs.benchmark_source }} --worktree benchmarks/
git show origin/${{ inputs.benchmark_source }}:pyproject.toml > /tmp/source_pyproject.toml

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"

- name: Install uv
uses: astral-sh/setup-uv@v4

- name: Install project (uses historical pyproject.toml)
run: uv pip install -e . --system

- name: Install benchmark dependencies (from source branch)
run: |
python benchmarks/scripts/extract_deps.py /tmp/source_pyproject.toml \
| uv pip install -r - --system

- name: Run integration benchmarks with CodSpeed (walltime)
uses: CodSpeedHQ/action@v4
with:
mode: walltime
run: |
if [ "${{ inputs.benchmark_set }}" = "fast" ]; then
pytest benchmarks/ --codspeed -m "integration and not slow"
else
pytest benchmarks/ --codspeed -m "integration"
fi
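The backfill jobs above lean on benchmarks/scripts/extract_deps.py, which is copied from the source branch along with the rest of benchmarks/ and is not part of this diff. The sketch below is a hypothetical reconstruction of its contract, assuming the benchmark dependencies live in a Hatch env table in pyproject.toml: a pyproject.toml path in, one requirement per line on stdout, ready to pipe into `uv pip install -r - --system`.

import sys
import tomllib  # stdlib on Python 3.11+, which these jobs pin


def main() -> None:
    # Load whichever pyproject.toml the caller points at (here, the source
    # branch's copy saved to /tmp/source_pyproject.toml).
    with open(sys.argv[1], "rb") as f:
        pyproject = tomllib.load(f)

    # Assumed location of the benchmark deps -- adjust to the project's
    # actual layout (a PEP 735 dependency group would work the same way).
    deps = (
        pyproject.get("tool", {})
        .get("hatch", {})
        .get("envs", {})
        .get("benchmark", {})
        .get("dependencies", [])
    )

    # One requirement per line, i.e. a requirements file on stdout.
    print("\n".join(deps))


if __name__ == "__main__":
    main()

Under that contract, the install step's pipeline resolves the modern harness's dependencies against whatever runtime deps the historical pyproject.toml already installed.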
.github/workflows/benchmarks-integration.yml (new file, 54 additions, 0 deletions)
name: Benchmarks (Integration)

# Runs integration benchmarks on PRs and main using walltime mode on CodSpeed
# Macro Runners. Walltime accurately measures I/O, network, and system calls.
# See benchmarks/README.md for details.

on:
pull_request:
push:
branches: [main]

permissions:
contents: read
id-token: write

jobs:
benchmark-integration:
runs-on: codspeed-macro
timeout-minutes: 30

steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"

- name: Install hatch
run: pip install hatch

- name: Run integration benchmarks with CodSpeed (walltime)
uses: CodSpeedHQ/action@v4
with:
mode: walltime
run: hatch run benchmark:run --codspeed -m "integration and not slow"

- name: Generate benchmark JSON (fallback)
if: always()
run: |
hatch run benchmark:run \
--benchmark-only \
--benchmark-json=benchmark-results-integration.json \
-m "integration and not slow" || true

- name: Upload benchmark results
uses: actions/upload-artifact@v4
if: always()
with:
name: benchmark-results-integration
path: benchmark-results-integration.json
retention-days: 30
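For context, here is the shape of a benchmark that `-m "integration and not slow"` would select; the file and test names are illustrative, not taken from the repository. pytest-codspeed exposes a pytest-benchmark-compatible `benchmark` fixture, and the `integration` marker is what routes a test to the walltime jobs.

import pytest


@pytest.mark.integration
def test_read_manifest(benchmark, tmp_path):
    # Set up a small file so the measured callable does real I/O.
    path = tmp_path / "manifest.txt"
    path.write_text("hello\n" * 1024)

    # The benchmark fixture times the callable; in walltime mode on a Macro
    # Runner this captures actual I/O latency rather than simulated CPU cost.
    result = benchmark(path.read_text)
    assert result.startswith("hello")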
.github/workflows/benchmarks-nightly.yml (new file, 137 additions, 0 deletions)
name: Benchmarks (Nightly)

# Runs full benchmark suite nightly or on PRs with 'run-benchmarks' label.
# Component benchmarks use CPU simulation, integration benchmarks use walltime.
# Skips if no commits in 24 hours. See benchmarks/README.md for details.

on:
schedule:
- cron: "0 2 * * *" # 2 AM UTC daily
workflow_dispatch: # Allow manual trigger
pull_request:
types: [labeled]

permissions:
contents: read
id-token: write

jobs:
check-changes:
runs-on: ubuntu-latest
outputs:
should_run: ${{ steps.check.outputs.should_run }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 2

- name: Check if should run
id: check
run: |
# Always run for label triggers and manual dispatch
if [ "${{ github.event_name }}" = "pull_request" ] || [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
echo "should_run=true" >> $GITHUB_OUTPUT
exit 0
fi

# For scheduled runs, skip if HEAD hasn't changed in 24 hours
LAST_COMMIT_TIME=$(git log -1 --format=%ct)
NOW=$(date +%s)
HOURS_AGO=$(( (NOW - LAST_COMMIT_TIME) / 3600 ))
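# e.g. a last commit 90000 seconds ago gives HOURS_AGO = 25, so the run is skipped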

if [ "$HOURS_AGO" -gt 24 ]; then
echo "No commits in the last 24 hours, skipping nightly benchmark"
echo "should_run=false" >> $GITHUB_OUTPUT
else
echo "should_run=true" >> $GITHUB_OUTPUT
fi

# Component benchmarks: CPU-bound, pure Python operations
# Uses CPU simulation for deterministic, hardware-independent measurements
component-benchmarks:
needs: check-changes
if: |
needs.check-changes.outputs.should_run == 'true' &&
(github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks'))
runs-on: ubuntu-latest
timeout-minutes: 60

steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"

- name: Install hatch
run: pip install hatch

- name: Run component benchmarks with CodSpeed
uses: CodSpeedHQ/action@v4
with:
mode: simulation
run: hatch run benchmark:run --codspeed -m "not integration"

- name: Generate benchmark JSON (fallback)
if: always()
run: |
hatch run benchmark:run \
--benchmark-only \
--benchmark-json=benchmark-results-component.json \
-m "not integration" || true

- name: Upload benchmark results
uses: actions/upload-artifact@v4
if: always()
with:
name: benchmark-results-nightly-component
path: benchmark-results-component.json
retention-days: 90

# Integration benchmarks: I/O-bound operations with network and file access
# Uses walltime on Macro Runners for accurate real-world measurements
integration-benchmarks:
needs: check-changes
if: |
needs.check-changes.outputs.should_run == 'true' &&
(github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks'))
runs-on: codspeed-macro
timeout-minutes: 60

steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"

- name: Install hatch
run: pip install hatch

- name: Run integration benchmarks with CodSpeed (walltime)
uses: CodSpeedHQ/action@v4
with:
mode: walltime
run: hatch run benchmark:run --codspeed -m "integration"

- name: Generate benchmark JSON (fallback)
if: always()
run: |
hatch run benchmark:run \
--benchmark-only \
--benchmark-json=benchmark-results-integration.json \
-m "integration" || true

- name: Upload benchmark results
uses: actions/upload-artifact@v4
if: always()
with:
name: benchmark-results-nightly-integration
path: benchmark-results-integration.json
retention-days: 90
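All three workflows select tests with `-m` expressions over the `integration` and `slow` markers, so those markers must be registered somewhere in the project (pyproject.toml, pytest.ini, or a conftest.py). If they are not already, a minimal sketch, assuming a conftest.py under benchmarks/, would be:

def pytest_configure(config):
    # Register the markers the workflows' -m expressions rely on, so pytest
    # does not emit PytestUnknownMarkWarning for them.
    config.addinivalue_line(
        "markers",
        "integration: I/O-bound benchmark, run in walltime mode on Macro Runners",
    )
    config.addinivalue_line(
        "markers",
        "slow: long-running benchmark, excluded from the fast set",
    )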