Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions .github/workflows/release-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,13 @@ jobs:
done
}
fails=0
# Incremental skip: a combo's content hash folds its build source + the
# digest of every base it's FROM. Identical hash → its :src-<hash> tag
# already exists → skip; a changed base → new digest → new hash → rebuild
# (the cascade). gosu/otel/process-compose/model are shared — resolve once.
dg() { docker buildx imagetools inspect "$1" --format '{{.Manifest.Digest}}' 2>/dev/null || echo absent; }
GOSU_D=$(dg "${REGISTRY}/core/gosu:${TAG}"); OTEL_D=$(dg "${REGISTRY}/core/otel:${TAG}")
PC_D=$(dg "${REGISTRY}/core/process-compose:${TAG}"); MODEL_D=$(dg "${REGISTRY}/models/bifrost:${TAG}")
while read -r it; do
IFS=$'\t' read -r B A TASK < <(jq -r '[.b,.a,.task]|@tsv' <<< "$it")
if [ -n "$TASK" ]; then # per-task combo: evals/<b>-<task>--<a>
Expand All @@ -598,7 +605,9 @@ jobs:
eb="$B"; bench_img="${REGISTRY}/benchmarks/${B}:${TAG}"
PLAT="linux/amd64,linux/arm64" # light overlay → multi-arch is fine
fi
if [ "${{ inputs.skip_published }}" = "true" ] && docker buildx imagetools inspect "${REGISTRY}/evals/${eb}--${A}:${TAG}" >/dev/null 2>&1; then echo "skip-published: evals/${eb}--${A}"; continue; fi
HASH=$(containers/scripts/combo-src-hash.sh "$(dg "$bench_img")" "$(dg "${REGISTRY}/agents/${A}:${TAG}")" "$GOSU_D" "$OTEL_D" "$PC_D" "$MODEL_D" "standalone=${STANDALONE}")
ht="${REGISTRY}/evals/${eb}--${A}:src-${HASH}"
if [ "${{ inputs.skip_published }}" = "true" ] && docker buildx imagetools inspect "$ht" >/dev/null 2>&1; then echo "skip-unchanged: evals/${eb}--${A} (src-${HASH})"; continue; fi
# eval = lean base (sidecar mode); eval-standalone = single-container
# bundle (gateway+otelcol+process-compose in-image). bake builds eval
# once, then layers standalone on it via the eval-base context.
Expand All @@ -610,7 +619,9 @@ jobs:
BENCHMARK_IMAGE="$bench_img" AGENT_IMAGE="${REGISTRY}/agents/${A}:${TAG}" \
retry docker buildx bake -f containers/docker-bake.hcl -f containers/core/combination.docker-bake.hcl \
--set "*.platform=${PLAT}" \
"${ACT[@]}"; then :; \
"${ACT[@]}"; then
# stamp the content-hash tag so a later skip_published run skips this exact source.
[ "$DRY" = "true" ] || docker buildx imagetools create -t "$ht" "${REGISTRY}/evals/${eb}--${A}:${TAG}" 2>/dev/null || true
else echo "::error::combo failed: evals/${eb}--${A}"; fails=$((fails+1)); fi
echo "::endgroup::"
done < <(jq -c '.[]' <<< "$ITEMS")
Expand Down
36 changes: 36 additions & 0 deletions containers/scripts/combo-src-hash.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# Content hash for an eval combo (evals/<b>--<a> + its -standalone).
#
# Folds the combo's OWN build source together with the image digest of every
# base it is FROM. Same source + same parent digests -> identical hash, so the
# combos job can skip it (its :src-<hash> tag already exists). Change any base
# -> new parent digest -> new hash -> rebuild. That last property is the
# cascade: a base rebuild ripples into every combo on top of it, for free.
#
# Usage:
#   combo-src-hash.sh <bench_d> <agent_d> <gosu_d> <otel_d> <pc_d> <model_d> [extra...]
# where each <*_d> is a parent image's manifest digest, e.g.
#   docker buildx imagetools inspect <ref> --format '{{.Manifest.Digest}}'
# Every argument, however many are given, is folded into the hash in order
# (space-joined), so a caller may append extra discriminators after the digests.
#
# Output:
#   16 hex characters on stdout. Exits 1 with a message on stderr when no
#   source file can be found under the source root.
#
# Env:
#   COMBO_SRC_ROOT  dir holding the combo build source (default: containers/core).
#                   A relative value is resolved from the repo root, because the
#                   script cds there before using it.
set -euo pipefail
cd "$(dirname "$0")/../.."   # repo root (this script lives in containers/scripts/)
root="${COMBO_SRC_ROOT:-containers/core}"

# sha256 of stdin — portable across Linux (sha256sum, the CI runners) and macOS
# (shasum). Used only in pipes, never as an xargs target (xargs can't call a
# shell function), so the source hash below cats the files into it.
sha() { if command -v sha256sum >/dev/null 2>&1; then sha256sum; else shasum -a 256; fi; }

# The combo's build inputs, shared across every combo: the two Dockerfiles, the
# framework scripts they COPY, and the bake graph that wires them.
paths=("$root/combination.Dockerfile" "$root/standalone.Dockerfile" \
       "$root/runner" "$root/entrypoint" "$root/combination.docker-bake.hcl")

# Keep only the inputs that exist. Handing find a missing path makes it exit
# non-zero, and under `set -e -o pipefail` that would abort the script silently
# (before the explicit error below) even when the other inputs are fine.
present=()
for p in "${paths[@]}"; do
  if [ -e "$p" ]; then present+=("$p"); fi
done

# Count regular files; existing-but-empty directories still count as "no source".
n=0
if [ "${#present[@]}" -gt 0 ]; then
  n=$(find "${present[@]}" -type f | wc -l | tr -d ' ')
fi
[ "$n" -gt 0 ] || { echo "combo-src-hash: no source files under $root" >&2; exit 1; }

# Hash the concatenated file contents, ordered by path, so any edit changes the
# result. LC_ALL=C pins the ordering to byte order so it does not vary with the
# caller's locale; NUL delimiters keep paths containing whitespace intact.
# Only contents are hashed — the paths themselves are not part of the digest.
src=$(find "${present[@]}" -type f -print0 | LC_ALL=C sort -z | xargs -0 cat | sha | cut -c1-12)

# Fold the source hash with every argument -> the combo's content hash.
printf '%s|%s' "$src" "$*" | sha | cut -c1-16
39 changes: 39 additions & 0 deletions tests/build/hash-cascade.sweep.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Contract test for containers/scripts/combo-src-hash.sh — the hash that drives
# the combos job's :src-<hash> skip. Asserts it is:
#   - deterministic     (same inputs -> same hash, so unchanged combos skip)
#   - cascading         (any parent digest changes -> hash changes, so a base
#                        rebuild rebuilds everything on top of it)
#   - argument-complete (a trailing extra argument, like the `standalone=...`
#                        flag the release workflow appends, also flips the hash)
#   - source-sensitive  (a combo-source edit -> hash changes, so a Dockerfile
#                        change rebuilds)
# Pure hashing — no docker, no network — so it runs on every `cargo test`.
set -euo pipefail
cd "$(dirname "$0")/../.."   # repo root
H=containers/scripts/combo-src-hash.sh
fail=0
# neq/eq <label> <a> <b>: record a failure (without aborting) when the two
# values are equal / differ, so one run reports every broken property.
neq() { if [ "$2" = "$3" ]; then echo "FAIL: $1 — both '$2'"; fail=1; fi; }
eq()  { if [ "$2" != "$3" ]; then echo "FAIL: $1 — '$2' != '$3'"; fail=1; fi; }

# Run the hash script and insist on a well-formed result (16 lowercase hex
# chars). Always call this via a plain assignment, `v=$(combo_hash ...)`: a
# command substitution used directly as an argument discards the exit status,
# so a crashing hash script would produce "" and make every neq check pass
# vacuously. Via an assignment, `set -e` aborts the test instead.
combo_hash() {
  local out
  out=$("$H" "$@") || { echo "FAIL: $H exited non-zero (args: $*)" >&2; return 1; }
  if [[ ! "$out" =~ ^[0-9a-f]{16}$ ]]; then
    echo "FAIL: $H printed '$out', expected 16 hex chars (args: $*)" >&2
    return 1
  fi
  printf '%s\n' "$out"
}

base=(benchD agentD gosuD otelD pcD modelD)
names=(bench agent gosu otel pc model)

b=$(combo_hash "${base[@]}")
again=$(combo_hash "${base[@]}")
eq "deterministic" "$b" "$again"

# Each of the 6 parents must, when its digest changes, flip the combo hash.
for i in "${!base[@]}"; do
  args=("${base[@]}")
  args[i]="CHANGED"
  v=$(combo_hash "${args[@]}")
  neq "${names[i]} cascade" "$b" "$v"
done

# Extra trailing arguments are part of the hash too.
on=$(combo_hash "${base[@]}" standalone=true)
off=$(combo_hash "${base[@]}" standalone=false)
neq "extra-arg sensitive" "$on" "$off"
neq "extra-arg vs none" "$b" "$on"

# Source-sensitivity: an edit to a combo-source file flips the hash. Use a temp
# copy so the repo working tree is never touched.
tmp=$(mktemp -d); trap 'rm -rf "$tmp"' EXIT
cp -R containers/core "$tmp/core"
s=$(COMBO_SRC_ROOT="$tmp/core" combo_hash "${base[@]}")
printf '\n# hash-cascade test edit\n' >> "$tmp/core/combination.Dockerfile"
s2=$(COMBO_SRC_ROOT="$tmp/core" combo_hash "${base[@]}")
neq "source sensitive" "$s" "$s2"

if [ "$fail" = 0 ]; then
  echo "PASS: combo hash is deterministic, cascades over all 6 parents, and is source-sensitive"
else
  exit 1
fi
24 changes: 24 additions & 0 deletions tests/build/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1038,3 +1038,27 @@ fn eval_local_resolves_from_full_graph() {
);
}
}

// ─── combo source-hash contract (incremental :src-<hash> skip) ────────
//
// The combos job tags each built combo `:src-<hash>` and, on a re-run with
// skip_published, skips any combo whose tag already exists. The hash
// (containers/scripts/combo-src-hash.sh) must be deterministic, cascade over
// every parent digest, and be source-sensitive — or the skip is wrong. This
// guard is pure hashing (no docker), so unlike the bake/build checks above it
// runs on every `cargo test`, not just the build lane.
/// Runs `tests/build/hash-cascade.sweep.sh` under `bash` from the repo root and
/// fails, echoing the script's captured stdout and stderr, if it exits non-zero.
#[test]
fn combo_src_hash_cascade() {
    let repo = test_support::repo_root();
    let script = repo.join("tests/build/hash-cascade.sweep.sh");

    let mut sweep = Command::new("bash");
    sweep.arg(script).current_dir(&repo);
    let result = sweep
        .output()
        .expect("run tests/build/hash-cascade.sweep.sh");

    // Decode both streams up front so the failure message below can show them.
    let stdout = String::from_utf8_lossy(&result.stdout);
    let stderr = String::from_utf8_lossy(&result.stderr);
    assert!(
        result.status.success(),
        "combo source-hash contract failed:\n--- stdout ---\n{}--- stderr ---\n{}",
        stdout,
        stderr,
    );
}
Loading