IBM · starpit · Jan 6, 2026 · Jan 6, 2026 · Jan 6, 2026 · Jan 6, 2026
diff --git a/.buildkite/ci_config.yaml b/.buildkite/ci_config.yaml
@@ -0,0 +1,24 @@
+name: vllm_ci
+job_dirs:
+  - ".buildkite/test_areas"
+  - ".buildkite/image_build"
+run_all_patterns:
+  - "docker/Dockerfile"
+  - "CMakeLists.txt"
+  - "requirements/common.txt"
+  - "requirements/cuda.txt"
+  - "requirements/build.txt"
+  - "requirements/test.txt"
+  - "setup.py"
+  - "csrc/"
+  - "cmake/"
+run_all_exclude_patterns:
+  - "docker/Dockerfile."
+  - "csrc/cpu/"
+  - "csrc/rocm/"
+  - "cmake/hipify.py"
+  - "cmake/cpu_extension.cmake"
+registries: public.ecr.aws/q9t5s3a7
+repositories:
+  main: "vllm-ci-postmerge-repo"
+  premerge: "vllm-ci-test-repo"
diff --git a/.buildkite/generate_index.py b/.buildkite/generate_index.py
diff --git a/.buildkite/image_build/image_build.sh b/.buildkite/image_build/image_build.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+set -e
+
+if [[ $# -lt 8 ]]; then
+  echo "Usage: $0 <registry> <repo> <commit> <branch> <vllm_use_precompiled> <vllm_merge_base_commit> <cache_from> <cache_to>"
+  exit 1
+fi
+
+REGISTRY=$1
+REPO=$2
+BUILDKITE_COMMIT=$3
+BRANCH=$4
+VLLM_USE_PRECOMPILED=$5
+VLLM_MERGE_BASE_COMMIT=$6
+CACHE_FROM=$7
+CACHE_TO=$8
+
+# authenticate with AWS ECR
+aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $REGISTRY
+aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 936637512419.dkr.ecr.us-east-1.amazonaws.com
+
+# docker buildx 
+docker buildx create --name vllm-builder --driver docker-container --use
+docker buildx inspect --bootstrap
+docker buildx ls
+
+# skip build if image already exists
+if [[ -z $(docker manifest inspect $REGISTRY/$REPO:$BUILDKITE_COMMIT) ]]; then
+  echo "Image not found, proceeding with build..."
+else
+  echo "Image found"
+  exit 0
+fi
+
+if [[ "${VLLM_USE_PRECOMPILED:-0}" == "1" ]]; then
+  merge_base_commit_build_args="--build-arg VLLM_MERGE_BASE_COMMIT=${VLLM_MERGE_BASE_COMMIT}"
+else
+  merge_base_commit_build_args=""
+fi
+
+# build
+docker buildx build --file docker/Dockerfile \
+  --build-arg max_jobs=16 \
+  --build-arg buildkite_commit=$BUILDKITE_COMMIT \
+  --build-arg USE_SCCACHE=1 \
+  --build-arg TORCH_CUDA_ARCH_LIST="8.0 8.9 9.0 10.0" \
+  --build-arg FI_TORCH_CUDA_ARCH_LIST="8.0 8.9 9.0a 10.0a" \
+  --build-arg VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED:-0}" \
+  ${merge_base_commit_build_args} \
+  --cache-from type=registry,ref=${CACHE_FROM},mode=max \
+  --cache-to type=registry,ref=${CACHE_TO},mode=max \
+  --tag ${REGISTRY}/${REPO}:${BUILDKITE_COMMIT} \
+  $( [[ "${BRANCH}" == "main" ]] && echo "--tag ${REGISTRY}/${REPO}:latest" ) \
+  --push \
+  --target test \
+  --progress plain .
diff --git a/.buildkite/image_build/image_build.yaml b/.buildkite/image_build/image_build.yaml
@@ -0,0 +1,57 @@
+group: Abuild
+steps:
+  - label: ":docker: Build image"
+    key: image-build
+    depends_on: []
+    commands:
+    - .buildkite/image_build/image_build.sh $REGISTRY $REPO $BUILDKITE_COMMIT $BRANCH $VLLM_USE_PRECOMPILED $VLLM_MERGE_BASE_COMMIT $CACHE_FROM $CACHE_TO
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+        - exit_status: -10  # Agent was lost
+          limit: 2
+
+  - label: ":docker: Build CPU image"
+    key: image-build-cpu
+    depends_on: []
+    commands:
+    - .buildkite/image_build/image_build_cpu.sh $REGISTRY $REPO $BUILDKITE_COMMIT
+    env:
+      DOCKER_BUILDKIT: "1"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+        - exit_status: -10  # Agent was lost
+          limit: 2
+
+  - label: ":docker: Build HPU image"
+    soft_fail: true
+    depends_on: []
+    key: image-build-hpu
+    commands:
+    - .buildkite/image_build/image_build_hpu.sh $REGISTRY $REPO $BUILDKITE_COMMIT
+    env:
+      DOCKER_BUILDKIT: "1"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+        - exit_status: -10  # Agent was lost
+          limit: 2
+
+  - label: ":docker: Build CPU arm64 image"
+    key: cpu-arm64-image-build
+    depends_on: []
+    optional: true
+    commands:
+    - .buildkite/image_build/image_build_cpu_arm64.sh $REGISTRY $REPO $BUILDKITE_COMMIT
+    env:
+      DOCKER_BUILDKIT: "1"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+        - exit_status: -10  # Agent was lost
+          limit: 2
diff --git a/.buildkite/image_build/image_build_cpu.sh b/.buildkite/image_build/image_build_cpu.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+set -e
+
+if [[ $# -lt 3 ]]; then
+  echo "Usage: $0 <registry> <repo> <commit>"
+  exit 1
+fi
+
+REGISTRY=$1
+REPO=$2
+BUILDKITE_COMMIT=$3
+
+# authenticate with AWS ECR
+aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $REGISTRY
+
+# skip build if image already exists
+if [[ -z $(docker manifest inspect $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu) ]]; then
+  echo "Image not found, proceeding with build..."
+else
+  echo "Image found"
+  exit 0
+fi
+
+# build
+docker build --file docker/Dockerfile.cpu \
+  --build-arg max_jobs=16 \
+  --build-arg buildkite_commit=$BUILDKITE_COMMIT \
+  --build-arg VLLM_CPU_AVX512BF16=true \
+  --build-arg VLLM_CPU_AVX512VNNI=true \
+  --build-arg VLLM_CPU_AMXBF16=true \
+  --tag $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu \
+  --target vllm-test \
+  --progress plain .
+
+# push
+docker push $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu
diff --git a/.buildkite/image_build/image_build_cpu_arm64.sh b/.buildkite/image_build/image_build_cpu_arm64.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+set -e
+
+if [[ $# -lt 3 ]]; then
+  echo "Usage: $0 <registry> <repo> <commit>"
+  exit 1
+fi
+
+REGISTRY=$1
+REPO=$2
+BUILDKITE_COMMIT=$3
+
+# authenticate with AWS ECR
+aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $REGISTRY
+
+# skip build if image already exists
+if [[ -z $(docker manifest inspect $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu) ]]; then
+  echo "Image not found, proceeding with build..."
+else
+  echo "Image found"
+  exit 0
+fi
+
+# build
+docker build --file docker/Dockerfile.cpu \
+  --build-arg max_jobs=16 \
+  --build-arg buildkite_commit=$BUILDKITE_COMMIT \
+  --tag $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu \
+  --target vllm-test \
+  --progress plain .
+
+# push
+docker push $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu
diff --git a/.buildkite/image_build/image_build_hpu.sh b/.buildkite/image_build/image_build_hpu.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+set -e
+
+if [[ $# -lt 3 ]]; then
+  echo "Usage: $0 <registry> <repo> <commit>"
+  exit 1
+fi
+
+REGISTRY=$1
+REPO=$2
+BUILDKITE_COMMIT=$3
+
+# authenticate with AWS ECR
+aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $REGISTRY
+
+# skip build if image already exists
+if [[ -z $(docker manifest inspect $REGISTRY/$REPO:$BUILDKITE_COMMIT-hpu) ]]; then
+  echo "Image not found, proceeding with build..."
+else
+  echo "Image found"
+  exit 0
+fi
+
+# build
+docker build \
+  --file tests/pytorch_ci_hud_benchmark/Dockerfile.hpu \
+  --build-arg max_jobs=16 \
+  --build-arg buildkite_commit=$BUILDKITE_COMMIT \
+  --tag $REGISTRY/$REPO:$BUILDKITE_COMMIT-hpu \
+  --progress plain \
+  https://github.com/vllm-project/vllm-gaudi.git
+
+# push
+docker push $REGISTRY/$REPO:$BUILDKITE_COMMIT-hpu
diff --git a/.buildkite/lm-eval-harness/configs/Meta-Llama-4-Maverick-17B-128E-Instruct-FP8.yaml b/.buildkite/lm-eval-harness/configs/Meta-Llama-4-Maverick-17B-128E-Instruct-FP8.yaml
@@ -8,3 +8,4 @@ tasks:
     value: 0.80
 limit: 250 # will run on 250 * 14 subjects = 3500 samples
 num_fewshot: 5
+rtol: 0.05
diff --git a/.buildkite/lm-eval-harness/configs/models-large-rocm.txt b/.buildkite/lm-eval-harness/configs/models-large-rocm.txt
@@ -0,0 +1 @@
+Meta-Llama-4-Maverick-17B-128E-Instruct-FP8.yaml
diff --git a/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh
@@ -2,7 +2,7 @@
 # We can use this script to compute baseline accuracy on chartqa for vllm.
 #
 # Make sure you have lm-eval-harness installed:
-#   pip install lm-eval==0.4.9
+#   pip install "lm-eval[api]>=0.4.9.2"
 
 usage() {
     echo``

diff --git a/.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh
@@ -2,7 +2,7 @@
 # We can use this script to compute baseline accuracy on GSM for transformers.
 #
 # Make sure you have lm-eval-harness installed:
-#   pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d#egg=lm-eval[api]
+#   pip install "lm-eval[api]>=0.4.9.2"
 
 usage() {
     echo``

diff --git a/.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh
@@ -3,7 +3,7 @@
 # We use this for fp8, which HF does not support.
 #
 # Make sure you have lm-eval-harness installed:
-#   pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d#egg=lm-eval[api]
+#   pip install "lm-eval[api]>=0.4.9.2"
 
 usage() {
     echo``

diff --git a/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh
@@ -3,7 +3,7 @@
 # We use this for fp8, which HF does not support.
 #
 # Make sure you have lm-eval-harness installed:
-#   pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d#egg=lm-eval[api]
+#   pip install "lm-eval[api]>=0.4.9.2"
 
 usage() {
     echo``
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Meta-Llama-4-Maverick-17B-128E-Instruct-FP8.yaml