Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions .github/workflows/build-wheels.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Builds the modelexpress Python client wheels via docker/Dockerfile.client-wheel
# and uploads the resulting artifacts to NV Artifactory.
#
# Triggered on:
# - push to pull-request/<pr_id> (mirrored from contributor PRs by
# copy-pr-bot): lands under pr/<pr_id>/<sha>/<run_id>/<run_attempt>/<arch>/
# - push to main and release/**: lands under
# post-merge/<sha>/<run_id>/<run_attempt>/<arch>/
#
# copy-pr-bot mirrors PRs (including fork PRs) into the pull-request/<pr_id>
# branch namespace inside this repo, which lets the workflow access
# environment-scoped secrets that fork PRs can't reach directly.
#
# Runs in parallel to (does not gate on) the rest of CI. Upload is
# environment-gated on `automated-release`.

name: Build & upload client wheels

on:
  push:
    branches:
      - 'pull-request/*'
      - main
      - 'release/**'

# One concurrency group per ref. Only refs under refs/heads/pull-request/
# cancel a superseded in-flight run; runs for other refs are never cancelled.
concurrency:
  group: build-wheels-${{ github.ref }}
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/heads/pull-request/') }}

permissions:
  contents: read

jobs:
  build-and-upload:
    name: Build & upload wheels (${{ matrix.arch }})
    # The whole job (build and upload) runs in this environment, which is
    # where the Artifactory secrets used below are resolved from.
    environment: automated-release
    runs-on: ${{ matrix.runner }}
    strategy:
      # Let the other architecture finish even if one of them fails.
      fail-fast: false
      matrix:
        include:
          - arch: amd64
            runner: prod-modelexpress-builder-amd-v1
            platform: linux/amd64
          - arch: arm64
            runner: prod-modelexpress-builder-arm-v1
            platform: linux/arm64

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # No later step needs git credentials; do not leave the token in
          # .git/config while the checked-out code is being built.
          persist-credentials: false

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Compute artifactory subpath
        id: dest
        env:
          ARCH: ${{ matrix.arch }}
        run: |
          # Both layouts end in <sha>/<run_id>/<run_attempt>/<arch>; the values
          # come from the runner's default environment variables rather than
          # being templated into the script text.
          SUFFIX="${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}/${ARCH}"
          case "${GITHUB_REF_NAME}" in
            pull-request/*)
              PR_ID="${GITHUB_REF_NAME#pull-request/}"
              SUBPATH="pr/${PR_ID}/${SUFFIX}"
              ;;
            *)
              SUBPATH="post-merge/${SUFFIX}"
              ;;
          esac
          echo "subpath=${SUBPATH}" >> "$GITHUB_OUTPUT"
          echo "Destination subpath: ${SUBPATH}"

      - name: Build wheels
        env:
          PLATFORM: ${{ matrix.platform }}
        run: |
          mkdir -p dist
          docker buildx build \
            --platform "${PLATFORM}" \
            -f docker/Dockerfile.client-wheel \
            --target export \
            --output type=local,dest=./dist \
            .
          echo "Built artifacts:"
          ls -la dist/

      - name: Set up JFrog CLI
        uses: jfrog/setup-jfrog-cli@v4
        env:
          JF_URL: ${{ secrets.ARTIFACTORY_URL }}
          JF_ACCESS_TOKEN: ${{ secrets.ARTIFACTORY_TOKEN }}

      - name: Upload to Artifactory
        env:
          # Passed through the environment instead of being interpolated into
          # the script, so the values are never parsed as shell syntax.
          REPO_NAME: ${{ secrets.ARTIFACTORY_PYPI_REPO_NAME }}
          SUBPATH: ${{ steps.dest.outputs.subpath }}
        run: |
          DEST="${REPO_NAME}/${SUBPATH}/"
          echo "Uploading to: ${DEST}"
          # --fail-no-op: exit non-zero when a pattern matches no files, so a
          # build that produced no wheels or no sdist fails the job.
          jf rt upload --flat --fail-no-op "dist/*.whl" "${DEST}"
          jf rt upload --flat --fail-no-op "dist/*.tar.gz" "${DEST}"
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ jobs:

- name: Build Docker image
run: |
docker build -t model-express:latest .
docker build -f docker/Dockerfile -t model-express:latest .

- name: Test Docker image
run: |
Expand Down
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,10 @@ Cache directory resolution order: `MODEL_EXPRESS_CACHE_DIRECTORY` -> `HF_HUB_CAC

```bash
# Build production image
docker build -t model-express .
docker build -f docker/Dockerfile -t model-express .

# Run with docker-compose
docker-compose up --build
docker compose -f docker/docker-compose.yml up --build

# Build P2P client image
docker build -f examples/p2p_transfer_k8s/Dockerfile.client \
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ Load model weights directly from Azure Blob Storage, S3, or a PVC-backed local p
### Docker

```bash
docker-compose up --build
docker compose -f docker/docker-compose.yml up --build
```

---
Expand Down
File renamed without changes.
53 changes: 53 additions & 0 deletions docker/Dockerfile.client-wheel
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# syntax=docker/dockerfile:1.7
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Builds the modelexpress Python client distributions for the target
# platform (linux/amd64 or linux/arm64):
# - manylinux_2_28 wheels for cp310, cp311, cp312, cp313 (each compiles
# the modelexpress.vmm._alloc_ext shim against the matching CPython ABI
# and is hardened with `auditwheel repair`)
# - py3-none-any pure-Python wheel built with MX_SKIP_EXT=1 (no compiled
# extension; runtime falls back to the pool-reg path)
# - sdist tarball (.tar.gz)
#
# Multi-arch: pick the platform with buildx --platform. See
# docs/DEPLOYMENT.md for the full invocation examples.

# Per-architecture manylinux base images, aliased so that the FROM below can
# select one of them by name using TARGETARCH.
FROM quay.io/pypa/manylinux_2_28_x86_64 AS builder-amd64
FROM quay.io/pypa/manylinux_2_28_aarch64 AS builder-arm64

ARG TARGETARCH
FROM builder-${TARGETARCH} AS builder

WORKDIR /src
COPY modelexpress_client/python /src

# Quiet, cache-less pip, and put the cp313 interpreter's bin directory first
# on PATH so tools installed into it (uv, auditwheel) are found by bare name.
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
ENV PIP_NO_CACHE_DIR=1
ENV PATH="/opt/python/cp313-cp313/bin:${PATH}"

# Both build tools are installed once, into the cp313 interpreter: uv runs the
# build for every target interpreter, auditwheel post-processes built wheels.
RUN /opt/python/cp313-cp313/bin/pip install --upgrade uv auditwheel

# Step 1: build one raw wheel per CPython ABI into /tmp/raw.
# Step 2: run `auditwheel repair` on each raw wheel, writing results to /dist.
RUN set -eux; \
    mkdir -p /tmp/raw /dist; \
    for abi in cp310 cp311 cp312 cp313; do \
        uv build --wheel \
            --python "/opt/python/${abi}-${abi}/bin/python" \
            --out-dir /tmp/raw .; \
    done; \
    plat="manylinux_2_28_$(uname -m)"; \
    for raw_wheel in /tmp/raw/*.whl; do \
        auditwheel repair --plat "${plat}" -w /dist "${raw_wheel}"; \
    done

# Wheel + sdist built with MX_SKIP_EXT=1 set, written straight to /dist, then
# drop any .gitignore left in the output directory so it is not exported.
RUN set -eux; \
    export MX_SKIP_EXT=1; \
    uv build --sdist --wheel \
        --out-dir /dist \
        --python /opt/python/cp313-cp313/bin/python .; \
    rm -f /dist/.gitignore

# Artifact-only stage: contains nothing except the contents of /dist, placed
# at the filesystem root for `--output type=local`.
FROM scratch AS export
COPY --from=builder /dist/ /
4 changes: 2 additions & 2 deletions docker-compose.yml → docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ services:

model-express-server:
build:
context: .
dockerfile: Dockerfile
context: ..
dockerfile: docker/Dockerfile
ports:
- "8001:8001"
environment:
Expand Down
6 changes: 4 additions & 2 deletions docs/ARCHITECTURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,10 @@ graph TD
ModelExpress/
├── Cargo.toml # Workspace root (4 members)
├── Cargo.lock
├── Dockerfile # Multi-stage production image
├── docker-compose.yml # Single-service dev setup
├── docker/
│ ├── Dockerfile # Multi-stage production image
│ ├── Dockerfile.client-wheel # Builds Python client wheels + sdist
│ └── docker-compose.yml # Single-service dev setup
Comment on lines +67 to +70
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Replace the repository tree ASCII block with a Mermaid diagram.

These lines extend an ASCII tree in markdown; this should be represented as Mermaid to meet repo docs standards.

As per coding guidelines, **/*.md: Use mermaid diagrams instead of ASCII art in markdown files.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@docs/ARCHITECTURE.md` around lines 67 - 70, Replace the ASCII repository tree
block in ARCHITECTURE.md with a Mermaid diagram: remove the 3-line ASCII snippet
starting with "├── docker/" and instead add a Mermaid code block (```mermaid```)
that models the same structure (root node "docker" with child nodes
"Dockerfile", "Dockerfile.client-wheel", and "docker-compose.yml") using a
directed/top-down graph (e.g., graph TD) and include brief labels matching the
original comments; ensure the new block follows markdown fenced code block
syntax and adheres to the repo's mermaid diagram conventions.

├── run_integration_tests.sh # Integration test runner
├── test_client.sh # Client test script
├── test_grpc_transfer_k8s.sh # K8s gRPC transfer test
Expand Down
67 changes: 65 additions & 2 deletions docs/DEPLOYMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ See [`CLI.md`](CLI.md) for full CLI usage documentation.
The multi-stage Dockerfile builds all binaries (server, CLI, test tools):

```bash
docker build -t model-express .
docker build -f docker/Dockerfile -t model-express .
docker run -p 8001:8001 model-express
```

Expand All @@ -189,9 +189,72 @@ docker run -p 8001:8001 model-express
Single-service setup for local development:

```bash
docker-compose up --build
docker compose -f docker/docker-compose.yml up --build
```

### Python Client Distributions

`docker/Dockerfile.client-wheel` builds the publishable artifacts for the
Python client on top of `quay.io/pypa/manylinux_2_28_${arch}`. Per target
platform it produces:

- `manylinux_2_28_x86_64` or `manylinux_2_28_aarch64` wheels for cp310,
cp311, cp312, cp313 (each compiles the `modelexpress.vmm._alloc_ext` shim
against the matching CPython ABI and is hardened with `auditwheel repair`)
- `py3-none-any` pure-Python wheel built with `MX_SKIP_EXT=1` (no compiled
extension; runtime falls back to the pool-reg path)
- sdist tarball

The Dockerfile is multi-arch. Pick the target with buildx `--platform`:

```bash
# x86_64 only -> ./dist/*.whl, ./dist/*.tar.gz
docker buildx build --platform linux/amd64 \
-f docker/Dockerfile.client-wheel \
--target export --output type=local,dest=./dist .

# arm64 only -> ./dist/*.whl, ./dist/*.tar.gz
docker buildx build --platform linux/arm64 \
-f docker/Dockerfile.client-wheel \
--target export --output type=local,dest=./dist .

# Both at once -> ./dist/linux_amd64/* and ./dist/linux_arm64/*
docker buildx build --platform linux/amd64,linux/arm64 \
-f docker/Dockerfile.client-wheel \
--target export --output type=local,dest=./dist .
```

Cross-platform builds need QEMU emulation registered with buildx
(`docker run --privileged --rm tonistiigi/binfmt --install all` once per
host). Native builds on the matching arch run without emulation.

Without buildx (single arch, matches the host):

```bash
# Build the `builder` stage explicitly: the Dockerfile's last stage is the
# scratch-based `export` stage, which has neither `cp` nor a /dist directory.
docker build -f docker/Dockerfile.client-wheel --target builder -t mx-wheel-builder .
docker run --rm -v "$PWD/dist:/out" mx-wheel-builder cp -r /dist/. /out/
```
Comment on lines +234 to 236
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

The non-buildx example will fail with the default export stage image.

At Line 234 the build targets the Dockerfile default final stage (FROM scratch in docker/Dockerfile.client-wheel), so Line 235’s docker run ... cp ... has no cp binary to execute.

Proposed doc fix
-docker build -f docker/Dockerfile.client-wheel -t mx-wheel-builder .
-docker run --rm -v "$PWD/dist:/out" mx-wheel-builder cp -r /dist/. /out/
+docker build -f docker/Dockerfile.client-wheel --target builder -t mx-wheel-builder .
+docker run --rm -v "$PWD/dist:/out" mx-wheel-builder bash -lc 'cp -r /dist/. /out/'
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
docker build -f docker/Dockerfile.client-wheel -t mx-wheel-builder .
docker run --rm -v "$PWD/dist:/out" mx-wheel-builder cp -r /dist/. /out/
```
docker build -f docker/Dockerfile.client-wheel --target builder -t mx-wheel-builder .
docker run --rm -v "$PWD/dist:/out" mx-wheel-builder bash -lc 'cp -r /dist/. /out/'
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@docs/DEPLOYMENT.md` around lines 234 - 236, The docs currently build the
default final stage of docker/Dockerfile.client-wheel, which is the `export`
stage (FROM scratch), so docker run mx-wheel-builder cp ... fails because
scratch has no cp; update the example to build the stage that contains both a
shell and /dist by adding --target builder to the docker build command (e.g.,
docker build --target builder -f docker/Dockerfile.client-wheel -t
mx-wheel-builder .) so the subsequent docker run cp -r /dist/. /out/ works.
Do not use --target export: that is the scratch stage that causes the failure.


#### CI uploads to Artifactory

`.github/workflows/build-wheels.yml` runs this Dockerfile on every PR
(via `copy-pr-bot` mirroring into `pull-request/<pr_id>` branches) and
every push to `main` / `release/**`, building both archs in parallel on
velonix self-hosted runners and uploading the artifacts to NV Artifactory.

Destination layout under `${ARTIFACTORY_PYPI_REPO_NAME}`:

| Event | Subpath |
|---|---|
| `push` to `pull-request/<pr_id>` (copy-pr-bot mirror) | `pr/<pr_id>/<commit_sha>/<run_id>/<run_attempt>/<arch>/` |
| `push` to `main`, `release/**` | `post-merge/<commit_sha>/<run_id>/<run_attempt>/<arch>/` |

Each path contains the 6 artifacts from one arch: 4 manylinux wheels
(cp310-cp313), 1 `py3-none-any` wheel, and 1 sdist. The upload step is
gated on the `automated-release` GitHub environment, which holds three
secrets: `ARTIFACTORY_URL`, `ARTIFACTORY_TOKEN` (JFrog identity token),
and `ARTIFACTORY_PYPI_REPO_NAME`.

### Custom Client Image (P2P Transfers)

For GPU-to-GPU weight transfers with vLLM:
Expand Down
2 changes: 1 addition & 1 deletion examples/dynamo_model_cache_k8s/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

```bash
# Build the Model Express image
docker build -t model-express:latest .
docker build -f docker/Dockerfile -t model-express:latest .

# Tag for your registry (replace with your registry)
docker tag model-express:latest your-registry/model-express:latest
Expand Down
2 changes: 2 additions & 0 deletions modelexpress_client/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ def _warn_optional_skip(exc, ext_name="modelexpress.vmm._alloc_ext"):


def _ext_modules():
if os.environ.get("MX_SKIP_EXT"):
return []
extra_compile_args = ["-std=c++17", "-O3", "-fPIC"]
return [
Extension(
Expand Down
2 changes: 1 addition & 1 deletion test_grpc_transfer_k8s.sh
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ build_and_load_image() {
cd "$(dirname "$0")"

# Build the image locally
docker build -t "${IMAGE_NAME}:${IMAGE_TAG}" . 2>&1 | tail -5
docker build -f docker/Dockerfile -t "${IMAGE_NAME}:${IMAGE_TAG}" . 2>&1 | tail -5

# Load into kind cluster
log_info "Loading image into kind cluster..."
Expand Down
2 changes: 1 addition & 1 deletion test_multinode_k8s.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ done
# Step 1: Build and distribute image
if [ "$SKIP_BUILD" = false ]; then
echo "Building Docker image modelexpress:multinode-test..."
docker build -t modelexpress:multinode-test .
docker build -f docker/Dockerfile -t modelexpress:multinode-test .

if command -v /snap/bin/microk8s &>/dev/null; then
echo "Distributing image to microk8s cluster nodes..."
Expand Down
Loading