diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml
new file mode 100644
index 00000000..d2dd3992
--- /dev/null
+++ b/.github/workflows/build-wheels.yml
@@ -0,0 +1,107 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Builds the modelexpress Python client wheels via docker/Dockerfile.client-wheel
+# and uploads the resulting artifacts to NV Artifactory.
+#
+# Triggered on:
+#   - push to pull-request/<N> (mirrored from contributor PRs by
+#     copy-pr-bot): lands under pr/<N>/<sha>/<run_id>/<run_attempt>/<arch>/
+#   - push to main and release/**: lands under
+#     post-merge/<sha>/<run_id>/<run_attempt>/<arch>/
+#
+# copy-pr-bot mirrors PRs (including fork PRs) into the pull-request/<N>
+# branch namespace inside this repo, which lets the workflow access
+# environment-scoped secrets that fork PRs can't reach directly.
+#
+# Runs in parallel to (does not gate on) the rest of CI. Upload is
+# environment-gated on `automated-release`.
+
+name: Build & upload client wheels
+
+on:
+  push:
+    branches:
+      - 'pull-request/*'
+      - main
+      - 'release/**'
+
+# One concurrency group per ref. Only runs on pull-request/ mirror branches
+# are cancelled by a newer push; main and release/** runs always finish.
+concurrency:
+  group: build-wheels-${{ github.ref }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/heads/pull-request/') }}
+
+permissions:
+  contents: read
+
+jobs:
+  build-and-upload:
+    name: Build & upload wheels (${{ matrix.arch }})
+    environment: automated-release
+    runs-on: ${{ matrix.runner }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - arch: amd64
+            runner: prod-modelexpress-builder-amd-v1
+            platform: linux/amd64
+          - arch: arm64
+            runner: prod-modelexpress-builder-arm-v1
+            platform: linux/arm64
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Compute artifactory subpath
+        id: dest
+        run: |
+          case "${GITHUB_REF_NAME}" in
+            pull-request/*)
+              PR_ID="${GITHUB_REF_NAME#pull-request/}"
+              SUBPATH="pr/${PR_ID}/${{ github.sha }}/${{ github.run_id }}/${{ github.run_attempt }}/${{ matrix.arch }}"
+              ;;
+            *)
+              SUBPATH="post-merge/${{ github.sha }}/${{ github.run_id }}/${{ github.run_attempt }}/${{ matrix.arch }}"
+              ;;
+          esac
+          echo "subpath=${SUBPATH}" >> "$GITHUB_OUTPUT"
+          echo "Destination subpath: ${SUBPATH}"
+
+      - name: Build wheels
+        run: |
+          mkdir -p dist
+          docker buildx build \
+            --platform ${{ matrix.platform }} \
+            -f docker/Dockerfile.client-wheel \
+            --target export \
+            --output type=local,dest=./dist \
+            .
+          echo "Built artifacts:"
+          ls -la dist/
+
+      - name: Set up JFrog CLI
+        uses: jfrog/setup-jfrog-cli@v4
+        env:
+          JF_URL: ${{ secrets.ARTIFACTORY_URL }}
+          JF_ACCESS_TOKEN: ${{ secrets.ARTIFACTORY_TOKEN }}
+
+      - name: Upload to Artifactory
+        # The repo name and the branch-derived subpath reach the script as
+        # environment variables rather than through inline expression
+        # expansion, so their contents are never parsed as shell syntax.
+        env:
+          ARTIFACTORY_REPO: ${{ secrets.ARTIFACTORY_PYPI_REPO_NAME }}
+          SUBPATH: ${{ steps.dest.outputs.subpath }}
+        run: |
+          DEST="${ARTIFACTORY_REPO}/${SUBPATH}/"
+          echo "Uploading to: ${DEST}"
+          # --fail-no-op: fail the step when a pattern matches no files,
+          # instead of reporting success for an empty upload.
+          jf rt upload --flat --fail-no-op "dist/*.whl" "${DEST}"
+          jf rt upload --flat --fail-no-op "dist/*.tar.gz" "${DEST}"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d3287be4..db3dc869 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -292,7 +292,7 @@ jobs:
 
       - name: Build Docker image
         run: |
-          docker build -t model-express:latest .
+          docker build -f docker/Dockerfile -t model-express:latest .
 
       - name: Test Docker image
         run: |
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e3954722..b13b6f1d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -145,10 +145,10 @@ Cache directory resolution order: `MODEL_EXPRESS_CACHE_DIRECTORY` -> `HF_HUB_CAC
 
 ```bash
 # Build production image
-docker build -t model-express .
+docker build -f docker/Dockerfile -t model-express .
 
 # Run with docker-compose
-docker-compose up --build
+docker compose -f docker/docker-compose.yml up --build
 
 # Build P2P client image
 docker build -f examples/p2p_transfer_k8s/Dockerfile.client \
diff --git a/README.md b/README.md
index 346c1bf6..b9bca635 100644
--- a/README.md
+++ b/README.md
@@ -162,7 +162,7 @@ Load model weights directly from Azure Blob Storage, S3, or a PVC-backed local p
 ### Docker
 
 ```bash
-docker-compose up --build
+docker compose -f docker/docker-compose.yml up --build
 ```
 
 ---
diff --git a/Dockerfile b/docker/Dockerfile
similarity index 100%
rename from Dockerfile
rename to docker/Dockerfile
diff --git a/docker/Dockerfile.client-wheel b/docker/Dockerfile.client-wheel
new file mode 100644
index 00000000..ed4fd8a7
--- /dev/null
+++ b/docker/Dockerfile.client-wheel
@@ -0,0 +1,64 @@
+# syntax=docker/dockerfile:1.7
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Builds the modelexpress Python client distributions for the target
+# platform (linux/amd64 or linux/arm64):
+#   - manylinux_2_28 wheels for cp310, cp311, cp312, cp313 (each compiles
+#     the modelexpress.vmm._alloc_ext shim against the matching CPython ABI
+#     and is hardened with `auditwheel repair`)
+#   - py3-none-any pure-Python wheel built with MX_SKIP_EXT=1 (no compiled
+#     extension; runtime falls back to the pool-reg path)
+#   - sdist tarball (.tar.gz)
+#
+# Multi-arch: pick the platform with buildx --platform. See
+# docs/DEPLOYMENT.md for the full invocation examples.
+
+# One named stage per architecture; the `builder` stage below picks one of
+# them by name via TARGETARCH.
+FROM quay.io/pypa/manylinux_2_28_x86_64 AS builder-amd64
+FROM quay.io/pypa/manylinux_2_28_aarch64 AS builder-arm64
+
+# NOTE(review): assumes the builder supplies TARGETARCH as `amd64` or `arm64`
+# (matching the stage names above); no default is set here - confirm.
+ARG TARGETARCH
+FROM builder-${TARGETARCH} AS builder
+
+WORKDIR /src
+COPY modelexpress_client/python /src
+
+ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PATH="/opt/python/cp313-cp313/bin:${PATH}"
+
+# uv drives the per-Python builds (PEP 517, with build isolation).
+# auditwheel only needs to live in one Python; it operates on built wheels.
+RUN /opt/python/cp313-cp313/bin/pip install --upgrade uv auditwheel
+
+# Build one wheel per CPython interpreter into /tmp/raw, then run
+# `auditwheel repair` on each and write the result to /dist.
+RUN set -eux; \
+    arch=$(uname -m); \
+    mkdir -p /tmp/raw /dist; \
+    for PY in /opt/python/cp310-cp310/bin/python \
+              /opt/python/cp311-cp311/bin/python \
+              /opt/python/cp312-cp312/bin/python \
+              /opt/python/cp313-cp313/bin/python; do \
+        uv build --wheel --python "$PY" --out-dir /tmp/raw .; \
+    done; \
+    for whl in /tmp/raw/*.whl; do \
+        auditwheel repair --plat "manylinux_2_28_${arch}" -w /dist "$whl"; \
+    done
+
+# Build the MX_SKIP_EXT=1 wheel and the sdist directly into /dist, then
+# remove /dist/.gitignore (presumably written by the build tool - confirm)
+# so it is not copied into the export stage.
+RUN set -eux; \
+    MX_SKIP_EXT=1 uv build --wheel --sdist \
+        --python /opt/python/cp313-cp313/bin/python --out-dir /dist .; \
+    rm -f /dist/.gitignore
+
+# Export stage: starts from an empty image and holds only the contents of
+# the builder's /dist, copied to the image root.
+FROM scratch AS export
+COPY --from=builder /dist/ /
diff --git a/docker-compose.yml b/docker/docker-compose.yml
similarity index 92%
rename from docker-compose.yml
rename to docker/docker-compose.yml
index 9da2adc4..a0790f83 100644
--- a/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -15,8 +15,8 @@ services:
 
   model-express-server:
     build:
-      context: .
-      dockerfile: Dockerfile
+      context: ..
+ dockerfile: docker/Dockerfile ports: - "8001:8001" environment: diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 717e2bbe..beb63e86 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -64,8 +64,10 @@ graph TD ModelExpress/ ├── Cargo.toml # Workspace root (4 members) ├── Cargo.lock -├── Dockerfile # Multi-stage production image -├── docker-compose.yml # Single-service dev setup +├── docker/ +│ ├── Dockerfile # Multi-stage production image +│ ├── Dockerfile.client-wheel # Builds Python client wheels + sdist +│ └── docker-compose.yml # Single-service dev setup ├── run_integration_tests.sh # Integration test runner ├── test_client.sh # Client test script ├── test_grpc_transfer_k8s.sh # K8s gRPC transfer test diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index e00e0df0..92b39172 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -180,7 +180,7 @@ See [`CLI.md`](CLI.md) for full CLI usage documentation. The multi-stage Dockerfile builds all binaries (server, CLI, test tools): ```bash -docker build -t model-express . +docker build -f docker/Dockerfile -t model-express . docker run -p 8001:8001 model-express ``` @@ -189,9 +189,72 @@ docker run -p 8001:8001 model-express Single-service setup for local development: ```bash -docker-compose up --build +docker compose -f docker/docker-compose.yml up --build ``` +### Python Client Distributions + +`docker/Dockerfile.client-wheel` builds the publishable artifacts for the +Python client on top of `quay.io/pypa/manylinux_2_28_${arch}`. Per target +platform it produces: + +- `manylinux_2_28_x86_64` or `manylinux_2_28_aarch64` wheels for cp310, + cp311, cp312, cp313 (each compiles the `modelexpress.vmm._alloc_ext` shim + against the matching CPython ABI and is hardened with `auditwheel repair`) +- `py3-none-any` pure-Python wheel built with `MX_SKIP_EXT=1` (no compiled + extension; runtime falls back to the pool-reg path) +- sdist tarball + +The Dockerfile is multi-arch. 
Pick the target with buildx `--platform`: + +```bash +# x86_64 only -> ./dist/*.whl, ./dist/*.tar.gz +docker buildx build --platform linux/amd64 \ + -f docker/Dockerfile.client-wheel \ + --target export --output type=local,dest=./dist . + +# arm64 only -> ./dist/*.whl, ./dist/*.tar.gz +docker buildx build --platform linux/arm64 \ + -f docker/Dockerfile.client-wheel \ + --target export --output type=local,dest=./dist . + +# Both at once -> ./dist/linux_amd64/* and ./dist/linux_arm64/* +docker buildx build --platform linux/amd64,linux/arm64 \ + -f docker/Dockerfile.client-wheel \ + --target export --output type=local,dest=./dist . +``` + +Cross-platform builds need QEMU emulation registered with buildx +(`docker run --privileged --rm tonistiigi/binfmt --install all` once per +host). Native builds on the matching arch run without emulation. + +Without buildx (single arch, matches the host): + +```bash +docker build --target builder -f docker/Dockerfile.client-wheel -t mx-wheel-builder . +docker run --rm -v "$PWD/dist:/out" mx-wheel-builder cp -r /dist/. /out/ +``` + +#### CI uploads to Artifactory + +`.github/workflows/build-wheels.yml` runs this Dockerfile on every PR +(via `copy-pr-bot` mirroring into `pull-request/<N>` branches) and +every push to `main` / `release/**`, building both archs in parallel on +velonix self-hosted runners and uploading the artifacts to NV Artifactory. + +Destination layout under `${ARTIFACTORY_PYPI_REPO_NAME}`: + +| Event | Subpath | +|---|---| +| `push` to `pull-request/<N>` (copy-pr-bot mirror) | `pr/<N>/<sha>/<run_id>/<run_attempt>/<arch>/` | +| `push` to `main`, `release/**` | `post-merge/<sha>/<run_id>/<run_attempt>/<arch>/` | + +Each path contains the 6 artifacts from one arch: 4 manylinux wheels +(cp310-cp313), 1 `py3-none-any` wheel, and 1 sdist. The upload step is +gated on the `automated-release` GitHub environment, which holds three +secrets: `ARTIFACTORY_URL`, `ARTIFACTORY_TOKEN` (JFrog identity token), +and `ARTIFACTORY_PYPI_REPO_NAME`. 
+ ### Custom Client Image (P2P Transfers) For GPU-to-GPU weight transfers with vLLM: diff --git a/examples/dynamo_model_cache_k8s/README.md b/examples/dynamo_model_cache_k8s/README.md index 4827526a..b2585f5d 100644 --- a/examples/dynamo_model_cache_k8s/README.md +++ b/examples/dynamo_model_cache_k8s/README.md @@ -13,7 +13,7 @@ ```bash # Build the Model Express image - docker build -t model-express:latest . + docker build -f docker/Dockerfile -t model-express:latest . # Tag for your registry (replace with your registry) docker tag model-express:latest your-registry/model-express:latest diff --git a/modelexpress_client/python/setup.py b/modelexpress_client/python/setup.py index a8830c1b..7d7a03a8 100644 --- a/modelexpress_client/python/setup.py +++ b/modelexpress_client/python/setup.py @@ -77,6 +77,8 @@ def _warn_optional_skip(exc, ext_name="modelexpress.vmm._alloc_ext"): def _ext_modules(): + if os.environ.get("MX_SKIP_EXT"): + return [] extra_compile_args = ["-std=c++17", "-O3", "-fPIC"] return [ Extension( diff --git a/test_grpc_transfer_k8s.sh b/test_grpc_transfer_k8s.sh index 2c1ed1fd..aeac4b2d 100644 --- a/test_grpc_transfer_k8s.sh +++ b/test_grpc_transfer_k8s.sh @@ -149,7 +149,7 @@ build_and_load_image() { cd "$(dirname "$0")" # Build the image locally - docker build -t "${IMAGE_NAME}:${IMAGE_TAG}" . 2>&1 | tail -5 + docker build -f docker/Dockerfile -t "${IMAGE_NAME}:${IMAGE_TAG}" . 2>&1 | tail -5 # Load into kind cluster log_info "Loading image into kind cluster..." diff --git a/test_multinode_k8s.sh b/test_multinode_k8s.sh index 96fd01ea..7e99f88e 100755 --- a/test_multinode_k8s.sh +++ b/test_multinode_k8s.sh @@ -47,7 +47,7 @@ done # Step 1: Build and distribute image if [ "$SKIP_BUILD" = false ]; then echo "Building Docker image modelexpress:multinode-test..." - docker build -t modelexpress:multinode-test . + docker build -f docker/Dockerfile -t modelexpress:multinode-test . 
if command -v /snap/bin/microk8s &>/dev/null; then echo "Distributing image to microk8s cluster nodes..."