-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathDockerfile
More file actions
135 lines (107 loc) · 5.56 KB
/
Dockerfile
File metadata and controls
135 lines (107 loc) · 5.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# Dockerfile for Pie with CUDA support (Multi-stage build)
# Supports specific verified CUDA/PyTorch combinations only
# See scripts/build_docker_images.sh for supported versions
# Build arguments declared before the first FROM are only visible to FROM
# lines; each stage re-declares the ones it needs.
# CUDA_VERSION and CUDA_MINOR are joined as "<CUDA_VERSION>.<CUDA_MINOR>" to
# form the nvidia/cuda base image tag (12.6.1 with the defaults below).
ARG CUDA_VERSION=12.6
ARG CUDA_MINOR=1
# Suffix of the PyTorch wheel index URL used when installing torch/torchvision.
# NOTE(review): presumably has to correspond to CUDA_VERSION - confirm against
# scripts/build_docker_images.sh.
ARG PYTORCH_CUDA=cu126
# ============================================================================
# Stage 1: Builder - Build all components with full development toolchain
# ============================================================================
FROM nvidia/cuda:${CUDA_VERSION}.${CUDA_MINOR}-devel-ubuntu24.04 AS builder

# Global ARGs go out of scope at FROM; bring the needed ones back into this stage.
ARG CUDA_VERSION
ARG PYTORCH_CUDA

# Toolchain locations, cache directories and the PATH shared by all build steps.
# PATH puts the backend virtualenv first, then cargo's bin directory, then
# /root/.local/bin, ahead of the base image's PATH.
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PIE_HOME=/root/.cache/pie \
    TORCH_EXTENSIONS_DIR=/root/.cache/torch_extensions \
    PATH="/workspace/engine/backend-python/.venv/bin:/usr/local/cargo/bin:/root/.local/bin:${PATH}"
# Install all build dependencies: system packages, the Rust toolchain (with the
# wasm32-wasip2 target used for the example inferlets) and uv.
#
# The rustup and uv installers are downloaded to temporary files and then
# executed, rather than piped straight into `sh`. The default /bin/sh has no
# pipefail, so with `curl ... | sh` a failed download hands `sh` empty input
# and the step still succeeds; with separate commands a download failure stops
# the build at the point where it happened.
RUN apt-get update && apt-get install -y \
        build-essential \
        cmake \
        curl \
        git \
        libcbor-dev \
        libssl-dev \
        libzmq3-dev \
        libzstd-dev \
        ninja-build \
        pkg-config \
        python3-pip \
        python3.12 \
        python3.12-dev \
        python3.12-venv \
        wget \
    && rm -rf /var/lib/apt/lists/* \
    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh \
    && sh /tmp/rustup-init.sh -y --default-toolchain stable \
    && . "$CARGO_HOME/env" \
    && rustup target add wasm32-wasip2 \
    && curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh \
    && sh /tmp/uv-install.sh \
    && rm -f /tmp/rustup-init.sh /tmp/uv-install.sh
# Bring the full build context into /workspace.
WORKDIR /workspace
COPY . .

# Build CUDA backend (disabled - using Python backend only)
# RUN cd backend/backend-cuda && mkdir -p build && cd build \
# && cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES="80;86;89;90" \
# && ninja

# Install the PIE CLI globally from the runtime crate.
WORKDIR /workspace/runtime
RUN cargo install --path .

# Compile the example inferlets as release-mode wasm32-wasip2 modules.
WORKDIR /workspace/sdk/inferlet-examples
RUN cargo build --target wasm32-wasip2 --release

# Return to the repository root for the remaining steps.
WORKDIR /workspace
# Python backend: create a uv-managed virtualenv in engine/backend-python and
# install FlashInfer, the PyTorch build selected by PYTORCH_CUDA, Triton, the
# backend itself (editable, with its cuda and debug extras) and ninja.
# The install order is kept as-is: torch is force-reinstalled from the
# CUDA-specific wheel index after flashinfer-python has been installed.
WORKDIR /workspace/engine/backend-python
RUN uv venv \
    && . .venv/bin/activate \
    && uv pip install flashinfer-python==0.3.1 \
    && uv pip install torch torchvision \
        --index-url https://download.pytorch.org/whl/${PYTORCH_CUDA} \
        --force-reinstall \
    && uv pip install triton \
    && uv pip install -e ".[cuda,debug]" \
    && uv pip install ninja
# Back to the repository root.
WORKDIR /workspace
# ============================================================================
# Stage 2: Development - Keep full builder stage for development
# ============================================================================
FROM builder AS development

# Install the auth-setup entrypoint script. --chmod sets the executable bit as
# part of the COPY, so no extra `RUN chmod` layer (which stores a second copy
# of the file) is needed. Requires BuildKit.
COPY --chmod=755 scripts/docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh

# Set entrypoint to handle auth setup before starting server
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]

# Default command: start PIE server for development (allows pie-cli connections)
CMD ["pie", "serve", "--config", "/workspace/runtime/docker_config.toml"]
# ============================================================================
# Stage 3: Runtime - Use devel image for FlashInfer JIT compilation support
# ============================================================================
# Note: FlashInfer requires CUDA development tools (nvcc, headers) for runtime
# JIT compilation. Using the devel base image is simpler, more reliable, and
# easier to maintain than manually installing specific -dev packages, which
# have complex version dependencies.
# Named so the final image can also be selected explicitly with
# `--target runtime`; as the last stage it remains the default build target.
FROM nvidia/cuda:${CUDA_VERSION}.${CUDA_MINOR}-devel-ubuntu24.04 AS runtime

# Re-declare args after FROM
ARG CUDA_VERSION
ARG PYTORCH_CUDA

# Only needed while apt-get runs during the build. Declared as a build ARG so
# it is visible to RUN steps in this stage without persisting into the
# environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment: unbuffered Python output, cache locations, and a PATH
# that puts the backend virtualenv and /usr/local/bin first.
ENV PYTHONUNBUFFERED=1 \
    PIE_HOME=/root/.cache/pie \
    TORCH_EXTENSIONS_DIR=/root/.cache/torch_extensions \
    PATH="/workspace/engine/backend-python/.venv/bin:/usr/local/bin:${PATH}"
# Runtime-only packages: the Python interpreter with pip and venv support, the
# ZeroMQ, CBOR, Zstandard and OpenSSL shared libraries, and curl/wget.
# The CUDA development tools are already provided by the devel base image.
RUN apt-get update \
    && apt-get install -y \
        curl \
        libcbor0.10 \
        libssl3 \
        libzmq5 \
        libzstd1 \
        python3-pip \
        python3.12 \
        python3.12-venv \
        wget \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /workspace

# Bring the pie CLI and the uv binary over from the builder stage; both land
# in /usr/local/bin under their original file names.
COPY --from=builder \
    /usr/local/cargo/bin/pie \
    /root/.local/bin/uv \
    /usr/local/bin/

# Copy CUDA backend binary (disabled - using Python backend only)
# COPY --from=builder /workspace/backend/backend-cuda/build/bin/pie_cuda_be /workspace/backend/backend-cuda/build/bin/pie_cuda_be
# Copy the Python backend from the builder in a single step. The directory
# already contains the virtual environment (.venv), so a separate COPY of
# .venv beforehand would only write the same files into the image a second time.
COPY --from=builder /workspace/engine/backend-python/ /workspace/engine/backend-python/

# Best-effort removal of bytecode caches and build output from the copied tree.
# NOTE: this runs in its own layer after the COPY, so it tidies the final
# filesystem but does not reclaim the space those files occupy in the COPY layer.
RUN find /workspace/engine/backend-python -name "__pycache__" -type d -exec rm -rf {} + || true && \
    find /workspace/engine/backend-python -name "*.pyc" -delete || true && \
    rm -rf /workspace/engine/backend-python/build || true
# Example inferlets: every release-built .wasm module from the builder goes
# into /workspace/sdk/inferlet-examples/.
COPY --from=builder \
    /workspace/sdk/inferlet-examples/target/wasm32-wasip2/release/*.wasm \
    /workspace/sdk/inferlet-examples/

# Server configuration file passed to `pie serve` by the default CMD.
COPY --from=builder \
    /workspace/runtime/docker_config.toml \
    /workspace/runtime/docker_config.toml
# Install the auth-setup entrypoint script. --chmod sets the executable bit as
# part of the COPY, so no extra `RUN chmod` layer (which stores a second copy
# of the file) is needed. Requires BuildKit.
COPY --chmod=755 scripts/docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh

# Set entrypoint to handle auth setup before starting server
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]

# Default command: start PIE server
CMD ["pie", "serve", "--config", "/workspace/runtime/docker_config.toml"]