-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile.gpu
More file actions
98 lines (76 loc) · 3.21 KB
/
Dockerfile.gpu
File metadata and controls
98 lines (76 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# PromptMill - GPU/CUDA Build (Multi-stage)
# AI Prompt Generator with local LLM
# =============================================================================
# Stage 1: Build llama-cpp-python with CUDA support
# =============================================================================
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 AS builder

# Build-time only — ARG (not ENV) so it is never baked into an image env.
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=UTC

# Ubuntu 22.04 (jammy) ships Python 3.10 only; the python3.12 packages come
# from the deadsnakes PPA, so add it before installing. --no-install-recommends
# keeps the layer lean; apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    gnupg \
    software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    git \
    ninja-build \
    python3-pip \
    python3.12 \
    python3.12-dev \
    python3.12-venv \
    && rm -rf /var/lib/apt/lists/*
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1

# Create virtual environment for a clean, self-contained copy into the runtime stage
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# CUDA build environment: force llama.cpp's CMake build to compile the CUDA
# backend with nvcc from the devel image.
ENV CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc" \
    FORCE_CMAKE=1 \
    CUDACXX=/usr/local/cuda/bin/nvcc \
    CUDA_HOME=/usr/local/cuda \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH \
    LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LIBRARY_PATH

# The devel image has no real GPU driver; symlink the stub so linking against
# libcuda.so.1 succeeds at build time (the host driver is used at runtime).
RUN ln -sf /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1

RUN pip install --no-cache-dir --upgrade pip

# Install Python dependencies (version-bounded for reproducible builds)
RUN pip install --no-cache-dir \
    "huggingface_hub>=0.20.0,<1.0.0" \
    "gradio>=4.44.0,<5.0.0" \
    "pytest>=7.0.0" \
    "pytest-cov>=4.0.0"

# Install llama-cpp-python with CUDA (this is the heavy compile step).
# Bounded below/above so a future incompatible release can't silently break the build.
RUN pip install --no-cache-dir "llama-cpp-python>=0.2.0,<1.0.0"
# =============================================================================
# Stage 2: Runtime image (smaller, no build tools)
# =============================================================================
FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04

# Build-time only — ARG keeps DEBIAN_FRONTEND out of the container's runtime env.
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=UTC \
    PYTHONUNBUFFERED=1

# Ubuntu 22.04 ships Python 3.10 only; python3.12 comes from the deadsnakes PPA.
# Install only the interpreter packages (no dev/build tools) and clean apt lists
# in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    gnupg \
    software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update && apt-get install -y --no-install-recommends \
    python3.12 \
    python3.12-venv \
    && rm -rf /var/lib/apt/lists/* \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1

# Copy the prebuilt virtual environment (with CUDA llama-cpp-python) from the builder
COPY --from=builder /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

WORKDIR /app

# Create non-root user for security (fixed UID so orchestrators can verify it)
RUN useradd -m -s /bin/bash -u 1000 promptmill

COPY --chown=promptmill:promptmill src/ ./src/
COPY --chown=promptmill:promptmill assets/ ./assets/
COPY --chown=promptmill:promptmill tests/ ./tests/

# Create models directory and set ownership of entire app directory
RUN mkdir -p /app/models && chown -R promptmill:promptmill /app

# Append CUDA libs to LD_LIBRARY_PATH rather than replacing it: the nvidia/cuda
# runtime base sets LD_LIBRARY_PATH to the driver libraries injected by the
# NVIDIA container runtime, and overwriting it would hide them.
ENV MODELS_DIR=/app/models \
    PYTHONPATH=/app/src \
    HF_HOME=/app/models/.cache \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH

# Switch to non-root user before the entrypoint
USER promptmill

# Documentation only — the app's Gradio port must still be published at run time
EXPOSE 7610

# Health check using new /health endpoint (stdlib urllib: no curl in this image)
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7610/health')" || exit 1

# Exec-form CMD so the app runs as PID 1 and receives SIGTERM from `docker stop`
CMD ["python", "-m", "promptmill"]