-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile.gpu
More file actions
98 lines (76 loc) · 3.21 KB
/
Dockerfile.gpu
File metadata and controls
98 lines (76 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# PromptMill - GPU/CUDA Build (Multi-stage)
# AI Prompt Generator with local LLM
# =============================================================================
# Stage 1: Build llama-cpp-python with CUDA support
# =============================================================================
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 AS builder

# Build-time only — ARG (not ENV) so it is never baked into an image env.
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=UTC

# Ubuntu 22.04 (jammy) ships Python 3.10 only; the python3.12 packages come
# from the deadsnakes PPA, so add it before installing. --no-install-recommends
# keeps the layer lean; apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    gnupg \
    software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    git \
    ninja-build \
    python3-pip \
    python3.12 \
    python3.12-dev \
    python3.12-venv \
    && rm -rf /var/lib/apt/lists/*
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1

# Create virtual environment for a clean, self-contained copy into the runtime stage
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# CUDA build environment: force llama.cpp's CMake build to compile the CUDA
# backend with nvcc from the devel image.
ENV CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc" \
    FORCE_CMAKE=1 \
    CUDACXX=/usr/local/cuda/bin/nvcc \
    CUDA_HOME=/usr/local/cuda \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH \
    LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LIBRARY_PATH

# The devel image has no real GPU driver; symlink the stub so linking against
# libcuda.so.1 succeeds at build time (the host driver is used at runtime).
RUN ln -sf /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1

RUN pip install --no-cache-dir --upgrade pip

# Install Python dependencies (version-bounded for reproducible builds)
RUN pip install --no-cache-dir \
    "huggingface_hub>=0.20.0,<1.0.0" \
    "gradio>=4.44.0,<5.0.0" \
    "pytest>=7.0.0" \
    "pytest-cov>=4.0.0"

# Install llama-cpp-python with CUDA (this is the heavy compile step).
# Bounded below/above so a future incompatible release can't silently break the build.
RUN pip install --no-cache-dir "llama-cpp-python>=0.2.0,<1.0.0"
# =============================================================================
# Stage 2: Runtime image (smaller, no build tools)
# =============================================================================
FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04

# Build-time only — ARG keeps DEBIAN_FRONTEND out of the container's runtime env.
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=UTC \
    PYTHONUNBUFFERED=1

# Ubuntu 22.04 ships Python 3.10 only; python3.12 comes from the deadsnakes PPA.
# Install only the interpreter packages (no dev/build tools) and clean apt lists
# in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    gnupg \
    software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update && apt-get install -y --no-install-recommends \
    python3.12 \
    python3.12-venv \
    && rm -rf /var/lib/apt/lists/* \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1

# Copy the prebuilt virtual environment (with CUDA llama-cpp-python) from the builder
COPY --from=builder /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

WORKDIR /app

# Create non-root user for security (fixed UID so orchestrators can verify it)
RUN useradd -m -s /bin/bash -u 1000 promptmill

COPY --chown=promptmill:promptmill src/ ./src/
COPY --chown=promptmill:promptmill assets/ ./assets/
COPY --chown=promptmill:promptmill tests/ ./tests/

# Create models directory and set ownership of entire app directory
RUN mkdir -p /app/models && chown -R promptmill:promptmill /app

# Append CUDA libs to LD_LIBRARY_PATH rather than replacing it: the nvidia/cuda
# runtime base sets LD_LIBRARY_PATH to the driver libraries injected by the
# NVIDIA container runtime, and overwriting it would hide them.
ENV MODELS_DIR=/app/models \
    PYTHONPATH=/app/src \
    HF_HOME=/app/models/.cache \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH

# Switch to non-root user before the entrypoint
USER promptmill

# Documentation only — the app's Gradio port must still be published at run time
EXPOSE 7610

# Health check using new /health endpoint (stdlib urllib: no curl in this image)
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7610/health')" || exit 1

# Exec-form CMD so the app runs as PID 1 and receives SIGTERM from `docker stop`
CMD ["python", "-m", "promptmill"]