-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDockerfile
More file actions
131 lines (114 loc) · 4.38 KB
/
Dockerfile
File metadata and controls
131 lines (114 loc) · 4.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# Unified Dockerfile for Docs2Synth
# Supports CPU-only and GPU builds using build arguments
#
# Usage:
# CPU: docker build --build-arg BUILD_TYPE=cpu -t docs2synth:cpu .
# GPU: docker build --build-arg BUILD_TYPE=gpu -t docs2synth:gpu .
# =============================================================================
# Build Arguments
# =============================================================================
# Python release used in the tag of the CPU base image.
ARG PYTHON_VERSION=3.11
# Build flavour, "cpu" (default) or "gpu"; it selects the base-<flavour> stage.
ARG BUILD_TYPE=cpu

# =============================================================================
# Base Image Selection
# =============================================================================
# Two candidate bases: Python slim for CPU builds, NVIDIA CUDA runtime for GPU.
FROM python:${PYTHON_VERSION}-slim AS base-cpu
FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04 AS base-gpu

# Alias whichever candidate matches the requested build type as "base".
FROM base-${BUILD_TYPE} AS base
# =============================================================================
# System Setup
# =============================================================================
# All later instructions and the running container work out of /app.
WORKDIR /app

# Shared Python, pip and apt settings inherited by every stage built on "base".
# NOTE(review): DEBIAN_FRONTEND is set with ENV, so it also persists into the
# runtime environment of the final image, not just the build.
ENV DEBIAN_FRONTEND=noninteractive \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1
# =============================================================================
# System Dependencies - GPU specific setup
# =============================================================================
FROM base AS system-gpu

# The GPU base image doesn't have Python, so install Python 3.11 from the
# deadsnakes PPA and bootstrap pip for it with get-pip.py.
# NOTE(review): 3.11 is hard-coded in this stage and does not follow the
# PYTHON_VERSION build argument used for the CPU base image.
# PaddleOCR requires OpenCV, which needs the graphics libraries listed below.
#
# ca-certificates is installed explicitly because --no-install-recommends would
# otherwise leave the HTTPS download of get-pip.py dependent on transitive deps.
#
# The "python3" override is placed in /usr/local/bin instead of replacing
# /usr/bin/python3: Ubuntu's own tools (apt helpers, add-apt-repository) are
# written for the distribution interpreter and break when that path is
# repointed. This assumes /usr/local/bin precedes /usr/bin on PATH; the version
# assertion below fails the build if "python3" does not resolve to 3.11.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        git \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        libsm6 \
        libxext6 \
        libxrender1 \
        software-properties-common \
        wget && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        python3.11 \
        python3.11-dev \
        python3.11-distutils && \
    wget https://bootstrap.pypa.io/get-pip.py && \
    python3.11 get-pip.py && \
    rm get-pip.py && \
    ln -sf /usr/bin/python3.11 /usr/local/bin/python3 && \
    ln -sf /usr/bin/python3.11 /usr/bin/python && \
    python --version && \
    python3 -c 'import sys; assert sys.version_info[:2] == (3, 11), sys.version' && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# =============================================================================
# System Dependencies - CPU build
# =============================================================================
FROM base AS system-cpu

# Build tools and shared libraries needed for CPU-based processing.
# The apt package index is removed in the same layer that created it.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      build-essential \
      git \
      libgl1 \
      libglib2.0-0 \
      libgomp1 \
      libsm6 \
      libxext6 \
      libxrender1 \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*
# =============================================================================
# Final Stage - Common for all build types
# =============================================================================
FROM system-${BUILD_TYPE} AS final

# Re-declare the build arg: an ARG declared before the first FROM is only
# visible to FROM lines until it is declared again inside a stage.
ARG BUILD_TYPE=cpu

# Upgrade pip and, for GPU builds, install PyTorch before any source is copied
# so this large layer stays cached when only project files change.
# `set -e` aborts the build on the first failing command instead of letting a
# later successful command mask the failure.
# --index-url (rather than --extra-index-url) stops pip from preferring a newer
# non-cu118 wheel published on PyPI over the CUDA 11.8 builds.
RUN set -e; \
    pip install --upgrade pip; \
    if [ "$BUILD_TYPE" = "gpu" ]; then \
        echo "Installing PyTorch with CUDA 11.8 support"; \
        pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118; \
    fi

# Copy project metadata and source code
COPY pyproject.toml README.md ./
COPY docs2synth/ ./docs2synth/
COPY tests/ ./tests/
COPY scripts/ ./scripts/

# Editable install of the project with the extras that match BUILD_TYPE
RUN set -e; \
    if [ "$BUILD_TYPE" = "gpu" ]; then \
        echo "Installing Docs2Synth with GPU extras"; \
        pip install -e ".[gpu,dev]"; \
    else \
        echo "Installing Docs2Synth with CPU extras"; \
        pip install -e ".[cpu,dev]"; \
    fi

# Create directories for data, logs, and PaddleX cache.
# The application directories are world-writable so the container can run as
# any non-root user. /tmp gets mode 1777 rather than 777: a plain 777 clears the
# sticky bit and would let any user delete other users' temporary files.
RUN mkdir -p /app/data /app/logs /app/.paddlex /app/.cache /tmp && \
    chmod -R 777 /app/data /app/logs /app/.paddlex /app/.cache && \
    chmod 1777 /tmp

# Point PaddleX, the XDG cache, and HOME at locations under /app
ENV PADDLEX_HOME=/app/.paddlex \
    XDG_CACHE_HOME=/app/.cache \
    HOME=/app

# Image metadata, including which build type produced this image
LABEL build_type="${BUILD_TYPE}" \
      maintainer="AI4WA" \
      description="Docs2Synth - Document processing and synthesis toolkit"

# Default command: interactive shell
CMD ["/bin/bash"]