forked from MODSetter/SurfSense
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile.allinone
More file actions
285 lines (238 loc) · 8.92 KB
/
Dockerfile.allinone
File metadata and controls
285 lines (238 loc) · 8.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
# SurfSense All-in-One Docker Image
# This image bundles PostgreSQL+pgvector, Redis, Electric SQL, Backend, and Frontend
# Usage: docker run -d -p 3000:3000 -p 8000:8000 -p 5133:5133 -v surfsense-data:/data --name surfsense ghcr.io/modsetter/surfsense:latest
#
# Included Services (all run locally by default):
# - PostgreSQL 14 + pgvector (vector database)
# - Redis (task queue)
# - Electric SQL (real-time sync)
# - Docling (document processing, CPU-only, OCR disabled)
# - Kokoro TTS (local text-to-speech for podcasts)
# - Faster-Whisper (local speech-to-text for audio files)
# - Playwright Chromium (web scraping)
#
# Note: This is the CPU-only version. A :cuda tagged image with GPU support
# will be available in the future for faster AI inference.
# ====================
# Stage 1: Get Electric SQL Binary
# ====================
FROM electricsql/electric:latest AS electric-builder
# ====================
# Stage 2: Build Frontend
# ====================
FROM node:20-alpine AS frontend-builder
WORKDIR /app
# Install pnpm
RUN corepack enable pnpm
# Copy package files
COPY surfsense_web/package.json surfsense_web/pnpm-lock.yaml* ./
COPY surfsense_web/source.config.ts ./
COPY surfsense_web/content ./content
# Install dependencies (skip postinstall which requires all source files)
RUN pnpm install --frozen-lockfile --ignore-scripts
# Copy source
COPY surfsense_web/ ./
# Run fumadocs-mdx postinstall now that source files are available
RUN pnpm fumadocs-mdx
# Build with placeholder values that will be replaced at runtime
# These unique strings allow runtime substitution via entrypoint script
ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=__NEXT_PUBLIC_FASTAPI_BACKEND_URL__
ENV NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=__NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE__
ENV NEXT_PUBLIC_ETL_SERVICE=__NEXT_PUBLIC_ETL_SERVICE__
ENV NEXT_PUBLIC_ELECTRIC_URL=__NEXT_PUBLIC_ELECTRIC_URL__
ENV NEXT_PUBLIC_ELECTRIC_AUTH_MODE=__NEXT_PUBLIC_ELECTRIC_AUTH_MODE__
ENV NEXT_PUBLIC_DEPLOYMENT_MODE=__NEXT_PUBLIC_DEPLOYMENT_MODE__
# Build
RUN pnpm run build
# ====================
# Stage 3: Runtime Image
# ====================
FROM ubuntu:22.04 AS runtime
# Prevent interactive prompts
ENV DEBIAN_FRONTEND=noninteractive
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
# PostgreSQL
postgresql-14 \
postgresql-contrib-14 \
# Build tools for pgvector
build-essential \
postgresql-server-dev-14 \
git \
# Redis
redis-server \
# Node.js prerequisites
curl \
ca-certificates \
gnupg \
# Backend dependencies
gcc \
wget \
unzip \
dos2unix \
# For PPAs
software-properties-common \
# ============================
# Local TTS (Kokoro) dependencies
# ============================
espeak-ng \
libespeak-ng1 \
# ============================
# Local STT (Faster-Whisper) dependencies
# ============================
ffmpeg \
# ============================
# Audio processing (soundfile)
# ============================
libsndfile1 \
# ============================
# Image/OpenCV dependencies (for Docling)
# ============================
libgl1 \
libglib2.0-0 \
libsm6 \
libxext6 \
libxrender1 \
# ============================
# Playwright browser dependencies
# ============================
libnspr4 \
libnss3 \
libatk1.0-0 \
libatk-bridge2.0-0 \
libcups2 \
libxkbcommon0 \
libatspi2.0-0 \
libxcomposite1 \
libxdamage1 \
libxrandr2 \
libgbm1 \
libcairo2 \
libpango-1.0-0 \
&& rm -rf /var/lib/apt/lists/*
# Install Pandoc 3.x from GitHub (apt ships 2.9 which has broken table rendering).
RUN ARCH=$(dpkg --print-architecture) && \
wget -qO /tmp/pandoc.deb "https://github.com/jgm/pandoc/releases/download/3.9/pandoc-3.9-1-${ARCH}.deb" && \
dpkg -i /tmp/pandoc.deb && \
rm /tmp/pandoc.deb
# Install Node.js 20.x (for running frontend)
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y nodejs \
&& rm -rf /var/lib/apt/lists/*
# Install Python 3.12 from deadsnakes PPA
RUN add-apt-repository ppa:deadsnakes/ppa -y \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
python3.12 \
python3.12-venv \
python3.12-dev \
&& rm -rf /var/lib/apt/lists/*
# Set Python 3.12 as default
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \
&& update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1
# Install pip for Python 3.12
RUN python3.12 -m ensurepip --upgrade \
&& python3.12 -m pip install --upgrade pip
# Install supervisor via pip (system package incompatible with Python 3.12)
RUN pip install --no-cache-dir supervisor
# Build and install pgvector
RUN cd /tmp \
&& git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git \
&& cd pgvector \
&& make \
&& make install \
&& rm -rf /tmp/pgvector
# Update certificates
RUN update-ca-certificates
# Create data directories
RUN mkdir -p /data/postgres /data/redis /data/surfsense \
&& chown -R postgres:postgres /data/postgres
# ====================
# Copy Frontend Build
# ====================
WORKDIR /app/frontend
# Copy only the standalone build (not node_modules)
COPY --from=frontend-builder /app/.next/standalone ./
COPY --from=frontend-builder /app/.next/static ./.next/static
COPY --from=frontend-builder /app/public ./public
COPY surfsense_web/content/docs /app/surfsense_web/content/docs
# ====================
# Copy Electric SQL Release
# ====================
COPY --from=electric-builder /app /app/electric-release
# ====================
# Setup Backend
# ====================
WORKDIR /app/backend
# Copy backend dependency files
COPY surfsense_backend/pyproject.toml surfsense_backend/uv.lock ./
# Install PyTorch CPU-only (Docling needs it but OCR is disabled, no GPU needed)
RUN pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu
# Install python dependencies
RUN pip install --no-cache-dir certifi pip-system-certs uv \
&& uv pip install --system --no-cache-dir -e .
# Set SSL environment variables
RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") \
&& echo "export SSL_CERT_FILE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh \
&& echo "export REQUESTS_CA_BUNDLE=$CERTIFI_PATH" >> /etc/profile.d/ssl.sh
# Note: EasyOCR models NOT downloaded - OCR is disabled in docling_service.py
# GPU support will be added in a future :cuda tagged image
# Install Playwright browsers
RUN pip install --no-cache-dir playwright \
&& playwright install chromium \
&& rm -rf /root/.cache/ms-playwright/ffmpeg*
# Copy backend source
COPY surfsense_backend/ ./
# ====================
# Configuration
# ====================
WORKDIR /app
# Copy supervisor configuration
COPY scripts/docker/supervisor-allinone.conf /etc/supervisor/conf.d/surfsense.conf
# Copy entrypoint script
COPY scripts/docker/entrypoint-allinone.sh /app/entrypoint.sh
RUN dos2unix /app/entrypoint.sh && chmod +x /app/entrypoint.sh
# PostgreSQL initialization script
COPY scripts/docker/init-postgres.sh /app/init-postgres.sh
RUN dos2unix /app/init-postgres.sh && chmod +x /app/init-postgres.sh
# Clean up build dependencies to reduce image size
RUN apt-get purge -y build-essential postgresql-server-dev-14 \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Environment variables with defaults
ENV POSTGRES_USER=surfsense
ENV POSTGRES_PASSWORD=surfsense
ENV POSTGRES_DB=surfsense
ENV DATABASE_URL=postgresql+asyncpg://surfsense:surfsense@localhost:5432/surfsense
ENV CELERY_BROKER_URL=redis://localhost:6379/0
ENV CELERY_RESULT_BACKEND=redis://localhost:6379/0
ENV CELERY_TASK_DEFAULT_QUEUE=surfsense
ENV PYTHONPATH=/app/backend
ENV NEXT_FRONTEND_URL=http://localhost:3000
ENV AUTH_TYPE=LOCAL
ENV ETL_SERVICE=DOCLING
ENV EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
# Frontend configuration (can be overridden at runtime)
# These are injected into the Next.js build at container startup
ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000
ENV NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL
ENV NEXT_PUBLIC_ETL_SERVICE=DOCLING
# Electric SQL configuration (ELECTRIC_DATABASE_URL is built dynamically by entrypoint from these values)
ENV ELECTRIC_DB_USER=electric
ENV ELECTRIC_DB_PASSWORD=electric_password
# Note: ELECTRIC_DATABASE_URL is NOT set here - entrypoint builds it dynamically from ELECTRIC_DB_USER/PASSWORD
ENV ELECTRIC_INSECURE=true
ENV ELECTRIC_WRITE_TO_PG_MODE=direct
ENV ELECTRIC_PORT=5133
ENV PORT=5133
ENV NEXT_PUBLIC_ELECTRIC_URL=http://localhost:5133
ENV NEXT_PUBLIC_ELECTRIC_AUTH_MODE=insecure
# Data volume
VOLUME ["/data"]
# Expose ports (Frontend: 3000, Backend: 8000, Electric: 5133)
EXPOSE 3000 8000 5133
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
CMD curl -f http://localhost:3000 || exit 1
# Run entrypoint
CMD ["/app/entrypoint.sh"]