llm-stack/.env.example at main · qso-graph/llm-stack · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# =============================================================================
# qso-graph LLM Stack — Configuration
# =============================================================================
# Copy this file to .env and edit to match your setup.
#
#   cp .env.example .env
#
# =============================================================================

# --- LLM Model ---
# Path to your GGUF model file, relative to the ./models/ directory.
# Default: Qwen2.5-7B-Instruct (Q5_K_M) — ~5.5 GB, fits in 16 GB VRAM
LLM_MODEL=Qwen2.5-7B-Instruct-Q5_K_M.gguf

# Context window size, in tokens. A larger window fits more tool schemas
# but uses more VRAM.
# 16384 is safe for 16 GB VRAM with a 7B model. Increase if you have headroom.
LLM_CTX_SIZE=16384

# Number of model layers to offload to the GPU. 999 = all layers on GPU
# (recommended).
# If you run out of VRAM, lower this for partial offload (try 20-30).
LLM_GPU_LAYERS=999

# Batch size for prompt processing. The default is fine for most setups.
LLM_BATCH_SIZE=512

# CPU threads for non-GPU work. Set to your physical core count
# (8 is only an example).
LLM_THREADS=8

# API key for the llama.cpp endpoint (shared between Open WebUI and llama.cpp).
# The value below is a published example, so it is not a secret: set your own
# key if the endpoint is reachable by anyone other than you.
LLM_API_KEY=sk-local-llm

# --- Open WebUI ---
# Display name shown in the UI.
# NOTE(review): the value contains a space and is unquoted. That would break
# if this file were ever sourced by a shell — confirm how .env is loaded
# before quoting or changing it.
WEBUI_NAME=QSO-Graph AI

# --- IONIS Datasets (optional) ---
# Path to the directory holding the IONIS SQLite datasets. Download them from:
# https://sourceforge.net/projects/ionis-ai/files/v1.0/
# Leave empty to skip ionis-mcp (the other 5 servers still work).
IONIS_DATA_DIR=

# --- Cloudflare Tunnel (optional) ---
# Set this to your tunnel token to expose Open WebUI publicly via
# Cloudflare Tunnel. Treat the token as a secret: keep it only in your
# local .env, never in this example file.
# Leave empty to run locally only.
CLOUDFLARE_TUNNEL_TOKEN=