-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel-runner.yaml
More file actions
33 lines (29 loc) · 1.2 KB
/
model-runner.yaml
File metadata and controls
33 lines (29 loc) · 1.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# To review configuration:
# docker compose -f compose.yaml -f model-runner.yaml config
# https://docs.docker.com/compose/how-tos/multiple-compose-files/merge/
# Use !reset to remove values:
# https://docs.docker.com/reference/compose-file/merge/
services:
  agent:
    # Drop the service-level secret reference inherited from compose.yaml
    # (`!reset` removes the merged value entirely) so the agent starts
    # without an OpenAI API key and uses the Model Runner endpoint instead.
    secrets: !reset []
    environment:
      # Base URL for the model (OpenAI API-compatible endpoint)
      - MODEL_RUNNER_URL=http://model-runner.docker.internal:12434/engines/llama.cpp/v1
      # Model to use (Docker Hub models start with ai/)
      #- MODEL_RUNNER_MODEL=ai/gemma3-qat:12B-Q4_K_M
      # Use instruction-tuned model for better performance
      - MODEL_RUNNER_MODEL=hf.co/unsloth/gemma-3-12b-it-gguf:q8_0
    extra_hosts:
      # Used to make the API endpoint available in the container
      # https://docs.docker.com/ai/model-runner/#what-api-endpoints-are-available
      - "model-runner.docker.internal:host-gateway"
    # References an entry in the top-level `models` section of the merged file
    models:
      - gemma3
# Top-level model definitions; the name is referenced by the `models` list
# of the agent service.
models:
  gemma3:
    # Pre-pull the model when starting Docker Model Runner
    #model: ai/gemma3-qat:12B-Q4_K_M
    model: hf.co/unsloth/gemma-3-12b-it-gguf:q8_0
    context_size: 10000
# Reset openai-api-key so that entrypoint.sh configures app to use Model Runner
# (top-level `!reset` removes the secret declaration merged in from compose.yaml)
secrets: !reset []