-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel-runner.yaml
More file actions
33 lines (29 loc) · 1.2 KB
/
model-runner.yaml
File metadata and controls
33 lines (29 loc) · 1.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# To review configuration:
# docker compose -f compose.yaml -f model-runner.yaml config
# https://docs.docker.com/compose/how-tos/multiple-compose-files/merge/
# Use !reset to remove values:
# https://docs.docker.com/reference/compose-file/merge/
services:
  agent:
    # Drop the service-level secret reference inherited from compose.yaml
    # (`!reset` removes the merged value entirely) so the agent starts
    # without an OpenAI API key and uses the Model Runner endpoint instead.
    secrets: !reset []
    environment:
      # Base URL for the model (OpenAI API-compatible endpoint)
      - MODEL_RUNNER_URL=http://model-runner.docker.internal:12434/engines/llama.cpp/v1
      # Model to use (Docker Hub models start with ai/)
      #- MODEL_RUNNER_MODEL=ai/gemma3-qat:12B-Q4_K_M
      # Use instruction-tuned model for better performance
      - MODEL_RUNNER_MODEL=hf.co/unsloth/gemma-3-12b-it-gguf:q8_0
    extra_hosts:
      # Used to make the API endpoint available in the container
      # https://docs.docker.com/ai/model-runner/#what-api-endpoints-are-available
      - "model-runner.docker.internal:host-gateway"
    # References an entry in the top-level `models` section of the merged file
    models:
      - gemma3
# Top-level model definitions; the name is referenced by the `models` list
# of the agent service.
models:
  gemma3:
    # Pre-pull the model when starting Docker Model Runner
    #model: ai/gemma3-qat:12B-Q4_K_M
    model: hf.co/unsloth/gemma-3-12b-it-gguf:q8_0
    context_size: 10000
# Reset openai-api-key so that entrypoint.sh configures app to use Model Runner
# (top-level `!reset` removes the secret declaration merged in from compose.yaml)
secrets: !reset []