11 changes: 6 additions & 5 deletions .github/workflows/python-tests.yml
@@ -28,18 +28,19 @@ jobs:
         uses: actions/cache@v3
         with:
           path: ~/.cache/pip
-          key: ${{ runner.os }}-pip-${{ hashFiles('python/requirements.txt') }}
+          key: ${{ runner.os }}-pip-${{ hashFiles('python/requirements-ci.txt') }}
           restore-keys: |
             ${{ runner.os }}-pip-

       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[dev]
+          pip install -r python/requirements-ci.txt

       - name: Run tests
-        run: |
-          pytest -v --cov-report=xml --cov-report=term
+        run: pytest tests -v --cov-report=xml --cov-report=term --ignore=tests/test_llama_cpp_backend.py --ignore=tests/test_vllm_backend.py
+        env:
+          PYTHONPATH: ${{ github.workspace }}/python

       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
@@ -60,7 +61,7 @@ jobs:

       - name: Install package and linting tools
         run: |
-          pip install -e .[dev]
+          pip install -r python/requirements-ci.txt

       - name: Run black (format check)
         run: |
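Both jobs now install from `python/requirements-ci.txt` instead of `pip install -e .[dev]`, which fits the new pytest invocation above: CI skips the llama.cpp and vLLM backend tests, so it only needs the lightweight test/lint toolchain rather than the full inference stacks. The file's contents are not part of this diff; a purely hypothetical sketch of what such a file might pin (every entry here is an assumption, not the actual file):

```
# Hypothetical requirements-ci.txt: test and lint tooling only, without the
# heavyweight inference backends (llama-cpp-python, vllm) whose tests CI skips.
pytest
pytest-cov
black
pyyaml
huggingface_hub
```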
21 changes: 21 additions & 0 deletions README.md
@@ -15,6 +15,7 @@ Maintained by [JustInternetAI](https://github.com/JustInternetAI)
 ### Core
 - **Godot C++ Module**: Deterministic tick loop, event bus, navigation, sensors, stable replay logs
 - **Agent Runtime**: Adapters for llama.cpp, TensorRT-LLM, vLLM with function-calling tool API
+- **Model Management**: Automated LLM model downloading from Hugging Face Hub with caching and verification
 - **Tool System**: World querying (vision rays, inventories), pathfinding, crafting actions via JSON schemas
 - **Memory & RAG**: Short-term scratchpad + long-term vector store with episode summaries
 - **Benchmark Scenes**: 3 sandbox environments (foraging, crafting chain, team capture) with metrics
@@ -130,6 +131,26 @@ agent-arena/

 See [docs/quickstart.md](docs/quickstart.md) for a tutorial on creating your first agent-driven scene.

+### Model Management
+
+Agent Arena includes a built-in tool to download and manage LLM models from Hugging Face Hub:
+
+```bash
+# Download a model for testing
+cd python
+python -m tools.model_manager download tinyllama-1.1b-chat --format gguf --quant q4_k_m
+
+# List available models in registry
+python -m tools.model_manager info
+
+# List downloaded models
+python -m tools.model_manager list
+```
+
+Supported models include TinyLlama (1.1B), Phi-2 (2.7B), Llama-2 (7B/13B), Mistral (7B), Llama-3 (8B), and Mixtral (8x7B).
+
+For detailed documentation on model management, see [docs/model_management.md](docs/model_management.md).
+
 ## Development Roadmap

 - [ ] Phase 1: Core infrastructure (deterministic sim, event bus, basic tools)
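The `tools/model_manager` implementation itself is not shown in this diff. As a rough, hypothetical sketch of what the `download` subcommand plausibly reduces to, assuming the standard `huggingface_hub` client, with the repo id and filename taken from the `configs/models.yaml` entries added below:

```python
# Hypothetical sketch, not the actual tools/model_manager implementation:
# fetch one quantized GGUF file from Hugging Face Hub with local caching.
from huggingface_hub import hf_hub_download

# repo_id and filename come from the tinyllama-1.1b-chat registry entry
# in configs/models.yaml (added later in this PR).
path = hf_hub_download(
    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
    filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
)
print(path)  # resolves into the local Hugging Face cache (~/.cache/huggingface)
```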
159 changes: 159 additions & 0 deletions configs/models.yaml
@@ -0,0 +1,159 @@
# Model Registry for Agent Arena
# This file defines available models and their Hugging Face Hub sources

models:
  # Small models for development and testing
  tinyllama-1.1b-chat:
    huggingface_id: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
    description: "Extremely fast, basic capabilities, great for testing"
    size_class: "tiny"
    formats:
      gguf:
        q4_k_m:
          file: "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
          sha256: null  # Checksums can be added for verification
        q5_k_m:
          file: "tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf"
          sha256: null
        q8_0:
          file: "tinyllama-1.1b-chat-v1.0.Q8_0.gguf"
          sha256: null

  phi-2:
    huggingface_id: "TheBloke/phi-2-GGUF"
    description: "Fast, good reasoning for 2.7B size, excellent for development"
    size_class: "small"
    formats:
      gguf:
        q4_k_m:
          file: "phi-2.Q4_K_M.gguf"
          sha256: null
        q5_k_m:
          file: "phi-2.Q5_K_M.gguf"
          sha256: null
        q8_0:
          file: "phi-2.Q8_0.gguf"
          sha256: null

  # Production-ready 7B models
  llama-2-7b-chat:
    huggingface_id: "TheBloke/Llama-2-7B-Chat-GGUF"
    description: "Good balance of speed and quality, widely tested"
    size_class: "medium"
    formats:
      gguf:
        q4_k_m:
          file: "llama-2-7b-chat.Q4_K_M.gguf"
          sha256: null
        q5_k_m:
          file: "llama-2-7b-chat.Q5_K_M.gguf"
          sha256: null
        q8_0:
          file: "llama-2-7b-chat.Q8_0.gguf"
          sha256: null

  mistral-7b-instruct-v0.2:
    huggingface_id: "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
    description: "High quality instruction following, fast inference"
    size_class: "medium"
    formats:
      gguf:
        q4_k_m:
          file: "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
          sha256: null
        q5_k_m:
          file: "mistral-7b-instruct-v0.2.Q5_K_M.gguf"
          sha256: null
        q8_0:
          file: "mistral-7b-instruct-v0.2.Q8_0.gguf"
          sha256: null

  llama-3-8b-instruct:
    huggingface_id: "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
    description: "Latest Llama 3, best quality in 8B class"
    size_class: "medium"
    formats:
      gguf:
        q4_k_m:
          file: "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"
          sha256: null
        q5_k_m:
          file: "Meta-Llama-3-8B-Instruct.Q5_K_M.gguf"
          sha256: null
        q8_0:
          file: "Meta-Llama-3-8B-Instruct.Q8_0.gguf"
          sha256: null

  # Larger models for high quality
  llama-2-13b-chat:
    huggingface_id: "TheBloke/Llama-2-13B-Chat-GGUF"
    description: "Better reasoning and instruction following than 7B"
    size_class: "large"
    formats:
      gguf:
        q4_k_m:
          file: "llama-2-13b-chat.Q4_K_M.gguf"
          sha256: null
        q5_k_m:
          file: "llama-2-13b-chat.Q5_K_M.gguf"
          sha256: null

  mixtral-8x7b-instruct:
    huggingface_id: "TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF"
    description: "Mixture of Experts, excellent quality, 47B total parameters"
    size_class: "xlarge"
    formats:
      gguf:
        q4_k_m:
          file: "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf"
          sha256: null
        q5_k_m:
          file: "mixtral-8x7b-instruct-v0.1.Q5_K_M.gguf"
          sha256: null

# Quantization guide
quantization_info:
  q4_k_m:
    description: "4-bit quantization, good balance of size and quality"
    quality: "Medium"
    speed: "Fast"
    size_factor: 0.25  # Approx 1/4 of original size

  q5_k_m:
    description: "5-bit quantization, better quality than Q4"
    quality: "Medium-High"
    speed: "Medium-Fast"
    size_factor: 0.31

  q8_0:
    description: "8-bit quantization, near original quality"
    quality: "High"
    speed: "Medium"
    size_factor: 0.50

# Size class reference (unquantized sizes)
size_classes:
  tiny:
    description: "< 2B parameters"
    ram_required: "2-4 GB"
    use_case: "Testing, rapid iteration"

  small:
    description: "2-4B parameters"
    ram_required: "4-8 GB"
    use_case: "Development, basic tasks"

  medium:
    description: "7-8B parameters"
    ram_required: "8-16 GB"
    use_case: "Production, general purpose"

  large:
    description: "13-14B parameters"
    ram_required: "16-32 GB"
    use_case: "High quality tasks"

  xlarge:
    description: "30B+ parameters"
    ram_required: "32+ GB"
    use_case: "Highest quality, research"
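The `size_factor` values also support a back-of-envelope download-size estimate, assuming the "original size" baseline is fp16 weights at roughly 2 bytes per parameter (an assumption; actual GGUF files can deviate from these factors by 10-20%):

```python
# Rough size estimate from the quantization_info table: parameters (in
# billions) * ~2 GB per billion params at fp16 * the quantization factor.
def estimated_gb(params_billion: float, size_factor: float) -> float:
    fp16_gb = params_billion * 2  # ~2 bytes/param -> ~2 GB per billion params
    return fp16_gb * size_factor


print(estimated_gb(7, 0.25))     # Llama-2-7B at q4_k_m: ~3.5 GB
print(estimated_gb(7, 0.50))     # Llama-2-7B at q8_0:   ~7.0 GB
print(estimated_gb(46.7, 0.25))  # Mixtral 8x7B at q4_k_m: ~23 GB
```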