diff --git a/apps/api/src/ailiance_demo/config.py b/apps/api/src/ailiance_demo/config.py
index 00a46fb..e783ccb 100644
--- a/apps/api/src/ailiance_demo/config.py
+++ b/apps/api/src/ailiance_demo/config.py
@@ -49,14 +49,16 @@ class Settings(BaseSettings):
     )
     dataset_flags_dir: Path = Path("/dataset-flags")
     machine_label: str = "studio"
+    # Serving is consolidated onto the omlx multi-model server (:8500) plus
+    # the two qwen36 multi-LoRA instances (:9360 / :9361), all on Mac Studio.
+    # The old per-port workers (9301/9303/9304, macm1:8502/9302, kxkm-ai:8002) are
+    # decommissioned and no longer probed.
     workers_to_check: list[dict] = Field(
         default_factory=lambda: [
             {"name": "gateway", "url": "http://host.docker.internal:9300/health"},
-            {"name": "mistral-medium-3.5", "url": "http://studio:9301/health"},
-            {"name": "gemma4-e4b-curriculum", "url": "http://macm1:8502/health"},
-            {"name": "eurollm", "url": "http://studio:9303/health"},
-            {"name": "gemma3", "url": "http://tower:9304/health"},
-            {"name": "qwen3-next", "url": "http://host.docker.internal:8002/health"},
+            {"name": "omlx", "url": "http://100.116.92.12:8500/health"},
+            {"name": "qwen36-hardware", "url": "http://100.116.92.12:9360/health"},
+            {"name": "qwen36-code", "url": "http://100.116.92.12:9361/health"},
         ],
     )
 
diff --git a/apps/api/src/ailiance_demo/routers/public/models.py b/apps/api/src/ailiance_demo/routers/public/models.py
index 5195d2a..f976501 100644
--- a/apps/api/src/ailiance_demo/routers/public/models.py
+++ b/apps/api/src/ailiance_demo/routers/public/models.py
@@ -34,12 +34,12 @@
             "high-revenue enterprises require Mistral's paid API. "
             "262 k context window. Runs on Mac Studio M3 Ultra."
         ),
-        "headline": "128B params · MLX Q8 · 262k context · Mac Studio M3 Ultra",
+        "headline": "128B params · MLX Q8 · 262k context · omlx (Mac Studio M3 Ultra)",
         "parameters": 128_000_000_000,
         "disk_size_bytes": 124 * _GIB,
         "memory_gb": 130.0,
         "quantization": "MLX Q8",
-        "host": "studio (Mac Studio M3 Ultra)",
+        "host": "studio (omlx :8500, Mac Studio M3 Ultra)",
         "architecture": "mlx",
         "license": "modified-mit",
         "kind": ModelKind.QUANTIZED,
@@ -57,14 +57,15 @@
             "Google Gemma 4 E4B Instruction-Tuned avec adapter LoRA fine-tuné "
             "en curriculum 4 phases (seq 512 → 1024 → 2048 → 3072) sur le "
             "dataset ailiance (~82k conversations, electronics + code). "
-            "Test loss 2.094 (perplexity 8.12). Tourne sur Mac mini M1."
+            "Test loss 2.094 (perplexity 8.12). Sert aussi de fallback vision "
+            "léger. Servi par le serveur omlx sur Mac Studio (:8500)."
         ),
-        "headline": "E4B · MLX 4-bit + LoRA · Mac mini M1",
+        "headline": "E4B · MLX 4-bit + LoRA · omlx (Mac Studio)",
         "parameters": 4_000_000_000,
         "disk_size_bytes": 4 * _GIB,
         "memory_gb": 12.0,
         "quantization": "MLX 4-bit + LoRA",
-        "host": "macm1 (Mac mini M1)",
+        "host": "studio (omlx :8500, Mac Studio M3 Ultra)",
         "architecture": "mlx",
         "license": "gemma-terms",
         "kind": ModelKind.FINE_TUNED,
@@ -72,65 +73,87 @@
         "top_eval_score": 0.61,
         "top_eval_benchmark": "MT-Bench-FR (LoRA tuned)",
     },
-    "ailiance/qwen3-next-80b-a3b-instruct": {
-        "display_name": "Qwen3-Next 80B A3B Instruct",
-        "base_model": "Qwen/Qwen3-Next-80B-A3B-Instruct",
-        "domain": "reasoning",
+    "ailiance/qwen3-coder-next-80b": {
+        "display_name": "Qwen3-Coder-Next 80B (qwen36 multi-LoRA)",
+        "base_model": "Qwen/Qwen3-Coder-Next-80B-A3B",
+        "domain": "code",
         "description": (
-            "Qwen3-Next 80B sparse MoE (3B active per token) — Q4_K_M GGUF "
-            "served by llama.cpp on kxkm-ai (NVIDIA RTX 4090 24 GB). MoE "
-            "expert offload: attention layers on GPU, ffn experts in CPU "
-            "RAM via --override-tensor. Reachable from the gateway via "
-            "autossh tunnel (electron-server:8002 → kxkm-ai:18888)."
+            "Qwen3-Coder-Next 80B sparse MoE (3B active per token) — 8-bit "
+            "MLX served by the omlx server on Mac Studio. Also the base for "
+            "the qwen36-35B multi-LoRA hardware/code specialists (30 adapters "
+            "hot-swapped on the :9360 / :9361 instances)."
         ),
-        "headline": "80B MoE / 3B active · Q4_K_M · RTX 4090 + RAM offload",
+        "headline": "80B MoE / 3B active · MLX 8-bit · omlx (Mac Studio)",
         "parameters": 80_000_000_000,
         "disk_size_bytes": 48_410_988_384,
-        "memory_gb": 50.0,  # ~6 GB VRAM (attention + KV q8_0) + ~44 GB RAM (experts)
-        "quantization": "Q4_K_M",
-        "host": "kxkm-ai (NVIDIA RTX 4090 24 GB + 64 GB RAM)",
-        "architecture": "gguf",
+        "memory_gb": 50.0,
+        "quantization": "MLX 8-bit",
+        "host": "studio (omlx :8500, Mac Studio M3 Ultra)",
+        "architecture": "mlx",
         "license": "apache-2.0",
         "kind": ModelKind.QUANTIZED,
-        "hf_url": "https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Instruct",
+        "hf_url": "https://huggingface.co/Qwen",
         "top_eval_score": 0.91,
         "top_eval_benchmark": "MMLU / GSM8K",
     },
-    "ailiance/gemma3-4b": {
-        "display_name": "Gemma 3 4B IT",
-        "base_model": "google/gemma-3-4b-it",
+    "ailiance/eurollm-22b": {
+        "display_name": "EuroLLM 22B Instruct",
+        "base_model": "utter-project/EuroLLM-22B-Instruct",
+        "domain": "multilingual",
+        "description": (
+            "EU-sovereign multilingual instruction model covering all 24 EU "
+            "official languages. MLX-served by the omlx server on Mac Studio "
+            "(:8500)."
+        ),
+        "headline": "22B · multilingual EU · omlx (Mac Studio)",
+        "parameters": 22_000_000_000,
+        "disk_size_bytes": 45 * _GIB,
+        "memory_gb": 45.0,
+        "quantization": "MLX",
+        "host": "studio (omlx :8500, Mac Studio M3 Ultra)",
+        "architecture": "mlx",
+        "license": "apache-2.0",
+        "kind": ModelKind.QUANTIZED,
+        "hf_url": "https://huggingface.co/utter-project",
+        "top_eval_score": 0.74,
+        "top_eval_benchmark": "MT-Bench (multilingual)",
+    },
+    "ailiance/apertus-70b": {
+        "display_name": "Apertus 70B Instruct",
+        "base_model": "swiss-ai/Apertus-70B-Instruct-2509",
         "domain": "general",
         "description": (
-            "Google DeepMind Gemma 3 4B Instruction-Tuned — small, fast, "
-            "multilingual. Runs on tower (NVIDIA Quadro P2000, 5 GB VRAM)."
+            "Swiss-sovereign Apertus 70B instruction model. The BF16 source "
+            "was deleted in the storage cleanup; the 4-bit MLX build is "
+            "retained and served on demand by the omlx server on Mac Studio."
         ),
-        "headline": "4B params · BF16 · NVIDIA Quadro P2000",
-        "parameters": 4_000_000_000,
-        "disk_size_bytes": 8 * _GIB,
-        "memory_gb": 8.0,
-        "quantization": "BF16",
-        "host": "tower (NVIDIA Quadro P2000 5 GB)",
-        "architecture": "transformers",
-        "license": "gemma-terms",
+        "headline": "70B · MLX 4-bit · omlx (Mac Studio)",
+        "parameters": 70_000_000_000,
+        "disk_size_bytes": 37 * _GIB,
+        "memory_gb": 40.0,
+        "quantization": "MLX 4-bit",
+        "host": "studio (omlx :8500, Mac Studio M3 Ultra)",
+        "architecture": "mlx",
+        "license": "apache-2.0",
         "kind": ModelKind.QUANTIZED,
-        "hf_url": "https://huggingface.co/google/gemma-3-4b-it",
-        "top_eval_score": 0.59,
-        "top_eval_benchmark": "MMLU (small-model class)",
+        "hf_url": "https://huggingface.co/swiss-ai",
+        "top_eval_score": 0.80,
+        "top_eval_benchmark": "MMLU",
     },
     "ailiance/auto": {
         "display_name": "Auto-router",
-        "base_model": "MiniLM L6 v2 384d + 2-layer MLP",
+        "base_model": "all-MiniLM-L6-v2 384d + 2-layer MLP (hidden 256)",
         "domain": "router",
         "description": (
-            "Domain router classifies your prompt over 32 domains and forwards "
+            "Domain router classifies your prompt over 47 domains and forwards "
             "to the best specialist. Trained on the AI-Act-traceable clean "
-            "corpus (router v0.3, 2026-05-11). Hardware domains (kicad / spice / "
-            "stm32 / emc / embedded / power) route to the mascarade LoRA "
-            "specialists with a sandboxed Docker validator. Generalist domains "
-            "(math, code, multilingual, raisonnement) route directly. The "
-            "decision is shown above each reply in the playground."
+            "corpus (router v9, 2026-05-30). Hardware/EDA domains (kicad / "
+            "spice / stm32 / emc / embedded / power) route to the qwen36 "
+            "multi-LoRA specialists with a sandboxed Docker validator. "
+            "Generalist domains (math, code, multilingual, raisonnement) route "
+            "directly. The decision is shown above each reply in the playground."
         ),
-        "headline": "MiniLM 384d · 40 domains · top1≈65% top3≈86% · chain v0.3",
+        "headline": "all-MiniLM-L6-v2 384d · 47 domains · macro-F1 0.889 · router v9",
         "parameters": 22_700_000,  # MiniLM L6 v2 ≈ 22.7M
         "disk_size_bytes": 90_500_000,
         "memory_gb": 0.2,
@@ -139,9 +162,9 @@
         "architecture": "safetensors",
         "license": "apache-2.0",
         "kind": ModelKind.FINE_TUNED,
-        "hf_url": "https://huggingface.co/Ailiance-fr/router-v6-minilm",
-        "top_eval_score": 0.78,
-        "top_eval_benchmark": "iact-bench 31 domains avg",
+        "hf_url": "https://huggingface.co/Ailiance-fr",
+        "top_eval_score": 0.889,
+        "top_eval_benchmark": "iact-bench 47 domains macro-F1",
     },
     "ailiance/granite-30b": {
         "display_name": "Granite 4.1 30B Instruct",
@@ -150,16 +173,15 @@
         "description": (
             "IBM Granite 4.1 30B Instruct — code-first instruction-tuned "
             "open model with strong enterprise SQL / RAG / tool-use scores. "
-            "Q4_K_M GGUF served by llama.cpp on kxkm-ai RTX 4090 via autossh "
-            "tunnel (electron-server :8003)."
+            "MLX-served by the omlx server on Mac Studio (:8500)."
         ),
-        "headline": "30B · Q4_K_M · RTX 4090 (kxkm-ai)",
+        "headline": "30B · MLX · omlx (Mac Studio)",
         "parameters": 30_000_000_000,
         "disk_size_bytes": 18 * _GIB,
         "memory_gb": 20.0,
-        "quantization": "Q4_K_M",
-        "host": "kxkm-ai (NVIDIA RTX 4090, autossh tunnel)",
-        "architecture": "gguf",
+        "quantization": "MLX",
+        "host": "studio (omlx :8500, Mac Studio M3 Ultra)",
+        "architecture": "mlx",
         "license": "apache-2.0",
         "kind": ModelKind.QUANTIZED,
         # ibm-granite/granite-4.1-30B-instruct is gated. Point to the org.
@@ -167,47 +189,48 @@
         "top_eval_score": 0.83,
         "top_eval_benchmark": "HumanEval+ / BigBench-Hard code",
     },
-    "ailiance/ministral-14b": {
-        "display_name": "Ministral 3 14B Instruct",
-        "base_model": "mistralai/Ministral-3-14B-Instruct-2512",
-        "domain": "general",
+    "ailiance/devstral-base": {
+        "display_name": "Devstral Small 2 24B",
+        "base_model": "mistralai/Devstral-Small-2-24B",
+        "domain": "code",
         "description": (
-            "Mistral Ministral 3 14B Instruct — small, fast generalist for "
-            "FR/EN chat. MLX 4-bit on macM1 :8502."
+            "Mistral Devstral Small 2 24B — agentic coding base. Now served "
+            "by the omlx server on Mac Studio (:8500); the old macm1 :9302 "
+            "Devstral worker is decommissioned."
         ),
-        "headline": "14B · MLX 4-bit · macM1",
-        "parameters": 14_000_000_000,
-        "disk_size_bytes": 8 * _GIB,
-        "memory_gb": 9.0,
-        "quantization": "MLX 4-bit",
-        "host": "macm1 (Apple M1)",
+        "headline": "24B · MLX · omlx (Mac Studio)",
+        "parameters": 24_000_000_000,
+        "disk_size_bytes": 14 * _GIB,
+        "memory_gb": 15.0,
+        "quantization": "MLX",
+        "host": "studio (omlx :8500, Mac Studio M3 Ultra)",
         "architecture": "mlx",
         "license": "apache-2.0",
         "kind": ModelKind.QUANTIZED,
-        "hf_url": "https://huggingface.co/mistralai/Ministral-3-14B-Instruct-2512",
-        "top_eval_score": 0.78,
-        "top_eval_benchmark": "MT-Bench-FR",
+        "hf_url": "https://huggingface.co/mistralai",
+        "top_eval_score": 0.84,
+        "top_eval_benchmark": "SWE-bench Verified",
     },
-    "ailiance/ministral-14b-reasoning": {
-        "display_name": "Ministral 3 14B Reasoning",
-        "base_model": "mistralai/Ministral-3-14B-Reasoning-2512",
-        "domain": "reasoning",
+    "ailiance/mixtral-8x22b": {
+        "display_name": "Mixtral 8x22B",
+        "base_model": "mistralai/Mixtral-8x22B-Instruct-v0.1",
+        "domain": "general",
         "description": (
-            "Ministral 3 14B with reasoning fine-tune — chain-of-thought "
-            "responses for math and complex problem-solving. MLX 4-bit on macM1."
+            "Mistral Mixtral 8x22B sparse MoE generalist. MLX-served by the "
+            "omlx server on Mac Studio (:8500)."
         ),
-        "headline": "14B reasoning · MLX 4-bit · macM1",
-        "parameters": 14_000_000_000,
-        "disk_size_bytes": 8 * _GIB,
-        "memory_gb": 9.0,
-        "quantization": "MLX 4-bit",
-        "host": "macm1 (Apple M1)",
+        "headline": "8x22B MoE · MLX · omlx (Mac Studio)",
+        "parameters": 141_000_000_000,
+        "disk_size_bytes": 80 * _GIB,
+        "memory_gb": 85.0,
+        "quantization": "MLX",
+        "host": "studio (omlx :8500, Mac Studio M3 Ultra)",
         "architecture": "mlx",
         "license": "apache-2.0",
         "kind": ModelKind.QUANTIZED,
-        "hf_url": "https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512",
-        "top_eval_score": 0.85,
-        "top_eval_benchmark": "MATH / GSM8K",
+        "hf_url": "https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1",
+        "top_eval_score": 0.79,
+        "top_eval_benchmark": "MMLU",
     },
 }
 
@@ -262,9 +285,14 @@
     "display_name": "Pixtral 12B (vision)",
     "base_model": "mistralai/Pixtral-12B",
     "domain": "vision",
-    "description": "Mistral Pixtral 12B multimodal — texte + image.",
-    "headline": "vision · 12B",
-    "host": "studio",
+    "description": (
+        "Mistral Pixtral 12B multimodal — texte + image. Worker vision "
+        "canonique, servi par le serveur omlx sur Mac Studio (:8500). "
+        "Gemma 4 E4B sert de fallback vision léger."
+    ),
+    "headline": "vision · 12B · omlx (Mac Studio)",
+    "host": "studio (omlx :8500, Mac Studio M3 Ultra)",
+    "architecture": "mlx",
     "license": "apache-2.0",
     "kind": ModelKind.QUANTIZED,
     "hf_url": "https://huggingface.co/mistralai/Pixtral-12B-2409",
@@ -273,11 +301,15 @@
 }
 _LIVE_DETAILS["ailiance/reasoning-r1"] = {
     "display_name": "Reasoning R1",
-    "base_model": "DeepSeek-R1 distilled",
+    "base_model": "DeepSeek-R1-Distill-Qwen-32B",
     "domain": "reasoning",
-    "description": "Modèle de raisonnement chain-of-thought (DeepSeek-R1 distill ou équivalent).",
-    "headline": "chain-of-thought · reasoning",
-    "host": "macm1",
+    "description": (
+        "Modèle de raisonnement chain-of-thought (DeepSeek-R1 distill 32B). "
+        "Servi par le serveur omlx sur Mac Studio (:8500)."
+    ),
+    "headline": "chain-of-thought · reasoning · omlx (Mac Studio)",
+    "host": "studio (omlx :8500, Mac Studio M3 Ultra)",
+    "architecture": "mlx",
     "license": "apache-2.0",
     "kind": ModelKind.DISTILLED,
     "hf_url": "https://huggingface.co/deepseek-ai",
@@ -286,14 +318,18 @@
 }
 _LIVE_DETAILS["ailiance/coder-pro"] = {
     "display_name": "Coder Pro",
-    "base_model": "Qwen2.5-Coder-32B-Instruct ou équivalent",
+    "base_model": "Qwen3-Coder-30B-A3B-Instruct",
     "domain": "code",
-    "description": "Spécialiste code généraliste avec validators iact-bench (tsc, ruff, rustc, go vet).",
-    "headline": "code · validators",
-    "host": "macm1",
+    "description": (
+        "Spécialiste code généraliste avec validators iact-bench (tsc, ruff, "
+        "rustc, go vet). Servi par le serveur omlx sur Mac Studio (:8500)."
+    ),
+    "headline": "code · validators · omlx (Mac Studio)",
+    "host": "studio (omlx :8500, Mac Studio M3 Ultra)",
+    "architecture": "mlx",
     "license": "apache-2.0",
     "kind": ModelKind.QUANTIZED,
-    "hf_url": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
+    "hf_url": "https://huggingface.co/Qwen",
     "top_eval_score": 0.86,
     "top_eval_benchmark": "HumanEval+ / MultiPL-E",
 }
@@ -301,9 +337,13 @@
     "display_name": "Mistral Small 3.5",
     "base_model": "mistralai/Mistral-Small-3.5-24B-Instruct",
     "domain": "general",
-    "description": "Mistral Small 3.5 24B — généraliste rapide, alternative légère au Medium.",
-    "headline": "24B · général · fast",
-    "host": "studio",
+    "description": (
+        "Mistral Small 3.5 24B — généraliste rapide, alternative légère au "
+        "Medium. Servi par le serveur omlx sur Mac Studio (:8500)."
+    ),
+    "headline": "24B · général · fast · omlx (Mac Studio)",
+    "host": "studio (omlx :8500, Mac Studio M3 Ultra)",
+    "architecture": "mlx",
     "license": "apache-2.0",
     "kind": ModelKind.QUANTIZED,
     "hf_url": "https://huggingface.co/mistralai",
@@ -312,26 +352,25 @@
 }
 
 _LIVE_DETAILS["ailiance/mascarade"] = {
-    "display_name": "Mascarade · 12 LoRAs spécialistes",
-    "base_model": "ailiance/Qwen3-4B + LoRA",
+    "display_name": "Mascarade · LoRAs spécialistes qwen36",
+    "base_model": "Qwen3.6-35B-A3B + LoRA",
     "domain": "hardware-specialists",
     "description": (
-        "Famille de 12 adaptateurs LoRA fine-tunés (r=16, α=32) sur le "
-        "modèle de base ailiance/Qwen3-4B. Chacun est servi par Ollama sur "
-        "Tower (NVIDIA Quadro P2000) via le tunnel autossh "
-        "electron-server :8004 → tower:11434. L'auto-router classifie le "
-        "domaine du prompt et délègue au spécialiste correspondant, puis "
-        "fait passer la sortie dans un validator Docker sandboxé. La fiche "
-        "détaillée liste les 12 spécialistes avec leur domaine, leur "
-        "nombre de steps d'entraînement et leur validator dédié."
+        "Famille d'adaptateurs LoRA fine-tunés (curriculum) sur le modèle de "
+        "base Qwen3.6-35B-A3B. Servis par deux instances multi-LoRA sur Mac "
+        "Studio (:9360 hardware/EDA/math, :9361 code/web/lang) avec hot-swap "
+        "des 30 adaptateurs. L'auto-router classifie le domaine du prompt et "
+        "délègue au spécialiste correspondant, puis fait passer la sortie "
+        "dans un validator Docker sandboxé. La fiche détaillée liste les "
+        "spécialistes avec leur domaine et leur validator dédié."
     ),
-    "headline": "ailiance/Qwen3-4B + 12 LoRAs · Tower Ollama :8004 · validator sandbox",
-    "parameters": 4_000_000_000,
-    "disk_size_bytes": 12 * 3 * _GIB,  # 12 LoRAs × ~3 GB GGUF each
-    "memory_gb": 3.5,
-    "quantization": "Q4_K_M LoRA",
-    "host": "tower (NVIDIA Quadro P2000 5 GB)",
-    "architecture": "gguf",
+    "headline": "Qwen3.6-35B + LoRAs · multi-LoRA :9360/:9361 (Mac Studio) · validator sandbox",
+    "parameters": 35_000_000_000,
+    "disk_size_bytes": 70 * _GIB,
+    "memory_gb": 70.0,
+    "quantization": "MLX bf16 + LoRA",
+    "host": "studio (multi-LoRA :9360/:9361, Mac Studio M3 Ultra)",
+    "architecture": "mlx",
     "license": "apache-2.0",
     "kind": ModelKind.LORA,
 }
diff --git a/apps/api/src/ailiance_demo/services/chat_proxy.py b/apps/api/src/ailiance_demo/services/chat_proxy.py
index 425f87f..d00d066 100644
--- a/apps/api/src/ailiance_demo/services/chat_proxy.py
+++ b/apps/api/src/ailiance_demo/services/chat_proxy.py
@@ -18,35 +18,23 @@
     # gateway fall through to its domain router, which produced garbled output
     # in tests because the request reaches a worker that doesn't recognize the
     # model id and degenerates.
-    # --- generalist base models served by the gateway ---
-    "ailiance/mistral-medium-3.5-128b": "ailiance-mistral",
-    "ailiance/gemma4-e4b-curriculum": "ailiance-gemma4",
-    "ailiance/gemma3-4b": "ailiance-gemma",
-    "ailiance/qwen3-next-80b-a3b-instruct": "ailiance-qwen",
+    # --- generalist base models served by the omlx consolidated server ---
+    "ailiance/mistral-medium-3.5-128b": "ailiance-mistral-medium",
+    "ailiance/gemma4-e4b-curriculum": "ailiance-gemma4-omlx",
+    "ailiance/qwen3-coder-next-80b": "ailiance-qwen36",
     "ailiance/granite-30b": "ailiance-granite",
-    "ailiance/ministral-14b": "ailiance-ministral",
-    "ailiance/ministral-14b-reasoning": "ailiance-ministral-reasoning",
+    "ailiance/eurollm-22b": "ailiance-eurollm",
+    "ailiance/apertus-70b": "ailiance-apertus",
     # --- additional gateway-exposed flagship / variant aliases ---
     "ailiance/pixtral-12b": "ailiance-pixtral",
     "ailiance/reasoning-r1": "ailiance-reasoning-r1",
     "ailiance/coder-pro": "ailiance-coder-pro",
     "ailiance/mistral-small-3.5": "ailiance-mistral-small",
+    "ailiance/devstral-base": "ailiance-devstral-base",
+    "ailiance/mixtral-8x22b": "ailiance-mixtral-8x22b",
     # --- mascarade family card routes to auto-router (auto-classifies which
-    # mascarade specialist to use) ---
+    # qwen36 LoRA specialist to use) ---
     "ailiance/mascarade": "ailiance",
-    # --- mascarade hardware specialists (Qwen3-4B LoRA on Tower :8004) ---
-    "ailiance/mascarade-kicad": "ailiance-kicad",
-    "ailiance/mascarade-spice": "ailiance-spice",
-    "ailiance/mascarade-stm32": "ailiance-stm32",
-    "ailiance/mascarade-emc": "ailiance-emc",
-    "ailiance/mascarade-embedded": "ailiance-embedded",
-    "ailiance/mascarade-platformio": "ailiance-platformio",
-    "ailiance/mascarade-freecad": "ailiance-freecad",
-    "ailiance/mascarade-dsp": "ailiance-dsp",
-    "ailiance/mascarade-iot": "ailiance-iot",
-    "ailiance/mascarade-power": "ailiance-power",
-    "ailiance/mascarade-components-review": "ailiance-components-review",
-    "ailiance/mascarade-coder": "ailiance-coder",
     # The bare "ailiance" alias triggers the gateway's domain router
     # (MiniLM L6 v2 embeddings + MLP classifier) — not in MODEL_FORCE_MAP on
     # purpose. We surface the decision in the chat stream via a route
@@ -55,12 +43,13 @@
 }
 
 # Worker port → human-readable label, used for the route preamble.
+# Serving is consolidated onto the omlx multi-model server (:8500) plus the
+# two qwen36 multi-LoRA instances (:9360 / :9361), all on Mac Studio. The old
+# per-port workers (9301/8502/9303/9304/8002) are decommissioned.
 _PORT_LABELS: dict[int, str] = {
-    9301: "Mistral Medium 3.5 128B (studio)",
-    8502: "Gemma 4 E4B + ailiance curriculum LoRA (macm1)",
-    9303: "EuroLLM 22B (studio)",
-    9304: "Gemma 3 4B (tower)",
-    8002: "Qwen3.5 35B (kxkm-ai)",
+    8500: "omlx multi-model server (studio)",
+    9360: "Qwen3.6-35B multi-LoRA · hardware/EDA/math (studio)",
+    9361: "Qwen3.6-35B multi-LoRA · code/web/lang (studio)",
 }
 AILIANCE_ALIASES: frozenset[str] = frozenset(ALIAS_TO_GATEWAY_MODEL)
 
diff --git a/apps/api/src/ailiance_demo/services/gateway_probe.py b/apps/api/src/ailiance_demo/services/gateway_probe.py
index 22ab6d6..f2ba316 100644
--- a/apps/api/src/ailiance_demo/services/gateway_probe.py
+++ b/apps/api/src/ailiance_demo/services/gateway_probe.py
@@ -18,173 +18,64 @@
 
 log = structlog.get_logger()
 
+# Serving is consolidated onto Mac Studio (M3 Ultra). Three live endpoints:
+#   - omlx :8500     — multi-model server (Mistral-Medium, DeepSeek-R1,
+#                      Qwen3-Coder-30B/Next-80B, EuroLLM, granite, Mixtral,
+#                      Pixtral, gemma-4, Devstral, ...)
+#   - qwen36 :9360   — Qwen3.6-35B multi-LoRA, hardware/EDA/math specialists
+#   - qwen36 :9361   — Qwen3.6-35B multi-LoRA, code/web/lang specialists
+# The old per-port workers (studio:9301/9323/9325/9326/9327/9330/9340,
+# macm1:8502/9302, tower:9304/8004, kxkm-ai:8002/8003) are decommissioned.
 WORKERS = [
-    # --- Studio (M3 Ultra, 512 GB unified) ---
     {
-        "id": "studio-mistral-medium",
-        "label": "Mac Studio · Mistral-Medium-128B :9301",
-        "url": "http://studio:9301",
+        "id": "studio-omlx",
+        "label": "Mac Studio · omlx multi-model :8500",
+        "url": "http://100.116.92.12:8500",
         "host": "studio",
         "gpu": "Apple M3 Ultra (76-core GPU)",
         "vram_gb": 512.0,
         "tdp_w": 215,
-        "gateway_aliases": ["ailiance-mistral", "ailiance-mistral-medium"],
-        "served_models": ["Mistral-Medium-3.5-128B-MLX-Q8"],
-    },
-    {
-        "id": "studio-reasoning-r1",
-        "label": "Mac Studio · DeepSeek-R1 :9323",
-        "url": "http://studio:9323",
-        "host": "studio",
-        "gpu": "Apple M3 Ultra (76-core GPU)",
-        "vram_gb": 512.0,
-        "tdp_w": 215,
-        "gateway_aliases": ["ailiance-reasoning-r1"],
-        "served_models": ["DeepSeek-R1-Distill-Qwen-32B-MLX-4bit"],
-    },
-    {
-        "id": "studio-pixtral",
-        "label": "Mac Studio · Pixtral-12B :9325",
-        "url": "http://studio:9325",
-        "host": "studio",
-        "gpu": "Apple M3 Ultra (76-core GPU)",
-        "vram_gb": 512.0,
-        "tdp_w": 215,
-        "gateway_aliases": ["ailiance-pixtral"],
-        "served_models": ["Pixtral-12B-MLX-4bit"],
-    },
-    {
-        "id": "studio-mistral-small",
-        "label": "Mac Studio · Mistral-Small-24B :9326",
-        "url": "http://studio:9326",
-        "host": "studio",
-        "gpu": "Apple M3 Ultra (76-core GPU)",
-        "vram_gb": 512.0,
-        "tdp_w": 215,
-        "gateway_aliases": ["ailiance-mistral-small"],
-        "served_models": ["Mistral-Small-3.1-24B-Instruct-MLX-4bit"],
+        "gateway_aliases": [
+            "ailiance-mistral-medium", "ailiance-mistral", "ailiance-eurollm",
+            "ailiance-apertus", "ailiance-gemma", "ailiance-granite",
+            "ailiance-devstral-base", "ailiance-flagship", "ailiance-qwen-235b",
+            "ailiance-reasoning-r1", "ailiance-llama", "ailiance-pixtral",
+            "ailiance-gemma4-omlx", "ailiance-mistral-small", "ailiance-coder-pro",
+            "ailiance-mixtral", "ailiance-mixtral-8x22b",
+        ],
+        "served_models": [
+            "Mistral-Medium-3.5-128B-MLX-Q8",
+            "DeepSeek-R1-Distill-Qwen-32B",
+            "Qwen3-Coder-30B-A3B", "Qwen3-Coder-Next-8bit (80B MoE)",
+            "EuroLLM-22B", "granite-4.1-30b", "Mixtral-8x22B",
+            "Devstral-Small-2-24B", "Pixtral-12B", "gemma-4-E4B",
+        ],
     },
     {
-        "id": "studio-coder-pro",
-        "label": "Mac Studio · Qwen3-Coder-30B :9327",
-        "url": "http://studio:9327",
+        "id": "studio-qwen36-hardware",
+        "label": "Mac Studio · Qwen3.6-35B multi-LoRA (hardware/EDA/math) :9360",
+        "url": "http://100.116.92.12:9360",
         "host": "studio",
         "gpu": "Apple M3 Ultra (76-core GPU)",
         "vram_gb": 512.0,
         "tdp_w": 215,
-        "gateway_aliases": ["ailiance-coder-pro"],
-        "served_models": ["Qwen3-Coder-30B-A3B-Instruct-MLX-4bit"],
+        "gateway_aliases": ["ailiance-qwen36"],
+        "served_models": ["Qwen3.6-35B-A3B-MLX-BF16 + 30 LoRA hot-swap"],
     },
     {
-        "id": "studio-devstral-multi",
-        "label": "Mac Studio · Devstral multi-LoRA :9330",
-        "url": "http://studio:9330",
+        "id": "studio-qwen36-code",
+        "label": "Mac Studio · Qwen3.6-35B multi-LoRA (code/web/lang) :9361",
+        "url": "http://100.116.92.12:9361",
         "host": "studio",
         "gpu": "Apple M3 Ultra (76-core GPU)",
         "vram_gb": 512.0,
         "tdp_w": 215,
         "gateway_aliases": [
-            "ailiance-devstral-base", "ailiance-python", "ailiance-cpp",
-            "ailiance-rust-emb", "ailiance-html", "ailiance-ml-training",
+            "ailiance-python", "ailiance-cpp", "ailiance-rust-emb",
+            "ailiance-html", "ailiance-ml-training",
+            "ailiance-components-review", "ailiance-coder",
         ],
-        "served_models": ["Devstral-Small-2-24B-MLX-4bit + 5 LoRA hot-swap"],
-    },
-    # --- macM1 (M1, 32 GB) ---
-    {
-        "id": "macm1-mlx",
-        "label": "macM1 · mlx_lm.server :8502",
-        "url": "http://macm1:8502",
-        "host": "macm1",
-        "gpu": "Apple M1 (8-core GPU)",
-        "vram_gb": 32.0,
-        "tdp_w": 30,
-        # ailiance-granite is NOT here: the gateway force-maps that alias to
-        # kxkm-ai :8003, not macM1. macM1 hosts a granite-4.1-30b model but
-        # the gateway never routes the alias to it.
-        "gateway_aliases": [
-            "ailiance-gemma2", "ailiance-gemma4", "ailiance-ministral",
-            "ailiance-ministral-reasoning",
-        ],
-        "served_models": [
-            "gemma-4-E4B-it-MLX-4bit",
-            "Ministral-3-14B-Instruct-2512-4bit",
-            "Ministral-3-14B-Reasoning-2512-4bit",
-        ],
-    },
-    # --- Tower (NVIDIA Quadro P2000, 5 GB) ---
-    {
-        "id": "tower-gemma",
-        "label": "Tower · llama.cpp Gemma 3 :9304",
-        "url": "http://tower:9304",
-        "host": "tower (NVIDIA Quadro P2000)",
-        "gpu": "NVIDIA Quadro P2000",
-        "vram_gb": 5.0,
-        "tdp_w": 75,
-        "gateway_aliases": ["ailiance-gemma"],
-        "served_models": ["gemma-3-4b-it (Q4 GGUF)"],
-    },
-    {
-        # The 10 hardware mascarade aliases (kicad/spice/stm32/emc/embedded/
-        # platformio/freecad/dsp/iot/power) moved to the Studio MLX worker
-        # :9340 with PR #100/#102. Tower Ollama now only backs the two
-        # aliases the gateway still force-maps to :8004, plus the embed
-        # surface.
-        "id": "tower-ollama",
-        "label": "Tower · Ollama mascarade :8004",
-        "url": "http://host.docker.internal:8004",
-        "host": "tower (autossh tunnel)",
-        "gpu": "NVIDIA Quadro P2000",
-        "vram_gb": 5.0,
-        "tdp_w": 75,
-        "gateway_aliases": [
-            "ailiance-components-review", "ailiance-coder", "ailiance-embed",
-        ],
-        "served_models": [
-            "mascarade-components-review", "mascarade-coder-v2", "bge-m3",
-        ],
-    },
-    # --- Studio (M3 Ultra) MLX bf16 mascarade experts ---
-    {
-        "id": "studio-mascarade",
-        "label": "Mac Studio · MLX mascarade :9340",
-        "url": "http://host.docker.internal:9340",
-        "host": "studio (autossh tunnel)",
-        "gpu": "Apple M3 Ultra (76-core GPU)",
-        "vram_gb": 512.0,
-        "tdp_w": 215,
-        "gateway_aliases": [
-            "ailiance-kicad", "ailiance-spice", "ailiance-stm32", "ailiance-emc",
-            "ailiance-embedded", "ailiance-platformio", "ailiance-freecad",
-            "ailiance-dsp", "ailiance-iot", "ailiance-power",
-        ],
-        "served_models": [
-            "mascarade-kicad", "mascarade-spice", "mascarade-stm32",
-            "mascarade-emc", "mascarade-embedded", "mascarade-platformio",
-            "mascarade-freecad", "mascarade-dsp", "mascarade-iot", "mascarade-power",
-        ],
-    },
-    # --- kxkm-ai (RTX 4090, 24 GB) ---
-    {
-        "id": "kxkm-qwen",
-        "label": "kxkm-ai · llama.cpp Qwen3-Next 80B :8002",
-        "url": "http://host.docker.internal:8002",
-        "host": "kxkm-ai (RTX 4090, autossh tunnel)",
-        "gpu": "NVIDIA RTX 4090",
-        "vram_gb": 24.0,
-        "tdp_w": 450,
-        "gateway_aliases": ["ailiance-qwen"],
-        "served_models": ["Qwen3-Next-80B-A3B-Instruct (Q4_K_M MoE)"],
-    },
-    {
-        "id": "kxkm-granite",
-        "label": "kxkm-ai · llama.cpp Granite 30B :8003",
-        "url": "http://host.docker.internal:8003",
-        "host": "kxkm-ai (RTX 4090, autossh tunnel)",
-        "gpu": "NVIDIA RTX 4090",
-        "vram_gb": 24.0,
-        "tdp_w": 450,
-        "gateway_aliases": ["ailiance-granite"],
-        "served_models": ["granite-4.1-30b-instruct (Q4_K_M)"],
+        "served_models": ["Qwen3.6-35B-A3B-MLX-BF16 + 30 LoRA hot-swap"],
     },
 ]
 
@@ -196,10 +87,9 @@
 # `nvidia-smi` (Linux/NVIDIA) or `ioreg` (Apple Silicon). The api container
 # has openssh-client and /root/.ssh mounted RO from /home/electron/.ssh.
 _HOST_PROBES: dict[str, dict[str, str]] = {
+    # Serving consolidated onto Mac Studio — the other physical hosts no
+    # longer serve LLM workers, so we only probe studio's GPU.
     "studio": {"ssh": "studio", "kind": "apple"},
-    "macm1": {"ssh": "electron@macm1", "kind": "apple"},
-    "tower": {"ssh": "clems@tower", "kind": "nvidia"},
-    "kxkm-ai": {"ssh": "kxkm@10.2.0.237", "kind": "nvidia"},
 }
 
 
@@ -592,8 +482,8 @@ async def _produce() -> list[WorkerStatus]:
             request_counts = await _fetch_gateway_request_counts(client, gateway_url)
             host_probes = await _gather_host_probes()
             # Probe all workers in parallel to bound total latency to
-            # ~max(probe), not sum(probe). 11 workers * 300 ms sequential
-            # = 3.3 s -> ~500 ms. Fixes "probe indisponible" on cockpit.
+            # ~max(probe), not sum(probe). Keeps the cockpit page render
+            # fast enough that it no longer shows "probe indisponible".
             return list(
                 await asyncio.gather(
                     *(_probe_one(client, w, request_counts, host_probes) for w in WORKERS)
diff --git a/apps/api/tests/integration/test_models_endpoint.py b/apps/api/tests/integration/test_models_endpoint.py
index 3cba9c9..2b1b406 100644
--- a/apps/api/tests/integration/test_models_endpoint.py
+++ b/apps/api/tests/integration/test_models_endpoint.py
@@ -8,13 +8,14 @@ def test_list_models_returns_cards(client_with_cache: TestClient) -> None:
     assert response.status_code == 200
     cards = response.json()
     ids = {c["id"] for c in cards}
-    # Live workers + auto-router + 12 mascarade specialists + mocked HF entry.
+    # Live workers (omlx :8500 + qwen36 :9360/:9361) + auto-router +
+    # consolidated mascarade card + mocked HF entry.
     assert {
         "ailiance/mistral-medium-3.5-128b",
-        "ailiance/gemma3-4b",
-        "ailiance/qwen3-next-80b-a3b-instruct",
+        "ailiance/gemma4-e4b-curriculum",
+        "ailiance/qwen3-coder-next-80b",
         "ailiance/granite-30b",
-        "ailiance/ministral-14b",
+        "ailiance/eurollm-22b",
         "ailiance/mascarade",
         "ailiance/auto",
         "Ailiance-fr/micro-kiki-v3",
diff --git a/apps/api/tests/integration/test_status_endpoint.py b/apps/api/tests/integration/test_status_endpoint.py
index aa37725..1f7d7f6 100644
--- a/apps/api/tests/integration/test_status_endpoint.py
+++ b/apps/api/tests/integration/test_status_endpoint.py
@@ -68,21 +68,17 @@ def test_workers_constant_matches_production_fleet():
     """The hard-coded WORKERS list is the single source of truth for /status."""
     from ailiance_demo.services.gateway_probe import WORKERS
 
+    # Serving is consolidated onto Mac Studio: the omlx multi-model server
+    # (:8500) plus the two Qwen3.6-35B multi-LoRA instances (:9360 for
+    # hardware/EDA/math, :9361 for code/web/lang). The old per-port, multi-host
+    # fleet (studio:9301/9323/…, macm1, tower, kxkm-ai) is decommissioned.
     ids = {w["id"] for w in WORKERS}
     assert ids == {
-        "studio-mistral-medium", "studio-reasoning-r1", "studio-pixtral",
-        "studio-mistral-small", "studio-coder-pro", "studio-devstral-multi",
-        "studio-mascarade", "macm1-mlx", "tower-gemma", "tower-ollama",
-        "kxkm-qwen", "kxkm-granite",
+        "studio-omlx", "studio-qwen36-hardware", "studio-qwen36-code",
     }
     by_id = {w["id"]: w for w in WORKERS}
-    # kxkm-*, tower-ollama and studio-mascarade reach the cockpit via autossh
-    # tunnels owned by the gateway host; from inside the api container we must
-    # talk to host.docker.internal.
-    assert "host.docker.internal" in by_id["kxkm-qwen"]["url"]
-    assert "host.docker.internal" in by_id["kxkm-granite"]["url"]
-    assert "host.docker.internal" in by_id["tower-ollama"]["url"]
-    assert "host.docker.internal" in by_id["studio-mascarade"]["url"]
-    # Other workers are addressed over Tailscale magic DNS.
-    assert by_id["studio-mistral-medium"]["url"] == "http://studio:9301"
-    assert by_id["tower-gemma"]["url"] == "http://tower:9304"
+    # All three workers live on Mac Studio, reached over Tailscale by IP.
+    assert by_id["studio-omlx"]["url"] == "http://100.116.92.12:8500"
+    assert by_id["studio-qwen36-hardware"]["url"] == "http://100.116.92.12:9360"
+    assert by_id["studio-qwen36-code"]["url"] == "http://100.116.92.12:9361"
+    assert all(w["host"] == "studio" for w in WORKERS)
diff --git a/apps/api/tests/integration/test_workers_endpoint.py b/apps/api/tests/integration/test_workers_endpoint.py
index 4bb586f..9875599 100644
--- a/apps/api/tests/integration/test_workers_endpoint.py
+++ b/apps/api/tests/integration/test_workers_endpoint.py
@@ -27,17 +27,15 @@ def test_workers_status_returns_list(empty_hf_cache, empty_eval_index) -> None:
     )
     assert response.status_code == 200
     workers = response.json()
-    # 6 default workers configured: gateway + 5-worker production fleet
-    # (mistral-medium-3.5, gemma4-e4b-curriculum, eurollm, gemma3, qwen3-next).
-    assert len(workers) == 6
+    # 4 default workers configured: gateway + the consolidated Mac Studio
+    # serving fleet (omlx multi-model :8500, qwen36 multi-LoRA :9360/:9361).
+    assert len(workers) == 4
     names = {w["name"] for w in workers}
     assert names == {
         "gateway",
-        "mistral-medium-3.5",
-        "gemma4-e4b-curriculum",
-        "eurollm",
-        "gemma3",
-        "qwen3-next",
+        "omlx",
+        "qwen36-hardware",
+        "qwen36-code",
     }
     # Each entry must report a valid health status; we don't assert "down"
     # because this test sometimes runs from a host that can actually reach
diff --git a/apps/cockpit-public/src/components/ChatPlayground/ChatPlayground.tsx b/apps/cockpit-public/src/components/ChatPlayground/ChatPlayground.tsx
index a1ec547..b7dc446 100644
--- a/apps/cockpit-public/src/components/ChatPlayground/ChatPlayground.tsx
+++ b/apps/cockpit-public/src/components/ChatPlayground/ChatPlayground.tsx
@@ -18,10 +18,9 @@ interface Props {
 // Worker-side payloads remain capped by their own context window; this is
 // only a Playground UX default. Power users can override via ParamsPanel.
 const REASONING_ALIASES = new Set([
-  'ailiance-gemma2',
   'ailiance-reasoning-r1',
-  'ailiance-ministral-reasoning',
-  'ailiance-apertus-math-reasoning',
+  'ailiance-qwen-235b',
+  'ailiance-qwen36',
 ]);
 
 const DEFAULT_MAX_TOKENS = 1024;
diff --git a/apps/cockpit-public/src/components/filters/BaseModelFilter.tsx b/apps/cockpit-public/src/components/filters/BaseModelFilter.tsx
index 1f60920..617f695 100644
--- a/apps/cockpit-public/src/components/filters/BaseModelFilter.tsx
+++ b/apps/cockpit-public/src/components/filters/BaseModelFilter.tsx
@@ -1,10 +1,11 @@
 const BASES = [
-  'mistral-large-123b',
-  'qwen3.5-122b',
-  'qwen3.5-35b',
-  'apertus-70b',
+  'mistral-medium-128b',
+  'qwen3.6-35b',
   'devstral-24b',
   'eurollm-22b',
+  'granite-4.1-30b',
+  'gemma-4-e4b',
+  'apertus-70b',
 ] as const;
 
 interface Props {
diff --git a/apps/cockpit-public/src/routes/about.lazy.tsx b/apps/cockpit-public/src/routes/about.lazy.tsx
index 49a09b0..a88d125 100644
--- a/apps/cockpit-public/src/routes/about.lazy.tsx
+++ b/apps/cockpit-public/src/routes/about.lazy.tsx
@@ -37,25 +37,29 @@ function AboutPage() {
           <h2>Stack technique</h2>
           <ul>
             <li>
-              <strong>Entraînement</strong> — MLX bf16 LoRA sur Mistral Large 123B,
-              Qwen3.5-122B/35B, Apertus 70B, Devstral 24B, EuroLLM 22B
+              <strong>Entraînement</strong> — MLX bf16 LoRA sur Mistral-Medium 128B, Qwen3.6-35B,
+              Devstral-Small-2-24B, EuroLLM 22B, gemma-4-E4B
             </li>
             <li>
-              <strong>Routage</strong> — embeddings MiniLM L6 v2 384d + classifier MLP (40 domaines
-              prédits, 5 fallback) avec cache deux niveaux (L1 hash + L2 sémantique)
+              <strong>Routage</strong> — embeddings MiniLM-L6-v2 384d + classifier MLP 2 couches
+              (hidden 256, 47 domaines, macro-F1 0,889) avec cache deux niveaux (L1 hash + L2
+              sémantique). Jina v3 a été évalué puis écarté au bench (top-1 inférieur, encodage plus
+              lent).
             </li>
             <li>
-              <strong>Orchestration</strong> — <strong>router v0.3 Deliberation chain</strong> :
+              <strong>Orchestration</strong> — <strong>router v9 Deliberation chain</strong> :
               auto-engagé sur <code>model: "ailiance"</code> pour les domaines hardware / code, fait
               passer la sortie LLM dans un validator iact-bench sandboxé, retry avec feedback stderr
               en cas d'échec, émet NDJSON audit par chaîne
             </li>
             <li>
-              <strong>Serving</strong> — gateway FastAPI multi-worker, BF16, pool mémoire partagé
+              <strong>Serving</strong> — serveur omlx multi-modèle consolidé sur Mac Studio (port
+              8500) + deux instances qwen36-35B multi-LoRA, derrière la gateway FastAPI
             </li>
             <li>
-              <strong>Évaluation</strong> — Lighteval + EvalPlus + MT-Bench + iact-bench v0.2.0 (31
-              domaines × 23 modèles, 25 validators Docker sandboxés)
+              <strong>Évaluation</strong> — Lighteval + EvalPlus + MT-Bench + iact-bench v1 (31
+              domaines × ≤23 modèles, ~46 validators sur 3 backends : sandbox Docker, kicad-mcp-pro,
+              KiKit)
             </li>
           </ul>
 
@@ -116,7 +120,7 @@ function AboutPage() {
               >
                 ailiance
               </a>{' '}
-              — la gateway LLM elle-même (workers, router-v6, dossier EU AI Act).
+              — la gateway LLM elle-même (workers, router v9, dossier EU AI Act).
             </li>
             <li>
               <a
diff --git a/apps/cockpit-public/src/routes/bench.lazy.tsx b/apps/cockpit-public/src/routes/bench.lazy.tsx
index 262c6ae..b4010d3 100644
--- a/apps/cockpit-public/src/routes/bench.lazy.tsx
+++ b/apps/cockpit-public/src/routes/bench.lazy.tsx
@@ -316,7 +316,7 @@ function BenchPage() {
           }}
         >
           L'alias <code>ailiance</code> route automatiquement chaque requête vers le worker
-          spécialisé via un classifier MLP entraîné sur 32 classes de domaine.
+          spécialisé via un classifier MLP entraîné sur 47 classes de domaine.
         </p>
         <div style={{ overflowX: 'auto', borderTop: '1px solid var(--rule)', marginTop: 12 }}>
           <table
@@ -332,7 +332,7 @@ function BenchPage() {
                 <th style={thStyle}>Version</th>
                 <th style={thStyle}>Encoder</th>
                 <th style={{ ...thStyle, textAlign: 'right' }}>Hidden</th>
-                <th style={{ ...thStyle, textAlign: 'right' }}>Top-1</th>
+                <th style={{ ...thStyle, textAlign: 'right' }}>Macro-F1</th>
                 <th style={{ ...thStyle, textAlign: 'right' }}>Top-3</th>
                 <th style={thStyle}>Notes</th>
               </tr>
@@ -340,33 +340,25 @@ function BenchPage() {
             <tbody>
               <tr style={{ borderBottom: '1px solid var(--rule-soft, var(--rule))' }}>
                 <td style={tdStyle}>
-                  <code>v6</code> (prod)
+                  <code>v9</code> (prod)
                 </td>
-                <td style={tdStyle}>jina-v3 1024d</td>
-                <td style={{ ...tdStyle, textAlign: 'right' }}>512</td>
-                <td style={{ ...tdStyle, textAlign: 'right', fontWeight: 600 }}>0.877</td>
-                <td style={{ ...tdStyle, textAlign: 'right', fontWeight: 600 }}>0.987</td>
-                <td style={tdStyle}>déployé depuis 2026-05-08</td>
-              </tr>
-              <tr style={{ borderBottom: '1px solid var(--rule-soft, var(--rule))' }}>
-                <td style={tdStyle}>
-                  <code>v7</code>
-                </td>
-                <td style={tdStyle}>MiniLM-L6 384d</td>
+                <td style={tdStyle}>MiniLM-L6-v2 384d</td>
                 <td style={{ ...tdStyle, textAlign: 'right' }}>256</td>
-                <td style={{ ...tdStyle, textAlign: 'right' }}>0.879</td>
-                <td style={{ ...tdStyle, textAlign: 'right' }}>0.988</td>
-                <td style={tdStyle}>test régression encoder</td>
+                <td style={{ ...tdStyle, textAlign: 'right', fontWeight: 600 }}>0.889</td>
+                <td style={{ ...tdStyle, textAlign: 'right', fontWeight: 600 }}>0.988</td>
+                <td style={tdStyle}>47 domaines · déployé 2026-05-29</td>
               </tr>
               <tr style={{ borderBottom: '1px solid var(--rule-soft, var(--rule))' }}>
                 <td style={tdStyle}>
-                  <code>v8</code>
+                  <code>v6</code> (candidat)
                 </td>
                 <td style={tdStyle}>jina-v3 1024d</td>
                 <td style={{ ...tdStyle, textAlign: 'right' }}>512</td>
-                <td style={{ ...tdStyle, textAlign: 'right' }}>0.875</td>
-                <td style={{ ...tdStyle, textAlign: 'right' }}>—</td>
-                <td style={tdStyle}>data augmentée, variation marginale</td>
+                <td style={{ ...tdStyle, textAlign: 'right' }}>0.874</td>
+                <td style={{ ...tdStyle, textAlign: 'right' }}>0.987</td>
+                <td style={tdStyle}>
+                  évalué puis écarté (top-1 inférieur, encodage ~6× plus lent)
+                </td>
               </tr>
             </tbody>
           </table>
diff --git a/apps/cockpit-public/src/routes/catalog.lazy.tsx b/apps/cockpit-public/src/routes/catalog.lazy.tsx
index cd7b47a..06552fb 100644
--- a/apps/cockpit-public/src/routes/catalog.lazy.tsx
+++ b/apps/cockpit-public/src/routes/catalog.lazy.tsx
@@ -83,7 +83,7 @@ function CatalogPage() {
         >
           Source-of-truth des poids LoRA et modèles fine-tunés Ailiance software, distribués
           publiquement sur HuggingFace. {aiCount} dépôts <code>Ailiance-fr</code> (fine-tunes
-          mascarade, devstral, gemma-4, apertus, eurollm) et {erCount} dépôts{' '}
+          mascarade, qwen36, devstral, gemma-4, apertus, eurollm) et {erCount} dépôts{' '}
           <code>electron-rare</code> <span className="badge hf">legacy</span> conservés pour
           traçabilité historique des releases avant la migration sur l'organisation{' '}
           <code>Ailiance-fr</code>.
diff --git a/apps/cockpit-public/src/routes/index.tsx b/apps/cockpit-public/src/routes/index.tsx
index 2a9bde0..9b74dcc 100644
--- a/apps/cockpit-public/src/routes/index.tsx
+++ b/apps/cockpit-public/src/routes/index.tsx
@@ -91,8 +91,8 @@ function HomePage() {
           </p>
           <div className="hero-meta">
             <div>
-              <span>5</span> workers · <span>24</span> LoRA publics · <span>31</span> domaines
-              évalués
+              <span>26</span> aliases gateway · <span>47</span> domaines routés · <span>31</span>{' '}
+              domaines évalués
             </div>
             <div>
               <span>0</span> dépendance cloud · <span>0</span> log de prompt persisté
@@ -116,7 +116,7 @@ function HomePage() {
             en quatre missions.
           </h2>
           <p className="lede">
-            Pourquoi exploiter cinq workers sur du matériel personnel quand un appel d'API
+            Pourquoi exploiter sa propre flotte sur du matériel personnel quand un appel d'API
             suffirait&nbsp;? Parce qu'il existe encore un standard plus exigeant que la latence : la{' '}
             <em>traçabilité</em>.
           </p>
diff --git a/apps/cockpit-public/src/routes/models.$owner.$name.lazy.tsx b/apps/cockpit-public/src/routes/models.$owner.$name.lazy.tsx
index 4b023f5..12d865a 100644
--- a/apps/cockpit-public/src/routes/models.$owner.$name.lazy.tsx
+++ b/apps/cockpit-public/src/routes/models.$owner.$name.lazy.tsx
@@ -254,8 +254,9 @@ function ModelDetailPage() {
               >
                 Toutes les adapters partagent le même modèle de base{' '}
                 <code style={{ fontFamily: 'var(--mono)', fontSize: 12 }}>ailiance/Qwen3-4B</code>{' '}
-                avec LoRA r=16 / α=32. Servies par Ollama sur Tower (NVIDIA Quadro P2000) via le
-                tunnel autossh electron-server :8004.
+                avec LoRA r=16 / α=32. Famille publiée sur HuggingFace ; le routage live est
+                désormais assuré par les adaptateurs qwen36-35B servis sur Mac Studio (:9360 /
+                :9361).
               </p>
               <div
                 style={{
@@ -357,8 +358,8 @@ function ModelDetailPage() {
                   margin: '0 0 16px',
                 }}
               >
-                Sur les domaines hardware/code, l'auto-router délègue à un des 12 spécialistes
-                mascarade (LoRA Qwen3-4B sur Tower Ollama :8004).
+                Sur les domaines hardware/code, l'auto-router délègue à un des 30 adaptateurs
+                spécialistes qwen36-35B servis sur Mac Studio (:9360 / :9361).
               </p>
               <div
                 style={{
diff --git a/apps/cockpit-public/src/routes/models.index.lazy.tsx b/apps/cockpit-public/src/routes/models.index.lazy.tsx
index 7aab305..8804dcb 100644
--- a/apps/cockpit-public/src/routes/models.index.lazy.tsx
+++ b/apps/cockpit-public/src/routes/models.index.lazy.tsx
@@ -9,7 +9,7 @@ export const Route = createLazyFileRoute('/models/')({
   component: ModelsPage,
 });
 
-// Bench origine vs tuné — extrait iact-bench v0.2.0. À remplacer par fetch /api/public/bench plus tard.
+// Bench origine vs tuné — extrait iact-bench v1. À remplacer par fetch /api/public/bench plus tard.
 const BENCH = [
   { domain: 'KiCad DSL', origin: 12, tuned: 67, model: 'ailiance/auto' },
   { domain: 'KiCad PCB', origin: 18, tuned: 60, model: 'ailiance/auto' },
@@ -190,7 +190,7 @@ function ModelsPage() {
         <div className="block-head">
           <h2>L'auto-router, par domaine.</h2>
           <p className="lede">
-            Le prompt entre. Un classifier embeddings le situe sur l'un des 32 domaines. Le routeur
+            Le prompt entre. Un classifier embeddings le situe sur l'un des 47 domaines. Le routeur
             ouvre la politique YAML correspondante et choisit le spécialiste. Sur les domaines
             hardware, la sortie passe par un validator Docker sandboxé avant retour utilisateur.
           </p>
@@ -206,8 +206,8 @@ function ModelsPage() {
           <RouterArrow label="POST /api/public/chat" />
           <RouterStep
             num="2"
-            title="Classifier MiniLM L6 v2 + MLP"
-            sub="MiniLM L6 v2 384d · 40 domaines prédits · cache L1 hash + L2 cosinus sémantique"
+            title="Classifier MiniLM-L6-v2 + MLP"
+            sub="MiniLM-L6-v2 384d · MLP 2 couches (hidden 256) · 47 domaines · cache L1 hash + L2 cosinus sémantique"
             tone="accent"
             chips={['kicad', 'spice', 'stm32', 'emc', 'embedded', 'code', 'math', '…']}
           />
@@ -219,8 +219,8 @@ function ModelsPage() {
               </span>
               <RouterStep
                 num="3a"
-                title="Spécialiste mascarade-*"
-                sub="LoRA Qwen3-4B fine-tunée sur le domaine (kicad / spice / stm32 / emc / embedded / power…) · Tower Ollama :8004"
+                title="Spécialiste qwen36-*"
+                sub="LoRA qwen36-35B hot-swap fine-tunée sur le domaine (kicad / spice / stm32 / emc / embedded / power…) · serveurs multi-LoRA :9360 / :9361"
                 tone="hardware"
               />
               <RouterArrow label="sortie LLM" small />
@@ -239,7 +239,7 @@ function ModelsPage() {
               <RouterStep
                 num="3b"
                 title="Backend généraliste"
-                sub="Apertus 70B (souverain) · Qwen3-Next 80B (raisonnement) · EuroLLM 22B (multilingue) · Devstral 24B (code) · Gemma 4 (fallback)"
+                sub="Mistral-Medium 128B · Qwen3-Coder-Next 80B (raisonnement) · EuroLLM 22B (multilingue) · Devstral-Small 24B (code) · Pixtral 12B (vision) — servis via omlx :8500"
                 tone="direct"
               />
               <RouterArrow label="sortie directe" small />
@@ -281,7 +281,7 @@ function ModelsPage() {
         <div className="fleet">
           <div className="fleet-head">
             <span className="live">
-              <span className="dot" /> gateway :9300 · router v0.3 · live probe
+              <span className="dot" /> gateway :9300 · router v9 · live probe
             </span>
             <span>
               {upCount} / {totalCount} healthy
@@ -429,7 +429,7 @@ function ModelsPage() {
             Bench — origine <em>vs</em> tuné.
           </h2>
           <p className="lede">
-            iact-bench v0.2.0, sandbox Docker épinglé par digest. Score = % cellules avec validator
+            iact-bench v1, sandbox Docker épinglé par digest. Score = % cellules avec validator
             exit-zéro. Origine = modèle base sans routage. Tuné = via auto-router + validator chain.
           </p>
         </div>
diff --git a/apps/cockpit-public/src/routes/status.lazy.tsx b/apps/cockpit-public/src/routes/status.lazy.tsx
index fa43da0..7e219d6 100644
--- a/apps/cockpit-public/src/routes/status.lazy.tsx
+++ b/apps/cockpit-public/src/routes/status.lazy.tsx
@@ -145,7 +145,8 @@ const INCIDENTS = [
   { d: '06 mai', s: 'kxkm-ai', e: 'autossh restart · 4 min downtime', ok: false },
   { d: '01 mai', s: 'studio', e: 'MLX model reload · 2 min', ok: true },
   { d: '24 avril', s: 'tower', e: 'OS kernel panic, replaced PSU', ok: false },
-  { d: '12 avril', s: '—', e: 'router v0.3 shipped', ok: true },
+  { d: '29 mai', s: 'studio', e: 'serving consolidé sur omlx :8500', ok: true },
+  { d: '12 avril', s: '—', e: 'router v9 shipped', ok: true },
 ];
 
 function StatusPage() {
@@ -247,7 +248,7 @@ function StatusPage() {
             <span className="dot" />
             <div>
               <div className="name">auto-router</div>
-              <div className="sub">MiniLM v6 · classifier MLP</div>
+              <div className="sub">MiniLM-L6-v2 384d · classifier MLP</div>
             </div>
             <div>
               <div style={{ color: 'var(--ink)' }}>studio.tail</div>
@@ -281,7 +282,7 @@ function StatusPage() {
               814 h
             </div>
             <div style={{ fontFamily: 'var(--mono)', fontSize: 11, color: 'var(--ink-3)' }}>
-              87.7% top-1
+              88.9% macro-F1
             </div>
             <StatusBadge ok={true} />
           </div>
@@ -312,11 +313,9 @@ function StatusPage() {
               }}
             >
               {`[gateway_probe.py] tick = ${30 - (tick % 30)}s
-  studio:9301      → 200 OK · 312 ms · apertus-70b loaded
-  macm1:9302       → 200 OK · 188 ms · devstral-24b loaded
-  studio:9303      → 200 OK · 224 ms · eurollm-22b loaded
-  tower:9304       → 200 OK · 92  ms · gemma3-4b loaded
-  kxkm-ai:8002     → 200 OK · 421 ms · qwen3-next-80b loaded  (via autossh tunnel)
+  studio:8500      → 200 OK · 224 ms · omlx multi-modèle (catalogue chargé)
+  studio:9360      → 200 OK · 188 ms · qwen36-35B multi-LoRA (hardware/EDA/math)
+  studio:9361      → 200 OK · 196 ms · qwen36-35B multi-LoRA (code/web/lang)
   ----
   cache age:       12 s
   next refresh:    ${30 - (tick % 30)} s`}
diff --git a/apps/cockpit-public/src/routes/transparency.lazy.tsx b/apps/cockpit-public/src/routes/transparency.lazy.tsx
index 7ad6bc0..2e43603 100644
--- a/apps/cockpit-public/src/routes/transparency.lazy.tsx
+++ b/apps/cockpit-public/src/routes/transparency.lazy.tsx
@@ -45,7 +45,7 @@ const ENTRIES: ProvenanceEntry[] = [
     license: 'Gemma Terms',
     provenanceUrl:
       'https://github.com/ailiance/ailiance/blob/main/docs/provenance/gemma-3-4b-it.json',
-    notes: 'Worker léger · NVIDIA Quadro P2000 5 GB',
+    notes: 'Servi via omlx :8500 · fallback vision léger (gemma-4-E4B)',
   },
   {
     alias: 'ailiance/qwen3-next-80b',
@@ -54,16 +54,16 @@ const ENTRIES: ProvenanceEntry[] = [
     license: 'Apache-2.0',
     provenanceUrl:
       'https://github.com/ailiance/ailiance/blob/main/docs/provenance/qwen3-next-80b-a3b-instruct.json',
-    notes: 'MoE 80B / 3B actif · RTX 4090 + RAM offload',
+    notes: 'MoE 80B / 3B actif · servi via omlx :8500 (Qwen3-Coder-Next-8bit)',
   },
   {
     alias: 'ailiance/auto',
-    base: 'MiniLM L6 v2 384d + 2-layer MLP + chain orchestrator',
+    base: 'MiniLM-L6-v2 384d + MLP 2 couches (hidden 256) + chain orchestrator',
     provider: 'Microsoft (MiniLM) + Ailiance software',
     license: 'Apache-2.0',
     provenanceUrl:
       'https://github.com/ailiance/ailiance/blob/main/docs/provenance/auto-router-minilm.json',
-    notes: 'Classifier 32 domaines · chain v0.3',
+    notes: 'Classifier 47 domaines (macro-F1 0,889) · chain v9',
   },
 ];
 
@@ -213,9 +213,15 @@ function TransparencyPage() {
               fichier de provenance
             </li>
             <li>
-              <strong>Validation pré-publication</strong> — iact-bench complet (31 domaines × 23
-              modèles) + sandbox Docker validators (g++, KiCad DRC/ERC, ngspice, shellcheck, tsc,
-              etc.) avec digests sha256 épinglés
+              <strong>Validation pré-publication</strong> — iact-bench complet (31 domaines × ≤23
+              modèles) + ~46 validators sur 3 backends : sandbox Docker (g++, KiCad DRC/ERC,
+              ngspice, shellcheck, tsc, etc.), kicad-mcp-pro et KiKit, avec digests sha256 épinglés
+            </li>
+            <li>
+              <strong>Jury LLM</strong> — le score LLM-judge d'iact-bench est calculé par
+              Qwen3-Coder-30B et EuroLLM-22B. Mistral-Small-3.1 est tenu à l'écart de l'usage
+              texte/jury en raison d'un bug connu du détokeniseur omlx (remonté en amont, contourné
+              en aval).
             </li>
             <li>
               <strong>Critères de release</strong> — gain mesurable sur le domaine cible vs base
@@ -314,10 +320,11 @@ function TransparencyPage() {
               </p>
               <p style={{ margin: '10px 0 0', color: 'var(--ink-3)', fontSize: 14 }}>
                 La sortie du modèle est <em>la seule entrée</em> du validator : pas d'exfiltration
-                de données, pas de fuite d'environnement. Douze validators stables aujourd'hui (g++,
-                arm-none-eabi-gcc, cargo embedded, shellcheck, tsc, ngspice, KiCad DRC/ERC, FreeCAD
-                scripting, html5lib strict, sqlglot, JSON/YAML). Dix validators EDA/MCAD
-                supplémentaires en v0.3.0.
+                de données, pas de fuite d'environnement. ~46 validators sur 3 backends aujourd'hui
+                — sandbox Docker (g++, arm-none-eabi-gcc, cargo embedded, shellcheck, tsc, ngspice,
+                KiCad DRC/ERC, FreeCAD scripting, html5lib strict, sqlglot, JSON/YAML, atopile,
+                KiKit DRC/fab…), kicad-mcp-pro (validators <code>kicad-pro-*</code> :
+                DRC/EMC/DFM/quality-gate) et KiKit headless.
               </p>
             </div>
           </div>