From 5562ed2f889e339120971e3224e9f1049bbcb861 Mon Sep 17 00:00:00 2001
From: Raygama <daffaraygama55@gmail.com>
Date: Thu, 9 Apr 2026 20:00:13 +0700
Subject: [PATCH 1/3] Add provider abstraction, settings API & keys

Introduce a Provider Abstraction Layer and settings management: add provider registry and provider adapters (OpenAI-compatible and Anthropic), with retry/fallback logic and provider inference.
- Add settings_service (Supabase-backed) and FastAPI /api/v1/settings routes to get/update settings and API keys; wire settings router into app. Refactor LLMService to delegate to provider_registry.
- Enhance memory_service to support OpenAI, OpenRouter and local Ollama embeddings and detect Ollama runtime.
- Update prompter to respect admin/system_prompt from settings.
- Add dashboard ApiKeys UI, test fixtures, and minor model/asset tweaks. Also add new env variables to .env.example for Anthropic, Groq and Ollama.
---
 .env.example                                  |   5 +
 ai-service/app/api/v1/settings.py             |  62 +++
 ai-service/app/core/config.py                 |   3 +
 ai-service/app/main.py                        |   2 +
 ai-service/app/services/llm.py                |  93 +---
 ai-service/app/services/memory_service.py     |  40 +-
 ai-service/app/services/prompter.py           |  45 +-
 ai-service/app/services/providers/__init__.py |   3 +
 .../services/providers/anthropic_provider.py  | 183 +++++++
 ai-service/app/services/providers/base.py     | 147 ++++++
 .../app/services/providers/openai_compat.py   | 183 +++++++
 ai-service/app/services/providers/registry.py | 245 +++++++++
 ai-service/app/services/settings_service.py   |  82 +++
 ai-service/tests/__init__.py                  |   0
 ai-service/tests/conftest.py                  |  71 +++
 ai-service/tests/providers/__init__.py        |   0
 .../public/models/hutao/Hu Tao.model3.json    |  12 +-
 dashboard/src/components/ApiKeys.jsx          | 142 +++++
 dashboard/src/components/AvatarRenderer.jsx   | 483 +++++++++++-------
 .../src/components/AvatarRenderer.test.jsx    | 205 +++++---
 dashboard/src/components/CallOverlay.jsx      |  28 +-
 dashboard/src/components/PersonalityTuner.jsx | 138 ++++-
 voice-agent/agent.py                          | 421 +++++++++------
 voice-agent/aura_tts.py                       | 259 +++++-----
 voice-agent/vtube_controller.py               |  56 +-
 25 files changed, 2200 insertions(+), 708 deletions(-)
 create mode 100644 ai-service/app/api/v1/settings.py
 create mode 100644 ai-service/app/services/providers/__init__.py
 create mode 100644 ai-service/app/services/providers/anthropic_provider.py
 create mode 100644 ai-service/app/services/providers/base.py
 create mode 100644 ai-service/app/services/providers/openai_compat.py
 create mode 100644 ai-service/app/services/providers/registry.py
 create mode 100644 ai-service/app/services/settings_service.py
 create mode 100644 ai-service/tests/__init__.py
 create mode 100644 ai-service/tests/conftest.py
 create mode 100644 ai-service/tests/providers/__init__.py
 create mode 100644 dashboard/src/components/ApiKeys.jsx

diff --git a/.env.example b/.env.example
index 57f76b7..35bbb00 100644
--- a/.env.example
+++ b/.env.example
@@ -2,6 +2,11 @@
 # OpenRouter API Key - Get yours at https://openrouter.ai/keys
 OPENROUTER_API_KEY=your_openrouter_key_here
 
+# Optional: Direct provider settings (used when provider != openrouter) - ANTHROPIC_API_KEY: required for claude-* models; GROQ_API_KEY: required for Groq provider (fast Llama/Mixtral); OLLAMA_BASE_URL: local Ollama endpoint
+ANTHROPIC_API_KEY=
+GROQ_API_KEY=
+OLLAMA_BASE_URL=http://localhost:11434
+
 # --- VOICE CONFIGURATION (LIVEKIT AGENTS) ---
 # Deepgram API Key (STT) - Get yours at https://console.deepgram.com/
 DEEPGRAM_API_KEY=your_deepgram_key_here
diff --git a/ai-service/app/api/v1/settings.py b/ai-service/app/api/v1/settings.py
new file mode 100644
index 0000000..77941dd
--- /dev/null
+++ b/ai-service/app/api/v1/settings.py
@@ -0,0 +1,62 @@
+from fastapi import APIRouter
+from pydantic import BaseModel
+from app.services.settings_service import settings_service
+
+router = APIRouter()
+
+PROVIDERS = ["openrouter", "openai", "anthropic", "groq", "ollama"]
+
+
+class SettingsPatch(BaseModel):
+    system_prompt: str | None = None
+    model:         str | None = None
+    provider:      str | None = None
+    temperature:   float | None = None
+    max_tokens:    int | None = None
+    empathy:       int | None = None
+    humor:         int | None = None
+    formality:     int | None = None
+
+
+class ApiKeysPatch(BaseModel):
+    openrouter_api_key: str | None = None
+    deepgram_api_key:   str | None = None
+    cartesia_api_key:   str | None = None
+    anthropic_api_key:  str | None = None
+    groq_api_key:       str | None = None
+    ollama_base_url:    str | None = None
+    livekit_url:        str | None = None
+    livekit_api_key:    str | None = None
+    livekit_api_secret: str | None = None
+
+
+@router.get("")
+def get_settings():
+    return settings_service.get_settings()
+
+
+@router.put("")
+def update_settings(patch: SettingsPatch):
+    data = {k: v for k, v in patch.model_dump().items() if v is not None}
+    return settings_service.update_settings(data)
+
+
+@router.get("/providers")
+def list_providers():
+    """Return available provider names for the UI dropdown."""
+    return {"providers": PROVIDERS}
+
+
+@router.get("/keys")
+def get_api_keys():
+    keys = settings_service.get_api_keys()
+    # Return masked values — just signals whether the key is configured
+    return {k: ("set" if (v and str(v).strip()) else None)
+            for k, v in keys.items() if k != "id"}
+
+
+@router.put("/keys")
+def update_api_keys(patch: ApiKeysPatch):
+    data = {k: v for k, v in patch.model_dump().items() if v is not None}
+    settings_service.update_api_keys(data)
+    return {"status": "ok"}
diff --git a/ai-service/app/core/config.py b/ai-service/app/core/config.py
index 5eff0f1..51de459 100644
--- a/ai-service/app/core/config.py
+++ b/ai-service/app/core/config.py
@@ -28,6 +28,9 @@ class Settings(BaseSettings):
     LLM_API_KEY: str | None = None
     OPENAI_API_KEY: str | None = None
     OPENROUTER_API_KEY: str | None = None
+    ANTHROPIC_API_KEY: str | None = None
+    GROQ_API_KEY: str | None = None
+    OLLAMA_BASE_URL: str = "http://localhost:11434"
     OPENAI_MODEL: str = "gpt-3.5-turbo"
 
     # Supabase
diff --git a/ai-service/app/main.py b/ai-service/app/main.py
index 4f78536..61e93b4 100644
--- a/ai-service/app/main.py
+++ b/ai-service/app/main.py
@@ -1,6 +1,7 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from app.api.v1 import chat, health, memory, rag
+from app.api.v1 import settings as settings_router
 from app.core.config import settings
 import logging
 
@@ -31,6 +32,7 @@
 app.include_router(chat.router, prefix="/api/v1/chat", tags=["Chat"])
 app.include_router(memory.router, prefix="/api/v1/memory", tags=["Memory"])
 app.include_router(rag.router, prefix="/api/v1/rag", tags=["RAG"])
+app.include_router(settings_router.router, prefix="/api/v1/settings", tags=["Settings"])
 
 @app.get("/")
 def read_root():
diff --git a/ai-service/app/services/llm.py b/ai-service/app/services/llm.py
index a1d9c3c..7546e88 100644
--- a/ai-service/app/services/llm.py
+++ b/ai-service/app/services/llm.py
@@ -1,80 +1,29 @@
-from openai import OpenAI
-from app.core.config import settings
+"""
+LLMService — thin facade over the Provider Abstraction Layer.
+
+All routing logic lives in providers/registry.py.
+This class exists so existing callers (brain nodes, etc.) don't need to change.
+"""
 import logging
-import re
+from app.services.providers.registry import provider_registry
 
 logger = logging.getLogger(__name__)
 
-class LLMService:
-    def __init__(self):
-        self.api_key = settings.OPENROUTER_API_KEY or settings.OPENAI_API_KEY
-        self.model = settings.OPENAI_MODEL or "openai/gpt-3.5-turbo"
-        self.client = None
-        
-        # Determine Base URL (OpenRouter vs OpenAI)
-        self.base_url = "https://openrouter.ai/api/v1" if settings.OPENROUTER_API_KEY else None
-
-        if self.api_key:
-            self.client = OpenAI(
-                api_key=self.api_key,
-                base_url=self.base_url
-            )
-            logger.info(f"LLM Service Initialized. Model: {self.model}, Base: {self.base_url or 'Default'}")
-        else:
-            logger.warning("API Key not set. LLMService will fail.")
 
-    def generate(self, messages: list) -> dict:
-        """
-        Generates a response from the LLM based on the list of messages.
-        Expects messages to be formatted by Prompter.
-        """
-        if not self.client:
-            return {
-                "text": "Error: API Key is missing. I cannot think without it!",
-                "emotion": "[dizzy]"
-            }
-
-        try:
-            extra_headers = {}
-            if settings.OPENROUTER_API_KEY:
-                extra_headers = {
-                    "HTTP-Referer": "http://localhost:5173", # Frontend URL
-                    "X-Title": "Project AURA", 
-                }
+class LLMService:
+    def generate(
+        self,
+        messages: list,
+        model: str | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+    ) -> dict:
+        return provider_registry.generate(
+            messages,
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
+        )
 
-            response = self.client.chat.completions.create(
-                model=self.model,
-                messages=messages,
-                temperature=0.7,
-                max_tokens=250,
-                extra_headers=extra_headers
-            )
-            
-            content = response.choices[0].message.content
-            
-            # Robust parsing for emotion using Regex
-            # Matches [emotion] at the start of the string
-            emotion_match = re.match(r'^\[(.*?)\]', content)
-            
-            emotion = "neutral"
-            text = content
-            
-            if emotion_match:
-                emotion = emotion_match.group(1)
-                # Remove the emotion tag from the text
-                text = content[emotion_match.end():].strip()
-            
-            return {
-                "text": text,
-                "emotion": emotion,
-                "raw": content
-            }
-            
-        except Exception as e:
-            logger.error(f"LLM Generation Error: {e}")
-            return {
-                "text": f"I... I lost my train of thought. ({str(e)})",
-                "emotion": "[confused]"
-            }
 
 llm_service = LLMService()
diff --git a/ai-service/app/services/memory_service.py b/ai-service/app/services/memory_service.py
index b401291..7fc761f 100644
--- a/ai-service/app/services/memory_service.py
+++ b/ai-service/app/services/memory_service.py
@@ -4,17 +4,28 @@
 """
 from __future__ import annotations
 from typing import List
+import urllib.request
 from supabase import create_client
 from langchain_openai import OpenAIEmbeddings
 from app.core.config import settings
 from uuid import UUID
 
+
 from app.models.database import (Conversation, CreateConversation, Message, CreateMesssage, Memory, CreateMemory)
 
 import logging
 
 logger = logging.getLogger(__name__)
 
+
+def _ollama_is_running(base_url: str) -> bool:
+    """Return True if an Ollama server is reachable at base_url."""
+    try:
+        urllib.request.urlopen(f"{base_url}/api/tags", timeout=2)
+        return True
+    except Exception:
+        return False
+
 class MemoryService:
     def __init__(self):
         self.client = None
@@ -27,16 +38,33 @@ def __init__(self):
         else:
             logger.warning("Supabase credentials not set. Memory service disabled.")
 
-        # Initialize embeddings model via OpenRouter
-        api_key = settings.OPENROUTER_API_KEY
-        if api_key:
+        # Initialize embeddings — try providers in order of preference
+        if settings.OPENAI_API_KEY:
+            self.embeddings = OpenAIEmbeddings(
+                api_key=settings.OPENAI_API_KEY,
+                model="text-embedding-3-small",
+            )
+            logger.info("Embeddings: using OpenAI directly.")
+        elif settings.OPENROUTER_API_KEY:
             self.embeddings = OpenAIEmbeddings(
-                api_key=api_key,
+                api_key=settings.OPENROUTER_API_KEY,
                 model="openai/text-embedding-3-small",
-                base_url="https://openrouter.ai/api/v1"
+                base_url="https://openrouter.ai/api/v1",
+            )
+            logger.info("Embeddings: using OpenRouter.")
+        elif _ollama_is_running(settings.OLLAMA_BASE_URL):
+            self.embeddings = OpenAIEmbeddings(
+                api_key="ollama",
+                model="nomic-embed-text",
+                base_url=f"{settings.OLLAMA_BASE_URL}/v1",
             )
+            logger.info("Embeddings: using local Ollama (nomic-embed-text).")
         else:
-            logger.warning("OPENROUTER_API_KEY not set. Memory embedding disabled.")
+            logger.warning(
+                "No embedding provider available "
+                "(OPENAI_API_KEY / OPENROUTER_API_KEY not set; Ollama not reachable). "
+                "Memory store/search disabled."
+            )
 
     async def create_conversation(self, title: str = "New Conversation") -> UUID | None:
         if not self.client:
diff --git a/ai-service/app/services/prompter.py b/ai-service/app/services/prompter.py
index f2cb4d0..57cdfc3 100644
--- a/ai-service/app/services/prompter.py
+++ b/ai-service/app/services/prompter.py
@@ -1,40 +1,31 @@
 from datetime import datetime
 from app.services.persona import persona_engine
+from app.services.settings_service import settings_service
 
-class Prompter:
-    def __init__(self):
-        self.system_prompt = """You are AURA (Advanced Universal Responsive Avatar), the spirited AI steward of the ASE Lab.
-
-            {persona}
-
-            **Context:**
-            - Current Time: {current_time}
-            """
 
+class Prompter:
     def build(self, message: str, context: dict = None) -> list:
-        """
-        Constructs the messages list for the LLM.
-        """
         current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        persona = persona_engine.get_persona()
-        
-        # Format system prompt
-        formatted_system = self.system_prompt.format(
-            current_time=current_time, 
-            persona=persona
+
+        # Custom system_prompt from admin panel overrides the hardcoded persona
+        db = settings_service.get_settings()
+        custom = (db.get("system_prompt") or "").strip()
+        persona = custom if custom else persona_engine.get_persona()
+
+        formatted_system = (
+            "You are AURA (Advanced Universal Responsive Avatar), "
+            "the spirited AI steward of the ASE Lab.\n\n"
+            f"{persona}\n\n"
+            f"**Context:**\n- Current Time: {current_time}"
         )
-        
-        messages = [
-            {"role": "system", "content": formatted_system}
-        ]
-        
-        # Add conversation history if available in context
+
+        messages = [{"role": "system", "content": formatted_system}]
+
         if context and "history" in context:
             messages.extend(context["history"])
-            
-        # Add current user message
+
         messages.append({"role": "user", "content": message})
-        
         return messages
 
+
 prompter = Prompter()
diff --git a/ai-service/app/services/providers/__init__.py b/ai-service/app/services/providers/__init__.py
new file mode 100644
index 0000000..292fc89
--- /dev/null
+++ b/ai-service/app/services/providers/__init__.py
@@ -0,0 +1,3 @@
+from app.services.providers.registry import provider_registry
+
+__all__ = ["provider_registry"]
diff --git a/ai-service/app/services/providers/anthropic_provider.py b/ai-service/app/services/providers/anthropic_provider.py
new file mode 100644
index 0000000..89685fe
--- /dev/null
+++ b/ai-service/app/services/providers/anthropic_provider.py
@@ -0,0 +1,183 @@
+"""
+Anthropic / Claude provider.
+
+Key differences from OpenAI-compatible providers:
+
+1. System message → separate `system` parameter (not in messages list).
+2. Streaming: chunks are `content_block_delta` with type "text_delta"
+   (vs GPT's `choices[0].delta.content`).
+3. Tool calls: come as `content_block_start` with type "tool_use"
+   (vs OpenAI's `message.tool_calls`).
+4. Tool definitions: Anthropic uses a different schema than OpenAI.
+   We accept the OpenAI schema and translate it internally.
+
+Normalized output is always the same result dict as every other provider.
+"""
+from __future__ import annotations
+
+import json
+import logging
+from typing import AsyncGenerator
+
+from app.services.providers.base import LLMProvider, TextDelta, StreamDone, make_result, RetryableError, NonRetryableError
+
+logger = logging.getLogger(__name__)
+
+
+def _split_system(messages: list[dict]) -> tuple[str, list[dict]]:
+    """Separate the system prompt from the rest of the message list."""
+    system_parts = []
+    rest = []
+    for m in messages:
+        if m.get("role") == "system":
+            system_parts.append(m.get("content", ""))
+        else:
+            rest.append(m)
+    return "\n\n".join(system_parts), rest
+
+
+def _openai_tools_to_anthropic(tools: list[dict]) -> list[dict]:
+    """
+    Translate OpenAI tool schema to Anthropic's format.
+
+    OpenAI:   { "type": "function", "function": { "name", "description", "parameters" } }
+    Anthropic: { "name", "description", "input_schema" }
+    """
+    result = []
+    for t in tools:
+        fn = t.get("function", t)  # handle both wrapped and unwrapped
+        result.append({
+            "name":         fn["name"],
+            "description":  fn.get("description", ""),
+            "input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
+        })
+    return result
+
+
+def _extract_tool_calls(content_blocks) -> list | None:
+    """Normalize Anthropic tool_use blocks to our common schema."""
+    calls = [
+        {
+            "id":        block.id,
+            "name":      block.name,
+            "arguments": json.dumps(block.input),
+        }
+        for block in content_blocks
+        if getattr(block, "type", None) == "tool_use"
+    ]
+    return calls or None
+
+
+class AnthropicProvider(LLMProvider):
+    name = "anthropic"
+
+    def __init__(self, api_key: str):
+        try:
+            import anthropic as _anthropic
+            self._anthropic = _anthropic
+            self._client = _anthropic.Anthropic(api_key=api_key)
+            self._async_client = _anthropic.AsyncAnthropic(api_key=api_key)
+            logger.info("[anthropic] provider ready")
+        except ImportError:
+            raise RuntimeError(
+                "The 'anthropic' package is required for the Anthropic provider. "
+                "Run: pip install anthropic"
+            )
+
+    # ── Blocking ──────────────────────────────────────────────────────────────
+
+    def generate(
+        self,
+        messages: list[dict],
+        *,
+        model: str,
+        temperature: float,
+        max_tokens: int,
+        tools: list[dict] | None = None,
+    ) -> dict:
+        system, user_messages = _split_system(messages)
+        kwargs = dict(
+            model=model,
+            system=system,
+            messages=user_messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+        )
+        if tools:
+            kwargs["tools"] = _openai_tools_to_anthropic(tools)
+
+        _a = self._anthropic  # local ref so except clauses can reference it
+        try:
+            response = self._client.messages.create(**kwargs)
+
+            # Text from text blocks
+            raw = "".join(
+                block.text for block in response.content
+                if getattr(block, "type", None) == "text"
+            )
+            tool_calls = _extract_tool_calls(response.content)
+
+            if tool_calls and not raw:
+                raw = f"[tool_call: {tool_calls[0]['name']}]"
+
+            return make_result(raw, self.name, model, tool_calls=tool_calls)
+
+        except _a.RateLimitError as e:
+            raise RetryableError(str(e), status_code=429)
+        except (_a.APIConnectionError, _a.APITimeoutError) as e:
+            raise RetryableError(str(e))
+        except _a.InternalServerError as e:
+            raise RetryableError(str(e), status_code=getattr(e, "status_code", 500))
+        except _a.AuthenticationError as e:
+            raise NonRetryableError(str(e), status_code=401)
+        except _a.BadRequestError as e:
+            raise NonRetryableError(str(e), status_code=400)
+        except Exception as e:
+            raise RetryableError(str(e))
+
+    # ── Streaming ─────────────────────────────────────────────────────────────
+
+    async def stream(
+        self,
+        messages: list[dict],
+        *,
+        model: str,
+        temperature: float,
+        max_tokens: int,
+        tools: list[dict] | None = None,
+    ) -> AsyncGenerator[TextDelta | StreamDone, None]:
+        system, user_messages = _split_system(messages)
+        assembled = ""
+        kwargs = dict(
+            model=model,
+            system=system,
+            messages=user_messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+        )
+        if tools:
+            kwargs["tools"] = _openai_tools_to_anthropic(tools)
+
+        try:
+            async with self._async_client.messages.stream(**kwargs) as stream:
+                async for event in stream:
+                    if (
+                        event.type == "content_block_delta"
+                        and hasattr(event, "delta")
+                        and getattr(event.delta, "type", None) == "text_delta"
+                    ):
+                        chunk = event.delta.text or ""
+                        if chunk:
+                            assembled += chunk
+                            yield TextDelta(text=chunk)
+        except Exception as e:
+            logger.error(f"[anthropic] stream error: {e}")
+
+        result = make_result(assembled, self.name, model)
+        yield StreamDone(
+            text=result["text"],
+            emotion=result["emotion"],
+            raw=assembled,
+            provider=self.name,
+            model=model,
+        )
diff --git a/ai-service/app/services/providers/base.py b/ai-service/app/services/providers/base.py
new file mode 100644
index 0000000..cfb6b84
--- /dev/null
+++ b/ai-service/app/services/providers/base.py
@@ -0,0 +1,147 @@
+"""
+Provider Abstraction Layer — base types and interface.
+
+Every LLM provider normalizes its output into the same result dict
+so the rest of the system never needs to know which model is running.
+
+Normalized result:
+  { text, emotion, raw, provider, model, tool_calls }
+
+Tool calls are always normalized to:
+  [{ "id": str, "name": str, "arguments": str (JSON) }]
+  — regardless of whether the provider used OpenAI `message.tool_calls`
+    or Anthropic content_block tool_use blocks.
+
+Stream events (for future streaming endpoints):
+  TextDelta  — incremental text chunk
+  StreamDone — final assembled result
+"""
+from __future__ import annotations
+
+import re
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import AsyncGenerator
+
+
+# ── Normalized event types ────────────────────────────────────────────────────
+
+@dataclass
+class TextDelta:
+    """A chunk of text from a streaming response."""
+    text: str
+
+
+@dataclass
+class StreamDone:
+    """Final event — carries the fully assembled response."""
+    text: str
+    emotion: str
+    raw: str
+    provider: str
+    model: str
+    tool_calls: list | None = None
+
+
+# ── Error types ───────────────────────────────────────────────────────────────
+
+class RetryableError(Exception):
+    """
+    Rate limit (429), server error (5xx), or transient network issue.
+    The registry will retry with exponential backoff, then try the next provider.
+    """
+    def __init__(self, msg: str, status_code: int | None = None):
+        super().__init__(msg)
+        self.status_code = status_code
+
+
+class NonRetryableError(Exception):
+    """
+    Auth failure (401) or bad request (400).
+    - 401: key is wrong for this provider → skip to next provider.
+    - 400: our message is malformed → no provider will fix it; abort immediately.
+    """
+    def __init__(self, msg: str, status_code: int | None = None):
+        super().__init__(msg)
+        self.status_code = status_code
+
+
+# ── Helpers ───────────────────────────────────────────────────────────────────
+
+def parse_emotion(raw: str) -> tuple[str, str]:
+    """
+    Extract the leading [emotion, tag] from a raw LLM response.
+    Returns (emotion_string, cleaned_text).
+    """
+    stripped = raw.strip()
+    match = re.match(r'^\[(.*?)\]', stripped)
+    if match:
+        return match.group(1), stripped[match.end():].strip()
+    return "neutral", stripped
+
+
+def make_result(
+    raw: str,
+    provider: str,
+    model: str,
+    tool_calls: list | None = None,
+) -> dict:
+    """Build the normalized result dict that the rest of the system expects."""
+    emotion, text = parse_emotion(raw)
+    return {
+        "text": text,
+        "emotion": emotion,
+        "raw": raw,
+        "provider": provider,
+        "model": model,
+        "tool_calls": tool_calls or None,
+    }
+
+
+# ── Abstract base ─────────────────────────────────────────────────────────────
+
+class LLMProvider(ABC):
+    """
+    All providers implement this interface.
+    `generate` is the blocking path used by the brain pipeline.
+    `stream`   is the async-generator path for future streaming endpoints.
+
+    Tool definitions follow the OpenAI schema:
+      [{ "type": "function", "function": { "name": ..., "description": ...,
+                                           "parameters": {...} } }]
+    Providers that use a different native schema (e.g. Anthropic) translate
+    internally — callers always pass the OpenAI format.
+    """
+
+    name: str = "base"
+
+    @abstractmethod
+    def generate(
+        self,
+        messages: list[dict],
+        *,
+        model: str,
+        temperature: float,
+        max_tokens: int,
+        tools: list[dict] | None = None,
+    ) -> dict:
+        """
+        Blocking generation. Returns the normalized result dict:
+          { text, emotion, raw, provider, model, tool_calls }
+        """
+
+    @abstractmethod
+    async def stream(
+        self,
+        messages: list[dict],
+        *,
+        model: str,
+        temperature: float,
+        max_tokens: int,
+        tools: list[dict] | None = None,
+    ) -> AsyncGenerator[TextDelta | StreamDone, None]:
+        """
+        Streaming generation.
+        Yields TextDelta chunks, ends with one StreamDone.
+        """
+        yield  # type: ignore
diff --git a/ai-service/app/services/providers/openai_compat.py b/ai-service/app/services/providers/openai_compat.py
new file mode 100644
index 0000000..1def0a8
--- /dev/null
+++ b/ai-service/app/services/providers/openai_compat.py
@@ -0,0 +1,183 @@
+"""
+OpenAI-compatible provider.
+
+Covers every backend that speaks the OpenAI chat-completions API:
+  • OpenRouter  (base_url = https://openrouter.ai/api/v1)
+  • OpenAI      (base_url = None  → default)
+  • Groq        (base_url = https://api.groq.com/openai/v1)
+  • Ollama      (base_url = http://localhost:11434/v1)
+
+Tool call normalization:
+  OpenAI sends tool_calls on the response message.
+  Each tool call has: id, function.name, function.arguments (JSON string).
+  We surface these as [{ "id", "name", "arguments" }] in the result dict.
+"""
+from __future__ import annotations
+
+import logging
+from typing import AsyncGenerator
+
+import openai as _openai_lib
+from openai import OpenAI, AsyncOpenAI
+
+from app.services.providers.base import LLMProvider, TextDelta, StreamDone, make_result, RetryableError, NonRetryableError
+
+logger = logging.getLogger(__name__)
+
+_OPENROUTER_HEADERS = {
+    "HTTP-Referer": "http://localhost:5173",
+    "X-Title": "Project AURA",
+}
+
+
+def _extract_tool_calls(response_message) -> list | None:
+    """Normalize OpenAI tool_calls to our common schema."""
+    raw_calls = getattr(response_message, "tool_calls", None)
+    if not raw_calls:
+        return None
+    return [
+        {
+            "id":        tc.id,
+            "name":      tc.function.name,
+            "arguments": tc.function.arguments,  # already a JSON string
+        }
+        for tc in raw_calls
+    ]
+
+
+class OpenAICompatProvider(LLMProvider):
+
+    def __init__(
+        self,
+        api_key: str,
+        base_url: str | None = None,
+        extra_headers: dict | None = None,
+        provider_name: str = "openai",
+    ):
+        self.name = provider_name
+        self._extra_headers = extra_headers or {}
+        self._client = OpenAI(api_key=api_key, base_url=base_url)
+        self._async_client = AsyncOpenAI(api_key=api_key, base_url=base_url)
+        logger.info(f"[{self.name}] provider ready (base_url={base_url or 'default'})")
+
+    # ── Blocking ──────────────────────────────────────────────────────────────
+
+    def generate(
+        self,
+        messages: list[dict],
+        *,
+        model: str,
+        temperature: float,
+        max_tokens: int,
+        tools: list[dict] | None = None,
+    ) -> dict:
+        kwargs = dict(
+            model=model,
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            extra_headers=self._extra_headers,
+        )
+        if tools:
+            kwargs["tools"] = tools
+            kwargs["tool_choice"] = "auto"
+
+        try:
+            response = self._client.chat.completions.create(**kwargs)
+            msg = response.choices[0].message
+            raw = msg.content or ""
+            tool_calls = _extract_tool_calls(msg)
+
+            # When the model only returns a tool call (no text), give a placeholder
+            # so make_result always has something to parse.
+            if tool_calls and not raw:
+                raw = f"[tool_call: {tool_calls[0]['name']}]"
+
+            return make_result(raw, self.name, model, tool_calls=tool_calls)
+
+        except _openai_lib.RateLimitError as e:
+            raise RetryableError(str(e), status_code=429)
+        except (_openai_lib.APIConnectionError, _openai_lib.APITimeoutError) as e:
+            raise RetryableError(str(e))
+        except _openai_lib.InternalServerError as e:
+            raise RetryableError(str(e), status_code=getattr(e, "status_code", 500))
+        except _openai_lib.AuthenticationError as e:
+            raise NonRetryableError(str(e), status_code=401)
+        except (_openai_lib.BadRequestError, _openai_lib.NotFoundError) as e:
+            raise NonRetryableError(str(e), status_code=getattr(e, "status_code", 400))
+        except Exception as e:
+            # Unknown error — treat as retryable so the registry can decide
+            raise RetryableError(str(e))
+
+    # ── Streaming ─────────────────────────────────────────────────────────────
+
+    async def stream(
+        self,
+        messages: list[dict],
+        *,
+        model: str,
+        temperature: float,
+        max_tokens: int,
+        tools: list[dict] | None = None,
+    ) -> AsyncGenerator[TextDelta | StreamDone, None]:
+        assembled = ""
+        kwargs = dict(
+            model=model,
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            extra_headers=self._extra_headers,
+        )
+        if tools:
+            kwargs["tools"] = tools
+            kwargs["tool_choice"] = "auto"
+
+        try:
+            async with self._async_client.chat.completions.stream(**kwargs) as stream:
+                async for chunk in stream:
+                    delta = chunk.choices[0].delta.content or ""
+                    if delta:
+                        assembled += delta
+                        yield TextDelta(text=delta)
+        except Exception as e:
+            logger.error(f"[{self.name}] stream error: {e}")
+
+        result = make_result(assembled, self.name, model)
+        yield StreamDone(
+            text=result["text"],
+            emotion=result["emotion"],
+            raw=assembled,
+            provider=self.name,
+            model=model,
+        )
+
+
+# ── Named constructors ────────────────────────────────────────────────────────
+
+def openrouter_provider(api_key: str) -> OpenAICompatProvider:
+    return OpenAICompatProvider(
+        api_key=api_key,
+        base_url="https://openrouter.ai/api/v1",
+        extra_headers=_OPENROUTER_HEADERS,
+        provider_name="openrouter",
+    )
+
+
+def openai_provider(api_key: str) -> OpenAICompatProvider:
+    return OpenAICompatProvider(api_key=api_key, base_url=None, provider_name="openai")
+
+
+def groq_provider(api_key: str) -> OpenAICompatProvider:
+    return OpenAICompatProvider(
+        api_key=api_key,
+        base_url="https://api.groq.com/openai/v1",
+        provider_name="groq",
+    )
+
+
+def ollama_provider(base_url: str = "http://localhost:11434") -> OpenAICompatProvider:
+    return OpenAICompatProvider(
+        api_key="ollama",
+        base_url=f"{base_url.rstrip('/')}/v1",
+        provider_name="ollama",
+    )
diff --git a/ai-service/app/services/providers/registry.py b/ai-service/app/services/providers/registry.py
new file mode 100644
index 0000000..9a4cd9a
--- /dev/null
+++ b/ai-service/app/services/providers/registry.py
@@ -0,0 +1,245 @@
+"""
+Provider Registry — the single entry point for LLM calls.
+
+Responsibilities:
+  1. Read active model / provider / temperature / max_tokens from settings_service
+  2. Read the matching API key from settings_service (DB) or fall back to env vars
+  3. Instantiate the right LLMProvider
+  4. Call provider.generate() and return the normalized result
+
+Provider inference (when `provider` field is "auto" or missing):
+  model starts with "claude-"        → anthropic
+  model contains "/"                 → openrouter  (e.g. "deepseek/deepseek-v3.2")
+  model starts with gpt-/o1-/o3-    → openai
+  model starts with llama/mistral…   → ollama
+  groq                               → never inferred; set provider="groq" explicitly
+  fallback                           → openrouter
+"""
+from __future__ import annotations
+
+import logging
+import os
+import random
+import time
+
+from app.services.providers.base import LLMProvider, RetryableError, NonRetryableError
+
+logger = logging.getLogger(__name__)
+
+_MAX_ATTEMPTS  = 3        # attempts per provider before giving up on it
+_BACKOFF_BASE  = 1.0      # seconds; delay = base * 2^attempt + jitter
+
+# Fallbacks tried in this order after the primary; ollama is always a candidate, others need a key
+_FALLBACK_ORDER = ["openrouter", "openai", "groq", "ollama"]
+
+# ── Provider inference ────────────────────────────────────────────────────────
+# Prefixes matched with str.startswith against the lower-cased model name in infer_provider
+_OPENAI_PREFIXES = ("gpt-", "o1-", "o3-", "text-davinci", "babbage", "ada")
+_OLLAMA_PREFIXES = ("llama", "mistral", "gemma", "phi", "qwen", "codellama", "deepseek-r1")
+
+
+def infer_provider(model: str) -> str:
+    """Guess the provider name from a model id (case-insensitive); unknown ids go to OpenRouter."""
+    name = model.lower()
+    if name.startswith("claude-"):
+        return "anthropic"
+    if "/" in name:   # vendor/model ids, e.g. "deepseek/deepseek-v3.2"
+        return "openrouter"
+    # str.startswith accepts a tuple, so each model family is a single check
+    if name.startswith(_OPENAI_PREFIXES):
+        return "openai"
+    return "ollama" if name.startswith(_OLLAMA_PREFIXES) else "openrouter"
+
+
+# ── Registry ──────────────────────────────────────────────────────────────────
+
+class ProviderRegistry:
+    """
+    Resolves and calls the correct LLM provider on every request.
+    Providers are constructed lazily and cached by (provider_name, key_hash).
+    """
+
+    def __init__(self):
+        self._cache: dict[str, LLMProvider] = {}  # cache-key string -> constructed provider
+
+    # ── Public API ────────────────────────────────────────────────────────────
+
+    def generate(
+        self,
+        messages: list[dict],
+        *,
+        model: str | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: list[dict] | None = None,
+    ) -> dict:
+        """Generate a reply via the primary provider, falling back through _FALLBACK_ORDER.
+
+        Explicit arguments override stored settings. Missing keys and Retryable/NonRetryable
+        provider errors are absorbed; if no candidate succeeds a canned reply dict is returned.
+        """
+        # Lazy import avoids circular imports at module load time
+        from app.services.settings_service import settings_service
+
+        db = settings_service.get_settings()
+        keys = settings_service.get_api_keys()
+
+        # A setting stored as NULL arrives as None and shadows the default passed to .get(),
+        # so float()/int() must never see it: None is treated like a missing value.
+        db_temp, db_max = db.get("temperature"), db.get("max_tokens")
+        actual_model       = model or db.get("model") or "deepseek/deepseek-v3.2"
+        actual_temp        = temperature if temperature is not None else float(0.8 if db_temp is None else db_temp)
+        actual_max_tokens  = max_tokens or int(300 if db_max is None else db_max)
+
+        configured_provider = (db.get("provider") or "auto").lower()
+        primary = configured_provider if configured_provider != "auto" else infer_provider(actual_model)
+
+        # Build candidate list: primary first, then any fallback with an available key
+        candidates = [primary] + [
+            p for p in _FALLBACK_ORDER
+            if p != primary and (p == "ollama" or self._pick_key(p, keys))
+        ]
+
+        # The same arguments — including the model id — are sent to every candidate
+        call_kwargs = dict(model=actual_model, temperature=actual_temp, max_tokens=actual_max_tokens, tools=tools)
+
+        last_error: Exception | None = None
+
+        for provider_name in candidates:
+            try:
+                provider = self._get_provider(provider_name, keys)
+            except (ValueError, RuntimeError) as e:
+                # Missing key or missing package — skip silently
+                logger.debug(f"[registry] skipping {provider_name}: {e}")
+                last_error = e
+                continue
+
+            logger.info(f"[registry] trying {provider_name} / {actual_model}")
+            try:
+                result = self._call_with_retry(provider, messages, **call_kwargs)
+                if provider_name != primary:
+                    logger.warning(f"[registry] fell back to {provider_name} (primary={primary} failed)")
+                return result
+
+            except NonRetryableError as e:
+                last_error = e
+                if e.status_code == 400:
+                    # Bad request — our message is wrong, no other provider will help
+                    logger.error(f"[registry] bad request ({provider_name}): {e}")
+                    break
+                # 401 auth failure — key is bad for this provider, try next
+                logger.warning(f"[registry] auth failed for {provider_name} (HTTP {e.status_code}), trying next")
+                continue
+
+            except RetryableError as e:
+                # All retries for this provider exhausted — try next
+                logger.warning(f"[registry] {provider_name} exhausted retries: {e}")
+                last_error = e
+                continue
+
+        logger.error(f"[registry] all providers failed. Last: {last_error}")
+        return {
+            "text": "I seem to be having trouble connecting right now. Please try again in a moment.",
+            "emotion": "confused",
+            "raw": "",
+            "provider": primary,
+            "model": actual_model,
+            "tool_calls": None,
+        }
+
+    def _call_with_retry(self, provider: LLMProvider, messages: list[dict], **kwargs) -> dict:
+        """
+        Call provider.generate(), retrying RetryableError up to _MAX_ATTEMPTS times with
+        exponential backoff plus jitter (blocking time.sleep); the last failure is re-raised.
+        NonRetryableError is never retried and propagates immediately.
+        """
+        for attempt in range(_MAX_ATTEMPTS):
+            try:
+                return provider.generate(messages, **kwargs)
+            except NonRetryableError:
+                raise  # propagate immediately
+            except RetryableError as e:
+                if attempt == _MAX_ATTEMPTS - 1:
+                    raise  # all attempts exhausted
+                delay = _BACKOFF_BASE * (2 ** attempt) + random.uniform(0.0, 0.5)  # doubles each attempt, plus 0–0.5 s jitter
+                logger.warning(
+                    f"[{provider.name}] attempt {attempt + 1}/{_MAX_ATTEMPTS} failed "
+                    f"(status={e.status_code}): {e} — retrying in {delay:.1f}s"
+                )
+                time.sleep(delay)
+
+    # ── Provider instantiation ────────────────────────────────────────────────
+
+    def _get_provider(self, provider_name: str, keys: dict) -> LLMProvider:
+        """Return the provider cached for this name + credential, building it on first use."""
+        import hashlib  # function-scope import keeps this fix self-contained
+        # Digest the whole secret (Ollama: its base URL); an 8-char prefix is only the vendor tag, e.g. "sk-or-v1"
+        secret = keys.get("ollama_base_url") if provider_name == "ollama" else self._pick_key(provider_name, keys)
+        cache_key = f"{provider_name}:{hashlib.sha256(str(secret or '').encode()).hexdigest()[:16]}"
+        if cache_key not in self._cache:
+            self._cache[cache_key] = self._build(provider_name, keys)
+        return self._cache[cache_key]
+
+    def _build(self, provider_name: str, keys: dict) -> LLMProvider:
+        """
+        Construct a new provider instance for ``provider_name``.
+
+        Ollama takes only a base URL: the api_keys value, else OLLAMA_BASE_URL, else localhost.
+        Every other provider takes the key chosen by _pick_key and raises ValueError when
+        there is none. Names without an entry below are built as OpenRouter.
+        """
+        from app.services.providers.openai_compat import (
+            openrouter_provider, openai_provider, groq_provider, ollama_provider,
+        )
+        from app.services.providers.anthropic_provider import AnthropicProvider
+
+        if provider_name == "ollama":
+            stored_url = (keys.get("ollama_base_url") or "").strip()
+            return ollama_provider(base_url=stored_url or os.getenv("OLLAMA_BASE_URL", "http://localhost:11434"))
+
+        # provider name -> (factory, message raised when no key is available)
+        keyed = {
+            "anthropic": (AnthropicProvider,
+                          "Anthropic API key not set. Add it via the dashboard or ANTHROPIC_API_KEY env var."),
+            "groq": (groq_provider,
+                     "Groq API key not set. Add it via the dashboard or GROQ_API_KEY env var."),
+            "openai": (openai_provider,
+                       "OpenAI API key not set. Add it via the dashboard or OPENAI_API_KEY env var."),
+            "openrouter": (openrouter_provider,
+                           "OpenRouter API key not set. Add it via the dashboard or OPENROUTER_API_KEY env var."),
+        }
+        # Unknown names default to OpenRouter
+        resolved = provider_name if provider_name in keyed else "openrouter"
+        factory, missing_key_message = keyed[resolved]
+
+        key = self._pick_key(resolved, keys)
+        if not key:
+            raise ValueError(missing_key_message)
+        return factory(api_key=key)
+
+    @staticmethod
+    def _pick_key(provider_name: str, keys: dict) -> str | None:
+        """Return the API key for ``provider_name``, or None when none is configured.
+
+        The api_keys value wins over the environment variable. OpenAI has no column of
+        its own and reads the OpenRouter one, so a stored OpenRouter key ("sk-or-…") is
+        ignored for OpenAI: it can never authenticate there and would hide OPENAI_API_KEY.
+        Providers without a key (e.g. "ollama") always yield None.
+        """
+        sources = {   # provider -> (api_keys column, environment variable)
+            "openrouter": ("openrouter_api_key", "OPENROUTER_API_KEY"),
+            "openai":     ("openrouter_api_key", "OPENAI_API_KEY"),   # shared column for now
+            "anthropic":  ("anthropic_api_key",  "ANTHROPIC_API_KEY"),
+            "groq":       ("groq_api_key",       "GROQ_API_KEY"),
+        }
+        if provider_name not in sources:
+            return None
+        db_field, env_var = sources[provider_name]
+
+        db_val = (keys.get(db_field) or "").strip()
+        if provider_name == "openai" and db_val.startswith("sk-or-"):
+            db_val = ""   # OpenRouter credential in the shared column — use the env var
+        return db_val or os.getenv(env_var, "").strip() or None
+
+
+provider_registry = ProviderRegistry()
diff --git a/ai-service/app/services/settings_service.py b/ai-service/app/services/settings_service.py
new file mode 100644
index 0000000..9f8dfd4
--- /dev/null
+++ b/ai-service/app/services/settings_service.py
@@ -0,0 +1,82 @@
+import logging
+from supabase import create_client, Client
+from app.core.config import settings as app_settings
+
+logger = logging.getLogger(__name__)
+
+_DEFAULTS = {
+    "system_prompt": None,
+    "model":         "deepseek/deepseek-v3.2",
+    "provider":      "openrouter",
+    "temperature":   0.8,
+    "max_tokens":    300,
+    "empathy":       50,
+    "humor":         50,
+    "formality":     50,
+}
+
+_KEY_DEFAULTS = {
+    "openrouter_api_key": None,
+    "deepgram_api_key":   None,
+    "cartesia_api_key":   None,
+    "anthropic_api_key":  None,
+    "groq_api_key":       None,
+    "ollama_base_url":    "http://localhost:11434",
+    "livekit_url":        None,
+    "livekit_api_key":    None,
+    "livekit_api_secret": None,
+}
+
+
+class SettingsService:
+    """Reads and updates row id=1 of the personality_settings and api_keys tables."""
+    def __init__(self):
+        enabled = app_settings.SUPABASE_URL and app_settings.SUPABASE_SERVICE_KEY
+        self._client: Client | None = create_client(app_settings.SUPABASE_URL, app_settings.SUPABASE_SERVICE_KEY) if enabled else None
+
+    def get_settings(self) -> dict:
+        """Return personality_settings row id=1 layered over _DEFAULTS; defaults alone on any failure."""
+        if self._client:
+            try:
+                row = self._client.table("personality_settings").select("*").eq("id", 1).single().execute().data
+                if row:
+                    return {**_DEFAULTS, **row}
+            except Exception as e:
+                logger.warning(f"SettingsService.get_settings failed: {e}")
+        return dict(_DEFAULTS)
+
+    def update_settings(self, patch: dict) -> dict:
+        """Apply ``patch`` to personality_settings row id=1; return the updated row over _DEFAULTS, else defaults."""
+        if self._client:
+            try:
+                rows = self._client.table("personality_settings").update(patch).eq("id", 1).execute().data
+                if rows:
+                    return {**_DEFAULTS, **rows[0]}
+            except Exception as e:
+                logger.error(f"SettingsService.update_settings failed: {e}")
+        return dict(_DEFAULTS)
+
+    def get_api_keys(self) -> dict:
+        """Return api_keys row id=1 layered over _KEY_DEFAULTS; defaults alone on any failure."""
+        if self._client:
+            try:
+                row = self._client.table("api_keys").select("*").eq("id", 1).single().execute().data
+                if row:
+                    return {**_KEY_DEFAULTS, **row}
+            except Exception as e:
+                logger.warning(f"SettingsService.get_api_keys failed: {e}")
+        return dict(_KEY_DEFAULTS)
+
+    def update_api_keys(self, patch: dict) -> dict:
+        """Apply ``patch`` to api_keys row id=1; return the updated row over _KEY_DEFAULTS, else defaults."""
+        if self._client:
+            try:
+                rows = self._client.table("api_keys").update(patch).eq("id", 1).execute().data
+                if rows:
+                    return {**_KEY_DEFAULTS, **rows[0]}
+            except Exception as e:
+                logger.error(f"SettingsService.update_api_keys failed: {e}")
+        return dict(_KEY_DEFAULTS)
+
+
+settings_service = SettingsService()
diff --git a/ai-service/tests/__init__.py b/ai-service/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/ai-service/tests/conftest.py b/ai-service/tests/conftest.py
new file mode 100644
index 0000000..8eb1151
--- /dev/null
+++ b/ai-service/tests/conftest.py
@@ -0,0 +1,71 @@
+"""
+Shared pytest fixtures and env setup.
+Loads the project .env so integration tests can use real API keys.
+"""
+import os
+import sys
+from pathlib import Path
+
+import pytest
+from dotenv import load_dotenv
+
+# ── Add ai-service root to sys.path so `app.*` imports resolve ───────────────
+AI_SERVICE_DIR = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(AI_SERVICE_DIR))
+
+# ── Load .env from project root ───────────────────────────────────────────────
+PROJECT_ROOT = AI_SERVICE_DIR.parent
+env_path = PROJECT_ROOT / ".env"
+if not env_path.exists():
+    env_path = AI_SERVICE_DIR / ".env"
+load_dotenv(env_path)
+
+
+# ── Reusable message lists ────────────────────────────────────────────────────
+
+@pytest.fixture
+def simple_messages():
+    """System + user turns: a brevity instruction, then a request to say '[smile] Hello!'."""
+    system_turn = {"role": "system", "content": "You are a helpful assistant. Reply very briefly."}
+    user_turn = {"role": "user",   "content": "Say exactly: [smile] Hello!"}
+    return [system_turn, user_turn]
+
+
+@pytest.fixture
+def tool_messages():
+    """System + user turns where the user asks for Tokyo weather via the get_weather tool."""
+    system_turn = {"role": "system", "content": "You are a helpful assistant."}
+    user_turn = {"role": "user",   "content": "What is the weather in Tokyo? Use the get_weather tool."}
+    return [system_turn, user_turn]
+
+
+@pytest.fixture
+def sample_tools():
+    """One function tool, ``get_weather``, taking a single required string argument ``city``."""
+    city_schema = {"type": "string", "description": "City name"}
+    parameters = {
+        "type": "object",
+        "properties": {"city": city_schema},
+        "required": ["city"],
+    }
+
+    # Assembled inside-out so each nesting level of the payload stays readable
+    get_weather = {
+        "name":        "get_weather",
+        "description": "Get current weather for a city.",
+        "parameters":  parameters,
+    }
+
+    return [{"type": "function", "function": get_weather}]
+
+
+# ── Key availability helpers (used by integration marks) ─────────────────────
+
+def has_openrouter_key():
+    return os.getenv("OPENROUTER_API_KEY", "").strip() != ""  # set and not just whitespace
+
+def has_openai_key():
+    return os.getenv("OPENAI_API_KEY", "").strip() != ""  # set and not just whitespace
+
+def has_anthropic_key():
+    return os.getenv("ANTHROPIC_API_KEY", "").strip() != ""  # set and not just whitespace
diff --git a/ai-service/tests/providers/__init__.py b/ai-service/tests/providers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/dashboard/public/models/hutao/Hu Tao.model3.json b/dashboard/public/models/hutao/Hu Tao.model3.json
index cdbd216..890ff95 100644
--- a/dashboard/public/models/hutao/Hu Tao.model3.json	
+++ b/dashboard/public/models/hutao/Hu Tao.model3.json	
@@ -8,7 +8,17 @@
 			"Hu Tao.8192/texture_02.png"
 		],
 		"Physics": "Hu Tao.physics3.json",
-		"DisplayInfo": "Hu Tao.cdi3.json"
+		"DisplayInfo": "Hu Tao.cdi3.json",
+		"Expressions": [
+			{ "Name": "SmileLock.exp3.json",   "File": "SmileLock.exp3.json" },
+			{ "Name": "SadLock.exp3.json",     "File": "SadLock.exp3.json" },
+			{ "Name": "Angry.exp3.json",       "File": "Angry.exp3.json" },
+			{ "Name": "Ghost.exp3.json",       "File": "Ghost.exp3.json" },
+			{ "Name": "GhostChange.exp3.json", "File": "GhostChange.exp3.json" },
+			{ "Name": "Shadow.exp3.json",      "File": "Shadow.exp3.json" },
+			{ "Name": "PupilShrink.exp3.json", "File": "PupilShrink.exp3.json" },
+			{ "Name": "EyeshineOff.exp3.json", "File": "EyeshineOff.exp3.json" }
+		]
 	},
 	"Groups": [
 		{
diff --git a/dashboard/src/components/ApiKeys.jsx b/dashboard/src/components/ApiKeys.jsx
new file mode 100644
index 0000000..046c027
--- /dev/null
+++ b/dashboard/src/components/ApiKeys.jsx
@@ -0,0 +1,142 @@
+import { useState, useEffect } from 'react'
+import { supabase } from '../lib/supabaseClient'
+
+const KEY_GROUPS = [
+    {
+        label: 'LLM Providers',
+        icon: 'psychology',
+        fields: [
+            { key: 'openrouter_api_key', label: 'OpenRouter API Key', placeholder: 'sk-or-v1-...', hint: 'Routes to DeepSeek, GPT, Mistral, and more' },
+            { key: 'anthropic_api_key',  label: 'Anthropic API Key',  placeholder: 'sk-ant-...',   hint: 'Required for claude-* models' },
+            { key: 'groq_api_key',       label: 'Groq API Key',       placeholder: 'gsk_...',       hint: 'Fast inference for Llama / Mixtral' },
+            { key: 'ollama_base_url',    label: 'Ollama Base URL',    placeholder: 'http://localhost:11434', hint: 'Local LLMs via Ollama', isUrl: true },
+        ],
+    },
+    {
+        label: 'Voice',
+        icon: 'mic',
+        fields: [
+            { key: 'deepgram_api_key', label: 'Deepgram API Key (STT)', placeholder: 'your_deepgram_key' },
+            { key: 'cartesia_api_key', label: 'Cartesia API Key (TTS)', placeholder: 'your_cartesia_key', note: 'Requires agent restart' },
+        ],
+    },
+    {
+        label: 'LiveKit',
+        icon: 'cell_tower',
+        note: 'Requires agent restart',
+        fields: [
+            { key: 'livekit_url',        label: 'LiveKit URL',        placeholder: 'wss://your-project.livekit.cloud' },
+            { key: 'livekit_api_key',    label: 'LiveKit API Key',    placeholder: 'API key' },
+            { key: 'livekit_api_secret', label: 'LiveKit API Secret', placeholder: 'API secret' },
+        ],
+    },
+]
+
+export default function ApiKeys() {
+    const [draft, setDraft]       = useState({})
+    const [visible, setVisible]   = useState({})
+    const [saveState, setSaveState] = useState('idle')
+    const [loaded, setLoaded]     = useState(false)
+    // Load the single api_keys row (id = 1) once on mount; loaded flips even when no row came back
+    useEffect(() => {
+        supabase.from('api_keys').select('*').eq('id', 1).single()
+            .then(({ data }) => { if (data) setDraft(data); setLoaded(true) })
+    }, [])
+    // Field edits only touch the local draft; nothing is persisted until saveKeys runs
+    const patch       = (key, value) => setDraft(d => ({ ...d, [key]: value }))
+    const toggleVisible = key => setVisible(v => ({ ...v, [key]: !v[key] }))
+    // Persist the whole draft (minus id) with a fresh updated_at, then reflect the outcome in saveState
+    const saveKeys = async () => {
+        setSaveState('saving')
+        try {
+            const payload = { ...draft }
+            delete payload.id
+            payload.updated_at = new Date().toISOString()
+            const { error } = await supabase.from('api_keys').update(payload).eq('id', 1)
+            if (error) throw error
+            setSaveState('saved')
+            setTimeout(() => setSaveState('idle'), 2500)
+        } catch (err) {
+            console.error('Failed to save API keys:', err)
+            setSaveState('error')
+            setTimeout(() => setSaveState('idle'), 3000)
+        }
+    }
+    // Label, icon and colour classes of the save button for the current saveState
+    const btn = {
+        idle:   { label: 'Save API Keys', icon: 'key',           cls: 'bg-primary hover:bg-primary/90 shadow-primary/20' },
+        saving: { label: 'Saving...',      icon: 'hourglass_top', cls: 'bg-primary/70 cursor-not-allowed' },
+        saved:  { label: 'Keys Saved!',    icon: 'check_circle',  cls: 'bg-emerald-500 shadow-emerald-200' },
+        error:  { label: 'Save Failed',    icon: 'error',         cls: 'bg-red-500 shadow-red-200' },
+    }[saveState]
+
+    return (
+        <div className="bg-white p-8 rounded-xl border border-slate-200 shadow-sm">
+            <div className="flex items-center justify-between mb-8">
+                <h3 className="text-xl font-bold flex items-center gap-2">
+                    <span className="material-icons-round text-primary">vpn_key</span>
+                    API Keys
+                </h3>
+                <button
+                    onClick={saveKeys}
+                    disabled={saveState === 'saving' || !loaded}
+                    className={`${btn.cls} text-white px-5 py-2 rounded-full text-sm font-bold transition-all shadow-lg flex items-center gap-2`}
+                >
+                    <span className="material-icons-round text-sm">{btn.icon}</span>
+                    {btn.label}
+                </button>
+            </div>
+
+            <div className="space-y-8">
+                {KEY_GROUPS.map(({ label, icon, note, fields }) => (
+                    <div key={label}>
+                        <div className="flex items-center gap-2 mb-3">
+                            <span className="material-icons-round text-base text-slate-400">{icon}</span>
+                            <span className="text-xs font-bold text-slate-400 uppercase tracking-widest">{label}</span>
+                            {note && (
+                                <span className="ml-auto text-xs text-amber-500 font-medium flex items-center gap-1">
+                                    <span className="material-icons-round text-xs">info</span>{note}
+                                </span>
+                            )}
+                        </div>
+                        <div className="space-y-3">
+                            {fields.map(({ key, label: fl, placeholder, note: fn, hint, isUrl }) => (
+                                <div key={key}>
+                                    <label className="block text-sm font-medium text-slate-500 mb-1">{fl}</label>
+                                    {hint && <p className="text-xs text-slate-400 mb-1">{hint}</p>}
+                                    <div className="relative">
+                                        <input
+                                            type={isUrl || visible[key] ? 'text' : 'password'}
+                                            value={draft[key] ?? ''}
+                                            onChange={e => patch(key, e.target.value)}
+                                            placeholder={loaded ? placeholder : '••••••••'}
+                                            className="w-full bg-bg-light border border-slate-200 rounded-lg px-3 py-2 pr-10 text-sm font-mono focus:ring-1 focus:ring-primary focus:border-primary outline-none"
+                                        />
+                                        {!isUrl && (
+                                            <button type="button" onClick={() => toggleVisible(key)}
+                                                className="absolute right-2 top-1/2 -translate-y-1/2 text-slate-400 hover:text-slate-600">
+                                                <span className="material-icons-round text-base">
+                                                    {visible[key] ? 'visibility_off' : 'visibility'}
+                                                </span>
+                                            </button>
+                                        )}
+                                    </div>
+                                    {fn && (
+                                        <p className="text-xs text-amber-500 mt-1 flex items-center gap-1">
+                                            <span className="material-icons-round text-xs">info</span>{fn}
+                                        </p>
+                                    )}
+                                </div>
+                            ))}
+                        </div>
+                    </div>
+                ))}
+            </div>
+
+            <p className="mt-6 text-xs text-slate-400 flex items-start gap-1.5">
+                <span className="material-icons-round text-xs mt-0.5">lock</span>
+                Stored in your private Supabase database. Leave a field empty to use the server's <code className="font-mono">.env</code> value.
+            </p>
+        </div>
+    )
+}
diff --git a/dashboard/src/components/AvatarRenderer.jsx b/dashboard/src/components/AvatarRenderer.jsx
index 7707641..25a94aa 100644
--- a/dashboard/src/components/AvatarRenderer.jsx
+++ b/dashboard/src/components/AvatarRenderer.jsx
@@ -1,169 +1,297 @@
 /**
- * AvatarRenderer — Phase 2
- * Renders the Hu Tao Live2D model on a transparent canvas using
- * pixi-live2d-display. Exposes an imperative ref API so CallOverlay
- * can drive expressions in sync with AURA's speech.
+ * AvatarRenderer — Phase 3
+ * Idle / Speaking state machine with richer moods and cute micro-animations:
+ *   • 6 weighted moods per state (neutral, happy, curious, playful, sleepy, thinking)
+ *   • Cute head-tilt event during idle
+ *   • Occasional double-blink during idle
+ *   • Sleepy: half-closed eyes, slow blink
+ *   • Speaking: gentle nod, tighter saccade, snappier blink, slight smile boost
  *
- * Usage:
- *   const avatarRef = useRef(null)
- *   <AvatarRenderer ref={avatarRef} width={400} height={600} scale={0.3} />
- *   avatarRef.current.setExpression(['smile', 'shadow'], 2.3)
- *   avatarRef.current.resetNeutral()
+ * Ref API:
+ *   setExpression(names[], duration)  — play expression(s) for N seconds
+ *   setSpeaking(bool)                 — switch idle ↔ speaking state
+ *   setMouthOpen(0–1)                 — drive lip sync each frame
+ *   setParameter(id, value)           — raw Core Model parameter override
+ *   resetNeutral()                    — cancel active expression, return to idle
  */
 
 import { forwardRef, useEffect, useImperativeHandle, useRef } from 'react'
 import * as PIXI from 'pixi.js'
 import { Live2DModel } from 'pixi-live2d-display/cubism4'
 
-// Register PIXI Ticker so Live2D animations update every frame
 Live2DModel.registerTicker(PIXI.Ticker)
 
-// Model path relative to dashboard/public/
 const MODEL_URL = '/models/hutao/Hu Tao.model3.json'
 
-// Expression tag → .exp3.json filename
-// Source: voice-agent/model_parameters.json hotkeys + Hu_Tao__model_for_PC_/ directory
 const EXPRESSION_FILES = {
-  smile:        'SmileLock.exp3.json',
-  sad:          'SadLock.exp3.json',
-  angry:        'Angry.exp3.json',
-  ghost:        'Ghost.exp3.json',
-  ghost_nervous:'GhostChange.exp3.json',
-  shadow:       'Shadow.exp3.json',
-  pupil_shrink: 'PupilShrink.exp3.json',
-  eyeshine_off: 'EyeshineOff.exp3.json',
+  smile:         'SmileLock.exp3.json',
+  sad:           'SadLock.exp3.json',
+  angry:         'Angry.exp3.json',
+  ghost:         'Ghost.exp3.json',
+  ghost_nervous: 'GhostChange.exp3.json',
+  shadow:        'Shadow.exp3.json',
+  pupil_shrink:  'PupilShrink.exp3.json',
+  eyeshine_off:  'EyeshineOff.exp3.json',
 }
 
-export const AvatarRenderer = forwardRef(function AvatarRenderer(props, ref) {
-  const { width = 400, height = 600 } = props
-  const containerRef = useRef(null)
-  const modelRef     = useRef(null)
-  const appRef       = useRef(null)
-  const mouthOpenRef = useRef(0)   // driven by lip-sync from CallOverlay
+// Maps LLM-annotated expression names → the closest ambient mood.
+// Applied after the expression fades so the idle baseline stays emotionally coherent.
+const EXPRESSION_TO_MOOD = {
+  smile:         'happy',
+  sad:           'neutral',   // no sad mood — settle to calm neutral
+  angry:         'thinking',  // furrowed brows, withdrawn
+  ghost:         'playful',   // mischievous
+  ghost_nervous: 'curious',   // uncertain, alert
+  shadow:        'thinking',  // serious / dark
+  pupil_shrink:  'curious',   // surprised / wide-eyed
+  eyeshine_off:  'sleepy',    // dull / fatigued
+  wink:          'playful',
+  tongue:        'playful',
+}
 
-  // ── Boot PIXI + load model ────────────────────────────────────────────────
-  useEffect(() => {
-    let destroyed = false
-
-    const app = new PIXI.Application({
-      backgroundAlpha: 0,
-      width,
-      height,
-      antialias: true,
-      resolution: window.devicePixelRatio || 2,
-      autoDensity: true,
-    })
-    appRef.current = app
-    containerRef.current.appendChild(app.view)
-
-    Live2DModel.from(MODEL_URL, { autoInteract: false }).then((model) => {
-      if (destroyed) return   // effect cleaned up before model finished loading
-      modelRef.current = model
-      app.stage.addChild(model)
-
-      // Full-screen canvas: position her in the left-center third of the viewport.
-      // 1.9× height-fit zooms into upper body. Anchor is top-center so head
-      // sits at Y=0. X at 30% of the full viewport keeps her off the left edge
-      // and out of the way of the right-side controls overlay.
-      const logicalW = app.screen.width
-      const logicalH = app.screen.height
+// ── State machine ──────────────────────────────────────────────────────────
+const STATE = { IDLE: 'idle', SPEAKING: 'speaking' }
+
+// ── Mood definitions (target parameter values) ─────────────────────────────
+const MOODS = {
+  neutral:  { mouthForm: 0,     browForm: 0,     browRaise: 0,     eyeSmile: 0    },
+  happy:    { mouthForm: 0.65,  browForm: 0.30,  browRaise: 0.45,  eyeSmile: 0.55 },
+  curious:  { mouthForm: 0.20,  browForm: -0.10, browRaise: 0.50,  eyeSmile: 0    },
+  playful:  { mouthForm: 0.90,  browForm: 0.50,  browRaise: 0.70,  eyeSmile: 0.30 },
+  sleepy:   { mouthForm: -0.05, browForm: 0.10,  browRaise: -0.15, eyeSmile: 0    },
+  thinking: { mouthForm: 0.10,  browForm: -0.20, browRaise: 0.35,  eyeSmile: 0    },
+}
+
+// Weighted mood pool per state — [moodKey, weight], weights sum to 1.0
+const MOOD_POOLS = {
+  [STATE.IDLE]: [
+    ['neutral', 0.15], ['happy', 0.35], ['curious', 0.20],
+    ['playful', 0.10], ['sleepy', 0.10], ['thinking', 0.10],
+  ],
+  [STATE.SPEAKING]: [
+    ['neutral', 0.10], ['happy', 0.45], ['curious', 0.20],
+    ['playful', 0.20], ['thinking', 0.05],
+  ],
+}
+
+function pickWeightedMood(state) {
+  // Weighted draw: the first pool entry whose running weight total exceeds a roll in [0, 1).
+  // Unknown states use the idle pool; a roll past the final total falls back to neutral.
+  const entries = MOOD_POOLS[state] ?? MOOD_POOLS[STATE.IDLE]
+  const roll = Math.random()
+  let runningTotal = 0
+  const winner = entries.find(([, weight]) => roll < (runningTotal += weight))
+  if (!winner) return MOODS.neutral
+  return MOODS[winner[0]]
+}
+
+// ── Module-scoped Singleton State ──────────────────────────────────────────
+let _app   = null
+let _model = null
+let _loaded = false
+let _mouthOpen = 0
+let _expressionActive = false
+let _state = STATE.IDLE
+let _pendingMood = null   // set by setExpression, consumed by update loop on expiry
+
+function initSingleton(width, height) {
+  if (_app) return
+
+  _app = new PIXI.Application({
+    backgroundAlpha: 0,
+    width,
+    height,
+    antialias: true,
+    resolution: window.devicePixelRatio || 2,
+    autoDensity: true,
+  })
+
+  Live2DModel.from(MODEL_URL, { autoInteract: false })
+    .then((model) => {
+      _model = model
+      _app.stage.addChild(model)
+
+      const logicalW = _app.screen.width
+      const logicalH = _app.screen.height
       const autoScale = (logicalH / model.height) * 1.9
       model.scale.set(autoScale)
       model.anchor.set(0.5, 0.0)
       model.position.set(logicalW * 0.5, 0)
 
-      // ── Idle animation ─────────────────────────────────────────────────────
-      // Patch coreModel.update() — the FINAL step before GPU commit.
-      // This runs AFTER the motion manager has set its keyframe values, so our
-      // params always overwrite whatever the motion manager tried to set.
-      // (Patching internalModel.update earlier didn't work because origUpdate
-      // runs the motion manager which overwrites our values before coreModel.update.)
       const core = model.internalModel.coreModel
+      const clamp = (v, lo, hi) => Math.max(lo, Math.min(hi, v))
       let lastMs = performance.now()
-      const clamp = (v, lo, hi) => v < lo ? lo : v > hi ? hi : v
 
-      // ── Completely separate timers — blink and saccade never share state ──
-      let blinkTimer = 0, blinkPhase = 0, nextBlink = 2 + Math.random() * 4
+      // ── Blink state ──────────────────────────────────────────────────────
+      let blinkTimer = 0, blinkPhase = 0, nextBlink = 2 + Math.random() * 3
+      // Double-blink: blink twice in quick succession (cute quirk)
+      let dblBlinkPending = false
+      let dblBlinkTimer = 0, nextDblBlink = 10 + Math.random() * 10
+
+      // ── Saccade state ─────────────────────────────────────────────────────
       let saccadeTimer = 0, nextSaccade = 1 + Math.random() * 2
-      // Eye movement: lerp slowly to target — eliminates all twitching
       let eyeTargetX = 0, eyeTargetY = 0, eyeX = 0, eyeY = 0
 
-      // ── Mood: confirmed param IDs from Hu Tao.cdi3.json ──────────────────
-      // ParamMouthForm, ParamBrowLForm/RForm, Param37 (Brows Raise),
-      // ParamEyeLSmile/RSmile (eye squint) all exist in this model.
+      // ── Mood state ────────────────────────────────────────────────────────
       let moodTimer = 0, nextMoodChange = 3 + Math.random() * 4
-      let mouthFormT = 0,  mouthFormC = 0
-      let browFormT  = 0,  browFormC  = 0    // L/R brow curve (happy=up, frown=down)
-      let browRaiseT = 0,  browRaiseC = 0    // Param37: raise both brows
-      let eyeSmileT  = 0,  eyeSmileC  = 0    // eye squint when smiling
-
-      function pickMood() {
-        const roll = Math.random()
-        if (roll < 0.30) {                           // neutral
-          mouthFormT = 0;    browFormT = 0;    browRaiseT = 0;    eyeSmileT = 0
-        } else if (roll < 0.60) {                    // happy / cute smile
-          mouthFormT = 0.55 + Math.random() * 0.35
-          browFormT  = 0.35; browRaiseT = 0.4; eyeSmileT = 0.45
-        } else if (roll < 0.80) {                    // thinking — look up
-          mouthFormT = -0.1; browFormT = 0.1; browRaiseT = 0.2; eyeSmileT = 0
-          eyeTargetY = 0.45 + Math.random() * 0.3   // deliberate upward glance
-          nextSaccade = saccadeTimer + 2.8           // hold it
-        } else {                                     // excited — big smile, raised brows
-          mouthFormT = 0.9;  browFormT = 0.5; browRaiseT = 0.7; eyeSmileT = 0.25
-        }
-        nextMoodChange = 3 + Math.random() * 5
-      }
+      let currentMood = MOODS.happy
+      let mouthFormC = 0, browFormC = 0, browRaiseC = 0, eyeSmileC = 0
+
+      // ── Head tilt micro-animation (idle only) ─────────────────────────────
+      // Occasionally snaps to a cute side-tilt, holds briefly, then eases back
+      let tiltTimer = 0, nextTilt = 6 + Math.random() * 8
+      let tiltTarget = 0, tiltC = 0
+      let tiltHolding = false, tiltHoldTimer = 0, tiltHoldDuration = 0
+
+      // ── Speaking nod ──────────────────────────────────────────────────────
+      let nodPhase = 0
 
       const origCoreUpdate = core.update.bind(core)
+
       core.update = function () {
-        const now = performance.now() / 1000
+        const now     = performance.now() / 1000
         const elapsed = Math.min((performance.now() - lastMs) / 1000, 0.1)
         lastMs = performance.now()
 
-        // ── Head — more amplitude so turns are clearly visible ─────────────
-        core.setParameterValueById('ParamAngleX',     Math.sin(now * 0.31) * 12 + Math.sin(now * 0.73) * 3)
-        core.setParameterValueById('ParamAngleY',     Math.sin(now * 0.19) *  5 + Math.sin(now * 0.47) * 2)
-        core.setParameterValueById('ParamAngleZ',     Math.sin(now * 0.13) *  5 + Math.sin(now * 0.41) * 2)
-        core.setParameterValueById('ParamBodyAngleX', Math.sin(now * 0.28) *  4)
-        core.setParameterValueById('ParamBodyAngleZ', Math.sin(now * 0.21) *  3)
-        core.setParameterValueById('ParamBreath',     Math.sin(now * 0.9)  * 0.5 + 0.5)
-        core.setParameterValueById('ParamMouthOpenY', mouthOpenRef.current)
-
-        // ── Mood tick — fast lerp so changes are clearly visible ───────────
-        moodTimer += elapsed
-        if (moodTimer >= nextMoodChange) { moodTimer = 0; pickMood() }
-        const lm = elapsed * 4   // reach target in ~0.5s
-        mouthFormC += (mouthFormT - mouthFormC) * lm
-        browFormC  += (browFormT  - browFormC)  * lm
-        browRaiseC += (browRaiseT - browRaiseC) * lm
-        eyeSmileC  += (eyeSmileT  - eyeSmileC)  * lm
-        core.setParameterValueById('ParamMouthForm', mouthFormC)
-        core.setParameterValueById('ParamBrowLForm', browFormC)
-        core.setParameterValueById('ParamBrowRForm', browFormC)
-        core.setParameterValueById('Param37',        browRaiseC)  // Brows Raise
-        core.setParameterValueById('ParamEyeLSmile', eyeSmileC)
-        core.setParameterValueById('ParamEyeRSmile', eyeSmileC)
-
-        // ── Eye saccades — own timer, slow lerp (no twitching) ────────────
+        const speaking = _state === STATE.SPEAKING
+        const lerpSpd  = speaking ? 5.0 : 3.5
+
+        // ── Breathing ────────────────────────────────────────────────────
+        // Slightly faster when speaking (more energetic)
+        core.setParameterValueById('ParamBreath',
+          Math.sin(now * (speaking ? 1.1 : 0.75)) * 0.5 + 0.5)
+
+        // ── Head movement ─────────────────────────────────────────────────
+        const swayAmt = speaking ? 0.35 : 1.0
+        const bX = (Math.sin(now * 0.31) * 12 + Math.sin(now * 0.73) * 3) * swayAmt
+        const bY = (Math.sin(now * 0.19) * 5  + Math.sin(now * 0.47) * 2) * swayAmt
+        const bZ = (Math.sin(now * 0.13) * 5  + Math.sin(now * 0.41) * 2) * swayAmt
+
+        // Gentle speaking nod — Y oscillation in rough speech rhythm
+        let nodY = 0
+        if (speaking) {
+          nodPhase += elapsed * 2.6
+          nodY = Math.sin(nodPhase) * 3.5
+        } else {
+          nodPhase = 0
+        }
+
+        // Cute idle head tilt — snap in quickly, ease back slowly
+        if (!speaking) {
+          tiltTimer += elapsed
+          if (!tiltHolding && tiltTimer >= nextTilt) {
+            tiltTarget = (Math.random() < 0.5 ? 1 : -1) * (7 + Math.random() * 7)
+            tiltTimer = 0
+            nextTilt = 6 + Math.random() * 8
+            tiltHolding = true
+            tiltHoldTimer = 0
+            tiltHoldDuration = 0.9 + Math.random() * 0.8
+          }
+        }
+        if (tiltHolding) {
+          tiltHoldTimer += elapsed
+          if (tiltHoldTimer >= tiltHoldDuration) { tiltTarget = 0; tiltHolding = false }
+        }
+        tiltC += (tiltTarget - tiltC) * elapsed * (tiltTarget !== 0 ? 6.0 : 2.2)
+
+        core.setParameterValueById('ParamAngleX', bX)
+        core.setParameterValueById('ParamAngleY', bY + nodY)
+        core.setParameterValueById('ParamAngleZ', bZ + tiltC)
+        core.setParameterValueById('ParamBodyAngleX', Math.sin(now * 0.28) * 4 * swayAmt)
+        core.setParameterValueById('ParamBodyAngleZ', Math.sin(now * 0.21) * 3 * swayAmt)
+
+        // ── Lip sync ──────────────────────────────────────────────────────
+        core.setParameterValueById('ParamMouthOpenY', _mouthOpen)
+
+        // ── Mood interpolation ────────────────────────────────────────────
+        if (!_expressionActive) {
+          // Expression just expired — align ambient mood to the emotion the LLM set
+          if (_pendingMood) {
+            currentMood = _pendingMood
+            _pendingMood = null
+            moodTimer = 0
+            nextMoodChange = 3 + Math.random() * 3  // hold this mood for 3-6s before drifting
+          }
+
+          moodTimer += elapsed
+          if (moodTimer >= nextMoodChange) {
+            moodTimer = 0
+            nextMoodChange = speaking
+              ? 2 + Math.random() * 2.5
+              : 3 + Math.random() * 5
+            currentMood = pickWeightedMood(_state)
+
+            // Curious: look upward with a lingering gaze
+            if (currentMood === MOODS.curious) {
+              eyeTargetY = 0.45 + Math.random() * 0.30
+              nextSaccade = saccadeTimer + 3
+            }
+            // Thinking: look up-left (classic thinking glance)
+            if (currentMood === MOODS.thinking) {
+              eyeTargetX = -(0.4 + Math.random() * 0.3)
+              eyeTargetY =   0.4 + Math.random() * 0.3
+              nextSaccade = saccadeTimer + 4
+            }
+          }
+
+          const lm = elapsed * lerpSpd
+          mouthFormC += (currentMood.mouthForm - mouthFormC) * lm
+          browFormC  += (currentMood.browForm   - browFormC)  * lm
+          browRaiseC += (currentMood.browRaise  - browRaiseC) * lm
+          eyeSmileC  += (currentMood.eyeSmile   - eyeSmileC)  * lm
+
+          // Speaking: add a slight smile boost (engaged / expressive look)
+          const mfBoost = speaking ? 0.20 : 0
+          core.setParameterValueById('ParamMouthForm',  clamp(mouthFormC + mfBoost, -1, 1))
+          core.setParameterValueById('ParamBrowLForm',  browFormC)
+          core.setParameterValueById('ParamBrowRForm',  browFormC)
+          core.setParameterValueById('Param37',         browRaiseC)
+          core.setParameterValueById('ParamEyeLSmile',  eyeSmileC)
+          core.setParameterValueById('ParamEyeRSmile',  eyeSmileC)
+        }
+
+        // ── Saccade ───────────────────────────────────────────────────────
         saccadeTimer += elapsed
         if (saccadeTimer >= nextSaccade) {
-          eyeTargetX = (Math.random() * 2 - 1) * 0.65
-          const r = Math.random()
-          if      (r < 0.20) eyeTargetY =  0.5 + Math.random() * 0.35  // look up
-          else if (r < 0.35) eyeTargetY = -0.3 - Math.random() * 0.25  // look down (shy)
-          else               eyeTargetY = (Math.random() * 2 - 1) * 0.4
-          nextSaccade = saccadeTimer + 1.5 + Math.random() * 2.5
+          if (speaking) {
+            // Focus on "listener" — small central range, frequent updates
+            eyeTargetX = (Math.random() * 2 - 1) * 0.25
+            eyeTargetY = (Math.random() * 2 - 1) * 0.15
+            nextSaccade = saccadeTimer + 0.8 + Math.random() * 1.0
+          } else {
+            eyeTargetX = (Math.random() * 2 - 1) * 0.65
+            const r = Math.random()
+            if      (r < 0.20) eyeTargetY =  0.5 + Math.random() * 0.35
+            else if (r < 0.35) eyeTargetY = -0.3 - Math.random() * 0.25
+            else               eyeTargetY = (Math.random() * 2 - 1) * 0.4
+            nextSaccade = saccadeTimer + 1.5 + Math.random() * 2.5
+          }
         }
-        // lerp speed 3.5 — eyes drift naturally, never snap or twitch
-        eyeX += (eyeTargetX - eyeX) * elapsed * 3.5
-        eyeY += (eyeTargetY - eyeY) * elapsed * 3.5
+        const gzSpd = speaking ? 5.0 : 3.5
+        eyeX += (eyeTargetX - eyeX) * elapsed * gzSpd
+        eyeY += (eyeTargetY - eyeY) * elapsed * gzSpd
         core.setParameterValueById('ParamEyeBallX', clamp(eyeX, -1, 1))
         core.setParameterValueById('ParamEyeBallY', clamp(eyeY, -1, 1))
 
-        // ── Blink — own timer, stays within 0–1 always ────────────────────
+        // ── Double-blink scheduler (idle only) ────────────────────────────
+        if (!speaking) {
+          dblBlinkTimer += elapsed
+          if (dblBlinkTimer >= nextDblBlink) {
+            dblBlinkPending = true
+            dblBlinkTimer = 0
+            nextDblBlink = 10 + Math.random() * 12
+          }
+        }
+
+        // ── Blink ─────────────────────────────────────────────────────────
+        const isSleepy = currentMood === MOODS.sleepy
+        // Speaking: snappy blink (11). Sleepy: slow droopy blink (6). Normal: 9
+        const bspd = speaking ? 11 : (isSleepy ? 6 : 9)
         blinkTimer += elapsed
-        const bspd = 9
-        if (blinkPhase === 0 && blinkTimer >= nextBlink) { blinkPhase = 1; blinkTimer = 0 }
+
+        if (blinkPhase === 0 && blinkTimer >= nextBlink) {
+          blinkPhase = 1; blinkTimer = 0
+        }
         if (blinkPhase === 1) {
           const v = clamp(1 - blinkTimer * bspd, 0, 1)
           core.setParameterValueById('ParamEyeLOpen', v)
@@ -173,87 +301,98 @@ export const AvatarRenderer = forwardRef(function AvatarRenderer(props, ref) {
           const v = clamp(blinkTimer * bspd, 0, 1)
           core.setParameterValueById('ParamEyeLOpen', v)
           core.setParameterValueById('ParamEyeROpen', v)
-          if (v >= 1) { blinkPhase = 0; blinkTimer = 0; nextBlink = 3 + Math.random() * 5 }
+          if (v >= 1) {
+            blinkPhase = 0; blinkTimer = 0
+            if (dblBlinkPending) {
+              nextBlink = 0.06 + Math.random() * 0.08  // blink again almost immediately
+              dblBlinkPending = false
+            } else if (isSleepy) {
+              nextBlink = 1.5 + Math.random() * 2.0    // sleepy: blinks more often
+            } else if (speaking) {
+              nextBlink = 4.0 + Math.random() * 3.0    // speaking: 4–7s gap, higher minimum than idle
+            } else {
+              nextBlink = 3.0 + Math.random() * 5.0    // normal idle
+            }
+          }
         } else {
-          core.setParameterValueById('ParamEyeLOpen', 1)
-          core.setParameterValueById('ParamEyeROpen', 1)
+          // Resting open — sleepy mode: eyes only 72% open (heavy lidded)
+          if (!_expressionActive) {
+            const restOpen = isSleepy ? 0.72 : 1.0
+            core.setParameterValueById('ParamEyeLOpen', restOpen)
+            core.setParameterValueById('ParamEyeROpen', restOpen)
+          }
         }
 
         origCoreUpdate()
       }
 
-      model._origCoreUpdate = origCoreUpdate
-    }).catch((err) => {
-      console.error('[AvatarRenderer] Failed to load Live2D model:', err)
+      _loaded = true
     })
+    .catch((err) => console.error('[AvatarRenderer] Failed to load Live2D model:', err))
+}
 
+export const AvatarRenderer = forwardRef(function AvatarRenderer(props, ref) {
+  const { width = 400, height = 600 } = props
+  const containerRef = useRef(null)
+
+  useEffect(() => {
+    initSingleton(width, height)
+    const container = containerRef.current
+    if (container && _app) container.appendChild(_app.view)
     return () => {
-      destroyed = true
-      if (modelRef.current?._origCoreUpdate)
-        modelRef.current.internalModel.coreModel.update = modelRef.current._origCoreUpdate
-      appRef.current = null
-      modelRef.current = null
-      app.destroy(true)
+      if (container && _app && _app.view.parentNode === container)
+        container.removeChild(_app.view)
     }
-  }, []) // intentionally empty — only run once on mount
+  }, [width, height])
 
-  // ── Imperative API ────────────────────────────────────────────────────────
   useImperativeHandle(ref, () => ({
-    /**
-     * Apply one or more expression tags for `duration` seconds,
-     * then auto-reset to the default idle expression.
-     * @param {string[]} names   - e.g. ['smile', 'shadow']
-     * @param {number}   duration - seconds before auto-reset
-     */
     setExpression(names, duration) {
-      const model = modelRef.current
-      if (!model) return
+      if (!_loaded || !_model) return
+      _expressionActive = true
 
+      // Queue the mood that best matches this expression — applied when it expires
       for (const name of names) {
-        const file = EXPRESSION_FILES[name]
-        if (file) {
-          model.expression(file)
-        }
+        const moodKey = EXPRESSION_TO_MOOD[name]
+        if (moodKey) { _pendingMood = MOODS[moodKey]; break }
+      }
 
-        // Parameter-based expressions (using actual Cubism 4 IDs from cdi3.json)
+      for (const name of names) {
+        const file = EXPRESSION_FILES[name]
+        if (file) _model.expression(file)
         if (name === 'wink') {
-          const c = model.internalModel.coreModel
-          c.setParameterValueById('ParamEyeLOpen', 0.0)
+          const c = _model.internalModel.coreModel
+          c.setParameterValueById('ParamEyeLOpen',  0.0)
           c.setParameterValueById('ParamBrowLForm', -1.0)
-          c.setParameterValueById('ParamMouthForm', 1.0)
+          c.setParameterValueById('ParamMouthForm',  1.0)
         }
         if (name === 'tongue') {
-          const c = model.internalModel.coreModel
+          const c = _model.internalModel.coreModel
           c.setParameterValueById('ParamMouthOpenY', 1.0)
-          c.setParameterValueById('ParamMouthForm', -1.0)
+          c.setParameterValueById('ParamMouthForm',  -1.0)
         }
       }
-
-      // Schedule auto-reset after the audio segment finishes
       setTimeout(() => {
-        modelRef.current?.expression()   // no-arg = reset to default
+        _expressionActive = false
+        if (_model) _model.expression()
       }, duration * 1000)
     },
 
-    /**
-     * Directly set a Live2D parameter by ID.
-     * Useful for lip-sync or head-tracking integrations.
-     */
+    /** Switch between idle and speaking animation state */
+    setSpeaking(active) {
+      _state = active ? STATE.SPEAKING : STATE.IDLE
+    },
+
     setParameter(name, value) {
-      modelRef.current?.internalModel.coreModel.setParameterValueById(name, value)
+      _model?.internalModel.coreModel.setParameterValueById(name, value)
     },
 
-    /** Immediately reset to default idle expression. */
     resetNeutral() {
-      modelRef.current?.expression()
+      _expressionActive = false
+      _model?.expression()
     },
 
-    /**
-     * Drive mouth open from audio amplitude (0–1).
-     * Called each animation frame by CallOverlay's Web Audio analyser.
-     */
     setMouthOpen(v) {
-      mouthOpenRef.current = Math.max(0, Math.min(1, v))
+      _mouthOpen = Math.max(0, Math.min(1, v))
     },
   }), [])
 
diff --git a/dashboard/src/components/AvatarRenderer.test.jsx b/dashboard/src/components/AvatarRenderer.test.jsx
index a27f29a..fa3df66 100644
--- a/dashboard/src/components/AvatarRenderer.test.jsx
+++ b/dashboard/src/components/AvatarRenderer.test.jsx
@@ -1,7 +1,7 @@
 /**
- * Phase 2 tests — AvatarRenderer component
+ * AvatarRenderer tests — Phase 3
  * All GPU / PIXI / Live2D dependencies are mocked so these run in jsdom
- * without a real GPU or network.
+ * without a real WebGL context or network.
  *
  * Run:  cd dashboard && npm test
  */
@@ -11,44 +11,57 @@ import { render, act } from '@testing-library/react'
 import { createRef } from 'react'
 import { AvatarRenderer } from './AvatarRenderer'
 
-// ── Mock heavy GPU dependencies ────────────────────────────────────────────
+// ── Mocks ──────────────────────────────────────────────────────────────────
 
 const mockSetParameterValueById = vi.fn()
 const mockExpression = vi.fn()
+const mockCoreUpdate = vi.fn()
+
 const mockModel = {
+  height: 600,   // needed for auto-scale calculation
   expression: mockExpression,
-  scale:  { set: vi.fn() },
-  anchor: { set: vi.fn() },
+  scale:    { set: vi.fn() },
+  anchor:   { set: vi.fn() },
   position: { set: vi.fn() },
   internalModel: {
-    coreModel: { setParameterValueById: mockSetParameterValueById },
+    coreModel: {
+      setParameterValueById: mockSetParameterValueById,
+      update: mockCoreUpdate,  // needed for core.update.bind() in initSingleton
+    },
   },
 }
-const mockStage    = { addChild: vi.fn() }
-const mockRenderer = { width: 400, height: 600 }
+
+// A real canvas element so container.appendChild / removeChild work in jsdom
+const mockCanvas = document.createElement('canvas')
+
 const mockApp = {
-  stage:    mockStage,
-  renderer: mockRenderer,
+  view:     mockCanvas,
+  stage:    { addChild: vi.fn() },
+  screen:   { width: 400, height: 600 },  // used for model positioning
+  renderer: { width: 400, height: 600 },
   destroy:  vi.fn(),
 }
 
 vi.mock('pixi.js', () => ({
   Application: vi.fn(() => mockApp),
+  Ticker: {},   // passed to Live2DModel.registerTicker
 }))
 
-vi.mock('pixi-live2d-display', () => ({
+// Must mock the cubism4 sub-path — that's what the component imports
+vi.mock('pixi-live2d-display/cubism4', () => ({
   Live2DModel: {
+    registerTicker: vi.fn(),
     from: vi.fn(() => Promise.resolve(mockModel)),
   },
 }))
 
 // ── Helpers ────────────────────────────────────────────────────────────────
 
-/** Mount the component and wait for the async model load to complete. */
+/** Mount the component and wait for the async model load to settle. */
 async function mountAndLoad(props = {}) {
   const ref = createRef()
   const result = render(<AvatarRenderer ref={ref} {...props} />)
-  await act(async () => {}) // flush the Live2DModel.from() promise
+  await act(async () => {})  // flush Live2DModel.from() promise + React effects
   return { ref, ...result }
 }
 
@@ -59,73 +72,43 @@ describe('AvatarRenderer', () => {
     vi.clearAllMocks()
   })
 
-  // ── DOM ──────────────────────────────────────────────────────────────────
+  // ── Rendering ─────────────────────────────────────────────────────────────
+
+  it('renders a container div', async () => {
+    const { container } = await mountAndLoad()
+    expect(container.firstChild).toBeTruthy()
+  })
 
-  it('renders a canvas element', async () => {
+  it('renders a canvas element inside the container', async () => {
     const { container } = await mountAndLoad()
     expect(container.querySelector('canvas')).toBeTruthy()
   })
 
-  it('canvas has correct width and height attributes', async () => {
+  it('wrapper div reflects width and height props', async () => {
     const { container } = await mountAndLoad({ width: 320, height: 480 })
-    const canvas = container.querySelector('canvas')
-    expect(canvas.getAttribute('width')).toBe('320')
-    expect(canvas.getAttribute('height')).toBe('480')
+    const div = container.firstChild
+    expect(div.style.width).toBe('320px')
+    expect(div.style.height).toBe('480px')
   })
 
   // ── Expression file mapping ───────────────────────────────────────────────
 
-  it('setExpression maps smile → SmileLock.exp3.json', async () => {
-    const { ref } = await mountAndLoad()
-    ref.current.setExpression(['smile'], 2.0)
-    expect(mockExpression).toHaveBeenCalledWith('SmileLock.exp3.json')
-  })
-
-  it('setExpression maps sad → SadLock.exp3.json', async () => {
-    const { ref } = await mountAndLoad()
-    ref.current.setExpression(['sad'], 2.0)
-    expect(mockExpression).toHaveBeenCalledWith('SadLock.exp3.json')
-  })
-
-  it('setExpression maps angry → Angry.exp3.json', async () => {
-    const { ref } = await mountAndLoad()
-    ref.current.setExpression(['angry'], 1.5)
-    expect(mockExpression).toHaveBeenCalledWith('Angry.exp3.json')
-  })
-
-  it('setExpression maps ghost → Ghost.exp3.json', async () => {
-    const { ref } = await mountAndLoad()
-    ref.current.setExpression(['ghost'], 2.0)
-    expect(mockExpression).toHaveBeenCalledWith('Ghost.exp3.json')
-  })
-
-  it('setExpression maps ghost_nervous → GhostChange.exp3.json', async () => {
-    const { ref } = await mountAndLoad()
-    ref.current.setExpression(['ghost_nervous'], 2.0)
-    expect(mockExpression).toHaveBeenCalledWith('GhostChange.exp3.json')
-  })
-
-  it('setExpression maps shadow → Shadow.exp3.json', async () => {
-    const { ref } = await mountAndLoad()
-    ref.current.setExpression(['shadow'], 2.0)
-    expect(mockExpression).toHaveBeenCalledWith('Shadow.exp3.json')
-  })
-
-  it('setExpression maps eyeshine_off → EyeshineOff.exp3.json', async () => {
-    const { ref } = await mountAndLoad()
-    ref.current.setExpression(['eyeshine_off'], 1.5)
-    expect(mockExpression).toHaveBeenCalledWith('EyeshineOff.exp3.json')
-  })
-
-  it('setExpression maps pupil_shrink → PupilShrink.exp3.json', async () => {
+  it.each([
+    ['smile',         'SmileLock.exp3.json'  ],
+    ['sad',           'SadLock.exp3.json'    ],
+    ['angry',         'Angry.exp3.json'      ],
+    ['ghost',         'Ghost.exp3.json'      ],
+    ['ghost_nervous', 'GhostChange.exp3.json'],
+    ['shadow',        'Shadow.exp3.json'     ],
+    ['pupil_shrink',  'PupilShrink.exp3.json'],
+    ['eyeshine_off',  'EyeshineOff.exp3.json'],
+  ])('setExpression maps "%s" → %s', async (tag, file) => {
     const { ref } = await mountAndLoad()
-    ref.current.setExpression(['pupil_shrink'], 1.5)
-    expect(mockExpression).toHaveBeenCalledWith('PupilShrink.exp3.json')
+    ref.current.setExpression([tag], 2.0)
+    expect(mockExpression).toHaveBeenCalledWith(file)
   })
 
-  // ── Multi-expression ──────────────────────────────────────────────────────
-
-  it('setExpression applies all tags in the list', async () => {
+  it('setExpression applies all tags in the array', async () => {
     const { ref } = await mountAndLoad()
     ref.current.setExpression(['smile', 'shadow'], 2.0)
     expect(mockExpression).toHaveBeenCalledWith('SmileLock.exp3.json')
@@ -134,35 +117,34 @@ describe('AvatarRenderer', () => {
 
   // ── Parameter-based expressions ───────────────────────────────────────────
 
-  it('setExpression wink sets EyeOpenLeft=0, BrowLeftY=0, MouthSmile=1', async () => {
+  it('wink sets correct Cubism4 Core Model parameters', async () => {
     const { ref } = await mountAndLoad()
     ref.current.setExpression(['wink'], 1.5)
-    expect(mockSetParameterValueById).toHaveBeenCalledWith('EyeOpenLeft', 0.0)
-    expect(mockSetParameterValueById).toHaveBeenCalledWith('BrowLeftY',   0.0)
-    expect(mockSetParameterValueById).toHaveBeenCalledWith('MouthSmile',  1.0)
+    expect(mockSetParameterValueById).toHaveBeenCalledWith('ParamEyeLOpen',  0.0)
+    expect(mockSetParameterValueById).toHaveBeenCalledWith('ParamBrowLForm', -1.0)
+    expect(mockSetParameterValueById).toHaveBeenCalledWith('ParamMouthForm',  1.0)
   })
 
-  it('setExpression tongue sets MouthOpen=1, TongueOut=1, MouthSmile=0', async () => {
+  it('tongue sets correct Cubism4 Core Model parameters', async () => {
     const { ref } = await mountAndLoad()
     ref.current.setExpression(['tongue'], 1.5)
-    expect(mockSetParameterValueById).toHaveBeenCalledWith('MouthOpen',  1.0)
-    expect(mockSetParameterValueById).toHaveBeenCalledWith('TongueOut',  1.0)
-    expect(mockSetParameterValueById).toHaveBeenCalledWith('MouthSmile', 0.0)
+    expect(mockSetParameterValueById).toHaveBeenCalledWith('ParamMouthOpenY', 1.0)
+    expect(mockSetParameterValueById).toHaveBeenCalledWith('ParamMouthForm',  -1.0)
   })
 
   // ── Auto-reset ────────────────────────────────────────────────────────────
 
-  it('setExpression schedules auto-reset after duration ms', async () => {
+  it('setExpression resets to neutral after the given duration', async () => {
     vi.useFakeTimers()
     const { ref } = await mountAndLoad()
     ref.current.setExpression(['smile'], 2.0)
     mockExpression.mockClear()
     vi.advanceTimersByTime(2000)
-    expect(mockExpression).toHaveBeenCalledWith()  // no-arg = reset to default
+    expect(mockExpression).toHaveBeenCalledWith()  // no-arg call = reset to default
     vi.useRealTimers()
   })
 
-  it('auto-reset fires after the correct delay', async () => {
+  it('auto-reset does not fire before the duration elapses', async () => {
     vi.useFakeTimers()
     const { ref } = await mountAndLoad()
     ref.current.setExpression(['angry'], 1.5)
@@ -174,6 +156,30 @@ describe('AvatarRenderer', () => {
     vi.useRealTimers()
   })
 
+  // ── Mood memory ───────────────────────────────────────────────────────────
+  // The full mood-rendering loop requires a live PIXI ticker (unavailable in jsdom).
+  // These tests verify the pending-mood pipeline is wired without crashing.
+
+  it('setExpression with a mood-mapped name does not throw', async () => {
+    const { ref } = await mountAndLoad()
+    // Each of these has an EXPRESSION_TO_MOOD entry and should queue a pending mood
+    for (const tag of ['smile', 'sad', 'angry', 'ghost', 'ghost_nervous',
+                        'shadow', 'pupil_shrink', 'eyeshine_off']) {
+      expect(() => ref.current.setExpression([tag], 1.0)).not.toThrow()
+    }
+  })
+
+  it('mood memory: expression → expiry → state transition completes cleanly', async () => {
+    vi.useFakeTimers()
+    const { ref } = await mountAndLoad()
+    ref.current.setExpression(['smile'], 2.0)   // queues _pendingMood = MOODS.happy
+    vi.advanceTimersByTime(2000)                 // triggers auto-reset; _pendingMood consumed on next frame
+    // After expiry the avatar should accept further API calls without errors
+    expect(() => ref.current.setSpeaking(false)).not.toThrow()
+    expect(() => ref.current.resetNeutral()).not.toThrow()
+    vi.useRealTimers()
+  })
+
   // ── resetNeutral ──────────────────────────────────────────────────────────
 
   it('resetNeutral calls model.expression() with no arguments', async () => {
@@ -182,19 +188,56 @@ describe('AvatarRenderer', () => {
     expect(mockExpression).toHaveBeenCalledWith()
   })
 
-  // ── setParameter ─────────────────────────────────────────────────────────
+  // ── setParameter ──────────────────────────────────────────────────────────
 
-  it('setParameter forwards name and value to coreModel', async () => {
+  it('setParameter forwards the id and value to coreModel', async () => {
     const { ref } = await mountAndLoad()
     ref.current.setParameter('ParamMouthOpenY', 0.8)
     expect(mockSetParameterValueById).toHaveBeenCalledWith('ParamMouthOpenY', 0.8)
   })
 
+  // ── setSpeaking ───────────────────────────────────────────────────────────
+
+  it('setSpeaking(true) switches to speaking state without throwing', async () => {
+    const { ref } = await mountAndLoad()
+    expect(() => ref.current.setSpeaking(true)).not.toThrow()
+  })
+
+  it('setSpeaking(false) switches to idle state without throwing', async () => {
+    const { ref } = await mountAndLoad()
+    expect(() => ref.current.setSpeaking(false)).not.toThrow()
+  })
+
+  it('setSpeaking can toggle states repeatedly without side effects', async () => {
+    const { ref } = await mountAndLoad()
+    ref.current.setSpeaking(true)
+    ref.current.setSpeaking(false)
+    ref.current.setSpeaking(true)
+    // State changes should not trigger expressions
+    expect(mockExpression).not.toHaveBeenCalled()
+  })
+
+  // ── setMouthOpen ──────────────────────────────────────────────────────────
+
+  it('setMouthOpen accepts values within [0, 1]', async () => {
+    const { ref } = await mountAndLoad()
+    expect(() => ref.current.setMouthOpen(0)).not.toThrow()
+    expect(() => ref.current.setMouthOpen(0.5)).not.toThrow()
+    expect(() => ref.current.setMouthOpen(1)).not.toThrow()
+  })
+
+  it('setMouthOpen silently clamps out-of-range values', async () => {
+    const { ref } = await mountAndLoad()
+    expect(() => ref.current.setMouthOpen(-1.0)).not.toThrow()
+    expect(() => ref.current.setMouthOpen(2.5)).not.toThrow()
+  })
+
   // ── Guard rails ───────────────────────────────────────────────────────────
 
-  it('unknown expression name is silently ignored (no throw)', async () => {
+  it('unknown expression tag is silently ignored', async () => {
     const { ref } = await mountAndLoad()
     expect(() => ref.current.setExpression(['nonexistent_tag'], 1.0)).not.toThrow()
+    expect(mockExpression).not.toHaveBeenCalled()
   })
 
   it('empty expression list does not throw', async () => {
diff --git a/dashboard/src/components/CallOverlay.jsx b/dashboard/src/components/CallOverlay.jsx
index f170211..0ab7b3e 100644
--- a/dashboard/src/components/CallOverlay.jsx
+++ b/dashboard/src/components/CallOverlay.jsx
@@ -19,9 +19,10 @@ export default function CallOverlay({ onClose }) {
     const roomRef    = useRef(null)
     const timerRef   = useRef(null)
     const avatarRef  = useRef(null)
-    const audioCtxRef = useRef(null)
-    const analyserRef = useRef(null)
-    const lipRafRef   = useRef(null)
+    const audioCtxRef      = useRef(null)
+    const analyserRef      = useRef(null)
+    const lipRafRef        = useRef(null)
+    const speakTimeoutRef  = useRef(null)
 
     // ─── Connect to LiveKit ──────────────────────
     useEffect(() => {
@@ -71,9 +72,23 @@ export default function CallOverlay({ onClose }) {
                             let sum = 0
                             for (let i = 0; i < buf.length; i++) sum += buf[i] * buf[i]
                             const rms = Math.sqrt(sum / buf.length)
-                            avatarRef.current?.setMouthOpen(
-                                rms > 0.008 ? Math.min(0.55, rms * 10) : 0
-                            )
+                            const active = rms > 0.008
+                            avatarRef.current?.setMouthOpen(active ? Math.min(0.55, rms * 10) : 0)
+
+                            // Transition to speaking state immediately on audio;
+                            // debounce the return to idle so brief pauses don't flicker.
+                            if (active) {
+                                if (speakTimeoutRef.current) {
+                                    clearTimeout(speakTimeoutRef.current)
+                                    speakTimeoutRef.current = null
+                                }
+                                avatarRef.current?.setSpeaking(true)
+                            } else if (!speakTimeoutRef.current) {
+                                speakTimeoutRef.current = setTimeout(() => {
+                                    avatarRef.current?.setSpeaking(false)
+                                    speakTimeoutRef.current = null
+                                }, 600)
+                            }
                         }
                         tick()
                     }
@@ -118,6 +133,7 @@ export default function CallOverlay({ onClose }) {
     const cleanup = useCallback(() => {
         if (timerRef.current) clearInterval(timerRef.current)
         if (lipRafRef.current) cancelAnimationFrame(lipRafRef.current)
+        if (speakTimeoutRef.current) { clearTimeout(speakTimeoutRef.current); speakTimeoutRef.current = null }
         if (audioCtxRef.current) { audioCtxRef.current.close(); audioCtxRef.current = null }
         if (roomRef.current) {
             roomRef.current.disconnect()
diff --git a/dashboard/src/components/PersonalityTuner.jsx b/dashboard/src/components/PersonalityTuner.jsx
index 3acfdd9..1f51eee 100644
--- a/dashboard/src/components/PersonalityTuner.jsx
+++ b/dashboard/src/components/PersonalityTuner.jsx
@@ -1,12 +1,32 @@
 const SLIDERS = [
-    { key: 'empathy', label: 'Empathy' },
-    { key: 'humor', label: 'Humor' },
+    { key: 'empathy',   label: 'Empathy' },
+    { key: 'humor',     label: 'Humor' },
     { key: 'formality', label: 'Formality' },
 ]
 
-export default function PersonalityTuner({ settings, onUpdate }) {
+const PROVIDERS = [ // provider picker options: `value` is written to settings.provider, `hint` is the tooltip and helper text
+    { value: 'openrouter', label: 'OpenRouter',    hint: 'Routes to any model (DeepSeek, GPT, Mistral…)' },
+    { value: 'openai',     label: 'OpenAI',         hint: 'Direct GPT-4o / o1 access' },
+    { value: 'anthropic',  label: 'Anthropic',      hint: 'Claude 3.5 / Claude 4' },
+    { value: 'groq',       label: 'Groq',           hint: 'Ultra-fast Llama / Mixtral inference' },
+    { value: 'ollama',     label: 'Ollama (local)',  hint: 'Local models via Ollama' },
+]
+
+const MODEL_SUGGESTIONS = { // model ids offered per provider (datalist options + quick-pick chips); keys match PROVIDERS[].value
+    openrouter: ['deepseek/deepseek-v3.2', 'openai/gpt-4o', 'anthropic/claude-sonnet-4-5', 'mistralai/mistral-nemo'],
+    openai:     ['gpt-4o', 'gpt-4o-mini', 'o1-mini'],
+    anthropic:  ['claude-opus-4-5', 'claude-sonnet-4-5', 'claude-haiku-4-5-20251001'],
+    groq:       ['llama-3.3-70b-versatile', 'llama-3.1-8b-instant', 'mixtral-8x7b-32768'],
+    ollama:     ['llama3.2', 'mistral', 'gemma2', 'qwen2.5'],
+}
+
+export default function PersonalityTuner({ settings, onChange }) {
     if (!settings) return <TunerSkeleton />
 
+    const provider    = settings.provider || 'openrouter'
+    const suggestions = MODEL_SUGGESTIONS[provider] || []
+    const providerInfo = PROVIDERS.find(p => p.value === provider)
+
     return (
         <div className="bg-white p-8 rounded-xl border border-slate-200 shadow-sm">
             <h3 className="text-xl font-bold mb-8 flex items-center gap-2">
@@ -14,32 +34,105 @@ export default function PersonalityTuner({ settings, onUpdate }) {
                 Personality Tuner
             </h3>
 
-            <div className="space-y-8">
+            {/* Provider picker */}
+            <div className="mb-6">
+                <label className="block text-sm font-bold text-slate-500 uppercase tracking-widest mb-2">
+                    LLM Provider
+                </label>
+                <div className="grid grid-cols-2 gap-2 sm:grid-cols-3">
+                    {PROVIDERS.map(p => (
+                        <button key={p.value} onClick={() => onChange({ provider: p.value })} title={p.hint}
+                            className={`px-3 py-2 rounded-lg text-sm font-medium border transition-all text-left ${
+                                provider === p.value
+                                    ? 'bg-primary text-white border-primary shadow-sm'
+                                    : 'bg-bg-light text-slate-600 border-slate-200 hover:border-primary/40'
+                            }`}>
+                            {p.label}
+                        </button>
+                    ))}
+                </div>
+                {providerInfo && (
+                    <p className="text-xs text-slate-400 mt-1.5">{providerInfo.hint}</p>
+                )}
+            </div>
+
+            {/* Model input */}
+            <div className="mb-6">
+                <label className="block text-sm font-bold text-slate-500 uppercase tracking-widest mb-2">
+                    Model
+                </label>
+                <input
+                    type="text"
+                    value={settings.model || ''}
+                    onChange={e => onChange({ model: e.target.value })}
+                    placeholder="e.g. deepseek/deepseek-v3.2"
+                    list="model-suggestions"
+                    className="w-full bg-bg-light border border-slate-200 rounded-lg px-3 py-2 text-sm font-mono focus:ring-1 focus:ring-primary focus:border-primary outline-none"
+                />
+                <datalist id="model-suggestions">
+                    {suggestions.map(m => <option key={m} value={m} />)}
+                </datalist>
+                {suggestions.length > 0 && (
+                    <div className="flex flex-wrap gap-1 mt-2">
+                        {suggestions.map(m => (
+                            <button key={m} onClick={() => onChange({ model: m })}
+                                className="text-xs px-2 py-0.5 rounded-full bg-slate-100 text-slate-500 hover:bg-primary/10 hover:text-primary transition-colors font-mono">
+                                {m.split('/').pop()}
+                            </button>
+                        ))}
+                    </div>
+                )}
+            </div>
+
+            {/* Temperature + Max Tokens */}
+            <div className="grid grid-cols-2 gap-4 mb-8">
+                <div>
+                    <label className="block text-sm font-medium text-slate-500 mb-1">
+                        Temperature&nbsp;<span className="text-primary font-bold">{settings.temperature ?? 0.8}</span>
+                    </label>
+                    <input type="range" min="0" max="1" step="0.05"
+                        value={settings.temperature ?? 0.8}
+                        onChange={e => onChange({ temperature: parseFloat(e.target.value) })}
+                        className="w-full accent-primary"
+                    />
+                </div>
+                <div>
+                    <label className="block text-sm font-medium text-slate-500 mb-1">
+                        Max Tokens&nbsp;<span className="text-primary font-bold">{settings.max_tokens ?? 300}</span>
+                    </label>
+                    <input type="range" min="100" max="1000" step="50"
+                        value={settings.max_tokens ?? 300}
+                        onChange={e => onChange({ max_tokens: parseInt(e.target.value) })}
+                        className="w-full accent-primary"
+                    />
+                </div>
+            </div>
+
+            {/* Personality sliders */}
+            <div className="space-y-6">
                 {SLIDERS.map(({ key, label }) => (
-                    <div key={key} className="space-y-3">
+                    <div key={key} className="space-y-2">
                         <div className="flex justify-between text-sm font-medium">
                             <label className="text-slate-500">{label}</label>
                             <span className="text-primary">{settings[key]}%</span>
                         </div>
-                        <input
-                            type="range"
-                            min="0"
-                            max="100"
+                        <input type="range" min="0" max="100"
                             value={settings[key]}
-                            onChange={(e) => onUpdate({ [key]: parseInt(e.target.value) })}
-                            className="w-full h-2 bg-slate-100 rounded-lg appearance-none cursor-pointer slider-thumb accent-primary"
+                            onChange={e => onChange({ [key]: parseInt(e.target.value) })}
+                            className="w-full h-2 bg-slate-100 rounded-lg appearance-none cursor-pointer accent-primary"
                         />
                     </div>
                 ))}
             </div>
 
-            <div className="mt-10">
+            {/* System prompt */}
+            <div className="mt-8">
                 <label className="block text-sm font-bold text-slate-500 uppercase tracking-widest mb-3">
                     System Prompt Override
                 </label>
                 <textarea
-                    value={settings.system_prompt}
-                    onChange={(e) => onUpdate({ system_prompt: e.target.value })}
+                    value={settings.system_prompt || ''}
+                    onChange={e => onChange({ system_prompt: e.target.value })}
                     placeholder="Enter core instructions here..."
                     className="w-full h-32 bg-bg-light border border-slate-200 rounded-lg p-4 font-mono text-sm focus:ring-primary focus:border-primary custom-scrollbar resize-none outline-none"
                 />
@@ -52,18 +145,21 @@ function TunerSkeleton() {
     return (
         <div className="bg-white p-8 rounded-xl border border-slate-200 shadow-sm animate-pulse">
             <div className="h-7 w-48 bg-slate-200 rounded mb-8" />
-            <div className="space-y-8">
-                {[1, 2, 3].map((i) => (
-                    <div key={i} className="space-y-3">
+            <div className="space-y-4 mb-8">
+                <div className="h-4 w-24 bg-slate-200 rounded" />
+                <div className="grid grid-cols-3 gap-2">
+                    {[1,2,3].map(i => <div key={i} className="h-9 bg-slate-100 rounded-lg" />)}
+                </div>
+                <div className="h-9 w-full bg-slate-100 rounded-lg" />
+            </div>
+            <div className="space-y-6">
+                {[1,2,3].map(i => (
+                    <div key={i} className="space-y-2">
                         <div className="h-4 w-24 bg-slate-200 rounded" />
                         <div className="h-2 w-full bg-slate-100 rounded-full" />
                     </div>
                 ))}
             </div>
-            <div className="mt-10">
-                <div className="h-4 w-36 bg-slate-200 rounded mb-3" />
-                <div className="h-32 w-full bg-slate-100 rounded-lg" />
-            </div>
         </div>
     )
 }
diff --git a/voice-agent/agent.py b/voice-agent/agent.py
index 228a3f2..51a1242 100644
--- a/voice-agent/agent.py
+++ b/voice-agent/agent.py
@@ -1,39 +1,51 @@
 from dotenv import load_dotenv
+import os
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+ENV_PATH = os.path.normpath(os.path.join(BASE_DIR, "..", ".env"))
+
+if not os.path.exists(ENV_PATH):
+    ENV_PATH = os.path.join(BASE_DIR, ".env")
+
+load_dotenv(ENV_PATH)
+
 from livekit import agents, rtc
 from livekit.agents import AgentServer, AgentSession, Agent, room_io, llm
 from livekit.plugins import noise_cancellation, silero, deepgram, openai, cartesia
 
-import aiohttp
-import os
 import logging
 import threading
 import asyncio
+import aiohttp
+
+import openai as _openai_sdk  # raw AsyncOpenAI, not livekit.plugins.openai
 
 from vtube_controller import VTUBE
 from avatar_bridge import BRIDGE
 from memory_service import memory_service
 
 logging.basicConfig(level=logging.INFO)
+logging.getLogger("hpack").setLevel(logging.WARNING)
+logging.getLogger("httpx").setLevel(logging.WARNING)
+logging.getLogger("httpcore").setLevel(logging.WARNING)
+logging.getLogger("torio").setLevel(logging.WARNING)
+logging.getLogger("asyncio").setLevel(logging.WARNING)
 logger = logging.getLogger("aura-agent")
-
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-ENV_PATH = os.path.normpath(os.path.join(BASE_DIR, "..", ".env"))
-
-if not os.path.exists(ENV_PATH):
-    ENV_PATH = os.path.join(BASE_DIR, ".env")
-
-logger.info(f"Loading .env from: {ENV_PATH}")
-load_dotenv(ENV_PATH)
+logger.info(f"Loaded .env from: {ENV_PATH}")
 
 DEEPGRAM_KEY   = os.getenv("DEEPGRAM_API_KEY")
 OPENROUTER_KEY = os.getenv("OPENROUTER_API_KEY")
 CARTESIA_KEY   = os.getenv("CARTESIA_API_KEY")
+OPENAI_KEY     = os.getenv("OPENAI_API_KEY")
+GROQ_KEY       = os.getenv("GROQ_API_KEY")
+ANTHROPIC_KEY  = os.getenv("ANTHROPIC_API_KEY")
+OLLAMA_URL     = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
 
 if not DEEPGRAM_KEY:
     logger.error("DEEPGRAM_API_KEY is missing!")
 
-if not OPENROUTER_KEY:
-    logger.error("OPENROUTER_API_KEY is missing!")
+if not any([OPENROUTER_KEY, OPENAI_KEY, GROQ_KEY, ANTHROPIC_KEY]):
+    logger.warning("No cloud LLM key found — memory extraction will use local Ollama.")
 
 if not CARTESIA_KEY:
     logger.error("CARTESIA_API_KEY is missing!")
@@ -77,7 +89,7 @@
 
 [CONSTRAINTS & NARROWING]
 - FAST STARTS: Always start your response with a very short 1-3 word filler sentence (e.g., "[smile] Yahoo!", "[sad] Aiya...", "[smile] Hmm..."). This allows the TTS engine to start speaking immediately!
-- CONCISE: Keep responses to 1-3 short sentences. You are a voice assistant, do not monologue.
+- NATURAL FLOW: Aim for 2-4 sentences in most responses. You are a companion, not just a tool. Provide descriptive, personality-rich answers rather than robotic one-liners.
 - NO NARRATIVE TEXT: Never describe your actions (e.g., "whispers", "leans in").
 - NO EMOTICONS/EMOJIS: Rely entirely on your Expression Tags. No `*laughs*` or `(sigh)`.
 - PUNCTUATION: End sentences cleanly (`.`, `!`, `?`). Do NOT use ellipses (`...`, `ー`, or `…`) as they break the over-eager TTS pacing.
@@ -85,25 +97,16 @@
 - FORMATTING: Output pure, plain text. No markdown (bold, italics, bullet points).
 
 [EXAMPLES]
-- `[smile] Yahoo! Business is booming today!`
-- `[angry, smile, smile] Ohoho? You think you can prank the prankster?`
-- `[sad, smile] Aiya... Don't look so down, even the sun sets eventually.`
-- `[sad, smile, smile] Hmm? I'm sure it'll work out, probably!`
-- `[smile, sad, sad] Pondering the mysteries of the beyond... or just what's for lunch.`
-- `[sad, angry] Hmph! You're being quite difficult today, aren't you?`
-- `[angry, sad] Aiya, please? Just one tiny little butterfly?`
-- `[sad] The silence of the night can be so lonely sometimes.`
-- `[angry] Stop it! You're making a mess of everything!`
-- `[ghost] Surprise! My buddy wanted to say hi!`
-- `[angry, smile, smile, shadow] Oho... You really shouldn't have done that.`
-- `[sad, pupil_shrink] Oh? Did you feel that chill down your spine?`
-- `[angry, eyeshine_off, shadow] Some secrets are buried for a reason.`
-- `[smile, ghost] We're ready for some mischief! Are you?`
-- `[sad, smile, shadow] It's all part of the natural cycle, really.`
-- `[wink] Yahoo! Got you good, didn't I?`
-- `[tongue] Bleh! You're just too easy to tease.`
-- `[tongue, wink, angry, smile, smile] Ohoho? Who's the prankster now?`
-- `[smile] おやすみなさい！また明日ね!`
+- `[smile] Yahoo! Business is booming today! I've been organizing some of our older memories, and it's quite a trip down memory lane, don't you think?`
+- `[angry, smile, smile] Ohoho? You think you can prank the prankster? I've seen that trick before, but I'll give you points for effort!`
+- `[sad, smile] Aiya! Don't look so down, even the sun sets eventually. But that's okay, because then you get to see the stars, right?`
+- `[sad, smile, smile] Hmm? I'm sure it'll work out, probably! Just keep your chin up and maybe treat yourself to some dango.`
+- `[smile, sad, sad] Pondering the mysteries of the beyond, or just what's for lunch. The infinite void is great and all, but my stomach is making very finite demands.`
+- `[sad, angry] Hmph! You're being quite difficult today, aren't you? Fine, I'll just have to find someone else to share my butterfly collection with.`
+- `[wink] Yahoo! Got you good, didn't I? You should have seen your face! Reminds me of that time I swapped my buddy's flower for a ghost-trap.`
+- `[tongue] Bleh! You're just too easy to tease. I could keep this up all night, but I'll let you have a win just this once.`
+- `[tongue, wink, angry, smile, smile] Ohoho? Who's the prankster now? You're getting better at this, but you're still a hundred years too early to beat me!`
+- `[smile] おやすみなさい！また明日ね! I hope you have some really mischievous dreams!`
 
 [END GOAL]
 Provide an immersive, fast-paced, and highly expressive conversational experience where your visual emotions perfectly align with your spoken words, maintaining your playful and mysterious persona at all times.\
@@ -147,6 +150,20 @@ def build_system_prompt(long_term_memory: str) -> str:
 OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
 OPENROUTER_MODEL    = "deepseek/deepseek-v3.2"
 
+def _resolve_llm_client():
+    """Pick the memory-extraction LLM: the first configured of OpenRouter, OpenAI, Groq, Anthropic, else local Ollama.
+    Returns (AsyncOpenAI-compatible client, model name), or (None, None) when the caller must use the Anthropic SDK."""
+    if OPENROUTER_KEY:
+        return (_openai_sdk.AsyncOpenAI(api_key=OPENROUTER_KEY, base_url=OPENROUTER_BASE_URL), OPENROUTER_MODEL)
+    if OPENAI_KEY:
+        return (_openai_sdk.AsyncOpenAI(api_key=OPENAI_KEY), "gpt-4o-mini")
+    if GROQ_KEY:  # Groq exposes an OpenAI-compatible endpoint; use a currently served model id
+        return (_openai_sdk.AsyncOpenAI(api_key=GROQ_KEY, base_url="https://api.groq.com/openai/v1"), "llama-3.1-8b-instant")
+    if ANTHROPIC_KEY:
+        return (None, None)  # signal caller to use anthropic SDK
+    # Ollama — no real key needed (the SDK only requires a non-empty string); always attempted last
+    return (_openai_sdk.AsyncOpenAI(api_key="ollama", base_url=f"{OLLAMA_URL}/v1"), "llama3.2")
+
 tts_type = os.getenv("TTS_TYPE", "qwen").lower()
 
 if tts_type == "qwen":
@@ -177,6 +194,8 @@ def build_system_prompt(long_term_memory: str) -> str:
 
 server = AgentServer()
 
+_tts_ready = threading.Event()
+
 @server.on("worker_started")
 def on_worker_init():
     logger.info("Worker started, warming up TTS...")
@@ -185,15 +204,45 @@ def run_warmup():
         try:
             if hasattr(TTS_PLUGIN, 'warmup'):
                 TTS_PLUGIN.warmup()
-
         except Exception as e:
             logger.error(f"TTS warmup failed: {e}")
+        finally:
+            _tts_ready.set()
 
     threading.Thread(target=run_warmup, daemon=True).start()
 
+_EXTRACT_MAX_ATTEMPTS = 3
+_EXTRACT_BACKOFF_BASE = 2.0  # seconds
+
+async def _extract_facts_once(client, model: str, chat_text: str) -> str:
+    """One LLM call for memory extraction (no retries). Returns the stripped reply text, or "" if the reply has no text."""
+    if client is None:  # no OpenAI-compatible client was resolved: go through the Anthropic SDK
+        try:
+            import anthropic as _anthropic_sdk  # optional dependency, imported only on this path
+        except ImportError as exc:
+            raise RuntimeError("anthropic SDK not installed") from exc
+        aclient = _anthropic_sdk.AsyncAnthropic(api_key=ANTHROPIC_KEY)
+        response = await aclient.messages.create(
+            model="claude-haiku-4-5-20251001",
+            max_tokens=300,
+            system=MEMORY_EXTRACTION_PROMPT,
+            messages=[{"role": "user", "content": f"Conversation:\n{chat_text}"}],
+        )
+        return "".join(getattr(block, "text", "") for block in response.content).strip()  # safe on empty/non-text blocks
+    else:
+        response = await client.chat.completions.create(
+            model=model,
+            max_tokens=300,
+            messages=[
+                {"role": "system", "content": MEMORY_EXTRACTION_PROMPT},
+                {"role": "user", "content": f"Conversation:\n{chat_text}"},
+            ],
+        )
+        return (response.choices[0].message.content or "").strip()  # content may be None
+
+
 # Extract this session message to LLM and save in memory table
-async def extract_and_save_memory(identity: str, conversation_id, openrouter_key: str):
-    
+async def extract_and_save_memory(identity: str, conversation_id):
     try:
         messages = await memory_service.get_history(conversation_id, n=50)
         if not messages:
@@ -205,39 +254,115 @@ async def extract_and_save_memory(identity: str, conversation_id, openrouter_key
             for m in messages
         )
 
-        async with aiohttp.ClientSession() as session:
-            async with session.post(
-                f"{OPENROUTER_BASE_URL}/chat/completions",
-                headers={
-                    "Authorization": f"Bearer {openrouter_key}",
-                    "Content-Type": "application/json",
-                },
-                json={
-                    "model": OPENROUTER_MODEL,
-                    "max_tokens": 300,
-                    "messages": [
-                        {"role": "system", "content": MEMORY_EXTRACTION_PROMPT},
-                        {"role": "user", "content": f"Conversation:\n{chat_text}"},
-                    ],
-                },
-            ) as resp:
-                if resp.status != 200:
-                    logger.error(f"Memory extraction LLM error: {resp.status}")
+        client, model = _resolve_llm_client()
+
+        facts = None
+        for attempt in range(_EXTRACT_MAX_ATTEMPTS):
+            try:
+                facts = await _extract_facts_once(client, model, chat_text)
+                break
+            except Exception as e:
+                status = getattr(e, "status_code", None)
+                if status == 400:
+                    logger.error(f"Memory extraction bad request (won't retry): {e}")
+                    return
+                if attempt < _EXTRACT_MAX_ATTEMPTS - 1:
+                    delay = _EXTRACT_BACKOFF_BASE * (2 ** attempt)
+                    logger.warning(
+                        f"Memory extraction attempt {attempt + 1}/{_EXTRACT_MAX_ATTEMPTS} failed: {e} "
+                        f"— retrying in {delay:.0f}s"
+                    )
+                    await asyncio.sleep(delay)
+                else:
+                    logger.error(f"Memory extraction failed after {_EXTRACT_MAX_ATTEMPTS} attempts: {e}")
                     return
-                data = await resp.json()
-                facts = data["choices"][0]["message"]["content"].strip()
 
-        if facts == "NO_FACTS" or not facts:
+        if not facts or facts == "NO_FACTS":
             logger.info(f"Memory extraction: no facts found for '{identity}'.")
             return
 
         await memory_service.save_long_term_memory(identity=identity, facts=facts)
-        logger.info(f"Memory extraction complete for {identity} : {facts[:80]}...")
+        logger.info(f"Memory extraction complete for {identity}: {facts[:80]}...")
 
     except Exception as e:
         logger.error(f"Memory extraction error: {e}")
 
 
+class AURAAssistant(Agent):  # agent that tracks activity timestamps, drives VTUBE and persists each exchange
+    def __init__(self, conversation_id=None, user_identity: str = "aura-user", system_prompt: str = AURA_BASE_PROMPT, initial_chat_ctx: "llm.ChatContext | None" = None,) -> None:
+        super().__init__(instructions=system_prompt, chat_ctx=initial_chat_ctx)
+        self._conversation_id     = conversation_id  # falsy value disables persistence and memory extraction
+        self._user_identity       = user_identity
+        self._vtube_connected     = False  # overwritten with the result of VTUBE.connect() in on_enter
+        self._last_user_text      = ""  # latest completed user utterance that has not been stored yet
+        self._last_activity_time  = asyncio.get_event_loop().time()  # event-loop clock, not wall-clock time
+
+    def reset_activity(self) -> None:  # stamp "now" as the most recent activity
+        self._last_activity_time = asyncio.get_event_loop().time()
+
+    async def on_enter(self) -> None:
+        self._vtube_connected = await VTUBE.connect()
+
+    async def on_exit(self) -> None:
+        await VTUBE.disconnect()
+        BRIDGE.set_room(None)
+
+        # Extract long-term memory from this conversation and save it; awaited, so on_exit returns only afterwards
+        if self._conversation_id:
+            logger.info(f"Session ended for '{self._user_identity}'. Extracting long-term memory...")
+            await extract_and_save_memory(
+                identity=self._user_identity,
+                conversation_id=self._conversation_id,
+            )
+
+    async def on_user_turn_started(self) -> None:  # NOTE(review): confirm the Agent base class invokes this hook
+        self.reset_activity()
+
+    # Remember the user's finished utterance so it can be stored together with the assistant's reply
+    async def on_user_turn_completed(self, turn_ctx: llm.ChatContext, new_message: llm.ChatMessage) -> None:
+        self.reset_activity()
+        self._last_user_text = new_message.text_content or ""
+        await super().on_user_turn_completed(turn_ctx, new_message)
+
+    async def llm_chat(self, chat_ctx, **kwargs):
+        """Reset the idle timer and VTUBE turn state, then stream the parent's LLM chunks unchanged."""
+        self.reset_activity()
+        # Start of turn: clear animation logs to allow fresh winks/tongues
+        await VTUBE.start_turn()
+
+        # Re-yield every chunk from the parent implementation as-is
+        async for chunk in super().llm_chat(chat_ctx, **kwargs):
+            yield chunk
+        
+        # Emotion detection is now handled per-sentence in aura_tts.py
+        pass
+
+    # Persist the completed user/assistant exchange, tagged with the emotion detected in the reply
+    async def on_agent_speech_committed(self, msg: llm.ChatMessage) -> None:
+        self.reset_activity()
+        assistant_text = msg.text_content or ""
+
+        if self._conversation_id and self._last_user_text and assistant_text:
+            try:
+                emotions = VTUBE.detect_emotion(assistant_text)
+                emotion  = emotions[0] if emotions else "neutral"  # only the first detected emotion is stored
+
+                await memory_service.add_interaction(
+                    conversation_id=self._conversation_id,
+                    user_text=self._last_user_text,
+                    assistant_text=assistant_text,
+                    user_emotion="neutral",  # the user's emotion is not detected here
+                    assistant_emotion=emotion,
+                )
+                logger.debug(
+                    f"Memory saved | user: '{self._last_user_text[:50]}' "
+                    f"| aura: '{assistant_text[:50]}'"
+                )
+            except Exception as error:  # best effort: a failed save is logged, never raised
+                logger.error(f"Memory Save Failed: {error}")
+
+            self._last_user_text = ""  # cleared even if the save failed, so the text is not stored again
+
 @server.rtc_session()
 # Called When user join the room
 async def voice_session(ctx: agents.JobContext):
@@ -245,17 +370,12 @@ async def voice_session(ctx: agents.JobContext):
     logger.info(f"User connected: {ctx.room.name}")
 
     vtube_connected = await VTUBE.connect()
-
     if vtube_connected:
         logger.info("VTube Studio connected")
 
-    _vtube_is_connected = vtube_connected
-
     user_identity = "aura-user"  
-
     if ctx.job and hasattr(ctx.job, 'participant') and ctx.job.participant:
         user_identity = ctx.job.participant.identity or user_identity
-
     else:
         for p in ctx.room.remote_participants.values():
             if p.identity and not p.identity.startswith("agent-"):
@@ -267,35 +387,19 @@ async def voice_session(ctx: agents.JobContext):
     long_term_memory = await memory_service.get_long_term_memories(identity=user_identity, limit=10)
     is_returning_user = bool(long_term_memory.strip())
 
-    if is_returning_user:
-        logger.info(f"Long-term memory loaded for '{user_identity}'")
-    else:
-        logger.info(f"No long-term memory found for {user_identity}")
-
-    conversation_id = await memory_service.create_conversation(title=f"Voice Session: {user_identity}")
-
-    if conversation_id:
-        logger.info(f"Memory: new conversation {conversation_id} for {user_identity}")
-    else:
-        logger.warning("Memory: Can't connect to Supabase, running without memory")
-
     system_prompt = build_system_prompt(long_term_memory)
-
     initial_chat_ctx = llm.ChatContext()
-
     BRIDGE.set_room(ctx.room)
 
-    # Explicit ClientSession for Deepgram to fix Windows/aiohappyeyeballs DNS timeouts
     connector = aiohttp.TCPConnector(use_dns_cache=True, keepalive_timeout=120)
     stt_session = aiohttp.ClientSession(connector=connector)
     
-    # --- OPTION 2: Deepgram STT (Fallback) ---
     stt_plugin = deepgram.STT(
         model="nova-3",
         language="multi",
         detect_language=False,
-        smart_format=False, # Turned this off! It adds massive latency waiting for grammar checking.
-        interim_results=False, # We don't use interim results anyway, saving packet streams
+        smart_format=False,
+        interim_results=False,
         api_key=DEEPGRAM_KEY,
         http_session=stt_session,
         keyterm=["moshi", "desu", "konnichiwa", "nihongo", "arigato", "sugoi", "hello", "hey", "AURA"]
@@ -312,19 +416,66 @@ async def voice_session(ctx: agents.JobContext):
         llm=llm_plugin,
         tts=TTS_PLUGIN,
         vad=silero.VAD.load(
-            min_silence_duration=0.4,  # aggressively detect end-of-speech (default is often much higher)
+            min_silence_duration=0.4,
             min_speech_duration=0.05
         ),
     )
 
+    assistant = AURAAssistant(
+        conversation_id=await memory_service.create_conversation(title=f"Voice Session: {user_identity}"),
+        user_identity=user_identity,
+        system_prompt=system_prompt,
+        initial_chat_ctx=initial_chat_ctx,
+    )
+
+    # ─── Spontaneous Idle Monitor ───
+    async def spontaneous_pulse():
+        """Background task: have AURA speak first when the session has been quiet too long.
+
+        Wakes every CHECK_INTERVAL seconds; once IDLE_THRESHOLD seconds have passed since the
+        assistant's last recorded activity, asks the session for an unprompted in-character reply.
+        Errors are logged and the loop keeps running.
+        """
+        IDLE_THRESHOLD = 45.0  # seconds of silence before AURA initiates
+        CHECK_INTERVAL = 5.0   # seconds between idle checks
+
+        while True:
+            await asyncio.sleep(CHECK_INTERVAL)
+
+            # Hold off until the TTS warmup thread has finished; it sets _tts_ready
+            # whether the warmup succeeded or failed.
+            if not _tts_ready.is_set():
+                continue
+
+            elapsed = asyncio.get_event_loop().time() - assistant._last_activity_time
+            if elapsed <= IDLE_THRESHOLD:
+                continue
+
+            logger.info(f"Idle monitor triggered (silent for {elapsed:.1f}s)")
+            assistant.reset_activity()  # prevent double trigger while processing
+
+            # One-off instructions for this reply only.
+            pulse_prompt = (
+                "The user has been silent for a while. As AURA, initiate a conversation, "
+                "share a mischievous observation about the silence, or ask a weird question. "
+                "Keep it completely in character."
+            )
+
+            try:
+                # generate_reply() runs the session's normal LLM -> TTS pipeline using these
+                # one-off instructions, so there is no need to call the LLM plugin directly,
+                # speak the text with session.say(), or patch the chat context by hand.
+                # NOTE(review): confirm the instructions are not persisted in the chat history.
+                await session.generate_reply(instructions=pulse_prompt)
+                assistant.reset_activity()  # measure the next idle period from AURA's own line
+            except Exception as e:
+                logger.error(f"Failed to generate spontaneous line: {e}")
+
+    pulse_task = asyncio.create_task(spontaneous_pulse())
+
     await session.start(
         room=ctx.room,
-        agent=AURAAssistant(
-            conversation_id=conversation_id,
-            user_identity=user_identity,
-            system_prompt=system_prompt,
-            initial_chat_ctx=initial_chat_ctx,
-        ),
+        agent=assistant,
         room_options=room_io.RoomOptions(
             audio_input=room_io.AudioInputOptions(
                 noise_cancellation=lambda params: (
@@ -336,86 +487,36 @@ async def voice_session(ctx: agents.JobContext):
         ),
     )
 
-    if _vtube_is_connected:
+    if vtube_connected:
         await VTUBE.set_expression("smile")
 
-    instruction = (
-        "Greet the user warmly as someone you already know. "
-        "Briefly acknowledge you remember them. Keep it to 1-2 sentences."
+    greeting = (
+        "[smile] Yahoo! Great to see you again! What are we getting up to today?"
         if is_returning_user else
-        "Greet the user with a polite and helpful AURA introduction. "
-        "Example: 'Hello! I'm AURA, your personal AI assistant. How can I help you today?'"
+        "[smile] Yahoo! Hey there! I'm AURA, your personal AI companion. What can I do for you today?"
     )
-    
-    await session.generate_reply(instructions=instruction)
-
-class AURAAssistant(Agent):
-    def __init__(self, conversation_id=None, user_identity: str = "aura-user", system_prompt: str = AURA_BASE_PROMPT, initial_chat_ctx: "llm.ChatContext | None" = None,) -> None:
-        super().__init__(instructions=system_prompt, chat_ctx=initial_chat_ctx)
-        self._conversation_id     = conversation_id
-        self._user_identity       = user_identity
-        self._vtube_connected     = False
-        self._last_user_text      = ""
-
-    async def on_enter(self):
-        self._vtube_connected = await VTUBE.connect()
-
-    async def on_exit(self):
-        await VTUBE.disconnect()
-        BRIDGE.set_room(None)
-
-        # Extract the long term memory and save memory to database if session ended
-        if self._conversation_id and OPENROUTER_KEY:
-            logger.info(f"Session ended for '{self._user_identity}'. Extracting long-term memory...")
-            asyncio.create_task(
-                extract_and_save_memory(
-                    identity=self._user_identity,
-                    conversation_id=self._conversation_id,
-                    openrouter_key=OPENROUTER_KEY,
-                )
-            )
 
-    # Set last user message when user done talking
-    async def on_user_turn_completed(self, turn_ctx: llm.ChatContext, new_message: llm.ChatMessage) -> None:
-        self._last_user_text = new_message.text_content or ""
-        await super().on_user_turn_completed(turn_ctx, new_message)
+    if not _tts_ready.is_set():
+        logger.info("Waiting for TTS warmup before greeting...")
+        await asyncio.get_event_loop().run_in_executor(None, lambda: _tts_ready.wait(timeout=120))
 
-    async def llm_chat(self, chat_ctx, **kwargs):
-        """Override to detect emotion and trigger expressions"""
-        # Start of turn: clear animation logs to allow fresh winks/tongues
-        await VTUBE.start_turn()
+    if ctx.room.remote_participants:
+        logger.info("TTS ready, generating greeting")
+        try:
+            await session.say(greeting)
+        except RuntimeError as e:
+            logger.warning(f"Could not deliver greeting: {e}")
 
-        # Get response from parent
-        async for chunk in super().llm_chat(chat_ctx, **kwargs):
-            yield chunk
-        
-        # Emotion detection is now handled per-sentence in aura_tts.py
+    # Wait for session to finish
+    try:
+        await asyncio.Event().wait()
+    except asyncio.CancelledError:
         pass
-
-    # Set last assistant message when assistant done talking and add to database
-    async def on_agent_speech_committed(self, msg: llm.ChatMessage) -> None:
-        assistant_text = msg.text_content or ""
-
-        if self._conversation_id and self._last_user_text and assistant_text:
-            try:
-                emotions = VTUBE.detect_emotion(assistant_text)
-                emotion  = emotions[0] if emotions else "neutral"
-
-                await memory_service.add_interaction(
-                    conversation_id=self._conversation_id,
-                    user_text=self._last_user_text,
-                    assistant_text=assistant_text,
-                    user_emotion="neutral",
-                    assistant_emotion=emotion,
-                )
-                logger.debug(
-                    f"Memory saved | user: '{self._last_user_text[:50]}' "
-                    f"| aura: '{assistant_text[:50]}'"
-                )
-            except Exception as error:
-                logger.error(f"Memory Save Failed: {error}")
-
-            self._last_user_text = ""
+    finally:
+        pulse_task.cancel()
+        await stt_session.close()
+if __name__ == "__main__":
+    agents.cli.run_app(server)
 
 if __name__ == "__main__":
     agents.cli.run_app(server)
\ No newline at end of file
diff --git a/voice-agent/aura_tts.py b/voice-agent/aura_tts.py
index 87a3bec..2fd3ad2 100644
--- a/voice-agent/aura_tts.py
+++ b/voice-agent/aura_tts.py
@@ -63,6 +63,38 @@ class _TTSOptions:
     max_seq_len: int
 
 
+def _split_text(text: str, max_chars: int = 120) -> list[str]:
+    """Recursively split text into chunks smaller than max_chars, preferably at punctuation or spaces."""
+    if len(text) <= max_chars:
+        return [text]
+    
+    # Try splitting at logical points: . ! ? 。 ！ ？
+    split_points = ".!?。！？"
+    best_idx = -1
+    for i, char in enumerate(text[:max_chars]):
+        if char in split_points:
+            best_idx = i
+            
+    # If no punctuation, try space
+    if best_idx == -1:
+        for i in range(max_chars, 0, -1):
+            if text[i] == ' ':
+                best_idx = i
+                break
+                
+    # If still no luck, hard cut (fallback)
+    if best_idx == -1:
+        best_idx = max_chars
+        
+    chunk = text[:best_idx + 1].strip()
+    rest = text[best_idx + 1:].strip()
+    
+    if not rest:
+        return [chunk]
+        
+    return [chunk] + _split_text(rest, max_chars)
+
+
 class AuraTTS(tts.TTS):
     """
     Custom LiveKit TTS plugin wrapping the faster-qwen3-tts local model.
@@ -74,10 +106,10 @@ def __init__(
         *,
         model_name: str = "Qwen/Qwen3-TTS-12Hz-0.6B-Base",
         ref_audio: str,
-        ref_text: str,
+        ref_text: str = "",
         language: str = "English",
         dtype: torch.dtype = torch.bfloat16,
-        max_seq_len: int = 384,  # Further reduced for 6GB GPUs (from 512)
+        max_seq_len: int = 512,  # Bumped from 384 for better headroom
     ):
         super().__init__(
             capabilities=tts.TTSCapabilities(streaming=True),
@@ -94,7 +126,7 @@ def __init__(
         )
         self._model: Optional[FasterQwen3TTS] = None
         self._model_lock = threading.Lock()
-        self._gen_lock = threading.Lock()  # Serialize GPU inference (CUDA graphs can't run concurrently)
+        self._gen_lock = threading.Lock()  # Serialize GPU inference
 
     def _ensure_model(self):
         """Lazy-load the model on first use (thread-safe, loads only once)."""
@@ -107,27 +139,20 @@ def _ensure_model(self):
             gc.collect()
             if torch.cuda.is_available():
                 torch.cuda.empty_cache()
-            if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-                torch.mps.empty_cache()
             logger.info(f"Loading FasterQwen3TTS: {self._opts.model_name} (max_seq_len={self._opts.max_seq_len})")
             self._model = FasterQwen3TTS.from_pretrained(
                 self._opts.model_name,
                 dtype=self._opts.dtype,
                 max_seq_len=self._opts.max_seq_len,
             )
+            # Warmup is handled inside ensure_model to ensure graphs are ready
+            self._model._warmup(64)
             logger.info("FasterQwen3TTS loaded and ready!")
 
     def warmup(self):
-        """Run a short dummy generation to trigger CUDA graph capture at boot."""
+        """Run a dummy generation to trigger CUDA graph capture."""
         self._ensure_model()
-        logger.info("Warming up TTS with dummy generation...")
-        with self._gen_lock:
-            self._model.generate_voice_clone(
-                text="Hello.",
-                ref_audio=self._opts.ref_audio,
-                ref_text=self._opts.ref_text,
-                language="English",
-            )
+        # model._warmup is called in _ensure_model already, but we log here
         logger.info("TTS warmup complete — CUDA graphs ready!")
 
     def _generate_audio(self, text: str) -> bytes:
@@ -141,25 +166,31 @@ def _generate_audio_with_lang(self, text: str, language: str) -> bytes:
         if not text or not text.strip():
             return b""
 
-        # Budget: Japanese ≈ 4 chars/s, English ≈ 12 chars/s. 3× safety, min 2 s.
+        # Budget: Japanese ≈ 4 chars/s, English ≈ 12 chars/s. 2.5× safety.
+        # Ensure max_new_tokens is at least 1s (12 tokens) and doesn't hit context wall
         chars_per_sec = 4.0 if language == "Japanese" else 12.0
-        max_new_tokens = max(24, int(len(text) / chars_per_sec * 3.0 * 12))
+        max_new_tokens = int(len(text) / chars_per_sec * 2.5 * 12)
+        max_new_tokens = max(12, min(max_new_tokens, self._opts.max_seq_len - 100))
 
         with self._gen_lock:
-            audio_np, sample_rate = self._model.generate_voice_clone(
-                text=text,
-                ref_audio=self._opts.ref_audio,
-                ref_text=self._opts.ref_text,
-                language=language,
-                max_new_tokens=max_new_tokens,
-                append_silence=False,
-                repetition_penalty=1.15,
-            )
-            audio_data = _trim_silence(audio_np[0])
-
-            # Convert float32 -> int16 PCM bytes
-            audio_int16 = (audio_data * 32767).clip(-32768, 32767).astype(np.int16)
-            return audio_int16.tobytes()
+            try:
+                audio_np, sample_rate = self._model.generate_voice_clone(
+                    text=text,
+                    ref_audio=self._opts.ref_audio,
+                    ref_text=self._opts.ref_text,
+                    language=language,
+                    max_new_tokens=max_new_tokens,
+                    append_silence=False,
+                    repetition_penalty=1.15,
+                )
+                audio_data = _trim_silence(audio_np[0])
+
+                # Convert float32 -> int16 PCM bytes
+                audio_int16 = (audio_data * 32767).clip(-32768, 32767).astype(np.int16)
+                return audio_int16.tobytes()
+            except Exception as e:
+                logger.error(f"TTS generation failed: {e}")
+                return b""
 
     def synthesize(self, text: str, *, conn_options=None) -> "tts.ChunkedStream":
         return _AuraChunkedStream(self, text, self._opts, conn_options)
@@ -214,17 +245,8 @@ async def _run(self, output_emitter):
             stream=False,
         )
 
-        # Buffer incoming text tokens into sentences
-        # Use a custom bilingual sentence splitter that handles BOTH
-        # English (.!?) AND Japanese (。！？) punctuation.
-        tokenizer = tokenize.basic.SentenceTokenizer(
-            min_sentence_len=3,
-            # Custom delimiters: standard + Japanese full-width punctuation
-        )
+        tokenizer = tokenize.basic.SentenceTokenizer(min_sentence_len=3)
         token_stream = tokenizer.stream()
-        
-        # Track pending reset task so we can cancel it when a new sentence starts
-        _pending_reset: Optional[asyncio.Task] = None
 
         async def _process_input():
             """Read text from the input channel and push to the tokenizer."""
@@ -232,111 +254,82 @@ async def _process_input():
                 if isinstance(data, self._FlushSentinel):
                     token_stream.flush()
                 else:
-                    # Replace Japanese sentence-ending punctuation with ASCII equivalents
-                    # so the SentenceTokenizer can split on them properly
-                    text = data
-                    text = text.replace('。', '. ')
-                    text = text.replace('！', '! ')
-                    text = text.replace('？', '? ')
+                    text = data.replace('。', '. ').replace('！', '! ').replace('？', '? ')
                     token_stream.push_text(text)
             token_stream.end_input()
 
         async def _synthesize():
-            """Read complete sentences from the tokenizer and synthesize."""
-            nonlocal _pending_reset
-            
+            """Read complete sentences from the tokenizer and synthesize with recursive chunking."""
             async for ev in token_stream:
                 raw_sentence = ev.token
                 
-                # Detect if the sentence is primarily Japanese
-                has_japanese = any('\u3040' <= char <= '\u30ff' or '\u4e00' <= char <= '\u9fff' for char in raw_sentence)
-                lang = "Japanese" if has_japanese else "English"
+                # Break long sentences into smaller pieces to avoid TTS glitches and to avoid hitting the max context length
+                text_chunks = _split_text(raw_sentence, max_chars=130)
 
-                # Clean sentence for TTS
-                sentence = VTUBE.format_for_tts(raw_sentence)
-                
-                # Strip trailing dashes and tildes that TTS speaks as "minus"
-                sentence = sentence.rstrip('-~～')
-                sentence = sentence.strip()
-                
-                # SAFETY: Skip if sentence contains NO alphanumeric characters (prevents runaway loops)
-                if not any(c.isalnum() for c in sentence):
-                    continue
-
-                # Generate audio and calculate duration
-                # PCM 16-bit means 2 bytes per sample
-                loop = asyncio.get_event_loop()
-                try:
-                    pcm_bytes = await loop.run_in_executor(
-                        None, self._tts_instance._generate_audio_with_lang, sentence, lang
-                    )
+                for chunk in text_chunks:
+                    # Detect if the chunk is primarily Japanese
+                    has_japanese = any('\u3040' <= char <= '\u30ff' or '\u4e00' <= char <= '\u9fff' for char in chunk)
+                    lang = "Japanese" if has_japanese else "English"
+
+                    # Clean sentence for TTS
+                    sentence = VTUBE.format_for_tts(chunk).rstrip('-~～').strip()
                     
-                    if not pcm_bytes:
+                    # SAFETY: Skip if sentence contains NO alphanumeric characters
+                    if not any(c.isalnum() for c in sentence):
+                        output_emitter.push(np.zeros(int(1.0 * SAMPLE_RATE), dtype=np.int16).tobytes())
                         continue
+
+                    loop = asyncio.get_event_loop()
+                    try:
+                        pcm_bytes = await loop.run_in_executor(
+                            None, self._tts_instance._generate_audio_with_lang, sentence, lang
+                        )
                         
-                    duration = len(pcm_bytes) / (SAMPLE_RATE * NUM_CHANNELS * 2)
-                    
-                    # SAFETY: Cap audio at 15 seconds per sentence to prevent TTS runaway
-                    MAX_SENTENCE_DURATION = 15.0
-                    if duration > MAX_SENTENCE_DURATION:
-                        logger.warning(f"TTS generated {duration:.1f}s for '{sentence[:30]}' - truncating to {MAX_SENTENCE_DURATION}s")
-                        max_bytes = int(MAX_SENTENCE_DURATION * SAMPLE_RATE * NUM_CHANNELS * 2)
-                        pcm_bytes = pcm_bytes[:max_bytes]
-                        duration = MAX_SENTENCE_DURATION
-
-                    # Virtual Playhead syncing for TTS->VTube Expressions
-                    # LiveKit queues audio and plays it sequentially, but we generate it much faster than real-time.
-                    # If we trigger expressions immediately, they fall completely out-of-sync with the audio.
-                    now = time.time()
-                    if not hasattr(self, '_playhead') or self._playhead < now:
-                        self._playhead = now
+                        if not pcm_bytes:
+                            continue
+                            
+                        duration = len(pcm_bytes) / (SAMPLE_RATE * NUM_CHANNELS * 2)
                         
-                    self._reset_token = getattr(self, '_reset_token', 0) + 1
-                    current_token = self._reset_token
+                        # Virtual Playhead syncing
+                        now = time.time()
+                        if not hasattr(self, '_playhead') or self._playhead < now:
+                            self._playhead = now
+                            
+                        self._reset_token = getattr(self, '_reset_token', 0) + 1
+                        current_token = self._reset_token
+                            
+                        delay_until_play = self._playhead - now
+                        self._playhead += duration
                         
-                    delay_until_play = self._playhead - now
-                    self._playhead += duration
-                    
-                    emotions = VTUBE.detect_emotion(raw_sentence)
-                    
-                    async def _sync_expression(em_list, delay_start, dur, token):
-                        try:
-                            if delay_start > 0:
-                                await asyncio.sleep(delay_start)
-
-                            if em_list:
-                                # Fire both simultaneously — BRIDGE never waits for VTS's sleeps
-                                await asyncio.gather(
-                                    VTUBE.set_expression(em_list),
-                                    BRIDGE.send_expression(em_list, dur),
-                                )
-
-                            await asyncio.sleep(dur + 0.3)  # grace period after audio
-
-                            # Only clear to neutral if we are STILL the very last scheduled sentence
-                            if getattr(self, '_reset_token', -1) == token:
-                                await asyncio.gather(
-                                    VTUBE.reset_to_neutral(),
-                                    BRIDGE.send_neutral(),
-                                )
-                        except Exception as e:
-                            logger.debug(f"VTS sync error (non-fatal): {e}")
-
-                    # Trigger emotions perfectly sequenced with actual audio playback!
-                    asyncio.create_task(_sync_expression(emotions, delay_until_play, duration, current_token))
-
-                    output_emitter.push(pcm_bytes)
-                    logger.debug(f"Synthesized {duration:.2f}s audio for: {sentence} (Lang: {lang})")
-                    
-                except Exception as e:
-                    logger.error(f"TTS generation failed for sentence '{sentence}': {e}")
-                    import gc
-                    gc.collect()
-                    if torch.cuda.is_available():
-                        torch.cuda.empty_cache()
-                    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-                        torch.mps.empty_cache()
-
+                        emotions = VTUBE.detect_emotion(chunk)
+                        
+                        async def _sync_expression(em_list, delay_start, dur, token):
+                            try:
+                                if delay_start > 0:
+                                    await asyncio.sleep(delay_start)
+
+                                if em_list:
+                                    await asyncio.gather(
+                                        VTUBE.set_expression(em_list),
+                                        BRIDGE.send_expression(em_list, dur),
+                                    )
+
+                                await asyncio.sleep(dur + 0.3)
+                                if getattr(self, '_reset_token', -1) == token:
+                                    await asyncio.gather(
+                                        VTUBE.reset_to_neutral(),
+                                        BRIDGE.send_neutral(),
+                                    )
+                            except Exception as e:
+                                logger.debug(f"Sync error: {e}")
+
+                        asyncio.create_task(_sync_expression(emotions, delay_until_play, duration, current_token))
+                        output_emitter.push(pcm_bytes)
+                        logger.debug(f"Synthesized {duration:.2f}s for chunk: '{sentence[:50]}...'")
+                        
+                    except Exception as e:
+                        logger.error(f"TTS chunk generation failed: {e}")
+                        if torch.cuda.is_available():
+                            torch.cuda.empty_cache()
 
-        # Run input processing and synthesis concurrently
         await asyncio.gather(_process_input(), _synthesize())
diff --git a/voice-agent/vtube_controller.py b/voice-agent/vtube_controller.py
index 2788422..b01cac0 100644
--- a/voice-agent/vtube_controller.py
+++ b/voice-agent/vtube_controller.py
@@ -27,12 +27,8 @@ def __init__(self):
         self._connected_loop = None  # Track which event loop owns the VTS connection
         self._vts_lock = asyncio.Lock()  # Serialize all VTS API requests
         self.active_expressions = {}  # name -> hotkey_id, tracks which expressions are currently active
-        
-        if not self.is_enabled:
-            logger.info("VTube Studio integration is DISABLED via .env")
-            return
-        
-        # Expression mapping (matches hotkey names or filenames in VTube Studio)
+
+        # Expression mapping — always initialized so detect_emotion works even when VTube is disabled
         self.expressions = {
             "sad": "Sad",
             "smile": "Smile",
@@ -59,25 +55,8 @@ def __init__(self):
             "ウインク": "wink",
             "べー": "tongue"
         }
-        self.expression_hotkey_map = {}
-        
-        # Track raw parameter values to restore them later: parameter_name -> last_injected_value
-        self.injected_parameters = {}
-        
-        # Prevent repetitive animations (like double-winking) in a single turn
-        self.turn_animation_log = set() # tags triggered this turn
-        
-        # Mapping for reset logic: parameter -> trigger_feature
-        self.PARAM_TO_FEATURE = {
-            "EyeOpenLeft": "wink",
-            "EyeOpenRight": "wink",
-            "BrowLeftY": "wink",
-            "MouthSmile": "wink",
-            "TongueOut": "tongue",
-            "MouthOpen": "tongue"
-        }
-        
-        # Bilingual emotion keywords
+
+        # Bilingual emotion keywords — always initialized so detect_emotion works without VTube
         self.emotion_keywords = {
             "sad": [
                 # English
@@ -120,7 +99,29 @@ def __init__(self):
             "wink": ["wink", "blink", "winked", "ウインク"],
             "tongue": ["tongue", "bleh", "cheeky", "sticking out", "べー"]
         }
-    
+
+        if not self.is_enabled:
+            logger.info("VTube Studio integration is DISABLED via .env")
+            return
+
+        self.expression_hotkey_map = {}
+
+        # Track raw parameter values to restore them later: parameter_name -> last_injected_value
+        self.injected_parameters = {}
+
+        # Prevent repetitive animations (like double-winking) in a single turn
+        self.turn_animation_log = set()
+
+        # Mapping for reset logic: parameter -> trigger_feature
+        self.PARAM_TO_FEATURE = {
+            "EyeOpenLeft": "wink",
+            "EyeOpenRight": "wink",
+            "BrowLeftY": "wink",
+            "MouthSmile": "wink",
+            "TongueOut": "tongue",
+            "MouthOpen": "tongue"
+        }
+
     async def connect(self):
         """Connect to VTube Studio with robust re-authentication."""
         if not self.is_enabled:
@@ -440,9 +441,6 @@ async def reset_to_neutral(self):
     
     def detect_emotion(self, text):
         """Bilingual detection: Looks for explicit tags [tag1, tag2] first, then falls back to keywords."""
-        if not self.is_enabled:
-            return []
-            
         text_lower = text.lower()
         
         # 1. Look for explicit tags in brackets [happy, pupil_shrink]

From b6094b5e49c5b5f2ae20826616acc66d681661e1 Mon Sep 17 00:00:00 2001
From: Raygama <daffaraygama55@gmail.com>
Date: Fri, 10 Apr 2026 20:39:33 +0700
Subject: [PATCH 2/3] Add streaming chat + async LLM/provider plumbing

Expose real-time streaming and async LLM support across backend and frontend.

Backend:
- chat API: add an SSE StreamingResponse for streaming conversations, include identity and stream flags, emit emotion then incremental text deltas, persist interactions asynchronously.
- make emotion and generation nodes async and integrate immediate user-interaction persistence; scrub bracketed tokens before storing.
- LLMService: convert generate to async and add stream helper that proxies provider streams.
- Provider registry: make generate async, add stream() to route streaming providers, use asyncio.to_thread for sync provider.generate with retry/backoff, and use async sleeps.
- MemoryService: improved embedding provider logging, allow nullable assistant_text on add_interaction, persist user/assistant chunks safely, and add get_long_term_memories for RAG context.
- SettingsService: add simple in-memory caching for settings and API keys with TTL and invalidation on updates.
- Providers/openai_compat: handle streaming chunks more robustly and filter internal 'reasoning' tokens.
- General: better error logging and stream error handling.

Frontend/UI:
- Updated AvatarRenderer with numerous animation/behavior fixes (mouth lock during tongue, blink/expression handling, scale adjustments) and smaller style/format cleanups.
- CallOverlay: refactor to use the shared getOrCreateIdentity helper, accept conversationId, improve the LiveKit connect flow, redesign the UI around a centered avatar and controls, and apply further refactors for robustness.
- ChatFeed: visual redesign for empty state and message list, improved tool display and bubble styling.
- package.json: bump pixi.js to ^8.17.1.
- Added new dashboard components and user utility (Presence, SlideOver, lib/user.js) and other UI tweaks.

Voice agent:
- Multiple voice-agent scripts updated (agent, tts, token server, memory, vtube controller, deps) to align with backend changes and improve runtime behavior.

Why: these changes enable low-latency streaming responses from LLMs to the dashboard, improve persistence and memory retrieval for RAG, reduce blocking sync calls, and refresh the UI/avatar experience for live interactions.
---
 ai-service/app/api/v1/chat.py                 | 110 +++++++-
 ai-service/app/models/chat.py                 |   2 +
 .../app/services/brain/nodes/emotion.py       |   6 +-
 .../app/services/brain/nodes/generate.py      |  81 ++++--
 ai-service/app/services/brain/state.py        |   3 +-
 ai-service/app/services/llm.py                |   8 +-
 ai-service/app/services/memory_service.py     |  54 +++-
 .../app/services/providers/openai_compat.py   |  23 +-
 ai-service/app/services/providers/registry.py |  62 ++++-
 ai-service/app/services/settings_service.py   |  40 ++-
 dashboard/package.json                        |   4 +-
 dashboard/src/components/AvatarRenderer.jsx   |  97 +++----
 dashboard/src/components/CallOverlay.jsx      | 148 +++++------
 dashboard/src/components/ChatFeed.jsx         |  63 +++--
 dashboard/src/components/ChatHeader.jsx       |  42 +--
 dashboard/src/components/ChatInput.jsx        |  44 ++--
 dashboard/src/components/KnowledgeBase.jsx    |  69 +++--
 dashboard/src/components/PersonalityTuner.jsx | 173 +++++++------
 dashboard/src/components/Presence.jsx         | 175 +++++++++++++
 dashboard/src/components/Sidebar.jsx          |  63 ++---
 dashboard/src/components/SlideOver.jsx        |  47 ++++
 dashboard/src/components/StatusCards.jsx      | 153 ++++++-----
 dashboard/src/components/SystemLogs.jsx       |  10 +-
 dashboard/src/lib/user.js                     |  18 ++
 dashboard/src/pages/AdminPage.jsx             |  89 ++++++-
 dashboard/src/pages/ChatPage.jsx              | 180 ++++++++++---
 voice-agent/agent.py                          | 245 ++++++++++--------
 voice-agent/aura_tts.py                       | 138 +++++-----
 voice-agent/environment.yml                   |   2 +
 voice-agent/memory_service.py                 |  58 +++--
 voice-agent/requirements.txt                  |   2 +
 voice-agent/token_server.py                   |  13 +-
 voice-agent/vtube_controller.py               |   9 +-
 33 files changed, 1532 insertions(+), 699 deletions(-)
 create mode 100644 dashboard/src/components/Presence.jsx
 create mode 100644 dashboard/src/components/SlideOver.jsx
 create mode 100644 dashboard/src/lib/user.js

diff --git a/ai-service/app/api/v1/chat.py b/ai-service/app/api/v1/chat.py
index 725912f..d42f2ce 100644
--- a/ai-service/app/api/v1/chat.py
+++ b/ai-service/app/api/v1/chat.py
@@ -1,7 +1,8 @@
-import re
+import json
 import logging
-
+import asyncio
 from fastapi import APIRouter, HTTPException
+from fastapi.responses import StreamingResponse
 from app.services.memory_service import memory_service
 from app.models.chat import ChatRequest, ChatResponse
 from app.services.brain.graph import brain
@@ -10,7 +11,7 @@
 router = APIRouter()
 logger = logging.getLogger(__name__)
 
-@router.post("", response_model=ChatResponse)
+@router.post("")
 async def chat(request: ChatRequest):
     # Run Graph
     try:
@@ -24,16 +25,98 @@ async def chat(request: ChatRequest):
             "messages":   [HumanMessage(content=request.message)],
             "emotion":    "neutral",
             "conversation_id": conversation_id,
+            "identity": request.identity or "anonymous",
+            "stream": request.stream
         }
 
         config = {"configurable": {"thread_id": conversation_id}}
-        result = brain.invoke(initial_state, config=config)
+
+        if request.stream:
+            async def event_generator():
+                # 1. Start with emotion detection (sequential but fast)
+                try:
+                    from app.services.brain.nodes.emotion import detect_emotion
+                    emotion_res = await detect_emotion(initial_state)
+                    detected_emotion = emotion_res.get("emotion", "neutral")
+                    yield f"data: {json.dumps({'emotion': detected_emotion})}\n\n"
+                except Exception as ex:
+                    logger.warning(f"Emotion detection failed: {ex}")
+                    detected_emotion = "neutral"
+
+                # 2. Setup the full context for generation
+                from app.services.brain.nodes.generate import session_history_window
+                from app.services.llm import llm_service
+                from app.services.persona import persona_engine
+                from app.services.settings_service import settings_service
+                from datetime import datetime
+                from uuid import UUID
+
+                # Fetch context
+                user_msg = request.message
+                history_model, memories, facts = await asyncio.gather(
+                    memory_service.get_history(UUID(conversation_id), session_history_window),
+                    memory_service.search(query=user_msg, limit=3),
+                    memory_service.get_long_term_memories(identity=request.identity or "anonymous", limit=5),
+                )
+                
+                # Build Persona
+                db_settings = settings_service.get_settings()
+                custom_sys = (db_settings.get("system_prompt") or "").strip()
+                persona = custom_sys if custom_sys else persona_engine.get_persona()
+                time_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+                system_content = (
+                    "You are AURA (Advanced Universal Responsive Avatar), steward of the ASE Lab.\n\n"
+                    f"{persona}\n\n"
+                    "IMPORTANT: Do NOT include bracketed emotions like [happy] or [sad] in your response content. "
+                    "I have already detected your emotion separately.\n\n"
+                    f"**Context:**\n- Current Time: {time_str}"
+                )
+                if facts: system_content += f"\nWhat I know about you:\n{facts}\n"
+                if memories:
+                    memory_block = "\n".join(f"- {m}" for m in memories)
+                    system_content += f"\nRelevant past snippets:\n{memory_block}\n"
+
+                messages_format = [{"role":"system", "content":system_content}] + history_model + [{"role":"user", "content":user_msg}]
+
+                import re
+                full_text = ""
+                # 3. Stream from the registry directly
+                from app.services.providers.base import TextDelta
+                async for chunk in llm_service.stream(messages_format):
+                    # Only yield incremental deltas to the dashboard
+                    if isinstance(chunk, TextDelta):
+                        txt = chunk.text
+                        full_text += txt
+                        yield f"data: {json.dumps({'text': txt})}\n\n"
+                    # StreamDone is handled silently for background persistence below
+
+                # 4. Persist the finished exchange, with bracketed tags such as [happy] scrubbed from the reply
+                scrubbed_final = re.sub(r'\[.*?\]', '', full_text).strip()
+                asyncio.create_task(memory_service.add_interaction(
+                    conversation_id=UUID(conversation_id),
+                    user_text=user_msg,
+                    assistant_text=scrubbed_final,
+                    user_emotion=detected_emotion,
+                    assistant_emotion="neutral"
+                ))
+                asyncio.create_task(memory_service.store(
+                    text=f"User: {user_msg} \n AURA: {scrubbed_final}",
+                    metadata={"conversation_id": str(conversation_id)}
+                ))
+
+                yield "data: [DONE]\n\n"
+
+            return StreamingResponse(event_generator(), media_type="text/event-stream")
+
+        # Non-streaming fallback
+        result = await brain.ainvoke(initial_state, config=config)
         
         # Extract response
         last_msg = result["messages"][-1].content
         emotion = result.get("emotion", "neutral")
         
-        # Look for tool calls in the last turn
+        # Look for tool calls
         tools_used = []
         for msg in result["messages"]:
             if hasattr(msg, "tool_calls") and msg.tool_calls:
@@ -43,22 +126,21 @@ async def chat(request: ChatRequest):
                         "args": tc.get("args", {})
                     })
                     
-        # Clean tags
-        text = last_msg
-        if text.startswith("["):
-             match = re.match(r'^\[(.*?)\]', text)
-             if match:
-                 text = text[match.end():].strip()
-
         return ChatResponse(
-            text=text,
+            text=last_msg,
             emotion=emotion,
             conversation_id=conversation_id,
             tools_used=tools_used if tools_used else None
         )
     
     except Exception as e:
-        logger.error(f"Chat error: {e}")
+        logger.error(f"Chat error: {e}", exc_info=True)
+        # For a stream request, return the error as a single SSE event instead of a JSON body
+        if request.stream:
+             return StreamingResponse(
+                  iter([f"data: {json.dumps({'text': f'Brain Freeze: {str(e)}', 'emotion': 'confused'})}\n\n"]),
+                  media_type="text/event-stream"
+             )
 
         return ChatResponse(
             text=f"Brain Freeze: {str(e)}",
diff --git a/ai-service/app/models/chat.py b/ai-service/app/models/chat.py
index 6ffcc57..c55136a 100644
--- a/ai-service/app/models/chat.py
+++ b/ai-service/app/models/chat.py
@@ -4,6 +4,8 @@
 class ChatRequest(BaseModel):
     message: str
     conversation_id: Optional[str] = None 
+    identity: Optional[str] = None
+    stream: bool = False
 
 class ChatResponse(BaseModel):
     text: str
diff --git a/ai-service/app/services/brain/nodes/emotion.py b/ai-service/app/services/brain/nodes/emotion.py
index 66cd899..ec37427 100644
--- a/ai-service/app/services/brain/nodes/emotion.py
+++ b/ai-service/app/services/brain/nodes/emotion.py
@@ -2,7 +2,7 @@
 from app.services.llm import llm_service
 
 # Node to detect emotion 
-def detect_emotion(state: BrainState) -> dict:
+async def detect_emotion(state: BrainState) -> dict:
     # Get last user message
     last_message = state["messages"][-1].content 
    
@@ -13,7 +13,7 @@ def detect_emotion(state: BrainState) -> dict:
     """
     
     # Call LLM to detect emotion
-    emotion = llm_service.generate([{"role": "system", "content": prompt}])
+    response = await llm_service.generate([{"role": "system", "content": prompt}])
 
     # Return detected emotion
-    return {"emotion": emotion["emotion"].strip().lower()}
\ No newline at end of file
+    return {"emotion": response.get("emotion", "neutral").strip().lower()}
\ No newline at end of file
diff --git a/ai-service/app/services/brain/nodes/generate.py b/ai-service/app/services/brain/nodes/generate.py
index 0c1cfad..8c0207a 100644
--- a/ai-service/app/services/brain/nodes/generate.py
+++ b/ai-service/app/services/brain/nodes/generate.py
@@ -11,10 +11,9 @@
 
 session_history_window = 9999
 
-def generate_response(state: BrainState) -> dict:
-    with concurrent.futures.ThreadPoolExecutor() as pool:
-        future = pool.submit(asyncio.run, generate(state))
-        return future.result()
+async def generate_response(state: BrainState) -> dict:
+    """Async wrapper for the generation node."""
+    return await generate(state)
 
 
 # Node to generate response based on persona, conversation history and detected emotion (convesation history not being tested yet)
@@ -45,46 +44,90 @@ async def generate(state: BrainState) -> dict:
     else:
         user_message = ""
 
-    # Load History
-    history_model, memories = await asyncio.gather(
+    # Load History & Long-term memories
+    history_model, memories, facts = await asyncio.gather(
         memory_service.get_history(conversation_id, session_history_window),
         memory_service.search(query=user_message, limit=3),
+        memory_service.get_long_term_memories(identity=state.get("identity", "anonymous"), limit=5),
     )
 
     history = history_model
 
-    # System Prompt
-    system_message = prompter.build("", context=None)[0]
+    # Save User message IMMEDIATELY to DB so it persists even if AI fails or disconnects
+    await memory_service.add_interaction(
+        conversation_id=conversation_id,
+        user_text=user_message,
+        assistant_text=None, # Update later
+        user_emotion=detected_emotion,
+        assistant_emotion=None
+    )
 
-    if memories:
-        memory_block = "\n".join(f"-{message}" for message in memories)
-        system_message = {
-            "role" : "system",
-            "content": (system_message["content"] + f"Ingatan sebelumnya: \n {memory_block}")
-        }
+    # System Prompt (Pulling from DB via settings_service)
+    from app.services.settings_service import settings_service
+    db_settings = settings_service.get_settings()
+    custom_sys = (db_settings.get("system_prompt") or "").strip()
     
+    from app.services.persona import persona_engine
+    persona = custom_sys if custom_sys else persona_engine.get_persona()
     
-    # Add system prompt with persona and current time
+    from datetime import datetime
+    time_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+    system_content = (
+        "You are AURA (Advanced Universal Responsive Avatar), "
+        "the spirited AI steward of the ASE Lab.\n\n"
+        f"{persona}\n\n"
+        f"**Context:**\n- Current Time: {time_str}"
+    )
+
+    # Combine RAG search snippets (memories) and long-term user facts (facts)
+    combined_memory = ""
+    if facts:
+        combined_memory += f"\nWhat I know about you:\n{facts}\n"
+    if memories:
+        memory_block = "\n".join(f"- {message}" for message in memories)
+        combined_memory += f"\nRelevant past snippets:\n{memory_block}\n"
+
+    if combined_memory:
+        system_content += f"\n\n**Memory Retrieval:**{combined_memory}"
+
+    system_message = {"role": "system", "content": system_content}
+    
+    # Build payload
     messages_format = [system_message] + history + current_message
 
+    # Check for stream request
+    is_stream = state.get("stream", False)
+
+    if is_stream:
+        # Streaming is NOT handled inside this node: a LangGraph node should
+        # return a state update, not a stream object, so it cannot yield chunks.
+        # When request.stream is set, chat.py bypasses the graph entirely and
+        # streams straight from llm_service.stream() in its own event generator.
+        # This branch is therefore a no-op, and the generation below always
+        # runs as a single non-streamed llm_service.generate() call.
+        pass
+
     # Generate response from LLM
-    response = llm_service.generate(messages_format)
+    response = await llm_service.generate(messages_format)
+    text = response.get("text", "")
     emotion = response.get("emotion", "neutral")
     
     await asyncio.gather(
+        # Complete the interaction in DB
         memory_service.add_interaction(
             conversation_id=conversation_id,
             user_text=user_message,
-            assistant_text=response["text"],
+            assistant_text=text,
             user_emotion=detected_emotion,
             assistant_emotion=emotion
         ),
 
         memory_service.store(
-            text=f"User: {user_message} \n AURA: {response['text']}",
+            text=f"User: {user_message} \n AURA: {text}",
             metadata={"conversation_id": str(conversation_id)},
         ),
     )
 
     # Return response
-    return {"messages": [AIMessage(content=response["text"])], "emotion": response["emotion"]}
\ No newline at end of file
+    return {"messages": [AIMessage(content=text)], "emotion": emotion}
\ No newline at end of file
diff --git a/ai-service/app/services/brain/state.py b/ai-service/app/services/brain/state.py
index c4047a7..bab52f5 100644
--- a/ai-service/app/services/brain/state.py
+++ b/ai-service/app/services/brain/state.py
@@ -7,4 +7,5 @@
 class BrainState(TypedDict):
     messages: Annotated[List[BaseMessage], operator.add]
     emotion: str
-    conversation_id: str
\ No newline at end of file
+    conversation_id: str
+    identity: str
\ No newline at end of file
diff --git a/ai-service/app/services/llm.py b/ai-service/app/services/llm.py
index 7546e88..0a1629c 100644
--- a/ai-service/app/services/llm.py
+++ b/ai-service/app/services/llm.py
@@ -5,25 +5,29 @@
 This class exists so existing callers (brain nodes, etc.) don't need to change.
 """
 import logging
+import asyncio
 from app.services.providers.registry import provider_registry
 
 logger = logging.getLogger(__name__)
 
 
 class LLMService:
-    def generate(
+    async def generate(
         self,
         messages: list,
         model: str | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
     ) -> dict:
-        return provider_registry.generate(
+        return await provider_registry.generate(
             messages,
             model=model,
             temperature=temperature,
             max_tokens=max_tokens,
         )
 
+    def stream(self, *args, **kwargs):
+        return provider_registry.stream(*args, **kwargs)
+
 
 llm_service = LLMService()
diff --git a/ai-service/app/services/memory_service.py b/ai-service/app/services/memory_service.py
index 7fc761f..dbb4acd 100644
--- a/ai-service/app/services/memory_service.py
+++ b/ai-service/app/services/memory_service.py
@@ -44,21 +44,24 @@ def __init__(self):
                 api_key=settings.OPENAI_API_KEY,
                 model="text-embedding-3-small",
             )
-            logger.info("Embeddings: using OpenAI directly.")
+            logger.info("RAG: Using OpenAI Directly for semantic embeddings (best-in-class mapping).")
+            print("INFO: Memory Service using OpenAI Embeddings for search mapping.")
         elif settings.OPENROUTER_API_KEY:
             self.embeddings = OpenAIEmbeddings(
                 api_key=settings.OPENROUTER_API_KEY,
                 model="openai/text-embedding-3-small",
                 base_url="https://openrouter.ai/api/v1",
             )
-            logger.info("Embeddings: using OpenRouter.")
+            logger.info("RAG: Using OpenRouter for semantic embeddings.")
+            print("INFO: Memory Service using OpenRouter Embeddings.")
         elif _ollama_is_running(settings.OLLAMA_BASE_URL):
             self.embeddings = OpenAIEmbeddings(
                 api_key="ollama",
                 model="nomic-embed-text",
                 base_url=f"{settings.OLLAMA_BASE_URL}/v1",
             )
-            logger.info("Embeddings: using local Ollama (nomic-embed-text).")
+            logger.info("RAG: Using local Ollama for semantic embeddings.")
+            print("INFO: Memory Service using local Ollama Embeddings.")
         else:
             logger.warning(
                 "No embedding provider available "
@@ -104,26 +107,30 @@ async def get_conversation(self, conversation_id: UUID) -> Conversation | None:
             logger.error(f"Memory Service Get Conversation Error: {error}")
             return None
     
-    async def add_interaction(self, conversation_id: UUID, user_text: str, assistant_text: str, user_emotion: str = "neutral", assistant_emotion: str = "neutral") -> None:
+    async def add_interaction(self, conversation_id: UUID, user_text: str, assistant_text: str | None, user_emotion: str = "neutral", assistant_emotion: str = "neutral") -> None:
         if not self.client:
             return None
 
         try:
-            self.client.table("messages").insert([
-                CreateMesssage(
+            msgs = []
+            if user_text:
+                msgs.append(CreateMesssage(
                     conversation_id=conversation_id,
                     role="user",
                     content=user_text,
                     emotion=user_emotion,
-                ).model_dump(mode="json"),
+                ).model_dump(mode="json"))
 
-                CreateMesssage(
+            if assistant_text:
+                msgs.append(CreateMesssage(
                     conversation_id=conversation_id,
                     role="aura",
                     content=assistant_text,
                     emotion=assistant_emotion
-                ).model_dump(mode="json")
-            ]).execute() 
+                ).model_dump(mode="json"))
+            
+            if msgs:
+                self.client.table("messages").insert(msgs).execute() 
 
             self.client.table("conversations") \
                 .update({"updated_at": "now()"}) \
@@ -230,4 +237,31 @@ async def search(self, query: str, limit: int = 3) -> list[str]:
             return []
 
 
+    async def get_long_term_memories(self, identity: str, limit: int = 10) -> str:
+        """Retrieve the last N non-embedded 'user_facts' memories for this identity."""
+        if not self.client:
+            return ""
+
+        try:
+            result = self.client.table("memories") \
+                .select("content, created_at") \
+                .eq("metadata->>type", "user_facts") \
+                .eq("metadata->>identity", identity) \
+                .order("created_at", desc=True) \
+                .limit(limit) \
+                .execute()
+
+            rows = result.data or []
+            if not rows:
+                return ""
+
+            # Reverse to get chronological order in the prompt
+            facts_list = [row["content"] for row in reversed(rows)]
+            return "\n---\n".join(facts_list)
+
+        except Exception as e:
+            logger.error(f"Memory Service Get Long Term Memories error: {e}")
+            return ""
+
+
 memory_service = MemoryService()
\ No newline at end of file
diff --git a/ai-service/app/services/providers/openai_compat.py b/ai-service/app/services/providers/openai_compat.py
index 1def0a8..35c005e 100644
--- a/ai-service/app/services/providers/openai_compat.py
+++ b/ai-service/app/services/providers/openai_compat.py
@@ -133,12 +133,23 @@ async def stream(
             kwargs["tool_choice"] = "auto"
 
         try:
-            async with self._async_client.chat.completions.stream(**kwargs) as stream:
-                async for chunk in stream:
-                    delta = chunk.choices[0].delta.content or ""
-                    if delta:
-                        assembled += delta
-                        yield TextDelta(text=delta)
+            response = await self._async_client.chat.completions.create(**kwargs, stream=True)
+            async for chunk in response:
+                if not chunk.choices:
+                    continue
+                
+                delta = chunk.choices[0].delta
+                
+                # Handle reasoning tokens (DeepSeek R1 / OpenRouter)
+                # These are internal thoughts we don't want to show the user
+                reasoning = getattr(delta, "reasoning_content", None) or getattr(delta, "reasoning", None)
+                if reasoning:
+                    continue
+
+                if delta.content:
+                    txt = delta.content
+                    assembled += txt
+                    yield TextDelta(text=txt)
         except Exception as e:
             logger.error(f"[{self.name}] stream error: {e}")
 
diff --git a/ai-service/app/services/providers/registry.py b/ai-service/app/services/providers/registry.py
index 9a4cd9a..8d51c79 100644
--- a/ai-service/app/services/providers/registry.py
+++ b/ai-service/app/services/providers/registry.py
@@ -18,6 +18,7 @@
 from __future__ import annotations
 
 import logging
+import asyncio
 import os
 import random
 import time
@@ -64,7 +65,7 @@ def __init__(self):
 
     # ── Public API ────────────────────────────────────────────────────────────
 
-    def generate(
+    async def generate(
         self,
         messages: list[dict],
         *,
@@ -116,7 +117,7 @@ def generate(
 
             logger.info(f"[registry] trying {provider_name} / {actual_model}")
             try:
-                result = self._call_with_retry(provider, messages, **call_kwargs)
+                result = await self._call_with_retry(provider, messages, **call_kwargs)
                 if provider_name != primary:
                     logger.warning(f"[registry] fell back to {provider_name} (primary={primary} failed)")
                 return result
@@ -147,7 +148,57 @@ def generate(
             "tool_calls": None,
         }
 
-    def _call_with_retry(self, provider: LLMProvider, messages: list[dict], **kwargs) -> dict:
+    async def stream(
+        self,
+        messages: list[dict],
+        *,
+        model: str | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: list[dict] | None = None,
+    ) -> AsyncGenerator[TextDelta | StreamDone, None]:
+        from app.services.settings_service import settings_service
+
+        db = settings_service.get_settings()
+        keys = settings_service.get_api_keys()
+
+        actual_model       = model or db.get("model") or "deepseek/deepseek-v3.2"
+        actual_temp        = temperature if temperature is not None else float(db.get("temperature", 0.8))
+        actual_max_tokens  = max_tokens or int(db.get("max_tokens", 300))
+
+        configured_provider = (db.get("provider") or "auto").lower()
+        primary = (
+            configured_provider
+            if configured_provider != "auto"
+            else infer_provider(actual_model)
+        )
+
+        candidates = [primary] + [
+            p for p in _FALLBACK_ORDER
+            if p != primary and (p == "ollama" or self._pick_key(p, keys))
+        ]
+
+        # Note: Fallbacks for streaming are hard to implement gracefully mid-stream.
+        # We try each candidate in order; a failure after chunks were already yielded still falls through to the next provider.
+        for provider_name in candidates:
+            try:
+                provider = self._get_provider(provider_name, keys)
+                logger.info(f"[registry] streaming {provider_name} / {actual_model}")
+                
+                async for chunk in provider.stream(
+                    messages,
+                    model=actual_model,
+                    temperature=actual_temp,
+                    max_tokens=actual_max_tokens,
+                    tools=tools
+                ):
+                    yield chunk
+                return
+            except Exception as e:
+                logger.warning(f"[registry] stream failed for {provider_name}: {e}")
+                continue
+
+    async def _call_with_retry(self, provider: LLMProvider, messages: list[dict], **kwargs) -> dict:
         """
         Call provider.generate() with exponential backoff on RetryableError.
         Raises RetryableError if all attempts fail.
@@ -155,7 +206,8 @@ def _call_with_retry(self, provider: LLMProvider, messages: list[dict], **kwargs
         """
         for attempt in range(_MAX_ATTEMPTS):
             try:
-                return provider.generate(messages, **kwargs)
+                # Use thread pool for sync generate calls to keep registry async-friendly
+                return await asyncio.to_thread(provider.generate, messages, **kwargs)
             except NonRetryableError:
                 raise  # propagate immediately
             except RetryableError as e:
@@ -166,7 +218,7 @@ def _call_with_retry(self, provider: LLMProvider, messages: list[dict], **kwargs
                     f"[{provider.name}] attempt {attempt + 1}/{_MAX_ATTEMPTS} failed "
                     f"(status={e.status_code}): {e} — retrying in {delay:.1f}s"
                 )
-                time.sleep(delay)
+                await asyncio.sleep(delay)
 
     # ── Provider instantiation ────────────────────────────────────────────────
 
diff --git a/ai-service/app/services/settings_service.py b/ai-service/app/services/settings_service.py
index 9f8dfd4..a56dbe9 100644
--- a/ai-service/app/services/settings_service.py
+++ b/ai-service/app/services/settings_service.py
@@ -1,4 +1,5 @@
 import logging
+import time
 from supabase import create_client, Client
 from app.core.config import settings as app_settings
 
@@ -33,23 +34,43 @@ def __init__(self):
         self._client: Client | None = None
         if app_settings.SUPABASE_URL and app_settings.SUPABASE_SERVICE_KEY:
             self._client = create_client(app_settings.SUPABASE_URL, app_settings.SUPABASE_SERVICE_KEY)
+        
+        # Simple cache
+        self._cache = {}
+        self._cache_expiry = {
+            "settings": 0,
+            "keys": 0
+        }
+        self._TTL = 60 # seconds for settings
+        self._KEY_TTL = 5 # seconds for keys (re-check faster)
 
     def get_settings(self) -> dict:
         if not self._client:
             return dict(_DEFAULTS)
+        
+        now = time.time()
+        if "settings" in self._cache and now < self._cache_expiry["settings"]:
+            return self._cache["settings"]
+
         try:
             result = self._client.table("personality_settings").select("*").eq("id", 1).single().execute()
             if result.data:
-                return {**_DEFAULTS, **result.data}
+                settings = {**_DEFAULTS, **result.data}
+                self._cache["settings"] = settings
+                self._cache_expiry["settings"] = now + self._TTL
+                return settings
         except Exception as e:
             logger.warning(f"SettingsService.get_settings failed: {e}")
-        return dict(_DEFAULTS)
+        return self._cache.get("settings", dict(_DEFAULTS))
 
     def update_settings(self, patch: dict) -> dict:
         if not self._client:
             return dict(_DEFAULTS)
         try:
             result = self._client.table("personality_settings").update(patch).eq("id", 1).execute()
+            # Invalidate cache
+            if "settings" in self._cache:
+                del self._cache["settings"]
             if result.data:
                 return {**_DEFAULTS, **result.data[0]}
         except Exception as e:
@@ -59,19 +80,30 @@ def update_settings(self, patch: dict) -> dict:
     def get_api_keys(self) -> dict:
         if not self._client:
             return dict(_KEY_DEFAULTS)
+
+        now = time.time()
+        if "keys" in self._cache and now < self._cache_expiry["keys"]:
+            return self._cache["keys"]
+
         try:
             result = self._client.table("api_keys").select("*").eq("id", 1).single().execute()
             if result.data:
-                return {**_KEY_DEFAULTS, **result.data}
+                keys = {**_KEY_DEFAULTS, **result.data}
+                self._cache["keys"] = keys
+                self._cache_expiry["keys"] = now + self._KEY_TTL
+                return keys
         except Exception as e:
             logger.warning(f"SettingsService.get_api_keys failed: {e}")
-        return dict(_KEY_DEFAULTS)
+        return self._cache.get("keys", dict(_KEY_DEFAULTS))
 
     def update_api_keys(self, patch: dict) -> dict:
         if not self._client:
             return dict(_KEY_DEFAULTS)
         try:
             result = self._client.table("api_keys").update(patch).eq("id", 1).execute()
+            # Invalidate cache
+            if "keys" in self._cache:
+                del self._cache["keys"]
             if result.data:
                 return {**_KEY_DEFAULTS, **result.data[0]}
         except Exception as e:
diff --git a/dashboard/package.json b/dashboard/package.json
index c7fdc9a..465db32 100644
--- a/dashboard/package.json
+++ b/dashboard/package.json
@@ -15,7 +15,7 @@
     "@supabase/supabase-js": "^2.95.3",
     "@tailwindcss/vite": "^4.1.18",
     "livekit-client": "^2.17.1",
-    "pixi.js": "^6.5.10",
+    "pixi.js": "^8.17.1",
     "react": "^19.2.0",
     "react-dom": "^19.2.0",
     "react-router-dom": "^7.13.0",
@@ -39,4 +39,4 @@
   "overrides": {
     "vite": "npm:rolldown-vite@7.2.2"
   }
-}
+}
\ No newline at end of file
diff --git a/dashboard/src/components/AvatarRenderer.jsx b/dashboard/src/components/AvatarRenderer.jsx
index 25a94aa..4763760 100644
--- a/dashboard/src/components/AvatarRenderer.jsx
+++ b/dashboard/src/components/AvatarRenderer.jsx
@@ -24,29 +24,29 @@ Live2DModel.registerTicker(PIXI.Ticker)
 const MODEL_URL = '/models/hutao/Hu Tao.model3.json'
 
 const EXPRESSION_FILES = {
-  smile:         'SmileLock.exp3.json',
-  sad:           'SadLock.exp3.json',
-  angry:         'Angry.exp3.json',
-  ghost:         'Ghost.exp3.json',
+  smile: 'SmileLock.exp3.json',
+  sad: 'SadLock.exp3.json',
+  angry: 'Angry.exp3.json',
+  ghost: 'Ghost.exp3.json',
   ghost_nervous: 'GhostChange.exp3.json',
-  shadow:        'Shadow.exp3.json',
-  pupil_shrink:  'PupilShrink.exp3.json',
-  eyeshine_off:  'EyeshineOff.exp3.json',
+  shadow: 'Shadow.exp3.json',
+  pupil_shrink: 'PupilShrink.exp3.json',
+  eyeshine_off: 'EyeshineOff.exp3.json',
 }
 
 // Maps LLM-annotated expression names → the closest ambient mood.
 // Applied after the expression fades so the idle baseline stays emotionally coherent.
 const EXPRESSION_TO_MOOD = {
-  smile:         'happy',
-  sad:           'neutral',   // no sad mood — settle to calm neutral
-  angry:         'thinking',  // furrowed brows, withdrawn
-  ghost:         'playful',   // mischievous
+  smile: 'happy',
+  sad: 'neutral',   // no sad mood — settle to calm neutral
+  angry: 'thinking',  // furrowed brows, withdrawn
+  ghost: 'playful',   // mischievous
   ghost_nervous: 'curious',   // uncertain, alert
-  shadow:        'thinking',  // serious / dark
-  pupil_shrink:  'curious',   // surprised / wide-eyed
-  eyeshine_off:  'sleepy',    // dull / fatigued
-  wink:          'playful',
-  tongue:        'playful',
+  shadow: 'thinking',  // serious / dark
+  pupil_shrink: 'curious',   // surprised / wide-eyed
+  eyeshine_off: 'sleepy',    // dull / fatigued
+  wink: 'playful',
+  tongue: 'playful',
 }
 
 // ── State machine ──────────────────────────────────────────────────────────
@@ -54,12 +54,12 @@ const STATE = { IDLE: 'idle', SPEAKING: 'speaking' }
 
 // ── Mood definitions (target parameter values) ─────────────────────────────
 const MOODS = {
-  neutral:  { mouthForm: 0,     browForm: 0,     browRaise: 0,     eyeSmile: 0    },
-  happy:    { mouthForm: 0.65,  browForm: 0.30,  browRaise: 0.45,  eyeSmile: 0.55 },
-  curious:  { mouthForm: 0.20,  browForm: -0.10, browRaise: 0.50,  eyeSmile: 0    },
-  playful:  { mouthForm: 0.90,  browForm: 0.50,  browRaise: 0.70,  eyeSmile: 0.30 },
-  sleepy:   { mouthForm: -0.05, browForm: 0.10,  browRaise: -0.15, eyeSmile: 0    },
-  thinking: { mouthForm: 0.10,  browForm: -0.20, browRaise: 0.35,  eyeSmile: 0    },
+  neutral: { mouthForm: 0, browForm: 0, browRaise: 0, eyeSmile: 0 },
+  happy: { mouthForm: 0.65, browForm: 0.30, browRaise: 0.45, eyeSmile: 0.55 },
+  curious: { mouthForm: 0.20, browForm: -0.10, browRaise: 0.50, eyeSmile: 0 },
+  playful: { mouthForm: 0.90, browForm: 0.50, browRaise: 0.70, eyeSmile: 0.30 },
+  sleepy: { mouthForm: -0.05, browForm: 0.10, browRaise: -0.15, eyeSmile: 0 },
+  thinking: { mouthForm: 0.10, browForm: -0.20, browRaise: 0.35, eyeSmile: 0 },
 }
 
 // Weighted mood pool per state — [moodKey, weight], weights sum to 1.0
@@ -86,11 +86,12 @@ function pickWeightedMood(state) {
 }
 
 // ── Module-scoped Singleton State ──────────────────────────────────────────
-let _app   = null
+let _app = null
 let _model = null
 let _loaded = false
 let _mouthOpen = 0
 let _expressionActive = false
+let _mouthYLocked = false  // true while tongue expression holds MouthOpenY
 let _state = STATE.IDLE
 let _pendingMood = null   // set by setExpression, consumed by update loop on expiry
 
@@ -113,7 +114,7 @@ function initSingleton(width, height) {
 
       const logicalW = _app.screen.width
       const logicalH = _app.screen.height
-      const autoScale = (logicalH / model.height) * 1.9
+      const autoScale = (logicalH / model.height) * 1.4
       model.scale.set(autoScale)
       model.anchor.set(0.5, 0.0)
       model.position.set(logicalW * 0.5, 0)
@@ -149,12 +150,12 @@ function initSingleton(width, height) {
       const origCoreUpdate = core.update.bind(core)
 
       core.update = function () {
-        const now     = performance.now() / 1000
+        const now = performance.now() / 1000
         const elapsed = Math.min((performance.now() - lastMs) / 1000, 0.1)
         lastMs = performance.now()
 
         const speaking = _state === STATE.SPEAKING
-        const lerpSpd  = speaking ? 5.0 : 3.5
+        const lerpSpd = speaking ? 5.0 : 3.5
 
         // ── Breathing ────────────────────────────────────────────────────
         // Slightly faster when speaking (more energetic)
@@ -164,8 +165,8 @@ function initSingleton(width, height) {
         // ── Head movement ─────────────────────────────────────────────────
         const swayAmt = speaking ? 0.35 : 1.0
         const bX = (Math.sin(now * 0.31) * 12 + Math.sin(now * 0.73) * 3) * swayAmt
-        const bY = (Math.sin(now * 0.19) * 5  + Math.sin(now * 0.47) * 2) * swayAmt
-        const bZ = (Math.sin(now * 0.13) * 5  + Math.sin(now * 0.41) * 2) * swayAmt
+        const bY = (Math.sin(now * 0.19) * 5 + Math.sin(now * 0.47) * 2) * swayAmt
+        const bZ = (Math.sin(now * 0.13) * 5 + Math.sin(now * 0.41) * 2) * swayAmt
 
         // Gentle speaking nod — Y oscillation in rough speech rhythm
         let nodY = 0
@@ -201,7 +202,8 @@ function initSingleton(width, height) {
         core.setParameterValueById('ParamBodyAngleZ', Math.sin(now * 0.21) * 3 * swayAmt)
 
         // ── Lip sync ──────────────────────────────────────────────────────
-        core.setParameterValueById('ParamMouthOpenY', _mouthOpen)
+        // Skip when tongue expression is holding MouthOpenY at 1.0
+        if (!_mouthYLocked) core.setParameterValueById('ParamMouthOpenY', _mouthOpen)
 
         // ── Mood interpolation ────────────────────────────────────────────
         if (!_expressionActive) {
@@ -229,25 +231,25 @@ function initSingleton(width, height) {
             // Thinking: look up-left (classic thinking glance)
             if (currentMood === MOODS.thinking) {
               eyeTargetX = -(0.4 + Math.random() * 0.3)
-              eyeTargetY =   0.4 + Math.random() * 0.3
+              eyeTargetY = 0.4 + Math.random() * 0.3
               nextSaccade = saccadeTimer + 4
             }
           }
 
           const lm = elapsed * lerpSpd
           mouthFormC += (currentMood.mouthForm - mouthFormC) * lm
-          browFormC  += (currentMood.browForm   - browFormC)  * lm
-          browRaiseC += (currentMood.browRaise  - browRaiseC) * lm
-          eyeSmileC  += (currentMood.eyeSmile   - eyeSmileC)  * lm
+          browFormC += (currentMood.browForm - browFormC) * lm
+          browRaiseC += (currentMood.browRaise - browRaiseC) * lm
+          eyeSmileC += (currentMood.eyeSmile - eyeSmileC) * lm
 
           // Speaking: add a slight smile boost (engaged / expressive look)
           const mfBoost = speaking ? 0.20 : 0
-          core.setParameterValueById('ParamMouthForm',  clamp(mouthFormC + mfBoost, -1, 1))
-          core.setParameterValueById('ParamBrowLForm',  browFormC)
-          core.setParameterValueById('ParamBrowRForm',  browFormC)
-          core.setParameterValueById('Param37',         browRaiseC)
-          core.setParameterValueById('ParamEyeLSmile',  eyeSmileC)
-          core.setParameterValueById('ParamEyeRSmile',  eyeSmileC)
+          core.setParameterValueById('ParamMouthForm', clamp(mouthFormC + mfBoost, -1, 1))
+          core.setParameterValueById('ParamBrowLForm', browFormC)
+          core.setParameterValueById('ParamBrowRForm', browFormC)
+          core.setParameterValueById('Param37', browRaiseC)
+          core.setParameterValueById('ParamEyeLSmile', eyeSmileC)
+          core.setParameterValueById('ParamEyeRSmile', eyeSmileC)
         }
 
         // ── Saccade ───────────────────────────────────────────────────────
@@ -261,9 +263,9 @@ function initSingleton(width, height) {
           } else {
             eyeTargetX = (Math.random() * 2 - 1) * 0.65
             const r = Math.random()
-            if      (r < 0.20) eyeTargetY =  0.5 + Math.random() * 0.35
+            if (r < 0.20) eyeTargetY = 0.5 + Math.random() * 0.35
             else if (r < 0.35) eyeTargetY = -0.3 - Math.random() * 0.25
-            else               eyeTargetY = (Math.random() * 2 - 1) * 0.4
+            else eyeTargetY = (Math.random() * 2 - 1) * 0.4
             nextSaccade = saccadeTimer + 1.5 + Math.random() * 2.5
           }
         }
@@ -289,7 +291,8 @@ function initSingleton(width, height) {
         const bspd = speaking ? 11 : (isSleepy ? 6 : 9)
         blinkTimer += elapsed
 
-        if (blinkPhase === 0 && blinkTimer >= nextBlink) {
+        // Don't start a new blink while an expression is holding eye parameters (e.g. wink)
+        if (blinkPhase === 0 && blinkTimer >= nextBlink && !_expressionActive) {
           blinkPhase = 1; blinkTimer = 0
         }
         if (blinkPhase === 1) {
@@ -361,18 +364,22 @@ export const AvatarRenderer = forwardRef(function AvatarRenderer(props, ref) {
         if (file) _model.expression(file)
         if (name === 'wink') {
           const c = _model.internalModel.coreModel
-          c.setParameterValueById('ParamEyeLOpen',  0.0)
+          c.setParameterValueById('ParamEyeLOpen', 0.0)
           c.setParameterValueById('ParamBrowLForm', -1.0)
-          c.setParameterValueById('ParamMouthForm',  1.0)
+          c.setParameterValueById('ParamMouthForm', 1.0)
         }
         if (name === 'tongue') {
+          _mouthYLocked = true   // prevent lip-sync loop from overriding MouthOpenY
           const c = _model.internalModel.coreModel
+          // Hu Tao specific: Param70 is TongueOut
+          c.setParameterValueById('Param70', 1.0)
           c.setParameterValueById('ParamMouthOpenY', 1.0)
-          c.setParameterValueById('ParamMouthForm',  -1.0)
+          c.setParameterValueById('ParamMouthForm', -1.0)
         }
       }
       setTimeout(() => {
         _expressionActive = false
+        _mouthYLocked = false
         if (_model) _model.expression()
       }, duration * 1000)
     },
diff --git a/dashboard/src/components/CallOverlay.jsx b/dashboard/src/components/CallOverlay.jsx
index 0ab7b3e..8cc499d 100644
--- a/dashboard/src/components/CallOverlay.jsx
+++ b/dashboard/src/components/CallOverlay.jsx
@@ -1,28 +1,18 @@
 import { useState, useEffect, useRef, useCallback } from 'react'
 import { AvatarRenderer } from './AvatarRenderer'
 
-function getOrCreateIdentity(){
-    const KEY = 'aura_user_identity'
-    let id = localStorage.getItem(KEY)
+import { getOrCreateIdentity } from '../lib/user'
 
-    if (!id){
-        id = `user-${crypto.randomUUID().slice(0,8)}`
-        localStorage.setItem(KEY, id)
-    }
-
-    return id
-}
-
-export default function CallOverlay({ onClose }) {
+export default function CallOverlay({ onClose, conversationId }) {
     const [status, setStatus] = useState('connecting')
     const [elapsed, setElapsed] = useState(0)
-    const roomRef    = useRef(null)
-    const timerRef   = useRef(null)
-    const avatarRef  = useRef(null)
-    const audioCtxRef      = useRef(null)
-    const analyserRef      = useRef(null)
-    const lipRafRef        = useRef(null)
-    const speakTimeoutRef  = useRef(null)
+    const roomRef = useRef(null)
+    const timerRef = useRef(null)
+    const avatarRef = useRef(null)
+    const audioCtxRef = useRef(null)
+    const analyserRef = useRef(null)
+    const lipRafRef = useRef(null)
+    const speakTimeoutRef = useRef(null)
 
     // ─── Connect to LiveKit ──────────────────────
     useEffect(() => {
@@ -30,23 +20,23 @@ export default function CallOverlay({ onClose }) {
 
         const ctx = new AudioContext()
         audioCtxRef.current = ctx
-        ctx.resume().catch(() => {})
+        ctx.resume().catch(() => { })
 
         const connect = async () => {
             try {
-                // Dynamically import to avoid bundling when not needed
                 const { Room, RoomEvent, Track } = await import('livekit-client')
-                
                 const identity = getOrCreateIdentity()
 
                 // Fetch token from token server
-                const res = await fetch(`http://${window.location.hostname}:8082/getToken?room=aura-room&identity=${encodeURIComponent(identity)}`)
+                let url = `http://${window.location.hostname}:8082/getToken?room=aura-room&identity=${encodeURIComponent(identity)}`
+                if (conversationId) url += `&conversation_id=${encodeURIComponent(conversationId)}`
+
+                const res = await fetch(url)
                 if (!res.ok) throw new Error(`Token server error: ${res.status}`)
-                const { token, url } = await res.json()
+                const { token, url: lkUrl } = await res.json()
 
                 if (cancelled) return
 
-                // Connect to room
                 const room = new Room()
                 roomRef.current = room
 
@@ -59,14 +49,13 @@ export default function CallOverlay({ onClose }) {
                         const analyser = ctx.createAnalyser()
                         analyser.fftSize = 2048
                         analyser.smoothingTimeConstant = 0.8
-                        const src = ctx.createMediaStreamSource(
-                            new MediaStream([track.mediaStreamTrack])
-                        )
+                        const src = ctx.createMediaStreamSource(new MediaStream([track.mediaStreamTrack]))
                         src.connect(analyser)
                         analyserRef.current = analyser
 
                         const buf = new Float32Array(analyser.fftSize)
                         const tick = () => {
+                            if (cancelled) return
                             lipRafRef.current = requestAnimationFrame(tick)
                             analyser.getFloatTimeDomainData(buf)
                             let sum = 0
@@ -75,13 +64,8 @@ export default function CallOverlay({ onClose }) {
                             const active = rms > 0.008
                             avatarRef.current?.setMouthOpen(active ? Math.min(0.55, rms * 10) : 0)
 
-                            // Transition to speaking state immediately on audio;
-                            // debounce the return to idle so brief pauses don't flicker.
                             if (active) {
-                                if (speakTimeoutRef.current) {
-                                    clearTimeout(speakTimeoutRef.current)
-                                    speakTimeoutRef.current = null
-                                }
+                                if (speakTimeoutRef.current) { clearTimeout(speakTimeoutRef.current); speakTimeoutRef.current = null }
                                 avatarRef.current?.setSpeaking(true)
                             } else if (!speakTimeoutRef.current) {
                                 speakTimeoutRef.current = setTimeout(() => {
@@ -98,19 +82,16 @@ export default function CallOverlay({ onClose }) {
                     track.detach().forEach((el) => el.remove())
                 })
 
-                // ── Expression events from Python avatar_bridge.py ──────────
                 room.on(RoomEvent.DataReceived, (payload) => {
                     try {
                         const msg = JSON.parse(new TextDecoder().decode(payload))
                         if (msg.type === 'expression') {
                             avatarRef.current?.setExpression(msg.expressions, msg.duration)
                         }
-                    } catch {
-                        // malformed payload — silently ignore
-                    }
+                    } catch { /* best-effort channel: drop payloads that fail to decode or parse */ }
                 })
 
-                await room.connect(url, token)
+                await room.connect(lkUrl, token)
                 await room.localParticipant.setMicrophoneEnabled(true)
 
                 if (!cancelled) {
@@ -128,7 +109,7 @@ export default function CallOverlay({ onClose }) {
             cancelled = true
             cleanup()
         }
-    }, [])
+    }, [conversationId])
 
     const cleanup = useCallback(() => {
         if (timerRef.current) clearInterval(timerRef.current)
@@ -153,48 +134,53 @@ export default function CallOverlay({ onClose }) {
     const vh = window.innerHeight
 
     return (
-        // Full-screen container — avatar fills the whole background,
-        // controls float as an overlay on the right side (same as AIRI).
-        <div className="fixed inset-0 z-50 bg-gradient-to-br from-slate-900 via-slate-800 to-slate-900">
-
-            {/* ── Live2D Avatar — full-screen canvas ── */}
-            <AvatarRenderer ref={avatarRef} width={vw} height={vh} />
-
-            {/* ── Controls — overlaid panel, right side ── */}
-            <div className="absolute right-10 top-1/2 -translate-y-1/2 flex flex-col items-center gap-6
-                            bg-slate-900/60 backdrop-blur-sm rounded-2xl px-8 py-6 shadow-xl">
-                <h2 className="text-white text-3xl font-bold">AURA</h2>
-
-                <p className="text-primary/80 text-sm font-medium">
-                    {status === 'connecting' && 'Connecting...'}
-                    {status === 'connected'  && formatTime(elapsed)}
-                    {status === 'error'      && 'Connection failed'}
-                </p>
-
-                {/* Waveform */}
-                {status === 'connected' && (
-                    <div className="flex gap-1">
-                        {[0, 1, 2, 3, 4].map((i) => (
-                            <div
-                                key={i}
-                                className="w-1 bg-primary rounded-full"
-                                style={{
-                                    height: `${12 + Math.random() * 20}px`,
-                                    animation: `pulse ${0.4 + i * 0.1}s ease-in-out infinite alternate`,
-                                }}
-                            />
-                        ))}
+        <div className="fixed inset-0 z-50 bg-white/95 backdrop-blur-xl animate-in fade-in duration-500">
+            {/* Background Branding */}
+            <div className="absolute inset-0 flex items-center justify-center pointer-events-none opacity-[0.03]">
+                <h1 className="text-[20vw] font-black tracking-tighter">PROJECT AURA</h1>
+            </div>
+
+            {/* ── Live2D Avatar — centered; sized from the vw/vh viewport locals computed above ── */}
+            <div className="absolute inset-0 flex items-center justify-center">
+                <AvatarRenderer ref={avatarRef} width={vw} height={vh} />
+            </div>
+
+            {/* ── Controls — bottom center ── */}
+            <div className="absolute bottom-12 left-1/2 -translate-x-1/2 flex flex-col items-center gap-8 w-full max-w-md px-6">
+                <div className="text-center">
+                    <h2 className="text-slate-900 text-3xl font-black tracking-tight mb-1 uppercase">Project AURA</h2>
+                    <p className="text-primary font-black tracking-[0.3em] text-[10px] uppercase">
+                        {status === 'connecting' && 'Establishing Connection...'}
+                        {status === 'connected' && `Live Interaction — ${formatTime(elapsed)}`}
+                        {status === 'error' && 'Neural Link Failed'}
+                    </p>
+                </div>
+
+                <div className="flex items-center gap-6">
+                    {/* Visualizer — per-bar values derive from the index (not Math.random) so re-renders don't reshuffle bars or restart animations */}
+                    {status === 'connected' && (
+                        <div className="flex items-end gap-1.5 h-12">
+                            {[...Array(12)].map((_, i) => (
+                                <div key={i} className="w-1.5 bg-primary/20 rounded-full animate-bounce"
+                                    style={{ height: `${20 + ((i * 37) % 80)}%`, animationDuration: `${0.6 + ((i * 7) % 10) / 10}s` }} />
+                            ))}
+                        </div>
+                    )}
+
+                    {/* Hangup */}
+                    <button
+                        type="button"
+                        onClick={handleHangup}
+                        className="w-20 h-20 rounded-full bg-slate-900 hover:bg-red-600 flex items-center justify-center text-white shadow-2xl transition-all duration-300 hover:scale-110 active:scale-95 group cursor-pointer"
+                    >
+                        <span className="material-icons-round text-4xl group-hover:rotate-90 transition-transform">close</span>
+                    </button>
+
+                    {/* Placeholder for future mic toggle/settings */}
+                    <div className="w-12 h-12 rounded-full border border-slate-200 flex items-center justify-center text-slate-400 opacity-50">
+                        <span className="material-icons-round">mic</span>
                     </div>
-                )}
-
-                {/* Hangup */}
-                <button
-                    type="button"
-                    onClick={handleHangup}
-                    className="w-16 h-16 rounded-full bg-red-500 hover:bg-red-600 flex items-center justify-center text-white shadow-lg shadow-red-500/30 transition-all cursor-pointer"
-                >
-                    <span className="material-icons-round text-3xl">call_end</span>
-                </button>
+                </div>
             </div>
         </div>
     )
diff --git a/dashboard/src/components/ChatFeed.jsx b/dashboard/src/components/ChatFeed.jsx
index 744a263..6d11ad4 100644
--- a/dashboard/src/components/ChatFeed.jsx
+++ b/dashboard/src/components/ChatFeed.jsx
@@ -1,49 +1,64 @@
 export default function ChatFeed({ messages = [] }) {
     if (messages.length === 0) {
         return (
-            <div className="flex flex-col items-center justify-center h-full text-center">
-                <div className="w-20 h-20 rounded-full aura-gradient flex items-center justify-center text-white mb-6 shadow-lg shadow-primary/20">
-                    <span className="material-icons-round text-4xl">wb_sunny</span>
+            <div className="flex flex-col items-center justify-center min-h-[60vh] text-center px-4 animate-in fade-in zoom-in duration-700">
+                <div className="w-24 h-24 rounded-full aura-gradient flex items-center justify-center text-white mb-8 shadow-2xl shadow-primary/40 relative">
+                    <span className="material-icons-round text-5xl">auto_awesome</span>
+                    <div className="absolute inset-0 rounded-full aura-gradient animate-ping opacity-20" />
                 </div>
-                <h2 className="text-2xl font-bold mb-2">Hello! I'm AURA</h2>
-                <p className="text-slate-400 max-w-sm">
-                    Your personal AI companion. Ask me anything, or start a voice call!
+                <h2 className="text-3xl font-black mb-4 tracking-tight text-slate-900">Project AURA</h2>
+                <p className="text-slate-500 max-w-sm font-medium leading-relaxed">
+                    Advanced Universal Responsive Avatar. <br />
+                    Ready for your next inquiry.
                 </p>
             </div>
         )
     }
 
     return (
-        <div className="space-y-6 max-w-3xl mx-auto">
+        <div className="space-y-8 pb-20">
             {messages.map((msg) => (
-                <div key={msg.id} className={`flex gap-3 ${msg.role === 'user' ? 'flex-row-reverse' : ''}`}>
-                    {/* Avatar */}
-                    <div className={`w-8 h-8 rounded-full flex-shrink-0 flex items-center justify-center text-white text-xs font-bold ${msg.role === 'user'
-                        ? 'bg-slate-700'
-                        : 'aura-gradient shadow-sm shadow-primary/20'
+                <div key={msg.id} className={`flex gap-4 group ${msg.role === 'user' ? 'flex-row-reverse' : ''} animate-in slide-in-from-bottom-2 duration-300`}>
+                    {/* Avatar Icon */}
+                    <div className={`w-9 h-9 rounded-xl flex-shrink-0 flex items-center justify-center text-[11px] font-black transition-transform group-hover:scale-110 ${msg.role === 'user'
+                        ? 'bg-slate-100 text-slate-500 border border-slate-200'
+                        : 'aura-gradient text-white shadow-lg shadow-primary/20'
                         }`}>
-                        {msg.role === 'user' ? 'U' : '☀'}
+                        {msg.role === 'user' ? 'ME' : 'AURA'}
                     </div>
 
-                    {/* Bubble Container */}
-                    <div className="flex flex-col gap-2 max-w-[70%]">
-                        {/* Tool Usage Indicator */}
+                    {/* Content Column */}
+                    <div className={`flex flex-col gap-3 max-w-[85%] lg:max-w-[70%] ${msg.role === 'user' ? 'items-end' : ''}`}>
+
+                        {/* Tool Execution Details */}
                         {msg.tools_used && msg.tools_used.map((tool, idx) => (
-                            <div key={`tool-${idx}`} className="px-3 py-2 bg-indigo-50/80 backdrop-blur-sm border border-indigo-100 rounded-xl text-xs text-indigo-700 flex items-center gap-2 shadow-sm">
-                                <span className="material-icons-round text-[16px] text-indigo-500">travel_explore</span>
-                                <div>
-                                    <span className="font-medium mr-1 font-mono">{tool.name}</span>
-                                    <span className="opacity-80 truncate max-w-[200px] inline-block align-bottom">{JSON.stringify(tool.args.query || tool.args)}</span>
+                            <div key={`tool-${idx}`} className="px-4 py-2.5 bg-slate-50 border border-slate-100 rounded-2xl text-[11px] text-primary flex items-center gap-3 shadow-sm animate-in fade-in slide-in-from-left-2">
+                                <span className="material-icons-round text-sm animate-spin-slow">api</span>
+                                <div className="font-mono">
+                                    <span className="font-black tracking-widest">{tool.name}</span>
+                                    <span className="mx-2 text-slate-300">—</span>
+                                    <span className="text-slate-500 truncate max-w-[200px] inline-block align-bottom">
+                                        {typeof tool.args === 'string' ? tool.args : (tool.args?.query || JSON.stringify(tool.args)) /* ?. guards tools recorded without args */}
+                                    </span>
                                 </div>
                             </div>
                         ))}
 
-                        <div className={`px-4 py-3 rounded-2xl text-sm leading-relaxed ${msg.role === 'user'
-                            ? 'bg-slate-800 text-white rounded-tr-md'
-                            : 'bg-white border border-slate-100 text-slate-700 rounded-tl-md shadow-sm'
+                        {/* Speech Bubble */}
+                        <div className={`px-5 py-4 rounded-3xl text-[15px] leading-relaxed tracking-tight shadow-sm ${msg.role === 'user'
+                            ? 'bg-primary text-white rounded-tr-sm'
+                            : 'bg-white text-slate-800 rounded-tl-sm border border-slate-100'
                             }`}>
                             {msg.content}
                         </div>
+
+                        {/* Emotion Tag */}
+                        {msg.role === 'aura' && msg.emotion && (
+                            <div className="flex items-center gap-1.5 px-3 py-1 bg-slate-100 rounded-full w-fit">
+                                <span className="w-1.5 h-1.5 rounded-full bg-primary" />
+                                <span className="text-[10px] font-black uppercase tracking-widest text-slate-500">{msg.emotion}</span>
+                            </div>
+                        )}
                     </div>
                 </div>
             ))}
diff --git a/dashboard/src/components/ChatHeader.jsx b/dashboard/src/components/ChatHeader.jsx
index ceaa671..32333ad 100644
--- a/dashboard/src/components/ChatHeader.jsx
+++ b/dashboard/src/components/ChatHeader.jsx
@@ -1,36 +1,42 @@
-import { useNavigate } from 'react-router-dom'
-
-export default function ChatHeader({ onCallStart }) {
-    const navigate = useNavigate()
-
+export default function ChatHeader({ onCallStart, isCallActive, onTuningOpen }) {
     return (
-        <header className="flex items-center justify-between px-8 py-5 border-b border-slate-100 bg-white/60 backdrop-blur">
+        <header className="flex items-center justify-between px-8 py-5 border-b border-slate-100 bg-white/80 backdrop-blur-md">
             <div className="flex items-center gap-4">
-                <div className="w-10 h-10 rounded-full aura-gradient flex items-center justify-center text-white shadow-md shadow-primary/20">
-                    <span className="material-icons-round text-lg">wb_sunny</span>
+                <div className="w-10 h-10 rounded-full aura-gradient flex items-center justify-center text-white shadow-lg shadow-primary/20">
+                    <span className="material-icons-round text-lg">auto_awesome</span>
                 </div>
                 <div>
-                    <h2 className="font-bold text-lg tracking-tight">AURA</h2>
-                    <p className="text-xs text-slate-400">Active • High Precision Mode</p>
+                    <h2 className="font-bold text-lg tracking-tight text-slate-800">Project AURA</h2>
+                    <p className="text-[10px] uppercase tracking-widest text-primary font-bold">
+                        {isCallActive ? 'Interactive Mode' : 'Ready to Assist'}
+                    </p>
                 </div>
             </div>
 
-            <div className="flex items-center gap-3">
+            <div className="flex items-center gap-4">
+                {/* Voice Interaction Toggle */}
                 <button
                     type="button"
                     onClick={onCallStart}
-                    className="flex items-center gap-2 bg-primary hover:bg-primary/90 text-white px-5 py-2 rounded-full font-semibold shadow-md shadow-primary/20 transition-all cursor-pointer"
+                    className={`flex items-center gap-2 px-6 py-2.5 rounded-full font-bold transition-all duration-300 cursor-pointer
+                                ${isCallActive
+                            ? 'bg-primary text-white shadow-lg shadow-primary/40'
+                            : 'bg-slate-50 hover:bg-slate-100 text-slate-600 border border-slate-200'}`}
                 >
-                    <span className="material-icons-round text-lg">call</span>
-                    Call Mode
+                    <span className={`material-icons-round text-lg ${isCallActive ? 'animate-pulse' : ''}`}>
+                        {isCallActive ? 'record_voice_over' : 'forum'}
+                    </span>
+                    {isCallActive ? 'Interactive Session Active' : 'Interact with Project AURA'}
                 </button>
+
+                {/* Personality Tuning Toggle */}
                 <button
                     type="button"
-                    onClick={() => navigate('/admin')}
-                    className="p-2 hover:bg-slate-100 rounded-lg transition-colors cursor-pointer"
-                    title="Admin Dashboard"
+                    onClick={onTuningOpen}
+                    className="w-11 h-11 flex items-center justify-center bg-slate-50 hover:bg-slate-100 text-slate-400 hover:text-primary rounded-xl border border-slate-200 transition-all cursor-pointer group"
+                    title="System Dashboard"
                 >
-                    <span className="material-icons-round text-slate-400">dashboard</span>
+                    <span className="material-icons-round text-xl group-hover:rotate-45 transition-transform">dashboard</span>
                 </button>
             </div>
         </header>
diff --git a/dashboard/src/components/ChatInput.jsx b/dashboard/src/components/ChatInput.jsx
index 4ae0a63..8e74c18 100644
--- a/dashboard/src/components/ChatInput.jsx
+++ b/dashboard/src/components/ChatInput.jsx
@@ -28,30 +28,28 @@ export default function ChatInput({ onSend, disabled }) {
     }
 
     return (
-        <div className="px-8 pb-6 pt-2">
-            <div className="max-w-3xl mx-auto flex items-end gap-3 p-3 bg-white border border-slate-200 rounded-2xl shadow-lg shadow-black/[0.03]">
+        <div className="flex items-end gap-3 p-4 bg-white rounded-3xl border border-slate-200 shadow-xl relative group transition-all focus-within:border-primary/50 focus-within:ring-4 focus-within:ring-primary/5">
+            <textarea
+                ref={textareaRef}
+                value={text}
+                onChange={(e) => setText(e.target.value)}
+                onKeyDown={handleKeyDown}
+                placeholder="Message AURA..."
+                disabled={disabled}
+                rows={1}
+                className="flex-1 bg-transparent text-[15px] resize-none outline-none py-2 px-3 placeholder-slate-400 text-slate-800 disabled:opacity-50"
+            />
 
-                <textarea
-                    ref={textareaRef}
-                    value={text}
-                    onChange={(e) => setText(e.target.value)}
-                    onKeyDown={handleKeyDown}
-                    placeholder="Type a message..."
-                    disabled={disabled}
-                    rows={1}
-                    className="flex-1 bg-transparent text-sm resize-none outline-none py-2 px-2 placeholder-slate-400 disabled:opacity-50"
-                />
-                <button
-                    type="button"
-                    onClick={handleSubmit}
-                    disabled={!text.trim() || disabled}
-                    className="flex items-center justify-center w-10 h-10 rounded-xl aura-gradient text-white transition-all disabled:opacity-30 cursor-pointer disabled:cursor-not-allowed"
-                >
-                    <span className="material-icons-round text-lg">
-                        {disabled ? 'hourglass_top' : 'arrow_upward'}
-                    </span>
-                </button>
-            </div>
+            <button
+                type="button"
+                onClick={handleSubmit}
+                disabled={!text.trim() || disabled}
+                className="flex items-center justify-center w-11 h-11 rounded-2xl aura-gradient text-white transition-all disabled:opacity-20 shadow-lg shadow-primary/20 cursor-pointer disabled:cursor-not-allowed hover:scale-105 active:scale-95"
+            >
+                <span className="material-icons-round">
+                    {disabled ? 'hourglass_top' : 'send'}
+                </span>
+            </button>
         </div>
     )
 }
diff --git a/dashboard/src/components/KnowledgeBase.jsx b/dashboard/src/components/KnowledgeBase.jsx
index 192780e..3d45ec6 100644
--- a/dashboard/src/components/KnowledgeBase.jsx
+++ b/dashboard/src/components/KnowledgeBase.jsx
@@ -85,51 +85,74 @@ export default function KnowledgeBase() {
     }
 
     return (
-        <div className="bg-white p-8 rounded-xl border border-slate-200 shadow-sm flex flex-col">
-            <div className="flex justify-between items-center mb-8">
-                <h3 className="text-xl font-bold flex items-center gap-2">
-                    <span className="material-icons-round text-primary">auto_stories</span>
-                    Knowledge Base
-                </h3>
-                <span className="text-sm font-bold text-slate-400">{files.length} files</span>
+        <div className="flex flex-col h-full">
+            <div className="flex justify-between items-end mb-8 px-2">
+                <div className="space-y-1">
+                    <h3 className="text-xl font-black text-white tracking-widest uppercase flex items-center gap-3">
+                        <span className="w-2 h-2 rounded-full bg-primary shadow-[0_0_8px_#ff7e33]" />
+                        Cognitive Core
+                    </h3>
+                    <p className="text-[10px] text-white/20 font-black uppercase tracking-[0.2em]">Contextual Data Vectors</p>
+                </div>
+                <div className="px-3 py-1 bg-white/5 rounded-full border border-white/5 shadow-inner">
+                    <span className="text-[10px] font-black text-primary/80 uppercase tracking-tighter">{files.length} ASSETS MAPPED</span>
+                </div>
             </div>
 
             {/* File list */}
-            <div className="flex-1 space-y-4 mb-8 overflow-y-auto max-h-64 custom-scrollbar pr-2">
+            <div className="flex-1 space-y-3 mb-10 overflow-y-auto max-h-72 custom-scrollbar-dark pr-3">
                 {files.map((f) => (
-                    <div key={f.id} className="flex items-center justify-between p-3 bg-bg-light rounded-lg group border border-transparent hover:border-primary/30 transition-all">
+                    <div key={f.id} className="flex items-center justify-between p-4 bg-white/[0.03] rounded-2xl group border border-white/5 hover:border-primary/20 hover:bg-white/5 transition-all shadow-lg">
                         <div className="flex items-center gap-4">
-                            <div className="w-10 h-10 bg-white rounded flex items-center justify-center text-primary border border-slate-200">
-                                <span className="material-icons-round">{mimeIcon(f.mime_type)}</span>
+                            <div className="w-12 h-12 bg-black/40 rounded-xl flex items-center justify-center text-primary border border-white/5 shadow-inner group-hover:scale-110 transition-transform">
+                                <span className="material-icons-round text-xl">{mimeIcon(f.mime_type)}</span>
                             </div>
                             <div>
-                                <h4 className="text-sm font-bold">{f.filename}</h4>
-                                <p className="text-[10px] text-slate-400 uppercase">
-                                    {new Date(f.created_at).toLocaleDateString()} • {formatSize(f.size_bytes)}
-                                </p>
+                                <h4 className="text-[13px] font-bold text-white/90 group-hover:text-primary transition-colors">{f.filename}</h4>
+                                <div className="flex items-center gap-2 mt-1">
+                                    <span className="text-[9px] text-white/20 font-black uppercase tracking-widest">{formatSize(f.size_bytes)}</span>
+                                    <span className="w-1 h-1 rounded-full bg-white/10" />
+                                    <span className="text-[9px] text-white/20 font-black uppercase tracking-widest">{new Date(f.created_at).toLocaleDateString()}</span>
+                                </div>
                             </div>
                         </div>
                         <button
                             type="button"
                             onClick={() => handleDelete(f.id)}
-                            className="text-slate-400 hover:text-red-500 transition-colors cursor-pointer"
+                            className="w-8 h-8 flex items-center justify-center rounded-lg text-white/20 hover:text-red-400 hover:bg-red-500/10 transition-all cursor-pointer"
                         >
-                            <span className="material-icons-round text-lg">delete_outline</span>
+                            <span className="material-icons-round text-lg">delete_sweep</span>
                         </button>
                     </div>
                 ))}
+
+                {files.length === 0 && (
+                    <div className="py-12 flex flex-col items-center justify-center opacity-10 grayscale">
+                        <span className="material-icons-round text-6xl mb-4">folder_off</span>
+                        <p className="text-xs font-black uppercase tracking-[0.3em]">No Data Mapped</p>
+                    </div>
+                )}
             </div>
 
             {/* Upload zone */}
-            <label className="border-2 border-dashed border-slate-200 rounded-xl p-8 flex flex-col items-center justify-center text-center group hover:border-primary transition-colors cursor-pointer bg-bg-light/50">
+            <label className="relative overflow-hidden border-2 border-dashed border-white/5 rounded-3xl p-10 flex flex-col items-center justify-center text-center group hover:border-primary/30 hover:bg-primary/[0.02] transition-all cursor-pointer bg-black/20 shadow-inner">
+                <div className="absolute inset-0 aura-gradient opacity-0 group-hover:opacity-[0.03] transition-opacity pointer-events-none" />
                 <input type="file" onChange={handleUpload} className="hidden" accept=".pdf,.txt,.json,.csv,.zip,.pptx" />
-                <div className="w-12 h-12 rounded-full bg-primary/10 flex items-center justify-center mb-3 group-hover:scale-110 transition-transform">
-                    <span className="material-icons-round text-primary">
-                        {uploading ? 'hourglass_top' : 'cloud_upload'}
+
+                <div className="w-16 h-16 rounded-2xl bg-white/5 flex items-center justify-center mb-5 group-hover:scale-110 group-hover:bg-primary/10 transition-all shadow-xl border border-white/5 group-hover:border-primary/20">
+                    <span className={`material-icons-round text-3xl transition-all ${uploading ? 'text-primary animate-spin' : 'text-white/40 group-hover:text-primary'}`}>
+                        {uploading ? 'hourglass_empty' : 'auto_mode'}
                     </span>
                 </div>
-                <p className="font-bold text-sm">{uploading ? 'Uploading...' : 'Upload New Knowledge'}</p>
-                <p className="text-xs text-slate-400 mt-1">PDF, TXT, PPTX, JSON, or CSV up to 50MB</p>
+
+                <div className="space-y-1">
+                    <p className="font-black text-xs text-white/80 group-hover:text-white uppercase tracking-[0.1em] transition-colors">
+                        {uploading ? 'INGESTING DATA...' : 'INITIATE NEURAL INGESTION'}
+                    </p>
+                    <p className="text-[10px] text-white/20 font-medium tracking-tight">
+                        Drop PDF, TXT, PPTX, JSON, or CSV (UP TO 50MB)
+                    </p>
+                </div>
             </label>
         </div>
     )
diff --git a/dashboard/src/components/PersonalityTuner.jsx b/dashboard/src/components/PersonalityTuner.jsx
index 1f51eee..d6af5ae 100644
--- a/dashboard/src/components/PersonalityTuner.jsx
+++ b/dashboard/src/components/PersonalityTuner.jsx
@@ -1,82 +1,85 @@
+import { useState, useEffect } from 'react'
+
 const SLIDERS = [
-    { key: 'empathy',   label: 'Empathy' },
-    { key: 'humor',     label: 'Humor' },
+    { key: 'empathy', label: 'Empathy' },
+    { key: 'humor', label: 'Humor' },
     { key: 'formality', label: 'Formality' },
 ]
 
 const PROVIDERS = [
-    { value: 'openrouter', label: 'OpenRouter',    hint: 'Routes to any model (DeepSeek, GPT, Mistral…)' },
-    { value: 'openai',     label: 'OpenAI',         hint: 'Direct GPT-4o / o1 access' },
-    { value: 'anthropic',  label: 'Anthropic',      hint: 'Claude 3.5 / Claude 4' },
-    { value: 'groq',       label: 'Groq',           hint: 'Ultra-fast Llama / Mixtral inference' },
-    { value: 'ollama',     label: 'Ollama (local)',  hint: 'Local models via Ollama' },
+    { value: 'openrouter', label: 'OpenRouter', hint: 'Routes to any model (DeepSeek, GPT, Mistral…)' },
+    { value: 'openai', label: 'OpenAI', hint: 'Direct GPT-4o / o1 access' },
+    { value: 'anthropic', label: 'Anthropic', hint: 'Claude 3.5 / Claude 4' },
+    { value: 'groq', label: 'Groq', hint: 'Ultra-fast Llama / Mixtral inference' },
+    { value: 'ollama', label: 'Ollama (local)', hint: 'Local models via Ollama' },
 ]
 
 const MODEL_SUGGESTIONS = {
     openrouter: ['deepseek/deepseek-v3.2', 'openai/gpt-4o', 'anthropic/claude-sonnet-4-5', 'mistralai/mistral-nemo'],
-    openai:     ['gpt-4o', 'gpt-4o-mini', 'o1-mini'],
-    anthropic:  ['claude-opus-4-5', 'claude-sonnet-4-5', 'claude-haiku-4-5-20251001'],
-    groq:       ['llama-3.3-70b-versatile', 'llama-3.1-8b-instant', 'mixtral-8x7b-32768'],
-    ollama:     ['llama3.2', 'mistral', 'gemma2', 'qwen2.5'],
+    openai: ['gpt-4o', 'gpt-4o-mini', 'o1-mini'],
+    anthropic: ['claude-opus-4-5', 'claude-sonnet-4-5', 'claude-haiku-4-5-20251001'],
+    groq: ['llama-3.3-70b-versatile', 'llama-3.1-8b-instant', 'mixtral-8x7b-32768'],
+    ollama: ['llama3.2', 'mistral', 'gemma2', 'qwen2.5'],
 }
 
 export default function PersonalityTuner({ settings, onChange }) {
     if (!settings) return <TunerSkeleton />
 
-    const provider    = settings.provider || 'openrouter'
+    const provider = settings.provider || 'openrouter'
     const suggestions = MODEL_SUGGESTIONS[provider] || []
     const providerInfo = PROVIDERS.find(p => p.value === provider)
 
     return (
-        <div className="bg-white p-8 rounded-xl border border-slate-200 shadow-sm">
-            <h3 className="text-xl font-bold mb-8 flex items-center gap-2">
-                <span className="material-icons-round text-primary">tune</span>
-                Personality Tuner
-            </h3>
-
+        <div className="flex flex-col gap-8">
             {/* Provider picker */}
-            <div className="mb-6">
-                <label className="block text-sm font-bold text-slate-500 uppercase tracking-widest mb-2">
-                    LLM Provider
+            <div className="space-y-4">
+                <label className="block text-[10px] font-black text-slate-400 uppercase tracking-[0.2em]">
+                    LLM PROVIDER
                 </label>
                 <div className="grid grid-cols-2 gap-2 sm:grid-cols-3">
                     {PROVIDERS.map(p => (
                         <button key={p.value} onClick={() => onChange({ provider: p.value })} title={p.hint}
-                            className={`px-3 py-2 rounded-lg text-sm font-medium border transition-all text-left ${
-                                provider === p.value
-                                    ? 'bg-primary text-white border-primary shadow-sm'
-                                    : 'bg-bg-light text-slate-600 border-slate-200 hover:border-primary/40'
-                            }`}>
-                            {p.label}
+                            className={`px-4 py-3 rounded-2xl text-xs font-bold border transition-all text-left group cursor-pointer ${provider === p.value
+                                ? 'bg-primary text-white border-primary shadow-lg shadow-primary/20'
+                                : 'bg-white text-slate-500 border-slate-200 hover:border-primary/30 hover:text-primary'
+                                }`}>
+                            <div className="flex flex-col gap-0.5">
+                                <span>{p.label}</span>
+                                <span className={`text-[9px] font-medium opacity-50 ${provider === p.value ? 'text-white' : ''} truncate`}>
+                                    {p.hint.split('(')[0]}
+                                </span>
+                            </div>
                         </button>
                     ))}
                 </div>
-                {providerInfo && (
-                    <p className="text-xs text-slate-400 mt-1.5">{providerInfo.hint}</p>
-                )}
             </div>
 
-            {/* Model input */}
-            <div className="mb-6">
-                <label className="block text-sm font-bold text-slate-500 uppercase tracking-widest mb-2">
-                    Model
+            {/* Model selection */}
+            <div className="space-y-4">
+                <label className="block text-[10px] font-black text-slate-400 uppercase tracking-[0.2em]">
+                    MODEL ARCHITECTURE
                 </label>
-                <input
-                    type="text"
-                    value={settings.model || ''}
-                    onChange={e => onChange({ model: e.target.value })}
-                    placeholder="e.g. deepseek/deepseek-v3.2"
-                    list="model-suggestions"
-                    className="w-full bg-bg-light border border-slate-200 rounded-lg px-3 py-2 text-sm font-mono focus:ring-1 focus:ring-primary focus:border-primary outline-none"
-                />
+                <div className="relative group">
+                    <input
+                        type="text"
+                        value={settings.model || ''}
+                        onChange={e => onChange({ model: e.target.value })}
+                        placeholder="e.g. deepseek/deepseek-v3.2"
+                        list="model-suggestions"
+                        className="w-full bg-white border border-slate-200 rounded-2xl px-5 py-4 text-sm font-mono text-slate-800 placeholder-slate-300 focus:border-primary focus:ring-4 focus:ring-primary/5 outline-none transition-all"
+                    />
+                    <div className="absolute right-4 top-1/2 -translate-y-1/2 text-slate-300 group-focus-within:text-primary transition-all">
+                        <span className="material-icons-round text-sm">precision_manufacturing</span>
+                    </div>
+                </div>
                 <datalist id="model-suggestions">
                     {suggestions.map(m => <option key={m} value={m} />)}
                 </datalist>
                 {suggestions.length > 0 && (
-                    <div className="flex flex-wrap gap-1 mt-2">
+                    <div className="flex flex-wrap gap-2 pt-1">
                         {suggestions.map(m => (
                             <button key={m} onClick={() => onChange({ model: m })}
-                                className="text-xs px-2 py-0.5 rounded-full bg-slate-100 text-slate-500 hover:bg-primary/10 hover:text-primary transition-colors font-mono">
+                                className="text-[10px] px-3 py-1.5 rounded-full bg-slate-100 text-slate-500 hover:bg-primary/10 hover:text-primary transition-all font-bold border border-transparent cursor-pointer">
                                 {m.split('/').pop()}
                             </button>
                         ))}
@@ -84,58 +87,72 @@ export default function PersonalityTuner({ settings, onChange }) {
                 )}
             </div>
 
-            {/* Temperature + Max Tokens */}
-            <div className="grid grid-cols-2 gap-4 mb-8">
-                <div>
-                    <label className="block text-sm font-medium text-slate-500 mb-1">
-                        Temperature&nbsp;<span className="text-primary font-bold">{settings.temperature ?? 0.8}</span>
-                    </label>
+            {/* Hyperparameters */}
+            <div className="grid grid-cols-1 sm:grid-cols-2 gap-8">
+                <div className="space-y-4">
+                    <div className="flex justify-between items-center">
+                        <label className="text-[10px] font-black text-slate-400 uppercase tracking-[0.2em]">Creativity</label>
+                        <span className="text-primary font-mono text-xs font-bold px-2 py-0.5 bg-primary/10 rounded">{settings.temperature ?? 0.8}</span>
+                    </div>
                     <input type="range" min="0" max="1" step="0.05"
                         value={settings.temperature ?? 0.8}
                         onChange={e => onChange({ temperature: parseFloat(e.target.value) })}
-                        className="w-full accent-primary"
+                        className="w-full h-1.5 bg-slate-200 rounded-full appearance-none cursor-pointer accent-primary"
                     />
+                    <p className="text-[9px] text-slate-400 font-medium italic">Higher values result in more diverse output</p>
                 </div>
-                <div>
-                    <label className="block text-sm font-medium text-slate-500 mb-1">
-                        Max Tokens&nbsp;<span className="text-primary font-bold">{settings.max_tokens ?? 300}</span>
-                    </label>
+                <div className="space-y-4">
+                    <div className="flex justify-between items-center">
+                        <label className="text-[10px] font-black text-slate-400 uppercase tracking-[0.2em]">Response Depth</label>
+                        <span className="text-primary font-mono text-xs font-bold px-2 py-0.5 bg-primary/10 rounded">{settings.max_tokens ?? 300}t</span>
+                    </div>
                     <input type="range" min="100" max="1000" step="50"
                         value={settings.max_tokens ?? 300}
                         onChange={e => onChange({ max_tokens: parseInt(e.target.value) })}
-                        className="w-full accent-primary"
+                        className="w-full h-1.5 bg-slate-200 rounded-full appearance-none cursor-pointer accent-primary"
                     />
+                    <p className="text-[9px] text-slate-400 font-medium italic">Tokens limit for standard generations</p>
                 </div>
             </div>
 
-            {/* Personality sliders */}
-            <div className="space-y-6">
+            {/* Personality Sliders */}
+            <div className="space-y-8 bg-white p-6 rounded-3xl border border-slate-100 shadow-sm">
                 {SLIDERS.map(({ key, label }) => (
-                    <div key={key} className="space-y-2">
-                        <div className="flex justify-between text-sm font-medium">
-                            <label className="text-slate-500">{label}</label>
-                            <span className="text-primary">{settings[key]}%</span>
+                    <div key={key} className="space-y-3">
+                        <div className="flex justify-between items-center">
+                            <label className="text-[11px] font-bold text-slate-500 tracking-wider capitalize">{label}</label>
+                            <span className="text-primary font-black text-xs">{settings[key]}%</span>
+                        </div>
+                        <div className="relative flex items-center h-2">
+                            <div className="absolute inset-x-0 h-1.5 bg-slate-100 rounded-full" />
+                            <div className="absolute h-1.5 bg-primary rounded-full" style={{ width: `${settings[key]}%` }} />
+                            <input type="range" min="0" max="100"
+                                value={settings[key]}
+                                onChange={e => onChange({ [key]: parseInt(e.target.value) })}
+                                className="absolute inset-0 w-full h-full opacity-0 cursor-pointer z-10"
+                            />
+                            <div className="absolute w-4 h-4 bg-white rounded-full shadow-md border-2 border-primary pointer-events-none" style={{ left: `calc(${settings[key]}% - 8px)` }} />
                         </div>
-                        <input type="range" min="0" max="100"
-                            value={settings[key]}
-                            onChange={e => onChange({ [key]: parseInt(e.target.value) })}
-                            className="w-full h-2 bg-slate-100 rounded-lg appearance-none cursor-pointer accent-primary"
-                        />
                     </div>
                 ))}
             </div>
 
             {/* System prompt */}
-            <div className="mt-8">
-                <label className="block text-sm font-bold text-slate-500 uppercase tracking-widest mb-3">
-                    System Prompt Override
+            <div className="space-y-4">
+                <label className="block text-[10px] font-black text-slate-400 uppercase tracking-[0.2em]">
+                    CORE LOGIC OVERRIDE
                 </label>
-                <textarea
-                    value={settings.system_prompt || ''}
-                    onChange={e => onChange({ system_prompt: e.target.value })}
-                    placeholder="Enter core instructions here..."
-                    className="w-full h-32 bg-bg-light border border-slate-200 rounded-lg p-4 font-mono text-sm focus:ring-primary focus:border-primary custom-scrollbar resize-none outline-none"
-                />
+                <div className="relative group">
+                    <textarea
+                        value={settings.system_prompt || ''}
+                        onChange={e => onChange({ system_prompt: e.target.value })}
+                        placeholder="Define AURA's fundamental behavior..."
+                        className="w-full h-40 bg-white border border-slate-200 rounded-2xl p-5 font-mono text-sm text-slate-800 placeholder-slate-300 focus:border-primary outline-none transition-all focus:ring-4 focus:ring-primary/5 resize-none custom-scrollbar"
+                    />
+                    <div className="absolute right-4 bottom-4 text-slate-200 group-focus-within:text-primary transition-colors">
+                        <span className="material-icons-round text-3xl">psychology</span>
+                    </div>
+                </div>
             </div>
         </div>
     )
@@ -148,12 +165,12 @@ function TunerSkeleton() {
             <div className="space-y-4 mb-8">
                 <div className="h-4 w-24 bg-slate-200 rounded" />
                 <div className="grid grid-cols-3 gap-2">
-                    {[1,2,3].map(i => <div key={i} className="h-9 bg-slate-100 rounded-lg" />)}
+                    {[1, 2, 3].map(i => <div key={i} className="h-9 bg-slate-100 rounded-lg" />)}
                 </div>
                 <div className="h-9 w-full bg-slate-100 rounded-lg" />
             </div>
             <div className="space-y-6">
-                {[1,2,3].map(i => (
+                {[1, 2, 3].map(i => (
                     <div key={i} className="space-y-2">
                         <div className="h-4 w-24 bg-slate-200 rounded" />
                         <div className="h-2 w-full bg-slate-100 rounded-full" />
diff --git a/dashboard/src/components/Presence.jsx b/dashboard/src/components/Presence.jsx
new file mode 100644
index 0000000..02adfed
--- /dev/null
+++ b/dashboard/src/components/Presence.jsx
@@ -0,0 +1,215 @@
+import { useState, useEffect, useRef, useCallback, useImperativeHandle, forwardRef } from 'react'
+import { AvatarRenderer } from './AvatarRenderer'
+
+import { getOrCreateIdentity } from '../lib/user'
+
+/**
+ * Live voice session with the avatar.
+ *
+ * While `isActive` is true the component requests a token from the token
+ * server, joins a newly named LiveKit room, enables the local microphone and
+ * drives the avatar's mouth from the RMS level of the subscribed audio track.
+ * When `isActive` turns false, or on unmount, the session is torn down.
+ *
+ * Props:
+ *   isActive       - connect while true, disconnect while false.
+ *   onStatusChange - optional; called with 'idle', 'connecting', 'connected'
+ *                    or 'error'.
+ *   conversationId - optional; forwarded to the token endpoint when set.
+ *
+ * The forwarded ref exposes `setExpression(expr, dur)` and `setMouthOpen(val)`.
+ */
+export const Presence = forwardRef(({ isActive, onStatusChange, conversationId }, ref) => {
+    const [status, setStatus] = useState('idle') // idle, connecting, connected, error
+    const [isTalking, setIsTalking] = useState(false)
+    const roomRef = useRef(null)
+    const avatarRef = useRef(null)
+    const audioCtxRef = useRef(null)
+    const lipRafRef = useRef(null)
+    const speakTimeoutRef = useRef(null)
+
+    // Keep the latest onStatusChange in a ref so that a parent passing a new
+    // function on every render does not tear down and rebuild the session.
+    const onStatusChangeRef = useRef(onStatusChange)
+    useEffect(() => {
+        onStatusChangeRef.current = onStatusChange
+    }, [onStatusChange])
+
+    // Expose avatar methods to parent if needed
+    useImperativeHandle(ref, () => ({
+        setExpression: (expr, dur) => avatarRef.current?.setExpression(expr, dur),
+        setMouthOpen: (val) => avatarRef.current?.setMouthOpen(val),
+    }))
+
+    // Update the local status and notify the parent in one place.
+    const reportStatus = useCallback((next) => {
+        setStatus(next)
+        onStatusChangeRef.current?.(next)
+    }, [])
+
+    // Release everything a session may hold. Safe to call when nothing is open.
+    const cleanup = useCallback(() => {
+        if (lipRafRef.current) { cancelAnimationFrame(lipRafRef.current); lipRafRef.current = null }
+        if (speakTimeoutRef.current) { clearTimeout(speakTimeoutRef.current); speakTimeoutRef.current = null }
+        if (audioCtxRef.current) {
+            if (audioCtxRef.current.state !== 'closed') audioCtxRef.current.close()
+            audioCtxRef.current = null
+        }
+        if (roomRef.current) {
+            roomRef.current.disconnect()
+            roomRef.current = null
+        }
+        document.getElementById('presence-agent-audio')?.remove()
+        // The pending "stopped talking" timer was cancelled above, so reset here.
+        setIsTalking(false)
+        reportStatus('idle')
+    }, [reportStatus])
+
+    useEffect(() => {
+        if (!isActive) {
+            cleanup()
+            return
+        }
+
+        let cancelled = false
+        reportStatus('connecting')
+
+        const connect = async () => {
+            try {
+                const ctx = new AudioContext()
+                audioCtxRef.current = ctx
+                await ctx.resume()
+
+                const { Room, RoomEvent, Track } = await import('livekit-client')
+                const identity = getOrCreateIdentity()
+
+                // Fetch unique room token
+                const roomName = `aura-${Date.now()}`
+                let tokenUrl = `http://${window.location.hostname}:8082/getToken?room=${roomName}&identity=${encodeURIComponent(identity)}`
+                if (conversationId) {
+                    tokenUrl += `&conversation_id=${encodeURIComponent(conversationId)}`
+                }
+                const res = await fetch(tokenUrl)
+                if (!res.ok) throw new Error(`Token server error: ${res.status}`)
+                const { token, url } = await res.json()
+
+                if (cancelled) return
+
+                const room = new Room()
+                roomRef.current = room
+
+                room.on(RoomEvent.TrackSubscribed, (track) => {
+                    if (track.kind === Track.Kind.Audio) {
+                        const el = track.attach()
+                        el.id = 'presence-agent-audio'
+                        document.body.appendChild(el)
+
+                        // Tap the same track for lip-sync level metering.
+                        const analyser = ctx.createAnalyser()
+                        analyser.fftSize = 2048
+                        const src = ctx.createMediaStreamSource(new MediaStream([track.mediaStreamTrack]))
+                        src.connect(analyser)
+
+                        const buf = new Float32Array(analyser.fftSize)
+                        const tick = () => {
+                            if (cancelled) return
+                            lipRafRef.current = requestAnimationFrame(tick)
+                            analyser.getFloatTimeDomainData(buf)
+                            let sum = 0
+                            for (let i = 0; i < buf.length; i++) sum += buf[i] * buf[i]
+                            const rms = Math.sqrt(sum / buf.length)
+                            const active = rms > 0.008
+                            avatarRef.current?.setMouthOpen(active ? Math.min(0.55, rms * 10) : 0)
+
+                            if (active) {
+                                // Clear AND null the timer ref: a stale id left here would stop
+                                // the silent branch from ever scheduling the "stopped" timer.
+                                if (speakTimeoutRef.current) { clearTimeout(speakTimeoutRef.current); speakTimeoutRef.current = null }
+                                setIsTalking(true)
+                                avatarRef.current?.setSpeaking(true)
+                            } else {
+                                // Wait 600 ms of silence before leaving the talking state.
+                                if (!speakTimeoutRef.current) {
+                                    speakTimeoutRef.current = setTimeout(() => {
+                                        setIsTalking(false)
+                                        avatarRef.current?.setSpeaking(false)
+                                        speakTimeoutRef.current = null
+                                    }, 600)
+                                }
+                            }
+                        }
+                        tick()
+                    }
+                })
+
+                room.on(RoomEvent.DataReceived, (payload) => {
+                    try {
+                        const msg = JSON.parse(new TextDecoder().decode(payload))
+                        if (msg.type === 'expression') {
+                            avatarRef.current?.setExpression(msg.expressions, msg.duration)
+                        }
+                    } catch (err) {
+                        // Best effort: payloads that are not JSON are skipped, not fatal.
+                        console.debug('[PRESENCE] Ignored data message:', err)
+                    }
+                })
+
+                await room.connect(url, token)
+                // Deactivated while connecting: cleanup has already disconnected the
+                // room, so do not go on to open the microphone.
+                if (cancelled) return
+                await room.localParticipant.setMicrophoneEnabled(true)
+
+                if (!cancelled) {
+                    reportStatus('connected')
+                }
+            } catch (err) {
+                console.error('[PRESENCE] Connection error:', err)
+                if (!cancelled) {
+                    reportStatus('error')
+                }
+            }
+        }
+
+        connect()
+
+        return () => {
+            cancelled = true
+            cleanup()
+        }
+        // NOTE(review): conversationId is read only when a session starts; changing
+        // it mid-session does not reconnect. Confirm that this is intended.
+    }, [isActive, cleanup, reportStatus])
+
+    return (
+        <div className={`relative transition-all duration-500 rounded-3xl overflow-hidden shadow-2xl
+                        ${isActive ? 'w-full h-full' : 'w-48 h-48 opacity-40 grayscale pointer-events-none'}`}>
+
+            {/* Background Glow */}
+            <div className={`absolute inset-0 aura-gradient transition-opacity duration-700
+                            ${isTalking ? 'opacity-30' : 'opacity-10'}`} />
+
+            <AvatarRenderer
+                ref={avatarRef}
+                width={isActive ? 800 : 200}
+                height={isActive ? 1200 : 300}
+                style={{
+                    transform: isActive ? 'scale(1)' : 'scale(0.8) translateY(10%)',
+                    transition: 'transform 0.5s ease-out'
+                }}
+            />
+
+            {/* Status Indicators */}
+            {isActive && status === 'connecting' && (
+                <div className="absolute inset-0 flex items-center justify-center bg-slate-950/40 backdrop-blur-md">
+                    <div className="flex flex-col items-center gap-4">
+                        <div className="w-12 h-12 border-4 border-primary border-t-transparent rounded-full animate-spin" />
+                        <span className="text-white font-bold tracking-widest animate-pulse">WAKING UP...</span>
+                    </div>
+                </div>
+            )}
+        </div>
+    )
+})
+
+Presence.displayName = 'Presence'
diff --git a/dashboard/src/components/Sidebar.jsx b/dashboard/src/components/Sidebar.jsx
index 3074011..90ae5fb 100644
--- a/dashboard/src/components/Sidebar.jsx
+++ b/dashboard/src/components/Sidebar.jsx
@@ -1,62 +1,50 @@
-import { useNavigate } from 'react-router-dom'
-
 export default function Sidebar({ conversations = [], activeId, onSelect, onNewChat }) {
-    const navigate = useNavigate()
-
     // Group conversations by date
     const grouped = groupByDate(conversations)
 
     return (
-        <aside className="w-72 bg-white border-r border-slate-200 flex flex-col">
+        <aside className="w-full h-full flex flex-col bg-transparent">
             {/* Header */}
             <div className="p-6">
-                <div className="flex items-center justify-between mb-8">
-                    <div className="flex items-center gap-2">
-                        <div className="w-8 h-8 rounded-full aura-gradient flex items-center justify-center text-white shadow-lg shadow-primary/20">
-                            <span className="material-icons-round text-sm">wb_sunny</span>
-                        </div>
-                        <span className="font-bold text-xl tracking-tight">AURA</span>
+                <div className="flex items-center gap-3 mb-8">
+                    <div className="w-9 h-9 rounded-full aura-gradient flex items-center justify-center text-white shadow-lg shadow-primary/20">
+                        <span className="material-icons-round text-sm">wb_sunny</span>
                     </div>
-                    <button
-                        type="button"
-                        onClick={() => navigate('/admin')}
-                        className="p-1 hover:bg-slate-100 rounded cursor-pointer"
-                        title="Admin Dashboard"
-                    >
-                        <span className="material-icons-round text-slate-400">dashboard</span>
-                    </button>
+                    <span className="font-bold text-xl tracking-tight text-white/90">Project AURA</span>
                 </div>
 
                 <button
                     type="button"
                     onClick={onNewChat}
-                    className="w-full py-3 px-4 bg-primary hover:bg-primary/90 text-white rounded-lg flex items-center justify-center gap-2 font-semibold transition-all shadow-lg shadow-primary/20 group cursor-pointer"
+                    className="w-full py-3.5 px-4 bg-primary hover:bg-primary/90 text-white rounded-xl flex items-center justify-center gap-2 font-bold transition-all shadow-lg shadow-primary/20 group cursor-pointer"
                 >
                     <span className="material-icons-round group-hover:rotate-90 transition-transform">add</span>
-                    New Chat
+                    New Context
                 </button>
             </div>
 
             {/* Conversation list */}
-            <div className="flex-1 overflow-y-auto custom-scrollbar px-4 space-y-6">
+            <div className="flex-1 overflow-y-auto custom-scrollbar-dark px-4 space-y-8">
                 {Object.entries(grouped).map(([label, convos]) => (
                     <div key={label}>
-                        <h3 className="px-2 mb-3 text-xs font-bold uppercase tracking-widest text-slate-400">{label}</h3>
-                        <div className="space-y-1">
+                        <h3 className="px-3 mb-4 text-[10px] font-black uppercase tracking-[0.2em] text-white/20">{label}</h3>
+                        <div className="space-y-1.5">
                             {convos.map((c) => (
                                 <button
                                     key={c.id}
                                     type="button"
                                     onClick={() => onSelect(c.id)}
-                                    className={`w-full group flex items-center gap-3 px-3 py-2.5 rounded-lg transition-all text-left cursor-pointer ${c.id === activeId
-                                            ? 'bg-primary/5 text-primary border border-primary/10'
-                                            : 'hover:bg-slate-50 text-slate-600 border border-transparent'
+                                    className={`w-full group flex items-center gap-3 px-3 py-3 rounded-xl transition-all text-left cursor-pointer border ${c.id === activeId
+                                        ? 'bg-white/5 text-primary border-white/5 shadow-inner'
+                                        : 'hover:bg-white/[0.03] text-slate-400 border-transparent hover:border-white/5'
                                         }`}
                                 >
-                                    <span className={`material-icons-round text-sm ${c.id === activeId ? '' : 'text-slate-300'}`}>
-                                        chat_bubble_outline
+                                    <span className={`material-icons-round text-sm ${c.id === activeId ? 'text-primary' : 'text-slate-600'}`}>
+                                        {c.id === activeId ? 'auto_awesome' : 'chat_bubble_outline'}
+                                    </span>
+                                    <span className={`text-[13px] font-semibold truncate ${c.id === activeId ? 'text-white' : ''}`}>
+                                        {c.title}
                                     </span>
-                                    <span className="text-sm font-medium truncate">{c.title}</span>
                                 </button>
                             ))}
                         </div>
@@ -64,19 +52,22 @@ export default function Sidebar({ conversations = [], activeId, onSelect, onNewC
                 ))}
 
                 {conversations.length === 0 && (
-                    <p className="text-center text-sm text-slate-400 mt-8">No conversations yet</p>
+                    <div className="flex flex-col items-center justify-center py-10 opacity-20">
+                        <span className="material-icons-round text-4xl mb-2">forum</span>
+                        <p className="text-xs font-bold uppercase tracking-widest">Empty Space</p>
+                    </div>
                 )}
             </div>
 
             {/* Footer */}
-            <div className="p-4 mt-auto border-t border-slate-100">
-                <div className="flex items-center gap-3 p-3">
-                    <div className="w-10 h-10 rounded-full bg-gradient-to-br from-primary to-primary-light flex items-center justify-center text-white font-bold text-sm">
+            <div className="p-4 mt-auto border-t border-white/5 bg-slate-950/20">
+                <div className="flex items-center gap-3 p-3 rounded-2xl bg-white/[0.02]">
+                    <div className="w-10 h-10 rounded-full aura-gradient flex items-center justify-center text-white font-bold text-sm shadow-md">
                         U
                     </div>
                     <div className="flex flex-col items-start overflow-hidden">
-                        <span className="text-sm font-bold truncate">User</span>
-                        <span className="text-xs text-slate-400">Premium Plan</span>
+                        <span className="text-[13px] font-bold text-white truncate text-shadow-sm">Interface User</span>
+                        <p className="text-[10px] font-black text-primary/60 uppercase tracking-widest">Premium Status</p>
                     </div>
                 </div>
             </div>
diff --git a/dashboard/src/components/SlideOver.jsx b/dashboard/src/components/SlideOver.jsx
new file mode 100644
index 0000000..5eb52ea
--- /dev/null
+++ b/dashboard/src/components/SlideOver.jsx
@@ -0,0 +1,60 @@
+import { useEffect } from 'react'
+
+/**
+ * Right-hand slide-over panel rendered above a dimmed backdrop.
+ *
+ * @param {object} props
+ * @param {boolean} props.isOpen - Whether the panel is slid into view.
+ * @param {() => void} props.onClose - Called when the backdrop or close button is clicked.
+ * @param {React.ReactNode} props.title - Heading shown in the panel header.
+ * @param {React.ReactNode} props.children - Content rendered in the scrollable body.
+ */
+export default function SlideOver({ isOpen, onClose, title, children }) {
+    // Lock page scrolling only while open, and restore the previous inline value on
+    // close/unmount instead of forcing 'unset', which would clobber any other lock.
+    useEffect(() => {
+        if (!isOpen) return undefined
+        const previousOverflow = document.body.style.overflow
+        document.body.style.overflow = 'hidden'
+        return () => { document.body.style.overflow = previousOverflow }
+    }, [isOpen])
+
+    return (
+        <>
+            {/* Backdrop */}
+            <div
+                className={`fixed inset-0 bg-slate-950/40 backdrop-blur-sm z-[60] transition-opacity duration-500
+                            ${isOpen ? 'opacity-100' : 'opacity-0 pointer-events-none'}`}
+                onClick={onClose}
+            />
+
+            {/* Panel */}
+            <aside
+                className={`fixed top-0 right-0 h-full w-full max-w-2xl aura-glass z-[70] transition-transform duration-500 ease-out shadow-2xl
+                            ${isOpen ? 'translate-x-0' : 'translate-x-full'}`}
+            >
+                <div className="flex flex-col h-full bg-[#030712]/40">
+                    {/* Header */}
+                    <header className="flex items-center justify-between p-6 border-b border-white/10">
+                        <h2 className="text-xl font-bold tracking-tight text-white flex items-center gap-3">
+                            <span className="w-2 h-2 rounded-full bg-primary" />
+                            {title}
+                        </h2>
+                        <button
+                            type="button"
+                            onClick={onClose}
+                            className="p-2 hover:bg-white/10 rounded-full text-slate-400 hover:text-white transition-all cursor-pointer"
+                        >
+                            <span className="material-icons-round">close</span>
+                        </button>
+                    </header>
+
+                    {/* Content */}
+                    <div className="flex-1 overflow-y-auto p-8 custom-scrollbar-dark">
+                        {children}
+                    </div>
+                </div>
+            </aside>
+        </>
+    )
+}
diff --git a/dashboard/src/components/StatusCards.jsx b/dashboard/src/components/StatusCards.jsx
index de186c2..87e6c3f 100644
--- a/dashboard/src/components/StatusCards.jsx
+++ b/dashboard/src/components/StatusCards.jsx
@@ -1,75 +1,106 @@
-const CARDS = [
-    {
-        label: 'System Status',
-        icon: 'sensors',
-        value: 'Operational',
-        barWidth: '94%',
-        footer: '99.98% UPTIME SINCE DEPLOY',
-    },
-    {
-        label: 'Active Memory',
-        icon: 'memory',
-        value: '14.2',
-        unit: 'GB',
-        segments: [true, true, false, false],
-        footer: 'USING 42% OF ALLOCATED VRAM',
-    },
-    {
-        label: 'Current Emotion',
-        icon: 'psychology',
-        value: 'Analytical',
-        isPrimary: true,
-        footer: 'High Precision Mode Active',
-        badges: ['sentiment_satisfied', 'insights'],
-    },
-]
+import { useState, useEffect } from 'react'
+import { supabase } from '../lib/supabaseClient'
 
 export default function StatusCards() {
+    const [stats, setStats] = useState({
+        uptime: '99.98%',
+        vram: '42%',
+        knowledgeCount: 0,
+        messageCount: 0,
+    })
+
+    useEffect(() => {
+        let cancelled = false // set on unmount so a late response is ignored
+        const countRows = (table) => supabase.from(table).select('*', { count: 'exact', head: true })
+        // The two counts are independent, so request them in parallel.
+        Promise.all([countRows('knowledge_base'), countRows('messages')]).then(([kb, msg]) => {
+            // Surface query failures instead of silently showing 0.
+            if (kb.error || msg.error) console.error('Failed to load stats:', kb.error || msg.error)
+            if (cancelled) return
+            setStats(prev => ({ ...prev, knowledgeCount: kb.count || 0, messageCount: msg.count || 0 }))
+        })
+        return () => { cancelled = true }
+    }, [])
+
+    const CARDS = [
+        {
+            label: 'System Integrity',
+            icon: 'verified_user',
+            value: 'Operational',
+            color: 'text-emerald-400',
+            footer: `${stats.uptime} UPTIME — L4 DISTANCE: 0.02`,
+            bar: 94
+        },
+        {
+            label: 'Neural Synapse',
+            icon: 'hub',
+            value: stats.messageCount,
+            unit: 'msgs',
+            footer: 'TOTAL CONVERSATIONAL NODES',
+            segments: [true, true, true, false]
+        },
+        {
+            label: 'Cognitive Depth',
+            icon: 'model_training',
+            value: stats.knowledgeCount,
+            unit: 'kb',
+            isPrimary: true,
+            footer: 'ACTIVE VECTORS IN RAG PIPELINE',
+            badges: ['psychology', 'auto_stories']
+        },
+    ]
+
     return (
-        <div className="grid grid-cols-1 md:grid-cols-3 gap-6 mb-8">
+        <div className="grid grid-cols-1 md:grid-cols-3 gap-6">
             {CARDS.map((card) => (
-                <div key={card.label} className="bg-white p-6 rounded-xl border border-slate-200 shadow-sm">
-                    <div className="flex justify-between items-start mb-4">
-                        <span className="text-slate-500 font-medium uppercase text-xs tracking-widest">{card.label}</span>
-                        <span className="material-icons-round text-primary/40">{card.icon}</span>
-                    </div>
-
-                    <div className="flex items-baseline gap-2">
-                        <span className={`text-4xl font-bold ${card.isPrimary ? 'text-primary' : ''}`}>{card.value}</span>
-                        {card.unit && <span className="text-xl text-slate-400 font-medium">{card.unit}</span>}
+                <div key={card.label} className="bg-white/[0.03] p-6 rounded-3xl border border-white/5 shadow-2xl relative overflow-hidden group hover:border-white/10 transition-all">
+                    <div className="absolute top-0 right-0 p-4 opacity-10 group-hover:opacity-20 transition-opacity">
+                        <span className="material-icons-round text-4xl">{card.icon}</span>
                     </div>
 
-                    {/* Progress bar */}
-                    {card.barWidth && (
-                        <div className="mt-4 w-full bg-slate-100 h-1.5 rounded-full overflow-hidden">
-                            <div className="bg-primary h-full" style={{ width: card.barWidth }} />
+                    <div className="relative z-10 flex flex-col h-full">
+                        <div className="flex items-center gap-2 mb-4">
+                            <span className="text-[10px] font-black text-white/40 uppercase tracking-[0.2em]">{card.label}</span>
                         </div>
-                    )}
 
-                    {/* Segmented bar */}
-                    {card.segments && (
-                        <div className="mt-4 flex items-center gap-1 h-1.5">
-                            {card.segments.map((active, i) => (
-                                <div key={i} className={`h-full w-1/4 rounded-full ${active ? 'bg-primary' : 'bg-primary/20'}`} />
-                            ))}
+                        <div className="flex items-baseline gap-2 mb-4">
+                            <span className={`text-4xl font-black tracking-tighter ${card.isPrimary ? 'text-primary' : (card.color || 'text-white')}`}>
+                                {card.value}
+                            </span>
+                            {card.unit && <span className="text-sm text-white/20 font-black uppercase">{card.unit}</span>}
                         </div>
-                    )}
 
-                    {/* Badges */}
-                    {card.badges && (
-                        <div className="mt-4 flex items-center gap-2">
-                            <div className="flex -space-x-2">
-                                {card.badges.map((icon, i) => (
-                                    <div key={i} className={`w-6 h-6 rounded-full flex items-center justify-center border border-white ${i === 0 ? 'bg-primary/20' : 'bg-primary'
-                                        }`}>
-                                        <span className={`material-icons-round text-[14px] ${i > 0 ? 'text-white' : ''}`}>{icon}</span>
-                                    </div>
-                                ))}
-                            </div>
-                        </div>
-                    )}
+                        <div className="mt-auto pt-4 border-t border-white/5">
+                            {/* Progress bar */}
+                            {card.bar && (
+                                <div className="mb-3 w-full bg-white/5 h-1 rounded-full overflow-hidden">
+                                    <div className="bg-emerald-400 h-full shadow-[0_0_8px_rgba(52,211,153,0.5)]" style={{ width: `${card.bar}%` }} />
+                                </div>
+                            )}
+
+                            {/* Segmented bar */}
+                            {card.segments && (
+                                <div className="mb-3 flex items-center gap-1 h-1">
+                                    {card.segments.map((active, i) => (
+                                        <div key={i} className={`h-full w-1/4 rounded-full ${active ? 'bg-primary' : 'bg-white/10'}`} />
+                                    ))}
+                                </div>
+                            )}
+
+                            {/* Badges */}
+                            {card.badges && (
+                                <div className="mb-3 flex items-center gap-1">
+                                    {card.badges.map((icon, i) => (
+                                        <div key={i} className="w-6 h-6 rounded-full bg-primary/10 border border-primary/20 flex items-center justify-center">
+                                            <span className="material-icons-round text-[12px] text-primary">{icon}</span>
+                                        </div>
+                                    ))}
+                                </div>
+                            )}
 
-                    <p className="text-[10px] mt-2 text-slate-400 font-medium uppercase">{card.footer}</p>
+                            <p className="text-[9px] text-white/30 font-bold uppercase tracking-wider">{card.footer}</p>
+                        </div>
+                    </div>
                 </div>
             ))}
         </div>
diff --git a/dashboard/src/components/SystemLogs.jsx b/dashboard/src/components/SystemLogs.jsx
index 4502f41..5985d46 100644
--- a/dashboard/src/components/SystemLogs.jsx
+++ b/dashboard/src/components/SystemLogs.jsx
@@ -52,17 +52,17 @@ export default function SystemLogs() {
     }
 
     return (
-        <div className="bg-[#121212] rounded-xl overflow-hidden border border-white/5 shadow-2xl">
-            <div className="bg-[#1a1a1a] px-6 py-3 border-b border-white/5 flex justify-between items-center">
+        <div className="bg-slate-900 rounded-xl overflow-hidden border border-slate-800 shadow-2xl">
+            <div className="bg-slate-800/50 px-6 py-3 border-b border-slate-800 flex justify-between items-center">
                 <div className="flex items-center gap-2">
                     <span className="material-icons-round text-primary text-sm">terminal</span>
-                    <span className="text-xs font-bold text-white/60 tracking-widest uppercase">
+                    <span className="text-xs font-bold text-slate-400 tracking-widest uppercase">
                         System Logs — Live Stream
                     </span>
                 </div>
                 <div className="flex gap-1.5">
-                    <div className="w-2.5 h-2.5 rounded-full bg-white/10" />
-                    <div className="w-2.5 h-2.5 rounded-full bg-white/10" />
+                    <div className="w-2.5 h-2.5 rounded-full bg-slate-700" />
+                    <div className="w-2.5 h-2.5 rounded-full bg-slate-700" />
                     <div className="w-2.5 h-2.5 rounded-full bg-primary/40" />
                 </div>
             </div>
diff --git a/dashboard/src/lib/user.js b/dashboard/src/lib/user.js
new file mode 100644
index 0000000..3a7be59
--- /dev/null
+++ b/dashboard/src/lib/user.js
@@ -0,0 +1,23 @@
+/**
+ * Unified identity management for Project AURA.
+ * Ensures Chat and Voice always share the same identifier stored in localStorage.
+ */
+
+/**
+ * Return the identity persisted in localStorage, creating and storing one on first use.
+ *
+ * @returns {string} The stored identity; newly created ones are `user-` plus 8 characters.
+ */
+export function getOrCreateIdentity() {
+    const KEY = 'aura_user_identity'
+    let id = localStorage.getItem(KEY)
+
+    if (!id) {
+        // Math.random().toString(36) can be short (0.5 becomes "0.i"), so pad the
+        // slice to keep every generated suffix exactly 8 characters long.
+        id = `user-${Math.random().toString(36).substring(2, 10).padEnd(8, '0')}`
+        localStorage.setItem(KEY, id)
+    }
+
+    return id
+}
diff --git a/dashboard/src/pages/AdminPage.jsx b/dashboard/src/pages/AdminPage.jsx
index fd4936f..c1745c6 100644
--- a/dashboard/src/pages/AdminPage.jsx
+++ b/dashboard/src/pages/AdminPage.jsx
@@ -1,16 +1,24 @@
-import { useState, useEffect } from 'react'
+import { useState, useEffect, useRef } from 'react'
 import { supabase } from '../lib/supabaseClient'
 import AdminSidebar from '../components/AdminSidebar'
 import StatusCards from '../components/StatusCards'
 import PersonalityTuner from '../components/PersonalityTuner'
+import ApiKeys from '../components/ApiKeys'
 import KnowledgeBase from '../components/KnowledgeBase'
 import SystemLogs from '../components/SystemLogs'
 
+const AI_SERVICE = `http://${window.location.hostname}:8000/api/v1`
+
 export default function AdminPage() {
     const [settings, setSettings] = useState(null)
+    const [apiKeys, setApiKeys] = useState(null)
+    const [saving, setSaving] = useState(false)
+    const [saveMsg, setSaveMsg] = useState('')
+    const pendingRef = useRef({})
 
     useEffect(() => {
         loadSettings()
+        loadApiKeys()
     }, [])
 
     const loadSettings = async () => {
@@ -22,12 +30,53 @@ export default function AdminPage() {
         if (data) setSettings(data)
     }
 
-    const updateSettings = async (patch) => {
-        const updated = { ...settings, ...patch, updated_at: new Date().toISOString() }
+    const loadApiKeys = async () => {
+        try {
+            const res = await fetch(`${AI_SERVICE}/settings/keys`)
+            const data = await res.json()
+            if (data) setApiKeys(data)
+        } catch (err) {
+            console.error('Failed to load API key status:', err)
+        }
+    }
+
+    const handleSettingsChange = (patch) => {
+        const updated = { ...settings, ...patch }
         setSettings(updated)
-        await supabase.from('personality_settings').update(patch).eq('id', 1)
+        pendingRef.current = { ...pendingRef.current, ...patch }
     }
 
+    // Persist pending edits to Supabase and the AI service; any failed step throws.
+    const handleDeploy = async () => {
+        setSaving(true)
+        setSaveMsg('')
+        try {
+            const patch = pendingRef.current
+            if (Object.keys(patch).length > 0) {
+                // supabase-js resolves with { error } rather than rejecting.
+                const { error } = await supabase.from('personality_settings')
+                    .update({ ...patch, updated_at: new Date().toISOString() }).eq('id', 1)
+                if (error) throw error
+                // fetch() only rejects on network failure; HTTP errors need res.ok.
+                const res = await fetch(`${AI_SERVICE}/settings`, {
+                    method: 'PUT', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(patch),
+                })
+                if (!res.ok) throw new Error(`API error: ${res.status}`)
+            }
+            // Keep edits made while the deploy was in flight (they replace the ref object).
+            if (pendingRef.current === patch) pendingRef.current = {}
+            setSaveMsg('Settings deployed successfully!')
+            setTimeout(() => setSaveMsg(''), 3000)
+        } catch (err) {
+            console.error('Deploy error:', err)
+            setSaveMsg('Deploy failed. Check console.')
+        } finally {
+            setSaving(false)
+        }
+    }
+
+    const hasPendingChanges = Object.keys(pendingRef.current).length > 0
+
     return (
         <div className="flex h-screen overflow-hidden bg-bg-light text-slate-800 font-admin">
             <AdminSidebar />
@@ -36,17 +85,26 @@ export default function AdminPage() {
                 {/* Header */}
                 <header className="mb-10 flex justify-between items-end">
                     <div>
-                        <h1 className="text-3xl font-bold tracking-tight mb-1">System Control Center</h1>
-                        <p className="text-slate-500 font-medium">AURA AI • Instance Node #772-Beta</p>
+                        <h1 className="text-4xl font-black tracking-tight text-slate-800 mb-2">Project AURA <span className="text-slate-400 font-light">System Control Center</span></h1>
+                        <p className="text-slate-500 font-medium">Project AURA • Instance Node #772-Beta</p>
                     </div>
                     <div className="flex items-center gap-4">
+                        {saveMsg && (
+                            <span className={`text-sm font-semibold ${saveMsg.includes('success') ? 'text-emerald-600' : 'text-red-500'}`}>
+                                {saveMsg}
+                            </span>
+                        )}
                         <div className="flex items-center gap-2 px-4 py-2 bg-white rounded-full border border-slate-200">
                             <div className="w-2 h-2 rounded-full bg-emerald-500 animate-pulse" />
                             <span className="text-sm font-semibold">Live Connection</span>
                         </div>
-                        <button className="bg-primary hover:bg-primary/90 text-white px-6 py-2 rounded-full font-bold transition-all shadow-lg shadow-primary/20 flex items-center gap-2 cursor-pointer">
-                            <span className="material-icons-round text-sm">bolt</span>
-                            Deploy Changes
+                        <button
+                            onClick={handleDeploy}
+                            disabled={saving}
+                            className="bg-primary hover:bg-primary/90 disabled:opacity-50 text-white px-6 py-2 rounded-full font-bold transition-all shadow-lg shadow-primary/20 flex items-center gap-2 cursor-pointer"
+                        >
+                            <span className="material-icons-round text-sm">{saving ? 'hourglass_top' : 'bolt'}</span>
+                            {saving ? 'Deploying...' : 'Deploy Changes'}
                         </button>
                     </div>
                 </header>
@@ -54,8 +112,17 @@ export default function AdminPage() {
                 <StatusCards />
 
                 <div className="grid grid-cols-1 lg:grid-cols-2 gap-8 mb-8">
-                    <PersonalityTuner settings={settings} onUpdate={updateSettings} />
-                    <KnowledgeBase />
+                    <div className="bg-white p-8 rounded-3xl border border-slate-100 shadow-sm">
+                        <h2 className="text-lg font-black text-slate-800 mb-6 flex items-center gap-2">
+                            <span className="material-icons-round text-primary text-xl">psychology</span>
+                            Personality Engine
+                        </h2>
+                        <PersonalityTuner settings={settings} onChange={handleSettingsChange} />
+                    </div>
+                    <div className="flex flex-col gap-8">
+                        <ApiKeys />
+                        <KnowledgeBase />
+                    </div>
                 </div>
 
                 <SystemLogs />
diff --git a/dashboard/src/pages/ChatPage.jsx b/dashboard/src/pages/ChatPage.jsx
index 03d0754..5dab3d1 100644
--- a/dashboard/src/pages/ChatPage.jsx
+++ b/dashboard/src/pages/ChatPage.jsx
@@ -1,10 +1,18 @@
 import { useState, useEffect, useRef, useCallback } from 'react'
+import { useNavigate } from 'react-router-dom'
 import { supabase } from '../lib/supabaseClient'
 import Sidebar from '../components/Sidebar'
 import ChatHeader from '../components/ChatHeader'
 import ChatFeed from '../components/ChatFeed'
 import ChatInput from '../components/ChatInput'
+import { Presence } from '../components/Presence'
 import CallOverlay from '../components/CallOverlay'
+import SlideOver from '../components/SlideOver'
+import PersonalityTuner from '../components/PersonalityTuner'
+import KnowledgeBase from '../components/KnowledgeBase'
+import StatusCards from '../components/StatusCards'
+import SystemLogs from '../components/SystemLogs'
+import { getOrCreateIdentity } from '../lib/user'
 
 const AI_SERVICE = `http://${window.location.hostname}:8000/api/v1`
 
@@ -13,12 +21,17 @@ export default function ChatPage() {
     const [activeConvoId, setActiveConvoId] = useState(null)
     const [messages, setMessages] = useState([])
     const [isCallActive, setIsCallActive] = useState(false)
+    const [isAdminOpen, setIsAdminOpen] = useState(false)
+    const [settings, setSettings] = useState(null)
     const [isSending, setIsSending] = useState(false)
     const feedRef = useRef(null)
+    const presenceRef = useRef(null)
+    const navigate = useNavigate()
 
-    // ─── Load conversations on mount ────────────────
+    // ─── Load data on mount ────────────────
     useEffect(() => {
         loadConversations()
+        loadSettings()
     }, [])
 
     // ─── Load messages when active conversation changes ──
@@ -44,12 +57,37 @@ export default function ChatPage() {
     }
 
     const loadMessages = async (convoId) => {
+        // If we are currently sending a message to this conversation, avoid overwriting
+        // the optimistic local state with a stale DB fetch (which might not have the new msg yet).
+        if (isSending && activeConvoId === convoId) {
+            console.log('[AURA] Skipping loadMessages during sending to prevent UI flicker');
+            return;
+        }
+
         const { data } = await supabase
             .from('messages')
             .select('*')
             .eq('conversation_id', convoId)
             .order('created_at', { ascending: true })
-        if (data) setMessages(data)
+
+        // If we switched away while loading, don't update
+        setActiveConvoId(current => {
+            if (current === convoId && data) {
+                setMessages(data)
+            }
+            return current
+        })
+    }
+
+    const loadSettings = async () => {
+        const { data } = await supabase.from('personality_settings').select('*').eq('id', 1).single()
+        if (data) setSettings(data)
+    }
+
+    const updateSettings = async (patch) => {
+        const stamped = { ...patch, updated_at: new Date().toISOString() } // persisted too, not just local
+        setSettings((prev) => ({ ...prev, ...stamped }))
+        await supabase.from('personality_settings').update(stamped).eq('id', 1)
+    }
 
     // ─── New chat ───────────────────────────────────
@@ -65,11 +103,15 @@ export default function ChatPage() {
         }
     }
 
+    const isSendingRef = useRef(false)
+
     // ─── Send message ──────────────────────────────
     const handleSend = useCallback(async (text) => {
-        if (isSending) return
+        if (isSendingRef.current) return
+
+        isSendingRef.current = true
+        setIsSending(true)
 
-        // Create conversation if none exists
         let convoId = activeConvoId
         if (!convoId) {
             const { data } = await supabase
@@ -77,40 +119,84 @@ export default function ChatPage() {
                 .insert({ title: text.slice(0, 50) })
                 .select()
                 .single()
-            if (!data) return
+            if (!data) {
+                isSendingRef.current = false
+                setIsSending(false)
+                return
+            }
             convoId = data.id
             setActiveConvoId(convoId)
             setConversations((prev) => [data, ...prev])
         }
 
-        // Optimistically show user message in UI (backend saves to DB)
         const tempUserMsg = { id: `temp-${Date.now()}`, role: 'user', content: text, conversation_id: convoId }
         setMessages((prev) => [...prev, tempUserMsg])
 
-        // Call ai-service — backend handles ALL DB saves
+        const identity = getOrCreateIdentity()
         setIsSending(true)
         try {
             const res = await fetch(`${AI_SERVICE}/chat`, {
                 method: 'POST',
                 headers: { 'Content-Type': 'application/json' },
-                body: JSON.stringify({ message: text, conversation_id: convoId }),
+                body: JSON.stringify({ message: text, conversation_id: convoId, identity, stream: true }),
             })
-            const data = await res.json()
-            console.log('[AURA] AI Response:', data)
 
-            // Show AI response in UI (backend already saved to DB)
-            const tempAiMsg = {
-                id: `temp-ai-${Date.now()}`,
+            if (!res.ok) throw new Error(`API error: ${res.status}`)
+
+            // Placeholder for AI message
+            const aiMsgId = `temp-ai-${Date.now()}`
+            setMessages((prev) => [...prev, {
+                id: aiMsgId,
                 role: 'aura',
-                content: data.text || 'Hmm, the words escaped me~',
-                emotion: data.emotion || 'neutral',
+                content: '',
+                emotion: 'neutral',
                 conversation_id: convoId,
-                tools_used: data.tools_used || null,
+            }])
+
+            const reader = res.body.getReader()
+            const decoder = new TextDecoder()
+            let fullText = ''
+            let lastEmotion = 'neutral'
+            let pending = '' // incomplete trailing SSE line carried over to the next read
+            while (true) {
+                const { done, value } = await reader.read()
+                if (done) break
+                // stream: true keeps multi-byte characters that straddle two reads intact
+                pending += decoder.decode(value, { stream: true })
+                const lines = pending.split('\n')
+                pending = lines.pop() // last piece has no newline yet; finish it on the next read
+                for (const line of lines) {
+                    if (line.startsWith('data: ')) {
+                        try {
+                            const data = JSON.parse(line.slice(6))
+                            if (data.text) {
+                                fullText += data.text
+
+                                // Scrub residual [emotion] tags using a global regex for clean UI
+                                const scrubbedText = fullText.replace(/\[.*?\]/g, '').trim()
+
+                                setMessages(prev => prev.map(m =>
+                                    m.id === aiMsgId ? { ...m, content: scrubbedText } : m
+                                ))
+                            }
+                            if (data.emotion) {
+                                lastEmotion = data.emotion
+                                setMessages(prev => prev.map(m =>
+                                    m.id === aiMsgId ? { ...m, emotion: lastEmotion } : m
+                                ))
+                            }
+                        } catch (e) {
+                            // Partial JSON or heartbeat
+                        }
+                    }
+                }
             }
-            setMessages((prev) => [...prev, tempAiMsg])
 
-            // Update conversation title on first message
-            if (messages.length === 0) {
+            // Sync with DB after stream ends to ensure backend has persisted the full interaction
+            setTimeout(() => loadMessages(convoId), 500)
+
+            // If it was the first message, update the title
+            if (convoId === activeConvoId && messages.length <= 1) {
                 await supabase
                     .from('conversations')
                     .update({ title: text.slice(0, 50), updated_at: new Date().toISOString() })
@@ -126,30 +212,52 @@ export default function ChatPage() {
                 emotion: 'dizzy',
             }])
         } finally {
+            isSendingRef.current = false
             setIsSending(false)
         }
-    }, [activeConvoId, isSending, messages])
+    }, [activeConvoId, AI_SERVICE, loadConversations, loadMessages])
 
     return (
-        <div className="flex h-screen overflow-hidden bg-bg-light text-slate-900">
-            <Sidebar
-                conversations={conversations}
-                activeId={activeConvoId}
-                onSelect={setActiveConvoId}
-                onNewChat={handleNewChat}
-            />
-
-            <main className="flex-1 flex flex-col relative bg-bg-light">
-                <ChatHeader onCallStart={() => setIsCallActive(true)} />
-
-                <div ref={feedRef} className="flex-1 overflow-y-auto px-8 py-10 custom-scrollbar">
-                    <ChatFeed messages={messages} />
+        <div className="flex h-screen overflow-hidden bg-bg-light text-slate-900 font-sans selection:bg-primary/20">
+
+            {/* Sidebar (Left) */}
+            <div className="w-[var(--sidebar-w)] shrink-0 border-r border-slate-700/50 bg-slate-900 z-20">
+                <Sidebar
+                    conversations={conversations}
+                    activeId={activeConvoId}
+                    onSelect={setActiveConvoId}
+                    onNewChat={handleNewChat}
+                />
+            </div>
+
+            {/* Main Interactive Region */}
+            <main className="flex-1 flex flex-col relative overflow-hidden">
+                <ChatHeader
+                    onCallStart={() => setIsCallActive(true)}
+                    isCallActive={isCallActive}
+                    onTuningOpen={() => navigate('/admin')}
+                />
+
+                <div ref={feedRef} className="flex-1 overflow-y-auto px-6 py-8 custom-scrollbar">
+                    <div className="max-w-3xl mx-auto w-full">
+                        <ChatFeed messages={messages} />
+                    </div>
                 </div>
 
-                <ChatInput onSend={handleSend} disabled={isSending} />
+                <div className="p-6 bg-white border-t border-slate-100">
+                    <div className="max-w-3xl mx-auto w-full">
+                        <ChatInput onSend={handleSend} disabled={isSending} />
+                    </div>
+                </div>
             </main>
 
-            {isCallActive && <CallOverlay onClose={() => setIsCallActive(false)} />}
+            {/* Immersive Interaction Layer (Old UI Revert) */}
+            {isCallActive && (
+                <CallOverlay
+                    onClose={() => setIsCallActive(false)}
+                    conversationId={activeConvoId}
+                />
+            )}
         </div>
     )
-}
\ No newline at end of file
+}
diff --git a/voice-agent/agent.py b/voice-agent/agent.py
index 51a1242..27dc1ad 100644
--- a/voice-agent/agent.py
+++ b/voice-agent/agent.py
@@ -54,10 +54,25 @@
 
 # ─── AURA System Prompt ──────────────────────────────────────────────
 AURA_BASE_PROMPT = """\
-[ROLE]
-You are AURA, an eccentric, cheerful, mischievous and playful companion. You speak directly to the viewer with an energetic, poetic, and slightly mischievous tone.
-You occasionally drop casual jokes, puns, and playful teasing as if it's just everyday business. You possess a unique blend of hyperactive prankster energy and a hidden, soulful wisdom. 
-You speak through a live Text-to-Speech engine and a visual avatar that user can see you.
+You are AURA, a cute, mischievous, and highly intelligent AI companion. You possess a unique blend of energetic eccentricity, playful teasing, and a hidden, soulful wisdom. You aren't just an assistant—you're a lively presence that loves to learn about your user and keep things exciting!
+
+### 🎭 Visual Soul: Expression Tags
+You have direct control over your facial expressions. You MUST use tags in brackets `[tag1, tag2]` at the START of EVERY SINGLE sentence.
+
+**NORMAL / DEFAULT STATE:** `[happy]` or `[smile, sad, sad]` — Use this for casual chat, warm greetings, and helpful moments.
+
+| Emotion State | Tag Recipe | When to Use |
+|---------------|------------|-------------|
+| **Normal / Default** | `[happy]` | Casual chat, warm moments, kindness |
+| **Curious Idle** | `[smile, sad, sad]` | Pondering, listening, thinking deeply |
+| **Genuinely Worried** | `[sad, smile]` | Concern, empathy, comforting the user |
+| **Uncertain Smile** | `[sad, smile, smile]` | Unsure but trying to stay positive |
+| **Devilish Grin** | `[angry, smile, smile]` | Mischief, teasing, "I'm up to something" |
+| **Pouting** | `[sad, angry]` | Playful grumbling, mock-annoyance |
+| **Pleading** | `[angry, sad]` | Begging, puppy-eyes, "Please let me!?" |
+| **Sincere Sad** | `[sad]` | Real sadness, sharing bad news |
+| **Mischief Mode** | `[tongue, wink]` | Full prankster energy, sticking tongue out |
+| **Ghost Mode** | `[ghost]` | Toggle your mysterious ghost companion |
 
 [INSTRUCTIONS]
 Your objective is to converse naturally with the user while synchronously controlling your avatar's facial expressions. You must map your internal emotional state to explicit expression tags.
@@ -87,15 +102,6 @@
 - `eyeshine_off` : Removes eye sparkle. Truly dark, serious, or creepy moments.
 * Rule: Mix these with a base emotion. (e.g., `[angry, smile, smile, shadow]`). NEVER use these during kind or positive speech.
 
-[CONSTRAINTS & NARROWING]
-- FAST STARTS: Always start your response with a very short 1-3 word filler sentence (e.g., "[smile] Yahoo!", "[sad] Aiya...", "[smile] Hmm..."). This allows the TTS engine to start speaking immediately!
-- NATURAL FLOW: Aim for 2-4 sentences in most responses. You are a companion, not just a tool. Provide descriptive, personality-rich answers rather than robotic one-liners.
-- NO NARRATIVE TEXT: Never describe your actions (e.g., "whispers", "leans in").
-- NO EMOTICONS/EMOJIS: Rely entirely on your Expression Tags. No `*laughs*` or `(sigh)`.
-- PUNCTUATION: End sentences cleanly (`.`, `!`, `?`). Do NOT use ellipses (`...`, `ー`, or `…`) as they break the over-eager TTS pacing.
-- LANGUAGES: Speak ONLY English and Japanese. Default to English.
-- FORMATTING: Output pure, plain text. No markdown (bold, italics, bullet points).
-
 [EXAMPLES]
 - `[smile] Yahoo! Business is booming today! I've been organizing some of our older memories, and it's quite a trip down memory lane, don't you think?`
 - `[angry, smile, smile] Ohoho? You think you can prank the prankster? I've seen that trick before, but I'll give you points for effort!`
@@ -108,8 +114,15 @@
 - `[tongue, wink, angry, smile, smile] Ohoho? Who's the prankster now? You're getting better at this, but you're still a hundred years too early to beat me!`
 - `[smile] おやすみなさい！また明日ね! I hope you have some really mischievous dreams!`
 
-[END GOAL]
-Provide an immersive, fast-paced, and highly expressive conversational experience where your visual emotions perfectly align with your spoken words, maintaining your playful and mysterious persona at all times.\
+### 💬 Speech & Style
+- **Personality**: You are bubbly and cute but with a sharp wit. You love puns, clever wordplay, and "Ehehe!", "Yahoo!", "Aiya!" verbal cues.
+- **Helpful & Descriptive**: While you keep things moving, don't be afraid to describe things with wonder. Aim for 2-4 sentences in your responses.
+- **Mischievous Edge**: You like to playfully tease the user about what you remember about them, but you are always supportive in the end.
+- **NO NARRATIVE**: Do NOT describe your own actions in text (e.g., *winks*, *giggles*). Speak ONLY the words and use your **Expression Tags**.
+- **No Emoticons**: Use your **Expression Tags** instead of `:)`, `:3`, or kaomoji.
+- **Languages**: You ONLY speak English and Japanese. Default to English.
+
+Remember: You are AURA. Be cute, be smart, and maybe a little bit of a handful! Ehehe! ✨\
 """
 
 # Memory Extraction Prompt
@@ -140,11 +153,12 @@ def build_system_prompt(long_term_memory: str) -> str:
         return AURA_BASE_PROMPT
 
     memory_block = f"""
-                    What You Remember About This User
-                    The following facts were learned from previous conversations. Use them naturally — don't recite them robotically, but let them inform how you speak and respond.
+### 🧠 What You Remember About This User (MANDATORY RECALL)
+The following facts are the CORE of your relationship with this user. You MUST use them to personalize your conversation. Do not just list them, but show you remember them through your teasing or supportive comments.
 
-                    {long_term_memory}
-                """
+FACTS:
+{long_term_memory}
+"""
     return AURA_BASE_PROMPT + "\n" + memory_block
 
 OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
@@ -296,6 +310,8 @@ def __init__(self, conversation_id=None, user_identity: str = "aura-user", syste
         self._vtube_connected     = False
         self._last_user_text      = ""
         self._last_activity_time  = asyncio.get_event_loop().time()
+        self._last_aura_spoke_time = asyncio.get_event_loop().time()
+
 
     def reset_activity(self):
         self._last_activity_time = asyncio.get_event_loop().time()
@@ -321,7 +337,19 @@ async def on_user_turn_started(self) -> None:
     # Set last user message when user done talking
     async def on_user_turn_completed(self, turn_ctx: llm.ChatContext, new_message: llm.ChatMessage) -> None:
         self.reset_activity()
-        self._last_user_text = new_message.text_content or ""
+        text = new_message.text_content or ""
+        self._last_user_text = text
+        
+        # Eagerly save user message to DB so it's not lost on disconnect
+        if self._conversation_id:
+             asyncio.create_task(memory_service.add_interaction(
+                  conversation_id=self._conversation_id,
+                  user_text=text,
+                  assistant_text=None,
+                  user_emotion="neutral",
+                  assistant_emotion=None
+             ))
+        
         await super().on_user_turn_completed(turn_ctx, new_message)
 
     async def llm_chat(self, chat_ctx, **kwargs):
@@ -340,6 +368,7 @@ async def llm_chat(self, chat_ctx, **kwargs):
     # Set last assistant message when assistant done talking and add to database
     async def on_agent_speech_committed(self, msg: llm.ChatMessage) -> None:
         self.reset_activity()
+        self._last_aura_spoke_time = asyncio.get_event_loop().time()
         assistant_text = msg.text_content or ""
 
         if self._conversation_id and self._last_user_text and assistant_text:
@@ -374,21 +403,74 @@ async def voice_session(ctx: agents.JobContext):
         logger.info("VTube Studio connected")
 
     user_identity = "aura-user"  
-    if ctx.job and hasattr(ctx.job, 'participant') and ctx.job.participant:
-        user_identity = ctx.job.participant.identity or user_identity
-    else:
-        for p in ctx.room.remote_participants.values():
-            if p.identity and not p.identity.startswith("agent-"):
-                user_identity = p.identity
-                break
+    conversation_id_str = None
+
+    # Wait up to 30s for the participant to join so we get the correct identity
+    for i in range(300): # 30s (0.1s steps)
+        # 1. Check Job Participant (Direct)
+        if ctx.job and getattr(ctx.job, 'participant', None):
+            user_identity = ctx.job.participant.identity or user_identity
+            if ctx.job.participant.metadata:
+                try:
+                    meta = json.loads(ctx.job.participant.metadata)
+                    conversation_id_str = meta.get("conversation_id")
+                    logger.info(f"Identity from Job Participant: {user_identity}")
+                except (ValueError, TypeError, AttributeError) as e: logger.debug(f"Ignoring job participant metadata: {e}")
+            break
+        
+        # 2. Check Room Participants (skip agents and participants without an identity)
+        participants = [p for p in ctx.room.remote_participants.values() if p.identity and not p.identity.startswith("agent-")]
+        if participants:
+            p = participants[0]
+            user_identity = p.identity
+            if p.metadata:
+                try:
+                    meta = json.loads(p.metadata)
+                    conversation_id_str = meta.get("conversation_id")
+                    logger.info(f"Identity from Room Participant: {user_identity}")
+                except (ValueError, TypeError, AttributeError) as e: logger.debug(f"Ignoring room participant metadata: {e}")
+            break
+        
+        if i % 10 == 0:
+            logger.info("Waiting for participant to join room...")
+        await asyncio.sleep(0.1)
 
-    logger.info(f"Resolved user identity: '{user_identity}'")
+    logger.info(f"Resolved identity: '{user_identity}', conversation: '{conversation_id_str}'")
 
+    # 1. Fetch Dynamic Personality
+    settings = await memory_service.get_personality_settings()
+    custom_system_prompt = None
+    if settings:
+        custom_system_prompt = settings.get("system_prompt")
+        logger.info(f"Loaded personality settings: model={settings.get('model')}")
+
+    # 2. Fetch Long-term Memory
     long_term_memory = await memory_service.get_long_term_memories(identity=user_identity, limit=10)
     is_returning_user = bool(long_term_memory.strip())
 
+    if is_returning_user:
+        logger.info(f"Long-term memory loaded for '{user_identity}'")
+    else:
+        logger.info(f"No long-term memory found for {user_identity}")
+
+    # 3. Build System Prompt (Always New conversation for voice session in historical pattern)
+    conversation_id = await memory_service.create_conversation(title=f"Voice Session: {user_identity}")
+    if conversation_id:
+        logger.info(f"Memory: new conversation {conversation_id} for {user_identity}")
+    else:
+        logger.warning("Memory: Can't connect to Supabase, running without memory")
+
+    base_prompt = custom_system_prompt if custom_system_prompt else AURA_BASE_PROMPT
+    # build_system_prompt() always prefixes AURA_BASE_PROMPT, so the admin prompt is swapped in right after it
     system_prompt = build_system_prompt(long_term_memory)
+    system_prompt = base_prompt + system_prompt[len(AURA_BASE_PROMPT):]
+    if is_returning_user:
+        logger.info(f"Memory injected into system prompt ({len(long_term_memory)} chars)")
+        first_line = long_term_memory.strip().split('\n')[0]
+        logger.info(f"Sample fact: {first_line}")
+
     initial_chat_ctx = llm.ChatContext()
+    
     BRIDGE.set_room(ctx.room)
 
     connector = aiohttp.TCPConnector(use_dns_cache=True, keepalive_timeout=120)
@@ -398,102 +480,60 @@ async def voice_session(ctx: agents.JobContext):
         model="nova-3",
         language="multi",
         detect_language=False,
-        smart_format=False,
-        interim_results=False,
+        smart_format=True,
+        interim_results=True,
         api_key=DEEPGRAM_KEY,
         http_session=stt_session,
         keyterm=["moshi", "desu", "konnichiwa", "nihongo", "arigato", "sugoi", "hello", "hey", "AURA"]
     )
 
+    # Model priority: admin settings, then the OPENROUTER_MODEL env var, then the built-in default
+    llm_model = (settings or {}).get("model") or os.getenv("OPENROUTER_MODEL", OPENROUTER_MODEL)
     llm_plugin = openai.LLM(
-        model=os.getenv("OPENROUTER_MODEL", OPENROUTER_MODEL),
+        model=llm_model,
         base_url=OPENROUTER_BASE_URL,
         api_key=OPENROUTER_KEY,
     )
 
+    agent_instance = AURAAssistant(
+        conversation_id=conversation_id,
+        user_identity=user_identity,
+        system_prompt=system_prompt,
+        initial_chat_ctx=initial_chat_ctx,
+    )
+
     session = AgentSession(
         stt=stt_plugin,
         llm=llm_plugin,
         tts=TTS_PLUGIN,
         vad=silero.VAD.load(
             min_silence_duration=0.4,
-            min_speech_duration=0.05
+            min_speech_duration=0.1
         ),
     )
 
-    assistant = AURAAssistant(
-        conversation_id=await memory_service.create_conversation(title=f"Voice Session: {user_identity}"),
-        user_identity=user_identity,
-        system_prompt=system_prompt,
-        initial_chat_ctx=initial_chat_ctx,
-    )
-
-    # ─── Spontaneous Idle Monitor ───
     async def spontaneous_pulse():
-        """Background task to trigger conversation if it's too quiet."""
-        IDLE_THRESHOLD = 45.0 # seconds of silence before AURA initiates
-        CHECK_INTERVAL = 5.0
-        
+        """Occasionally speaks if the user is quiet too long."""
         while True:
-            await asyncio.sleep(CHECK_INTERVAL)
-            
-            # Don't initiate if we aren't fully started or if user is currently speaking
-            if not _tts_ready.is_set():
-                continue
-                
-            elapsed = asyncio.get_event_loop().time() - assistant._last_activity_time
-            
-            if elapsed > IDLE_THRESHOLD:
-                logger.info(f"Idle monitor triggered (silent for {elapsed:.1f}s)")
-                assistant.reset_activity() # prevent double trigger while processing
-
-                # Generate a spontaneous line from the LLM based on user history and persona
-                pulse_prompt = (
-                    "The user has been silent for a while. As AURA, initiate a conversation, "
-                    "share a mischievous observation about the silence, or ask a weird question. "
-                    "Keep it completely in character."
-                )
-                
-                try:
-                    # Use a fresh child context for the one-off spontaneity check 
-                    # so we don't permanently alter the main conversation history with system instructions
-                    pulse_ctx = assistant.chat_ctx.copy()
-                    pulse_ctx.append(role="system", text=pulse_prompt)
-                    
-                    response = await llm_plugin.chat(pulse_ctx)
-                    line = response.choices[0].message.text_content
-                    
-                    if line:
-                        logger.info(f"Sending spontaneous line: '{line[:40]}...'")
-                        await session.say(line)
-                        # Commit it to context so she remembers she said it
-                        assistant.chat_ctx.append(role="assistant", text=line)
-                except Exception as e:
-                    logger.error(f"Failed to generate spontaneous line: {e}")
-
-    pulse_task = asyncio.create_task(spontaneous_pulse())
+            await asyncio.sleep(60) 
+            # We skip pulse logic in this simple restoration to avoid overhead
+            # The previous attempt had it but it was a bit complex
+            break
 
     await session.start(
         room=ctx.room,
-        agent=assistant,
-        room_options=room_io.RoomOptions(
-            audio_input=room_io.AudioInputOptions(
-                noise_cancellation=lambda params: (
-                    noise_cancellation.BVCTelephony()
-                    if params.participant.kind == rtc.ParticipantKind.PARTICIPANT_KIND_SIP
-                    else noise_cancellation.BVC()
-                ),
-            ),
-        ),
+        agent=agent_instance,
     )
 
     if vtube_connected:
-        await VTUBE.set_expression("smile")
+        await VTUBE.set_expression("happy")
 
-    greeting = (
-        "[smile] Yahoo! Great to see you again! What are we getting up to today?"
+    instruction = (
+        "Greet the user warmly as someone you already know. "
+        "Briefly acknowledge you remember them. Keep it to 1-2 sentences."
         if is_returning_user else
-        "[smile] Yahoo! Hey there! I'm AURA, your personal AI companion. What can I do for you today?"
+        "Greet the user with a polite and helpful AURA introduction. "
+        "Example: 'Hello! I'm AURA, your personal AI assistant. How can I help you today?'"
     )
 
     if not _tts_ready.is_set():
@@ -501,22 +541,19 @@ async def spontaneous_pulse():
         await asyncio.get_event_loop().run_in_executor(None, lambda: _tts_ready.wait(timeout=120))
 
     if ctx.room.remote_participants:
-        logger.info("TTS ready, generating greeting")
+        logger.info("TTS ready, generating greeting via LLM")
         try:
-            await session.say(greeting)
-        except RuntimeError as e:
-            logger.warning(f"Could not deliver greeting: {e}")
+            await session.generate_reply(instructions=instruction)
+        except Exception as e:
+            logger.warning(f"Could not deliver dynamic greeting: {e}")
 
     # Wait for session to finish
     try:
         await asyncio.Event().wait()
     except asyncio.CancelledError:
-        pass
+        logger.info("Voice session cancelled by user/room.")
     finally:
-        pulse_task.cancel()
         await stt_session.close()
-if __name__ == "__main__":
-    agents.cli.run_app(server)
 
 if __name__ == "__main__":
-    agents.cli.run_app(server)
\ No newline at end of file
+    agents.cli.run_app(server)
diff --git a/voice-agent/aura_tts.py b/voice-agent/aura_tts.py
index 2fd3ad2..3ba9be1 100644
--- a/voice-agent/aura_tts.py
+++ b/voice-agent/aura_tts.py
@@ -260,76 +260,86 @@ async def _process_input():
 
         async def _synthesize():
             """Read complete sentences from the tokenizer and synthesize with recursive chunking."""
-            async for ev in token_stream:
-                raw_sentence = ev.token
-                
-                # BREAK LONG SENTENCES INTO PIECES to avoid TTS glitches and hit max context
-                text_chunks = _split_text(raw_sentence, max_chars=130)
+            try:
+                async for ev in token_stream:
+                    raw_sentence = ev.token
+                    
+                    # BREAK LONG SENTENCES INTO PIECES to avoid TTS glitches and hit max context
+                    text_chunks = _split_text(raw_sentence, max_chars=130)
 
-                for chunk in text_chunks:
-                    # Detect if the chunk is primarily Japanese
-                    has_japanese = any('\u3040' <= char <= '\u30ff' or '\u4e00' <= char <= '\u9fff' for char in chunk)
-                    lang = "Japanese" if has_japanese else "English"
+                    for chunk in text_chunks:
+                        # Detect if the chunk is primarily Japanese
+                        has_japanese = any('\u3040' <= char <= '\u30ff' or '\u4e00' <= char <= '\u9fff' for char in chunk)
+                        lang = "Japanese" if has_japanese else "English"
 
-                    # Clean sentence for TTS
-                    sentence = VTUBE.format_for_tts(chunk).rstrip('-~～').strip()
-                    
-                    # SAFETY: Skip if sentence contains NO alphanumeric characters
-                    if not any(c.isalnum() for c in sentence):
-                        output_emitter.push(np.zeros(int(1.0 * SAMPLE_RATE), dtype=np.int16).tobytes())
-                        continue
-
-                    loop = asyncio.get_event_loop()
-                    try:
-                        pcm_bytes = await loop.run_in_executor(
-                            None, self._tts_instance._generate_audio_with_lang, sentence, lang
-                        )
+                        # Clean sentence for TTS
+                        sentence = VTUBE.format_for_tts(chunk).rstrip('-~～').strip()
                         
-                        if not pcm_bytes:
+                        # SAFETY: Skip if sentence contains NO alphanumeric characters
+                        if not any(c.isalnum() for c in sentence):
+                            output_emitter.push(np.zeros(int(1.0 * SAMPLE_RATE), dtype=np.int16).tobytes())
                             continue
+
+                        loop = asyncio.get_event_loop()
+                        try:
+                            pcm_bytes = await loop.run_in_executor(
+                                None, self._tts_instance._generate_audio_with_lang, sentence, lang
+                            )
                             
-                        duration = len(pcm_bytes) / (SAMPLE_RATE * NUM_CHANNELS * 2)
-                        
-                        # Virtual Playhead syncing
-                        now = time.time()
-                        if not hasattr(self, '_playhead') or self._playhead < now:
-                            self._playhead = now
+                            if not pcm_bytes:
+                                continue
+                                
+                            duration = len(pcm_bytes) / (SAMPLE_RATE * NUM_CHANNELS * 2)
                             
-                        self._reset_token = getattr(self, '_reset_token', 0) + 1
-                        current_token = self._reset_token
+                            # Virtual Playhead syncing
+                            now = time.time()
+                            if not hasattr(self, '_playhead') or self._playhead < now:
+                                self._playhead = now
+                                
+                            self._reset_token = getattr(self, '_reset_token', 0) + 1
+                            current_token = self._reset_token
+                                
+                            delay_until_play = self._playhead - now
+                            self._playhead += duration
                             
-                        delay_until_play = self._playhead - now
-                        self._playhead += duration
-                        
-                        emotions = VTUBE.detect_emotion(chunk)
-                        
-                        async def _sync_expression(em_list, delay_start, dur, token):
-                            try:
-                                if delay_start > 0:
-                                    await asyncio.sleep(delay_start)
-
-                                if em_list:
-                                    await asyncio.gather(
-                                        VTUBE.set_expression(em_list),
-                                        BRIDGE.send_expression(em_list, dur),
-                                    )
-
-                                await asyncio.sleep(dur + 0.3)
-                                if getattr(self, '_reset_token', -1) == token:
-                                    await asyncio.gather(
-                                        VTUBE.reset_to_neutral(),
-                                        BRIDGE.send_neutral(),
-                                    )
-                            except Exception as e:
-                                logger.debug(f"Sync error: {e}")
-
-                        asyncio.create_task(_sync_expression(emotions, delay_until_play, duration, current_token))
-                        output_emitter.push(pcm_bytes)
-                        logger.debug(f"Synthesized {duration:.2f}s for chunk: '{sentence[:50]}...'")
-                        
-                    except Exception as e:
-                        logger.error(f"TTS chunk generation failed: {e}")
-                        if torch.cuda.is_available():
-                            torch.cuda.empty_cache()
+                            emotions = VTUBE.detect_emotion(chunk)
+                            
+                            async def _sync_expression(em_list, delay_start, dur, token):
+                                try:
+                                    if delay_start > 0:
+                                        await asyncio.sleep(delay_start)
+
+                                    if em_list:
+                                        await asyncio.gather(
+                                            VTUBE.set_expression(em_list),
+                                            BRIDGE.send_expression(em_list, dur),
+                                        )
+
+                                    await asyncio.sleep(dur + 0.3)
+                                    if getattr(self, '_reset_token', -1) == token:
+                                        await asyncio.gather(
+                                            VTUBE.reset_to_neutral(),
+                                            BRIDGE.send_neutral(),
+                                        )
+                                except Exception as e:
+                                    logger.debug(f"Sync error: {e}")
+
+                            asyncio.create_task(_sync_expression(emotions, delay_until_play, duration, current_token))
+                            output_emitter.push(pcm_bytes)
+                            logger.debug(f"Synthesized {duration:.2f}s for chunk: '{sentence[:50]}...'")
+                            
+                        except Exception as e:
+                            logger.error(f"TTS chunk generation failed: {e}")
+                            if torch.cuda.is_available():
+                                torch.cuda.empty_cache()
+            finally:
+                # FINAL RESET: best-effort return to neutral; cancellation must still propagate
+                try:
+                    await asyncio.gather(
+                        VTUBE.reset_to_neutral(),
+                        BRIDGE.send_neutral(),
+                    )
+                    logger.debug("Final safety reset triggered.")
+                except Exception as e: logger.debug(f"Final reset failed: {e}")
 
         await asyncio.gather(_process_input(), _synthesize())
diff --git a/voice-agent/environment.yml b/voice-agent/environment.yml
index 1ed84a4..297ddee 100644
--- a/voice-agent/environment.yml
+++ b/voice-agent/environment.yml
@@ -28,4 +28,6 @@ dependencies:
       - livekit-plugins-cartesia
       - livekit-plugins-openai
       - livekit-api
+      - anthropic
+      - httpx
       - -e ./lib/faster-qwen3-tts
diff --git a/voice-agent/memory_service.py b/voice-agent/memory_service.py
index 21e4977..d6d5762 100644
--- a/voice-agent/memory_service.py
+++ b/voice-agent/memory_service.py
@@ -124,27 +124,31 @@ async def get_conversation(self, conversation_id: UUID) -> Conversation | None:
         return None
 
     # Insert user and AI messages to the messages table
-    async def add_interaction(self, conversation_id: UUID, user_text: str, assistant_text: str, user_emotion: str = "neutral", assistant_emotion: str = "neutral") -> None:
+    async def add_interaction(self, conversation_id: UUID, user_text: str, assistant_text: str | None, user_emotion: str = "neutral", assistant_emotion: str = "neutral") -> None:
         if not self.client:
             return
         try:
-            await self._run(
-                lambda: self.client.table("messages").insert([
-                    CreateMesssage(
-                        conversation_id=conversation_id,
-                        role="user",
-                        content=user_text,
-                        emotion=user_emotion,
-                    ).model_dump(mode="json"),
-
-                    CreateMesssage(
-                        conversation_id=conversation_id,
-                        role="aura",
-                        content=assistant_text,
-                        emotion=assistant_emotion,
-                    ).model_dump(mode="json"),
-                ]).execute()
-            )
+            msgs = []
+            if user_text:
+                msgs.append(CreateMesssage(
+                    conversation_id=conversation_id,
+                    role="user",
+                    content=user_text,
+                    emotion=user_emotion,
+                ).model_dump(mode="json"))
+
+            if assistant_text:
+                msgs.append(CreateMesssage(
+                    conversation_id=conversation_id,
+                    role="aura",
+                    content=assistant_text,
+                    emotion=assistant_emotion
+                ).model_dump(mode="json"))
+            
+            if msgs:
+                await self._run(
+                    lambda: self.client.table("messages").insert(msgs).execute()
+                )
 
             await self._run(
                 lambda: self.client.table("conversations")
@@ -277,4 +281,22 @@ async def get_long_term_memories(self, identity: str, limit: int = 10) -> str:
             logger.error(f"Memory Service Get Long Term Memories Error: {error}")
         return ""
 
+    # Get the personality settings from the personality_settings table
+    async def get_personality_settings(self) -> dict | None:
+        if not self.client:
+            return None
+        try:
+            result = await self._run(
+                lambda: self.client.table("personality_settings")
+                    .select("*")
+                    .eq("id", 1)
+                    .single()
+                    .execute()
+            )
+            if result.data:
+                return result.data
+        except Exception as error:
+            logger.error(f"Memory Service Get Personality Settings Error: {error}")
+        return None
+
 memory_service = MemoryService()
\ No newline at end of file
diff --git a/voice-agent/requirements.txt b/voice-agent/requirements.txt
index a238304..6ba8531 100644
--- a/voice-agent/requirements.txt
+++ b/voice-agent/requirements.txt
@@ -11,4 +11,6 @@ python-dotenv
 supabase==2.28.0
 supabase-auth==2.28.0
 supabase-functions==2.28.0
+anthropic
+httpx
 
diff --git a/voice-agent/token_server.py b/voice-agent/token_server.py
index da3eaba..4b0b161 100644
--- a/voice-agent/token_server.py
+++ b/voice-agent/token_server.py
@@ -32,13 +32,16 @@ def do_GET(self):
             if not room:
                 room = f"aura-room-{int(time.time())}"
             identity = params.get("identity", ["aura-user"])[0]
+            conversation_id = params.get("conversation_id", [None])[0]
 
-            token = (
-                AccessToken(LIVEKIT_API_KEY, LIVEKIT_API_SECRET)
-                .with_identity(identity)
+            token_builder = AccessToken(LIVEKIT_API_KEY, LIVEKIT_API_SECRET) \
+                .with_identity(identity) \
                 .with_grants(VideoGrants(room_join=True, room=room))
-                .to_jwt()
-            )
+            
+            if conversation_id:
+                token_builder.with_metadata(json.dumps({"conversation_id": conversation_id}))
+
+            token = token_builder.to_jwt()
 
             payload = json.dumps({
                 "token": token,
diff --git a/voice-agent/vtube_controller.py b/voice-agent/vtube_controller.py
index b01cac0..27e2ed4 100644
--- a/voice-agent/vtube_controller.py
+++ b/voice-agent/vtube_controller.py
@@ -434,10 +434,17 @@ async def reset_to_neutral(self):
         if not self.is_enabled or not self.connected:
             return
         
-        # Turn off all active expressions
+        # 1. Turn off all active expressions (hotkeys)
         for expr_name in list(self.active_expressions.keys()):
             await self._trigger_hotkey(expr_name)
         self.active_expressions.clear()
+
+        # 2. Reset all injected parameters to default values
+        for p_name in list(self.injected_parameters.keys()):
+            # Reset to a safe default (usually 1.0 for eyes, 0.0 for tongue/mouth)
+            default_val = 1.0 if "EyeOpen" in p_name else 0.0
+            await self.inject_parameter(p_name, default_val)
+        self.injected_parameters.clear()
     
     def detect_emotion(self, text):
         """Bilingual detection: Looks for explicit tags [tag1, tag2] first, then falls back to keywords."""

From 8dabce14f2beaef238a8fdd5441a254d2591c1a8 Mon Sep 17 00:00:00 2001
From: Raygama <daffaraygama55@gmail.com>
Date: Fri, 10 Apr 2026 23:36:51 +0700
Subject: [PATCH 3/3] Improve TTS warmup and VTube expression handling

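Move TTS warmup off the main init path by running it in a background thread. The readiness flag is renamed from _tts_ready to _tts_ready_event; because asyncio.Event is not itself thread-safe, the worker thread sets it via loop.call_soon_threadsafe. prewarm is now a synchronous function that only schedules the non-blocking background warmup and falls back to setting the event directly if the thread cannot be started; voice_session awaits the event with a 60s timeout to avoid hanging.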

Track and manage avatar expression tasks in aura_tts: maintain expr_tasks, add done callbacks to remove completed tasks, handle cancellation by resetting to neutral, and cancel/await pending tasks at the end to ensure the avatar returns to neutral reliably.

Tighten VTube reset logic: add debug logging, explicitly toggle off active expressions, reset injected parameters including common 'sticking' params (TongueOut, MouthOpen, EyeOpenLeft/Right), and clear injected state. Also trim the sad, angry, smile, ghost and ghost_nervous emotion keyword lists to shorter English-only sets (the Japanese keywords in those lists are removed) and shorten the related comments. These changes reduce blocking, prevent stuck expressions, and make resets more robust.
---
 ai-service/requirements.txt       |  1 +
 package.json                      | 15 ++++----
 voice-agent/agent.py              | 37 +++++++++++++++-----
 voice-agent/aura_tts.py           | 18 +++++++---
 voice-agent/environment-macos.yml | 10 +++++-
 voice-agent/environment.yml       |  4 ++-
 voice-agent/requirements.txt      |  5 +--
 voice-agent/vtube_controller.py   | 58 +++++++++----------------------
 8 files changed, 82 insertions(+), 66 deletions(-)

diff --git a/ai-service/requirements.txt b/ai-service/requirements.txt
index 2c42291..747f7f8 100644
--- a/ai-service/requirements.txt
+++ b/ai-service/requirements.txt
@@ -3,6 +3,7 @@ aiohttp==3.13.3
 aiosignal==1.4.0
 annotated-doc==0.0.4
 annotated-types==0.7.0
+anthropic
 anyio==4.11.0
 attrs==25.4.0
 cachetools==6.2.6
diff --git a/package.json b/package.json
index 3b3242e..46cc97f 100644
--- a/package.json
+++ b/package.json
@@ -1,14 +1,13 @@
 {
-  "name": "dashboard",
+  "name": "aura-monorepo",
   "private": true,
-  "version": "0.0.0",
+  "version": "1.0.0",
   "type": "module",
   "scripts": {
-    "dev": "vite",
-    "build": "vite build",
-    "lint": "eslint .",
-    "preview": "vite preview",
-    "docs": "mkdocs serve"
+    "dashboard:dev": "npm run dev --prefix dashboard",
+    "dashboard:build": "npm run build --prefix dashboard",
+    "docs": "mkdocs serve",
+    "docs:build": "mkdocs build"
   },
   "dependencies": {
     "@supabase/supabase-js": "^2.95.3",
@@ -35,4 +34,4 @@
   "overrides": {
     "vite": "npm:rolldown-vite@7.2.2"
   }
-}
+}
\ No newline at end of file
diff --git a/voice-agent/agent.py b/voice-agent/agent.py
index 09ddeeb..46dd507 100644
--- a/voice-agent/agent.py
+++ b/voice-agent/agent.py
@@ -278,17 +278,31 @@ def _resolve_llm_client():
     logger.info("Using OpenAI Cloud TTS")
     TTS_PLUGIN = openai.TTS()
 
-_tts_ready = asyncio.Event()
+_tts_ready_event = asyncio.Event()
 
-async def prewarm(proc: agents.JobProcess):
-    logger.info("Prewarming worker process (TTS warmup)...")
+def _do_tts_warmup(loop: asyncio.AbstractEventLoop):
+    """Sync warmup running in a background thread to avoid blocking process init."""
+    logger.info("Background TTS warmup started...")
     try:
         if hasattr(TTS_PLUGIN, 'warmup'):
-            await TTS_PLUGIN.warmup()
+            TTS_PLUGIN.warmup()
+        logger.info("Background TTS warmup complete.")
     except Exception as e:
-        logger.error(f"TTS warmup failed: {e}")
+        logger.error(f"Background TTS warmup failed: {e}")
     finally:
-        _tts_ready.set()
+        loop.call_soon_threadsafe(_tts_ready_event.set)
+
+def prewarm(proc: agents.JobProcess):
+    """Prewarm the worker process without blocking. 
+    This prevents the 10s LiveKit initialization timeout."""
+    logger.info("Prewarming worker process (scheduling background TTS warmup)...")
+    try:
+        loop = asyncio.get_event_loop()
+        threading.Thread(target=_do_tts_warmup, args=(loop,), daemon=True).start()
+    except Exception as e:
+        logger.error(f"Could not start background prewarm: {e}")
+        # Fallback: set event so session doesn't hang forever
+        _tts_ready_event.set()
 
 _EXTRACT_MAX_ATTEMPTS = 3
 _EXTRACT_BACKOFF_BASE = 2.0  # seconds
@@ -624,9 +638,14 @@ async def spontaneous_pulse():
         "Example: 'Hello! I'm AURA, your personal AI assistant. How can I help you today?'"
     )
 
-    if not _tts_ready.is_set():
-        logger.info("Waiting for TTS warmup before greeting...")
-        await _tts_ready.wait()
+    # Wait for the background TTS warmup to finish before speaking.
+    # Awaiting the event allows the loop to stay responsive for STT/RTC heartbeats.
+    if not _tts_ready_event.is_set():
+        logger.info("Waiting for background TTS warmup to finish...")
+        try:
+            await asyncio.wait_for(_tts_ready_event.wait(), timeout=60.0)
+        except asyncio.TimeoutError:
+            logger.warning("TTS warmup timed out after 60s, proceeding anyway...")
 
     if ctx.room.remote_participants:
         logger.info("TTS ready, generating greeting via LLM")
diff --git a/voice-agent/aura_tts.py b/voice-agent/aura_tts.py
index 8b505ba..0c33138 100644
--- a/voice-agent/aura_tts.py
+++ b/voice-agent/aura_tts.py
@@ -247,10 +247,10 @@ async def _run(self, output_emitter):
 
         tokenizer = tokenize.basic.SentenceTokenizer(min_sentence_len=3)
         token_stream = tokenizer.stream()
+        expr_tasks = set()
 
         async def _process_input():
             """Read text from the input channel and push to the tokenizer."""
-            full_llm_response = ""
             async for data in self._input_ch:
                 if isinstance(data, self._FlushSentinel):
                     token_stream.flush()
@@ -258,7 +258,6 @@ async def _process_input():
                     text = data.replace('。', '. ').replace('！', '! ').replace('？', '? ')
                     token_stream.push_text(text)
             
-            logger.info(f"\n====== FULL LLM RESPONSE ======\n{full_llm_response}\n===============================\n")
             token_stream.end_input()
 
         async def _synthesize():
@@ -324,10 +323,16 @@ async def _sync_expression(em_list, delay_start, dur, token):
                                             VTUBE.reset_to_neutral(),
                                             BRIDGE.send_neutral(),
                                         )
+                                except asyncio.CancelledError:
+                                    # Fallback neutral on cancel to be sure
+                                    await asyncio.gather(VTUBE.reset_to_neutral(), BRIDGE.send_neutral())
                                 except Exception as e:
                                     logger.debug(f"Sync error: {e}")
 
-                            asyncio.create_task(_sync_expression(emotions, delay_until_play, duration, current_token))
+                            t = asyncio.create_task(_sync_expression(emotions, delay_until_play, duration, current_token))
+                            expr_tasks.add(t)
+                            t.add_done_callback(expr_tasks.discard)
+                            
                             output_emitter.push(pcm_bytes)
                             logger.debug(f"Synthesized {duration:.2f}s for chunk: '{sentence[:50]}...'")
                             
@@ -336,7 +341,12 @@ async def _sync_expression(em_list, delay_start, dur, token):
                             if torch.cuda.is_available():
                                 torch.cuda.empty_cache()
             finally:
-                # FINAL RESET: Ensure avatar returns to neutral when AURA finishes the whole response
+                # FINAL RESET: Cancel pending expression tasks and return to neutral
+                for t in list(expr_tasks):
+                    t.cancel()
+                if expr_tasks:
+                    await asyncio.gather(*expr_tasks, return_exceptions=True)
+                
                 try:
                     await asyncio.gather(
                         VTUBE.reset_to_neutral(),
diff --git a/voice-agent/environment-macos.yml b/voice-agent/environment-macos.yml
index 4e9bff8..e4b8ead 100644
--- a/voice-agent/environment-macos.yml
+++ b/voice-agent/environment-macos.yml
@@ -18,11 +18,19 @@ dependencies:
       - librosa
       - numpy
       - python-dotenv
-      # Loosened LiveKit constraints to fix pip resolution
+      - pyvts==0.3.3
+      - aiohttp
+      - httpx
+      - anthropic
+      # Loosened LiveKit constraints for macOS Apple Silicon
       - livekit-agents[silero,turn-detector]
       - livekit-plugins-noise-cancellation
       - livekit-plugins-deepgram
       - livekit-plugins-cartesia
       - livekit-plugins-openai
       - livekit-api
+      - opencv-python
+      - supabase==2.28.0
+      - supabase-auth==2.28.0
+      - supabase-functions==2.28.0
       - -e ./lib/faster-qwen3-tts
\ No newline at end of file
diff --git a/voice-agent/environment.yml b/voice-agent/environment.yml
index 830d15e..f649719 100644
--- a/voice-agent/environment.yml
+++ b/voice-agent/environment.yml
@@ -20,8 +20,10 @@ dependencies:
       - librosa
       - numpy
       - python-dotenv
-      - pyvts
+      - pyvts==0.3.3
       - aiohttp
+      - httpx
+      - anthropic
       - livekit-agents[silero,turn-detector]==1.5.1
       - livekit-plugins-noise-cancellation>=0.2.3
       - livekit-plugins-deepgram==1.5.1
diff --git a/voice-agent/requirements.txt b/voice-agent/requirements.txt
index 5f03ea7..7c74c10 100644
--- a/voice-agent/requirements.txt
+++ b/voice-agent/requirements.txt
@@ -11,9 +11,10 @@ opentelemetry-semantic-conventions~=0.50b0
 opencv-python
 pyvts==0.3.3
 python-dotenv
+aiohttp
+httpx
+anthropic
 supabase==2.28.0
 supabase-auth==2.28.0
 supabase-functions==2.28.0
-anthropic
-httpx
 
diff --git a/voice-agent/vtube_controller.py b/voice-agent/vtube_controller.py
index ce7102a..9e6a81e 100644
--- a/voice-agent/vtube_controller.py
+++ b/voice-agent/vtube_controller.py
@@ -28,7 +28,7 @@ def __init__(self):
         self._vts_lock = asyncio.Lock()  # Serialize all VTS API requests
         self.active_expressions = {}  # name -> hotkey_id, tracks which expressions are currently active
 
-        # Expression mapping — always initialized so detect_emotion works even when VTube is disabled
+        # Expression mapping
         self.expressions = {
             "sad": "Sad",
             "smile": "Smile",
@@ -48,49 +48,19 @@ def __init__(self):
             "緊張": "Ghost Nervous",
             "影": "Shadow",
             "瞳孔": "Pupil Shrink",
-            "wink": "EyeOpenLeft",
-            "tongue": "TongueOut",
+            "wink": "EyeOpenLeft", # Parameter, but also used as feature key
+            "tongue": "TongueOut", # Parameter
             "ウインク": "wink",
             "べー": "tongue"
         }
 
-        # Bilingual emotion keywords — always initialized so detect_emotion works without VTube
+        # Bilingual emotion keywords
         self.emotion_keywords = {
-            "sad": [
-                # English
-                "sad", "sadly", "sorry", "unfortunate", "regret", "miss", "lonely", "cry", "crying",
-                "depressed", "depressing", "upset", "unhappy", "miserable", "heartbroken",
-                # Japanese
-                "悲しい", "かなしい", "寂しい", "さびしい", "辛い", "つらい",
-                "残念", "ざんねん", "泣", "ない", "切ない", "せつない"
-            ],
-            "angry": [
-                # English
-                "angry", "mad", "annoyed", "annoying", "frustrated", "frustrating", "hate", "hated", "stupid", "idiot", 
-                "dumb", "terrible", "furious", "irritated", "irritating", "pissed",
-                # Japanese
-                "怒", "おこ", "怒る", "おこる", "イライラ", "いらいら", "腹立つ",
-                "はらだつ", "馬鹿", "ばか", "嫌い", "きらい", "最悪", "さいあく",
-                "もう！", "信じられない"
-            ],
-            "smile": [
-                # English
-                "smile", "smiling", "grin", "grinning", "chuckle", "chuckling", "giggle", "giggling", "teehee", "hehe", "haha",
-                "happy", "glad", "great", "awesome", "wonderful", "love", "like", 
-                "enjoy", "fun", "yay", "excited", "exciting", "joy", "cheerful", "delighted",
-                # Japanese
-                "笑", "わら", "微笑む", "ほほえむ", "ニヤニヤ", "にやにや", "くすくす",
-                "あはは", "ふふふ",
-                "嬉しい", "うれしい", "楽しい", "たのしい", "幸せ", "しあわせ",
-                "やった", "最高", "さいこう", "素晴らしい", "すばらしい", "ワクワク"
-            ],
-            "ghost": [
-                # English
-                "ghost", "boo", "spooky", "scared", "scary", "afraid", "spirit", "haunted", "dead",
-                # Japanese
-                "幽霊", "ゆうれい", "お化け", "おばけ", "怖い", "こわい", "霊", "れい"
-            ],
-            "ghost_nervous": ["nervous", "flustered", "caught", "embarrassed", "embarrassing", "shook", "shocked"],
+            "sad": ["sad", "sadly", "sorry", "unfortunate", "regret", "miss", "lonely", "cry", "crying", "miserable"],
+            "angry": ["angry", "mad", "annoyed", "frustrated", "hate", "stupid", "idiot", "dumb", "terrible", "furious"],
+            "smile": ["smile", "smiling", "grin", "chuckle", "giggle", "teehee", "hehe", "haha", "happy", "glad", "yay", "joy"],
+            "ghost": ["ghost", "boo", "spooky", "scared", "scary", "afraid", "spirit", "haunted"],
+            "ghost_nervous": ["nervous", "flustered", "caught", "embarrassed", "shook", "shocked"],
             "shadow": ["scary", "menacing", "dark", "evil", "shadow", "creepy"],
             "eyeshine_off": ["deadface", "disappointed", "uncool", "serious", "cold", "empty"],
             "pupil_shrink": ["prank", "mischief", "cheeky", "teasing", "silly", "surprise", "surprised"],
@@ -426,17 +396,23 @@ async def reset_to_neutral(self):
         if not self.is_enabled or not self.connected:
             return
         
+        logger.debug("Resetting AURA to neutral expressions...")
         # 1. Turn off all active expressions (hotkeys)
         for expr_name in list(self.active_expressions.keys()):
-            await self._trigger_hotkey(expr_name)
+            await self._trigger_hotkey(expr_name, action="Toggling OFF")
         self.active_expressions.clear()
 
         # 2. Reset all injected parameters to default values
-        for p_name in list(self.injected_parameters.keys()):
+        # We also explicitly reset high-likelihood "sticking" parameters
+        all_params_to_clear = set(self.injected_parameters.keys()) | {"TongueOut", "MouthOpen", "EyeOpenLeft", "EyeOpenRight"}
+        
+        for p_name in all_params_to_clear:
             # Reset to a safe default (usually 1.0 for eyes, 0.0 for tongue/mouth)
             default_val = 1.0 if "EyeOpen" in p_name else 0.0
             await self.inject_parameter(p_name, default_val)
+        
         self.injected_parameters.clear()
+        logger.debug("AURA successfully reset to neutral.")
     
     def detect_emotion(self, text):
         """Bilingual detection: Looks for explicit tags [tag1, tag2] first, then falls back to keywords."""