diff --git a/.env.example b/.env.example index 57f76b7..35bbb00 100644 --- a/.env.example +++ b/.env.example @@ -2,6 +2,11 @@ # OpenRouter API Key - Get yours at https://openrouter.ai/keys OPENROUTER_API_KEY=your_openrouter_key_here +# Optional: Direct provider keys (used when provider != openrouter) +ANTHROPIC_API_KEY= # Required for claude-* models +GROQ_API_KEY= # Required for Groq provider (fast Llama/Mixtral) +OLLAMA_BASE_URL=http://localhost:11434 # Local Ollama endpoint + # --- VOICE CONFIGURATION (LIVEKIT AGENTS) --- # Deepgram API Key (STT) - Get yours at https://console.deepgram.com/ DEEPGRAM_API_KEY=your_deepgram_key_here diff --git a/ai-service/app/api/v1/chat.py b/ai-service/app/api/v1/chat.py index 725912f..d42f2ce 100644 --- a/ai-service/app/api/v1/chat.py +++ b/ai-service/app/api/v1/chat.py @@ -1,7 +1,8 @@ -import re +import json import logging - +import asyncio from fastapi import APIRouter, HTTPException +from fastapi.responses import StreamingResponse from app.services.memory_service import memory_service from app.models.chat import ChatRequest, ChatResponse from app.services.brain.graph import brain @@ -10,7 +11,7 @@ router = APIRouter() logger = logging.getLogger(__name__) -@router.post("", response_model=ChatResponse) +@router.post("") async def chat(request: ChatRequest): # Run Graph try: @@ -24,16 +25,98 @@ async def chat(request: ChatRequest): "messages": [HumanMessage(content=request.message)], "emotion": "neutral", "conversation_id": conversation_id, + "identity": request.identity or "anonymous", + "stream": request.stream } config = {"configurable": {"thread_id": conversation_id}} - result = brain.invoke(initial_state, config=config) + + if request.stream: + async def event_generator(): + # 1. 
Start with emotion detection (sequential but fast) + try: + from app.services.brain.nodes.emotion import detect_emotion + emotion_res = await detect_emotion(initial_state) + detected_emotion = emotion_res.get("emotion", "neutral") + yield f"data: {json.dumps({'emotion': detected_emotion})}\n\n" + except Exception as ex: + logger.warning(f"Emotion detection failed: {ex}") + detected_emotion = "neutral" + + # 2. Setup the full context for generation + from app.services.brain.nodes.generate import session_history_window + from app.services.llm import llm_service + from app.services.persona import persona_engine + from app.services.settings_service import settings_service + from datetime import datetime + from uuid import UUID + + # Fetch context + user_msg = request.message + history_model, memories, facts = await asyncio.gather( + memory_service.get_history(UUID(conversation_id), session_history_window), + memory_service.search(query=user_msg, limit=3), + memory_service.get_long_term_memories(identity=request.identity or "anonymous", limit=5), + ) + + # Build Persona + db_settings = settings_service.get_settings() + custom_sys = (db_settings.get("system_prompt") or "").strip() + persona = custom_sys if custom_sys else persona_engine.get_persona() + time_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + system_content = ( + "You are AURA (Advanced Universal Responsive Avatar), steward of the ASE Lab.\n\n" + f"{persona}\n\n" + "IMPORTANT: Do NOT include bracketed emotions like [happy] or [sad] in your response content. 
" + "I have already detected your emotion separately.\n\n" + f"**Context:**\n- Current Time: {time_str}" + ) + if facts: system_content += f"\nWhat I know about you:\n{facts}\n" + if memories: + memory_block = "\n".join(f"- {m}" for m in memories) + system_content += f"\nRelevant past snippets:\n{memory_block}\n" + + messages_format = [{"role":"system", "content":system_content}] + history_model + [{"role":"user", "content":user_msg}] + + import re + full_text = "" + # 3. Stream from the registry directly + from app.services.providers.base import TextDelta + async for chunk in llm_service.stream(messages_format): + # Only yield incremental deltas to the dashboard + if isinstance(chunk, TextDelta): + txt = chunk.text + full_text += txt + yield f"data: {json.dumps({'text': txt})}\n\n" + # StreamDone is handled silently for background persistence below + + # 4. Final sync/persistence - SCRUBBED + scrubbed_final = re.sub(r'\[.*?\]', '', full_text).strip() + asyncio.create_task(memory_service.add_interaction( + conversation_id=UUID(conversation_id), + user_text=user_msg, + assistant_text=scrubbed_final, + user_emotion=detected_emotion, + assistant_emotion="neutral" + )) + asyncio.create_task(memory_service.store( + text=f"User: {user_msg} \n AURA: {scrubbed_final}", + metadata={"conversation_id": str(conversation_id)} + )) + + yield "data: [DONE]\n\n" + + return StreamingResponse(event_generator(), media_type="text/event-stream") + + # Non-streaming fallback + result = await brain.ainvoke(initial_state, config=config) # Extract response last_msg = result["messages"][-1].content emotion = result.get("emotion", "neutral") - # Look for tool calls in the last turn + # Look for tool calls tools_used = [] for msg in result["messages"]: if hasattr(msg, "tool_calls") and msg.tool_calls: @@ -43,22 +126,21 @@ async def chat(request: ChatRequest): "args": tc.get("args", {}) }) - # Clean tags - text = last_msg - if text.startswith("["): - match = re.match(r'^\[(.*?)\]', text) - if 
match: - text = text[match.end():].strip() - return ChatResponse( - text=text, + text=last_msg, emotion=emotion, conversation_id=conversation_id, tools_used=tools_used if tools_used else None ) except Exception as e: - logger.error(f"Chat error: {e}") + logger.error(f"Chat error: {e}", exc_info=True) + # If it was a stream request, we should yield an error event + if request.stream: + return StreamingResponse( + iter([f"data: {json.dumps({'text': f'Brain Freeze: {str(e)}', 'emotion': 'confused'})}\n\n"]), + media_type="text/event-stream" + ) return ChatResponse( text=f"Brain Freeze: {str(e)}", diff --git a/ai-service/app/api/v1/settings.py b/ai-service/app/api/v1/settings.py index 264c47e..77941dd 100644 --- a/ai-service/app/api/v1/settings.py +++ b/ai-service/app/api/v1/settings.py @@ -4,23 +4,29 @@ router = APIRouter() +PROVIDERS = ["openrouter", "openai", "anthropic", "groq", "ollama"] + class SettingsPatch(BaseModel): system_prompt: str | None = None - model: str | None = None - temperature: float | None = None - max_tokens: int | None = None - empathy: int | None = None - humor: int | None = None - formality: int | None = None + model: str | None = None + provider: str | None = None + temperature: float | None = None + max_tokens: int | None = None + empathy: int | None = None + humor: int | None = None + formality: int | None = None class ApiKeysPatch(BaseModel): openrouter_api_key: str | None = None - deepgram_api_key: str | None = None - cartesia_api_key: str | None = None - livekit_url: str | None = None - livekit_api_key: str | None = None + deepgram_api_key: str | None = None + cartesia_api_key: str | None = None + anthropic_api_key: str | None = None + groq_api_key: str | None = None + ollama_base_url: str | None = None + livekit_url: str | None = None + livekit_api_key: str | None = None livekit_api_secret: str | None = None @@ -35,11 +41,18 @@ def update_settings(patch: SettingsPatch): return settings_service.update_settings(data) 
+@router.get("/providers") +def list_providers(): + """Return available provider names for the UI dropdown.""" + return {"providers": PROVIDERS} + + @router.get("/keys") def get_api_keys(): keys = settings_service.get_api_keys() - # Mask values in response — only reveal whether each key is set - return {k: ("••••••••" if v else None) for k, v in keys.items() if k != "id"} + # Return masked values — just signals whether the key is configured + return {k: ("set" if (v and str(v).strip()) else None) + for k, v in keys.items() if k != "id"} @router.put("/keys") diff --git a/ai-service/app/core/config.py b/ai-service/app/core/config.py index 5eff0f1..51de459 100644 --- a/ai-service/app/core/config.py +++ b/ai-service/app/core/config.py @@ -28,6 +28,9 @@ class Settings(BaseSettings): LLM_API_KEY: str | None = None OPENAI_API_KEY: str | None = None OPENROUTER_API_KEY: str | None = None + ANTHROPIC_API_KEY: str | None = None + GROQ_API_KEY: str | None = None + OLLAMA_BASE_URL: str = "http://localhost:11434" OPENAI_MODEL: str = "gpt-3.5-turbo" # Supabase diff --git a/ai-service/app/models/chat.py b/ai-service/app/models/chat.py index 6ffcc57..c55136a 100644 --- a/ai-service/app/models/chat.py +++ b/ai-service/app/models/chat.py @@ -4,6 +4,8 @@ class ChatRequest(BaseModel): message: str conversation_id: Optional[str] = None + identity: Optional[str] = None + stream: bool = False class ChatResponse(BaseModel): text: str diff --git a/ai-service/app/services/brain/nodes/emotion.py b/ai-service/app/services/brain/nodes/emotion.py index 66cd899..ec37427 100644 --- a/ai-service/app/services/brain/nodes/emotion.py +++ b/ai-service/app/services/brain/nodes/emotion.py @@ -2,7 +2,7 @@ from app.services.llm import llm_service # Node to detect emotion -def detect_emotion(state: BrainState) -> dict: +async def detect_emotion(state: BrainState) -> dict: # Get last user message last_message = state["messages"][-1].content @@ -13,7 +13,7 @@ def detect_emotion(state: BrainState) -> dict: """ 
# Call LLM to detect emotion - emotion = llm_service.generate([{"role": "system", "content": prompt}]) + response = await llm_service.generate([{"role": "system", "content": prompt}]) # Return detected emotion - return {"emotion": emotion["emotion"].strip().lower()} \ No newline at end of file + return {"emotion": response.get("emotion", "neutral").strip().lower()} \ No newline at end of file diff --git a/ai-service/app/services/brain/nodes/generate.py b/ai-service/app/services/brain/nodes/generate.py index 0c1cfad..8c0207a 100644 --- a/ai-service/app/services/brain/nodes/generate.py +++ b/ai-service/app/services/brain/nodes/generate.py @@ -11,10 +11,9 @@ session_history_window = 9999 -def generate_response(state: BrainState) -> dict: - with concurrent.futures.ThreadPoolExecutor() as pool: - future = pool.submit(asyncio.run, generate(state)) - return future.result() +async def generate_response(state: BrainState) -> dict: + """Async wrapper for the generation node.""" + return await generate(state) # Node to generate response based on persona, conversation history and detected emotion (convesation history not being tested yet) @@ -45,46 +44,90 @@ async def generate(state: BrainState) -> dict: else: user_message = "" - # Load History - history_model, memories = await asyncio.gather( + # Load History & Long-term memories + history_model, memories, facts = await asyncio.gather( memory_service.get_history(conversation_id, session_history_window), memory_service.search(query=user_message, limit=3), + memory_service.get_long_term_memories(identity=state.get("identity", "anonymous"), limit=5), ) history = history_model - # System Prompt - system_message = prompter.build("", context=None)[0] + # Save User message IMMEDIATELY to DB so it persists even if AI fails or disconnects + await memory_service.add_interaction( + conversation_id=conversation_id, + user_text=user_message, + assistant_text=None, # Update later + user_emotion=detected_emotion, + assistant_emotion=None + 
) - if memories: - memory_block = "\n".join(f"-{message}" for message in memories) - system_message = { - "role" : "system", - "content": (system_message["content"] + f"Ingatan sebelumnya: \n {memory_block}") - } + # System Prompt (Pulling from DB via settings_service) + from app.services.settings_service import settings_service + db_settings = settings_service.get_settings() + custom_sys = (db_settings.get("system_prompt") or "").strip() + from app.services.persona import persona_engine + persona = custom_sys if custom_sys else persona_engine.get_persona() - # Add system prompt with persona and current time + from datetime import datetime + time_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + system_content = ( + "You are AURA (Advanced Universal Responsive Avatar), " + "the spirited AI steward of the ASE Lab.\n\n" + f"{persona}\n\n" + f"**Context:**\n- Current Time: {time_str}" + ) + + # Combine RAG (memories) and LTS (facts) + combined_memory = "" + if facts: + combined_memory += f"\nWhat I know about you:\n{facts}\n" + if memories: + memory_block = "\n".join(f"- {message}" for message in memories) + combined_memory += f"\nRelevant past snippets:\n{memory_block}\n" + + if combined_memory: + system_content += f"\n\n**Memory Retrieval:**{combined_memory}" + + system_message = {"role": "system", "content": system_content} + + # Build payload messages_format = [system_message] + history + current_message + # Check for stream request + is_stream = state.get("stream", False) + + if is_stream: + # For streaming, we yield chunks. + # But this is a node, so we return the final state but can use callbacks? + # Actually, chat.py will call brain.astream(). + # We handle the stream here if we want to return the stream object, + # but LangGraph nodes should return the update. + # So we update chat.py to use a different strategy. 
+ pass + # Generate response from LLM - response = llm_service.generate(messages_format) + response = await llm_service.generate(messages_format) + text = response.get("text", "") emotion = response.get("emotion", "neutral") await asyncio.gather( + # Complete the interaction in DB memory_service.add_interaction( conversation_id=conversation_id, user_text=user_message, - assistant_text=response["text"], + assistant_text=text, user_emotion=detected_emotion, assistant_emotion=emotion ), memory_service.store( - text=f"User: {user_message} \n AURA: {response['text']}", + text=f"User: {user_message} \n AURA: {text}", metadata={"conversation_id": str(conversation_id)}, ), ) # Return response - return {"messages": [AIMessage(content=response["text"])], "emotion": response["emotion"]} \ No newline at end of file + return {"messages": [AIMessage(content=text)], "emotion": emotion} \ No newline at end of file diff --git a/ai-service/app/services/brain/state.py b/ai-service/app/services/brain/state.py index c4047a7..bab52f5 100644 --- a/ai-service/app/services/brain/state.py +++ b/ai-service/app/services/brain/state.py @@ -7,4 +7,5 @@ class BrainState(TypedDict): messages: Annotated[List[BaseMessage], operator.add] emotion: str - conversation_id: str \ No newline at end of file + conversation_id: str + identity: str \ No newline at end of file diff --git a/ai-service/app/services/llm.py b/ai-service/app/services/llm.py index 7b3cad1..0a1629c 100644 --- a/ai-service/app/services/llm.py +++ b/ai-service/app/services/llm.py @@ -1,74 +1,33 @@ -from openai import OpenAI -from app.core.config import settings +""" +LLMService — thin facade over the Provider Abstraction Layer. + +All routing logic lives in providers/registry.py. +This class exists so existing callers (brain nodes, etc.) don't need to change. 
+""" import logging -import re +import asyncio +from app.services.providers.registry import provider_registry logger = logging.getLogger(__name__) class LLMService: - def __init__(self): - self._env_key = settings.OPENROUTER_API_KEY or settings.OPENAI_API_KEY - self.base_url = "https://openrouter.ai/api/v1" if settings.OPENROUTER_API_KEY else None - self.client = None - - if self._env_key: - self.client = OpenAI(api_key=self._env_key, base_url=self.base_url) - logger.info(f"LLM Service Initialized. Base: {self.base_url or 'Default'}") - else: - logger.warning("API Key not set. LLMService will fail.") - - def _get_client(self): - """Return a client using the DB key if set, falling back to the env key.""" - from app.services.settings_service import settings_service - db_key = settings_service.get_api_keys().get("openrouter_api_key") - if db_key and db_key.strip(): - return OpenAI(api_key=db_key, base_url="https://openrouter.ai/api/v1") - return self.client - - def generate(self, messages: list, model: str = None, temperature: float = None, max_tokens: int = None) -> dict: - client = self._get_client() - if not client: - return {"text": "Error: API Key is missing.", "emotion": "[dizzy]"} - - # Import here to avoid circular imports at module load time - from app.services.settings_service import settings_service - db = settings_service.get_settings() - - actual_model = model or db.get("model") or "deepseek/deepseek-v3.2" - actual_temp = temperature if temperature is not None else db.get("temperature", 0.8) - actual_max_tokens = max_tokens or db.get("max_tokens") or 300 - - try: - extra_headers = {} - if settings.OPENROUTER_API_KEY: - extra_headers = { - "HTTP-Referer": "http://localhost:5173", - "X-Title": "Project AURA", - } - - response = client.chat.completions.create( - model=actual_model, - messages=messages, - temperature=actual_temp, - max_tokens=actual_max_tokens, - extra_headers=extra_headers, - ) - - content = response.choices[0].message.content - 
emotion_match = re.match(r'^\[(.*?)\]', content) - emotion = "neutral" - text = content - - if emotion_match: - emotion = emotion_match.group(1) - text = content[emotion_match.end():].strip() - - return {"text": text, "emotion": emotion, "raw": content} - - except Exception as e: - logger.error(f"LLM Generation Error: {e}") - return {"text": f"I lost my train of thought. ({str(e)})", "emotion": "[confused]"} + async def generate( + self, + messages: list, + model: str | None = None, + temperature: float | None = None, + max_tokens: int | None = None, + ) -> dict: + return await provider_registry.generate( + messages, + model=model, + temperature=temperature, + max_tokens=max_tokens, + ) + + def stream(self, *args, **kwargs): + return provider_registry.stream(*args, **kwargs) llm_service = LLMService() diff --git a/ai-service/app/services/memory_service.py b/ai-service/app/services/memory_service.py index b401291..dbb4acd 100644 --- a/ai-service/app/services/memory_service.py +++ b/ai-service/app/services/memory_service.py @@ -4,17 +4,28 @@ """ from __future__ import annotations from typing import List +import urllib.request from supabase import create_client from langchain_openai import OpenAIEmbeddings from app.core.config import settings from uuid import UUID + from app.models.database import (Conversation, CreateConversation, Message, CreateMesssage, Memory, CreateMemory) import logging logger = logging.getLogger(__name__) + +def _ollama_is_running(base_url: str) -> bool: + """Return True if an Ollama server is reachable at base_url.""" + try: + urllib.request.urlopen(f"{base_url}/api/tags", timeout=2) + return True + except Exception: + return False + class MemoryService: def __init__(self): self.client = None @@ -27,16 +38,36 @@ def __init__(self): else: logger.warning("Supabase credentials not set. 
Memory service disabled.") - # Initialize embeddings model via OpenRouter - api_key = settings.OPENROUTER_API_KEY - if api_key: + # Initialize embeddings — try providers in order of preference + if settings.OPENAI_API_KEY: self.embeddings = OpenAIEmbeddings( - api_key=api_key, + api_key=settings.OPENAI_API_KEY, + model="text-embedding-3-small", + ) + logger.info("RAG: Using OpenAI Directly for semantic embeddings (best-in-class mapping).") + print("INFO: Memory Service using OpenAI Embeddings for search mapping.") + elif settings.OPENROUTER_API_KEY: + self.embeddings = OpenAIEmbeddings( + api_key=settings.OPENROUTER_API_KEY, model="openai/text-embedding-3-small", - base_url="https://openrouter.ai/api/v1" + base_url="https://openrouter.ai/api/v1", ) + logger.info("RAG: Using OpenRouter for semantic embeddings.") + print("INFO: Memory Service using OpenRouter Embeddings.") + elif _ollama_is_running(settings.OLLAMA_BASE_URL): + self.embeddings = OpenAIEmbeddings( + api_key="ollama", + model="nomic-embed-text", + base_url=f"{settings.OLLAMA_BASE_URL}/v1", + ) + logger.info("RAG: Using local Ollama for semantic embeddings.") + print("INFO: Memory Service using local Ollama Embeddings.") else: - logger.warning("OPENROUTER_API_KEY not set. Memory embedding disabled.") + logger.warning( + "No embedding provider available " + "(OPENAI_API_KEY / OPENROUTER_API_KEY not set; Ollama not reachable). " + "Memory store/search disabled." 
+ ) async def create_conversation(self, title: str = "New Conversation") -> UUID | None: if not self.client: @@ -76,26 +107,30 @@ async def get_conversation(self, conversation_id: UUID) -> Conversation | None: logger.error(f"Memory Service Get Conversation Error: {error}") return None - async def add_interaction(self, conversation_id: UUID, user_text: str, assistant_text: str, user_emotion: str = "neutral", assistant_emotion: str = "neutral") -> None: + async def add_interaction(self, conversation_id: UUID, user_text: str, assistant_text: str | None, user_emotion: str = "neutral", assistant_emotion: str = "neutral") -> None: if not self.client: return None try: - self.client.table("messages").insert([ - CreateMesssage( + msgs = [] + if user_text: + msgs.append(CreateMesssage( conversation_id=conversation_id, role="user", content=user_text, emotion=user_emotion, - ).model_dump(mode="json"), + ).model_dump(mode="json")) - CreateMesssage( + if assistant_text: + msgs.append(CreateMesssage( conversation_id=conversation_id, role="aura", content=assistant_text, emotion=assistant_emotion - ).model_dump(mode="json") - ]).execute() + ).model_dump(mode="json")) + + if msgs: + self.client.table("messages").insert(msgs).execute() self.client.table("conversations") \ .update({"updated_at": "now()"}) \ @@ -202,4 +237,31 @@ async def search(self, query: str, limit: int = 3) -> list[str]: return [] + async def get_long_term_memories(self, identity: str, limit: int = 10) -> str: + """Retrieve the last N non-embedded 'user_facts' memories for this identity.""" + if not self.client: + return "" + + try: + result = self.client.table("memories") \ + .select("content, created_at") \ + .eq("metadata->>type", "user_facts") \ + .eq("metadata->>identity", identity) \ + .order("created_at", desc=True) \ + .limit(limit) \ + .execute() + + rows = result.data or [] + if not rows: + return "" + + # Reverse to get chronological order in the prompt + facts_list = [row["content"] for row in 
reversed(rows)] + return "\n---\n".join(facts_list) + + except Exception as e: + logger.error(f"Memory Service Get Long Term Memories error: {e}") + return "" + + memory_service = MemoryService() \ No newline at end of file diff --git a/ai-service/app/services/prompter.py b/ai-service/app/services/prompter.py index e541fec..57cdfc3 100644 --- a/ai-service/app/services/prompter.py +++ b/ai-service/app/services/prompter.py @@ -7,13 +7,14 @@ class Prompter: def build(self, message: str, context: dict = None) -> list: current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - # Pull live settings — custom system_prompt overrides the hardcoded persona - db_settings = settings_service.get_settings() - custom_prompt = (db_settings.get("system_prompt") or "").strip() - persona = custom_prompt if custom_prompt else persona_engine.get_persona() + # Custom system_prompt from admin panel overrides the hardcoded persona + db = settings_service.get_settings() + custom = (db.get("system_prompt") or "").strip() + persona = custom if custom else persona_engine.get_persona() formatted_system = ( - f"You are AURA (Advanced Universal Responsive Avatar), the spirited AI steward of the ASE Lab.\n\n" + "You are AURA (Advanced Universal Responsive Avatar), " + "the spirited AI steward of the ASE Lab.\n\n" f"{persona}\n\n" f"**Context:**\n- Current Time: {current_time}" ) diff --git a/ai-service/app/services/providers/__init__.py b/ai-service/app/services/providers/__init__.py new file mode 100644 index 0000000..292fc89 --- /dev/null +++ b/ai-service/app/services/providers/__init__.py @@ -0,0 +1,3 @@ +from app.services.providers.registry import provider_registry + +__all__ = ["provider_registry"] diff --git a/ai-service/app/services/providers/anthropic_provider.py b/ai-service/app/services/providers/anthropic_provider.py new file mode 100644 index 0000000..89685fe --- /dev/null +++ b/ai-service/app/services/providers/anthropic_provider.py @@ -0,0 +1,183 @@ +""" +Anthropic / Claude 
provider. + +Key differences from OpenAI-compatible providers: + +1. System message → separate `system` parameter (not in messages list). +2. Streaming: chunks are `content_block_delta` with type "text_delta" + (vs GPT's `choices[0].delta.content`). +3. Tool calls: come as `content_block_start` with type "tool_use" + (vs OpenAI's `message.tool_calls`). +4. Tool definitions: Anthropic uses a different schema than OpenAI. + We accept the OpenAI schema and translate it internally. + +Normalized output is always the same result dict as every other provider. +""" +from __future__ import annotations + +import json +import logging +from typing import AsyncGenerator + +from app.services.providers.base import LLMProvider, TextDelta, StreamDone, make_result, RetryableError, NonRetryableError + +logger = logging.getLogger(__name__) + + +def _split_system(messages: list[dict]) -> tuple[str, list[dict]]: + """Separate the system prompt from the rest of the message list.""" + system_parts = [] + rest = [] + for m in messages: + if m.get("role") == "system": + system_parts.append(m.get("content", "")) + else: + rest.append(m) + return "\n\n".join(system_parts), rest + + +def _openai_tools_to_anthropic(tools: list[dict]) -> list[dict]: + """ + Translate OpenAI tool schema to Anthropic's format. 
+ + OpenAI: { "type": "function", "function": { "name", "description", "parameters" } } + Anthropic: { "name", "description", "input_schema" } + """ + result = [] + for t in tools: + fn = t.get("function", t) # handle both wrapped and unwrapped + result.append({ + "name": fn["name"], + "description": fn.get("description", ""), + "input_schema": fn.get("parameters", {"type": "object", "properties": {}}), + }) + return result + + +def _extract_tool_calls(content_blocks) -> list | None: + """Normalize Anthropic tool_use blocks to our common schema.""" + calls = [ + { + "id": block.id, + "name": block.name, + "arguments": json.dumps(block.input), + } + for block in content_blocks + if getattr(block, "type", None) == "tool_use" + ] + return calls or None + + +class AnthropicProvider(LLMProvider): + name = "anthropic" + + def __init__(self, api_key: str): + try: + import anthropic as _anthropic + self._anthropic = _anthropic + self._client = _anthropic.Anthropic(api_key=api_key) + self._async_client = _anthropic.AsyncAnthropic(api_key=api_key) + logger.info("[anthropic] provider ready") + except ImportError: + raise RuntimeError( + "The 'anthropic' package is required for the Anthropic provider. 
" + "Run: pip install anthropic" + ) + + # ── Blocking ────────────────────────────────────────────────────────────── + + def generate( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> dict: + system, user_messages = _split_system(messages) + kwargs = dict( + model=model, + system=system, + messages=user_messages, + temperature=temperature, + max_tokens=max_tokens, + ) + if tools: + kwargs["tools"] = _openai_tools_to_anthropic(tools) + + _a = self._anthropic # local ref so except clauses can reference it + try: + response = self._client.messages.create(**kwargs) + + # Text from text blocks + raw = "".join( + block.text for block in response.content + if getattr(block, "type", None) == "text" + ) + tool_calls = _extract_tool_calls(response.content) + + if tool_calls and not raw: + raw = f"[tool_call: {tool_calls[0]['name']}]" + + return make_result(raw, self.name, model, tool_calls=tool_calls) + + except _a.RateLimitError as e: + raise RetryableError(str(e), status_code=429) + except (_a.APIConnectionError, _a.APITimeoutError) as e: + raise RetryableError(str(e)) + except _a.InternalServerError as e: + raise RetryableError(str(e), status_code=getattr(e, "status_code", 500)) + except _a.AuthenticationError as e: + raise NonRetryableError(str(e), status_code=401) + except _a.BadRequestError as e: + raise NonRetryableError(str(e), status_code=400) + except Exception as e: + raise RetryableError(str(e)) + + # ── Streaming ───────────────────────────────────────────────────────────── + + async def stream( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> AsyncGenerator[TextDelta | StreamDone, None]: + system, user_messages = _split_system(messages) + assembled = "" + kwargs = dict( + model=model, + system=system, + messages=user_messages, + temperature=temperature, + max_tokens=max_tokens, + ) + if tools: 
+ kwargs["tools"] = _openai_tools_to_anthropic(tools) + + try: + async with self._async_client.messages.stream(**kwargs) as stream: + async for event in stream: + if ( + event.type == "content_block_delta" + and hasattr(event, "delta") + and getattr(event.delta, "type", None) == "text_delta" + ): + chunk = event.delta.text or "" + if chunk: + assembled += chunk + yield TextDelta(text=chunk) + except Exception as e: + logger.error(f"[anthropic] stream error: {e}") + + result = make_result(assembled, self.name, model) + yield StreamDone( + text=result["text"], + emotion=result["emotion"], + raw=assembled, + provider=self.name, + model=model, + ) diff --git a/ai-service/app/services/providers/base.py b/ai-service/app/services/providers/base.py new file mode 100644 index 0000000..cfb6b84 --- /dev/null +++ b/ai-service/app/services/providers/base.py @@ -0,0 +1,147 @@ +""" +Provider Abstraction Layer — base types and interface. + +Every LLM provider normalizes its output into the same result dict +so the rest of the system never needs to know which model is running. + +Normalized result: + { text, emotion, raw, provider, model, tool_calls } + +Tool calls are always normalized to: + [{ "id": str, "name": str, "arguments": str (JSON) }] + — regardless of whether the provider used OpenAI function_call deltas + or Anthropic content_block tool_use blocks. 
+ +Stream events (for future streaming endpoints): + TextDelta — incremental text chunk + StreamDone — final assembled result +""" +from __future__ import annotations + +import re +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import AsyncGenerator + + +# ── Normalized event types ──────────────────────────────────────────────────── + +@dataclass +class TextDelta: + """A chunk of text from a streaming response.""" + text: str + + +@dataclass +class StreamDone: + """Final event — carries the fully assembled response.""" + text: str + emotion: str + raw: str + provider: str + model: str + tool_calls: list | None = None + + +# ── Error types ─────────────────────────────────────────────────────────────── + +class RetryableError(Exception): + """ + Rate limit (429), server error (5xx), or transient network issue. + The registry will retry with exponential backoff, then try the next provider. + """ + def __init__(self, msg: str, status_code: int | None = None): + super().__init__(msg) + self.status_code = status_code + + +class NonRetryableError(Exception): + """ + Auth failure (401) or bad request (400). + - 401: key is wrong for this provider → skip to next provider. + - 400: our message is malformed → no provider will fix it; abort immediately. + """ + def __init__(self, msg: str, status_code: int | None = None): + super().__init__(msg) + self.status_code = status_code + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def parse_emotion(raw: str) -> tuple[str, str]: + """ + Extract the leading [emotion, tag] from a raw LLM response. + Returns (emotion_string, cleaned_text). 
+ """ + stripped = raw.strip() + match = re.match(r'^\[(.*?)\]', stripped) + if match: + return match.group(1), stripped[match.end():].strip() + return "neutral", stripped + + +def make_result( + raw: str, + provider: str, + model: str, + tool_calls: list | None = None, +) -> dict: + """Build the normalized result dict that the rest of the system expects.""" + emotion, text = parse_emotion(raw) + return { + "text": text, + "emotion": emotion, + "raw": raw, + "provider": provider, + "model": model, + "tool_calls": tool_calls or None, + } + + +# ── Abstract base ───────────────────────────────────────────────────────────── + +class LLMProvider(ABC): + """ + All providers implement this interface. + `generate` is the blocking path used by the brain pipeline. + `stream` is the async-generator path for future streaming endpoints. + + Tool definitions follow the OpenAI schema: + [{ "type": "function", "function": { "name": ..., "description": ..., + "parameters": {...} } }] + Providers that use a different native schema (e.g. Anthropic) translate + internally — callers always pass the OpenAI format. + """ + + name: str = "base" + + @abstractmethod + def generate( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> dict: + """ + Blocking generation. Returns the normalized result dict: + { text, emotion, raw, provider, model, tool_calls } + """ + + @abstractmethod + async def stream( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> AsyncGenerator[TextDelta | StreamDone, None]: + """ + Streaming generation. + Yields TextDelta chunks, ends with one StreamDone. 
+ """ + yield # type: ignore diff --git a/ai-service/app/services/providers/openai_compat.py b/ai-service/app/services/providers/openai_compat.py new file mode 100644 index 0000000..35c005e --- /dev/null +++ b/ai-service/app/services/providers/openai_compat.py @@ -0,0 +1,194 @@ +""" +OpenAI-compatible provider. + +Covers every backend that speaks the OpenAI chat-completions API: + • OpenRouter (base_url = https://openrouter.ai/api/v1) + • OpenAI (base_url = None → default) + • Groq (base_url = https://api.groq.com/openai/v1) + • Ollama (base_url = http://localhost:11434/v1) + +Tool call normalization: + OpenAI sends tool_calls on the response message. + Each tool call has: id, function.name, function.arguments (JSON string). + We surface these as [{ "id", "name", "arguments" }] in the result dict. +""" +from __future__ import annotations + +import logging +from typing import AsyncGenerator + +import openai as _openai_lib +from openai import OpenAI, AsyncOpenAI + +from app.services.providers.base import LLMProvider, TextDelta, StreamDone, make_result, RetryableError, NonRetryableError + +logger = logging.getLogger(__name__) + +_OPENROUTER_HEADERS = { + "HTTP-Referer": "http://localhost:5173", + "X-Title": "Project AURA", +} + + +def _extract_tool_calls(response_message) -> list | None: + """Normalize OpenAI tool_calls to our common schema.""" + raw_calls = getattr(response_message, "tool_calls", None) + if not raw_calls: + return None + return [ + { + "id": tc.id, + "name": tc.function.name, + "arguments": tc.function.arguments, # already a JSON string + } + for tc in raw_calls + ] + + +class OpenAICompatProvider(LLMProvider): + + def __init__( + self, + api_key: str, + base_url: str | None = None, + extra_headers: dict | None = None, + provider_name: str = "openai", + ): + self.name = provider_name + self._extra_headers = extra_headers or {} + self._client = OpenAI(api_key=api_key, base_url=base_url) + self._async_client = AsyncOpenAI(api_key=api_key, 
base_url=base_url) + logger.info(f"[{self.name}] provider ready (base_url={base_url or 'default'})") + + # ── Blocking ────────────────────────────────────────────────────────────── + + def generate( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> dict: + kwargs = dict( + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + extra_headers=self._extra_headers, + ) + if tools: + kwargs["tools"] = tools + kwargs["tool_choice"] = "auto" + + try: + response = self._client.chat.completions.create(**kwargs) + msg = response.choices[0].message + raw = msg.content or "" + tool_calls = _extract_tool_calls(msg) + + # When the model only returns a tool call (no text), give a placeholder + # so make_result always has something to parse. + if tool_calls and not raw: + raw = f"[tool_call: {tool_calls[0]['name']}]" + + return make_result(raw, self.name, model, tool_calls=tool_calls) + + except _openai_lib.RateLimitError as e: + raise RetryableError(str(e), status_code=429) + except (_openai_lib.APIConnectionError, _openai_lib.APITimeoutError) as e: + raise RetryableError(str(e)) + except _openai_lib.InternalServerError as e: + raise RetryableError(str(e), status_code=getattr(e, "status_code", 500)) + except _openai_lib.AuthenticationError as e: + raise NonRetryableError(str(e), status_code=401) + except (_openai_lib.BadRequestError, _openai_lib.NotFoundError) as e: + raise NonRetryableError(str(e), status_code=getattr(e, "status_code", 400)) + except Exception as e: + # Unknown error — treat as retryable so the registry can decide + raise RetryableError(str(e)) + + # ── Streaming ───────────────────────────────────────────────────────────── + + async def stream( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> AsyncGenerator[TextDelta | StreamDone, None]: + assembled = "" + 
kwargs = dict( + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + extra_headers=self._extra_headers, + ) + if tools: + kwargs["tools"] = tools + kwargs["tool_choice"] = "auto" + + try: + response = await self._async_client.chat.completions.create(**kwargs, stream=True) + async for chunk in response: + if not chunk.choices: + continue + + delta = chunk.choices[0].delta + + # Handle reasoning tokens (DeepSeek R1 / OpenRouter) + # These are internal thoughts we don't want to show the user + reasoning = getattr(delta, "reasoning_content", None) or getattr(delta, "reasoning", None) + if reasoning: + continue + + if delta.content: + txt = delta.content + assembled += txt + yield TextDelta(text=txt) + except Exception as e: + logger.error(f"[{self.name}] stream error: {e}") + + result = make_result(assembled, self.name, model) + yield StreamDone( + text=result["text"], + emotion=result["emotion"], + raw=assembled, + provider=self.name, + model=model, + ) + + +# ── Named constructors ──────────────────────────────────────────────────────── + +def openrouter_provider(api_key: str) -> OpenAICompatProvider: + return OpenAICompatProvider( + api_key=api_key, + base_url="https://openrouter.ai/api/v1", + extra_headers=_OPENROUTER_HEADERS, + provider_name="openrouter", + ) + + +def openai_provider(api_key: str) -> OpenAICompatProvider: + return OpenAICompatProvider(api_key=api_key, base_url=None, provider_name="openai") + + +def groq_provider(api_key: str) -> OpenAICompatProvider: + return OpenAICompatProvider( + api_key=api_key, + base_url="https://api.groq.com/openai/v1", + provider_name="groq", + ) + + +def ollama_provider(base_url: str = "http://localhost:11434") -> OpenAICompatProvider: + return OpenAICompatProvider( + api_key="ollama", + base_url=f"{base_url.rstrip('/')}/v1", + provider_name="ollama", + ) diff --git a/ai-service/app/services/providers/registry.py b/ai-service/app/services/providers/registry.py new file mode 100644 index 
0000000..8d51c79 --- /dev/null +++ b/ai-service/app/services/providers/registry.py @@ -0,0 +1,297 @@ +""" +Provider Registry — the single entry point for LLM calls. + +Responsibilities: + 1. Read active model / provider / temperature / max_tokens from settings_service + 2. Read the matching API key from settings_service (DB) or fall back to env vars + 3. Instantiate the right LLMProvider + 4. Call provider.generate() and return the normalized result + +Provider inference (when `provider` field is "auto" or missing): + model starts with "claude-" → anthropic + model contains "/" → openrouter (e.g. "deepseek/deepseek-v3.2") + model starts with gpt-/o1-/o3- → openai + model starts with llama/mistral… → ollama + groq is never inferred — set provider="groq" explicitly to use it + fallback → openrouter +""" +from __future__ import annotations + +import logging +import asyncio +import os +import random +import time + +from app.services.providers.base import LLMProvider, RetryableError, NonRetryableError + +logger = logging.getLogger(__name__) + +_MAX_ATTEMPTS = 3 # attempts per provider before giving up on it +_BACKOFF_BASE = 1.0 # seconds; delay = base * 2^attempt + jitter + +# Ordered fallback chain — first provider with an available key wins +_FALLBACK_ORDER = ["openrouter", "openai", "groq", "ollama"] + +# ── Provider inference ──────────────────────────────────────── + +_OPENAI_PREFIXES = ("gpt-", "o1-", "o3-", "text-davinci", "babbage", "ada") +_OLLAMA_PREFIXES = ("llama", "mistral", "gemma", "phi", "qwen", "codellama", "deepseek-r1") + + +def infer_provider(model: str) -> str: + m = model.lower() + if m.startswith("claude-"): + return "anthropic" + if "/" in m: + return "openrouter" + if any(m.startswith(p) for p in _OPENAI_PREFIXES): + return "openai" + if any(m.startswith(p) for p in _OLLAMA_PREFIXES): + return "ollama" + return "openrouter" + + +# ── Registry ────────────────────────────────────────────────── + +class ProviderRegistry: + """ + Resolves and calls the
correct LLM provider on every request. + Providers are constructed lazily and cached by (provider_name, key_hash). + """ + + def __init__(self): + self._cache: dict[str, LLMProvider] = {} + + # ── Public API ──────────────────────────────────────────────────────────── + + async def generate( + self, + messages: list[dict], + *, + model: str | None = None, + temperature: float | None = None, + max_tokens: int | None = None, + tools: list[dict] | None = None, + ) -> dict: + # Lazy import avoids circular imports at module load time + from app.services.settings_service import settings_service + + db = settings_service.get_settings() + keys = settings_service.get_api_keys() + + actual_model = model or db.get("model") or "deepseek/deepseek-v3.2" + actual_temp = temperature if temperature is not None else float(db.get("temperature", 0.8)) + actual_max_tokens = max_tokens or int(db.get("max_tokens", 300)) + + configured_provider = (db.get("provider") or "auto").lower() + primary = ( + configured_provider + if configured_provider != "auto" + else infer_provider(actual_model) + ) + + # Build candidate list: primary first, then any fallback with an available key + candidates = [primary] + [ + p for p in _FALLBACK_ORDER + if p != primary and (p == "ollama" or self._pick_key(p, keys)) + ] + + call_kwargs = dict( + model=actual_model, + temperature=actual_temp, + max_tokens=actual_max_tokens, + tools=tools, + ) + + last_error: Exception | None = None + + for provider_name in candidates: + try: + provider = self._get_provider(provider_name, keys) + except (ValueError, RuntimeError) as e: + # Missing key or missing package — skip silently + logger.debug(f"[registry] skipping {provider_name}: {e}") + last_error = e + continue + + logger.info(f"[registry] trying {provider_name} / {actual_model}") + try: + result = await self._call_with_retry(provider, messages, **call_kwargs) + if provider_name != primary: + logger.warning(f"[registry] fell back to {provider_name} (primary={primary} 
failed)") + return result + + except NonRetryableError as e: + last_error = e + if e.status_code == 400: + # Bad request — our message is wrong, no other provider will help + logger.error(f"[registry] bad request ({provider_name}): {e}") + break + # 401 auth failure — key is bad for this provider, try next + logger.warning(f"[registry] auth failed for {provider_name} (HTTP {e.status_code}), trying next") + continue + + except RetryableError as e: + # All retries for this provider exhausted — try next + logger.warning(f"[registry] {provider_name} exhausted retries: {e}") + last_error = e + continue + + logger.error(f"[registry] all providers failed. Last: {last_error}") + return { + "text": "I seem to be having trouble connecting right now. Please try again in a moment.", + "emotion": "confused", + "raw": "", + "provider": primary, + "model": actual_model, + "tool_calls": None, + } + + async def stream( + self, + messages: list[dict], + *, + model: str | None = None, + temperature: float | None = None, + max_tokens: int | None = None, + tools: list[dict] | None = None, + ) -> AsyncGenerator[TextDelta | StreamDone, None]: + from app.services.settings_service import settings_service + + db = settings_service.get_settings() + keys = settings_service.get_api_keys() + + actual_model = model or db.get("model") or "deepseek/deepseek-v3.2" + actual_temp = temperature if temperature is not None else float(db.get("temperature", 0.8)) + actual_max_tokens = max_tokens or int(db.get("max_tokens", 300)) + + configured_provider = (db.get("provider") or "auto").lower() + primary = ( + configured_provider + if configured_provider != "auto" + else infer_provider(actual_model) + ) + + candidates = [primary] + [ + p for p in _FALLBACK_ORDER + if p != primary and (p == "ollama" or self._pick_key(p, keys)) + ] + + # Note: Fallbacks for streaming are harder to implement gracefully mid-stream. + # We try the primary and first available. 
+ for provider_name in candidates: + try: + provider = self._get_provider(provider_name, keys) + logger.info(f"[registry] streaming {provider_name} / {actual_model}") + + async for chunk in provider.stream( + messages, + model=actual_model, + temperature=actual_temp, + max_tokens=actual_max_tokens, + tools=tools + ): + yield chunk + return + except Exception as e: + logger.warning(f"[registry] stream failed for {provider_name}: {e}") + continue + + async def _call_with_retry(self, provider: LLMProvider, messages: list[dict], **kwargs) -> dict: + """ + Call provider.generate() with exponential backoff on RetryableError. + Raises RetryableError if all attempts fail. + Raises NonRetryableError immediately (no retry). + """ + for attempt in range(_MAX_ATTEMPTS): + try: + # Use thread pool for sync generate calls to keep registry async-friendly + return await asyncio.to_thread(provider.generate, messages, **kwargs) + except NonRetryableError: + raise # propagate immediately + except RetryableError as e: + if attempt == _MAX_ATTEMPTS - 1: + raise # all attempts exhausted + delay = _BACKOFF_BASE * (2 ** attempt) + random.uniform(0.0, 0.5) + logger.warning( + f"[{provider.name}] attempt {attempt + 1}/{_MAX_ATTEMPTS} failed " + f"(status={e.status_code}): {e} — retrying in {delay:.1f}s" + ) + await asyncio.sleep(delay) + + # ── Provider instantiation ──────────────────────────────────────────────── + + def _get_provider(self, provider_name: str, keys: dict) -> LLMProvider: + # Cache key: provider name + first 8 chars of api key (detects key rotation) + raw_key = self._pick_key(provider_name, keys) + cache_key = f"{provider_name}:{(raw_key or '')[:8]}" + + if cache_key not in self._cache: + self._cache[cache_key] = self._build(provider_name, keys) + + return self._cache[cache_key] + + def _build(self, provider_name: str, keys: dict) -> LLMProvider: + from app.services.providers.openai_compat import ( + openrouter_provider, openai_provider, groq_provider, ollama_provider, + ) 
+ from app.services.providers.anthropic_provider import AnthropicProvider + + if provider_name == "anthropic": + key = self._pick_key("anthropic", keys) + if not key: + raise ValueError("Anthropic API key not set. Add it via the dashboard or ANTHROPIC_API_KEY env var.") + return AnthropicProvider(api_key=key) + + if provider_name == "groq": + key = self._pick_key("groq", keys) + if not key: + raise ValueError("Groq API key not set. Add it via the dashboard or GROQ_API_KEY env var.") + return groq_provider(api_key=key) + + if provider_name == "openai": + key = self._pick_key("openai", keys) + if not key: + raise ValueError("OpenAI API key not set. Add it via the dashboard or OPENAI_API_KEY env var.") + return openai_provider(api_key=key) + + if provider_name == "ollama": + ollama_url = ( + (keys.get("ollama_base_url") or "").strip() + or os.getenv("OLLAMA_BASE_URL", "http://localhost:11434") + ) + return ollama_provider(base_url=ollama_url) + + # Default: openrouter + key = self._pick_key("openrouter", keys) + if not key: + raise ValueError("OpenRouter API key not set. 
Add it via the dashboard or OPENROUTER_API_KEY env var.") + return openrouter_provider(api_key=key) + + @staticmethod + def _pick_key(provider_name: str, keys: dict) -> str | None: + """DB key takes precedence over env var.""" + env_map = { + "openrouter": "OPENROUTER_API_KEY", + "openai": "OPENAI_API_KEY", + "anthropic": "ANTHROPIC_API_KEY", + "groq": "GROQ_API_KEY", + } + db_key_map = { + "openrouter": "openrouter_api_key", + "openai": "openrouter_api_key", # share the same field for now + "anthropic": "anthropic_api_key", + "groq": "groq_api_key", + } + + db_field = db_key_map.get(provider_name) + db_val = (keys.get(db_field) or "").strip() if db_field else "" + if db_val: + return db_val + + env_var = env_map.get(provider_name) + return os.getenv(env_var, "") if env_var else "" + + +provider_registry = ProviderRegistry() diff --git a/ai-service/app/services/settings_service.py b/ai-service/app/services/settings_service.py index 194c0f1..a56dbe9 100644 --- a/ai-service/app/services/settings_service.py +++ b/ai-service/app/services/settings_service.py @@ -1,4 +1,5 @@ import logging +import time from supabase import create_client, Client from app.core.config import settings as app_settings @@ -6,20 +7,24 @@ _DEFAULTS = { "system_prompt": None, - "model": "deepseek/deepseek-v3.2", - "temperature": 0.8, - "max_tokens": 300, - "empathy": 50, - "humor": 50, - "formality": 50, + "model": "deepseek/deepseek-v3.2", + "provider": "openrouter", + "temperature": 0.8, + "max_tokens": 300, + "empathy": 50, + "humor": 50, + "formality": 50, } _KEY_DEFAULTS = { "openrouter_api_key": None, - "deepgram_api_key": None, - "cartesia_api_key": None, - "livekit_url": None, - "livekit_api_key": None, + "deepgram_api_key": None, + "cartesia_api_key": None, + "anthropic_api_key": None, + "groq_api_key": None, + "ollama_base_url": "http://localhost:11434", + "livekit_url": None, + "livekit_api_key": None, "livekit_api_secret": None, } @@ -29,23 +34,43 @@ def __init__(self): self._client: 
Client | None = None if app_settings.SUPABASE_URL and app_settings.SUPABASE_SERVICE_KEY: self._client = create_client(app_settings.SUPABASE_URL, app_settings.SUPABASE_SERVICE_KEY) + + # Simple cache + self._cache = {} + self._cache_expiry = { + "settings": 0, + "keys": 0 + } + self._TTL = 60 # seconds for settings + self._KEY_TTL = 5 # seconds for keys (re-check faster) def get_settings(self) -> dict: if not self._client: return dict(_DEFAULTS) + + now = time.time() + if "settings" in self._cache and now < self._cache_expiry["settings"]: + return self._cache["settings"] + try: result = self._client.table("personality_settings").select("*").eq("id", 1).single().execute() if result.data: - return {**_DEFAULTS, **result.data} + settings = {**_DEFAULTS, **result.data} + self._cache["settings"] = settings + self._cache_expiry["settings"] = now + self._TTL + return settings except Exception as e: logger.warning(f"SettingsService.get_settings failed: {e}") - return dict(_DEFAULTS) + return self._cache.get("settings", dict(_DEFAULTS)) def update_settings(self, patch: dict) -> dict: if not self._client: return dict(_DEFAULTS) try: result = self._client.table("personality_settings").update(patch).eq("id", 1).execute() + # Invalidate cache + if "settings" in self._cache: + del self._cache["settings"] if result.data: return {**_DEFAULTS, **result.data[0]} except Exception as e: @@ -55,19 +80,30 @@ def update_settings(self, patch: dict) -> dict: def get_api_keys(self) -> dict: if not self._client: return dict(_KEY_DEFAULTS) + + now = time.time() + if "keys" in self._cache and now < self._cache_expiry["keys"]: + return self._cache["keys"] + try: result = self._client.table("api_keys").select("*").eq("id", 1).single().execute() if result.data: - return {**_KEY_DEFAULTS, **result.data} + keys = {**_KEY_DEFAULTS, **result.data} + self._cache["keys"] = keys + self._cache_expiry["keys"] = now + self._KEY_TTL + return keys except Exception as e: 
logger.warning(f"SettingsService.get_api_keys failed: {e}") - return dict(_KEY_DEFAULTS) + return self._cache.get("keys", dict(_KEY_DEFAULTS)) def update_api_keys(self, patch: dict) -> dict: if not self._client: return dict(_KEY_DEFAULTS) try: result = self._client.table("api_keys").update(patch).eq("id", 1).execute() + # Invalidate cache + if "keys" in self._cache: + del self._cache["keys"] if result.data: return {**_KEY_DEFAULTS, **result.data[0]} except Exception as e: diff --git a/ai-service/requirements.txt b/ai-service/requirements.txt index 2c42291..747f7f8 100644 --- a/ai-service/requirements.txt +++ b/ai-service/requirements.txt @@ -3,6 +3,7 @@ aiohttp==3.13.3 aiosignal==1.4.0 annotated-doc==0.0.4 annotated-types==0.7.0 +anthropic anyio==4.11.0 attrs==25.4.0 cachetools==6.2.6 diff --git a/ai-service/tests/__init__.py b/ai-service/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ai-service/tests/conftest.py b/ai-service/tests/conftest.py new file mode 100644 index 0000000..8eb1151 --- /dev/null +++ b/ai-service/tests/conftest.py @@ -0,0 +1,71 @@ +""" +Shared pytest fixtures and env setup. +Loads the project .env so integration tests can use real API keys. +""" +import os +import sys +from pathlib import Path + +import pytest +from dotenv import load_dotenv + +# ── Add ai-service root to sys.path so `app.*` imports resolve ─────────────── +AI_SERVICE_DIR = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(AI_SERVICE_DIR)) + +# ── Load .env from project root ─────────────────────────────────────────────── +PROJECT_ROOT = AI_SERVICE_DIR.parent +env_path = PROJECT_ROOT / ".env" +if not env_path.exists(): + env_path = AI_SERVICE_DIR / ".env" +load_dotenv(env_path) + + +# ── Reusable message lists ──────────────────────────────────────────────────── + +@pytest.fixture +def simple_messages(): + return [ + {"role": "system", "content": "You are a helpful assistant. 
Reply very briefly."}, + {"role": "user", "content": "Say exactly: [smile] Hello!"}, + ] + + +@pytest.fixture +def tool_messages(): + return [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is the weather in Tokyo? Use the get_weather tool."}, + ] + + +@pytest.fixture +def sample_tools(): + return [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather for a city.", + "parameters": { + "type": "object", + "properties": { + "city": {"type": "string", "description": "City name"}, + }, + "required": ["city"], + }, + }, + } + ] + + +# ── Key availability helpers (used by integration marks) ───────────────────── + +def has_openrouter_key(): + return bool(os.getenv("OPENROUTER_API_KEY", "").strip()) + +def has_openai_key(): + return bool(os.getenv("OPENAI_API_KEY", "").strip()) + +def has_anthropic_key(): + return bool(os.getenv("ANTHROPIC_API_KEY", "").strip()) diff --git a/ai-service/tests/providers/__init__.py b/ai-service/tests/providers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dashboard/package.json b/dashboard/package.json index c7fdc9a..465db32 100644 --- a/dashboard/package.json +++ b/dashboard/package.json @@ -15,7 +15,7 @@ "@supabase/supabase-js": "^2.95.3", "@tailwindcss/vite": "^4.1.18", "livekit-client": "^2.17.1", - "pixi.js": "^6.5.10", + "pixi.js": "^8.17.1", "react": "^19.2.0", "react-dom": "^19.2.0", "react-router-dom": "^7.13.0", @@ -39,4 +39,4 @@ "overrides": { "vite": "npm:rolldown-vite@7.2.2" } -} +} \ No newline at end of file diff --git a/dashboard/src/components/ApiKeys.jsx b/dashboard/src/components/ApiKeys.jsx index 037d9b8..b2e2369 100644 --- a/dashboard/src/components/ApiKeys.jsx +++ b/dashboard/src/components/ApiKeys.jsx @@ -3,10 +3,13 @@ import { supabase } from '../lib/supabaseClient' const KEY_GROUPS = [ { - label: 'LLM Provider', + label: 'LLM Providers', icon: 'psychology', fields: [ - { key: 
'openrouter_api_key', label: 'OpenRouter API Key', placeholder: 'sk-or-v1-...' }, + { key: 'openrouter_api_key', label: 'OpenRouter API Key', placeholder: 'sk-or-v1-...', hint: 'Routes to DeepSeek, GPT, Mistral, and more' }, + { key: 'anthropic_api_key', label: 'Anthropic API Key', placeholder: 'sk-ant-...', hint: 'Required for claude-* models' }, + { key: 'groq_api_key', label: 'Groq API Key', placeholder: 'gsk_...', hint: 'Fast inference for Llama / Mixtral' }, + { key: 'ollama_base_url', label: 'Ollama Base URL', placeholder: 'http://localhost:11434', hint: 'Local LLMs via Ollama', isUrl: true }, ], }, { @@ -14,13 +17,13 @@ const KEY_GROUPS = [ icon: 'mic', fields: [ { key: 'deepgram_api_key', label: 'Deepgram API Key (STT)', placeholder: 'your_deepgram_key' }, - { key: 'cartesia_api_key', label: 'Cartesia API Key (TTS)', placeholder: 'your_cartesia_key', note: 'Requires agent restart to apply' }, + { key: 'cartesia_api_key', label: 'Cartesia API Key (TTS)', placeholder: 'your_cartesia_key', note: 'Requires agent restart' }, ], }, { label: 'LiveKit', icon: 'cell_tower', - note: 'Changes require agent restart', + note: 'Requires agent restart', fields: [ { key: 'livekit_url', label: 'LiveKit URL', placeholder: 'wss://your-project.livekit.cloud' }, { key: 'livekit_api_key', label: 'LiveKit API Key', placeholder: 'API key' }, @@ -32,24 +35,16 @@ const KEY_GROUPS = [ export default function ApiKeys() { const [draft, setDraft] = useState({}) const [visible, setVisible] = useState({}) - const [saveState, setSaveState] = useState('idle') // 'idle' | 'saving' | 'saved' | 'error' + const [saveState, setSaveState] = useState('idle') const [loaded, setLoaded] = useState(false) useEffect(() => { - supabase - .from('api_keys') - .select('*') - .eq('id', 1) - .single() - .then(({ data }) => { - if (data) setDraft(data) - setLoaded(true) - }) + supabase.from('api_keys').select('*').eq('id', 1).single() + .then(({ data }) => { if (data) setDraft(data); setLoaded(true) }) }, []) 
- const patch = (key, value) => setDraft((d) => ({ ...d, [key]: value })) - - const toggleVisible = (key) => setVisible((v) => ({ ...v, [key]: !v[key] })) + const patch = (key, value) => setDraft(d => ({ ...d, [key]: value })) + const toggleVisible = key => setVisible(v => ({ ...v, [key]: !v[key] })) const saveKeys = async () => { setSaveState('saving') @@ -57,12 +52,7 @@ export default function ApiKeys() { const payload = { ...draft } delete payload.id payload.updated_at = new Date().toISOString() - - const { error } = await supabase - .from('api_keys') - .update(payload) - .eq('id', 1) - + const { error } = await supabase.from('api_keys').update(payload).eq('id', 1) if (error) throw error setSaveState('saved') setTimeout(() => setSaveState('idle'), 2500) @@ -73,11 +63,11 @@ export default function ApiKeys() { } } - const btnProps = { - idle: { label: 'Save API Keys', icon: 'key', cls: 'bg-primary hover:bg-primary/90 shadow-primary/20' }, - saving: { label: 'Saving...', icon: 'hourglass_top', cls: 'bg-primary/70 cursor-not-allowed' }, - saved: { label: 'Keys Saved!', icon: 'check_circle', cls: 'bg-emerald-500 shadow-emerald-200' }, - error: { label: 'Save Failed', icon: 'error', cls: 'bg-red-500 shadow-red-200' }, + const btn = { + idle: { label: 'Save API Keys', icon: 'key', cls: 'bg-primary hover:bg-primary/90 shadow-primary/20' }, + saving: { label: 'Saving...', icon: 'hourglass_top', cls: 'bg-primary/70 cursor-not-allowed' }, + saved: { label: 'Keys Saved!', icon: 'check_circle', cls: 'bg-emerald-500 shadow-emerald-200' }, + error: { label: 'Save Failed', icon: 'error', cls: 'bg-red-500 shadow-red-200' }, }[saveState] return ( @@ -90,10 +80,10 @@ export default function ApiKeys() { @@ -105,37 +95,35 @@ export default function ApiKeys() { {label} {note && ( - info - {note} + info{note} )}
{hint}
}- info - {fieldNote} + info{fn}
)}
lock
- Keys are stored in your private Supabase database. Leave a field empty to use the value from the server's .env file.
+ Stored in your private Supabase database. Leave a field empty to use the server's .env value.
- {status === 'connecting' && 'Connecting...'} - {status === 'connected' && formatTime(elapsed)} - {status === 'error' && 'Connection failed'} -
- - {/* Waveform */} - {status === 'connected' && ( -+ {status === 'connecting' && 'Establishing Connection...'} + {status === 'connected' && `Live Interaction — ${formatTime(elapsed)}`} + {status === 'error' && 'Neural Link Failed'} +
+- Your personal AI companion. Ask me anything, or start a voice call! +
+ Advanced Universal Responsive Avatar.
+ Ready for your next inquiry.
Active • High Precision Mode
++ {isCallActive ? 'Interactive Mode' : 'Ready to Assist'} +
Contextual Data Vectors
+- {new Date(f.created_at).toLocaleDateString()} • {formatSize(f.size_bytes)} -
+No Data Mapped
+