From 5562ed2f889e339120971e3224e9f1049bbcb861 Mon Sep 17 00:00:00 2001 From: Raygama Date: Thu, 9 Apr 2026 20:00:13 +0700 Subject: [PATCH 1/3] Add provider abstraction, settings API & keys Introduce a Provider Abstraction Layer and settings management: add provider registry and provider adapters (OpenAI-compatible and Anthropic), with retry/fallback logic and provider inference. - Add settings_service (Supabase-backed) and FastAPI /api/v1/settings routes to get/update settings and API keys; wire settings router into app. Refactor LLMService to delegate to provider_registry. - Enhance memory_service to support OpenAI, OpenRouter and local Ollama embeddings and detect Ollama runtime. - Update prompter to respect admin/system_prompt from settings. - Add dashboard ApiKeys UI, test fixtures, and minor model/asset tweaks. Also add new env variables to .env.example for Anthropic, Groq and Ollama. --- .env.example | 5 + ai-service/app/api/v1/settings.py | 62 +++ ai-service/app/core/config.py | 3 + ai-service/app/main.py | 2 + ai-service/app/services/llm.py | 93 +--- ai-service/app/services/memory_service.py | 40 +- ai-service/app/services/prompter.py | 45 +- ai-service/app/services/providers/__init__.py | 3 + .../services/providers/anthropic_provider.py | 183 +++++++ ai-service/app/services/providers/base.py | 147 ++++++ .../app/services/providers/openai_compat.py | 183 +++++++ ai-service/app/services/providers/registry.py | 245 +++++++++ ai-service/app/services/settings_service.py | 82 +++ ai-service/tests/__init__.py | 0 ai-service/tests/conftest.py | 71 +++ ai-service/tests/providers/__init__.py | 0 .../public/models/hutao/Hu Tao.model3.json | 12 +- dashboard/src/components/ApiKeys.jsx | 142 +++++ dashboard/src/components/AvatarRenderer.jsx | 483 +++++++++++------- .../src/components/AvatarRenderer.test.jsx | 205 +++++--- dashboard/src/components/CallOverlay.jsx | 28 +- dashboard/src/components/PersonalityTuner.jsx | 138 ++++- voice-agent/agent.py | 421 +++++++++------ voice-agent/aura_tts.py | 259 +++++----- voice-agent/vtube_controller.py | 56 +- 25 files changed, 2200 insertions(+), 708 deletions(-) create mode 100644 ai-service/app/api/v1/settings.py create mode 100644 ai-service/app/services/providers/__init__.py create mode 100644 ai-service/app/services/providers/anthropic_provider.py create mode 100644 ai-service/app/services/providers/base.py create mode 100644 ai-service/app/services/providers/openai_compat.py create mode 100644 ai-service/app/services/providers/registry.py create mode 100644 ai-service/app/services/settings_service.py create mode 100644 ai-service/tests/__init__.py create mode 100644 ai-service/tests/conftest.py create mode 100644 ai-service/tests/providers/__init__.py create mode 100644 dashboard/src/components/ApiKeys.jsx diff --git a/.env.example b/.env.example index 57f76b7..35bbb00 100644 --- a/.env.example +++ b/.env.example @@ -2,6 +2,11 @@ # OpenRouter API Key - Get yours at https://openrouter.ai/keys OPENROUTER_API_KEY=your_openrouter_key_here +# Optional: Direct provider keys (used when provider != openrouter) +ANTHROPIC_API_KEY= # Required for claude-* models +GROQ_API_KEY= # Required for Groq provider (fast Llama/Mixtral) +OLLAMA_BASE_URL=http://localhost:11434 # Local Ollama endpoint + # --- VOICE CONFIGURATION (LIVEKIT AGENTS) --- # Deepgram API Key (STT) - Get yours at https://console.deepgram.com/ DEEPGRAM_API_KEY=your_deepgram_key_here diff --git a/ai-service/app/api/v1/settings.py b/ai-service/app/api/v1/settings.py new file mode 100644 index 0000000..77941dd --- /dev/null +++ b/ai-service/app/api/v1/settings.py @@ -0,0 +1,62 @@ +from fastapi import APIRouter +from pydantic import BaseModel +from app.services.settings_service import settings_service + +router = APIRouter() + +PROVIDERS = ["openrouter", "openai", "anthropic", "groq", "ollama"] + + +class SettingsPatch(BaseModel): + system_prompt: str | None = None + model: str | None = None + provider: str | None = None + temperature: float | None = None + max_tokens: int | None = None + empathy: int | None = None + humor: int | None = None + formality: int | None = None + + +class ApiKeysPatch(BaseModel): + openrouter_api_key: str | None = None + deepgram_api_key: str | None = None + cartesia_api_key: str | None = None + anthropic_api_key: str | None = None + groq_api_key: str | None = None + ollama_base_url: str | None = None + livekit_url: str | None = None + livekit_api_key: str | None = None + livekit_api_secret: str | None = None + + +@router.get("") +def get_settings(): + return settings_service.get_settings() + + +@router.put("") +def update_settings(patch: SettingsPatch): + data = {k: v for k, v in patch.model_dump().items() if v is not None} + return settings_service.update_settings(data) + + +@router.get("/providers") +def list_providers(): + """Return available provider names for the UI dropdown.""" + return {"providers": PROVIDERS} + + +@router.get("/keys") +def get_api_keys(): + keys = settings_service.get_api_keys() + # Return masked values — just signals whether the key is configured + return {k: ("set" if (v and str(v).strip()) else None) + for k, v in keys.items() if k != "id"} + + +@router.put("/keys") +def update_api_keys(patch: ApiKeysPatch): + data = {k: v for k, v in patch.model_dump().items() if v is not None} + settings_service.update_api_keys(data) + return {"status": "ok"} diff --git a/ai-service/app/core/config.py b/ai-service/app/core/config.py index 5eff0f1..51de459 100644 --- a/ai-service/app/core/config.py +++ b/ai-service/app/core/config.py @@ -28,6 +28,9 @@ class Settings(BaseSettings): LLM_API_KEY: str | None = None OPENAI_API_KEY: str | None = None OPENROUTER_API_KEY: str | None = None + ANTHROPIC_API_KEY: str | None = None + GROQ_API_KEY: str | None = None + OLLAMA_BASE_URL: str = "http://localhost:11434" OPENAI_MODEL: str = "gpt-3.5-turbo" # Supabase diff --git a/ai-service/app/main.py b/ai-service/app/main.py index 4f78536..61e93b4 100644 --- a/ai-service/app/main.py +++ b/ai-service/app/main.py @@ -1,6 +1,7 @@ from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from app.api.v1 import chat, health, memory, rag +from app.api.v1 import settings as settings_router from app.core.config import settings import logging @@ -31,6 +32,7 @@ app.include_router(chat.router, prefix="/api/v1/chat", tags=["Chat"]) app.include_router(memory.router, prefix="/api/v1/memory", tags=["Memory"]) app.include_router(rag.router, prefix="/api/v1/rag", tags=["RAG"]) +app.include_router(settings_router.router, prefix="/api/v1/settings", tags=["Settings"]) @app.get("/") def read_root(): diff --git a/ai-service/app/services/llm.py b/ai-service/app/services/llm.py index a1d9c3c..7546e88 100644 --- a/ai-service/app/services/llm.py +++ b/ai-service/app/services/llm.py @@ -1,80 +1,29 @@ -from openai import OpenAI -from app.core.config import settings +""" +LLMService — thin facade over the Provider Abstraction Layer. + +All routing logic lives in providers/registry.py. +This class exists so existing callers (brain nodes, etc.) don't need to change. +""" import logging -import re +from app.services.providers.registry import provider_registry logger = logging.getLogger(__name__) -class LLMService: - def __init__(self): - self.api_key = settings.OPENROUTER_API_KEY or settings.OPENAI_API_KEY - self.model = settings.OPENAI_MODEL or "openai/gpt-3.5-turbo" - self.client = None - - # Determine Base URL (OpenRouter vs OpenAI) - self.base_url = "https://openrouter.ai/api/v1" if settings.OPENROUTER_API_KEY else None - - if self.api_key: - self.client = OpenAI( - api_key=self.api_key, - base_url=self.base_url - ) - logger.info(f"LLM Service Initialized. Model: {self.model}, Base: {self.base_url or 'Default'}") - else: - logger.warning("API Key not set. LLMService will fail.") - def generate(self, messages: list) -> dict: - """ - Generates a response from the LLM based on the list of messages. - Expects messages to be formatted by Prompter. - """ - if not self.client: - return { - "text": "Error: API Key is missing. I cannot think without it!", - "emotion": "[dizzy]" - } - - try: - extra_headers = {} - if settings.OPENROUTER_API_KEY: - extra_headers = { - "HTTP-Referer": "http://localhost:5173", # Frontend URL - "X-Title": "Project AURA", - } +class LLMService: + def generate( + self, + messages: list, + model: str | None = None, + temperature: float | None = None, + max_tokens: int | None = None, + ) -> dict: + return provider_registry.generate( + messages, + model=model, + temperature=temperature, + max_tokens=max_tokens, + ) - response = self.client.chat.completions.create( - model=self.model, - messages=messages, - temperature=0.7, - max_tokens=250, - extra_headers=extra_headers - ) - - content = response.choices[0].message.content - - # Robust parsing for emotion using Regex - # Matches [emotion] at the start of the string - emotion_match = re.match(r'^\[(.*?)\]', content) - - emotion = "neutral" - text = content - - if emotion_match: - emotion = emotion_match.group(1) - # Remove the emotion tag from the text - text = content[emotion_match.end():].strip() - - return { - "text": text, - "emotion": emotion, - "raw": content - } - - except Exception as e: - logger.error(f"LLM Generation Error: {e}") - return { - "text": f"I... I lost my train of thought. ({str(e)})", - "emotion": "[confused]" - } llm_service = LLMService() diff --git a/ai-service/app/services/memory_service.py b/ai-service/app/services/memory_service.py index b401291..7fc761f 100644 --- a/ai-service/app/services/memory_service.py +++ b/ai-service/app/services/memory_service.py @@ -4,17 +4,28 @@ """ from __future__ import annotations from typing import List +import urllib.request from supabase import create_client from langchain_openai import OpenAIEmbeddings from app.core.config import settings from uuid import UUID + from app.models.database import (Conversation, CreateConversation, Message, CreateMesssage, Memory, CreateMemory) import logging logger = logging.getLogger(__name__) + +def _ollama_is_running(base_url: str) -> bool: + """Return True if an Ollama server is reachable at base_url.""" + try: + urllib.request.urlopen(f"{base_url}/api/tags", timeout=2) + return True + except Exception: + return False + class MemoryService: def __init__(self): self.client = None @@ -27,16 +38,33 @@ def __init__(self): else: logger.warning("Supabase credentials not set. Memory service disabled.") - # Initialize embeddings model via OpenRouter - api_key = settings.OPENROUTER_API_KEY - if api_key: + # Initialize embeddings — try providers in order of preference + if settings.OPENAI_API_KEY: + self.embeddings = OpenAIEmbeddings( + api_key=settings.OPENAI_API_KEY, + model="text-embedding-3-small", + ) + logger.info("Embeddings: using OpenAI directly.") + elif settings.OPENROUTER_API_KEY: self.embeddings = OpenAIEmbeddings( - api_key=api_key, + api_key=settings.OPENROUTER_API_KEY, model="openai/text-embedding-3-small", - base_url="https://openrouter.ai/api/v1" + base_url="https://openrouter.ai/api/v1", + ) + logger.info("Embeddings: using OpenRouter.") + elif _ollama_is_running(settings.OLLAMA_BASE_URL): + self.embeddings = OpenAIEmbeddings( + api_key="ollama", + model="nomic-embed-text", + base_url=f"{settings.OLLAMA_BASE_URL}/v1", ) + logger.info("Embeddings: using local Ollama (nomic-embed-text).") else: - logger.warning("OPENROUTER_API_KEY not set. Memory embedding disabled.") + logger.warning( + "No embedding provider available " + "(OPENAI_API_KEY / OPENROUTER_API_KEY not set; Ollama not reachable). " + "Memory store/search disabled." + ) async def create_conversation(self, title: str = "New Conversation") -> UUID | None: if not self.client: diff --git a/ai-service/app/services/prompter.py b/ai-service/app/services/prompter.py index f2cb4d0..57cdfc3 100644 --- a/ai-service/app/services/prompter.py +++ b/ai-service/app/services/prompter.py @@ -1,40 +1,31 @@ from datetime import datetime from app.services.persona import persona_engine +from app.services.settings_service import settings_service -class Prompter: - def __init__(self): - self.system_prompt = """You are AURA (Advanced Universal Responsive Avatar), the spirited AI steward of the ASE Lab. - - {persona} - - **Context:** - - Current Time: {current_time} - """ +class Prompter: def build(self, message: str, context: dict = None) -> list: - """ - Constructs the messages list for the LLM. - """ current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - persona = persona_engine.get_persona() - - # Format system prompt - formatted_system = self.system_prompt.format( - current_time=current_time, - persona=persona + + # Custom system_prompt from admin panel overrides the hardcoded persona + db = settings_service.get_settings() + custom = (db.get("system_prompt") or "").strip() + persona = custom if custom else persona_engine.get_persona() + + formatted_system = ( + "You are AURA (Advanced Universal Responsive Avatar), " + "the spirited AI steward of the ASE Lab.\n\n" + f"{persona}\n\n" + f"**Context:**\n- Current Time: {current_time}" ) - - messages = [ - {"role": "system", "content": formatted_system} - ] - - # Add conversation history if available in context + + messages = [{"role": "system", "content": formatted_system}] + if context and "history" in context: messages.extend(context["history"]) - - # Add current user message + messages.append({"role": "user", "content": message}) - return messages + prompter = Prompter() diff --git a/ai-service/app/services/providers/__init__.py b/ai-service/app/services/providers/__init__.py new file mode 100644 index 0000000..292fc89 --- /dev/null +++ b/ai-service/app/services/providers/__init__.py @@ -0,0 +1,3 @@ +from app.services.providers.registry import provider_registry + +__all__ = ["provider_registry"] diff --git a/ai-service/app/services/providers/anthropic_provider.py b/ai-service/app/services/providers/anthropic_provider.py new file mode 100644 index 0000000..89685fe --- /dev/null +++ b/ai-service/app/services/providers/anthropic_provider.py @@ -0,0 +1,183 @@ +""" +Anthropic / Claude provider. + +Key differences from OpenAI-compatible providers: + +1. System message → separate `system` parameter (not in messages list). +2. Streaming: chunks are `content_block_delta` with type "text_delta" + (vs GPT's `choices[0].delta.content`). +3. Tool calls: come as `content_block_start` with type "tool_use" + (vs OpenAI's `message.tool_calls`). +4. Tool definitions: Anthropic uses a different schema than OpenAI. + We accept the OpenAI schema and translate it internally. + +Normalized output is always the same result dict as every other provider. +""" +from __future__ import annotations + +import json +import logging +from typing import AsyncGenerator + +from app.services.providers.base import LLMProvider, TextDelta, StreamDone, make_result, RetryableError, NonRetryableError + +logger = logging.getLogger(__name__) + + +def _split_system(messages: list[dict]) -> tuple[str, list[dict]]: + """Separate the system prompt from the rest of the message list.""" + system_parts = [] + rest = [] + for m in messages: + if m.get("role") == "system": + system_parts.append(m.get("content", "")) + else: + rest.append(m) + return "\n\n".join(system_parts), rest + + +def _openai_tools_to_anthropic(tools: list[dict]) -> list[dict]: + """ + Translate OpenAI tool schema to Anthropic's format. + + OpenAI: { "type": "function", "function": { "name", "description", "parameters" } } + Anthropic: { "name", "description", "input_schema" } + """ + result = [] + for t in tools: + fn = t.get("function", t) # handle both wrapped and unwrapped + result.append({ + "name": fn["name"], + "description": fn.get("description", ""), + "input_schema": fn.get("parameters", {"type": "object", "properties": {}}), + }) + return result + + +def _extract_tool_calls(content_blocks) -> list | None: + """Normalize Anthropic tool_use blocks to our common schema.""" + calls = [ + { + "id": block.id, + "name": block.name, + "arguments": json.dumps(block.input), + } + for block in content_blocks + if getattr(block, "type", None) == "tool_use" + ] + return calls or None + + +class AnthropicProvider(LLMProvider): + name = "anthropic" + + def __init__(self, api_key: str): + try: + import anthropic as _anthropic + self._anthropic = _anthropic + self._client = _anthropic.Anthropic(api_key=api_key) + self._async_client = _anthropic.AsyncAnthropic(api_key=api_key) + logger.info("[anthropic] provider ready") + except ImportError: + raise RuntimeError( + "The 'anthropic' package is required for the Anthropic provider. " + "Run: pip install anthropic" + ) + + # ── Blocking ────────────────────────────────────────────────────────────── + + def generate( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> dict: + system, user_messages = _split_system(messages) + kwargs = dict( + model=model, + system=system, + messages=user_messages, + temperature=temperature, + max_tokens=max_tokens, + ) + if tools: + kwargs["tools"] = _openai_tools_to_anthropic(tools) + + _a = self._anthropic # local ref so except clauses can reference it + try: + response = self._client.messages.create(**kwargs) + + # Text from text blocks + raw = "".join( + block.text for block in response.content + if getattr(block, "type", None) == "text" + ) + tool_calls = _extract_tool_calls(response.content) + + if tool_calls and not raw: + raw = f"[tool_call: {tool_calls[0]['name']}]" + + return make_result(raw, self.name, model, tool_calls=tool_calls) + + except _a.RateLimitError as e: + raise RetryableError(str(e), status_code=429) + except (_a.APIConnectionError, _a.APITimeoutError) as e: + raise RetryableError(str(e)) + except _a.InternalServerError as e: + raise RetryableError(str(e), status_code=getattr(e, "status_code", 500)) + except _a.AuthenticationError as e: + raise NonRetryableError(str(e), status_code=401) + except _a.BadRequestError as e: + raise NonRetryableError(str(e), status_code=400) + except Exception as e: + raise RetryableError(str(e)) + + # ── Streaming ───────────────────────────────────────────────────────────── + + async def stream( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> AsyncGenerator[TextDelta | StreamDone, None]: + system, user_messages = _split_system(messages) + assembled = "" + kwargs = dict( + model=model, + system=system, + messages=user_messages, + temperature=temperature, + max_tokens=max_tokens, + ) + if tools: + kwargs["tools"] = _openai_tools_to_anthropic(tools) + + try: + async with self._async_client.messages.stream(**kwargs) as stream: + async for event in stream: + if ( + event.type == "content_block_delta" + and hasattr(event, "delta") + and getattr(event.delta, "type", None) == "text_delta" + ): + chunk = event.delta.text or "" + if chunk: + assembled += chunk + yield TextDelta(text=chunk) + except Exception as e: + logger.error(f"[anthropic] stream error: {e}") + + result = make_result(assembled, self.name, model) + yield StreamDone( + text=result["text"], + emotion=result["emotion"], + raw=assembled, + provider=self.name, + model=model, + ) diff --git a/ai-service/app/services/providers/base.py b/ai-service/app/services/providers/base.py new file mode 100644 index 0000000..cfb6b84 --- /dev/null +++ b/ai-service/app/services/providers/base.py @@ -0,0 +1,147 @@ +""" +Provider Abstraction Layer — base types and interface. + +Every LLM provider normalizes its output into the same result dict +so the rest of the system never needs to know which model is running. + +Normalized result: + { text, emotion, raw, provider, model, tool_calls } + +Tool calls are always normalized to: + [{ "id": str, "name": str, "arguments": str (JSON) }] + — regardless of whether the provider used OpenAI function_call deltas + or Anthropic content_block tool_use blocks. + +Stream events (for future streaming endpoints): + TextDelta — incremental text chunk + StreamDone — final assembled result +""" +from __future__ import annotations + +import re +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import AsyncGenerator + + +# ── Normalized event types ──────────────────────────────────────────────────── + +@dataclass +class TextDelta: + """A chunk of text from a streaming response.""" + text: str + + +@dataclass +class StreamDone: + """Final event — carries the fully assembled response.""" + text: str + emotion: str + raw: str + provider: str + model: str + tool_calls: list | None = None + + +# ── Error types ─────────────────────────────────────────────────────────────── + +class RetryableError(Exception): + """ + Rate limit (429), server error (5xx), or transient network issue. + The registry will retry with exponential backoff, then try the next provider. + """ + def __init__(self, msg: str, status_code: int | None = None): + super().__init__(msg) + self.status_code = status_code + + +class NonRetryableError(Exception): + """ + Auth failure (401) or bad request (400). + - 401: key is wrong for this provider → skip to next provider. + - 400: our message is malformed → no provider will fix it; abort immediately. + """ + def __init__(self, msg: str, status_code: int | None = None): + super().__init__(msg) + self.status_code = status_code + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def parse_emotion(raw: str) -> tuple[str, str]: + """ + Extract the leading [emotion, tag] from a raw LLM response. + Returns (emotion_string, cleaned_text). + """ + stripped = raw.strip() + match = re.match(r'^\[(.*?)\]', stripped) + if match: + return match.group(1), stripped[match.end():].strip() + return "neutral", stripped + + +def make_result( + raw: str, + provider: str, + model: str, + tool_calls: list | None = None, +) -> dict: + """Build the normalized result dict that the rest of the system expects.""" + emotion, text = parse_emotion(raw) + return { + "text": text, + "emotion": emotion, + "raw": raw, + "provider": provider, + "model": model, + "tool_calls": tool_calls or None, + } + + +# ── Abstract base ───────────────────────────────────────────────────────────── + +class LLMProvider(ABC): + """ + All providers implement this interface. + `generate` is the blocking path used by the brain pipeline. + `stream` is the async-generator path for future streaming endpoints. + + Tool definitions follow the OpenAI schema: + [{ "type": "function", "function": { "name": ..., "description": ..., + "parameters": {...} } }] + Providers that use a different native schema (e.g. Anthropic) translate + internally — callers always pass the OpenAI format. + """ + + name: str = "base" + + @abstractmethod + def generate( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> dict: + """ + Blocking generation. Returns the normalized result dict: + { text, emotion, raw, provider, model, tool_calls } + """ + + @abstractmethod + async def stream( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> AsyncGenerator[TextDelta | StreamDone, None]: + """ + Streaming generation. + Yields TextDelta chunks, ends with one StreamDone. + """ + yield # type: ignore diff --git a/ai-service/app/services/providers/openai_compat.py b/ai-service/app/services/providers/openai_compat.py new file mode 100644 index 0000000..1def0a8 --- /dev/null +++ b/ai-service/app/services/providers/openai_compat.py @@ -0,0 +1,183 @@ +""" +OpenAI-compatible provider. + +Covers every backend that speaks the OpenAI chat-completions API: + • OpenRouter (base_url = https://openrouter.ai/api/v1) + • OpenAI (base_url = None → default) + • Groq (base_url = https://api.groq.com/openai/v1) + • Ollama (base_url = http://localhost:11434/v1) + +Tool call normalization: + OpenAI sends tool_calls on the response message. + Each tool call has: id, function.name, function.arguments (JSON string). + We surface these as [{ "id", "name", "arguments" }] in the result dict. +""" +from __future__ import annotations + +import logging +from typing import AsyncGenerator + +import openai as _openai_lib +from openai import OpenAI, AsyncOpenAI + +from app.services.providers.base import LLMProvider, TextDelta, StreamDone, make_result, RetryableError, NonRetryableError + +logger = logging.getLogger(__name__) + +_OPENROUTER_HEADERS = { + "HTTP-Referer": "http://localhost:5173", + "X-Title": "Project AURA", +} + + +def _extract_tool_calls(response_message) -> list | None: + """Normalize OpenAI tool_calls to our common schema.""" + raw_calls = getattr(response_message, "tool_calls", None) + if not raw_calls: + return None + return [ + { + "id": tc.id, + "name": tc.function.name, + "arguments": tc.function.arguments, # already a JSON string + } + for tc in raw_calls + ] + + +class OpenAICompatProvider(LLMProvider): + + def __init__( + self, + api_key: str, + base_url: str | None = None, + extra_headers: dict | None = None, + provider_name: str = "openai", + ): + self.name = provider_name + self._extra_headers = extra_headers or {} + self._client = OpenAI(api_key=api_key, base_url=base_url) + self._async_client = AsyncOpenAI(api_key=api_key, base_url=base_url) + logger.info(f"[{self.name}] provider ready (base_url={base_url or 'default'})") + + # ── Blocking ────────────────────────────────────────────────────────────── + + def generate( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> dict: + kwargs = dict( + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + extra_headers=self._extra_headers, + ) + if tools: + kwargs["tools"] = tools + kwargs["tool_choice"] = "auto" + + try: + response = self._client.chat.completions.create(**kwargs) + msg = response.choices[0].message + raw = msg.content or "" + tool_calls = _extract_tool_calls(msg) + + # When the model only returns a tool call (no text), give a placeholder + # so make_result always has something to parse. + if tool_calls and not raw: + raw = f"[tool_call: {tool_calls[0]['name']}]" + + return make_result(raw, self.name, model, tool_calls=tool_calls) + + except _openai_lib.RateLimitError as e: + raise RetryableError(str(e), status_code=429) + except (_openai_lib.APIConnectionError, _openai_lib.APITimeoutError) as e: + raise RetryableError(str(e)) + except _openai_lib.InternalServerError as e: + raise RetryableError(str(e), status_code=getattr(e, "status_code", 500)) + except _openai_lib.AuthenticationError as e: + raise NonRetryableError(str(e), status_code=401) + except (_openai_lib.BadRequestError, _openai_lib.NotFoundError) as e: + raise NonRetryableError(str(e), status_code=getattr(e, "status_code", 400)) + except Exception as e: + # Unknown error — treat as retryable so the registry can decide + raise RetryableError(str(e)) + + # ── Streaming ───────────────────────────────────────────────────────────── + + async def stream( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> AsyncGenerator[TextDelta | StreamDone, None]: + assembled = "" + kwargs = dict( + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + extra_headers=self._extra_headers, + ) + if tools: + kwargs["tools"] = tools + kwargs["tool_choice"] = "auto" + + try: + async with self._async_client.chat.completions.stream(**kwargs) as stream: + async for chunk in stream: + delta = chunk.choices[0].delta.content or "" + if delta: + assembled += delta + yield TextDelta(text=delta) + except Exception as e: + logger.error(f"[{self.name}] stream error: {e}") + + result = make_result(assembled, self.name, model) + yield StreamDone( + text=result["text"], + emotion=result["emotion"], + raw=assembled, + provider=self.name, + model=model, + ) + + +# ── Named constructors ──────────────────────────────────────────────────────── + +def openrouter_provider(api_key: str) -> OpenAICompatProvider: + return OpenAICompatProvider( + api_key=api_key, + base_url="https://openrouter.ai/api/v1", + extra_headers=_OPENROUTER_HEADERS, + provider_name="openrouter", + ) + + +def openai_provider(api_key: str) -> OpenAICompatProvider: + return OpenAICompatProvider(api_key=api_key, base_url=None, provider_name="openai") + + +def groq_provider(api_key: str) -> OpenAICompatProvider: + return OpenAICompatProvider( + api_key=api_key, + base_url="https://api.groq.com/openai/v1", + provider_name="groq", + ) + + +def ollama_provider(base_url: str = "http://localhost:11434") -> OpenAICompatProvider: + return OpenAICompatProvider( + api_key="ollama", + base_url=f"{base_url.rstrip('/')}/v1", + provider_name="ollama", + ) diff --git a/ai-service/app/services/providers/registry.py b/ai-service/app/services/providers/registry.py new file mode 100644 index 0000000..9a4cd9a --- /dev/null +++ b/ai-service/app/services/providers/registry.py @@ -0,0 +1,245 @@ +""" +Provider Registry — the single entry point for LLM calls. + +Responsibilities: + 1. Read active model / provider / temperature / max_tokens from settings_service + 2. Read the matching API key from settings_service (DB) or fall back to env vars + 3. Instantiate the right LLMProvider + 4. Call provider.generate() and return the normalized result + +Provider inference (when `provider` field is "auto" or missing): + model starts with "claude-" → anthropic + model contains "/" → openrouter (e.g. "deepseek/deepseek-v3.2") + model starts with gpt-/o1-/o3- → openai + model starts with llama/mistral… → ollama + explicit groq_ prefix → groq + fallback → openrouter +""" +from __future__ import annotations + +import logging +import os +import random +import time + +from app.services.providers.base import LLMProvider, RetryableError, NonRetryableError + +logger = logging.getLogger(__name__) + +_MAX_ATTEMPTS = 3 # attempts per provider before giving up on it +_BACKOFF_BASE = 1.0 # seconds; delay = base * 2^attempt + jitter + +# Ordered fallback chain — first provider with an available key wins +_FALLBACK_ORDER = ["openrouter", "openai", "groq", "ollama"] + +# ── Provider inference ──────────────────────────────────────────────────────── + +_OPENAI_PREFIXES = ("gpt-", "o1-", "o3-", "text-davinci", "babbage", "ada") +_OLLAMA_PREFIXES = ("llama", "mistral", "gemma", "phi", "qwen", "codellama", "deepseek-r1") + + +def infer_provider(model: str) -> str: + m = model.lower() + if m.startswith("claude-"): + return "anthropic" + if "/" in m: + return "openrouter" + if any(m.startswith(p) for p in _OPENAI_PREFIXES): + return "openai" + if any(m.startswith(p) for p in _OLLAMA_PREFIXES): + return "ollama" + return "openrouter" + + +# ── Registry ────────────────────────────────────────────────────────────────── + +class ProviderRegistry: + """ + Resolves and calls the correct LLM provider on every request. + Providers are constructed lazily and cached by (provider_name, key_hash). + """ + + def __init__(self): + self._cache: dict[str, LLMProvider] = {} + + # ── Public API ──────────────────────────────────────────────────────────── + + def generate( + self, + messages: list[dict], + *, + model: str | None = None, + temperature: float | None = None, + max_tokens: int | None = None, + tools: list[dict] | None = None, + ) -> dict: + # Lazy import avoids circular imports at module load time + from app.services.settings_service import settings_service + + db = settings_service.get_settings() + keys = settings_service.get_api_keys() + + actual_model = model or db.get("model") or "deepseek/deepseek-v3.2" + actual_temp = temperature if temperature is not None else float(db.get("temperature", 0.8)) + actual_max_tokens = max_tokens or int(db.get("max_tokens", 300)) + + configured_provider = (db.get("provider") or "auto").lower() + primary = ( + configured_provider + if configured_provider != "auto" + else infer_provider(actual_model) + ) + + # Build candidate list: primary first, then any fallback with an available key + candidates = [primary] + [ + p for p in _FALLBACK_ORDER + if p != primary and (p == "ollama" or self._pick_key(p, keys)) + ] + + call_kwargs = dict( + model=actual_model, + temperature=actual_temp, + max_tokens=actual_max_tokens, + tools=tools, + ) + + last_error: Exception | None = None + + for provider_name in candidates: + try: + provider = self._get_provider(provider_name, keys) + except (ValueError, RuntimeError) as e: + # Missing key or missing package — skip silently + logger.debug(f"[registry] skipping {provider_name}: {e}") + last_error = e + continue + + logger.info(f"[registry] trying {provider_name} / {actual_model}") + try: + result = self._call_with_retry(provider, messages, **call_kwargs) + if provider_name != primary: + logger.warning(f"[registry] fell back to {provider_name} (primary={primary} failed)") + return result + + except NonRetryableError as e: + last_error = e + if e.status_code == 400: + # Bad request — our message is wrong, no other provider will help + logger.error(f"[registry] bad request ({provider_name}): {e}") + break + # 401 auth failure — key is bad for this provider, try next + logger.warning(f"[registry] auth failed for {provider_name} (HTTP {e.status_code}), trying next") + continue + + except RetryableError as e: + # All retries for this provider exhausted — try next + logger.warning(f"[registry] {provider_name} exhausted retries: {e}") + last_error = e + continue + + logger.error(f"[registry] all providers failed. Last: {last_error}") + return { + "text": "I seem to be having trouble connecting right now. Please try again in a moment.", + "emotion": "confused", + "raw": "", + "provider": primary, + "model": actual_model, + "tool_calls": None, + } + + def _call_with_retry(self, provider: LLMProvider, messages: list[dict], **kwargs) -> dict: + """ + Call provider.generate() with exponential backoff on RetryableError. + Raises RetryableError if all attempts fail. + Raises NonRetryableError immediately (no retry). + """ + for attempt in range(_MAX_ATTEMPTS): + try: + return provider.generate(messages, **kwargs) + except NonRetryableError: + raise # propagate immediately + except RetryableError as e: + if attempt == _MAX_ATTEMPTS - 1: + raise # all attempts exhausted + delay = _BACKOFF_BASE * (2 ** attempt) + random.uniform(0.0, 0.5) + logger.warning( + f"[{provider.name}] attempt {attempt + 1}/{_MAX_ATTEMPTS} failed " + f"(status={e.status_code}): {e} — retrying in {delay:.1f}s" + ) + time.sleep(delay) + + # ── Provider instantiation ──────────────────────────────────────────────── + + def _get_provider(self, provider_name: str, keys: dict) -> LLMProvider: + # Cache key: provider name + first 8 chars of api key (detects key rotation) + raw_key = self._pick_key(provider_name, keys) + cache_key = f"{provider_name}:{(raw_key or '')[:8]}" + + if cache_key not in self._cache: + self._cache[cache_key] = self._build(provider_name, keys) + + return self._cache[cache_key] + + def _build(self, provider_name: str, keys: dict) -> LLMProvider: + from app.services.providers.openai_compat import ( + openrouter_provider, openai_provider, groq_provider, ollama_provider, + ) + from app.services.providers.anthropic_provider import AnthropicProvider + + if provider_name == "anthropic": + key = self._pick_key("anthropic", keys) + if not key: + raise ValueError("Anthropic API key not set. Add it via the dashboard or ANTHROPIC_API_KEY env var.") + return AnthropicProvider(api_key=key) + + if provider_name == "groq": + key = self._pick_key("groq", keys) + if not key: + raise ValueError("Groq API key not set. Add it via the dashboard or GROQ_API_KEY env var.") + return groq_provider(api_key=key) + + if provider_name == "openai": + key = self._pick_key("openai", keys) + if not key: + raise ValueError("OpenAI API key not set. Add it via the dashboard or OPENAI_API_KEY env var.") + return openai_provider(api_key=key) + + if provider_name == "ollama": + ollama_url = ( + (keys.get("ollama_base_url") or "").strip() + or os.getenv("OLLAMA_BASE_URL", "http://localhost:11434") + ) + return ollama_provider(base_url=ollama_url) + + # Default: openrouter + key = self._pick_key("openrouter", keys) + if not key: + raise ValueError("OpenRouter API key not set. Add it via the dashboard or OPENROUTER_API_KEY env var.") + return openrouter_provider(api_key=key) + + @staticmethod + def _pick_key(provider_name: str, keys: dict) -> str | None: + """DB key takes precedence over env var.""" + env_map = { + "openrouter": "OPENROUTER_API_KEY", + "openai": "OPENAI_API_KEY", + "anthropic": "ANTHROPIC_API_KEY", + "groq": "GROQ_API_KEY", + } + db_key_map = { + "openrouter": "openrouter_api_key", + "openai": "openrouter_api_key", # share the same field for now + "anthropic": "anthropic_api_key", + "groq": "groq_api_key", + } + + db_field = db_key_map.get(provider_name) + db_val = (keys.get(db_field) or "").strip() if db_field else "" + if db_val: + return db_val + + env_var = env_map.get(provider_name) + return os.getenv(env_var, "") if env_var else "" + + +provider_registry = ProviderRegistry() diff --git a/ai-service/app/services/settings_service.py b/ai-service/app/services/settings_service.py new file mode 100644 index 0000000..9f8dfd4 --- /dev/null +++ b/ai-service/app/services/settings_service.py @@ -0,0 +1,82 @@ +import logging +from supabase import create_client, Client +from app.core.config import settings as app_settings + +logger = logging.getLogger(__name__) + +_DEFAULTS = { + "system_prompt": None, + "model": "deepseek/deepseek-v3.2", + "provider": "openrouter", + "temperature": 0.8, + "max_tokens": 300, + "empathy": 50, + "humor": 50, + "formality": 50, +} + +_KEY_DEFAULTS = { + "openrouter_api_key": None, + "deepgram_api_key": None, + "cartesia_api_key": None, + "anthropic_api_key": None, + "groq_api_key": None, + "ollama_base_url": "http://localhost:11434", + "livekit_url": None, + "livekit_api_key": None, + "livekit_api_secret": None, +} + + +class SettingsService: + def __init__(self): + self._client: Client | None = None + if app_settings.SUPABASE_URL and app_settings.SUPABASE_SERVICE_KEY: + self._client = create_client(app_settings.SUPABASE_URL, app_settings.SUPABASE_SERVICE_KEY) + + def get_settings(self) -> dict: + if not self._client: + return dict(_DEFAULTS) + try: + result = self._client.table("personality_settings").select("*").eq("id", 1).single().execute() + if result.data: + return {**_DEFAULTS, **result.data} + except Exception as e: + logger.warning(f"SettingsService.get_settings failed: {e}") + return dict(_DEFAULTS) + + def update_settings(self, patch: dict) -> dict: + if not self._client: + return dict(_DEFAULTS) + try: + result = self._client.table("personality_settings").update(patch).eq("id", 1).execute() + if result.data: + return {**_DEFAULTS, **result.data[0]} + except Exception as e: + logger.error(f"SettingsService.update_settings failed: {e}") + return dict(_DEFAULTS) + + def get_api_keys(self) -> dict: + if not self._client: + return dict(_KEY_DEFAULTS) + try: + result = self._client.table("api_keys").select("*").eq("id", 1).single().execute() + if result.data: + return {**_KEY_DEFAULTS, **result.data} + except Exception as e: + logger.warning(f"SettingsService.get_api_keys failed: {e}") + return dict(_KEY_DEFAULTS) + + def update_api_keys(self, patch: dict) -> dict: + if not self._client: + return dict(_KEY_DEFAULTS) + try: + result = self._client.table("api_keys").update(patch).eq("id", 1).execute() + if result.data: + return {**_KEY_DEFAULTS, **result.data[0]} + except Exception as e: + logger.error(f"SettingsService.update_api_keys failed: {e}") + return dict(_KEY_DEFAULTS) + + +settings_service = SettingsService() diff --git a/ai-service/tests/__init__.py b/ai-service/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ai-service/tests/conftest.py b/ai-service/tests/conftest.py new file mode 100644 index 0000000..8eb1151 --- /dev/null +++ b/ai-service/tests/conftest.py @@ -0,0 +1,71 @@ +""" +Shared pytest fixtures and env setup. +Loads the project .env so integration tests can use real API keys. +""" +import os +import sys +from pathlib import Path + +import pytest +from dotenv import load_dotenv + +# ── Add ai-service root to sys.path so `app.*` imports resolve ─────────────── +AI_SERVICE_DIR = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(AI_SERVICE_DIR)) + +# ── Load .env from project root ─────────────────────────────────────────────── +PROJECT_ROOT = AI_SERVICE_DIR.parent +env_path = PROJECT_ROOT / ".env" +if not env_path.exists(): + env_path = AI_SERVICE_DIR / ".env" +load_dotenv(env_path) + + +# ── Reusable message lists ──────────────────────────────────────────────────── + +@pytest.fixture +def simple_messages(): + return [ + {"role": "system", "content": "You are a helpful assistant. Reply very briefly."}, + {"role": "user", "content": "Say exactly: [smile] Hello!"}, + ] + + +@pytest.fixture +def tool_messages(): + return [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is the weather in Tokyo? Use the get_weather tool."}, + ] + + +@pytest.fixture +def sample_tools(): + return [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather for a city.", + "parameters": { + "type": "object", + "properties": { + "city": {"type": "string", "description": "City name"}, + }, + "required": ["city"], + }, + }, + } + ] + + +# ── Key availability helpers (used by integration marks) ───────────────────── + +def has_openrouter_key(): + return bool(os.getenv("OPENROUTER_API_KEY", "").strip()) + +def has_openai_key(): + return bool(os.getenv("OPENAI_API_KEY", "").strip()) + +def has_anthropic_key(): + return bool(os.getenv("ANTHROPIC_API_KEY", "").strip()) diff --git a/ai-service/tests/providers/__init__.py b/ai-service/tests/providers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dashboard/public/models/hutao/Hu Tao.model3.json b/dashboard/public/models/hutao/Hu Tao.model3.json index cdbd216..890ff95 100644 --- a/dashboard/public/models/hutao/Hu Tao.model3.json +++ b/dashboard/public/models/hutao/Hu Tao.model3.json @@ -8,7 +8,17 @@ "Hu Tao.8192/texture_02.png" ], "Physics": "Hu Tao.physics3.json", - "DisplayInfo": "Hu Tao.cdi3.json" + "DisplayInfo": "Hu Tao.cdi3.json", + "Expressions": [ + { "Name": "SmileLock.exp3.json", "File": "SmileLock.exp3.json" }, + { "Name": "SadLock.exp3.json", "File": "SadLock.exp3.json" }, + { "Name": "Angry.exp3.json", "File": "Angry.exp3.json" }, + { "Name": "Ghost.exp3.json", "File": "Ghost.exp3.json" }, + { "Name": "GhostChange.exp3.json", "File": "GhostChange.exp3.json" }, + { "Name": "Shadow.exp3.json", "File": "Shadow.exp3.json" }, + { "Name": "PupilShrink.exp3.json", "File": "PupilShrink.exp3.json" }, + { "Name": "EyeshineOff.exp3.json", "File": "EyeshineOff.exp3.json" } + ] }, "Groups": [ { diff --git a/dashboard/src/components/ApiKeys.jsx b/dashboard/src/components/ApiKeys.jsx new file mode 100644 index 0000000..046c027 --- /dev/null +++ b/dashboard/src/components/ApiKeys.jsx @@ -0,0 +1,142 @@ +import { useState, useEffect } from 'react' +import { supabase } from '../lib/supabaseClient' + +const KEY_GROUPS = [ + { + label: 'LLM Providers', + icon: 'psychology', + fields: [ + { key: 'openrouter_api_key', label: 'OpenRouter API Key', placeholder: 'sk-or-v1-...', hint: 'Routes to DeepSeek, GPT, Mistral, and more' }, + { key: 'anthropic_api_key', label: 'Anthropic API Key', placeholder: 'sk-ant-...', hint: 'Required for claude-* models' }, + { key: 'groq_api_key', label: 'Groq API Key', placeholder: 'gsk_...', hint: 'Fast inference for Llama / Mixtral' }, + { key: 'ollama_base_url', label: 'Ollama Base URL', placeholder: 'http://localhost:11434', hint: 'Local LLMs via Ollama', isUrl: true }, + ], + }, + { + label: 'Voice', + icon: 'mic', + fields: [ + { key: 'deepgram_api_key', label: 'Deepgram API Key (STT)', placeholder: 'your_deepgram_key' }, + { key: 'cartesia_api_key', label: 'Cartesia API Key (TTS)', placeholder: 'your_cartesia_key', note: 'Requires agent restart' }, + ], + }, + { + label: 'LiveKit', + icon: 'cell_tower', + note: 'Requires agent restart', + fields: [ + { key: 'livekit_url', label: 'LiveKit URL', placeholder: 'wss://your-project.livekit.cloud' }, + { key: 'livekit_api_key', label: 'LiveKit API Key', placeholder: 'API key' }, + { key: 'livekit_api_secret', label: 'LiveKit API Secret', placeholder: 'API secret' }, + ], + }, +] + +export default function ApiKeys() { + const [draft, setDraft] = useState({}) + const [visible, setVisible] = useState({}) + const [saveState, setSaveState] = useState('idle') + const [loaded, setLoaded] = useState(false) + + useEffect(() => { + supabase.from('api_keys').select('*').eq('id', 1).single() + .then(({ data }) => { if (data) setDraft(data); setLoaded(true) }) + }, []) + + const patch = (key, value) => setDraft(d => ({ ...d, [key]: value })) + const toggleVisible = key => setVisible(v => ({ ...v, [key]: !v[key] })) + + const saveKeys = async () => { + setSaveState('saving') + try { + const payload = { ...draft } + delete payload.id + payload.updated_at = new Date().toISOString() + const { error } = await supabase.from('api_keys').update(payload).eq('id', 1) + if (error) throw error + setSaveState('saved') + setTimeout(() => setSaveState('idle'), 2500) + } catch (err) { + console.error('Failed to save API keys:', err) + setSaveState('error') + setTimeout(() => setSaveState('idle'), 3000) + } + } + + const btn = { + idle: { label: 'Save API Keys', icon: 'key', cls: 'bg-primary hover:bg-primary/90 shadow-primary/20' }, + saving: { label: 'Saving...', icon: 'hourglass_top', cls: 'bg-primary/70 cursor-not-allowed' }, + saved: { label: 'Keys Saved!', icon: 'check_circle', cls: 'bg-emerald-500 shadow-emerald-200' }, + error: { label: 'Save Failed', icon: 'error', cls: 'bg-red-500 shadow-red-200' }, + }[saveState] + + return ( +
+
+

+ vpn_key + API Keys +

+ +
+ +
+ {KEY_GROUPS.map(({ label, icon, note, fields }) => ( +
+
+ {icon} + {label} + {note && ( + + info{note} + + )} +
+
+ {fields.map(({ key, label: fl, placeholder, note: fn, hint, isUrl }) => ( +
+ + {hint &&

{hint}

} +
+ patch(key, e.target.value)} + placeholder={loaded ? placeholder : '••••••••'} + className="w-full bg-bg-light border border-slate-200 rounded-lg px-3 py-2 pr-10 text-sm font-mono focus:ring-1 focus:ring-primary focus:border-primary outline-none" + /> + {!isUrl && ( + + )} +
+ {fn && ( +

+ info{fn} +

+ )} +
+ ))} +
+
+ ))} +
+ +

+ lock + Stored in your private Supabase database. Leave a field empty to use the server's .env value. +

+
+ ) +} diff --git a/dashboard/src/components/AvatarRenderer.jsx b/dashboard/src/components/AvatarRenderer.jsx index 7707641..25a94aa 100644 --- a/dashboard/src/components/AvatarRenderer.jsx +++ b/dashboard/src/components/AvatarRenderer.jsx @@ -1,169 +1,297 @@ /** - * AvatarRenderer — Phase 2 - * Renders the Hu Tao Live2D model on a transparent canvas using - * pixi-live2d-display. Exposes an imperative ref API so CallOverlay - * can drive expressions in sync with AURA's speech. + * AvatarRenderer — Phase 3 + * Idle / Speaking state machine with richer moods and cute micro-animations: + * • 6 weighted moods per state (neutral, happy, curious, playful, sleepy, thinking) + * • Cute head-tilt event during idle + * • Occasional double-blink during idle + * • Sleepy: half-closed eyes, slow blink + * • Speaking: gentle nod, tighter saccade, snappier blink, slight smile boost * - * Usage: - * const avatarRef = useRef(null) - * - * avatarRef.current.setExpression(['smile', 'shadow'], 2.3) - * avatarRef.current.resetNeutral() + * Ref API: + * setExpression(names[], duration) — play expression(s) for N seconds + * setSpeaking(bool) — switch idle ↔ speaking state + * setMouthOpen(0–1) — drive lip sync each frame + * setParameter(id, value) — raw Core Model parameter override + * resetNeutral() — cancel active expression, return to idle */ import { forwardRef, useEffect, useImperativeHandle, useRef } from 'react' import * as PIXI from 'pixi.js' import { Live2DModel } from 'pixi-live2d-display/cubism4' -// Register PIXI Ticker so Live2D animations update every frame Live2DModel.registerTicker(PIXI.Ticker) -// Model path relative to dashboard/public/ const MODEL_URL = '/models/hutao/Hu Tao.model3.json' -// Expression tag → .exp3.json filename -// Source: voice-agent/model_parameters.json hotkeys + Hu_Tao__model_for_PC_/ directory const EXPRESSION_FILES = { - smile: 'SmileLock.exp3.json', - sad: 'SadLock.exp3.json', - angry: 'Angry.exp3.json', - ghost: 'Ghost.exp3.json', - ghost_nervous:'GhostChange.exp3.json', - shadow: 'Shadow.exp3.json', - pupil_shrink: 'PupilShrink.exp3.json', - eyeshine_off: 'EyeshineOff.exp3.json', + smile: 'SmileLock.exp3.json', + sad: 'SadLock.exp3.json', + angry: 'Angry.exp3.json', + ghost: 'Ghost.exp3.json', + ghost_nervous: 'GhostChange.exp3.json', + shadow: 'Shadow.exp3.json', + pupil_shrink: 'PupilShrink.exp3.json', + eyeshine_off: 'EyeshineOff.exp3.json', } -export const AvatarRenderer = forwardRef(function AvatarRenderer(props, ref) { - const { width = 400, height = 600 } = props - const containerRef = useRef(null) - const modelRef = useRef(null) - const appRef = useRef(null) - const mouthOpenRef = useRef(0) // driven by lip-sync from CallOverlay +// Maps LLM-annotated expression names → the closest ambient mood. +// Applied after the expression fades so the idle baseline stays emotionally coherent. +const EXPRESSION_TO_MOOD = { + smile: 'happy', + sad: 'neutral', // no sad mood — settle to calm neutral + angry: 'thinking', // furrowed brows, withdrawn + ghost: 'playful', // mischievous + ghost_nervous: 'curious', // uncertain, alert + shadow: 'thinking', // serious / dark + pupil_shrink: 'curious', // surprised / wide-eyed + eyeshine_off: 'sleepy', // dull / fatigued + wink: 'playful', + tongue: 'playful', +} - // ── Boot PIXI + load model ──────────────────────────────────────────────── - useEffect(() => { - let destroyed = false - - const app = new PIXI.Application({ - backgroundAlpha: 0, - width, - height, - antialias: true, - resolution: window.devicePixelRatio || 2, - autoDensity: true, - }) - appRef.current = app - containerRef.current.appendChild(app.view) - - Live2DModel.from(MODEL_URL, { autoInteract: false }).then((model) => { - if (destroyed) return // effect cleaned up before model finished loading - modelRef.current = model - app.stage.addChild(model) - - // Full-screen canvas: position her in the left-center third of the viewport. - // 1.9× height-fit zooms into upper body. Anchor is top-center so head - // sits at Y=0. X at 30% of the full viewport keeps her off the left edge - // and out of the way of the right-side controls overlay. - const logicalW = app.screen.width - const logicalH = app.screen.height +// ── State machine ────────────────────────────────────────────────────────── +const STATE = { IDLE: 'idle', SPEAKING: 'speaking' } + +// ── Mood definitions (target parameter values) ───────────────────────────── +const MOODS = { + neutral: { mouthForm: 0, browForm: 0, browRaise: 0, eyeSmile: 0 }, + happy: { mouthForm: 0.65, browForm: 0.30, browRaise: 0.45, eyeSmile: 0.55 }, + curious: { mouthForm: 0.20, browForm: -0.10, browRaise: 0.50, eyeSmile: 0 }, + playful: { mouthForm: 0.90, browForm: 0.50, browRaise: 0.70, eyeSmile: 0.30 }, + sleepy: { mouthForm: -0.05, browForm: 0.10, browRaise: -0.15, eyeSmile: 0 }, + thinking: { mouthForm: 0.10, browForm: -0.20, browRaise: 0.35, eyeSmile: 0 }, +} + +// Weighted mood pool per state — [moodKey, weight], weights sum to 1.0 +const MOOD_POOLS = { + [STATE.IDLE]: [ + ['neutral', 0.15], ['happy', 0.35], ['curious', 0.20], + ['playful', 0.10], ['sleepy', 0.10], ['thinking', 0.10], + ], + [STATE.SPEAKING]: [ + ['neutral', 0.10], ['happy', 0.45], ['curious', 0.20], + ['playful', 0.20], ['thinking', 0.05], + ], +} + +function pickWeightedMood(state) { + const pool = MOOD_POOLS[state] ?? MOOD_POOLS[STATE.IDLE] + const r = Math.random() + let acc = 0 + for (const [key, w] of pool) { + acc += w + if (r < acc) return MOODS[key] + } + return MOODS.neutral +} + +// ── Module-scoped Singleton State ────────────────────────────────────────── +let _app = null +let _model = null +let _loaded = false +let _mouthOpen = 0 +let _expressionActive = false +let _state = STATE.IDLE +let _pendingMood = null // set by setExpression, consumed by update loop on expiry + +function initSingleton(width, height) { + if (_app) return + + _app = new PIXI.Application({ + backgroundAlpha: 0, + width, + height, + antialias: true, + resolution: window.devicePixelRatio || 2, + autoDensity: true, + }) + + Live2DModel.from(MODEL_URL, { autoInteract: false }) + .then((model) => { + _model = model + _app.stage.addChild(model) + + const logicalW = _app.screen.width + const logicalH = _app.screen.height const autoScale = (logicalH / model.height) * 1.9 model.scale.set(autoScale) model.anchor.set(0.5, 0.0) model.position.set(logicalW * 0.5, 0) - // ── Idle animation ───────────────────────────────────────────────────── - // Patch coreModel.update() — the FINAL step before GPU commit. - // This runs AFTER the motion manager has set its keyframe values, so our - // params always overwrite whatever the motion manager tried to set. - // (Patching internalModel.update earlier didn't work because origUpdate - // runs the motion manager which overwrites our values before coreModel.update.) const core = model.internalModel.coreModel + const clamp = (v, lo, hi) => Math.max(lo, Math.min(hi, v)) let lastMs = performance.now() - const clamp = (v, lo, hi) => v < lo ? lo : v > hi ? hi : v - // ── Completely separate timers — blink and saccade never share state ── - let blinkTimer = 0, blinkPhase = 0, nextBlink = 2 + Math.random() * 4 + // ── Blink state ────────────────────────────────────────────────────── + let blinkTimer = 0, blinkPhase = 0, nextBlink = 2 + Math.random() * 3 + // Double-blink: blink twice in quick succession (cute quirk) + let dblBlinkPending = false + let dblBlinkTimer = 0, nextDblBlink = 10 + Math.random() * 10 + + // ── Saccade state ───────────────────────────────────────────────────── let saccadeTimer = 0, nextSaccade = 1 + Math.random() * 2 - // Eye movement: lerp slowly to target — eliminates all twitching let eyeTargetX = 0, eyeTargetY = 0, eyeX = 0, eyeY = 0 - // ── Mood: confirmed param IDs from Hu Tao.cdi3.json ────────────────── - // ParamMouthForm, ParamBrowLForm/RForm, Param37 (Brows Raise), - // ParamEyeLSmile/RSmile (eye squint) all exist in this model. + // ── Mood state ──────────────────────────────────────────────────────── let moodTimer = 0, nextMoodChange = 3 + Math.random() * 4 - let mouthFormT = 0, mouthFormC = 0 - let browFormT = 0, browFormC = 0 // L/R brow curve (happy=up, frown=down) - let browRaiseT = 0, browRaiseC = 0 // Param37: raise both brows - let eyeSmileT = 0, eyeSmileC = 0 // eye squint when smiling - - function pickMood() { - const roll = Math.random() - if (roll < 0.30) { // neutral - mouthFormT = 0; browFormT = 0; browRaiseT = 0; eyeSmileT = 0 - } else if (roll < 0.60) { // happy / cute smile - mouthFormT = 0.55 + Math.random() * 0.35 - browFormT = 0.35; browRaiseT = 0.4; eyeSmileT = 0.45 - } else if (roll < 0.80) { // thinking — look up - mouthFormT = -0.1; browFormT = 0.1; browRaiseT = 0.2; eyeSmileT = 0 - eyeTargetY = 0.45 + Math.random() * 0.3 // deliberate upward glance - nextSaccade = saccadeTimer + 2.8 // hold it - } else { // excited — big smile, raised brows - mouthFormT = 0.9; browFormT = 0.5; browRaiseT = 0.7; eyeSmileT = 0.25 - } - nextMoodChange = 3 + Math.random() * 5 - } + let currentMood = MOODS.happy + let mouthFormC = 0, browFormC = 0, browRaiseC = 0, eyeSmileC = 0 + + // ── Head tilt micro-animation (idle only) ───────────────────────────── + // Occasionally snaps to a cute side-tilt, holds briefly, then eases back + let tiltTimer = 0, nextTilt = 6 + Math.random() * 8 + let tiltTarget = 0, tiltC = 0 + let tiltHolding = false, tiltHoldTimer = 0, tiltHoldDuration = 0 + + // ── Speaking nod ────────────────────────────────────────────────────── + let nodPhase = 0 const origCoreUpdate = core.update.bind(core) + core.update = function () { - const now = performance.now() / 1000 + const now = performance.now() / 1000 const elapsed = Math.min((performance.now() - lastMs) / 1000, 0.1) lastMs = performance.now() - // ── Head — more amplitude so turns are clearly visible ───────────── - core.setParameterValueById('ParamAngleX', Math.sin(now * 0.31) * 12 + Math.sin(now * 0.73) * 3) - core.setParameterValueById('ParamAngleY', Math.sin(now * 0.19) * 5 + Math.sin(now * 0.47) * 2) - core.setParameterValueById('ParamAngleZ', Math.sin(now * 0.13) * 5 + Math.sin(now * 0.41) * 2) - core.setParameterValueById('ParamBodyAngleX', Math.sin(now * 0.28) * 4) - core.setParameterValueById('ParamBodyAngleZ', Math.sin(now * 0.21) * 3) - core.setParameterValueById('ParamBreath', Math.sin(now * 0.9) * 0.5 + 0.5) - core.setParameterValueById('ParamMouthOpenY', mouthOpenRef.current) - - // ── Mood tick — fast lerp so changes are clearly visible ─────────── - moodTimer += elapsed - if (moodTimer >= nextMoodChange) { moodTimer = 0; pickMood() } - const lm = elapsed * 4 // reach target in ~0.5s - mouthFormC += (mouthFormT - mouthFormC) * lm - browFormC += (browFormT - browFormC) * lm - browRaiseC += (browRaiseT - browRaiseC) * lm - eyeSmileC += (eyeSmileT - eyeSmileC) * lm - core.setParameterValueById('ParamMouthForm', mouthFormC) - core.setParameterValueById('ParamBrowLForm', browFormC) - core.setParameterValueById('ParamBrowRForm', browFormC) - core.setParameterValueById('Param37', browRaiseC) // Brows Raise - core.setParameterValueById('ParamEyeLSmile', eyeSmileC) - core.setParameterValueById('ParamEyeRSmile', eyeSmileC) - - // ── Eye saccades — own timer, slow lerp (no twitching) ──────────── + const speaking = _state === STATE.SPEAKING + const lerpSpd = speaking ? 5.0 : 3.5 + + // ── Breathing ──────────────────────────────────────────────────── + // Slightly faster when speaking (more energetic) + core.setParameterValueById('ParamBreath', + Math.sin(now * (speaking ? 1.1 : 0.75)) * 0.5 + 0.5) + + // ── Head movement ───────────────────────────────────────────────── + const swayAmt = speaking ? 0.35 : 1.0 + const bX = (Math.sin(now * 0.31) * 12 + Math.sin(now * 0.73) * 3) * swayAmt + const bY = (Math.sin(now * 0.19) * 5 + Math.sin(now * 0.47) * 2) * swayAmt + const bZ = (Math.sin(now * 0.13) * 5 + Math.sin(now * 0.41) * 2) * swayAmt + + // Gentle speaking nod — Y oscillation in rough speech rhythm + let nodY = 0 + if (speaking) { + nodPhase += elapsed * 2.6 + nodY = Math.sin(nodPhase) * 3.5 + } else { + nodPhase = 0 + } + + // Cute idle head tilt — snap in quickly, ease back slowly + if (!speaking) { + tiltTimer += elapsed + if (!tiltHolding && tiltTimer >= nextTilt) { + tiltTarget = (Math.random() < 0.5 ? 1 : -1) * (7 + Math.random() * 7) + tiltTimer = 0 + nextTilt = 6 + Math.random() * 8 + tiltHolding = true + tiltHoldTimer = 0 + tiltHoldDuration = 0.9 + Math.random() * 0.8 + } + } + if (tiltHolding) { + tiltHoldTimer += elapsed + if (tiltHoldTimer >= tiltHoldDuration) { tiltTarget = 0; tiltHolding = false } + } + tiltC += (tiltTarget - tiltC) * elapsed * (tiltTarget !== 0 ? 6.0 : 2.2) + + core.setParameterValueById('ParamAngleX', bX) + core.setParameterValueById('ParamAngleY', bY + nodY) + core.setParameterValueById('ParamAngleZ', bZ + tiltC) + core.setParameterValueById('ParamBodyAngleX', Math.sin(now * 0.28) * 4 * swayAmt) + core.setParameterValueById('ParamBodyAngleZ', Math.sin(now * 0.21) * 3 * swayAmt) + + // ── Lip sync ────────────────────────────────────────────────────── + core.setParameterValueById('ParamMouthOpenY', _mouthOpen) + + // ── Mood interpolation ──────────────────────────────────────────── + if (!_expressionActive) { + // Expression just expired — align ambient mood to the emotion the LLM set + if (_pendingMood) { + currentMood = _pendingMood + _pendingMood = null + moodTimer = 0 + nextMoodChange = 3 + Math.random() * 3 // hold this mood for 3-6s before drifting + } + + moodTimer += elapsed + if (moodTimer >= nextMoodChange) { + moodTimer = 0 + nextMoodChange = speaking + ? 2 + Math.random() * 2.5 + : 3 + Math.random() * 5 + currentMood = pickWeightedMood(_state) + + // Curious: look upward with a lingering gaze + if (currentMood === MOODS.curious) { + eyeTargetY = 0.45 + Math.random() * 0.30 + nextSaccade = saccadeTimer + 3 + } + // Thinking: look up-left (classic thinking glance) + if (currentMood === MOODS.thinking) { + eyeTargetX = -(0.4 + Math.random() * 0.3) + eyeTargetY = 0.4 + Math.random() * 0.3 + nextSaccade = saccadeTimer + 4 + } + } + + const lm = elapsed * lerpSpd + mouthFormC += (currentMood.mouthForm - mouthFormC) * lm + browFormC += (currentMood.browForm - browFormC) * lm + browRaiseC += (currentMood.browRaise - browRaiseC) * lm + eyeSmileC += (currentMood.eyeSmile - eyeSmileC) * lm + + // Speaking: add a slight smile boost (engaged / expressive look) + const mfBoost = speaking ? 0.20 : 0 + core.setParameterValueById('ParamMouthForm', clamp(mouthFormC + mfBoost, -1, 1)) + core.setParameterValueById('ParamBrowLForm', browFormC) + core.setParameterValueById('ParamBrowRForm', browFormC) + core.setParameterValueById('Param37', browRaiseC) + core.setParameterValueById('ParamEyeLSmile', eyeSmileC) + core.setParameterValueById('ParamEyeRSmile', eyeSmileC) + } + + // ── Saccade ─────────────────────────────────────────────────────── saccadeTimer += elapsed if (saccadeTimer >= nextSaccade) { - eyeTargetX = (Math.random() * 2 - 1) * 0.65 - const r = Math.random() - if (r < 0.20) eyeTargetY = 0.5 + Math.random() * 0.35 // look up - else if (r < 0.35) eyeTargetY = -0.3 - Math.random() * 0.25 // look down (shy) - else eyeTargetY = (Math.random() * 2 - 1) * 0.4 - nextSaccade = saccadeTimer + 1.5 + Math.random() * 2.5 + if (speaking) { + // Focus on "listener" — small central range, frequent updates + eyeTargetX = (Math.random() * 2 - 1) * 0.25 + eyeTargetY = (Math.random() * 2 - 1) * 0.15 + nextSaccade = saccadeTimer + 0.8 + Math.random() * 1.0 + } else { + eyeTargetX = (Math.random() * 2 - 1) * 0.65 + const r = Math.random() + if (r < 0.20) eyeTargetY = 0.5 + Math.random() * 0.35 + else if (r < 0.35) eyeTargetY = -0.3 - Math.random() * 0.25 + else eyeTargetY = (Math.random() * 2 - 1) * 0.4 + nextSaccade = saccadeTimer + 1.5 + Math.random() * 2.5 + } } - // lerp speed 3.5 — eyes drift naturally, never snap or twitch - eyeX += (eyeTargetX - eyeX) * elapsed * 3.5 - eyeY += (eyeTargetY - eyeY) * elapsed * 3.5 + const gzSpd = speaking ? 5.0 : 3.5 + eyeX += (eyeTargetX - eyeX) * elapsed * gzSpd + eyeY += (eyeTargetY - eyeY) * elapsed * gzSpd core.setParameterValueById('ParamEyeBallX', clamp(eyeX, -1, 1)) core.setParameterValueById('ParamEyeBallY', clamp(eyeY, -1, 1)) - // ── Blink — own timer, stays within 0–1 always ──────────────────── + // ── Double-blink scheduler (idle only) ──────────────────────────── + if (!speaking) { + dblBlinkTimer += elapsed + if (dblBlinkTimer >= nextDblBlink) { + dblBlinkPending = true + dblBlinkTimer = 0 + nextDblBlink = 10 + Math.random() * 12 + } + } + + // ── Blink ───────────────────────────────────────────────────────── + const isSleepy = currentMood === MOODS.sleepy + // Speaking: snappy blink (11). Sleepy: slow droopy blink (6). Normal: 9 + const bspd = speaking ? 11 : (isSleepy ? 6 : 9) blinkTimer += elapsed - const bspd = 9 - if (blinkPhase === 0 && blinkTimer >= nextBlink) { blinkPhase = 1; blinkTimer = 0 } + + if (blinkPhase === 0 && blinkTimer >= nextBlink) { + blinkPhase = 1; blinkTimer = 0 + } if (blinkPhase === 1) { const v = clamp(1 - blinkTimer * bspd, 0, 1) core.setParameterValueById('ParamEyeLOpen', v) @@ -173,87 +301,98 @@ export const AvatarRenderer = forwardRef(function AvatarRenderer(props, ref) { const v = clamp(blinkTimer * bspd, 0, 1) core.setParameterValueById('ParamEyeLOpen', v) core.setParameterValueById('ParamEyeROpen', v) - if (v >= 1) { blinkPhase = 0; blinkTimer = 0; nextBlink = 3 + Math.random() * 5 } + if (v >= 1) { + blinkPhase = 0; blinkTimer = 0 + if (dblBlinkPending) { + nextBlink = 0.06 + Math.random() * 0.08 // blink again almost immediately + dblBlinkPending = false + } else if (isSleepy) { + nextBlink = 1.5 + Math.random() * 2.0 // sleepy: blinks more often + } else if (speaking) { + nextBlink = 4.0 + Math.random() * 3.0 // speaking: eyes stay open longer + } else { + nextBlink = 3.0 + Math.random() * 5.0 // normal idle + } + } } else { - core.setParameterValueById('ParamEyeLOpen', 1) - core.setParameterValueById('ParamEyeROpen', 1) + // Resting open — sleepy mode: eyes only 72% open (heavy lidded) + if (!_expressionActive) { + const restOpen = isSleepy ? 0.72 : 1.0 + core.setParameterValueById('ParamEyeLOpen', restOpen) + core.setParameterValueById('ParamEyeROpen', restOpen) + } } origCoreUpdate() } - model._origCoreUpdate = origCoreUpdate - }).catch((err) => { - console.error('[AvatarRenderer] Failed to load Live2D model:', err) + _loaded = true }) + .catch((err) => console.error('[AvatarRenderer] Failed to load Live2D model:', err)) +} +export const AvatarRenderer = forwardRef(function AvatarRenderer(props, ref) { + const { width = 400, height = 600 } = props + const containerRef = useRef(null) + + useEffect(() => { + initSingleton(width, height) + const container = containerRef.current + if (container && _app) container.appendChild(_app.view) return () => { - destroyed = true - if (modelRef.current?._origCoreUpdate) - modelRef.current.internalModel.coreModel.update = modelRef.current._origCoreUpdate - appRef.current = null - modelRef.current = null - app.destroy(true) + if (container && _app && _app.view.parentNode === container) + container.removeChild(_app.view) } - }, []) // intentionally empty — only run once on mount + }, [width, height]) - // ── Imperative API ──────────────────────────────────────────────────────── useImperativeHandle(ref, () => ({ - /** - * Apply one or more expression tags for `duration` seconds, - * then auto-reset to the default idle expression. - * @param {string[]} names - e.g. ['smile', 'shadow'] - * @param {number} duration - seconds before auto-reset - */ setExpression(names, duration) { - const model = modelRef.current - if (!model) return + if (!_loaded || !_model) return + _expressionActive = true + // Queue the mood that best matches this expression — applied when it expires for (const name of names) { - const file = EXPRESSION_FILES[name] - if (file) { - model.expression(file) - } + const moodKey = EXPRESSION_TO_MOOD[name] + if (moodKey) { _pendingMood = MOODS[moodKey]; break } + } - // Parameter-based expressions (using actual Cubism 4 IDs from cdi3.json) + for (const name of names) { + const file = EXPRESSION_FILES[name] + if (file) _model.expression(file) if (name === 'wink') { - const c = model.internalModel.coreModel - c.setParameterValueById('ParamEyeLOpen', 0.0) + const c = _model.internalModel.coreModel + c.setParameterValueById('ParamEyeLOpen', 0.0) c.setParameterValueById('ParamBrowLForm', -1.0) - c.setParameterValueById('ParamMouthForm', 1.0) + c.setParameterValueById('ParamMouthForm', 1.0) } if (name === 'tongue') { - const c = model.internalModel.coreModel + const c = _model.internalModel.coreModel c.setParameterValueById('ParamMouthOpenY', 1.0) - c.setParameterValueById('ParamMouthForm', -1.0) + c.setParameterValueById('ParamMouthForm', -1.0) } } - - // Schedule auto-reset after the audio segment finishes setTimeout(() => { - modelRef.current?.expression() // no-arg = reset to default + _expressionActive = false + if (_model) _model.expression() }, duration * 1000) }, - /** - * Directly set a Live2D parameter by ID. - * Useful for lip-sync or head-tracking integrations. - */ + /** Switch between idle and speaking animation state */ + setSpeaking(active) { + _state = active ? STATE.SPEAKING : STATE.IDLE + }, + setParameter(name, value) { - modelRef.current?.internalModel.coreModel.setParameterValueById(name, value) + _model?.internalModel.coreModel.setParameterValueById(name, value) }, - /** Immediately reset to default idle expression. */ resetNeutral() { - modelRef.current?.expression() + _expressionActive = false + _model?.expression() }, - /** - * Drive mouth open from audio amplitude (0–1). - * Called each animation frame by CallOverlay's Web Audio analyser. - */ setMouthOpen(v) { - mouthOpenRef.current = Math.max(0, Math.min(1, v)) + _mouthOpen = Math.max(0, Math.min(1, v)) }, }), []) diff --git a/dashboard/src/components/AvatarRenderer.test.jsx b/dashboard/src/components/AvatarRenderer.test.jsx index a27f29a..fa3df66 100644 --- a/dashboard/src/components/AvatarRenderer.test.jsx +++ b/dashboard/src/components/AvatarRenderer.test.jsx @@ -1,7 +1,7 @@ /** - * Phase 2 tests — AvatarRenderer component + * AvatarRenderer tests — Phase 3 * All GPU / PIXI / Live2D dependencies are mocked so these run in jsdom - * without a real GPU or network. + * without a real WebGL context or network. * * Run: cd dashboard && npm test */ @@ -11,44 +11,57 @@ import { render, act } from '@testing-library/react' import { createRef } from 'react' import { AvatarRenderer } from './AvatarRenderer' -// ── Mock heavy GPU dependencies ──────────────────────────────────────────── +// ── Mocks ────────────────────────────────────────────────────────────────── const mockSetParameterValueById = vi.fn() const mockExpression = vi.fn() +const mockCoreUpdate = vi.fn() + const mockModel = { + height: 600, // needed for auto-scale calculation expression: mockExpression, - scale: { set: vi.fn() }, - anchor: { set: vi.fn() }, + scale: { set: vi.fn() }, + anchor: { set: vi.fn() }, position: { set: vi.fn() }, internalModel: { - coreModel: { setParameterValueById: mockSetParameterValueById }, + coreModel: { + setParameterValueById: mockSetParameterValueById, + update: mockCoreUpdate, // needed for core.update.bind() in initSingleton + }, }, } -const mockStage = { addChild: vi.fn() } -const mockRenderer = { width: 400, height: 600 } + +// A real canvas element so container.appendChild / removeChild work in jsdom +const mockCanvas = document.createElement('canvas') + const mockApp = { - stage: mockStage, - renderer: mockRenderer, + view: mockCanvas, + stage: { addChild: vi.fn() }, + screen: { width: 400, height: 600 }, // used for model positioning + renderer: { width: 400, height: 600 }, destroy: vi.fn(), } vi.mock('pixi.js', () => ({ Application: vi.fn(() => mockApp), + Ticker: {}, // passed to Live2DModel.registerTicker })) -vi.mock('pixi-live2d-display', () => ({ +// Must mock the cubism4 sub-path — that's what the component imports +vi.mock('pixi-live2d-display/cubism4', () => ({ Live2DModel: { + registerTicker: vi.fn(), from: vi.fn(() => Promise.resolve(mockModel)), }, })) // ── Helpers ──────────────────────────────────────────────────────────────── -/** Mount the component and wait for the async model load to complete. */ +/** Mount the component and wait for the async model load to settle. */ async function mountAndLoad(props = {}) { const ref = createRef() const result = render() - await act(async () => {}) // flush the Live2DModel.from() promise + await act(async () => {}) // flush Live2DModel.from() promise + React effects return { ref, ...result } } @@ -59,73 +72,43 @@ describe('AvatarRenderer', () => { vi.clearAllMocks() }) - // ── DOM ────────────────────────────────────────────────────────────────── + // ── Rendering ───────────────────────────────────────────────────────────── + + it('renders a container div', async () => { + const { container } = await mountAndLoad() + expect(container.firstChild).toBeTruthy() + }) - it('renders a canvas element', async () => { + it('renders a canvas element inside the container', async () => { const { container } = await mountAndLoad() expect(container.querySelector('canvas')).toBeTruthy() }) - it('canvas has correct width and height attributes', async () => { + it('wrapper div reflects width and height props', async () => { const { container } = await mountAndLoad({ width: 320, height: 480 }) - const canvas = container.querySelector('canvas') - expect(canvas.getAttribute('width')).toBe('320') - expect(canvas.getAttribute('height')).toBe('480') + const div = container.firstChild + expect(div.style.width).toBe('320px') + expect(div.style.height).toBe('480px') }) // ── Expression file mapping ─────────────────────────────────────────────── - it('setExpression maps smile → SmileLock.exp3.json', async () => { - const { ref } = await mountAndLoad() - ref.current.setExpression(['smile'], 2.0) - expect(mockExpression).toHaveBeenCalledWith('SmileLock.exp3.json') - }) - - it('setExpression maps sad → SadLock.exp3.json', async () => { - const { ref } = await mountAndLoad() - ref.current.setExpression(['sad'], 2.0) - expect(mockExpression).toHaveBeenCalledWith('SadLock.exp3.json') - }) - - it('setExpression maps angry → Angry.exp3.json', async () => { - const { ref } = await mountAndLoad() - ref.current.setExpression(['angry'], 1.5) - expect(mockExpression).toHaveBeenCalledWith('Angry.exp3.json') - }) - - it('setExpression maps ghost → Ghost.exp3.json', async () => { - const { ref } = await mountAndLoad() - ref.current.setExpression(['ghost'], 2.0) - expect(mockExpression).toHaveBeenCalledWith('Ghost.exp3.json') - }) - - it('setExpression maps ghost_nervous → GhostChange.exp3.json', async () => { - const { ref } = await mountAndLoad() - ref.current.setExpression(['ghost_nervous'], 2.0) - expect(mockExpression).toHaveBeenCalledWith('GhostChange.exp3.json') - }) - - it('setExpression maps shadow → Shadow.exp3.json', async () => { - const { ref } = await mountAndLoad() - ref.current.setExpression(['shadow'], 2.0) - expect(mockExpression).toHaveBeenCalledWith('Shadow.exp3.json') - }) - - it('setExpression maps eyeshine_off → EyeshineOff.exp3.json', async () => { - const { ref } = await mountAndLoad() - ref.current.setExpression(['eyeshine_off'], 1.5) - expect(mockExpression).toHaveBeenCalledWith('EyeshineOff.exp3.json') - }) - - it('setExpression maps pupil_shrink → PupilShrink.exp3.json', async () => { + it.each([ + ['smile', 'SmileLock.exp3.json' ], + ['sad', 'SadLock.exp3.json' ], + ['angry', 'Angry.exp3.json' ], + ['ghost', 'Ghost.exp3.json' ], + ['ghost_nervous', 'GhostChange.exp3.json'], + ['shadow', 'Shadow.exp3.json' ], + ['pupil_shrink', 'PupilShrink.exp3.json'], + ['eyeshine_off', 'EyeshineOff.exp3.json'], + ])('setExpression maps "%s" → %s', async (tag, file) => { const { ref } = await mountAndLoad() - ref.current.setExpression(['pupil_shrink'], 1.5) - expect(mockExpression).toHaveBeenCalledWith('PupilShrink.exp3.json') + ref.current.setExpression([tag], 2.0) + expect(mockExpression).toHaveBeenCalledWith(file) }) - // ── Multi-expression ────────────────────────────────────────────────────── - - it('setExpression applies all tags in the list', async () => { + it('setExpression applies all tags in the array', async () => { const { ref } = await mountAndLoad() ref.current.setExpression(['smile', 'shadow'], 2.0) expect(mockExpression).toHaveBeenCalledWith('SmileLock.exp3.json') @@ -134,35 +117,34 @@ describe('AvatarRenderer', () => { // ── Parameter-based expressions ─────────────────────────────────────────── - it('setExpression wink sets EyeOpenLeft=0, BrowLeftY=0, MouthSmile=1', async () => { + it('wink sets correct Cubism4 Core Model parameters', async () => { const { ref } = await mountAndLoad() ref.current.setExpression(['wink'], 1.5) - expect(mockSetParameterValueById).toHaveBeenCalledWith('EyeOpenLeft', 0.0) - expect(mockSetParameterValueById).toHaveBeenCalledWith('BrowLeftY', 0.0) - expect(mockSetParameterValueById).toHaveBeenCalledWith('MouthSmile', 1.0) + expect(mockSetParameterValueById).toHaveBeenCalledWith('ParamEyeLOpen', 0.0) + expect(mockSetParameterValueById).toHaveBeenCalledWith('ParamBrowLForm', -1.0) + expect(mockSetParameterValueById).toHaveBeenCalledWith('ParamMouthForm', 1.0) }) - it('setExpression tongue sets MouthOpen=1, TongueOut=1, MouthSmile=0', async () => { + it('tongue sets correct Cubism4 Core Model parameters', async () => { const { ref } = await mountAndLoad() ref.current.setExpression(['tongue'], 1.5) - expect(mockSetParameterValueById).toHaveBeenCalledWith('MouthOpen', 1.0) - expect(mockSetParameterValueById).toHaveBeenCalledWith('TongueOut', 1.0) - expect(mockSetParameterValueById).toHaveBeenCalledWith('MouthSmile', 0.0) + expect(mockSetParameterValueById).toHaveBeenCalledWith('ParamMouthOpenY', 1.0) + expect(mockSetParameterValueById).toHaveBeenCalledWith('ParamMouthForm', -1.0) }) // ── Auto-reset ──────────────────────────────────────────────────────────── - it('setExpression schedules auto-reset after duration ms', async () => { + it('setExpression resets to neutral after the given duration', async () => { vi.useFakeTimers() const { ref } = await mountAndLoad() ref.current.setExpression(['smile'], 2.0) mockExpression.mockClear() vi.advanceTimersByTime(2000) - expect(mockExpression).toHaveBeenCalledWith() // no-arg = reset to default + expect(mockExpression).toHaveBeenCalledWith() // no-arg call = reset to default vi.useRealTimers() }) - it('auto-reset fires after the correct delay', async () => { + it('auto-reset does not fire before the duration elapses', async () => { vi.useFakeTimers() const { ref } = await mountAndLoad() ref.current.setExpression(['angry'], 1.5) @@ -174,6 +156,30 @@ describe('AvatarRenderer', () => { vi.useRealTimers() }) + // ── Mood memory ─────────────────────────────────────────────────────────── + // The full mood-rendering loop requires a live PIXI ticker (unavailable in jsdom). + // These tests verify the pending-mood pipeline is wired without crashing. + + it('setExpression with a mood-mapped name does not throw', async () => { + const { ref } = await mountAndLoad() + // Each of these has an EXPRESSION_TO_MOOD entry and should queue a pending mood + for (const tag of ['smile', 'sad', 'angry', 'ghost', 'ghost_nervous', + 'shadow', 'pupil_shrink', 'eyeshine_off']) { + expect(() => ref.current.setExpression([tag], 1.0)).not.toThrow() + } + }) + + it('mood memory: expression → expiry → state transition completes cleanly', async () => { + vi.useFakeTimers() + const { ref } = await mountAndLoad() + ref.current.setExpression(['smile'], 2.0) // queues _pendingMood = MOODS.happy + vi.advanceTimersByTime(2000) // triggers auto-reset; _pendingMood consumed on next frame + // After expiry the avatar should accept further API calls without errors + expect(() => ref.current.setSpeaking(false)).not.toThrow() + expect(() => ref.current.resetNeutral()).not.toThrow() + vi.useRealTimers() + }) + // ── resetNeutral ────────────────────────────────────────────────────────── it('resetNeutral calls model.expression() with no arguments', async () => { @@ -182,19 +188,56 @@ describe('AvatarRenderer', () => { expect(mockExpression).toHaveBeenCalledWith() }) - // ── setParameter ───────────────────────────────────────────────────────── + // ── setParameter ────────────────────────────────────────────────────────── - it('setParameter forwards name and value to coreModel', async () => { + it('setParameter forwards the id and value to coreModel', async () => { const { ref } = await mountAndLoad() ref.current.setParameter('ParamMouthOpenY', 0.8) expect(mockSetParameterValueById).toHaveBeenCalledWith('ParamMouthOpenY', 0.8) }) + // ── setSpeaking ─────────────────────────────────────────────────────────── + + it('setSpeaking(true) switches to speaking state without throwing', async () => { + const { ref } = await mountAndLoad() + expect(() => ref.current.setSpeaking(true)).not.toThrow() + }) + + it('setSpeaking(false) switches to idle state without throwing', async () => { + const { ref } = await mountAndLoad() + expect(() => ref.current.setSpeaking(false)).not.toThrow() + }) + + it('setSpeaking can toggle states repeatedly without side effects', async () => { + const { ref } = await mountAndLoad() + ref.current.setSpeaking(true) + ref.current.setSpeaking(false) + ref.current.setSpeaking(true) + // State changes should not trigger expressions + expect(mockExpression).not.toHaveBeenCalled() + }) + + // ── setMouthOpen ────────────────────────────────────────────────────────── + + it('setMouthOpen accepts values within [0, 1]', async () => { + const { ref } = await mountAndLoad() + expect(() => ref.current.setMouthOpen(0)).not.toThrow() + expect(() => ref.current.setMouthOpen(0.5)).not.toThrow() + expect(() => ref.current.setMouthOpen(1)).not.toThrow() + }) + + it('setMouthOpen silently clamps out-of-range values', async () => { + const { ref } = await mountAndLoad() + expect(() => ref.current.setMouthOpen(-1.0)).not.toThrow() + expect(() => ref.current.setMouthOpen(2.5)).not.toThrow() + }) + // ── Guard rails ─────────────────────────────────────────────────────────── - it('unknown expression name is silently ignored (no throw)', async () => { + it('unknown expression tag is silently ignored', async () => { const { ref } = await mountAndLoad() expect(() => ref.current.setExpression(['nonexistent_tag'], 1.0)).not.toThrow() + expect(mockExpression).not.toHaveBeenCalled() }) it('empty expression list does not throw', async () => { diff --git a/dashboard/src/components/CallOverlay.jsx b/dashboard/src/components/CallOverlay.jsx index f170211..0ab7b3e 100644 --- a/dashboard/src/components/CallOverlay.jsx +++ b/dashboard/src/components/CallOverlay.jsx @@ -19,9 +19,10 @@ export default function CallOverlay({ onClose }) { const roomRef = useRef(null) const timerRef = useRef(null) const avatarRef = useRef(null) - const audioCtxRef = useRef(null) - const analyserRef = useRef(null) - const lipRafRef = useRef(null) + const audioCtxRef = useRef(null) + const analyserRef = useRef(null) + const lipRafRef = useRef(null) + const speakTimeoutRef = useRef(null) // ─── Connect to LiveKit ────────────────────── useEffect(() => { @@ -71,9 +72,23 @@ export default function CallOverlay({ onClose }) { let sum = 0 for (let i = 0; i < buf.length; i++) sum += buf[i] * buf[i] const rms = Math.sqrt(sum / buf.length) - avatarRef.current?.setMouthOpen( - rms > 0.008 ? Math.min(0.55, rms * 10) : 0 - ) + const active = rms > 0.008 + avatarRef.current?.setMouthOpen(active ? Math.min(0.55, rms * 10) : 0) + + // Transition to speaking state immediately on audio; + // debounce the return to idle so brief pauses don't flicker. + if (active) { + if (speakTimeoutRef.current) { + clearTimeout(speakTimeoutRef.current) + speakTimeoutRef.current = null + } + avatarRef.current?.setSpeaking(true) + } else if (!speakTimeoutRef.current) { + speakTimeoutRef.current = setTimeout(() => { + avatarRef.current?.setSpeaking(false) + speakTimeoutRef.current = null + }, 600) + } } tick() } @@ -118,6 +133,7 @@ export default function CallOverlay({ onClose }) { const cleanup = useCallback(() => { if (timerRef.current) clearInterval(timerRef.current) if (lipRafRef.current) cancelAnimationFrame(lipRafRef.current) + if (speakTimeoutRef.current) { clearTimeout(speakTimeoutRef.current); speakTimeoutRef.current = null } if (audioCtxRef.current) { audioCtxRef.current.close(); audioCtxRef.current = null } if (roomRef.current) { roomRef.current.disconnect() diff --git a/dashboard/src/components/PersonalityTuner.jsx b/dashboard/src/components/PersonalityTuner.jsx index 3acfdd9..1f51eee 100644 --- a/dashboard/src/components/PersonalityTuner.jsx +++ b/dashboard/src/components/PersonalityTuner.jsx @@ -1,12 +1,32 @@ const SLIDERS = [ - { key: 'empathy', label: 'Empathy' }, - { key: 'humor', label: 'Humor' }, + { key: 'empathy', label: 'Empathy' }, + { key: 'humor', label: 'Humor' }, { key: 'formality', label: 'Formality' }, ] -export default function PersonalityTuner({ settings, onUpdate }) { +const PROVIDERS = [ + { value: 'openrouter', label: 'OpenRouter', hint: 'Routes to any model (DeepSeek, GPT, Mistral…)' }, + { value: 'openai', label: 'OpenAI', hint: 'Direct GPT-4o / o1 access' }, + { value: 'anthropic', label: 'Anthropic', hint: 'Claude 3.5 / Claude 4' }, + { value: 'groq', label: 'Groq', hint: 'Ultra-fast Llama / Mixtral inference' }, + { value: 'ollama', label: 'Ollama (local)', hint: 'Local models via Ollama' }, +] + +const MODEL_SUGGESTIONS = { + openrouter: ['deepseek/deepseek-v3.2', 'openai/gpt-4o', 'anthropic/claude-sonnet-4-5', 'mistralai/mistral-nemo'], + openai: ['gpt-4o', 'gpt-4o-mini', 'o1-mini'], + anthropic: ['claude-opus-4-5', 'claude-sonnet-4-5', 'claude-haiku-4-5-20251001'], + groq: ['llama-3.3-70b-versatile', 'llama-3.1-8b-instant', 'mixtral-8x7b-32768'], + ollama: ['llama3.2', 'mistral', 'gemma2', 'qwen2.5'], +} + +export default function PersonalityTuner({ settings, onChange }) { if (!settings) return + const provider = settings.provider || 'openrouter' + const suggestions = MODEL_SUGGESTIONS[provider] || [] + const providerInfo = PROVIDERS.find(p => p.value === provider) + return (

@@ -14,32 +34,105 @@ export default function PersonalityTuner({ settings, onUpdate }) { Personality Tuner

-
+ {/* Provider picker */} +
+ +
+ {PROVIDERS.map(p => ( + + ))} +
+ {providerInfo && ( +

{providerInfo.hint}

+ )} +
+ + {/* Model input */} +
+ + onChange({ model: e.target.value })} + placeholder="e.g. deepseek/deepseek-v3.2" + list="model-suggestions" + className="w-full bg-bg-light border border-slate-200 rounded-lg px-3 py-2 text-sm font-mono focus:ring-1 focus:ring-primary focus:border-primary outline-none" + /> + + {suggestions.map(m => + {suggestions.length > 0 && ( +
+ {suggestions.map(m => ( + + ))} +
+ )} +
+ + {/* Temperature + Max Tokens */} +
+
+ + onChange({ temperature: parseFloat(e.target.value) })} + className="w-full accent-primary" + /> +
+
+ + onChange({ max_tokens: parseInt(e.target.value) })} + className="w-full accent-primary" + /> +
+
+ + {/* Personality sliders */} +
{SLIDERS.map(({ key, label }) => ( -
+
{settings[key]}%
- onUpdate({ [key]: parseInt(e.target.value) })} - className="w-full h-2 bg-slate-100 rounded-lg appearance-none cursor-pointer slider-thumb accent-primary" + onChange={e => onChange({ [key]: parseInt(e.target.value) })} + className="w-full h-2 bg-slate-100 rounded-lg appearance-none cursor-pointer accent-primary" />
))}
-
+ {/* System prompt */} +