diff --git a/.env.example b/.env.example index 57f76b7..35bbb00 100644 --- a/.env.example +++ b/.env.example @@ -2,6 +2,11 @@ # OpenRouter API Key - Get yours at https://openrouter.ai/keys OPENROUTER_API_KEY=your_openrouter_key_here +# Optional: Direct provider keys (used when provider != openrouter) +ANTHROPIC_API_KEY= # Required for claude-* models +GROQ_API_KEY= # Required for Groq provider (fast Llama/Mixtral) +OLLAMA_BASE_URL=http://localhost:11434 # Local Ollama endpoint + # --- VOICE CONFIGURATION (LIVEKIT AGENTS) --- # Deepgram API Key (STT) - Get yours at https://console.deepgram.com/ DEEPGRAM_API_KEY=your_deepgram_key_here diff --git a/ai-service/app/api/v1/chat.py b/ai-service/app/api/v1/chat.py index 725912f..d42f2ce 100644 --- a/ai-service/app/api/v1/chat.py +++ b/ai-service/app/api/v1/chat.py @@ -1,7 +1,8 @@ -import re +import json import logging - +import asyncio from fastapi import APIRouter, HTTPException +from fastapi.responses import StreamingResponse from app.services.memory_service import memory_service from app.models.chat import ChatRequest, ChatResponse from app.services.brain.graph import brain @@ -10,7 +11,7 @@ router = APIRouter() logger = logging.getLogger(__name__) -@router.post("", response_model=ChatResponse) +@router.post("") async def chat(request: ChatRequest): # Run Graph try: @@ -24,16 +25,98 @@ async def chat(request: ChatRequest): "messages": [HumanMessage(content=request.message)], "emotion": "neutral", "conversation_id": conversation_id, + "identity": request.identity or "anonymous", + "stream": request.stream } config = {"configurable": {"thread_id": conversation_id}} - result = brain.invoke(initial_state, config=config) + + if request.stream: + async def event_generator(): + # 1. 
Start with emotion detection (sequential but fast) + try: + from app.services.brain.nodes.emotion import detect_emotion + emotion_res = await detect_emotion(initial_state) + detected_emotion = emotion_res.get("emotion", "neutral") + yield f"data: {json.dumps({'emotion': detected_emotion})}\n\n" + except Exception as ex: + logger.warning(f"Emotion detection failed: {ex}") + detected_emotion = "neutral" + + # 2. Setup the full context for generation + from app.services.brain.nodes.generate import session_history_window + from app.services.llm import llm_service + from app.services.persona import persona_engine + from app.services.settings_service import settings_service + from datetime import datetime + from uuid import UUID + + # Fetch context + user_msg = request.message + history_model, memories, facts = await asyncio.gather( + memory_service.get_history(UUID(conversation_id), session_history_window), + memory_service.search(query=user_msg, limit=3), + memory_service.get_long_term_memories(identity=request.identity or "anonymous", limit=5), + ) + + # Build Persona + db_settings = settings_service.get_settings() + custom_sys = (db_settings.get("system_prompt") or "").strip() + persona = custom_sys if custom_sys else persona_engine.get_persona() + time_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + system_content = ( + "You are AURA (Advanced Universal Responsive Avatar), steward of the ASE Lab.\n\n" + f"{persona}\n\n" + "IMPORTANT: Do NOT include bracketed emotions like [happy] or [sad] in your response content. 
" + "I have already detected your emotion separately.\n\n" + f"**Context:**\n- Current Time: {time_str}" + ) + if facts: system_content += f"\nWhat I know about you:\n{facts}\n" + if memories: + memory_block = "\n".join(f"- {m}" for m in memories) + system_content += f"\nRelevant past snippets:\n{memory_block}\n" + + messages_format = [{"role":"system", "content":system_content}] + history_model + [{"role":"user", "content":user_msg}] + + import re + full_text = "" + # 3. Stream from the registry directly + from app.services.providers.base import TextDelta + async for chunk in llm_service.stream(messages_format): + # Only yield incremental deltas to the dashboard + if isinstance(chunk, TextDelta): + txt = chunk.text + full_text += txt + yield f"data: {json.dumps({'text': txt})}\n\n" + # StreamDone is handled silently for background persistence below + + # 4. Final sync/persistence - SCRUBBED + scrubbed_final = re.sub(r'\[.*?\]', '', full_text).strip() + asyncio.create_task(memory_service.add_interaction( + conversation_id=UUID(conversation_id), + user_text=user_msg, + assistant_text=scrubbed_final, + user_emotion=detected_emotion, + assistant_emotion="neutral" + )) + asyncio.create_task(memory_service.store( + text=f"User: {user_msg} \n AURA: {scrubbed_final}", + metadata={"conversation_id": str(conversation_id)} + )) + + yield "data: [DONE]\n\n" + + return StreamingResponse(event_generator(), media_type="text/event-stream") + + # Non-streaming fallback + result = await brain.ainvoke(initial_state, config=config) # Extract response last_msg = result["messages"][-1].content emotion = result.get("emotion", "neutral") - # Look for tool calls in the last turn + # Look for tool calls tools_used = [] for msg in result["messages"]: if hasattr(msg, "tool_calls") and msg.tool_calls: @@ -43,22 +126,21 @@ async def chat(request: ChatRequest): "args": tc.get("args", {}) }) - # Clean tags - text = last_msg - if text.startswith("["): - match = re.match(r'^\[(.*?)\]', text) - if 
match: - text = text[match.end():].strip() - return ChatResponse( - text=text, + text=last_msg, emotion=emotion, conversation_id=conversation_id, tools_used=tools_used if tools_used else None ) except Exception as e: - logger.error(f"Chat error: {e}") + logger.error(f"Chat error: {e}", exc_info=True) + # If it was a stream request, we should yield an error event + if request.stream: + return StreamingResponse( + iter([f"data: {json.dumps({'text': f'Brain Freeze: {str(e)}', 'emotion': 'confused'})}\n\n"]), + media_type="text/event-stream" + ) return ChatResponse( text=f"Brain Freeze: {str(e)}", diff --git a/ai-service/app/api/v1/settings.py b/ai-service/app/api/v1/settings.py index 264c47e..77941dd 100644 --- a/ai-service/app/api/v1/settings.py +++ b/ai-service/app/api/v1/settings.py @@ -4,23 +4,29 @@ router = APIRouter() +PROVIDERS = ["openrouter", "openai", "anthropic", "groq", "ollama"] + class SettingsPatch(BaseModel): system_prompt: str | None = None - model: str | None = None - temperature: float | None = None - max_tokens: int | None = None - empathy: int | None = None - humor: int | None = None - formality: int | None = None + model: str | None = None + provider: str | None = None + temperature: float | None = None + max_tokens: int | None = None + empathy: int | None = None + humor: int | None = None + formality: int | None = None class ApiKeysPatch(BaseModel): openrouter_api_key: str | None = None - deepgram_api_key: str | None = None - cartesia_api_key: str | None = None - livekit_url: str | None = None - livekit_api_key: str | None = None + deepgram_api_key: str | None = None + cartesia_api_key: str | None = None + anthropic_api_key: str | None = None + groq_api_key: str | None = None + ollama_base_url: str | None = None + livekit_url: str | None = None + livekit_api_key: str | None = None livekit_api_secret: str | None = None @@ -35,11 +41,18 @@ def update_settings(patch: SettingsPatch): return settings_service.update_settings(data) 
+@router.get("/providers") +def list_providers(): + """Return available provider names for the UI dropdown.""" + return {"providers": PROVIDERS} + + @router.get("/keys") def get_api_keys(): keys = settings_service.get_api_keys() - # Mask values in response — only reveal whether each key is set - return {k: ("••••••••" if v else None) for k, v in keys.items() if k != "id"} + # Return masked values — just signals whether the key is configured + return {k: ("set" if (v and str(v).strip()) else None) + for k, v in keys.items() if k != "id"} @router.put("/keys") diff --git a/ai-service/app/core/config.py b/ai-service/app/core/config.py index 5eff0f1..51de459 100644 --- a/ai-service/app/core/config.py +++ b/ai-service/app/core/config.py @@ -28,6 +28,9 @@ class Settings(BaseSettings): LLM_API_KEY: str | None = None OPENAI_API_KEY: str | None = None OPENROUTER_API_KEY: str | None = None + ANTHROPIC_API_KEY: str | None = None + GROQ_API_KEY: str | None = None + OLLAMA_BASE_URL: str = "http://localhost:11434" OPENAI_MODEL: str = "gpt-3.5-turbo" # Supabase diff --git a/ai-service/app/models/chat.py b/ai-service/app/models/chat.py index 6ffcc57..c55136a 100644 --- a/ai-service/app/models/chat.py +++ b/ai-service/app/models/chat.py @@ -4,6 +4,8 @@ class ChatRequest(BaseModel): message: str conversation_id: Optional[str] = None + identity: Optional[str] = None + stream: bool = False class ChatResponse(BaseModel): text: str diff --git a/ai-service/app/services/brain/nodes/emotion.py b/ai-service/app/services/brain/nodes/emotion.py index 66cd899..ec37427 100644 --- a/ai-service/app/services/brain/nodes/emotion.py +++ b/ai-service/app/services/brain/nodes/emotion.py @@ -2,7 +2,7 @@ from app.services.llm import llm_service # Node to detect emotion -def detect_emotion(state: BrainState) -> dict: +async def detect_emotion(state: BrainState) -> dict: # Get last user message last_message = state["messages"][-1].content @@ -13,7 +13,7 @@ def detect_emotion(state: BrainState) -> dict: """ 
# Call LLM to detect emotion - emotion = llm_service.generate([{"role": "system", "content": prompt}]) + response = await llm_service.generate([{"role": "system", "content": prompt}]) # Return detected emotion - return {"emotion": emotion["emotion"].strip().lower()} \ No newline at end of file + return {"emotion": response.get("emotion", "neutral").strip().lower()} \ No newline at end of file diff --git a/ai-service/app/services/brain/nodes/generate.py b/ai-service/app/services/brain/nodes/generate.py index 0c1cfad..8c0207a 100644 --- a/ai-service/app/services/brain/nodes/generate.py +++ b/ai-service/app/services/brain/nodes/generate.py @@ -11,10 +11,9 @@ session_history_window = 9999 -def generate_response(state: BrainState) -> dict: - with concurrent.futures.ThreadPoolExecutor() as pool: - future = pool.submit(asyncio.run, generate(state)) - return future.result() +async def generate_response(state: BrainState) -> dict: + """Async wrapper for the generation node.""" + return await generate(state) # Node to generate response based on persona, conversation history and detected emotion (convesation history not being tested yet) @@ -45,46 +44,90 @@ async def generate(state: BrainState) -> dict: else: user_message = "" - # Load History - history_model, memories = await asyncio.gather( + # Load History & Long-term memories + history_model, memories, facts = await asyncio.gather( memory_service.get_history(conversation_id, session_history_window), memory_service.search(query=user_message, limit=3), + memory_service.get_long_term_memories(identity=state.get("identity", "anonymous"), limit=5), ) history = history_model - # System Prompt - system_message = prompter.build("", context=None)[0] + # Save User message IMMEDIATELY to DB so it persists even if AI fails or disconnects + await memory_service.add_interaction( + conversation_id=conversation_id, + user_text=user_message, + assistant_text=None, # Update later + user_emotion=detected_emotion, + assistant_emotion=None + 
) - if memories: - memory_block = "\n".join(f"-{message}" for message in memories) - system_message = { - "role" : "system", - "content": (system_message["content"] + f"Ingatan sebelumnya: \n {memory_block}") - } + # System Prompt (Pulling from DB via settings_service) + from app.services.settings_service import settings_service + db_settings = settings_service.get_settings() + custom_sys = (db_settings.get("system_prompt") or "").strip() + from app.services.persona import persona_engine + persona = custom_sys if custom_sys else persona_engine.get_persona() - # Add system prompt with persona and current time + from datetime import datetime + time_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + system_content = ( + "You are AURA (Advanced Universal Responsive Avatar), " + "the spirited AI steward of the ASE Lab.\n\n" + f"{persona}\n\n" + f"**Context:**\n- Current Time: {time_str}" + ) + + # Combine RAG (memories) and LTS (facts) + combined_memory = "" + if facts: + combined_memory += f"\nWhat I know about you:\n{facts}\n" + if memories: + memory_block = "\n".join(f"- {message}" for message in memories) + combined_memory += f"\nRelevant past snippets:\n{memory_block}\n" + + if combined_memory: + system_content += f"\n\n**Memory Retrieval:**{combined_memory}" + + system_message = {"role": "system", "content": system_content} + + # Build payload messages_format = [system_message] + history + current_message + # Check for stream request + is_stream = state.get("stream", False) + + if is_stream: + # For streaming, we yield chunks. + # But this is a node, so we return the final state but can use callbacks? + # Actually, chat.py will call brain.astream(). + # We handle the stream here if we want to return the stream object, + # but LangGraph nodes should return the update. + # So we update chat.py to use a different strategy. 
+ pass + # Generate response from LLM - response = llm_service.generate(messages_format) + response = await llm_service.generate(messages_format) + text = response.get("text", "") emotion = response.get("emotion", "neutral") await asyncio.gather( + # Complete the interaction in DB memory_service.add_interaction( conversation_id=conversation_id, user_text=user_message, - assistant_text=response["text"], + assistant_text=text, user_emotion=detected_emotion, assistant_emotion=emotion ), memory_service.store( - text=f"User: {user_message} \n AURA: {response['text']}", + text=f"User: {user_message} \n AURA: {text}", metadata={"conversation_id": str(conversation_id)}, ), ) # Return response - return {"messages": [AIMessage(content=response["text"])], "emotion": response["emotion"]} \ No newline at end of file + return {"messages": [AIMessage(content=text)], "emotion": emotion} \ No newline at end of file diff --git a/ai-service/app/services/brain/state.py b/ai-service/app/services/brain/state.py index c4047a7..bab52f5 100644 --- a/ai-service/app/services/brain/state.py +++ b/ai-service/app/services/brain/state.py @@ -7,4 +7,5 @@ class BrainState(TypedDict): messages: Annotated[List[BaseMessage], operator.add] emotion: str - conversation_id: str \ No newline at end of file + conversation_id: str + identity: str \ No newline at end of file diff --git a/ai-service/app/services/llm.py b/ai-service/app/services/llm.py index 7b3cad1..0a1629c 100644 --- a/ai-service/app/services/llm.py +++ b/ai-service/app/services/llm.py @@ -1,74 +1,33 @@ -from openai import OpenAI -from app.core.config import settings +""" +LLMService — thin facade over the Provider Abstraction Layer. + +All routing logic lives in providers/registry.py. +This class exists so existing callers (brain nodes, etc.) don't need to change. 
+""" import logging -import re +import asyncio +from app.services.providers.registry import provider_registry logger = logging.getLogger(__name__) class LLMService: - def __init__(self): - self._env_key = settings.OPENROUTER_API_KEY or settings.OPENAI_API_KEY - self.base_url = "https://openrouter.ai/api/v1" if settings.OPENROUTER_API_KEY else None - self.client = None - - if self._env_key: - self.client = OpenAI(api_key=self._env_key, base_url=self.base_url) - logger.info(f"LLM Service Initialized. Base: {self.base_url or 'Default'}") - else: - logger.warning("API Key not set. LLMService will fail.") - - def _get_client(self): - """Return a client using the DB key if set, falling back to the env key.""" - from app.services.settings_service import settings_service - db_key = settings_service.get_api_keys().get("openrouter_api_key") - if db_key and db_key.strip(): - return OpenAI(api_key=db_key, base_url="https://openrouter.ai/api/v1") - return self.client - - def generate(self, messages: list, model: str = None, temperature: float = None, max_tokens: int = None) -> dict: - client = self._get_client() - if not client: - return {"text": "Error: API Key is missing.", "emotion": "[dizzy]"} - - # Import here to avoid circular imports at module load time - from app.services.settings_service import settings_service - db = settings_service.get_settings() - - actual_model = model or db.get("model") or "deepseek/deepseek-v3.2" - actual_temp = temperature if temperature is not None else db.get("temperature", 0.8) - actual_max_tokens = max_tokens or db.get("max_tokens") or 300 - - try: - extra_headers = {} - if settings.OPENROUTER_API_KEY: - extra_headers = { - "HTTP-Referer": "http://localhost:5173", - "X-Title": "Project AURA", - } - - response = client.chat.completions.create( - model=actual_model, - messages=messages, - temperature=actual_temp, - max_tokens=actual_max_tokens, - extra_headers=extra_headers, - ) - - content = response.choices[0].message.content - 
emotion_match = re.match(r'^\[(.*?)\]', content) - emotion = "neutral" - text = content - - if emotion_match: - emotion = emotion_match.group(1) - text = content[emotion_match.end():].strip() - - return {"text": text, "emotion": emotion, "raw": content} - - except Exception as e: - logger.error(f"LLM Generation Error: {e}") - return {"text": f"I lost my train of thought. ({str(e)})", "emotion": "[confused]"} + async def generate( + self, + messages: list, + model: str | None = None, + temperature: float | None = None, + max_tokens: int | None = None, + ) -> dict: + return await provider_registry.generate( + messages, + model=model, + temperature=temperature, + max_tokens=max_tokens, + ) + + def stream(self, *args, **kwargs): + return provider_registry.stream(*args, **kwargs) llm_service = LLMService() diff --git a/ai-service/app/services/memory_service.py b/ai-service/app/services/memory_service.py index b401291..dbb4acd 100644 --- a/ai-service/app/services/memory_service.py +++ b/ai-service/app/services/memory_service.py @@ -4,17 +4,28 @@ """ from __future__ import annotations from typing import List +import urllib.request from supabase import create_client from langchain_openai import OpenAIEmbeddings from app.core.config import settings from uuid import UUID + from app.models.database import (Conversation, CreateConversation, Message, CreateMesssage, Memory, CreateMemory) import logging logger = logging.getLogger(__name__) + +def _ollama_is_running(base_url: str) -> bool: + """Return True if an Ollama server is reachable at base_url.""" + try: + urllib.request.urlopen(f"{base_url}/api/tags", timeout=2) + return True + except Exception: + return False + class MemoryService: def __init__(self): self.client = None @@ -27,16 +38,36 @@ def __init__(self): else: logger.warning("Supabase credentials not set. 
Memory service disabled.") - # Initialize embeddings model via OpenRouter - api_key = settings.OPENROUTER_API_KEY - if api_key: + # Initialize embeddings — try providers in order of preference + if settings.OPENAI_API_KEY: self.embeddings = OpenAIEmbeddings( - api_key=api_key, + api_key=settings.OPENAI_API_KEY, + model="text-embedding-3-small", + ) + logger.info("RAG: Using OpenAI Directly for semantic embeddings (best-in-class mapping).") + print("INFO: Memory Service using OpenAI Embeddings for search mapping.") + elif settings.OPENROUTER_API_KEY: + self.embeddings = OpenAIEmbeddings( + api_key=settings.OPENROUTER_API_KEY, model="openai/text-embedding-3-small", - base_url="https://openrouter.ai/api/v1" + base_url="https://openrouter.ai/api/v1", ) + logger.info("RAG: Using OpenRouter for semantic embeddings.") + print("INFO: Memory Service using OpenRouter Embeddings.") + elif _ollama_is_running(settings.OLLAMA_BASE_URL): + self.embeddings = OpenAIEmbeddings( + api_key="ollama", + model="nomic-embed-text", + base_url=f"{settings.OLLAMA_BASE_URL}/v1", + ) + logger.info("RAG: Using local Ollama for semantic embeddings.") + print("INFO: Memory Service using local Ollama Embeddings.") else: - logger.warning("OPENROUTER_API_KEY not set. Memory embedding disabled.") + logger.warning( + "No embedding provider available " + "(OPENAI_API_KEY / OPENROUTER_API_KEY not set; Ollama not reachable). " + "Memory store/search disabled." 
+ ) async def create_conversation(self, title: str = "New Conversation") -> UUID | None: if not self.client: @@ -76,26 +107,30 @@ async def get_conversation(self, conversation_id: UUID) -> Conversation | None: logger.error(f"Memory Service Get Conversation Error: {error}") return None - async def add_interaction(self, conversation_id: UUID, user_text: str, assistant_text: str, user_emotion: str = "neutral", assistant_emotion: str = "neutral") -> None: + async def add_interaction(self, conversation_id: UUID, user_text: str, assistant_text: str | None, user_emotion: str = "neutral", assistant_emotion: str = "neutral") -> None: if not self.client: return None try: - self.client.table("messages").insert([ - CreateMesssage( + msgs = [] + if user_text: + msgs.append(CreateMesssage( conversation_id=conversation_id, role="user", content=user_text, emotion=user_emotion, - ).model_dump(mode="json"), + ).model_dump(mode="json")) - CreateMesssage( + if assistant_text: + msgs.append(CreateMesssage( conversation_id=conversation_id, role="aura", content=assistant_text, emotion=assistant_emotion - ).model_dump(mode="json") - ]).execute() + ).model_dump(mode="json")) + + if msgs: + self.client.table("messages").insert(msgs).execute() self.client.table("conversations") \ .update({"updated_at": "now()"}) \ @@ -202,4 +237,31 @@ async def search(self, query: str, limit: int = 3) -> list[str]: return [] + async def get_long_term_memories(self, identity: str, limit: int = 10) -> str: + """Retrieve the last N non-embedded 'user_facts' memories for this identity.""" + if not self.client: + return "" + + try: + result = self.client.table("memories") \ + .select("content, created_at") \ + .eq("metadata->>type", "user_facts") \ + .eq("metadata->>identity", identity) \ + .order("created_at", desc=True) \ + .limit(limit) \ + .execute() + + rows = result.data or [] + if not rows: + return "" + + # Reverse to get chronological order in the prompt + facts_list = [row["content"] for row in 
reversed(rows)] + return "\n---\n".join(facts_list) + + except Exception as e: + logger.error(f"Memory Service Get Long Term Memories error: {e}") + return "" + + memory_service = MemoryService() \ No newline at end of file diff --git a/ai-service/app/services/prompter.py b/ai-service/app/services/prompter.py index e541fec..57cdfc3 100644 --- a/ai-service/app/services/prompter.py +++ b/ai-service/app/services/prompter.py @@ -7,13 +7,14 @@ class Prompter: def build(self, message: str, context: dict = None) -> list: current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - # Pull live settings — custom system_prompt overrides the hardcoded persona - db_settings = settings_service.get_settings() - custom_prompt = (db_settings.get("system_prompt") or "").strip() - persona = custom_prompt if custom_prompt else persona_engine.get_persona() + # Custom system_prompt from admin panel overrides the hardcoded persona + db = settings_service.get_settings() + custom = (db.get("system_prompt") or "").strip() + persona = custom if custom else persona_engine.get_persona() formatted_system = ( - f"You are AURA (Advanced Universal Responsive Avatar), the spirited AI steward of the ASE Lab.\n\n" + "You are AURA (Advanced Universal Responsive Avatar), " + "the spirited AI steward of the ASE Lab.\n\n" f"{persona}\n\n" f"**Context:**\n- Current Time: {current_time}" ) diff --git a/ai-service/app/services/providers/__init__.py b/ai-service/app/services/providers/__init__.py new file mode 100644 index 0000000..292fc89 --- /dev/null +++ b/ai-service/app/services/providers/__init__.py @@ -0,0 +1,3 @@ +from app.services.providers.registry import provider_registry + +__all__ = ["provider_registry"] diff --git a/ai-service/app/services/providers/anthropic_provider.py b/ai-service/app/services/providers/anthropic_provider.py new file mode 100644 index 0000000..89685fe --- /dev/null +++ b/ai-service/app/services/providers/anthropic_provider.py @@ -0,0 +1,183 @@ +""" +Anthropic / Claude 
provider. + +Key differences from OpenAI-compatible providers: + +1. System message → separate `system` parameter (not in messages list). +2. Streaming: chunks are `content_block_delta` with type "text_delta" + (vs GPT's `choices[0].delta.content`). +3. Tool calls: come as `content_block_start` with type "tool_use" + (vs OpenAI's `message.tool_calls`). +4. Tool definitions: Anthropic uses a different schema than OpenAI. + We accept the OpenAI schema and translate it internally. + +Normalized output is always the same result dict as every other provider. +""" +from __future__ import annotations + +import json +import logging +from typing import AsyncGenerator + +from app.services.providers.base import LLMProvider, TextDelta, StreamDone, make_result, RetryableError, NonRetryableError + +logger = logging.getLogger(__name__) + + +def _split_system(messages: list[dict]) -> tuple[str, list[dict]]: + """Separate the system prompt from the rest of the message list.""" + system_parts = [] + rest = [] + for m in messages: + if m.get("role") == "system": + system_parts.append(m.get("content", "")) + else: + rest.append(m) + return "\n\n".join(system_parts), rest + + +def _openai_tools_to_anthropic(tools: list[dict]) -> list[dict]: + """ + Translate OpenAI tool schema to Anthropic's format. 
+ + OpenAI: { "type": "function", "function": { "name", "description", "parameters" } } + Anthropic: { "name", "description", "input_schema" } + """ + result = [] + for t in tools: + fn = t.get("function", t) # handle both wrapped and unwrapped + result.append({ + "name": fn["name"], + "description": fn.get("description", ""), + "input_schema": fn.get("parameters", {"type": "object", "properties": {}}), + }) + return result + + +def _extract_tool_calls(content_blocks) -> list | None: + """Normalize Anthropic tool_use blocks to our common schema.""" + calls = [ + { + "id": block.id, + "name": block.name, + "arguments": json.dumps(block.input), + } + for block in content_blocks + if getattr(block, "type", None) == "tool_use" + ] + return calls or None + + +class AnthropicProvider(LLMProvider): + name = "anthropic" + + def __init__(self, api_key: str): + try: + import anthropic as _anthropic + self._anthropic = _anthropic + self._client = _anthropic.Anthropic(api_key=api_key) + self._async_client = _anthropic.AsyncAnthropic(api_key=api_key) + logger.info("[anthropic] provider ready") + except ImportError: + raise RuntimeError( + "The 'anthropic' package is required for the Anthropic provider. 
" + "Run: pip install anthropic" + ) + + # ── Blocking ────────────────────────────────────────────────────────────── + + def generate( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> dict: + system, user_messages = _split_system(messages) + kwargs = dict( + model=model, + system=system, + messages=user_messages, + temperature=temperature, + max_tokens=max_tokens, + ) + if tools: + kwargs["tools"] = _openai_tools_to_anthropic(tools) + + _a = self._anthropic # local ref so except clauses can reference it + try: + response = self._client.messages.create(**kwargs) + + # Text from text blocks + raw = "".join( + block.text for block in response.content + if getattr(block, "type", None) == "text" + ) + tool_calls = _extract_tool_calls(response.content) + + if tool_calls and not raw: + raw = f"[tool_call: {tool_calls[0]['name']}]" + + return make_result(raw, self.name, model, tool_calls=tool_calls) + + except _a.RateLimitError as e: + raise RetryableError(str(e), status_code=429) + except (_a.APIConnectionError, _a.APITimeoutError) as e: + raise RetryableError(str(e)) + except _a.InternalServerError as e: + raise RetryableError(str(e), status_code=getattr(e, "status_code", 500)) + except _a.AuthenticationError as e: + raise NonRetryableError(str(e), status_code=401) + except _a.BadRequestError as e: + raise NonRetryableError(str(e), status_code=400) + except Exception as e: + raise RetryableError(str(e)) + + # ── Streaming ───────────────────────────────────────────────────────────── + + async def stream( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> AsyncGenerator[TextDelta | StreamDone, None]: + system, user_messages = _split_system(messages) + assembled = "" + kwargs = dict( + model=model, + system=system, + messages=user_messages, + temperature=temperature, + max_tokens=max_tokens, + ) + if tools: 
+ kwargs["tools"] = _openai_tools_to_anthropic(tools) + + try: + async with self._async_client.messages.stream(**kwargs) as stream: + async for event in stream: + if ( + event.type == "content_block_delta" + and hasattr(event, "delta") + and getattr(event.delta, "type", None) == "text_delta" + ): + chunk = event.delta.text or "" + if chunk: + assembled += chunk + yield TextDelta(text=chunk) + except Exception as e: + logger.error(f"[anthropic] stream error: {e}") + + result = make_result(assembled, self.name, model) + yield StreamDone( + text=result["text"], + emotion=result["emotion"], + raw=assembled, + provider=self.name, + model=model, + ) diff --git a/ai-service/app/services/providers/base.py b/ai-service/app/services/providers/base.py new file mode 100644 index 0000000..cfb6b84 --- /dev/null +++ b/ai-service/app/services/providers/base.py @@ -0,0 +1,147 @@ +""" +Provider Abstraction Layer — base types and interface. + +Every LLM provider normalizes its output into the same result dict +so the rest of the system never needs to know which model is running. + +Normalized result: + { text, emotion, raw, provider, model, tool_calls } + +Tool calls are always normalized to: + [{ "id": str, "name": str, "arguments": str (JSON) }] + — regardless of whether the provider used OpenAI function_call deltas + or Anthropic content_block tool_use blocks. 
+ +Stream events (for future streaming endpoints): + TextDelta — incremental text chunk + StreamDone — final assembled result +""" +from __future__ import annotations + +import re +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import AsyncGenerator + + +# ── Normalized event types ──────────────────────────────────────────────────── + +@dataclass +class TextDelta: + """A chunk of text from a streaming response.""" + text: str + + +@dataclass +class StreamDone: + """Final event — carries the fully assembled response.""" + text: str + emotion: str + raw: str + provider: str + model: str + tool_calls: list | None = None + + +# ── Error types ─────────────────────────────────────────────────────────────── + +class RetryableError(Exception): + """ + Rate limit (429), server error (5xx), or transient network issue. + The registry will retry with exponential backoff, then try the next provider. + """ + def __init__(self, msg: str, status_code: int | None = None): + super().__init__(msg) + self.status_code = status_code + + +class NonRetryableError(Exception): + """ + Auth failure (401) or bad request (400). + - 401: key is wrong for this provider → skip to next provider. + - 400: our message is malformed → no provider will fix it; abort immediately. + """ + def __init__(self, msg: str, status_code: int | None = None): + super().__init__(msg) + self.status_code = status_code + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def parse_emotion(raw: str) -> tuple[str, str]: + """ + Extract the leading [emotion, tag] from a raw LLM response. + Returns (emotion_string, cleaned_text). 
+ """ + stripped = raw.strip() + match = re.match(r'^\[(.*?)\]', stripped) + if match: + return match.group(1), stripped[match.end():].strip() + return "neutral", stripped + + +def make_result( + raw: str, + provider: str, + model: str, + tool_calls: list | None = None, +) -> dict: + """Build the normalized result dict that the rest of the system expects.""" + emotion, text = parse_emotion(raw) + return { + "text": text, + "emotion": emotion, + "raw": raw, + "provider": provider, + "model": model, + "tool_calls": tool_calls or None, + } + + +# ── Abstract base ───────────────────────────────────────────────────────────── + +class LLMProvider(ABC): + """ + All providers implement this interface. + `generate` is the blocking path used by the brain pipeline. + `stream` is the async-generator path for future streaming endpoints. + + Tool definitions follow the OpenAI schema: + [{ "type": "function", "function": { "name": ..., "description": ..., + "parameters": {...} } }] + Providers that use a different native schema (e.g. Anthropic) translate + internally — callers always pass the OpenAI format. + """ + + name: str = "base" + + @abstractmethod + def generate( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> dict: + """ + Blocking generation. Returns the normalized result dict: + { text, emotion, raw, provider, model, tool_calls } + """ + + @abstractmethod + async def stream( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> AsyncGenerator[TextDelta | StreamDone, None]: + """ + Streaming generation. + Yields TextDelta chunks, ends with one StreamDone. 
+ """ + yield # type: ignore diff --git a/ai-service/app/services/providers/openai_compat.py b/ai-service/app/services/providers/openai_compat.py new file mode 100644 index 0000000..35c005e --- /dev/null +++ b/ai-service/app/services/providers/openai_compat.py @@ -0,0 +1,194 @@ +""" +OpenAI-compatible provider. + +Covers every backend that speaks the OpenAI chat-completions API: + • OpenRouter (base_url = https://openrouter.ai/api/v1) + • OpenAI (base_url = None → default) + • Groq (base_url = https://api.groq.com/openai/v1) + • Ollama (base_url = http://localhost:11434/v1) + +Tool call normalization: + OpenAI sends tool_calls on the response message. + Each tool call has: id, function.name, function.arguments (JSON string). + We surface these as [{ "id", "name", "arguments" }] in the result dict. +""" +from __future__ import annotations + +import logging +from typing import AsyncGenerator + +import openai as _openai_lib +from openai import OpenAI, AsyncOpenAI + +from app.services.providers.base import LLMProvider, TextDelta, StreamDone, make_result, RetryableError, NonRetryableError + +logger = logging.getLogger(__name__) + +_OPENROUTER_HEADERS = { + "HTTP-Referer": "http://localhost:5173", + "X-Title": "Project AURA", +} + + +def _extract_tool_calls(response_message) -> list | None: + """Normalize OpenAI tool_calls to our common schema.""" + raw_calls = getattr(response_message, "tool_calls", None) + if not raw_calls: + return None + return [ + { + "id": tc.id, + "name": tc.function.name, + "arguments": tc.function.arguments, # already a JSON string + } + for tc in raw_calls + ] + + +class OpenAICompatProvider(LLMProvider): + + def __init__( + self, + api_key: str, + base_url: str | None = None, + extra_headers: dict | None = None, + provider_name: str = "openai", + ): + self.name = provider_name + self._extra_headers = extra_headers or {} + self._client = OpenAI(api_key=api_key, base_url=base_url) + self._async_client = AsyncOpenAI(api_key=api_key, 
base_url=base_url) + logger.info(f"[{self.name}] provider ready (base_url={base_url or 'default'})") + + # ── Blocking ────────────────────────────────────────────────────────────── + + def generate( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> dict: + kwargs = dict( + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + extra_headers=self._extra_headers, + ) + if tools: + kwargs["tools"] = tools + kwargs["tool_choice"] = "auto" + + try: + response = self._client.chat.completions.create(**kwargs) + msg = response.choices[0].message + raw = msg.content or "" + tool_calls = _extract_tool_calls(msg) + + # When the model only returns a tool call (no text), give a placeholder + # so make_result always has something to parse. + if tool_calls and not raw: + raw = f"[tool_call: {tool_calls[0]['name']}]" + + return make_result(raw, self.name, model, tool_calls=tool_calls) + + except _openai_lib.RateLimitError as e: + raise RetryableError(str(e), status_code=429) + except (_openai_lib.APIConnectionError, _openai_lib.APITimeoutError) as e: + raise RetryableError(str(e)) + except _openai_lib.InternalServerError as e: + raise RetryableError(str(e), status_code=getattr(e, "status_code", 500)) + except _openai_lib.AuthenticationError as e: + raise NonRetryableError(str(e), status_code=401) + except (_openai_lib.BadRequestError, _openai_lib.NotFoundError) as e: + raise NonRetryableError(str(e), status_code=getattr(e, "status_code", 400)) + except Exception as e: + # Unknown error — treat as retryable so the registry can decide + raise RetryableError(str(e)) + + # ── Streaming ───────────────────────────────────────────────────────────── + + async def stream( + self, + messages: list[dict], + *, + model: str, + temperature: float, + max_tokens: int, + tools: list[dict] | None = None, + ) -> AsyncGenerator[TextDelta | StreamDone, None]: + assembled = "" + 
kwargs = dict( + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + extra_headers=self._extra_headers, + ) + if tools: + kwargs["tools"] = tools + kwargs["tool_choice"] = "auto" + + try: + response = await self._async_client.chat.completions.create(**kwargs, stream=True) + async for chunk in response: + if not chunk.choices: + continue + + delta = chunk.choices[0].delta + + # Handle reasoning tokens (DeepSeek R1 / OpenRouter) + # These are internal thoughts we don't want to show the user + reasoning = getattr(delta, "reasoning_content", None) or getattr(delta, "reasoning", None) + if reasoning: + continue + + if delta.content: + txt = delta.content + assembled += txt + yield TextDelta(text=txt) + except Exception as e: + logger.error(f"[{self.name}] stream error: {e}") + + result = make_result(assembled, self.name, model) + yield StreamDone( + text=result["text"], + emotion=result["emotion"], + raw=assembled, + provider=self.name, + model=model, + ) + + +# ── Named constructors ──────────────────────────────────────────────────────── + +def openrouter_provider(api_key: str) -> OpenAICompatProvider: + return OpenAICompatProvider( + api_key=api_key, + base_url="https://openrouter.ai/api/v1", + extra_headers=_OPENROUTER_HEADERS, + provider_name="openrouter", + ) + + +def openai_provider(api_key: str) -> OpenAICompatProvider: + return OpenAICompatProvider(api_key=api_key, base_url=None, provider_name="openai") + + +def groq_provider(api_key: str) -> OpenAICompatProvider: + return OpenAICompatProvider( + api_key=api_key, + base_url="https://api.groq.com/openai/v1", + provider_name="groq", + ) + + +def ollama_provider(base_url: str = "http://localhost:11434") -> OpenAICompatProvider: + return OpenAICompatProvider( + api_key="ollama", + base_url=f"{base_url.rstrip('/')}/v1", + provider_name="ollama", + ) diff --git a/ai-service/app/services/providers/registry.py b/ai-service/app/services/providers/registry.py new file mode 100644 index 
0000000..8d51c79 --- /dev/null +++ b/ai-service/app/services/providers/registry.py @@ -0,0 +1,297 @@ +""" +Provider Registry — the single entry point for LLM calls. + +Responsibilities: + 1. Read active model / provider / temperature / max_tokens from settings_service + 2. Read the matching API key from settings_service (DB) or fall back to env vars + 3. Instantiate the right LLMProvider + 4. Call provider.generate() and return the normalized result + +Provider inference (when `provider` field is "auto" or missing): + model starts with "claude-" → anthropic + model contains "/" → openrouter (e.g. "deepseek/deepseek-v3.2") + model starts with gpt-/o1-/o3- → openai + model starts with llama/mistral… → ollama + groq is never inferred — set provider="groq" explicitly to use it + fallback → openrouter +""" +from __future__ import annotations + +import logging +import asyncio +import os +import random +import time + +from app.services.providers.base import LLMProvider, RetryableError, NonRetryableError + +logger = logging.getLogger(__name__) + +_MAX_ATTEMPTS = 3 # attempts per provider before giving up on it +_BACKOFF_BASE = 1.0 # seconds; delay = base * 2^attempt + jitter + +# Ordered fallback chain — first provider with an available key wins +_FALLBACK_ORDER = ["openrouter", "openai", "groq", "ollama"] + +# ── Provider inference ──────────────────────────────────────── + +_OPENAI_PREFIXES = ("gpt-", "o1-", "o3-", "text-davinci", "babbage", "ada") +_OLLAMA_PREFIXES = ("llama", "mistral", "gemma", "phi", "qwen", "codellama", "deepseek-r1") + + +def infer_provider(model: str) -> str: + m = model.lower() + if m.startswith("claude-"): + return "anthropic" + if "/" in m: + return "openrouter" + if any(m.startswith(p) for p in _OPENAI_PREFIXES): + return "openai" + if any(m.startswith(p) for p in _OLLAMA_PREFIXES): + return "ollama" + return "openrouter" + + +# ── Registry ────────────────────────────────────────────────── + +class ProviderRegistry: + """ + Resolves and calls the
correct LLM provider on every request. + Providers are constructed lazily and cached by (provider_name, key_hash). + """ + + def __init__(self): + self._cache: dict[str, LLMProvider] = {} + + # ── Public API ──────────────────────────────────────────────────────────── + + async def generate( + self, + messages: list[dict], + *, + model: str | None = None, + temperature: float | None = None, + max_tokens: int | None = None, + tools: list[dict] | None = None, + ) -> dict: + # Lazy import avoids circular imports at module load time + from app.services.settings_service import settings_service + + db = settings_service.get_settings() + keys = settings_service.get_api_keys() + + actual_model = model or db.get("model") or "deepseek/deepseek-v3.2" + actual_temp = temperature if temperature is not None else float(db.get("temperature", 0.8)) + actual_max_tokens = max_tokens or int(db.get("max_tokens", 300)) + + configured_provider = (db.get("provider") or "auto").lower() + primary = ( + configured_provider + if configured_provider != "auto" + else infer_provider(actual_model) + ) + + # Build candidate list: primary first, then any fallback with an available key + candidates = [primary] + [ + p for p in _FALLBACK_ORDER + if p != primary and (p == "ollama" or self._pick_key(p, keys)) + ] + + call_kwargs = dict( + model=actual_model, + temperature=actual_temp, + max_tokens=actual_max_tokens, + tools=tools, + ) + + last_error: Exception | None = None + + for provider_name in candidates: + try: + provider = self._get_provider(provider_name, keys) + except (ValueError, RuntimeError) as e: + # Missing key or missing package — skip silently + logger.debug(f"[registry] skipping {provider_name}: {e}") + last_error = e + continue + + logger.info(f"[registry] trying {provider_name} / {actual_model}") + try: + result = await self._call_with_retry(provider, messages, **call_kwargs) + if provider_name != primary: + logger.warning(f"[registry] fell back to {provider_name} (primary={primary} 
failed)") + return result + + except NonRetryableError as e: + last_error = e + if e.status_code == 400: + # Bad request — our message is wrong, no other provider will help + logger.error(f"[registry] bad request ({provider_name}): {e}") + break + # 401 auth failure — key is bad for this provider, try next + logger.warning(f"[registry] auth failed for {provider_name} (HTTP {e.status_code}), trying next") + continue + + except RetryableError as e: + # All retries for this provider exhausted — try next + logger.warning(f"[registry] {provider_name} exhausted retries: {e}") + last_error = e + continue + + logger.error(f"[registry] all providers failed. Last: {last_error}") + return { + "text": "I seem to be having trouble connecting right now. Please try again in a moment.", + "emotion": "confused", + "raw": "", + "provider": primary, + "model": actual_model, + "tool_calls": None, + } + + async def stream( + self, + messages: list[dict], + *, + model: str | None = None, + temperature: float | None = None, + max_tokens: int | None = None, + tools: list[dict] | None = None, + ) -> AsyncGenerator[TextDelta | StreamDone, None]: + from app.services.settings_service import settings_service + + db = settings_service.get_settings() + keys = settings_service.get_api_keys() + + actual_model = model or db.get("model") or "deepseek/deepseek-v3.2" + actual_temp = temperature if temperature is not None else float(db.get("temperature", 0.8)) + actual_max_tokens = max_tokens or int(db.get("max_tokens", 300)) + + configured_provider = (db.get("provider") or "auto").lower() + primary = ( + configured_provider + if configured_provider != "auto" + else infer_provider(actual_model) + ) + + candidates = [primary] + [ + p for p in _FALLBACK_ORDER + if p != primary and (p == "ollama" or self._pick_key(p, keys)) + ] + + # Note: Fallbacks for streaming are harder to implement gracefully mid-stream. + # We try the primary and first available. 
+ for provider_name in candidates: + try: + provider = self._get_provider(provider_name, keys) + logger.info(f"[registry] streaming {provider_name} / {actual_model}") + + async for chunk in provider.stream( + messages, + model=actual_model, + temperature=actual_temp, + max_tokens=actual_max_tokens, + tools=tools + ): + yield chunk + return + except Exception as e: + logger.warning(f"[registry] stream failed for {provider_name}: {e}") + continue + + async def _call_with_retry(self, provider: LLMProvider, messages: list[dict], **kwargs) -> dict: + """ + Call provider.generate() with exponential backoff on RetryableError. + Raises RetryableError if all attempts fail. + Raises NonRetryableError immediately (no retry). + """ + for attempt in range(_MAX_ATTEMPTS): + try: + # Use thread pool for sync generate calls to keep registry async-friendly + return await asyncio.to_thread(provider.generate, messages, **kwargs) + except NonRetryableError: + raise # propagate immediately + except RetryableError as e: + if attempt == _MAX_ATTEMPTS - 1: + raise # all attempts exhausted + delay = _BACKOFF_BASE * (2 ** attempt) + random.uniform(0.0, 0.5) + logger.warning( + f"[{provider.name}] attempt {attempt + 1}/{_MAX_ATTEMPTS} failed " + f"(status={e.status_code}): {e} — retrying in {delay:.1f}s" + ) + await asyncio.sleep(delay) + + # ── Provider instantiation ──────────────────────────────────────────────── + + def _get_provider(self, provider_name: str, keys: dict) -> LLMProvider: + # Cache key: provider name + first 8 chars of api key (detects key rotation) + raw_key = self._pick_key(provider_name, keys) + cache_key = f"{provider_name}:{(raw_key or '')[:8]}" + + if cache_key not in self._cache: + self._cache[cache_key] = self._build(provider_name, keys) + + return self._cache[cache_key] + + def _build(self, provider_name: str, keys: dict) -> LLMProvider: + from app.services.providers.openai_compat import ( + openrouter_provider, openai_provider, groq_provider, ollama_provider, + ) 
+ from app.services.providers.anthropic_provider import AnthropicProvider + + if provider_name == "anthropic": + key = self._pick_key("anthropic", keys) + if not key: + raise ValueError("Anthropic API key not set. Add it via the dashboard or ANTHROPIC_API_KEY env var.") + return AnthropicProvider(api_key=key) + + if provider_name == "groq": + key = self._pick_key("groq", keys) + if not key: + raise ValueError("Groq API key not set. Add it via the dashboard or GROQ_API_KEY env var.") + return groq_provider(api_key=key) + + if provider_name == "openai": + key = self._pick_key("openai", keys) + if not key: + raise ValueError("OpenAI API key not set. Add it via the dashboard or OPENAI_API_KEY env var.") + return openai_provider(api_key=key) + + if provider_name == "ollama": + ollama_url = ( + (keys.get("ollama_base_url") or "").strip() + or os.getenv("OLLAMA_BASE_URL", "http://localhost:11434") + ) + return ollama_provider(base_url=ollama_url) + + # Default: openrouter + key = self._pick_key("openrouter", keys) + if not key: + raise ValueError("OpenRouter API key not set. 
Add it via the dashboard or OPENROUTER_API_KEY env var.") + return openrouter_provider(api_key=key) + + @staticmethod + def _pick_key(provider_name: str, keys: dict) -> str | None: + """DB key takes precedence over env var.""" + env_map = { + "openrouter": "OPENROUTER_API_KEY", + "openai": "OPENAI_API_KEY", + "anthropic": "ANTHROPIC_API_KEY", + "groq": "GROQ_API_KEY", + } + db_key_map = { + "openrouter": "openrouter_api_key", + "openai": "openrouter_api_key", # share the same field for now + "anthropic": "anthropic_api_key", + "groq": "groq_api_key", + } + + db_field = db_key_map.get(provider_name) + db_val = (keys.get(db_field) or "").strip() if db_field else "" + if db_val: + return db_val + + env_var = env_map.get(provider_name) + return os.getenv(env_var, "") if env_var else "" + + +provider_registry = ProviderRegistry() diff --git a/ai-service/app/services/settings_service.py b/ai-service/app/services/settings_service.py index 194c0f1..a56dbe9 100644 --- a/ai-service/app/services/settings_service.py +++ b/ai-service/app/services/settings_service.py @@ -1,4 +1,5 @@ import logging +import time from supabase import create_client, Client from app.core.config import settings as app_settings @@ -6,20 +7,24 @@ _DEFAULTS = { "system_prompt": None, - "model": "deepseek/deepseek-v3.2", - "temperature": 0.8, - "max_tokens": 300, - "empathy": 50, - "humor": 50, - "formality": 50, + "model": "deepseek/deepseek-v3.2", + "provider": "openrouter", + "temperature": 0.8, + "max_tokens": 300, + "empathy": 50, + "humor": 50, + "formality": 50, } _KEY_DEFAULTS = { "openrouter_api_key": None, - "deepgram_api_key": None, - "cartesia_api_key": None, - "livekit_url": None, - "livekit_api_key": None, + "deepgram_api_key": None, + "cartesia_api_key": None, + "anthropic_api_key": None, + "groq_api_key": None, + "ollama_base_url": "http://localhost:11434", + "livekit_url": None, + "livekit_api_key": None, "livekit_api_secret": None, } @@ -29,23 +34,43 @@ def __init__(self): self._client: 
Client | None = None if app_settings.SUPABASE_URL and app_settings.SUPABASE_SERVICE_KEY: self._client = create_client(app_settings.SUPABASE_URL, app_settings.SUPABASE_SERVICE_KEY) + + # Simple cache + self._cache = {} + self._cache_expiry = { + "settings": 0, + "keys": 0 + } + self._TTL = 60 # seconds for settings + self._KEY_TTL = 5 # seconds for keys (re-check faster) def get_settings(self) -> dict: if not self._client: return dict(_DEFAULTS) + + now = time.time() + if "settings" in self._cache and now < self._cache_expiry["settings"]: + return self._cache["settings"] + try: result = self._client.table("personality_settings").select("*").eq("id", 1).single().execute() if result.data: - return {**_DEFAULTS, **result.data} + settings = {**_DEFAULTS, **result.data} + self._cache["settings"] = settings + self._cache_expiry["settings"] = now + self._TTL + return settings except Exception as e: logger.warning(f"SettingsService.get_settings failed: {e}") - return dict(_DEFAULTS) + return self._cache.get("settings", dict(_DEFAULTS)) def update_settings(self, patch: dict) -> dict: if not self._client: return dict(_DEFAULTS) try: result = self._client.table("personality_settings").update(patch).eq("id", 1).execute() + # Invalidate cache + if "settings" in self._cache: + del self._cache["settings"] if result.data: return {**_DEFAULTS, **result.data[0]} except Exception as e: @@ -55,19 +80,30 @@ def update_settings(self, patch: dict) -> dict: def get_api_keys(self) -> dict: if not self._client: return dict(_KEY_DEFAULTS) + + now = time.time() + if "keys" in self._cache and now < self._cache_expiry["keys"]: + return self._cache["keys"] + try: result = self._client.table("api_keys").select("*").eq("id", 1).single().execute() if result.data: - return {**_KEY_DEFAULTS, **result.data} + keys = {**_KEY_DEFAULTS, **result.data} + self._cache["keys"] = keys + self._cache_expiry["keys"] = now + self._KEY_TTL + return keys except Exception as e: 
logger.warning(f"SettingsService.get_api_keys failed: {e}") - return dict(_KEY_DEFAULTS) + return self._cache.get("keys", dict(_KEY_DEFAULTS)) def update_api_keys(self, patch: dict) -> dict: if not self._client: return dict(_KEY_DEFAULTS) try: result = self._client.table("api_keys").update(patch).eq("id", 1).execute() + # Invalidate cache + if "keys" in self._cache: + del self._cache["keys"] if result.data: return {**_KEY_DEFAULTS, **result.data[0]} except Exception as e: diff --git a/ai-service/requirements.txt b/ai-service/requirements.txt index 2c42291..747f7f8 100644 --- a/ai-service/requirements.txt +++ b/ai-service/requirements.txt @@ -3,6 +3,7 @@ aiohttp==3.13.3 aiosignal==1.4.0 annotated-doc==0.0.4 annotated-types==0.7.0 +anthropic anyio==4.11.0 attrs==25.4.0 cachetools==6.2.6 diff --git a/ai-service/tests/__init__.py b/ai-service/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ai-service/tests/conftest.py b/ai-service/tests/conftest.py new file mode 100644 index 0000000..8eb1151 --- /dev/null +++ b/ai-service/tests/conftest.py @@ -0,0 +1,71 @@ +""" +Shared pytest fixtures and env setup. +Loads the project .env so integration tests can use real API keys. +""" +import os +import sys +from pathlib import Path + +import pytest +from dotenv import load_dotenv + +# ── Add ai-service root to sys.path so `app.*` imports resolve ─────────────── +AI_SERVICE_DIR = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(AI_SERVICE_DIR)) + +# ── Load .env from project root ─────────────────────────────────────────────── +PROJECT_ROOT = AI_SERVICE_DIR.parent +env_path = PROJECT_ROOT / ".env" +if not env_path.exists(): + env_path = AI_SERVICE_DIR / ".env" +load_dotenv(env_path) + + +# ── Reusable message lists ──────────────────────────────────────────────────── + +@pytest.fixture +def simple_messages(): + return [ + {"role": "system", "content": "You are a helpful assistant. 
Reply very briefly."}, + {"role": "user", "content": "Say exactly: [smile] Hello!"}, + ] + + +@pytest.fixture +def tool_messages(): + return [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is the weather in Tokyo? Use the get_weather tool."}, + ] + + +@pytest.fixture +def sample_tools(): + return [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather for a city.", + "parameters": { + "type": "object", + "properties": { + "city": {"type": "string", "description": "City name"}, + }, + "required": ["city"], + }, + }, + } + ] + + +# ── Key availability helpers (used by integration marks) ───────────────────── + +def has_openrouter_key(): + return bool(os.getenv("OPENROUTER_API_KEY", "").strip()) + +def has_openai_key(): + return bool(os.getenv("OPENAI_API_KEY", "").strip()) + +def has_anthropic_key(): + return bool(os.getenv("ANTHROPIC_API_KEY", "").strip()) diff --git a/ai-service/tests/providers/__init__.py b/ai-service/tests/providers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dashboard/package.json b/dashboard/package.json index c7fdc9a..465db32 100644 --- a/dashboard/package.json +++ b/dashboard/package.json @@ -15,7 +15,7 @@ "@supabase/supabase-js": "^2.95.3", "@tailwindcss/vite": "^4.1.18", "livekit-client": "^2.17.1", - "pixi.js": "^6.5.10", + "pixi.js": "^8.17.1", "react": "^19.2.0", "react-dom": "^19.2.0", "react-router-dom": "^7.13.0", @@ -39,4 +39,4 @@ "overrides": { "vite": "npm:rolldown-vite@7.2.2" } -} +} \ No newline at end of file diff --git a/dashboard/src/components/ApiKeys.jsx b/dashboard/src/components/ApiKeys.jsx index 037d9b8..b2e2369 100644 --- a/dashboard/src/components/ApiKeys.jsx +++ b/dashboard/src/components/ApiKeys.jsx @@ -3,10 +3,13 @@ import { supabase } from '../lib/supabaseClient' const KEY_GROUPS = [ { - label: 'LLM Provider', + label: 'LLM Providers', icon: 'psychology', fields: [ - { key: 
'openrouter_api_key', label: 'OpenRouter API Key', placeholder: 'sk-or-v1-...' }, + { key: 'openrouter_api_key', label: 'OpenRouter API Key', placeholder: 'sk-or-v1-...', hint: 'Routes to DeepSeek, GPT, Mistral, and more' }, + { key: 'anthropic_api_key', label: 'Anthropic API Key', placeholder: 'sk-ant-...', hint: 'Required for claude-* models' }, + { key: 'groq_api_key', label: 'Groq API Key', placeholder: 'gsk_...', hint: 'Fast inference for Llama / Mixtral' }, + { key: 'ollama_base_url', label: 'Ollama Base URL', placeholder: 'http://localhost:11434', hint: 'Local LLMs via Ollama', isUrl: true }, ], }, { @@ -14,13 +17,13 @@ const KEY_GROUPS = [ icon: 'mic', fields: [ { key: 'deepgram_api_key', label: 'Deepgram API Key (STT)', placeholder: 'your_deepgram_key' }, - { key: 'cartesia_api_key', label: 'Cartesia API Key (TTS)', placeholder: 'your_cartesia_key', note: 'Requires agent restart to apply' }, + { key: 'cartesia_api_key', label: 'Cartesia API Key (TTS)', placeholder: 'your_cartesia_key', note: 'Requires agent restart' }, ], }, { label: 'LiveKit', icon: 'cell_tower', - note: 'Changes require agent restart', + note: 'Requires agent restart', fields: [ { key: 'livekit_url', label: 'LiveKit URL', placeholder: 'wss://your-project.livekit.cloud' }, { key: 'livekit_api_key', label: 'LiveKit API Key', placeholder: 'API key' }, @@ -32,24 +35,16 @@ const KEY_GROUPS = [ export default function ApiKeys() { const [draft, setDraft] = useState({}) const [visible, setVisible] = useState({}) - const [saveState, setSaveState] = useState('idle') // 'idle' | 'saving' | 'saved' | 'error' + const [saveState, setSaveState] = useState('idle') const [loaded, setLoaded] = useState(false) useEffect(() => { - supabase - .from('api_keys') - .select('*') - .eq('id', 1) - .single() - .then(({ data }) => { - if (data) setDraft(data) - setLoaded(true) - }) + supabase.from('api_keys').select('*').eq('id', 1).single() + .then(({ data }) => { if (data) setDraft(data); setLoaded(true) }) }, []) 
- const patch = (key, value) => setDraft((d) => ({ ...d, [key]: value })) - - const toggleVisible = (key) => setVisible((v) => ({ ...v, [key]: !v[key] })) + const patch = (key, value) => setDraft(d => ({ ...d, [key]: value })) + const toggleVisible = key => setVisible(v => ({ ...v, [key]: !v[key] })) const saveKeys = async () => { setSaveState('saving') @@ -57,12 +52,7 @@ export default function ApiKeys() { const payload = { ...draft } delete payload.id payload.updated_at = new Date().toISOString() - - const { error } = await supabase - .from('api_keys') - .update(payload) - .eq('id', 1) - + const { error } = await supabase.from('api_keys').update(payload).eq('id', 1) if (error) throw error setSaveState('saved') setTimeout(() => setSaveState('idle'), 2500) @@ -73,11 +63,11 @@ export default function ApiKeys() { } } - const btnProps = { - idle: { label: 'Save API Keys', icon: 'key', cls: 'bg-primary hover:bg-primary/90 shadow-primary/20' }, - saving: { label: 'Saving...', icon: 'hourglass_top', cls: 'bg-primary/70 cursor-not-allowed' }, - saved: { label: 'Keys Saved!', icon: 'check_circle', cls: 'bg-emerald-500 shadow-emerald-200' }, - error: { label: 'Save Failed', icon: 'error', cls: 'bg-red-500 shadow-red-200' }, + const btn = { + idle: { label: 'Save API Keys', icon: 'key', cls: 'bg-primary hover:bg-primary/90 shadow-primary/20' }, + saving: { label: 'Saving...', icon: 'hourglass_top', cls: 'bg-primary/70 cursor-not-allowed' }, + saved: { label: 'Keys Saved!', icon: 'check_circle', cls: 'bg-emerald-500 shadow-emerald-200' }, + error: { label: 'Save Failed', icon: 'error', cls: 'bg-red-500 shadow-red-200' }, }[saveState] return ( @@ -90,10 +80,10 @@ export default function ApiKeys() { @@ -105,37 +95,35 @@ export default function ApiKeys() { {label} {note && ( - info - {note} + info{note} )}
{hint}
}- info - {fieldNote} + info{fn}
)}
lock
- Keys are stored in your private Supabase database. Leave a field empty to use the value from the server's .env file.
+ Stored in your private Supabase database. Leave a field empty to use the server's .env value.
- {status === 'connecting' && 'Connecting...'} - {status === 'connected' && formatTime(elapsed)} - {status === 'error' && 'Connection failed'} -
- - {/* Waveform */} - {status === 'connected' && ( -+ {status === 'connecting' && 'Establishing Connection...'} + {status === 'connected' && `Live Interaction — ${formatTime(elapsed)}`} + {status === 'error' && 'Neural Link Failed'} +
+- Your personal AI companion. Ask me anything, or start a voice call! +
+ Advanced Universal Responsive Avatar.
+ Ready for your next inquiry.
Active • High Precision Mode
++ {isCallActive ? 'Interactive Mode' : 'Ready to Assist'} +
Contextual Data Vectors
+- {new Date(f.created_at).toLocaleDateString()} • {formatSize(f.size_bytes)} -
+No Data Mapped
+