Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
# OpenRouter API Key - Get yours at https://openrouter.ai/keys
OPENROUTER_API_KEY=your_openrouter_key_here

# Optional: Direct provider keys (used when provider != openrouter)
ANTHROPIC_API_KEY= # Required for claude-* models
GROQ_API_KEY= # Required for Groq provider (fast Llama/Mixtral)
OLLAMA_BASE_URL=http://localhost:11434 # Local Ollama endpoint

# --- VOICE CONFIGURATION (LIVEKIT AGENTS) ---
# Deepgram API Key (STT) - Get yours at https://console.deepgram.com/
DEEPGRAM_API_KEY=your_deepgram_key_here
Expand Down
110 changes: 96 additions & 14 deletions ai-service/app/api/v1/chat.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import re
import json
import logging

import asyncio
from fastapi import APIRouter, HTTPException
from fastapi.responses import StreamingResponse
from app.services.memory_service import memory_service
from app.models.chat import ChatRequest, ChatResponse
from app.services.brain.graph import brain
Expand All @@ -10,7 +11,7 @@
router = APIRouter()
logger = logging.getLogger(__name__)

@router.post("", response_model=ChatResponse)
@router.post("")
async def chat(request: ChatRequest):
# Run Graph
try:
Expand All @@ -24,16 +25,98 @@ async def chat(request: ChatRequest):
"messages": [HumanMessage(content=request.message)],
"emotion": "neutral",
"conversation_id": conversation_id,
"identity": request.identity or "anonymous",
"stream": request.stream
}

config = {"configurable": {"thread_id": conversation_id}}
result = brain.invoke(initial_state, config=config)

if request.stream:
async def event_generator():
# 1. Start with emotion detection (sequential but fast)
try:
from app.services.brain.nodes.emotion import detect_emotion
emotion_res = await detect_emotion(initial_state)
detected_emotion = emotion_res.get("emotion", "neutral")
yield f"data: {json.dumps({'emotion': detected_emotion})}\n\n"
except Exception as ex:
logger.warning(f"Emotion detection failed: {ex}")
detected_emotion = "neutral"

# 2. Setup the full context for generation
from app.services.brain.nodes.generate import session_history_window
from app.services.llm import llm_service
from app.services.persona import persona_engine
from app.services.settings_service import settings_service
from datetime import datetime
from uuid import UUID

# Fetch context
user_msg = request.message
history_model, memories, facts = await asyncio.gather(
memory_service.get_history(UUID(conversation_id), session_history_window),
memory_service.search(query=user_msg, limit=3),
memory_service.get_long_term_memories(identity=request.identity or "anonymous", limit=5),
)

# Build Persona
db_settings = settings_service.get_settings()
custom_sys = (db_settings.get("system_prompt") or "").strip()
persona = custom_sys if custom_sys else persona_engine.get_persona()
time_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

system_content = (
"You are AURA (Advanced Universal Responsive Avatar), steward of the ASE Lab.\n\n"
f"{persona}\n\n"
"IMPORTANT: Do NOT include bracketed emotions like [happy] or [sad] in your response content. "
"I have already detected your emotion separately.\n\n"
f"**Context:**\n- Current Time: {time_str}"
)
if facts: system_content += f"\nWhat I know about you:\n{facts}\n"
if memories:
memory_block = "\n".join(f"- {m}" for m in memories)
system_content += f"\nRelevant past snippets:\n{memory_block}\n"

messages_format = [{"role":"system", "content":system_content}] + history_model + [{"role":"user", "content":user_msg}]

import re
full_text = ""
# 3. Stream from the registry directly
from app.services.providers.base import TextDelta
async for chunk in llm_service.stream(messages_format):
# Only yield incremental deltas to the dashboard
if isinstance(chunk, TextDelta):
txt = chunk.text
full_text += txt
yield f"data: {json.dumps({'text': txt})}\n\n"
# StreamDone is handled silently for background persistence below

# 4. Final sync/persistence - SCRUBBED
scrubbed_final = re.sub(r'\[.*?\]', '', full_text).strip()
asyncio.create_task(memory_service.add_interaction(
conversation_id=UUID(conversation_id),
user_text=user_msg,
assistant_text=scrubbed_final,
user_emotion=detected_emotion,
assistant_emotion="neutral"
))
asyncio.create_task(memory_service.store(
text=f"User: {user_msg} \n AURA: {scrubbed_final}",
metadata={"conversation_id": str(conversation_id)}
))

yield "data: [DONE]\n\n"

return StreamingResponse(event_generator(), media_type="text/event-stream")

# Non-streaming fallback
result = await brain.ainvoke(initial_state, config=config)

# Extract response
last_msg = result["messages"][-1].content
emotion = result.get("emotion", "neutral")

# Look for tool calls in the last turn
# Look for tool calls
tools_used = []
for msg in result["messages"]:
if hasattr(msg, "tool_calls") and msg.tool_calls:
Expand All @@ -43,22 +126,21 @@ async def chat(request: ChatRequest):
"args": tc.get("args", {})
})

# Clean tags
text = last_msg
if text.startswith("["):
match = re.match(r'^\[(.*?)\]', text)
if match:
text = text[match.end():].strip()

return ChatResponse(
text=text,
text=last_msg,
emotion=emotion,
conversation_id=conversation_id,
tools_used=tools_used if tools_used else None
)

except Exception as e:
logger.error(f"Chat error: {e}")
logger.error(f"Chat error: {e}", exc_info=True)
# If it was a stream request, we should yield an error event
if request.stream:
return StreamingResponse(
iter([f"data: {json.dumps({'text': f'Brain Freeze: {str(e)}', 'emotion': 'confused'})}\n\n"]),
media_type="text/event-stream"
)

return ChatResponse(
text=f"Brain Freeze: {str(e)}",
Expand Down
37 changes: 25 additions & 12 deletions ai-service/app/api/v1/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,29 @@

router = APIRouter()

# Supported LLM provider identifiers exposed to the settings UI
# (served verbatim by the GET /providers endpoint below).
PROVIDERS = ["openrouter", "openai", "anthropic", "groq", "ollama"]


class SettingsPatch(BaseModel):
    """Partial-update payload for assistant settings.

    Every field is optional; the PATCH handler applies only the fields the
    client actually sent.
    """

    system_prompt: str | None = None
    model: str | None = None
    # Provider id; presumably must be one of PROVIDERS — verify server-side.
    provider: str | None = None
    temperature: float | None = None
    max_tokens: int | None = None
    empathy: int | None = None
    humor: int | None = None
    formality: int | None = None


class ApiKeysPatch(BaseModel):
    """Partial-update payload for provider/service API credentials.

    Every field is optional; only keys the client sends are updated.
    """

    openrouter_api_key: str | None = None
    deepgram_api_key: str | None = None
    cartesia_api_key: str | None = None
    anthropic_api_key: str | None = None
    groq_api_key: str | None = None
    ollama_base_url: str | None = None  # e.g. http://localhost:11434
    livekit_url: str | None = None
    livekit_api_key: str | None = None
    livekit_api_secret: str | None = None


Expand All @@ -35,11 +41,18 @@ def update_settings(patch: SettingsPatch):
return settings_service.update_settings(data)


@router.get("/providers")
def list_providers():
    """Return available provider names for the UI dropdown."""
    # Serves the module-level PROVIDERS constant; NOTE(review): this list
    # presumably must stay in sync with the providers the LLM service
    # actually implements — confirm when adding a provider.
    return {"providers": PROVIDERS}


@router.get("/keys")
def get_api_keys():
    """Return each stored key name mapped to "set" or None, never the value.

    The dashboard only needs to know whether a credential is configured,
    so values are masked before leaving the service.
    """
    keys = settings_service.get_api_keys()
    # Treat empty/whitespace-only values as unset; drop the internal "id" row.
    return {k: ("set" if (v and str(v).strip()) else None)
            for k, v in keys.items() if k != "id"}


@router.put("/keys")
Expand Down
3 changes: 3 additions & 0 deletions ai-service/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ class Settings(BaseSettings):
LLM_API_KEY: str | None = None
OPENAI_API_KEY: str | None = None
OPENROUTER_API_KEY: str | None = None
ANTHROPIC_API_KEY: str | None = None
GROQ_API_KEY: str | None = None
OLLAMA_BASE_URL: str = "http://localhost:11434"
OPENAI_MODEL: str = "gpt-3.5-turbo"

# Supabase
Expand Down
2 changes: 2 additions & 0 deletions ai-service/app/models/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
class ChatRequest(BaseModel):
    """Incoming chat payload."""

    message: str  # User utterance to process
    conversation_id: Optional[str] = None  # Existing thread id, if resuming
    identity: Optional[str] = None  # Caller identity for long-term memory lookups
    stream: bool = False  # When True, the endpoint streams SSE events instead

class ChatResponse(BaseModel):
text: str
Expand Down
6 changes: 3 additions & 3 deletions ai-service/app/services/brain/nodes/emotion.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from app.services.llm import llm_service

# Node to detect emotion
def detect_emotion(state: BrainState) -> dict:
async def detect_emotion(state: BrainState) -> dict:
# Get last user message
last_message = state["messages"][-1].content

Expand All @@ -13,7 +13,7 @@ def detect_emotion(state: BrainState) -> dict:
"""

# Call LLM to detect emotion
emotion = llm_service.generate([{"role": "system", "content": prompt}])
response = await llm_service.generate([{"role": "system", "content": prompt}])

# Return detected emotion
return {"emotion": emotion["emotion"].strip().lower()}
return {"emotion": response.get("emotion", "neutral").strip().lower()}
81 changes: 62 additions & 19 deletions ai-service/app/services/brain/nodes/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@

session_history_window = 9999

async def generate_response(state: BrainState) -> dict:
    """Async LangGraph node: delegate to `generate` and return its state update.

    The earlier synchronous wrapper (ThreadPoolExecutor + asyncio.run) is
    obsolete now that the graph is driven via `ainvoke`; a direct await is
    both simpler and avoids spawning a thread per request.
    """
    return await generate(state)


# Node to generate response based on persona, conversation history and detected emotion (convesation history not being tested yet)
Expand Down Expand Up @@ -45,46 +44,90 @@ async def generate(state: BrainState) -> dict:
else:
user_message = ""

# Load History
history_model, memories = await asyncio.gather(
# Load History & Long-term memories
history_model, memories, facts = await asyncio.gather(
memory_service.get_history(conversation_id, session_history_window),
memory_service.search(query=user_message, limit=3),
memory_service.get_long_term_memories(identity=state.get("identity", "anonymous"), limit=5),
)

history = history_model

# System Prompt
system_message = prompter.build("", context=None)[0]
# Save User message IMMEDIATELY to DB so it persists even if AI fails or disconnects
await memory_service.add_interaction(
conversation_id=conversation_id,
user_text=user_message,
assistant_text=None, # Update later
user_emotion=detected_emotion,
assistant_emotion=None
)

if memories:
memory_block = "\n".join(f"-{message}" for message in memories)
system_message = {
"role" : "system",
"content": (system_message["content"] + f"Ingatan sebelumnya: \n {memory_block}")
}
# System Prompt (Pulling from DB via settings_service)
from app.services.settings_service import settings_service
db_settings = settings_service.get_settings()
custom_sys = (db_settings.get("system_prompt") or "").strip()

from app.services.persona import persona_engine
persona = custom_sys if custom_sys else persona_engine.get_persona()

# Add system prompt with persona and current time
from datetime import datetime
time_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

system_content = (
"You are AURA (Advanced Universal Responsive Avatar), "
"the spirited AI steward of the ASE Lab.\n\n"
f"{persona}\n\n"
f"**Context:**\n- Current Time: {time_str}"
)

# Combine RAG (memories) and LTS (facts)
combined_memory = ""
if facts:
combined_memory += f"\nWhat I know about you:\n{facts}\n"
if memories:
memory_block = "\n".join(f"- {message}" for message in memories)
combined_memory += f"\nRelevant past snippets:\n{memory_block}\n"

if combined_memory:
system_content += f"\n\n**Memory Retrieval:**{combined_memory}"

system_message = {"role": "system", "content": system_content}

# Build payload
messages_format = [system_message] + history + current_message

# Check for stream request
is_stream = state.get("stream", False)

if is_stream:
# For streaming, we yield chunks.
# But this is a node, so we return the final state but can use callbacks?
# Actually, chat.py will call brain.astream().
# We handle the stream here if we want to return the stream object,
# but LangGraph nodes should return the update.
# So we update chat.py to use a different strategy.
pass

# Generate response from LLM
response = llm_service.generate(messages_format)
response = await llm_service.generate(messages_format)
text = response.get("text", "")
emotion = response.get("emotion", "neutral")

await asyncio.gather(
# Complete the interaction in DB
memory_service.add_interaction(
conversation_id=conversation_id,
user_text=user_message,
assistant_text=response["text"],
assistant_text=text,
user_emotion=detected_emotion,
assistant_emotion=emotion
),

memory_service.store(
text=f"User: {user_message} \n AURA: {response['text']}",
text=f"User: {user_message} \n AURA: {text}",
metadata={"conversation_id": str(conversation_id)},
),
)

# Return response
return {"messages": [AIMessage(content=response["text"])], "emotion": response["emotion"]}
return {"messages": [AIMessage(content=text)], "emotion": emotion}
3 changes: 2 additions & 1 deletion ai-service/app/services/brain/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@
class BrainState(TypedDict):
    """Shared state threaded through the brain graph's nodes."""

    # Conversation transcript; operator.add makes LangGraph append each
    # node's returned messages rather than replace the list.
    messages: Annotated[List[BaseMessage], operator.add]
    # Latest detected user emotion label (e.g. "neutral").
    emotion: str
    # Thread/conversation identifier used for checkpointing and persistence.
    conversation_id: str
    # Caller identity used to scope long-term memory lookups.
    identity: str
Loading