diff --git a/app/ai/voice/agents/breeze_buddy/template/types.py b/app/ai/voice/agents/breeze_buddy/template/types.py index ec681580e..70ea90459 100644 --- a/app/ai/voice/agents/breeze_buddy/template/types.py +++ b/app/ai/voice/agents/breeze_buddy/template/types.py @@ -274,6 +274,43 @@ class TTSProvider(str, Enum): GEMINI = "gemini" +class ElevenLabsVoiceModel(str, Enum): + """ElevenLabs TTS voice models.""" + + MULTILINGUAL_V2 = "eleven_multilingual_v2" + FLASH_V2_5 = "eleven_flash_v2_5" + TURBO_V2_5 = "eleven_turbo_v2_5" + MULTILINGUAL_V2_STS = "eleven_multilingual_v2_sts" + + +class ElevenLabsVoiceId(str, Enum): + """ElevenLabs TTS voice IDs.""" + + ROOHI = "Zjj2iX3aHYDcJSG4mMzk" + ANIKA = "B18ifp9INVN3SE2RoopA" + RANBIR = "2iAXJEMO2o0PqUHzvZwQ" + RAJU = "pzxut4zZz4GImZNlqQ3H" + + +class CartesiaVoiceModel(str, Enum): + """Cartesia TTS voice models.""" + + SONIC_3_STABLE = "sonic-3-stable" + SONIC_3_LATEST = "sonic-3-latest" + SONIC_2 = "sonic-2" + SONIC_TURBO = "sonic-turbo" + SONIC = "sonic" + + +class CartesiaVoiceId(str, Enum): + """Cartesia TTS voice IDs.""" + + ARUSHI = "95d51f79-c397-46f9-b49a-23763d3eaa2d" + NISHA = "0f14d8cb-f039-41fe-a813-a9b4bee7eed8" + ROHAN = "4877b818-c7fe-4c89-b1cf-eadf8e23da72" + VISHAL = "098fb15d-2597-4186-8b74-25340050b6e7" + + # Maps legacy tts_voice_name values to current provider strings for backward compat. # Used by decoder migration and runtime lead payload resolution. 
LEGACY_VOICE_TO_PROVIDER: Dict[str, str] = { diff --git a/app/ai/voice/llm/types.py b/app/ai/voice/llm/types.py index 4074d7c3c..e2723756e 100644 --- a/app/ai/voice/llm/types.py +++ b/app/ai/voice/llm/types.py @@ -66,6 +66,20 @@ class RealtimeConfig(BaseModel): ) +class AzureLLMModel(str, Enum): + """Azure OpenAI LLM models.""" + + GPT_4O = "gpt-4o" + GPT_4O_AUTOMATIC = "gpt-4o-automatic" + + +class GoogleVertexModel(str, Enum): + """Google Vertex AI models (Gemini and Claude).""" + + GEMINI_2_0_FLASH = "gemini-2.0-flash" + CLAUDE_3_5_SONNET = "claude-3-5-sonnet" + + class LLMSdk(str, Enum): """SDK used for LLM communication. diff --git a/app/api/routers/breeze_buddy/playground/handlers.py b/app/api/routers/breeze_buddy/playground/handlers.py index 282d40ca7..75511cc13 100644 --- a/app/api/routers/breeze_buddy/playground/handlers.py +++ b/app/api/routers/breeze_buddy/playground/handlers.py @@ -4,6 +4,10 @@ from app.ai.voice.agents.breeze_buddy.template.types import ( BackgroundSoundFile, + CartesiaVoiceId, + CartesiaVoiceModel, + ElevenLabsVoiceId, + ElevenLabsVoiceModel, InterruptionMode, KeywordMatchType, NoiseFilterType, @@ -16,8 +20,10 @@ SHORT_TO_FULL_LANGUAGE_CODE, ) from app.ai.voice.llm.types import ( + AzureLLMModel, AzureLLMPlaygroundConfig, AzureThinkingPlaygroundConfig, + GoogleVertexModel, LLMProvider, LLMSdk, VertexClaudeThinkingPlaygroundConfig, @@ -120,10 +126,49 @@ def _voice_config_fields(model_class) -> list: ), } + # TTS voice models per provider (label derived from value) + tts_voice_models = { + TTSProvider.ELEVENLABS.value: [ + { + "value": m.value, + "label": m.value.replace("eleven_", "").replace("_", " ").title(), + } + for m in ElevenLabsVoiceModel + ], + TTSProvider.CARTESIA.value: [ + {"value": m.value, "label": m.value.replace("-", " ").title()} + for m in CartesiaVoiceModel + ], + TTSProvider.SARVAM.value: [], + } + + # TTS voice IDs per provider (label from enum name since values are opaque IDs) + tts_voice_ids = { 
TTSProvider.ELEVENLABS.value: [ + {"value": v.value, "label": v.name.title()} for v in ElevenLabsVoiceId + ], + TTSProvider.CARTESIA.value: [ + {"value": v.value, "label": v.name.title()} for v in CartesiaVoiceId + ], + TTSProvider.SARVAM.value: [], + } + + # LLM models per provider (keyed by provider or provider__sdk) + llm_models = { + LLMProvider.AZURE.value: [ + {"value": m.value, "label": m.value} for m in AzureLLMModel + ], + LLMProvider.GOOGLE_VERTEX.value: [ + {"value": m.value, "label": m.value} for m in GoogleVertexModel + ], + } + return { "stt_providers": stt_providers, "tts_providers": tts_providers, "tts_configuration_fields": tts_configuration_fields, + "tts_voice_models": tts_voice_models, + "tts_voice_ids": tts_voice_ids, "stt_languages": stt_languages, "background_sounds": background_sounds, "noise_filter_types": noise_filter_types, @@ -133,4 +178,5 @@ def _voice_config_fields(model_class) -> list: "llm_sdks": llm_sdks, "llm_fields": llm_fields, "llm_thinking_fields": llm_thinking_fields, + "llm_models": llm_models, }