Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions app/ai/voice/agents/breeze_buddy/template/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,43 @@ class TTSProvider(str, Enum):
SARVAM = "sarvam"


class ElevenLabsVoiceModel(str, Enum):
    """ElevenLabs TTS voice models.

    Values are the model-identifier strings passed to the ElevenLabs API.
    NOTE(review): this list is maintained by hand and must be updated when
    ElevenLabs adds or retires models — consider fetching from their
    models-listing API instead.
    """

    MULTILINGUAL_V2 = "eleven_multilingual_v2"
    FLASH_V2_5 = "eleven_flash_v2_5"
    TURBO_V2_5 = "eleven_turbo_v2_5"
    # presumably the speech-to-speech variant of multilingual v2 — confirm
    MULTILINGUAL_V2_STS = "eleven_multilingual_v2_sts"


class ElevenLabsVoiceId(str, Enum):
    """ElevenLabs TTS voice IDs.

    Member names are human-readable voice names; values are the opaque
    voice-ID strings assigned by ElevenLabs.
    """

    ROOHI = "Zjj2iX3aHYDcJSG4mMzk"
    ANIKA = "B18ifp9INVN3SE2RoopA"
    RANBIR = "2iAXJEMO2o0PqUHzvZwQ"
    RAJU = "pzxut4zZz4GImZNlqQ3H"


class CartesiaVoiceModel(str, Enum):
    """Cartesia TTS voice models.

    Values are the model-identifier strings passed to the Cartesia API.
    NOTE(review): hand-maintained list — must be kept in sync as Cartesia
    releases new models.
    """

    # presumably "-stable" pins a fixed release while "-latest" tracks the
    # newest sonic-3 build — confirm against Cartesia docs
    SONIC_3_STABLE = "sonic-3-stable"
    SONIC_3_LATEST = "sonic-3-latest"
    SONIC_2 = "sonic-2"
    SONIC_TURBO = "sonic-turbo"
    SONIC = "sonic"


class CartesiaVoiceId(str, Enum):
    """Cartesia TTS voice IDs.

    Member names are human-readable voice names; values are the UUID
    strings Cartesia assigns to each voice.
    """

    ARUSHI = "95d51f79-c397-46f9-b49a-23763d3eaa2d"
    NISHA = "0f14d8cb-f039-41fe-a813-a9b4bee7eed8"
    ROHAN = "4877b818-c7fe-4c89-b1cf-eadf8e23da72"
    VISHAL = "098fb15d-2597-4186-8b74-25340050b6e7"


# Maps legacy tts_voice_name values to current provider strings for backward compat.
# Used by decoder migration and runtime lead payload resolution.
LEGACY_VOICE_TO_PROVIDER: Dict[str, str] = {
Expand Down
14 changes: 14 additions & 0 deletions app/ai/voice/llm/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,20 @@ class LLMProvider(str, Enum):
GOOGLE_VERTEX = "google_vertex"


class AzureLLMModel(str, Enum):
    """Azure OpenAI LLM models.

    Values are the model strings used when configuring the Azure OpenAI
    provider.
    """

    GPT_4O = "gpt-4o"
    # presumably an Azure deployment alias rather than a base OpenAI model
    # name — confirm against the Azure deployment configuration
    GPT_4O_AUTOMATIC = "gpt-4o-automatic"


class GoogleVertexModel(str, Enum):
    """Google Vertex AI models (Gemini and Claude).

    NOTE(review): per the review thread, Gemini and Claude models route
    through different SDKs on Vertex; callers presenting these as options
    should pair each model with the matching SDK — confirm before use.
    """

    GEMINI_2_0_FLASH = "gemini-2.0-flash"
    CLAUDE_3_5_SONNET = "claude-3-5-sonnet"


class LLMSdk(str, Enum):
"""SDK used for LLM communication.

Expand Down
46 changes: 46 additions & 0 deletions app/api/routers/breeze_buddy/playground/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

from app.ai.voice.agents.breeze_buddy.template.types import (
BackgroundSoundFile,
CartesiaVoiceId,
CartesiaVoiceModel,
ElevenLabsVoiceId,
ElevenLabsVoiceModel,
InterruptionMode,
KeywordMatchType,
NoiseFilterType,
Expand All @@ -16,8 +20,10 @@
SHORT_TO_FULL_LANGUAGE_CODE,
)
from app.ai.voice.llm.types import (
AzureLLMModel,
AzureLLMPlaygroundConfig,
AzureThinkingPlaygroundConfig,
GoogleVertexModel,
LLMProvider,
LLMSdk,
VertexClaudeThinkingPlaygroundConfig,
Expand Down Expand Up @@ -120,10 +126,49 @@ def _voice_config_fields(model_class) -> list:
),
}

# TTS voice models per provider (label derived from value)
tts_voice_models = {
TTSProvider.ELEVENLABS.value: [
{
"value": m.value,
"label": m.value.replace("eleven_", "").replace("_", " ").title(),
}
for m in ElevenLabsVoiceModel
],
TTSProvider.CARTESIA.value: [
{"value": m.value, "label": m.value.replace("-", " ").title()}
for m in CartesiaVoiceModel
],
TTSProvider.SARVAM.value: [],
}

# TTS voice IDs per provider (label from enum name since values are UUIDs)
tts_voice_ids = {
TTSProvider.ELEVENLABS.value: [
{"value": v.value, "label": v.name.title()} for v in ElevenLabsVoiceId
],
TTSProvider.CARTESIA.value: [
{"value": v.value, "label": v.name.title()} for v in CartesiaVoiceId
],
TTSProvider.SARVAM.value: [],
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

# LLM models per provider (keyed by provider or provider__sdk)
llm_models = {
LLMProvider.AZURE.value: [
{"value": m.value, "label": m.value} for m in AzureLLMModel
Comment on lines +157 to +159
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Azure model options here only include gpt-4o, but the Breeze Buddy Azure default is gpt-4o-automatic (see AZURE_BREEZE_BUDDY_OPENAI_MODEL default). If the playground UI relies on this list to populate a dropdown, users may be unable to select the currently configured default. Consider adding the default model string to AzureLLMModel (or generating the options from config + known models) so the options reflect what the backend actually supports.

Suggested change
llm_models = {
LLMProvider.AZURE.value: [
{"value": m.value, "label": m.value} for m in AzureLLMModel
azure_model_values = list(dict.fromkeys([*(m.value for m in AzureLLMModel), "gpt-4o-automatic"]))
llm_models = {
LLMProvider.AZURE.value: [
{"value": model_value, "label": model_value}
for model_value in azure_model_values

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

],
LLMProvider.GOOGLE_VERTEX.value: [
{"value": m.value, "label": m.value} for m in GoogleVertexModel
],
Comment on lines +156 to +163
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

llm_models is keyed only by provider, but Vertex model choice is SDK-dependent (Gemini via Google SDK vs Claude via Anthropic SDK). As-is, the options list can surface claude-3-5-sonnet even when the user selects google_vertex + sdk=google, which will route to the Gemini build_vertex_llm path and likely fail at runtime. Consider keying Vertex model options the same way as llm_thinking_fields (e.g., google_vertex__google vs google_vertex__anthropic) and separating Gemini vs Claude model lists accordingly.

Suggested change
# LLM models per provider (keyed by provider or provider__sdk)
llm_models = {
LLMProvider.AZURE.value: [
{"value": m.value, "label": m.value} for m in AzureLLMModel
],
LLMProvider.GOOGLE_VERTEX.value: [
{"value": m.value, "label": m.value} for m in GoogleVertexModel
],
# LLM models per provider (azure) or provider__sdk (google_vertex)
llm_models = {
LLMProvider.AZURE.value: [
{"value": m.value, "label": m.value} for m in AzureLLMModel
],
f"{LLMProvider.GOOGLE_VERTEX.value}__{LLMSdk.GOOGLE.value}": [
{"value": m.value, "label": m.value}
for m in GoogleVertexModel
if not m.value.startswith("claude-")
],
f"{LLMProvider.GOOGLE_VERTEX.value}__{LLMSdk.ANTHROPIC.value}": [
{"value": m.value, "label": m.value}
for m in GoogleVertexModel
if m.value.startswith("claude-")
],

Copilot uses AI. Check for mistakes.
}

return {
"stt_providers": stt_providers,
"tts_providers": tts_providers,
"tts_configuration_fields": tts_configuration_fields,
"tts_voice_models": tts_voice_models,
"tts_voice_ids": tts_voice_ids,
"stt_languages": stt_languages,
"background_sounds": background_sounds,
"noise_filter_types": noise_filter_types,
Expand All @@ -133,4 +178,5 @@ def _voice_config_fields(model_class) -> list:
"llm_sdks": llm_sdks,
"llm_fields": llm_fields,
"llm_thinking_fields": llm_thinking_fields,
"llm_models": llm_models,
}
Loading