# Source: sayna/config.example.yaml at master (SaynaAI/sayna on GitHub)
#
# Sayna Configuration File Example
#
# This file demonstrates all available configuration options.
# Copy this file to config.yaml and customize as needed.
#
# Usage: sayna -c config.yaml
#
# Priority: Environment variables > YAML values > Defaults
# This means you can override any value in this file by setting
# the corresponding environment variable.

# Server configuration
# `host` and `port` for the Sayna server. As stated in the file header, the
# environment variable named in each trailing "ENV:" comment takes priority
# over the value written here.
server:
  host: "0.0.0.0"      # ENV: HOST
  port: 3001           # ENV: PORT

# LiveKit configuration
# In this example `url` (ws:// scheme) and `public_url` (http:// scheme) both
# point at localhost port 7880. The api_key / api_secret values are
# placeholders and must be replaced with real LiveKit credentials.
livekit:
  url: "ws://localhost:7880"              # ENV: LIVEKIT_URL
  public_url: "http://localhost:7880"     # ENV: LIVEKIT_PUBLIC_URL
  api_key: "your-livekit-api-key"         # ENV: LIVEKIT_API_KEY
  api_secret: "your-livekit-api-secret"   # ENV: LIVEKIT_API_SECRET

# Provider API keys
# Values of the form "your-...-api-key" are placeholders to replace.
providers:
  # Deepgram and ElevenLabs both appear in the STT and TTS provider option
  # lists further down in this file.
  deepgram_api_key: "your-deepgram-api-key"     # ENV: DEEPGRAM_API_KEY
  elevenlabs_api_key: "your-elevenlabs-api-key" # ENV: ELEVENLABS_API_KEY

  # Google Cloud settings (used for both Speech-to-Text and Text-to-Speech)
  # Authentication options (in order of precedence):
  # 1. Path to service account JSON file
  # 2. JSON content string (starts with '{')
  # 3. Empty string to use Application Default Credentials (ADC)
  # Note: project_id is automatically extracted from the credentials JSON
  # The empty string below therefore selects option 3 (ADC).
  google_credentials: ""                         # ENV: GOOGLE_APPLICATION_CREDENTIALS

  # Microsoft Azure Speech Services settings (used for both STT and TTS)
  # Get these from Azure Portal → Your Speech resource → Keys and Endpoint
  # IMPORTANT: The subscription key is tied to a specific Azure region.
  # Using a key with the wrong region will result in 401 Unauthorized errors.
  azure_speech_subscription_key: ""              # ENV: AZURE_SPEECH_SUBSCRIPTION_KEY
  # Azure region where your Speech resource was created
  # Common regions: eastus, westus, westus2, westeurope, northeurope, eastasia, southeastasia
  # Note: Both STT and TTS use the same region setting
  azure_speech_region: "eastus"                  # ENV: AZURE_SPEECH_REGION (default: eastus)

  # Cartesia API key (used for both STT and TTS)
  # STT uses ink-whisper model, TTS uses sonic-3 model
  # Get from Cartesia dashboard: https://play.cartesia.ai/
  cartesia_api_key: ""                           # ENV: CARTESIA_API_KEY

# STT Provider Configuration (optional - can also be set via WebSocket config)
# stt:
#   provider: deepgram  # Options: "deepgram", "google", "elevenlabs", "microsoft-azure", "cartesia"
#   # Google-specific settings (only used when provider is "google")
#   google:
#     location: global           # Options: global, us, eu, asia-southeast1, etc.
#     recognizer_id: ""          # Optional: pre-configured recognizer ID
#     model: latest_long         # Options: latest_long, latest_short, chirp_2, telephony

# TTS Provider Configuration (optional - can also be set via WebSocket config)
# tts:
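#   provider: elevenlabs  # Options: "deepgram", "elevenlabs", "google", "azure", "cartesia"
#   # NOTE(review): the STT options above spell the Azure provider "microsoft-azure"; confirm which identifier TTS accepts.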
#   # Cartesia-specific settings (only used when provider is "cartesia")
#   cartesia:
#     model: sonic-3           # Options: sonic-3, sonic-3-2025-10-27
#     voice_id: ""             # Voice UUID from Cartesia voice library
#     audio_format: linear16   # Options: linear16/pcm, wav, mp3
#     sample_rate: 24000       # Options: 8000, 16000, 22050, 24000, 44100, 48000

# Voice Activity Detection (VAD) + Turn Detection Configuration
# Requires: --features stt-vad
# When the stt-vad feature is enabled, VAD and turn detection are always bundled together.
# VAD monitors audio for silence and triggers the turn detection model to confirm
# if the speaker's turn is complete.
# vad:
#   # Speech probability threshold (0.0 to 1.0)
#   # Audio with probability above this is considered speech
#   threshold: 0.5
#
#   # Silence duration in milliseconds to trigger turn detection
#   # When this duration of silence is detected, the turn detection model is invoked
#   # This is the primary control for turn-taking responsiveness
#   silence_duration_ms: 300
#
#   # Minimum speech duration (ms) before checking for silence
#   # Prevents false triggers on brief pauses
#   min_speech_duration_ms: 100
#
#   # Path to custom Silero-VAD model (optional)
#   # model_path: /path/to/silero_vad.onnx
#
#   # Model download URL (if not using model_path)
#   # model_url: https://huggingface.co/onnx-community/silero-vad/resolve/main/silero_vad.onnx

# LiveKit recording storage (Amazon S3 or Google Cloud Storage).
#
# `prefix` is shared by all backends. Recording paths are constructed as
# `{prefix}/{stream_id}/audio.ogg` and the same value is used both by LiveKit
# Egress for upload and by Sayna's `GET /recording/{stream_id}` for download.
#
# `backend.type` selects the storage backend; only one variant is active at a
# time. Set `RECORDING_BACKEND={s3|gcs}` to override the variant via env, and
# fill in the `RECORDING_<S3|GCS>_*` variables for that variant.
recording:
  # Path prefix under which recordings are stored; shared by every backend.
  prefix: "recordings/production"           # ENV: RECORDING_PREFIX

  # ---- Amazon S3 (or S3-compatible: MinIO, Cloudflare R2, etc.) ----
  # Only one `backend` mapping may be present; this S3 variant is the active
  # one in this example and the GCS variant further down is commented out.
  backend:
    type: s3                                # ENV: RECORDING_BACKEND=s3
    bucket: "my-recordings"                 # ENV: RECORDING_S3_BUCKET
    region: "us-west-2"                     # ENV: RECORDING_S3_REGION
    access_key: "your-access-key"           # ENV: RECORDING_S3_ACCESS_KEY
    secret_key: "your-secret-key"           # ENV: RECORDING_S3_SECRET_KEY
    # Optional. Omit for the default AWS S3 endpoint; required for MinIO/R2/etc.
    # The value below is an AWS endpoint and is shown only to illustrate the
    # key; replace it with your provider's URL when using an S3-compatible store.
    endpoint: "https://s3.amazonaws.com"    # ENV: RECORDING_S3_ENDPOINT
    # Optional. `true` selects path-style addressing (required for MinIO);
    # `false` uses virtual-hosted style (the AWS S3 default).
    force_path_style: false                 # ENV: RECORDING_S3_FORCE_PATH_STYLE

  # ---- Google Cloud Storage (alternative to S3 above) ----
  # Comment out the s3 block above and use this instead.
  # backend:
  #   type: gcs                                       # ENV: RECORDING_BACKEND=gcs
  #   bucket: "my-recordings"                         # ENV: RECORDING_GCS_BUCKET
  #   # Provide exactly one of `credentials_path` or `credentials_json`. The
  #   # JSON content is read eagerly at startup because LiveKit Egress requires
  #   # the credentials inline in its protobuf message.
  #   credentials_path: "/etc/sayna/gcs-sa.json"      # ENV: RECORDING_GCS_CREDENTIALS_PATH
  #   # credentials_json: |                            # ENV: RECORDING_GCS_CREDENTIALS_JSON
  #   #   { "type": "service_account", ... }

# Cache configuration
# NOTE(review): the SIP section of this file says runtime hook changes are
# persisted to <cache_path>/sip_hooks.json; presumably that refers to `path`
# below — confirm.
cache:
  path: "/var/cache/sayna"  # ENV: CACHE_PATH (if omitted, uses in-memory cache)
  # 2592000 seconds = 30 days (30 * 24 * 60 * 60)
  ttl_seconds: 2592000      # ENV: CACHE_TTL_SECONDS (default: 30 days)

# Authentication configuration
# The sk_test_... secrets and the example.com URL below are placeholders;
# replace them before use.
auth:
  required: false                             # ENV: AUTH_REQUIRED (true/false/1/0/yes/no)
  service_url: "https://auth.example.com"     # ENV: AUTH_SERVICE_URL
  signing_key_path: "/path/to/key.pem"        # ENV: AUTH_SIGNING_KEY_PATH
  # ENV: AUTH_API_SECRETS_JSON (JSON array of {id, secret})
  # Each list entry pairs an `id` with its `secret`. The environment variable
  # carries the whole list as one JSON array, which is why every entry below
  # names the same variable.
  api_secrets:
    - id: "default"
      secret: "sk_test_default_123456"        # ENV: AUTH_API_SECRETS_JSON (preferred)
    - id: "partner-1"
      secret: "sk_test_partner_abcdef"        # ENV: AUTH_API_SECRETS_JSON (preferred)
  # Legacy single-secret alias (ignored when api_secrets is non-empty):
  # api_secret: "sk_test_legacy_123456"       # ENV: AUTH_API_SECRET (AUTH_API_SECRET_ID optional)
  timeout_seconds: 5                          # ENV: AUTH_TIMEOUT_SECONDS

# SIP configuration (optional)
# Used for SIP trunk integration and webhook forwarding
sip:
  room_prefix: "sip-"                         # ENV: SIP_ROOM_PREFIX
  # Entries may be CIDR ranges or single addresses, as the two examples show.
  allowed_addresses:                          # ENV: SIP_ALLOWED_ADDRESSES (comma-separated)
    - "192.168.1.0/24"
    - "10.0.0.1"

  # Target SIP server address for outbound trunks (required for outbound calls)
  # Format: hostname or hostname:port (e.g., "sip.example.com" or "sip.example.com:5060")
  # This is required when using the /sip/call endpoint for outbound SIP calls
  outbound_address: "sip.trunk.example.com:5060"  # ENV: SIP_OUTBOUND_ADDRESS

  # Outbound trunk authentication credentials (optional)
  # Only required if your SIP provider requires authentication for outbound calls.
  # Both username and password must be set together if authentication is needed.
  # outbound_auth_username: "your-sip-username"   # ENV: SIP_OUTBOUND_AUTH_USERNAME
  # outbound_auth_password: "your-sip-password"   # ENV: SIP_OUTBOUND_AUTH_PASSWORD

  # REQUIRED if hooks are configured: Global signing secret for webhook authentication
  # All outbound SIP webhooks are signed with HMAC-SHA256 for security
  # Generate with: openssl rand -hex 32
  # See docs/livekit_webhook.md#webhook-signing for details
  hook_secret: "your-global-signing-secret"   # ENV: SIP_HOOK_SECRET (min 16 chars, 32+ recommended)

  # Downstream webhook endpoints for SIP event forwarding
  # Each hook receives participant_joined events for matching SIP domains
  # All requests include X-Sayna-Signature header for verification
  #
  # Runtime management: Hooks can be added/replaced at runtime via the
  # POST /sip/hooks REST endpoint. Runtime changes are persisted to
  # <cache_path>/sip_hooks.json and merged with this config on startup.
  # Note: Runtime hooks use the global hook_secret (per-hook secrets not supported via API)
  hooks:                                      # ENV: SIP_HOOKS_JSON (JSON array with "auth_id" and optional "secret" field)
    - host: "example.com"                     # SIP domain to match (case-insensitive)
      url: "https://webhook.example.com/events"  # HTTPS strongly recommended
      auth_id: "tenant-123"                   # Tenant identifier for room metadata (required when AUTH_REQUIRED=true; optional/empty when AUTH_REQUIRED=false)
      # Optional: per-hook secret override (omit to use global hook_secret)
      # Use for multi-tenant deployments or zero-trust architectures
      # secret: "your-per-hook-signing-secret"

    # Second hook: no per-hook secret, so the global hook_secret applies.
    - host: "another.com"
      url: "https://webhook2.example.com/events"
      auth_id: "tenant-456"                   # Tenant identifier for room metadata (required when AUTH_REQUIRED=true; optional/empty when AUTH_REQUIRED=false)