# Source: vilberta/config.toml at main (charstorm/vilberta on GitHub)
# Vilberta configuration file.
# Place this file in the working directory, or pass its path with -c (e.g. -c config.toml).

[GENERAL]
# Operating mode. Accepted values: "basic" or "mcp".
# The server_url in [MCP] is only used when mode = "mcp".
mode = "basic"
# mode = "mcp"

[LLM_API]
# Base URL of the LLM API.
api_base_url = "https://openrouter.ai/api/v1"
# Name of the environment variable that holds the API key.
# This is NOT the key itself.
api_key_env = "OPENROUTER_API_KEY"

# The three-step pipeline uses a different model for each step.
# Commented-out lines are alternative model choices kept for quick switching.
# NOTE(review): presumably basic_chat_* applies in "basic" mode and
# toolcall_chat_* in "mcp" mode -- confirm against the consuming code.
# transcriber_llm_model_name = "mistralai/voxtral-small-24b-2507"
transcriber_llm_model_name = "google/gemini-2.5-flash-lite"
# basic_chat_llm_model_name = "openai/gpt-4o-mini"
basic_chat_llm_model_name = "google/gemini-2.5-flash"
toolcall_chat_llm_model_name = "google/gemini-2.5-flash"

[TTS]
# Voice used for text-to-speech (TTS).
# Supported voices: alba, marius, javert, jean, fantine, cosette, eponine, azelma
tts_voice = "fantine"

[CHAT]
# Chat history size limits.
# NOTE(review): judging by the key names, the history is presumably cut back
# to hist_reset_size entries once it exceeds max_hist_threshold_size. The unit
# (messages vs. turns) is not stated here -- confirm against the consuming code.
max_hist_threshold_size = 16
hist_reset_size = 8

[MCP]
# URL of the MCP server used for tool calling.
# Only used when mode = "mcp" in [GENERAL].
server_url = "http://localhost:8000/mcp"

[VAD]
# Voice Activity Detection (VAD) settings.
# Units follow the key suffix: _ms = milliseconds, _sec = seconds.
# Detection sensitivity, 0.0-1.0; higher values make detection less sensitive.
threshold = 0.5
# Minimum amount of speech (ms) required before silence can end a recording.
min_speech_duration_ms = 300
# Length of silence (ms) that triggers the end of a recording.
min_silence_duration_ms = 1200
# Padding (ms) added to the start and end of each recording.
speech_pad_ms = 300
# Maximum recording duration (seconds) before recording is force-stopped.
max_speech_duration_sec = 300
# Minimum duration (ms) the final audio must have to be processed;
# this filters out short noises.
min_audio_duration_ms = 500