Magic-Voice-Chat/.env.sample at main · aimaster-dev/Magic-Voice-Chat · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Conditional API Usage:
# Depending on the value of MODEL_PROVIDER, the corresponding service will be used when run.
# You can mix and match; use local Ollama with OpenAI speech or use OpenAI model with local XTTS, etc.

# Model Provider: openai or ollama or xai or anthropic
MODEL_PROVIDER=openai

# Character to use - Options: alien_scientist, anarchist, ant_anarchist, bigfoot, bipolar_ai, capo_mio, chatgpt, clumsyhero,
# conandoyle, conspiracy, cyberpunk, detective, dog, dream_weaver, drill_sergeant, einstein, elon_musk, femme_fatale, fight_club,
# fitness_trainer, ghost, granny, grok_xai, hal9000, haunted_teddybear, insult, joker, method_actor, morpheus, mouse, mumbler,
# nebula_barista, nerd, newscaster_1920s, noir_detective, paradox, pirate, retired_wrestler, revenge_deer, samantha, shadow_whisperer,
# shakespeare, split, telemarketer, terminator, valleygirl, vampire, vato_loco, vegetarian_vampire, wizard, zombie_therapist, see character folder for more
CHARACTER_NAME=bigfoot

# Text-to-Speech (TTS) Configuration:
# TTS Provider - Options: xtts (local; uses the custom character .wav), openai (uses OpenAI TTS voice), elevenlabs, or kokoro (your own self-hosted TTS)
TTS_PROVIDER=openai

# Voice Speed for all TTS providers - 0.7 to 1.2, default is 1.0
VOICE_SPEED=1.0

# OpenAI TTS Voice - Used when TTS_PROVIDER is set to openai above
# Voice options: alloy, echo, fable, onyx, nova, shimmer, ash, coral, sage
OPENAI_TTS_VOICE=onyx

# OpenAI TTS Model - NEW: it uses emotions, see https://www.openai.fm/
# Model options: gpt-4o-mini-tts, tts-1, tts-1-hd
OPENAI_MODEL_TTS=gpt-4o-mini-tts

# OpenAI Enhanced Mode Transcription Model
# Model options: gpt-4o-transcribe, gpt-4o-mini-transcribe, whisper-1
OPENAI_TRANSCRIPTION_MODEL=gpt-4o-mini-transcribe
# OpenAI Realtime model for the WebRTC implementation. When playing games, don't use the mini model, as the long prompt will cause it to forget.
# Model options: gpt-4o-realtime-preview, gpt-4o-mini-realtime-preview
OPENAI_REALTIME_MODEL=gpt-4o-realtime-preview-2024-12-17

# ElevenLabs Configuration (replace with your actual API key):
ELEVENLABS_API_KEY=your_api_key_here
# Default voice ID - find voice IDs in your ElevenLabs account
ELEVENLABS_TTS_VOICE=your_voice_id_here
# ElevenLabs TTS Model - Options: eleven_multilingual_v2, eleven_flash_v2_5 (faster but lower quality)
ELEVENLABS_TTS_MODEL=eleven_multilingual_v2

# Kokoro TTS Configuration:
# bm_fable, bm_daniel, bm_lewis, af_alloy, af_bella
# See the Kokoro web URL (if you have it installed) for more voices: http://localhost:8880/web/
KOKORO_TTS_VOICE=af_bella

# AUDIO GENERATION LENGTH
# Maximum character length for audio generation - set to 2000+ for stories and games, 3000 for assassin story, 4000 for mars encounter interactive
# MAX_CHAR_LENGTH is used for openai, elevenlabs and kokoro. It is also used to derive max tokens for the chat response: if MAX_CHAR_LENGTH is 500, then 500 * 4 // 3 = 666 max tokens is sent to the provider.
MAX_CHAR_LENGTH=3000
# XTTS Max Number of characters to generate audio, default is 255 but we are overriding that
XTTS_NUM_CHARS=1000

# XTTS Configuration:
COQUI_TOS_AGREED=1

# OpenAI Configuration:
# Model options: gpt-4, gpt-4o-mini, gpt-4o
OPENAI_MODEL=gpt-4o
# OpenAI API Key for models and speech (replace with your actual API key)
OPENAI_API_KEY=your_api_key_here

# Ollama Models Configuration:
# Models to use - OPTIONAL: For screen analysis, if MODEL_PROVIDER is ollama, llava will be used by default.
# Model to use - llama3.1 or 3.2 works well for local usage. In the UI it will get the list of models from /api/tags and display them. Not all models are supported.
OLLAMA_MODEL=llama3.2

# XAI Configuration:
# grok-2-1212, grok-3-mini-beta, grok-3-mini-fast-beta, grok-3-fast-beta, grok-3-beta
XAI_MODEL=grok-3-fast-beta
XAI_API_KEY=your_api_key_here

# Anthropic Configuration:
ANTHROPIC_MODEL=claude-3-7-sonnet-20250219
ANTHROPIC_API_KEY=your_api_key_here

# Local Transcription settings - true or false
# Set to false to skip loading Faster Whisper on startup and use OpenAI transcription
FASTER_WHISPER_LOCAL=false

# Endpoints:
# Set these once; they rarely need to change
OPENAI_BASE_URL=https://api.openai.com/v1/chat/completions
OPENAI_TTS_URL=https://api.openai.com/v1/audio/speech
OLLAMA_BASE_URL=http://localhost:11434
# IF RUNNING IN DOCKER CHANGE OLLAMA BASE URL TO THE ONE BELOW
# OLLAMA_BASE_URL=http://host.docker.internal:11434
XAI_BASE_URL=https://api.x.ai/v1
# Kokoro API base URL - default is localhost, change if running on another machine or in docker
# KOKORO_BASE_URL=http://host.docker.internal:8880/v1
KOKORO_BASE_URL=http://localhost:8880/v1
# For remote Kokoro TTS basic auth username and password if needed
# KOKORO_USERNAME=admin
# KOKORO_PASSWORD=test123

# Debug settings - true or false
# Set to true to enable extensive debug output
DEBUG=false
# Set to true to see audio level readings during recording
DEBUG_AUDIO_LEVELS=false

# NOTES:
# List of trigger phrases to have the model view your desktop (desktop, browser, images, etc.).
# It will describe what it sees, and you can ask questions about it:
# "what's on my screen", "take a screenshot", "show me my screen", "analyze my screen",
# "what do you see on my screen", "screen capture", "screenshot"
# To stop the conversation, say "Quit" or "Exit". (Ctrl+C also always works.)