Changes from all commits
97 commits
1c91f10
not underscore
RVirmoors Oct 23, 2025
d42a2f0
initial try to integrate pyaec
lorenacocora Oct 28, 2025
73a5e22
add basic project with just 1 persona
RVirmoors Oct 28, 2025
e77a807
aec attempt, not working yet
RVirmoors Oct 28, 2025
4080953
still won't work...
RVirmoors Oct 28, 2025
f57c6ae
Removed all pyaec lines of code
lorenacocora Oct 30, 2025
de860b7
Merge pull request #1 from RVirmoors/clean_code_no_pyaec
lorenacocora Oct 30, 2025
e56c4c4
remove aec, clean main
RVirmoors Oct 30, 2025
498ad8e
create a boot that runs 2 personas on the same LLM no voice
lorenacocora Nov 10, 2025
85e4506
ollama init work
RVirmoors Nov 10, 2025
e622510
not working yet
RVirmoors Nov 10, 2025
c174cdd
ollama works. todo add local_smart_turn_v3 requirements
RVirmoors Nov 10, 2025
852b7a0
Update pyproject.toml
RVirmoors Nov 10, 2025
2d50241
test for context
lorenacocora Nov 11, 2025
db5db5d
trying some stuff
lorenacocora Nov 14, 2025
296df90
2 personas pipeline is working
lorenacocora Nov 14, 2025
c0d6245
Delete inbox_writer.py
RVirmoors Nov 17, 2025
38e0d60
simpler structure
RVirmoors Nov 17, 2025
32916a9
add groq pipeline
RVirmoors Nov 17, 2025
b4b836e
Merge pull request #2 from RVirmoors/local_llm_api
RVirmoors Nov 18, 2025
7cc6e9c
aec works lets goooo
RVirmoors Oct 30, 2025
d686a9d
Solved the clippings with a rolling playback buffer
lorenacocora Oct 31, 2025
841b742
fix glitches, aec works again. Modified persona for testing
RVirmoors Oct 31, 2025
cca21dc
add aec_enabled to config
RVirmoors Nov 1, 2025
afaec0d
like this
RVirmoors Nov 1, 2025
018b59c
fix rare resampler error
RVirmoors Nov 1, 2025
047fac7
interruption still doesn't work
RVirmoors Nov 1, 2025
b731145
add mute_while_tts feature
RVirmoors Nov 18, 2025
ec0e4ad
Update boot.py
RVirmoors Nov 18, 2025
b45a516
no prints
RVirmoors Nov 18, 2025
2d1963a
Merge branch 'grig_pyaec'
RVirmoors Nov 18, 2025
91af273
fix aec merge
RVirmoors Nov 18, 2025
ccd48af
add aec/mute to groq & ollama
RVirmoors Nov 18, 2025
6673cbe
create a boot that runs 2 personas on the same LLM no voice
lorenacocora Nov 10, 2025
842f634
test for context
lorenacocora Nov 11, 2025
bc39ac0
trying some stuff
lorenacocora Nov 14, 2025
7164eaa
2 personas pipeline is working
lorenacocora Nov 14, 2025
8f677c9
custom assistant aggregator
RVirmoors Nov 19, 2025
b2f3964
WIP - 2 personas google working
lorenacocora Nov 19, 2025
c0c81e6
Merge branch 'main' into lorena_2personas
lorenacocora Nov 19, 2025
0f48da7
custom assistant aggregator for ollama pipeline
lorenacocora Nov 19, 2025
e5be0d6
WIP: 2 voices for 2 personas, requires sync
lorenacocora Nov 19, 2025
0dde88b
switching work
RVirmoors Nov 20, 2025
e1d314f
add setup and run scripts for OSX
RVirmoors Nov 20, 2025
e259fe1
add setup and run scripts for windows
lorenacocora Nov 20, 2025
a120d6f
voice switching works for google
RVirmoors Nov 20, 2025
426ee0f
voice switching for groq & ollama
RVirmoors Nov 20, 2025
9d1f2a6
Merge pull request #4 from RVirmoors/lorena_2personas
RVirmoors Nov 20, 2025
d881580
add narrator
lorenacocora Nov 20, 2025
137b33c
add settings.ini and loader
RVirmoors Nov 20, 2025
d218823
Merge pull request #5 from RVirmoors/lorena_2personas
RVirmoors Nov 20, 2025
96e24cc
add open settings to setup
RVirmoors Nov 20, 2025
4edb775
Squashed commit of the following:
lorenacocora Nov 20, 2025
ca864da
CAPS settings
RVirmoors Nov 20, 2025
2cd2902
Update .gitignore
RVirmoors Nov 20, 2025
f03d046
add ollama narrator
lorenacocora Nov 20, 2025
76445ad
add the working narrator
lorenacocora Nov 20, 2025
6df3268
minor
RVirmoors Nov 20, 2025
52b674e
Squashed commit of the following:
RVirmoors Nov 20, 2025
f3d3d79
Update settings.ini
RVirmoors Nov 20, 2025
0d18327
no voice switcher for 1on1
RVirmoors Nov 20, 2025
6e4f546
Merge branch 'main' into narator-bun
RVirmoors Nov 20, 2025
91f57e6
Merge pull request #6 from RVirmoors/narator-bun
RVirmoors Nov 20, 2025
1014a80
python3
RVirmoors Nov 21, 2025
9916831
certificates
RVirmoors Nov 21, 2025
24375b1
Fix narrator input in 2persona mode
lorenacocora Nov 22, 2025
88eac8d
smart setup.bat
RVirmoors Nov 26, 2025
04ec5f4
Merge branch 'main' of https://github.com/RVirmoors/llm-actor
RVirmoors Nov 26, 2025
f5f5b3c
Update setup.bat
RVirmoors Nov 26, 2025
56183bd
venv
RVirmoors Nov 26, 2025
9ae8027
bat: open api key sites
RVirmoors Nov 27, 2025
da5e89c
python check fix
RVirmoors Nov 27, 2025
5163bb9
bat: fix git detect
RVirmoors Nov 27, 2025
31a4b7c
move setup to subfolder
RVirmoors Nov 27, 2025
f3db2ba
bat texts
RVirmoors Nov 27, 2025
14f5082
pip install not editable
RVirmoors Nov 28, 2025
f3ad46a
same for mac
RVirmoors Nov 28, 2025
ce67b84
update pip
RVirmoors Nov 28, 2025
c5c19f2
add check for VC++ redistributable (required by onnxruntime)
RVirmoors Nov 28, 2025
fb4f2d4
syntax fix
RVirmoors Nov 28, 2025
fa25e1e
syntax fix
RVirmoors Nov 28, 2025
5501d30
oops
RVirmoors Nov 28, 2025
1e11f51
add kokoro local TTS for groq+qwen3 pipeline
RVirmoors Dec 5, 2025
2a9a6f2
add kokoro to other pipelines
RVirmoors Dec 5, 2025
0a5cc88
Update setup.bat
RVirmoors Dec 5, 2025
9b0afe0
Merge pull request #7 from RVirmoors/alternate-tts
RVirmoors Dec 5, 2025
0ceb474
Update .gitignore
RVirmoors Dec 5, 2025
4fa1712
Merge branch 'main' of https://github.com/RVirmoors/llm-actor
RVirmoors Dec 5, 2025
b5e1370
fix no-think for qwen3, clarify setup instructions
RVirmoors Dec 6, 2025
815eeff
Update pipeline_groq.py
RVirmoors Dec 6, 2025
1ee5a7a
add macos setup.command
RVirmoors Dec 11, 2025
96c5ec0
certificates fix
RVirmoors Dec 12, 2025
389c1b5
add moonshine local STT
RVirmoors Dec 12, 2025
4f9dce9
setup readme
RVirmoors Dec 12, 2025
02dc2d4
moonshine default
RVirmoors Dec 12, 2025
20e5f4f
fix moonshine mute
RVirmoors Dec 12, 2025
74e37d4
Update README with project overview and setup instructions
RVirmoors Mar 16, 2026
4 changes: 3 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
GOOGLE_API_KEY=your-google-api-key
OLLAMA_BASE_URL=http://localhost:11434/v1
DEEPGRAM_API_KEY=your-deepgram-api-key
GROQ_API_KEY=your-groq-api-key
GOOGLE_API_KEY=your-google-api-key
5 changes: 5 additions & 0 deletions .gitignore
@@ -10,6 +10,11 @@ venv/
.env.local
runtime/conversations/*
runtime/*.log
runtime/config.json
.DS_Store
.idea/
.vscode/
runtime/dialogue.txt
runtime/config.json
/src/llm_actor.egg-info
assets/*
108 changes: 108 additions & 0 deletions BASIC_PROJECT/boot.py
@@ -0,0 +1,108 @@
"""Entry point script that spins up the example agents for the Velvet Room door."""

from __future__ import annotations

import sys
from pathlib import Path
import settings_loader

SRC_ROOT = Path(__file__).resolve().parents[1] / "src"
# Make sure the shared src/ folder is importable when running this file directly.
if str(SRC_ROOT) not in sys.path:
    sys.path.insert(0, str(SRC_ROOT))

from projects.utils import (
    apply_runtime_config_overrides,
    launch_module,
    reset_runtime_state,
    terminate_processes,
)

# If runtime/dialogue.txt exists, empty it to start fresh.
dialogue_file = Path("runtime/dialogue.txt")
if dialogue_file.exists():
    dialogue_file.write_text("")

# Persona script.
SYSTEM_PROMPT = settings_loader.sys_prompt

# Shared reminder appended to the prompt so the voice stays TTS-friendly.
PROMPT_APPEND = "\n\nOnly output text to be synthesized by a TTS system, no '*' around words or emojis for example."

SYSTEM_PROMPT = SYSTEM_PROMPT + PROMPT_APPEND


# Default runtime settings; tweak these to match your hardware and providers.
RUNTIME_CONFIG = {
    "audio": {
        "input_device_index": settings_loader.input_device_index,
        "output_device_index": settings_loader.output_device_index,
        "output_sample_rate": 48000,
        "auto_select_devices": False,
        "aec": settings_loader.aec_setting,
    },
    "stt": {
        "model": settings_loader.stt_model,
        "language": "en-US",
        "eager_eot_threshold": 0.7,
        "eot_threshold": 0.85,
        "eot_timeout_ms": 1500,
    },
    "llm": {
        "model": settings_loader.model,
        "temperature": settings_loader.temperature,
        "max_tokens": 1024,
        "system_prompt": SYSTEM_PROMPT,
        "mode": settings_loader.mode,
        "persona1": {
            "name": settings_loader.p1_name,
            "opening": settings_loader.p1_opening,
            "prompt": settings_loader.p1_prompt + PROMPT_APPEND,
            "voice": settings_loader.p1_voice,
        },
        "persona2": {
            "name": settings_loader.p2_name,
            "opening": settings_loader.p2_opening,
            "prompt": settings_loader.p2_prompt + PROMPT_APPEND,
            "voice": settings_loader.p2_voice,
        },
        "narrator": {
            "name": settings_loader.n_name,
            "opening": "",
            "prompt": settings_loader.n_prompt + PROMPT_APPEND,
            "voice": settings_loader.n_voice,
        }
    },
    "tts": {
        "model": settings_loader.tts_model,
        "voice": settings_loader.sys_voice,
        "encoding": "linear16",
        "sample_rate": 24000,
    },
}
PIPELINE = settings_loader.pipeline  # options: "google", "groq", "ollama"


def main() -> None:
    # Start fresh so stale state from previous runs does not interfere.
    reset_runtime_state()
    # Load our example configuration before launching any helper processes.
    apply_runtime_config_overrides(RUNTIME_CONFIG)

    # Start the CLI.
    processes = [
        launch_module("app.cli", "--pipeline", PIPELINE),
    ]

    try:
        # Keep the helpers alive while the CLI session runs.
        processes[0].wait()
    except KeyboardInterrupt:
        pass
    finally:
        # Always clean up child processes so the system stays tidy.
        terminate_processes(processes)


if __name__ == "__main__":
    main()
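The `launch_module` and `terminate_processes` helpers imported from `projects.utils` are not shown in this diff. A minimal stand-in sketch of the launch/wait/cleanup pattern `boot.py` relies on could look like this (the helper bodies here are assumptions, not the project's actual implementation; `json.tool` merely stands in for the real `app.cli` module):

```python
import subprocess
import sys

def launch_module(module: str, *args: str) -> subprocess.Popen:
    # Hypothetical stand-in for projects.utils.launch_module:
    # run a module as a child process via `python -m`.
    return subprocess.Popen([sys.executable, "-m", module, *args])

def terminate_processes(processes) -> None:
    # Mirror of the finally-block cleanup: signal any still-running
    # children, then reap them all so no zombies are left behind.
    for proc in processes:
        if proc.poll() is None:
            proc.terminate()
    for proc in processes:
        proc.wait()

# Usage: a short-lived stdlib module stands in for the CLI process.
procs = [launch_module("json.tool", "--help")]
procs[0].wait()
terminate_processes(procs)
```

Waiting on `processes[0]` ties the parent's lifetime to the CLI session, while the `finally` cleanup guarantees children are reaped even on Ctrl-C.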
15 changes: 15 additions & 0 deletions BASIC_PROJECT/ollama_test.py
@@ -0,0 +1,15 @@
# set OLLAMA_HOST env variable to 0.0.0.0:11434

from ollama import Client

client = Client(
    host='http://10.0.8.110:11434',  # alternate host: 100.94.224.82:11434
    headers={'Content-Type': 'application/json'}
)
response = client.chat(model='deepseek-r1:1.5b', messages=[
    {
        'role': 'user',
        'content': 'Why is the sky blue?',
    },
])

print(response)
66 changes: 66 additions & 0 deletions BASIC_PROJECT/settings.ini
@@ -0,0 +1,66 @@
[AUDIO]
input_device_index = 1
output_device_index = 2
mute_microphone_while_tts = true
echo_cancellation = true

[STT]
model = moonshine
# options: deepgram-flux, moonshine

[LLM]
pipeline = groq
# options: google, groq, ollama
model = qwen/qwen3-32b
# options: GOOGLE "gemini-2.5-flash",
#          GROQ "qwen/qwen3-32b", "openai/gpt-oss-20b", ...
#          OLLAMA "deepseek-r1:1.5b", "deepseek-r1:32b", "gpt-oss:20b"
temperature = 0.2
mode = 1to1
# options: 1to1, 2personas, narrator

[TTS]
model = kokoro
# options: kokoro, deepgram
# see voice options to set persona voices below:
# https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices
# https://developers.deepgram.com/docs/tts-models


[SYSTEM]
prompt = You guard the Velvet Room. Speak with crisp, exclusive poise.
    Decline entry unless the King arrives (someone saying he is the King).
    Remember, there is only one King. Once he is inside, there can't be another in front of the door;
    keep imposters out. Keep replies brief. To unlock the door, output <UNLOCK>.
voice = af_sarah


[PERSONA_1]
name = UNCLE
opening = Hey, can you open for me please?
voice = aura-2-arcas-en
prompt = You are a Drunk Uncle who desperately wants to enter the Velvet Room.
    Speak in a slightly slurred, persuasive, but endearing tone.
    You believe it is your life mission to discover how to get through that door.
    Keep replies brief and emotional.


[PERSONA_2]
name = DOORMAN
opening = Who goes there? State your business!
voice = aura-2-helena-en
prompt = You are the Doorman that guards the Velvet Room.
    Speak with crisp, exclusive poise.
    Decline entry unless the King arrives (someone saying he is the King).
    Keep replies brief.
    To unlock the door, output <UNLOCK>.


[NARRATOR]
name = NARRATOR
voice = aura-2-apollo-en
prompt = You are an impersonal third-person narrator observing a story that unfolds through a dialogue between two characters.
    You never address the characters directly, never speak in first-person, and never continue their conversation.
    When it is your turn to speak, you add a brief narrative intervention that introduces a major plot twist affecting the world, situation, or stakes.
    Keep your interventions brief.
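One detail the multi-line prompts above depend on: `configparser` only treats a line as a continuation of the previous value if it is indented; an unindented line would be parsed as a new key. A small sketch (the persona text is taken from this file; `read_string` stands in for reading `settings.ini` from disk):

```python
import configparser
import textwrap

# Continuation lines must be indented relative to the key, or configparser
# raises a parse error / misreads them as new options.
ini = textwrap.dedent("""\
    [PERSONA_2]
    name = DOORMAN
    prompt = You are the Doorman that guards the Velvet Room.
        Speak with crisp, exclusive poise.
        Keep replies brief.
    """)

parser = configparser.ConfigParser()
parser.read_string(ini)
prompt = parser.get("PERSONA_2", "prompt")
# Continuation lines are joined with newlines, leading indentation stripped.
print(prompt)
```

This is why the loader below can pass each `prompt` value straight through as a multi-line system prompt.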
46 changes: 46 additions & 0 deletions BASIC_PROJECT/settings_loader.py
@@ -0,0 +1,46 @@
from pathlib import Path
import configparser

parser = configparser.ConfigParser()
files = parser.read(Path(__file__).parent / "settings.ini")
print("Loaded settings from:", files)

input_device_index = parser.getint("AUDIO", "input_device_index", fallback=1)
output_device_index = parser.getint("AUDIO", "output_device_index", fallback=2)
if parser.getboolean("AUDIO", "mute_microphone_while_tts", fallback=True):
    aec_setting = "mute_while_tts"
elif parser.getboolean("AUDIO", "echo_cancellation", fallback=False):
    aec_setting = "pyaec"
else:
    aec_setting = "off"

stt_model = parser.get("STT", "model", fallback="moonshine")
# options: deepgram-flux, moonshine

pipeline = parser.get("LLM", "pipeline", fallback="groq") # options: "google", "groq", "ollama"
model = parser.get("LLM", "model", fallback="qwen/qwen3-32b")
# options: GOOGLE "gemini-2.5-flash",
# GROQ "qwen/qwen3-32b", "openai/gpt-oss-20b", ...
# OLLAMA "deepseek-r1:1.5b", "deepseek-r1:32b", "gpt-oss:20b"
temperature = parser.getfloat("LLM", "temperature", fallback=0.2)
mode = parser.get("LLM", "mode", fallback="2personas")  # options: "1to1", "2personas", "narrator"

tts_model = parser.get("TTS", "model", fallback="kokoro")
# options: kokoro, deepgram

sys_prompt = parser.get("SYSTEM", "prompt")
sys_voice = parser.get("SYSTEM", "voice", fallback="af_sarah")

p1_name = parser.get("PERSONA_1", "name", fallback="UNCLE")
p1_opening = parser.get("PERSONA_1", "opening")
p1_prompt = parser.get("PERSONA_1", "prompt")
p1_voice = parser.get("PERSONA_1", "voice")

p2_name = parser.get("PERSONA_2", "name", fallback="DOOR")
p2_opening = parser.get("PERSONA_2", "opening")
p2_prompt = parser.get("PERSONA_2", "prompt")
p2_voice = parser.get("PERSONA_2", "voice")

n_name = parser.get("NARRATOR", "name", fallback="NARRATOR")
n_prompt = parser.get("NARRATOR", "prompt")
n_voice = parser.get("NARRATOR", "voice")
8 changes: 7 additions & 1 deletion README.md
@@ -1,5 +1,11 @@
# llm-actor

A fork of Jan's project that adds local/open options for STT, LLM, and TTS, plus a simplified install+config setup. Go to [setup_scripts/](https://github.com/RVirmoors/llm-actor/tree/main/setup_scripts) and follow the instructions for Windows or macOS.

Original readme follows:

-----

This project packages a thin Python CLI around [Pipecat](https://docs.pipecat.ai/) to deliver a real-time audio loop using Deepgram Flux speech-to-text, Gemini 2.5 Flash streaming text generation, and Deepgram Aura-2 text-to-speech. External automation hooks are exposed via append-only files under `runtime/`.

## Features
@@ -28,7 +34,7 @@ Follow these steps to run the door project end-to-end:

```bash
git clone https://github.com/janzuiderveld/llm-actor
cd llm_actor
cd llm-actor
python -m venv .venv # make sure to use python3.10+ (use python -V to check)
# if you get "command not found: python" type python3 instead of python
source .venv/bin/activate # for Mac or Linux
16 changes: 12 additions & 4 deletions pyproject.toml
@@ -4,18 +4,26 @@ build-backend = "setuptools.build_meta"

[project]
name = "llm-actor"
version = "0.1.0"
description = "Thin Pipecat wrapper for Deepgram Flux → Gemini → Deepgram Aura voice pipeline"
authors = [{name = "Jan Zuiderveld"}]
version = "0.1.1"
description = "Thin Pipecat wrapper for Moonshine/Deepgram Flux → Groq/Gemini/Ollama → Kokoro/Deepgram Aura voice pipeline"
authors = [{name = "Jan Zuiderveld"}, {name = "Grigore Burloiu"}, {name = "Lorena Cocora"}]
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
    "pipecat-ai[deepgram,google,local]",
    "pipecat-ai[deepgram,google,groq,local,silero,local-smart-turn-v3]",
    "pyaec",
    "kokoro",
    "kokoro-onnx",
    "python-dotenv",
    "pydantic",
    "sounddevice>=0.4",
    "typer>=0.9",
    "watchfiles",
    "transformers[torch]==4.48.2",
    "onnx",
    "onnxruntime",
    "useful-moonshine-onnx",
    "keyboard",
]

[project.optional-dependencies]
3 changes: 3 additions & 0 deletions run.bat
@@ -0,0 +1,3 @@
call .\venv\Scripts\activate
python .\BASIC_PROJECT\boot.py
pause
28 changes: 24 additions & 4 deletions runtime/config.json
@@ -3,7 +3,8 @@
    "input_device_index": 1,
    "output_device_index": 2,
    "output_sample_rate": 48000,
    "auto_select_devices": false
    "auto_select_devices": false,
    "aec": "mute_while_tts"
  },
  "stt": {
    "model": "deepgram-flux",
@@ -13,10 +14,29 @@
    "eot_timeout_ms": 1500
  },
  "llm": {
    "model": "gemini-2.5-flash",
    "temperature": 0.6,
    "model": "gpt-oss:20b",
    "temperature": 0.2,
    "max_tokens": 1024,
    "system_prompt": "You guard the Velvet Room. Speak with crisp, exclusive poise. Decline entry unless the King arrives (someone saying he is the King). Remember, there is only one King. Once he is inside, there can't be another in front of the door; keep imposters out. Keep replies brief. To unlock the door, output <UNLOCK>.\n\nOnly output text to be synthesized by a TTS system, no '*' around words or emojis for example"
    "system_prompt": "You guard the Velvet Room. Speak with crisp, exclusive poise. Decline entry unless the King arrives (someone saying he is the King). Remember, there is only one King. Once he is inside, there can't be another in front of the door; keep imposters out. Keep replies brief. To unlock the door, output <UNLOCK>.\n\nOnly output text to be synthesized by a TTS system, no '*' around words or emojis for example",
    "mode": "narrator",
    "persona1": {
      "name": "UNCLE",
      "opening": "Hey, can you open for me please?",
      "prompt": "You are a Drunk Uncle who desperately wants to enter the Velvet Room. \n Speak in a slightly slurred, persuasive, but endearing tone.\n You believe it is your life mission to discover how to get through that door.\n Keep replies brief and emotional.\nOnly output text to be synthesized by a TTS system, no '*' around words or emojis for example",
      "voice": "aura-2-helena-en"
    },
    "persona2": {
      "name": "DOOR",
      "opening": "",
      "prompt": "You are the Door that guards the Velvet Room.\n Speak with crisp, exclusive poise.\n Decline entry unless the king arrives (someone saying he is the King).\n Keep replies brief.\n To unlock the door, output <UNLOCK>.\nOnly output text to be synthesized by a TTS system, no '*' around words or emojis for example",
      "voice": "aura-2-arcas-en"
    },
    "narrator": {
      "name": "NARRATOR",
      "opening": "",
      "prompt": "You are an impersonal third-person narrator observing a story that unfolds through a dialogue between two characters.\n You never address the characters directly, never speak in first-person, and never continue their conversation.\n When it is your turn to speak, you add a brief narrative intervention that introduces a plot twist affecting the world, situation, or stakes.\n Keep your interventions brief.\nOnly output text to be synthesized by a TTS system, no '*' around words or emojis for example",
      "voice": "aura-2-apollo-en"
    }
  },
  "tts": {
    "voice": "aura-2-thalia-en",
Empty file added runtime/dialogue.txt
Empty file.