From d0db5f3632c3e659bc825f39d56aaf7d7994f406 Mon Sep 17 00:00:00 2001 From: Logan King Date: Mon, 30 Jun 2025 01:28:03 -0700 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20(assistant=5Fv2):=20add=20voice=20a?= =?UTF-8?q?nd=20speed=20controls?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- assistant_v2/FEATURE_PROGRESS.md | 4 +- assistant_v2/src/main.rs | 220 +++++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+), 2 deletions(-) diff --git a/assistant_v2/FEATURE_PROGRESS.md b/assistant_v2/FEATURE_PROGRESS.md index 769d2f9..a85d7d8 100644 --- a/assistant_v2/FEATURE_PROGRESS.md +++ b/assistant_v2/FEATURE_PROGRESS.md @@ -14,8 +14,8 @@ This document tracks which features from the original assistant have been implem | Run internet speed tests | Pending | | Set the clipboard contents | Done | | Timers with alarm sounds | Pending | -| Change voice or speaking speed | Pending | -| Mute/unmute voice output | Pending | +| Change voice or speaking speed | Done | +| Mute/unmute voice output | Done | | Open OpenAI billing page | Done | | Push-to-talk text-to-speech interface | Done | | Interrupt AI speech with push-to-talk | Done | diff --git a/assistant_v2/src/main.rs b/assistant_v2/src/main.rs index 186b42c..dcda414 100644 --- a/assistant_v2/src/main.rs +++ b/assistant_v2/src/main.rs @@ -44,6 +44,36 @@ struct Opt { duck_ptt: bool, } +fn parse_voice(name: &str) -> Option { + match name.to_lowercase().as_str() { + "alloy" => Some(Voice::Alloy), + "ash" => Some(Voice::Ash), + "coral" => Some(Voice::Coral), + "echo" => Some(Voice::Echo), + "fable" => Some(Voice::Fable), + "onyx" => Some(Voice::Onyx), + "nova" => Some(Voice::Nova), + "sage" => Some(Voice::Sage), + "shimmer" => Some(Voice::Shimmer), + _ => None, + } +} + +fn voice_to_str(voice: &Voice) -> &'static str { + match voice { + Voice::Alloy => "alloy", + Voice::Ash => "ash", + Voice::Coral => "coral", + Voice::Echo => "echo", + Voice::Fable => "fable", + Voice::Onyx => "onyx", + Voice::Nova => "nova", + Voice::Sage => "sage", + Voice::Shimmer => "shimmer", + _ => "unknown", + } +} + #[tokio::main] async fn main() -> Result<(), Box> { let opt = Opt::parse(); @@ -138,6 +168,76 @@ async fn main() -> Result<(), Box> { strict: None, } .into(), + FunctionObject { + name: "set_speech_speed".into(), + description: Some( + "Sets how fast the AI voice speaks. Speed must be between 0.5 and 100.".into(), + ), + parameters: Some(serde_json::json!({ + "type": "object", + "properties": {"speed": {"type": "number"}}, + "required": ["speed"], + })), + strict: None, + } + .into(), + FunctionObject { + name: "get_speech_speed".into(), + description: Some("Returns the current AI voice speech speed.".into()), + parameters: Some(serde_json::json!({ + "type": "object", + "properties": {}, + "required": [], + })), + strict: None, + } + .into(), + FunctionObject { + name: "mute_speech".into(), + description: Some("Mutes the AI voice output.".into()), + parameters: Some(serde_json::json!({ + "type": "object", + "properties": {}, + "required": [], + })), + strict: None, + } + .into(), + FunctionObject { + name: "unmute_speech".into(), + description: Some("Unmutes the AI voice output.".into()), + parameters: Some(serde_json::json!({ + "type": "object", + "properties": {}, + "required": [], + })), + strict: None, + } + .into(), + FunctionObject { + name: "set_voice".into(), + description: Some( + "Changes the AI speaking voice. Pass one of: alloy, ash, coral, echo, fable, onyx, nova, sage, shimmer.".into(), + ), + parameters: Some(serde_json::json!({ + "type": "object", + "properties": {"voice": {"type": "string"}}, + "required": ["voice"], + })), + strict: None, + } + .into(), + FunctionObject { + name: "get_voice".into(), + description: Some("Returns the name of the current AI voice.".into()), + parameters: Some(serde_json::json!({ + "type": "object", + "properties": {}, + "required": [], + })), + strict: None, + } + .into(), ]) .build()?; @@ -382,6 +482,73 @@ async fn handle_requires_action( output: Some(msg.into()), }); } + + if tool.function.name == "set_speech_speed" { + let speed = match serde_json::from_str::(&tool.function.arguments) { + Ok(v) => v["speed"].as_f64().unwrap_or(1.0) as f32, + Err(_) => 1.0, + }; + let msg = if (0.5..=100.0).contains(&speed) { + speak_stream.lock().unwrap().set_speech_speed(speed); + format!("Speech speed set to {}", speed) + } else { + "Speed must be between 0.5 and 100.0".to_string() + }; + tool_outputs.push(ToolsOutputs { + tool_call_id: Some(tool.id.clone()), + output: Some(msg.into()), + }); + } + + if tool.function.name == "get_speech_speed" { + let speed = speak_stream.lock().unwrap().get_speech_speed(); + tool_outputs.push(ToolsOutputs { + tool_call_id: Some(tool.id.clone()), + output: Some(format!("{}", speed).into()), + }); + } + + if tool.function.name == "mute_speech" { + speak_stream.lock().unwrap().mute(); + tool_outputs.push(ToolsOutputs { + tool_call_id: Some(tool.id.clone()), + output: Some("AI voice muted".into()), + }); + } + + if tool.function.name == "unmute_speech" { + speak_stream.lock().unwrap().unmute(); + tool_outputs.push(ToolsOutputs { + tool_call_id: Some(tool.id.clone()), + output: Some("AI voice unmuted".into()), + }); + } + + if tool.function.name == "set_voice" { + let name = match serde_json::from_str::(&tool.function.arguments) { + Ok(v) => v["voice"].as_str().unwrap_or("").to_string(), + Err(_) => String::new(), + }; + let msg = match parse_voice(&name) { + Some(v) => { + speak_stream.lock().unwrap().set_voice(v); + format!("Voice set to {}", name.to_lowercase()) + } + None => "Invalid voice name".to_string(), + }; + tool_outputs.push(ToolsOutputs { + tool_call_id: Some(tool.id.clone()), + output: Some(msg.into()), + }); + } + + if tool.function.name == "get_voice" { + let name = voice_to_str(&speak_stream.lock().unwrap().get_voice()); + tool_outputs.push(ToolsOutputs { + tool_call_id: Some(tool.id.clone()), + output: Some(format!("{}", name).into()), + }); + } } if let Err(e) = submit_tool_outputs(client, run_object, tool_outputs, speak_stream).await { @@ -503,4 +670,57 @@ mod tests { _ => false, })); } + + #[test] + fn includes_set_speech_speed_function() { + let req = CreateAssistantRequestArgs::default() + .instructions("test") + .model("gpt-4o") + .tools(vec![FunctionObject { + name: "set_speech_speed".into(), + description: Some("Sets how fast the AI voice speaks.".into()), + parameters: Some(serde_json::json!({ + "type": "object", + "properties": {"speed": {"type": "number"}}, + "required": ["speed"], + })), + strict: None, + } + .into()]) + .build() + .unwrap(); + + let tools = req.tools.unwrap(); + assert!(tools.iter().any(|t| match t { + async_openai::types::AssistantTools::Function(f) => + f.function.name == "set_speech_speed", + _ => false, + })); + } + + #[test] + fn includes_mute_speech_function() { + let req = CreateAssistantRequestArgs::default() + .instructions("test") + .model("gpt-4o") + .tools(vec![FunctionObject { + name: "mute_speech".into(), + description: Some("Mutes the AI voice output.".into()), + parameters: Some(serde_json::json!({ + "type": "object", + "properties": {}, + "required": [], + })), + strict: None, + } + .into()]) + .build() + .unwrap(); + + let tools = req.tools.unwrap(); + assert!(tools.iter().any(|t| match t { + async_openai::types::AssistantTools::Function(f) => f.function.name == "mute_speech", + _ => false, + })); + } }