diff --git a/crates/tool_parser/src/factory.rs b/crates/tool_parser/src/factory.rs index 442c0670b..cc0ee86ac 100644 --- a/crates/tool_parser/src/factory.rs +++ b/crates/tool_parser/src/factory.rs @@ -7,9 +7,9 @@ use tokio::sync::Mutex; use crate::{ parsers::{ - CohereParser, DeepSeek31Parser, DeepSeekParser, Glm4MoeParser, JsonParser, KimiK2Parser, - LlamaParser, MinimaxM2Parser, MistralParser, PassthroughParser, PythonicParser, - QwenCoderParser, QwenParser, Step3Parser, + CohereParser, DeepSeek31Parser, DeepSeek32Parser, DeepSeekParser, Glm4MoeParser, + JsonParser, KimiK2Parser, LlamaParser, MinimaxM2Parser, MistralParser, PassthroughParser, + PythonicParser, QwenCoderParser, QwenParser, Step3Parser, }, traits::ToolParser, }; @@ -240,6 +240,7 @@ impl ParserFactory { registry.register_parser("llama", || Box::new(LlamaParser::new())); registry.register_parser("deepseek", || Box::new(DeepSeekParser::new())); registry.register_parser("deepseek31", || Box::new(DeepSeek31Parser::new())); + registry.register_parser("deepseek32", || Box::new(DeepSeek32Parser::new())); registry.register_parser("glm45_moe", || Box::new(Glm4MoeParser::glm45())); registry.register_parser("glm47_moe", || Box::new(Glm4MoeParser::glm47())); registry.register_parser("step3", || Box::new(Step3Parser::new())); @@ -291,6 +292,12 @@ impl ParserFactory { registry.map_model("deepseek-ai/DeepSeek-V3*", "deepseek"); registry.map_model("deepseek-v3.1*", "deepseek31"); registry.map_model("deepseek-ai/DeepSeek-V3.1*", "deepseek31"); + // V3.2-Exp uses V3.1 format (longer patterns take precedence) + registry.map_model("deepseek-v3.2-exp*", "deepseek31"); + registry.map_model("deepseek-ai/DeepSeek-V3.2-Exp*", "deepseek31"); + // V3.2 DSML format + registry.map_model("deepseek-v3.2*", "deepseek32"); + registry.map_model("deepseek-ai/DeepSeek-V3.2*", "deepseek32"); registry.map_model("deepseek-*", "pythonic"); // GLM models diff --git a/crates/tool_parser/src/lib.rs b/crates/tool_parser/src/lib.rs index b5957b06d..3a037ee54 100644 --- a/crates/tool_parser/src/lib.rs +++ b/crates/tool_parser/src/lib.rs @@ -17,8 +17,9 @@ mod tests; // Re-export types used outside this module pub use factory::{ParserFactory, PooledParser}; pub use parsers::{ - CohereParser, DeepSeek31Parser, DeepSeekParser, Glm4MoeParser, JsonParser, KimiK2Parser, - LlamaParser, MinimaxM2Parser, MistralParser, PythonicParser, QwenParser, Step3Parser, + CohereParser, DeepSeek31Parser, DeepSeek32Parser, DeepSeekParser, Glm4MoeParser, JsonParser, + KimiK2Parser, LlamaParser, MinimaxM2Parser, MistralParser, PythonicParser, QwenParser, + Step3Parser, }; pub use traits::ToolParser; pub use types::{FunctionCall, PartialToolCall, StreamingParseResult, ToolCall}; diff --git a/crates/tool_parser/src/parsers/deepseek32.rs b/crates/tool_parser/src/parsers/deepseek32.rs new file mode 100644 index 000000000..ed53f36ab --- /dev/null +++ b/crates/tool_parser/src/parsers/deepseek32.rs @@ -0,0 +1,461 @@ +use async_trait::async_trait; +use openai_protocol::common::Tool; +use regex::Regex; +use serde_json::Value; + +use crate::{ + errors::{ParserError, ParserResult}, + parsers::helpers, + traits::ToolParser, + types::{FunctionCall, StreamingParseResult, ToolCall, ToolCallItem}, +}; + +/// DeepSeek V3.2 DSML format parser for tool calls +/// +/// Handles the DeepSeek V3.2 DSML format: +/// ```text +/// <|DSML|function_calls> +/// <|DSML|invoke name="func"> +/// <|DSML|parameter name="key" string="true">value +/// +/// +/// ``` +/// +/// Also supports direct JSON inside invoke blocks as a fallback format. +/// +/// Reference: https://huggingface.co/deepseek-ai/DeepSeek-V3.2 +pub struct DeepSeek32Parser { + /// Regex for extracting full function_calls block content + tool_call_complete_regex: Regex, + /// Regex for extracting complete invoke blocks (name + body) + invoke_complete_regex: Regex, + /// Regex for extracting complete parameter tags (name, string attr, value) + parameter_complete_regex: Regex, + /// Regex for matching partial parameter tag during streaming (no closing tag) + partial_parameter_regex: Regex, + /// Regex for matching invoke blocks (complete or partial, for streaming) + invoke_regex: Regex, + + /// Buffer for accumulating incomplete patterns across chunks + buffer: String, + /// Stores complete tool call info for each tool being parsed + prev_tool_call_arr: Vec, + /// Index of currently streaming tool call (-1 means no active tool) + current_tool_id: i32, + /// Flag for whether current tool's name has been sent to client + current_tool_name_sent: bool, + /// Tracks raw JSON string content streamed to client for each tool's arguments + streamed_args_for_tool: Vec, +} + +/// Full DSML closing tags for suffix-based stripping during streaming. +const DSML_PARAMETER_END_TAG: &str = ""; +const DSML_INVOKE_END_TAG: &str = ""; + +/// Strip a trailing partial DSML closing tag from a string. +/// +/// If the string ends with a prefix of `closing_tag` (e.g. `"Tokyo"`), that trailing portion is removed. +/// Unlike character-set stripping, this only removes text that actually starts +/// the specified closing tag, so legitimate value bytes are preserved. +fn strip_dsml_trailing(s: &str, closing_tag: &str) -> String { + for (idx, _) in s.char_indices() { + if closing_tag.starts_with(&s[idx..]) { + return s[..idx].to_string(); + } + } + s.to_string() +} + +impl DeepSeek32Parser { + /// Create a new DeepSeek V3.2 parser + #[expect( + clippy::expect_used, + reason = "regex patterns are compile-time string literals" + )] + pub fn new() -> Self { + let tool_call_complete_regex = + Regex::new(r"(?s)<|DSML|function_calls>(.*?)") + .expect("Valid regex pattern"); + + let invoke_complete_regex = + Regex::new(r#"(?s)<|DSML|invoke\s+name="([^"]+)"\s*>(.*?)"#) + .expect("Valid regex pattern"); + + let parameter_complete_regex = Regex::new( + r#"(?s)<|DSML|parameter\s+name="([^"]+)"\s+string="(true|false)"\s*>(.*?)"#, + ) + .expect("Valid regex pattern"); + + let partial_parameter_regex = Regex::new( + r#"(?s)<|DSML|parameter\s+name="([^"]+)"\s+string="(true|false)"\s*>(.*)$"#, + ) + .expect("Valid regex pattern"); + + let invoke_regex = + Regex::new(r#"(?s)<|DSML|invoke\s+name="([^"]+)"\s*>(.*?)(|$)"#) + .expect("Valid regex pattern"); + + Self { + tool_call_complete_regex, + invoke_complete_regex, + parameter_complete_regex, + partial_parameter_regex, + invoke_regex, + buffer: String::new(), + prev_tool_call_arr: Vec::new(), + current_tool_id: -1, + current_tool_name_sent: false, + streamed_args_for_tool: Vec::new(), + } + } + + /// Parse DSML parameters from invoke content into a JSON string. + /// + /// Supports two formats: + /// 1. Direct JSON: content starts with `{` — returned as-is + /// 2. XML parameters: `<|DSML|parameter name="k" string="true|false">v` + /// + /// When `allow_partial` is true (streaming), also matches open parameter tags + /// and strips trailing DSML fragments. + fn parse_parameters_from_dsml(&self, invoke_content: &str, allow_partial: bool) -> String { + let trimmed = invoke_content.trim(); + + // Direct JSON path + if trimmed.starts_with('{') { + if allow_partial { + return strip_dsml_trailing(trimmed, DSML_INVOKE_END_TAG); + } else if trimmed.ends_with('}') { + return trimmed.to_string(); + } + } + + // XML parameter path + let mut params = serde_json::Map::new(); + + for cap in self.parameter_complete_regex.captures_iter(invoke_content) { + let name = cap.get(1).map_or("", |m| m.as_str()); + let is_string = cap.get(2).map_or("true", |m| m.as_str()); + let value = cap.get(3).map_or("", |m| m.as_str()); + + let json_value = if is_string == "true" { + Value::String(value.to_string()) + } else { + serde_json::from_str(value.trim()) + .unwrap_or_else(|_| Value::String(value.to_string())) + }; + + params.insert(name.to_string(), json_value); + } + + // Partial parameter matching for streaming + // Following SGLang: strip DSML fragments from remaining content BEFORE + // running the partial regex, so the regex captures a clean value. + if allow_partial { + // Find where the last complete parameter match ended + let last_match_end = self + .parameter_complete_regex + .find_iter(invoke_content) + .last() + .map(|m| m.end()) + .unwrap_or(0); + + let remaining = &invoke_content[last_match_end..]; + let cleaned = strip_dsml_trailing(remaining, DSML_PARAMETER_END_TAG); + + if let Some(cap) = self.partial_parameter_regex.captures(&cleaned) { + let name = cap.get(1).map_or("", |m| m.as_str()); + let is_string = cap.get(2).map_or("true", |m| m.as_str()); + let value = cap.get(3).map_or("", |m| m.as_str()).trim(); + + // Only add if we have actual content and this param isn't already complete + if !value.is_empty() && !params.contains_key(name) { + let json_value = if is_string == "true" { + Value::String(value.to_string()) + } else { + serde_json::from_str(value) + .unwrap_or_else(|_| Value::String(value.to_string())) + }; + params.insert(name.to_string(), json_value); + } + } + } + + serde_json::to_string(&Value::Object(params)).unwrap_or_else(|_| "{}".to_string()) + } + + /// Parse a single complete invoke block into a ToolCall + fn parse_invoke(&self, name: &str, content: &str) -> ToolCall { + let arguments = self.parse_parameters_from_dsml(content, false); + + ToolCall { + function: FunctionCall { + name: name.trim().to_string(), + arguments, + }, + } + } +} + +impl Default for DeepSeek32Parser { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl ToolParser for DeepSeek32Parser { + async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec)> { + if !self.has_tool_markers(text) { + return Ok((text.to_string(), vec![])); + } + + let idx = text + .find("<|DSML|function_calls>") + .ok_or_else(|| ParserError::ParsingFailed("DSML marker not found".to_string()))?; + let normal_text = text[..idx].trim_end().to_string(); + + let mut tools = Vec::new(); + + for fc_cap in self.tool_call_complete_regex.captures_iter(text) { + let fc_content = fc_cap.get(1).map_or("", |m| m.as_str()); + + for inv_cap in self.invoke_complete_regex.captures_iter(fc_content) { + let func_name = inv_cap.get(1).map_or("", |m| m.as_str()); + let invoke_content = inv_cap.get(2).map_or("", |m| m.as_str()); + + tools.push(self.parse_invoke(func_name, invoke_content)); + } + } + + if tools.is_empty() { + return Ok((normal_text, vec![])); + } + + Ok((normal_text, tools)) + } + + async fn parse_incremental( + &mut self, + chunk: &str, + tools: &[Tool], + ) -> ParserResult { + self.buffer.push_str(chunk); + let current_text = self.buffer.clone(); + + // Check for DSML markers or partial DSML prefixes + let has_dsml = + self.has_tool_markers(¤t_text) || current_text.contains("<|DSML|invoke"); + let has_partial_prefix = current_text.ends_with('<') + || current_text.ends_with("<|") + || current_text.ends_with("", + "", + "", + "<|end▁of▁sentence|>", + ] { + normal_text = normal_text.replace(end_token, ""); + } + return Ok(StreamingParseResult { + normal_text, + calls: vec![], + }); + } + + // If we have partial prefix but no actual DSML content, buffer and wait + if !has_dsml && has_partial_prefix { + return Ok(StreamingParseResult::default()); + } + + let tool_indices = helpers::get_tool_indices(tools); + let mut all_calls: Vec = Vec::new(); + + // Process invoke blocks in a loop (handles multiple complete invokes in buffer) + loop { + let buf_snapshot = self.buffer.clone(); + let invoke_match = self.invoke_regex.captures(&buf_snapshot); + + let captures = match invoke_match { + Some(c) => c, + None => break, + }; + + let func_name = captures + .get(1) + .map_or(String::new(), |m| m.as_str().trim().to_string()); + let invoke_content = captures + .get(2) + .map_or(String::new(), |m| m.as_str().to_string()); + let is_complete = captures + .get(3) + .is_some_and(|m| m.as_str().contains("")); + let match_end = captures.get(0).map(|m| m.end()); + drop(captures); + + // Skip if tool name is not in provided tools list + if !func_name.is_empty() && !tool_indices.contains_key(func_name.as_str()) { + tracing::debug!("Invalid tool name '{}' - skipping", func_name); + if is_complete { + // Complete invalid invoke — advance buffer past it and try next + if let Some(end) = match_end { + self.buffer = self.buffer[end..].to_string(); + } + continue; + } else { + // Incomplete invalid invoke — reset state and wait for more data + // Return any calls already collected from previous complete invokes + helpers::reset_current_tool_state( + &mut self.buffer, + &mut self.current_tool_name_sent, + &mut self.streamed_args_for_tool, + &self.prev_tool_call_arr, + ); + return Ok(StreamingParseResult { + normal_text: String::new(), + calls: all_calls, + }); + } + } + + // Initialize state on first tool + if self.current_tool_id == -1 { + self.current_tool_id = 0; + self.prev_tool_call_arr = Vec::new(); + self.streamed_args_for_tool = vec![String::new()]; + } + + helpers::ensure_capacity( + self.current_tool_id, + &mut self.prev_tool_call_arr, + &mut self.streamed_args_for_tool, + ); + + // Emit tool name if not sent + if !self.current_tool_name_sent && !func_name.is_empty() { + all_calls.push(ToolCallItem { + tool_index: self.current_tool_id as usize, + name: Some(func_name.to_string()), + parameters: String::new(), + }); + self.current_tool_name_sent = true; + + let tool_id = self.current_tool_id as usize; + if self.prev_tool_call_arr.len() <= tool_id { + self.prev_tool_call_arr + .resize_with(tool_id + 1, || Value::Null); + } + self.prev_tool_call_arr[tool_id] = serde_json::json!({ + "name": func_name, + "arguments": {}, + }); + } + + // Parse current arguments (partial or complete) + let current_args = self.parse_parameters_from_dsml(&invoke_content, !is_complete); + let tool_id = self.current_tool_id as usize; + + // Compute diff against what we've already sent + let sent_len = self + .streamed_args_for_tool + .get(tool_id) + .map(|s| s.len()) + .unwrap_or(0); + + let prev_args = if tool_id < self.prev_tool_call_arr.len() { + self.prev_tool_call_arr[tool_id] + .get("arguments") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + } else { + None + }; + + let argument_diff = if is_complete { + if sent_len < current_args.len() { + Some(current_args[sent_len..].to_string()) + } else { + Some(String::new()) + } + } else if let Some(prev) = &prev_args { + if current_args == *prev { + None + } else { + let prefix = helpers::find_common_prefix(prev, ¤t_args); + if prefix.len() > sent_len { + Some(prefix[sent_len..].to_string()) + } else { + None + } + } + } else if sent_len < current_args.len() && current_args != "{}" { + // First partial chunk — no prev_args yet, emit from sent_len. + // Skip empty "{}" to avoid corrupting the delta stream when the + // buffer ends right after with no parameter content yet. + Some(current_args[sent_len..].to_string()) + } else { + None + }; + + if let Some(diff) = argument_diff { + if !diff.is_empty() { + if tool_id < self.streamed_args_for_tool.len() { + self.streamed_args_for_tool[tool_id].push_str(&diff); + } + all_calls.push(ToolCallItem { + tool_index: tool_id, + name: None, + parameters: diff, + }); + } + } + + // Update prev state + if tool_id < self.prev_tool_call_arr.len() { + self.prev_tool_call_arr[tool_id] = serde_json::json!({ + "name": func_name, + "arguments": current_args, + }); + } + + // If invoke is complete, advance to next tool + if is_complete { + if let Some(end) = match_end { + self.buffer = self.buffer[end..].to_string(); + } else { + self.buffer.clear(); + } + self.current_tool_id += 1; + self.current_tool_name_sent = false; + continue; + } else { + break; + } + } + + Ok(StreamingParseResult { + normal_text: String::new(), + calls: all_calls, + }) + } + + fn has_tool_markers(&self, text: &str) -> bool { + text.contains("<|DSML|function_calls>") + } + + fn get_unstreamed_tool_args(&self) -> Option> { + helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool) + } + + fn reset(&mut self) { + self.buffer.clear(); + self.prev_tool_call_arr.clear(); + self.current_tool_id = -1; + self.current_tool_name_sent = false; + self.streamed_args_for_tool.clear(); + } +} diff --git a/crates/tool_parser/src/parsers/mod.rs b/crates/tool_parser/src/parsers/mod.rs index 79e3880b4..a82e03bff 100644 --- a/crates/tool_parser/src/parsers/mod.rs +++ b/crates/tool_parser/src/parsers/mod.rs @@ -6,6 +6,7 @@ pub mod cohere; pub mod deepseek; pub mod deepseek31; +pub mod deepseek32; pub mod glm4_moe; pub mod json; pub mod kimik2; @@ -25,6 +26,7 @@ pub mod helpers; pub use cohere::CohereParser; pub use deepseek::DeepSeekParser; pub use deepseek31::DeepSeek31Parser; +pub use deepseek32::DeepSeek32Parser; pub use glm4_moe::Glm4MoeParser; pub use json::JsonParser; pub use kimik2::KimiK2Parser; diff --git a/crates/tool_parser/tests/tool_parser_deepseek32.rs b/crates/tool_parser/tests/tool_parser_deepseek32.rs new file mode 100644 index 000000000..d4e5f136d --- /dev/null +++ b/crates/tool_parser/tests/tool_parser_deepseek32.rs @@ -0,0 +1,318 @@ +//! DeepSeek V3.2 Parser Integration Tests +mod common; + +use common::create_test_tools; +use tool_parser::{DeepSeek32Parser, ToolParser}; + +#[tokio::test] +async fn test_deepseek32_complete_single_tool() { + let parser = DeepSeek32Parser::new(); + + let input = concat!( + "Let me check that.\n\n", + "<|DSML|function_calls>\n", + "<|DSML|invoke name=\"get_weather\">\n", + "<|DSML|parameter name=\"location\" string=\"true\">Tokyo\n", + "<|DSML|parameter name=\"units\" string=\"true\">celsius\n", + "\n", + "", + ); + + let (normal_text, tools) = parser.parse_complete(input).await.unwrap(); + assert_eq!(normal_text, "Let me check that."); + assert_eq!(tools.len(), 1); + assert_eq!(tools[0].function.name, "get_weather"); + + let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap(); + assert_eq!(args["location"], "Tokyo"); + assert_eq!(args["units"], "celsius"); +} + +#[tokio::test] +async fn test_deepseek32_complete_multiple_tools() { + let parser = DeepSeek32Parser::new(); + + let input = concat!( + "<|DSML|function_calls>\n", + "<|DSML|invoke name=\"search\">\n", + "<|DSML|parameter name=\"query\" string=\"true\">rust programming\n", + "\n", + "<|DSML|invoke name=\"translate\">\n", + "<|DSML|parameter name=\"text\" string=\"true\">Hello World\n", + "<|DSML|parameter name=\"to\" string=\"true\">ja\n", + "\n", + "", + ); + + let (_normal_text, tools) = parser.parse_complete(input).await.unwrap(); + assert_eq!(tools.len(), 2); + assert_eq!(tools[0].function.name, "search"); + assert_eq!(tools[1].function.name, "translate"); +} + +#[tokio::test] +async fn test_deepseek32_complete_direct_json() { + let parser = DeepSeek32Parser::new(); + + let input = concat!( + "<|DSML|function_calls>\n", + "<|DSML|invoke name=\"get_weather\">\n", + "{\"location\": \"Beijing\", \"date\": \"2024-01-16\"}\n", + "\n", + "", + ); + + let (_normal_text, tools) = parser.parse_complete(input).await.unwrap(); + assert_eq!(tools.len(), 1); + assert_eq!(tools[0].function.name, "get_weather"); + + let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap(); + assert_eq!(args["location"], "Beijing"); + assert_eq!(args["date"], "2024-01-16"); +} + +#[tokio::test] +async fn test_deepseek32_complete_mixed_types() { + let parser = DeepSeek32Parser::new(); + + let input = concat!( + "<|DSML|function_calls>\n", + "<|DSML|invoke name=\"process\">\n", + "<|DSML|parameter name=\"text\" string=\"true\">hello\n", + "<|DSML|parameter name=\"count\" string=\"false\">42\n", + "<|DSML|parameter name=\"enabled\" string=\"false\">true\n", + "\n", + "", + ); + + let (_normal_text, tools) = parser.parse_complete(input).await.unwrap(); + assert_eq!(tools.len(), 1); + + let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap(); + assert_eq!(args["text"], "hello"); + assert_eq!(args["count"], 42); + assert_eq!(args["enabled"], true); +} + +#[tokio::test] +async fn test_deepseek32_complete_nested_json_param() { + let parser = DeepSeek32Parser::new(); + + let input = concat!( + "<|DSML|function_calls>\n", + "<|DSML|invoke name=\"process\">\n", + "<|DSML|parameter name=\"data\" string=\"false\">{\"nested\": [1, 2, 3]}\n", + "\n", + "", + ); + + let (_normal_text, tools) = parser.parse_complete(input).await.unwrap(); + assert_eq!(tools.len(), 1); + + let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap(); + assert!(args["data"]["nested"].is_array()); +} + +#[tokio::test] +async fn test_deepseek32_complete_malformed_skips() { + let parser = DeepSeek32Parser::new(); + + let input = concat!( + "<|DSML|function_calls>\n", + "<|DSML|invoke name=\"search\">\n", + "not valid at all\n", + "\n", + "<|DSML|invoke name=\"translate\">\n", + "<|DSML|parameter name=\"text\" string=\"true\">hello\n", + "<|DSML|parameter name=\"to\" string=\"true\">ja\n", + "\n", + "", + ); + + let (_normal_text, tools) = parser.parse_complete(input).await.unwrap(); + assert!(!tools.is_empty()); + assert!(tools.iter().any(|t| t.function.name == "translate")); +} + +#[test] +fn test_deepseek32_format_detection() { + let parser = DeepSeek32Parser::new(); + + assert!(parser.has_tool_markers("<|DSML|function_calls>")); + assert!(parser.has_tool_markers("text with <|DSML|function_calls> marker")); + + assert!(!parser.has_tool_markers("<|tool▁calls▁begin|>")); + assert!(!parser.has_tool_markers("[TOOL_CALLS]")); + assert!(!parser.has_tool_markers("plain text")); +} + +#[tokio::test] +async fn test_deepseek32_no_tool_calls() { + let parser = DeepSeek32Parser::new(); + + let input = "Just a normal response."; + let (normal_text, tools) = parser.parse_complete(input).await.unwrap(); + assert_eq!(normal_text, input); + assert!(tools.is_empty()); +} + +#[tokio::test] +async fn test_deepseek32_streaming_single_tool() { + let tools = create_test_tools(); + let mut parser = DeepSeek32Parser::new(); + + let chunks = vec![ + "<|DSML|function_calls>\n", + "<|DSML|invoke name=\"get_weather\">\n", + "<|DSML|parameter name=\"location\" string=\"true\">", + "Beijing", + "\n", + "<|DSML|parameter name=\"units\" string=\"true\">", + "celsius", + "\n", + "\n", + "", + ]; + + let mut found_name = false; + let mut collected_args = String::new(); + + for chunk in chunks { + let result = parser.parse_incremental(chunk, &tools).await.unwrap(); + for call in result.calls { + if let Some(name) = call.name { + assert_eq!(name, "get_weather"); + found_name = true; + } + if !call.parameters.is_empty() { + collected_args.push_str(&call.parameters); + } + } + } + + assert!(found_name, "Should have found tool name during streaming"); + assert!(!collected_args.is_empty(), "Should have streamed arguments"); +} + +#[tokio::test] +async fn test_deepseek32_streaming_multiple_tools() { + let tools = create_test_tools(); + let mut parser = DeepSeek32Parser::new(); + + let chunks = vec![ + "<|DSML|function_calls>\n", + "<|DSML|invoke name=\"search\">\n", + "<|DSML|parameter name=\"query\" string=\"true\">rust\n", + "\n", + "<|DSML|invoke name=\"get_weather\">\n", + "<|DSML|parameter name=\"location\" string=\"true\">Tokyo\n", + "\n", + "", + ]; + + let mut tool_names: Vec = Vec::new(); + + for chunk in chunks { + let result = parser.parse_incremental(chunk, &tools).await.unwrap(); + for call in result.calls { + if let Some(name) = call.name { + tool_names.push(name); + } + } + } + + assert_eq!(tool_names, vec!["search", "get_weather"]); +} + +#[tokio::test] +async fn test_deepseek32_streaming_text_before_tools() { + let tools = create_test_tools(); + let mut parser = DeepSeek32Parser::new(); + + let chunks = vec![ + "Here is ", + "the result\n\n", + "<|DSML|function_calls>\n", + "<|DSML|invoke name=\"search\">\n", + "<|DSML|parameter name=\"query\" string=\"true\">test\n", + "\n", + "", + ]; + + let mut normal_text = String::new(); + let mut found_tool = false; + + for chunk in chunks { + let result = parser.parse_incremental(chunk, &tools).await.unwrap(); + normal_text.push_str(&result.normal_text); + for call in result.calls { + if call.name.is_some() { + found_tool = true; + } + } + } + + assert!(normal_text.contains("Here is the result")); + assert!(found_tool); +} + +#[tokio::test] +async fn test_deepseek32_streaming_end_tokens_stripped() { + let tools = create_test_tools(); + let mut parser = DeepSeek32Parser::new(); + + let result = parser + .parse_incremental("", &tools) + .await + .unwrap(); + assert!(!result.normal_text.contains("")); +} + +use tool_parser::ParserFactory; + +#[tokio::test] +async fn test_deepseek32_factory_registration() { + let factory = ParserFactory::new(); + + assert!(factory.has_parser("deepseek32")); + + // V3.2 DSML models resolve to deepseek32 parser + let dsml_input = concat!( + "<|DSML|function_calls>\n", + "<|DSML|invoke name=\"search\">\n", + "<|DSML|parameter name=\"query\" string=\"true\">test\n", + "\n", + "", + ); + for model in ["deepseek-v3.2", "deepseek-ai/DeepSeek-V3.2"] { + let parser = factory + .registry() + .create_for_model(model) + .expect("parser should exist"); + let (_text, calls) = parser.parse_complete(dsml_input).await.unwrap(); + assert_eq!(calls.len(), 1, "model {model} should parse DSML format"); + assert_eq!(calls[0].function.name, "search"); + } + + // V3.2-Exp resolves to deepseek31 parser (V3.1 format) + let v31_input = concat!( + "<|tool▁calls▁begin|>", + "<|tool▁call▁begin|>search<|tool▁sep|>", + r#"{"query": "test"}"#, + "<|tool▁call▁end|>", + "<|tool▁calls▁end|>", + ); + for model in ["deepseek-v3.2-exp", "deepseek-ai/DeepSeek-V3.2-Exp"] { + let parser = factory + .registry() + .create_for_model(model) + .expect("parser should exist"); + let (_text, calls) = parser.parse_complete(v31_input).await.unwrap(); + assert_eq!(calls.len(), 1, "model {model} should parse V3.1 format"); + assert_eq!(calls[0].function.name, "search"); + } + + // Existing V3 and V3.1 mappings still work + assert!(factory.registry().has_parser_for_model("deepseek-v3")); + assert!(factory.registry().has_parser_for_model("deepseek-v3.1")); +}