From 6b3d3e015f3c80e79a5df2162da9bad7309ff36c Mon Sep 17 00:00:00 2001 From: Dylan Ross Date: Sun, 19 Oct 2025 20:23:24 -0500 Subject: [PATCH 001/123] added ModelName partial eq implementations for string types (#94) --- src/common/model_name.rs | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/common/model_name.rs b/src/common/model_name.rs index 9517d8bc..b8821d28 100644 --- a/src/common/model_name.rs +++ b/src/common/model_name.rs @@ -69,6 +69,48 @@ impl Deref for ModelName { // endregion: --- Froms +// region: --- EQ + +// PartialEq implementations for various string types +impl PartialEq for ModelName { + fn eq(&self, other: &str) -> bool { + &*self.0 == other + } +} + +impl PartialEq<&str> for ModelName { + fn eq(&self, other: &&str) -> bool { + &*self.0 == *other + } +} + +impl PartialEq for ModelName { + fn eq(&self, other: &String) -> bool { + &*self.0 == other + } +} + +// Symmetric implementations (allow "string" == model_name) +impl PartialEq for str { + fn eq(&self, other: &ModelName) -> bool { + self == &*other.0 + } +} + +impl PartialEq for &str { + fn eq(&self, other: &ModelName) -> bool { + *self == &*other.0 + } +} + +impl PartialEq for String { + fn eq(&self, other: &ModelName) -> bool { + self == &*other.0 + } +} + +// endregion: --- EQ + // TODO: replace with derive_more Display impl std::fmt::Display for ModelName { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { From d3fcf0b7613d5ff1e50e33f59371ccbf6c5e507e Mon Sep 17 00:00:00 2001 From: Bart Carroll <103963480+bartCarroll@users.noreply.github.com> Date: Mon, 20 Oct 2025 16:11:49 -0500 Subject: [PATCH 002/123] Fixed streaming tool calls for openai models (#91) --- src/adapter/adapters/openai/streamer.rs | 33 +++++++++++++------------ 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/adapter/adapters/openai/streamer.rs b/src/adapter/adapters/openai/streamer.rs index f6a29bfe..4370cee9 100644 --- 
a/src/adapter/adapters/openai/streamer.rs +++ b/src/adapter/adapters/openai/streamer.rs @@ -135,13 +135,11 @@ impl futures::Stream for OpenAIStreamer { .unwrap_or_else(|_| format!("call_{index}")); let fn_name = function.x_take::("name").unwrap_or_default(); let arguments = function.x_take::("arguments").unwrap_or_default(); - // Create the tool call - let fn_arguments = serde_json::from_str(&arguments) - .unwrap_or(serde_json::Value::String(arguments.clone())); + // Don't parse yet - accumulate as string first let mut tool_call = crate::chat::ToolCall { call_id, fn_name, - fn_arguments: fn_arguments.clone(), + fn_arguments: serde_json::Value::String(arguments.clone()), }; // Capture the tool call if enabled @@ -149,19 +147,22 @@ impl futures::Stream for OpenAIStreamer { match &mut self.captured_data.tool_calls { Some(calls) => { self.captured_data.tool_calls = Some({ - // When fn_arguments can not be parsed, we need to append the arguments to the existing fn_arguments as json string - let mut captured_fn_argments = String::new(); - if calls[index as usize].fn_arguments.is_string() { - captured_fn_argments.push_str( - calls[index as usize].fn_arguments.as_str().unwrap_or(""), - ); - captured_fn_argments.push_str(&arguments); + // Accumulate arguments as strings, don't parse until complete + let accumulated = if let Some(existing) = calls[index as usize].fn_arguments.as_str() { + format!("{}{}", existing, arguments) + } else { + arguments.clone() + }; + + // Store as string (will be parsed at stream end) + calls[index as usize].fn_arguments = serde_json::Value::String(accumulated); + + // Update call_id and fn_name on first chunk + if !tool_call.fn_name.is_empty() { + calls[index as usize].call_id = tool_call.call_id.clone(); + calls[index as usize].fn_name = tool_call.fn_name.clone(); } - let fn_arguments = serde_json::from_str(&captured_fn_argments) - .unwrap_or(serde_json::Value::String( - captured_fn_argments.clone(), - )); - calls[index as usize].fn_arguments = 
fn_arguments.clone(); + tool_call = calls[index as usize].clone(); calls.to_vec() }) From 4123ee2db51a4dcb461e85cf22ac5733ffcae958 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Mon, 20 Oct 2025 16:47:47 -0700 Subject: [PATCH 003/123] - stream tool - openai - fix issue that captured_tool_calls ado not have the fn_arguments parsed --- src/adapter/adapters/openai/streamer.rs | 54 +++++++++++++++++++++---- tests/support/common_tests.rs | 31 ++++++++++++++ tests/tests_p_openai.rs | 6 +++ 3 files changed, 84 insertions(+), 7 deletions(-) diff --git a/src/adapter/adapters/openai/streamer.rs b/src/adapter/adapters/openai/streamer.rs index 4370cee9..787386f1 100644 --- a/src/adapter/adapters/openai/streamer.rs +++ b/src/adapter/adapters/openai/streamer.rs @@ -2,7 +2,7 @@ use crate::adapter::AdapterKind; use crate::adapter::adapters::support::{StreamerCapturedData, StreamerOptions}; use crate::adapter::inter_stream::{InterStreamEnd, InterStreamEvent}; use crate::adapter::openai::OpenAIAdapter; -use crate::chat::ChatOptionsSet; +use crate::chat::{ChatOptionsSet, ToolCall}; use crate::{Error, ModelIden, Result}; use reqwest_eventsource::{Event, EventSource}; use serde_json::Value; @@ -58,11 +58,48 @@ impl futures::Stream for OpenAIStreamer { None }; + // -- Process the captured_tool_calls + // NOTE: here we attempt to parse the `fn_arguments` if it is string, because it means that it was accumulated + let captured_tool_calls = if let Some(tools_calls) = self.captured_data.tool_calls.take() { + let tools_calls: Vec = tools_calls + .into_iter() + .map(|tool_call| { + // extrat + let ToolCall { + call_id, + fn_name, + fn_arguments, + } = tool_call; + // parse fn_arguments if needed + let fn_arguments = match fn_arguments { + Value::String(fn_arguments_string) => { + // NOTE: Here we are resilient for now, if we cannot parse, just return the original String + match serde_json::from_str::(&fn_arguments_string) { + Ok(fn_arguments) => fn_arguments, + Err(_) => 
Value::String(fn_arguments_string), + } + } + _ => fn_arguments, + }; + + ToolCall { + call_id, + fn_name, + fn_arguments, + } + }) + .collect(); + Some(tools_calls) + } else { + None + }; + + // Return the internal stream end let inter_stream_end = InterStreamEnd { captured_usage, captured_text_content: self.captured_data.content.take(), captured_reasoning_content: self.captured_data.reasoning_content.take(), - captured_tool_calls: self.captured_data.tool_calls.take(), + captured_tool_calls, }; return Poll::Ready(Some(Ok(InterStreamEvent::End(inter_stream_end)))); @@ -148,21 +185,24 @@ impl futures::Stream for OpenAIStreamer { Some(calls) => { self.captured_data.tool_calls = Some({ // Accumulate arguments as strings, don't parse until complete - let accumulated = if let Some(existing) = calls[index as usize].fn_arguments.as_str() { + let accumulated = if let Some(existing) = + calls[index as usize].fn_arguments.as_str() + { format!("{}{}", existing, arguments) } else { arguments.clone() }; - + // Store as string (will be parsed at stream end) - calls[index as usize].fn_arguments = serde_json::Value::String(accumulated); - + calls[index as usize].fn_arguments = + serde_json::Value::String(accumulated); + // Update call_id and fn_name on first chunk if !tool_call.fn_name.is_empty() { calls[index as usize].call_id = tool_call.call_id.clone(); calls[index as usize].fn_name = tool_call.fn_name.clone(); } - + tool_call = calls[index as usize].clone(); calls.to_vec() }) diff --git a/tests/support/common_tests.rs b/tests/support/common_tests.rs index 10efdc20..e338c520 100644 --- a/tests/support/common_tests.rs +++ b/tests/support/common_tests.rs @@ -655,6 +655,37 @@ pub async fn common_test_chat_stream_capture_all_ok(model: &str, checks: Option< Ok(()) } +/// Just making the tool request, and checking the tool call response +/// `complete_check` if for LLMs that are better at giving back the unit and weather. 
+pub async fn common_test_chat_stream_tool_capture_ok(model: &str) -> TestResult<()> { + // -- Setup & Fixtures + let client = Client::default(); + let chat_req = seed_chat_req_tool_simple(); + let mut chat_options = ChatOptions::default().with_capture_tool_calls(true); + + // -- Exec + let chat_res = client.exec_chat_stream(model, chat_req, Some(&chat_options)).await?; + + // Extract Stream content + let StreamExtract { + stream_end, + content, + reasoning_content, + } = extract_stream_end(chat_res.stream).await?; + + // -- Check + let mut tool_calls = stream_end.captured_tool_calls().ok_or("Should have captured tools")?; + if tool_calls.is_empty() { + return Err("Should have tool calls in chat_res".into()); + } + let tool_call = tool_calls.pop().ok_or("Should have at least one tool call")?; + assert_eq!(tool_call.fn_arguments.x_get_as::<&str>("city")?, "Paris"); + assert_eq!(tool_call.fn_arguments.x_get_as::<&str>("country")?, "France"); + assert_eq!(tool_call.fn_arguments.x_get_as::<&str>("unit")?, "C"); + + Ok(()) +} + // endregion: --- Chat Stream Tests // region: --- Binaries diff --git a/tests/tests_p_openai.rs b/tests/tests_p_openai.rs index 8c3571e4..74fc6f56 100644 --- a/tests/tests_p_openai.rs +++ b/tests/tests_p_openai.rs @@ -97,6 +97,12 @@ async fn test_chat_stream_capture_all_ok() -> TestResult<()> { common_tests::common_test_chat_stream_capture_all_ok(MODEL, Some(Check::REASONING_USAGE)).await } +#[tokio::test] +async fn test_chat_stream_tool_capture_ok() -> TestResult<()> { + // NOTE: For now the OpenAI Adapter do not capture the thinking as not available in chat completions + common_tests::common_test_chat_stream_tool_capture_ok(MODEL).await +} + // endregion: --- Chat Stream Tests // region: --- Binary Tests From 5a1df3c1ab2c47987f073cb6bd4904cf534ca81f Mon Sep 17 00:00:00 2001 From: Rui Andrada <27135+shingonoide@users.noreply.github.com> Date: Tue, 21 Oct 2025 21:53:42 -0300 Subject: [PATCH 004/123] Refactor ZHIPU adapter to ZAI with 
namespace-based endpoint routing (#95) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename zhipu adapter to zai adapter - Implement namespace-based routing for regular vs coding endpoints - Regular models: "glm-4.6" → credit-based API - Coding models: "zai::glm-4.6" → subscription API - Add comprehensive example c07-zai.rs - Update adapter kind detection to handle "zai" namespace - Remove dual-endpoint complexity from user-facing API Co-authored-by: Rui Andrada --- examples/c00-readme.rs | 6 +- examples/c07-zai.rs | 56 +++++++++++++++ src/adapter/adapter_kind.rs | 22 +++--- src/adapter/adapters/mod.rs | 2 +- src/adapter/adapters/openai/streamer.rs | 2 +- .../adapters/{zhipu => zai}/adapter_impl.rs | 72 ++++++++++++++++--- src/adapter/adapters/zai/mod.rs | 61 ++++++++++++++++ src/adapter/adapters/zhipu/mod.rs | 12 ---- src/adapter/dispatcher.rs | 20 +++--- tests/{tests_p_zhipu.rs => tests_p_zai.rs} | 6 +- ..._reasoning.rs => tests_p_zai_reasoning.rs} | 4 +- 11 files changed, 212 insertions(+), 51 deletions(-) create mode 100644 examples/c07-zai.rs rename src/adapter/adapters/{zhipu => zai}/adapter_impl.rs (51%) create mode 100644 src/adapter/adapters/zai/mod.rs delete mode 100644 src/adapter/adapters/zhipu/mod.rs rename tests/{tests_p_zhipu.rs => tests_p_zai.rs} (95%) rename tests/{tests_p_zhipu_reasoning.rs => tests_p_zai_reasoning.rs} (96%) diff --git a/examples/c00-readme.rs b/examples/c00-readme.rs index eb2f7fe7..83fb4760 100644 --- a/examples/c00-readme.rs +++ b/examples/c00-readme.rs @@ -15,7 +15,7 @@ const MODEL_GROQ: &str = "llama-3.1-8b-instant"; const MODEL_OLLAMA: &str = "gemma:2b"; // sh: `ollama pull gemma:2b` const MODEL_XAI: &str = "grok-3-mini"; const MODEL_DEEPSEEK: &str = "deepseek-chat"; -const MODEL_ZHIPU: &str = "glm-4-plus"; +const MODEL_ZAI: &str = "glm-4-plus"; const MODEL_COHERE: &str = "command-r7b-12-2024"; // NOTE: These are the default environment keys for each AI Adapter Type. 
@@ -31,7 +31,7 @@ const MODEL_AND_KEY_ENV_NAME_LIST: &[(&str, &str)] = &[ (MODEL_XAI, "XAI_API_KEY"), (MODEL_DEEPSEEK, "DEEPSEEK_API_KEY"), (MODEL_OLLAMA, ""), - (MODEL_ZHIPU, "ZHIPU_API_KEY"), + (MODEL_ZAI, "ZAI_API_KEY"), (MODEL_COHERE, "COHERE_API_KEY"), ]; @@ -41,7 +41,7 @@ const MODEL_AND_KEY_ENV_NAME_LIST: &[(&str, &str)] = &[ // - starts_with "command" -> Cohere // - starts_with "gemini" -> Gemini // - model in Groq models -> Groq -// - starts_with "glm" -> Zhipu +// - starts_with "glm" -> ZAI // - For anything else -> Ollama // // This can be customized; see `examples/c03-mapper.rs` diff --git a/examples/c07-zai.rs b/examples/c07-zai.rs new file mode 100644 index 00000000..5ee13223 --- /dev/null +++ b/examples/c07-zai.rs @@ -0,0 +1,56 @@ +//! ZAI (Zhipu AI) adapter example +//! +//! Demonstrates how to use ZAI models with automatic endpoint routing: +//! - `glm-4.6` → Regular credit-based API +//! - `zai::glm-4.6` → Coding subscription API (automatically routed) + +use genai::chat::{ChatMessage, ChatRequest}; +use genai::Client; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let client = Client::builder().build(); + + // Test cases demonstrating automatic endpoint routing + let test_cases = vec![ + ("glm-4.6", "Regular ZAI model"), + ("zai::glm-4.6", "Coding subscription model"), + ]; + + for (model_name, description) in test_cases { + println!("\n=== {} ===", description); + println!("Model: {}", model_name); + + let chat_req = ChatRequest::default() + .with_system("You are a helpful assistant.") + .append_message(ChatMessage::user("Say 'hello' and nothing else.")); + + match client.exec_chat(model_name, chat_req, None).await { + Ok(response) => { + println!("✅ Success!"); + if let Some(content) = response.first_text() { + println!("Response: {}", content); + } + if response.usage.prompt_tokens.is_some() || response.usage.completion_tokens.is_some() { + println!("Usage: prompt={}, output={}", + response.usage.prompt_tokens.unwrap_or(0), + 
response.usage.completion_tokens.unwrap_or(0)); + } + } + Err(e) => { + println!("❌ Error: {}", e); + if e.to_string().contains("insufficient balance") { + println!("ℹ️ This model requires credits or subscription"); + } else if e.to_string().contains("401") { + println!("ℹ️ Set ZAI_API_KEY environment variable"); + } + } + } + } + + println!("\n=== SUMMARY ==="); + println!("✅ ZAI adapter handles namespace routing automatically"); + println!("✅ Use ZAI_API_KEY environment variable"); + + Ok(()) +} \ No newline at end of file diff --git a/src/adapter/adapter_kind.rs b/src/adapter/adapter_kind.rs index 9fa0f6ad..03fb87f5 100644 --- a/src/adapter/adapter_kind.rs +++ b/src/adapter/adapter_kind.rs @@ -8,7 +8,7 @@ use crate::adapter::groq::{self, GroqAdapter}; use crate::adapter::nebius::NebiusAdapter; use crate::adapter::openai::OpenAIAdapter; use crate::adapter::xai::XaiAdapter; -use crate::adapter::zhipu::ZhipuAdapter; +use crate::adapter::adapters::zai::ZaiAdapter; use crate::{ModelName, Result}; use derive_more::Display; use serde::{Deserialize, Serialize}; @@ -39,8 +39,8 @@ pub enum AdapterKind { Xai, /// For DeepSeek (Mostly use OpenAI) DeepSeek, - /// For Zhipu (Mostly use OpenAI) - Zhipu, + /// For ZAI (Mostly use OpenAI) + Zai, /// Cohere today use it's own native protocol but might move to OpenAI Adapter Cohere, /// OpenAI shared behavior + some custom. (currently, localhost only, can be customize with ServerTargetResolver). 
@@ -62,7 +62,7 @@ impl AdapterKind { AdapterKind::Nebius => "Nebius", AdapterKind::Xai => "xAi", AdapterKind::DeepSeek => "DeepSeek", - AdapterKind::Zhipu => "Zhipu", + AdapterKind::Zai => "Zai", AdapterKind::Cohere => "Cohere", AdapterKind::Ollama => "Ollama", } @@ -81,7 +81,7 @@ impl AdapterKind { AdapterKind::Nebius => "nebius", AdapterKind::Xai => "xai", AdapterKind::DeepSeek => "deepseek", - AdapterKind::Zhipu => "zhipu", + AdapterKind::Zai => "zai", AdapterKind::Cohere => "cohere", AdapterKind::Ollama => "ollama", } @@ -99,7 +99,7 @@ impl AdapterKind { "nebius" => Some(AdapterKind::Nebius), "xai" => Some(AdapterKind::Xai), "deepseek" => Some(AdapterKind::DeepSeek), - "zhipu" => Some(AdapterKind::Zhipu), + "zai" => Some(AdapterKind::Zai), "cohere" => Some(AdapterKind::Cohere), "ollama" => Some(AdapterKind::Ollama), _ => None, @@ -122,7 +122,7 @@ impl AdapterKind { AdapterKind::Nebius => Some(NebiusAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::Xai => Some(XaiAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::DeepSeek => Some(DeepSeekAdapter::API_KEY_DEFAULT_ENV_NAME), - AdapterKind::Zhipu => Some(ZhipuAdapter::API_KEY_DEFAULT_ENV_NAME), + AdapterKind::Zai => Some(ZaiAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::Cohere => Some(CohereAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::Ollama => None, } @@ -149,6 +149,7 @@ impl AdapterKind { /// Other Some adapters have to have model name namespaced to be used, /// - e.g., for together.ai `together::meta-llama/Llama-3-8b-chat-hf` /// - e.g., for nebius with `nebius::Qwen/Qwen3-235B-A22B` + /// - e.g., for ZAI coding plan with `coding::glm-4.6` /// /// And all adapters can be force namspaced as well. 
/// @@ -157,6 +158,11 @@ impl AdapterKind { pub fn from_model(model: &str) -> Result { // -- First check if namespaced if let (_, Some(ns)) = ModelName::model_name_and_namespace(model) { + // Special handling: "zai" namespace should route to ZAI for coding endpoint + if ns == "zai" { + return Ok(AdapterKind::Zai); + } + if let Some(adapter) = Self::from_lower_str(ns) { return Ok(adapter); } else { @@ -194,7 +200,7 @@ impl AdapterKind { } else if model.starts_with("grok") { Ok(Self::Xai) } else if model.starts_with("glm") { - Ok(Self::Zhipu) + Ok(Self::Zai) } // For now, fallback to Ollama else { diff --git a/src/adapter/adapters/mod.rs b/src/adapter/adapters/mod.rs index a31217cd..b6495189 100644 --- a/src/adapter/adapters/mod.rs +++ b/src/adapter/adapters/mod.rs @@ -12,4 +12,4 @@ pub(super) mod openai; pub(super) mod openai_resp; pub(super) mod together; pub(super) mod xai; -pub(super) mod zhipu; +pub(super) mod zai; diff --git a/src/adapter/adapters/openai/streamer.rs b/src/adapter/adapters/openai/streamer.rs index 787386f1..65d4d44e 100644 --- a/src/adapter/adapters/openai/streamer.rs +++ b/src/adapter/adapters/openai/streamer.rs @@ -136,7 +136,7 @@ impl futures::Stream for OpenAIStreamer { self.captured_data.usage = Some(usage) } AdapterKind::DeepSeek - | AdapterKind::Zhipu + | AdapterKind::Zai | AdapterKind::Fireworks | AdapterKind::Together => { let usage = message_data diff --git a/src/adapter/adapters/zhipu/adapter_impl.rs b/src/adapter/adapters/zai/adapter_impl.rs similarity index 51% rename from src/adapter/adapters/zhipu/adapter_impl.rs rename to src/adapter/adapters/zai/adapter_impl.rs index ae684724..7f1ce6e2 100644 --- a/src/adapter/adapters/zhipu/adapter_impl.rs +++ b/src/adapter/adapters/zai/adapter_impl.rs @@ -7,10 +7,46 @@ use crate::webc::WebResponse; use crate::{Result, ServiceTarget}; use reqwest::RequestBuilder; -pub struct ZhipuAdapter; +/// Helper structure to hold ZAI model parsing information +struct ZaiModelEndpoint { + endpoint: 
Endpoint, +} + +impl ZaiModelEndpoint { + /// Parse ModelIden to determine if it's a coding model and return endpoint + fn from_model(model: &ModelIden) -> Self { + let (_, namespace) = model.model_name.as_model_name_and_namespace(); + + // Check if namespace is "zai" to route to coding endpoint + let endpoint = match namespace { + Some("zai") => Endpoint::from_static("https://api.z.ai/api/coding/paas/v4/"), + _ => ZaiAdapter::default_endpoint(), + }; + + Self { endpoint } + } +} + +/// The ZAI API is mostly compatible with the OpenAI API. +/// +/// NOTE: This adapter will automatically route to the coding endpoint +/// when the model name starts with "zai::". +/// +/// For example, `glm-4.6` uses the regular API endpoint, +/// while `zai::glm-4.6` uses the coding plan endpoint. +/// +pub struct ZaiAdapter; pub(in crate::adapter) const MODELS: &[&str] = &[ "glm-4-plus", + "glm-4.6", + "glm-4.5", + "glm-4.5v", + "glm-4.5-x", + "glm-4.5-air", + "glm-4.5-airx", + "glm-4-32b-0414-128k", + "glm-4.5-flash", "glm-4-air-250414", "glm-4-flashx-250414", "glm-4-flash-250414", @@ -26,17 +62,16 @@ pub(in crate::adapter) const MODELS: &[&str] = &[ "glm-z1-flashx", "glm-4.1v-thinking-flash", "glm-4.1v-thinking-flashx", - "glm-4.5", ]; -impl ZhipuAdapter { - pub const API_KEY_DEFAULT_ENV_NAME: &str = "ZHIPU_API_KEY"; +impl ZaiAdapter { + pub const API_KEY_DEFAULT_ENV_NAME: &str = "ZAI_API_KEY"; } -// The Zhipu API is mostly compatible with the OpenAI API. -impl Adapter for ZhipuAdapter { +// The ZAI API is mostly compatible with the OpenAI API. 
+impl Adapter for ZaiAdapter { fn default_endpoint() -> Endpoint { - const BASE_URL: &str = "https://open.bigmodel.cn/api/paas/v4/"; + const BASE_URL: &str = "https://api.z.ai/api/paas/v4/"; Endpoint::from_static(BASE_URL) } @@ -48,16 +83,28 @@ impl Adapter for ZhipuAdapter { Ok(MODELS.iter().map(|s| s.to_string()).collect()) } - fn get_service_url(model: &ModelIden, service_type: ServiceType, endpoint: Endpoint) -> Result { - OpenAIAdapter::util_get_service_url(model, service_type, endpoint) + fn get_service_url(_model: &ModelIden, service_type: ServiceType, endpoint: Endpoint) -> Result { + // For ZAI, we need to handle model-specific routing at this level + // because get_service_url is called with the modified endpoint from to_web_request_data + let base_url = endpoint.base_url(); + + let url = match service_type { + ServiceType::Chat | ServiceType::ChatStream => format!("{base_url}chat/completions"), + ServiceType::Embed => format!("{base_url}embeddings"), + }; + Ok(url) } fn to_web_request_data( - target: ServiceTarget, + mut target: ServiceTarget, service_type: ServiceType, chat_req: ChatRequest, chat_options: ChatOptionsSet<'_, '_>, ) -> Result { + // Parse model name and determine appropriate endpoint + let zai_info = ZaiModelEndpoint::from_model(&target.model); + target.endpoint = zai_info.endpoint; + OpenAIAdapter::util_to_web_request_data(target, service_type, chat_req, chat_options, None) } @@ -78,10 +125,13 @@ impl Adapter for ZhipuAdapter { } fn to_embed_request_data( - service_target: crate::ServiceTarget, + mut service_target: crate::ServiceTarget, embed_req: crate::embed::EmbedRequest, options_set: crate::embed::EmbedOptionsSet<'_, '_>, ) -> Result { + let zai_info = ZaiModelEndpoint::from_model(&service_target.model); + service_target.endpoint = zai_info.endpoint; + OpenAIAdapter::to_embed_request_data(service_target, embed_req, options_set) } diff --git a/src/adapter/adapters/zai/mod.rs b/src/adapter/adapters/zai/mod.rs new file mode 100644 
index 00000000..3eaf9b80 --- /dev/null +++ b/src/adapter/adapters/zai/mod.rs @@ -0,0 +1,61 @@ +//! ZAI API Documentation +//! API Documentation: +//! Model Names: GLM series models +//! Pricing: +//! +//! ## Dual Endpoint Support +//! +//! ZAI supports two different API endpoints using the ServiceTargetResolver pattern: +//! +//! ### Regular API (Credit-based) +//! - Endpoint: `https://api.z.ai/api/paas/v4/` +//! - Models: `glm-4.6`, `glm-4.5`, etc. +//! - Usage: Standard API calls billed per token +//! +//! ### Coding Plan (Subscription-based) +//! - Endpoint: `https://api.z.ai/api/coding/paas/v4/` +//! - Models: `coding::glm-4.6`, `coding:glm-4.5`, etc. +//! - Usage: Fixed monthly subscription for coding tasks +//! +//! ## Usage with ServiceTargetResolver +//! +//! ```rust +//! use genai::resolver::{Endpoint, ServiceTargetResolver}; +//! use genai::{Client, AdapterKind, ModelIden}; +//! +//! let target_resolver = ServiceTargetResolver::from_resolver_fn( +//! |service_target| -> Result { +//! let model_name = service_target.model.model_name.to_string(); +//! +//! // Route to appropriate endpoint based on model naming +//! let endpoint_url = if model_name.starts_with("coding::") { +//! "https://api.z.ai/api/coding/paas/v4/" +//! } else { +//! "https://api.z.ai/api/paas/v4/" +//! }; +//! +//! let final_endpoint = Endpoint::from_static(endpoint_url); +//! let final_model = ModelIden::new(AdapterKind::Zai, clean_model_name); +//! +//! Ok(ServiceTarget { endpoint: final_endpoint, model: final_model }) +//! } +//! ); +//! +//! let client = Client::builder().with_service_target_resolver(target_resolver).build(); +//! +//! // Use regular API +//! let response = client.exec_chat("glm-4.6", chat_request, None).await?; +//! +//! // Use coding plan +//! let response = client.exec_chat("coding::glm-4.6", chat_request, None).await?; +//! ``` +//! +//! See `examples/c07-zai-dual-endpoints.rs` for a complete working example. 
+ +// region: --- Modules + +mod adapter_impl; + +pub use adapter_impl::*; + +// endregion: --- Modules \ No newline at end of file diff --git a/src/adapter/adapters/zhipu/mod.rs b/src/adapter/adapters/zhipu/mod.rs deleted file mode 100644 index bc7f0f9f..00000000 --- a/src/adapter/adapters/zhipu/mod.rs +++ /dev/null @@ -1,12 +0,0 @@ -//! Click the globe icon on the top-right corner of the page to switch language. -//! API Documentation: -//! Model Names: -//! Pricing: - -// region: --- Modules - -mod adapter_impl; - -pub use adapter_impl::*; - -// endregion: --- Modules diff --git a/src/adapter/dispatcher.rs b/src/adapter/dispatcher.rs index 4e6030e7..f0909dc9 100644 --- a/src/adapter/dispatcher.rs +++ b/src/adapter/dispatcher.rs @@ -10,7 +10,7 @@ use crate::adapter::ollama::OllamaAdapter; use crate::adapter::openai::OpenAIAdapter; use crate::adapter::openai_resp::OpenAIRespAdapter; use crate::adapter::xai::XaiAdapter; -use crate::adapter::zhipu::ZhipuAdapter; +use crate::adapter::adapters::zai::ZaiAdapter; use crate::adapter::{Adapter, AdapterKind, ServiceType, WebRequestData}; use crate::chat::{ChatOptionsSet, ChatRequest, ChatResponse, ChatStreamResponse}; use crate::embed::{EmbedOptionsSet, EmbedRequest, EmbedResponse}; @@ -40,7 +40,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::default_endpoint(), AdapterKind::Xai => XaiAdapter::default_endpoint(), AdapterKind::DeepSeek => DeepSeekAdapter::default_endpoint(), - AdapterKind::Zhipu => ZhipuAdapter::default_endpoint(), + AdapterKind::Zai => ZaiAdapter::default_endpoint(), AdapterKind::Cohere => CohereAdapter::default_endpoint(), AdapterKind::Ollama => OllamaAdapter::default_endpoint(), } @@ -58,7 +58,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::default_auth(), AdapterKind::Xai => XaiAdapter::default_auth(), AdapterKind::DeepSeek => DeepSeekAdapter::default_auth(), - AdapterKind::Zhipu => ZhipuAdapter::default_auth(), + AdapterKind::Zai => ZaiAdapter::default_auth(), 
AdapterKind::Cohere => CohereAdapter::default_auth(), AdapterKind::Ollama => OllamaAdapter::default_auth(), } @@ -76,7 +76,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::all_model_names(kind).await, AdapterKind::Xai => XaiAdapter::all_model_names(kind).await, AdapterKind::DeepSeek => DeepSeekAdapter::all_model_names(kind).await, - AdapterKind::Zhipu => ZhipuAdapter::all_model_names(kind).await, + AdapterKind::Zai => ZaiAdapter::all_model_names(kind).await, AdapterKind::Cohere => CohereAdapter::all_model_names(kind).await, AdapterKind::Ollama => OllamaAdapter::all_model_names(kind).await, } @@ -94,7 +94,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::get_service_url(model, service_type, endpoint), AdapterKind::Xai => XaiAdapter::get_service_url(model, service_type, endpoint), AdapterKind::DeepSeek => DeepSeekAdapter::get_service_url(model, service_type, endpoint), - AdapterKind::Zhipu => ZhipuAdapter::get_service_url(model, service_type, endpoint), + AdapterKind::Zai => ZaiAdapter::get_service_url(model, service_type, endpoint), AdapterKind::Cohere => CohereAdapter::get_service_url(model, service_type, endpoint), AdapterKind::Ollama => OllamaAdapter::get_service_url(model, service_type, endpoint), } @@ -124,7 +124,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::Xai => XaiAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_web_request_data(target, service_type, chat_req, options_set), - AdapterKind::Zhipu => ZhipuAdapter::to_web_request_data(target, service_type, chat_req, options_set), + AdapterKind::Zai => ZaiAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::Cohere => CohereAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::Ollama => OllamaAdapter::to_web_request_data(target, 
service_type, chat_req, options_set), } @@ -146,7 +146,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::Xai => XaiAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_chat_response(model_iden, web_response, options_set), - AdapterKind::Zhipu => ZhipuAdapter::to_chat_response(model_iden, web_response, options_set), + AdapterKind::Zai => ZaiAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::Cohere => CohereAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::Ollama => OllamaAdapter::to_chat_response(model_iden, web_response, options_set), } @@ -171,7 +171,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::Xai => XaiAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), - AdapterKind::Zhipu => ZhipuAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), + AdapterKind::Zai => ZaiAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::Cohere => CohereAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::Ollama => OllamaAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), } @@ -197,7 +197,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::Xai => XaiAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_embed_request_data(target, embed_req, options_set), - AdapterKind::Zhipu => ZhipuAdapter::to_embed_request_data(target, embed_req, options_set), + AdapterKind::Zai => ZaiAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::Cohere => 
CohereAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::Ollama => OllamaAdapter::to_embed_request_data(target, embed_req, options_set), } @@ -222,7 +222,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::Xai => XaiAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_embed_response(model_iden, web_response, options_set), - AdapterKind::Zhipu => ZhipuAdapter::to_embed_response(model_iden, web_response, options_set), + AdapterKind::Zai => ZaiAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::Cohere => CohereAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::Ollama => OllamaAdapter::to_embed_response(model_iden, web_response, options_set), } diff --git a/tests/tests_p_zhipu.rs b/tests/tests_p_zai.rs similarity index 95% rename from tests/tests_p_zhipu.rs rename to tests/tests_p_zai.rs index d32f53df..70bc6952 100644 --- a/tests/tests_p_zhipu.rs +++ b/tests/tests_p_zai.rs @@ -5,7 +5,7 @@ use genai::adapter::AdapterKind; use genai::resolver::AuthData; const MODEL: &str = "glm-4-plus"; -const MODEL_NS: &str = "zhipu::glm-4-plus"; +const MODEL_NS: &str = "zai::glm-4-plus"; const MODEL_V: &str = "glm-4v-flash"; // Visual language model does not support function calling // region: --- Chat @@ -106,7 +106,7 @@ async fn test_tool_full_flow_ok() -> TestResult<()> { #[tokio::test] async fn test_resolver_auth_ok() -> TestResult<()> { - common_tests::common_test_resolver_auth_ok(MODEL, AuthData::from_env("ZHIPU_API_KEY")).await + common_tests::common_test_resolver_auth_ok(MODEL, AuthData::from_env("ZAI_API_KEY")).await } // endregion: --- Resolver Tests @@ -115,7 +115,7 @@ async fn test_resolver_auth_ok() -> TestResult<()> { #[tokio::test] async fn test_list_models() -> TestResult<()> { - common_tests::common_test_list_models(AdapterKind::Zhipu, 
"glm-4-plus").await + common_tests::common_test_list_models(AdapterKind::Zai, "glm-4-plus").await } // endregion: --- List diff --git a/tests/tests_p_zhipu_reasoning.rs b/tests/tests_p_zai_reasoning.rs similarity index 96% rename from tests/tests_p_zhipu_reasoning.rs rename to tests/tests_p_zai_reasoning.rs index 9031a409..c405e759 100644 --- a/tests/tests_p_zhipu_reasoning.rs +++ b/tests/tests_p_zai_reasoning.rs @@ -66,7 +66,7 @@ async fn test_chat_stream_capture_content_ok() -> TestResult<()> { #[tokio::test] async fn test_resolver_auth_ok() -> TestResult<()> { - common_tests::common_test_resolver_auth_ok(MODEL, AuthData::from_env("ZHIPU_API_KEY")).await + common_tests::common_test_resolver_auth_ok(MODEL, AuthData::from_env("ZAI_API_KEY")).await } // endregion: --- Resolver Tests @@ -75,7 +75,7 @@ async fn test_resolver_auth_ok() -> TestResult<()> { #[tokio::test] async fn test_list_models() -> TestResult<()> { - common_tests::common_test_list_models(AdapterKind::Zhipu, "glm-z1-flash").await + common_tests::common_test_list_models(AdapterKind::Zai, "glm-z1-flash").await } // endregion: --- List From 4aef5cafd51cabbdaf316b281cb5b29b0600f6d7 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 24 Oct 2025 16:36:51 -0700 Subject: [PATCH 005/123] . 
anthropic - update model name for haiku 4.5 --- src/adapter/adapters/anthropic/adapter_impl.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adapter/adapters/anthropic/adapter_impl.rs b/src/adapter/adapters/anthropic/adapter_impl.rs index 334e514c..5b16e281 100644 --- a/src/adapter/adapters/anthropic/adapter_impl.rs +++ b/src/adapter/adapters/anthropic/adapter_impl.rs @@ -41,7 +41,7 @@ const ANTHROPIC_VERSION: &str = "2023-06-01"; const MODELS: &[&str] = &[ "claude-opus-4-1-20250805", "claude-sonnet-4-5-20250929", - "claude-3-5-haiku-latest", + "claude-haiku-4-5-20251001", ]; impl AnthropicAdapter { From d23451338fbea160350bb143c08dc41c9d31a336 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 24 Oct 2025 16:42:51 -0700 Subject: [PATCH 006/123] . anthropic - update claude-haiku(4.5) max tokens to 64k as per spec --- src/adapter/adapters/anthropic/adapter_impl.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/adapter/adapters/anthropic/adapter_impl.rs b/src/adapter/adapters/anthropic/adapter_impl.rs index 5b16e281..505a3f31 100644 --- a/src/adapter/adapters/anthropic/adapter_impl.rs +++ b/src/adapter/adapters/anthropic/adapter_impl.rs @@ -31,7 +31,7 @@ const REASONING_HIGH: u32 = 24000; // For max model tokens see: https://docs.anthropic.com/en/docs/about-claude/models/overview // // fall back -const MAX_TOKENS_64K: u32 = 64000; // claude-3-7-sonnet, claude-sonnet-4.x +const MAX_TOKENS_64K: u32 = 64000; // claude-3-7-sonnet, claude-sonnet-4.x, claude-haiku-4-5 // custom const MAX_TOKENS_32K: u32 = 32000; // claude-opus-4 const MAX_TOKENS_8K: u32 = 8192; // claude-3-5-sonnet, claude-3-5-haiku @@ -181,7 +181,10 @@ impl Adapter for AnthropicAdapter { // const MAX_TOKENS_4K: u32 = 4096; // claude-3-opus, claude-3-haiku let max_tokens = options_set.max_tokens().unwrap_or_else(|| { // most likely models used, so put first. 
Also a little wider with `claude-sonnet` (since name from version 4) - if model_name.contains("claude-sonnet") || model_name.contains("claude-3-7-sonnet") { + if model_name.contains("claude-sonnet") + || model_name.contains("claude-haiku") + || model_name.contains("claude-3-7-sonnet") + { MAX_TOKENS_64K } else if model_name.contains("claude-opus-4") { MAX_TOKENS_32K From db19d5dc1a9e71bd93ac3893d8d5dc855a8894a0 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 25 Oct 2025 10:22:00 -0700 Subject: [PATCH 007/123] . first pass at adding the module spec --- .gitignore | 1 + dev/spec/_spec-rules.md | 59 +++++++++++++++++++++++++++++++++++ dev/spec/spec-adapter.md | 33 ++++++++++++++++++++ dev/spec/spec-chat.md | 66 ++++++++++++++++++++++++++++++++++++++++ dev/spec/spec-client.md | 59 +++++++++++++++++++++++++++++++++++ dev/spec/spec-common.md | 36 ++++++++++++++++++++++ dev/spec/spec-webc.md | 36 ++++++++++++++++++++++ 7 files changed, 290 insertions(+) create mode 100644 dev/spec/_spec-rules.md create mode 100644 dev/spec/spec-adapter.md create mode 100644 dev/spec/spec-chat.md create mode 100644 dev/spec/spec-client.md create mode 100644 dev/spec/spec-common.md create mode 100644 dev/spec/spec-webc.md diff --git a/.gitignore b/.gitignore index 8d6ad4fd..4ade7c75 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ _* # '_' in src dir, ok. !**/src/**/_* +!**/spec/**/_* *.lock *.lockb diff --git a/dev/spec/_spec-rules.md b/dev/spec/_spec-rules.md new file mode 100644 index 00000000..a666acb3 --- /dev/null +++ b/dev/spec/_spec-rules.md @@ -0,0 +1,59 @@ +# Specification Guidelines + +This document defines the rules for creating and maintaining specification files. + +Important formatting rules + +- Use `-` for bullet points. +- For numbering bullet point style, have empty lines between numbering line. + + +## Types of Specification Files + +### `spec--index.md` + +A single file providing a high-level summary of the entire system. 
+ +### `spec-module_name.md` + +A specification file for each individual module. +- `module-path-name` represents the module’s hierarchy path, flattened with `-`. +- Each file documents the specification for a single module. + +Make sure that the `module_name` is the top most common just after `src/` + +For example `src/module_01/sub_mod/some_file.rs` the spec module name will be `dev/spec/spec-module_01.md` + +(module_name is lowercase) + +## Required Structure for Module Specification Files + +Each `spec-module-path-name.md` file must include the following sections. + + + +## module-path-name + +### Goal + +A clear description of the module’s purpose and responsibilities. + +### Public Module API + +A description of the APIs exposed by the module. +- Define what is exported and how it can be consumed by other modules. +- Include function signatures, data structures, or endpoints as needed. + +### Module Parts + +A breakdown of the module’s internal components. +- May reference sub-files or sub-modules. +- Should explain how the parts work together. + +### Key Design Considerations + +Key design considerations of this module and of its key parts. + + + + diff --git a/dev/spec/spec-adapter.md b/dev/spec/spec-adapter.md new file mode 100644 index 00000000..17e24158 --- /dev/null +++ b/dev/spec/spec-adapter.md @@ -0,0 +1,33 @@ +## adapter + +### Goal + +The `adapter` module is responsible for abstracting the communication with various Generative AI providers (e.g., OpenAI, Gemini, Anthropic, Groq, DeepSeek). It translates generic GenAI requests (like `ChatRequest` and `EmbedRequest`) into provider-specific HTTP request data and converts provider-specific web responses back into generic GenAI response structures. It acts as the translation and dispatch layer between the client logic and the underlying web communication. 
+ +### Public Module API + +The primary public API exposed by the `adapter` module is: + +- `AdapterKind`: An enum identifying the AI provider or protocol type (e.g., `OpenAI`, `Gemini`, `Anthropic`, `Cohere`). This type is used by the client and resolver layers to determine which adapter implementation should handle a specific model request. + +### Module Parts + +- `adapter_kind.rs`: Defines the `AdapterKind` enum. It includes implementation details for serialization, environment variable name resolution, and a default static mapping logic (`from_model`) to associate model names with a specific `AdapterKind`. + +- `adapter_types.rs`: Defines the `Adapter` trait, which sets the contract for all concrete adapter implementations. It also defines common types like `ServiceType` (Chat, ChatStream, Embed) and `WebRequestData` (the normalized structure holding URL, headers, and payload before web execution). + +- `dispatcher.rs`: Contains the `AdapterDispatcher` struct, which acts as the central routing mechanism. It dispatches calls from the client layer to the correct concrete adapter implementation based on the resolved `AdapterKind`. + +- `inter_stream.rs`: Defines internal types (`InterStreamEvent`, `InterStreamEnd`) used by streaming adapters to standardize the output format from diverse provider streaming protocols. This intermediary layer handles complex stream features like capturing usage, reasoning content, and tool calls before conversion to public `ChatStreamResponse` events. + +- `adapters/`: This submodule contains the concrete implementation of the `Adapter` trait for each provider (e.g., `openai`, `gemini`, `anthropic`, `zai`). These submodules handle the specific request/response translation logic for their respective protocols. + +### Key Design Considerations + +- **Stateless and Static Dispatch:** Adapters are designed to be stateless, with all methods in the `Adapter` trait being associated functions (static). 
Requests are routed efficiently using static dispatch through the `AdapterDispatcher`, minimizing runtime overhead and simplifying dependency management. + +- **Request/Response Normalization:** The adapter layer ensures that incoming requests and outgoing responses conform to generic GenAI types, hiding provider-specific implementation details from the rest of the library. + +- **Dynamic Resolution:** While `AdapterKind::from_model` provides a default mapping from model names (based on common prefixes or keywords), the system allows this to be overridden by custom `ServiceTargetResolver` configurations, enabling flexible routing (e.g., mapping a custom model name to an `OpenAI` adapter with a custom endpoint). + +- **Stream Intermediation:** The introduction of `InterStreamEvent` is crucial for handling the variance in streaming protocols across providers. it ensures that complex data transmitted at the end of a stream (like final usage statistics or aggregated tool calls) can be correctly collected and normalized, regardless of the provider's specific event format. diff --git a/dev/spec/spec-chat.md b/dev/spec/spec-chat.md new file mode 100644 index 00000000..52c1be62 --- /dev/null +++ b/dev/spec/spec-chat.md @@ -0,0 +1,66 @@ +## chat + +### Goal + +The `chat` module provides the core primitives for constructing chat requests, defining messages (including multi-part content like text, binary, and tool data), and handling synchronous and asynchronous (streaming) chat responses across all supported AI providers. It standardizes the data structures necessary for modern LLM interactions. + +### Public Module API + +The module exports the following key data structures: + +- **Request/Message Structure:** + - `ChatRequest`: The primary structure for initiating a chat completion call, containing the history (`messages`), an optional system prompt (`system`), and tool definitions (`tools`). 
+ - `ChatMessage`: Represents a single interaction turn, comprising a `ChatRole`, `MessageContent`, and optional `MessageOptions`. + - `ChatRole`: Enum defining message roles (`System`, `User`, `Assistant`, `Tool`). + - `MessageContent`: A unified container for multi-part content, wrapping a list of `ContentPart`s. + - `ContentPart`: Enum defining content types: `Text`, `Binary`, `ToolCall`, `ToolResponse`. + - `Binary`, `BinarySource`: Structures defining binary payloads (e.g., images), sourced via base64 or URL. + - `MessageOptions`, `CacheControl`: Per-message configuration hints (e.g., for cache behavior). + +- **Configuration:** + - `ChatOptions`: General request configuration, including sampling parameters (`temperature`, `max_tokens`, `top_p`, `seed`), streaming capture flags, and format control. + - `ReasoningEffort`, `Verbosity`: Provider-specific hints for reasoning intensity or output verbosity. + - `ChatResponseFormat`, `JsonSpec`: Defines desired structured output formats (e.g., JSON mode). + +- **Responses:** + - `ChatResponse`: The result of a non-streaming request, including final content, usage, and model identifiers. + - `ChatStreamResponse`: The result wrapper for streaming requests, containing the `ChatStream` and model identity. + +- **Streaming:** + - `ChatStream`: A `futures::Stream` implementation yielding `ChatStreamEvent`s. + - `ChatStreamEvent`: Enum defining streaming events: `Start`, `Chunk` (content), `ReasoningChunk`, `ToolCallChunk`, and `End`. + - `StreamEnd`: Terminal event data including optional captured usage, content, and reasoning content. + +- **Tooling:** + - `Tool`: Metadata and schema defining a function the model can call. + - `ToolCall`: The model's invocation request for a specific tool. + - `ToolResponse`: The output returned from executing a tool, matched by call ID. + +- **Metadata:** + - `Usage`, `PromptTokensDetails`, `CompletionTokensDetails`: Normalized token usage statistics. 
+ +- **Utilities:** + - `printer` module: Contains `print_chat_stream` for console output utilities. + +### Module Parts + +The functionality is divided into specialized files/sub-modules: + +- `chat_message.rs`: Defines the `ChatMessage` fundamental structure and associated types (`ChatRole`, `MessageOptions`). +- `chat_options.rs`: Manages request configuration (`ChatOptions`) and provides parsing logic for provider-specific hints like `ReasoningEffort` and `Verbosity`. +- `chat_req_response_format.rs`: Handles configuration for structured output (`ChatResponseFormat`, `JsonSpec`). +- `chat_request.rs`: Defines the top-level `ChatRequest` and methods for managing the request history and properties. +- `chat_response.rs`: Defines synchronous chat response structures (`ChatResponse`). +- `chat_stream.rs`: Implements the public `ChatStream` and its events, mapping from the internal adapter stream. +- `content_part.rs`: Defines `ContentPart`, `Binary`, and `BinarySource` for handling multi-modal inputs/outputs. +- `message_content.rs`: Defines `MessageContent`, focusing on collection management and convenient accessors for content parts (e.g., joining all text). +- `tool/mod.rs` (and associated files): Defines the tooling primitives (`Tool`, `ToolCall`, `ToolResponse`). +- `usage.rs`: Defines the normalized token counting structures (`Usage`). +- `printer.rs`: Provides utility functions for rendering stream events to standard output. + +### Key Design Considerations + +- **Unified Content Model:** The use of `MessageContent` composed of `ContentPart` allows any message role (user, assistant, tool) to handle complex, multi-part data seamlessly, including text, binary payloads, and tooling actions. +- **Decoupled Streaming:** The public `ChatStream` is an abstraction layer over an internal stream (`InterStream`), ensuring a consistent external interface regardless of adapter implementation details (like internal handling of usage reporting or reasoning chunks). 
+- **Normalized Usage Metrics:** The `Usage` structure provides an OpenAI-compatible interface while allowing for provider-specific breakdowns (e.g., caching or reasoning tokens) via detailed sub-structures. +- **Hierarchical Options:** `ChatOptions` can be applied globally at the client level or specifically per request. The internal resolution logic ensures request-specific options take precedence over client defaults. diff --git a/dev/spec/spec-client.md b/dev/spec/spec-client.md new file mode 100644 index 00000000..cedd505d --- /dev/null +++ b/dev/spec/spec-client.md @@ -0,0 +1,59 @@ +## client + +### Goal + +The `client` module provides the core entry point (`Client`) for interacting with various Generative AI providers. It encapsulates configuration (`ClientConfig`, `WebConfig`), a builder pattern (`ClientBuilder`), request execution (`exec_chat`, `exec_embed`), and service resolution logic (e.g., determining endpoints and authentication). + +### Public Module API + +The `client` module exposes the following public types: + +- **`Client`**: The main interface for executing AI requests (chat, embedding, streaming, model listing). + - `Client::builder()`: Starts the configuration process. + - `Client::default()`: Creates a client with default configuration. + - Core execution methods: `exec_chat`, `exec_chat_stream`, `exec_embed`, `embed`, `embed_batch`. + - Resolution/Discovery methods: `all_model_names`, `resolve_service_target`. + +- **`ClientBuilder`**: Provides a fluent interface for constructing a `Client`. Used to set `ClientConfig`, default `ChatOptions`, `EmbedOptions`, and custom resolvers (`AuthResolver`, `ServiceTargetResolver`, `ModelMapper`). + +- **`ClientConfig`**: Holds the resolved and default configurations used by the `Client`, including resolver functions and default options. + +- **`Headers`**: A simple map wrapper (`HashMap`) for managing HTTP headers in requests. 
+ +- **`ServiceTarget`**: A struct containing the final resolved components needed to execute a request: `Endpoint`, `AuthData`, and `ModelIden`. + +- **`WebConfig`**: Configuration options specifically for building the underlying `reqwest::Client` (e.g., timeouts, proxies, default headers). + +### Module Parts + +The module is composed of several files that implement the layered client architecture: + +- `builder.rs`: Implements `ClientBuilder`, handling the creation and configuration flow. It initializes or updates the nested `ClientConfig` and optionally an internal `WebClient`. + +- `client_types.rs`: Defines the main `Client` struct and `ClientInner` (which holds `WebClient` and `ClientConfig` behind an `Arc`). + +- `config.rs`: Defines `ClientConfig` and the core `resolve_service_target` logic, which orchestrates calls to `ModelMapper`, `AuthResolver`, and `ServiceTargetResolver` before falling back to adapter defaults. + +- `client_impl.rs`: Contains the main implementation of the public API methods on `Client`, such as `exec_chat` and `exec_embed`. These methods perform service resolution and delegate to `AdapterDispatcher` for request creation and response parsing. + +- `headers.rs`: Implements the `Headers` utility for managing key-value HTTP header maps. + +- `service_target.rs`: Defines the `ServiceTarget` structure for resolved endpoints, authentication, and model identifiers. + +- `web_config.rs`: Defines `WebConfig` and its logic for applying settings to a `reqwest::ClientBuilder`. + +### Key Design Considerations + +- **Client Immutability and Sharing**: The `Client` holds its internal state (`ClientInner` with `WebClient` and `ClientConfig`) wrapped in an `Arc`. This design ensures that the client is thread-safe and cheaply cloneable, aligning with common client patterns in asynchronous Rust applications. 
+ +- **Config Layering and Resolution**: The client architecture employs a sophisticated resolution process managed by `ClientConfig::resolve_service_target`. + - It first applies a `ModelMapper` to potentially translate the input model identifier. + - It then consults the `AuthResolver` for authentication data. If the resolver is absent or returns `None`, it defaults to the adapter's standard authentication mechanism (e.g., API key headers). + - It determines the adapter's default endpoint. + - Finally, it applies the optional `ServiceTargetResolver`, allowing users to override the endpoint, auth, or model for complex scenarios (e.g., custom proxies or routing). + +- **WebClient Abstraction**: The core HTTP client logic is delegated to the `WebClient` (from the `webc` module), which handles low-level request execution and streaming setup. This separation keeps the `client` module focused on business logic and AI provider orchestration. + +- **Builder Pattern for Configuration**: `ClientBuilder` enforces configuration before client creation, simplifying object construction and ensuring necessary dependencies are set up correctly. + +- **Headers Simplification**: The `Headers` struct abstracts HTTP header management, ensuring that subsequent merges or overrides result in a single, final header value, which is typical for API key authorization overrides. diff --git a/dev/spec/spec-common.md b/dev/spec/spec-common.md new file mode 100644 index 00000000..b2d13024 --- /dev/null +++ b/dev/spec/spec-common.md @@ -0,0 +1,36 @@ +## common + +### Goal + +The `common` module provides fundamental data structures used throughout the `genai` library, primarily focusing on identifying models and adapters in a clear and efficient manner. + +### Public Module API + +The module exposes two main types: `ModelName` and `ModelIden`. + +- `ModelName`: Represents a generative AI model identifier (e.g., `"gpt-4o"`, `"claude-3-opus"`). 
It wraps an `Arc<str>` for efficient cloning and sharing across threads.
+  - Implements `From<String>`, `From<&String>`, `From<&str>`, and `Deref`.
+  - Supports equality comparison (`PartialEq`) with various string types (`&str`, `String`).
+
+- `ModelIden`: Uniquely identifies a model by coupling an `AdapterKind` with a `ModelName`.
+  - Fields:
+    - `adapter_kind: AdapterKind`
+    - `model_name: ModelName`
+  - Constructor: `fn new(adapter_kind: AdapterKind, model_name: impl Into<ModelName>) -> Self`
+  - Utility methods for creating new identifiers based on name changes:
+    - `fn from_name<T: Into<ModelName>>(&self, new_name: T) -> ModelIden`
+    - `fn from_optional_name<T: Into<ModelName>>(&self, new_name: Option<T>) -> ModelIden`
Its primary role is to abstract standard HTTP requests (GET/POST) and manage complex streaming responses required by various AI providers, especially those that do not fully conform to the Server-Sent Events (SSE) standard (`text/event-stream`). It handles standard JSON requests/responses and custom stream parsing. + +### Public Module API + +The `webc` module is primarily an internal component, only exposing its dedicated error type publicly. + +- `pub use error::Error;` + - `Error`: An enum representing all possible errors originating from the web communication layer (e.g., failed status codes, JSON parsing errors, reqwest errors, stream clone errors). + +(All other types like `WebClient`, `WebResponse`, `WebStream`, and `Result` are exported as `pub(crate)` for internal library use.) + +### Module Parts + +The module consists of three main internal components: + +- `error.rs`: Defines the `Error` enum and the module-scoped `Result` type alias. It captures network/HTTP related failures and external errors like `reqwest::Error` and `value_ext::JsonValueExtError`. + +- `web_client.rs`: Contains the `WebClient` struct, a thin wrapper around `reqwest::Client`. It provides methods (`do_get`, `do_post`) for non-streaming standard HTTP communication, which assumes the response body is JSON and is parsed into `serde_json::Value`. It also defines `WebResponse`, which encapsulates the HTTP status and parsed JSON body. + +- `web_stream.rs`: Implements `WebStream`, a custom `futures::Stream` implementation designed for handling non-SSE streaming protocols used by some AI providers (e.g., Cohere, Gemini). It defines `StreamMode` to specify how stream chunks should be parsed (either by a fixed delimiter or specialized handling for "Pretty JSON Array" formats). + +### Key Design Considerations + +- **Internal Focus:** The module is designed strictly for internal use (`pub(crate)`) except for the public error type. 
This shields the rest of the library from direct `reqwest` dependency details. + +- **Custom Streaming:** `WebStream` exists specifically to manage streaming protocols that deviate from the standard SSE format, providing message splitting based on `StreamMode`. This ensures compatibility with providers like Cohere (delimiter-based) and Gemini (JSON array chunking). + +- **Generic JSON Response Handling:** `WebResponse` abstracts successful non-streaming responses by immediately parsing the body into `serde_json::Value`. This allows adapter modules to deserialize into their specific structures subsequently. + +- **Error Richness:** The `Error::ResponseFailedStatus` variant includes the `StatusCode`, full `body`, and `HeaderMap` to provide comprehensive debugging information upon API failure. + +- **Async Implementation:** All network operations rely on `tokio` and `reqwest`, ensuring non-blocking execution throughout the I/O layer. `WebStream` leverages `futures::Stream` traits for integration with standard Rust async infrastructure. From 28011fdcdc241ffae55c5bfc06f3e511a22af79c Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 25 Oct 2025 10:41:03 -0700 Subject: [PATCH 008/123] . cargo fmt --- examples/c07-zai.rs | 93 ++++++++++++++++----------------- src/adapter/adapter_kind.rs | 4 +- src/adapter/adapters/zai/mod.rs | 2 +- src/adapter/dispatcher.rs | 2 +- 4 files changed, 50 insertions(+), 51 deletions(-) diff --git a/examples/c07-zai.rs b/examples/c07-zai.rs index 5ee13223..e8f7b13b 100644 --- a/examples/c07-zai.rs +++ b/examples/c07-zai.rs @@ -1,56 +1,55 @@ //! ZAI (Zhipu AI) adapter example -//! +//! //! Demonstrates how to use ZAI models with automatic endpoint routing: //! - `glm-4.6` → Regular credit-based API //! 
- `zai::glm-4.6` → Coding subscription API (automatically routed) -use genai::chat::{ChatMessage, ChatRequest}; use genai::Client; +use genai::chat::{ChatMessage, ChatRequest}; #[tokio::main] async fn main() -> Result<(), Box> { - let client = Client::builder().build(); - - // Test cases demonstrating automatic endpoint routing - let test_cases = vec![ - ("glm-4.6", "Regular ZAI model"), - ("zai::glm-4.6", "Coding subscription model"), - ]; - - for (model_name, description) in test_cases { - println!("\n=== {} ===", description); - println!("Model: {}", model_name); - - let chat_req = ChatRequest::default() - .with_system("You are a helpful assistant.") - .append_message(ChatMessage::user("Say 'hello' and nothing else.")); - - match client.exec_chat(model_name, chat_req, None).await { - Ok(response) => { - println!("✅ Success!"); - if let Some(content) = response.first_text() { - println!("Response: {}", content); - } - if response.usage.prompt_tokens.is_some() || response.usage.completion_tokens.is_some() { - println!("Usage: prompt={}, output={}", - response.usage.prompt_tokens.unwrap_or(0), - response.usage.completion_tokens.unwrap_or(0)); - } - } - Err(e) => { - println!("❌ Error: {}", e); - if e.to_string().contains("insufficient balance") { - println!("ℹ️ This model requires credits or subscription"); - } else if e.to_string().contains("401") { - println!("ℹ️ Set ZAI_API_KEY environment variable"); - } - } - } - } - - println!("\n=== SUMMARY ==="); - println!("✅ ZAI adapter handles namespace routing automatically"); - println!("✅ Use ZAI_API_KEY environment variable"); - - Ok(()) -} \ No newline at end of file + let client = Client::builder().build(); + + // Test cases demonstrating automatic endpoint routing + let test_cases = vec![("glm-4.6", "Regular ZAI model"), ("zai::glm-4.6", "Coding subscription model")]; + + for (model_name, description) in test_cases { + println!("\n=== {} ===", description); + println!("Model: {}", model_name); + + let chat_req = 
ChatRequest::default() + .with_system("You are a helpful assistant.") + .append_message(ChatMessage::user("Say 'hello' and nothing else.")); + + match client.exec_chat(model_name, chat_req, None).await { + Ok(response) => { + println!("✅ Success!"); + if let Some(content) = response.first_text() { + println!("Response: {}", content); + } + if response.usage.prompt_tokens.is_some() || response.usage.completion_tokens.is_some() { + println!( + "Usage: prompt={}, output={}", + response.usage.prompt_tokens.unwrap_or(0), + response.usage.completion_tokens.unwrap_or(0) + ); + } + } + Err(e) => { + println!("❌ Error: {}", e); + if e.to_string().contains("insufficient balance") { + println!("ℹ️ This model requires credits or subscription"); + } else if e.to_string().contains("401") { + println!("ℹ️ Set ZAI_API_KEY environment variable"); + } + } + } + } + + println!("\n=== SUMMARY ==="); + println!("✅ ZAI adapter handles namespace routing automatically"); + println!("✅ Use ZAI_API_KEY environment variable"); + + Ok(()) +} diff --git a/src/adapter/adapter_kind.rs b/src/adapter/adapter_kind.rs index 03fb87f5..9e52d50a 100644 --- a/src/adapter/adapter_kind.rs +++ b/src/adapter/adapter_kind.rs @@ -1,4 +1,5 @@ use crate::adapter::adapters::together::TogetherAdapter; +use crate::adapter::adapters::zai::ZaiAdapter; use crate::adapter::anthropic::AnthropicAdapter; use crate::adapter::cohere::CohereAdapter; use crate::adapter::deepseek::{self, DeepSeekAdapter}; @@ -8,7 +9,6 @@ use crate::adapter::groq::{self, GroqAdapter}; use crate::adapter::nebius::NebiusAdapter; use crate::adapter::openai::OpenAIAdapter; use crate::adapter::xai::XaiAdapter; -use crate::adapter::adapters::zai::ZaiAdapter; use crate::{ModelName, Result}; use derive_more::Display; use serde::{Deserialize, Serialize}; @@ -162,7 +162,7 @@ impl AdapterKind { if ns == "zai" { return Ok(AdapterKind::Zai); } - + if let Some(adapter) = Self::from_lower_str(ns) { return Ok(adapter); } else { diff --git 
a/src/adapter/adapters/zai/mod.rs b/src/adapter/adapters/zai/mod.rs index 3eaf9b80..a7d774ff 100644 --- a/src/adapter/adapters/zai/mod.rs +++ b/src/adapter/adapters/zai/mod.rs @@ -58,4 +58,4 @@ mod adapter_impl; pub use adapter_impl::*; -// endregion: --- Modules \ No newline at end of file +// endregion: --- Modules diff --git a/src/adapter/dispatcher.rs b/src/adapter/dispatcher.rs index f0909dc9..f2fd064f 100644 --- a/src/adapter/dispatcher.rs +++ b/src/adapter/dispatcher.rs @@ -1,5 +1,6 @@ use super::groq::GroqAdapter; use crate::adapter::adapters::together::TogetherAdapter; +use crate::adapter::adapters::zai::ZaiAdapter; use crate::adapter::anthropic::AnthropicAdapter; use crate::adapter::cohere::CohereAdapter; use crate::adapter::deepseek::DeepSeekAdapter; @@ -10,7 +11,6 @@ use crate::adapter::ollama::OllamaAdapter; use crate::adapter::openai::OpenAIAdapter; use crate::adapter::openai_resp::OpenAIRespAdapter; use crate::adapter::xai::XaiAdapter; -use crate::adapter::adapters::zai::ZaiAdapter; use crate::adapter::{Adapter, AdapterKind, ServiceType, WebRequestData}; use crate::chat::{ChatOptionsSet, ChatRequest, ChatResponse, ChatStreamResponse}; use crate::embed::{EmbedOptionsSet, EmbedRequest, EmbedResponse}; From 1181667fc6c81223ad6a20131206272b7568fa7f Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 25 Oct 2025 10:41:16 -0700 Subject: [PATCH 009/123] . 
update to v0.4.3 --- CHANGELOG.md | 7 +++++++ Cargo.toml | 2 +- README.md | 4 +++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 910efe74..0ae27500 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ `.` minor | `-` Fix | `+` Addition | `^` improvement | `!` Change | `*` Refactor +## 2025-10-25 - [v0.4.3](https://github.com/jeremychone/rust-genai/compare/v0.4.2...v0.4.3) + +- `!` Refactor ZHIPU adapter to ZAI with namespace-based endpoint routing (#95) +- `-` openai - stream tool - Fix streaming too issue (#91) +- `.` added ModelName partial eq implementations for string types (#94) +- `.` anthropic - update model name for haiku 4.5 + ## 2025-10-12 - [v0.4.2](https://github.com/jeremychone/rust-genai/compare/v0.4.1...v0.4.2) - `.` test - make the common_test_chat_stop_sequences_ok more resilient diff --git a/Cargo.toml b/Cargo.toml index 4c1ff3f6..fe5de75e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.4.3-wip" +version = "0.4.3" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. 
(OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" diff --git a/README.md b/README.md index 2581d6a9..6cff5844 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Provides a single, ergonomic API to many generative AI providers, such as Anthro **NOTE:** Big update with **v0.4.x** - More adapters, PDF and image support, embeddings, custom headers, and transparent support for the OpenAI Responses API (gpt-5-codex) -## v0.4.0 Big Release +## v0.4.x Big Release - **What's new**: - **PDF and Images** support (thanks to [Andrew Rademacher](https://github.com/AndrewRademacher)) @@ -39,6 +39,8 @@ See: ## Big Thanks to +- [Bart Carroll](https://github.com/bartCarroll) For [#91](https://github.com/jeremychone/rust-genai/pull/91) Fixed streaming tool calls for openai models +- [Rui Andrada](https://github.com/shingonoide) For [#95](https://github.com/jeremychone/rust-genai/pull/95) refactoring ZHIPU adapter to ZAI - [Adrien](https://github.com/XciD) Extra headers in requests, seed for chat requests, and fixes (with [Julien Chaumond](https://github.com/julien-c) for extra headers) - [Andrew Rademacher](https://github.com/AndrewRademacher) for PDF support, Anthropic streamer, and insight on flattening the message content (e.g., ContentParts) - [Jesus Santander](https://github.com/jsantanders) Embedding support [PR #83](https://github.com/jeremychone/rust-genai/pull/83) From 7b30e42d4513da16cb08715001527591ff658f10 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 25 Oct 2025 10:42:57 -0700 Subject: [PATCH 010/123] . v0.4.4-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index fe5de75e..6e04e7f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.4.3" +version = "0.4.4-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. 
(OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 545a8c0b0e52afb47f2195cf78fcf53dfdc15d18 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Mon, 3 Nov 2025 10:08:50 -0800 Subject: [PATCH 011/123] . gitignore - add audio --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 4ade7c75..e691404d 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,11 @@ out/ *.ogg *.avi +# Audio +*.mp3 +*.wav +*.flac + # Images *.icns *.ico From 95783b4119fa177af56f917906b903c4c4b09467 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Mon, 3 Nov 2025 10:12:24 -0800 Subject: [PATCH 012/123] . gitignore - add ignore doc types --- .gitignore | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e691404d..b656c7da 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,16 @@ target/ dist/ out/ +# Doc Files +*.pdf +*.docx +*.xlsx +*.pptx +*.doc +*.xls +*.ppt +*.page + # Data Files *.db3 *.parquet @@ -54,7 +64,9 @@ out/ *.png *.bmp -!tests/data/*.jpg +# -- Test data (one by one) +!tests/data/duck-small.jpg +!tests/data/small.pdf # -- Nodejs node_modules/ From cb24f4f3df35be655a57865638e1523593ba5a8a Mon Sep 17 00:00:00 2001 From: Himmelschmidt <46351743+Himmelschmidt@users.noreply.github.com> Date: Mon, 3 Nov 2025 18:09:52 -0500 Subject: [PATCH 013/123] Add support for OpenAI `service_tier` parameter (#98) --- src/adapter/adapters/openai/adapter_impl.rs | 5 ++ src/chat/chat_options.rs | 79 +++++++++++++++++++++ src/error.rs | 3 + 3 files changed, 87 insertions(+) diff --git a/src/adapter/adapters/openai/adapter_impl.rs b/src/adapter/adapters/openai/adapter_impl.rs index a65f8d87..e5820087 100644 --- a/src/adapter/adapters/openai/adapter_impl.rs +++ b/src/adapter/adapters/openai/adapter_impl.rs @@ -332,6 +332,11 @@ impl OpenAIAdapter { if let Some(seed) = options_set.seed() { payload.x_insert("seed", seed)?; } + if let Some(service_tier) = options_set.service_tier() + && 
let Some(keyword) = service_tier.as_keyword() + { + payload.x_insert("service_tier", keyword)?; + } Ok(WebRequestData { url, headers, payload }) } diff --git a/src/chat/chat_options.rs b/src/chat/chat_options.rs index 6ebf3f95..73e24a57 100644 --- a/src/chat/chat_options.rs +++ b/src/chat/chat_options.rs @@ -63,6 +63,9 @@ pub struct ChatOptions { /// Seed for repeatability, if supported. pub seed: Option, + /// Service tier preference (OpenAI-specific, for flex processing). + pub service_tier: Option, + /// Additional HTTP headers to include with the request. pub extra_headers: Option, } @@ -153,6 +156,12 @@ impl ChatOptions { self } + /// Sets the service tier preference (OpenAI-specific). + pub fn with_service_tier(mut self, value: ServiceTier) -> Self { + self.service_tier = Some(value); + self + } + /// Adds extra HTTP headers. pub fn with_extra_headers(mut self, headers: impl Into) -> Self { self.extra_headers = Some(headers.into()); @@ -332,6 +341,70 @@ impl std::str::FromStr for Verbosity { // endregion: --- Verbosity +// region: --- ServiceTier + +/// OpenAI service tier preference for flex processing. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum ServiceTier { + /// Flex processing - lower costs, slower response times + Flex, + /// Auto - standard processing (default) + Auto, + /// Default - standard processing + Default, +} + +impl ServiceTier { + /// Returns the lowercase variant name. + pub fn variant_name(&self) -> &'static str { + match self { + ServiceTier::Flex => "flex", + ServiceTier::Auto => "auto", + ServiceTier::Default => "default", + } + } + + /// Returns the keyword for API usage. + pub fn as_keyword(&self) -> Option<&'static str> { + match self { + ServiceTier::Flex => Some("flex"), + ServiceTier::Auto => Some("auto"), + ServiceTier::Default => Some("default"), + } + } + + /// Parses a service tier keyword. 
+ pub fn from_keyword(name: &str) -> Option { + match name { + "flex" => Some(ServiceTier::Flex), + "auto" => Some(ServiceTier::Auto), + "default" => Some(ServiceTier::Default), + _ => None, + } + } +} + +impl std::fmt::Display for ServiceTier { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ServiceTier::Flex => write!(f, "flex"), + ServiceTier::Auto => write!(f, "auto"), + ServiceTier::Default => write!(f, "default"), + } + } +} + +impl std::str::FromStr for ServiceTier { + type Err = Error; + + /// Parses a service tier keyword. + fn from_str(s: &str) -> Result { + Self::from_keyword(s).ok_or(Error::ServiceTierParsing { actual: s.to_string() }) + } +} + +// endregion: --- ServiceTier + // region: --- ChatOptionsSet /// This is an internal crate struct to resolve the ChatOptions value in a cascading manner. @@ -440,6 +513,12 @@ impl ChatOptionsSet<'_, '_> { .or_else(|| self.client.and_then(|client| client.seed)) } + pub fn service_tier(&self) -> Option<&ServiceTier> { + self.chat + .and_then(|chat| chat.service_tier.as_ref()) + .or_else(|| self.client.and_then(|client| client.service_tier.as_ref())) + } + pub fn extra_headers(&self) -> Option<&Headers> { self.chat .and_then(|chat| chat.extra_headers.as_ref()) diff --git a/src/error.rs b/src/error.rs index 02d95080..745e1b04 100644 --- a/src/error.rs +++ b/src/error.rs @@ -36,6 +36,9 @@ pub enum Error { #[display("Failed to parse reasoning. Actual: '{actual}'")] ReasoningParsingError { actual: String }, + #[display("Failed to parse service tier. Actual: '{actual}'")] + ServiceTierParsing { actual: String }, + // -- Chat Output #[display("No chat response from model '{model_iden}'")] NoChatResponse { model_iden: ModelIden }, From 039678fde421fae28c7174f5341a0712feed44bc Mon Sep 17 00:00:00 2001 From: Vagmi Mudumbai Date: Wed, 29 Oct 2025 18:16:30 -0300 Subject: [PATCH 014/123] Add support for audio_type content part for voice agent support. 
(cherry picked from commit 448e6ec33e15711791c0e1983dcbebe9c1487aea) --- src/adapter/adapters/openai/adapter_impl.rs | 24 ++++++++++++++++++++- src/chat/content_part.rs | 13 +++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/adapter/adapters/openai/adapter_impl.rs b/src/adapter/adapters/openai/adapter_impl.rs index e5820087..b1578381 100644 --- a/src/adapter/adapters/openai/adapter_impl.rs +++ b/src/adapter/adapters/openai/adapter_impl.rs @@ -27,6 +27,8 @@ const MODELS: &[&str] = &[ "gpt-5", "gpt-5-mini", "gpt-5-nano", + "gpt-audio-mini", + "gpt-audio" ]; impl OpenAIAdapter { @@ -404,12 +406,32 @@ impl OpenAIAdapter { match part { ContentPart::Text(content) => values.push(json!({"type": "text", "text": content})), ContentPart::Binary(binary) => { + let is_audio = binary.is_audio(); let is_image = binary.is_image(); let Binary { content_type, source, .. } = binary; - if is_image { + if is_audio { + match &source { + BinarySource::Url(_url) => { + warn!("OpenAI doesn't support audio from URL, need to handle it gracefully"); + } + BinarySource::Base64(content) => { + let mut format = content_type.split('/').last().unwrap_or(""); + if format == "mpeg" { + format = "mp3"; + } + values.push(json!({ + "type": "input_audio", + "input_audio": { + "data": content, + "format": format + } + })); + } + } + } else if is_image { match &source { BinarySource::Url(url) => { values.push(json!({"type": "image_url", "image_url": {"url": url}})) diff --git a/src/chat/content_part.rs b/src/chat/content_part.rs index 8a4b5939..9b71d624 100644 --- a/src/chat/content_part.rs +++ b/src/chat/content_part.rs @@ -153,6 +153,14 @@ impl ContentPart { } } + /// Returns true if this part is a binary audio (content_type starts with "audio/"). 
+ pub fn is_audio(&self) -> bool { + match self { + ContentPart::Binary(binary) => binary.content_type.trim().to_ascii_lowercase().starts_with("audio/"), + _ => false, + } + } + #[allow(unused)] /// Returns true if this part is a PDF binary (content_type equals "application/pdf"). pub fn is_pdf(&self) -> bool { @@ -207,6 +215,11 @@ impl Binary { self.content_type.trim().to_ascii_lowercase().starts_with("image/") } + /// Returns true if this binary is an audio file (content_type starts with "audio/"). + pub fn is_audio(&self) -> bool { + self.content_type.trim().to_ascii_lowercase().starts_with("audio/") + } + /// Returns true if this binary is a PDF (content_type equals "application/pdf"). pub fn is_pdf(&self) -> bool { self.content_type.trim().eq_ignore_ascii_case("application/pdf") From 66ff7fc344d200d6d6f59d659f8bbe3194dd2477 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 14 Nov 2025 11:25:10 -0800 Subject: [PATCH 015/123] . openai audio - vgami commit about adding test (jc edit: minus .wav file in git) --- tests/support/common_tests.rs | 31 ++++++++++++++++++++++++++++++- tests/support/data.rs | 10 ++++++++++ tests/tests_p_openai.rs | 6 ++++++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/tests/support/common_tests.rs b/tests/support/common_tests.rs index e338c520..6a5a1e20 100644 --- a/tests/support/common_tests.rs +++ b/tests/support/common_tests.rs @@ -1,5 +1,5 @@ use crate::get_option_value; -use crate::support::data::{IMAGE_URL_JPG_DUCK, get_b64_duck, get_b64_pdf}; +use crate::support::data::{IMAGE_URL_JPG_DUCK, get_b64_audio, get_b64_duck, get_b64_pdf, has_audio_file}; use crate::support::{ Check, StreamExtract, TestResult, assert_contains, assert_reasoning_content, assert_reasoning_usage, contains_checks, extract_stream_end, get_big_content, seed_chat_req_simple, seed_chat_req_tool_simple, @@ -731,6 +731,35 @@ pub async fn common_test_chat_image_b64_ok(model: &str) -> TestResult<()> { Ok(()) } +pub async fn 
common_test_chat_audio_b64_ok(model: &str) -> TestResult<()> { + if !has_audio_file() { + println!("No test audio file. Skipping this test."); + return Ok(()); + } + + // -- Setup + let client = Client::default(); + + // -- Build & Exec + let mut chat_req = ChatRequest::default().with_system("Transcribe the audio"); + // This is similar to sending initial system chat messages (which will be cumulative with system chat messages) + chat_req = chat_req.append_message(ChatMessage::user(vec![ContentPart::from_binary_base64( + "audio/wav", + get_b64_audio()?, + None, + )])); + + let chat_res = client.exec_chat(model, chat_req, None).await?; + + // -- Check + let res = chat_res.first_text().ok_or("Should have text result")?; + // NOTE: here we make the test a little loose as the point of this test is not to test the model accuracy + assert_contains(res, "one small step"); + assert_contains(res, "one giant leap"); + + Ok(()) +} + pub async fn common_test_chat_pdf_b64_ok(model: &str) -> TestResult<()> { // -- Setup let client = Client::default(); diff --git a/tests/support/data.rs b/tests/support/data.rs index 0e03fe4f..d7fa5c53 100644 --- a/tests/support/data.rs +++ b/tests/support/data.rs @@ -3,16 +3,26 @@ use crate::support::TestResult; use base64::Engine; use base64::engine::general_purpose; +use simple_fs::SPath; use std::fs::File; use std::io::Read; pub const IMAGE_URL_JPG_DUCK: &str = "https://upload.wikimedia.org/wikipedia/commons/thumb/b/bf/Bucephala-albeola-010.jpg/440px-Bucephala-albeola-010.jpg"; +pub const AUDIO_TEST_FILE_PATH: &str = "./tests/data/phrase_neil_armstrong.wav"; /// Get the base64 of the image above (but resized/lower to fit 5kb) pub fn get_b64_duck() -> TestResult { get_b64_file("./tests/data/duck-small.jpg") } +pub fn has_audio_file() -> bool { + SPath::new(AUDIO_TEST_FILE_PATH).exists() +} + +pub fn get_b64_audio() -> TestResult { + get_b64_file(AUDIO_TEST_FILE_PATH) +} + pub fn get_b64_pdf() -> TestResult { 
get_b64_file("./tests/data/small.pdf") } diff --git a/tests/tests_p_openai.rs b/tests/tests_p_openai.rs index 74fc6f56..20996195 100644 --- a/tests/tests_p_openai.rs +++ b/tests/tests_p_openai.rs @@ -7,6 +7,7 @@ use genai::resolver::AuthData; // note: "gpt-4o-mini" has issue when image & pdf // as for 2025-08-08 gpt-5-mini does not support temperature & stop sequence const MODEL: &str = "gpt-5-mini"; +const AUDIO_MODEL: &str = "gpt-audio-mini"; const MODEL2: &str = "gpt-4.1-mini"; // for temperature & stop sequence const MODEL_NS: &str = "openai::gpt-4.1-mini"; @@ -117,6 +118,11 @@ async fn test_chat_binary_image_b64_ok() -> TestResult<()> { common_tests::common_test_chat_image_b64_ok(MODEL).await } +#[tokio::test] +async fn test_chat_binary_audio_b64_ok() -> TestResult<()> { + common_tests::common_test_chat_audio_b64_ok(AUDIO_MODEL).await +} + #[tokio::test] async fn test_chat_binary_pdf_b64_ok() -> TestResult<()> { common_tests::common_test_chat_pdf_b64_ok(MODEL).await From c17b9eac9f2807f4ff0603eb73197f8cb02199fe Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 14 Nov 2025 11:40:39 -0800 Subject: [PATCH 016/123] . 
minor typo & clippy --- src/adapter/adapters/openai/adapter_impl.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/adapter/adapters/openai/adapter_impl.rs b/src/adapter/adapters/openai/adapter_impl.rs index b1578381..173c0362 100644 --- a/src/adapter/adapters/openai/adapter_impl.rs +++ b/src/adapter/adapters/openai/adapter_impl.rs @@ -27,8 +27,8 @@ const MODELS: &[&str] = &[ "gpt-5", "gpt-5-mini", "gpt-5-nano", - "gpt-audio-mini", - "gpt-audio" + "gpt-audio-mini", + "gpt-audio", ]; impl OpenAIAdapter { @@ -415,10 +415,12 @@ impl OpenAIAdapter { if is_audio { match &source { BinarySource::Url(_url) => { - warn!("OpenAI doesn't support audio from URL, need to handle it gracefully"); + warn!( + "OpenAI doesn't support audio from URL, need to handle it gracefully" + ); } BinarySource::Base64(content) => { - let mut format = content_type.split('/').last().unwrap_or(""); + let mut format = content_type.split('/').next_back().unwrap_or(""); if format == "mpeg" { format = "mp3"; } From 9022f1a2168673ef66a07d2207181d152f8c0305 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 14 Nov 2025 11:49:12 -0800 Subject: [PATCH 017/123] + openai - adding support for gpt-5-pro (must be mapped to OpenaiResp adapter) --- src/adapter/adapter_kind.rs | 2 +- tests/tests_p_openai.rs | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/adapter/adapter_kind.rs b/src/adapter/adapter_kind.rs index 9e52d50a..7ff9dcc6 100644 --- a/src/adapter/adapter_kind.rs +++ b/src/adapter/adapter_kind.rs @@ -180,7 +180,7 @@ impl AdapterKind { || model.starts_with("text-embedding") // migh be a little generic on this one { - if model.starts_with("gpt") && model.contains("codex") { + if model.starts_with("gpt") && (model.contains("codex") || model.contains("pro")) { Ok(Self::OpenAIResp) } else { Ok(Self::OpenAI) diff --git a/tests/tests_p_openai.rs b/tests/tests_p_openai.rs index 20996195..8b0ba7c3 100644 --- a/tests/tests_p_openai.rs +++ 
b/tests/tests_p_openai.rs @@ -19,6 +19,13 @@ async fn test_chat_reasoning_minimal_ok() -> TestResult<()> { common_tests::common_test_chat_simple_ok("gpt-5-mini-minimal", None).await } +// gpt-5-pro (different api than gpt-5) +// expensive, so, will be commented most of the time. +// #[tokio::test] +// async fn test_chat_gpt_5_pro_simple_ok() -> TestResult<()> { +// common_tests::common_test_chat_simple_ok("gpt-5-pro", None).await +// } + // endregion: --- Provider Specific // region: --- Chat From caa83957a060447f580c7ebbd6e3190acc802d07 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 14 Nov 2025 16:48:20 -0800 Subject: [PATCH 018/123] . test - openai - added gpt-5.1 --- tests/support/common_tests.rs | 16 +++++------ tests/support/helpers.rs | 2 +- tests/tests_p_anthropic.rs | 2 +- tests/tests_p_deepseek_reasoning.rs | 6 ++--- tests/tests_p_openai.rs | 41 +++++++++++++++-------------- 5 files changed, 34 insertions(+), 33 deletions(-) diff --git a/tests/support/common_tests.rs b/tests/support/common_tests.rs index 6a5a1e20..f52a14dc 100644 --- a/tests/support/common_tests.rs +++ b/tests/support/common_tests.rs @@ -20,7 +20,7 @@ use value_ext::JsonValueExt; // region: --- Chat pub async fn common_test_chat_simple_ok(model: &str, checks: Option) -> TestResult<()> { - validate_checks(checks.clone(), Check::REASONING | Check::REASONING_USAGE)?; + validate_checks(checks.clone(), Check::REASONING_CONTENT | Check::REASONING_USAGE)?; // -- Setup & Fixtures let client = Client::default(); @@ -50,7 +50,7 @@ pub async fn common_test_chat_simple_ok(model: &str, checks: Option) -> T } // -- Check Reasoning Content - if contains_checks(checks, Check::REASONING) { + if contains_checks(checks, Check::REASONING_CONTENT) { assert_reasoning_content(&chat_res)?; } @@ -93,7 +93,7 @@ pub async fn common_test_chat_reasoning_ok(model: &str, checks: Option) - } // -- Check Reasoning Content - if contains_checks(checks, Check::REASONING) { + if contains_checks(checks, 
Check::REASONING_CONTENT) { let reasoning_content = chat_res .reasoning_content .as_deref() @@ -512,7 +512,7 @@ pub async fn common_test_chat_cache_explicit_system_ok(model: &str) -> TestResul // region: --- Chat Stream Tests pub async fn common_test_chat_stream_simple_ok(model: &str, checks: Option) -> TestResult<()> { - validate_checks(checks.clone(), Check::REASONING)?; + validate_checks(checks.clone(), Check::REASONING_CONTENT)?; // -- Setup & Fixtures let client = Client::default(); @@ -541,7 +541,7 @@ pub async fn common_test_chat_stream_simple_ok(model: &str, checks: Option TestResu } pub async fn common_test_chat_stream_capture_all_ok(model: &str, checks: Option) -> TestResult<()> { - validate_checks(checks.clone(), Check::REASONING | Check::REASONING_USAGE)?; + validate_checks(checks.clone(), Check::REASONING_CONTENT | Check::REASONING_USAGE)?; // -- Setup & Fixtures let mut chat_options = ChatOptions::default() @@ -604,7 +604,7 @@ pub async fn common_test_chat_stream_capture_all_ok(model: &str, checks: Option< .with_capture_content(true) .with_capture_reasoning_content(true); - if contains_checks(checks.clone(), Check::REASONING | Check::REASONING_USAGE) { + if contains_checks(checks.clone(), Check::REASONING_CONTENT | Check::REASONING_USAGE) { chat_options = chat_options.with_reasoning_effort(ReasoningEffort::Medium); } @@ -648,7 +648,7 @@ pub async fn common_test_chat_stream_capture_all_ok(model: &str, checks: Option< } // -- Check Reasoning Content - if contains_checks(checks, Check::REASONING) { + if contains_checks(checks, Check::REASONING_CONTENT) { let _reasoning_content = reasoning_content.ok_or("Should have reasoning content")?; } diff --git a/tests/support/helpers.rs b/tests/support/helpers.rs index 0d29ab0e..a870b89a 100644 --- a/tests/support/helpers.rs +++ b/tests/support/helpers.rs @@ -33,7 +33,7 @@ bitflags::bitflags! 
{ #[derive(Clone)] pub struct Check: u8 { /// Check if the - const REASONING = 0b00000001; + const REASONING_CONTENT = 0b00000001; const REASONING_USAGE = 0b00000010; const USAGE = 0b00000100; } diff --git a/tests/tests_p_anthropic.rs b/tests/tests_p_anthropic.rs index 93530135..a010610c 100644 --- a/tests/tests_p_anthropic.rs +++ b/tests/tests_p_anthropic.rs @@ -25,7 +25,7 @@ async fn test_chat_simple_ok() -> TestResult<()> { #[serial(anthropic)] async fn test_chat_reasoning_ok() -> TestResult<()> { // NOTE: Does not test REASONING_USAGE as Anthropic does not report it - common_tests::common_test_chat_reasoning_ok(MODEL_THINKING, Some(Check::REASONING)).await + common_tests::common_test_chat_reasoning_ok(MODEL_THINKING, Some(Check::REASONING_CONTENT)).await } #[tokio::test] diff --git a/tests/tests_p_deepseek_reasoning.rs b/tests/tests_p_deepseek_reasoning.rs index d6533ea9..54324e4a 100644 --- a/tests/tests_p_deepseek_reasoning.rs +++ b/tests/tests_p_deepseek_reasoning.rs @@ -11,7 +11,7 @@ const MODEL: &str = "deepseek-reasoner"; #[tokio::test] async fn test_chat_simple_ok() -> TestResult<()> { - common_tests::common_test_chat_simple_ok(MODEL, Some(Check::REASONING)).await + common_tests::common_test_chat_simple_ok(MODEL, Some(Check::REASONING_CONTENT)).await } #[tokio::test] @@ -47,7 +47,7 @@ async fn test_chat_reasoning_normalize_ok() -> TestResult<()> { #[tokio::test] async fn test_chat_stream_simple_ok() -> TestResult<()> { - common_tests::common_test_chat_stream_simple_ok(MODEL, Some(Check::REASONING)).await + common_tests::common_test_chat_stream_simple_ok(MODEL, Some(Check::REASONING_CONTENT)).await } #[tokio::test] @@ -57,7 +57,7 @@ async fn test_chat_stream_capture_content_ok() -> TestResult<()> { #[tokio::test] async fn test_chat_stream_capture_all_ok() -> TestResult<()> { - common_tests::common_test_chat_stream_capture_all_ok(MODEL, Some(Check::REASONING)).await + common_tests::common_test_chat_stream_capture_all_ok(MODEL, 
Some(Check::REASONING_CONTENT)).await } // endregion: --- Chat Stream Tests diff --git a/tests/tests_p_openai.rs b/tests/tests_p_openai.rs index 8b0ba7c3..b4edd700 100644 --- a/tests/tests_p_openai.rs +++ b/tests/tests_p_openai.rs @@ -6,7 +6,8 @@ use genai::resolver::AuthData; // note: "gpt-4o-mini" has issue when image & pdf // as for 2025-08-08 gpt-5-mini does not support temperature & stop sequence -const MODEL: &str = "gpt-5-mini"; +const MODEL_LATEST: &str = "gpt-5.1"; +const MODEL_GPT_5_MINI: &str = "gpt-5-mini"; // p const AUDIO_MODEL: &str = "gpt-audio-mini"; const MODEL2: &str = "gpt-4.1-mini"; // for temperature & stop sequence const MODEL_NS: &str = "openai::gpt-4.1-mini"; @@ -32,18 +33,18 @@ async fn test_chat_reasoning_minimal_ok() -> TestResult<()> { #[tokio::test] async fn test_chat_simple_ok() -> TestResult<()> { - common_tests::common_test_chat_simple_ok(MODEL, None).await + common_tests::common_test_chat_simple_ok(MODEL_LATEST, None).await } #[tokio::test] async fn test_chat_reasoning_ok() -> TestResult<()> { // For now, do not test Check::REASONING, for OpenAI as it is not captured - common_tests::common_test_chat_reasoning_ok(MODEL, Some(Check::REASONING_USAGE)).await + common_tests::common_test_chat_reasoning_ok(MODEL_LATEST, Some(Check::REASONING_USAGE)).await } #[tokio::test] async fn test_chat_verbosity_ok() -> TestResult<()> { - common_tests::common_test_chat_verbosity_ok(MODEL).await + common_tests::common_test_chat_verbosity_ok(MODEL_LATEST).await } #[tokio::test] @@ -53,17 +54,17 @@ async fn test_chat_namespaced_ok() -> TestResult<()> { #[tokio::test] async fn test_chat_multi_system_ok() -> TestResult<()> { - common_tests::common_test_chat_multi_system_ok(MODEL).await + common_tests::common_test_chat_multi_system_ok(MODEL_LATEST).await } #[tokio::test] async fn test_chat_json_mode_ok() -> TestResult<()> { - common_tests::common_test_chat_json_mode_ok(MODEL, Some(Check::USAGE)).await + 
common_tests::common_test_chat_json_mode_ok(MODEL_LATEST, Some(Check::USAGE)).await } #[tokio::test] async fn test_chat_json_structured_ok() -> TestResult<()> { - common_tests::common_test_chat_json_structured_ok(MODEL, Some(Check::USAGE)).await + common_tests::common_test_chat_json_structured_ok(MODEL_LATEST, Some(Check::USAGE)).await } #[tokio::test] @@ -82,7 +83,7 @@ async fn test_chat_stop_sequences_ok() -> TestResult<()> { #[tokio::test] async fn test_chat_cache_implicit_simple_ok() -> TestResult<()> { - common_tests::common_test_chat_cache_implicit_simple_ok(MODEL).await + common_tests::common_test_chat_cache_implicit_simple_ok(MODEL_LATEST).await } // endregion: --- Chat Implicit Cache @@ -91,24 +92,24 @@ async fn test_chat_cache_implicit_simple_ok() -> TestResult<()> { #[tokio::test] async fn test_chat_stream_simple_ok() -> TestResult<()> { - common_tests::common_test_chat_stream_simple_ok(MODEL, None).await + common_tests::common_test_chat_stream_simple_ok(MODEL_LATEST, None).await } #[tokio::test] async fn test_chat_stream_capture_content_ok() -> TestResult<()> { - common_tests::common_test_chat_stream_capture_content_ok(MODEL).await + common_tests::common_test_chat_stream_capture_content_ok(MODEL_LATEST).await } #[tokio::test] async fn test_chat_stream_capture_all_ok() -> TestResult<()> { - // NOTE: For now the OpenAI Adapter do not capture the thinking as not available in chat completions - common_tests::common_test_chat_stream_capture_all_ok(MODEL, Some(Check::REASONING_USAGE)).await + // NOTE: gpt-5.1 even when reasoning is Medium, does not give reasoning when simple chat when streaming + common_tests::common_test_chat_stream_capture_all_ok(MODEL_GPT_5_MINI, Some(Check::REASONING_USAGE)).await } #[tokio::test] async fn test_chat_stream_tool_capture_ok() -> TestResult<()> { // NOTE: For now the OpenAI Adapter do not capture the thinking as not available in chat completions - common_tests::common_test_chat_stream_tool_capture_ok(MODEL).await + 
common_tests::common_test_chat_stream_tool_capture_ok(MODEL_LATEST).await } // endregion: --- Chat Stream Tests @@ -117,12 +118,12 @@ async fn test_chat_stream_tool_capture_ok() -> TestResult<()> { #[tokio::test] async fn test_chat_binary_image_url_ok() -> TestResult<()> { - common_tests::common_test_chat_image_url_ok(MODEL).await + common_tests::common_test_chat_image_url_ok(MODEL_LATEST).await } #[tokio::test] async fn test_chat_binary_image_b64_ok() -> TestResult<()> { - common_tests::common_test_chat_image_b64_ok(MODEL).await + common_tests::common_test_chat_image_b64_ok(MODEL_LATEST).await } #[tokio::test] @@ -132,12 +133,12 @@ async fn test_chat_binary_audio_b64_ok() -> TestResult<()> { #[tokio::test] async fn test_chat_binary_pdf_b64_ok() -> TestResult<()> { - common_tests::common_test_chat_pdf_b64_ok(MODEL).await + common_tests::common_test_chat_pdf_b64_ok(MODEL_LATEST).await } #[tokio::test] async fn test_chat_binary_multi_b64_ok() -> TestResult<()> { - common_tests::common_test_chat_multi_binary_b64_ok(MODEL).await + common_tests::common_test_chat_multi_binary_b64_ok(MODEL_LATEST).await } // endregion: --- Binary Tests @@ -146,12 +147,12 @@ async fn test_chat_binary_multi_b64_ok() -> TestResult<()> { #[tokio::test] async fn test_tool_simple_ok() -> TestResult<()> { - common_tests::common_test_tool_simple_ok(MODEL).await + common_tests::common_test_tool_simple_ok(MODEL_LATEST).await } #[tokio::test] async fn test_tool_full_flow_ok() -> TestResult<()> { - common_tests::common_test_tool_full_flow_ok(MODEL).await + common_tests::common_test_tool_full_flow_ok(MODEL_LATEST).await } // endregion: --- Tool Tests @@ -159,7 +160,7 @@ async fn test_tool_full_flow_ok() -> TestResult<()> { #[tokio::test] async fn test_resolver_auth_ok() -> TestResult<()> { - common_tests::common_test_resolver_auth_ok(MODEL, AuthData::from_env("OPENAI_API_KEY")).await + common_tests::common_test_resolver_auth_ok(MODEL_LATEST, AuthData::from_env("OPENAI_API_KEY")).await } // endregion: 
--- Resolver Tests From 0dc9e28396b6eb73a4e89230496f6a5ab79a3476 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 14 Nov 2025 17:20:21 -0800 Subject: [PATCH 019/123] . openai - update model names --- src/adapter/adapters/openai/adapter_impl.rs | 3 +++ tests/tests_p_openai.rs | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/adapter/adapters/openai/adapter_impl.rs b/src/adapter/adapters/openai/adapter_impl.rs index 173c0362..3ea31161 100644 --- a/src/adapter/adapters/openai/adapter_impl.rs +++ b/src/adapter/adapters/openai/adapter_impl.rs @@ -24,6 +24,9 @@ pub struct OpenAIAdapter; // Latest models const MODELS: &[&str] = &[ // + "gpt-5.1", + "gpt-5.1-codex", + "gpt-5.1-codex-mini", "gpt-5", "gpt-5-mini", "gpt-5-nano", diff --git a/tests/tests_p_openai.rs b/tests/tests_p_openai.rs index b4edd700..92b57177 100644 --- a/tests/tests_p_openai.rs +++ b/tests/tests_p_openai.rs @@ -7,7 +7,7 @@ use genai::resolver::AuthData; // note: "gpt-4o-mini" has issue when image & pdf // as for 2025-08-08 gpt-5-mini does not support temperature & stop sequence const MODEL_LATEST: &str = "gpt-5.1"; -const MODEL_GPT_5_MINI: &str = "gpt-5-mini"; // p +const MODEL_GPT_5_MINI: &str = "gpt-5-mini"; // for the streaming reasoning test const AUDIO_MODEL: &str = "gpt-audio-mini"; const MODEL2: &str = "gpt-4.1-mini"; // for temperature & stop sequence const MODEL_NS: &str = "openai::gpt-4.1-mini"; @@ -44,7 +44,7 @@ async fn test_chat_reasoning_ok() -> TestResult<()> { #[tokio::test] async fn test_chat_verbosity_ok() -> TestResult<()> { - common_tests::common_test_chat_verbosity_ok(MODEL_LATEST).await + common_tests::common_test_chat_verbosity_ok(MODEL_GPT_5_MINI).await } #[tokio::test] @@ -83,7 +83,7 @@ async fn test_chat_stop_sequences_ok() -> TestResult<()> { #[tokio::test] async fn test_chat_cache_implicit_simple_ok() -> TestResult<()> { - common_tests::common_test_chat_cache_implicit_simple_ok(MODEL_LATEST).await + 
common_tests::common_test_chat_cache_implicit_simple_ok(MODEL_GPT_5_MINI).await } // endregion: --- Chat Implicit Cache From 41d172aea11362c4853f48e599a3289a5497ab8c Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 14 Nov 2025 17:30:58 -0800 Subject: [PATCH 020/123] . readme and changelog --- CHANGELOG.md | 6 ++++++ README.md | 6 ++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ae27500..aa600fd8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ `.` minor | `-` Fix | `+` Addition | `^` improvement | `!` Change | `*` Refactor +## 2025-11-14 - [v0.4.4](https://github.com/jeremychone/rust-genai/compare/v0.4.3...v0.4.4) + +- `+` openai - adding support for gpt-5-pro (must be mapped to OpenaiResp adapter) +- `+` Add support for openai audio_type content part for voice agent support. ([PR #96](https://github.com/jeremychone/rust-genai/pull/96) thanks to [Vagmi Mudumbai](https://github.com/vagmi)) +- `+` Add support for OpenAI `service_tier` parameter. 
([PR #98](https://github.com/jeremychone/rust-genai/pull/98) thanks to [Himmelschmidt](https://github.com/Himmelschmidt)) + ## 2025-10-25 - [v0.4.3](https://github.com/jeremychone/rust-genai/compare/v0.4.2...v0.4.3) diff --git a/README.md b/README.md index 6cff5844..00b718d9 100644 --- a/README.md +++ b/README.md @@ -39,8 +39,10 @@ See: ## Big Thanks to -- [Bart Carroll](https://github.com/bartCarroll) For [#91](https://github.com/jeremychone/rust-genai/pull/91) Fixed streaming tool calls for openai models -- [Rui Andrada](https://github.com/shingonoide) For [#95](https://github.com/jeremychone/rust-genai/pull/95) refactoring ZHIPU adapter to ZAI +- [Vagmi Mudumbai](https://github.com/vagmi)) for [#96](https://github.com/jeremychone/rust-genai/pull/96) openai audio_type +- [Himmelschmidt](https://github.com/Himmelschmidt) for [#98](https://github.com/jeremychone/rust-genai/pull/98) openai service_tier +- [Bart Carroll](https://github.com/bartCarroll) for [#91](https://github.com/jeremychone/rust-genai/pull/91) Fixed streaming tool calls for openai models +- [Rui Andrada](https://github.com/shingonoide) for [#95](https://github.com/jeremychone/rust-genai/pull/95) refactoring ZHIPU adapter to ZAI - [Adrien](https://github.com/XciD) Extra headers in requests, seed for chat requests, and fixes (with [Julien Chaumond](https://github.com/julien-c) for extra headers) - [Andrew Rademacher](https://github.com/AndrewRademacher) for PDF support, Anthropic streamer, and insight on flattening the message content (e.g., ContentParts) - [Jesus Santander](https://github.com/jsantanders) Embedding support [PR #83](https://github.com/jeremychone/rust-genai/pull/83) From 176a05af96b2355d364e6a67cb18bb98532e48d1 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 14 Nov 2025 17:32:20 -0800 Subject: [PATCH 021/123] . 
update to v0.4.4 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6e04e7f1..50a03b14 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.4.4-WIP" +version = "0.4.4" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From b0bf4da0e6022011c9f536d7147f2654b4e5cddc Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 14 Nov 2025 17:33:27 -0800 Subject: [PATCH 022/123] . update to v0.4.5-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 50a03b14..951a143b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.4.4" +version = "0.4.5-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From aed37485dcf55ea504a4430358e66a29fd34654c Mon Sep 17 00:00:00 2001 From: Maple Date: Sun, 23 Nov 2025 23:45:06 +0700 Subject: [PATCH 023/123] Feat/gemini thought signature (#1) * feat: parts thought signature * feat: gemini adapter thought signature support - Add ContentPart::ThoughtSignature with accessors and helpers. - Introduce InterStreamEvent::ThoughtSignatureChunk and ChatStreamEvent::ThoughtSignatureChunk. - Capture thought signatures into StreamEnd.captured_content when enabled. - Update printer to display ThoughtSignatureChunk events (non-captured). - Gemini adapter: parse thoughtSignature (fallback: thought) and include in outgoing messages. - Gemini streamer: surface thought as ThoughtSignatureChunk and record in captured_thought_signatures. - Preserve thoughtSignature in assistant history before tool calls when looping tools. 
- Example c10-tooluse-streaming.rs: switch to gemini-3-pro-preview, log thoughtSignature chunks, and prepend captured thoughts to assistant message. * chore: impl thought signature * chore: no API change --- examples/c10-tooluse-streaming.rs | 74 ++++++++--- .../adapters/anthropic/adapter_impl.rs | 3 + src/adapter/adapters/anthropic/streamer.rs | 2 + src/adapter/adapters/cohere/streamer.rs | 1 + src/adapter/adapters/gemini/adapter_impl.rs | 116 ++++++++++++++++-- src/adapter/adapters/gemini/streamer.rs | 64 +++++++--- src/adapter/adapters/openai/adapter_impl.rs | 3 + src/adapter/adapters/openai/streamer.rs | 4 + .../adapters/openai_resp/adapter_impl.rs | 6 +- .../resp_types/resp_output_helper.rs | 1 + src/adapter/adapters/support.rs | 1 + src/adapter/inter_stream.rs | 4 + src/chat/chat_message.rs | 19 ++- src/chat/chat_request.rs | 25 +++- src/chat/chat_stream.rs | 88 ++++++++++++- src/chat/content_part.rs | 26 ++++ src/chat/message_content.rs | 23 ++++ src/chat/printer.rs | 14 +++ src/chat/tool/tool_call.rs | 10 ++ src/webc/web_stream.rs | 104 +++++++++++----- tests/support/helpers.rs | 3 +- 21 files changed, 510 insertions(+), 81 deletions(-) diff --git a/examples/c10-tooluse-streaming.rs b/examples/c10-tooluse-streaming.rs index 1589001d..aa5f88e5 100644 --- a/examples/c10-tooluse-streaming.rs +++ b/examples/c10-tooluse-streaming.rs @@ -3,11 +3,13 @@ use genai::Client; use genai::chat::printer::{PrintChatStreamOptions, print_chat_stream}; use genai::chat::{ChatMessage, ChatOptions, ChatRequest, Tool, ToolResponse}; use genai::chat::{ChatStreamEvent, ToolCall}; +use genai::resolver::AuthData; use serde_json::json; use tracing_subscriber::EnvFilter; // const MODEL: &str = "gemini-2.0-flash"; -const MODEL: &str = "deepseek-chat"; +// const MODEL: &str = "deepseek-chat"; +const MODEL: &str = "gemini-3-pro-preview"; #[tokio::main] async fn main() -> Result<(), Box> { @@ -18,6 +20,8 @@ async fn main() -> Result<(), Box> { let client = Client::default(); + 
println!("--- Model: {MODEL}"); + // 1. Define a tool for getting weather information let weather_tool = Tool::new("get_weather") .with_description("Get the current weather for a location") @@ -53,6 +57,8 @@ async fn main() -> Result<(), Box> { let mut chat_stream = client.exec_chat_stream(MODEL, chat_req.clone(), Some(&chat_options)).await?; let mut tool_calls: Vec = [].to_vec(); + let mut captured_thoughts: Option> = None; + // print_chat_stream(chat_res, Some(&print_options)).await?; println!("--- Streaming response with tool calls"); while let Some(result) = chat_stream.stream.next().await { @@ -63,25 +69,53 @@ async fn main() -> Result<(), Box> { ChatStreamEvent::Chunk(chunk) => { print!("{}", chunk.content); } - ChatStreamEvent::ToolCallChunk(tool_chunk) => { - println!( - "\nTool Call: {} with args: {}", - tool_chunk.tool_call.fn_name, tool_chunk.tool_call.fn_arguments - ); + ChatStreamEvent::ToolCallChunk(chunk) => { + println!(" ToolCallChunk: {:?}", chunk.tool_call); } ChatStreamEvent::ReasoningChunk(chunk) => { - println!("\nReasoning: {}", chunk.content); + println!(" ReasoningChunk: {:?}", chunk.content); + } + ChatStreamEvent::ThoughtSignatureChunk(chunk) => { + println!(" ThoughtSignatureChunk: {:?}", chunk.content); } ChatStreamEvent::End(end) => { println!("\nStream ended"); // Check if we captured any tool calls - if let Some(captured_tool_calls) = end.captured_into_tool_calls() { - println!("\nCaptured Tool Calls:"); - tool_calls = captured_tool_calls.clone(); - for tool_call in captured_tool_calls { - println!("- Function: {}", tool_call.fn_name); - println!(" Arguments: {}", tool_call.fn_arguments); + // Note: captured_into_tool_calls consumes self, so we can't use end afterwards. + // We should access captured_content directly or use references if possible, + // but StreamEnd getters often consume or clone. + // Let's access captured_content directly since we need both tool calls and thoughts. 
+ + if let Some(content) = end.captured_content { + // Let's refactor to avoid ownership issues. + // We have `content` (MessageContent). + // We want `tool_calls` (Vec) and `thoughts` (Vec). + + // We can iterate and split. + let parts = content.into_parts(); + let mut extracted_tool_calls = Vec::new(); + let mut extracted_thoughts = Vec::new(); + + for part in parts { + match part { + genai::chat::ContentPart::ToolCall(tc) => extracted_tool_calls.push(tc), + genai::chat::ContentPart::ThoughtSignature(t) => extracted_thoughts.push(t), + _ => {} + } + } + + if !extracted_tool_calls.is_empty() { + println!("\nCaptured Tool Calls:"); + for tool_call in &extracted_tool_calls { + println!("- Function: {}", tool_call.fn_name); + println!(" Arguments: {}", tool_call.fn_arguments); + } + tool_calls = extracted_tool_calls; + } + + if !extracted_thoughts.is_empty() { + captured_thoughts = Some(extracted_thoughts); } } } @@ -107,7 +141,19 @@ async fn main() -> Result<(), Box> { ); // Add both the tool calls and response to chat history - let chat_req = chat_req.append_message(tool_calls).append_message(tool_response); + // Note: For Gemini 3, we MUST include the thoughtSignature in the history if it was generated. + let mut assistant_msg = ChatMessage::from(tool_calls); + if let Some(thoughts) = captured_thoughts { + // We need to insert the thought at the beginning. + // MessageContent wraps Vec, but doesn't expose insert. + // We can convert to Vec, insert, and convert back. 
+ let mut parts = assistant_msg.content.into_parts(); + for thought in thoughts.into_iter().rev() { + parts.insert(0, genai::chat::ContentPart::ThoughtSignature(thought)); + } + assistant_msg.content = genai::chat::MessageContent::from_parts(parts); + } + let chat_req = chat_req.append_message(assistant_msg).append_message(tool_response); // Get final streaming response let chat_options = ChatOptions::default(); diff --git a/src/adapter/adapters/anthropic/adapter_impl.rs b/src/adapter/adapters/anthropic/adapter_impl.rs index 505a3f31..e22c6852 100644 --- a/src/adapter/adapters/anthropic/adapter_impl.rs +++ b/src/adapter/adapters/anthropic/adapter_impl.rs @@ -252,6 +252,7 @@ impl Adapter for AnthropicAdapter { call_id, fn_name, fn_arguments, + thought_signatures: None, }; let part = ContentPart::ToolCall(tool_call); @@ -451,6 +452,7 @@ impl AnthropicAdapter { "tool_use_id": tool_response.call_id, })); } + ContentPart::ThoughtSignature(_) => {} } } let values = apply_cache_control_to_parts(is_cache_control, values); @@ -483,6 +485,7 @@ impl AnthropicAdapter { // Unsupported for assistant role in Anthropic message content ContentPart::Binary(_) => {} ContentPart::ToolResponse(_) => {} + ContentPart::ThoughtSignature(_) => {} } } diff --git a/src/adapter/adapters/anthropic/streamer.rs b/src/adapter/adapters/anthropic/streamer.rs index 843f484f..e1264872 100644 --- a/src/adapter/adapters/anthropic/streamer.rs +++ b/src/adapter/adapters/anthropic/streamer.rs @@ -136,6 +136,7 @@ impl futures::Stream for AnthropicStreamer { call_id: id, fn_name: name, fn_arguments: serde_json::from_str(&input)?, + thought_signatures: None, }; // Add to the captured_tool_calls if chat options say so @@ -182,6 +183,7 @@ impl futures::Stream for AnthropicStreamer { captured_text_content: self.captured_data.content.take(), captured_reasoning_content: self.captured_data.reasoning_content.take(), captured_tool_calls: self.captured_data.tool_calls.take(), + captured_thought_signatures: None, }; 
// TODO: Need to capture the data as needed diff --git a/src/adapter/adapters/cohere/streamer.rs b/src/adapter/adapters/cohere/streamer.rs index c92bcb02..40d8c9ea 100644 --- a/src/adapter/adapters/cohere/streamer.rs +++ b/src/adapter/adapters/cohere/streamer.rs @@ -108,6 +108,7 @@ impl futures::Stream for CohereStreamer { captured_text_content: self.captured_data.content.take(), captured_reasoning_content: self.captured_data.reasoning_content.take(), captured_tool_calls: self.captured_data.tool_calls.take(), + captured_thought_signatures: None, }; InterStreamEvent::End(inter_stream_end) diff --git a/src/adapter/adapters/gemini/adapter_impl.rs b/src/adapter/adapters/gemini/adapter_impl.rs index e232e821..6870575a 100644 --- a/src/adapter/adapters/gemini/adapter_impl.rs +++ b/src/adapter/adapters/gemini/adapter_impl.rs @@ -208,15 +208,41 @@ impl Adapter for GeminiAdapter { usage, } = gemini_response; - // FIXME: Needs to take the content list - let mut content: MessageContent = MessageContent::default(); + let mut thoughts: Vec = Vec::new(); + let mut texts: Vec = Vec::new(); + let mut tool_calls: Vec = Vec::new(); + for g_item in gemini_content { match g_item { - GeminiChatContent::Text(text) => content.push(text), - GeminiChatContent::ToolCall(tool_call) => content.push(tool_call), + GeminiChatContent::Text(text) => texts.push(text), + GeminiChatContent::ToolCall(tool_call) => tool_calls.push(tool_call), + GeminiChatContent::ThoughtSignature(thought) => thoughts.push(thought), + } + } + + let thought_signatures_for_call = (!thoughts.is_empty() && !tool_calls.is_empty()).then(|| thoughts.clone()); + let mut parts: Vec = thoughts.into_iter().map(ContentPart::ThoughtSignature).collect(); + + if let Some(signatures) = thought_signatures_for_call { + if let Some(first_call) = tool_calls.first_mut() { + first_call.thought_signatures = Some(signatures); } } + if !texts.is_empty() { + let total_len: usize = texts.iter().map(|t| t.len()).sum(); + let mut combined_text = 
String::with_capacity(total_len); + for text in texts { + combined_text.push_str(&text); + } + if !combined_text.is_empty() { + parts.push(ContentPart::Text(combined_text)); + } + } + + parts.extend(tool_calls.into_iter().map(ContentPart::ToolCall)); + let content = MessageContent::from_parts(parts); + Ok(ChatResponse { content, reasoning_content: None, @@ -293,6 +319,28 @@ impl GeminiAdapter { }; for mut part in parts { + // -- Capture eventual thought signature + { + if let Some(thought) = part + .x_take::("thoughtSignature") + .ok() + .and_then(|v| if let Value::String(v) = v { Some(v) } else { None }) + { + content.push(GeminiChatContent::ThoughtSignature(thought)); + } + // Note: sometime the thought is in "thought" (undocumented, but observed in some cases or older models?) + // But for Gemini 3 it is thoughtSignature. Keeping this just in case or for backward compat if it was used. + // Actually, let's stick to thoughtSignature as per docs, but if we see "thought" we might want to capture it too. + // Let's check for "thought" if "thoughtSignature" was not found. 
+ else if let Some(thought) = part + .x_take::("thought") + .ok() + .and_then(|v| if let Value::String(v) = v { Some(v) } else { None }) + { + content.push(GeminiChatContent::ThoughtSignature(thought)); + } + } + // -- Capture eventual function call if let Ok(fn_call_value) = part.x_take::("functionCall") { let tool_call = ToolCall { @@ -300,6 +348,7 @@ impl GeminiAdapter { call_id: fn_call_value.x_get("name").unwrap_or("".to_string()), // TODO: Handle this, gemini does not return the call_id fn_name: fn_call_value.x_get("name").unwrap_or("".to_string()), fn_arguments: fn_call_value.x_get("args").unwrap_or(Value::Null), + thought_signatures: None, }; content.push(GeminiChatContent::ToolCall(tool_call)) } @@ -458,6 +507,11 @@ impl GeminiAdapter { } })); } + ContentPart::ThoughtSignature(thought) => { + parts_values.push(json!({ + "thoughtSignature": thought + })); + } } } @@ -465,22 +519,54 @@ impl GeminiAdapter { } ChatRole::Assistant => { let mut parts_values: Vec = Vec::new(); + let mut pending_thought: Option = None; for part in msg.content { match part { - ContentPart::Text(text) => parts_values.push(json!({"text": text})), + ContentPart::Text(text) => { + if let Some(thought) = pending_thought.take() { + parts_values.push(json!({"thoughtSignature": thought})); + } + parts_values.push(json!({"text": text})); + } ContentPart::ToolCall(tool_call) => { - parts_values.push(json!({ - "functionCall": { + let mut part_obj = serde_json::Map::new(); + part_obj.insert( + "functionCall".to_string(), + json!({ "name": tool_call.fn_name, "args": tool_call.fn_arguments, - } - })); + }), + ); + + if let Some(thought) = pending_thought.take() { + // Inject thoughtSignature alongside functionCall in the same Part object + part_obj.insert("thoughtSignature".to_string(), json!(thought)); + } + + parts_values.push(Value::Object(part_obj)); + } + ContentPart::ThoughtSignature(thought) => { + if let Some(prev_thought) = pending_thought.take() { + 
parts_values.push(json!({"thoughtSignature": prev_thought})); + } + pending_thought = Some(thought); } // Ignore unsupported parts for Assistant role - ContentPart::Binary(_) => {} - ContentPart::ToolResponse(_) => {} + ContentPart::Binary(_) => { + if let Some(thought) = pending_thought.take() { + parts_values.push(json!({"thoughtSignature": thought})); + } + } + ContentPart::ToolResponse(_) => { + if let Some(thought) = pending_thought.take() { + parts_values.push(json!({"thoughtSignature": thought})); + } + } } } + if let Some(thought) = pending_thought { + parts_values.push(json!({"thoughtSignature": thought})); + } if !parts_values.is_empty() { contents.push(json!({"role": "model", "parts": parts_values})); } @@ -508,10 +594,15 @@ impl GeminiAdapter { } })); } + ContentPart::ThoughtSignature(thought) => { + parts_values.push(json!({ + "thoughtSignature": thought + })); + } _ => { return Err(Error::MessageContentTypeNotSupported { model_iden: model_iden.clone(), - cause: "ChatRole::Tool can only contain ToolCall or ToolResponse content parts", + cause: "ChatRole::Tool can only contain ToolCall, ToolResponse, or Thought content parts", }); } } @@ -580,6 +671,7 @@ pub(super) struct GeminiChatResponse { pub(super) enum GeminiChatContent { Text(String), ToolCall(ToolCall), + ThoughtSignature(String), } struct GeminiChatRequestParts { diff --git a/src/adapter/adapters/gemini/streamer.rs b/src/adapter/adapters/gemini/streamer.rs index c076ab09..94cf25bd 100644 --- a/src/adapter/adapters/gemini/streamer.rs +++ b/src/adapter/adapters/gemini/streamer.rs @@ -10,6 +10,8 @@ use std::task::{Context, Poll}; use super::GeminiChatContent; +use std::collections::VecDeque; + pub struct GeminiStreamer { inner: WebStream, options: StreamerOptions, @@ -18,6 +20,7 @@ pub struct GeminiStreamer { /// Flag to not poll the EventSource after a MessageStop event. 
done: bool, captured_data: StreamerCapturedData, + pending_events: VecDeque, } impl GeminiStreamer { @@ -27,6 +30,7 @@ impl GeminiStreamer { done: false, options: StreamerOptions::new(model_iden, options_set), captured_data: Default::default(), + pending_events: VecDeque::new(), } } } @@ -40,6 +44,11 @@ impl futures::Stream for GeminiStreamer { return Poll::Ready(None); } + // 1. Check if we have pending events + if let Some(event) = self.pending_events.pop_front() { + return Poll::Ready(Some(Ok(event))); + } + while let Poll::Ready(item) = Pin::new(&mut self.inner).poll_next(cx) { match item { Some(Ok(raw_message)) => { @@ -47,17 +56,18 @@ impl futures::Stream for GeminiStreamer { // - `[` document start // - `{...}` block // - `]` document end - let inter_event = match raw_message.as_str() { - "[" => InterStreamEvent::Start, + match raw_message.as_str() { + "[" => return Poll::Ready(Some(Ok(InterStreamEvent::Start))), "]" => { let inter_stream_end = InterStreamEnd { captured_usage: self.captured_data.usage.take(), captured_text_content: self.captured_data.content.take(), captured_reasoning_content: self.captured_data.reasoning_content.take(), captured_tool_calls: self.captured_data.tool_calls.take(), + captured_thought_signatures: self.captured_data.thought_signatures.take(), }; - InterStreamEvent::End(inter_stream_end) + return Poll::Ready(Some(Ok(InterStreamEvent::End(inter_stream_end)))); } block_string => { // -- Parse the block to JSON @@ -91,15 +101,35 @@ impl futures::Stream for GeminiStreamer { // WARNING: Assume that only ONE tool call per message (or take the last one) let mut stream_text_content: String = String::new(); let mut stream_tool_call: Option = None; + let mut stream_thought: Option = None; + for g_content_item in content { match g_content_item { GeminiChatContent::Text(text) => stream_text_content.push_str(&text), GeminiChatContent::ToolCall(tool_call) => stream_tool_call = Some(tool_call), + GeminiChatContent::ThoughtSignature(thought) => 
stream_thought = Some(thought), } } - // -- Send Event - // WARNING: Assume only text or toolcall (not both on the same event) + // -- Queue Events + // Priority: Thought -> Text -> ToolCall + + // 1. Thought + if let Some(thought) = stream_thought { + // Capture thought + match self.captured_data.thought_signatures { + Some(ref mut thoughts) => thoughts.push(thought.clone()), + None => self.captured_data.thought_signatures = Some(vec![thought.clone()]), + } + + if self.options.capture_usage { + self.captured_data.usage = Some(usage.clone()); + } + + self.pending_events.push_back(InterStreamEvent::ThoughtSignatureChunk(thought)); + } + + // 2. Text if !stream_text_content.is_empty() { // Capture content if self.options.capture_content { @@ -109,18 +139,15 @@ impl futures::Stream for GeminiStreamer { } } - // NOTE: Apparently in the Gemini API, all events have cumulative usage, - // meaning each message seems to include the tokens for all previous streams. - // Thus, we do not need to add it; we only need to replace captured_data.usage with the latest one. - // See https://twitter.com/jeremychone/status/1813734565967802859 for potential additional information. if self.options.capture_usage { - self.captured_data.usage = Some(usage); + self.captured_data.usage = Some(usage.clone()); } - InterStreamEvent::Chunk(stream_text_content) + self.pending_events.push_back(InterStreamEvent::Chunk(stream_text_content)); } - // tool call - else if let Some(tool_call) = stream_tool_call { + + // 3. 
Tool Call + if let Some(tool_call) = stream_tool_call { if self.options.capture_tool_calls { match self.captured_data.tool_calls { Some(ref mut tool_calls) => tool_calls.push(tool_call.clone()), @@ -130,14 +157,15 @@ impl futures::Stream for GeminiStreamer { if self.options.capture_usage { self.captured_data.usage = Some(usage); } - InterStreamEvent::ToolCallChunk(tool_call) - } else { - continue; + self.pending_events.push_back(InterStreamEvent::ToolCallChunk(tool_call)); + } + + // Return the first event if any + if let Some(event) = self.pending_events.pop_front() { + return Poll::Ready(Some(Ok(event))); } } }; - - return Poll::Ready(Some(Ok(inter_event))); } Some(Err(err)) => { tracing::error!("Gemini Adapter Stream Error: {}", err); diff --git a/src/adapter/adapters/openai/adapter_impl.rs b/src/adapter/adapters/openai/adapter_impl.rs index 3ea31161..91c8ef6f 100644 --- a/src/adapter/adapters/openai/adapter_impl.rs +++ b/src/adapter/adapters/openai/adapter_impl.rs @@ -479,6 +479,7 @@ impl OpenAIAdapter { // TODO: Probably need to warn if it is a ToolCalls type of content ContentPart::ToolCall(_) => (), ContentPart::ToolResponse(_) => (), + ContentPart::ThoughtSignature(_) => (), } } messages.push(json! ({"role": "user", "content": values})); @@ -508,6 +509,7 @@ impl OpenAIAdapter { // TODO: Probably need towarn on this one (probably need to add binary here) ContentPart::Binary(_) => (), ContentPart::ToolResponse(_) => (), + ContentPart::ThoughtSignature(_) => {} } } let content = texts.join("\n\n"); @@ -653,6 +655,7 @@ fn parse_tool_call(raw_tool_call: Value) -> Result { call_id: iterim.id, fn_name, fn_arguments, + thought_signatures: None, }) } diff --git a/src/adapter/adapters/openai/streamer.rs b/src/adapter/adapters/openai/streamer.rs index 65d4d44e..026d11c6 100644 --- a/src/adapter/adapters/openai/streamer.rs +++ b/src/adapter/adapters/openai/streamer.rs @@ -69,6 +69,7 @@ impl futures::Stream for OpenAIStreamer { call_id, fn_name, fn_arguments, + .. 
} = tool_call; // parse fn_arguments if needed let fn_arguments = match fn_arguments { @@ -86,6 +87,7 @@ impl futures::Stream for OpenAIStreamer { call_id, fn_name, fn_arguments, + thought_signatures: None, } }) .collect(); @@ -100,6 +102,7 @@ impl futures::Stream for OpenAIStreamer { captured_text_content: self.captured_data.content.take(), captured_reasoning_content: self.captured_data.reasoning_content.take(), captured_tool_calls, + captured_thought_signatures: None, }; return Poll::Ready(Some(Ok(InterStreamEvent::End(inter_stream_end)))); @@ -177,6 +180,7 @@ impl futures::Stream for OpenAIStreamer { call_id, fn_name, fn_arguments: serde_json::Value::String(arguments.clone()), + thought_signatures: None, }; // Capture the tool call if enabled diff --git a/src/adapter/adapters/openai_resp/adapter_impl.rs b/src/adapter/adapters/openai_resp/adapter_impl.rs index 51fa543c..206be489 100644 --- a/src/adapter/adapters/openai_resp/adapter_impl.rs +++ b/src/adapter/adapters/openai_resp/adapter_impl.rs @@ -394,6 +394,7 @@ impl OpenAIRespAdapter { // TODO: Probably need to warn if it is a ToolCalls type of content ContentPart::ToolCall(_) => (), ContentPart::ToolResponse(_) => (), + ContentPart::ThoughtSignature(_) => (), } } input_items.push(json! 
({"role": "user", "content": values})); @@ -434,8 +435,9 @@ impl OpenAIRespAdapter { } // TODO: Probably need towarn on this one (probably need to add binary here) - ContentPart::Binary(_) => (), - ContentPart::ToolResponse(_) => (), + ContentPart::Binary(_) => {} + ContentPart::ToolResponse(_) => {} + ContentPart::ThoughtSignature(_) => {} } } diff --git a/src/adapter/adapters/openai_resp/resp_types/resp_output_helper.rs b/src/adapter/adapters/openai_resp/resp_types/resp_output_helper.rs index a628bc11..c723bd59 100644 --- a/src/adapter/adapters/openai_resp/resp_types/resp_output_helper.rs +++ b/src/adapter/adapters/openai_resp/resp_types/resp_output_helper.rs @@ -39,6 +39,7 @@ impl ContentPart { call_id, fn_name, fn_arguments, + thought_signatures: None, }; parts.push(tool_call.into()); diff --git a/src/adapter/adapters/support.rs b/src/adapter/adapters/support.rs index 98712f72..d78f31d8 100644 --- a/src/adapter/adapters/support.rs +++ b/src/adapter/adapters/support.rs @@ -46,6 +46,7 @@ pub struct StreamerCapturedData { pub content: Option, pub reasoning_content: Option, pub tool_calls: Option>, + pub thought_signatures: Option>, } // endregion: --- Streamer Captured Data diff --git a/src/adapter/inter_stream.rs b/src/adapter/inter_stream.rs index 8b58ed56..ca1c0450 100644 --- a/src/adapter/inter_stream.rs +++ b/src/adapter/inter_stream.rs @@ -20,6 +20,9 @@ pub struct InterStreamEnd { // When `ChatOptions..capture_tool_calls == true` pub captured_tool_calls: Option>, + + // When `ChatOptions..capture_thought_signatures == true` (implied or explicit) + pub captured_thought_signatures: Option>, } /// Intermediary StreamEvent @@ -28,6 +31,7 @@ pub enum InterStreamEvent { Start, Chunk(String), ReasoningChunk(String), + ThoughtSignatureChunk(String), ToolCallChunk(crate::chat::ToolCall), End(InterStreamEnd), } diff --git a/src/chat/chat_message.rs b/src/chat/chat_message.rs index d0d14077..0d164bae 100644 --- a/src/chat/chat_message.rs +++ b/src/chat/chat_message.rs 
@@ -1,4 +1,4 @@ -use crate::chat::{MessageContent, ToolCall, ToolResponse}; +use crate::chat::{ContentPart, MessageContent, ToolCall, ToolResponse}; use derive_more::From; use serde::{Deserialize, Serialize}; @@ -57,6 +57,16 @@ impl ChatMessage { self.options = Some(options.into()); self } + + /// Convenience: build an assistant message that contains an optional list + /// of thought signatures followed by tool calls. Useful for providers + /// (e.g., Gemini 3) that require the thought signature to appear before + /// tool calls in the assistant turn when continuing a tool-use exchange. + pub fn assistant_tool_calls_with_thoughts(tool_calls: Vec, thought_signatures: Vec) -> Self { + let mut parts: Vec = thought_signatures.into_iter().map(ContentPart::ThoughtSignature).collect(); + parts.extend(tool_calls.into_iter().map(ContentPart::ToolCall)); + ChatMessage::assistant(MessageContent::from_parts(parts)) + } } // region: --- MessageOptions @@ -105,6 +115,13 @@ pub enum ChatRole { /// Will create a Assisttant ChatMessage with this vect of tool impl From> for ChatMessage { fn from(tool_calls: Vec) -> Self { + if let Some(first) = tool_calls.first() { + if let Some(thoughts) = &first.thought_signatures { + let mut parts: Vec = thoughts.iter().cloned().map(ContentPart::ThoughtSignature).collect(); + parts.extend(tool_calls.into_iter().map(ContentPart::ToolCall)); + return ChatMessage::assistant(MessageContent::from_parts(parts)); + } + } Self { role: ChatRole::Assistant, content: MessageContent::from(tool_calls), diff --git a/src/chat/chat_request.rs b/src/chat/chat_request.rs index dc73a03b..62608eb3 100644 --- a/src/chat/chat_request.rs +++ b/src/chat/chat_request.rs @@ -1,6 +1,6 @@ //! This module contains all the types related to a Chat Request (except ChatOptions, which has its own file). 
-use crate::chat::{ChatMessage, ChatRole, Tool}; +use crate::chat::{ChatMessage, ChatRole, StreamEnd, Tool, ToolCall, ToolResponse}; use crate::support; use serde::{Deserialize, Serialize}; @@ -98,6 +98,29 @@ impl ChatRequest { self.tools.get_or_insert_with(Vec::new).push(tool.into()); self } + + /// Append an assistant tool-use turn and the corresponding tool response based on a + /// streaming `StreamEnd` capture. Thought signatures are included automatically and + /// ordered before tool calls when present. + /// + /// If neither content nor tool calls were captured, this is a no-op before appending + /// the provided tool response. + pub fn append_tool_use_from_stream_end(mut self, end: &StreamEnd, tool_response: ToolResponse) -> Self { + if let Some(content) = &end.captured_content { + // Use captured content directly (contains thoughts/text/tool calls in correct order) + self.messages.push(ChatMessage::assistant(content.clone())); + } else if let Some(calls_ref) = end.captured_tool_calls() { + // Fallback: build assistant message from tool calls only + let calls: Vec = calls_ref.into_iter().cloned().collect(); + if !calls.is_empty() { + self.messages.push(ChatMessage::from(calls)); + } + } + + // Append the tool response turn + self.messages.push(ChatMessage::from(tool_response)); + self + } } /// Getters diff --git a/src/chat/chat_stream.rs b/src/chat/chat_stream.rs index 15bf9d4f..62020a63 100644 --- a/src/chat/chat_stream.rs +++ b/src/chat/chat_stream.rs @@ -1,5 +1,5 @@ use crate::adapter::inter_stream::{InterStreamEnd, InterStreamEvent}; -use crate::chat::{MessageContent, ToolCall, Usage}; +use crate::chat::{ChatMessage, ContentPart, MessageContent, ToolCall, Usage}; use futures::Stream; use serde::{Deserialize, Serialize}; use std::pin::Pin; @@ -42,6 +42,9 @@ impl Stream for ChatStream { InterStreamEvent::ReasoningChunk(content) => { ChatStreamEvent::ReasoningChunk(StreamChunk { content }) } + InterStreamEvent::ThoughtSignatureChunk(content) => { + 
ChatStreamEvent::ThoughtSignatureChunk(StreamChunk { content }) + } InterStreamEvent::ToolCallChunk(tool_call) => { ChatStreamEvent::ToolCallChunk(ToolChunk { tool_call }) } @@ -72,6 +75,9 @@ pub enum ChatStreamEvent { /// Reasoning content chunk. ReasoningChunk(StreamChunk), + /// Thought signature content chunk. + ThoughtSignatureChunk(StreamChunk), + /// Tool-call chunk. ToolCallChunk(ToolChunk), @@ -114,13 +120,42 @@ pub struct StreamEnd { impl From for StreamEnd { fn from(inter_end: InterStreamEnd) -> Self { let captured_text_content = inter_end.captured_text_content; - let captured_tool_calls = inter_end.captured_tool_calls; + let mut captured_tool_calls = inter_end.captured_tool_calls; // -- create public captured_content + // Ordering policy: ThoughtSignature -> Text -> ToolCall + // This matches provider expectations (e.g., Gemini 3 requires thought first). let mut captured_content: Option = None; + if let Some(captured_thoughts) = inter_end.captured_thought_signatures { + let thoughts_content = captured_thoughts + .into_iter() + .map(ContentPart::ThoughtSignature) + .collect::>(); + // Also attach thoughts to the first tool call so that + // ChatMessage::from(Vec) can auto-prepend them. + if let Some(tool_calls) = captured_tool_calls.as_mut() { + if let Some(first_call) = tool_calls.first_mut() { + first_call.thought_signatures = Some( + thoughts_content + .iter() + .filter_map(|p| p.as_thought_signature().map(|s| s.to_string())) + .collect(), + ); + } + } + if let Some(existing_content) = &mut captured_content { + existing_content.extend_front(thoughts_content); + } else { + captured_content = Some(MessageContent::from_parts(thoughts_content)); + } + } if let Some(captured_text_content) = captured_text_content { // This `captured_text_content` is the concatenation of all text chunks received. 
- captured_content = Some(MessageContent::from_text(captured_text_content)); + if let Some(existing_content) = &mut captured_content { + existing_content.extend(MessageContent::from_text(captured_text_content)); + } else { + captured_content = Some(MessageContent::from_text(captured_text_content)); + } } if let Some(captured_tool_calls) = captured_tool_calls { if let Some(existing_content) = &mut captured_content { @@ -179,6 +214,53 @@ impl StreamEnd { let captured_content = self.captured_content?; Some(captured_content.into_tool_calls()) } + + /// Returns all captured thought signatures, if any. + pub fn captured_thought_signatures(&self) -> Option> { + let captured_content = self.captured_content.as_ref()?; + Some( + captured_content + .parts() + .iter() + .filter_map(|p| p.as_thought_signature()) + .collect(), + ) + } + + /// Consumes `self` and returns all captured thought signatures, if any. + pub fn captured_into_thought_signatures(self) -> Option> { + let captured_content = self.captured_content?; + Some( + captured_content + .into_parts() + .into_iter() + .filter_map(|p| p.into_thought_signature()) + .collect(), + ) + } + + /// Convenience: build an assistant message for a tool-use handoff that places + /// thought signatures (if any) before tool calls. Returns None if no tool calls + /// were captured. 
+ pub fn into_assistant_message_for_tool_use(self) -> Option { + let content = self.captured_content?; + let mut thought_signatures: Vec = Vec::new(); + let mut tool_calls: Vec = Vec::new(); + for part in content.into_parts() { + match part { + ContentPart::ThoughtSignature(t) => thought_signatures.push(t), + ContentPart::ToolCall(tc) => tool_calls.push(tc), + _ => {} + } + } + if tool_calls.is_empty() { + return None; + } + Some(ChatMessage::assistant_tool_calls_with_thoughts( + tool_calls, + thought_signatures, + )) + } } // endregion: --- ChatStreamEvent diff --git a/src/chat/content_part.rs b/src/chat/content_part.rs index 9b71d624..b30679f7 100644 --- a/src/chat/content_part.rs +++ b/src/chat/content_part.rs @@ -21,6 +21,9 @@ pub enum ContentPart { #[from] ToolResponse(ToolResponse), + + #[from(ignore)] + ThoughtSignature(String), } /// Constructors @@ -136,6 +139,24 @@ impl ContentPart { None } } + + /// Borrow the thought signature if present. + pub fn as_thought_signature(&self) -> Option<&str> { + if let ContentPart::ThoughtSignature(thought_signature) = self { + Some(thought_signature) + } else { + None + } + } + + /// Extract the thought, consuming the part. + pub fn into_thought_signature(self) -> Option { + if let ContentPart::ThoughtSignature(thought_signature) = self { + Some(thought_signature) + } else { + None + } + } } /// is_.. Accessors @@ -179,6 +200,11 @@ impl ContentPart { pub fn is_tool_response(&self) -> bool { matches!(self, ContentPart::ToolResponse(_)) } + + /// Returns true if this part is a thought. + pub fn is_thought_signature(&self) -> bool { + matches!(self, ContentPart::ThoughtSignature(_)) + } } // endregion: --- Content Part diff --git a/src/chat/message_content.rs b/src/chat/message_content.rs index bf6bdcc8..1a1d66d0 100644 --- a/src/chat/message_content.rs +++ b/src/chat/message_content.rs @@ -47,6 +47,29 @@ impl MessageContent { self.parts.push(part.into()); } + /// Insert one part at the given index (mutating). 
+ pub fn insert(&mut self, index: usize, part: impl Into) { + self.parts.insert(index, part.into()); + } + + /// Prepend one part to the beginning (mutating). + pub fn prepend(&mut self, part: impl Into) { + self.parts.insert(0, part.into()); + } + + /// Prepend multiple parts while preserving their original order. + pub fn extend_front(&mut self, iter: I) + where + I: IntoIterator, + { + // Collect then insert in reverse so that the first element in `iter` + // ends up closest to the front after all insertions. + let collected: Vec = iter.into_iter().collect(); + for part in collected.into_iter().rev() { + self.parts.insert(0, part); + } + } + /// Extend with an iterator of parts, returning self. pub fn extended(mut self, iter: I) -> Self where diff --git a/src/chat/printer.rs b/src/chat/printer.rs index a14c9db9..9c08cd18 100644 --- a/src/chat/printer.rs +++ b/src/chat/printer.rs @@ -68,6 +68,7 @@ async fn print_chat_stream_inner( let mut first_chunk = true; let mut first_reasoning_chunk = true; + let mut first_thought_signature_chunk = true; let mut first_tool_chunk = true; while let Some(next) = stream.next().await { @@ -109,6 +110,19 @@ async fn print_chat_stream_inner( } } + ChatStreamEvent::ThoughtSignatureChunk(StreamChunk { content }) => { + if print_events && first_thought_signature_chunk { + first_thought_signature_chunk = false; + ( + Some("\n-- ChatStreamEvent::ThoughtSignatureChunk (concatenated):\n".to_string()), + Some(content), + false, // print but do not capture + ) + } else { + (None, Some(content), false) // print but do not capture + } + } + ChatStreamEvent::ToolCallChunk(tool_chunk) => { if print_events && first_tool_chunk { first_tool_chunk = false; diff --git a/src/chat/tool/tool_call.rs b/src/chat/tool/tool_call.rs index 36effb7d..49bd8c10 100644 --- a/src/chat/tool/tool_call.rs +++ b/src/chat/tool/tool_call.rs @@ -14,4 +14,14 @@ pub struct ToolCall { /// JSON arguments payload as provided by the model. 
/// Kept as `serde_json::Value` so callers can deserialize into their own types. pub fn_arguments: Value, + + /// Optional thought signatures that should precede tool calls in the assistant turn. + /// + /// When present on the first tool call in a batch, `ChatMessage::from(Vec)` + /// will automatically include these as leading `ThoughtSignature` parts in the + /// assistant message content. This enables simple continuations like: + /// `append_message(tool_calls).append_message(tool_response)` without having to + /// manually inject thoughts. + #[serde(skip_serializing_if = "Option::is_none")] + pub thought_signatures: Option>, } diff --git a/src/webc/web_stream.rs b/src/webc/web_stream.rs index 864c6eb7..8cd01b23 100644 --- a/src/webc/web_stream.rs +++ b/src/webc/web_stream.rs @@ -175,45 +175,91 @@ fn new_with_pretty_json_array( buff_string: String, partial_message: &mut Option, ) -> Result { - let buff_str = buff_string.trim(); + let mut buff_str = buff_string.as_str(); let mut messages: Vec = Vec::new(); - // -- Capture the array start/end and each eventual sub-object (assuming only one sub-object) - let (array_start, rest_str) = match buff_str.strip_prefix('[') { - Some(rest) => (Some("["), rest.trim()), - None => (None, buff_str), - }; + // -- 1. Prepend partial message if any + let full_string_holder: String; + if let Some(partial) = partial_message.take() { + full_string_holder = format!("{}{}", partial, buff_str); + buff_str = full_string_holder.as_str(); + } - // Remove the eventual ',' prefix and suffix. - let rest_str = rest_str.strip_prefix(',').unwrap_or(rest_str); - let rest_str = rest_str.strip_suffix(',').unwrap_or(rest_str); + // -- 2. Process the buffer + // We want to extract valid JSON objects. + // The stream is expected to be: `[` (optional), `{...}`, `,`, `{...}`, `]` (optional) + // We need to be robust against whitespace and commas. 
- let (rest_str, array_end) = match rest_str.strip_suffix(']') { - Some(rest) => (rest.trim(), Some("]")), - None => (rest_str, None), - }; + let mut depth = 0; + let mut in_string = false; + let mut escape = false; + let mut start_idx = 0; + let mut last_idx = 0; // Track the end of the last processed object - // -- Prep the BuffResponse - if let Some(array_start) = array_start { - messages.push(array_start.to_string()); - } - if !rest_str.is_empty() { - let full_str = if let Some(partial) = partial_message.take() { - format!("{partial}{rest_str}") + for (idx, c) in buff_str.char_indices() { + if in_string { + if escape { + escape = false; + } else if c == '\\' { + escape = true; + } else if c == '"' { + in_string = false; + } } else { - rest_str.to_string() - }; + match c { + '"' => in_string = true, + '{' => { + if depth == 0 { + start_idx = idx; + } + depth += 1; + } + '}' => { + depth -= 1; + if depth == 0 { + // Found a complete JSON object + // idx is the byte index of '}'. We want to include it. + // '}' is 1 byte, so end range is idx + 1 + let json_str = &buff_str[start_idx..idx + 1]; - if serde_json::from_str::(&full_str).is_ok() { - messages.push(full_str); - } else { - *partial_message = Some(full_str); + // Verify it's valid JSON (optional but good for safety) + if serde_json::from_str::(json_str).is_ok() { + messages.push(json_str.to_string()); + } else { + // Should not happen if logic is correct + tracing::warn!("WebStream: Extracted block failed JSON validation: {}", json_str); + } + // Update last_idx to point after this object + last_idx = idx + 1; + } + } + '[' => { + if depth == 0 { + messages.push("[".to_string()); + last_idx = idx + 1; + } + } + ']' => { + if depth == 0 { + messages.push("]".to_string()); + last_idx = idx + 1; + } + } + _ => { + // Ignore other characters outside of objects (whitespace, commas) + } + } } } - // We ignore the comma - if let Some(array_end) = array_end { - messages.push(array_end.to_string()); + + // -- 3. 
Handle remaining partial + // last_idx points to the byte after the last successfully processed object/token + if last_idx < buff_str.len() { + let remaining = &buff_str[last_idx..]; + if !remaining.trim().is_empty() { + *partial_message = Some(remaining.to_string()); + } } // -- Return the buff response diff --git a/tests/support/helpers.rs b/tests/support/helpers.rs index a870b89a..f3a0764e 100644 --- a/tests/support/helpers.rs +++ b/tests/support/helpers.rs @@ -91,7 +91,8 @@ pub async fn extract_stream_end(mut chat_stream: ChatStream) -> TestResult (), // nothing to do ChatStreamEvent::Chunk(s_chunk) => content.push(s_chunk.content), ChatStreamEvent::ReasoningChunk(s_chunk) => reasoning_content.push(s_chunk.content), - ChatStreamEvent::ToolCallChunk(_) => (), // ignore tool call chunks for now + ChatStreamEvent::ThoughtSignatureChunk(_) => (), // ignore thought signature chunks for now + ChatStreamEvent::ToolCallChunk(_) => (), // ignore tool call chunks for now ChatStreamEvent::End(s_end) => { stream_end = Some(s_end); break; From 2fd9df9a566364a228e99e315f4e82828385c149 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Mon, 24 Nov 2025 15:05:56 -0800 Subject: [PATCH 024/123] . update 0.5.0-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 951a143b..aae3f0cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.4.5-WIP" +version = "0.5.0-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 5ff9a400942071eec13b15782360dcee150d32e2 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Mon, 24 Nov 2025 15:07:18 -0800 Subject: [PATCH 025/123] . 
clippy clean --- examples/c10-tooluse-streaming.rs | 1 - src/adapter/adapters/gemini/adapter_impl.rs | 8 ++++---- src/chat/chat_message.rs | 12 ++++++------ src/chat/chat_stream.rs | 18 +++++++++--------- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/examples/c10-tooluse-streaming.rs b/examples/c10-tooluse-streaming.rs index aa5f88e5..800093ed 100644 --- a/examples/c10-tooluse-streaming.rs +++ b/examples/c10-tooluse-streaming.rs @@ -3,7 +3,6 @@ use genai::Client; use genai::chat::printer::{PrintChatStreamOptions, print_chat_stream}; use genai::chat::{ChatMessage, ChatOptions, ChatRequest, Tool, ToolResponse}; use genai::chat::{ChatStreamEvent, ToolCall}; -use genai::resolver::AuthData; use serde_json::json; use tracing_subscriber::EnvFilter; diff --git a/src/adapter/adapters/gemini/adapter_impl.rs b/src/adapter/adapters/gemini/adapter_impl.rs index 6870575a..7f6c240d 100644 --- a/src/adapter/adapters/gemini/adapter_impl.rs +++ b/src/adapter/adapters/gemini/adapter_impl.rs @@ -223,10 +223,10 @@ impl Adapter for GeminiAdapter { let thought_signatures_for_call = (!thoughts.is_empty() && !tool_calls.is_empty()).then(|| thoughts.clone()); let mut parts: Vec = thoughts.into_iter().map(ContentPart::ThoughtSignature).collect(); - if let Some(signatures) = thought_signatures_for_call { - if let Some(first_call) = tool_calls.first_mut() { - first_call.thought_signatures = Some(signatures); - } + if let Some(signatures) = thought_signatures_for_call + && let Some(first_call) = tool_calls.first_mut() + { + first_call.thought_signatures = Some(signatures); } if !texts.is_empty() { diff --git a/src/chat/chat_message.rs b/src/chat/chat_message.rs index 0d164bae..c4b2d345 100644 --- a/src/chat/chat_message.rs +++ b/src/chat/chat_message.rs @@ -115,12 +115,12 @@ pub enum ChatRole { /// Will create a Assisttant ChatMessage with this vect of tool impl From> for ChatMessage { fn from(tool_calls: Vec) -> Self { - if let Some(first) = tool_calls.first() { - if let 
Some(thoughts) = &first.thought_signatures { - let mut parts: Vec = thoughts.iter().cloned().map(ContentPart::ThoughtSignature).collect(); - parts.extend(tool_calls.into_iter().map(ContentPart::ToolCall)); - return ChatMessage::assistant(MessageContent::from_parts(parts)); - } + if let Some(first) = tool_calls.first() + && let Some(thoughts) = &first.thought_signatures + { + let mut parts: Vec = thoughts.iter().cloned().map(ContentPart::ThoughtSignature).collect(); + parts.extend(tool_calls.into_iter().map(ContentPart::ToolCall)); + return ChatMessage::assistant(MessageContent::from_parts(parts)); } Self { role: ChatRole::Assistant, diff --git a/src/chat/chat_stream.rs b/src/chat/chat_stream.rs index 62020a63..9b9aecd8 100644 --- a/src/chat/chat_stream.rs +++ b/src/chat/chat_stream.rs @@ -133,15 +133,15 @@ impl From for StreamEnd { .collect::>(); // Also attach thoughts to the first tool call so that // ChatMessage::from(Vec) can auto-prepend them. - if let Some(tool_calls) = captured_tool_calls.as_mut() { - if let Some(first_call) = tool_calls.first_mut() { - first_call.thought_signatures = Some( - thoughts_content - .iter() - .filter_map(|p| p.as_thought_signature().map(|s| s.to_string())) - .collect(), - ); - } + if let Some(tool_calls) = captured_tool_calls.as_mut() + && let Some(first_call) = tool_calls.first_mut() + { + first_call.thought_signatures = Some( + thoughts_content + .iter() + .filter_map(|p| p.as_thought_signature().map(|s| s.to_string())) + .collect(), + ); } if let Some(existing_content) = &mut captured_content { existing_content.extend_front(thoughts_content); From c654781501a8f2c00ecfcbb0076d0d8a048e384d Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Mon, 24 Nov 2025 15:10:40 -0800 Subject: [PATCH 026/123] . 
anthropic - update with opus-4-5 (and set correct max_tokens) --- src/adapter/adapters/anthropic/adapter_impl.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/adapter/adapters/anthropic/adapter_impl.rs b/src/adapter/adapters/anthropic/adapter_impl.rs index e22c6852..73d652c0 100644 --- a/src/adapter/adapters/anthropic/adapter_impl.rs +++ b/src/adapter/adapters/anthropic/adapter_impl.rs @@ -31,18 +31,14 @@ const REASONING_HIGH: u32 = 24000; // For max model tokens see: https://docs.anthropic.com/en/docs/about-claude/models/overview // // fall back -const MAX_TOKENS_64K: u32 = 64000; // claude-3-7-sonnet, claude-sonnet-4.x, claude-haiku-4-5 +const MAX_TOKENS_64K: u32 = 64000; // 3-7-sonnet, connet-4.x, opus-5-x, haiku-4-5 // custom const MAX_TOKENS_32K: u32 = 32000; // claude-opus-4 const MAX_TOKENS_8K: u32 = 8192; // claude-3-5-sonnet, claude-3-5-haiku const MAX_TOKENS_4K: u32 = 4096; // claude-3-opus, claude-3-haiku const ANTHROPIC_VERSION: &str = "2023-06-01"; -const MODELS: &[&str] = &[ - "claude-opus-4-1-20250805", - "claude-sonnet-4-5-20250929", - "claude-haiku-4-5-20251001", -]; +const MODELS: &[&str] = &["claude-opus-4-5", "claude-sonnet-4-5", "claude-haiku-4-5"]; impl AnthropicAdapter { pub const API_KEY_DEFAULT_ENV_NAME: &str = "ANTHROPIC_API_KEY"; @@ -174,8 +170,7 @@ impl Adapter for AnthropicAdapter { payload.x_insert("stop_sequences", options_set.stop_sequences())?; } - //const MAX_TOKENS_64K: u32 = 64000; // claude-sonnet-4, claude-3-7-sonnet, - // custom + // const MAX_TOKENS_64K: u32 = 64000; // claude-opus-4-5 claude-sonnet-4, claude-3-7-sonnet, // const MAX_TOKENS_32K: u32 = 32000; // claude-opus-4 // const MAX_TOKENS_8K: u32 = 8192; // claude-3-5-sonnet, claude-3-5-haiku // const MAX_TOKENS_4K: u32 = 4096; // claude-3-opus, claude-3-haiku @@ -184,6 +179,7 @@ impl Adapter for AnthropicAdapter { if model_name.contains("claude-sonnet") || model_name.contains("claude-haiku") || 
model_name.contains("claude-3-7-sonnet") + || model_name.contains("claude-opus-4-5") { MAX_TOKENS_64K } else if model_name.contains("claude-opus-4") { From abee8365cdf79d7fabf3d68d3c52405cb247243c Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Mon, 24 Nov 2025 15:18:53 -0800 Subject: [PATCH 027/123] . gemini model names --- src/adapter/adapters/gemini/adapter_impl.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/adapter/adapters/gemini/adapter_impl.rs b/src/adapter/adapters/gemini/adapter_impl.rs index 7f6c240d..3cbbfa18 100644 --- a/src/adapter/adapters/gemini/adapter_impl.rs +++ b/src/adapter/adapters/gemini/adapter_impl.rs @@ -18,6 +18,7 @@ pub struct GeminiAdapter; // Note: Those model names are just informative, as the Gemini AdapterKind is selected on `startsWith("gemini")` const MODELS: &[&str] = &[ // + "gemini-3-pro-preview", "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", From 76528663527b9ccb9e1d360b5803f19bf369bba7 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Mon, 24 Nov 2025 15:21:28 -0800 Subject: [PATCH 028/123] . 
test - fix wikipedia image issue with image url (use aipack image url) --- examples/c07-image.rs | 2 +- tests/support/data.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/c07-image.rs b/examples/c07-image.rs index 7b17e0dd..b2da4a29 100644 --- a/examples/c07-image.rs +++ b/examples/c07-image.rs @@ -6,7 +6,7 @@ use genai::chat::{ChatMessage, ChatRequest, ContentPart}; use tracing_subscriber::EnvFilter; const MODEL: &str = "gpt-4o-mini"; -const IMAGE_URL: &str = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"; +const IMAGE_URL: &str = "https://aipack.ai/images/test-duck.jpg"; #[tokio::main] async fn main() -> Result<(), Box> { diff --git a/tests/support/data.rs b/tests/support/data.rs index d7fa5c53..fc026a7a 100644 --- a/tests/support/data.rs +++ b/tests/support/data.rs @@ -7,7 +7,7 @@ use simple_fs::SPath; use std::fs::File; use std::io::Read; -pub const IMAGE_URL_JPG_DUCK: &str = "https://upload.wikimedia.org/wikipedia/commons/thumb/b/bf/Bucephala-albeola-010.jpg/440px-Bucephala-albeola-010.jpg"; +pub const IMAGE_URL_JPG_DUCK: &str = "https://aipack.ai/images/test-duck.jpg"; pub const AUDIO_TEST_FILE_PATH: &str = "./tests/data/phrase_neil_armstrong.wav"; /// Get the base64 of the image above (but resized/lower to fit 5kb) From 01783e3a05457647c593af7d9f176572c8387a50 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Mon, 24 Nov 2025 16:14:53 -0800 Subject: [PATCH 029/123] ^ gemini - for gemini-3, convert ReasoningEffort Low/High to the appropriate gemini thinkingLevel LOW/HIGH, fall back on budget if not gemini 3 or other effort --- src/adapter/adapters/gemini/adapter_impl.rs | 56 ++++++++++++++++----- tests/support/common_tests.rs | 8 ++- tests/tests_p_anthropic.rs | 4 +- tests/tests_p_gemini.rs | 12 +++++ tests/tests_p_openai.rs | 3 +- 5 files changed, 66 insertions(+), 17 deletions(-) diff --git 
a/src/adapter/adapters/gemini/adapter_impl.rs b/src/adapter/adapters/gemini/adapter_impl.rs index 3cbbfa18..5d39c337 100644 --- a/src/adapter/adapters/gemini/adapter_impl.rs +++ b/src/adapter/adapters/gemini/adapter_impl.rs @@ -30,6 +30,16 @@ const REASONING_LOW: u32 = 1000; const REASONING_MEDIUM: u32 = 8000; const REASONING_HIGH: u32 = 24000; +fn get_gemini_thinking_budget_value(effort: &ReasoningEffort) -> u32 { + // -- for now, match minimal to Low (because zero is not supported by 2.5 pro) + match effort { + ReasoningEffort::Budget(budget) => *budget, + ReasoningEffort::Low | ReasoningEffort::Minimal => REASONING_LOW, + ReasoningEffort::Medium => REASONING_MEDIUM, + ReasoningEffort::High => REASONING_HIGH, + } +} + // curl \ // -H 'Content-Type: application/json' \ // -d '{"contents":[{"parts":[{"text":"Explain how AI works"}]}]}' \ @@ -83,16 +93,16 @@ impl Adapter for GeminiAdapter { let headers = Headers::from(("x-goog-api-key".to_string(), api_key.to_string())); // -- Reasoning Budget - let (provider_model_name, reasoning_budget) = match (model_name, options_set.reasoning_effort()) { - // No explicity reasoning_effor, try to infer from model name suffix (supports -zero) + let (provider_model_name, computed_reasoning_effort) = match (model_name, options_set.reasoning_effort()) { + // No explicity reasoning_effort, try to infer from model name suffix (supports -zero) (model, None) => { // let model_name: &str = &model.model_name; if let Some((prefix, last)) = model_name.rsplit_once('-') { let reasoning = match last { - "zero" => Some(REASONING_ZERO), - "low" => Some(REASONING_LOW), - "medium" => Some(REASONING_MEDIUM), - "high" => Some(REASONING_HIGH), + "zero" => Some(ReasoningEffort::Budget(REASONING_ZERO)), + "low" => Some(ReasoningEffort::Low), + "medium" => Some(ReasoningEffort::Medium), + "high" => Some(ReasoningEffort::High), _ => None, }; // create the model name if there was a `-..` reasoning suffix @@ -107,11 +117,11 @@ impl Adapter for 
GeminiAdapter { (model, Some(effort)) => { let effort = match effort { // -- for now, match minimal to Low (because zero is not supported by 2.5 pro) - ReasoningEffort::Minimal => REASONING_LOW, - ReasoningEffort::Low => REASONING_LOW, - ReasoningEffort::Medium => REASONING_MEDIUM, - ReasoningEffort::High => REASONING_HIGH, - ReasoningEffort::Budget(budget) => *budget, + ReasoningEffort::Minimal => ReasoningEffort::Low, + ReasoningEffort::Low => ReasoningEffort::Low, + ReasoningEffort::Medium => ReasoningEffort::Medium, + ReasoningEffort::High => ReasoningEffort::High, + ReasoningEffort::Budget(budget) => ReasoningEffort::Budget(*budget), }; (model, Some(effort)) } @@ -130,8 +140,28 @@ impl Adapter for GeminiAdapter { }); // -- Set the reasoning effort - if let Some(budget) = reasoning_budget { - payload.x_insert("/generationConfig/thinkingConfig/thinkingBudget", budget)?; + if let Some(computed_reasoning_effort) = computed_reasoning_effort { + // -- For gemini-3 use the thinkingLevel if Low or High (does not support mediume for now) + if provider_model_name.contains("gemini-3") { + match computed_reasoning_effort { + ReasoningEffort::Low | ReasoningEffort::Minimal => { + payload.x_insert("/generationConfig/thinkingConfig/thinkingLevel", "LOW")?; + } + ReasoningEffort::High => { + payload.x_insert("/generationConfig/thinkingConfig/thinkingLevel", "HIGH")?; + } + // Fallback on thinkingBudget + other => { + let budget = get_gemini_thinking_budget_value(&other); + payload.x_insert("/generationConfig/thinkingConfig/thinkingBudget", budget)?; + } + } + } + // -- Otherwise, Do thinking budget + else { + let budget = get_gemini_thinking_budget_value(&computed_reasoning_effort); + payload.x_insert("/generationConfig/thinkingConfig/thinkingBudget", budget)?; + } } // Note: It's unclear from the spec if the content of systemInstruction should have a role. 
diff --git a/tests/support/common_tests.rs b/tests/support/common_tests.rs index f52a14dc..ea514a48 100644 --- a/tests/support/common_tests.rs +++ b/tests/support/common_tests.rs @@ -58,11 +58,15 @@ pub async fn common_test_chat_simple_ok(model: &str, checks: Option) -> T } // NOTE: here we still have the options about checking REASONING_USAGE, because Anthropic does not have reasoning token. -pub async fn common_test_chat_reasoning_ok(model: &str, checks: Option) -> TestResult<()> { +pub async fn common_test_chat_reasoning_ok( + model: &str, + reasoning_effort: ReasoningEffort, + checks: Option, +) -> TestResult<()> { // -- Setup & Fixtures let client = Client::default(); let chat_req = seed_chat_req_simple(); - let options = ChatOptions::default().with_reasoning_effort(ReasoningEffort::High); + let options = ChatOptions::default().with_reasoning_effort(reasoning_effort); // -- Exec let chat_res = client.exec_chat(model, chat_req, Some(&options)).await?; diff --git a/tests/tests_p_anthropic.rs b/tests/tests_p_anthropic.rs index a010610c..e794b978 100644 --- a/tests/tests_p_anthropic.rs +++ b/tests/tests_p_anthropic.rs @@ -2,6 +2,7 @@ mod support; use crate::support::{Check, TestResult, common_tests}; use genai::adapter::AdapterKind; +use genai::chat::ReasoningEffort; use genai::resolver::AuthData; use serial_test::serial; @@ -25,7 +26,8 @@ async fn test_chat_simple_ok() -> TestResult<()> { #[serial(anthropic)] async fn test_chat_reasoning_ok() -> TestResult<()> { // NOTE: Does not test REASONING_USAGE as Anthropic does not report it - common_tests::common_test_chat_reasoning_ok(MODEL_THINKING, Some(Check::REASONING_CONTENT)).await + common_tests::common_test_chat_reasoning_ok(MODEL_THINKING, ReasoningEffort::High, Some(Check::REASONING_CONTENT)) + .await } #[tokio::test] diff --git a/tests/tests_p_gemini.rs b/tests/tests_p_gemini.rs index b4ea38d1..57381c90 100644 --- a/tests/tests_p_gemini.rs +++ b/tests/tests_p_gemini.rs @@ -2,10 +2,12 @@ mod support; use 
crate::support::{Check, TestResult, common_tests}; use genai::adapter::AdapterKind; +use genai::chat::ReasoningEffort; use genai::resolver::AuthData; // "gemini-2.5-flash" "gemini-2.5-pro" "gemini-2.5-flash-lite" // "gemini-2.5-flash-zero" +const MODEL_GPRO_3: &str = "gemini-3-pro-preview"; const MODEL: &str = "gemini-2.5-flash"; const MODEL_NS: &str = "gemini::gemini-2.5-flash"; @@ -16,6 +18,16 @@ async fn test_chat_simple_ok() -> TestResult<()> { common_tests::common_test_chat_simple_ok(MODEL, None).await } +#[tokio::test] +async fn test_chat_reasoning_ok() -> TestResult<()> { + common_tests::common_test_chat_reasoning_ok( + MODEL_GPRO_3, + ReasoningEffort::Low, + Some(Check::REASONING_USAGE | Check::REASONING_USAGE), + ) + .await +} + #[tokio::test] async fn test_chat_namespaced_ok() -> TestResult<()> { common_tests::common_test_chat_simple_ok(MODEL_NS, None).await diff --git a/tests/tests_p_openai.rs b/tests/tests_p_openai.rs index 92b57177..825091eb 100644 --- a/tests/tests_p_openai.rs +++ b/tests/tests_p_openai.rs @@ -2,6 +2,7 @@ mod support; use crate::support::{Check, TestResult, common_tests}; use genai::adapter::AdapterKind; +use genai::chat::ReasoningEffort; use genai::resolver::AuthData; // note: "gpt-4o-mini" has issue when image & pdf @@ -39,7 +40,7 @@ async fn test_chat_simple_ok() -> TestResult<()> { #[tokio::test] async fn test_chat_reasoning_ok() -> TestResult<()> { // For now, do not test Check::REASONING, for OpenAI as it is not captured - common_tests::common_test_chat_reasoning_ok(MODEL_LATEST, Some(Check::REASONING_USAGE)).await + common_tests::common_test_chat_reasoning_ok(MODEL_LATEST, ReasoningEffort::High, Some(Check::REASONING_USAGE)).await } #[tokio::test] From fa51b2487bad01a225bafe188dcd36a7496aa7d9 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Mon, 24 Nov 2025 16:15:35 -0800 Subject: [PATCH 030/123] . 
update to v0.5.0-alpha.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index aae3f0cd..2cbca139 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0-WIP" +version = "0.5.0-alpha.1" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From ebae961abd2c14e12f1ef6f6c61522855367824c Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Mon, 24 Nov 2025 16:17:11 -0800 Subject: [PATCH 031/123] . update version to 0.5.0-alpha.2-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2cbca139..6b6448a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0-alpha.1" +version = "0.5.0-alpha.2-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. 
(OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 4adece94cad31669bb2e8b85885011a2929bc5f6 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Tue, 25 Nov 2025 13:46:51 -0800 Subject: [PATCH 032/123] ^ anthropic - implemented new output_config.effort for opus-4-5 (matching ReasonningEffort) --- .../adapters/anthropic/adapter_impl.rs | 72 +++++++++++++++---- src/adapter/adapters/anthropic/mod.rs | 1 + src/adapter/adapters/gemini/adapter_impl.rs | 1 + tests/tests_p_anthropic.rs | 3 +- 4 files changed, 61 insertions(+), 16 deletions(-) diff --git a/src/adapter/adapters/anthropic/adapter_impl.rs b/src/adapter/adapters/anthropic/adapter_impl.rs index 73d652c0..4f572d2a 100644 --- a/src/adapter/adapters/anthropic/adapter_impl.rs +++ b/src/adapter/adapters/anthropic/adapter_impl.rs @@ -21,6 +21,15 @@ const REASONING_LOW: u32 = 1024; const REASONING_MEDIUM: u32 = 8000; const REASONING_HIGH: u32 = 24000; +fn get_anthropic_thinking_budget_value(effort: &ReasoningEffort) -> u32 { + match effort { + ReasoningEffort::Budget(budget) => *budget, + ReasoningEffort::Low | ReasoningEffort::Minimal => REASONING_LOW, + ReasoningEffort::Medium => REASONING_MEDIUM, + ReasoningEffort::High => REASONING_HIGH, + } +} + // NOTE: For Anthropic, the max_tokens must be specified. // To avoid surprises, the default value for genai is the maximum for a given model. // Current logic: @@ -31,7 +40,7 @@ const REASONING_HIGH: u32 = 24000; // For max model tokens see: https://docs.anthropic.com/en/docs/about-claude/models/overview // // fall back -const MAX_TOKENS_64K: u32 = 64000; // 3-7-sonnet, connet-4.x, opus-5-x, haiku-4-5 +const MAX_TOKENS_64K: u32 = 64000; // claude-opus-4-5 claude-sonnet... 
(4 and above), claude-haiku..., claude-3-7-sonnet, // custom const MAX_TOKENS_32K: u32 = 32000; // claude-opus-4 const MAX_TOKENS_8K: u32 = 8192; // claude-3-5-sonnet, claude-3-5-haiku @@ -87,6 +96,7 @@ impl Adapter for AnthropicAdapter { let headers = Headers::from(vec![ // headers ("x-api-key".to_string(), api_key), + ("anthropic-beta".to_string(), "effort-2025-11-24".to_string()), ("anthropic-version".to_string(), ANTHROPIC_VERSION.to_string()), ]); @@ -100,21 +110,23 @@ impl Adapter for AnthropicAdapter { // -- Extract Model Name and Reasoning let (raw_model_name, _) = model.model_name.as_model_name_and_namespace(); - let (model_name, thinking_budget) = match (raw_model_name, options_set.reasoning_effort()) { + // -- Reasoning Budget + let (model_name, computed_reasoning_effort) = match (raw_model_name, options_set.reasoning_effort()) { // No explicity reasoning_effor, try to infer from model name suffix (supports -zero) (model, None) => { // let model_name: &str = &model.model_name; if let Some((prefix, last)) = raw_model_name.rsplit_once('-') { let reasoning = match last { - "zero" => None, // That will disable thinking - "minimal" => None, // That will disable thinking - "low" => Some(REASONING_LOW), - "medium" => Some(REASONING_MEDIUM), - "high" => Some(REASONING_HIGH), + "zero" => None, // That will disable thinking + "minimal" => Some(ReasoningEffort::Low), + "low" => Some(ReasoningEffort::Low), + "medium" => Some(ReasoningEffort::Medium), + "high" => Some(ReasoningEffort::High), _ => None, }; // create the model name if there was a `-..` reasoning suffix let model = if reasoning.is_some() { prefix } else { model }; + (model, reasoning) } else { (model, None) @@ -123,14 +135,14 @@ impl Adapter for AnthropicAdapter { // If reasoning effort, turn the low, medium, budget ones into Budget (model, Some(effort)) => { let effort = match effort { - // -- When minimal, same a zeror - ReasoningEffort::Minimal => None, - ReasoningEffort::Low => Some(REASONING_LOW), 
- ReasoningEffort::Medium => Some(REASONING_MEDIUM), - ReasoningEffort::High => Some(REASONING_HIGH), - ReasoningEffort::Budget(budget) => Some(*budget), + // -- for now, match minimal to Low (because zero is not supported by 2.5 pro) + ReasoningEffort::Minimal => ReasoningEffort::Low, + ReasoningEffort::Low => ReasoningEffort::Low, + ReasoningEffort::Medium => ReasoningEffort::Medium, + ReasoningEffort::High => ReasoningEffort::High, + ReasoningEffort::Budget(budget) => ReasoningEffort::Budget(*budget), }; - (model, effort) + (model, Some(effort)) } }; @@ -151,7 +163,36 @@ impl Adapter for AnthropicAdapter { } // -- Set the reasoning effort - if let Some(budget) = thinking_budget { + if let Some(computed_reasoning_effort) = computed_reasoning_effort { + // DOC: https://platform.claude.com/docs/en/build-with-claude/effort + // - Effort parameter: Controls how Claude spends all tokens—including thinking tokens, text responses, and tool calls + // - Thinking token budget: Sets a maximum limit on thinking tokens specifically + // For best performance on complex reasoning tasks, use high effort (the default) with a high thinking token budget. + // This allows Claude to think thoroughly and provide comprehensive responses. 
+ + // In short, should use both thinking budget and effort + + // -- if opus-4-5 then, we set the anthropic effort + if model_name.contains("opus-4-5") { + let effort = match computed_reasoning_effort { + ReasoningEffort::Minimal => "low", + ReasoningEffort::Low => "low", + ReasoningEffort::Medium => "medium", + ReasoningEffort::High => "high", + ReasoningEffort::Budget(_) => "", // for now, will not set + }; + if !effort.is_empty() { + payload.x_insert( + "output_config", + json!({ + "effort": effort + }), + )?; + } + } + + // -- All models, including opus-4-5, we see the thinking budget + let budget = get_anthropic_thinking_budget_value(&computed_reasoning_effort); payload.x_insert( "thinking", json!({ @@ -219,6 +260,7 @@ impl Adapter for AnthropicAdapter { // -- Capture the usage let usage = body.x_take::("usage"); + let usage = usage.map(Self::into_usage).unwrap_or_default(); // -- Capture the content diff --git a/src/adapter/adapters/anthropic/mod.rs b/src/adapter/adapters/anthropic/mod.rs index e685fdae..a7b33a47 100644 --- a/src/adapter/adapters/anthropic/mod.rs +++ b/src/adapter/adapters/anthropic/mod.rs @@ -1,5 +1,6 @@ //! API Documentation: https://docs.anthropic.com/en/api/messages //! Tool Documentation: https://docs.anthropic.com/en/docs/build-with-claude/tool-use +//! Effort Documentation: https://platform.claude.com/docs/en/build-with-claude/effort //! Model Names: https://docs.anthropic.com/en/docs/models-overview //! 
Pricing: https://www.anthropic.com/pricing#anthropic-api diff --git a/src/adapter/adapters/gemini/adapter_impl.rs b/src/adapter/adapters/gemini/adapter_impl.rs index 5d39c337..0edac468 100644 --- a/src/adapter/adapters/gemini/adapter_impl.rs +++ b/src/adapter/adapters/gemini/adapter_impl.rs @@ -100,6 +100,7 @@ impl Adapter for GeminiAdapter { if let Some((prefix, last)) = model_name.rsplit_once('-') { let reasoning = match last { "zero" => Some(ReasoningEffort::Budget(REASONING_ZERO)), + "minimal" => Some(ReasoningEffort::Low), "low" => Some(ReasoningEffort::Low), "medium" => Some(ReasoningEffort::Medium), "high" => Some(ReasoningEffort::High), diff --git a/tests/tests_p_anthropic.rs b/tests/tests_p_anthropic.rs index e794b978..087169a8 100644 --- a/tests/tests_p_anthropic.rs +++ b/tests/tests_p_anthropic.rs @@ -11,7 +11,8 @@ use serial_test::serial; // "claude-sonnet-4-20250514" (fail on test_chat_json_mode_ok) // const MODEL: &str = "claude-3-5-haiku-latest"; -const MODEL_THINKING: &str = "claude-sonnet-4-5-20250929"; +// const MODEL_THINKING: &str = "claude-sonnet-4-5-20250929"; +const MODEL_THINKING: &str = "claude-opus-4-5"; const MODEL_NS: &str = "anthropic::claude-3-5-haiku-latest"; // region: --- Chat From d4f56d2ed369a5191dac8263f4b071551a3f79bc Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Tue, 25 Nov 2025 14:50:37 -0800 Subject: [PATCH 033/123] . 0.5.0-alpha.2 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6b6448a9..2e2b09b0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0-alpha.2-WIP" +version = "0.5.0-alpha.2" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 502e396eddb217a93527e811a212ecac1301ae22 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Tue, 25 Nov 2025 14:51:43 -0800 Subject: [PATCH 034/123] . 
0.5.0-alpha.3-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2e2b09b0..7e0e7ddd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0-alpha.2" +version = "0.5.0-alpha.3-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From ffb37a713f96ed9a2b4fcb4c49f43fd91ce1ab20 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Wed, 26 Nov 2025 13:45:05 -0800 Subject: [PATCH 035/123] + ContentPart - Binary rom file (as base64) --- Cargo.toml | 3 +++ src/chat/content_part.rs | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 7e0e7ddd..946310ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,9 @@ reqwest = {version = "0.12", default-features = false, features = ["json", "rust reqwest-eventsource = "0.6" eventsource-stream = "0.2" bytes = "1.6" +# -- File +base64 = "0.22.0" +mime_guess = "=2.0.5" # -- Others derive_more = { version = "2", features = ["from", "display"] } value-ext = "0.1.2" diff --git a/src/chat/content_part.rs b/src/chat/content_part.rs index b30679f7..8cd4b646 100644 --- a/src/chat/content_part.rs +++ b/src/chat/content_part.rs @@ -1,6 +1,8 @@ +use crate::Result; use crate::chat::{ToolCall, ToolResponse}; use derive_more::From; use serde::{Deserialize, Serialize}; +use std::path::Path; use std::sync::Arc; // region: --- Content Part @@ -64,6 +66,37 @@ impl ContentPart { source: BinarySource::Url(url.into()), }) } + + /// Create a binary content part from a file path. + /// + /// Reads the file, determines the MIME type from the file extension, + /// and base64-encodes the content. + /// + /// - file_path: Path to the file to read. + /// + /// Returns an error if the file cannot be read. 
+ pub fn from_binary_file(file_path: impl AsRef) -> Result { + let file_path = file_path.as_ref(); + + // Read the file content + let content = std::fs::read(file_path) + .map_err(|e| crate::Error::Internal(format!("Failed to read file '{}': {}", file_path.display(), e)))?; + + // Determine MIME type from extension + let content_type = mime_guess::from_path(file_path).first_or_octet_stream().to_string(); + + // Base64 encode + let b64_content = base64::Engine::encode(&base64::engine::general_purpose::STANDARD, &content); + + // Extract file name + let name = file_path.file_name().and_then(|n| n.to_str()).map(String::from); + + Ok(ContentPart::Binary(Binary { + name, + content_type, + source: BinarySource::Base64(b64_content.into()), + })) + } } /// as_.., into_.. Accessors From adbff8d99f83eaba0227348ea206584e78a4b3bb Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Wed, 26 Nov 2025 14:07:04 -0800 Subject: [PATCH 036/123] > content_part - refactor binary into own file + binary - add constructors (from_base64, from_url, from_file)" --- src/chat/binary.rs | 133 +++++++++++++++++++++++++++++++++++++++ src/chat/content_part.rs | 115 ++------------------------------- src/chat/mod.rs | 2 + 3 files changed, 139 insertions(+), 111 deletions(-) create mode 100644 src/chat/binary.rs diff --git a/src/chat/binary.rs b/src/chat/binary.rs new file mode 100644 index 00000000..6ea32434 --- /dev/null +++ b/src/chat/binary.rs @@ -0,0 +1,133 @@ +use crate::Result; +use serde::{Deserialize, Serialize}; +use std::path::Path; +use std::sync::Arc; + +/// Binary payload attached to a message (e.g., image or PDF). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Binary { + /// MIME type, such as "image/png" or "application/pdf". + pub content_type: String, + + /// Where the bytes come from (base64 or URL). + pub source: BinarySource, + + /// Optional display name or filename. + pub name: Option, +} + +/// Constructors +impl Binary { + /// Construct a new Binary value. 
+ pub fn new(content_type: impl Into, source: BinarySource, name: Option) -> Self { + Self { + name, + content_type: content_type.into(), + source, + } + } + + /// Create a binary from a base64 payload. + /// + /// - content_type: MIME type (e.g., "image/png", "application/pdf"). + /// - content: base64-encoded bytes. + /// - name: optional display name or filename. + pub fn from_base64(content_type: impl Into, content: impl Into>, name: Option) -> Binary { + Binary { + name, + content_type: content_type.into(), + source: BinarySource::Base64(content.into()), + } + } + + /// Create a binary referencing a URL. + /// + /// Note: Only some providers accept URL-based inputs. + pub fn from_url(content_type: impl Into, url: impl Into, name: Option) -> Binary { + Binary { + name, + content_type: content_type.into(), + source: BinarySource::Url(url.into()), + } + } + + /// Create a binary from a file path. + /// + /// Reads the file, determines the MIME type from the file extension, + /// and base64-encodes the content. + /// + /// - file_path: Path to the file to read. + /// + /// Returns an error if the file cannot be read. + pub fn from_file(file_path: impl AsRef) -> Result { + let file_path = file_path.as_ref(); + + // Read the file content + let content = std::fs::read(file_path) + .map_err(|e| crate::Error::Internal(format!("Failed to read file '{}': {}", file_path.display(), e)))?; + + // Determine MIME type from extension + let content_type = mime_guess::from_path(file_path).first_or_octet_stream().to_string(); + + // Base64 encode + let b64_content = base64::Engine::encode(&base64::engine::general_purpose::STANDARD, &content); + + // Extract file name + let name = file_path.file_name().and_then(|n| n.to_str()).map(String::from); + + Ok(Binary { + name, + content_type, + source: BinarySource::Base64(b64_content.into()), + }) + } +} + +/// is_.., into_.. Accessors +impl Binary { + /// Returns true if this binary is an image (content_type starts with "image/"). 
+ pub fn is_image(&self) -> bool { + self.content_type.trim().to_ascii_lowercase().starts_with("image/") + } + + /// Returns true if this binary is an audio file (content_type starts with "audio/"). + pub fn is_audio(&self) -> bool { + self.content_type.trim().to_ascii_lowercase().starts_with("audio/") + } + + /// Returns true if this binary is a PDF (content_type equals "application/pdf"). + pub fn is_pdf(&self) -> bool { + self.content_type.trim().eq_ignore_ascii_case("application/pdf") + } + + /// Generate the web or data url from this binary + pub fn into_url(self) -> String { + match self.source { + BinarySource::Url(url) => url, + BinarySource::Base64(b64_content) => format!("data:{};base64,{b64_content}", self.content_type), + } + } +} + +// region: --- BinarySource + +/// Origin of a binary payload. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum BinarySource { + /// For models/services that support URL as input + /// NOTE: Few AI services support this. + Url(String), + + /// The base64 string of the image + /// + /// NOTE: Here we use an `Arc` to avoid cloning large amounts of data when cloning a ChatRequest. + /// The overhead is minimal compared to cloning relatively large data. + /// The downside is that it will be an Arc even when used only once, but for this particular data type, the net benefit is positive. + Base64(Arc), +} + +// endregion: --- BinarySource + +// No `Local` location; this would require handling errors like "file not found" etc. +// Such a file can be easily provided by the user as Base64, and we can implement a convenient +// TryFrom to Base64 version. All LLMs accept local images only as Base64. 
diff --git a/src/chat/content_part.rs b/src/chat/content_part.rs index 8cd4b646..ff512c75 100644 --- a/src/chat/content_part.rs +++ b/src/chat/content_part.rs @@ -1,12 +1,10 @@ use crate::Result; -use crate::chat::{ToolCall, ToolResponse}; +use crate::chat::{Binary, ToolCall, ToolResponse}; use derive_more::From; use serde::{Deserialize, Serialize}; use std::path::Path; use std::sync::Arc; -// region: --- Content Part - /// A single content segment in a chat message. /// /// Variants cover plain text, binary payloads (e.g., images/PDF), and tool calls/responses. @@ -45,11 +43,7 @@ impl ContentPart { content: impl Into>, name: Option, ) -> ContentPart { - ContentPart::Binary(Binary { - name, - content_type: content_type.into(), - source: BinarySource::Base64(content.into()), - }) + ContentPart::Binary(Binary::from_base64(content_type, content, name)) } /// Create a binary content part referencing a URL. @@ -60,11 +54,7 @@ impl ContentPart { url: impl Into, name: Option, ) -> ContentPart { - ContentPart::Binary(Binary { - name, - content_type: content_type.into(), - source: BinarySource::Url(url.into()), - }) + ContentPart::Binary(Binary::from_url(content_type, url, name)) } /// Create a binary content part from a file path. @@ -76,26 +66,7 @@ impl ContentPart { /// /// Returns an error if the file cannot be read. 
pub fn from_binary_file(file_path: impl AsRef) -> Result { - let file_path = file_path.as_ref(); - - // Read the file content - let content = std::fs::read(file_path) - .map_err(|e| crate::Error::Internal(format!("Failed to read file '{}': {}", file_path.display(), e)))?; - - // Determine MIME type from extension - let content_type = mime_guess::from_path(file_path).first_or_octet_stream().to_string(); - - // Base64 encode - let b64_content = base64::Engine::encode(&base64::engine::general_purpose::STANDARD, &content); - - // Extract file name - let name = file_path.file_name().and_then(|n| n.to_str()).map(String::from); - - Ok(ContentPart::Binary(Binary { - name, - content_type, - source: BinarySource::Base64(b64_content.into()), - })) + Ok(ContentPart::Binary(Binary::from_file(file_path)?)) } } @@ -239,81 +210,3 @@ impl ContentPart { matches!(self, ContentPart::ThoughtSignature(_)) } } - -// endregion: --- Content Part - -// region: --- Binary - -/// Binary payload attached to a message (e.g., image or PDF). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Binary { - /// MIME type, such as "image/png" or "application/pdf". - pub content_type: String, - - /// Where the bytes come from (base64 or URL). - pub source: BinarySource, - - /// Optional display name or filename. - pub name: Option, -} - -impl Binary { - /// Construct a new Binary value. - pub fn new(content_type: impl Into, source: BinarySource, name: Option) -> Self { - Self { - name, - content_type: content_type.into(), - source, - } - } -} - -impl Binary { - /// Returns true if this binary is an image (content_type starts with "image/"). - pub fn is_image(&self) -> bool { - self.content_type.trim().to_ascii_lowercase().starts_with("image/") - } - - /// Returns true if this binary is an audio file (content_type starts with "audio/"). 
- pub fn is_audio(&self) -> bool { - self.content_type.trim().to_ascii_lowercase().starts_with("audio/") - } - - /// Returns true if this binary is a PDF (content_type equals "application/pdf"). - pub fn is_pdf(&self) -> bool { - self.content_type.trim().eq_ignore_ascii_case("application/pdf") - } - - /// Generate the web or data url from this binary - pub fn into_url(self) -> String { - match self.source { - BinarySource::Url(url) => url, - BinarySource::Base64(b64_content) => format!("data:{};base64,{b64_content}", self.content_type), - } - } -} - -// endregion: --- Binary - -// region: --- BinarySource - -/// Origin of a binary payload. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum BinarySource { - /// For models/services that support URL as input - /// NOTE: Few AI services support this. - Url(String), - - /// The base64 string of the image - /// - /// NOTE: Here we use an `Arc` to avoid cloning large amounts of data when cloning a ChatRequest. - /// The overhead is minimal compared to cloning relatively large data. - /// The downside is that it will be an Arc even when used only once, but for this particular data type, the net benefit is positive. - Base64(Arc), -} - -// endregion: --- BinarySource - -// No `Local` location; this would require handling errors like "file not found" etc. -// Such a file can be easily provided by the user as Base64, and we can implement a convenient -// TryFrom to Base64 version. All LLMs accept local images only as Base64. 
diff --git a/src/chat/mod.rs b/src/chat/mod.rs index d4deade9..12ca5dca 100644 --- a/src/chat/mod.rs +++ b/src/chat/mod.rs @@ -9,6 +9,7 @@ mod chat_req_response_format; mod chat_request; mod chat_response; mod chat_stream; +mod binary; mod content_part; mod message_content; mod tool; @@ -21,6 +22,7 @@ pub use chat_req_response_format::*; pub use chat_request::*; pub use chat_response::*; pub use chat_stream::*; +pub use binary::*; pub use content_part::*; pub use message_content::*; pub use tool::*; From 963f4a35241cb899a7254988bdd5b49e4726b72a Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Wed, 26 Nov 2025 15:00:58 -0800 Subject: [PATCH 037/123] > binary - refactor openai to use into_url for the base64 url --- src/adapter/adapters/openai/adapter_impl.rs | 55 +++++++-------------- src/chat/binary.rs | 33 ++++++++++++- tests/support/common_tests.rs | 25 +++++++++- tests/support/data.rs | 3 +- tests/tests_p_openai.rs | 5 ++ 5 files changed, 81 insertions(+), 40 deletions(-) diff --git a/src/adapter/adapters/openai/adapter_impl.rs b/src/adapter/adapters/openai/adapter_impl.rs index 91c8ef6f..a154b725 100644 --- a/src/adapter/adapters/openai/adapter_impl.rs +++ b/src/adapter/adapters/openai/adapter_impl.rs @@ -2,7 +2,6 @@ use crate::adapter::adapters::support::get_api_key; use crate::adapter::openai::OpenAIStreamer; use crate::adapter::openai::ToWebRequestCustom; use crate::adapter::{Adapter, AdapterDispatcher, AdapterKind, ServiceType, WebRequestData}; -use crate::chat::Binary; use crate::chat::{ BinarySource, ChatOptionsSet, ChatRequest, ChatResponse, ChatResponseFormat, ChatRole, ChatStream, ChatStreamResponse, ContentPart, MessageContent, ReasoningEffort, ToolCall, Usage, @@ -411,19 +410,21 @@ impl OpenAIAdapter { ContentPart::Binary(binary) => { let is_audio = binary.is_audio(); let is_image = binary.is_image(); - let Binary { - content_type, source, .. - } = binary; + + // let Binary { + // content_type, source, .. 
+ // } = binary; if is_audio { - match &source { + match &binary.source { BinarySource::Url(_url) => { warn!( "OpenAI doesn't support audio from URL, need to handle it gracefully" ); } BinarySource::Base64(content) => { - let mut format = content_type.split('/').next_back().unwrap_or(""); + let mut format = + binary.content_type.split('/').next_back().unwrap_or(""); if format == "mpeg" { format = "mp3"; } @@ -437,38 +438,18 @@ impl OpenAIAdapter { } } } else if is_image { - match &source { - BinarySource::Url(url) => { - values.push(json!({"type": "image_url", "image_url": {"url": url}})) - } - BinarySource::Base64(content) => { - let image_url = format!("data:{};base64,{}", content_type, content); - values - .push(json!({"type": "image_url", "image_url": {"url": image_url}})) - } - } + let image_url = binary.into_url(); + values.push(json!({"type": "image_url", "image_url": {"url": image_url}})); + } else if matches!(&binary.source, BinarySource::Url(_)) { + // TODO: Need to return error + warn!("OpenAI doesn't support file from URL, need to handle it gracefully"); } else { - match &source { - BinarySource::Url(_url) => { - // TODO: Need to return error - warn!( - "OpenAI doesn't support file from URL, need to handle it gracefully" - ); - } - BinarySource::Base64(content) => { - let file_data = format!("data:{};base64,{}", content_type, content); - values.push(json!({"type": "file", "file": { - "filename": binary.name, - "file_data": file_data - }})) - } - } - - // "type": "file", - // "file": { - // "filename": "draconomicon.pdf", - // "file_data": f"data:application/pdf;base64,{base64_string}", - // } + let filename = binary.name.clone(); + let file_base64_url = binary.into_url(); + values.push(json!({"type": "file", "file": { + "filename": filename, + "file_data": file_base64_url + }})) } } diff --git a/src/chat/binary.rs b/src/chat/binary.rs index 6ea32434..ecd4c86a 100644 --- a/src/chat/binary.rs +++ b/src/chat/binary.rs @@ -104,7 +104,18 @@ impl Binary { pub 
fn into_url(self) -> String { match self.source { BinarySource::Url(url) => url, - BinarySource::Base64(b64_content) => format!("data:{};base64,{b64_content}", self.content_type), + BinarySource::Base64(b64_content) => { + // NOTE: Openai does not support filename in the URL. + // let filename_section: Cow = if let Some(name) = self.name { + // let name = normalize_name(&name); + // format!("filename={name};").into() + // } else { + // "".into() + // }; + let filename_section = ""; + + format!("data:{};{filename_section}base64,{b64_content}", self.content_type) + } } } } @@ -131,3 +142,23 @@ pub enum BinarySource { // No `Local` location; this would require handling errors like "file not found" etc. // Such a file can be easily provided by the user as Base64, and we can implement a convenient // TryFrom to Base64 version. All LLMs accept local images only as Base64. + +// region: --- Support + +#[allow(unused)] +fn normalize_name(input: &str) -> String { + input + .chars() + .map(|c| { + match c { + // allowed + 'a'..='z' | 'A'..='Z' | '0'..='9' | '.' 
| '_' | '-' | '(' | ')' => c, + + // everything else becomes '-' + _ => '-', + } + }) + .collect() +} + +// endregion: --- Support diff --git a/tests/support/common_tests.rs b/tests/support/common_tests.rs index ea514a48..c8ea4eb1 100644 --- a/tests/support/common_tests.rs +++ b/tests/support/common_tests.rs @@ -1,5 +1,7 @@ use crate::get_option_value; -use crate::support::data::{IMAGE_URL_JPG_DUCK, get_b64_audio, get_b64_duck, get_b64_pdf, has_audio_file}; +use crate::support::data::{ + IMAGE_URL_JPG_DUCK, TEST_IMAGE_FILE_PATH, get_b64_audio, get_b64_duck, get_b64_pdf, has_audio_file, +}; use crate::support::{ Check, StreamExtract, TestResult, assert_contains, assert_reasoning_content, assert_reasoning_usage, contains_checks, extract_stream_end, get_big_content, seed_chat_req_simple, seed_chat_req_tool_simple, @@ -735,6 +737,27 @@ pub async fn common_test_chat_image_b64_ok(model: &str) -> TestResult<()> { Ok(()) } +pub async fn common_test_chat_image_file_ok(model: &str) -> TestResult<()> { + // -- Setup + let client = Client::default(); + + // -- Build & Exec + let mut chat_req = ChatRequest::default().with_system("Answer in one sentence"); + // This is similar to sending initial system chat messages (which will be cumulative with system chat messages) + chat_req = chat_req.append_message(ChatMessage::user(vec![ + ContentPart::from_text("What is in this picture?"), + ContentPart::from_binary_file(TEST_IMAGE_FILE_PATH)?, + ])); + + let chat_res = client.exec_chat(model, chat_req, None).await?; + + // -- Check + let res = chat_res.first_text().ok_or("Should have text result")?; + assert_contains(res, "duck"); + + Ok(()) +} + pub async fn common_test_chat_audio_b64_ok(model: &str) -> TestResult<()> { if !has_audio_file() { println!("No test audio file. 
Skipping this test."); diff --git a/tests/support/data.rs b/tests/support/data.rs index fc026a7a..031fcf36 100644 --- a/tests/support/data.rs +++ b/tests/support/data.rs @@ -9,10 +9,11 @@ use std::io::Read; pub const IMAGE_URL_JPG_DUCK: &str = "https://aipack.ai/images/test-duck.jpg"; pub const AUDIO_TEST_FILE_PATH: &str = "./tests/data/phrase_neil_armstrong.wav"; +pub const TEST_IMAGE_FILE_PATH: &str = "./tests/data/duck-small.jpg"; /// Get the base64 of the image above (but resized/lower to fit 5kb) pub fn get_b64_duck() -> TestResult { - get_b64_file("./tests/data/duck-small.jpg") + get_b64_file(TEST_IMAGE_FILE_PATH) } pub fn has_audio_file() -> bool { diff --git a/tests/tests_p_openai.rs b/tests/tests_p_openai.rs index 825091eb..4c311fa9 100644 --- a/tests/tests_p_openai.rs +++ b/tests/tests_p_openai.rs @@ -127,6 +127,11 @@ async fn test_chat_binary_image_b64_ok() -> TestResult<()> { common_tests::common_test_chat_image_b64_ok(MODEL_LATEST).await } +#[tokio::test] +async fn test_chat_binary_image_file_ok() -> TestResult<()> { + common_tests::common_test_chat_image_file_ok(MODEL_LATEST).await +} + #[tokio::test] async fn test_chat_binary_audio_b64_ok() -> TestResult<()> { common_tests::common_test_chat_audio_b64_ok(AUDIO_MODEL).await From 90255707d5970e108737fb048446da24a19988af Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Wed, 26 Nov 2025 18:25:44 -0800 Subject: [PATCH 038/123] . 
c07-image - update with two images --- .gitignore | 1 + examples/c07-image.rs | 50 +++++++++++++++++++++++++++------------ tests/data/other-one.png | Bin 0 -> 6527 bytes 3 files changed, 36 insertions(+), 15 deletions(-) create mode 100644 tests/data/other-one.png diff --git a/.gitignore b/.gitignore index b656c7da..0ea91b0b 100644 --- a/.gitignore +++ b/.gitignore @@ -66,6 +66,7 @@ out/ # -- Test data (one by one) !tests/data/duck-small.jpg +!tests/data/other-one.png !tests/data/small.pdf # -- Nodejs diff --git a/examples/c07-image.rs b/examples/c07-image.rs index b2da4a29..cfd39d68 100644 --- a/examples/c07-image.rs +++ b/examples/c07-image.rs @@ -1,12 +1,12 @@ //! This example demonstrates how to properly attach image to the conversations use genai::Client; -use genai::chat::printer::print_chat_stream; use genai::chat::{ChatMessage, ChatRequest, ContentPart}; use tracing_subscriber::EnvFilter; -const MODEL: &str = "gpt-4o-mini"; +const MODEL: &str = "gpt-5.1-codex"; const IMAGE_URL: &str = "https://aipack.ai/images/test-duck.jpg"; +const IMAGE_OTHER_ONE_PATH: &str = "tests/data/other-one.png"; #[tokio::main] async fn main() -> Result<(), Box> { @@ -17,20 +17,40 @@ async fn main() -> Result<(), Box> { let client = Client::default(); - let question = "What is in this picture?"; - let mut chat_req = ChatRequest::default().with_system("Answer in one sentence"); - // This is similar to sending initial system chat messages (which will be cumulative with system chat messages) - chat_req = chat_req.append_message(ChatMessage::user(vec![ - ContentPart::from_text(question), - ContentPart::from_binary_url("image/jpg", IMAGE_URL, None), - ])); - - println!("\n--- Question:\n{question}"); - let chat_res = client.exec_chat_stream(MODEL, chat_req.clone(), None).await?; - - println!("\n--- Answer: (streaming)"); - let _assistant_answer = print_chat_stream(chat_res, None).await?; + chat_req = chat_req + .append_message(ChatMessage::user(vec![ContentPart::from_binary_url( + 
"image/jpg", + IMAGE_URL, + None, + )])) + .append_message(ChatMessage::user(vec![ + ContentPart::from_text("here is the file: 'other-one.png'"), // this is the most model portable way to provide image name/info + ContentPart::from_binary_file(IMAGE_OTHER_ONE_PATH)?, + ])); + + let questions = [ + "What is the first image about? and what is the file name for this image if you have it?", + "What is the second image about? and what is the file name for this image if you have it?", + ]; + + for question in questions { + println!("\nQuestion: {question}"); + + let chat_req = chat_req.clone().append_message(ChatMessage::user(question)); + let chat_res = client.exec_chat(MODEL, chat_req, None).await?; + + let usage = chat_res.usage; + let response_content = chat_res.content.joined_texts().ok_or("Should have response")?; + + println!("\nAnswer: {response_content}"); + println!( + "prompt: {:?} tokens | completion: {:?} tokens", + usage.prompt_tokens, usage.completion_tokens + ); + + println!(); + } Ok(()) } diff --git a/tests/data/other-one.png b/tests/data/other-one.png new file mode 100644 index 0000000000000000000000000000000000000000..6e281aa41e5a313414a3ceb4cec7681822e40435 GIT binary patch literal 6527 zcmd5>c|4SB`&T*EgF&)029YwuAdD?$gzO?jWEo7B88d^Ctwg59GTAy=$CfN1qzMs` z?9>SvTe6H35s^^8htBDo^E>bRetv)a{&@d*=JVY5_qxB=b=}wU+|M)DtSpSUI7B#@ zn3%Xsj8QgBO#9Y=`=@OCf!~M3UqhIfexhP*9f%I*W(W_gKh(_=>yCz!{BZ!z#H6i9 z!nt|)qKRO4v^OR|2lAo52?EA=>Okz(%$3Y>NVE^e_#z&C`l5xc$3M8bVbAp{lB=q=8UU`XOzO^~89E{wrxUC4|~P zq=9_$bR)X`U*vy82(c^u-6|lL|FcPdE<$4cuz0{azzdZ$)(hhYU<4Z@ z1F(r9^0>0*@#BhcsPdldZsZVF7!umg0fhm=P5`n}2cq`7-+!Wxe?gTszw6mU_q3k| z$`MfUopLw22orxdZ?rwelj!p&+n<0LI>37u`GaW>*^QE4&~91S1z>=* zas(##D)Ia1j}_Xm|3#?1m#+w83;|FP`k!>{v4O#R*@|%U*mY0`g2Q7ygFMildp-j8 zen$ydFJg!r9UB&rTX-^&XFY(*$o1*-IWyncv=_@Durzvswb6A|{Wz4(XK-`)I6 zPXg9`p96Xw&@^E`x+XCA(Ra}SK!W0du6iMX=_Am+qcA|G{UgJenB*xzl)j@V>x0kr zSSV80A&zD(ojz9lqUZvuOuTiZ|9C^FO8#v8P?+Vd@&|S)!=Gy>VdqQBQXw`EyiEOW 
zXRmq{orKQ?FKyC4G|mw}&?c`H*^QO3UCOTNG1hpOk(IGw*Z8(D)eB{B+a;8J=T2@x z#Q5picBdw%(luICkLb`bfzFcHINR2m*yk~FXZ%}d#HS9c6+VmYi>WuK6fMyJIu#rS!B3`(PZM9wKd=)zrOPps2)Z!#JWNXiQd7X8yCy5^7ZHZY{?v;joKd_m`Ja1 z5smKjejfTEro_J^K(lvL^W69fX6=*l7+BaekZAY4`SYh=}Dcsa-* z=)Y%j-(-OiVjnMs>ag_LonIo1>v^y792f#m==!i>>Ya2jN~rcRM3yVHYpVW8N7iWs zh?l}E7V&PW@#YyxV4i7R;^~C$IFp; zpI8;rp03~PNbYJeOIwwZUVKbs>olWovvH(w>#x6igx`?Ke)l%RxGANYKM^Kdvi5-^LPQA&Rvez{g_X(MjS zto{B0uOcZ3JpM{&gwcN8%v*{GP%?yV>_C>Jkfa}Bl; zVKhK!IS`Li6o7Fxd}0%UK%Dv;=oLkIW4U2t6~vrK-K#GA@oXrRCwos<;})s4x7h65 z%89vPGP$gPK!Xim>T5cVZ&p7UXvqV4!0O+%vwWmM?aibz?3`ho)T3;-dR{S z%lr9b;O#W`?G7!z{-~=ft$e(~rhWJi>H_sVcho-5$2yX(0%2$4>eBHNZC5-Va=lj? z_a&Nk8pNx^HrBcGEVSWE6{DJTYXBn@@YyVfOqLI;QP{XCI&ygd4{-offE=@s(CgYandqXEz-nJN$8kU`7)Y*?5F*g0S zlX`XSH(WihSj;4tnRVg|GMg?hbbVPVCXRYuz6ELBAP?z+cvr67g{SMzJPGBw+E+z9 znZpw$9-ZSQbu6cFm5`m> zbGBj1;h9KXcJVk4?Tn2A){3Z`GtxohNr@fK!otHl+-^{q2!o62)zU=4JJevc zw!Zc^^+1La#jluh_z^_vSXItUl^+_^B&XoNh#l^0uL*sGvqoIk4{#9>QsGLpH|T@< z4Dsn-9{gpgdpz!rt6xi}|DaUlkT*ObCz)HnfGjyZz&Mvua+#NZ4YRKIrjaKrEZ3In zXHoBMcM0j?M|7PRq>xM^S?>{^3mefnR`X&64zGOY`^rCdp+`CD2%H#obzx*xNJB)n zbQ;@O71Bh!qgNG33z1CZOfNugkGv7TEqYWu+8A|8K{x9qgyOG?NpHf(-`}|F`i$x^Xq~cx{#oK$dl}60MswQC zco<)=LU^p#h`IwG&_O!(cfAHHh^Wk(&RJOw$~KI1#*a>u^)ocQvvxXl)>*sHDvCo? 
zKHRihy2wPK^ky1msP{kS?9EJ1$g7N>$g?>Rj<9>z@}Q@4r&Fx~v({VewJekWoWua3 z4sXI)tOp!Qw)N9EY-|R+g)ZnO=AV&l87-r4^;Pzq{rHn;<;gWF8>;9w4YXKa@=;Z& zez;c9vbOzc{+V9PUA(37vv=r0sRwTxuRXfI1$V`#9U=73L_mc)5>9*bjw=4A3qsQ;zk&Rld%HTL6xlB~ME6@KM zxm)cb$3^a2+-^}4u!SbFj-HcJs!pfQG|NTv6mE*5iMA(1VQRW)4u3wJsUDC3iA<1P zxoCs9+RK@~u_ z!Zf;iR?E;Wg7=+iuBL5kLJZJsMkNZdZ$O8c)l?2A#Qy|0(;4xU#YmeeV&u&B#WR4a zv4iaC7L!VboWi;VXJ?i+Jht5#ptR*pr59(MSZ-AxR6Lf|>~?+LWhqc^YR+R&sthj8 zVt;yfm#kwy4zi22$H1J+@gW^6* zgX}SxmMxe0fh1azml4zzmlrp1b^A%_UBv2D`$BqFc>U}OVUV4?RJ2GSJx(LCna9t^ z^>5E|Rh**b;Ft4wmwHeOc-ry^?j1L!mzs6fGS`&#w6|3u-4gL=GIM+61&c)arQx4C zGK>&$Cokw+R$J|H82xGlc#xbPrnOm}tn=z#!lxU{=?#OLHC7)?oTNU`mh zfuu}8HT;vOo!|E0E5fsc{#@?Y-PN;0V7tyn2Ym)Jepi7-cJWQ~8`es;h&X?>YwabP zY3G{H)4B*Bz2!3Q@x?xpmG8dW=si}=RpovN@3;|$=sM?8rNm%HZB)E%9hSIq@>2m} z*ysck%Ubi`FC7@hh<94Owp>*3gTg@`w=simksJG?C<%EDB8hGx$F6=dw90K$IeL1D zTlb>k!epOC@C<(H*YwIL&NsD533(++&O++U6XmKO_8r(LIz`iZIQj9Nfy+49;JBTJ z*o~s|ZO0D6AmWC~=6Ne!UTgF2>+b7ez;20$U1+J2%v>ADW9NI@zbL>EyTI0ys5xB~ ztb0mNMiiBbhw&!R6P%ZJa3+B1JWy8`Gm&dC8Vx%SQaKG?*%ZmNbGHvHk~R&Kv)^XO zmZXGPfwS*Pi5nQ)KmdDq^P@3yQFwH6{ql=Ad23rld~BYt+lZOimqR)eR&%;8cAerU zj?R^r1CiA3%2MUOBPrQ%C4aQahvR}SG^^cA|1zmiG59$ofc>nRfOcb~5l8A)b!`%o zs&S^+jyIOmI1UU)o@aVf+q|JX{DedNtQ7bJB8l44s#Gi(xwEhOIdvg_5VK@P#cHcc zF4*bQB^Y9dE;!mF_EAQ%%dY>FtQ5%n+W+Z-xkJ6MLM)cf={T`C$=pr`9GxZH%-j477;rxO(e{2RqYR2=M`p z)2mbOei=2)!GTy4ga|}Q5U#W{9|95p&V1T@)EOi`<>*G8lRG`#_*F#Hd;1Kjk{Ti# z1Wg;0cNCMok&oFK#k%xe+JI;aPOY>|#{x&ud&ZVVZ+h%S1_vq9tkL^<#codKI#tl6 z*m&x9I&&;zwQe5Eb*8bhx_=g(S2aV#{eow4>-Y^QeLp!RCPNOYu@pmWJ@;+PM{X`_ zH3LMR|t4oP&FJop~S;c`yNLjKy7Qz%?N)1#48m>^k@Mih2 zcsg0kd;rh7rcHvKV;*x!;?_rT=LXR1$|sz zY|}r7r>aAFDTXP~jmZ3gXdZF(d=eYE{g1<|c~VQx|;CM&jEN zPu+s+ab89%OT?-J^DK7~_X<-NqaG=tU`Bb1U3GyIK z2MCB!MXrYj1LXJ)?{?C){60HB<=k)){T#HLk@z*MkU+0%kRop~SE#1;>Z1}oGw`gF z!5$>RV?yLi{XuK^>H8WFE!(PV3kV>}>5+SM>_R`=gc+(TdQfxeex%*|+wF2+_$V@4 zsals71S$!L68ogyK)ErO6Yqs$W|9HQUau~|l``IAVOWQM zRo4Im^^$|2ho8{*JsR^FV%nN>4-Yw5gUcCx4gqPg{PxvoH6owfkwBE+$cWxQuwy9Z 
z81prYH+zCk1KvgH`-asHn3rT|@$y=)&j&BUTmydo5G_^Vu?2H@BGC2lZO0Lhlfg8r zE$f<+xU?W%>z3Qf*T`mhPPB=X)_6p!)m^@mL52p9GvPO8oT^s!MuZhKK=Gydb~@Vj zqOqJp;wj_~v5RS2Q8edb2Nr>@$=3A1&MU5bf$cY53PwhS27cM!OA6FEKqKUlG_DJv z29yqRBCDe7ToR2SmQY5bJ}Si!+60ug{_VcFSHw|0xNhlvXJk7U1C;VhgA`kb6!1EH vZN>ssAQf?@3&cAR^ Date: Thu, 27 Nov 2025 08:42:02 -0800 Subject: [PATCH 039/123] . openai - fix model names for openai and openaiResp adapters --- src/adapter/adapters/openai/adapter_impl.rs | 2 -- src/adapter/adapters/openai_resp/adapter_impl.rs | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/adapter/adapters/openai/adapter_impl.rs b/src/adapter/adapters/openai/adapter_impl.rs index a154b725..0d0cd6f5 100644 --- a/src/adapter/adapters/openai/adapter_impl.rs +++ b/src/adapter/adapters/openai/adapter_impl.rs @@ -24,8 +24,6 @@ pub struct OpenAIAdapter; const MODELS: &[&str] = &[ // "gpt-5.1", - "gpt-5.1-codex", - "gpt-5.1-codex-mini", "gpt-5", "gpt-5-mini", "gpt-5-nano", diff --git a/src/adapter/adapters/openai_resp/adapter_impl.rs b/src/adapter/adapters/openai_resp/adapter_impl.rs index 206be489..37f7badc 100644 --- a/src/adapter/adapters/openai_resp/adapter_impl.rs +++ b/src/adapter/adapters/openai_resp/adapter_impl.rs @@ -20,7 +20,10 @@ pub struct OpenAIRespAdapter; // Latest models const MODELS: &[&str] = &[ // + "gpt-5-pro", "gpt-5-codex", + "gpt-5.1-codex", + "gpt-5.1-codex-mini", ]; impl OpenAIRespAdapter { From f24745be75c0fdb70c24af9ba40ca845e3e5aee1 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 27 Nov 2025 08:57:17 -0800 Subject: [PATCH 040/123] . 
c07-image - update to use local images only (no url, because some models do not support it) --- examples/c07-image.rs | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/examples/c07-image.rs b/examples/c07-image.rs index cfd39d68..2520c425 100644 --- a/examples/c07-image.rs +++ b/examples/c07-image.rs @@ -4,8 +4,12 @@ use genai::Client; use genai::chat::{ChatMessage, ChatRequest, ContentPart}; use tracing_subscriber::EnvFilter; -const MODEL: &str = "gpt-5.1-codex"; -const IMAGE_URL: &str = "https://aipack.ai/images/test-duck.jpg"; +const MODEL: &str = "gpt-5.1"; +// const MODEL: &str = "claude-sonnet-4-5"; + +// const IMAGE_URL: &str = "https://aipack.ai/images/test-duck.jpg"; + +const IMAGE_SOME_PATH: &str = "tests/data/duck-small.jpg"; const IMAGE_OTHER_ONE_PATH: &str = "tests/data/other-one.png"; #[tokio::main] async fn main() -> Result<(), Box> { @@ -19,11 +23,10 @@ async fn main() -> Result<(), Box> { let mut chat_req = ChatRequest::default().with_system("Answer in one sentence"); chat_req = chat_req - .append_message(ChatMessage::user(vec![ContentPart::from_binary_url( - "image/jpg", - IMAGE_URL, - None, - )])) + .append_message(ChatMessage::user(vec![ + ContentPart::from_text("here is the file: 'some-image.jpg'"), // To test when name is different, should take precedence + ContentPart::from_binary_file(IMAGE_SOME_PATH)?, + ])) .append_message(ChatMessage::user(vec![ ContentPart::from_text("here is the file: 'other-one.png'"), // this is the most model portable way to provide image name/info ContentPart::from_binary_file(IMAGE_OTHER_ONE_PATH)?, @@ -52,5 +55,12 @@ async fn main() -> Result<(), Box> { println!(); } + // NOTE: For web url image, we can `from_binary_url` but not supported by all models (e.g., Anthropic does not support those) + // ContentPart::from_binary_url( + // "image/jpg", + // IMAGE_URL, + // None, + // ) + Ok(()) } From 7cc05065839ad4fec3a55d7a9f6a513bf9d45ac9 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 27 Nov 2025 17:39:06 
-0800 Subject: [PATCH 041/123] . test - test audio, minor change --- tests/support/common_tests.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/support/common_tests.rs b/tests/support/common_tests.rs index c8ea4eb1..f5d2cf67 100644 --- a/tests/support/common_tests.rs +++ b/tests/support/common_tests.rs @@ -1,6 +1,7 @@ use crate::get_option_value; use crate::support::data::{ - IMAGE_URL_JPG_DUCK, TEST_IMAGE_FILE_PATH, get_b64_audio, get_b64_duck, get_b64_pdf, has_audio_file, + AUDIO_TEST_FILE_PATH, IMAGE_URL_JPG_DUCK, TEST_IMAGE_FILE_PATH, get_b64_audio, get_b64_duck, get_b64_pdf, + has_audio_file, }; use crate::support::{ Check, StreamExtract, TestResult, assert_contains, assert_reasoning_content, assert_reasoning_usage, @@ -769,12 +770,11 @@ pub async fn common_test_chat_audio_b64_ok(model: &str) -> TestResult<()> { // -- Build & Exec let mut chat_req = ChatRequest::default().with_system("Transcribe the audio"); - // This is similar to sending initial system chat messages (which will be cumulative with system chat messages) - chat_req = chat_req.append_message(ChatMessage::user(vec![ContentPart::from_binary_base64( - "audio/wav", - get_b64_audio()?, - None, - )])); + let cp_audio = ContentPart::from_binary_file(AUDIO_TEST_FILE_PATH)?; + // similar as the from_binary_file but manual + // let cp_audio = ContentPart::from_binary_base64("audio/wav", get_b64_audio()?, None); + + chat_req = chat_req.append_message(ChatMessage::user(vec![cp_audio])); let chat_res = client.exec_chat(model, chat_req, None).await?; From 65fc2802016c13212108e1268213fe2544b65c88 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 27 Nov 2025 17:51:57 -0800 Subject: [PATCH 042/123] ^ MessageContent - add .binaries() and .into_binaries() --- src/chat/message_content.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/chat/message_content.rs b/src/chat/message_content.rs index 1a1d66d0..7941ee4a 100644 --- 
a/src/chat/message_content.rs +++ b/src/chat/message_content.rs @@ -1,5 +1,5 @@ /// Note: MessageContent is used for ChatRequest and ChatResponse. -use crate::chat::{ContentPart, ToolCall, ToolResponse}; +use crate::chat::{Binary, ContentPart, ToolCall, ToolResponse}; use serde::{Deserialize, Serialize}; /// Message content container used in ChatRequest and ChatResponse. @@ -149,6 +149,14 @@ impl MessageContent { self.parts.into_iter().filter_map(|p| p.into_text()).collect() } + pub fn binaries(&self) -> Vec<&Binary> { + self.parts.iter().filter_map(|p| p.as_binary()).collect() + } + + pub fn into_binaries(self) -> Vec { + self.parts.into_iter().filter_map(|p| p.into_binary()).collect() + } + /// Return references to all ToolCall parts. pub fn tool_calls(&self) -> Vec<&ToolCall> { self.parts From c92e722a439b8591671316d45075eeb3200c5601 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 27 Nov 2025 18:02:46 -0800 Subject: [PATCH 043/123] ^ .size - implement .size in ContentPart and MessageContent --- src/chat/binary.rs | 21 +++++++++++++++++++++ src/chat/chat_message.rs | 10 ++++++++++ src/chat/content_part.rs | 19 +++++++++++++++++++ src/chat/message_content.rs | 9 +++++++++ src/chat/tool/tool_base.rs | 25 +++++++++++++++++++++++++ src/chat/tool/tool_call.rs | 21 +++++++++++++++++++++ src/chat/tool/tool_response.rs | 11 +++++++++++ 7 files changed, 116 insertions(+) diff --git a/src/chat/binary.rs b/src/chat/binary.rs index ecd4c86a..3f7c0255 100644 --- a/src/chat/binary.rs +++ b/src/chat/binary.rs @@ -120,6 +120,27 @@ impl Binary { } } +/// Computed accessors +impl Binary { + /// Returns an approximate in-memory size of this `Binary`, in bytes, + /// computed as the sum of the UTF-8 lengths of: + /// - `content_type` + /// - `name` (if any) + /// - the underlying URL or base64 string in `source`. + /// + /// This does **not** return the decoded byte length of the file. 
+ /// This does **not** return the size of the URL content + pub fn size(&self) -> usize { + let mut size = self.content_type.len(); + size += self.name.as_ref().map(|n| n.len()).unwrap_or_default(); + size += match &self.source { + BinarySource::Url(url) => url.len(), + BinarySource::Base64(data) => data.len(), + }; + size + } +} + // region: --- BinarySource /// Origin of a binary payload. diff --git a/src/chat/chat_message.rs b/src/chat/chat_message.rs index c4b2d345..bb2a2be4 100644 --- a/src/chat/chat_message.rs +++ b/src/chat/chat_message.rs @@ -51,6 +51,16 @@ impl ChatMessage { } } +/// Computed accessors +impl ChatMessage { + /// Returns an approximate in-memory size of this `ChatMessage`, in bytes, + /// computed as the size of the content plus. + pub fn size(&self) -> usize { + // Note: Do not include the role len + self.content.size() + } +} + impl ChatMessage { /// Attaches options to this message. pub fn with_options(mut self, options: impl Into) -> Self { diff --git a/src/chat/content_part.rs b/src/chat/content_part.rs index ff512c75..6a444415 100644 --- a/src/chat/content_part.rs +++ b/src/chat/content_part.rs @@ -163,6 +163,25 @@ impl ContentPart { } } +/// Computed accessors +impl ContentPart { + /// Returns an approximate in-memory size of this `ContentPart`, in bytes. + /// + /// - For `Text` and `ThoughtSignature`: the UTF-8 length of the string. + /// - For `Binary`: delegates to `Binary::size()`. + /// - For `ToolCall`: delegates to `ToolCall::size()`. + /// - For `ToolResponse`: delegates to `ToolResponse::size()`. + pub fn size(&self) -> usize { + match self { + ContentPart::Text(text) => text.len(), + ContentPart::Binary(binary) => binary.size(), + ContentPart::ToolCall(tool_call) => tool_call.size(), + ContentPart::ToolResponse(tool_response) => tool_response.size(), + ContentPart::ThoughtSignature(thought) => thought.len(), + } + } +} + /// is_.. 
Accessors impl ContentPart { #[allow(unused)] diff --git a/src/chat/message_content.rs b/src/chat/message_content.rs index 7941ee4a..4c2e2671 100644 --- a/src/chat/message_content.rs +++ b/src/chat/message_content.rs @@ -86,6 +86,15 @@ impl Extend for MessageContent { } } +/// Computed accessors +impl MessageContent { + /// Returns an approximate in-memory size of this `MessageContent`, in bytes, + /// computed as the sum of the sizes of all parts. + pub fn size(&self) -> usize { + self.parts.iter().map(|p| p.size()).sum() + } +} + // region: --- Iterator Support use crate::support; diff --git a/src/chat/tool/tool_base.rs b/src/chat/tool/tool_base.rs index 1e8b1ef8..5a74b200 100644 --- a/src/chat/tool/tool_base.rs +++ b/src/chat/tool/tool_base.rs @@ -42,6 +42,31 @@ pub struct Tool { pub config: Option, } +/// Computed accessors +impl Tool { + /// Returns an approximate in-memory size of this `Tool`, in bytes, + /// computed as the sum of the UTF-8 lengths of: + /// - `name` + /// - `description` (if any) + /// - JSON-serialized `schema` (if any) + /// - JSON-serialized `config` (if any) + pub fn size(&self) -> usize { + let mut size = self.name.len(); + size += self.description.as_ref().map(|d| d.len()).unwrap_or_default(); + size += self + .schema + .as_ref() + .map(|s| serde_json::to_string(s).map(|j| j.len()).unwrap_or_default()) + .unwrap_or_default(); + size += self + .config + .as_ref() + .map(|c| serde_json::to_string(c).map(|j| j.len()).unwrap_or_default()) + .unwrap_or_default(); + size + } +} + /// Constructor impl Tool { /// Create a new tool with the given name. 
diff --git a/src/chat/tool/tool_call.rs b/src/chat/tool/tool_call.rs index 49bd8c10..76bfaf44 100644 --- a/src/chat/tool/tool_call.rs +++ b/src/chat/tool/tool_call.rs @@ -25,3 +25,24 @@ pub struct ToolCall { #[serde(skip_serializing_if = "Option::is_none")] pub thought_signatures: Option>, } + +/// Computed accessors +impl ToolCall { + /// Returns an approximate in-memory size of this `ToolCall`, in bytes, + /// computed as the sum of the UTF-8 lengths of: + /// - `call_id` + /// - `fn_name` + /// - JSON-serialized `fn_arguments` + /// - all `thought_signatures` strings (if any) + pub fn size(&self) -> usize { + let mut size = self.call_id.len(); + size += self.fn_name.len(); + size += serde_json::to_string(&self.fn_arguments).map(|j| j.len()).unwrap_or_default(); + size += self + .thought_signatures + .as_ref() + .map(|sigs| sigs.iter().map(|s| s.len()).sum::()) + .unwrap_or_default(); + size + } +} diff --git a/src/chat/tool/tool_response.rs b/src/chat/tool/tool_response.rs index 511e8953..288f292e 100644 --- a/src/chat/tool/tool_response.rs +++ b/src/chat/tool/tool_response.rs @@ -21,6 +21,17 @@ impl ToolResponse { } } +/// Computed accessors +impl ToolResponse { + /// Returns an approximate in-memory size of this `ToolResponse`, in bytes, + /// computed as the sum of the UTF-8 lengths of: + /// - `call_id` + /// - `content` + pub fn size(&self) -> usize { + self.call_id.len() + self.content.len() + } +} + /// Getters #[allow(unused)] impl ToolResponse { From 16b823dd02cd465db7adb0d3cec99d3c2a52b363 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 27 Nov 2025 19:29:53 -0800 Subject: [PATCH 044/123] . 
update to version 0.5.0-alpha.3 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 946310ce..5ecfa86d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0-alpha.3-WIP" +version = "0.5.0-alpha.3" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From a47e3949b91a602ea493e597ce8f48b588ed8723 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 27 Nov 2025 19:30:28 -0800 Subject: [PATCH 045/123] . v0.5.0-alpha.4-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 5ecfa86d..5b9f7747 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0-alpha.3" +version = "0.5.0-alpha.4-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. 
(OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 8d52992d87ca14c89a4922dbddf661e0ce06b5cb Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sun, 30 Nov 2025 19:11:00 -0800 Subject: [PATCH 046/123] + bigmodel - add back bigmodel.cn and BigModel adapter (only via namespace) --- src/adapter/adapter_kind.rs | 10 +++ src/adapter/adapters/bigmodel/adapter_impl.rs | 88 +++++++++++++++++++ src/adapter/adapters/bigmodel/mod.rs | 14 +++ src/adapter/adapters/mod.rs | 1 + src/adapter/dispatcher.rs | 10 +++ 5 files changed, 123 insertions(+) create mode 100644 src/adapter/adapters/bigmodel/adapter_impl.rs create mode 100644 src/adapter/adapters/bigmodel/mod.rs diff --git a/src/adapter/adapter_kind.rs b/src/adapter/adapter_kind.rs index 7ff9dcc6..4fd81d4f 100644 --- a/src/adapter/adapter_kind.rs +++ b/src/adapter/adapter_kind.rs @@ -1,6 +1,7 @@ use crate::adapter::adapters::together::TogetherAdapter; use crate::adapter::adapters::zai::ZaiAdapter; use crate::adapter::anthropic::AnthropicAdapter; +use crate::adapter::bigmodel::BigModelAdapter; use crate::adapter::cohere::CohereAdapter; use crate::adapter::deepseek::{self, DeepSeekAdapter}; use crate::adapter::fireworks::FireworksAdapter; @@ -41,6 +42,8 @@ pub enum AdapterKind { DeepSeek, /// For ZAI (Mostly use OpenAI) Zai, + /// For big model (only accessible via namespace bigmodel::) + BigModel, /// Cohere today use it's own native protocol but might move to OpenAI Adapter Cohere, /// OpenAI shared behavior + some custom. (currently, localhost only, can be customize with ServerTargetResolver). 
@@ -63,6 +66,7 @@ impl AdapterKind { AdapterKind::Xai => "xAi", AdapterKind::DeepSeek => "DeepSeek", AdapterKind::Zai => "Zai", + AdapterKind::BigModel => "BigModel", AdapterKind::Cohere => "Cohere", AdapterKind::Ollama => "Ollama", } @@ -82,6 +86,7 @@ impl AdapterKind { AdapterKind::Xai => "xai", AdapterKind::DeepSeek => "deepseek", AdapterKind::Zai => "zai", + AdapterKind::BigModel => "BigModel", AdapterKind::Cohere => "cohere", AdapterKind::Ollama => "ollama", } @@ -100,6 +105,7 @@ impl AdapterKind { "xai" => Some(AdapterKind::Xai), "deepseek" => Some(AdapterKind::DeepSeek), "zai" => Some(AdapterKind::Zai), + "bigmodel" => Some(AdapterKind::BigModel), "cohere" => Some(AdapterKind::Cohere), "ollama" => Some(AdapterKind::Ollama), _ => None, @@ -123,6 +129,7 @@ impl AdapterKind { AdapterKind::Xai => Some(XaiAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::DeepSeek => Some(DeepSeekAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::Zai => Some(ZaiAdapter::API_KEY_DEFAULT_ENV_NAME), + AdapterKind::BigModel => Some(BigModelAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::Cohere => Some(CohereAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::Ollama => None, } @@ -158,6 +165,9 @@ impl AdapterKind { pub fn from_model(model: &str) -> Result { // -- First check if namespaced if let (_, Some(ns)) = ModelName::model_name_and_namespace(model) { + // JC-NOTE-2025-11-30: We should not need a special way to handle zai for this, since it should match to zai already. 
+ // Now, for 0.5.0, I am planning to change the logic to have `zai-coding::` namespace (so, adapters will have some namespace 'aliases') + // See reasoning here: https://github.com/jeremychone/rust-genai/pull/76#issuecomment-3594311524 // Special handling: "zai" namespace should route to ZAI for coding endpoint if ns == "zai" { return Ok(AdapterKind::Zai); diff --git a/src/adapter/adapters/bigmodel/adapter_impl.rs b/src/adapter/adapters/bigmodel/adapter_impl.rs new file mode 100644 index 00000000..5e347b78 --- /dev/null +++ b/src/adapter/adapters/bigmodel/adapter_impl.rs @@ -0,0 +1,88 @@ +use crate::ModelIden; +use crate::adapter::openai::OpenAIAdapter; +use crate::adapter::{Adapter, AdapterKind, ServiceType, WebRequestData}; +use crate::chat::{ChatOptionsSet, ChatRequest, ChatResponse, ChatStreamResponse}; +use crate::resolver::{AuthData, Endpoint}; +use crate::webc::WebResponse; +use crate::{Result, ServiceTarget}; +use reqwest::RequestBuilder; + +/// The BigModel adapter. Only available via namespace. +/// +pub struct BigModelAdapter; + +pub(in crate::adapter) const MODELS: &[&str] = &[]; + +impl BigModelAdapter { + pub const API_KEY_DEFAULT_ENV_NAME: &str = "BIGMODEL_API_KEY"; +} + +// The ZAI API is mostly compatible with the OpenAI API. 
+impl Adapter for BigModelAdapter { + fn default_endpoint() -> Endpoint { + const BASE_URL: &str = "https://open.bigmodel.cn/api/paas/v4/"; + Endpoint::from_static(BASE_URL) + } + + fn default_auth() -> AuthData { + AuthData::from_env(Self::API_KEY_DEFAULT_ENV_NAME) + } + + async fn all_model_names(_kind: AdapterKind) -> Result> { + Ok(MODELS.iter().map(|s| s.to_string()).collect()) + } + + fn get_service_url(_model: &ModelIden, service_type: ServiceType, endpoint: Endpoint) -> Result { + // For ZAI, we need to handle model-specific routing at this level + // because get_service_url is called with the modified endpoint from to_web_request_data + let base_url = endpoint.base_url(); + + let url = match service_type { + ServiceType::Chat | ServiceType::ChatStream => format!("{base_url}chat/completions"), + ServiceType::Embed => format!("{base_url}embeddings"), + }; + Ok(url) + } + + fn to_web_request_data( + target: ServiceTarget, + service_type: ServiceType, + chat_req: ChatRequest, + chat_options: ChatOptionsSet<'_, '_>, + ) -> Result { + // Parse model name and determine appropriate endpoint + OpenAIAdapter::util_to_web_request_data(target, service_type, chat_req, chat_options, None) + } + + fn to_chat_response( + model_iden: ModelIden, + web_response: WebResponse, + options_set: ChatOptionsSet<'_, '_>, + ) -> Result { + OpenAIAdapter::to_chat_response(model_iden, web_response, options_set) + } + + fn to_chat_stream( + model_iden: ModelIden, + reqwest_builder: RequestBuilder, + options_set: ChatOptionsSet<'_, '_>, + ) -> Result { + OpenAIAdapter::to_chat_stream(model_iden, reqwest_builder, options_set) + } + + fn to_embed_request_data( + service_target: crate::ServiceTarget, + embed_req: crate::embed::EmbedRequest, + options_set: crate::embed::EmbedOptionsSet<'_, '_>, + ) -> Result { + OpenAIAdapter::to_embed_request_data(service_target, embed_req, options_set) + } + + fn to_embed_response( + model_iden: crate::ModelIden, + web_response: crate::webc::WebResponse, + 
options_set: crate::embed::EmbedOptionsSet<'_, '_>, + ) -> Result { + OpenAIAdapter::to_embed_response(model_iden, web_response, options_set) + } +} diff --git a/src/adapter/adapters/bigmodel/mod.rs b/src/adapter/adapters/bigmodel/mod.rs new file mode 100644 index 00000000..63404fee --- /dev/null +++ b/src/adapter/adapters/bigmodel/mod.rs @@ -0,0 +1,14 @@ +//! Click the globe icon on the top-right corner of the page to switch language. +//! API Documentation: +//! Model Names: +//! Pricing: +//! +//! + +// region: --- Modules + +mod adapter_impl; + +pub use adapter_impl::*; + +// endregion: --- Modules diff --git a/src/adapter/adapters/mod.rs b/src/adapter/adapters/mod.rs index b6495189..41a79e83 100644 --- a/src/adapter/adapters/mod.rs +++ b/src/adapter/adapters/mod.rs @@ -1,6 +1,7 @@ mod support; pub(super) mod anthropic; +pub(super) mod bigmodel; pub(super) mod cohere; pub(super) mod deepseek; pub(super) mod fireworks; diff --git a/src/adapter/dispatcher.rs b/src/adapter/dispatcher.rs index f2fd064f..7feaa766 100644 --- a/src/adapter/dispatcher.rs +++ b/src/adapter/dispatcher.rs @@ -2,6 +2,7 @@ use super::groq::GroqAdapter; use crate::adapter::adapters::together::TogetherAdapter; use crate::adapter::adapters::zai::ZaiAdapter; use crate::adapter::anthropic::AnthropicAdapter; +use crate::adapter::bigmodel::BigModelAdapter; use crate::adapter::cohere::CohereAdapter; use crate::adapter::deepseek::DeepSeekAdapter; use crate::adapter::fireworks::FireworksAdapter; @@ -41,6 +42,7 @@ impl AdapterDispatcher { AdapterKind::Xai => XaiAdapter::default_endpoint(), AdapterKind::DeepSeek => DeepSeekAdapter::default_endpoint(), AdapterKind::Zai => ZaiAdapter::default_endpoint(), + AdapterKind::BigModel => BigModelAdapter::default_endpoint(), AdapterKind::Cohere => CohereAdapter::default_endpoint(), AdapterKind::Ollama => OllamaAdapter::default_endpoint(), } @@ -59,6 +61,7 @@ impl AdapterDispatcher { AdapterKind::Xai => XaiAdapter::default_auth(), AdapterKind::DeepSeek => 
DeepSeekAdapter::default_auth(), AdapterKind::Zai => ZaiAdapter::default_auth(), + AdapterKind::BigModel => BigModelAdapter::default_auth(), AdapterKind::Cohere => CohereAdapter::default_auth(), AdapterKind::Ollama => OllamaAdapter::default_auth(), } @@ -77,6 +80,7 @@ impl AdapterDispatcher { AdapterKind::Xai => XaiAdapter::all_model_names(kind).await, AdapterKind::DeepSeek => DeepSeekAdapter::all_model_names(kind).await, AdapterKind::Zai => ZaiAdapter::all_model_names(kind).await, + AdapterKind::BigModel => BigModelAdapter::all_model_names(kind).await, AdapterKind::Cohere => CohereAdapter::all_model_names(kind).await, AdapterKind::Ollama => OllamaAdapter::all_model_names(kind).await, } @@ -95,6 +99,7 @@ impl AdapterDispatcher { AdapterKind::Xai => XaiAdapter::get_service_url(model, service_type, endpoint), AdapterKind::DeepSeek => DeepSeekAdapter::get_service_url(model, service_type, endpoint), AdapterKind::Zai => ZaiAdapter::get_service_url(model, service_type, endpoint), + AdapterKind::BigModel => BigModelAdapter::get_service_url(model, service_type, endpoint), AdapterKind::Cohere => CohereAdapter::get_service_url(model, service_type, endpoint), AdapterKind::Ollama => OllamaAdapter::get_service_url(model, service_type, endpoint), } @@ -125,6 +130,7 @@ impl AdapterDispatcher { AdapterKind::Xai => XaiAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::Zai => ZaiAdapter::to_web_request_data(target, service_type, chat_req, options_set), + AdapterKind::BigModel => BigModelAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::Cohere => CohereAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::Ollama => OllamaAdapter::to_web_request_data(target, service_type, chat_req, options_set), } @@ -147,6 +153,7 @@ impl AdapterDispatcher { AdapterKind::Xai => 
XaiAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::Zai => ZaiAdapter::to_chat_response(model_iden, web_response, options_set), + AdapterKind::BigModel => BigModelAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::Cohere => CohereAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::Ollama => OllamaAdapter::to_chat_response(model_iden, web_response, options_set), } @@ -172,6 +179,7 @@ impl AdapterDispatcher { AdapterKind::Xai => XaiAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::Zai => ZaiAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), + AdapterKind::BigModel => BigModelAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::Cohere => CohereAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::Ollama => OllamaAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), } @@ -198,6 +206,7 @@ impl AdapterDispatcher { AdapterKind::Xai => XaiAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::Zai => ZaiAdapter::to_embed_request_data(target, embed_req, options_set), + AdapterKind::BigModel => BigModelAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::Cohere => CohereAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::Ollama => OllamaAdapter::to_embed_request_data(target, embed_req, options_set), } @@ -223,6 +232,7 @@ impl AdapterDispatcher { AdapterKind::Xai => XaiAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_embed_response(model_iden, web_response, 
options_set), AdapterKind::Zai => ZaiAdapter::to_embed_response(model_iden, web_response, options_set), + AdapterKind::BigModel => BigModelAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::Cohere => CohereAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::Ollama => OllamaAdapter::to_embed_response(model_iden, web_response, options_set), } From 2f315aa186e212e2edbd23b2804fdd6205082309 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Tue, 2 Dec 2025 14:09:49 -0800 Subject: [PATCH 047/123] . tests - add test_chat_binary_image_file_ok to anthropic and gemini --- tests/tests_p_anthropic.rs | 5 +++++ tests/tests_p_gemini.rs | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/tests/tests_p_anthropic.rs b/tests/tests_p_anthropic.rs index 087169a8..10a11d71 100644 --- a/tests/tests_p_anthropic.rs +++ b/tests/tests_p_anthropic.rs @@ -122,6 +122,11 @@ async fn test_chat_binary_pdf_b64_ok() -> TestResult<()> { common_tests::common_test_chat_pdf_b64_ok(MODEL).await } +#[tokio::test] +async fn test_chat_binary_image_file_ok() -> TestResult<()> { + common_tests::common_test_chat_image_file_ok(MODEL).await +} + #[tokio::test] async fn test_chat_binary_multi_b64_ok() -> TestResult<()> { common_tests::common_test_chat_multi_binary_b64_ok(MODEL).await diff --git a/tests/tests_p_gemini.rs b/tests/tests_p_gemini.rs index 57381c90..a4228841 100644 --- a/tests/tests_p_gemini.rs +++ b/tests/tests_p_gemini.rs @@ -103,6 +103,11 @@ async fn test_chat_binary_pdf_b64_ok() -> TestResult<()> { common_tests::common_test_chat_pdf_b64_ok(MODEL).await } +#[tokio::test] +async fn test_chat_binary_image_file_ok() -> TestResult<()> { + common_tests::common_test_chat_image_file_ok(MODEL).await +} + #[tokio::test] async fn test_chat_binary_multi_b64_ok() -> TestResult<()> { common_tests::common_test_chat_multi_binary_b64_ok(MODEL).await From 0f6c13390b58961608fbda77e8a87285ec08324b Mon Sep 17 00:00:00 2001 From: Himmelschmidt 
<46351743+Himmelschmidt@users.noreply.github.com> Date: Fri, 5 Dec 2025 15:35:41 -0500 Subject: [PATCH 048/123] Capture response body in ResponseFailedNotJson error (#103) Fixes #102 --- src/chat/mod.rs | 4 ++-- src/webc/error.rs | 4 ++-- src/webc/web_client.rs | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/chat/mod.rs b/src/chat/mod.rs index 12ca5dca..f78457ed 100644 --- a/src/chat/mod.rs +++ b/src/chat/mod.rs @@ -3,26 +3,26 @@ // region: --- Modules +mod binary; mod chat_message; mod chat_options; mod chat_req_response_format; mod chat_request; mod chat_response; mod chat_stream; -mod binary; mod content_part; mod message_content; mod tool; mod usage; // -- Flatten +pub use binary::*; pub use chat_message::*; pub use chat_options::*; pub use chat_req_response_format::*; pub use chat_request::*; pub use chat_response::*; pub use chat_stream::*; -pub use binary::*; pub use content_part::*; pub use message_content::*; pub use tool::*; diff --git a/src/webc/error.rs b/src/webc/error.rs index 44ccb7a0..24a0a6cd 100644 --- a/src/webc/error.rs +++ b/src/webc/error.rs @@ -8,8 +8,8 @@ pub type Result = core::result::Result; #[allow(missing_docs)] #[derive(Debug, From, Display)] pub enum Error { - #[display("Response content type '{content_type}' is not JSON as expected.")] - ResponseFailedNotJson { content_type: String }, + #[display("Response content type '{content_type}' is not JSON as expected. Response body:\n{body}")] + ResponseFailedNotJson { content_type: String, body: String }, #[display("Request failed with status code '{status}'. Response body:\n{body}")] ResponseFailedStatus { diff --git a/src/webc/web_client.rs b/src/webc/web_client.rs index 1812a654..34b8010a 100644 --- a/src/webc/web_client.rs +++ b/src/webc/web_client.rs @@ -108,8 +108,10 @@ impl WebResponse { let body = if ct.starts_with("application/json") { res.json::().await? 
} else { + let body = res.text().await?; return Err(Error::ResponseFailedNotJson { content_type: ct.to_string(), + body, }); }; From 5e885627c49cf08b68f7e6a1c8e9882ed7763ed4 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 5 Dec 2025 12:41:12 -0800 Subject: [PATCH 049/123] ^ MessageContent - Add from ContentPart and Binary --- src/chat/message_content.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/chat/message_content.rs b/src/chat/message_content.rs index 4c2e2671..b496ec7d 100644 --- a/src/chat/message_content.rs +++ b/src/chat/message_content.rs @@ -353,6 +353,20 @@ impl From for MessageContent { } } +impl From for MessageContent { + fn from(part: ContentPart) -> Self { + Self { parts: vec![part] } + } +} + +impl From for MessageContent { + fn from(bin: Binary) -> Self { + Self { + parts: vec![bin.into()], + } + } +} + impl From> for MessageContent { fn from(parts: Vec) -> Self { Self { parts } From 4c270a9a63c253bfe1933830361de63b29e552ed Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 5 Dec 2025 14:37:32 -0800 Subject: [PATCH 050/123] ~ zai - change namespace strategy with (zai:: for default, and zai-codding:: for subscription, same Adapter) --- examples/c07-zai.rs | 5 ++- src/adapter/adapter_kind.rs | 48 +++++++++++++++--------- src/adapter/adapters/zai/adapter_impl.rs | 4 +- src/adapter/adapters/zai/mod.rs | 30 ++++----------- src/common/model_name.rs | 16 +++++++- 5 files changed, 59 insertions(+), 44 deletions(-) diff --git a/examples/c07-zai.rs b/examples/c07-zai.rs index e8f7b13b..3228bb1f 100644 --- a/examples/c07-zai.rs +++ b/examples/c07-zai.rs @@ -12,7 +12,10 @@ async fn main() -> Result<(), Box> { let client = Client::builder().build(); // Test cases demonstrating automatic endpoint routing - let test_cases = vec![("glm-4.6", "Regular ZAI model"), ("zai::glm-4.6", "Coding subscription model")]; + let test_cases = vec![ + ("glm-4.6", "Regular ZAI model"), + ("zai-coding::glm-4.6", "Coding subscription model"), + 
]; for (model_name, description) in test_cases { println!("\n=== {} ===", description); diff --git a/src/adapter/adapter_kind.rs b/src/adapter/adapter_kind.rs index 4fd81d4f..b8263af6 100644 --- a/src/adapter/adapter_kind.rs +++ b/src/adapter/adapter_kind.rs @@ -10,10 +10,10 @@ use crate::adapter::groq::{self, GroqAdapter}; use crate::adapter::nebius::NebiusAdapter; use crate::adapter::openai::OpenAIAdapter; use crate::adapter::xai::XaiAdapter; +use crate::adapter::zai; use crate::{ModelName, Result}; use derive_more::Display; use serde::{Deserialize, Serialize}; -use tracing::info; /// AdapterKind is an enum that represents the different types of adapters that can be used to interact with the API. /// @@ -164,23 +164,11 @@ impl AdapterKind { /// This might change in the future, hence the Result return type. pub fn from_model(model: &str) -> Result { // -- First check if namespaced - if let (_, Some(ns)) = ModelName::model_name_and_namespace(model) { - // JC-NOTE-2025-11-30: We should not need a special way to handle zai for this, since it should match to zai already. 
- // Now, for 0.5.0, I am planning to change the logic to have `zai-coding::` namespace (so, adapters will have some namespace 'aliases') - // See reasoning here: https://github.com/jeremychone/rust-genai/pull/76#issuecomment-3594311524 - // Special handling: "zai" namespace should route to ZAI for coding endpoint - if ns == "zai" { - return Ok(AdapterKind::Zai); - } - - if let Some(adapter) = Self::from_lower_str(ns) { - return Ok(adapter); - } else { - info!("No AdapterKind found for '{ns}'") - } - } + if let Some(adapter) = Self::from_model_namespace(model) { + return Ok(adapter); + }; - // -- Resolve from modelname + // -- Otherwise, Resolve from modelname if model.starts_with("o3") || model.starts_with("o4") || model.starts_with("o1") @@ -218,3 +206,29 @@ impl AdapterKind { } } } + +// region: --- Support + +/// Inner api to return +impl AdapterKind { + fn from_model_namespace(model: &str) -> Option { + let (_, namespace) = ModelName::split_as_model_name_and_namespace(model); + let namespace = namespace?; + + // -- First, check if simple adapter lower string match + if let Some(adapter) = Self::from_lower_str(namespace) { + Some(adapter) + } + // -- Second, custom, for now, we harcode this exceptin here (might become more generic later) + else if namespace == zai::ZAI_CODING_NAMESPACE { + Some(Self::Zai) + } + // + // -- Otherwise, no adapter from namespace, because no matching namespace + else { + None + } + } +} + +// endregion: --- Support diff --git a/src/adapter/adapters/zai/adapter_impl.rs b/src/adapter/adapters/zai/adapter_impl.rs index 7f1ce6e2..d3c25659 100644 --- a/src/adapter/adapters/zai/adapter_impl.rs +++ b/src/adapter/adapters/zai/adapter_impl.rs @@ -7,6 +7,8 @@ use crate::webc::WebResponse; use crate::{Result, ServiceTarget}; use reqwest::RequestBuilder; +pub const ZAI_CODING_NAMESPACE: &str = "zai-coding"; + /// Helper structure to hold ZAI model parsing information struct ZaiModelEndpoint { endpoint: Endpoint, @@ -19,7 +21,7 @@ impl 
ZaiModelEndpoint { // Check if namespace is "zai" to route to coding endpoint let endpoint = match namespace { - Some("zai") => Endpoint::from_static("https://api.z.ai/api/coding/paas/v4/"), + Some(ZAI_CODING_NAMESPACE) => Endpoint::from_static("https://api.z.ai/api/coding/paas/v4/"), _ => ZaiAdapter::default_endpoint(), }; diff --git a/src/adapter/adapters/zai/mod.rs b/src/adapter/adapters/zai/mod.rs index a7d774ff..cdf214a1 100644 --- a/src/adapter/adapters/zai/mod.rs +++ b/src/adapter/adapters/zai/mod.rs @@ -7,47 +7,31 @@ //! //! ZAI supports two different API endpoints using the ServiceTargetResolver pattern: //! -//! ### Regular API (Credit-based) +//! ### Regular API (Credit-based) (default for those models or with `zai::` namespace) //! - Endpoint: `https://api.z.ai/api/paas/v4/` //! - Models: `glm-4.6`, `glm-4.5`, etc. //! - Usage: Standard API calls billed per token //! -//! ### Coding Plan (Subscription-based) +//! ### Coding Plan (Subscription-based only with the `zai-coding::` namepace) //! - Endpoint: `https://api.z.ai/api/coding/paas/v4/` -//! - Models: `coding::glm-4.6`, `coding:glm-4.5`, etc. +//! - Models: `zai-coding::glm-4.6`, `zai-coding::glm-4.5`, etc. //! - Usage: Fixed monthly subscription for coding tasks //! -//! ## Usage with ServiceTargetResolver +//! ## For example //! //! ```rust //! use genai::resolver::{Endpoint, ServiceTargetResolver}; //! use genai::{Client, AdapterKind, ModelIden}; //! -//! let target_resolver = ServiceTargetResolver::from_resolver_fn( -//! |service_target| -> Result { -//! let model_name = service_target.model.model_name.to_string(); -//! -//! // Route to appropriate endpoint based on model naming -//! let endpoint_url = if model_name.starts_with("coding::") { -//! "https://api.z.ai/api/coding/paas/v4/" -//! } else { -//! "https://api.z.ai/api/paas/v4/" -//! }; -//! -//! let final_endpoint = Endpoint::from_static(endpoint_url); -//! let final_model = ModelIden::new(AdapterKind::Zai, clean_model_name); -//! -//! 
Ok(ServiceTarget { endpoint: final_endpoint, model: final_model }) -//! } -//! ); -//! //! let client = Client::builder().with_service_target_resolver(target_resolver).build(); //! //! // Use regular API //! let response = client.exec_chat("glm-4.6", chat_request, None).await?; +//! // Same, regular API +//! let response = client.exec_chat("zai::glm-4.6", chat_request, None).await?; //! //! // Use coding plan -//! let response = client.exec_chat("coding::glm-4.6", chat_request, None).await?; +//! let response = client.exec_chat("zai-coding::glm-4.6", chat_request, None).await?; //! ``` //! //! See `examples/c07-zai-dual-endpoints.rs` for a complete working example. diff --git a/src/common/model_name.rs b/src/common/model_name.rs index b8821d28..e77f7fcf 100644 --- a/src/common/model_name.rs +++ b/src/common/model_name.rs @@ -9,14 +9,26 @@ pub struct ModelName(Arc); /// Utilities impl ModelName { + pub fn has_namespace(&self, namespace: &str) -> bool { + if let Some(ns) = self.namespace() { + ns == namespace + } else { + false + } + } + + pub fn namespace(&self) -> Option<&str> { + self.as_model_name_and_namespace().1 + } + /// Calling the `model_name_and_namespace` pub(crate) fn as_model_name_and_namespace(&self) -> (&str, Option<&str>) { - Self::model_name_and_namespace(&self.0) + Self::split_as_model_name_and_namespace(&self.0) } /// e.g., `openai::gpt4.1` ("gpt4.1", Some("openai")) /// `gpt4.1` ("gpt4.1", None) - pub(crate) fn model_name_and_namespace(model: &str) -> (&str, Option<&str>) { + pub(crate) fn split_as_model_name_and_namespace(model: &str) -> (&str, Option<&str>) { if let Some(ns_idx) = model.find("::") { let ns: &str = &model[..ns_idx]; let name: &str = &model[(ns_idx + 2)..]; From 754e03e48ef0b12b57d40823727d28cb19283b77 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 5 Dec 2025 14:41:33 -0800 Subject: [PATCH 051/123] ^ ModelName - add namespace_is(..), namespace(), namespace_and_name() --- src/adapter/adapter_kind.rs | 2 +- 
.../adapters/anthropic/adapter_impl.rs | 2 +- src/adapter/adapters/cohere/adapter_impl.rs | 2 +- src/adapter/adapters/cohere/embed.rs | 2 +- .../adapters/fireworks/adapter_impl.rs | 2 +- src/adapter/adapters/gemini/adapter_impl.rs | 4 +-- src/adapter/adapters/gemini/embed.rs | 6 ++-- src/adapter/adapters/openai/adapter_impl.rs | 2 +- src/adapter/adapters/openai/embed.rs | 2 +- .../adapters/openai_resp/adapter_impl.rs | 2 +- src/adapter/adapters/zai/adapter_impl.rs | 2 +- src/common/model_name.rs | 29 ++++++++----------- 12 files changed, 25 insertions(+), 32 deletions(-) diff --git a/src/adapter/adapter_kind.rs b/src/adapter/adapter_kind.rs index b8263af6..31a82206 100644 --- a/src/adapter/adapter_kind.rs +++ b/src/adapter/adapter_kind.rs @@ -212,7 +212,7 @@ impl AdapterKind { /// Inner api to return impl AdapterKind { fn from_model_namespace(model: &str) -> Option { - let (_, namespace) = ModelName::split_as_model_name_and_namespace(model); + let (namespace, _) = ModelName::split_as_namespace_and_name(model); let namespace = namespace?; // -- First, check if simple adapter lower string match diff --git a/src/adapter/adapters/anthropic/adapter_impl.rs b/src/adapter/adapters/anthropic/adapter_impl.rs index 4f572d2a..e95322a9 100644 --- a/src/adapter/adapters/anthropic/adapter_impl.rs +++ b/src/adapter/adapters/anthropic/adapter_impl.rs @@ -108,7 +108,7 @@ impl Adapter for AnthropicAdapter { } = Self::into_anthropic_request_parts(chat_req)?; // -- Extract Model Name and Reasoning - let (raw_model_name, _) = model.model_name.as_model_name_and_namespace(); + let (_, raw_model_name) = model.model_name.namespace_and_name(); // -- Reasoning Budget let (model_name, computed_reasoning_effort) = match (raw_model_name, options_set.reasoning_effort()) { diff --git a/src/adapter/adapters/cohere/adapter_impl.rs b/src/adapter/adapters/cohere/adapter_impl.rs index ef6b2a07..ef22e2ae 100644 --- a/src/adapter/adapters/cohere/adapter_impl.rs +++ 
b/src/adapter/adapters/cohere/adapter_impl.rs @@ -80,7 +80,7 @@ impl Adapter for CohereAdapter { } = Self::into_cohere_request_parts(model.clone(), chat_req)?; // -- Build the basic payload - let (model_name, _) = model.model_name.as_model_name_and_namespace(); + let (_, model_name) = model.model_name.namespace_and_name(); let stream = matches!(service_type, ServiceType::ChatStream); let mut payload = json!({ "model": model_name.to_string(), diff --git a/src/adapter/adapters/cohere/embed.rs b/src/adapter/adapters/cohere/embed.rs index 119446f1..be08d2f1 100644 --- a/src/adapter/adapters/cohere/embed.rs +++ b/src/adapter/adapters/cohere/embed.rs @@ -83,7 +83,7 @@ pub fn to_embed_request_data( let api_key = get_api_key(auth, &model)?; // Extract the actual model name (without namespace) - let (model_name, _) = model.model_name.as_model_name_and_namespace(); + let (_, model_name) = model.model_name.namespace_and_name(); // Build headers let mut headers = Headers::from(vec![ diff --git a/src/adapter/adapters/fireworks/adapter_impl.rs b/src/adapter/adapters/fireworks/adapter_impl.rs index e361c545..69653866 100644 --- a/src/adapter/adapters/fireworks/adapter_impl.rs +++ b/src/adapter/adapters/fireworks/adapter_impl.rs @@ -59,7 +59,7 @@ impl Adapter for FireworksAdapter { if !target.model.model_name.contains('/') { target.model = target.model.from_name(format!( "accounts/fireworks/models/{}", - target.model.model_name.as_model_name_and_namespace().0 + target.model.model_name.namespace_and_name().1 )) } // NOTE: Fireworks max_tokens is set at 2K by default, which is unpractical for most task. diff --git a/src/adapter/adapters/gemini/adapter_impl.rs b/src/adapter/adapters/gemini/adapter_impl.rs index 0edac468..787514d5 100644 --- a/src/adapter/adapters/gemini/adapter_impl.rs +++ b/src/adapter/adapters/gemini/adapter_impl.rs @@ -68,7 +68,7 @@ impl Adapter for GeminiAdapter { /// this will return the URL without the API_KEY in it. 
The API_KEY will need to be added by the caller. fn get_service_url(model: &ModelIden, service_type: ServiceType, endpoint: Endpoint) -> Result { let base_url = endpoint.base_url(); - let (model_name, _) = model.model_name.as_model_name_and_namespace(); + let (_, model_name) = model.model_name.namespace_and_name(); let url = match service_type { ServiceType::Chat => format!("{base_url}models/{model_name}:generateContent"), ServiceType::ChatStream => format!("{base_url}models/{model_name}:streamGenerateContent"), @@ -84,7 +84,7 @@ impl Adapter for GeminiAdapter { options_set: ChatOptionsSet<'_, '_>, ) -> Result { let ServiceTarget { endpoint, auth, model } = target; - let (model_name, _) = model.model_name.as_model_name_and_namespace(); + let (_, model_name) = model.model_name.namespace_and_name(); // -- api_key let api_key = get_api_key(auth, &model)?; diff --git a/src/adapter/adapters/gemini/embed.rs b/src/adapter/adapters/gemini/embed.rs index 7e142872..a4fe0f6e 100644 --- a/src/adapter/adapters/gemini/embed.rs +++ b/src/adapter/adapters/gemini/embed.rs @@ -83,9 +83,6 @@ pub fn to_embed_request_data( let ServiceTarget { model, auth, .. 
} = service_target; let api_key = get_api_key(auth, &model)?; - // Extract the actual model name (without namespace) - not needed for Gemini request body - let (_model_name, _) = model.model_name.as_model_name_and_namespace(); - // Build headers - Gemini uses x-goog-api-key header let mut headers = Headers::from(vec![ ("x-goog-api-key".to_string(), api_key), @@ -97,8 +94,9 @@ pub fn to_embed_request_data( headers.merge_with(custom_headers); } + // Extract the actual model name (without namespace) - not needed for Gemini request body // Get the model name for the request - let (model_name, _) = model.model_name.as_model_name_and_namespace(); + let (_, model_name) = model.model_name.namespace_and_name(); let full_model_name = format!("models/{model_name}",); // Convert EmbedRequest to Gemini format and determine URL diff --git a/src/adapter/adapters/openai/adapter_impl.rs b/src/adapter/adapters/openai/adapter_impl.rs index 0d0cd6f5..bd0ad86f 100644 --- a/src/adapter/adapters/openai/adapter_impl.rs +++ b/src/adapter/adapters/openai/adapter_impl.rs @@ -209,7 +209,7 @@ impl OpenAIAdapter { custom: Option, ) -> Result { let ServiceTarget { model, auth, endpoint } = target; - let (model_name, _) = model.model_name.as_model_name_and_namespace(); + let (_, model_name) = model.model_name.namespace_and_name(); let adapter_kind = model.adapter_kind; // -- api_key diff --git a/src/adapter/adapters/openai/embed.rs b/src/adapter/adapters/openai/embed.rs index 2a131b09..8c9d439c 100644 --- a/src/adapter/adapters/openai/embed.rs +++ b/src/adapter/adapters/openai/embed.rs @@ -83,7 +83,7 @@ pub fn to_embed_request_data( }; // Extract the actual model name (without namespace) - let (model_name, _) = model.model_name.as_model_name_and_namespace(); + let (_, model_name) = model.model_name.namespace_and_name(); let openai_req = OpenAIEmbedRequest { input, diff --git a/src/adapter/adapters/openai_resp/adapter_impl.rs b/src/adapter/adapters/openai_resp/adapter_impl.rs index 
37f7badc..45311190 100644 --- a/src/adapter/adapters/openai_resp/adapter_impl.rs +++ b/src/adapter/adapters/openai_resp/adapter_impl.rs @@ -64,7 +64,7 @@ impl Adapter for OpenAIRespAdapter { chat_options: ChatOptionsSet<'_, '_>, ) -> Result { let ServiceTarget { model, auth, endpoint } = target; - let (model_name, _) = model.model_name.as_model_name_and_namespace(); + let (_, model_name) = model.model_name.namespace_and_name(); let adapter_kind = model.adapter_kind; // -- api_key diff --git a/src/adapter/adapters/zai/adapter_impl.rs b/src/adapter/adapters/zai/adapter_impl.rs index d3c25659..82835549 100644 --- a/src/adapter/adapters/zai/adapter_impl.rs +++ b/src/adapter/adapters/zai/adapter_impl.rs @@ -17,7 +17,7 @@ struct ZaiModelEndpoint { impl ZaiModelEndpoint { /// Parse ModelIden to determine if it's a coding model and return endpoint fn from_model(model: &ModelIden) -> Self { - let (_, namespace) = model.model_name.as_model_name_and_namespace(); + let (namespace, _) = model.model_name.namespace_and_name(); // Check if namespace is "zai" to route to coding endpoint let endpoint = match namespace { diff --git a/src/common/model_name.rs b/src/common/model_name.rs index e77f7fcf..a3a7e399 100644 --- a/src/common/model_name.rs +++ b/src/common/model_name.rs @@ -7,35 +7,30 @@ use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, Serialize, Deserialize, Hash, Eq, PartialEq)] pub struct ModelName(Arc); -/// Utilities impl ModelName { - pub fn has_namespace(&self, namespace: &str) -> bool { - if let Some(ns) = self.namespace() { - ns == namespace - } else { - false - } + pub fn namespace_is(&self, namespace: &str) -> bool { + self.namespace() == Some(namespace) } pub fn namespace(&self) -> Option<&str> { - self.as_model_name_and_namespace().1 + self.namespace_and_name().0 } - /// Calling the `model_name_and_namespace` - pub(crate) fn as_model_name_and_namespace(&self) -> (&str, Option<&str>) { - Self::split_as_model_name_and_namespace(&self.0) + /// Returns 
`(namespace, name)` + pub fn namespace_and_name(&self) -> (Option<&str>, &str) { + Self::split_as_namespace_and_name(&self.0) } - /// e.g., `openai::gpt4.1` ("gpt4.1", Some("openai")) - /// `gpt4.1` ("gpt4.1", None) - pub(crate) fn split_as_model_name_and_namespace(model: &str) -> (&str, Option<&str>) { + /// e.g.: + /// `openai::gpt4.1` → (Some("openai"), "gpt4.1") + /// `gpt4.1` → (None, "gpt4.1") + pub(crate) fn split_as_namespace_and_name(model: &str) -> (Option<&str>, &str) { if let Some(ns_idx) = model.find("::") { let ns: &str = &model[..ns_idx]; let name: &str = &model[(ns_idx + 2)..]; - // TODO: assess what to do when name or ns is empty - (name, Some(ns)) + (Some(ns), name) } else { - (model, None) + (None, model) } } } From ace5bf9cccc70a365acf9da0b2ef2ec30e52271d Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 5 Dec 2025 14:53:00 -0800 Subject: [PATCH 052/123] . update to 0.5.0-alpha.4 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 5b9f7747..953f7a4b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0-alpha.4-WIP" +version = "0.5.0-alpha.4" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 56260f2efff3d3f34557e7fedfb22925079d1d5a Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 5 Dec 2025 14:53:51 -0800 Subject: [PATCH 053/123] . 0.5.0-alpha.5-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 953f7a4b..1b6b6c6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0-alpha.4" +version = "0.5.0-alpha.5-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. 
(OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From d91cda085bf3d3368d631eb4047b608720717dfa Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 5 Dec 2025 19:22:43 -0800 Subject: [PATCH 054/123] . doc - clippy clean --- src/adapter/adapters/anthropic/mod.rs | 10 +++++----- src/adapter/adapters/cohere/embed.rs | 2 +- src/adapter/adapters/cohere/mod.rs | 6 +++--- src/adapter/adapters/deepseek/mod.rs | 6 +++--- src/adapter/adapters/fireworks/mod.rs | 6 +++--- src/adapter/adapters/gemini/embed.rs | 2 +- src/adapter/adapters/gemini/mod.rs | 6 +++--- src/adapter/adapters/groq/mod.rs | 6 +++--- src/adapter/adapters/nebius/mod.rs | 6 +++--- src/adapter/adapters/ollama/adapter_impl.rs | 4 ++-- src/adapter/adapters/ollama/mod.rs | 6 +++--- src/adapter/adapters/zai/mod.rs | 4 ++-- src/client/headers.rs | 2 +- src/common/model_iden.rs | 2 +- 14 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/adapter/adapters/anthropic/mod.rs b/src/adapter/adapters/anthropic/mod.rs index a7b33a47..133ff33e 100644 --- a/src/adapter/adapters/anthropic/mod.rs +++ b/src/adapter/adapters/anthropic/mod.rs @@ -1,8 +1,8 @@ -//! API Documentation: https://docs.anthropic.com/en/api/messages -//! Tool Documentation: https://docs.anthropic.com/en/docs/build-with-claude/tool-use -//! Effort Documentation: https://platform.claude.com/docs/en/build-with-claude/effort -//! Model Names: https://docs.anthropic.com/en/docs/models-overview -//! Pricing: https://www.anthropic.com/pricing#anthropic-api +//! API Documentation: +//! Tool Documentation: +//! Effort Documentation: +//! Model Names: +//! Pricing: // region: --- Modules diff --git a/src/adapter/adapters/cohere/embed.rs b/src/adapter/adapters/cohere/embed.rs index be08d2f1..0bfb32ba 100644 --- a/src/adapter/adapters/cohere/embed.rs +++ b/src/adapter/adapters/cohere/embed.rs @@ -1,5 +1,5 @@ //! Cohere Embeddings API implementation -//! API Documentation: https://docs.cohere.com/reference/embed +//! 
API Documentation: use crate::adapter::adapters::support::get_api_key; use crate::adapter::{Adapter, ServiceType, WebRequestData}; diff --git a/src/adapter/adapters/cohere/mod.rs b/src/adapter/adapters/cohere/mod.rs index 8d35d71a..d105ec4e 100644 --- a/src/adapter/adapters/cohere/mod.rs +++ b/src/adapter/adapters/cohere/mod.rs @@ -1,6 +1,6 @@ -//! API DOC: https://docs.cohere.com/reference/chat -//! MODEL NAMES: https://docs.cohere.com/docs/models -//! PRICING: https://cohere.com/pricing +//! API DOC: +//! MODEL NAMES: +//! PRICING: // region: --- Modules diff --git a/src/adapter/adapters/deepseek/mod.rs b/src/adapter/adapters/deepseek/mod.rs index 2d2c105b..bfe5b5ae 100644 --- a/src/adapter/adapters/deepseek/mod.rs +++ b/src/adapter/adapters/deepseek/mod.rs @@ -1,6 +1,6 @@ -//! API Documentation: https://api-docs.deepseek.com/ -//! Model Names: https://api-docs.deepseek.com/quick_start/pricing -//! Pricing: https://api-docs.deepseek.com/quick_start/pricing +//! API Documentation: +//! Model Names: +//! Pricing: // region: --- Modules diff --git a/src/adapter/adapters/fireworks/mod.rs b/src/adapter/adapters/fireworks/mod.rs index 8ff83801..f7a984f8 100644 --- a/src/adapter/adapters/fireworks/mod.rs +++ b/src/adapter/adapters/fireworks/mod.rs @@ -1,7 +1,7 @@ //! Click the globe icon on the top-right corner of the page to switch language. -//! API Documentation: https://fireworks.ai/docs/getting-started/introduction -//! Model Names: https://fireworks.ai/models -//! Pricing: https://fireworks.ai/pricing#serverless-pricing +//! API Documentation: +//! Model Names: +//! Pricing: // region: --- Modules diff --git a/src/adapter/adapters/gemini/embed.rs b/src/adapter/adapters/gemini/embed.rs index a4fe0f6e..e1a63ab4 100644 --- a/src/adapter/adapters/gemini/embed.rs +++ b/src/adapter/adapters/gemini/embed.rs @@ -1,5 +1,5 @@ //! Gemini Embeddings API implementation -//! API Documentation: https://ai.google.dev/gemini-api/docs/embeddings +//! 
API Documentation: use crate::adapter::adapters::support::get_api_key; use crate::adapter::{Adapter, ServiceType, WebRequestData}; diff --git a/src/adapter/adapters/gemini/mod.rs b/src/adapter/adapters/gemini/mod.rs index c34237b8..d62d7492 100644 --- a/src/adapter/adapters/gemini/mod.rs +++ b/src/adapter/adapters/gemini/mod.rs @@ -1,6 +1,6 @@ -//! API Documentation: https://ai.google.dev/api/rest/v1beta/models/generateContent -//! Model Names: https://ai.google.dev/gemini-api/docs/models/gemini -//! Pricing: https://ai.google.dev/pricing +//! API Documentation: +//! Model Names: +//! Pricing: // region: --- Modules diff --git a/src/adapter/adapters/groq/mod.rs b/src/adapter/adapters/groq/mod.rs index 9c5bbd0c..e95858f2 100644 --- a/src/adapter/adapters/groq/mod.rs +++ b/src/adapter/adapters/groq/mod.rs @@ -1,6 +1,6 @@ -//! API Documentation: https://console.groq.com/docs/api-reference#chat -//! Model Names: https://console.groq.com/docs/models -//! Pricing: https://groq.com/pricing/ +//! API Documentation: +//! Model Names: +//! Pricing: // region: --- Modules diff --git a/src/adapter/adapters/nebius/mod.rs b/src/adapter/adapters/nebius/mod.rs index a347bf0c..1fdeee1b 100644 --- a/src/adapter/adapters/nebius/mod.rs +++ b/src/adapter/adapters/nebius/mod.rs @@ -1,6 +1,6 @@ -//! API Documentation: https://studio.nebius.com/api-reference -//! Model Names: https://studio.nebius.com/ -//! Endpoint: https://api.studio.nebius.ai/v1/ +//! API Documentation: +//! Model Names: +//! Endpoint: // region: --- Modules diff --git a/src/adapter/adapters/ollama/adapter_impl.rs b/src/adapter/adapters/ollama/adapter_impl.rs index b0c2b061..4e6c173f 100644 --- a/src/adapter/adapters/ollama/adapter_impl.rs +++ b/src/adapter/adapters/ollama/adapter_impl.rs @@ -1,4 +1,4 @@ -//! API DOC: https://github.com/ollama/ollama/blob/main/docs/openai.md +//! 
API DOC: use crate::adapter::openai::OpenAIAdapter; use crate::adapter::{Adapter, AdapterKind, ServiceType, WebRequestData}; @@ -14,7 +14,7 @@ use value_ext::JsonValueExt; pub struct OllamaAdapter; /// Note: For now, it uses the OpenAI compatibility layer -/// (https://github.com/ollama/ollama/blob/main/docs/openai.md) +/// () /// Since the base Ollama API supports `application/x-ndjson` for streaming, whereas others support `text/event-stream` impl Adapter for OllamaAdapter { fn default_endpoint() -> Endpoint { diff --git a/src/adapter/adapters/ollama/mod.rs b/src/adapter/adapters/ollama/mod.rs index b0d785b7..2ddf1258 100644 --- a/src/adapter/adapters/ollama/mod.rs +++ b/src/adapter/adapters/ollama/mod.rs @@ -1,7 +1,7 @@ //! NOTE: Currently, GenAI uses the OpenAI compatibility layer, except for listing models. -//! OPENAI API DOC: https://platform.openai.com/docs/api-reference/chat -//! OLLAMA API DOC: https://github.com/ollama/ollama/blob/main/docs/api.md -//! OLLAMA Models: https://ollama.com/library +//! OPENAI API DOC: +//! OLLAMA API DOC: +//! OLLAMA Models: // region: --- Modules diff --git a/src/adapter/adapters/zai/mod.rs b/src/adapter/adapters/zai/mod.rs index cdf214a1..be9942c4 100644 --- a/src/adapter/adapters/zai/mod.rs +++ b/src/adapter/adapters/zai/mod.rs @@ -8,12 +8,12 @@ //! ZAI supports two different API endpoints using the ServiceTargetResolver pattern: //! //! ### Regular API (Credit-based) (default for those models or with `zai::` namespace) -//! - Endpoint: `https://api.z.ai/api/paas/v4/` +//! - Endpoint: `` //! - Models: `glm-4.6`, `glm-4.5`, etc. //! - Usage: Standard API calls billed per token //! //! ### Coding Plan (Subscription-based only with the `zai-coding::` namepace) -//! - Endpoint: `https://api.z.ai/api/coding/paas/v4/` +//! - Endpoint: `` //! - Models: `zai-coding::glm-4.6`, `zai-coding::glm-4.5`, etc. //! - Usage: Fixed monthly subscription for coding tasks //! 
diff --git a/src/client/headers.rs b/src/client/headers.rs index 0aaa007e..2b3a74ef 100644 --- a/src/client/headers.rs +++ b/src/client/headers.rs @@ -17,7 +17,7 @@ pub struct Headers { impl Headers { /// Merge headers from overlay into self, consuming overlay. /// Later values override existing ones. - /// Use [`merge_with`] for a borrowed overlay. + /// Use [`Headers::merge_with`] for a borrowed overlay. pub fn merge(&mut self, overlay: impl Into) { let overlay = overlay.into(); diff --git a/src/common/model_iden.rs b/src/common/model_iden.rs index 60e01861..6f5d3165 100644 --- a/src/common/model_iden.rs +++ b/src/common/model_iden.rs @@ -49,7 +49,7 @@ impl ModelIden { } /// Creates a new `ModelIden` with the specified name, or clones the existing one if the name is the same. - /// NOTE: Might be deprecated in favor of [`from_name`] + /// NOTE: Might be deprecated in favor of [`ModelIden::from_name`] pub fn from_optional_name(&self, new_name: Option) -> ModelIden { if let Some(new_name) = new_name { self.from_name(new_name) From 3c478ccb5e175d0bbb521388492bbc3cfe151785 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 11 Dec 2025 18:43:39 -0800 Subject: [PATCH 055/123] . 
openai - update names and tests to gpt-5.2 --- src/adapter/adapters/openai/adapter_impl.rs | 4 ++-- tests/tests_p_openai.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/adapter/adapters/openai/adapter_impl.rs b/src/adapter/adapters/openai/adapter_impl.rs index bd0ad86f..552e9081 100644 --- a/src/adapter/adapters/openai/adapter_impl.rs +++ b/src/adapter/adapters/openai/adapter_impl.rs @@ -23,8 +23,8 @@ pub struct OpenAIAdapter; // Latest models const MODELS: &[&str] = &[ // - "gpt-5.1", - "gpt-5", + "gpt-5.2", + "gpt-5.2-pro", "gpt-5-mini", "gpt-5-nano", "gpt-audio-mini", diff --git a/tests/tests_p_openai.rs b/tests/tests_p_openai.rs index 4c311fa9..f5eaa3a2 100644 --- a/tests/tests_p_openai.rs +++ b/tests/tests_p_openai.rs @@ -7,7 +7,7 @@ use genai::resolver::AuthData; // note: "gpt-4o-mini" has issue when image & pdf // as for 2025-08-08 gpt-5-mini does not support temperature & stop sequence -const MODEL_LATEST: &str = "gpt-5.1"; +const MODEL_LATEST: &str = "gpt-5.2"; const MODEL_GPT_5_MINI: &str = "gpt-5-mini"; // for the streaming reasoning test const AUDIO_MODEL: &str = "gpt-audio-mini"; const MODEL2: &str = "gpt-4.1-mini"; // for temperature & stop sequence From c37984bdd91af485d609aa73f92cc5a92132b56b Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 11 Dec 2025 19:53:54 -0800 Subject: [PATCH 056/123] ^ reasoning - add ReasoningEffort::None --- .../adapters/anthropic/adapter_impl.rs | 52 +++++++++---------- src/adapter/adapters/gemini/adapter_impl.rs | 48 ++++++++--------- src/chat/chat_options.rs | 25 ++++++--- tests/support/common_tests.rs | 6 ++- 4 files changed, 71 insertions(+), 60 deletions(-) diff --git a/src/adapter/adapters/anthropic/adapter_impl.rs b/src/adapter/adapters/anthropic/adapter_impl.rs index e95322a9..7f6f5c74 100644 --- a/src/adapter/adapters/anthropic/adapter_impl.rs +++ b/src/adapter/adapters/anthropic/adapter_impl.rs @@ -21,13 +21,25 @@ const REASONING_LOW: u32 = 1024; const REASONING_MEDIUM: u32 = 
8000; const REASONING_HIGH: u32 = 24000; -fn get_anthropic_thinking_budget_value(effort: &ReasoningEffort) -> u32 { - match effort { - ReasoningEffort::Budget(budget) => *budget, - ReasoningEffort::Low | ReasoningEffort::Minimal => REASONING_LOW, - ReasoningEffort::Medium => REASONING_MEDIUM, - ReasoningEffort::High => REASONING_HIGH, +fn insert_anthropic_thinking_budget_value(payload: &mut Value, effort: &ReasoningEffort) -> Result<()> { + let thinking_budget = match effort { + ReasoningEffort::None => None, + ReasoningEffort::Budget(budget) => Some(*budget), + ReasoningEffort::Low | ReasoningEffort::Minimal => Some(REASONING_LOW), + ReasoningEffort::Medium => Some(REASONING_MEDIUM), + ReasoningEffort::High => Some(REASONING_HIGH), + }; + + if let Some(thinking_budget) = thinking_budget { + payload.x_insert( + "thinking", + json!({ + "type": "enabled", + "budget_tokens": thinking_budget + }), + )?; } + Ok(()) } // NOTE: For Anthropic, the max_tokens must be specified. @@ -117,7 +129,8 @@ impl Adapter for AnthropicAdapter { // let model_name: &str = &model.model_name; if let Some((prefix, last)) = raw_model_name.rsplit_once('-') { let reasoning = match last { - "zero" => None, // That will disable thinking + "zero" => None, + "None" => Some(ReasoningEffort::Low), "minimal" => Some(ReasoningEffort::Low), "low" => Some(ReasoningEffort::Low), "medium" => Some(ReasoningEffort::Medium), @@ -133,17 +146,7 @@ impl Adapter for AnthropicAdapter { } } // If reasoning effort, turn the low, medium, budget ones into Budget - (model, Some(effort)) => { - let effort = match effort { - // -- for now, match minimal to Low (because zero is not supported by 2.5 pro) - ReasoningEffort::Minimal => ReasoningEffort::Low, - ReasoningEffort::Low => ReasoningEffort::Low, - ReasoningEffort::Medium => ReasoningEffort::Medium, - ReasoningEffort::High => ReasoningEffort::High, - ReasoningEffort::Budget(budget) => ReasoningEffort::Budget(*budget), - }; - (model, Some(effort)) - } + (model, 
Some(effort)) => (model, Some(effort.clone())), }; // -- Build the basic payload @@ -179,7 +182,9 @@ ReasoningEffort::Low => "low", ReasoningEffort::Medium => "medium", ReasoningEffort::High => "high", - ReasoningEffort::Budget(_) => "", // for now, will not set + // -- for now, will not set + ReasoningEffort::Budget(_) => "", + ReasoningEffort::None => "", }; if !effort.is_empty() { payload.x_insert( @@ -192,14 +197,7 @@ } // -- All models, including opus-4-5, we see the thinking budget - let budget = get_anthropic_thinking_budget_value(&computed_reasoning_effort); - payload.x_insert( - "thinking", - json!({ - "type": "enabled", - "budget_tokens": budget - }), - )?; + insert_anthropic_thinking_budget_value(&mut payload, &computed_reasoning_effort)?; } // -- Add supported ChatOptions diff --git a/src/adapter/adapters/gemini/adapter_impl.rs b/src/adapter/adapters/gemini/adapter_impl.rs index 787514d5..eb3b54c8 100644 --- a/src/adapter/adapters/gemini/adapter_impl.rs +++ b/src/adapter/adapters/gemini/adapter_impl.rs @@ -30,14 +30,23 @@ const REASONING_LOW: u32 = 1000; const REASONING_MEDIUM: u32 = 8000; const REASONING_HIGH: u32 = 24000; -fn get_gemini_thinking_budget_value(effort: &ReasoningEffort) -> u32 { +/// Important +/// - For now Low and Minimal are the same for gemini +/// - +fn insert_gemini_thinking_budget_value(payload: &mut Value, effort: &ReasoningEffort) -> Result<()> { // -- for now, match minimal to Low (because zero is not supported by 2.5 pro) - match effort { - ReasoningEffort::Budget(budget) => *budget, - ReasoningEffort::Low | ReasoningEffort::Minimal => REASONING_LOW, - ReasoningEffort::Medium => REASONING_MEDIUM, - ReasoningEffort::High => 
Some(REASONING_HIGH), + ReasoningEffort::Budget(budget) => Some(*budget), + }; + + if let Some(budget) = budget { + payload.x_insert("/generationConfig/thinkingConfig/thinkingBudget", budget)?; } + Ok(()) } // curl \ @@ -99,9 +108,10 @@ impl Adapter for GeminiAdapter { // let model_name: &str = &model.model_name; if let Some((prefix, last)) = model_name.rsplit_once('-') { let reasoning = match last { + // 'zero' is a gemini special "zero" => Some(ReasoningEffort::Budget(REASONING_ZERO)), - "minimal" => Some(ReasoningEffort::Low), - "low" => Some(ReasoningEffort::Low), + "none" => Some(ReasoningEffort::None), + "low" | "minimal" => Some(ReasoningEffort::Low), "medium" => Some(ReasoningEffort::Medium), "high" => Some(ReasoningEffort::High), _ => None, @@ -114,18 +124,8 @@ impl Adapter for GeminiAdapter { (model, None) } } - // If reasoning effort, turn the low, medium, budget ones into Budget - (model, Some(effort)) => { - let effort = match effort { - // -- for now, match minimal to Low (because zero is not supported by 2.5 pro) - ReasoningEffort::Minimal => ReasoningEffort::Low, - ReasoningEffort::Low => ReasoningEffort::Low, - ReasoningEffort::Medium => ReasoningEffort::Medium, - ReasoningEffort::High => ReasoningEffort::High, - ReasoningEffort::Budget(budget) => ReasoningEffort::Budget(*budget), - }; - (model, Some(effort)) - } + // TODO: make it more elegant + (model, Some(effort)) => (model, Some(effort.clone())), }; // -- parts @@ -142,7 +142,7 @@ impl Adapter for GeminiAdapter { // -- Set the reasoning effort if let Some(computed_reasoning_effort) = computed_reasoning_effort { - // -- For gemini-3 use the thinkingLevel if Low or High (does not support mediume for now) + // -- For gemini-3 use the thinkingLevel if Low or High (does not support medium for now) if provider_model_name.contains("gemini-3") { match computed_reasoning_effort { ReasoningEffort::Low | ReasoningEffort::Minimal => { @@ -153,15 +153,13 @@ } // Fallback on 
thinkingBudget other => { - let budget = get_gemini_thinking_budget_value(&other); - payload.x_insert("/generationConfig/thinkingConfig/thinkingBudget", budget)?; + insert_gemini_thinking_budget_value(&mut payload, &other)?; } } } // -- Otherwise, Do thinking budget else { - let budget = get_gemini_thinking_budget_value(&computed_reasoning_effort); - payload.x_insert("/generationConfig/thinkingConfig/thinkingBudget", budget)?; + insert_gemini_thinking_budget_value(&mut payload, &computed_reasoning_effort)?; } } diff --git a/src/chat/chat_options.rs b/src/chat/chat_options.rs index 73e24a57..2d2cd833 100644 --- a/src/chat/chat_options.rs +++ b/src/chat/chat_options.rs @@ -188,50 +188,59 @@ impl ChatOptions { /// Provider-specific hint for reasoning intensity/budget. #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ReasoningEffort { - Minimal, + None, Low, Medium, High, Budget(u32), + + // Legacy reasoning for <= gpt-5 + Minimal, } impl ReasoningEffort { /// Returns the lowercase variant name. pub fn variant_name(&self) -> &'static str { match self { - ReasoningEffort::Minimal => "minimal", + ReasoningEffort::None => "none", ReasoningEffort::Low => "low", ReasoningEffort::Medium => "medium", ReasoningEffort::High => "high", ReasoningEffort::Budget(_) => "budget", + // Legacy + ReasoningEffort::Minimal => "minimal", } } /// Returns a keyword for non-`Budget` variants; `None` for `Budget(_)`. pub fn as_keyword(&self) -> Option<&'static str> { match self { - ReasoningEffort::Minimal => Some("minimal"), + ReasoningEffort::None => Some("none"), ReasoningEffort::Low => Some("low"), ReasoningEffort::Medium => Some("medium"), ReasoningEffort::High => Some("high"), ReasoningEffort::Budget(_) => None, + // Legacy + ReasoningEffort::Minimal => Some("minimal"), } } /// Parses a verbosity keyword. 
pub fn from_keyword(name: &str) -> Option { match name { - "minimal" => Some(ReasoningEffort::Minimal), + "none" => Some(ReasoningEffort::None), "low" => Some(ReasoningEffort::Low), "medium" => Some(ReasoningEffort::Medium), "high" => Some(ReasoningEffort::High), + // legacy + "minimal" => Some(ReasoningEffort::Minimal), _ => None, } } - /// If `model_name` ends with `-`, returns the parsed verbosity and the trimmed name. + /// If `model_name` ends with `-reasoning_effort`, returns the parsed verbosity and the trimmed name. /// - /// Returns `(verbosity, trimmed_model_name)`. + /// Returns `(reasoning_effort?, trimmed_model_name)`. pub fn from_model_name(model_name: &str) -> (Option, &str) { if let Some((prefix, last)) = model_name.rsplit_once('-') && let Some(effort) = ReasoningEffort::from_keyword(last) @@ -245,11 +254,13 @@ impl ReasoningEffort { impl std::fmt::Display for ReasoningEffort { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - ReasoningEffort::Minimal => write!(f, "minimal"), + ReasoningEffort::None => write!(f, "none"), ReasoningEffort::Low => write!(f, "low"), ReasoningEffort::Medium => write!(f, "medium"), ReasoningEffort::High => write!(f, "high"), ReasoningEffort::Budget(n) => write!(f, "{n}"), + // Legacy + ReasoningEffort::Minimal => write!(f, "minimal"), } } } diff --git a/tests/support/common_tests.rs b/tests/support/common_tests.rs index f5d2cf67..0a3b901d 100644 --- a/tests/support/common_tests.rs +++ b/tests/support/common_tests.rs @@ -68,7 +68,11 @@ pub async fn common_test_chat_reasoning_ok( ) -> TestResult<()> { // -- Setup & Fixtures let client = Client::default(); - let chat_req = seed_chat_req_simple(); + let chat_req = ChatRequest::new(vec![ // -- Messages (deactivate to see the differences) ChatMessage::system("Answer in one sentence. 
But make think hard to make sure it is not a trick question."), + ChatMessage::user("Why is the sky red?"), + ]); let options = ChatOptions::default().with_reasoning_effort(reasoning_effort); // -- Exec From b6b8673354fbc534209a607c6c4fed3076b05a8d Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 11 Dec 2025 19:54:20 -0800 Subject: [PATCH 057/123] . 0.5.0-alpha.5 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1b6b6c6b..14adc303 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0-alpha.5-WIP" +version = "0.5.0-alpha.5" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From e7e88a70645b9aa1a2724696b07754719988948c Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 11 Dec 2025 19:54:51 -0800 Subject: [PATCH 058/123] . 0.5.0-alpha.6-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 14adc303..3c9f672f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0-alpha.5" +version = "0.5.0-alpha.6-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. 
(OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 69c4fb6044fd296870e855850209eaadaa734528 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Mon, 29 Dec 2025 04:59:10 +0800 Subject: [PATCH 059/123] Feat: Add MIMO model adapter (#105) * feat(adapter): add MimoAdapter with support for mimo-v2-flash model * docs(readme): add Mimo provider details and update model info --- README.md | 9 ++- src/adapter/adapter_kind.rs | 9 +++ src/adapter/adapters/mimo/adapter_impl.rs | 76 +++++++++++++++++++++++ src/adapter/adapters/mimo/mod.rs | 7 +++ src/adapter/adapters/mod.rs | 1 + src/adapter/dispatcher.rs | 10 +++ 6 files changed, 109 insertions(+), 3 deletions(-) create mode 100644 src/adapter/adapters/mimo/adapter_impl.rs create mode 100644 src/adapter/adapters/mimo/mod.rs diff --git a/README.md b/README.md index 00b718d9..d77a24b1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # genai - Multi-AI Providers Library for Rust -Currently natively supports: **OpenAI**, **Anthropic**, **Gemini**, **XAI/Grok**, **Ollama**, **Groq**, **DeepSeek** (deepseek.com & Groq), **Cohere** (more to come) +Currently natively supports: **OpenAI**, **Anthropic**, **Gemini**, **XAI/Grok**, **Ollama**, **Groq**, **DeepSeek** (deepseek.com & Groq), **Cohere**, **Mimo** (more to come) Also allows a custom URL with `ServiceTargetResolver` (see [examples/c06-target-resolver.rs](examples/c06-target-resolver.rs)) @@ -97,6 +97,7 @@ const MODEL_GROQ: &str = "llama-3.1-8b-instant"; const MODEL_OLLAMA: &str = "gemma:2b"; // sh: `ollama pull gemma:2b` const MODEL_XAI: &str = "grok-beta"; const MODEL_DEEPSEEK: &str = "deepseek-chat"; +const MODEL_MIMO: &str = "mimo-v2-flash"; // NOTE: These are the default environment keys for each AI Adapter Type. 
// They can be customized; see `examples/c02-auth.rs` @@ -109,6 +110,7 @@ const MODEL_AND_KEY_ENV_NAME_LIST: &[(&str, &str)] = &[ (MODEL_GROQ, "GROQ_API_KEY"), (MODEL_XAI, "XAI_API_KEY"), (MODEL_DEEPSEEK, "DEEPSEEK_API_KEY"), + (MODEL_MIMO, "MIMO_API_KEY"), (MODEL_OLLAMA, ""), ]; @@ -118,6 +120,7 @@ const MODEL_AND_KEY_ENV_NAME_LIST: &[(&str, &str)] = &[ // - starts_with "command" -> Cohere // - starts_with "gemini" -> Gemini // - model in Groq models -> Groq +// - starts_with "mimo" -> Mimo // - For anything else -> Ollama // // This can be customized; see `examples/c03-mapper.rs` @@ -205,7 +208,7 @@ async fn main() -> Result<(), Box> { ## ChatOptions - **(1)** - **OpenAI-compatible** notes - - Models: OpenAI, DeepSeek, Groq, Ollama, xAI + - Models: OpenAI, DeepSeek, Groq, Ollama, xAI, Mimo | Property | OpenAI Compatibles (*1) | Anthropic | Gemini `generationConfig.` | Cohere | |---------------|-------------------------|-----------------------------|----------------------------|---------------| @@ -225,7 +228,7 @@ async fn main() -> Result<(), Box> { - **(1)** - **OpenAI-compatible** notes - - Models: OpenAI, DeepSeek, Groq, Ollama, xAI + - Models: OpenAI, DeepSeek, Groq, Ollama, xAI, Mimo - For **Groq**, the property `x_groq.usage.` - At this point, **Ollama** does not emit input/output tokens when streaming due to the Ollama OpenAI compatibility layer limitation. 
(see [ollama #4448 - Streaming Chat Completion via OpenAI API should support stream option to include Usage](https://github.com/ollama/ollama/issues/4448)) - `prompt_tokens_details` and `completion_tokens_details` will have the value sent by the compatible provider (or None) diff --git a/src/adapter/adapter_kind.rs b/src/adapter/adapter_kind.rs index 31a82206..5179a187 100644 --- a/src/adapter/adapter_kind.rs +++ b/src/adapter/adapter_kind.rs @@ -7,6 +7,7 @@ use crate::adapter::deepseek::{self, DeepSeekAdapter}; use crate::adapter::fireworks::FireworksAdapter; use crate::adapter::gemini::GeminiAdapter; use crate::adapter::groq::{self, GroqAdapter}; +use crate::adapter::mimo::{self, MimoAdapter}; use crate::adapter::nebius::NebiusAdapter; use crate::adapter::openai::OpenAIAdapter; use crate::adapter::xai::XaiAdapter; @@ -34,6 +35,8 @@ pub enum AdapterKind { Together, /// Reuse some of the OpenAI adapter behavior, customize some (e.g., normalize thinking budget) Groq, + /// For Mimo (Mostly use OpenAI) + Mimo, /// For Nebius (Mostly use OpenAI) Nebius, /// For xAI (Mostly use OpenAI) @@ -62,6 +65,7 @@ impl AdapterKind { AdapterKind::Fireworks => "Fireworks", AdapterKind::Together => "Together", AdapterKind::Groq => "Groq", + AdapterKind::Mimo => "Mimo", AdapterKind::Nebius => "Nebius", AdapterKind::Xai => "xAi", AdapterKind::DeepSeek => "DeepSeek", @@ -82,6 +86,7 @@ impl AdapterKind { AdapterKind::Fireworks => "fireworks", AdapterKind::Together => "together", AdapterKind::Groq => "groq", + AdapterKind::Mimo => "mimo", AdapterKind::Nebius => "nebius", AdapterKind::Xai => "xai", AdapterKind::DeepSeek => "deepseek", @@ -101,6 +106,7 @@ impl AdapterKind { "fireworks" => Some(AdapterKind::Fireworks), "together" => Some(AdapterKind::Together), "groq" => Some(AdapterKind::Groq), + "mimo" => Some(AdapterKind::Mimo), "nebius" => Some(AdapterKind::Nebius), "xai" => Some(AdapterKind::Xai), "deepseek" => Some(AdapterKind::DeepSeek), @@ -125,6 +131,7 @@ impl AdapterKind { 
AdapterKind::Fireworks => Some(FireworksAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::Together => Some(TogetherAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::Groq => Some(GroqAdapter::API_KEY_DEFAULT_ENV_NAME), + AdapterKind::Mimo => Some(MimoAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::Nebius => Some(NebiusAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::Xai => Some(XaiAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::DeepSeek => Some(DeepSeekAdapter::API_KEY_DEFAULT_ENV_NAME), @@ -191,6 +198,8 @@ impl AdapterKind { Ok(Self::Fireworks) } else if groq::MODELS.contains(&model) { Ok(Self::Groq) + } else if mimo::MODELS.contains(&model) { + Ok(Self::Mimo) } else if model.starts_with("command") || model.starts_with("embed-") { Ok(Self::Cohere) } else if deepseek::MODELS.contains(&model) { diff --git a/src/adapter/adapters/mimo/adapter_impl.rs b/src/adapter/adapters/mimo/adapter_impl.rs new file mode 100644 index 00000000..d89ef898 --- /dev/null +++ b/src/adapter/adapters/mimo/adapter_impl.rs @@ -0,0 +1,76 @@ +use crate::ModelIden; +use crate::adapter::openai::OpenAIAdapter; +use crate::adapter::{Adapter, AdapterKind, ServiceType, WebRequestData}; +use crate::chat::{ChatOptionsSet, ChatRequest, ChatResponse, ChatStreamResponse}; +use crate::resolver::{AuthData, Endpoint}; +use crate::webc::WebResponse; +use crate::{Result, ServiceTarget}; +use reqwest::RequestBuilder; + +pub struct MimoAdapter; + +pub(in crate::adapter) const MODELS: &[&str] = &["mimo-v2-flash"]; + +impl MimoAdapter { + pub const API_KEY_DEFAULT_ENV_NAME: &str = "MIMO_API_KEY"; +} + +impl Adapter for MimoAdapter { + fn default_auth() -> AuthData { + AuthData::from_env(Self::API_KEY_DEFAULT_ENV_NAME) + } + + fn default_endpoint() -> Endpoint { + const BASE_URL: &str = "https://api.xiaomimimo.com/v1/"; + Endpoint::from_static(BASE_URL) + } + + async fn all_model_names(_kind: AdapterKind) -> Result> { + Ok(MODELS.iter().map(|s| s.to_string()).collect()) + } + + fn get_service_url(model: &ModelIden, 
service_type: ServiceType, endpoint: Endpoint) -> Result { + OpenAIAdapter::util_get_service_url(model, service_type, endpoint) + } + + fn to_web_request_data( + target: ServiceTarget, + service_type: ServiceType, + chat_req: ChatRequest, + chat_options: ChatOptionsSet<'_, '_>, + ) -> Result { + OpenAIAdapter::util_to_web_request_data(target, service_type, chat_req, chat_options, None) + } + + fn to_chat_response( + model_iden: ModelIden, + web_response: WebResponse, + options_set: ChatOptionsSet<'_, '_>, + ) -> Result { + OpenAIAdapter::to_chat_response(model_iden, web_response, options_set) + } + + fn to_chat_stream( + model_iden: ModelIden, + reqwest_builder: RequestBuilder, + options_set: ChatOptionsSet<'_, '_>, + ) -> Result { + OpenAIAdapter::to_chat_stream(model_iden, reqwest_builder, options_set) + } + + fn to_embed_request_data( + service_target: crate::ServiceTarget, + embed_req: crate::embed::EmbedRequest, + options_set: crate::embed::EmbedOptionsSet<'_, '_>, + ) -> Result { + OpenAIAdapter::to_embed_request_data(service_target, embed_req, options_set) + } + + fn to_embed_response( + model_iden: crate::ModelIden, + web_response: crate::webc::WebResponse, + options_set: crate::embed::EmbedOptionsSet<'_, '_>, + ) -> Result { + OpenAIAdapter::to_embed_response(model_iden, web_response, options_set) + } +} diff --git a/src/adapter/adapters/mimo/mod.rs b/src/adapter/adapters/mimo/mod.rs new file mode 100644 index 00000000..997f386a --- /dev/null +++ b/src/adapter/adapters/mimo/mod.rs @@ -0,0 +1,7 @@ +// region: --- Modules + +mod adapter_impl; + +pub use adapter_impl::*; + +// endregion: --- Modules diff --git a/src/adapter/adapters/mod.rs b/src/adapter/adapters/mod.rs index 41a79e83..8a20e1e0 100644 --- a/src/adapter/adapters/mod.rs +++ b/src/adapter/adapters/mod.rs @@ -7,6 +7,7 @@ pub(super) mod deepseek; pub(super) mod fireworks; pub(super) mod gemini; pub(super) mod groq; +pub(super) mod mimo; pub(super) mod nebius; pub(super) mod ollama; pub(super) mod 
openai; diff --git a/src/adapter/dispatcher.rs b/src/adapter/dispatcher.rs index 7feaa766..6524e212 100644 --- a/src/adapter/dispatcher.rs +++ b/src/adapter/dispatcher.rs @@ -1,4 +1,5 @@ use super::groq::GroqAdapter; +use crate::adapter::adapters::mimo::MimoAdapter; use crate::adapter::adapters::together::TogetherAdapter; use crate::adapter::adapters::zai::ZaiAdapter; use crate::adapter::anthropic::AnthropicAdapter; @@ -38,6 +39,7 @@ impl AdapterDispatcher { AdapterKind::Fireworks => FireworksAdapter::default_endpoint(), AdapterKind::Together => TogetherAdapter::default_endpoint(), AdapterKind::Groq => GroqAdapter::default_endpoint(), + AdapterKind::Mimo => MimoAdapter::default_endpoint(), AdapterKind::Nebius => NebiusAdapter::default_endpoint(), AdapterKind::Xai => XaiAdapter::default_endpoint(), AdapterKind::DeepSeek => DeepSeekAdapter::default_endpoint(), @@ -57,6 +59,7 @@ impl AdapterDispatcher { AdapterKind::Fireworks => FireworksAdapter::default_auth(), AdapterKind::Together => TogetherAdapter::default_auth(), AdapterKind::Groq => GroqAdapter::default_auth(), + AdapterKind::Mimo => MimoAdapter::default_auth(), AdapterKind::Nebius => NebiusAdapter::default_auth(), AdapterKind::Xai => XaiAdapter::default_auth(), AdapterKind::DeepSeek => DeepSeekAdapter::default_auth(), @@ -76,6 +79,7 @@ impl AdapterDispatcher { AdapterKind::Fireworks => FireworksAdapter::all_model_names(kind).await, AdapterKind::Together => TogetherAdapter::all_model_names(kind).await, AdapterKind::Groq => GroqAdapter::all_model_names(kind).await, + AdapterKind::Mimo => MimoAdapter::all_model_names(kind).await, AdapterKind::Nebius => NebiusAdapter::all_model_names(kind).await, AdapterKind::Xai => XaiAdapter::all_model_names(kind).await, AdapterKind::DeepSeek => DeepSeekAdapter::all_model_names(kind).await, @@ -95,6 +99,7 @@ impl AdapterDispatcher { AdapterKind::Fireworks => FireworksAdapter::get_service_url(model, service_type, endpoint), AdapterKind::Together => 
TogetherAdapter::get_service_url(model, service_type, endpoint), AdapterKind::Groq => GroqAdapter::get_service_url(model, service_type, endpoint), + AdapterKind::Mimo => MimoAdapter::get_service_url(model, service_type, endpoint), AdapterKind::Nebius => NebiusAdapter::get_service_url(model, service_type, endpoint), AdapterKind::Xai => XaiAdapter::get_service_url(model, service_type, endpoint), AdapterKind::DeepSeek => DeepSeekAdapter::get_service_url(model, service_type, endpoint), @@ -126,6 +131,7 @@ impl AdapterDispatcher { } AdapterKind::Together => TogetherAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::Groq => GroqAdapter::to_web_request_data(target, service_type, chat_req, options_set), + AdapterKind::Mimo => MimoAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::Nebius => NebiusAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::Xai => XaiAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_web_request_data(target, service_type, chat_req, options_set), @@ -149,6 +155,7 @@ impl AdapterDispatcher { AdapterKind::Fireworks => FireworksAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::Together => TogetherAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::Groq => GroqAdapter::to_chat_response(model_iden, web_response, options_set), + AdapterKind::Mimo => MimoAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::Nebius => NebiusAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::Xai => XaiAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_chat_response(model_iden, web_response, options_set), @@ -175,6 +182,7 @@ impl AdapterDispatcher { AdapterKind::Fireworks => FireworksAdapter::to_chat_stream(model_iden, reqwest_builder, 
options_set), AdapterKind::Together => TogetherAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::Groq => GroqAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), + AdapterKind::Mimo => MimoAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::Nebius => NebiusAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::Xai => XaiAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), @@ -202,6 +210,7 @@ impl AdapterDispatcher { AdapterKind::Fireworks => FireworksAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::Together => TogetherAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::Groq => GroqAdapter::to_embed_request_data(target, embed_req, options_set), + AdapterKind::Mimo => MimoAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::Nebius => NebiusAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::Xai => XaiAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_embed_request_data(target, embed_req, options_set), @@ -228,6 +237,7 @@ impl AdapterDispatcher { AdapterKind::Fireworks => FireworksAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::Together => TogetherAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::Groq => GroqAdapter::to_embed_response(model_iden, web_response, options_set), + AdapterKind::Mimo => MimoAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::Nebius => NebiusAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::Xai => XaiAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_embed_response(model_iden, web_response, 
options_set), From f5d711c439935e2b9355aec138d616c629b373f4 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sun, 28 Dec 2025 13:01:13 -0800 Subject: [PATCH 060/123] . test - anthropic minor fix (model name) --- tests/tests_p_anthropic.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests_p_anthropic.rs b/tests/tests_p_anthropic.rs index 10a11d71..26ee1412 100644 --- a/tests/tests_p_anthropic.rs +++ b/tests/tests_p_anthropic.rs @@ -164,7 +164,7 @@ async fn test_resolver_auth_ok() -> TestResult<()> { #[tokio::test] async fn test_list_models() -> TestResult<()> { - common_tests::common_test_list_models(AdapterKind::Anthropic, "claude-sonnet-4-5-20250929").await + common_tests::common_test_list_models(AdapterKind::Anthropic, "claude-sonnet-4-5").await } // endregion: --- List From b9f5ea9a32229232663bfc22f363ac944eb81904 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sun, 28 Dec 2025 13:10:22 -0800 Subject: [PATCH 061/123] . update dependencies --- Cargo.toml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3c9f672f..797d5162 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,23 +23,23 @@ tokio-stream = "0.1" # -- Json serde = { version = "1", features = ["derive", "rc"] } # Opted to rc for Arc serialization serde_json = "1" -serde_with = "3.12.0" +serde_with = "3" # -- Web reqwest = {version = "0.12", default-features = false, features = ["json", "rustls-tls"]} reqwest-eventsource = "0.6" eventsource-stream = "0.2" bytes = "1.6" # -- File -base64 = "0.22.0" -mime_guess = "=2.0.5" +base64 = "0.22.0" +mime_guess = "2" # -- Others derive_more = { version = "2", features = ["from", "display"] } -value-ext = "0.1.2" +value-ext = "0.1.2" [dev-dependencies] -simple-fs = "0.8.0" +simple-fs = "0.9.2" tracing-subscriber = {version = "0.3", features = ["env-filter"]} -serial_test = "3.2.0" +serial_test = "3.2" base64 = "0.22.0" # Check for the latest version -bitflags = "2.8.0" -gcp_auth 
= "0.12.3" +bitflags = "2.8" +gcp_auth = "0.12" From 56cc7017bf7fb95b66685ecab55c180b8a5296b5 Mon Sep 17 00:00:00 2001 From: Nochum Sossonko <75810045+malyavi-nochum@users.noreply.github.com> Date: Mon, 29 Dec 2025 12:21:04 -0500 Subject: [PATCH 062/123] Fix Fireworks default depending on streaming (#109) --- src/adapter/adapters/fireworks/adapter_impl.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/adapter/adapters/fireworks/adapter_impl.rs b/src/adapter/adapters/fireworks/adapter_impl.rs index 69653866..9c82c287 100644 --- a/src/adapter/adapters/fireworks/adapter_impl.rs +++ b/src/adapter/adapters/fireworks/adapter_impl.rs @@ -68,9 +68,13 @@ impl Adapter for FireworksAdapter { // NOTE: The `genai` strategy is to set a large max_tokens value, letting the model enforce its own lower limit by default to avoid unpleasant and confusing surprises. // Users can use [`ChatOptions`] to specify a specific max_tokens value. // NOTE: As mentioned in the Fireworks FAQ above, typically, for Fireworks-hosted models the top max_tokens is equal to the context window. - // Since, Qwen3 models are at 256k, so we will use this upper bound (without going to the 1M/10M of Llama 4). + // Since, Qwen3 models are at 256k, so we will use this upper bound (without going to the 1M/10M of Llama 4) for non-streaming. + // However, since anything over 5k requires streaming API, we cap the default to 5k for non-streaming here so that the request doesn't fail. 
let custom = ToWebRequestCustom { - default_max_tokens: Some(256_000), + default_max_tokens: match service_type { + ServiceType::ChatStream => Some(256_000), + _ => Some(5_000), + }, }; OpenAIAdapter::util_to_web_request_data(target, service_type, chat_req, chat_options, Some(custom)) From 7d5ba1275fd286f9d321b92b862bdb6ed459c9f7 Mon Sep 17 00:00:00 2001 From: wenmou <99769916+mengdehong@users.noreply.github.com> Date: Tue, 30 Dec 2025 06:46:36 +0800 Subject: [PATCH 063/123] Fix Ollama reasoning streaming (#108) * Fix Ollama reasoning streaming * Skip empty reasoning chunks in streaming --- src/adapter/adapters/openai/streamer.rs | 68 ++++++++++++++--------- tests/tests_p_ollama_reasoning.rs | 71 ++++++++++++++++++++++++- 2 files changed, 112 insertions(+), 27 deletions(-) diff --git a/src/adapter/adapters/openai/streamer.rs b/src/adapter/adapters/openai/streamer.rs index 026d11c6..3c1da288 100644 --- a/src/adapter/adapters/openai/streamer.rs +++ b/src/adapter/adapters/openai/streamer.rs @@ -222,37 +222,53 @@ impl futures::Stream for OpenAIStreamer { // No valid tool call found, continue to next message continue; } - // -- Content - // If there is no finish_reason but there is some content, we can get the delta content and send the Internal Stream Event - else if let Ok(Some(content)) = first_choice.x_take::>("/delta/content") { - // Add to the captured_content if chat options allow it - if self.options.capture_content { - match self.captured_data.content { - Some(ref mut c) => c.push_str(&content), - None => self.captured_data.content = Some(content.clone()), + // -- Content / Reasoning Content + // Some providers (e.g., Ollama) emit reasoning in `delta.reasoning` and send empty content. 
+ else { + let content = first_choice + .x_take::>("/delta/content") + .ok() + .flatten(); + let reasoning_content = first_choice + .x_take::>("/delta/reasoning_content") + .ok() + .flatten() + .or_else(|| { + first_choice + .x_take::>("/delta/reasoning") + .ok() + .flatten() + }); + + if let Some(content) = content + && !content.is_empty() + { + // Add to the captured_content if chat options allow it + if self.options.capture_content { + match self.captured_data.content { + Some(ref mut c) => c.push_str(&content), + None => self.captured_data.content = Some(content.clone()), + } } - } - // Return the Event - return Poll::Ready(Some(Ok(InterStreamEvent::Chunk(content)))); - } - // -- Reasoning Content - else if let Ok(Some(reasoning_content)) = - first_choice.x_take::>("/delta/reasoning_content") - { - // Add to the captured_content if chat options allow it - if self.options.capture_reasoning_content { - match self.captured_data.reasoning_content { - Some(ref mut c) => c.push_str(&reasoning_content), - None => self.captured_data.reasoning_content = Some(reasoning_content.clone()), + // Return the Event + return Poll::Ready(Some(Ok(InterStreamEvent::Chunk(content)))); + } else if let Some(reasoning_content) = reasoning_content + && !reasoning_content.is_empty() + { + // Add to the captured_content if chat options allow it + if self.options.capture_reasoning_content { + match self.captured_data.reasoning_content { + Some(ref mut c) => c.push_str(&reasoning_content), + None => self.captured_data.reasoning_content = Some(reasoning_content.clone()), + } } + + // Return the Event + return Poll::Ready(Some(Ok(InterStreamEvent::ReasoningChunk(reasoning_content)))); } - // Return the Event - return Poll::Ready(Some(Ok(InterStreamEvent::ReasoningChunk(reasoning_content)))); - } - // If we do not have content, then log a trace message - else { + // If we do not have content, then log a trace message // TODO: use tracing debug tracing::warn!("EMPTY CHOICE CONTENT"); } diff 
--git a/tests/tests_p_ollama_reasoning.rs b/tests/tests_p_ollama_reasoning.rs index bc19489e..7a981712 100644 --- a/tests/tests_p_ollama_reasoning.rs +++ b/tests/tests_p_ollama_reasoning.rs @@ -1,9 +1,12 @@ mod support; -use crate::support::{TestResult, common_tests}; +use crate::support::{TestResult, common_tests, seed_chat_req_simple}; use genai::adapter::AdapterKind; +use genai::chat::ChatStreamEvent; +use genai::Client; use genai::resolver::AuthData; use serial_test::serial; +use tokio_stream::StreamExt; // NOTE: Sometimes the 1.5b model does not provide the reasoning or has some issues. // Rerunning the test or switching to the 8b model would generally solve the issues. @@ -11,6 +14,7 @@ use serial_test::serial; // NOTE: Also, #[serial(ollama)] seems more reliable when using it. const MODEL: &str = "deepseek-r1:1.5b"; // "deepseek-r1:8b" "deepseek-r1:1.5b" +const MODEL_QWEN3: &str = "qwen3:4b"; // region: --- Chat @@ -69,6 +73,71 @@ async fn test_chat_stream_capture_content_ok() -> TestResult<()> { common_tests::common_test_chat_stream_capture_content_ok(MODEL).await } +#[tokio::test] +#[serial(ollama)] +async fn test_chat_stream_reasoning_chunk_ok() -> TestResult<()> { + let client = Client::default(); + let chat_req = seed_chat_req_simple(); + + let chat_res = client.exec_chat_stream(MODEL_QWEN3, chat_req, None).await?; + let mut stream = chat_res.stream; + let mut reasoning_content = String::new(); + + while let Some(result) = stream.next().await { + match result? 
{ + ChatStreamEvent::ReasoningChunk(chunk) => { + reasoning_content.push_str(&chunk.content); + break; + } + ChatStreamEvent::End(_) => break, + _ => {} + } + } + assert!( + !reasoning_content.is_empty(), + "reasoning_content should not be empty" + ); + + Ok(()) +} + +#[tokio::test] +#[serial(ollama)] +async fn test_chat_stream_non_empty_chunk_deepseek_ok() -> TestResult<()> { + let client = Client::default(); + let chat_req = seed_chat_req_simple(); + + let chat_res = client.exec_chat_stream(MODEL, chat_req, None).await?; + let mut stream = chat_res.stream; + let mut found_non_empty = false; + + while let Some(result) = stream.next().await { + match result? { + ChatStreamEvent::Chunk(chunk) => { + if !chunk.content.is_empty() { + found_non_empty = true; + break; + } + } + ChatStreamEvent::ReasoningChunk(chunk) => { + if !chunk.content.is_empty() { + found_non_empty = true; + break; + } + } + ChatStreamEvent::End(_) => break, + _ => {} + } + } + + assert!( + found_non_empty, + "stream should yield non-empty content or reasoning chunks" + ); + + Ok(()) +} + // /// COMMENTED FOR NOW AS OLLAMA OpenAI Compatibility Layer does not support // /// usage tokens when streaming. See https://github.com/ollama/ollama/issues/4448 // #[tokio::test] From e63d4648832b5d061840a1e26644504f0ce1f893 Mon Sep 17 00:00:00 2001 From: Himmelschmidt <46351743+Himmelschmidt@users.noreply.github.com> Date: Mon, 29 Dec 2025 17:52:55 -0500 Subject: [PATCH 064/123] Fix Gemini adapter to use responseJsonSchema (#111) Use responseJsonSchema instead of deprecated responseSchema to support full JSON Schema spec including nullable types. 
--- src/adapter/adapters/gemini/adapter_impl.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adapter/adapters/gemini/adapter_impl.rs b/src/adapter/adapters/gemini/adapter_impl.rs index eb3b54c8..710a4d26 100644 --- a/src/adapter/adapters/gemini/adapter_impl.rs +++ b/src/adapter/adapters/gemini/adapter_impl.rs @@ -193,7 +193,7 @@ impl Adapter for GeminiAdapter { } true }); - payload.x_insert("/generationConfig/responseSchema", schema)?; + payload.x_insert("/generationConfig/responseJsonSchema", schema)?; } // -- Add supported ChatOptions From 093f6e14e5e9496fdd10ab43216c988dc66a24f4 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Mon, 29 Dec 2025 15:11:37 -0800 Subject: [PATCH 065/123] . update bug report --- .github/ISSUE_TEMPLATE/bug_report.yaml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml index 3ed194d5..5d643e46 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yaml +++ b/.github/ISSUE_TEMPLATE/bug_report.yaml @@ -1,6 +1,6 @@ name: Bug Report description: File a bug report. -title: "🐛 " +title: "bug - " labels: ["bug"] body: - type: markdown @@ -9,19 +9,21 @@ body: Thanks for taking the time to fill out this bug report! 
- type: textarea attributes: - label: Bug description + label: Bug description & expected description: Describe the bug placeholder: Ran {this}, did {that}, expected {the other} - validations: + validations: required: true + - type: input + attributes: + label: Narrow cargo test + description: If possible, give the narrow cargo test to show the error + placeholder: cargo test --test tests_p_openai test_tool_full_flow_ok - type: input attributes: label: Adapter - description: The AdapterKind if known + description: The AdapterKind if known - type: input attributes: label: Model description: The Model name if known - - type: textarea - attributes: - label: Suggested Resolution \ No newline at end of file From 318bcfeecfb1e9312f7dff1fec1014919c29c9a4 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Mon, 29 Dec 2025 15:36:20 -0800 Subject: [PATCH 066/123] > openai/streamer - refactor / replace direct array indexing with .get(..) and if-let-chain to prevent panics --- src/adapter/adapters/openai/streamer.rs | 61 ++++++++++--------------- 1 file changed, 23 insertions(+), 38 deletions(-) diff --git a/src/adapter/adapters/openai/streamer.rs b/src/adapter/adapters/openai/streamer.rs index 3c1da288..d6e90c07 100644 --- a/src/adapter/adapters/openai/streamer.rs +++ b/src/adapter/adapters/openai/streamer.rs @@ -160,10 +160,10 @@ impl futures::Stream for OpenAIStreamer { { // Check if there's a tool call in the delta if let Some(delta_tool_calls) = delta_tool_calls.as_array() - && !delta_tool_calls.is_empty() + && let Some(tool_call_obj_val) = delta_tool_calls.get(0) { // Extract the first tool call object as a mutable value - let mut tool_call_obj = delta_tool_calls[0].clone(); + let mut tool_call_obj = tool_call_obj_val.clone(); // Extract tool call data if let (Ok(index), Ok(mut function)) = ( @@ -177,41 +177,34 @@ impl futures::Stream for OpenAIStreamer { let arguments = function.x_take::("arguments").unwrap_or_default(); // Don't parse yet - accumulate as string first 
let mut tool_call = crate::chat::ToolCall { - call_id, - fn_name, + call_id: call_id.clone(), + fn_name: fn_name.clone(), fn_arguments: serde_json::Value::String(arguments.clone()), thought_signatures: None, }; // Capture the tool call if enabled if self.options.capture_tool_calls { - match &mut self.captured_data.tool_calls { - Some(calls) => { - self.captured_data.tool_calls = Some({ - // Accumulate arguments as strings, don't parse until complete - let accumulated = if let Some(existing) = - calls[index as usize].fn_arguments.as_str() - { - format!("{}{}", existing, arguments) - } else { - arguments.clone() - }; - - // Store as string (will be parsed at stream end) - calls[index as usize].fn_arguments = - serde_json::Value::String(accumulated); + let calls = self.captured_data.tool_calls.get_or_insert_with(Vec::new); + let idx = index as usize; - // Update call_id and fn_name on first chunk - if !tool_call.fn_name.is_empty() { - calls[index as usize].call_id = tool_call.call_id.clone(); - calls[index as usize].fn_name = tool_call.fn_name.clone(); - } + if let Some(call) = calls.get_mut(idx) { + // Accumulate arguments as strings, don't parse until complete + if let Some(existing) = call.fn_arguments.as_str() { + let accumulated = format!("{existing}{arguments}"); + call.fn_arguments = Value::String(accumulated); + } - tool_call = calls[index as usize].clone(); - calls.to_vec() - }) + // Update call_id and fn_name on first chunk that has them + if !fn_name.is_empty() { + call.call_id = call_id.clone(); + call.fn_name = fn_name.clone(); } - None => self.captured_data.tool_calls = Some(vec![tool_call.clone()]), + tool_call = call.clone(); + } else { + // If it doesn't exist, we add it. + // We use resize to handle potential gaps (though unlikely in streaming). 
+ calls.resize(idx + 1, tool_call.clone()); } } @@ -225,20 +218,12 @@ impl futures::Stream for OpenAIStreamer { // -- Content / Reasoning Content // Some providers (e.g., Ollama) emit reasoning in `delta.reasoning` and send empty content. else { - let content = first_choice - .x_take::<Option<String>>("/delta/content") - .ok() - .flatten(); + let content = first_choice.x_take::<Option<String>>("/delta/content").ok().flatten(); let reasoning_content = first_choice .x_take::<Option<String>>("/delta/reasoning_content") .ok() .flatten() - .or_else(|| { - first_choice - .x_take::<Option<String>>("/delta/reasoning") - .ok() - .flatten() - }); + .or_else(|| first_choice.x_take::<Option<String>>("/delta/reasoning").ok().flatten()); if let Some(content) = content && !content.is_empty() From c1a8a2ceeb3cf6c1ec94dffe0fedb58a3b0e0f36 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Tue, 30 Dec 2025 08:59:01 -0800 Subject: [PATCH 067/123] . v0.5.0-alpha.6 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 797d5162..9fd7c462 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0-alpha.6-WIP" +version = "0.5.0-alpha.6" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 88c1518ea47931fd381458d721d8bd7f8f3b5f98 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Tue, 30 Dec 2025 08:59:50 -0800 Subject: [PATCH 068/123] . v0.5.0-alpha.7-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9fd7c462..5ad25440 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0-alpha.6" +version = "0.5.0-alpha.7-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust.
(OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 6c82d99bc9b611e7fa6727a18c3f357609341703 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 1 Jan 2026 16:44:46 -0800 Subject: [PATCH 069/123] > webc - remove 'reqwest-eventsource' dependency, all based in same WebStream (EventsourceStream wrapper) --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 5ad25440..443c3960 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ serde = { version = "1", features = ["derive", "rc"] } # Opted to rc for Arc serde_json = "1" serde_with = "3" # -- Web -reqwest = {version = "0.12", default-features = false, features = ["json", "rustls-tls"]} +reqwest = {version = "0.12", default-features = false, features = ["json", "rustls-tls", "stream"]} reqwest-eventsource = "0.6" eventsource-stream = "0.2" bytes = "1.6" From 139e46c30e231423d370310be128427a38c0b5c7 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 1 Jan 2026 17:27:52 -0800 Subject: [PATCH 070/123] ^ adapter_openai - switched to custom webc::EventSourceStream based on WebStream --- src/adapter/adapters/openai/adapter_impl.rs | 5 +- src/adapter/adapters/openai/streamer.rs | 12 +-- .../adapters/openai_resp/adapter_impl.rs | 5 +- src/webc/event_source_stream.rs | 86 +++++++++++++++++++ src/webc/mod.rs | 3 +- 5 files changed, 99 insertions(+), 12 deletions(-) create mode 100644 src/webc/event_source_stream.rs diff --git a/src/adapter/adapters/openai/adapter_impl.rs b/src/adapter/adapters/openai/adapter_impl.rs index 552e9081..18efb72d 100644 --- a/src/adapter/adapters/openai/adapter_impl.rs +++ b/src/adapter/adapters/openai/adapter_impl.rs @@ -7,11 +7,10 @@ use crate::chat::{ ChatStreamResponse, ContentPart, MessageContent, ReasoningEffort, ToolCall, Usage, }; use crate::resolver::{AuthData, Endpoint}; -use crate::webc::WebResponse; +use crate::webc::{EventSourceStream, WebResponse}; use crate::{Error, Headers, Result}; use 
crate::{ModelIden, ServiceTarget}; use reqwest::RequestBuilder; -use reqwest_eventsource::EventSource; use serde::Deserialize; use serde_json::{Value, json}; use tracing::error; @@ -146,7 +145,7 @@ impl Adapter for OpenAIAdapter { reqwest_builder: RequestBuilder, options_sets: ChatOptionsSet<'_, '_>, ) -> Result { - let event_source = EventSource::new(reqwest_builder)?; + let event_source = EventSourceStream::new(reqwest_builder); let openai_stream = OpenAIStreamer::new(event_source, model_iden.clone(), options_sets); let chat_stream = ChatStream::from_inter_stream(openai_stream); diff --git a/src/adapter/adapters/openai/streamer.rs b/src/adapter/adapters/openai/streamer.rs index d6e90c07..ac3eef96 100644 --- a/src/adapter/adapters/openai/streamer.rs +++ b/src/adapter/adapters/openai/streamer.rs @@ -3,15 +3,15 @@ use crate::adapter::adapters::support::{StreamerCapturedData, StreamerOptions}; use crate::adapter::inter_stream::{InterStreamEnd, InterStreamEvent}; use crate::adapter::openai::OpenAIAdapter; use crate::chat::{ChatOptionsSet, ToolCall}; +use crate::webc::{Event, EventSourceStream}; use crate::{Error, ModelIden, Result}; -use reqwest_eventsource::{Event, EventSource}; use serde_json::Value; use std::pin::Pin; use std::task::{Context, Poll}; use value_ext::JsonValueExt; pub struct OpenAIStreamer { - inner: EventSource, + inner: EventSourceStream, options: StreamerOptions, // -- Set by the poll_next @@ -21,8 +21,7 @@ pub struct OpenAIStreamer { } impl OpenAIStreamer { - // TODO: Problem - need the ChatOptions `.capture_content` and `.capture_usage` - pub fn new(inner: EventSource, model_iden: ModelIden, options_set: ChatOptionsSet<'_, '_>) -> Self { + pub fn new(inner: EventSourceStream, model_iden: ModelIden, options_set: ChatOptionsSet<'_, '_>) -> Self { Self { inner, done: false, @@ -277,7 +276,10 @@ impl futures::Stream for OpenAIStreamer { } Some(Err(err)) => { tracing::error!("Error: {}", err); - return 
Poll::Ready(Some(Err(Error::ReqwestEventSource(err.into())))); + return Poll::Ready(Some(Err(Error::WebStream { + model_iden: self.options.model_iden.clone(), + cause: err.to_string(), + }))); } None => { return Poll::Ready(None); diff --git a/src/adapter/adapters/openai_resp/adapter_impl.rs b/src/adapter/adapters/openai_resp/adapter_impl.rs index 45311190..46b62b13 100644 --- a/src/adapter/adapters/openai_resp/adapter_impl.rs +++ b/src/adapter/adapters/openai_resp/adapter_impl.rs @@ -7,11 +7,10 @@ use crate::chat::{ ContentPart, MessageContent, ReasoningEffort, Usage, }; use crate::resolver::{AuthData, Endpoint}; -use crate::webc::WebResponse; +use crate::webc::{EventSourceStream, WebResponse}; use crate::{Error, Headers, Result}; use crate::{ModelIden, ServiceTarget}; use reqwest::RequestBuilder; -use reqwest_eventsource::EventSource; use serde_json::{Map, Value, json}; use value_ext::JsonValueExt; @@ -247,7 +246,7 @@ impl Adapter for OpenAIRespAdapter { reqwest_builder: RequestBuilder, options_sets: ChatOptionsSet<'_, '_>, ) -> Result { - let event_source = EventSource::new(reqwest_builder)?; + let event_source = EventSourceStream::new(reqwest_builder); let openai_stream = OpenAIStreamer::new(event_source, model_iden.clone(), options_sets); let chat_stream = ChatStream::from_inter_stream(openai_stream); diff --git a/src/webc/event_source_stream.rs b/src/webc/event_source_stream.rs new file mode 100644 index 00000000..1ae1dedb --- /dev/null +++ b/src/webc/event_source_stream.rs @@ -0,0 +1,86 @@ +use crate::webc::WebStream; +use futures::Stream; +use reqwest::RequestBuilder; +use std::pin::Pin; +use std::task::{Context, Poll}; + +/// Simple EventSource stream implementation that uses WebStream as a foundation. 
+pub struct EventSourceStream { + inner: WebStream, + opened: bool, +} + +#[derive(Debug)] +pub enum Event { + Open, + Message(Message), +} + +#[derive(Debug)] +pub struct Message { + pub data: String, +} + +impl EventSourceStream { + pub fn new(reqwest_builder: RequestBuilder) -> Self { + // Standard EventSource uses \n\n as event separator + Self { + inner: WebStream::new_with_delimiter(reqwest_builder, "\n\n"), + opened: false, + } + } +} + +impl Stream for EventSourceStream { + type Item = Result>; + + fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let this = self.get_mut(); + + // -- 1. Handle initial "Open" event + if !this.opened { + this.opened = true; + return Poll::Ready(Some(Ok(Event::Open))); + } + + // -- 2. Poll the inner WebStream for next event block + loop { + let nx = Pin::new(&mut this.inner).poll_next(cx); + + match nx { + Poll::Ready(Some(Ok(raw_event))) => { + println!("->> {raw_event}"); + let mut data = String::new(); + for line in raw_event.lines() { + let line = line.trim(); + // Skip empty lines or comments (starting with :) + if line.is_empty() || line.starts_with(':') { + continue; + } + + // We only care about "data:" lines for now + if let Some(d) = line.strip_prefix("data:") { + if !data.is_empty() { + data.push('\n'); + } + data.push_str(d.trim()); + } + } + + // If no data found in this block, poll for the next one + if data.is_empty() { + continue; + } + + return Poll::Ready(Some(Ok(Event::Message(Message { data })))); + } + Poll::Ready(Some(Err(e))) => { + // Convert Box to Box + return Poll::Ready(Some(Err(e.to_string().into()))); + } + Poll::Ready(None) => return Poll::Ready(None), + Poll::Pending => return Poll::Pending, + } + } + } +} diff --git a/src/webc/mod.rs b/src/webc/mod.rs index 5c5e0d11..3cdc6d77 100644 --- a/src/webc/mod.rs +++ b/src/webc/mod.rs @@ -4,12 +4,13 @@ mod error; mod web_client; -// For when not using `text/event-stream` mod web_stream; +mod event_source_stream; pub(crate) use 
error::Result; pub(crate) use web_client::*; pub(crate) use web_stream::*; +pub(crate) use event_source_stream::*; // Only public for external use pub use error::Error; From b08af87e29abee7cdc2cb19e034e78bf4eb12660 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 1 Jan 2026 17:49:13 -0800 Subject: [PATCH 071/123] - webc - fix message loss in delimited WebStream --- src/webc/event_source_stream.rs | 1 - src/webc/web_stream.rs | 48 +++++++++++++++------------------ 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/src/webc/event_source_stream.rs b/src/webc/event_source_stream.rs index 1ae1dedb..77fa30c6 100644 --- a/src/webc/event_source_stream.rs +++ b/src/webc/event_source_stream.rs @@ -49,7 +49,6 @@ impl Stream for EventSourceStream { match nx { Poll::Ready(Some(Ok(raw_event))) => { - println!("->> {raw_event}"); let mut data = String::new(); for line in raw_event.lines() { let line = line.trim(); diff --git a/src/webc/web_stream.rs b/src/webc/web_stream.rs index 8cd01b23..44bdae01 100644 --- a/src/webc/web_stream.rs +++ b/src/webc/web_stream.rs @@ -288,33 +288,27 @@ fn process_buff_string_delimited( partial_message: &mut Option, delimiter: &str, ) -> Result { - let mut first_message: Option = None; - let mut candidate_message: Option = None; - let mut next_messages: Option> = None; - - let parts = buff_string.split(delimiter); - - for part in parts { - // If we already have a candidate, the candidate becomes the message - if let Some(candidate_message) = candidate_message.take() { - // If candidate is empty, we skip - if !candidate_message.is_empty() { - let message = candidate_message.to_string(); - if first_message.is_none() { - first_message = Some(message); - } else { - next_messages.get_or_insert_with(Vec::new).push(message); - } - } else { - continue; - } - } else { - // And then, this part becomes the candidate - if let Some(partial) = partial_message.take() { - candidate_message = Some(format!("{partial}{part}")); - } else { - 
candidate_message = Some(part.to_string()); - } + let full_string = if let Some(partial) = partial_message.take() { + format!("{partial}{buff_string}") + } else { + buff_string + }; + + let mut parts: Vec = full_string.split(delimiter).map(|s| s.to_string()).collect(); + + // The last part is the new partial (what's after the last delimiter) + let candidate_message = parts.pop(); + + // Filter out empty strings that result from multiple delimiters (e.g., \n\n\n\n) + let mut messages: Vec = parts.into_iter().filter(|s| !s.is_empty()).collect(); + + let mut first_message = None; + let mut next_messages = None; + + if !messages.is_empty() { + first_message = Some(messages.remove(0)); + if !messages.is_empty() { + next_messages = Some(messages); } } From d44b094958bb1977318e2b5a5f332a07dbaa9554 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 1 Jan 2026 18:00:17 -0800 Subject: [PATCH 072/123] ^ anthropic - Refactor streamer to use webc::EventSourceStream --- Cargo.toml | 1 - src/adapter/adapters/anthropic/adapter_impl.rs | 5 ++--- src/adapter/adapters/anthropic/streamer.rs | 11 +++++++---- src/error.rs | 7 ------- src/webc/error.rs | 4 ---- src/webc/event_source_stream.rs | 9 ++++++--- 6 files changed, 15 insertions(+), 22 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 443c3960..b846d931 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,6 @@ serde_json = "1" serde_with = "3" # -- Web reqwest = {version = "0.12", default-features = false, features = ["json", "rustls-tls", "stream"]} -reqwest-eventsource = "0.6" eventsource-stream = "0.2" bytes = "1.6" # -- File diff --git a/src/adapter/adapters/anthropic/adapter_impl.rs b/src/adapter/adapters/anthropic/adapter_impl.rs index 7f6f5c74..bb08ce66 100644 --- a/src/adapter/adapters/anthropic/adapter_impl.rs +++ b/src/adapter/adapters/anthropic/adapter_impl.rs @@ -6,11 +6,10 @@ use crate::chat::{ ContentPart, MessageContent, PromptTokensDetails, ReasoningEffort, ToolCall, Usage, }; use 
crate::resolver::{AuthData, Endpoint}; -use crate::webc::WebResponse; +use crate::webc::{EventSourceStream, WebResponse}; use crate::{Headers, ModelIden}; use crate::{Result, ServiceTarget}; use reqwest::RequestBuilder; -use reqwest_eventsource::EventSource; use serde_json::{Value, json}; use tracing::warn; use value_ext::JsonValueExt; @@ -319,7 +318,7 @@ impl Adapter for AnthropicAdapter { reqwest_builder: RequestBuilder, options_set: ChatOptionsSet<'_, '_>, ) -> Result { - let event_source = EventSource::new(reqwest_builder)?; + let event_source = EventSourceStream::new(reqwest_builder); let anthropic_stream = AnthropicStreamer::new(event_source, model_iden.clone(), options_set); let chat_stream = ChatStream::from_inter_stream(anthropic_stream); Ok(ChatStreamResponse { diff --git a/src/adapter/adapters/anthropic/streamer.rs b/src/adapter/adapters/anthropic/streamer.rs index e1264872..552573a6 100644 --- a/src/adapter/adapters/anthropic/streamer.rs +++ b/src/adapter/adapters/anthropic/streamer.rs @@ -1,15 +1,15 @@ use crate::adapter::adapters::support::{StreamerCapturedData, StreamerOptions}; use crate::adapter::inter_stream::{InterStreamEnd, InterStreamEvent}; use crate::chat::{ChatOptionsSet, ToolCall, Usage}; +use crate::webc::{Event, EventSourceStream}; use crate::{Error, ModelIden, Result}; -use reqwest_eventsource::{Event, EventSource}; use serde_json::Value; use std::pin::Pin; use std::task::{Context, Poll}; use value_ext::JsonValueExt; pub struct AnthropicStreamer { - inner: EventSource, + inner: EventSourceStream, options: StreamerOptions, // -- Set by the poll_next @@ -27,7 +27,7 @@ enum InProgressBlock { } impl AnthropicStreamer { - pub fn new(inner: EventSource, model_iden: ModelIden, options_set: ChatOptionsSet<'_, '_>) -> Self { + pub fn new(inner: EventSourceStream, model_iden: ModelIden, options_set: ChatOptionsSet<'_, '_>) -> Self { Self { inner, done: false, @@ -196,7 +196,10 @@ impl futures::Stream for AnthropicStreamer { } Some(Err(err)) => { 
tracing::error!("Error: {}", err); - return Poll::Ready(Some(Err(Error::ReqwestEventSource(err.into())))); + return Poll::Ready(Some(Err(Error::WebStream { + model_iden: self.options.model_iden.clone(), + cause: err.to_string(), + }))); } None => return Poll::Ready(None), } diff --git a/src/error.rs b/src/error.rs index 745e1b04..91afa148 100644 --- a/src/error.rs +++ b/src/error.rs @@ -107,17 +107,10 @@ pub enum Error { Internal(String), // -- Externals - #[display("Failed to clone EventSource request: {_0}")] - #[from] - EventSourceClone(reqwest_eventsource::CannotCloneRequestError), - #[display("JSON value extension error: {_0}")] #[from] JsonValueExt(JsonValueExtError), - #[display("Reqwest EventSource error: {_0}")] - ReqwestEventSource(Box), - #[display("Serde JSON error: {_0}")] #[from] SerdeJson(serde_json::Error), diff --git a/src/webc/error.rs b/src/webc/error.rs index 24a0a6cd..b9a68d33 100644 --- a/src/webc/error.rs +++ b/src/webc/error.rs @@ -27,10 +27,6 @@ pub enum Error { #[display("Reqwest error: {_0}")] #[from] Reqwest(reqwest::Error), - - #[display("Failed to clone EventSource request: {_0}")] - #[from] - EventSourceClone(reqwest_eventsource::CannotCloneRequestError), } // region: --- Error Boilerplate diff --git a/src/webc/event_source_stream.rs b/src/webc/event_source_stream.rs index 77fa30c6..8cc04636 100644 --- a/src/webc/event_source_stream.rs +++ b/src/webc/event_source_stream.rs @@ -18,6 +18,7 @@ pub enum Event { #[derive(Debug)] pub struct Message { + pub event: String, pub data: String, } @@ -49,6 +50,7 @@ impl Stream for EventSourceStream { match nx { Poll::Ready(Some(Ok(raw_event))) => { + let mut event = "message".to_string(); let mut data = String::new(); for line in raw_event.lines() { let line = line.trim(); @@ -57,8 +59,9 @@ impl Stream for EventSourceStream { continue; } - // We only care about "data:" lines for now - if let Some(d) = line.strip_prefix("data:") { + if let Some(e) = line.strip_prefix("event:") { + event = 
e.trim().to_string(); + } else if let Some(d) = line.strip_prefix("data:") { if !data.is_empty() { data.push('\n'); } @@ -71,7 +74,7 @@ impl Stream for EventSourceStream { continue; } - return Poll::Ready(Some(Ok(Event::Message(Message { data })))); + return Poll::Ready(Some(Ok(Event::Message(Message { event, data })))); } Poll::Ready(Some(Err(e))) => { // Convert Box to Box From ad097aa194e3572856dd6e7d3229b30aaf1348a1 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 1 Jan 2026 18:08:05 -0800 Subject: [PATCH 073/123] . tests - added serial to cohere tests --- tests/tests_p_cohere.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/tests_p_cohere.rs b/tests/tests_p_cohere.rs index 4fed909a..fe581857 100644 --- a/tests/tests_p_cohere.rs +++ b/tests/tests_p_cohere.rs @@ -3,6 +3,7 @@ mod support; use crate::support::{TestResult, common_tests}; use genai::adapter::AdapterKind; use genai::resolver::AuthData; +use serial_test::serial; const MODEL: &str = "command-r7b-12-2024"; const MODEL_NS: &str = "cohere::command-r7b-12-2024"; @@ -10,21 +11,25 @@ const MODEL_NS: &str = "cohere::command-r7b-12-2024"; // region: --- Chat #[tokio::test] +#[serial(cohere)] async fn test_chat_simple_ok() -> TestResult<()> { common_tests::common_test_chat_simple_ok(MODEL, None).await } #[tokio::test] +#[serial(cohere)] async fn test_chat_namespaced_ok() -> TestResult<()> { common_tests::common_test_chat_simple_ok(MODEL_NS, None).await } #[tokio::test] +#[serial(cohere)] async fn test_chat_multi_system_ok() -> TestResult<()> { common_tests::common_test_chat_multi_system_ok(MODEL).await } #[tokio::test] +#[serial(cohere)] async fn test_chat_stop_sequences_ok() -> TestResult<()> { common_tests::common_test_chat_stop_sequences_ok(MODEL).await } @@ -34,6 +39,7 @@ async fn test_chat_stop_sequences_ok() -> TestResult<()> { // region: --- Chat Stream Tests #[tokio::test] +#[serial(cohere)] async fn test_chat_stream_simple_ok() -> TestResult<()> { 
common_tests::common_test_chat_stream_simple_ok(MODEL, None).await } @@ -41,16 +47,18 @@ async fn test_chat_stream_simple_ok() -> TestResult<()> { // NOTE 2024-06-23 - Occasionally, the last stream message sent by Cohere is malformed and cannot be parsed. // Will investigate further if requested. // #[tokio::test] +#[serial(cohere)] // async fn test_chat_stream_capture_content_ok() -> TestResult<()> { // common_tests::common_test_chat_stream_capture_content_ok(MODEL).await // } - #[tokio::test] +#[serial(cohere)] async fn test_chat_stream_capture_all_ok() -> TestResult<()> { common_tests::common_test_chat_stream_capture_all_ok(MODEL, None).await } #[tokio::test] +#[serial(cohere)] async fn test_chat_temperature_ok() -> TestResult<()> { common_tests::common_test_chat_temperature_ok(MODEL).await } @@ -60,6 +68,7 @@ async fn test_chat_temperature_ok() -> TestResult<()> { // region: --- Resolver Tests #[tokio::test] +#[serial(cohere)] async fn test_resolver_auth_ok() -> TestResult<()> { common_tests::common_test_resolver_auth_ok(MODEL, AuthData::from_env("COHERE_API_KEY")).await } @@ -69,6 +78,7 @@ async fn test_resolver_auth_ok() -> TestResult<()> { // region: --- List #[tokio::test] +#[serial(cohere)] async fn test_list_models() -> TestResult<()> { common_tests::common_test_list_models(AdapterKind::Cohere, "command-r-plus").await } From 702bc6df6702806314cc96aee1170ba26dc6a168 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 1 Jan 2026 18:19:23 -0800 Subject: [PATCH 074/123] . 
dependencies - update to reqwest '0.13', remove reqwest-eventsource --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b846d931..4ec7d3c2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ unsafe_code = "forbid" # -- Tracing tracing = { version = "0.1", features = ["default"] } # -- Async -tokio = { version = "1", features = ["macros","rt-multi-thread", "io-std", "test-util"]} +tokio = { version = "1", features = ["macros", "rt-multi-thread", "io-std", "test-util", "io-util"]} futures = "0.3" tokio-stream = "0.1" # -- Json @@ -25,7 +25,7 @@ serde = { version = "1", features = ["derive", "rc"] } # Opted to rc for Arc serde_json = "1" serde_with = "3" # -- Web -reqwest = {version = "0.12", default-features = false, features = ["json", "rustls-tls", "stream"]} +reqwest = {version = "0.13", default-features = false, features = ["json", "stream"]} eventsource-stream = "0.2" bytes = "1.6" # -- File From b9b8cf43a7d1d16452ab43bce6a8f293f78092f0 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 1 Jan 2026 19:55:39 -0800 Subject: [PATCH 075/123] - fix blocker - reqwest 0.13 should be used with default now (cherry picked from commit 3e4fbcfb835818cc1609511b54d7a6447fdb88ef) --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4ec7d3c2..a8084d71 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ serde = { version = "1", features = ["derive", "rc"] } # Opted to rc for Arc serde_json = "1" serde_with = "3" # -- Web -reqwest = {version = "0.13", default-features = false, features = ["json", "stream"]} +reqwest = {version = "0.13", features = ["json", "stream"]} eventsource-stream = "0.2" bytes = "1.6" # -- File From d24a921251684c1405c52c41e7f8b1646bfced26 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 1 Jan 2026 20:09:24 -0800 Subject: [PATCH 076/123] . 
v0.5.0-alpha.9 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a8084d71..84db55fa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0-alpha.7-WIP" +version = "0.5.0-alpha.9" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 4872945440d945816f45a4512edbf2f0f14f94b6 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 1 Jan 2026 20:11:03 -0800 Subject: [PATCH 077/123] . 0.5.0-alpha.10-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 84db55fa..3cc61e04 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0-alpha.9" +version = "0.5.0-alpha.10-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 425cf68f8a4ece03cea33b59aaf57008cf449f12 Mon Sep 17 00:00:00 2001 From: Michelle Tilley Date: Fri, 9 Jan 2026 11:28:07 -0800 Subject: [PATCH 078/123] fix: Anthropic ToolCalls with no parameters are not parsed correctly while streaming If Claude responds to a request with a tool call that does not require any parameters, it sends a set of SSE's that look like the following: ``` { "type": "content_block_start", "index": 0, "content_block": { "type": "tool_use", "id": "toolu_01K7Ko97mtSRWJCpKX5Fd6A6", "name": "tool_name", "input": {} } } { "type": "content_block_delta", "index": 0, "delta": { "type": "input_json_delta", "partial_json": "" } } ``` Previously, the streaming parser was only handling input from the `content_block_delta` event; this change adds handling to `content_block_start` to handle any input Claude may place here. 
--- src/adapter/adapters/anthropic/streamer.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/adapter/adapters/anthropic/streamer.rs b/src/adapter/adapters/anthropic/streamer.rs index 552573a6..6ddccc89 100644 --- a/src/adapter/adapters/anthropic/streamer.rs +++ b/src/adapter/adapters/anthropic/streamer.rs @@ -3,7 +3,7 @@ use crate::adapter::inter_stream::{InterStreamEnd, InterStreamEvent}; use crate::chat::{ChatOptionsSet, ToolCall, Usage}; use crate::webc::{Event, EventSourceStream}; use crate::{Error, ModelIden, Result}; -use serde_json::Value; +use serde_json::{Map, Value}; use std::pin::Pin; use std::task::{Context, Poll}; use value_ext::JsonValueExt; @@ -73,10 +73,26 @@ impl futures::Stream for AnthropicStreamer { Ok("text") => self.in_progress_block = InProgressBlock::Text, Ok("thinking") => self.in_progress_block = InProgressBlock::Thinking, Ok("tool_use") => { + // If a tool call does not require any parameters, Claude may respond + // with an empty JSON object as the `input` of the content_start_block event, + // and then an empty string as the `input` of the following `content_block_delta` event. + let input = if let Ok(input) = data.x_get::("/content_block/input") { + input + } else if let Ok(obj) = data.x_take::>("/content_block/input") { + serde_json::to_string(&obj).map_err(|_| { + value_ext::JsonValueExtError::PropertyValueNotOfType { + name: "/content_block/input".to_string(), + not_of_type: "map", + } + })? 
+ } else { + String::new() + }; + self.in_progress_block = InProgressBlock::ToolUse { id: data.x_take("/content_block/id")?, name: data.x_take("/content_block/name")?, - input: String::new(), + input, }; } Ok(txt) => { From 536248cc91aead006f983e87efd728e7e7cdb911 Mon Sep 17 00:00:00 2001 From: Michelle Tilley Date: Fri, 9 Jan 2026 14:12:29 -0800 Subject: [PATCH 079/123] Correct tool use parameter handling for parameter-less tool calls --- src/adapter/adapters/anthropic/streamer.rs | 26 +++++++--------------- 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/src/adapter/adapters/anthropic/streamer.rs b/src/adapter/adapters/anthropic/streamer.rs index 6ddccc89..3bd6ad8a 100644 --- a/src/adapter/adapters/anthropic/streamer.rs +++ b/src/adapter/adapters/anthropic/streamer.rs @@ -73,26 +73,10 @@ impl futures::Stream for AnthropicStreamer { Ok("text") => self.in_progress_block = InProgressBlock::Text, Ok("thinking") => self.in_progress_block = InProgressBlock::Thinking, Ok("tool_use") => { - // If a tool call does not require any parameters, Claude may respond - // with an empty JSON object as the `input` of the content_start_block event, - // and then an empty string as the `input` of the following `content_block_delta` event. - let input = if let Ok(input) = data.x_get::("/content_block/input") { - input - } else if let Ok(obj) = data.x_take::>("/content_block/input") { - serde_json::to_string(&obj).map_err(|_| { - value_ext::JsonValueExtError::PropertyValueNotOfType { - name: "/content_block/input".to_string(), - not_of_type: "map", - } - })? 
- } else { - String::new() - }; - self.in_progress_block = InProgressBlock::ToolUse { id: data.x_take("/content_block/id")?, name: data.x_take("/content_block/name")?, - input, + input: String::new(), }; } Ok(txt) => { @@ -148,10 +132,16 @@ impl futures::Stream for AnthropicStreamer { "content_block_stop" => { match std::mem::replace(&mut self.in_progress_block, InProgressBlock::Text) { InProgressBlock::ToolUse { id, name, input } => { + let fn_arguments = if input.is_empty() { + Value::Object(Map::new()) + } else { + serde_json::from_str(&input)? + }; + let tc = ToolCall { call_id: id, fn_name: name, - fn_arguments: serde_json::from_str(&input)?, + fn_arguments, thought_signatures: None, }; From c3f94f0068c0a05b2ede84e9dc93451a8c363144 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 9 Jan 2026 18:51:54 -0800 Subject: [PATCH 080/123] . update changelog --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa600fd8..4915619a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ `.` minor | `-` Fix | `+` Addition | `^` improvement | `!` Change | `*` Refactor +## 2026-01-09 - [v0.5.0](https://github.com/jeremychone/rust-genai/compare/v0.4.4...v0.5.0) + +- `!` zai - change namespace strategy with (zai:: for default, and zai-codding:: for subscription, same Adapter) +- `+` New Adapter: bigmodel - add back bigmodel.cn and BigModel adapter (only via namespace) +- `+` MessageContent - Add from ContentPart and Binary +- `+` New Adatper: : Add MIMO model adapter (#105) +- `+` gemini adapter - impl thought signature - ThoughtSignature api update +- `^` anthropic - implemented new output_config.effort for opus-4-5 (matching ReasonningEffort) +- `^` gemini - for gemini-3, convert ReasoningEffort Low/High to the appropriate gemini thinkingLevel LOW/HIGH, fall back on budget if not gemini 3 or other effort +- `^` reasoning - add RasoningEffort::None +- `^` dependency - update to reqwest 
0.13 +- `^` MessageContent - add .binaries() and .into_binaries() +- `^` .size - implement .size in ContentPart and MessageContent +- `^` ContentPart - Binary from file (as base64) +- `^` binary - add constructors (from_base64, from_url, from_file) +- `-` pr-anthropic-tool-fix - #pr 114 - Anthropic ToolCalls with no parameters are not parsed correctly while streaming +- `-` Fix Gemini adapter to use responseJsonSchema (PR #111) +- `-` Fix Ollama reasoning streaming (Skip empty reasoning chunks in streaming) +- `-` Fix Fireworks default depending on streaming (#109) +- `-` Capture response body in ResponseFailedNotJson error (#103) +- `>` anthropic - Refactor streamer to use webc::EventSourceStream +- `>` adapter_openai - switched to custom webc::EventSourceStream based on WebStream +- `>` webc - remove 'reqwest-eventsource' dependency, all based in same WebStream (EventsourceStream wrapper) +- `>` ModelName - add namespace_is(..), namespace(), namespace_and_name() +- `>` binary - refactor openai to use into_url for the base64 url +- `>` content_part - refactor binary into own file + ## 2025-11-14 - [v0.4.4](https://github.com/jeremychone/rust-genai/compare/v0.4.3...v0.4.4) - `+` openai - adding support for gpt-5-pro (must be mapped to OpenaiResp adapter) From f4404aec9d7a886cd0a159d5a55caaa29639a312 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 9 Jan 2026 18:52:03 -0800 Subject: [PATCH 081/123] . 
cargo fmt
doc - first pass at the api-reference-for-llm.md --- doc/for-llm/api-reference-for-llm.md | 171 +++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 doc/for-llm/api-reference-for-llm.md diff --git a/doc/for-llm/api-reference-for-llm.md b/doc/for-llm/api-reference-for-llm.md new file mode 100644 index 00000000..9fa2db31 --- /dev/null +++ b/doc/for-llm/api-reference-for-llm.md @@ -0,0 +1,171 @@ +# GenAI API Reference for LLMs + +Dry, concise reference for the `genai` library. + +## Core Concepts + +- **Client**: Main entry point (`genai::Client`). Thread-safe (`Arc` wrapper). +- **ModelIden**: `AdapterKind` + `ModelName`. Identifies which provider to use. +- **ServiceTarget**: Resolved `ModelIden`, `Endpoint`, and `AuthData`. +- **Resolvers**: Hooks to customize model mapping, authentication, and service endpoints. +- **AdapterKind**: Supported: `OpenAI`, `OpenAIResp`, `Gemini`, `Anthropic`, `Fireworks`, `Together`, `Groq`, `Mimo`, `Nebius`, `Xai`, `DeepSeek`, `Zai`, `BigModel`, `Cohere`, `Ollama`. + +## Client & Configuration + +### `Client` +- `Client::default()`: Standard client. +- `Client::builder()`: Returns `ClientBuilder`. +- `exec_chat(model, chat_req, options)`: Returns `ChatResponse`. +- `exec_chat_stream(model, chat_req, options)`: Returns `ChatStreamResponse`. +- `exec_embed(model, embed_req, options)`: Returns `EmbedResponse`. +- `embed(model, text, options)`: Convenience for single text embedding. +- `embed_batch(model, texts, options)`: Convenience for batch embedding. +- `resolve_service_target(model_name)`: Returns `ServiceTarget`. +- `all_model_names(adapter_kind)`: Returns a list of models for a provider (Ollama is dynamic). + +### `ClientBuilder` +- `with_auth_resolver(resolver)` / `with_auth_resolver_fn(f)`: Set sync/async auth lookup. +- `with_service_target_resolver(resolver)` / `with_service_target_resolver_fn(f)`: Full control over URL/Headers/Auth per call. 
+- `with_model_mapper(mapper)` / `with_model_mapper_fn(f)`: Map model names before execution. +- `with_chat_options(options)`: Set client-level default chat options. +- `with_web_config(web_config)`: Configure `reqwest` (timeouts, proxies, default headers). + +## Chat Request Structure + +### `ChatRequest` +- `system`: Initial system string (optional). +- `messages`: `Vec`. +- `tools`: `Vec` (optional). +- `from_system(text)`, `from_user(text)`, `from_messages(vec)`: Constructors. +- `append_message(msg)`: Adds a message to the sequence. +- `append_messages(iter)`: Adds multiple messages. +- `append_tool(tool)`: Adds a single tool definition. +- `append_tool_use_from_stream_end(end, tool_response)`: Simplifies tool-use loops by appending the assistant turn (with thoughts/tools) and the tool result. +- `join_systems()`: Concatenates all system content (top-level + system-role messages) into one string. + +### `ChatMessage` +- `role`: `System`, `User`, `Assistant`, `Tool`. +- `content`: `MessageContent` (multipart). +- `options`: `MessageOptions` (e.g., `cache_control: Ephemeral` for Anthropic). +- **Constructors**: `ChatMessage::system(text)`, `user(text)`, `assistant(text)`. +- **Tool Handoff**: `assistant_tool_calls_with_thoughts(calls, thoughts)` for continuing tool exchanges where thoughts must precede tool calls. + +### `MessageContent` (Multipart) +- Transparent wrapper for `Vec`. +- **Constructors**: `from_text(text)`, `from_parts(vec)`, `from_tool_calls(vec)`. +- **Methods**: `joined_texts()` (joins with blank line), `first_text()`, `prepend(part)`, `extend_front(parts)`. +- `ContentPart` variants: + - `Text(String)`: Plain text. + - `Binary(Binary)`: Images/PDFs/Audio. + - `ToolCall(ToolCall)`: Model-requested function call. + - `ToolResponse(ToolResponse)`: Result of function call. + - `ThoughtSignature(String)`: Reasoning/thoughts (e.g., Gemini/Anthropic). + +### `Binary` +- `content_type`: MIME (e.g., `image/jpeg`, `application/pdf`). 
+- `source`: `Url(String)` or `Base64(Arc)`. +- `from_file(path)`: Reads file and detects MIME. +- `is_image()`, `is_audio()`, `is_pdf()`: Type checks. +- `size()`: Approximate in-memory size in bytes. + +## Chat Options & Features + +### `ChatOptions` +- `temperature`, `max_tokens`, `top_p`. +- `stop_sequences`: `Vec`. +- `response_format`: `ChatResponseFormat::JsonMode` or `JsonSpec(name, schema)`. +- `reasoning_effort`: `Low`, `Medium`, `High`, `Budget(u32)`, `None`. +- `verbosity`: `Low`, `Medium`, `High` (e.g., for GPT-5). +- `normalize_reasoning_content`: Extract `` blocks into response field. +- `capture_usage`, `capture_content`, `capture_reasoning_content`, `capture_tool_calls`: (Streaming) Accumulate results in `StreamEnd`. +- `seed`: Deterministic generation. +- `service_tier`: `Flex`, `Auto`, `Default` (OpenAI). +- `extra_headers`: `Headers` added to the request. + +## Embedding + +### `EmbedRequest` +- `input`: `EmbedInput` (Single string or Batch `Vec`). + +### `EmbedOptions` +- `dimensions`, `encoding_format` ("float", "base64"). +- `user`, `truncate` ("NONE", "START", "END"). +- `embedding_type`: Provider specific (e.g., "search_document" for Cohere, "RETRIEVAL_QUERY" for Gemini). + +### `EmbedResponse` +- `embeddings`: `Vec` (contains `vector: Vec`, `index`, `dimensions`). +- `usage`: `Usage`. +- `model_iden`, `provider_model_iden`. + +## Tooling + +### `Tool` +- `name`, `description`, `schema` (JSON Schema). +- `config`: Optional provider-specific config. + +### `ToolCall` +- `call_id`, `fn_name`, `fn_arguments` (JSON `Value`). +- `thought_signatures`: Leading thoughts associated with the call (captured during streaming). + +### `ToolResponse` +- `call_id`, `content` (Result as string, usually JSON). + +## Responses & Streaming + +### `ChatResponse` +- `content`: `MessageContent`. +- `reasoning_content`: Extracted thoughts (if normalized). +- `usage`: `Usage`. +- `model_iden`, `provider_model_iden`. 
+- `first_text()`, `into_first_text()`, `tool_calls()`. + +### `ChatStream` +- Sequence of `ChatStreamEvent`: `Start`, `Chunk(text)`, `ReasoningChunk(text)`, `ThoughtSignatureChunk(text)`, `ToolCallChunk(ToolCall)`, `End(StreamEnd)`. + +### `StreamEnd` +- `captured_usage`: `Option`. +- `captured_content`: Concatenated `MessageContent` (text, tools, thoughts). +- `captured_reasoning_content`: Concatenated reasoning content. +- `captured_first_text()`, `captured_tool_calls()`, `captured_thought_signatures()`. +- `into_assistant_message_for_tool_use()`: Returns a `ChatMessage` ready for the next request in a tool-use flow. + +## Usage & Metadata + +### `Usage` +- `prompt_tokens`: Total input tokens. +- `completion_tokens`: Total output tokens. +- `total_tokens`: Sum of input and output. +- `prompt_tokens_details`: `cache_creation_tokens`, `cached_tokens`, `audio_tokens`. +- `completion_tokens_details`: `reasoning_tokens`, `audio_tokens`, `accepted_prediction_tokens`, `rejected_prediction_tokens`. + +## Resolvers & Auth + +### `AuthData` +- `Key(String)`: The API key. +- `FromEnv(String)`: Env var name to lookup. +- `RequestOverride { url, headers }`: For unorthodox auth or endpoint overrides (e.g., Vertex AI, Bedrock). + +### `AuthResolver` +- `from_resolver_fn(f)` / `from_resolver_async_fn(f)`. +- Resolves `AuthData` based on `ModelIden`. + +### `ServiceTargetResolver` +- `from_resolver_fn(f)` / `from_resolver_async_fn(f)`. +- Maps `ServiceTarget` (Model, Auth, Endpoint) to a final call target. + +### `Headers` +- `merge(overlay)`, `applied_to(target)`. +- Iteration and `From` conversions for `HashMap`, `Vec<(K,V)>`, etc. + +## Model Resolution Nuances + +- **Auto-detection**: `AdapterKind` inferred from name (e.g., `gpt-` -> `OpenAI`, `claude-` -> `Anthropic`, `gemini-` -> `Gemini`, `command` -> `Cohere`, `grok` -> `Xai`, `glm` -> `Zai`). +- **Namespacing**: `namespace::model_name` (e.g., `together::meta-llama/...`, `nebius::Qwen/...`). 
+- **Ollama Fallback**: Unrecognized names default to `Ollama` adapter (localhost:11434). +- **Reasoning**: Automatic extraction for DeepSeek/Ollama when `normalize_reasoning_content` is enabled. + +## Error Handling + +- `genai::Error`: Covers `ChatReqHasNoMessages`, `RequiresApiKey`, `WebModelCall`, `StreamParse`, `AdapterNotSupported`, `Resolver`, etc. +- `Result`: Alias for `core::result::Result`. +- `size()`: Many types implement `.size()` for approximate memory tracking. From fbe2bed4ec2a0bedff6b64a787eb91e7bfd89d30 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 9 Jan 2026 19:06:57 -0800 Subject: [PATCH 083/123] . update README.md --- README.md | 96 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index d77a24b1..7c2d28d1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # genai - Multi-AI Providers Library for Rust -Currently natively supports: **OpenAI**, **Anthropic**, **Gemini**, **XAI/Grok**, **Ollama**, **Groq**, **DeepSeek** (deepseek.com & Groq), **Cohere**, **Mimo** (more to come) +Currently natively supports: **OpenAI**, **Anthropic**, **Gemini**, **xAI**, **Ollama**, **Groq**, **DeepSeek**, **Cohere**, **Together**, **Fireworks**, **Nebius**, **Mimo**, **Zai** (Zhipu AI), **BigModel**. Also allows a custom URL with `ServiceTargetResolver` (see [examples/c06-target-resolver.rs](examples/c06-target-resolver.rs)) @@ -16,50 +16,57 @@ Also allows a custom URL with `ServiceTargetResolver` (see [examples/c06-target- Provides a single, ergonomic API to many generative AI providers, such as Anthropic, OpenAI, Gemini, xAI, Ollama, Groq, and more. 
-**NOTE:** Big update with **v0.4.x** - More adapters, PDF and image support, embeddings, custom headers, and transparent support for the OpenAI Responses API (gpt-5-codex) +**NOTE:** Big update with **v0.5.0** - New adapters (BigModel, MIMO), Gemini Thinking support, Anthropic Reasoning Effort, and a more robust internal streaming engine. -## v0.4.x Big Release +## v0.5.0 - (2026-01-09) - **What's new**: - - **PDF and Images** support (thanks to [Andrew Rademacher](https://github.com/AndrewRademacher)) - - **Embedding** support (thanks to [Jesus Santander](https://github.com/jsantanders)) - - **Custom Headers** support (for AWS Bedrock, Vertex, etc.) (thanks to [Adrien](https://github.com/XciD)/[Julien Chaumond](https://github.com/julien-c)) - - **Simpler, flatter `MessageContent`** multi-part format (API change) (thanks to [Andrew Rademacher](https://github.com/AndrewRademacher) for insights) - - **Raw body capture** with `ChatOptions::with_capture_raw_body(true)` (thanks to [4t145](https://github.com/4t145)) - - **Transparent gpt-5-codex support with the Responses API**, even if gpt-5-codex uses a new API protocol (OpenAI Responses API) + - **New Adapters**: BigModel.cn and MIMO model adapter (thanks to [Akagi201](https://github.com/Akagi201)). + - **zai - change namespace strategy** with (zai:: for default, and zai-codding:: for subscription, same Adapter) + - **Gemini Thinking & Thought**: Full support for Gemini Thought signatures (thanks to [Himmelschmidt](https://github.com/Himmelschmidt)) and thinking levels. + - **Reasoning Effort Control**: Support for `ReasoningEffort` for Anthropic (Claude 3.7/4.5) and Gemini (Thinking levels), including `ReasoningEffort::None`. + - **Content & Binary Improvements**: Enhanced binary/PDF API and size tracking. + - **Internal Stream Refactor**: Switched to a unified `EventSourceStream` and `WebStream` for better reliability and performance across all providers. + - **Dependency Upgrade**: Now using `reqwest 0.13`. 
- **What's still awesome**: - - Normalized and ergonomic Chat API across all providers - - Most providers built in (OpenAI, Gemini, Anthropic, xAI, Groq, Together.ai, Fireworks.ai, ...) - - Native protocol support for Gemini and Anthropic protocols, for example allowing full budget controls with Gemini models - - Can override auth, endpoint, and headers to connect to AWS Bedrock, Vertex AI, etc. + - Normalized and ergonomic Chat API across all major providers. + - Native protocol support for Gemini and Anthropic protocols (Reasoning/Thinking controls). + - PDF, Image, and Embedding support. + - Custom Auth, Endpoint, and Header overrides. See: - - [migration from v0.3 to v0.4](doc/migration/migration-v_0_3_to_0_4.md) - [CHANGELOG](CHANGELOG.md) ## Big Thanks to -- [Vagmi Mudumbai](https://github.com/vagmi)) for [#96](https://github.com/jeremychone/rust-genai/pull/96) openai audio_type -- [Himmelschmidt](https://github.com/Himmelschmidt) for [#98](https://github.com/jeremychone/rust-genai/pull/98) openai service_tier -- [Bart Carroll](https://github.com/bartCarroll) for [#91](https://github.com/jeremychone/rust-genai/pull/91) Fixed streaming tool calls for openai models -- [Rui Andrada](https://github.com/shingonoide) for [#95](https://github.com/jeremychone/rust-genai/pull/95) refactoring ZHIPU adapter to ZAI -- [Adrien](https://github.com/XciD) Extra headers in requests, seed for chat requests, and fixes (with [Julien Chaumond](https://github.com/julien-c) for extra headers) -- [Andrew Rademacher](https://github.com/AndrewRademacher) for PDF support, Anthropic streamer, and insight on flattening the message content (e.g., ContentParts) -- [Jesus Santander](https://github.com/jsantanders) Embedding support [PR #83](https://github.com/jeremychone/rust-genai/pull/83) -- [4t145](https://github.com/4t145) for raw body capture [PR #68](https://github.com/jeremychone/rust-genai/pull/68) -- [Vagmi Mudumbai](https://github.com/vagmi) exec_chat bug fix [PR 
#86](https://github.com/jeremychone/rust-genai/pull/86) -- [Maximilian Goisser](https://github.com/hobofan) Fix OpenAI adapter to use ServiceTarget -- [ClanceyLu](https://github.com/ClanceyLu) for Tool Use Streaming support, web configuration support, and fixes -- [@SilasMarvin](https://github.com/SilasMarvin) for fixing content/tools issues with some Ollama models [PR #55](https://github.com/jeremychone/rust-genai/pull/55) -- [@una-spirito](https://github.com/luna-spirito) for Gemini `ReasoningEffort::Budget` support -- [@jBernavaPrah](https://github.com/jBernavaPrah) for adding tracing (it was long overdue). [PR #45](https://github.com/jeremychone/rust-genai/pull/45) -- [@GustavoWidman](https://github.com/GustavoWidman) for the initial Gemini tool/function support! [PR #41](https://github.com/jeremychone/rust-genai/pull/41) -- [@AdamStrojek](https://github.com/AdamStrojek) for initial image support [PR #36](https://github.com/jeremychone/rust-genai/pull/36) -- [@semtexzv](https://github.com/semtexzv) for `stop_sequences` Anthropic support [PR #34](https://github.com/jeremychone/rust-genai/pull/34) -- [@omarshehab221](https://github.com/omarshehab221) for de/serialize on structs [PR #19](https://github.com/jeremychone/rust-genai/pull/19) -- [@tusharmath](https://github.com/tusharmath) for making webc::Error [PR #12](https://github.com/jeremychone/rust-genai/pull/12) -- [@giangndm](https://github.com/giangndm) for making stream Send [PR #10](https://github.com/jeremychone/rust-genai/pull/10) -- [@stargazing-dino](https://github.com/stargazing-dino) for [PR #2](https://github.com/jeremychone/rust-genai/pull/2) - implement Groq completions +- v0.5.x + - [BinaryMuse](https://github.com/BinaryMuse) for [#114](https://github.com/jeremychone/rust-genai/pull/114) Anthropic ToolCalls streaming fix + - [Himmelschmidt](https://github.com/Himmelschmidt) for [#111](https://github.com/jeremychone/rust-genai/pull/111) Gemini `responseJsonSchema` support, 
[#103](https://github.com/jeremychone/rust-genai/pull/103) error body capture, and Gemini Thought signatures + - [malyavi-nochum](https://github.com/malyavi-nochum) for [#109](https://github.com/jeremychone/rust-genai/pull/109) Fireworks default streaming fix + - [mengdehong](https://github.com/mengdehong) for [#108](https://github.com/jeremychone/rust-genai/pull/108) Ollama reasoning streaming fix + - [Akagi201](https://github.com/Akagi201) for [#105](https://github.com/jeremychone/rust-genai/pull/105) MIMO model adapter +- v0.1.x .. v0.4.x + - [Vagmi Mudumbai](https://github.com/vagmi) for [#96](https://github.com/jeremychone/rust-genai/pull/96) openai audio_type + - [Himmelschmidt](https://github.com/Himmelschmidt) for [#98](https://github.com/jeremychone/rust-genai/pull/98) openai service_tier + - [Bart Carroll](https://github.com/bartCarroll) for [#91](https://github.com/jeremychone/rust-genai/pull/91) Fixed streaming tool calls for openai models + - [Rui Andrada](https://github.com/shingonoide) for [#95](https://github.com/jeremychone/rust-genai/pull/95) refactoring ZHIPU adapter to ZAI + - [Adrien](https://github.com/XciD) Extra headers in requests, seed for chat requests, and fixes (with [Julien Chaumond](https://github.com/julien-c) for extra headers) + - [Andrew Rademacher](https://github.com/AndrewRademacher) for PDF support, Anthropic streamer + - [Jesus Santander](https://github.com/jsantanders) Embedding support [PR #83](https://github.com/jeremychone/rust-genai/pull/83) + - [4t145](https://github.com/4t145) for raw body capture [PR #68](https://github.com/jeremychone/rust-genai/pull/68) + - [Vagmi Mudumbai](https://github.com/vagmi) exec_chat bug fix [PR #86](https://github.com/jeremychone/rust-genai/pull/86) + - [Maximilian Goisser](https://github.com/hobofan) Fix OpenAI adapter to use ServiceTarget + - [ClanceyLu](https://github.com/ClanceyLu) for Tool Use Streaming support, web configuration support, and fixes + - 
[@SilasMarvin](https://github.com/SilasMarvin) for fixing content/tools issues with some Ollama models [PR #55](https://github.com/jeremychone/rust-genai/pull/55) + - [@una-spirito](https://github.com/luna-spirito) for Gemini `ReasoningEffort::Budget` support + - [@jBernavaPrah](https://github.com/jBernavaPrah) for adding tracing (it was long overdue). [PR #45](https://github.com/jeremychone/rust-genai/pull/45) + - [@GustavoWidman](https://github.com/GustavoWidman) for the initial Gemini tool/function support! [PR #41](https://github.com/jeremychone/rust-genai/pull/41) + - [@AdamStrojek](https://github.com/AdamStrojek) for initial image support [PR #36](https://github.com/jeremychone/rust-genai/pull/36) + - [@semtexzv](https://github.com/semtexzv) for `stop_sequences` Anthropic support [PR #34](https://github.com/jeremychone/rust-genai/pull/34) + - [@omarshehab221](https://github.com/omarshehab221) for de/serialize on structs [PR #19](https://github.com/jeremychone/rust-genai/pull/19) + - [@tusharmath](https://github.com/tusharmath) for making webc::Error [PR #12](https://github.com/jeremychone/rust-genai/pull/12) + - [@giangndm](https://github.com/giangndm) for making stream Send [PR #10](https://github.com/jeremychone/rust-genai/pull/10) + - [@stargazing-dino](https://github.com/stargazing-dino) for [PR #2](https://github.com/jeremychone/rust-genai/pull/2) - implement Groq completions ## Usage examples @@ -91,13 +98,16 @@ use genai::Client; const MODEL_OPENAI: &str = "gpt-4o-mini"; // o1-mini, gpt-4o-mini const MODEL_ANTHROPIC: &str = "claude-3-haiku-20240307"; -const MODEL_COHERE: &str = "command-light"; +// or namespaced with simple name "fireworks::qwen3-30b-a3b", or "fireworks::accounts/fireworks/models/qwen3-30b-a3b" +const MODEL_FIREWORKS: &str = "accounts/fireworks/models/qwen3-30b-a3b"; +const MODEL_TOGETHER: &str = "together::openai/gpt-oss-20b"; const MODEL_GEMINI: &str = "gemini-2.0-flash"; const MODEL_GROQ: &str = "llama-3.1-8b-instant"; const 
MODEL_OLLAMA: &str = "gemma:2b"; // sh: `ollama pull gemma:2b` -const MODEL_XAI: &str = "grok-beta"; +const MODEL_XAI: &str = "grok-3-mini"; const MODEL_DEEPSEEK: &str = "deepseek-chat"; -const MODEL_MIMO: &str = "mimo-v2-flash"; +const MODEL_ZAI: &str = "glm-4-plus"; +const MODEL_COHERE: &str = "command-r7b-12-2024"; // NOTE: These are the default environment keys for each AI Adapter Type. // They can be customized; see `examples/c02-auth.rs` @@ -105,13 +115,15 @@ const MODEL_AND_KEY_ENV_NAME_LIST: &[(&str, &str)] = &[ // -- De/activate models/providers (MODEL_OPENAI, "OPENAI_API_KEY"), (MODEL_ANTHROPIC, "ANTHROPIC_API_KEY"), - (MODEL_COHERE, "COHERE_API_KEY"), (MODEL_GEMINI, "GEMINI_API_KEY"), + (MODEL_FIREWORKS, "FIREWORKS_API_KEY"), + (MODEL_TOGETHER, "TOGETHER_API_KEY"), (MODEL_GROQ, "GROQ_API_KEY"), (MODEL_XAI, "XAI_API_KEY"), (MODEL_DEEPSEEK, "DEEPSEEK_API_KEY"), - (MODEL_MIMO, "MIMO_API_KEY"), (MODEL_OLLAMA, ""), + (MODEL_ZAI, "ZAI_API_KEY"), + (MODEL_COHERE, "COHERE_API_KEY"), ]; // NOTE: Model to AdapterKind (AI Provider) type mapping rule @@ -120,7 +132,7 @@ const MODEL_AND_KEY_ENV_NAME_LIST: &[(&str, &str)] = &[ // - starts_with "command" -> Cohere // - starts_with "gemini" -> Gemini // - model in Groq models -> Groq -// - starts_with "mimo" -> Mimo +// - starts_with "glm" -> ZAI // - For anything else -> Ollama // // This can be customized; see `examples/c03-mapper.rs` @@ -208,7 +220,7 @@ async fn main() -> Result<(), Box> { ## ChatOptions - **(1)** - **OpenAI-compatible** notes - - Models: OpenAI, DeepSeek, Groq, Ollama, xAI, Mimo + - Models: OpenAI, DeepSeek, Groq, Ollama, xAI, Mimo, Together, Fireworks, Nebius, Zai, Together, Fireworks, Nebius, Zai | Property | OpenAI Compatibles (*1) | Anthropic | Gemini `generationConfig.` | Cohere | |---------------|-------------------------|-----------------------------|----------------------------|---------------| From db22b321141fe2b77f50c681cf9f2f669abcd24e Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: 
Fri, 9 Jan 2026 19:35:02 -0800 Subject: [PATCH 084/123] . update to v0.5.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 3cc61e04..ecc1f95f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0-alpha.10-WIP" +version = "0.5.0" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 90107d7f212f9b279191e049c4e4ff27ef220655 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 9 Jan 2026 19:35:52 -0800 Subject: [PATCH 085/123] . 0.5.1-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index ecc1f95f..06721d26 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.0" +version = "0.5.1-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. 
(OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 8f9f79e31ea2a418a9c867945a918237737e253f Mon Sep 17 00:00:00 2001 From: Michelle Tilley Date: Mon, 12 Jan 2026 13:07:12 -0800 Subject: [PATCH 086/123] > Ensure extra headers are applied in exec_chat and exec_chat_stream (#116) --- src/adapter/adapters/openai/adapter_impl.rs | 7 +------ src/adapter/adapters/openai_resp/adapter_impl.rs | 7 +------ src/client/client_impl.rs | 8 ++++++++ 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/adapter/adapters/openai/adapter_impl.rs b/src/adapter/adapters/openai/adapter_impl.rs index 18efb72d..93d1a8e0 100644 --- a/src/adapter/adapters/openai/adapter_impl.rs +++ b/src/adapter/adapters/openai/adapter_impl.rs @@ -218,12 +218,7 @@ impl OpenAIAdapter { let url = AdapterDispatcher::get_service_url(&model, service_type, endpoint)?; // -- headers - let mut headers = Headers::from(("Authorization".to_string(), format!("Bearer {api_key}"))); - - // -- extra headers - if let Some(extra_headers) = options_set.extra_headers() { - headers.merge_with(extra_headers); - } + let headers = Headers::from(("Authorization".to_string(), format!("Bearer {api_key}"))); let stream = matches!(service_type, ServiceType::ChatStream); diff --git a/src/adapter/adapters/openai_resp/adapter_impl.rs b/src/adapter/adapters/openai_resp/adapter_impl.rs index 46b62b13..29b055e7 100644 --- a/src/adapter/adapters/openai_resp/adapter_impl.rs +++ b/src/adapter/adapters/openai_resp/adapter_impl.rs @@ -73,12 +73,7 @@ impl Adapter for OpenAIRespAdapter { let url = AdapterDispatcher::get_service_url(&model, service_type, endpoint)?; // -- headers - let mut headers = Headers::from(("Authorization".to_string(), format!("Bearer {api_key}"))); - - // -- extra headers - if let Some(extra_headers) = chat_options.extra_headers() { - headers.merge_with(extra_headers); - } + let headers = Headers::from(("Authorization".to_string(), format!("Bearer {api_key}"))); // -- for new v1/responses/ for 
now do not support stream let stream = matches!(service_type, ServiceType::ChatStream); diff --git a/src/client/client_impl.rs b/src/client/client_impl.rs index 505d7e5b..b85b32d9 100644 --- a/src/client/client_impl.rs +++ b/src/client/client_impl.rs @@ -70,6 +70,10 @@ impl Client { payload, } = AdapterDispatcher::to_web_request_data(target, ServiceType::Chat, chat_req, options_set.clone())?; + if let Some(extra_headers) = options.and_then(|o| o.extra_headers.as_ref()) { + headers.merge_with(&extra_headers); + } + if let AuthData::RequestOverride { url: override_url, headers: override_headers, @@ -115,6 +119,10 @@ impl Client { payload, } = AdapterDispatcher::to_web_request_data(target, ServiceType::ChatStream, chat_req, options_set.clone())?; + if let Some(extra_headers) = options.and_then(|o| o.extra_headers.as_ref()) { + headers.merge_with(&extra_headers); + } + // TODO: Need to check this. // This was part of the 429c5cee2241dbef9f33699b9c91202233c22816 commit // But now it is missing in the the exec_chat(..) above, which is probably an issue. 
From 45a515633a8264fb01e762b306b45ad15f6a9e38 Mon Sep 17 00:00:00 2001 From: Vince Mutolo Date: Sat, 17 Jan 2026 12:43:12 -0500 Subject: [PATCH 087/123] ^ gemini adapter - allow empty tool thoughtSignature for Gemini 3 (#115) --- examples/c11-tooluse-deterministic.rs | 62 +++++++++++++++++++++ src/adapter/adapters/gemini/adapter_impl.rs | 23 +++++++- tests/tests_p_gemini.rs | 45 +++++++++++++++ 3 files changed, 127 insertions(+), 3 deletions(-) create mode 100644 examples/c11-tooluse-deterministic.rs diff --git a/examples/c11-tooluse-deterministic.rs b/examples/c11-tooluse-deterministic.rs new file mode 100644 index 00000000..9eb51654 --- /dev/null +++ b/examples/c11-tooluse-deterministic.rs @@ -0,0 +1,62 @@ +use genai::Client; +use genai::chat::{ChatMessage, ChatRequest, Tool, ToolCall, ToolResponse}; +use serde_json::json; + +const MODEL: &str = "gemini-3-flash-preview"; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let client = Client::default(); + + let weather_tool = Tool::new("get_weather") + .with_description("Get the current weather for a location") + .with_schema(json!({ + "type": "object", + "properties": { + "city": { "type": "string" }, + "unit": { "type": "string", "enum": ["C", "F"] } + }, + "required": ["city", "unit"] + })); + + // Create a synthetic conversation history. These tool calls were not generated by + // Gemini 3 (or any LLM) and do not have a thought signature. This is useful for e.g. + // pre-seeding a conversation with tool calls and responses, or executing deterministic + // logic "in-band" with the LLM conversation. genai will correctly inject the required + // string "skip_thought_signature_validator" in place of a valid signature. Otherwise, + // the call would error out on Gemini 3 models. 
+ let messages = vec![ + ChatMessage::user("What's the weather like in Paris?"), + ChatMessage::assistant(vec![ToolCall { + call_id: "call_123".to_string(), + fn_name: "get_weather".to_string(), + fn_arguments: json!({"city": "Paris", "unit": "C"}), + thought_signatures: None, + }]), + ChatMessage::from(ToolResponse::new( + "call_123".to_string(), + json!({"temperature": 15, "condition": "Cloudy"}).to_string(), + )), + ]; + + let chat_req = ChatRequest::new(messages).with_tools(vec![weather_tool]); + + println!("--- Model: {MODEL}"); + println!("--- Sending deterministic history (synthetic tool call)..."); + + match client.exec_chat(MODEL, chat_req, None).await { + Ok(chat_res) => { + println!("\n--- Response received successfully:"); + if let Some(text) = chat_res.first_text() { + println!("{}", text); + } + } + Err(e) => { + eprintln!("\n--- Error: Request failed!"); + eprintln!("{}", e); + return Err(e.into()); + } + } + + Ok(()) +} diff --git a/src/adapter/adapters/gemini/adapter_impl.rs b/src/adapter/adapters/gemini/adapter_impl.rs index 710a4d26..93c6ba76 100644 --- a/src/adapter/adapters/gemini/adapter_impl.rs +++ b/src/adapter/adapters/gemini/adapter_impl.rs @@ -550,6 +550,8 @@ impl GeminiAdapter { ChatRole::Assistant => { let mut parts_values: Vec = Vec::new(); let mut pending_thought: Option = None; + let mut is_first_tool_call = true; + for part in msg.content { match part { ContentPart::Text(text) => { @@ -568,12 +570,27 @@ impl GeminiAdapter { }), ); - if let Some(thought) = pending_thought.take() { - // Inject thoughtSignature alongside functionCall in the same Part object - part_obj.insert("thoughtSignature".to_string(), json!(thought)); + match pending_thought.take() { + Some(thought) => { + // Inject thoughtSignature alongside functionCall in the same Part object + part_obj.insert("thoughtSignature".to_string(), json!(thought)); + } + None => { + // For Gemini 3 models, if there haven't been any thoughts, and this is + // still the first tool call, 
we are required to inject a special flag. + // See: https://ai.google.dev/gemini-api/docs/thought-signatures#faqs + let is_gemini_3 = model_iden.model_name.contains("gemini-3"); + if is_gemini_3 && is_first_tool_call { + part_obj.insert( + "thoughtSignature".to_string(), + json!("skip_thought_signature_validator"), + ); + } + } } parts_values.push(Value::Object(part_obj)); + is_first_tool_call = false; } ContentPart::ThoughtSignature(thought) => { if let Some(prev_thought) = pending_thought.take() { diff --git a/tests/tests_p_gemini.rs b/tests/tests_p_gemini.rs index a4228841..3a48e097 100644 --- a/tests/tests_p_gemini.rs +++ b/tests/tests_p_gemini.rs @@ -126,6 +126,51 @@ async fn test_tool_simple_ok() -> TestResult<()> { async fn test_tool_full_flow_ok() -> TestResult<()> { common_tests::common_test_tool_full_flow_ok(MODEL).await } + +#[tokio::test] +async fn test_tool_deterministic_history_gemini_3_ok() -> TestResult<()> { + use genai::chat::{ChatMessage, ChatRequest, Tool, ToolCall, ToolResponse}; + use serde_json::json; + + let client = genai::Client::default(); + + let weather_tool = Tool::new("get_weather").with_schema(json!({ + "type": "object", + "properties": { + "city": { "type": "string" }, + "unit": { "type": "string", "enum": ["C", "F"] } + }, + "required": ["city", "unit"] + })); + + // Pre-seed history with a "synthetic" tool call (missing thought signatures) + let messages = vec![ + ChatMessage::user("What's the weather like in Paris?"), + ChatMessage::assistant(vec![ToolCall { + call_id: "call_123".to_string(), + fn_name: "get_weather".to_string(), + fn_arguments: json!({"city": "Paris", "unit": "C"}), + thought_signatures: None, + }]), + ChatMessage::from(ToolResponse::new( + "call_123".to_string(), + json!({"temperature": 15, "condition": "Cloudy"}).to_string(), + )), + ]; + + let chat_req = ChatRequest::new(messages).with_tools(vec![weather_tool]); + + // This verifies that the adapter correctly injects 'skip_thought_signature_validator'. 
+ // (Otherwise Gemini 3 would return a 400 error.) + let chat_res = client.exec_chat(MODEL_GPRO_3, chat_req, None).await?; + + assert!( + chat_res.first_text().is_some(), + "Expected a text response from the model" + ); + + Ok(()) +} // endregion: --- Tool Tests // region: --- Resolver Tests From 2a35fbe55dbcd2e311c7abd29f772541e580deae Mon Sep 17 00:00:00 2001 From: Michelle Tilley Date: Sat, 17 Jan 2026 09:47:19 -0800 Subject: [PATCH 088/123] - fix - check HTTP status in WebStream before processing byte stream (#117) ! Add error field to Error::WebStream to preserve original error --- src/adapter/adapters/anthropic/streamer.rs | 1 + src/adapter/adapters/cohere/streamer.rs | 1 + src/adapter/adapters/gemini/streamer.rs | 1 + src/adapter/adapters/openai/streamer.rs | 1 + src/error.rs | 17 ++++++++++- src/lib.rs | 2 +- src/webc/event_source_stream.rs | 6 ++-- src/webc/web_stream.rs | 35 +++++++++++++++++----- 8 files changed, 52 insertions(+), 12 deletions(-) diff --git a/src/adapter/adapters/anthropic/streamer.rs b/src/adapter/adapters/anthropic/streamer.rs index 3bd6ad8a..1c18c376 100644 --- a/src/adapter/adapters/anthropic/streamer.rs +++ b/src/adapter/adapters/anthropic/streamer.rs @@ -205,6 +205,7 @@ impl futures::Stream for AnthropicStreamer { return Poll::Ready(Some(Err(Error::WebStream { model_iden: self.options.model_iden.clone(), cause: err.to_string(), + error: err, }))); } None => return Poll::Ready(None), diff --git a/src/adapter/adapters/cohere/streamer.rs b/src/adapter/adapters/cohere/streamer.rs index 40d8c9ea..f17d1e0e 100644 --- a/src/adapter/adapters/cohere/streamer.rs +++ b/src/adapter/adapters/cohere/streamer.rs @@ -129,6 +129,7 @@ impl futures::Stream for CohereStreamer { return Poll::Ready(Some(Err(Error::WebStream { model_iden: self.options.model_iden.clone(), cause: err.to_string(), + error: err, }))); } None => { diff --git a/src/adapter/adapters/gemini/streamer.rs b/src/adapter/adapters/gemini/streamer.rs index 94cf25bd..f02ac5e9 100644 
--- a/src/adapter/adapters/gemini/streamer.rs +++ b/src/adapter/adapters/gemini/streamer.rs @@ -172,6 +172,7 @@ impl futures::Stream for GeminiStreamer { return Poll::Ready(Some(Err(Error::WebStream { model_iden: self.options.model_iden.clone(), cause: err.to_string(), + error: err, }))); } None => { diff --git a/src/adapter/adapters/openai/streamer.rs b/src/adapter/adapters/openai/streamer.rs index ac3eef96..7bdedf04 100644 --- a/src/adapter/adapters/openai/streamer.rs +++ b/src/adapter/adapters/openai/streamer.rs @@ -279,6 +279,7 @@ impl futures::Stream for OpenAIStreamer { return Poll::Ready(Some(Err(Error::WebStream { model_iden: self.options.model_iden.clone(), cause: err.to_string(), + error: err, }))); } None => { diff --git a/src/error.rs b/src/error.rs index 91afa148..d706add2 100644 --- a/src/error.rs +++ b/src/error.rs @@ -2,8 +2,12 @@ use crate::adapter::AdapterKind; use crate::chat::ChatRole; use crate::{ModelIden, resolver, webc}; use derive_more::{Display, From}; +use reqwest::StatusCode; use value_ext::JsonValueExtError; +/// Type alias for boxed errors that are Send + Sync +pub type BoxError = Box; + /// GenAI main Result type alias (with genai::Error) pub type Result = core::result::Result; @@ -90,7 +94,18 @@ pub enum Error { }, #[display("Web stream error for model '{model_iden}'.\nCause: {cause}")] - WebStream { model_iden: ModelIden, cause: String }, + WebStream { + model_iden: ModelIden, + cause: String, + error: BoxError, + }, + + #[display("HTTP error.\nStatus: {status} {canonical_reason}\nBody: {body}")] + HttpError { + status: StatusCode, + canonical_reason: String, + body: String, + }, // -- Modules #[display("Resolver error for model '{model_iden}'.\nCause: {resolver_error}")] diff --git a/src/lib.rs b/src/lib.rs index f8591550..07a218f5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,7 +12,7 @@ mod error; // -- Flatten pub use client::*; pub use common::*; -pub use error::{Error, Result}; +pub use error::{BoxError, Error, Result}; // -- 
Public Modules pub mod adapter; diff --git a/src/webc/event_source_stream.rs b/src/webc/event_source_stream.rs index 8cc04636..722cd136 100644 --- a/src/webc/event_source_stream.rs +++ b/src/webc/event_source_stream.rs @@ -1,3 +1,4 @@ +use crate::error::BoxError; use crate::webc::WebStream; use futures::Stream; use reqwest::RequestBuilder; @@ -33,7 +34,7 @@ impl EventSourceStream { } impl Stream for EventSourceStream { - type Item = Result>; + type Item = Result; fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { let this = self.get_mut(); @@ -77,8 +78,7 @@ impl Stream for EventSourceStream { return Poll::Ready(Some(Ok(Event::Message(Message { event, data })))); } Poll::Ready(Some(Err(e))) => { - // Convert Box to Box - return Poll::Ready(Some(Err(e.to_string().into()))); + return Poll::Ready(Some(Err(e))); } Poll::Ready(None) => return Poll::Ready(None), Poll::Pending => return Poll::Pending, diff --git a/src/webc/web_stream.rs b/src/webc/web_stream.rs index 44bdae01..300ea236 100644 --- a/src/webc/web_stream.rs +++ b/src/webc/web_stream.rs @@ -3,10 +3,11 @@ use futures::stream::TryStreamExt; use futures::{Future, Stream}; use reqwest::{RequestBuilder, Response}; use std::collections::VecDeque; -use std::error::Error; use std::pin::Pin; use std::task::{Context, Poll}; +use crate::error::{BoxError, Error as GenaiError}; + /// WebStream is a simple web stream implementation that splits the stream messages by a given delimiter. /// - It is intended to be a pragmatic solution for services that do not adhere to the `text/event-stream` format and content type. /// - For providers that support the standard `text/event-stream`, `genai` uses the `reqwest-eventsource`/`eventsource-stream` crates. 
@@ -17,8 +18,8 @@ use std::task::{Context, Poll}; pub struct WebStream { stream_mode: StreamMode, reqwest_builder: Option, - response_future: Option>> + Send>>>, - bytes_stream: Option>> + Send>>>, + response_future: Option> + Send>>>, + bytes_stream: Option> + Send>>>, // If a poll was a partial message, then we keep the previous part partial_message: Option, // If a poll retrieved multiple messages, we keep them to be sent in the next poll @@ -57,7 +58,7 @@ impl WebStream { } impl Stream for WebStream { - type Item = Result>; + type Item = Result; fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { let this = self.get_mut(); @@ -74,7 +75,27 @@ impl Stream for WebStream { if let Some(ref mut fut) = this.response_future { match Pin::new(fut).poll(cx) { Poll::Ready(Ok(response)) => { - let bytes_stream = response.bytes_stream().map_err(|e| Box::new(e) as Box); + // Check HTTP status before proceeding with the stream + let status = response.status(); + if !status.is_success() { + this.response_future = None; + // For error responses, we need to read the body to get the error message + // Store a future that reads the body and returns an error + let error_future = async move { + let body = response + .text() + .await + .unwrap_or_else(|e| format!("Failed to read error body: {}", e)); + Err::(Box::new(GenaiError::HttpError { + status, + canonical_reason: status.canonical_reason().unwrap_or("Unknown").to_string(), + body, + })) + }; + this.response_future = Some(Box::pin(error_future)); + continue; + } + let bytes_stream = response.bytes_stream().map_err(|e| Box::new(e) as BoxError); this.bytes_stream = Some(Box::pin(bytes_stream)); this.response_future = None; } @@ -91,7 +112,7 @@ impl Stream for WebStream { Poll::Ready(Some(Ok(bytes))) => { let buff_string = match String::from_utf8(bytes.to_vec()) { Ok(s) => s, - Err(e) => return Poll::Ready(Some(Err(Box::new(e) as Box))), + Err(e) => return Poll::Ready(Some(Err(Box::new(e) as BoxError))), }; // -- 
Iterate through the parts @@ -145,7 +166,7 @@ impl Stream for WebStream { } if let Some(reqwest_builder) = this.reqwest_builder.take() { - let fut = async move { reqwest_builder.send().await.map_err(|e| Box::new(e) as Box) }; + let fut = async move { reqwest_builder.send().await.map_err(|e| Box::new(e) as BoxError) }; this.response_future = Some(Box::pin(fut)); continue; } From 65bca040641888fe1a47baa1b2aad826bc6c43c8 Mon Sep 17 00:00:00 2001 From: Thomas Anagrius Date: Sat, 17 Jan 2026 18:50:58 +0100 Subject: [PATCH 089/123] - fix - openai_resp - use output_text for assistant message content (#119) --- .../adapters/openai_resp/adapter_impl.rs | 62 ++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/src/adapter/adapters/openai_resp/adapter_impl.rs b/src/adapter/adapters/openai_resp/adapter_impl.rs index 29b055e7..4ea2ae07 100644 --- a/src/adapter/adapters/openai_resp/adapter_impl.rs +++ b/src/adapter/adapters/openai_resp/adapter_impl.rs @@ -408,7 +408,7 @@ impl OpenAIRespAdapter { match part { ContentPart::Text(text) => { item_message_content.push(json!({ - "type": "input_text", + "type": "output_text", "text": text })); } @@ -498,3 +498,63 @@ struct OpenAIRespRequestParts { } // endregion: --- Support + +// region: --- Tests + +#[cfg(test)] +mod tests { + use super::*; + use crate::adapter::AdapterKind; + use crate::chat::ChatMessage; + + /// Test that assistant message text content uses "output_text" type (not "input_text"). + /// + /// This is required by OpenAI's Responses API - assistant content is model output, + /// so it must use "output_text". Using "input_text" causes: + /// "Invalid value: 'input_text'. Supported values are: 'output_text' and 'refusal'." 
+ #[test] + fn test_assistant_message_uses_output_text_content_type() { + let model_iden = ModelIden::new(AdapterKind::OpenAIResp, "gpt-5-codex"); + + // Create a chat request with an assistant message + let chat_req = ChatRequest::default() + .with_system("You are a helpful assistant.") + .append_message(ChatMessage::user("What's the weather?")) + .append_message(ChatMessage::assistant("The weather is sunny.")); + + // Serialize to OpenAI Responses API format + let parts = + OpenAIRespAdapter::into_openai_request_parts(&model_iden, chat_req).expect("Should serialize successfully"); + + // Find the assistant message in input_items + let assistant_msg = parts + .input_items + .iter() + .find(|item| { + item.get("type").and_then(|t| t.as_str()) == Some("message") + && item.get("role").and_then(|r| r.as_str()) == Some("assistant") + }) + .expect("Should have an assistant message"); + + // Check the content uses "output_text" type + let content = assistant_msg + .get("content") + .and_then(|c| c.as_array()) + .expect("Assistant message should have content array"); + + assert!(!content.is_empty(), "Content should not be empty"); + + let first_content = &content[0]; + let content_type = first_content + .get("type") + .and_then(|t| t.as_str()) + .expect("Content should have a type"); + + assert_eq!( + content_type, "output_text", + "Assistant message content should use 'output_text' type, not 'input_text'" + ); + } +} + +// endregion: --- Tests From 475e5b1837e14aafac664e6435b61ceae47f0026 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 17 Jan 2026 10:01:04 -0800 Subject: [PATCH 090/123] . 
minor readme and code update --- .gitignore | 3 ++- README.md | 42 +++++++++++++++++++--------------------- src/chat/chat_options.rs | 1 + 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index 0ea91b0b..57e68e32 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ target/ dist/ out/ +*.html # Doc Files *.pdf *.docx @@ -79,7 +80,7 @@ __pycache__/ # -- others -# Allows .env (make sure only dev info) +# Allows .env (make sure only dev info) # !.env # Commented by default # Allow vscode diff --git a/README.md b/README.md index 7c2d28d1..5b75fe62 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -# genai - Multi-AI Providers Library for Rust +# genai, Multi-AI Providers Library for Rust Currently natively supports: **OpenAI**, **Anthropic**, **Gemini**, **xAI**, **Ollama**, **Groq**, **DeepSeek**, **Cohere**, **Together**, **Fireworks**, **Nebius**, **Mimo**, **Zai** (Zhipu AI), **BigModel**. -Also allows a custom URL with `ServiceTargetResolver` (see [examples/c06-target-resolver.rs](examples/c06-target-resolver.rs)) +Also supports a custom URL with `ServiceTargetResolver` (see [examples/c06-target-resolver.rs](examples/c06-target-resolver.rs)).
@@ -12,17 +12,17 @@ Also allows a custom URL with `ServiceTargetResolver` (see [examples/c06-target-
-
+[Docs for LLMs](doc/for-llm/api-reference-for-llm.md) -Provides a single, ergonomic API to many generative AI providers, such as Anthropic, OpenAI, Gemini, xAI, Ollama, Groq, and more. +Provides a single, ergonomic API for many generative AI providers, such as Anthropic, OpenAI, Gemini, xAI, Ollama, Groq, and more. -**NOTE:** Big update with **v0.5.0** - New adapters (BigModel, MIMO), Gemini Thinking support, Anthropic Reasoning Effort, and a more robust internal streaming engine. +**NOTE:** Big update with **v0.5.0**: New adapters (BigModel, MIMO), Gemini Thinking support, Anthropic Reasoning Effort, and a more robust internal streaming engine. ## v0.5.0 - (2026-01-09) - **What's new**: - - **New Adapters**: BigModel.cn and MIMO model adapter (thanks to [Akagi201](https://github.com/Akagi201)). - - **zai - change namespace strategy** with (zai:: for default, and zai-codding:: for subscription, same Adapter) + - **New Adapters**: BigModel.cn and the MIMO model adapter (thanks to [Akagi201](https://github.com/Akagi201)). + - **zai: changed namespace strategy**, with (zai:: for default, and zai-codding:: for subscription, same adapter) - **Gemini Thinking & Thought**: Full support for Gemini Thought signatures (thanks to [Himmelschmidt](https://github.com/Himmelschmidt)) and thinking levels. - **Reasoning Effort Control**: Support for `ReasoningEffort` for Anthropic (Claude 3.7/4.5) and Gemini (Thinking levels), including `ReasoningEffort::None`. - **Content & Binary Improvements**: Enhanced binary/PDF API and size tracking. @@ -31,8 +31,8 @@ Provides a single, ergonomic API to many generative AI providers, such as Anthro - **What's still awesome**: - Normalized and ergonomic Chat API across all major providers. - Native protocol support for Gemini and Anthropic protocols (Reasoning/Thinking controls). - - PDF, Image, and Embedding support. - - Custom Auth, Endpoint, and Header overrides. + - PDF, image, and embedding support. 
+ - Custom auth, endpoint, and header overrides. See: - [CHANGELOG](CHANGELOG.md) @@ -56,7 +56,7 @@ See: - [4t145](https://github.com/4t145) for raw body capture [PR #68](https://github.com/jeremychone/rust-genai/pull/68) - [Vagmi Mudumbai](https://github.com/vagmi) exec_chat bug fix [PR #86](https://github.com/jeremychone/rust-genai/pull/86) - [Maximilian Goisser](https://github.com/hobofan) Fix OpenAI adapter to use ServiceTarget - - [ClanceyLu](https://github.com/ClanceyLu) for Tool Use Streaming support, web configuration support, and fixes + - [ClanceyLu](https://github.com/ClanceyLu) for tool use streaming support, web configuration support, and fixes - [@SilasMarvin](https://github.com/SilasMarvin) for fixing content/tools issues with some Ollama models [PR #55](https://github.com/jeremychone/rust-genai/pull/55) - [@una-spirito](https://github.com/luna-spirito) for Gemini `ReasoningEffort::Budget` support - [@jBernavaPrah](https://github.com/jBernavaPrah) for adding tracing (it was long overdue). [PR #45](https://github.com/jeremychone/rust-genai/pull/45) @@ -66,17 +66,17 @@ See: - [@omarshehab221](https://github.com/omarshehab221) for de/serialize on structs [PR #19](https://github.com/jeremychone/rust-genai/pull/19) - [@tusharmath](https://github.com/tusharmath) for making webc::Error [PR #12](https://github.com/jeremychone/rust-genai/pull/12) - [@giangndm](https://github.com/giangndm) for making stream Send [PR #10](https://github.com/jeremychone/rust-genai/pull/10) - - [@stargazing-dino](https://github.com/stargazing-dino) for [PR #2](https://github.com/jeremychone/rust-genai/pull/2) - implement Groq completions + - [@stargazing-dino](https://github.com/stargazing-dino) for [PR #2](https://github.com/jeremychone/rust-genai/pull/2), implement Groq completions ## Usage examples - Check out [AIPACK](https://aipack.ai), which wraps this **genai** library into an agentic runtime to run, build, and share AI Agent Packs. 
See [`pro@coder`](https://www.youtube.com/watch?v=zL1BzPVM8-Y&list=PL7r-PXl6ZPcB2zN0XHsYIDaD5yW8I40AE) for a simple example of how I use AI PACK/genai for production coding. -> Note: Feel free to send me a short description and a link to your application or library using genai. +> Note: Feel free to send me a short description and a link to your application or library that uses genai. ## Key Features -- Native Multi-AI Provider/Model: OpenAI, Anthropic, Gemini, Ollama, Groq, xAI, DeepSeek (Direct chat and stream) (see [examples/c00-readme.rs](examples/c00-readme.rs)) +- Native Multi-AI Provider/Model: OpenAI, Anthropic, Gemini, Ollama, Groq, xAI, DeepSeek (direct chat and streaming) (see [examples/c00-readme.rs](examples/c00-readme.rs)) - DeepSeekR1 support, with `reasoning_content` (and stream support), plus DeepSeek Groq and Ollama support (and `reasoning_content` normalization) - Image Analysis (for OpenAI, Gemini flash-2, Anthropic) (see [examples/c07-image.rs](examples/c07-image.rs)) - Custom Auth/API Key (see [examples/c02-auth.rs](examples/c02-auth.rs)) @@ -185,7 +185,7 @@ async fn main() -> Result<(), Box> { - [examples/c01-conv.rs](examples/c01-conv.rs) - Shows how to build a conversation flow. - [examples/c02-auth.rs](examples/c02-auth.rs) - Demonstrates how to provide a custom `AuthResolver` to provide auth data (i.e., for api_key) per adapter kind. - [examples/c03-mapper.rs](examples/c03-mapper.rs) - Demonstrates how to provide a custom `AdapterKindResolver` to customize the "model name" to "adapter kind" mapping. -- [examples/c04-chat-options.rs](examples/c04-chat-options.rs) - Demonstrates how to set chat generation options such as `temperature` and `max_tokens` at the client level (for all requests) and per-request level. +- [examples/c04-chat-options.rs](examples/c04-chat-options.rs) - Demonstrates how to set chat generation options such as `temperature` and `max_tokens` at the client level (for all requests) and at the per-request level. 
- [examples/c05-model-names.rs](examples/c05-model-names.rs) - Shows how to get model names per AdapterKind. - [examples/c06-target-resolver.rs](examples/c06-target-resolver.rs) - For custom auth, endpoint, and model. - [examples/c07-image.rs](examples/c07-image.rs) - Image analysis support @@ -234,28 +234,26 @@ async fn main() -> Result<(), Box> { |-----------------------------|-----------------------------|-------------------------|----------------------------|-----------------------| | `prompt_tokens` | `prompt_tokens` | `input_tokens` (added) | `promptTokenCount` (2) | `input_tokens` | | `completion_tokens` | `completion_tokens` | `output_tokens` (added) | `candidatesTokenCount` (2) | `output_tokens` | -| `total_tokens` | `total_tokens` | (computed) | `totalTokenCount` (2) | (computed) | +| `total_tokens` | `total_tokens` | (computed) | `totalTokenCount` (2) | (computed) | | `prompt_tokens_details` | `prompt_tokens_details` | `cached/cache_creation` | N/A for now | N/A for now | | `completion_tokens_details` | `completion_tokens_details` | N/A for now | N/A for now | N/A for now | - - **(1)** - **OpenAI-compatible** notes - Models: OpenAI, DeepSeek, Groq, Ollama, xAI, Mimo - - For **Groq**, the property `x_groq.usage.` - - At this point, **Ollama** does not emit input/output tokens when streaming due to the Ollama OpenAI compatibility layer limitation. (see [ollama #4448 - Streaming Chat Completion via OpenAI API should support stream option to include Usage](https://github.com/ollama/ollama/issues/4448)) + - For **Groq**, the property `x_groq.usage.` + - At this point, **Ollama** does not emit input/output tokens when streaming due to a limitation in the Ollama OpenAI compatibility layer. 
(see [ollama #4448 - Streaming Chat Completion via OpenAI API should support stream option to include Usage](https://github.com/ollama/ollama/issues/4448)) - `prompt_tokens_details` and `completion_tokens_details` will have the value sent by the compatible provider (or None) - **(2)**: **Gemini** tokens - - Right now, with the [Gemini Stream API](https://ai.google.dev/api/rest/v1beta/models/streamGenerateContent), it's not clear whether usage for each event is cumulative or must be summed. It appears to be cumulative, meaning the last message shows the total amount of input, output, and total tokens, so that is the current assumption. See [possible tweet answer](https://twitter.com/jeremychone/status/1813734565967802859) for more info. - + - Right now, with the [Gemini Stream API](https://ai.google.dev/api/rest/v1beta/models/streamGenerateContent), it's not clear whether usage for each event is cumulative or must be summed. It appears to be cumulative, meaning the last message shows the total number of input, output, and total tokens, so that is the current assumption. See [possible tweet answer](https://twitter.com/jeremychone/status/1813734565967802859) for more info. ## Notes on Possible Direction -- Will add more data on ChatResponse and ChatStream, especially metadata about usage. +- Will add more data to ChatResponse and ChatStream, especially usage metadata. - Add vision/image support to chat messages and responses. - Add function calling support to chat messages and responses. - Add `embed` and `embed_batch`. -- Add the AWS Bedrock variants (e.g., Mistral and Anthropic). Most of the work will be on the "interesting" token signature scheme; trying to avoid bringing in large SDKs, this might be a lower-priority feature. +- Add the AWS Bedrock variants (e.g., Mistral and Anthropic). Most of the work will be on the "interesting" token signature scheme. To avoid bringing in large SDKs, this might be a lower-priority feature. - Add the Google Vertex AI variants. 
- May add the Azure OpenAI variant (not sure yet). diff --git a/src/chat/chat_options.rs b/src/chat/chat_options.rs index 2d2cd833..4396167d 100644 --- a/src/chat/chat_options.rs +++ b/src/chat/chat_options.rs @@ -530,6 +530,7 @@ impl ChatOptionsSet<'_, '_> { .or_else(|| self.client.and_then(|client| client.service_tier.as_ref())) } + #[allow(unused)] pub fn extra_headers(&self) -> Option<&Headers> { self.chat .and_then(|chat| chat.extra_headers.as_ref()) From 3d0a5bc1089e0c48ab8c2fbfc4df01caa94650dc Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 17 Jan 2026 10:16:00 -0800 Subject: [PATCH 091/123] . test - openai_resp - fix test_chat_reasoning_minimal_ok --- tests/tests_p_openai_resp.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests_p_openai_resp.rs b/tests/tests_p_openai_resp.rs index 4f69febf..24250d95 100644 --- a/tests/tests_p_openai_resp.rs +++ b/tests/tests_p_openai_resp.rs @@ -14,7 +14,7 @@ const MODEL_NS: &str = "openai_resp::gpt-5-mini"; // openai specific #[tokio::test] async fn test_chat_reasoning_minimal_ok() -> TestResult<()> { - common_tests::common_test_chat_simple_ok("gpt-5-codex-minimal", None).await + common_tests::common_test_chat_simple_ok("gpt-5-minimal", None).await } // endregion: --- Provider Specific From 97beae41ab056bcb306379f941d11c7dd9e5091b Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 17 Jan 2026 13:47:09 -0800 Subject: [PATCH 092/123] . 
v0.5.1 --- BIG-THANKS.md | 34 ++++++++++++++++++++++++++++++++++ CHANGELOG.md | 10 +++++++++- Cargo.toml | 2 +- README.md | 40 ++++------------------------------------ 4 files changed, 48 insertions(+), 38 deletions(-) create mode 100644 BIG-THANKS.md diff --git a/BIG-THANKS.md b/BIG-THANKS.md new file mode 100644 index 00000000..ab56fae3 --- /dev/null +++ b/BIG-THANKS.md @@ -0,0 +1,34 @@ +# Big Thanks to + +- v0.5.1 + - [anagrius](https://github.com/anagrius) for [#119](https://github.com/jeremychone/rust-genai/pull/119) openai_resp assistant content fix + - [BinaryMuse](https://github.com/BinaryMuse) for [#117](https://github.com/jeremychone/rust-genai/pull/117) WebStream status check and [#116](https://github.com/jeremychone/rust-genai/pull/116) extra headers fix + - [vlmutolo](https://github.com/vlmutolo) for [#115](https://github.com/jeremychone/rust-genai/pull/115) Gemini 3 tool thoughtSignature fix +- v0.5.x + - [BinaryMuse](https://github.com/BinaryMuse) for [#114](https://github.com/jeremychone/rust-genai/pull/114) Anthropic ToolCalls streaming fix + - [Himmelschmidt](https://github.com/Himmelschmidt) for [#111](https://github.com/jeremychone/rust-genai/pull/111) Gemini `responseJsonSchema` support, [#103](https://github.com/jeremychone/rust-genai/pull/103) error body capture, and Gemini Thought signatures + - [malyavi-nochum](https://github.com/malyavi-nochum) for [#109](https://github.com/jeremychone/rust-genai/pull/109) Fireworks default streaming fix + - [mengdehong](https://github.com/mengdehong) for [#108](https://github.com/jeremychone/rust-genai/pull/108) Ollama reasoning streaming fix + - [Akagi201](https://github.com/Akagi201) for [#105](https://github.com/jeremychone/rust-genai/pull/105) MIMO model adapter +- v0.1.x .. 
v0.4.x + - [Vagmi Mudumbai](https://github.com/vagmi) for [#96](https://github.com/jeremychone/rust-genai/pull/96) openai audio_type + - [Himmelschmidt](https://github.com/Himmelschmidt) for [#98](https://github.com/jeremychone/rust-genai/pull/98) openai service_tier + - [Bart Carroll](https://github.com/bartCarroll) for [#91](https://github.com/jeremychone/rust-genai/pull/91) Fixed streaming tool calls for openai models + - [Rui Andrada](https://github.com/shingonoide) for [#95](https://github.com/jeremychone/rust-genai/pull/95) refactoring ZHIPU adapter to ZAI + - [Adrien](https://github.com/XciD) Extra headers in requests, seed for chat requests, and fixes (with [Julien Chaumond](https://github.com/julien-c) for extra headers) + - [Andrew Rademacher](https://github.com/AndrewRademacher) for PDF support, Anthropic streamer + - [Jesus Santander](https://github.com/jsantanders) Embedding support [PR #83](https://github.com/jeremychone/rust-genai/pull/83) + - [4t145](https://github.com/4t145) for raw body capture [PR #68](https://github.com/jeremychone/rust-genai/pull/68) + - [Vagmi Mudumbai](https://github.com/vagmi) exec_chat bug fix [PR #86](https://github.com/jeremychone/rust-genai/pull/86) + - [Maximilian Goisser](https://github.com/hobofan) Fix OpenAI adapter to use ServiceTarget + - [ClanceyLu](https://github.com/ClanceyLu) for tool use streaming support, web configuration support, and fixes + - [@SilasMarvin](https://github.com/SilasMarvin) for fixing content/tools issues with some Ollama models [PR #55](https://github.com/jeremychone/rust-genai/pull/55) + - [@una-spirito](https://github.com/luna-spirito) for Gemini `ReasoningEffort::Budget` support + - [@jBernavaPrah](https://github.com/jBernavaPrah) for adding tracing (it was long overdue). [PR #45](https://github.com/jeremychone/rust-genai/pull/45) + - [@GustavoWidman](https://github.com/GustavoWidman) for the initial Gemini tool/function support! 
[PR #41](https://github.com/jeremychone/rust-genai/pull/41) + - [@AdamStrojek](https://github.com/AdamStrojek) for initial image support [PR #36](https://github.com/jeremychone/rust-genai/pull/36) + - [@semtexzv](https://github.com/semtexzv) for `stop_sequences` Anthropic support [PR #34](https://github.com/jeremychone/rust-genai/pull/34) + - [@omarshehab221](https://github.com/omarshehab221) for de/serialize on structs [PR #19](https://github.com/jeremychone/rust-genai/pull/19) + - [@tusharmath](https://github.com/tusharmath) for making webc::Error [PR #12](https://github.com/jeremychone/rust-genai/pull/12) + - [@giangndm](https://github.com/giangndm) for making stream Send [PR #10](https://github.com/jeremychone/rust-genai/pull/10) + - [@stargazing-dino](https://github.com/stargazing-dino) for [PR #2](https://github.com/jeremychone/rust-genai/pull/2), implement Groq completions diff --git a/CHANGELOG.md b/CHANGELOG.md index 4915619a..bf5ccae5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ `.` minor | `-` Fix | `+` Addition | `^` improvement | `!` Change | `*` Refactor +## 2026-01-17 - [v0.5.1](https://github.com/jeremychone/rust-genai/compare/v0.5.0...v0.5.1) + +`!` `Error::WebStream` - added error field to preserve original error +`^` gemini - allow empty tool `thoughtSignature` for Gemini 3 (#115) +`-` webc - check HTTP status in `WebStream` before processing byte stream (#117) +`-` client - ensure extra headers are applied in `exec_chat` and `exec_chat_stream` (#116) +`-` openai_resp - fix assistant message content to use `output_text` (#119) + ## 2026-01-09 - [v0.5.0](https://github.com/jeremychone/rust-genai/compare/v0.4.4...v0.5.0) - `!` zai - change namespace strategy with (zai:: for default, and zai-codding:: for subscription, same Adapter) @@ -406,4 +414,4 @@ Some **API Changes** - See [migration-v_0_3_to_0_4](doc/migration/migration-v_0_ - `+` Added AdapterKindResolver - `-` Adapter::list_models api impl and change -- `^` chat_printer - 
added PrintChatStreamOptions with print_events \ No newline at end of file +- `^` chat_printer - added PrintChatStreamOptions with print_events diff --git a/Cargo.toml b/Cargo.toml index 06721d26..9270da0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.1-WIP" +version = "0.5.1" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" diff --git a/README.md b/README.md index 5b75fe62..f738a32f 100644 --- a/README.md +++ b/README.md @@ -12,13 +12,13 @@ Also supports a custom URL with `ServiceTargetResolver` (see [examples/c06-targe -[Docs for LLMs](doc/for-llm/api-reference-for-llm.md) - Provides a single, ergonomic API for many generative AI providers, such as Anthropic, OpenAI, Gemini, xAI, Ollama, Groq, and more. **NOTE:** Big update with **v0.5.0**: New adapters (BigModel, MIMO), Gemini Thinking support, Anthropic Reasoning Effort, and a more robust internal streaming engine. -## v0.5.0 - (2026-01-09) +[Docs for LLMs](doc/for-llm/api-reference-for-llm.md) | [CHANGELOG](CHANGELOG.md) | [BIG THANKS](BIG-THANKS.md) + +## v0.5.x - (2026-01-09...) - **What's new**: - **New Adapters**: BigModel.cn and the MIMO model adapter (thanks to [Akagi201](https://github.com/Akagi201)). @@ -34,39 +34,7 @@ Provides a single, ergonomic API for many generative AI providers, such as Anthr - PDF, image, and embedding support. - Custom auth, endpoint, and header overrides. 
-See: - - [CHANGELOG](CHANGELOG.md) - -## Big Thanks to - -- v0.5.x - - [BinaryMuse](https://github.com/BinaryMuse) for [#114](https://github.com/jeremychone/rust-genai/pull/114) Anthropic ToolCalls streaming fix - - [Himmelschmidt](https://github.com/Himmelschmidt) for [#111](https://github.com/jeremychone/rust-genai/pull/111) Gemini `responseJsonSchema` support, [#103](https://github.com/jeremychone/rust-genai/pull/103) error body capture, and Gemini Thought signatures - - [malyavi-nochum](https://github.com/malyavi-nochum) for [#109](https://github.com/jeremychone/rust-genai/pull/109) Fireworks default streaming fix - - [mengdehong](https://github.com/mengdehong) for [#108](https://github.com/jeremychone/rust-genai/pull/108) Ollama reasoning streaming fix - - [Akagi201](https://github.com/Akagi201) for [#105](https://github.com/jeremychone/rust-genai/pull/105) MIMO model adapter -- v0.1.x .. v0.4.x - - [Vagmi Mudumbai](https://github.com/vagmi) for [#96](https://github.com/jeremychone/rust-genai/pull/96) openai audio_type - - [Himmelschmidt](https://github.com/Himmelschmidt) for [#98](https://github.com/jeremychone/rust-genai/pull/98) openai service_tier - - [Bart Carroll](https://github.com/bartCarroll) for [#91](https://github.com/jeremychone/rust-genai/pull/91) Fixed streaming tool calls for openai models - - [Rui Andrada](https://github.com/shingonoide) for [#95](https://github.com/jeremychone/rust-genai/pull/95) refactoring ZHIPU adapter to ZAI - - [Adrien](https://github.com/XciD) Extra headers in requests, seed for chat requests, and fixes (with [Julien Chaumond](https://github.com/julien-c) for extra headers) - - [Andrew Rademacher](https://github.com/AndrewRademacher) for PDF support, Anthropic streamer - - [Jesus Santander](https://github.com/jsantanders) Embedding support [PR #83](https://github.com/jeremychone/rust-genai/pull/83) - - [4t145](https://github.com/4t145) for raw body capture [PR #68](https://github.com/jeremychone/rust-genai/pull/68) - - 
[Vagmi Mudumbai](https://github.com/vagmi) exec_chat bug fix [PR #86](https://github.com/jeremychone/rust-genai/pull/86) - - [Maximilian Goisser](https://github.com/hobofan) Fix OpenAI adapter to use ServiceTarget - - [ClanceyLu](https://github.com/ClanceyLu) for tool use streaming support, web configuration support, and fixes - - [@SilasMarvin](https://github.com/SilasMarvin) for fixing content/tools issues with some Ollama models [PR #55](https://github.com/jeremychone/rust-genai/pull/55) - - [@una-spirito](https://github.com/luna-spirito) for Gemini `ReasoningEffort::Budget` support - - [@jBernavaPrah](https://github.com/jBernavaPrah) for adding tracing (it was long overdue). [PR #45](https://github.com/jeremychone/rust-genai/pull/45) - - [@GustavoWidman](https://github.com/GustavoWidman) for the initial Gemini tool/function support! [PR #41](https://github.com/jeremychone/rust-genai/pull/41) - - [@AdamStrojek](https://github.com/AdamStrojek) for initial image support [PR #36](https://github.com/jeremychone/rust-genai/pull/36) - - [@semtexzv](https://github.com/semtexzv) for `stop_sequences` Anthropic support [PR #34](https://github.com/jeremychone/rust-genai/pull/34) - - [@omarshehab221](https://github.com/omarshehab221) for de/serialize on structs [PR #19](https://github.com/jeremychone/rust-genai/pull/19) - - [@tusharmath](https://github.com/tusharmath) for making webc::Error [PR #12](https://github.com/jeremychone/rust-genai/pull/12) - - [@giangndm](https://github.com/giangndm) for making stream Send [PR #10](https://github.com/jeremychone/rust-genai/pull/10) - - [@stargazing-dino](https://github.com/stargazing-dino) for [PR #2](https://github.com/jeremychone/rust-genai/pull/2), implement Groq completions +See [CHANGELOG](CHANGELOG.md) ## Usage examples From 6c43d4b4dc45a99a2b43887bccd2887885faafe3 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 17 Jan 2026 14:07:25 -0800 Subject: [PATCH 093/123] . 
0.5.2-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9270da0c..97eae5d3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.1" +version = "0.5.2-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 0f7c7633d048153423bbd4df834e9191706095c5 Mon Sep 17 00:00:00 2001 From: Vagmi Mudumbai Date: Wed, 21 Jan 2026 17:31:46 -0400 Subject: [PATCH 094/123] ^ gemini - Include thoughts and capture thoughts as reasoning content (#121) --- src/adapter/adapters/gemini/adapter_impl.rs | 27 +++++++++++++++++---- src/adapter/adapters/gemini/streamer.rs | 15 ++++++++++++ 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/adapter/adapters/gemini/adapter_impl.rs b/src/adapter/adapters/gemini/adapter_impl.rs index 93c6ba76..41f30c0f 100644 --- a/src/adapter/adapters/gemini/adapter_impl.rs +++ b/src/adapter/adapters/gemini/adapter_impl.rs @@ -161,6 +161,8 @@ impl Adapter for GeminiAdapter { else { insert_gemini_thinking_budget_value(&mut payload, &computed_reasoning_effort)?; } + // -- Always include thoughts when reasoning effort is set since you are already paying for them + payload.x_insert("/generationConfig/thinkingConfig/includeThoughts", true)?; } // Note: It's unclear from the spec if the content of systemInstruction should have a role. 
@@ -239,6 +241,7 @@ impl Adapter for GeminiAdapter { } = gemini_response; let mut thoughts: Vec = Vec::new(); + let mut reasonings: Vec = Vec::new(); let mut texts: Vec = Vec::new(); let mut tool_calls: Vec = Vec::new(); @@ -247,6 +250,7 @@ impl Adapter for GeminiAdapter { GeminiChatContent::Text(text) => texts.push(text), GeminiChatContent::ToolCall(tool_call) => tool_calls.push(tool_call), GeminiChatContent::ThoughtSignature(thought) => thoughts.push(thought), + GeminiChatContent::Reasoning(reasoning_text) => reasonings.push(reasoning_text), } } @@ -269,13 +273,20 @@ impl Adapter for GeminiAdapter { parts.push(ContentPart::Text(combined_text)); } } + let mut reasoning_text = String::new(); + if !reasonings.is_empty() { + for reasoning in &reasonings { + reasoning_text.push_str(reasoning); + } + } parts.extend(tool_calls.into_iter().map(ContentPart::ToolCall)); let content = MessageContent::from_parts(parts); + Ok(ChatResponse { content, - reasoning_content: None, + reasoning_content: Some(reasoning_text), model_iden, provider_model_iden, usage, @@ -351,12 +362,12 @@ impl GeminiAdapter { for mut part in parts { // -- Capture eventual thought signature { - if let Some(thought) = part + if let Some(thought_signature) = part .x_take::("thoughtSignature") .ok() .and_then(|v| if let Value::String(v) = v { Some(v) } else { None }) { - content.push(GeminiChatContent::ThoughtSignature(thought)); + content.push(GeminiChatContent::ThoughtSignature(thought_signature)); } // Note: sometime the thought is in "thought" (undocumented, but observed in some cases or older models?) // But for Gemini 3 it is thoughtSignature. Keeping this just in case or for backward compat if it was used. 
@@ -365,9 +376,14 @@ impl GeminiAdapter { else if let Some(thought) = part .x_take::("thought") .ok() - .and_then(|v| if let Value::String(v) = v { Some(v) } else { None }) + .and_then(|v| if let Value::Bool(v) = v { Some(v) } else { None }) { - content.push(GeminiChatContent::ThoughtSignature(thought)); + if thought { + if let Some(val) = part.x_take::("text") + .ok().and_then(|v| if let Value::String(v) = v { Some(v) } else {None}) { + content.push(GeminiChatContent::Reasoning(val)); + } + } } } @@ -718,6 +734,7 @@ pub(super) struct GeminiChatResponse { pub(super) enum GeminiChatContent { Text(String), ToolCall(ToolCall), + Reasoning(String), ThoughtSignature(String), } diff --git a/src/adapter/adapters/gemini/streamer.rs b/src/adapter/adapters/gemini/streamer.rs index f02ac5e9..ab9c2ef6 100644 --- a/src/adapter/adapters/gemini/streamer.rs +++ b/src/adapter/adapters/gemini/streamer.rs @@ -100,11 +100,13 @@ impl futures::Stream for GeminiStreamer { // -- Extract text and toolcall // WARNING: Assume that only ONE tool call per message (or take the last one) let mut stream_text_content: String = String::new(); + let mut stream_reasoning_content: Option = None; let mut stream_tool_call: Option = None; let mut stream_thought: Option = None; for g_content_item in content { match g_content_item { + GeminiChatContent::Reasoning(reasoning) => stream_reasoning_content = Some(reasoning), GeminiChatContent::Text(text) => stream_text_content.push_str(&text), GeminiChatContent::ToolCall(tool_call) => stream_tool_call = Some(tool_call), GeminiChatContent::ThoughtSignature(thought) => stream_thought = Some(thought), @@ -128,6 +130,19 @@ impl futures::Stream for GeminiStreamer { self.pending_events.push_back(InterStreamEvent::ThoughtSignatureChunk(thought)); } + if let Some(reasoning_content) = stream_reasoning_content { + // Capture reasoning content + if self.options.capture_content { + match self.captured_data.reasoning_content { + Some(ref mut rc) => 
rc.push_str(&reasoning_content), + None => self.captured_data.reasoning_content = Some(reasoning_content.clone()), + } + } + if self.options.capture_usage { + self.captured_data.usage = Some(usage.clone()); + } + self.pending_events.push_back(InterStreamEvent::ReasoningChunk(reasoning_content)); + } // 2. Text if !stream_text_content.is_empty() { From b37321c79308e29fddd77f4a6d6bc6a7a84688bd Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Wed, 21 Jan 2026 15:15:36 -0800 Subject: [PATCH 095/123] ^ test - ollama - add tool tests --- tests/tests_p_ollama.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/tests_p_ollama.rs b/tests/tests_p_ollama.rs index 3c4ff68a..8bac571f 100644 --- a/tests/tests_p_ollama.rs +++ b/tests/tests_p_ollama.rs @@ -6,8 +6,9 @@ use genai::resolver::AuthData; // "gemma3:4b" "phi3:latest" "gpt-oss:20b" // NOTE: "gpt-oss:20b" has some issues on json_mode, stop_sequence -const MODEL: &str = "gemma3:4b"; // +const MODEL: &str = "gemma3:4b"; // const MODEL_NS: &str = "ollama::gemma3:4b"; +const MODEL_TOOL: &str = "ollama::gpt-oss:20b"; // region: --- Chat @@ -65,6 +66,19 @@ async fn test_chat_stream_capture_content_ok() -> TestResult<()> { // endregion: --- Chat Stream Tests +// region: --- Tool Tests + +#[tokio::test] +async fn test_tool_simple_ok() -> TestResult<()> { + common_tests::common_test_tool_simple_ok(MODEL_TOOL).await +} + +#[tokio::test] +async fn test_tool_full_flow_ok() -> TestResult<()> { + common_tests::common_test_tool_full_flow_ok(MODEL_TOOL).await +} +// endregion: --- Tool Tests + /* Added Binary Tests region (commented-out until Ollama supports binary inputs) */ // region: --- Binary Tests From a4a1883fda3e0caaa69f4314f0c8d64c1e373664 Mon Sep 17 00:00:00 2001 From: Michelle Tilley Date: Wed, 21 Jan 2026 15:16:20 -0800 Subject: [PATCH 096/123] - fix - Ollama tool calls are silently swallowed in OpenAI adapter (streaming) (#124) --- src/adapter/adapters/openai/streamer.rs | 91 
++++++++++++++++--------- 1 file changed, 60 insertions(+), 31 deletions(-) diff --git a/src/adapter/adapters/openai/streamer.rs b/src/adapter/adapters/openai/streamer.rs index 7bdedf04..e16e5625 100644 --- a/src/adapter/adapters/openai/streamer.rs +++ b/src/adapter/adapters/openai/streamer.rs @@ -29,6 +29,41 @@ impl OpenAIStreamer { captured_data: Default::default(), } } + + /// Captures a single tool call into `captured_data.tool_calls`, merging with existing if needed. + /// Returns the (possibly merged) tool call for use in events. + fn capture_tool_call(&mut self, index: usize, call_id: String, fn_name: String, arguments: String) -> ToolCall { + let tool_call = ToolCall { + call_id: call_id.clone(), + fn_name: fn_name.clone(), + fn_arguments: Value::String(arguments.clone()), + thought_signatures: None, + }; + + if !self.options.capture_tool_calls { + return tool_call; + } + + let calls = self.captured_data.tool_calls.get_or_insert_with(Vec::new); + + if let Some(existing_call) = calls.get_mut(index) { + // Merge with existing: accumulate arguments as strings + if let Some(existing_args) = existing_call.fn_arguments.as_str() { + let accumulated = format!("{existing_args}{arguments}"); + existing_call.fn_arguments = Value::String(accumulated); + } + // Update call_id and fn_name on first chunk that has them + if !fn_name.is_empty() { + existing_call.call_id = call_id; + existing_call.fn_name = fn_name; + } + existing_call.clone() + } else { + // New tool call - resize to handle potential gaps (though unlikely in streaming) + calls.resize(index + 1, tool_call.clone()); + tool_call + } + } } impl futures::Stream for OpenAIStreamer { @@ -127,6 +162,30 @@ impl futures::Stream for OpenAIStreamer { // as there might be other messages, and the last one contains data: `[DONE]` // NOTE: xAI has no `finish_reason` when not finished, so, need to just account for both null/absent if let Ok(_finish_reason) = first_choice.x_take::("finish_reason") { + // NOTE: Some providers 
(e.g., Ollama) send tool_calls AND finish_reason in the same message. + // We need to capture tool_calls here before continuing to the next message. + if let Ok(delta_tool_calls) = first_choice.x_take::("/delta/tool_calls") + && delta_tool_calls != Value::Null + { + if let Some(delta_tool_calls) = delta_tool_calls.as_array() { + for tool_call_obj_val in delta_tool_calls { + let mut tool_call_obj = tool_call_obj_val.clone(); + if let (Ok(index), Ok(mut function)) = ( + tool_call_obj.x_take::("index"), + tool_call_obj.x_take::("function"), + ) { + let call_id = tool_call_obj + .x_take::("id") + .unwrap_or_else(|_| format!("call_{index}")); + let fn_name = function.x_take::("name").unwrap_or_default(); + let arguments = function.x_take::("arguments").unwrap_or_default(); + + self.capture_tool_call(index as usize, call_id, fn_name, arguments); + } + } + } + } + // NOTE: For Groq, the usage is captured when finish_reason indicates stopping, and in the `/x_groq/usage` if self.options.capture_usage { match adapter_kind { @@ -174,38 +233,8 @@ impl futures::Stream for OpenAIStreamer { .unwrap_or_else(|_| format!("call_{index}")); let fn_name = function.x_take::("name").unwrap_or_default(); let arguments = function.x_take::("arguments").unwrap_or_default(); - // Don't parse yet - accumulate as string first - let mut tool_call = crate::chat::ToolCall { - call_id: call_id.clone(), - fn_name: fn_name.clone(), - fn_arguments: serde_json::Value::String(arguments.clone()), - thought_signatures: None, - }; - // Capture the tool call if enabled - if self.options.capture_tool_calls { - let calls = self.captured_data.tool_calls.get_or_insert_with(Vec::new); - let idx = index as usize; - - if let Some(call) = calls.get_mut(idx) { - // Accumulate arguments as strings, don't parse until complete - if let Some(existing) = call.fn_arguments.as_str() { - let accumulated = format!("{existing}{arguments}"); - call.fn_arguments = Value::String(accumulated); - } - - // Update call_id and 
fn_name on first chunk that has them - if !fn_name.is_empty() { - call.call_id = call_id.clone(); - call.fn_name = fn_name.clone(); - } - tool_call = call.clone(); - } else { - // If it doesn't exist, we add it. - // We use resize to handle potential gaps (though unlikely in streaming). - calls.resize(idx + 1, tool_call.clone()); - } - } + let tool_call = self.capture_tool_call(index as usize, call_id, fn_name, arguments); // Return the ToolCallChunk event return Poll::Ready(Some(Ok(InterStreamEvent::ToolCallChunk(tool_call)))); From ee37c820c91f72ad041c1811b7ae7e7ec3e646b9 Mon Sep 17 00:00:00 2001 From: Vagmi Mudumbai Date: Thu, 22 Jan 2026 22:29:06 -0400 Subject: [PATCH 097/123] ^ Anthropic - Add separate reasoning content and thought signature for anthropic messages api (#125) --- src/adapter/adapters/anthropic/streamer.rs | 28 +++++++++++++++------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/adapter/adapters/anthropic/streamer.rs b/src/adapter/adapters/anthropic/streamer.rs index 1c18c376..8ea06c73 100644 --- a/src/adapter/adapters/anthropic/streamer.rs +++ b/src/adapter/adapters/anthropic/streamer.rs @@ -115,17 +115,27 @@ impl futures::Stream for AnthropicStreamer { continue; } InProgressBlock::Thinking => { - let thinking: String = data.x_take("/delta/thinking")?; - - // Add to the captured_thinking if chat options say so - if self.options.capture_reasoning_content { - match self.captured_data.reasoning_content { - Some(ref mut r) => r.push_str(&thinking), - None => self.captured_data.reasoning_content = Some(thinking.clone()), + if let Ok(thinking) = data.x_take::("/delta/thinking") { + // Add to the captured_thinking if chat options say so + if self.options.capture_reasoning_content { + match self.captured_data.reasoning_content { + Some(ref mut r) => r.push_str(&thinking), + None => self.captured_data.reasoning_content = Some(thinking.clone()), + } } - } - return Poll::Ready(Some(Ok(InterStreamEvent::ReasoningChunk(thinking)))); + 
return Poll::Ready(Some(Ok(InterStreamEvent::ReasoningChunk(thinking)))); + } else if let Ok(signature) = data.x_take::("/delta/signature") { + return Poll::Ready(Some(Ok(InterStreamEvent::ThoughtSignatureChunk( + signature, + )))); + } else { + // If it is thinking but no thinking or signature field, we log and skip. + tracing::warn!( + "content_block_delta for thinking block but no thinking or signature found: {data:?}" + ); + continue; + } } } } From 9cc0217047bf48b4ad62e9f09549ffe03add7a21 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Tue, 27 Jan 2026 18:52:54 -0800 Subject: [PATCH 098/123] - Does not capture body when json parse fail (#128) --- src/webc/error.rs | 3 +++ src/webc/web_client.rs | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/webc/error.rs b/src/webc/error.rs index b9a68d33..8e658034 100644 --- a/src/webc/error.rs +++ b/src/webc/error.rs @@ -11,6 +11,9 @@ pub enum Error { #[display("Response content type '{content_type}' is not JSON as expected. Response body:\n{body}")] ResponseFailedNotJson { content_type: String, body: String }, + #[display("Response was invalid json. Cause:\n'{cause}'\nBody:\n{body}")] + ResponseFailedInvalidJson { body: String, cause: String }, + #[display("Request failed with status code '{status}'. Response body:\n{body}")] ResponseFailedStatus { status: StatusCode, diff --git a/src/webc/web_client.rs b/src/webc/web_client.rs index 34b8010a..f9526878 100644 --- a/src/webc/web_client.rs +++ b/src/webc/web_client.rs @@ -105,10 +105,15 @@ impl WebResponse { // Capture the body let ct = header_map.get("content-type").and_then(|v| v.to_str().ok()).unwrap_or_default(); + let body = res.text().await?; + let body = if ct.starts_with("application/json") { - res.json::().await? 
+ let value: Value = serde_json::from_str(&body).map_err(|err| Error::ResponseFailedInvalidJson { + body, + cause: err.to_string(), + })?; + value } else { - let body = res.text().await?; return Err(Error::ResponseFailedNotJson { content_type: ct.to_string(), body, From f19d08c0ef7ca7bbe966eebc17cdd0e7f4c27314 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Tue, 27 Jan 2026 18:55:36 -0800 Subject: [PATCH 099/123] . version 0.5.2 --- CHANGELOG.md | 8 ++++++++ Cargo.toml | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf5ccae5..86f22539 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ `.` minor | `-` Fix | `+` Addition | `^` improvement | `!` Change | `*` Refactor +## 2026-01-27 - [v0.5.2](https://github.com/jeremychone/rust-genai/compare/v0.5.1...v0.5.2) + +- `-` Does not capture body when json parse fail (#128) +- `^` Anthropic - Add separate reasoning content and thought signature for anthropic messages api (#125) +- `-` fix - Ollama tool calls are silently swallowed in OpenAI adapter (streaming) (#124) +- `^` test - ollama - add tool tests +- `^` gemini - Include thoughts and capture thoughts as reasoning content (#121) + ## 2026-01-17 - [v0.5.1](https://github.com/jeremychone/rust-genai/compare/v0.5.0...v0.5.1) `!` `Error::WebStream` - added error field to preserve original error diff --git a/Cargo.toml b/Cargo.toml index 97eae5d3..d4f56d0f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.2-WIP" +version = "0.5.2" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From bda3edbe5bd4be55ba6dadb0c2fbc2ea0530b130 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Tue, 27 Jan 2026 19:01:37 -0800 Subject: [PATCH 100/123] . 
0.5.3-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index d4f56d0f..dcd22855 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.2" +version = "0.5.3-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 8e280188406f1bd64a1b57ca39522b7b6c268764 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Wed, 28 Jan 2026 08:03:00 -0800 Subject: [PATCH 101/123] . ChatRole - Add PartialEq / Eq (#131) --- src/chat/chat_message.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chat/chat_message.rs b/src/chat/chat_message.rs index bb2a2be4..b703cb38 100644 --- a/src/chat/chat_message.rs +++ b/src/chat/chat_message.rs @@ -111,7 +111,7 @@ impl From for MessageOptions { // endregion: --- MessageOptions /// Chat roles recognized across providers. -#[derive(Debug, Clone, Serialize, Deserialize, derive_more::Display)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, derive_more::Display)] #[allow(missing_docs)] pub enum ChatRole { System, From 9682697075768858173894c9090be67facb60004 Mon Sep 17 00:00:00 2001 From: Ross MacLeod Date: Wed, 28 Jan 2026 17:43:49 -0500 Subject: [PATCH 102/123] =?UTF-8?q?Fix=20incorrect=20empty=20output=20from?= =?UTF-8?q?=20MessageContent::joined=5Ftexts=20for=20=E2=89=A5=202=20text?= =?UTF-8?q?=20parts=20(fixes=20#135)=20(#136)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Ross MacLeod --- src/chat/message_content.rs | 51 ++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/src/chat/message_content.rs b/src/chat/message_content.rs index b496ec7d..a7eb2724 100644 --- a/src/chat/message_content.rs +++ b/src/chat/message_content.rs @@ -262,9 +262,7 @@ impl MessageContent { let mut combined = 
String::new(); for text in texts { - if !combined.is_empty() { - support::combine_text_with_empty_line(&mut combined, text); - } + support::combine_text_with_empty_line(&mut combined, text); } Some(combined) } @@ -374,3 +372,50 @@ impl From> for MessageContent { } // endregion: --- Froms + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_message_content_joined_texts_empty() { + assert_eq!(MessageContent::from_parts(vec![]).joined_texts(), None); + } + + #[test] + fn test_message_content_joined_texts_single_part() { + assert_eq!(MessageContent::from_parts(vec![ContentPart::Text("Hello".to_string())]).joined_texts(), Some("Hello".to_string())); + } + + #[test] + fn test_message_content_joined_texts_two_parts() { + assert_eq!( + MessageContent::from_parts(vec![ + ContentPart::Text("Hello".to_string()), + ContentPart::Text("World".to_string()), + ]).joined_texts(), + Some("Hello\n\nWorld".to_string()) + ); + } + + #[test] + fn test_message_content_into_joined_texts_empty() { + assert_eq!(MessageContent::from_parts(vec![]).into_joined_texts(), None); + } + + #[test] + fn test_message_content_into_joined_texts_single_part() { + assert_eq!(MessageContent::from_parts(vec![ContentPart::Text("Hello".to_string())]).into_joined_texts(), Some("Hello".to_string())); + } + + #[test] + fn test_message_content_into_joined_texts_two_parts() { + assert_eq!( + MessageContent::from_parts(vec![ + ContentPart::Text("Hello".to_string()), + ContentPart::Text("World".to_string()), + ]).into_joined_texts(), + Some("Hello\n\nWorld".to_string()) + ); + } +} From b403a55efb20315e28a54a7db3a6c4db7a53ff75 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Wed, 28 Jan 2026 15:42:20 -0800 Subject: [PATCH 103/123] . 
tracing - add traced to web-client for ai response (#132) --- src/webc/web_client.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/webc/web_client.rs b/src/webc/web_client.rs index f9526878..21d2cbc5 100644 --- a/src/webc/web_client.rs +++ b/src/webc/web_client.rs @@ -92,6 +92,7 @@ impl WebResponse { if !status.is_success() { let headers = res.headers().clone(); let body = res.text().await?; + tracing::trace!("AI Response failed. Body:\n{body}"); return Err(Error::ResponseFailedStatus { status, body, @@ -108,6 +109,7 @@ impl WebResponse { let body = res.text().await?; let body = if ct.starts_with("application/json") { + tracing::trace!("AI Response body:\n{body}"); let value: Value = serde_json::from_str(&body).map_err(|err| Error::ResponseFailedInvalidJson { body, cause: err.to_string(), From 0b2366e1393689bbd3e3af9e5c0b05af32f7ee67 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Wed, 28 Jan 2026 20:09:50 -0800 Subject: [PATCH 104/123] . gemini test - add test_tool_google_web_search_ok --- tests/tests_p_gemini.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/tests_p_gemini.rs b/tests/tests_p_gemini.rs index 3a48e097..27e8e18d 100644 --- a/tests/tests_p_gemini.rs +++ b/tests/tests_p_gemini.rs @@ -8,6 +8,7 @@ use genai::resolver::AuthData; // "gemini-2.5-flash" "gemini-2.5-pro" "gemini-2.5-flash-lite" // "gemini-2.5-flash-zero" const MODEL_GPRO_3: &str = "gemini-3-pro-preview"; +const MODEL_FLASH_3: &str = "gemini-3-flash-preview"; // pure gem, fast, cheap, and good! 
const MODEL: &str = "gemini-2.5-flash"; const MODEL_NS: &str = "gemini::gemini-2.5-flash"; @@ -171,6 +172,29 @@ async fn test_tool_deterministic_history_gemini_3_ok() -> TestResult<()> { Ok(()) } + +// NOTE: Issue of this test is that it is pretty slow +#[tokio::test] +async fn test_tool_google_web_search_ok() -> TestResult<()> { + use genai::chat::{ChatRequest, Tool}; + use serde_json::json; + + // -- Fixtures & Setup + let client = genai::Client::default(); + let web_search_tool = Tool::new("googleSearch").with_config(json!({})); + let chat_req = + ChatRequest::from_user("What is the latest version of Rust? (be concise)").append_tool(web_search_tool); + + // Exec + let res = client.exec_chat(MODEL_GPRO_3, chat_req, None).await?; + + // Check + let res_txt = res.content.into_first_text().ok_or("Should have result")?; + assert!(res_txt.contains("Rust"), "should contains 'Rust'"); + + Ok(()) +} + // endregion: --- Tool Tests // region: --- Resolver Tests From fa72baa711d0d67c5d6c92a0b034223547e0ddd3 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Thu, 29 Jan 2026 07:41:58 -0800 Subject: [PATCH 105/123] . test - use flash 3 for test_tool_google_web_search_ok --- tests/tests_p_gemini.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests_p_gemini.rs b/tests/tests_p_gemini.rs index 27e8e18d..aec90373 100644 --- a/tests/tests_p_gemini.rs +++ b/tests/tests_p_gemini.rs @@ -186,7 +186,7 @@ async fn test_tool_google_web_search_ok() -> TestResult<()> { ChatRequest::from_user("What is the latest version of Rust? (be concise)").append_tool(web_search_tool); // Exec - let res = client.exec_chat(MODEL_GPRO_3, chat_req, None).await?; + let res = client.exec_chat(MODEL_FLASH_3, chat_req, None).await?; // Check let res_txt = res.content.into_first_text().ok_or("Should have result")?; From 6bba9f08ea5f3831e3fcb6c98287d494b98b25f1 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 31 Jan 2026 09:57:09 -0800 Subject: [PATCH 106/123] . 
minor refact - webc post new take & of payload (did not need to own payload) --- src/client/client_impl.rs | 34 +++++++++++++++++----------------- src/webc/web_client.rs | 6 +++--- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/client/client_impl.rs b/src/client/client_impl.rs index b85b32d9..0994269c 100644 --- a/src/client/client_impl.rs +++ b/src/client/client_impl.rs @@ -83,14 +83,14 @@ impl Client { headers = override_headers; }; - let web_res = - self.web_client() - .do_post(&url, &headers, payload) - .await - .map_err(|webc_error| Error::WebModelCall { - model_iden: model.clone(), - webc_error, - })?; + let web_res = self + .web_client() + .do_post(&url, &headers, &payload) + .await + .map_err(|webc_error| Error::WebModelCall { + model_iden: model.clone(), + webc_error, + })?; let chat_res = AdapterDispatcher::to_chat_response(model, web_res, options_set)?; @@ -137,7 +137,7 @@ impl Client { let reqwest_builder = self .web_client() - .new_req_builder(&url, &headers, payload) + .new_req_builder(&url, &headers, &payload) .map_err(|webc_error| Error::WebModelCall { model_iden: model.clone(), webc_error, @@ -188,14 +188,14 @@ impl Client { let WebRequestData { headers, payload, url } = AdapterDispatcher::to_embed_request_data(target, embed_req, options_set.clone())?; - let web_res = - self.web_client() - .do_post(&url, &headers, payload) - .await - .map_err(|webc_error| Error::WebModelCall { - model_iden: model.clone(), - webc_error, - })?; + let web_res = self + .web_client() + .do_post(&url, &headers, &payload) + .await + .map_err(|webc_error| Error::WebModelCall { + model_iden: model.clone(), + webc_error, + })?; let res = AdapterDispatcher::to_embed_response(model, web_res, options_set)?; diff --git a/src/webc/web_client.rs b/src/webc/web_client.rs index 21d2cbc5..cb2f17d6 100644 --- a/src/webc/web_client.rs +++ b/src/webc/web_client.rs @@ -45,7 +45,7 @@ impl WebClient { Ok(response) } - pub async fn do_post(&self, url: &str, headers: 
&Headers, content: Value) -> Result { + pub async fn do_post(&self, url: &str, headers: &Headers, content: &Value) -> Result { let reqwest_builder = self.new_req_builder(url, headers, content)?; let reqwest_res = reqwest_builder.send().await?; @@ -55,14 +55,14 @@ impl WebClient { Ok(response) } - pub fn new_req_builder(&self, url: &str, headers: &Headers, content: Value) -> Result { + pub fn new_req_builder(&self, url: &str, headers: &Headers, content: &Value) -> Result { let method = Method::POST; let mut reqwest_builder = self.reqwest_client.request(method, url); for (k, v) in headers.iter() { reqwest_builder = reqwest_builder.header(k, v); } - reqwest_builder = reqwest_builder.json(&content); + reqwest_builder = reqwest_builder.json(content); Ok(reqwest_builder) } From b68f0de18e489987ed9bd89c95147e7e00fb7547 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 31 Jan 2026 10:37:09 -0800 Subject: [PATCH 107/123] > refactor captured_raw_body into client .exec_chat (prep for #137) --- .../adapters/anthropic/adapter_impl.rs | 6 +-- src/adapter/adapters/cohere/adapter_impl.rs | 5 +-- src/adapter/adapters/gemini/adapter_impl.rs | 44 +++++++++---------- src/adapter/adapters/openai/adapter_impl.rs | 4 +- src/chat/chat_response.rs | 3 +- src/client/client_impl.rs | 6 ++- 6 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/adapter/adapters/anthropic/adapter_impl.rs b/src/adapter/adapters/anthropic/adapter_impl.rs index bb08ce66..08b66f9a 100644 --- a/src/adapter/adapters/anthropic/adapter_impl.rs +++ b/src/adapter/adapters/anthropic/adapter_impl.rs @@ -244,12 +244,10 @@ impl Adapter for AnthropicAdapter { fn to_chat_response( model_iden: ModelIden, web_response: WebResponse, - options_set: ChatOptionsSet<'_, '_>, + _options_set: ChatOptionsSet<'_, '_>, ) -> Result { let WebResponse { mut body, .. 
} = web_response; - let captured_raw_body = options_set.capture_raw_body().unwrap_or_default().then(|| body.clone()); - // -- Capture the provider_model_iden // TODO: Need to be implemented (if available), for now, just clone model_iden let provider_model_name: Option = body.x_remove("model").ok(); @@ -309,7 +307,7 @@ impl Adapter for AnthropicAdapter { model_iden, provider_model_iden, usage, - captured_raw_body, + captured_raw_body: None, // Set by the client exec_chat }) } diff --git a/src/adapter/adapters/cohere/adapter_impl.rs b/src/adapter/adapters/cohere/adapter_impl.rs index ef22e2ae..e0e07a16 100644 --- a/src/adapter/adapters/cohere/adapter_impl.rs +++ b/src/adapter/adapters/cohere/adapter_impl.rs @@ -118,10 +118,9 @@ impl Adapter for CohereAdapter { fn to_chat_response( model_iden: ModelIden, web_response: WebResponse, - options_set: ChatOptionsSet<'_, '_>, + _options_set: ChatOptionsSet<'_, '_>, ) -> Result { let WebResponse { mut body, .. } = web_response; - let captured_raw_body = options_set.capture_raw_body().unwrap_or_default().then(|| body.clone()); // -- Capture the provider_model_iden // TODO: Need to be implemented (if available), for now, just clone model_iden @@ -148,7 +147,7 @@ impl Adapter for CohereAdapter { model_iden, provider_model_iden, usage, - captured_raw_body, + captured_raw_body: None, // Set by the client exec_chat }) } diff --git a/src/adapter/adapters/gemini/adapter_impl.rs b/src/adapter/adapters/gemini/adapter_impl.rs index 41f30c0f..c8b1cc31 100644 --- a/src/adapter/adapters/gemini/adapter_impl.rs +++ b/src/adapter/adapters/gemini/adapter_impl.rs @@ -161,8 +161,8 @@ impl Adapter for GeminiAdapter { else { insert_gemini_thinking_budget_value(&mut payload, &computed_reasoning_effort)?; } - // -- Always include thoughts when reasoning effort is set since you are already paying for them - payload.x_insert("/generationConfig/thinkingConfig/includeThoughts", true)?; + // -- Always include thoughts when reasoning effort is set since 
you are already paying for them + payload.x_insert("/generationConfig/thinkingConfig/includeThoughts", true)?; } // Note: It's unclear from the spec if the content of systemInstruction should have a role. @@ -224,12 +224,10 @@ impl Adapter for GeminiAdapter { fn to_chat_response( model_iden: ModelIden, web_response: WebResponse, - options_set: ChatOptionsSet<'_, '_>, + _options_set: ChatOptionsSet<'_, '_>, ) -> Result { let WebResponse { mut body, .. } = web_response; - let captured_raw_body = options_set.capture_raw_body().unwrap_or_default().then(|| body.clone()); - // -- Capture the provider_model_iden // TODO: Need to be implemented (if available), for now, just clone model_iden let provider_model_name: Option = body.x_remove("modelVersion").ok(); @@ -241,7 +239,7 @@ impl Adapter for GeminiAdapter { } = gemini_response; let mut thoughts: Vec = Vec::new(); - let mut reasonings: Vec = Vec::new(); + let mut reasonings: Vec = Vec::new(); let mut texts: Vec = Vec::new(); let mut tool_calls: Vec = Vec::new(); @@ -250,7 +248,7 @@ impl Adapter for GeminiAdapter { GeminiChatContent::Text(text) => texts.push(text), GeminiChatContent::ToolCall(tool_call) => tool_calls.push(tool_call), GeminiChatContent::ThoughtSignature(thought) => thoughts.push(thought), - GeminiChatContent::Reasoning(reasoning_text) => reasonings.push(reasoning_text), + GeminiChatContent::Reasoning(reasoning_text) => reasonings.push(reasoning_text), } } @@ -273,24 +271,23 @@ impl Adapter for GeminiAdapter { parts.push(ContentPart::Text(combined_text)); } } - let mut reasoning_text = String::new(); - if !reasonings.is_empty() { - for reasoning in &reasonings { - reasoning_text.push_str(reasoning); - } - } + let mut reasoning_text = String::new(); + if !reasonings.is_empty() { + for reasoning in &reasonings { + reasoning_text.push_str(reasoning); + } + } parts.extend(tool_calls.into_iter().map(ContentPart::ToolCall)); let content = MessageContent::from_parts(parts); - Ok(ChatResponse { content, 
reasoning_content: Some(reasoning_text), model_iden, provider_model_iden, usage, - captured_raw_body, + captured_raw_body: None, // Set by the client exec_chat }) } @@ -378,12 +375,15 @@ impl GeminiAdapter { .ok() .and_then(|v| if let Value::Bool(v) = v { Some(v) } else { None }) { - if thought { - if let Some(val) = part.x_take::("text") - .ok().and_then(|v| if let Value::String(v) = v { Some(v) } else {None}) { - content.push(GeminiChatContent::Reasoning(val)); - } - } + if thought { + if let Some(val) = part + .x_take::("text") + .ok() + .and_then(|v| if let Value::String(v) = v { Some(v) } else { None }) + { + content.push(GeminiChatContent::Reasoning(val)); + } + } } } @@ -734,7 +734,7 @@ pub(super) struct GeminiChatResponse { pub(super) enum GeminiChatContent { Text(String), ToolCall(ToolCall), - Reasoning(String), + Reasoning(String), ThoughtSignature(String), } diff --git a/src/adapter/adapters/openai/adapter_impl.rs b/src/adapter/adapters/openai/adapter_impl.rs index 93d1a8e0..1a91df14 100644 --- a/src/adapter/adapters/openai/adapter_impl.rs +++ b/src/adapter/adapters/openai/adapter_impl.rs @@ -69,8 +69,6 @@ impl Adapter for OpenAIAdapter { ) -> Result { let WebResponse { mut body, .. } = web_response; - let captured_raw_body = options_set.capture_raw_body().unwrap_or_default().then(|| body.clone()); - // -- Capture the provider_model_iden let provider_model_name: Option = body.x_remove("model").ok(); let provider_model_iden = model_iden.from_optional_name(provider_model_name); @@ -136,7 +134,7 @@ impl Adapter for OpenAIAdapter { model_iden, provider_model_iden, usage, - captured_raw_body, + captured_raw_body: None, // Set by the client exec_chat }) } diff --git a/src/chat/chat_response.rs b/src/chat/chat_response.rs index a4624260..594e2954 100644 --- a/src/chat/chat_response.rs +++ b/src/chat/chat_response.rs @@ -30,7 +30,8 @@ pub struct ChatResponse { /// Token usage reported by the provider. 
pub usage: Usage, - /// Raw response body for provider-specific features. + /// IMPORTANT: (since 0.5.3) This is populated at the client.exec_chat when the options capture_raw_body is set to true + /// Raw response body (only if asked via options.capture_raw_body) pub captured_raw_body: Option, } diff --git a/src/client/client_impl.rs b/src/client/client_impl.rs index 0994269c..5b639138 100644 --- a/src/client/client_impl.rs +++ b/src/client/client_impl.rs @@ -92,7 +92,11 @@ impl Client { webc_error, })?; - let chat_res = AdapterDispatcher::to_chat_response(model, web_res, options_set)?; + // Note: here we capture/clone the raw body if set in the options_set + let captured_raw_body = options_set.capture_raw_body().unwrap_or_default().then(|| web_res.body.clone()); + + let mut chat_res = AdapterDispatcher::to_chat_response(model, web_res, options_set)?; + chat_res.captured_raw_body = captured_raw_body; Ok(chat_res) } From 295d8cdd65cba7398e1c13714af104da2d609604 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 31 Jan 2026 11:23:17 -0800 Subject: [PATCH 108/123] ^ error - add request payload / response body when to chat response fail --- src/client/client_impl.rs | 19 +++++++++++++++---- src/error.rs | 14 ++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/client/client_impl.rs b/src/client/client_impl.rs index 5b639138..e7badd78 100644 --- a/src/client/client_impl.rs +++ b/src/client/client_impl.rs @@ -95,10 +95,21 @@ impl Client { // Note: here we capture/clone the raw body if set in the options_set let captured_raw_body = options_set.capture_raw_body().unwrap_or_default().then(|| web_res.body.clone()); - let mut chat_res = AdapterDispatcher::to_chat_response(model, web_res, options_set)?; - chat_res.captured_raw_body = captured_raw_body; - - Ok(chat_res) + match AdapterDispatcher::to_chat_response(model.clone(), web_res, options_set) { + Ok(mut chat_res) => { + chat_res.captured_raw_body = captured_raw_body; + Ok(chat_res) + } + 
Err(err) => { + let err = Error::ChatResponseGeneration { + model_iden: model, + request_payload: Box::new(payload), + response_body: Box::new(captured_raw_body.unwrap_or_default()), + cause: err.to_string(), + }; + Err(err) + } + } } /// Streams a chat response. diff --git a/src/error.rs b/src/error.rs index d706add2..429df6c3 100644 --- a/src/error.rs +++ b/src/error.rs @@ -80,6 +80,20 @@ pub enum Error { webc_error: webc::Error, }, + #[display( + "Error while generating a ChatResponse from a ChatRequest. (for Model: '{model_iden}') +Request Payload:\n{request_payload:#} +Response Body:\n{response_body:#} +Cause:\n{cause} +" + )] + ChatResponseGeneration { + model_iden: ModelIden, + request_payload: Box, + response_body: Box, + cause: String, + }, + #[display("Error event in stream for model '{model_iden}'. Body: {body}")] ChatResponse { model_iden: ModelIden, From ce571468a2c0ef3c4a0844077c87f4d06d7f485d Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 31 Jan 2026 11:33:52 -0800 Subject: [PATCH 109/123] . 
error - add comment --- src/error.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/error.rs b/src/error.rs index 429df6c3..99c7a376 100644 --- a/src/error.rs +++ b/src/error.rs @@ -90,6 +90,7 @@ Cause:\n{cause} ChatResponseGeneration { model_iden: ModelIden, request_payload: Box, + /// Require ChatOptions::default().with_capture_raw_body(true); otherwise "null" response_body: Box, cause: String, }, From 1dd352aa158e38d7ea62679f56f5d8d269a141ef Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 31 Jan 2026 11:39:23 -0800 Subject: [PATCH 110/123] ^ error - ChatResponseGeneration - add 'doc text' when raw response is not given (#137) --- src/client/client_impl.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/client/client_impl.rs b/src/client/client_impl.rs index e7badd78..8a0f48e7 100644 --- a/src/client/client_impl.rs +++ b/src/client/client_impl.rs @@ -101,10 +101,13 @@ impl Client { Ok(chat_res) } Err(err) => { + let response_body = captured_raw_body.unwrap_or_else(|| { + "Raw response not captured. Use the ChatOptions.capturre_raw_body flag to see raw response in this error".into() + }); let err = Error::ChatResponseGeneration { model_iden: model, request_payload: Box::new(payload), - response_body: Box::new(captured_raw_body.unwrap_or_default()), + response_body: Box::new(response_body), cause: err.to_string(), }; Err(err) From 9a66c28d4085a075904ec5c74bb554711f16dbb6 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 31 Jan 2026 11:45:24 -0800 Subject: [PATCH 111/123] . 
v0.5.3 --- CHANGELOG.md | 9 +++++++++ Cargo.toml | 2 +- doc/for-llm/api-reference-for-llm.md | 4 ++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 86f22539..40376a10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ `.` minor | `-` Fix | `+` Addition | `^` improvement | `!` Change | `*` Refactor +## 2026-01-31 - [v0.5.3](https://github.com/jeremychone/rust-genai/compare/v0.5.2...v0.5.3) + +- `^` error - add request payload / response body when to chat response fail +- `>` refactor captured_raw_body into client .exec_chat (prep for #137) +- `.` tracing - add traced to web-client for ai response (#132) +- `-` Fix incorrect empty output from MessageContent::joined_texts for ≥ 2 text parts (fixes #135) (#136) Co-authored-by: Ross MacLeod +- `.` ChatRole - Add PartialEq / Eq (#131) + + ## 2026-01-27 - [v0.5.2](https://github.com/jeremychone/rust-genai/compare/v0.5.1...v0.5.2) - `-` Does not capture body when json parse fail (#128) diff --git a/Cargo.toml b/Cargo.toml index dcd22855..9777dbb0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.3-WIP" +version = "0.5.3" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" diff --git a/doc/for-llm/api-reference-for-llm.md b/doc/for-llm/api-reference-for-llm.md index 9fa2db31..f1b13d92 100644 --- a/doc/for-llm/api-reference-for-llm.md +++ b/doc/for-llm/api-reference-for-llm.md @@ -2,6 +2,10 @@ Dry, concise reference for the `genai` library. +```toml +genai = "0.5.3" +``` + ## Core Concepts - **Client**: Main entry point (`genai::Client`). Thread-safe (`Arc` wrapper). From c9720c33030fbcca2a929cf93a5098f7262c4694 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 31 Jan 2026 11:46:49 -0800 Subject: [PATCH 112/123] . 
0.5.4-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9777dbb0..9fcc0fa7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.3" +version = "0.5.4-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From b13418779047d8e55ebb520d44265eea74e36548 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 31 Jan 2026 13:59:42 -0800 Subject: [PATCH 113/123] . cargo fmt --- src/adapter/adapters/gemini/streamer.rs | 31 ++++++++++++++----------- src/chat/message_content.rs | 16 +++++++++---- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/adapter/adapters/gemini/streamer.rs b/src/adapter/adapters/gemini/streamer.rs index ab9c2ef6..0ee1ea13 100644 --- a/src/adapter/adapters/gemini/streamer.rs +++ b/src/adapter/adapters/gemini/streamer.rs @@ -106,7 +106,9 @@ impl futures::Stream for GeminiStreamer { for g_content_item in content { match g_content_item { - GeminiChatContent::Reasoning(reasoning) => stream_reasoning_content = Some(reasoning), + GeminiChatContent::Reasoning(reasoning) => { + stream_reasoning_content = Some(reasoning) + } GeminiChatContent::Text(text) => stream_text_content.push_str(&text), GeminiChatContent::ToolCall(tool_call) => stream_tool_call = Some(tool_call), GeminiChatContent::ThoughtSignature(thought) => stream_thought = Some(thought), @@ -130,19 +132,20 @@ impl futures::Stream for GeminiStreamer { self.pending_events.push_back(InterStreamEvent::ThoughtSignatureChunk(thought)); } - if let Some(reasoning_content) = stream_reasoning_content { - // Capture reasoning content - if self.options.capture_content { - match self.captured_data.reasoning_content { - Some(ref mut rc) => rc.push_str(&reasoning_content), - None => self.captured_data.reasoning_content = Some(reasoning_content.clone()), - } - } - if 
self.options.capture_usage { - self.captured_data.usage = Some(usage.clone()); - } - self.pending_events.push_back(InterStreamEvent::ReasoningChunk(reasoning_content)); - } + if let Some(reasoning_content) = stream_reasoning_content { + // Capture reasoning content + if self.options.capture_content { + match self.captured_data.reasoning_content { + Some(ref mut rc) => rc.push_str(&reasoning_content), + None => self.captured_data.reasoning_content = Some(reasoning_content.clone()), + } + } + if self.options.capture_usage { + self.captured_data.usage = Some(usage.clone()); + } + self.pending_events + .push_back(InterStreamEvent::ReasoningChunk(reasoning_content)); + } // 2. Text if !stream_text_content.is_empty() { diff --git a/src/chat/message_content.rs b/src/chat/message_content.rs index a7eb2724..95f021b6 100644 --- a/src/chat/message_content.rs +++ b/src/chat/message_content.rs @@ -384,7 +384,10 @@ mod tests { #[test] fn test_message_content_joined_texts_single_part() { - assert_eq!(MessageContent::from_parts(vec![ContentPart::Text("Hello".to_string())]).joined_texts(), Some("Hello".to_string())); + assert_eq!( + MessageContent::from_parts(vec![ContentPart::Text("Hello".to_string())]).joined_texts(), + Some("Hello".to_string()) + ); } #[test] @@ -393,7 +396,8 @@ mod tests { MessageContent::from_parts(vec![ ContentPart::Text("Hello".to_string()), ContentPart::Text("World".to_string()), - ]).joined_texts(), + ]) + .joined_texts(), Some("Hello\n\nWorld".to_string()) ); } @@ -405,7 +409,10 @@ mod tests { #[test] fn test_message_content_into_joined_texts_single_part() { - assert_eq!(MessageContent::from_parts(vec![ContentPart::Text("Hello".to_string())]).into_joined_texts(), Some("Hello".to_string())); + assert_eq!( + MessageContent::from_parts(vec![ContentPart::Text("Hello".to_string())]).into_joined_texts(), + Some("Hello".to_string()) + ); } #[test] @@ -414,7 +421,8 @@ mod tests { MessageContent::from_parts(vec![ ContentPart::Text("Hello".to_string()), 
ContentPart::Text("World".to_string()), - ]).into_joined_texts(), + ]) + .into_joined_texts(), Some("Hello\n\nWorld".to_string()) ); } From 21b634cda31db042a2dff7bcd5655fe74e80950b Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 31 Jan 2026 14:29:51 -0800 Subject: [PATCH 114/123] . 0.6.0-alpha.1-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9fcc0fa7..1009f3ef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.5.4-WIP" +version = "0.6.0-alpha.1-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From c3d4b97f3dbdc751fc9f25c1c5b8f471003d607c Mon Sep 17 00:00:00 2001 From: Michelle Tilley Date: Sat, 31 Jan 2026 14:30:23 -0800 Subject: [PATCH 115/123] feat: Add `ModelSpec` to allow for additional model details when making API calls (#126) --- src/client/client_impl.rs | 54 +++++++++++------ src/client/config.rs | 104 +++++++++++++++++++++++---------- src/client/mod.rs | 2 + src/client/model_spec.rs | 109 +++++++++++++++++++++++++++++++++++ src/client/service_target.rs | 1 + 5 files changed, 223 insertions(+), 47 deletions(-) create mode 100644 src/client/model_spec.rs diff --git a/src/client/client_impl.rs b/src/client/client_impl.rs index 8a0f48e7..93e21322 100644 --- a/src/client/client_impl.rs +++ b/src/client/client_impl.rs @@ -1,5 +1,6 @@ use crate::adapter::{AdapterDispatcher, AdapterKind, ServiceType, WebRequestData}; use crate::chat::{ChatOptions, ChatOptionsSet, ChatRequest, ChatResponse, ChatStreamResponse}; +use crate::client::ModelSpec; use crate::embed::{EmbedOptions, EmbedOptionsSet, EmbedRequest, EmbedResponse}; use crate::resolver::AuthData; use crate::{Client, Error, ModelIden, Result, ServiceTarget}; @@ -41,26 +42,33 @@ impl Client { Ok(target.model) } - /// Resolves the service target (endpoint, auth, and model) for 
the given model name. - pub async fn resolve_service_target(&self, model_name: &str) -> Result { - let model = self.default_model(model_name)?; - self.config().resolve_service_target(model).await + /// Resolves the service target (endpoint, auth, and model) for the given model. + /// + /// Accepts any type that implements `Into`: + /// - `&str` or `String`: Model name with full inference + /// - `ModelIden`: Explicit adapter, resolves auth/endpoint + /// - `ServiceTarget`: Uses directly, bypasses model mapping and auth resolution + pub async fn resolve_service_target(&self, model: impl Into) -> Result { + self.config().resolve_model_spec(model.into()).await } /// Sends a chat request and returns the full response. + /// + /// Accepts any type that implements `Into`: + /// - `&str` or `String`: Model name with full inference + /// - `ModelIden`: Explicit adapter, resolves auth/endpoint + /// - `ServiceTarget`: Uses directly, bypasses model mapping and auth resolution pub async fn exec_chat( &self, - model: &str, + model: impl Into, chat_req: ChatRequest, - // options not implemented yet options: Option<&ChatOptions>, ) -> Result { let options_set = ChatOptionsSet::default() .with_chat_options(options) .with_client_options(self.config().chat_options()); - let model = self.default_model(model)?; - let target = self.config().resolve_service_target(model).await?; + let target = self.config().resolve_model_spec(model.into()).await?; let model = target.model.clone(); let auth_data = target.auth.clone(); @@ -116,18 +124,22 @@ impl Client { } /// Streams a chat response. 
+ /// + /// Accepts any type that implements `Into`: + /// - `&str` or `String`: Model name with full inference + /// - `ModelIden`: Explicit adapter, resolves auth/endpoint + /// - `ServiceTarget`: Uses directly, bypasses model mapping and auth resolution pub async fn exec_chat_stream( &self, - model: &str, - chat_req: ChatRequest, // options not implemented yet + model: impl Into, + chat_req: ChatRequest, options: Option<&ChatOptions>, ) -> Result { let options_set = ChatOptionsSet::default() .with_chat_options(options) .with_client_options(self.config().chat_options()); - let model = self.default_model(model)?; - let target = self.config().resolve_service_target(model).await?; + let target = self.config().resolve_model_spec(model.into()).await?; let model = target.model.clone(); let auth_data = target.auth.clone(); @@ -167,9 +179,11 @@ impl Client { } /// Creates embeddings for a single input string. + /// + /// Accepts any type that implements `Into` for the model parameter. pub async fn embed( &self, - model: &str, + model: impl Into, input: impl Into, options: Option<&EmbedOptions>, ) -> Result { @@ -178,9 +192,11 @@ impl Client { } /// Creates embeddings for multiple input strings. + /// + /// Accepts any type that implements `Into` for the model parameter. pub async fn embed_batch( &self, - model: &str, + model: impl Into, inputs: Vec, options: Option<&EmbedOptions>, ) -> Result { @@ -189,9 +205,14 @@ impl Client { } /// Sends an embedding request and returns the response. 
+ /// + /// Accepts any type that implements `Into`: + /// - `&str` or `String`: Model name with full inference + /// - `ModelIden`: Explicit adapter, resolves auth/endpoint + /// - `ServiceTarget`: Uses directly, bypasses model mapping and auth resolution pub async fn exec_embed( &self, - model: &str, + model: impl Into, embed_req: EmbedRequest, options: Option<&EmbedOptions>, ) -> Result { @@ -199,8 +220,7 @@ impl Client { .with_request_options(options) .with_client_options(self.config().embed_options()); - let model = self.default_model(model)?; - let target = self.config().resolve_service_target(model).await?; + let target = self.config().resolve_model_spec(model.into()).await?; let model = target.model.clone(); let WebRequestData { headers, payload, url } = diff --git a/src/client/config.rs b/src/client/config.rs index bce41a8c..e5858e69 100644 --- a/src/client/config.rs +++ b/src/client/config.rs @@ -1,8 +1,8 @@ -use crate::adapter::AdapterDispatcher; +use crate::adapter::{AdapterDispatcher, AdapterKind}; use crate::chat::ChatOptions; -use crate::client::ServiceTarget; +use crate::client::{ModelSpec, ServiceTarget}; use crate::embed::EmbedOptions; -use crate::resolver::{AuthResolver, ModelMapper, ServiceTargetResolver}; +use crate::resolver::{AuthData, AuthResolver, ModelMapper, ServiceTargetResolver}; use crate::{Error, ModelIden, Result, WebConfig}; /// Configuration for building and customizing a `Client`. @@ -104,41 +104,65 @@ impl ClientConfig { /// Errors with Error::Resolver if any resolver step fails. 
pub async fn resolve_service_target(&self, model: ModelIden) -> Result { // -- Resolve the Model first - let model = match self.model_mapper() { - Some(model_mapper) => model_mapper.map_model(model.clone()), - None => Ok(model.clone()), - } - .map_err(|resolver_error| Error::Resolver { - model_iden: model.clone(), - resolver_error, - })?; + let model = self.run_model_mapper(model.clone())?; // -- Get the auth - let auth = if let Some(auth) = self.auth_resolver() { - // resolve async which may be async - auth.resolve(model.clone()) - .await - .map_err(|err| Error::Resolver { - model_iden: model.clone(), - resolver_error: err, - })? - // default the resolver resolves to nothing - .unwrap_or_else(|| AdapterDispatcher::default_auth(model.adapter_kind)) - } else { - AdapterDispatcher::default_auth(model.adapter_kind) - }; + let auth = self.run_auth_resolver(model.clone()).await?; // -- Get the default endpoint // For now, just get the default endpoint; the `resolve_target` will allow overriding it. let endpoint = AdapterDispatcher::default_endpoint(model.adapter_kind); - // -- Resolve the service_target + // -- Create the default service target let service_target = ServiceTarget { model: model.clone(), auth, endpoint, }; - let service_target = match self.service_target_resolver() { + + // -- Resolve the service target + let service_target = self.run_service_target_resolver(service_target).await?; + + Ok(service_target) + } + + /// Resolves a [`ModelIden`] to a [`ModelIden`] via the [`ModelMapper`] (if any). + fn run_model_mapper(&self, model: ModelIden) -> Result { + match self.model_mapper() { + Some(model_mapper) => model_mapper.map_model(model.clone()), + None => Ok(model.clone()), + } + .map_err(|resolver_error| Error::Resolver { + model_iden: model.clone(), + resolver_error, + }) + } + + /// Resolves a [`ModelIden`] to an [`AuthData`] via the [`AuthResolver`] (if any). 
+ async fn run_auth_resolver(&self, model: ModelIden) -> Result { + match self.auth_resolver() { + Some(auth_resolver) => { + let auth_data = auth_resolver + .resolve(model.clone()) + .await + .map_err(|err| Error::Resolver { + model_iden: model.clone(), + resolver_error: err, + })? + // default the resolver resolves to nothing + .unwrap_or_else(|| AdapterDispatcher::default_auth(model.adapter_kind)); + + Ok(auth_data) + } + None => Ok(AdapterDispatcher::default_auth(model.adapter_kind)), + } + } + + /// Resolves a [`ServiceTarget`] via the [`ServiceTargetResolver`] (if any). + async fn run_service_target_resolver(&self, service_target: ServiceTarget) -> Result { + let model = service_target.model.clone(); + + match self.service_target_resolver() { Some(service_target_resolver) => { service_target_resolver .resolve(service_target) @@ -146,11 +170,31 @@ impl ClientConfig { .map_err(|resolver_error| Error::Resolver { model_iden: model, resolver_error, - })? + }) } - None => service_target, - }; + None => Ok(service_target), + } + } - Ok(service_target) + /// Resolves a [`ModelSpec`] to a [`ServiceTarget`]. + /// + /// The resolution behavior depends on the variant: + /// + /// - [`ModelSpec::Name`]: Infers adapter from name, then applies full resolution + /// (model mapper, auth resolver, service target resolver). + /// + /// - [`ModelSpec::Iden`]: Skips adapter inference, applies full resolution. + /// + /// - [`ModelSpec::Target`]: Returns the target directly, running only the service target resolver. 
+ pub async fn resolve_model_spec(&self, spec: ModelSpec) -> Result { + match spec { + ModelSpec::Name(name) => { + let adapter_kind = AdapterKind::from_model(&name)?; + let model = ModelIden::new(adapter_kind, name); + self.resolve_service_target(model).await + } + ModelSpec::Iden(model) => self.resolve_service_target(model).await, + ModelSpec::Target(target) => self.run_service_target_resolver(target).await, + } } } diff --git a/src/client/mod.rs b/src/client/mod.rs index d5943c78..3fbb6d75 100644 --- a/src/client/mod.rs +++ b/src/client/mod.rs @@ -10,6 +10,7 @@ mod client_impl; mod client_types; mod config; mod headers; +mod model_spec; mod service_target; mod web_config; @@ -17,6 +18,7 @@ pub use builder::*; pub use client_types::*; pub use config::*; pub use headers::*; +pub use model_spec::*; pub use service_target::*; pub use web_config::*; diff --git a/src/client/model_spec.rs b/src/client/model_spec.rs new file mode 100644 index 00000000..98ec9870 --- /dev/null +++ b/src/client/model_spec.rs @@ -0,0 +1,109 @@ +use crate::adapter::AdapterKind; +use crate::{ModelIden, ServiceTarget}; + +/// Specifies how to identify and resolve a model for API calls. +/// +/// `ModelSpec` provides three levels of control over model resolution: +/// +/// - [`ModelSpec::Name`]: Just a model name string. The adapter kind is inferred +/// from the name, and auth/endpoint are resolved via the client's configured resolvers. +/// +/// - [`ModelSpec::Iden`]: An explicit [`ModelIden`] with adapter kind specified. +/// Skips adapter inference but still resolves auth/endpoint via config. +/// +/// - [`ModelSpec::Target`]: A complete [`ServiceTarget`] with endpoint, auth, and model. +/// Used directly, only runs the service target resolver. 
+/// +/// # Examples +/// +/// ```rust +/// use genai::adapter::AdapterKind; +/// use genai::resolver::{AuthData, Endpoint}; +/// use genai::{ModelIden, ModelSpec, ServiceTarget}; +/// +/// // Using a string name (full inference) +/// let spec: ModelSpec = "gpt-4".into(); +/// +/// // Using an explicit ModelIden (skip adapter inference) +/// let spec: ModelSpec = ModelIden::new(AdapterKind::OpenAI, "gpt-4").into(); +/// +/// // Using a complete ServiceTarget (bypass all resolution) +/// let target = ServiceTarget { +/// endpoint: Endpoint::from_static("https://custom.api/v1/"), +/// auth: AuthData::from_env("CUSTOM_API_KEY"), +/// model: ModelIden::new(AdapterKind::OpenAI, "custom-model"), +/// }; +/// let spec: ModelSpec = target.into(); +/// ``` +#[derive(Debug, Clone)] +pub enum ModelSpec { + /// Model name string - adapter kind is inferred, auth/endpoint resolved via config. + Name(String), + + /// Explicit [`ModelIden`] - skips adapter inference, still resolves auth/endpoint. + Iden(ModelIden), + + /// Complete [`ServiceTarget`] - used directly, bypasses model mapping and auth resolution + Target(ServiceTarget), +} + +// region: --- Constructors + +impl ModelSpec { + /// Creates a `ModelSpec::Name` from a string. + pub fn from_model_name(name: impl Into) -> Self { + ModelSpec::Name(name.into()) + } + + /// Creates a `ModelSpec::Iden` from adapter kind and model name. + pub fn from_model(adapter_kind: AdapterKind, model_name: impl Into) -> Self { + ModelSpec::Iden(ModelIden::new(adapter_kind, model_name.into())) + } + + /// Creates a `ModelSpec::Target` from a complete service target. 
+ pub fn from_target(target: ServiceTarget) -> Self { + ModelSpec::Target(target) + } +} + +// endregion: --- Constructors + +// region: --- From Implementations + +impl From<&str> for ModelSpec { + fn from(name: &str) -> Self { + ModelSpec::Name(name.to_string()) + } +} + +impl From<&&str> for ModelSpec { + fn from(name: &&str) -> Self { + ModelSpec::Name((*name).to_string()) + } +} + +impl From for ModelSpec { + fn from(name: String) -> Self { + ModelSpec::Name(name) + } +} + +impl From<&String> for ModelSpec { + fn from(name: &String) -> Self { + ModelSpec::Name(name.clone()) + } +} + +impl From for ModelSpec { + fn from(model: ModelIden) -> Self { + ModelSpec::Iden(model) + } +} + +impl From for ModelSpec { + fn from(target: ServiceTarget) -> Self { + ModelSpec::Target(target) + } +} + +// endregion: --- From Implementations diff --git a/src/client/service_target.rs b/src/client/service_target.rs index dac06af1..a2d5a37e 100644 --- a/src/client/service_target.rs +++ b/src/client/service_target.rs @@ -9,6 +9,7 @@ use crate::resolver::{AuthData, Endpoint}; /// - `auth`: Authentication data for the request. /// /// - `model`: Target model identifier. 
+#[derive(Debug, Clone)] pub struct ServiceTarget { pub endpoint: Endpoint, pub auth: AuthData, From f23bb27ff104ccc50560e8f4e9b333e60ce378db Mon Sep 17 00:00:00 2001 From: Artem Holovskyi Date: Sun, 1 Feb 2026 00:37:18 +0200 Subject: [PATCH 116/123] - fix - anthropic - prompt caching fixes (#130) - fix - anthropic - streaming adapter now captures cache tokens ^ cache - anthropic - Adds them to prompt_tokens (matching into_usage behavior) ^ cache - anthropic - add TTL support for prompt caching - fix - anthropic - apply per-part cache_control on system messages and add cache TTL example --- examples/c18-cache-ttl.rs | 179 +++++++++++++ .../adapters/anthropic/adapter_impl.rs | 237 ++++++++++++++---- src/adapter/adapters/anthropic/streamer.rs | 36 ++- src/adapter/adapters/gemini/adapter_impl.rs | 1 + .../openai_resp/resp_types/resp_usage.rs | 1 + src/chat/chat_message.rs | 23 +- src/chat/usage.rs | 28 ++- tests/support/common_tests.rs | 90 +++++++ tests/tests_p_anthropic.rs | 14 ++ 9 files changed, 556 insertions(+), 53 deletions(-) create mode 100644 examples/c18-cache-ttl.rs diff --git a/examples/c18-cache-ttl.rs b/examples/c18-cache-ttl.rs new file mode 100644 index 00000000..330b4ca0 --- /dev/null +++ b/examples/c18-cache-ttl.rs @@ -0,0 +1,179 @@ +//! Example demonstrating prompt caching with mixed TTLs (1h + 5m). +//! +//! This example shows how to: +//! 1. Use `CacheControl::Ephemeral1h` and `CacheControl::Ephemeral5m` on system messages +//! 2. Verify cache creation on the first request +//! 3. Verify cache hits on a subsequent identical request +//! 4. Inspect TTL-specific token breakdowns +//! +//! Requires: ANTHROPIC_API_KEY environment variable +//! +//! Run with: `cargo run --example c18-cache-ttl` + +use genai::chat::{CacheControl, ChatMessage, ChatRequest}; +use genai::Client; + +const MODEL: &str = "claude-haiku-4-5-20251001"; + +/// Large text for the 1h-cached system message (~3000 tokens). 
+/// Anthropic requires a minimum of 2048 tokens per cacheable block for Haiku. +fn long_system_text() -> String { + let paragraph = "The field of artificial intelligence has seen remarkable progress over \ + the past decade. Machine learning models have grown from simple classifiers to complex \ + systems capable of generating human-like text, creating images from descriptions, and \ + solving intricate reasoning problems. These advances have been driven by improvements \ + in hardware, the availability of massive datasets, and breakthroughs in model \ + architectures such as transformers. The transformer architecture, introduced in 2017, \ + revolutionized natural language processing by enabling models to attend to all parts of \ + an input sequence simultaneously, rather than processing tokens one at a time. This \ + parallel processing capability, combined with scaling laws that showed predictable \ + performance improvements with increased compute and data, led to the development of \ + large language models that can perform a wide variety of tasks. "; + // Repeat enough to exceed 4096 tokens (Haiku 4.5 minimum for caching) + paragraph.repeat(40) +} + +/// Large text for the 5m-cached system message (~3000 tokens). +fn medium_system_text() -> String { + let paragraph = "When responding to user queries, always provide clear, accurate, and \ + well-structured answers. Break down complex topics into digestible parts. Use examples \ + where appropriate to illustrate concepts. Maintain a professional and helpful tone \ + throughout the conversation. If you are unsure about something, say so rather than \ + guessing. Cite sources when possible. Keep responses concise but thorough. 
"; + // Repeat enough to exceed 4096 tokens (Haiku 4.5 minimum for caching) + paragraph.repeat(55) +} + +fn build_chat_request(user_msg: &str) -> ChatRequest { + let sys1 = ChatMessage::system(long_system_text()).with_options(CacheControl::Ephemeral1h); + let sys2 = ChatMessage::system(medium_system_text()).with_options(CacheControl::Ephemeral5m); + + ChatRequest::default() + .append_message(sys1) + .append_message(sys2) + .append_message(ChatMessage::user(user_msg)) +} + +fn get_or_zero(val: Option) -> i32 { + val.unwrap_or(0) +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + let client = Client::default(); + let mut all_passed = true; + + // -- Request 1: Cache creation + println!("=== Request 1: Cache Creation ===\n"); + + let req = build_chat_request("What is 2+2?"); + let res = client.exec_chat(MODEL, req, None).await?; + + if let Some(text) = res.content.first_text() { + println!("Response: {}\n", text); + } + + let usage = &res.usage; + let details = usage.prompt_tokens_details.as_ref(); + + let prompt_tokens = get_or_zero(usage.prompt_tokens); + let completion_tokens = get_or_zero(usage.completion_tokens); + let total_tokens = get_or_zero(usage.total_tokens); + let cache_creation_tokens = get_or_zero(details.and_then(|d| d.cache_creation_tokens)); + let cached_tokens = get_or_zero(details.and_then(|d| d.cached_tokens)); + let eph_1h = get_or_zero( + details + .and_then(|d| d.cache_creation_details.as_ref()) + .and_then(|cd| cd.ephemeral_1h_tokens), + ); + let eph_5m = get_or_zero( + details + .and_then(|d| d.cache_creation_details.as_ref()) + .and_then(|cd| cd.ephemeral_5m_tokens), + ); + + println!(" prompt_tokens: {prompt_tokens}"); + println!(" completion_tokens: {completion_tokens}"); + println!(" total_tokens: {total_tokens}"); + println!(" cache_creation_tokens: {cache_creation_tokens}"); + println!(" cached_tokens: {cached_tokens}"); + println!(" ephemeral_1h_tokens: {eph_1h}"); + println!(" ephemeral_5m_tokens: {eph_5m}"); + println!(); + 
+ // Verify creation request + if cache_creation_tokens <= 0 { + println!(" FAIL: cache_creation_tokens should be > 0"); + all_passed = false; + } + if cached_tokens != 0 { + println!(" FAIL: cached_tokens should be 0 on first request, got {cached_tokens}"); + all_passed = false; + } + if eph_1h <= 0 { + println!(" FAIL: ephemeral_1h_tokens should be > 0"); + all_passed = false; + } + if eph_5m <= 0 { + println!(" FAIL: ephemeral_5m_tokens should be > 0"); + all_passed = false; + } + + // -- Request 2: Cache read + println!("=== Request 2: Cache Read ===\n"); + + let req = build_chat_request("What is 3+3?"); + let res = client.exec_chat(MODEL, req, None).await?; + + if let Some(text) = res.content.first_text() { + println!("Response: {}\n", text); + } + + let usage = &res.usage; + let details = usage.prompt_tokens_details.as_ref(); + + let prompt_tokens = get_or_zero(usage.prompt_tokens); + let completion_tokens = get_or_zero(usage.completion_tokens); + let total_tokens = get_or_zero(usage.total_tokens); + let cache_creation_tokens = get_or_zero(details.and_then(|d| d.cache_creation_tokens)); + let cached_tokens = get_or_zero(details.and_then(|d| d.cached_tokens)); + let eph_1h = get_or_zero( + details + .and_then(|d| d.cache_creation_details.as_ref()) + .and_then(|cd| cd.ephemeral_1h_tokens), + ); + let eph_5m = get_or_zero( + details + .and_then(|d| d.cache_creation_details.as_ref()) + .and_then(|cd| cd.ephemeral_5m_tokens), + ); + + println!(" prompt_tokens: {prompt_tokens}"); + println!(" completion_tokens: {completion_tokens}"); + println!(" total_tokens: {total_tokens}"); + println!(" cache_creation_tokens: {cache_creation_tokens}"); + println!(" cached_tokens: {cached_tokens}"); + println!(" ephemeral_1h_tokens: {eph_1h}"); + println!(" ephemeral_5m_tokens: {eph_5m}"); + println!(); + + // Verify cache hit + if cached_tokens <= 0 { + println!(" FAIL: cached_tokens should be > 0 (cache hit)"); + all_passed = false; + } + if cache_creation_tokens != 0 { + 
println!(" FAIL: cache_creation_tokens should be 0 on cache hit, got {cache_creation_tokens}"); + all_passed = false; + } + + // -- Final result + println!(); + if all_passed { + println!("Cache TTL test PASSED"); + } else { + println!("Cache TTL test FAILED"); + } + + Ok(()) +} diff --git a/src/adapter/adapters/anthropic/adapter_impl.rs b/src/adapter/adapters/anthropic/adapter_impl.rs index 08b66f9a..33612efd 100644 --- a/src/adapter/adapters/anthropic/adapter_impl.rs +++ b/src/adapter/adapters/anthropic/adapter_impl.rs @@ -2,8 +2,8 @@ use crate::adapter::adapters::support::get_api_key; use crate::adapter::anthropic::AnthropicStreamer; use crate::adapter::{Adapter, AdapterKind, ServiceType, WebRequestData}; use crate::chat::{ - Binary, BinarySource, ChatOptionsSet, ChatRequest, ChatResponse, ChatRole, ChatStream, ChatStreamResponse, - ContentPart, MessageContent, PromptTokensDetails, ReasoningEffort, ToolCall, Usage, + Binary, BinarySource, CacheControl, CacheCreationDetails, ChatOptionsSet, ChatRequest, ChatResponse, ChatRole, + ChatStream, ChatStreamResponse, ContentPart, MessageContent, PromptTokensDetails, ReasoningEffort, ToolCall, Usage, }; use crate::resolver::{AuthData, Endpoint}; use crate::webc::{EventSourceStream, WebResponse}; @@ -359,6 +359,11 @@ impl AnthropicAdapter { let cache_read_input_tokens: i32 = usage_value.x_take("cache_read_input_tokens").unwrap_or(0); let completion_tokens: i32 = usage_value.x_take("output_tokens").ok().unwrap_or(0); + // Parse cache_creation breakdown if present (TTL-specific breakdown) + let cache_creation_details = usage_value + .get("cache_creation") + .and_then(parse_cache_creation_details); + // compute the prompt_tokens let prompt_tokens = input_tokens + cache_creation_input_tokens + cache_read_input_tokens; @@ -367,15 +372,17 @@ impl AnthropicAdapter { // For now the logic is to have a Some of PromptTokensDetails if at least one of those value is not 0 // TODO: Needs to be normalized across adapters. 
- let prompt_tokens_details = if cache_creation_input_tokens > 0 || cache_read_input_tokens > 0 { - Some(PromptTokensDetails { - cache_creation_tokens: Some(cache_creation_input_tokens), - cached_tokens: Some(cache_read_input_tokens), - audio_tokens: None, - }) - } else { - None - }; + let prompt_tokens_details = + if cache_creation_input_tokens > 0 || cache_read_input_tokens > 0 || cache_creation_details.is_some() { + Some(PromptTokensDetails { + cache_creation_tokens: Some(cache_creation_input_tokens), + cache_creation_details, + cached_tokens: Some(cache_read_input_tokens), + audio_tokens: None, + }) + } else { + None + }; Usage { prompt_tokens: Some(prompt_tokens), @@ -393,24 +400,45 @@ impl AnthropicAdapter { /// - Will push the `ChatRequest.system` and system message to `AnthropicRequestParts.system` fn into_anthropic_request_parts(chat_req: ChatRequest) -> Result { let mut messages: Vec = Vec::new(); - // (content, is_cache_control) - let mut systems: Vec<(String, bool)> = Vec::new(); + // (content, cache_control) + let mut systems: Vec<(String, Option)> = Vec::new(); + + // Track TTL ordering for validation (1h must come before 5m) + let mut seen_5m_cache = false; // NOTE: For now, this means the first System cannot have a cache control // so that we do not change too much. if let Some(system) = chat_req.system { - systems.push((system, false)); + systems.push((system, None)); } // -- Process the messages for msg in chat_req.messages { - let is_cache_control = msg.options.map(|o| o.cache_control.is_some()).unwrap_or(false); + let cache_control = msg.options.and_then(|o| o.cache_control); + + // Check TTL ordering constraint + if let Some(ref cc) = cache_control { + match cc { + CacheControl::Ephemeral | CacheControl::Ephemeral5m => { + seen_5m_cache = true; + } + CacheControl::Ephemeral1h => { + if seen_5m_cache { + warn!( + "Anthropic cache TTL ordering violation: Ephemeral1h appears after Ephemeral/Ephemeral5m. 
\ + 1-hour cache entries must appear before 5-minute cache entries. \ + See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#mixing-different-ttls" + ); + } + } + } + } match msg.role { // Collect only text for system; other content parts are ignored by Anthropic here. ChatRole::System => { if let Some(system_text) = msg.content.joined_texts() { - systems.push((system_text, is_cache_control)); + systems.push((system_text, cache_control)); } } @@ -418,7 +446,7 @@ impl AnthropicAdapter { ChatRole::User => { if msg.content.is_text_only() { let text = msg.content.joined_texts().unwrap_or_else(String::new); - let content = apply_cache_control_to_text(is_cache_control, text); + let content = apply_cache_control_to_text(cache_control.as_ref(), text); messages.push(json!({"role": "user", "content": content})); } else { let mut values: Vec = Vec::new(); @@ -488,7 +516,7 @@ impl AnthropicAdapter { ContentPart::ThoughtSignature(_) => {} } } - let values = apply_cache_control_to_parts(is_cache_control, values); + let values = apply_cache_control_to_parts(cache_control.as_ref(), values); messages.push(json!({"role": "user", "content": values})); } } @@ -522,7 +550,7 @@ impl AnthropicAdapter { } } - if !has_tool_use && has_text && !is_cache_control && values.len() == 1 { + if !has_tool_use && has_text && cache_control.is_none() && values.len() == 1 { // Optimize to simple string when it's only one text part and no cache control. 
let text = values .first() @@ -530,10 +558,10 @@ impl AnthropicAdapter { .and_then(|v| v.as_str()) .unwrap_or_default() .to_string(); - let content = apply_cache_control_to_text(false, text); + let content = apply_cache_control_to_text(None, text); messages.push(json!({"role": "assistant", "content": content})); } else { - let values = apply_cache_control_to_parts(is_cache_control, values); + let values = apply_cache_control_to_parts(cache_control.as_ref(), values); messages.push(json!({"role": "assistant", "content": values})); } } @@ -551,7 +579,7 @@ impl AnthropicAdapter { } } if !values.is_empty() { - let values = apply_cache_control_to_parts(is_cache_control, values); + let values = apply_cache_control_to_parts(cache_control.as_ref(), values); messages.push(json!({"role": "user", "content": values})); } } @@ -561,26 +589,19 @@ impl AnthropicAdapter { // -- Create the Anthropic system // NOTE: Anthropic does not have a "role": "system", just a single optional system property let system = if !systems.is_empty() { - let mut last_cache_idx = -1; - // first determine the last cache control index - for (idx, (_, is_cache_control)) in systems.iter().enumerate() { - if *is_cache_control { - last_cache_idx = idx as i32; - } - } - // Now build the system multi part - let system: Value = if last_cache_idx > 0 { - let mut parts: Vec = Vec::new(); - for (idx, (content, _)) in systems.iter().enumerate() { - let idx = idx as i32; - if idx == last_cache_idx { - let part = json!({"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}); - parts.push(part); - } else { - let part = json!({"type": "text", "text": content}); - parts.push(part); - } - } + let has_any_cache = systems.iter().any(|(_, cc)| cc.is_some()); + let system: Value = if has_any_cache { + // Build multi-part system with per-part cache_control + let parts: Vec = systems + .iter() + .map(|(content, cc)| { + if let Some(cc) = cc { + json!({"type": "text", "text": content, "cache_control": 
cache_control_to_json(cc)}) + } else { + json!({"type": "text", "text": content}) + } + }) + .collect(); json!(parts) } else { let content_buff = systems.iter().map(|(content, _)| content.as_str()).collect::>(); @@ -623,10 +644,57 @@ impl AnthropicAdapter { } } +/// Convert CacheControl to Anthropic JSON format. +/// +/// See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration +fn cache_control_to_json(cache_control: &CacheControl) -> Value { + match cache_control { + CacheControl::Ephemeral => { + json!({"type": "ephemeral"}) + } + CacheControl::Ephemeral5m => { + json!({"type": "ephemeral", "ttl": "5m"}) + } + CacheControl::Ephemeral1h => { + json!({"type": "ephemeral", "ttl": "1h"}) + } + } +} + +/// Parse cache_creation breakdown from Anthropic API response. +/// +/// The API returns TTL-specific token counts in the `cache_creation` object: +/// ```json +/// "cache_creation": { +/// "ephemeral_5m_input_tokens": 456, +/// "ephemeral_1h_input_tokens": 100 +/// } +/// ``` +pub(super) fn parse_cache_creation_details(cache_creation: &Value) -> Option { + let ephemeral_5m_tokens = cache_creation + .get("ephemeral_5m_input_tokens") + .and_then(|v| v.as_i64()) + .map(|v| v as i32); + let ephemeral_1h_tokens = cache_creation + .get("ephemeral_1h_input_tokens") + .and_then(|v| v.as_i64()) + .map(|v| v as i32); + + // Only return Some if at least one TTL has tokens + if ephemeral_5m_tokens.is_some() || ephemeral_1h_tokens.is_some() { + Some(CacheCreationDetails { + ephemeral_5m_tokens, + ephemeral_1h_tokens, + }) + } else { + None + } +} + /// Apply the cache control logic to a text content -fn apply_cache_control_to_text(is_cache_control: bool, content: String) -> Value { - if is_cache_control { - let value = json!({"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}); +fn apply_cache_control_to_text(cache_control: Option<&CacheControl>, content: String) -> Value { + if let Some(cc) = cache_control { + let value 
= json!({"type": "text", "text": content, "cache_control": cache_control_to_json(cc)}); json!(vec![value]) } // simple return @@ -636,13 +704,15 @@ fn apply_cache_control_to_text(is_cache_control: bool, content: String) -> Value } /// Apply the cache control logic to a text content -fn apply_cache_control_to_parts(is_cache_control: bool, parts: Vec) -> Vec { +fn apply_cache_control_to_parts(cache_control: Option<&CacheControl>, parts: Vec) -> Vec { let mut parts = parts; - if is_cache_control && !parts.is_empty() { + if let Some(cc) = cache_control + && !parts.is_empty() + { let len = parts.len(); if let Some(last_value) = parts.get_mut(len - 1) { // NOTE: For now, if it fails, then, no cache - let _ = last_value.x_insert("cache_control", json!( {"type": "ephemeral"})); + let _ = last_value.x_insert("cache_control", cache_control_to_json(cc)); // TODO: Should warn } } @@ -656,3 +726,74 @@ struct AnthropicRequestParts { } // endregion: --- Support + +// region: --- Tests + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cache_control_to_json_ephemeral() { + let result = cache_control_to_json(&CacheControl::Ephemeral); + assert_eq!(result, json!({"type": "ephemeral"})); + } + + #[test] + fn test_cache_control_to_json_ephemeral_5m() { + let result = cache_control_to_json(&CacheControl::Ephemeral5m); + assert_eq!(result, json!({"type": "ephemeral", "ttl": "5m"})); + } + + #[test] + fn test_cache_control_to_json_ephemeral_1h() { + let result = cache_control_to_json(&CacheControl::Ephemeral1h); + assert_eq!(result, json!({"type": "ephemeral", "ttl": "1h"})); + } + + #[test] + fn test_parse_cache_creation_details_with_both_ttls() { + let cache_creation = json!({ + "ephemeral_5m_input_tokens": 456, + "ephemeral_1h_input_tokens": 100 + }); + let result = parse_cache_creation_details(&cache_creation); + assert!(result.is_some()); + let details = result.unwrap(); + assert_eq!(details.ephemeral_5m_tokens, Some(456)); + assert_eq!(details.ephemeral_1h_tokens, 
Some(100)); + } + + #[test] + fn test_parse_cache_creation_details_with_5m_only() { + let cache_creation = json!({ + "ephemeral_5m_input_tokens": 456 + }); + let result = parse_cache_creation_details(&cache_creation); + assert!(result.is_some()); + let details = result.unwrap(); + assert_eq!(details.ephemeral_5m_tokens, Some(456)); + assert_eq!(details.ephemeral_1h_tokens, None); + } + + #[test] + fn test_parse_cache_creation_details_with_1h_only() { + let cache_creation = json!({ + "ephemeral_1h_input_tokens": 100 + }); + let result = parse_cache_creation_details(&cache_creation); + assert!(result.is_some()); + let details = result.unwrap(); + assert_eq!(details.ephemeral_5m_tokens, None); + assert_eq!(details.ephemeral_1h_tokens, Some(100)); + } + + #[test] + fn test_parse_cache_creation_details_empty() { + let cache_creation = json!({}); + let result = parse_cache_creation_details(&cache_creation); + assert!(result.is_none()); + } +} + +// endregion: --- Tests diff --git a/src/adapter/adapters/anthropic/streamer.rs b/src/adapter/adapters/anthropic/streamer.rs index 8ea06c73..755246e8 100644 --- a/src/adapter/adapters/anthropic/streamer.rs +++ b/src/adapter/adapters/anthropic/streamer.rs @@ -1,6 +1,7 @@ use crate::adapter::adapters::support::{StreamerCapturedData, StreamerOptions}; +use crate::adapter::anthropic::parse_cache_creation_details; use crate::adapter::inter_stream::{InterStreamEnd, InterStreamEvent}; -use crate::chat::{ChatOptionsSet, ToolCall, Usage}; +use crate::chat::{ChatOptionsSet, PromptTokensDetails, ToolCall, Usage}; use crate::webc::{Event, EventSourceStream}; use crate::{Error, ModelIden, Result}; use serde_json::{Map, Value}; @@ -265,6 +266,39 @@ impl AnthropicStreamer { .get_or_insert(0); *val += output_tokens; } + + // -- Capture cache tokens (only present in message_start) + // NOTE: Anthropic's input_tokens does NOT include cached tokens, so we must add them. + // See also: AnthropicAdapter::into_usage() for non-streaming equivalent. 
+ if message_type == "message_start" { + let cache_creation: i32 = data.x_get("/message/usage/cache_creation_input_tokens").unwrap_or(0); + let cache_read: i32 = data.x_get("/message/usage/cache_read_input_tokens").unwrap_or(0); + + // Parse cache_creation breakdown if present (TTL-specific breakdown) + // Use x_get with JSON pointer to navigate to /message/usage/cache_creation + let cache_creation_details = data + .x_get::("/message/usage/cache_creation") + .ok() + .as_ref() + .and_then(parse_cache_creation_details); + + if cache_creation > 0 || cache_read > 0 || cache_creation_details.is_some() { + let usage = self.captured_data.usage.get_or_insert(Usage::default()); + + // Add cache tokens to prompt_tokens (same as into_usage does) + if let Some(ref mut pt) = usage.prompt_tokens { + *pt += cache_creation + cache_read; + } + + // Set prompt_tokens_details (match into_usage behavior: always Some(value)) + usage.prompt_tokens_details = Some(PromptTokensDetails { + cache_creation_tokens: Some(cache_creation), + cache_creation_details, + cached_tokens: Some(cache_read), + audio_tokens: None, + }); + } + } } Ok(()) diff --git a/src/adapter/adapters/gemini/adapter_impl.rs b/src/adapter/adapters/gemini/adapter_impl.rs index c8b1cc31..c2f32a97 100644 --- a/src/adapter/adapters/gemini/adapter_impl.rs +++ b/src/adapter/adapters/gemini/adapter_impl.rs @@ -436,6 +436,7 @@ impl GeminiAdapter { let g_cached_tokens: Option = usage_value.x_take("cachedContentTokenCount").ok(); let prompt_tokens_details = g_cached_tokens.map(|g_cached_tokens| PromptTokensDetails { cache_creation_tokens: None, + cache_creation_details: None, cached_tokens: Some(g_cached_tokens), audio_tokens: None, }); diff --git a/src/adapter/adapters/openai_resp/resp_types/resp_usage.rs b/src/adapter/adapters/openai_resp/resp_types/resp_usage.rs index 2b43d949..ba503966 100644 --- a/src/adapter/adapters/openai_resp/resp_types/resp_usage.rs +++ b/src/adapter/adapters/openai_resp/resp_types/resp_usage.rs @@ -93,6 
+93,7 @@ impl From for PromptTokensDetails { fn from(value: InputTokensDetails) -> Self { PromptTokensDetails { cache_creation_tokens: value.cache_creation_tokens, + cache_creation_details: None, cached_tokens: value.cached_tokens, audio_tokens: value.audio_tokens, } diff --git a/src/chat/chat_message.rs b/src/chat/chat_message.rs index b703cb38..a098c06a 100644 --- a/src/chat/chat_message.rs +++ b/src/chat/chat_message.rs @@ -88,17 +88,34 @@ pub struct MessageOptions { pub cache_control: Option, } -/// Cache control +/// Cache control for prompt caching. /// /// Notes: /// - Currently used for Anthropic only. /// - Anthropic applies cache_control at the content-part level; genai exposes it at the /// ChatMessage level and maps it appropriately. /// - OpenAI ignores it; Gemini uses a separate API, so it is not supported there yet. -#[derive(Debug, Clone, Serialize, Deserialize)] +/// +/// ## TTL Ordering Constraint (Anthropic) +/// +/// When mixing different TTLs in the same request, cache entries with longer TTL +/// must appear **before** shorter TTLs. That is, `Ephemeral1h` entries must appear +/// before any `Ephemeral` or `Ephemeral5m` entries in the message sequence. +/// +/// Violating this constraint may cause the API to reject the request or behave unexpectedly. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub enum CacheControl { - /// Hint to avoid persisting this message in provider caches. + /// Default ephemeral cache (5 minutes TTL). Ephemeral, + /// Explicit 5-minute TTL cache. + Ephemeral5m, + /// Extended 1-hour TTL cache. + /// + /// **Important:** When mixing TTLs, 1-hour cache entries must appear before + /// any 5-minute cache entries in the request. + /// + /// Note: Costs 2x base input token price vs 1.25x for 5m. 
+ Ephemeral1h, } impl From for MessageOptions { diff --git a/src/chat/usage.rs b/src/chat/usage.rs index 024b5fb3..d69da990 100644 --- a/src/chat/usage.rs +++ b/src/chat/usage.rs @@ -40,6 +40,26 @@ impl Usage { } } +/// Breakdown of cache creation tokens by TTL. +#[serde_as] +#[skip_serializing_none] +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct CacheCreationDetails { + /// Tokens written to 5-minute ephemeral cache. + #[serde(default, deserialize_with = "crate::support::zero_as_none")] + pub ephemeral_5m_tokens: Option, + /// Tokens written to 1-hour ephemeral cache. + #[serde(default, deserialize_with = "crate::support::zero_as_none")] + pub ephemeral_1h_tokens: Option, +} + +impl CacheCreationDetails { + /// True if all fields are `None`. + pub fn is_empty(&self) -> bool { + self.ephemeral_5m_tokens.is_none() && self.ephemeral_1h_tokens.is_none() + } +} + #[serde_as] #[skip_serializing_none] #[derive(Default, Debug, Clone, Serialize, Deserialize)] @@ -48,6 +68,9 @@ pub struct PromptTokensDetails { /// Tokens used to build the cache (not yet cached). These may incur a small surcharge; subsequent requests benefit via `cached_tokens`. #[serde(default, deserialize_with = "crate::support::zero_as_none")] pub cache_creation_tokens: Option, + /// Breakdown of cache creation tokens by TTL (5m vs 1h). + /// Only populated when the provider returns TTL-specific breakdown. + pub cache_creation_details: Option, /// Anthropic: `cache_read_input_tokens`. #[serde(default, deserialize_with = "crate::support::zero_as_none")] pub cached_tokens: Option, @@ -58,7 +81,10 @@ pub struct PromptTokensDetails { impl PromptTokensDetails { /// True if all fields are `None`. 
pub fn is_empty(&self) -> bool { - self.cache_creation_tokens.is_none() && self.cached_tokens.is_none() && self.audio_tokens.is_none() + self.cache_creation_tokens.is_none() + && self.cache_creation_details.as_ref().map(|d| d.is_empty()).unwrap_or(true) + && self.cached_tokens.is_none() + && self.audio_tokens.is_none() } } diff --git a/tests/support/common_tests.rs b/tests/support/common_tests.rs index 0a3b901d..162827e8 100644 --- a/tests/support/common_tests.rs +++ b/tests/support/common_tests.rs @@ -518,6 +518,96 @@ pub async fn common_test_chat_cache_explicit_system_ok(model: &str) -> TestResul Ok(()) } +/// Test for 1-hour TTL cache (Ephemeral1h). +/// Note: 1h TTL is only supported on Claude 4.5 models (Opus 4.5, Sonnet 4.5, Haiku 4.5). +pub async fn common_test_chat_cache_explicit_1h_ttl_ok(model: &str) -> TestResult<()> { + // -- Setup & Fixtures + let client = Client::default(); + let big_content = get_big_content()?; + let chat_req = ChatRequest::new(vec![ + // -- Messages + ChatMessage::system("Give a very short summary of what each of those files are about"), + ChatMessage::user(big_content).with_options(CacheControl::Ephemeral1h), + ]); + + // -- Exec + let chat_res = client.exec_chat(model, chat_req, None).await?; + + // -- Check Content + let content = chat_res.first_text().ok_or("Should have content")?; + assert!(!content.trim().is_empty(), "Content should not be empty"); + + // -- Check Usage + let usage = &chat_res.usage; + + let total_tokens = get_option_value!(usage.total_tokens); + let prompt_tokens_details = usage + .prompt_tokens_details + .as_ref() + .ok_or("Should have prompt_tokens_details")?; + let cache_creation_tokens = get_option_value!(prompt_tokens_details.cache_creation_tokens); + let cached_tokens = get_option_value!(prompt_tokens_details.cached_tokens); + + assert!( + cache_creation_tokens > 0 || cached_tokens > 0, + "one of cache_creation_tokens or cached_tokens should be greater than 0" + ); + assert!(total_tokens > 0, 
"total_tokens should be > 0"); + + // Note: cache_creation_details may or may not be present depending on provider response format + // The API may return TTL-specific breakdown in cache_creation_details when using different TTLs + + Ok(()) +} + +/// Streaming test for 1-hour TTL cache (Ephemeral1h). +/// Note: 1h TTL is only supported on Claude 4.5 models (Opus 4.5, Sonnet 4.5, Haiku 4.5). +pub async fn common_test_chat_stream_cache_explicit_1h_ttl_ok(model: &str) -> TestResult<()> { + // -- Setup & Fixtures + let client = Client::builder() + .with_chat_options(ChatOptions::default().with_capture_usage(true)) + .build(); + let big_content = get_big_content()?; + let chat_req = ChatRequest::new(vec![ + // -- Messages + ChatMessage::system("Give a very short summary of what each of those files are about"), + ChatMessage::user(big_content).with_options(CacheControl::Ephemeral1h), + ]); + + // -- Exec + let chat_res = client.exec_chat_stream(model, chat_req, None).await?; + + // -- Extract Stream content + let StreamExtract { + stream_end, + content, + reasoning_content: _, + } = extract_stream_end(chat_res.stream).await?; + let content = content.ok_or("extract_stream_end SHOULD have extracted some content")?; + + // -- Check Content + assert!(!content.trim().is_empty(), "Content should not be empty"); + + // -- Check Usage + let usage = stream_end.captured_usage.as_ref().ok_or("Should have captured_usage")?; + + let total_tokens = get_option_value!(usage.total_tokens); + let prompt_tokens_details = usage + .prompt_tokens_details + .as_ref() + .ok_or("Should have prompt_tokens_details")?; + let cache_creation_tokens = get_option_value!(prompt_tokens_details.cache_creation_tokens); + let cached_tokens = get_option_value!(prompt_tokens_details.cached_tokens); + + assert!( + cache_creation_tokens > 0 || cached_tokens > 0, + "one of cache_creation_tokens or cached_tokens should be greater than 0" + ); + assert!(total_tokens > 0, "total_tokens should be > 0"); + + Ok(()) 
+} + // endregion: --- Chat Explicit Cache // region: --- Chat Stream Tests diff --git a/tests/tests_p_anthropic.rs b/tests/tests_p_anthropic.rs index 26ee1412..22f3d50d 100644 --- a/tests/tests_p_anthropic.rs +++ b/tests/tests_p_anthropic.rs @@ -78,6 +78,20 @@ async fn test_chat_cache_explicit_system_ok() -> TestResult<()> { common_tests::common_test_chat_cache_explicit_system_ok(MODEL).await } +/// Test for 1-hour TTL cache (only supported on Claude 4.5 models) +#[tokio::test] +#[serial(anthropic)] +async fn test_chat_cache_explicit_1h_ttl_ok() -> TestResult<()> { + common_tests::common_test_chat_cache_explicit_1h_ttl_ok(MODEL_THINKING).await +} + +/// Streaming test for 1-hour TTL cache (only supported on Claude 4.5 models) +#[tokio::test] +#[serial(anthropic)] +async fn test_chat_stream_cache_explicit_1h_ttl_ok() -> TestResult<()> { + common_tests::common_test_chat_stream_cache_explicit_1h_ttl_ok(MODEL_THINKING).await +} + // endregion: --- Chat Explicit Cache // region: --- Chat Stream Tests From a1777249b934796ec0a12599edd98bf9ab610144 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 31 Jan 2026 15:48:15 -0800 Subject: [PATCH 117/123] ^ ModelName - add Static/Shared inner --- src/common/model_name.rs | 80 ++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 23 deletions(-) diff --git a/src/common/model_name.rs b/src/common/model_name.rs index a3a7e399..087a4d6b 100644 --- a/src/common/model_name.rs +++ b/src/common/model_name.rs @@ -1,13 +1,48 @@ use std::ops::Deref; use std::sync::Arc; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize}; -/// The model name, which is just an `Arc` wrapper (simple and relatively efficient to clone) -#[derive(Clone, Debug, Serialize, Deserialize, Hash, Eq, PartialEq)] -pub struct ModelName(Arc); +/// Store a model name with or without namespace +/// e.g. 
`gemini-3-flash-preview` or `gemini::gemini-3-flash-preview` +#[derive(Clone, Debug, Serialize, Hash, Eq, PartialEq)] +pub struct ModelName(Inner); + +#[derive(Clone, Debug, Serialize, Hash, Eq, PartialEq)] +enum Inner { + Static(&'static str), + Shared(Arc), +} + +impl<'de> Deserialize<'de> for ModelName { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let s: &str = <&str>::deserialize(deserializer)?; + Ok(ModelName(Inner::Shared(Arc::::from(s)))) + } +} + +/// Constructor +impl ModelName { + pub fn new(name: impl Into>) -> Self { + Self(Inner::Shared(name.into())) + } + + pub fn from_static(name: &'static str) -> Self { + Self(Inner::Static(name)) + } +} impl ModelName { + pub fn as_str(&self) -> &str { + match self.0 { + Inner::Static(s) => s, + Inner::Shared(ref s) => s, + } + } + pub fn namespace_is(&self, namespace: &str) -> bool { self.namespace() == Some(namespace) } @@ -18,7 +53,7 @@ impl ModelName { /// Returns `(namespace, name)` pub fn namespace_and_name(&self) -> (Option<&str>, &str) { - Self::split_as_namespace_and_name(&self.0) + Self::split_as_namespace_and_name(self.as_str()) } /// e.g.: @@ -35,11 +70,17 @@ impl ModelName { } } +impl std::fmt::Display for ModelName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + // region: --- Froms impl From for String { fn from(model_name: ModelName) -> Self { - model_name.0.to_string() + model_name.as_str().to_string() } } @@ -49,19 +90,19 @@ impl From for String { impl From for ModelName { fn from(s: String) -> Self { - Self(Arc::from(s)) + Self(Inner::Shared(Arc::from(s))) } } impl From<&String> for ModelName { fn from(s: &String) -> Self { - Self(Arc::from(s.as_str())) + Self(Inner::Shared(Arc::from(s.as_str()))) } } impl From<&str> for ModelName { fn from(s: &str) -> Self { - Self(Arc::from(s)) + Self(Inner::Shared(Arc::from(s))) } } @@ -70,7 +111,7 @@ impl Deref for ModelName { type Target = str; fn 
deref(&self) -> &Self::Target { - &self.0 + self.as_str() } } @@ -81,46 +122,39 @@ impl Deref for ModelName { // PartialEq implementations for various string types impl PartialEq for ModelName { fn eq(&self, other: &str) -> bool { - &*self.0 == other + self.as_str() == other } } impl PartialEq<&str> for ModelName { fn eq(&self, other: &&str) -> bool { - &*self.0 == *other + self.as_str() == *other } } impl PartialEq for ModelName { fn eq(&self, other: &String) -> bool { - &*self.0 == other + self.as_str() == other } } // Symmetric implementations (allow "string" == model_name) impl PartialEq for str { fn eq(&self, other: &ModelName) -> bool { - self == &*other.0 + self == other.as_str() } } impl PartialEq for &str { fn eq(&self, other: &ModelName) -> bool { - *self == &*other.0 + *self == other.as_str() } } impl PartialEq for String { fn eq(&self, other: &ModelName) -> bool { - self == &*other.0 + self == other.as_str() } } // endregion: --- EQ - -// TODO: replace with derive_more Display -impl std::fmt::Display for ModelName { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0) - } -} From bdec7a5f423e7db5c142d92b45b3d35fbed1a30a Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 31 Jan 2026 16:37:54 -0800 Subject: [PATCH 118/123] . ModelSpec - update ModelSpec from apis --- src/client/model_spec.rs | 30 ++++++++++++++++++------------ src/common/model_iden.rs | 12 ++++++++++-- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/client/model_spec.rs b/src/client/model_spec.rs index 98ec9870..9aac7a25 100644 --- a/src/client/model_spec.rs +++ b/src/client/model_spec.rs @@ -1,5 +1,4 @@ -use crate::adapter::AdapterKind; -use crate::{ModelIden, ServiceTarget}; +use crate::{ModelIden, ModelName, ServiceTarget}; /// Specifies how to identify and resolve a model for API calls. 
/// @@ -37,8 +36,8 @@ use crate::{ModelIden, ServiceTarget}; /// ``` #[derive(Debug, Clone)] pub enum ModelSpec { - /// Model name string - adapter kind is inferred, auth/endpoint resolved via config. - Name(String), + /// Model name - with or without model namespace + Name(ModelName), /// Explicit [`ModelIden`] - skips adapter inference, still resolves auth/endpoint. Iden(ModelIden), @@ -51,13 +50,20 @@ pub enum ModelSpec { impl ModelSpec { /// Creates a `ModelSpec::Name` from a string. - pub fn from_model_name(name: impl Into) -> Self { + pub fn from_name(name: impl Into) -> Self { ModelSpec::Name(name.into()) } - /// Creates a `ModelSpec::Iden` from adapter kind and model name. - pub fn from_model(adapter_kind: AdapterKind, model_name: impl Into) -> Self { - ModelSpec::Iden(ModelIden::new(adapter_kind, model_name.into())) + /// Creates a `ModelSpec::Name` from a static str. + pub fn from_static_name(name: &'static str) -> Self { + let name = ModelName::from_static(name); + ModelSpec::Name(name) + } + + /// Creates a `ModelSpec::Iden` from a ModelIden + pub fn from_iden(model_iden: impl Into) -> Self { + let model_iden = model_iden.into(); + Self::Iden(model_iden) } /// Creates a `ModelSpec::Target` from a complete service target. 
@@ -72,25 +78,25 @@ impl ModelSpec { impl From<&str> for ModelSpec { fn from(name: &str) -> Self { - ModelSpec::Name(name.to_string()) + ModelSpec::Name(name.into()) } } impl From<&&str> for ModelSpec { fn from(name: &&str) -> Self { - ModelSpec::Name((*name).to_string()) + ModelSpec::Name((*name).into()) } } impl From for ModelSpec { fn from(name: String) -> Self { - ModelSpec::Name(name) + ModelSpec::Name(name.into()) } } impl From<&String> for ModelSpec { fn from(name: &String) -> Self { - ModelSpec::Name(name.clone()) + ModelSpec::Name(name.into()) } } diff --git a/src/common/model_iden.rs b/src/common/model_iden.rs index 6f5d3165..ed40c64e 100644 --- a/src/common/model_iden.rs +++ b/src/common/model_iden.rs @@ -20,12 +20,20 @@ pub struct ModelIden { /// Contructor impl ModelIden { /// Create a new `ModelIden` with the given adapter kind and model name. - pub fn new(adapter_kind: AdapterKind, model_name: impl Into) -> Self { + pub fn new(adapter_kind: impl Into, model_name: impl Into) -> Self { Self { - adapter_kind, + adapter_kind: adapter_kind.into(), model_name: model_name.into(), } } + + /// Create a new `ModelIden` with the given adapter kind and model name. + pub fn from_static(adapter_kind: impl Into, model_name: &'static str) -> Self { + Self { + adapter_kind: adapter_kind.into(), + model_name: ModelName::from_static(model_name), + } + } } impl ModelIden { From de31e443d88e88b863b6eee189a7e1436cf862b1 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 31 Jan 2026 16:52:31 -0800 Subject: [PATCH 119/123] + example - c06-model-spec --- examples/c06-model-spec.rs | 66 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 examples/c06-model-spec.rs diff --git a/examples/c06-model-spec.rs b/examples/c06-model-spec.rs new file mode 100644 index 00000000..518cdd0e --- /dev/null +++ b/examples/c06-model-spec.rs @@ -0,0 +1,66 @@ +//! This example shows how to use a custom AdapterKindResolver to have some custom +//! 
mapping from a model name to an AdapterKind. +//! This allows mapping missing models to their Adapter implementations. + +use genai::adapter::AdapterKind; +use genai::chat::{ChatMessage, ChatRequest}; +use genai::resolver::{AuthData, Endpoint}; +use genai::{Client, ModelIden, ModelSpec, ServiceTarget}; +use tracing_subscriber::EnvFilter; + +pub enum AppModel { + Fast, + Pro, + Local, + Custom(String), +} + +impl From<&AppModel> for ModelSpec { + fn from(model: &AppModel) -> Self { + match model { + AppModel::Fast => ModelSpec::from_static_name("gemini-3-flash-preview"), + + // ModelName will be Arc (use `ModelIden::from_static(..) for micro optimization) + AppModel::Pro => ModelSpec::from_iden((AdapterKind::Anthropic, "claude-opus-4-5")), + + AppModel::Local => ModelSpec::Target(ServiceTarget { + model: ModelIden::from_static(AdapterKind::Ollama, "gemma3:1b"), + endpoint: Endpoint::from_static("http://localhost:11434"), + auth: AuthData::Key("".to_string()), + }), + + AppModel::Custom(name) => ModelSpec::from_name(name), + } + } +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + tracing_subscriber::fmt() + .with_env_filter(EnvFilter::new("genai=debug")) + // .with_max_level(tracing::Level::DEBUG) // To enable all sub-library tracing + .init(); + + // -- Model Spec (unselect one below) + let model_spec = AppModel::Fast; + // let model_spec = AppModel::Custom("gpt-5.2".to_string()); + + let question = "Why is the sky red? 
(be concise)"; + + // -- Build the new client with this client_config + let client = Client::default(); + + // -- Build the chat request + let chat_req = ChatRequest::new(vec![ChatMessage::user(question)]); + + // -- Execute and print + println!("\n--- Question:\n{question}"); + let chat_res = client.exec_chat(&model_spec, chat_req.clone(), None).await?; + + let model_iden = chat_res.model_iden; + let res_txt = chat_res.content.into_joined_texts().ok_or("Should have some response")?; + + println!("\n--- Answer: ({model_iden})\n{res_txt}"); + + Ok(()) +} From 2491f618d427b726d852621e5d5e4ca557e100e1 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 31 Jan 2026 17:03:26 -0800 Subject: [PATCH 120/123] ^ ModelSpec - implement missing From ModelName --- src/client/model_spec.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/client/model_spec.rs b/src/client/model_spec.rs index 9aac7a25..2641eded 100644 --- a/src/client/model_spec.rs +++ b/src/client/model_spec.rs @@ -100,12 +100,30 @@ impl From<&String> for ModelSpec { } } +impl From for ModelSpec { + fn from(model: ModelName) -> Self { + ModelSpec::Name(model) + } +} + +impl From<&ModelName> for ModelSpec { + fn from(model: &ModelName) -> Self { + ModelSpec::Name(model.clone()) + } +} + impl From for ModelSpec { fn from(model: ModelIden) -> Self { ModelSpec::Iden(model) } } +impl From<&ModelIden> for ModelSpec { + fn from(model: &ModelIden) -> Self { + ModelSpec::Iden(model.clone()) + } +} + impl From for ModelSpec { fn from(target: ServiceTarget) -> Self { ModelSpec::Target(target) From 4b83c8ebf0a6d64c878ede40e8a32d3e2da0fe3e Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 31 Jan 2026 17:52:04 -0800 Subject: [PATCH 121/123] . 
v0.6.0-alpha.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1009f3ef..212ae30b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.6.0-alpha.1-WIP" +version = "0.6.0-alpha.1" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From b3fe8121158e64e19498d55d3a1be74ee88785d1 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 31 Jan 2026 17:53:15 -0800 Subject: [PATCH 122/123] . 0.6.0-alpha.2-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 212ae30b..9866f4f3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.6.0-alpha.1" +version = "0.6.0-alpha.2-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From ba1f4efc9ac016ec17f4cf57b88f7b35518d66f3 Mon Sep 17 00:00:00 2001 From: Alex Mikhalev Date: Fri, 6 Feb 2026 21:24:02 +0100 Subject: [PATCH 123/123] style: fix formatting issues post-merge Apply cargo fmt to fix minor formatting inconsistencies in: - src/chat/tool/tool_response.rs (extra blank line) - tests/test_verify_model_lists.rs (println formatting) - tests/test_zai_adapter.rs (import order) Co-Authored-By: Terraphim AI --- src/chat/tool/tool_response.rs | 1 - tests/test_verify_model_lists.rs | 5 ++++- tests/test_zai_adapter.rs | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/chat/tool/tool_response.rs b/src/chat/tool/tool_response.rs index 25ba5166..288f292e 100644 --- a/src/chat/tool/tool_response.rs +++ b/src/chat/tool/tool_response.rs @@ -43,4 +43,3 @@ impl ToolResponse { &self.content } } - diff --git a/tests/test_verify_model_lists.rs b/tests/test_verify_model_lists.rs index d8d580a3..5d06d590 
100644 --- a/tests/test_verify_model_lists.rs +++ b/tests/test_verify_model_lists.rs @@ -137,7 +137,10 @@ async fn test_model_resolution_edge_cases() -> Result<(), Box {}", model, actual_adapter); } else { - println!(" [FAIL] {} -> {} (expected {})", model, actual_adapter, expected_adapter); + println!( + " [FAIL] {} -> {} (expected {})", + model, actual_adapter, expected_adapter + ); all_passed = false; } } diff --git a/tests/test_zai_adapter.rs b/tests/test_zai_adapter.rs index 845c5e37..c43b5b67 100644 --- a/tests/test_zai_adapter.rs +++ b/tests/test_zai_adapter.rs @@ -1,8 +1,8 @@ //! Test for Z.AI adapter support (upstream v0.6.0-alpha.2) //! Note: Adapter is now named "Zai" (not "ZAi") -use genai::chat::{ChatMessage, ChatRequest}; use genai::Client; +use genai::chat::{ChatMessage, ChatRequest}; #[tokio::test] async fn test_zai_model_resolution() -> Result<(), Box> {