fix: capture text from intermediate tool_use iterations

gitoschina · gitoschina · commit 33c3bf1998b6 · 2026-04-06T11:04:32.000+08:00
RightNow-AI#989
diff --git a/crates/openfang-runtime/src/agent_loop.rs b/crates/openfang-runtime/src/agent_loop.rs
@@ -333,6 +333,11 @@ pub async fn run_agent_loop(
     let mut total_usage = TokenUsage::default();
     let final_response;
 
+    // Accumulate text from intermediate iterations: tool_use turns may carry text
+    // alongside their tool calls, and that text would otherwise be lost when the
+    // final EndTurn iteration returns empty text. Used only as a fallback.
+    let mut accumulated_text = String::new();
+
     // Safety valve: trim excessively long message histories to prevent context overflow.
     // The full compaction system handles sophisticated summarization, but this prevents
     // the catastrophic case where 200+ messages cause instant context overflow.
@@ -518,19 +523,29 @@ pub async fn run_agent_loop(
                 }
 
                 // Guard against empty response — covers both iteration 0 and post-tool cycles
+                // Use accumulated_text from intermediate tool_use iterations as fallback.
                 let text = if text.trim().is_empty() {
-                    warn!(
-                        agent = %manifest.name,
-                        iteration,
-                        input_tokens = total_usage.input_tokens,
-                        output_tokens = total_usage.output_tokens,
-                        messages_count = messages.len(),
-                        "Empty response from LLM — guard activated"
-                    );
-                    if any_tools_executed {
-                        "[Task completed — the agent executed tools but did not produce a text summary.]".to_string()
+                    if !accumulated_text.is_empty() {
+                        debug!(
+                            agent = %manifest.name,
+                            accumulated_len = accumulated_text.len(),
+                            "Using accumulated text from intermediate tool_use iterations"
+                        );
+                        accumulated_text.clone()
                     } else {
-                        "[The model returned an empty response. This usually means the model is overloaded, the context is too large, or the API key lacks credits. Try again or check /status.]".to_string()
+                        warn!(
+                            agent = %manifest.name,
+                            iteration,
+                            input_tokens = total_usage.input_tokens,
+                            output_tokens = total_usage.output_tokens,
+                            messages_count = messages.len(),
+                            "Empty response from LLM — guard activated"
+                        );
+                        if any_tools_executed {
+                            "[Task completed — the agent executed tools but did not produce a text summary.]".to_string()
+                        } else {
+                            "[The model returned an empty response. This usually means the model is overloaded, the context is too large, or the API key lacks credits. Try again or check /status.]".to_string()
+                        }
                     }
                 } else {
                     text
@@ -651,6 +666,18 @@ pub async fn run_agent_loop(
                 consecutive_max_tokens = 0;
                 any_tools_executed = true;
 
+                // Capture any text content from this tool_use turn — the LLM may
+                // produce text alongside tool calls (e.g., a message to the user
+                // before calling memory_store). Without this, the text is lost if
+                // the next iteration returns EndTurn with empty text.
+                let intermediate_text = response.text();
+                if !intermediate_text.trim().is_empty() {
+                    if !accumulated_text.is_empty() {
+                        accumulated_text.push_str("\n\n");
+                    }
+                    accumulated_text.push_str(intermediate_text.trim());
+                }
+
                 // Execute tool calls
                 let assistant_blocks = response.content.clone();
 
@@ -1352,6 +1379,7 @@ pub async fn run_agent_loop_streaming(
 
     let mut total_usage = TokenUsage::default();
     let final_response;
+    let mut accumulated_text = String::new();
 
     // Safety valve: trim excessively long message histories to prevent context overflow.
     if messages.len() > MAX_HISTORY_MESSAGES {
@@ -1549,20 +1577,29 @@ pub async fn run_agent_loop_streaming(
                     }
                 }
 
-                // Guard against empty response — covers both iteration 0 and post-tool cycles
+                // Guard against empty response (streaming) — fall back to text accumulated from tool_use turns, then to a placeholder.
                 let text = if text.trim().is_empty() {
-                    warn!(
-                        agent = %manifest.name,
-                        iteration,
-                        input_tokens = total_usage.input_tokens,
-                        output_tokens = total_usage.output_tokens,
-                        messages_count = messages.len(),
-                        "Empty response from LLM (streaming) — guard activated"
-                    );
-                    if any_tools_executed {
-                        "[Task completed — the agent executed tools but did not produce a text summary.]".to_string()
+                    if !accumulated_text.is_empty() {
+                        debug!(
+                            agent = %manifest.name,
+                            accumulated_len = accumulated_text.len(),
+                            "Using accumulated text from intermediate tool_use iterations (streaming)"
+                        );
+                        accumulated_text.clone()
                     } else {
-                        "[The model returned an empty response. This usually means the model is overloaded, the context is too large, or the API key lacks credits. Try again or check /status.]".to_string()
+                        warn!(
+                            agent = %manifest.name,
+                            iteration,
+                            input_tokens = total_usage.input_tokens,
+                            output_tokens = total_usage.output_tokens,
+                            messages_count = messages.len(),
+                            "Empty response from LLM (streaming) — guard activated"
+                        );
+                        if any_tools_executed {
+                            "[Task completed — the agent executed tools but did not produce a text summary.]".to_string()
+                        } else {
+                            "[The model returned an empty response. This usually means the model is overloaded, the context is too large, or the API key lacks credits. Try again or check /status.]".to_string()
+                        }
                     }
                 } else {
                     text
@@ -1662,6 +1699,15 @@ pub async fn run_agent_loop_streaming(
                 consecutive_max_tokens = 0;
                 any_tools_executed = true;
 
+                // Capture text emitted alongside tool calls (streaming path) so it is not lost if the EndTurn text is empty.
+                let intermediate_text = response.text();
+                if !intermediate_text.trim().is_empty() {
+                    if !accumulated_text.is_empty() {
+                        accumulated_text.push_str("\n\n");
+                    }
+                    accumulated_text.push_str(intermediate_text.trim());
+                }
+
                 let assistant_blocks = response.content.clone();
 
                 session.messages.push(Message {