From 98e2a06d5a61c4599082ce726f91d38f3565f202 Mon Sep 17 00:00:00 2001 From: ekeith <55766816+evanmkeith@users.noreply.github.com> Date: Wed, 1 Apr 2026 10:26:44 -0700 Subject: [PATCH] fix(lingua): skip content ToolUse blocks when tool_calls is non-empty MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary LangGraph stores AIMessages in graph state with two representations of the same tool call: `content[].tool_use` (Anthropic/Bedrock provider format) and `tool_calls[]` (LangChain-native format). `parse_assistant_content` processed both without deduplication, emitting two `ToolCall` parts that rendered as duplicate entries in the LLM view tab. ## Change In `parse_assistant_content` (`langchain.rs`), skip `LangChainContentPartCompat::ToolUse` blocks in the `content[]` branch when `tool_calls` is non-empty. The `tool_calls[]` array is the canonical source in this case. Note that every `tool_use` block in `content[]` is skipped in this branch, whether or not an entry with the same `id` exists in `tool_calls[]`; no per-id matching is performed. Text parts in `content[]` continue to be processed normally. The early-return path (taken when `tool_calls` is empty) is unchanged — providers that only populate `content[]` are unaffected. ## Notes - Visual-only bug. Raw span data in brainstore is unaffected. - Workaround for affected users: Thread view or JSON view. - Related: the surviving entry's arguments may still show a `$serde_json::private::Number` artifact for integer values due to a `serde_wasm_bindgen` / `serde_json::Value` serialization issue — tracked separately. 
## Testing - [ ] Existing Lingua tests pass (`cargo test`) - [ ] New test: `parse_assistant_content` with dual-representation input produces exactly one `ToolCall` part - [ ] End-to-end: reproduction script at `pylon-13952/reproduce_tool_call_duplication.py` shows tool call once in LLM view tab Fixes Pylon #13952 / BT-4608 --- .../lingua/src/processing/import/langchain.rs | 106 ++++++++++++++++-- ...ggraph-dual-tool-call-repr.assertions.json | 8 ++ ...n-langgraph-dual-tool-call-repr.spans.json | 40 +++++++ 3 files changed, 144 insertions(+), 10 deletions(-) create mode 100644 payloads/import-cases/langchain-langgraph-dual-tool-call-repr.assertions.json create mode 100644 payloads/import-cases/langchain-langgraph-dual-tool-call-repr.spans.json diff --git a/crates/lingua/src/processing/import/langchain.rs b/crates/lingua/src/processing/import/langchain.rs index c0453296..321c3b0c 100644 --- a/crates/lingua/src/processing/import/langchain.rs +++ b/crates/lingua/src/processing/import/langchain.rs @@ -408,16 +408,10 @@ fn parse_assistant_content( provider_options: None, })); } - LangChainContentPartCompat::ToolUse { id, name, input } => { - parts.push(AssistantContentPart::ToolCall { - tool_call_id: id, - tool_name: name, - arguments: parse_tool_call_arguments(Some(input)), - encrypted_content: None, - provider_options: None, - provider_executed: None, - }); - } + // When tool_calls is non-empty, ToolUse blocks in content[] are the + // same tool calls in provider format (Anthropic/Bedrock). Skip them + // here — they are already captured by the tool_calls loop below. + LangChainContentPartCompat::ToolUse { .. } => {} _ => {} } } @@ -541,3 +535,95 @@ pub(crate) fn try_parse_langchain_for_import(data: &Value) -> Option = parts + .iter() + .filter(|p| matches!(p, AssistantContentPart::ToolCall { .. 
})) + .collect(); + + assert_eq!( + tool_call_parts.len(), + 1, + "expected exactly one ToolCall part, got {}: {:?}", + tool_call_parts.len(), + tool_call_parts + ); + + let AssistantContentPart::ToolCall { tool_call_id, tool_name, arguments, .. } = + tool_call_parts[0] + else { + unreachable!() + }; + + assert_eq!(tool_call_id, "tooluse_abc"); + assert_eq!(tool_name, "get_weather"); + assert!( + matches!(arguments, ToolCallArguments::Valid(_)), + "expected Valid arguments" + ); + } + + /// When tool_calls is non-empty but content also has a text preamble, the text + /// part should still be included alongside the single tool call. + #[test] + fn test_text_preamble_preserved_with_tool_calls() { + let input = crate::serde_json::json!([{ + "type": "AIMessage", + "content": [ + {"type": "text", "text": "Let me check the weather for you."}, + {"type": "tool_use", "id": "tooluse_xyz", "name": "get_weather", "input": {"city": "London"}} + ], + "tool_calls": [{"id": "tooluse_xyz", "name": "get_weather", "args": {"city": "London"}}] + }]); + + let messages = try_parse_langchain_for_import(&input).expect("should parse successfully"); + assert_eq!(messages.len(), 1); + + let Message::Assistant { content, .. } = &messages[0] else { + panic!("expected assistant message"); + }; + + let AssistantContent::Array(parts) = content else { + panic!("expected array content"); + }; + + let text_parts: Vec<_> = parts + .iter() + .filter(|p| matches!(p, AssistantContentPart::Text(_))) + .collect(); + let tool_call_parts: Vec<_> = parts + .iter() + .filter(|p| matches!(p, AssistantContentPart::ToolCall { .. 
})) + .collect(); + + assert_eq!(text_parts.len(), 1, "expected one text part"); + assert_eq!(tool_call_parts.len(), 1, "expected one tool call part"); + } +} diff --git a/payloads/import-cases/langchain-langgraph-dual-tool-call-repr.assertions.json b/payloads/import-cases/langchain-langgraph-dual-tool-call-repr.assertions.json new file mode 100644 index 00000000..8a38b7de --- /dev/null +++ b/payloads/import-cases/langchain-langgraph-dual-tool-call-repr.assertions.json @@ -0,0 +1,8 @@ +{ + "expectedMessageCount": 2, + "expectedRolesInOrder": [ + "user", + "assistant" + ], + "mustContainText": ["tooluse_abc123", "get_weather", "Paris"] +} diff --git a/payloads/import-cases/langchain-langgraph-dual-tool-call-repr.spans.json b/payloads/import-cases/langchain-langgraph-dual-tool-call-repr.spans.json new file mode 100644 index 00000000..047559f5 --- /dev/null +++ b/payloads/import-cases/langchain-langgraph-dual-tool-call-repr.spans.json @@ -0,0 +1,40 @@ +[ + { + "input": { + "messages": [ + { + "content": "What's the weather in Paris for the next 5 days?", + "type": "human" + } + ] + }, + "output": { + "messages": [ + { + "content": [ + { + "type": "tool_use", + "id": "tooluse_abc123", + "name": "get_weather", + "input": {"city": "Paris", "days": 5} + } + ], + "tool_calls": [ + { + "id": "tooluse_abc123", + "name": "get_weather", + "args": { + "city": "Paris", + "days": 5 + }, + "type": "tool_call" + } + ], + "type": "ai" + } + ] + }, + "metadata": {"braintrust": {"integration_name": "langchain-py"}} + } +] +