From 98e2a06d5a61c4599082ce726f91d38f3565f202 Mon Sep 17 00:00:00 2001
From: ekeith <55766816+evanmkeith@users.noreply.github.com>
Date: Wed, 1 Apr 2026 10:26:44 -0700
Subject: [PATCH] fix(lingua): skip content ToolUse blocks when tool_calls is
 non-empty
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

  ## Summary

  LangGraph stores AIMessages in graph state with two representations of the
  same tool call: `content[].tool_use` (Anthropic/Bedrock provider format) and
  `tool_calls[]` (LangChain-native format). `parse_assistant_content` processed
  both without deduplication, emitting two `ToolCall` parts that rendered as
  duplicate entries in the LLM view tab.

  ## Change

  In `parse_assistant_content` (`langchain.rs`), skip `LangChainContentPartCompat::ToolUse`
  blocks in the `content[]` branch when `tool_calls` is non-empty. The
  `tool_calls[]` array is the canonical source in this case. Text parts in
  `content[]` continue to be processed normally.

  The early-return path (taken when `tool_calls` is empty) is unchanged —
  providers that only populate `content[]` are unaffected.

  ## Notes

  - Visual-only bug. Raw span data in brainstore is unaffected.
  - Workaround for affected users: Thread view or JSON view.
  - Related: the surviving entry's arguments may still show a
    `$serde_json::private::Number` artifact for integer values due to a
    `serde_wasm_bindgen` / `serde_json::Value` serialization issue — tracked
    separately.

  ## Testing

  - [ ] Existing Lingua tests pass (`cargo test`)
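  - [ ] New tests for `parse_assistant_content`, driven through
    `try_parse_langchain_for_import`: dual-representation input produces exactly
    one `ToolCall` part
    (`test_no_duplicate_tool_calls_when_both_content_and_tool_calls_present`),
    and a text preamble in `content[]` is preserved alongside it
    (`test_text_preamble_preserved_with_tool_calls`)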
  - [ ] End-to-end: reproduction script at
    `pylon-13952/reproduce_tool_call_duplication.py` shows tool call once in
    LLM view tab

  Fixes Pylon #13952 / BT-4608
---
 .../lingua/src/processing/import/langchain.rs | 106 ++++++++++++++++--
 ...ggraph-dual-tool-call-repr.assertions.json |   8 ++
 ...n-langgraph-dual-tool-call-repr.spans.json |  40 +++++++
 3 files changed, 144 insertions(+), 10 deletions(-)
 create mode 100644 payloads/import-cases/langchain-langgraph-dual-tool-call-repr.assertions.json
 create mode 100644 payloads/import-cases/langchain-langgraph-dual-tool-call-repr.spans.json

diff --git a/crates/lingua/src/processing/import/langchain.rs b/crates/lingua/src/processing/import/langchain.rs
index c0453296..321c3b0c 100644
--- a/crates/lingua/src/processing/import/langchain.rs
+++ b/crates/lingua/src/processing/import/langchain.rs
@@ -408,16 +408,10 @@ fn parse_assistant_content(
                             provider_options: None,
                         }));
                     }
-                    LangChainContentPartCompat::ToolUse { id, name, input } => {
-                        parts.push(AssistantContentPart::ToolCall {
-                            tool_call_id: id,
-                            tool_name: name,
-                            arguments: parse_tool_call_arguments(Some(input)),
-                            encrypted_content: None,
-                            provider_options: None,
-                            provider_executed: None,
-                        });
-                    }
+                    // When tool_calls is non-empty, ToolUse blocks in content[] are the
+                    // same tool calls in provider format (Anthropic/Bedrock). Skip them
+                    // here — they are already captured by the tool_calls loop below.
+                    LangChainContentPartCompat::ToolUse { .. } => {}
                     _ => {}
                 }
             }
@@ -541,3 +535,95 @@ pub(crate) fn try_parse_langchain_for_import(data: &Value) -> Option<Vec<Message
     }
     try_parse_output_shape(data)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::universal::{AssistantContent, AssistantContentPart, Message, ToolCallArguments};
+
+    /// LangGraph (Anthropic/Bedrock) populates both `content[{type:"tool_use",...}]` and
+    /// `tool_calls[{id,name,args,...}]` on the same AIMessage. The importer must emit
+    /// exactly one ToolCall part — not two.
+    #[test]
+    fn test_no_duplicate_tool_calls_when_both_content_and_tool_calls_present() {
+        let input = crate::serde_json::json!([{
+            "type": "AIMessage",
+            "content": [{"type": "tool_use", "id": "tooluse_abc", "name": "get_weather", "input": {"city": "Paris", "days": 5}}],
+            "tool_calls": [{"id": "tooluse_abc", "name": "get_weather", "args": {"city": "Paris", "days": 5}}]
+        }]);
+
+        let messages = try_parse_langchain_for_import(&input).expect("should parse successfully");
+        assert_eq!(messages.len(), 1);
+
+        let Message::Assistant { content, .. } = &messages[0] else {
+            panic!("expected assistant message, got {:?}", messages[0]);
+        };
+
+        let AssistantContent::Array(parts) = content else {
+            panic!("expected array content, got {:?}", content);
+        };
+
+        let tool_call_parts: Vec<_> = parts
+            .iter()
+            .filter(|p| matches!(p, AssistantContentPart::ToolCall { .. }))
+            .collect();
+
+        assert_eq!(
+            tool_call_parts.len(),
+            1,
+            "expected exactly one ToolCall part, got {}: {:?}",
+            tool_call_parts.len(),
+            tool_call_parts
+        );
+
+        let AssistantContentPart::ToolCall { tool_call_id, tool_name, arguments, .. } =
+            tool_call_parts[0]
+        else {
+            unreachable!()
+        };
+
+        assert_eq!(tool_call_id, "tooluse_abc");
+        assert_eq!(tool_name, "get_weather");
+        assert!(
+            matches!(arguments, ToolCallArguments::Valid(_)),
+            "expected Valid arguments"
+        );
+    }
+
+    /// When tool_calls is non-empty but content also has a text preamble, the text
+    /// part should still be included alongside the single tool call.
+    #[test]
+    fn test_text_preamble_preserved_with_tool_calls() {
+        let input = crate::serde_json::json!([{
+            "type": "AIMessage",
+            "content": [
+                {"type": "text", "text": "Let me check the weather for you."},
+                {"type": "tool_use", "id": "tooluse_xyz", "name": "get_weather", "input": {"city": "London"}}
+            ],
+            "tool_calls": [{"id": "tooluse_xyz", "name": "get_weather", "args": {"city": "London"}}]
+        }]);
+
+        let messages = try_parse_langchain_for_import(&input).expect("should parse successfully");
+        assert_eq!(messages.len(), 1);
+
+        let Message::Assistant { content, .. } = &messages[0] else {
+            panic!("expected assistant message");
+        };
+
+        let AssistantContent::Array(parts) = content else {
+            panic!("expected array content");
+        };
+
+        let text_parts: Vec<_> = parts
+            .iter()
+            .filter(|p| matches!(p, AssistantContentPart::Text(_)))
+            .collect();
+        let tool_call_parts: Vec<_> = parts
+            .iter()
+            .filter(|p| matches!(p, AssistantContentPart::ToolCall { .. }))
+            .collect();
+
+        assert_eq!(text_parts.len(), 1, "expected one text part");
+        assert_eq!(tool_call_parts.len(), 1, "expected one tool call part");
+    }
+}
diff --git a/payloads/import-cases/langchain-langgraph-dual-tool-call-repr.assertions.json b/payloads/import-cases/langchain-langgraph-dual-tool-call-repr.assertions.json
new file mode 100644
index 00000000..8a38b7de
--- /dev/null
+++ b/payloads/import-cases/langchain-langgraph-dual-tool-call-repr.assertions.json
@@ -0,0 +1,8 @@
+{
+  "expectedMessageCount": 2,
+  "expectedRolesInOrder": [
+    "user",
+    "assistant"
+  ],
+  "mustContainText": ["tooluse_abc123", "get_weather", "Paris"]
+}
diff --git a/payloads/import-cases/langchain-langgraph-dual-tool-call-repr.spans.json b/payloads/import-cases/langchain-langgraph-dual-tool-call-repr.spans.json
new file mode 100644
index 00000000..047559f5
--- /dev/null
+++ b/payloads/import-cases/langchain-langgraph-dual-tool-call-repr.spans.json
@@ -0,0 +1,40 @@
+[
+  {
+    "input": {
+      "messages": [
+        {
+          "content": "What's the weather in Paris for the next 5 days?",
+          "type": "human"
+        }
+      ]
+    },
+    "output": {
+      "messages": [
+        {
+          "content": [
+            {
+              "type": "tool_use",
+              "id": "tooluse_abc123",
+              "name": "get_weather",
+              "input": {"city": "Paris", "days": 5}
+            }
+          ],
+          "tool_calls": [
+            {
+              "id": "tooluse_abc123",
+              "name": "get_weather",
+              "args": {
+                "city": "Paris",
+                "days": 5
+              },
+              "type": "tool_call"
+            }
+          ],
+          "type": "ai"
+        }
+      ]
+    },
+    "metadata": {"braintrust": {"integration_name": "langchain-py"}}
+  }
+]
+