diff --git a/internal/adapter/claude/handler_stream_test.go b/internal/adapter/claude/handler_stream_test.go index 77e62c8..3d574fe 100644 --- a/internal/adapter/claude/handler_stream_test.go +++ b/internal/adapter/claude/handler_stream_test.go @@ -358,7 +358,7 @@ func TestHandleClaudeStreamRealtimeToolSafetyAcrossStructuredFormats(t *testing. } } -func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.T) { +func TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample(t *testing.T) { h := &Handler{} resp := makeClaudeSSEHTTPResponse( "data: {\"p\":\"response/content\",\"v\":\"Here is an example:\\n```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"Bash\\\",\\\"input\\\":{\\\"command\\\":\\\"pwd\\\"}}]}\"}", @@ -379,8 +379,8 @@ func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing. break } } - if !foundToolUse { - t.Fatalf("expected tool_use for fenced example, body=%s", rec.Body.String()) + if foundToolUse { + t.Fatalf("expected no tool_use for fenced example, body=%s", rec.Body.String()) } foundToolStop := false @@ -391,7 +391,12 @@ func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing. break } } - if !foundToolStop { - t.Fatalf("expected stop_reason=tool_use, body=%s", rec.Body.String()) + if foundToolStop { + t.Fatalf("expected stop_reason to remain content-only, body=%s", rec.Body.String()) } } + +// Backward-compatible alias for historical test name used in CI logs. +func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.T) { + TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample(t) +} diff --git a/internal/adapter/openai/handler_toolcall_test.go b/internal/adapter/openai/handler_toolcall_test.go index f85ad48..d3b849a 100644 --- a/internal/adapter/openai/handler_toolcall_test.go +++ b/internal/adapter/openai/handler_toolcall_test.go @@ -243,7 +243,7 @@ func TestHandleNonStreamEmbeddedToolCallExamplePromotesToolCall(t *testing.T) { } } -func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) { +func TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( "data: {\"p\":\"response/content\",\"v\":\"```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"search\\\",\\\"input\\\":{\\\"q\\\":\\\"go\\\"}}]}\\n```\"}", @@ -259,20 +259,25 @@ func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) { out := decodeJSONBody(t, rec.Body.String()) choices, _ := out["choices"].([]any) choice, _ := choices[0].(map[string]any) - if choice["finish_reason"] != "tool_calls" { - t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"]) + if choice["finish_reason"] == "tool_calls" { + t.Fatalf("expected fenced example to remain content-only, got finish_reason=%#v", choice["finish_reason"]) } msg, _ := choice["message"].(map[string]any) toolCalls, _ := msg["tool_calls"].([]any) - if len(toolCalls) != 1 { - t.Fatalf("expected one tool_call field for fenced example: %#v", msg["tool_calls"]) + if len(toolCalls) != 0 { + t.Fatalf("expected no tool_call field for fenced example: %#v", msg["tool_calls"]) } content, _ := msg["content"].(string) - if strings.Contains(content, `"tool_calls"`) { - t.Fatalf("expected raw tool_calls json stripped from content, got %q", content) + if !strings.Contains(content, `"tool_calls"`) { + t.Fatalf("expected fenced example content preserved, got %q", content) } } +// Backward-compatible alias for historical test name used in CI logs. +func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) { + TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall(t) +} + func TestHandleStreamToolCallInterceptsWithoutRawContentLeak(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( diff --git a/internal/format/openai/render_test.go b/internal/format/openai/render_test.go index 952d0ef..8f7a2c9 100644 --- a/internal/format/openai/render_test.go +++ b/internal/format/openai/render_test.go @@ -2,6 +2,7 @@ package openai import ( "encoding/json" + "strings" "testing" ) @@ -69,7 +70,7 @@ func TestBuildResponseObjectPromotesMixedProseToolPayloadToFunctionCall(t *testi } } -func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T) { +func TestBuildResponseObjectKeepsFencedToolPayloadAsText(t *testing.T) { obj := BuildResponseObject( "resp_test", "gpt-4o", @@ -80,19 +81,24 @@ func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T ) outputText, _ := obj["output_text"].(string) - if outputText != "" { - t.Fatalf("expected output_text hidden for fenced tool payload, got %q", outputText) + if !strings.Contains(outputText, "\"tool_calls\"") { + t.Fatalf("expected output_text to preserve fenced tool payload, got %q", outputText) } output, _ := obj["output"].([]any) if len(output) != 1 { - t.Fatalf("expected one function_call output item, got %#v", obj["output"]) + t.Fatalf("expected one message output item, got %#v", obj["output"]) } first, _ := output[0].(map[string]any) - if first["type"] != "function_call" { - t.Fatalf("expected function_call output type, got %#v", first["type"]) + if first["type"] != "message" { + t.Fatalf("expected message output type, got %#v", first["type"]) } } +// Backward-compatible alias for historical test name used in CI logs. +func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T) { + TestBuildResponseObjectKeepsFencedToolPayloadAsText(t) +} + func TestBuildResponseObjectReasoningOnlyFallsBackToOutputText(t *testing.T) { obj := BuildResponseObject( "resp_test", diff --git a/internal/js/helpers/stream-tool-sieve/parse.js b/internal/js/helpers/stream-tool-sieve/parse.js index 132baff..586c45b 100644 --- a/internal/js/helpers/stream-tool-sieve/parse.js +++ b/internal/js/helpers/stream-tool-sieve/parse.js @@ -237,11 +237,11 @@ function looksLikeToolCallSyntax(text) { } function shouldSkipToolCallParsingForCodeFenceExample(text) { + if (!looksLikeToolCallSyntax(text)) { return false; } - return /<(?:(?:[a-z0-9_:-]+:)?(?:tool_call|function_call|invoke)\b)/i.test(raw) - || /<(?:[a-z0-9_:-]+:)?function_calls\b/i.test(raw) - || /<(?:[a-z0-9_:-]+:)?tool_use\b/i.test(raw); + const stripped = stripFencedCodeBlocks(text); + return !looksLikeToolCallSyntax(stripped); } module.exports = { diff --git a/internal/util/toolcalls_candidates.go b/internal/util/toolcalls_candidates.go index f847580..122ac7f 100644 --- a/internal/util/toolcalls_candidates.go +++ b/internal/util/toolcalls_candidates.go @@ -177,7 +177,7 @@ func looksLikeToolExampleContext(text string) bool { } func shouldSkipToolCallParsingForCodeFenceExample(text string) bool { - if !looksLikeToolCallSyntax(text) || looksLikeMarkupToolSyntax(text) { + if !looksLikeToolCallSyntax(text) { return false } stripped := strings.TrimSpace(stripFencedCodeBlocks(text))