From 19654ced0a6e59596d2eb145e5802fe578ceeebe Mon Sep 17 00:00:00 2001 From: Codex Date: Wed, 17 Jun 2026 09:44:58 -0700 Subject: [PATCH] Fix OpenCode leaked think normalization Preserve leaked OpenCode/Kimi think content as thinking transcript items instead of stripping it from assistant text. Handle balanced tags, malformed leading closer markers, marker-only parts, unterminated open tags, and role-missing OpenCode messages while preserving user-authored text. --- .../adapters/opencode/normalize.ts | 163 +++++++++++++++--- .../fresh-agent/opencode-normalize.test.ts | 115 +++++++++++- 2 files changed, 254 insertions(+), 24 deletions(-) diff --git a/server/fresh-agent/adapters/opencode/normalize.ts b/server/fresh-agent/adapters/opencode/normalize.ts index 10d0529ca..bf03f5733 100644 --- a/server/fresh-agent/adapters/opencode/normalize.ts +++ b/server/fresh-agent/adapters/opencode/normalize.ts @@ -97,33 +97,118 @@ function stripOpencodeRunArgumentQuoting(text: string): string { return text } -/** OpenCode / Kimi may leak internal reasoning inside `` / `` tags. - * Strip both the tags and their content from assistant transcript text. - * User input is preserved so legitimate markup the user typed is not lost. */ -function stripThinkTags(text: string): string { - const after = text.replace(/]*>[\s\S]*?<\/thinking>/gi, '').replace(/]*>[\s\S]*?<\/think>/gi, '') - return after.length === text.length ? text : after.trim() +type NormalizedTextSegment = { + kind: 'text' | 'thinking' + text: string +} + +const THINK_TAG_PATTERN = /<\/?thinking\b[^>]*>|<\/?think\b[^>]*>/gi +const BALANCED_THINK_TAG_PATTERN = /<(thinking|think)\b[^>]*>([\s\S]*?)<\/\1>/gi +const LEADING_THINK_CLOSER_PATTERN = /^\s*(?:(?:<\/thinking>|<\/think>)\s*)+/i +const THINK_OPEN_TAG_PATTERN = /<(thinking|think)\b[^>]*>/i +const THINK_CLOSE_TAG_PATTERN = /<\/(?:thinking|think)>/i +const SYNTHETIC_TEXT_SEGMENT_ID_SUFFIX_PATTERN = /:(?:text|thinking)-\d+$/ + +function hasThinkTag(text: string): boolean { + THINK_TAG_PATTERN.lastIndex = 0 + return THINK_TAG_PATTERN.test(text) +} + +function stripThinkTagMarkers(text: string): string { + THINK_TAG_PATTERN.lastIndex = 0 + return text.replace(THINK_TAG_PATTERN, '') +} + +function normalizeBalancedThinkTags(text: string): NormalizedTextSegment[] | null { + const segments: NormalizedTextSegment[] = [] + let cursor = 0 + let matched = false + BALANCED_THINK_TAG_PATTERN.lastIndex = 0 + for (const match of text.matchAll(BALANCED_THINK_TAG_PATTERN)) { + matched = true + if (match.index > cursor) { + segments.push({ kind: 'text', text: stripThinkTagMarkers(text.slice(cursor, match.index)) }) + } + segments.push({ kind: 'thinking', text: (match[2] ?? '').trim() }) + cursor = match.index + match[0].length + } + if (!matched) return null + if (cursor < text.length) { + segments.push({ kind: 'text', text: stripThinkTagMarkers(text.slice(cursor)) }) + } + return segments +} + +function segmentsToItems(id: string, segments: NormalizedTextSegment[]): FreshAgentTranscriptItem[] { + const visibleSegments = segments.filter((segment) => segment.text.length > 0) + if (visibleSegments.length === 0) return [] + return visibleSegments.map((segment, index) => ({ + id: visibleSegments.length === 1 ? id : `${id}:${segment.kind}-${index}`, + kind: segment.kind, + text: segment.text, + })) +} + +/** OpenCode / Kimi can leak internal reasoning tags into assistant text. + * OpenCode should surface those as thinking blocks; until it does, normalize + * the provider-specific leakage here and keep user-authored text untouched. */ +function itemsFromAssistantTextPart(text: string, id: string, leadingCloserIsThinking: boolean): FreshAgentTranscriptItem[] { + if (!hasThinkTag(text)) return [{ id, kind: 'text', text }] + + const balanced = normalizeBalancedThinkTags(text) + if (balanced) return segmentsToItems(id, balanced) + + const withoutMarkers = stripThinkTagMarkers(text) + if (LEADING_THINK_CLOSER_PATTERN.test(text)) { + const normalized = withoutMarkers.trim() + if (!normalized) return [] + return [{ + id, + kind: leadingCloserIsThinking ? 'thinking' : 'text', + text: normalized, + }] + } + + const openMatch = THINK_OPEN_TAG_PATTERN.exec(text) + if (openMatch?.index !== undefined) { + return segmentsToItems(id, [ + { kind: 'text', text: stripThinkTagMarkers(text.slice(0, openMatch.index)) }, + { kind: 'thinking', text: stripThinkTagMarkers(text.slice(openMatch.index + openMatch[0].length)).trim() }, + ]) + } + + const closeMatch = THINK_CLOSE_TAG_PATTERN.exec(text) + if (closeMatch?.index !== undefined) { + return segmentsToItems(id, [ + { kind: 'thinking', text: stripThinkTagMarkers(text.slice(0, closeMatch.index)).trim() }, + { kind: 'text', text: stripThinkTagMarkers(text.slice(closeMatch.index + closeMatch[0].length)) }, + ]) + } + + return withoutMarkers.length > 0 ? [{ id, kind: 'text', text: withoutMarkers }] : [] } function itemFromPart( part: Record, fallbackId: string, role?: FreshAgentTurn['role'], -): FreshAgentTranscriptItem | undefined { + followedByTool = false, +): FreshAgentTranscriptItem[] { const id = typeof part.id === 'string' && part.id.length > 0 ? part.id : fallbackId if (part.type === 'text') { const rawText = typeof part.text === 'string' ? part.text : '' - const stripped = role === 'user' ? rawText : stripThinkTags(rawText) - const text = role === 'user' ? stripOpencodeRunArgumentQuoting(stripped) : stripped - return { id, kind: 'text', text } + if (role === 'user') { + return [{ id, kind: 'text', text: stripOpencodeRunArgumentQuoting(rawText) }] + } + return itemsFromAssistantTextPart(rawText, id, followedByTool) } if (part.type === 'reasoning') { const text = typeof part.text === 'string' ? part.text : '' - return { id, kind: 'reasoning', summary: text ? [text] : [], content: text ? [text] : [], text } + return [{ id, kind: 'reasoning', summary: text ? [text] : [], content: text ? [text] : [], text }] } if (part.type === 'tool') { const state = part.state && typeof part.state === 'object' ? part.state as Record : {} - return { + return [{ id, kind: 'dynamic_tool', namespace: 'opencode', @@ -132,24 +217,55 @@ function itemFromPart( arguments: state.input ?? {}, contentItems: typeof state.output === 'string' ? [state.output] : undefined, success: state.status === 'completed' ? true : undefined, - } + }] } if (part.type === 'file') { - return { id, kind: 'text', text: `Attached file: ${fileAttachmentTarget(part)}` } + return [{ id, kind: 'text', text: `Attached file: ${fileAttachmentTarget(part)}` }] } if (part.type === 'patch') { - return { + return [{ id, kind: 'file_change', status: 'completed', changes: normalizePatchChanges(part.files), extensions: { opencode: part }, - } + }] } if (part.type === 'compaction') { - return { id, kind: 'context_compaction' } + return [{ id, kind: 'context_compaction' }] + } + return [] +} + +function computeToolAfterByPartIndex(parts: Record[]): boolean[] { + const toolAfterByPartIndex = new Array(parts.length).fill(false) + let hasToolAfter = false + for (let index = parts.length - 1; index >= 0; index -= 1) { + toolAfterByPartIndex[index] = hasToolAfter + if (parts[index]?.type === 'tool') hasToolAfter = true + } + return toolAfterByPartIndex +} + +function textSummaryFromItems(items: FreshAgentTranscriptItem[]): string | undefined { + const textItems = items.filter((item): item is Extract => item.kind === 'text') + if (textItems.length === 0) return undefined + const groups: string[] = [] + let currentSourceId: string | undefined + let currentText = '' + for (const item of textItems) { + const sourceId = item.id.replace(SYNTHETIC_TEXT_SEGMENT_ID_SUFFIX_PATTERN, '') + if (currentSourceId === undefined || sourceId === currentSourceId) { + currentSourceId = sourceId + currentText += item.text + continue + } + if (currentText.length > 0) groups.push(currentText) + currentSourceId = sourceId + currentText = item.text } - return undefined + if (currentText.length > 0) groups.push(currentText) + return groups.join('\n\n') } function collectOpencodePartMetadata(messages: NonNullable): Record { @@ -213,11 +329,16 @@ export function normalizeOpencodeTurn( const id = typeof info.id === 'string' && info.id.length > 0 ? info.id : `message-${ordinal}` const role = normalizeOpencodeRole(info.role) const parts = Array.isArray(message.parts) ? message.parts : [] + const toolAfterByPartIndex = computeToolAfterByPartIndex(parts) const items = parts - .map((part, index) => itemFromPart(part, `${id}:part-${index}`, role)) - .filter((item): item is FreshAgentTranscriptItem => Boolean(item)) + .flatMap((part, index) => itemFromPart( + part, + `${id}:part-${index}`, + role, + toolAfterByPartIndex[index] ?? false, + )) if (!role && items.length > 0) return null - const textSummary = items.find((item) => item.kind === 'text')?.text + const textSummary = textSummaryFromItems(items) const reasoningSummary = items.find((item) => item.kind === 'reasoning')?.summary?.[0] return { id, diff --git a/test/unit/server/fresh-agent/opencode-normalize.test.ts b/test/unit/server/fresh-agent/opencode-normalize.test.ts index 9998be50b..478f7cdaa 100644 --- a/test/unit/server/fresh-agent/opencode-normalize.test.ts +++ b/test/unit/server/fresh-agent/opencode-normalize.test.ts @@ -199,7 +199,7 @@ describe('OpenCode fresh-agent normalization', () => { expect(turn.items[0]?.text).toBe('"nested" quotes') }) - it('strips leaked think/thinking tags and their content from assistant text parts', () => { + it('maps balanced leaked think/thinking tags to thinking items while preserving visible assistant text', () => { const turn = normalizeOpencodeTurn({ info: { id: 'msg-think-tags', role: 'assistant' }, parts: [ @@ -217,11 +217,120 @@ describe('OpenCode fresh-agent normalization', () => { }, 0)! expect(turn.items).toEqual([ - { id: 'part-think', kind: 'text', text: 'Before and after.' }, - { id: 'part-thinking', kind: 'text', text: 'Intro done.' }, + { id: 'part-think:text-0', kind: 'text', text: 'Before ' }, + { id: 'part-think:thinking-1', kind: 'thinking', text: 'Internal plan\n1 tool used' }, + { id: 'part-think:text-2', kind: 'text', text: ' and after.' }, + { id: 'part-thinking:text-0', kind: 'text', text: 'Intro ' }, + { id: 'part-thinking:thinking-1', kind: 'thinking', text: 'I could change all instances.' }, + { id: 'part-thinking:text-2', kind: 'text', text: ' done.' }, ]) + expect(turn.summary).toBe('Before and after.\n\nIntro done.') }) + it('maps malformed leading think closers in OpenCode tool turns to thinking and drops marker-only parts', () => { + const turn = normalizeOpencodeTurn({ + info: { id: 'msg-leaked-tool-thinking', role: 'assistant' }, + parts: [ + { id: 'part-marker-only', type: 'text', text: '' }, + { id: 'part-leading-close', type: 'text', text: 'Need to edit the CSS.' }, + { + id: 'part-tool', + type: 'tool', + tool: 'edit', + state: { + status: 'completed', + input: { filePath: 'src/index.css' }, + output: 'Edit applied successfully.', + }, + }, + ], + }, 0) + + expect(turn.items).toEqual([ + { id: 'part-leading-close', kind: 'thinking', text: 'Need to edit the CSS.' }, + { + id: 'part-tool', + kind: 'dynamic_tool', + namespace: 'opencode', + tool: 'edit', + status: 'completed', + arguments: { filePath: 'src/index.css' }, + contentItems: ['Edit applied successfully.'], + success: true, + }, + ]) + }) + + it('keeps final visible assistant text when a malformed leading think closer leaks outside a tool turn', () => { + const turn = normalizeOpencodeTurn({ + info: { id: 'msg-final-text', role: 'assistant' }, + parts: [{ id: 'part-final', type: 'text', text: 'Done. Updated the CSS.' }], + }, 0) + + expect(turn.items).toEqual([ + { id: 'part-final', kind: 'text', text: 'Done. Updated the CSS.' }, + ]) + expect(turn.summary).toBe('Done. Updated the CSS.') + }) + + it('keeps final visible assistant text when a malformed leading think closer appears after a tool', () => { + const turn = normalizeOpencodeTurn({ + info: { id: 'msg-final-text-after-tool', role: 'assistant' }, + parts: [ + { + id: 'part-tool', + type: 'tool', + tool: 'edit', + state: { status: 'completed', input: {}, output: 'done' }, + }, + { id: 'part-final', type: 'text', text: 'Done. Updated the CSS.' }, + ], + }, 0) + + expect(turn.items.at(-1)).toEqual({ id: 'part-final', kind: 'text', text: 'Done. Updated the CSS.' }) + expect(turn.summary).toBe('Done. Updated the CSS.') + }) + + it('maps unterminated leaked think tags to thinking without empty transcript text', () => { + const bare = normalizeOpencodeTurn({ + info: { id: 'msg-bare-open', role: 'assistant' }, + parts: [{ id: 'part-bare-open', type: 'text', text: '' }], + }, 0) + expect(bare.items).toEqual([]) + + const mixed = normalizeOpencodeTurn({ + info: { id: 'msg-open', role: 'assistant' }, + parts: [{ id: 'part-open', type: 'text', text: 'Visible hidden plan' }], + }, 0) + + expect(mixed.items).toEqual([ + { id: 'part-open:text-0', kind: 'text', text: 'Visible ' }, + { id: 'part-open:thinking-1', kind: 'thinking', text: 'hidden plan' }, + ]) + expect(mixed.summary).toBe('Visible ') + }) + + it('maps dangling leaked think closers to thinking plus visible text', () => { + const turn = normalizeOpencodeTurn({ + info: { id: 'msg-dangling-close', role: 'assistant' }, + parts: [{ id: 'part-dangling-close', type: 'text', text: 'hidden planVisible text' }], + }, 0) + + expect(turn.items).toEqual([ + { id: 'part-dangling-close:thinking-0', kind: 'thinking', text: 'hidden plan' }, + { id: 'part-dangling-close:text-1', kind: 'text', text: 'Visible text' }, + ]) + expect(turn.summary).toBe('Visible text') + }) + + it('rejects roleless messages even when they contain leaked think tags', () => { + const turn = normalizeOpencodeTurn({ + info: { id: 'msg-unknown-role' }, + parts: [{ id: 'part-unknown-role', type: 'text', text: 'hidden planVISIBLE text' }], + }, 0) + + expect(turn).toBeNull() + }) it('preserves think tags in user text parts', () => { const turn = normalizeOpencodeTurn({