From 19654ced0a6e59596d2eb145e5802fe578ceeebe Mon Sep 17 00:00:00 2001
From: Codex <codex@openai.com>
Date: Wed, 17 Jun 2026 09:44:58 -0700
Subject: [PATCH] Fix OpenCode leaked think normalization

Preserve leaked OpenCode/Kimi `<think>`/`<thinking>` content as thinking transcript items instead of stripping it from assistant text. Handle balanced tags, malformed leading closing tags, marker-only parts, unterminated opening tags, and dangling closing tags; keep rejecting messages that have no role, and leave user-authored text untouched.
---
 .../adapters/opencode/normalize.ts            | 163 +++++++++++++++---
 .../fresh-agent/opencode-normalize.test.ts    | 115 +++++++++++-
 2 files changed, 254 insertions(+), 24 deletions(-)
diff --git a/server/fresh-agent/adapters/opencode/normalize.ts b/server/fresh-agent/adapters/opencode/normalize.ts
index 10d0529ca..bf03f5733 100644
--- a/server/fresh-agent/adapters/opencode/normalize.ts
+++ b/server/fresh-agent/adapters/opencode/normalize.ts
@@ -97,33 +97,118 @@ function stripOpencodeRunArgumentQuoting(text: string): string {
   return text
 }
 
-/** OpenCode / Kimi may leak internal reasoning inside `<think>` / `<thinking>` tags.
- * Strip both the tags and their content from assistant transcript text.
- * User input is preserved so legitimate markup the user typed is not lost. */
-function stripThinkTags(text: string): string {
-  const after = text.replace(/<thinking\b[^>]*>[\s\S]*?<\/thinking>/gi, '').replace(/<think\b[^>]*>[\s\S]*?<\/think>/gi, '')
-  return after.length === text.length ? text : after.trim()
+type NormalizedTextSegment = { // one slice of an assistant text part after splitting on think tags
+  kind: 'text' | 'thinking' // 'thinking' = content attributed to leaked think tags; 'text' = visible assistant text
+  text: string // slice content; empty slices are filtered out later by segmentsToItems
+}
+
+const THINK_TAG_PATTERN = /<\/?thinking\b[^>]*>|<\/?think\b[^>]*>/gi // any opening or closing think/thinking tag; global, so lastIndex must be rewound before test()
+const BALANCED_THINK_TAG_PATTERN = /<(thinking|think)\b[^>]*>([\s\S]*?)<\/\1>/gi // opener + lazy content + closer of the same name; group 1 = tag name, group 2 = inner content
+const LEADING_THINK_CLOSER_PATTERN = /^\s*(?:(?:<\/thinking>|<\/think>)\s*)+/i // one or more closing tags (whitespace allowed around them) at the very start of the text
+const THINK_OPEN_TAG_PATTERN = /<(thinking|think)\b[^>]*>/i // first opening tag; not global, so exec() carries no state between calls
+const THINK_CLOSE_TAG_PATTERN = /<\/(?:thinking|think)>/i // first bare closing tag (no attributes or whitespace before '>')
+const SYNTHETIC_TEXT_SEGMENT_ID_SUFFIX_PATTERN = /:(?:text|thinking)-\d+$/ // the `:<kind>-<index>` suffix that segmentsToItems appends when a part yields several items
+
+function hasThinkTag(text: string): boolean { // true when text contains any opening or closing think/thinking tag
+  THINK_TAG_PATTERN.lastIndex = 0 // the pattern is global, so test() resumes from lastIndex; rewind it first
+  return THINK_TAG_PATTERN.test(text)
+}
+
+function stripThinkTagMarkers(text: string): string { // removes the tag markers only; the text between them is kept
+  THINK_TAG_PATTERN.lastIndex = 0 // defensive rewind of the shared global pattern
+  return text.replace(THINK_TAG_PATTERN, '')
+}
+
+function normalizeBalancedThinkTags(text: string): NormalizedTextSegment[] | null { // splits text on balanced think/thinking pairs; null when there is no balanced pair
+  const segments: NormalizedTextSegment[] = []
+  let cursor = 0 // index just past the last balanced pair consumed so far
+  let matched = false
+  BALANCED_THINK_TAG_PATTERN.lastIndex = 0 // defensive rewind of the shared global pattern
+  for (const match of text.matchAll(BALANCED_THINK_TAG_PATTERN)) {
+    matched = true
+    if (match.index > cursor) { // visible text between the previous pair (or the start) and this pair
+      segments.push({ kind: 'text', text: stripThinkTagMarkers(text.slice(cursor, match.index)) })
+    }
+    segments.push({ kind: 'thinking', text: (match[2] ?? '').trim() }) // NOTE(review): unlike the other paths, markers nested inside the capture are not stripped here
+    cursor = match.index + match[0].length
+  }
+  if (!matched) return null // lets the caller fall back to its malformed-tag handling
+  if (cursor < text.length) { // trailing remainder after the last pair
+    segments.push({ kind: 'text', text: stripThinkTagMarkers(text.slice(cursor)) }) // NOTE(review): an unterminated opener in the remainder ends up as visible text
+  }
+  return segments
+}
+
+function segmentsToItems(id: string, segments: NormalizedTextSegment[]): FreshAgentTranscriptItem[] { // turns segments into transcript items, skipping empty ones
+  const visibleSegments = segments.filter((segment) => segment.text.length > 0)
+  if (visibleSegments.length === 0) return []
+  return visibleSegments.map((segment, index) => ({
+    id: visibleSegments.length === 1 ? id : `${id}:${segment.kind}-${index}`, // a lone item keeps the part id; otherwise suffix with kind and position among the non-empty segments
+    kind: segment.kind,
+    text: segment.text,
+  }))
+}
+
+/** OpenCode / Kimi can leak internal reasoning tags into assistant text. OpenCode should surface those as
+ * thinking blocks; until it does, split one non-user text part into text/thinking items here. `id` is the part id;
+ * `leadingCloserIsThinking` decides whether text after a malformed leading closer is thinking or visible text. */
+function itemsFromAssistantTextPart(text: string, id: string, leadingCloserIsThinking: boolean): FreshAgentTranscriptItem[] {
+  if (!hasThinkTag(text)) return [{ id, kind: 'text', text }] // fast path: no tags, the text passes through unchanged (even when empty)
+
+  const balanced = normalizeBalancedThinkTags(text)
+  if (balanced) return segmentsToItems(id, balanced) // at least one balanced pair: split around the pairs
+
+  const withoutMarkers = stripThinkTagMarkers(text)
+  if (LEADING_THINK_CLOSER_PATTERN.test(text)) { // text starts with closing tag(s) and has no balanced pair
+    const normalized = withoutMarkers.trim()
+    if (!normalized) return [] // marker-only part: nothing to show
+    return [{
+      id,
+      kind: leadingCloserIsThinking ? 'thinking' : 'text',
+      text: normalized,
+    }]
+  }
+
+  const openMatch = THINK_OPEN_TAG_PATTERN.exec(text)
+  if (openMatch?.index !== undefined) { // unterminated opener: text before it is visible, everything after it is thinking
+    return segmentsToItems(id, [
+      { kind: 'text', text: stripThinkTagMarkers(text.slice(0, openMatch.index)) },
+      { kind: 'thinking', text: stripThinkTagMarkers(text.slice(openMatch.index + openMatch[0].length)).trim() },
+    ])
+  }
+
+  const closeMatch = THINK_CLOSE_TAG_PATTERN.exec(text)
+  if (closeMatch?.index !== undefined) { // dangling closer: text before it is thinking, text after it is visible
+    return segmentsToItems(id, [
+      { kind: 'thinking', text: stripThinkTagMarkers(text.slice(0, closeMatch.index)).trim() },
+      { kind: 'text', text: stripThinkTagMarkers(text.slice(closeMatch.index + closeMatch[0].length)) },
+    ])
+  }
+
+  return withoutMarkers.length > 0 ? [{ id, kind: 'text', text: withoutMarkers }] : [] // remaining case, e.g. a closer with whitespace before '>' that only the broad pattern matches
 }
 
 function itemFromPart(
   part: Record<string, any>,
   fallbackId: string,
   role?: FreshAgentTurn['role'],
-): FreshAgentTranscriptItem | undefined {
+  followedByTool = false,
+): FreshAgentTranscriptItem[] {
   const id = typeof part.id === 'string' && part.id.length > 0 ? part.id : fallbackId
   if (part.type === 'text') {
     const rawText = typeof part.text === 'string' ? part.text : ''
-    const stripped = role === 'user' ? rawText : stripThinkTags(rawText)
-    const text = role === 'user' ? stripOpencodeRunArgumentQuoting(stripped) : stripped
-    return { id, kind: 'text', text }
+    if (role === 'user') {
+      return [{ id, kind: 'text', text: stripOpencodeRunArgumentQuoting(rawText) }]
+    }
+    return itemsFromAssistantTextPart(rawText, id, followedByTool)
   }
   if (part.type === 'reasoning') {
     const text = typeof part.text === 'string' ? part.text : ''
-    return { id, kind: 'reasoning', summary: text ? [text] : [], content: text ? [text] : [], text }
+    return [{ id, kind: 'reasoning', summary: text ? [text] : [], content: text ? [text] : [], text }]
   }
   if (part.type === 'tool') {
     const state = part.state && typeof part.state === 'object' ? part.state as Record<string, any> : {}
-    return {
+    return [{
       id,
       kind: 'dynamic_tool',
       namespace: 'opencode',
@@ -132,24 +217,55 @@ function itemFromPart(
       arguments: state.input ?? {},
       contentItems: typeof state.output === 'string' ? [state.output] : undefined,
       success: state.status === 'completed' ? true : undefined,
-    }
+    }]
   }
   if (part.type === 'file') {
-    return { id, kind: 'text', text: `Attached file: ${fileAttachmentTarget(part)}` }
+    return [{ id, kind: 'text', text: `Attached file: ${fileAttachmentTarget(part)}` }]
   }
   if (part.type === 'patch') {
-    return {
+    return [{
       id,
       kind: 'file_change',
       status: 'completed',
       changes: normalizePatchChanges(part.files),
       extensions: { opencode: part },
-    }
+    }]
   }
   if (part.type === 'compaction') {
-    return { id, kind: 'context_compaction' }
+    return [{ id, kind: 'context_compaction' }]
+  }
+  return []
+}
+
+function computeToolAfterByPartIndex(parts: Record<string, any>[]): boolean[] { // result[i] is true when some part after index i has type 'tool'
+  const toolAfterByPartIndex = new Array<boolean>(parts.length).fill(false)
+  let hasToolAfter = false
+  for (let index = parts.length - 1; index >= 0; index -= 1) { // walk backwards so a single pass is enough
+    toolAfterByPartIndex[index] = hasToolAfter // recorded before inspecting parts[index], so a tool part does not count for itself
+    if (parts[index]?.type === 'tool') hasToolAfter = true
+  }
+  return toolAfterByPartIndex
+}
+
+function textSummaryFromItems(items: FreshAgentTranscriptItem[]): string | undefined { // builds the turn summary from 'text' items only; undefined when there are none
+  const textItems = items.filter((item): item is Extract<FreshAgentTranscriptItem, { kind: 'text' }> => item.kind === 'text')
+  if (textItems.length === 0) return undefined
+  const groups: string[] = [] // one entry per source part that produced non-empty text
+  let currentSourceId: string | undefined
+  let currentText = ''
+  for (const item of textItems) {
+    const sourceId = item.id.replace(SYNTHETIC_TEXT_SEGMENT_ID_SUFFIX_PATTERN, '') // drop the `:<kind>-<index>` suffix to recover the originating part id
+    if (currentSourceId === undefined || sourceId === currentSourceId) { // same part (or the first item): concatenate without a separator
+      currentSourceId = sourceId
+      currentText += item.text
+      continue
+    }
+    if (currentText.length > 0) groups.push(currentText) // a new part starts: flush the previous group unless it is empty
+    currentSourceId = sourceId
+    currentText = item.text
   }
-  return undefined
+  if (currentText.length > 0) groups.push(currentText) // flush the last group
+  return groups.join('\n\n') // groups from different parts are separated by a blank line
 }
 
 function collectOpencodePartMetadata(messages: NonNullable<OpencodeExport['messages']>): Record<string, unknown> {
@@ -213,11 +329,16 @@ export function normalizeOpencodeTurn(
   const id = typeof info.id === 'string' && info.id.length > 0 ? info.id : `message-${ordinal}`
   const role = normalizeOpencodeRole(info.role)
   const parts = Array.isArray(message.parts) ? message.parts : []
+  const toolAfterByPartIndex = computeToolAfterByPartIndex(parts)
   const items = parts
-    .map((part, index) => itemFromPart(part, `${id}:part-${index}`, role))
-    .filter((item): item is FreshAgentTranscriptItem => Boolean(item))
+    .flatMap((part, index) => itemFromPart(
+      part,
+      `${id}:part-${index}`,
+      role,
+      toolAfterByPartIndex[index] ?? false,
+    ))
   if (!role && items.length > 0) return null
-  const textSummary = items.find((item) => item.kind === 'text')?.text
+  const textSummary = textSummaryFromItems(items)
   const reasoningSummary = items.find((item) => item.kind === 'reasoning')?.summary?.[0]
   return {
     id,
diff --git a/test/unit/server/fresh-agent/opencode-normalize.test.ts b/test/unit/server/fresh-agent/opencode-normalize.test.ts
index 9998be50b..478f7cdaa 100644
--- a/test/unit/server/fresh-agent/opencode-normalize.test.ts
+++ b/test/unit/server/fresh-agent/opencode-normalize.test.ts
@@ -199,7 +199,7 @@ describe('OpenCode fresh-agent normalization', () => {
     expect(turn.items[0]?.text).toBe('"nested" quotes')
   })
 
-  it('strips leaked think/thinking tags and their content from assistant text parts', () => {
+  it('maps balanced leaked think/thinking tags to thinking items while preserving visible assistant text', () => {
     const turn = normalizeOpencodeTurn({
       info: { id: 'msg-think-tags', role: 'assistant' },
       parts: [
@@ -217,11 +217,120 @@ describe('OpenCode fresh-agent normalization', () => {
     }, 0)!
 
     expect(turn.items).toEqual([
-      { id: 'part-think', kind: 'text', text: 'Before  and after.' },
-      { id: 'part-thinking', kind: 'text', text: 'Intro  done.' },
+      { id: 'part-think:text-0', kind: 'text', text: 'Before ' },
+      { id: 'part-think:thinking-1', kind: 'thinking', text: 'Internal plan\n1 tool used' },
+      { id: 'part-think:text-2', kind: 'text', text: ' and after.' },
+      { id: 'part-thinking:text-0', kind: 'text', text: 'Intro ' },
+      { id: 'part-thinking:thinking-1', kind: 'thinking', text: 'I could change all instances.' },
+      { id: 'part-thinking:text-2', kind: 'text', text: ' done.' },
     ])
+    expect(turn.summary).toBe('Before  and after.\n\nIntro  done.')
   })
 
+  it('maps malformed leading think closers in OpenCode tool turns to thinking and drops marker-only parts', () => {
+    const turn = normalizeOpencodeTurn({
+      info: { id: 'msg-leaked-tool-thinking', role: 'assistant' },
+      parts: [
+        { id: 'part-marker-only', type: 'text', text: '</think>' },
+        { id: 'part-leading-close', type: 'text', text: '</think></think>Need to edit the CSS.</think>' },
+        {
+          id: 'part-tool',
+          type: 'tool',
+          tool: 'edit',
+          state: {
+            status: 'completed',
+            input: { filePath: 'src/index.css' },
+            output: 'Edit applied successfully.',
+          },
+        },
+      ],
+    }, 0)! // non-null assertion: normalizeOpencodeTurn can return null; a turn is expected here
+
+    expect(turn.items).toEqual([
+      { id: 'part-leading-close', kind: 'thinking', text: 'Need to edit the CSS.' },
+      {
+        id: 'part-tool',
+        kind: 'dynamic_tool',
+        namespace: 'opencode',
+        tool: 'edit',
+        status: 'completed',
+        arguments: { filePath: 'src/index.css' },
+        contentItems: ['Edit applied successfully.'],
+        success: true,
+      },
+    ])
+  })
+
+  it('keeps final visible assistant text when a malformed leading think closer leaks outside a tool turn', () => {
+    const turn = normalizeOpencodeTurn({
+      info: { id: 'msg-final-text', role: 'assistant' },
+      parts: [{ id: 'part-final', type: 'text', text: '</think>Done. Updated the CSS.' }],
+    }, 0)! // non-null assertion: normalizeOpencodeTurn can return null; a turn is expected here
+
+    expect(turn.items).toEqual([
+      { id: 'part-final', kind: 'text', text: 'Done. Updated the CSS.' },
+    ])
+    expect(turn.summary).toBe('Done. Updated the CSS.')
+  })
+
+  it('keeps final visible assistant text when a malformed leading think closer appears after a tool', () => {
+    const turn = normalizeOpencodeTurn({
+      info: { id: 'msg-final-text-after-tool', role: 'assistant' },
+      parts: [
+        {
+          id: 'part-tool',
+          type: 'tool',
+          tool: 'edit',
+          state: { status: 'completed', input: {}, output: 'done' },
+        },
+        { id: 'part-final', type: 'text', text: '</think>Done. Updated the CSS.' }, // no tool part follows, so the leading closer is treated as visible text
+      ],
+    }, 0)! // non-null assertion: normalizeOpencodeTurn can return null; a turn is expected here
+
+    expect(turn.items.at(-1)).toEqual({ id: 'part-final', kind: 'text', text: 'Done. Updated the CSS.' })
+    expect(turn.summary).toBe('Done. Updated the CSS.')
+  })
+
+  it('maps unterminated leaked think tags to thinking without empty transcript text', () => {
+    const bare = normalizeOpencodeTurn({
+      info: { id: 'msg-bare-open', role: 'assistant' },
+      parts: [{ id: 'part-bare-open', type: 'text', text: '<think>' }],
+    }, 0)! // non-null assertion: normalizeOpencodeTurn can return null; a turn is expected here
+    expect(bare.items).toEqual([])
+
+    const mixed = normalizeOpencodeTurn({
+      info: { id: 'msg-open', role: 'assistant' },
+      parts: [{ id: 'part-open', type: 'text', text: 'Visible <think>hidden plan' }],
+    }, 0)! // non-null assertion, as above
+
+    expect(mixed.items).toEqual([
+      { id: 'part-open:text-0', kind: 'text', text: 'Visible ' },
+      { id: 'part-open:thinking-1', kind: 'thinking', text: 'hidden plan' },
+    ])
+    expect(mixed.summary).toBe('Visible ')
+  })
+
+  it('maps dangling leaked think closers to thinking plus visible text', () => {
+    const turn = normalizeOpencodeTurn({
+      info: { id: 'msg-dangling-close', role: 'assistant' },
+      parts: [{ id: 'part-dangling-close', type: 'text', text: 'hidden plan</think>Visible text' }],
+    }, 0)! // non-null assertion: normalizeOpencodeTurn can return null; a turn is expected here
+
+    expect(turn.items).toEqual([
+      { id: 'part-dangling-close:thinking-0', kind: 'thinking', text: 'hidden plan' },
+      { id: 'part-dangling-close:text-1', kind: 'text', text: 'Visible text' },
+    ])
+    expect(turn.summary).toBe('Visible text')
+  })
+
+  it('rejects roleless messages even when they contain leaked think tags', () => {
+    const turn = normalizeOpencodeTurn({
+      info: { id: 'msg-unknown-role' }, // no role on purpose
+      parts: [{ id: 'part-unknown-role', type: 'text', text: '<think>hidden plan</think>VISIBLE text' }], // fixture actually carries a leaked think pair, as the title says
+    }, 0)
+
+    expect(turn).toBeNull()
+  })
 
   it('preserves think tags in user text parts', () => {
     const turn = normalizeOpencodeTurn({