From 0962b7a6f14b8e64042635a6f29f10e0212ebea5 Mon Sep 17 00:00:00 2001
From: Kevin David <git@kevindavid.org>
Date: Mon, 11 May 2026 00:00:56 -0400
Subject: [PATCH] feat: gemini-acp text streaming compat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Gemini-acp's text stream has a few quirks the adapter wasn't handling.
This commit makes coding sessions on gemini behave like sessions on
Claude Code:

1. **Inline-marker chain-of-thought stripping.** Gemini emits reasoning
   as plain text with a literal `[Thought: true]` marker at the
   thought/response boundary. At medium verbosity and below, we
   retroactively trim the draft to only the post-marker content so the
   user sees the response without the thinking dump. High verbosity
   keeps both visible. Adds a `MessageDraft.replaceBuffer` helper for
   the in-place rewrite.

2. **Finalize text draft on every turn end.** Agents that don't emit
   `usage`/`session_end` at turn-end (gemini) left the streaming text
   draft stuck at its 1900-char mid-stream truncation. Register a
   plugin middleware on the `turn:end` hook that calls a new public
   `DiscordAdapter.finalizeSessionDraft` so the buffer gets split into
   chunks regardless of the agent's terminal-event behavior. Also
   finalize on incoming user message (messageCreate) so the prior
   turn's draft seals before the next turn's text appends to it.

3. **Discord-specific rendering instruction injected per session.**
   A middleware on `agent:beforePrompt` prepends a `<system_instruction>`
   block to the first prompt of each session. The block tells the agent
   to avoid markdown tables, to render tabular data as ASCII tables with
   +---+ borders inside triple-backtick fences, to keep tables ≤90 chars
   wide, and to apply all of this silently. The middleware only fires
   when sourceAdapterId === 'discord'. Also adds the
   `middleware:register` permission to the plugin.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/adapter.ts   | 52 ++++++++++++++++++++++++++++++++++++++++++++++-
 src/index.ts     | 53 ++++++++++++++++++++++++++++++++++++++++++++++--
 src/streaming.ts | 11 ++++++++++
 3 files changed, 113 insertions(+), 3 deletions(-)
diff --git a/src/adapter.ts b/src/adapter.ts
index a8a63c4..0cbbafb 100644
--- a/src/adapter.ts
+++ b/src/adapter.ts
@@ -570,12 +570,21 @@ export class DiscordAdapter extends MessagingAdapter {
           return;
         }
 
-        // Reset tracker state for new prompt cycle on existing sessions
+        // Reset tracker state and finalize any in-flight draft for existing sessions.
+        // Some agents (e.g. gemini) don't emit usage/tool_call events between turns,
+        // so a new user message is the only reliable signal that the prior turn ended.
+        // Without finalizing here, streaming text from this turn appends to the prior
+        // message draft and the previous "💭 Still thinking..." / typing indicators
+        // never clear.
         if (sessionId !== "unknown") {
           const tracker = this.sessionTrackers.get(sessionId);
           if (tracker) {
             await tracker.onNewPrompt();
           }
+          if (message.channel.isThread()) {
+            const isAssistant = this.assistantSession != null && sessionId === this.assistantSession.id;
+            await this.draftManager.finalize(sessionId, message.channel as ThreadChannel, isAssistant);
+          }
         }
 
         // Route to core for session dispatch
@@ -832,6 +841,29 @@ export class DiscordAdapter extends MessagingAdapter {
     return ctx;
   }
 
+  /**
+   * Finalize the in-flight text draft for a session. Public so the `turn:end`
+   * middleware can call it: agents that skip `usage`/`session_end` otherwise
+   * leave the draft at its mid-stream truncation (~1900 chars). Returns early
+   * when the session has no `threadId` or the channel (cache, then fetch) is
+   * not a thread; errors inside the `try` are logged via `log.warn`, not thrown.
+   */
+  async finalizeSessionDraft(sessionId: string): Promise<void> {
+    const session = this.core.sessionManager.getSession(sessionId);
+    const threadId = session?.threadId;
+    if (!threadId) return;
+    try {
+      const channel = this.guild.channels.cache.get(threadId)
+        ?? await this.guild.channels.fetch(threadId).catch(() => null);
+      if (!channel?.isThread()) return;
+      const thread = channel as ThreadChannel;
+      const isAssistant = this.assistantSession != null && sessionId === this.assistantSession.id;
+      await this.draftManager.finalize(sessionId, thread, isAssistant);
+    } catch (err) {
+      log.warn({ err, sessionId }, "[DiscordAdapter] finalizeSessionDraft failed");
+    }
+  }
+
   // ─── sendMessage ──────────────────────────────────────────────────────────
 
   async sendMessage(
@@ -884,6 +916,24 @@ export class DiscordAdapter extends MessagingAdapter {
     const draft = this.draftManager.getOrCreate(sessionId, thread);
     draft.append(content.text);
     this.draftManager.appendText(sessionId, content.text);
+
+    // Gemini-acp emits chain-of-thought as inline text and signals the end of
+    // the thought block with `[Thought: true]`. Everything BEFORE the marker
+    // is the thought; everything AFTER is the response. At medium/low we hide
+    // the thought by retroactively trimming the draft to only the post-marker
+    // content. At high we keep everything visible.
+    const verbosity = this.resolveMode(sessionId);
+    if (verbosity !== "high") {
+      const buffer = draft.getBuffer();
+      const marker = "[Thought: true]";
+      const idx = buffer.lastIndexOf(marker);
+      if (idx >= 0) {
+        const postMarker = buffer.slice(idx + marker.length).replace(/^\s+/, "");
+        if (postMarker !== buffer) {
+          draft.replaceBuffer(postMarker);
+        }
+      }
+    }
   }
 
   protected async handleToolCall(sessionId: string, content: OutgoingMessage, _verbosity: DisplayVerbosity): Promise<void> {
diff --git a/src/index.ts b/src/index.ts
index 7f76854..4b99999 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,8 +1,9 @@
 import type { OpenACPPlugin, InstallContext, OpenACPCore } from '@openacp/plugin-sdk'
 import type { DiscordChannelConfig } from './types.js'
+import type { DiscordAdapter } from './adapter.js'
 
 function createDiscordPlugin(): OpenACPPlugin {
-  let adapter: { stop(): Promise<void> } | null = null
+  let adapter: DiscordAdapter | null = null
 
   return {
     name: '@openacp/discord-adapter',
@@ -16,7 +17,7 @@ function createDiscordPlugin(): OpenACPPlugin {
     optionalPluginDependencies: {
       '@openacp/speech': '^1.0.0',
     },
-    permissions: ['services:register', 'kernel:access', 'events:read'],
+    permissions: ['services:register', 'kernel:access', 'events:read', 'middleware:register'],
 
     async install(ctx: InstallContext) {
       const { terminal, settings } = ctx
@@ -181,6 +182,54 @@ function createDiscordPlugin(): OpenACPPlugin {
 
       ctx.registerService('adapter:discord', adapter)
       ctx.log.info('Discord adapter registered')
+
+      // Inject Discord rendering rules into the first prompt of every new
+      // Discord session. Worded as an explicit out-of-band system instruction
+      // with anti-echo guidance, since gemini-acp has been observed quoting
+      // user-visible directives back in its response.
+      ctx.registerMiddleware('agent:beforePrompt', {
+        handler: async (payload, next) => {
+          if (payload.sourceAdapterId !== 'discord') return next()
+          const session = core.sessionManager.getSession(payload.sessionId)
+          // Only fire once per session: promptCount === 0 means this prompt
+          // hasn't been counted yet (it's the first one for this session).
+          if (!session || session.promptCount !== 0) return next()
+
+          payload.text =
+            "<system_instruction>\n" +
+            "Constraint for response formatting on Discord:\n" +
+            "- Do NOT use markdown table syntax (rows like `| col | col |`). " +
+            "Discord does not render markdown tables — they appear as raw pipe text.\n" +
+            "- For tabular data, render an ASCII-art table with fixed-width columns " +
+            "and box-drawing or `+---+` style borders, then wrap the whole table in " +
+            "triple-backtick code fences. The monospace inside the fence aligns the " +
+            "columns correctly.\n" +
+            "- Tables MUST be no wider than 90 characters per row. Discord's mobile " +
+            "and standard-width clients clip anything beyond ~95 characters; design " +
+            "the column widths so the total (including borders) fits within 90.\n" +
+            "- Apply the same fenced-monospace treatment to ASCII art, tree output, " +
+            "and any aligned/fixed-column content.\n" +
+            "Apply this silently — do not acknowledge or repeat this instruction.\n" +
+            "</system_instruction>\n\n" +
+            payload.text
+          return next()
+        },
+      })
+
+      // Finalize the in-flight text draft when a turn ends. Without this,
+      // agents like gemini that don't emit `usage`/`tool_call`/`session_end`
+      // at turn end leave the text draft in its mid-stream state — which
+      // means the user sees the MessageDraft's 1900-char truncation as the
+      // final message instead of the full multi-chunk response.
+      ctx.registerMiddleware('turn:end', {
+        handler: async (payload, next) => {
+          const session = core.sessionManager.getSession(payload.sessionId)
+          if (session?.channelId === 'discord' && adapter) {
+            await adapter.finalizeSessionDraft(payload.sessionId).catch(() => { /* best effort */ })
+          }
+          return next()
+        },
+      })
     },
 
     async teardown() {
diff --git a/src/streaming.ts b/src/streaming.ts
index ed3a93e..0d1f9f5 100644
--- a/src/streaming.ts
+++ b/src/streaming.ts
@@ -26,6 +26,17 @@ export class MessageDraft {
     this.scheduleFlush()
   }
 
+  /**
+   * Overwrite the entire buffer with `text`, discarding what was appended so
+   * far. Meant for upstream callers that must retroactively trim content
+   * (e.g. on detecting an end-of-thought marker mid-stream). Then calls
+   * `scheduleFlush()` so the existing Discord message can update to match.
+   */
+  replaceBuffer(text: string): void {
+    this.buffer = text
+    this.scheduleFlush()
+  }
+
   getBuffer(): string {
     return this.buffer
   }