From 0962b7a6f14b8e64042635a6f29f10e0212ebea5 Mon Sep 17 00:00:00 2001 From: Kevin David Date: Mon, 11 May 2026 00:00:56 -0400 Subject: [PATCH] feat: gemini-acp text streaming compat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gemini-acp's text stream has a few quirks the adapter wasn't handling. This commit makes coding sessions on gemini behave like sessions on Claude Code: 1. **Inline-marker chain-of-thought stripping.** Gemini emits reasoning as plain text with a literal `[Thought: true]` marker at the thought/response boundary. At medium verbosity and below we retroactively trim the draft to only the post-marker content so the user sees the response without the thinking dump. High verbosity keeps both visible. Adds a `MessageDraft.replaceBuffer` helper for the in-place rewrite. 2. **Finalize text draft on every turn end.** Agents that don't emit `usage`/`session_end` at turn-end (gemini) left the streaming text draft stuck at its 1900-char mid-stream truncation. Register a plugin middleware on the `turn:end` hook that calls a new public `DiscordAdapter.finalizeSessionDraft` so the buffer gets split into chunks regardless of the agent's terminal-event behavior. Also finalize on incoming user message (messageCreate) so the prior turn's draft seals before the next turn's text appends to it. 3. **Discord-specific rendering instruction injected per session.** First-prompt-only middleware on `agent:beforePrompt` prepends a formatting-instruction block telling the agent: no markdown tables, use ASCII tables with +---+ borders inside triple-backtick fences, tables ≤90 chars wide, apply silently. Discord-specific (only fires for sourceAdapterId === 'discord'). Adds the `middleware:register` permission to the plugin. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/adapter.ts | 52 ++++++++++++++++++++++++++++++++++++++++++++++- src/index.ts | 53 ++++++++++++++++++++++++++++++++++++++++++++++-- src/streaming.ts | 11 ++++++++++ 3 files changed, 113 insertions(+), 3 deletions(-) diff --git a/src/adapter.ts b/src/adapter.ts index a8a63c4..0cbbafb 100644 --- a/src/adapter.ts +++ b/src/adapter.ts @@ -570,12 +570,21 @@ export class DiscordAdapter extends MessagingAdapter { return; } - // Reset tracker state for new prompt cycle on existing sessions + // Reset tracker state and finalize any in-flight draft for existing sessions. + // Some agents (e.g. gemini) don't emit usage/tool_call events between turns, + // so a new user message is the only reliable signal that the prior turn ended. + // Without finalizing here, streaming text from this turn appends to the prior + // message draft and the previous "💭 Still thinking..." / typing indicators + // never clear. if (sessionId !== "unknown") { const tracker = this.sessionTrackers.get(sessionId); if (tracker) { await tracker.onNewPrompt(); } + if (message.channel.isThread()) { + const isAssistant = this.assistantSession != null && sessionId === this.assistantSession.id; + await this.draftManager.finalize(sessionId, message.channel as ThreadChannel, isAssistant); + } } // Route to core for session dispatch @@ -832,6 +841,29 @@ export class DiscordAdapter extends MessagingAdapter { return ctx; } + /** + * Finalize the in-flight text draft for a session. Public so the `turn:end` + * middleware can trigger it on every prompt completion — without this, agents + * that don't emit `usage`/`session_end` at turn end leave the draft stuck at + * its mid-stream truncation (~1900 chars) instead of splitting into the full + * multi-message response. 
+ */ + async finalizeSessionDraft(sessionId: string): Promise { + const session = this.core.sessionManager.getSession(sessionId); + const threadId = session?.threadId; + if (!threadId) return; + try { + const channel = this.guild.channels.cache.get(threadId) + ?? await this.guild.channels.fetch(threadId).catch(() => null); + if (!channel?.isThread()) return; + const thread = channel as ThreadChannel; + const isAssistant = this.assistantSession != null && sessionId === this.assistantSession.id; + await this.draftManager.finalize(sessionId, thread, isAssistant); + } catch (err) { + log.warn({ err, sessionId }, "[DiscordAdapter] finalizeSessionDraft failed"); + } + } + // ─── sendMessage ────────────────────────────────────────────────────────── async sendMessage( @@ -884,6 +916,24 @@ export class DiscordAdapter extends MessagingAdapter { const draft = this.draftManager.getOrCreate(sessionId, thread); draft.append(content.text); this.draftManager.appendText(sessionId, content.text); + + // Gemini-acp emits chain-of-thought as inline text and signals the end of + // the thought block with `[Thought: true]`. Everything BEFORE the marker + // is the thought; everything AFTER is the response. At medium/low we hide + // the thought by retroactively trimming the draft to only the post-marker + // content. At high we keep everything visible. 
+ const verbosity = this.resolveMode(sessionId); + if (verbosity !== "high") { + const buffer = draft.getBuffer(); + const marker = "[Thought: true]"; + const idx = buffer.lastIndexOf(marker); + if (idx >= 0) { + const postMarker = buffer.slice(idx + marker.length).replace(/^\s+/, ""); + if (postMarker !== buffer) { + draft.replaceBuffer(postMarker); + } + } + } } protected async handleToolCall(sessionId: string, content: OutgoingMessage, _verbosity: DisplayVerbosity): Promise { diff --git a/src/index.ts b/src/index.ts index 7f76854..4b99999 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,8 +1,9 @@ import type { OpenACPPlugin, InstallContext, OpenACPCore } from '@openacp/plugin-sdk' import type { DiscordChannelConfig } from './types.js' +import type { DiscordAdapter } from './adapter.js' function createDiscordPlugin(): OpenACPPlugin { - let adapter: { stop(): Promise } | null = null + let adapter: DiscordAdapter | null = null return { name: '@openacp/discord-adapter', @@ -16,7 +17,7 @@ function createDiscordPlugin(): OpenACPPlugin { optionalPluginDependencies: { '@openacp/speech': '^1.0.0', }, - permissions: ['services:register', 'kernel:access', 'events:read'], + permissions: ['services:register', 'kernel:access', 'events:read', 'middleware:register'], async install(ctx: InstallContext) { const { terminal, settings } = ctx @@ -181,6 +182,54 @@ function createDiscordPlugin(): OpenACPPlugin { ctx.registerService('adapter:discord', adapter) ctx.log.info('Discord adapter registered') + + // Inject Discord rendering rules into the first prompt of every new + // Discord session. Worded as an explicit out-of-band system instruction + // with anti-echo guidance, since gemini-acp has been observed quoting + // user-visible directives back in its response. 
+ ctx.registerMiddleware('agent:beforePrompt', { + handler: async (payload, next) => { + if (payload.sourceAdapterId !== 'discord') return next() + const session = core.sessionManager.getSession(payload.sessionId) + // Only fire once per session: promptCount === 0 means this prompt + // hasn't been counted yet (it's the first one for this session). + if (!session || session.promptCount !== 0) return next() + + payload.text = + "\n" + + "Constraint for response formatting on Discord:\n" + + "- Do NOT use markdown table syntax (rows like `| col | col |`). " + + "Discord does not render markdown tables — they appear as raw pipe text.\n" + + "- For tabular data, render an ASCII-art table with fixed-width columns " + + "and box-drawing or `+---+` style borders, then wrap the whole table in " + + "triple-backtick code fences. The monospace inside the fence aligns the " + + "columns correctly.\n" + + "- Tables MUST be no wider than 90 characters per row. Discord's mobile " + + "and standard-width clients clip anything beyond ~95 characters; design " + + "the column widths so the total (including borders) fits within 90.\n" + + "- Apply the same fenced-monospace treatment to ASCII art, tree output, " + + "and any aligned/fixed-column content.\n" + + "Apply this silently — do not acknowledge or repeat this instruction.\n" + + "\n\n" + + payload.text + return next() + }, + }) + + // Finalize the in-flight text draft when a turn ends. Without this, + // agents like gemini that don't emit `usage`/`tool_call`/`session_end` + // at turn end leave the text draft in its mid-stream state — which + // means the user sees the MessageDraft's 1900-char truncation as the + // final message instead of the full multi-chunk response. 
+ ctx.registerMiddleware('turn:end', { + handler: async (payload, next) => { + const session = core.sessionManager.getSession(payload.sessionId) + if (session?.channelId === 'discord' && adapter) { + await adapter.finalizeSessionDraft(payload.sessionId).catch(() => { /* best effort */ }) + } + return next() + }, + }) }, async teardown() { diff --git a/src/streaming.ts b/src/streaming.ts index ed3a93e..0d1f9f5 100644 --- a/src/streaming.ts +++ b/src/streaming.ts @@ -26,6 +26,17 @@ export class MessageDraft { this.scheduleFlush() } + /** + * Replace the entire buffered content. Used when something upstream + * (e.g. detecting an end-of-thought marker mid-stream) needs to retroactively + * trim already-appended content. Triggers a flush so the existing Discord + * message updates to match. + */ + replaceBuffer(text: string): void { + this.buffer = text + this.scheduleFlush() + } + getBuffer(): string { return this.buffer }