Skip to content

Commit 8eb0fa8

Browse files
committed
Fix ACP file attachment support
- Implement proper handling for all ACP content block types: - ContentBlockText: extracts text content - ContentBlockImage: decodes base64 to LLMFilePart - ContentBlockAudio: decodes base64 to LLMFilePart - ContentBlockResource: handles text and binary embedded resources - ContentBlockResourceLink: reads files from disk - Text files are now included inline in the message (not as FilePart) to avoid OpenAI API errors. Only binary files (images, audio, PDFs) are sent as FilePart attachments. - Add fallback MIME types when not provided by client - Add default prompt text when user attaches files without text - Add comprehensive debug logging for content extraction - Enable debug logging in ACP command when --debug flag is used
1 parent 3bf696c commit 8eb0fa8

2 files changed

Lines changed: 214 additions & 15 deletions

File tree

cmd/acp.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"os/signal"
1212
"syscall"
1313

14+
"github.com/charmbracelet/log"
1415
acp "github.com/coder/acp-go-sdk"
1516

1617
"github.com/mark3labs/kit/internal/acpserver"
@@ -54,6 +55,8 @@ func runACP(cmd *cobra.Command, _ []string) error {
5455
conn.SetLogger(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
5556
Level: slog.LevelDebug,
5657
})))
58+
// Also set charmbracelet/log level for acpserver package logging
59+
log.SetLevel(log.DebugLevel)
5760
}
5861

5962
// Wait for either the client to disconnect or a signal.

internal/acpserver/agent.go

Lines changed: 211 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,11 @@ package acpserver
77

88
import (
99
"context"
10+
"encoding/base64"
1011
"encoding/json"
1112
"fmt"
13+
"os"
14+
"strings"
1215
"sync/atomic"
1316

1417
"github.com/charmbracelet/log"
@@ -111,13 +114,20 @@ func (a *Agent) Prompt(ctx context.Context, params acp.PromptRequest) (acp.Promp
111114
)
112115
}
113116

114-
// Extract text from prompt content blocks.
115-
promptText := extractPromptText(params.Prompt)
116-
if promptText == "" {
117+
// Extract text and file attachments from prompt content blocks.
118+
promptText, files := extractPromptContent(params.Prompt)
119+
if promptText == "" && len(files) == 0 {
117120
return acp.PromptResponse{}, acp.NewInvalidParams("empty prompt")
118121
}
119122

120-
log.Debug("acp: prompt", "session", sessionID, "prompt_len", len(promptText))
123+
// If we have files but no text prompt, add a default prompt
124+
// This is required because the underlying LLM library needs a non-empty prompt
125+
// when there are no previous messages in the conversation.
126+
if promptText == "" && len(files) > 0 {
127+
promptText = "Please analyze the attached file."
128+
}
129+
130+
log.Debug("acp: prompt", "session", sessionID, "prompt_len", len(promptText), "files", len(files))
121131

122132
// Create a cancellable context for this prompt turn.
123133
promptCtx, cancel := context.WithCancel(ctx)
@@ -129,7 +139,13 @@ func (a *Agent) Prompt(ctx context.Context, params acp.PromptRequest) (acp.Promp
129139
defer unsub()
130140

131141
// Run the prompt through Kit's full turn lifecycle.
132-
_, err := sess.kit.PromptResult(promptCtx, promptText)
142+
// Use PromptResultWithFiles when file attachments are present.
143+
var err error
144+
if len(files) > 0 {
145+
_, err = sess.kit.PromptResultWithFiles(promptCtx, promptText, files)
146+
} else {
147+
_, err = sess.kit.PromptResult(promptCtx, promptText)
148+
}
133149
if err != nil {
134150
if promptCtx.Err() != nil {
135151
return acp.PromptResponse{
@@ -231,19 +247,199 @@ func (a *Agent) subscribeEvents(ctx context.Context, k *kit.Kit, sessionID acp.S
231247
// Helpers
232248
// ---------------------------------------------------------------------------
233249

234-
// extractPromptText extracts the concatenated text content from ACP content
235-
// blocks. Non-text blocks are ignored for now.
236-
func extractPromptText(blocks []acp.ContentBlock) string {
237-
var text string
238-
for _, block := range blocks {
239-
if block.Text != nil {
240-
if text != "" {
241-
text += "\n"
250+
// extractPromptContent extracts text and file attachments from ACP content blocks.
251+
// It converts supported content blocks (image, audio, resource) to Kit's LLMFilePart.
252+
func extractPromptContent(blocks []acp.ContentBlock) (string, []kit.LLMFilePart) {
253+
var textParts []string
254+
var files []kit.LLMFilePart
255+
256+
log.Debug("acp: extracting content", "blocks", len(blocks))
257+
258+
for i, block := range blocks {
259+
switch {
260+
// Text content
261+
case block.Text != nil:
262+
log.Debug("acp: content block", "index", i, "type", "text", "len", len(block.Text.Text))
263+
textParts = append(textParts, block.Text.Text)
264+
265+
// Image data (base64)
266+
case block.Image != nil:
267+
mimeType := block.Image.MimeType
268+
if mimeType == "" {
269+
mimeType = "image/png" // Default fallback
270+
}
271+
log.Debug("acp: content block", "index", i, "type", "image", "mime", mimeType, "data_len", len(block.Image.Data))
272+
if data, err := base64.StdEncoding.DecodeString(block.Image.Data); err == nil {
273+
files = append(files, kit.LLMFilePart{
274+
Filename: "image.png",
275+
Data: data,
276+
MediaType: mimeType,
277+
})
278+
} else {
279+
log.Debug("acp: failed to decode image", "error", err)
280+
}
281+
282+
// Audio data (base64)
283+
case block.Audio != nil:
284+
mimeType := block.Audio.MimeType
285+
if mimeType == "" {
286+
mimeType = "audio/wav" // Default fallback
287+
}
288+
log.Debug("acp: content block", "index", i, "type", "audio", "mime", mimeType)
289+
if data, err := base64.StdEncoding.DecodeString(block.Audio.Data); err == nil {
290+
files = append(files, kit.LLMFilePart{
291+
Filename: "audio.wav",
292+
Data: data,
293+
MediaType: mimeType,
294+
})
295+
} else {
296+
log.Debug("acp: failed to decode audio", "error", err)
297+
}
298+
299+
// Embedded resource (text or binary file content)
300+
case block.Resource != nil:
301+
log.Debug("acp: content block", "index", i, "type", "resource")
302+
res := block.Resource.Resource
303+
// Text resource - append as text content with file reference
304+
if res.TextResourceContents != nil {
305+
uri := res.TextResourceContents.Uri
306+
content := res.TextResourceContents.Text
307+
mimeType := "text/plain"
308+
if res.TextResourceContents.MimeType != nil {
309+
mimeType = *res.TextResourceContents.MimeType
310+
}
311+
log.Debug("acp: text resource", "uri", uri, "mime", mimeType, "len", len(content))
312+
// Text files are included as formatted text, NOT as FilePart
313+
// FilePart is for binary files (images, audio, PDFs) only
314+
textParts = append(textParts, fmt.Sprintf("[File: %s]\n```\n%s\n```", uri, content))
315+
}
316+
// Binary resource (base64 blob) - these become FilePart
317+
if res.BlobResourceContents != nil {
318+
uri := res.BlobResourceContents.Uri
319+
mimeType := "application/octet-stream"
320+
if res.BlobResourceContents.MimeType != nil {
321+
mimeType = *res.BlobResourceContents.MimeType
322+
}
323+
log.Debug("acp: binary resource", "uri", uri, "mime", mimeType, "blob_len", len(res.BlobResourceContents.Blob))
324+
if data, err := base64.StdEncoding.DecodeString(res.BlobResourceContents.Blob); err == nil {
325+
files = append(files, kit.LLMFilePart{
326+
Filename: extractFilenameFromURI(uri),
327+
Data: data,
328+
MediaType: mimeType,
329+
})
330+
} else {
331+
log.Debug("acp: failed to decode binary resource", "error", err)
332+
}
333+
}
334+
335+
// Resource link (file reference without embedded content)
336+
case block.ResourceLink != nil:
337+
uri := block.ResourceLink.Uri
338+
name := block.ResourceLink.Name
339+
log.Debug("acp: content block", "index", i, "type", "resource_link", "uri", uri, "name", name)
340+
// For resource links, we'll try to read the file from disk
341+
// This requires the file URI to be accessible (file:// scheme)
342+
if content, err := readResourceFromURI(uri); err == nil {
343+
// Detect if it's a text file or binary file
344+
mimeType := "text/plain"
345+
if block.ResourceLink.MimeType != nil {
346+
mimeType = *block.ResourceLink.MimeType
347+
}
348+
log.Debug("acp: resource link loaded", "uri", uri, "mime", mimeType, "size", len(content))
349+
350+
// Only create FilePart for binary files (images, audio, PDFs, etc.)
351+
// Text files are included as formatted text in the message
352+
if isTextMimeType(mimeType) || looksLikeText(content) {
353+
textParts = append(textParts, fmt.Sprintf("[File: %s]\n```\n%s\n```", uri, string(content)))
354+
} else {
355+
// Binary file - create FilePart for models that support it
356+
files = append(files, kit.LLMFilePart{
357+
Filename: extractFilenameFromURI(uri),
358+
Data: content,
359+
MediaType: mimeType,
360+
})
361+
}
362+
} else {
363+
// If we can't read it, include as a text reference
364+
log.Debug("acp: resource link failed to load", "uri", uri, "error", err)
365+
textParts = append(textParts, fmt.Sprintf("[Referenced file: %s]", uri))
242366
}
243-
text += block.Text.Text
367+
368+
default:
369+
log.Debug("acp: content block", "index", i, "type", "unknown/unhandled")
370+
}
371+
}
372+
373+
// Debug log the extracted content
374+
for i, f := range files {
375+
log.Debug("acp: extracted file", "index", i, "filename", f.Filename, "mime", f.MediaType, "size", len(f.Data))
376+
}
377+
378+
return strings.Join(textParts, "\n"), files
379+
}
380+
381+
// isTextMimeType returns true if the MIME type indicates text content.
382+
func isTextMimeType(mimeType string) bool {
383+
return strings.HasPrefix(mimeType, "text/") ||
384+
mimeType == "application/json" ||
385+
mimeType == "application/xml" ||
386+
mimeType == "application/javascript" ||
387+
mimeType == "application/typescript" ||
388+
mimeType == "application/x-sh" ||
389+
mimeType == "application/x-python" ||
390+
mimeType == "application/x-yaml" ||
391+
mimeType == "application/x-toml"
392+
}
393+
394+
// looksLikeText checks if the content appears to be text (not binary).
395+
// It samples the first 512 bytes and checks for null bytes or high
396+
// concentration of non-printable characters.
397+
func looksLikeText(data []byte) bool {
398+
if len(data) == 0 {
399+
return true
400+
}
401+
// Check first 512 bytes (or less if file is smaller)
402+
sampleSize := 512
403+
if len(data) < sampleSize {
404+
sampleSize = len(data)
405+
}
406+
sample := data[:sampleSize]
407+
408+
// Count non-printable characters
409+
nonPrintable := 0
410+
for _, b := range sample {
411+
// Null byte indicates binary
412+
if b == 0 {
413+
return false
244414
}
415+
// Count control characters (except common whitespace)
416+
if b < 32 && b != '\n' && b != '\r' && b != '\t' {
417+
nonPrintable++
418+
}
419+
}
420+
421+
// If more than 30% non-printable, consider it binary
422+
return float64(nonPrintable)/float64(sampleSize) < 0.3
423+
}
424+
425+
// extractFilenameFromURI extracts a filename from a file URI or path.
426+
func extractFilenameFromURI(uri string) string {
427+
// Handle file:// URIs
428+
uri = strings.TrimPrefix(uri, "file://")
429+
// Extract basename
430+
if idx := strings.LastIndex(uri, "/"); idx >= 0 {
431+
return uri[idx+1:]
432+
}
433+
return uri
434+
}
435+
436+
// readResourceFromURI attempts to read file content from a file:// URI.
437+
func readResourceFromURI(uri string) ([]byte, error) {
438+
if !strings.HasPrefix(uri, "file://") {
439+
return nil, fmt.Errorf("unsupported URI scheme: %s", uri)
245440
}
246-
return text
441+
path := uri[7:] // Remove file:// prefix
442+
return os.ReadFile(path)
247443
}
248444

249445
// parseToolArgs attempts to parse a JSON tool args string into a map for

0 commit comments

Comments
 (0)