@@ -7,8 +7,11 @@ package acpserver
77
88import (
99 "context"
10+ "encoding/base64"
1011 "encoding/json"
1112 "fmt"
13+ "os"
14+ "strings"
1215 "sync/atomic"
1316
1417 "github.com/charmbracelet/log"
@@ -111,13 +114,20 @@ func (a *Agent) Prompt(ctx context.Context, params acp.PromptRequest) (acp.Promp
111114 )
112115 }
113116
114- // Extract text from prompt content blocks.
115- promptText := extractPromptText (params .Prompt )
116- if promptText == "" {
117+ // Extract text and file attachments from prompt content blocks.
118+ promptText , files := extractPromptContent (params .Prompt )
119+ if promptText == "" && len ( files ) == 0 {
117120 return acp.PromptResponse {}, acp .NewInvalidParams ("empty prompt" )
118121 }
119122
120- log .Debug ("acp: prompt" , "session" , sessionID , "prompt_len" , len (promptText ))
123+ // If we have files but no text prompt, add a default prompt
124+ // This is required because the underlying LLM library needs a non-empty prompt
125+ // when there are no previous messages in the conversation.
126+ if promptText == "" && len (files ) > 0 {
127+ promptText = "Please analyze the attached file."
128+ }
129+
130+ log .Debug ("acp: prompt" , "session" , sessionID , "prompt_len" , len (promptText ), "files" , len (files ))
121131
122132 // Create a cancellable context for this prompt turn.
123133 promptCtx , cancel := context .WithCancel (ctx )
@@ -129,7 +139,13 @@ func (a *Agent) Prompt(ctx context.Context, params acp.PromptRequest) (acp.Promp
129139 defer unsub ()
130140
131141 // Run the prompt through Kit's full turn lifecycle.
132- _ , err := sess .kit .PromptResult (promptCtx , promptText )
142+ // Use PromptResultWithFiles when file attachments are present.
143+ var err error
144+ if len (files ) > 0 {
145+ _ , err = sess .kit .PromptResultWithFiles (promptCtx , promptText , files )
146+ } else {
147+ _ , err = sess .kit .PromptResult (promptCtx , promptText )
148+ }
133149 if err != nil {
134150 if promptCtx .Err () != nil {
135151 return acp.PromptResponse {
@@ -231,19 +247,199 @@ func (a *Agent) subscribeEvents(ctx context.Context, k *kit.Kit, sessionID acp.S
231247// Helpers
232248// ---------------------------------------------------------------------------
233249
234- // extractPromptText extracts the concatenated text content from ACP content
235- // blocks. Non-text blocks are ignored for now.
236- func extractPromptText (blocks []acp.ContentBlock ) string {
237- var text string
238- for _ , block := range blocks {
239- if block .Text != nil {
240- if text != "" {
241- text += "\n "
250+ // extractPromptContent extracts text and file attachments from ACP content blocks.
251+ // It converts supported content blocks (image, audio, resource) to Kit's LLMFilePart.
252+ func extractPromptContent (blocks []acp.ContentBlock ) (string , []kit.LLMFilePart ) {
253+ var textParts []string
254+ var files []kit.LLMFilePart
255+
256+ log .Debug ("acp: extracting content" , "blocks" , len (blocks ))
257+
258+ for i , block := range blocks {
259+ switch {
260+ // Text content
261+ case block .Text != nil :
262+ log .Debug ("acp: content block" , "index" , i , "type" , "text" , "len" , len (block .Text .Text ))
263+ textParts = append (textParts , block .Text .Text )
264+
265+ // Image data (base64)
266+ case block .Image != nil :
267+ mimeType := block .Image .MimeType
268+ if mimeType == "" {
269+ mimeType = "image/png" // Default fallback
270+ }
271+ log .Debug ("acp: content block" , "index" , i , "type" , "image" , "mime" , mimeType , "data_len" , len (block .Image .Data ))
272+ if data , err := base64 .StdEncoding .DecodeString (block .Image .Data ); err == nil {
273+ files = append (files , kit.LLMFilePart {
274+ Filename : "image.png" ,
275+ Data : data ,
276+ MediaType : mimeType ,
277+ })
278+ } else {
279+ log .Debug ("acp: failed to decode image" , "error" , err )
280+ }
281+
282+ // Audio data (base64)
283+ case block .Audio != nil :
284+ mimeType := block .Audio .MimeType
285+ if mimeType == "" {
286+ mimeType = "audio/wav" // Default fallback
287+ }
288+ log .Debug ("acp: content block" , "index" , i , "type" , "audio" , "mime" , mimeType )
289+ if data , err := base64 .StdEncoding .DecodeString (block .Audio .Data ); err == nil {
290+ files = append (files , kit.LLMFilePart {
291+ Filename : "audio.wav" ,
292+ Data : data ,
293+ MediaType : mimeType ,
294+ })
295+ } else {
296+ log .Debug ("acp: failed to decode audio" , "error" , err )
297+ }
298+
299+ // Embedded resource (text or binary file content)
300+ case block .Resource != nil :
301+ log .Debug ("acp: content block" , "index" , i , "type" , "resource" )
302+ res := block .Resource .Resource
303+ // Text resource - append as text content with file reference
304+ if res .TextResourceContents != nil {
305+ uri := res .TextResourceContents .Uri
306+ content := res .TextResourceContents .Text
307+ mimeType := "text/plain"
308+ if res .TextResourceContents .MimeType != nil {
309+ mimeType = * res .TextResourceContents .MimeType
310+ }
311+ log .Debug ("acp: text resource" , "uri" , uri , "mime" , mimeType , "len" , len (content ))
312+ // Text files are included as formatted text, NOT as FilePart
313+ // FilePart is for binary files (images, audio, PDFs) only
314+ textParts = append (textParts , fmt .Sprintf ("[File: %s]\n ```\n %s\n ```" , uri , content ))
315+ }
316+ // Binary resource (base64 blob) - these become FilePart
317+ if res .BlobResourceContents != nil {
318+ uri := res .BlobResourceContents .Uri
319+ mimeType := "application/octet-stream"
320+ if res .BlobResourceContents .MimeType != nil {
321+ mimeType = * res .BlobResourceContents .MimeType
322+ }
323+ log .Debug ("acp: binary resource" , "uri" , uri , "mime" , mimeType , "blob_len" , len (res .BlobResourceContents .Blob ))
324+ if data , err := base64 .StdEncoding .DecodeString (res .BlobResourceContents .Blob ); err == nil {
325+ files = append (files , kit.LLMFilePart {
326+ Filename : extractFilenameFromURI (uri ),
327+ Data : data ,
328+ MediaType : mimeType ,
329+ })
330+ } else {
331+ log .Debug ("acp: failed to decode binary resource" , "error" , err )
332+ }
333+ }
334+
335+ // Resource link (file reference without embedded content)
336+ case block .ResourceLink != nil :
337+ uri := block .ResourceLink .Uri
338+ name := block .ResourceLink .Name
339+ log .Debug ("acp: content block" , "index" , i , "type" , "resource_link" , "uri" , uri , "name" , name )
340+ // For resource links, we'll try to read the file from disk
341+ // This requires the file URI to be accessible (file:// scheme)
342+ if content , err := readResourceFromURI (uri ); err == nil {
343+ // Detect if it's a text file or binary file
344+ mimeType := "text/plain"
345+ if block .ResourceLink .MimeType != nil {
346+ mimeType = * block .ResourceLink .MimeType
347+ }
348+ log .Debug ("acp: resource link loaded" , "uri" , uri , "mime" , mimeType , "size" , len (content ))
349+
350+ // Only create FilePart for binary files (images, audio, PDFs, etc.)
351+ // Text files are included as formatted text in the message
352+ if isTextMimeType (mimeType ) || looksLikeText (content ) {
353+ textParts = append (textParts , fmt .Sprintf ("[File: %s]\n ```\n %s\n ```" , uri , string (content )))
354+ } else {
355+ // Binary file - create FilePart for models that support it
356+ files = append (files , kit.LLMFilePart {
357+ Filename : extractFilenameFromURI (uri ),
358+ Data : content ,
359+ MediaType : mimeType ,
360+ })
361+ }
362+ } else {
363+ // If we can't read it, include as a text reference
364+ log .Debug ("acp: resource link failed to load" , "uri" , uri , "error" , err )
365+ textParts = append (textParts , fmt .Sprintf ("[Referenced file: %s]" , uri ))
242366 }
243- text += block .Text .Text
367+
368+ default :
369+ log .Debug ("acp: content block" , "index" , i , "type" , "unknown/unhandled" )
370+ }
371+ }
372+
373+ // Debug log the extracted content
374+ for i , f := range files {
375+ log .Debug ("acp: extracted file" , "index" , i , "filename" , f .Filename , "mime" , f .MediaType , "size" , len (f .Data ))
376+ }
377+
378+ return strings .Join (textParts , "\n " ), files
379+ }
380+
381+ // isTextMimeType returns true if the MIME type indicates text content.
382+ func isTextMimeType (mimeType string ) bool {
383+ return strings .HasPrefix (mimeType , "text/" ) ||
384+ mimeType == "application/json" ||
385+ mimeType == "application/xml" ||
386+ mimeType == "application/javascript" ||
387+ mimeType == "application/typescript" ||
388+ mimeType == "application/x-sh" ||
389+ mimeType == "application/x-python" ||
390+ mimeType == "application/x-yaml" ||
391+ mimeType == "application/x-toml"
392+ }
393+
394+ // looksLikeText checks if the content appears to be text (not binary).
395+ // It samples the first 512 bytes and checks for null bytes or high
396+ // concentration of non-printable characters.
397+ func looksLikeText (data []byte ) bool {
398+ if len (data ) == 0 {
399+ return true
400+ }
401+ // Check first 512 bytes (or less if file is smaller)
402+ sampleSize := 512
403+ if len (data ) < sampleSize {
404+ sampleSize = len (data )
405+ }
406+ sample := data [:sampleSize ]
407+
408+ // Count non-printable characters
409+ nonPrintable := 0
410+ for _ , b := range sample {
411+ // Null byte indicates binary
412+ if b == 0 {
413+ return false
244414 }
415+ // Count control characters (except common whitespace)
416+ if b < 32 && b != '\n' && b != '\r' && b != '\t' {
417+ nonPrintable ++
418+ }
419+ }
420+
421+ // If more than 30% non-printable, consider it binary
422+ return float64 (nonPrintable )/ float64 (sampleSize ) < 0.3
423+ }
424+
425+ // extractFilenameFromURI extracts a filename from a file URI or path.
426+ func extractFilenameFromURI (uri string ) string {
427+ // Handle file:// URIs
428+ uri = strings .TrimPrefix (uri , "file://" )
429+ // Extract basename
430+ if idx := strings .LastIndex (uri , "/" ); idx >= 0 {
431+ return uri [idx + 1 :]
432+ }
433+ return uri
434+ }
435+
436+ // readResourceFromURI attempts to read file content from a file:// URI.
437+ func readResourceFromURI (uri string ) ([]byte , error ) {
438+ if ! strings .HasPrefix (uri , "file://" ) {
439+ return nil , fmt .Errorf ("unsupported URI scheme: %s" , uri )
245440 }
246- return text
441+ path := uri [7 :] // Remove file:// prefix
442+ return os .ReadFile (path )
247443}
248444
249445// parseToolArgs attempts to parse a JSON tool args string into a map for
0 commit comments