Thread page og:image through browse_url as base64 attachment (jaredlockhart#919)

penny-team[bot] · jaredlockhart · claude · web-flow · commit 4365de9e4560 · 2026-03-30T00:17:59.000-04:00
The browser content script already extracts og:image from pages, but it
was dropped in formatResult. Now the addon downloads the image using the
browser's fetch API (which has session cookies for CDN auth), base64
encodes it, and sends it as a separate image field on the tool response.

The server threads it through SearchResult.image_base64 → MultiTool
(first image wins) → ControllerResponse.attachments → send_response.
When attachments are present, the Serper image search fallback is
skipped. Signal receives the base64 data URI directly.

This was necessary because CDN bot detection (e.g., Akamai on CBC)
blocks server-side image downloads via httpx — the browser's fetch has
the authenticated session and cookies needed to pass bot checks.

- Addon: downloadImageAsDataUri in browse_url.ts using browser fetch
- Addon: BrowseResult type with text + image fields
- Addon: sendToolResponse includes image field
- Server: BrowserToolResponse.image field (optional)
- Server: send_tool_request returns tuple[str, str | None]
- Server: BrowseUrlTool.execute returns SearchResult with image
- Server: MultiTool passes first image through combined result

Co-authored-by: Jared Lockhart <119884+jaredlockhart@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/browser/src/background/background.ts b/browser/src/background/background.ts
@@ -344,13 +344,16 @@ async function setToolUse(enabled: boolean): Promise<void> {
   broadcastToSidebar({ type: RuntimeMessageType.ToolUseState, enabled });
 }
 
-function sendToolResponse(requestId: string, result?: string, error?: string): void {
+function sendToolResponse(
+  requestId: string, result?: string, error?: string, image?: string,
+): void {
   if (!ws || ws.readyState !== WebSocket.OPEN) return;
   ws.send(JSON.stringify({
     type: WsOutgoingType.ToolResponse,
     request_id: requestId,
     result,
     error,
+    image,
   }));
 }
 
@@ -362,7 +365,7 @@ async function handleToolRequest(request: WsIncomingToolRequestPayload): Promise
   try {
     if (tool === "browse_url") {
       const result = await executeBrowseUrl(request_id, args);
-      sendToolResponse(request_id, result);
+      sendToolResponse(request_id, result.text, undefined, result.image);
     } else {
       sendToolResponse(request_id, undefined, `Unknown tool: ${tool}`);
     }
@@ -375,7 +378,7 @@ async function handleToolRequest(request: WsIncomingToolRequestPayload): Promise
 async function executeBrowseUrl(
   _requestId: string,
   args: Record<string, unknown>,
-): Promise<string> {
+): Promise<{ text: string; image: string }> {
   const url = args.url as string;
   if (!url) throw new Error("Missing required argument: url");
   return await browseUrl(url);
@@ -409,8 +412,8 @@ browser.runtime.onConnect.addListener(async (port) => {
 globalThis.debugBrowseUrl = (url: string): void => {
   browseUrl(url).then(
     (result) => {
-      console.log(`[debug] ${result.length} chars`);
-      console.log(result);
+      console.log(`[debug] ${result.text.length} chars, image: ${result.image || "none"}`);
+      console.log(result.text);
     },
     (err) => console.error("[debug] ERROR:", err),
   );
diff --git a/browser/src/background/tools/browse_url.ts b/browser/src/background/tools/browse_url.ts
@@ -15,29 +15,30 @@ interface PageData {
   title: string;
   url: string;
   text: string;
+  image: string;
   ready: boolean;
 }
 
 const MAX_TAB_ATTEMPTS = 3;
 
-export async function browseUrl(url: string): Promise<string> {
+export async function browseUrl(url: string): Promise<BrowseResult> {
   for (let attempt = 1; attempt <= MAX_TAB_ATTEMPTS; attempt++) {
     console.log(`[browse_url] opening: ${url} (attempt ${attempt}/${MAX_TAB_ATTEMPTS})`);
     const tab = await openHiddenTab(url);
     try {
       await waitForTabLoad(tab.id!);
       const pageData = await pollForContent(tab.id!);
-      return formatResult(pageData);
+      return await formatResult(pageData);
     } catch (err) {
       console.warn(`[browse_url] attempt ${attempt} failed:`, err);
       if (attempt === MAX_TAB_ATTEMPTS) {
-        return `Failed to read ${url}: ${err}`;
+        return { text: `Failed to read ${url}: ${err}`, image: "" };
       }
     } finally {
       await closeTab(tab.id!);
     }
   }
-  return `Failed to read ${url}`;
+  return { text: `Failed to read ${url}`, image: "" };
 }
 
 async function pollForContent(tabId: number): Promise<PageData> {
@@ -120,6 +121,33 @@ async function closeTab(tabId: number): Promise<void> {
   }
 }
 
-function formatResult(data: PageData): string {
-  return `Title: ${data.title}\nURL: ${data.url}\n\n${data.text}`;
+interface BrowseResult {
+  text: string;
+  image: string;
+}
+
+async function formatResult(data: PageData): Promise<BrowseResult> {
+  const image = data.image ? await downloadImageAsDataUri(data.image) : "";
+  console.log(`[browse_url] image: ${image ? `${image.length} chars` : "none"}`);
+  return {
+    text: `Title: ${data.title}\nURL: ${data.url}\n\n${data.text}`,
+    image,
+  };
+}
+
+async function downloadImageAsDataUri(url: string): Promise<string> {
+  try {
+    const resp = await fetch(url);
+    if (!resp.ok) return "";
+    const blob = await resp.blob();
+    const buffer = await blob.arrayBuffer();
+    const bytes = new Uint8Array(buffer);
+    let binary = "";
+    for (const b of bytes) binary += String.fromCharCode(b);
+    const b64 = btoa(binary);
+    return `data:${blob.type};base64,${b64}`;
+  } catch {
+    console.warn("[browse_url] failed to download image:", url);
+    return "";
+  }
 }
diff --git a/penny/penny/agents/base.py b/penny/penny/agents/base.py
@@ -562,6 +562,7 @@ def _build_final_response(
         tool_call_records: list[ToolCallRecord],
     ) -> ControllerResponse:
         """Build the ControllerResponse from the model's final (non-tool) answer."""
+        logger.debug("Building final response with %d attachments", len(attachments))
         content = response.content.strip()
 
         if not content:
@@ -736,7 +737,6 @@ async def _execute_single_tool(
             record.failed = _is_tool_result_failed(result_str)
             logger.debug("Tool result: %s", result_str[:200])
             return result_str, record, urls, image
-
         result_str = self._truncate_tool_result(str(tool_result.result))
         record.failed = _is_tool_result_failed(result_str)
         logger.debug("Tool result: %s", result_str[:200])
diff --git a/penny/penny/channels/base.py b/penny/penny/channels/base.py
@@ -243,28 +243,26 @@ async def send_response(
         recipient: str,
         content: str,
         parent_id: int | None,
-        image_prompt: str,
+        image_prompt: str = "",
         attachments: list[str] | None = None,
         quote_message: MessageLog | None = None,
         thought_id: int | None = None,
     ) -> int | None:
         """
-        Log and send an outgoing message with an image attachment.
+        Log and send an outgoing message with optional image attachments.
 
         Args:
             recipient: Identifier for the recipient
             content: Message content
             parent_id: Parent message ID for thread linking
-            image_prompt: Search query for image attachment (max 300 chars)
-            attachments: Optional list of base64-encoded attachments
+            image_prompt: Fallback image search query, used only when attachments is empty
+            attachments: Optional list of base64-encoded image attachments
             quote_message: Optional message to quote-reply to
             thought_id: Optional FK to the thought that triggered this message
 
         Returns:
             Database message ID if send was successful, None otherwise
         """
-        image_prompt = image_prompt[: self.MAX_IMAGE_PROMPT_LENGTH]
-
         if not attachments and image_prompt:
             attachments = await self._resolve_image(image_prompt, attachments)
         elif not attachments:
diff --git a/penny/penny/channels/browser/channel.py b/penny/penny/channels/browser/channel.py
@@ -111,7 +111,7 @@ def __init__(
         self._port = port
         self._server: Server | None = None
         self._connections: dict[str, ConnectionInfo] = {}
-        self._pending_requests: dict[str, asyncio.Future[str]] = {}
+        self._pending_requests: dict[str, asyncio.Future[tuple[str, str | None]]] = {}
         self._pending_permissions: dict[str, asyncio.Future[bool]] = {}
         self._channel_manager: MessageChannel | None = None
 
@@ -414,10 +414,15 @@ def _handle_tool_response(self, data: dict) -> None:
             logger.warning("No pending request for id: %s", response.request_id)
             return
 
+        logger.debug(
+            "Tool response: result=%d chars, image=%s",
+            len(response.result or ""),
+            f"{len(response.image)} chars" if response.image else "none",
+        )
         if response.error:
             future.set_exception(RuntimeError(response.error))
         else:
-            future.set_result(response.result or "")
+            future.set_result((response.result or "", response.image))
 
     async def _handle_thoughts_request(self, ws: ServerConnection) -> None:
         """Query recent thoughts and send them to the browser."""
@@ -626,11 +631,16 @@ async def _handle_chat_message(
 
     # --- Tool requests ---
 
-    async def send_tool_request(self, tool: str, arguments: dict) -> str:
-        """Send a tool request to a connected browser and await the sanitized response.
-
-        Checks domain permission server-side before dispatching. If the domain
-        is unknown, prompts all connected addons and Signal for a decision.
+    async def send_tool_request(
+        self,
+        tool: str,
+        arguments: dict,
+    ) -> tuple[str, str | None]:
+        """Send a tool request to a connected browser and await the response.
+
+        Returns (result_text, image_data_uri). Checks domain permission server-side
+        before dispatching. If the domain is unknown, prompts all connected
+        addons and Signal for a decision.
         """
         if tool == "browse_url" and "url" in arguments:
             await self._check_domain_permission(arguments["url"])
@@ -640,7 +650,7 @@ async def send_tool_request(self, tool: str, arguments: dict) -> str:
             raise RuntimeError("No browser with tool-use enabled is connected")
 
         request_id = str(uuid.uuid4())
-        future: asyncio.Future[str] = asyncio.get_event_loop().create_future()
+        future: asyncio.Future[tuple[str, str | None]] = asyncio.get_event_loop().create_future()
         self._pending_requests[request_id] = future
 
         request = BrowserToolRequest(
diff --git a/penny/penny/channels/browser/models.py b/penny/penny/channels/browser/models.py
@@ -53,6 +53,7 @@ class BrowserToolResponse(BaseModel):
     request_id: str
     result: str | None = None
     error: str | None = None
+    image: str | None = None
 
 
 class BrowserOutgoing(BaseModel):
diff --git a/penny/penny/tests/channels/test_browser_channel.py b/penny/penny/tests/channels/test_browser_channel.py
diff --git a/penny/penny/tools/browse_url.py b/penny/penny/tools/browse_url.py
diff --git a/penny/penny/tools/multi.py b/penny/penny/tools/multi.py