From 2a5cab8204f4e5bdb37e538c6706184a9a9df5c9 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Fri, 29 May 2026 12:53:08 +0000 Subject: [PATCH] Remove experimental image tools --- ...0001-experimental-image-generation-tool.md | 63 -- docs/adr/0002-image-editing-visual-mockups.md | 59 -- docs/config/models.mdx | 12 - docs/docs.json | 2 - docs/hooks/tools.mdx | 25 - .../Messages/GeneratedImageMessage.tsx | 303 ------ .../Messages/MessageRenderer.stories.tsx | 161 ---- .../Messages/MessageRenderer.test.tsx | 93 -- .../features/Messages/MessageRenderer.tsx | 7 - .../Sections/ExperimentsSection.stories.tsx | 71 +- .../Settings/Sections/ExperimentsSection.tsx | 5 - .../ImageGenerationExperimentConfig.tsx | 368 -------- .../Settings/Sections/settingsStoryUtils.tsx | 4 - .../Tools/Shared/NestedToolRenderer.test.tsx | 15 +- .../features/Tools/Shared/ToolPrimitives.tsx | 3 - src/browser/hooks/useSendMessageOptions.ts | 2 - src/browser/stories/mocks/orpc.ts | 14 - .../StreamingMessageAggregator.test.ts | 281 ------ .../messages/applyToolOutputRedaction.test.ts | 101 +- .../messages/applyToolOutputRedaction.ts | 65 +- .../utils/messages/buildSendMessageOptions.ts | 1 - .../utils/messages/displayedMessageBuilder.ts | 254 +---- src/browser/utils/messages/sendOptions.ts | 1 - .../transcriptRenderProjection.test.ts | 48 +- .../config/schemas/appConfigOnDisk.test.ts | 22 +- src/common/config/schemas/appConfigOnDisk.ts | 20 - src/common/constants/experiments.ts | 10 - src/common/orpc/schemas/api.test.ts | 15 - src/common/orpc/schemas/api.ts | 21 - src/common/orpc/schemas/stream.ts | 1 - src/common/schemas/project.ts | 1 - src/common/types/imageGeneration.test.ts | 13 - src/common/types/imageGeneration.ts | 48 - src/common/types/message.ts | 65 -- src/common/types/project.ts | 3 - src/common/types/tools.ts | 10 - .../utils/imageGenerationToolResult.test.ts | 33 - src/common/utils/imageGenerationToolResult.ts | 105 --- src/common/utils/tokens/modelStats.test.ts | 11 +- .../utils/tools/toolDefinitions.test.ts | 23 - src/common/utils/tools/toolDefinitions.ts | 133 --- src/common/utils/tools/tools.ts | 26 +- src/node/builtinSkills/imagegen.md | 105 --- src/node/builtinSkills/mux-docs.md | 2 - src/node/config.ts | 6 - src/node/orpc/router.ts | 47 +- .../agentSession.agentSkillSnapshot.test.ts | 19 - src/node/services/agentSession.ts | 32 +- .../agentSkills/agentSkillsService.test.ts | 2 - .../agentSkills/agentSkillsService.ts | 41 - .../builtInSkillContent.generated.ts | 279 ------ src/node/services/aiService.ts | 29 +- .../services/providerModelFactory.test.ts | 165 ---- src/node/services/providerModelFactory.ts | 265 +----- .../services/streamContextBuilder.test.ts | 94 -- src/node/services/streamContextBuilder.ts | 15 +- src/node/services/taskService.ts | 1 - .../services/tools/agent_skill_read.test.ts | 24 - src/node/services/tools/agent_skill_read.ts | 21 +- .../tools/agent_skill_read_file.test.ts | 20 - .../services/tools/agent_skill_read_file.ts | 18 +- src/node/services/tools/imageArtifacts.ts | 433 --------- src/node/services/tools/image_edit.test.ts | 881 ------------------ src/node/services/tools/image_edit.ts | 230 ----- .../services/tools/image_generate.test.ts | 532 ----------- src/node/services/tools/image_generate.ts | 121 --- src/node/services/workspaceService.ts | 1 - 67 files changed, 135 insertions(+), 5766 deletions(-) delete mode 100644 docs/adr/0001-experimental-image-generation-tool.md delete mode 100644 docs/adr/0002-image-editing-visual-mockups.md delete mode 100644 src/browser/features/Messages/GeneratedImageMessage.tsx delete mode 100644 src/browser/features/Settings/Sections/ImageGenerationExperimentConfig.tsx delete mode 100644 src/common/types/imageGeneration.test.ts delete mode 100644 src/common/types/imageGeneration.ts delete mode 100644 src/common/utils/imageGenerationToolResult.test.ts delete mode 100644 src/common/utils/imageGenerationToolResult.ts delete mode 100644 src/node/builtinSkills/imagegen.md delete mode 100644 src/node/services/tools/imageArtifacts.ts delete mode 100644 src/node/services/tools/image_edit.test.ts delete mode 100644 src/node/services/tools/image_edit.ts delete mode 100644 src/node/services/tools/image_generate.test.ts delete mode 100644 src/node/services/tools/image_generate.ts diff --git a/docs/adr/0001-experimental-image-generation-tool.md b/docs/adr/0001-experimental-image-generation-tool.md deleted file mode 100644 index b9516c490d..0000000000 --- a/docs/adr/0001-experimental-image-generation-tool.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: Experimental Image Generation Tool -description: Architecture decision for Mux's experimental image generation tool and generated-image display messages ---- - -# 0001. Experimental Image Generation Uses a Mux-Executed Tool with Derived Display Messages - -## Status - -Accepted - -## Context - -Mux is adding an experimental image generation capability. The capability needs a configurable image model, must avoid surprising users with default-on costful behavior, and should fit Mux's existing tool, settings, runtime, and transcript systems. - -The Codex CLI report described a layered design built around a model-facing image generation skill, a hosted OpenAI Responses image-generation tool, artifact saving, and optional fallback scripts. Mux has different constraints: image generation should work independently from the selected chat model, and the configured image model should be controlled from Mux settings. - -## Decision - -Mux will implement the first image generation experiment as a Mux-executed model-callable tool named `image_generate`, backed by OpenAI image models through the AI SDK image generation API. The default image model is `openai:gpt-image-2`. - -The feature is gated behind a visible, default-off Image Generation Tool experiment. The experiment owns an app-level `imageGeneration` configuration object containing `modelString` and `maxImagesPerCall`. - -The first tool operation is text-to-image generation only. It exposes prompt, image count, quality, and output format. Editing, masks, batch generation, transparent-background workflows, seed, aspect ratio, style, moderation overrides, and compression are deferred. - -Generated full-resolution images are saved under the active runtime artifact directory. The persisted tool result stores saved paths plus bounded thumbnails for transcript preview. Full image bytes are not stored in chat history. - -The frontend renders successful `image_generate` results as a first-class Generated Image Display Message derived from the persisted tool result. The persisted transcript source of truth remains the normal tool call and tool result; no new persisted chat part or stream protocol event is required for the first experiment. Pending, executing, failed, interrupted, or redacted image-generation calls continue to render as normal tool rows. - -The tool is available to Exec-mode agents by default when the experiment is enabled. Built-in Plan and Explore agents remove it from their tool policies. The tool enforces Mux provider/model policy before provider calls and reports image-generation usage through existing tool-side usage reporting when provider metadata is available. - -Mux will also ship a richer built-in `/imagegen` Agent Skill as a single built-in skill file. The skill will teach prompting principles, use-case recipes, iteration guidance, and artifact policy, but it will not include fallback CLI scripts or executable workflows for deferred capabilities. - -## Alternatives Considered - -### Hosted OpenAI Responses image-generation tool - -This would mirror Codex's built-in path more closely. It was rejected for v1 because the image capability should be independent from the selected chat model, and the configured Image Generation Model should be the model that directly handles generation. - -### Skill-only implementation - -A skill without a Mux tool would provide guidance but no integrated artifact, settings, usage-reporting, or display path. It was rejected because the product goal is a configurable image generation capability, not only model instructions. - -### Direct full image bytes in chat history - -Persisting base64 output directly would make previews easy, but it would quickly bloat chat history. Mux will persist bounded thumbnails and keep full-resolution images as runtime artifacts instead. - -### New persisted chat message or stream event - -A fully new persisted item/event model may be appropriate later. It was deferred because a derived display message gives first-class UX while preserving existing replay, retry, and history compatibility for the experiment. - -### Saving generated images directly into the workspace - -This would make generated images immediately project-usable, but it would also pollute the git working tree with every preview or discarded variant. Mux will save to runtime temp by default and require agents to explicitly copy selected final assets into the workspace. - -## Consequences - -- Image generation is usable from non-OpenAI chat models because the image tool owns its own configured model. -- Users must opt into the experiment before the tool is exposed. -- Power users can raise the image-count cap within the configured range, while the default stays conservative. -- Generated-image transcript previews remain available even if runtime-temp full artifacts are later cleaned up outside Mux. -- Project-bound image assets require an explicit copy step into the workspace. -- Future work can add editing, masks, transparent workflows, provider adapters, artifact indexing, cleanup, or a persisted generated-image event without changing the initial domain model. diff --git a/docs/adr/0002-image-editing-visual-mockups.md b/docs/adr/0002-image-editing-visual-mockups.md deleted file mode 100644 index 2ca7338512..0000000000 --- a/docs/adr/0002-image-editing-visual-mockups.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Image Editing Uses a Separate General-Purpose Tool -description: Architecture decision for Mux's experimental image editing tool and edited image display messages -status: accepted ---- - -# 0002. Image Editing Uses a Separate General-Purpose Tool - -## Status - -Accepted - -## Context - -Mux already has an experimental `image_generate` tool for text-to-image generation. Screenshot-driven UI workflows need a related but different capability: an agent should be able to take a local image, such as a UI screenshot, and produce a visual edit mockup from a prompt. - -Editing an existing image has a different privacy boundary from generation. Text-to-image sends only prompt text to the configured image provider, while image editing uploads a local file from the active runtime. That file may include sensitive pixels and embedded metadata. The product model therefore needs explicit upload consent and clear separation between visual mockups and implementation work. - -## Decision - -Mux will add general-purpose image editing through a separate model-callable `image_edit` tool. The tool edits exactly one PNG, JPEG, or WebP source image by path and returns edited image artifacts. When the source is a product screenshot or UI image, the output is a **Visual Edit Mockup**: a design reference artifact only, not source code, a direct UI mutation, or an authoritative implementation plan. - -`image_edit` is distinct from `image_generate`, but both tools share the same user-facing Image Tools experiment, configured image model, max-images-per-call cap, artifact conventions, and tool-side image-model usage reporting path. Internal configuration names may remain generation-oriented until a broader cleanup justifies the churn. The v1 image-upload consent boolean lives inside the existing `imageGeneration` config object as `allowImageUploadsForEditing`. - -The Image Tools setting has one main experiment toggle plus subordinate image-upload consent. Image generation requires the main toggle. `image_edit` is hidden unless both the main toggle and `allowImageUploadsForEditing` are enabled. Upload consent is separate because editing uploads local images or screenshots to the configured image provider. - -The first version validates source type from actual decoded bytes/metadata rather than extension. It follows symlinks through the runtime, records both the requested path and resolved real path for UI provenance, and rejects unsupported or unreadable images before provider calls. It does not attempt automatic screenshot redaction or metadata stripping; source images are uploaded as-is. - -Edited outputs are saved as runtime artifacts under a separate `edited_images` area with generated-style filenames. Extensions are selected from provider media type first, requested output format second, and PNG fallback. Bounded output thumbnails are persisted for transcript preview and stripped from model-visible tool output. Thumbnail generation failures keep the edited image result and add warnings. - -The result uses a separate top-level `ImageEditToolResult` schema that shares common image artifact schema pieces with `ImageGenerateToolResult`. It records the edit prompt, requested source path, resolved real source path, source file size, source dimensions, output dimensions detected from actual output bytes, edited image paths, provider revised prompts when available, and warnings. V1 does not persist a source thumbnail. The display shows the requested source path by default and reveals the resolved real path in details only when different. - -Successful `image_edit` outputs render as a first-class `edited-image` display message derived from persisted tool results, mirroring generated-image display rows. Pending, failed, malformed, interrupted, or hook-augmented edit calls continue to render as normal tool rows. - -## Alternatives Considered - -### Expand `image_generate` to accept input images - -This was rejected because generation and editing have different user promises, privacy boundaries, and tool-selection semantics. A separate tool gives agents and users a clearer mental model. - -### Direct OpenAI image-edit API calls - -This was rejected for v1 because the installed AI SDK already routes image prompts with input images through the provider edit path while preserving Mux's existing model configuration, policy, usage reporting, and artifact flow. - -### Masks and multi-image references - -These were deferred because the initial product goal is prompt-based editing of one source image. Masks and multi-image references add provenance, UI, validation, and provider-specific complexity. - -### Saving edited images directly into the workspace - -This was rejected because most edit iterations are disposable and should not pollute the git working tree. Mux saves runtime artifacts by default, and agents copy selected final assets into the workspace only when the user wants them used by the project. - -## Consequences - -- Users opt into image editing separately from text-to-image generation. -- Agents can create visual mockups from screenshots without claiming to implement UI changes. -- Source image uploads can include arbitrary runtime-readable image files, so settings, docs, and tool guidance must warn about sensitive pixels and embedded metadata. -- Plan and Explore agents do not receive image editing by default. -- Future work can add masks, multi-image references, artifact indexing, cleanup, redaction, or a persisted edited-image event without changing the initial domain model. diff --git a/docs/config/models.mdx b/docs/config/models.mdx index 70d77213a5..709ff04bb7 100644 --- a/docs/config/models.mdx +++ b/docs/config/models.mdx @@ -86,18 +86,6 @@ mux run -t 0 "Quick fix" # Lowest thinking for the model mux run -t high "Deep analysis" # Named level ``` -## Experimental Image Tools Model - -Mux can expose experimental Image Tools for Exec agents. Enable them in **Settings → Experiments → Image Tools**. - -The image model is configured separately from the chat model. The default is `openai:gpt-image-2`. Image Tools are OpenAI-only in this experiment and require OpenAI provider credentials in **Settings → Providers**. - -`image_generate` creates raster images from text prompts. It supports prompt, image count, quality, and output format. - -`image_edit` edits one existing PNG, JPEG, or WebP image by path. Because editing uploads the source file to the image provider, including embedded metadata, it is hidden unless you also enable **Allow image uploads for editing** under the Image Tools settings. Source images are sent as-is; Mux does not automatically redact screenshot contents or strip image metadata. - -Generated and edited full-resolution images are saved as runtime artifacts. Copy selected final assets into the workspace when they should become project files. - ## Next Steps diff --git a/docs/docs.json b/docs/docs.json index 6fadfcdd6b..b568f0519b 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -120,8 +120,6 @@ "reference/telemetry", "reference/storybook", "reference/benchmarking", - "adr/0001-experimental-image-generation-tool", - "adr/0002-image-editing-visual-mockups", "adr/0003-context-boundaries-for-compaction-and-reset", "adr/0004-cli-goal-runs-are-not-strict-goal-aliases", "AGENTS" diff --git a/docs/hooks/tools.mdx b/docs/hooks/tools.mdx index 0921ffa5e6..bde56b6c89 100644 --- a/docs/hooks/tools.mdx +++ b/docs/hooks/tools.mdx @@ -517,31 +517,6 @@ If a value is too large for the environment, it may be omitted (not set). Mux al -
-image_edit (5) - -| Env var | JSON path | Type | Description | -| ------------------------------ | -------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------- | -| `MUX_TOOL_INPUT_N` | `n` | number | Number of edited variants to create. Defaults to 1; request multiple variants only when the user asks or variants are clearly useful. | -| `MUX_TOOL_INPUT_OUTPUT_FORMAT` | `outputFormat` | enum | Optional output format. Defaults to png. | -| `MUX_TOOL_INPUT_PROMPT` | `prompt` | string | Edit prompt describing the desired image changes | -| `MUX_TOOL_INPUT_QUALITY` | `quality` | enum | Optional edit quality. Defaults to the provider/model default. | -| `MUX_TOOL_INPUT_SOURCE_PATH` | `sourcePath` | string | Path to the existing source image to edit | - -
- -
-image_generate (4) - -| Env var | JSON path | Type | Description | -| ------------------------------ | -------------- | ------ | ---------------------------------------------------------------------------------------------------------- | -| `MUX_TOOL_INPUT_N` | `n` | number | Number of images to generate. Defaults to 1 and must not exceed the user's configured Image Tools maximum. | -| `MUX_TOOL_INPUT_OUTPUT_FORMAT` | `outputFormat` | enum | Optional output format. Defaults to png. | -| `MUX_TOOL_INPUT_PROMPT` | `prompt` | string | Prompt describing the image(s) to generate | -| `MUX_TOOL_INPUT_QUALITY` | `quality` | enum | Optional generation quality. Defaults to the provider/model default. | - -
-
mux_agents_write (2) diff --git a/src/browser/features/Messages/GeneratedImageMessage.tsx b/src/browser/features/Messages/GeneratedImageMessage.tsx deleted file mode 100644 index b5cb347145..0000000000 --- a/src/browser/features/Messages/GeneratedImageMessage.tsx +++ /dev/null @@ -1,303 +0,0 @@ -import { type CSSProperties, useState } from "react"; -import { AlertTriangle, FileImage, Image as ImageIcon, Maximize2 } from "lucide-react"; - -import { CopyButton } from "@/browser/components/CopyButton/CopyButton"; -import { ImageLightbox } from "@/browser/components/ImageLightbox"; -import { TooltipIfPresent } from "@/browser/components/Tooltip/Tooltip"; -import type { DisplayedMessage } from "@/common/types/message"; -import { isValidBase64AttachmentData } from "@/common/utils/attachments/base64"; -import { cn } from "@/common/lib/utils"; - -interface GeneratedImageMessageProps { - message: Extract; - className?: string; -} - -interface EditedImageMessageProps { - message: Extract; - className?: string; -} - -type ImageToolMessage = GeneratedImageMessageProps["message"] | EditedImageMessageProps["message"]; -type ImageArtifact = ImageToolMessage["images"][number]; -type EditedImageArtifact = EditedImageMessageProps["message"]["images"][number]; - -function isEditedImageArtifact(image: ImageArtifact): image is EditedImageArtifact { - return "outputDimensions" in image; -} - -function getThumbnailDataUrl(image: ImageArtifact): string | null { - const thumbnail = image.thumbnail; - if (!thumbnail) { - return null; - } - const mediaType = thumbnail.mediaType.toLowerCase().trim(); - if (mediaType !== "image/webp" && mediaType !== "image/png" && mediaType !== "image/jpeg") { - return null; - } - if (!isValidBase64AttachmentData(thumbnail.data)) { - return null; - } - return `data:${mediaType};base64,${thumbnail.data}`; -} - -function getThumbnailAspectStyle(image: ImageArtifact): CSSProperties | undefined { - const thumbnail = image.thumbnail; - if (!thumbnail) { - return undefined; - } - return { aspectRatio: `${thumbnail.width} / ${thumbnail.height}` }; -} - -function getImageMetadata(image: ImageArtifact, messageType: ImageToolMessage["type"]): string[] { - const metadata = [image.mediaType]; - if (messageType === "edited-image" && isEditedImageArtifact(image)) { - metadata.push(`${image.outputDimensions.width}×${image.outputDimensions.height}`); - } else if (image.thumbnail) { - metadata.push(`${image.thumbnail.width}×${image.thumbnail.height}`); - } - return metadata; -} - -interface ImageArtifactCardProps { - image: ImageArtifact; - index: number; - imageCount: number; - altPrefix: string; - onSelect: (src: string) => void; - messageType: ImageToolMessage["type"]; -} - -function ImageArtifactCard(props: ImageArtifactCardProps) { - const dataUrl = getThumbnailDataUrl(props.image); - const metadata = getImageMetadata(props.image, props.messageType); - const imageNumber = props.index + 1; - - return ( -
- {dataUrl ? ( - - - - ) : ( -
-
- )} - -
-
-
- - - {props.image.path} - - - {props.image.revisedPrompt && ( -
-
- Revised prompt -
-
- {props.image.revisedPrompt} -
-
- )} -
-
- ); -} - -function SourceImageSummary(props: { - source: Extract["source"]; -}) { - const resolvedDiffers = props.source.resolvedPath !== props.source.path; - - return ( -
-
Source
-
- - {props.source.path} - - -
-
- - {props.source.dimensions.width}×{props.source.dimensions.height} - - {props.source.sizeBytes.toLocaleString()} bytes -
- {resolvedDiffers && ( -
- Resolved path - - {props.source.resolvedPath} - -
- )} -
- ); -} - -interface ImageToolMessageBaseProps { - message: ImageToolMessage; - className?: string; - copy: { - titleSingular: string; - titlePlural: (count: number) => string; - lightboxTitle: string; - altPrefix: string; - }; -} - -function ImageToolMessageBase(props: ImageToolMessageBaseProps) { - const [selectedImage, setSelectedImage] = useState(null); - const imageCount = props.message.images.length; - const title = imageCount === 1 ? props.copy.titleSingular : props.copy.titlePlural(imageCount); - - return ( -
-
-
-
-
-
-
-
-
{title}
- - {imageCount} {imageCount === 1 ? "artifact" : "artifacts"} - - - {props.message.model} - -
-
-
- - {props.message.type === "edited-image" && ( - - )} - -
-
- Prompt -
-
- {props.message.prompt} -
-
-
- -
- {props.message.images.map((image, index) => ( - - ))} -
- - {props.message.warnings && props.message.warnings.length > 0 && ( -
-
- )} -
- - setSelectedImage(null)} - /> -
- ); -} - -export function GeneratedImageMessage(props: GeneratedImageMessageProps) { - return ( - `Generated ${count} image previews`, - lightboxTitle: "Generated image preview", - altPrefix: "Generated image", - }} - /> - ); -} - -export function EditedImageMessage(props: EditedImageMessageProps) { - return ( - `Edited ${count} image previews`, - lightboxTitle: "Edited image preview", - altPrefix: "Edited image", - }} - /> - ); -} diff --git a/src/browser/features/Messages/MessageRenderer.stories.tsx b/src/browser/features/Messages/MessageRenderer.stories.tsx index 7667e80ec1..cd59541c79 100644 --- a/src/browser/features/Messages/MessageRenderer.stories.tsx +++ b/src/browser/features/Messages/MessageRenderer.stories.tsx @@ -1,4 +1,3 @@ -import { expect, userEvent, within } from "@storybook/test"; import type { WorkspaceChatMessage, ChatMuxMessage } from "@/common/orpc/types"; import type { AppStory } from "@/browser/stories/meta.js"; import { appMeta, AppWithMocks, CHROMATIC_SMOKE_MODES } from "@/browser/stories/meta.js"; @@ -308,166 +307,6 @@ export const BudgetLimitWrapupMessages: AppStory = { ), }; -export const GeneratedImages: AppStory = { - parameters: { chromatic: { modes: CHROMATIC_SMOKE_MODES } }, - render: () => ( - { - collapseLeftSidebar(); - return setupSimpleChatStory({ - workspaceId: "ws-generated-images", - messages: [ - createUserMessage("msg-1", "/imagegen generate three soft gradient orb variants", { - historySequence: 1, - timestamp: STABLE_TIMESTAMP - 120000, - }), - createAssistantMessage("msg-2", "", { - historySequence: 2, - timestamp: STABLE_TIMESTAMP - 110000, - toolCalls: [ - { - type: "dynamic-tool" as const, - toolCallId: "image-tool-1", - toolName: "image_generate", - input: { prompt: "Three soft gradient orb variants" }, - state: "output-available" as const, - output: { - success: true, - model: "openai:gpt-image-1.5", - prompt: "Three soft gradient orb variants", - requestedCount: 3, - images: [ - { - path: "/tmp/mux/imagegen/image-tool-1/image-1.png", - filename: "image-1.png", - mediaType: "image/png", - thumbnail: { - data: "UklGRvYAAABXRUJQVlA4IOoAAACQEgCdASpAAdwAPpFIoU0lpCMiICgAsBIJaW7hd2EIQAnsA99snIe+2TkPfbJyHvtk5D6F9LteLk5D325l+ntk5D32yc4iLk5D32ych9C+l2vFych77cy/T2ych77ZOcRFych77ZOQ+hfS7Xi5OQ99uZfp7ZOQ99snOIi5OQ99snIfQvpdrxcnIe+3Mv09snIe+2TnERcnIe+2ThQAAP7/Q8H//M0f+k3/Ybtc/pLpcY3xLt3+3jjX4zxxr8Z441+M8ca+4CDr+AHZM0QqO+UnfKTvlJ3yk75Sd8pO+UnfKTvlJ3yk74AAAAA=", - mediaType: "image/webp", - width: 320, - height: 220, - }, - }, - { - path: "/tmp/mux/imagegen/image-tool-1/image-2.png", - filename: "image-2.png", - mediaType: "image/png", - thumbnail: { - data: "UklGRtQAAABXRUJQVlA4IMgAAABQEgCdASpAAdwAPpFIoU0lpCMiICgAsBIJaW7hd2EWgA7/Ie+2TkPfbJyHvtk5D32ych77ZOQ99snIe+2TkPfbJyHvtk5D32ych77ZOQ99snIe+2TkPfbJyHvtk5D32ych77ZOQ99snIe+2TkPfbJyHvtk5D32ych77ZOQ99snIe+2TkPfbJyHvtk5D32ych77ZOQ99snIe+1YAAD+ZP+dtop/ov//z8z/+B//dPl7L+4zc5pge8hugeOnA34EAAAAAAAAAAAAAA==", - mediaType: "image/webp", - width: 320, - height: 220, - }, - }, - { - path: "/tmp/mux/imagegen/image-tool-1/image-3.png", - filename: "image-3.png", - mediaType: "image/png", - thumbnail: { - data: "UklGRtYAAABXRUJQVlA4IMoAAADQEgCdASpAAdwAPpFIoU0lpCMiICgAsBIJaW7hd2EaHAfgAAAT2Ae+2TkPfbKBl4uTkPfbJyIgeTkPfbJyHvtk5D32ych77ZOQ99snIe+2TkPfbJyHvtk5D32ych77ZOQ99snIe+2TkPfbJyHvtk5D32ych77ZOQ99snIe+2TkPfbJyHvtk5D32ych77ZOQ99snIe+2TkPfbJyHvtkxAAA/v89Yf//NgVzXPqj///OJY6ndzYJN7fLMDchXQVoJwLkgQAAAAAAAAAA", - mediaType: "image/webp", - width: 320, - height: 220, - }, - }, - ], - }, - }, - ], - }), - ], - }); - }} - /> - ), -}; - -GeneratedImages.play = async ({ canvasElement }) => { - const canvas = within(canvasElement); - await expect(canvas.findByText("Generated 3 image previews")).resolves.toBeInTheDocument(); - await expect(canvas.findByAltText("Generated image 3")).resolves.toBeInTheDocument(); - await userEvent.click(await canvas.findByAltText("Generated image 2")); - const body = within(document.body); - await expect(body.findByAltText("Generated image preview")).resolves.toBeInTheDocument(); -}; - -export const EditedImages: AppStory = { - parameters: { chromatic: { modes: CHROMATIC_SMOKE_MODES } }, - render: () => ( - { - collapseLeftSidebar(); - return setupSimpleChatStory({ - workspaceId: "ws-edited-images", - messages: [ - createUserMessage("msg-1", "Mock the settings screenshot with upload consent enabled", { - historySequence: 1, - timestamp: STABLE_TIMESTAMP - 120000, - }), - createAssistantMessage("msg-2", "", { - historySequence: 2, - timestamp: STABLE_TIMESTAMP - 110000, - toolCalls: [ - { - type: "dynamic-tool" as const, - toolCallId: "image-edit-tool-1", - toolName: "image_edit", - input: { - sourcePath: "screenshots/settings.png", - prompt: "Show the Image Tools upload consent switch enabled.", - }, - state: "output-available" as const, - output: { - success: true, - model: "openai:gpt-image-1.5", - prompt: "Show the Image Tools upload consent switch enabled.", - requestedCount: 1, - source: { - path: "screenshots/settings.png", - resolvedPath: "/home/user/projects/my-app/screenshots/settings.png", - sizeBytes: 123456, - dimensions: { width: 640, height: 480 }, - }, - images: [ - { - path: "/tmp/mux/edited_images/image-edit-tool-1/image-1.png", - filename: "image-1.png", - mediaType: "image/png", - outputDimensions: { width: 640, height: 480 }, - thumbnail: { - data: "UklGRiIAAABXRUJQVlA4IBYAAAAwAQCdASoBAAEADsD+JaQAA3AAAAAA", - mediaType: "image/webp", - width: 1, - height: 1, - }, - }, - ], - }, - }, - ], - }), - ], - }); - }} - /> - ), -}; - -EditedImages.play = async ({ canvasElement }) => { - const canvas = within(canvasElement); - await expect(canvas.findByText("Edited image preview")).resolves.toBeInTheDocument(); - const sourcePathMatches = await canvas.findAllByText("screenshots/settings.png"); - await expect(sourcePathMatches.length).toBeGreaterThan(0); - const dimensionsMatches = await canvas.findAllByText("640×480"); - await expect(dimensionsMatches.length).toBeGreaterThan(0); - await expect(canvas.findByText("123,456 bytes")).resolves.toBeInTheDocument(); - await userEvent.click(await canvas.findByText("Resolved path")); - await expect( - canvas.findByText("/home/user/projects/my-app/screenshots/settings.png") - ).resolves.toBeInTheDocument(); - await expect(canvas.findByAltText("Edited image 1")).resolves.toBeInTheDocument(); -}; - export const WithReasoning: AppStory = { render: () => ( { - beforeEach(() => { - globalThis.window = new GlobalWindow() as unknown as Window & typeof globalThis; - globalThis.document = globalThis.window.document; - globalThis.localStorage = globalThis.window.localStorage; - }); - - afterEach(() => { - cleanup(); - - globalThis.window = undefined as unknown as Window & typeof globalThis; - globalThis.document = undefined as unknown as Document; - globalThis.localStorage = undefined as unknown as Storage; - }); - - test("renders generated image artifacts with prompt, model, preview, and saved path", () => { - const tinyWebp = "UklGRiIAAABXRUJQVlA4IBYAAAAwAQCdASoBAAEADsD+JaQAA3AAAAAA"; - const message: DisplayedMessage = { - type: "generated-image", - id: "generated-image-row", - historyId: "assistant-1", - toolCallId: "tool-1", - prompt: "A soft gradient orb", - model: "openai:gpt-image-1.5", - images: [ - { - path: "/tmp/mux/imagegen/tool-1/image-1.png", - filename: "image-1.png", - mediaType: "image/png", - thumbnail: { - data: tinyWebp, - mediaType: "image/webp", - width: 1, - height: 1, - }, - }, - ], - historySequence: 12, - isPartial: false, - }; - - const { getByAltText, getByText } = render(); - - expect(getByText("Generated image preview")).toBeDefined(); - expect(getByText("openai:gpt-image-1.5")).toBeDefined(); - expect(getByText("A soft gradient orb")).toBeDefined(); - expect(getByText("/tmp/mux/imagegen/tool-1/image-1.png")).toBeDefined(); - expect(getByAltText("Generated image 1")).toBeDefined(); - }); - - test("renders edited image artifacts with source metadata and saved path", () => { - const tinyWebp = "UklGRiIAAABXRUJQVlA4IBYAAAAwAQCdASoBAAEADsD+JaQAA3AAAAAA"; - const message: DisplayedMessage = { - type: "edited-image", - id: "edited-image-row", - historyId: "assistant-2", - toolCallId: "tool-2", - prompt: "Make the square blue", - model: "openai:gpt-image-1.5", - source: { - path: "/tmp/source.png", - resolvedPath: "/tmp/source.png", - sizeBytes: 100, - dimensions: { width: 16, height: 16 }, - }, - images: [ - { - path: "/tmp/mux/edited_images/tool-2/image-1.png", - filename: "image-1.png", - mediaType: "image/png", - outputDimensions: { width: 16, height: 16 }, - thumbnail: { - data: tinyWebp, - mediaType: "image/webp", - width: 1, - height: 1, - }, - }, - ], - historySequence: 13, - isPartial: false, - }; - - const { getByAltText, getByText } = render(); - - expect(getByText("Edited image preview")).toBeDefined(); - expect(getByText("Make the square blue")).toBeDefined(); - expect(getByText("/tmp/source.png")).toBeDefined(); - expect(getByText("/tmp/mux/edited_images/tool-2/image-1.png")).toBeDefined(); - expect(getByAltText("Edited image 1")).toBeDefined(); - }); -}); - describe("MessageRenderer compaction boundary rows", () => { beforeEach(() => { globalThis.window = new GlobalWindow() as unknown as Window & typeof globalThis; diff --git a/src/browser/features/Messages/MessageRenderer.tsx b/src/browser/features/Messages/MessageRenderer.tsx index 1e720f6042..77c893be81 100644 --- a/src/browser/features/Messages/MessageRenderer.tsx +++ b/src/browser/features/Messages/MessageRenderer.tsx @@ -12,7 +12,6 @@ import { StreamErrorMessage } from "./StreamErrorMessage"; import { CompactionBoundaryMessage } from "./CompactionBoundaryMessage"; import { HistoryHiddenMessage } from "./HistoryHiddenMessage"; import { InitMessage } from "./InitMessage"; -import { EditedImageMessage, GeneratedImageMessage } from "./GeneratedImageMessage"; import { ProposePlanToolCall } from "../Tools/ProposePlanToolCall"; import { removeEphemeralMessage } from "@/browser/stores/WorkspaceStore"; import { TranscriptMessageBoundary, TranscriptQuoteRoot } from "./TranscriptQuoteBoundary"; @@ -121,12 +120,6 @@ export const MessageRenderer = React.memo( /> ); break; - case "generated-image": - renderedMessage = ; - break; - case "edited-image": - renderedMessage = ; - break; case "reasoning": renderedMessage = ( diff --git a/src/browser/features/Settings/Sections/ExperimentsSection.stories.tsx b/src/browser/features/Settings/Sections/ExperimentsSection.stories.tsx index 5924f8dc8c..178f063c5a 100644 --- a/src/browser/features/Settings/Sections/ExperimentsSection.stories.tsx +++ b/src/browser/features/Settings/Sections/ExperimentsSection.stories.tsx @@ -1,8 +1,6 @@ -import { expect, userEvent, waitFor, within } from "@storybook/test"; +import { expect, waitFor, within } from "@storybook/test"; import { lightweightMeta } from "@/browser/stories/meta.js"; -import { replaceInputValue } from "@/browser/stories/storyPlayHelpers.js"; import { EXPERIMENT_IDS } from "@/common/constants/experiments"; -import { DEFAULT_IMAGE_GENERATION_MODEL } from "@/common/types/imageGeneration"; import { DEFAULT_GOAL_DEFAULTS } from "@/constants/goals"; import type { Meta, StoryObj } from "@storybook/react-vite"; import { ExperimentsSection } from "./ExperimentsSection.js"; @@ -39,50 +37,6 @@ export const ExperimentsToggleOn: Story = { ), }; -export const ImageGenerationEnabled: Story = { - render: () => ( - - setupSettingsStory({ - experiments: { [EXPERIMENT_IDS.IMAGE_GENERATION_TOOL]: true }, - imageGeneration: { - modelString: DEFAULT_IMAGE_GENERATION_MODEL, - maxImagesPerCall: 4, - allowImageUploadsForEditing: true, - }, - }) - } - > - - - ), - play: async ({ canvasElement }) => { - const canvas = within(canvasElement); - await expect(canvas.findByText("Image Tools")).resolves.toBeInTheDocument(); - await expect( - canvas.findByDisplayValue(DEFAULT_IMAGE_GENERATION_MODEL) - ).resolves.toBeInTheDocument(); - - const uploadConsentSwitch = await canvas.findByLabelText("Allow image uploads for editing"); - await waitFor(() => expect(uploadConsentSwitch).toHaveAttribute("aria-checked", "true")); - await userEvent.click(uploadConsentSwitch); - await waitFor(() => expect(uploadConsentSwitch).toHaveAttribute("aria-checked", "false")); - - const maxImagesInput = await canvas.findByDisplayValue("4"); - // Use replaceInputValue (focus + select-all + type) instead of clear+type. - // The max-images input has an onBlur that normalizes invalid drafts back - // to the default; raw clear+type can interleave a spurious blur that - // resets the value and produces flaky assertions. See replaceInputValue - // for details. - await replaceInputValue(maxImagesInput, "11"); - await expect( - canvas.findByText("Enter a whole number from 1 to 10.") - ).resolves.toBeInTheDocument(); - - await replaceInputValue(maxImagesInput, "2"); - }, -}; - export const HeartbeatSettingsEnabled: Story = { // Goals graduated to GA, so they no longer appear in the Experiments // panel at all (configuration lives in the Goal tab's @@ -126,26 +80,3 @@ export const HeartbeatSettingsEnabled: Story = { ); }, }; - -export const ExperimentsToggleOff: Story = { - render: () => ( - - setupSettingsStory({ - experiments: { [EXPERIMENT_IDS.IMAGE_GENERATION_TOOL]: false }, - }) - } - > - - - ), - play: async ({ canvasElement }) => { - const canvas = within(canvasElement); - await expect(canvas.findByText("Image Tools")).resolves.toBeInTheDocument(); - const imageToolsToggle = await canvas.findByLabelText("Toggle Image Tools"); - await waitFor(() => expect(imageToolsToggle).toHaveAttribute("aria-checked", "false")); - await expect(canvas.queryByText("Image model")).toBeNull(); - await expect(canvas.queryByText("Max images per call")).toBeNull(); - await expect(canvas.queryByText("Allow image uploads for editing")).toBeNull(); - }, -}; diff --git a/src/browser/features/Settings/Sections/ExperimentsSection.tsx b/src/browser/features/Settings/Sections/ExperimentsSection.tsx index 8e9e09fa52..02113421ce 100644 --- a/src/browser/features/Settings/Sections/ExperimentsSection.tsx +++ b/src/browser/features/Settings/Sections/ExperimentsSection.tsx @@ -27,7 +27,6 @@ import type { ApiServerStatus, DesktopPrereqStatus } from "@/common/orpc/types"; import { Input } from "@/browser/components/Input/Input"; import { useAPI, type APIClient } from "@/browser/contexts/API"; import { useTelemetry } from "@/browser/hooks/useTelemetry"; -import { ImageGenerationExperimentConfig } from "./ImageGenerationExperimentConfig"; import { AdvisorToolExperimentConfig } from "./AdvisorToolExperimentConfig"; import { HeartbeatDefaultsControls } from "./HeartbeatSection"; @@ -664,7 +663,6 @@ export function ExperimentsSection() { const allExperiments = getExperimentList(); const { api } = useAPI(); const advisorToolEnabled = useExperimentValue(EXPERIMENT_IDS.ADVISOR_TOOL); - const imageGenerationToolEnabled = useExperimentValue(EXPERIMENT_IDS.IMAGE_GENERATION_TOOL); const workspaceHeartbeatsEnabled = useExperimentValue(EXPERIMENT_IDS.WORKSPACE_HEARTBEATS); const settingsConfigRequestRef = useRef<{ api: APIClient; @@ -753,9 +751,6 @@ export function ExperimentsSection() { {exp.id === EXPERIMENT_IDS.ADVISOR_TOOL && advisorToolEnabled && ( )} - {exp.id === EXPERIMENT_IDS.IMAGE_GENERATION_TOOL && imageGenerationToolEnabled && ( - - )} {exp.id === EXPERIMENT_IDS.WORKSPACE_HEARTBEATS && workspaceHeartbeatsEnabled && ( MAX_IMAGE_GENERATION_MAX_IMAGES - ) { - return null; - } - return parsed; -} - -function normalizeDraft( - modelDraft: string, - maxImagesDraft: string, - allowImageUploadsForEditing: boolean -): ImageGenerationConfig | null { - const modelString = modelDraft.trim(); - const maxImagesPerCall = parseMaxImages(maxImagesDraft); - if (!modelString || maxImagesPerCall == null) { - return null; - } - return { modelString, maxImagesPerCall, allowImageUploadsForEditing }; -} - -function areConfigsEqual(a: ImageGenerationConfig, b: ImageGenerationConfig): boolean { - return ( - a.modelString === b.modelString && - a.maxImagesPerCall === b.maxImagesPerCall && - a.allowImageUploadsForEditing === b.allowImageUploadsForEditing - ); -} - -interface ImageGenerationExperimentConfigProps { - enabled?: boolean; -} - -export function ImageGenerationExperimentConfig(props: ImageGenerationExperimentConfigProps) { - const imageToolsEnabled = props.enabled ?? true; - const { api } = useAPI(); - const [modelDraft, setModelDraft] = useState(DEFAULT_IMAGE_GENERATION_MODEL); - const [maxImagesDraft, setMaxImagesDraft] = useState(String(DEFAULT_IMAGE_GENERATION_MAX_IMAGES)); - const [allowUploadsDraft, setAllowUploadsDraft] = useState(false); - const [loaded, setLoaded] = useState(false); - const [loadFailed, setLoadFailed] = useState(false); - const [saveError, setSaveError] = useState(null); - - const saveTimerRef = useRef | null>(null); - const savingRef = useRef(false); - const pendingSaveRef = useRef(null); - const draftRef = useRef({ modelDraft, maxImagesDraft, allowUploadsDraft }); - const lastSyncedRef = useRef(null); - const isMountedRef = useRef(true); - - useEffect(() => { - return () => { - isMountedRef.current = false; - }; - }, []); - - useEffect(() => { - draftRef.current = { modelDraft, maxImagesDraft, allowUploadsDraft }; - }, [modelDraft, maxImagesDraft, allowUploadsDraft]); - - useEffect(() => { - if (!api) { - return; - } - - let ignore = false; - setLoaded(false); - setLoadFailed(false); - setSaveError(null); - - void api.config - .getConfig() - .then((cfg) => { - if (ignore) return; - const imageGeneration = normalizeImageGenerationConfig(cfg.imageGeneration); - setModelDraft(imageGeneration.modelString); - setAllowUploadsDraft(imageGeneration.allowImageUploadsForEditing); - setMaxImagesDraft(String(imageGeneration.maxImagesPerCall)); - lastSyncedRef.current = imageGeneration; - setLoaded(true); - }) - .catch((error: unknown) => { - if (ignore) return; - setSaveError(getErrorMessage(error)); - setLoadFailed(true); - setLoaded(true); - }); - - return () => { - ignore = true; - }; - }, [api]); - - useEffect(() => { - if (!api || !loaded || loadFailed) { - return; - } - - const normalizedDraft = normalizeDraft(modelDraft, maxImagesDraft, allowUploadsDraft); - if (normalizedDraft == null) { - // Invalid drafts should not flush an older valid payload when Settings closes. - pendingSaveRef.current = null; - if (saveTimerRef.current) { - clearTimeout(saveTimerRef.current); - saveTimerRef.current = null; - } - return; - } - - const lastSynced = lastSyncedRef.current; - if (lastSynced && areConfigsEqual(lastSynced, normalizedDraft)) { - pendingSaveRef.current = null; - if (saveTimerRef.current) { - clearTimeout(saveTimerRef.current); - saveTimerRef.current = null; - } - return; - } - - pendingSaveRef.current = normalizedDraft; - if (saveTimerRef.current) { - clearTimeout(saveTimerRef.current); - saveTimerRef.current = null; - } - - const flush = () => { - if (savingRef.current) return; - const payload = pendingSaveRef.current; - if (!payload) return; - - pendingSaveRef.current = null; - savingRef.current = true; - let saveSucceeded = false; - void api.config - .updateImageGenerationConfig({ imageGeneration: payload }) - .then(() => { - saveSucceeded = true; - lastSyncedRef.current = payload; - if (isMountedRef.current) { - setSaveError(null); - } - }) - .catch((error: unknown) => { - const currentDraft = isMountedRef.current - ? normalizeDraft( - draftRef.current.modelDraft, - draftRef.current.maxImagesDraft, - draftRef.current.allowUploadsDraft - ) - : null; - pendingSaveRef.current = - currentDraft != null && - lastSyncedRef.current != null && - !areConfigsEqual(lastSyncedRef.current, currentDraft) - ? currentDraft - : null; - if (isMountedRef.current) { - setSaveError(getErrorMessage(error)); - } - }) - .finally(() => { - savingRef.current = false; - if (!saveSucceeded) { - return; - } - if (!isMountedRef.current) { - // Preserve edits made while the previous save was in flight; closing Settings - // should not silently drop the latest valid draft. - const pendingUnmountSave = pendingSaveRef.current; - if (pendingUnmountSave != null) { - pendingSaveRef.current = null; - savingRef.current = true; - void api.config - .updateImageGenerationConfig({ imageGeneration: pendingUnmountSave }) - .catch(() => undefined) - .finally(() => { - savingRef.current = false; - }); - } - return; - } - - const currentDraft = normalizeDraft( - draftRef.current.modelDraft, - draftRef.current.maxImagesDraft, - draftRef.current.allowUploadsDraft - ); - if ( - currentDraft != null && - lastSyncedRef.current != null && - !areConfigsEqual(lastSyncedRef.current, currentDraft) && - pendingSaveRef.current == null - ) { - pendingSaveRef.current = currentDraft; - } - if (pendingSaveRef.current != null) { - flush(); - } - }); - }; - - saveTimerRef.current = setTimeout(flush, 400); - return () => { - if (saveTimerRef.current) { - clearTimeout(saveTimerRef.current); - saveTimerRef.current = null; - } - }; - }, [api, loaded, loadFailed, modelDraft, maxImagesDraft, allowUploadsDraft]); - - useEffect(() => { - if (!api || !loaded || loadFailed) { - return; - } - - return () => { - if (saveTimerRef.current) { - clearTimeout(saveTimerRef.current); - saveTimerRef.current = null; - } - - if (savingRef.current) { - return; - } - - const payload = pendingSaveRef.current; - if (!payload) { - return; - } - - // Image generation settings auto-save. If Settings closes during the debounce window, - // flush the pending valid edit rather than silently dropping the user's change. - pendingSaveRef.current = null; - savingRef.current = true; - void api.config - .updateImageGenerationConfig({ imageGeneration: payload }) - .catch(() => undefined) - .finally(() => { - savingRef.current = false; - }); - }; - }, [api, loaded, loadFailed]); - - // NOTE: This blur handler resets invalid drafts back to a default. That - // interacts badly with naive Storybook play sequences like - // `userEvent.clear(...)` followed by `userEvent.type(...)`, because an - // interleaved blur can reset the field mid-sequence and produce flaky - // assertions (e.g. "42" instead of "2"). Story authors interacting with - // this input should use `replaceInputValue` from `storyPlayHelpers`, which - // overwrites via focus+select-all+type and never produces a transient - // empty/invalid value that this handler would clobber. - const handleMaxImagesBlur = () => { - const parsed = parseMaxImages(maxImagesDraft) ?? DEFAULT_IMAGE_GENERATION_MAX_IMAGES; - setMaxImagesDraft(String(parsed)); - }; - - const maxImagesInvalid = parseMaxImages(maxImagesDraft) == null; - const modelInvalid = modelDraft.trim().length === 0; - - const controlsDisabled = loadFailed || !imageToolsEnabled; - - if (!api) { - return ( -
-
Connect to mux to configure Image Tools.
-
- ); - } - - return ( -
-
- Experimental Image Tools require OpenAI provider credentials. Full images are saved as - runtime artifacts; copy final assets into the workspace when they matter. Image editing - uploads source images as-is, including metadata, only when upload consent is enabled. -
- {!imageToolsEnabled && ( -
- Turn on Image Tools above to enable image generation settings and upload consent. -
- )} - -
-
-
Image model
-
- {`Default ${DEFAULT_IMAGE_GENERATION_MODEL}; pinned snapshot ${PINNED_IMAGE_GENERATION_MODEL}`} -
-
- ) => - setModelDraft(event.target.value) - } - placeholder={DEFAULT_IMAGE_GENERATION_MODEL} - disabled={controlsDisabled} - className="border-border-medium bg-background-secondary h-9 w-72" - /> -
- {modelInvalid &&
Image model is required.
} - -
-
-
Max images per call
-
- {`${MIN_IMAGE_GENERATION_MAX_IMAGES}-${MAX_IMAGE_GENERATION_MAX_IMAGES}; requests above this fail instead of being silently clamped`} -
-
- ) => - setMaxImagesDraft(event.target.value) - } - onBlur={handleMaxImagesBlur} - inputMode="numeric" - disabled={controlsDisabled} - className="border-border-medium bg-background-secondary h-9 w-24" - /> -
- -
-
-
Allow image uploads for editing
-
- Enables the image_edit tool. The agent may select any image file the runtime can access; - source images are uploaded as-is, including embedded metadata. -
-
- -
- {maxImagesInvalid && ( -
- {`Enter a whole number from ${MIN_IMAGE_GENERATION_MAX_IMAGES} to ${MAX_IMAGE_GENERATION_MAX_IMAGES}.`} -
- )} - {saveError &&
{saveError}
} -
- ); -} diff --git a/src/browser/features/Settings/Sections/settingsStoryUtils.tsx b/src/browser/features/Settings/Sections/settingsStoryUtils.tsx index 7f27d43564..130429c911 100644 --- a/src/browser/features/Settings/Sections/settingsStoryUtils.tsx +++ b/src/browser/features/Settings/Sections/settingsStoryUtils.tsx @@ -18,7 +18,6 @@ import { SELECTED_WORKSPACE_KEY, UI_THEME_KEY } from "@/common/constants/storage import type { ServerAuthSession } from "@/common/orpc/types"; import type { AgentAiDefaults } from "@/common/types/agentAiDefaults"; import type { ProjectConfig } from "@/common/types/project"; -import type { ImageGenerationConfig } from "@/common/types/imageGeneration"; import type { TaskSettings } from "@/common/types/tasks"; import type { GoalDefaults } from "@/constants/goals"; import type { LayoutPresetsConfig } from "@/common/types/uiLayouts"; @@ -109,8 +108,6 @@ interface SetupSettingsStoryOptions { providersList?: string[]; agentAiDefaults?: AgentAiDefaults; taskSettings?: Partial; - /** Initial image generation config for config.getConfig */ - imageGeneration?: Partial; /** Initial global heartbeat default prompt for config.getConfig */ heartbeatDefaultPrompt?: string; /** Initial global heartbeat default interval for config.getConfig */ @@ -142,7 +139,6 @@ export function setupSettingsStory(options: SetupSettingsStoryOptions): APIClien providersConfig: options.providersConfig ?? {}, agentAiDefaults: options.agentAiDefaults, providersList: options.providersList ?? ["anthropic", "openai", "xai"], - imageGeneration: options.imageGeneration, heartbeatDefaultPrompt: options.heartbeatDefaultPrompt, heartbeatDefaultIntervalMs: options.heartbeatDefaultIntervalMs, goalDefaults: options.goalDefaults, diff --git a/src/browser/features/Tools/Shared/NestedToolRenderer.test.tsx b/src/browser/features/Tools/Shared/NestedToolRenderer.test.tsx index 7ca0ed885a..f2bcb4eb5b 100644 --- a/src/browser/features/Tools/Shared/NestedToolRenderer.test.tsx +++ b/src/browser/features/Tools/Shared/NestedToolRenderer.test.tsx @@ -1,6 +1,7 @@ import { afterEach, beforeEach, describe, expect, test } from "bun:test"; import { cleanup, render } from "@testing-library/react"; import { GlobalWindow } from "happy-dom"; +import { TooltipProvider } from "@radix-ui/react-tooltip"; import { NestedToolRenderer } from "./NestedToolRenderer"; @@ -23,12 +24,14 @@ afterEach(() => { describe("NestedToolRenderer", () => { test("renders hook output for nested tool results", () => { const { getByText } = render( - + + + ); expect(getByText("hook output")).toBeDefined(); diff --git a/src/browser/features/Tools/Shared/ToolPrimitives.tsx b/src/browser/features/Tools/Shared/ToolPrimitives.tsx index b3ce53eea9..59cac65ce1 100644 --- a/src/browser/features/Tools/Shared/ToolPrimitives.tsx +++ b/src/browser/features/Tools/Shared/ToolPrimitives.tsx @@ -16,7 +16,6 @@ import { Globe, GraduationCap, Hand, - Image as ImageIcon, Keyboard, Lightbulb, MessageCircleQuestion, @@ -244,8 +243,6 @@ export const TOOL_NAME_TO_ICON: Partial> = { ask_user_question: MessageCircleQuestion, file_read: BookOpen, attach_file: Paperclip, - image_generate: ImageIcon, - image_edit: ImageIcon, desktop_screenshot: Monitor, desktop_move_mouse: Move, desktop_click: MousePointerClick, diff --git a/src/browser/hooks/useSendMessageOptions.ts b/src/browser/hooks/useSendMessageOptions.ts index 04a61c3c03..3426e7042d 100644 --- a/src/browser/hooks/useSendMessageOptions.ts +++ b/src/browser/hooks/useSendMessageOptions.ts @@ -61,7 +61,6 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptionsWi const execSubagentHardRestart = useExperimentOverrideValue( EXPERIMENT_IDS.EXEC_SUBAGENT_HARD_RESTART ); - const imageGenerationTool = useExperimentOverrideValue(EXPERIMENT_IDS.IMAGE_GENERATION_TOOL); // Prefer metadata over the global default until workspace localStorage seeding catches up. const metadataSettings = getWorkspaceAiSettingsFromMetadata( @@ -83,7 +82,6 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptionsWi programmaticToolCallingExclusive, advisorTool, execSubagentHardRestart, - imageGenerationTool, }, disableWorkspaceAgents, }); diff --git a/src/browser/stories/mocks/orpc.ts b/src/browser/stories/mocks/orpc.ts index 5a611070ed..a53e39cc52 100644 --- a/src/browser/stories/mocks/orpc.ts +++ b/src/browser/stories/mocks/orpc.ts @@ -42,10 +42,6 @@ import { type RuntimeEnablementId, } from "@/common/types/runtime"; import { DEFAULT_RUNTIME_CONFIG } from "@/common/constants/workspace"; -import { - normalizeImageGenerationConfig, - type ImageGenerationConfig, -} from "@/common/types/imageGeneration"; import { DEFAULT_TASK_SETTINGS, normalizeSubagentAiDefaults, @@ -144,8 +140,6 @@ export interface MockORPCClientOptions { heartbeatDefaultPrompt?: string; /** Initial global heartbeat default interval for config.getConfig */ heartbeatDefaultIntervalMs?: number; - /** Initial image generation config for config.getConfig */ - imageGeneration?: Partial; /** Initial global goal defaults for config.getConfig */ goalDefaults?: GoalDefaults; /** @@ -377,7 +371,6 @@ export function createMockORPCClient(options: MockORPCClientOptions = {}): APICl onePasswordAccountName: initialOnePasswordAccountName = null, heartbeatDefaultPrompt: initialHeartbeatDefaultPrompt, heartbeatDefaultIntervalMs: initialHeartbeatDefaultIntervalMs, - imageGeneration: initialImageGeneration, goalDefaults: initialGoalDefaults, goalBoardSnapshots = new Map(), routePriority: initialRoutePriority = ["direct"], @@ -514,7 +507,6 @@ export function createMockORPCClient(options: MockORPCClientOptions = {}): APICl let onePasswordAccountName: string | null = initialOnePasswordAccountName; let heartbeatDefaultPrompt = initialHeartbeatDefaultPrompt; let heartbeatDefaultIntervalMs = initialHeartbeatDefaultIntervalMs; - let imageGeneration = normalizeImageGenerationConfig(initialImageGeneration); let goalDefaults = normalizeGoalDefaults(initialGoalDefaults ?? DEFAULT_GOAL_DEFAULTS); let routePriority = [...initialRoutePriority]; let routeOverrides = { ...initialRouteOverrides }; @@ -708,7 +700,6 @@ export function createMockORPCClient(options: MockORPCClientOptions = {}): APICl onePasswordAccountName, heartbeatDefaultPrompt, heartbeatDefaultIntervalMs, - imageGeneration, goalDefaults, chatTranscriptFullWidth, muxGovernorEnrolled, @@ -813,11 +804,6 @@ export function createMockORPCClient(options: MockORPCClientOptions = {}): APICl notifyConfigChanged(); return Promise.resolve(undefined); }, - updateImageGenerationConfig: (input: { imageGeneration: ImageGenerationConfig }) => { - imageGeneration = normalizeImageGenerationConfig(input.imageGeneration); - notifyConfigChanged(); - return Promise.resolve(undefined); - }, updateGoalDefaults: (input: { goalDefaults: GoalDefaults }) => { goalDefaults = normalizeGoalDefaults(input.goalDefaults); notifyConfigChanged(); diff --git a/src/browser/utils/messages/StreamingMessageAggregator.test.ts b/src/browser/utils/messages/StreamingMessageAggregator.test.ts index a442747246..b8e8012373 100644 --- a/src/browser/utils/messages/StreamingMessageAggregator.test.ts +++ b/src/browser/utils/messages/StreamingMessageAggregator.test.ts @@ -253,288 +253,7 @@ function historicalTodoMessage( return historicalToolMessage(id, "todo_write", { todos }, options); } -function imageGenerateOutput(prompt: string, path: string, extra: Record = {}) { - return { - success: true, - model: "openai:gpt-image-1.5", - prompt, - requestedCount: 1, - images: [{ path, filename: "image-1.png", mediaType: "image/png" }], - ...extra, - }; -} - -function imageEditOutput(prompt: string, path: string, extra: Record = {}) { - return { - ...imageGenerateOutput(prompt, path), - source: { - path: "/tmp/source.png", - resolvedPath: "/tmp/source.png", - sizeBytes: 100, - dimensions: { width: 16, height: 16 }, - }, - images: [ - { - path, - filename: "image-1.png", - mediaType: "image/png", - outputDimensions: { width: 16, height: 16 }, - }, - ], - ...extra, - }; -} - -function displayedFromTool( - toolName: string, - input: Record, - output: unknown, - options: { id?: string; toolCallId?: string; partial?: boolean } = {} -): DisplayedMessage[] { - const aggregator = createTestAggregator(); - aggregator.loadHistoricalMessages([ - historicalToolMessage(options.id ?? "assistant-tool", toolName, input, { - toolCallId: options.toolCallId, - output, - partial: options.partial, - }), - ]); - return aggregator.getDisplayedMessages(); -} - describe("StreamingMessageAggregator", () => { - describe("image generation display messages", () => { - test("renders successful image_generate tool output as a generated-image row", () => { - const displayed = displayedFromTool( - "image_generate", - { prompt: "A small blue square" }, - imageGenerateOutput("A small blue square", "/tmp/mux/imagegen/image-tool-1/image-1.png"), - { id: "assistant-image", toolCallId: "image-tool-1" } - ); - - expect(displayed).toHaveLength(1); - expect(displayed[0]?.type).toBe("generated-image"); - if (displayed[0]?.type !== "generated-image") { - throw new Error("Expected generated-image display row"); - } - expect(displayed[0].toolCallId).toBe("image-tool-1"); - expect(displayed[0].model).toBe("openai:gpt-image-1.5"); - expect(displayed[0].images[0]?.path).toBe("/tmp/mux/imagegen/image-tool-1/image-1.png"); - }); - - test("keeps image_generate output with hook output as a normal tool row", () => { - const displayed = displayedFromTool( - "image_generate", - { prompt: "A small blue square" }, - imageGenerateOutput("A small blue square", "/tmp/mux/imagegen/image-tool-1/image-1.png", { - hook_output: "post-processing hook ran", - }), - { id: "assistant-image-hook", toolCallId: "image-tool-hook" } - ); - - expect(displayed).toHaveLength(1); - expect(displayed[0]?.type).toBe("tool"); - if (displayed[0]?.type !== "tool") { - throw new Error("Expected hooked image generation to remain a tool row"); - } - expect(displayed[0].toolName).toBe("image_generate"); - expect(displayed[0].result).toMatchObject({ hook_output: "post-processing hook ran" }); - }); - - test("renders nested PTC image_generate output as a generated-image row", () => { - const imageOutput = imageGenerateOutput( - "A nested blue square", - "/tmp/mux/generated_images/ptc-image/image-1.png" - ); - const displayed = displayedFromTool( - "code_execution", - { code: "await mux.image_generate(...)" }, - { - success: true, - result: "done", - toolCalls: [ - { - toolName: "image_generate", - args: { prompt: "A nested blue square" }, - result: imageOutput, - duration_ms: 12, - }, - ], - }, - { id: "assistant-ptc-image", toolCallId: "code-tool-1" } - ); - - expect(displayed).toHaveLength(2); - expect(displayed[0]?.type).toBe("tool"); - expect(displayed[1]?.type).toBe("generated-image"); - if (displayed[0]?.type !== "tool" || displayed[1]?.type !== "generated-image") { - throw new Error("Expected code_execution tool row followed by generated image row"); - } - expect(displayed[0].toolName).toBe("code_execution"); - expect(displayed[0].nestedCalls).toEqual([]); - expect(displayed[0].isLastPartOfMessage).toBe(false); - expect(displayed[1].toolCallId).toBe("code-tool-1-nested-0"); - expect(displayed[1].prompt).toBe("A nested blue square"); - expect(displayed[1].images[0]?.path).toBe("/tmp/mux/generated_images/ptc-image/image-1.png"); - expect(displayed[1].isLastPartOfMessage).toBe(true); - }); - - test("renders successful image_edit tool output as an edited-image row", () => { - const displayed = displayedFromTool( - "image_edit", - { sourcePath: "/tmp/source.png", prompt: "Make the square blue" }, - imageEditOutput( - "Make the square blue", - "/tmp/mux/edited_images/image-edit-tool-1/image-1.png" - ), - { id: "assistant-edit-image", toolCallId: "image-edit-tool-1" } - ); - - expect(displayed).toHaveLength(1); - expect(displayed[0]?.type).toBe("edited-image"); - if (displayed[0]?.type !== "edited-image") { - throw new Error("Expected edited-image display row"); - } - expect(displayed[0].toolCallId).toBe("image-edit-tool-1"); - expect(displayed[0].source.path).toBe("/tmp/source.png"); - expect(displayed[0].images[0]?.outputDimensions).toEqual({ width: 16, height: 16 }); - }); - - test("renders nested PTC image_edit output as an edited-image row", () => { - const imageOutput = imageEditOutput( - "Make a nested square blue", - "/tmp/mux/edited_images/ptc-edit/image-1.png" - ); - const displayed = displayedFromTool( - "code_execution", - { code: "await mux.image_edit(...)" }, - { - success: true, - result: "done", - toolCalls: [ - { - toolName: "image_edit", - args: { sourcePath: "/tmp/source.png", prompt: "Make a nested square blue" }, - result: imageOutput, - duration_ms: 12, - }, - ], - }, - { id: "assistant-ptc-edit-image", toolCallId: "code-tool-edit-1" } - ); - - expect(displayed).toHaveLength(2); - expect(displayed[0]?.type).toBe("tool"); - expect(displayed[1]?.type).toBe("edited-image"); - if (displayed[0]?.type !== "tool" || displayed[1]?.type !== "edited-image") { - throw new Error("Expected code_execution tool row followed by edited image row"); - } - expect(displayed[0].toolName).toBe("code_execution"); - expect(displayed[0].nestedCalls).toEqual([]); - expect(displayed[0].isLastPartOfMessage).toBe(false); - expect(displayed[1].toolCallId).toBe("code-tool-edit-1-nested-0"); - expect(displayed[1].prompt).toBe("Make a nested square blue"); - expect(displayed[1].images[0]?.path).toBe("/tmp/mux/edited_images/ptc-edit/image-1.png"); - expect(displayed[1].isLastPartOfMessage).toBe(true); - }); - - const toolRowScenarios = [ - { - name: "keeps malformed successful image_edit output as a normal tool row", - toolName: "image_edit", - toolCallId: "image-edit-tool-malformed", - input: { sourcePath: "/tmp/source.png", prompt: "Make the square blue" }, - output: imageEditOutput( - "Make the square blue", - "/tmp/mux/edited_images/image-edit-tool-1/image-1.png", - { - images: [ - { - path: "/tmp/mux/edited_images/image-edit-tool-1/image-1.png", - filename: "image-1.png", - mediaType: "image/png", - }, - ], - } - ), - error: "Expected malformed image edit to remain a tool row", - }, - { - name: "keeps malformed successful image_generate output as a normal tool row", - toolName: "image_generate", - toolCallId: "image-tool-malformed", - input: { prompt: "A small blue square" }, - output: imageGenerateOutput("A small blue square", "", { images: [null] }), - expectedStatus: "completed", - error: "Expected malformed image generation to remain a tool row", - }, - { - name: "keeps non-string image_generate warnings as a normal tool row", - toolName: "image_generate", - toolCallId: "image-tool-bad-warnings", - input: { prompt: "A small blue square" }, - output: imageGenerateOutput( - "A small blue square", - "/tmp/mux/generated_images/image-tool-1/image-1.png", - { - warnings: "thumbnail warning", - } - ), - error: "Expected bad image warnings to remain a tool row", - }, - { - name: "keeps successful image_generate output as a normal tool row when the message is partial", - toolName: "image_generate", - toolCallId: "image-tool-partial", - input: { prompt: "A small blue square" }, - output: imageGenerateOutput( - "A small blue square", - "/tmp/mux/generated_images/image-tool-1/image-1.png" - ), - partial: true, - expectedStatus: "completed", - error: "Expected partial image generation to remain a tool row", - }, - { - name: "keeps failed image_edit output as a normal tool row", - toolName: "image_edit", - toolCallId: "image-edit-tool-failed", - input: { sourcePath: "/tmp/source.png", prompt: "Make the square blue" }, - output: { success: false, error: "Image editing requires upload consent." }, - expectedStatus: "failed", - error: "Expected failed image edit to remain a tool row", - }, - { - name: "keeps failed image_generate output as a normal tool row", - toolName: "image_generate", - toolCallId: "image-tool-failed", - input: { prompt: "A small blue square" }, - output: { success: false, error: "Image generation requires an OpenAI API key." }, - expectedStatus: "failed", - error: "Expected failed image generation to remain a tool row", - }, - ] as const; - - for (const scenario of toolRowScenarios) { - test(scenario.name, () => { - const displayed = displayedFromTool(scenario.toolName, scenario.input, scenario.output, { - toolCallId: scenario.toolCallId, - partial: "partial" in scenario ? scenario.partial : undefined, - }); - - expect(displayed).toHaveLength(1); - expect(displayed[0]?.type).toBe("tool"); - if (displayed[0]?.type !== "tool") { - throw new Error(scenario.error); - } - expect(displayed[0].toolName).toBe(scenario.toolName); - if ("expectedStatus" in scenario) { - expect(displayed[0].status).toBe(scenario.expectedStatus); - } - }); - } - }); - describe("init state reference stability", () => { test("should return new array reference when state changes", async () => { const aggregator = new StreamingMessageAggregator(TEST_CREATED_AT); diff --git a/src/browser/utils/messages/applyToolOutputRedaction.test.ts b/src/browser/utils/messages/applyToolOutputRedaction.test.ts index 680704419c..188fa5eba6 100644 --- a/src/browser/utils/messages/applyToolOutputRedaction.test.ts +++ b/src/browser/utils/messages/applyToolOutputRedaction.test.ts @@ -3,36 +3,22 @@ import type { MuxMessage } from "@/common/types/message"; import { applyToolOutputRedaction } from "./applyToolOutputRedaction"; describe("applyToolOutputRedaction", () => { - it("strips image generation thumbnails from provider-bound tool output", () => { + it("strips UI-only fields from provider-bound tool output", () => { const messages: MuxMessage[] = [ { id: "assistant-1", - role: "assistant" as const, + role: "assistant", parts: [ { - type: "dynamic-tool" as const, - toolCallId: "image-tool-1", - toolName: "image_generate", + type: "dynamic-tool", + toolCallId: "tool-1", + toolName: "ask_user_question", input: {}, - state: "output-available" as const, + state: "output-available", output: { success: true, - model: "openai:gpt-image-1.5", - prompt: "square", - requestedCount: 1, - images: [ - { - path: "/tmp/image.png", - filename: "image.png", - mediaType: "image/png", - thumbnail: { - data: "large-base64", - mediaType: "image/webp", - width: 512, - height: 512, - }, - }, - ], + answer: "continue", + ui_only: { ask_user_question: { questions: [], answers: {} } }, }, }, ], @@ -42,36 +28,30 @@ describe("applyToolOutputRedaction", () => { const result = applyToolOutputRedaction(messages); const part = result[0]?.parts[0]; if (part?.type !== "dynamic-tool" || part.state !== "output-available") { - throw new Error("Expected image generation tool output"); + throw new Error("Expected dynamic tool output"); } - expect(part.output).toEqual({ - success: true, - model: "openai:gpt-image-1.5", - prompt: "square", - requestedCount: 1, - images: [ - { - path: "/tmp/image.png", - filename: "image.png", - mediaType: "image/png", - }, - ], - }); + + expect(part.output).toEqual({ success: true, answer: "continue" }); }); - it("strips image generation thumbnails from nested code execution tool calls", () => { + it("scrubs legacy image tool payloads before replaying history to providers", () => { const imageResult = { success: true, model: "openai:gpt-image-2", prompt: "square", requestedCount: 1, + source: { + path: "/tmp/source.png", + resolvedPath: "/home/user/project/source.png", + sizeBytes: 100, + }, images: [ { path: "/tmp/image.png", filename: "image.png", mediaType: "image/png", thumbnail: { - data: "nested-large-base64", + data: "large-base64", mediaType: "image/webp", width: 512, height: 512, @@ -82,23 +62,20 @@ describe("applyToolOutputRedaction", () => { const messages: MuxMessage[] = [ { id: "assistant-1", - role: "assistant" as const, + role: "assistant", parts: [ { - type: "dynamic-tool" as const, + type: "dynamic-tool", toolCallId: "code-execution-1", toolName: "code_execution", input: {}, - state: "output-available" as const, + state: "output-available", output: { success: true, - result: "done", toolCalls: [ { toolName: "image_generate", - args: { prompt: "square" }, result: imageResult, - duration_ms: 12, }, ], }, @@ -107,7 +84,7 @@ describe("applyToolOutputRedaction", () => { toolCallId: "nested-image-1", toolName: "image_generate", input: { prompt: "square" }, - state: "output-available" as const, + state: "output-available", output: imageResult, }, ], @@ -119,23 +96,25 @@ describe("applyToolOutputRedaction", () => { const result = applyToolOutputRedaction(messages); const part = result[0]?.parts[0]; if (part?.type !== "dynamic-tool" || part.state !== "output-available") { - throw new Error("Expected code execution tool output"); + throw new Error("Expected dynamic tool output"); } + expect(part.output).toEqual({ success: true, - result: "done", toolCalls: [ { toolName: "image_generate", - args: { prompt: "square" }, result: { success: true, model: "openai:gpt-image-2", prompt: "square", requestedCount: 1, + source: { + path: "/tmp/source.png", + sizeBytes: 100, + }, images: [{ path: "/tmp/image.png", filename: "image.png", mediaType: "image/png" }], }, - duration_ms: 12, }, ], }); @@ -144,32 +123,36 @@ describe("applyToolOutputRedaction", () => { model: "openai:gpt-image-2", prompt: "square", requestedCount: 1, + source: { + path: "/tmp/source.png", + sizeBytes: 100, + }, images: [{ path: "/tmp/image.png", filename: "image.png", mediaType: "image/png" }], }); }); - it("redacts binary-like provider error strings from tool output sent to models", () => { + it("sanitizes binary-like provider output strings for top-level and nested tools", () => { const messages: MuxMessage[] = [ { id: "assistant-1", - role: "assistant" as const, + role: "assistant", parts: [ { - type: "dynamic-tool" as const, - toolCallId: "image-edit-1", - toolName: "image_edit", + type: "dynamic-tool", + toolCallId: "tool-1", + toolName: "example_tool", input: {}, - state: "output-available" as const, + state: "output-available", output: { success: false, error: "Invalid JSON response: \u001b\u0000\ufffdpayload", }, nestedCalls: [ { - toolCallId: "nested-image-edit-1", - toolName: "image_edit", + toolCallId: "nested-tool-1", + toolName: "nested_tool", input: {}, - state: "output-available" as const, + state: "output-available", output: { success: false, error: "Nested bad body \u0000", @@ -184,7 +167,7 @@ describe("applyToolOutputRedaction", () => { const result = applyToolOutputRedaction(messages); const part = result[0]?.parts[0]; if (part?.type !== "dynamic-tool" || part.state !== "output-available") { - throw new Error("Expected image edit tool output"); + throw new Error("Expected dynamic tool output"); } const output = part.output as { success?: unknown; error?: unknown }; diff --git a/src/browser/utils/messages/applyToolOutputRedaction.ts b/src/browser/utils/messages/applyToolOutputRedaction.ts index a0c99e42de..924a3a4ca1 100644 --- a/src/browser/utils/messages/applyToolOutputRedaction.ts +++ b/src/browser/utils/messages/applyToolOutputRedaction.ts @@ -3,10 +3,69 @@ * Produces a cloned array safe for sending to providers without touching persisted history/UI. */ import type { MuxMessage } from "@/common/types/message"; -import { stripImageToolOutputForModel } from "@/common/utils/imageGenerationToolResult"; import { sanitizeUnknownForProviderOutput } from "@/common/utils/providerOutputSanitization"; import { stripToolOutputUiOnly } from "@/common/utils/tools/toolOutputUiOnly"; +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +function stripThumbnailFromLegacyImage(image: unknown): unknown { + if (!isRecord(image)) { + return image; + } + + const stripped: Record = {}; + for (const [key, value] of Object.entries(image)) { + if (key !== "thumbnail") { + stripped[key] = value; + } + } + return stripped; +} + +function stripResolvedSourcePath(source: unknown): unknown { + if (!isRecord(source)) { + return source; + } + + const stripped: Record = {}; + for (const [key, value] of Object.entries(source)) { + if (key !== "resolvedPath") { + stripped[key] = value; + } + } + return stripped; +} + +function stripLegacyImageToolOutputForModel(output: unknown): unknown { + if (Array.isArray(output)) { + return output.map(stripLegacyImageToolOutputForModel); + } + if (!isRecord(output)) { + return output; + } + + const images = output.images; + const isLegacyImageToolSuccess = output.success === true && Array.isArray(images); + const record: Record = isLegacyImageToolSuccess + ? { + ...output, + images: images.map(stripThumbnailFromLegacyImage), + ...(isRecord(output.source) ? { source: stripResolvedSourcePath(output.source) } : {}), + } + : output; + + const stripped: Record = {}; + for (const [key, value] of Object.entries(record)) { + stripped[key] = + isLegacyImageToolSuccess && key === "images" + ? value + : stripLegacyImageToolOutputForModel(value); + } + return stripped; +} + export function applyToolOutputRedaction(messages: MuxMessage[]): MuxMessage[] { return messages.map((msg) => { if (msg.role !== "assistant") return msg; @@ -17,7 +76,7 @@ export function applyToolOutputRedaction(messages: MuxMessage[]): MuxMessage[] { const outputWithoutUiOnly = stripToolOutputUiOnly(part.output); const sanitizedOutput = sanitizeUnknownForProviderOutput( - stripImageToolOutputForModel(outputWithoutUiOnly) + stripLegacyImageToolOutputForModel(outputWithoutUiOnly) ); const nestedCalls = part.nestedCalls?.map((nestedCall) => { if (nestedCall.state !== "output-available") { @@ -27,7 +86,7 @@ export function applyToolOutputRedaction(messages: MuxMessage[]): MuxMessage[] { return { ...nestedCall, output: sanitizeUnknownForProviderOutput( - stripImageToolOutputForModel(nestedOutputWithoutUiOnly) + stripLegacyImageToolOutputForModel(nestedOutputWithoutUiOnly) ), }; }); diff --git a/src/browser/utils/messages/buildSendMessageOptions.ts b/src/browser/utils/messages/buildSendMessageOptions.ts index 066e669bef..a8f7cb62e0 100644 --- a/src/browser/utils/messages/buildSendMessageOptions.ts +++ b/src/browser/utils/messages/buildSendMessageOptions.ts @@ -8,7 +8,6 @@ export interface ExperimentValues { programmaticToolCallingExclusive: boolean | undefined; advisorTool: boolean | undefined; execSubagentHardRestart: boolean | undefined; - imageGenerationTool: boolean | undefined; } export interface SendMessageOptionsInput { diff --git a/src/browser/utils/messages/displayedMessageBuilder.ts b/src/browser/utils/messages/displayedMessageBuilder.ts index b8cb6b4b18..8e654be526 100644 --- a/src/browser/utils/messages/displayedMessageBuilder.ts +++ b/src/browser/utils/messages/displayedMessageBuilder.ts @@ -8,12 +8,6 @@ import type { } from "@/common/types/message"; import { getCompactionFollowUpContent } from "@/common/types/message"; import type { StreamErrorType } from "@/common/types/errors"; -import { extractHookOutput } from "@/common/utils/tools/hookOutput"; -import type { ImageEditToolResult, ImageGenerateToolResult } from "@/common/types/tools"; -import { - ImageEditToolResultSchema, - ImageGenerateToolResultSchema, -} from "@/common/utils/tools/toolDefinitions"; import { GOAL_BUDGET_LIMIT_KIND, GOAL_CONTINUATION_KIND } from "@/constants/goals"; import { getFollowUpContentText } from "@/browser/utils/compaction/format"; import { getGoalClearedSummaryDisplayText } from "@/common/utils/goalClearedSummaryDisplay"; @@ -28,87 +22,6 @@ import { isSideQuestionUserMessage, } from "@/common/utils/messages/sideQuestion"; -function isSuccessfulImageGenerateResult( - result: unknown -): result is Extract { - const parsed = ImageGenerateToolResultSchema.safeParse(result); - return parsed.success && parsed.data.success; -} - -function isSuccessfulImageEditResult( - result: unknown -): result is Extract { - const parsed = ImageEditToolResultSchema.safeParse(result); - return parsed.success && parsed.data.success; -} - -function hasVisibleHookOutput(result: unknown): boolean { - return extractHookOutput(result) !== null; -} - -function appendGeneratedImageMessage( - displayedMessages: DisplayedMessage[], - options: { - id: string; - historyId: string; - toolCallId: string; - output: Extract; - isPartial: boolean; - historySequence: number; - streamSequence: number; - isLastPartOfMessage: boolean; - timestamp?: number; - } -): void { - displayedMessages.push({ - type: "generated-image", - id: options.id, - historyId: options.historyId, - toolCallId: options.toolCallId, - prompt: options.output.prompt, - model: options.output.model, - images: options.output.images, - warnings: options.output.warnings, - isPartial: options.isPartial, - historySequence: options.historySequence, - streamSequence: options.streamSequence, - isLastPartOfMessage: options.isLastPartOfMessage, - timestamp: options.timestamp, - }); -} - -function appendEditedImageMessage( - displayedMessages: DisplayedMessage[], - options: { - id: string; - historyId: string; - toolCallId: string; - output: Extract; - isPartial: boolean; - historySequence: number; - streamSequence: number; - isLastPartOfMessage: boolean; - timestamp?: number; - } -): void { - displayedMessages.push({ - type: "edited-image", - id: options.id, - historyId: options.historyId, - toolCallId: options.toolCallId, - prompt: options.output.prompt, - model: options.output.model, - source: options.output.source, - images: options.output.images, - warnings: options.output.warnings, - isPartial: options.isPartial, - historySequence: options.historySequence, - streamSequence: options.streamSequence, - isLastPartOfMessage: options.isLastPartOfMessage, - timestamp: options.timestamp, - }); -} - /** * Check if a tool result indicates success (for tools that return { success: boolean }) */ @@ -550,112 +463,6 @@ function getNestedCallsForDisplay(part: DynamicToolPart): NestedToolCalls | unde return part.nestedCalls ?? reconstructCodeExecutionNestedCalls(part); } -type NestedImageMessage = - | { - kind: "generated"; - nestedCall: { - toolCallId: string; - timestamp?: number; - output: Extract; - }; - } - | { - kind: "edited"; - nestedCall: { - toolCallId: string; - timestamp?: number; - output: Extract; - }; - }; - -function collectNestedImageMessages( - nestedCalls: NestedToolCalls | undefined -): NestedImageMessage[] { - const nestedImageMessages: NestedImageMessage[] = []; - if (!nestedCalls) { - return nestedImageMessages; - } - - for (const nestedCall of nestedCalls) { - if ( - nestedCall.toolName === "image_generate" && - nestedCall.state === "output-available" && - !hasVisibleHookOutput(nestedCall.output) && - isSuccessfulImageGenerateResult(nestedCall.output) - ) { - nestedImageMessages.push({ - kind: "generated", - nestedCall: { - toolCallId: nestedCall.toolCallId, - timestamp: nestedCall.timestamp, - output: nestedCall.output, - }, - }); - continue; - } - if ( - nestedCall.toolName === "image_edit" && - nestedCall.state === "output-available" && - !hasVisibleHookOutput(nestedCall.output) && - isSuccessfulImageEditResult(nestedCall.output) - ) { - nestedImageMessages.push({ - kind: "edited", - nestedCall: { - toolCallId: nestedCall.toolCallId, - timestamp: nestedCall.timestamp, - output: nestedCall.output, - }, - }); - } - } - - return nestedImageMessages; -} - -function appendNestedImageRows( - displayedMessages: DisplayedMessage[], - options: { - message: MuxMessage; - part: DynamicToolPart; - partIndex: number; - nestedImageMessages: NestedImageMessage[]; - isPartial: boolean; - historySequence: number; - isLastPartOfMessage: boolean; - baseTimestamp?: number; - nextStreamSequence: () => number; - } -): void { - options.nestedImageMessages.forEach(({ kind, nestedCall }, nestedIndex) => { - const isLastNestedImage = nestedIndex === options.nestedImageMessages.length - 1; - const common = { - historyId: options.message.id, - toolCallId: nestedCall.toolCallId, - isPartial: options.isPartial, - historySequence: options.historySequence, - streamSequence: options.nextStreamSequence(), - isLastPartOfMessage: options.isLastPartOfMessage && isLastNestedImage, - timestamp: nestedCall.timestamp ?? options.part.timestamp ?? options.baseTimestamp, - }; - - if (kind === "generated") { - appendGeneratedImageMessage(displayedMessages, { - ...common, - id: `${options.message.id}-${options.partIndex}-nested-image-${nestedIndex}`, - output: nestedCall.output, - }); - return; - } - - appendEditedImageMessage(displayedMessages, { - ...common, - id: `${options.message.id}-${options.partIndex}-nested-edited-image-${nestedIndex}`, - output: nestedCall.output, - }); - }); -} - function appendToolRows( displayedMessages: DisplayedMessage[], options: { @@ -672,58 +479,6 @@ function appendToolRows( const { message, part } = options; const status = getToolDisplayStatus(part, options.isPartial); const nestedCalls = getNestedCallsForDisplay(part); - const nestedImageMessages = options.isPartial ? [] : collectNestedImageMessages(nestedCalls); - const nestedImageMessageIds = new Set( - nestedImageMessages.map(({ nestedCall }) => nestedCall.toolCallId) - ); - const nestedCallsForToolRow = nestedCalls?.filter( - (nestedCall) => !nestedImageMessageIds.has(nestedCall.toolCallId) - ); - - if ( - part.toolName === "image_generate" && - part.state === "output-available" && - status === "completed" && - !options.isPartial && - !hasVisibleHookOutput(part.output) && - isSuccessfulImageGenerateResult(part.output) - ) { - appendGeneratedImageMessage(displayedMessages, { - id: `${message.id}-${options.partIndex}`, - historyId: message.id, - toolCallId: part.toolCallId, - output: part.output, - isPartial: options.isPartial, - historySequence: options.historySequence, - streamSequence: options.nextStreamSequence(), - isLastPartOfMessage: options.isLastPartOfMessage, - timestamp: part.timestamp ?? options.baseTimestamp, - }); - return; - } - - if ( - part.toolName === "image_edit" && - part.state === "output-available" && - status === "completed" && - !options.isPartial && - !hasVisibleHookOutput(part.output) && - isSuccessfulImageEditResult(part.output) - ) { - appendEditedImageMessage(displayedMessages, { - id: `${message.id}-${options.partIndex}`, - historyId: message.id, - toolCallId: part.toolCallId, - output: part.output, - isPartial: options.isPartial, - historySequence: options.historySequence, - streamSequence: options.nextStreamSequence(), - isLastPartOfMessage: options.isLastPartOfMessage, - timestamp: part.timestamp ?? options.baseTimestamp, - }); - return; - } - displayedMessages.push({ type: "tool", id: `${message.id}-${options.partIndex}`, @@ -736,14 +491,9 @@ function appendToolRows( isPartial: options.isPartial, historySequence: options.historySequence, streamSequence: options.nextStreamSequence(), - isLastPartOfMessage: options.isLastPartOfMessage && nestedImageMessages.length === 0, + isLastPartOfMessage: options.isLastPartOfMessage, timestamp: part.timestamp ?? options.baseTimestamp, - nestedCalls: nestedCallsForToolRow, - }); - - appendNestedImageRows(displayedMessages, { - ...options, - nestedImageMessages, + nestedCalls, }); } diff --git a/src/browser/utils/messages/sendOptions.ts b/src/browser/utils/messages/sendOptions.ts index ae098b0883..90830fc7fc 100644 --- a/src/browser/utils/messages/sendOptions.ts +++ b/src/browser/utils/messages/sendOptions.ts @@ -84,7 +84,6 @@ export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptio ), advisorTool: isExperimentEnabled(EXPERIMENT_IDS.ADVISOR_TOOL), execSubagentHardRestart: isExperimentEnabled(EXPERIMENT_IDS.EXEC_SUBAGENT_HARD_RESTART), - imageGenerationTool: isExperimentEnabled(EXPERIMENT_IDS.IMAGE_GENERATION_TOOL), }, }); } diff --git a/src/browser/utils/messages/transcriptRenderProjection.test.ts b/src/browser/utils/messages/transcriptRenderProjection.test.ts index 85c3dfdcef..4180b84969 100644 --- a/src/browser/utils/messages/transcriptRenderProjection.test.ts +++ b/src/browser/utils/messages/transcriptRenderProjection.test.ts @@ -91,50 +91,6 @@ function streamError(id: string, historyId: string): DisplayedMessage & { type: }; } -function generatedImage( - id: string, - historyId: string -): DisplayedMessage & { type: "generated-image" } { - return { - type: "generated-image", - id, - historyId, - toolCallId: `call-${id}`, - prompt: "Draw a chart", - model: "image-model", - images: [{ path: "/tmp/chart.png", filename: "chart.png", mediaType: "image/png" }], - historySequence: 1, - isPartial: false, - }; -} - -function editedImage(id: string, historyId: string): DisplayedMessage & { type: "edited-image" } { - return { - type: "edited-image", - id, - historyId, - toolCallId: `call-${id}`, - prompt: "Adjust the chart", - model: "image-model", - source: { - path: "/tmp/chart.png", - resolvedPath: "/tmp/chart.png", - sizeBytes: 100, - dimensions: { width: 10, height: 10 }, - }, - images: [ - { - path: "/tmp/chart-edited.png", - filename: "chart-edited.png", - mediaType: "image/png", - outputDimensions: { width: 10, height: 10 }, - }, - ], - historySequence: 1, - isPartial: false, - }; -} - function planDisplay(id: string, historyId: string): DisplayedMessage & { type: "plan-display" } { return { type: "plan-display", @@ -531,14 +487,12 @@ describe("work bundle coalescing", () => { expect(infos[5]).toMatchObject({ key: "work:search-1", position: "final" }); }); - test("keeps visible artifacts and stream errors out of work bundles", () => { + test("keeps stream errors and plan displays out of work bundles", () => { const historyId = "history-a1"; const messages = [ reasoning({ id: "think-1", historyId }), tool({ id: "read-1", historyId }), streamError("error-1", historyId), - generatedImage("generated-1", historyId), - editedImage("edited-1", historyId), planDisplay("plan-1", historyId), ]; diff --git a/src/common/config/schemas/appConfigOnDisk.test.ts b/src/common/config/schemas/appConfigOnDisk.test.ts index e88deab005..d2270db376 100644 --- a/src/common/config/schemas/appConfigOnDisk.test.ts +++ b/src/common/config/schemas/appConfigOnDisk.test.ts @@ -69,30 +69,10 @@ describe("AppConfigOnDiskSchema", () => { ).toBe(true); }); - it("accepts sparse configs without image generation settings", () => { + it("accepts sparse configs", () => { expect(AppConfigOnDiskSchema.safeParse({ defaultModel: "openai:gpt-4o" }).success).toBe(true); }); - it("validates image generation configuration limits", () => { - expect( - AppConfigOnDiskSchema.safeParse({ - imageGeneration: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 10, - }, - }).success - ).toBe(true); - - expect( - AppConfigOnDiskSchema.safeParse({ - imageGeneration: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 11, - }, - }).success - ).toBe(false); - }); - it("preserves unknown fields via passthrough", () => { const valid = { futureField: "something" }; diff --git a/src/common/config/schemas/appConfigOnDisk.ts b/src/common/config/schemas/appConfigOnDisk.ts index 3af551ee00..57b60fb894 100644 --- a/src/common/config/schemas/appConfigOnDisk.ts +++ b/src/common/config/schemas/appConfigOnDisk.ts @@ -9,11 +9,6 @@ import { WORKTREE_ARCHIVE_BEHAVIORS } from "../worktreeArchiveBehavior"; import { TaskSettingsSchema } from "./taskSettings"; import { HEARTBEAT_MAX_INTERVAL_MS, HEARTBEAT_MIN_INTERVAL_MS } from "@/constants/heartbeat"; import { DEFAULT_GOAL_DEFAULTS } from "@/constants/goals"; -import { - DEFAULT_IMAGE_GENERATION_MAX_IMAGES, - MAX_IMAGE_GENERATION_MAX_IMAGES, - MIN_IMAGE_GENERATION_MAX_IMAGES, -} from "@/common/types/imageGeneration"; export { RuntimeEnablementOverridesSchema } from "../../schemas/runtimeEnablement"; export type { RuntimeEnablementOverrides } from "../../schemas/runtimeEnablement"; @@ -57,20 +52,6 @@ export const AppConfigMigrationsSchema = z.object({ execSubagentDefaultsSplit: z.boolean().optional(), }); -export const ImageGenerationConfigSchema = z - .object({ - modelString: z.string().optional(), - maxImagesPerCall: z - .number() - .int() - .min(MIN_IMAGE_GENERATION_MAX_IMAGES) - .max(MAX_IMAGE_GENERATION_MAX_IMAGES) - .default(DEFAULT_IMAGE_GENERATION_MAX_IMAGES) - .optional(), - allowImageUploadsForEditing: z.boolean().default(false).optional(), - }) - .optional(); - export const FeatureFlagOverrideSchema = z.enum(["default", "on", "off"]); export const UpdateChannelSchema = z.enum(["stable", "nightly"]); @@ -111,7 +92,6 @@ export const AppConfigOnDiskSchema = z advisorMaxOutputTokens: z.number().int().positive().nullable().optional(), hiddenModels: z.array(z.string()).optional(), preferredCompactionModel: z.string().optional(), - imageGeneration: ImageGenerationConfigSchema.optional(), agentAiDefaults: AgentAiDefaultsSchema.optional(), /** * Sparse per-agent override that wins over agentAiDefaults when an agent runs as a diff --git a/src/common/constants/experiments.ts b/src/common/constants/experiments.ts index e2cbe9b489..eebb22e742 100644 --- a/src/common/constants/experiments.ts +++ b/src/common/constants/experiments.ts @@ -14,7 +14,6 @@ export const EXPERIMENT_IDS = { MULTI_PROJECT_WORKSPACES: "multi-project-workspaces", AGENT_BROWSER: "agent-browser", ADVISOR_TOOL: "advisor-tool", - IMAGE_GENERATION_TOOL: "image-generation-tool", WORKSPACE_HEARTBEATS: "workspace-heartbeats", PORTABLE_DESKTOP: "portable-desktop", } as const; @@ -115,15 +114,6 @@ export const EXPERIMENTS: Record = { userOverridable: true, showInSettings: true, }, - [EXPERIMENT_IDS.IMAGE_GENERATION_TOOL]: { - id: EXPERIMENT_IDS.IMAGE_GENERATION_TOOL, - name: "Image Tools", - description: - "Enable experimental agent tools for generating and editing image artifacts with a separately configured image model", - enabledByDefault: false, - userOverridable: true, - showInSettings: true, - }, [EXPERIMENT_IDS.WORKSPACE_HEARTBEATS]: { id: EXPERIMENT_IDS.WORKSPACE_HEARTBEATS, name: "Workspace Heartbeats", diff --git a/src/common/orpc/schemas/api.test.ts b/src/common/orpc/schemas/api.test.ts index 05d1b4cb51..7e5c7a13b9 100644 --- a/src/common/orpc/schemas/api.test.ts +++ b/src/common/orpc/schemas/api.test.ts @@ -206,21 +206,6 @@ describe("ProviderConfigInfoSchema conformance", () => { }); }); -describe("config imageGeneration schema", () => { - it("preserves image upload consent across get and update payloads", () => { - const full = { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - allowImageUploadsForEditing: true, - }; - - expect(config.getConfig.output.shape.imageGeneration.parse(full)).toEqual(full); - expect(config.updateImageGenerationConfig.input.parse({ imageGeneration: full })).toEqual({ - imageGeneration: full, - }); - }); -}); - describe("workspace.getProjectGitStatuses schema", () => { it("accepts omitted and null baseRef values", () => { expect(workspace.getProjectGitStatuses.input.safeParse({ workspaceId: "ws" }).success).toBe( diff --git a/src/common/orpc/schemas/api.ts b/src/common/orpc/schemas/api.ts index 554810e4e4..5e6fa705d9 100644 --- a/src/common/orpc/schemas/api.ts +++ b/src/common/orpc/schemas/api.ts @@ -6,10 +6,6 @@ import { WORKTREE_ARCHIVE_BEHAVIORS } from "@/common/config/worktreeArchiveBehav import { HEARTBEAT_MAX_INTERVAL_MS, HEARTBEAT_MIN_INTERVAL_MS } from "@/constants/heartbeat"; import { DEFAULT_GOAL_DEFAULTS } from "@/constants/goals"; import { EXPERIMENT_IDS } from "@/common/constants/experiments"; -import { - MAX_IMAGE_GENERATION_MAX_IMAGES, - MIN_IMAGE_GENERATION_MAX_IMAGES, -} from "@/common/types/imageGeneration"; import { ChatStatsSchema, SessionUsageFileSchema } from "./chatStats"; import { AdditionalSystemContextSchema, WorkspaceInstructionsSchema } from "./instructions"; import { @@ -1942,16 +1938,6 @@ const AdvisorModelStringSchema = z.string().nullable(); const AdvisorThinkingLevelSchema = ThinkingLevelSchema.nullable(); const AdvisorMaxUsesPerTurnSchema = z.number().int().positive().nullable(); const AdvisorMaxOutputTokensSchema = z.number().int().positive().nullable(); -const ImageGenerationConfigSchema = z.object({ - modelString: z.string(), - maxImagesPerCall: z - .number() - .int() - .min(MIN_IMAGE_GENERATION_MAX_IMAGES) - .max(MAX_IMAGE_GENERATION_MAX_IMAGES), - allowImageUploadsForEditing: z.boolean(), -}); - const GoalDefaultsConfigSchema = z.object({ defaultBudgetCents: z .number() @@ -1992,7 +1978,6 @@ export const config = { advisorThinkingLevel: AdvisorThinkingLevelSchema, advisorMaxUsesPerTurn: AdvisorMaxUsesPerTurnSchema.optional(), advisorMaxOutputTokens: AdvisorMaxOutputTokensSchema.optional(), - imageGeneration: ImageGenerationConfigSchema, hiddenModels: z.array(z.string()).optional(), coderWorkspaceArchiveBehavior: z.enum(CODER_ARCHIVE_BEHAVIORS), worktreeArchiveBehavior: z.enum(WORKTREE_ARCHIVE_BEHAVIORS), @@ -2049,12 +2034,6 @@ export const config = { }), output: z.void(), }, - updateImageGenerationConfig: { - input: z.object({ - imageGeneration: ImageGenerationConfigSchema, - }), - output: z.void(), - }, updateModelPreferences: { input: z.object({ defaultModel: z.string().optional(), diff --git a/src/common/orpc/schemas/stream.ts b/src/common/orpc/schemas/stream.ts index 868e41cb75..7eb0c9d4c4 100644 --- a/src/common/orpc/schemas/stream.ts +++ b/src/common/orpc/schemas/stream.ts @@ -671,7 +671,6 @@ export const ExperimentsSchema = z.object({ programmaticToolCallingExclusive: z.boolean().optional(), advisorTool: z.boolean().optional(), execSubagentHardRestart: z.boolean().optional(), - imageGenerationTool: z.boolean().optional(), }); /** diff --git a/src/common/schemas/project.ts b/src/common/schemas/project.ts index 3288e95986..0de08229a5 100644 --- a/src/common/schemas/project.ts +++ b/src/common/schemas/project.ts @@ -135,7 +135,6 @@ export const WorkspaceConfigSchema = z.object({ programmaticToolCalling: z.boolean().optional(), programmaticToolCallingExclusive: z.boolean().optional(), advisorTool: z.boolean().optional(), - imageGenerationTool: z.boolean().optional(), execSubagentHardRestart: z.boolean().optional(), }) .optional() diff --git a/src/common/types/imageGeneration.test.ts b/src/common/types/imageGeneration.test.ts deleted file mode 100644 index f2cdf656eb..0000000000 --- a/src/common/types/imageGeneration.test.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { describe, expect, test } from "bun:test"; - -import { normalizeImageGenerationConfig } from "./imageGeneration"; - -describe("normalizeImageGenerationConfig", () => { - test("defaults image upload consent off and preserves explicit consent", () => { - expect(normalizeImageGenerationConfig(undefined).allowImageUploadsForEditing).toBe(false); - expect( - normalizeImageGenerationConfig({ allowImageUploadsForEditing: true }) - .allowImageUploadsForEditing - ).toBe(true); - }); -}); diff --git a/src/common/types/imageGeneration.ts b/src/common/types/imageGeneration.ts deleted file mode 100644 index 7728db8d66..0000000000 --- a/src/common/types/imageGeneration.ts +++ /dev/null @@ -1,48 +0,0 @@ -import assert from "@/common/utils/assert"; - -export const DEFAULT_IMAGE_GENERATION_MODEL = "openai:gpt-image-2"; -export const PINNED_IMAGE_GENERATION_MODEL = "openai:gpt-image-2-2026-04-21"; -export const DEFAULT_IMAGE_GENERATION_MAX_IMAGES = 4; -export const MIN_IMAGE_GENERATION_MAX_IMAGES = 1; -export const MAX_IMAGE_GENERATION_MAX_IMAGES = 10; - -export const IMAGE_GENERATION_QUALITY_VALUES = ["low", "medium", "high", "auto"] as const; -export type ImageGenerationQuality = (typeof IMAGE_GENERATION_QUALITY_VALUES)[number]; - -export const IMAGE_GENERATION_OUTPUT_FORMAT_VALUES = ["png", "jpeg", "webp"] as const; -export type ImageGenerationOutputFormat = (typeof IMAGE_GENERATION_OUTPUT_FORMAT_VALUES)[number]; - -export interface ImageGenerationConfig { - modelString: string; - maxImagesPerCall: number; - allowImageUploadsForEditing: boolean; -} - -export function clampImageGenerationMaxImages(value: number): number { - assert(Number.isFinite(value), "image generation maxImagesPerCall must be finite"); - return Math.min( - MAX_IMAGE_GENERATION_MAX_IMAGES, - Math.max(MIN_IMAGE_GENERATION_MAX_IMAGES, Math.trunc(value)) - ); -} - -export function normalizeImageGenerationConfig(value: unknown): ImageGenerationConfig { - const record = - typeof value === "object" && value !== null ? (value as Record) : {}; - - const rawModelString = record.modelString; - const modelString = - typeof rawModelString === "string" && rawModelString.trim().length > 0 - ? rawModelString.trim() - : DEFAULT_IMAGE_GENERATION_MODEL; - - const rawMaxImagesPerCall = record.maxImagesPerCall; - const maxImagesPerCall = - typeof rawMaxImagesPerCall === "number" && Number.isFinite(rawMaxImagesPerCall) - ? clampImageGenerationMaxImages(rawMaxImagesPerCall) - : DEFAULT_IMAGE_GENERATION_MAX_IMAGES; - - const allowImageUploadsForEditing = record.allowImageUploadsForEditing === true; - - return { modelString, maxImagesPerCall, allowImageUploadsForEditing }; -} diff --git a/src/common/types/message.ts b/src/common/types/message.ts index 94eca05120..863f9bf655 100644 --- a/src/common/types/message.ts +++ b/src/common/types/message.ts @@ -739,71 +739,6 @@ export type DisplayedMessage = timestamp?: number; }>; } - | { - type: "generated-image"; - id: string; - historyId: string; - toolCallId: string; - prompt: string; - model: string; - images: Array<{ - path: string; - filename: string; - mediaType: string; - thumbnail?: { - data: string; - mediaType: string; - width: number; - height: number; - }; - revisedPrompt?: string; - }>; - warnings?: string[]; - historySequence: number; - streamSequence?: number; - isPartial: boolean; - isLastPartOfMessage?: boolean; - timestamp?: number; - } - | { - type: "edited-image"; - id: string; - historyId: string; - toolCallId: string; - prompt: string; - model: string; - source: { - path: string; - resolvedPath: string; - sizeBytes: number; - dimensions: { - width: number; - height: number; - }; - }; - images: Array<{ - path: string; - filename: string; - mediaType: string; - outputDimensions: { - width: number; - height: number; - }; - thumbnail?: { - data: string; - mediaType: string; - width: number; - height: number; - }; - revisedPrompt?: string; - }>; - warnings?: string[]; - historySequence: number; - streamSequence?: number; - isPartial: boolean; - isLastPartOfMessage?: boolean; - timestamp?: number; - } | { type: "reasoning"; id: string; // Display ID for UI/React keys diff --git a/src/common/types/project.ts b/src/common/types/project.ts index 654675de98..3e14692a4c 100644 --- a/src/common/types/project.ts +++ b/src/common/types/project.ts @@ -18,7 +18,6 @@ import type { TaskSettings, SubagentAiDefaults } from "./tasks"; import type { LayoutPresetsConfig } from "./uiLayouts"; import type { ThinkingLevel } from "./thinking"; import type { GoalDefaults } from "@/constants/goals"; -import type { ImageGenerationConfig } from "./imageGeneration"; export type Workspace = z.infer; @@ -114,8 +113,6 @@ export interface ProjectsConfig { advisorMaxUsesPerTurn?: number | null; /** Positive max-output-tokens cap for advisor responses; null/undefined means unlimited. */ advisorMaxOutputTokens?: number | null; - /** Global image-generation defaults for the experimental image generation tool. */ - imageGeneration?: ImageGenerationConfig; /** * Hidden model IDs (shared via ~/.mux/config.json). * Mirrors the browser localStorage cache (HIDDEN_MODELS_KEY). diff --git a/src/common/types/tools.ts b/src/common/types/tools.ts index 807d5515aa..fb76014e9f 100644 --- a/src/common/types/tools.ts +++ b/src/common/types/tools.ts @@ -23,8 +23,6 @@ import type { MuxAgentsWriteToolResultSchema, FileReadToolResultSchema, AttachFileToolResultSchema, - ImageGenerateToolResultSchema, - ImageEditToolResultSchema, TaskToolResultSchema, TaskAwaitToolResultSchema, TaskApplyGitPatchToolResultSchema, @@ -40,14 +38,6 @@ export type BashToolArgs = z.infer; // BashToolResult derived from Zod schema (single source of truth) export type BashToolResult = z.infer; -// Image generation tool types, derived from schema (avoid drift) -export type ImageGenerateToolArgs = z.infer; -export type ImageGenerateToolResult = z.infer; - -// Image edit tool types, derived from schema (avoid drift) -export type ImageEditToolArgs = z.infer; -export type ImageEditToolResult = z.infer; - // File Read Tool Types, derived from schema (avoid drift) export type FileReadToolArgs = z.infer; diff --git a/src/common/utils/imageGenerationToolResult.test.ts b/src/common/utils/imageGenerationToolResult.test.ts deleted file mode 100644 index 6caabadf47..0000000000 --- a/src/common/utils/imageGenerationToolResult.test.ts +++ /dev/null @@ -1,33 +0,0 @@ -import { describe, expect, it } from "bun:test"; -import { stripImageToolOutputForModel } from "./imageGenerationToolResult"; - -describe("stripImageToolOutputForModel", () => { - it("bounds huge binary-looking failed image tool errors without mutating history objects", () => { - const hugeBinaryText = `${"\u0000\u0001\ufffd".repeat(20_000)}trailing detail`; - const output = { - success: false, - error: `Image editing failed: Invalid JSON response. Text: ${hugeBinaryText}`, - setupHint: "Check credentials.", - }; - - const stripped = stripImageToolOutputForModel(output); - - expect(output.error).toContain("trailing detail"); - expect(stripped).toMatchObject({ - success: false, - setupHint: "Check credentials.", - }); - if (typeof (stripped as { error?: unknown }).error !== "string") { - throw new Error("Expected stripped image error to remain a string"); - } - const strippedError = (stripped as { error: string }).error; - expect(strippedError).toContain("omitted binary image tool error"); - expect(strippedError.length).toBeLessThan(1_000); - }); - - it("keeps short failed image tool errors readable", () => { - const output = { success: false, error: "Image edit prompt is required." }; - - expect(stripImageToolOutputForModel(output)).toEqual(output); - }); -}); diff --git a/src/common/utils/imageGenerationToolResult.ts b/src/common/utils/imageGenerationToolResult.ts deleted file mode 100644 index bbca6ba8fa..0000000000 --- a/src/common/utils/imageGenerationToolResult.ts +++ /dev/null @@ -1,105 +0,0 @@ -function isRecord(value: unknown): value is Record { - return typeof value === "object" && value !== null && !Array.isArray(value); -} - -function isUnknownArray(value: unknown): value is unknown[] { - return Array.isArray(value); -} - -const IMAGE_TOOL_ERROR_MAX_CHARS = 4_096; -const IMAGE_TOOL_ERROR_PREFIX_CHARS = 700; - -function isProbablyBinaryText(value: string): boolean { - let suspiciousCharacters = 0; - const sample = value.slice(0, IMAGE_TOOL_ERROR_MAX_CHARS); - for (const char of sample) { - const codePoint = char.codePointAt(0); - if (codePoint == null) { - continue; - } - if (char === "\ufffd" || (codePoint < 32 && char !== "\n" && char !== "\r" && char !== "\t")) { - suspiciousCharacters += 1; - } - } - return suspiciousCharacters >= 8; -} - -// Provider/SDK response-shape bugs can decode image bytes as text; never replay -// megabyte-scale binary-looking errors back into model context or persisted tool output. -export function sanitizeImageToolErrorForModel(error: string): string { - if (error.length <= IMAGE_TOOL_ERROR_MAX_CHARS) { - return error; - } - - const prefix = error.slice(0, IMAGE_TOOL_ERROR_PREFIX_CHARS).trimEnd(); - const reason = isProbablyBinaryText(error) ? "binary" : "oversized"; - return `${prefix}\n[omitted ${reason} image tool error: original length ${error.length} characters]`; -} - -function stripFailedImageToolError(output: Record): Record { - if (output.success !== false || typeof output.error !== "string") { - return output; - } - - const sanitizedError = sanitizeImageToolErrorForModel(output.error); - if (sanitizedError === output.error) { - return output; - } - - return { ...output, error: sanitizedError }; -} - -function stripThumbnailFromImage(image: unknown): unknown { - if (!isRecord(image)) { - return image; - } - - const stripped: Record = {}; - for (const [key, value] of Object.entries(image)) { - if (key !== "thumbnail") { - stripped[key] = value; - } - } - return stripped; -} - -function stripResolvedSourcePath(source: unknown): unknown { - if (!isRecord(source)) { - return source; - } - - const stripped: Record = {}; - for (const [key, value] of Object.entries(source)) { - if (key !== "resolvedPath") { - stripped[key] = value; - } - } - return stripped; -} - -export function stripImageToolOutputForModel(output: unknown): unknown { - if (isUnknownArray(output)) { - return output.map(stripImageToolOutputForModel); - } - if (!isRecord(output)) { - return output; - } - - const images = output.images; - const stripsCurrentImageResult = output.success === true && isUnknownArray(images); - const record: Record = stripFailedImageToolError( - stripsCurrentImageResult - ? { - ...output, - images: images.map(stripThumbnailFromImage), - ...(isRecord(output.source) ? { source: stripResolvedSourcePath(output.source) } : {}), - } - : output - ); - const stripped: Record = {}; - for (const [key, value] of Object.entries(record)) { - stripped[key] = - stripsCurrentImageResult && key === "images" ? value : stripImageToolOutputForModel(value); - } - return stripped; -} diff --git a/src/common/utils/tokens/modelStats.test.ts b/src/common/utils/tokens/modelStats.test.ts index a94709f81f..70a975afb7 100644 --- a/src/common/utils/tokens/modelStats.test.ts +++ b/src/common/utils/tokens/modelStats.test.ts @@ -1,12 +1,11 @@ import { describe, expect, test } from "bun:test"; import type { ProvidersConfigMap } from "@/common/orpc/types"; import { KNOWN_MODELS } from "@/common/constants/knownModels"; -import { - DEFAULT_IMAGE_GENERATION_MODEL, - PINNED_IMAGE_GENERATION_MODEL, -} from "@/common/types/imageGeneration"; import { getModelStats, getModelStatsResolved, type ModelStats } from "./modelStats"; +const DEFAULT_IMAGE_MODEL = "openai:gpt-image-2"; +const PINNED_IMAGE_MODEL = "openai:gpt-image-2-2026-04-21"; + function expectStats(modelString: string): ModelStats { const stats = getModelStats(modelString); expect(stats).not.toBeNull(); @@ -109,12 +108,12 @@ describe("getModelStats", () => { }); test("resolves the default image generation model pricing", () => { - const stats = expectStats(DEFAULT_IMAGE_GENERATION_MODEL); + const stats = expectStats(DEFAULT_IMAGE_MODEL); expect(stats.input_cost_per_token).toBe(0.000005); expect(stats.cache_read_input_token_cost).toBe(0.00000125); expect(stats.output_cost_per_token).toBe(0.00003); - expect(expectStats(PINNED_IMAGE_GENERATION_MODEL)).toEqual(stats); + expect(expectStats(PINNED_IMAGE_MODEL)).toEqual(stats); }); test("returns null for unknown models across direct and gateway forms", () => { diff --git a/src/common/utils/tools/toolDefinitions.test.ts b/src/common/utils/tools/toolDefinitions.test.ts index 657f7fded9..9ba385c9ed 100644 --- a/src/common/utils/tools/toolDefinitions.test.ts +++ b/src/common/utils/tools/toolDefinitions.test.ts @@ -410,29 +410,6 @@ describe("TOOL_DEFINITIONS", () => { expect(tools).toContain("skills_catalog_read"); }); - it("includes image_generate only when image generation is enabled", () => { - expect(getAvailableTools("openai:gpt-5")).not.toContain("image_generate"); - expect(getAvailableTools("openai:gpt-5", { enableImageGeneration: true })).toContain( - "image_generate" - ); - }); - - it("includes image_edit only when image tools and upload consent are both enabled", () => { - expect(getAvailableTools("openai:gpt-5")).not.toContain("image_edit"); - expect(getAvailableTools("openai:gpt-5", { enableImageEditing: true })).not.toContain( - "image_edit" - ); - expect(getAvailableTools("openai:gpt-5", { enableImageGeneration: true })).not.toContain( - "image_edit" - ); - expect( - getAvailableTools("openai:gpt-5", { - enableImageGeneration: true, - enableImageEditing: true, - }) - ).toContain("image_edit"); - }); - it("agent_skill_write schema rejects an advertise tool argument (advertise is authored in content)", () => { const parsed = TOOL_DEFINITIONS.agent_skill_write.schema.safeParse({ name: "demo-skill", diff --git a/src/common/utils/tools/toolDefinitions.ts b/src/common/utils/tools/toolDefinitions.ts index 0dea3965ec..c96292cb33 100644 --- a/src/common/utils/tools/toolDefinitions.ts +++ b/src/common/utils/tools/toolDefinitions.ts @@ -47,11 +47,6 @@ import { zodToJsonSchema } from "zod-to-json-schema"; import { extractToolFilePath } from "@/common/utils/tools/toolInputFilePath"; import { TASK_VARIANT_PLACEHOLDER, TASK_GROUP_KIND_VALUES } from "@/common/utils/tools/taskGroups"; -import { - IMAGE_GENERATION_OUTPUT_FORMAT_VALUES, - IMAGE_GENERATION_QUALITY_VALUES, -} from "@/common/types/imageGeneration"; - // ----------------------------------------------------------------------------- // ask_user_question (plan-mode interactive questions) // ----------------------------------------------------------------------------- @@ -888,64 +883,6 @@ function renameAliasField( * Key = tool name, Value = { description, schema } */ export const TOOL_DEFINITIONS = { - image_generate: { - description: - "Generate raster image artifacts using Mux's experimental Image Tools configuration. " + - "Use only when the user explicitly asks to generate or create image artifacts. " + - "Do not call for ordinary code, design discussion, or prompt brainstorming. " + - "Generated full-resolution images are saved as runtime artifacts; copy selected final assets into the workspace when needed.", - schema: z - .object({ - prompt: z.string().min(1).describe("Prompt describing the image(s) to generate"), - n: z - .number() - .int() - .positive() - .nullish() - .describe( - "Number of images to generate. Defaults to 1 and must not exceed the user's configured Image Tools maximum." - ), - quality: z - .enum(IMAGE_GENERATION_QUALITY_VALUES) - .nullish() - .describe("Optional generation quality. Defaults to the provider/model default."), - outputFormat: z - .enum(IMAGE_GENERATION_OUTPUT_FORMAT_VALUES) - .nullish() - .describe("Optional output format. Defaults to png."), - }) - .strict(), - }, - image_edit: { - description: - "Edit one existing local PNG, JPEG, or WebP image using Mux's experimental Image Tools configuration. " + - "Use only when image editing is requested or clearly required by the current task; do not upload incidental image files. " + - "The source image is uploaded as-is, including embedded metadata, to the configured image provider. " + - "Do not edit images containing secrets or sensitive visual/metadata content. " + - "Edited full-resolution images are saved as runtime artifacts; copy selected final assets into the workspace when needed.", - schema: z - .object({ - sourcePath: z.string().min(1).describe("Path to the existing source image to edit"), - prompt: z.string().min(1).describe("Edit prompt describing the desired image changes"), - n: z - .number() - .int() - .positive() - .nullish() - .describe( - "Number of edited variants to create. Defaults to 1; request multiple variants only when the user asks or variants are clearly useful." - ), - quality: z - .enum(IMAGE_GENERATION_QUALITY_VALUES) - .nullish() - .describe("Optional edit quality. Defaults to the provider/model default."), - outputFormat: z - .enum(IMAGE_GENERATION_OUTPUT_FORMAT_VALUES) - .nullish() - .describe("Optional output format. Defaults to png."), - }) - .strict(), - }, bash: { description: "Execute a bash command with a configurable timeout. " + @@ -1825,70 +1762,6 @@ const TruncatedInfoSchema = z.object({ totalLines: z.number(), }); -const ImageToolThumbnailSchema = z.object({ - data: z.string(), - mediaType: z.string(), - width: z.number().int().positive(), - height: z.number().int().positive(), -}); - -const ImageToolDimensionsSchema = z.object({ - width: z.number().int().positive(), - height: z.number().int().positive(), -}); - -const ImageToolImageSchema = z.object({ - path: z.string(), - filename: z.string(), - mediaType: z.string(), - thumbnail: ImageToolThumbnailSchema.optional(), - revisedPrompt: z.string().optional(), -}); - -const ImageEditImageSchema = ImageToolImageSchema.extend({ - outputDimensions: ImageToolDimensionsSchema, -}); - -const ImageEditSourceSchema = z.object({ - path: z.string(), - resolvedPath: z.string(), - sizeBytes: z.number().int().nonnegative(), - dimensions: ImageToolDimensionsSchema, -}); - -export const ImageGenerateToolResultSchema = z.discriminatedUnion("success", [ - z.object({ - success: z.literal(true), - model: z.string(), - prompt: z.string(), - requestedCount: z.number().int().positive(), - images: z.array(ImageToolImageSchema).min(1), - warnings: z.array(z.string()).optional(), - }), - z.object({ - success: z.literal(false), - error: z.string(), - setupHint: z.string().optional(), - }), -]); - -export const ImageEditToolResultSchema = z.discriminatedUnion("success", [ - z.object({ - success: z.literal(true), - model: z.string(), - prompt: z.string(), - requestedCount: z.number().int().positive(), - source: ImageEditSourceSchema, - images: z.array(ImageEditImageSchema).min(1), - warnings: z.array(z.string()).optional(), - }), - z.object({ - success: z.literal(false), - error: z.string(), - setupHint: z.string().optional(), - }), -]); - /** * Bash tool result - success, background spawn, or failure. */ @@ -2301,8 +2174,6 @@ export function getAvailableTools( enableAgentReport?: boolean; enableAnalyticsQuery?: boolean; enableAdvisor?: boolean; - enableImageGeneration?: boolean; - enableImageEditing?: boolean; /** @deprecated Mux global tools are always included. */ enableMuxGlobalAgentsTools?: boolean; } @@ -2311,8 +2182,6 @@ export function getAvailableTools( const enableAgentReport = options?.enableAgentReport ?? true; const enableAnalyticsQuery = options?.enableAnalyticsQuery ?? true; const enableAdvisor = options?.enableAdvisor ?? false; - const enableImageGeneration = options?.enableImageGeneration ?? false; - const enableImageEditing = enableImageGeneration && (options?.enableImageEditing ?? false); // Base tools available for all models // Note: Tool availability is controlled by agent tool policy (allowlist), not mode checks here. @@ -2342,8 +2211,6 @@ export function getAvailableTools( // "file_edit_replace_lines", // DISABLED: causes models to break repo state "file_edit_insert", ...(enableAdvisor ? ["advisor"] : []), - ...(enableImageGeneration ? ["image_generate"] : []), - ...(enableImageEditing ? ["image_edit"] : []), "ask_user_question", "propose_plan", "bash", diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts index 896445def9..a506b59bf0 100644 --- a/src/common/utils/tools/tools.ts +++ b/src/common/utils/tools/tools.ts @@ -1,4 +1,4 @@ -import { type ImageModel, type LanguageModel, type Tool } from "ai"; +import { type LanguageModel, type Tool } from "ai"; import type { LanguageModelV2Usage } from "@ai-sdk/provider"; import { cloneToolPreservingDescriptors } from "@/common/utils/tools/cloneToolPreservingDescriptors"; import { createFileReadTool } from "@/node/services/tools/file_read"; @@ -11,8 +11,6 @@ import { createFileEditReplaceStringTool } from "@/node/services/tools/file_edit // DISABLED: import { createFileEditReplaceLinesTool } from "@/node/services/tools/file_edit_replace_lines"; import { createFileEditInsertTool } from "@/node/services/tools/file_edit_insert"; import { createAskUserQuestionTool } from "@/node/services/tools/ask_user_question"; -import { createImageGenerateTool } from "@/node/services/tools/image_generate"; -import { createImageEditTool } from "@/node/services/tools/image_edit"; import { createAdvisorTool } from "@/node/services/tools/advisor"; import { createProposePlanTool } from "@/node/services/tools/propose_plan"; import { createTodoWriteTool, createTodoReadTool } from "@/node/services/tools/todo"; @@ -59,11 +57,9 @@ import type { DesktopSessionManager } from "@/node/services/desktop/DesktopSessi import type { TaskService } from "@/node/services/taskService"; import type { WorkspaceGoalService } from "@/node/services/workspaceGoalService"; import type { WorkspaceChatMessage } from "@/common/orpc/types"; -import type { SendMessageError } from "@/common/types/errors"; import type { FileState } from "@/node/services/agentSession"; import type { AgentDefinitionDescriptor } from "@/common/types/agentDefinition"; import type { AgentSkillDescriptor } from "@/common/types/agentSkill"; -import type { Result } from "@/common/types/result"; import type { ModelMessage } from "@/common/types/message"; import type { ProjectRef } from "@/common/types/workspace"; @@ -156,7 +152,6 @@ export interface ToolConfiguration { programmaticToolCalling?: boolean; programmaticToolCallingExclusive?: boolean; advisorTool?: boolean; - imageGenerationTool?: boolean; execSubagentHardRestart?: boolean; }; /** Available sub-agents for the task tool description (dynamic context) */ @@ -169,17 +164,6 @@ export interface ToolConfiguration { analyticsService?: { executeRawQuery(sql: string): Promise; }; - /** Runtime bundle for image tools (present only when the experiment is enabled). */ - imageGenerationRuntime?: { - /** Configured image model string (e.g. "openai:gpt-image-2"). */ - modelString: string; - /** Per-call image count cap configured by the user. */ - maxImagesPerCall: number; - /** Creates an AI SDK image model for the configured image model string. */ - createImageModel: (modelString: string) => Promise>; - }; - /** Whether image upload consent permits registering the image editing tool. */ - imageEditingEnabled?: boolean; /** Runtime bundle for the advisor tool (present only when advisor is eligible for this stream). */ advisorRuntime?: { /** The advisor model string (e.g. "anthropic:claude-sonnet-4-20250514") */ @@ -421,12 +405,6 @@ export async function getToolsForModel( // Runtime-dependent tools need to wait for workspace initialization // Wrap them to handle init waiting centrally instead of in each tool const runtimeTools: Record = { - ...(config.imageGenerationRuntime - ? { image_generate: wrap(createImageGenerateTool(config)) } - : {}), - ...(config.imageEditingEnabled && config.imageGenerationRuntime - ? { image_edit: wrap(createImageEditTool(config)) } - : {}), file_read: wrap(createFileReadTool(config)), attach_file: wrap(createAttachFileTool(config)), agent_skill_read: wrap(createAgentSkillReadTool(config)), @@ -589,8 +567,6 @@ export async function getToolsForModel( enableAgentReport: config.enableAgentReport, enableAnalyticsQuery: Boolean(config.analyticsService), enableAdvisor: Boolean(config.advisorRuntime), - enableImageGeneration: Boolean(config.imageGenerationRuntime), - enableImageEditing: Boolean(config.imageGenerationRuntime && config.imageEditingEnabled), // Mux global tools are always created; tool policy (agent frontmatter) // controls which agents can actually use them. enableMuxGlobalAgentsTools: true, diff --git a/src/node/builtinSkills/imagegen.md b/src/node/builtinSkills/imagegen.md deleted file mode 100644 index 33dc40cdc0..0000000000 --- a/src/node/builtinSkills/imagegen.md +++ /dev/null @@ -1,105 +0,0 @@ ---- -name: imagegen -description: Create or edit raster image artifacts for this workspace using Mux's experimental Image Tools ---- - -# Image Tools - -Use this skill when the user asks to generate raster image artifacts or edit an existing image: hero images, illustrations, product mockups, UI visuals, icons, game assets, textures, infographics, visual variants, or prompt-based edits to a local image path. - -## Current capability - -Use `image_generate` for text-to-image generation. - -Use `image_edit` when the user asks to edit an existing local PNG, JPEG, or WebP image and the tool is available. If `image_edit` is not in your toolset, explain that image editing requires upload consent in Settings > Experiments > Image Tools and offer image generation or implementation guidance instead. The tool edits exactly one source image from a prompt and returns edited image artifacts. It does not capture screenshots, write code, plan implementation work, or verify UI changes; those steps belong to the calling workflow when the user explicitly asks for them. - -Image editing uploads the selected source file to the configured image provider as-is, including embedded metadata. Do not upload incidental images, screenshots, secrets, or sensitive visual/metadata content unless image editing is requested or clearly required by the task. - -Deferred capabilities: - -- No masks or region-specific edits. -- No multi-image reference editing. -- No batch JSONL workflow. -- No transparent-background or chroma-key workflow. -- No fallback CLI scripts. - -If the user asks for a deferred capability, explain the limitation and offer the closest prompt-based generate/edit alternative. - -## Prompting principles - -Preserve the user's intent. Do not expand a specific prompt into an over-authored creative brief. - -When the prompt is generic, add useful visual detail: - -- subject and setting -- style or medium -- composition and framing -- lighting and mood -- color palette -- constraints and avoid-list - -Do not invent brand palettes, slogans, characters, logos, or text unless the user asked for them. For text in an image, quote the exact text and keep it short. - -## Prompt structure - -Use a concise prompt with optional sections: - -```text -Primary request: ... -Subject/source: ... -Style/medium: ... -Composition/framing: ... -Lighting/mood: ... -Palette: ... -Text, verbatim: "..." -Constraints: ... -Avoid: ... -``` - -Only include sections that help. A one-sentence prompt is fine when the user already gave clear direction. - -## Use-case recipes - -### Website hero - -Describe the product, audience, visual metaphor, aspect/framing needs, and any empty space needed for overlay text. Do not add copy unless requested. - -### Product mockup - -Describe the product surface, environment, camera angle, material, lighting, and brand-neutral constraints. Keep labels/logos out unless provided by the user. - -### UI visual or screenshot edit - -If the user provided or asked you to capture a screenshot, use separate screenshot tooling first, then call `image_edit` on that saved image path. Treat the edited output as a visual mockup only; do not claim it changed the product UI. - -### Icon or logo concept - -Generate concept art only. Do not claim the output is final brand identity. Keep shapes simple and avoid tiny text. - -### Game asset or sprite concept - -Specify subject, pose, perspective, style, background simplicity, and whether the result is concept art or a production asset. - -### Infographic or diagram raster - -Keep labels minimal. For precise diagrams, prefer Mermaid or SVG/code instead of raster image generation. - -### Texture or background - -Describe pattern scale, seamlessness if desired, material, palette, and whether the image should avoid obvious focal subjects. - -## Variants and iteration - -For variants, request the count the user asked for when it is within the configured maximum. If the request exceeds the configured maximum, ask for fewer images or tell the user to adjust Settings > Experiments > Image Tools. - -Default to one output. Request multiple variants only when the user asks for variants or variants are clearly useful. - -For prompt refinements to an existing image artifact, use `image_edit` only when the source image path is available and upload consent permits editing; otherwise use `image_generate` from an updated prompt. - -## Artifact handling - -Generated and edited full-resolution images are saved under runtime artifact directories. These are best-effort session artifacts, not permanent project assets. - -Preview or discarded images can remain in the runtime artifact directory. When the user wants an image used by the project, copy the selected final image into the workspace and report the workspace path. - -Keep originals unless the user explicitly asks to delete them. diff --git a/src/node/builtinSkills/mux-docs.md b/src/node/builtinSkills/mux-docs.md index 1092be7896..5520cee1eb 100644 --- a/src/node/builtinSkills/mux-docs.md +++ b/src/node/builtinSkills/mux-docs.md @@ -105,8 +105,6 @@ Use this index to find a page's: - Telemetry (`/reference/telemetry`) → `references/docs/reference/telemetry.mdx` — What Mux collects, what it doesn’t, and how to disable it - Storybook (`/reference/storybook`) → `references/docs/reference/storybook.mdx` — Develop and test Mux UI states in isolation - Terminal Benchmarking (`/reference/benchmarking`) → `references/docs/reference/benchmarking.mdx` — Run Terminal-Bench benchmarks with the Mux adapter - - Experimental Image Generation Tool (`/adr/0001-experimental-image-generation-tool`) → `references/docs/adr/0001-experimental-image-generation-tool.md` — Architecture decision for Mux's experimental image generation tool and generated-image display messages - - Image Editing Uses a Separate General-Purpose Tool (`/adr/0002-image-editing-visual-mockups`) → `references/docs/adr/0002-image-editing-visual-mockups.md` — Architecture decision for Mux's experimental image editing tool and edited image display messages - Context Boundaries for Compaction and Reset (`/adr/0003-context-boundaries-for-compaction-and-reset`) → `references/docs/adr/0003-context-boundaries-for-compaction-and-reset.md` — Architecture decision for modeling provider context windows separately from transcript history - CLI Goal Runs are not strict /goal aliases (`/adr/0004-cli-goal-runs-are-not-strict-goal-aliases`) → `references/docs/adr/0004-cli-goal-runs-are-not-strict-goal-aliases.md` — Architecture decision for giving mux run --goal CLI-specific completion and limit semantics - AGENTS.md (`/AGENTS`) → `references/docs/AGENTS.md` — Agent instructions for AI assistants working on the Mux codebase diff --git a/src/node/config.ts b/src/node/config.ts index 5c1f1edd0a..6463bc51ac 100644 --- a/src/node/config.ts +++ b/src/node/config.ts @@ -67,7 +67,6 @@ import { isProviderAutoRouteEligible } from "@/node/utils/providerRequirements"; import { getContainerName as getDockerContainerName } from "@/node/runtime/DockerRuntime"; import { deriveProjectHierarchy } from "@/common/utils/subProjects"; import { coerceThinkingLevel, type ThinkingLevel } from "@/common/types/thinking"; -import { normalizeImageGenerationConfig } from "@/common/types/imageGeneration"; // Re-export project/provider types from dedicated schema/types files (for preload usage) export type { Workspace, ProjectConfig, ProjectsConfig, ProviderConfig, CanonicalProvidersConfig }; @@ -771,7 +770,6 @@ export class Config { ? null : parseOptionalPositiveInteger(parsed.advisorMaxOutputTokens); const hiddenModels = normalizeOptionalModelStringArray(parsed.hiddenModels); - const imageGeneration = normalizeImageGenerationConfig(parsed.imageGeneration); let legacySubagentAiDefaults = normalizeSubagentAiDefaults(parsed.subagentAiDefaults); const agentAiDefaults = parsed.agentAiDefaults !== undefined @@ -895,7 +893,6 @@ export class Config { advisorMaxUsesPerTurn, advisorMaxOutputTokens, hiddenModels, - imageGeneration, agentAiDefaults, // Subagent defaults: exec is canonical active storage, non-exec entries // support legacy mirror compatibility. @@ -926,7 +923,6 @@ export class Config { taskSettings: DEFAULT_TASK_SETTINGS, agentAiDefaults: {}, subagentAiDefaults: {}, - imageGeneration: normalizeImageGenerationConfig(undefined), routePriority: this.seedRoutePriorityFromProviders(), coderWorkspaceArchiveBehavior: DEFAULT_CODER_ARCHIVE_BEHAVIOR, worktreeArchiveBehavior: DEFAULT_WORKTREE_ARCHIVE_BEHAVIOR, @@ -1024,8 +1020,6 @@ export class Config { data.hiddenModels = hiddenModels; } - data.imageGeneration = normalizeImageGenerationConfig(config.imageGeneration); - const routePriority = parseOptionalStringArray(config.routePriority); if (routePriority !== undefined) { data.routePriority = routePriority; diff --git a/src/node/orpc/router.ts b/src/node/orpc/router.ts index dded35ac6a..a8de0dad4e 100644 --- a/src/node/orpc/router.ts +++ b/src/node/orpc/router.ts @@ -67,11 +67,7 @@ import { import { discoverAgentSkills, discoverAgentSkillsDiagnostics, - filterUnavailableImagegenSkills, - IMAGEGEN_SKILL_DISABLED_MESSAGE, - isBuiltInImagegenSkillPackage, readAgentSkill, - type ResolvedAgentSkill, } from "@/node/services/agentSkills/agentSkillsService"; import { discoverAgentDefinitions, @@ -89,7 +85,6 @@ import * as path from "node:path"; import type { DevToolsEvent } from "@/common/types/devtools"; import type { MuxMessage } from "@/common/types/message"; import { coerceThinkingLevel } from "@/common/types/thinking"; -import { normalizeImageGenerationConfig } from "@/common/types/imageGeneration"; import { normalizeLegacyMuxMetadata } from "@/node/utils/messages/legacy"; import { log } from "@/node/services/log"; import { BROWSER_BRIDGE_WS_PATH, DESKTOP_WS_PATH } from "@/node/orpc/wsPaths"; @@ -159,23 +154,6 @@ async function resolveAgentDiscoveryContext( return { runtime, discoveryPath: input.projectPath! }; } -function isImageGenerationToolExperimentEnabled(context: ORPCContext): boolean { - return context.experimentsService.isExperimentEnabled(EXPERIMENT_IDS.IMAGE_GENERATION_TOOL); -} - -function assertImagegenSkillAvailable( - context: ORPCContext, - resolvedSkill: ResolvedAgentSkill -): void { - if (!isBuiltInImagegenSkillPackage(resolvedSkill.package)) { - return; - } - - if (!isImageGenerationToolExperimentEnabled(context)) { - throw new Error(IMAGEGEN_SKILL_DISABLED_MESSAGE); - } -} - function isTrustedProjectPath(context: ORPCContext, projectPath?: string | null): boolean { return isProjectTrusted(context.config, projectPath); } @@ -751,7 +729,6 @@ export const router = (authToken?: string) => { advisorThinkingLevel: config.advisorThinkingLevel ?? null, advisorMaxUsesPerTurn: config.advisorMaxUsesPerTurn, advisorMaxOutputTokens: config.advisorMaxOutputTokens, - imageGeneration: normalizeImageGenerationConfig(config.imageGeneration), hiddenModels: config.hiddenModels, coderWorkspaceArchiveBehavior: config.coderWorkspaceArchiveBehavior ?? DEFAULT_CODER_ARCHIVE_BEHAVIOR, @@ -898,15 +875,6 @@ export const router = (authToken?: string) => { routeOverrides, })); }), - updateImageGenerationConfig: t - .input(schemas.config.updateImageGenerationConfig.input) - .output(schemas.config.updateImageGenerationConfig.output) - .handler(async ({ context, input }) => { - await context.config.editConfig((config) => ({ - ...config, - imageGeneration: normalizeImageGenerationConfig(input.imageGeneration), - })); - }), updateModelPreferences: t .input(schemas.config.updateModelPreferences.input) .output(schemas.config.updateModelPreferences.output) @@ -1557,11 +1525,7 @@ export const router = (authToken?: string) => { await context.aiService.waitForInit(input.workspaceId); } const { runtime, discoveryPath } = await resolveAgentDiscoveryContext(context, input); - const skills = await discoverAgentSkills(runtime, discoveryPath); - return filterUnavailableImagegenSkills( - skills, - isImageGenerationToolExperimentEnabled(context) - ); + return discoverAgentSkills(runtime, discoveryPath); }), listDiagnostics: t .input(schemas.agentSkills.listDiagnostics.input) @@ -1573,13 +1537,7 @@ export const router = (authToken?: string) => { } const { runtime, discoveryPath } = await resolveAgentDiscoveryContext(context, input); const diagnostics = await discoverAgentSkillsDiagnostics(runtime, discoveryPath); - return { - ...diagnostics, - skills: filterUnavailableImagegenSkills( - diagnostics.skills, - isImageGenerationToolExperimentEnabled(context) - ), - }; + return diagnostics; }), get: t .input(schemas.agentSkills.get.input) @@ -1591,7 +1549,6 @@ export const router = (authToken?: string) => { } const { runtime, discoveryPath } = await resolveAgentDiscoveryContext(context, input); const result = await readAgentSkill(runtime, discoveryPath, input.skillName); - assertImagegenSkillAvailable(context, result); return result.package; }), }, diff --git a/src/node/services/agentSession.agentSkillSnapshot.test.ts b/src/node/services/agentSession.agentSkillSnapshot.test.ts index b2da249e16..ac8a07e15f 100644 --- a/src/node/services/agentSession.agentSkillSnapshot.test.ts +++ b/src/node/services/agentSession.agentSkillSnapshot.test.ts @@ -125,25 +125,6 @@ describe("AgentSession.sendMessage (agent skill snapshots)", () => { expect(userText).toBe("do X"); }); - it("skips built-in imagegen snapshots when image generation is disabled", async () => { - const { workspacePath } = await createTestWorkspaceWithSkills({ skills: [] }); - const { session, appendToHistory, messages } = await createSessionHarness({ workspacePath }); - - const result = await session.sendMessage("retry image generation", { - model: "anthropic:claude-3-5-sonnet-latest", - agentId: "exec", - experiments: { imageGenerationTool: false }, - muxMetadata: { - agentSkillRefs: [{ skillName: "imagegen", scope: "built-in", source: "inline" }], - }, - }); - - expect(result.success).toBe(true); - expect(appendToHistory.mock.calls).toHaveLength(1); - expect(messages[0]?.metadata?.agentSkillSnapshot).toBeUndefined(); - expect(getMessageText(messages[0])).toBe("retry image generation"); - }); - it("honors disableWorkspaceAgents when resolving skill snapshots", async () => { const workspaceId = "ws-test"; diff --git a/src/node/services/agentSession.ts b/src/node/services/agentSession.ts index 19b6a6ce52..4e2b242289 100644 --- a/src/node/services/agentSession.ts +++ b/src/node/services/agentSession.ts @@ -13,7 +13,6 @@ import type { InitStateManager } from "@/node/services/initStateManager"; import type { FrontendWorkspaceMetadata, WorkspaceMetadata } from "@/common/types/workspace"; import type { RuntimeConfig } from "@/common/types/runtime"; import { DEFAULT_RUNTIME_CONFIG } from "@/common/constants/workspace"; -import { EXPERIMENT_IDS } from "@/common/constants/experiments"; import { DEFAULT_MODEL } from "@/common/constants/knownModels"; import { computePriorHistoryFingerprint } from "@/common/orpc/onChatCursorFingerprint"; import type { @@ -94,7 +93,6 @@ import { CompactionHandler } from "./compactionHandler"; import { RetryManager, type RetryFailureError, type RetryStatusEvent } from "./retryManager"; import type { TelemetryService } from "./telemetryService"; import type { BackgroundProcessManager } from "./backgroundProcessManager"; -import type { ExperimentsService } from "./experimentsService"; import { AttachmentService } from "./attachmentService"; import type { TodoItem } from "@/common/types/tools"; @@ -124,11 +122,7 @@ import { isNonRetryableStreamError, } from "@/common/utils/messages/retryEligibility"; import { createDisplayUsage } from "@/common/utils/tokens/displayUsage"; -import { - IMAGEGEN_SKILL_DISABLED_MESSAGE, - isBuiltInImagegenSkillUnavailable, - readAgentSkill, -} from "@/node/services/agentSkills/agentSkillsService"; +import { readAgentSkill } from "@/node/services/agentSkills/agentSkillsService"; import { createLoadedSkillSnapshot, extractLoadedSkillSnapshotsFromMessages, @@ -314,7 +308,6 @@ interface AgentSessionOptions { aiService: AIService; initStateManager: InitStateManager; telemetryService?: TelemetryService; - experimentsService?: ExperimentsService; backgroundProcessManager: BackgroundProcessManager; workspaceGoalService?: WorkspaceGoalService; /** When true, skip terminating background processes on dispose/compaction (for bench/CI) */ @@ -341,7 +334,6 @@ export class AgentSession { private readonly aiService: AIService; private readonly initStateManager: InitStateManager; private readonly backgroundProcessManager: BackgroundProcessManager; - private readonly experimentsService?: ExperimentsService; private readonly workspaceGoalService?: WorkspaceGoalService; private readonly keepBackgroundProcesses: boolean; private readonly onCompactionComplete?: () => void; @@ -502,7 +494,6 @@ export class AgentSession { initStateManager, telemetryService, backgroundProcessManager, - experimentsService, workspaceGoalService, keepBackgroundProcesses, onCompactionComplete, @@ -518,7 +509,6 @@ export class AgentSession { this.historyService = historyService; this.aiService = aiService; this.initStateManager = initStateManager; - this.experimentsService = experimentsService; this.backgroundProcessManager = backgroundProcessManager; this.workspaceGoalService = workspaceGoalService; this.keepBackgroundProcesses = keepBackgroundProcesses ?? false; @@ -2543,8 +2533,7 @@ export class AgentSession { try { skillSnapshotMessages = await this.materializeAgentSkillSnapshots( typedMuxMetadata, - options?.disableWorkspaceAgents, - this.isImageGenerationToolEnabled(options?.experiments) + options?.disableWorkspaceAgents ); } catch (error) { return Err(createUnknownSendMessageError(getErrorMessage(error))); @@ -5379,17 +5368,9 @@ export class AgentSession { return { snapshotMessage, materializedTokens: tokens }; } - private isImageGenerationToolEnabled(experiments: SendMessageOptions["experiments"]): boolean { - return ( - experiments?.imageGenerationTool ?? - this.experimentsService?.isExperimentEnabled(EXPERIMENT_IDS.IMAGE_GENERATION_TOOL) === true - ); - } - private async materializeAgentSkillSnapshots( muxMetadata: MuxMessageMetadata | undefined, - disableWorkspaceAgents: boolean | undefined, - imageGenerationToolEnabled: boolean + disableWorkspaceAgents: boolean | undefined ): Promise { const refs = extractAgentSkillRefs(muxMetadata); if (refs.length === 0) { @@ -5453,13 +5434,6 @@ export class AgentSession { continue; } - if (isBuiltInImagegenSkillUnavailable(resolved.package, imageGenerationToolEnabled)) { - if (ref.source === "slash") { - throw new Error(IMAGEGEN_SKILL_DISABLED_MESSAGE); - } - continue; - } - const skill = resolved.package; // Include the parsed YAML frontmatter in the hash so frontmatter-only edits (e.g. description) diff --git a/src/node/services/agentSkills/agentSkillsService.test.ts b/src/node/services/agentSkills/agentSkillsService.test.ts index a70fa56ec1..b5fdf80b28 100644 --- a/src/node/services/agentSkills/agentSkillsService.test.ts +++ b/src/node/services/agentSkills/agentSkillsService.test.ts @@ -252,7 +252,6 @@ describe("agentSkillsService", () => { expect(skills.map((s) => s.name)).toEqual([ "bar", "foo", - "imagegen", "init", "mux-diagram", "mux-docs", @@ -659,7 +658,6 @@ describe("agentSkillsService", () => { expect(diagnostics.skills.map((s) => s.name)).toEqual([ "foo", - "imagegen", "init", "mux-diagram", "mux-docs", diff --git a/src/node/services/agentSkills/agentSkillsService.ts b/src/node/services/agentSkills/agentSkillsService.ts index 89bb4bd970..db7fd08810 100644 --- a/src/node/services/agentSkills/agentSkillsService.ts +++ b/src/node/services/agentSkills/agentSkillsService.ts @@ -27,47 +27,6 @@ import { AgentSkillParseError, parseSkillMarkdown } from "./parseSkillMarkdown"; import { getBuiltInSkillByName, getBuiltInSkillDescriptors } from "./builtInSkillDefinitions"; import type { ProjectSkillContainment } from "./skillStorageContext"; -export const IMAGEGEN_BUILT_IN_SKILL_NAME = "imagegen" satisfies SkillName; - -export const IMAGEGEN_SKILL_DISABLED_MESSAGE = - "Built-in imagegen skill is only available when the Image Tools experiment is enabled."; - -export function isBuiltInImagegenSkill( - skill: Pick -): boolean { - return skill.scope === "built-in" && skill.name === IMAGEGEN_BUILT_IN_SKILL_NAME; -} - -export function isBuiltInImagegenSkillPackage(skillPackage: AgentSkillPackage): boolean { - return ( - skillPackage.scope === "built-in" && - skillPackage.frontmatter.name === IMAGEGEN_BUILT_IN_SKILL_NAME - ); -} - -export function filterUnavailableImagegenSkills< - T extends Pick, ->(skills: T[], imageGenerationToolAvailable: boolean | undefined): T[] { - if (imageGenerationToolAvailable === true) { - return skills; - } - - return skills.filter((skill) => !isBuiltInImagegenSkill(skill)); -} - -/** - * True iff the resolved skill package is the built-in imagegen skill AND the - * image generation tool is not currently available. Centralizes the - * "is-imagegen + experiment-off" check used by tools/services that need to - * refuse loading the built-in skill when the tool is gated off. - */ -export function isBuiltInImagegenSkillUnavailable( - skillPackage: AgentSkillPackage, - imageGenerationToolAvailable: boolean | undefined -): boolean { - return isBuiltInImagegenSkillPackage(skillPackage) && imageGenerationToolAvailable !== true; -} - const UNIVERSAL_SKILLS_ROOT = "~/.agents/skills"; export interface AgentSkillsRoots { diff --git a/src/node/services/agentSkills/builtInSkillContent.generated.ts b/src/node/services/agentSkills/builtInSkillContent.generated.ts index 8a44125b9c..ccec0f43dc 100644 --- a/src/node/services/agentSkills/builtInSkillContent.generated.ts +++ b/src/node/services/agentSkills/builtInSkillContent.generated.ts @@ -3,116 +3,6 @@ // Source: src/node/builtinSkills/ and docs/ export const BUILTIN_SKILL_FILES: Record> = { - imagegen: { - "SKILL.md": [ - "---", - "name: imagegen", - "description: Create or edit raster image artifacts for this workspace using Mux's experimental Image Tools", - "---", - "", - "# Image Tools", - "", - "Use this skill when the user asks to generate raster image artifacts or edit an existing image: hero images, illustrations, product mockups, UI visuals, icons, game assets, textures, infographics, visual variants, or prompt-based edits to a local image path.", - "", - "## Current capability", - "", - "Use `image_generate` for text-to-image generation.", - "", - "Use `image_edit` when the user asks to edit an existing local PNG, JPEG, or WebP image and the tool is available. If `image_edit` is not in your toolset, explain that image editing requires upload consent in Settings > Experiments > Image Tools and offer image generation or implementation guidance instead. The tool edits exactly one source image from a prompt and returns edited image artifacts. It does not capture screenshots, write code, plan implementation work, or verify UI changes; those steps belong to the calling workflow when the user explicitly asks for them.", - "", - "Image editing uploads the selected source file to the configured image provider as-is, including embedded metadata. Do not upload incidental images, screenshots, secrets, or sensitive visual/metadata content unless image editing is requested or clearly required by the task.", - "", - "Deferred capabilities:", - "", - "- No masks or region-specific edits.", - "- No multi-image reference editing.", - "- No batch JSONL workflow.", - "- No transparent-background or chroma-key workflow.", - "- No fallback CLI scripts.", - "", - "If the user asks for a deferred capability, explain the limitation and offer the closest prompt-based generate/edit alternative.", - "", - "## Prompting principles", - "", - "Preserve the user's intent. Do not expand a specific prompt into an over-authored creative brief.", - "", - "When the prompt is generic, add useful visual detail:", - "", - "- subject and setting", - "- style or medium", - "- composition and framing", - "- lighting and mood", - "- color palette", - "- constraints and avoid-list", - "", - "Do not invent brand palettes, slogans, characters, logos, or text unless the user asked for them. For text in an image, quote the exact text and keep it short.", - "", - "## Prompt structure", - "", - "Use a concise prompt with optional sections:", - "", - "```text", - "Primary request: ...", - "Subject/source: ...", - "Style/medium: ...", - "Composition/framing: ...", - "Lighting/mood: ...", - "Palette: ...", - 'Text, verbatim: "..."', - "Constraints: ...", - "Avoid: ...", - "```", - "", - "Only include sections that help. A one-sentence prompt is fine when the user already gave clear direction.", - "", - "## Use-case recipes", - "", - "### Website hero", - "", - "Describe the product, audience, visual metaphor, aspect/framing needs, and any empty space needed for overlay text. Do not add copy unless requested.", - "", - "### Product mockup", - "", - "Describe the product surface, environment, camera angle, material, lighting, and brand-neutral constraints. Keep labels/logos out unless provided by the user.", - "", - "### UI visual or screenshot edit", - "", - "If the user provided or asked you to capture a screenshot, use separate screenshot tooling first, then call `image_edit` on that saved image path. Treat the edited output as a visual mockup only; do not claim it changed the product UI.", - "", - "### Icon or logo concept", - "", - "Generate concept art only. Do not claim the output is final brand identity. Keep shapes simple and avoid tiny text.", - "", - "### Game asset or sprite concept", - "", - "Specify subject, pose, perspective, style, background simplicity, and whether the result is concept art or a production asset.", - "", - "### Infographic or diagram raster", - "", - "Keep labels minimal. For precise diagrams, prefer Mermaid or SVG/code instead of raster image generation.", - "", - "### Texture or background", - "", - "Describe pattern scale, seamlessness if desired, material, palette, and whether the image should avoid obvious focal subjects.", - "", - "## Variants and iteration", - "", - "For variants, request the count the user asked for when it is within the configured maximum. If the request exceeds the configured maximum, ask for fewer images or tell the user to adjust Settings > Experiments > Image Tools.", - "", - "Default to one output. Request multiple variants only when the user asks for variants or variants are clearly useful.", - "", - "For prompt refinements to an existing image artifact, use `image_edit` only when the source image path is available and upload consent permits editing; otherwise use `image_generate` from an updated prompt.", - "", - "## Artifact handling", - "", - "Generated and edited full-resolution images are saved under runtime artifact directories. These are best-effort session artifacts, not permanent project assets.", - "", - "Preview or discarded images can remain in the runtime artifact directory. When the user wants an image used by the project, copy the selected final image into the workspace and report the workspace path.", - "", - "Keep originals unless the user explicitly asks to delete them.", - "", - ].join("\n"), - }, init: { "SKILL.md": [ "---", @@ -246,134 +136,6 @@ export const BUILTIN_SKILL_FILES: Record> = { ].join("\n"), }, "mux-docs": { - "references/docs/adr/0001-experimental-image-generation-tool.md": [ - "---", - "title: Experimental Image Generation Tool", - "description: Architecture decision for Mux's experimental image generation tool and generated-image display messages", - "---", - "", - "# 0001. Experimental Image Generation Uses a Mux-Executed Tool with Derived Display Messages", - "", - "## Status", - "", - "Accepted", - "", - "## Context", - "", - "Mux is adding an experimental image generation capability. The capability needs a configurable image model, must avoid surprising users with default-on costful behavior, and should fit Mux's existing tool, settings, runtime, and transcript systems.", - "", - "The Codex CLI report described a layered design built around a model-facing image generation skill, a hosted OpenAI Responses image-generation tool, artifact saving, and optional fallback scripts. Mux has different constraints: image generation should work independently from the selected chat model, and the configured image model should be controlled from Mux settings.", - "", - "## Decision", - "", - "Mux will implement the first image generation experiment as a Mux-executed model-callable tool named `image_generate`, backed by OpenAI image models through the AI SDK image generation API. The default image model is `openai:gpt-image-2`.", - "", - "The feature is gated behind a visible, default-off Image Generation Tool experiment. The experiment owns an app-level `imageGeneration` configuration object containing `modelString` and `maxImagesPerCall`.", - "", - "The first tool operation is text-to-image generation only. It exposes prompt, image count, quality, and output format. Editing, masks, batch generation, transparent-background workflows, seed, aspect ratio, style, moderation overrides, and compression are deferred.", - "", - "Generated full-resolution images are saved under the active runtime artifact directory. The persisted tool result stores saved paths plus bounded thumbnails for transcript preview. Full image bytes are not stored in chat history.", - "", - "The frontend renders successful `image_generate` results as a first-class Generated Image Display Message derived from the persisted tool result. The persisted transcript source of truth remains the normal tool call and tool result; no new persisted chat part or stream protocol event is required for the first experiment. Pending, executing, failed, interrupted, or redacted image-generation calls continue to render as normal tool rows.", - "", - "The tool is available to Exec-mode agents by default when the experiment is enabled. Built-in Plan and Explore agents remove it from their tool policies. The tool enforces Mux provider/model policy before provider calls and reports image-generation usage through existing tool-side usage reporting when provider metadata is available.", - "", - "Mux will also ship a richer built-in `/imagegen` Agent Skill as a single built-in skill file. The skill will teach prompting principles, use-case recipes, iteration guidance, and artifact policy, but it will not include fallback CLI scripts or executable workflows for deferred capabilities.", - "", - "## Alternatives Considered", - "", - "### Hosted OpenAI Responses image-generation tool", - "", - "This would mirror Codex's built-in path more closely. It was rejected for v1 because the image capability should be independent from the selected chat model, and the configured Image Generation Model should be the model that directly handles generation.", - "", - "### Skill-only implementation", - "", - "A skill without a Mux tool would provide guidance but no integrated artifact, settings, usage-reporting, or display path. It was rejected because the product goal is a configurable image generation capability, not only model instructions.", - "", - "### Direct full image bytes in chat history", - "", - "Persisting base64 output directly would make previews easy, but it would quickly bloat chat history. Mux will persist bounded thumbnails and keep full-resolution images as runtime artifacts instead.", - "", - "### New persisted chat message or stream event", - "", - "A fully new persisted item/event model may be appropriate later. It was deferred because a derived display message gives first-class UX while preserving existing replay, retry, and history compatibility for the experiment.", - "", - "### Saving generated images directly into the workspace", - "", - "This would make generated images immediately project-usable, but it would also pollute the git working tree with every preview or discarded variant. Mux will save to runtime temp by default and require agents to explicitly copy selected final assets into the workspace.", - "", - "## Consequences", - "", - "- Image generation is usable from non-OpenAI chat models because the image tool owns its own configured model.", - "- Users must opt into the experiment before the tool is exposed.", - "- Power users can raise the image-count cap within the configured range, while the default stays conservative.", - "- Generated-image transcript previews remain available even if runtime-temp full artifacts are later cleaned up outside Mux.", - "- Project-bound image assets require an explicit copy step into the workspace.", - "- Future work can add editing, masks, transparent workflows, provider adapters, artifact indexing, cleanup, or a persisted generated-image event without changing the initial domain model.", - "", - ].join("\n"), - "references/docs/adr/0002-image-editing-visual-mockups.md": [ - "---", - "title: Image Editing Uses a Separate General-Purpose Tool", - "description: Architecture decision for Mux's experimental image editing tool and edited image display messages", - "status: accepted", - "---", - "", - "# 0002. Image Editing Uses a Separate General-Purpose Tool", - "", - "## Status", - "", - "Accepted", - "", - "## Context", - "", - "Mux already has an experimental `image_generate` tool for text-to-image generation. Screenshot-driven UI workflows need a related but different capability: an agent should be able to take a local image, such as a UI screenshot, and produce a visual edit mockup from a prompt.", - "", - "Editing an existing image has a different privacy boundary from generation. Text-to-image sends only prompt text to the configured image provider, while image editing uploads a local file from the active runtime. That file may include sensitive pixels and embedded metadata. The product model therefore needs explicit upload consent and clear separation between visual mockups and implementation work.", - "", - "## Decision", - "", - "Mux will add general-purpose image editing through a separate model-callable `image_edit` tool. The tool edits exactly one PNG, JPEG, or WebP source image by path and returns edited image artifacts. When the source is a product screenshot or UI image, the output is a **Visual Edit Mockup**: a design reference artifact only, not source code, a direct UI mutation, or an authoritative implementation plan.", - "", - "`image_edit` is distinct from `image_generate`, but both tools share the same user-facing Image Tools experiment, configured image model, max-images-per-call cap, artifact conventions, and tool-side image-model usage reporting path. Internal configuration names may remain generation-oriented until a broader cleanup justifies the churn. The v1 image-upload consent boolean lives inside the existing `imageGeneration` config object as `allowImageUploadsForEditing`.", - "", - "The Image Tools setting has one main experiment toggle plus subordinate image-upload consent. Image generation requires the main toggle. `image_edit` is hidden unless both the main toggle and `allowImageUploadsForEditing` are enabled. Upload consent is separate because editing uploads local images or screenshots to the configured image provider.", - "", - "The first version validates source type from actual decoded bytes/metadata rather than extension. It follows symlinks through the runtime, records both the requested path and resolved real path for UI provenance, and rejects unsupported or unreadable images before provider calls. It does not attempt automatic screenshot redaction or metadata stripping; source images are uploaded as-is.", - "", - "Edited outputs are saved as runtime artifacts under a separate `edited_images` area with generated-style filenames. Extensions are selected from provider media type first, requested output format second, and PNG fallback. Bounded output thumbnails are persisted for transcript preview and stripped from model-visible tool output. Thumbnail generation failures keep the edited image result and add warnings.", - "", - "The result uses a separate top-level `ImageEditToolResult` schema that shares common image artifact schema pieces with `ImageGenerateToolResult`. It records the edit prompt, requested source path, resolved real source path, source file size, source dimensions, output dimensions detected from actual output bytes, edited image paths, provider revised prompts when available, and warnings. V1 does not persist a source thumbnail. The display shows the requested source path by default and reveals the resolved real path in details only when different.", - "", - "Successful `image_edit` outputs render as a first-class `edited-image` display message derived from persisted tool results, mirroring generated-image display rows. Pending, failed, malformed, interrupted, or hook-augmented edit calls continue to render as normal tool rows.", - "", - "## Alternatives Considered", - "", - "### Expand `image_generate` to accept input images", - "", - "This was rejected because generation and editing have different user promises, privacy boundaries, and tool-selection semantics. A separate tool gives agents and users a clearer mental model.", - "", - "### Direct OpenAI image-edit API calls", - "", - "This was rejected for v1 because the installed AI SDK already routes image prompts with input images through the provider edit path while preserving Mux's existing model configuration, policy, usage reporting, and artifact flow.", - "", - "### Masks and multi-image references", - "", - "These were deferred because the initial product goal is prompt-based editing of one source image. Masks and multi-image references add provenance, UI, validation, and provider-specific complexity.", - "", - "### Saving edited images directly into the workspace", - "", - "This was rejected because most edit iterations are disposable and should not pollute the git working tree. Mux saves runtime artifacts by default, and agents copy selected final assets into the workspace only when the user wants them used by the project.", - "", - "## Consequences", - "", - "- Users opt into image editing separately from text-to-image generation.", - "- Agents can create visual mockups from screenshots without claiming to implement UI changes.", - "- Source image uploads can include arbitrary runtime-readable image files, so settings, docs, and tool guidance must warn about sensitive pixels and embedded metadata.", - "- Plan and Explore agents do not receive image editing by default.", - "- Future work can add masks, multi-image references, artifact indexing, cleanup, redaction, or a persisted edited-image event without changing the initial domain model.", - "", - ].join("\n"), "references/docs/adr/0003-context-boundaries-for-compaction-and-reset.md": [ "---", "title: Context Boundaries for Compaction and Reset", @@ -2456,18 +2218,6 @@ export const BUILTIN_SKILL_FILES: Record> = { 'mux run -t high "Deep analysis" # Named level', "```", "", - "## Experimental Image Tools Model", - "", - "Mux can expose experimental Image Tools for Exec agents. Enable them in **Settings → Experiments → Image Tools**.", - "", - "The image model is configured separately from the chat model. The default is `openai:gpt-image-2`. Image Tools are OpenAI-only in this experiment and require OpenAI provider credentials in **Settings → Providers**.", - "", - "`image_generate` creates raster images from text prompts. It supports prompt, image count, quality, and output format.", - "", - "`image_edit` edits one existing PNG, JPEG, or WebP image by path. Because editing uploads the source file to the image provider, including embedded metadata, it is hidden unless you also enable **Allow image uploads for editing** under the Image Tools settings. Source images are sent as-is; Mux does not automatically redact screenshot contents or strip image metadata.", - "", - "Generated and edited full-resolution images are saved as runtime artifacts. Copy selected final assets into the workspace when they should become project files.", - "", "## Next Steps", "", '', @@ -3581,8 +3331,6 @@ export const BUILTIN_SKILL_FILES: Record> = { ' "reference/telemetry",', ' "reference/storybook",', ' "reference/benchmarking",', - ' "adr/0001-experimental-image-generation-tool",', - ' "adr/0002-image-editing-visual-mockups",', ' "adr/0003-context-boundaries-for-compaction-and-reset",', ' "adr/0004-cli-goal-runs-are-not-strict-goal-aliases",', ' "AGENTS"', @@ -4512,31 +4260,6 @@ export const BUILTIN_SKILL_FILES: Record> = { "
", "", "
", - "image_edit (5)", - "", - "| Env var | JSON path | Type | Description |", - "| ------------------------------ | -------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------- |", - "| `MUX_TOOL_INPUT_N` | `n` | number | Number of edited variants to create. Defaults to 1; request multiple variants only when the user asks or variants are clearly useful. |", - "| `MUX_TOOL_INPUT_OUTPUT_FORMAT` | `outputFormat` | enum | Optional output format. Defaults to png. |", - "| `MUX_TOOL_INPUT_PROMPT` | `prompt` | string | Edit prompt describing the desired image changes |", - "| `MUX_TOOL_INPUT_QUALITY` | `quality` | enum | Optional edit quality. Defaults to the provider/model default. |", - "| `MUX_TOOL_INPUT_SOURCE_PATH` | `sourcePath` | string | Path to the existing source image to edit |", - "", - "
", - "", - "
", - "image_generate (4)", - "", - "| Env var | JSON path | Type | Description |", - "| ------------------------------ | -------------- | ------ | ---------------------------------------------------------------------------------------------------------- |", - "| `MUX_TOOL_INPUT_N` | `n` | number | Number of images to generate. Defaults to 1 and must not exceed the user's configured Image Tools maximum. |", - "| `MUX_TOOL_INPUT_OUTPUT_FORMAT` | `outputFormat` | enum | Optional output format. Defaults to png. |", - "| `MUX_TOOL_INPUT_PROMPT` | `prompt` | string | Prompt describing the image(s) to generate |", - "| `MUX_TOOL_INPUT_QUALITY` | `quality` | enum | Optional generation quality. Defaults to the provider/model default. |", - "", - "
", - "", - "
", "mux_agents_write (2)", "", "| Env var | JSON path | Type | Description |", @@ -6683,8 +6406,6 @@ export const BUILTIN_SKILL_FILES: Record> = { " - Telemetry (`/reference/telemetry`) → `references/docs/reference/telemetry.mdx` — What Mux collects, what it doesn’t, and how to disable it", " - Storybook (`/reference/storybook`) → `references/docs/reference/storybook.mdx` — Develop and test Mux UI states in isolation", " - Terminal Benchmarking (`/reference/benchmarking`) → `references/docs/reference/benchmarking.mdx` — Run Terminal-Bench benchmarks with the Mux adapter", - " - Experimental Image Generation Tool (`/adr/0001-experimental-image-generation-tool`) → `references/docs/adr/0001-experimental-image-generation-tool.md` — Architecture decision for Mux's experimental image generation tool and generated-image display messages", - " - Image Editing Uses a Separate General-Purpose Tool (`/adr/0002-image-editing-visual-mockups`) → `references/docs/adr/0002-image-editing-visual-mockups.md` — Architecture decision for Mux's experimental image editing tool and edited image display messages", " - Context Boundaries for Compaction and Reset (`/adr/0003-context-boundaries-for-compaction-and-reset`) → `references/docs/adr/0003-context-boundaries-for-compaction-and-reset.md` — Architecture decision for modeling provider context windows separately from transcript history", " - CLI Goal Runs are not strict /goal aliases (`/adr/0004-cli-goal-runs-are-not-strict-goal-aliases`) → `references/docs/adr/0004-cli-goal-runs-are-not-strict-goal-aliases.md` — Architecture decision for giving mux run --goal CLI-specific completion and limit semantics", " - AGENTS.md (`/AGENTS`) → `references/docs/AGENTS.md` — Agent instructions for AI assistants working on the Mux codebase", diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index 64bea0295c..a3d168ddbf 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -16,10 +16,6 @@ import { ADVISOR_DEFAULT_MAX_USES_PER_TURN, resolveAdvisorEnabledForAgent, } from "@/common/constants/advisor"; -import { - DEFAULT_IMAGE_GENERATION_MAX_IMAGES, - DEFAULT_IMAGE_GENERATION_MODEL, -} from "@/common/types/imageGeneration"; import { EXPERIMENT_IDS } from "@/common/constants/experiments"; import type { GoalRecordV1 } from "@/common/types/goal"; @@ -105,7 +101,7 @@ import type { StreamAbortReason, StreamEndEvent, } from "@/common/types/stream"; -import { applyToolPolicyToNames, type ToolPolicy } from "@/common/utils/tools/toolPolicy"; +import type { ToolPolicy } from "@/common/utils/tools/toolPolicy"; import type { PTCEventWithParent } from "@/node/services/tools/code_execution"; import { MockAiStreamPlayer } from "./mock/mockAiStreamPlayer"; import { DEVTOOLS_RUN_METADATA_ID_HEADER } from "./devToolsHeaderCapture"; @@ -1133,9 +1129,6 @@ export class AIService extends EventEmitter { const advisorExperimentEnabled = experiments?.advisorTool ?? this.experimentsService?.isExperimentEnabled(EXPERIMENT_IDS.ADVISOR_TOOL) === true; - const imageGenerationExperimentEnabled = - experiments?.imageGenerationTool ?? - this.experimentsService?.isExperimentEnabled(EXPERIMENT_IDS.IMAGE_GENERATION_TOOL) === true; emitStartupBreadcrumb("loading_workspace_context"); const resolveAgentForStreamStartedAt = Date.now(); const agentResult = await resolveAgentForStream({ @@ -1279,14 +1272,6 @@ export class AIService extends EventEmitter { return desktopCapabilityPromise; }; - const imageGenerationDirectToolAvailable = - imageGenerationExperimentEnabled && - experiments?.programmaticToolCallingExclusive !== true && - applyToolPolicyToNames(["image_generate"], effectiveToolPolicy).includes("image_generate"); - const imageEditingEnabled = - imageGenerationExperimentEnabled && - cfg.imageGeneration?.allowImageUploadsForEditing === true; - const buildStreamSystemContextForAdvisor = (advisorToolAvailable: boolean) => buildStreamSystemContext({ runtime, @@ -1305,7 +1290,6 @@ export class AIService extends EventEmitter { muxScope, loadDesktopCapability, advisorToolAvailable, - imageGenerationToolAvailable: imageGenerationDirectToolAvailable, }); // Build provisional agent context before tool policy finalizes the toolset. @@ -1514,17 +1498,6 @@ export class AIService extends EventEmitter { secrets: await secretsToRecord(projectSecrets, this.opResolver), muxEnv, runtimeTempDir, - ...(imageGenerationExperimentEnabled - ? { - imageGenerationRuntime: { - modelString: cfg.imageGeneration?.modelString ?? DEFAULT_IMAGE_GENERATION_MODEL, - maxImagesPerCall: - cfg.imageGeneration?.maxImagesPerCall ?? DEFAULT_IMAGE_GENERATION_MAX_IMAGES, - createImageModel: (ms: string) => this.providerModelFactory.createImageModel(ms), - }, - } - : {}), - imageEditingEnabled, ...(advisorToolEligible ? { advisorRuntime: { diff --git a/src/node/services/providerModelFactory.test.ts b/src/node/services/providerModelFactory.test.ts index c8d997328f..9a2b2940f1 100644 --- a/src/node/services/providerModelFactory.test.ts +++ b/src/node/services/providerModelFactory.test.ts @@ -20,7 +20,6 @@ import { resolveAIProviderHeaderSource, resolveOpenAIWebSocketResponsesUrl, wrapFetchWithAnthropicCacheControl, - wrapFetchWithOpenAIImageResponseNormalization, } from "./providerModelFactory"; import { MUX_ANTHROPIC_EFFORT_OVERRIDE_HEADER } from "@/common/utils/ai/providerOptions"; import { hasLanguageModelCleanup } from "./languageModelCleanup"; @@ -230,90 +229,6 @@ describe("normalizeCodexResponsesBody", () => { }); }); -describe("wrapFetchWithOpenAIImageResponseNormalization", () => { - it("normalizes successful binary OpenAI image edit responses into AI SDK JSON", async () => { - const pngBytes = Buffer.from("tiny-png-bytes"); - const calls: Array<{ - input: Parameters[0]; - init?: Parameters[1]; - }> = []; - const baseFetch = Object.assign( - (input: Parameters[0], init?: Parameters[1]) => { - calls.push({ input, init }); - return Promise.resolve( - new Response(pngBytes, { - status: 200, - statusText: "OK", - headers: { - "content-type": "image/png", - "content-length": String(pngBytes.length), - }, - }) - ); - }, - fetch - ) as typeof fetch; - const wrappedFetch = wrapFetchWithOpenAIImageResponseNormalization(baseFetch); - const form = new FormData(); - form.set("model", "gpt-image-2"); - form.set("n", "1"); - form.set("output_format", "png"); - - const response = await wrappedFetch("https://api.openai.com/v1/images/edits", { - method: "POST", - body: form, - }); - - expect(calls).toHaveLength(1); - expect(response.headers.get("content-type")).toContain("application/json"); - expect(response.headers.get("content-length")).toBeNull(); - const body = (await response.json()) as { - created: number; - output_format: string; - data: Array<{ b64_json: string }>; - }; - expect(Number.isInteger(body.created)).toBe(true); - expect(body.output_format).toBe("png"); - expect(body.data).toEqual([{ b64_json: pngBytes.toString("base64") }]); - }); - - it("adds filenames to OpenAI image edit uploads before sending multipart requests", async () => { - let capturedImage: FormDataEntryValue | null = null; - const baseFetch = Object.assign( - (_input: Parameters[0], init?: Parameters[1]) => { - if (init?.body instanceof FormData) { - capturedImage = init.body.get("image"); - } - return Promise.resolve( - new Response( - JSON.stringify({ data: [{ b64_json: Buffer.from("png").toString("base64") }] }), - { - status: 200, - headers: { "content-type": "application/json" }, - } - ) - ); - }, - fetch - ) as typeof fetch; - const wrappedFetch = wrapFetchWithOpenAIImageResponseNormalization(baseFetch); - const form = new FormData(); - form.set("model", "gpt-image-1.5"); - form.set("prompt", "make it blue"); - form.set("image", new Blob([Buffer.from("png")], { type: "image/png" })); - - await wrappedFetch("https://api.openai.com/v1/images/edits", { - method: "POST", - body: form, - }); - - if (capturedImage == null || typeof capturedImage === "string") { - throw new Error("Expected OpenAI image edit upload to be a file-like object"); - } - expect((capturedImage as { name?: unknown }).name).toBe("image.png"); - }); -}); - describe("ProviderModelFactory.createModel", () => { it("returns provider_disabled when a non-gateway provider is disabled", async () => { await withTempConfig(async (config, factory) => { @@ -558,86 +473,6 @@ describe("ProviderModelFactory.createModel", () => { }); }); -describe("ProviderModelFactory.createImageModel", () => { - it("creates an OpenAI image model when credentials are configured", async () => { - await withTempConfig(async (config, factory) => { - config.saveProvidersConfig({ openai: { apiKey: "sk-test" } }); - - const result = await factory.createImageModel("openai:gpt-image-1.5"); - - expect(result.success).toBe(true); - }); - }); - - it("rejects non-OpenAI image providers in v1", async () => { - await withTempConfig(async (_config, factory) => { - const result = await factory.createImageModel("google:imagen-test"); - - expect(result.success).toBe(false); - if (!result.success) { - expect(result.error).toEqual({ type: "provider_not_supported", provider: "google" }); - } - }); - }); - - it("returns api_key_not_found when OpenAI credentials are missing", async () => { - const savedApiKey = process.env.OPENAI_API_KEY; - delete process.env.OPENAI_API_KEY; - try { - await withOpenAIBaseUrlEnvUnset(async () => { - await withTempConfig(async (_config, factory) => { - const result = await factory.createImageModel("openai:gpt-image-1.5"); - - expect(result.success).toBe(false); - if (!result.success) { - expect(result.error).toEqual({ type: "api_key_not_found", provider: "openai" }); - } - }); - }); - } finally { - if (savedApiKey === undefined) { - delete process.env.OPENAI_API_KEY; - } else { - process.env.OPENAI_API_KEY = savedApiKey; - } - } - }); - - it("returns provider_disabled when OpenAI is disabled", async () => { - await withTempConfig(async (config, factory) => { - config.saveProvidersConfig({ openai: { apiKey: "sk-test", enabled: false } }); - - const result = await factory.createImageModel("openai:gpt-image-1.5"); - - expect(result.success).toBe(false); - if (!result.success) { - expect(result.error).toEqual({ type: "provider_disabled", provider: "openai" }); - } - }); - }); - - it("enforces provider and model policy for image models", async () => { - await withTempPolicyProviderFactory( - { - policy_format_version: "0.1", - provider_access: [{ id: "openai", model_access: ["gpt-image-1-mini"] }], - }, - async (config, factory) => { - config.saveProvidersConfig({ openai: { apiKey: "sk-test" } }); - - const denied = await factory.createImageModel("openai:gpt-image-1.5"); - expect(denied.success).toBe(false); - if (!denied.success) { - expect(denied.error.type).toBe("policy_denied"); - } - - const allowed = await factory.createImageModel("openai:gpt-image-1-mini"); - expect(allowed.success).toBe(true); - } - ); - }); -}); - describe("ProviderModelFactory GitHub Copilot", () => { it("creates routed gpt-5.5 models with the chat completions API mode", async () => { await withTempConfig(async (config, factory) => { diff --git a/src/node/services/providerModelFactory.ts b/src/node/services/providerModelFactory.ts index cf3a64c8cb..1f03b22e66 100644 --- a/src/node/services/providerModelFactory.ts +++ b/src/node/services/providerModelFactory.ts @@ -1,9 +1,8 @@ import assert from "node:assert"; -import { Buffer } from "node:buffer"; import { createOpenAICompatible } from "@ai-sdk/openai-compatible"; import type { XaiProviderOptions } from "@ai-sdk/xai"; import { fromNodeProviderChain } from "@aws-sdk/credential-providers"; -import { wrapLanguageModel, type ImageModel, type LanguageModel } from "ai"; +import { wrapLanguageModel, type LanguageModel } from "ai"; import { anthropicSupportsNativeXhigh, type ThinkingLevel } from "@/common/types/thinking"; import { Ok, Err } from "@/common/types/result"; import type { Result } from "@/common/types/result"; @@ -522,189 +521,6 @@ function getProviderFetch(providerConfig: ProviderConfig): typeof fetch { return Object.assign(wrappedFetch, customFetch) as typeof fetch; } -function getFormString(body: BodyInit | null | undefined, name: string): string | null { - if (!(body instanceof FormData)) { - return null; - } - const value = body.get(name); - return typeof value === "string" ? value : null; -} - -function getImageOutputFormat(contentType: string, body: BodyInit | null | undefined): string { - const mediaType = contentType.split(";", 1)[0]?.trim().toLowerCase(); - switch (mediaType) { - case "image/jpeg": - case "image/jpg": - return "jpeg"; - case "image/webp": - return "webp"; - case "image/png": - return "png"; - } - - const requestedFormat = getFormString(body, "output_format"); - if (requestedFormat === "jpeg" || requestedFormat === "png" || requestedFormat === "webp") { - return requestedFormat; - } - return "png"; -} - -function getRequestedImageCount(body: BodyInit | null | undefined): number { - const value = getFormString(body, "n"); - if (value == null || value.trim() === "") { - return 1; - } - const count = Number(value); - return Number.isInteger(count) && count > 0 ? count : 1; -} - -function createOpenAIImageToolErrorResponse(message: string): Response { - return new Response(JSON.stringify({ error: { message, type: "mux_image_response_error" } }), { - status: 502, - statusText: "Bad Gateway", - headers: { "content-type": "application/json" }, - }); -} - -function isOpenAIImageEditEndpoint(input: Parameters[0]): boolean { - const url = getFetchInputUrl(input); - if (!url) { - return false; - } - try { - return new URL(url).pathname.endsWith("/images/edits"); - } catch { - return false; - } -} - -function getOpenAIImageEditFileName(key: string, mediaType: string): string { - const prefix = key === "mask" ? "mask" : "image"; - switch (mediaType.toLowerCase().trim()) { - case "image/jpeg": - case "image/jpg": - return `${prefix}.jpg`; - case "image/webp": - return `${prefix}.webp`; - default: - return `${prefix}.png`; - } -} - -function isOpenAIImageEditUploadKey(key: string): boolean { - return key === "image" || key === "image[]" || key === "mask"; -} - -function hasUploadFileName(value: Blob): boolean { - const name = (value as { name?: unknown }).name; - return typeof name === "string" && name.length > 0; -} - -// OpenAI's `/v1/images/edits` endpoint rejects multipart uploads that lack a -// filename (the AI SDK's `Image`/`Mask` blobs are nameless), so -// `normalizeOpenAIImageEditFormData` rewrites them with a deterministic name. -// This predicate matches the form entries that need that rewrite: an upload -// field carrying a binary blob without a filename already attached. -function isNamelessOpenAIImageEditUpload(key: string, formValue: FormDataEntryValue): boolean { - return ( - isOpenAIImageEditUploadKey(key) && - typeof formValue !== "string" && - !hasUploadFileName(formValue) - ); -} - -function normalizeOpenAIImageEditFormData( - init: Parameters[1] -): Parameters[1] { - if (!(init?.body instanceof FormData)) { - return init; - } - - const entries = Array.from(init.body.entries()); - const hasNamelessUpload = entries.some(([key, formValue]) => - isNamelessOpenAIImageEditUpload(key, formValue) - ); - if (!hasNamelessUpload) { - return init; - } - - const formData = new FormData(); - for (const [key, formValue] of entries) { - if (isNamelessOpenAIImageEditUpload(key, formValue)) { - const upload = formValue as unknown as Blob; - formData.append( - key, - upload, - getOpenAIImageEditFileName(key === "image[]" ? "image" : key, upload.type ?? "image/png") - ); - } else { - formData.append(key, formValue); - } - } - - const headers = init.headers == null ? undefined : new Headers(init.headers); - if (headers?.get("content-type")?.toLowerCase().startsWith("multipart/form-data")) { - headers.delete("content-type"); - } - - return { ...init, body: formData, ...(headers != null ? { headers } : {}) }; -} - -// Some OpenAI-compatible image edit deployments return a successful image/* body -// instead of the JSON shape AI SDK expects. Normalize only that narrow success path. -export function wrapFetchWithOpenAIImageResponseNormalization( - baseFetch: typeof fetch -): typeof fetch { - const wrappedFetch = async ( - input: Parameters[0], - init?: Parameters[1] - ): Promise => { - const isImageEditRequest = isOpenAIImageEditEndpoint(input); - const requestInit = isImageEditRequest ? normalizeOpenAIImageEditFormData(init) : init; - const response = await baseFetch(input, requestInit); - if (!isImageEditRequest || !response.ok) { - return response; - } - - const contentType = response.headers.get("content-type") ?? ""; - if (!contentType.toLowerCase().startsWith("image/")) { - return response; - } - - const requestedCount = getRequestedImageCount(requestInit?.body); - if (requestedCount !== 1) { - return createOpenAIImageToolErrorResponse( - `OpenAI returned one binary image for an edit request that asked for ${requestedCount} images.` - ); - } - - const imageBuffer = await response.arrayBuffer(); - if (imageBuffer.byteLength === 0) { - return createOpenAIImageToolErrorResponse("OpenAI returned an empty binary image response."); - } - - const headers = new Headers(response.headers); - headers.set("content-type", "application/json"); - headers.delete("content-length"); - headers.delete("content-encoding"); - - return new Response( - JSON.stringify({ - created: Math.floor(Date.now() / 1_000), - data: [{ b64_json: Buffer.from(imageBuffer).toString("base64") }], - output_format: getImageOutputFormat(contentType, requestInit?.body), - }), - { - status: response.status, - statusText: response.statusText, - headers, - } - ); - }; - - return Object.assign(wrappedFetch, baseFetch) as typeof fetch; -} - // --------------------------------------------------------------------------- // Exported helpers (re-exported from aiService.ts for backward compatibility) // --------------------------------------------------------------------------- @@ -1134,85 +950,6 @@ export class ProviderModelFactory { return true; } - async createImageModel(modelString: string): Promise> { - try { - const [providerName, modelId] = parseModelString(modelString); - if (!providerName || !modelId) { - return Err({ - type: "invalid_model_string", - message: `Invalid image model string format: "${modelString}". Expected "provider:model-id"`, - }); - } - - if (providerName !== "openai") { - return Err({ - type: "provider_not_supported", - provider: providerName, - }); - } - - if (this.policyService?.isEnforced()) { - if (!this.policyService.isProviderAllowed(providerName)) { - return Err({ - type: "policy_denied", - message: `Provider ${providerName} is not allowed by policy`, - }); - } - - if (!this.policyService.isModelAllowed(providerName, modelId)) { - return Err({ - type: "policy_denied", - message: `Image model ${providerName}:${modelId} is not allowed by policy`, - }); - } - } - - const providersConfig = this.config.loadProvidersConfig() ?? {}; - let providerConfig = providersConfig.openai ?? {}; - if (isProviderDisabledInConfig(providerConfig as { enabled?: unknown })) { - return Err({ type: "provider_disabled", provider: providerName }); - } - - const { baseUrl, ...configWithoutBaseUrl } = providerConfig; - providerConfig = baseUrl - ? { ...configWithoutBaseUrl, baseURL: baseUrl } - : configWithoutBaseUrl; - - const forcedBaseUrl = this.policyService?.isEnforced() - ? this.policyService.getForcedBaseUrl(providerName) - : undefined; - if (forcedBaseUrl) { - providerConfig = { ...providerConfig, baseURL: forcedBaseUrl }; - } - - const creds = resolveProviderCredentials("openai", providerConfig); - const resolvedApiKey = await this.resolveApiKey(creds.apiKey); - if (creds.apiKey && isOpReference(creds.apiKey) && !resolvedApiKey) { - return Err({ type: "api_key_not_found", provider: providerName }); - } - if (!creds.isConfigured) { - return Err({ type: "api_key_not_found", provider: providerName }); - } - - const configWithCreds = { - ...providerConfig, - apiKey: resolvedApiKey, - ...(creds.baseUrl && !providerConfig.baseURL ? { baseURL: creds.baseUrl } : {}), - ...(creds.organization ? { organization: creds.organization } : {}), - headers: buildAppAttributionHeaders(providerConfig.headers), - }; - - const { createOpenAI } = await PROVIDER_REGISTRY.openai(); - const provider = createOpenAI({ - ...configWithCreds, - fetch: wrapFetchWithOpenAIImageResponseNormalization(getProviderFetch(providerConfig)), - }); - return Ok(provider.image(modelId)); - } catch (error) { - return Err({ type: "unknown", raw: getErrorMessage(error) }); - } - } - /** * Create an AI SDK model from a model string (e.g., "anthropic:claude-opus-4-1") * diff --git a/src/node/services/streamContextBuilder.test.ts b/src/node/services/streamContextBuilder.test.ts index cff2d10538..57cb5d0415 100644 --- a/src/node/services/streamContextBuilder.test.ts +++ b/src/node/services/streamContextBuilder.test.ts @@ -81,7 +81,6 @@ async function buildSystemContextForTest(args: { isSubagentWorkspace: boolean; effectiveAdditionalInstructions?: string; planFilePath?: string; - imageGenerationToolAvailable?: boolean; }) { return buildStreamSystemContext({ runtime: args.runtime, @@ -97,7 +96,6 @@ async function buildSystemContextForTest(args: { cfg: args.cfg, providersConfig: null, mcpServers: {}, - imageGenerationToolAvailable: args.imageGenerationToolAvailable, }); } @@ -238,98 +236,6 @@ describe("buildPlanInstructions", () => { }); describe("buildStreamSystemContext", () => { - test("omits built-in imagegen skill when image generation tool is unavailable", async () => { - using tempRoot = new DisposableTempDir("stream-system-context"); - - const projectPath = path.join(tempRoot.path, "project"); - const muxHome = path.join(tempRoot.path, "mux-home"); - await fs.mkdir(projectPath, { recursive: true }); - await fs.mkdir(muxHome, { recursive: true }); - - const metadata = createWorkspaceMetadata({ - id: "top-level-ws", - name: "top-level-workspace", - projectName: "project", - projectPath, - }); - const cfg = createProjectsConfig({ - projectPath, - workspaces: [{ id: metadata.id, name: metadata.name }], - }); - - const result = await buildSystemContextForTest({ - runtime: new TestRuntime(projectPath, muxHome), - metadata, - workspacePath: projectPath, - cfg, - isSubagentWorkspace: false, - imageGenerationToolAvailable: false, - }); - - expect(result.availableSkills?.some((skill) => skill.name === "imagegen")).toBe(false); - }); - - test("omits built-in imagegen skill by default", async () => { - using tempRoot = new DisposableTempDir("stream-system-context"); - - const projectPath = path.join(tempRoot.path, "project"); - const muxHome = path.join(tempRoot.path, "mux-home"); - await fs.mkdir(projectPath, { recursive: true }); - await fs.mkdir(muxHome, { recursive: true }); - - const metadata = createWorkspaceMetadata({ - id: "top-level-ws", - name: "top-level-workspace", - projectName: "project", - projectPath, - }); - const cfg = createProjectsConfig({ - projectPath, - workspaces: [{ id: metadata.id, name: metadata.name }], - }); - - const result = await buildSystemContextForTest({ - runtime: new TestRuntime(projectPath, muxHome), - metadata, - workspacePath: projectPath, - cfg, - isSubagentWorkspace: false, - }); - - expect(result.availableSkills?.some((skill) => skill.name === "imagegen")).toBe(false); - }); - - test("includes built-in imagegen skill when image generation tool is available", async () => { - using tempRoot = new DisposableTempDir("stream-system-context"); - - const projectPath = path.join(tempRoot.path, "project"); - const muxHome = path.join(tempRoot.path, "mux-home"); - await fs.mkdir(projectPath, { recursive: true }); - await fs.mkdir(muxHome, { recursive: true }); - - const metadata = createWorkspaceMetadata({ - id: "top-level-ws", - name: "top-level-workspace", - projectName: "project", - projectPath, - }); - const cfg = createProjectsConfig({ - projectPath, - workspaces: [{ id: metadata.id, name: metadata.name }], - }); - - const result = await buildSystemContextForTest({ - runtime: new TestRuntime(projectPath, muxHome), - metadata, - workspacePath: projectPath, - cfg, - isSubagentWorkspace: false, - imageGenerationToolAvailable: true, - }); - - expect(result.availableSkills?.some((skill) => skill.name === "imagegen")).toBe(true); - }); - test("includes the direct parent plan path ahead of caller instructions", async () => { using tempRoot = new DisposableTempDir("stream-system-context"); diff --git a/src/node/services/streamContextBuilder.ts b/src/node/services/streamContextBuilder.ts index 2eeb3e1269..154b087e13 100644 --- a/src/node/services/streamContextBuilder.ts +++ b/src/node/services/streamContextBuilder.ts @@ -41,10 +41,7 @@ import { } from "@/node/services/agentDefinitions/agentDefinitionsService"; import { isAgentEffectivelyDisabled } from "@/node/services/agentDefinitions/agentEnablement"; import { resolveAgentInheritanceChain } from "@/node/services/agentDefinitions/resolveAgentInheritanceChain"; -import { - discoverAgentSkills, - filterUnavailableImagegenSkills, -} from "@/node/services/agentSkills/agentSkillsService"; +import { discoverAgentSkills } from "@/node/services/agentSkills/agentSkillsService"; import { resolveSkillStorageContext } from "@/node/services/agentSkills/skillStorageContext"; import { buildSystemMessage } from "./systemMessage"; import { getTokenizerForModel } from "@/node/utils/main/tokenizer"; @@ -246,8 +243,6 @@ export interface BuildStreamSystemContextOptions { loadDesktopCapability?: () => Promise; /** Whether the advisor tool is available for the current agent */ advisorToolAvailable?: boolean; - /** Whether the image_generate tool is available as a direct tool for the current agent. */ - imageGenerationToolAvailable?: boolean; } /** Result of system context assembly. */ @@ -471,7 +466,6 @@ export async function buildStreamSystemContext( muxScope, loadDesktopCapability, advisorToolAvailable, - imageGenerationToolAvailable, } = opts; const workspaceLog = log.withFields({ workspaceId, workspaceName: metadata.name }); @@ -543,13 +537,6 @@ export async function buildStreamSystemContext( workspaceLog.warn("Failed to discover agent skills for tool description", { error }); } - if (availableSkills) { - availableSkills = filterUnavailableImagegenSkills( - availableSkills, - imageGenerationToolAvailable - ); - } - const ancestorPlanContext = resolveAncestorPlanContext({ metadata, workspaceId, diff --git a/src/node/services/taskService.ts b/src/node/services/taskService.ts index 8ee623fa13..e240da89c0 100644 --- a/src/node/services/taskService.ts +++ b/src/node/services/taskService.ts @@ -138,7 +138,6 @@ export interface TaskCreateArgs { programmaticToolCalling?: boolean; programmaticToolCallingExclusive?: boolean; advisorTool?: boolean; - imageGenerationTool?: boolean; execSubagentHardRestart?: boolean; }; } diff --git a/src/node/services/tools/agent_skill_read.test.ts b/src/node/services/tools/agent_skill_read.test.ts index e102800196..aa046c1bc2 100644 --- a/src/node/services/tools/agent_skill_read.test.ts +++ b/src/node/services/tools/agent_skill_read.test.ts @@ -148,30 +148,6 @@ describe("agent_skill_read", () => { } }); - it("blocks the built-in imagegen skill when the image generation tool is unavailable", async () => { - using tempDir = new TestTempDir("test-agent-skill-read-imagegen-disabled"); - const baseConfig = createTestToolConfig(tempDir.path, { - workspaceId: GLOBAL_WORKSPACE_ID, - }); - - const tool = createAgentSkillReadTool(baseConfig); - - const raw: unknown = await Promise.resolve( - tool.execute!({ name: "imagegen" }, mockToolCallOptions) - ); - - const parsed = AgentSkillReadToolResultSchema.safeParse(raw); - expect(parsed.success).toBe(true); - if (!parsed.success) { - throw new Error(parsed.error.message); - } - - expect(parsed.data.success).toBe(false); - if (!parsed.data.success) { - expect(parsed.data.error).toContain("Image Tools experiment"); - } - }); - it("allows reading global skills on disk in global-scope workspace", async () => { using tempDir = new TestTempDir("test-agent-skill-read-global"); const previousMuxRoot = process.env.MUX_ROOT; diff --git a/src/node/services/tools/agent_skill_read.ts b/src/node/services/tools/agent_skill_read.ts index 324d852cc9..8ddeabf707 100644 --- a/src/node/services/tools/agent_skill_read.ts +++ b/src/node/services/tools/agent_skill_read.ts @@ -5,12 +5,7 @@ import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools" import { TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions"; import { SkillNameSchema } from "@/common/orpc/schemas"; import { getErrorMessage } from "@/common/utils/errors"; -import { - filterUnavailableImagegenSkills, - IMAGEGEN_SKILL_DISABLED_MESSAGE, - isBuiltInImagegenSkillUnavailable, - readAgentSkill, -} from "@/node/services/agentSkills/agentSkillsService"; +import { readAgentSkill } from "@/node/services/agentSkills/agentSkillsService"; import { resolveSkillStorageContext } from "@/node/services/agentSkills/skillStorageContext"; /** @@ -22,10 +17,7 @@ function buildSkillReadDescription(config: ToolConfiguration): string { const baseDescription = TOOL_DEFINITIONS.agent_skill_read.description; // Filter out unadvertised skills from the tool description. // Unadvertised skills can still be invoked via /skill-name or agent_skill_read. - const skills = filterUnavailableImagegenSkills( - config.availableSkills ?? [], - config.imageGenerationRuntime != null - ).filter((s) => s.advertise !== false); + const skills = (config.availableSkills ?? []).filter((s) => s.advertise !== false); if (skills.length === 0) { return baseDescription; @@ -88,15 +80,6 @@ export const createAgentSkillReadTool: ToolFactory = (config: ToolConfiguration) containment: skillCtx.containment, } ); - if ( - isBuiltInImagegenSkillUnavailable(resolved.package, config.imageGenerationRuntime != null) - ) { - return { - success: false, - error: IMAGEGEN_SKILL_DISABLED_MESSAGE, - }; - } - return { success: true, skill: resolved.package, diff --git a/src/node/services/tools/agent_skill_read_file.test.ts b/src/node/services/tools/agent_skill_read_file.test.ts index 8e11abcf67..682b76551a 100644 --- a/src/node/services/tools/agent_skill_read_file.test.ts +++ b/src/node/services/tools/agent_skill_read_file.test.ts @@ -72,26 +72,6 @@ describe("agent_skill_read_file", () => { } }); - it("blocks built-in imagegen skill files when the image generation tool is unavailable", async () => { - using tempDir = new TestTempDir("test-agent-skill-read-file-imagegen-disabled"); - const baseConfig = createTestToolConfig(tempDir.path, { - workspaceId: GLOBAL_WORKSPACE_ID, - }); - - const tool = createAgentSkillReadFileTool(baseConfig); - - const result = await executeReadFile(tool, { - name: "imagegen", - filePath: "SKILL.md", - offset: 1, - limit: 25, - }); - expect(result.success).toBe(false); - if (!result.success) { - expect(result.error).toContain("Image Tools experiment"); - } - }); - it("allows reading global skill files on disk in global-scope workspace", async () => { using tempDir = new TestTempDir("test-agent-skill-read-file-global"); const previousMuxRoot = process.env.MUX_ROOT; diff --git a/src/node/services/tools/agent_skill_read_file.ts b/src/node/services/tools/agent_skill_read_file.ts index 422039e435..35a19146b6 100644 --- a/src/node/services/tools/agent_skill_read_file.ts +++ b/src/node/services/tools/agent_skill_read_file.ts @@ -5,11 +5,7 @@ import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools" import { TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions"; import { getErrorMessage } from "@/common/utils/errors"; import { SkillNameSchema } from "@/common/orpc/schemas"; -import { - IMAGEGEN_SKILL_DISABLED_MESSAGE, - isBuiltInImagegenSkillUnavailable, - readAgentSkill, -} from "@/node/services/agentSkills/agentSkillsService"; +import { readAgentSkill } from "@/node/services/agentSkills/agentSkillsService"; import { resolveSkillStorageContext } from "@/node/services/agentSkills/skillStorageContext"; import { MAX_FILE_SIZE, validateFileSize } from "@/node/services/tools/fileCommon"; import { readBuiltInSkillFile } from "@/node/services/agentSkills/builtInSkillDefinitions"; @@ -145,18 +141,6 @@ export const createAgentSkillReadFileTool: ToolFactory = (config: ToolConfigurat } ); - if ( - isBuiltInImagegenSkillUnavailable( - resolvedSkill.package, - config.imageGenerationRuntime != null - ) - ) { - return { - success: false, - error: IMAGEGEN_SKILL_DISABLED_MESSAGE, - }; - } - // Built-in skills are embedded in the app bundle (no filesystem access). if (resolvedSkill.package.scope === "built-in") { const builtIn = readBuiltInSkillFile(parsedName.data, filePath); diff --git a/src/node/services/tools/imageArtifacts.ts b/src/node/services/tools/imageArtifacts.ts deleted file mode 100644 index e712091f83..0000000000 --- a/src/node/services/tools/imageArtifacts.ts +++ /dev/null @@ -1,433 +0,0 @@ -import path from "node:path"; -import assert from "node:assert/strict"; -import sharp from "sharp"; -import type { LanguageModelV2Usage } from "@ai-sdk/provider"; - -import type { ToolConfiguration } from "@/common/utils/tools/tools"; -import { DEFAULT_IMAGE_GENERATION_MODEL } from "@/common/types/imageGeneration"; -import { getErrorMessage } from "@/common/utils/errors"; -import { sanitizeErrorMessageForDisplay } from "@/common/utils/providerOutputSanitization"; -import { shellQuote } from "@/common/utils/shell"; -import { log } from "@/node/services/log"; -import { getRasterImageDimensionsFromMetadata } from "@/node/utils/attachments/resizeRasterImageAttachment"; - -const THUMBNAIL_MAX_DIMENSION = 512; -const THUMBNAIL_QUALITY = 75; -const THUMBNAIL_MEDIA_TYPE = "image/webp"; - -// Generic "go look at your OpenAI account" advice used as a fallback setup hint -// whenever an image tool surfaces an opaque provider failure (unknown -// `formatImageModelError` shapes, or the post-`generateImage` catch in -// `image_generate` / `image_edit`). Kept as a single source of truth so the -// guidance stays consistent across every image-tool error path. -export const IMAGE_TOOL_PROVIDER_SETUP_HINT = - "Check OpenAI provider credentials, billing, rate limits, and content policy."; - -type ImageModelOperation = "generation" | "editing"; -type ImageToolName = "image_generate" | "image_edit"; - -/** - * Convert image-model setup failures from provider/model policy into tool errors. - * Expected inputs are Mux error records like `{ type: string; message?: string; raw?: string }`. - */ -export function formatImageModelError( - error: unknown, - operation: ImageModelOperation -): { error: string; setupHint?: string } { - if (typeof error !== "object" || error === null) { - return { error: getErrorMessage(error) }; - } - - const record = error as Record; - switch (record.type) { - case "api_key_not_found": - return { - error: `Image ${operation} requires an OpenAI API key.`, - setupHint: - "Configure an OpenAI API key in Settings > Providers or set OPENAI_API_KEY; Codex OAuth does not currently provide image credentials.", - }; - case "provider_disabled": - return { - error: "The OpenAI provider is disabled.", - setupHint: `Enable OpenAI in Settings > Providers to use image ${operation}.`, - }; - case "provider_not_supported": - return { - error: `Image ${operation} v1 only supports OpenAI image models.`, - setupHint: `Choose ${DEFAULT_IMAGE_GENERATION_MODEL} in Settings > Experiments > Image Tools.`, - }; - case "invalid_model_string": - return { - error: typeof record.message === "string" ? record.message : "Invalid image model string.", - setupHint: `Use the provider:model-id format, for example ${DEFAULT_IMAGE_GENERATION_MODEL}.`, - }; - case "policy_denied": - return { - error: - typeof record.message === "string" - ? record.message - : `Image ${operation} is denied by policy.`, - }; - case "unknown": - return { - error: sanitizeErrorMessageForDisplay( - typeof record.raw === "string" ? record.raw : getErrorMessage(error) - ), - setupHint: IMAGE_TOOL_PROVIDER_SETUP_HINT, - }; - default: - return { - error: sanitizeErrorMessageForDisplay(getErrorMessage(error)), - setupHint: IMAGE_TOOL_PROVIDER_SETUP_HINT, - }; - } -} - -export function buildOpenAIImageProviderOptions( - quality: string | null | undefined, - outputFormat: string | null | undefined -): { openai: { quality?: string; outputFormat: string } } { - return { - openai: { - ...(quality != null ? { quality } : {}), - outputFormat: outputFormat ?? "png", - }, - }; -} - -export interface ImageDimensions { - width: number; - height: number; -} - -function sanitizePathSegment(value: string, fallback: string): string { - const sanitized = value.replace(/[^a-zA-Z0-9_-]/g, "_"); - return sanitized.length > 0 ? sanitized : fallback; -} - -export function getExtension(mediaType: string, outputFormat: string | null | undefined): string { - switch (mediaType.toLowerCase().trim()) { - case "image/jpeg": - return "jpg"; - case "image/webp": - return "webp"; - case "image/png": - return "png"; - } - - if (outputFormat === "jpeg" || outputFormat === "png" || outputFormat === "webp") { - return outputFormat === "jpeg" ? "jpg" : outputFormat; - } - - return "png"; -} - -export async function writeRuntimeFile( - config: ToolConfiguration, - filePath: string, - data: Uint8Array, - logName: string -): Promise { - assert(data.length > 0, "image artifact data must not be empty"); - const writer = config.runtime.writeFile(filePath).getWriter(); - try { - await writer.write(data); - await writer.close(); - } catch (error) { - try { - await writer.abort(error); - } catch (abortError) { - log.debug(`${logName}: failed to abort artifact write`, { - error: getErrorMessage(abortError), - }); - } - throw error; - } -} - -export async function createThumbnail(data: Uint8Array): Promise<{ - data: string; - mediaType: string; - width: number; - height: number; -}> { - const resized = sharp(Buffer.from(data)).resize({ - width: THUMBNAIL_MAX_DIMENSION, - height: THUMBNAIL_MAX_DIMENSION, - fit: "inside", - withoutEnlargement: true, - }); - const buffer = await resized.webp({ quality: THUMBNAIL_QUALITY }).toBuffer(); - const metadata = await sharp(buffer).metadata(); - const width = metadata.width; - const height = metadata.height; - assert(width != null && width > 0, "thumbnail width must be positive"); - assert(height != null && height > 0, "thumbnail height must be positive"); - return { - data: buffer.toString("base64"), - mediaType: THUMBNAIL_MEDIA_TYPE, - width, - height, - }; -} - -export const getImageDimensionsFromMetadata = getRasterImageDimensionsFromMetadata; - -export async function getImageDimensions(data: Uint8Array): Promise { - const dimensions = getImageDimensionsFromMetadata(await sharp(Buffer.from(data)).metadata()); - assert(dimensions != null, "image dimensions must be readable"); - return dimensions; -} - -// OpenAI returns per-image diagnostics (revised prompts, token counts) under -// `providerMetadata.openai.images`. Both revised-prompt and usage extraction need -// the same narrowed array, so the walk lives in one helper. -function getOpenAIImageInfos(providerMetadata: unknown): unknown[] | undefined { - if (typeof providerMetadata !== "object" || providerMetadata === null) { - return undefined; - } - const openai = (providerMetadata as { openai?: unknown }).openai; - if (typeof openai !== "object" || openai === null) { - return undefined; - } - const images = (openai as { images?: unknown }).images; - return Array.isArray(images) ? images : undefined; -} - -export function getRevisedPrompt(providerMetadata: unknown, index: number): string | undefined { - const images = getOpenAIImageInfos(providerMetadata); - if (!images) { - return undefined; - } - const image: unknown = images[index]; - if (typeof image !== "object" || image === null) { - return undefined; - } - const revisedPrompt = (image as { revisedPrompt?: unknown }).revisedPrompt; - return typeof revisedPrompt === "string" && revisedPrompt.trim().length > 0 - ? revisedPrompt - : undefined; -} - -function numberOrZero(value: unknown): number { - return typeof value === "number" && Number.isFinite(value) ? value : 0; -} - -function getOpenAIImageTokenUsage(providerMetadata: unknown): LanguageModelV2Usage | undefined { - const images = getOpenAIImageInfos(providerMetadata); - if (!images) { - return undefined; - } - - let inputTokens = 0; - let outputTokens = 0; - for (const image of images) { - if (typeof image !== "object" || image === null) { - continue; - } - inputTokens += numberOrZero((image as { textTokens?: unknown }).textTokens); - outputTokens += numberOrZero((image as { imageTokens?: unknown }).imageTokens); - } - - if (inputTokens === 0 && outputTokens === 0) { - return undefined; - } - - return { - inputTokens, - outputTokens, - totalTokens: inputTokens + outputTokens, - }; -} - -export function getLanguageModelUsageForImageResult( - usage: unknown, - providerMetadata: unknown -): LanguageModelV2Usage | undefined { - if (usage != null && typeof usage === "object") { - const candidate = usage as { - inputTokens?: unknown; - outputTokens?: unknown; - totalTokens?: unknown; - cachedInputTokens?: unknown; - reasoningTokens?: unknown; - }; - const hasTokenUsage = - typeof candidate.inputTokens === "number" || - typeof candidate.outputTokens === "number" || - typeof candidate.totalTokens === "number"; - if (hasTokenUsage) { - return { - inputTokens: typeof candidate.inputTokens === "number" ? candidate.inputTokens : undefined, - outputTokens: - typeof candidate.outputTokens === "number" ? candidate.outputTokens : undefined, - totalTokens: typeof candidate.totalTokens === "number" ? candidate.totalTokens : undefined, - cachedInputTokens: - typeof candidate.cachedInputTokens === "number" ? candidate.cachedInputTokens : undefined, - reasoningTokens: - typeof candidate.reasoningTokens === "number" ? candidate.reasoningTokens : undefined, - }; - } - } - - return getOpenAIImageTokenUsage(providerMetadata); -} - -export function reportImageToolUsage( - config: ToolConfiguration, - toolName: ImageToolName, - modelString: string, - usage: unknown, - providerMetadata: unknown, - toolCallId?: string -): void { - const usageForModelAccounting = getLanguageModelUsageForImageResult(usage, providerMetadata); - if (config.reportModelUsage == null || usageForModelAccounting == null) { - return; - } - - try { - config.reportModelUsage({ - source: "tool", - toolName, - model: modelString, - usage: usageForModelAccounting, - providerMetadata: providerMetadata as Record | undefined, - toolCallId, - timestamp: Date.now(), - }); - } catch (error) { - log.debug(`${toolName}: failed to report model usage`, { - error: getErrorMessage(error), - }); - } -} - -interface ProviderImageArtifact { - mediaType?: string | undefined; - uint8Array: Uint8Array; -} - -interface ProcessImageArtifactsOptions { - config: ToolConfiguration; - outputDir: string; - toolName: ImageToolName; - outputFormat?: string | null; - providerMetadata: unknown; - images: Iterable; - getExtraFields: ( - bytes: Uint8Array, - index: number, - filename: string - ) => Promise<{ success: true; fields: Extra } | { success: false; error: string }>; -} - -export interface ImageToolArtifactBase { - path: string; - filename: string; - mediaType: string; - thumbnail?: { - data: string; - mediaType: string; - width: number; - height: number; - }; - revisedPrompt?: string; -} - -async function cleanupWrittenArtifacts( - config: ToolConfiguration, - toolName: ImageToolName, - paths: readonly string[] -): Promise { - if (paths.length === 0) { - return; - } - - try { - const cleanup = await config.runtime.exec(`rm -f -- ${paths.map(shellQuote).join(" ")}`, { - cwd: config.cwd, - timeout: 5, - }); - const exitCode = await cleanup.exitCode; - if (exitCode !== 0) { - log.debug(`${toolName}: partial image artifact cleanup exited non-zero`, { exitCode }); - } - } catch (error) { - log.debug(`${toolName}: failed to clean up partial image artifacts`, { - error: getErrorMessage(error), - }); - } -} - -export async function processImageArtifacts>( - options: ProcessImageArtifactsOptions -): Promise< - | { success: true; images: Array; warnings: string[] } - | { success: false; error: string } -> { - const images: Array = []; - const warnings: string[] = []; - const writtenPaths: string[] = []; - - try { - for (const [index, image] of Array.from(options.images).entries()) { - const mediaType = image.mediaType ?? `image/${options.outputFormat ?? "png"}`; - const extension = getExtension(mediaType, options.outputFormat); - const filename = `image-${index + 1}.${extension}`; - const artifactPath = options.config.runtime.normalizePath(filename, options.outputDir); - const bytes = image.uint8Array; - - const extraResult = await options.getExtraFields(bytes, index, filename); - if (!extraResult.success) { - await cleanupWrittenArtifacts(options.config, options.toolName, writtenPaths); - return extraResult; - } - - await writeRuntimeFile(options.config, artifactPath, bytes, options.toolName); - writtenPaths.push(artifactPath); - - let thumbnail; - try { - thumbnail = await createThumbnail(bytes); - } catch (error) { - warnings.push(`Thumbnail generation failed for ${filename}: ${getErrorMessage(error)}`); - } - - const revisedPrompt = getRevisedPrompt(options.providerMetadata, index); - images.push({ - path: artifactPath, - filename, - mediaType, - ...extraResult.fields, - ...(thumbnail ? { thumbnail } : {}), - ...(revisedPrompt ? { revisedPrompt } : {}), - }); - } - } catch (error) { - await cleanupWrittenArtifacts(options.config, options.toolName, writtenPaths); - throw error; - } - - return { success: true, images, warnings }; -} - -export async function getImageOutputDir( - config: ToolConfiguration, - artifactRoot: "generated_images" | "edited_images", - fallbackToolName: string, - toolCallId?: string -): Promise { - const muxHome = await config.runtime.resolvePath(config.runtime.getMuxHome()); - const workspaceSegment = sanitizePathSegment(config.workspaceId ?? "workspace", "workspace"); - const callSegment = sanitizePathSegment( - toolCallId ?? `${fallbackToolName}_${Date.now()}`, - fallbackToolName - ); - const outputDir = config.runtime.normalizePath( - path.posix.join(artifactRoot, workspaceSegment, callSegment), - muxHome - ); - await config.runtime.ensureDir(outputDir); - return outputDir; -} diff --git a/src/node/services/tools/image_edit.test.ts b/src/node/services/tools/image_edit.test.ts deleted file mode 100644 index e64b191cb5..0000000000 --- a/src/node/services/tools/image_edit.test.ts +++ /dev/null @@ -1,881 +0,0 @@ -import * as fs from "fs/promises"; -import * as path from "path"; - -import { describe, expect, test } from "bun:test"; -import type { ToolExecutionOptions } from "ai"; -import type { ImageModelV2 } from "@ai-sdk/provider"; - -import type { ImageEditToolResult } from "@/common/types/tools"; -import { Err, Ok } from "@/common/types/result"; -import { LocalRuntime } from "@/node/runtime/LocalRuntime"; -import { createImageEditTool } from "./image_edit"; -import { TestTempDir, createTestToolConfig } from "./testHelpers"; - -const testPngBase64 = - "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAACXBIWXMAAAPoAAAD6AG1e1JrAAAAHUlEQVR4nGNgYPj/nzLMMGoAw2gYMIyGwf9hEAYAMqb+ENPK2kcAAAAASUVORK5CYII="; -const sharpInvalidPngBase64 = - "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAFgwJ/lKrL7wAAAABJRU5ErkJggg=="; -const testGifBytes = Buffer.from("R0lGODlhAQABAIAAAAAAAP///ywAAAAAAQABAAACAUwAOw==", "base64"); -const testPngBytes = Buffer.from(testPngBase64, "base64"); - -function createMockImageModel(doGenerate: ImageModelV2["doGenerate"]): ImageModelV2 { - return { - specificationVersion: "v2", - provider: "test", - modelId: "test-image-model", - maxImagesPerCall: 10, - doGenerate, - }; -} - -class ImageEditTestRuntime extends LocalRuntime { - constructor( - projectPath: string, - private readonly muxHome: string - ) { - super(projectPath); - } - - override getMuxHome(): string { - return this.muxHome; - } -} - -function createImageEditTestConfig( - workspacePath: string, - options?: Parameters[1] -) { - return createTestToolConfig(workspacePath, { - ...options, - runtime: options?.runtime ?? new ImageEditTestRuntime(workspacePath, workspacePath), - }); -} - -let nextToolCallId = 0; -function createMockToolCallOptions(): ToolExecutionOptions { - nextToolCallId += 1; - return { - toolCallId: `image-edit-call-${nextToolCallId}`, - messages: [], - }; -} - -describe("image_edit tool", () => { - test("rejects blank prompts before reading source files or creating an image model", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - let createImageModelCalled = false; - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => { - createImageModelCalled = true; - return Promise.reject(new Error("should not create a model for blank prompts")); - }, - }, - }); - - const result = (await tool.execute!( - { sourcePath: "missing.png", prompt: " " }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected blank prompt to fail"); - } - expect(result.error).toContain("prompt is required"); - expect(createImageModelCalled).toBe(false); - }); - - test("rejects requests above the configured maximum before reading source files", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - let createImageModelCalled = false; - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => { - createImageModelCalled = true; - return Promise.reject(new Error("should not create a model when count exceeds limit")); - }, - }, - }); - - const result = (await tool.execute!( - { sourcePath: "missing.png", prompt: "Make variants", n: 3 }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected image_edit to fail when n exceeds configured maximum"); - } - expect(result.error).toContain("configured for a maximum of 2"); - expect(createImageModelCalled).toBe(false); - }); - - test("rejects source directories before provider calls", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "source-dir"); - await fs.mkdir(sourcePath); - let createImageModelCalled = false; - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => { - createImageModelCalled = true; - return Promise.reject(new Error("should not create a model for directories")); - }, - }, - }); - - const result = (await tool.execute!( - { sourcePath, prompt: "Make it blue" }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected directory source to fail"); - } - expect(result.error).toContain("Source image is a directory"); - expect(createImageModelCalled).toBe(false); - }); - - test("rejects oversized sources before reading file content", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "source.png"); - await fs.writeFile(sourcePath, testPngBytes); - - class OversizedStatRuntime extends LocalRuntime { - override async stat(filePath: string, abortSignal?: AbortSignal) { - const stat = await super.stat(filePath, abortSignal); - return filePath === sourcePath ? { ...stat, size: 51 * 1024 * 1024 } : stat; - } - } - - let createImageModelCalled = false; - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path, { - runtime: new OversizedStatRuntime(workspaceDir.path), - }), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => { - createImageModelCalled = true; - return Promise.reject(new Error("should not create a model for oversized sources")); - }, - }, - }); - - const result = (await tool.execute!( - { sourcePath, prompt: "Make it blue" }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected oversized source to fail"); - } - expect(result.error).toContain("exceeds the 50 MB"); - expect(createImageModelCalled).toBe(false); - }); - - test("returns a read error for missing source files before provider calls", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "missing.png"); - let createImageModelCalled = false; - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => { - createImageModelCalled = true; - return Promise.reject(new Error("should not create a model for missing sources")); - }, - }, - }); - - const result = (await tool.execute!( - { sourcePath, prompt: "Make it blue" }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected missing source to fail"); - } - expect(result.error).toContain("Failed to read source image"); - expect(createImageModelCalled).toBe(false); - }); - - test("rejects decodable but unsupported image formats before provider calls", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "unsupported.gif"); - await fs.writeFile(sourcePath, testGifBytes); - let createImageModelCalled = false; - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => { - createImageModelCalled = true; - return Promise.reject(new Error("should not create a model for unsupported formats")); - }, - }, - }); - - const result = (await tool.execute!( - { sourcePath, prompt: "Make it blue" }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected unsupported source image to fail"); - } - expect(result.error).toContain("PNG, JPEG, or WebP"); - expect(createImageModelCalled).toBe(false); - }); - - test("rejects files that are not decodable supported images before provider calls", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "not-an-image.png"); - await fs.writeFile(sourcePath, "this is not image data"); - let createImageModelCalled = false; - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => { - createImageModelCalled = true; - return Promise.reject(new Error("should not create a model for invalid source images")); - }, - }, - }); - - const result = (await tool.execute!( - { sourcePath, prompt: "Make the square blue" }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected invalid source image to fail"); - } - expect(result.error).toContain("Source image"); - expect(createImageModelCalled).toBe(false); - }); - - test("returns a user-facing error when source size changes during read", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "source.png"); - await fs.writeFile(sourcePath, testPngBytes); - - class StaleStatRuntime extends LocalRuntime { - override async stat(filePath: string, abortSignal?: AbortSignal) { - const stat = await super.stat(filePath, abortSignal); - return filePath === sourcePath ? { ...stat, size: stat.size + 1 } : stat; - } - } - - let createImageModelCalled = false; - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path, { - runtime: new StaleStatRuntime(workspaceDir.path), - }), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => { - createImageModelCalled = true; - return Promise.reject(new Error("should not create a model for stale source reads")); - }, - }, - }); - - const result = (await tool.execute!( - { sourcePath, prompt: "Make the square blue" }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected stale source read to fail"); - } - expect(result.error).toContain("Source image was modified"); - expect(createImageModelCalled).toBe(false); - }); - - test("returns actionable setup failures from image model creation", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "source.png"); - await fs.writeFile(sourcePath, testPngBytes); - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "google:imagen-test", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve(Err({ type: "provider_not_supported", provider: "google" })), - }, - }); - - const result = (await tool.execute!( - { sourcePath, prompt: "Make it blue" }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected provider setup failure"); - } - expect(result.error).toContain("only supports OpenAI"); - }); - - test("passes OpenAI image edit options using AI SDK option names", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "source.png"); - await fs.writeFile(sourcePath, testPngBytes); - let capturedProviderOptions: unknown; - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-2", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel((options) => { - capturedProviderOptions = options.providerOptions; - return Promise.resolve({ - images: [testPngBase64], - warnings: [], - response: { timestamp: new Date(), modelId: "test-image-model", headers: {} }, - providerMetadata: {}, - }); - }) - ) - ), - }, - }); - - const result = (await tool.execute!( - { sourcePath, prompt: "Make it blue", quality: "high", outputFormat: "webp" }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - expect(result.success).toBe(true); - expect(capturedProviderOptions).toEqual({ - openai: { quality: "high", outputFormat: "webp" }, - }); - }); - - test("returns provider errors when image editing generation fails", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "source.png"); - await fs.writeFile(sourcePath, testPngBytes); - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok(createMockImageModel(() => Promise.reject(new Error("provider exploded")))) - ), - }, - }); - - const result = (await tool.execute!( - { sourcePath, prompt: "Make it blue" }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected provider generation failure"); - } - expect(result.error).toContain("Image editing failed"); - expect(result.error).toContain("provider exploded"); - }); - - test("sanitizes binary-like provider errors when image editing fails", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "source.png"); - await fs.writeFile(sourcePath, testPngBytes); - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel(() => - Promise.reject(new Error("Invalid JSON response: \u001b\u0000\ufffdpayload")) - ) - ) - ), - }, - }); - - const result = (await tool.execute!( - { sourcePath, prompt: "Make it blue" }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected provider generation failure"); - } - expect(result.error).toContain("Image editing failed:"); - expect(result.error).toContain("nul=1"); - expect(result.error).not.toContain("\u0000"); - expect(result.error).not.toContain("�"); - }); - - test("does not write edited artifacts when output dimensions cannot be read", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "source.png"); - await fs.writeFile(sourcePath, testPngBytes); - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel(() => - Promise.resolve({ - images: [Buffer.from("not an image").toString("base64")], - warnings: [], - response: { timestamp: new Date(), modelId: "test-image-model", headers: {} }, - providerMetadata: {}, - }) - ) - ) - ), - }, - }); - - const toolCallOptions = createMockToolCallOptions(); - const result = (await tool.execute!( - { sourcePath, prompt: "Make it blue" }, - toolCallOptions - )) as ImageEditToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected invalid provider image to fail"); - } - expect(result.error).toContain("Edited image dimensions could not be read"); - let artifactExists = true; - try { - await fs.access( - path.join( - workspaceDir.path, - `edited_images/test-workspace/${toolCallOptions.toolCallId}/image-1.png` - ) - ); - } catch { - artifactExists = false; - } - expect(artifactExists).toBe(false); - }); - - test("stops reading sources that exceed the upload limit despite a stale stat", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "source.png"); - await fs.writeFile(sourcePath, testPngBytes); - - let readCanceled = false; - class UnboundedReadRuntime extends LocalRuntime { - override async stat(filePath: string, abortSignal?: AbortSignal) { - const stat = await super.stat(filePath, abortSignal); - return filePath === sourcePath ? { ...stat, size: 0 } : stat; - } - - override readFile(filePath: string, abortSignal?: AbortSignal): ReadableStream { - if (filePath !== sourcePath) { - return super.readFile(filePath, abortSignal); - } - - let emittedBytes = 0; - return new ReadableStream({ - pull(controller) { - if (abortSignal?.aborted) { - controller.error(new Error("aborted")); - return; - } - emittedBytes += 1024 * 1024; - controller.enqueue(new Uint8Array(1024 * 1024)); - if (emittedBytes > 51 * 1024 * 1024) { - controller.close(); - } - }, - cancel() { - readCanceled = true; - }, - }); - } - } - - let createImageModelCalled = false; - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path, { - runtime: new UnboundedReadRuntime(workspaceDir.path), - }), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => { - createImageModelCalled = true; - return Promise.reject( - new Error("should not create a model for oversized source streams") - ); - }, - }, - }); - - const result = (await tool.execute!( - { sourcePath, prompt: "Make it blue" }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected oversized source stream to fail"); - } - expect(result.error).toContain("Stream exceeded"); - expect(readCanceled).toBe(true); - expect(createImageModelCalled).toBe(false); - }); - - test("cleans up earlier edited artifacts when a later output image is invalid", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "source.png"); - await fs.writeFile(sourcePath, testPngBytes); - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel(() => - Promise.resolve({ - images: [testPngBase64, Buffer.from("not an image").toString("base64")], - warnings: [], - response: { timestamp: new Date(), modelId: "test-image-model", headers: {} }, - providerMetadata: {}, - }) - ) - ) - ), - }, - }); - - const toolCallOptions = createMockToolCallOptions(); - const result = (await tool.execute!( - { sourcePath, prompt: "Make it blue", n: 2 }, - toolCallOptions - )) as ImageEditToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected invalid second provider image to fail"); - } - expect(result.error).toContain("Edited image dimensions could not be read"); - let firstArtifactExists = true; - try { - await fs.access( - path.join( - workspaceDir.path, - `edited_images/test-workspace/${toolCallOptions.toolCallId}/image-1.png` - ) - ); - } catch { - firstArtifactExists = false; - } - expect(firstArtifactExists).toBe(false); - }); - - test("writes multiple edited artifacts with per-image thumbnails", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "source.png"); - await fs.writeFile(sourcePath, testPngBytes); - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 4, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel(() => - Promise.resolve({ - images: [testPngBase64, testPngBase64], - warnings: [], - response: { timestamp: new Date(), modelId: "test-image-model", headers: {} }, - providerMetadata: {}, - }) - ) - ) - ), - }, - }); - - const result = (await tool.execute!( - { sourcePath, prompt: "Two tiny squares", n: 2 }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - if (!result.success) { - throw new Error(`Expected image_edit to succeed, got ${result.error}`); - } - expect(result.requestedCount).toBe(2); - expect(result.images).toHaveLength(2); - expect(result.images.map((image) => image.filename)).toEqual(["image-1.png", "image-2.png"]); - expect(result.images.map((image) => image.outputDimensions)).toEqual([ - { width: 16, height: 16 }, - { width: 16, height: 16 }, - ]); - expect(result.images.every((image) => image.thumbnail?.mediaType === "image/webp")).toBe(true); - await Promise.all(result.images.map((image) => fs.stat(image.path))); - }); - - test("keeps edited image results when thumbnail creation fails", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "source.png"); - await fs.writeFile(sourcePath, testPngBytes); - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel(() => - Promise.resolve({ - images: [sharpInvalidPngBase64], - warnings: [], - response: { timestamp: new Date(), modelId: "test-image-model", headers: {} }, - providerMetadata: {}, - }) - ) - ) - ), - }, - }); - - const result = (await tool.execute!( - { sourcePath, prompt: "Make it blue" }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - if (!result.success) { - throw new Error(`Expected image_edit to succeed, got ${result.error}`); - } - expect(result.images).toHaveLength(1); - expect(result.images[0]?.outputDimensions).toEqual({ width: 1, height: 1 }); - expect(result.images[0]?.thumbnail).toBeUndefined(); - expect(result.warnings?.[0]).toContain("Thumbnail generation failed for image-1.png"); - }); - - test("writes edited artifacts with source and output metadata", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const sourcePath = path.join(workspaceDir.path, "source-with-wrong-extension.txt"); - await fs.writeFile(sourcePath, testPngBytes); - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel(() => - Promise.resolve({ - images: [testPngBase64], - warnings: [], - response: { timestamp: new Date(), modelId: "test-image-model", headers: {} }, - providerMetadata: { - openai: { - images: [{ revisedPrompt: "Make the small square blue" }], - }, - }, - }) - ) - ) - ), - }, - }); - - const result = (await tool.execute!( - { sourcePath, prompt: "Make the square blue" }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - if (!result.success) { - throw new Error(`Expected image_edit to succeed, got ${result.error}`); - } - expect(result.source).toMatchObject({ - path: sourcePath, - resolvedPath: sourcePath, - sizeBytes: testPngBytes.length, - dimensions: { width: 16, height: 16 }, - }); - expect(result.images).toHaveLength(1); - const image = result.images[0]; - expect(image).toMatchObject({ - filename: "image-1.png", - mediaType: "image/png", - outputDimensions: { width: 16, height: 16 }, - revisedPrompt: "Make the small square blue", - }); - expect(image?.path).toContain("edited_images/test-workspace/image-edit-call"); - expect(image?.thumbnail).toMatchObject({ mediaType: "image/webp" }); - if (!image) { - throw new Error("Expected an edited image result"); - } - await fs.stat(image.path); - }); - - test("records requested and resolved paths for symlinked sources", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const realSourcePath = path.join(workspaceDir.path, "real-source.png"); - const symlinkPath = path.join(workspaceDir.path, "linked-source.png"); - await fs.writeFile(realSourcePath, testPngBytes); - await fs.symlink(realSourcePath, symlinkPath); - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel(() => - Promise.resolve({ - images: [testPngBase64], - warnings: [], - response: { timestamp: new Date(), modelId: "test-image-model", headers: {} }, - providerMetadata: {}, - }) - ) - ) - ), - }, - }); - - const result = (await tool.execute!( - { sourcePath: symlinkPath, prompt: "Make the square blue" }, - createMockToolCallOptions() - )) as ImageEditToolResult; - - if (!result.success) { - throw new Error(`Expected image_edit to succeed, got ${result.error}`); - } - expect(result.source.path).toBe(symlinkPath); - expect(result.source.resolvedPath).toBe(realSourcePath); - }); - - test("omits thumbnails from model-visible image_edit output", async () => { - using workspaceDir = new TestTempDir("image-edit-workspace"); - const tool = createImageEditTool({ - ...createImageEditTestConfig(workspaceDir.path), - imageEditingEnabled: true, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => Promise.reject(new Error("not used")), - }, - }); - - const modelOutput = await tool.toModelOutput!({ - toolCallId: "image-edit-call", - input: {}, - output: { - success: true, - model: "openai:gpt-image-1.5", - prompt: "edit", - requestedCount: 1, - source: { - path: "/tmp/source.png", - resolvedPath: "/tmp/source.png", - sizeBytes: 10, - dimensions: { width: 16, height: 16 }, - }, - images: [ - { - path: "/tmp/image.png", - filename: "image.png", - mediaType: "image/png", - outputDimensions: { width: 16, height: 16 }, - thumbnail: { - data: "large-base64", - mediaType: "image/webp", - width: 512, - height: 512, - }, - }, - ], - }, - }); - - expect(modelOutput).toEqual({ - type: "json", - value: { - success: true, - model: "openai:gpt-image-1.5", - prompt: "edit", - requestedCount: 1, - source: { - path: "/tmp/source.png", - sizeBytes: 10, - dimensions: { width: 16, height: 16 }, - }, - images: [ - { - path: "/tmp/image.png", - filename: "image.png", - mediaType: "image/png", - outputDimensions: { width: 16, height: 16 }, - }, - ], - }, - }); - }); -}); diff --git a/src/node/services/tools/image_edit.ts b/src/node/services/tools/image_edit.ts deleted file mode 100644 index cf1f750d48..0000000000 --- a/src/node/services/tools/image_edit.ts +++ /dev/null @@ -1,230 +0,0 @@ -import * as fs from "node:fs/promises"; -import assert from "node:assert/strict"; -import type { JSONValue } from "@ai-sdk/provider"; -import { generateImage, tool } from "ai"; -import sharp from "sharp"; - -import type { ImageEditToolResult } from "@/common/types/tools"; -import { stripImageToolOutputForModel } from "@/common/utils/imageGenerationToolResult"; -import { getErrorMessage } from "@/common/utils/errors"; -import { sanitizeErrorMessageForDisplay } from "@/common/utils/providerOutputSanitization"; -import { TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions"; -import type { ToolFactory } from "@/common/utils/tools/tools"; -import { LocalBaseRuntime } from "@/node/runtime/LocalBaseRuntime"; -import { streamToUint8Array } from "@/node/runtime/streamUtils"; -import type { ImageDimensions } from "./imageArtifacts"; -import { - buildOpenAIImageProviderOptions, - formatImageModelError, - getImageDimensions, - getImageDimensionsFromMetadata, - getImageOutputDir, - IMAGE_TOOL_PROVIDER_SETUP_HINT, - processImageArtifacts, - reportImageToolUsage, -} from "./imageArtifacts"; - -const MAX_SOURCE_IMAGE_BYTES = 50 * 1024 * 1024; -const SUPPORTED_SOURCE_FORMATS = new Set(["png", "jpeg", "webp"]); - -function getSupportedMediaType(format: string | undefined): string | null { - if (!format || !SUPPORTED_SOURCE_FORMATS.has(format)) { - return null; - } - return format === "jpeg" ? "image/jpeg" : `image/${format}`; -} - -export const createImageEditTool: ToolFactory = (config) => { - return tool({ - description: TOOL_DEFINITIONS.image_edit.description, - inputSchema: TOOL_DEFINITIONS.image_edit.schema, - toModelOutput: ({ output }) => ({ - type: "json", - value: stripImageToolOutputForModel(output) as JSONValue, - }), - execute: async ( - { sourcePath, prompt, n, quality, outputFormat }, - { abortSignal, toolCallId } - ) => { - const runtime = config.imageGenerationRuntime; - assert(runtime, "imageGenerationRuntime must be set when image_edit is registered"); - const modelString = runtime.modelString.trim(); - assert(modelString.length > 0, "image editing modelString must be non-empty"); - assert( - Number.isInteger(runtime.maxImagesPerCall) && runtime.maxImagesPerCall > 0, - "image editing maxImagesPerCall must be a positive integer" - ); - - const trimmedPrompt = prompt.trim(); - if (!trimmedPrompt) { - return { success: false, error: "Image edit prompt is required." }; - } - - const requestedCount = n ?? 1; - if (!Number.isInteger(requestedCount) || requestedCount <= 0) { - return { success: false, error: "Image edit count must be a positive integer." }; - } - if (requestedCount > runtime.maxImagesPerCall) { - return { - success: false, - error: `Requested ${requestedCount} edited images, but Image Tools is configured for a maximum of ${runtime.maxImagesPerCall}.`, - setupHint: "Adjust Settings > Experiments > Image Tools or request fewer images.", - }; - } - - const requestedSourcePath = sourcePath.trim(); - if (!requestedSourcePath) { - return { success: false, error: "Source image path is required." }; - } - - const normalizedSourcePath = config.runtime.normalizePath(requestedSourcePath, config.cwd); - let resolvedSourcePath: string; - let sourceSizeBytes: number; - let sourceBytes: Uint8Array; - let sourceDimensions: { width: number; height: number }; - try { - const stat = await config.runtime.stat(normalizedSourcePath, abortSignal); - if (stat.isDirectory) { - return { success: false, error: `Source image is a directory: ${requestedSourcePath}` }; - } - sourceSizeBytes = stat.size; - if (sourceSizeBytes > MAX_SOURCE_IMAGE_BYTES) { - return { - success: false, - error: `Source image is ${sourceSizeBytes} bytes, which exceeds the 50 MB Image Edit Tool limit.`, - }; - } - - const runtimeResolvedSourcePath = await config.runtime.resolvePath(normalizedSourcePath); - // Host realpath is only valid for runtimes backed by the local filesystem. - if (config.runtime instanceof LocalBaseRuntime) { - try { - resolvedSourcePath = await fs.realpath(runtimeResolvedSourcePath); - } catch { - resolvedSourcePath = runtimeResolvedSourcePath; - } - } else { - resolvedSourcePath = runtimeResolvedSourcePath; - } - sourceBytes = await streamToUint8Array( - config.runtime.readFile(resolvedSourcePath, abortSignal), - MAX_SOURCE_IMAGE_BYTES - ); - if (sourceBytes.length !== sourceSizeBytes) { - return { - success: false, - error: - "Source image was modified between reading its size and reading its content. Try again.", - } satisfies ImageEditToolResult; - } - } catch (error) { - return { - success: false, - error: `Failed to read source image: ${getErrorMessage(error)}`, - }; - } - - try { - const metadata = await sharp(Buffer.from(sourceBytes)).metadata(); - if (getSupportedMediaType(metadata.format) == null) { - return { - success: false, - error: "Source image must be a decodable PNG, JPEG, or WebP file.", - }; - } - const dimensions = getImageDimensionsFromMetadata(metadata); - if (dimensions == null) { - return { success: false, error: "Source image has no readable pixel dimensions." }; - } - sourceDimensions = dimensions; - } catch { - return { - success: false, - error: "Source image must be a decodable PNG, JPEG, or WebP file.", - }; - } - - const imageModelResult = await runtime.createImageModel(modelString); - if (!imageModelResult.success) { - return { - success: false, - ...formatImageModelError(imageModelResult.error, "editing"), - } satisfies ImageEditToolResult; - } - - try { - const result = await generateImage({ - model: imageModelResult.data, - prompt: { text: trimmedPrompt, images: [sourceBytes] }, - n: requestedCount, - abortSignal, - providerOptions: buildOpenAIImageProviderOptions(quality, outputFormat), - }); - - reportImageToolUsage( - config, - "image_edit", - modelString, - result.usage, - result.providerMetadata, - toolCallId - ); - - const outputDir = await getImageOutputDir( - config, - "edited_images", - "image_edit", - toolCallId - ); - const artifacts = await processImageArtifacts<{ outputDimensions: ImageDimensions }>({ - config, - outputDir, - toolName: "image_edit", - outputFormat, - providerMetadata: result.providerMetadata, - images: result.images, - getExtraFields: async (bytes) => { - try { - return { - success: true, - fields: { outputDimensions: await getImageDimensions(bytes) }, - }; - } catch (error) { - return { - success: false, - error: `Edited image dimensions could not be read: ${getErrorMessage(error)}`, - }; - } - }, - }); - if (!artifacts.success) { - return { - success: false, - error: artifacts.error, - } satisfies ImageEditToolResult; - } - - return { - success: true, - model: modelString, - prompt: trimmedPrompt, - requestedCount, - source: { - path: requestedSourcePath, - resolvedPath: resolvedSourcePath, - sizeBytes: sourceSizeBytes, - dimensions: sourceDimensions, - }, - images: artifacts.images, - ...(artifacts.warnings.length > 0 ? { warnings: artifacts.warnings } : {}), - } satisfies ImageEditToolResult; - } catch (error) { - return { - success: false, - error: `Image editing failed: ${sanitizeErrorMessageForDisplay(getErrorMessage(error))}`, - setupHint: IMAGE_TOOL_PROVIDER_SETUP_HINT, - } satisfies ImageEditToolResult; - } - }, - }); -}; diff --git a/src/node/services/tools/image_generate.test.ts b/src/node/services/tools/image_generate.test.ts deleted file mode 100644 index 9595cf4d02..0000000000 --- a/src/node/services/tools/image_generate.test.ts +++ /dev/null @@ -1,532 +0,0 @@ -import * as fs from "fs/promises"; - -import { describe, expect, test } from "bun:test"; -import type { ToolExecutionOptions } from "ai"; -import type { ImageModelV2 } from "@ai-sdk/provider"; - -import type { ImageGenerateToolResult } from "@/common/types/tools"; -import { LocalRuntime } from "@/node/runtime/LocalRuntime"; -import { createImageGenerateTool } from "./image_generate"; -import { TestTempDir, createTestToolConfig } from "./testHelpers"; -import { Err, Ok } from "@/common/types/result"; -import { DEFAULT_IMAGE_GENERATION_MODEL } from "@/common/types/imageGeneration"; - -const testPngBase64 = - "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAACXBIWXMAAAPoAAAD6AG1e1JrAAAAHUlEQVR4nGNgYPj/nzLMMGoAw2gYMIyGwf9hEAYAMqb+ENPK2kcAAAAASUVORK5CYII="; -const sharpInvalidPngBase64 = - "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAFgwJ/lKrL7wAAAABJRU5ErkJggg=="; - -function createMockImageModel(doGenerate: ImageModelV2["doGenerate"]): ImageModelV2 { - return { - specificationVersion: "v2", - provider: "test", - modelId: "test-image-model", - maxImagesPerCall: 10, - doGenerate, - }; -} - -const mockToolCallOptions: ToolExecutionOptions = { - toolCallId: "image-tool-call", - messages: [], -}; - -describe("image_generate tool", () => { - test("rejects requests above the configured maximum image count", async () => { - using workspaceDir = new TestTempDir("image-generate-workspace"); - let createImageModelCalled = false; - const tool = createImageGenerateTool({ - ...createTestToolConfig(workspaceDir.path), - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => { - createImageModelCalled = true; - return Promise.reject( - new Error("should not create a provider model when count exceeds limit") - ); - }, - }, - }); - - const result = (await tool.execute!( - { prompt: "A small blue square", n: 3 }, - mockToolCallOptions - )) as ImageGenerateToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected image_generate to fail when n exceeds configured maximum"); - } - expect(result.error).toContain("configured for a maximum of 2"); - expect(createImageModelCalled).toBe(false); - }); - - test("passes OpenAI image options using AI SDK option names", async () => { - using workspaceDir = new TestTempDir("image-generate-workspace"); - let capturedProviderOptions: unknown; - const tool = createImageGenerateTool({ - ...createTestToolConfig(workspaceDir.path), - imageGenerationRuntime: { - modelString: "openai:gpt-image-2", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel((options) => { - capturedProviderOptions = options.providerOptions; - return Promise.resolve({ - images: [testPngBase64], - warnings: [], - response: { timestamp: new Date(), modelId: "test-image-model", headers: {} }, - providerMetadata: {}, - }); - }) - ) - ), - }, - }); - - const result = (await tool.execute!( - { prompt: "A tiny square", quality: "high", outputFormat: "webp" }, - mockToolCallOptions - )) as ImageGenerateToolResult; - - expect(result.success).toBe(true); - expect(capturedProviderOptions).toEqual({ - openai: { quality: "high", outputFormat: "webp" }, - }); - }); - - test("reports OpenAI image token usage through the tool usage path", async () => { - using workspaceDir = new TestTempDir("image-generate-workspace"); - const reportedUsage: Array<{ - inputTokens?: number; - outputTokens?: number; - totalTokens?: number; - }> = []; - const tool = createImageGenerateTool({ - ...createTestToolConfig(workspaceDir.path), - reportModelUsage: (event) => { - reportedUsage.push(event.usage); - }, - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel(() => - Promise.resolve({ - images: [testPngBase64], - warnings: [], - response: { timestamp: new Date(), modelId: "test-image-model", headers: {} }, - providerMetadata: { - openai: { - images: [{ textTokens: 7, imageTokens: 11 }], - }, - }, - }) - ) - ) - ), - }, - }); - - const result = (await tool.execute!( - { prompt: "A tiny square", n: 1 }, - mockToolCallOptions - )) as ImageGenerateToolResult; - - expect(result.success).toBe(true); - expect(reportedUsage).toEqual([{ inputTokens: 7, outputTokens: 11, totalTokens: 18 }]); - }); - - test("omits thumbnails from model-visible tool output", async () => { - using workspaceDir = new TestTempDir("image-generate-workspace"); - const tool = createImageGenerateTool({ - ...createTestToolConfig(workspaceDir.path), - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => Promise.reject(new Error("not used")), - }, - }); - - const modelOutput = await tool.toModelOutput!({ - toolCallId: "image-tool-call", - input: {}, - output: { - success: true, - model: "openai:gpt-image-1.5", - prompt: "square", - requestedCount: 1, - images: [ - { - path: "/tmp/image.png", - filename: "image.png", - mediaType: "image/png", - thumbnail: { - data: "large-base64", - mediaType: "image/webp", - width: 512, - height: 512, - }, - }, - ], - }, - }); - - expect(modelOutput).toEqual({ - type: "json", - value: { - success: true, - model: "openai:gpt-image-1.5", - prompt: "square", - requestedCount: 1, - images: [ - { - path: "/tmp/image.png", - filename: "image.png", - mediaType: "image/png", - }, - ], - }, - }); - }); - - test("rejects blank prompts before creating an image model", async () => { - using workspaceDir = new TestTempDir("image-generate-workspace"); - let createImageModelCalled = false; - const tool = createImageGenerateTool({ - ...createTestToolConfig(workspaceDir.path), - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => { - createImageModelCalled = true; - return Promise.reject(new Error("should not create a model for blank prompts")); - }, - }, - }); - - const result = (await tool.execute!( - { prompt: " " }, - mockToolCallOptions - )) as ImageGenerateToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected blank prompt to fail"); - } - expect(result.error).toContain("prompt is required"); - expect(createImageModelCalled).toBe(false); - }); - - test("returns actionable setup failures from image model creation", async () => { - using workspaceDir = new TestTempDir("image-generate-workspace"); - const tool = createImageGenerateTool({ - ...createTestToolConfig(workspaceDir.path), - imageGenerationRuntime: { - modelString: "google:imagen-test", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve(Err({ type: "provider_not_supported", provider: "google" })), - }, - }); - - const result = (await tool.execute!( - { prompt: "A small square" }, - mockToolCallOptions - )) as ImageGenerateToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected provider setup failure"); - } - expect(result.error).toContain("only supports OpenAI"); - expect(result.setupHint).toContain(DEFAULT_IMAGE_GENERATION_MODEL); - }); - - test("sanitizes binary-like provider errors when image generation fails", async () => { - using workspaceDir = new TestTempDir("image-generate-workspace"); - const tool = createImageGenerateTool({ - ...createTestToolConfig(workspaceDir.path), - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel(() => - Promise.reject(new Error("Invalid JSON response: \u001b\u0000\ufffdpayload")) - ) - ) - ), - }, - }); - - const result = (await tool.execute!( - { prompt: "A small square" }, - mockToolCallOptions - )) as ImageGenerateToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected provider generation failure"); - } - expect(result.error).toContain("Image generation failed:"); - expect(result.error).toContain("nul=1"); - expect(result.error).not.toContain("\u0000"); - expect(result.error).not.toContain("�"); - }); - - test("writes generated artifacts outside the stream temp directory", async () => { - using workspaceDir = new TestTempDir("image-generate-workspace"); - const tool = createImageGenerateTool({ - ...createTestToolConfig(workspaceDir.path), - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel(() => - Promise.resolve({ - images: [testPngBase64], - warnings: [], - response: { timestamp: new Date(), modelId: "test-image-model", headers: {} }, - providerMetadata: {}, - }) - ) - ) - ), - }, - }); - - const result = (await tool.execute!( - { prompt: "A tiny square", n: 1, outputFormat: "jpeg" }, - mockToolCallOptions - )) as ImageGenerateToolResult; - - if (!result.success) { - throw new Error(`Expected image_generate to succeed, got ${result.error}`); - } - expect(result).toMatchObject({ - success: true, - model: "openai:gpt-image-1.5", - prompt: "A tiny square", - requestedCount: 1, - }); - expect(result.images).toHaveLength(1); - const image = result.images[0]; - expect(image).toBeDefined(); - if (!image) { - throw new Error("Expected a generated image result"); - } - expect(image.path).toContain("generated_images/test-workspace/image-tool-call"); - expect(image.path).not.toContain("imagegen/image-tool-call"); - expect(image.filename).toBe("image-1.png"); - expect(image.mediaType).toBe("image/png"); - expect(image.thumbnail).toMatchObject({ mediaType: "image/webp", width: 16, height: 16 }); - const artifactStats = await fs.stat(image.path); - expect(artifactStats.isFile()).toBe(true); - }); - - test("writes multiple generated images with per-image thumbnails", async () => { - using workspaceDir = new TestTempDir("image-generate-workspace"); - const tool = createImageGenerateTool({ - ...createTestToolConfig(workspaceDir.path), - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 4, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel(() => - Promise.resolve({ - images: [testPngBase64, testPngBase64], - warnings: [], - response: { timestamp: new Date(), modelId: "test-image-model", headers: {} }, - providerMetadata: {}, - }) - ) - ) - ), - }, - }); - - const result = (await tool.execute!( - { prompt: "Two tiny squares", n: 2 }, - mockToolCallOptions - )) as ImageGenerateToolResult; - - if (!result.success) { - throw new Error(`Expected image_generate to succeed, got ${result.error}`); - } - expect(result.requestedCount).toBe(2); - expect(result.images).toHaveLength(2); - expect(result.images.map((image) => image.filename)).toEqual(["image-1.png", "image-2.png"]); - expect(result.images.every((image) => image.thumbnail?.mediaType === "image/webp")).toBe(true); - await Promise.all(result.images.map((image) => fs.stat(image.path))); - }); - - test("keeps generated image results when thumbnail creation fails", async () => { - using workspaceDir = new TestTempDir("image-generate-workspace"); - const tool = createImageGenerateTool({ - ...createTestToolConfig(workspaceDir.path), - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel(() => - Promise.resolve({ - images: [sharpInvalidPngBase64], - warnings: [], - response: { timestamp: new Date(), modelId: "test-image-model", headers: {} }, - providerMetadata: {}, - }) - ) - ) - ), - }, - }); - - const result = (await tool.execute!( - { prompt: "A tiny square", n: 1 }, - mockToolCallOptions - )) as ImageGenerateToolResult; - - if (!result.success) { - throw new Error(`Expected image_generate to succeed, got ${result.error}`); - } - expect(result.images).toHaveLength(1); - expect(result.images[0]?.thumbnail).toBeUndefined(); - expect(result.warnings?.[0]).toContain("Thumbnail generation failed for image-1.png"); - }); - - test("returns a setup hint when the provider image request fails", async () => { - using workspaceDir = new TestTempDir("image-generate-workspace"); - const tool = createImageGenerateTool({ - ...createTestToolConfig(workspaceDir.path), - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok(createMockImageModel(() => Promise.reject(new Error("rate limit exceeded")))) - ), - }, - }); - - const result = (await tool.execute!( - { prompt: "A tiny square", n: 1 }, - mockToolCallOptions - )) as ImageGenerateToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected provider failure"); - } - expect(result.error).toContain("rate limit exceeded"); - expect(result.setupHint).toContain("credentials, billing, rate limits, and content policy"); - }); - - test("aborts artifact writes instead of closing partial files after write failures", async () => { - using workspaceDir = new TestTempDir("image-generate-workspace"); - let closeCalled = false; - let abortCalled = false; - - class FailingWriteRuntime extends LocalRuntime { - override writeFile(): WritableStream { - return { - getWriter: () => ({ - closed: Promise.resolve(undefined), - desiredSize: 1, - ready: Promise.resolve(undefined), - write: () => Promise.reject(new Error("disk full")), - close: () => { - closeCalled = true; - return Promise.resolve(); - }, - abort: () => { - abortCalled = true; - return Promise.resolve(); - }, - releaseLock: () => undefined, - }), - } as unknown as WritableStream; - } - } - - const tool = createImageGenerateTool({ - ...createTestToolConfig(workspaceDir.path, { - runtime: new FailingWriteRuntime(workspaceDir.path), - }), - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel(() => - Promise.resolve({ - images: [testPngBase64], - warnings: [], - response: { timestamp: new Date(), modelId: "test-image-model", headers: {} }, - providerMetadata: {}, - }) - ) - ) - ), - }, - }); - - const result = (await tool.execute!( - { prompt: "A tiny square", n: 1 }, - mockToolCallOptions - )) as ImageGenerateToolResult; - - expect(result.success).toBe(false); - expect(closeCalled).toBe(false); - expect(abortCalled).toBe(true); - }); - - test("returns a setup hint when the AI SDK rejects a zero-image provider response", async () => { - using workspaceDir = new TestTempDir("image-generate-workspace"); - const tool = createImageGenerateTool({ - ...createTestToolConfig(workspaceDir.path), - imageGenerationRuntime: { - modelString: "openai:gpt-image-1.5", - maxImagesPerCall: 2, - createImageModel: () => - Promise.resolve( - Ok( - createMockImageModel(() => - Promise.resolve({ - images: [], - warnings: [], - response: { timestamp: new Date(), modelId: "test-image-model", headers: {} }, - providerMetadata: {}, - }) - ) - ) - ), - }, - }); - - const result = (await tool.execute!( - { prompt: "A tiny square", n: 1 }, - mockToolCallOptions - )) as ImageGenerateToolResult; - - expect(result.success).toBe(false); - if (result.success) { - throw new Error("Expected zero-image provider response to fail"); - } - expect(result.error).toContain("No image generated"); - expect(result.setupHint).toContain("credentials, billing, rate limits, and content policy"); - }); -}); diff --git a/src/node/services/tools/image_generate.ts b/src/node/services/tools/image_generate.ts deleted file mode 100644 index 2cc00bd497..0000000000 --- a/src/node/services/tools/image_generate.ts +++ /dev/null @@ -1,121 +0,0 @@ -import assert from "node:assert/strict"; -import type { JSONValue } from "@ai-sdk/provider"; -import { generateImage, tool } from "ai"; - -import type { ImageGenerateToolResult } from "@/common/types/tools"; -import { stripImageToolOutputForModel } from "@/common/utils/imageGenerationToolResult"; -import { getErrorMessage } from "@/common/utils/errors"; -import { sanitizeErrorMessageForDisplay } from "@/common/utils/providerOutputSanitization"; -import { TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions"; -import type { ToolFactory } from "@/common/utils/tools/tools"; -import { - buildOpenAIImageProviderOptions, - formatImageModelError, - getImageOutputDir, - IMAGE_TOOL_PROVIDER_SETUP_HINT, - processImageArtifacts, - reportImageToolUsage, -} from "./imageArtifacts"; - -export const createImageGenerateTool: ToolFactory = (config) => { - return tool({ - description: TOOL_DEFINITIONS.image_generate.description, - inputSchema: TOOL_DEFINITIONS.image_generate.schema, - toModelOutput: ({ output }) => ({ - type: "json", - value: stripImageToolOutputForModel(output) as JSONValue, - }), - execute: async ({ prompt, n, quality, outputFormat }, { abortSignal, toolCallId }) => { - const runtime = config.imageGenerationRuntime; - assert(runtime, "imageGenerationRuntime must be set when image_generate is registered"); - const modelString = runtime.modelString.trim(); - assert(modelString.length > 0, "image generation modelString must be non-empty"); - assert( - Number.isInteger(runtime.maxImagesPerCall) && runtime.maxImagesPerCall > 0, - "image generation maxImagesPerCall must be a positive integer" - ); - - const trimmedPrompt = prompt.trim(); - if (!trimmedPrompt) { - return { success: false, error: "Image generation prompt is required." }; - } - - const requestedCount = n ?? 1; - if (!Number.isInteger(requestedCount) || requestedCount <= 0) { - return { success: false, error: "Image count must be a positive integer." }; - } - if (requestedCount > runtime.maxImagesPerCall) { - return { - success: false, - error: `Requested ${requestedCount} images, but Image Tools is configured for a maximum of ${runtime.maxImagesPerCall}.`, - setupHint: "Adjust Settings > Experiments > Image Tools or request fewer images.", - }; - } - - const imageModelResult = await runtime.createImageModel(modelString); - if (!imageModelResult.success) { - return { - success: false, - ...formatImageModelError(imageModelResult.error, "generation"), - } satisfies ImageGenerateToolResult; - } - - try { - const result = await generateImage({ - model: imageModelResult.data, - prompt: trimmedPrompt, - n: requestedCount, - abortSignal, - providerOptions: buildOpenAIImageProviderOptions(quality, outputFormat), - }); - - reportImageToolUsage( - config, - "image_generate", - modelString, - result.usage, - result.providerMetadata, - toolCallId - ); - - const outputDir = await getImageOutputDir( - config, - "generated_images", - "image_generate", - toolCallId - ); - - const artifacts = await processImageArtifacts({ - config, - outputDir, - toolName: "image_generate", - outputFormat, - providerMetadata: result.providerMetadata, - images: result.images, - getExtraFields: () => Promise.resolve({ success: true, fields: {} }), - }); - if (!artifacts.success) { - return { - success: false, - error: artifacts.error, - } satisfies ImageGenerateToolResult; - } - - return { - success: true, - model: modelString, - prompt: trimmedPrompt, - requestedCount, - images: artifacts.images, - ...(artifacts.warnings.length > 0 ? { warnings: artifacts.warnings } : {}), - } satisfies ImageGenerateToolResult; - } catch (error) { - return { - success: false, - error: `Image generation failed: ${sanitizeErrorMessageForDisplay(getErrorMessage(error))}`, - setupHint: IMAGE_TOOL_PROVIDER_SETUP_HINT, - } satisfies ImageGenerateToolResult; - } - }, - }); -}; diff --git a/src/node/services/workspaceService.ts b/src/node/services/workspaceService.ts index 31f748f64e..0a787ef1fc 100644 --- a/src/node/services/workspaceService.ts +++ b/src/node/services/workspaceService.ts @@ -1943,7 +1943,6 @@ export class WorkspaceService extends EventEmitter { aiService: this.aiService, telemetryService: this.telemetryService, initStateManager: this.initStateManager, - experimentsService: this.experimentsService, workspaceGoalService: this.workspaceGoalService, backgroundProcessManager: this.backgroundProcessManager, onCompactionComplete: () => {