diff --git a/packages/agent-sdk/src/managers/messageManager.ts b/packages/agent-sdk/src/managers/messageManager.ts index a1f765cc..a7d8cd73 100644 --- a/packages/agent-sdk/src/managers/messageManager.ts +++ b/packages/agent-sdk/src/managers/messageManager.ts @@ -241,6 +241,16 @@ export class MessageManager { this.filesInContext.add(normalizedPath); } + /** + * Checks if a file has been read or edited in this conversation. + */ + public hasFileInContext(filePath: string): boolean { + const normalizedPath = isAbsolute(filePath) + ? relative(this.workdir, filePath) + : filePath; + return this.filesInContext.has(normalizedPath); + } + /** * Extracts and adds file paths from a message's tool blocks. */ diff --git a/packages/agent-sdk/src/tools/editTool.ts b/packages/agent-sdk/src/tools/editTool.ts index 50c6821a..af0f29c1 100644 --- a/packages/agent-sdk/src/tools/editTool.ts +++ b/packages/agent-sdk/src/tools/editTool.ts @@ -30,6 +30,7 @@ Usage: - When editing text from ${READ_TOOL_NAME} tool output, ensure you preserve the exact indentation (tabs/spaces) as it appears AFTER the line number prefix. The line number prefix format is: spaces + line number + tab. Everything after that tab is the actual file content to match. Never include any part of the line number prefix in the old_string or new_string. - ALWAYS prefer editing existing files in the codebase. NEVER write new files unless explicitly required. - Only use emojis if the user explicitly requests it. Avoid adding emojis to files unless asked. +- Use the smallest \`old_string\` that's clearly unique — usually 2-4 adjacent lines is sufficient. Avoid including 10+ lines of context when less uniquely identifies the target. Shorter matches are less likely to contain reproduction errors. - The edit will FAIL if \`old_string\` is not unique in the file. Either provide a larger string with more surrounding context to make it unique or use \`replace_all\` to change every instance of \`old_string\`. - Use \`replace_all\` for replacing and renaming strings across the file. This parameter is useful if you want to rename a variable for instance.`, config: { @@ -109,6 +110,18 @@ Usage: // Touch file to track it in context context.messageManager?.touchFile(filePath); + // Enforce read-before-edit: the file must have been read first + if ( + context.messageManager && + !context.messageManager.hasFileInContext(filePath) + ) { + return { + success: false, + content: "", + error: `You must read the file with the ${READ_TOOL_NAME} tool before editing it. Use ${READ_TOOL_NAME} on ${filePath} first.`, + }; + } + try { const resolvedPath = resolvePath(filePath, context.workdir); @@ -124,19 +137,23 @@ Usage: }; } + // Normalize line endings for matching + const normalizedContent = originalContent.replace(/\r\n/g, "\n"); + const normalizedOldString = oldString.replace(/\r\n/g, "\n"); + // Check if old_string exists - const index = originalContent.indexOf(oldString); - const matchedOldString = index !== -1 ? oldString : null; + const index = normalizedContent.indexOf(normalizedOldString); + const matchedOldString = index !== -1 ? normalizedOldString : null; const startLineNumber = index !== -1 - ? originalContent.substring(0, index).split("\n").length + ? normalizedContent.substring(0, index).split("\n").length : undefined; if (!matchedOldString) { return { success: false, content: "", - error: analyzeEditMismatch(), + error: analyzeEditMismatch(normalizedOldString), }; } @@ -146,11 +163,11 @@ Usage: if (replaceAll) { // Replace all matches const regex = new RegExp(escapeRegExp(matchedOldString), "g"); - newContent = originalContent.replace(regex, newString); - replacementCount = (originalContent.match(regex) || []).length; + newContent = normalizedContent.replace(regex, newString); + replacementCount = (normalizedContent.match(regex) || []).length; } else { // Replace only the first match, but first check if it's unique - const matches = originalContent.split(matchedOldString).length - 1; + const matches = normalizedContent.split(matchedOldString).length - 1; if (matches > 1) { return { success: false, @@ -159,7 +176,7 @@ Usage: }; } - newContent = originalContent.replace(matchedOldString, newString); + newContent = normalizedContent.replace(matchedOldString, newString); replacementCount = 1; } diff --git a/packages/agent-sdk/src/utils/editUtils.ts b/packages/agent-sdk/src/utils/editUtils.ts index 0d701422..81ac5d9c 100644 --- a/packages/agent-sdk/src/utils/editUtils.ts +++ b/packages/agent-sdk/src/utils/editUtils.ts @@ -10,8 +10,11 @@ export function escapeRegExp(string: string): string { } /** - * Returns a generic error message when old_string is not found. + * Returns an error message when old_string is not found, including + * the attempted string to help the model self-correct on retry. */ -export function analyzeEditMismatch(): string { - return "old_string not found in file"; +export function analyzeEditMismatch(oldString: string): string { + const displayString = + oldString.length > 200 ? oldString.substring(0, 200) + "..." : oldString; + return `String to replace not found in file.\nString: ${displayString}`; } diff --git a/packages/agent-sdk/tests/tools/editTool.test.ts b/packages/agent-sdk/tests/tools/editTool.test.ts index 7179170f..0262f954 100644 --- a/packages/agent-sdk/tests/tools/editTool.test.ts +++ b/packages/agent-sdk/tests/tools/editTool.test.ts @@ -8,8 +8,10 @@ import { Container } from "@/utils/container.js"; // Mock fs/promises vi.mock("fs/promises"); vi.mock("../../src/utils/editUtils.js", () => ({ - escapeRegExp: vi.fn((s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")), - analyzeEditMismatch: vi.fn(() => "old_string not found in file"), + escapeRegExp: vi.fn((s) => s.replace(/[.*+?^${}()|[\]\\]/g, String.raw`\$&`)), + analyzeEditMismatch: vi.fn( + (s) => `String to replace not found in file.\nString: ${s}`, + ), })); describe("editTool", () => { @@ -143,7 +145,7 @@ describe("editTool", () => { ); expect(result.success).toBe(false); - expect(result.error).toContain("old_string not found in file"); + expect(result.error).toContain("String to replace not found in file"); }); it("should fail when file cannot be read", async () => { @@ -439,4 +441,97 @@ describe("editTool", () => { ); expect(result).toBe("src/index.ts"); }); + + it("should reject edit when file has not been read first", async () => { + const mockMessageManager = { + touchFile: vi.fn(), + hasFileInContext: vi.fn().mockReturnValue(false), + }; + + const result = await editTool.execute( + { + file_path: "/test/workdir/file.js", + old_string: "old", + new_string: "new", + }, + { + ...mockContext, + messageManager: + mockMessageManager as unknown as ToolContext["messageManager"], + }, + ); + + expect(result.success).toBe(false); + expect(result.error).toContain("must read the file"); + expect(result.error).toContain("Read"); + }); + + it("should allow edit when file has been read first", async () => { + const mockContent = "some content"; + vi.mocked(readFile).mockResolvedValue(mockContent); + vi.mocked(writeFile).mockResolvedValue(undefined); + + const mockMessageManager = { + touchFile: vi.fn(), + hasFileInContext: vi.fn().mockReturnValue(true), + }; + + const result = await editTool.execute( + { + file_path: "/test/workdir/file.js", + old_string: "some", + new_string: "other", + }, + { + ...mockContext, + messageManager: + mockMessageManager as unknown as ToolContext["messageManager"], + }, + ); + + expect(result.success).toBe(true); + }); + + it("should handle CRLF line endings in file content", async () => { + const mockContent = "function hello() {\r\n console.log('Hello');\r\n}"; + // CRLF is normalized to LF for matching, so output also uses LF + const expectedContent = + "function hello() {\n console.log('Hello World');\n}"; + + vi.mocked(readFile).mockResolvedValue(mockContent); + vi.mocked(writeFile).mockResolvedValue(undefined); + + const result = await editTool.execute( + { + file_path: "/test/file.js", + old_string: "console.log('Hello');", + new_string: "console.log('Hello World');", + }, + mockContext, + ); + + expect(result.success).toBe(true); + expect(writeFile).toHaveBeenCalledWith( + "/test/file.js", + expectedContent, + "utf-8", + ); + }); + + it("should match old_string with LF against CRLF file", async () => { + const mockContent = "line1\r\nline2\r\nline3"; + vi.mocked(readFile).mockResolvedValue(mockContent); + vi.mocked(writeFile).mockResolvedValue(undefined); + + const result = await editTool.execute( + { + file_path: "/test/file.js", + old_string: "line1\nline2", + new_string: "line1\nmodified", + }, + mockContext, + ); + + expect(result.success).toBe(true); + }); }); diff --git a/packages/agent-sdk/tests/utils/mismatchAnalysis.test.ts b/packages/agent-sdk/tests/utils/mismatchAnalysis.test.ts index 5c0f2930..13645852 100644 --- a/packages/agent-sdk/tests/utils/mismatchAnalysis.test.ts +++ b/packages/agent-sdk/tests/utils/mismatchAnalysis.test.ts @@ -2,7 +2,22 @@ import { describe, it, expect } from "vitest"; import { analyzeEditMismatch } from "../../src/utils/editUtils.js"; describe("analyzeEditMismatch", () => { - it("should return generic message", () => { - expect(analyzeEditMismatch()).toBe("old_string not found in file"); + it("should include the attempted string in error message", () => { + expect(analyzeEditMismatch("hello")).toBe( + "String to replace not found in file.\nString: hello", + ); + }); + + it("should truncate long strings", () => { + const longString = "a".repeat(300); + const result = analyzeEditMismatch(longString); + expect(result).toContain("String to replace not found in file."); + expect(result).toContain("a".repeat(200) + "..."); + }); + + it("should not truncate short strings", () => { + const shortString = "a".repeat(200); + const result = analyzeEditMismatch(shortString); + expect(result).not.toContain("..."); }); }); diff --git a/specs/001-fs-tools/contracts/fs-tools-api.md b/specs/001-fs-tools/contracts/fs-tools-api.md index d73ce6c3..2bdd6153 100644 --- a/specs/001-fs-tools/contracts/fs-tools-api.md +++ b/specs/001-fs-tools/contracts/fs-tools-api.md @@ -78,6 +78,12 @@ interface EditArgs { } ``` +**Edit Tool Behavioral Contracts**: +- **Read-before-edit**: The tool MUST reject edits to files that have not been read in the current conversation (tracked via `MessageManager.hasFileInContext()`). Returns error: `"You must read the file with the Read tool before editing it."` +- **CRLF normalization**: Both file content and `old_string` are normalized (`\r\n` → `\n`) before matching. Output uses normalized (LF) line endings. +- **Error detail**: When `old_string` is not found, the error includes the attempted string (truncated to 200 chars): `"String to replace not found in file.\nString: "` +- **Uniqueness check**: When `old_string` appears multiple times and `replace_all` is false, the tool MUST fail with the count of occurrences. + ### LS Tool ```typescript interface LSArgs { diff --git a/specs/001-fs-tools/data-model.md b/specs/001-fs-tools/data-model.md index 5280c481..b064f967 100644 --- a/specs/001-fs-tools/data-model.md +++ b/specs/001-fs-tools/data-model.md @@ -43,6 +43,14 @@ - `new_string: string`: Replacement text. - `replace_all?: boolean`: Whether to replace all occurrences. +### EditToolBehavior +**Purpose**: Behavioral contracts enforced by the Edit tool at execution time. +**Invariants**: +- Read-before-edit: File must be in `MessageManager.filesInContext` or edit is rejected. +- CRLF normalization: `\r\n` → `\n` applied to both file content and `old_string` before matching. +- Error detail: `old_string` not found errors include the attempted string (truncated to 200 chars). +- Smallest `old_string` guidance: Tool prompt advises 2-4 adjacent lines for uniqueness. + ### GlobArguments **Purpose**: Arguments for the `Glob` tool. **Fields**: diff --git a/specs/001-fs-tools/spec.md b/specs/001-fs-tools/spec.md index f1692b7e..6279c471 100644 --- a/specs/001-fs-tools/spec.md +++ b/specs/001-fs-tools/spec.md @@ -34,8 +34,11 @@ As an AI agent, I want to modify files using exact string replacements so that I **Acceptance Scenarios**: 1. **Given** a file has been read, **When** the agent calls `Edit` with a unique `old_string`, **Then** the file MUST be updated with `new_string`. -3. **Given** the `old_string` is not unique, **When** the agent calls `Edit` without `replace_all`, **Then** the operation MUST fail. -4. **Given** an `Edit` tool call, **When** the diff is displayed, **Then** it MUST show word-level highlights for changed parts when the number of removed and added lines match. +2. **Given** the `old_string` is not unique, **When** the agent calls `Edit` without `replace_all`, **Then** the operation MUST fail. +3. **Given** an `Edit` tool call, **When** the diff is displayed, **Then** it MUST show word-level highlights for changed parts when the number of removed and added lines match. +4. **Given** a file with CRLF line endings, **When** the agent calls `Edit` with LF `old_string`, **Then** the match MUST succeed by normalizing line endings before comparison. +5. **Given** a file that has not been read, **When** the agent calls `Edit`, **Then** the operation MUST fail with a message instructing the agent to read the file first. +6. **Given** an `Edit` tool call where `old_string` is not found, **When** the error is returned, **Then** it MUST include the attempted string to help the model self-correct. --- @@ -59,7 +62,10 @@ As an AI agent, I want to search for patterns using glob patterns or regex so th - **Large Files**: Reading files that exceed memory limits or token windows. Handled via `offset` and `limit`. If estimated tokens exceed `maxTokens`, the tool returns an error suggesting offset/limit or grep/jq. - **Binary Documents**: Attempting to read PDF, DOCX, or other unsupported binary formats. The tool MUST prevent this and return an error. -- **Mismatch Analysis**: `Edit` tool must provide detailed mismatch reports when `old_string` is not found, highlighting exactly which lines differ. +- **Mismatch Analysis**: `Edit` tool must provide detailed mismatch reports when `old_string` is not found, including the attempted string (truncated to 200 chars) to help the model self-correct. +- **Read Before Edit**: `Edit` tool must reject edits to files that have not been read in the current conversation, preventing blind edits. +- **CRLF Normalization**: `Edit` tool must normalize `\r\n` → `\n` before matching, so the model can use LF line endings in `old_string` regardless of the file's actual line endings. +- **Smallest old_string**: `Edit` tool prompt should guide the model to use the smallest `old_string` that is clearly unique (typically 2-4 adjacent lines), reducing reproduction errors on long template-literal-heavy text. - **File Permissions**: Attempting to write to read-only files or directories without proper permissions. ## Requirements *(mandatory)* @@ -76,6 +82,10 @@ As an AI agent, I want to search for patterns using glob patterns or regex so th - **FR-003**: System MUST provide a `Write` tool that automatically creates parent directories. - **FR-004**: `Write` tool SHOULD verify that the file was read before being overwritten to prevent accidental data loss. - **FR-005**: System MUST provide an `Edit` tool for exact string replacement with detailed mismatch analysis. +- **FR-020**: `Edit` tool MUST reject edits to files that have not been read in the current conversation, returning an error instructing the agent to read the file first. +- **FR-021**: `Edit` tool MUST normalize CRLF (`\r\n`) to LF (`\n`) before matching `old_string`, so models can use LF in `old_string` against CRLF files. +- **FR-022**: `Edit` tool MUST include the attempted `old_string` (truncated to 200 chars) in the error message when `old_string` is not found. +- **FR-023**: `Edit` tool prompt SHOULD guide the model to use the smallest `old_string` that is clearly unique (typically 2-4 adjacent lines), reducing reproduction errors on long text. - **FR-009**: System MUST provide a `Glob` tool for fast pattern matching. It MUST NOT respect `.gitignore` (but MUST always ignore the `.git` directory), support a configurable `limit`, and provide structured metadata. - **FR-010**: System MUST provide a `Grep` tool based on ripgrep for powerful text searching. It MUST respect `.gitignore` and common ignore patterns, support `offset` and `context` arguments, and provide structured metadata. - **FR-011**: All tools MUST integrate with the `PermissionManager` for authorization. diff --git a/specs/001-fs-tools/tasks.md b/specs/001-fs-tools/tasks.md index 30bf5ed0..9ecb8aa2 100644 --- a/specs/001-fs-tools/tasks.md +++ b/specs/001-fs-tools/tasks.md @@ -3,6 +3,11 @@ - [x] T001 Implement `Read` tool with line numbering and token-level validation - [x] T002 Implement `Write` tool with directory auto-creation - [x] T003 Implement `Edit` tool with detailed mismatch analysis + - [x] T003.1 Add "smallest old_string" guidance to Edit tool prompt (2-4 adjacent lines) + - [x] T003.2 Add CRLF normalization (`\r\n` → `\n`) before matching + - [x] T003.3 Include attempted `old_string` in error message when not found (truncated to 200 chars) + - [x] T003.4 Enforce read-before-edit: reject edits to files not in `filesInContext` + - [x] T003.5 Add `hasFileInContext()` method to `MessageManager` - [x] T007 Implement `Glob` tool for pattern-based file discovery - [x] T008 Implement `Grep` tool using ripgrep for content search - [x] T009 Integrate all FS tools with `PermissionManager` diff --git a/specs/README.md b/specs/README.md index bbd11e79..c2517011 100644 --- a/specs/README.md +++ b/specs/README.md @@ -30,15 +30,15 @@ This directory contains feature specifications that serve as the source of truth |--------|-------| | Specs | 54 | | User Stories | 236 | -| Functional Requirements | 880 | +| Functional Requirements | 884 | | Test Files | 302 | -| Test Cases | 3,822 | +| Test Cases | 3,828 | ## Specs | Feature | Description | US | FR | Links | |---------|-------------|----|----|-------| -| File System Tools | Read, Write, Edit, Glob, Grep tools for file operations | 3 | 15 | [spec](001-fs-tools/spec.md) · [plan](001-fs-tools/plan.md) | +| File System Tools | Read, Write, Edit, Glob, Grep tools for file operations | 3 | 19 | [spec](001-fs-tools/spec.md) · [plan](001-fs-tools/plan.md) | | Bash Tools | Bash, BashOutput, KillBash tools for shell command execution | 3 | 17 | [spec](002-bash-tools/spec.md) · [plan](002-bash-tools/plan.md) | | MCP | Model Context Protocol support for external tools and context sources | 4 | 23 | [spec](003-mcp/spec.md) · [plan](003-mcp/plan.md) | | Session Management | Performance-optimized, project-based session management system | 3 | 17 | [spec](004-session-management/spec.md) · [plan](004-session-management/plan.md) |