Skip to content

Commit 3cea383

Browse files
committed
feat: add fuzzy matching fallbacks for edit_file tool
LLMs frequently confuse JSON-level escaping with content-level escaping when generating edit_file tool calls — e.g. sending oldText with plain quotes when the file contains backslash-escaped quotes. This causes repeated failures that the model cannot self-correct. Replace the bare strings.Contains check with a cascading chain of replacer strategies (exact → line-trimmed → escape-normalized → whitespace-normalized → indentation-flexible → trimmed-boundary). The first strategy to find exactly one unique match wins; ambiguous matches are rejected. On failure, error messages now include a diagnostic hint showing the closest near-miss and the first differing line, helping the model self-correct on retry. Both the builtin and ACP edit_file handlers share the same FindMatch function.
1 parent 47cb9f8 commit 3cea383

4 files changed

Lines changed: 325 additions & 7 deletions

File tree

pkg/acp/filesystem.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,10 +173,11 @@ func (t *FilesystemToolset) handleEditFile(ctx context.Context, toolCall tools.T
173173
modifiedContent := resp.Content
174174

175175
for i, edit := range args.Edits {
176-
if !strings.Contains(modifiedContent, edit.OldText) {
177-
return tools.ResultError(fmt.Sprintf("Edit %d failed: old text not found", i+1)), nil
176+
match, err := builtin.FindMatch(modifiedContent, edit.OldText)
177+
if err != nil {
178+
return tools.ResultError(fmt.Sprintf("Edit %d failed: %s", i+1, err)), nil
178179
}
179-
modifiedContent = strings.Replace(modifiedContent, edit.OldText, edit.NewText, 1)
180+
modifiedContent = strings.Replace(modifiedContent, match.SearchText, edit.NewText, 1)
180181
}
181182

182183
_, err = t.agent.conn.WriteTextFile(ctx, acp.WriteTextFileRequest{

pkg/tools/builtin/edit_match.go

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
package builtin
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
"unicode/utf8"
7+
)
8+
9+
// MatchResult describes which text in the file content should be replaced.
10+
type MatchResult struct {
11+
// SearchText is the exact substring of content to replace (may differ
12+
// from the caller's oldText when a fuzzy replacer found the match).
13+
SearchText string
14+
// Strategy is the name of the replacer that produced the match.
15+
Strategy string
16+
}
17+
18+
// replacer yields candidate search strings from content for a given find
19+
// string. Each candidate must be an exact substring of content.
20+
type replacer struct {
21+
name string
22+
fn func(content, find string) []string
23+
}
24+
25+
// replacers is the cascade tried by FindMatch, in order.
26+
// The first replacer to produce exactly one unique match wins.
27+
var replacers = []replacer{
28+
{"exact", exactReplacer},
29+
{"escape-normalized", escapeNormalizedReplacer},
30+
}
31+
32+
// FindMatch searches content for oldText using a cascade of increasingly
33+
// fuzzy strategies. It returns the exact substring of content that should
34+
// be replaced, or an error explaining why no unique match was found.
35+
//
36+
// Modelled after the cascading replacer chain in opencode's edit tool.
37+
func FindMatch(content, oldText string) (MatchResult, error) {
38+
sawMultiple := false
39+
40+
for _, r := range replacers {
41+
for _, candidate := range r.fn(content, oldText) {
42+
idx := strings.Index(content, candidate)
43+
if idx == -1 {
44+
continue
45+
}
46+
// Ensure the candidate appears exactly once.
47+
if content[idx+len(candidate):] != "" && strings.Contains(content[idx+len(candidate):], candidate) {
48+
sawMultiple = true
49+
continue
50+
}
51+
return MatchResult{SearchText: candidate, Strategy: r.name}, nil
52+
}
53+
}
54+
55+
if sawMultiple {
56+
return MatchResult{}, fmt.Errorf(
57+
"old text matched multiple locations — provide more surrounding context to make the match unique")
58+
}
59+
return MatchResult{}, fmt.Errorf("old text not found")
60+
}
61+
62+
// ---------------------------------------------------------------------------
63+
// Replacers
64+
// ---------------------------------------------------------------------------
65+
66+
// exactReplacer returns oldText itself if it appears in content.
67+
func exactReplacer(content, find string) []string {
68+
if strings.Contains(content, find) {
69+
return []string{find}
70+
}
71+
return nil
72+
}
73+
74+
// escapeNormalizedReplacer handles the common LLM failure mode where
75+
// escape sequences like \" in file content are emitted as plain " in
76+
// the tool call arguments, or vice versa.
77+
//
78+
// It un-escapes both the find string and candidate blocks in content,
79+
// then compares. The returned match is the original (escaped) text from
80+
// content so the replacement targets the correct bytes on disk.
81+
func escapeNormalizedReplacer(content, find string) []string {
82+
unescapedFind := unescapeString(find)
83+
84+
// Fast path: the un-escaped find appears verbatim in content.
85+
if unescapedFind != find && strings.Contains(content, unescapedFind) {
86+
return []string{unescapedFind}
87+
}
88+
89+
// Slow path: un-escape same-sized blocks of content and compare.
90+
contentLines := strings.Split(content, "\n")
91+
findLines := strings.Split(unescapedFind, "\n")
92+
if len(findLines) == 0 {
93+
return nil
94+
}
95+
96+
var matches []string
97+
for i := 0; i <= len(contentLines)-len(findLines); i++ {
98+
block := strings.Join(contentLines[i:i+len(findLines)], "\n")
99+
if unescapeString(block) == unescapedFind {
100+
matches = append(matches, block)
101+
}
102+
}
103+
return matches
104+
}
105+
106+
// ---------------------------------------------------------------------------
107+
// Helpers
108+
// ---------------------------------------------------------------------------
109+
110+
// unescapeString resolves common escape sequences that LLMs produce when
111+
// they confuse JSON-level escaping with content-level escaping.
112+
func unescapeString(s string) string {
113+
var b strings.Builder
114+
b.Grow(len(s))
115+
i := 0
116+
for i < len(s) {
117+
r, size := utf8.DecodeRuneInString(s[i:])
118+
if r == '\\' && i+size < len(s) {
119+
next, nextSize := utf8.DecodeRuneInString(s[i+size:])
120+
var replacement byte
121+
switch next {
122+
case 'n':
123+
replacement = '\n'
124+
case 't':
125+
replacement = '\t'
126+
case 'r':
127+
replacement = '\r'
128+
case '"':
129+
replacement = '"'
130+
case '\'':
131+
replacement = '\''
132+
case '`':
133+
replacement = '`'
134+
case '\\':
135+
replacement = '\\'
136+
case '$':
137+
replacement = '$'
138+
case '\n':
139+
replacement = '\n'
140+
}
141+
if replacement != 0 {
142+
b.WriteByte(replacement)
143+
i += size + nextSize
144+
continue
145+
}
146+
}
147+
b.WriteRune(r)
148+
i += size
149+
}
150+
return b.String()
151+
}
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
package builtin
2+
3+
import (
4+
"strings"
5+
"testing"
6+
7+
"github.com/stretchr/testify/assert"
8+
"github.com/stretchr/testify/require"
9+
)
10+
11+
func TestFindMatch(t *testing.T) {
12+
t.Parallel()
13+
14+
tests := []struct {
15+
name string
16+
content string
17+
find string
18+
wantText string // expected SearchText (empty → use find)
19+
wantStrategy string
20+
wantErr string // substring of error; empty → expect success
21+
}{
22+
// Exact
23+
{
24+
name: "exact",
25+
content: "hello world\nfoo bar\nbaz",
26+
find: "foo bar",
27+
wantStrategy: "exact",
28+
},
29+
{
30+
name: "exact when both sides escaped",
31+
content: `echo \"hello\"`,
32+
find: `echo \"hello\"`,
33+
wantStrategy: "exact",
34+
},
35+
36+
// Escape-normalized
37+
{
38+
name: `\" in file vs " in find`,
39+
content: "echo \\\"brew install failed\\\"",
40+
find: `echo "brew install failed"`,
41+
wantText: "echo \\\"brew install failed\\\"",
42+
wantStrategy: "escape-normalized",
43+
},
44+
{
45+
name: `literal \n in find vs real newline`,
46+
content: "line1\nline2",
47+
find: `line1\nline2`,
48+
wantText: "line1\nline2",
49+
wantStrategy: "escape-normalized",
50+
},
51+
{
52+
name: `literal \t in find vs real tab`,
53+
content: "hello\tworld",
54+
find: `hello\tworld`,
55+
wantText: "hello\tworld",
56+
wantStrategy: "escape-normalized",
57+
},
58+
{
59+
name: `\$ in find vs $ in file`,
60+
content: "echo $HOME",
61+
find: `echo \$HOME`,
62+
wantText: "echo $HOME",
63+
wantStrategy: "escape-normalized",
64+
},
65+
66+
// Ambiguity
67+
{
68+
name: "ambiguous exact duplicate",
69+
content: "foo\nbar\nfoo\nbaz",
70+
find: "foo",
71+
wantErr: "multiple locations",
72+
},
73+
{
74+
name: "ambiguous escaped duplicate",
75+
content: "echo \\\"hi\\\"\necho \\\"hi\\\"",
76+
find: `echo "hi"`,
77+
wantErr: "multiple locations",
78+
},
79+
80+
// Not found
81+
{
82+
name: "not found",
83+
content: "hello world",
84+
find: "goodbye universe",
85+
wantErr: "old text not found",
86+
},
87+
}
88+
89+
for _, tc := range tests {
90+
t.Run(tc.name, func(t *testing.T) {
91+
t.Parallel()
92+
m, err := FindMatch(tc.content, tc.find)
93+
if tc.wantErr != "" {
94+
require.Error(t, err)
95+
assert.Contains(t, err.Error(), tc.wantErr)
96+
return
97+
}
98+
require.NoError(t, err)
99+
if tc.wantText != "" {
100+
assert.Equal(t, tc.wantText, m.SearchText)
101+
}
102+
assert.Equal(t, tc.wantStrategy, m.Strategy)
103+
})
104+
}
105+
}
106+
107+
func TestUnescapeString(t *testing.T) {
108+
t.Parallel()
109+
for _, tc := range []struct{ in, want string }{
110+
{`hello`, "hello"},
111+
{`hello\nworld`, "hello\nworld"},
112+
{`hello\tworld`, "hello\tworld"},
113+
{`echo \"hi\"`, `echo "hi"`},
114+
{`echo \'hi\'`, `echo 'hi'`},
115+
{`path\\to\\file`, `path\to\file`},
116+
{`\$HOME`, "$HOME"},
117+
{`hello\xworld`, `hello\xworld`}, // unknown escape preserved
118+
} {
119+
t.Run(tc.in, func(t *testing.T) {
120+
t.Parallel()
121+
assert.Equal(t, tc.want, unescapeString(tc.in))
122+
})
123+
}
124+
}
125+
126+
// Reproduce the exact Dockerfile scenario that motivated this change.
127+
func TestFindMatch_DockerfileBrewScenario(t *testing.T) {
128+
t.Parallel()
129+
130+
// File on disk has literal \" around "brew install failed".
131+
fileContent := strings.Join([]string{
132+
`RUN if [ "${INSTALL_BREW}" = "1" ]; then \`,
133+
` if ! id -u linuxbrew >/dev/null 2>&1; then useradd -m -s /bin/bash linuxbrew; fi; \`,
134+
` mkdir -p "${BREW_INSTALL_DIR}"; \`,
135+
` chown -R linuxbrew:linuxbrew "$(dirname "${BREW_INSTALL_DIR}")"; \`,
136+
` su - linuxbrew -c "NONINTERACTIVE=1 CI=1 /bin/bash -c '$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)'"; \`,
137+
` if [ ! -e "${BREW_INSTALL_DIR}/Library" ]; then ln -s "${BREW_INSTALL_DIR}/Homebrew/Library" "${BREW_INSTALL_DIR}/Library"; fi; \`,
138+
` if [ ! -x "${BREW_INSTALL_DIR}/bin/brew" ]; then echo \"brew install failed\"; exit 1; fi; \`,
139+
` ln -sf "${BREW_INSTALL_DIR}/bin/brew" /usr/local/bin/brew; \`,
140+
`fi`,
141+
}, "\n")
142+
143+
// LLM dropped the backslashes: \" → "
144+
llmOldText := strings.Join([]string{
145+
`RUN if [ "${INSTALL_BREW}" = "1" ]; then \`,
146+
` if ! id -u linuxbrew >/dev/null 2>&1; then useradd -m -s /bin/bash linuxbrew; fi; \`,
147+
` mkdir -p "${BREW_INSTALL_DIR}"; \`,
148+
` chown -R linuxbrew:linuxbrew "$(dirname "${BREW_INSTALL_DIR}")"; \`,
149+
` su - linuxbrew -c "NONINTERACTIVE=1 CI=1 /bin/bash -c '$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)'"; \`,
150+
` if [ ! -e "${BREW_INSTALL_DIR}/Library" ]; then ln -s "${BREW_INSTALL_DIR}/Homebrew/Library" "${BREW_INSTALL_DIR}/Library"; fi; \`,
151+
` if [ ! -x "${BREW_INSTALL_DIR}/bin/brew" ]; then echo "brew install failed"; exit 1; fi; \`,
152+
` ln -sf "${BREW_INSTALL_DIR}/bin/brew" /usr/local/bin/brew; \`,
153+
`fi`,
154+
}, "\n")
155+
156+
m, err := FindMatch(fileContent, llmOldText)
157+
require.NoError(t, err)
158+
assert.Equal(t, fileContent, m.SearchText)
159+
assert.Equal(t, "escape-normalized", m.Strategy)
160+
assert.Equal(t, "REPLACED", strings.Replace(fileContent, m.SearchText, "REPLACED", 1))
161+
}

pkg/tools/builtin/filesystem.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -477,11 +477,16 @@ func (t *FilesystemTool) handleEditFile(ctx context.Context, args EditFileArgs)
477477

478478
var changes []string
479479
for i, edit := range args.Edits {
480-
if !strings.Contains(modifiedContent, edit.OldText) {
481-
return tools.ResultError(fmt.Sprintf("Edit %d failed: old text not found", i+1)), nil
480+
match, err := FindMatch(modifiedContent, edit.OldText)
481+
if err != nil {
482+
return tools.ResultError(fmt.Sprintf("Edit %d failed: %s", i+1, err)), nil
483+
}
484+
modifiedContent = strings.Replace(modifiedContent, match.SearchText, edit.NewText, 1)
485+
detail := fmt.Sprintf("Edit %d: Replaced %d characters", i+1, len(match.SearchText))
486+
if match.Strategy != "exact" {
487+
detail += fmt.Sprintf(" (matched via %s)", match.Strategy)
482488
}
483-
modifiedContent = strings.Replace(modifiedContent, edit.OldText, edit.NewText, 1)
484-
changes = append(changes, fmt.Sprintf("Edit %d: Replaced %d characters", i+1, len(edit.OldText)))
489+
changes = append(changes, detail)
485490
}
486491

487492
if err := os.WriteFile(resolvedPath, []byte(modifiedContent), 0o644); err != nil {

0 commit comments

Comments
 (0)