From 7da41e6c7bf22f08d55c693f92502713bb147b8e Mon Sep 17 00:00:00 2001 From: Alex Ong Date: Tue, 3 Mar 2026 16:12:12 +1100 Subject: [PATCH 1/2] Fix phantom file carry-forward causing lingering shadow branches Agent transcript parsers capture file paths from all tool calls, including ones that were attempted but never created on disk (e.g. agent writes src/types.go but creates src/types/types.go instead). These phantom paths in FilesTouched caused filesWithRemainingAgentChanges to always return non-empty results, creating an infinite carry-forward loop where shadow branches were never cleaned up. Fix: for non-committed files, also check the shadow branch tree before carrying forward. buildTreeWithChanges already skips non-existent files, so the shadow tree is the ground truth of what was actually checkpointed. Co-Authored-By: Claude Opus 4.6 Entire-Checkpoint: 95bb0b3d0af0 --- cmd/entire/cli/strategy/content_overlap.go | 13 ++++- .../cli/strategy/content_overlap_test.go | 47 +++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/cmd/entire/cli/strategy/content_overlap.go b/cmd/entire/cli/strategy/content_overlap.go index 6a025dc88..6524dcc1e 100644 --- a/cmd/entire/cli/strategy/content_overlap.go +++ b/cmd/entire/cli/strategy/content_overlap.go @@ -434,8 +434,19 @@ func filesWithRemainingAgentChanges( var remaining []string for _, filePath := range filesTouched { - // If file wasn't committed at all, it definitely has remaining changes + // If file wasn't committed, check if it actually exists in the shadow tree. + // Transcript parsers capture file paths from all tool calls, including ones + // that were attempted but never created (e.g. agent writes src/types.go then + // creates src/types/types.go instead). buildTreeWithChanges skips non-existent + // files, so the shadow tree is the ground truth. Without this check, phantom + // paths cause infinite carry-forward loops. if _, wasCommitted := committedFiles[filePath]; !wasCommitted { + if _, err := shadowTree.File(filePath); err != nil { + logging.Debug(logCtx, "filesWithRemainingAgentChanges: file not committed and not in shadow tree, skipping", + slog.String("file", filePath), + ) + continue + } remaining = append(remaining, filePath) logging.Debug(logCtx, "filesWithRemainingAgentChanges: file not committed, keeping", slog.String("file", filePath), diff --git a/cmd/entire/cli/strategy/content_overlap_test.go b/cmd/entire/cli/strategy/content_overlap_test.go index 6ae9cf3fb..e7cfb763f 100644 --- a/cmd/entire/cli/strategy/content_overlap_test.go +++ b/cmd/entire/cli/strategy/content_overlap_test.go @@ -663,6 +663,53 @@ func TestFilesWithRemainingAgentChanges_CacheEquivalence(t *testing.T) { assert.NotContains(t, resultWith, "fileA.txt") } +// TestFilesWithRemainingAgentChanges_PhantomFile tests that files tracked in +// filesTouched but not present in the shadow branch tree are skipped. This +// happens when an agent's transcript references a file path (e.g. via a +// write_file tool call) that was never actually created on disk — for example +// when Gemini tries to write src/types.go but creates src/types/types.go +// instead. Without this check, phantom files cause infinite carry-forward. +func TestFilesWithRemainingAgentChanges_PhantomFile(t *testing.T) { + t.Parallel() + dir := setupGitRepo(t) + + repo, err := git.PlainOpen(dir) + require.NoError(t, err) + + // Shadow branch only contains the REAL file (buildTreeWithChanges skips + // non-existent files, so the phantom path is never in the tree). + createShadowBranchWithContent(t, repo, "phn1234", "e3b0c4", map[string][]byte{ + "src/types/types.go": []byte("package types\n\ntype User struct{}\n"), + }) + + // Create the real file on disk and commit it. + require.NoError(t, os.MkdirAll(filepath.Join(dir, "src", "types"), 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "src", "types", "types.go"), + []byte("package types\n\ntype User struct{}\n"), 0o644)) + wt, err := repo.Worktree() + require.NoError(t, err) + _, err = wt.Add("src/types/types.go") + require.NoError(t, err) + headCommit, err := wt.Commit("Add types.go", &git.CommitOptions{ + Author: &object.Signature{Name: "Test", Email: "test@test.com", When: time.Now()}, + }) + require.NoError(t, err) + + commit, err := repo.CommitObject(headCommit) + require.NoError(t, err) + + shadowBranch := checkpoint.ShadowBranchNameForCommit("phn1234", "e3b0c4") + committedFiles := map[string]struct{}{"src/types/types.go": {}} + + // filesTouched includes both the real path and a phantom path. + remaining := filesWithRemainingAgentChanges(context.Background(), repo, shadowBranch, commit, + []string{"src/types.go", "src/types/types.go"}, committedFiles) + + // src/types.go is not committed AND not in shadow tree → skip. + // src/types/types.go is committed with matching content → skip. + assert.Empty(t, remaining, "Phantom files not in shadow tree should not be carried forward") +} + // TestStagedFilesOverlapWithContent_ModifiedFile tests that a modified file // (exists in HEAD) always counts as overlap. func TestStagedFilesOverlapWithContent_ModifiedFile(t *testing.T) { From 6d6e6ea05f5c1bda5c1204bda8eb5bda73f6e28d Mon Sep 17 00:00:00 2001 From: Alex Ong Date: Wed, 4 Mar 2026 11:48:17 +1100 Subject: [PATCH 2/2] Simplify shadow tree check in filesWithRemainingAgentChanges Move the shadow tree existence check to the top of the loop instead of nesting it inside the wasCommitted branch. Both code paths (committed and uncommitted) skip files absent from the shadow tree, so a single early guard is equivalent and clearer. Add a test for the uncommitted-deletion scenario to document that skipping agent-deleted files is intentional (no content on disk to carry forward). Co-Authored-By: Claude Opus 4.6 Entire-Checkpoint: 04e65de01f59 --- cmd/entire/cli/strategy/content_overlap.go | 35 ++++---- .../cli/strategy/content_overlap_test.go | 82 +++++++++++++++++++ 2 files changed, 97 insertions(+), 20 deletions(-) diff --git a/cmd/entire/cli/strategy/content_overlap.go b/cmd/entire/cli/strategy/content_overlap.go index 6524dcc1e..e142132d6 100644 --- a/cmd/entire/cli/strategy/content_overlap.go +++ b/cmd/entire/cli/strategy/content_overlap.go @@ -434,31 +434,26 @@ func filesWithRemainingAgentChanges( var remaining []string for _, filePath := range filesTouched { - // If file wasn't committed, check if it actually exists in the shadow tree. - // Transcript parsers capture file paths from all tool calls, including ones - // that were attempted but never created (e.g. agent writes src/types.go then - // creates src/types/types.go instead). buildTreeWithChanges skips non-existent - // files, so the shadow tree is the ground truth. Without this check, phantom - // paths cause infinite carry-forward loops. - if _, wasCommitted := committedFiles[filePath]; !wasCommitted { - if _, err := shadowTree.File(filePath); err != nil { - logging.Debug(logCtx, "filesWithRemainingAgentChanges: file not committed and not in shadow tree, skipping", - slog.String("file", filePath), - ) - continue - } - remaining = append(remaining, filePath) - logging.Debug(logCtx, "filesWithRemainingAgentChanges: file not committed, keeping", + // Skip files absent from the shadow tree — nothing to carry forward. + // This covers two cases: + // 1. Phantom paths: transcript mentions files the agent never created + // (e.g. agent writes src/types.go then creates src/types/types.go). + // 2. Agent deletions: file was deleted on disk, so buildTreeWithChanges + // excluded it from the shadow tree. Carrying it forward would be a + // no-op since there's no content on disk to snapshot. + // Without this check, phantom paths cause infinite carry-forward loops. + shadowFile, err := shadowTree.File(filePath) + if err != nil { + logging.Debug(logCtx, "filesWithRemainingAgentChanges: file not in shadow tree, skipping", slog.String("file", filePath), ) continue } - // File was committed - check if committed content matches shadow branch - shadowFile, err := shadowTree.File(filePath) - if err != nil { - // File not in shadow branch - nothing to carry forward for this file - logging.Debug(logCtx, "filesWithRemainingAgentChanges: file not in shadow branch, skipping", + // File wasn't committed at all — it has remaining changes + if _, wasCommitted := committedFiles[filePath]; !wasCommitted { + remaining = append(remaining, filePath) + logging.Debug(logCtx, "filesWithRemainingAgentChanges: file not committed, keeping", slog.String("file", filePath), ) continue diff --git a/cmd/entire/cli/strategy/content_overlap_test.go b/cmd/entire/cli/strategy/content_overlap_test.go index e7cfb763f..d462cfd8d 100644 --- a/cmd/entire/cli/strategy/content_overlap_test.go +++ b/cmd/entire/cli/strategy/content_overlap_test.go @@ -710,6 +710,88 @@ func TestFilesWithRemainingAgentChanges_PhantomFile(t *testing.T) { assert.Empty(t, remaining, "Phantom files not in shadow tree should not be carried forward") } +// TestFilesWithRemainingAgentChanges_UncommittedDeletion verifies that an +// agent-deleted file that the user didn't commit is correctly skipped. +// The file won't be in the shadow tree (buildTreeWithChanges excludes files +// missing from disk), so the "not in shadow tree" guard handles it. +// Carrying it forward would be a no-op — buildTreeWithChanges would just +// record another deletion since there's nothing on disk to snapshot. +func TestFilesWithRemainingAgentChanges_UncommittedDeletion(t *testing.T) { + t.Parallel() + dir := setupGitRepo(t) + + repo, err := git.PlainOpen(dir) + require.NoError(t, err) + + // Create a file that the agent will "delete" + targetFile := filepath.Join(dir, "to_delete.txt") + require.NoError(t, os.WriteFile(targetFile, []byte("will be deleted"), 0o644)) + wt, err := repo.Worktree() + require.NoError(t, err) + _, err = wt.Add("to_delete.txt") + require.NoError(t, err) + baseCommitHash, err := wt.Commit("Add file that agent will delete", &git.CommitOptions{ + Author: &object.Signature{Name: "Test", Email: "test@test.com", When: time.Now()}, + }) + require.NoError(t, err) + + // Build shadow branch WITHOUT to_delete.txt (agent deleted it on disk, + // so buildTreeWithChanges excluded it from the shadow tree). + shadowBranchName := checkpoint.ShadowBranchNameForCommit("del1234", "e3b0c4") + refName := plumbing.NewBranchReferenceName(shadowBranchName) + + baseCommit, err := repo.CommitObject(baseCommitHash) + require.NoError(t, err) + baseTree, err := baseCommit.Tree() + require.NoError(t, err) + + entries := make(map[string]object.TreeEntry) + err = checkpoint.FlattenTree(repo, baseTree, "", entries) + require.NoError(t, err) + delete(entries, "to_delete.txt") + + treeHash, err := checkpoint.BuildTreeFromEntries(repo, entries) + require.NoError(t, err) + + shadowCommitObj := &object.Commit{ + Author: object.Signature{Name: "Test", Email: "test@test.com", When: time.Now()}, + Committer: object.Signature{Name: "Test", Email: "test@test.com", When: time.Now()}, + Message: "Shadow checkpoint (agent deleted to_delete.txt)", + TreeHash: treeHash, + } + encodedObj := repo.Storer.NewEncodedObject() + err = shadowCommitObj.Encode(encodedObj) + require.NoError(t, err) + shadowHash, err := repo.Storer.SetEncodedObject(encodedObj) + require.NoError(t, err) + require.NoError(t, repo.Storer.SetReference(plumbing.NewHashReference(refName, shadowHash))) + + // Delete file on disk (agent did this) but user doesn't commit the deletion + require.NoError(t, os.Remove(targetFile)) + + // User commits something else + otherFile := filepath.Join(dir, "other.txt") + require.NoError(t, os.WriteFile(otherFile, []byte("other changes"), 0o644)) + _, err = wt.Add("other.txt") + require.NoError(t, err) + userCommitHash, err := wt.Commit("User commit (not including deletion)", &git.CommitOptions{ + Author: &object.Signature{Name: "Test", Email: "test@test.com", When: time.Now()}, + }) + require.NoError(t, err) + + userCommit, err := repo.CommitObject(userCommitHash) + require.NoError(t, err) + + committedFiles := map[string]struct{}{"other.txt": {}} + remaining := filesWithRemainingAgentChanges(context.Background(), repo, shadowBranchName, userCommit, + []string{"to_delete.txt", "other.txt"}, committedFiles) + + // to_delete.txt is correctly skipped: it's not in the shadow tree because + // the agent deleted it from disk. Carrying it forward would be pointless — + // buildTreeWithChanges would just see the file is missing and record a no-op. + assert.Empty(t, remaining, "Deleted file not in shadow tree should not be carried forward") +} + // TestStagedFilesOverlapWithContent_ModifiedFile tests that a modified file // (exists in HEAD) always counts as overlap. func TestStagedFilesOverlapWithContent_ModifiedFile(t *testing.T) {