From dd9fc105a1b9893253fbd5f4feee0f60646d56b6 Mon Sep 17 00:00:00 2001 From: fullsend-code <278716306+fullsend-ai-coder[bot]@users.noreply.github.com> Date: Tue, 16 Jun 2026 19:24:17 +0000 Subject: [PATCH] perf(#2351): batch path-existence checks via Git Trees API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add forge.Client.ListRepositoryFiles to retrieve all file paths in a repository's default branch with a single Git Trees API call (refs → commit → tree?recursive=1). This replaces the O(N) GetFileContent pattern used by ComparePathPresence, reducing 100+ sequential API calls to 3 fixed calls regardless of path count. Changes: - forge.Client: add ListRepositoryFiles(ctx, owner, repo) - github.LiveClient: implement using Git Trees API (reuses the same refs/commits/trees pattern as CommitFiles) - forge.FakeClient: implement using FileContents map keys - scaffold.ComparePathPresence: new batch implementation that calls ListRepositoryFiles once and checks membership locally - Tests: 6 ComparePathPresence tests including a guard that GetFileContent is never called; error injection and thread safety coverage for the new forge method PR #1954 introduces a naive ComparePathPresence in vendormanifest.go that loops GetFileContent per path. When that PR merges, its version should be replaced with this batch implementation. Closes #2351 --- internal/forge/fake.go | 18 ++++ internal/forge/fake_test.go | 5 ++ internal/forge/forge.go | 6 ++ internal/forge/github/github.go | 78 +++++++++++++++++ internal/scaffold/pathpresence.go | 37 ++++++++ internal/scaffold/pathpresence_test.go | 113 +++++++++++++++++++++++++ 6 files changed, 257 insertions(+) create mode 100644 internal/scaffold/pathpresence.go create mode 100644 internal/scaffold/pathpresence_test.go diff --git a/internal/forge/fake.go b/internal/forge/fake.go index 2b9863277..8eb540945 100644 --- a/internal/forge/fake.go +++ b/internal/forge/fake.go @@ -400,6 +400,24 @@ func (f *FakeClient) DeleteFile(_ context.Context, owner, repo, path, message st return nil } +func (f *FakeClient) ListRepositoryFiles(_ context.Context, owner, repo string) ([]string, error) { + f.mu.Lock() + defer f.mu.Unlock() + + if e := f.err("ListRepositoryFiles"); e != nil { + return nil, e + } + + prefix := owner + "/" + repo + "/" + var paths []string + for key := range f.FileContents { + if len(key) > len(prefix) && key[:len(prefix)] == prefix { + paths = append(paths, key[len(prefix):]) + } + } + return paths, nil +} + func (f *FakeClient) ListDirectoryContents(_ context.Context, owner, repo, path, ref string, _ bool) ([]DirectoryEntry, error) { f.mu.Lock() defer f.mu.Unlock() diff --git a/internal/forge/fake_test.go b/internal/forge/fake_test.go index 42bdf4ac6..ab7a90ef1 100644 --- a/internal/forge/fake_test.go +++ b/internal/forge/fake_test.go @@ -471,6 +471,10 @@ func TestFakeClient_ErrorInjection(t *testing.T) { _, err := fc.ListDirectoryContents(ctx, "o", "r", "p", "main", false) return err }}, + {"ListRepositoryFiles", func(fc *FakeClient) error { + _, err := fc.ListRepositoryFiles(ctx, "o", "r") + return err + }}, {"GetFileContentAtRef", func(fc *FakeClient) error { _, err := fc.GetFileContentAtRef(ctx, "o", "r", "p", "main") return err @@ -544,6 +548,7 @@ func TestFakeClient_ThreadSafety(t *testing.T) { _, _ = fc.GetOrgVariableRepos(ctx, "o", "n") _ = fc.DeleteIssueComment(ctx, "o", "r", 1) _, _ = fc.ListDirectoryContents(ctx, "o", "r", "p", "main", false) + _, _ = fc.ListRepositoryFiles(ctx, "o", "r") _, _ = fc.GetFileContentAtRef(ctx, "o", "r", "p", "main") }(i) } diff --git a/internal/forge/forge.go b/internal/forge/forge.go index b6b295aca..e994b33ad 100644 --- a/internal/forge/forge.go +++ b/internal/forge/forge.go @@ -192,6 +192,12 @@ type Client interface { // Returns forge.ErrNotFound if the path does not exist or is not a directory. ListDirectoryContents(ctx context.Context, owner, repo, path, ref string, recursive bool) ([]DirectoryEntry, error) + // ListRepositoryFiles returns all file paths in the repository's default + // branch using the Git Trees API. This retrieves the entire tree in a + // single API call, making it efficient for batch path-existence checks. + // Returns ErrNotFound if the repository does not exist. + ListRepositoryFiles(ctx context.Context, owner, repo string) ([]string, error) + // GetFileContentAtRef retrieves the content of a file at a specific ref // (commit SHA, branch, or tag). Unlike GetFileContent which reads from // the default branch, this reads from the specified ref. diff --git a/internal/forge/github/github.go b/internal/forge/github/github.go index b110b55c3..587c59b23 100644 --- a/internal/forge/github/github.go +++ b/internal/forge/github/github.go @@ -952,6 +952,84 @@ func (c *LiveClient) listDirContents(ctx context.Context, owner, repo, path, ref return result, nil } +// ListRepositoryFiles returns all file paths in the default branch using +// the Git Trees API (single recursive call). +func (c *LiveClient) ListRepositoryFiles(ctx context.Context, owner, repo string) ([]string, error) { + // 1. Get default branch. + repoResp, err := c.get(ctx, fmt.Sprintf("/repos/%s/%s", owner, repo)) + if err != nil { + return nil, fmt.Errorf("get repo: %w", err) + } + var repoInfo struct { + DefaultBranch string `json:"default_branch"` + } + if err := decodeJSON(repoResp, &repoInfo); err != nil { + return nil, fmt.Errorf("decode repo info: %w", err) + } + + // 2. Get branch ref → commit SHA. + var commitSHA string + if err := c.retryOnTransient(ctx, "get branch ref", func() error { + refResp, refErr := c.get(ctx, fmt.Sprintf("/repos/%s/%s/git/ref/heads/%s", owner, repo, repoInfo.DefaultBranch)) + if refErr != nil { + return fmt.Errorf("get branch ref: %w", refErr) + } + var ref struct { + Object struct { + SHA string `json:"sha"` + } `json:"object"` + } + if decErr := decodeJSON(refResp, &ref); decErr != nil { + return fmt.Errorf("decode ref: %w", decErr) + } + commitSHA = ref.Object.SHA + return nil + }); err != nil { + return nil, err + } + + // 3. Get commit → tree SHA. + cResp, err := c.get(ctx, fmt.Sprintf("/repos/%s/%s/git/commits/%s", owner, repo, commitSHA)) + if err != nil { + return nil, fmt.Errorf("get commit: %w", err) + } + var commitObj struct { + Tree struct { + SHA string `json:"sha"` + } `json:"tree"` + } + if err := decodeJSON(cResp, &commitObj); err != nil { + return nil, fmt.Errorf("decode commit: %w", err) + } + + // 4. Get recursive tree → file paths. + treeResp, err := c.get(ctx, fmt.Sprintf("/repos/%s/%s/git/trees/%s?recursive=1", owner, repo, commitObj.Tree.SHA)) + if err != nil { + return nil, fmt.Errorf("get tree: %w", err) + } + var tree struct { + Tree []struct { + Path string `json:"path"` + Type string `json:"type"` // "blob" or "tree" + } `json:"tree"` + Truncated bool `json:"truncated"` + } + if err := decodeJSON(treeResp, &tree); err != nil { + return nil, fmt.Errorf("decode tree: %w", err) + } + if tree.Truncated { + return nil, fmt.Errorf("repository tree too large (truncated)") + } + + paths := make([]string, 0, len(tree.Tree)) + for _, entry := range tree.Tree { + if entry.Type == "blob" { + paths = append(paths, entry.Path) + } + } + return paths, nil +} + // DeleteFile deletes a file from the repository's default branch. // It first fetches the file to obtain its SHA (required by the GitHub Contents // API), then issues the DELETE. Retries on transient 404/409 errors. diff --git a/internal/scaffold/pathpresence.go b/internal/scaffold/pathpresence.go new file mode 100644 index 000000000..ccecb8212 --- /dev/null +++ b/internal/scaffold/pathpresence.go @@ -0,0 +1,37 @@ +package scaffold + +import ( + "context" + "fmt" + "sort" + + "github.com/fullsend-ai/fullsend/internal/forge" +) + +// ComparePathPresence checks which expected paths exist in the repo's +// default branch. It uses forge.Client.ListRepositoryFiles to fetch all +// file paths in a single Git Trees API call, then checks membership +// locally. This replaces O(N) GetFileContent calls with O(1) API calls. +func ComparePathPresence(ctx context.Context, client forge.Client, owner, repo string, expected []string) (missing []string, err error) { + if len(expected) == 0 { + return nil, nil + } + + allPaths, err := client.ListRepositoryFiles(ctx, owner, repo) + if err != nil { + return nil, fmt.Errorf("listing repository files: %w", err) + } + + existing := make(map[string]struct{}, len(allPaths)) + for _, p := range allPaths { + existing[p] = struct{}{} + } + + for _, path := range expected { + if _, ok := existing[path]; !ok { + missing = append(missing, path) + } + } + sort.Strings(missing) + return missing, nil +} diff --git a/internal/scaffold/pathpresence_test.go b/internal/scaffold/pathpresence_test.go new file mode 100644 index 000000000..cd0d76062 --- /dev/null +++ b/internal/scaffold/pathpresence_test.go @@ -0,0 +1,113 @@ +package scaffold + +import ( + "context" + "errors" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/fullsend-ai/fullsend/internal/forge" +) + +func TestComparePathPresence_AllPresent(t *testing.T) { + client := &forge.FakeClient{ + FileContents: map[string][]byte{ + "org/.fullsend/.defaults/action.yml": []byte("marker"), + "org/.fullsend/.github/workflows/reusable-triage.yml": []byte("wf"), + "org/.fullsend/bin/fullsend": []byte("binary"), + }, + } + + missing, err := ComparePathPresence(context.Background(), client, "org", ".fullsend", []string{ + ".defaults/action.yml", + ".github/workflows/reusable-triage.yml", + "bin/fullsend", + }) + require.NoError(t, err) + assert.Empty(t, missing) +} + +func TestComparePathPresence_SomeMissing(t *testing.T) { + client := &forge.FakeClient{ + FileContents: map[string][]byte{ + "org/.fullsend/.defaults/action.yml": []byte("marker"), + "org/.fullsend/bin/fullsend": []byte("binary"), + }, + } + + missing, err := ComparePathPresence(context.Background(), client, "org", ".fullsend", []string{ + ".defaults/action.yml", + ".github/workflows/reusable-triage.yml", + ".github/workflows/reusable-code.yml", + "bin/fullsend", + }) + require.NoError(t, err) + assert.Equal(t, []string{ + ".github/workflows/reusable-code.yml", + ".github/workflows/reusable-triage.yml", + }, missing) +} + +func TestComparePathPresence_AllMissing(t *testing.T) { + client := &forge.FakeClient{ + FileContents: map[string][]byte{}, + } + + missing, err := ComparePathPresence(context.Background(), client, "org", ".fullsend", []string{ + ".defaults/action.yml", + "bin/fullsend", + }) + require.NoError(t, err) + assert.Equal(t, []string{".defaults/action.yml", "bin/fullsend"}, missing) +} + +func TestComparePathPresence_EmptyExpected(t *testing.T) { + client := &forge.FakeClient{ + FileContents: map[string][]byte{ + "org/.fullsend/bin/fullsend": []byte("binary"), + }, + } + + missing, err := ComparePathPresence(context.Background(), client, "org", ".fullsend", nil) + require.NoError(t, err) + assert.Nil(t, missing) +} + +func TestComparePathPresence_ForgeError(t *testing.T) { + client := &forge.FakeClient{ + Errors: map[string]error{ + "ListRepositoryFiles": errors.New("network error"), + }, + } + + _, err := ComparePathPresence(context.Background(), client, "org", ".fullsend", []string{ + ".defaults/action.yml", + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "listing repository files") +} + +func TestComparePathPresence_UsesOneAPICall(t *testing.T) { + // Verify that ComparePathPresence uses ListRepositoryFiles (batch) + // rather than per-path GetFileContent. We inject an error on + // GetFileContent to ensure it is never called. + client := &forge.FakeClient{ + FileContents: map[string][]byte{ + "org/repo/path-a": []byte("a"), + "org/repo/path-b": []byte("b"), + }, + Errors: map[string]error{ + "GetFileContent": errors.New("should not be called"), + }, + } + + missing, err := ComparePathPresence(context.Background(), client, "org", "repo", []string{ + "path-a", + "path-b", + "path-c", + }) + require.NoError(t, err) + assert.Equal(t, []string{"path-c"}, missing) +}