From 485c68998494d1343d75389bd493d4dca20df644 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Fri, 23 Jan 2026 16:34:20 -0800 Subject: [PATCH 1/2] cmd/go/internal/test: add opt-in file hashing instead of modtime for test caching (w/ git) Updates golang/go#58571 Updates tailscale/go#150 (cherry picked from commit 64af022d6cf6a303dded52f0dbe67f2fb4b153c9) --- src/cmd/go/internal/test/githash.go | 154 ++++++++++++++++++++++++++++ src/cmd/go/internal/test/test.go | 18 +++- 2 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 src/cmd/go/internal/test/githash.go diff --git a/src/cmd/go/internal/test/githash.go b/src/cmd/go/internal/test/githash.go new file mode 100644 index 00000000000000..b5b22797d77b31 --- /dev/null +++ b/src/cmd/go/internal/test/githash.go @@ -0,0 +1,154 @@ +// Copyright 2026 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package test + +import ( + "bytes" + "io/fs" + "os" + "os/exec" + "path" + "strconv" + "strings" + "sync" +) + +type gitHash string // hex blob hash from git (probably SHA-1, but not necessarily) + +var useGitHash = sync.OnceValue(func() bool { + s := os.Getenv("CMD_GO_USE_GIT_HASH") + if s == "" { + return false + } + v, _ := strconv.ParseBool(s) + return v +}) + +// gitHashKey is the key used to look up possible files in +// a git repo that match the same base name & size. +// +// This is used to avoid statting all files in a git repo +// when trying to find the git hash for a given file. +// Instead, we only stat files that match on name & size. +type gitHashKey struct { + baseName string // base name of file, as that's what fs.FileInfo.Name gives us + size int64 +} + +type gitHashMap struct { + gitRoot string // absolute path to git repo root + + // cands is a list of files in the git repo, bucketed by their (base name, + // size) bucket key. 
This makes looking for a file faster later, without + // statting the whole world, yet still permitting lookup only from a + // fs.FileInfo that only has a base name & size & Sys info. + cands map[gitHashKey][]*gitHashCand +} + +type gitHashCand struct { + rel string // the relative git path from "git ls-tree -r" + hash gitHash + + statOnce sync.Once + stat fs.FileInfo +} + +func (c *gitHashCand) getStat(m *gitHashMap) fs.FileInfo { + c.statOnce.Do(func() { + fullPath := path.Join(m.gitRoot, c.rel) + info, err := os.Lstat(fullPath) + if err == nil { + c.stat = info + } + }) + return c.stat +} + +var getGitHashMap = sync.OnceValue(buildGitHashMap) + +func buildGitHashMap() *gitHashMap { + m := &gitHashMap{ + cands: make(map[gitHashKey][]*gitHashCand), + } + gitRoot, err := exec.Command("git", "rev-parse", "--show-toplevel").Output() + if err != nil { + return nil + } + m.gitRoot = strings.TrimSpace(string(gitRoot)) + + cmd := exec.Command("git", "ls-tree", + "-r", // recursive + "--long", // include file sizes + "-z", // null-separated entries; don't have to deal with C quoting of some filenames + "HEAD", // sizes and hashes are those recorded at HEAD, not of the working tree + ) + cmd.Dir = m.gitRoot // effectively "git -C gitRoot"; either way. + out, err := cmd.Output() + if err != nil { + return nil + } + // Parse lines of the form: + // + // 100644 blob cabbb1732c418125f9c773ce7a28ba34f2708554 639 .gitattributes + // 100644 blob 2b4a5fccdaf12f98cf8e255affa28cfd7e6a784d 95 .github/CODE_OF_CONDUCT.md + // + // .... but null-terminated instead of newline-terminated, so we don't have to deal + // with C quoting of filenames with certain characters. + // + // We don't care about the permissions. NOTE(review): below, the ok from the tab split is overwritten before being checked, and names are passed through strings.TrimSpace. 
+ remain := out + for len(remain) > 0 { + line, rest, ok := bytes.Cut(remain, []byte{0}) + if !ok { + break + } + remain = rest + meta, nameB, ok := bytes.Cut(line, []byte("\t")) + + _, hashAndSize, ok := bytes.Cut(meta, []byte(" blob ")) + if !ok { + continue + } + hashB, sizeB, ok := bytes.Cut(hashAndSize, []byte(" ")) + if !ok { + continue + } + size, err := strconv.ParseInt(strings.TrimSpace(string(sizeB)), 10, 64) + if err != nil { + continue + } + name := strings.TrimSpace(string(nameB)) + hash := strings.TrimSpace(string(hashB)) + k := gitHashKey{ + baseName: path.Base(name), + size: size, + } + m.cands[k] = append(m.cands[k], &gitHashCand{ + rel: name, + hash: gitHash(hash), + }) + } + return m +} + +func getGitHash(info fs.FileInfo) (gitHash, bool) { + if !useGitHash() || info == nil || !info.Mode().IsRegular() { + return "", false + } + k := gitHashKey{ + baseName: info.Name(), + size: info.Size(), + } + m := getGitHashMap() + if m == nil { + return "", false + } + for _, cand := range m.cands[k] { + if os.SameFile(info, cand.getStat(m)) { + return cand.hash, true + } + } + return "", false +} diff --git a/src/cmd/go/internal/test/test.go b/src/cmd/go/internal/test/test.go index 6c4a6a574d10ef..570c859ca4125d 100644 --- a/src/cmd/go/internal/test/test.go +++ b/src/cmd/go/internal/test/test.go @@ -2028,7 +2028,7 @@ func hashOpen(name string) (cache.ActionID, error) { hashWriteStat(h, finfo) } } - } else if info.Mode().IsRegular() { + } else if info.Mode().IsRegular() && !useGitHash() { // Because files might be very large, do not attempt // to hash the entirety of their content. Instead assume // the mtime and size recorded in hashWriteStat above @@ -2061,7 +2061,21 @@ func hashStat(name string) cache.ActionID { } func hashWriteStat(h io.Writer, info fs.FileInfo) { - fmt.Fprintf(h, "stat %d %x %v %v\n", info.Size(), uint64(info.Mode()), info.ModTime(), info.IsDir()) + if !useGitHash() { + // Classic behavior: use mod time. 
+ fmt.Fprintf(h, "stat %d %x %v %v\n", info.Size(), uint64(info.Mode()), info.ModTime(), info.IsDir()) + return + } + var modTimeOrHash any = info.ModTime() + switch { + case info.Mode().IsRegular(): + if hash, ok := getGitHash(info); ok { + modTimeOrHash = hash + } + default: + modTimeOrHash = nil // including for directories + } + fmt.Fprintf(h, "stat %d %x %v %v\n", info.Size(), uint64(info.Mode()), modTimeOrHash, info.IsDir()) } // testAndInputKey returns the actual cache key for the pair (testID, testInputsID). From 560747fb7e62ade1d2e4d8d8548265a84b534046 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Fri, 23 Jan 2026 16:34:20 -0800 Subject: [PATCH 2/2] cmd/go/internal/test: add opt-in file hashing instead of modtime for test caching (w/ git) Updates golang/go#58571 Updates tailscale/go#150 (cherry picked from commit 64af022d6cf6a303dded52f0dbe67f2fb4b153c9) --- .../go/internal/{test => githash}/githash.go | 52 ++++++++++++++----- src/cmd/go/internal/modindex/read.go | 5 +- src/cmd/go/internal/test/test.go | 19 ++----- 3 files changed, 46 insertions(+), 30 deletions(-) rename src/cmd/go/internal/{test => githash}/githash.go (76%) diff --git a/src/cmd/go/internal/test/githash.go b/src/cmd/go/internal/githash/githash.go similarity index 76% rename from src/cmd/go/internal/test/githash.go rename to src/cmd/go/internal/githash/githash.go index b5b22797d77b31..2682462f141b54 100644 --- a/src/cmd/go/internal/test/githash.go +++ b/src/cmd/go/internal/githash/githash.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package test +package githash import ( "bytes" @@ -15,16 +15,22 @@ import ( "sync" ) -type gitHash string // hex blob hash from git (probably SHA-1, but not necessarily) +// GitHash is a git hash in hex form. +// +// It's usually a SHA-1 hash, but could be SHA-256 depending on the git +// configuration. 
+type GitHash string + +// Enabled is whether git hash lookups are enabled via the CMD_GO_USE_GIT_HASH +// environment variable. +var Enabled bool -var useGitHash = sync.OnceValue(func() bool { +func init() { s := os.Getenv("CMD_GO_USE_GIT_HASH") - if s == "" { - return false + if s != "" { + Enabled, _ = strconv.ParseBool(s) // an unparsable value leaves Enabled false } - v, _ := strconv.ParseBool(s) - return v -}) +} // gitHashKey is the key used to look up possible files in // a git repo that match the same base name & size. @@ -49,7 +55,7 @@ type gitHashMap struct { type gitHashCand struct { rel string // the relative git path from "git ls-tree -r" - hash gitHash + hash GitHash statOnce sync.Once stat fs.FileInfo @@ -127,14 +133,15 @@ func buildGitHashMap() *gitHashMap { } m.cands[k] = append(m.cands[k], &gitHashCand{ rel: name, - hash: gitHash(hash), + hash: GitHash(hash), }) } return m } -func getGitHash(info fs.FileInfo) (gitHash, bool) { - if !useGitHash() || info == nil || !info.Mode().IsRegular() { +// Hash returns the git hash for the given file info, if available. +func Hash(info fs.FileInfo) (GitHash, bool) { + if !Enabled || info == nil || !info.Mode().IsRegular() { return "", false } k := gitHashKey{ @@ -152,3 +159,24 @@ func getGitHash(info fs.FileInfo) (gitHash, bool) { } return "", false } + +// ModTimeOrHash returns either the git hash (if enabled and available) or the +// mod time of the given file info. +// +// For non-regular files (notably directories), it returns nil if git hash is +// enabled. +// +// It always returns one of nil, time.Time, or GitHash (a string), all suitable +// for use in Sprintf verb %v. 
+func ModTimeOrHash(info fs.FileInfo) any { + if !Enabled { + return info.ModTime() + } + if h, ok := Hash(info); ok { + return h + } + if info.Mode().IsRegular() { + return info.ModTime() + } + return nil +} diff --git a/src/cmd/go/internal/modindex/read.go b/src/cmd/go/internal/modindex/read.go index d87fb06b57e173..9cf03262c40278 100644 --- a/src/cmd/go/internal/modindex/read.go +++ b/src/cmd/go/internal/modindex/read.go @@ -28,6 +28,7 @@ import ( "cmd/go/internal/cache" "cmd/go/internal/cfg" "cmd/go/internal/fsys" + "cmd/go/internal/githash" "cmd/go/internal/imports" "cmd/go/internal/str" "cmd/internal/par" @@ -109,11 +110,11 @@ func dirHash(modroot, pkgdir string) (cache.ActionID, error) { if err != nil { return cache.ActionID{}, ErrNotIndexed } - if info.ModTime().After(cutoff) { + if !githash.Enabled && info.ModTime().After(cutoff) { return cache.ActionID{}, ErrNotIndexed } - fmt.Fprintf(h, "file %v %v %v\n", info.Name(), info.ModTime(), info.Size()) + fmt.Fprintf(h, "file %v %v %v\n", info.Name(), githash.ModTimeOrHash(info), info.Size()) } return h.Sum(), nil } diff --git a/src/cmd/go/internal/test/test.go b/src/cmd/go/internal/test/test.go index 570c859ca4125d..f8d4bc28d9a709 100644 --- a/src/cmd/go/internal/test/test.go +++ b/src/cmd/go/internal/test/test.go @@ -27,6 +27,7 @@ import ( "cmd/go/internal/base" "cmd/go/internal/cache" "cmd/go/internal/cfg" + "cmd/go/internal/githash" "cmd/go/internal/load" "cmd/go/internal/lockedfile" "cmd/go/internal/modload" @@ -2028,7 +2029,7 @@ func hashOpen(name string) (cache.ActionID, error) { hashWriteStat(h, finfo) } } - } else if info.Mode().IsRegular() && !useGitHash() { + } else if info.Mode().IsRegular() && !githash.Enabled { // Because files might be very large, do not attempt // to hash the entirety of their content. 
Instead assume // the mtime and size recorded in hashWriteStat above @@ -2061,21 +2062,7 @@ func hashStat(name string) cache.ActionID { } func hashWriteStat(h io.Writer, info fs.FileInfo) { - if !useGitHash() { - // Classic behavior: use mod time. - fmt.Fprintf(h, "stat %d %x %v %v\n", info.Size(), uint64(info.Mode()), info.ModTime(), info.IsDir()) - return - } - var modTimeOrHash any = info.ModTime() - switch { - case info.Mode().IsRegular(): - if hash, ok := getGitHash(info); ok { - modTimeOrHash = hash - } - default: - modTimeOrHash = nil // including for directories - } - fmt.Fprintf(h, "stat %d %x %v %v\n", info.Size(), uint64(info.Mode()), modTimeOrHash, info.IsDir()) + fmt.Fprintf(h, "stat %d %x %v %v\n", info.Size(), uint64(info.Mode()), githash.ModTimeOrHash(info), info.IsDir()) } // testAndInputKey returns the actual cache key for the pair (testID, testInputsID).