From 40024d57e503e683c3dc1eb38970e2b944232319 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Wed, 7 Jan 2026 02:04:51 -0800 Subject: [PATCH 1/2] [8.8.0] Prefetch `.bzl` files in the remote repo contents cache `.bzl` files are typically small, but can form deep DAGs that require a large number of sequential cache requests to fetch lazily. By prefetching them (as well as `REPO.bazel` files) eagerly, the wall time of one particular fully cached cold `--nobuild` build of Bazel itself decreased by a factor of 5. Along the way, make remote repo contents cache failures non-fatal, matching the behavior of the remote cache. Closes #27910. PiperOrigin-RevId: 853153815 Change-Id: I368a14a845a8d9fb543f473d8c0c2178a4590c78 (cherry picked from commit 361c4204682393b7bf8ac3b6f67c26e9cc746dcf) --- .../remote/AbstractActionInputPrefetcher.java | 5 +- .../RemoteExternalOverlayFileSystem.java | 106 ++++++++++++------ .../remote/RemoteRepoContentsCacheImpl.java | 3 +- .../RepositoryDelegatorFunction.java | 11 +- .../bzlmod/remote_repo_contents_cache_test.py | 75 +++++++++++++ 5 files changed, 158 insertions(+), 42 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java b/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java index 10813dce52bbc3..45c114ea2cdadc 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java +++ b/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java @@ -269,8 +269,9 @@ private boolean shouldDownloadFile(Path path, FileArtifactValue metadata) throws } // If an action output is stale, Skyframe will delete it prior to action execution. However, - // this doesn't apply to spawn outputs that aren't action outputs. To avoid incorrectly reusing - // one such stale output, check for its up-to-dateness here. + // this doesn't apply to spawn outputs that aren't action outputs, or to files in external repos + // that are remote repo contents cache hits. To avoid incorrectly reusing one such stale file, + // check for its up-to-dateness here. if (stat.getSize() != metadata.getSize()) { return true; } diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java index 39fdaf35d58744..d89181fa2c999e 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java @@ -26,7 +26,7 @@ import build.bazel.remote.execution.v2.Tree; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import com.google.devtools.build.lib.actions.ActionInputHelper; import com.google.devtools.build.lib.actions.ActionInputPrefetcher; import com.google.devtools.build.lib.actions.FileArtifactValue; @@ -68,6 +68,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.function.Consumer; import javax.annotation.Nullable; /** @@ -79,16 +80,6 @@ */ public final class RemoteExternalOverlayFileSystem extends FileSystem { - @Override - public boolean isFilePathCaseSensitive() { - return nativeFs.isFilePathCaseSensitive(); - } - - @Override - public FileSystem getHostFileSystem() { - return nativeFs.getHostFileSystem(); - } - private final PathFragment externalDirectory; private final int externalDirectorySegmentCount; private final FileSystem nativeFs; @@ -189,31 +180,54 @@ public void afterCommand() { this.evaluator = null; } - public void injectRemoteRepo(RepositoryName repo, Tree remoteContents, String markerFile) - throws IOException { + /** + * Injects the given remote contents, possibly prefetching some files, and returns true on + * success. + */ + public boolean injectRemoteRepo(RepositoryName repo, Tree remoteContents, String markerFile) + throws IOException, InterruptedException { var childMap = remoteContents.getChildrenList().stream() .collect( toImmutableMap(cache.digestUtil::compute, directory -> directory, (a, b) -> a)); + var repoDir = externalDirectory.getChild(repo.getName()); + var filesToPrefetch = new ArrayList(); injectRecursively( - externalFs, externalDirectory.getChild(repo.getName()), remoteContents.getRoot(), childMap); + externalFs, repoDir, remoteContents.getRoot(), childMap, filesToPrefetch::add); + try { + // TODO: This prefetches a large number of small files. Investigate whether BatchReadBlobs + // would be more efficient. + prefetch(filesToPrefetch); + } catch (BulkTransferException e) { + if (e.allCausedByCacheNotFoundException()) { + // The cache has lost the .bzl files, which should be treated just like a cache miss. + externalFs.deleteTree(repoDir); + return false; + } + throw e; + } // Create the repo directory on disk so that readdir reflects the overlaid state of the external // directory. nativeFs.createDirectoryAndParents(externalDirectory.getChild(repo.getName())); // Keep the marker file contents in memory so that it can be written out when the repo is // materialized. This doubles as a presence marker for the in-memory repo contents. markerFileContents.put(repo.getName(), markerFile); + return true; } private static void injectRecursively( RemoteExternalFileSystem fs, PathFragment path, Directory dir, - ImmutableMap childMap) + ImmutableMap childMap, + Consumer filesToPrefetch) throws IOException { fs.createDirectoryAndParents(path); for (var file : dir.getFilesList()) { var filePath = path.getRelative(unicodeToInternal(file.getName())); + if (shouldPrefetch(filePath)) { + filesToPrefetch.accept(filePath); + } fs.injectFile( filePath, FileArtifactValue.RemoteFileArtifactValue.create( @@ -238,7 +252,7 @@ private static void injectRecursively( "Directory %s with digest %s not found in tree" .formatted(subdirPath, subdirNode.getDigest().getHash())); } - injectRecursively(fs, subdirPath, subdir, childMap); + injectRecursively(fs, subdirPath, subdir, childMap, filesToPrefetch); } } @@ -276,22 +290,15 @@ private void doMaterialize(RepositoryName repo, ExtendedEventHandler reporter) var remoteRepo = externalFs.getPath(repoPath); var walkResult = walk(remoteRepo); for (var directory : walkResult.directories()) { - nativeFs.getPath(directory.asFragment()).createDirectory(); + nativeFs.getPath(directory).createDirectory(); } - var unused = - getFromFuture( - inputPrefetcher.prefetchFiles( - /* action= */ null, - Iterables.transform( - walkResult.files(), path -> ActionInputHelper.fromPath(path.asFragment())), - actionInput -> externalFs.getMetadata(actionInput.getExecPath()), - ActionInputPrefetcher.Priority.CRITICAL, - ActionInputPrefetcher.Reason.INPUTS)); + prefetch(walkResult.files()); // Create symlinks last as some platforms don't allow creating a symlink to a non-existent // target. A symlink may have already been created as an input to an action. for (var remoteSymlink : walkResult.symlinks()) { - var nativeSymlink = nativeFs.getPath(remoteSymlink.asFragment()); - FileSystemUtils.ensureSymbolicLink(nativeSymlink, remoteSymlink.readSymbolicLink()); + var nativeSymlink = nativeFs.getPath(remoteSymlink); + FileSystemUtils.ensureSymbolicLink( + nativeSymlink, externalFs.getPath(remoteSymlink).readSymbolicLink()); } // After the repo has been copied, atomically materialize the marker file. This ensures that the @@ -304,7 +311,19 @@ private void doMaterialize(RepositoryName repo, ExtendedEventHandler reporter) markerFileSibling.renameTo(markerFile); } - private record WalkResult(List files, List symlinks, List directories) {} + private void prefetch(List paths) throws IOException, InterruptedException { + var unused = + getFromFuture( + inputPrefetcher.prefetchFiles( + /* action= */ null, + Lists.transform(paths, ActionInputHelper::fromPath), + actionInput -> externalFs.getMetadata(actionInput.getExecPath()), + ActionInputPrefetcher.Priority.CRITICAL, + ActionInputPrefetcher.Reason.INPUTS)); + } + + private record WalkResult( + List files, List symlinks, List directories) {} private static WalkResult walk(Path root) throws IOException { var result = new WalkResult(new ArrayList<>(), new ArrayList<>(), new ArrayList<>()); @@ -316,10 +335,10 @@ private static void walk(Path root, WalkResult result) throws IOException { for (var dirent : root.readdir(Symlinks.NOFOLLOW)) { var fromChild = root.getChild(dirent.getName()); switch (dirent.getType()) { - case FILE -> result.files.add(fromChild); - case SYMLINK -> result.symlinks.add(fromChild); + case FILE -> result.files.add(fromChild.asFragment()); + case SYMLINK -> result.symlinks.add(fromChild.asFragment()); case DIRECTORY -> { - result.directories.add(fromChild); + result.directories.add(fromChild.asFragment()); walk(fromChild, result); } default -> throw new IOException("Unsupported file type: " + dirent); @@ -327,6 +346,26 @@ private static void walk(Path root, WalkResult result) throws IOException { } } + /** Whether the file with the given path should be materialized eagerly when injecting a repo. */ + private static boolean shouldPrefetch(PathFragment path) { + // .bzl files are typically small and the loads between them can form complex DAGs that can only + // be discovered layer by layer, so prefetching is worthwhile to reduce the number of sequential + // cache requests. + // The REPO.bazel file, if present, is a dependency of any package and will thus have to be + // fetched anyway. + return path.getFileExtension().equals("bzl") || path.getBaseName().equals("REPO.bazel"); + } + + @Override + public FileSystem getHostFileSystem() { + return nativeFs.getHostFileSystem(); + } + + @Override + public boolean isFilePathCaseSensitive() { + return nativeFs.isFilePathCaseSensitive(); + } + // Always mirror tree deletions to the underlying native file system to support bazel clean and // repository refetching. @@ -603,6 +642,9 @@ private FileArtifactValue getMetadata(PathFragment path) throws IOException { @Override public synchronized InputStream getInputStream(PathFragment path) throws IOException { + if (shouldPrefetch(path)) { + return nativeFs.getInputStream(path); + } var relativePath = path.relativeTo(externalDirectory); var info = (RemoteActionFileSystem.RemoteInMemoryFileInfo) stat(path, /* followSymlinks= */ true); diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java index 2066cb9bb77909..ee89cc45062863 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java @@ -270,8 +270,7 @@ public boolean lookupCache( return false; } - remoteFs.injectRemoteRepo(repoName, repoDirectoryContent, markerFileContent); - return true; + return remoteFs.injectRemoteRepo(repoName, repoDirectoryContent, markerFileContent); } private RemoteActionExecutionContext buildContext(RepositoryName repoName) { diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java index 958be029590d9a..12ec94bc977bf0 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java @@ -272,12 +272,11 @@ public SkyValue compute(SkyKey skyKey, Environment env) repoRoot, /* isFetchingDelayed= */ false, excludeRepoFromVendoring); } } catch (IOException e) { - throw new RepositoryFunctionException( - new IOException( - "error looking up repo %s in remote repo contents cache: %s" - .formatted(repositoryName, e.getMessage()), - e), - Transience.TRANSIENT); + env.getListener() + .handle( + Event.warn( + "Remote repo contents cache lookup failed for %s: %s" + .formatted(repositoryName, e.getMessage()))); } } } diff --git a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py index c901e9b216c900..7ebce79de93ee8 100644 --- a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py +++ b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py @@ -641,6 +641,81 @@ def testLostRemoteFile_build(self): self.assertFalse(os.path.exists(os.path.join(repo_dir, 'sub/BUILD'))) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'sub/sub.txt'))) + def testBzlFilePrefetching(self): + self.ScratchFile( + 'MODULE.bazel', + [ + 'repo = use_repo_rule("//:repo.bzl", "repo")', + 'repo(name = "my_repo")', + ], + ) + self.ScratchFile('BUILD.bazel') + self.ScratchFile( + 'repo.bzl', + [ + 'def _repo_impl(rctx):', + ' rctx.file("BUILD", """', + 'load(":nested.bzl", "nested_fg")', + 'nested_fg(name = "haha")', + '""")', + ' rctx.file("nested.bzl", """', + 'load("//subdir:more_nested.bzl", "more_nested_fg")', + 'def nested_fg(name):', + ' more_nested_fg(name = name)', + '""")', + ' rctx.file("subdir/BUILD")', + ' rctx.file("subdir/more_nested.bzl", """', + 'def more_nested_fg(name):', + ' native.filegroup(name = name)', + '""")', + ' rctx.file("file.txt", "hello")', + ' print("JUST FETCHED")', + ' return rctx.repo_metadata(reproducible=True)', + 'repo = repository_rule(_repo_impl)', + ], + ) + + repo_dir = self.RepoDir('my_repo') + + # First fetch: not cached + _, _, stderr = self.RunBazel(['build', '@my_repo//:haha']) + self.assertIn('JUST FETCHED', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'file.txt'))) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'subdir/BUILD'))) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'nested.bzl'))) + self.assertTrue( + os.path.exists(os.path.join(repo_dir, 'subdir/more_nested.bzl')) + ) + + # After expunging: cached, .bzl files materialized + self.RunBazel(['clean', '--expunge']) + _, _, stderr = self.RunBazel(['build', '@my_repo//:haha']) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'file.txt'))) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'subdir/BUILD'))) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'nested.bzl'))) + self.assertTrue( + os.path.exists(os.path.join(repo_dir, 'subdir/more_nested.bzl')) + ) + + # After expunging, without using repo contents cache: not cached + self.RunBazel(['clean', '--expunge']) + _, _, stderr = self.RunBazel([ + '--noexperimental_remote_repo_contents_cache', + 'build', + '@my_repo//:haha', + ]) + self.assertIn('JUST FETCHED', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'file.txt'))) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'subdir/BUILD'))) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'nested.bzl'))) + self.assertTrue( + os.path.exists(os.path.join(repo_dir, 'subdir/more_nested.bzl')) + ) + if __name__ == '__main__': absltest.main() From 719c4abdcd071db1c59442e5d56eaaa061ebee14 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Wed, 7 Jan 2026 06:44:31 -0800 Subject: [PATCH 2/2] Show stack traces in the remote repo contents cache with `--verbose_failures` Makes it easier to debug issues with this experimental feature and also matches the behavior of remote execution/caching. Work towards #27965 Closes #27970. PiperOrigin-RevId: 853238791 Change-Id: Id46ccbb105d93fd17114fab13b086d0b46139fb4 (cherry picked from commit fc5f160593b5039f3cd10ae844e4eb7f5dce03b8) --- .../build/lib/remote/RemoteModule.java | 3 +- .../remote/RemoteRepoContentsCacheImpl.java | 33 ++++++++++++++++--- .../RepositoryRemoteHelpersFactoryImpl.java | 7 ++-- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java index 6fb1dd302d1926..7d4dc72c888cf1 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java @@ -768,7 +768,8 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException { invocationId, remoteOptions.remoteInstanceName, remoteOptions.remoteAcceptCached, - remoteOptions.remoteUploadLocalResults)); + remoteOptions.remoteUploadLocalResults, + verboseFailures)); if (env.getDirectories().getOutputBase().getFileSystem() instanceof RemoteExternalOverlayFileSystem remoteFs) { remoteFs.beforeCommand( diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java index ee89cc45062863..c8a2c20a5f9fc5 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java @@ -24,6 +24,7 @@ import build.bazel.remote.execution.v2.Directory; import build.bazel.remote.execution.v2.Tree; import com.google.common.base.Splitter; +import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.util.concurrent.Futures; @@ -95,6 +96,7 @@ public final class RemoteRepoContentsCacheImpl implements RemoteRepoContentsCach private final String commandId; private final boolean acceptCached; private final boolean uploadLocalResults; + private final boolean verboseFailures; private final DigestUtil digestUtil; private final Action baseAction; @@ -103,12 +105,14 @@ public RemoteRepoContentsCacheImpl( String buildRequestId, String commandId, boolean acceptCached, - boolean uploadLocalResults) { + boolean uploadLocalResults, + boolean verboseFailures) { this.buildRequestId = buildRequestId; this.commandId = commandId; this.cache = cache; this.acceptCached = acceptCached; this.uploadLocalResults = uploadLocalResults; + this.verboseFailures = verboseFailures; this.digestUtil = cache.digestUtil; this.baseAction = Action.newBuilder() @@ -151,7 +155,7 @@ public void addToCache( reporter.handle( Event.warn( "Failed to read marker file repo %s, skipping: %s" - .formatted(repoName, e.getMessage()))); + .formatted(repoName, maybeGetStackTrace(e)))); } var action = buildAction(predeclaredInputHash); var actionKey = new ActionKey(digestUtil.compute(action)); @@ -178,7 +182,7 @@ public void addToCache( reporter.handle( Event.warn( "Failed to upload repo contents to remote cache for repo %s: %s" - .formatted(repoName, e.getMessage()))); + .formatted(repoName, maybeGetStackTrace(e)))); } } @@ -189,6 +193,22 @@ public boolean lookupCache( String predeclaredInputHash, ExtendedEventHandler reporter) throws IOException, InterruptedException { + try { + return doLookupCache(repoName, repoDir, predeclaredInputHash, reporter); + } catch (IOException e) { + throw new IOException( + "Failed to look up repo %s in the remote repo contents cache: %s" + .formatted(repoName, maybeGetStackTrace(e)), + e); + } + } + + private boolean doLookupCache( + RepositoryName repoName, + Path repoDir, + String predeclaredInputHash, + ExtendedEventHandler reporter) + throws IOException, InterruptedException { if (!(repoDir.getFileSystem() instanceof RemoteExternalOverlayFileSystem remoteFs)) { return false; } @@ -246,7 +266,8 @@ public boolean lookupCache( markerFileContent = new String(markerFileContentFuture.get(), StandardCharsets.ISO_8859_1); repoDirectoryContent = repoDirectoryContentFuture.get(); } catch (ExecutionException e) { - throw new IllegalStateException("waitForBulkTransfer should have thrown", e); + throw new IllegalStateException( + "waitForBulkTransfer should have thrown: " + maybeGetStackTrace(e)); } var markerFileLines = Splitter.on('\n') @@ -294,6 +315,10 @@ private Action buildAction(String predeclaredInputHash) { .build(); } + private String maybeGetStackTrace(Exception e) { + return verboseFailures ? Throwables.getStackTraceAsString(e) : e.getMessage(); + } + private record RepoRemotePathResolver(Path fetchedRepoMarkerFile, Path fetchedRepoDir) implements RemotePathResolver { diff --git a/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java b/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java index 5d3d95a022534f..2b4035128f7b98 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java @@ -30,6 +30,7 @@ class RepositoryRemoteHelpersFactoryImpl implements RepositoryRemoteHelpersFacto private final String remoteInstanceName; private final boolean acceptCached; private final boolean uploadLocalResults; + private final boolean verboseFailures; RepositoryRemoteHelpersFactoryImpl( CombinedCache cache, @@ -38,7 +39,8 @@ class RepositoryRemoteHelpersFactoryImpl implements RepositoryRemoteHelpersFacto String commandId, String remoteInstanceName, boolean acceptCached, - boolean uploadLocalResults) { + boolean uploadLocalResults, + boolean verboseFailures) { this.cache = cache; this.remoteExecutor = remoteExecutor; this.buildRequestId = buildRequestId; @@ -46,6 +48,7 @@ class RepositoryRemoteHelpersFactoryImpl implements RepositoryRemoteHelpersFacto this.remoteInstanceName = remoteInstanceName; this.acceptCached = acceptCached; this.uploadLocalResults = uploadLocalResults; + this.verboseFailures = verboseFailures; } @Nullable @@ -68,6 +71,6 @@ public RepositoryRemoteExecutor createExecutor() { @Override public RemoteRepoContentsCache createRepoContentsCache() { return new RemoteRepoContentsCacheImpl( - cache, buildRequestId, commandId, acceptCached, uploadLocalResults); + cache, buildRequestId, commandId, acceptCached, uploadLocalResults, verboseFailures); } }