From 12d132387940c07e3499d918dbe45f30bed626fa Mon Sep 17 00:00:00 2001 From: Xdng Yng Date: Tue, 23 Jun 2026 18:05:24 +0200 Subject: [PATCH 1/8] Get the local and remote repo contents cache to work together * Also upload to the remote cache when the local cache is in use. The fix is simple but subtle: the logic for the two caches in `RepositoryFetchFunction` has to be flipped since the Skyframe restart after adding an entry to the local cache meant that the same code path would not be taken again. * Fix a crash when using both by ensuring that the local repo contents cache uses the file system backing the output base, not the workspace directory: ``` FATAL: bazel crashed due to an internal error. Printing stack trace: java.lang.RuntimeException: Unrecoverable error while evaluating node 'REPOSITORY_DIRECTORY:@@rules_python+' (requested by nodes 'REPO_FILE:@@rules_python+') at com.google.devtools.build.skyframe.AbstractParallelEvaluator$Evaluate.run(AbstractParallelEvaluator.java:552) at com.google.devtools.build.lib.concurrent.AbstractQueueVisitor$WrappedRunnable.run(AbstractQueueVisitor.java:435) at java.base/java.util.concurrent.ForkJoinTask$AdaptedRunnableAction.exec(Unknown Source) at java.base/java.util.concurrent.ForkJoinTask.doExec(Unknown Source) at java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(Unknown Source) at java.base/java.util.concurrent.ForkJoinPool.runWorker(Unknown Source) at java.base/java.util.concurrent.ForkJoinWorkerThread.run(Unknown Source) Caused by: java.lang.IllegalArgumentException: Files are on different filesystems: C:/users/runneradmin/_bazel_runneradmin/ebfu7cpi/external/@rules_python+.marker (on com.google.devtools.build.lib.remote.RemoteExternalOverlayFileSystem@79583b9), C:/Users/runneradmin/.cache/bazel-repo/contents/_trash/26a5feef-bf8c-4326-bf3d-500997c7362e (on com.google.devtools.build.lib.windows.WindowsFileSystem@24180f0f) at com.google.devtools.build.lib.vfs.Path.checkSameFileSystem(Path.java:964) at com.google.devtools.build.lib.vfs.Path.renameTo(Path.java:630) at com.google.devtools.build.lib.vfs.FileSystemUtils.moveFile(FileSystemUtils.java:456) at com.google.devtools.build.lib.bazel.repository.cache.LocalRepoContentsCache.moveToCache(LocalRepoContentsCache.java:172) at com.google.devtools.build.lib.bazel.repository.RepositoryFetchFunction.compute(RepositoryFetchFunction.java:297) at com.google.devtools.build.skyframe.AbstractParallelEvaluator$Evaluate.run(AbstractParallelEvaluator.java:471) ``` Closes #28002. PiperOrigin-RevId: 855211557 Change-Id: I2f3c40a6aef594682fba989853f7ee982f30c294 (cherry picked from commit b143070b825db7c968ee6e881959473693e17980) --- .../build/lib/analysis/BlazeDirectories.java | 1 + .../lib/bazel/BazelRepositoryModule.java | 12 +++- .../RepositoryDelegatorFunction.java | 16 ++--- .../bzlmod/remote_repo_contents_cache_test.py | 65 +++++++++++++++++++ 4 files changed, 84 insertions(+), 10 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/analysis/BlazeDirectories.java b/src/main/java/com/google/devtools/build/lib/analysis/BlazeDirectories.java index 83f417c9a34f7c..ad781f0a63ca87 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/BlazeDirectories.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/BlazeDirectories.java @@ -141,6 +141,7 @@ public Path getWorkspace() { } /** Returns working directory of the server. */ + @Nullable public Path getWorkingDirectory() { return workspace; } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/BazelRepositoryModule.java b/src/main/java/com/google/devtools/build/lib/bazel/BazelRepositoryModule.java index 98c8ac2f3bc402..e8d3ce7c49cd80 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/BazelRepositoryModule.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/BazelRepositoryModule.java @@ -720,10 +720,18 @@ private String getAbsolutePath(String path, CommandEnvironment env) { */ @Nullable private Path toPath(PathFragment path, CommandEnvironment env) { - if (path.isEmpty() || env.getBlazeWorkspace().getWorkspace() == null) { + if (path.isEmpty() || env.getDirectories().getWorkspace() == null) { return null; } - return env.getBlazeWorkspace().getWorkspace().getRelative(path); + // It is important to use getWorkspace() here, not getWorkingDirectory(). Both Paths have the + // same underlying PathFragment, but may differ in their FileSystem if the remote repo contents + // cache is in use. getWorkspace() uses the same FileSystem as everything other than the + // workspace directory, while getWorkingDirectory() uses the workspace directory's FileSystem. + // Even though the users of the returned Path may end up writing to it, they are not expected to + // update source files within the workspace. Thus, the correct FileSystem is the one from + // getWorkspace(), which e.g. allows moves from the external directory under the output base to + // the local repo contents cache without crossing FileSystems. + return env.getDirectories().getWorkspace().getRelative(path); } @Override diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java index bd8a1003028172..1a2a27ef7e3a5f 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java @@ -298,6 +298,14 @@ public SkyValue compute(SkyKey skyKey, Environment env) } digestWriter.writeMarkerFile(result.recordedInputValues()); if (result.reproducible() == Reproducibility.YES && !handler.isLocal(rule)) { + if (remoteRepoContentsCache != null) { + remoteRepoContentsCache.addToCache( + repositoryName, + repoRoot, + digestWriter.markerPath, + digestWriter.predeclaredInputHash, + env.getListener()); + } if (repoContentsCache.isEnabled()) { // This repo is eligible for the repo contents cache. Path cachedRepoDir; @@ -348,14 +356,6 @@ public SkyValue compute(SkyKey skyKey, Environment env) throw new IllegalStateException( "FileStateValue unexpectedly present for " + cachedRepoDir); } - if (remoteRepoContentsCache != null) { - remoteRepoContentsCache.addToCache( - repositoryName, - repoRoot, - digestWriter.markerPath, - digestWriter.predeclaredInputHash, - env.getListener()); - } } return new RepositoryDirectoryValue.Success( repoRoot, /* isFetchingDelayed= */ false, excludeRepoFromVendoring); diff --git a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py index 7ebce79de93ee8..79727294164277 100644 --- a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py +++ b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py @@ -18,6 +18,7 @@ import json import os import re +import tempfile from absl.testing import absltest from src.test.py.bazel import test_base @@ -99,6 +100,70 @@ def testCachedAfterCleanExpunge(self): self.assertIn('JUST FETCHED', '\n'.join(stderr)) self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + def testLocalRepoContentsCacheInteraction(self): + self.ScratchFile( + 'MODULE.bazel', + [ + 'repo = use_repo_rule("//:repo.bzl", "repo")', + 'repo(name = "my_repo")', + ], + ) + self.ScratchFile('BUILD.bazel') + self.ScratchFile( + 'repo.bzl', + [ + 'def _repo_impl(rctx):', + ' rctx.file("BUILD", "filegroup(name=\'haha\')")', + ' print("JUST FETCHED")', + ' return rctx.repo_metadata(reproducible=True)', + 'repo = repository_rule(_repo_impl)', + ], + ) + + repo_dir = self.RepoDir('my_repo') + + # First fetch: not cached + repo_contents_cache = tempfile.mkdtemp(dir=os.environ['TEST_TMPDIR']) + _, _, stderr = self.RunBazel([ + 'build', + '@my_repo//:haha', + '--repo_contents_cache=' + repo_contents_cache, + ]) + self.assertIn('JUST FETCHED', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + + # After expunging: cached, hits the local repo contents cache + self.RunBazel(['clean', '--expunge']) + _, _, stderr = self.RunBazel([ + 'build', + '@my_repo//:haha', + '--repo_contents_cache=' + repo_contents_cache, + ]) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + + # After cleaning out local repo contents cache: cached, hits the remote + # cache + self.RunBazel(['clean', '--expunge']) + # Deleting the cache fails on Windows, so we just use a different directory. + _, _, stderr = self.RunBazel([ + 'build', + '@my_repo//:haha', + '--repo_contents_cache=' + repo_contents_cache + '2', + ]) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + + # After expunging, without using any repo contents cache: not cached + self.RunBazel(['clean', '--expunge']) + _, _, stderr = self.RunBazel([ + '--noexperimental_remote_repo_contents_cache', + 'build', + '@my_repo//:haha', + ]) + self.assertIn('JUST FETCHED', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + def testNotCachedWhenPredeclaredInputsChange(self): self.ScratchFile( 'MODULE.bazel', From 48ab596e7d35cb532531659716805b3e13f784b0 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Tue, 23 Jun 2026 18:14:44 +0200 Subject: [PATCH 2/8] Clarify the invalidation of REPO_CONTENTS_CACHE_DIRS FileStateValues Since this behavior is quite surprising (it definitely was to the author), this change also improves the test coverage for repo contents cache deletion by asserting that non-BUILD files within it actually exist on disk rather than just exist from the point of Skyframe. Also fix a crash observed while working on the test improvements. Closes #28222. PiperOrigin-RevId: 855225639 Change-Id: Ie4a88e93d14a4f4b7bb5217fc924e998a1779ccd (cherry picked from commit 4839f46d9de21d2e5f59d1cf0975c2adc342a796) --- .../google/devtools/build/lib/skyframe/BUILD | 1 + .../lib/skyframe/DirtinessCheckerUtils.java | 4 +- .../lib/skyframe/ExternalFilesHelper.java | 16 +++--- .../lib/skyframe/SkyframeErrorProcessor.java | 11 +++- .../build/lib/skyframe/SkyframeExecutor.java | 2 + src/test/py/bazel/bzlmod/bazel_module_test.py | 30 +++++++++++ .../bazel/bzlmod/repo_contents_cache_test.py | 54 ++++++++++++------- 7 files changed, 91 insertions(+), 27 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/BUILD b/src/main/java/com/google/devtools/build/lib/skyframe/BUILD index 7a741b87cbf93b..f130dfbd247a1f 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/BUILD +++ b/src/main/java/com/google/devtools/build/lib/skyframe/BUILD @@ -2537,6 +2537,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/analysis:analysis_cluster", "//src/main/java/com/google/devtools/build/lib/analysis:constraints/top_level_constraint_semantics", "//src/main/java/com/google/devtools/build/lib/analysis:view_creation_failed_exception", + "//src/main/java/com/google/devtools/build/lib/bazel/bzlmod:exception", "//src/main/java/com/google/devtools/build/lib/bugreport", "//src/main/java/com/google/devtools/build/lib/buildeventstream", "//src/main/java/com/google/devtools/build/lib/causes", diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/DirtinessCheckerUtils.java b/src/main/java/com/google/devtools/build/lib/skyframe/DirtinessCheckerUtils.java index fd68caaf19fb8b..c6f310b42a7547 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/DirtinessCheckerUtils.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/DirtinessCheckerUtils.java @@ -165,8 +165,10 @@ private static boolean isCacheableType(FileType fileType) { return true; case EXTERNAL_REPO: case OUTPUT: - case REPO_CONTENTS_CACHE_DIRS: return false; + case REPO_CONTENTS_CACHE_DIRS: + throw new IllegalStateException( + "Repo contents cache dirs are not expected to be checked for dirtiness"); } throw new AssertionError("Unknown type " + fileType); } diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java b/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java index e3d59041344bda..b75e78cc79a8f7 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java @@ -180,14 +180,16 @@ public enum FileType { /** * The directory containing the repo contents cache entries as well as direct children * corresponding to individual predeclared input hashes. These directories are created by Bazel - * but may be deleted when users delete the entire repo contents cache. + * but may be deleted when users delete the entire repo contents cache. However, they are always + * recreated by Bazel before they are used and/or depended on via Skyframe. They are thus + * immutably present from the perspective of Skyframe and don't require invalidation. * - *

These files' handling differs from EXTERNAL_REPO as they are never modified after they are - * created and don't live under the external directory, as well as from EXTERNAL as they - * can be recreated by Bazel after diff detection. - * - *

The contents of these directories are considered EXTERNAL as they carry UUID names - * and are thus never reused. + *

Note: If these directories ever need to be checked for dirtiness during diffing, they have + * to be made non-cacheable according to {@link + * DirtinessCheckerUtils.ExternalDirtinessChecker#isCacheableType} so that they are not locked + * in as non-existent if they have been removed. This would result in FileValues for files below + * them (the actual repo contents, of type EXTERNAL) being locked in as non-existent too, + * even after a refetch of the repo has added a new cache entry. */ REPO_CONTENTS_CACHE_DIRS, } diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeErrorProcessor.java b/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeErrorProcessor.java index 84284a27743514..9c8502a561fd89 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeErrorProcessor.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeErrorProcessor.java @@ -38,6 +38,7 @@ import com.google.devtools.build.lib.actions.TestExecException; import com.google.devtools.build.lib.actions.TopLevelOutputException; import com.google.devtools.build.lib.analysis.AnalysisFailureEvent; +import com.google.devtools.build.lib.bazel.bzlmod.ExternalDepsException; import com.google.devtools.build.lib.analysis.ViewCreationFailedException; import com.google.devtools.build.lib.analysis.constraints.TopLevelConstraintSemantics.TargetCompatibilityCheckException; import com.google.devtools.build.lib.bugreport.BugReport; @@ -540,6 +541,13 @@ && isExecutionCycle(errorInfo.getCycleInfo())) { configurationIdMessage(ctKey.getConfigurationKey()), ((NoSuchThingException) exception).getDetailedExitCode()); analysisRootCauses = NestedSetBuilder.create(Order.STABLE_ORDER, analysisFailedCause); + } else if (exception instanceof ExternalDepsException externalDepsException) { + AnalysisFailedCause analysisFailedCause = + new AnalysisFailedCause( + topLevelLabel, + configurationIdMessage(ctKey.getConfigurationKey()), + externalDepsException.getDetailedExitCode()); + analysisRootCauses = NestedSetBuilder.create(Order.STABLE_ORDER, analysisFailedCause); } else if (exception instanceof TargetCompatibilityCheckException) { analysisRootCauses = NestedSetBuilder.emptySet(Order.STABLE_ORDER); } else if (isExecutionException(exception)) { @@ -779,7 +787,8 @@ private static DetailedException convertToAnalysisException(Throwable cause) { // analyze with --nokeep_going. if (cause instanceof SaneAnalysisException || cause instanceof NoSuchTargetException - || cause instanceof NoSuchPackageException) { + || cause instanceof NoSuchPackageException + || cause instanceof ExternalDepsException) { return (DetailedException) cause; } return null; diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeExecutor.java b/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeExecutor.java index 0eb6797f0b275a..5e5f012d5452e6 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeExecutor.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeExecutor.java @@ -3469,6 +3469,8 @@ protected void handleDiffsWithMissingDiffInformation( fileTypesToCheck.add(FileType.OUTPUT); } if (!fileTypesToCheck.isEmpty()) { + // FileType.REPO_CONTENTS_CACHE_DIRS is intentionally never checked here. See the comment on + // that enum constant for details. dirtinessCheckers = Iterables.concat( dirtinessCheckers, diff --git a/src/test/py/bazel/bzlmod/bazel_module_test.py b/src/test/py/bazel/bzlmod/bazel_module_test.py index ed092907c07e8a..5d1e11037cc48d 100644 --- a/src/test/py/bazel/bzlmod/bazel_module_test.py +++ b/src/test/py/bazel/bzlmod/bazel_module_test.py @@ -1540,6 +1540,36 @@ def testUnknownRegistryInModuleMirrors(self): ) self.assertIn('https://unknown-registry.example.com', stderr) + def testInvalidRepoRuleReferencedByTargetDoesNotCrash(self): + self.ScratchFile( + 'MODULE.bazel', + [ + 'repo = use_repo_rule("//:repo.bzl", "repo")', + 'repo(name = "my_repo")', + ], + ) + self.ScratchFile( + 'BUILD', + [ + 'genrule(', + ' name = "gen",', + ' srcs = ["@my_repo//:a.txt"],', + ' outs = ["out.txt"],', + ' cmd = "cat $(SRCS) > $(OUTS)",', + ')', + ], + ) + self.ScratchFile('repo.bzl', ['nonsense']) + + exit_code, _, stderr = self.RunBazel( + ['build', '//:gen'], + allow_failure=True, + ) + self.AssertNotExitCode(exit_code, 0, stderr) + stderr = '\n'.join(stderr) + self.assertNotIn('FATAL', stderr) + self.assertIn("compilation of module 'repo.bzl' failed", stderr) + if __name__ == '__main__': absltest.main() diff --git a/src/test/py/bazel/bzlmod/repo_contents_cache_test.py b/src/test/py/bazel/bzlmod/repo_contents_cache_test.py index 961323f952594a..60ea22b616d088 100644 --- a/src/test/py/bazel/bzlmod/repo_contents_cache_test.py +++ b/src/test/py/bazel/bzlmod/repo_contents_cache_test.py @@ -503,77 +503,93 @@ def doTestRepoContentsCacheDeleted(self, check_external_repository_files): 'repo(name = "my_repo")', ], ) - self.ScratchFile('workspace/BUILD.bazel') + self.ScratchFile( + 'workspace/BUILD.bazel', + [ + 'genrule(', + ' name = "gen",', + ' srcs = ["@my_repo//:haha", "in.txt"],', + ' outs = ["out.txt"],', + ' cmd = "cat $(SRCS) > $(OUTS)",', + ')', + ], + ) self.ScratchFile( 'workspace/repo.bzl', [ 'def _repo_impl(rctx):', - ' rctx.file("BUILD", "filegroup(name=\'haha\')")', + ( + ' rctx.file("BUILD", "filegroup(name=\'haha\',' + " srcs=['a.txt'], visibility=['//visibility:public'])\")" + ), + ' rctx.file("a.txt", "hello world")', ' print("JUST FETCHED")', ' return rctx.repo_metadata(reproducible=True)', 'repo = repository_rule(_repo_impl)', ], ) # First fetch: not cached + self.ScratchFile('workspace/in.txt', ['1']) _, _, stderr = self.RunBazel( [ 'build', - '@my_repo//:haha', + '//:gen', ] + extra_args, cwd=workspace, ) self.assertIn('JUST FETCHED', '\n'.join(stderr)) + with open(os.path.join(workspace, 'bazel-bin/out.txt'), 'r') as f: + self.assertEqual(f.read(), 'hello world1\n') # Second fetch: cached + self.ScratchFile('workspace/in.txt', ['2']) _, _, stderr = self.RunBazel( [ 'build', - '@my_repo//:haha', + '//:gen', ] + extra_args, cwd=workspace, ) self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + with open(os.path.join(workspace, 'bazel-bin/out.txt'), 'r') as f: + self.assertEqual(f.read(), 'hello world2\n') # Delete the entire repo contents cache and fetch again: not cached # Avoid access denied on Windows due to files being read-only by moving to # a different location instead. os.rename(repo_contents_cache, repo_contents_cache + '_deleted') + self.ScratchFile('workspace/in.txt', ['3']) _, _, stderr = self.RunBazel( - [ - 'build', - '@my_repo//:haha', - ] - + extra_args, + ['build', '//:gen'] + extra_args, cwd=workspace, ) stderr = '\n'.join(stderr) self.assertIn('JUST FETCHED', stderr) self.assertNotIn('WARNING', stderr) + with open(os.path.join(workspace, 'bazel-bin/out.txt'), 'r') as f: + self.assertEqual(f.read(), 'hello world3\n') # Second fetch after deletion: cached + self.ScratchFile('workspace/in.txt', ['4']) _, _, stderr = self.RunBazel( - [ - 'build', - '@my_repo//:haha', - ] - + extra_args, + ['build', '//:gen'] + extra_args, cwd=workspace, ) self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) self.assertNotIn('WARNING', '\n'.join(stderr)) + with open(os.path.join(workspace, 'bazel-bin/out.txt'), 'r') as f: + self.assertEqual(f.read(), 'hello world4\n') # Delete the entire repo contents cache and fetch again with a different # path: not cached # Avoid access denied on Windows due to files being read-only by moving to # a different location instead. os.rename(repo_contents_cache, repo_contents_cache + '_deleted_again') + self.ScratchFile('workspace/in.txt', ['5']) _, _, stderr = self.RunBazel( - [ - 'build', - '@my_repo//:haha', - ] + ['build', '//:gen'] + extra_args + [ '--repo_contents_cache=%s' % repo_contents_cache + '2', @@ -583,6 +599,8 @@ def doTestRepoContentsCacheDeleted(self, check_external_repository_files): stderr = '\n'.join(stderr) self.assertIn('JUST FETCHED', stderr) self.assertNotIn('WARNING', stderr) + with open(os.path.join(workspace, 'bazel-bin/out.txt'), 'r') as f: + self.assertEqual(f.read(), 'hello world5\n') def testRepoContentsCacheDeleted_withCheckExternalRepositoryFiles(self): self.doTestRepoContentsCacheDeleted(check_external_repository_files=True) From 7ea07542eecb3805700a0763296a0f87e68986d8 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Fri, 13 Mar 2026 19:02:56 +0100 Subject: [PATCH 3/8] [8.7.0] Manual port of essential parts of: Fix and consolidate repo env handling Ports the essential API changes from 01407ce758 needed by later feature commits: - Add EnvironmentVariableValue record type - Add RepoEnvironmentFunction with REPO_ENV + client env fallback - Register REPOSITORY_ENVIRONMENT_VARIABLE in SkyFunctions and SkyframeExecutor - Update EnvVar.getSkyKey() to use RepoEnvironmentFunction - Update EnvVar.isOutdated() to use EnvironmentVariableValue On 8.7.0, RepoEnvironmentFunction checks --repo_env first, then falls back to the client environment via ClientEnvironmentFunction, since the consolidated repo env computation from CommandEnvironment is not present. (cherry picked from commit 01407ce758) --- .../com/google/devtools/build/lib/rules/BUILD | 4 +- .../rules/repository/RepoRecordedInput.java | 13 +- .../google/devtools/build/lib/skyframe/BUILD | 29 +++++ .../skyframe/EnvironmentVariableValue.java | 28 +++++ .../lib/skyframe/RepoEnvironmentFunction.java | 112 ++++++++++++++++++ .../build/lib/skyframe/SkyFunctions.java | 2 + .../build/lib/skyframe/SkyframeExecutor.java | 1 + .../build/lib/skyframe/packages/BUILD | 1 + .../skyframe/packages/BazelPackageLoader.java | 2 + 9 files changed, 184 insertions(+), 8 deletions(-) create mode 100644 src/main/java/com/google/devtools/build/lib/skyframe/EnvironmentVariableValue.java create mode 100644 src/main/java/com/google/devtools/build/lib/skyframe/RepoEnvironmentFunction.java diff --git a/src/main/java/com/google/devtools/build/lib/rules/BUILD b/src/main/java/com/google/devtools/build/lib/rules/BUILD index 453d06ec3ba748..65e8a78f9d27ff 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/BUILD +++ b/src/main/java/com/google/devtools/build/lib/rules/BUILD @@ -340,8 +340,8 @@ java_library( "//src/main/java/com/google/devtools/build/lib/analysis:blaze_directories", "//src/main/java/com/google/devtools/build/lib/cmdline", "//src/main/java/com/google/devtools/build/lib/cmdline:LabelValidator", - "//src/main/java/com/google/devtools/build/lib/skyframe:action_environment_function", - "//src/main/java/com/google/devtools/build/lib/skyframe:client_environment_value", + "//src/main/java/com/google/devtools/build/lib/skyframe:environment_variable_value", + "//src/main/java/com/google/devtools/build/lib/skyframe:repo_environment_function", "//src/main/java/com/google/devtools/build/lib/skyframe:directory_listing_value", "//src/main/java/com/google/devtools/build/lib/skyframe:directory_tree_digest_value", "//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value", diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java index 055b4e329ac32a..39a5b43e444dae 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java @@ -31,8 +31,8 @@ import com.google.devtools.build.lib.cmdline.LabelSyntaxException; import com.google.devtools.build.lib.cmdline.LabelValidator; import com.google.devtools.build.lib.cmdline.RepositoryName; -import com.google.devtools.build.lib.skyframe.ActionEnvironmentFunction; -import com.google.devtools.build.lib.skyframe.ClientEnvironmentValue; +import com.google.devtools.build.lib.skyframe.EnvironmentVariableValue; +import com.google.devtools.build.lib.skyframe.RepoEnvironmentFunction; import com.google.devtools.build.lib.skyframe.DirectoryListingValue; import com.google.devtools.build.lib.skyframe.DirectoryTreeDigestValue; import com.google.devtools.build.lib.skyframe.PrecomputedValue; @@ -625,17 +625,18 @@ public String toStringInternal() { @Override public SkyKey getSkyKey(BlazeDirectories directories) { - return ActionEnvironmentFunction.key(name); + return RepoEnvironmentFunction.key(name); } @Override public Optional isOutdated( Environment env, BlazeDirectories directories, @Nullable String oldValue) throws InterruptedException { - String v = PrecomputedValue.REPO_ENV.get(env).get(name); - if (v == null) { - v = ((ClientEnvironmentValue) env.getValue(getSkyKey(directories))).getValue(); + var envValue = (EnvironmentVariableValue) env.getValue(getSkyKey(directories)); + if (envValue == null) { + return Optional.empty(); } + String v = envValue.value(); // Note that `oldValue` can be null if the env var was not set. if (!Objects.equals(oldValue, v)) { return Optional.of( diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/BUILD b/src/main/java/com/google/devtools/build/lib/skyframe/BUILD index f130dfbd247a1f..4100aa78ef4df4 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/BUILD +++ b/src/main/java/com/google/devtools/build/lib/skyframe/BUILD @@ -110,6 +110,7 @@ java_library( ":cached_bzl_load_value_and_deps_builder_factory", ":client_environment_function", ":client_environment_value", + ":environment_variable_value", ":collect_packages_under_directory_function", ":collect_packages_under_directory_value", ":collect_targets_in_package_function", @@ -173,6 +174,7 @@ java_library( ":recursive_package_provider_backed_target_pattern_resolver", ":recursive_pkg_function", ":recursive_pkg_value", + ":repo_environment_function", ":repo_file_function", ":repo_package_args_function", ":repository_mapping_function", @@ -1025,6 +1027,16 @@ java_library( ], ) +java_library( + name = "environment_variable_value", + srcs = ["EnvironmentVariableValue.java"], + deps = [ + "//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec", + "//src/main/java/com/google/devtools/build/skyframe:skyframe-objects", + "//third_party:jsr305", + ], +) + java_library( name = "collect_packages_under_directory_function", srcs = ["CollectPackagesUnderDirectoryFunction.java"], @@ -2335,6 +2347,23 @@ java_library( ], ) +java_library( + name = "repo_environment_function", + srcs = ["RepoEnvironmentFunction.java"], + deps = [ + ":client_environment_function", + ":client_environment_value", + ":environment_variable_value", + ":precomputed_value", + ":sky_functions", + "//src/main/java/com/google/devtools/build/lib/skyframe/serialization:visible-for-serialization", + "//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec", + "//src/main/java/com/google/devtools/build/skyframe:skyframe-objects", + "//third_party:guava", + "//third_party:jsr305", + ], +) + java_library( name = "repo_file_function", srcs = ["RepoFileFunction.java"], diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/EnvironmentVariableValue.java b/src/main/java/com/google/devtools/build/lib/skyframe/EnvironmentVariableValue.java new file mode 100644 index 00000000000000..eea16f29607648 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/skyframe/EnvironmentVariableValue.java @@ -0,0 +1,28 @@ +// Copyright 2026 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.skyframe; + +import com.google.devtools.build.lib.skyframe.serialization.autocodec.AutoCodec; +import com.google.devtools.build.skyframe.SkyValue; +import javax.annotation.Nullable; + +/** + * The value of an environmental variable from an environment (client env, action env or repository + * env). These are invalidated and injected by {@link SequencedSkyframeExecutor}. + * + * @param value the value in the client environment or null if unset in the environment. + */ +@AutoCodec +public record EnvironmentVariableValue(@Nullable String value) implements SkyValue {} diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/RepoEnvironmentFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/RepoEnvironmentFunction.java new file mode 100644 index 00000000000000..ffddeed475ab4a --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/skyframe/RepoEnvironmentFunction.java @@ -0,0 +1,112 @@ +// Copyright 2026 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.skyframe; + +import com.google.common.collect.Collections2; +import com.google.common.collect.ImmutableSortedMap; +import com.google.devtools.build.lib.skyframe.serialization.VisibleForSerialization; +import com.google.devtools.build.lib.skyframe.serialization.autocodec.AutoCodec; +import com.google.devtools.build.skyframe.AbstractSkyKey; +import com.google.devtools.build.skyframe.SkyFunction; +import com.google.devtools.build.skyframe.SkyFunctionName; +import com.google.devtools.build.skyframe.SkyKey; +import com.google.devtools.build.skyframe.SkyValue; +import com.google.devtools.build.skyframe.SkyframeLookupResult; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import javax.annotation.Nullable; + +/** + * Skyframe function that provides the effective value for an environment variable in the context of + * repository rules and module extensions. This checks {@code --repo_env} overrides first, then falls + * back to the client environment. + */ +public final class RepoEnvironmentFunction implements SkyFunction { + @Override + public SkyValue compute(SkyKey skyKey, Environment env) throws InterruptedException { + Map repoEnv = PrecomputedValue.REPO_ENV.get(env); + String key = (String) skyKey.argument(); + if (repoEnv.containsKey(key)) { + return new EnvironmentVariableValue(repoEnv.get(key)); + } + // Fall back to the client environment. + ClientEnvironmentValue clientValue = + (ClientEnvironmentValue) env.getValue(ClientEnvironmentFunction.key(key)); + if (clientValue == null) { + return null; + } + return new EnvironmentVariableValue(clientValue.getValue()); + } + + /** Returns the SkyKey to invoke this function for the environment variable {@code variable}. */ + public static Key key(String variable) { + return Key.create(variable); + } + + @VisibleForSerialization + @AutoCodec + static class Key extends AbstractSkyKey { + private static final SkyKeyInterner interner = SkyKey.newInterner(); + + private Key(String arg) { + super(arg); + } + + private static Key create(String arg) { + return interner.intern(new Key(arg)); + } + + @VisibleForSerialization + @AutoCodec.Interner + static Key intern(Key key) { + return interner.intern(key); + } + + @Override + public SkyFunctionName functionName() { + return SkyFunctions.REPOSITORY_ENVIRONMENT_VARIABLE; + } + + @Override + public SkyKeyInterner getSkyKeyInterner() { + return interner; + } + } + + /** + * Returns a map of environment variable key => values, getting them from Skyframe. Returns null + * if and only if some dependencies from Skyframe still need to be resolved. + */ + @Nullable + public static ImmutableSortedMap> getEnvironmentView( + Environment env, Set keys) throws InterruptedException { + var skyKeys = Collections2.transform(keys, RepoEnvironmentFunction::key); + SkyframeLookupResult values = env.getValuesAndExceptions(skyKeys); + if (env.valuesMissing()) { + return null; + } + + var result = ImmutableSortedMap.>naturalOrder(); + for (var key : skyKeys) { + var value = (EnvironmentVariableValue) values.get(key); + if (value == null) { + return null; + } + result.put(key.argument().toString(), Optional.ofNullable(value.value())); + } + return result.buildOrThrow(); + } +} diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/SkyFunctions.java b/src/main/java/com/google/devtools/build/lib/skyframe/SkyFunctions.java index 5a06e025aa420c..b7fc0c162a8f9e 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/SkyFunctions.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/SkyFunctions.java @@ -26,6 +26,8 @@ public final class SkyFunctions { static final SkyFunctionName ACTION_SKETCH = SkyFunctionName.createHermetic("ACTION_SKETCH"); public static final SkyFunctionName ACTION_ENVIRONMENT_VARIABLE = SkyFunctionName.createHermetic("ACTION_ENVIRONMENT_VARIABLE"); + public static final SkyFunctionName REPOSITORY_ENVIRONMENT_VARIABLE = + SkyFunctionName.createHermetic("REPOSITORY_ENVIRONMENT_VARIABLE"); public static final SkyFunctionName DIRECTORY_LISTING_STATE = SkyFunctionName.createNonHermetic("DIRECTORY_LISTING_STATE"); public static final SkyFunctionName DIRECTORY_LISTING = diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeExecutor.java b/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeExecutor.java index 5e5f012d5452e6..79c92ea8506b92 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeExecutor.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeExecutor.java @@ -640,6 +640,7 @@ private ImmutableMap skyFunctions() { map.put(SkyFunctions.PRECOMPUTED, new PrecomputedFunction()); map.put(SkyFunctions.CLIENT_ENVIRONMENT_VARIABLE, new ClientEnvironmentFunction(clientEnv)); map.put(SkyFunctions.ACTION_ENVIRONMENT_VARIABLE, new ActionEnvironmentFunction()); + map.put(SkyFunctions.REPOSITORY_ENVIRONMENT_VARIABLE, new RepoEnvironmentFunction()); map.put(FileStateKey.FILE_STATE, newFileStateFunction()); map.put(SkyFunctions.DIRECTORY_LISTING_STATE, newDirectoryListingStateFunction()); map.put(FileSymlinkCycleUniquenessFunction.NAME, new FileSymlinkCycleUniquenessFunction()); diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/packages/BUILD b/src/main/java/com/google/devtools/build/lib/skyframe/packages/BUILD index a86993c2995fba..5bb1b2b50084a8 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/packages/BUILD +++ b/src/main/java/com/google/devtools/build/lib/skyframe/packages/BUILD @@ -102,6 +102,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/rules:repository/repository_function", "//src/main/java/com/google/devtools/build/lib/skyframe:action_environment_function", "//src/main/java/com/google/devtools/build/lib/skyframe:client_environment_function", + "//src/main/java/com/google/devtools/build/lib/skyframe:repo_environment_function", "//src/main/java/com/google/devtools/build/lib/skyframe:directory_listing_function", "//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_function", "//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value", diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/packages/BazelPackageLoader.java b/src/main/java/com/google/devtools/build/lib/skyframe/packages/BazelPackageLoader.java index 4bf99d967d5a91..da5bd0113f6daa 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/packages/BazelPackageLoader.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/packages/BazelPackageLoader.java @@ -48,6 +48,7 @@ import com.google.devtools.build.lib.skyframe.PackageFunction.ActionOnIOExceptionReadingBuildFile; import com.google.devtools.build.lib.skyframe.PackageLookupFunction.CrossRepositoryLabelViolationStrategy; import com.google.devtools.build.lib.skyframe.PrecomputedValue; +import com.google.devtools.build.lib.skyframe.RepoEnvironmentFunction; import com.google.devtools.build.lib.skyframe.RepositoryMappingFunction; import com.google.devtools.build.lib.skyframe.SkyFunctions; import com.google.devtools.build.lib.vfs.Path; @@ -198,6 +199,7 @@ public void handle(Event event) {} SkyFunctions.DIRECTORY_LISTING_STATE, new DirectoryListingStateFunction(externalFilesHelper, SyscallCache.NO_CACHE)) .put(SkyFunctions.ACTION_ENVIRONMENT_VARIABLE, new ActionEnvironmentFunction()) + .put(SkyFunctions.REPOSITORY_ENVIRONMENT_VARIABLE, new RepoEnvironmentFunction()) .put(SkyFunctions.DIRECTORY_LISTING, new DirectoryListingFunction()) .put( SkyFunctions.LOCAL_REPOSITORY_LOOKUP, From 6823709f83e74dbc88d9cecddfe60000826856e8 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Tue, 23 Jun 2026 13:57:15 +0200 Subject: [PATCH 4/8] [8.7.0] Add regression test for repo-env over-invalidation Adds the integration test from https://github.com/bazelbuild/bazel/pull/29946 (test_unrelated_env_var_does_not_invalidate_repo): changing an unrelated environment variable via an interleaved `bazel mod` command must not refetch a repository that doesn't depend on it. PR #29946's production fix (inject the repo env per-variable, dropping the whole-map PrecomputedValue.REPO_ENV) is NOT needed on this branch: the manual repo-env port here injects only the narrow repoEnvFromOptions (PATH/PATHEXT + --action_env + --repo_env) into REPO_ENV and falls back to the already per-variable ClientEnvironmentFunction for everything else. An unrelated client variable therefore never mutates the whole-map node, so the test passes as-is. The regression the PR addresses only exists upstream, where the entire client environment is folded into REPO_ENV via getRepoEnv(). --- .../shell/bazel/starlark_repository_test.sh | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/src/test/shell/bazel/starlark_repository_test.sh b/src/test/shell/bazel/starlark_repository_test.sh index 72404b23dba234..bbbbab8631ab14 100755 --- a/src/test/shell/bazel/starlark_repository_test.sh +++ b/src/test/shell/bazel/starlark_repository_test.sh @@ -801,6 +801,73 @@ function test_starlark_repository_environ_invalidation_action_env_batch() { environ_invalidation_action_env_test_template --batch } +# Regression test for the repository environment being a single whole-map Skyframe +# node (introduced together with RepoEnvironmentFunction): changing one environment +# variable invalidated every repository rule and module extension, even ones that +# don't depend on that variable. +# +# In particular, running an interleaved command such as `bazel mod` with an +# environment variable changed that the build doesn't even read (e.g. TERM, set +# differently by an IDE/BSP server than by the user's shell) caused the next build +# to spuriously refetch repositories and discard the analysis cache. +# +# The repo here only depends on FOO, yet an interleaved `mod` invocation with an +# unrelated variable OTHER changed must not cause it to be refetched. +function test_unrelated_env_var_does_not_invalidate_repo() { + local execution_file="${TEST_TMPDIR}/execution" + echo 0 > "${execution_file}" + + cat > $(setup_module_dot_bazel) <<'EOF' +ext = use_extension("//:ext.bzl", "ext") +use_repo(ext, "foo") +EOF + cat > ext.bzl < ${execution_file}" % count]) + repository_ctx.file( + "BUILD", "filegroup(name = 'bar', visibility = ['//visibility:public'])") + repository_ctx.file("REPO.bazel", "") + +# A local repo is refetched whenever its node is recomputed, which makes spurious +# invalidation directly observable through the fetch counter. +repo = repository_rule(implementation = _repo_impl, local = True) + +def _ext_impl(module_ctx): + repo(name = "foo") + +ext = module_extension(implementation = _ext_impl) +EOF + cat > BUILD <<'EOF' +filegroup(name = "t", srcs = ["@foo//:bar"]) +EOF + + # Initial fetch. + FOO=1 OTHER=a bazel build //:t >& $TEST_log || fail "Failed to build" + assert_equals 1 $(cat "${execution_file}") + # Rebuilding with the same environment must not refetch. + FOO=1 OTHER=a bazel build //:t >& $TEST_log || fail "Failed to build" + assert_equals 1 $(cat "${execution_file}") + + # Evaluate the whole module/repo graph with an unrelated variable (OTHER) + # changed, then build again with the original environment. The repo only reads + # FOO, so it must not be refetched. + FOO=1 OTHER=b bazel mod graph >& $TEST_log || fail "Failed to run mod" + FOO=1 OTHER=a bazel build //:t >& $TEST_log || fail "Failed to build" + assert_equals 1 $(cat "${execution_file}") + + # A second interleaving with yet another value must likewise not refetch. + FOO=1 OTHER=c bazel mod graph >& $TEST_log || fail "Failed to run mod" + FOO=1 OTHER=a bazel build //:t >& $TEST_log || fail "Failed to build" + assert_equals 1 $(cat "${execution_file}") + + # Sanity check: changing FOO, which the repo does depend on, must refetch. + FOO=2 OTHER=a bazel build //:t >& $TEST_log || fail "Failed to build" + assert_equals 2 $(cat "${execution_file}") +} + # Test invalidation based on change to the bzl files function bzl_invalidation_test_template() { local startup_flag="${1-}" From d73e1b1d30f74d80170ad3cbcdc24b24de294f2c Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Tue, 23 Jun 2026 23:02:22 +0200 Subject: [PATCH 5/8] Fix cycles when checking the local repo contents cache Fixes #27517 by checking Skyframe deps in batches that stop right before any dep that may cause a cycle if checked while previous deps are out-of-date. This is accompanied by a restructuring of `RepoRecordedInput` that consolidates all Skyframe logic associated with the computation of the corresponding value exclusively within that class. This will also be helpful in adding support for dynamic inputs to the remote repo contents cache in future work. The upstream commit additionally moved the entirety of `RepositoryFetchFunction` to Skyframe workers so that checking the up-to-dateness of local repo contents cache entries isn't quadratic. That worker refactor is omitted in this 8.x backport, which keeps the existing split `RepositoryDelegatorFunction`/`StarlarkRepositoryFunction` architecture and restart-based evaluation; the cycle fix and the `RepoRecordedInput` restructuring are applied on that model. The batched up-to-dateness check is restart-safe (`isAnyValueOutdated` short-circuits while its batch's values are still missing). Closes #28206. Co-authored-by: Xudong Yang PiperOrigin-RevId: 855252657 Change-Id: Ica18760ae79da5155fc0f3d8cd4f24c52a034c86 (cherry picked from commit 72a25a9cfe857fd8b55ed834c316b1805d41078f) --- .../bazel/bzlmod/ModuleExtensionContext.java | 2 +- .../bzlmod/SingleExtensionEvalFunction.java | 16 +- .../cache/LocalRepoContentsCache.java | 6 +- .../starlark/StarlarkBaseExternalContext.java | 59 ++- .../starlark/StarlarkRepositoryContext.java | 11 +- .../rules/repository/RepoRecordedInput.java | 429 ++++++++++++------ .../RepositoryDelegatorFunction.java | 5 +- .../rules/repository/RepositoryFunction.java | 19 +- .../repository/RepoRecordedInputTest.java | 25 +- src/test/py/bazel/bzlmod/bazel_module_test.py | 60 +++ .../bazel/bzlmod/repo_contents_cache_test.py | 7 +- 11 files changed, 432 insertions(+), 207 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java index 5c7b8de8a1d6c7..812fe8f27b7e08 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java @@ -91,7 +91,7 @@ protected ModuleExtensionContext( this.rootModuleHasNonDevDependency = rootModuleHasNonDevDependency; // Record inputs to the extension that are known prior to evaluation. RepoRecordedInput.EnvVar.wrap(staticEnvVars) - .forEach((input, value) -> recordInput(input, value.orElse(null))); + .forEach((input, value) -> recordInputWithValue(input, value.orElse(null))); repoMappingRecorder.record(staticRepoMappingEntries); } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java index c83afc25132cf3..cd9e163f8d8f9d 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java @@ -395,12 +395,18 @@ private static Optional didRecordedInputsChange( BlazeDirectories directories, List recordedInputs) throws InterruptedException, NeedsSkyframeRestartException { - Optional outdated = - RepoRecordedInput.isAnyValueOutdated(env, directories, recordedInputs); - if (env.valuesMissing()) { - throw new NeedsSkyframeRestartException(); + // Check inputs in batches to prevent Skyframe cycles caused by outdated dependencies. + for (ImmutableList batch : + RepoRecordedInput.WithValue.splitIntoBatches(recordedInputs)) { + Optional outdated = RepoRecordedInput.isAnyValueOutdated(env, directories, batch); + if (env.valuesMissing()) { + throw new NeedsSkyframeRestartException(); + } + if (outdated.isPresent()) { + return outdated; + } } - return outdated; + return Optional.empty(); } private SingleExtensionValue createSingleExtensionValue( diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java index 89af2cde2a1f14..df98556b784b45 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java @@ -154,9 +154,9 @@ private Path ensureTrashDir() throws IOException { /** * Moves a freshly fetched repo into the contents cache. * - * @return the repo dir in the contents cache. + * @return the new cache entry */ - public Path moveToCache( + public CandidateRepo moveToCache( Path fetchedRepoDir, Path fetchedRepoMarkerFile, String predeclaredInputHash) throws IOException { Preconditions.checkState(path != null); @@ -183,7 +183,7 @@ public Path moveToCache( // Set up a symlink at the original fetched repo dir path. fetchedRepoDir.deleteTree(); FileSystemUtils.ensureSymbolicLink(fetchedRepoDir, cacheRepoDir); - return cacheRepoDir; + return new CandidateRepo(cacheRecordedInputsFile, cacheRepoDir); } public void acquireSharedLock() throws IOException, InterruptedException { diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java index 5cf3e3069fcf0d..cb7893673dd8b1 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java @@ -23,13 +23,10 @@ import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSortedMap; -import com.google.common.collect.Iterables; import com.google.common.collect.Maps; import com.google.common.flogger.GoogleLogger; import com.google.common.util.concurrent.Futures; -import com.google.devtools.build.lib.actions.FileValue; import com.google.devtools.build.lib.analysis.BlazeDirectories; import com.google.devtools.build.lib.bazel.debug.WorkspaceRuleEvent; import com.google.devtools.build.lib.bazel.repository.DecompressorDescriptor; @@ -55,6 +52,7 @@ import com.google.devtools.build.lib.remote.RemoteExternalOverlayFileSystem; import com.google.devtools.build.lib.rules.repository.NeedsSkyframeRestartException; import com.google.devtools.build.lib.rules.repository.RepoRecordedInput; +import com.google.devtools.build.lib.rules.repository.RepoRecordedInput.MaybeValue; import com.google.devtools.build.lib.rules.repository.RepoRecordedInput.RepoCacheFriendlyPath; import com.google.devtools.build.lib.rules.repository.RepositoryFunction; import com.google.devtools.build.lib.rules.repository.RepositoryFunction.RepositoryFunctionException; @@ -71,6 +69,7 @@ import com.google.devtools.build.lib.vfs.Symlinks; import com.google.devtools.build.skyframe.SkyFunction.Environment; import com.google.devtools.build.skyframe.SkyFunctionException.Transience; +import com.google.errorprone.annotations.CanIgnoreReturnValue; import com.google.errorprone.annotations.ForOverride; import java.io.File; import java.io.IOException; @@ -212,7 +211,7 @@ protected StarlarkBaseExternalContext( // apparent repo names to canonical repo names. See #20721 for why this is necessary. this.repoMappingRecorder = (fromRepo, apparentRepoName, canonicalRepoName) -> - recordInput( + recordInputWithValue( new RepoRecordedInput.RecordedRepoMapping(fromRepo, apparentRepoName), canonicalRepoName.isVisible() ? canonicalRepoName.getName() : null); } @@ -251,7 +250,7 @@ public void storeRepoMappingRecorderInThread(StarlarkThread thread) { repoMappingRecorder.storeInThread(thread); } - protected void recordInput(RepoRecordedInput input, @Nullable String value) { + protected void recordInputWithValue(RepoRecordedInput input, @Nullable String value) { if (recordedInputs.containsKey(input) && !Objects.equals(recordedInputs.get(input), value)) { throw new IllegalStateException( "Conflicting values recorded for input %s: '%s' vs. '%s'" @@ -260,6 +259,23 @@ protected void recordInput(RepoRecordedInput input, @Nullable String value) { recordedInputs.put(input, value); } + @CanIgnoreReturnValue + @Nullable + protected String getValueAndRecordInput(RepoRecordedInput input) + throws InterruptedException, NeedsSkyframeRestartException, IOException { + var maybeValue = input.getValue(env, directories); + if (env.valuesMissing()) { + throw new NeedsSkyframeRestartException(); + } + return switch (maybeValue) { + case MaybeValue.Invalid(String reason) -> throw new IOException(reason); + case MaybeValue.Valid(String value) -> { + recordInputWithValue(input, value); + yield value; + } + }; + } + private boolean cancelPendingAsyncTasks() { boolean hadPendingItems = false; for (AsyncTask task : asyncTasks) { @@ -1489,18 +1505,12 @@ private static T nullIfNone(Object object, Class type) { @Nullable public String getEnvironmentValue(String name, Object defaultValue) throws InterruptedException, NeedsSkyframeRestartException { - // Must look up via Skyframe, rather than solely copy from `this.repoEnvVariables`, in order to - // establish a SkyKey dependency relationship. - var nameAndValue = RepositoryFunction.getEnvVarValues(env, ImmutableSet.of(name)); - if (nameAndValue == null) { - return null; + try { + String value = getValueAndRecordInput(new RepoRecordedInput.EnvVar(name)); + return value != null ? value : nullIfNone(defaultValue, String.class); + } catch (IOException e) { + throw new IllegalStateException("getting EnvVar never throws IOException", e); } - // However, to account for --repo_env we take the value from `this.repoEnvVariables`. - // See https://github.com/bazelbuild/bazel/pull/20787#discussion_r1445571248 . - var entry = Iterables.getOnlyElement(RepoRecordedInput.EnvVar.wrap(nameAndValue).entrySet()); - String envVarValue = repoEnvVariables.get(name); - recordInput(entry.getKey(), envVarValue); - return envVarValue != null ? envVarValue : nullIfNone(defaultValue, String.class); } @StarlarkMethod( @@ -1661,17 +1671,8 @@ protected void maybeWatch(StarlarkPath starlarkPath, ShouldWatch shouldWatch) if (repoCacheFriendlyPath == null) { return; } - var recordedInput = new RepoRecordedInput.File(repoCacheFriendlyPath); - var skyKey = recordedInput.getSkyKey(directories); try { - FileValue fileValue = (FileValue) env.getValueOrThrow(skyKey, IOException.class); - if (fileValue == null) { - throw new NeedsSkyframeRestartException(); - } - - recordInput( - recordedInput, - RepoRecordedInput.File.fileValueToMarkerValue((RootedPath) skyKey.argument(), fileValue)); + getValueAndRecordInput(new RepoRecordedInput.File(repoCacheFriendlyPath)); } catch (IOException e) { throw new RepositoryFunctionException(e, Transience.TRANSIENT); } @@ -1683,12 +1684,8 @@ protected void maybeWatchDirents(Path path, ShouldWatch shouldWatch) if (repoCacheFriendlyPath == null) { return; } - var recordedInput = new RepoRecordedInput.Dirents(repoCacheFriendlyPath); - if (env.getValue(recordedInput.getSkyKey(directories)) == null) { - throw new NeedsSkyframeRestartException(); - } try { - recordInput(recordedInput, RepoRecordedInput.Dirents.getDirentsMarkerValue(path)); + getValueAndRecordInput(new RepoRecordedInput.Dirents(repoCacheFriendlyPath)); } catch (IOException e) { throw new RepositoryFunctionException(e, Transience.TRANSIENT); } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java index 751d8305e94a58..6cb1a331464eaf 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java @@ -42,7 +42,6 @@ import com.google.devtools.build.lib.rules.repository.WorkspaceAttributeMapper; import com.google.devtools.build.lib.runtime.ProcessWrapper; import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor; -import com.google.devtools.build.lib.skyframe.DirectoryTreeDigestValue; import com.google.devtools.build.lib.util.StringUtilities; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; @@ -587,15 +586,7 @@ public void watchTree(Object path) return; } try { - var recordedInput = new RepoRecordedInput.DirTree(repoCacheFriendlyPath); - DirectoryTreeDigestValue digestValue = - (DirectoryTreeDigestValue) - env.getValueOrThrow(recordedInput.getSkyKey(directories), IOException.class); - if (digestValue == null) { - throw new NeedsSkyframeRestartException(); - } - - recordInput(recordedInput, digestValue.hexDigest()); + getValueAndRecordInput(new RepoRecordedInput.DirTree(repoCacheFriendlyPath)); } catch (IOException e) { throw new RepositoryFunctionException(e, Transience.TRANSIENT); } diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java index 39a5b43e444dae..b3532d7ff919b9 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java @@ -23,6 +23,8 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Splitter; +import com.google.common.collect.Collections2; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.io.BaseEncoding; import com.google.devtools.build.lib.actions.FileValue; @@ -31,20 +33,22 @@ import com.google.devtools.build.lib.cmdline.LabelSyntaxException; import com.google.devtools.build.lib.cmdline.LabelValidator; import com.google.devtools.build.lib.cmdline.RepositoryName; +import com.google.devtools.build.lib.skyframe.DirectoryListingValue; import com.google.devtools.build.lib.skyframe.EnvironmentVariableValue; import com.google.devtools.build.lib.skyframe.RepoEnvironmentFunction; -import com.google.devtools.build.lib.skyframe.DirectoryListingValue; import com.google.devtools.build.lib.skyframe.DirectoryTreeDigestValue; import com.google.devtools.build.lib.skyframe.PrecomputedValue; import com.google.devtools.build.lib.skyframe.RepositoryMappingValue; import com.google.devtools.build.lib.util.Fingerprint; -import com.google.devtools.build.lib.vfs.Path; +import com.google.devtools.build.lib.vfs.Dirent; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.lib.vfs.Root; import com.google.devtools.build.lib.vfs.RootedPath; import com.google.devtools.build.skyframe.SkyFunction.Environment; import com.google.devtools.build.skyframe.SkyKey; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Objects; @@ -121,60 +125,45 @@ public static Optional parse(String s) { return Optional.empty(); } + /** + * Splits the given list of recorded input values into batches such that within each batch, all + * recorded inputs's {@link SkyKey}s can be requested together. + */ + public static ImmutableList> splitIntoBatches( + List recordedInputValues) { + var batches = ImmutableList.>builder(); + var currentBatch = new ArrayList(); + for (var recordedInputValue : recordedInputValues) { + if (!recordedInputValue.input().canBeRequestedUnconditionally() + && !currentBatch.isEmpty()) { + batches.add(ImmutableList.copyOf(currentBatch)); + currentBatch.clear(); + } + currentBatch.add(recordedInputValue); + } + if (!currentBatch.isEmpty()) { + batches.add(ImmutableList.copyOf(currentBatch)); + } + return batches.build(); + } + /** Converts this {@link WithValue} to a string in a format compatible with {@link #parse}. */ @Override public String toString() { - return escape(input.toString()) + " " + escape(value); - } - - @VisibleForTesting - static String escape(String str) { - return str == null - ? "\\0" - : str.replace("\\", "\\\\").replace("\n", "\\n").replace(" ", "\\s"); - } - - @VisibleForTesting - @Nullable - static String unescape(String str) { - if (str.equals("\\0")) { - return null; // \0 == null string - } - StringBuilder result = new StringBuilder(); - boolean escaped = false; - for (int i = 0; i < str.length(); i++) { - char c = str.charAt(i); - if (escaped) { - if (c == 'n') { // n means new line - result.append("\n"); - } else if (c == 's') { // s means space - result.append(" "); - } else { // Any other escaped characters are just un-escaped - result.append(c); - } - escaped = false; - } else if (c == '\\') { - escaped = true; - } else { - result.append(c); - } - } - return result.toString(); + return input + " " + escape(value); } } /** - * Returns whether all values are still up-to-date for each recorded input. If Skyframe values are - * missing, the return value should be ignored; callers are responsible for checking {@code - * env.valuesMissing()} and triggering a Skyframe restart if needed. + * Returns whether all values are still up-to-date for each recorded input or a human-readable + * reason for why that's not the case. If Skyframe values are missing, the return value should be + * ignored; callers are responsible for checking {@code env.valuesMissing()} and triggering a + * Skyframe restart if needed. */ public static Optional isAnyValueOutdated( Environment env, BlazeDirectories directories, List recordedInputValues) throws InterruptedException { - env.getValuesAndExceptions( - recordedInputValues.stream() - .map(riv -> riv.input().getSkyKey(directories)) - .collect(toImmutableSet())); + prefetch(env, directories, Collections2.transform(recordedInputValues, WithValue::input)); if (env.valuesMissing()) { return UNDECIDED; } @@ -188,40 +177,146 @@ public static Optional isAnyValueOutdated( return Optional.empty(); } + /** + * Requests the information from Skyframe that is required by future calls to {@link + * #isAnyValueOutdated} for the given set of inputs. + */ + public static void prefetch( + Environment env, BlazeDirectories directories, Collection recordedInputs) + throws InterruptedException { + var keys = + recordedInputs.stream().map(rri -> rri.getSkyKey(directories)).collect(toImmutableSet()); + if (env.valuesMissing()) { + return; + } + var results = env.getValuesAndExceptions(keys); + // Per the contract of Environment.getValuesAndExceptions, we need to access the results to + // actually find all missing values. + for (SkyKey key : keys) { + var unused = results.get(key); + } + } + + /** + * Returns a human-readable reason for why the given {@code oldValue} is no longer up-to-date for + * this recorded input, or an empty Optional if it is still up-to-date. + * + *

The method can request Skyframe evaluations, and if any values are missing, this method can + * return any value (doesn't matter what, although {@link #UNDECIDED} is recommended for clarity) + * and will be reinvoked after a Skyframe restart. + */ + private Optional isOutdated( + Environment env, BlazeDirectories directories, @Nullable String oldValue) + throws InterruptedException { + MaybeValue wrappedNewValue = getValue(env, directories); + if (env.valuesMissing()) { + return UNDECIDED; + } + return switch (wrappedNewValue) { + case MaybeValue.Invalid(String reason) -> Optional.of(reason); + case MaybeValue.Valid(String newValue) -> + Objects.equals(oldValue, newValue) + ? Optional.empty() + : Optional.of(describeChange(oldValue, newValue)); + }; + } + @Override public abstract boolean equals(Object obj); @Override public abstract int hashCode(); + @VisibleForTesting + static String escape(String str) { + return str == null ? "\\0" : str.replace("\\", "\\\\").replace("\n", "\\n").replace(" ", "\\s"); + } + + @VisibleForTesting + @Nullable + static String unescape(String str) { + if (str.equals("\\0")) { + return null; // \0 == null string + } + StringBuilder result = new StringBuilder(); + boolean escaped = false; + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + if (escaped) { + if (c == 'n') { // n means new line + result.append("\n"); + } else if (c == 's') { // s means space + result.append(" "); + } else { // Any other escaped characters are just un-escaped + result.append(c); + } + escaped = false; + } else if (c == '\\') { + escaped = true; + } else { + result.append(c); + } + } + return result.toString(); + } + + /** + * Returns the string representation of this recorded input, in the format suitable for parsing + * back via {@link #parse}. + * + *

The returned string never contains spaces or newlines; those characters are escaped. + */ @Override public final String toString() { - return getParser().getPrefix() + ":" + toStringInternal(); + return getParser().getPrefix() + ":" + escape(toStringInternal()); } + /** + * Represents the possible values returned by {@link #getValue}: either a valid value (which may + * be null), or an invalid value with a reason (e.g. due to I/O failure). + */ + public sealed interface MaybeValue { + MaybeValue VALUES_MISSING = new MaybeValue.Invalid("values missing"); + + /** Represents a valid value, which may be null. */ + record Valid(@Nullable String value) implements MaybeValue {} + + /** Represents an invalid value with a reason (e.g. due to I/O failure). */ + record Invalid(String reason) implements MaybeValue {} + } + + /** + * Returns the current value of this input, which may be null, wrapped in a {@link + * MaybeValue.Valid}, or a {@link MaybeValue.Invalid} if the value is known to be invalid. + * + *

The method can request Skyframe evaluations, and if any values are missing, this method can + * return any value and will be reinvoked after a Skyframe restart. + */ + public abstract MaybeValue getValue(Environment env, BlazeDirectories directories) + throws InterruptedException; + + /** + * Returns a human-readable description of the change from {@code oldValue} to {@code newValue}. + */ + protected abstract String describeChange(String oldValue, String newValue); + /** * Returns the post-colon substring that identifies the specific input: for example, the {@code * MY_ENV_VAR} part of {@code ENV:MY_ENV_VAR}. */ - public abstract String toStringInternal(); + protected abstract String toStringInternal(); /** Returns the parser object for this type of recorded inputs. */ - public abstract Parser getParser(); + protected abstract Parser getParser(); /** Returns the {@link SkyKey} that is necessary to determine {@link #isOutdated}. */ - public abstract SkyKey getSkyKey(BlazeDirectories directories); + protected abstract SkyKey getSkyKey(BlazeDirectories directories); /** - * Returns a human-readable reason for why the given {@code oldValue} is no longer up-to-date for - * this recorded input, or an empty Optional if it is still up-to-date. This method can assume - * that {@link #getSkyKey(BlazeDirectories)} is already evaluated; it can request further Skyframe - * evaluations, and if any values are missing, this method can return any value (doesn't matter - * what, although {@link #UNDECIDED} is recommended for clarity) and will be reinvoked after a - * Skyframe restart. + * Returns true if the {@link #getValue} can be requested even if previous recorded inputs have + * not been verified to be up to date. */ - public abstract Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) - throws InterruptedException; + protected abstract boolean canBeRequestedUnconditionally(); private static final Optional UNDECIDED = Optional.of("values missing"); @@ -299,6 +394,11 @@ public final RootedPath getRootedPath(BlazeDirectories directories) { } return RootedPath.toRootedPath(root, path()); } + + /** Returns true if the path points into an external repository. */ + public boolean inExternalRepo() { + return repoName().isPresent() && !repoName().get().isMain(); + } } /** @@ -363,7 +463,8 @@ public String toStringInternal() { * for placing in a repository marker file. The file need not exist, and can be a file or a * directory. */ - public static String fileValueToMarkerValue(RootedPath rootedPath, FileValue fileValue) + @VisibleForTesting + static String fileValueToMarkerValue(RootedPath rootedPath, FileValue fileValue) throws IOException { if (fileValue.isDirectory()) { return "DIR"; @@ -386,23 +487,32 @@ public SkyKey getSkyKey(BlazeDirectories directories) { } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) + protected boolean canBeRequestedUnconditionally() { + // Requesting files in external repositories can result in cycles if the external repo now + // transitively depends on the requesting repo. + return !path.inExternalRepo(); + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) throws InterruptedException { var skyKey = getSkyKey(directories); try { - FileValue fileValue = (FileValue) env.getValueOrThrow(skyKey, IOException.class); + var fileValue = (FileValue) env.getValueOrThrow(skyKey, IOException.class); if (fileValue == null) { - return UNDECIDED; - } - if (!oldValue.equals(fileValueToMarkerValue((RootedPath) skyKey.argument(), fileValue))) { - return Optional.of("file info or contents of %s changed".formatted(path)); + return MaybeValue.VALUES_MISSING; } - return Optional.empty(); + return new MaybeValue.Valid( + fileValueToMarkerValue((RootedPath) skyKey.argument(), fileValue)); } catch (IOException e) { - return Optional.of("failed to stat %s: %s".formatted(path, e.getMessage())); + return new MaybeValue.Invalid("failed to stat %s: %s".formatted(path, e.getMessage())); } } + + @Override + public String describeChange(String oldValue, String newValue) { + return "file info or contents of %s changed".formatted(path); + } } /** Represents the list of entries under a directory accessed during the fetch. */ @@ -457,38 +567,42 @@ public Parser getParser() { return PARSER; } + private String directoryListingValueToMarkerValue(DirectoryListingValue directoryListingValue) { + var fp = new Fingerprint(); + fp.addStrings( + com.google.common.collect.Streams.stream(directoryListingValue.getDirents()) + .map(Dirent::getName) + .sorted() + .collect(toImmutableList())); + return fp.hexDigestAndReset(); + } + @Override public SkyKey getSkyKey(BlazeDirectories directories) { return DirectoryListingValue.key(path.getRootedPath(directories)); } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) + protected boolean canBeRequestedUnconditionally() { + // Requesting directories in external repositories can result in cycles if the external repo + // transitively depends on the requesting repo. + return !path.inExternalRepo(); + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) throws InterruptedException { - SkyKey skyKey = getSkyKey(directories); - if (env.getValue(skyKey) == null) { - return UNDECIDED; - } - try { - if (!oldValue.equals( - getDirentsMarkerValue(((DirectoryListingValue.Key) skyKey).argument().asPath()))) { - return Optional.of("directory entries of %s changed".formatted(path)); - } - return Optional.empty(); - } catch (IOException e) { - return Optional.of("failed to readdir %s: %s".formatted(path, e.getMessage())); + var skyKey = getSkyKey(directories); + var directoryListingValue = (DirectoryListingValue) env.getValue(skyKey); + if (directoryListingValue == null) { + return MaybeValue.VALUES_MISSING; } + return new MaybeValue.Valid(directoryListingValueToMarkerValue(directoryListingValue)); } - public static String getDirentsMarkerValue(Path path) throws IOException { - Fingerprint fp = new Fingerprint(); - fp.addStrings( - path.getDirectoryEntries().stream() - .map(Path::getBaseName) - .sorted() - .collect(toImmutableList())); - return fp.hexDigestAndReset(); + @Override + public String describeChange(String oldValue, String newValue) { + return "directory entries of %s changed".formatted(path); } } @@ -554,18 +668,32 @@ public SkyKey getSkyKey(BlazeDirectories directories) { } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) + protected boolean canBeRequestedUnconditionally() { + // Requesting directory trees in external repositories can result in cycles if the external + // repo now transitively depends on the requesting repo. + return !path.inExternalRepo(); + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) throws InterruptedException { - DirectoryTreeDigestValue value = - (DirectoryTreeDigestValue) env.getValue(getSkyKey(directories)); - if (value == null) { - return UNDECIDED; - } - if (!oldValue.equals(value.hexDigest())) { - return Optional.of("directory tree at %s changed".formatted(path)); + var skyKey = getSkyKey(directories); + try { + var directoryTreeDigestValue = + (DirectoryTreeDigestValue) env.getValueOrThrow(skyKey, IOException.class); + if (directoryTreeDigestValue == null) { + return MaybeValue.VALUES_MISSING; + } + return new MaybeValue.Valid(directoryTreeDigestValue.hexDigest()); + } catch (IOException e) { + return new MaybeValue.Invalid( + "failed to digest directory tree at %s: %s".formatted(path, e.getMessage())); } - return Optional.empty(); + } + + @Override + public String describeChange(String oldValue, String newValue) { + return "directory tree at %s changed".formatted(path); } } @@ -593,7 +721,7 @@ public static ImmutableMap> wrap( .collect(toImmutableMap(e -> new EnvVar(e.getKey()), Map.Entry::getValue)); } - private EnvVar(String name) { + public EnvVar(String name) { this.name = name; } @@ -629,24 +757,29 @@ public SkyKey getSkyKey(BlazeDirectories directories) { } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) + protected boolean canBeRequestedUnconditionally() { + // Environment variables are static data injected into Skyframe, so there is no risk of + // cycles. + return true; + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) throws InterruptedException { - var envValue = (EnvironmentVariableValue) env.getValue(getSkyKey(directories)); - if (envValue == null) { - return Optional.empty(); - } - String v = envValue.value(); - // Note that `oldValue` can be null if the env var was not set. - if (!Objects.equals(oldValue, v)) { - return Optional.of( - "environment variable %s changed: %s -> %s" - .formatted( - name, - oldValue == null ? "" : "'%s'".formatted(oldValue), - v == null ? "" : "'%s'".formatted(v))); + var value = (EnvironmentVariableValue) env.getValue(getSkyKey(directories)); + if (value == null) { + return MaybeValue.VALUES_MISSING; } - return Optional.empty(); + return new MaybeValue.Valid(value.value()); + } + + @Override + public String describeChange(String oldValue, String newValue) { + return "environment variable %s changed: %s -> %s" + .formatted( + name, + oldValue == null ? "" : "'%s'".formatted(oldValue), + newValue == null ? "" : "'%s'".formatted(newValue)); } } @@ -721,32 +854,32 @@ public SkyKey getSkyKey(BlazeDirectories directories) { } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) + protected boolean canBeRequestedUnconditionally() { + // Starlark can only request the mapping of the repo it is currently executing from, which + // means that the repo has already been fetched (either to execute the code or to verify the + // transitive .bzl hash). Further cycles aren't possible. + return true; + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) throws InterruptedException { - RepositoryMappingValue repoMappingValue = - (RepositoryMappingValue) env.getValue(getSkyKey(directories)); + var repoMappingValue = (RepositoryMappingValue) env.getValue(getSkyKey(directories)); if (Objects.equals(repoMappingValue, RepositoryMappingValue.NOT_FOUND_VALUE)) { - return Optional.of("source repo %s doesn't exist anymore".formatted(sourceRepo)); - } - RepositoryName oldCanonicalName; - try { - oldCanonicalName = RepositoryName.create(oldValue); - } catch (LabelSyntaxException e) { - // malformed old value causes refetch - return Optional.of("invalid recorded repo name: %s".formatted(e.getMessage())); - } - RepositoryName newCanonicalName = repoMappingValue.repositoryMapping().get(apparentName); - if (!oldCanonicalName.equals(newCanonicalName)) { - return Optional.of( - "canonical name for @%s in %s changed: %s -> %s" - .formatted( - apparentName, - sourceRepo, - oldCanonicalName, - newCanonicalName == null ? "" : newCanonicalName)); + return new MaybeValue.Invalid("source repo %s doesn't exist anymore".formatted(sourceRepo)); } - return Optional.empty(); + RepositoryName canonicalName = repoMappingValue.repositoryMapping().get(apparentName); + return new MaybeValue.Valid(canonicalName != null ? canonicalName.getName() : null); + } + + @Override + public String describeChange(String oldValue, String newValue) { + return "canonical name for @%s in %s changed: %s -> %s" + .formatted( + apparentName, + sourceRepo, + oldValue == null ? "" : oldValue, + newValue == null ? "" : newValue); } } @@ -789,9 +922,19 @@ public SkyKey getSkyKey(BlazeDirectories directories) { } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) { - return Optional.of(reason); + protected boolean canBeRequestedUnconditionally() { + return true; + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) { + return new MaybeValue.Invalid(reason); + } + + @Override + public String describeChange(String oldValue, String newValue) { + throw new UnsupportedOperationException( + "the value for this sentinel input is always invalid"); } } } diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java index 1a2a27ef7e3a5f..878429eb7252d4 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java @@ -308,9 +308,9 @@ public SkyValue compute(SkyKey skyKey, Environment env) } if (repoContentsCache.isEnabled()) { // This repo is eligible for the repo contents cache. - Path cachedRepoDir; + CandidateRepo newCacheEntry; try { - cachedRepoDir = + newCacheEntry = repoContentsCache.moveToCache( repoRoot, digestWriter.markerPath, digestWriter.predeclaredInputHash); } catch (IOException e) { @@ -321,6 +321,7 @@ public SkyValue compute(SkyKey skyKey, Environment env) e), Transience.TRANSIENT); } + Path cachedRepoDir = newCacheEntry.contentsDir(); // Don't forget to register a FileStateValue on the cache repo dir, so that we know to // refetch if the cache entry gets GC'd from under us or the entire cache is deleted. // diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java index 4471325f4649ac..4d6657c9178a43 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java @@ -239,12 +239,23 @@ public Optional isAnyRecordedInputOutdated( Map recordedInputValues, Environment env) throws InterruptedException { - return RepoRecordedInput.isAnyValueOutdated( - env, - directories, + var withValues = recordedInputValues.entrySet().stream() .map(e -> new RepoRecordedInput.WithValue(e.getKey(), e.getValue())) - .collect(com.google.common.collect.ImmutableList.toImmutableList())); + .collect(com.google.common.collect.ImmutableList.toImmutableList()); + // Check inputs in batches to prevent Skyframe cycles caused by outdated dependencies: a batch + // ends right before any input that may cause a cycle if requested while earlier inputs are out + // of date. A later batch's deps are only requested after the earlier batches have been + // confirmed up to date across Skyframe restarts (isAnyValueOutdated returns a non-empty reason + // when its batch's values are still missing, short-circuiting this loop). + for (var batch : RepoRecordedInput.WithValue.splitIntoBatches(withValues)) { + Optional outdatedReason = + RepoRecordedInput.isAnyValueOutdated(env, directories, batch); + if (outdatedReason.isPresent()) { + return outdatedReason; + } + } + return Optional.empty(); } public static RootedPath getRootedPathFromLabel(Label label, Environment env) diff --git a/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java b/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java index dae6be132a65a1..ea928c53d6115f 100644 --- a/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java +++ b/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java @@ -15,8 +15,11 @@ package com.google.devtools.build.lib.rules.repository; import static com.google.common.truth.Truth.assertThat; +import static com.google.devtools.build.lib.rules.repository.RepoRecordedInput.WithValue.parse; +import static com.google.devtools.build.lib.rules.repository.RepoRecordedInput.WithValue.splitIntoBatches; import static org.mockito.Mockito.when; +import com.google.common.collect.ImmutableList; import com.google.common.io.BaseEncoding; import com.google.devtools.build.lib.actions.FileContentsProxy; import com.google.devtools.build.lib.actions.FileStateValue.RegularFileStateValueWithContentsProxy; @@ -35,8 +38,8 @@ @RunWith(JUnit4.class) public class RepoRecordedInputTest extends BuildViewTestCase { private static void assertMarkerFileEscaping(String testCase) { - String escaped = RepoRecordedInput.WithValue.escape(testCase); - assertThat(RepoRecordedInput.WithValue.unescape(escaped)).isEqualTo(testCase); + String escaped = RepoRecordedInput.escape(testCase); + assertThat(RepoRecordedInput.unescape(escaped)).isEqualTo(testCase); } @Test @@ -70,4 +73,22 @@ public void testFileValueToMarkerValue() throws Exception { String expectedDigest = BaseEncoding.base16().lowerCase().encode(path.asPath().getDigest()); assertThat(RepoRecordedInput.File.fileValueToMarkerValue(path, fv)).isEqualTo(expectedDigest); } + + @Test + public void testSplitIntoBatches() { + assertThat(splitIntoBatches(ImmutableList.of())).isEmpty(); + assertThat( + splitIntoBatches( + ImmutableList.of( + parse("FILE:@@//foo:bar abc").orElseThrow(), + parse("FILE:@@//:baz cba").orElseThrow(), + parse("FILE:@@foo//:baz bac").orElseThrow(), + parse("ENV:KEY value").orElseThrow()))) + .containsExactly( + ImmutableList.of( + parse("FILE:@@//foo:bar abc").orElseThrow(), + parse("FILE:@@//:baz cba").orElseThrow()), + ImmutableList.of( + parse("FILE:@@foo//:baz bac").orElseThrow(), parse("ENV:KEY value").orElseThrow())); + } } diff --git a/src/test/py/bazel/bzlmod/bazel_module_test.py b/src/test/py/bazel/bzlmod/bazel_module_test.py index 5d1e11037cc48d..9aef1ec859e7c6 100644 --- a/src/test/py/bazel/bzlmod/bazel_module_test.py +++ b/src/test/py/bazel/bzlmod/bazel_module_test.py @@ -1570,6 +1570,66 @@ def testInvalidRepoRuleReferencedByTargetDoesNotCrash(self): self.assertNotIn('FATAL', stderr) self.assertIn("compilation of module 'repo.bzl' failed", stderr) + def testReverseDependencyDirection(self): + # Set up two module extensions that retain their predeclared input hashes + # across two builds but still reverse their dependency direction. Depending + # on how repo cache candidates are checked, this could lead to a Skyframe + # cycle. + self.ScratchFile( + 'MODULE.bazel', + [ + 'foo_ext = use_extension("//:repo.bzl", "foo_ext")', + 'use_repo(foo_ext, "foo")', + 'bar_ext = use_extension("//:repo.bzl", "bar_ext")', + 'use_repo(bar_ext, "bar")', + ], + ) + self.ScratchFile('BUILD.bazel') + self.ScratchFile( + 'repo.bzl', + [ + 'def _repo_impl(rctx):', + ' rctx.file("output.txt", rctx.attr.content)', + ' rctx.file("BUILD", "exports_files([\'output.txt\'])")', + ' print("JUST FETCHED: %s" % rctx.original_name)', + ' return rctx.repo_metadata(reproducible=True)', + 'repo = repository_rule(', + ' implementation = _repo_impl,', + ' attrs = {"content": attr.string()},', + ')', + 'def _foo_ext_impl(mctx):', + ' deps = mctx.read(Label("@//:foo_deps.txt")).splitlines()', + ' content = "foo"', + ' for dep in deps:', + ' if dep:', + ' content += " + " + mctx.read(Label(dep))', + ' repo(name = "foo", content = content)', + 'foo_ext = module_extension(implementation = _foo_ext_impl)', + 'def _bar_ext_impl(mctx):', + ' deps = mctx.read(Label("@//:bar_deps.txt")).splitlines()', + ' content = "bar"', + ' for dep in deps:', + ' if dep:', + ' content += " + " + mctx.read(Label(dep))', + ' repo(name = "bar", content = content)', + 'bar_ext = module_extension(implementation = _bar_ext_impl)', + ], + ) + + self.ScratchFile('foo_deps.txt', ['@bar//:output.txt']) + self.ScratchFile('bar_deps.txt') + + # First fetch: not cached + _, _, stderr = self.RunBazel(['build', '@foo//:output.txt']) + self.assertIn('JUST FETCHED: bar', '\n'.join(stderr)) + self.assertIn('JUST FETCHED: foo', '\n'.join(stderr)) + + # After expunging and reversing: cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('foo_deps.txt') + self.ScratchFile('bar_deps.txt', ['@foo//:output.txt']) + self.RunBazel(['build', '@foo//:output.txt']) + if __name__ == '__main__': absltest.main() diff --git a/src/test/py/bazel/bzlmod/repo_contents_cache_test.py b/src/test/py/bazel/bzlmod/repo_contents_cache_test.py index 60ea22b616d088..274d3cff6fc575 100644 --- a/src/test/py/bazel/bzlmod/repo_contents_cache_test.py +++ b/src/test/py/bazel/bzlmod/repo_contents_cache_test.py @@ -480,12 +480,7 @@ def testReverseDependencyDirection(self): self.RunBazel(['clean', '--expunge']) self.ScratchFile('foo_deps.txt', ['']) self.ScratchFile('bar_deps.txt', ['@foo//:output.txt']) - exit_code, stdout, stderr = self.RunBazel( - ['build', '@foo//:output.txt'], allow_failure=True - ) - # TODO: b/xxxxxxx - This is NOT the intended behavior. - self.AssertNotExitCode(exit_code, 0, stderr, stdout) - self.assertIn('possible dependency cycle detected', '\n'.join(stderr)) + self.RunBazel(['build', '@foo//:output.txt']) def doTestRepoContentsCacheDeleted(self, check_external_repository_files): repo_contents_cache = self.ScratchDir('repo_contents_cache') From fbd1442ec484cea4fd8eda54883a4e75493336b6 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Wed, 24 Jun 2026 00:08:20 +0200 Subject: [PATCH 6/8] Fix remote repo contents cache issues * The cache was always written to, even if not enabled. (On 8.x this guard was already folded into the remote repo contents cache port, #29075, so only the second fix below is applied here.) * Google RBE doesn't accept `Command`s without the (deprecated) `Platform` field set. We set it both on `Command` and `Action`, just to be safe. Fixes https://github.com/bazelbuild/bazel/pull/28294#issuecomment-3748866589 Closes #28295. PiperOrigin-RevId: 856169835 Change-Id: I2479119a173e325a7d39643a36536569f5f831fc (cherry picked from commit a9946096847e22de98e0e11b1f5dfbb6ec6ecdbb) --- .../devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java index c8a2c20a5f9fc5..1391d1f2ca04f2 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java @@ -22,6 +22,7 @@ import build.bazel.remote.execution.v2.Action; import build.bazel.remote.execution.v2.Command; import build.bazel.remote.execution.v2.Directory; +import build.bazel.remote.execution.v2.Platform; import build.bazel.remote.execution.v2.Tree; import com.google.common.base.Splitter; import com.google.common.base.Throwables; @@ -88,6 +89,7 @@ public final class RemoteRepoContentsCacheImpl implements RemoteRepoContentsCach .addOutputPaths(REPO_DIRECTORY_PATH) .addOutputFiles(MARKER_FILE_PATH) .addOutputDirectories(REPO_DIRECTORY_PATH) + .setPlatform(Platform.getDefaultInstance()) .build(); private static final Directory INPUT_ROOT = Directory.getDefaultInstance(); @@ -118,6 +120,7 @@ public RemoteRepoContentsCacheImpl( Action.newBuilder() .setCommandDigest(digestUtil.compute(COMMAND)) .setInputRootDigest(digestUtil.compute(INPUT_ROOT)) + .setPlatform(Platform.getDefaultInstance()) .build(); } From ddbaca04ae247b4ba8ce0c2dd0cc3364954b9d37 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Wed, 24 Jun 2026 10:18:06 +0200 Subject: [PATCH 7/8] Materialize important outputs from remote external repos Important outputs and runfiles from external repos that are remote repo contents cache hits got stuck at various levels of the materialization pipeline for being source artifacts. This is fixed by consolidating the skip logic in a `RemoteOutputChecker.mayBeRemote` static helper and letting external-repo source artifacts flow through the toplevel-output download path. 8.x adaptation: v9 routes toplevel outputs through `RemoteImportantOutputHandler`, which does not exist on 8.x (`ImportantOutputHandler` has no production implementor). The equivalent toplevel-output materialization on 8.x lives in `CompletionFunction.ensureToplevelArtifacts`/`downloadArtifact`, which previously bailed out on non-`DerivedArtifact`s and only ran under skymeld. This change lets external-repo source artifacts (which have no generating action and are thus never downloaded by `finalizeAction`) flow through that path in both skymeld and non-skymeld builds, so they honor `--remote_download_outputs` just like build outputs do. `AbstractActionInputPrefetcher.prefetchFiles` already only skips main-repo source artifacts, so it is unchanged beyond annotating the `action` parameter `@Nullable` (source artifacts have no generating action). Closes #28308. PiperOrigin-RevId: 881618604 Change-Id: Ifaae8e39b0bcab3803653ca82bcf00d26c487316 (cherry picked from commit 16613f14abf10f7d7747b0cda590cef08ed06ed9) --- .../lib/actions/ActionInputPrefetcher.java | 5 ++- .../remote/AbstractActionInputPrefetcher.java | 2 +- .../build/lib/remote/RemoteOutputChecker.java | 28 +++++++++---- .../lib/skyframe/CompletionFunction.java | 34 +++++++++++---- .../bzlmod/remote_repo_contents_cache_test.py | 41 ++++++++++++++++--- 5 files changed, 85 insertions(+), 25 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/actions/ActionInputPrefetcher.java b/src/main/java/com/google/devtools/build/lib/actions/ActionInputPrefetcher.java index f1ed84d33fe159..a0152db7ef766e 100644 --- a/src/main/java/com/google/devtools/build/lib/actions/ActionInputPrefetcher.java +++ b/src/main/java/com/google/devtools/build/lib/actions/ActionInputPrefetcher.java @@ -17,6 +17,7 @@ import com.google.common.util.concurrent.ListenableFuture; import java.io.IOException; +import javax.annotation.Nullable; /** Prefetches files to local disk. */ public interface ActionInputPrefetcher { @@ -34,7 +35,7 @@ public interface MetadataSupplier { new ActionInputPrefetcher() { @Override public ListenableFuture prefetchFiles( - ActionExecutionMetadata action, + @Nullable ActionExecutionMetadata action, Iterable inputs, MetadataSupplier metadataSupplier, Priority priority, @@ -90,7 +91,7 @@ enum Reason { * @return future success if prefetch is finished or {@link IOException}. */ ListenableFuture prefetchFiles( - ActionExecutionMetadata action, + @Nullable ActionExecutionMetadata action, Iterable inputs, MetadataSupplier metadataSupplier, Priority priority, diff --git a/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java b/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java index 45c114ea2cdadc..638f9d7a561efc 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java +++ b/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java @@ -329,7 +329,7 @@ protected void prefetchVirtualActionInput(VirtualActionInput input) throws IOExc */ @Override public ListenableFuture prefetchFiles( - ActionExecutionMetadata action, + @Nullable ActionExecutionMetadata action, Iterable inputs, MetadataSupplier metadataSupplier, Priority priority, diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteOutputChecker.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteOutputChecker.java index b295e050a86253..eebf092af061c9 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteOutputChecker.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteOutputChecker.java @@ -190,24 +190,21 @@ private void addRunfiles(ProviderCollection buildTarget) { } var runfiles = runfilesSupport.getRunfiles(); for (Artifact runfile : runfiles.getArtifacts().toList()) { - if (runfile.isSourceArtifact()) { - continue; + if (mayBeRemote(runfile)) { + addOutputToDownload(runfile); } - addOutputToDownload(runfile); } for (var symlink : runfiles.getSymlinks().toList()) { var artifact = symlink.getArtifact(); - if (artifact.isSourceArtifact()) { - continue; + if (mayBeRemote(artifact)) { + addOutputToDownload(artifact); } - addOutputToDownload(artifact); } for (var symlink : runfiles.getRootSymlinks().toList()) { var artifact = symlink.getArtifact(); - if (artifact.isSourceArtifact()) { - continue; + if (mayBeRemote(artifact)) { + addOutputToDownload(artifact); } - addOutputToDownload(artifact); } } @@ -292,6 +289,19 @@ private boolean matchesPattern(PathFragment execPath) { return false; } + /** + * Returns whether this {@link ActionInput} could conceivably be only available remotely. + * + *

Use this as a quick check to avoid unnecessary extra work for artifacts that are definitely + * local. + */ + public static boolean mayBeRemote(ActionInput actionInput) { + return !(actionInput instanceof Artifact artifact + && artifact.isSourceArtifact() + // Source artifacts in the main repo don't need to be fetched. + && (artifact.getOwner() == null || artifact.getOwner().getRepository().isMain())); + } + /** Returns whether this {@link ActionInput} should be downloaded. */ @Override public boolean shouldDownloadOutput(ActionInput output, RemoteFileArtifactValue metadata) { diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/CompletionFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/CompletionFunction.java index 00befa94bf0a73..467b915a2e592c 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/CompletionFunction.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/CompletionFunction.java @@ -386,13 +386,6 @@ public SkyValue compute(SkyKey skyKey, Environment env) private void ensureToplevelArtifacts( Environment env, ImmutableCollection importantArtifacts, ActionInputMap inputMap) throws CompletionFunctionException, InterruptedException { - // For skymeld, a non-toplevel target might become a toplevel after it has been executed. This - // is the last chance to download the missing toplevel outputs in this case before sending out - // TargetCompleteEvent. See https://github.com/bazelbuild/bazel/issues/20737. - if (!isSkymeld.get()) { - return; - } - var actionInputPrefetcher = skyframeActionExecutor.getActionInputPrefetcher(); if (actionInputPrefetcher == null || actionInputPrefetcher == ActionInputPrefetcher.NONE) { return; @@ -444,6 +437,33 @@ private void downloadArtifact( List> futures) throws InterruptedException { if (!(artifact instanceof DerivedArtifact derivedArtifact)) { + // Source artifacts from external repos (e.g. remote repo contents cache hits) have no + // generating action and are thus never downloaded by finalizeAction, so materialize the + // toplevel ones here so they honor --remote_download_outputs like build outputs do. Source + // artifacts in the main repo are always local. + if (artifact.getOwner() != null && !artifact.getOwner().getRepository().isMain()) { + var metadata = inputMap.getInputMetadata(artifact); + if (metadata != null + && metadata.isRemote() + && remoteArtifactChecker.shouldDownloadOutput( + artifact, (RemoteFileArtifactValue) metadata)) { + futures.add( + actionInputPrefetcher.prefetchFiles( + /* action= */ null, + ImmutableList.of(artifact), + inputMap::getInputMetadata, + Priority.LOW, + Reason.OUTPUTS)); + } + } + return; + } + + // Derived toplevel outputs are downloaded eagerly by finalizeAction during execution. For + // skymeld, a non-toplevel target might only become toplevel after it has been executed, so this + // is the last chance to download the missing toplevel outputs in that case before sending out + // TargetCompleteEvent. See https://github.com/bazelbuild/bazel/issues/20737. + if (!isSkymeld.get()) { return; } diff --git a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py index 79727294164277..0e08858ec1d0b8 100644 --- a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py +++ b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py @@ -617,6 +617,16 @@ def testUseRepoFileInBuildRule_actionDoesNotUseCache_withExplicitSandboxBase( ) def testLostRemoteFile_build(self): + self._runLostRemoteFileBuildTest( + '--experimental_merged_skyframe_analysis_execution' + ) + + def testLostRemoteFile_build_noSkymeld(self): + self._runLostRemoteFileBuildTest( + '--noexperimental_merged_skyframe_analysis_execution' + ) + + def _runLostRemoteFileBuildTest(self, skymeld_flag): # Create a repo with two BUILD files (one in a subpackage), build a target # from one to cause it to be cached, then build that target again after # expunging to verify it is cached. @@ -653,7 +663,7 @@ def testLostRemoteFile_build(self): repo_dir = self.RepoDir('my_repo') # First fetch: not cached - _, _, stderr = self.RunBazel(['build', '@my_repo//:root']) + _, _, stderr = self.RunBazel(['build', skymeld_flag, '@my_repo//:root']) self.assertIn('JUST FETCHED', '\n'.join(stderr)) self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) self.assertTrue(os.path.exists(os.path.join(repo_dir, 'root.txt'))) @@ -662,10 +672,10 @@ def testLostRemoteFile_build(self): # After expunging: cached self.RunBazel(['clean', '--expunge']) - _, _, stderr = self.RunBazel(['build', '@my_repo//:root']) + _, _, stderr = self.RunBazel(['build', skymeld_flag, '@my_repo//:root']) self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) - self.assertFalse(os.path.exists(os.path.join(repo_dir, 'root.txt'))) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'root.txt'))) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'sub/BUILD'))) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'sub/sub.txt'))) @@ -675,7 +685,7 @@ def testLostRemoteFile_build(self): # Build the other target: fails due to the lost input # TODO: #26450 - Assert success and enable the checks below. _, _, stderr = self.RunBazel( - ['build', '@my_repo//sub:sub'], allow_failure=True + ['build', skymeld_flag, '@my_repo//sub:sub'], allow_failure=True ) self.assertEqual( 1, @@ -699,12 +709,12 @@ def testLostRemoteFile_build(self): # After expunging again: cached self.RunBazel(['clean', '--expunge']) - _, _, stderr = self.RunBazel(['build', '@my_repo//sub:sub']) + _, _, stderr = self.RunBazel(['build', skymeld_flag, '@my_repo//sub:sub']) self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'root.txt'))) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'sub/BUILD'))) - self.assertFalse(os.path.exists(os.path.join(repo_dir, 'sub/sub.txt'))) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'sub/sub.txt'))) def testBzlFilePrefetching(self): self.ScratchFile( @@ -781,6 +791,25 @@ def testBzlFilePrefetching(self): os.path.exists(os.path.join(repo_dir, 'subdir/more_nested.bzl')) ) + def testRun(self): + self.ScratchFile( + 'MODULE.bazel', + [ + 'bazel_dep(name = "buildozer", version = "8.5.1")', + ], + ) + + # First fetch: not cached + _, stdout, _ = self.RunBazel(['run', '@buildozer', '--', '--version']) + self.assertIn('buildozer version: 8.5.1', '\n'.join(stdout)) + + # After expunging: cached + self.RunBazel(['clean', '--expunge']) + _, stdout, _ = self.RunBazel(['run', '@buildozer', '--', '--version']) + self.assertIn('buildozer version: 8.5.1', '\n'.join(stdout)) + repo_dir = self.RepoDir('buildozer') + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'MODULE.bazel'))) + if __name__ == '__main__': absltest.main() From b689a3590fd43e85da39839a6d8457d13d7cb048 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Wed, 24 Jun 2026 11:16:59 +0200 Subject: [PATCH 8/8] Support dynamic inputs with the remote repo contents cache With this change, all reproducible repository rules can now be cached in a disk or remote cache, including those with dependencies recorded dynamically during evaluation. This is made possible by introducing a new intermediate type of synthetic AC entries. When looking up the predeclared inputs hash for a repo rule with dynamic dependencies, the action result for such an intermediate entry lists one or more sets of inputs (e.g. a particular file in another repo or an environment variable name). These inputs are then requested from Skyframe and their current values are hashed to obtain the key of the next AC entry, which is again either an intermediate entry or a final entry containing the contents of the repository. 8.x: adapted to the split RepositoryDelegatorFunction/StarlarkRepositoryFunction restart model (there is no merged RepositoryFetchFunction worker on 8.x). The cache lookup is wired through RepositoryDelegatorFunction, which returns null to trigger a Skyframe restart when lookupCache reports missing values via env.valuesMissing(). Since DigestWriter is a nested class of RepositoryDelegatorFunction on 8.x rather than a standalone target, the marker file is parsed inline in RemoteRepoContentsCacheImpl via RepoRecordedInput.WithValue.parse to avoid a dependency cycle between the remote and repository rule libraries. RELNOTES: The remote repo contents cache now supports all reproducible repo rules. Closes #27634. PiperOrigin-RevId: 889750228 Change-Id: I9c7e4fed9d86432a85a96b3318f6eccc9c0558eb (cherry picked from commit ffebc5b1195ac918e177486c9f4eb29004923813) --- .../java/com/google/devtools/build/lib/BUILD | 1 + .../google/devtools/build/lib/remote/BUILD | 1 + .../build/lib/remote/CombinedCache.java | 11 +- .../RemoteExternalOverlayFileSystem.java | 6 +- .../build/lib/remote/RemoteModule.java | 1 + .../remote/RemoteRepoContentsCacheImpl.java | 481 ++++++++++++++---- .../RepositoryRemoteHelpersFactoryImpl.java | 12 +- .../RepositoryDelegatorFunction.java | 9 +- .../lib/runtime/RemoteRepoContentsCache.java | 5 +- .../bzlmod/remote_repo_contents_cache_test.py | 279 +++++++++- 10 files changed, 704 insertions(+), 102 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/BUILD b/src/main/java/com/google/devtools/build/lib/BUILD index 82f62b05e62f58..4ad86766f353fc 100644 --- a/src/main/java/com/google/devtools/build/lib/BUILD +++ b/src/main/java/com/google/devtools/build/lib/BUILD @@ -288,6 +288,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/cmdline", "//src/main/java/com/google/devtools/build/lib/events", "//src/main/java/com/google/devtools/build/lib/vfs", + "//src/main/java/com/google/devtools/build/skyframe:skyframe-objects", ], ) diff --git a/src/main/java/com/google/devtools/build/lib/remote/BUILD b/src/main/java/com/google/devtools/build/lib/remote/BUILD index c00dcb0d86766f..e82ce27e1139f1 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/BUILD +++ b/src/main/java/com/google/devtools/build/lib/remote/BUILD @@ -119,6 +119,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/remote/util", "//src/main/java/com/google/devtools/build/lib/remote/util:digest_utils", "//src/main/java/com/google/devtools/build/lib/remote/zstd", + "//src/main/java/com/google/devtools/build/lib/rules:repository/repo_recorded_input", "//src/main/java/com/google/devtools/build/lib/skyframe:action_execution_value", "//src/main/java/com/google/devtools/build/lib/skyframe:sky_functions", "//src/main/java/com/google/devtools/build/lib/skyframe:tree_artifact_value", diff --git a/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java b/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java index d8fcea582c368a..a40e3e98950edc 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java +++ b/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java @@ -215,6 +215,15 @@ public CachedActionResult downloadActionResult( boolean inlineOutErr, Set inlineOutputFiles) throws IOException, InterruptedException { + return getFromFuture( + downloadActionResultAsync(context, actionKey, inlineOutErr, inlineOutputFiles)); + } + + public ListenableFuture downloadActionResultAsync( + RemoteActionExecutionContext context, + ActionKey actionKey, + boolean inlineOutErr, + Set inlineOutputFiles) { var spawnExecutionContext = context.getSpawnExecutionContext(); ListenableFuture future = immediateFuture(null); @@ -258,7 +267,7 @@ public CachedActionResult downloadActionResult( directExecutor()); } - return getFromFuture(future); + return future; } private ListenableFuture downloadActionResultFromRemote( diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java index d89181fa2c999e..e0451991392d3c 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java @@ -186,11 +186,13 @@ public void afterCommand() { */ public boolean injectRemoteRepo(RepositoryName repo, Tree remoteContents, String markerFile) throws IOException, InterruptedException { + var repoDir = externalDirectory.getChild(repo.getName()); + deleteTree(repoDir); + var unused = delete(externalDirectory.getChild(repo.getMarkerFileName())); var childMap = remoteContents.getChildrenList().stream() .collect( toImmutableMap(cache.digestUtil::compute, directory -> directory, (a, b) -> a)); - var repoDir = externalDirectory.getChild(repo.getName()); var filesToPrefetch = new ArrayList(); injectRecursively( externalFs, repoDir, remoteContents.getRoot(), childMap, filesToPrefetch::add); @@ -208,7 +210,7 @@ public boolean injectRemoteRepo(RepositoryName repo, Tree remoteContents, String } // Create the repo directory on disk so that readdir reflects the overlaid state of the external // directory. - nativeFs.createDirectoryAndParents(externalDirectory.getChild(repo.getName())); + nativeFs.createDirectoryAndParents(repoDir); // Keep the marker file contents in memory so that it can be written out when the repo is // materialized. This doubles as a presence marker for the in-memory repo contents. markerFileContents.put(repo.getName(), markerFile); diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java index 7d4dc72c888cf1..d78c4cea26047a 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java @@ -762,6 +762,7 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException { repositoryRemoteHelpersFactoryDelegate.init( new RepositoryRemoteHelpersFactoryImpl( + env.getDirectories(), actionContextProvider.getCombinedCache(), actionContextProvider.getRemoteExecutionClient(), buildRequestId, diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java index 1391d1f2ca04f2..d37a2195bb072c 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java @@ -15,27 +15,40 @@ package com.google.devtools.build.lib.remote; import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.common.util.concurrent.Futures.immediateFuture; +import static com.google.common.util.concurrent.Futures.transformAsync; +import static com.google.common.util.concurrent.Futures.whenAllSucceed; import static com.google.common.util.concurrent.MoreExecutors.directExecutor; import static com.google.devtools.build.lib.remote.util.Utils.waitForBulkTransfer; +import static com.google.devtools.build.lib.unsafe.StringUnsafe.getInternalStringBytes; +import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.util.stream.Collectors.joining; import build.bazel.remote.execution.v2.Action; +import build.bazel.remote.execution.v2.ActionResult; import build.bazel.remote.execution.v2.Command; +import build.bazel.remote.execution.v2.Digest; import build.bazel.remote.execution.v2.Directory; +import build.bazel.remote.execution.v2.OutputDirectory; +import build.bazel.remote.execution.v2.OutputFile; import build.bazel.remote.execution.v2.Platform; import build.bazel.remote.execution.v2.Tree; +import com.google.common.base.Preconditions; import com.google.common.base.Splitter; import com.google.common.base.Throwables; +import com.google.common.collect.Collections2; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.devtools.build.lib.actions.ActionInput; import com.google.devtools.build.lib.actions.ExecException; +import com.google.devtools.build.lib.actions.Spawn; +import com.google.devtools.build.lib.analysis.BlazeDirectories; import com.google.devtools.build.lib.cmdline.RepositoryName; import com.google.devtools.build.lib.events.Event; import com.google.devtools.build.lib.events.ExtendedEventHandler; -import com.google.devtools.build.lib.actions.Spawn; import com.google.devtools.build.lib.exec.SpawnInputExpander; import com.google.devtools.build.lib.exec.SpawnRunner; import com.google.devtools.build.lib.remote.common.RemoteActionExecutionContext; @@ -44,18 +57,24 @@ import com.google.devtools.build.lib.remote.common.RemotePathResolver; import com.google.devtools.build.lib.remote.util.DigestUtil; import com.google.devtools.build.lib.remote.util.TracingMetadataUtils; +import com.google.devtools.build.lib.rules.repository.RepoRecordedInput; import com.google.devtools.build.lib.runtime.RemoteRepoContentsCache; import com.google.devtools.build.lib.unsafe.StringUnsafe; +import com.google.devtools.build.lib.util.Fingerprint; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; +import com.google.devtools.build.skyframe.SkyFunction; import com.google.protobuf.ByteString; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Optional; import java.util.SortedMap; import java.util.UUID; -import java.util.concurrent.ExecutionException; +import javax.annotation.Nullable; /** * A cache for the contents of external repositories that is backed by an ordinary remote cache. @@ -66,19 +85,32 @@ * action executed locally. This can save both time taken to execute repo rules and compute file * digests and disk space required to store the contents of external repositories. * - *

Repositories are cached as AC entries for a synthetic command with the predeclared input hash - * as the salt. The contents are represented as an output file for the marker file and an output - * directory for the contents. + *

Repositories are cached as AC entries for a synthetic command with a special hash as the salt. + * The contents are represented as an output file for the marker file and an output directory for + * the contents. + * + *

If a repo rule does not record any {@link RepoRecordedInput}s during its execution, this hash + * is just the predeclared inputs hash. Otherwise, the AC entry for the predeclared inputs hash will + * be an intermediate entry that lists one or more sets of {@link RepoRecordedInput}s that a + * previously cached repo consumed during the evaluation of its rule. The cache requests the current + * values of these inputs and computes the next hash to look up by a rolling construction that + * combines the previous hash with the string representations of the {@link + * RepoRecordedInput.WithValue}. This process is repeated until a final entry with the repo contents + * is found or no matching entry exists. * - *

At this point the cache only supports repository rules with no dependencies expressed at - * runtime. Verifying whether such dependencies are up to date can't be done via a single hash as - * the set of dependencies is not known ahead of time. Support for such rules would require a - * two-stage cache lookup in which the first lookup may produce multiple marker files. + *

By representing repos with recorded inputs as DAGs of AC entries, lookups are efficient (they + * don't scale with the number of cached repos per predeclared inputs hash) and regular LRU eviction + * policies remain effective for the most part. If a repo rule often requests different inputs even + * with the same predeclared inputs hash and previously requested inputs and values, it could result + * in large action results that grow over time. This is considered an acceptable trade-off for + * simplicity for now and could be mitigated in the future by an explicit GC mechanism such as + * "least recently added" eviction when the size of action result exceeds a certain threshold. */ public final class RemoteRepoContentsCacheImpl implements RemoteRepoContentsCache { private static final UUID GUID = UUID.fromString("f4a165a9-5557-45a7-bf25-230b6d42393a"); private static final String MARKER_FILE_PATH = ".recorded_inputs"; private static final String REPO_DIRECTORY_PATH = "repo_contents"; + private static final Splitter SPLIT_ON_SPACE = Splitter.on(' '); private static final Command COMMAND = Command.newBuilder() @@ -91,8 +123,10 @@ public final class RemoteRepoContentsCacheImpl implements RemoteRepoContentsCach .addOutputDirectories(REPO_DIRECTORY_PATH) .setPlatform(Platform.getDefaultInstance()) .build(); + private static final ByteString COMMAND_BYTES = COMMAND.toByteString(); private static final Directory INPUT_ROOT = Directory.getDefaultInstance(); + private final BlazeDirectories directories; private final CombinedCache cache; private final String buildRequestId; private final String commandId; @@ -101,17 +135,20 @@ public final class RemoteRepoContentsCacheImpl implements RemoteRepoContentsCach private final boolean verboseFailures; private final DigestUtil digestUtil; private final Action baseAction; + private final Digest commandDigest; public RemoteRepoContentsCacheImpl( + BlazeDirectories directories, CombinedCache cache, String buildRequestId, String commandId, boolean acceptCached, boolean uploadLocalResults, boolean verboseFailures) { + this.directories = directories; + this.cache = cache; this.buildRequestId = buildRequestId; this.commandId = commandId; - this.cache = cache; this.acceptCached = acceptCached; this.uploadLocalResults = uploadLocalResults; this.verboseFailures = verboseFailures; @@ -122,6 +159,7 @@ public RemoteRepoContentsCacheImpl( .setInputRootDigest(digestUtil.compute(INPUT_ROOT)) .setPlatform(Platform.getDefaultInstance()) .build(); + this.commandDigest = digestUtil.compute(COMMAND); } @Override @@ -139,32 +177,33 @@ public void addToCache( if (!(fetchedRepoDir.getFileSystem() instanceof RemoteExternalOverlayFileSystem)) { return; } - var context = buildContext(repoName); + var context = buildContext(repoName, CacheOp.UPLOAD); if (!context.getWriteCachePolicy().allowRemoteCache()) { return; } + List recordedInputValues; try { - if (FileSystemUtils.readLinesAsLatin1(fetchedRepoMarkerFile).stream() - .filter(line -> !line.isEmpty()) - .count() - != 1) { - // This cache currently only supports marker files that contain nothing but the predeclared - // inputs hash. Repo rules with dependencies expressed only at runtime would require a - // two-stage cache lookup. Among the rules that are supported are http_archive and - // git_repository without patches. + var maybeRecordedInputValues = + readMarkerFile( + FileSystemUtils.readContent(fetchedRepoMarkerFile, ISO_8859_1), predeclaredInputHash); + if (maybeRecordedInputValues.isEmpty()) { return; } + recordedInputValues = maybeRecordedInputValues.get(); } catch (IOException e) { reporter.handle( Event.warn( - "Failed to read marker file repo %s, skipping: %s" + "Failed to read marker file for repo %s, skipping: %s" .formatted(repoName, maybeGetStackTrace(e)))); + return; } - var action = buildAction(predeclaredInputHash); - var actionKey = new ActionKey(digestUtil.compute(action)); - var remotePathResolver = new RepoRemotePathResolver(fetchedRepoMarkerFile, fetchedRepoDir); try { // TODO: Consider uploading asynchronously. + var finalHash = + uploadIntermediateActionResults(context, predeclaredInputHash, recordedInputValues); + var action = buildAction(finalHash); + var actionKey = new ActionKey(digestUtil.compute(action)); + var remotePathResolver = new RepoRemotePathResolver(fetchedRepoMarkerFile, fetchedRepoDir); var unused = UploadManifest.create( /* remoteOptions= */ null, @@ -194,10 +233,10 @@ public boolean lookupCache( RepositoryName repoName, Path repoDir, String predeclaredInputHash, - ExtendedEventHandler reporter) + SkyFunction.Environment env) throws IOException, InterruptedException { try { - return doLookupCache(repoName, repoDir, predeclaredInputHash, reporter); + return doLookupCache(repoName, repoDir, predeclaredInputHash, env); } catch (IOException e) { throw new IOException( "Failed to look up repo %s in the remote repo contents cache: %s" @@ -210,43 +249,23 @@ private boolean doLookupCache( RepositoryName repoName, Path repoDir, String predeclaredInputHash, - ExtendedEventHandler reporter) + SkyFunction.Environment env) throws IOException, InterruptedException { if (!(repoDir.getFileSystem() instanceof RemoteExternalOverlayFileSystem remoteFs)) { return false; } - var context = buildContext(repoName); + var context = buildContext(repoName, CacheOp.DOWNLOAD); if (!context.getReadCachePolicy().allowRemoteCache()) { return false; } - var actionKey = new ActionKey(digestUtil.compute(buildAction(predeclaredInputHash))); - // The marker file is read right after and thus requested to be inlined. - var cachedActionResult = - cache.downloadActionResult( - context, actionKey, /* inlineOutErr= */ false, ImmutableSet.of(MARKER_FILE_PATH)); - if (cachedActionResult == null) { - return false; - } - - var actionResult = cachedActionResult.actionResult(); - if (actionResult.getExitCode() != 0 - || actionResult.getOutputFilesCount() != 1 - || actionResult.getOutputDirectoriesCount() != 1) { - reporter.handle( - Event.warn( - String.format( - "Unexpected action result for cached repo %s: exit code %d, %d output files, %d" - + " output directories", - repoName, - actionResult.getExitCode(), - actionResult.getOutputFilesCount(), - actionResult.getOutputDirectoriesCount()))); + var finalEntry = fetchFinalCacheEntry(env, context, predeclaredInputHash); + if (env.valuesMissing() || finalEntry == null) { return false; } ListenableFuture markerFileContentFuture; - var markerFile = actionResult.getOutputFiles(0); + var markerFile = finalEntry.markerFile(); // Inlining is an optional feature, so we have to be prepared to download the marker file. if (markerFile.getContents().isEmpty()) { markerFileContentFuture = @@ -255,69 +274,351 @@ private boolean doLookupCache( } else { markerFileContentFuture = immediateFuture(markerFile.getContents().toByteArray()); } - var repoDirectory = actionResult.getOutputDirectories(0); + var repoDirectory = finalEntry.repoDirectory(); var repoDirectoryContentFuture = - Futures.transformAsync( + transformAsync( cache.downloadBlob( context, REPO_DIRECTORY_PATH, /* execPath= */ null, repoDirectory.getTreeDigest()), (treeBytes) -> immediateFuture(Tree.parseFrom(treeBytes)), directExecutor()); waitForBulkTransfer(ImmutableList.of(markerFileContentFuture, repoDirectoryContentFuture)); - String markerFileContent; - Tree repoDirectoryContent; - try { - markerFileContent = new String(markerFileContentFuture.get(), StandardCharsets.ISO_8859_1); - repoDirectoryContent = repoDirectoryContentFuture.get(); - } catch (ExecutionException e) { - throw new IllegalStateException( - "waitForBulkTransfer should have thrown: " + maybeGetStackTrace(e)); - } - var markerFileLines = - Splitter.on('\n') - .splitToStream(markerFileContent) - .filter(line -> !line.isEmpty()) - .collect(toImmutableList()); - if (markerFileLines.size() > 1) { - reporter.handle( - Event.warn( - "Marker file for repo %s has extra lines, skipping:\n%s" - .formatted( - repoName, - String.join("\n", markerFileLines.subList(1, markerFileLines.size()))))); + + String markerFileContent = new String(markerFileContentFuture.resultNow(), ISO_8859_1); + var maybeRecordedInputs = readMarkerFile(markerFileContent, predeclaredInputHash); + if (maybeRecordedInputs.isEmpty()) { return false; } - if (!markerFileLines.getFirst().equals(predeclaredInputHash)) { - reporter.handle( - Event.warn( - "Predeclared input hash mismatch for repo %s: expected %s, got %s" - .formatted(repoName, predeclaredInputHash, markerFileLines.getFirst()))); + var outdatedReason = + RepoRecordedInput.isAnyValueOutdated(env, directories, maybeRecordedInputs.get()); + if (env.valuesMissing() || outdatedReason.isPresent()) { + env.getListener() + .handle( + Event.warn( + "Unexpectedly outdated cached repo %s: %s" + .formatted(repoName, outdatedReason.orElse("unknown reason")))); return false; } - return remoteFs.injectRemoteRepo(repoName, repoDirectoryContent, markerFileContent); + return remoteFs.injectRemoteRepo( + repoName, repoDirectoryContentFuture.resultNow(), markerFileContent); + } + + private enum CacheOp { + DOWNLOAD, + UPLOAD, } - private RemoteActionExecutionContext buildContext(RepositoryName repoName) { + private RemoteActionExecutionContext buildContext(RepositoryName repoName, CacheOp cacheOp) { var metadata = TracingMetadataUtils.buildMetadata( buildRequestId, commandId, repoName.getName(), /* actionMetadata= */ null); - // Don't use the disk cache as `--repo_contents_cache` is a strictly better alternative for - // local caching. + // Don't upload local repo contents to the disk cache as the (local) `--repo_contents_cache` is + // a better alternative for local caching. Do write through the disk cache for downloads from + // the remote cache to speed up future usage. return RemoteActionExecutionContext.create(metadata) - .withReadCachePolicy(acceptCached ? CachePolicy.REMOTE_CACHE_ONLY : CachePolicy.NO_CACHE) + .withReadCachePolicy(acceptCached ? CachePolicy.ANY_CACHE : CachePolicy.NO_CACHE) .withWriteCachePolicy( - uploadLocalResults ? CachePolicy.REMOTE_CACHE_ONLY : CachePolicy.NO_CACHE); + switch (cacheOp) { + case DOWNLOAD -> CachePolicy.ANY_CACHE; + case UPLOAD -> + uploadLocalResults ? CachePolicy.REMOTE_CACHE_ONLY : CachePolicy.NO_CACHE; + }); } - private Action buildAction(String predeclaredInputHash) { - // The predeclared input hash uniquely identifies the repo rule and all its attributes, but not - // dependencies established at runtime. We choose to embed it into the salt simply because that - // results in a constant Command message. + private Action buildAction(String inputHash) { + // We choose to embed the hash into the salt simply because that results in a constant Command + // message. return baseAction.toBuilder() - .setSalt(ByteString.copyFrom(StringUnsafe.getByteArray(predeclaredInputHash))) + .setSalt(ByteString.copyFrom(StringUnsafe.getByteArray(inputHash))) .build(); } + /** + * Uploads the intermediate action results representing the inputs recorded at runtime and returns + * the input hash to use for the final action result. + */ + private String uploadIntermediateActionResults( + RemoteActionExecutionContext context, + String predeclaredInputHash, + List recordedInputValues) + throws IOException, InterruptedException { + // The command is shared by all action results and small enough that FindMissingBlobs is not + // worthwhile. The REAPI spec requires the command to be uploaded before an action result that + // references it. + waitForBulkTransfer(ImmutableSet.of(cache.uploadBlob(context, commandDigest, COMMAND_BYTES))); + + String rollingHash = predeclaredInputHash; + var batches = RepoRecordedInput.WithValue.splitIntoBatches(recordedInputValues); + var futures = new ArrayList>(batches.size()); + for (var batch : batches) { + futures.add( + addToActionResult( + context, + buildAction(rollingHash), + Collections2.transform(batch, RepoRecordedInput.WithValue::input))); + for (var recordedInputValue : batch) { + rollingHash = rollForwardHash(rollingHash, recordedInputValue); + } + } + waitForBulkTransfer(futures); + return rollingHash; + } + + /** + * Adds the given set of recorded inputs as one of the alternative paths to the action result for + * the given action, if not already present. + * + *

Most repo rule evaluations with a fixed previous batch of hashes (in particular, the same + * predeclared inputs hash) will request a fixed set of inputs in the next batch. Thus, most + * intermediate action results will only contain a single set of recorded inputs. + */ + private ListenableFuture addToActionResult( + RemoteActionExecutionContext context, + Action action, + Collection newInputs) { + var actionKey = digestUtil.computeActionKey(action); + var currentInputsFuture = + Futures.transformAsync( + cache.downloadActionResultAsync( + context, actionKey, /* inlineOutErr= */ true, ImmutableSet.of()), + (currentResult) -> { + if (currentResult == null + || currentResult.actionResult().getStdoutDigest().getSizeBytes() == 0) { + return immediateFuture(""); + } + return fetchStdout(context, currentResult.actionResult()); + }, + directExecutor()); + return Futures.transformAsync( + currentInputsFuture, + currentInputsString -> { + // RepoRecordedInput.toString() is guaranteed to return a string that doesn't contain + // spaces or newlines. We can thus safely use spaces to separate inputs within a batch + // and newlines to separate different batches. + var newInputString = + newInputs.stream().map(RepoRecordedInput::toString).collect(joining(" ")); + if (currentInputsString.lines().anyMatch(newInputString::equals)) { + // The current batch of inputs is already present, no need to update the action result. + return immediateFuture(null); + } + // Add the new inputs to the top so that the most recently added inputs stay at the top. + // This could be used to implement a simple "least recently added" eviction strategy in + // the future in case the size of action results becomes a concern. + // + // Note that this update is inherently racy: multiple clients may add inputs concurrently, + // resulting in some added inputs being lost since the REAPI does not provide a way to + // update action results atomically. However, since different batches of inputs are + // already rare and them being added concurrently even more so, the temporary loss of a + // cache entry is an acceptable trade-off for simplicity. + var newInputsString = newInputString + '\n' + currentInputsString; + var stdoutBytes = getInternalStringBytes(newInputsString); + var stdoutDigest = digestUtil.compute(stdoutBytes); + var actionResult = + ActionResult.newBuilder().setExitCode(0).setStdoutDigest(stdoutDigest).build(); + return whenAllSucceed( + cache.uploadBlob(context, actionKey.getDigest(), action.toByteString()), + cache.uploadBlob(context, stdoutDigest, ByteString.copyFrom(stdoutBytes))) + .callAsync( + () -> cache.uploadActionResult(context, actionKey, actionResult), + directExecutor()); + }, + directExecutor()); + } + + /** Represents a single AC entry in the internal format used by the remote repo contents cache. */ + private sealed interface CacheEntry { + /** + * A final cache entry containing the contents of a repository. + * + *

Represented as an ActionResult with one output directory and one output file. + * + * @param repoDirectory the contents of the repository directory + * @param markerFile the contents of the repository's marker file + */ + record Final(OutputDirectory repoDirectory, OutputFile markerFile) implements CacheEntry {} + + /** + * An intermediate cache entry that points to the keys of any number of further AC entries, + * which can themselves be intermediate or final entries. The remote repo contents cache will + * try them in order. + * + * @param nextInputHashes the keys under which the next AC entries should be looked up + */ + record Intermediate(ImmutableList nextInputHashes) implements CacheEntry {} + + /** + * The cache entry didn't match any of the formats expected by this version of the remote repo + * contents cache for the given human-readable reason. + */ + record Invalid(String reason) implements CacheEntry {} + } + + /** + * Fetches a final cache entry for the given predeclared input hash by recursively following + * intermediate entries if needed or returns null if no final entry could be found or a Skyframe + * restart is needed. + */ + @Nullable + private CacheEntry.Final fetchFinalCacheEntry( + SkyFunction.Environment env, + RemoteActionExecutionContext context, + String predeclaredInputHash) + throws IOException, InterruptedException { + var currentHashes = ImmutableList.of(predeclaredInputHash); + while (!currentHashes.isEmpty()) { + var nextHashes = ImmutableList.builder(); + for (var hash : currentHashes) { + switch (fetchCacheEntry(env, context, hash)) { + case CacheEntry.Final finalEntry -> { + return finalEntry; + } + case CacheEntry.Intermediate(ImmutableList nextInputHashes) -> + nextHashes.addAll(nextInputHashes); + case CacheEntry.Invalid(String reason) -> env.getListener().handle(Event.warn(reason)); + case null -> { + // Keep checking hashes to batch missing values in fewer restarts. + Preconditions.checkState(env.valuesMissing()); + } + } + } + if (env.valuesMissing()) { + return null; + } + currentHashes = nextHashes.build(); + } + return null; + } + + // Returns null if and only if values are missing. + @Nullable + private CacheEntry fetchCacheEntry( + SkyFunction.Environment env, RemoteActionExecutionContext context, String inputHash) + throws IOException, InterruptedException { + var actionKey = new ActionKey(digestUtil.compute(buildAction(inputHash))); + // The marker file is read right after and thus requested to be inlined. If the action result + // is an intermediate node, the full result will be contained in the stdout, which should thus + // also be inlined. + var cachedActionResult = + cache.downloadActionResult( + context, actionKey, /* inlineOutErr= */ true, ImmutableSet.of(MARKER_FILE_PATH)); + if (cachedActionResult == null) { + return new CacheEntry.Intermediate(ImmutableList.of()); + } + var actionResult = cachedActionResult.actionResult(); + + if (actionResult.getExitCode() != 0) { + return new CacheEntry.Invalid( + "Unexpected exit code in action result for remotely cached repo %s:\n%s" + .formatted(context.getRequestMetadata().getActionId(), actionResult)); + } + if (actionResult.getOutputFilesCount() == 1 + && actionResult.getOutputDirectoriesCount() == 1 + && actionResult.getOutputSymlinksCount() == 0) { + return new CacheEntry.Final( + actionResult.getOutputDirectories(0), actionResult.getOutputFiles(0)); + } + if (!(actionResult.getOutputFilesCount() == 0 + && actionResult.getOutputDirectoriesCount() == 0 + && actionResult.getOutputSymlinksCount() == 0 + && actionResult.getStdoutDigest().getSizeBytes() > 0)) { + return new CacheEntry.Invalid( + "Unexpected intermediate action result for remotely cached repo %s:\n%s" + .formatted(context.getRequestMetadata().getActionId(), actionResult)); + } + var stdoutFuture = fetchStdout(context, actionResult); + waitForBulkTransfer(ImmutableList.of(stdoutFuture)); + + // The action result's stdout contains multiple lines, each representing a batch of + // RepoRecordedInputs separated by spaces. A given batch is valid only if all inputs in the + // batch are, but separate batches are tried independently. + var nextInputBatches = + stdoutFuture + .resultNow() + .lines() + .map( + line -> + SPLIT_ON_SPACE + .splitToStream(line) + .map(RepoRecordedInput::parse) + .collect(toImmutableList())) + .collect(toImmutableList()); + var uniqueNextInputs = + nextInputBatches.stream().flatMap(List::stream).collect(toImmutableSet()); + RepoRecordedInput.prefetch(env, directories, uniqueNextInputs); + if (env.valuesMissing()) { + return null; + } + var nextHashes = ImmutableList.builder(); + nextBatch: + for (var batch : nextInputBatches) { + var rollingHash = inputHash; + for (var input : batch) { + var value = input.getValue(env, directories); + // Values have been prefetched above. + Preconditions.checkState(!env.valuesMissing()); + if (!(value instanceof RepoRecordedInput.MaybeValue.Valid(String valueString))) { + continue nextBatch; + } + rollingHash = + rollForwardHash(rollingHash, new RepoRecordedInput.WithValue(input, valueString)); + } + nextHashes.add(rollingHash); + } + return new CacheEntry.Intermediate(nextHashes.build()); + } + + private String rollForwardHash(String hash, RepoRecordedInput.WithValue inputWithValue) { + return new Fingerprint() + .addString(hash) + .addString(inputWithValue.toString()) + .hexDigestAndReset(); + } + + private ListenableFuture fetchStdout( + RemoteActionExecutionContext context, ActionResult actionResult) { + if (!actionResult.getStdoutRaw().isEmpty()) { + return immediateFuture( + StringUnsafe.newInstance(actionResult.getStdoutRaw().toByteArray(), StringUnsafe.LATIN1)); + } + return Futures.transform( + cache.downloadBlob(context, actionResult.getStdoutDigest()), + stdout -> StringUnsafe.newInstance(stdout, StringUnsafe.LATIN1), + directExecutor()); + } + + /** + * Parses a marker file written by {@code DigestWriter}: the first non-empty line is the + * predeclared input hash, followed by one serialized {@link RepoRecordedInput.WithValue} per line. + * Returns an empty {@link Optional} if the first line doesn't match the expected hash or any + * subsequent line fails to parse. + */ + private static Optional> readMarkerFile( + String content, String predeclaredInputHash) { + var recordedInputValues = ImmutableList.builder(); + boolean firstLineVerified = false; + for (String line : Splitter.on('\n').split(content)) { + if (line.isEmpty()) { + continue; + } + if (!firstLineVerified) { + if (!line.equals(predeclaredInputHash)) { + return Optional.empty(); + } + firstLineVerified = true; + } else { + Optional withValue = RepoRecordedInput.WithValue.parse(line); + if (withValue.isEmpty()) { + return Optional.empty(); + } + recordedInputValues.add(withValue.get()); + } + } + if (!firstLineVerified) { + return Optional.empty(); + } + return Optional.of(recordedInputValues.build()); + } + private String maybeGetStackTrace(Exception e) { return verboseFailures ? Throwables.getStackTraceAsString(e) : e.getMessage(); } diff --git a/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java b/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java index 2b4035128f7b98..eec030b4376da4 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java @@ -13,6 +13,7 @@ // limitations under the License. package com.google.devtools.build.lib.remote; +import com.google.devtools.build.lib.analysis.BlazeDirectories; import com.google.devtools.build.lib.remote.common.RemoteExecutionClient; import com.google.devtools.build.lib.runtime.RemoteRepoContentsCache; import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor; @@ -22,6 +23,7 @@ /** Factory for {@link RemoteRepositoryRemoteExecutor} and {@link RemoteRepoContentsCacheImpl}. */ class RepositoryRemoteHelpersFactoryImpl implements RepositoryRemoteHelpersFactory { + private final BlazeDirectories directories; private final CombinedCache cache; @Nullable private final RemoteExecutionClient remoteExecutor; private final String buildRequestId; @@ -33,6 +35,7 @@ class RepositoryRemoteHelpersFactoryImpl implements RepositoryRemoteHelpersFacto private final boolean verboseFailures; RepositoryRemoteHelpersFactoryImpl( + BlazeDirectories directories, CombinedCache cache, @Nullable RemoteExecutionClient remoteExecutor, String buildRequestId, @@ -41,6 +44,7 @@ class RepositoryRemoteHelpersFactoryImpl implements RepositoryRemoteHelpersFacto boolean acceptCached, boolean uploadLocalResults, boolean verboseFailures) { + this.directories = directories; this.cache = cache; this.remoteExecutor = remoteExecutor; this.buildRequestId = buildRequestId; @@ -71,6 +75,12 @@ public RepositoryRemoteExecutor createExecutor() { @Override public RemoteRepoContentsCache createRepoContentsCache() { return new RemoteRepoContentsCacheImpl( - cache, buildRequestId, commandId, acceptCached, uploadLocalResults, verboseFailures); + directories, + cache, + buildRequestId, + commandId, + acceptCached, + uploadLocalResults, + verboseFailures); } } diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java index 878429eb7252d4..22fec8e4a3a0ff 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java @@ -267,8 +267,13 @@ public SkyValue compute(SkyKey skyKey, Environment env) if (remoteRepoContentsCache != null) { try { - if (remoteRepoContentsCache.lookupCache( - repositoryName, repoRoot, digestWriter.predeclaredInputHash, env.getListener())) { + boolean cacheHit = + remoteRepoContentsCache.lookupCache( + repositoryName, repoRoot, digestWriter.predeclaredInputHash, env); + if (env.valuesMissing()) { + return null; + } + if (cacheHit) { return new RepositoryDirectoryValue.Success( repoRoot, /* isFetchingDelayed= */ false, excludeRepoFromVendoring); } diff --git a/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java b/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java index 8c4f0f25d1cf41..663c2472efdc07 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java +++ b/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java @@ -17,6 +17,7 @@ import com.google.devtools.build.lib.cmdline.RepositoryName; import com.google.devtools.build.lib.events.ExtendedEventHandler; import com.google.devtools.build.lib.vfs.Path; +import com.google.devtools.build.skyframe.SkyFunction; import java.io.IOException; /** A remote cache for the contents of external repositories. */ @@ -33,6 +34,8 @@ void addToCache( /** * Retrieves a repository from the remote cache if possible. * + *

Callers have to check {@code env.valuesMissing()} after this method returns. + * * @return true if there was a cache hit and the repository has been fetched into the given * directory. */ @@ -40,6 +43,6 @@ boolean lookupCache( RepositoryName repoName, Path repoDir, String predeclaredInputHash, - ExtendedEventHandler reporter) + SkyFunction.Environment env) throws IOException, InterruptedException; } diff --git a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py index 0e08858ec1d0b8..64a0fe68ded180 100644 --- a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py +++ b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py @@ -251,14 +251,12 @@ def testNotCachedWhenRecordedInputsChange_dynamicDep(self): self.assertIn('JUST FETCHED', '\n'.join(stderr)) self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) - # Change back to previous recorded inputs: not cached - # TODO: This is the current behavior, but it's not desired. Support for - # caching repos with dynamic deps should be added. + # Change back to previous recorded inputs: cached (even after expunging) self.RunBazel(['clean', '--expunge']) self.ScratchFile('data.txt', ['one']) _, _, stderr = self.RunBazel(['build', '@my_repo//:haha']) - self.assertIn('JUST FETCHED', '\n'.join(stderr)) - self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) def testNotCachedWhenRecordedInputsChange_staticDep(self): self.ScratchFile( @@ -305,6 +303,211 @@ def testNotCachedWhenRecordedInputsChange_staticDep(self): self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + def testRecordedInputs_differentValues(self): + platform_file = self.ScratchFile('platform.txt') + + self.ScratchFile( + 'MODULE.bazel', + [ + ( + 'platform_dependent_repo =' + ' use_repo_rule("//:platform_dependent_repo.bzl",' + ' "platform_dependent_repo")' + ), + 'platform_dependent_repo(name = "platform_dependent_repo")', + 'repo = use_repo_rule("//:repo.bzl", "repo")', + 'repo(name = "my_repo")', + ], + ) + self.ScratchFile( + 'BUILD.bazel', + [ + 'genrule(', + ' name = "show_platform",', + ' outs = ["platform.txt"],', + ' cmd = "cat $(location @my_repo//:data.txt) > $@",', + ' srcs = ["@my_repo//:data.txt"],', + ')', + ], + ) + self.ScratchFile( + 'platform_dependent_repo.bzl', + [ + 'def _platform_dependent_repo_impl(rctx):', + ' rctx.file("BUILD")', + ' print("DETERMINING PLATFORM")', + ' platform = rctx.read(rctx.path("%s"))' + % platform_file.replace('\\', '\\\\'), + ' rctx.file("data.txt", platform)', + ( + 'platform_dependent_repo =' + ' repository_rule(_platform_dependent_repo_impl)' + ), + ], + ) + self.ScratchFile( + 'repo.bzl', + [ + 'def _repo_impl(rctx):', + ' rctx.file("BUILD", "exports_files([\'data.txt\'])")', + ( + ' platform =' + ' rctx.read(Label("@platform_dependent_repo//:data.txt"))' + ), + ' print("JUST FETCHED ON " + platform)', + ' rctx.file("data.txt", platform)', + ' return rctx.repo_metadata(reproducible=True)', + 'repo = repository_rule(_repo_impl)', + ], + ) + + repo_dir = self.RepoDir('my_repo') + + # First fetch on Linux: not cached + self.ScratchFile('platform.txt', ['Linux']) + _, _, stderr = self.RunBazel(['build', '//:show_platform']) + self.assertIn('DETERMINING PLATFORM', '\n'.join(stderr)) + self.assertIn('JUST FETCHED ON Linux', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/platform.txt')) as f: + self.assertEqual(f.read().strip(), 'Linux') + + # First fetch on macOS: not cached + self.ScratchFile('platform.txt', ['macOS']) + _, _, stderr = self.RunBazel(['build', '//:show_platform']) + self.assertIn('DETERMINING PLATFORM', '\n'.join(stderr)) + self.assertIn('JUST FETCHED ON macOS', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/platform.txt')) as f: + self.assertEqual(f.read().strip(), 'macOS') + + # Second fetch on Linux: cached + self.ScratchFile('platform.txt', ['Linux']) + _, _, stderr = self.RunBazel(['build', '//:show_platform']) + self.assertIn('DETERMINING PLATFORM', '\n'.join(stderr)) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/platform.txt')) as f: + self.assertEqual(f.read().strip(), 'Linux') + + # Second fetch on macOS: cached + self.ScratchFile('platform.txt', ['macOS']) + _, _, stderr = self.RunBazel(['build', '//:show_platform']) + self.assertIn('DETERMINING PLATFORM', '\n'.join(stderr)) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/platform.txt')) as f: + self.assertEqual(f.read().strip(), 'macOS') + + def testRecordedInputs_differentInputs(self): + platform_file = self.ScratchFile('platform.txt') + + self.ScratchFile( + 'MODULE.bazel', + [ + ( + 'platform_dependent_binary =' + ' use_repo_rule("//:platform_dependent_binary.bzl",' + ' "platform_dependent_binary")' + ), + 'platform_dependent_binary(name = "platform_dependent_binary")', + 'repo = use_repo_rule("//:repo.bzl", "repo")', + 'repo(name = "my_repo")', + ], + ) + self.ScratchFile( + 'BUILD.bazel', + [ + 'genrule(', + ' name = "show_data",', + ' outs = ["data.txt"],', + ' cmd = "cat $(location @my_repo//:data.txt) > $@",', + ' srcs = ["@my_repo//:data.txt"],', + ')', + ], + ) + self.ScratchFile( + 'platform_dependent_binary.bzl', + [ + 'def _platform_dependent_binary_impl(rctx):', + ' rctx.file("BUILD")', + ' platform = rctx.read(rctx.path("%s")).strip()' + % platform_file.replace('\\', '\\\\'), + ' print("DETERMINED PLATFORM (%s)" % platform)', + ' if platform == "Windows":', + ' rctx.file("binary.exe", "PE")', + ' else:', + ' rctx.file("binary.sh", "ELF")', + ( + 'platform_dependent_binary =' + ' repository_rule(_platform_dependent_binary_impl)' + ), + ], + ) + self.ScratchFile( + 'repo.bzl', + [ + 'def _repo_impl(rctx):', + ' rctx.file("BUILD", "exports_files([\'data.txt\'])")', + # Simulate a uname -s by reading from a file instead of using + # rctx.execute (more complex to mock) or rctx.os.name (which may be + # tracked as an input in the future, making this test vacuous). + ' platform = rctx.read(rctx.path("%s"), watch = "no").strip()' + % platform_file.replace('\\', '\\\\'), + ' ext = ".exe" if platform == "Windows" else ".sh"', + # Simulate rctx.execute with a watched binary. + ( + ' out = rctx.read(Label("@platform_dependent_binary//:binary"' + ' + ext))' + ), + ' rctx.file("data.txt", out)', + ' print("JUST FETCHED")', + ' return rctx.repo_metadata(reproducible=True)', + 'repo = repository_rule(_repo_impl)', + ], + ) + + repo_dir = self.RepoDir('my_repo') + + # First fetch on Linux: not cached + self.ScratchFile('platform.txt', ['Linux']) + _, _, stderr = self.RunBazel(['build', '//:show_data']) + self.assertIn('DETERMINED PLATFORM (Linux)', '\n'.join(stderr)) + self.assertIn('JUST FETCHED', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/data.txt')) as f: + self.assertEqual(f.read().strip(), 'ELF') + + # First fetch on Windows: not cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('platform.txt', ['Windows']) + _, _, stderr = self.RunBazel(['build', '//:show_data']) + self.assertIn('DETERMINED PLATFORM (Windows)', '\n'.join(stderr)) + self.assertIn('JUST FETCHED', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/data.txt')) as f: + self.assertEqual(f.read().strip(), 'PE') + + # Second fetch on Linux: cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('platform.txt', ['Linux']) + _, _, stderr = self.RunBazel(['build', '//:show_data']) + self.assertIn('DETERMINED PLATFORM (Linux)', '\n'.join(stderr)) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/data.txt')) as f: + self.assertEqual(f.read().strip(), 'ELF') + + # Second fetch on Windows: cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('platform.txt', ['Windows']) + _, _, stderr = self.RunBazel(['build', '//:show_data']) + self.assertIn('DETERMINED PLATFORM (Windows)', '\n'.join(stderr)) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/data.txt')) as f: + self.assertEqual(f.read().strip(), 'PE') + def testNoThrashingBetweenWorkspaces(self): module_bazel_lines = [ 'repo = use_repo_rule("//:repo.bzl", "repo")', @@ -810,6 +1013,72 @@ def testRun(self): repo_dir = self.RepoDir('buildozer') self.assertFalse(os.path.exists(os.path.join(repo_dir, 'MODULE.bazel'))) + def testReverseDependencyDirection(self): + # Set up two repos that retain their predeclared input hashes across two + # builds but still reverse their dependency direction. Depending on how repo + # cache candidates are checked, this could lead to a Skyframe cycle. + self.ScratchFile( + 'MODULE.bazel', + [ + 'repo = use_repo_rule("//:repo.bzl", "repo")', + 'repo(', + ' name = "foo",', + ' deps_file = "//:foo_deps.txt",', + ')', + 'repo(', + ' name = "bar",', + ' deps_file = "//:bar_deps.txt",', + ')', + ], + ) + self.ScratchFile('BUILD.bazel') + self.ScratchFile( + 'repo.bzl', + [ + 'def _repo_impl(rctx):', + ' deps = rctx.read(rctx.attr.deps_file).splitlines()', + ' output = ""', + ' for dep in deps:', + ' if dep:', + ' output += "{}: {}\\n".format(dep, rctx.read(Label(dep)))', + ' rctx.file("output.txt", output)', + ' rctx.file("BUILD", "exports_files([\'output.txt\'])")', + ' print("JUST FETCHED: %s" % rctx.original_name)', + ' return rctx.repo_metadata(reproducible=True)', + 'repo = repository_rule(', + ' implementation = _repo_impl,', + ' attrs = {', + ' "deps_file": attr.label(), }', + ')', + ], + ) + + self.ScratchFile('foo_deps.txt', ['@bar//:output.txt']) + self.ScratchFile('bar_deps.txt', ['']) + + # First fetch: not cached + _, _, stderr = self.RunBazel(['build', '@foo//:output.txt']) + stderr = '\n'.join(stderr) + self.assertIn('JUST FETCHED: bar', stderr) + self.assertIn('JUST FETCHED: foo', stderr) + + # After expunging and reversing the dependency direction: not cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('foo_deps.txt', ['']) + self.ScratchFile('bar_deps.txt', ['@foo//:output.txt']) + _, _, stderr = self.RunBazel(['build', '@foo//:output.txt']) + stderr = '\n'.join(stderr) + self.assertIn('JUST FETCHED: foo', stderr) + self.assertNotIn('JUST FETCHED: bar', stderr) + + # After expunging and reversing the dependency direction: both cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('foo_deps.txt', ['@bar//:output.txt']) + self.ScratchFile('bar_deps.txt', ['']) + _, _, stderr = self.RunBazel(['build', '@foo//:output.txt']) + stderr = '\n'.join(stderr) + self.assertNotIn('JUST FETCHED', stderr) + if __name__ == '__main__': absltest.main()