diff --git a/src/main/java/com/google/devtools/build/lib/BUILD b/src/main/java/com/google/devtools/build/lib/BUILD index 82f62b05e62f58..4ad86766f353fc 100644 --- a/src/main/java/com/google/devtools/build/lib/BUILD +++ b/src/main/java/com/google/devtools/build/lib/BUILD @@ -288,6 +288,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/cmdline", "//src/main/java/com/google/devtools/build/lib/events", "//src/main/java/com/google/devtools/build/lib/vfs", + "//src/main/java/com/google/devtools/build/skyframe:skyframe-objects", ], ) diff --git a/src/main/java/com/google/devtools/build/lib/actions/ActionInputPrefetcher.java b/src/main/java/com/google/devtools/build/lib/actions/ActionInputPrefetcher.java index f1ed84d33fe159..a0152db7ef766e 100644 --- a/src/main/java/com/google/devtools/build/lib/actions/ActionInputPrefetcher.java +++ b/src/main/java/com/google/devtools/build/lib/actions/ActionInputPrefetcher.java @@ -17,6 +17,7 @@ import com.google.common.util.concurrent.ListenableFuture; import java.io.IOException; +import javax.annotation.Nullable; /** Prefetches files to local disk. */ public interface ActionInputPrefetcher { @@ -34,7 +35,7 @@ public interface MetadataSupplier { new ActionInputPrefetcher() { @Override public ListenableFuture prefetchFiles( - ActionExecutionMetadata action, + @Nullable ActionExecutionMetadata action, Iterable inputs, MetadataSupplier metadataSupplier, Priority priority, @@ -90,7 +91,7 @@ enum Reason { * @return future success if prefetch is finished or {@link IOException}. */ ListenableFuture prefetchFiles( - ActionExecutionMetadata action, + @Nullable ActionExecutionMetadata action, Iterable inputs, MetadataSupplier metadataSupplier, Priority priority, diff --git a/src/main/java/com/google/devtools/build/lib/analysis/BlazeDirectories.java b/src/main/java/com/google/devtools/build/lib/analysis/BlazeDirectories.java index 83f417c9a34f7c..ad781f0a63ca87 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/BlazeDirectories.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/BlazeDirectories.java @@ -141,6 +141,7 @@ public Path getWorkspace() { } /** Returns working directory of the server. */ + @Nullable public Path getWorkingDirectory() { return workspace; } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/BazelRepositoryModule.java b/src/main/java/com/google/devtools/build/lib/bazel/BazelRepositoryModule.java index 98c8ac2f3bc402..e8d3ce7c49cd80 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/BazelRepositoryModule.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/BazelRepositoryModule.java @@ -720,10 +720,18 @@ private String getAbsolutePath(String path, CommandEnvironment env) { */ @Nullable private Path toPath(PathFragment path, CommandEnvironment env) { - if (path.isEmpty() || env.getBlazeWorkspace().getWorkspace() == null) { + if (path.isEmpty() || env.getDirectories().getWorkspace() == null) { return null; } - return env.getBlazeWorkspace().getWorkspace().getRelative(path); + // It is important to use getWorkspace() here, not getWorkingDirectory(). Both Paths have the + // same underlying PathFragment, but may differ in their FileSystem if the remote repo contents + // cache is in use. getWorkspace() uses the same FileSystem as everything other than the + // workspace directory, while getWorkingDirectory() uses the workspace directory's FileSystem. + // Even though the users of the returned Path may end up writing to it, they are not expected to + // update source files within the workspace. Thus, the correct FileSystem is the one from + // getWorkspace(), which e.g. allows moves from the external directory under the output base to + // the local repo contents cache without crossing FileSystems. + return env.getDirectories().getWorkspace().getRelative(path); } @Override diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java index 5c7b8de8a1d6c7..812fe8f27b7e08 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java @@ -91,7 +91,7 @@ protected ModuleExtensionContext( this.rootModuleHasNonDevDependency = rootModuleHasNonDevDependency; // Record inputs to the extension that are known prior to evaluation. RepoRecordedInput.EnvVar.wrap(staticEnvVars) - .forEach((input, value) -> recordInput(input, value.orElse(null))); + .forEach((input, value) -> recordInputWithValue(input, value.orElse(null))); repoMappingRecorder.record(staticRepoMappingEntries); } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java index c83afc25132cf3..cd9e163f8d8f9d 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java @@ -395,12 +395,18 @@ private static Optional didRecordedInputsChange( BlazeDirectories directories, List recordedInputs) throws InterruptedException, NeedsSkyframeRestartException { - Optional outdated = - RepoRecordedInput.isAnyValueOutdated(env, directories, recordedInputs); - if (env.valuesMissing()) { - throw new NeedsSkyframeRestartException(); + // Check inputs in batches to prevent Skyframe cycles caused by outdated dependencies. + for (ImmutableList batch : + RepoRecordedInput.WithValue.splitIntoBatches(recordedInputs)) { + Optional outdated = RepoRecordedInput.isAnyValueOutdated(env, directories, batch); + if (env.valuesMissing()) { + throw new NeedsSkyframeRestartException(); + } + if (outdated.isPresent()) { + return outdated; + } } - return outdated; + return Optional.empty(); } private SingleExtensionValue createSingleExtensionValue( diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java index 89af2cde2a1f14..df98556b784b45 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java @@ -154,9 +154,9 @@ private Path ensureTrashDir() throws IOException { /** * Moves a freshly fetched repo into the contents cache. * - * @return the repo dir in the contents cache. + * @return the new cache entry */ - public Path moveToCache( + public CandidateRepo moveToCache( Path fetchedRepoDir, Path fetchedRepoMarkerFile, String predeclaredInputHash) throws IOException { Preconditions.checkState(path != null); @@ -183,7 +183,7 @@ public Path moveToCache( // Set up a symlink at the original fetched repo dir path. fetchedRepoDir.deleteTree(); FileSystemUtils.ensureSymbolicLink(fetchedRepoDir, cacheRepoDir); - return cacheRepoDir; + return new CandidateRepo(cacheRecordedInputsFile, cacheRepoDir); } public void acquireSharedLock() throws IOException, InterruptedException { diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java index 5cf3e3069fcf0d..cb7893673dd8b1 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java @@ -23,13 +23,10 @@ import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSortedMap; -import com.google.common.collect.Iterables; import com.google.common.collect.Maps; import com.google.common.flogger.GoogleLogger; import com.google.common.util.concurrent.Futures; -import com.google.devtools.build.lib.actions.FileValue; import com.google.devtools.build.lib.analysis.BlazeDirectories; import com.google.devtools.build.lib.bazel.debug.WorkspaceRuleEvent; import com.google.devtools.build.lib.bazel.repository.DecompressorDescriptor; @@ -55,6 +52,7 @@ import com.google.devtools.build.lib.remote.RemoteExternalOverlayFileSystem; import com.google.devtools.build.lib.rules.repository.NeedsSkyframeRestartException; import com.google.devtools.build.lib.rules.repository.RepoRecordedInput; +import com.google.devtools.build.lib.rules.repository.RepoRecordedInput.MaybeValue; import com.google.devtools.build.lib.rules.repository.RepoRecordedInput.RepoCacheFriendlyPath; import com.google.devtools.build.lib.rules.repository.RepositoryFunction; import com.google.devtools.build.lib.rules.repository.RepositoryFunction.RepositoryFunctionException; @@ -71,6 +69,7 @@ import com.google.devtools.build.lib.vfs.Symlinks; import com.google.devtools.build.skyframe.SkyFunction.Environment; import com.google.devtools.build.skyframe.SkyFunctionException.Transience; +import com.google.errorprone.annotations.CanIgnoreReturnValue; import com.google.errorprone.annotations.ForOverride; import java.io.File; import java.io.IOException; @@ -212,7 +211,7 @@ protected StarlarkBaseExternalContext( // apparent repo names to canonical repo names. See #20721 for why this is necessary. this.repoMappingRecorder = (fromRepo, apparentRepoName, canonicalRepoName) -> - recordInput( + recordInputWithValue( new RepoRecordedInput.RecordedRepoMapping(fromRepo, apparentRepoName), canonicalRepoName.isVisible() ? canonicalRepoName.getName() : null); } @@ -251,7 +250,7 @@ public void storeRepoMappingRecorderInThread(StarlarkThread thread) { repoMappingRecorder.storeInThread(thread); } - protected void recordInput(RepoRecordedInput input, @Nullable String value) { + protected void recordInputWithValue(RepoRecordedInput input, @Nullable String value) { if (recordedInputs.containsKey(input) && !Objects.equals(recordedInputs.get(input), value)) { throw new IllegalStateException( "Conflicting values recorded for input %s: '%s' vs. '%s'" @@ -260,6 +259,23 @@ protected void recordInput(RepoRecordedInput input, @Nullable String value) { recordedInputs.put(input, value); } + @CanIgnoreReturnValue + @Nullable + protected String getValueAndRecordInput(RepoRecordedInput input) + throws InterruptedException, NeedsSkyframeRestartException, IOException { + var maybeValue = input.getValue(env, directories); + if (env.valuesMissing()) { + throw new NeedsSkyframeRestartException(); + } + return switch (maybeValue) { + case MaybeValue.Invalid(String reason) -> throw new IOException(reason); + case MaybeValue.Valid(String value) -> { + recordInputWithValue(input, value); + yield value; + } + }; + } + private boolean cancelPendingAsyncTasks() { boolean hadPendingItems = false; for (AsyncTask task : asyncTasks) { @@ -1489,18 +1505,12 @@ private static T nullIfNone(Object object, Class type) { @Nullable public String getEnvironmentValue(String name, Object defaultValue) throws InterruptedException, NeedsSkyframeRestartException { - // Must look up via Skyframe, rather than solely copy from `this.repoEnvVariables`, in order to - // establish a SkyKey dependency relationship. - var nameAndValue = RepositoryFunction.getEnvVarValues(env, ImmutableSet.of(name)); - if (nameAndValue == null) { - return null; + try { + String value = getValueAndRecordInput(new RepoRecordedInput.EnvVar(name)); + return value != null ? value : nullIfNone(defaultValue, String.class); + } catch (IOException e) { + throw new IllegalStateException("getting EnvVar never throws IOException", e); } - // However, to account for --repo_env we take the value from `this.repoEnvVariables`. - // See https://github.com/bazelbuild/bazel/pull/20787#discussion_r1445571248 . - var entry = Iterables.getOnlyElement(RepoRecordedInput.EnvVar.wrap(nameAndValue).entrySet()); - String envVarValue = repoEnvVariables.get(name); - recordInput(entry.getKey(), envVarValue); - return envVarValue != null ? envVarValue : nullIfNone(defaultValue, String.class); } @StarlarkMethod( @@ -1661,17 +1671,8 @@ protected void maybeWatch(StarlarkPath starlarkPath, ShouldWatch shouldWatch) if (repoCacheFriendlyPath == null) { return; } - var recordedInput = new RepoRecordedInput.File(repoCacheFriendlyPath); - var skyKey = recordedInput.getSkyKey(directories); try { - FileValue fileValue = (FileValue) env.getValueOrThrow(skyKey, IOException.class); - if (fileValue == null) { - throw new NeedsSkyframeRestartException(); - } - - recordInput( - recordedInput, - RepoRecordedInput.File.fileValueToMarkerValue((RootedPath) skyKey.argument(), fileValue)); + getValueAndRecordInput(new RepoRecordedInput.File(repoCacheFriendlyPath)); } catch (IOException e) { throw new RepositoryFunctionException(e, Transience.TRANSIENT); } @@ -1683,12 +1684,8 @@ protected void maybeWatchDirents(Path path, ShouldWatch shouldWatch) if (repoCacheFriendlyPath == null) { return; } - var recordedInput = new RepoRecordedInput.Dirents(repoCacheFriendlyPath); - if (env.getValue(recordedInput.getSkyKey(directories)) == null) { - throw new NeedsSkyframeRestartException(); - } try { - recordInput(recordedInput, RepoRecordedInput.Dirents.getDirentsMarkerValue(path)); + getValueAndRecordInput(new RepoRecordedInput.Dirents(repoCacheFriendlyPath)); } catch (IOException e) { throw new RepositoryFunctionException(e, Transience.TRANSIENT); } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java index 751d8305e94a58..6cb1a331464eaf 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java @@ -42,7 +42,6 @@ import com.google.devtools.build.lib.rules.repository.WorkspaceAttributeMapper; import com.google.devtools.build.lib.runtime.ProcessWrapper; import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor; -import com.google.devtools.build.lib.skyframe.DirectoryTreeDigestValue; import com.google.devtools.build.lib.util.StringUtilities; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; @@ -587,15 +586,7 @@ public void watchTree(Object path) return; } try { - var recordedInput = new RepoRecordedInput.DirTree(repoCacheFriendlyPath); - DirectoryTreeDigestValue digestValue = - (DirectoryTreeDigestValue) - env.getValueOrThrow(recordedInput.getSkyKey(directories), IOException.class); - if (digestValue == null) { - throw new NeedsSkyframeRestartException(); - } - - recordInput(recordedInput, digestValue.hexDigest()); + getValueAndRecordInput(new RepoRecordedInput.DirTree(repoCacheFriendlyPath)); } catch (IOException e) { throw new RepositoryFunctionException(e, Transience.TRANSIENT); } diff --git a/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java b/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java index 45c114ea2cdadc..638f9d7a561efc 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java +++ b/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java @@ -329,7 +329,7 @@ protected void prefetchVirtualActionInput(VirtualActionInput input) throws IOExc */ @Override public ListenableFuture prefetchFiles( - ActionExecutionMetadata action, + @Nullable ActionExecutionMetadata action, Iterable inputs, MetadataSupplier metadataSupplier, Priority priority, diff --git a/src/main/java/com/google/devtools/build/lib/remote/BUILD b/src/main/java/com/google/devtools/build/lib/remote/BUILD index c00dcb0d86766f..e82ce27e1139f1 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/BUILD +++ b/src/main/java/com/google/devtools/build/lib/remote/BUILD @@ -119,6 +119,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/remote/util", "//src/main/java/com/google/devtools/build/lib/remote/util:digest_utils", "//src/main/java/com/google/devtools/build/lib/remote/zstd", + "//src/main/java/com/google/devtools/build/lib/rules:repository/repo_recorded_input", "//src/main/java/com/google/devtools/build/lib/skyframe:action_execution_value", "//src/main/java/com/google/devtools/build/lib/skyframe:sky_functions", "//src/main/java/com/google/devtools/build/lib/skyframe:tree_artifact_value", diff --git a/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java b/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java index d8fcea582c368a..a40e3e98950edc 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java +++ b/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java @@ -215,6 +215,15 @@ public CachedActionResult downloadActionResult( boolean inlineOutErr, Set inlineOutputFiles) throws IOException, InterruptedException { + return getFromFuture( + downloadActionResultAsync(context, actionKey, inlineOutErr, inlineOutputFiles)); + } + + public ListenableFuture downloadActionResultAsync( + RemoteActionExecutionContext context, + ActionKey actionKey, + boolean inlineOutErr, + Set inlineOutputFiles) { var spawnExecutionContext = context.getSpawnExecutionContext(); ListenableFuture future = immediateFuture(null); @@ -258,7 +267,7 @@ public CachedActionResult downloadActionResult( directExecutor()); } - return getFromFuture(future); + return future; } private ListenableFuture downloadActionResultFromRemote( diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java index d89181fa2c999e..e0451991392d3c 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java @@ -186,11 +186,13 @@ public void afterCommand() { */ public boolean injectRemoteRepo(RepositoryName repo, Tree remoteContents, String markerFile) throws IOException, InterruptedException { + var repoDir = externalDirectory.getChild(repo.getName()); + deleteTree(repoDir); + var unused = delete(externalDirectory.getChild(repo.getMarkerFileName())); var childMap = remoteContents.getChildrenList().stream() .collect( toImmutableMap(cache.digestUtil::compute, directory -> directory, (a, b) -> a)); - var repoDir = externalDirectory.getChild(repo.getName()); var filesToPrefetch = new ArrayList(); injectRecursively( externalFs, repoDir, remoteContents.getRoot(), childMap, filesToPrefetch::add); @@ -208,7 +210,7 @@ public boolean injectRemoteRepo(RepositoryName repo, Tree remoteContents, String } // Create the repo directory on disk so that readdir reflects the overlaid state of the external // directory. - nativeFs.createDirectoryAndParents(externalDirectory.getChild(repo.getName())); + nativeFs.createDirectoryAndParents(repoDir); // Keep the marker file contents in memory so that it can be written out when the repo is // materialized. This doubles as a presence marker for the in-memory repo contents. markerFileContents.put(repo.getName(), markerFile); diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java index 7d4dc72c888cf1..d78c4cea26047a 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java @@ -762,6 +762,7 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException { repositoryRemoteHelpersFactoryDelegate.init( new RepositoryRemoteHelpersFactoryImpl( + env.getDirectories(), actionContextProvider.getCombinedCache(), actionContextProvider.getRemoteExecutionClient(), buildRequestId, diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteOutputChecker.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteOutputChecker.java index b295e050a86253..eebf092af061c9 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteOutputChecker.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteOutputChecker.java @@ -190,24 +190,21 @@ private void addRunfiles(ProviderCollection buildTarget) { } var runfiles = runfilesSupport.getRunfiles(); for (Artifact runfile : runfiles.getArtifacts().toList()) { - if (runfile.isSourceArtifact()) { - continue; + if (mayBeRemote(runfile)) { + addOutputToDownload(runfile); } - addOutputToDownload(runfile); } for (var symlink : runfiles.getSymlinks().toList()) { var artifact = symlink.getArtifact(); - if (artifact.isSourceArtifact()) { - continue; + if (mayBeRemote(artifact)) { + addOutputToDownload(artifact); } - addOutputToDownload(artifact); } for (var symlink : runfiles.getRootSymlinks().toList()) { var artifact = symlink.getArtifact(); - if (artifact.isSourceArtifact()) { - continue; + if (mayBeRemote(artifact)) { + addOutputToDownload(artifact); } - addOutputToDownload(artifact); } } @@ -292,6 +289,19 @@ private boolean matchesPattern(PathFragment execPath) { return false; } + /** + * Returns whether this {@link ActionInput} could conceivably be only available remotely. + * + *

Use this as a quick check to avoid unnecessary extra work for artifacts that are definitely + * local. + */ + public static boolean mayBeRemote(ActionInput actionInput) { + return !(actionInput instanceof Artifact artifact + && artifact.isSourceArtifact() + // Source artifacts in the main repo don't need to be fetched. + && (artifact.getOwner() == null || artifact.getOwner().getRepository().isMain())); + } + /** Returns whether this {@link ActionInput} should be downloaded. */ @Override public boolean shouldDownloadOutput(ActionInput output, RemoteFileArtifactValue metadata) { diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java index c8a2c20a5f9fc5..d37a2195bb072c 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java @@ -15,26 +15,40 @@ package com.google.devtools.build.lib.remote; import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.common.util.concurrent.Futures.immediateFuture; +import static com.google.common.util.concurrent.Futures.transformAsync; +import static com.google.common.util.concurrent.Futures.whenAllSucceed; import static com.google.common.util.concurrent.MoreExecutors.directExecutor; import static com.google.devtools.build.lib.remote.util.Utils.waitForBulkTransfer; +import static com.google.devtools.build.lib.unsafe.StringUnsafe.getInternalStringBytes; +import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.util.stream.Collectors.joining; import build.bazel.remote.execution.v2.Action; +import build.bazel.remote.execution.v2.ActionResult; import build.bazel.remote.execution.v2.Command; +import build.bazel.remote.execution.v2.Digest; import build.bazel.remote.execution.v2.Directory; +import build.bazel.remote.execution.v2.OutputDirectory; +import build.bazel.remote.execution.v2.OutputFile; +import build.bazel.remote.execution.v2.Platform; import build.bazel.remote.execution.v2.Tree; +import com.google.common.base.Preconditions; import com.google.common.base.Splitter; import com.google.common.base.Throwables; +import com.google.common.collect.Collections2; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.devtools.build.lib.actions.ActionInput; import com.google.devtools.build.lib.actions.ExecException; +import com.google.devtools.build.lib.actions.Spawn; +import com.google.devtools.build.lib.analysis.BlazeDirectories; import com.google.devtools.build.lib.cmdline.RepositoryName; import com.google.devtools.build.lib.events.Event; import com.google.devtools.build.lib.events.ExtendedEventHandler; -import com.google.devtools.build.lib.actions.Spawn; import com.google.devtools.build.lib.exec.SpawnInputExpander; import com.google.devtools.build.lib.exec.SpawnRunner; import com.google.devtools.build.lib.remote.common.RemoteActionExecutionContext; @@ -43,18 +57,24 @@ import com.google.devtools.build.lib.remote.common.RemotePathResolver; import com.google.devtools.build.lib.remote.util.DigestUtil; import com.google.devtools.build.lib.remote.util.TracingMetadataUtils; +import com.google.devtools.build.lib.rules.repository.RepoRecordedInput; import com.google.devtools.build.lib.runtime.RemoteRepoContentsCache; import com.google.devtools.build.lib.unsafe.StringUnsafe; +import com.google.devtools.build.lib.util.Fingerprint; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; +import com.google.devtools.build.skyframe.SkyFunction; import com.google.protobuf.ByteString; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Optional; import java.util.SortedMap; import java.util.UUID; -import java.util.concurrent.ExecutionException; +import javax.annotation.Nullable; /** * A cache for the contents of external repositories that is backed by an ordinary remote cache. @@ -65,19 +85,32 @@ * action executed locally. This can save both time taken to execute repo rules and compute file * digests and disk space required to store the contents of external repositories. * - *

Repositories are cached as AC entries for a synthetic command with the predeclared input hash - * as the salt. The contents are represented as an output file for the marker file and an output - * directory for the contents. + *

Repositories are cached as AC entries for a synthetic command with a special hash as the salt. + * The contents are represented as an output file for the marker file and an output directory for + * the contents. + * + *

If a repo rule does not record any {@link RepoRecordedInput}s during its execution, this hash + * is just the predeclared inputs hash. Otherwise, the AC entry for the predeclared inputs hash will + * be an intermediate entry that lists one or more sets of {@link RepoRecordedInput}s that a + * previously cached repo consumed during the evaluation of its rule. The cache requests the current + * values of these inputs and computes the next hash to look up by a rolling construction that + * combines the previous hash with the string representations of the {@link + * RepoRecordedInput.WithValue}. This process is repeated until a final entry with the repo contents + * is found or no matching entry exists. * - *

At this point the cache only supports repository rules with no dependencies expressed at - * runtime. Verifying whether such dependencies are up to date can't be done via a single hash as - * the set of dependencies is not known ahead of time. Support for such rules would require a - * two-stage cache lookup in which the first lookup may produce multiple marker files. + *

By representing repos with recorded inputs as DAGs of AC entries, lookups are efficient (they + * don't scale with the number of cached repos per predeclared inputs hash) and regular LRU eviction + * policies remain effective for the most part. If a repo rule often requests different inputs even + * with the same predeclared inputs hash and previously requested inputs and values, it could result + * in large action results that grow over time. This is considered an acceptable trade-off for + * simplicity for now and could be mitigated in the future by an explicit GC mechanism such as + * "least recently added" eviction when the size of action result exceeds a certain threshold. */ public final class RemoteRepoContentsCacheImpl implements RemoteRepoContentsCache { private static final UUID GUID = UUID.fromString("f4a165a9-5557-45a7-bf25-230b6d42393a"); private static final String MARKER_FILE_PATH = ".recorded_inputs"; private static final String REPO_DIRECTORY_PATH = "repo_contents"; + private static final Splitter SPLIT_ON_SPACE = Splitter.on(' '); private static final Command COMMAND = Command.newBuilder() @@ -88,9 +121,12 @@ public final class RemoteRepoContentsCacheImpl implements RemoteRepoContentsCach .addOutputPaths(REPO_DIRECTORY_PATH) .addOutputFiles(MARKER_FILE_PATH) .addOutputDirectories(REPO_DIRECTORY_PATH) + .setPlatform(Platform.getDefaultInstance()) .build(); + private static final ByteString COMMAND_BYTES = COMMAND.toByteString(); private static final Directory INPUT_ROOT = Directory.getDefaultInstance(); + private final BlazeDirectories directories; private final CombinedCache cache; private final String buildRequestId; private final String commandId; @@ -99,17 +135,20 @@ public final class RemoteRepoContentsCacheImpl implements RemoteRepoContentsCach private final boolean verboseFailures; private final DigestUtil digestUtil; private final Action baseAction; + private final Digest commandDigest; public RemoteRepoContentsCacheImpl( + BlazeDirectories directories, CombinedCache cache, String buildRequestId, String commandId, boolean acceptCached, boolean uploadLocalResults, boolean verboseFailures) { + this.directories = directories; + this.cache = cache; this.buildRequestId = buildRequestId; this.commandId = commandId; - this.cache = cache; this.acceptCached = acceptCached; this.uploadLocalResults = uploadLocalResults; this.verboseFailures = verboseFailures; @@ -118,7 +157,9 @@ public RemoteRepoContentsCacheImpl( Action.newBuilder() .setCommandDigest(digestUtil.compute(COMMAND)) .setInputRootDigest(digestUtil.compute(INPUT_ROOT)) + .setPlatform(Platform.getDefaultInstance()) .build(); + this.commandDigest = digestUtil.compute(COMMAND); } @Override @@ -136,32 +177,33 @@ public void addToCache( if (!(fetchedRepoDir.getFileSystem() instanceof RemoteExternalOverlayFileSystem)) { return; } - var context = buildContext(repoName); + var context = buildContext(repoName, CacheOp.UPLOAD); if (!context.getWriteCachePolicy().allowRemoteCache()) { return; } + List recordedInputValues; try { - if (FileSystemUtils.readLinesAsLatin1(fetchedRepoMarkerFile).stream() - .filter(line -> !line.isEmpty()) - .count() - != 1) { - // This cache currently only supports marker files that contain nothing but the predeclared - // inputs hash. Repo rules with dependencies expressed only at runtime would require a - // two-stage cache lookup. Among the rules that are supported are http_archive and - // git_repository without patches. + var maybeRecordedInputValues = + readMarkerFile( + FileSystemUtils.readContent(fetchedRepoMarkerFile, ISO_8859_1), predeclaredInputHash); + if (maybeRecordedInputValues.isEmpty()) { return; } + recordedInputValues = maybeRecordedInputValues.get(); } catch (IOException e) { reporter.handle( Event.warn( - "Failed to read marker file repo %s, skipping: %s" + "Failed to read marker file for repo %s, skipping: %s" .formatted(repoName, maybeGetStackTrace(e)))); + return; } - var action = buildAction(predeclaredInputHash); - var actionKey = new ActionKey(digestUtil.compute(action)); - var remotePathResolver = new RepoRemotePathResolver(fetchedRepoMarkerFile, fetchedRepoDir); try { // TODO: Consider uploading asynchronously. + var finalHash = + uploadIntermediateActionResults(context, predeclaredInputHash, recordedInputValues); + var action = buildAction(finalHash); + var actionKey = new ActionKey(digestUtil.compute(action)); + var remotePathResolver = new RepoRemotePathResolver(fetchedRepoMarkerFile, fetchedRepoDir); var unused = UploadManifest.create( /* remoteOptions= */ null, @@ -191,10 +233,10 @@ public boolean lookupCache( RepositoryName repoName, Path repoDir, String predeclaredInputHash, - ExtendedEventHandler reporter) + SkyFunction.Environment env) throws IOException, InterruptedException { try { - return doLookupCache(repoName, repoDir, predeclaredInputHash, reporter); + return doLookupCache(repoName, repoDir, predeclaredInputHash, env); } catch (IOException e) { throw new IOException( "Failed to look up repo %s in the remote repo contents cache: %s" @@ -207,43 +249,23 @@ private boolean doLookupCache( RepositoryName repoName, Path repoDir, String predeclaredInputHash, - ExtendedEventHandler reporter) + SkyFunction.Environment env) throws IOException, InterruptedException { if (!(repoDir.getFileSystem() instanceof RemoteExternalOverlayFileSystem remoteFs)) { return false; } - var context = buildContext(repoName); + var context = buildContext(repoName, CacheOp.DOWNLOAD); if (!context.getReadCachePolicy().allowRemoteCache()) { return false; } - var actionKey = new ActionKey(digestUtil.compute(buildAction(predeclaredInputHash))); - // The marker file is read right after and thus requested to be inlined. - var cachedActionResult = - cache.downloadActionResult( - context, actionKey, /* inlineOutErr= */ false, ImmutableSet.of(MARKER_FILE_PATH)); - if (cachedActionResult == null) { - return false; - } - - var actionResult = cachedActionResult.actionResult(); - if (actionResult.getExitCode() != 0 - || actionResult.getOutputFilesCount() != 1 - || actionResult.getOutputDirectoriesCount() != 1) { - reporter.handle( - Event.warn( - String.format( - "Unexpected action result for cached repo %s: exit code %d, %d output files, %d" - + " output directories", - repoName, - actionResult.getExitCode(), - actionResult.getOutputFilesCount(), - actionResult.getOutputDirectoriesCount()))); + var finalEntry = fetchFinalCacheEntry(env, context, predeclaredInputHash); + if (env.valuesMissing() || finalEntry == null) { return false; } ListenableFuture markerFileContentFuture; - var markerFile = actionResult.getOutputFiles(0); + var markerFile = finalEntry.markerFile(); // Inlining is an optional feature, so we have to be prepared to download the marker file. if (markerFile.getContents().isEmpty()) { markerFileContentFuture = @@ -252,69 +274,351 @@ private boolean doLookupCache( } else { markerFileContentFuture = immediateFuture(markerFile.getContents().toByteArray()); } - var repoDirectory = actionResult.getOutputDirectories(0); + var repoDirectory = finalEntry.repoDirectory(); var repoDirectoryContentFuture = - Futures.transformAsync( + transformAsync( cache.downloadBlob( context, REPO_DIRECTORY_PATH, /* execPath= */ null, repoDirectory.getTreeDigest()), (treeBytes) -> immediateFuture(Tree.parseFrom(treeBytes)), directExecutor()); waitForBulkTransfer(ImmutableList.of(markerFileContentFuture, repoDirectoryContentFuture)); - String markerFileContent; - Tree repoDirectoryContent; - try { - markerFileContent = new String(markerFileContentFuture.get(), StandardCharsets.ISO_8859_1); - repoDirectoryContent = repoDirectoryContentFuture.get(); - } catch (ExecutionException e) { - throw new IllegalStateException( - "waitForBulkTransfer should have thrown: " + maybeGetStackTrace(e)); - } - var markerFileLines = - Splitter.on('\n') - .splitToStream(markerFileContent) - .filter(line -> !line.isEmpty()) - .collect(toImmutableList()); - if (markerFileLines.size() > 1) { - reporter.handle( - Event.warn( - "Marker file for repo %s has extra lines, skipping:\n%s" - .formatted( - repoName, - String.join("\n", markerFileLines.subList(1, markerFileLines.size()))))); + + String markerFileContent = new String(markerFileContentFuture.resultNow(), ISO_8859_1); + var maybeRecordedInputs = readMarkerFile(markerFileContent, predeclaredInputHash); + if (maybeRecordedInputs.isEmpty()) { return false; } - if (!markerFileLines.getFirst().equals(predeclaredInputHash)) { - reporter.handle( - Event.warn( - "Predeclared input hash mismatch for repo %s: expected %s, got %s" - .formatted(repoName, predeclaredInputHash, markerFileLines.getFirst()))); + var outdatedReason = + RepoRecordedInput.isAnyValueOutdated(env, directories, maybeRecordedInputs.get()); + if (env.valuesMissing() || outdatedReason.isPresent()) { + env.getListener() + .handle( + Event.warn( + "Unexpectedly outdated cached repo %s: %s" + .formatted(repoName, outdatedReason.orElse("unknown reason")))); return false; } - return remoteFs.injectRemoteRepo(repoName, repoDirectoryContent, markerFileContent); + return remoteFs.injectRemoteRepo( + repoName, repoDirectoryContentFuture.resultNow(), markerFileContent); + } + + private enum CacheOp { + DOWNLOAD, + UPLOAD, } - private RemoteActionExecutionContext buildContext(RepositoryName repoName) { + private RemoteActionExecutionContext buildContext(RepositoryName repoName, CacheOp cacheOp) { var metadata = TracingMetadataUtils.buildMetadata( buildRequestId, commandId, repoName.getName(), /* actionMetadata= */ null); - // Don't use the disk cache as `--repo_contents_cache` is a strictly better alternative for - // local caching. + // Don't upload local repo contents to the disk cache as the (local) `--repo_contents_cache` is + // a better alternative for local caching. Do write through the disk cache for downloads from + // the remote cache to speed up future usage. return RemoteActionExecutionContext.create(metadata) - .withReadCachePolicy(acceptCached ? CachePolicy.REMOTE_CACHE_ONLY : CachePolicy.NO_CACHE) + .withReadCachePolicy(acceptCached ? CachePolicy.ANY_CACHE : CachePolicy.NO_CACHE) .withWriteCachePolicy( - uploadLocalResults ? CachePolicy.REMOTE_CACHE_ONLY : CachePolicy.NO_CACHE); + switch (cacheOp) { + case DOWNLOAD -> CachePolicy.ANY_CACHE; + case UPLOAD -> + uploadLocalResults ? CachePolicy.REMOTE_CACHE_ONLY : CachePolicy.NO_CACHE; + }); } - private Action buildAction(String predeclaredInputHash) { - // The predeclared input hash uniquely identifies the repo rule and all its attributes, but not - // dependencies established at runtime. We choose to embed it into the salt simply because that - // results in a constant Command message. + private Action buildAction(String inputHash) { + // We choose to embed the hash into the salt simply because that results in a constant Command + // message. return baseAction.toBuilder() - .setSalt(ByteString.copyFrom(StringUnsafe.getByteArray(predeclaredInputHash))) + .setSalt(ByteString.copyFrom(StringUnsafe.getByteArray(inputHash))) .build(); } + /** + * Uploads the intermediate action results representing the inputs recorded at runtime and returns + * the input hash to use for the final action result. + */ + private String uploadIntermediateActionResults( + RemoteActionExecutionContext context, + String predeclaredInputHash, + List recordedInputValues) + throws IOException, InterruptedException { + // The command is shared by all action results and small enough that FindMissingBlobs is not + // worthwhile. The REAPI spec requires the command to be uploaded before an action result that + // references it. + waitForBulkTransfer(ImmutableSet.of(cache.uploadBlob(context, commandDigest, COMMAND_BYTES))); + + String rollingHash = predeclaredInputHash; + var batches = RepoRecordedInput.WithValue.splitIntoBatches(recordedInputValues); + var futures = new ArrayList>(batches.size()); + for (var batch : batches) { + futures.add( + addToActionResult( + context, + buildAction(rollingHash), + Collections2.transform(batch, RepoRecordedInput.WithValue::input))); + for (var recordedInputValue : batch) { + rollingHash = rollForwardHash(rollingHash, recordedInputValue); + } + } + waitForBulkTransfer(futures); + return rollingHash; + } + + /** + * Adds the given set of recorded inputs as one of the alternative paths to the action result for + * the given action, if not already present. + * + *

Most repo rule evaluations with a fixed previous batch of hashes (in particular, the same + * predeclared inputs hash) will request a fixed set of inputs in the next batch. Thus, most + * intermediate action results will only contain a single set of recorded inputs. + */ + private ListenableFuture addToActionResult( + RemoteActionExecutionContext context, + Action action, + Collection newInputs) { + var actionKey = digestUtil.computeActionKey(action); + var currentInputsFuture = + Futures.transformAsync( + cache.downloadActionResultAsync( + context, actionKey, /* inlineOutErr= */ true, ImmutableSet.of()), + (currentResult) -> { + if (currentResult == null + || currentResult.actionResult().getStdoutDigest().getSizeBytes() == 0) { + return immediateFuture(""); + } + return fetchStdout(context, currentResult.actionResult()); + }, + directExecutor()); + return Futures.transformAsync( + currentInputsFuture, + currentInputsString -> { + // RepoRecordedInput.toString() is guaranteed to return a string that doesn't contain + // spaces or newlines. We can thus safely use spaces to separate inputs within a batch + // and newlines to separate different batches. + var newInputString = + newInputs.stream().map(RepoRecordedInput::toString).collect(joining(" ")); + if (currentInputsString.lines().anyMatch(newInputString::equals)) { + // The current batch of inputs is already present, no need to update the action result. + return immediateFuture(null); + } + // Add the new inputs to the top so that the most recently added inputs stay at the top. + // This could be used to implement a simple "least recently added" eviction strategy in + // the future in case the size of action results becomes a concern. + // + // Note that this update is inherently racy: multiple clients may add inputs concurrently, + // resulting in some added inputs being lost since the REAPI does not provide a way to + // update action results atomically. However, since different batches of inputs are + // already rare and them being added concurrently even more so, the temporary loss of a + // cache entry is an acceptable trade-off for simplicity. + var newInputsString = newInputString + '\n' + currentInputsString; + var stdoutBytes = getInternalStringBytes(newInputsString); + var stdoutDigest = digestUtil.compute(stdoutBytes); + var actionResult = + ActionResult.newBuilder().setExitCode(0).setStdoutDigest(stdoutDigest).build(); + return whenAllSucceed( + cache.uploadBlob(context, actionKey.getDigest(), action.toByteString()), + cache.uploadBlob(context, stdoutDigest, ByteString.copyFrom(stdoutBytes))) + .callAsync( + () -> cache.uploadActionResult(context, actionKey, actionResult), + directExecutor()); + }, + directExecutor()); + } + + /** Represents a single AC entry in the internal format used by the remote repo contents cache. */ + private sealed interface CacheEntry { + /** + * A final cache entry containing the contents of a repository. + * + *

Represented as an ActionResult with one output directory and one output file. + * + * @param repoDirectory the contents of the repository directory + * @param markerFile the contents of the repository's marker file + */ + record Final(OutputDirectory repoDirectory, OutputFile markerFile) implements CacheEntry {} + + /** + * An intermediate cache entry that points to the keys of any number of further AC entries, + * which can themselves be intermediate or final entries. The remote repo contents cache will + * try them in order. + * + * @param nextInputHashes the keys under which the next AC entries should be looked up + */ + record Intermediate(ImmutableList nextInputHashes) implements CacheEntry {} + + /** + * The cache entry didn't match any of the formats expected by this version of the remote repo + * contents cache for the given human-readable reason. + */ + record Invalid(String reason) implements CacheEntry {} + } + + /** + * Fetches a final cache entry for the given predeclared input hash by recursively following + * intermediate entries if needed or returns null if no final entry could be found or a Skyframe + * restart is needed. + */ + @Nullable + private CacheEntry.Final fetchFinalCacheEntry( + SkyFunction.Environment env, + RemoteActionExecutionContext context, + String predeclaredInputHash) + throws IOException, InterruptedException { + var currentHashes = ImmutableList.of(predeclaredInputHash); + while (!currentHashes.isEmpty()) { + var nextHashes = ImmutableList.builder(); + for (var hash : currentHashes) { + switch (fetchCacheEntry(env, context, hash)) { + case CacheEntry.Final finalEntry -> { + return finalEntry; + } + case CacheEntry.Intermediate(ImmutableList nextInputHashes) -> + nextHashes.addAll(nextInputHashes); + case CacheEntry.Invalid(String reason) -> env.getListener().handle(Event.warn(reason)); + case null -> { + // Keep checking hashes to batch missing values in fewer restarts. + Preconditions.checkState(env.valuesMissing()); + } + } + } + if (env.valuesMissing()) { + return null; + } + currentHashes = nextHashes.build(); + } + return null; + } + + // Returns null if and only if values are missing. + @Nullable + private CacheEntry fetchCacheEntry( + SkyFunction.Environment env, RemoteActionExecutionContext context, String inputHash) + throws IOException, InterruptedException { + var actionKey = new ActionKey(digestUtil.compute(buildAction(inputHash))); + // The marker file is read right after and thus requested to be inlined. If the action result + // is an intermediate node, the full result will be contained in the stdout, which should thus + // also be inlined. + var cachedActionResult = + cache.downloadActionResult( + context, actionKey, /* inlineOutErr= */ true, ImmutableSet.of(MARKER_FILE_PATH)); + if (cachedActionResult == null) { + return new CacheEntry.Intermediate(ImmutableList.of()); + } + var actionResult = cachedActionResult.actionResult(); + + if (actionResult.getExitCode() != 0) { + return new CacheEntry.Invalid( + "Unexpected exit code in action result for remotely cached repo %s:\n%s" + .formatted(context.getRequestMetadata().getActionId(), actionResult)); + } + if (actionResult.getOutputFilesCount() == 1 + && actionResult.getOutputDirectoriesCount() == 1 + && actionResult.getOutputSymlinksCount() == 0) { + return new CacheEntry.Final( + actionResult.getOutputDirectories(0), actionResult.getOutputFiles(0)); + } + if (!(actionResult.getOutputFilesCount() == 0 + && actionResult.getOutputDirectoriesCount() == 0 + && actionResult.getOutputSymlinksCount() == 0 + && actionResult.getStdoutDigest().getSizeBytes() > 0)) { + return new CacheEntry.Invalid( + "Unexpected intermediate action result for remotely cached repo %s:\n%s" + .formatted(context.getRequestMetadata().getActionId(), actionResult)); + } + var stdoutFuture = fetchStdout(context, actionResult); + waitForBulkTransfer(ImmutableList.of(stdoutFuture)); + + // The action result's stdout contains multiple lines, each representing a batch of + // RepoRecordedInputs separated by spaces. A given batch is valid only if all inputs in the + // batch are, but separate batches are tried independently. + var nextInputBatches = + stdoutFuture + .resultNow() + .lines() + .map( + line -> + SPLIT_ON_SPACE + .splitToStream(line) + .map(RepoRecordedInput::parse) + .collect(toImmutableList())) + .collect(toImmutableList()); + var uniqueNextInputs = + nextInputBatches.stream().flatMap(List::stream).collect(toImmutableSet()); + RepoRecordedInput.prefetch(env, directories, uniqueNextInputs); + if (env.valuesMissing()) { + return null; + } + var nextHashes = ImmutableList.builder(); + nextBatch: + for (var batch : nextInputBatches) { + var rollingHash = inputHash; + for (var input : batch) { + var value = input.getValue(env, directories); + // Values have been prefetched above. + Preconditions.checkState(!env.valuesMissing()); + if (!(value instanceof RepoRecordedInput.MaybeValue.Valid(String valueString))) { + continue nextBatch; + } + rollingHash = + rollForwardHash(rollingHash, new RepoRecordedInput.WithValue(input, valueString)); + } + nextHashes.add(rollingHash); + } + return new CacheEntry.Intermediate(nextHashes.build()); + } + + private String rollForwardHash(String hash, RepoRecordedInput.WithValue inputWithValue) { + return new Fingerprint() + .addString(hash) + .addString(inputWithValue.toString()) + .hexDigestAndReset(); + } + + private ListenableFuture fetchStdout( + RemoteActionExecutionContext context, ActionResult actionResult) { + if (!actionResult.getStdoutRaw().isEmpty()) { + return immediateFuture( + StringUnsafe.newInstance(actionResult.getStdoutRaw().toByteArray(), StringUnsafe.LATIN1)); + } + return Futures.transform( + cache.downloadBlob(context, actionResult.getStdoutDigest()), + stdout -> StringUnsafe.newInstance(stdout, StringUnsafe.LATIN1), + directExecutor()); + } + + /** + * Parses a marker file written by {@code DigestWriter}: the first non-empty line is the + * predeclared input hash, followed by one serialized {@link RepoRecordedInput.WithValue} per line. + * Returns an empty {@link Optional} if the first line doesn't match the expected hash or any + * subsequent line fails to parse. + */ + private static Optional> readMarkerFile( + String content, String predeclaredInputHash) { + var recordedInputValues = ImmutableList.builder(); + boolean firstLineVerified = false; + for (String line : Splitter.on('\n').split(content)) { + if (line.isEmpty()) { + continue; + } + if (!firstLineVerified) { + if (!line.equals(predeclaredInputHash)) { + return Optional.empty(); + } + firstLineVerified = true; + } else { + Optional withValue = RepoRecordedInput.WithValue.parse(line); + if (withValue.isEmpty()) { + return Optional.empty(); + } + recordedInputValues.add(withValue.get()); + } + } + if (!firstLineVerified) { + return Optional.empty(); + } + return Optional.of(recordedInputValues.build()); + } + private String maybeGetStackTrace(Exception e) { return verboseFailures ? Throwables.getStackTraceAsString(e) : e.getMessage(); } diff --git a/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java b/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java index 2b4035128f7b98..eec030b4376da4 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java @@ -13,6 +13,7 @@ // limitations under the License. package com.google.devtools.build.lib.remote; +import com.google.devtools.build.lib.analysis.BlazeDirectories; import com.google.devtools.build.lib.remote.common.RemoteExecutionClient; import com.google.devtools.build.lib.runtime.RemoteRepoContentsCache; import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor; @@ -22,6 +23,7 @@ /** Factory for {@link RemoteRepositoryRemoteExecutor} and {@link RemoteRepoContentsCacheImpl}. */ class RepositoryRemoteHelpersFactoryImpl implements RepositoryRemoteHelpersFactory { + private final BlazeDirectories directories; private final CombinedCache cache; @Nullable private final RemoteExecutionClient remoteExecutor; private final String buildRequestId; @@ -33,6 +35,7 @@ class RepositoryRemoteHelpersFactoryImpl implements RepositoryRemoteHelpersFacto private final boolean verboseFailures; RepositoryRemoteHelpersFactoryImpl( + BlazeDirectories directories, CombinedCache cache, @Nullable RemoteExecutionClient remoteExecutor, String buildRequestId, @@ -41,6 +44,7 @@ class RepositoryRemoteHelpersFactoryImpl implements RepositoryRemoteHelpersFacto boolean acceptCached, boolean uploadLocalResults, boolean verboseFailures) { + this.directories = directories; this.cache = cache; this.remoteExecutor = remoteExecutor; this.buildRequestId = buildRequestId; @@ -71,6 +75,12 @@ public RepositoryRemoteExecutor createExecutor() { @Override public RemoteRepoContentsCache createRepoContentsCache() { return new RemoteRepoContentsCacheImpl( - cache, buildRequestId, commandId, acceptCached, uploadLocalResults, verboseFailures); + directories, + cache, + buildRequestId, + commandId, + acceptCached, + uploadLocalResults, + verboseFailures); } } diff --git a/src/main/java/com/google/devtools/build/lib/rules/BUILD b/src/main/java/com/google/devtools/build/lib/rules/BUILD index 453d06ec3ba748..65e8a78f9d27ff 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/BUILD +++ b/src/main/java/com/google/devtools/build/lib/rules/BUILD @@ -340,8 +340,8 @@ java_library( "//src/main/java/com/google/devtools/build/lib/analysis:blaze_directories", "//src/main/java/com/google/devtools/build/lib/cmdline", "//src/main/java/com/google/devtools/build/lib/cmdline:LabelValidator", - "//src/main/java/com/google/devtools/build/lib/skyframe:action_environment_function", - "//src/main/java/com/google/devtools/build/lib/skyframe:client_environment_value", + "//src/main/java/com/google/devtools/build/lib/skyframe:environment_variable_value", + "//src/main/java/com/google/devtools/build/lib/skyframe:repo_environment_function", "//src/main/java/com/google/devtools/build/lib/skyframe:directory_listing_value", "//src/main/java/com/google/devtools/build/lib/skyframe:directory_tree_digest_value", "//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value", diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java index 055b4e329ac32a..b3532d7ff919b9 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java @@ -23,6 +23,8 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Splitter; +import com.google.common.collect.Collections2; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.io.BaseEncoding; import com.google.devtools.build.lib.actions.FileValue; @@ -31,20 +33,22 @@ import com.google.devtools.build.lib.cmdline.LabelSyntaxException; import com.google.devtools.build.lib.cmdline.LabelValidator; import com.google.devtools.build.lib.cmdline.RepositoryName; -import com.google.devtools.build.lib.skyframe.ActionEnvironmentFunction; -import com.google.devtools.build.lib.skyframe.ClientEnvironmentValue; import com.google.devtools.build.lib.skyframe.DirectoryListingValue; +import com.google.devtools.build.lib.skyframe.EnvironmentVariableValue; +import com.google.devtools.build.lib.skyframe.RepoEnvironmentFunction; import com.google.devtools.build.lib.skyframe.DirectoryTreeDigestValue; import com.google.devtools.build.lib.skyframe.PrecomputedValue; import com.google.devtools.build.lib.skyframe.RepositoryMappingValue; import com.google.devtools.build.lib.util.Fingerprint; -import com.google.devtools.build.lib.vfs.Path; +import com.google.devtools.build.lib.vfs.Dirent; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.lib.vfs.Root; import com.google.devtools.build.lib.vfs.RootedPath; import com.google.devtools.build.skyframe.SkyFunction.Environment; import com.google.devtools.build.skyframe.SkyKey; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Objects; @@ -121,60 +125,45 @@ public static Optional parse(String s) { return Optional.empty(); } + /** + * Splits the given list of recorded input values into batches such that within each batch, all + * recorded inputs's {@link SkyKey}s can be requested together. + */ + public static ImmutableList> splitIntoBatches( + List recordedInputValues) { + var batches = ImmutableList.>builder(); + var currentBatch = new ArrayList(); + for (var recordedInputValue : recordedInputValues) { + if (!recordedInputValue.input().canBeRequestedUnconditionally() + && !currentBatch.isEmpty()) { + batches.add(ImmutableList.copyOf(currentBatch)); + currentBatch.clear(); + } + currentBatch.add(recordedInputValue); + } + if (!currentBatch.isEmpty()) { + batches.add(ImmutableList.copyOf(currentBatch)); + } + return batches.build(); + } + /** Converts this {@link WithValue} to a string in a format compatible with {@link #parse}. */ @Override public String toString() { - return escape(input.toString()) + " " + escape(value); - } - - @VisibleForTesting - static String escape(String str) { - return str == null - ? "\\0" - : str.replace("\\", "\\\\").replace("\n", "\\n").replace(" ", "\\s"); - } - - @VisibleForTesting - @Nullable - static String unescape(String str) { - if (str.equals("\\0")) { - return null; // \0 == null string - } - StringBuilder result = new StringBuilder(); - boolean escaped = false; - for (int i = 0; i < str.length(); i++) { - char c = str.charAt(i); - if (escaped) { - if (c == 'n') { // n means new line - result.append("\n"); - } else if (c == 's') { // s means space - result.append(" "); - } else { // Any other escaped characters are just un-escaped - result.append(c); - } - escaped = false; - } else if (c == '\\') { - escaped = true; - } else { - result.append(c); - } - } - return result.toString(); + return input + " " + escape(value); } } /** - * Returns whether all values are still up-to-date for each recorded input. If Skyframe values are - * missing, the return value should be ignored; callers are responsible for checking {@code - * env.valuesMissing()} and triggering a Skyframe restart if needed. + * Returns whether all values are still up-to-date for each recorded input or a human-readable + * reason for why that's not the case. If Skyframe values are missing, the return value should be + * ignored; callers are responsible for checking {@code env.valuesMissing()} and triggering a + * Skyframe restart if needed. */ public static Optional isAnyValueOutdated( Environment env, BlazeDirectories directories, List recordedInputValues) throws InterruptedException { - env.getValuesAndExceptions( - recordedInputValues.stream() - .map(riv -> riv.input().getSkyKey(directories)) - .collect(toImmutableSet())); + prefetch(env, directories, Collections2.transform(recordedInputValues, WithValue::input)); if (env.valuesMissing()) { return UNDECIDED; } @@ -188,40 +177,146 @@ public static Optional isAnyValueOutdated( return Optional.empty(); } + /** + * Requests the information from Skyframe that is required by future calls to {@link + * #isAnyValueOutdated} for the given set of inputs. + */ + public static void prefetch( + Environment env, BlazeDirectories directories, Collection recordedInputs) + throws InterruptedException { + var keys = + recordedInputs.stream().map(rri -> rri.getSkyKey(directories)).collect(toImmutableSet()); + if (env.valuesMissing()) { + return; + } + var results = env.getValuesAndExceptions(keys); + // Per the contract of Environment.getValuesAndExceptions, we need to access the results to + // actually find all missing values. + for (SkyKey key : keys) { + var unused = results.get(key); + } + } + + /** + * Returns a human-readable reason for why the given {@code oldValue} is no longer up-to-date for + * this recorded input, or an empty Optional if it is still up-to-date. + * + *

The method can request Skyframe evaluations, and if any values are missing, this method can + * return any value (doesn't matter what, although {@link #UNDECIDED} is recommended for clarity) + * and will be reinvoked after a Skyframe restart. + */ + private Optional isOutdated( + Environment env, BlazeDirectories directories, @Nullable String oldValue) + throws InterruptedException { + MaybeValue wrappedNewValue = getValue(env, directories); + if (env.valuesMissing()) { + return UNDECIDED; + } + return switch (wrappedNewValue) { + case MaybeValue.Invalid(String reason) -> Optional.of(reason); + case MaybeValue.Valid(String newValue) -> + Objects.equals(oldValue, newValue) + ? Optional.empty() + : Optional.of(describeChange(oldValue, newValue)); + }; + } + @Override public abstract boolean equals(Object obj); @Override public abstract int hashCode(); + @VisibleForTesting + static String escape(String str) { + return str == null ? "\\0" : str.replace("\\", "\\\\").replace("\n", "\\n").replace(" ", "\\s"); + } + + @VisibleForTesting + @Nullable + static String unescape(String str) { + if (str.equals("\\0")) { + return null; // \0 == null string + } + StringBuilder result = new StringBuilder(); + boolean escaped = false; + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + if (escaped) { + if (c == 'n') { // n means new line + result.append("\n"); + } else if (c == 's') { // s means space + result.append(" "); + } else { // Any other escaped characters are just un-escaped + result.append(c); + } + escaped = false; + } else if (c == '\\') { + escaped = true; + } else { + result.append(c); + } + } + return result.toString(); + } + + /** + * Returns the string representation of this recorded input, in the format suitable for parsing + * back via {@link #parse}. + * + *

The returned string never contains spaces or newlines; those characters are escaped. + */ @Override public final String toString() { - return getParser().getPrefix() + ":" + toStringInternal(); + return getParser().getPrefix() + ":" + escape(toStringInternal()); } + /** + * Represents the possible values returned by {@link #getValue}: either a valid value (which may + * be null), or an invalid value with a reason (e.g. due to I/O failure). + */ + public sealed interface MaybeValue { + MaybeValue VALUES_MISSING = new MaybeValue.Invalid("values missing"); + + /** Represents a valid value, which may be null. */ + record Valid(@Nullable String value) implements MaybeValue {} + + /** Represents an invalid value with a reason (e.g. due to I/O failure). */ + record Invalid(String reason) implements MaybeValue {} + } + + /** + * Returns the current value of this input, which may be null, wrapped in a {@link + * MaybeValue.Valid}, or a {@link MaybeValue.Invalid} if the value is known to be invalid. + * + *

The method can request Skyframe evaluations, and if any values are missing, this method can + * return any value and will be reinvoked after a Skyframe restart. + */ + public abstract MaybeValue getValue(Environment env, BlazeDirectories directories) + throws InterruptedException; + + /** + * Returns a human-readable description of the change from {@code oldValue} to {@code newValue}. + */ + protected abstract String describeChange(String oldValue, String newValue); + /** * Returns the post-colon substring that identifies the specific input: for example, the {@code * MY_ENV_VAR} part of {@code ENV:MY_ENV_VAR}. */ - public abstract String toStringInternal(); + protected abstract String toStringInternal(); /** Returns the parser object for this type of recorded inputs. */ - public abstract Parser getParser(); + protected abstract Parser getParser(); /** Returns the {@link SkyKey} that is necessary to determine {@link #isOutdated}. */ - public abstract SkyKey getSkyKey(BlazeDirectories directories); + protected abstract SkyKey getSkyKey(BlazeDirectories directories); /** - * Returns a human-readable reason for why the given {@code oldValue} is no longer up-to-date for - * this recorded input, or an empty Optional if it is still up-to-date. This method can assume - * that {@link #getSkyKey(BlazeDirectories)} is already evaluated; it can request further Skyframe - * evaluations, and if any values are missing, this method can return any value (doesn't matter - * what, although {@link #UNDECIDED} is recommended for clarity) and will be reinvoked after a - * Skyframe restart. + * Returns true if the {@link #getValue} can be requested even if previous recorded inputs have + * not been verified to be up to date. */ - public abstract Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) - throws InterruptedException; + protected abstract boolean canBeRequestedUnconditionally(); private static final Optional UNDECIDED = Optional.of("values missing"); @@ -299,6 +394,11 @@ public final RootedPath getRootedPath(BlazeDirectories directories) { } return RootedPath.toRootedPath(root, path()); } + + /** Returns true if the path points into an external repository. */ + public boolean inExternalRepo() { + return repoName().isPresent() && !repoName().get().isMain(); + } } /** @@ -363,7 +463,8 @@ public String toStringInternal() { * for placing in a repository marker file. The file need not exist, and can be a file or a * directory. */ - public static String fileValueToMarkerValue(RootedPath rootedPath, FileValue fileValue) + @VisibleForTesting + static String fileValueToMarkerValue(RootedPath rootedPath, FileValue fileValue) throws IOException { if (fileValue.isDirectory()) { return "DIR"; @@ -386,23 +487,32 @@ public SkyKey getSkyKey(BlazeDirectories directories) { } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) + protected boolean canBeRequestedUnconditionally() { + // Requesting files in external repositories can result in cycles if the external repo now + // transitively depends on the requesting repo. + return !path.inExternalRepo(); + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) throws InterruptedException { var skyKey = getSkyKey(directories); try { - FileValue fileValue = (FileValue) env.getValueOrThrow(skyKey, IOException.class); + var fileValue = (FileValue) env.getValueOrThrow(skyKey, IOException.class); if (fileValue == null) { - return UNDECIDED; + return MaybeValue.VALUES_MISSING; } - if (!oldValue.equals(fileValueToMarkerValue((RootedPath) skyKey.argument(), fileValue))) { - return Optional.of("file info or contents of %s changed".formatted(path)); - } - return Optional.empty(); + return new MaybeValue.Valid( + fileValueToMarkerValue((RootedPath) skyKey.argument(), fileValue)); } catch (IOException e) { - return Optional.of("failed to stat %s: %s".formatted(path, e.getMessage())); + return new MaybeValue.Invalid("failed to stat %s: %s".formatted(path, e.getMessage())); } } + + @Override + public String describeChange(String oldValue, String newValue) { + return "file info or contents of %s changed".formatted(path); + } } /** Represents the list of entries under a directory accessed during the fetch. */ @@ -457,38 +567,42 @@ public Parser getParser() { return PARSER; } + private String directoryListingValueToMarkerValue(DirectoryListingValue directoryListingValue) { + var fp = new Fingerprint(); + fp.addStrings( + com.google.common.collect.Streams.stream(directoryListingValue.getDirents()) + .map(Dirent::getName) + .sorted() + .collect(toImmutableList())); + return fp.hexDigestAndReset(); + } + @Override public SkyKey getSkyKey(BlazeDirectories directories) { return DirectoryListingValue.key(path.getRootedPath(directories)); } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) + protected boolean canBeRequestedUnconditionally() { + // Requesting directories in external repositories can result in cycles if the external repo + // transitively depends on the requesting repo. + return !path.inExternalRepo(); + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) throws InterruptedException { - SkyKey skyKey = getSkyKey(directories); - if (env.getValue(skyKey) == null) { - return UNDECIDED; - } - try { - if (!oldValue.equals( - getDirentsMarkerValue(((DirectoryListingValue.Key) skyKey).argument().asPath()))) { - return Optional.of("directory entries of %s changed".formatted(path)); - } - return Optional.empty(); - } catch (IOException e) { - return Optional.of("failed to readdir %s: %s".formatted(path, e.getMessage())); + var skyKey = getSkyKey(directories); + var directoryListingValue = (DirectoryListingValue) env.getValue(skyKey); + if (directoryListingValue == null) { + return MaybeValue.VALUES_MISSING; } + return new MaybeValue.Valid(directoryListingValueToMarkerValue(directoryListingValue)); } - public static String getDirentsMarkerValue(Path path) throws IOException { - Fingerprint fp = new Fingerprint(); - fp.addStrings( - path.getDirectoryEntries().stream() - .map(Path::getBaseName) - .sorted() - .collect(toImmutableList())); - return fp.hexDigestAndReset(); + @Override + public String describeChange(String oldValue, String newValue) { + return "directory entries of %s changed".formatted(path); } } @@ -554,18 +668,32 @@ public SkyKey getSkyKey(BlazeDirectories directories) { } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) + protected boolean canBeRequestedUnconditionally() { + // Requesting directory trees in external repositories can result in cycles if the external + // repo now transitively depends on the requesting repo. + return !path.inExternalRepo(); + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) throws InterruptedException { - DirectoryTreeDigestValue value = - (DirectoryTreeDigestValue) env.getValue(getSkyKey(directories)); - if (value == null) { - return UNDECIDED; - } - if (!oldValue.equals(value.hexDigest())) { - return Optional.of("directory tree at %s changed".formatted(path)); + var skyKey = getSkyKey(directories); + try { + var directoryTreeDigestValue = + (DirectoryTreeDigestValue) env.getValueOrThrow(skyKey, IOException.class); + if (directoryTreeDigestValue == null) { + return MaybeValue.VALUES_MISSING; + } + return new MaybeValue.Valid(directoryTreeDigestValue.hexDigest()); + } catch (IOException e) { + return new MaybeValue.Invalid( + "failed to digest directory tree at %s: %s".formatted(path, e.getMessage())); } - return Optional.empty(); + } + + @Override + public String describeChange(String oldValue, String newValue) { + return "directory tree at %s changed".formatted(path); } } @@ -593,7 +721,7 @@ public static ImmutableMap> wrap( .collect(toImmutableMap(e -> new EnvVar(e.getKey()), Map.Entry::getValue)); } - private EnvVar(String name) { + public EnvVar(String name) { this.name = name; } @@ -625,27 +753,33 @@ public String toStringInternal() { @Override public SkyKey getSkyKey(BlazeDirectories directories) { - return ActionEnvironmentFunction.key(name); + return RepoEnvironmentFunction.key(name); } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) + protected boolean canBeRequestedUnconditionally() { + // Environment variables are static data injected into Skyframe, so there is no risk of + // cycles. + return true; + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) throws InterruptedException { - String v = PrecomputedValue.REPO_ENV.get(env).get(name); - if (v == null) { - v = ((ClientEnvironmentValue) env.getValue(getSkyKey(directories))).getValue(); - } - // Note that `oldValue` can be null if the env var was not set. - if (!Objects.equals(oldValue, v)) { - return Optional.of( - "environment variable %s changed: %s -> %s" - .formatted( - name, - oldValue == null ? "" : "'%s'".formatted(oldValue), - v == null ? "" : "'%s'".formatted(v))); + var value = (EnvironmentVariableValue) env.getValue(getSkyKey(directories)); + if (value == null) { + return MaybeValue.VALUES_MISSING; } - return Optional.empty(); + return new MaybeValue.Valid(value.value()); + } + + @Override + public String describeChange(String oldValue, String newValue) { + return "environment variable %s changed: %s -> %s" + .formatted( + name, + oldValue == null ? "" : "'%s'".formatted(oldValue), + newValue == null ? "" : "'%s'".formatted(newValue)); } } @@ -720,32 +854,32 @@ public SkyKey getSkyKey(BlazeDirectories directories) { } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) + protected boolean canBeRequestedUnconditionally() { + // Starlark can only request the mapping of the repo it is currently executing from, which + // means that the repo has already been fetched (either to execute the code or to verify the + // transitive .bzl hash). Further cycles aren't possible. + return true; + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) throws InterruptedException { - RepositoryMappingValue repoMappingValue = - (RepositoryMappingValue) env.getValue(getSkyKey(directories)); + var repoMappingValue = (RepositoryMappingValue) env.getValue(getSkyKey(directories)); if (Objects.equals(repoMappingValue, RepositoryMappingValue.NOT_FOUND_VALUE)) { - return Optional.of("source repo %s doesn't exist anymore".formatted(sourceRepo)); - } - RepositoryName oldCanonicalName; - try { - oldCanonicalName = RepositoryName.create(oldValue); - } catch (LabelSyntaxException e) { - // malformed old value causes refetch - return Optional.of("invalid recorded repo name: %s".formatted(e.getMessage())); - } - RepositoryName newCanonicalName = repoMappingValue.repositoryMapping().get(apparentName); - if (!oldCanonicalName.equals(newCanonicalName)) { - return Optional.of( - "canonical name for @%s in %s changed: %s -> %s" - .formatted( - apparentName, - sourceRepo, - oldCanonicalName, - newCanonicalName == null ? "" : newCanonicalName)); + return new MaybeValue.Invalid("source repo %s doesn't exist anymore".formatted(sourceRepo)); } - return Optional.empty(); + RepositoryName canonicalName = repoMappingValue.repositoryMapping().get(apparentName); + return new MaybeValue.Valid(canonicalName != null ? canonicalName.getName() : null); + } + + @Override + public String describeChange(String oldValue, String newValue) { + return "canonical name for @%s in %s changed: %s -> %s" + .formatted( + apparentName, + sourceRepo, + oldValue == null ? "" : oldValue, + newValue == null ? "" : newValue); } } @@ -788,9 +922,19 @@ public SkyKey getSkyKey(BlazeDirectories directories) { } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) { - return Optional.of(reason); + protected boolean canBeRequestedUnconditionally() { + return true; + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) { + return new MaybeValue.Invalid(reason); + } + + @Override + public String describeChange(String oldValue, String newValue) { + throw new UnsupportedOperationException( + "the value for this sentinel input is always invalid"); } } } diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java index bd8a1003028172..22fec8e4a3a0ff 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java @@ -267,8 +267,13 @@ public SkyValue compute(SkyKey skyKey, Environment env) if (remoteRepoContentsCache != null) { try { - if (remoteRepoContentsCache.lookupCache( - repositoryName, repoRoot, digestWriter.predeclaredInputHash, env.getListener())) { + boolean cacheHit = + remoteRepoContentsCache.lookupCache( + repositoryName, repoRoot, digestWriter.predeclaredInputHash, env); + if (env.valuesMissing()) { + return null; + } + if (cacheHit) { return new RepositoryDirectoryValue.Success( repoRoot, /* isFetchingDelayed= */ false, excludeRepoFromVendoring); } @@ -298,11 +303,19 @@ public SkyValue compute(SkyKey skyKey, Environment env) } digestWriter.writeMarkerFile(result.recordedInputValues()); if (result.reproducible() == Reproducibility.YES && !handler.isLocal(rule)) { + if (remoteRepoContentsCache != null) { + remoteRepoContentsCache.addToCache( + repositoryName, + repoRoot, + digestWriter.markerPath, + digestWriter.predeclaredInputHash, + env.getListener()); + } if (repoContentsCache.isEnabled()) { // This repo is eligible for the repo contents cache. - Path cachedRepoDir; + CandidateRepo newCacheEntry; try { - cachedRepoDir = + newCacheEntry = repoContentsCache.moveToCache( repoRoot, digestWriter.markerPath, digestWriter.predeclaredInputHash); } catch (IOException e) { @@ -313,6 +326,7 @@ public SkyValue compute(SkyKey skyKey, Environment env) e), Transience.TRANSIENT); } + Path cachedRepoDir = newCacheEntry.contentsDir(); // Don't forget to register a FileStateValue on the cache repo dir, so that we know to // refetch if the cache entry gets GC'd from under us or the entire cache is deleted. // @@ -348,14 +362,6 @@ public SkyValue compute(SkyKey skyKey, Environment env) throw new IllegalStateException( "FileStateValue unexpectedly present for " + cachedRepoDir); } - if (remoteRepoContentsCache != null) { - remoteRepoContentsCache.addToCache( - repositoryName, - repoRoot, - digestWriter.markerPath, - digestWriter.predeclaredInputHash, - env.getListener()); - } } return new RepositoryDirectoryValue.Success( repoRoot, /* isFetchingDelayed= */ false, excludeRepoFromVendoring); diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java index 4471325f4649ac..4d6657c9178a43 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java @@ -239,12 +239,23 @@ public Optional isAnyRecordedInputOutdated( Map recordedInputValues, Environment env) throws InterruptedException { - return RepoRecordedInput.isAnyValueOutdated( - env, - directories, + var withValues = recordedInputValues.entrySet().stream() .map(e -> new RepoRecordedInput.WithValue(e.getKey(), e.getValue())) - .collect(com.google.common.collect.ImmutableList.toImmutableList())); + .collect(com.google.common.collect.ImmutableList.toImmutableList()); + // Check inputs in batches to prevent Skyframe cycles caused by outdated dependencies: a batch + // ends right before any input that may cause a cycle if requested while earlier inputs are out + // of date. A later batch's deps are only requested after the earlier batches have been + // confirmed up to date across Skyframe restarts (isAnyValueOutdated returns a non-empty reason + // when its batch's values are still missing, short-circuiting this loop). + for (var batch : RepoRecordedInput.WithValue.splitIntoBatches(withValues)) { + Optional outdatedReason = + RepoRecordedInput.isAnyValueOutdated(env, directories, batch); + if (outdatedReason.isPresent()) { + return outdatedReason; + } + } + return Optional.empty(); } public static RootedPath getRootedPathFromLabel(Label label, Environment env) diff --git a/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java b/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java index 8c4f0f25d1cf41..663c2472efdc07 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java +++ b/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java @@ -17,6 +17,7 @@ import com.google.devtools.build.lib.cmdline.RepositoryName; import com.google.devtools.build.lib.events.ExtendedEventHandler; import com.google.devtools.build.lib.vfs.Path; +import com.google.devtools.build.skyframe.SkyFunction; import java.io.IOException; /** A remote cache for the contents of external repositories. */ @@ -33,6 +34,8 @@ void addToCache( /** * Retrieves a repository from the remote cache if possible. * + *

Callers have to check {@code env.valuesMissing()} after this method returns. + * * @return true if there was a cache hit and the repository has been fetched into the given * directory. */ @@ -40,6 +43,6 @@ boolean lookupCache( RepositoryName repoName, Path repoDir, String predeclaredInputHash, - ExtendedEventHandler reporter) + SkyFunction.Environment env) throws IOException, InterruptedException; } diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/BUILD b/src/main/java/com/google/devtools/build/lib/skyframe/BUILD index 7a741b87cbf93b..4100aa78ef4df4 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/BUILD +++ b/src/main/java/com/google/devtools/build/lib/skyframe/BUILD @@ -110,6 +110,7 @@ java_library( ":cached_bzl_load_value_and_deps_builder_factory", ":client_environment_function", ":client_environment_value", + ":environment_variable_value", ":collect_packages_under_directory_function", ":collect_packages_under_directory_value", ":collect_targets_in_package_function", @@ -173,6 +174,7 @@ java_library( ":recursive_package_provider_backed_target_pattern_resolver", ":recursive_pkg_function", ":recursive_pkg_value", + ":repo_environment_function", ":repo_file_function", ":repo_package_args_function", ":repository_mapping_function", @@ -1025,6 +1027,16 @@ java_library( ], ) +java_library( + name = "environment_variable_value", + srcs = ["EnvironmentVariableValue.java"], + deps = [ + "//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec", + "//src/main/java/com/google/devtools/build/skyframe:skyframe-objects", + "//third_party:jsr305", + ], +) + java_library( name = "collect_packages_under_directory_function", srcs = ["CollectPackagesUnderDirectoryFunction.java"], @@ -2335,6 +2347,23 @@ java_library( ], ) +java_library( + name = "repo_environment_function", + srcs = ["RepoEnvironmentFunction.java"], + deps = [ + ":client_environment_function", + ":client_environment_value", + ":environment_variable_value", + ":precomputed_value", + ":sky_functions", + "//src/main/java/com/google/devtools/build/lib/skyframe/serialization:visible-for-serialization", + "//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec", + "//src/main/java/com/google/devtools/build/skyframe:skyframe-objects", + "//third_party:guava", + "//third_party:jsr305", + ], +) + java_library( name = "repo_file_function", srcs = ["RepoFileFunction.java"], @@ -2537,6 +2566,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/analysis:analysis_cluster", "//src/main/java/com/google/devtools/build/lib/analysis:constraints/top_level_constraint_semantics", "//src/main/java/com/google/devtools/build/lib/analysis:view_creation_failed_exception", + "//src/main/java/com/google/devtools/build/lib/bazel/bzlmod:exception", "//src/main/java/com/google/devtools/build/lib/bugreport", "//src/main/java/com/google/devtools/build/lib/buildeventstream", "//src/main/java/com/google/devtools/build/lib/causes", diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/CompletionFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/CompletionFunction.java index 00befa94bf0a73..467b915a2e592c 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/CompletionFunction.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/CompletionFunction.java @@ -386,13 +386,6 @@ public SkyValue compute(SkyKey skyKey, Environment env) private void ensureToplevelArtifacts( Environment env, ImmutableCollection importantArtifacts, ActionInputMap inputMap) throws CompletionFunctionException, InterruptedException { - // For skymeld, a non-toplevel target might become a toplevel after it has been executed. This - // is the last chance to download the missing toplevel outputs in this case before sending out - // TargetCompleteEvent. See https://github.com/bazelbuild/bazel/issues/20737. - if (!isSkymeld.get()) { - return; - } - var actionInputPrefetcher = skyframeActionExecutor.getActionInputPrefetcher(); if (actionInputPrefetcher == null || actionInputPrefetcher == ActionInputPrefetcher.NONE) { return; @@ -444,6 +437,33 @@ private void downloadArtifact( List> futures) throws InterruptedException { if (!(artifact instanceof DerivedArtifact derivedArtifact)) { + // Source artifacts from external repos (e.g. remote repo contents cache hits) have no + // generating action and are thus never downloaded by finalizeAction, so materialize the + // toplevel ones here so they honor --remote_download_outputs like build outputs do. Source + // artifacts in the main repo are always local. + if (artifact.getOwner() != null && !artifact.getOwner().getRepository().isMain()) { + var metadata = inputMap.getInputMetadata(artifact); + if (metadata != null + && metadata.isRemote() + && remoteArtifactChecker.shouldDownloadOutput( + artifact, (RemoteFileArtifactValue) metadata)) { + futures.add( + actionInputPrefetcher.prefetchFiles( + /* action= */ null, + ImmutableList.of(artifact), + inputMap::getInputMetadata, + Priority.LOW, + Reason.OUTPUTS)); + } + } + return; + } + + // Derived toplevel outputs are downloaded eagerly by finalizeAction during execution. For + // skymeld, a non-toplevel target might only become toplevel after it has been executed, so this + // is the last chance to download the missing toplevel outputs in that case before sending out + // TargetCompleteEvent. See https://github.com/bazelbuild/bazel/issues/20737. + if (!isSkymeld.get()) { return; } diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/DirtinessCheckerUtils.java b/src/main/java/com/google/devtools/build/lib/skyframe/DirtinessCheckerUtils.java index fd68caaf19fb8b..c6f310b42a7547 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/DirtinessCheckerUtils.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/DirtinessCheckerUtils.java @@ -165,8 +165,10 @@ private static boolean isCacheableType(FileType fileType) { return true; case EXTERNAL_REPO: case OUTPUT: - case REPO_CONTENTS_CACHE_DIRS: return false; + case REPO_CONTENTS_CACHE_DIRS: + throw new IllegalStateException( + "Repo contents cache dirs are not expected to be checked for dirtiness"); } throw new AssertionError("Unknown type " + fileType); } diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/EnvironmentVariableValue.java b/src/main/java/com/google/devtools/build/lib/skyframe/EnvironmentVariableValue.java new file mode 100644 index 00000000000000..eea16f29607648 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/skyframe/EnvironmentVariableValue.java @@ -0,0 +1,28 @@ +// Copyright 2026 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.skyframe; + +import com.google.devtools.build.lib.skyframe.serialization.autocodec.AutoCodec; +import com.google.devtools.build.skyframe.SkyValue; +import javax.annotation.Nullable; + +/** + * The value of an environmental variable from an environment (client env, action env or repository + * env). These are invalidated and injected by {@link SequencedSkyframeExecutor}. + * + * @param value the value in the client environment or null if unset in the environment. + */ +@AutoCodec +public record EnvironmentVariableValue(@Nullable String value) implements SkyValue {} diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java b/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java index e3d59041344bda..b75e78cc79a8f7 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/ExternalFilesHelper.java @@ -180,14 +180,16 @@ public enum FileType { /** * The directory containing the repo contents cache entries as well as direct children * corresponding to individual predeclared input hashes. These directories are created by Bazel - * but may be deleted when users delete the entire repo contents cache. + * but may be deleted when users delete the entire repo contents cache. However, they are always + * recreated by Bazel before they are used and/or depended on via Skyframe. They are thus + * immutably present from the perspective of Skyframe and don't require invalidation. * - *

These files' handling differs from EXTERNAL_REPO as they are never modified after they are - * created and don't live under the external directory, as well as from EXTERNAL as they - * can be recreated by Bazel after diff detection. - * - *

The contents of these directories are considered EXTERNAL as they carry UUID names - * and are thus never reused. + *

Note: If these directories ever need to be checked for dirtiness during diffing, they have + * to be made non-cacheable according to {@link + * DirtinessCheckerUtils.ExternalDirtinessChecker#isCacheableType} so that they are not locked + * in as non-existent if they have been removed. This would result in FileValues for files below + * them (the actual repo contents, of type EXTERNAL) being locked in as non-existent too, + * even after a refetch of the repo has added a new cache entry. */ REPO_CONTENTS_CACHE_DIRS, } diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/RepoEnvironmentFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/RepoEnvironmentFunction.java new file mode 100644 index 00000000000000..ffddeed475ab4a --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/skyframe/RepoEnvironmentFunction.java @@ -0,0 +1,112 @@ +// Copyright 2026 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.skyframe; + +import com.google.common.collect.Collections2; +import com.google.common.collect.ImmutableSortedMap; +import com.google.devtools.build.lib.skyframe.serialization.VisibleForSerialization; +import com.google.devtools.build.lib.skyframe.serialization.autocodec.AutoCodec; +import com.google.devtools.build.skyframe.AbstractSkyKey; +import com.google.devtools.build.skyframe.SkyFunction; +import com.google.devtools.build.skyframe.SkyFunctionName; +import com.google.devtools.build.skyframe.SkyKey; +import com.google.devtools.build.skyframe.SkyValue; +import com.google.devtools.build.skyframe.SkyframeLookupResult; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import javax.annotation.Nullable; + +/** + * Skyframe function that provides the effective value for an environment variable in the context of + * repository rules and module extensions. This checks {@code --repo_env} overrides first, then falls + * back to the client environment. + */ +public final class RepoEnvironmentFunction implements SkyFunction { + @Override + public SkyValue compute(SkyKey skyKey, Environment env) throws InterruptedException { + Map repoEnv = PrecomputedValue.REPO_ENV.get(env); + String key = (String) skyKey.argument(); + if (repoEnv.containsKey(key)) { + return new EnvironmentVariableValue(repoEnv.get(key)); + } + // Fall back to the client environment. + ClientEnvironmentValue clientValue = + (ClientEnvironmentValue) env.getValue(ClientEnvironmentFunction.key(key)); + if (clientValue == null) { + return null; + } + return new EnvironmentVariableValue(clientValue.getValue()); + } + + /** Returns the SkyKey to invoke this function for the environment variable {@code variable}. */ + public static Key key(String variable) { + return Key.create(variable); + } + + @VisibleForSerialization + @AutoCodec + static class Key extends AbstractSkyKey { + private static final SkyKeyInterner interner = SkyKey.newInterner(); + + private Key(String arg) { + super(arg); + } + + private static Key create(String arg) { + return interner.intern(new Key(arg)); + } + + @VisibleForSerialization + @AutoCodec.Interner + static Key intern(Key key) { + return interner.intern(key); + } + + @Override + public SkyFunctionName functionName() { + return SkyFunctions.REPOSITORY_ENVIRONMENT_VARIABLE; + } + + @Override + public SkyKeyInterner getSkyKeyInterner() { + return interner; + } + } + + /** + * Returns a map of environment variable key => values, getting them from Skyframe. Returns null + * if and only if some dependencies from Skyframe still need to be resolved. + */ + @Nullable + public static ImmutableSortedMap> getEnvironmentView( + Environment env, Set keys) throws InterruptedException { + var skyKeys = Collections2.transform(keys, RepoEnvironmentFunction::key); + SkyframeLookupResult values = env.getValuesAndExceptions(skyKeys); + if (env.valuesMissing()) { + return null; + } + + var result = ImmutableSortedMap.>naturalOrder(); + for (var key : skyKeys) { + var value = (EnvironmentVariableValue) values.get(key); + if (value == null) { + return null; + } + result.put(key.argument().toString(), Optional.ofNullable(value.value())); + } + return result.buildOrThrow(); + } +} diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/SkyFunctions.java b/src/main/java/com/google/devtools/build/lib/skyframe/SkyFunctions.java index 5a06e025aa420c..b7fc0c162a8f9e 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/SkyFunctions.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/SkyFunctions.java @@ -26,6 +26,8 @@ public final class SkyFunctions { static final SkyFunctionName ACTION_SKETCH = SkyFunctionName.createHermetic("ACTION_SKETCH"); public static final SkyFunctionName ACTION_ENVIRONMENT_VARIABLE = SkyFunctionName.createHermetic("ACTION_ENVIRONMENT_VARIABLE"); + public static final SkyFunctionName REPOSITORY_ENVIRONMENT_VARIABLE = + SkyFunctionName.createHermetic("REPOSITORY_ENVIRONMENT_VARIABLE"); public static final SkyFunctionName DIRECTORY_LISTING_STATE = SkyFunctionName.createNonHermetic("DIRECTORY_LISTING_STATE"); public static final SkyFunctionName DIRECTORY_LISTING = diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeErrorProcessor.java b/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeErrorProcessor.java index 84284a27743514..9c8502a561fd89 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeErrorProcessor.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeErrorProcessor.java @@ -38,6 +38,7 @@ import com.google.devtools.build.lib.actions.TestExecException; import com.google.devtools.build.lib.actions.TopLevelOutputException; import com.google.devtools.build.lib.analysis.AnalysisFailureEvent; +import com.google.devtools.build.lib.bazel.bzlmod.ExternalDepsException; import com.google.devtools.build.lib.analysis.ViewCreationFailedException; import com.google.devtools.build.lib.analysis.constraints.TopLevelConstraintSemantics.TargetCompatibilityCheckException; import com.google.devtools.build.lib.bugreport.BugReport; @@ -540,6 +541,13 @@ && isExecutionCycle(errorInfo.getCycleInfo())) { configurationIdMessage(ctKey.getConfigurationKey()), ((NoSuchThingException) exception).getDetailedExitCode()); analysisRootCauses = NestedSetBuilder.create(Order.STABLE_ORDER, analysisFailedCause); + } else if (exception instanceof ExternalDepsException externalDepsException) { + AnalysisFailedCause analysisFailedCause = + new AnalysisFailedCause( + topLevelLabel, + configurationIdMessage(ctKey.getConfigurationKey()), + externalDepsException.getDetailedExitCode()); + analysisRootCauses = NestedSetBuilder.create(Order.STABLE_ORDER, analysisFailedCause); } else if (exception instanceof TargetCompatibilityCheckException) { analysisRootCauses = NestedSetBuilder.emptySet(Order.STABLE_ORDER); } else if (isExecutionException(exception)) { @@ -779,7 +787,8 @@ private static DetailedException convertToAnalysisException(Throwable cause) { // analyze with --nokeep_going. if (cause instanceof SaneAnalysisException || cause instanceof NoSuchTargetException - || cause instanceof NoSuchPackageException) { + || cause instanceof NoSuchPackageException + || cause instanceof ExternalDepsException) { return (DetailedException) cause; } return null; diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeExecutor.java b/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeExecutor.java index 0eb6797f0b275a..79c92ea8506b92 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeExecutor.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/SkyframeExecutor.java @@ -640,6 +640,7 @@ private ImmutableMap skyFunctions() { map.put(SkyFunctions.PRECOMPUTED, new PrecomputedFunction()); map.put(SkyFunctions.CLIENT_ENVIRONMENT_VARIABLE, new ClientEnvironmentFunction(clientEnv)); map.put(SkyFunctions.ACTION_ENVIRONMENT_VARIABLE, new ActionEnvironmentFunction()); + map.put(SkyFunctions.REPOSITORY_ENVIRONMENT_VARIABLE, new RepoEnvironmentFunction()); map.put(FileStateKey.FILE_STATE, newFileStateFunction()); map.put(SkyFunctions.DIRECTORY_LISTING_STATE, newDirectoryListingStateFunction()); map.put(FileSymlinkCycleUniquenessFunction.NAME, new FileSymlinkCycleUniquenessFunction()); @@ -3469,6 +3470,8 @@ protected void handleDiffsWithMissingDiffInformation( fileTypesToCheck.add(FileType.OUTPUT); } if (!fileTypesToCheck.isEmpty()) { + // FileType.REPO_CONTENTS_CACHE_DIRS is intentionally never checked here. See the comment on + // that enum constant for details. dirtinessCheckers = Iterables.concat( dirtinessCheckers, diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/packages/BUILD b/src/main/java/com/google/devtools/build/lib/skyframe/packages/BUILD index a86993c2995fba..5bb1b2b50084a8 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/packages/BUILD +++ b/src/main/java/com/google/devtools/build/lib/skyframe/packages/BUILD @@ -102,6 +102,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/rules:repository/repository_function", "//src/main/java/com/google/devtools/build/lib/skyframe:action_environment_function", "//src/main/java/com/google/devtools/build/lib/skyframe:client_environment_function", + "//src/main/java/com/google/devtools/build/lib/skyframe:repo_environment_function", "//src/main/java/com/google/devtools/build/lib/skyframe:directory_listing_function", "//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_function", "//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value", diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/packages/BazelPackageLoader.java b/src/main/java/com/google/devtools/build/lib/skyframe/packages/BazelPackageLoader.java index 4bf99d967d5a91..da5bd0113f6daa 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/packages/BazelPackageLoader.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/packages/BazelPackageLoader.java @@ -48,6 +48,7 @@ import com.google.devtools.build.lib.skyframe.PackageFunction.ActionOnIOExceptionReadingBuildFile; import com.google.devtools.build.lib.skyframe.PackageLookupFunction.CrossRepositoryLabelViolationStrategy; import com.google.devtools.build.lib.skyframe.PrecomputedValue; +import com.google.devtools.build.lib.skyframe.RepoEnvironmentFunction; import com.google.devtools.build.lib.skyframe.RepositoryMappingFunction; import com.google.devtools.build.lib.skyframe.SkyFunctions; import com.google.devtools.build.lib.vfs.Path; @@ -198,6 +199,7 @@ public void handle(Event event) {} SkyFunctions.DIRECTORY_LISTING_STATE, new DirectoryListingStateFunction(externalFilesHelper, SyscallCache.NO_CACHE)) .put(SkyFunctions.ACTION_ENVIRONMENT_VARIABLE, new ActionEnvironmentFunction()) + .put(SkyFunctions.REPOSITORY_ENVIRONMENT_VARIABLE, new RepoEnvironmentFunction()) .put(SkyFunctions.DIRECTORY_LISTING, new DirectoryListingFunction()) .put( SkyFunctions.LOCAL_REPOSITORY_LOOKUP, diff --git a/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java b/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java index dae6be132a65a1..ea928c53d6115f 100644 --- a/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java +++ b/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java @@ -15,8 +15,11 @@ package com.google.devtools.build.lib.rules.repository; import static com.google.common.truth.Truth.assertThat; +import static com.google.devtools.build.lib.rules.repository.RepoRecordedInput.WithValue.parse; +import static com.google.devtools.build.lib.rules.repository.RepoRecordedInput.WithValue.splitIntoBatches; import static org.mockito.Mockito.when; +import com.google.common.collect.ImmutableList; import com.google.common.io.BaseEncoding; import com.google.devtools.build.lib.actions.FileContentsProxy; import com.google.devtools.build.lib.actions.FileStateValue.RegularFileStateValueWithContentsProxy; @@ -35,8 +38,8 @@ @RunWith(JUnit4.class) public class RepoRecordedInputTest extends BuildViewTestCase { private static void assertMarkerFileEscaping(String testCase) { - String escaped = RepoRecordedInput.WithValue.escape(testCase); - assertThat(RepoRecordedInput.WithValue.unescape(escaped)).isEqualTo(testCase); + String escaped = RepoRecordedInput.escape(testCase); + assertThat(RepoRecordedInput.unescape(escaped)).isEqualTo(testCase); } @Test @@ -70,4 +73,22 @@ public void testFileValueToMarkerValue() throws Exception { String expectedDigest = BaseEncoding.base16().lowerCase().encode(path.asPath().getDigest()); assertThat(RepoRecordedInput.File.fileValueToMarkerValue(path, fv)).isEqualTo(expectedDigest); } + + @Test + public void testSplitIntoBatches() { + assertThat(splitIntoBatches(ImmutableList.of())).isEmpty(); + assertThat( + splitIntoBatches( + ImmutableList.of( + parse("FILE:@@//foo:bar abc").orElseThrow(), + parse("FILE:@@//:baz cba").orElseThrow(), + parse("FILE:@@foo//:baz bac").orElseThrow(), + parse("ENV:KEY value").orElseThrow()))) + .containsExactly( + ImmutableList.of( + parse("FILE:@@//foo:bar abc").orElseThrow(), + parse("FILE:@@//:baz cba").orElseThrow()), + ImmutableList.of( + parse("FILE:@@foo//:baz bac").orElseThrow(), parse("ENV:KEY value").orElseThrow())); + } } diff --git a/src/test/py/bazel/bzlmod/bazel_module_test.py b/src/test/py/bazel/bzlmod/bazel_module_test.py index ed092907c07e8a..9aef1ec859e7c6 100644 --- a/src/test/py/bazel/bzlmod/bazel_module_test.py +++ b/src/test/py/bazel/bzlmod/bazel_module_test.py @@ -1540,6 +1540,96 @@ def testUnknownRegistryInModuleMirrors(self): ) self.assertIn('https://unknown-registry.example.com', stderr) + def testInvalidRepoRuleReferencedByTargetDoesNotCrash(self): + self.ScratchFile( + 'MODULE.bazel', + [ + 'repo = use_repo_rule("//:repo.bzl", "repo")', + 'repo(name = "my_repo")', + ], + ) + self.ScratchFile( + 'BUILD', + [ + 'genrule(', + ' name = "gen",', + ' srcs = ["@my_repo//:a.txt"],', + ' outs = ["out.txt"],', + ' cmd = "cat $(SRCS) > $(OUTS)",', + ')', + ], + ) + self.ScratchFile('repo.bzl', ['nonsense']) + + exit_code, _, stderr = self.RunBazel( + ['build', '//:gen'], + allow_failure=True, + ) + self.AssertNotExitCode(exit_code, 0, stderr) + stderr = '\n'.join(stderr) + self.assertNotIn('FATAL', stderr) + self.assertIn("compilation of module 'repo.bzl' failed", stderr) + + def testReverseDependencyDirection(self): + # Set up two module extensions that retain their predeclared input hashes + # across two builds but still reverse their dependency direction. Depending + # on how repo cache candidates are checked, this could lead to a Skyframe + # cycle. + self.ScratchFile( + 'MODULE.bazel', + [ + 'foo_ext = use_extension("//:repo.bzl", "foo_ext")', + 'use_repo(foo_ext, "foo")', + 'bar_ext = use_extension("//:repo.bzl", "bar_ext")', + 'use_repo(bar_ext, "bar")', + ], + ) + self.ScratchFile('BUILD.bazel') + self.ScratchFile( + 'repo.bzl', + [ + 'def _repo_impl(rctx):', + ' rctx.file("output.txt", rctx.attr.content)', + ' rctx.file("BUILD", "exports_files([\'output.txt\'])")', + ' print("JUST FETCHED: %s" % rctx.original_name)', + ' return rctx.repo_metadata(reproducible=True)', + 'repo = repository_rule(', + ' implementation = _repo_impl,', + ' attrs = {"content": attr.string()},', + ')', + 'def _foo_ext_impl(mctx):', + ' deps = mctx.read(Label("@//:foo_deps.txt")).splitlines()', + ' content = "foo"', + ' for dep in deps:', + ' if dep:', + ' content += " + " + mctx.read(Label(dep))', + ' repo(name = "foo", content = content)', + 'foo_ext = module_extension(implementation = _foo_ext_impl)', + 'def _bar_ext_impl(mctx):', + ' deps = mctx.read(Label("@//:bar_deps.txt")).splitlines()', + ' content = "bar"', + ' for dep in deps:', + ' if dep:', + ' content += " + " + mctx.read(Label(dep))', + ' repo(name = "bar", content = content)', + 'bar_ext = module_extension(implementation = _bar_ext_impl)', + ], + ) + + self.ScratchFile('foo_deps.txt', ['@bar//:output.txt']) + self.ScratchFile('bar_deps.txt') + + # First fetch: not cached + _, _, stderr = self.RunBazel(['build', '@foo//:output.txt']) + self.assertIn('JUST FETCHED: bar', '\n'.join(stderr)) + self.assertIn('JUST FETCHED: foo', '\n'.join(stderr)) + + # After expunging and reversing: cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('foo_deps.txt') + self.ScratchFile('bar_deps.txt', ['@foo//:output.txt']) + self.RunBazel(['build', '@foo//:output.txt']) + if __name__ == '__main__': absltest.main() diff --git a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py index 7ebce79de93ee8..64a0fe68ded180 100644 --- a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py +++ b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py @@ -18,6 +18,7 @@ import json import os import re +import tempfile from absl.testing import absltest from src.test.py.bazel import test_base @@ -99,6 +100,70 @@ def testCachedAfterCleanExpunge(self): self.assertIn('JUST FETCHED', '\n'.join(stderr)) self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + def testLocalRepoContentsCacheInteraction(self): + self.ScratchFile( + 'MODULE.bazel', + [ + 'repo = use_repo_rule("//:repo.bzl", "repo")', + 'repo(name = "my_repo")', + ], + ) + self.ScratchFile('BUILD.bazel') + self.ScratchFile( + 'repo.bzl', + [ + 'def _repo_impl(rctx):', + ' rctx.file("BUILD", "filegroup(name=\'haha\')")', + ' print("JUST FETCHED")', + ' return rctx.repo_metadata(reproducible=True)', + 'repo = repository_rule(_repo_impl)', + ], + ) + + repo_dir = self.RepoDir('my_repo') + + # First fetch: not cached + repo_contents_cache = tempfile.mkdtemp(dir=os.environ['TEST_TMPDIR']) + _, _, stderr = self.RunBazel([ + 'build', + '@my_repo//:haha', + '--repo_contents_cache=' + repo_contents_cache, + ]) + self.assertIn('JUST FETCHED', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + + # After expunging: cached, hits the local repo contents cache + self.RunBazel(['clean', '--expunge']) + _, _, stderr = self.RunBazel([ + 'build', + '@my_repo//:haha', + '--repo_contents_cache=' + repo_contents_cache, + ]) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + + # After cleaning out local repo contents cache: cached, hits the remote + # cache + self.RunBazel(['clean', '--expunge']) + # Deleting the cache fails on Windows, so we just use a different directory. + _, _, stderr = self.RunBazel([ + 'build', + '@my_repo//:haha', + '--repo_contents_cache=' + repo_contents_cache + '2', + ]) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + + # After expunging, without using any repo contents cache: not cached + self.RunBazel(['clean', '--expunge']) + _, _, stderr = self.RunBazel([ + '--noexperimental_remote_repo_contents_cache', + 'build', + '@my_repo//:haha', + ]) + self.assertIn('JUST FETCHED', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + def testNotCachedWhenPredeclaredInputsChange(self): self.ScratchFile( 'MODULE.bazel', @@ -186,14 +251,12 @@ def testNotCachedWhenRecordedInputsChange_dynamicDep(self): self.assertIn('JUST FETCHED', '\n'.join(stderr)) self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) - # Change back to previous recorded inputs: not cached - # TODO: This is the current behavior, but it's not desired. Support for - # caching repos with dynamic deps should be added. + # Change back to previous recorded inputs: cached (even after expunging) self.RunBazel(['clean', '--expunge']) self.ScratchFile('data.txt', ['one']) _, _, stderr = self.RunBazel(['build', '@my_repo//:haha']) - self.assertIn('JUST FETCHED', '\n'.join(stderr)) - self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) def testNotCachedWhenRecordedInputsChange_staticDep(self): self.ScratchFile( @@ -240,6 +303,211 @@ def testNotCachedWhenRecordedInputsChange_staticDep(self): self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + def testRecordedInputs_differentValues(self): + platform_file = self.ScratchFile('platform.txt') + + self.ScratchFile( + 'MODULE.bazel', + [ + ( + 'platform_dependent_repo =' + ' use_repo_rule("//:platform_dependent_repo.bzl",' + ' "platform_dependent_repo")' + ), + 'platform_dependent_repo(name = "platform_dependent_repo")', + 'repo = use_repo_rule("//:repo.bzl", "repo")', + 'repo(name = "my_repo")', + ], + ) + self.ScratchFile( + 'BUILD.bazel', + [ + 'genrule(', + ' name = "show_platform",', + ' outs = ["platform.txt"],', + ' cmd = "cat $(location @my_repo//:data.txt) > $@",', + ' srcs = ["@my_repo//:data.txt"],', + ')', + ], + ) + self.ScratchFile( + 'platform_dependent_repo.bzl', + [ + 'def _platform_dependent_repo_impl(rctx):', + ' rctx.file("BUILD")', + ' print("DETERMINING PLATFORM")', + ' platform = rctx.read(rctx.path("%s"))' + % platform_file.replace('\\', '\\\\'), + ' rctx.file("data.txt", platform)', + ( + 'platform_dependent_repo =' + ' repository_rule(_platform_dependent_repo_impl)' + ), + ], + ) + self.ScratchFile( + 'repo.bzl', + [ + 'def _repo_impl(rctx):', + ' rctx.file("BUILD", "exports_files([\'data.txt\'])")', + ( + ' platform =' + ' rctx.read(Label("@platform_dependent_repo//:data.txt"))' + ), + ' print("JUST FETCHED ON " + platform)', + ' rctx.file("data.txt", platform)', + ' return rctx.repo_metadata(reproducible=True)', + 'repo = repository_rule(_repo_impl)', + ], + ) + + repo_dir = self.RepoDir('my_repo') + + # First fetch on Linux: not cached + self.ScratchFile('platform.txt', ['Linux']) + _, _, stderr = self.RunBazel(['build', '//:show_platform']) + self.assertIn('DETERMINING PLATFORM', '\n'.join(stderr)) + self.assertIn('JUST FETCHED ON Linux', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/platform.txt')) as f: + self.assertEqual(f.read().strip(), 'Linux') + + # First fetch on macOS: not cached + self.ScratchFile('platform.txt', ['macOS']) + _, _, stderr = self.RunBazel(['build', '//:show_platform']) + self.assertIn('DETERMINING PLATFORM', '\n'.join(stderr)) + self.assertIn('JUST FETCHED ON macOS', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/platform.txt')) as f: + self.assertEqual(f.read().strip(), 'macOS') + + # Second fetch on Linux: cached + self.ScratchFile('platform.txt', ['Linux']) + _, _, stderr = self.RunBazel(['build', '//:show_platform']) + self.assertIn('DETERMINING PLATFORM', '\n'.join(stderr)) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/platform.txt')) as f: + self.assertEqual(f.read().strip(), 'Linux') + + # Second fetch on macOS: cached + self.ScratchFile('platform.txt', ['macOS']) + _, _, stderr = self.RunBazel(['build', '//:show_platform']) + self.assertIn('DETERMINING PLATFORM', '\n'.join(stderr)) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/platform.txt')) as f: + self.assertEqual(f.read().strip(), 'macOS') + + def testRecordedInputs_differentInputs(self): + platform_file = self.ScratchFile('platform.txt') + + self.ScratchFile( + 'MODULE.bazel', + [ + ( + 'platform_dependent_binary =' + ' use_repo_rule("//:platform_dependent_binary.bzl",' + ' "platform_dependent_binary")' + ), + 'platform_dependent_binary(name = "platform_dependent_binary")', + 'repo = use_repo_rule("//:repo.bzl", "repo")', + 'repo(name = "my_repo")', + ], + ) + self.ScratchFile( + 'BUILD.bazel', + [ + 'genrule(', + ' name = "show_data",', + ' outs = ["data.txt"],', + ' cmd = "cat $(location @my_repo//:data.txt) > $@",', + ' srcs = ["@my_repo//:data.txt"],', + ')', + ], + ) + self.ScratchFile( + 'platform_dependent_binary.bzl', + [ + 'def _platform_dependent_binary_impl(rctx):', + ' rctx.file("BUILD")', + ' platform = rctx.read(rctx.path("%s")).strip()' + % platform_file.replace('\\', '\\\\'), + ' print("DETERMINED PLATFORM (%s)" % platform)', + ' if platform == "Windows":', + ' rctx.file("binary.exe", "PE")', + ' else:', + ' rctx.file("binary.sh", "ELF")', + ( + 'platform_dependent_binary =' + ' repository_rule(_platform_dependent_binary_impl)' + ), + ], + ) + self.ScratchFile( + 'repo.bzl', + [ + 'def _repo_impl(rctx):', + ' rctx.file("BUILD", "exports_files([\'data.txt\'])")', + # Simulate a uname -s by reading from a file instead of using + # rctx.execute (more complex to mock) or rctx.os.name (which may be + # tracked as an input in the future, making this test vacuous). + ' platform = rctx.read(rctx.path("%s"), watch = "no").strip()' + % platform_file.replace('\\', '\\\\'), + ' ext = ".exe" if platform == "Windows" else ".sh"', + # Simulate rctx.execute with a watched binary. + ( + ' out = rctx.read(Label("@platform_dependent_binary//:binary"' + ' + ext))' + ), + ' rctx.file("data.txt", out)', + ' print("JUST FETCHED")', + ' return rctx.repo_metadata(reproducible=True)', + 'repo = repository_rule(_repo_impl)', + ], + ) + + repo_dir = self.RepoDir('my_repo') + + # First fetch on Linux: not cached + self.ScratchFile('platform.txt', ['Linux']) + _, _, stderr = self.RunBazel(['build', '//:show_data']) + self.assertIn('DETERMINED PLATFORM (Linux)', '\n'.join(stderr)) + self.assertIn('JUST FETCHED', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/data.txt')) as f: + self.assertEqual(f.read().strip(), 'ELF') + + # First fetch on Windows: not cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('platform.txt', ['Windows']) + _, _, stderr = self.RunBazel(['build', '//:show_data']) + self.assertIn('DETERMINED PLATFORM (Windows)', '\n'.join(stderr)) + self.assertIn('JUST FETCHED', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/data.txt')) as f: + self.assertEqual(f.read().strip(), 'PE') + + # Second fetch on Linux: cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('platform.txt', ['Linux']) + _, _, stderr = self.RunBazel(['build', '//:show_data']) + self.assertIn('DETERMINED PLATFORM (Linux)', '\n'.join(stderr)) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/data.txt')) as f: + self.assertEqual(f.read().strip(), 'ELF') + + # Second fetch on Windows: cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('platform.txt', ['Windows']) + _, _, stderr = self.RunBazel(['build', '//:show_data']) + self.assertIn('DETERMINED PLATFORM (Windows)', '\n'.join(stderr)) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/data.txt')) as f: + self.assertEqual(f.read().strip(), 'PE') + def testNoThrashingBetweenWorkspaces(self): module_bazel_lines = [ 'repo = use_repo_rule("//:repo.bzl", "repo")', @@ -552,6 +820,16 @@ def testUseRepoFileInBuildRule_actionDoesNotUseCache_withExplicitSandboxBase( ) def testLostRemoteFile_build(self): + self._runLostRemoteFileBuildTest( + '--experimental_merged_skyframe_analysis_execution' + ) + + def testLostRemoteFile_build_noSkymeld(self): + self._runLostRemoteFileBuildTest( + '--noexperimental_merged_skyframe_analysis_execution' + ) + + def _runLostRemoteFileBuildTest(self, skymeld_flag): # Create a repo with two BUILD files (one in a subpackage), build a target # from one to cause it to be cached, then build that target again after # expunging to verify it is cached. @@ -588,7 +866,7 @@ def testLostRemoteFile_build(self): repo_dir = self.RepoDir('my_repo') # First fetch: not cached - _, _, stderr = self.RunBazel(['build', '@my_repo//:root']) + _, _, stderr = self.RunBazel(['build', skymeld_flag, '@my_repo//:root']) self.assertIn('JUST FETCHED', '\n'.join(stderr)) self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) self.assertTrue(os.path.exists(os.path.join(repo_dir, 'root.txt'))) @@ -597,10 +875,10 @@ def testLostRemoteFile_build(self): # After expunging: cached self.RunBazel(['clean', '--expunge']) - _, _, stderr = self.RunBazel(['build', '@my_repo//:root']) + _, _, stderr = self.RunBazel(['build', skymeld_flag, '@my_repo//:root']) self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) - self.assertFalse(os.path.exists(os.path.join(repo_dir, 'root.txt'))) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'root.txt'))) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'sub/BUILD'))) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'sub/sub.txt'))) @@ -610,7 +888,7 @@ def testLostRemoteFile_build(self): # Build the other target: fails due to the lost input # TODO: #26450 - Assert success and enable the checks below. _, _, stderr = self.RunBazel( - ['build', '@my_repo//sub:sub'], allow_failure=True + ['build', skymeld_flag, '@my_repo//sub:sub'], allow_failure=True ) self.assertEqual( 1, @@ -634,12 +912,12 @@ def testLostRemoteFile_build(self): # After expunging again: cached self.RunBazel(['clean', '--expunge']) - _, _, stderr = self.RunBazel(['build', '@my_repo//sub:sub']) + _, _, stderr = self.RunBazel(['build', skymeld_flag, '@my_repo//sub:sub']) self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'root.txt'))) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'sub/BUILD'))) - self.assertFalse(os.path.exists(os.path.join(repo_dir, 'sub/sub.txt'))) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'sub/sub.txt'))) def testBzlFilePrefetching(self): self.ScratchFile( @@ -716,6 +994,91 @@ def testBzlFilePrefetching(self): os.path.exists(os.path.join(repo_dir, 'subdir/more_nested.bzl')) ) + def testRun(self): + self.ScratchFile( + 'MODULE.bazel', + [ + 'bazel_dep(name = "buildozer", version = "8.5.1")', + ], + ) + + # First fetch: not cached + _, stdout, _ = self.RunBazel(['run', '@buildozer', '--', '--version']) + self.assertIn('buildozer version: 8.5.1', '\n'.join(stdout)) + + # After expunging: cached + self.RunBazel(['clean', '--expunge']) + _, stdout, _ = self.RunBazel(['run', '@buildozer', '--', '--version']) + self.assertIn('buildozer version: 8.5.1', '\n'.join(stdout)) + repo_dir = self.RepoDir('buildozer') + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'MODULE.bazel'))) + + def testReverseDependencyDirection(self): + # Set up two repos that retain their predeclared input hashes across two + # builds but still reverse their dependency direction. Depending on how repo + # cache candidates are checked, this could lead to a Skyframe cycle. + self.ScratchFile( + 'MODULE.bazel', + [ + 'repo = use_repo_rule("//:repo.bzl", "repo")', + 'repo(', + ' name = "foo",', + ' deps_file = "//:foo_deps.txt",', + ')', + 'repo(', + ' name = "bar",', + ' deps_file = "//:bar_deps.txt",', + ')', + ], + ) + self.ScratchFile('BUILD.bazel') + self.ScratchFile( + 'repo.bzl', + [ + 'def _repo_impl(rctx):', + ' deps = rctx.read(rctx.attr.deps_file).splitlines()', + ' output = ""', + ' for dep in deps:', + ' if dep:', + ' output += "{}: {}\\n".format(dep, rctx.read(Label(dep)))', + ' rctx.file("output.txt", output)', + ' rctx.file("BUILD", "exports_files([\'output.txt\'])")', + ' print("JUST FETCHED: %s" % rctx.original_name)', + ' return rctx.repo_metadata(reproducible=True)', + 'repo = repository_rule(', + ' implementation = _repo_impl,', + ' attrs = {', + ' "deps_file": attr.label(), }', + ')', + ], + ) + + self.ScratchFile('foo_deps.txt', ['@bar//:output.txt']) + self.ScratchFile('bar_deps.txt', ['']) + + # First fetch: not cached + _, _, stderr = self.RunBazel(['build', '@foo//:output.txt']) + stderr = '\n'.join(stderr) + self.assertIn('JUST FETCHED: bar', stderr) + self.assertIn('JUST FETCHED: foo', stderr) + + # After expunging and reversing the dependency direction: not cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('foo_deps.txt', ['']) + self.ScratchFile('bar_deps.txt', ['@foo//:output.txt']) + _, _, stderr = self.RunBazel(['build', '@foo//:output.txt']) + stderr = '\n'.join(stderr) + self.assertIn('JUST FETCHED: foo', stderr) + self.assertNotIn('JUST FETCHED: bar', stderr) + + # After expunging and reversing the dependency direction: both cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('foo_deps.txt', ['@bar//:output.txt']) + self.ScratchFile('bar_deps.txt', ['']) + _, _, stderr = self.RunBazel(['build', '@foo//:output.txt']) + stderr = '\n'.join(stderr) + self.assertNotIn('JUST FETCHED', stderr) + if __name__ == '__main__': absltest.main() diff --git a/src/test/py/bazel/bzlmod/repo_contents_cache_test.py b/src/test/py/bazel/bzlmod/repo_contents_cache_test.py index 961323f952594a..274d3cff6fc575 100644 --- a/src/test/py/bazel/bzlmod/repo_contents_cache_test.py +++ b/src/test/py/bazel/bzlmod/repo_contents_cache_test.py @@ -480,12 +480,7 @@ def testReverseDependencyDirection(self): self.RunBazel(['clean', '--expunge']) self.ScratchFile('foo_deps.txt', ['']) self.ScratchFile('bar_deps.txt', ['@foo//:output.txt']) - exit_code, stdout, stderr = self.RunBazel( - ['build', '@foo//:output.txt'], allow_failure=True - ) - # TODO: b/xxxxxxx - This is NOT the intended behavior. - self.AssertNotExitCode(exit_code, 0, stderr, stdout) - self.assertIn('possible dependency cycle detected', '\n'.join(stderr)) + self.RunBazel(['build', '@foo//:output.txt']) def doTestRepoContentsCacheDeleted(self, check_external_repository_files): repo_contents_cache = self.ScratchDir('repo_contents_cache') @@ -503,77 +498,93 @@ def doTestRepoContentsCacheDeleted(self, check_external_repository_files): 'repo(name = "my_repo")', ], ) - self.ScratchFile('workspace/BUILD.bazel') + self.ScratchFile( + 'workspace/BUILD.bazel', + [ + 'genrule(', + ' name = "gen",', + ' srcs = ["@my_repo//:haha", "in.txt"],', + ' outs = ["out.txt"],', + ' cmd = "cat $(SRCS) > $(OUTS)",', + ')', + ], + ) self.ScratchFile( 'workspace/repo.bzl', [ 'def _repo_impl(rctx):', - ' rctx.file("BUILD", "filegroup(name=\'haha\')")', + ( + ' rctx.file("BUILD", "filegroup(name=\'haha\',' + " srcs=['a.txt'], visibility=['//visibility:public'])\")" + ), + ' rctx.file("a.txt", "hello world")', ' print("JUST FETCHED")', ' return rctx.repo_metadata(reproducible=True)', 'repo = repository_rule(_repo_impl)', ], ) # First fetch: not cached + self.ScratchFile('workspace/in.txt', ['1']) _, _, stderr = self.RunBazel( [ 'build', - '@my_repo//:haha', + '//:gen', ] + extra_args, cwd=workspace, ) self.assertIn('JUST FETCHED', '\n'.join(stderr)) + with open(os.path.join(workspace, 'bazel-bin/out.txt'), 'r') as f: + self.assertEqual(f.read(), 'hello world1\n') # Second fetch: cached + self.ScratchFile('workspace/in.txt', ['2']) _, _, stderr = self.RunBazel( [ 'build', - '@my_repo//:haha', + '//:gen', ] + extra_args, cwd=workspace, ) self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + with open(os.path.join(workspace, 'bazel-bin/out.txt'), 'r') as f: + self.assertEqual(f.read(), 'hello world2\n') # Delete the entire repo contents cache and fetch again: not cached # Avoid access denied on Windows due to files being read-only by moving to # a different location instead. os.rename(repo_contents_cache, repo_contents_cache + '_deleted') + self.ScratchFile('workspace/in.txt', ['3']) _, _, stderr = self.RunBazel( - [ - 'build', - '@my_repo//:haha', - ] - + extra_args, + ['build', '//:gen'] + extra_args, cwd=workspace, ) stderr = '\n'.join(stderr) self.assertIn('JUST FETCHED', stderr) self.assertNotIn('WARNING', stderr) + with open(os.path.join(workspace, 'bazel-bin/out.txt'), 'r') as f: + self.assertEqual(f.read(), 'hello world3\n') # Second fetch after deletion: cached + self.ScratchFile('workspace/in.txt', ['4']) _, _, stderr = self.RunBazel( - [ - 'build', - '@my_repo//:haha', - ] - + extra_args, + ['build', '//:gen'] + extra_args, cwd=workspace, ) self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) self.assertNotIn('WARNING', '\n'.join(stderr)) + with open(os.path.join(workspace, 'bazel-bin/out.txt'), 'r') as f: + self.assertEqual(f.read(), 'hello world4\n') # Delete the entire repo contents cache and fetch again with a different # path: not cached # Avoid access denied on Windows due to files being read-only by moving to # a different location instead. os.rename(repo_contents_cache, repo_contents_cache + '_deleted_again') + self.ScratchFile('workspace/in.txt', ['5']) _, _, stderr = self.RunBazel( - [ - 'build', - '@my_repo//:haha', - ] + ['build', '//:gen'] + extra_args + [ '--repo_contents_cache=%s' % repo_contents_cache + '2', @@ -583,6 +594,8 @@ def doTestRepoContentsCacheDeleted(self, check_external_repository_files): stderr = '\n'.join(stderr) self.assertIn('JUST FETCHED', stderr) self.assertNotIn('WARNING', stderr) + with open(os.path.join(workspace, 'bazel-bin/out.txt'), 'r') as f: + self.assertEqual(f.read(), 'hello world5\n') def testRepoContentsCacheDeleted_withCheckExternalRepositoryFiles(self): self.doTestRepoContentsCacheDeleted(check_external_repository_files=True) diff --git a/src/test/shell/bazel/starlark_repository_test.sh b/src/test/shell/bazel/starlark_repository_test.sh index 72404b23dba234..bbbbab8631ab14 100755 --- a/src/test/shell/bazel/starlark_repository_test.sh +++ b/src/test/shell/bazel/starlark_repository_test.sh @@ -801,6 +801,73 @@ function test_starlark_repository_environ_invalidation_action_env_batch() { environ_invalidation_action_env_test_template --batch } +# Regression test for the repository environment being a single whole-map Skyframe +# node (introduced together with RepoEnvironmentFunction): changing one environment +# variable invalidated every repository rule and module extension, even ones that +# don't depend on that variable. +# +# In particular, running an interleaved command such as `bazel mod` with an +# environment variable changed that the build doesn't even read (e.g. TERM, set +# differently by an IDE/BSP server than by the user's shell) caused the next build +# to spuriously refetch repositories and discard the analysis cache. +# +# The repo here only depends on FOO, yet an interleaved `mod` invocation with an +# unrelated variable OTHER changed must not cause it to be refetched. +function test_unrelated_env_var_does_not_invalidate_repo() { + local execution_file="${TEST_TMPDIR}/execution" + echo 0 > "${execution_file}" + + cat > $(setup_module_dot_bazel) <<'EOF' +ext = use_extension("//:ext.bzl", "ext") +use_repo(ext, "foo") +EOF + cat > ext.bzl < ${execution_file}" % count]) + repository_ctx.file( + "BUILD", "filegroup(name = 'bar', visibility = ['//visibility:public'])") + repository_ctx.file("REPO.bazel", "") + +# A local repo is refetched whenever its node is recomputed, which makes spurious +# invalidation directly observable through the fetch counter. +repo = repository_rule(implementation = _repo_impl, local = True) + +def _ext_impl(module_ctx): + repo(name = "foo") + +ext = module_extension(implementation = _ext_impl) +EOF + cat > BUILD <<'EOF' +filegroup(name = "t", srcs = ["@foo//:bar"]) +EOF + + # Initial fetch. + FOO=1 OTHER=a bazel build //:t >& $TEST_log || fail "Failed to build" + assert_equals 1 $(cat "${execution_file}") + # Rebuilding with the same environment must not refetch. + FOO=1 OTHER=a bazel build //:t >& $TEST_log || fail "Failed to build" + assert_equals 1 $(cat "${execution_file}") + + # Evaluate the whole module/repo graph with an unrelated variable (OTHER) + # changed, then build again with the original environment. The repo only reads + # FOO, so it must not be refetched. + FOO=1 OTHER=b bazel mod graph >& $TEST_log || fail "Failed to run mod" + FOO=1 OTHER=a bazel build //:t >& $TEST_log || fail "Failed to build" + assert_equals 1 $(cat "${execution_file}") + + # A second interleaving with yet another value must likewise not refetch. + FOO=1 OTHER=c bazel mod graph >& $TEST_log || fail "Failed to run mod" + FOO=1 OTHER=a bazel build //:t >& $TEST_log || fail "Failed to build" + assert_equals 1 $(cat "${execution_file}") + + # Sanity check: changing FOO, which the repo does depend on, must refetch. + FOO=2 OTHER=a bazel build //:t >& $TEST_log || fail "Failed to build" + assert_equals 2 $(cat "${execution_file}") +} + # Test invalidation based on change to the bzl files function bzl_invalidation_test_template() { local startup_flag="${1-}"