From 92e812493be7fa7aa900e5eeda749dacee80af3d Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Tue, 23 Jun 2026 23:02:22 +0200 Subject: [PATCH 1/4] Fix cycles when checking the local repo contents cache Fixes #27517 by checking Skyframe deps in batches that stop right before any dep that may cause a cycle if checked while previous deps are out-of-date. This is accompanied by a restructuring of `RepoRecordedInput` that consolidates all Skyframe logic associated with the computation of the corresponding value exclusively within that class. This will also be helpful in adding support for dynamic inputs to the remote repo contents cache in future work. The upstream commit additionally moved the entirety of `RepositoryFetchFunction` to Skyframe workers so that checking the up-to-dateness of local repo contents cache entries isn't quadratic. That worker refactor is omitted in this 8.x backport, which keeps the existing split `RepositoryDelegatorFunction`/`StarlarkRepositoryFunction` architecture and restart-based evaluation; the cycle fix and the `RepoRecordedInput` restructuring are applied on that model. The batched up-to-dateness check is restart-safe (`isAnyValueOutdated` short-circuits while its batch's values are still missing). Closes #28206. Co-authored-by: Xudong Yang PiperOrigin-RevId: 855252657 Change-Id: Ica18760ae79da5155fc0f3d8cd4f24c52a034c86 (cherry picked from commit 72a25a9cfe857fd8b55ed834c316b1805d41078f) --- .../bazel/bzlmod/ModuleExtensionContext.java | 2 +- .../bzlmod/SingleExtensionEvalFunction.java | 16 +- .../cache/LocalRepoContentsCache.java | 6 +- .../starlark/StarlarkBaseExternalContext.java | 59 ++- .../starlark/StarlarkRepositoryContext.java | 11 +- .../rules/repository/RepoRecordedInput.java | 429 ++++++++++++------ .../RepositoryDelegatorFunction.java | 5 +- .../rules/repository/RepositoryFunction.java | 19 +- .../repository/RepoRecordedInputTest.java | 25 +- src/test/py/bazel/bzlmod/bazel_module_test.py | 60 +++ .../bazel/bzlmod/repo_contents_cache_test.py | 7 +- 11 files changed, 432 insertions(+), 207 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java index 5c7b8de8a1d6c7..812fe8f27b7e08 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java @@ -91,7 +91,7 @@ protected ModuleExtensionContext( this.rootModuleHasNonDevDependency = rootModuleHasNonDevDependency; // Record inputs to the extension that are known prior to evaluation. RepoRecordedInput.EnvVar.wrap(staticEnvVars) - .forEach((input, value) -> recordInput(input, value.orElse(null))); + .forEach((input, value) -> recordInputWithValue(input, value.orElse(null))); repoMappingRecorder.record(staticRepoMappingEntries); } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java index c83afc25132cf3..cd9e163f8d8f9d 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java @@ -395,12 +395,18 @@ private static Optional didRecordedInputsChange( BlazeDirectories directories, List recordedInputs) throws InterruptedException, NeedsSkyframeRestartException { - Optional outdated = - RepoRecordedInput.isAnyValueOutdated(env, directories, recordedInputs); - if (env.valuesMissing()) { - throw new NeedsSkyframeRestartException(); + // Check inputs in batches to prevent Skyframe cycles caused by outdated dependencies. + for (ImmutableList batch : + RepoRecordedInput.WithValue.splitIntoBatches(recordedInputs)) { + Optional outdated = RepoRecordedInput.isAnyValueOutdated(env, directories, batch); + if (env.valuesMissing()) { + throw new NeedsSkyframeRestartException(); + } + if (outdated.isPresent()) { + return outdated; + } } - return outdated; + return Optional.empty(); } private SingleExtensionValue createSingleExtensionValue( diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java index 89af2cde2a1f14..df98556b784b45 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java @@ -154,9 +154,9 @@ private Path ensureTrashDir() throws IOException { /** * Moves a freshly fetched repo into the contents cache. * - * @return the repo dir in the contents cache. + * @return the new cache entry */ - public Path moveToCache( + public CandidateRepo moveToCache( Path fetchedRepoDir, Path fetchedRepoMarkerFile, String predeclaredInputHash) throws IOException { Preconditions.checkState(path != null); @@ -183,7 +183,7 @@ public Path moveToCache( // Set up a symlink at the original fetched repo dir path. fetchedRepoDir.deleteTree(); FileSystemUtils.ensureSymbolicLink(fetchedRepoDir, cacheRepoDir); - return cacheRepoDir; + return new CandidateRepo(cacheRecordedInputsFile, cacheRepoDir); } public void acquireSharedLock() throws IOException, InterruptedException { diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java index 5cf3e3069fcf0d..cb7893673dd8b1 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java @@ -23,13 +23,10 @@ import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSortedMap; -import com.google.common.collect.Iterables; import com.google.common.collect.Maps; import com.google.common.flogger.GoogleLogger; import com.google.common.util.concurrent.Futures; -import com.google.devtools.build.lib.actions.FileValue; import com.google.devtools.build.lib.analysis.BlazeDirectories; import com.google.devtools.build.lib.bazel.debug.WorkspaceRuleEvent; import com.google.devtools.build.lib.bazel.repository.DecompressorDescriptor; @@ -55,6 +52,7 @@ import com.google.devtools.build.lib.remote.RemoteExternalOverlayFileSystem; import com.google.devtools.build.lib.rules.repository.NeedsSkyframeRestartException; import com.google.devtools.build.lib.rules.repository.RepoRecordedInput; +import com.google.devtools.build.lib.rules.repository.RepoRecordedInput.MaybeValue; import com.google.devtools.build.lib.rules.repository.RepoRecordedInput.RepoCacheFriendlyPath; import com.google.devtools.build.lib.rules.repository.RepositoryFunction; import com.google.devtools.build.lib.rules.repository.RepositoryFunction.RepositoryFunctionException; @@ -71,6 +69,7 @@ import com.google.devtools.build.lib.vfs.Symlinks; import com.google.devtools.build.skyframe.SkyFunction.Environment; import com.google.devtools.build.skyframe.SkyFunctionException.Transience; +import com.google.errorprone.annotations.CanIgnoreReturnValue; import com.google.errorprone.annotations.ForOverride; import java.io.File; import java.io.IOException; @@ -212,7 +211,7 @@ protected StarlarkBaseExternalContext( // apparent repo names to canonical repo names. See #20721 for why this is necessary. this.repoMappingRecorder = (fromRepo, apparentRepoName, canonicalRepoName) -> - recordInput( + recordInputWithValue( new RepoRecordedInput.RecordedRepoMapping(fromRepo, apparentRepoName), canonicalRepoName.isVisible() ? canonicalRepoName.getName() : null); } @@ -251,7 +250,7 @@ public void storeRepoMappingRecorderInThread(StarlarkThread thread) { repoMappingRecorder.storeInThread(thread); } - protected void recordInput(RepoRecordedInput input, @Nullable String value) { + protected void recordInputWithValue(RepoRecordedInput input, @Nullable String value) { if (recordedInputs.containsKey(input) && !Objects.equals(recordedInputs.get(input), value)) { throw new IllegalStateException( "Conflicting values recorded for input %s: '%s' vs. '%s'" @@ -260,6 +259,23 @@ protected void recordInput(RepoRecordedInput input, @Nullable String value) { recordedInputs.put(input, value); } + @CanIgnoreReturnValue + @Nullable + protected String getValueAndRecordInput(RepoRecordedInput input) + throws InterruptedException, NeedsSkyframeRestartException, IOException { + var maybeValue = input.getValue(env, directories); + if (env.valuesMissing()) { + throw new NeedsSkyframeRestartException(); + } + return switch (maybeValue) { + case MaybeValue.Invalid(String reason) -> throw new IOException(reason); + case MaybeValue.Valid(String value) -> { + recordInputWithValue(input, value); + yield value; + } + }; + } + private boolean cancelPendingAsyncTasks() { boolean hadPendingItems = false; for (AsyncTask task : asyncTasks) { @@ -1489,18 +1505,12 @@ private static T nullIfNone(Object object, Class type) { @Nullable public String getEnvironmentValue(String name, Object defaultValue) throws InterruptedException, NeedsSkyframeRestartException { - // Must look up via Skyframe, rather than solely copy from `this.repoEnvVariables`, in order to - // establish a SkyKey dependency relationship. - var nameAndValue = RepositoryFunction.getEnvVarValues(env, ImmutableSet.of(name)); - if (nameAndValue == null) { - return null; + try { + String value = getValueAndRecordInput(new RepoRecordedInput.EnvVar(name)); + return value != null ? value : nullIfNone(defaultValue, String.class); + } catch (IOException e) { + throw new IllegalStateException("getting EnvVar never throws IOException", e); } - // However, to account for --repo_env we take the value from `this.repoEnvVariables`. - // See https://github.com/bazelbuild/bazel/pull/20787#discussion_r1445571248 . - var entry = Iterables.getOnlyElement(RepoRecordedInput.EnvVar.wrap(nameAndValue).entrySet()); - String envVarValue = repoEnvVariables.get(name); - recordInput(entry.getKey(), envVarValue); - return envVarValue != null ? envVarValue : nullIfNone(defaultValue, String.class); } @StarlarkMethod( @@ -1661,17 +1671,8 @@ protected void maybeWatch(StarlarkPath starlarkPath, ShouldWatch shouldWatch) if (repoCacheFriendlyPath == null) { return; } - var recordedInput = new RepoRecordedInput.File(repoCacheFriendlyPath); - var skyKey = recordedInput.getSkyKey(directories); try { - FileValue fileValue = (FileValue) env.getValueOrThrow(skyKey, IOException.class); - if (fileValue == null) { - throw new NeedsSkyframeRestartException(); - } - - recordInput( - recordedInput, - RepoRecordedInput.File.fileValueToMarkerValue((RootedPath) skyKey.argument(), fileValue)); + getValueAndRecordInput(new RepoRecordedInput.File(repoCacheFriendlyPath)); } catch (IOException e) { throw new RepositoryFunctionException(e, Transience.TRANSIENT); } @@ -1683,12 +1684,8 @@ protected void maybeWatchDirents(Path path, ShouldWatch shouldWatch) if (repoCacheFriendlyPath == null) { return; } - var recordedInput = new RepoRecordedInput.Dirents(repoCacheFriendlyPath); - if (env.getValue(recordedInput.getSkyKey(directories)) == null) { - throw new NeedsSkyframeRestartException(); - } try { - recordInput(recordedInput, RepoRecordedInput.Dirents.getDirentsMarkerValue(path)); + getValueAndRecordInput(new RepoRecordedInput.Dirents(repoCacheFriendlyPath)); } catch (IOException e) { throw new RepositoryFunctionException(e, Transience.TRANSIENT); } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java index 751d8305e94a58..6cb1a331464eaf 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java @@ -42,7 +42,6 @@ import com.google.devtools.build.lib.rules.repository.WorkspaceAttributeMapper; import com.google.devtools.build.lib.runtime.ProcessWrapper; import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor; -import com.google.devtools.build.lib.skyframe.DirectoryTreeDigestValue; import com.google.devtools.build.lib.util.StringUtilities; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; @@ -587,15 +586,7 @@ public void watchTree(Object path) return; } try { - var recordedInput = new RepoRecordedInput.DirTree(repoCacheFriendlyPath); - DirectoryTreeDigestValue digestValue = - (DirectoryTreeDigestValue) - env.getValueOrThrow(recordedInput.getSkyKey(directories), IOException.class); - if (digestValue == null) { - throw new NeedsSkyframeRestartException(); - } - - recordInput(recordedInput, digestValue.hexDigest()); + getValueAndRecordInput(new RepoRecordedInput.DirTree(repoCacheFriendlyPath)); } catch (IOException e) { throw new RepositoryFunctionException(e, Transience.TRANSIENT); } diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java index 39a5b43e444dae..b3532d7ff919b9 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java @@ -23,6 +23,8 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Splitter; +import com.google.common.collect.Collections2; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.io.BaseEncoding; import com.google.devtools.build.lib.actions.FileValue; @@ -31,20 +33,22 @@ import com.google.devtools.build.lib.cmdline.LabelSyntaxException; import com.google.devtools.build.lib.cmdline.LabelValidator; import com.google.devtools.build.lib.cmdline.RepositoryName; +import com.google.devtools.build.lib.skyframe.DirectoryListingValue; import com.google.devtools.build.lib.skyframe.EnvironmentVariableValue; import com.google.devtools.build.lib.skyframe.RepoEnvironmentFunction; -import com.google.devtools.build.lib.skyframe.DirectoryListingValue; import com.google.devtools.build.lib.skyframe.DirectoryTreeDigestValue; import com.google.devtools.build.lib.skyframe.PrecomputedValue; import com.google.devtools.build.lib.skyframe.RepositoryMappingValue; import com.google.devtools.build.lib.util.Fingerprint; -import com.google.devtools.build.lib.vfs.Path; +import com.google.devtools.build.lib.vfs.Dirent; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.lib.vfs.Root; import com.google.devtools.build.lib.vfs.RootedPath; import com.google.devtools.build.skyframe.SkyFunction.Environment; import com.google.devtools.build.skyframe.SkyKey; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Objects; @@ -121,60 +125,45 @@ public static Optional parse(String s) { return Optional.empty(); } + /** + * Splits the given list of recorded input values into batches such that within each batch, all + * recorded inputs's {@link SkyKey}s can be requested together. + */ + public static ImmutableList> splitIntoBatches( + List recordedInputValues) { + var batches = ImmutableList.>builder(); + var currentBatch = new ArrayList(); + for (var recordedInputValue : recordedInputValues) { + if (!recordedInputValue.input().canBeRequestedUnconditionally() + && !currentBatch.isEmpty()) { + batches.add(ImmutableList.copyOf(currentBatch)); + currentBatch.clear(); + } + currentBatch.add(recordedInputValue); + } + if (!currentBatch.isEmpty()) { + batches.add(ImmutableList.copyOf(currentBatch)); + } + return batches.build(); + } + /** Converts this {@link WithValue} to a string in a format compatible with {@link #parse}. */ @Override public String toString() { - return escape(input.toString()) + " " + escape(value); - } - - @VisibleForTesting - static String escape(String str) { - return str == null - ? "\\0" - : str.replace("\\", "\\\\").replace("\n", "\\n").replace(" ", "\\s"); - } - - @VisibleForTesting - @Nullable - static String unescape(String str) { - if (str.equals("\\0")) { - return null; // \0 == null string - } - StringBuilder result = new StringBuilder(); - boolean escaped = false; - for (int i = 0; i < str.length(); i++) { - char c = str.charAt(i); - if (escaped) { - if (c == 'n') { // n means new line - result.append("\n"); - } else if (c == 's') { // s means space - result.append(" "); - } else { // Any other escaped characters are just un-escaped - result.append(c); - } - escaped = false; - } else if (c == '\\') { - escaped = true; - } else { - result.append(c); - } - } - return result.toString(); + return input + " " + escape(value); } } /** - * Returns whether all values are still up-to-date for each recorded input. If Skyframe values are - * missing, the return value should be ignored; callers are responsible for checking {@code - * env.valuesMissing()} and triggering a Skyframe restart if needed. + * Returns whether all values are still up-to-date for each recorded input or a human-readable + * reason for why that's not the case. If Skyframe values are missing, the return value should be + * ignored; callers are responsible for checking {@code env.valuesMissing()} and triggering a + * Skyframe restart if needed. */ public static Optional isAnyValueOutdated( Environment env, BlazeDirectories directories, List recordedInputValues) throws InterruptedException { - env.getValuesAndExceptions( - recordedInputValues.stream() - .map(riv -> riv.input().getSkyKey(directories)) - .collect(toImmutableSet())); + prefetch(env, directories, Collections2.transform(recordedInputValues, WithValue::input)); if (env.valuesMissing()) { return UNDECIDED; } @@ -188,40 +177,146 @@ public static Optional isAnyValueOutdated( return Optional.empty(); } + /** + * Requests the information from Skyframe that is required by future calls to {@link + * #isAnyValueOutdated} for the given set of inputs. + */ + public static void prefetch( + Environment env, BlazeDirectories directories, Collection recordedInputs) + throws InterruptedException { + var keys = + recordedInputs.stream().map(rri -> rri.getSkyKey(directories)).collect(toImmutableSet()); + if (env.valuesMissing()) { + return; + } + var results = env.getValuesAndExceptions(keys); + // Per the contract of Environment.getValuesAndExceptions, we need to access the results to + // actually find all missing values. + for (SkyKey key : keys) { + var unused = results.get(key); + } + } + + /** + * Returns a human-readable reason for why the given {@code oldValue} is no longer up-to-date for + * this recorded input, or an empty Optional if it is still up-to-date. + * + *

The method can request Skyframe evaluations, and if any values are missing, this method can + * return any value (doesn't matter what, although {@link #UNDECIDED} is recommended for clarity) + * and will be reinvoked after a Skyframe restart. + */ + private Optional isOutdated( + Environment env, BlazeDirectories directories, @Nullable String oldValue) + throws InterruptedException { + MaybeValue wrappedNewValue = getValue(env, directories); + if (env.valuesMissing()) { + return UNDECIDED; + } + return switch (wrappedNewValue) { + case MaybeValue.Invalid(String reason) -> Optional.of(reason); + case MaybeValue.Valid(String newValue) -> + Objects.equals(oldValue, newValue) + ? Optional.empty() + : Optional.of(describeChange(oldValue, newValue)); + }; + } + @Override public abstract boolean equals(Object obj); @Override public abstract int hashCode(); + @VisibleForTesting + static String escape(String str) { + return str == null ? "\\0" : str.replace("\\", "\\\\").replace("\n", "\\n").replace(" ", "\\s"); + } + + @VisibleForTesting + @Nullable + static String unescape(String str) { + if (str.equals("\\0")) { + return null; // \0 == null string + } + StringBuilder result = new StringBuilder(); + boolean escaped = false; + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + if (escaped) { + if (c == 'n') { // n means new line + result.append("\n"); + } else if (c == 's') { // s means space + result.append(" "); + } else { // Any other escaped characters are just un-escaped + result.append(c); + } + escaped = false; + } else if (c == '\\') { + escaped = true; + } else { + result.append(c); + } + } + return result.toString(); + } + + /** + * Returns the string representation of this recorded input, in the format suitable for parsing + * back via {@link #parse}. + * + *

The returned string never contains spaces or newlines; those characters are escaped. + */ @Override public final String toString() { - return getParser().getPrefix() + ":" + toStringInternal(); + return getParser().getPrefix() + ":" + escape(toStringInternal()); } + /** + * Represents the possible values returned by {@link #getValue}: either a valid value (which may + * be null), or an invalid value with a reason (e.g. due to I/O failure). + */ + public sealed interface MaybeValue { + MaybeValue VALUES_MISSING = new MaybeValue.Invalid("values missing"); + + /** Represents a valid value, which may be null. */ + record Valid(@Nullable String value) implements MaybeValue {} + + /** Represents an invalid value with a reason (e.g. due to I/O failure). */ + record Invalid(String reason) implements MaybeValue {} + } + + /** + * Returns the current value of this input, which may be null, wrapped in a {@link + * MaybeValue.Valid}, or a {@link MaybeValue.Invalid} if the value is known to be invalid. + * + *

The method can request Skyframe evaluations, and if any values are missing, this method can + * return any value and will be reinvoked after a Skyframe restart. + */ + public abstract MaybeValue getValue(Environment env, BlazeDirectories directories) + throws InterruptedException; + + /** + * Returns a human-readable description of the change from {@code oldValue} to {@code newValue}. + */ + protected abstract String describeChange(String oldValue, String newValue); + /** * Returns the post-colon substring that identifies the specific input: for example, the {@code * MY_ENV_VAR} part of {@code ENV:MY_ENV_VAR}. */ - public abstract String toStringInternal(); + protected abstract String toStringInternal(); /** Returns the parser object for this type of recorded inputs. */ - public abstract Parser getParser(); + protected abstract Parser getParser(); /** Returns the {@link SkyKey} that is necessary to determine {@link #isOutdated}. */ - public abstract SkyKey getSkyKey(BlazeDirectories directories); + protected abstract SkyKey getSkyKey(BlazeDirectories directories); /** - * Returns a human-readable reason for why the given {@code oldValue} is no longer up-to-date for - * this recorded input, or an empty Optional if it is still up-to-date. This method can assume - * that {@link #getSkyKey(BlazeDirectories)} is already evaluated; it can request further Skyframe - * evaluations, and if any values are missing, this method can return any value (doesn't matter - * what, although {@link #UNDECIDED} is recommended for clarity) and will be reinvoked after a - * Skyframe restart. + * Returns true if the {@link #getValue} can be requested even if previous recorded inputs have + * not been verified to be up to date. */ - public abstract Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) - throws InterruptedException; + protected abstract boolean canBeRequestedUnconditionally(); private static final Optional UNDECIDED = Optional.of("values missing"); @@ -299,6 +394,11 @@ public final RootedPath getRootedPath(BlazeDirectories directories) { } return RootedPath.toRootedPath(root, path()); } + + /** Returns true if the path points into an external repository. */ + public boolean inExternalRepo() { + return repoName().isPresent() && !repoName().get().isMain(); + } } /** @@ -363,7 +463,8 @@ public String toStringInternal() { * for placing in a repository marker file. The file need not exist, and can be a file or a * directory. */ - public static String fileValueToMarkerValue(RootedPath rootedPath, FileValue fileValue) + @VisibleForTesting + static String fileValueToMarkerValue(RootedPath rootedPath, FileValue fileValue) throws IOException { if (fileValue.isDirectory()) { return "DIR"; @@ -386,23 +487,32 @@ public SkyKey getSkyKey(BlazeDirectories directories) { } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) + protected boolean canBeRequestedUnconditionally() { + // Requesting files in external repositories can result in cycles if the external repo now + // transitively depends on the requesting repo. + return !path.inExternalRepo(); + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) throws InterruptedException { var skyKey = getSkyKey(directories); try { - FileValue fileValue = (FileValue) env.getValueOrThrow(skyKey, IOException.class); + var fileValue = (FileValue) env.getValueOrThrow(skyKey, IOException.class); if (fileValue == null) { - return UNDECIDED; - } - if (!oldValue.equals(fileValueToMarkerValue((RootedPath) skyKey.argument(), fileValue))) { - return Optional.of("file info or contents of %s changed".formatted(path)); + return MaybeValue.VALUES_MISSING; } - return Optional.empty(); + return new MaybeValue.Valid( + fileValueToMarkerValue((RootedPath) skyKey.argument(), fileValue)); } catch (IOException e) { - return Optional.of("failed to stat %s: %s".formatted(path, e.getMessage())); + return new MaybeValue.Invalid("failed to stat %s: %s".formatted(path, e.getMessage())); } } + + @Override + public String describeChange(String oldValue, String newValue) { + return "file info or contents of %s changed".formatted(path); + } } /** Represents the list of entries under a directory accessed during the fetch. */ @@ -457,38 +567,42 @@ public Parser getParser() { return PARSER; } + private String directoryListingValueToMarkerValue(DirectoryListingValue directoryListingValue) { + var fp = new Fingerprint(); + fp.addStrings( + com.google.common.collect.Streams.stream(directoryListingValue.getDirents()) + .map(Dirent::getName) + .sorted() + .collect(toImmutableList())); + return fp.hexDigestAndReset(); + } + @Override public SkyKey getSkyKey(BlazeDirectories directories) { return DirectoryListingValue.key(path.getRootedPath(directories)); } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) + protected boolean canBeRequestedUnconditionally() { + // Requesting directories in external repositories can result in cycles if the external repo + // transitively depends on the requesting repo. + return !path.inExternalRepo(); + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) throws InterruptedException { - SkyKey skyKey = getSkyKey(directories); - if (env.getValue(skyKey) == null) { - return UNDECIDED; - } - try { - if (!oldValue.equals( - getDirentsMarkerValue(((DirectoryListingValue.Key) skyKey).argument().asPath()))) { - return Optional.of("directory entries of %s changed".formatted(path)); - } - return Optional.empty(); - } catch (IOException e) { - return Optional.of("failed to readdir %s: %s".formatted(path, e.getMessage())); + var skyKey = getSkyKey(directories); + var directoryListingValue = (DirectoryListingValue) env.getValue(skyKey); + if (directoryListingValue == null) { + return MaybeValue.VALUES_MISSING; } + return new MaybeValue.Valid(directoryListingValueToMarkerValue(directoryListingValue)); } - public static String getDirentsMarkerValue(Path path) throws IOException { - Fingerprint fp = new Fingerprint(); - fp.addStrings( - path.getDirectoryEntries().stream() - .map(Path::getBaseName) - .sorted() - .collect(toImmutableList())); - return fp.hexDigestAndReset(); + @Override + public String describeChange(String oldValue, String newValue) { + return "directory entries of %s changed".formatted(path); } } @@ -554,18 +668,32 @@ public SkyKey getSkyKey(BlazeDirectories directories) { } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) + protected boolean canBeRequestedUnconditionally() { + // Requesting directory trees in external repositories can result in cycles if the external + // repo now transitively depends on the requesting repo. + return !path.inExternalRepo(); + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) throws InterruptedException { - DirectoryTreeDigestValue value = - (DirectoryTreeDigestValue) env.getValue(getSkyKey(directories)); - if (value == null) { - return UNDECIDED; - } - if (!oldValue.equals(value.hexDigest())) { - return Optional.of("directory tree at %s changed".formatted(path)); + var skyKey = getSkyKey(directories); + try { + var directoryTreeDigestValue = + (DirectoryTreeDigestValue) env.getValueOrThrow(skyKey, IOException.class); + if (directoryTreeDigestValue == null) { + return MaybeValue.VALUES_MISSING; + } + return new MaybeValue.Valid(directoryTreeDigestValue.hexDigest()); + } catch (IOException e) { + return new MaybeValue.Invalid( + "failed to digest directory tree at %s: %s".formatted(path, e.getMessage())); } - return Optional.empty(); + } + + @Override + public String describeChange(String oldValue, String newValue) { + return "directory tree at %s changed".formatted(path); } } @@ -593,7 +721,7 @@ public static ImmutableMap> wrap( .collect(toImmutableMap(e -> new EnvVar(e.getKey()), Map.Entry::getValue)); } - private EnvVar(String name) { + public EnvVar(String name) { this.name = name; } @@ -629,24 +757,29 @@ public SkyKey getSkyKey(BlazeDirectories directories) { } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) + protected boolean canBeRequestedUnconditionally() { + // Environment variables are static data injected into Skyframe, so there is no risk of + // cycles. + return true; + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) throws InterruptedException { - var envValue = (EnvironmentVariableValue) env.getValue(getSkyKey(directories)); - if (envValue == null) { - return Optional.empty(); - } - String v = envValue.value(); - // Note that `oldValue` can be null if the env var was not set. - if (!Objects.equals(oldValue, v)) { - return Optional.of( - "environment variable %s changed: %s -> %s" - .formatted( - name, - oldValue == null ? "" : "'%s'".formatted(oldValue), - v == null ? "" : "'%s'".formatted(v))); + var value = (EnvironmentVariableValue) env.getValue(getSkyKey(directories)); + if (value == null) { + return MaybeValue.VALUES_MISSING; } - return Optional.empty(); + return new MaybeValue.Valid(value.value()); + } + + @Override + public String describeChange(String oldValue, String newValue) { + return "environment variable %s changed: %s -> %s" + .formatted( + name, + oldValue == null ? "" : "'%s'".formatted(oldValue), + newValue == null ? "" : "'%s'".formatted(newValue)); } } @@ -721,32 +854,32 @@ public SkyKey getSkyKey(BlazeDirectories directories) { } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) + protected boolean canBeRequestedUnconditionally() { + // Starlark can only request the mapping of the repo it is currently executing from, which + // means that the repo has already been fetched (either to execute the code or to verify the + // transitive .bzl hash). Further cycles aren't possible. + return true; + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) throws InterruptedException { - RepositoryMappingValue repoMappingValue = - (RepositoryMappingValue) env.getValue(getSkyKey(directories)); + var repoMappingValue = (RepositoryMappingValue) env.getValue(getSkyKey(directories)); if (Objects.equals(repoMappingValue, RepositoryMappingValue.NOT_FOUND_VALUE)) { - return Optional.of("source repo %s doesn't exist anymore".formatted(sourceRepo)); - } - RepositoryName oldCanonicalName; - try { - oldCanonicalName = RepositoryName.create(oldValue); - } catch (LabelSyntaxException e) { - // malformed old value causes refetch - return Optional.of("invalid recorded repo name: %s".formatted(e.getMessage())); - } - RepositoryName newCanonicalName = repoMappingValue.repositoryMapping().get(apparentName); - if (!oldCanonicalName.equals(newCanonicalName)) { - return Optional.of( - "canonical name for @%s in %s changed: %s -> %s" - .formatted( - apparentName, - sourceRepo, - oldCanonicalName, - newCanonicalName == null ? "" : newCanonicalName)); + return new MaybeValue.Invalid("source repo %s doesn't exist anymore".formatted(sourceRepo)); } - return Optional.empty(); + RepositoryName canonicalName = repoMappingValue.repositoryMapping().get(apparentName); + return new MaybeValue.Valid(canonicalName != null ? canonicalName.getName() : null); + } + + @Override + public String describeChange(String oldValue, String newValue) { + return "canonical name for @%s in %s changed: %s -> %s" + .formatted( + apparentName, + sourceRepo, + oldValue == null ? "" : oldValue, + newValue == null ? "" : newValue); } } @@ -789,9 +922,19 @@ public SkyKey getSkyKey(BlazeDirectories directories) { } @Override - public Optional isOutdated( - Environment env, BlazeDirectories directories, @Nullable String oldValue) { - return Optional.of(reason); + protected boolean canBeRequestedUnconditionally() { + return true; + } + + @Override + public MaybeValue getValue(Environment env, BlazeDirectories directories) { + return new MaybeValue.Invalid(reason); + } + + @Override + public String describeChange(String oldValue, String newValue) { + throw new UnsupportedOperationException( + "the value for this sentinel input is always invalid"); } } } diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java index 1a2a27ef7e3a5f..878429eb7252d4 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java @@ -308,9 +308,9 @@ public SkyValue compute(SkyKey skyKey, Environment env) } if (repoContentsCache.isEnabled()) { // This repo is eligible for the repo contents cache. - Path cachedRepoDir; + CandidateRepo newCacheEntry; try { - cachedRepoDir = + newCacheEntry = repoContentsCache.moveToCache( repoRoot, digestWriter.markerPath, digestWriter.predeclaredInputHash); } catch (IOException e) { @@ -321,6 +321,7 @@ public SkyValue compute(SkyKey skyKey, Environment env) e), Transience.TRANSIENT); } + Path cachedRepoDir = newCacheEntry.contentsDir(); // Don't forget to register a FileStateValue on the cache repo dir, so that we know to // refetch if the cache entry gets GC'd from under us or the entire cache is deleted. // diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java index 4471325f4649ac..4d6657c9178a43 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java @@ -239,12 +239,23 @@ public Optional isAnyRecordedInputOutdated( Map recordedInputValues, Environment env) throws InterruptedException { - return RepoRecordedInput.isAnyValueOutdated( - env, - directories, + var withValues = recordedInputValues.entrySet().stream() .map(e -> new RepoRecordedInput.WithValue(e.getKey(), e.getValue())) - .collect(com.google.common.collect.ImmutableList.toImmutableList())); + .collect(com.google.common.collect.ImmutableList.toImmutableList()); + // Check inputs in batches to prevent Skyframe cycles caused by outdated dependencies: a batch + // ends right before any input that may cause a cycle if requested while earlier inputs are out + // of date. A later batch's deps are only requested after the earlier batches have been + // confirmed up to date across Skyframe restarts (isAnyValueOutdated returns a non-empty reason + // when its batch's values are still missing, short-circuiting this loop). + for (var batch : RepoRecordedInput.WithValue.splitIntoBatches(withValues)) { + Optional outdatedReason = + RepoRecordedInput.isAnyValueOutdated(env, directories, batch); + if (outdatedReason.isPresent()) { + return outdatedReason; + } + } + return Optional.empty(); } public static RootedPath getRootedPathFromLabel(Label label, Environment env) diff --git a/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java b/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java index dae6be132a65a1..ea928c53d6115f 100644 --- a/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java +++ b/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java @@ -15,8 +15,11 @@ package com.google.devtools.build.lib.rules.repository; import static com.google.common.truth.Truth.assertThat; +import static com.google.devtools.build.lib.rules.repository.RepoRecordedInput.WithValue.parse; +import static com.google.devtools.build.lib.rules.repository.RepoRecordedInput.WithValue.splitIntoBatches; import static org.mockito.Mockito.when; +import com.google.common.collect.ImmutableList; import com.google.common.io.BaseEncoding; import com.google.devtools.build.lib.actions.FileContentsProxy; import com.google.devtools.build.lib.actions.FileStateValue.RegularFileStateValueWithContentsProxy; @@ -35,8 +38,8 @@ @RunWith(JUnit4.class) public class RepoRecordedInputTest extends BuildViewTestCase { private static void assertMarkerFileEscaping(String testCase) { - String escaped = RepoRecordedInput.WithValue.escape(testCase); - assertThat(RepoRecordedInput.WithValue.unescape(escaped)).isEqualTo(testCase); + String escaped = RepoRecordedInput.escape(testCase); + assertThat(RepoRecordedInput.unescape(escaped)).isEqualTo(testCase); } @Test @@ -70,4 +73,22 @@ public void testFileValueToMarkerValue() throws Exception { String expectedDigest = BaseEncoding.base16().lowerCase().encode(path.asPath().getDigest()); assertThat(RepoRecordedInput.File.fileValueToMarkerValue(path, fv)).isEqualTo(expectedDigest); } + + @Test + public void testSplitIntoBatches() { + assertThat(splitIntoBatches(ImmutableList.of())).isEmpty(); + assertThat( + splitIntoBatches( + ImmutableList.of( + parse("FILE:@@//foo:bar abc").orElseThrow(), + parse("FILE:@@//:baz cba").orElseThrow(), + parse("FILE:@@foo//:baz bac").orElseThrow(), + parse("ENV:KEY value").orElseThrow()))) + .containsExactly( + ImmutableList.of( + parse("FILE:@@//foo:bar abc").orElseThrow(), + parse("FILE:@@//:baz cba").orElseThrow()), + ImmutableList.of( + parse("FILE:@@foo//:baz bac").orElseThrow(), parse("ENV:KEY value").orElseThrow())); + } } diff --git a/src/test/py/bazel/bzlmod/bazel_module_test.py b/src/test/py/bazel/bzlmod/bazel_module_test.py index 5d1e11037cc48d..9aef1ec859e7c6 100644 --- a/src/test/py/bazel/bzlmod/bazel_module_test.py +++ b/src/test/py/bazel/bzlmod/bazel_module_test.py @@ -1570,6 +1570,66 @@ def testInvalidRepoRuleReferencedByTargetDoesNotCrash(self): self.assertNotIn('FATAL', stderr) self.assertIn("compilation of module 'repo.bzl' failed", stderr) + def testReverseDependencyDirection(self): + # Set up two module extensions that retain their predeclared input hashes + # across two builds but still reverse their dependency direction. Depending + # on how repo cache candidates are checked, this could lead to a Skyframe + # cycle. + self.ScratchFile( + 'MODULE.bazel', + [ + 'foo_ext = use_extension("//:repo.bzl", "foo_ext")', + 'use_repo(foo_ext, "foo")', + 'bar_ext = use_extension("//:repo.bzl", "bar_ext")', + 'use_repo(bar_ext, "bar")', + ], + ) + self.ScratchFile('BUILD.bazel') + self.ScratchFile( + 'repo.bzl', + [ + 'def _repo_impl(rctx):', + ' rctx.file("output.txt", rctx.attr.content)', + ' rctx.file("BUILD", "exports_files([\'output.txt\'])")', + ' print("JUST FETCHED: %s" % rctx.original_name)', + ' return rctx.repo_metadata(reproducible=True)', + 'repo = repository_rule(', + ' implementation = _repo_impl,', + ' attrs = {"content": attr.string()},', + ')', + 'def _foo_ext_impl(mctx):', + ' deps = mctx.read(Label("@//:foo_deps.txt")).splitlines()', + ' content = "foo"', + ' for dep in deps:', + ' if dep:', + ' content += " + " + mctx.read(Label(dep))', + ' repo(name = "foo", content = content)', + 'foo_ext = module_extension(implementation = _foo_ext_impl)', + 'def _bar_ext_impl(mctx):', + ' deps = mctx.read(Label("@//:bar_deps.txt")).splitlines()', + ' content = "bar"', + ' for dep in deps:', + ' if dep:', + ' content += " + " + mctx.read(Label(dep))', + ' repo(name = "bar", content = content)', + 'bar_ext = module_extension(implementation = _bar_ext_impl)', + ], + ) + + self.ScratchFile('foo_deps.txt', ['@bar//:output.txt']) + self.ScratchFile('bar_deps.txt') + + # First fetch: not cached + _, _, stderr = self.RunBazel(['build', '@foo//:output.txt']) + self.assertIn('JUST FETCHED: bar', '\n'.join(stderr)) + self.assertIn('JUST FETCHED: foo', '\n'.join(stderr)) + + # After expunging and reversing: cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('foo_deps.txt') + self.ScratchFile('bar_deps.txt', ['@foo//:output.txt']) + self.RunBazel(['build', '@foo//:output.txt']) + if __name__ == '__main__': absltest.main() diff --git a/src/test/py/bazel/bzlmod/repo_contents_cache_test.py b/src/test/py/bazel/bzlmod/repo_contents_cache_test.py index 60ea22b616d088..274d3cff6fc575 100644 --- a/src/test/py/bazel/bzlmod/repo_contents_cache_test.py +++ b/src/test/py/bazel/bzlmod/repo_contents_cache_test.py @@ -480,12 +480,7 @@ def testReverseDependencyDirection(self): self.RunBazel(['clean', '--expunge']) self.ScratchFile('foo_deps.txt', ['']) self.ScratchFile('bar_deps.txt', ['@foo//:output.txt']) - exit_code, stdout, stderr = self.RunBazel( - ['build', '@foo//:output.txt'], allow_failure=True - ) - # TODO: b/xxxxxxx - This is NOT the intended behavior. - self.AssertNotExitCode(exit_code, 0, stderr, stdout) - self.assertIn('possible dependency cycle detected', '\n'.join(stderr)) + self.RunBazel(['build', '@foo//:output.txt']) def doTestRepoContentsCacheDeleted(self, check_external_repository_files): repo_contents_cache = self.ScratchDir('repo_contents_cache') From 4c7aae0235cf49a4922cc1fd1b0a369111801518 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Wed, 24 Jun 2026 00:08:20 +0200 Subject: [PATCH 2/4] Fix remote repo contents cache issues * The cache was always written to, even if not enabled. (On 8.x this guard was already folded into the remote repo contents cache port, #29075, so only the second fix below is applied here.) * Google RBE doesn't accept `Command`s without the (deprecated) `Platform` field set. We set it both on `Command` and `Action`, just to be safe. Fixes https://github.com/bazelbuild/bazel/pull/28294#issuecomment-3748866589 Closes #28295. PiperOrigin-RevId: 856169835 Change-Id: I2479119a173e325a7d39643a36536569f5f831fc (cherry picked from commit a9946096847e22de98e0e11b1f5dfbb6ec6ecdbb) --- .../devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java index c8a2c20a5f9fc5..1391d1f2ca04f2 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java @@ -22,6 +22,7 @@ import build.bazel.remote.execution.v2.Action; import build.bazel.remote.execution.v2.Command; import build.bazel.remote.execution.v2.Directory; +import build.bazel.remote.execution.v2.Platform; import build.bazel.remote.execution.v2.Tree; import com.google.common.base.Splitter; import com.google.common.base.Throwables; @@ -88,6 +89,7 @@ public final class RemoteRepoContentsCacheImpl implements RemoteRepoContentsCach .addOutputPaths(REPO_DIRECTORY_PATH) .addOutputFiles(MARKER_FILE_PATH) .addOutputDirectories(REPO_DIRECTORY_PATH) + .setPlatform(Platform.getDefaultInstance()) .build(); private static final Directory INPUT_ROOT = Directory.getDefaultInstance(); @@ -118,6 +120,7 @@ public RemoteRepoContentsCacheImpl( Action.newBuilder() .setCommandDigest(digestUtil.compute(COMMAND)) .setInputRootDigest(digestUtil.compute(INPUT_ROOT)) + .setPlatform(Platform.getDefaultInstance()) .build(); } From c6125b505167a6d54e54cd4fe9ff8f9a0b7e3c43 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Wed, 24 Jun 2026 10:18:06 +0200 Subject: [PATCH 3/4] Materialize important outputs from remote external repos Important outputs and runfiles from external repos that are remote repo contents cache hits got stuck at various levels of the materialization pipeline for being source artifacts. This is fixed by consolidating the skip logic in a `RemoteOutputChecker.mayBeRemote` static helper and letting external-repo source artifacts flow through the toplevel-output download path. 8.x adaptation: v9 routes toplevel outputs through `RemoteImportantOutputHandler`, which does not exist on 8.x (`ImportantOutputHandler` has no production implementor). The equivalent toplevel-output materialization on 8.x lives in `CompletionFunction.ensureToplevelArtifacts`/`downloadArtifact`, which previously bailed out on non-`DerivedArtifact`s and only ran under skymeld. This change lets external-repo source artifacts (which have no generating action and are thus never downloaded by `finalizeAction`) flow through that path in both skymeld and non-skymeld builds, so they honor `--remote_download_outputs` just like build outputs do. `AbstractActionInputPrefetcher.prefetchFiles` already only skips main-repo source artifacts, so it is unchanged beyond annotating the `action` parameter `@Nullable` (source artifacts have no generating action). Closes #28308. PiperOrigin-RevId: 881618604 Change-Id: Ifaae8e39b0bcab3803653ca82bcf00d26c487316 (cherry picked from commit 16613f14abf10f7d7747b0cda590cef08ed06ed9) --- .../lib/actions/ActionInputPrefetcher.java | 5 ++- .../remote/AbstractActionInputPrefetcher.java | 2 +- .../build/lib/remote/RemoteOutputChecker.java | 28 +++++++++---- .../lib/skyframe/CompletionFunction.java | 34 +++++++++++---- .../bzlmod/remote_repo_contents_cache_test.py | 41 ++++++++++++++++--- 5 files changed, 85 insertions(+), 25 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/actions/ActionInputPrefetcher.java b/src/main/java/com/google/devtools/build/lib/actions/ActionInputPrefetcher.java index f1ed84d33fe159..a0152db7ef766e 100644 --- a/src/main/java/com/google/devtools/build/lib/actions/ActionInputPrefetcher.java +++ b/src/main/java/com/google/devtools/build/lib/actions/ActionInputPrefetcher.java @@ -17,6 +17,7 @@ import com.google.common.util.concurrent.ListenableFuture; import java.io.IOException; +import javax.annotation.Nullable; /** Prefetches files to local disk. */ public interface ActionInputPrefetcher { @@ -34,7 +35,7 @@ public interface MetadataSupplier { new ActionInputPrefetcher() { @Override public ListenableFuture prefetchFiles( - ActionExecutionMetadata action, + @Nullable ActionExecutionMetadata action, Iterable inputs, MetadataSupplier metadataSupplier, Priority priority, @@ -90,7 +91,7 @@ enum Reason { * @return future success if prefetch is finished or {@link IOException}. */ ListenableFuture prefetchFiles( - ActionExecutionMetadata action, + @Nullable ActionExecutionMetadata action, Iterable inputs, MetadataSupplier metadataSupplier, Priority priority, diff --git a/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java b/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java index 45c114ea2cdadc..638f9d7a561efc 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java +++ b/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java @@ -329,7 +329,7 @@ protected void prefetchVirtualActionInput(VirtualActionInput input) throws IOExc */ @Override public ListenableFuture prefetchFiles( - ActionExecutionMetadata action, + @Nullable ActionExecutionMetadata action, Iterable inputs, MetadataSupplier metadataSupplier, Priority priority, diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteOutputChecker.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteOutputChecker.java index b295e050a86253..eebf092af061c9 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteOutputChecker.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteOutputChecker.java @@ -190,24 +190,21 @@ private void addRunfiles(ProviderCollection buildTarget) { } var runfiles = runfilesSupport.getRunfiles(); for (Artifact runfile : runfiles.getArtifacts().toList()) { - if (runfile.isSourceArtifact()) { - continue; + if (mayBeRemote(runfile)) { + addOutputToDownload(runfile); } - addOutputToDownload(runfile); } for (var symlink : runfiles.getSymlinks().toList()) { var artifact = symlink.getArtifact(); - if (artifact.isSourceArtifact()) { - continue; + if (mayBeRemote(artifact)) { + addOutputToDownload(artifact); } - addOutputToDownload(artifact); } for (var symlink : runfiles.getRootSymlinks().toList()) { var artifact = symlink.getArtifact(); - if (artifact.isSourceArtifact()) { - continue; + if (mayBeRemote(artifact)) { + addOutputToDownload(artifact); } - addOutputToDownload(artifact); } } @@ -292,6 +289,19 @@ private boolean matchesPattern(PathFragment execPath) { return false; } + /** + * Returns whether this {@link ActionInput} could conceivably be only available remotely. + * + *

Use this as a quick check to avoid unnecessary extra work for artifacts that are definitely + * local. + */ + public static boolean mayBeRemote(ActionInput actionInput) { + return !(actionInput instanceof Artifact artifact + && artifact.isSourceArtifact() + // Source artifacts in the main repo don't need to be fetched. + && (artifact.getOwner() == null || artifact.getOwner().getRepository().isMain())); + } + /** Returns whether this {@link ActionInput} should be downloaded. */ @Override public boolean shouldDownloadOutput(ActionInput output, RemoteFileArtifactValue metadata) { diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/CompletionFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/CompletionFunction.java index 00befa94bf0a73..467b915a2e592c 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/CompletionFunction.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/CompletionFunction.java @@ -386,13 +386,6 @@ public SkyValue compute(SkyKey skyKey, Environment env) private void ensureToplevelArtifacts( Environment env, ImmutableCollection importantArtifacts, ActionInputMap inputMap) throws CompletionFunctionException, InterruptedException { - // For skymeld, a non-toplevel target might become a toplevel after it has been executed. This - // is the last chance to download the missing toplevel outputs in this case before sending out - // TargetCompleteEvent. See https://github.com/bazelbuild/bazel/issues/20737. - if (!isSkymeld.get()) { - return; - } - var actionInputPrefetcher = skyframeActionExecutor.getActionInputPrefetcher(); if (actionInputPrefetcher == null || actionInputPrefetcher == ActionInputPrefetcher.NONE) { return; @@ -444,6 +437,33 @@ private void downloadArtifact( List> futures) throws InterruptedException { if (!(artifact instanceof DerivedArtifact derivedArtifact)) { + // Source artifacts from external repos (e.g. remote repo contents cache hits) have no + // generating action and are thus never downloaded by finalizeAction, so materialize the + // toplevel ones here so they honor --remote_download_outputs like build outputs do. Source + // artifacts in the main repo are always local. + if (artifact.getOwner() != null && !artifact.getOwner().getRepository().isMain()) { + var metadata = inputMap.getInputMetadata(artifact); + if (metadata != null + && metadata.isRemote() + && remoteArtifactChecker.shouldDownloadOutput( + artifact, (RemoteFileArtifactValue) metadata)) { + futures.add( + actionInputPrefetcher.prefetchFiles( + /* action= */ null, + ImmutableList.of(artifact), + inputMap::getInputMetadata, + Priority.LOW, + Reason.OUTPUTS)); + } + } + return; + } + + // Derived toplevel outputs are downloaded eagerly by finalizeAction during execution. For + // skymeld, a non-toplevel target might only become toplevel after it has been executed, so this + // is the last chance to download the missing toplevel outputs in that case before sending out + // TargetCompleteEvent. See https://github.com/bazelbuild/bazel/issues/20737. + if (!isSkymeld.get()) { return; } diff --git a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py index 79727294164277..0e08858ec1d0b8 100644 --- a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py +++ b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py @@ -617,6 +617,16 @@ def testUseRepoFileInBuildRule_actionDoesNotUseCache_withExplicitSandboxBase( ) def testLostRemoteFile_build(self): + self._runLostRemoteFileBuildTest( + '--experimental_merged_skyframe_analysis_execution' + ) + + def testLostRemoteFile_build_noSkymeld(self): + self._runLostRemoteFileBuildTest( + '--noexperimental_merged_skyframe_analysis_execution' + ) + + def _runLostRemoteFileBuildTest(self, skymeld_flag): # Create a repo with two BUILD files (one in a subpackage), build a target # from one to cause it to be cached, then build that target again after # expunging to verify it is cached. @@ -653,7 +663,7 @@ def testLostRemoteFile_build(self): repo_dir = self.RepoDir('my_repo') # First fetch: not cached - _, _, stderr = self.RunBazel(['build', '@my_repo//:root']) + _, _, stderr = self.RunBazel(['build', skymeld_flag, '@my_repo//:root']) self.assertIn('JUST FETCHED', '\n'.join(stderr)) self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) self.assertTrue(os.path.exists(os.path.join(repo_dir, 'root.txt'))) @@ -662,10 +672,10 @@ def testLostRemoteFile_build(self): # After expunging: cached self.RunBazel(['clean', '--expunge']) - _, _, stderr = self.RunBazel(['build', '@my_repo//:root']) + _, _, stderr = self.RunBazel(['build', skymeld_flag, '@my_repo//:root']) self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) - self.assertFalse(os.path.exists(os.path.join(repo_dir, 'root.txt'))) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'root.txt'))) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'sub/BUILD'))) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'sub/sub.txt'))) @@ -675,7 +685,7 @@ def testLostRemoteFile_build(self): # Build the other target: fails due to the lost input # TODO: #26450 - Assert success and enable the checks below. _, _, stderr = self.RunBazel( - ['build', '@my_repo//sub:sub'], allow_failure=True + ['build', skymeld_flag, '@my_repo//sub:sub'], allow_failure=True ) self.assertEqual( 1, @@ -699,12 +709,12 @@ def testLostRemoteFile_build(self): # After expunging again: cached self.RunBazel(['clean', '--expunge']) - _, _, stderr = self.RunBazel(['build', '@my_repo//sub:sub']) + _, _, stderr = self.RunBazel(['build', skymeld_flag, '@my_repo//sub:sub']) self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'root.txt'))) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'sub/BUILD'))) - self.assertFalse(os.path.exists(os.path.join(repo_dir, 'sub/sub.txt'))) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'sub/sub.txt'))) def testBzlFilePrefetching(self): self.ScratchFile( @@ -781,6 +791,25 @@ def testBzlFilePrefetching(self): os.path.exists(os.path.join(repo_dir, 'subdir/more_nested.bzl')) ) + def testRun(self): + self.ScratchFile( + 'MODULE.bazel', + [ + 'bazel_dep(name = "buildozer", version = "8.5.1")', + ], + ) + + # First fetch: not cached + _, stdout, _ = self.RunBazel(['run', '@buildozer', '--', '--version']) + self.assertIn('buildozer version: 8.5.1', '\n'.join(stdout)) + + # After expunging: cached + self.RunBazel(['clean', '--expunge']) + _, stdout, _ = self.RunBazel(['run', '@buildozer', '--', '--version']) + self.assertIn('buildozer version: 8.5.1', '\n'.join(stdout)) + repo_dir = self.RepoDir('buildozer') + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'MODULE.bazel'))) + if __name__ == '__main__': absltest.main() From 7a70d73609e3816065c88fcde504e9cf16cf83b0 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Mon, 16 Mar 2026 21:49:51 -0700 Subject: [PATCH 4/4] Use `FileArtifactValue#setContentsProxy` for remote repo contents cache Even though expiration times don't matter in Bazel (they aren't honored), supporting the contents proxy optimization avoids Skyframe invalidation when external repo files are materialized later. Along the way ensure that the persistent action cache always recreates remote metadata via `FileArtifactValue.createForRemoteFileWithMaterializationData`. Before this change, such metadata would roundtrip into a `RemoteFileArtifactValue` (without the content proxy optimization) if `expirationTime` is set to `null`. 8.x adaptation: the remote metadata factories still live on the nested `FileArtifactValue.RemoteFileArtifactValue` class (`create`/`createWithMaterializationData`) rather than the v9 top-level `createForRemoteFile*` overloads. The overlay filesystem therefore injects external repo files via `RemoteFileArtifactValue.createWithMaterializationData` (passing the expiration as epoch millis and a null materialization path), and `CompactPersistentActionCache.decodeRemoteMetadata` drops its early return to the non-materialization `RemoteFileArtifactValue.create` so decoded remote metadata always supports the `FileContentsProxy` optimization. Closes #28654. PiperOrigin-RevId: 884796123 Change-Id: Ib399aff3864f5fbbef230b64a7a5ef619bf854d0 (cherry picked from commit 257a224b57c90cc4138b4006f2f70169c162cebd) --- .../build/lib/actions/FileArtifactValue.java | 5 +++ .../cache/CompactPersistentActionCache.java | 4 --- .../RemoteExternalOverlayFileSystem.java | 31 +++++++++++++++---- .../build/lib/remote/RemoteModule.java | 3 +- 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/actions/FileArtifactValue.java b/src/main/java/com/google/devtools/build/lib/actions/FileArtifactValue.java index 27b81dad58b281..bd6d6a6f7e3356 100644 --- a/src/main/java/com/google/devtools/build/lib/actions/FileArtifactValue.java +++ b/src/main/java/com/google/devtools/build/lib/actions/FileArtifactValue.java @@ -572,6 +572,11 @@ private RemoteFileArtifactValue(byte[] digest, long size, int locationIndex) { this.locationIndex = locationIndex; } + /** + * Prefer {@link #createWithMaterializationData} if the remote file may be materialized in the + * local filesystem at a later point, as the value returned here doesn't support the {@link + * FileContentsProxy} optimization. + */ public static RemoteFileArtifactValue create(byte[] digest, long size, int locationIndex) { return new RemoteFileArtifactValue(digest, size, locationIndex); } diff --git a/src/main/java/com/google/devtools/build/lib/actions/cache/CompactPersistentActionCache.java b/src/main/java/com/google/devtools/build/lib/actions/cache/CompactPersistentActionCache.java index 7bcdb454854a94..5b7ff81f64b47f 100644 --- a/src/main/java/com/google/devtools/build/lib/actions/cache/CompactPersistentActionCache.java +++ b/src/main/java/com/google/devtools/build/lib/actions/cache/CompactPersistentActionCache.java @@ -585,10 +585,6 @@ private static RemoteFileArtifactValue decodeRemoteMetadata( PathFragment.create(getStringForIndex(indexer, VarInt.getVarInt(source))); } - if (expireAtEpochMilli < 0 && materializationExecPath == null) { - return RemoteFileArtifactValue.create(digest, size, locationIndex); - } - return RemoteFileArtifactValue.createWithMaterializationData( digest, size, locationIndex, expireAtEpochMilli, materializationExecPath); } diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java index d89181fa2c999e..ea1e7d754faa9a 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java @@ -59,6 +59,8 @@ import java.io.InterruptedIOException; import java.io.OutputStream; import java.nio.channels.SeekableByteChannel; +import java.time.Duration; +import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -98,6 +100,7 @@ public final class RemoteExternalOverlayFileSystem extends FileSystem { @Nullable private String buildRequestId; @Nullable private String commandId; @Nullable private MemoizingEvaluator evaluator; + @Nullable private Duration remoteCacheTtl; @Nullable private ExecutorService materializationExecutor; public RemoteExternalOverlayFileSystem(PathFragment externalDirectory, FileSystem nativeFs) { @@ -115,7 +118,8 @@ public void beforeCommand( Reporter reporter, String buildRequestId, String commandId, - MemoizingEvaluator evaluator) { + MemoizingEvaluator evaluator, + Duration remoteCacheTtl) { checkState( this.cache == null && this.inputPrefetcher == null @@ -123,6 +127,7 @@ public void beforeCommand( && this.buildRequestId == null && this.commandId == null && this.evaluator == null + && this.remoteCacheTtl == null && this.materializationExecutor == null); this.cache = cache; this.inputPrefetcher = inputPrefetcher; @@ -130,6 +135,7 @@ public void beforeCommand( this.buildRequestId = buildRequestId; this.commandId = commandId; this.evaluator = evaluator; + this.remoteCacheTtl = remoteCacheTtl; this.materializationExecutor = Executors.newThreadPerTaskExecutor( Thread.ofVirtual().name("remote-repo-materialization-", 0).factory()); @@ -146,6 +152,7 @@ public void afterCommand() { this.reporter = null; this.buildRequestId = null; this.commandId = null; + this.remoteCacheTtl = null; // Materializations happen synchronously and upon request by other repo rules, so there is no // reason to await their orderly completion in afterCommand. materializationExecutor.shutdownNow(); @@ -193,7 +200,12 @@ public boolean injectRemoteRepo(RepositoryName repo, Tree remoteContents, String var repoDir = externalDirectory.getChild(repo.getName()); var filesToPrefetch = new ArrayList(); injectRecursively( - externalFs, repoDir, remoteContents.getRoot(), childMap, filesToPrefetch::add); + externalFs, + repoDir, + remoteContents.getRoot(), + childMap, + filesToPrefetch::add, + Instant.now().plus(remoteCacheTtl)); try { // TODO: This prefetches a large number of small files. Investigate whether BatchReadBlobs // would be more efficient. @@ -220,7 +232,8 @@ private static void injectRecursively( PathFragment path, Directory dir, ImmutableMap childMap, - Consumer filesToPrefetch) + Consumer filesToPrefetch, + Instant expirationTime) throws IOException { fs.createDirectoryAndParents(path); for (var file : dir.getFilesList()) { @@ -230,10 +243,16 @@ private static void injectRecursively( } fs.injectFile( filePath, - FileArtifactValue.RemoteFileArtifactValue.create( + // Using the *WithMaterializationData variant ensures that the file benefits from the + // FileContentsProxy optimization to avoid widespread invalidation when it is + // materialized later, even if expiration times aren't relevant (depends on the usage + // of the lease extension). + FileArtifactValue.RemoteFileArtifactValue.createWithMaterializationData( DigestUtil.toBinaryDigest(file.getDigest()), file.getDigest().getSizeBytes(), - /* locationIndex= */ 1)); + /* locationIndex= */ 1, + expirationTime.toEpochMilli(), + /* materializationExecPath= */ null)); fs.getPath(filePath).setExecutable(file.getIsExecutable()); // The RE API does not track whether a file is readable or writable. We choose to make all // files readable and not writable to ensure that other repo rules can't accidentally modify @@ -252,7 +271,7 @@ private static void injectRecursively( "Directory %s with digest %s not found in tree" .formatted(subdirPath, subdirNode.getDigest().getHash())); } - injectRecursively(fs, subdirPath, subdir, childMap, filesToPrefetch); + injectRecursively(fs, subdirPath, subdir, childMap, filesToPrefetch, expirationTime); } } diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java index 7d4dc72c888cf1..0a29616124622c 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java @@ -778,7 +778,8 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException { env.getReporter(), buildRequestId, invocationId, - env.getSkyframeExecutor().getEvaluator()); + env.getSkyframeExecutor().getEvaluator(), + remoteOptions.remoteCacheTtl); } buildEventArtifactUploaderFactoryDelegate.init(