From 9bcc021fd2ea206c1bbf2e1f80af794690ffc9bf Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Wed, 24 Jun 2026 11:16:59 +0200 Subject: [PATCH 1/4] Support dynamic inputs with the remote repo contents cache With this change, all reproducible repository rules can now be cached in a disk or remote cache, including those with dependencies recorded dynamically during evaluation. This is made possible by introducing a new intermediate type of synthetic AC entries. When looking up the predeclared inputs hash for a repo rule with dynamic dependencies, the action result for such an intermediate entry lists one or more sets of inputs (e.g. a particular file in another repo or an environment variable name). These inputs are then requested from Skyframe and their current values are hashed to obtain the key of the next AC entry, which is again either an intermediate entry or a final entry containing the contents of the repository. 8.x: adapted to the split RepositoryDelegatorFunction/StarlarkRepositoryFunction restart model (there is no merged RepositoryFetchFunction worker on 8.x). The cache lookup is wired through RepositoryDelegatorFunction, which returns null to trigger a Skyframe restart when lookupCache reports missing values via env.valuesMissing(). Since DigestWriter is a nested class of RepositoryDelegatorFunction on 8.x rather than a standalone target, the marker file is parsed inline in RemoteRepoContentsCacheImpl via RepoRecordedInput.WithValue.parse to avoid a dependency cycle between the remote and repository rule libraries. RELNOTES: The remote repo contents cache now supports all reproducible repo rules. Closes #27634. PiperOrigin-RevId: 889750228 Change-Id: I9c7e4fed9d86432a85a96b3318f6eccc9c0558eb (cherry picked from commit ffebc5b1195ac918e177486c9f4eb29004923813) --- .../java/com/google/devtools/build/lib/BUILD | 1 + .../google/devtools/build/lib/remote/BUILD | 1 + .../build/lib/remote/CombinedCache.java | 11 +- .../RemoteExternalOverlayFileSystem.java | 6 +- .../build/lib/remote/RemoteModule.java | 1 + .../remote/RemoteRepoContentsCacheImpl.java | 481 ++++++++++++++---- .../RepositoryRemoteHelpersFactoryImpl.java | 12 +- .../RepositoryDelegatorFunction.java | 9 +- .../lib/runtime/RemoteRepoContentsCache.java | 5 +- .../bzlmod/remote_repo_contents_cache_test.py | 279 +++++++++- 10 files changed, 704 insertions(+), 102 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/BUILD b/src/main/java/com/google/devtools/build/lib/BUILD index 82f62b05e62f58..4ad86766f353fc 100644 --- a/src/main/java/com/google/devtools/build/lib/BUILD +++ b/src/main/java/com/google/devtools/build/lib/BUILD @@ -288,6 +288,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/cmdline", "//src/main/java/com/google/devtools/build/lib/events", "//src/main/java/com/google/devtools/build/lib/vfs", + "//src/main/java/com/google/devtools/build/skyframe:skyframe-objects", ], ) diff --git a/src/main/java/com/google/devtools/build/lib/remote/BUILD b/src/main/java/com/google/devtools/build/lib/remote/BUILD index c00dcb0d86766f..e82ce27e1139f1 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/BUILD +++ b/src/main/java/com/google/devtools/build/lib/remote/BUILD @@ -119,6 +119,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/remote/util", "//src/main/java/com/google/devtools/build/lib/remote/util:digest_utils", "//src/main/java/com/google/devtools/build/lib/remote/zstd", + "//src/main/java/com/google/devtools/build/lib/rules:repository/repo_recorded_input", "//src/main/java/com/google/devtools/build/lib/skyframe:action_execution_value", "//src/main/java/com/google/devtools/build/lib/skyframe:sky_functions", "//src/main/java/com/google/devtools/build/lib/skyframe:tree_artifact_value", diff --git a/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java b/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java index 4d59c40fce020d..e9beee4733fd64 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java +++ b/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java @@ -211,6 +211,15 @@ public CachedActionResult downloadActionResult( boolean inlineOutErr, Set inlineOutputFiles) throws IOException, InterruptedException { + return getFromFuture( + downloadActionResultAsync(context, actionKey, inlineOutErr, inlineOutputFiles)); + } + + public ListenableFuture downloadActionResultAsync( + RemoteActionExecutionContext context, + ActionKey actionKey, + boolean inlineOutErr, + Set inlineOutputFiles) { var spawnExecutionContext = context.getSpawnExecutionContext(); ListenableFuture future = immediateFuture(null); @@ -254,7 +263,7 @@ public CachedActionResult downloadActionResult( directExecutor()); } - return getFromFuture(future); + return future; } private ListenableFuture downloadActionResultFromRemote( diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java index ea1e7d754faa9a..049c3411033744 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java @@ -193,11 +193,13 @@ public void afterCommand() { */ public boolean injectRemoteRepo(RepositoryName repo, Tree remoteContents, String markerFile) throws IOException, InterruptedException { + var repoDir = externalDirectory.getChild(repo.getName()); + deleteTree(repoDir); + var unused = delete(externalDirectory.getChild(repo.getMarkerFileName())); var childMap = remoteContents.getChildrenList().stream() .collect( toImmutableMap(cache.digestUtil::compute, directory -> directory, (a, b) -> a)); - var repoDir = externalDirectory.getChild(repo.getName()); var filesToPrefetch = new ArrayList(); injectRecursively( externalFs, @@ -220,7 +222,7 @@ public boolean injectRemoteRepo(RepositoryName repo, Tree remoteContents, String } // Create the repo directory on disk so that readdir reflects the overlaid state of the external // directory. - nativeFs.createDirectoryAndParents(externalDirectory.getChild(repo.getName())); + nativeFs.createDirectoryAndParents(repoDir); // Keep the marker file contents in memory so that it can be written out when the repo is // materialized. This doubles as a presence marker for the in-memory repo contents. markerFileContents.put(repo.getName(), markerFile); diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java index 0a29616124622c..92aeefef05abec 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java @@ -762,6 +762,7 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException { repositoryRemoteHelpersFactoryDelegate.init( new RepositoryRemoteHelpersFactoryImpl( + env.getDirectories(), actionContextProvider.getCombinedCache(), actionContextProvider.getRemoteExecutionClient(), buildRequestId, diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java index 1391d1f2ca04f2..d37a2195bb072c 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java @@ -15,27 +15,40 @@ package com.google.devtools.build.lib.remote; import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.common.util.concurrent.Futures.immediateFuture; +import static com.google.common.util.concurrent.Futures.transformAsync; +import static com.google.common.util.concurrent.Futures.whenAllSucceed; import static com.google.common.util.concurrent.MoreExecutors.directExecutor; import static com.google.devtools.build.lib.remote.util.Utils.waitForBulkTransfer; +import static com.google.devtools.build.lib.unsafe.StringUnsafe.getInternalStringBytes; +import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.util.stream.Collectors.joining; import build.bazel.remote.execution.v2.Action; +import build.bazel.remote.execution.v2.ActionResult; import build.bazel.remote.execution.v2.Command; +import build.bazel.remote.execution.v2.Digest; import build.bazel.remote.execution.v2.Directory; +import build.bazel.remote.execution.v2.OutputDirectory; +import build.bazel.remote.execution.v2.OutputFile; import build.bazel.remote.execution.v2.Platform; import build.bazel.remote.execution.v2.Tree; +import com.google.common.base.Preconditions; import com.google.common.base.Splitter; import com.google.common.base.Throwables; +import com.google.common.collect.Collections2; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.devtools.build.lib.actions.ActionInput; import com.google.devtools.build.lib.actions.ExecException; +import com.google.devtools.build.lib.actions.Spawn; +import com.google.devtools.build.lib.analysis.BlazeDirectories; import com.google.devtools.build.lib.cmdline.RepositoryName; import com.google.devtools.build.lib.events.Event; import com.google.devtools.build.lib.events.ExtendedEventHandler; -import com.google.devtools.build.lib.actions.Spawn; import com.google.devtools.build.lib.exec.SpawnInputExpander; import com.google.devtools.build.lib.exec.SpawnRunner; import com.google.devtools.build.lib.remote.common.RemoteActionExecutionContext; @@ -44,18 +57,24 @@ import com.google.devtools.build.lib.remote.common.RemotePathResolver; import com.google.devtools.build.lib.remote.util.DigestUtil; import com.google.devtools.build.lib.remote.util.TracingMetadataUtils; +import com.google.devtools.build.lib.rules.repository.RepoRecordedInput; import com.google.devtools.build.lib.runtime.RemoteRepoContentsCache; import com.google.devtools.build.lib.unsafe.StringUnsafe; +import com.google.devtools.build.lib.util.Fingerprint; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; +import com.google.devtools.build.skyframe.SkyFunction; import com.google.protobuf.ByteString; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Optional; import java.util.SortedMap; import java.util.UUID; -import java.util.concurrent.ExecutionException; +import javax.annotation.Nullable; /** * A cache for the contents of external repositories that is backed by an ordinary remote cache. @@ -66,19 +85,32 @@ * action executed locally. This can save both time taken to execute repo rules and compute file * digests and disk space required to store the contents of external repositories. * - *

Repositories are cached as AC entries for a synthetic command with the predeclared input hash - * as the salt. The contents are represented as an output file for the marker file and an output - * directory for the contents. + *

Repositories are cached as AC entries for a synthetic command with a special hash as the salt. + * The contents are represented as an output file for the marker file and an output directory for + * the contents. + * + *

If a repo rule does not record any {@link RepoRecordedInput}s during its execution, this hash + * is just the predeclared inputs hash. Otherwise, the AC entry for the predeclared inputs hash will + * be an intermediate entry that lists one or more sets of {@link RepoRecordedInput}s that a + * previously cached repo consumed during the evaluation of its rule. The cache requests the current + * values of these inputs and computes the next hash to look up by a rolling construction that + * combines the previous hash with the string representations of the {@link + * RepoRecordedInput.WithValue}. This process is repeated until a final entry with the repo contents + * is found or no matching entry exists. * - *

At this point the cache only supports repository rules with no dependencies expressed at - * runtime. Verifying whether such dependencies are up to date can't be done via a single hash as - * the set of dependencies is not known ahead of time. Support for such rules would require a - * two-stage cache lookup in which the first lookup may produce multiple marker files. + *

By representing repos with recorded inputs as DAGs of AC entries, lookups are efficient (they + * don't scale with the number of cached repos per predeclared inputs hash) and regular LRU eviction + * policies remain effective for the most part. If a repo rule often requests different inputs even + * with the same predeclared inputs hash and previously requested inputs and values, it could result + * in large action results that grow over time. This is considered an acceptable trade-off for + * simplicity for now and could be mitigated in the future by an explicit GC mechanism such as + * "least recently added" eviction when the size of action result exceeds a certain threshold. */ public final class RemoteRepoContentsCacheImpl implements RemoteRepoContentsCache { private static final UUID GUID = UUID.fromString("f4a165a9-5557-45a7-bf25-230b6d42393a"); private static final String MARKER_FILE_PATH = ".recorded_inputs"; private static final String REPO_DIRECTORY_PATH = "repo_contents"; + private static final Splitter SPLIT_ON_SPACE = Splitter.on(' '); private static final Command COMMAND = Command.newBuilder() @@ -91,8 +123,10 @@ public final class RemoteRepoContentsCacheImpl implements RemoteRepoContentsCach .addOutputDirectories(REPO_DIRECTORY_PATH) .setPlatform(Platform.getDefaultInstance()) .build(); + private static final ByteString COMMAND_BYTES = COMMAND.toByteString(); private static final Directory INPUT_ROOT = Directory.getDefaultInstance(); + private final BlazeDirectories directories; private final CombinedCache cache; private final String buildRequestId; private final String commandId; @@ -101,17 +135,20 @@ public final class RemoteRepoContentsCacheImpl implements RemoteRepoContentsCach private final boolean verboseFailures; private final DigestUtil digestUtil; private final Action baseAction; + private final Digest commandDigest; public RemoteRepoContentsCacheImpl( + BlazeDirectories directories, CombinedCache cache, String buildRequestId, String commandId, boolean acceptCached, boolean uploadLocalResults, boolean verboseFailures) { + this.directories = directories; + this.cache = cache; this.buildRequestId = buildRequestId; this.commandId = commandId; - this.cache = cache; this.acceptCached = acceptCached; this.uploadLocalResults = uploadLocalResults; this.verboseFailures = verboseFailures; @@ -122,6 +159,7 @@ public RemoteRepoContentsCacheImpl( .setInputRootDigest(digestUtil.compute(INPUT_ROOT)) .setPlatform(Platform.getDefaultInstance()) .build(); + this.commandDigest = digestUtil.compute(COMMAND); } @Override @@ -139,32 +177,33 @@ public void addToCache( if (!(fetchedRepoDir.getFileSystem() instanceof RemoteExternalOverlayFileSystem)) { return; } - var context = buildContext(repoName); + var context = buildContext(repoName, CacheOp.UPLOAD); if (!context.getWriteCachePolicy().allowRemoteCache()) { return; } + List recordedInputValues; try { - if (FileSystemUtils.readLinesAsLatin1(fetchedRepoMarkerFile).stream() - .filter(line -> !line.isEmpty()) - .count() - != 1) { - // This cache currently only supports marker files that contain nothing but the predeclared - // inputs hash. Repo rules with dependencies expressed only at runtime would require a - // two-stage cache lookup. Among the rules that are supported are http_archive and - // git_repository without patches. + var maybeRecordedInputValues = + readMarkerFile( + FileSystemUtils.readContent(fetchedRepoMarkerFile, ISO_8859_1), predeclaredInputHash); + if (maybeRecordedInputValues.isEmpty()) { return; } + recordedInputValues = maybeRecordedInputValues.get(); } catch (IOException e) { reporter.handle( Event.warn( - "Failed to read marker file repo %s, skipping: %s" + "Failed to read marker file for repo %s, skipping: %s" .formatted(repoName, maybeGetStackTrace(e)))); + return; } - var action = buildAction(predeclaredInputHash); - var actionKey = new ActionKey(digestUtil.compute(action)); - var remotePathResolver = new RepoRemotePathResolver(fetchedRepoMarkerFile, fetchedRepoDir); try { // TODO: Consider uploading asynchronously. + var finalHash = + uploadIntermediateActionResults(context, predeclaredInputHash, recordedInputValues); + var action = buildAction(finalHash); + var actionKey = new ActionKey(digestUtil.compute(action)); + var remotePathResolver = new RepoRemotePathResolver(fetchedRepoMarkerFile, fetchedRepoDir); var unused = UploadManifest.create( /* remoteOptions= */ null, @@ -194,10 +233,10 @@ public boolean lookupCache( RepositoryName repoName, Path repoDir, String predeclaredInputHash, - ExtendedEventHandler reporter) + SkyFunction.Environment env) throws IOException, InterruptedException { try { - return doLookupCache(repoName, repoDir, predeclaredInputHash, reporter); + return doLookupCache(repoName, repoDir, predeclaredInputHash, env); } catch (IOException e) { throw new IOException( "Failed to look up repo %s in the remote repo contents cache: %s" @@ -210,43 +249,23 @@ private boolean doLookupCache( RepositoryName repoName, Path repoDir, String predeclaredInputHash, - ExtendedEventHandler reporter) + SkyFunction.Environment env) throws IOException, InterruptedException { if (!(repoDir.getFileSystem() instanceof RemoteExternalOverlayFileSystem remoteFs)) { return false; } - var context = buildContext(repoName); + var context = buildContext(repoName, CacheOp.DOWNLOAD); if (!context.getReadCachePolicy().allowRemoteCache()) { return false; } - var actionKey = new ActionKey(digestUtil.compute(buildAction(predeclaredInputHash))); - // The marker file is read right after and thus requested to be inlined. - var cachedActionResult = - cache.downloadActionResult( - context, actionKey, /* inlineOutErr= */ false, ImmutableSet.of(MARKER_FILE_PATH)); - if (cachedActionResult == null) { - return false; - } - - var actionResult = cachedActionResult.actionResult(); - if (actionResult.getExitCode() != 0 - || actionResult.getOutputFilesCount() != 1 - || actionResult.getOutputDirectoriesCount() != 1) { - reporter.handle( - Event.warn( - String.format( - "Unexpected action result for cached repo %s: exit code %d, %d output files, %d" - + " output directories", - repoName, - actionResult.getExitCode(), - actionResult.getOutputFilesCount(), - actionResult.getOutputDirectoriesCount()))); + var finalEntry = fetchFinalCacheEntry(env, context, predeclaredInputHash); + if (env.valuesMissing() || finalEntry == null) { return false; } ListenableFuture markerFileContentFuture; - var markerFile = actionResult.getOutputFiles(0); + var markerFile = finalEntry.markerFile(); // Inlining is an optional feature, so we have to be prepared to download the marker file. if (markerFile.getContents().isEmpty()) { markerFileContentFuture = @@ -255,69 +274,351 @@ private boolean doLookupCache( } else { markerFileContentFuture = immediateFuture(markerFile.getContents().toByteArray()); } - var repoDirectory = actionResult.getOutputDirectories(0); + var repoDirectory = finalEntry.repoDirectory(); var repoDirectoryContentFuture = - Futures.transformAsync( + transformAsync( cache.downloadBlob( context, REPO_DIRECTORY_PATH, /* execPath= */ null, repoDirectory.getTreeDigest()), (treeBytes) -> immediateFuture(Tree.parseFrom(treeBytes)), directExecutor()); waitForBulkTransfer(ImmutableList.of(markerFileContentFuture, repoDirectoryContentFuture)); - String markerFileContent; - Tree repoDirectoryContent; - try { - markerFileContent = new String(markerFileContentFuture.get(), StandardCharsets.ISO_8859_1); - repoDirectoryContent = repoDirectoryContentFuture.get(); - } catch (ExecutionException e) { - throw new IllegalStateException( - "waitForBulkTransfer should have thrown: " + maybeGetStackTrace(e)); - } - var markerFileLines = - Splitter.on('\n') - .splitToStream(markerFileContent) - .filter(line -> !line.isEmpty()) - .collect(toImmutableList()); - if (markerFileLines.size() > 1) { - reporter.handle( - Event.warn( - "Marker file for repo %s has extra lines, skipping:\n%s" - .formatted( - repoName, - String.join("\n", markerFileLines.subList(1, markerFileLines.size()))))); + + String markerFileContent = new String(markerFileContentFuture.resultNow(), ISO_8859_1); + var maybeRecordedInputs = readMarkerFile(markerFileContent, predeclaredInputHash); + if (maybeRecordedInputs.isEmpty()) { return false; } - if (!markerFileLines.getFirst().equals(predeclaredInputHash)) { - reporter.handle( - Event.warn( - "Predeclared input hash mismatch for repo %s: expected %s, got %s" - .formatted(repoName, predeclaredInputHash, markerFileLines.getFirst()))); + var outdatedReason = + RepoRecordedInput.isAnyValueOutdated(env, directories, maybeRecordedInputs.get()); + if (env.valuesMissing() || outdatedReason.isPresent()) { + env.getListener() + .handle( + Event.warn( + "Unexpectedly outdated cached repo %s: %s" + .formatted(repoName, outdatedReason.orElse("unknown reason")))); return false; } - return remoteFs.injectRemoteRepo(repoName, repoDirectoryContent, markerFileContent); + return remoteFs.injectRemoteRepo( + repoName, repoDirectoryContentFuture.resultNow(), markerFileContent); + } + + private enum CacheOp { + DOWNLOAD, + UPLOAD, } - private RemoteActionExecutionContext buildContext(RepositoryName repoName) { + private RemoteActionExecutionContext buildContext(RepositoryName repoName, CacheOp cacheOp) { var metadata = TracingMetadataUtils.buildMetadata( buildRequestId, commandId, repoName.getName(), /* actionMetadata= */ null); - // Don't use the disk cache as `--repo_contents_cache` is a strictly better alternative for - // local caching. + // Don't upload local repo contents to the disk cache as the (local) `--repo_contents_cache` is + // a better alternative for local caching. Do write through the disk cache for downloads from + // the remote cache to speed up future usage. return RemoteActionExecutionContext.create(metadata) - .withReadCachePolicy(acceptCached ? CachePolicy.REMOTE_CACHE_ONLY : CachePolicy.NO_CACHE) + .withReadCachePolicy(acceptCached ? CachePolicy.ANY_CACHE : CachePolicy.NO_CACHE) .withWriteCachePolicy( - uploadLocalResults ? CachePolicy.REMOTE_CACHE_ONLY : CachePolicy.NO_CACHE); + switch (cacheOp) { + case DOWNLOAD -> CachePolicy.ANY_CACHE; + case UPLOAD -> + uploadLocalResults ? CachePolicy.REMOTE_CACHE_ONLY : CachePolicy.NO_CACHE; + }); } - private Action buildAction(String predeclaredInputHash) { - // The predeclared input hash uniquely identifies the repo rule and all its attributes, but not - // dependencies established at runtime. We choose to embed it into the salt simply because that - // results in a constant Command message. + private Action buildAction(String inputHash) { + // We choose to embed the hash into the salt simply because that results in a constant Command + // message. return baseAction.toBuilder() - .setSalt(ByteString.copyFrom(StringUnsafe.getByteArray(predeclaredInputHash))) + .setSalt(ByteString.copyFrom(StringUnsafe.getByteArray(inputHash))) .build(); } + /** + * Uploads the intermediate action results representing the inputs recorded at runtime and returns + * the input hash to use for the final action result. + */ + private String uploadIntermediateActionResults( + RemoteActionExecutionContext context, + String predeclaredInputHash, + List recordedInputValues) + throws IOException, InterruptedException { + // The command is shared by all action results and small enough that FindMissingBlobs is not + // worthwhile. The REAPI spec requires the command to be uploaded before an action result that + // references it. + waitForBulkTransfer(ImmutableSet.of(cache.uploadBlob(context, commandDigest, COMMAND_BYTES))); + + String rollingHash = predeclaredInputHash; + var batches = RepoRecordedInput.WithValue.splitIntoBatches(recordedInputValues); + var futures = new ArrayList>(batches.size()); + for (var batch : batches) { + futures.add( + addToActionResult( + context, + buildAction(rollingHash), + Collections2.transform(batch, RepoRecordedInput.WithValue::input))); + for (var recordedInputValue : batch) { + rollingHash = rollForwardHash(rollingHash, recordedInputValue); + } + } + waitForBulkTransfer(futures); + return rollingHash; + } + + /** + * Adds the given set of recorded inputs as one of the alternative paths to the action result for + * the given action, if not already present. + * + *

Most repo rule evaluations with a fixed previous batch of hashes (in particular, the same + * predeclared inputs hash) will request a fixed set of inputs in the next batch. Thus, most + * intermediate action results will only contain a single set of recorded inputs. + */ + private ListenableFuture addToActionResult( + RemoteActionExecutionContext context, + Action action, + Collection newInputs) { + var actionKey = digestUtil.computeActionKey(action); + var currentInputsFuture = + Futures.transformAsync( + cache.downloadActionResultAsync( + context, actionKey, /* inlineOutErr= */ true, ImmutableSet.of()), + (currentResult) -> { + if (currentResult == null + || currentResult.actionResult().getStdoutDigest().getSizeBytes() == 0) { + return immediateFuture(""); + } + return fetchStdout(context, currentResult.actionResult()); + }, + directExecutor()); + return Futures.transformAsync( + currentInputsFuture, + currentInputsString -> { + // RepoRecordedInput.toString() is guaranteed to return a string that doesn't contain + // spaces or newlines. We can thus safely use spaces to separate inputs within a batch + // and newlines to separate different batches. + var newInputString = + newInputs.stream().map(RepoRecordedInput::toString).collect(joining(" ")); + if (currentInputsString.lines().anyMatch(newInputString::equals)) { + // The current batch of inputs is already present, no need to update the action result. + return immediateFuture(null); + } + // Add the new inputs to the top so that the most recently added inputs stay at the top. + // This could be used to implement a simple "least recently added" eviction strategy in + // the future in case the size of action results becomes a concern. + // + // Note that this update is inherently racy: multiple clients may add inputs concurrently, + // resulting in some added inputs being lost since the REAPI does not provide a way to + // update action results atomically. However, since different batches of inputs are + // already rare and them being added concurrently even more so, the temporary loss of a + // cache entry is an acceptable trade-off for simplicity. + var newInputsString = newInputString + '\n' + currentInputsString; + var stdoutBytes = getInternalStringBytes(newInputsString); + var stdoutDigest = digestUtil.compute(stdoutBytes); + var actionResult = + ActionResult.newBuilder().setExitCode(0).setStdoutDigest(stdoutDigest).build(); + return whenAllSucceed( + cache.uploadBlob(context, actionKey.getDigest(), action.toByteString()), + cache.uploadBlob(context, stdoutDigest, ByteString.copyFrom(stdoutBytes))) + .callAsync( + () -> cache.uploadActionResult(context, actionKey, actionResult), + directExecutor()); + }, + directExecutor()); + } + + /** Represents a single AC entry in the internal format used by the remote repo contents cache. */ + private sealed interface CacheEntry { + /** + * A final cache entry containing the contents of a repository. + * + *

Represented as an ActionResult with one output directory and one output file. + * + * @param repoDirectory the contents of the repository directory + * @param markerFile the contents of the repository's marker file + */ + record Final(OutputDirectory repoDirectory, OutputFile markerFile) implements CacheEntry {} + + /** + * An intermediate cache entry that points to the keys of any number of further AC entries, + * which can themselves be intermediate or final entries. The remote repo contents cache will + * try them in order. + * + * @param nextInputHashes the keys under which the next AC entries should be looked up + */ + record Intermediate(ImmutableList nextInputHashes) implements CacheEntry {} + + /** + * The cache entry didn't match any of the formats expected by this version of the remote repo + * contents cache for the given human-readable reason. + */ + record Invalid(String reason) implements CacheEntry {} + } + + /** + * Fetches a final cache entry for the given predeclared input hash by recursively following + * intermediate entries if needed or returns null if no final entry could be found or a Skyframe + * restart is needed. + */ + @Nullable + private CacheEntry.Final fetchFinalCacheEntry( + SkyFunction.Environment env, + RemoteActionExecutionContext context, + String predeclaredInputHash) + throws IOException, InterruptedException { + var currentHashes = ImmutableList.of(predeclaredInputHash); + while (!currentHashes.isEmpty()) { + var nextHashes = ImmutableList.builder(); + for (var hash : currentHashes) { + switch (fetchCacheEntry(env, context, hash)) { + case CacheEntry.Final finalEntry -> { + return finalEntry; + } + case CacheEntry.Intermediate(ImmutableList nextInputHashes) -> + nextHashes.addAll(nextInputHashes); + case CacheEntry.Invalid(String reason) -> env.getListener().handle(Event.warn(reason)); + case null -> { + // Keep checking hashes to batch missing values in fewer restarts. + Preconditions.checkState(env.valuesMissing()); + } + } + } + if (env.valuesMissing()) { + return null; + } + currentHashes = nextHashes.build(); + } + return null; + } + + // Returns null if and only if values are missing. + @Nullable + private CacheEntry fetchCacheEntry( + SkyFunction.Environment env, RemoteActionExecutionContext context, String inputHash) + throws IOException, InterruptedException { + var actionKey = new ActionKey(digestUtil.compute(buildAction(inputHash))); + // The marker file is read right after and thus requested to be inlined. If the action result + // is an intermediate node, the full result will be contained in the stdout, which should thus + // also be inlined. + var cachedActionResult = + cache.downloadActionResult( + context, actionKey, /* inlineOutErr= */ true, ImmutableSet.of(MARKER_FILE_PATH)); + if (cachedActionResult == null) { + return new CacheEntry.Intermediate(ImmutableList.of()); + } + var actionResult = cachedActionResult.actionResult(); + + if (actionResult.getExitCode() != 0) { + return new CacheEntry.Invalid( + "Unexpected exit code in action result for remotely cached repo %s:\n%s" + .formatted(context.getRequestMetadata().getActionId(), actionResult)); + } + if (actionResult.getOutputFilesCount() == 1 + && actionResult.getOutputDirectoriesCount() == 1 + && actionResult.getOutputSymlinksCount() == 0) { + return new CacheEntry.Final( + actionResult.getOutputDirectories(0), actionResult.getOutputFiles(0)); + } + if (!(actionResult.getOutputFilesCount() == 0 + && actionResult.getOutputDirectoriesCount() == 0 + && actionResult.getOutputSymlinksCount() == 0 + && actionResult.getStdoutDigest().getSizeBytes() > 0)) { + return new CacheEntry.Invalid( + "Unexpected intermediate action result for remotely cached repo %s:\n%s" + .formatted(context.getRequestMetadata().getActionId(), actionResult)); + } + var stdoutFuture = fetchStdout(context, actionResult); + waitForBulkTransfer(ImmutableList.of(stdoutFuture)); + + // The action result's stdout contains multiple lines, each representing a batch of + // RepoRecordedInputs separated by spaces. A given batch is valid only if all inputs in the + // batch are, but separate batches are tried independently. + var nextInputBatches = + stdoutFuture + .resultNow() + .lines() + .map( + line -> + SPLIT_ON_SPACE + .splitToStream(line) + .map(RepoRecordedInput::parse) + .collect(toImmutableList())) + .collect(toImmutableList()); + var uniqueNextInputs = + nextInputBatches.stream().flatMap(List::stream).collect(toImmutableSet()); + RepoRecordedInput.prefetch(env, directories, uniqueNextInputs); + if (env.valuesMissing()) { + return null; + } + var nextHashes = ImmutableList.builder(); + nextBatch: + for (var batch : nextInputBatches) { + var rollingHash = inputHash; + for (var input : batch) { + var value = input.getValue(env, directories); + // Values have been prefetched above. + Preconditions.checkState(!env.valuesMissing()); + if (!(value instanceof RepoRecordedInput.MaybeValue.Valid(String valueString))) { + continue nextBatch; + } + rollingHash = + rollForwardHash(rollingHash, new RepoRecordedInput.WithValue(input, valueString)); + } + nextHashes.add(rollingHash); + } + return new CacheEntry.Intermediate(nextHashes.build()); + } + + private String rollForwardHash(String hash, RepoRecordedInput.WithValue inputWithValue) { + return new Fingerprint() + .addString(hash) + .addString(inputWithValue.toString()) + .hexDigestAndReset(); + } + + private ListenableFuture fetchStdout( + RemoteActionExecutionContext context, ActionResult actionResult) { + if (!actionResult.getStdoutRaw().isEmpty()) { + return immediateFuture( + StringUnsafe.newInstance(actionResult.getStdoutRaw().toByteArray(), StringUnsafe.LATIN1)); + } + return Futures.transform( + cache.downloadBlob(context, actionResult.getStdoutDigest()), + stdout -> StringUnsafe.newInstance(stdout, StringUnsafe.LATIN1), + directExecutor()); + } + + /** + * Parses a marker file written by {@code DigestWriter}: the first non-empty line is the + * predeclared input hash, followed by one serialized {@link RepoRecordedInput.WithValue} per line. + * Returns an empty {@link Optional} if the first line doesn't match the expected hash or any + * subsequent line fails to parse. + */ + private static Optional> readMarkerFile( + String content, String predeclaredInputHash) { + var recordedInputValues = ImmutableList.builder(); + boolean firstLineVerified = false; + for (String line : Splitter.on('\n').split(content)) { + if (line.isEmpty()) { + continue; + } + if (!firstLineVerified) { + if (!line.equals(predeclaredInputHash)) { + return Optional.empty(); + } + firstLineVerified = true; + } else { + Optional withValue = RepoRecordedInput.WithValue.parse(line); + if (withValue.isEmpty()) { + return Optional.empty(); + } + recordedInputValues.add(withValue.get()); + } + } + if (!firstLineVerified) { + return Optional.empty(); + } + return Optional.of(recordedInputValues.build()); + } + private String maybeGetStackTrace(Exception e) { return verboseFailures ? Throwables.getStackTraceAsString(e) : e.getMessage(); } diff --git a/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java b/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java index 2b4035128f7b98..eec030b4376da4 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RepositoryRemoteHelpersFactoryImpl.java @@ -13,6 +13,7 @@ // limitations under the License. package com.google.devtools.build.lib.remote; +import com.google.devtools.build.lib.analysis.BlazeDirectories; import com.google.devtools.build.lib.remote.common.RemoteExecutionClient; import com.google.devtools.build.lib.runtime.RemoteRepoContentsCache; import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor; @@ -22,6 +23,7 @@ /** Factory for {@link RemoteRepositoryRemoteExecutor} and {@link RemoteRepoContentsCacheImpl}. */ class RepositoryRemoteHelpersFactoryImpl implements RepositoryRemoteHelpersFactory { + private final BlazeDirectories directories; private final CombinedCache cache; @Nullable private final RemoteExecutionClient remoteExecutor; private final String buildRequestId; @@ -33,6 +35,7 @@ class RepositoryRemoteHelpersFactoryImpl implements RepositoryRemoteHelpersFacto private final boolean verboseFailures; RepositoryRemoteHelpersFactoryImpl( + BlazeDirectories directories, CombinedCache cache, @Nullable RemoteExecutionClient remoteExecutor, String buildRequestId, @@ -41,6 +44,7 @@ class RepositoryRemoteHelpersFactoryImpl implements RepositoryRemoteHelpersFacto boolean acceptCached, boolean uploadLocalResults, boolean verboseFailures) { + this.directories = directories; this.cache = cache; this.remoteExecutor = remoteExecutor; this.buildRequestId = buildRequestId; @@ -71,6 +75,12 @@ public RepositoryRemoteExecutor createExecutor() { @Override public RemoteRepoContentsCache createRepoContentsCache() { return new RemoteRepoContentsCacheImpl( - cache, buildRequestId, commandId, acceptCached, uploadLocalResults, verboseFailures); + directories, + cache, + buildRequestId, + commandId, + acceptCached, + uploadLocalResults, + verboseFailures); } } diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java index 878429eb7252d4..22fec8e4a3a0ff 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java @@ -267,8 +267,13 @@ public SkyValue compute(SkyKey skyKey, Environment env) if (remoteRepoContentsCache != null) { try { - if (remoteRepoContentsCache.lookupCache( - repositoryName, repoRoot, digestWriter.predeclaredInputHash, env.getListener())) { + boolean cacheHit = + remoteRepoContentsCache.lookupCache( + repositoryName, repoRoot, digestWriter.predeclaredInputHash, env); + if (env.valuesMissing()) { + return null; + } + if (cacheHit) { return new RepositoryDirectoryValue.Success( repoRoot, /* isFetchingDelayed= */ false, excludeRepoFromVendoring); } diff --git a/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java b/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java index 8c4f0f25d1cf41..663c2472efdc07 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java +++ b/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java @@ -17,6 +17,7 @@ import com.google.devtools.build.lib.cmdline.RepositoryName; import com.google.devtools.build.lib.events.ExtendedEventHandler; import com.google.devtools.build.lib.vfs.Path; +import com.google.devtools.build.skyframe.SkyFunction; import java.io.IOException; /** A remote cache for the contents of external repositories. */ @@ -33,6 +34,8 @@ void addToCache( /** * Retrieves a repository from the remote cache if possible. * + *

Callers have to check {@code env.valuesMissing()} after this method returns. + * * @return true if there was a cache hit and the repository has been fetched into the given * directory. */ @@ -40,6 +43,6 @@ boolean lookupCache( RepositoryName repoName, Path repoDir, String predeclaredInputHash, - ExtendedEventHandler reporter) + SkyFunction.Environment env) throws IOException, InterruptedException; } diff --git a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py index 0e08858ec1d0b8..64a0fe68ded180 100644 --- a/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py +++ b/src/test/py/bazel/bzlmod/remote_repo_contents_cache_test.py @@ -251,14 +251,12 @@ def testNotCachedWhenRecordedInputsChange_dynamicDep(self): self.assertIn('JUST FETCHED', '\n'.join(stderr)) self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) - # Change back to previous recorded inputs: not cached - # TODO: This is the current behavior, but it's not desired. Support for - # caching repos with dynamic deps should be added. + # Change back to previous recorded inputs: cached (even after expunging) self.RunBazel(['clean', '--expunge']) self.ScratchFile('data.txt', ['one']) _, _, stderr = self.RunBazel(['build', '@my_repo//:haha']) - self.assertIn('JUST FETCHED', '\n'.join(stderr)) - self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) def testNotCachedWhenRecordedInputsChange_staticDep(self): self.ScratchFile( @@ -305,6 +303,211 @@ def testNotCachedWhenRecordedInputsChange_staticDep(self): self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + def testRecordedInputs_differentValues(self): + platform_file = self.ScratchFile('platform.txt') + + self.ScratchFile( + 'MODULE.bazel', + [ + ( + 'platform_dependent_repo =' + ' use_repo_rule("//:platform_dependent_repo.bzl",' + ' "platform_dependent_repo")' + ), + 'platform_dependent_repo(name = "platform_dependent_repo")', + 'repo = use_repo_rule("//:repo.bzl", "repo")', + 'repo(name = "my_repo")', + ], + ) + self.ScratchFile( + 'BUILD.bazel', + [ + 'genrule(', + ' name = "show_platform",', + ' outs = ["platform.txt"],', + ' cmd = "cat $(location @my_repo//:data.txt) > $@",', + ' srcs = ["@my_repo//:data.txt"],', + ')', + ], + ) + self.ScratchFile( + 'platform_dependent_repo.bzl', + [ + 'def _platform_dependent_repo_impl(rctx):', + ' rctx.file("BUILD")', + ' print("DETERMINING PLATFORM")', + ' platform = rctx.read(rctx.path("%s"))' + % platform_file.replace('\\', '\\\\'), + ' rctx.file("data.txt", platform)', + ( + 'platform_dependent_repo =' + ' repository_rule(_platform_dependent_repo_impl)' + ), + ], + ) + self.ScratchFile( + 'repo.bzl', + [ + 'def _repo_impl(rctx):', + ' rctx.file("BUILD", "exports_files([\'data.txt\'])")', + ( + ' platform =' + ' rctx.read(Label("@platform_dependent_repo//:data.txt"))' + ), + ' print("JUST FETCHED ON " + platform)', + ' rctx.file("data.txt", platform)', + ' return rctx.repo_metadata(reproducible=True)', + 'repo = repository_rule(_repo_impl)', + ], + ) + + repo_dir = self.RepoDir('my_repo') + + # First fetch on Linux: not cached + self.ScratchFile('platform.txt', ['Linux']) + _, _, stderr = self.RunBazel(['build', '//:show_platform']) + self.assertIn('DETERMINING PLATFORM', '\n'.join(stderr)) + self.assertIn('JUST FETCHED ON Linux', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/platform.txt')) as f: + self.assertEqual(f.read().strip(), 'Linux') + + # First fetch on macOS: not cached + self.ScratchFile('platform.txt', ['macOS']) + _, _, stderr = self.RunBazel(['build', '//:show_platform']) + self.assertIn('DETERMINING PLATFORM', '\n'.join(stderr)) + self.assertIn('JUST FETCHED ON macOS', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/platform.txt')) as f: + self.assertEqual(f.read().strip(), 'macOS') + + # Second fetch on Linux: cached + self.ScratchFile('platform.txt', ['Linux']) + _, _, stderr = self.RunBazel(['build', '//:show_platform']) + self.assertIn('DETERMINING PLATFORM', '\n'.join(stderr)) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/platform.txt')) as f: + self.assertEqual(f.read().strip(), 'Linux') + + # Second fetch on macOS: cached + self.ScratchFile('platform.txt', ['macOS']) + _, _, stderr = self.RunBazel(['build', '//:show_platform']) + self.assertIn('DETERMINING PLATFORM', '\n'.join(stderr)) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/platform.txt')) as f: + self.assertEqual(f.read().strip(), 'macOS') + + def testRecordedInputs_differentInputs(self): + platform_file = self.ScratchFile('platform.txt') + + self.ScratchFile( + 'MODULE.bazel', + [ + ( + 'platform_dependent_binary =' + ' use_repo_rule("//:platform_dependent_binary.bzl",' + ' "platform_dependent_binary")' + ), + 'platform_dependent_binary(name = "platform_dependent_binary")', + 'repo = use_repo_rule("//:repo.bzl", "repo")', + 'repo(name = "my_repo")', + ], + ) + self.ScratchFile( + 'BUILD.bazel', + [ + 'genrule(', + ' name = "show_data",', + ' outs = ["data.txt"],', + ' cmd = "cat $(location @my_repo//:data.txt) > $@",', + ' srcs = ["@my_repo//:data.txt"],', + ')', + ], + ) + self.ScratchFile( + 'platform_dependent_binary.bzl', + [ + 'def _platform_dependent_binary_impl(rctx):', + ' rctx.file("BUILD")', + ' platform = rctx.read(rctx.path("%s")).strip()' + % platform_file.replace('\\', '\\\\'), + ' print("DETERMINED PLATFORM (%s)" % platform)', + ' if platform == "Windows":', + ' rctx.file("binary.exe", "PE")', + ' else:', + ' rctx.file("binary.sh", "ELF")', + ( + 'platform_dependent_binary =' + ' repository_rule(_platform_dependent_binary_impl)' + ), + ], + ) + self.ScratchFile( + 'repo.bzl', + [ + 'def _repo_impl(rctx):', + ' rctx.file("BUILD", "exports_files([\'data.txt\'])")', + # Simulate a uname -s by reading from a file instead of using + # rctx.execute (more complex to mock) or rctx.os.name (which may be + # tracked as an input in the future, making this test vacuous). + ' platform = rctx.read(rctx.path("%s"), watch = "no").strip()' + % platform_file.replace('\\', '\\\\'), + ' ext = ".exe" if platform == "Windows" else ".sh"', + # Simulate rctx.execute with a watched binary. + ( + ' out = rctx.read(Label("@platform_dependent_binary//:binary"' + ' + ext))' + ), + ' rctx.file("data.txt", out)', + ' print("JUST FETCHED")', + ' return rctx.repo_metadata(reproducible=True)', + 'repo = repository_rule(_repo_impl)', + ], + ) + + repo_dir = self.RepoDir('my_repo') + + # First fetch on Linux: not cached + self.ScratchFile('platform.txt', ['Linux']) + _, _, stderr = self.RunBazel(['build', '//:show_data']) + self.assertIn('DETERMINED PLATFORM (Linux)', '\n'.join(stderr)) + self.assertIn('JUST FETCHED', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/data.txt')) as f: + self.assertEqual(f.read().strip(), 'ELF') + + # First fetch on Windows: not cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('platform.txt', ['Windows']) + _, _, stderr = self.RunBazel(['build', '//:show_data']) + self.assertIn('DETERMINED PLATFORM (Windows)', '\n'.join(stderr)) + self.assertIn('JUST FETCHED', '\n'.join(stderr)) + self.assertTrue(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/data.txt')) as f: + self.assertEqual(f.read().strip(), 'PE') + + # Second fetch on Linux: cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('platform.txt', ['Linux']) + _, _, stderr = self.RunBazel(['build', '//:show_data']) + self.assertIn('DETERMINED PLATFORM (Linux)', '\n'.join(stderr)) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/data.txt')) as f: + self.assertEqual(f.read().strip(), 'ELF') + + # Second fetch on Windows: cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('platform.txt', ['Windows']) + _, _, stderr = self.RunBazel(['build', '//:show_data']) + self.assertIn('DETERMINED PLATFORM (Windows)', '\n'.join(stderr)) + self.assertNotIn('JUST FETCHED', '\n'.join(stderr)) + self.assertFalse(os.path.exists(os.path.join(repo_dir, 'BUILD'))) + with open(self.Path('bazel-bin/data.txt')) as f: + self.assertEqual(f.read().strip(), 'PE') + def testNoThrashingBetweenWorkspaces(self): module_bazel_lines = [ 'repo = use_repo_rule("//:repo.bzl", "repo")', @@ -810,6 +1013,72 @@ def testRun(self): repo_dir = self.RepoDir('buildozer') self.assertFalse(os.path.exists(os.path.join(repo_dir, 'MODULE.bazel'))) + def testReverseDependencyDirection(self): + # Set up two repos that retain their predeclared input hashes across two + # builds but still reverse their dependency direction. Depending on how repo + # cache candidates are checked, this could lead to a Skyframe cycle. + self.ScratchFile( + 'MODULE.bazel', + [ + 'repo = use_repo_rule("//:repo.bzl", "repo")', + 'repo(', + ' name = "foo",', + ' deps_file = "//:foo_deps.txt",', + ')', + 'repo(', + ' name = "bar",', + ' deps_file = "//:bar_deps.txt",', + ')', + ], + ) + self.ScratchFile('BUILD.bazel') + self.ScratchFile( + 'repo.bzl', + [ + 'def _repo_impl(rctx):', + ' deps = rctx.read(rctx.attr.deps_file).splitlines()', + ' output = ""', + ' for dep in deps:', + ' if dep:', + ' output += "{}: {}\\n".format(dep, rctx.read(Label(dep)))', + ' rctx.file("output.txt", output)', + ' rctx.file("BUILD", "exports_files([\'output.txt\'])")', + ' print("JUST FETCHED: %s" % rctx.original_name)', + ' return rctx.repo_metadata(reproducible=True)', + 'repo = repository_rule(', + ' implementation = _repo_impl,', + ' attrs = {', + ' "deps_file": attr.label(), }', + ')', + ], + ) + + self.ScratchFile('foo_deps.txt', ['@bar//:output.txt']) + self.ScratchFile('bar_deps.txt', ['']) + + # First fetch: not cached + _, _, stderr = self.RunBazel(['build', '@foo//:output.txt']) + stderr = '\n'.join(stderr) + self.assertIn('JUST FETCHED: bar', stderr) + self.assertIn('JUST FETCHED: foo', stderr) + + # After expunging and reversing the dependency direction: not cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('foo_deps.txt', ['']) + self.ScratchFile('bar_deps.txt', ['@foo//:output.txt']) + _, _, stderr = self.RunBazel(['build', '@foo//:output.txt']) + stderr = '\n'.join(stderr) + self.assertIn('JUST FETCHED: foo', stderr) + self.assertNotIn('JUST FETCHED: bar', stderr) + + # After expunging and reversing the dependency direction: both cached + self.RunBazel(['clean', '--expunge']) + self.ScratchFile('foo_deps.txt', ['@bar//:output.txt']) + self.ScratchFile('bar_deps.txt', ['']) + _, _, stderr = self.RunBazel(['build', '@foo//:output.txt']) + stderr = '\n'.join(stderr) + self.assertNotIn('JUST FETCHED', stderr) + if __name__ == '__main__': absltest.main() From 916abb6186f7af30993cfb439a32d3ffa12616fd Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Wed, 24 Jun 2026 17:00:33 +0200 Subject: [PATCH 2/4] Memoize remote repo contents cache lookups across Skyframe restarts On 8.x the remote repo contents cache lookup runs in RepositoryDelegatorFunction's restart-based compute (there is no merged repository fetch worker). Looking up a repository with dynamically recorded inputs walks a DAG of action cache entries, requesting the current values of recorded inputs from Skyframe along the way. Each such request can trigger a Skyframe restart, after which the lookup re-walks the DAG from the root and re-downloads every action cache entry it already fetched, turning an O(depth) lookup into O(depth^2) remote round-trips. Memoize the action cache entries fetched during a lookup, keyed by the input hash under which they were looked up, in a SkyKeyComputeState that survives restarts. Action cache entries don't change within a command, so a memoized entry can be reused on the next restart instead of being re-downloaded. The repository's compute-state slot is already occupied by StarlarkRepositoryFunction's worker state (created by wasJustFetched before the lookup runs), and Skyframe stores a single state instance per key. The lookup therefore cannot allocate its own SkyKeyComputeState via env.getState. Instead, the memo is stored on the existing state and handed to lookupCache through a new RepositoryFunction.getRemoteRepoContentsCacheLookupState hook; the default returns a fresh, non-memoizing instance for handlers without a compute state. Follow-up to the dynamic inputs support (cherry-pick of #27634). --- .../build/lib/bazel/repository/starlark/BUILD | 1 + .../starlark/StarlarkRepositoryFunction.java | 8 +++ .../remote/RemoteRepoContentsCacheImpl.java | 65 +++++++++++++------ .../RepositoryDelegatorFunction.java | 6 +- .../rules/repository/RepositoryFunction.java | 14 ++++ .../lib/runtime/RemoteRepoContentsCache.java | 33 +++++++++- 6 files changed, 105 insertions(+), 22 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD index 6a9e7af5fb3af5..9cec5bdbcb276c 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD @@ -17,6 +17,7 @@ java_library( deps = [ "//src/main/java/com/google/devtools/build/docgen/annot", "//src/main/java/com/google/devtools/build/lib:runtime", + "//src/main/java/com/google/devtools/build/lib:runtime/remote_repo_contents_cache", "//src/main/java/com/google/devtools/build/lib/actions:file_metadata", "//src/main/java/com/google/devtools/build/lib/analysis:analysis_cluster", "//src/main/java/com/google/devtools/build/lib/analysis:blaze_directories", diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryFunction.java index c804ee6c9dc6c0..79615f7b3f0dd5 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryFunction.java @@ -47,6 +47,7 @@ import com.google.devtools.build.lib.rules.repository.RepositoryFunction; import com.google.devtools.build.lib.rules.repository.WorkspaceFileHelper; import com.google.devtools.build.lib.runtime.ProcessWrapper; +import com.google.devtools.build.lib.runtime.RemoteRepoContentsCache; import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor; import com.google.devtools.build.lib.skyframe.IgnoredSubdirectoriesValue; import com.google.devtools.build.lib.skyframe.PrecomputedValue; @@ -125,6 +126,8 @@ public void reportSkyframeRestart(Environment env, RepositoryName repoName) { private static class State extends WorkerSkyKeyComputeState { @Nullable FetchResult result; + final RemoteRepoContentsCache.LookupState repoContentsCacheLookupState = + new RemoteRepoContentsCache.LookupState(); } @Override @@ -132,6 +135,11 @@ public boolean wasJustFetched(Environment env) { return env.getState(State::new).result != null; } + @Override + public RemoteRepoContentsCache.LookupState getRemoteRepoContentsCacheLookupState(Environment env) { + return env.getState(State::new).repoContentsCacheLookupState; + } + private record FetchArgs( Rule rule, Path outputDirectory, BlazeDirectories directories, Environment env, SkyKey key) { FetchArgs toWorkerArgs(Environment env) { diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java index d37a2195bb072c..f7bb8e3bb04489 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteRepoContentsCacheImpl.java @@ -229,14 +229,22 @@ public void addToCache( } @Override + // The LookupState is only populated by this class and thus always contains CacheEntry values. + @SuppressWarnings("unchecked") public boolean lookupCache( RepositoryName repoName, Path repoDir, String predeclaredInputHash, - SkyFunction.Environment env) + SkyFunction.Environment env, + RemoteRepoContentsCache.LookupState lookupState) throws IOException, InterruptedException { try { - return doLookupCache(repoName, repoDir, predeclaredInputHash, env); + return doLookupCache( + repoName, + repoDir, + predeclaredInputHash, + env, + (RemoteRepoContentsCache.LookupState) lookupState); } catch (IOException e) { throw new IOException( "Failed to look up repo %s in the remote repo contents cache: %s" @@ -249,7 +257,8 @@ private boolean doLookupCache( RepositoryName repoName, Path repoDir, String predeclaredInputHash, - SkyFunction.Environment env) + SkyFunction.Environment env, + RemoteRepoContentsCache.LookupState lookupState) throws IOException, InterruptedException { if (!(repoDir.getFileSystem() instanceof RemoteExternalOverlayFileSystem remoteFs)) { return false; @@ -259,7 +268,7 @@ private boolean doLookupCache( if (!context.getReadCachePolicy().allowRemoteCache()) { return false; } - var finalEntry = fetchFinalCacheEntry(env, context, predeclaredInputHash); + var finalEntry = fetchFinalCacheEntry(env, lookupState, context, predeclaredInputHash); if (env.valuesMissing() || finalEntry == null) { return false; } @@ -426,7 +435,7 @@ private ListenableFuture addToActionResult( } /** Represents a single AC entry in the internal format used by the remote repo contents cache. */ - private sealed interface CacheEntry { + private sealed interface CacheEntry extends OpaqueCacheEntry { /** * A final cache entry containing the contents of a repository. * @@ -461,6 +470,7 @@ record Invalid(String reason) implements CacheEntry {} @Nullable private CacheEntry.Final fetchFinalCacheEntry( SkyFunction.Environment env, + RemoteRepoContentsCache.LookupState lookupState, RemoteActionExecutionContext context, String predeclaredInputHash) throws IOException, InterruptedException { @@ -468,7 +478,7 @@ private CacheEntry.Final fetchFinalCacheEntry( while (!currentHashes.isEmpty()) { var nextHashes = ImmutableList.builder(); for (var hash : currentHashes) { - switch (fetchCacheEntry(env, context, hash)) { + switch (fetchCacheEntry(env, lookupState, context, hash)) { case CacheEntry.Final finalEntry -> { return finalEntry; } @@ -492,8 +502,17 @@ private CacheEntry.Final fetchFinalCacheEntry( // Returns null if and only if values are missing. @Nullable private CacheEntry fetchCacheEntry( - SkyFunction.Environment env, RemoteActionExecutionContext context, String inputHash) + SkyFunction.Environment env, + RemoteRepoContentsCache.LookupState lookupState, + RemoteActionExecutionContext context, + String inputHash) throws IOException, InterruptedException { + // Cache action cache entries so that network calls don't have to be repeated on Skyframe + // restarts. + var memoized = lookupState.get(inputHash); + if (memoized != null) { + return memoized; + } var actionKey = new ActionKey(digestUtil.compute(buildAction(inputHash))); // The marker file is read right after and thus requested to be inlined. If the action result // is an intermediate node, the full result will be contained in the stdout, which should thus @@ -502,28 +521,34 @@ private CacheEntry fetchCacheEntry( cache.downloadActionResult( context, actionKey, /* inlineOutErr= */ true, ImmutableSet.of(MARKER_FILE_PATH)); if (cachedActionResult == null) { - return new CacheEntry.Intermediate(ImmutableList.of()); + return lookupState.memoize(inputHash, new CacheEntry.Intermediate(ImmutableList.of())); } var actionResult = cachedActionResult.actionResult(); if (actionResult.getExitCode() != 0) { - return new CacheEntry.Invalid( - "Unexpected exit code in action result for remotely cached repo %s:\n%s" - .formatted(context.getRequestMetadata().getActionId(), actionResult)); + return lookupState.memoize( + inputHash, + new CacheEntry.Invalid( + "Unexpected exit code in action result for remotely cached repo %s:\n%s" + .formatted(context.getRequestMetadata().getActionId(), actionResult))); } if (actionResult.getOutputFilesCount() == 1 && actionResult.getOutputDirectoriesCount() == 1 && actionResult.getOutputSymlinksCount() == 0) { - return new CacheEntry.Final( - actionResult.getOutputDirectories(0), actionResult.getOutputFiles(0)); + return lookupState.memoize( + inputHash, + new CacheEntry.Final( + actionResult.getOutputDirectories(0), actionResult.getOutputFiles(0))); } if (!(actionResult.getOutputFilesCount() == 0 && actionResult.getOutputDirectoriesCount() == 0 && actionResult.getOutputSymlinksCount() == 0 && actionResult.getStdoutDigest().getSizeBytes() > 0)) { - return new CacheEntry.Invalid( - "Unexpected intermediate action result for remotely cached repo %s:\n%s" - .formatted(context.getRequestMetadata().getActionId(), actionResult)); + return lookupState.memoize( + inputHash, + new CacheEntry.Invalid( + "Unexpected intermediate action result for remotely cached repo %s:\n%s" + .formatted(context.getRequestMetadata().getActionId(), actionResult))); } var stdoutFuture = fetchStdout(context, actionResult); waitForBulkTransfer(ImmutableList.of(stdoutFuture)); @@ -564,7 +589,7 @@ private CacheEntry fetchCacheEntry( } nextHashes.add(rollingHash); } - return new CacheEntry.Intermediate(nextHashes.build()); + return lookupState.memoize(inputHash, new CacheEntry.Intermediate(nextHashes.build())); } private String rollForwardHash(String hash, RepoRecordedInput.WithValue inputWithValue) { @@ -588,9 +613,9 @@ private ListenableFuture fetchStdout( /** * Parses a marker file written by {@code DigestWriter}: the first non-empty line is the - * predeclared input hash, followed by one serialized {@link RepoRecordedInput.WithValue} per line. - * Returns an empty {@link Optional} if the first line doesn't match the expected hash or any - * subsequent line fails to parse. + * predeclared input hash, followed by one serialized {@link RepoRecordedInput.WithValue} per + * line. Returns an empty {@link Optional} if the first line doesn't match the expected hash or + * any subsequent line fails to parse. */ private static Optional> readMarkerFile( String content, String predeclaredInputHash) { diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java index 22fec8e4a3a0ff..748e8e1c903367 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java @@ -269,7 +269,11 @@ public SkyValue compute(SkyKey skyKey, Environment env) try { boolean cacheHit = remoteRepoContentsCache.lookupCache( - repositoryName, repoRoot, digestWriter.predeclaredInputHash, env); + repositoryName, + repoRoot, + digestWriter.predeclaredInputHash, + env, + handler.getRemoteRepoContentsCacheLookupState(env)); if (env.valuesMissing()) { return null; } diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java index 4d6657c9178a43..f92ca18be71e6b 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryFunction.java @@ -33,6 +33,7 @@ import com.google.devtools.build.lib.packages.Type; import com.google.devtools.build.lib.repository.ExternalPackageException; import com.google.devtools.build.lib.repository.RepositoryFetchProgress; +import com.google.devtools.build.lib.runtime.RemoteRepoContentsCache; import com.google.devtools.build.lib.skyframe.ActionEnvironmentFunction; import com.google.devtools.build.lib.skyframe.AlreadyReportedException; import com.google.devtools.build.lib.skyframe.PackageLookupFunction; @@ -229,6 +230,19 @@ public boolean wasJustFetched(Environment env) { return false; } + /** + * Returns the {@link RemoteRepoContentsCache.LookupState} to use for the remote repo contents + * cache lookup of the repository currently being computed. + * + *

The default returns a fresh instance on each call, which is correct but doesn't memoize + * anything across Skyframe restarts. Handlers that store a {@link + * com.google.devtools.build.skyframe.SkyFunction.Environment.SkyKeyComputeState} should override + * this to return a per-evaluation instance held in that state. + */ + public RemoteRepoContentsCache.LookupState getRemoteRepoContentsCacheLookupState(Environment env) { + return new RemoteRepoContentsCache.LookupState(); + } + /** * Verify the data provided by the marker file to check if a refetch is needed. Returns an empty * Optional if the data is up to date and no refetch is needed and an Optional with a diff --git a/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java b/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java index 663c2472efdc07..cbf738bd46c86b 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java +++ b/src/main/java/com/google/devtools/build/lib/runtime/RemoteRepoContentsCache.java @@ -19,9 +19,37 @@ import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.skyframe.SkyFunction; import java.io.IOException; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; /** A remote cache for the contents of external repositories. */ public interface RemoteRepoContentsCache { + /** + * An entry memoized in a {@link LookupState}. The concrete representation is an implementation + * detail of the cache, so this is an opaque marker to callers. + */ + interface OpaqueCacheEntry {} + + /** + * Per-repository scratch state for {@link #lookupCache} that is meant to be stored in the + * repository's {@link SkyFunction.Environment.SkyKeyComputeState} so that it survives Skyframe + * restarts. + */ + final class LookupState { + private final Map entries = new ConcurrentHashMap<>(); + + /** Returns the entry memoized under the given input hash, or null if there is none. */ + public T get(String inputHash) { + return entries.get(inputHash); + } + + /** Memoizes the given entry under the given input hash. */ + public T memoize(String inputHash, T entry) { + entries.put(inputHash, entry); + return entry; + } + } + /** Adds a repository that has been fetched locally to the remote cache. */ void addToCache( RepositoryName repoName, @@ -36,6 +64,8 @@ void addToCache( * *

Callers have to check {@code env.valuesMissing()} after this method returns. * + * @param lookupState scratch state surviving Skyframe restarts; pass the same instance across + * restarts of the repository's evaluation to avoid re-fetching action cache entries * @return true if there was a cache hit and the repository has been fetched into the given * directory. */ @@ -43,6 +73,7 @@ boolean lookupCache( RepositoryName repoName, Path repoDir, String predeclaredInputHash, - SkyFunction.Environment env) + SkyFunction.Environment env, + LookupState lookupState) throws IOException, InterruptedException; } From 85afd28bd654d5d6efad6f2d87207229b20086d7 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Fri, 26 Jun 2026 13:21:55 +0200 Subject: [PATCH 3/4] Include `rctx.os.{name,arch}` in the pre declared inputs hash This info is accessible via `rctx.os.{name,arch}` and can also influence the result of a repo rule in subtle ways (e.g. behavior of host tools, line breaks, etc), so it is now mixed into the predeclared inputs hash used as the key for the local and remote repo contents cache. 8.x adaptation: `DigestWriter` is a nested class of `RepositoryDelegatorFunction` rather than a standalone target, and the predeclared inputs hash is computed in `computePredeclaredInputHash` from the serialized `Rule` instead of a `RepoDefinition`. The host OS name and CPU architecture are therefore mixed into that fingerprint right before the environment variable inputs. Closes #29148. PiperOrigin-RevId: 893352625 Change-Id: I02c0ded7ffe2b5aa9bc2ef489dc5240b5716ebdf (cherry picked from commit 2ec24b41c4c28c677349cfca027e82707278c04f) --- .../lib/rules/repository/RepositoryDelegatorFunction.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java index 748e8e1c903367..09039ff3ed7e35 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepositoryDelegatorFunction.java @@ -15,6 +15,7 @@ package com.google.devtools.build.lib.rules.repository; +import static com.google.common.base.StandardSystemProperty.OS_NAME; import static com.google.devtools.build.lib.skyframe.RepositoryMappingFunction.REPOSITORY_OVERRIDES; import static java.nio.charset.StandardCharsets.ISO_8859_1; @@ -63,6 +64,7 @@ import com.google.devtools.build.skyframe.SkyKey; import com.google.devtools.build.skyframe.SkyValue; import java.io.IOException; +import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.LinkedHashMap; @@ -862,6 +864,11 @@ static String computePredeclaredInputHash( .addBytes(RuleFormatter.serializeRule(rule).build().toByteArray()) .addInt(MARKER_FILE_VERSION) .addBytes(BuildLanguageOptions.stableFingerprint(starlarkSemantics)) + // This info is accessible via rctx.os.{name,arch} and can also influence the + // result of a repo rule in subtle ways (e.g. behavior of host tools, line breaks, + // etc). + .addString(OS_NAME.value().toLowerCase(Locale.ROOT)) + .addString(System.getProperty("os.arch").toLowerCase(Locale.ROOT)) .addInt(environ.size()); environ.forEach( (key, value) -> fp.addString(key.toString()).addNullableString(value.orElse(null))); From 6d1239f0fb51d7c187fc36c9255fca3ccf6f4561 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Thu, 2 Apr 2026 01:33:16 -0700 Subject: [PATCH 4/4] Materialize RRCC symlinks via prefetching When using the remote repo contents cache, repo materialization now uses the same code path as local action input prefetching for symlinks. This ensures that each symlink is only materialized once and that there are no races in case symlinking is not atomic (on Windows with default settings). Along the way, remove unreachable code from `UploadManifest`. 8.x adaptation: `AbstractActionInputPrefetcher` on 8.x resolves input paths via the Bazel 8.x-specific `resolveExecPath(execPath)` helper (which avoids evaluating the WORKSPACE-named exec root when possible) and exposes the exec root through the `execRoot()` method rather than an `execRoot` field, so the new symlink branch is expressed in those terms. Because `execRoot()` is not available during external repository materialization, the branch detects external repo paths via a new `isUnderExternalRepoRoot` helper (mirroring `DirectoryTracker.setWritable`) instead of `!inputPath.startsWith(execRoot())`, and reads the symlink target from `UnresolvedSymlinkArtifactValue#getSymlinkTarget` (8.x has no `FileArtifactValue#getUnresolvedSymlinkTarget`). Only the `FileStateType` import is added; the other imports touched upstream don't apply because 8.x resolves `VirtualActionInput` to `actions.cache.VirtualActionInput`. The three symlink-materialization tests added upstream are deferred to the chains-of-symlinks backport (#29767): on 8.x, repo files are materialized lazily through `RemoteExternalOverlayFileSystem`, and faithfully reproducing (chains of) symlinks on disk only works with that later change, not with this commit's single-link prefetching alone. Fixes #28575. Closes #28683. PiperOrigin-RevId: 893359962 Change-Id: If0eb64e41858a4b9298c2609d83160b0ecbfd451 (cherry picked from commit 90eb56ec1dc70e38192b8c93bca236d0dcebcf6e) --- .../remote/AbstractActionInputPrefetcher.java | 37 ++++++++++++++++++- .../RemoteExternalOverlayFileSystem.java | 16 ++++---- .../build/lib/remote/UploadManifest.java | 6 --- 3 files changed, 43 insertions(+), 16 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java b/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java index 638f9d7a561efc..4b4da15a635bd9 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java +++ b/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java @@ -45,6 +45,7 @@ import com.google.devtools.build.lib.actions.FileArtifactValue.RemoteFileArtifactValue; import com.google.devtools.build.lib.actions.FileArtifactValue.RemoteFileArtifactValueWithMaterializationData; import com.google.devtools.build.lib.actions.FileContentsProxy; +import com.google.devtools.build.lib.actions.FileStateType; import com.google.devtools.build.lib.actions.cache.OutputMetadataStore; import com.google.devtools.build.lib.actions.cache.VirtualActionInput; import com.google.devtools.build.lib.cmdline.LabelConstants; @@ -247,6 +248,17 @@ protected Path execRoot() { return execRootSupplier.get(); } + /** + * Returns whether the given path lives under the external repository root. Unlike {@link + * #execRoot()}, this can be evaluated during external repository materialization, where the exec + * root isn't available yet. + */ + private boolean isUnderExternalRepoRoot(Path path) { + return path.asFragment() + .startsWith( + outputBase.getRelative(LabelConstants.EXTERNAL_REPOSITORY_LOCATION).asFragment()); + } + /** * Resolves an exec path to an absolute path, avoiding evaluation of execRoot() if possible as it * isn't available during server startup. This logic is unique to Bazel 8.x as it still names the @@ -412,7 +424,19 @@ private ListenableFuture prefetchFile( PathFragment execPath = input.getExecPath(); FileArtifactValue metadata = metadataSupplier.getMetadata(input); - if (metadata == null || !canDownloadFile(resolveExecPath(execPath), metadata)) { + if (metadata == null) { + return immediateVoidFuture(); + } + Path inputPath = resolveExecPath(execPath); + if (metadata.getType() == FileStateType.SYMLINK && isUnderExternalRepoRoot(inputPath)) { + return toListenableFuture( + plantUnresolvedSymlink( + inputPath.forHostFileSystem(), + PathFragment.create( + ((FileArtifactValue.UnresolvedSymlinkArtifactValue) metadata) + .getSymlinkTarget()))); + } + if (!canDownloadFile(inputPath, metadata)) { return immediateVoidFuture(); } @@ -752,6 +776,17 @@ private Completable plantSymlink(Symlink symlink) { forceRefetch(resolveExecPath(symlink.linkExecPath()))); } + private Completable plantUnresolvedSymlink(Path linkPath, PathFragment target) { + return downloadCache.executeIfNot( + linkPath, + Completable.defer( + () -> { + linkPath.delete(); + linkPath.createSymbolicLink(target); + return Completable.complete(); + })); + } + public ImmutableSet downloadedFiles() { return downloadCache.getFinishedTasks(); } diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java index 049c3411033744..3f2045fd958232 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteExternalOverlayFileSystem.java @@ -315,12 +315,8 @@ private void doMaterialize(RepositoryName repo, ExtendedEventHandler reporter) } prefetch(walkResult.files()); // Create symlinks last as some platforms don't allow creating a symlink to a non-existent - // target. A symlink may have already been created as an input to an action. - for (var remoteSymlink : walkResult.symlinks()) { - var nativeSymlink = nativeFs.getPath(remoteSymlink); - FileSystemUtils.ensureSymbolicLink( - nativeSymlink, externalFs.getPath(remoteSymlink).readSymbolicLink()); - } + // target. + prefetch(walkResult.symlinks()); // After the repo has been copied, atomically materialize the marker file. This ensures that the // repo doesn't have to be refetched after the next server restart. @@ -656,9 +652,11 @@ private RemoteActionExecutionContext makeRemoteContext(PathFragment relativePath } private FileArtifactValue getMetadata(PathFragment path) throws IOException { - var info = - (RemoteActionFileSystem.RemoteInMemoryFileInfo) stat(path, /* followSymlinks= */ true); - return info.getMetadata(); + var status = stat(path, /* followSymlinks= */ false); + if (!status.isSymbolicLink()) { + return ((RemoteActionFileSystem.RemoteInMemoryFileInfo) status).getMetadata(); + } + return FileArtifactValue.createForUnresolvedSymlink(externalFs.getPath(path)); } @Override diff --git a/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java b/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java index 65e12131f5a71f..fe05404622593c 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java +++ b/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java @@ -274,9 +274,6 @@ void addFiles(Collection files) throws ExecException, IOException, Interru addFile(digestUtil.compute(file, statFollow), file, statNoFollow); } else { // Symlink to file uploaded as a symlink. - if (target.isAbsolute()) { - checkAbsoluteSymlinkAllowed(file, target); - } addFileSymbolicLink(file, target); } continue; @@ -287,9 +284,6 @@ void addFiles(Collection files) throws ExecException, IOException, Interru addDirectory(file); } else { // Symlink to directory uploaded as a symlink. - if (target.isAbsolute()) { - checkAbsoluteSymlinkAllowed(file, target); - } addDirectorySymbolicLink(file, target); } continue;