diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/RepoContentsCache.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/RepoContentsCache.java index 2f72a2cf9ae6e5..d7faafe70bb9fb 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/RepoContentsCache.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/RepoContentsCache.java @@ -31,10 +31,10 @@ import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.Symlinks; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.time.Duration; import java.time.Instant; import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; import java.util.UUID; import javax.annotation.Nullable; @@ -47,12 +47,9 @@ * transitive bzl digest, repo attrs, starlark semantics, etc). Each distinct predeclared inputs * hash is its own entry directory in the first layer. * - *

Inside each entry directory are pairs of directories and files {@code } - * where {@code N} is an integer. The file {@code N.recorded_inputs} contains the recorded inputs - * and their values of a cached repo, and the directory {@code N} contains the cached repo contents. - * There is also a file named {@code counter} that stores the next available {@code N} for this - * entry directory, and a file named {@code lock} to ensure exclusive access to the {@code counter} - * file. + *

Inside each entry directory are pairs of directories and files {@code }. The file {@code UUID.recorded_inputs} contains the recorded inputs and + * their values of a cached repo, and the directory {@code UUID} contains the cached repo contents. * *

On a cache hit (that is, the predeclared inputs hash matches, and recorded inputs are * up-to-date), the recorded inputs file has its mtime updated. Cached repos whose recorded inputs @@ -124,17 +121,19 @@ public ImmutableList getCandidateRepos(String predeclaredInputHas Preconditions.checkState(path != null); Path entryDir = path.getRelative(predeclaredInputHash); try { + // Prefer more recently used cache entries over older ones. They're more likely to be + // up-to-date; plus, if a repo is force-fetched, we want to use the new repo instead of always + // being stuck with the old one. Since the inputs file is touched on use, we can just sort by + // mtime. This is slightly more complex than in runGc below as the files may be touched + // concurrently and we need to ensure that the equality relation is consistent. + var mtimes = new HashMap(); return entryDir.getDirectoryEntries().stream() .filter(path -> path.getBaseName().endsWith(RECORDED_INPUTS_SUFFIX)) - // Prefer newer cache entries over older ones. They're more likely to be up-to-date; plus, - // if a repo is force-fetched, we want to use the new repo instead of always being stuck - // with the old one. - // To "prefer newer cache entries", we sort the entry file names by length DESC and then - // lexicographically DESC. This approximates sorting by converting to int and then DESC, - // but is defensive against non-numerically named entries. .sorted( - Comparator.comparing((Path path) -> path.getBaseName().length()) - .thenComparing(Path::getBaseName) + Comparator.comparingLong( + (Path path) -> + mtimes.computeIfAbsent( + path, RepoContentsCache::getLastModifiedTimeOrZero)) .reversed()) .map(CandidateRepo::fromRecordedInputsFile) .collect(toImmutableList()); @@ -163,9 +162,9 @@ public Path moveToCache( Preconditions.checkState(path != null); Path entryDir = path.getRelative(predeclaredInputHash); - String counter = getNextCounterInDir(entryDir); - Path cacheRecordedInputsFile = entryDir.getChild(counter + RECORDED_INPUTS_SUFFIX); - Path cacheRepoDir = entryDir.getChild(counter); + String uniqueEntryName = UUID.randomUUID().toString(); + Path cacheRecordedInputsFile = entryDir.getChild(uniqueEntryName + RECORDED_INPUTS_SUFFIX); + Path cacheRepoDir = entryDir.getChild(uniqueEntryName); cacheRepoDir.deleteTree(); cacheRepoDir.getParentDirectory().createDirectoryAndParents(); @@ -187,27 +186,6 @@ public Path moveToCache( return cacheRepoDir; } - private static String getNextCounterInDir(Path entryDir) - throws IOException, InterruptedException { - Path counterFile = entryDir.getRelative("counter"); - // This use of FileSystemLock.get is safe since the predeclared input hash is part of entryDir's - // path and in particular includes the canonical repository name. This ensures that the same - // lock file will not be acquired concurrently by multiple threads, which isn't supported. - try (var lock = FileSystemLock.get(entryDir.getRelative("lock"), LockMode.EXCLUSIVE)) { - int c = 0; - if (counterFile.exists()) { - try { - c = Integer.parseInt(FileSystemUtils.readContent(counterFile, StandardCharsets.UTF_8)); - } catch (NumberFormatException e) { - // ignored - } - } - String counter = Integer.toString(c + 1); - FileSystemUtils.writeContent(counterFile, StandardCharsets.UTF_8, counter); - return counter; - } - } - public void acquireSharedLock() throws IOException, InterruptedException { Preconditions.checkState(path != null); Preconditions.checkState(sharedLock == null, "this process already has the shared lock"); @@ -273,18 +251,7 @@ private void runGc(Duration maxAge) throws InterruptedException, IOException { var recordedInputsFiles = path.getChild(dirent.getName()).getDirectoryEntries().stream() .filter(file -> file.getBaseName().endsWith(RECORDED_INPUTS_SUFFIX)) - .sorted( - comparingLong( - (Path path) -> { - try { - return path.getLastModifiedTime(); - } catch (IOException e) { - // If we can't read the mtime from the entry, it's broken and treated - // as outdated. - return 0; - } - }) - .reversed()) + .sorted(comparingLong(RepoContentsCache::getLastModifiedTimeOrZero).reversed()) .collect(toImmutableList()); var seen = new HashSet(); for (Path recordedInputsFile : recordedInputsFiles) { @@ -305,4 +272,13 @@ private void runGc(Duration maxAge) throws InterruptedException, IOException { } } } + + private static long getLastModifiedTimeOrZero(Path path) { + try { + return path.getLastModifiedTime(); + } catch (IOException e) { + // If we can't read the mtime from the entry, it's broken and treated as outdated. + return 0L; + } + } }