Skip to content

Commit

Permalink
Optimize RepoMappingManifestAction
Browse files Browse the repository at this point in the history
The following optimizations reduce the time spent writing the repo mapping manifest from >6s to <80ms for a test referencing each of ~4,000 repos created by a module extension:

* The set of repository names appearing in runfiles paths is only constructed once rather than for each repo by moving the `build()` call out of a closure.
* The relevant entries per repository are now sorted after filtering out those for repos that don't contribute runfiles.
* Crucially, the relevant mapping entries per repository are now cached per instance of `RepositoryMapping#entries()`. Since extension repos all share the same instance due to interning, this reduces the complexity from quadratic to linear in the number of extension repos.

Closes bazelbuild#20091.

PiperOrigin-RevId: 581128978
Change-Id: I946e7788b8538e84714cf25ece89a86edd0d6948
  • Loading branch information
fmeum authored and copybara-github committed Nov 10, 2023
1 parent 19c73bc commit 305ab3b
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 46 deletions.
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/analysis/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -985,6 +985,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/packages",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/net/starlark/java/eval",
"//third_party:caffeine",
"//third_party:guava",
"//third_party:jsr305",
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,16 @@
// limitations under the License.
package com.google.devtools.build.lib.analysis;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSortedMap.toImmutableSortedMap;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Comparator.comparing;

import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.LoadingCache;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedMap;
import com.google.devtools.build.lib.actions.ActionExecutionContext;
Expand All @@ -41,6 +46,7 @@
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.IdentityHashMap;
import java.util.Map.Entry;
import java.util.UUID;
import javax.annotation.Nullable;
Expand All @@ -52,19 +58,28 @@ public final class RepoMappingManifestAction extends AbstractFileWriteAction

private static final UUID MY_UUID = UUID.fromString("458e351c-4d30-433d-b927-da6cddd4737f");

private static final LoadingCache<ImmutableMap<String, RepositoryName>, String>
repoMappingFingerprintCache =
Caffeine.newBuilder()
.weakKeys()
.build(
repoMapping -> {
Fingerprint fp = new Fingerprint();
fp.addInt(repoMapping.size());
repoMapping.forEach(
(apparentName, canonicalName) -> {
fp.addString(apparentName);
fp.addString(canonicalName.getName());
});
return fp.hexDigestAndReset();
});

// Uses MapFn's args parameter just like Fingerprint#addString to compute a cacheable fingerprint
// of just the repo name and mapping of a given Package.
private static final MapFn<Package> REPO_AND_MAPPING_DIGEST_FN =
(pkg, args) -> {
args.accept(pkg.getPackageIdentifier().getRepository().getName());

var mapping = pkg.getRepositoryMapping().entries();
args.accept(String.valueOf(mapping.size()));
mapping.forEach(
(apparentName, canonicalName) -> {
args.accept(apparentName);
args.accept(canonicalName.getName());
});
args.accept(repoMappingFingerprintCache.get(pkg.getRepositoryMapping().entries()));
};

private static final MapFn<Artifact> OWNER_REPO_FN =
Expand Down Expand Up @@ -144,67 +159,88 @@ public DeterministicWriter newDeterministicWriter() {
return out -> {
PrintWriter writer = new PrintWriter(out, /* autoFlush= */ false, ISO_8859_1);

var reposInRunfilePaths = ImmutableSet.<String>builder();

var reposInRunfilesPathsBuilder = ImmutableSet.<String>builder();
// The runfiles paths of symlinks are always prefixed with the main workspace name, *not* the
// name of the repository adding the symlink.
if (hasRunfilesSymlinks) {
reposInRunfilePaths.add(RepositoryName.MAIN.getName());
reposInRunfilesPathsBuilder.add(RepositoryName.MAIN.getName());
}

// Since root symlinks are the only way to stage a runfile at a specific path under the
// current repository's runfiles directory, recognize canonical repository names that appear
// as the first segment of their runfiles paths.
for (SymlinkEntry symlink : runfilesRootSymlinks.toList()) {
reposInRunfilePaths.add(symlink.getPath().getSegment(0));
reposInRunfilesPathsBuilder.add(symlink.getPath().getSegment(0));
}

for (Artifact artifact : runfilesArtifacts.toList()) {
Label owner = artifact.getOwner();
if (owner != null) {
reposInRunfilePaths.add(owner.getRepository().getName());
reposInRunfilesPathsBuilder.add(owner.getRepository().getName());
}
}

transitivePackages.toList().stream()
.collect(
toImmutableSortedMap(
comparing(RepositoryName::getName),
pkg -> pkg.getPackageIdentifier().getRepository(),
Package::getRepositoryMapping,
// All packages in a given repository have the same repository mapping, so the
// particular way of resolving duplicates does not matter.
(first, second) -> first))
.forEach(
(repoName, mapping) ->
writeRepoMapping(writer, reposInRunfilePaths.build(), repoName, mapping));
var reposInRunfilesPaths = reposInRunfilesPathsBuilder.build();

ImmutableSortedMap<RepositoryName, RepositoryMapping> sortedRepoMappings =
transitivePackages.toList().stream()
.collect(
toImmutableSortedMap(
comparing(RepositoryName::getName),
pkg -> pkg.getPackageIdentifier().getRepository(),
Package::getRepositoryMapping,
// All packages in a given repository have the same repository mapping, so the
// particular way of resolving duplicates does not matter.
(first, second) -> first));
// All repositories generated by a module extension have the same Map instance as the entries
// of their RepositoryMapping, with every repo appearing as an entry. If a module extension
// generates N repos and all of them are in transitivePackages, iterating over the packages
// and then over each mapping's entries would thus require time quadratic in N. We prevent
// this by caching the relevant (target apparent name, target canonical name) pairs per entry
// map instance.
IdentityHashMap<
ImmutableMap<String, RepositoryName>, ImmutableList<Entry<String, RepositoryName>>>
cachedRelevantEntries = new IdentityHashMap<>();
for (var repoAndMapping : sortedRepoMappings.entrySet()) {
cachedRelevantEntries
.computeIfAbsent(
repoAndMapping.getValue().entries(),
entries -> computeRelevantEntries(reposInRunfilesPaths, entries))
.forEach(
mappingEntry -> {
// The canonical name of the main repo is the empty string, which is not a valid
// name for a directory, so the "workspace name" is used the name of the directory
// under the runfiles tree for it.
String targetRepoDirectoryName =
mappingEntry.getValue().isMain()
? workspaceName
: mappingEntry.getValue().getName();
writer.format(
"%s,%s,%s\n",
repoAndMapping.getKey().getName(),
mappingEntry.getKey(),
targetRepoDirectoryName);
});
}
writer.flush();
};
}

private void writeRepoMapping(
PrintWriter writer,
private ImmutableList<Entry<String, RepositoryName>> computeRelevantEntries(
ImmutableSet<String> reposInRunfilesPaths,
RepositoryName repoName,
RepositoryMapping repoMapping) {
for (Entry<String, RepositoryName> mappingEntry :
ImmutableSortedMap.copyOf(repoMapping.entries()).entrySet()) {
if (mappingEntry.getKey().isEmpty()) {
ImmutableMap<String, RepositoryName> mappingEntries) {
// TODO: If this becomes a hotspot, consider iterating over reposInRunfilesPaths and looking
// up the apparent name in the inverse of mappingEntries, which ensures that the runtime is
// always linear in the number of entries ultimately emitted into the manifest and independent
// of the size of the individual mappings. This requires making RepositoryMapping#entries() an
// ImmutableBiMap, or even ImmutableMultimap since repositories can have multiple apparent
// names.
return mappingEntries.entrySet().stream()
// The apparent repo name can only be empty for the main repo. We skip this line as
// Rlocation paths can't reference an empty apparent name anyway.
continue;
}
if (!reposInRunfilesPaths.contains(mappingEntry.getValue().getName())) {
.filter(mappingEntry -> !mappingEntry.getKey().isEmpty())
// We only write entries for repos whose canonical names appear in runfiles paths.
continue;
}
// The canonical name of the main repo is the empty string, which is not a valid name for a
// directory, so the "workspace name" is used the name of the directory under the runfiles
// tree for it.
String targetRepoDirectoryName =
mappingEntry.getValue().isMain() ? workspaceName : mappingEntry.getValue().getName();
writer.format(
"%s,%s,%s\n", repoName.getName(), mappingEntry.getKey(), targetRepoDirectoryName);
}
.filter(entry -> reposInRunfilesPaths.contains(entry.getValue().getName()))
.sorted(Entry.comparingByKey())
.collect(toImmutableList());
}
}

0 comments on commit 305ab3b

Please sign in to comment.