Skip to content

Commit

Permalink
Introduce options for remote cache key scrubbing.
Browse files Browse the repository at this point in the history
This PR introduces a set of --experimental_remote_scrub_* flags that can be
used to scrub platform-dependent data from the key used to retrieve and store
action results from a remote or disk cache, so that actions executing on
different platforms but targeting the same platform may be able to share
cache entries.

This is a simplified implementation of one of the ideas described in [1],
highly influenced by Olivier Notteghem's earlier proposal [2], intended to
provide a simple yet flexible API to enable further experimentation. It must
be used with care, as incorrect settings can compromise build correctness.

[1] https://docs.google.com/document/d/1uMPj2s0TlHSIKSngqOkWJoeqOtKzaxQLtBrRfYif3Lo/edit?usp=sharing
[2] bazelbuild#18669
  • Loading branch information
tjgq committed Sep 14, 2023
1 parent 3d318f1 commit 41d01db
Show file tree
Hide file tree
Showing 13 changed files with 839 additions and 19 deletions.
14 changes: 14 additions & 0 deletions src/main/java/com/google/devtools/build/lib/remote/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ java_library(
"RemoteOutputChecker.java",
"AbstractActionInputPrefetcher.java",
"LeaseService.java",
"Scrubber.java",
],
),
exports = [
Expand All @@ -51,6 +52,7 @@ java_library(
":abstract_action_input_prefetcher",
":lease_service",
":remote_output_checker",
":scrubber",
"//src/main/java/com/google/devtools/build/lib:build-request-options",
"//src/main/java/com/google/devtools/build/lib:runtime",
"//src/main/java/com/google/devtools/build/lib:runtime/command_line_path_factory",
Expand Down Expand Up @@ -249,3 +251,15 @@ java_library(
"//third_party:jsr305",
],
)

# Remote cache key scrubbing (Scrubber.java), declared as its own target so that
# it can be depended upon without pulling in the rest of this package.
java_library(
    name = "scrubber",
    srcs = ["Scrubber.java"],
    deps = [
        "//src/main/java/com/google/devtools/build/lib/actions",
        "//src/main/java/com/google/devtools/build/lib/actions:artifacts",
        "//src/main/java/com/google/devtools/build/lib/remote/options",
        "//src/main/java/com/google/devtools/common/options:options_internal",
        "//third_party:guava",
        # Scrubber.java uses javax.annotation.Nullable directly.
        "//third_party:jsr305",
    ],
)
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@
import com.google.devtools.build.lib.remote.RemoteExecutionService.ActionResultMetadata.DirectoryMetadata;
import com.google.devtools.build.lib.remote.RemoteExecutionService.ActionResultMetadata.FileMetadata;
import com.google.devtools.build.lib.remote.RemoteExecutionService.ActionResultMetadata.SymlinkMetadata;
import com.google.devtools.build.lib.remote.Scrubber.SpawnScrubber;
import com.google.devtools.build.lib.remote.common.BulkTransferException;
import com.google.devtools.build.lib.remote.common.OperationObserver;
import com.google.devtools.build.lib.remote.common.OutputDigestMismatchException;
Expand Down Expand Up @@ -179,6 +180,8 @@ public class RemoteExecutionService {

@Nullable private final RemoteOutputChecker remoteOutputChecker;

private final Scrubber scrubber;

public RemoteExecutionService(
Executor executor,
Reporter reporter,
Expand Down Expand Up @@ -210,6 +213,7 @@ public RemoteExecutionService(
if (remoteOptions.remoteMerkleTreeCacheSize != 0) {
merkleTreeCacheBuilder.maximumSize(remoteOptions.remoteMerkleTreeCacheSize);
}
this.scrubber = Scrubber.forOptions(remoteOptions);
this.merkleTreeCache = merkleTreeCacheBuilder.build();

this.tempPathGenerator = tempPathGenerator;
Expand All @@ -219,12 +223,13 @@ public RemoteExecutionService(
this.remoteOutputChecker = remoteOutputChecker;
}

static Command buildCommand(
private Command buildCommand(
Collection<? extends ActionInput> outputs,
List<String> arguments,
ImmutableMap<String, String> env,
@Nullable Platform platform,
RemotePathResolver remotePathResolver) {
RemotePathResolver remotePathResolver,
@Nullable SpawnScrubber spawnScrubber) {
Command.Builder command = Command.newBuilder();
ArrayList<String> outputFiles = new ArrayList<>();
ArrayList<String> outputDirectories = new ArrayList<>();
Expand All @@ -249,6 +254,9 @@ static Command buildCommand(
command.setPlatform(platform);
}
for (String arg : arguments) {
if (spawnScrubber != null) {
arg = spawnScrubber.transformArgument(arg);
}
command.addArguments(decodeBytestringUtf8(arg));
}
// Sorting the environment pairs by variable name.
Expand Down Expand Up @@ -349,15 +357,16 @@ private SortedMap<PathFragment, ActionInput> buildOutputDirMap(Spawn spawn) {
}

private MerkleTree buildInputMerkleTree(
Spawn spawn, SpawnExecutionContext context, ToolSignature toolSignature)
Spawn spawn, SpawnExecutionContext context, ToolSignature toolSignature,
@Nullable SpawnScrubber spawnScrubber)
throws IOException, ForbiddenActionInputException {
// Add output directories to inputs so that they are created as empty directories by the
// executor. The spec only requires the executor to create the parent directory of an output
// directory, which differs from the behavior of both local and sandboxed execution.
SortedMap<PathFragment, ActionInput> outputDirMap = buildOutputDirMap(spawn);
boolean useMerkleTreeCache = remoteOptions.remoteMerkleTreeCache;
if (toolSignature != null) {
// Marking tool files is not yet supported in conjunction with the merkle tree cache.
if (toolSignature != null || spawnScrubber != null) {
// The Merkle tree cache is not yet compatible with scrubbing or marking tool files.
useMerkleTreeCache = false;
}
if (useMerkleTreeCache) {
Expand All @@ -369,7 +378,8 @@ private MerkleTree buildInputMerkleTree(
(Object nodeKey, InputWalker walker) -> {
subMerkleTrees.add(
buildMerkleTreeVisitor(
nodeKey, walker, inputMetadataProvider, context.getPathResolver()));
nodeKey, walker, inputMetadataProvider, context.getPathResolver(),
spawnScrubber));
});
if (!outputDirMap.isEmpty()) {
subMerkleTrees.add(
Expand All @@ -378,6 +388,7 @@ private MerkleTree buildInputMerkleTree(
inputMetadataProvider,
execRoot,
context.getPathResolver(),
/* scrubber= */ null,
digestUtil));
}
return MerkleTree.merge(subMerkleTrees, digestUtil);
Expand All @@ -399,6 +410,7 @@ private MerkleTree buildInputMerkleTree(
context.getInputMetadataProvider(),
execRoot,
context.getPathResolver(),
spawnScrubber,
digestUtil);
}
}
Expand All @@ -407,7 +419,8 @@ private MerkleTree buildMerkleTreeVisitor(
Object nodeKey,
InputWalker walker,
InputMetadataProvider inputMetadataProvider,
ArtifactPathResolver artifactPathResolver)
ArtifactPathResolver artifactPathResolver,
@Nullable SpawnScrubber spawnScrubber)
throws IOException, ForbiddenActionInputException {
// Deduplicate concurrent computations for the same node. It's not possible to use
// MerkleTreeCache#get(key, loader) because the loading computation may cause other nodes to be
Expand All @@ -419,7 +432,8 @@ private MerkleTree buildMerkleTreeVisitor(
// No preexisting cache entry, so we must do the computation ourselves.
try {
freshFuture.complete(
uncachedBuildMerkleTreeVisitor(walker, inputMetadataProvider, artifactPathResolver));
uncachedBuildMerkleTreeVisitor(walker, inputMetadataProvider, artifactPathResolver,
spawnScrubber));
} catch (Exception e) {
freshFuture.completeExceptionally(e);
}
Expand All @@ -443,7 +457,8 @@ private MerkleTree buildMerkleTreeVisitor(
public MerkleTree uncachedBuildMerkleTreeVisitor(
InputWalker walker,
InputMetadataProvider inputMetadataProvider,
ArtifactPathResolver artifactPathResolver)
ArtifactPathResolver artifactPathResolver,
@Nullable SpawnScrubber scrubber)
throws IOException, ForbiddenActionInputException {
ConcurrentLinkedQueue<MerkleTree> subMerkleTrees = new ConcurrentLinkedQueue<>();
subMerkleTrees.add(
Expand All @@ -452,18 +467,19 @@ public MerkleTree uncachedBuildMerkleTreeVisitor(
inputMetadataProvider,
execRoot,
artifactPathResolver,
scrubber,
digestUtil));
walker.visitNonLeaves(
(Object subNodeKey, InputWalker subWalker) -> {
subMerkleTrees.add(
buildMerkleTreeVisitor(
subNodeKey, subWalker, inputMetadataProvider, artifactPathResolver));
subNodeKey, subWalker, inputMetadataProvider, artifactPathResolver, scrubber));
});
return MerkleTree.merge(subMerkleTrees, digestUtil);
}

@Nullable
private static ByteString buildSalt(Spawn spawn) {
private static ByteString buildSalt(Spawn spawn, @Nullable SpawnScrubber spawnScrubber) {
CacheSalt.Builder saltBuilder =
CacheSalt.newBuilder().setMayBeExecutedRemotely(Spawns.mayBeExecutedRemotely(spawn));

Expand All @@ -473,6 +489,12 @@ private static ByteString buildSalt(Spawn spawn) {
saltBuilder.setWorkspace(workspace);
}

if (spawnScrubber != null) {
saltBuilder.setScrubSalt(
CacheSalt.ScrubSalt.newBuilder().setIsScrubbed(true).setSalt(spawnScrubber.getSalt())
.build());
}

return saltBuilder.build().toByteString();
}

Expand Down Expand Up @@ -508,7 +530,9 @@ public RemoteAction buildRemoteAction(Spawn spawn, SpawnExecutionContext context
remoteActionBuildingSemaphore.acquire();
try {
ToolSignature toolSignature = getToolSignature(spawn, context);
final MerkleTree merkleTree = buildInputMerkleTree(spawn, context, toolSignature);
SpawnScrubber spawnScrubber = scrubber.forSpawn(spawn);
final MerkleTree merkleTree = buildInputMerkleTree(spawn, context, toolSignature,
spawnScrubber);

// Get the remote platform properties.
Platform platform = PlatformUtils.getPlatformProto(spawn, remoteOptions);
Expand All @@ -526,7 +550,8 @@ public RemoteAction buildRemoteAction(Spawn spawn, SpawnExecutionContext context
spawn.getArguments(),
spawn.getEnvironment(),
platform,
remotePathResolver);
remotePathResolver,
spawnScrubber);
Digest commandHash = digestUtil.compute(command);
Action action =
Utils.buildAction(
Expand All @@ -535,7 +560,7 @@ public RemoteAction buildRemoteAction(Spawn spawn, SpawnExecutionContext context
platform,
context.getTimeout(),
Spawns.mayBeCachedRemotely(spawn),
buildSalt(spawn));
buildSalt(spawn, spawnScrubber));

ActionKey actionKey = digestUtil.computeActionKey(action);

Expand Down Expand Up @@ -1414,7 +1439,8 @@ public void uploadInputsIfNotPresent(RemoteAction action, boolean force)
Spawn spawn = action.getSpawn();
SpawnExecutionContext context = action.getSpawnExecutionContext();
ToolSignature toolSignature = getToolSignature(spawn, context);
merkleTree = buildInputMerkleTree(spawn, context, toolSignature);
SpawnScrubber spawnScrubber = scrubber.forSpawn(spawn);
merkleTree = buildInputMerkleTree(spawn, context, toolSignature, spawnScrubber);
}

remoteExecutionCache.ensureInputsPresent(
Expand Down
156 changes: 156 additions & 0 deletions src/main/java/com/google/devtools/build/lib/remote/Scrubber.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// Copyright 2023 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.remote;

import static com.google.common.base.Preconditions.checkState;

import com.google.common.base.Predicates;
import com.google.common.collect.ImmutableMap;
import com.google.devtools.build.lib.actions.ActionInput;
import com.google.devtools.build.lib.actions.Spawn;
import com.google.devtools.build.lib.actions.cache.VirtualActionInput;
import com.google.devtools.build.lib.remote.options.RemoteOptions;
import com.google.devtools.common.options.RegexPatternOption;
import java.util.Collection;
import java.util.Map;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;

/**
 * The {@link Scrubber} implements scrubbing of remote cache keys.
 *
 * <p>A single {@link Scrubber} is built from the {@link RemoteOptions}. For each {@link Spawn},
 * {@link #forSpawn} decides whether scrubbing applies and, if so, hands out the {@link
 * SpawnScrubber} describing how the cache key for that spawn is to be altered.
 *
 * <p>See the documentation for the {@code --experimental_remote_scrub_*} flags for more
 * information.
 */
public class Scrubber {

  /**
   * A {@link SpawnScrubber} determines how to scrub the cache key for a {@link Spawn}.
   *
   * <p>It bundles three pieces of configuration: a matcher selecting the inputs to omit, an
   * ordered set of regex-based replacements for command line arguments, and a salt string.
   */
  public static class SpawnScrubber {

    private final Predicate<ActionInput> inputMatcher;
    private final ImmutableMap<RegexPatternOption, String> argReplacements;
    private final String salt;

    private SpawnScrubber(RemoteOptions options) {
      this.inputMatcher = buildInputMatcher(options);
      this.argReplacements = ImmutableMap.copyOf(options.scrubArgReplacements);
      this.salt = options.scrubSalt;
    }

    /**
     * Builds the predicate backing {@link #shouldOmitInput}.
     *
     * <p>The {@link VirtualActionInput#EMPTY_MARKER} input is never matched; any other input is
     * matched by testing its exec path string against the {@code scrubInput} patterns.
     */
    private static Predicate<ActionInput> buildInputMatcher(RemoteOptions options) {
      Predicate<String> execPathMatcher = buildStringMatcher(options.scrubInput);
      return (input) ->
          !input.equals(VirtualActionInput.EMPTY_MARKER)
              && execPathMatcher.test(input.getExecPathString());
    }

    /**
     * Whether the given input should be omitted from the cache key.
     *
     * @param input the action input to test
     * @return {@code true} if the input's exec path matches one of the configured input patterns
     */
    public boolean shouldOmitInput(ActionInput input) {
      return inputMatcher.test(input);
    }

    /**
     * Transforms an action command line argument.
     *
     * <p>Every configured replacement is tried in turn, each one operating on the result of the
     * previous one. For a given pattern, only its first match is replaced.
     *
     * @param arg the original argument
     * @return the argument after all replacements have been applied; {@code arg} itself if no
     *     pattern matched
     */
    public String transformArgument(String arg) {
      for (Map.Entry<RegexPatternOption, String> entry : argReplacements.entrySet()) {
        Pattern pattern = entry.getKey().regexPattern();
        String replacement = entry.getValue();
        // Don't use Matcher#replaceFirst because it interprets references to capture groups in
        // the replacement string; the replacement must be inserted literally.
        Matcher m = pattern.matcher(arg);
        if (m.find()) {
          arg = arg.substring(0, m.start()) + replacement + arg.substring(m.end());
        }
      }
      return arg;
    }

    /**
     * Returns the scrubbing salt.
     */
    public String getSalt() {
      return salt;
    }
  }

  private final Predicate<Spawn> spawnMatcher;

  // A single instance is shared by every spawn that matches.
  private final SpawnScrubber spawnScrubber;

  private Scrubber(RemoteOptions options) {
    this.spawnMatcher = buildSpawnMatcher(options);
    this.spawnScrubber = new SpawnScrubber(options);
  }

  /**
   * Returns a {@link Scrubber} that performs scrubbing according to the {@link RemoteOptions}.
   *
   * <p>Never returns {@code null}: when scrubbing is disabled, the returned {@link Scrubber}
   * simply matches no spawn.
   */
  public static Scrubber forOptions(RemoteOptions options) {
    return new Scrubber(options);
  }

  /**
   * Builds the predicate deciding whether a {@link Spawn} is subject to scrubbing.
   *
   * <p>If scrubbing is disabled, no spawn matches. Otherwise, a spawn matches when its mnemonic
   * and the name of the repository of its owner's label both match their respective patterns
   * and, unless {@code scrubExec} is set, the spawn is not built in a tool configuration.
   */
  private static Predicate<Spawn> buildSpawnMatcher(RemoteOptions options) {
    if (!options.scrubEnabled) {
      return Predicates.alwaysFalse();
    }

    Predicate<String> mnemonicMatcher = buildStringMatcher(options.scrubMnemonic);
    Predicate<String> repoMatcher = buildStringMatcher(options.scrubRepo);
    boolean scrubExec = options.scrubExec;

    return (spawn) -> {
      String mnemonic = spawn.getMnemonic();
      String repo = spawn.getResourceOwner().getOwner().getLabel().getRepository().getName();
      boolean isForTool = spawn.getResourceOwner().getOwner().isBuildConfigurationForTool();

      return (!isForTool || scrubExec) && mnemonicMatcher.test(mnemonic) && repoMatcher.test(repo);
    };
  }

  /**
   * Builds a predicate that accepts a string if any of the given patterns is found in it.
   *
   * <p>Matching uses {@link Matcher#find}, so a pattern may match anywhere in the string unless
   * it is explicitly anchored. An empty collection of patterns yields a predicate that accepts
   * nothing.
   */
  private static Predicate<String> buildStringMatcher(Collection<RegexPatternOption> options) {
    if (options.isEmpty()) {
      // If no patterns are specified, match nothing.
      return Predicates.alwaysFalse();
    }
    // Combine multiple patterns into a single one for efficiency. Each pattern is wrapped in a
    // non-capturing group so that the alternation does not leak into the individual patterns.
    StringBuilder sb = new StringBuilder();
    for (RegexPatternOption opt : options) {
      if (sb.length() > 0) {
        sb.append("|");
      }
      sb.append("(?:");
      sb.append(opt.regexPattern().pattern());
      sb.append(")");
    }
    Pattern pattern = Pattern.compile(sb.toString());
    return (str) -> pattern.matcher(str).find();
  }

  /**
   * Returns a {@link SpawnScrubber} suitable for a {@link Spawn}, or {@code null} if the spawn does
   * not need to be scrubbed.
   */
  @Nullable
  public SpawnScrubber forSpawn(Spawn spawn) {
    if (spawnMatcher.test(spawn)) {
      return spawnScrubber;
    }
    return null;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/actions:file_metadata",
"//src/main/java/com/google/devtools/build/lib/profiler",
"//src/main/java/com/google/devtools/build/lib/remote:scrubber",
"//src/main/java/com/google/devtools/build/lib/remote/util",
"//src/main/java/com/google/devtools/build/lib/util:string",
"//src/main/java/com/google/devtools/build/lib/vfs",
Expand Down
Loading

0 comments on commit 41d01db

Please sign in to comment.