Skip to content

Commit

Permalink
Introduce an experimental flag for remote cache key scrubbing.
Browse files Browse the repository at this point in the history
This PR introduces the --experimental_remote_scrubbing_config flag, which can
be used to scrub platform-dependent data from the key used to retrieve and
store action results from a remote or disk cache This way, actions executing
on different platforms but targeting the same platform may be able to share
cache entries.

To avoid flag proliferation and make for a nicer API, the scrubbing parameters
are supplied via an external file in text proto format.

This is a simplified implementation of one of the ideas described in [1],
highly influenced by Olivier Notteghem's earlier proposal [2], intended to
provide a simple yet flexible API to enable further experimentation. It must
be used with care, as incorrect settings can compromise build correctness.

[1] https://docs.google.com/document/d/1uMPj2s0TlHSIKSngqOkWJoeqOtKzaxQLtBrRfYif3Lo/edit?usp=sharing
[2] bazelbuild#18669
  • Loading branch information
tjgq committed Oct 4, 2023
1 parent 3453d03 commit f566465
Show file tree
Hide file tree
Showing 21 changed files with 873 additions and 20 deletions.
15 changes: 15 additions & 0 deletions src/main/java/com/google/devtools/build/lib/remote/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ java_library(
"RemoteOutputChecker.java",
"AbstractActionInputPrefetcher.java",
"LeaseService.java",
"Scrubber.java",
"Store.java",
],
),
Expand Down Expand Up @@ -90,6 +91,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/exec/local",
"//src/main/java/com/google/devtools/build/lib/packages/semantics",
"//src/main/java/com/google/devtools/build/lib/profiler",
"//src/main/java/com/google/devtools/build/lib/remote:scrubber",
"//src/main/java/com/google/devtools/build/lib/remote/circuitbreaker",
"//src/main/java/com/google/devtools/build/lib/remote/common",
"//src/main/java/com/google/devtools/build/lib/remote/common:bulk_transfer_exception",
Expand Down Expand Up @@ -252,6 +254,19 @@ java_library(
],
)

java_library(
name = "scrubber",
srcs = ["Scrubber.java"],
deps = [
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/common/options:options_internal",
"//src/main/protobuf:remote_scrubbing_java_proto",
"//third_party:guava",
"//third_party/protobuf:protobuf_java",
],
)

java_library(
name = "store",
srcs = ["Store.java"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@
import com.google.devtools.build.lib.remote.RemoteExecutionService.ActionResultMetadata.DirectoryMetadata;
import com.google.devtools.build.lib.remote.RemoteExecutionService.ActionResultMetadata.FileMetadata;
import com.google.devtools.build.lib.remote.RemoteExecutionService.ActionResultMetadata.SymlinkMetadata;
import com.google.devtools.build.lib.remote.Scrubber.SpawnScrubber;
import com.google.devtools.build.lib.remote.common.BulkTransferException;
import com.google.devtools.build.lib.remote.common.OperationObserver;
import com.google.devtools.build.lib.remote.common.OutputDigestMismatchException;
Expand Down Expand Up @@ -179,6 +180,9 @@ public class RemoteExecutionService {

@Nullable private final RemoteOutputChecker remoteOutputChecker;

@Nullable
private final Scrubber scrubber;

public RemoteExecutionService(
Executor executor,
Reporter reporter,
Expand Down Expand Up @@ -210,6 +214,7 @@ public RemoteExecutionService(
if (remoteOptions.remoteMerkleTreeCacheSize != 0) {
merkleTreeCacheBuilder.maximumSize(remoteOptions.remoteMerkleTreeCacheSize);
}
this.scrubber = remoteOptions.scrubber;
this.merkleTreeCache = merkleTreeCacheBuilder.build();

this.tempPathGenerator = tempPathGenerator;
Expand All @@ -219,12 +224,13 @@ public RemoteExecutionService(
this.remoteOutputChecker = remoteOutputChecker;
}

static Command buildCommand(
private Command buildCommand(
Collection<? extends ActionInput> outputs,
List<String> arguments,
ImmutableMap<String, String> env,
@Nullable Platform platform,
RemotePathResolver remotePathResolver) {
RemotePathResolver remotePathResolver,
@Nullable SpawnScrubber spawnScrubber) {
Command.Builder command = Command.newBuilder();
ArrayList<String> outputFiles = new ArrayList<>();
ArrayList<String> outputDirectories = new ArrayList<>();
Expand All @@ -249,6 +255,9 @@ static Command buildCommand(
command.setPlatform(platform);
}
for (String arg : arguments) {
if (spawnScrubber != null) {
arg = spawnScrubber.transformArgument(arg);
}
command.addArguments(decodeBytestringUtf8(arg));
}
// Sorting the environment pairs by variable name.
Expand Down Expand Up @@ -349,15 +358,16 @@ private SortedMap<PathFragment, ActionInput> buildOutputDirMap(Spawn spawn) {
}

private MerkleTree buildInputMerkleTree(
Spawn spawn, SpawnExecutionContext context, ToolSignature toolSignature)
Spawn spawn, SpawnExecutionContext context, ToolSignature toolSignature,
@Nullable SpawnScrubber spawnScrubber)
throws IOException, ForbiddenActionInputException {
// Add output directories to inputs so that they are created as empty directories by the
// executor. The spec only requires the executor to create the parent directory of an output
// directory, which differs from the behavior of both local and sandboxed execution.
SortedMap<PathFragment, ActionInput> outputDirMap = buildOutputDirMap(spawn);
boolean useMerkleTreeCache = remoteOptions.remoteMerkleTreeCache;
if (toolSignature != null) {
// Marking tool files is not yet supported in conjunction with the merkle tree cache.
if (toolSignature != null || spawnScrubber != null) {
// The Merkle tree cache is not yet compatible with scrubbing or marking tool files.
useMerkleTreeCache = false;
}
if (useMerkleTreeCache) {
Expand All @@ -369,7 +379,8 @@ private MerkleTree buildInputMerkleTree(
(Object nodeKey, InputWalker walker) -> {
subMerkleTrees.add(
buildMerkleTreeVisitor(
nodeKey, walker, inputMetadataProvider, context.getPathResolver()));
nodeKey, walker, inputMetadataProvider, context.getPathResolver(),
spawnScrubber));
});
if (!outputDirMap.isEmpty()) {
subMerkleTrees.add(
Expand All @@ -378,6 +389,7 @@ private MerkleTree buildInputMerkleTree(
inputMetadataProvider,
execRoot,
context.getPathResolver(),
/* scrubber= */ null,
digestUtil));
}
return MerkleTree.merge(subMerkleTrees, digestUtil);
Expand All @@ -399,6 +411,7 @@ private MerkleTree buildInputMerkleTree(
context.getInputMetadataProvider(),
execRoot,
context.getPathResolver(),
spawnScrubber,
digestUtil);
}
}
Expand All @@ -407,7 +420,8 @@ private MerkleTree buildMerkleTreeVisitor(
Object nodeKey,
InputWalker walker,
InputMetadataProvider inputMetadataProvider,
ArtifactPathResolver artifactPathResolver)
ArtifactPathResolver artifactPathResolver,
@Nullable SpawnScrubber spawnScrubber)
throws IOException, ForbiddenActionInputException {
// Deduplicate concurrent computations for the same node. It's not possible to use
// MerkleTreeCache#get(key, loader) because the loading computation may cause other nodes to be
Expand All @@ -419,7 +433,8 @@ private MerkleTree buildMerkleTreeVisitor(
// No preexisting cache entry, so we must do the computation ourselves.
try {
freshFuture.complete(
uncachedBuildMerkleTreeVisitor(walker, inputMetadataProvider, artifactPathResolver));
uncachedBuildMerkleTreeVisitor(walker, inputMetadataProvider, artifactPathResolver,
spawnScrubber));
} catch (Exception e) {
freshFuture.completeExceptionally(e);
}
Expand All @@ -443,7 +458,8 @@ private MerkleTree buildMerkleTreeVisitor(
public MerkleTree uncachedBuildMerkleTreeVisitor(
InputWalker walker,
InputMetadataProvider inputMetadataProvider,
ArtifactPathResolver artifactPathResolver)
ArtifactPathResolver artifactPathResolver,
@Nullable SpawnScrubber scrubber)
throws IOException, ForbiddenActionInputException {
ConcurrentLinkedQueue<MerkleTree> subMerkleTrees = new ConcurrentLinkedQueue<>();
subMerkleTrees.add(
Expand All @@ -452,18 +468,19 @@ public MerkleTree uncachedBuildMerkleTreeVisitor(
inputMetadataProvider,
execRoot,
artifactPathResolver,
scrubber,
digestUtil));
walker.visitNonLeaves(
(Object subNodeKey, InputWalker subWalker) -> {
subMerkleTrees.add(
buildMerkleTreeVisitor(
subNodeKey, subWalker, inputMetadataProvider, artifactPathResolver));
subNodeKey, subWalker, inputMetadataProvider, artifactPathResolver, scrubber));
});
return MerkleTree.merge(subMerkleTrees, digestUtil);
}

@Nullable
private static ByteString buildSalt(Spawn spawn) {
private static ByteString buildSalt(Spawn spawn, @Nullable SpawnScrubber spawnScrubber) {
CacheSalt.Builder saltBuilder =
CacheSalt.newBuilder().setMayBeExecutedRemotely(Spawns.mayBeExecutedRemotely(spawn));

Expand All @@ -473,6 +490,11 @@ private static ByteString buildSalt(Spawn spawn) {
saltBuilder.setWorkspace(workspace);
}

if (spawnScrubber != null) {
saltBuilder.setScrubSalt(
CacheSalt.ScrubSalt.newBuilder().setSalt(spawnScrubber.getSalt()).build());
}

return saltBuilder.build().toByteString();
}

Expand Down Expand Up @@ -508,7 +530,9 @@ public RemoteAction buildRemoteAction(Spawn spawn, SpawnExecutionContext context
remoteActionBuildingSemaphore.acquire();
try {
ToolSignature toolSignature = getToolSignature(spawn, context);
final MerkleTree merkleTree = buildInputMerkleTree(spawn, context, toolSignature);
SpawnScrubber spawnScrubber = scrubber != null ? scrubber.forSpawn(spawn) : null;
final MerkleTree merkleTree = buildInputMerkleTree(spawn, context, toolSignature,
spawnScrubber);

// Get the remote platform properties.
Platform platform = PlatformUtils.getPlatformProto(spawn, remoteOptions);
Expand All @@ -526,7 +550,8 @@ public RemoteAction buildRemoteAction(Spawn spawn, SpawnExecutionContext context
spawn.getArguments(),
spawn.getEnvironment(),
platform,
remotePathResolver);
remotePathResolver,
spawnScrubber);
Digest commandHash = digestUtil.compute(command);
Action action =
Utils.buildAction(
Expand All @@ -535,7 +560,7 @@ public RemoteAction buildRemoteAction(Spawn spawn, SpawnExecutionContext context
platform,
context.getTimeout(),
Spawns.mayBeCachedRemotely(spawn),
buildSalt(spawn));
buildSalt(spawn, spawnScrubber));

ActionKey actionKey = digestUtil.computeActionKey(action);

Expand Down Expand Up @@ -1414,7 +1439,8 @@ public void uploadInputsIfNotPresent(RemoteAction action, boolean force)
Spawn spawn = action.getSpawn();
SpawnExecutionContext context = action.getSpawnExecutionContext();
ToolSignature toolSignature = getToolSignature(spawn, context);
merkleTree = buildInputMerkleTree(spawn, context, toolSignature);
SpawnScrubber spawnScrubber = scrubber != null ? scrubber.forSpawn(spawn) : null;
merkleTree = buildInputMerkleTree(spawn, context, toolSignature, spawnScrubber);
}

remoteExecutionCache.ensureInputsPresent(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,14 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException {
FailureDetails.RemoteOptions.Code.EXECUTION_WITH_INVALID_CACHE);
}

boolean enableScrubbing = remoteOptions.scrubber != null;
if (enableScrubbing && enableRemoteExecution) {

throw createOptionsExitException(
"Cannot combine remote cache key scrubbing with remote execution",
FailureDetails.RemoteOptions.Code.EXECUTION_WITH_SCRUBBING);
}

// TODO(bazel-team): Consider adding a warning or more validation if the remoteDownloadRegex is
// used without Build without the Bytes.
ImmutableList.Builder<Pattern> patternsToDownloadBuilder = ImmutableList.builder();
Expand Down
Loading

0 comments on commit f566465

Please sign in to comment.