diff --git a/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java b/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java index 01ea75faba166f..3b88dd56dd57a7 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java +++ b/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java @@ -243,14 +243,36 @@ public int getId() { public ListenableFuture prefetchInputs() throws IOException, ForbiddenActionInputException { if (Spawns.shouldPrefetchInputsForLocalExecution(spawn)) { - return actionExecutionContext - .getActionInputPrefetcher() - .prefetchFiles( - getInputMapping(PathFragment.EMPTY_FRAGMENT, /* willAccessRepeatedly= */ true) - .values(), - getMetadataProvider()); + return Futures.catchingAsync( + actionExecutionContext + .getActionInputPrefetcher() + .prefetchFiles( + getInputMapping(PathFragment.EMPTY_FRAGMENT, /* willAccessRepeatedly= */ true) + .values(), + getMetadataProvider(), + Priority.MEDIUM), + BulkTransferException.class, + (BulkTransferException e) -> { + if (BulkTransferException.allCausedByCacheNotFoundException(e)) { + var code = + (executionOptions.useNewExitCodeForLostInputs + || executionOptions.remoteRetryOnCacheEviction > 0) + ? Code.REMOTE_CACHE_EVICTED + : Code.REMOTE_CACHE_FAILED; + throw new EnvironmentalExecException( + e, + FailureDetail.newBuilder() + .setMessage("Failed to fetch blobs because they do not exist remotely.") + .setSpawn(FailureDetails.Spawn.newBuilder().setCode(code)) + .build()); + } else { + throw e; + } + }, + directExecutor()); } + return immediateVoidFuture(); } diff --git a/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java b/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java index f964d876a7e0c6..8811254af822e1 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java +++ b/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java @@ -494,6 +494,28 @@ public boolean usingLocalTestJobs() { + "test log. Otherwise, Bazel generates a test.xml as part of the test action.") public boolean splitXmlGeneration; + @Option( + name = "incompatible_remote_use_new_exit_code_for_lost_inputs", + defaultValue = "true", + documentationCategory = OptionDocumentationCategory.REMOTE, + effectTags = {OptionEffectTag.UNKNOWN}, + metadataTags = {OptionMetadataTag.INCOMPATIBLE_CHANGE}, + help = + "If set to true, Bazel will use new exit code 39 instead of 34 if remote cache evicts" + + " blobs during the build.") + public boolean useNewExitCodeForLostInputs; + + @Option( + name = "experimental_remote_cache_eviction_retries", + defaultValue = "0", + documentationCategory = OptionDocumentationCategory.REMOTE, + effectTags = {OptionEffectTag.EXECUTION}, + help = + "The maximum number of attempts to retry if the build encountered remote cache eviction" + + " error. A non-zero value will implicitly set" + + " --incompatible_remote_use_new_exit_code_for_lost_inputs to true.") + public int remoteRetryOnCacheEviction; + /** An enum for specifying different formats of test output. */ public enum TestOutputFormat { SUMMARY, // Provide summary output only. diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java index 0f7bb077577efa..1b0baa3808c395 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java @@ -557,7 +557,8 @@ private SpawnResult handleError( catastrophe = true; } else if (remoteCacheFailed) { status = Status.REMOTE_CACHE_FAILED; - if (remoteOptions.useNewExitCodeForLostInputs) { + if (executionOptions.useNewExitCodeForLostInputs + || executionOptions.remoteRetryOnCacheEviction > 0) { detailedCode = FailureDetails.Spawn.Code.REMOTE_CACHE_EVICTED; } else { detailedCode = FailureDetails.Spawn.Code.REMOTE_CACHE_FAILED; diff --git a/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java b/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java index 0fbd5a2d3f0327..b91d69e7384ac3 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java +++ b/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java @@ -43,6 +43,7 @@ import com.google.devtools.build.lib.events.PrintingEventHandler; import com.google.devtools.build.lib.events.Reporter; import com.google.devtools.build.lib.events.StoredEventHandler; +import com.google.devtools.build.lib.exec.ExecutionOptions; import com.google.devtools.build.lib.profiler.MemoryProfiler; import com.google.devtools.build.lib.profiler.Profiler; import com.google.devtools.build.lib.profiler.SilentCloseable; @@ -54,6 +55,7 @@ import com.google.devtools.build.lib.util.AnsiStrippingOutputStream; import com.google.devtools.build.lib.util.DebugLoggerConfigurator; import com.google.devtools.build.lib.util.DetailedExitCode; +import com.google.devtools.build.lib.util.ExitCode; import com.google.devtools.build.lib.util.InterruptedFailureDetails; import com.google.devtools.build.lib.util.LoggingUtil; import com.google.devtools.build.lib.util.Pair; @@ -230,18 +232,29 @@ public BlazeCommandResult exec( return createDetailedCommandResult( retrievedShutdownReason, FailureDetails.Command.Code.PREVIOUSLY_SHUTDOWN); } - BlazeCommandResult result = - execExclusively( - originalCommandLine, - invocationPolicy, - args, - outErr, - firstContactTimeMillis, - commandName, - command, - waitTimeInMs, - startupOptionsTaggedWithBazelRc, - commandExtensions); + BlazeCommandResult result; + int attempt = 0; + while (true) { + try { + result = + execExclusively( + originalCommandLine, + invocationPolicy, + args, + outErr, + firstContactTimeMillis, + commandName, + command, + waitTimeInMs, + startupOptionsTaggedWithBazelRc, + commandExtensions, + attempt); + break; + } catch (RemoteCacheEvictedException e) { + outErr.printErrLn("Found remote cache eviction error, retrying the build..."); + attempt += 1; + } + } if (result.shutdown()) { setShutdownReason( "Server shut down " @@ -289,7 +302,9 @@ private BlazeCommandResult execExclusively( BlazeCommand command, long waitTimeInMs, Optional>> startupOptionsTaggedWithBazelRc, - List commandExtensions) { + List commandExtensions, + int attempt) + throws RemoteCacheEvictedException { // Record the start time for the profiler. Do not put anything before this! long execStartTimeNanos = runtime.getClock().nanoTime(); @@ -631,7 +646,18 @@ private BlazeCommandResult execExclusively( } needToCallAfterCommand = false; - return runtime.afterCommand(env, result); + var newResult = runtime.afterCommand(env, result); + if (newResult.getExitCode().equals(ExitCode.REMOTE_CACHE_EVICTED)) { + var executionOptions = + Preconditions.checkNotNull(options.getOptions(ExecutionOptions.class)); + if (attempt < executionOptions.remoteRetryOnCacheEviction) { + throw new RemoteCacheEvictedException(); + } + } + + return newResult; + } catch (RemoteCacheEvictedException e) { + throw e; } catch (Throwable e) { logger.atSevere().withCause(e).log("Shutting down due to exception"); Crash crash = Crash.from(e); @@ -665,6 +691,8 @@ private BlazeCommandResult execExclusively( } } + private static class RemoteCacheEvictedException extends IOException {} + private static void replayEarlyExitEvents( OutErr outErr, BlazeOptionHandler optionHandler, diff --git a/src/test/java/com/google/devtools/build/lib/remote/BuildWithoutTheBytesIntegrationTest.java b/src/test/java/com/google/devtools/build/lib/remote/BuildWithoutTheBytesIntegrationTest.java index 25b5c3cf33c0bb..f45f8086cb0434 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/BuildWithoutTheBytesIntegrationTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/BuildWithoutTheBytesIntegrationTest.java @@ -466,9 +466,7 @@ public void remoteCacheEvictBlobs_whenPrefetchingInput_exitWithCode39() throws E // Assert: Exit code is 39 assertThat(error) .hasMessageThat() - .contains( - "Build without the Bytes does not work if your remote cache evicts blobs" - + " during builds"); + .contains("Failed to fetch blobs because they do not exist remotely"); assertThat(error).hasMessageThat().contains(String.format("%s/%s", hashCode, bytes.length)); assertThat(error.getDetailedExitCode().getExitCode().getNumericExitCode()).isEqualTo(39); } diff --git a/src/test/shell/bazel/remote/build_without_the_bytes_test.sh b/src/test/shell/bazel/remote/build_without_the_bytes_test.sh index aa599ff9af679a..0415433c3c4d03 100755 --- a/src/test/shell/bazel/remote/build_without_the_bytes_test.sh +++ b/src/test/shell/bazel/remote/build_without_the_bytes_test.sh @@ -1627,4 +1627,63 @@ end_of_record" expect_log "$expected_result" } +function test_remote_cache_eviction_retries() { + mkdir -p a + + cat > a/BUILD <<'EOF' +genrule( + name = 'foo', + srcs = ['foo.in'], + outs = ['foo.out'], + cmd = 'cat $(SRCS) > $@', +) + +genrule( + name = 'bar', + srcs = ['foo.out', 'bar.in'], + outs = ['bar.out'], + cmd = 'cat $(SRCS) > $@', + tags = ['no-remote-exec'], +) +EOF + + echo foo > a/foo.in + echo bar > a/bar.in + + # Populate remote cache + bazel build \ + --remote_executor=grpc://localhost:${worker_port} \ + --remote_download_minimal \ + //a:bar >& $TEST_log || fail "Failed to build" + + bazel clean + + # Clean build, foo.out isn't downloaded + bazel build \ + --remote_executor=grpc://localhost:${worker_port} \ + --remote_download_minimal \ + //a:bar >& $TEST_log || fail "Failed to build" + + if [[ -f bazel-bin/a/foo.out ]]; then + fail "Expected intermediate output bazel-bin/a/foo.out to not be downloaded" + fi + + # Evict blobs from remote cache + stop_worker + start_worker + + echo "updated bar" > a/bar.in + + # Incremental build triggers remote cache eviction error but Bazel + # automatically retries the build and reruns the generating actions for + # missing blobs + bazel build \ + --remote_executor=grpc://localhost:${worker_port} \ + --remote_download_minimal \ + --experimental_remote_cache_eviction_retries=5 \ + //a:bar >& $TEST_log || fail "Failed to build" + + expect_log "Found remote cache eviction error, retrying the build..." +} + run_suite "Build without the Bytes tests"