diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteActionInputFetcher.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteActionInputFetcher.java index 28b4ccbb618c77..38287b28743922 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteActionInputFetcher.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteActionInputFetcher.java @@ -152,10 +152,7 @@ protected Completable onErrorResumeNext(Throwable error) { new EnvironmentalExecException( (BulkTransferException) error, FailureDetail.newBuilder() - .setMessage( - "Failed to fetch blobs because they do not exist remotely." - + " Build without the Bytes does not work if your remote" - + " cache evicts blobs during builds") + .setMessage("Failed to fetch blobs because they do not exist remotely") .setSpawn(FailureDetails.Spawn.newBuilder().setCode(code)) .build()); } diff --git a/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java b/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java index 0fbd5a2d3f0327..fe2cce0491b0ac 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java +++ b/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java @@ -54,6 +54,7 @@ import com.google.devtools.build.lib.util.AnsiStrippingOutputStream; import com.google.devtools.build.lib.util.DebugLoggerConfigurator; import com.google.devtools.build.lib.util.DetailedExitCode; +import com.google.devtools.build.lib.util.ExitCode; import com.google.devtools.build.lib.util.InterruptedFailureDetails; import com.google.devtools.build.lib.util.LoggingUtil; import com.google.devtools.build.lib.util.Pair; @@ -148,6 +149,54 @@ public BlazeCommandResult exec( Optional>> startupOptionsTaggedWithBazelRc, List commandExtensions) throws InterruptedException { + var remoteCacheEvictionRetries = parseRemoteCacheEvictionRetries(args, outErr); + while (true) { + var result = + execOnce( + 
invocationPolicy, + args, + outErr, + lockingMode, + clientDescription, + firstContactTimeMillis, + startupOptionsTaggedWithBazelRc, + commandExtensions); + if (result.getExitCode() == ExitCode.REMOTE_CACHE_EVICTED && remoteCacheEvictionRetries > 0) { + --remoteCacheEvictionRetries; + outErr.printErrLn("Found remote cache eviction error, retrying the build..."); + continue; + } + return result; + } + } + + private int parseRemoteCacheEvictionRetries(List args, OutErr outErr) { + var retryFlagPrefix = "--experimental_remote_cache_eviction_retries="; + for (var arg : args) { + if (arg.startsWith(retryFlagPrefix)) { + try { + return Integer.parseInt(arg.substring(retryFlagPrefix.length())); + } catch (NumberFormatException e) { + outErr.printErrLn( + String.format( + "Failed to parse retry times: %s, remote cache eviction retry is disabled", e)); + return 0; + } + } + } + return 0; + } + + public BlazeCommandResult execOnce( + InvocationPolicy invocationPolicy, + List args, + OutErr outErr, + LockingMode lockingMode, + String clientDescription, + long firstContactTimeMillis, + Optional>> startupOptionsTaggedWithBazelRc, + List commandExtensions) + throws InterruptedException { OriginalUnstructuredCommandLineEvent originalCommandLine = new OriginalUnstructuredCommandLineEvent(args); Preconditions.checkNotNull(clientDescription); diff --git a/src/main/java/com/google/devtools/build/lib/runtime/CommonCommandOptions.java b/src/main/java/com/google/devtools/build/lib/runtime/CommonCommandOptions.java index 44129690677141..f0b09420089dbd 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/CommonCommandOptions.java +++ b/src/main/java/com/google/devtools/build/lib/runtime/CommonCommandOptions.java @@ -534,6 +534,15 @@ public String getTypeDescription() { + " them.") public boolean heuristicallyDropNodes; + @Option( + name = "experimental_remote_cache_eviction_retries", + defaultValue = "0", + documentationCategory = OptionDocumentationCategory.REMOTE, + 
# Checks that a build which hits a remote cache eviction while prefetching
# inputs is retried when --experimental_remote_cache_eviction_retries is set,
# and that the retry message is printed.
function test_remote_cache_eviction_when_prefetching_input() {
  mkdir -p a

  echo foo > a/foo.in
  echo bar > a/bar.in

  # 'bar' is tagged no-remote-exec and takes foo.out as an input.
  cat > a/BUILD <<'EOF'
genrule(
  name = 'foo',
  srcs = ['foo.in'],
  outs = ['foo.out'],
  cmd = 'cat $(SRCS) > $@',
)

genrule(
  name = 'bar',
  srcs = ['foo.out', 'bar.in'],
  outs = ['bar.out'],
  cmd = 'cat $(SRCS) > $@',
  tags = ['no-remote-exec'],
)
EOF

  # NOTE(review): the remote flags are spelled out on every invocation instead
  # of being captured once; presumably start_worker may assign a new
  # worker_port, so it has to be read at call time -- confirm.

  # Step 1: populate the remote cache.
  bazel build \
    --remote_executor=grpc://localhost:${worker_port} \
    --remote_download_minimal \
    //a:bar >& $TEST_log || fail "Failed to build"

  bazel clean

  # Step 2: clean build; the intermediate foo.out must not be downloaded.
  bazel build \
    --remote_executor=grpc://localhost:${worker_port} \
    --remote_download_minimal \
    //a:bar >& $TEST_log || fail "Failed to build"

  if [[ -f bazel-bin/a/foo.out ]]; then
    fail "Expected intermediate output bazel-bin/a/foo.out to not be downloaded"
  fi

  # Step 3: restart the worker to evict the blobs from the remote cache.
  stop_worker
  start_worker

  echo "updated bar" > a/bar.in

  # Step 4: the incremental build runs into the eviction error; with retries
  # enabled Bazel is expected to rebuild and regenerate the missing blobs.
  bazel build \
    --remote_executor=grpc://localhost:${worker_port} \
    --remote_download_minimal \
    --experimental_remote_cache_eviction_retries=5 \
    //a:bar >& $TEST_log || fail "Failed to build"

  expect_log "Found remote cache eviction error, retrying the build..."
}

run_suite "Build without the Bytes tests"