From 5122617b8a22fee7acd86c9c48f2c2737709ca3f Mon Sep 17 00:00:00 2001 From: George Gensure Date: Tue, 1 Dec 2020 01:33:42 -0800 Subject: [PATCH] Status error presentation with details Remote Execution Status messages embedded in ExecuteResponses are extremely capable vehicles for conveying the nature of an error, and informing a user of further steps to take to remediate it. This change expands the presentation of these response Statuses, and brings all of the error details to light, by default instead of requiring --verbose_failures to investigate any details of a remote execution problem. The interpretation of precondition failures to highlight retriable responses has been expanded to ignore benign details that might be included in a response. SpawnResult error message composition has been simplified substantially, without any special behavior for 'Remote' errors, and a removal of a duplicate message printout incurred in the wake of succcessive @janakr and @olaola changes. Failure messages are now implied to be present in all spawn result failure reporting exactly once, and the failureMessage of a SpawnResult is implied to be the parameter to getDetailMessage. An example error presentation is as follows (including the modifications to SpawnResult's output formatting): ``` ERROR: /home/werkt/dev/test/BUILD:22:10: Linking test failed: (Exit 34): Remote Execution Failure: Failed Precondition: Action 4223ab2cc114385110714243a0b4a88cc743f2169b5be7d4d438a6bbba4f529f/142 is invalid Resource Info: type.googleapis.com/google.longrunning.Operation: name='shard/operations/9335fef2-184b-4d26-9a6f-2f27cebe7527', owner='tool_invocation_id:4b4bf7b1-fadd-44fd-99be-a234e7c26fc4,correlated_invocation_id:dc88325a-9317-48c0-9013-b3bb8b7a458f' Precondition Failure: (MISSING) bazel-out/k8-fastbuild/bin/test: 7872: An output could not be uploaded because it exceeded the maximum size of an entry Target //:test failed to build ``` Closes #12564. PiperOrigin-RevId: 344973841 --- .../build/lib/actions/SpawnResult.java | 8 - .../build/lib/remote/ExecuteRetrier.java | 133 ++++++++++++ .../lib/remote/ExecutionStatusException.java | 4 + .../ExperimentalGrpcRemoteExecutor.java | 2 +- .../build/lib/remote/RemoteRetrier.java | 6 +- .../build/lib/remote/RemoteSpawnRunner.java | 54 +---- .../devtools/build/lib/remote/Retrier.java | 14 +- .../devtools/build/lib/remote/util/BUILD | 6 +- .../devtools/build/lib/remote/util/Utils.java | 203 +++++++++++++++++- .../lib/remote/ByteStreamUploaderTest.java | 7 +- .../build/lib/remote/GrpcCacheClientTest.java | 6 +- .../build/lib/remote/RemoteRetrierTest.java | 30 +-- .../lib/remote/RemoteSpawnRunnerTest.java | 4 +- 13 files changed, 382 insertions(+), 95 deletions(-) create mode 100644 src/main/java/com/google/devtools/build/lib/remote/ExecuteRetrier.java diff --git a/src/main/java/com/google/devtools/build/lib/actions/SpawnResult.java b/src/main/java/com/google/devtools/build/lib/actions/SpawnResult.java index 149b2dbdc82b94..7fcdff9f0e5819 100644 --- a/src/main/java/com/google/devtools/build/lib/actions/SpawnResult.java +++ b/src/main/java/com/google/devtools/build/lib/actions/SpawnResult.java @@ -385,11 +385,6 @@ public String getDetailMessage( String reason = "(" + status.toShortString() + ")"; // e.g "(Exit 1)" String explanation = Strings.isNullOrEmpty(message) ? "" : ": " + message; - if (!status().isConsideredUserError()) { - String errorDetail = status().name().toLowerCase(Locale.US) - .replace('_', ' '); - explanation += ". Note: Remote connection/protocol failed with: " + errorDetail; - } if (status() == Status.TIMEOUT) { if (getWallTime().isPresent()) { explanation += @@ -407,9 +402,6 @@ public String getDetailMessage( explanation += " Action tagged as local was forcibly run remotely and failed - it's " + "possible that the action simply doesn't work remotely"; } - if (!Strings.isNullOrEmpty(failureMessage)) { - explanation += " " + failureMessage; - } return reason + explanation; } diff --git a/src/main/java/com/google/devtools/build/lib/remote/ExecuteRetrier.java b/src/main/java/com/google/devtools/build/lib/remote/ExecuteRetrier.java new file mode 100644 index 00000000000000..7a580420c93bf4 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/remote/ExecuteRetrier.java @@ -0,0 +1,133 @@ +// Copyright 2020 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.remote; + +import com.google.common.util.concurrent.ListeningScheduledExecutorService; +import com.google.protobuf.Any; +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.util.Durations; +import com.google.rpc.DebugInfo; +import com.google.rpc.Help; +import com.google.rpc.LocalizedMessage; +import com.google.rpc.PreconditionFailure; +import com.google.rpc.PreconditionFailure.Violation; +import com.google.rpc.RequestInfo; +import com.google.rpc.ResourceInfo; +import com.google.rpc.RetryInfo; +import com.google.rpc.Status; +import io.grpc.Status.Code; +import io.grpc.protobuf.StatusProto; + +/** Specific retry logic for execute request with gapi Status. */ +class ExecuteRetrier extends RemoteRetrier { + + private static final String VIOLATION_TYPE_MISSING = "MISSING"; + + private static class RetryInfoBackoff implements Backoff { + private final int maxRetryAttempts; + int retries = 0; + + RetryInfoBackoff(int maxRetryAttempts) { + this.maxRetryAttempts = maxRetryAttempts; + } + + @Override + public long nextDelayMillis(Exception e) { + if (retries >= maxRetryAttempts) { + return -1; + } + RetryInfo retryInfo = getRetryInfo(e); + retries++; + return Durations.toMillis(retryInfo.getRetryDelay()); + } + + RetryInfo getRetryInfo(Exception e) { + RetryInfo retryInfo = RetryInfo.getDefaultInstance(); + Status status = StatusProto.fromThrowable(e); + if (status != null) { + for (Any detail : status.getDetailsList()) { + if (detail.is(RetryInfo.class)) { + try { + retryInfo = detail.unpack(RetryInfo.class); + } catch (InvalidProtocolBufferException protoEx) { + // really shouldn't happen, ignore + } + } + } + } + return retryInfo; + } + + @Override + public int getRetryAttempts() { + return retries; + } + } + + ExecuteRetrier( + int maxRetryAttempts, + ListeningScheduledExecutorService retryService, + CircuitBreaker circuitBreaker) { + super( + () -> maxRetryAttempts > 0 ? new RetryInfoBackoff(maxRetryAttempts) : RETRIES_DISABLED, + ExecuteRetrier::shouldRetry, + retryService, + circuitBreaker); + } + + private static boolean shouldRetry(Exception e) { + if (BulkTransferException.isOnlyCausedByCacheNotFoundException(e)) { + return true; + } + Status status = StatusProto.fromThrowable(e); + if (status == null || status.getDetailsCount() == 0) { + return false; + } + boolean failedPrecondition = status.getCode() == Code.FAILED_PRECONDITION.value(); + for (Any detail : status.getDetailsList()) { + if (detail.is(RetryInfo.class)) { + // server says we can retry, regardless of other details + return true; + } else if (failedPrecondition) { + if (detail.is(PreconditionFailure.class)) { + try { + PreconditionFailure f = detail.unpack(PreconditionFailure.class); + if (f.getViolationsCount() == 0) { + failedPrecondition = false; + } + for (Violation v : f.getViolationsList()) { + if (!v.getType().equals(VIOLATION_TYPE_MISSING)) { + failedPrecondition = false; + } + } + // if *all* > 0 precondition failure violations have type MISSING, failedPrecondition + // remains true + } catch (InvalidProtocolBufferException protoEx) { + // really shouldn't happen + return false; + } + } else if (!(detail.is(DebugInfo.class) + || detail.is(Help.class) + || detail.is(LocalizedMessage.class) + || detail.is(RequestInfo.class) + || detail.is(ResourceInfo.class))) { // ignore benign details + // consider all other details as failures + failedPrecondition = false; + } + } + } + return failedPrecondition; + } +} diff --git a/src/main/java/com/google/devtools/build/lib/remote/ExecutionStatusException.java b/src/main/java/com/google/devtools/build/lib/remote/ExecutionStatusException.java index eb973579a23841..4a62628b6e8cf2 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/ExecutionStatusException.java +++ b/src/main/java/com/google/devtools/build/lib/remote/ExecutionStatusException.java @@ -63,4 +63,8 @@ public boolean isExecutionTimeout() { public ExecuteResponse getResponse() { return response; } + + public Status getOriginalStatus() { + return status; + } } diff --git a/src/main/java/com/google/devtools/build/lib/remote/ExperimentalGrpcRemoteExecutor.java b/src/main/java/com/google/devtools/build/lib/remote/ExperimentalGrpcRemoteExecutor.java index fafe8bb6bbaec1..ea999318932b61 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/ExperimentalGrpcRemoteExecutor.java +++ b/src/main/java/com/google/devtools/build/lib/remote/ExperimentalGrpcRemoteExecutor.java @@ -194,7 +194,7 @@ ExecuteResponse waitExecution() throws IOException { // // However, we only retry Execute() if executeBackoff should retry. Also increase the retry // counter at the same time (done by nextDelayMillis()). - if (e.getStatus().getCode() == Code.NOT_FOUND && executeBackoff.nextDelayMillis() >= 0) { + if (e.getStatus().getCode() == Code.NOT_FOUND && executeBackoff.nextDelayMillis(e) >= 0) { lastOperation = null; return null; } diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteRetrier.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteRetrier.java index 59af19b3191fbb..2aca34c9daaaed 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteRetrier.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteRetrier.java @@ -174,7 +174,7 @@ public ExponentialBackoff(RemoteOptions options) { } @Override - public long nextDelayMillis() { + public long nextDelayMillis(Exception e) { if (attempts == maxAttempts) { return -1; } @@ -221,13 +221,13 @@ public void reset() { } @Override - public long nextDelayMillis() { + public long nextDelayMillis(Exception e) { if (currentBackoff == null) { currentBackoff = backoffSupplier.get(); retries++; return 0; } - return currentBackoff.nextDelayMillis(); + return currentBackoff.nextDelayMillis(e); } @Override diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java index 8aab3bf358d8f9..f1925062de5a74 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java @@ -85,17 +85,12 @@ import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.longrunning.Operation; -import com.google.protobuf.Any; -import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.Message; import com.google.protobuf.Timestamp; import com.google.protobuf.util.Durations; import com.google.protobuf.util.Timestamps; -import com.google.rpc.PreconditionFailure; -import com.google.rpc.PreconditionFailure.Violation; import io.grpc.Context; import io.grpc.Status.Code; -import io.grpc.protobuf.StatusProto; import java.io.IOException; import java.time.Duration; import java.util.ArrayList; @@ -114,38 +109,6 @@ @ThreadSafe public class RemoteSpawnRunner implements SpawnRunner { - private static final String VIOLATION_TYPE_MISSING = "MISSING"; - - private static boolean retriableExecErrors(Exception e) { - if (BulkTransferException.isOnlyCausedByCacheNotFoundException(e)) { - return true; - } - if (!RemoteRetrierUtils.causedByStatus(e, Code.FAILED_PRECONDITION)) { - return false; - } - com.google.rpc.Status status = StatusProto.fromThrowable(e); - if (status == null || status.getDetailsCount() == 0) { - return false; - } - for (Any details : status.getDetailsList()) { - PreconditionFailure f; - try { - f = details.unpack(PreconditionFailure.class); - } catch (InvalidProtocolBufferException protoEx) { - return false; - } - if (f.getViolationsCount() == 0) { - return false; // Generally shouldn't happen - } - for (Violation v : f.getViolationsList()) { - if (!v.getType().equals(VIOLATION_TYPE_MISSING)) { - return false; - } - } - } - return true; // if *all* > 0 violations have type MISSING - } - private final Path execRoot; private final RemoteOptions remoteOptions; private final ExecutionOptions executionOptions; @@ -656,12 +619,10 @@ private SpawnResult handleError( catastrophe = false; } - final String errorMessage; - if (!verboseFailures) { - errorMessage = Utils.grpcAwareErrorMessage(exception); - } else { + String errorMessage = Utils.grpcAwareErrorMessage(exception); + if (verboseFailures) { // On --verbose_failures print the whole stack trace - errorMessage = Throwables.getStackTraceAsString(exception); + errorMessage += "\n" + Throwables.getStackTraceAsString(exception); } return new SpawnResult.Builder() @@ -817,12 +778,7 @@ static Collection resolveActionInputs( private static RemoteRetrier createExecuteRetrier( RemoteOptions options, ListeningScheduledExecutorService retryService) { - return new RemoteRetrier( - options.remoteMaxRetryAttempts > 0 - ? () -> new Retrier.ZeroBackoff(options.remoteMaxRetryAttempts) - : () -> Retrier.RETRIES_DISABLED, - RemoteSpawnRunner::retriableExecErrors, - retryService, - Retrier.ALLOW_ALL_CALLS); + return new ExecuteRetrier( + options.remoteMaxRetryAttempts, retryService, Retrier.ALLOW_ALL_CALLS); } } diff --git a/src/main/java/com/google/devtools/build/lib/remote/Retrier.java b/src/main/java/com/google/devtools/build/lib/remote/Retrier.java index e920cf7a6e4c40..4711a06eb9e454 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/Retrier.java +++ b/src/main/java/com/google/devtools/build/lib/remote/Retrier.java @@ -47,11 +47,11 @@ public interface Backoff { * Returns the next delay in milliseconds, or a value less than {@code 0} if we should stop * retrying. */ - long nextDelayMillis(); + long nextDelayMillis(Exception e); /** - * Returns the number of calls to {@link #nextDelayMillis()} thus far, not counting any calls - * that returned less than {@code 0}. + * Returns the number of calls to {@link #nextDelayMillis(Exception)} thus far, not counting any + * calls that returned less than {@code 0}. */ int getRetryAttempts(); } @@ -140,7 +140,7 @@ public void recordSuccess() {} public static final Backoff RETRIES_DISABLED = new Backoff() { @Override - public long nextDelayMillis() { + public long nextDelayMillis(Exception e) { return -1; } @@ -161,7 +161,7 @@ public ZeroBackoff(int maxRetries) { } @Override - public long nextDelayMillis() { + public long nextDelayMillis(Exception e) { if (retries >= maxRetries) { return -1; } @@ -253,7 +253,7 @@ public T execute(Callable call, Backoff backoff) throws Exception { if (!shouldRetry.test(e)) { throw e; } - final long delayMillis = backoff.nextDelayMillis(); + final long delayMillis = backoff.nextDelayMillis(e); if (delayMillis < 0) { throw e; } @@ -286,7 +286,7 @@ public ListenableFuture executeAsync(AsyncCallable call, Backoff backo private ListenableFuture onExecuteAsyncFailure( Exception t, AsyncCallable call, Backoff backoff) { if (isRetriable(t)) { - long waitMillis = backoff.nextDelayMillis(); + long waitMillis = backoff.nextDelayMillis(t); if (waitMillis >= 0) { try { return Futures.scheduleAsync( diff --git a/src/main/java/com/google/devtools/build/lib/remote/util/BUILD b/src/main/java/com/google/devtools/build/lib/remote/util/BUILD index 2ed7f93bb5a9e0..6d91bc9f237439 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/util/BUILD +++ b/src/main/java/com/google/devtools/build/lib/remote/util/BUILD @@ -20,16 +20,20 @@ java_library( "//src/main/java/com/google/devtools/build/lib/analysis:blaze_version_info", "//src/main/java/com/google/devtools/build/lib/authandtls", "//src/main/java/com/google/devtools/build/lib/concurrent", + "//src/main/java/com/google/devtools/build/lib/remote:ExecutionStatusException", "//src/main/java/com/google/devtools/build/lib/remote/common", "//src/main/java/com/google/devtools/build/lib/remote/options", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//src/main/protobuf:failure_details_java_proto", - "//third_party:flogger", "//third_party:guava", "//third_party:jsr305", "//third_party/grpc:grpc-jar", "//third_party/protobuf:protobuf_java", + "//third_party/protobuf:protobuf_java_util", + "@googleapis//:google_rpc_code_java_proto", + "@googleapis//:google_rpc_error_details_java_proto", + "@googleapis//:google_rpc_status_java_proto", "@remoteapis//:build_bazel_remote_execution_v2_remote_execution_java_grpc", "@remoteapis//:build_bazel_remote_execution_v2_remote_execution_java_proto", ], diff --git a/src/main/java/com/google/devtools/build/lib/remote/util/Utils.java b/src/main/java/com/google/devtools/build/lib/remote/util/Utils.java index 86f21a5b1347ef..f5ce5077c5c4a0 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/util/Utils.java +++ b/src/main/java/com/google/devtools/build/lib/remote/util/Utils.java @@ -13,8 +13,11 @@ // limitations under the License. package com.google.devtools.build.lib.remote.util; +import static java.util.stream.Collectors.joining; + import build.bazel.remote.execution.v2.ActionResult; import build.bazel.remote.execution.v2.Digest; +import com.google.common.base.Ascii; import com.google.common.base.Preconditions; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableSet; @@ -28,22 +31,37 @@ import com.google.devtools.build.lib.actions.Spawn; import com.google.devtools.build.lib.actions.SpawnMetrics; import com.google.devtools.build.lib.actions.SpawnResult; -import com.google.devtools.build.lib.actions.SpawnResult.Status; import com.google.devtools.build.lib.authandtls.CallCredentialsProvider; +import com.google.devtools.build.lib.remote.ExecutionStatusException; import com.google.devtools.build.lib.remote.common.CacheNotFoundException; import com.google.devtools.build.lib.remote.common.RemoteCacheClient.ActionKey; import com.google.devtools.build.lib.remote.options.RemoteOutputsMode; import com.google.devtools.build.lib.server.FailureDetails; import com.google.devtools.build.lib.server.FailureDetails.FailureDetail; -import com.google.devtools.build.lib.server.FailureDetails.Spawn.Code; import com.google.devtools.build.lib.vfs.PathFragment; +import com.google.protobuf.Any; import com.google.protobuf.ByteString; +import com.google.protobuf.Duration; import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.util.Durations; +import com.google.rpc.BadRequest; +import com.google.rpc.Code; +import com.google.rpc.DebugInfo; +import com.google.rpc.Help; +import com.google.rpc.LocalizedMessage; +import com.google.rpc.PreconditionFailure; +import com.google.rpc.QuotaFailure; +import com.google.rpc.RequestInfo; +import com.google.rpc.ResourceInfo; +import com.google.rpc.RetryInfo; +import com.google.rpc.Status; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Locale; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.function.BiFunction; @@ -119,7 +137,8 @@ public static SpawnResult createSpawnResult( String mnemonic) { SpawnResult.Builder builder = new SpawnResult.Builder() - .setStatus(exitCode == 0 ? Status.SUCCESS : Status.NON_ZERO_EXIT) + .setStatus( + exitCode == 0 ? SpawnResult.Status.SUCCESS : SpawnResult.Status.NON_ZERO_EXIT) .setExitCode(exitCode) .setRunnerName(cacheHit ? runnerName + " cache hit" : runnerName) .setCacheHit(cacheHit) @@ -129,7 +148,9 @@ public static SpawnResult createSpawnResult( builder.setFailureDetail( FailureDetail.newBuilder() .setMessage(mnemonic + " returned a non-zero exit code when running remotely") - .setSpawn(FailureDetails.Spawn.newBuilder().setCode(Code.NON_ZERO_EXIT)) + .setSpawn( + FailureDetails.Spawn.newBuilder() + .setCode(FailureDetails.Spawn.Code.NON_ZERO_EXIT)) .build()); } if (inMemoryOutput != null) { @@ -162,8 +183,182 @@ public static boolean hasFilesToDownload( return !Collections.disjoint(outputs, filesToDownload); } + private static String statusName(int code) { + // 'convert_underscores' to 'Convert Underscores' + String name = Code.forNumber(code).getValueDescriptor().getName(); + return Arrays.stream(name.split("_")) + .map(word -> Ascii.toUpperCase(word.substring(0, 1)) + Ascii.toLowerCase(word.substring(1))) + .collect(joining(" ")); + } + + private static String errorDetailsMessage(Iterable details) + throws InvalidProtocolBufferException { + String messages = ""; + for (Any detail : details) { + messages += " " + errorDetailMessage(detail) + "\n"; + } + return messages; + } + + private static String durationMessage(Duration duration) { + // this will give us seconds, might want to consider something nicer (graduating ms, s, m, h, d, + // w?) + return Durations.toString(duration); + } + + private static String retryInfoMessage(RetryInfo retryInfo) { + return "Retry delay recommendation of " + durationMessage(retryInfo.getRetryDelay()); + } + + private static String debugInfoMessage(DebugInfo debugInfo) { + String message = ""; + if (debugInfo.getStackEntriesCount() > 0) { + message += + "Debug Stack Information:\n " + String.join("\n ", debugInfo.getStackEntriesList()); + } + if (!debugInfo.getDetail().isEmpty()) { + if (debugInfo.getStackEntriesCount() > 0) { + message += "\n"; + } + message += "Debug Details: " + debugInfo.getDetail(); + } + return message; + } + + private static String quotaFailureMessage(QuotaFailure quotaFailure) { + String message = "Quota Failure"; + if (quotaFailure.getViolationsCount() > 0) { + message += ":"; + } + for (QuotaFailure.Violation violation : quotaFailure.getViolationsList()) { + message += "\n " + violation.getSubject() + ": " + violation.getDescription(); + } + return message; + } + + private static String preconditionFailureMessage(PreconditionFailure preconditionFailure) { + String message = "Precondition Failure"; + if (preconditionFailure.getViolationsCount() > 0) { + message += ":"; + } + for (PreconditionFailure.Violation violation : preconditionFailure.getViolationsList()) { + message += + "\n (" + + violation.getType() + + ") " + + violation.getSubject() + + ": " + + violation.getDescription(); + } + return message; + } + + private static String badRequestMessage(BadRequest badRequest) { + String message = "Bad Request"; + if (badRequest.getFieldViolationsCount() > 0) { + message += ":"; + } + for (BadRequest.FieldViolation fieldViolation : badRequest.getFieldViolationsList()) { + message += "\n " + fieldViolation.getField() + ": " + fieldViolation.getDescription(); + } + return message; + } + + private static String requestInfoMessage(RequestInfo requestInfo) { + return "Request Info: " + requestInfo.getRequestId() + " => " + requestInfo.getServingData(); + } + + private static String resourceInfoMessage(ResourceInfo resourceInfo) { + String message = + "Resource Info: " + + resourceInfo.getResourceType() + + ": name='" + + resourceInfo.getResourceName() + + "', owner='" + + resourceInfo.getOwner() + + "'"; + if (!resourceInfo.getDescription().isEmpty()) { + message += ", description: " + resourceInfo.getDescription(); + } + return message; + } + + private static String helpMessage(Help help) { + String message = "Help"; + if (help.getLinksCount() > 0) { + message += ":"; + } + for (Help.Link link : help.getLinksList()) { + message += "\n " + link.getDescription() + ": " + link.getUrl(); + } + return message; + } + + private static String errorDetailMessage(Any detail) throws InvalidProtocolBufferException { + if (detail.is(RetryInfo.class)) { + return retryInfoMessage(detail.unpack(RetryInfo.class)); + } + if (detail.is(DebugInfo.class)) { + return debugInfoMessage(detail.unpack(DebugInfo.class)); + } + if (detail.is(QuotaFailure.class)) { + return quotaFailureMessage(detail.unpack(QuotaFailure.class)); + } + if (detail.is(PreconditionFailure.class)) { + return preconditionFailureMessage(detail.unpack(PreconditionFailure.class)); + } + if (detail.is(BadRequest.class)) { + return badRequestMessage(detail.unpack(BadRequest.class)); + } + if (detail.is(RequestInfo.class)) { + return requestInfoMessage(detail.unpack(RequestInfo.class)); + } + if (detail.is(ResourceInfo.class)) { + return resourceInfoMessage(detail.unpack(ResourceInfo.class)); + } + if (detail.is(Help.class)) { + return helpMessage(detail.unpack(Help.class)); + } + return "Unrecognized error detail: " + detail; + } + + private static String localizedStatusMessage(Status status) + throws InvalidProtocolBufferException { + String languageTag = Locale.getDefault().toLanguageTag(); + for (Any detail : status.getDetailsList()) { + if (detail.is(LocalizedMessage.class)) { + LocalizedMessage message = detail.unpack(LocalizedMessage.class); + if (message.getLocale().equals(languageTag)) { + return message.getMessage(); + } + } + } + return status.getMessage(); + } + + private static String executionStatusExceptionErrorMessage(ExecutionStatusException e) + throws InvalidProtocolBufferException { + Status status = e.getOriginalStatus(); + return statusName(status.getCode()) + + ": " + + localizedStatusMessage(status) + + "\n" + + errorDetailsMessage(status.getDetailsList()); + } + public static String grpcAwareErrorMessage(IOException e) { io.grpc.Status errStatus = io.grpc.Status.fromThrowable(e); + if (e.getCause() instanceof ExecutionStatusException) { + try { + return "Remote Execution Failure:\n" + + executionStatusExceptionErrorMessage((ExecutionStatusException) e.getCause()); + } catch (InvalidProtocolBufferException protoEx) { + return "Error occurred attempting to format an error message for " + + errStatus + + ": " + + Throwables.getStackTraceAsString(protoEx); + } + } if (!errStatus.getCode().equals(io.grpc.Status.UNKNOWN.getCode())) { // If the error originated in the gRPC library then display it as "STATUS: error message" // to the user diff --git a/src/test/java/com/google/devtools/build/lib/remote/ByteStreamUploaderTest.java b/src/test/java/com/google/devtools/build/lib/remote/ByteStreamUploaderTest.java index 247ee8a7e36f4d..eec073a50a197b 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/ByteStreamUploaderTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/ByteStreamUploaderTest.java @@ -17,6 +17,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertThrows; import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.any; import build.bazel.remote.execution.v2.Digest; import build.bazel.remote.execution.v2.RequestMetadata; @@ -330,7 +331,7 @@ public void queryWriteStatus( uploader.uploadBlob(hash, chunker, true); // This test should not have triggered any retries. - Mockito.verify(mockBackoff, Mockito.never()).nextDelayMillis(); + Mockito.verify(mockBackoff, Mockito.never()).nextDelayMillis(any(Exception.class)); Mockito.verify(mockBackoff, Mockito.times(1)).getRetryAttempts(); blockUntilInternalStateConsistent(uploader); @@ -469,7 +470,7 @@ public void queryWriteStatus( // This test should have triggered a single retry, because it made // no progress. - Mockito.verify(mockBackoff, Mockito.times(1)).nextDelayMillis(); + Mockito.verify(mockBackoff, Mockito.times(1)).nextDelayMillis(any(Exception.class)); blockUntilInternalStateConsistent(uploader); @@ -1402,7 +1403,7 @@ public FixedBackoff(int maxRetries, int delayMillis) { } @Override - public long nextDelayMillis() { + public long nextDelayMillis(Exception e) { if (retries < maxRetries) { retries++; return delayMillis; diff --git a/src/test/java/com/google/devtools/build/lib/remote/GrpcCacheClientTest.java b/src/test/java/com/google/devtools/build/lib/remote/GrpcCacheClientTest.java index 7f12e92a592e29..aadb0eeb4eed43 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/GrpcCacheClientTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/GrpcCacheClientTest.java @@ -1083,13 +1083,13 @@ public void read(ReadRequest request, StreamObserver responseObser } }); assertThat(new String(downloadBlob(client, digest), UTF_8)).isEqualTo("abcdefg"); - Mockito.verify(mockBackoff, Mockito.never()).nextDelayMillis(); + Mockito.verify(mockBackoff, Mockito.never()).nextDelayMillis(any(Exception.class)); } @Test public void downloadBlobPassesThroughDeadlineExceededWithoutProgress() throws IOException { Backoff mockBackoff = Mockito.mock(Backoff.class); - Mockito.when(mockBackoff.nextDelayMillis()).thenReturn(-1L); + Mockito.when(mockBackoff.nextDelayMillis(any(Exception.class))).thenReturn(-1L); final GrpcCacheClient client = newClient(Options.getDefaults(RemoteOptions.class), () -> mockBackoff); final Digest digest = DIGEST_UTIL.computeAsUtf8("abcdefg"); @@ -1109,7 +1109,7 @@ public void read(ReadRequest request, StreamObserver responseObser IOException e = assertThrows(IOException.class, () -> downloadBlob(client, digest)); Status st = Status.fromThrowable(e); assertThat(st.getCode()).isEqualTo(Status.Code.DEADLINE_EXCEEDED); - Mockito.verify(mockBackoff, Mockito.times(1)).nextDelayMillis(); + Mockito.verify(mockBackoff, Mockito.times(1)).nextDelayMillis(any(Exception.class)); } @Test diff --git a/src/test/java/com/google/devtools/build/lib/remote/RemoteRetrierTest.java b/src/test/java/com/google/devtools/build/lib/remote/RemoteRetrierTest.java index 25f70d127ba018..75f8a8317c9ffe 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/RemoteRetrierTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/RemoteRetrierTest.java @@ -67,28 +67,30 @@ public void tearDown() throws InterruptedException { @Test public void testExponentialBackoff() throws Exception { + Exception e = new Exception(); Retrier.Backoff backoff = new ExponentialBackoff(Duration.ofSeconds(1), Duration.ofSeconds(10), 2, 0, 6); - assertThat(backoff.nextDelayMillis()).isEqualTo(1000); - assertThat(backoff.nextDelayMillis()).isEqualTo(2000); - assertThat(backoff.nextDelayMillis()).isEqualTo(4000); - assertThat(backoff.nextDelayMillis()).isEqualTo(8000); - assertThat(backoff.nextDelayMillis()).isEqualTo(10000); - assertThat(backoff.nextDelayMillis()).isEqualTo(10000); - assertThat(backoff.nextDelayMillis()).isLessThan(0L); + assertThat(backoff.nextDelayMillis(e)).isEqualTo(1000); + assertThat(backoff.nextDelayMillis(e)).isEqualTo(2000); + assertThat(backoff.nextDelayMillis(e)).isEqualTo(4000); + assertThat(backoff.nextDelayMillis(e)).isEqualTo(8000); + assertThat(backoff.nextDelayMillis(e)).isEqualTo(10000); + assertThat(backoff.nextDelayMillis(e)).isEqualTo(10000); + assertThat(backoff.nextDelayMillis(e)).isLessThan(0L); } @Test public void testExponentialBackoffJittered() throws Exception { + Exception e = new Exception(); Retrier.Backoff backoff = new ExponentialBackoff(Duration.ofSeconds(1), Duration.ofSeconds(10), 2, 0.1, 6); - assertThat(backoff.nextDelayMillis()).isIn(Range.closedOpen(900L, 1100L)); - assertThat(backoff.nextDelayMillis()).isIn(Range.closedOpen(1800L, 2200L)); - assertThat(backoff.nextDelayMillis()).isIn(Range.closedOpen(3600L, 4400L)); - assertThat(backoff.nextDelayMillis()).isIn(Range.closedOpen(7200L, 8800L)); - assertThat(backoff.nextDelayMillis()).isIn(Range.closedOpen(9000L, 11000L)); - assertThat(backoff.nextDelayMillis()).isIn(Range.closedOpen(9000L, 11000L)); - assertThat(backoff.nextDelayMillis()).isLessThan(0L); + assertThat(backoff.nextDelayMillis(e)).isIn(Range.closedOpen(900L, 1100L)); + assertThat(backoff.nextDelayMillis(e)).isIn(Range.closedOpen(1800L, 2200L)); + assertThat(backoff.nextDelayMillis(e)).isIn(Range.closedOpen(3600L, 4400L)); + assertThat(backoff.nextDelayMillis(e)).isIn(Range.closedOpen(7200L, 8800L)); + assertThat(backoff.nextDelayMillis(e)).isIn(Range.closedOpen(9000L, 11000L)); + assertThat(backoff.nextDelayMillis(e)).isIn(Range.closedOpen(9000L, 11000L)); + assertThat(backoff.nextDelayMillis(e)).isLessThan(0L); } @Test diff --git a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java index bf7aa76ae0d542..f164bb65b6cadf 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java @@ -793,7 +793,7 @@ public void testExitCode_executorfailure() throws Exception { SpawnResult result = runner.exec(spawn, policy); assertThat(result.exitCode()).isEqualTo(ExitCode.REMOTE_ERROR.getNumericExitCode()); - assertThat(result.getDetailMessage("", false, false)).contains("reasons"); + assertThat(result.getFailureMessage()).contains("reasons"); } @Test @@ -813,7 +813,7 @@ public void testExitCode_executionfailure() throws Exception { SpawnResult result = runner.exec(spawn, policy); assertThat(result.exitCode()).isEqualTo(ExitCode.REMOTE_ERROR.getNumericExitCode()); - assertThat(result.getDetailMessage("", false, false)).contains("reasons"); + assertThat(result.getFailureMessage()).contains("reasons"); } @Test