From 0fb6f73c63071db27be91edf7735f097d2172011 Mon Sep 17 00:00:00 2001 From: Ivan Senic Date: Wed, 29 Mar 2023 14:34:35 +0200 Subject: [PATCH] closes #261: grpc retries and timeouts to fix availability failures --- .../retries/impl/JsonApiGrpcRetryPolicy.java | 54 +++++++++++ src/main/resources/application.yaml | 13 +++ .../impl/JsonApiGrpcRetryPolicyTest.java | 89 +++++++++++++++++++ 3 files changed, 156 insertions(+) create mode 100644 src/main/java/io/stargate/sgv2/jsonapi/grpc/retries/impl/JsonApiGrpcRetryPolicy.java create mode 100644 src/test/java/io/stargate/sgv2/jsonapi/grpc/retries/impl/JsonApiGrpcRetryPolicyTest.java diff --git a/src/main/java/io/stargate/sgv2/jsonapi/grpc/retries/impl/JsonApiGrpcRetryPolicy.java b/src/main/java/io/stargate/sgv2/jsonapi/grpc/retries/impl/JsonApiGrpcRetryPolicy.java new file mode 100644 index 0000000000..a27a5c1862 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/grpc/retries/impl/JsonApiGrpcRetryPolicy.java @@ -0,0 +1,54 @@ +package io.stargate.sgv2.jsonapi.grpc.retries.impl; + +import io.grpc.Metadata; +import io.grpc.Status; +import io.grpc.StatusRuntimeException; +import io.grpc.protobuf.ProtoUtils; +import io.stargate.bridge.proto.QueryOuterClass; +import java.util.Objects; +import java.util.function.Predicate; +import javax.enterprise.context.ApplicationScoped; + +/** Default gRPC retry policy used in the project. */ +@ApplicationScoped +// TODO correct type here +public class JsonApiGrpcRetryPolicy implements Predicate { + + private static final Metadata.Key WRITE_TIMEOUT_KEY = + ProtoUtils.keyForProto(QueryOuterClass.WriteTimeout.getDefaultInstance()); + + private static final Metadata.Key READ_TIMEOUT_KEY = + ProtoUtils.keyForProto(QueryOuterClass.ReadTimeout.getDefaultInstance()); + + /** {@inheritDoc} */ + @Override + public boolean test(StatusRuntimeException e) { + Status status = e.getStatus(); + Status.Code code = status.getCode(); + + // always retry unavailable + if (Objects.equals(code, Status.Code.UNAVAILABLE)) { + return true; + } + + // for timeouts, retry only server side timeouts + if (Objects.equals(code, Status.Code.DEADLINE_EXCEEDED)) { + return isValidServerSideTimeout(e.getTrailers()); + } + + // nothing else + return false; + } + + // ensure we retry only server side timeouts we want + private boolean isValidServerSideTimeout(Metadata trailers) { + // if we have trailers + if (null != trailers) { + // TODO double check the CAS write timeout retries are fine + return trailers.containsKey(READ_TIMEOUT_KEY) || trailers.containsKey(WRITE_TIMEOUT_KEY); + } + + // otherwise not + return false; + } +} diff --git a/src/main/resources/application.yaml b/src/main/resources/application.yaml index 89ed437136..fa42db57f4 100644 --- a/src/main/resources/application.yaml +++ b/src/main/resources/application.yaml @@ -7,6 +7,19 @@ stargate: exception-mappers: enabled: false + # custom grpc settings + grpc: + + # default client timeout 2x from max server side timeout + # see https://docs.datastax.com/en/dse/6.8/dse-dev/datastax_enterprise/config/configCassandra_yaml.html#Networktimeoutsettings + call-deadline: PT20S + + # retries use custom policy, see io.stargate.sgv2.jsonapi.grpc.retries.impl.JsonApiGrpcRetryPolicy + retires: + enabled: true + max-attempts: 1 + policy: custom + # metrics properties # see io.stargate.sgv2.api.common.config.MetricsConfig for all config properties and options metrics: diff --git a/src/test/java/io/stargate/sgv2/jsonapi/grpc/retries/impl/JsonApiGrpcRetryPolicyTest.java b/src/test/java/io/stargate/sgv2/jsonapi/grpc/retries/impl/JsonApiGrpcRetryPolicyTest.java new file mode 100644 index 0000000000..663818ad68 --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/grpc/retries/impl/JsonApiGrpcRetryPolicyTest.java @@ -0,0 +1,89 @@ +package io.stargate.sgv2.jsonapi.grpc.retries.impl; + +import static org.assertj.core.api.Assertions.assertThat; + +import io.grpc.Metadata; +import io.grpc.Status; +import io.grpc.StatusRuntimeException; +import io.grpc.protobuf.ProtoUtils; +import io.stargate.bridge.proto.QueryOuterClass; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +class JsonApiGrpcRetryPolicyTest { + + JsonApiGrpcRetryPolicy policy = new JsonApiGrpcRetryPolicy(); + + @Nested + class PredicateTest { + + @Test + public void unavailable() { + StatusRuntimeException e = new StatusRuntimeException(Status.UNAVAILABLE); + + boolean result = policy.test(e); + + assertThat(result).isTrue(); + } + + @Test + public void deadlineWithReadTimeout() { + Metadata.Key key = + ProtoUtils.keyForProto(QueryOuterClass.ReadTimeout.getDefaultInstance()); + QueryOuterClass.ReadTimeout value = QueryOuterClass.ReadTimeout.newBuilder().build(); + Metadata metadata = new Metadata(); + metadata.put(key, value); + StatusRuntimeException e = new StatusRuntimeException(Status.DEADLINE_EXCEEDED, metadata); + + boolean result = policy.test(e); + + assertThat(result).isTrue(); + } + + @Test + public void deadlineWithWriteTimeout() { + Metadata.Key key = + ProtoUtils.keyForProto(QueryOuterClass.WriteTimeout.getDefaultInstance()); + QueryOuterClass.WriteTimeout value = QueryOuterClass.WriteTimeout.newBuilder().build(); + Metadata metadata = new Metadata(); + metadata.put(key, value); + StatusRuntimeException e = new StatusRuntimeException(Status.DEADLINE_EXCEEDED, metadata); + + boolean result = policy.test(e); + + assertThat(result).isTrue(); + } + + @Test + public void deadlineWithWrongTrailer() { + Metadata.Key key = + ProtoUtils.keyForProto(QueryOuterClass.Unavailable.getDefaultInstance()); + QueryOuterClass.Unavailable value = QueryOuterClass.Unavailable.newBuilder().build(); + Metadata metadata = new Metadata(); + metadata.put(key, value); + StatusRuntimeException e = new StatusRuntimeException(Status.DEADLINE_EXCEEDED, metadata); + + boolean result = policy.test(e); + + assertThat(result).isFalse(); + } + + @Test + public void deadlineWithoutTrailer() { + StatusRuntimeException e = new StatusRuntimeException(Status.DEADLINE_EXCEEDED); + + boolean result = policy.test(e); + + assertThat(result).isFalse(); + } + + @Test + public void ignoredStatusCode() { + StatusRuntimeException e = new StatusRuntimeException(Status.INTERNAL); + + boolean result = policy.test(e); + + assertThat(result).isFalse(); + } + } +}