Skip to content

Commit

Permalink
closes #261: grpc retries and timeouts to fix availability failures
Browse files Browse the repository at this point in the history
  • Loading branch information
Ivan Senic committed Apr 4, 2023
1 parent e2f8210 commit c8a65c7
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package io.stargate.sgv2.jsonapi.grpc.retries.impl;

import io.grpc.Metadata;
import io.grpc.Status;
import io.grpc.StatusRuntimeException;
import io.grpc.protobuf.ProtoUtils;
import io.stargate.bridge.proto.QueryOuterClass;
import java.util.Objects;
import java.util.function.Predicate;
import javax.enterprise.context.ApplicationScoped;

/**
 * Retry predicate applied to failed gRPC calls in this project.
 *
 * <p>A failure is reported as retryable when its status code is {@code UNAVAILABLE}, or when it
 * is {@code DEADLINE_EXCEEDED} and the call trailers contain a read-timeout or write-timeout
 * entry. Every other failure is reported as not retryable.
 */
@ApplicationScoped
// TODO correct type here
public class JsonApiGrpcRetryPolicy implements Predicate<StatusRuntimeException> {

  /** Trailer key under which a {@code ReadTimeout} proto message is looked up. */
  private static final Metadata.Key<QueryOuterClass.ReadTimeout> READ_TIMEOUT_KEY =
      ProtoUtils.keyForProto(QueryOuterClass.ReadTimeout.getDefaultInstance());

  /** Trailer key under which a {@code WriteTimeout} proto message is looked up. */
  private static final Metadata.Key<QueryOuterClass.WriteTimeout> WRITE_TIMEOUT_KEY =
      ProtoUtils.keyForProto(QueryOuterClass.WriteTimeout.getDefaultInstance());

  /**
   * Decides whether the failed call should be retried.
   *
   * @param e the gRPC failure to inspect
   * @return {@code true} for {@code UNAVAILABLE}, or for {@code DEADLINE_EXCEEDED} whose trailers
   *     hold a read or write timeout entry; {@code false} otherwise
   */
  @Override
  public boolean test(StatusRuntimeException e) {
    Status.Code statusCode = e.getStatus().getCode();

    // unavailable is retried unconditionally
    if (statusCode == Status.Code.UNAVAILABLE) {
      return true;
    }

    // anything that is not a timeout is never retried
    if (statusCode != Status.Code.DEADLINE_EXCEEDED) {
      return false;
    }

    // timeouts are retried only when the trailers mark them as server side timeouts
    return hasServerSideTimeoutTrailer(e.getTrailers());
  }

  /**
   * Checks the trailers for one of the timeout entries that qualify a timeout for a retry.
   *
   * @param trailers trailers of the failed call, may be {@code null}
   * @return {@code true} only if trailers exist and contain the read or the write timeout key
   */
  private boolean hasServerSideTimeoutTrailer(Metadata trailers) {
    // TODO double check the CAS write timeout retries are fine
    return trailers != null
        && (trailers.containsKey(READ_TIMEOUT_KEY) || trailers.containsKey(WRITE_TIMEOUT_KEY));
  }
}
13 changes: 13 additions & 0 deletions src/main/resources/application.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@ stargate:
exception-mappers:
enabled: false

# custom grpc settings
grpc:

  # default client timeout 2x from max server side timeout
  # see https://docs.datastax.com/en/dse/6.8/dse-dev/datastax_enterprise/config/configCassandra_yaml.html#Networktimeoutsettings
  call-deadline: PT20S

  # retries use custom policy, see io.stargate.sgv2.jsonapi.grpc.retries.impl.JsonApiGrpcRetryPolicy
  # NOTE: the key must be spelled "retries"; a misspelled key is presumably not picked up by the
  # gRPC config mapping, which would leave the settings below without effect
  retries:
    enabled: true
    max-attempts: 1
    policy: custom

# metrics properties
# see io.stargate.sgv2.api.common.config.MetricsConfig for all config properties and options
metrics:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package io.stargate.sgv2.jsonapi.grpc.retries.impl;

import static org.assertj.core.api.Assertions.assertThat;

import io.grpc.Metadata;
import io.grpc.Status;
import io.grpc.StatusRuntimeException;
import io.grpc.protobuf.ProtoUtils;
import io.stargate.bridge.proto.QueryOuterClass;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;

/** Unit tests for the retry decisions made by {@link JsonApiGrpcRetryPolicy}. */
class JsonApiGrpcRetryPolicyTest {

  JsonApiGrpcRetryPolicy policy = new JsonApiGrpcRetryPolicy();

  /**
   * Builds a {@code DEADLINE_EXCEEDED} failure whose trailers contain exactly one entry.
   *
   * @param key trailer key to register
   * @param value trailer value stored under the key
   * @return the exception carrying the status and the single-entry trailers
   */
  private static <T> StatusRuntimeException deadlineExceededWith(Metadata.Key<T> key, T value) {
    Metadata trailers = new Metadata();
    trailers.put(key, value);
    return new StatusRuntimeException(Status.DEADLINE_EXCEEDED, trailers);
  }

  /** Cases for {@link JsonApiGrpcRetryPolicy#test(StatusRuntimeException)}. */
  @Nested
  class PredicateTest {

    /** UNAVAILABLE is retried even without trailers. */
    @Test
    public void unavailable() {
      StatusRuntimeException failure = new StatusRuntimeException(Status.UNAVAILABLE);

      assertThat(policy.test(failure)).isTrue();
    }

    /** DEADLINE_EXCEEDED with a read timeout trailer is retried. */
    @Test
    public void deadlineWithReadTimeout() {
      StatusRuntimeException failure =
          deadlineExceededWith(
              ProtoUtils.keyForProto(QueryOuterClass.ReadTimeout.getDefaultInstance()),
              QueryOuterClass.ReadTimeout.newBuilder().build());

      assertThat(policy.test(failure)).isTrue();
    }

    /** DEADLINE_EXCEEDED with a write timeout trailer is retried. */
    @Test
    public void deadlineWithWriteTimeout() {
      StatusRuntimeException failure =
          deadlineExceededWith(
              ProtoUtils.keyForProto(QueryOuterClass.WriteTimeout.getDefaultInstance()),
              QueryOuterClass.WriteTimeout.newBuilder().build());

      assertThat(policy.test(failure)).isTrue();
    }

    /** DEADLINE_EXCEEDED with an unrelated trailer (Unavailable) is not retried. */
    @Test
    public void deadlineWithWrongTrailer() {
      StatusRuntimeException failure =
          deadlineExceededWith(
              ProtoUtils.keyForProto(QueryOuterClass.Unavailable.getDefaultInstance()),
              QueryOuterClass.Unavailable.newBuilder().build());

      assertThat(policy.test(failure)).isFalse();
    }

    /** DEADLINE_EXCEEDED created without trailers is not retried. */
    @Test
    public void deadlineWithoutTrailer() {
      StatusRuntimeException failure = new StatusRuntimeException(Status.DEADLINE_EXCEEDED);

      assertThat(policy.test(failure)).isFalse();
    }

    /** A status code other than UNAVAILABLE or DEADLINE_EXCEEDED is not retried. */
    @Test
    public void ignoredStatusCode() {
      StatusRuntimeException failure = new StatusRuntimeException(Status.INTERNAL);

      assertThat(policy.test(failure)).isFalse();
    }
  }
}

0 comments on commit c8a65c7

Please sign in to comment.