From 58f1c5528487d731673c2bbc042b7d97c80759ea Mon Sep 17 00:00:00 2001 From: Aaron Gable Date: Thu, 16 Feb 2023 14:17:09 -0800 Subject: [PATCH] Allow BoulderErrors to be interpreted as grpc.Statuses (#6654) Add the GRPCStatus method to our BoulderError type, so that the gRPC server code can automatically set an appropriate Status on all gRPC responses, based on the kind of error that we return. We still serialize the whole BoulderError type and details into the response metadata, so that it can be rehydrated on the client side, but this allows the gRPC-native Status to be something other than Unknown. As part of this change, have our custom error serialization code stop manually setting the gRPC status code to codes.Unknown. This change allows the default gRPC prometheus metrics to more accurately report the kinds of errors our gRPC requests experience, and may allow us to more elegantly transition to using grpc.Status errors in other places where they're relevant and useful. --- errors/errors.go | 50 +++++++++++++++++++++++++++++++++++++++ grpc/errors.go | 24 +++++++++---------- grpc/interceptors_test.go | 5 ++-- 3 files changed, 64 insertions(+), 15 deletions(-) diff --git a/errors/errors.go b/errors/errors.go index 50be1087a09..6eaf4c331fa 100644 --- a/errors/errors.go +++ b/errors/errors.go @@ -15,6 +15,8 @@ import ( "time" "github.com/letsencrypt/boulder/identifier" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" ) // ErrorType provides a coarse category for BoulderErrors. @@ -78,6 +80,54 @@ func (be *BoulderError) Unwrap() error { return be.Type } +// GRPCStatus implements the interface implicitly defined by gRPC's +// status.FromError, which uses this function to detect if the error produced +// by the gRPC server implementation code is a gRPC status.Status. Implementing +// this means that BoulderErrors serialized in gRPC response metadata can be +// accompanied by a gRPC status other than "UNKNOWN". +func (be *BoulderError) GRPCStatus() *status.Status { + var c codes.Code + switch be.Type { + case InternalServer: + c = codes.Internal + case Malformed: + c = codes.InvalidArgument + case Unauthorized: + c = codes.PermissionDenied + case NotFound: + c = codes.NotFound + case RateLimit: + c = codes.ResourceExhausted + case RejectedIdentifier: + c = codes.InvalidArgument + case InvalidEmail: + c = codes.InvalidArgument + case ConnectionFailure: + c = codes.Unavailable + case CAA: + c = codes.FailedPrecondition + case MissingSCTs: + c = codes.Internal + case Duplicate: + c = codes.AlreadyExists + case OrderNotReady: + c = codes.FailedPrecondition + case DNS: + c = codes.Unknown + case BadPublicKey: + c = codes.InvalidArgument + case BadCSR: + c = codes.InvalidArgument + case AlreadyRevoked: + c = codes.AlreadyExists + case BadRevocationReason: + c = codes.InvalidArgument + default: + c = codes.Unknown + } + return status.New(c, be.Error()) +} + // WithSubErrors returns a new BoulderError instance created by adding the // provided subErrs to the existing BoulderError. func (be *BoulderError) WithSubErrors(subErrs []SubBoulderError) *BoulderError { diff --git a/grpc/errors.go b/grpc/errors.go index 9515d58f3c1..7a06b9fcf68 100644 --- a/grpc/errors.go +++ b/grpc/errors.go @@ -9,7 +9,6 @@ import ( "time" "google.golang.org/grpc" - "google.golang.org/grpc/codes" "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" @@ -21,12 +20,13 @@ import ( // context. errors.BoulderError error types are encoded using the grpc/metadata // in the context.Context for the RPC which is considered to be the 'proper' // method of encoding custom error types (grpc/grpc#4543 and grpc/grpc-go#478) -func wrapError(ctx context.Context, err error) error { - if err == nil { +func wrapError(ctx context.Context, appErr error) error { + if appErr == nil { return nil } + var berr *berrors.BoulderError - if errors.As(err, &berr) { + if errors.As(appErr, &berr) { pairs := []string{ "errortype", strconv.Itoa(int(berr.Type)), } @@ -39,8 +39,7 @@ func wrapError(ctx context.Context, err error) error { jsonSubErrs, err := json.Marshal(berr.SubErrors) if err != nil { return berrors.InternalServerError( - "error marshaling json SubErrors, orig error %q", - err) + "error marshaling json SubErrors, orig error %q", err) } pairs = append(pairs, "suberrors", string(jsonSubErrs)) } @@ -51,13 +50,14 @@ func wrapError(ctx context.Context, err error) error { pairs = append(pairs, "retryafter", berr.RetryAfter.String()) } - // Ignoring the error return here is safe because if setting the metadata - // fails, we'll still return an error, but it will be interpreted on the - // other side as an InternalServerError instead of a more specific one. - _ = grpc.SetTrailer(ctx, metadata.Pairs(pairs...)) - return status.Errorf(codes.Unknown, err.Error()) + err := grpc.SetTrailer(ctx, metadata.Pairs(pairs...)) + if err != nil { + return berrors.InternalServerError( + "error setting gRPC error metadata, orig error %q", appErr) + } } - return status.Errorf(codes.Unknown, err.Error()) + + return appErr } // unwrapError unwraps errors returned from gRPC client calls which were wrapped diff --git a/grpc/interceptors_test.go b/grpc/interceptors_test.go index 478fd5468a0..dfe6bdaaa69 100644 --- a/grpc/interceptors_test.go +++ b/grpc/interceptors_test.go @@ -18,7 +18,6 @@ import ( "github.com/prometheus/client_golang/prometheus" "google.golang.org/grpc" "google.golang.org/grpc/balancer/roundrobin" - "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/metadata" @@ -136,7 +135,7 @@ func (s *testServer) Chill(ctx context.Context, in *test_proto.Time) (*test_prot spent := int64(time.Since(start) / time.Nanosecond) return &test_proto.Time{Time: spent}, nil case <-ctx.Done(): - return nil, status.Errorf(codes.DeadlineExceeded, "the chiller overslept") + return nil, errors.New("unique error indicating that the server's shortened context timed itself out") } } @@ -182,7 +181,7 @@ func TestTimeouts(t *testing.T) { timeout time.Duration expectedErrorPrefix string }{ - {250 * time.Millisecond, "rpc error: code = Unknown desc = rpc error: code = DeadlineExceeded desc = the chiller overslept"}, + {250 * time.Millisecond, "rpc error: code = Unknown desc = unique error indicating that the server's shortened context timed itself out"}, {100 * time.Millisecond, "Chiller.Chill timed out after 0 ms"}, {10 * time.Millisecond, "Chiller.Chill timed out after 0 ms"}, }