Parse errormsgs in retryable status codes #5541

Merged
Changes from 7 commits
Commits (15)
170861d
parse-errormsgs-in-retry-able-statuscodes: In case of retyable scenar…
pree-dew Jun 24, 2024
6c740ac
parse-errormsgs-in-retry-able-statuscodes: add change log detail and …
pree-dew Jun 24, 2024
17cc1e8
parse-errormsgs-in-retry-able-statuscodes: 1. Add capability to pass …
pree-dew Jun 26, 2024
a2c49e6
parse-errormsgs-in-retry-able-statuscodes: fix lint issues
pree-dew Jun 28, 2024
abe63e9
parse-errormsgs-in-retry-able-statuscodes: 1. Include msg parsing fro…
pree-dew Jul 4, 2024
7884731
parse-errormsgs-in-retry-able-statuscodes: change errors.New to fmt.E…
pree-dew Jul 4, 2024
2709f7c
parse-errormsgs-in-retry-able-statuscodes: 1. Correct changlog and co…
pree-dew Jul 4, 2024
ae806df
parse-errormsgs-in-retry-able-statuscodes: resolve merge conflicts
pree-dew Jul 8, 2024
2bfb551
parse-errormsgs-in-retry-able-statuscodes: sync with main
pree-dew Jul 11, 2024
d1a5da6
parse-errormsgs-in-retry-able-statuscodes: 1. Provide Unwrap and As m…
pree-dew Jul 11, 2024
999b4d5
parse-errormsgs-in-retry-able-statuscodes: 1. Don't pre-render the er…
pree-dew Jul 11, 2024
0e2dd7c
parse-errormsgs-in-retry-able-statuscodes: 1. Use better assert funct…
pree-dew Jul 12, 2024
75af2ca
parse-errormsgs-in-retry-able-statuscodes: cover unwrap and As functi…
pree-dew Jul 12, 2024
ed9f12e
parse-errormsgs-in-retry-able-statuscodes: sync with main and resolve…
pree-dew Jul 17, 2024
cd4a639
Merge branch 'main' into parse-errormsgs-in-retry-able-statuscodes
dmathieu Jul 18, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -42,6 +42,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
- Fix panic in baggage creation when a member contains 0x80 char in key or value. (#5494)
- Correct comments for the priority of the `WithEndpoint` and `WithEndpointURL` options and their corresponding environment variables in `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc`. (#5508)
- Fix stale timestamps reported by the lastvalue aggregation. (#5517)
- Pass the underlying error rather than a generic retry-able failure in `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp`, `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp` and `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp`. (#5541)

## [1.27.0/0.49.0/0.3.0] 2024-05-21

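The behavior change, in condensed form: the retry-able error now carries the server-provided message and only falls back to the generic text when none is available. The sketch below mirrors the unexported `retryableError` and `newResponseError` from the diff; it is illustrative only, not importable API.

package main

import (
	"fmt"
	"net/http"
	"strconv"
)

// retryableError mirrors the unexported type added in this PR: a retry-able
// request failure that optionally carries the server's error message.
type retryableError struct {
	throttle int64
	errMsg   string
}

func (e retryableError) Error() string {
	if e.errMsg != "" {
		return e.errMsg
	}
	return "retry-able request failure"
}

// newResponseError extracts an explicit Retry-After throttle delay and keeps
// the response body (if any) as the error message.
func newResponseError(header http.Header, body string) error {
	var rErr retryableError
	if v := header.Get("Retry-After"); v != "" {
		if t, err := strconv.ParseInt(v, 10, 64); err == nil {
			rErr.throttle = t
		}
	}
	rErr.errMsg = body
	return rErr
}

func main() {
	// Without a body the old generic message is still returned.
	fmt.Println(newResponseError(http.Header{}, ""))
	// With a body, the underlying server error is surfaced to the caller.
	fmt.Println(newResponseError(http.Header{}, "rpc error: code = Unavailable desc = service.name not found in resource attributes"))
}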
42 changes: 36 additions & 6 deletions exporters/otlp/otlplog/otlploghttp/client.go
@@ -14,9 +14,11 @@ import (
"net/http"
"net/url"
"strconv"
"strings"
"sync"
"time"

"google.golang.org/grpc/status"
"google.golang.org/protobuf/proto"

"go.opentelemetry.io/otel"
@@ -143,7 +145,7 @@ func (c *httpClient) uploadLogs(ctx context.Context, data []*logpb.ResourceLogs)
resp, err := c.client.Do(request.Request)
var urlErr *url.Error
if errors.As(err, &urlErr) && urlErr.Temporary() {
return newResponseError(http.Header{})
return newResponseError(http.Header{}, err.Error())
}
if err != nil {
return err
@@ -184,13 +186,26 @@ func (c *httpClient) uploadLogs(ctx context.Context, data []*logpb.ResourceLogs)
sc == http.StatusServiceUnavailable,
sc == http.StatusGatewayTimeout:
// Retry-able failure.
rErr = newResponseError(resp.Header)
rErr = newResponseError(resp.Header, "")

// Going to retry, drain the body to reuse the connection.
if _, err := io.Copy(io.Discard, resp.Body); err != nil {
// server may return a message with the response
// body, so we read it to include in the error
// message to be returned. It will help in
// debugging the actual issue.
var respData bytes.Buffer
if _, err := io.Copy(&respData, resp.Body); err != nil {
_ = resp.Body.Close()
return err
}

// overwrite the error message with the response body
// if it is not empty
respStr := strings.TrimSpace(respData.String())
if respStr != "" {
// pass the error message along with retry-able error,
// so that it can be retried and also passes the message
rErr = newResponseError(resp.Header, respStr)
}
default:
rErr = fmt.Errorf("failed to send logs to %s: %s", request.URL, resp.Status)
}
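A minimal standalone sketch of the drain-and-capture step above (the httptest server and its response body are assumptions for illustration): the retry path still consumes the response body so the connection can be reused, but the bytes are kept and, when non-empty, become the error message.

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
	"net/http/httptest"
	"strings"
)

func main() {
	// Hypothetical collector endpoint that rejects the request with a
	// retry-able status code and an explanatory body.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusServiceUnavailable)
		fmt.Fprint(w, "rpc error: code = Unavailable desc = collector is restarting")
	}))
	defer srv.Close()

	resp, err := http.Get(srv.URL)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Drain into a buffer instead of io.Discard so the message is not lost.
	var respData bytes.Buffer
	if _, err := io.Copy(&respData, resp.Body); err != nil {
		panic(err)
	}
	if msg := strings.TrimSpace(respData.String()); msg != "" {
		fmt.Printf("retry-able failure %d: %s\n", resp.StatusCode, msg)
	}
}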
@@ -264,23 +279,38 @@ func (r *request) reset(ctx context.Context) {
}

// retryableError represents a request failure that can be retried.
//
// If the `errMsg` attribute is not empty, it will be used as the error message.
type retryableError struct {
throttle int64
errMsg string
}

// newResponseError returns a retryableError and will extract any explicit
// throttle delay contained in headers.
func newResponseError(header http.Header) error {
// throttle delay contained in headers and if there is message in the response
// body, it will be used as the error message.
func newResponseError(header http.Header, body string) error {
var rErr retryableError
if v := header.Get("Retry-After"); v != "" {
if t, err := strconv.ParseInt(v, 10, 64); err == nil {
rErr.throttle = t
}
}

rErr.errMsg = body
// Extract the error message from the response body.
if st, ok := status.FromError(fmt.Errorf(body)); ok {
rErr.errMsg = fmt.Sprintf("rpc error: code = %s desc = %s", st.Code(), st.Message())
}

return rErr
}

func (e retryableError) Error() string {
if e.errMsg != "" {
return e.errMsg
}

return "retry-able request failure"
}
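A small standalone check of the `status.FromError` call used above. For an error built from a plain string (one that carries no gRPC `*status.Status`), `FromError` reports `ok == false` and synthesizes a `codes.Unknown` status, so in that case `errMsg` keeps the raw body. The snippet only demonstrates the library behavior; the example message is an assumption.

package main

import (
	"fmt"

	"google.golang.org/grpc/status"
)

func main() {
	body := "rpc error: code = Unavailable desc = service.name not found in resource attributes"
	// Equivalent to fmt.Errorf(body), but with an explicit format verb.
	st, ok := status.FromError(fmt.Errorf("%s", body))
	fmt.Println(ok)                      // false: plain errors do not carry a gRPC status
	fmt.Println(st.Code(), st.Message()) // Unknown, with the original text as the message
}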

19 changes: 19 additions & 0 deletions exporters/otlp/otlplog/otlploghttp/client_test.go
@@ -700,6 +700,25 @@ func TestConfig(t *testing.T) {
assert.Len(t, rCh, 0, "failed HTTP responses did not occur")
})

t.Run("WithRetryAndExporterErr", func(t *testing.T) {
exporterErr := errors.New("rpc error: code = Unavailable desc = service.name not found in resource attributes")
rCh := make(chan exportResult, 1)
rCh <- exportResult{Err: &httpResponseError{
Status: http.StatusTooManyRequests,
Err: exporterErr,
}}
exp, coll := factoryFunc("", rCh, WithRetry(RetryConfig{
Enabled: false,
}))
ctx := context.Background()
t.Cleanup(func() { require.NoError(t, coll.Shutdown(ctx)) })
// Push this after Shutdown so the HTTP server doesn't hang.
t.Cleanup(func() { close(rCh) })
t.Cleanup(func() { require.NoError(t, exp.Shutdown(ctx)) })
err := exp.Export(ctx, make([]log.Record, 1))
assert.EqualError(t, err, fmt.Sprintf("%d: %v", http.StatusTooManyRequests, exporterErr))
})

t.Run("WithURLPath", func(t *testing.T) {
path := "/prefix/v2/logs"
ePt := fmt.Sprintf("http://localhost:0%s", path)
2 changes: 1 addition & 1 deletion exporters/otlp/otlplog/otlploghttp/go.mod
@@ -12,6 +12,7 @@ require (
go.opentelemetry.io/otel/sdk/log v0.3.0
go.opentelemetry.io/otel/trace v1.27.0
go.opentelemetry.io/proto/otlp v1.3.1
google.golang.org/grpc v1.64.0
google.golang.org/protobuf v1.34.2
)

@@ -29,7 +30,6 @@ require (
golang.org/x/text v0.16.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240617180043-68d350f18fd4 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240617180043-68d350f18fd4 // indirect
google.golang.org/grpc v1.64.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

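The `google.golang.org/grpc` requirement moves from the indirect block to the direct one because client.go now imports `google.golang.org/grpc/status` directly.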
43 changes: 37 additions & 6 deletions exporters/otlp/otlpmetric/otlpmetrichttp/client.go
@@ -14,9 +14,11 @@ import (
"net/http"
"net/url"
"strconv"
"strings"
"sync"
"time"

"google.golang.org/grpc/status"
"google.golang.org/protobuf/proto"

"go.opentelemetry.io/otel"
@@ -146,7 +148,7 @@ func (c *client) UploadMetrics(ctx context.Context, protoMetrics *metricpb.Resou
resp, err := c.httpClient.Do(request.Request)
var urlErr *url.Error
if errors.As(err, &urlErr) && urlErr.Temporary() {
return newResponseError(http.Header{})
return newResponseError(http.Header{}, err.Error())
}
if err != nil {
return err
@@ -187,13 +189,27 @@ func (c *client) UploadMetrics(ctx context.Context, protoMetrics *metricpb.Resou
sc == http.StatusServiceUnavailable,
sc == http.StatusGatewayTimeout:
// Retry-able failure.
rErr = newResponseError(resp.Header)
rErr = newResponseError(resp.Header, "")

// Going to retry, drain the body to reuse the connection.
if _, err := io.Copy(io.Discard, resp.Body); err != nil {
// server may return a message with the response
// body, so we read it to include in the error
// message to be returned. It will help in
// debugging the actual issue.
var respData bytes.Buffer
if _, err := io.Copy(&respData, resp.Body); err != nil {
_ = resp.Body.Close()
return err
}

// overwrite the error message with the response body
// if it is not empty
respStr := strings.TrimSpace(respData.String())
if respStr != "" {
// pass the error message along with retry-able error,
// so that it can be retried and also passes the message
rErr = newResponseError(resp.Header, respStr)
}

default:
rErr = fmt.Errorf("failed to send metrics to %s: %s", request.URL, resp.Status)
}
@@ -267,23 +283,38 @@ func (r *request) reset(ctx context.Context) {
}

// retryableError represents a request failure that can be retried.
//
// If the `errMsg` attribute is not empty, it will be used as the error message.
type retryableError struct {
throttle int64
errMsg string
}

// newResponseError returns a retryableError and will extract any explicit
// throttle delay contained in headers.
func newResponseError(header http.Header) error {
// throttle delay contained in headers and if there is message in the response
// body, it will be used as the error message.
func newResponseError(header http.Header, body string) error {
var rErr retryableError
if v := header.Get("Retry-After"); v != "" {
if t, err := strconv.ParseInt(v, 10, 64); err == nil {
rErr.throttle = t
}
}

rErr.errMsg = body
// Extract the error message from the response body.
if st, ok := status.FromError(fmt.Errorf(body)); ok {
rErr.errMsg = fmt.Sprintf("rpc error: code = %s desc = %s", st.Code(), st.Message())
}

return rErr
}

func (e retryableError) Error() string {
if e.errMsg != "" {
return e.errMsg
}

return "retry-able request failure"
}

19 changes: 19 additions & 0 deletions exporters/otlp/otlpmetric/otlpmetrichttp/client_test.go
@@ -192,6 +192,25 @@ func TestConfig(t *testing.T) {
assert.Len(t, rCh, 0, "failed HTTP responses did not occur")
})

t.Run("WithRetryAndExporterErr", func(t *testing.T) {
exporterErr := errors.New("rpc error: code = Unavailable desc = service.name not found in resource attributes")
rCh := make(chan otest.ExportResult, 1)
rCh <- otest.ExportResult{Err: &otest.HTTPResponseError{
Status: http.StatusTooManyRequests,
Err: exporterErr,
}}
exp, coll := factoryFunc("", rCh, WithRetry(RetryConfig{
Enabled: false,
}))
ctx := context.Background()
t.Cleanup(func() { require.NoError(t, coll.Shutdown(ctx)) })
// Push this after Shutdown so the HTTP server doesn't hang.
t.Cleanup(func() { close(rCh) })
t.Cleanup(func() { require.NoError(t, exp.Shutdown(ctx)) })
err := exp.Export(ctx, &metricdata.ResourceMetrics{})
assert.EqualError(t, err, fmt.Sprintf("failed to upload metrics: %d: %v", http.StatusTooManyRequests, exporterErr))
})

t.Run("WithURLPath", func(t *testing.T) {
path := "/prefix/v2/metrics"
ePt := fmt.Sprintf("http://localhost:0%s", path)
48 changes: 41 additions & 7 deletions exporters/otlp/otlptrace/otlptracehttp/client.go
@@ -14,9 +14,11 @@ import (
"net/http"
"net/url"
"strconv"
"strings"
"sync"
"time"

"google.golang.org/grpc/status"
"google.golang.org/protobuf/proto"

"go.opentelemetry.io/otel"
@@ -151,7 +153,7 @@ func (d *client) UploadTraces(ctx context.Context, protoSpans []*tracepb.Resourc
resp, err := d.client.Do(request.Request)
var urlErr *url.Error
if errors.As(err, &urlErr) && urlErr.Temporary() {
return newResponseError(http.Header{})
return newResponseError(http.Header{}, err.Error())
}
if err != nil {
return err
@@ -198,11 +200,28 @@ func (d *client) UploadTraces(ctx context.Context, protoSpans []*tracepb.Resourc
sc == http.StatusBadGateway,
sc == http.StatusServiceUnavailable,
sc == http.StatusGatewayTimeout:
// Retry-able failures. Drain the body to reuse the connection.
if _, err := io.Copy(io.Discard, resp.Body); err != nil {
otel.Handle(err)
// Retry-able failures.
rErr := newResponseError(resp.Header, "")

// server may return a message with the response
// body, so we read it to include in the error
// message to be returned. It will help in
// debugging the actual issue.
var respData bytes.Buffer
if _, err := io.Copy(&respData, resp.Body); err != nil {
_ = resp.Body.Close()
return err
}

// overwrite the error message with the response body
// if it is not empty
respStr := strings.TrimSpace(respData.String())
if respStr != "" {
// pass the error message along with retry-able error,
// so that it can be retried and also passes the message
rErr = newResponseError(resp.Header, respStr)
}
return newResponseError(resp.Header)
return rErr
default:
return fmt.Errorf("failed to send to %s: %s", request.URL, resp.Status)
}
@@ -289,23 +308,38 @@ func (r *request) reset(ctx context.Context) {
}

// retryableError represents a request failure that can be retried.
//
// If the `errMsg` attribute is not empty, it will be used as the error message.
type retryableError struct {
throttle int64
errMsg string
}

// newResponseError returns a retryableError and will extract any explicit
// throttle delay contained in headers.
func newResponseError(header http.Header) error {
// throttle delay contained in headers and if there is message in the response
// body, it will be used as the error message.
func newResponseError(header http.Header, body string) error {
var rErr retryableError
if s, ok := header["Retry-After"]; ok {
if t, err := strconv.ParseInt(s[0], 10, 64); err == nil {
rErr.throttle = t
}
}

rErr.errMsg = body
// Extract the error message from the response body.
if st, ok := status.FromError(fmt.Errorf(body)); ok {
rErr.errMsg = fmt.Sprintf("rpc error: code = %s desc = %s", st.Code(), st.Message())
}

return rErr
}

func (e retryableError) Error() string {
if e.errMsg != "" {
return e.errMsg
}

return "retry-able request failure"
}

2 changes: 1 addition & 1 deletion exporters/otlp/otlptrace/otlptracehttp/client_test.go
@@ -238,7 +238,7 @@ func TestTimeout(t *testing.T) {
assert.NoError(t, exporter.Shutdown(ctx))
}()
err = exporter.ExportSpans(ctx, otlptracetest.SingleReadOnlySpan())
assert.ErrorContains(t, err, "retry-able request failure")
assert.ErrorContains(t, err, "context deadline exceeded")
}

func TestNoRetry(t *testing.T) {
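The expected message in TestTimeout changes because the exporter now surfaces the underlying request error (here a client timeout whose text contains "context deadline exceeded") instead of the fixed "retry-able request failure" placeholder.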