Skip to content

Commit

Permalink
refactor: unify error metrics (#1342)
Browse files Browse the repository at this point in the history
  • Loading branch information
StarpTech authored Nov 7, 2024
1 parent fb47157 commit c6277bd
Show file tree
Hide file tree
Showing 8 changed files with 205 additions and 114 deletions.
121 changes: 97 additions & 24 deletions router-tests/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1645,8 +1645,8 @@ func TestPrometheus(t *testing.T) {
totalRequestErrorsMetric := totalRequestsErrors.GetMetric()

require.Len(t, totalRequestErrorsMetric, 2)
require.Len(t, totalRequestErrorsMetric[0].Label, 13)
require.Len(t, totalRequestErrorsMetric[1].Label, 15)
require.Len(t, totalRequestErrorsMetric[0].Label, 12)
require.Len(t, totalRequestErrorsMetric[1].Label, 14)

// Error metric for the subgraph error
require.Equal(t, []*io_prometheus_client.LabelPair{
Expand Down Expand Up @@ -1686,10 +1686,6 @@ func TestPrometheus(t *testing.T) {
Name: PointerOf("wg_operation_type"),
Value: PointerOf("query"),
},
{
Name: PointerOf("wg_request_error"),
Value: PointerOf("true"),
},
{
Name: PointerOf("wg_router_cluster_name"),
Value: PointerOf(""),
Expand Down Expand Up @@ -1726,10 +1722,6 @@ func TestPrometheus(t *testing.T) {
Name: PointerOf("wg_client_version"),
Value: PointerOf("missing"),
},
{
Name: PointerOf("wg_component_name"),
Value: PointerOf("engine-loader"),
},
{
Name: PointerOf("wg_federated_graph_id"),
Value: PointerOf("graph"),
Expand Down Expand Up @@ -1807,13 +1799,39 @@ func TestPrometheus(t *testing.T) {
mf, err := promRegistry.Gather()
require.NoError(t, err)

requestDuration := findMetricFamilyByName(mf, "router_http_request_duration_milliseconds")
requestDurationMetric := requestDuration.GetMetric()

/**
employees -> 200 Status code = 1
products -> 403 + 2x error codes = 2
router -> 200 Status code + 2x error codes = 2
Total metrics = 5
*/

require.Len(t, requestDurationMetric, 5)
require.Len(t, requestDurationMetric[0].Label, 14)
require.Len(t, requestDurationMetric[1].Label, 14)
require.Len(t, requestDurationMetric[2].Label, 14)
require.Len(t, requestDurationMetric[3].Label, 16)
require.Len(t, requestDurationMetric[4].Label, 16)

totalRequestsErrors := findMetricFamilyByName(mf, "router_http_requests_error_total")
totalRequestErrorsMetric := totalRequestsErrors.GetMetric()

require.Len(t, totalRequestErrorsMetric, 3)
require.Len(t, totalRequestErrorsMetric[0].Label, 14)
require.Len(t, totalRequestErrorsMetric[1].Label, 14)
/**
products -> 403 + 2x error codes = 2
router -> 200 Status code + 2x error codes = 2
Total metrics = 4
*/

require.Len(t, totalRequestErrorsMetric, 4)
require.Len(t, totalRequestErrorsMetric[0].Label, 13)
require.Len(t, totalRequestErrorsMetric[1].Label, 13)
require.Len(t, totalRequestErrorsMetric[2].Label, 15)
require.Len(t, totalRequestErrorsMetric[3].Label, 15)

require.Equal(t, []*io_prometheus_client.LabelPair{
{
Expand Down Expand Up @@ -1856,10 +1874,6 @@ func TestPrometheus(t *testing.T) {
Name: PointerOf("wg_operation_type"),
Value: PointerOf("query"),
},
{
Name: PointerOf("wg_request_error"),
Value: PointerOf("true"),
},
{
Name: PointerOf("wg_router_cluster_name"),
Value: PointerOf(""),
Expand Down Expand Up @@ -1916,10 +1930,6 @@ func TestPrometheus(t *testing.T) {
Name: PointerOf("wg_operation_type"),
Value: PointerOf("query"),
},
{
Name: PointerOf("wg_request_error"),
Value: PointerOf("true"),
},
{
Name: PointerOf("wg_router_cluster_name"),
Value: PointerOf(""),
Expand All @@ -1936,6 +1946,10 @@ func TestPrometheus(t *testing.T) {

// Error metric for the subgraph error
require.Equal(t, []*io_prometheus_client.LabelPair{
{
Name: PointerOf("error_codes"),
Value: PointerOf("UNAUTHORIZED"),
},
{
Name: PointerOf("http_status_code"),
Value: PointerOf("403"),
Expand All @@ -1957,8 +1971,67 @@ func TestPrometheus(t *testing.T) {
Value: PointerOf("missing"),
},
{
Name: PointerOf("wg_component_name"),
Value: PointerOf("engine-loader"),
Name: PointerOf("wg_federated_graph_id"),
Value: PointerOf("graph"),
},
{
Name: PointerOf("wg_operation_name"),
Value: PointerOf("myQuery"),
},
{
Name: PointerOf("wg_operation_protocol"),
Value: PointerOf("http"),
},
{
Name: PointerOf("wg_operation_type"),
Value: PointerOf("query"),
},
{
Name: PointerOf("wg_router_cluster_name"),
Value: PointerOf(""),
},
{
Name: PointerOf("wg_router_config_version"),
Value: PointerOf(xEnv.RouterConfigVersionMain()),
},
{
Name: PointerOf("wg_router_version"),
Value: PointerOf("dev"),
},
{
Name: PointerOf("wg_subgraph_id"),
Value: PointerOf("3"),
},
{
Name: PointerOf("wg_subgraph_name"),
Value: PointerOf("products"),
},
}, totalRequestErrorsMetric[2].Label)

require.Equal(t, []*io_prometheus_client.LabelPair{
{
Name: PointerOf("error_codes"),
Value: PointerOf("YOUR_ERROR_CODE"),
},
{
Name: PointerOf("http_status_code"),
Value: PointerOf("403"),
},
{
Name: PointerOf("otel_scope_name"),
Value: PointerOf("cosmo.router.prometheus"),
},
{
Name: PointerOf("otel_scope_version"),
Value: PointerOf("0.0.1"),
},
{
Name: PointerOf("wg_client_name"),
Value: PointerOf("unknown"),
},
{
Name: PointerOf("wg_client_version"),
Value: PointerOf("missing"),
},
{
Name: PointerOf("wg_federated_graph_id"),
Expand Down Expand Up @@ -1996,7 +2069,7 @@ func TestPrometheus(t *testing.T) {
Name: PointerOf("wg_subgraph_name"),
Value: PointerOf("products"),
},
}, totalRequestErrorsMetric[2].Label)
}, totalRequestErrorsMetric[3].Label)
})
})

Expand Down
17 changes: 9 additions & 8 deletions router-tests/telemetry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2349,16 +2349,16 @@ func TestTelemetry(t *testing.T) {
require.Equal(t, codes.Error, sn[1].Status().Code)
require.Contains(t, sn[1].Status().Description, "unexpected literal - got: UNDEFINED want one of: [ENUM TYPE UNION QUERY INPUT EXTEND SCHEMA SCALAR FRAGMENT INTERFACE DIRECTIVE]")

require.Lenf(t, sn[1].Attributes(), 8, "expected 14 attributes, got %d", len(sn[1].Attributes()))
require.Lenf(t, sn[1].Attributes(), 8, "expected 8 attributes, got %d", len(sn[1].Attributes()))

require.Contains(t, sn[1].Attributes(), otel.WgRouterVersion.String("dev"))
require.Contains(t, sn[1].Attributes(), otel.WgRouterClusterName.String(""))
require.Contains(t, sn[1].Attributes(), otel.WgFederatedGraphID.String("graph"))
require.Contains(t, sn[1].Attributes(), otel.WgRouterConfigVersion.String(xEnv.RouterConfigVersionMain()))
require.Contains(t, sn[1].Attributes(), otel.WgClientName.String("unknown"))
require.Contains(t, sn[1].Attributes(), otel.WgRequestError.Bool(true))
require.Contains(t, sn[1].Attributes(), otel.WgClientVersion.String("missing"))
require.Contains(t, sn[1].Attributes(), otel.WgOperationProtocol.String("http"))
require.Contains(t, sn[1].Attributes(), otel.WgRequestError.Bool(true))

events := sn[1].Events()
require.Len(t, events, 1, "expected 1 event because the GraphQL parsing failed")
Expand All @@ -2370,7 +2370,6 @@ func TestTelemetry(t *testing.T) {
require.Contains(t, sn[2].Status().Description, "unexpected literal - got: UNDEFINED want one of: [ENUM TYPE UNION QUERY INPUT EXTEND SCHEMA SCALAR FRAGMENT INTERFACE DIRECTIVE]")

require.Lenf(t, sn[2].Attributes(), 23, "expected 23 attributes, got %d", len(sn[2].Attributes()))
require.Contains(t, sn[2].Attributes(), otel.WgRequestError.Bool(true))

events = sn[2].Events()
require.Len(t, events, 1, "expected 1 event because the GraphQL request failed")
Expand Down Expand Up @@ -2745,8 +2744,10 @@ func TestTelemetry(t *testing.T) {
{
Attributes: attribute.NewSet(
attribute.String("from_header", "custom-value"),
attribute.StringSlice("error_codes", []string{"UNAUTHORIZED", "YOUR_ERROR_CODE"}),
attribute.String("sha256", "b0066f89f91315b4610ed127be677e6cea380494eb20c83cc121c97552ca44b2"),
semconv.HTTPStatusCode(403),
otel.WgRequestError.Bool(true),
otel.WgClientName.String("unknown"),
otel.WgClientVersion.String("missing"),
otel.WgFederatedGraphID.String("graph"),
Expand Down Expand Up @@ -2818,7 +2819,9 @@ func TestTelemetry(t *testing.T) {
{
Attributes: attribute.NewSet(
attribute.String("from_header", "custom-value"),
attribute.StringSlice("error_codes", []string{"UNAUTHORIZED", "YOUR_ERROR_CODE"}),
attribute.String("sha256", "b0066f89f91315b4610ed127be677e6cea380494eb20c83cc121c97552ca44b2"),
otel.WgRequestError.Bool(true),
semconv.HTTPStatusCode(403),
otel.WgClientName.String("unknown"),
otel.WgClientVersion.String("missing"),
Expand All @@ -2837,12 +2840,12 @@ func TestTelemetry(t *testing.T) {
},
{
Attributes: attribute.NewSet(
otel.WgRequestError.Bool(true),
attribute.StringSlice("error_codes", []string{"UNAUTHORIZED", "YOUR_ERROR_CODE"}),
attribute.StringSlice("error_services", []string{"products"}),
attribute.String("from_header", "custom-value"),
semconv.HTTPStatusCode(200),
attribute.StringSlice("services", []string{"employees", "products"}),
otel.WgRequestError.Bool(true),
attribute.String("sha256", "b0066f89f91315b4610ed127be677e6cea380494eb20c83cc121c97552ca44b2"),
otel.WgClientName.String("unknown"),
otel.WgClientVersion.String("missing"),
Expand Down Expand Up @@ -2916,7 +2919,6 @@ func TestTelemetry(t *testing.T) {
attribute.StringSlice("services", []string{"employees", "products"}),
attribute.StringSlice("error_services", []string{"products"}),
semconv.HTTPStatusCode(200),
otel.WgRequestError.Bool(true),
otel.WgClientName.String("unknown"),
otel.WgClientVersion.String("missing"),
otel.WgFederatedGraphID.String("graph"),
Expand Down Expand Up @@ -3009,7 +3011,6 @@ func TestTelemetry(t *testing.T) {
attribute.StringSlice("services", []string{"employees", "products"}),
attribute.StringSlice("error_services", []string{"products"}),
semconv.HTTPStatusCode(200),
otel.WgRequestError.Bool(true),
otel.WgClientName.String("unknown"),
otel.WgClientVersion.String("missing"),
otel.WgFederatedGraphID.String("graph"),
Expand Down Expand Up @@ -3129,9 +3130,9 @@ func TestTelemetry(t *testing.T) {
{
Attributes: attribute.NewSet(
attribute.String("from_header", "custom-value"),
attribute.StringSlice("error_codes", []string{"UNAUTHORIZED", "YOUR_ERROR_CODE"}),
attribute.String("sha256", "b0066f89f91315b4610ed127be677e6cea380494eb20c83cc121c97552ca44b2"),
semconv.HTTPStatusCode(403),
otel.WgComponentName.String("engine-loader"),
otel.WgClientName.String("unknown"),
otel.WgClientVersion.String("missing"),
otel.WgFederatedGraphID.String("graph"),
Expand Down Expand Up @@ -3165,7 +3166,6 @@ func TestTelemetry(t *testing.T) {
otel.WgRouterClusterName.String(""),
otel.WgRouterConfigVersion.String(xEnv.RouterConfigVersionMain()),
otel.WgRouterVersion.String("dev"),
otel.WgRequestError.Bool(true),
),
Value: 1,
},
Expand All @@ -3189,6 +3189,7 @@ func TestTelemetry(t *testing.T) {
failedRequestsMetric,
},
}

metricdatatest.AssertEqual(t, want, rm.ScopeMetrics[0], metricdatatest.IgnoreTimestamp(), metricdatatest.IgnoreValue())
})
})
Expand Down
1 change: 1 addition & 0 deletions router/core/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ type contextKey int
// Distinct contextKey values, numbered sequentially from zero via iota.
// The numeric value of each key is determined by its position in this block,
// so new keys should be appended at the end rather than inserted.
// NOTE(review): presumably these are used as context.Context value keys —
// confirm against the callers, which are not visible here.
const (
// requestContextKey is the first key (iota == 0).
requestContextKey contextKey = iota
// subgraphResolverContextKey is the second key (iota == 1).
subgraphResolverContextKey
// engineLoaderHooksContextKey is the third key (iota == 2).
engineLoaderHooksContextKey
)

var _ RequestContext = (*requestContext)(nil)
Expand Down
Loading

0 comments on commit c6277bd

Please sign in to comment.