Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix custom attributes for spans and histogram when used with response_event #5221

Merged
merged 7 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions .changesets/fix_bnjjj_fix_advanced_telemetry_spans.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
### Fix custom attributes for spans and histogram when used with response_event ([PR #5221](https://github.com/apollographql/router/pull/5221))

It will fix several issues found:

+ Custom attributes based on response_event in spans were not added properly
+ Histograms using response_event selectors were not updated properly
+ Static selector to set a static value is now able to take a Value instead of just a string
+ Static selector to set a static value is now set at every stages
+ New `on_graphql_error` selector also available on supergraph
+ You can now override the status of a span using `otel.status_code` attribute to change the status of a span

For example, by default spans are marked in error if you have a critical error or http status code != 200, now if you want to mark your span in error if you have a graphql error in response body for example then you can have this kind of configuration:

```yaml
telemetry:
instrumentation:
spans:
router:
attributes:
otel.status_code:
static: error
condition:
eq:
- true
- on_graphql_error: true
supergraph:
attributes:
otel.status_code:
static: error
condition:
eq:
- true
- on_graphql_error: true
```

By [@bnjjj](https://github.com/bnjjj) in https://github.com/apollographql/router/pull/5221
Original file line number Diff line number Diff line change
Expand Up @@ -4492,14 +4492,15 @@ expression: "&schema"
"type": "object"
},
{
"description": "Deprecated, should not be used anymore, use static field instead",
"type": "string"
},
{
"additionalProperties": false,
"properties": {
"static": {
"description": "A static string value",
"type": "string"
"$ref": "#/definitions/AttributeValue",
"description": "#/definitions/AttributeValue"
}
},
"required": [
Expand Down Expand Up @@ -5453,14 +5454,15 @@ expression: "&schema"
"type": "object"
},
{
"description": "Deprecated, should not be used anymore, use static field instead",
"type": "string"
},
{
"additionalProperties": false,
"properties": {
"static": {
"description": "A static string value",
"type": "string"
"$ref": "#/definitions/AttributeValue",
"description": "#/definitions/AttributeValue"
}
},
"required": [
Expand Down Expand Up @@ -6026,21 +6028,35 @@ expression: "&schema"
"type": "object"
},
{
"description": "Deprecated, should not be used anymore, use static field instead",
"type": "string"
},
{
"additionalProperties": false,
"properties": {
"static": {
"description": "A static string value",
"type": "string"
"$ref": "#/definitions/AttributeValue",
"description": "#/definitions/AttributeValue"
}
},
"required": [
"static"
],
"type": "object"
},
{
"additionalProperties": false,
"properties": {
"on_graphql_error": {
"description": "Boolean set to true if the response body contains graphql error",
"type": "boolean"
}
},
"required": [
"on_graphql_error"
],
"type": "object"
},
{
"additionalProperties": false,
"properties": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,6 @@ where
) -> Option<opentelemetry::Value> {
// We may have got the value from the request.
let value = mem::take(&mut *self.value.lock());

match (value, &self.condition) {
(State::Value(value), Some(condition)) => {
// We have a value already, let's see if the condition was evaluated to true.
Expand Down
89 changes: 83 additions & 6 deletions apollo-router/src/plugins/telemetry/config_new/instruments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1474,8 +1474,9 @@ where
}
};

if let (Some(histogram), Some(increment)) = (inner.histogram.take(), increment) {
if let (Some(histogram), Some(increment)) = (&inner.histogram, increment) {
histogram.record(increment, &inner.attributes);
inner.updated = true;
}
}

Expand Down Expand Up @@ -1529,9 +1530,9 @@ where
return;
}
};
inner.updated = true;
if let (Some(histogram), Some(increment)) = (&inner.histogram, increment) {
histogram.record(increment, &attrs);
inner.updated = true;
}
}

Expand Down Expand Up @@ -1595,6 +1596,7 @@ mod tests {
use serde_json::json;

use super::*;
use crate::context::CONTAINS_GRAPHQL_ERROR;
use crate::context::OPERATION_KIND;
use crate::graphql;
use crate::metrics::FutureMetricsExt;
Expand Down Expand Up @@ -1854,6 +1856,44 @@ mod tests {
}
}
},
"acme.request.on_graphql_error_selector": {
"value": "event_unit",
"type": "counter",
"unit": "error",
"description": "my description",
"condition": {
"eq": [
true,
{
"on_graphql_error": true
}
]
},
"attributes": {
"response_errors": {
"response_errors": "$.*"
}
}
},
"acme.request.on_graphql_error_histo": {
"value": "event_unit",
"type": "histogram",
"unit": "error",
"description": "my description",
"condition": {
"eq": [
"NOPE",
{
"response_errors": "$.[0].extensions.code"
}
]
},
"attributes": {
"response_errors": {
"response_errors": "$.*"
}
}
},
"acme.request.on_graphql_data": {
"value": {
"response_data": "$.price"
Expand Down Expand Up @@ -1895,7 +1935,14 @@ mod tests {

let custom_instruments = SupergraphCustomInstruments::new(&config.supergraph.custom);
let context = crate::context::Context::new();
let _ = context.insert(OPERATION_KIND, "query".to_string());
let _ = context.insert(OPERATION_KIND, "query".to_string()).unwrap();
let context_with_error = crate::context::Context::new();
let _ = context_with_error
.insert(OPERATION_KIND, "query".to_string())
.unwrap();
let _ = context_with_error
.insert(CONTAINS_GRAPHQL_ERROR, true)
.unwrap();
let supergraph_req = supergraph::Request::fake_builder()
.header("conditional-custom", "X")
.header("x-my-header-count", "55")
Expand Down Expand Up @@ -1929,7 +1976,7 @@ mod tests {
.extension_code("NOPE")
.build()])
.build(),
&supergraph_req.context.clone(),
&context_with_error,
);

assert_counter!("acme.query", 1.0, query = "{me{name}}");
Expand All @@ -1939,6 +1986,16 @@ mod tests {
1.0,
response_errors = "{\"message\":\"nope\",\"extensions\":{\"code\":\"NOPE\"}}"
);
assert_counter!(
"acme.request.on_graphql_error_selector",
1.0,
response_errors = "{\"message\":\"nope\",\"extensions\":{\"code\":\"NOPE\"}}"
);
assert_histogram_sum!(
"acme.request.on_graphql_error_histo",
1.0,
response_errors = "{\"message\":\"nope\",\"extensions\":{\"code\":\"NOPE\"}}"
);
assert_counter!("acme.request.on_graphql_data", 500.0, response.data = 500);

let custom_instruments = SupergraphCustomInstruments::new(&config.supergraph.custom);
Expand Down Expand Up @@ -1973,7 +2030,7 @@ mod tests {
.extension_code("NOPE")
.build()])
.build(),
&supergraph_req.context.clone(),
&context_with_error,
);

assert_counter!("acme.query", 1.0, query = "{me{name}}");
Expand All @@ -1983,6 +2040,16 @@ mod tests {
2.0,
response_errors = "{\"message\":\"nope\",\"extensions\":{\"code\":\"NOPE\"}}"
);
assert_counter!(
"acme.request.on_graphql_error_selector",
2.0,
response_errors = "{\"message\":\"nope\",\"extensions\":{\"code\":\"NOPE\"}}"
);
assert_histogram_sum!(
"acme.request.on_graphql_error_histo",
2.0,
response_errors = "{\"message\":\"nope\",\"extensions\":{\"code\":\"NOPE\"}}"
);
assert_counter!("acme.request.on_graphql_data", 1000.0, response.data = 500);

let custom_instruments = SupergraphCustomInstruments::new(&config.supergraph.custom);
Expand All @@ -2007,7 +2074,7 @@ mod tests {
&graphql::Response::builder()
.data(serde_json_bytes::json!({"foo": "bar"}))
.build(),
&supergraph_req.context.clone(),
&supergraph_req.context,
);

assert_counter!("acme.query", 2.0, query = "{me{name}}");
Expand All @@ -2017,6 +2084,16 @@ mod tests {
2.0,
response_errors = "{\"message\":\"nope\",\"extensions\":{\"code\":\"NOPE\"}}"
);
assert_counter!(
"acme.request.on_graphql_error_selector",
2.0,
response_errors = "{\"message\":\"nope\",\"extensions\":{\"code\":\"NOPE\"}}"
);
assert_histogram_sum!(
"acme.request.on_graphql_error_histo",
2.0,
response_errors = "{\"message\":\"nope\",\"extensions\":{\"code\":\"NOPE\"}}"
);
assert_counter!("acme.request.on_graphql_data", 1000.0, response.data = 500);
}
.with_metrics()
Expand Down
Loading
Loading