-
Notifications
You must be signed in to change notification settings - Fork 511
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement new POTel span processor #3223
Changes from all commits
28effd6
e7cbb59
618d6ca
7dba029
1a35dae
c523182
9bfab81
e7a20f2
436626b
5d04d3d
048acc9
8a08fb3
2e2e5b9
2c29711
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,26 @@ | ||
from opentelemetry.context.context import Context | ||
from opentelemetry.context import Context, create_key, get_value, set_value | ||
from opentelemetry.context.contextvars_context import ContextVarsRuntimeContext | ||
|
||
from sentry_sdk.scope import Scope | ||
|
||
|
||
_SCOPES_KEY = create_key("sentry_scopes") | ||
|
||
|
||
class SentryContextVarsRuntimeContext(ContextVarsRuntimeContext): | ||
def attach(self, context): | ||
# type: (Context) -> object | ||
# TODO-neel-potel do scope management | ||
return super().attach(context) | ||
scopes = get_value(_SCOPES_KEY, context) | ||
|
||
if scopes and isinstance(scopes, tuple): | ||
(current_scope, isolation_scope) = scopes | ||
else: | ||
current_scope = Scope.get_current_scope() | ||
isolation_scope = Scope.get_isolation_scope() | ||
|
||
# TODO-neel-potel fork isolation_scope too like JS | ||
# once we setup our own apis to pass through to otel | ||
new_scopes = (current_scope.fork(), isolation_scope) | ||
new_context = set_value(_SCOPES_KEY, new_scopes, context) | ||
|
||
def detach(self, token): | ||
# type: (object) -> None | ||
# TODO-neel-potel not sure if we need anything here, see later | ||
super().detach(token) | ||
return super().attach(new_context) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,24 @@ | ||
from opentelemetry.sdk.trace import SpanProcessor | ||
from collections import deque, defaultdict | ||
|
||
from opentelemetry.trace import format_trace_id, format_span_id | ||
from opentelemetry.context import Context | ||
from opentelemetry.sdk.trace import Span, ReadableSpan, SpanProcessor | ||
|
||
from sentry_sdk import capture_event | ||
from sentry_sdk.integrations.opentelemetry.utils import ( | ||
is_sentry_span, | ||
convert_otel_timestamp, | ||
extract_span_data, | ||
) | ||
from sentry_sdk.integrations.opentelemetry.consts import ( | ||
OTEL_SENTRY_CONTEXT, | ||
SPAN_ORIGIN, | ||
) | ||
from sentry_sdk._types import TYPE_CHECKING | ||
|
||
if TYPE_CHECKING: | ||
from typing import Optional | ||
from opentelemetry.sdk.trace import ReadableSpan | ||
from typing import Optional, List, Any, Deque, DefaultDict | ||
from sentry_sdk._types import Event | ||
|
||
|
||
class PotelSentrySpanProcessor(SpanProcessor): | ||
|
@@ -22,15 +35,25 @@ def __new__(cls): | |
|
||
def __init__(self): | ||
# type: () -> None | ||
pass | ||
self._children_spans = defaultdict( | ||
list | ||
) # type: DefaultDict[int, List[ReadableSpan]] | ||
|
||
def on_start(self, span, parent_context=None): | ||
# type: (ReadableSpan, Optional[Context]) -> None | ||
# type: (Span, Optional[Context]) -> None | ||
pass | ||
|
||
def on_end(self, span): | ||
# type: (ReadableSpan) -> None | ||
pass | ||
if is_sentry_span(span): | ||
return | ||
|
||
# TODO-neel-potel-remote only take parent if not remote | ||
if span.parent: | ||
self._children_spans[span.parent.span_id].append(span) | ||
else: | ||
# if have a root span ending, we build a transaction and send it | ||
self._flush_root_span(span) | ||
|
||
# TODO-neel-potel not sure we need a clear like JS | ||
def shutdown(self): | ||
|
@@ -42,3 +65,117 @@ def shutdown(self): | |
def force_flush(self, timeout_millis=30000): | ||
# type: (int) -> bool | ||
return True | ||
|
||
def _flush_root_span(self, span): | ||
# type: (ReadableSpan) -> None | ||
transaction_event = self._root_span_to_transaction_event(span) | ||
if not transaction_event: | ||
return | ||
|
||
spans = [] | ||
for child in self._collect_children(span): | ||
span_json = self._span_to_json(child) | ||
if span_json: | ||
spans.append(span_json) | ||
transaction_event["spans"] = spans | ||
# TODO-neel-potel sort and cutoff max spans | ||
|
||
capture_event(transaction_event) | ||
|
||
def _collect_children(self, span): | ||
# type: (ReadableSpan) -> List[ReadableSpan] | ||
if not span.context: | ||
return [] | ||
|
||
children = [] | ||
bfs_queue = deque() # type: Deque[int] | ||
bfs_queue.append(span.context.span_id) | ||
|
||
while bfs_queue: | ||
parent_span_id = bfs_queue.popleft() | ||
node_children = self._children_spans.pop(parent_span_id, []) | ||
children.extend(node_children) | ||
bfs_queue.extend( | ||
[child.context.span_id for child in node_children if child.context] | ||
) | ||
|
||
return children | ||
|
||
# we construct the event from scratch here | ||
# and not use the current Transaction class for easier refactoring | ||
def _root_span_to_transaction_event(self, span): | ||
Comment on lines
+104
to
+105
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is cool, haven't thought of completely bypassing this. I'll have to think about the implications for the granular instrumenter if we isolate OTel and our instrumentation like this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so for now I'm just creating a raw dict, but I'm thinking eventually of a |
||
# type: (ReadableSpan) -> Optional[Event] | ||
if not span.context: | ||
return None | ||
if not span.start_time: | ||
return None | ||
if not span.end_time: | ||
return None | ||
|
||
trace_id = format_trace_id(span.context.trace_id) | ||
span_id = format_span_id(span.context.span_id) | ||
parent_span_id = format_span_id(span.parent.span_id) if span.parent else None | ||
|
||
(op, description, _) = extract_span_data(span) | ||
|
||
trace_context = { | ||
"trace_id": trace_id, | ||
"span_id": span_id, | ||
"origin": SPAN_ORIGIN, | ||
"op": op, | ||
"status": "ok", # TODO-neel-potel span status mapping | ||
} # type: dict[str, Any] | ||
|
||
if parent_span_id: | ||
trace_context["parent_span_id"] = parent_span_id | ||
if span.attributes: | ||
trace_context["data"] = dict(span.attributes) | ||
|
||
contexts = {"trace": trace_context} | ||
if span.resource.attributes: | ||
contexts[OTEL_SENTRY_CONTEXT] = {"resource": dict(span.resource.attributes)} | ||
|
||
event = { | ||
"type": "transaction", | ||
"transaction": description, | ||
# TODO-neel-potel tx source based on integration | ||
"transaction_info": {"source": "custom"}, | ||
"contexts": contexts, | ||
"start_timestamp": convert_otel_timestamp(span.start_time), | ||
"timestamp": convert_otel_timestamp(span.end_time), | ||
} # type: Event | ||
|
||
return event | ||
|
||
def _span_to_json(self, span): | ||
# type: (ReadableSpan) -> Optional[dict[str, Any]] | ||
if not span.context: | ||
return None | ||
if not span.start_time: | ||
return None | ||
if not span.end_time: | ||
return None | ||
|
||
trace_id = format_trace_id(span.context.trace_id) | ||
span_id = format_span_id(span.context.span_id) | ||
parent_span_id = format_span_id(span.parent.span_id) if span.parent else None | ||
|
||
(op, description, _) = extract_span_data(span) | ||
|
||
span_json = { | ||
"trace_id": trace_id, | ||
"span_id": span_id, | ||
"origin": SPAN_ORIGIN, | ||
"op": op, | ||
"description": description, | ||
"status": "ok", # TODO-neel-potel span status mapping | ||
"start_timestamp": convert_otel_timestamp(span.start_time), | ||
"timestamp": convert_otel_timestamp(span.end_time), | ||
} # type: dict[str, Any] | ||
|
||
if parent_span_id: | ||
span_json["parent_span_id"] = parent_span_id | ||
if span.attributes: | ||
span_json["data"] = dict(span.attributes) | ||
|
||
return span_json |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm thinking what happens if there's a child span that, for whatever reason (weird async cases? wonky instrumentation?), hasn't finished before the root span. Since we're not using
on_start
at all, there'll be virtually no record of the span at this point and the parent transaction will be sent. Once the child finishes, we'll runon_end
with the now orphaned span and it'll be saved in_children_spans
, but never sent, since the parent transaction is already gone. So we might have a potential leak there.Do we need some sort of cleanup of orphaned spans? Should we wait a bit before flushing the transaction to account for child spans possibly ending very close to the parent span, but a bit late? IIRC I've noticed JS also having a small sleep in place.
TBH not sure how much of a real world problem late child spans are, but I can imagine they happen.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Another option: Also use
on_start
to create some record of the span and then inon_end
, if a transaction comes and we detect that it has unfinished child spans, wait for a grace period to give them time to finish?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
some heuristic cleanup logic will be done in a follow up PR yes, I haven't thought through exactly how we'll do it but JS just has a cutoff logic of 5 minutes