Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serialize vars early to avoid living references #3409

Merged
merged 3 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions sentry_sdk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
from collections.abc import Mapping
from datetime import datetime, timezone
from importlib import import_module
from typing import cast

from sentry_sdk._compat import PY37, check_uwsgi_thread_support
from sentry_sdk.utils import (
capture_internal_exceptions,
current_stacktrace,
disable_capture_event,
format_timestamp,
get_sdk_name,
get_type_name,
Expand Down Expand Up @@ -525,10 +525,13 @@ def _prepare_event(
# Postprocess the event here so that annotated types do
# generally not surface in before_send
if event is not None:
event = serialize(
event,
max_request_body_size=self.options.get("max_request_body_size"),
max_value_length=self.options.get("max_value_length"),
event = cast(
"Event",
serialize(
cast("Dict[str, Any]", event),
max_request_body_size=self.options.get("max_request_body_size"),
max_value_length=self.options.get("max_value_length"),
),
)

before_send = self.options["before_send"]
Expand Down Expand Up @@ -726,9 +729,6 @@ def capture_event(

:returns: An event ID. May be `None` if there is no DSN set or of if the SDK decided to discard the event for other reasons. In such situations setting `debug=True` on `init()` may help.
"""
if disable_capture_event.get(False):
return None

if hint is None:
hint = {}
event_id = event.get("event_id")
Expand Down
3 changes: 2 additions & 1 deletion sentry_sdk/integrations/pure_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ def start(n):
atok = source.asttokens()

expressions.sort(key=closeness, reverse=True)
return {
vars = {
atok.get_text(nodes[0]): value
for nodes, value in expressions[: serializer.MAX_DATABAG_BREADTH]
}
return serializer.serialize(vars, is_vars=True)
10 changes: 10 additions & 0 deletions sentry_sdk/scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
capture_internal_exception,
capture_internal_exceptions,
ContextVar,
disable_capture_event,
event_from_exception,
exc_info_from_error,
logger,
Expand Down Expand Up @@ -1130,6 +1131,9 @@ def capture_event(self, event, hint=None, scope=None, **scope_kwargs):

:returns: An `event_id` if the SDK decided to send the event (see :py:meth:`sentry_sdk.client._Client.capture_event`).
"""
if disable_capture_event.get(False):
return None

scope = self._merge_scopes(scope, scope_kwargs)

event_id = self.get_client().capture_event(event=event, hint=hint, scope=scope)
Expand Down Expand Up @@ -1157,6 +1161,9 @@ def capture_message(self, message, level=None, scope=None, **scope_kwargs):

:returns: An `event_id` if the SDK decided to send the event (see :py:meth:`sentry_sdk.client._Client.capture_event`).
"""
if disable_capture_event.get(False):
return None

if level is None:
level = "info"

Expand All @@ -1182,6 +1189,9 @@ def capture_exception(self, error=None, scope=None, **scope_kwargs):

:returns: An `event_id` if the SDK decided to send the event (see :py:meth:`sentry_sdk.client._Client.capture_event`).
"""
if disable_capture_event.get(False):
return None

if error is not None:
exc_info = exc_info_from_error(error)
else:
Expand Down
74 changes: 27 additions & 47 deletions sentry_sdk/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from typing import Type
from typing import Union

from sentry_sdk._types import NotImplementedType, Event
from sentry_sdk._types import NotImplementedType

Span = Dict[str, Any]

Expand Down Expand Up @@ -95,7 +95,25 @@ def __exit__(


def serialize(event, **kwargs):
# type: (Event, **Any) -> Event
# type: (Dict[str, Any], **Any) -> Dict[str, Any]
"""
A very smart serializer that takes a dict and emits a json-friendly dict.
Currently used for serializing the final Event and also prematurely while fetching the stack
local variables for each frame in a stacktrace.

It works internally with 'databags' which are arbitrary data structures like Mapping, Sequence and Set.
The algorithm itself is a recursive graph walk down the data structures it encounters.

It has the following responsibilities:
* Trimming databags and keeping them within MAX_DATABAG_BREADTH and MAX_DATABAG_DEPTH.
* Calling safe_repr() on objects appropriately to keep them informative and readable in the final payload.
* Annotating the payload with the _meta field whenever trimming happens.

:param max_request_body_size: If set to "always", will never trim request bodies.
:param max_value_length: The max length to strip strings to, defaults to sentry_sdk.consts.DEFAULT_MAX_VALUE_LENGTH
:param is_vars: If we're serializing vars early, we want to repr() things that are JSON-serializable to make their type more apparent. For example, it's useful to see the difference between a unicode-string and a bytestring when viewing a stacktrace.

"""
memo = Memo()
path = [] # type: List[Segment]
meta_stack = [] # type: List[Dict[str, Any]]
Expand All @@ -104,6 +122,7 @@ def serialize(event, **kwargs):
kwargs.pop("max_request_body_size", None) == "always"
) # type: bool
max_value_length = kwargs.pop("max_value_length", None) # type: Optional[int]
is_vars = kwargs.pop("is_vars", False)

def _annotate(**meta):
# type: (**Any) -> None
Expand All @@ -118,56 +137,17 @@ def _annotate(**meta):

meta_stack[-1].setdefault("", {}).update(meta)

def _should_repr_strings():
# type: () -> Optional[bool]
"""
By default non-serializable objects are going through
safe_repr(). For certain places in the event (local vars) we
want to repr() even things that are JSON-serializable to
make their type more apparent. For example, it's useful to
see the difference between a unicode-string and a bytestring
when viewing a stacktrace.

For container-types we still don't do anything different.
Generally we just try to make the Sentry UI present exactly
what a pretty-printed repr would look like.

:returns: `True` if we are somewhere in frame variables, and `False` if
we are in a position where we will never encounter frame variables
when recursing (for example, we're in `event.extra`). `None` if we
are not (yet) in frame variables, but might encounter them when
recursing (e.g. we're in `event.exception`)
"""
try:
p0 = path[0]
if p0 == "stacktrace" and path[1] == "frames" and path[3] == "vars":
return True

if (
p0 in ("threads", "exception")
and path[1] == "values"
and path[3] == "stacktrace"
and path[4] == "frames"
and path[6] == "vars"
):
return True
except IndexError:
return None

return False

def _is_databag():
# type: () -> Optional[bool]
"""
A databag is any value that we need to trim.
True for stuff like vars, request bodies, breadcrumbs and extra.

:returns: Works like `_should_repr_strings()`. `True` for "yes",
`False` for :"no", `None` for "maybe soon".
:returns: `True` for "yes", `False` for :"no", `None` for "maybe soon".
"""
try:
rv = _should_repr_strings()
if rv in (True, None):
return rv
if is_vars:
return True

is_request_body = _is_request_body()
if is_request_body in (True, None):
Expand Down Expand Up @@ -253,7 +233,7 @@ def _serialize_node_impl(
if isinstance(obj, AnnotatedValue):
should_repr_strings = False
if should_repr_strings is None:
should_repr_strings = _should_repr_strings()
should_repr_strings = is_vars

if is_databag is None:
is_databag = _is_databag()
Expand Down Expand Up @@ -387,7 +367,7 @@ def _serialize_node_impl(
disable_capture_event.set(True)
try:
serialized_event = _serialize_node(event, **kwargs)
if meta_stack and isinstance(serialized_event, dict):
if not is_vars and meta_stack and isinstance(serialized_event, dict):
serialized_event["_meta"] = meta_stack[0]

return serialized_event
Expand Down
4 changes: 3 additions & 1 deletion sentry_sdk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,9 @@ def serialize_frame(
)

if include_local_variables:
rv["vars"] = frame.f_locals.copy()
from sentry_sdk.serializer import serialize

rv["vars"] = serialize(dict(frame.f_locals), is_vars=True)

return rv

Expand Down
17 changes: 17 additions & 0 deletions tests/test_scrubber.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,20 @@ def test_recursive_event_scrubber(sentry_init, capture_events):

(event,) = events
assert event["extra"]["deep"]["deeper"][0]["deepest"]["password"] == "'[Filtered]'"


def test_recursive_scrubber_does_not_override_original(sentry_init, capture_events):
sentry_init(event_scrubber=EventScrubber(recursive=True))
events = capture_events()

data = {"csrf": "secret"}
try:
raise RuntimeError("An error")
except Exception:
capture_exception()

(event,) = events
frames = event["exception"]["values"][0]["stacktrace"]["frames"]
(frame,) = frames
assert data["csrf"] == "secret"
assert frame["vars"]["data"]["csrf"] == "[Filtered]"
Loading