Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to ignore some requests from httplib #263

Merged
merged 8 commits into from
Jan 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,36 @@ XRayMiddleware(app, xray_recorder)
db = XRayFlaskSqlAlchemy(app)

```

### Ignoring httplib requests

If you want to ignore certain httplib requests, you can do so based on the hostname or URL that is being requested. The hostname is matched using the Python [fnmatch library](https://docs.python.org/3/library/fnmatch.html), which does Unix glob style matching; URLs must match one of the listed values exactly.

```python
from aws_xray_sdk.ext.httplib import add_ignored as xray_add_ignored

# ignore requests to test.myapp.com
xray_add_ignored(hostname='test.myapp.com')

# ignore requests to a subdomain of myapp.com with a glob pattern
xray_add_ignored(hostname='*.myapp.com')

# ignore requests to /test-path and /other-test-path
xray_add_ignored(urls=['/test-path', '/other-test-path'])

# ignore requests to myapp.com for /test-url
xray_add_ignored(hostname='myapp.com', urls=['/test-url'])
```

If you use a subclass of an httplib connection class to make your requests, you can also filter on the name of the class that initiates the request. The match is an exact comparison against the fully qualified class name (module path plus class name).

```python
from aws_xray_sdk.ext.httplib import add_ignored as xray_add_ignored

# ignore all requests made by botocore
xray_add_ignored(subclass='botocore.awsrequest.AWSHTTPConnection')
```

## License

The AWS X-Ray SDK for Python is licensed under the Apache 2.0 License. See LICENSE and NOTICE.txt for more information.
4 changes: 2 additions & 2 deletions aws_xray_sdk/ext/httplib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .patch import patch, unpatch
from .patch import patch, unpatch, add_ignored, reset_ignored

__all__ = ['patch', 'unpatch']
__all__ = ['patch', 'unpatch', 'add_ignored', 'reset_ignored']
48 changes: 44 additions & 4 deletions aws_xray_sdk/ext/httplib/patch.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from collections import namedtuple
import sys
import wrapt

import fnmatch
import urllib3.connection

from aws_xray_sdk.core import xray_recorder
Expand All @@ -22,8 +22,33 @@

# Name of the X-Ray property; how it is attached is not visible in this hunk
# (presumably an attribute set on the connection object -- TODO confirm).
_XRAY_PROP = '_xray_prop'
# Request details captured for a traced call: HTTP method, host and URL.
_XRay_Data = namedtuple('xray_data', ['method', 'host', 'url'])
# One ignore rule. A field left as None matches any request:
#   subclass - fully qualified class name of the connection, compared with ==
#   hostname - glob pattern matched against the host with fnmatch
#   urls     - tuple of URLs; the request URL must be a member
_XRay_Ignore = namedtuple('xray_ignore', ['subclass', 'hostname', 'urls'])
# A flag indicates whether this module is X-Ray patched or not
PATCH_FLAG = '__xray_patched'
# Calls that should be ignored: a set of _XRay_Ignore rules
_XRAY_IGNORE = set()


def add_ignored(subclass=None, hostname=None, urls=None):
    """Register a rule describing httplib requests that must not be traced.

    A request is ignored when it satisfies every criterion set on at least
    one registered rule; a criterion left as ``None`` matches any request.
    Calling this with all three arguments ``None`` registers nothing.

    :param subclass: fully qualified class name of the connection object
        issuing the request, compared for equality.
    :param hostname: host name pattern, matched with ``fnmatch``.
    :param urls: iterable of URLs, compared for exact equality. A single
        string is accepted and treated as one URL.
    """
    if subclass is None and hostname is None and urls is None:
        return
    if urls is not None:
        # A bare string is itself iterable: tuple('/a') would yield the
        # characters ('/', 'a') and the rule would silently never match.
        # Wrap it so it is stored as a single URL. The tuple also keeps
        # the rule hashable so it can live in the set.
        urls = (urls,) if isinstance(urls, str) else tuple(urls)
    _XRAY_IGNORE.add(_XRay_Ignore(subclass=subclass, hostname=hostname, urls=urls))


def reset_ignored():
    """Drop every registered ignore rule, then restore the built-in defaults."""
    # The set is emptied in place, so no rebinding of the module global
    # is needed.
    _XRAY_IGNORE.clear()
    _ignored_add_default()


def _ignored_add_default():
    """Install the ignore rule the SDK itself relies on."""
    # skip httplib tracing for SDK built-in centralized sampling pollers
    sampling_urls = ['/GetSamplingRules', '/SamplingTargets']
    add_ignored(
        subclass='botocore.awsrequest.AWSHTTPConnection',
        urls=sampling_urls,
    )
Comment on lines +45 to +47
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm conflicted here - I know that using Python's built-in string matching as we are now will be faster than using fnmatch, but it would be awkward to not use this ignore mechanism and special-case /GetSamplingRules and /SamplingTargets.

I guess the added latency is kinda peanuts compared to the actual network request, so it's probably ok, but what do you think @srprash?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel using fnmatch for ignoring /GetSamplingRules and /SamplingTarget is okay since it fits well in the overall mechanism to ignore any URL. I'm not really aware of the latency of fnmatch but my guess is that special casing the sampling urls and then matching the user urls would be roughly equivalent and won't cause much of a difference here.



# make sure we have the default rules
_ignored_add_default()


def http_response_processor(wrapped, instance, args, kwargs, return_value,
Expand Down Expand Up @@ -77,11 +102,26 @@ def http_send_request_processor(wrapped, instance, args, kwargs, return_value,
subsegment.add_exception(exception, stack)


def _ignore_request(instance, hostname, url):
    """Return True when a registered ignore rule matches this request.

    :param instance: connection object issuing the request; its class is
        turned into a dotted name for the ``subclass`` criterion.
    :param hostname: host the request is sent to, matched with ``fnmatch``.
    :param url: request URL, tested for membership in the rule's ``urls``.
    """
    cls = instance.__class__
    module_name = cls.__module__
    # Classes with no module, or living in the same module as the built-in
    # ``type``, are identified by their bare class name.
    if module_name is None or module_name == str.__class__.__module__:
        qualified_name = cls.__name__
    else:
        qualified_name = module_name + '.' + cls.__name__

    for rule in _XRAY_IGNORE:
        # Every criterion is evaluated; an unset (None) criterion passes.
        criteria = (
            rule.subclass is None or qualified_name == rule.subclass,
            rule.hostname is None or fnmatch.fnmatch(hostname, rule.hostname),
            rule.urls is None or url in rule.urls,
        )
        if all(criteria):
            return True
    return False


def _send_request(wrapped, instance, args, kwargs):
def decompose_args(method, url, body, headers, encode_chunked=False):
# skip httplib tracing for SDK built-in centralized sampling pollers
if (('/GetSamplingRules' in args or '/SamplingTargets' in args) and
type(instance).__name__ == 'botocore.awsrequest.AWSHTTPConnection'):
# skip any ignored requests
if _ignore_request(instance, instance.host, url):
return wrapped(*args, **kwargs)

# Only injects headers when the subsegment for the outgoing
Expand Down
67 changes: 66 additions & 1 deletion tests/ext/httplib/test_httplib.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def construct_ctx():
so that later subsegment can be attached. After each test run
it cleans up context storage again.
"""
from aws_xray_sdk.ext.httplib import unpatch
from aws_xray_sdk.ext.httplib import unpatch, reset_ignored

patch(('httplib',))
xray_recorder.configure(service='test', sampling=False, context=Context())
Expand All @@ -35,6 +35,7 @@ def construct_ctx():
yield
xray_recorder.clear_trace_entities()
unpatch()
reset_ignored()


def _do_req(url, method='GET', use_https=True):
Expand Down Expand Up @@ -141,3 +142,67 @@ def test_correct_identify_https():

https_meta = subsegment.http
assert https_meta['request']['url'].split(":")[0] == 'https'


def test_ignore_url():
    """A rule listing only the request path suppresses the subsegment."""
    from aws_xray_sdk.ext.httplib import add_ignored
    ignored_path = '/status/200'
    add_ignored(urls=[ignored_path])
    target = 'https://{}{}'.format(BASE_URL, ignored_path)
    _do_req(target, use_https=True)
    recorded = xray_recorder.current_segment().subsegments
    assert len(recorded) == 0


def test_ignore_hostname():
    """A rule naming the exact host suppresses the subsegment."""
    from aws_xray_sdk.ext.httplib import add_ignored
    add_ignored(hostname=BASE_URL)
    target = 'https://{}{}'.format(BASE_URL, '/status/200')
    _do_req(target, use_https=True)
    recorded = xray_recorder.current_segment().subsegments
    assert len(recorded) == 0


def test_ignore_hostname_glob():
    """A glob pattern for the host suppresses the subsegment."""
    from aws_xray_sdk.ext.httplib import add_ignored
    add_ignored(hostname='http*.org')
    target = 'https://{}{}'.format(BASE_URL, '/status/200')
    _do_req(target, use_https=True)
    recorded = xray_recorder.current_segment().subsegments
    assert len(recorded) == 0


# Trivial HTTPSConnection subclass; the tests below reference it by its
# dotted name when registering ``subclass`` ignore rules.
class CustomHttpsConnection(httplib.HTTPSConnection):
    pass


def test_ignore_subclass():
    """A rule naming the connection class suppresses the subsegment."""
    from aws_xray_sdk.ext.httplib import add_ignored
    add_ignored(subclass='tests.ext.httplib.test_httplib.CustomHttpsConnection')
    connection = CustomHttpsConnection(BASE_URL)
    connection.request('GET', '/status/200')
    connection.getresponse()
    recorded = xray_recorder.current_segment().subsegments
    assert len(recorded) == 0


def test_ignore_multiple_match():
    """A rule with class and host set ignores a request matching both."""
    from aws_xray_sdk.ext.httplib import add_ignored
    add_ignored(
        subclass='tests.ext.httplib.test_httplib.CustomHttpsConnection',
        hostname=BASE_URL,
    )
    connection = CustomHttpsConnection(BASE_URL)
    connection.request('GET', '/status/200')
    connection.getresponse()
    recorded = xray_recorder.current_segment().subsegments
    assert len(recorded) == 0


def test_ignore_multiple_no_match():
    """A rule whose host does not match leaves the request traced."""
    from aws_xray_sdk.ext.httplib import add_ignored
    add_ignored(
        subclass='tests.ext.httplib.test_httplib.CustomHttpsConnection',
        hostname='fake.host',
    )
    connection = CustomHttpsConnection(BASE_URL)
    connection.request('GET', '/status/200')
    connection.getresponse()
    recorded = xray_recorder.current_segment().subsegments
    assert len(recorded) > 0