Skip to content

Commit

Permalink
Add ability to ignore some requests from httplib (aws#263)
Browse files Browse the repository at this point in the history
* Expand ability to ignore some httplib calls.

* Add tests.

* Add glob match to httplib ignore hostname.

* Clean up httplib tests.

* Use full module path for subclass.

* Add documentation for ignoring httplib requests

* Code review feedback
  • Loading branch information
jonathangreen authored and Tyler Hargraves committed Mar 22, 2022
1 parent 2b95726 commit b714572
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 7 deletions.
30 changes: 30 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,36 @@ XRayMiddleware(app, xray_recorder)
db = XRayFlaskSqlAlchemy(app)

```

### Ignoring httplib requests

If you want to ignore certain httplib requests, you can do so based on the hostname or URL that is being requested. The hostname is matched using the Python [fnmatch library](https://docs.python.org/3/library/fnmatch.html), which does Unix glob style matching.

```python
from aws_xray_sdk.ext.httplib import add_ignored as xray_add_ignored

# ignore requests to test.myapp.com
xray_add_ignored(hostname='test.myapp.com')

# ignore requests to a subdomain of myapp.com with a glob pattern
xray_add_ignored(hostname='*.myapp.com')

# ignore requests to /test-path and /other-test-path
xray_add_ignored(urls=['/test-path', '/other-test-path'])

# ignore requests to myapp.com for /test-url
xray_add_ignored(hostname='myapp.com', urls=['/test-url'])
```

If you use a subclass of httplib to make your requests, you can also filter on the name of the class that initiates the request. The match is made against the fully qualified class name, i.e. the complete module path followed by the class name.

```python
from aws_xray_sdk.ext.httplib import add_ignored as xray_add_ignored

# ignore all requests made by botocore
xray_add_ignored(subclass='botocore.awsrequest.AWSHTTPConnection')
```

## License

The AWS X-Ray SDK for Python is licensed under the Apache 2.0 License. See LICENSE and NOTICE.txt for more information.
4 changes: 2 additions & 2 deletions aws_xray_sdk/ext/httplib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .patch import patch, unpatch
from .patch import patch, unpatch, add_ignored, reset_ignored

__all__ = ['patch', 'unpatch']
__all__ = ['patch', 'unpatch', 'add_ignored', 'reset_ignored']
48 changes: 44 additions & 4 deletions aws_xray_sdk/ext/httplib/patch.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from collections import namedtuple
import sys
import wrapt

import fnmatch
import urllib3.connection

from aws_xray_sdk.core import xray_recorder
Expand All @@ -22,8 +22,33 @@

_XRAY_PROP = '_xray_prop'
# Request details captured as a (method, host, url) record
_XRay_Data = namedtuple('xray_data', ['method', 'host', 'url'])
# One ignore rule; a field left as None places no restriction on that criterion
_XRay_Ignore = namedtuple('xray_ignore', ['subclass', 'hostname', 'urls'])
# A flag indicates whether this module is X-Ray patched or not
PATCH_FLAG = '__xray_patched'
# Calls that should be ignored, stored as a set of _XRay_Ignore rules
_XRAY_IGNORE = set()


def add_ignored(subclass=None, hostname=None, urls=None):
    """Register a rule describing httplib requests that must not be traced.

    A request is ignored when it satisfies every criterion the rule sets;
    criteria left as ``None`` are not checked. Calling this function with
    all three arguments left as ``None`` registers nothing.

    :param subclass: fully qualified name (``module.ClassName``) of the
        connection class issuing the request, compared for equality
    :param hostname: host name pattern, matched with ``fnmatch`` globbing
    :param urls: iterable of URLs to ignore; a single string is accepted
        and treated as one URL
    """
    if subclass is None and hostname is None and urls is None:
        # A rule without any criterion would match every request.
        return
    if urls is not None:
        # A bare string is one URL, not an iterable of one-character URLs;
        # tuple('/test') would otherwise yield ('/', 't', 'e', 's', 't').
        urls = (urls,) if isinstance(urls, str) else tuple(urls)
    # The rule set is only mutated, never rebound, so no ``global`` is needed.
    _XRAY_IGNORE.add(_XRay_Ignore(subclass=subclass, hostname=hostname, urls=urls))


def reset_ignored():
    """Discard every registered ignore rule, then re-add the default ones."""
    # The shared rule set is emptied in place rather than rebound.
    _XRAY_IGNORE.clear()
    _ignored_add_default()


def _ignored_add_default():
    """Register the built-in ignore rule for the SDK's own sampling calls."""
    # skip httplib tracing for SDK built-in centralized sampling pollers
    sampling_urls = ['/GetSamplingRules', '/SamplingTargets']
    add_ignored(subclass='botocore.awsrequest.AWSHTTPConnection',
                urls=sampling_urls)


# Install the default rules as soon as the module is imported.
_ignored_add_default()


def http_response_processor(wrapped, instance, args, kwargs, return_value,
Expand Down Expand Up @@ -77,11 +102,26 @@ def http_send_request_processor(wrapped, instance, args, kwargs, return_value,
subsegment.add_exception(exception, stack)


def _ignore_request(instance, hostname, url):
    """Tell whether a request matches any registered ignore rule.

    A rule matches when each criterion it sets (subclass, hostname, urls)
    is satisfied; a criterion left as ``None`` always counts as satisfied.

    :param instance: the connection object sending the request
    :param hostname: host the request goes to, matched with ``fnmatch``
    :param url: request URL, looked up in the rule's ``urls``
    :return: ``True`` if at least one rule matches, ``False`` otherwise
    """
    cls = instance.__class__
    owner_module = cls.__module__
    # ``str.__class__`` is ``type``, so the right-hand side is the name of
    # the builtins module: builtin classes are identified by bare name,
    # all others by their dotted ``module.ClassName`` path.
    if owner_module is None or owner_module == str.__class__.__module__:
        qualified_name = cls.__name__
    else:
        qualified_name = owner_module + '.' + cls.__name__

    def rule_applies(rule):
        # All three criteria are evaluated before being combined, so no
        # check is skipped when an earlier one already failed.
        checks = (
            rule.subclass is None or qualified_name == rule.subclass,
            rule.hostname is None or fnmatch.fnmatch(hostname, rule.hostname),
            rule.urls is None or url in rule.urls,
        )
        return all(checks)

    return any(rule_applies(rule) for rule in _XRAY_IGNORE)


def _send_request(wrapped, instance, args, kwargs):
def decompose_args(method, url, body, headers, encode_chunked=False):
# skip httplib tracing for SDK built-in centralized sampling pollers
if (('/GetSamplingRules' in args or '/SamplingTargets' in args) and
type(instance).__name__ == 'botocore.awsrequest.AWSHTTPConnection'):
# skip any ignored requests
if _ignore_request(instance, instance.host, url):
return wrapped(*args, **kwargs)

# Only injects headers when the subsegment for the outgoing
Expand Down
67 changes: 66 additions & 1 deletion tests/ext/httplib/test_httplib.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def construct_ctx():
so that later subsegment can be attached. After each test run
it cleans up context storage again.
"""
from aws_xray_sdk.ext.httplib import unpatch
from aws_xray_sdk.ext.httplib import unpatch, reset_ignored

patch(('httplib',))
xray_recorder.configure(service='test', sampling=False, context=Context())
Expand All @@ -35,6 +35,7 @@ def construct_ctx():
yield
xray_recorder.clear_trace_entities()
unpatch()
reset_ignored()


def _do_req(url, method='GET', use_https=True):
Expand Down Expand Up @@ -141,3 +142,67 @@ def test_correct_identify_https():

https_meta = subsegment.http
assert https_meta['request']['url'].split(":")[0] == 'https'


def test_ignore_url():
    """A request whose path is on the ignore list records no subsegment."""
    from aws_xray_sdk.ext.httplib import add_ignored

    ignored_path = '/status/200'
    target = 'https://{}{}'.format(BASE_URL, ignored_path)
    add_ignored(urls=[ignored_path])

    _do_req(target, use_https=True)

    subsegments = xray_recorder.current_segment().subsegments
    assert len(subsegments) == 0


def test_ignore_hostname():
    """A request to an ignored host name records no subsegment."""
    from aws_xray_sdk.ext.httplib import add_ignored

    request_path = '/status/200'
    target = 'https://{}{}'.format(BASE_URL, request_path)
    add_ignored(hostname=BASE_URL)

    _do_req(target, use_https=True)

    subsegments = xray_recorder.current_segment().subsegments
    assert len(subsegments) == 0


def test_ignore_hostname_glob():
    """A request to a host matching an ignored glob records no subsegment."""
    from aws_xray_sdk.ext.httplib import add_ignored

    request_path = '/status/200'
    target = 'https://{}{}'.format(BASE_URL, request_path)
    # NOTE(review): relies on BASE_URL (defined outside this view) matching
    # the 'http*.org' pattern -- confirm if BASE_URL ever changes.
    add_ignored(hostname='http*.org')

    _do_req(target, use_https=True)

    subsegments = xray_recorder.current_segment().subsegments
    assert len(subsegments) == 0


class CustomHttpsConnection(httplib.HTTPSConnection):
    """Empty HTTPSConnection subclass used by the subclass-ignore tests."""


def test_ignore_subclass():
    """Requests sent through an ignored connection class record no subsegment."""
    from aws_xray_sdk.ext.httplib import add_ignored

    # The rule names the class by its full dotted path.
    add_ignored(subclass='tests.ext.httplib.test_httplib.CustomHttpsConnection')

    connection = CustomHttpsConnection(BASE_URL)
    connection.request('GET', '/status/200')
    connection.getresponse()

    subsegments = xray_recorder.current_segment().subsegments
    assert len(subsegments) == 0


def test_ignore_multiple_match():
    """A rule with two criteria applies when the request satisfies both."""
    from aws_xray_sdk.ext.httplib import add_ignored

    ignored_class = 'tests.ext.httplib.test_httplib.CustomHttpsConnection'
    add_ignored(subclass=ignored_class, hostname=BASE_URL)

    connection = CustomHttpsConnection(BASE_URL)
    connection.request('GET', '/status/200')
    connection.getresponse()

    subsegments = xray_recorder.current_segment().subsegments
    assert len(subsegments) == 0


def test_ignore_multiple_no_match():
    """A rule with two criteria does not apply when only one is satisfied."""
    from aws_xray_sdk.ext.httplib import add_ignored

    ignored_class = 'tests.ext.httplib.test_httplib.CustomHttpsConnection'
    # The class matches but the host name does not, so the request is traced.
    add_ignored(subclass=ignored_class, hostname='fake.host')

    connection = CustomHttpsConnection(BASE_URL)
    connection.request('GET', '/status/200')
    connection.getresponse()

    subsegments = xray_recorder.current_segment().subsegments
    assert len(subsegments) > 0

0 comments on commit b714572

Please sign in to comment.