From db5a01434d09b32ace6405837daaabd04b2d4bd4 Mon Sep 17 00:00:00 2001 From: Srdjan Grubor Date: Wed, 30 Jun 2021 11:48:11 -0500 Subject: [PATCH] [statsd] Add caching to tag normalization on Python3.2+ Since tag normalization is still the highest bottleneck in metrics submission latency, this change adds small caching (512 entries) to that method's calls via built-in `@lru_cache` where available (Python3.2+). When the cache is hit, we avoid the ultra-expensive `re.sub` operation and increase the performance. --- datadog/util/compat.py | 27 +++++++++++ datadog/util/format.py | 11 ++++- tests/performance/test_statsd_throughput.py | 8 +++- tests/unit/util/test_compat.py | 50 +++++++++++++++++++++ tests/unit/util/test_format.py | 2 + 5 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 tests/unit/util/test_compat.py diff --git a/datadog/util/compat.py b/datadog/util/compat.py index fc053b105..58927d189 100644 --- a/datadog/util/compat.py +++ b/datadog/util/compat.py @@ -10,6 +10,9 @@ import socket import sys +# Logging +log = logging.getLogger("datadog.util") + # Note: using `sys.version_info` instead of the helper functions defined here # so that mypy detects version-specific code paths. Currently, mypy doesn't # support try/except imports for version-specific code paths either. @@ -94,6 +97,13 @@ def is_p3k(): return _is_py_version_higher_than(3) +def is_higher_py32(): + """ + Assert that Python is version 3.2 or higher. + """ + return _is_py_version_higher_than(3, 2) + + def is_higher_py35(): """ Assert that Python is version 3.5 or higher. @@ -106,3 +116,20 @@ def is_pypy(): Assert that PyPy is being used (regardless of 2 or 3) """ return "__pypy__" in sys.builtin_module_names + + +def conditional_lru_cache(func): + """ + A decorator that conditionally enables a lru_cache of size 512 if + the version of Python can support it (>3.2) and otherwise returns + the original function + """ + if not is_higher_py32(): + return func + + log.debug("Enabling LRU cache for function %s", func.__name__) + + # pylint: disable=import-outside-toplevel + from functools import lru_cache + + return lru_cache(maxsize=512)(func) diff --git a/datadog/util/format.py b/datadog/util/format.py index a29edb231..f6b1e96af 100644 --- a/datadog/util/format.py +++ b/datadog/util/format.py @@ -7,6 +7,8 @@ import json import re +from datadog.util.compat import conditional_lru_cache + TAG_INVALID_CHARS_RE = re.compile(r"[^\w\d_\-:/\.]", re.UNICODE) TAG_INVALID_CHARS_SUBS = "_" @@ -29,5 +31,12 @@ def force_to_epoch_seconds(epoch_sec_or_dt): return epoch_sec_or_dt -def normalize_tags(tag_list): +@conditional_lru_cache +def _normalize_tags_with_cache(tag_list): return [TAG_INVALID_CHARS_RE.sub(TAG_INVALID_CHARS_SUBS, tag) for tag in tag_list] + + +def normalize_tags(tag_list): + # We have to turn our input tag list into a non-mutable tuple for it to + # be hashable (and thus usable) by the @lru_cache decorator. + return _normalize_tags_with_cache(tuple(tag_list)) diff --git a/tests/performance/test_statsd_throughput.py b/tests/performance/test_statsd_throughput.py index 59c45411c..a08723e52 100644 --- a/tests/performance/test_statsd_throughput.py +++ b/tests/performance/test_statsd_throughput.py @@ -147,12 +147,14 @@ def one_line_warning(message, category, filename, lineno, *_): # pylint: disable=too-many-locals def test_statsd_performance(self): print( - "Starting: {} run(s), {} threads, {} points/thread via {} (profiling: {})...".format( + "Starting: {} run(s), {} thread(s), {} points/thread via {} (profiling: {}) on Python{}.{} ...".format( self.num_runs, self.num_threads, self.num_datapoints, self.transport, str(self.profiling_enabled).lower(), + sys.version_info[0], + sys.version_info[1], ) ) @@ -209,7 +211,7 @@ def test_statsd_performance(self): run_latencies.append(float(avg_latency)) received_packet_pcts.append(received_packet_pct) - result_msg = "\nTotal for {} run(s), {} threads, {} points/thread via {}:\n" + result_msg = "\nTotal for {} run(s), {} thread(s), {} points/thread via {} on Python{}.{}:\n" result_msg += "\tDuration:\t\t{:.4f}s\n" result_msg += "\tLatency:\t\t{:.2f}μs\n" result_msg += "\tCPU:\t\t\t{:.4f}\n" @@ -221,6 +223,8 @@ def test_statsd_performance(self): self.num_threads, self.num_datapoints, self.transport, + sys.version_info[0], + sys.version_info[1], sum(run_durations) / len(run_durations), sum(run_latencies) / len(run_latencies), sum(run_cpu_stats) / len(run_cpu_stats), diff --git a/tests/unit/util/test_compat.py b/tests/unit/util/test_compat.py new file mode 100644 index 000000000..e9288e933 --- /dev/null +++ b/tests/unit/util/test_compat.py @@ -0,0 +1,50 @@ +# coding: utf8 +# Unless explicitly stated otherwise all files in this repository are licensed under the BSD-3-Clause License. +# This product includes software developed at Datadog (https://www.datadoghq.com/). +# Copyright 2015-Present Datadog, Inc +import logging +import unittest + +from mock import patch + +from datadog.util.compat import conditional_lru_cache, is_higher_py32 + +class TestConditionalLRUCache(unittest.TestCase): + def test_normal_usage(self): + @conditional_lru_cache + def test_function(some_string, num1, num2, num3): + return (some_string, num1 + num2 + num3) + + for idx in range(600): + self.assertEqual( + test_function("abc", idx, idx*2, idx *3), + ("abc", idx + idx * 2 + idx *3), + ) + + def test_var_args(self): + @conditional_lru_cache + def test_function(*args): + return sum(list(args)) + + args = [] + for idx in range(100): + args.append(idx) + self.assertEqual( + test_function(*args), + sum(args), + ) + + # pylint: disable=no-self-use + def test_debug_log(self): + test_object_logger = logging.getLogger('datadog.util') + with patch.object(test_object_logger, 'debug') as mock_debug: + @conditional_lru_cache + def test_function(): + pass + + test_function() + + if is_higher_py32(): + mock_debug.assert_called_once() + else: + mock_debug.assert_not_called() diff --git a/tests/unit/util/test_format.py b/tests/unit/util/test_format.py index 1d3d7cb36..dc4d6b62b 100644 --- a/tests/unit/util/test_format.py +++ b/tests/unit/util/test_format.py @@ -39,6 +39,8 @@ class TestNormalizeTags: Test of the format's `normalize_tags` functionality """ test_data = [ + ([], []), + ([''],['']), (['this is a tag'], ['this_is_a_tag']), (['abc!@#$%^&*()0987654321{}}{'], ['abc__________0987654321____']), (['abc!@#', '^%$#3456#'], ['abc___', '____3456_']),