fix: http endpoint filtering according to design review (#248)

* feat: span endpoint filtering for client and server spans separately * test: http endpoint filtering * test: more readable test cases * docs: document http endpoint filtering
lumigo-io · Dec 20, 2023 · c1d8928 · c1d8928
1 parent f741013
commit c1d8928
Show file tree

Hide file tree

Showing 4 changed files with 172 additions and 296 deletions.
diff --git a/README.md b/README.md
@@ -110,7 +110,12 @@ This setting is independent from `LUMIGO_DEBUG`, that is, `LUMIGO_DEBUG` does no
   * `LUMIGO_SECRET_MASKING_REGEX_HTTP_RESPONSE_BODIES` applies secret redaction to HTTP response bodies
   * `LUMIGO_SECRET_MASKING_REGEX_HTTP_RESPONSE_HEADERS` applies secret redaction to HTTP response headers
   * `LUMIGO_SECRET_MASKING_REGEX_ENVIRONMENT` applies secret redaction to process environment variables (that is, the content of `process.env`)
-* `LUMIGO_AUTO_FILTER_HTTP_ENDPOINTS_REGEX`: This option enables the automatic filtering of endpoints that match the supplied regular expressions, refer to the [Filtering out HTTP endpoints](#filtering-out-http-endpoints) section.
+* `LUMIGO_FILTER_HTTP_ENDPOINTS_REGEX='["regex1", "regex2"]'`: This option enables the filtering of client and server endpoints that match the supplied regular expressions. More fine-grained settings can be applied via the following environment variables, which will work in addition to `LUMIGO_FILTER_HTTP_ENDPOINTS_REGEX` for a specific span type:
+  * `LUMIGO_FILTER_HTTP_ENDPOINTS_REGEX_SERVER` applies the filter to server spans only. Matching is performed against the following attributes on a span: `url.path` and `http.target`.
+  * `LUMIGO_FILTER_HTTP_ENDPOINTS_REGEX_CLIENT` applies the filter to client spans only. Matching is performed against the following attributes on a span: `url.full` and `http.url`.
+
+  For more information check out [Filtering http endpoints](#filtering-http-endpoints).
+
 ### Execution Tags
 
 [Execution Tags](https://docs.lumigo.io/docs/execution-tags) allow you to dynamically add dimensions to your invocations so that they can be identified, searched for, and filtered in Lumigo.
@@ -461,25 +466,21 @@ The possible variations are (case-insensitive):
 * `LUMIGO_AUTO_FILTER_EMPTY_SQS=FALSE` do not filter out empty SQS polling messages
 * No environment variable set (default): filter out empty SQS polling messages
 
-### Filtering out HTTP endpoints
+### Filtering http endpoints
 
-It is possible to automatically filter out spans based on an HTTP server endpoints for all supported web server frameworks.
+It is possible to filter out spans based on the HTTP server / client endpoints for all supported web server frameworks.
 
-Simply set the `LUMIGO_AUTO_FILTER_HTTP_ENDPOINTS_REGEX` environment variable to a regex string that will match the urls,
-spans for matching urls will be not be delivered (works for both incoming & outgoing HTTP requests).
+Set the `LUMIGO_FILTER_HTTP_ENDPOINTS_REGEX` environment variable to a list of regex strings that will match 
+server / client endpoints.
+Spans with matching endpoints will not be traced.
+If you only want to filter out server (inbound) spans or client (outbound) spans, you can set the env vars 
+`LUMIGO_FILTER_HTTP_ENDPOINTS_REGEX_SERVER` or `LUMIGO_FILTER_HTTP_ENDPOINTS_REGEX_CLIENT` respectively.
 
 If we are filtering out an HTTP call to an opentelemetry traced component, every subsequent invocation made by that 
 component won't be traced either.
 
 When filtering out an HTTP span, none of its child spans will be recorded either.
 
-NOTE: When urls are accessed using standard ports (80 for HTTP and 443 for HTTPS), the port is not included in the url.
-
-example patterns:
-* `https:\/\/example\.com\/about` - will match `https://example.com/about`
-* `.*example.*` - will match any url with the word `example` in it
-* `https:\/\/example\.com:123.*` - will match any http request to example.com on port 123
-
 ## Contributing
 
 For guidelines on contributing, please see [CONTRIBUTING.md](./CONTRIBUTING.md).
diff --git a/src/samplers/lumigoSampler.test.js b/src/samplers/lumigoSampler.test.js
@@ -1,217 +1,64 @@
-import { extractUrl, shouldSkipSpanOnRouteMatch } from './lumigoSampler';
+import { doesEndpointMatchRegexes, extractEndpoint, parseStringToArray } from './lumigoSampler';
+import { SpanKind } from '@opentelemetry/api';
 
 describe('lumigo sampler', () => {
   afterEach(() => {
     jest.clearAllMocks();
   });
 
-  [
-    // Test happy flow - regex matches url
-    {
-      url: 'https://example.com',
-      regex: '.*example.*',
-      shouldSkip: true,
-    },
-    {
-      url: 'https://example.com',
-      regex: 'https://example.com',
-      shouldSkip: true,
-    },
-    {
-      url: 'https://example.com/about',
-      regex: 'https://example.com/about',
-      shouldSkip: true,
-    },
-    {
-      url: 'https://example.com/about',
-      regex: 'https?://.+/about',
-      shouldSkip: true,
-    },
-
-    // Test url doesn't match regex
-    {
-      url: 'https://example.com',
-      regex: '.*not-example.*',
-      shouldSkip: false,
-    },
-    {
-      url: 'https://example.com',
-      regex: 'https://not-example.com',
-      shouldSkip: false,
-    },
-    {
-      url: 'https://example.com/about',
-      regex: 'https://example.com/not-about',
-      shouldSkip: false,
-    },
-
-    // Test regex is invalid
-    {
-      url: 'https://example.com',
-      // The regex is invalid because a char must come before the * operator
-      regex: '*',
-      shouldSkip: false,
-    },
+  test.each`
+    rawArrayString    | expectedArray
+    ${'["a"]'}        | ${['a']}
+    ${'["a", "b"]'}   | ${['a', 'b']}
+    ${'[]'}           | ${[]}
+    ${'"a","b"'}      | ${[]}
+    ${'Not an array'} | ${[]}
+    ${null}           | ${[]}
+    ${undefined}      | ${[]}
+    ${'["a", 2]'}     | ${[]}
+    ${'["a", true]'}  | ${[]}
+  `('test parse array string', ({ rawArrayString, expectedArray }) => {
+    expect(parseStringToArray(rawArrayString)).toEqual(expectedArray);
+  });
 
-    // Test regex is undefined
-    {
-      url: 'https://example.com',
-      regex: undefined,
-      shouldSkip: false,
-    },
-    {
-      url: null,
-      regex: '.*',
-      shouldSkip: false,
-    },
-    {
-      url: undefined,
-      regex: '.*',
-      shouldSkip: false,
-    },
-  ].map(({ url, regex, shouldSkip }) => {
-    test('test should skip span with url', () => {
-      if (regex) {
-        process.env.LUMIGO_AUTO_FILTER_HTTP_ENDPOINTS_REGEX = regex;
-      }
-      expect(shouldSkipSpanOnRouteMatch(url)).toEqual(shouldSkip);
+  describe('test when there is a match', () => {
+    const endpoint = 'https://example.com';
+    const regexes = ['.*example.*'];
+    const expected = true;
+    test('test regex match endpoint', () => {
+      expect(doesEndpointMatchRegexes(endpoint, regexes)).toEqual(expected);
     });
   });
 
-  [
-    {
-      description: 'happy flow - full url field exists',
-      cases: [
-        {
-          attributes: {
-            'http.url': 'https://example.com',
-          },
-          expectedUrl: 'https://example.com',
-        },
-        {
-          attributes: {
-            'http.url': 'https://example.com?page=1',
-          },
-          expectedUrl: 'https://example.com?page=1',
-        },
-      ],
-    },
-    {
-      description: 'happy flow - schema host and target fields exist',
-      cases: [
-        {
-          attributes: {
-            'http.scheme': 'https',
-            'http.host': 'example.com',
-            'http.target': '/',
-          },
-          expectedUrl: 'https://example.com',
-        },
-        {
-          attributes: {
-            'http.scheme': 'https',
-            'http.host': 'example.com',
-            'http.target': '/about',
-          },
-          expectedUrl: 'https://example.com/about',
-        },
-        {
-          attributes: {
-            'http.scheme': 'https',
-            'http.host': 'example.com',
-            'http.target': '/about?page=1',
-          },
-          expectedUrl: 'https://example.com/about?page=1',
-        },
-      ],
-    },
-    {
-      description: 'http endpoint standard port',
-      cases: [
-        {
-          attributes: {
-            'http.url': 'https://example.com:443',
-          },
-          expectedUrl: 'https://example.com',
-        },
-        {
-          attributes: {
-            'http.url': 'http://example.com:80',
-          },
-          expectedUrl: 'http://example.com',
-        },
-      ],
-    },
-    {
-      description: 'http endpoint non standard port',
-      cases: [
-        {
-          attributes: {
-            'http.url': 'https://example.com:80',
-          },
-          expectedUrl: 'https://example.com:80',
-        },
-        {
-          attributes: {
-            'http.url': 'https://example.com:80/about',
-          },
-          expectedUrl: 'https://example.com:80/about',
-        },
-        {
-          attributes: {
-            'http.url': 'http://example.com:443',
-          },
-          expectedUrl: 'http://example.com:443',
-        },
-        {
-          attributes: {
-            'http.url': 'http://example.com:443/about',
-          },
-          expectedUrl: 'http://example.com:443/about',
-        },
-        {
-          attributes: {
-            'http.scheme': 'https',
-            'http.host': 'example.com:80',
-            'http.target': '/about',
-          },
-          expectedUrl: 'https://example.com:80/about',
-        },
-      ],
-    },
-    {
-      description: 'http root url',
-      cases: [
-        {
-          attributes: {
-            'http.url': 'https://example.com/',
-          },
-          expectedUrl: 'https://example.com',
-        },
-      ],
-    },
-    {
-      description: 'missing values',
-      cases: [
-        {
-          attributes: null,
-          expectedUrl: null,
-        },
-        {
-          attributes: undefined,
-          expectedUrl: null,
-        },
-        {
-          attributes: {},
-          expectedUrl: null,
-        },
-      ],
-    },
-  ].map(({ description, cases }) => {
-    return cases.map(({ attributes, expectedUrl }) => {
-      test(`test extract url from span - ${description}`, () => {
-        expect(extractUrl(attributes)).toEqual(expectedUrl);
-      });
-    });
+  test.each`
+    endpoint                 | regexes                                 | shouldMatch
+    ${'https://example.com'} | ${['.*example.*']}                      | ${true}
+    ${'/orders/123'}         | ${['.*orders.*']}                       | ${true}
+    ${'/orders/123'}         | ${['.*will-not-match.*', '.*orders.*']} | ${true}
+    ${'/orders/123'}         | ${[]}                                   | ${false}
+    ${'/orders/123'}         | ${['no-match-1', 'no-match-2']}         | ${false}
+    ${''}                    | ${['.*']}                               | ${false}
+    ${null}                  | ${['.*']}                               | ${false}
+    ${undefined}             | ${['.*']}                               | ${false}
+  `('test regex match endpoint', ({ endpoint, regexes, shouldMatch }) => {
+    expect(doesEndpointMatchRegexes(endpoint, regexes)).toEqual(shouldMatch);
+  });
+
+  test.each`
+    attributes                                                | spanKind           | expectedEndpoint
+    ${{ 'url.path': 'urlPath', 'http.target': 'httpTarget' }} | ${SpanKind.SERVER} | ${'urlPath'}
+    ${{ a: 'a', 'http.target': 'httpTarget' }}                | ${SpanKind.SERVER} | ${'httpTarget'}
+    ${{ 'url.full': 'fullUrl', 'http.url': 'httpUrl' }}       | ${SpanKind.CLIENT} | ${'fullUrl'}
+    ${{ a: 'a', 'http.url': 'httpUrl' }}                      | ${SpanKind.CLIENT} | ${'httpUrl'}
+    ${{
+  'url.path': 'urlPath',
+  'http.target': 'httpTarget',
+  'url.full': 'fullUrl',
+  'http.url': 'httpUrl',
+}} | ${SpanKind.INTERNAL} | ${null}
+    ${{}}                                                     | ${SpanKind.SERVER} | ${null}
+    ${{}}                                                     | ${SpanKind.CLIENT} | ${null}
+  `('test extract endpoint', ({ attributes, spanKind, expectedEndpoint }) => {
+    expect(extractEndpoint(attributes, spanKind)).toEqual(expectedEndpoint);
   });
 });