Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Strip leading comments from SQL queries when generating the span name. #1434

Merged
merged 4 commits into from
Nov 18, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#1350](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1350))
- `opentelemetry-instrumentation-starlette` Add support for regular expression matching and sanitization of HTTP headers.
([#1404](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1404))
- Strip leading comments from SQL queries when generating the span name.
([#1434](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1434))

### Fixed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
---
"""

import re
from typing import Collection

import asyncpg
Expand Down Expand Up @@ -99,6 +100,7 @@ def __init__(self, capture_parameters=False):
super().__init__()
self.capture_parameters = capture_parameters
self._tracer = None
self._leading_comment_remover = re.compile(r"^/\*.*?\*/")

def instrumentation_dependencies(self) -> Collection[str]:
return _instruments
Expand Down Expand Up @@ -135,7 +137,8 @@ async def _do_execute(self, func, instance, args, kwargs):
name = args[0] if args[0] else params.get("database", "postgresql")

try:
name = name.split()[0]
# Strip leading comments so we get the operation name.
name = self._leading_comment_remover.sub("", name).split()[0]
except IndexError:
name = ""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@

import functools
import logging
import re
import typing

import wrapt
Expand Down Expand Up @@ -368,6 +369,7 @@ def __init__(self, db_api_integration: DatabaseApiIntegration) -> None:
else {}
)
self._connect_module = self._db_api_integration.connect_module
self._leading_comment_remover = re.compile(r"^/\*.*?\*/")

def _populate_span(
self,
Expand Down Expand Up @@ -397,7 +399,8 @@ def _populate_span(

def get_operation_name(self, cursor, args): # pylint: disable=no-self-use
if args and isinstance(args[0], str):
return args[0].split()[0]
# Strip leading comments so we get the operation name.
return self._leading_comment_remover.sub("", args[0]).split()[0]
return ""

def get_statement(self, cursor, args): # pylint: disable=no-self-use
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,17 @@ def test_span_name(self):
query"""
)
cursor.execute("tab\tseparated query")
cursor.execute("/* leading comment */ query")
cursor.execute("/* leading comment */ query /* trailing comment */")
cursor.execute("query /* trailing comment */")
spans_list = self.memory_exporter.get_finished_spans()
self.assertEqual(len(spans_list), 3)
self.assertEqual(len(spans_list), 6)
self.assertEqual(spans_list[0].name, "Test")
self.assertEqual(spans_list[1].name, "multi")
self.assertEqual(spans_list[2].name, "tab")
self.assertEqual(spans_list[3].name, "query")
self.assertEqual(spans_list[4].name, "query")
self.assertEqual(spans_list[5].name, "query")

def test_span_succeeded_with_capture_of_statement_parameters(self):
connection_props = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,8 @@ def get_operation_name(self, cursor, args):
statement = statement.as_string(cursor)

if isinstance(statement, str):
return statement.split()[0]
# Strip leading comments so we get the operation name.
return self._leading_comment_remover.sub("", statement).split()[0]

return ""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,32 @@ def test_instrumentor(self):
spans_list = self.memory_exporter.get_finished_spans()
self.assertEqual(len(spans_list), 1)

def test_span_name(self):
Psycopg2Instrumentor().instrument()

cnx = psycopg2.connect(database="test")

cursor = cnx.cursor()

cursor.execute("Test query", ("param1Value", False))
cursor.execute(
"""multi
line
query"""
)
cursor.execute("tab\tseparated query")
cursor.execute("/* leading comment */ query")
cursor.execute("/* leading comment */ query /* trailing comment */")
cursor.execute("query /* trailing comment */")
spans_list = self.memory_exporter.get_finished_spans()
self.assertEqual(len(spans_list), 6)
self.assertEqual(spans_list[0].name, "Test")
self.assertEqual(spans_list[1].name, "multi")
self.assertEqual(spans_list[2].name, "tab")
self.assertEqual(spans_list[3].name, "query")
self.assertEqual(spans_list[4].name, "query")
self.assertEqual(spans_list[5].name, "query")

# pylint: disable=unused-argument
def test_not_recording(self):
mock_tracer = mock.Mock()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re

from sqlalchemy.event import listen # pylint: disable=no-name-in-module

Expand Down Expand Up @@ -100,6 +101,7 @@ def __init__(
self.vendor = _normalize_vendor(engine.name)
self.enable_commenter = enable_commenter
self.commenter_options = commenter_options if commenter_options else {}
self._leading_comment_remover = re.compile(r"^/\*.*?\*/")

listen(
engine, "before_cursor_execute", self._before_cur_exec, retval=True
Expand All @@ -115,7 +117,10 @@ def _operation_name(self, db_name, statement):
# use cases and uses the SQL statement in span name correctly as per the spec.
# For some very special cases it might not record the correct statement if the SQL
# dialect is too weird but in any case it shouldn't break anything.
parts.append(statement.split()[0])
# Strip leading comments so we get the operation name.
parts.append(
self._leading_comment_remover.sub("", statement).split()[0]
)
if db_name:
parts.append(db_name)
if not parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,27 @@ def test_trace_integration(self):
)
cnx = engine.connect()
cnx.execute("SELECT 1 + 1;").fetchall()
cnx.execute("/* leading comment */ SELECT 1 + 1;").fetchall()
cnx.execute(
"/* leading comment */ SELECT 1 + 1; /* trailing comment */"
).fetchall()
cnx.execute("SELECT 1 + 1; /* trailing comment */").fetchall()
spans = self.memory_exporter.get_finished_spans()

self.assertEqual(len(spans), 2)
self.assertEqual(len(spans), 5)
# first span - the connection to the db
self.assertEqual(spans[0].name, "connect")
self.assertEqual(spans[0].kind, trace.SpanKind.CLIENT)
# second span - the query itself
self.assertEqual(spans[1].name, "SELECT :memory:")
self.assertEqual(spans[1].kind, trace.SpanKind.CLIENT)
# spans for queries with comments
self.assertEqual(spans[2].name, "SELECT :memory:")
self.assertEqual(spans[2].kind, trace.SpanKind.CLIENT)
self.assertEqual(spans[3].name, "SELECT :memory:")
self.assertEqual(spans[3].kind, trace.SpanKind.CLIENT)
self.assertEqual(spans[4].name, "SELECT :memory:")
self.assertEqual(spans[4].kind, trace.SpanKind.CLIENT)

def test_instrument_two_engines(self):
engine_1 = create_engine("sqlite:///:memory:")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,35 @@ def test_instrumented_fetch_method_without_arguments(self, *_, **__):
spans[0].attributes[SpanAttributes.DB_STATEMENT], "SELECT 42;"
)

def test_instrumented_remove_comments(self, *_, **__):
async_call(self._connection.fetch("/* leading comment */ SELECT 42;"))
async_call(
self._connection.fetch(
"/* leading comment */ SELECT 42; /* trailing comment */"
)
)
async_call(self._connection.fetch("SELECT 42; /* trailing comment */"))
spans = self.memory_exporter.get_finished_spans()
self.assertEqual(len(spans), 3)
self.check_span(spans[0])
self.assertEqual(spans[0].name, "SELECT")
self.assertEqual(
spans[0].attributes[SpanAttributes.DB_STATEMENT],
"/* leading comment */ SELECT 42;",
)
self.check_span(spans[1])
self.assertEqual(spans[1].name, "SELECT")
self.assertEqual(
spans[1].attributes[SpanAttributes.DB_STATEMENT],
"/* leading comment */ SELECT 42; /* trailing comment */",
)
self.check_span(spans[2])
self.assertEqual(spans[2].name, "SELECT")
self.assertEqual(
spans[2].attributes[SpanAttributes.DB_STATEMENT],
"SELECT 42; /* trailing comment */",
)

def test_instrumented_transaction_method(self, *_, **__):
async def _transaction_execute():
async with self._connection.transaction():
Expand Down