Skip to content

Commit

Permalink
SQLAlchemy: Use SQL operation and DB name as the Span name (#254)
Browse files Browse the repository at this point in the history
Current instrumentation uses the entire SQL query as the operation name
which makes traces very hard to read and understand in addition to
introducing high-cardinality issues. This commit fixes the problem by
using only the SQL operation name and the DB name instead of the entire
query.
  • Loading branch information
owais authored Jan 20, 2021
1 parent d12f67f commit 8c8f278
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 41 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed
- `opentelemetry-instrumentation-asgi`, `opentelemetry-instrumentation-wsgi` Return `None` for `CarrierGetter` if key not found
([#1374](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/233))
([#233](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/233))
- `opentelemetry-instrumentation-grpc` Comply with updated spec, rework tests
([#236](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/236))
- `opentelemetry-instrumentation-asgi`, `opentelemetry-instrumentation-falcon`, `opentelemetry-instrumentation-flask`, `opentelemetry-instrumentation-pyramid`, `opentelemetry-instrumentation-wsgi` Renamed `host.port` attribute to `net.host.port`
Expand All @@ -57,6 +57,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#235](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/235))
- `opentelemetry-exporter-datadog` `opentelemetry-sdk-extension-aws` Fix reference to ids_generator in sdk
([#235](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/235))
- `opentelemetry-instrumentation-sqlalchemy` Use SQL operation and DB name as span name.
([#254](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/254))

## [0.16b1](https://github.com/open-telemetry/opentelemetry-python-contrib/releases/tag/v0.16b1) - 2020-11-26

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from sqlalchemy.event import listen
from sqlalchemy.event import listen # pylint: disable=no-name-in-module

from opentelemetry import trace
from opentelemetry.instrumentation.sqlalchemy.version import __version__
Expand Down Expand Up @@ -72,22 +72,38 @@ def __init__(self, tracer, engine):
listen(engine, "after_cursor_execute", self._after_cur_exec)
listen(engine, "handle_error", self._handle_error)

def _operation_name(self, db_name, statement):
parts = []
if isinstance(statement, str):
# otel spec recommends against parsing SQL queries. We are not trying to parse SQL
# but simply truncating the statement to the first word. This covers probably >95%
# use cases and uses the SQL statement in span name correctly as per the spec.
# For some very special cases it might not record the correct statement if the SQL
# dialect is too weird but in any case it shouldn't break anything.
parts.append(statement.split()[0])
if db_name:
parts.append(db_name)
if not parts:
return self.vendor
return " ".join(parts)

# pylint: disable=unused-argument
def _before_cur_exec(self, conn, cursor, statement, *args):
attrs, found = _get_attributes_from_url(conn.engine.url)
if not found:
attrs = _get_attributes_from_cursor(self.vendor, cursor, attrs)

db_name = attrs.get(_DB, "")
self.current_span = self.tracer.start_span(
statement, kind=trace.SpanKind.CLIENT
self._operation_name(db_name, statement),
kind=trace.SpanKind.CLIENT,
)
with self.tracer.use_span(self.current_span, end_on_exit=False):
if self.current_span.is_recording():
self.current_span.set_attribute(_STMT, statement)
self.current_span.set_attribute("db.system", self.vendor)

if not _set_attributes_from_url(
self.current_span, conn.engine.url
):
_set_attributes_from_cursor(
self.current_span, self.vendor, cursor
)
for key, value in attrs.items():
self.current_span.set_attribute(key, value)

# pylint: disable=unused-argument
def _after_cur_exec(self, conn, cursor, statement, *args):
Expand All @@ -108,25 +124,22 @@ def _handle_error(self, context):
self.current_span.end()


def _set_attributes_from_url(span: trace.Span, url):
def _get_attributes_from_url(url):
"""Set connection tags from the url. return true if successful."""
if span.is_recording():
if url.host:
span.set_attribute(_HOST, url.host)
if url.port:
span.set_attribute(_PORT, url.port)
if url.database:
span.set_attribute(_DB, url.database)
if url.username:
span.set_attribute(_USER, url.username)

return bool(url.host)


def _set_attributes_from_cursor(span: trace.Span, vendor, cursor):
attrs = {}
if url.host:
attrs[_HOST] = url.host
if url.port:
attrs[_PORT] = url.port
if url.database:
attrs[_DB] = url.database
if url.username:
attrs[_USER] = url.username
return attrs, bool(url.host)


def _get_attributes_from_cursor(vendor, cursor, attrs):
"""Attempt to set db connection attributes by introspecting the cursor."""
if not span.is_recording():
return
if vendor == "postgresql":
# pylint: disable=import-outside-toplevel
from psycopg2.extensions import parse_dsn
Expand All @@ -135,6 +148,7 @@ def _set_attributes_from_cursor(span: trace.Span, vendor, cursor):
dsn = getattr(cursor.connection, "dsn", None)
if dsn:
data = parse_dsn(dsn)
span.set_attribute(_DB, data.get("dbname"))
span.set_attribute(_HOST, data.get("host"))
span.set_attribute(_PORT, int(data.get("port")))
attrs[_DB] = data.get("dbname")
attrs[_HOST] = data.get("host")
attrs[_PORT] = int(data.get("port"))
return attrs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_trace_integration(self):
spans = self.memory_exporter.get_finished_spans()

self.assertEqual(len(spans), 1)
self.assertEqual(spans[0].name, "SELECT 1 + 1;")
self.assertEqual(spans[0].name, "SELECT :memory:")
self.assertEqual(spans[0].kind, trace.SpanKind.CLIENT)

def test_not_recording(self):
Expand Down Expand Up @@ -68,5 +68,5 @@ def test_create_engine_wrapper(self):
spans = self.memory_exporter.get_finished_spans()

self.assertEqual(len(spans), 1)
self.assertEqual(spans[0].name, "SELECT 1 + 1;")
self.assertEqual(spans[0].name, "SELECT :memory:")
self.assertEqual(spans[0].kind, trace.SpanKind.CLIENT)
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ def tearDown(self):
super().tearDown()

def _check_span(self, span, name):
if self.SQL_DB:
name = "{0} {1}".format(name, self.SQL_DB)
self.assertEqual(span.name, name)
self.assertEqual(span.attributes.get(_DB), self.SQL_DB)
self.assertIs(span.status.status_code, trace.status.StatusCode.UNSET)
Expand All @@ -129,7 +131,7 @@ def test_orm_insert(self):
stmt += "(?, ?)"
else:
stmt += "(%(id)s, %(name)s)"
self._check_span(span, stmt)
self._check_span(span, "INSERT")
self.assertIn("INSERT INTO players", span.attributes.get(_STMT))
self.check_meta(span)

Expand All @@ -146,7 +148,7 @@ def test_session_query(self):
stmt += "?"
else:
stmt += "%(name_1)s"
self._check_span(span, stmt)
self._check_span(span, "SELECT")
self.assertIn(
"SELECT players.id AS players_id, players.name AS players_name \nFROM players \nWHERE players.name",
span.attributes.get(_STMT),
Expand All @@ -163,7 +165,7 @@ def test_engine_connect_execute(self):
spans = self.memory_exporter.get_finished_spans()
self.assertEqual(len(spans), 1)
span = spans[0]
self._check_span(span, stmt)
self._check_span(span, "SELECT")
self.assertEqual(span.attributes.get(_STMT), "SELECT * FROM players")
self.check_meta(span)

Expand All @@ -188,4 +190,4 @@ def test_parent(self):
self.assertEqual(parent_span.name, "sqlalch_op")
self.assertEqual(parent_span.instrumentation_info.name, "sqlalch_svc")

self.assertEqual(child_span.name, stmt)
self.assertEqual(child_span.name, "SELECT " + self.SQL_DB)
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,14 @@ def tearDown(self):

def test_engine_traced(self):
# ensures that the engine is traced
rows = self.conn.execute("SELECT 1").fetchall()
rows = self.conn.execute("SELECT").fetchall()
self.assertEqual(len(rows), 1)

traces = self.memory_exporter.get_finished_spans()
# trace composition
self.assertEqual(len(traces), 1)
span = traces[0]
# check subset of span fields
self.assertEqual(span.name, "SELECT 1")
self.assertEqual(span.name, "SELECT opentelemetry-tests")
self.assertIs(span.status.status_code, trace.status.StatusCode.UNSET)
self.assertGreater((span.end_time - span.start_time), 0)
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_engine_execute_errors(self):
self.assertEqual(len(spans), 1)
span = spans[0]
# span fields
self.assertEqual(span.name, "SELECT * FROM a_wrong_table")
self.assertEqual(span.name, "SELECT opentelemetry-tests")
self.assertEqual(
span.attributes.get(_STMT), "SELECT * FROM a_wrong_table"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def test_engine_execute_errors(self):
self.assertEqual(len(spans), 1)
span = spans[0]
# span fields
self.assertEqual(span.name, "SELECT * FROM a_wrong_table")
self.assertEqual(span.name, "SELECT opentelemetry-tests")
self.assertEqual(
span.attributes.get(_STMT), "SELECT * FROM a_wrong_table"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_engine_execute_errors(self):
self.assertEqual(len(spans), 1)
span = spans[0]
# span fields
self.assertEqual(span.name, stmt)
self.assertEqual(span.name, "SELECT :memory:")
self.assertEqual(
span.attributes.get(_STMT), "SELECT * FROM a_wrong_table"
)
Expand Down

0 comments on commit 8c8f278

Please sign in to comment.