chore(sqla): Add explicit bidirectional performant relationships for SQLA model #22413

Merged
37 changes: 27 additions & 10 deletions superset/connectors/sqla/models.py
@@ -68,7 +68,14 @@
 )
 from sqlalchemy.engine.base import Connection
 from sqlalchemy.ext.hybrid import hybrid_property
-from sqlalchemy.orm import backref, Query, relationship, RelationshipProperty, Session
+from sqlalchemy.orm import (
+    backref,
+    Mapped,
+    Query,
+    relationship,
+    RelationshipProperty,
+    Session,
+)
 from sqlalchemy.orm.mapper import Mapper
 from sqlalchemy.schema import UniqueConstraint
 from sqlalchemy.sql import column, ColumnElement, literal_column, table
@@ -224,10 +231,10 @@ class TableColumn(Model, BaseColumn, CertificationMixin):
     __tablename__ = "table_columns"
     __table_args__ = (UniqueConstraint("table_id", "column_name"),)
     table_id = Column(Integer, ForeignKey("tables.id"))
-    table: SqlaTable = relationship(
+    table: Mapped["SqlaTable"] = relationship(
         "SqlaTable",
-        backref=backref("columns", cascade="all, delete-orphan"),
-        foreign_keys=[table_id],
+        back_populates="columns",
+        lazy="joined",  # Eager loading for efficient parent referencing with selectin.
     )
     is_dttm = Column(Boolean, default=False)
     expression = Column(MediumText())

@john-bodley (Member, Author) commented on Dec 14, 2022:

backref, per the SQLAlchemy docs, is deemed legacy, though it is still used in some instances, such as the database <> table relationship, given how and where the code is defined.
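For context, here is a minimal standalone sketch (toy Parent/Child models, not Superset code) contrasting the legacy backref style with the explicit back_populates style adopted in this PR:

```python
from sqlalchemy import Column, ForeignKey, Integer
from sqlalchemy.orm import backref, declarative_base, relationship

Base = declarative_base()

class Parent(Base):
    __tablename__ = "parent"
    id = Column(Integer, primary_key=True)
    # Explicit style: each class declares its own end of the relationship.
    children = relationship("Child", back_populates="parent")

class Child(Base):
    __tablename__ = "child"
    id = Column(Integer, primary_key=True)
    parent_id = Column(Integer, ForeignKey("parent.id"))
    parent = relationship("Parent", back_populates="children")
    # Legacy equivalent (instead of the two declarations above): a single
    # declaration that implicitly creates Parent.children via backref:
    # parent = relationship("Parent", backref=backref("children"))
```

Both styles keep the two in-memory sides in sync; the explicit form simply makes each end visible, and typeable via Mapped, where it is defined.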
@@ -439,10 +446,10 @@ class SqlMetric(Model, BaseMetric, CertificationMixin):
     __tablename__ = "sql_metrics"
     __table_args__ = (UniqueConstraint("table_id", "metric_name"),)
     table_id = Column(Integer, ForeignKey("tables.id"))
-    table = relationship(
+    table: Mapped["SqlaTable"] = relationship(
         "SqlaTable",
-        backref=backref("metrics", cascade="all, delete-orphan"),
-        foreign_keys=[table_id],
+        back_populates="metrics",
+        lazy="joined",  # Eager loading for efficient parent referencing with selectin.
     )
     expression = Column(MediumText(), nullable=False)
     extra = Column(Text)

@john-bodley (Member, Author) commented:

In normal cases, such as this one, the foreign_keys argument is not required. See the SQLAlchemy docs for specifics.
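To illustrate when foreign_keys does become necessary, a hypothetical model (not Superset code) with two foreign keys to the same parent table, where SQLAlchemy cannot infer the join condition on its own:

```python
from sqlalchemy import Column, ForeignKey, Integer
from sqlalchemy.orm import declarative_base, relationship

Base = declarative_base()

class User(Base):
    __tablename__ = "users"
    id = Column(Integer, primary_key=True)

class Ticket(Base):
    __tablename__ = "tickets"
    id = Column(Integer, primary_key=True)
    # Two columns reference users.id, so each relationship must name the
    # column it joins on; with a single foreign key, as in SqlMetric,
    # SQLAlchemy infers the join and the argument is redundant.
    reporter_id = Column(Integer, ForeignKey("users.id"))
    assignee_id = Column(Integer, ForeignKey("users.id"))
    reporter = relationship("User", foreign_keys=[reporter_id])
    assignee = relationship("User", foreign_keys=[assignee_id])
```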
@@ -535,13 +542,23 @@ def _process_sql_expression(


 class SqlaTable(Model, BaseDatasource):  # pylint: disable=too-many-public-methods
-    """An ORM object for SqlAlchemy table references"""
+    """An ORM object for SqlAlchemy table references."""

     type = "table"
     query_language = "sql"
     is_rls_supported = True
-    columns: List[TableColumn] = []
-    metrics: List[SqlMetric] = []
+    columns: Mapped[List[TableColumn]] = relationship(
+        TableColumn,
+        back_populates="table",
+        cascade="all, delete-orphan",
+        lazy="selectin",  # Only non-eager loading that works with bidirectional joined.
+    )
+    metrics: Mapped[List[SqlMetric]] = relationship(
+        SqlMetric,
+        back_populates="table",
+        cascade="all, delete-orphan",
+        lazy="selectin",  # Only non-eager loading that works with bidirectional joined.
+    )
     metric_class = SqlMetric
     column_class = TableColumn
     owner_class = security_manager.user_model

@john-bodley (Member, Author) commented:

Ideally we would use lazy="selectin" rather than lazy="select" (the default), but that seems to be causing downstream issues with the associated sqlatable_user table. My current hunch is that the selectin option is rather new and there have been a number of reported issues.
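As a rough sketch of the resulting query pattern (the SQL comments are illustrative; assumes a SQLAlchemy session bound to the Superset metadata database):

```python
from superset.connectors.sqla.models import SqlaTable, TableColumn

# lazy="selectin": loading N tables fetches each child collection with
# one extra IN-based query, instead of one lazy query per table.
tables = session.query(SqlaTable).all()
# SELECT ... FROM tables
# SELECT ... FROM table_columns WHERE table_columns.table_id IN (1, 2, ...)
# SELECT ... FROM sql_metrics WHERE sql_metrics.table_id IN (1, 2, ...)

# The reciprocal lazy="joined": a column loaded on its own brings its
# parent table along via a LEFT OUTER JOIN in the same query.
column = session.query(TableColumn).first()
column.table  # already loaded, no extra query
```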
3 changes: 3 additions & 0 deletions superset/datasets/api.py
@@ -242,6 +242,9 @@ class DatasetRestApi(BaseSupersetModelRestApi):
         DatasetDuplicateSchema,
     )

+    list_outer_default_load = True
+    show_outer_default_load = True
+
     @expose("/", methods=["POST"])
     @protect()
     @safe
4 changes: 2 additions & 2 deletions superset/examples/birth_names.py
@@ -144,8 +144,8 @@ def _add_table_metrics(datasource: SqlaTable) -> None:
         metrics.append(SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))

     for col in columns:
-        if col.column_name == "ds":
-            col.is_dttm = True
+        if col.column_name == "ds":  # type: ignore
+            col.is_dttm = True  # type: ignore
             break

     datasource.columns = columns
5 changes: 0 additions & 5 deletions superset/models/dashboard.py
@@ -423,11 +423,6 @@ def export_dashboards(  # pylint: disable=too-many-locals
                 remote_id=eager_datasource.id,
                 database_name=eager_datasource.database.name,
             )
-            datasource_class = copied_datasource.__class__
-            for field_name in datasource_class.export_children:
-                field_val = getattr(eager_datasource, field_name).copy()
-                # set children without creating ORM relations
-                copied_datasource.__dict__[field_name] = field_val
             eager_datasources.append(copied_datasource)

     return json.dumps(

@john-bodley (Member, Author) commented:

This was breaking the serialization of the datasources, i.e., JSON detected a circular reference. Given that the datasource children (metrics, columns) are defined via sqlalchemy.orm.relationship, this logic no longer seems needed.
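The failure mode is easy to reproduce with plain dicts; a rough analogue (not Superset code) of a copied child holding a reference back to its parent:

```python
import json

table = {"name": "birth_names"}
column = {"name": "ds", "table": table}
table["columns"] = [column]  # table -> column -> table: a cycle

json.dumps(table)  # raises ValueError: Circular reference detected
```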
5 changes: 4 additions & 1 deletion superset/models/helpers.py
@@ -327,7 +327,10 @@ def import_from_dict(
         # Recursively create children
         if recursive:
             for child in cls.export_children:
-                child_class = cls.__mapper__.relationships[child].argument.class_
+                argument = cls.__mapper__.relationships[child].argument
+                child_class = (
+                    argument.class_ if hasattr(argument, "class_") else argument
+                )
                 added = []
                 for c_obj in new_children.get(child, []):
                     added.append(
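@john-bodley (Member, Author) commented:

The relationship argument can be either a mapped class or a Mapper instance.

A small sketch of the distinction being handled (the helper name is illustrative, not Superset's API):

```python
from sqlalchemy.orm import Mapper

def resolve_child_class(argument):
    """Return the mapped class whether `argument` is a class or a Mapper."""
    # relationship(TableColumn) stores the class itself, while other
    # declaration styles yield a Mapper; only a Mapper exposes .class_.
    return argument.class_ if isinstance(argument, Mapper) else argument
```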
6 changes: 4 additions & 2 deletions tests/integration_tests/charts/api_tests.py
@@ -1315,9 +1315,10 @@ def test_import_chart(self):

         chart.owners = []
         dataset.owners = []
-        database.owners = []
         db.session.delete(chart)
+        db.session.commit()
         db.session.delete(dataset)
+        db.session.commit()
         db.session.delete(database)
         db.session.commit()

@john-bodley (Member, Author) commented:

Databases don't have owners.

@john-bodley (Member, Author) commented:

I was running into a similar issue to this one, which only happens when I set the lazy="selectin" relationship property. Somewhat peculiarly, selectin only ensures eager loading for the bidirectional relationships when the reciprocal is lazy="joined"; i.e., if the reciprocal is lazy="select", SQLAlchemy performs N queries when selecting the TableColumn.table/SqlMetric.table field.

The TL;DR is that I've tried a slew of things, and the only success (per the Stack Overflow post) was to commit between the individual deletes. Note this doesn't seem to be an issue in production (per my manual tests), given that one doesn't delete both a dataset and a database at the same time.
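For reference, a sketch of the N-query behavior described above (illustrative; assumes a session with many columns loaded):

```python
from superset.connectors.sqla.models import TableColumn

# If the reciprocal TableColumn.table were lazy="select", each .table
# access below would emit its own SELECT (N queries for N distinct
# tables); with lazy="joined" the parent arrives in the initial query.
for column in session.query(TableColumn).all():
    print(column.table.table_name)
```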

@@ -1387,9 +1388,10 @@ def test_import_chart_overwrite(self):

         chart.owners = []
         dataset.owners = []
-        database.owners = []
         db.session.delete(chart)
+        db.session.commit()
         db.session.delete(dataset)
+        db.session.commit()
         db.session.delete(database)
         db.session.commit()

4 changes: 2 additions & 2 deletions tests/integration_tests/databases/api_tests.py
@@ -1987,8 +1987,8 @@ def test_import_database(self):
         assert str(dataset.uuid) == dataset_config["uuid"]

         dataset.owners = []
-        database.owners = []
         db.session.delete(dataset)
+        db.session.commit()
         db.session.delete(database)
         db.session.commit()

@john-bodley (Member, Author) commented:

I think there's a quirk with how the owner relationships are defined, in terms of cascading; without committing the dataset deletion, SQLAlchemy complains that the owners are stale.
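A hedged sketch of the symptom (the table name and row counts are hypothetical): deleting both rows in a single flush can raise SQLAlchemy's StaleDataError, which the intermediate commit avoids:

```python
from sqlalchemy.orm.exc import StaleDataError

try:
    db.session.delete(dataset)
    db.session.delete(database)  # no intermediate commit
    db.session.commit()
except StaleDataError:
    # e.g. "DELETE statement on table 'sqlatable_user' expected to
    # delete 1 row(s); 0 were matched." Committing between the two
    # deletes flushes the cascade before the second delete runs.
    db.session.rollback()
```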

@@ -2058,8 +2058,8 @@ def test_import_database_overwrite(self):
         )
         dataset = database.tables[0]
         dataset.owners = []
-        database.owners = []
         db.session.delete(dataset)
+        db.session.commit()
         db.session.delete(database)
         db.session.commit()

4 changes: 2 additions & 2 deletions tests/integration_tests/datasets/api_tests.py
@@ -1988,8 +1988,8 @@ def test_import_dataset(self):
         assert str(dataset.uuid) == dataset_config["uuid"]

         dataset.owners = []
-        database.owners = []
         db.session.delete(dataset)
+        db.session.commit()
         db.session.delete(database)
         db.session.commit()

@@ -2090,8 +2090,8 @@ def test_import_dataset_overwrite(self):
         dataset = database.tables[0]

         dataset.owners = []
-        database.owners = []
         db.session.delete(dataset)
+        db.session.commit()
         db.session.delete(database)
         db.session.commit()