Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce dashboard bootstrap payload #9284

Merged
merged 1 commit into from
Mar 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ def _try_json_readsha(filepath, length): # pylint: disable=unused-argument
"ENABLE_EXPLORE_JSON_CSRF_PROTECTION": False,
"KV_STORE": False,
"PRESTO_EXPAND_DATA": False,
"REDUCE_DASHBOARD_BOOTSTRAP_PAYLOAD": False,
"SHARE_QUERIES_VIA_KV_STORE": False,
"TAGGING_SYSTEM": False,
}
Expand Down
94 changes: 94 additions & 0 deletions superset/connectors/base/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,28 @@
from superset.models.slice import Slice
from superset.utils import core as utils

# Form-data keys whose values reference datasource *metrics*.  Scanned by
# BaseDatasource.data_for_slices to collect every metric a slice needs so the
# bootstrap payload can be trimmed to just those metrics.
METRIC_FORM_DATA_PARAMS = [
"metric",
"metrics",
"metric_2",
"percent_metrics",
"secondary_metric",
"size",
"timeseries_limit_metric",
"x",
"y",
]

# Form-data keys whose values reference datasource *columns*; used the same
# way as METRIC_FORM_DATA_PARAMS above, but for the column payload.
COLUMN_FORM_DATA_PARAMS = [
"all_columns",
"all_columns_x",
"columns",
"entity",
"groupby",
"order_by_cols",
"series",
]


class BaseDatasource(
AuditMixinNullable, ImportMixin
Expand Down Expand Up @@ -213,6 +235,70 @@ def data(self) -> Dict[str, Any]:
"select_star": self.select_star,
}

def data_for_slices(self, slices: List[Slice]) -> Dict[str, Any]:
    """
    The representation of the datasource containing only the required data
    to render the provided slices.

    Used to reduce the payload when loading a dashboard.

    :param slices: The slices rendered against this datasource
    :returns: The reduced datasource payload
    """
    # `self.data` builds a fresh dict on every access, so mutating it below
    # does not affect other callers.
    data = self.data
    metric_names = set()
    column_names = set()

    for slc in slices:
        form_data = slc.form_data

        # Pull out all required metrics from the form_data.
        for metric_param in METRIC_FORM_DATA_PARAMS:
            for metric in utils.get_iterable(form_data.get(metric_param) or []):
                metric_names.add(utils.get_metric_name(metric))

                # Ad-hoc metrics additionally reference a physical column.
                if utils.is_adhoc_metric(metric):
                    column_name = (metric.get("column") or {}).get("column_name")
                    # Guard against malformed ad-hoc metrics which would
                    # otherwise add `None` to the column-name set.
                    if column_name:
                        column_names.add(column_name)

        # Pull out all required columns from the form_data: WHERE-clause
        # ad-hoc filter subjects plus every column-typed control.
        for filter_ in form_data.get("adhoc_filters") or []:
            if filter_["clause"] == "WHERE" and filter_.get("subject"):
                column_names.add(filter_.get("subject"))

        for column_param in COLUMN_FORM_DATA_PARAMS:
            for column in utils.get_iterable(form_data.get(column_param) or []):
                column_names.add(column)

    filtered_metrics = [
        metric
        for metric in data["metrics"]
        if metric["metric_name"] in metric_names
    ]
    filtered_columns = [
        column
        for column in data["columns"]
        if column["column_name"] in column_names
    ]

    # The description is not needed to render a dashboard; drop it and keep
    # only the metrics/columns the slices actually reference.
    del data["description"]
    data["metrics"] = filtered_metrics
    data["columns"] = filtered_columns

    # Rebuild the verbose map so it only covers the retained fields.
    verbose_map = {"__timestamp": "Time"}
    verbose_map.update(
        {
            metric["metric_name"]: metric["verbose_name"] or metric["metric_name"]
            for metric in filtered_metrics
        }
    )
    verbose_map.update(
        {
            column["column_name"]: column["verbose_name"] or column["column_name"]
            for column in filtered_columns
        }
    )
    data["verbose_map"] = verbose_map

    return data

@staticmethod
def filter_values_handler(
values, target_column_is_numeric=False, is_list_target=False
Expand Down Expand Up @@ -353,6 +439,14 @@ def get_extra_cache_keys( # pylint: disable=no-self-use
"""
return []

def __hash__(self) -> int:
    """Hash on the datasource uid so instances can key dicts and sets."""
    uid = self.uid
    return hash(uid)

def __eq__(self, other: object) -> bool:
    """Two datasources are equal exactly when their uids match."""
    if isinstance(other, BaseDatasource):
        return self.uid == other.uid
    # Defer to the other operand for non-datasource comparisons.
    return NotImplemented


class BaseColumn(AuditMixinNullable, ImportMixin):
"""Interface for column"""
Expand Down
11 changes: 11 additions & 0 deletions superset/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1211,6 +1211,17 @@ def split(
yield s[i:]


def get_iterable(x: Any) -> List:
    """
    Wrap the argument in a list unless it already is one.

    :param x: Any object
    :returns: ``x`` itself when it is a list, otherwise ``[x]``
    """
    if isinstance(x, list):
        return x
    return [x]


class TimeRangeEndpoint(str, Enum):
"""
The time range endpoint types which represent inclusive, exclusive, or unknown.
Expand Down
16 changes: 13 additions & 3 deletions superset/views/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# pylint: disable=C,R,W
import logging
import re
from collections import defaultdict
from contextlib import closing
from datetime import datetime, timedelta
from typing import Any, Callable, cast, Dict, List, Optional, Union
Expand Down Expand Up @@ -1788,11 +1789,12 @@ def dashboard(self, dashboard_id):
dash = qry.one_or_none()
if not dash:
abort(404)
datasources = set()

datasources = defaultdict(list)
for slc in dash.slices:
datasource = slc.datasource
if datasource:
datasources.add(datasource)
datasources[datasource].append(slc)

if config["ENABLE_ACCESS_REQUEST"]:
for datasource in datasources:
Expand All @@ -1807,6 +1809,14 @@ def dashboard(self, dashboard_id):
"superset/request_access/?" f"dashboard_id={dash.id}&"
)

# Filter out unneeded fields from the datasource payload
datasources_payload = {
datasource.uid: datasource.data_for_slices(slices)
if is_feature_enabled("REDUCE_DASHBOARD_BOOTSTRAP_PAYLOAD")
else datasource.data
for datasource, slices in datasources.items()
}

dash_edit_perm = check_ownership(
dash, raise_if_false=False
) and security_manager.can_access("can_save_dash", "Superset")
Expand Down Expand Up @@ -1854,7 +1864,7 @@ def dashboard(**kwargs):
bootstrap_data = {
"user_id": g.user.get_id(),
"dashboard_data": dashboard_data,
"datasources": {ds.uid: ds.data for ds in datasources},
"datasources": datasources_payload,
"common": common_bootstrap_payload(),
"editMode": edit_mode,
"urlParams": url_params,
Expand Down
16 changes: 15 additions & 1 deletion tests/model_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@
from sqlalchemy.engine.url import make_url

import tests.test_app
from superset import app
from superset import app, db as metadata_db
from superset.models.core import Database
from superset.models.slice import Slice
from superset.utils.core import get_example_database, QueryStatus

from .base_tests import SupersetTestCase
Expand Down Expand Up @@ -318,3 +319,16 @@ def test_query_with_non_existent_metrics(self):
tbl.get_query_str(query_obj)

self.assertTrue("Metric 'invalid' does not exist", context.exception)

def test_data_for_slices(self):
    """data_for_slices should trim the payload to what one slice needs."""
    tbl = self.get_table_by_name("birth_names")
    slc = (
        metadata_db.session.query(Slice)
        .filter_by(datasource_id=tbl.id, datasource_type=tbl.type)
        .first()
    )

    data_for_slices = tbl.data_for_slices([slc])
    # assertEquals is a deprecated alias (removed in Python 3.12);
    # use assertEqual instead.
    self.assertEqual(len(data_for_slices["columns"]), 0)
    self.assertEqual(len(data_for_slices["metrics"]), 1)
    # One metric plus the implicit "__timestamp" entry.
    self.assertEqual(len(data_for_slices["verbose_map"].keys()), 2)
6 changes: 6 additions & 0 deletions tests/utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
convert_legacy_filters_into_adhoc,
datetime_f,
format_timedelta,
get_iterable,
get_or_create_db,
get_since_until,
get_stacktrace,
Expand Down Expand Up @@ -950,3 +951,8 @@ def test_get_time_range_endpoints(self):
get_time_range_endpoints(form_data={"datasource": "1__table"}, slc=slc),
(TimeRangeEndpoint.INCLUSIVE, TimeRangeEndpoint.EXCLUSIVE),
)

def test_get_iterable(self):
    """get_iterable wraps scalars (including strings) and passes lists through."""
    cases = [
        (123, [123]),
        ([123], [123]),
        ("foo", ["foo"]),
    ]
    for value, expected in cases:
        self.assertListEqual(get_iterable(value), expected)