From 11fded8ad4effe5c1f1fe8d324e80ddc6b9278d7 Mon Sep 17 00:00:00 2001 From: erik_ritter Date: Wed, 11 Mar 2020 09:36:29 -0700 Subject: [PATCH] Reduce dashboard bootstrap payload --- superset/config.py | 1 + superset/connectors/base/models.py | 94 ++++++++++++++++++++++++++++++ superset/utils/core.py | 11 ++++ superset/views/core.py | 16 ++++- tests/model_tests.py | 16 ++++- tests/utils_tests.py | 6 ++ 6 files changed, 140 insertions(+), 4 deletions(-) diff --git a/superset/config.py b/superset/config.py index 443a92d786618..a44a67168a718 100644 --- a/superset/config.py +++ b/superset/config.py @@ -282,6 +282,7 @@ def _try_json_readsha(filepath, length): # pylint: disable=unused-argument "ENABLE_EXPLORE_JSON_CSRF_PROTECTION": False, "KV_STORE": False, "PRESTO_EXPAND_DATA": False, + "REDUCE_DASHBOARD_BOOTSTRAP_PAYLOAD": False, "SHARE_QUERIES_VIA_KV_STORE": False, "TAGGING_SYSTEM": False, } diff --git a/superset/connectors/base/models.py b/superset/connectors/base/models.py index eac6cbb71baa8..dfcafbfddd19b 100644 --- a/superset/connectors/base/models.py +++ b/superset/connectors/base/models.py @@ -27,6 +27,28 @@ from superset.models.slice import Slice from superset.utils import core as utils +METRIC_FORM_DATA_PARAMS = [ + "metric", + "metrics", + "metric_2", + "percent_metrics", + "secondary_metric", + "size", + "timeseries_limit_metric", + "x", + "y", +] + +COLUMN_FORM_DATA_PARAMS = [ + "all_columns", + "all_columns_x", + "columns", + "entity", + "groupby", + "order_by_cols", + "series", +] + class BaseDatasource( AuditMixinNullable, ImportMixin @@ -213,6 +235,70 @@ def data(self) -> Dict[str, Any]: "select_star": self.select_star, } + def data_for_slices(self, slices: List[Slice]) -> Dict[str, Any]: + """ + The representation of the datasource containing only the required data + to render the provided slices. + + Used to reduce the payload when loading a dashboard. 
+ """ + data = self.data + metric_names = set() + column_names = set() + for slc in slices: + form_data = slc.form_data + + # pull out all required metrics from the form_data + for param in METRIC_FORM_DATA_PARAMS: + for metric in utils.get_iterable(form_data.get(param) or []): + metric_names.add(utils.get_metric_name(metric)) + + if utils.is_adhoc_metric(metric): + column_names.add( + (metric.get("column") or {}).get("column_name") + ) + + # pull out all required columns from the form_data + for filter_ in form_data.get("adhoc_filters") or []: + if filter_["clause"] == "WHERE" and filter_.get("subject"): + column_names.add(filter_.get("subject")) + + for param in COLUMN_FORM_DATA_PARAMS: + for column in utils.get_iterable(form_data.get(param) or []): + column_names.add(column) + + filtered_metrics = [ + metric + for metric in data["metrics"] + if metric["metric_name"] in metric_names + ] + + filtered_columns = [ + column + for column in data["columns"] + if column["column_name"] in column_names + ] + + del data["description"] + data.update({"metrics": filtered_metrics}) + data.update({"columns": filtered_columns}) + verbose_map = {"__timestamp": "Time"} + verbose_map.update( + { + metric["metric_name"]: metric["verbose_name"] or metric["metric_name"] + for metric in filtered_metrics + } + ) + verbose_map.update( + { + column["column_name"]: column["verbose_name"] or column["column_name"] + for column in filtered_columns + } + ) + data["verbose_map"] = verbose_map + + return data + @staticmethod def filter_values_handler( values, target_column_is_numeric=False, is_list_target=False @@ -353,6 +439,14 @@ def get_extra_cache_keys( # pylint: disable=no-self-use """ return [] + def __hash__(self) -> int: + return hash(self.uid) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, BaseDatasource): + return NotImplemented + return self.uid == other.uid + class BaseColumn(AuditMixinNullable, ImportMixin): """Interface for column""" diff --git 
a/superset/utils/core.py b/superset/utils/core.py index 0e820e1bd523b..23d6d4ef9256d 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -1211,6 +1211,17 @@ def split( yield s[i:] +def get_iterable(x: Any) -> List: + """ + Get an iterable (list) representation of the object. + + :param x: The object + :returns: An iterable representation + """ + + return x if isinstance(x, list) else [x] + + class TimeRangeEndpoint(str, Enum): """ The time range endpoint types which represent inclusive, exclusive, or unknown. diff --git a/superset/views/core.py b/superset/views/core.py index ce49374a0389e..b906de8718fac 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -17,6 +17,7 @@ # pylint: disable=C,R,W import logging import re +from collections import defaultdict from contextlib import closing from datetime import datetime, timedelta from typing import Any, Callable, cast, Dict, List, Optional, Union @@ -1788,11 +1789,12 @@ def dashboard(self, dashboard_id): dash = qry.one_or_none() if not dash: abort(404) - datasources = set() + + datasources = defaultdict(list) for slc in dash.slices: datasource = slc.datasource if datasource: - datasources.add(datasource) + datasources[datasource].append(slc) if config["ENABLE_ACCESS_REQUEST"]: for datasource in datasources: @@ -1807,6 +1809,14 @@ def dashboard(self, dashboard_id): "superset/request_access/?" 
f"dashboard_id={dash.id}&" ) + # Filter out unneeded fields from the datasource payload + datasources_payload = { + datasource.uid: datasource.data_for_slices(slices) + if is_feature_enabled("REDUCE_DASHBOARD_BOOTSTRAP_PAYLOAD") + else datasource.data + for datasource, slices in datasources.items() + } + dash_edit_perm = check_ownership( dash, raise_if_false=False ) and security_manager.can_access("can_save_dash", "Superset") @@ -1854,7 +1864,7 @@ def dashboard(**kwargs): bootstrap_data = { "user_id": g.user.get_id(), "dashboard_data": dashboard_data, - "datasources": {ds.uid: ds.data for ds in datasources}, + "datasources": datasources_payload, "common": common_bootstrap_payload(), "editMode": edit_mode, "urlParams": url_params, diff --git a/tests/model_tests.py b/tests/model_tests.py index 6c93ed807fe30..4a203a96475d4 100644 --- a/tests/model_tests.py +++ b/tests/model_tests.py @@ -22,8 +22,9 @@ from sqlalchemy.engine.url import make_url import tests.test_app -from superset import app +from superset import app, db as metadata_db from superset.models.core import Database +from superset.models.slice import Slice from superset.utils.core import get_example_database, QueryStatus from .base_tests import SupersetTestCase @@ -318,3 +319,16 @@ def test_query_with_non_existent_metrics(self): tbl.get_query_str(query_obj) self.assertTrue("Metric 'invalid' does not exist", context.exception) + + def test_data_for_slices(self): + tbl = self.get_table_by_name("birth_names") + slc = ( + metadata_db.session.query(Slice) + .filter_by(datasource_id=tbl.id, datasource_type=tbl.type) + .first() + ) + + data_for_slices = tbl.data_for_slices([slc]) + self.assertEqual(len(data_for_slices["columns"]), 0) + self.assertEqual(len(data_for_slices["metrics"]), 1) + self.assertEqual(len(data_for_slices["verbose_map"].keys()), 2) diff --git a/tests/utils_tests.py b/tests/utils_tests.py index 81cc37cbd2013..cc19c48aa9227 100644 --- a/tests/utils_tests.py +++ b/tests/utils_tests.py @@ -36,6
+36,7 @@ convert_legacy_filters_into_adhoc, datetime_f, format_timedelta, + get_iterable, get_or_create_db, get_since_until, get_stacktrace, @@ -950,3 +951,8 @@ def test_get_time_range_endpoints(self): get_time_range_endpoints(form_data={"datasource": "1__table"}, slc=slc), (TimeRangeEndpoint.INCLUSIVE, TimeRangeEndpoint.EXCLUSIVE), ) + + def test_get_iterable(self): + self.assertListEqual(get_iterable(123), [123]) + self.assertListEqual(get_iterable([123]), [123]) + self.assertListEqual(get_iterable("foo"), ["foo"])