From d2afe98ab30928c7a99966c2703b94ee8e549f33 Mon Sep 17 00:00:00 2001 From: Grace Date: Mon, 21 Sep 2020 18:30:51 -0700 Subject: [PATCH 1/2] feat: add etag for dashboard load requests --- superset/config.py | 1 + superset/utils/decorators.py | 36 ++++++++++++++--- superset/views/core.py | 77 ++++++++++++++++-------------------- superset/views/utils.py | 74 ++++++++++++++++++++++++++++++++-- 4 files changed, 137 insertions(+), 51 deletions(-) diff --git a/superset/config.py b/superset/config.py index ff644781903ad..748d47340aee0 100644 --- a/superset/config.py +++ b/superset/config.py @@ -297,6 +297,7 @@ def _try_json_readsha( # pylint: disable=unused-argument # Experimental feature introducing a client (browser) cache "CLIENT_CACHE": False, "ENABLE_EXPLORE_JSON_CSRF_PROTECTION": False, + "ENABLE_DASHBOARD_ETAG_HEADER": False, "KV_STORE": False, "PRESTO_EXPAND_DATA": False, # Exposes API endpoint to compute thumbnails diff --git a/superset/utils/decorators.py b/superset/utils/decorators.py index 694e07bd2c434..ec284fc7cac77 100644 --- a/superset/utils/decorators.py +++ b/superset/utils/decorators.py @@ -15,15 +15,15 @@ # specific language governing permissions and limitations # under the License. import logging -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from functools import wraps -from typing import Any, Callable, Iterator +from typing import Any, Callable, Iterator, Optional from contextlib2 import contextmanager from flask import request from werkzeug.wrappers.etag import ETagResponseMixin -from superset import app, cache +from superset import app, cache, is_feature_enabled from superset.stats_logger import BaseStatsLogger from superset.utils.dates import now_as_float @@ -34,6 +34,10 @@ logger = logging.getLogger(__name__) +def is_dashboard_request(kwargs: Any) -> bool: + return kwargs.get("dashboard_id_or_slug") is not None + + @contextmanager def stats_timing(stats_key: str, stats_logger: BaseStatsLogger) -> Iterator[float]: """Provide a transactional scope around a series of operations.""" @@ -46,7 +50,11 @@ def stats_timing(stats_key: str, stats_logger: BaseStatsLogger) -> Iterator[floa stats_logger.timing(stats_key, now_as_float() - start_ts) -def etag_cache(max_age: int, check_perms: Callable[..., Any]) -> Callable[..., Any]: +def etag_cache( + max_age: int, + check_perms: Callable[..., Any], + check_latest_changed_on: Optional[Callable[..., Any]] = None, +) -> Callable[..., Any]: """ A decorator for caching views and handling etag conditional requests. @@ -72,6 +80,12 @@ def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin: if request.method == "POST": return f(*args, **kwargs) + # if it is dashboard request but feature is not eabled, + # do not use cache + is_dashboard = is_dashboard_request(kwargs) + if is_dashboard and not is_feature_enabled("ENABLE_DASHBOARD_ETAG_HEADER"): + return f(*args, **kwargs) + response = None if cache: try: @@ -89,13 +103,25 @@ def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin: raise logger.exception("Exception possibly due to cache backend.") + # if cache is stale? + if check_latest_changed_on: + latest_changed_on = check_latest_changed_on(*args, **kwargs) + if response and response.last_modified: + latest_record = response.last_modified.replace( + tzinfo=timezone.utc + ).astimezone(tz=None) + if latest_changed_on.timestamp() > latest_record.timestamp(): + response = None + # if no response was cached, compute it using the wrapped function if response is None: response = f(*args, **kwargs) # add headers for caching: Last Modified, Expires and ETag response.cache_control.public = True - response.last_modified = datetime.utcnow() + response.last_modified = ( + latest_changed_on if is_dashboard else datetime.utcnow() + ) expiration = max_age if max_age != 0 else FAR_FUTURE response.expires = response.last_modified + timedelta( seconds=expiration diff --git a/superset/views/core.py b/superset/views/core.py index 3f0c47703a2b3..3bb7ce30d731b 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -17,7 +17,6 @@ # pylint: disable=comparison-with-callable import logging import re -from collections import defaultdict from contextlib import closing from datetime import datetime from typing import Any, cast, Dict, List, Optional, Union @@ -26,7 +25,17 @@ import backoff import pandas as pd import simplejson as json -from flask import abort, flash, g, Markup, redirect, render_template, request, Response +from flask import ( + abort, + flash, + g, + make_response, + Markup, + redirect, + render_template, + request, + Response, +) from flask_appbuilder import expose from flask_appbuilder.models.sqla.interface import SQLAInterface from flask_appbuilder.security.decorators import has_access, has_access_api @@ -114,11 +123,15 @@ _deserialize_results_payload, apply_display_max_row_limit, bootstrap_user_data, + check_dashboard_perms, check_datasource_perms, check_slice_perms, get_cta_schema_name, + get_dashboard, get_dashboard_extra_filters, + get_dashboard_latest_changed_on, get_datasource_info, + get_datasources_from_dashboard, get_form_data, get_viz, is_owner, @@ -1585,42 +1598,18 @@ def publish( # pylint: disable=no-self-use return json_success(json.dumps({"published": dash.published})) @has_access + @etag_cache( + 0, + check_perms=check_dashboard_perms, + check_latest_changed_on=get_dashboard_latest_changed_on, + ) @expose("/dashboard//") def dashboard( # pylint: disable=too-many-locals self, dashboard_id_or_slug: str ) -> FlaskResponse: """Server side rendering for a dashboard""" - session = db.session() - qry = session.query(Dashboard) - if dashboard_id_or_slug.isdigit(): - qry = qry.filter_by(id=int(dashboard_id_or_slug)) - else: - qry = qry.filter_by(slug=dashboard_id_or_slug) - - dash = qry.one_or_none() - if not dash: - abort(404) - - datasources = defaultdict(list) - for slc in dash.slices: - datasource = slc.datasource - if datasource: - datasources[datasource].append(slc) - - if config["ENABLE_ACCESS_REQUEST"]: - for datasource in datasources: - if datasource and not security_manager.can_access_datasource( - datasource - ): - flash( - __( - security_manager.get_datasource_access_error_msg(datasource) - ), - "danger", - ) - return redirect( - "superset/request_access/?" f"dashboard_id={dash.id}&" - ) + dash = get_dashboard(dashboard_id_or_slug) + datasources = get_datasources_from_dashboard(dash) # Filter out unneeded fields from the datasource payload datasources_payload = { @@ -1661,7 +1650,7 @@ def dashboard(**_: Any) -> None: if is_feature_enabled("REMOVE_SLICE_LEVEL_LABEL_COLORS"): # dashboard metadata has dashboard-level label_colors, # so remove slice-level label_colors from its form_data - for slc in dashboard_data.get("slices"): + for slc in dashboard_data.get("slices") or []: form_data = slc.get("form_data") form_data.pop("label_colors", None) @@ -1695,15 +1684,17 @@ def dashboard(**_: Any) -> None: json.dumps(bootstrap_data, default=utils.pessimistic_json_iso_dttm_ser) ) - return self.render_template( - "superset/dashboard.html", - entry="dashboard", - standalone_mode=standalone_mode, - title=dash.dashboard_title, - custom_css=dashboard_data.get("css"), - bootstrap_data=json.dumps( - bootstrap_data, default=utils.pessimistic_json_iso_dttm_ser - ), + return make_response( + self.render_template( + "superset/dashboard.html", + entry="dashboard", + standalone_mode=standalone_mode, + title=dash.dashboard_title, + custom_css=dashboard_data.get("css"), + bootstrap_data=json.dumps( + bootstrap_data, default=utils.pessimistic_json_iso_dttm_ser + ), + ) ) @api diff --git a/superset/views/utils.py b/superset/views/utils.py index eaecc5fe87031..f467494c0ae6f 100644 --- a/superset/views/utils.py +++ b/superset/views/utils.py @@ -16,19 +16,27 @@ # under the License. import logging from collections import defaultdict -from datetime import date +from datetime import date, datetime from typing import Any, Callable, DefaultDict, Dict, List, Optional, Set, Tuple, Union from urllib import parse import msgpack import pyarrow as pa import simplejson as json -from flask import g, request +from flask import abort, flash, g, redirect, request from flask_appbuilder.security.sqla import models as ab_models from flask_appbuilder.security.sqla.models import User +from flask_babel import gettext as __ import superset.models.core as models -from superset import app, dataframe, db, is_feature_enabled, result_set +from superset import ( + app, + dataframe, + db, + is_feature_enabled, + result_set, + security_manager, +) from superset.connectors.connector_registry import ConnectorRegistry from superset.errors import ErrorLevel, SupersetError, SupersetErrorType from superset.exceptions import SupersetException, SupersetSecurityException @@ -298,6 +306,46 @@ def get_time_range_endpoints( CONTAINER_TYPES = ["COLUMN", "GRID", "TABS", "TAB", "ROW"] +def get_dashboard(dashboard_id_or_slug: str,) -> Dashboard: + session = db.session() + qry = session.query(Dashboard) + if dashboard_id_or_slug.isdigit(): + qry = qry.filter_by(id=int(dashboard_id_or_slug)) + else: + qry = qry.filter_by(slug=dashboard_id_or_slug) + dashboard = qry.one_or_none() + + if not dashboard: + abort(404) + + return dashboard + + +def get_datasources_from_dashboard( + dashboard: Dashboard, +) -> DefaultDict[Any, List[Any]]: + datasources = defaultdict(list) + for slc in dashboard.slices: + datasource = slc.datasource + if datasource: + datasources[datasource].append(slc) + return datasources + + +def get_dashboard_latest_changed_on(_self: Any, dashboard_id_or_slug: str) -> datetime: + """ + Get latest changed datetime for a dashboard. The change could be dashboard + metadata change, or any of its slice data change. + + This function takes `self` since it must have the same signature as the + the decorated method. + """ + dash = get_dashboard(dashboard_id_or_slug) + dash_changed_on = dash.changed_on + slices_changed_on = max([s.changed_on for s in dash.slices]) + return max(dash_changed_on, slices_changed_on) + + def get_dashboard_extra_filters( slice_id: int, dashboard_id: int ) -> List[Dict[str, Any]]: @@ -490,6 +538,26 @@ def check_slice_perms(_self: Any, slice_id: int) -> None: viz_obj.raise_for_access() +def check_dashboard_perms(_self: Any, dashboard_id_or_slug: str) -> None: + """ + Check if user can access a cached response from explore_json. + + This function takes `self` since it must have the same signature as the + the decorated method. + """ + + dash = get_dashboard(dashboard_id_or_slug) + datasources = get_datasources_from_dashboard(dash) + if app.config["ENABLE_ACCESS_REQUEST"]: + for datasource in datasources: + if datasource and not security_manager.can_access_datasource(datasource): + flash( + __(security_manager.get_datasource_access_error_msg(datasource)), + "danger", + ) + redirect("superset/request_access/?" f"dashboard_id={dash.id}&") + + def _deserialize_results_payload( payload: Union[bytes, str], query: Query, use_msgpack: Optional[bool] = False ) -> Dict[str, Any]: From 18b31f856407a21eb099302872bd24e117557608 Mon Sep 17 00:00:00 2001 From: Grace Date: Fri, 25 Sep 2020 11:11:09 -0700 Subject: [PATCH 2/2] fix review comments --- superset/utils/decorators.py | 44 ++++++++++++++++-------------------- superset/views/core.py | 17 ++++++++++---- superset/views/utils.py | 18 ++++----------- tests/utils_tests.py | 12 ++++++++++ 4 files changed, 47 insertions(+), 44 deletions(-) diff --git a/superset/utils/decorators.py b/superset/utils/decorators.py index ec284fc7cac77..ae4c5726d8ee7 100644 --- a/superset/utils/decorators.py +++ b/superset/utils/decorators.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. import logging -from datetime import datetime, timedelta, timezone +from datetime import datetime, timedelta from functools import wraps from typing import Any, Callable, Iterator, Optional @@ -23,7 +23,7 @@ from flask import request from werkzeug.wrappers.etag import ETagResponseMixin -from superset import app, cache, is_feature_enabled +from superset import app, cache from superset.stats_logger import BaseStatsLogger from superset.utils.dates import now_as_float @@ -34,10 +34,6 @@ logger = logging.getLogger(__name__) -def is_dashboard_request(kwargs: Any) -> bool: - return kwargs.get("dashboard_id_or_slug") is not None - - @contextmanager def stats_timing(stats_key: str, stats_logger: BaseStatsLogger) -> Iterator[float]: """Provide a transactional scope around a series of operations.""" @@ -53,7 +49,8 @@ def stats_timing(stats_key: str, stats_logger: BaseStatsLogger) -> Iterator[floa def etag_cache( max_age: int, check_perms: Callable[..., Any], - check_latest_changed_on: Optional[Callable[..., Any]] = None, + get_last_modified: Optional[Callable[..., Any]] = None, + skip: Optional[Callable[..., Any]] = None, ) -> Callable[..., Any]: """ A decorator for caching views and handling etag conditional requests. @@ -77,13 +74,7 @@ def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin: # for POST requests we can't set cache headers, use the response # cache nor use conditional requests; this will still use the # dataframe cache in `superset/viz.py`, though. - if request.method == "POST": - return f(*args, **kwargs) - - # if it is dashboard request but feature is not eabled, - # do not use cache - is_dashboard = is_dashboard_request(kwargs) - if is_dashboard and not is_feature_enabled("ENABLE_DASHBOARD_ETAG_HEADER"): + if request.method == "POST" or (skip and skip(*args, **kwargs)): return f(*args, **kwargs) response = None @@ -104,14 +95,19 @@ def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin: logger.exception("Exception possibly due to cache backend.") # if cache is stale? - if check_latest_changed_on: - latest_changed_on = check_latest_changed_on(*args, **kwargs) - if response and response.last_modified: - latest_record = response.last_modified.replace( - tzinfo=timezone.utc - ).astimezone(tz=None) - if latest_changed_on.timestamp() > latest_record.timestamp(): - response = None + if get_last_modified: + content_changed_time = get_last_modified(*args, **kwargs) + if ( + response + and response.last_modified + and response.last_modified.timestamp() + < content_changed_time.timestamp() + ): + response = None + else: + # if caller didn't provide content's last_modified time, assume + # its cache won't be stale. + content_changed_time = datetime.utcnow() # if no response was cached, compute it using the wrapped function if response is None: @@ -119,9 +115,7 @@ def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin: # add headers for caching: Last Modified, Expires and ETag response.cache_control.public = True - response.last_modified = ( - latest_changed_on if is_dashboard else datetime.utcnow() - ) + response.last_modified = content_changed_time expiration = max_age if max_age != 0 else FAR_FUTURE response.expires = response.last_modified + timedelta( seconds=expiration diff --git a/superset/views/core.py b/superset/views/core.py index 3bb7ce30d731b..a82b59fa1e41f 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -17,6 +17,7 @@ # pylint: disable=comparison-with-callable import logging import re +from collections import defaultdict from contextlib import closing from datetime import datetime from typing import Any, cast, Dict, List, Optional, Union @@ -128,10 +129,9 @@ check_slice_perms, get_cta_schema_name, get_dashboard, + get_dashboard_changedon_dt, get_dashboard_extra_filters, - get_dashboard_latest_changed_on, get_datasource_info, - get_datasources_from_dashboard, get_form_data, get_viz, is_owner, @@ -1601,7 +1601,10 @@ def publish( # pylint: disable=no-self-use @etag_cache( 0, check_perms=check_dashboard_perms, - check_latest_changed_on=get_dashboard_latest_changed_on, + get_last_modified=get_dashboard_changedon_dt, + skip=lambda _self, dashboard_id_or_slug: not is_feature_enabled( + "ENABLE_DASHBOARD_ETAG_HEADER" + ), ) @expose("/dashboard//") def dashboard( # pylint: disable=too-many-locals @@ -1609,14 +1612,18 @@ def dashboard( # pylint: disable=too-many-locals ) -> FlaskResponse: """Server side rendering for a dashboard""" dash = get_dashboard(dashboard_id_or_slug) - datasources = get_datasources_from_dashboard(dash) + slices_by_datasources = defaultdict(list) + for slc in dash.slices: + datasource = slc.datasource + if datasource: + slices_by_datasources[datasource].append(slc) # Filter out unneeded fields from the datasource payload datasources_payload = { datasource.uid: datasource.data_for_slices(slices) if is_feature_enabled("REDUCE_DASHBOARD_BOOTSTRAP_PAYLOAD") else datasource.data - for datasource, slices in datasources.items() + for datasource, slices in slices_by_datasources.items() } dash_edit_perm = check_ownership( diff --git a/superset/views/utils.py b/superset/views/utils.py index f467494c0ae6f..dc164944c2b57 100644 --- a/superset/views/utils.py +++ b/superset/views/utils.py @@ -321,18 +321,7 @@ def get_dashboard(dashboard_id_or_slug: str,) -> Dashboard: return dashboard -def get_datasources_from_dashboard( - dashboard: Dashboard, -) -> DefaultDict[Any, List[Any]]: - datasources = defaultdict(list) - for slc in dashboard.slices: - datasource = slc.datasource - if datasource: - datasources[datasource].append(slc) - return datasources - - -def get_dashboard_latest_changed_on(_self: Any, dashboard_id_or_slug: str) -> datetime: +def get_dashboard_changedon_dt(_self: Any, dashboard_id_or_slug: str) -> datetime: """ Get latest changed datetime for a dashboard. The change could be dashboard metadata change, or any of its slice data change. @@ -343,7 +332,8 @@ def get_dashboard_latest_changed_on(_self: Any, dashboard_id_or_slug: str) -> da dash = get_dashboard(dashboard_id_or_slug) dash_changed_on = dash.changed_on slices_changed_on = max([s.changed_on for s in dash.slices]) - return max(dash_changed_on, slices_changed_on) + # drop microseconds in datetime to match with last_modified header + return max(dash_changed_on, slices_changed_on).replace(microsecond=0) def get_dashboard_extra_filters( @@ -547,7 +537,7 @@ def check_dashboard_perms(_self: Any, dashboard_id_or_slug: str) -> None: """ dash = get_dashboard(dashboard_id_or_slug) - datasources = get_datasources_from_dashboard(dash) + datasources = list(dash.datasources) if app.config["ENABLE_ACCESS_REQUEST"]: for datasource in datasources: if datasource and not security_manager.can_access_datasource(datasource): diff --git a/tests/utils_tests.py b/tests/utils_tests.py index 035a7864feb20..c63fcc119328d 100644 --- a/tests/utils_tests.py +++ b/tests/utils_tests.py @@ -67,6 +67,7 @@ from superset.utils import schema from superset.views.utils import ( build_extra_filters, + get_dashboard_changedon_dt, get_form_data, get_time_range_endpoints, ) @@ -1134,3 +1135,14 @@ def test_get_form_data_token(self): assert get_form_data_token({"token": "token_abcdefg1"}) == "token_abcdefg1" generated_token = get_form_data_token({}) assert re.match(r"^token_[a-z0-9]{8}$", generated_token) is not None + + def test_get_dashboard_changedon_dt(self) -> None: + slug = "world_health" + dashboard = db.session.query(Dashboard).filter_by(slug=slug).one() + dashboard_last_changedon = dashboard.changed_on + slices = dashboard.slices + slices_last_changedon = max([slc.changed_on for slc in slices]) + # drop microsecond in datetime + assert get_dashboard_changedon_dt(self, slug) == max( + dashboard_last_changedon, slices_last_changedon + ).replace(microsecond=0)