diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index ecb8db4246f6b..6553c19807ee2 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -37,7 +37,7 @@ from superset.common.utils.query_cache_manager import QueryCacheManager from superset.common.utils.time_range_utils import get_since_until_from_query_object from superset.connectors.base.models import BaseDatasource -from superset.constants import CacheRegion +from superset.constants import CacheRegion, TimeGrain from superset.exceptions import ( InvalidPostProcessingError, QueryObjectValidationError, @@ -74,6 +74,27 @@ stats_logger: BaseStatsLogger = config["STATS_LOGGER"] logger = logging.getLogger(__name__) +# Temporary column used for joining aggregated offset results +AGGREGATED_JOIN_COLUMN = "__aggregated_join_column" + +# This only includes time grains that may influence +# the temporal column used for joining offset results. +# Given that we don't allow time shifts smaller than a day, +# we don't need to include smaller time grains aggregations. +AGGREGATED_JOIN_GRAINS = { + TimeGrain.WEEK, + TimeGrain.WEEK_STARTING_SUNDAY, + TimeGrain.WEEK_STARTING_MONDAY, + TimeGrain.WEEK_ENDING_SATURDAY, + TimeGrain.WEEK_ENDING_SUNDAY, + TimeGrain.MONTH, + TimeGrain.QUARTER, + TimeGrain.YEAR, +} + +# Right suffix used for joining offset results +R_SUFFIX = "__right_suffix" + class CachedTimeOffset(TypedDict): df: pd.DataFrame @@ -89,10 +110,6 @@ class QueryContextProcessor: _query_context: QueryContext _qc_datasource: BaseDatasource - """ - The query context contains the query object and additional fields necessary - to retrieve the data payload for a given viz. 
- """ def __init__(self, query_context: QueryContext): self._query_context = query_context @@ -307,6 +324,35 @@ def _get_timestamp_format( return df + @staticmethod + def get_time_grain(query_object: QueryObject) -> Any | None: + if ( + query_object.columns + and len(query_object.columns) > 0 + and isinstance(query_object.columns[0], dict) + ): + # If the time grain is in the columns it will be the first one + # and it will be of AdhocColumn type + return query_object.columns[0].get("timeGrain") + + return query_object.extras.get("time_grain_sqla") + + def add_aggregated_join_column( + self, + df: pd.DataFrame, + time_grain: str, + join_column_producer: Any = None, + ) -> None: + if join_column_producer: + df[AGGREGATED_JOIN_COLUMN] = df.apply( + lambda row: join_column_producer(row, 0), axis=1 + ) + else: + df[AGGREGATED_JOIN_COLUMN] = df.apply( + lambda row: self.get_aggregated_join_column(row, 0, time_grain), + axis=1, + ) + def processing_time_offsets( # pylint: disable=too-many-locals,too-many-statements self, df: pd.DataFrame, @@ -317,9 +363,8 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme query_object_clone = copy.copy(query_object) queries: list[str] = [] cache_keys: list[str | None] = [] - rv_dfs: list[pd.DataFrame] = [df] + offset_dfs: list[pd.DataFrame] = [] - time_offsets = query_object.time_offsets outer_from_dttm, outer_to_dttm = get_since_until_from_query_object(query_object) if not outer_from_dttm or not outer_to_dttm: raise QueryObjectValidationError( @@ -328,7 +373,31 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme "when using a Time Comparison." 
) ) - for offset in time_offsets: + + columns = df.columns + time_grain = self.get_time_grain(query_object) + + if not time_grain: + raise QueryObjectValidationError( + _("Time Grain must be specified when using Time Shift.") + ) + + join_column_producer = config["TIME_GRAIN_JOIN_COLUMN_PRODUCERS"].get( + time_grain + ) + use_aggregated_join_column = ( + join_column_producer or time_grain in AGGREGATED_JOIN_GRAINS + ) + if use_aggregated_join_column: + self.add_aggregated_join_column(df, time_grain, join_column_producer) + # skips the first column which is the temporal column + # because we'll use the aggregated join columns instead + columns = df.columns[1:] + + metric_names = get_metric_names(query_object.metrics) + join_keys = [col for col in columns if col not in metric_names] + + for offset in query_object.time_offsets: try: # pylint: disable=line-too-long # Since the xaxis is also a column name for the time filter, xaxis_label will be set as granularity @@ -364,13 +433,15 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme ] # `offset` is added to the hash function - cache_key = self.query_cache_key(query_object_clone, time_offset=offset) + cache_key = self.query_cache_key( + query_object_clone, time_offset=offset, time_grain=time_grain + ) cache = QueryCacheManager.get( cache_key, CacheRegion.DATA, query_context.force ) # whether hit on the cache if cache.is_loaded: - rv_dfs.append(cache.df) + offset_dfs.append(cache.df) queries.append(cache.query) cache_keys.append(cache_key) continue @@ -379,11 +450,8 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme # rename metrics: SUM(value) => SUM(value) 1 year ago metrics_mapping = { metric: TIME_COMPARISON.join([metric, offset]) - for metric in get_metric_names( - query_object_clone_dct.get("metrics", []) - ) + for metric in metric_names } - join_keys = [col for col in df.columns if col not in metrics_mapping.keys()] if isinstance(self._qc_datasource, 
Query): result = self._qc_datasource.exc_query(query_object_clone_dct) @@ -420,21 +488,19 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme ) ) + # modifies temporal column using offset offset_metrics_df[index] = offset_metrics_df[index] - DateOffset( **normalize_time_delta(offset) ) - # df left join `offset_metrics_df` - offset_df = dataframe_utils.left_join_df( - left_df=df, - right_df=offset_metrics_df, - join_keys=join_keys, - ) - offset_slice = offset_df[metrics_mapping.values()] + if use_aggregated_join_column: + self.add_aggregated_join_column( + offset_metrics_df, time_grain, join_column_producer + ) - # set offset_slice to cache and stack. + # cache df and query value = { - "df": offset_slice, + "df": offset_metrics_df, "query": result.query, } cache.set( @@ -444,10 +510,51 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme datasource_uid=query_context.datasource.uid, region=CacheRegion.DATA, ) - rv_dfs.append(offset_slice) + offset_dfs.append(offset_metrics_df) + + if offset_dfs: + # iterate on offset_dfs, left join each with df + for offset_df in offset_dfs: + df = dataframe_utils.left_join_df( + left_df=df, + right_df=offset_df, + join_keys=join_keys, + rsuffix=R_SUFFIX, + ) - rv_df = pd.concat(rv_dfs, axis=1, copy=False) if time_offsets else df - return CachedTimeOffset(df=rv_df, queries=queries, cache_keys=cache_keys) + # removes columns used for join + df.drop( + list(df.filter(regex=f"{AGGREGATED_JOIN_COLUMN}|{R_SUFFIX}")), + axis=1, + inplace=True, + ) + + return CachedTimeOffset(df=df, queries=queries, cache_keys=cache_keys) + + @staticmethod + def get_aggregated_join_column( + row: pd.Series, column_index: int, time_grain: str + ) -> str: + if time_grain in ( + TimeGrain.WEEK_STARTING_SUNDAY, + TimeGrain.WEEK_ENDING_SATURDAY, + ): + return row[column_index].strftime("%Y-W%U") + + if time_grain in ( + TimeGrain.WEEK, + TimeGrain.WEEK_STARTING_MONDAY, + TimeGrain.WEEK_ENDING_SUNDAY, + 
): + return row[column_index].strftime("%Y-W%W") + + if time_grain == TimeGrain.MONTH: + return row[column_index].strftime("%Y-%m") + + if time_grain == TimeGrain.QUARTER: + return row[column_index].strftime("%Y-Q") + str(row[column_index].quarter) + + return row[column_index].strftime("%Y") def get_data(self, df: pd.DataFrame) -> str | list[dict[str, Any]]: if self._query_context.result_format in ChartDataResultFormat.table_like(): diff --git a/superset/common/utils/dataframe_utils.py b/superset/common/utils/dataframe_utils.py index a3421f6431061..7772ec58509bb 100644 --- a/superset/common/utils/dataframe_utils.py +++ b/superset/common/utils/dataframe_utils.py @@ -30,8 +30,12 @@ def left_join_df( left_df: pd.DataFrame, right_df: pd.DataFrame, join_keys: list[str], + lsuffix: str = "", + rsuffix: str = "", ) -> pd.DataFrame: - df = left_df.set_index(join_keys).join(right_df.set_index(join_keys)) + df = left_df.set_index(join_keys).join( + right_df.set_index(join_keys), lsuffix=lsuffix, rsuffix=rsuffix + ) df.reset_index(inplace=True) return df diff --git a/superset/config.py b/superset/config.py index 434456386d932..f9fdf3cdd5122 100644 --- a/superset/config.py +++ b/superset/config.py @@ -41,6 +41,7 @@ from dateutil import tz from flask import Blueprint from flask_appbuilder.security.manager import AUTH_DB +from pandas import Series from pandas._libs.parsers import STR_NA_VALUES # pylint: disable=no-name-in-module from sqlalchemy.orm.query import Query @@ -773,6 +774,17 @@ class D3Format(TypedDict, total=False): # } TIME_GRAIN_ADDON_EXPRESSIONS: dict[str, dict[str, str]] = {} +# Map of custom time grains and artificial join column producers used +# when generating the join key between results and time shifts. 
+# See superset/common/query_context_processor.get_aggregated_join_column +# +# Example of a join column producer that aggregates by fiscal year +# def join_producer(row: Series, column_index: int) -> str: +# return row[column_index].strftime("%F") +# +# TIME_GRAIN_JOIN_COLUMN_PRODUCERS = {"P1F": join_producer} +TIME_GRAIN_JOIN_COLUMN_PRODUCERS: dict[str, Callable[[Series, int], str]] = {} + # --------------------------------------------------- # List of viz_types not allowed in your environment # For example: Disable pivot table and treemap: diff --git a/superset/constants.py b/superset/constants.py index e4bad9f8aa728..89e5a2eeed2c1 100644 --- a/superset/constants.py +++ b/superset/constants.py @@ -186,6 +186,30 @@ class RouteMethod: # pylint: disable=too-few-public-methods ) +class TimeGrain(str, Enum): + SECOND = "PT1S" + FIVE_SECONDS = "PT5S" + THIRTY_SECONDS = "PT30S" + MINUTE = "PT1M" + FIVE_MINUTES = "PT5M" + TEN_MINUTES = "PT10M" + FIFTEEN_MINUTES = "PT15M" + THIRTY_MINUTES = "PT30M" + HALF_HOUR = "PT0.5H" + HOUR = "PT1H" + SIX_HOURS = "PT6H" + DAY = "P1D" + WEEK = "P1W" + WEEK_STARTING_SUNDAY = "1969-12-28T00:00:00Z/P1W" + WEEK_STARTING_MONDAY = "1969-12-29T00:00:00Z/P1W" + WEEK_ENDING_SATURDAY = "P1W/1970-01-03T00:00:00Z" + WEEK_ENDING_SUNDAY = "P1W/1970-01-04T00:00:00Z" + MONTH = "P1M" + QUARTER = "P3M" + QUARTER_YEAR = "P0.25Y" + YEAR = "P1Y" + + class PandasAxis(int, Enum): ROW = 0 COLUMN = 1 diff --git a/superset/db_engine_specs/ascend.py b/superset/db_engine_specs/ascend.py index 759b78ad1c928..6d3d7a496e13a 100644 --- a/superset/db_engine_specs/ascend.py +++ b/superset/db_engine_specs/ascend.py @@ -16,6 +16,7 @@ # under the License. 
from sqlalchemy.dialects import registry +from superset.constants import TimeGrain from superset.db_engine_specs.impala import ImpalaEngineSpec @@ -29,12 +30,12 @@ class AscendEngineSpec(ImpalaEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', {col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } diff --git a/superset/db_engine_specs/athena.py b/superset/db_engine_specs/athena.py index ad6bed113da87..c62a2455f68fa 100644 --- a/superset/db_engine_specs/athena.py +++ b/superset/db_engine_specs/athena.py @@ -22,6 +22,7 @@ from flask_babel import gettext as __ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.errors import SupersetErrorType @@ -38,17 +39,17 @@ class AthenaEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))", - "PT1M": "date_trunc('minute', CAST({col} AS TIMESTAMP))", - "PT1H": "date_trunc('hour', CAST({col} AS TIMESTAMP))", - "P1D": "date_trunc('day', CAST({col} AS TIMESTAMP))", - "P1W": "date_trunc('week', CAST({col} AS TIMESTAMP))", - "P1M": "date_trunc('month', CAST({col} AS TIMESTAMP))", - "P3M": "date_trunc('quarter', CAST({col} AS TIMESTAMP))", - "P1Y": "date_trunc('year', CAST({col} AS TIMESTAMP))", - "P1W/1970-01-03T00:00:00Z": 
"date_add('day', 5, date_trunc('week', \ + TimeGrain.SECOND: "date_trunc('second', CAST({col} AS TIMESTAMP))", + TimeGrain.MINUTE: "date_trunc('minute', CAST({col} AS TIMESTAMP))", + TimeGrain.HOUR: "date_trunc('hour', CAST({col} AS TIMESTAMP))", + TimeGrain.DAY: "date_trunc('day', CAST({col} AS TIMESTAMP))", + TimeGrain.WEEK: "date_trunc('week', CAST({col} AS TIMESTAMP))", + TimeGrain.MONTH: "date_trunc('month', CAST({col} AS TIMESTAMP))", + TimeGrain.QUARTER: "date_trunc('quarter', CAST({col} AS TIMESTAMP))", + TimeGrain.YEAR: "date_trunc('year', CAST({col} AS TIMESTAMP))", + TimeGrain.WEEK_ENDING_SATURDAY: "date_add('day', 5, date_trunc('week', \ date_add('day', 1, CAST({col} AS TIMESTAMP))))", - "1969-12-28T00:00:00Z/P1W": "date_add('day', -1, date_trunc('week', \ + TimeGrain.WEEK_STARTING_SUNDAY: "date_add('day', -1, date_trunc('week', \ date_add('day', 1, CAST({col} AS TIMESTAMP))))", } diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index ef922a5e63a44..01d878ce0c54d 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -49,6 +49,7 @@ from typing_extensions import TypedDict from superset import security_manager, sql_parse +from superset.constants import TimeGrain as TimeGrainConstants from superset.databases.utils import make_url_safe from superset.errors import ErrorLevel, SupersetError, SupersetErrorType from superset.sql_parse import ParsedQuery, Table @@ -80,25 +81,25 @@ class TimeGrain(NamedTuple): builtin_time_grains: dict[str | None, str] = { - "PT1S": __("Second"), - "PT5S": __("5 second"), - "PT30S": __("30 second"), - "PT1M": __("Minute"), - "PT5M": __("5 minute"), - "PT10M": __("10 minute"), - "PT15M": __("15 minute"), - "PT30M": __("30 minute"), - "PT1H": __("Hour"), - "PT6H": __("6 hour"), - "P1D": __("Day"), - "P1W": __("Week"), - "P1M": __("Month"), - "P3M": __("Quarter"), - "P1Y": __("Year"), - "1969-12-28T00:00:00Z/P1W": __("Week starting Sunday"), - 
"1969-12-29T00:00:00Z/P1W": __("Week starting Monday"), - "P1W/1970-01-03T00:00:00Z": __("Week ending Saturday"), - "P1W/1970-01-04T00:00:00Z": __("Week_ending Sunday"), + TimeGrainConstants.SECOND: __("Second"), + TimeGrainConstants.FIVE_SECONDS: __("5 second"), + TimeGrainConstants.THIRTY_SECONDS: __("30 second"), + TimeGrainConstants.MINUTE: __("Minute"), + TimeGrainConstants.FIVE_MINUTES: __("5 minute"), + TimeGrainConstants.TEN_MINUTES: __("10 minute"), + TimeGrainConstants.FIFTEEN_MINUTES: __("15 minute"), + TimeGrainConstants.THIRTY_MINUTES: __("30 minute"), + TimeGrainConstants.HOUR: __("Hour"), + TimeGrainConstants.SIX_HOURS: __("6 hour"), + TimeGrainConstants.DAY: __("Day"), + TimeGrainConstants.WEEK: __("Week"), + TimeGrainConstants.MONTH: __("Month"), + TimeGrainConstants.QUARTER: __("Quarter"), + TimeGrainConstants.YEAR: __("Year"), + TimeGrainConstants.WEEK_STARTING_SUNDAY: __("Week starting Sunday"), + TimeGrainConstants.WEEK_STARTING_MONDAY: __("Week starting Monday"), + TimeGrainConstants.WEEK_ENDING_SATURDAY: __("Week ending Saturday"), + TimeGrainConstants.WEEK_ENDING_SUNDAY: __("Week_ending Sunday"), } diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py index 3b62f4bbb809c..e69194f50f705 100644 --- a/superset/db_engine_specs/bigquery.py +++ b/superset/db_engine_specs/bigquery.py @@ -35,7 +35,7 @@ from typing_extensions import TypedDict from superset import sql_parse -from superset.constants import PASSWORD_MASK +from superset.constants import PASSWORD_MASK, TimeGrain from superset.databases.schemas import encrypted_field_properties, EncryptedString from superset.databases.utils import make_url_safe from superset.db_engine_specs.base import BaseEngineSpec, BasicPropertiesType @@ -147,31 +147,31 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met _time_grain_expressions = { None: "{col}", - "PT1S": "CAST(TIMESTAMP_SECONDS(" + TimeGrain.SECOND: "CAST(TIMESTAMP_SECONDS(" 
"UNIX_SECONDS(CAST({col} AS TIMESTAMP))" ") AS {type})", - "PT1M": "CAST(TIMESTAMP_SECONDS(" + TimeGrain.MINUTE: "CAST(TIMESTAMP_SECONDS(" "60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 60)" ") AS {type})", - "PT5M": "CAST(TIMESTAMP_SECONDS(" + TimeGrain.FIVE_MINUTES: "CAST(TIMESTAMP_SECONDS(" "5*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 5*60)" ") AS {type})", - "PT10M": "CAST(TIMESTAMP_SECONDS(" + TimeGrain.TEN_MINUTES: "CAST(TIMESTAMP_SECONDS(" "10*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 10*60)" ") AS {type})", - "PT15M": "CAST(TIMESTAMP_SECONDS(" + TimeGrain.FIFTEEN_MINUTES: "CAST(TIMESTAMP_SECONDS(" "15*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 15*60)" ") AS {type})", - "PT30M": "CAST(TIMESTAMP_SECONDS(" + TimeGrain.THIRTY_MINUTES: "CAST(TIMESTAMP_SECONDS(" "30*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 30*60)" ") AS {type})", - "PT1H": "{func}({col}, HOUR)", - "P1D": "{func}({col}, DAY)", - "P1W": "{func}({col}, WEEK)", - "1969-12-29T00:00:00Z/P1W": "{func}({col}, ISOWEEK)", - "P1M": "{func}({col}, MONTH)", - "P3M": "{func}({col}, QUARTER)", - "P1Y": "{func}({col}, YEAR)", + TimeGrain.HOUR: "{func}({col}, HOUR)", + TimeGrain.DAY: "{func}({col}, DAY)", + TimeGrain.WEEK: "{func}({col}, WEEK)", + TimeGrain.WEEK_STARTING_MONDAY: "{func}({col}, ISOWEEK)", + TimeGrain.MONTH: "{func}({col}, MONTH)", + TimeGrain.QUARTER: "{func}({col}, QUARTER)", + TimeGrain.YEAR: "{func}({col}, YEAR)", } custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = { diff --git a/superset/db_engine_specs/crate.py b/superset/db_engine_specs/crate.py index d8d91c67962d6..46ce1e08ff24e 100644 --- a/superset/db_engine_specs/crate.py +++ b/superset/db_engine_specs/crate.py @@ -21,6 +21,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec if TYPE_CHECKING: @@ -33,14 +34,14 @@ class CrateEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - 
"PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', {col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } @classmethod diff --git a/superset/db_engine_specs/databricks.py b/superset/db_engine_specs/databricks.py index 5df24be65d6b5..56ce47772f589 100644 --- a/superset/db_engine_specs/databricks.py +++ b/superset/db_engine_specs/databricks.py @@ -14,10 +14,11 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+from __future__ import annotations import json from datetime import datetime -from typing import Any, Optional, TYPE_CHECKING +from typing import Any, TYPE_CHECKING from apispec import APISpec from apispec.ext.marshmallow import MarshmallowPlugin @@ -28,7 +29,7 @@ from sqlalchemy.engine.url import URL from typing_extensions import TypedDict -from superset.constants import USER_AGENT +from superset.constants import TimeGrain, USER_AGENT from superset.databases.utils import make_url_safe from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin from superset.db_engine_specs.hive import HiveEngineSpec @@ -39,6 +40,7 @@ from superset.models.core import Database +# class DatabricksParametersSchema(Schema): """ This is the list of fields that are expected @@ -93,20 +95,20 @@ class DatabricksPropertiesType(TypedDict): extra: str -time_grain_expressions = { +time_grain_expressions: dict[str | None, str] = { None: "{col}", - "PT1S": "date_trunc('second', {col})", - "PT1M": "date_trunc('minute', {col})", - "PT1H": "date_trunc('hour', {col})", - "P1D": "date_trunc('day', {col})", - "P1W": "date_trunc('week', {col})", - "P1M": "date_trunc('month', {col})", - "P3M": "date_trunc('quarter', {col})", - "P1Y": "date_trunc('year', {col})", - "P1W/1970-01-03T00:00:00Z": ( + TimeGrain.SECOND: "date_trunc('second', {col})", + TimeGrain.MINUTE: "date_trunc('minute', {col})", + TimeGrain.HOUR: "date_trunc('hour', {col})", + TimeGrain.DAY: "date_trunc('day', {col})", + TimeGrain.WEEK: "date_trunc('week', {col})", + TimeGrain.MONTH: "date_trunc('month', {col})", + TimeGrain.QUARTER: "date_trunc('quarter', {col})", + TimeGrain.YEAR: "date_trunc('year', {col})", + TimeGrain.WEEK_ENDING_SATURDAY: ( "date_trunc('week', {col} + interval '1 day') + interval '5 days'" ), - "1969-12-28T00:00:00Z/P1W": ( + TimeGrain.WEEK_STARTING_SUNDAY: ( "date_trunc('week', {col} + interval '1 day') - interval '1 day'" ), } @@ -135,8 +137,8 @@ class DatabricksODBCEngineSpec(BaseEngineSpec): 
@classmethod def convert_dttm( - cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None - ) -> Optional[str]: + cls, target_type: str, dttm: datetime, db_extra: dict[str, Any] | None = None + ) -> str | None: return HiveEngineSpec.convert_dttm(target_type, dttm, db_extra=db_extra) @classmethod @@ -160,7 +162,7 @@ class DatabricksNativeEngineSpec(DatabricksODBCEngineSpec, BasicParametersMixin) encryption_parameters = {"ssl": "1"} @staticmethod - def get_extra_params(database: "Database") -> dict[str, Any]: + def get_extra_params(database: Database) -> dict[str, Any]: """ Add a user agent to be used in the requests. Trim whitespace from connect_args to avoid databricks driver errors @@ -181,9 +183,9 @@ def get_extra_params(database: "Database") -> dict[str, Any]: @classmethod def get_table_names( cls, - database: "Database", + database: Database, inspector: Inspector, - schema: Optional[str], + schema: str | None, ) -> set[str]: return super().get_table_names( database, inspector, schema @@ -213,7 +215,7 @@ def build_sqlalchemy_uri( # type: ignore @classmethod def extract_errors( - cls, ex: Exception, context: Optional[dict[str, Any]] = None + cls, ex: Exception, context: dict[str, Any] | None = None ) -> list[SupersetError]: raw_message = cls._extract_error_message(ex) diff --git a/superset/db_engine_specs/db2.py b/superset/db_engine_specs/db2.py index 45241b3d89adb..5f54613a4b533 100644 --- a/superset/db_engine_specs/db2.py +++ b/superset/db_engine_specs/db2.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod @@ -27,25 +28,25 @@ class Db2EngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "CAST({col} as TIMESTAMP) - MICROSECOND({col}) MICROSECONDS", - "PT1M": "CAST({col} as TIMESTAMP)" + TimeGrain.SECOND: "CAST({col} as TIMESTAMP) - MICROSECOND({col}) MICROSECONDS", + TimeGrain.MINUTE: "CAST({col} as TIMESTAMP)" " - SECOND({col}) SECONDS" " - MICROSECOND({col}) MICROSECONDS", - "PT1H": "CAST({col} as TIMESTAMP)" + TimeGrain.HOUR: "CAST({col} as TIMESTAMP)" " - MINUTE({col}) MINUTES" " - SECOND({col}) SECONDS" " - MICROSECOND({col}) MICROSECONDS ", - "P1D": "CAST({col} as TIMESTAMP)" + TimeGrain.DAY: "CAST({col} as TIMESTAMP)" " - HOUR({col}) HOURS" " - MINUTE({col}) MINUTES" " - SECOND({col}) SECONDS" " - MICROSECOND({col}) MICROSECONDS", - "P1W": "{col} - (DAYOFWEEK({col})) DAYS", - "P1M": "{col} - (DAY({col})-1) DAYS", - "P3M": "{col} - (DAY({col})-1) DAYS" + TimeGrain.WEEK: "{col} - (DAYOFWEEK({col})) DAYS", + TimeGrain.MONTH: "{col} - (DAY({col})-1) DAYS", + TimeGrain.QUARTER: "{col} - (DAY({col})-1) DAYS" " - (MONTH({col})-1) MONTHS" " + ((QUARTER({col})-1) * 3) MONTHS", - "P1Y": "{col} - (DAY({col})-1) DAYS - (MONTH({col})-1) MONTHS", + TimeGrain.YEAR: "{col} - (DAY({col})-1) DAYS - (MONTH({col})-1) MONTHS", } @classmethod diff --git a/superset/db_engine_specs/dremio.py b/superset/db_engine_specs/dremio.py index 7b4c0458cd1a7..2288c5257248c 100644 --- a/superset/db_engine_specs/dremio.py +++ b/superset/db_engine_specs/dremio.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec @@ -30,14 +31,14 @@ class DremioEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', 
{col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } @classmethod diff --git a/superset/db_engine_specs/drill.py b/superset/db_engine_specs/drill.py index 946544863dda7..fb42409b4e952 100644 --- a/superset/db_engine_specs/drill.py +++ b/superset/db_engine_specs/drill.py @@ -21,6 +21,7 @@ from sqlalchemy import types from sqlalchemy.engine.url import URL +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.db_engine_specs.exceptions import SupersetDBAPIProgrammingError @@ -36,16 +37,16 @@ class DrillEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "NEARESTDATE({col}, 'SECOND')", - "PT1M": "NEARESTDATE({col}, 'MINUTE')", - "PT15M": "NEARESTDATE({col}, 'QUARTER_HOUR')", - "PT30M": "NEARESTDATE({col}, 'HALF_HOUR')", - "PT1H": "NEARESTDATE({col}, 'HOUR')", - "P1D": "NEARESTDATE({col}, 'DAY')", - "P1W": "NEARESTDATE({col}, 'WEEK_SUNDAY')", - "P1M": "NEARESTDATE({col}, 'MONTH')", - "P3M": "NEARESTDATE({col}, 'QUARTER')", - "P1Y": "NEARESTDATE({col}, 'YEAR')", + TimeGrain.SECOND: "NEARESTDATE({col}, 'SECOND')", + TimeGrain.MINUTE: "NEARESTDATE({col}, 'MINUTE')", + TimeGrain.FIFTEEN_MINUTES: "NEARESTDATE({col}, 'QUARTER_HOUR')", + TimeGrain.THIRTY_MINUTES: "NEARESTDATE({col}, 'HALF_HOUR')", + TimeGrain.HOUR: "NEARESTDATE({col}, 'HOUR')", + TimeGrain.DAY: "NEARESTDATE({col}, 'DAY')", + TimeGrain.WEEK: "NEARESTDATE({col}, 'WEEK_SUNDAY')", + TimeGrain.MONTH: "NEARESTDATE({col}, 'MONTH')", + 
TimeGrain.QUARTER: "NEARESTDATE({col}, 'QUARTER')", + TimeGrain.YEAR: "NEARESTDATE({col}, 'YEAR')", } # Returns a function to convert a Unix timestamp in milliseconds to a date diff --git a/superset/db_engine_specs/druid.py b/superset/db_engine_specs/druid.py index 43ce310a4061e..478f3e9492c45 100644 --- a/superset/db_engine_specs/druid.py +++ b/superset/db_engine_specs/druid.py @@ -26,6 +26,7 @@ from sqlalchemy.engine.reflection import Inspector from superset import is_feature_enabled +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.db_engine_specs.exceptions import SupersetDBAPIConnectionError from superset.exceptions import SupersetException @@ -48,26 +49,26 @@ class DruidEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT1S')", - "PT5S": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT5S')", - "PT30S": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT30S')", - "PT1M": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT1M')", - "PT5M": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT5M')", - "PT10M": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT10M')", - "PT15M": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT15M')", - "PT30M": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT30M')", - "PT1H": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT1H')", - "PT6H": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT6H')", - "P1D": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1D')", - "P1W": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1W')", - "P1M": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1M')", - "P3M": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P3M')", - "P1Y": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1Y')", - "P1W/1970-01-03T00:00:00Z": ( + TimeGrain.SECOND: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT1S')", + TimeGrain.FIVE_SECONDS: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT5S')", + TimeGrain.THIRTY_SECONDS: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT30S')", + TimeGrain.MINUTE: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 
'PT1M')", + TimeGrain.FIVE_MINUTES: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT5M')", + TimeGrain.TEN_MINUTES: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT10M')", + TimeGrain.FIFTEEN_MINUTES: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT15M')", + TimeGrain.THIRTY_MINUTES: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT30M')", + TimeGrain.HOUR: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT1H')", + TimeGrain.SIX_HOURS: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT6H')", + TimeGrain.DAY: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1D')", + TimeGrain.WEEK: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1W')", + TimeGrain.MONTH: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1M')", + TimeGrain.QUARTER: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P3M')", + TimeGrain.YEAR: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1Y')", + TimeGrain.WEEK_ENDING_SATURDAY: ( "TIME_SHIFT(TIME_FLOOR(TIME_SHIFT(CAST({col} AS TIMESTAMP), " "'P1D', 1), 'P1W'), 'P1D', 5)" ), - "1969-12-28T00:00:00Z/P1W": ( + TimeGrain.WEEK_STARTING_SUNDAY: ( "TIME_SHIFT(TIME_FLOOR(TIME_SHIFT(CAST({col} AS TIMESTAMP), " "'P1D', 1), 'P1W'), 'P1D', -1)" ), diff --git a/superset/db_engine_specs/duckdb.py b/superset/db_engine_specs/duckdb.py index 3bbf9ecc3834d..fa2f01f50a516 100644 --- a/superset/db_engine_specs/duckdb.py +++ b/superset/db_engine_specs/duckdb.py @@ -25,6 +25,7 @@ from sqlalchemy import types from sqlalchemy.engine.reflection import Inspector +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.errors import SupersetErrorType @@ -42,14 +43,14 @@ class DuckDBEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', {col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + 
TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = { diff --git a/superset/db_engine_specs/dynamodb.py b/superset/db_engine_specs/dynamodb.py index 5f7a9e2b71e58..0a29f8d4ae9ce 100644 --- a/superset/db_engine_specs/dynamodb.py +++ b/superset/db_engine_specs/dynamodb.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec @@ -28,23 +29,24 @@ class DynamoDBEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))", - "PT1M": "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))", - "PT1H": "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))", - "P1D": "DATETIME({col}, 'start of day')", - "P1W": "DATETIME({col}, 'start of day', -strftime('%w', {col}) || ' days')", - "P1M": "DATETIME({col}, 'start of month')", - "P3M": ( + TimeGrain.SECOND: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))", + TimeGrain.MINUTE: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))", + TimeGrain.HOUR: "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))", + TimeGrain.DAY: "DATETIME({col}, 'start of day')", + TimeGrain.WEEK: "DATETIME({col}, 'start of day', \ + -strftime('%w', {col}) || ' days')", + TimeGrain.MONTH: "DATETIME({col}, 'start of month')", + TimeGrain.QUARTER: ( "DATETIME({col}, 'start of month', " "printf('-%d month', (strftime('%m', {col}) - 1) % 3))" ), - "P1Y": "DATETIME({col}, 'start of year')", - "P1W/1970-01-03T00:00:00Z": "DATETIME({col}, 'start of day', 'weekday 6')", - "P1W/1970-01-04T00:00:00Z": "DATETIME({col}, 'start of day', 'weekday 0')", - 
"1969-12-28T00:00:00Z/P1W": ( + TimeGrain.YEAR: "DATETIME({col}, 'start of year')", + TimeGrain.WEEK_ENDING_SATURDAY: "DATETIME({col}, 'start of day', 'weekday 6')", + TimeGrain.WEEK_ENDING_SUNDAY: "DATETIME({col}, 'start of day', 'weekday 0')", + TimeGrain.WEEK_STARTING_SUNDAY: ( "DATETIME({col}, 'start of day', 'weekday 0', '-7 days')" ), - "1969-12-29T00:00:00Z/P1W": ( + TimeGrain.WEEK_STARTING_MONDAY: ( "DATETIME({col}, 'start of day', 'weekday 1', '-7 days')" ), } diff --git a/superset/db_engine_specs/elasticsearch.py b/superset/db_engine_specs/elasticsearch.py index d717c52bf592a..173302d58b526 100644 --- a/superset/db_engine_specs/elasticsearch.py +++ b/superset/db_engine_specs/elasticsearch.py @@ -21,6 +21,7 @@ from packaging.version import Version from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.db_engine_specs.exceptions import ( SupersetDBAPIDatabaseError, @@ -42,12 +43,12 @@ class ElasticSearchEngineSpec(BaseEngineSpec): # pylint: disable=abstract-metho _time_grain_expressions = { None: "{col}", - "PT1S": "HISTOGRAM({col}, INTERVAL 1 SECOND)", - "PT1M": "HISTOGRAM({col}, INTERVAL 1 MINUTE)", - "PT1H": "HISTOGRAM({col}, INTERVAL 1 HOUR)", - "P1D": "HISTOGRAM({col}, INTERVAL 1 DAY)", - "P1M": "HISTOGRAM({col}, INTERVAL 1 MONTH)", - "P1Y": "HISTOGRAM({col}, INTERVAL 1 YEAR)", + TimeGrain.SECOND: "HISTOGRAM({col}, INTERVAL 1 SECOND)", + TimeGrain.MINUTE: "HISTOGRAM({col}, INTERVAL 1 MINUTE)", + TimeGrain.HOUR: "HISTOGRAM({col}, INTERVAL 1 HOUR)", + TimeGrain.DAY: "HISTOGRAM({col}, INTERVAL 1 DAY)", + TimeGrain.MONTH: "HISTOGRAM({col}, INTERVAL 1 MONTH)", + TimeGrain.YEAR: "HISTOGRAM({col}, INTERVAL 1 YEAR)", } type_code_map: dict[int, str] = {} # loaded from get_datatype only if needed @@ -104,12 +105,12 @@ class OpenDistroEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method _time_grain_expressions = { None: "{col}", - "PT1S": "date_format({col}, 
'yyyy-MM-dd HH:mm:ss.000')", - "PT1M": "date_format({col}, 'yyyy-MM-dd HH:mm:00.000')", - "PT1H": "date_format({col}, 'yyyy-MM-dd HH:00:00.000')", - "P1D": "date_format({col}, 'yyyy-MM-dd 00:00:00.000')", - "P1M": "date_format({col}, 'yyyy-MM-01 00:00:00.000')", - "P1Y": "date_format({col}, 'yyyy-01-01 00:00:00.000')", + TimeGrain.SECOND: "date_format({col}, 'yyyy-MM-dd HH:mm:ss.000')", + TimeGrain.MINUTE: "date_format({col}, 'yyyy-MM-dd HH:mm:00.000')", + TimeGrain.HOUR: "date_format({col}, 'yyyy-MM-dd HH:00:00.000')", + TimeGrain.DAY: "date_format({col}, 'yyyy-MM-dd 00:00:00.000')", + TimeGrain.MONTH: "date_format({col}, 'yyyy-MM-01 00:00:00.000')", + TimeGrain.YEAR: "date_format({col}, 'yyyy-01-01 00:00:00.000')", } engine = "odelasticsearch" diff --git a/superset/db_engine_specs/exasol.py b/superset/db_engine_specs/exasol.py index 6da56e2feee8f..e5f8c011d1616 100644 --- a/superset/db_engine_specs/exasol.py +++ b/superset/db_engine_specs/exasol.py @@ -16,6 +16,7 @@ # under the License. 
from typing import Any, Optional +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec @@ -29,14 +30,14 @@ class ExasolEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method # Exasol's DATE_TRUNC function is PostgresSQL compatible _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', {col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } @classmethod diff --git a/superset/db_engine_specs/firebird.py b/superset/db_engine_specs/firebird.py index 4448074157073..15c4bef7bf417 100644 --- a/superset/db_engine_specs/firebird.py +++ b/superset/db_engine_specs/firebird.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod @@ -33,7 +34,7 @@ class FirebirdEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": ( + TimeGrain.SECOND: ( "CAST(CAST({col} AS DATE) " "|| ' ' " "|| EXTRACT(HOUR FROM {col}) " @@ -42,7 +43,7 @@ class FirebirdEngineSpec(BaseEngineSpec): "|| ':' " "|| FLOOR(EXTRACT(SECOND FROM {col})) AS TIMESTAMP)" ), - "PT1M": ( + TimeGrain.MINUTE: ( "CAST(CAST({col} AS DATE) " "|| ' ' " "|| EXTRACT(HOUR FROM {col}) " @@ -50,20 +51,20 @@ class FirebirdEngineSpec(BaseEngineSpec): "|| EXTRACT(MINUTE FROM {col}) " "|| ':00' AS TIMESTAMP)" ), - "PT1H": ( + 
TimeGrain.HOUR: ( "CAST(CAST({col} AS DATE) " "|| ' ' " "|| EXTRACT(HOUR FROM {col}) " "|| ':00:00' AS TIMESTAMP)" ), - "P1D": "CAST({col} AS DATE)", - "P1M": ( + TimeGrain.DAY: "CAST({col} AS DATE)", + TimeGrain.MONTH: ( "CAST(EXTRACT(YEAR FROM {col}) " "|| '-' " "|| EXTRACT(MONTH FROM {col}) " "|| '-01' AS DATE)" ), - "P1Y": "CAST(EXTRACT(YEAR FROM {col}) || '-01-01' AS DATE)", + TimeGrain.YEAR: "CAST(EXTRACT(YEAR FROM {col}) || '-01-01' AS DATE)", } @classmethod diff --git a/superset/db_engine_specs/firebolt.py b/superset/db_engine_specs/firebolt.py index ace3d6b3b232e..13ab727ab099c 100644 --- a/superset/db_engine_specs/firebolt.py +++ b/superset/db_engine_specs/firebolt.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec @@ -31,14 +32,14 @@ class FireboltEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))", - "PT1M": "date_trunc('minute', CAST({col} AS TIMESTAMP))", - "PT1H": "date_trunc('hour', CAST({col} AS TIMESTAMP))", - "P1D": "date_trunc('day', CAST({col} AS TIMESTAMP))", - "P1W": "date_trunc('week', CAST({col} AS TIMESTAMP))", - "P1M": "date_trunc('month', CAST({col} AS TIMESTAMP))", - "P3M": "date_trunc('quarter', CAST({col} AS TIMESTAMP))", - "P1Y": "date_trunc('year', CAST({col} AS TIMESTAMP))", + TimeGrain.SECOND: "date_trunc('second', CAST({col} AS TIMESTAMP))", + TimeGrain.MINUTE: "date_trunc('minute', CAST({col} AS TIMESTAMP))", + TimeGrain.HOUR: "date_trunc('hour', CAST({col} AS TIMESTAMP))", + TimeGrain.DAY: "date_trunc('day', CAST({col} AS TIMESTAMP))", + TimeGrain.WEEK: "date_trunc('week', CAST({col} AS TIMESTAMP))", + TimeGrain.MONTH: "date_trunc('month', CAST({col} AS TIMESTAMP))", + TimeGrain.QUARTER: "date_trunc('quarter', CAST({col} AS TIMESTAMP))", + TimeGrain.YEAR: "date_trunc('year', CAST({col} AS TIMESTAMP))", } @classmethod diff --git 
a/superset/db_engine_specs/hana.py b/superset/db_engine_specs/hana.py index 108838f9d2a8d..da05fba847d24 100644 --- a/superset/db_engine_specs/hana.py +++ b/superset/db_engine_specs/hana.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import LimitMethod from superset.db_engine_specs.postgres import PostgresBaseEngineSpec @@ -32,15 +33,15 @@ class HanaEngineSpec(PostgresBaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,20))", - "PT1M": "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,17) || '00')", - "PT1H": "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,14) || '00:00')", - "P1D": "TO_DATE({col})", - "P1M": "TO_DATE(SUBSTRING(TO_DATE({col}),0,7)||'-01')", - "P3M": "TO_DATE(SUBSTRING( \ + TimeGrain.SECOND: "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,20))", + TimeGrain.MINUTE: "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,17) || '00')", + TimeGrain.HOUR: "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,14) || '00:00')", + TimeGrain.DAY: "TO_DATE({col})", + TimeGrain.MONTH: "TO_DATE(SUBSTRING(TO_DATE({col}),0,7)||'-01')", + TimeGrain.QUARTER: "TO_DATE(SUBSTRING( \ TO_DATE({col}), 0, 5)|| LPAD(CAST((CAST(SUBSTRING(QUARTER( \ TO_DATE({col}), 1), 7, 1) as int)-1)*3 +1 as text),2,'0') ||'-01')", - "P1Y": "TO_DATE(YEAR({col})||'-01-01')", + TimeGrain.YEAR: "TO_DATE(YEAR({col})||'-01-01')", } @classmethod diff --git a/superset/db_engine_specs/hive.py b/superset/db_engine_specs/hive.py index 7601ebb2cddf5..d7c2465badceb 100644 --- a/superset/db_engine_specs/hive.py +++ b/superset/db_engine_specs/hive.py @@ -38,6 +38,7 @@ from sqlalchemy.sql.expression import ColumnClause, Select from superset.common.db_query_status import QueryStatus +from superset.constants import TimeGrain from superset.databases.utils import make_url_safe from superset.db_engine_specs.base import BaseEngineSpec from superset.db_engine_specs.presto import 
PrestoEngineSpec @@ -107,16 +108,16 @@ class HiveEngineSpec(PrestoEngineSpec): # pylint: disable=line-too-long _time_grain_expressions = { None: "{col}", - "PT1S": "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd HH:mm:ss')", - "PT1M": "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd HH:mm:00')", - "PT1H": "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd HH:00:00')", - "P1D": "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd 00:00:00')", - "P1W": "date_format(date_sub({col}, CAST(7-from_unixtime(unix_timestamp({col}),'u') as int)), 'yyyy-MM-dd 00:00:00')", - "P1M": "from_unixtime(unix_timestamp({col}), 'yyyy-MM-01 00:00:00')", - "P3M": "date_format(add_months(trunc({col}, 'MM'), -(month({col})-1)%3), 'yyyy-MM-dd 00:00:00')", - "P1Y": "from_unixtime(unix_timestamp({col}), 'yyyy-01-01 00:00:00')", - "P1W/1970-01-03T00:00:00Z": "date_format(date_add({col}, INT(6-from_unixtime(unix_timestamp({col}), 'u'))), 'yyyy-MM-dd 00:00:00')", - "1969-12-28T00:00:00Z/P1W": "date_format(date_add({col}, -INT(from_unixtime(unix_timestamp({col}), 'u'))), 'yyyy-MM-dd 00:00:00')", + TimeGrain.SECOND: "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd HH:mm:ss')", + TimeGrain.MINUTE: "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd HH:mm:00')", + TimeGrain.HOUR: "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd HH:00:00')", + TimeGrain.DAY: "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd 00:00:00')", + TimeGrain.WEEK: "date_format(date_sub({col}, CAST(7-from_unixtime(unix_timestamp({col}),'u') as int)), 'yyyy-MM-dd 00:00:00')", + TimeGrain.MONTH: "from_unixtime(unix_timestamp({col}), 'yyyy-MM-01 00:00:00')", + TimeGrain.QUARTER: "date_format(add_months(trunc({col}, 'MM'), -(month({col})-1)%3), 'yyyy-MM-dd 00:00:00')", + TimeGrain.YEAR: "from_unixtime(unix_timestamp({col}), 'yyyy-01-01 00:00:00')", + TimeGrain.WEEK_ENDING_SATURDAY: "date_format(date_add({col}, INT(6-from_unixtime(unix_timestamp({col}), 'u'))), 'yyyy-MM-dd 00:00:00')", + TimeGrain.WEEK_STARTING_SUNDAY: 
"date_format(date_add({col}, -INT(from_unixtime(unix_timestamp({col}), 'u'))), 'yyyy-MM-dd 00:00:00')", } # Scoping regex at class level to avoid recompiling diff --git a/superset/db_engine_specs/impala.py b/superset/db_engine_specs/impala.py index cd1c9e47329e2..c10cf679355ea 100644 --- a/superset/db_engine_specs/impala.py +++ b/superset/db_engine_specs/impala.py @@ -25,7 +25,7 @@ from sqlalchemy.engine.reflection import Inspector from sqlalchemy.orm import Session -from superset.constants import QUERY_EARLY_CANCEL_KEY +from superset.constants import QUERY_EARLY_CANCEL_KEY, TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.models.sql_lab import Query @@ -42,13 +42,13 @@ class ImpalaEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1M": "TRUNC({col}, 'MI')", - "PT1H": "TRUNC({col}, 'HH')", - "P1D": "TRUNC({col}, 'DD')", - "P1W": "TRUNC({col}, 'WW')", - "P1M": "TRUNC({col}, 'MONTH')", - "P3M": "TRUNC({col}, 'Q')", - "P1Y": "TRUNC({col}, 'YYYY')", + TimeGrain.MINUTE: "TRUNC({col}, 'MI')", + TimeGrain.HOUR: "TRUNC({col}, 'HH')", + TimeGrain.DAY: "TRUNC({col}, 'DD')", + TimeGrain.WEEK: "TRUNC({col}, 'WW')", + TimeGrain.MONTH: "TRUNC({col}, 'MONTH')", + TimeGrain.QUARTER: "TRUNC({col}, 'Q')", + TimeGrain.YEAR: "TRUNC({col}, 'YYYY')", } @classmethod diff --git a/superset/db_engine_specs/kusto.py b/superset/db_engine_specs/kusto.py index 17147d5cc059f..554e8b029f098 100644 --- a/superset/db_engine_specs/kusto.py +++ b/superset/db_engine_specs/kusto.py @@ -21,6 +21,7 @@ from sqlalchemy import types from sqlalchemy.dialects.mssql.base import SMALLDATETIME +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod from superset.db_engine_specs.exceptions import ( SupersetDBAPIDatabaseError, @@ -43,21 +44,24 @@ class KustoSqlEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method _time_grain_expressions = { None: "{col}", - "PT1S": "DATEADD(second, 
DATEDIFF(second, '2000-01-01', {col}), '2000-01-01')", - "PT1M": "DATEADD(minute, DATEDIFF(minute, 0, {col}), 0)", - "PT5M": "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 5 * 5, 0)", - "PT10M": "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 10 * 10, 0)", - "PT15M": "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 15 * 15, 0)", - "PT0.5H": "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 30 * 30, 0)", - "PT1H": "DATEADD(hour, DATEDIFF(hour, 0, {col}), 0)", - "P1D": "DATEADD(day, DATEDIFF(day, 0, {col}), 0)", - "P1W": "DATEADD(day, -1, DATEADD(week, DATEDIFF(week, 0, {col}), 0))", - "P1M": "DATEADD(month, DATEDIFF(month, 0, {col}), 0)", - "P3M": "DATEADD(quarter, DATEDIFF(quarter, 0, {col}), 0)", - "P1Y": "DATEADD(year, DATEDIFF(year, 0, {col}), 0)", - "1969-12-28T00:00:00Z/P1W": "DATEADD(day, -1," + TimeGrain.SECOND: "DATEADD(second, \ DATEDIFF(second, '2000-01-01', {col}), '2000-01-01')", + TimeGrain.MINUTE: "DATEADD(minute, DATEDIFF(minute, 0, {col}), 0)", + TimeGrain.FIVE_MINUTES: "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 5 * 5, 0)", + TimeGrain.TEN_MINUTES: "DATEADD(minute, \ DATEDIFF(minute, 0, {col}) / 10 * 10, 0)", + TimeGrain.FIFTEEN_MINUTES: "DATEADD(minute, \ DATEDIFF(minute, 0, {col}) / 15 * 15, 0)", + TimeGrain.HALF_HOUR: "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 30 * 30, 0)", + TimeGrain.HOUR: "DATEADD(hour, DATEDIFF(hour, 0, {col}), 0)", + TimeGrain.DAY: "DATEADD(day, DATEDIFF(day, 0, {col}), 0)", + TimeGrain.WEEK: "DATEADD(day, -1, DATEADD(week, DATEDIFF(week, 0, {col}), 0))", + TimeGrain.MONTH: "DATEADD(month, DATEDIFF(month, 0, {col}), 0)", + TimeGrain.QUARTER: "DATEADD(quarter, DATEDIFF(quarter, 0, {col}), 0)", + TimeGrain.YEAR: "DATEADD(year, DATEDIFF(year, 0, {col}), 0)", + TimeGrain.WEEK_STARTING_SUNDAY: "DATEADD(day, -1," " DATEADD(week, DATEDIFF(week, 0, {col}), 0))", - "1969-12-29T00:00:00Z/P1W": "DATEADD(week," + TimeGrain.WEEK_STARTING_MONDAY: "DATEADD(week," " DATEDIFF(week, 0, DATEADD(day, -1, {col})), 0)", } @@ -120,12 +124,14 @@
class KustoKqlEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method _time_grain_expressions = { None: "{col}", - "PT1S": "{col}/ time(1s)", - "PT1M": "{col}/ time(1min)", - "PT1H": "{col}/ time(1h)", - "P1D": "{col}/ time(1d)", - "P1M": "datetime_diff('month',CreateDate, datetime(0001-01-01 00:00:00))+1", - "P1Y": "datetime_diff('year',CreateDate, datetime(0001-01-01 00:00:00))+1", + TimeGrain.SECOND: "{col}/ time(1s)", + TimeGrain.MINUTE: "{col}/ time(1min)", + TimeGrain.HOUR: "{col}/ time(1h)", + TimeGrain.DAY: "{col}/ time(1d)", + TimeGrain.MONTH: "datetime_diff('month', CreateDate, \ + datetime(0001-01-01 00:00:00))+1", + TimeGrain.YEAR: "datetime_diff('year', CreateDate, \ + datetime(0001-01-01 00:00:00))+1", } type_code_map: dict[int, str] = {} # loaded from get_datatype only if needed diff --git a/superset/db_engine_specs/kylin.py b/superset/db_engine_specs/kylin.py index f522602a48e7a..34851500316ac 100644 --- a/superset/db_engine_specs/kylin.py +++ b/superset/db_engine_specs/kylin.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec @@ -30,14 +31,14 @@ class KylinEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method _time_grain_expressions = { None: "{col}", - "PT1S": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO SECOND) AS TIMESTAMP)", - "PT1M": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO MINUTE) AS TIMESTAMP)", - "PT1H": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO HOUR) AS TIMESTAMP)", - "P1D": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO DAY) AS DATE)", - "P1W": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO WEEK) AS DATE)", - "P1M": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO MONTH) AS DATE)", - "P3M": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO QUARTER) AS DATE)", - "P1Y": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO YEAR) AS DATE)", + TimeGrain.SECOND: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO SECOND) AS TIMESTAMP)", + TimeGrain.MINUTE: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO 
MINUTE) AS TIMESTAMP)", + TimeGrain.HOUR: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO HOUR) AS TIMESTAMP)", + TimeGrain.DAY: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO DAY) AS DATE)", + TimeGrain.WEEK: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO WEEK) AS DATE)", + TimeGrain.MONTH: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO MONTH) AS DATE)", + TimeGrain.QUARTER: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO QUARTER) AS DATE)", + TimeGrain.YEAR: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO YEAR) AS DATE)", } @classmethod diff --git a/superset/db_engine_specs/mssql.py b/superset/db_engine_specs/mssql.py index 3e0879b90415c..5d29d36ba89b7 100644 --- a/superset/db_engine_specs/mssql.py +++ b/superset/db_engine_specs/mssql.py @@ -24,6 +24,7 @@ from sqlalchemy import types from sqlalchemy.dialects.mssql.base import SMALLDATETIME +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod from superset.errors import SupersetErrorType from superset.utils.core import GenericDataType @@ -55,24 +56,30 @@ class MssqlEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATEADD(SECOND, DATEDIFF(SECOND, '2000-01-01', {col}), '2000-01-01')", - "PT1M": "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}), 0)", - "PT5M": "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}) / 5 * 5, 0)", - "PT10M": "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}) / 10 * 10, 0)", - "PT15M": "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}) / 15 * 15, 0)", - "PT30M": "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}) / 30 * 30, 0)", - "PT1H": "DATEADD(HOUR, DATEDIFF(HOUR, 0, {col}), 0)", - "P1D": "DATEADD(DAY, DATEDIFF(DAY, 0, {col}), 0)", - "P1W": "DATEADD(DAY, 1 - DATEPART(WEEKDAY, {col})," + TimeGrain.SECOND: "DATEADD(SECOND, \ + DATEDIFF(SECOND, '2000-01-01', {col}), '2000-01-01')", + TimeGrain.MINUTE: "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}), 0)", + TimeGrain.FIVE_MINUTES: "DATEADD(MINUTE, \ + DATEDIFF(MINUTE, 0, {col}) / 5 * 5, 0)", + TimeGrain.TEN_MINUTES: "DATEADD(MINUTE, 
\ + DATEDIFF(MINUTE, 0, {col}) / 10 * 10, 0)", + TimeGrain.FIFTEEN_MINUTES: "DATEADD(MINUTE, \ + DATEDIFF(MINUTE, 0, {col}) / 15 * 15, 0)", + TimeGrain.THIRTY_MINUTES: "DATEADD(MINUTE, \ + DATEDIFF(MINUTE, 0, {col}) / 30 * 30, 0)", + TimeGrain.HOUR: "DATEADD(HOUR, DATEDIFF(HOUR, 0, {col}), 0)", + TimeGrain.DAY: "DATEADD(DAY, DATEDIFF(DAY, 0, {col}), 0)", + TimeGrain.WEEK: "DATEADD(DAY, 1 - DATEPART(WEEKDAY, {col})," " DATEADD(DAY, DATEDIFF(DAY, 0, {col}), 0))", - "P1M": "DATEADD(MONTH, DATEDIFF(MONTH, 0, {col}), 0)", - "P3M": "DATEADD(QUARTER, DATEDIFF(QUARTER, 0, {col}), 0)", - "P1Y": "DATEADD(YEAR, DATEDIFF(YEAR, 0, {col}), 0)", - "1969-12-28T00:00:00Z/P1W": "DATEADD(DAY, -1," + TimeGrain.MONTH: "DATEADD(MONTH, DATEDIFF(MONTH, 0, {col}), 0)", + TimeGrain.QUARTER: "DATEADD(QUARTER, DATEDIFF(QUARTER, 0, {col}), 0)", + TimeGrain.YEAR: "DATEADD(YEAR, DATEDIFF(YEAR, 0, {col}), 0)", + TimeGrain.WEEK_STARTING_SUNDAY: "DATEADD(DAY, -1," " DATEADD(WEEK, DATEDIFF(WEEK, 0, {col}), 0))", - "1969-12-29T00:00:00Z/P1W": "DATEADD(WEEK," + TimeGrain.WEEK_STARTING_MONDAY: "DATEADD(WEEK," " DATEDIFF(WEEK, 0, DATEADD(DAY, -1, {col})), 0)", } + column_type_mappings = ( ( re.compile(r"^smalldatetime.*", re.IGNORECASE), diff --git a/superset/db_engine_specs/mysql.py b/superset/db_engine_specs/mysql.py index 9f853d577c30b..e83e53e426143 100644 --- a/superset/db_engine_specs/mysql.py +++ b/superset/db_engine_specs/mysql.py @@ -36,6 +36,7 @@ ) from sqlalchemy.engine.url import URL +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin from superset.errors import SupersetErrorType from superset.models.sql_lab import Query @@ -127,19 +128,19 @@ class MySQLEngineSpec(BaseEngineSpec, BasicParametersMixin): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_ADD(DATE({col}), " + TimeGrain.SECOND: "DATE_ADD(DATE({col}), " "INTERVAL (HOUR({col})*60*60 + MINUTE({col})*60" " + SECOND({col})) SECOND)", - "PT1M": 
"DATE_ADD(DATE({col}), " + TimeGrain.MINUTE: "DATE_ADD(DATE({col}), " "INTERVAL (HOUR({col})*60 + MINUTE({col})) MINUTE)", - "PT1H": "DATE_ADD(DATE({col}), INTERVAL HOUR({col}) HOUR)", - "P1D": "DATE({col})", - "P1W": "DATE(DATE_SUB({col}, INTERVAL DAYOFWEEK({col}) - 1 DAY))", - "P1M": "DATE(DATE_SUB({col}, INTERVAL DAYOFMONTH({col}) - 1 DAY))", - "P3M": "MAKEDATE(YEAR({col}), 1) " + TimeGrain.HOUR: "DATE_ADD(DATE({col}), INTERVAL HOUR({col}) HOUR)", + TimeGrain.DAY: "DATE({col})", + TimeGrain.WEEK: "DATE(DATE_SUB({col}, INTERVAL DAYOFWEEK({col}) - 1 DAY))", + TimeGrain.MONTH: "DATE(DATE_SUB({col}, INTERVAL DAYOFMONTH({col}) - 1 DAY))", + TimeGrain.QUARTER: "MAKEDATE(YEAR({col}), 1) " "+ INTERVAL QUARTER({col}) QUARTER - INTERVAL 1 QUARTER", - "P1Y": "DATE(DATE_SUB({col}, INTERVAL DAYOFYEAR({col}) - 1 DAY))", - "1969-12-29T00:00:00Z/P1W": "DATE(DATE_SUB({col}, " + TimeGrain.YEAR: "DATE(DATE_SUB({col}, INTERVAL DAYOFYEAR({col}) - 1 DAY))", + TimeGrain.WEEK_STARTING_MONDAY: "DATE(DATE_SUB({col}, " "INTERVAL DAYOFWEEK(DATE_SUB({col}, " "INTERVAL 1 DAY)) - 1 DAY))", } diff --git a/superset/db_engine_specs/netezza.py b/superset/db_engine_specs/netezza.py index 6f336b96bc3f7..66b7eeeea6f40 100644 --- a/superset/db_engine_specs/netezza.py +++ b/superset/db_engine_specs/netezza.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+from superset.constants import TimeGrain from superset.db_engine_specs.postgres import PostgresBaseEngineSpec @@ -24,14 +25,14 @@ class NetezzaEngineSpec(PostgresBaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', {col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } @classmethod diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index 59fa52a656a7e..f17032a08d155 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -38,6 +38,7 @@ except (ImportError, RuntimeError): pass +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.errors import SupersetErrorType from superset.models.core import Database @@ -295,14 +296,14 @@ class OcientEngineSpec(BaseEngineSpec): } _time_grain_expressions = { None: "{col}", - "PT1S": "ROUND({col}, 'SECOND')", - "PT1M": "ROUND({col}, 'MINUTE')", - "PT1H": "ROUND({col}, 'HOUR')", - "P1D": "ROUND({col}, 'DAY')", - "P1W": "ROUND({col}, 'WEEK')", - "P1M": "ROUND({col}, 'MONTH')", - "P0.25Y": "ROUND({col}, 'QUARTER')", - "P1Y": "ROUND({col}, 'YEAR')", + TimeGrain.SECOND: "ROUND({col}, 'SECOND')", + TimeGrain.MINUTE: "ROUND({col}, 'MINUTE')", + TimeGrain.HOUR: "ROUND({col}, 'HOUR')", + TimeGrain.DAY: "ROUND({col}, 'DAY')", + TimeGrain.WEEK: "ROUND({col}, 'WEEK')", + 
TimeGrain.MONTH: "ROUND({col}, 'MONTH')", + TimeGrain.QUARTER_YEAR: "ROUND({col}, 'QUARTER')", + TimeGrain.YEAR: "ROUND({col}, 'YEAR')", } @classmethod diff --git a/superset/db_engine_specs/oracle.py b/superset/db_engine_specs/oracle.py index 1199b74406d2a..b98ab8ed59e62 100644 --- a/superset/db_engine_specs/oracle.py +++ b/superset/db_engine_specs/oracle.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod @@ -31,14 +32,14 @@ class OracleEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "CAST({col} as DATE)", - "PT1M": "TRUNC(CAST({col} as DATE), 'MI')", - "PT1H": "TRUNC(CAST({col} as DATE), 'HH')", - "P1D": "TRUNC(CAST({col} as DATE), 'DDD')", - "P1W": "TRUNC(CAST({col} as DATE), 'WW')", - "P1M": "TRUNC(CAST({col} as DATE), 'MONTH')", - "P3M": "TRUNC(CAST({col} as DATE), 'Q')", - "P1Y": "TRUNC(CAST({col} as DATE), 'YEAR')", + TimeGrain.SECOND: "CAST({col} as DATE)", + TimeGrain.MINUTE: "TRUNC(CAST({col} as DATE), 'MI')", + TimeGrain.HOUR: "TRUNC(CAST({col} as DATE), 'HH')", + TimeGrain.DAY: "TRUNC(CAST({col} as DATE), 'DDD')", + TimeGrain.WEEK: "TRUNC(CAST({col} as DATE), 'WW')", + TimeGrain.MONTH: "TRUNC(CAST({col} as DATE), 'MONTH')", + TimeGrain.QUARTER: "TRUNC(CAST({col} as DATE), 'Q')", + TimeGrain.YEAR: "TRUNC(CAST({col} as DATE), 'YEAR')", } @classmethod diff --git a/superset/db_engine_specs/pinot.py b/superset/db_engine_specs/pinot.py index bfec8b294716d..a0662366d1c6e 100644 --- a/superset/db_engine_specs/pinot.py +++ b/superset/db_engine_specs/pinot.py @@ -18,6 +18,7 @@ from sqlalchemy.sql.expression import ColumnClause +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, TimestampExpression @@ -30,19 +31,19 @@ class PinotEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method allows_alias_in_orderby = False # Pinot does its own conversion below - 
_time_grain_expressions: dict[Optional[str], str] = { - "PT1S": "1:SECONDS", - "PT1M": "1:MINUTES", - "PT5M": "5:MINUTES", - "PT10M": "10:MINUTES", - "PT15M": "15:MINUTES", - "PT30M": "30:MINUTES", - "PT1H": "1:HOURS", - "P1D": "1:DAYS", - "P1W": "week", - "P1M": "month", - "P3MY": "quarter", - "P1Y": "year", + _time_grain_expressions = { + TimeGrain.SECOND: "1:SECONDS", + TimeGrain.MINUTE: "1:MINUTES", + TimeGrain.FIVE_MINUTES: "5:MINUTES", + TimeGrain.TEN_MINUTES: "10:MINUTES", + TimeGrain.FIFTEEN_MINUTES: "15:MINUTES", + TimeGrain.THIRTY_MINUTES: "30:MINUTES", + TimeGrain.HOUR: "1:HOURS", + TimeGrain.DAY: "1:DAYS", + TimeGrain.WEEK: "week", + TimeGrain.MONTH: "month", + TimeGrain.QUARTER: "quarter", + TimeGrain.YEAR: "year", } _python_to_java_time_patterns: dict[str, str] = { @@ -55,18 +56,18 @@ class PinotEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method } _use_date_trunc_function: dict[str, bool] = { - "PT1S": False, - "PT1M": False, - "PT5M": False, - "PT10M": False, - "PT15M": False, - "PT30M": False, - "PT1H": False, - "P1D": False, - "P1W": True, - "P1M": True, - "P3M": True, - "P1Y": True, + TimeGrain.SECOND: False, + TimeGrain.MINUTE: False, + TimeGrain.FIVE_MINUTES: False, + TimeGrain.TEN_MINUTES: False, + TimeGrain.FIFTEEN_MINUTES: False, + TimeGrain.THIRTY_MINUTES: False, + TimeGrain.HOUR: False, + TimeGrain.DAY: False, + TimeGrain.WEEK: True, + TimeGrain.MONTH: True, + TimeGrain.QUARTER: True, + TimeGrain.YEAR: True, } @classmethod diff --git a/superset/db_engine_specs/postgres.py b/superset/db_engine_specs/postgres.py index 2088782f83bae..cdd71fdfccbcc 100644 --- a/superset/db_engine_specs/postgres.py +++ b/superset/db_engine_specs/postgres.py @@ -28,6 +28,7 @@ from sqlalchemy.engine.url import URL from sqlalchemy.types import Date, DateTime, String +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin from superset.errors import SupersetErrorType from superset.exceptions 
import SupersetException @@ -100,14 +101,14 @@ class PostgresBaseEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', {col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = { diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py index d5a2ab7605517..861e822345028 100644 --- a/superset/db_engine_specs/presto.py +++ b/superset/db_engine_specs/presto.py @@ -43,6 +43,7 @@ from superset import cache_manager, is_feature_enabled from superset.common.db_query_status import QueryStatus +from superset.constants import TimeGrain from superset.databases.utils import make_url_safe from superset.db_engine_specs.base import BaseEngineSpec from superset.errors import SupersetErrorType @@ -246,22 +247,18 @@ class PrestoBaseEngineSpec(BaseEngineSpec, metaclass=ABCMeta): # pylint: disable=line-too-long _time_grain_expressions = { None: "{col}", - "PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))", - "PT1M": "date_trunc('minute', CAST({col} AS TIMESTAMP))", - "PT1H": "date_trunc('hour', CAST({col} AS TIMESTAMP))", - "P1D": "date_trunc('day', CAST({col} AS TIMESTAMP))", - "P1W": "date_trunc('week', CAST({col} AS TIMESTAMP))", - "P1M": "date_trunc('month', CAST({col} AS TIMESTAMP))", - "P3M": 
"date_trunc('quarter', CAST({col} AS TIMESTAMP))", - "P1Y": "date_trunc('year', CAST({col} AS TIMESTAMP))", - # Week starting Sunday - "1969-12-28T00:00:00Z/P1W": "date_trunc('week', CAST({col} AS TIMESTAMP) + interval '1' day) - interval '1' day", # noqa - # Week starting Monday - "1969-12-29T00:00:00Z/P1W": "date_trunc('week', CAST({col} AS TIMESTAMP))", - # Week ending Saturday - "P1W/1970-01-03T00:00:00Z": "date_trunc('week', CAST({col} AS TIMESTAMP) + interval '1' day) + interval '5' day", # noqa - # Week ending Sunday - "P1W/1970-01-04T00:00:00Z": "date_trunc('week', CAST({col} AS TIMESTAMP)) + interval '6' day", # noqa + TimeGrain.SECOND: "date_trunc('second', CAST({col} AS TIMESTAMP))", + TimeGrain.MINUTE: "date_trunc('minute', CAST({col} AS TIMESTAMP))", + TimeGrain.HOUR: "date_trunc('hour', CAST({col} AS TIMESTAMP))", + TimeGrain.DAY: "date_trunc('day', CAST({col} AS TIMESTAMP))", + TimeGrain.WEEK: "date_trunc('week', CAST({col} AS TIMESTAMP))", + TimeGrain.MONTH: "date_trunc('month', CAST({col} AS TIMESTAMP))", + TimeGrain.QUARTER: "date_trunc('quarter', CAST({col} AS TIMESTAMP))", + TimeGrain.YEAR: "date_trunc('year', CAST({col} AS TIMESTAMP))", + TimeGrain.WEEK_STARTING_SUNDAY: "date_trunc('week', CAST({col} AS TIMESTAMP) + interval '1' day) - interval '1' day", # noqa + TimeGrain.WEEK_STARTING_MONDAY: "date_trunc('week', CAST({col} AS TIMESTAMP))", + TimeGrain.WEEK_ENDING_SATURDAY: "date_trunc('week', CAST({col} AS TIMESTAMP) + interval '1' day) + interval '5' day", # noqa + TimeGrain.WEEK_ENDING_SUNDAY: "date_trunc('week', CAST({col} AS TIMESTAMP)) + interval '6' day", # noqa } @classmethod diff --git a/superset/db_engine_specs/rockset.py b/superset/db_engine_specs/rockset.py index 71adca0b10ba7..73ca57ac06b0c 100644 --- a/superset/db_engine_specs/rockset.py +++ b/superset/db_engine_specs/rockset.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec if 
TYPE_CHECKING: @@ -31,14 +32,14 @@ class RocksetEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', {col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } @classmethod diff --git a/superset/db_engine_specs/snowflake.py b/superset/db_engine_specs/snowflake.py index 32ade649b0af3..879ec307c4257 100644 --- a/superset/db_engine_specs/snowflake.py +++ b/superset/db_engine_specs/snowflake.py @@ -34,7 +34,7 @@ from sqlalchemy.engine.url import URL from typing_extensions import TypedDict -from superset.constants import USER_AGENT +from superset.constants import TimeGrain, USER_AGENT from superset.databases.utils import make_url_safe from superset.db_engine_specs.base import BaseEngineSpec, BasicPropertiesType from superset.db_engine_specs.postgres import PostgresBaseEngineSpec @@ -90,22 +90,22 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('SECOND', {col})", - "PT1M": "DATE_TRUNC('MINUTE', {col})", - "PT5M": "DATEADD(MINUTE, FLOOR(DATE_PART(MINUTE, {col}) / 5) * 5, \ - DATE_TRUNC('HOUR', {col}))", - "PT10M": "DATEADD(MINUTE, FLOOR(DATE_PART(MINUTE, {col}) / 10) * 10, \ - DATE_TRUNC('HOUR', {col}))", - "PT15M": "DATEADD(MINUTE, FLOOR(DATE_PART(MINUTE, {col}) / 15) * 15, \ - DATE_TRUNC('HOUR', {col}))", - "PT30M": "DATEADD(MINUTE, 
FLOOR(DATE_PART(MINUTE, {col}) / 30) * 30, \ - DATE_TRUNC('HOUR', {col}))", - "PT1H": "DATE_TRUNC('HOUR', {col})", - "P1D": "DATE_TRUNC('DAY', {col})", - "P1W": "DATE_TRUNC('WEEK', {col})", - "P1M": "DATE_TRUNC('MONTH', {col})", - "P3M": "DATE_TRUNC('QUARTER', {col})", - "P1Y": "DATE_TRUNC('YEAR', {col})", + TimeGrain.SECOND: "DATE_TRUNC('SECOND', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('MINUTE', {col})", + TimeGrain.FIVE_MINUTES: "DATEADD(MINUTE, \ + FLOOR(DATE_PART(MINUTE, {col}) / 5) * 5, DATE_TRUNC('HOUR', {col}))", + TimeGrain.TEN_MINUTES: "DATEADD(MINUTE, \ + FLOOR(DATE_PART(MINUTE, {col}) / 10) * 10, DATE_TRUNC('HOUR', {col}))", + TimeGrain.FIFTEEN_MINUTES: "DATEADD(MINUTE, \ + FLOOR(DATE_PART(MINUTE, {col}) / 15) * 15, DATE_TRUNC('HOUR', {col}))", + TimeGrain.THIRTY_MINUTES: "DATEADD(MINUTE, \ + FLOOR(DATE_PART(MINUTE, {col}) / 30) * 30, DATE_TRUNC('HOUR', {col}))", + TimeGrain.HOUR: "DATE_TRUNC('HOUR', {col})", + TimeGrain.DAY: "DATE_TRUNC('DAY', {col})", + TimeGrain.WEEK: "DATE_TRUNC('WEEK', {col})", + TimeGrain.MONTH: "DATE_TRUNC('MONTH', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('QUARTER', {col})", + TimeGrain.YEAR: "DATE_TRUNC('YEAR', {col})", } custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = { diff --git a/superset/db_engine_specs/spark.py b/superset/db_engine_specs/spark.py index a6eeb2e9db4d0..95a7bfdaeaf4e 100644 --- a/superset/db_engine_specs/spark.py +++ b/superset/db_engine_specs/spark.py @@ -14,23 +14,25 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+from __future__ import annotations +from superset.constants import TimeGrain from superset.db_engine_specs.hive import HiveEngineSpec -time_grain_expressions = { +time_grain_expressions: dict[str | None, str] = { None: "{col}", - "PT1S": "date_trunc('second', {col})", - "PT1M": "date_trunc('minute', {col})", - "PT1H": "date_trunc('hour', {col})", - "P1D": "date_trunc('day', {col})", - "P1W": "date_trunc('week', {col})", - "P1M": "date_trunc('month', {col})", - "P3M": "date_trunc('quarter', {col})", - "P1Y": "date_trunc('year', {col})", - "P1W/1970-01-03T00:00:00Z": ( + TimeGrain.SECOND: "date_trunc('second', {col})", + TimeGrain.MINUTE: "date_trunc('minute', {col})", + TimeGrain.HOUR: "date_trunc('hour', {col})", + TimeGrain.DAY: "date_trunc('day', {col})", + TimeGrain.WEEK: "date_trunc('week', {col})", + TimeGrain.MONTH: "date_trunc('month', {col})", + TimeGrain.QUARTER: "date_trunc('quarter', {col})", + TimeGrain.YEAR: "date_trunc('year', {col})", + TimeGrain.WEEK_ENDING_SATURDAY: ( "date_trunc('week', {col} + interval '1 day') + interval '5 days'" ), - "1969-12-28T00:00:00Z/P1W": ( + TimeGrain.WEEK_STARTING_SUNDAY: ( "date_trunc('week', {col} + interval '1 day') - interval '1 day'" ), } diff --git a/superset/db_engine_specs/sqlite.py b/superset/db_engine_specs/sqlite.py index 767d0a20ad6ca..06d55375098a3 100644 --- a/superset/db_engine_specs/sqlite.py +++ b/superset/db_engine_specs/sqlite.py @@ -23,6 +23,7 @@ from sqlalchemy import types from sqlalchemy.engine.reflection import Inspector +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.errors import SupersetErrorType @@ -40,23 +41,24 @@ class SqliteEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))", - "PT1M": "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))", - "PT1H": "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))", - "P1D": "DATETIME({col}, 'start of day')", - "P1W": 
"DATETIME({col}, 'start of day', -strftime('%w', {col}) || ' days')", - "P1M": "DATETIME({col}, 'start of month')", - "P3M": ( + TimeGrain.SECOND: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))", + TimeGrain.MINUTE: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))", + TimeGrain.HOUR: "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))", + TimeGrain.DAY: "DATETIME({col}, 'start of day')", + TimeGrain.WEEK: "DATETIME({col}, 'start of day', \ + -strftime('%w', {col}) || ' days')", + TimeGrain.MONTH: "DATETIME({col}, 'start of month')", + TimeGrain.QUARTER: ( "DATETIME({col}, 'start of month', " "printf('-%d month', (strftime('%m', {col}) - 1) % 3))" ), - "P1Y": "DATETIME({col}, 'start of year')", - "P1W/1970-01-03T00:00:00Z": "DATETIME({col}, 'start of day', 'weekday 6')", - "P1W/1970-01-04T00:00:00Z": "DATETIME({col}, 'start of day', 'weekday 0')", - "1969-12-28T00:00:00Z/P1W": ( + TimeGrain.YEAR: "DATETIME({col}, 'start of year')", + TimeGrain.WEEK_ENDING_SATURDAY: "DATETIME({col}, 'start of day', 'weekday 6')", + TimeGrain.WEEK_ENDING_SUNDAY: "DATETIME({col}, 'start of day', 'weekday 0')", + TimeGrain.WEEK_STARTING_SUNDAY: ( "DATETIME({col}, 'start of day', 'weekday 0', '-7 days')" ), - "1969-12-29T00:00:00Z/P1W": ( + TimeGrain.WEEK_STARTING_MONDAY: ( "DATETIME({col}, 'start of day', 'weekday 1', '-7 days')" ), } diff --git a/superset/utils/pandas_postprocessing/utils.py b/superset/utils/pandas_postprocessing/utils.py index 37d53697cb89b..4d6884c8af0e7 100644 --- a/superset/utils/pandas_postprocessing/utils.py +++ b/superset/utils/pandas_postprocessing/utils.py @@ -23,6 +23,7 @@ from flask_babel import gettext as _ from pandas import DataFrame, NamedAgg +from superset.constants import TimeGrain from superset.exceptions import InvalidPostProcessingError NUMPY_FUNCTIONS: dict[str, Callable[..., Any]] = { @@ -74,23 +75,23 @@ "cumsum", ) -PROPHET_TIME_GRAIN_MAP = { - "PT1S": "S", - "PT1M": "min", - "PT5M": "5min", - "PT10M": "10min", - "PT15M": "15min", - "PT30M": "30min", 
- "PT1H": "H", - "P1D": "D", - "P1W": "W", - "P1M": "M", - "P3M": "Q", - "P1Y": "A", - "1969-12-28T00:00:00Z/P1W": "W-SUN", - "1969-12-29T00:00:00Z/P1W": "W-MON", - "P1W/1970-01-03T00:00:00Z": "W-SAT", - "P1W/1970-01-04T00:00:00Z": "W-SUN", +PROPHET_TIME_GRAIN_MAP: dict[str, str] = { + TimeGrain.SECOND: "S", + TimeGrain.MINUTE: "min", + TimeGrain.FIVE_MINUTES: "5min", + TimeGrain.TEN_MINUTES: "10min", + TimeGrain.FIFTEEN_MINUTES: "15min", + TimeGrain.THIRTY_MINUTES: "30min", + TimeGrain.HOUR: "H", + TimeGrain.DAY: "D", + TimeGrain.WEEK: "W", + TimeGrain.MONTH: "M", + TimeGrain.QUARTER: "Q", + TimeGrain.YEAR: "A", + TimeGrain.WEEK_STARTING_SUNDAY: "W-SUN", + TimeGrain.WEEK_STARTING_MONDAY: "W-MON", + TimeGrain.WEEK_ENDING_SATURDAY: "W-SAT", + TimeGrain.WEEK_ENDING_SUNDAY: "W-SUN", } RESAMPLE_METHOD = ("asfreq", "bfill", "ffill", "linear", "median", "mean", "sum") diff --git a/tests/unit_tests/common/test_get_aggregated_join_column.py b/tests/unit_tests/common/test_get_aggregated_join_column.py new file mode 100644 index 0000000000000..8effacf2494cb --- /dev/null +++ b/tests/unit_tests/common/test_get_aggregated_join_column.py @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from pandas import DataFrame, Series, Timestamp +from pandas.testing import assert_frame_equal +from pytest import fixture, mark + +from superset.common.chart_data import ChartDataResultFormat, ChartDataResultType +from superset.common.query_context import QueryContext +from superset.common.query_context_processor import ( + AGGREGATED_JOIN_COLUMN, + QueryContextProcessor, +) +from superset.connectors.base.models import BaseDatasource +from superset.constants import TimeGrain + +query_context_processor = QueryContextProcessor( + QueryContext( + datasource=BaseDatasource(), + queries=[], + result_type=ChartDataResultType.COLUMNS, + form_data={}, + slice_=None, + result_format=ChartDataResultFormat.CSV, + cache_values={}, + ) +) + + +@fixture +def make_join_column_producer(): + def join_column_producer(row: Series, column_index: int) -> str: + return "CUSTOM_FORMAT" + + return join_column_producer + + +@mark.parametrize( + ("time_grain", "expected"), + [ + (TimeGrain.WEEK, "2020-W01"), + (TimeGrain.MONTH, "2020-01"), + (TimeGrain.QUARTER, "2020-Q1"), + (TimeGrain.YEAR, "2020"), + ], +) +def test_aggregated_join_column(time_grain: str, expected: str): + df = DataFrame({"ds": [Timestamp("2020-01-07")]}) + query_context_processor.add_aggregated_join_column(df, time_grain) + result = DataFrame( + {"ds": [Timestamp("2020-01-07")], AGGREGATED_JOIN_COLUMN: [expected]} + ) + assert_frame_equal(df, result) + + +def test_aggregated_join_column_producer(make_join_column_producer): + df = DataFrame({"ds": [Timestamp("2020-01-07")]}) + query_context_processor.add_aggregated_join_column( + df, TimeGrain.YEAR, make_join_column_producer + ) + result = DataFrame( + {"ds": [Timestamp("2020-01-07")], AGGREGATED_JOIN_COLUMN: ["CUSTOM_FORMAT"]} + ) + assert_frame_equal(df, result)