From 44e647ac665dbcf0378b960dbf4bc6c1fcd97ad4 Mon Sep 17 00:00:00 2001 From: "Michael S. Molina" Date: Mon, 22 May 2023 13:31:32 -0300 Subject: [PATCH 01/15] fix: Time shifts with different granularity --- superset/common/query_context_processor.py | 105 ++++++++++++++++----- superset/common/utils/dataframe_utils.py | 6 +- superset/constants.py | 7 ++ 3 files changed, 91 insertions(+), 27 deletions(-) diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index ecb8db4246f6b..d6e5ea1a6f431 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -37,7 +37,7 @@ from superset.common.utils.query_cache_manager import QueryCacheManager from superset.common.utils.time_range_utils import get_since_until_from_query_object from superset.connectors.base.models import BaseDatasource -from superset.constants import CacheRegion +from superset.constants import CacheRegion, WeeklyTimeGrain from superset.exceptions import ( InvalidPostProcessingError, QueryObjectValidationError, @@ -87,12 +87,10 @@ class QueryContextProcessor: to retrieve the data payload for a given viz. """ + AGGREGATED_JOIN_COLUMN = "$aggregated_join_column" + _query_context: QueryContext _qc_datasource: BaseDatasource - """ - The query context contains the query object and additional fields necessary - to retrieve the data payload for a given viz. 
- """ def __init__(self, query_context: QueryContext): self._query_context = query_context @@ -317,9 +315,8 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme query_object_clone = copy.copy(query_object) queries: list[str] = [] cache_keys: list[str | None] = [] - rv_dfs: list[pd.DataFrame] = [df] + offset_dfs: list[pd.DataFrame] = [] - time_offsets = query_object.time_offsets outer_from_dttm, outer_to_dttm = get_since_until_from_query_object(query_object) if not outer_from_dttm or not outer_to_dttm: raise QueryObjectValidationError( @@ -328,7 +325,25 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme "when using a Time Comparison." ) ) - for offset in time_offsets: + + columns = df.columns + time_grain = query_object.extras["time_grain_sqla"] + use_aggregated_join_column = any( + grain in time_grain for grain in ("P1W", "P1M", "P3M", "P1Y") + ) + if use_aggregated_join_column: + # adds aggregated join column + df[self.AGGREGATED_JOIN_COLUMN] = df.apply( + lambda row: self.get_aggregated_join_column(row, 0, time_grain), axis=1 + ) + # skips the first column which is the temporal column + # because we'll use the aggregated join columns instead + columns = df.columns[1:] + + metric_names = get_metric_names(query_object.metrics) + join_keys = [col for col in columns if col not in metric_names] + + for offset in query_object.time_offsets: try: # pylint: disable=line-too-long # Since the xaxis is also a column name for the time filter, xaxis_label will be set as granularity @@ -364,13 +379,15 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme ] # `offset` is added to the hash function - cache_key = self.query_cache_key(query_object_clone, time_offset=offset) + cache_key = self.query_cache_key( + query_object_clone, time_offset=offset, time_grain=time_grain + ) cache = QueryCacheManager.get( cache_key, CacheRegion.DATA, query_context.force ) # whether hit on the cache if 
cache.is_loaded: - rv_dfs.append(cache.df) + offset_dfs.append(cache.df) queries.append(cache.query) cache_keys.append(cache_key) continue @@ -379,11 +396,8 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme # rename metrics: SUM(value) => SUM(value) 1 year ago metrics_mapping = { metric: TIME_COMPARISON.join([metric, offset]) - for metric in get_metric_names( - query_object_clone_dct.get("metrics", []) - ) + for metric in metric_names } - join_keys = [col for col in df.columns if col not in metrics_mapping.keys()] if isinstance(self._qc_datasource, Query): result = self._qc_datasource.exc_query(query_object_clone_dct) @@ -420,21 +434,23 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme ) ) + # modifies temporal column using offset offset_metrics_df[index] = offset_metrics_df[index] - DateOffset( **normalize_time_delta(offset) ) - # df left join `offset_metrics_df` - offset_df = dataframe_utils.left_join_df( - left_df=df, - right_df=offset_metrics_df, - join_keys=join_keys, - ) - offset_slice = offset_df[metrics_mapping.values()] + if use_aggregated_join_column: + # adds aggregated join column + offset_metrics_df[ + self.AGGREGATED_JOIN_COLUMN + ] = offset_metrics_df.apply( + lambda row: self.get_aggregated_join_column(row, 0, time_grain), + axis=1, + ) - # set offset_slice to cache and stack. 
+ # cache df and query value = { - "df": offset_slice, + "df": offset_metrics_df, "query": result.query, } cache.set( @@ -444,10 +460,47 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme datasource_uid=query_context.datasource.uid, region=CacheRegion.DATA, ) - rv_dfs.append(offset_slice) + offset_dfs.append(offset_metrics_df) + + if offset_dfs: + # iterate on offset_dfs, left join each with df + for offset_df in offset_dfs: + df = dataframe_utils.left_join_df( + left_df=df, + right_df=offset_df, + join_keys=join_keys, + rsuffix="_right", + ) + + # remove AGGREGATED_JOIN_COLUMN from df + if use_aggregated_join_column: + df = df.drop(columns=[self.AGGREGATED_JOIN_COLUMN]) + + return CachedTimeOffset(df=df, queries=queries, cache_keys=cache_keys) + + def get_aggregated_join_column( + self, row: pd.Series, column_index: int, time_grain: str + ) -> str: + # weekly time grain + if "P1W" in time_grain: + if time_grain in ( + WeeklyTimeGrain.WEEK_STARTING_SUNDAY, + WeeklyTimeGrain.WEEK_ENDING_SATURDAY, + ): + return row[column_index].strftime("%Y-W%U") + else: + return row[column_index].strftime("%Y-W%W") + + # monthly time grain + elif "P1M" in time_grain: + return row[column_index].strftime("%Y-%m") + + # quarterly time grain + elif "P3M" in time_grain: + return row[column_index].strftime("%Y-Q") + str(row[column_index].quarter) - rv_df = pd.concat(rv_dfs, axis=1, copy=False) if time_offsets else df - return CachedTimeOffset(df=rv_df, queries=queries, cache_keys=cache_keys) + # yearly time grain + return row[column_index].strftime("%Y") def get_data(self, df: pd.DataFrame) -> str | list[dict[str, Any]]: if self._query_context.result_format in ChartDataResultFormat.table_like(): diff --git a/superset/common/utils/dataframe_utils.py b/superset/common/utils/dataframe_utils.py index a3421f6431061..7772ec58509bb 100644 --- a/superset/common/utils/dataframe_utils.py +++ b/superset/common/utils/dataframe_utils.py @@ -30,8 +30,12 @@ def 
left_join_df( left_df: pd.DataFrame, right_df: pd.DataFrame, join_keys: list[str], + lsuffix: str = "", + rsuffix: str = "", ) -> pd.DataFrame: - df = left_df.set_index(join_keys).join(right_df.set_index(join_keys)) + df = left_df.set_index(join_keys).join( + right_df.set_index(join_keys), lsuffix=lsuffix, rsuffix=rsuffix + ) df.reset_index(inplace=True) return df diff --git a/superset/constants.py b/superset/constants.py index e4bad9f8aa728..826b8d249e5b9 100644 --- a/superset/constants.py +++ b/superset/constants.py @@ -186,6 +186,13 @@ class RouteMethod: # pylint: disable=too-few-public-methods ) +class WeeklyTimeGrain(str, Enum): + WEEK_STARTING_SUNDAY = "1969-12-28T00:00:00Z/P1W" + WEEK_STARTING_MONDAY = "1969-12-29T00:00:00Z/P1W" + WEEK_ENDING_SATURDAY = "P1W/1970-01-03T00:00:00Z" + WEEK_ENDING_SUNDAY = "P1W/1970-01-04T00:00:00Z" + + class PandasAxis(int, Enum): ROW = 0 COLUMN = 1 From c6b455257ba7b269302f382f546d88a99425098a Mon Sep 17 00:00:00 2001 From: "Michael S. Molina" Date: Tue, 30 May 2023 10:49:54 -0300 Subject: [PATCH 02/15] Extracts constants --- superset/common/query_context_processor.py | 51 ++++++++++++---------- superset/constants.py | 15 ++++++- 2 files changed, 43 insertions(+), 23 deletions(-) diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index d6e5ea1a6f431..adf9c01fb322b 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -37,7 +37,7 @@ from superset.common.utils.query_cache_manager import QueryCacheManager from superset.common.utils.time_range_utils import get_since_until_from_query_object from superset.connectors.base.models import BaseDatasource -from superset.constants import CacheRegion, WeeklyTimeGrain +from superset.constants import CacheRegion, TimeGrain from superset.exceptions import ( InvalidPostProcessingError, QueryObjectValidationError, @@ -74,6 +74,19 @@ stats_logger: BaseStatsLogger = config["STATS_LOGGER"] logger 
= logging.getLogger(__name__) +AGGREGATED_JOIN_COLUMN = "__aggregated_join_column" + +AGGREGATED_JOIN_GRAINS = { + TimeGrain.WEEK, + TimeGrain.WEEK_STARTING_SUNDAY, + TimeGrain.WEEK_STARTING_MONDAY, + TimeGrain.WEEK_ENDING_SATURDAY, + TimeGrain.WEEK_ENDING_SUNDAY, + TimeGrain.MONTH, + TimeGrain.QUARTER, + TimeGrain.YEAR, +} + class CachedTimeOffset(TypedDict): df: pd.DataFrame @@ -87,8 +100,6 @@ class QueryContextProcessor: to retrieve the data payload for a given viz. """ - AGGREGATED_JOIN_COLUMN = "$aggregated_join_column" - _query_context: QueryContext _qc_datasource: BaseDatasource @@ -328,12 +339,10 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme columns = df.columns time_grain = query_object.extras["time_grain_sqla"] - use_aggregated_join_column = any( - grain in time_grain for grain in ("P1W", "P1M", "P3M", "P1Y") - ) + use_aggregated_join_column = time_grain in AGGREGATED_JOIN_GRAINS if use_aggregated_join_column: # adds aggregated join column - df[self.AGGREGATED_JOIN_COLUMN] = df.apply( + df[AGGREGATED_JOIN_COLUMN] = df.apply( lambda row: self.get_aggregated_join_column(row, 0, time_grain), axis=1 ) # skips the first column which is the temporal column @@ -441,9 +450,7 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme if use_aggregated_join_column: # adds aggregated join column - offset_metrics_df[ - self.AGGREGATED_JOIN_COLUMN - ] = offset_metrics_df.apply( + offset_metrics_df[AGGREGATED_JOIN_COLUMN] = offset_metrics_df.apply( lambda row: self.get_aggregated_join_column(row, 0, time_grain), axis=1, ) @@ -474,32 +481,32 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme # remove AGGREGATED_JOIN_COLUMN from df if use_aggregated_join_column: - df = df.drop(columns=[self.AGGREGATED_JOIN_COLUMN]) + df = df.drop(columns=[AGGREGATED_JOIN_COLUMN]) return CachedTimeOffset(df=df, queries=queries, cache_keys=cache_keys) def get_aggregated_join_column( self, row: 
pd.Series, column_index: int, time_grain: str ) -> str: - # weekly time grain - if "P1W" in time_grain: - if time_grain in ( - WeeklyTimeGrain.WEEK_STARTING_SUNDAY, - WeeklyTimeGrain.WEEK_ENDING_SATURDAY, + if time_grain in ( + TimeGrain.WEEK_STARTING_SUNDAY, + TimeGrain.WEEK_ENDING_SATURDAY, ): return row[column_index].strftime("%Y-W%U") - else: + + elif time_grain in ( + TimeGrain.WEEK, + TimeGrain.WEEK_STARTING_MONDAY, + TimeGrain.WEEK_ENDING_SUNDAY, + ): return row[column_index].strftime("%Y-W%W") - # monthly time grain - elif "P1M" in time_grain: + elif time_grain == TimeGrain.MONTH: return row[column_index].strftime("%Y-%m") - # quarterly time grain - elif "P3M" in time_grain: + elif time_grain == TimeGrain.QUARTER: return row[column_index].strftime("%Y-Q") + str(row[column_index].quarter) - # yearly time grain return row[column_index].strftime("%Y") def get_data(self, df: pd.DataFrame) -> str | list[dict[str, Any]]: diff --git a/superset/constants.py b/superset/constants.py index 826b8d249e5b9..25733671b6892 100644 --- a/superset/constants.py +++ b/superset/constants.py @@ -186,11 +186,24 @@ class RouteMethod: # pylint: disable=too-few-public-methods ) -class WeeklyTimeGrain(str, Enum): +class TimeGrain(str, Enum): + DATE = "date" + SECOND = "PT1S" + MINUTE = "PT1M" + FIVE_MINUTES = "PT5M" + TEN_MINUTES = "PT10M" + FIFTEEN_MINUTES = "PT15M" + THIRTY_MINUTES = "PT30M" + HOUR = "PT1H" + DAY = "P1D" + WEEK = "P1W" WEEK_STARTING_SUNDAY = "1969-12-28T00:00:00Z/P1W" WEEK_STARTING_MONDAY = "1969-12-29T00:00:00Z/P1W" WEEK_ENDING_SATURDAY = "P1W/1970-01-03T00:00:00Z" WEEK_ENDING_SUNDAY = "P1W/1970-01-04T00:00:00Z" + MONTH = "P1M" + QUARTER = "P3M" + YEAR = "P1Y" class PandasAxis(int, Enum): From 45bab6a8ad5d7eb1b2b9503cbcb523a7cf18ea46 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Molina" Date: Wed, 31 May 2023 09:26:56 -0300 Subject: [PATCH 03/15] Replaces literals with constants --- superset/constants.py | 8 ++- superset/db_engine_specs/ascend.py | 17 ++++--- superset/db_engine_specs/athena.py | 21 ++++---- superset/db_engine_specs/base.py | 39 +++++++------- superset/db_engine_specs/bigquery.py | 28 +++++----- superset/db_engine_specs/crate.py | 17 ++++--- superset/db_engine_specs/databricks.py | 22 ++++---- superset/db_engine_specs/db2.py | 17 ++++--- superset/db_engine_specs/dremio.py | 17 ++++--- superset/db_engine_specs/drill.py | 21 ++++---- superset/db_engine_specs/druid.py | 35 ++++++------- superset/db_engine_specs/duckdb.py | 17 ++++--- superset/db_engine_specs/dynamodb.py | 25 ++++----- superset/db_engine_specs/elasticsearch.py | 25 ++++----- superset/db_engine_specs/exasol.py | 17 ++++--- superset/db_engine_specs/firebird.py | 13 ++--- superset/db_engine_specs/firebolt.py | 17 ++++--- superset/db_engine_specs/hana.py | 15 +++--- superset/db_engine_specs/hive.py | 21 ++++---- superset/db_engine_specs/impala.py | 16 +++--- superset/db_engine_specs/kusto.py | 41 +++++++-------- superset/db_engine_specs/kylin.py | 17 ++++--- superset/db_engine_specs/mssql.py | 30 ++++++----- superset/db_engine_specs/mysql.py | 19 +++---- superset/db_engine_specs/netezza.py | 17 ++++--- superset/db_engine_specs/ocient.py | 17 ++++--- superset/db_engine_specs/oracle.py | 17 ++++--- superset/db_engine_specs/pinot.py | 51 ++++++++++--------- superset/db_engine_specs/postgres.py | 17 ++++--- superset/db_engine_specs/presto.py | 29 +++++------ superset/db_engine_specs/rockset.py | 17 ++++--- superset/db_engine_specs/snowflake.py | 26 +++++----- superset/db_engine_specs/spark.py | 21 ++++---- superset/db_engine_specs/sqlite.py | 25 ++++----- superset/utils/pandas_postprocessing/utils.py | 33 ++++++------ 35 files changed, 408 insertions(+), 377 deletions(-) diff --git a/superset/constants.py b/superset/constants.py index 25733671b6892..632abeb513944 
100644 --- a/superset/constants.py +++ b/superset/constants.py @@ -186,15 +186,18 @@ class RouteMethod: # pylint: disable=too-few-public-methods ) -class TimeGrain(str, Enum): - DATE = "date" +class TimeGrain: SECOND = "PT1S" + FIVE_SECONDS = "PT5S" + THIRTY_SECONDS = "PT30S" MINUTE = "PT1M" FIVE_MINUTES = "PT5M" TEN_MINUTES = "PT10M" FIFTEEN_MINUTES = "PT15M" THIRTY_MINUTES = "PT30M" + HALF_HOUR = "PT0.5H" HOUR = "PT1H" + SIX_HOURS = "PT6H" DAY = "P1D" WEEK = "P1W" WEEK_STARTING_SUNDAY = "1969-12-28T00:00:00Z/P1W" @@ -203,6 +206,7 @@ class TimeGrain(str, Enum): WEEK_ENDING_SUNDAY = "P1W/1970-01-04T00:00:00Z" MONTH = "P1M" QUARTER = "P3M" + QUARTER_YEAR = "P0.25Y" YEAR = "P1Y" diff --git a/superset/db_engine_specs/ascend.py b/superset/db_engine_specs/ascend.py index 759b78ad1c928..6d3d7a496e13a 100644 --- a/superset/db_engine_specs/ascend.py +++ b/superset/db_engine_specs/ascend.py @@ -16,6 +16,7 @@ # under the License. from sqlalchemy.dialects import registry +from superset.constants import TimeGrain from superset.db_engine_specs.impala import ImpalaEngineSpec @@ -29,12 +30,12 @@ class AscendEngineSpec(ImpalaEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', {col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } diff --git a/superset/db_engine_specs/athena.py b/superset/db_engine_specs/athena.py index 
ad6bed113da87..c62a2455f68fa 100644 --- a/superset/db_engine_specs/athena.py +++ b/superset/db_engine_specs/athena.py @@ -22,6 +22,7 @@ from flask_babel import gettext as __ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.errors import SupersetErrorType @@ -38,17 +39,17 @@ class AthenaEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))", - "PT1M": "date_trunc('minute', CAST({col} AS TIMESTAMP))", - "PT1H": "date_trunc('hour', CAST({col} AS TIMESTAMP))", - "P1D": "date_trunc('day', CAST({col} AS TIMESTAMP))", - "P1W": "date_trunc('week', CAST({col} AS TIMESTAMP))", - "P1M": "date_trunc('month', CAST({col} AS TIMESTAMP))", - "P3M": "date_trunc('quarter', CAST({col} AS TIMESTAMP))", - "P1Y": "date_trunc('year', CAST({col} AS TIMESTAMP))", - "P1W/1970-01-03T00:00:00Z": "date_add('day', 5, date_trunc('week', \ + TimeGrain.SECOND: "date_trunc('second', CAST({col} AS TIMESTAMP))", + TimeGrain.MINUTE: "date_trunc('minute', CAST({col} AS TIMESTAMP))", + TimeGrain.HOUR: "date_trunc('hour', CAST({col} AS TIMESTAMP))", + TimeGrain.DAY: "date_trunc('day', CAST({col} AS TIMESTAMP))", + TimeGrain.WEEK: "date_trunc('week', CAST({col} AS TIMESTAMP))", + TimeGrain.MONTH: "date_trunc('month', CAST({col} AS TIMESTAMP))", + TimeGrain.QUARTER: "date_trunc('quarter', CAST({col} AS TIMESTAMP))", + TimeGrain.YEAR: "date_trunc('year', CAST({col} AS TIMESTAMP))", + TimeGrain.WEEK_ENDING_SATURDAY: "date_add('day', 5, date_trunc('week', \ date_add('day', 1, CAST({col} AS TIMESTAMP))))", - "1969-12-28T00:00:00Z/P1W": "date_add('day', -1, date_trunc('week', \ + TimeGrain.WEEK_STARTING_SUNDAY: "date_add('day', -1, date_trunc('week', \ date_add('day', 1, CAST({col} AS TIMESTAMP))))", } diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index ef922a5e63a44..01d878ce0c54d 100644 --- 
a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -49,6 +49,7 @@ from typing_extensions import TypedDict from superset import security_manager, sql_parse +from superset.constants import TimeGrain as TimeGrainConstants from superset.databases.utils import make_url_safe from superset.errors import ErrorLevel, SupersetError, SupersetErrorType from superset.sql_parse import ParsedQuery, Table @@ -80,25 +81,25 @@ class TimeGrain(NamedTuple): builtin_time_grains: dict[str | None, str] = { - "PT1S": __("Second"), - "PT5S": __("5 second"), - "PT30S": __("30 second"), - "PT1M": __("Minute"), - "PT5M": __("5 minute"), - "PT10M": __("10 minute"), - "PT15M": __("15 minute"), - "PT30M": __("30 minute"), - "PT1H": __("Hour"), - "PT6H": __("6 hour"), - "P1D": __("Day"), - "P1W": __("Week"), - "P1M": __("Month"), - "P3M": __("Quarter"), - "P1Y": __("Year"), - "1969-12-28T00:00:00Z/P1W": __("Week starting Sunday"), - "1969-12-29T00:00:00Z/P1W": __("Week starting Monday"), - "P1W/1970-01-03T00:00:00Z": __("Week ending Saturday"), - "P1W/1970-01-04T00:00:00Z": __("Week_ending Sunday"), + TimeGrainConstants.SECOND: __("Second"), + TimeGrainConstants.FIVE_SECONDS: __("5 second"), + TimeGrainConstants.THIRTY_SECONDS: __("30 second"), + TimeGrainConstants.MINUTE: __("Minute"), + TimeGrainConstants.FIVE_MINUTES: __("5 minute"), + TimeGrainConstants.TEN_MINUTES: __("10 minute"), + TimeGrainConstants.FIFTEEN_MINUTES: __("15 minute"), + TimeGrainConstants.THIRTY_MINUTES: __("30 minute"), + TimeGrainConstants.HOUR: __("Hour"), + TimeGrainConstants.SIX_HOURS: __("6 hour"), + TimeGrainConstants.DAY: __("Day"), + TimeGrainConstants.WEEK: __("Week"), + TimeGrainConstants.MONTH: __("Month"), + TimeGrainConstants.QUARTER: __("Quarter"), + TimeGrainConstants.YEAR: __("Year"), + TimeGrainConstants.WEEK_STARTING_SUNDAY: __("Week starting Sunday"), + TimeGrainConstants.WEEK_STARTING_MONDAY: __("Week starting Monday"), + TimeGrainConstants.WEEK_ENDING_SATURDAY: __("Week ending 
Saturday"), + TimeGrainConstants.WEEK_ENDING_SUNDAY: __("Week_ending Sunday"), } diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py index 3b62f4bbb809c..e69194f50f705 100644 --- a/superset/db_engine_specs/bigquery.py +++ b/superset/db_engine_specs/bigquery.py @@ -35,7 +35,7 @@ from typing_extensions import TypedDict from superset import sql_parse -from superset.constants import PASSWORD_MASK +from superset.constants import PASSWORD_MASK, TimeGrain from superset.databases.schemas import encrypted_field_properties, EncryptedString from superset.databases.utils import make_url_safe from superset.db_engine_specs.base import BaseEngineSpec, BasicPropertiesType @@ -147,31 +147,31 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met _time_grain_expressions = { None: "{col}", - "PT1S": "CAST(TIMESTAMP_SECONDS(" + TimeGrain.SECOND: "CAST(TIMESTAMP_SECONDS(" "UNIX_SECONDS(CAST({col} AS TIMESTAMP))" ") AS {type})", - "PT1M": "CAST(TIMESTAMP_SECONDS(" + TimeGrain.MINUTE: "CAST(TIMESTAMP_SECONDS(" "60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 60)" ") AS {type})", - "PT5M": "CAST(TIMESTAMP_SECONDS(" + TimeGrain.FIVE_MINUTES: "CAST(TIMESTAMP_SECONDS(" "5*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 5*60)" ") AS {type})", - "PT10M": "CAST(TIMESTAMP_SECONDS(" + TimeGrain.TEN_MINUTES: "CAST(TIMESTAMP_SECONDS(" "10*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 10*60)" ") AS {type})", - "PT15M": "CAST(TIMESTAMP_SECONDS(" + TimeGrain.FIFTEEN_MINUTES: "CAST(TIMESTAMP_SECONDS(" "15*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 15*60)" ") AS {type})", - "PT30M": "CAST(TIMESTAMP_SECONDS(" + TimeGrain.THIRTY_MINUTES: "CAST(TIMESTAMP_SECONDS(" "30*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 30*60)" ") AS {type})", - "PT1H": "{func}({col}, HOUR)", - "P1D": "{func}({col}, DAY)", - "P1W": "{func}({col}, WEEK)", - "1969-12-29T00:00:00Z/P1W": "{func}({col}, ISOWEEK)", - "P1M": "{func}({col}, MONTH)", - "P3M": 
"{func}({col}, QUARTER)", - "P1Y": "{func}({col}, YEAR)", + TimeGrain.HOUR: "{func}({col}, HOUR)", + TimeGrain.DAY: "{func}({col}, DAY)", + TimeGrain.WEEK: "{func}({col}, WEEK)", + TimeGrain.WEEK_STARTING_MONDAY: "{func}({col}, ISOWEEK)", + TimeGrain.MONTH: "{func}({col}, MONTH)", + TimeGrain.QUARTER: "{func}({col}, QUARTER)", + TimeGrain.YEAR: "{func}({col}, YEAR)", } custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = { diff --git a/superset/db_engine_specs/crate.py b/superset/db_engine_specs/crate.py index d8d91c67962d6..46ce1e08ff24e 100644 --- a/superset/db_engine_specs/crate.py +++ b/superset/db_engine_specs/crate.py @@ -21,6 +21,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec if TYPE_CHECKING: @@ -33,14 +34,14 @@ class CrateEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', {col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } @classmethod diff --git a/superset/db_engine_specs/databricks.py b/superset/db_engine_specs/databricks.py index 5df24be65d6b5..df53d017b7ad2 100644 --- a/superset/db_engine_specs/databricks.py +++ b/superset/db_engine_specs/databricks.py @@ -28,7 +28,7 @@ from sqlalchemy.engine.url import URL from typing_extensions import TypedDict -from superset.constants import 
USER_AGENT +from superset.constants import TimeGrain, USER_AGENT from superset.databases.utils import make_url_safe from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin from superset.db_engine_specs.hive import HiveEngineSpec @@ -95,18 +95,18 @@ class DatabricksPropertiesType(TypedDict): time_grain_expressions = { None: "{col}", - "PT1S": "date_trunc('second', {col})", - "PT1M": "date_trunc('minute', {col})", - "PT1H": "date_trunc('hour', {col})", - "P1D": "date_trunc('day', {col})", - "P1W": "date_trunc('week', {col})", - "P1M": "date_trunc('month', {col})", - "P3M": "date_trunc('quarter', {col})", - "P1Y": "date_trunc('year', {col})", - "P1W/1970-01-03T00:00:00Z": ( + TimeGrain.SECOND: "date_trunc('second', {col})", + TimeGrain.MINUTE: "date_trunc('minute', {col})", + TimeGrain.HOUR: "date_trunc('hour', {col})", + TimeGrain.DAY: "date_trunc('day', {col})", + TimeGrain.WEEK: "date_trunc('week', {col})", + TimeGrain.MONTH: "date_trunc('month', {col})", + TimeGrain.QUARTER: "date_trunc('quarter', {col})", + TimeGrain.YEAR: "date_trunc('year', {col})", + TimeGrain.WEEK_ENDING_SATURDAY: ( "date_trunc('week', {col} + interval '1 day') + interval '5 days'" ), - "1969-12-28T00:00:00Z/P1W": ( + TimeGrain.WEEK_STARTING_SUNDAY: ( "date_trunc('week', {col} + interval '1 day') - interval '1 day'" ), } diff --git a/superset/db_engine_specs/db2.py b/superset/db_engine_specs/db2.py index 45241b3d89adb..5f54613a4b533 100644 --- a/superset/db_engine_specs/db2.py +++ b/superset/db_engine_specs/db2.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod @@ -27,25 +28,25 @@ class Db2EngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "CAST({col} as TIMESTAMP) - MICROSECOND({col}) MICROSECONDS", - "PT1M": "CAST({col} as TIMESTAMP)" + TimeGrain.SECOND: "CAST({col} as TIMESTAMP) - MICROSECOND({col}) MICROSECONDS", + TimeGrain.MINUTE: "CAST({col} as TIMESTAMP)" " - SECOND({col}) SECONDS" " - MICROSECOND({col}) MICROSECONDS", - "PT1H": "CAST({col} as TIMESTAMP)" + TimeGrain.HOUR: "CAST({col} as TIMESTAMP)" " - MINUTE({col}) MINUTES" " - SECOND({col}) SECONDS" " - MICROSECOND({col}) MICROSECONDS ", - "P1D": "CAST({col} as TIMESTAMP)" + TimeGrain.DAY: "CAST({col} as TIMESTAMP)" " - HOUR({col}) HOURS" " - MINUTE({col}) MINUTES" " - SECOND({col}) SECONDS" " - MICROSECOND({col}) MICROSECONDS", - "P1W": "{col} - (DAYOFWEEK({col})) DAYS", - "P1M": "{col} - (DAY({col})-1) DAYS", - "P3M": "{col} - (DAY({col})-1) DAYS" + TimeGrain.WEEK: "{col} - (DAYOFWEEK({col})) DAYS", + TimeGrain.MONTH: "{col} - (DAY({col})-1) DAYS", + TimeGrain.QUARTER: "{col} - (DAY({col})-1) DAYS" " - (MONTH({col})-1) MONTHS" " + ((QUARTER({col})-1) * 3) MONTHS", - "P1Y": "{col} - (DAY({col})-1) DAYS - (MONTH({col})-1) MONTHS", + TimeGrain.YEAR: "{col} - (DAY({col})-1) DAYS - (MONTH({col})-1) MONTHS", } @classmethod diff --git a/superset/db_engine_specs/dremio.py b/superset/db_engine_specs/dremio.py index 7b4c0458cd1a7..2288c5257248c 100644 --- a/superset/db_engine_specs/dremio.py +++ b/superset/db_engine_specs/dremio.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec @@ -30,14 +31,14 @@ class DremioEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', 
{col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } @classmethod diff --git a/superset/db_engine_specs/drill.py b/superset/db_engine_specs/drill.py index 946544863dda7..fb42409b4e952 100644 --- a/superset/db_engine_specs/drill.py +++ b/superset/db_engine_specs/drill.py @@ -21,6 +21,7 @@ from sqlalchemy import types from sqlalchemy.engine.url import URL +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.db_engine_specs.exceptions import SupersetDBAPIProgrammingError @@ -36,16 +37,16 @@ class DrillEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "NEARESTDATE({col}, 'SECOND')", - "PT1M": "NEARESTDATE({col}, 'MINUTE')", - "PT15M": "NEARESTDATE({col}, 'QUARTER_HOUR')", - "PT30M": "NEARESTDATE({col}, 'HALF_HOUR')", - "PT1H": "NEARESTDATE({col}, 'HOUR')", - "P1D": "NEARESTDATE({col}, 'DAY')", - "P1W": "NEARESTDATE({col}, 'WEEK_SUNDAY')", - "P1M": "NEARESTDATE({col}, 'MONTH')", - "P3M": "NEARESTDATE({col}, 'QUARTER')", - "P1Y": "NEARESTDATE({col}, 'YEAR')", + TimeGrain.SECOND: "NEARESTDATE({col}, 'SECOND')", + TimeGrain.MINUTE: "NEARESTDATE({col}, 'MINUTE')", + TimeGrain.FIFTEEN_MINUTES: "NEARESTDATE({col}, 'QUARTER_HOUR')", + TimeGrain.THIRTY_MINUTES: "NEARESTDATE({col}, 'HALF_HOUR')", + TimeGrain.HOUR: "NEARESTDATE({col}, 'HOUR')", + TimeGrain.DAY: "NEARESTDATE({col}, 'DAY')", + TimeGrain.WEEK: "NEARESTDATE({col}, 'WEEK_SUNDAY')", + TimeGrain.MONTH: "NEARESTDATE({col}, 'MONTH')", + 
TimeGrain.QUARTER: "NEARESTDATE({col}, 'QUARTER')", + TimeGrain.YEAR: "NEARESTDATE({col}, 'YEAR')", } # Returns a function to convert a Unix timestamp in milliseconds to a date diff --git a/superset/db_engine_specs/druid.py b/superset/db_engine_specs/druid.py index 43ce310a4061e..2ac9346634d18 100644 --- a/superset/db_engine_specs/druid.py +++ b/superset/db_engine_specs/druid.py @@ -26,6 +26,7 @@ from sqlalchemy.engine.reflection import Inspector from superset import is_feature_enabled +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.db_engine_specs.exceptions import SupersetDBAPIConnectionError from superset.exceptions import SupersetException @@ -48,26 +49,26 @@ class DruidEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT1S')", - "PT5S": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT5S')", - "PT30S": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT30S')", - "PT1M": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT1M')", - "PT5M": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT5M')", - "PT10M": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT10M')", - "PT15M": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT15M')", - "PT30M": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT30M')", - "PT1H": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT1H')", - "PT6H": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT6H')", - "P1D": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1D')", - "P1W": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1W')", - "P1M": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1M')", - "P3M": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P3M')", - "P1Y": "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1Y')", - "P1W/1970-01-03T00:00:00Z": ( + TimeGrain.SECOND: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT1S')", + TimeGrain.FIVE_SECONDS: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT5S')", + TimeGrain.THIRTY_SECONDS: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT30S')", + TimeGrain.MINUTE: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 
'PT1M')", + TimeGrain.FIVE_MINUTES: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT5M')", + TimeGrain.TEN_MINUTES: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT10M')", + TimeGrain.FIFTEEN_MINUTES: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT15M')", + TimeGrain.THIRTY_MINUTES: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT30M')", + TimeGrain.HOUR: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT1H')", + TimeGrain.SIX_HOURS: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT6H')", + TimeGrain.DAY: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1D')", + TimeGrain.WEEK: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1W')", + TimeGrain.MONTH: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1M')", + TimeGrain.QUARTER: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P3M')", + TimeGrain.YEAR: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1Y')", + TimeGrain.WEEK_STARTING_SUNDAY: ( "TIME_SHIFT(TIME_FLOOR(TIME_SHIFT(CAST({col} AS TIMESTAMP), " "'P1D', 1), 'P1W'), 'P1D', 5)" ), - "1969-12-28T00:00:00Z/P1W": ( + TimeGrain.WEEK_STARTING_MONDAY: ( "TIME_SHIFT(TIME_FLOOR(TIME_SHIFT(CAST({col} AS TIMESTAMP), " "'P1D', 1), 'P1W'), 'P1D', -1)" ), diff --git a/superset/db_engine_specs/duckdb.py b/superset/db_engine_specs/duckdb.py index 3bbf9ecc3834d..fa2f01f50a516 100644 --- a/superset/db_engine_specs/duckdb.py +++ b/superset/db_engine_specs/duckdb.py @@ -25,6 +25,7 @@ from sqlalchemy import types from sqlalchemy.engine.reflection import Inspector +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.errors import SupersetErrorType @@ -42,14 +43,14 @@ class DuckDBEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', {col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + 
TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = { diff --git a/superset/db_engine_specs/dynamodb.py b/superset/db_engine_specs/dynamodb.py index 5f7a9e2b71e58..d8db741f770d1 100644 --- a/superset/db_engine_specs/dynamodb.py +++ b/superset/db_engine_specs/dynamodb.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec @@ -28,23 +29,23 @@ class DynamoDBEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))", - "PT1M": "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))", - "PT1H": "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))", - "P1D": "DATETIME({col}, 'start of day')", - "P1W": "DATETIME({col}, 'start of day', -strftime('%w', {col}) || ' days')", - "P1M": "DATETIME({col}, 'start of month')", - "P3M": ( + TimeGrain.SECOND: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))", + TimeGrain.MINUTE: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))", + TimeGrain.HOUR: "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))", + TimeGrain.DAY: "DATETIME({col}, 'start of day')", + TimeGrain.WEEK: "DATETIME({col}, 'start of day', -strftime('%w', {col}) || ' days')", + TimeGrain.MONTH: "DATETIME({col}, 'start of month')", + TimeGrain.QUARTER: ( "DATETIME({col}, 'start of month', " "printf('-%d month', (strftime('%m', {col}) - 1) % 3))" ), - "P1Y": "DATETIME({col}, 'start of year')", - "P1W/1970-01-03T00:00:00Z": "DATETIME({col}, 'start of day', 'weekday 6')", - "P1W/1970-01-04T00:00:00Z": "DATETIME({col}, 'start of day', 'weekday 0')", - 
"1969-12-28T00:00:00Z/P1W": ( + TimeGrain.YEAR: "DATETIME({col}, 'start of year')", + TimeGrain.WEEK_ENDING_SATURDAY: "DATETIME({col}, 'start of day', 'weekday 6')", + TimeGrain.WEEK_ENDING_SUNDAY: "DATETIME({col}, 'start of day', 'weekday 0')", + TimeGrain.WEEK_STARTING_SUNDAY: ( "DATETIME({col}, 'start of day', 'weekday 0', '-7 days')" ), - "1969-12-29T00:00:00Z/P1W": ( + TimeGrain.WEEK_STARTING_MONDAY: ( "DATETIME({col}, 'start of day', 'weekday 1', '-7 days')" ), } diff --git a/superset/db_engine_specs/elasticsearch.py b/superset/db_engine_specs/elasticsearch.py index d717c52bf592a..173302d58b526 100644 --- a/superset/db_engine_specs/elasticsearch.py +++ b/superset/db_engine_specs/elasticsearch.py @@ -21,6 +21,7 @@ from packaging.version import Version from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.db_engine_specs.exceptions import ( SupersetDBAPIDatabaseError, @@ -42,12 +43,12 @@ class ElasticSearchEngineSpec(BaseEngineSpec): # pylint: disable=abstract-metho _time_grain_expressions = { None: "{col}", - "PT1S": "HISTOGRAM({col}, INTERVAL 1 SECOND)", - "PT1M": "HISTOGRAM({col}, INTERVAL 1 MINUTE)", - "PT1H": "HISTOGRAM({col}, INTERVAL 1 HOUR)", - "P1D": "HISTOGRAM({col}, INTERVAL 1 DAY)", - "P1M": "HISTOGRAM({col}, INTERVAL 1 MONTH)", - "P1Y": "HISTOGRAM({col}, INTERVAL 1 YEAR)", + TimeGrain.SECOND: "HISTOGRAM({col}, INTERVAL 1 SECOND)", + TimeGrain.MINUTE: "HISTOGRAM({col}, INTERVAL 1 MINUTE)", + TimeGrain.HOUR: "HISTOGRAM({col}, INTERVAL 1 HOUR)", + TimeGrain.DAY: "HISTOGRAM({col}, INTERVAL 1 DAY)", + TimeGrain.MONTH: "HISTOGRAM({col}, INTERVAL 1 MONTH)", + TimeGrain.YEAR: "HISTOGRAM({col}, INTERVAL 1 YEAR)", } type_code_map: dict[int, str] = {} # loaded from get_datatype only if needed @@ -104,12 +105,12 @@ class OpenDistroEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method _time_grain_expressions = { None: "{col}", - "PT1S": "date_format({col}, 
'yyyy-MM-dd HH:mm:ss.000')", - "PT1M": "date_format({col}, 'yyyy-MM-dd HH:mm:00.000')", - "PT1H": "date_format({col}, 'yyyy-MM-dd HH:00:00.000')", - "P1D": "date_format({col}, 'yyyy-MM-dd 00:00:00.000')", - "P1M": "date_format({col}, 'yyyy-MM-01 00:00:00.000')", - "P1Y": "date_format({col}, 'yyyy-01-01 00:00:00.000')", + TimeGrain.SECOND: "date_format({col}, 'yyyy-MM-dd HH:mm:ss.000')", + TimeGrain.MINUTE: "date_format({col}, 'yyyy-MM-dd HH:mm:00.000')", + TimeGrain.HOUR: "date_format({col}, 'yyyy-MM-dd HH:00:00.000')", + TimeGrain.DAY: "date_format({col}, 'yyyy-MM-dd 00:00:00.000')", + TimeGrain.MONTH: "date_format({col}, 'yyyy-MM-01 00:00:00.000')", + TimeGrain.YEAR: "date_format({col}, 'yyyy-01-01 00:00:00.000')", } engine = "odelasticsearch" diff --git a/superset/db_engine_specs/exasol.py b/superset/db_engine_specs/exasol.py index 6da56e2feee8f..e5f8c011d1616 100644 --- a/superset/db_engine_specs/exasol.py +++ b/superset/db_engine_specs/exasol.py @@ -16,6 +16,7 @@ # under the License. 
from typing import Any, Optional +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec @@ -29,14 +30,14 @@ class ExasolEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method # Exasol's DATE_TRUNC function is PostgresSQL compatible _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', {col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } @classmethod diff --git a/superset/db_engine_specs/firebird.py b/superset/db_engine_specs/firebird.py index 4448074157073..15c4bef7bf417 100644 --- a/superset/db_engine_specs/firebird.py +++ b/superset/db_engine_specs/firebird.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod @@ -33,7 +34,7 @@ class FirebirdEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": ( + TimeGrain.SECOND: ( "CAST(CAST({col} AS DATE) " "|| ' ' " "|| EXTRACT(HOUR FROM {col}) " @@ -42,7 +43,7 @@ class FirebirdEngineSpec(BaseEngineSpec): "|| ':' " "|| FLOOR(EXTRACT(SECOND FROM {col})) AS TIMESTAMP)" ), - "PT1M": ( + TimeGrain.MINUTE: ( "CAST(CAST({col} AS DATE) " "|| ' ' " "|| EXTRACT(HOUR FROM {col}) " @@ -50,20 +51,20 @@ class FirebirdEngineSpec(BaseEngineSpec): "|| EXTRACT(MINUTE FROM {col}) " "|| ':00' AS TIMESTAMP)" ), - "PT1H": ( + 
TimeGrain.HOUR: ( "CAST(CAST({col} AS DATE) " "|| ' ' " "|| EXTRACT(HOUR FROM {col}) " "|| ':00:00' AS TIMESTAMP)" ), - "P1D": "CAST({col} AS DATE)", - "P1M": ( + TimeGrain.DAY: "CAST({col} AS DATE)", + TimeGrain.MONTH: ( "CAST(EXTRACT(YEAR FROM {col}) " "|| '-' " "|| EXTRACT(MONTH FROM {col}) " "|| '-01' AS DATE)" ), - "P1Y": "CAST(EXTRACT(YEAR FROM {col}) || '-01-01' AS DATE)", + TimeGrain.YEAR: "CAST(EXTRACT(YEAR FROM {col}) || '-01-01' AS DATE)", } @classmethod diff --git a/superset/db_engine_specs/firebolt.py b/superset/db_engine_specs/firebolt.py index ace3d6b3b232e..13ab727ab099c 100644 --- a/superset/db_engine_specs/firebolt.py +++ b/superset/db_engine_specs/firebolt.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec @@ -31,14 +32,14 @@ class FireboltEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))", - "PT1M": "date_trunc('minute', CAST({col} AS TIMESTAMP))", - "PT1H": "date_trunc('hour', CAST({col} AS TIMESTAMP))", - "P1D": "date_trunc('day', CAST({col} AS TIMESTAMP))", - "P1W": "date_trunc('week', CAST({col} AS TIMESTAMP))", - "P1M": "date_trunc('month', CAST({col} AS TIMESTAMP))", - "P3M": "date_trunc('quarter', CAST({col} AS TIMESTAMP))", - "P1Y": "date_trunc('year', CAST({col} AS TIMESTAMP))", + TimeGrain.SECOND: "date_trunc('second', CAST({col} AS TIMESTAMP))", + TimeGrain.MINUTE: "date_trunc('minute', CAST({col} AS TIMESTAMP))", + TimeGrain.HOUR: "date_trunc('hour', CAST({col} AS TIMESTAMP))", + TimeGrain.DAY: "date_trunc('day', CAST({col} AS TIMESTAMP))", + TimeGrain.WEEK: "date_trunc('week', CAST({col} AS TIMESTAMP))", + TimeGrain.MONTH: "date_trunc('month', CAST({col} AS TIMESTAMP))", + TimeGrain.QUARTER: "date_trunc('quarter', CAST({col} AS TIMESTAMP))", + TimeGrain.YEAR: "date_trunc('year', CAST({col} AS TIMESTAMP))", } @classmethod diff --git 
a/superset/db_engine_specs/hana.py b/superset/db_engine_specs/hana.py index 108838f9d2a8d..da05fba847d24 100644 --- a/superset/db_engine_specs/hana.py +++ b/superset/db_engine_specs/hana.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import LimitMethod from superset.db_engine_specs.postgres import PostgresBaseEngineSpec @@ -32,15 +33,15 @@ class HanaEngineSpec(PostgresBaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,20))", - "PT1M": "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,17) || '00')", - "PT1H": "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,14) || '00:00')", - "P1D": "TO_DATE({col})", - "P1M": "TO_DATE(SUBSTRING(TO_DATE({col}),0,7)||'-01')", - "P3M": "TO_DATE(SUBSTRING( \ + TimeGrain.SECOND: "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,20))", + TimeGrain.MINUTE: "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,17) || '00')", + TimeGrain.HOUR: "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,14) || '00:00')", + TimeGrain.DAY: "TO_DATE({col})", + TimeGrain.MONTH: "TO_DATE(SUBSTRING(TO_DATE({col}),0,7)||'-01')", + TimeGrain.QUARTER: "TO_DATE(SUBSTRING( \ TO_DATE({col}), 0, 5)|| LPAD(CAST((CAST(SUBSTRING(QUARTER( \ TO_DATE({col}), 1), 7, 1) as int)-1)*3 +1 as text),2,'0') ||'-01')", - "P1Y": "TO_DATE(YEAR({col})||'-01-01')", + TimeGrain.YEAR: "TO_DATE(YEAR({col})||'-01-01')", } @classmethod diff --git a/superset/db_engine_specs/hive.py b/superset/db_engine_specs/hive.py index 7601ebb2cddf5..d7c2465badceb 100644 --- a/superset/db_engine_specs/hive.py +++ b/superset/db_engine_specs/hive.py @@ -38,6 +38,7 @@ from sqlalchemy.sql.expression import ColumnClause, Select from superset.common.db_query_status import QueryStatus +from superset.constants import TimeGrain from superset.databases.utils import make_url_safe from superset.db_engine_specs.base import BaseEngineSpec from superset.db_engine_specs.presto import 
PrestoEngineSpec @@ -107,16 +108,16 @@ class HiveEngineSpec(PrestoEngineSpec): # pylint: disable=line-too-long _time_grain_expressions = { None: "{col}", - "PT1S": "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd HH:mm:ss')", - "PT1M": "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd HH:mm:00')", - "PT1H": "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd HH:00:00')", - "P1D": "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd 00:00:00')", - "P1W": "date_format(date_sub({col}, CAST(7-from_unixtime(unix_timestamp({col}),'u') as int)), 'yyyy-MM-dd 00:00:00')", - "P1M": "from_unixtime(unix_timestamp({col}), 'yyyy-MM-01 00:00:00')", - "P3M": "date_format(add_months(trunc({col}, 'MM'), -(month({col})-1)%3), 'yyyy-MM-dd 00:00:00')", - "P1Y": "from_unixtime(unix_timestamp({col}), 'yyyy-01-01 00:00:00')", - "P1W/1970-01-03T00:00:00Z": "date_format(date_add({col}, INT(6-from_unixtime(unix_timestamp({col}), 'u'))), 'yyyy-MM-dd 00:00:00')", - "1969-12-28T00:00:00Z/P1W": "date_format(date_add({col}, -INT(from_unixtime(unix_timestamp({col}), 'u'))), 'yyyy-MM-dd 00:00:00')", + TimeGrain.SECOND: "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd HH:mm:ss')", + TimeGrain.MINUTE: "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd HH:mm:00')", + TimeGrain.HOUR: "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd HH:00:00')", + TimeGrain.DAY: "from_unixtime(unix_timestamp({col}), 'yyyy-MM-dd 00:00:00')", + TimeGrain.WEEK: "date_format(date_sub({col}, CAST(7-from_unixtime(unix_timestamp({col}),'u') as int)), 'yyyy-MM-dd 00:00:00')", + TimeGrain.MONTH: "from_unixtime(unix_timestamp({col}), 'yyyy-MM-01 00:00:00')", + TimeGrain.QUARTER: "date_format(add_months(trunc({col}, 'MM'), -(month({col})-1)%3), 'yyyy-MM-dd 00:00:00')", + TimeGrain.YEAR: "from_unixtime(unix_timestamp({col}), 'yyyy-01-01 00:00:00')", + TimeGrain.WEEK_ENDING_SATURDAY: "date_format(date_add({col}, INT(6-from_unixtime(unix_timestamp({col}), 'u'))), 'yyyy-MM-dd 00:00:00')", + TimeGrain.WEEK_STARTING_SUNDAY: 
"date_format(date_add({col}, -INT(from_unixtime(unix_timestamp({col}), 'u'))), 'yyyy-MM-dd 00:00:00')", } # Scoping regex at class level to avoid recompiling diff --git a/superset/db_engine_specs/impala.py b/superset/db_engine_specs/impala.py index cd1c9e47329e2..c10cf679355ea 100644 --- a/superset/db_engine_specs/impala.py +++ b/superset/db_engine_specs/impala.py @@ -25,7 +25,7 @@ from sqlalchemy.engine.reflection import Inspector from sqlalchemy.orm import Session -from superset.constants import QUERY_EARLY_CANCEL_KEY +from superset.constants import QUERY_EARLY_CANCEL_KEY, TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.models.sql_lab import Query @@ -42,13 +42,13 @@ class ImpalaEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1M": "TRUNC({col}, 'MI')", - "PT1H": "TRUNC({col}, 'HH')", - "P1D": "TRUNC({col}, 'DD')", - "P1W": "TRUNC({col}, 'WW')", - "P1M": "TRUNC({col}, 'MONTH')", - "P3M": "TRUNC({col}, 'Q')", - "P1Y": "TRUNC({col}, 'YYYY')", + TimeGrain.MINUTE: "TRUNC({col}, 'MI')", + TimeGrain.HOUR: "TRUNC({col}, 'HH')", + TimeGrain.DAY: "TRUNC({col}, 'DD')", + TimeGrain.WEEK: "TRUNC({col}, 'WW')", + TimeGrain.MONTH: "TRUNC({col}, 'MONTH')", + TimeGrain.QUARTER: "TRUNC({col}, 'Q')", + TimeGrain.YEAR: "TRUNC({col}, 'YYYY')", } @classmethod diff --git a/superset/db_engine_specs/kusto.py b/superset/db_engine_specs/kusto.py index 17147d5cc059f..65d48f789e539 100644 --- a/superset/db_engine_specs/kusto.py +++ b/superset/db_engine_specs/kusto.py @@ -21,6 +21,7 @@ from sqlalchemy import types from sqlalchemy.dialects.mssql.base import SMALLDATETIME +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod from superset.db_engine_specs.exceptions import ( SupersetDBAPIDatabaseError, @@ -43,21 +44,21 @@ class KustoSqlEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method _time_grain_expressions = { None: "{col}", - "PT1S": "DATEADD(second, 
DATEDIFF(second, '2000-01-01', {col}), '2000-01-01')", - "PT1M": "DATEADD(minute, DATEDIFF(minute, 0, {col}), 0)", - "PT5M": "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 5 * 5, 0)", - "PT10M": "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 10 * 10, 0)", - "PT15M": "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 15 * 15, 0)", - "PT0.5H": "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 30 * 30, 0)", - "PT1H": "DATEADD(hour, DATEDIFF(hour, 0, {col}), 0)", - "P1D": "DATEADD(day, DATEDIFF(day, 0, {col}), 0)", - "P1W": "DATEADD(day, -1, DATEADD(week, DATEDIFF(week, 0, {col}), 0))", - "P1M": "DATEADD(month, DATEDIFF(month, 0, {col}), 0)", - "P3M": "DATEADD(quarter, DATEDIFF(quarter, 0, {col}), 0)", - "P1Y": "DATEADD(year, DATEDIFF(year, 0, {col}), 0)", - "1969-12-28T00:00:00Z/P1W": "DATEADD(day, -1," + TimeGrain.SECOND: "DATEADD(second, DATEDIFF(second, '2000-01-01', {col}), '2000-01-01')", + TimeGrain.MINUTE: "DATEADD(minute, DATEDIFF(minute, 0, {col}), 0)", + TimeGrain.FIVE_MINUTES: "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 5 * 5, 0)", + TimeGrain.TEN_MINUTES: "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 10 * 10, 0)", + TimeGrain.FIFTEEN_MINUTES: "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 15 * 15, 0)", + TimeGrain.HALF_HOUR: "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 30 * 30, 0)", + TimeGrain.HOUR: "DATEADD(hour, DATEDIFF(hour, 0, {col}), 0)", + TimeGrain.DAY: "DATEADD(day, DATEDIFF(day, 0, {col}), 0)", + TimeGrain.WEEK: "DATEADD(day, -1, DATEADD(week, DATEDIFF(week, 0, {col}), 0))", + TimeGrain.MONTH: "DATEADD(month, DATEDIFF(month, 0, {col}), 0)", + TimeGrain.QUARTER: "DATEADD(quarter, DATEDIFF(quarter, 0, {col}), 0)", + TimeGrain.YEAR: "DATEADD(year, DATEDIFF(year, 0, {col}), 0)", + TimeGrain.WEEK_STARTING_SUNDAY: "DATEADD(day, -1," " DATEADD(week, DATEDIFF(week, 0, {col}), 0))", - "1969-12-29T00:00:00Z/P1W": "DATEADD(week," + TimeGrain.WEEK_STARTING_MONDAY: "DATEADD(week," " DATEDIFF(week, 0, DATEADD(day, -1, {col})), 0)", } @@ -120,12 +121,12 @@ class 
KustoKqlEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method _time_grain_expressions = { None: "{col}", - "PT1S": "{col}/ time(1s)", - "PT1M": "{col}/ time(1min)", - "PT1H": "{col}/ time(1h)", - "P1D": "{col}/ time(1d)", - "P1M": "datetime_diff('month',CreateDate, datetime(0001-01-01 00:00:00))+1", - "P1Y": "datetime_diff('year',CreateDate, datetime(0001-01-01 00:00:00))+1", + TimeGrain.SECOND: "{col}/ time(1s)", + TimeGrain.MINUTE: "{col}/ time(1min)", + TimeGrain.HOUR: "{col}/ time(1h)", + TimeGrain.DAY: "{col}/ time(1d)", + TimeGrain.MONTH: "datetime_diff('month',CreateDate, datetime(0001-01-01 00:00:00))+1", + TimeGrain.YEAR: "datetime_diff('year',CreateDate, datetime(0001-01-01 00:00:00))+1", } type_code_map: dict[int, str] = {} # loaded from get_datatype only if needed diff --git a/superset/db_engine_specs/kylin.py b/superset/db_engine_specs/kylin.py index f522602a48e7a..34851500316ac 100644 --- a/superset/db_engine_specs/kylin.py +++ b/superset/db_engine_specs/kylin.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec @@ -30,14 +31,14 @@ class KylinEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method _time_grain_expressions = { None: "{col}", - "PT1S": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO SECOND) AS TIMESTAMP)", - "PT1M": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO MINUTE) AS TIMESTAMP)", - "PT1H": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO HOUR) AS TIMESTAMP)", - "P1D": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO DAY) AS DATE)", - "P1W": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO WEEK) AS DATE)", - "P1M": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO MONTH) AS DATE)", - "P3M": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO QUARTER) AS DATE)", - "P1Y": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO YEAR) AS DATE)", + TimeGrain.SECOND: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO SECOND) AS TIMESTAMP)", + TimeGrain.MINUTE: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO MINUTE) AS 
TIMESTAMP)", + TimeGrain.HOUR: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO HOUR) AS TIMESTAMP)", + TimeGrain.DAY: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO DAY) AS DATE)", + TimeGrain.WEEK: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO WEEK) AS DATE)", + TimeGrain.MONTH: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO MONTH) AS DATE)", + TimeGrain.QUARTER: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO QUARTER) AS DATE)", + TimeGrain.YEAR: "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO YEAR) AS DATE)", } @classmethod diff --git a/superset/db_engine_specs/mssql.py b/superset/db_engine_specs/mssql.py index 3e0879b90415c..9a4d2c5edbc72 100644 --- a/superset/db_engine_specs/mssql.py +++ b/superset/db_engine_specs/mssql.py @@ -24,6 +24,7 @@ from sqlalchemy import types from sqlalchemy.dialects.mssql.base import SMALLDATETIME +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod from superset.errors import SupersetErrorType from superset.utils.core import GenericDataType @@ -55,24 +56,25 @@ class MssqlEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATEADD(SECOND, DATEDIFF(SECOND, '2000-01-01', {col}), '2000-01-01')", - "PT1M": "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}), 0)", - "PT5M": "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}) / 5 * 5, 0)", - "PT10M": "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}) / 10 * 10, 0)", - "PT15M": "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}) / 15 * 15, 0)", - "PT30M": "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}) / 30 * 30, 0)", - "PT1H": "DATEADD(HOUR, DATEDIFF(HOUR, 0, {col}), 0)", - "P1D": "DATEADD(DAY, DATEDIFF(DAY, 0, {col}), 0)", - "P1W": "DATEADD(DAY, 1 - DATEPART(WEEKDAY, {col})," + TimeGrain.SECOND: "DATEADD(SECOND, DATEDIFF(SECOND, '2000-01-01', {col}), '2000-01-01')", + TimeGrain.MINUTE: "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}), 0)", + TimeGrain.FIVE_MINUTES: "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}) / 5 * 5, 0)", + TimeGrain.TEN_MINUTES: "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, 
{col}) / 10 * 10, 0)", + TimeGrain.FIFTEEN_MINUTES: "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}) / 15 * 15, 0)", + TimeGrain.THIRTY_MINUTES: "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}) / 30 * 30, 0)", + TimeGrain.HOUR: "DATEADD(HOUR, DATEDIFF(HOUR, 0, {col}), 0)", + TimeGrain.DAY: "DATEADD(DAY, DATEDIFF(DAY, 0, {col}), 0)", + TimeGrain.WEEK: "DATEADD(DAY, 1 - DATEPART(WEEKDAY, {col})," " DATEADD(DAY, DATEDIFF(DAY, 0, {col}), 0))", - "P1M": "DATEADD(MONTH, DATEDIFF(MONTH, 0, {col}), 0)", - "P3M": "DATEADD(QUARTER, DATEDIFF(QUARTER, 0, {col}), 0)", - "P1Y": "DATEADD(YEAR, DATEDIFF(YEAR, 0, {col}), 0)", - "1969-12-28T00:00:00Z/P1W": "DATEADD(DAY, -1," + TimeGrain.MONTH: "DATEADD(MONTH, DATEDIFF(MONTH, 0, {col}), 0)", + TimeGrain.QUARTER: "DATEADD(QUARTER, DATEDIFF(QUARTER, 0, {col}), 0)", + TimeGrain.YEAR: "DATEADD(YEAR, DATEDIFF(YEAR, 0, {col}), 0)", + TimeGrain.WEEK_STARTING_SUNDAY: "DATEADD(DAY, -1," " DATEADD(WEEK, DATEDIFF(WEEK, 0, {col}), 0))", - "1969-12-29T00:00:00Z/P1W": "DATEADD(WEEK," + TimeGrain.WEEK_STARTING_MONDAY: "DATEADD(WEEK," " DATEDIFF(WEEK, 0, DATEADD(DAY, -1, {col})), 0)", } + column_type_mappings = ( ( re.compile(r"^smalldatetime.*", re.IGNORECASE), diff --git a/superset/db_engine_specs/mysql.py b/superset/db_engine_specs/mysql.py index 9f853d577c30b..e83e53e426143 100644 --- a/superset/db_engine_specs/mysql.py +++ b/superset/db_engine_specs/mysql.py @@ -36,6 +36,7 @@ ) from sqlalchemy.engine.url import URL +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin from superset.errors import SupersetErrorType from superset.models.sql_lab import Query @@ -127,19 +128,19 @@ class MySQLEngineSpec(BaseEngineSpec, BasicParametersMixin): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_ADD(DATE({col}), " + TimeGrain.SECOND: "DATE_ADD(DATE({col}), " "INTERVAL (HOUR({col})*60*60 + MINUTE({col})*60" " + SECOND({col})) SECOND)", - "PT1M": "DATE_ADD(DATE({col}), " + TimeGrain.MINUTE: 
"DATE_ADD(DATE({col}), " "INTERVAL (HOUR({col})*60 + MINUTE({col})) MINUTE)", - "PT1H": "DATE_ADD(DATE({col}), INTERVAL HOUR({col}) HOUR)", - "P1D": "DATE({col})", - "P1W": "DATE(DATE_SUB({col}, INTERVAL DAYOFWEEK({col}) - 1 DAY))", - "P1M": "DATE(DATE_SUB({col}, INTERVAL DAYOFMONTH({col}) - 1 DAY))", - "P3M": "MAKEDATE(YEAR({col}), 1) " + TimeGrain.HOUR: "DATE_ADD(DATE({col}), INTERVAL HOUR({col}) HOUR)", + TimeGrain.DAY: "DATE({col})", + TimeGrain.WEEK: "DATE(DATE_SUB({col}, INTERVAL DAYOFWEEK({col}) - 1 DAY))", + TimeGrain.MONTH: "DATE(DATE_SUB({col}, INTERVAL DAYOFMONTH({col}) - 1 DAY))", + TimeGrain.QUARTER: "MAKEDATE(YEAR({col}), 1) " "+ INTERVAL QUARTER({col}) QUARTER - INTERVAL 1 QUARTER", - "P1Y": "DATE(DATE_SUB({col}, INTERVAL DAYOFYEAR({col}) - 1 DAY))", - "1969-12-29T00:00:00Z/P1W": "DATE(DATE_SUB({col}, " + TimeGrain.YEAR: "DATE(DATE_SUB({col}, INTERVAL DAYOFYEAR({col}) - 1 DAY))", + TimeGrain.WEEK_STARTING_MONDAY: "DATE(DATE_SUB({col}, " "INTERVAL DAYOFWEEK(DATE_SUB({col}, " "INTERVAL 1 DAY)) - 1 DAY))", } diff --git a/superset/db_engine_specs/netezza.py b/superset/db_engine_specs/netezza.py index 6f336b96bc3f7..66b7eeeea6f40 100644 --- a/superset/db_engine_specs/netezza.py +++ b/superset/db_engine_specs/netezza.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+from superset.constants import TimeGrain from superset.db_engine_specs.postgres import PostgresBaseEngineSpec @@ -24,14 +25,14 @@ class NetezzaEngineSpec(PostgresBaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', {col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } @classmethod diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index 59fa52a656a7e..f17032a08d155 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -38,6 +38,7 @@ except (ImportError, RuntimeError): pass +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.errors import SupersetErrorType from superset.models.core import Database @@ -295,14 +296,14 @@ class OcientEngineSpec(BaseEngineSpec): } _time_grain_expressions = { None: "{col}", - "PT1S": "ROUND({col}, 'SECOND')", - "PT1M": "ROUND({col}, 'MINUTE')", - "PT1H": "ROUND({col}, 'HOUR')", - "P1D": "ROUND({col}, 'DAY')", - "P1W": "ROUND({col}, 'WEEK')", - "P1M": "ROUND({col}, 'MONTH')", - "P0.25Y": "ROUND({col}, 'QUARTER')", - "P1Y": "ROUND({col}, 'YEAR')", + TimeGrain.SECOND: "ROUND({col}, 'SECOND')", + TimeGrain.MINUTE: "ROUND({col}, 'MINUTE')", + TimeGrain.HOUR: "ROUND({col}, 'HOUR')", + TimeGrain.DAY: "ROUND({col}, 'DAY')", + TimeGrain.WEEK: "ROUND({col}, 'WEEK')", + 
TimeGrain.MONTH: "ROUND({col}, 'MONTH')", + TimeGrain.QUARTER_YEAR: "ROUND({col}, 'QUARTER')", + TimeGrain.YEAR: "ROUND({col}, 'YEAR')", } @classmethod diff --git a/superset/db_engine_specs/oracle.py b/superset/db_engine_specs/oracle.py index 1199b74406d2a..b98ab8ed59e62 100644 --- a/superset/db_engine_specs/oracle.py +++ b/superset/db_engine_specs/oracle.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod @@ -31,14 +32,14 @@ class OracleEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "CAST({col} as DATE)", - "PT1M": "TRUNC(CAST({col} as DATE), 'MI')", - "PT1H": "TRUNC(CAST({col} as DATE), 'HH')", - "P1D": "TRUNC(CAST({col} as DATE), 'DDD')", - "P1W": "TRUNC(CAST({col} as DATE), 'WW')", - "P1M": "TRUNC(CAST({col} as DATE), 'MONTH')", - "P3M": "TRUNC(CAST({col} as DATE), 'Q')", - "P1Y": "TRUNC(CAST({col} as DATE), 'YEAR')", + TimeGrain.SECOND: "CAST({col} as DATE)", + TimeGrain.MINUTE: "TRUNC(CAST({col} as DATE), 'MI')", + TimeGrain.HOUR: "TRUNC(CAST({col} as DATE), 'HH')", + TimeGrain.DAY: "TRUNC(CAST({col} as DATE), 'DDD')", + TimeGrain.WEEK: "TRUNC(CAST({col} as DATE), 'WW')", + TimeGrain.MONTH: "TRUNC(CAST({col} as DATE), 'MONTH')", + TimeGrain.QUARTER: "TRUNC(CAST({col} as DATE), 'Q')", + TimeGrain.YEAR: "TRUNC(CAST({col} as DATE), 'YEAR')", } @classmethod diff --git a/superset/db_engine_specs/pinot.py b/superset/db_engine_specs/pinot.py index bfec8b294716d..a0662366d1c6e 100644 --- a/superset/db_engine_specs/pinot.py +++ b/superset/db_engine_specs/pinot.py @@ -18,6 +18,7 @@ from sqlalchemy.sql.expression import ColumnClause +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, TimestampExpression @@ -30,19 +31,19 @@ class PinotEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method allows_alias_in_orderby = False # Pinot does its own conversion below - 
_time_grain_expressions: dict[Optional[str], str] = { - "PT1S": "1:SECONDS", - "PT1M": "1:MINUTES", - "PT5M": "5:MINUTES", - "PT10M": "10:MINUTES", - "PT15M": "15:MINUTES", - "PT30M": "30:MINUTES", - "PT1H": "1:HOURS", - "P1D": "1:DAYS", - "P1W": "week", - "P1M": "month", - "P3MY": "quarter", - "P1Y": "year", + _time_grain_expressions = { + TimeGrain.SECOND: "1:SECONDS", + TimeGrain.MINUTE: "1:MINUTES", + TimeGrain.FIVE_MINUTES: "5:MINUTES", + TimeGrain.TEN_MINUTES: "10:MINUTES", + TimeGrain.FIFTEEN_MINUTES: "15:MINUTES", + TimeGrain.THIRTY_MINUTES: "30:MINUTES", + TimeGrain.HOUR: "1:HOURS", + TimeGrain.DAY: "1:DAYS", + TimeGrain.WEEK: "week", + TimeGrain.MONTH: "month", + TimeGrain.QUARTER: "quarter", + TimeGrain.YEAR: "year", } _python_to_java_time_patterns: dict[str, str] = { @@ -55,18 +56,18 @@ class PinotEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method } _use_date_trunc_function: dict[str, bool] = { - "PT1S": False, - "PT1M": False, - "PT5M": False, - "PT10M": False, - "PT15M": False, - "PT30M": False, - "PT1H": False, - "P1D": False, - "P1W": True, - "P1M": True, - "P3M": True, - "P1Y": True, + TimeGrain.SECOND: False, + TimeGrain.MINUTE: False, + TimeGrain.FIVE_MINUTES: False, + TimeGrain.TEN_MINUTES: False, + TimeGrain.FIFTEEN_MINUTES: False, + TimeGrain.THIRTY_MINUTES: False, + TimeGrain.HOUR: False, + TimeGrain.DAY: False, + TimeGrain.WEEK: True, + TimeGrain.MONTH: True, + TimeGrain.QUARTER: True, + TimeGrain.YEAR: True, } @classmethod diff --git a/superset/db_engine_specs/postgres.py b/superset/db_engine_specs/postgres.py index 2088782f83bae..cdd71fdfccbcc 100644 --- a/superset/db_engine_specs/postgres.py +++ b/superset/db_engine_specs/postgres.py @@ -28,6 +28,7 @@ from sqlalchemy.engine.url import URL from sqlalchemy.types import Date, DateTime, String +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin from superset.errors import SupersetErrorType from superset.exceptions 
import SupersetException @@ -100,14 +101,14 @@ class PostgresBaseEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', {col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = { diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py index d5a2ab7605517..861e822345028 100644 --- a/superset/db_engine_specs/presto.py +++ b/superset/db_engine_specs/presto.py @@ -43,6 +43,7 @@ from superset import cache_manager, is_feature_enabled from superset.common.db_query_status import QueryStatus +from superset.constants import TimeGrain from superset.databases.utils import make_url_safe from superset.db_engine_specs.base import BaseEngineSpec from superset.errors import SupersetErrorType @@ -246,22 +247,18 @@ class PrestoBaseEngineSpec(BaseEngineSpec, metaclass=ABCMeta): # pylint: disable=line-too-long _time_grain_expressions = { None: "{col}", - "PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))", - "PT1M": "date_trunc('minute', CAST({col} AS TIMESTAMP))", - "PT1H": "date_trunc('hour', CAST({col} AS TIMESTAMP))", - "P1D": "date_trunc('day', CAST({col} AS TIMESTAMP))", - "P1W": "date_trunc('week', CAST({col} AS TIMESTAMP))", - "P1M": "date_trunc('month', CAST({col} AS TIMESTAMP))", - "P3M": 
"date_trunc('quarter', CAST({col} AS TIMESTAMP))", - "P1Y": "date_trunc('year', CAST({col} AS TIMESTAMP))", - # Week starting Sunday - "1969-12-28T00:00:00Z/P1W": "date_trunc('week', CAST({col} AS TIMESTAMP) + interval '1' day) - interval '1' day", # noqa - # Week starting Monday - "1969-12-29T00:00:00Z/P1W": "date_trunc('week', CAST({col} AS TIMESTAMP))", - # Week ending Saturday - "P1W/1970-01-03T00:00:00Z": "date_trunc('week', CAST({col} AS TIMESTAMP) + interval '1' day) + interval '5' day", # noqa - # Week ending Sunday - "P1W/1970-01-04T00:00:00Z": "date_trunc('week', CAST({col} AS TIMESTAMP)) + interval '6' day", # noqa + TimeGrain.SECOND: "date_trunc('second', CAST({col} AS TIMESTAMP))", + TimeGrain.MINUTE: "date_trunc('minute', CAST({col} AS TIMESTAMP))", + TimeGrain.HOUR: "date_trunc('hour', CAST({col} AS TIMESTAMP))", + TimeGrain.DAY: "date_trunc('day', CAST({col} AS TIMESTAMP))", + TimeGrain.WEEK: "date_trunc('week', CAST({col} AS TIMESTAMP))", + TimeGrain.MONTH: "date_trunc('month', CAST({col} AS TIMESTAMP))", + TimeGrain.QUARTER: "date_trunc('quarter', CAST({col} AS TIMESTAMP))", + TimeGrain.YEAR: "date_trunc('year', CAST({col} AS TIMESTAMP))", + TimeGrain.WEEK_STARTING_SUNDAY: "date_trunc('week', CAST({col} AS TIMESTAMP) + interval '1' day) - interval '1' day", # noqa + TimeGrain.WEEK_STARTING_MONDAY: "date_trunc('week', CAST({col} AS TIMESTAMP))", + TimeGrain.WEEK_ENDING_SATURDAY: "date_trunc('week', CAST({col} AS TIMESTAMP) + interval '1' day) + interval '5' day", # noqa + TimeGrain.WEEK_ENDING_SUNDAY: "date_trunc('week', CAST({col} AS TIMESTAMP)) + interval '6' day", # noqa } @classmethod diff --git a/superset/db_engine_specs/rockset.py b/superset/db_engine_specs/rockset.py index 71adca0b10ba7..73ca57ac06b0c 100644 --- a/superset/db_engine_specs/rockset.py +++ b/superset/db_engine_specs/rockset.py @@ -19,6 +19,7 @@ from sqlalchemy import types +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec if 
TYPE_CHECKING: @@ -31,14 +32,14 @@ class RocksetEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('second', {col})", - "PT1M": "DATE_TRUNC('minute', {col})", - "PT1H": "DATE_TRUNC('hour', {col})", - "P1D": "DATE_TRUNC('day', {col})", - "P1W": "DATE_TRUNC('week', {col})", - "P1M": "DATE_TRUNC('month', {col})", - "P3M": "DATE_TRUNC('quarter', {col})", - "P1Y": "DATE_TRUNC('year', {col})", + TimeGrain.SECOND: "DATE_TRUNC('second', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})", + TimeGrain.HOUR: "DATE_TRUNC('hour', {col})", + TimeGrain.DAY: "DATE_TRUNC('day', {col})", + TimeGrain.WEEK: "DATE_TRUNC('week', {col})", + TimeGrain.MONTH: "DATE_TRUNC('month', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})", + TimeGrain.YEAR: "DATE_TRUNC('year', {col})", } @classmethod diff --git a/superset/db_engine_specs/snowflake.py b/superset/db_engine_specs/snowflake.py index 32ade649b0af3..473484f9a76c3 100644 --- a/superset/db_engine_specs/snowflake.py +++ b/superset/db_engine_specs/snowflake.py @@ -34,7 +34,7 @@ from sqlalchemy.engine.url import URL from typing_extensions import TypedDict -from superset.constants import USER_AGENT +from superset.constants import TimeGrain, USER_AGENT from superset.databases.utils import make_url_safe from superset.db_engine_specs.base import BaseEngineSpec, BasicPropertiesType from superset.db_engine_specs.postgres import PostgresBaseEngineSpec @@ -90,22 +90,22 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATE_TRUNC('SECOND', {col})", - "PT1M": "DATE_TRUNC('MINUTE', {col})", - "PT5M": "DATEADD(MINUTE, FLOOR(DATE_PART(MINUTE, {col}) / 5) * 5, \ + TimeGrain.SECOND: "DATE_TRUNC('SECOND', {col})", + TimeGrain.MINUTE: "DATE_TRUNC('MINUTE', {col})", + TimeGrain.FIVE_MINUTES: "DATEADD(MINUTE, FLOOR(DATE_PART(MINUTE, {col}) / 5) * 5, \ DATE_TRUNC('HOUR', {col}))", - "PT10M": "DATEADD(MINUTE, FLOOR(DATE_PART(MINUTE, {col}) / 10) * 
10, \ + TimeGrain.TEN_MINUTES: "DATEADD(MINUTE, FLOOR(DATE_PART(MINUTE, {col}) / 10) * 10, \ DATE_TRUNC('HOUR', {col}))", - "PT15M": "DATEADD(MINUTE, FLOOR(DATE_PART(MINUTE, {col}) / 15) * 15, \ + TimeGrain.FIFTEEN_MINUTES: "DATEADD(MINUTE, FLOOR(DATE_PART(MINUTE, {col}) / 15) * 15, \ DATE_TRUNC('HOUR', {col}))", - "PT30M": "DATEADD(MINUTE, FLOOR(DATE_PART(MINUTE, {col}) / 30) * 30, \ + TimeGrain.THIRTY_MINUTES: "DATEADD(MINUTE, FLOOR(DATE_PART(MINUTE, {col}) / 30) * 30, \ DATE_TRUNC('HOUR', {col}))", - "PT1H": "DATE_TRUNC('HOUR', {col})", - "P1D": "DATE_TRUNC('DAY', {col})", - "P1W": "DATE_TRUNC('WEEK', {col})", - "P1M": "DATE_TRUNC('MONTH', {col})", - "P3M": "DATE_TRUNC('QUARTER', {col})", - "P1Y": "DATE_TRUNC('YEAR', {col})", + TimeGrain.HOUR: "DATE_TRUNC('HOUR', {col})", + TimeGrain.DAY: "DATE_TRUNC('DAY', {col})", + TimeGrain.WEEK: "DATE_TRUNC('WEEK', {col})", + TimeGrain.MONTH: "DATE_TRUNC('MONTH', {col})", + TimeGrain.QUARTER: "DATE_TRUNC('QUARTER', {col})", + TimeGrain.YEAR: "DATE_TRUNC('YEAR', {col})", } custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = { diff --git a/superset/db_engine_specs/spark.py b/superset/db_engine_specs/spark.py index a6eeb2e9db4d0..ccde9e5161c17 100644 --- a/superset/db_engine_specs/spark.py +++ b/superset/db_engine_specs/spark.py @@ -15,22 +15,23 @@ # specific language governing permissions and limitations # under the License. 
+from superset.constants import TimeGrain from superset.db_engine_specs.hive import HiveEngineSpec time_grain_expressions = { None: "{col}", - "PT1S": "date_trunc('second', {col})", - "PT1M": "date_trunc('minute', {col})", - "PT1H": "date_trunc('hour', {col})", - "P1D": "date_trunc('day', {col})", - "P1W": "date_trunc('week', {col})", - "P1M": "date_trunc('month', {col})", - "P3M": "date_trunc('quarter', {col})", - "P1Y": "date_trunc('year', {col})", - "P1W/1970-01-03T00:00:00Z": ( + TimeGrain.SECOND: "date_trunc('second', {col})", + TimeGrain.MINUTE: "date_trunc('minute', {col})", + TimeGrain.HOUR: "date_trunc('hour', {col})", + TimeGrain.DAY: "date_trunc('day', {col})", + TimeGrain.WEEK: "date_trunc('week', {col})", + TimeGrain.MONTH: "date_trunc('month', {col})", + TimeGrain.QUARTER: "date_trunc('quarter', {col})", + TimeGrain.YEAR: "date_trunc('year', {col})", + TimeGrain.WEEK_ENDING_SATURDAY: ( "date_trunc('week', {col} + interval '1 day') + interval '5 days'" ), - "1969-12-28T00:00:00Z/P1W": ( + TimeGrain.WEEK_STARTING_SUNDAY: ( "date_trunc('week', {col} + interval '1 day') - interval '1 day'" ), } diff --git a/superset/db_engine_specs/sqlite.py b/superset/db_engine_specs/sqlite.py index 767d0a20ad6ca..4bfd9949ad72d 100644 --- a/superset/db_engine_specs/sqlite.py +++ b/superset/db_engine_specs/sqlite.py @@ -23,6 +23,7 @@ from sqlalchemy import types from sqlalchemy.engine.reflection import Inspector +from superset.constants import TimeGrain from superset.db_engine_specs.base import BaseEngineSpec from superset.errors import SupersetErrorType @@ -40,23 +41,23 @@ class SqliteEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - "PT1S": "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))", - "PT1M": "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))", - "PT1H": "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))", - "P1D": "DATETIME({col}, 'start of day')", - "P1W": "DATETIME({col}, 'start of day', -strftime('%w', {col}) || ' days')", - "P1M": 
"DATETIME({col}, 'start of month')", - "P3M": ( + TimeGrain.SECOND: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))", + TimeGrain.MINUTE: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))", + TimeGrain.HOUR: "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))", + TimeGrain.DAY: "DATETIME({col}, 'start of day')", + TimeGrain.WEEK: "DATETIME({col}, 'start of day', -strftime('%w', {col}) || ' days')", + TimeGrain.MONTH: "DATETIME({col}, 'start of month')", + TimeGrain.QUARTER: ( "DATETIME({col}, 'start of month', " "printf('-%d month', (strftime('%m', {col}) - 1) % 3))" ), - "P1Y": "DATETIME({col}, 'start of year')", - "P1W/1970-01-03T00:00:00Z": "DATETIME({col}, 'start of day', 'weekday 6')", - "P1W/1970-01-04T00:00:00Z": "DATETIME({col}, 'start of day', 'weekday 0')", - "1969-12-28T00:00:00Z/P1W": ( + TimeGrain.YEAR: "DATETIME({col}, 'start of year')", + TimeGrain.WEEK_ENDING_SATURDAY: "DATETIME({col}, 'start of day', 'weekday 6')", + TimeGrain.WEEK_ENDING_SUNDAY: "DATETIME({col}, 'start of day', 'weekday 0')", + TimeGrain.WEEK_STARTING_SUNDAY: ( "DATETIME({col}, 'start of day', 'weekday 0', '-7 days')" ), - "1969-12-29T00:00:00Z/P1W": ( + TimeGrain.WEEK_STARTING_MONDAY: ( "DATETIME({col}, 'start of day', 'weekday 1', '-7 days')" ), } diff --git a/superset/utils/pandas_postprocessing/utils.py b/superset/utils/pandas_postprocessing/utils.py index 37d53697cb89b..5b3fe7514db9e 100644 --- a/superset/utils/pandas_postprocessing/utils.py +++ b/superset/utils/pandas_postprocessing/utils.py @@ -23,6 +23,7 @@ from flask_babel import gettext as _ from pandas import DataFrame, NamedAgg +from superset.constants import TimeGrain from superset.exceptions import InvalidPostProcessingError NUMPY_FUNCTIONS: dict[str, Callable[..., Any]] = { @@ -75,22 +76,22 @@ ) PROPHET_TIME_GRAIN_MAP = { - "PT1S": "S", - "PT1M": "min", - "PT5M": "5min", - "PT10M": "10min", - "PT15M": "15min", - "PT30M": "30min", - "PT1H": "H", - "P1D": "D", - "P1W": "W", - "P1M": "M", - "P3M": "Q", - "P1Y": "A", - 
"1969-12-28T00:00:00Z/P1W": "W-SUN", - "1969-12-29T00:00:00Z/P1W": "W-MON", - "P1W/1970-01-03T00:00:00Z": "W-SAT", - "P1W/1970-01-04T00:00:00Z": "W-SUN", + TimeGrain.SECOND: "S", + TimeGrain.MINUTE: "min", + TimeGrain.FIVE_MINUTES: "5min", + TimeGrain.TEN_MINUTES: "10min", + TimeGrain.FIFTEEN_MINUTES: "15min", + TimeGrain.THIRTY_MINUTES: "30min", + TimeGrain.HOUR: "H", + TimeGrain.DAY: "D", + TimeGrain.WEEK: "W", + TimeGrain.MONTH: "M", + TimeGrain.QUARTER: "Q", + TimeGrain.YEAR: "A", + TimeGrain.WEEK_STARTING_SUNDAY: "W-SUN", + TimeGrain.WEEK_STARTING_MONDAY: "W-MON", + TimeGrain.WEEK_ENDING_SATURDAY: "W-SAT", + TimeGrain.WEEK_ENDING_SUNDAY: "W-SUN", } RESAMPLE_METHOD = ("asfreq", "bfill", "ffill", "linear", "median", "mean", "sum") From 6367ce5f2d643b41af272952830a8f48e713ebff Mon Sep 17 00:00:00 2001 From: "Michael S. Molina" Date: Wed, 31 May 2023 11:35:38 -0300 Subject: [PATCH 04/15] Adds tests --- superset/common/query_context_processor.py | 21 ++++---- .../common/test_get_aggregated_join_column.py | 52 +++++++++++++++++++ 2 files changed, 63 insertions(+), 10 deletions(-) create mode 100644 tests/unit_tests/common/test_get_aggregated_join_column.py diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index adf9c01fb322b..ba2b3222b854e 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -485,21 +485,22 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme return CachedTimeOffset(df=df, queries=queries, cache_keys=cache_keys) + @staticmethod def get_aggregated_join_column( - self, row: pd.Series, column_index: int, time_grain: str + row: pd.Series, column_index: int, time_grain: str ) -> str: if time_grain in ( - TimeGrain.WEEK_STARTING_SUNDAY, - TimeGrain.WEEK_ENDING_SATURDAY, - ): - return row[column_index].strftime("%Y-W%U") + TimeGrain.WEEK_STARTING_SUNDAY, + TimeGrain.WEEK_ENDING_SATURDAY, + ): + return 
row[column_index].strftime("%Y-W%U") elif time_grain in ( - TimeGrain.WEEK, - TimeGrain.WEEK_STARTING_MONDAY, - TimeGrain.WEEK_ENDING_SUNDAY, - ): - return row[column_index].strftime("%Y-W%W") + TimeGrain.WEEK, + TimeGrain.WEEK_STARTING_MONDAY, + TimeGrain.WEEK_ENDING_SUNDAY, + ): + return row[column_index].strftime("%Y-W%W") elif time_grain == TimeGrain.MONTH: return row[column_index].strftime("%Y-%m") diff --git a/tests/unit_tests/common/test_get_aggregated_join_column.py b/tests/unit_tests/common/test_get_aggregated_join_column.py new file mode 100644 index 0000000000000..3d0274ee91542 --- /dev/null +++ b/tests/unit_tests/common/test_get_aggregated_join_column.py @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from pandas import Timestamp + +from superset.common.query_context_processor import QueryContextProcessor +from superset.constants import TimeGrain + +get_aggregated_join_column = QueryContextProcessor.get_aggregated_join_column + +row = [Timestamp("2020-01-07")] + + +def test_week_join_column(): + result = get_aggregated_join_column( + row=row, column_index=0, time_grain=TimeGrain.WEEK + ) + assert result == "2020-W01" + + +def test_month_join_column(): + result = get_aggregated_join_column( + row=row, column_index=0, time_grain=TimeGrain.MONTH + ) + assert result == "2020-01" + + +def test_quarter_join_column(): + result = get_aggregated_join_column( + row=row, column_index=0, time_grain=TimeGrain.QUARTER + ) + assert result == "2020-Q1" + + +def test_year_join_column(): + result = get_aggregated_join_column( + row=row, column_index=0, time_grain=TimeGrain.YEAR + ) + assert result == "2020" From 847b41d8869a6d1bbc474ab03e6336440c9218a7 Mon Sep 17 00:00:00 2001 From: "Michael S. Molina" Date: Wed, 31 May 2023 12:05:45 -0300 Subject: [PATCH 05/15] Fixes lint errors --- superset/common/query_context_processor.py | 6 +++--- superset/constants.py | 2 +- superset/db_engine_specs/dynamodb.py | 3 ++- superset/db_engine_specs/kusto.py | 15 ++++++++++----- superset/db_engine_specs/mssql.py | 15 ++++++++++----- superset/db_engine_specs/snowflake.py | 16 ++++++++-------- superset/db_engine_specs/sqlite.py | 3 ++- 7 files changed, 36 insertions(+), 24 deletions(-) diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index ba2b3222b854e..906accedf31dc 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -495,17 +495,17 @@ def get_aggregated_join_column( ): return row[column_index].strftime("%Y-W%U") - elif time_grain in ( + if time_grain in ( TimeGrain.WEEK, TimeGrain.WEEK_STARTING_MONDAY, TimeGrain.WEEK_ENDING_SUNDAY, ): return row[column_index].strftime("%Y-W%W") - elif 
time_grain == TimeGrain.MONTH: + if time_grain == TimeGrain.MONTH: return row[column_index].strftime("%Y-%m") - elif time_grain == TimeGrain.QUARTER: + if time_grain == TimeGrain.QUARTER: return row[column_index].strftime("%Y-Q") + str(row[column_index].quarter) return row[column_index].strftime("%Y") diff --git a/superset/constants.py b/superset/constants.py index 632abeb513944..a780dbe68b25b 100644 --- a/superset/constants.py +++ b/superset/constants.py @@ -186,7 +186,7 @@ class RouteMethod: # pylint: disable=too-few-public-methods ) -class TimeGrain: +class TimeGrain: # pylint: disable=too-few-public-methods SECOND = "PT1S" FIVE_SECONDS = "PT5S" THIRTY_SECONDS = "PT30S" diff --git a/superset/db_engine_specs/dynamodb.py b/superset/db_engine_specs/dynamodb.py index d8db741f770d1..0a29f8d4ae9ce 100644 --- a/superset/db_engine_specs/dynamodb.py +++ b/superset/db_engine_specs/dynamodb.py @@ -33,7 +33,8 @@ class DynamoDBEngineSpec(BaseEngineSpec): TimeGrain.MINUTE: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))", TimeGrain.HOUR: "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))", TimeGrain.DAY: "DATETIME({col}, 'start of day')", - TimeGrain.WEEK: "DATETIME({col}, 'start of day', -strftime('%w', {col}) || ' days')", + TimeGrain.WEEK: "DATETIME({col}, 'start of day', \ + -strftime('%w', {col}) || ' days')", TimeGrain.MONTH: "DATETIME({col}, 'start of month')", TimeGrain.QUARTER: ( "DATETIME({col}, 'start of month', " diff --git a/superset/db_engine_specs/kusto.py b/superset/db_engine_specs/kusto.py index 65d48f789e539..554e8b029f098 100644 --- a/superset/db_engine_specs/kusto.py +++ b/superset/db_engine_specs/kusto.py @@ -44,11 +44,14 @@ class KustoSqlEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method _time_grain_expressions = { None: "{col}", - TimeGrain.SECOND: "DATEADD(second, DATEDIFF(second, '2000-01-01', {col}), '2000-01-01')", + TimeGrain.SECOND: "DATEADD(second, \ + 'DATEDIFF(second, 2000-01-01', {col}), '2000-01-01')", TimeGrain.MINUTE: 
"DATEADD(minute, DATEDIFF(minute, 0, {col}), 0)", TimeGrain.FIVE_MINUTES: "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 5 * 5, 0)", - TimeGrain.TEN_MINUTES: "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 10 * 10, 0)", - TimeGrain.FIFTEEN_MINUTES: "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 15 * 15, 0)", + TimeGrain.TEN_MINUTES: "DATEADD(minute, \ + DATEDIFF(minute, 0, {col}) / 10 * 10, 0)", + TimeGrain.FIFTEEN_MINUTES: "DATEADD(minute, \ + DATEDIFF(minute, 0, {col}) / 15 * 15, 0)", TimeGrain.HALF_HOUR: "DATEADD(minute, DATEDIFF(minute, 0, {col}) / 30 * 30, 0)", TimeGrain.HOUR: "DATEADD(hour, DATEDIFF(hour, 0, {col}), 0)", TimeGrain.DAY: "DATEADD(day, DATEDIFF(day, 0, {col}), 0)", @@ -125,8 +128,10 @@ class KustoKqlEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method TimeGrain.MINUTE: "{col}/ time(1min)", TimeGrain.HOUR: "{col}/ time(1h)", TimeGrain.DAY: "{col}/ time(1d)", - TimeGrain.MONTH: "datetime_diff('month',CreateDate, datetime(0001-01-01 00:00:00))+1", - TimeGrain.YEAR: "datetime_diff('year',CreateDate, datetime(0001-01-01 00:00:00))+1", + TimeGrain.MONTH: "datetime_diff('month', CreateDate, \ + datetime(0001-01-01 00:00:00))+1", + TimeGrain.YEAR: "datetime_diff('year', CreateDate, \ + datetime(0001-01-01 00:00:00))+1", } type_code_map: dict[int, str] = {} # loaded from get_datatype only if needed diff --git a/superset/db_engine_specs/mssql.py b/superset/db_engine_specs/mssql.py index 9a4d2c5edbc72..5d29d36ba89b7 100644 --- a/superset/db_engine_specs/mssql.py +++ b/superset/db_engine_specs/mssql.py @@ -56,12 +56,17 @@ class MssqlEngineSpec(BaseEngineSpec): _time_grain_expressions = { None: "{col}", - TimeGrain.SECOND: "DATEADD(SECOND, DATEDIFF(SECOND, '2000-01-01', {col}), '2000-01-01')", + TimeGrain.SECOND: "DATEADD(SECOND, \ + DATEDIFF(SECOND, '2000-01-01', {col}), '2000-01-01')", TimeGrain.MINUTE: "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}), 0)", - TimeGrain.FIVE_MINUTES: "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}) / 5 * 5, 0)", - 
TimeGrain.TEN_MINUTES: "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}) / 10 * 10, 0)", - TimeGrain.FIFTEEN_MINUTES: "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}) / 15 * 15, 0)", - TimeGrain.THIRTY_MINUTES: "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, {col}) / 30 * 30, 0)", + TimeGrain.FIVE_MINUTES: "DATEADD(MINUTE, \ + DATEDIFF(MINUTE, 0, {col}) / 5 * 5, 0)", + TimeGrain.TEN_MINUTES: "DATEADD(MINUTE, \ + DATEDIFF(MINUTE, 0, {col}) / 10 * 10, 0)", + TimeGrain.FIFTEEN_MINUTES: "DATEADD(MINUTE, \ + DATEDIFF(MINUTE, 0, {col}) / 15 * 15, 0)", + TimeGrain.THIRTY_MINUTES: "DATEADD(MINUTE, \ + DATEDIFF(MINUTE, 0, {col}) / 30 * 30, 0)", TimeGrain.HOUR: "DATEADD(HOUR, DATEDIFF(HOUR, 0, {col}), 0)", TimeGrain.DAY: "DATEADD(DAY, DATEDIFF(DAY, 0, {col}), 0)", TimeGrain.WEEK: "DATEADD(DAY, 1 - DATEPART(WEEKDAY, {col})," diff --git a/superset/db_engine_specs/snowflake.py b/superset/db_engine_specs/snowflake.py index 473484f9a76c3..879ec307c4257 100644 --- a/superset/db_engine_specs/snowflake.py +++ b/superset/db_engine_specs/snowflake.py @@ -92,14 +92,14 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec): None: "{col}", TimeGrain.SECOND: "DATE_TRUNC('SECOND', {col})", TimeGrain.MINUTE: "DATE_TRUNC('MINUTE', {col})", - TimeGrain.FIVE_MINUTES: "DATEADD(MINUTE, FLOOR(DATE_PART(MINUTE, {col}) / 5) * 5, \ - DATE_TRUNC('HOUR', {col}))", - TimeGrain.TEN_MINUTES: "DATEADD(MINUTE, FLOOR(DATE_PART(MINUTE, {col}) / 10) * 10, \ - DATE_TRUNC('HOUR', {col}))", - TimeGrain.FIFTEEN_MINUTES: "DATEADD(MINUTE, FLOOR(DATE_PART(MINUTE, {col}) / 15) * 15, \ - DATE_TRUNC('HOUR', {col}))", - TimeGrain.THIRTY_MINUTES: "DATEADD(MINUTE, FLOOR(DATE_PART(MINUTE, {col}) / 30) * 30, \ - DATE_TRUNC('HOUR', {col}))", + TimeGrain.FIVE_MINUTES: "DATEADD(MINUTE, \ + FLOOR(DATE_PART(MINUTE, {col}) / 5) * 5, DATE_TRUNC('HOUR', {col}))", + TimeGrain.TEN_MINUTES: "DATEADD(MINUTE, \ + FLOOR(DATE_PART(MINUTE, {col}) / 10) * 10, DATE_TRUNC('HOUR', {col}))", + TimeGrain.FIFTEEN_MINUTES: "DATEADD(MINUTE, \ + FLOOR(DATE_PART(MINUTE, 
{col}) / 15) * 15, DATE_TRUNC('HOUR', {col}))", + TimeGrain.THIRTY_MINUTES: "DATEADD(MINUTE, \ + FLOOR(DATE_PART(MINUTE, {col}) / 30) * 30, DATE_TRUNC('HOUR', {col}))", TimeGrain.HOUR: "DATE_TRUNC('HOUR', {col})", TimeGrain.DAY: "DATE_TRUNC('DAY', {col})", TimeGrain.WEEK: "DATE_TRUNC('WEEK', {col})", diff --git a/superset/db_engine_specs/sqlite.py b/superset/db_engine_specs/sqlite.py index 4bfd9949ad72d..06d55375098a3 100644 --- a/superset/db_engine_specs/sqlite.py +++ b/superset/db_engine_specs/sqlite.py @@ -45,7 +45,8 @@ class SqliteEngineSpec(BaseEngineSpec): TimeGrain.MINUTE: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))", TimeGrain.HOUR: "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))", TimeGrain.DAY: "DATETIME({col}, 'start of day')", - TimeGrain.WEEK: "DATETIME({col}, 'start of day', -strftime('%w', {col}) || ' days')", + TimeGrain.WEEK: "DATETIME({col}, 'start of day', \ + -strftime('%w', {col}) || ' days')", TimeGrain.MONTH: "DATETIME({col}, 'start of month')", TimeGrain.QUARTER: ( "DATETIME({col}, 'start of month', " From bce26b1b569c7d7fe3ddd52f6213abfa53d48610 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Molina" Date: Wed, 31 May 2023 14:52:04 -0300 Subject: [PATCH 06/15] Account for no time grain --- superset/common/query_context_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index 906accedf31dc..3edc208a5d7c0 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -338,7 +338,7 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme ) columns = df.columns - time_grain = query_object.extras["time_grain_sqla"] + time_grain = query_object.extras.get("time_grain_sqla") use_aggregated_join_column = time_grain in AGGREGATED_JOIN_GRAINS if use_aggregated_join_column: # adds aggregated join column @@ -481,7 +481,7 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme # remove AGGREGATED_JOIN_COLUMN from df if use_aggregated_join_column: - df = df.drop(columns=[AGGREGATED_JOIN_COLUMN]) + df.drop(columns=[AGGREGATED_JOIN_COLUMN], inplace=True) return CachedTimeOffset(df=df, queries=queries, cache_keys=cache_keys) From dd16814e86283ce9dce1bf46b6d17d0ac7cca3e2 Mon Sep 17 00:00:00 2001 From: "Michael S. Molina" Date: Wed, 31 May 2023 16:22:48 -0300 Subject: [PATCH 07/15] Fixes druid time grains --- superset/common/query_context_processor.py | 6 +++++- superset/db_engine_specs/druid.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index 3edc208a5d7c0..27455c18579c0 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -76,6 +76,10 @@ AGGREGATED_JOIN_COLUMN = "__aggregated_join_column" +# This only includes time grains that may influence +# the temporal column used for joining offset results. 
+# Given that we don't allow time shifts smaller than a day, +# we don't need to include smaller time grains aggregations. AGGREGATED_JOIN_GRAINS = { TimeGrain.WEEK, TimeGrain.WEEK_STARTING_SUNDAY, @@ -338,7 +342,7 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme ) columns = df.columns - time_grain = query_object.extras.get("time_grain_sqla") + time_grain = query_object.extras.get("time_grain_sqla") or TimeGrain.DAY use_aggregated_join_column = time_grain in AGGREGATED_JOIN_GRAINS if use_aggregated_join_column: # adds aggregated join column diff --git a/superset/db_engine_specs/druid.py b/superset/db_engine_specs/druid.py index 2ac9346634d18..478f3e9492c45 100644 --- a/superset/db_engine_specs/druid.py +++ b/superset/db_engine_specs/druid.py @@ -64,11 +64,11 @@ class DruidEngineSpec(BaseEngineSpec): TimeGrain.MONTH: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1M')", TimeGrain.QUARTER: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P3M')", TimeGrain.YEAR: "TIME_FLOOR(CAST({col} AS TIMESTAMP), 'P1Y')", - TimeGrain.WEEK_STARTING_SUNDAY: ( + TimeGrain.WEEK_ENDING_SATURDAY: ( "TIME_SHIFT(TIME_FLOOR(TIME_SHIFT(CAST({col} AS TIMESTAMP), " "'P1D', 1), 'P1W'), 'P1D', 5)" ), - TimeGrain.WEEK_STARTING_MONDAY: ( + TimeGrain.WEEK_STARTING_SUNDAY: ( "TIME_SHIFT(TIME_FLOOR(TIME_SHIFT(CAST({col} AS TIMESTAMP), " "'P1D', 1), 'P1W'), 'P1D', -1)" ), From 134381d481901616628345e7ff4b8e71b5aadd78 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Molina" Date: Thu, 1 Jun 2023 10:31:58 -0300 Subject: [PATCH 08/15] Removes columns used for join --- superset/common/query_context_processor.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index 27455c18579c0..81f6e7165e2e4 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -74,6 +74,7 @@ stats_logger: BaseStatsLogger = config["STATS_LOGGER"] logger = logging.getLogger(__name__) +# Artificial column used for joining aggregated offset results AGGREGATED_JOIN_COLUMN = "__aggregated_join_column" # This only includes time grains that may influence @@ -91,6 +92,9 @@ TimeGrain.YEAR, } +# Right suffix used for joining offset results +R_SUFFIX = "__right_suffix" + class CachedTimeOffset(TypedDict): df: pd.DataFrame @@ -480,12 +484,15 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme left_df=df, right_df=offset_df, join_keys=join_keys, - rsuffix="_right", + rsuffix=R_SUFFIX, ) - # remove AGGREGATED_JOIN_COLUMN from df - if use_aggregated_join_column: - df.drop(columns=[AGGREGATED_JOIN_COLUMN], inplace=True) + # removes columns used for join + df.drop( + list(df.filter(regex=f"{AGGREGATED_JOIN_COLUMN}|{R_SUFFIX}")), + axis=1, + inplace=True, + ) return CachedTimeOffset(df=df, queries=queries, cache_keys=cache_keys) From 088113fadce8cec9922cfc52da716f2d322cc8de Mon Sep 17 00:00:00 2001 From: "Michael S. 
Molina" Date: Thu, 1 Jun 2023 11:16:15 -0300 Subject: [PATCH 09/15] Gets time grain from form_data --- superset/common/query_context_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index 81f6e7165e2e4..11f5430dbcf5d 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -346,7 +346,7 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme ) columns = df.columns - time_grain = query_object.extras.get("time_grain_sqla") or TimeGrain.DAY + time_grain = query_context.form_data.get("time_grain_sqla") or TimeGrain.DAY use_aggregated_join_column = time_grain in AGGREGATED_JOIN_GRAINS if use_aggregated_join_column: # adds aggregated join column From 78cd29132ce530be991716ebcb34f3f15ad5295f Mon Sep 17 00:00:00 2001 From: "Michael S. Molina" Date: Thu, 1 Jun 2023 11:51:08 -0300 Subject: [PATCH 10/15] Adds a method to get the time grain --- superset/common/query_context_processor.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index 11f5430dbcf5d..71714fa76bbb8 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -324,6 +324,24 @@ def _get_timestamp_format( return df + @staticmethod + def get_time_grain( + query_context: QueryContext, query_object: QueryObject + ) -> Optional[Any]: + if query_context.form_data: + return query_context.form_data.get("time_grain_sqla") + + if ( + query_object.columns + and len(query_object.columns) > 0 + and isinstance(query_object.columns[0], dict) + ): + # If the time grain is in the columns it will be the first one + # and it will be of AdhocColumn type + return query_object.columns[0].get("timeGrain") + + return query_object.extras.get("time_grain_sqla") + def 
processing_time_offsets( # pylint: disable=too-many-locals,too-many-statements self, df: pd.DataFrame, @@ -346,7 +364,7 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme ) columns = df.columns - time_grain = query_context.form_data.get("time_grain_sqla") or TimeGrain.DAY + time_grain = self.get_time_grain(query_context, query_object) or TimeGrain.DAY use_aggregated_join_column = time_grain in AGGREGATED_JOIN_GRAINS if use_aggregated_join_column: # adds aggregated join column From fd57a5b8efcc347ce07972c5f380db610cbad976 Mon Sep 17 00:00:00 2001 From: "Michael S. Molina" Date: Fri, 2 Jun 2023 11:16:50 -0300 Subject: [PATCH 11/15] Adds config to set join column producer --- superset/common/query_context_processor.py | 41 +++++++++++++++++----- superset/config.py | 12 +++++++ 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index 71714fa76bbb8..b45cbbaf77274 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -327,7 +327,7 @@ def _get_timestamp_format( @staticmethod def get_time_grain( query_context: QueryContext, query_object: QueryObject - ) -> Optional[Any]: + ) -> Any | None: if query_context.form_data: return query_context.form_data.get("time_grain_sqla") @@ -365,12 +365,23 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme columns = df.columns time_grain = self.get_time_grain(query_context, query_object) or TimeGrain.DAY - use_aggregated_join_column = time_grain in AGGREGATED_JOIN_GRAINS + join_column_producer = config["TIME_GRAIN_JOIN_COLUMN_PRODUCERS"].get( + time_grain + ) + use_aggregated_join_column = ( + time_grain in AGGREGATED_JOIN_GRAINS or join_column_producer + ) if use_aggregated_join_column: # adds aggregated join column - df[AGGREGATED_JOIN_COLUMN] = df.apply( - lambda row: self.get_aggregated_join_column(row, 0, time_grain),
axis=1 - ) + if join_column_producer: + df[AGGREGATED_JOIN_COLUMN] = df.apply( + lambda row: join_column_producer(row, 0), axis=1 + ) + else: + df[AGGREGATED_JOIN_COLUMN] = df.apply( + lambda row: self.get_aggregated_join_column(row, 0, time_grain), + axis=1, + ) # skips the first column which is the temporal column # because we'll use the aggregated join columns instead columns = df.columns[1:] @@ -476,10 +487,22 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme if use_aggregated_join_column: # adds aggregated join column - offset_metrics_df[AGGREGATED_JOIN_COLUMN] = offset_metrics_df.apply( - lambda row: self.get_aggregated_join_column(row, 0, time_grain), - axis=1, - ) + if join_column_producer: + offset_metrics_df[ + AGGREGATED_JOIN_COLUMN + ] = offset_metrics_df.apply( + lambda row: join_column_producer(row, 0), + axis=1, + ) + else: + offset_metrics_df[ + AGGREGATED_JOIN_COLUMN + ] = offset_metrics_df.apply( + lambda row: self.get_aggregated_join_column( + row, 0, time_grain + ), + axis=1, + ) # cache df and query value = { diff --git a/superset/config.py b/superset/config.py index 434456386d932..f9fdf3cdd5122 100644 --- a/superset/config.py +++ b/superset/config.py @@ -41,6 +41,7 @@ from dateutil import tz from flask import Blueprint from flask_appbuilder.security.manager import AUTH_DB +from pandas import Series from pandas._libs.parsers import STR_NA_VALUES # pylint: disable=no-name-in-module from sqlalchemy.orm.query import Query @@ -773,6 +774,17 @@ class D3Format(TypedDict, total=False): # } TIME_GRAIN_ADDON_EXPRESSIONS: dict[str, dict[str, str]] = {} +# Map of custom time grains and artificial join column producers used +# when generating the join key between results and time shifts. 
+# See supeset/common/query_context_processor.get_aggregated_join_column +# +# Example of a join column producer that aggregates by fiscal year +# def join_producer(row: Series, column_index: int) -> str: +# return row[index].strftime("%F") +# +# TIME_GRAIN_JOIN_COLUMN_PRODUCERS = {"P1F": join_producer} +TIME_GRAIN_JOIN_COLUMN_PRODUCERS: dict[str, Callable[[Series, int], str]] = {} + # --------------------------------------------------- # List of viz_types not allowed in your environment # For example: Disable pivot table and treemap: From 8070513bad82717caa4fd41263fc3b9d4a96dac6 Mon Sep 17 00:00:00 2001 From: "Michael S. Molina" Date: Fri, 2 Jun 2023 11:53:58 -0300 Subject: [PATCH 12/15] Extracts method --- superset/common/query_context_processor.py | 47 +++++++++------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index b45cbbaf77274..f273f494dc478 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -342,6 +342,22 @@ def get_time_grain( return query_object.extras.get("time_grain_sqla") + def add_aggregated_join_column( + self, + df: pd.DataFrame, + time_grain: str, + join_column_producer: Any, + ) -> None: + if join_column_producer: + df[AGGREGATED_JOIN_COLUMN] = df.apply( + lambda row: join_column_producer(row, 0), axis=1 + ) + else: + df[AGGREGATED_JOIN_COLUMN] = df.apply( + lambda row: self.get_aggregated_join_column(row, 0, time_grain), + axis=1, + ) + def processing_time_offsets( # pylint: disable=too-many-locals,too-many-statements self, df: pd.DataFrame, @@ -372,16 +388,7 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme time_grain in AGGREGATED_JOIN_GRAINS or join_column_producer ) if use_aggregated_join_column: - # adds aggregated join column - if join_column_producer: - df[AGGREGATED_JOIN_COLUMN] = df.apply( - lambda row: join_column_producer(row, 0), 
axis=1 - ) - else: - df[AGGREGATED_JOIN_COLUMN] = df.apply( - lambda row: self.get_aggregated_join_column(row, 0, time_grain), - axis=1, - ) + self.add_aggregated_join_column(df, time_grain, join_column_producer) # skips the first column which is the temporal column # because we'll use the aggregated join columns instead columns = df.columns[1:] @@ -486,23 +493,9 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme ) if use_aggregated_join_column: - # adds aggregated join column - if join_column_producer: - offset_metrics_df[ - AGGREGATED_JOIN_COLUMN - ] = offset_metrics_df.apply( - lambda row: join_column_producer(row, 0), - axis=1, - ) - else: - offset_metrics_df[ - AGGREGATED_JOIN_COLUMN - ] = offset_metrics_df.apply( - lambda row: self.get_aggregated_join_column( - row, 0, time_grain - ), - axis=1, - ) + self.add_aggregated_join_column( + offset_metrics_df, time_grain, join_column_producer + ) # cache df and query value = { From ac7596e1c8542b3dae4b862800b5e56cc60e66ee Mon Sep 17 00:00:00 2001 From: "Michael S. 
Molina" Date: Wed, 7 Jun 2023 11:02:50 -0300 Subject: [PATCH 13/15] Addresses comments --- superset/common/query_context_processor.py | 21 +++--- .../common/test_get_aggregated_join_column.py | 69 ++++++++++++------- 2 files changed, 56 insertions(+), 34 deletions(-) diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index f273f494dc478..6553c19807ee2 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -74,7 +74,7 @@ stats_logger: BaseStatsLogger = config["STATS_LOGGER"] logger = logging.getLogger(__name__) -# Artificial column used for joining aggregated offset results +# Temporary column used for joining aggregated offset results AGGREGATED_JOIN_COLUMN = "__aggregated_join_column" # This only includes time grains that may influence @@ -325,12 +325,7 @@ def _get_timestamp_format( return df @staticmethod - def get_time_grain( - query_context: QueryContext, query_object: QueryObject - ) -> Any | None: - if query_context.form_data: - return query_context.form_data.get("time_grain_sqla") - + def get_time_grain(query_object: QueryObject) -> Any | None: if ( query_object.columns and len(query_object.columns) > 0 @@ -346,7 +341,7 @@ def add_aggregated_join_column( self, df: pd.DataFrame, time_grain: str, - join_column_producer: Any, + join_column_producer: Any = None, ) -> None: if join_column_producer: df[AGGREGATED_JOIN_COLUMN] = df.apply( @@ -380,12 +375,18 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme ) columns = df.columns - time_grain = self.get_time_grain(query_context, query_object) or TimeGrain.DAY + time_grain = self.get_time_grain(query_object) + + if not time_grain: + raise QueryObjectValidationError( + _("Time Grain must be specified when using Time Shift.") + ) + join_column_producer = config["TIME_GRAIN_JOIN_COLUMN_PRODUCERS"].get( time_grain ) use_aggregated_join_column = ( - time_grain in AGGREGATED_JOIN_GRAINS or 
join_column_producer + join_column_producer or time_grain in AGGREGATED_JOIN_GRAINS ) if use_aggregated_join_column: self.add_aggregated_join_column(df, time_grain, join_column_producer) diff --git a/tests/unit_tests/common/test_get_aggregated_join_column.py b/tests/unit_tests/common/test_get_aggregated_join_column.py index 3d0274ee91542..ebf0e2d89b0f2 100644 --- a/tests/unit_tests/common/test_get_aggregated_join_column.py +++ b/tests/unit_tests/common/test_get_aggregated_join_column.py @@ -14,39 +14,60 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -from pandas import Timestamp +from pandas import DataFrame, Series, Timestamp +from pandas.testing import assert_frame_equal +from pytest import mark -from superset.common.query_context_processor import QueryContextProcessor +from superset.common.chart_data import ChartDataResultFormat, ChartDataResultType +from superset.common.query_context import QueryContext +from superset.common.query_context_processor import ( + AGGREGATED_JOIN_COLUMN, + QueryContextProcessor, +) +from superset.connectors.base.models import BaseDatasource from superset.constants import TimeGrain -get_aggregated_join_column = QueryContextProcessor.get_aggregated_join_column - -row = [Timestamp("2020-01-07")] - - -def test_week_join_column(): - result = get_aggregated_join_column( - row=row, column_index=0, time_grain=TimeGrain.WEEK +query_context_processor = QueryContextProcessor( + QueryContext( + datasource=BaseDatasource(), + queries=[], + result_type=ChartDataResultType.COLUMNS, + form_data={}, + slice_=None, + result_format=ChartDataResultFormat.CSV, + cache_values={}, ) - assert result == "2020-W01" +) -def test_month_join_column(): - result = get_aggregated_join_column( - row=row, column_index=0, time_grain=TimeGrain.MONTH - ) - assert result == "2020-01" +def join_column_producer(row: Series, column_index: int) -> str: + return "CUSTOM_FORMAT" -def 
test_quarter_join_column(): - result = get_aggregated_join_column( - row=row, column_index=0, time_grain=TimeGrain.QUARTER +@mark.parametrize( + ("time_grain", "expected"), + [ + (TimeGrain.WEEK, "2020-W01"), + (TimeGrain.MONTH, "2020-01"), + (TimeGrain.QUARTER, "2020-Q1"), + (TimeGrain.YEAR, "2020"), + ], +) +def test_aggregated_join_column(time_grain: str, expected: str): + df = DataFrame({"ds": [Timestamp("2020-01-07")]}) + query_context_processor.add_aggregated_join_column(df, time_grain) + result = DataFrame( + {"ds": [Timestamp("2020-01-07")], AGGREGATED_JOIN_COLUMN: [expected]} ) - assert result == "2020-Q1" + assert_frame_equal(df, result) -def test_year_join_column(): - result = get_aggregated_join_column( - row=row, column_index=0, time_grain=TimeGrain.YEAR +def test_aggregated_join_column_producer(): + df = DataFrame({"ds": [Timestamp("2020-01-07")]}) + query_context_processor.add_aggregated_join_column( + df, TimeGrain.YEAR, join_column_producer + ) + result = DataFrame( + {"ds": [Timestamp("2020-01-07")], AGGREGATED_JOIN_COLUMN: ["CUSTOM_FORMAT"]} ) - assert result == "2020" + assert_frame_equal(df, result) From d8a752470960a9a542a3ed4c5710adaf435f6782 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Molina" Date: Thu, 8 Jun 2023 11:11:07 -0300 Subject: [PATCH 14/15] Uses a fixture in the test --- superset/constants.py | 2 +- superset/utils/pandas_postprocessing/utils.py | 2 +- .../common/test_get_aggregated_join_column.py | 14 +++++++++----- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/superset/constants.py b/superset/constants.py index a780dbe68b25b..89e5a2eeed2c1 100644 --- a/superset/constants.py +++ b/superset/constants.py @@ -186,7 +186,7 @@ class RouteMethod: # pylint: disable=too-few-public-methods ) -class TimeGrain: # pylint: disable=too-few-public-methods +class TimeGrain(str, Enum): SECOND = "PT1S" FIVE_SECONDS = "PT5S" THIRTY_SECONDS = "PT30S" diff --git a/superset/utils/pandas_postprocessing/utils.py b/superset/utils/pandas_postprocessing/utils.py index 5b3fe7514db9e..4d6884c8af0e7 100644 --- a/superset/utils/pandas_postprocessing/utils.py +++ b/superset/utils/pandas_postprocessing/utils.py @@ -75,7 +75,7 @@ "cumsum", ) -PROPHET_TIME_GRAIN_MAP = { +PROPHET_TIME_GRAIN_MAP: dict[str, str] = { TimeGrain.SECOND: "S", TimeGrain.MINUTE: "min", TimeGrain.FIVE_MINUTES: "5min", diff --git a/tests/unit_tests/common/test_get_aggregated_join_column.py b/tests/unit_tests/common/test_get_aggregated_join_column.py index ebf0e2d89b0f2..8effacf2494cb 100644 --- a/tests/unit_tests/common/test_get_aggregated_join_column.py +++ b/tests/unit_tests/common/test_get_aggregated_join_column.py @@ -16,7 +16,7 @@ # under the License. 
from pandas import DataFrame, Series, Timestamp from pandas.testing import assert_frame_equal -from pytest import mark +from pytest import fixture, mark from superset.common.chart_data import ChartDataResultFormat, ChartDataResultType from superset.common.query_context import QueryContext @@ -40,8 +40,12 @@ ) -def join_column_producer(row: Series, column_index: int) -> str: - return "CUSTOM_FORMAT" +@fixture +def make_join_column_producer(): + def join_column_producer(row: Series, column_index: int) -> str: + return "CUSTOM_FORMAT" + + return join_column_producer @mark.parametrize( @@ -62,10 +66,10 @@ def test_aggregated_join_column(time_grain: str, expected: str): assert_frame_equal(df, result) -def test_aggregated_join_column_producer(): +def test_aggregated_join_column_producer(make_join_column_producer): df = DataFrame({"ds": [Timestamp("2020-01-07")]}) query_context_processor.add_aggregated_join_column( - df, TimeGrain.YEAR, join_column_producer + df, TimeGrain.YEAR, make_join_column_producer ) result = DataFrame( {"ds": [Timestamp("2020-01-07")], AGGREGATED_JOIN_COLUMN: ["CUSTOM_FORMAT"]} From 13ed8300debf6b9e3228ef8c6b91087fc8d92491 Mon Sep 17 00:00:00 2001 From: "Michael S. Molina" Date: Thu, 8 Jun 2023 11:26:59 -0300 Subject: [PATCH 15/15] Fixes types --- superset/db_engine_specs/databricks.py | 18 ++++++++++-------- superset/db_engine_specs/spark.py | 3 ++- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/superset/db_engine_specs/databricks.py b/superset/db_engine_specs/databricks.py index df53d017b7ad2..56ce47772f589 100644 --- a/superset/db_engine_specs/databricks.py +++ b/superset/db_engine_specs/databricks.py @@ -14,10 +14,11 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+from __future__ import annotations import json from datetime import datetime -from typing import Any, Optional, TYPE_CHECKING +from typing import Any, TYPE_CHECKING from apispec import APISpec from apispec.ext.marshmallow import MarshmallowPlugin @@ -39,6 +40,7 @@ from superset.models.core import Database +# class DatabricksParametersSchema(Schema): """ This is the list of fields that are expected @@ -93,7 +95,7 @@ class DatabricksPropertiesType(TypedDict): extra: str -time_grain_expressions = { +time_grain_expressions: dict[str | None, str] = { None: "{col}", TimeGrain.SECOND: "date_trunc('second', {col})", TimeGrain.MINUTE: "date_trunc('minute', {col})", @@ -135,8 +137,8 @@ class DatabricksODBCEngineSpec(BaseEngineSpec): @classmethod def convert_dttm( - cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None - ) -> Optional[str]: + cls, target_type: str, dttm: datetime, db_extra: dict[str, Any] | None = None + ) -> str | None: return HiveEngineSpec.convert_dttm(target_type, dttm, db_extra=db_extra) @classmethod @@ -160,7 +162,7 @@ class DatabricksNativeEngineSpec(DatabricksODBCEngineSpec, BasicParametersMixin) encryption_parameters = {"ssl": "1"} @staticmethod - def get_extra_params(database: "Database") -> dict[str, Any]: + def get_extra_params(database: Database) -> dict[str, Any]: """ Add a user agent to be used in the requests. 
Trim whitespace from connect_args to avoid databricks driver errors @@ -181,9 +183,9 @@ def get_extra_params(database: "Database") -> dict[str, Any]: @classmethod def get_table_names( cls, - database: "Database", + database: Database, inspector: Inspector, - schema: Optional[str], + schema: str | None, ) -> set[str]: return super().get_table_names( database, inspector, schema @@ -213,7 +215,7 @@ def build_sqlalchemy_uri( # type: ignore @classmethod def extract_errors( - cls, ex: Exception, context: Optional[dict[str, Any]] = None + cls, ex: Exception, context: dict[str, Any] | None = None ) -> list[SupersetError]: raw_message = cls._extract_error_message(ex) diff --git a/superset/db_engine_specs/spark.py b/superset/db_engine_specs/spark.py index ccde9e5161c17..95a7bfdaeaf4e 100644 --- a/superset/db_engine_specs/spark.py +++ b/superset/db_engine_specs/spark.py @@ -14,11 +14,12 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from __future__ import annotations from superset.constants import TimeGrain from superset.db_engine_specs.hive import HiveEngineSpec -time_grain_expressions = { +time_grain_expressions: dict[str | None, str] = { None: "{col}", TimeGrain.SECOND: "date_trunc('second', {col})", TimeGrain.MINUTE: "date_trunc('minute', {col})",