From 5914ce62e4878e2bdaf5d729ef615dae6698e63f Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Mon, 7 Aug 2023 13:52:35 -0700 Subject: [PATCH 1/4] feat: improve SQLite DB engine spec --- superset/db_engine_specs/gsheets.py | 7 +- superset/db_engine_specs/shillelagh.py | 18 +++ superset/db_engine_specs/sqlite.py | 166 ++++++++++++++++++++++++- 3 files changed, 186 insertions(+), 5 deletions(-) diff --git a/superset/db_engine_specs/gsheets.py b/superset/db_engine_specs/gsheets.py index abf5bac48f96a..777499a8f955e 100644 --- a/superset/db_engine_specs/gsheets.py +++ b/superset/db_engine_specs/gsheets.py @@ -32,7 +32,7 @@ from superset import security_manager from superset.constants import PASSWORD_MASK from superset.databases.schemas import encrypted_field_properties, EncryptedString -from superset.db_engine_specs.sqlite import SqliteEngineSpec +from superset.db_engine_specs.shillelagh import ShillelaghEngineSpec from superset.errors import ErrorLevel, SupersetError, SupersetErrorType if TYPE_CHECKING: @@ -65,14 +65,13 @@ class GSheetsPropertiesType(TypedDict): catalog: dict[str, str] -class GSheetsEngineSpec(SqliteEngineSpec): +class GSheetsEngineSpec(ShillelaghEngineSpec): """Engine for Google spreadsheets""" - engine = "gsheets" engine_name = "Google Sheets" + engine = "gsheets" allows_joins = True allows_subqueries = True - disable_ssh_tunneling = True parameters_schema = GSheetsParametersSchema() default_driver = "apsw" diff --git a/superset/db_engine_specs/shillelagh.py b/superset/db_engine_specs/shillelagh.py index 37301224484b7..61820824b02d9 100644 --- a/superset/db_engine_specs/shillelagh.py +++ b/superset/db_engine_specs/shillelagh.py @@ -14,8 +14,15 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+from __future__ import annotations + +from typing import TYPE_CHECKING + from superset.db_engine_specs.sqlite import SqliteEngineSpec +if TYPE_CHECKING: + from superset.models.core import Database + class ShillelaghEngineSpec(SqliteEngineSpec): """Engine for shillelagh""" @@ -28,3 +35,14 @@ class ShillelaghEngineSpec(SqliteEngineSpec): allows_joins = True allows_subqueries = True + + @classmethod + def get_function_names( + cls, + database: Database, + ) -> list[str]: + return super().get_function_names(database) + [ + "sleep", + "version", + "get_metadata", + ] diff --git a/superset/db_engine_specs/sqlite.py b/superset/db_engine_specs/sqlite.py index 06d55375098a3..e7f053da21284 100644 --- a/superset/db_engine_specs/sqlite.py +++ b/superset/db_engine_specs/sqlite.py @@ -14,6 +14,9 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. + +from __future__ import annotations + import re from datetime import datetime from re import Pattern @@ -39,11 +42,41 @@ class SqliteEngineSpec(BaseEngineSpec): engine = "sqlite" engine_name = "SQLite" + disable_ssh_tunneling = True + _time_grain_expressions = { None: "{col}", TimeGrain.SECOND: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))", + TimeGrain.FIVE_SECONDS: ( + "DATETIME({col}, printf('-%d seconds', " + "CAST(strftime('%S', {col}) AS INT) % 5))" + ), + TimeGrain.THIRTY_SECONDS: ( + "DATETIME({col}, printf('-%d seconds', " + "CAST(strftime('%S', {col}) AS INT) % 30))" + ), TimeGrain.MINUTE: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))", + TimeGrain.FIVE_MINUTES: ( + "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', " + "CAST(strftime('%M', {col}) AS INT) % 5))" + ), + TimeGrain.TEN_MINUTES: ( + "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', " + "CAST(strftime('%M', {col}) AS INT) % 10))" + ), + TimeGrain.FIFTEEN_MINUTES: ( + "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d 
minutes', " + "CAST(strftime('%M', {col}) AS INT) % 15))" + ), + TimeGrain.THIRTY_MINUTES: ( + "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', " + "CAST(strftime('%M', {col}) AS INT) % 30))" + ), TimeGrain.HOUR: "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))", + TimeGrain.SIX_HOURS: ( + "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}), printf('-%d hours', " + "CAST(strftime('%H', {col}) AS INT) % 6))" + ), TimeGrain.DAY: "DATETIME({col}, 'start of day')", TimeGrain.WEEK: "DATETIME({col}, 'start of day', \ -strftime('%w', {col}) || ' days')", @@ -62,6 +95,13 @@ class SqliteEngineSpec(BaseEngineSpec): "DATETIME({col}, 'start of day', 'weekday 1', '-7 days')" ), } + # not sure why these are differnet + _time_grain_expressions.update( + { + TimeGrain.HALF_HOUR: _time_grain_expressions[TimeGrain.THIRTY_MINUTES], + TimeGrain.QUARTER_YEAR: _time_grain_expressions[TimeGrain.QUARTER], + } + ) custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = { COLUMN_DOES_NOT_EXIST_REGEX: ( @@ -86,7 +126,131 @@ def convert_dttm( @classmethod def get_table_names( - cls, database: "Database", inspector: Inspector, schema: Optional[str] + cls, database: Database, inspector: Inspector, schema: Optional[str] ) -> set[str]: """Need to disregard the schema for Sqlite""" return set(inspector.get_table_names()) + + @classmethod + def get_function_names( + cls, + database: Database, + ) -> list[str]: + """ + Return function names. 
+ """ + return [ + "abs", + "acos", + "acosh", + "asin", + "asinh", + "atan", + "atan2", + "atanh", + "avg", + "ceil", + "ceiling", + "changes", + "char", + "coalesce", + "cos", + "cosh", + "count", + "cume_dist", + "date", + "datetime", + "degrees", + "dense_rank", + "exp", + "first_value", + "floor", + "format", + "glob", + "group_concat", + "hex", + "ifnull", + "iif", + "instr", + "json", + "json_array", + "json_array_length", + "json_each", + "json_error_position", + "json_extract", + "json_group_array", + "json_group_object", + "json_insert", + "json_object", + "json_patch", + "json_quote", + "json_remove", + "json_replace", + "json_set", + "json_tree", + "json_type", + "json_valid", + "julianday", + "lag", + "last_insert_rowid", + "last_value", + "lead", + "length", + "like", + "likelihood", + "likely", + "ln", + "load_extension", + "log", + "log10", + "log2", + "lower", + "ltrim", + "max", + "min", + "mod", + "nth_value", + "ntile", + "nullif", + "percent_rank", + "pi", + "pow", + "power", + "printf", + "quote", + "radians", + "random", + "randomblob", + "rank", + "replace", + "round", + "row_number", + "rtrim", + "sign", + "sin", + "sinh", + "soundex", + "sqlite_compileoption_get", + "sqlite_compileoption_used", + "sqlite_offset", + "sqlite_source_id", + "sqlite_version", + "sqrt", + "strftime", + "substr", + "substring", + "sum", + "tan", + "tanh", + "time", + "total_changes", + "trim", + "trunc", + "typeof", + "unhex", + "unicode", + "unixepoch", + "unlikely", + "upper", + "zeroblob", + ] From a8f56b306b06fb34368f8b2090a0f9a80145bfef Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Mon, 7 Aug 2023 14:02:07 -0700 Subject: [PATCH 2/4] Add more tests --- .../unit_tests/db_engine_specs/test_sqlite.py | 88 +++++++++++++------ 1 file changed, 60 insertions(+), 28 deletions(-) diff --git a/tests/unit_tests/db_engine_specs/test_sqlite.py b/tests/unit_tests/db_engine_specs/test_sqlite.py index 11ce174c0f4ed..a31992afcf867 100644 --- 
a/tests/unit_tests/db_engine_specs/test_sqlite.py +++ b/tests/unit_tests/db_engine_specs/test_sqlite.py @@ -21,6 +21,7 @@ import pytest from sqlalchemy.engine import create_engine +from superset.constants import TimeGrain from tests.unit_tests.db_engine_specs.utils import assert_convert_dttm from tests.unit_tests.fixtures.common import dttm @@ -47,13 +48,21 @@ def test_convert_dttm( @pytest.mark.parametrize( "dttm,grain,expected", [ - ("2022-05-04T05:06:07.89Z", "PT1S", "2022-05-04 05:06:07"), - ("2022-05-04T05:06:07.89Z", "PT1M", "2022-05-04 05:06:00"), - ("2022-05-04T05:06:07.89Z", "PT1H", "2022-05-04 05:00:00"), - ("2022-05-04T05:06:07.89Z", "P1D", "2022-05-04 00:00:00"), - ("2022-05-04T05:06:07.89Z", "P1W", "2022-05-01 00:00:00"), - ("2022-05-04T05:06:07.89Z", "P1M", "2022-05-01 00:00:00"), - ("2022-05-04T05:06:07.89Z", "P1Y", "2022-01-01 00:00:00"), + ("2022-05-04T05:06:07.89Z", TimeGrain.SECOND, "2022-05-04 05:06:07"), + ("2022-05-04T05:06:07.89Z", TimeGrain.FIVE_SECONDS, "2022-05-04 05:06:05"), + ("2022-05-04T05:06:37.89Z", TimeGrain.THIRTY_SECONDS, "2022-05-04 05:06:30"), + ("2022-05-04T05:06:07.89Z", TimeGrain.MINUTE, "2022-05-04 05:06:00"), + ("2022-05-04T05:06:07.89Z", TimeGrain.FIVE_MINUTES, "2022-05-04 05:05:00"), + ("2022-05-04T05:36:07.89Z", TimeGrain.TEN_MINUTES, "2022-05-04 05:30:00"), + ("2022-05-04T05:46:07.89Z", TimeGrain.FIFTEEN_MINUTES, "2022-05-04 05:45:00"), + ("2022-05-04T05:36:07.89Z", TimeGrain.THIRTY_MINUTES, "2022-05-04 05:30:00"), + ("2022-05-04T05:36:07.89Z", TimeGrain.HALF_HOUR, "2022-05-04 05:30:00"), + ("2022-05-04T05:06:07.89Z", TimeGrain.HOUR, "2022-05-04 05:00:00"), + ("2022-05-04T07:06:07.89Z", TimeGrain.SIX_HOURS, "2022-05-04 06:00:00"), + ("2022-05-04T05:06:07.89Z", TimeGrain.DAY, "2022-05-04 00:00:00"), + ("2022-05-04T05:06:07.89Z", TimeGrain.WEEK, "2022-05-01 00:00:00"), + ("2022-05-04T05:06:07.89Z", TimeGrain.MONTH, "2022-05-01 00:00:00"), + ("2022-05-04T05:06:07.89Z", TimeGrain.YEAR, "2022-01-01 00:00:00"), # 
___________________________ # | May 2022 | # |---------------------------| @@ -61,27 +70,50 @@ def test_convert_dttm( # |---+---+---+---+---+---+---| # | 1 | 2 | 3 | 4 | 5 | 6 | 7 | # --------------------------- - # week ending Saturday - ("2022-05-04T05:06:07.89Z", "P1W/1970-01-03T00:00:00Z", "2022-05-07 00:00:00"), - # week ending Sunday - ("2022-05-04T05:06:07.89Z", "P1W/1970-01-04T00:00:00Z", "2022-05-08 00:00:00"), - # week starting Sunday - ("2022-05-04T05:06:07.89Z", "1969-12-28T00:00:00Z/P1W", "2022-05-01 00:00:00"), - # week starting Monday - ("2022-05-04T05:06:07.89Z", "1969-12-29T00:00:00Z/P1W", "2022-05-02 00:00:00"), - # tests for quarter - ("2022-01-04T05:06:07.89Z", "P3M", "2022-01-01 00:00:00"), - ("2022-02-04T05:06:07.89Z", "P3M", "2022-01-01 00:00:00"), - ("2022-03-04T05:06:07.89Z", "P3M", "2022-01-01 00:00:00"), - ("2022-04-04T05:06:07.89Z", "P3M", "2022-04-01 00:00:00"), - ("2022-05-04T05:06:07.89Z", "P3M", "2022-04-01 00:00:00"), - ("2022-06-04T05:06:07.89Z", "P3M", "2022-04-01 00:00:00"), - ("2022-07-04T05:06:07.89Z", "P3M", "2022-07-01 00:00:00"), - ("2022-08-04T05:06:07.89Z", "P3M", "2022-07-01 00:00:00"), - ("2022-09-04T05:06:07.89Z", "P3M", "2022-07-01 00:00:00"), - ("2022-10-04T05:06:07.89Z", "P3M", "2022-10-01 00:00:00"), - ("2022-11-04T05:06:07.89Z", "P3M", "2022-10-01 00:00:00"), - ("2022-12-04T05:06:07.89Z", "P3M", "2022-10-01 00:00:00"), + ( + "2022-05-04T05:06:07.89Z", + TimeGrain.WEEK_ENDING_SATURDAY, + "2022-05-07 00:00:00", + ), + ( + "2022-05-04T05:06:07.89Z", + TimeGrain.WEEK_ENDING_SUNDAY, + "2022-05-08 00:00:00", + ), + ( + "2022-05-04T05:06:07.89Z", + TimeGrain.WEEK_STARTING_SUNDAY, + "2022-05-01 00:00:00", + ), + ( + "2022-05-04T05:06:07.89Z", + TimeGrain.WEEK_STARTING_MONDAY, + "2022-05-02 00:00:00", + ), + ("2022-01-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-01-01 00:00:00"), + ("2022-02-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-01-01 00:00:00"), + ("2022-03-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-01-01 
00:00:00"), + ("2022-04-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-04-01 00:00:00"), + ("2022-05-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-04-01 00:00:00"), + ("2022-06-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-04-01 00:00:00"), + ("2022-07-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-07-01 00:00:00"), + ("2022-08-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-07-01 00:00:00"), + ("2022-09-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-07-01 00:00:00"), + ("2022-10-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-10-01 00:00:00"), + ("2022-11-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-10-01 00:00:00"), + ("2022-12-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-10-01 00:00:00"), + ("2022-01-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-01-01 00:00:00"), + ("2022-02-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-01-01 00:00:00"), + ("2022-03-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-01-01 00:00:00"), + ("2022-04-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-04-01 00:00:00"), + ("2022-05-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-04-01 00:00:00"), + ("2022-06-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-04-01 00:00:00"), + ("2022-07-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-07-01 00:00:00"), + ("2022-08-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-07-01 00:00:00"), + ("2022-09-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-07-01 00:00:00"), + ("2022-10-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-10-01 00:00:00"), + ("2022-11-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-10-01 00:00:00"), + ("2022-12-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-10-01 00:00:00"), ], ) def test_time_grain_expressions(dttm: str, grain: str, expected: str) -> None: From 0a80e2bf6f34a313a7a27e4e35a511f795685641 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Mon, 7 Aug 2023 14:04:40 -0700 Subject: [PATCH 3/4] Fix typo --- superset/db_engine_specs/sqlite.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/superset/db_engine_specs/sqlite.py 
b/superset/db_engine_specs/sqlite.py index e7f053da21284..c06660abbd658 100644 --- a/superset/db_engine_specs/sqlite.py +++ b/superset/db_engine_specs/sqlite.py @@ -20,7 +20,7 @@ import re from datetime import datetime from re import Pattern -from typing import Any, Optional, TYPE_CHECKING +from typing import Any, TYPE_CHECKING from flask_babel import gettext as __ from sqlalchemy import types @@ -95,7 +95,7 @@ class SqliteEngineSpec(BaseEngineSpec): "DATETIME({col}, 'start of day', 'weekday 1', '-7 days')" ), } - # not sure why these are differnet + # not sure why these are different _time_grain_expressions.update( { TimeGrain.HALF_HOUR: _time_grain_expressions[TimeGrain.THIRTY_MINUTES], @@ -117,8 +117,8 @@ def epoch_to_dttm(cls) -> str: @classmethod def convert_dttm( - cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None - ) -> Optional[str]: + cls, target_type: str, dttm: datetime, db_extra: dict[str, Any] | None = None + ) -> str | None: sqla_type = cls.get_sqla_column_type(target_type) if isinstance(sqla_type, (types.String, types.DateTime)): return f"""'{dttm.isoformat(sep=" ", timespec="seconds")}'""" @@ -126,7 +126,10 @@ def convert_dttm( @classmethod def get_table_names( - cls, database: Database, inspector: Inspector, schema: Optional[str] + cls, + database: Database, + inspector: Inspector, + schema: str | None, ) -> set[str]: """Need to disregard the schema for Sqlite""" return set(inspector.get_table_names()) From 3b8d078bead3687512550322a4fe004a5e30e7ce Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Mon, 7 Aug 2023 14:33:35 -0700 Subject: [PATCH 4/4] Fix test --- .../integration_tests/databases/api_tests.py | 132 +++++++++++++++++- 1 file changed, 131 insertions(+), 1 deletion(-) diff --git a/tests/integration_tests/databases/api_tests.py b/tests/integration_tests/databases/api_tests.py index bd39d965743fd..8bf4867d019d9 100644 --- a/tests/integration_tests/databases/api_tests.py +++ 
b/tests/integration_tests/databases/api_tests.py @@ -2836,7 +2836,7 @@ def test_import_database_masked_ssh_tunnel_feature_only_pk_passwd( ) def test_function_names(self, mock_get_function_names): example_db = get_example_database() - if example_db.backend in {"hive", "presto"}: + if example_db.backend in {"hive", "presto", "sqlite"}: return mock_get_function_names.return_value = ["AVG", "MAX", "SUM"] @@ -2850,6 +2850,136 @@ def test_function_names(self, mock_get_function_names): assert rv.status_code == 200 assert response == {"function_names": ["AVG", "MAX", "SUM"]} + def test_function_names_sqlite(self): + example_db = get_example_database() + if example_db.backend != "sqlite": + return + + self.login(username="admin") + uri = "api/v1/database/1/function_names/" + + rv = self.client.get(uri) + response = json.loads(rv.data.decode("utf-8")) + + assert rv.status_code == 200 + assert response == { + "function_names": [ + "abs", + "acos", + "acosh", + "asin", + "asinh", + "atan", + "atan2", + "atanh", + "avg", + "ceil", + "ceiling", + "changes", + "char", + "coalesce", + "cos", + "cosh", + "count", + "cume_dist", + "date", + "datetime", + "degrees", + "dense_rank", + "exp", + "first_value", + "floor", + "format", + "glob", + "group_concat", + "hex", + "ifnull", + "iif", + "instr", + "json", + "json_array", + "json_array_length", + "json_each", + "json_error_position", + "json_extract", + "json_group_array", + "json_group_object", + "json_insert", + "json_object", + "json_patch", + "json_quote", + "json_remove", + "json_replace", + "json_set", + "json_tree", + "json_type", + "json_valid", + "julianday", + "lag", + "last_insert_rowid", + "last_value", + "lead", + "length", + "like", + "likelihood", + "likely", + "ln", + "load_extension", + "log", + "log10", + "log2", + "lower", + "ltrim", + "max", + "min", + "mod", + "nth_value", + "ntile", + "nullif", + "percent_rank", + "pi", + "pow", + "power", + "printf", + "quote", + "radians", + "random", + "randomblob", + 
"rank", + "replace", + "round", + "row_number", + "rtrim", + "sign", + "sin", + "sinh", + "soundex", + "sqlite_compileoption_get", + "sqlite_compileoption_used", + "sqlite_offset", + "sqlite_source_id", + "sqlite_version", + "sqrt", + "strftime", + "substr", + "substring", + "sum", + "tan", + "tanh", + "time", + "total_changes", + "trim", + "trunc", + "typeof", + "unhex", + "unicode", + "unixepoch", + "unlikely", + "upper", + "zeroblob", + ] + } + @mock.patch("superset.databases.api.get_available_engine_specs") @mock.patch("superset.databases.api.app") def test_available(self, app, get_available_engine_specs):