From a8b023c0358cfad0aa3f249d7a3164026f91e116 Mon Sep 17 00:00:00 2001 From: jwilliams-ocient <60358443+jwilliams-ocient@users.noreply.github.com> Date: Fri, 13 Jan 2023 08:44:43 -1000 Subject: [PATCH 01/26] feat: add OcientEngineSpec --- superset/db_engine_specs/ocient.py | 116 +++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 superset/db_engine_specs/ocient.py diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py new file mode 100644 index 0000000000000..473eb6b14c62e --- /dev/null +++ b/superset/db_engine_specs/ocient.py @@ -0,0 +1,116 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from typing import Optional, List + +from sqlalchemy.engine.reflection import Inspector +from superset.db_engine_specs.base import BaseEngineSpec + +import pyocient +from pyocient import _STPoint, _STLinestring, _STPolygon +from ipaddress import IPv4Address, IPv6Address +from superset import app + +superset_log_level = app.config['LOG_LEVEL'] +pyocient.logger.setLevel(superset_log_level) + + +# Custom datatype conversion functions + +def _to_hex(data: bytes) -> str: + return data.hex() + +def _polygon_to_json(polygon: _STPolygon) -> str: + json_value = f'{str([[p.long, p.lat] for p in polygon.exterior])}' + if polygon.holes: + for hole in polygon.holes: + json_value += f', {str([[p.long, p.lat] for p in hole])}' + json_value = f'[{json_value}]' + return json_value + +def _linestring_to_json(linestring: _STLinestring) -> str: + return f'{str([[p.long, p.lat] for p in linestring.points])}' + +def _point_to_comma_delimited(point: _STPoint) -> str: + return f'{point.long}, {point.lat}' + + +# Map of datatypes that are not allowed in superset, but are allowed in our DB +# Key is datatype, value is function to map, in order to convert to a valid superset type + +_superset_conversion_dict = { + _STPoint: _point_to_comma_delimited, + _STLinestring: _linestring_to_json, + _STPolygon: _polygon_to_json, + IPv4Address: str, + IPv6Address: str, + bytes: _to_hex +} + +def _verify_datatype(elem): + """Verifies that the type of elem is allowable in superset + Whether a datatype is allowable is determined by the builtin json.dumps method, and superset's json_int_dttm_ser found in superset/utils/core.py + NOTE: This method does not allow for inheritance, elem that inherit from an allowable type will not be seen as allowable + + If the type is allowable, it is returned without modification + If the type is not allowable, the repr string of the elem is returned + NOTE: This may need to be changed, particularity with consideration to GIS data, which may be better presented as a list + + """ + if type(elem) in _superset_conversion_dict: + return _superset_conversion_dict[type(elem)](elem) + return elem + +class OcientEngineSpec(BaseEngineSpec): + """Engine spec for the Ocient Database + + Modify label to syntax supported by Ocient: + - 'count' changed to '"count"' + - label containing ')' and '(' wrapped with double-quotes + - Add an 'A' to a Label beginning with '_' + + """ + + engine = 'ocient' + engine_name = "Ocient" + #limit_method = LimitMethod.WRAP_SQL + force_column_alias_quotes = True + max_column_name_length = 30 + + _time_grain_expressions = { + None: "{col}", + "PT1S": "ROUND({col}, 'SECOND')", + "PT1M": "ROUND({col}, 'MINUTE')", + "PT1H": "ROUND({col}, 'HOUR')", + "P1D": "ROUND({col}, 'DAY')", + "P1W": "ROUND({col}, 'WEEK')", + "P1M": "ROUND({col}, 'MONTH')", + "P0.25Y": "ROUND({col}, 'QUARTER')", + "P1Y": "ROUND({col}, 'YEAR')", + } + + @classmethod + def get_table_names( + cls, database: "Database", inspector: Inspector, schema: Optional[str] + ) -> List[str]: + return sorted(inspector.get_table_names(schema)) + + @classmethod + def fetch_data(cls, cursor, lim=None): + data = super(OcientEngineSpec, cls).fetch_data(cursor) + if len(data) != 0 and type(data[0]).__name__ == 'Row': + data = [list(map(_verify_datatype, r)) for r in data] + return data From 879f52298b68a453b5497dad87bfc57ecf8675de Mon Sep 17 00:00:00 2001 From: Jordan Williams Date: Thu, 19 Jan 2023 23:39:45 +0000 Subject: [PATCH 02/26] chore: code cleanup * doc: add docstrings to functions * chore: clean up comments * perf: avoid copying row data that doesn't need to be sanitized --- superset/db_engine_specs/ocient.py | 126 ++++++++++++++++++++--------- 1 file changed, 87 insertions(+), 39 deletions(-) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index 473eb6b14c62e..06bb6024ef390 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -20,20 +20,31 @@ from superset.db_engine_specs.base import BaseEngineSpec import pyocient -from pyocient import _STPoint, _STLinestring, _STPolygon -from ipaddress import IPv4Address, IPv6Address +from pyocient import _STPoint, _STLinestring, _STPolygon, TypeCodes from superset import app +from typing import Any, Callable, Dict, List, NamedTuple, Tuple +# Ensure pyocient inherits Superset's logging level superset_log_level = app.config['LOG_LEVEL'] pyocient.logger.setLevel(superset_log_level) -# Custom datatype conversion functions - def _to_hex(data: bytes) -> str: + """ + Converts the bytes object into a string of hexadecimal digits. + + :param data: the bytes object + :returns: string of hexadecimal digits representing the bytes + """ return data.hex() def _polygon_to_json(polygon: _STPolygon) -> str: + """ + Converts the _STPolygon object into its JSON representation. + + :param data: the polygon object + :returns: JSON representation of the polygon + """ json_value = f'{str([[p.long, p.lat] for p in polygon.exterior])}' if polygon.holes: for hole in polygon.holes: @@ -42,48 +53,66 @@ def _polygon_to_json(polygon: _STPolygon) -> str: return json_value def _linestring_to_json(linestring: _STLinestring) -> str: + """ + Converts the _STLinestring object into its JSON representation. + + :param data: the linestring object + :returns: JSON representation of the linestring + """ return f'{str([[p.long, p.lat] for p in linestring.points])}' def _point_to_comma_delimited(point: _STPoint) -> str: - return f'{point.long}, {point.lat}' - + """ + Returns the x and y coordinates as a comma delimited string. -# Map of datatypes that are not allowed in superset, but are allowed in our DB -# Key is datatype, value is function to map, in order to convert to a valid superset type + :param data: the point object + :returns: the x and y coordinates as a comma delimited string + """ + return f'{point.long}, {point.lat}' -_superset_conversion_dict = { - _STPoint: _point_to_comma_delimited, - _STLinestring: _linestring_to_json, - _STPolygon: _polygon_to_json, - IPv4Address: str, - IPv6Address: str, - bytes: _to_hex +# Sanitization function for column values +SanitizeFunc = Callable[[Any], Any] + +# Represents a pair of a column index and the sanitization function +# to apply to its values. +PlacedSanitizeFunc = NamedTuple( + "PlacedSanitizeFunc", + [ + ("column_index", int), + ("sanitize_func", SanitizeFunc), + ], +) + +# This map contains functions used to sanitize values for column types +# that cannot be processed natively by Superset. +# +# Superset serializes temporal objects using a custom serializer +# defined in superset/utils/core.py (#json_int_dttm_ser(...)). Other +# are serialized by the default JSON encoder. +_sanitized_ocient_type_codes: Dict[int, SanitizeFunc] = { + TypeCodes.BINARY: _to_hex, + TypeCodes.ST_POINT: _point_to_comma_delimited, + TypeCodes.IP: str, + TypeCodes.IPV4: str, + TypeCodes.ST_LINESTRING: _linestring_to_json, + TypeCodes.ST_POLYGON: _polygon_to_json, } -def _verify_datatype(elem): - """Verifies that the type of elem is allowable in superset - Whether a datatype is allowable is determined by the builtin json.dumps method, and superset's json_int_dttm_ser found in superset/utils/core.py - NOTE: This method does not allow for inheritance, elem that inherit from an allowable type will not be seen as allowable - - If the type is allowable, it is returned without modification - If the type is not allowable, the repr string of the elem is returned - NOTE: This may need to be changed, particularity with consideration to GIS data, which may be better presented as a list - +def _find_columns_to_sanitize(cursor: Any) -> List[PlacedSanitizeFunc]: """ - if type(elem) in _superset_conversion_dict: - return _superset_conversion_dict[type(elem)](elem) - return elem + Cleans the column value for consumption by Superset. -class OcientEngineSpec(BaseEngineSpec): - """Engine spec for the Ocient Database - - Modify label to syntax supported by Ocient: - - 'count' changed to '"count"' - - label containing ')' and '(' wrapped with double-quotes - - Add an 'A' to a Label beginning with '_' - + :param cursor: the result set cursor + :returns: the list of tuples consisting of the column index and sanitization function """ + return [ + PlacedSanitizeFunc(i, _sanitized_ocient_type_codes[cursor.description[i][1]]) + for i in range(len(cursor.description)) + if cursor.description[i][1] in _sanitized_ocient_type_codes + ] + +class OcientEngineSpec(BaseEngineSpec): engine = 'ocient' engine_name = "Ocient" #limit_method = LimitMethod.WRAP_SQL @@ -110,7 +139,26 @@ def get_table_names( @classmethod def fetch_data(cls, cursor, lim=None): - data = super(OcientEngineSpec, cls).fetch_data(cursor) - if len(data) != 0 and type(data[0]).__name__ == 'Row': - data = [list(map(_verify_datatype, r)) for r in data] - return data + rows = super(OcientEngineSpec, cls).fetch_data(cursor) + + if (len(rows)) == 0: + # No rows were produced + return rows + + if type(rows[0]).__name__ != 'Row': + # TODO what else is returned here? + return rows + + # Peek at the schema to determine which column values, if any, + # require sanitization. + columns_to_sanitize: List[PlacedSanitizeFunc] = _find_columns_to_sanitize(cursor) + + if columns_to_sanitize: + # At least 1 column has to be sanitized. + for row in rows: + for info in columns_to_sanitize: + # Modify the element in-place. + v = row[info.column_index] + row[info.column_index] = info.sanitize_func(v) + + return rows From 9e3ef8534fa258c1314474d3e329cee82f9d5e65 Mon Sep 17 00:00:00 2001 From: Alex Clavel Date: Mon, 23 Jan 2023 22:21:00 +0000 Subject: [PATCH 03/26] Adding custom errors to Ocient engine spec --- superset/db_engine_specs/ocient.py | 84 +++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index 06bb6024ef390..d31c97cce09bb 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -14,10 +14,13 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -from typing import Optional, List +from typing import Any, Dict, List, Optional, Pattern, Tuple, TYPE_CHECKING +import re from sqlalchemy.engine.reflection import Inspector from superset.db_engine_specs.base import BaseEngineSpec +from superset.errors import SupersetErrorType +from flask_babel import gettext as __ import pyocient from pyocient import _STPoint, _STLinestring, _STPolygon, TypeCodes @@ -29,6 +32,38 @@ pyocient.logger.setLevel(superset_log_level) +# Regular expressions to catch custom errors + +CONNECTION_INVALID_USERNAME_REGEX = re.compile( + "The referenced user does not exist \(User \'(?P.*?)\' not found\)" +) +CONNECTION_INVALID_PASSWORD_REGEX = re.compile( + 'The userid/password combination was not valid \(Incorrect password for user\)' +) +CONNECTION_INVALID_HOSTNAME_REGEX = re.compile( + r"Unable to connect to (?P.*?):(?P.*?):" +) +CONNECTION_UNKNOWN_DATABASE_REGEX = re.compile( + "No database named '(?P.*?)' exists" +) +CONNECTION_INVALID_PORT_ERROR = re.compile( + "Port out of range 0-65535" +) +INVALID_CONNECTION_STRING_REGEX = re.compile( + "An invalid connection string attribute was specified \(failed to decrypt cipher text\)" +) +SYNTAX_ERROR_REGEX = re.compile( + r"There is a syntax error in your statement \(extraneous input '(?P.*?)' expecting {.*}" +) +TABLE_DOES_NOT_EXIST_REGEX = re.compile( + "The referenced table or view '(?P.*?)' does not exist" +) +COLUMN_DOES_NOT_EXIST_REGEX = re.compile( + "The reference to column '(?P.*?)' is not valid" +) + +# Custom datatype conversion functions + def _to_hex(data: bytes) -> str: """ Converts the bytes object into a string of hexadecimal digits. @@ -119,6 +154,53 @@ class OcientEngineSpec(BaseEngineSpec): force_column_alias_quotes = True max_column_name_length = 30 + custom_errors : Dict[Pattern[str], Tuple[str, SupersetErrorType, Dict[str, Any]]] = { + CONNECTION_INVALID_USERNAME_REGEX: ( + __('The username "%(username)s" does not exist.'), + SupersetErrorType.CONNECTION_INVALID_USERNAME_ERROR, + {}, + ), + CONNECTION_INVALID_PASSWORD_REGEX: ( + __('The user/password combination is not valid (Incorrect password for user).'), + SupersetErrorType.CONNECTION_INVALID_PASSWORD_ERROR, + {}, + ), + CONNECTION_UNKNOWN_DATABASE_REGEX: ( + __('Could not connect to database: "%(database)s"'), + SupersetErrorType.CONNECTION_UNKNOWN_DATABASE_ERROR, + {} + ), + CONNECTION_INVALID_HOSTNAME_REGEX: ( + __('Could not resolve hostname: "%(host)s".'), + SupersetErrorType.CONNECTION_INVALID_HOSTNAME_ERROR, + {} + ), + CONNECTION_INVALID_PORT_ERROR: ( + __('Port out of range 0-65535'), + SupersetErrorType.CONNECTION_INVALID_PORT_ERROR, + {} + ), + INVALID_CONNECTION_STRING_REGEX: ( + __('Invalid Connection String: Expecting String of the form \'ocient://user:pass@host:port/database\'.'), + SupersetErrorType.GENERIC_DB_ENGINE_ERROR, + {} + ), + SYNTAX_ERROR_REGEX: ( + __('Extraneous input: "%(ext_input)s".'), + SupersetErrorType.SYNTAX_ERROR, + {} + ), + TABLE_DOES_NOT_EXIST_REGEX: ( + __('Table or View "%(table)s" does not exist.'), + SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR, + {} + ), + COLUMN_DOES_NOT_EXIST_REGEX: ( + __('Invalid reference to column: "%(column)s"'), + SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR, + {} + ), +} _time_grain_expressions = { None: "{col}", "PT1S": "ROUND({col}, 'SECOND')", From 818474d9cfdc7b383f949c3005f397ebd8eb9351 Mon Sep 17 00:00:00 2001 From: Alex Clavel Date: Mon, 23 Jan 2023 20:10:39 +0000 Subject: [PATCH 04/26] Fixing syntax regex to match all ocient syntax errors --- superset/db_engine_specs/ocient.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index d31c97cce09bb..9f5af502011e6 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -53,7 +53,7 @@ "An invalid connection string attribute was specified \(failed to decrypt cipher text\)" ) SYNTAX_ERROR_REGEX = re.compile( - r"There is a syntax error in your statement \(extraneous input '(?P.*?)' expecting {.*}" + r"There is a syntax error in your statement \((?P.*?) input '(?P.*?)' expecting {.*}" ) TABLE_DOES_NOT_EXIST_REGEX = re.compile( "The referenced table or view '(?P
.*?)' does not exist" @@ -186,7 +186,7 @@ class OcientEngineSpec(BaseEngineSpec): {} ), SYNTAX_ERROR_REGEX: ( - __('Extraneous input: "%(ext_input)s".'), + __('Syntax Error: %(qualifier)s input "%(input)s".'), SupersetErrorType.SYNTAX_ERROR, {} ), From e8c97ba09444aad0c41f5c3846175883efc956fc Mon Sep 17 00:00:00 2001 From: Alex Clavel Date: Mon, 23 Jan 2023 20:17:50 +0000 Subject: [PATCH 05/26] Cleaning up imports --- superset/db_engine_specs/ocient.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index 9f5af502011e6..824ba10481131 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -from typing import Any, Dict, List, Optional, Pattern, Tuple, TYPE_CHECKING + import re from sqlalchemy.engine.reflection import Inspector @@ -25,8 +25,8 @@ import pyocient from pyocient import _STPoint, _STLinestring, _STPolygon, TypeCodes from superset import app -from typing import Any, Callable, Dict, List, NamedTuple, Tuple - +from superset.models.core import Database +from typing import Any, Callable, Dict, List, NamedTuple, Tuple, Optional, Pattern # Ensure pyocient inherits Superset's logging level superset_log_level = app.config['LOG_LEVEL'] pyocient.logger.setLevel(superset_log_level) From d4655887cf203db058595adeb33746b12c2dbec1 Mon Sep 17 00:00:00 2001 From: jwilliams-ocient <60358443+jwilliams-ocient@users.noreply.github.com> Date: Mon, 23 Jan 2023 15:16:50 -0800 Subject: [PATCH 06/26] chore: add Ocient dependencies to setup.py (#3) --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index dc546e5a6030c..626132dc897b2 100644 --- a/setup.py +++ b/setup.py @@ -160,6 +160,7 @@ def get_git_sha() -> str: "kylin": ["kylinpy>=2.8.1, <2.9"], "mssql": ["pymssql>=2.1.4, <2.2"], "mysql": ["mysqlclient>=2.1.0, <3"], + "ocient": ["sqlalchemy_ocient>=1.0.0"] "oracle": ["cx-Oracle>8.0.0, <8.1"], "pinot": ["pinotdb>=0.3.3, <0.4"], "postgres": ["psycopg2-binary==2.9.5"], From bee53684a7a0b4f6e8109486f104999918c61263 Mon Sep 17 00:00:00 2001 From: jwilliams-ocient <60358443+jwilliams-ocient@users.noreply.github.com> Date: Tue, 24 Jan 2023 12:02:00 -0600 Subject: [PATCH 07/26] fix: add missing comma to inline dict element --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 626132dc897b2..e1ed349e6a151 100644 --- a/setup.py +++ b/setup.py @@ -160,7 +160,7 @@ def get_git_sha() -> str: "kylin": ["kylinpy>=2.8.1, <2.9"], "mssql": ["pymssql>=2.1.4, <2.2"], "mysql": ["mysqlclient>=2.1.0, <3"], - "ocient": ["sqlalchemy_ocient>=1.0.0"] + "ocient": ["sqlalchemy_ocient>=1.0.0"], "oracle": ["cx-Oracle>8.0.0, <8.1"], "pinot": ["pinotdb>=0.3.3, <0.4"], "postgres": ["psycopg2-binary==2.9.5"], From ffb76bce4ef71bf58a4fe44e8e72049f80c91bdb Mon Sep 17 00:00:00 2001 From: jwilliams-ocient <60358443+jwilliams-ocient@users.noreply.github.com> Date: Wed, 25 Jan 2023 10:05:27 -0600 Subject: [PATCH 08/26] test: add unit tests for Ocient engine spec (#6) --- .../unit_tests/db_engine_specs/test_ocient.py | 233 ++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 tests/unit_tests/db_engine_specs/test_ocient.py diff --git a/tests/unit_tests/db_engine_specs/test_ocient.py b/tests/unit_tests/db_engine_specs/test_ocient.py new file mode 100644 index 0000000000000..4135b9a6aa935 --- /dev/null +++ b/tests/unit_tests/db_engine_specs/test_ocient.py @@ -0,0 +1,233 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# pylint: disable=import-outside-toplevel + +from datetime import datetime + +import pytest + +from superset.errors import ErrorLevel, SupersetError, SupersetErrorType +from typing import Dict, List, Tuple + + +@pytest.mark.parametrize( + "actual,expected", + [ + ("DATE", "TO_DATE('2019-01-02')"), + ("DATETIME", "CAST('2019-01-02T03:04:05.678900' AS DATETIME)"), + ("TIMESTAMP", "TO_TIMESTAMP('2019-01-02T03:04:05.678900')"), + ], +) +def test_convert_dttm(actual: str, expected: str, dttm: datetime) -> None: + from superset.db_engine_specs.ocient import OcientEngineSpec + + assert OcientEngineSpec.convert_dttm(actual, dttm) == expected + +# (msg,expected) +MARSHALED_OCIENT_ERRORS: List[Tuple[str, Dict]] = [ + ( + 'The referenced user does not exist (User \'mj\' not found)', + SupersetError( + message='The username "mj" does not exist.', + error_type=SupersetErrorType.CONNECTION_INVALID_USERNAME_ERROR, + level=ErrorLevel.ERROR, + extra={ + "engine_name": "Ocient", + "issue_codes": [ + { + "code": 1012, + "message": "Issue 1012 - The username provided when connecting to a database is not valid.", + } + ], + }, + ) + ), + ( + 'The userid/password combination was not valid (Incorrect password for user)', + SupersetError( + message='The user/password combination is not valid (Incorrect password for user).', + error_type=SupersetErrorType.CONNECTION_INVALID_PASSWORD_ERROR, + level=ErrorLevel.ERROR, + extra={ + "engine_name": "Ocient", + "issue_codes": [ + { + "code": 1013, + "message": "Issue 1013 - The password provided when connecting to a database is not valid.", + } + ], + }, + ) + ), + ( + 'No database named \'bulls\' exists', + SupersetError( + message='Could not connect to database: "bulls"', + error_type=SupersetErrorType.CONNECTION_UNKNOWN_DATABASE_ERROR, + level=ErrorLevel.ERROR, + extra={ + "engine_name": "Ocient", + "issue_codes": [ + { + "code": 1015, + "message": "Issue 1015 - Either the database is spelled incorrectly or does not exist.", + } + ], + }, + ) + ), + ( + 'Unable to connect to unitedcenter.com:4050', + SupersetError( + message='Could not resolve hostname: "unitedcenter.com".', + error_type=SupersetErrorType.CONNECTION_INVALID_HOSTNAME_ERROR, + level=ErrorLevel.ERROR, + extra={ + "engine_name": "Ocient", + "issue_codes": [ + { + "code": 1007, + "message": "Issue 1007 - The hostname provided can't be resolved.", + } + ], + }, + ) + ), + ( + 'Port out of range 0-65535', + SupersetError( + message='Port out of range 0-65535', + error_type=SupersetErrorType.CONNECTION_INVALID_PORT_ERROR, + level=ErrorLevel.ERROR, + extra={ + "engine_name": "Ocient", + "issue_codes": [ + { + "code": 1034, + "message": "Issue 1034 - The port number is invalid.", + } + ], + }, + ) + ), + ( + 'An invalid connection string attribute was specified (failed to decrypt cipher text)', + SupersetError( + message='Invalid Connection String: Expecting String of the form \'ocient://user:pass@host:port/database\'.', + error_type=SupersetErrorType.GENERIC_DB_ENGINE_ERROR, + level=ErrorLevel.ERROR, + extra={ + "engine_name": "Ocient", + "issue_codes": [ + { + "code": 1002, + "message": "Issue 1002 - The database returned an unexpected error.", + } + ], + }, + ) + ), + ( + 'There is a syntax error in your statement (extraneous input \'foo bar baz\' expecting ', + SupersetError( + message='Extraneous input: "foo bar baz".', + error_type=SupersetErrorType.SYNTAX_ERROR, + level=ErrorLevel.ERROR, + extra={ + "engine_name": "Ocient", + "issue_codes": [ + { + "code": 1030, + "message": "Issue 1030 - The query has a syntax error.", + } + ], + }, + ) + ), + ( + 'There is a syntax error in your statement (mismatched input \'to\' expecting {, \'trace\', \'using\'})', + SupersetError( + message='Extraneous input: "foo bar baz".', + error_type=SupersetErrorType.SYNTAX_ERROR, + level=ErrorLevel.ERROR, + extra={ + "engine_name": "Ocient", + "issue_codes": [ + { + "code": 1030, + "message": "Issue 1030 - The query has a syntax error.", + } + ], + }, + ) + ), + ( + 'The referenced table or view \'goats\' does not exist', + SupersetError( + message='Table or View "goats" does not exist.', + error_type=SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR, + level=ErrorLevel.ERROR, + extra={ + "engine_name": "Ocient", + "issue_codes": [ + { + "code": 1003, + "message": "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo.", + }, + { + "code": 1005, + "message": "Issue 1005 - The table was deleted or renamed in the database.", + } + ], + }, + ) + ), + ( + 'The reference to column \'goats\' is not valid', + SupersetError( + message='Invalid reference to column: "goats"', + error_type=SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR, + level=ErrorLevel.ERROR, + extra={ + "engine_name": "Ocient", + "issue_codes": [ + { + "code": 1003, + "message": "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo.", + }, + { + "code": 1004, + "message": "Issue 1004 - The column was deleted or renamed in the database.", + } + ], + }, + ) + ), +] + +@pytest.mark.parametrize( + "msg,expected", + MARSHALED_OCIENT_ERRORS +) +def test_connection_errors(msg: str, expected: SupersetError) -> None: + from superset.db_engine_specs.ocient import OcientEngineSpec + + result = OcientEngineSpec.extract_errors(Exception(msg)) + assert result == [ + expected + ] From 8059833577f8f73037f7334d06d9ae6b6a29b09c Mon Sep 17 00:00:00 2001 From: alexclavel-ocient <111374753+alexclavel-ocient@users.noreply.github.com> Date: Mon, 30 Jan 2023 15:18:54 -0600 Subject: [PATCH 09/26] DB-23237 Superset: datatypes are not being converted properly (#8) * Fix: Replace invalid inplace type conversion * Nit: Add comment clarifying why we cannot update in place * Nit: Generate functions based on cursor, not rows --- superset/db_engine_specs/ocient.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index 824ba10481131..cef53ab47e477 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -237,10 +237,13 @@ def fetch_data(cls, cursor, lim=None): if columns_to_sanitize: # At least 1 column has to be sanitized. - for row in rows: - for info in columns_to_sanitize: - # Modify the element in-place. - v = row[info.column_index] - row[info.column_index] = info.sanitize_func(v) - + def do_nothing(x): + return x + + sanitization_functions = [do_nothing for _ in range(len(cursor.description))] + for info in columns_to_sanitize: + sanitization_functions[info.column_index] = info.sanitize_func + + # Rows from pyocient are given as NamedTuple, so we need to recreate the whole table + rows = [[sanitization_functions[i](row[i]) for i in range(len(row))] for row in rows] return rows From 6c1d956809c6b48706df0d264c676a86c4375afc Mon Sep 17 00:00:00 2001 From: alexclavel-ocient <111374753+alexclavel-ocient@users.noreply.github.com> Date: Mon, 30 Jan 2023 15:32:48 -0600 Subject: [PATCH 10/26] DB-22748 Fix canceling queries in Superset (#2) * Adding support for query cancellation * Removing double import * Adding support for query cancellation Removing double import Inverting logic for fetch data. Re-add missing function * Fiding doubled up merge inputs * Handle Query cancellation for multiple queries * Adding mutex lock to query id mapping * Removing unecesssary iteration over values of cache * Fixing locks in db engine spec * Fix error capture in fetch_data --- superset/db_engine_specs/ocient.py | 72 ++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 14 deletions(-) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index cef53ab47e477..abfabe81a631f 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -18,6 +18,7 @@ import re from sqlalchemy.engine.reflection import Inspector +from sqlalchemy.orm import Session from superset.db_engine_specs.base import BaseEngineSpec from superset.errors import SupersetErrorType from flask_babel import gettext as __ @@ -27,6 +28,9 @@ from superset import app from superset.models.core import Database from typing import Any, Callable, Dict, List, NamedTuple, Tuple, Optional, Pattern +import threading + +from superset.models.sql_lab import Query # Ensure pyocient inherits Superset's logging level superset_log_level = app.config['LOG_LEVEL'] pyocient.logger.setLevel(superset_log_level) @@ -62,6 +66,8 @@ "The reference to column '(?P.*?)' is not valid" ) + + # Custom datatype conversion functions def _to_hex(data: bytes) -> str: @@ -154,6 +160,12 @@ class OcientEngineSpec(BaseEngineSpec): force_column_alias_quotes = True max_column_name_length = 30 + # Store mapping of superset Query id -> Ocient ID + # These are inserted into the cache when executing the query + # They are then removed, either upon cancellation or query completion + query_id_mapping: Dict[str, str]= dict() + query_id_mapping_lock = threading.Lock() + custom_errors : Dict[Pattern[str], Tuple[str, SupersetErrorType, Dict[str, Any]]] = { CONNECTION_INVALID_USERNAME_REGEX: ( __('The username "%(username)s" does not exist.'), @@ -212,28 +224,29 @@ class OcientEngineSpec(BaseEngineSpec): "P0.25Y": "ROUND({col}, 'QUARTER')", "P1Y": "ROUND({col}, 'YEAR')", } + @classmethod def get_table_names( - cls, database: "Database", inspector: Inspector, schema: Optional[str] + cls, database: Database, inspector: Inspector, schema: Optional[str] ) -> List[str]: return sorted(inspector.get_table_names(schema)) + @classmethod def fetch_data(cls, cursor, lim=None): - rows = super(OcientEngineSpec, cls).fetch_data(cursor) - - if (len(rows)) == 0: - # No rows were produced - return rows - - if type(rows[0]).__name__ != 'Row': - # TODO what else is returned here? - return rows - - # Peek at the schema to determine which column values, if any, - # require sanitization. - columns_to_sanitize: List[PlacedSanitizeFunc] = _find_columns_to_sanitize(cursor) + try: + rows = super(OcientEngineSpec, cls).fetch_data(cursor) + except Exception as exception: + with OcientEngineSpec.query_id_mapping_lock: + del OcientEngineSpec.query_id_mapping[getattr(cursor, 'superset_query_id')] + raise exception + + + if len(rows) > 0 and type(rows[0]).__name__ == rows: + # Peek at the schema to determine which column values, if any, + # require sanitization. + columns_to_sanitize: List[PlacedSanitizeFunc] = _find_columns_to_sanitize(cursor) if columns_to_sanitize: # At least 1 column has to be sanitized. @@ -247,3 +260,34 @@ def do_nothing(x): # Rows from pyocient are given as NamedTuple, so we need to recreate the whole table rows = [[sanitization_functions[i](row[i]) for i in range(len(row))] for row in rows] return rows + + + @classmethod + def get_cancel_query_id(cls, cursor: Any, query: Query) -> Optional[str]: + # Return a Non-None value + # If None is returned, Superset will not call cancel_query + return 'DUMMY_VALUE' + + + @classmethod + def handle_cursor(cls, cursor: Any, query: Query, session: Session) -> None: + with OcientEngineSpec.query_id_mapping_lock: + OcientEngineSpec.query_id_mapping[query.id] = cursor.query_id + + # Add the query id to the cursor + setattr(cursor, "superset_query_id", query.id) + return super().handle_cursor(cursor, query, session) + + + @classmethod + def cancel_query(cls, cursor: Any, query: Query, cancel_query_id: str) -> bool: + with OcientEngineSpec.query_id_mapping_lock: + if query.id in OcientEngineSpec.query_id_mapping: + cursor.execute(f'CANCEL {OcientEngineSpec.query_id_mapping[query.id]}') + # Query has been cancelled, so we can safely remove the cursor from the cache + del OcientEngineSpec.query_id_mapping[query.id] + + return True + # If the query is not in the cache, it must have either been cancelled elsewhere or completed + else: + return False \ No newline at end of file From 6979aed7a2970a282244f6e5e8fa0ad2ce3eed0d Mon Sep 17 00:00:00 2001 From: alexclavel-ocient <111374753+alexclavel-ocient@users.noreply.github.com> Date: Mon, 30 Jan 2023 16:00:14 -0600 Subject: [PATCH 11/26] Run Black Formatting on Ocient DB Engine Spec (#9) --- superset/db_engine_specs/ocient.py | 125 ++++++++++++++++------------- 1 file changed, 68 insertions(+), 57 deletions(-) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index abfabe81a631f..15e9024912507 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -31,28 +31,27 @@ import threading from superset.models.sql_lab import Query + # Ensure pyocient inherits Superset's logging level -superset_log_level = app.config['LOG_LEVEL'] +superset_log_level = app.config["LOG_LEVEL"] pyocient.logger.setLevel(superset_log_level) # Regular expressions to catch custom errors CONNECTION_INVALID_USERNAME_REGEX = re.compile( - "The referenced user does not exist \(User \'(?P.*?)\' not found\)" + "The referenced user does not exist \(User '(?P.*?)' not found\)" ) CONNECTION_INVALID_PASSWORD_REGEX = re.compile( - 'The userid/password combination was not valid \(Incorrect password for user\)' + "The userid/password combination was not valid \(Incorrect password for user\)" ) CONNECTION_INVALID_HOSTNAME_REGEX = re.compile( - r"Unable to connect to (?P.*?):(?P.*?):" + r"Unable to connect to (?P.*?):(?P.*?):" ) CONNECTION_UNKNOWN_DATABASE_REGEX = re.compile( "No database named '(?P.*?)' exists" ) -CONNECTION_INVALID_PORT_ERROR = re.compile( - "Port out of range 0-65535" -) +CONNECTION_INVALID_PORT_ERROR = re.compile("Port out of range 0-65535") INVALID_CONNECTION_STRING_REGEX = re.compile( "An invalid connection string attribute was specified \(failed to decrypt cipher text\)" ) @@ -67,9 +66,9 @@ ) - # Custom datatype conversion functions + def _to_hex(data: bytes) -> str: """ Converts the bytes object into a string of hexadecimal digits. @@ -79,6 +78,7 @@ def _to_hex(data: bytes) -> str: """ return data.hex() + def _polygon_to_json(polygon: _STPolygon) -> str: """ Converts the _STPolygon object into its JSON representation. @@ -86,13 +86,14 @@ def _polygon_to_json(polygon: _STPolygon) -> str: :param data: the polygon object :returns: JSON representation of the polygon """ - json_value = f'{str([[p.long, p.lat] for p in polygon.exterior])}' + json_value = f"{str([[p.long, p.lat] for p in polygon.exterior])}" if polygon.holes: for hole in polygon.holes: - json_value += f', {str([[p.long, p.lat] for p in hole])}' - json_value = f'[{json_value}]' + json_value += f", {str([[p.long, p.lat] for p in hole])}" + json_value = f"[{json_value}]" return json_value + def _linestring_to_json(linestring: _STLinestring) -> str: """ Converts the _STLinestring object into its JSON representation. @@ -100,7 +101,8 @@ def _linestring_to_json(linestring: _STLinestring) -> str: :param data: the linestring object :returns: JSON representation of the linestring """ - return f'{str([[p.long, p.lat] for p in linestring.points])}' + return f"{str([[p.long, p.lat] for p in linestring.points])}" + def _point_to_comma_delimited(point: _STPoint) -> str: """ @@ -109,12 +111,13 @@ def _point_to_comma_delimited(point: _STPoint) -> str: :param data: the point object :returns: the x and y coordinates as a comma delimited string """ - return f'{point.long}, {point.lat}' + return f"{point.long}, {point.lat}" + # Sanitization function for column values SanitizeFunc = Callable[[Any], Any] -# Represents a pair of a column index and the sanitization function +# Represents a pair of a column index and the sanitization function # to apply to its values. PlacedSanitizeFunc = NamedTuple( "PlacedSanitizeFunc", @@ -124,11 +127,11 @@ def _point_to_comma_delimited(point: _STPoint) -> str: ], ) -# This map contains functions used to sanitize values for column types +# This map contains functions used to sanitize values for column types # that cannot be processed natively by Superset. -# -# Superset serializes temporal objects using a custom serializer -# defined in superset/utils/core.py (#json_int_dttm_ser(...)). Other +# +# Superset serializes temporal objects using a custom serializer +# defined in superset/utils/core.py (#json_int_dttm_ser(...)). Other # are serialized by the default JSON encoder. _sanitized_ocient_type_codes: Dict[int, SanitizeFunc] = { TypeCodes.BINARY: _to_hex, @@ -138,11 +141,12 @@ def _point_to_comma_delimited(point: _STPoint) -> str: TypeCodes.ST_LINESTRING: _linestring_to_json, TypeCodes.ST_POLYGON: _polygon_to_json, } - + + def _find_columns_to_sanitize(cursor: Any) -> List[PlacedSanitizeFunc]: """ Cleans the column value for consumption by Superset. - + :param cursor: the result set cursor :returns: the list of tuples consisting of the column index and sanitization function """ @@ -152,67 +156,71 @@ def _find_columns_to_sanitize(cursor: Any) -> List[PlacedSanitizeFunc]: if cursor.description[i][1] in _sanitized_ocient_type_codes ] - + class OcientEngineSpec(BaseEngineSpec): - engine = 'ocient' + engine = "ocient" engine_name = "Ocient" - #limit_method = LimitMethod.WRAP_SQL + # limit_method = LimitMethod.WRAP_SQL force_column_alias_quotes = True max_column_name_length = 30 # Store mapping of superset Query id -> Ocient ID # These are inserted into the cache when executing the query # They are then removed, either upon cancellation or query completion - query_id_mapping: Dict[str, str]= dict() + query_id_mapping: Dict[str, str] = dict() query_id_mapping_lock = threading.Lock() - custom_errors : Dict[Pattern[str], Tuple[str, SupersetErrorType, Dict[str, Any]]] = { + custom_errors: Dict[Pattern[str], Tuple[str, SupersetErrorType, Dict[str, Any]]] = { CONNECTION_INVALID_USERNAME_REGEX: ( __('The username "%(username)s" does not exist.'), SupersetErrorType.CONNECTION_INVALID_USERNAME_ERROR, {}, ), CONNECTION_INVALID_PASSWORD_REGEX: ( - __('The user/password combination is not valid (Incorrect password for user).'), + __( + "The user/password combination is not valid (Incorrect password for user)." + ), SupersetErrorType.CONNECTION_INVALID_PASSWORD_ERROR, {}, ), CONNECTION_UNKNOWN_DATABASE_REGEX: ( __('Could not connect to database: "%(database)s"'), SupersetErrorType.CONNECTION_UNKNOWN_DATABASE_ERROR, - {} + {}, ), CONNECTION_INVALID_HOSTNAME_REGEX: ( __('Could not resolve hostname: "%(host)s".'), SupersetErrorType.CONNECTION_INVALID_HOSTNAME_ERROR, - {} + {}, ), CONNECTION_INVALID_PORT_ERROR: ( - __('Port out of range 0-65535'), + __("Port out of range 0-65535"), SupersetErrorType.CONNECTION_INVALID_PORT_ERROR, - {} + {}, ), INVALID_CONNECTION_STRING_REGEX: ( - __('Invalid Connection String: Expecting String of the form \'ocient://user:pass@host:port/database\'.'), + __( + "Invalid Connection String: Expecting String of the form 'ocient://user:pass@host:port/database'." + ), SupersetErrorType.GENERIC_DB_ENGINE_ERROR, - {} - ), + {}, + ), SYNTAX_ERROR_REGEX: ( __('Syntax Error: %(qualifier)s input "%(input)s".'), SupersetErrorType.SYNTAX_ERROR, - {} + {}, ), TABLE_DOES_NOT_EXIST_REGEX: ( __('Table or View "%(table)s" does not exist.'), SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR, - {} + {}, ), COLUMN_DOES_NOT_EXIST_REGEX: ( __('Invalid reference to column: "%(column)s"'), SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR, - {} + {}, ), -} + } _time_grain_expressions = { None: "{col}", "PT1S": "ROUND({col}, 'SECOND')", @@ -224,14 +232,12 @@ class OcientEngineSpec(BaseEngineSpec): "P0.25Y": "ROUND({col}, 'QUARTER')", "P1Y": "ROUND({col}, 'YEAR')", } - @classmethod def get_table_names( cls, database: Database, inspector: Inspector, schema: Optional[str] ) -> List[str]: return sorted(inspector.get_table_names(schema)) - @classmethod def fetch_data(cls, cursor, lim=None): @@ -239,55 +245,60 @@ def fetch_data(cls, cursor, lim=None): rows = super(OcientEngineSpec, cls).fetch_data(cursor) except Exception as exception: with OcientEngineSpec.query_id_mapping_lock: - del OcientEngineSpec.query_id_mapping[getattr(cursor, 'superset_query_id')] + del OcientEngineSpec.query_id_mapping[ + getattr(cursor, "superset_query_id") + ] raise exception - - + if len(rows) > 0 and type(rows[0]).__name__ == rows: # Peek at the schema to determine which column values, if any, # require sanitization. - columns_to_sanitize: List[PlacedSanitizeFunc] = _find_columns_to_sanitize(cursor) + columns_to_sanitize: List[PlacedSanitizeFunc] = _find_columns_to_sanitize( + cursor + ) if columns_to_sanitize: # At least 1 column has to be sanitized. - def do_nothing(x): + def do_nothing(x): return x - - sanitization_functions = [do_nothing for _ in range(len(cursor.description))] + + sanitization_functions = [ + do_nothing for _ in range(len(cursor.description)) + ] for info in columns_to_sanitize: sanitization_functions[info.column_index] = info.sanitize_func - + # Rows from pyocient are given as NamedTuple, so we need to recreate the whole table - rows = [[sanitization_functions[i](row[i]) for i in range(len(row))] for row in rows] + rows = [ + [sanitization_functions[i](row[i]) for i in range(len(row))] + for row in rows + ] return rows - @classmethod def get_cancel_query_id(cls, cursor: Any, query: Query) -> Optional[str]: # Return a Non-None value # If None is returned, Superset will not call cancel_query - return 'DUMMY_VALUE' - + return "DUMMY_VALUE" @classmethod def handle_cursor(cls, cursor: Any, query: Query, session: Session) -> None: with OcientEngineSpec.query_id_mapping_lock: - OcientEngineSpec.query_id_mapping[query.id] = cursor.query_id - + OcientEngineSpec.query_id_mapping[query.id] = cursor.query_id + # Add the query id to the cursor setattr(cursor, "superset_query_id", query.id) return super().handle_cursor(cursor, query, session) - - + @classmethod def cancel_query(cls, cursor: Any, query: Query, cancel_query_id: str) -> bool: with OcientEngineSpec.query_id_mapping_lock: if query.id in OcientEngineSpec.query_id_mapping: - cursor.execute(f'CANCEL {OcientEngineSpec.query_id_mapping[query.id]}') + cursor.execute(f"CANCEL {OcientEngineSpec.query_id_mapping[query.id]}") # Query has been cancelled, so we can safely remove the cursor from the cache del OcientEngineSpec.query_id_mapping[query.id] return True # If the query is not in the cache, it must have either been cancelled elsewhere or completed else: - return False \ No newline at end of file + return False From a83a363ccfcf1b56ea5f18275039bb39c464848b Mon Sep 17 00:00:00 2001 From: jwilliams-ocient <60358443+jwilliams-ocient@users.noreply.github.com> Date: Mon, 6 Feb 2023 12:01:19 -0600 Subject: [PATCH 12/26] chore: add userdoc for Ocient DB (#10) --- docs/docs/databases/ocient.mdx | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 docs/docs/databases/ocient.mdx diff --git a/docs/docs/databases/ocient.mdx b/docs/docs/databases/ocient.mdx new file mode 100644 index 0000000000000..5cc5737587371 --- /dev/null +++ b/docs/docs/databases/ocient.mdx @@ -0,0 +1,32 @@ +--- +title: Ocient DB +hide_title: true +sidebar_position: 20 +version: 1 +--- + +## Ocient DB + +The recommended connector library for Ocient is [sqlalchemy-ocient](https://pypi.org/project/sqlalchemy-ocient). + +## Install the Ocient Driver + +``` +pip install sqlalchemy_ocient +``` + +## Connecting to Ocient + +The connection string for Ocient looks like this: + +```shell +ocient://{user}:{password}@{DNSname}:{port}/{database} +``` + +**NOTE**: You must enter the `user` and `password` credentials. `host` defaults to localhost, +port defaults to 4050, database defaults to `system` and `tls` defaults +to `off`. + +## User Access Control + +Make sure the user has privileges to access and use all required databases, schemas, tables, views, and warehouses, as the Ocient SQLAlchemy engine does not test for user or role rights by default. From 4dd143cd287340a132d41b2845bcf519ecde727d Mon Sep 17 00:00:00 2001 From: jwilliams-ocient <60358443+jwilliams-ocient@users.noreply.github.com> Date: Mon, 6 Feb 2023 12:01:53 -0600 Subject: [PATCH 13/26] fix: rename 'sqlalchemy_ocient' package to 'sqlalchemy-ocient' (#11) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e1ed349e6a151..f1da8979f4bb2 100644 --- a/setup.py +++ b/setup.py @@ -160,7 +160,7 @@ def get_git_sha() -> str: "kylin": ["kylinpy>=2.8.1, <2.9"], "mssql": ["pymssql>=2.1.4, <2.2"], "mysql": ["mysqlclient>=2.1.0, <3"], - "ocient": ["sqlalchemy_ocient>=1.0.0"], + "ocient": ["sqlalchemy-ocient>=1.0.0"], "oracle": ["cx-Oracle>8.0.0, <8.1"], "pinot": ["pinotdb>=0.3.3, <0.4"], "postgres": ["psycopg2-binary==2.9.5"], From 3ab8dff671e274b058bf1018045c3fa97154ca35 Mon Sep 17 00:00:00 2001 From: alexclavel-ocient <111374753+alexclavel-ocient@users.noreply.github.com> Date: Tue, 7 Feb 2023 12:46:13 -0600 Subject: [PATCH 14/26] Fix: Update Ocient docs with coorect package name (#14) --- docs/docs/databases/ocient.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/databases/ocient.mdx b/docs/docs/databases/ocient.mdx index 5cc5737587371..a601bbe2b625b 100644 --- a/docs/docs/databases/ocient.mdx +++ b/docs/docs/databases/ocient.mdx @@ -12,7 +12,7 @@ The recommended connector library for Ocient is [sqlalchemy-ocient](https://pypi ## Install the Ocient Driver ``` -pip install sqlalchemy_ocient +pip install sqlalchemy-ocient ``` ## Connecting to Ocient From 75aa069e0c8f50c579c2901a934ed63da5c49b55 Mon Sep 17 00:00:00 2001 From: jwilliams-ocient <60358443+jwilliams-ocient@users.noreply.github.com> Date: Tue, 7 Feb 2023 15:22:36 -0600 Subject: [PATCH 15/26] fix: apply suggestions from pre-commit hooks (#12) * fix: apply suggestions from pre-commit hooks * fix: correct return type for get_table_names * fix: correct fetch_data type signature * fix: fix type annotations within fetch_data body --- superset/db_engine_specs/ocient.py | 45 ++++++++----- .../unit_tests/db_engine_specs/test_ocient.py | 65 +++++++++---------- 2 files changed, 59 insertions(+), 51 deletions(-) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index 15e9024912507..b66216bf5ca98 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -16,20 +16,19 @@ # under the License. import re +import threading +from typing import Any, Callable, Dict, List, NamedTuple, Optional, Pattern, Set, Tuple +import pyocient +from flask_babel import gettext as __ +from pyocient import _STLinestring, _STPoint, _STPolygon, TypeCodes from sqlalchemy.engine.reflection import Inspector from sqlalchemy.orm import Session -from superset.db_engine_specs.base import BaseEngineSpec -from superset.errors import SupersetErrorType -from flask_babel import gettext as __ -import pyocient -from pyocient import _STPoint, _STLinestring, _STPolygon, TypeCodes from superset import app +from superset.db_engine_specs.base import BaseEngineSpec +from superset.errors import SupersetErrorType from superset.models.core import Database -from typing import Any, Callable, Dict, List, NamedTuple, Tuple, Optional, Pattern -import threading - from superset.models.sql_lab import Query # Ensure pyocient inherits Superset's logging level @@ -236,13 +235,17 @@ class OcientEngineSpec(BaseEngineSpec): @classmethod def get_table_names( cls, database: Database, inspector: Inspector, schema: Optional[str] - ) -> List[str]: - return sorted(inspector.get_table_names(schema)) + ) -> Set[str]: + return inspector.get_table_names(schema) @classmethod - def fetch_data(cls, cursor, lim=None): + def fetch_data( + cls, cursor: Any, lim: Optional[int] = None + ) -> List[Tuple[Any, ...]]: try: - rows = super(OcientEngineSpec, cls).fetch_data(cursor) + rows: List[Tuple[Any, ...]] = super(OcientEngineSpec, cls).fetch_data( + cursor, lim + ) except Exception as exception: with OcientEngineSpec.query_id_mapping_lock: del OcientEngineSpec.query_id_mapping[ @@ -259,18 +262,26 @@ def fetch_data(cls, cursor, lim=None): if columns_to_sanitize: # At least 1 column has to be sanitized. - def do_nothing(x): + + def identity(x: Any) -> Any: return x - sanitization_functions = [ - do_nothing for _ in range(len(cursor.description)) + # Use the identity function if the column type doesn't need to be + # sanitized. + sanitization_functions: List[SanitizeFunc] = [ + identity for _ in range(len(cursor.description)) ] for info in columns_to_sanitize: sanitization_functions[info.column_index] = info.sanitize_func - # Rows from pyocient are given as NamedTuple, so we need to recreate the whole table + # pyocient returns a list of NamedTuple objects which represent a + # single row. We have to do this copy because that data type is + # NamedTuple's are immutable. rows = [ - [sanitization_functions[i](row[i]) for i in range(len(row))] + tuple( + sanitize_func(val) + for sanitize_func, val in zip(sanitization_functions, row) + ) for row in rows ] return rows diff --git a/tests/unit_tests/db_engine_specs/test_ocient.py b/tests/unit_tests/db_engine_specs/test_ocient.py index 4135b9a6aa935..0b8a7533be187 100644 --- a/tests/unit_tests/db_engine_specs/test_ocient.py +++ b/tests/unit_tests/db_engine_specs/test_ocient.py @@ -18,11 +18,11 @@ # pylint: disable=import-outside-toplevel from datetime import datetime +from typing import Dict, List, Tuple import pytest from superset.errors import ErrorLevel, SupersetError, SupersetErrorType -from typing import Dict, List, Tuple @pytest.mark.parametrize( @@ -38,10 +38,11 @@ def test_convert_dttm(actual: str, expected: str, dttm: datetime) -> None: assert OcientEngineSpec.convert_dttm(actual, dttm) == expected + # (msg,expected) -MARSHALED_OCIENT_ERRORS: List[Tuple[str, Dict]] = [ +MARSHALED_OCIENT_ERRORS: List[Tuple[str, SupersetError]] = [ ( - 'The referenced user does not exist (User \'mj\' not found)', + "The referenced user does not exist (User 'mj' not found)", SupersetError( message='The username "mj" does not exist.', error_type=SupersetErrorType.CONNECTION_INVALID_USERNAME_ERROR, @@ -55,12 +56,12 @@ def test_convert_dttm(actual: str, expected: str, dttm: datetime) -> None: } ], }, - ) + ), ), ( - 'The userid/password combination was not valid (Incorrect password for user)', + "The userid/password combination was not valid (Incorrect password for user)", SupersetError( - message='The user/password combination is not valid (Incorrect password for user).', + message="The user/password combination is not valid (Incorrect password for user).", error_type=SupersetErrorType.CONNECTION_INVALID_PASSWORD_ERROR, level=ErrorLevel.ERROR, extra={ @@ -72,10 +73,10 @@ def test_convert_dttm(actual: str, expected: str, dttm: datetime) -> None: } ], }, - ) + ), ), ( - 'No database named \'bulls\' exists', + "No database named 'bulls' exists", SupersetError( message='Could not connect to database: "bulls"', error_type=SupersetErrorType.CONNECTION_UNKNOWN_DATABASE_ERROR, @@ -89,10 +90,10 @@ def test_convert_dttm(actual: str, expected: str, dttm: datetime) -> None: } ], }, - ) + ), ), ( - 'Unable to connect to unitedcenter.com:4050', + "Unable to connect to unitedcenter.com:4050", SupersetError( message='Could not resolve hostname: "unitedcenter.com".', error_type=SupersetErrorType.CONNECTION_INVALID_HOSTNAME_ERROR, @@ -106,12 +107,12 @@ def test_convert_dttm(actual: str, expected: str, dttm: datetime) -> None: } ], }, - ) + ), ), ( - 'Port out of range 0-65535', + "Port out of range 0-65535", SupersetError( - message='Port out of range 0-65535', + message="Port out of range 0-65535", error_type=SupersetErrorType.CONNECTION_INVALID_PORT_ERROR, level=ErrorLevel.ERROR, extra={ @@ -123,12 +124,12 @@ def test_convert_dttm(actual: str, expected: str, dttm: datetime) -> None: } ], }, - ) + ), ), ( - 'An invalid connection string attribute was specified (failed to decrypt cipher text)', + "An invalid connection string attribute was specified (failed to decrypt cipher text)", SupersetError( - message='Invalid Connection String: Expecting String of the form \'ocient://user:pass@host:port/database\'.', + message="Invalid Connection String: Expecting String of the form 'ocient://user:pass@host:port/database'.", error_type=SupersetErrorType.GENERIC_DB_ENGINE_ERROR, level=ErrorLevel.ERROR, extra={ @@ -140,10 +141,10 @@ def test_convert_dttm(actual: str, expected: str, dttm: datetime) -> None: } ], }, - ) + ), ), ( - 'There is a syntax error in your statement (extraneous input \'foo bar baz\' expecting ', + "There is a syntax error in your statement (extraneous input 'foo bar baz' expecting ", SupersetError( message='Extraneous input: "foo bar baz".', error_type=SupersetErrorType.SYNTAX_ERROR, @@ -157,10 +158,10 @@ def test_convert_dttm(actual: str, expected: str, dttm: datetime) -> None: } ], }, - ) + ), ), ( - 'There is a syntax error in your statement (mismatched input \'to\' expecting {, \'trace\', \'using\'})', + "There is a syntax error in your statement (mismatched input 'to' expecting {, 'trace', 'using'})", SupersetError( message='Extraneous input: "foo bar baz".', error_type=SupersetErrorType.SYNTAX_ERROR, @@ -174,10 +175,10 @@ def test_convert_dttm(actual: str, expected: str, dttm: datetime) -> None: } ], }, - ) + ), ), ( - 'The referenced table or view \'goats\' does not exist', + "The referenced table or view 'goats' does not exist", SupersetError( message='Table or View "goats" does not exist.', error_type=SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR, @@ -192,13 +193,13 @@ def test_convert_dttm(actual: str, expected: str, dttm: datetime) -> None: { "code": 1005, "message": "Issue 1005 - The table was deleted or renamed in the database.", - } + }, ], }, - ) + ), ), ( - 'The reference to column \'goats\' is not valid', + "The reference to column 'goats' is not valid", SupersetError( message='Invalid reference to column: "goats"', error_type=SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR, @@ -213,21 +214,17 @@ def test_convert_dttm(actual: str, expected: str, dttm: datetime) -> None: { "code": 1004, "message": "Issue 1004 - The column was deleted or renamed in the database.", - } + }, ], }, - ) + ), ), ] -@pytest.mark.parametrize( - "msg,expected", - MARSHALED_OCIENT_ERRORS -) + +@pytest.mark.parametrize("msg,expected", MARSHALED_OCIENT_ERRORS) def test_connection_errors(msg: str, expected: SupersetError) -> None: from superset.db_engine_specs.ocient import OcientEngineSpec result = OcientEngineSpec.extract_errors(Exception(msg)) - assert result == [ - expected - ] + assert result == [expected] From 75e4e35a7f6b5414cb63331061be8661abbebb47 Mon Sep 17 00:00:00 2001 From: alexclavel-ocient <111374753+alexclavel-ocient@users.noreply.github.com> Date: Wed, 8 Feb 2023 11:57:29 -0600 Subject: [PATCH 16/26] Fix: make cte_alias compatible with Ocient (#16) * Fix: make cte_alias compatible with Ocient * Fixing indentation in ocient.py --- superset/db_engine_specs/ocient.py | 51 ++++++++++++++++-------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index b66216bf5ca98..417919c62acdf 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -163,6 +163,9 @@ class OcientEngineSpec(BaseEngineSpec): force_column_alias_quotes = True max_column_name_length = 30 + allows_cte_in_subquery = False + # Ocient does not support cte names starting with underscores + cte_alias = "cte__" # Store mapping of superset Query id -> Ocient ID # These are inserted into the cache when executing the query # They are then removed, either upon cancellation or query completion @@ -260,30 +263,30 @@ def fetch_data( cursor ) - if columns_to_sanitize: - # At least 1 column has to be sanitized. - - def identity(x: Any) -> Any: - return x - - # Use the identity function if the column type doesn't need to be - # sanitized. - sanitization_functions: List[SanitizeFunc] = [ - identity for _ in range(len(cursor.description)) - ] - for info in columns_to_sanitize: - sanitization_functions[info.column_index] = info.sanitize_func - - # pyocient returns a list of NamedTuple objects which represent a - # single row. We have to do this copy because that data type is - # NamedTuple's are immutable. - rows = [ - tuple( - sanitize_func(val) - for sanitize_func, val in zip(sanitization_functions, row) - ) - for row in rows - ] + if columns_to_sanitize: + # At least 1 column has to be sanitized. + + def identity(x: Any) -> Any: + return x + + # Use the identity function if the column type doesn't need to be + # sanitized. + sanitization_functions: List[SanitizeFunc] = [ + identity for _ in range(len(cursor.description)) + ] + for info in columns_to_sanitize: + sanitization_functions[info.column_index] = info.sanitize_func + + # pyocient returns a list of NamedTuple objects which represent a + # single row. We have to do this copy because that data type is + # NamedTuple's are immutable. + rows = [ + tuple( + sanitize_func(val) + for sanitize_func, val in zip(sanitization_functions, row) + ) + for row in rows + ] return rows @classmethod From 4d88c781155271d70c8ab4a1036a7667c86c76f7 Mon Sep 17 00:00:00 2001 From: alexclavel-ocient <111374753+alexclavel-ocient@users.noreply.github.com> Date: Tue, 28 Feb 2023 10:59:05 -0600 Subject: [PATCH 17/26] Adding Ocient to testing requirements (#18) --- requirements/testing.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/testing.in b/requirements/testing.in index 9a40c90753da1..439ee594dde03 100644 --- a/requirements/testing.in +++ b/requirements/testing.in @@ -16,7 +16,7 @@ # -r development.in -r integration.in --e file:.[bigquery,hive,presto,trino] +-e file:.[bigquery,hive,presto,trino,ocient] docker flask-testing freezegun From c01ccda5b12a0fc94ada692d38c7a697f4548e4d Mon Sep 17 00:00:00 2001 From: alexclavel-ocient <111374753+alexclavel-ocient@users.noreply.github.com> Date: Mon, 13 Mar 2023 10:00:34 -0500 Subject: [PATCH 18/26] Fix: make fetch_data check for rows properly (#19) --- superset/db_engine_specs/ocient.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index 417919c62acdf..f974b42078bba 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -256,7 +256,8 @@ def fetch_data( ] raise exception - if len(rows) > 0 and type(rows[0]).__name__ == rows: + # TODO: Unsure if we need to verify that we are receiving rows: + if len(rows) > 0 and type(rows[0]).__name__ == 'Row': # Peek at the schema to determine which column values, if any, # require sanitization. columns_to_sanitize: List[PlacedSanitizeFunc] = _find_columns_to_sanitize( From 1f1c5a3e901ecbec4349f192c90d7236f92f27ff Mon Sep 17 00:00:00 2001 From: jwilliams-ocient <60358443+jwilliams-ocient@users.noreply.github.com> Date: Tue, 11 Apr 2023 08:45:02 -0500 Subject: [PATCH 19/26] build[pyocient]: set min version for pyocient (#20) * v1.0.15 removes upper bound on protobuf version (conflict) --- setup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 567339982220a..07b3a1f676bb1 100644 --- a/setup.py +++ b/setup.py @@ -162,7 +162,10 @@ def get_git_sha() -> str: "kylin": ["kylinpy>=2.8.1, <2.9"], "mssql": ["pymssql>=2.1.4, <2.2"], "mysql": ["mysqlclient>=2.1.0, <3"], - "ocient": ["sqlalchemy-ocient>=1.0.0"], + "ocient": [ + "sqlalchemy-ocient>=1.0.0", + "pyocient>=1.0.15", + ], "oracle": ["cx-Oracle>8.0.0, <8.1"], "pinot": ["pinotdb>=0.3.3, <0.4"], "postgres": ["psycopg2-binary==2.9.5"], From 30b13e5c8fa142017d0c289e4d9752a275e45138 Mon Sep 17 00:00:00 2001 From: jwilliams-ocient <60358443+jwilliams-ocient@users.noreply.github.com> Date: Wed, 12 Apr 2023 09:10:50 -0500 Subject: [PATCH 20/26] docs: add example of setting DSN params (#21) * docs: add example of setting DSN params * docs: apply review suggestions Co-authored-by: rmasters1 <100157128+rmasters1@users.noreply.github.com> --------- Co-authored-by: rmasters1 <100157128+rmasters1@users.noreply.github.com> --- docs/docs/databases/ocient.mdx | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/docs/databases/ocient.mdx b/docs/docs/databases/ocient.mdx index a601bbe2b625b..ce0b70b74fb82 100644 --- a/docs/docs/databases/ocient.mdx +++ b/docs/docs/databases/ocient.mdx @@ -17,15 +17,20 @@ pip install sqlalchemy-ocient ## Connecting to Ocient -The connection string for Ocient looks like this: +The format of the Ocient DSN is: ```shell -ocient://{user}:{password}@{DNSname}:{port}/{database} +ocient://user:password@[host][:port][/database][?param1=value1&...] +``` + +The DSN for connecting to an `exampledb` database hosted at `examplehost:4050` with TLS enabled is: +```shell +ocient://admin:abc123@examplehost:4050/exampledb?tls=on ``` **NOTE**: You must enter the `user` and `password` credentials. `host` defaults to localhost, port defaults to 4050, database defaults to `system` and `tls` defaults -to `off`. +to `unverified`. ## User Access Control From b2ba77d487460dbb3605ad7b082845344a166fda Mon Sep 17 00:00:00 2001 From: jwilliams-ocient <60358443+jwilliams-ocient@users.noreply.github.com> Date: Thu, 20 Apr 2023 09:06:08 -0500 Subject: [PATCH 21/26] fix: recover from pyocient import errors. The library may not be installed on the target host (#22) --- superset/db_engine_specs/ocient.py | 33 ++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index f974b42078bba..5df18ea7cdfb4 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -25,7 +25,16 @@ from sqlalchemy.engine.reflection import Inspector from sqlalchemy.orm import Session -from superset import app +# Need to try-catch here because pyocient may not be installed +try: + # Ensure pyocient inherits Superset's logging level + import pyocient + from superset import app + superset_log_level = app.config["LOG_LEVEL"] + pyocient.logger.setLevel(superset_log_level) +except ImportError as e: + pass + from superset.db_engine_specs.base import BaseEngineSpec from superset.errors import SupersetErrorType from superset.models.core import Database @@ -132,14 +141,20 @@ def _point_to_comma_delimited(point: _STPoint) -> str: # Superset serializes temporal objects using a custom serializer # defined in superset/utils/core.py (#json_int_dttm_ser(...)). Other # are serialized by the default JSON encoder. -_sanitized_ocient_type_codes: Dict[int, SanitizeFunc] = { - TypeCodes.BINARY: _to_hex, - TypeCodes.ST_POINT: _point_to_comma_delimited, - TypeCodes.IP: str, - TypeCodes.IPV4: str, - TypeCodes.ST_LINESTRING: _linestring_to_json, - TypeCodes.ST_POLYGON: _polygon_to_json, -} +# +# Need to try-catch here because pyocient may not be installed +try: + from pyocient import TypeCodes + _sanitized_ocient_type_codes: Dict[int, SanitizeFunc] = { + TypeCodes.BINARY: _to_hex, + TypeCodes.ST_POINT: _point_to_comma_delimited, + TypeCodes.IP: str, + TypeCodes.IPV4: str, + TypeCodes.ST_LINESTRING: _linestring_to_json, + TypeCodes.ST_POLYGON: _polygon_to_json, + } +except ImportError as e: + _sanitized_ocient_type_codes: Dict[int, SanitizeFunc] = {} def _find_columns_to_sanitize(cursor: Any) -> List[PlacedSanitizeFunc]: From 33891c41fdf65bef406812189477b34e07ea8e3b Mon Sep 17 00:00:00 2001 From: Jordan Williams Date: Thu, 20 Apr 2023 14:42:26 +0000 Subject: [PATCH 22/26] fix: forgot to remove the unchecked imports at the top of the file... --- superset/db_engine_specs/ocient.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index 5df18ea7cdfb4..c4ddbbaed2d44 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -19,9 +19,7 @@ import threading from typing import Any, Callable, Dict, List, NamedTuple, Optional, Pattern, Set, Tuple -import pyocient from flask_babel import gettext as __ -from pyocient import _STLinestring, _STPoint, _STPolygon, TypeCodes from sqlalchemy.engine.reflection import Inspector from sqlalchemy.orm import Session From 3f802d8fbd16b781d12504e484eeb40fa1e36bf7 Mon Sep 17 00:00:00 2001 From: Jordan Williams Date: Thu, 20 Apr 2023 15:31:23 +0000 Subject: [PATCH 23/26] chore: remove potentially unknown type annotations --- superset/db_engine_specs/ocient.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index c4ddbbaed2d44..8a16cb9023655 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -85,7 +85,7 @@ def _to_hex(data: bytes) -> str: return data.hex() -def _polygon_to_json(polygon: _STPolygon) -> str: +def _polygon_to_json(polygon: "_STPolygon") -> str: """ Converts the _STPolygon object into its JSON representation. @@ -100,7 +100,7 @@ def _polygon_to_json(polygon: _STPolygon) -> str: return json_value -def _linestring_to_json(linestring: _STLinestring) -> str: +def _linestring_to_json(linestring: "_STLinestring") -> str: """ Converts the _STLinestring object into its JSON representation. @@ -110,7 +110,7 @@ def _linestring_to_json(linestring: _STLinestring) -> str: return f"{str([[p.long, p.lat] for p in linestring.points])}" -def _point_to_comma_delimited(point: _STPoint) -> str: +def _point_to_comma_delimited(point: "_STPoint") -> str: """ Returns the x and y coordinates as a comma delimited string. From d4a4d0bd309a5b3ecd2257aac612c93d2dbeabb6 Mon Sep 17 00:00:00 2001 From: Jordan Williams Date: Thu, 20 Apr 2023 15:38:14 +0000 Subject: [PATCH 24/26] chore: add epoch_to_dttm override --- superset/db_engine_specs/ocient.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index 8a16cb9023655..9c019b7be2741 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -302,6 +302,14 @@ def identity(x: Any) -> Any: for row in rows ] return rows + + @classmethod + def epoch_to_dttm(cls) -> str: + return "DATEADD(S, {col}, '1970-01-01')" + + @classmethod + def epoch_ms_to_dttm(cls) -> str: + return "DATEADD(MS, {col}, '1970-01-01')" @classmethod def get_cancel_query_id(cls, cursor: Any, query: Query) -> Optional[str]: From 787e303068e4b3be9e3c5ca3acc7aef7479c2586 Mon Sep 17 00:00:00 2001 From: jwilliams-ocient <60358443+jwilliams-ocient@users.noreply.github.com> Date: Thu, 20 Apr 2023 14:18:22 -0500 Subject: [PATCH 25/26] test: fix broken unit tests (#23) * refactor: holy shit I botched another commit lol * fix: address warnings generated by pylint * fix: fix custom errors matchers * test: fix broken unit tests --- superset/db_engine_specs/ocient.py | 48 +++++++++---------- .../unit_tests/db_engine_specs/test_ocient.py | 20 ++------ 2 files changed, 27 insertions(+), 41 deletions(-) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index 9c019b7be2741..f6f97a9576fbf 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -38,37 +38,35 @@ from superset.models.core import Database from superset.models.sql_lab import Query -# Ensure pyocient inherits Superset's logging level -superset_log_level = app.config["LOG_LEVEL"] -pyocient.logger.setLevel(superset_log_level) - # Regular expressions to catch custom errors CONNECTION_INVALID_USERNAME_REGEX = re.compile( - "The referenced user does not exist \(User '(?P.*?)' not found\)" + r"The referenced user does not exist \(User '(?P.*?)' not found\)" ) CONNECTION_INVALID_PASSWORD_REGEX = re.compile( - "The userid/password combination was not valid \(Incorrect password for user\)" + r"The userid/password combination was not valid \(Incorrect password for user\)" ) CONNECTION_INVALID_HOSTNAME_REGEX = re.compile( - r"Unable to connect to (?P.*?):(?P.*?):" + r"Unable to connect to (?P.*?):(?P.*?)" ) CONNECTION_UNKNOWN_DATABASE_REGEX = re.compile( - "No database named '(?P.*?)' exists" + r"No database named '(?P.*?)' exists" ) CONNECTION_INVALID_PORT_ERROR = re.compile("Port out of range 0-65535") INVALID_CONNECTION_STRING_REGEX = re.compile( - "An invalid connection string attribute was specified \(failed to decrypt cipher text\)" + r"An invalid connection string attribute was specified" + r" \(failed to decrypt cipher text\)" ) SYNTAX_ERROR_REGEX = re.compile( - r"There is a syntax error in your statement \((?P.*?) input '(?P.*?)' expecting {.*}" + r"There is a syntax error in your statement \((?P.*?)" + r" input '(?P.*?)' expecting (?P.*?)\)" ) TABLE_DOES_NOT_EXIST_REGEX = re.compile( - "The referenced table or view '(?P
.*?)' does not exist" + r"The referenced table or view '(?P
.*?)' does not exist" ) COLUMN_DOES_NOT_EXIST_REGEX = re.compile( - "The reference to column '(?P.*?)' is not valid" + r"The reference to column '(?P.*?)' is not valid" ) @@ -139,7 +137,7 @@ def _point_to_comma_delimited(point: "_STPoint") -> str: # Superset serializes temporal objects using a custom serializer # defined in superset/utils/core.py (#json_int_dttm_ser(...)). Other # are serialized by the default JSON encoder. -# +# # Need to try-catch here because pyocient may not be installed try: from pyocient import TypeCodes @@ -193,7 +191,8 @@ class OcientEngineSpec(BaseEngineSpec): ), CONNECTION_INVALID_PASSWORD_REGEX: ( __( - "The user/password combination is not valid (Incorrect password for user)." + "The user/password combination is not valid" + " (Incorrect password for user)." ), SupersetErrorType.CONNECTION_INVALID_PASSWORD_ERROR, {}, @@ -215,13 +214,14 @@ class OcientEngineSpec(BaseEngineSpec): ), INVALID_CONNECTION_STRING_REGEX: ( __( - "Invalid Connection String: Expecting String of the form 'ocient://user:pass@host:port/database'." + "Invalid Connection String: Expecting String of" + " the form 'ocient://user:pass@host:port/database'." ), SupersetErrorType.GENERIC_DB_ENGINE_ERROR, {}, ), SYNTAX_ERROR_REGEX: ( - __('Syntax Error: %(qualifier)s input "%(input)s".'), + __('Syntax Error: %(qualifier)s input "%(input)s" expecting "%(expected)s'), SupersetErrorType.SYNTAX_ERROR, {}, ), @@ -256,11 +256,11 @@ def get_table_names( @classmethod def fetch_data( - cls, cursor: Any, lim: Optional[int] = None + cls, cursor: Any, limit: Optional[int] = None ) -> List[Tuple[Any, ...]]: try: rows: List[Tuple[Any, ...]] = super(OcientEngineSpec, cls).fetch_data( - cursor, lim + cursor, limit ) except Exception as exception: with OcientEngineSpec.query_id_mapping_lock: @@ -302,7 +302,7 @@ def identity(x: Any) -> Any: for row in rows ] return rows - + @classmethod def epoch_to_dttm(cls) -> str: return "DATEADD(S, {col}, '1970-01-01')" @@ -331,10 +331,10 @@ def cancel_query(cls, cursor: Any, query: Query, cancel_query_id: str) -> bool: with OcientEngineSpec.query_id_mapping_lock: if query.id in OcientEngineSpec.query_id_mapping: cursor.execute(f"CANCEL {OcientEngineSpec.query_id_mapping[query.id]}") - # Query has been cancelled, so we can safely remove the cursor from the cache + # Query has been cancelled, so we can safely remove the cursor from + # the cache del OcientEngineSpec.query_id_mapping[query.id] - return True - # If the query is not in the cache, it must have either been cancelled elsewhere or completed - else: - return False + # If the query is not in the cache, it must have either been cancelled + # elsewhere or completed + return False diff --git a/tests/unit_tests/db_engine_specs/test_ocient.py b/tests/unit_tests/db_engine_specs/test_ocient.py index 0b8a7533be187..75acb68680db4 100644 --- a/tests/unit_tests/db_engine_specs/test_ocient.py +++ b/tests/unit_tests/db_engine_specs/test_ocient.py @@ -25,20 +25,6 @@ from superset.errors import ErrorLevel, SupersetError, SupersetErrorType -@pytest.mark.parametrize( - "actual,expected", - [ - ("DATE", "TO_DATE('2019-01-02')"), - ("DATETIME", "CAST('2019-01-02T03:04:05.678900' AS DATETIME)"), - ("TIMESTAMP", "TO_TIMESTAMP('2019-01-02T03:04:05.678900')"), - ], -) -def test_convert_dttm(actual: str, expected: str, dttm: datetime) -> None: - from superset.db_engine_specs.ocient import OcientEngineSpec - - assert OcientEngineSpec.convert_dttm(actual, dttm) == expected - - # (msg,expected) MARSHALED_OCIENT_ERRORS: List[Tuple[str, SupersetError]] = [ ( @@ -144,9 +130,9 @@ def test_convert_dttm(actual: str, expected: str, dttm: datetime) -> None: ), ), ( - "There is a syntax error in your statement (extraneous input 'foo bar baz' expecting ", + "There is a syntax error in your statement (extraneous input 'foo bar baz' expecting {, 'trace', 'using'})", SupersetError( - message='Extraneous input: "foo bar baz".', + message='Syntax Error: extraneous input "foo bar baz" expecting "{, \'trace\', \'using\'}', error_type=SupersetErrorType.SYNTAX_ERROR, level=ErrorLevel.ERROR, extra={ @@ -163,7 +149,7 @@ def test_convert_dttm(actual: str, expected: str, dttm: datetime) -> None: ( "There is a syntax error in your statement (mismatched input 'to' expecting {, 'trace', 'using'})", SupersetError( - message='Extraneous input: "foo bar baz".', + message='Syntax Error: mismatched input "to" expecting "{, \'trace\', \'using\'}', error_type=SupersetErrorType.SYNTAX_ERROR, level=ErrorLevel.ERROR, extra={ From e82386e68b19dc8c24ceaca19004b8a493a4462f Mon Sep 17 00:00:00 2001 From: jwilliams-ocient <60358443+jwilliams-ocient@users.noreply.github.com> Date: Thu, 20 Apr 2023 18:52:38 -0500 Subject: [PATCH 26/26] chore: pass pre-commit checks (#24) * style: run pre-commit hooks * style: remove type annotations that may not exist * build: remove ocient from requirements/testing.in --- requirements/testing.in | 2 +- superset/db_engine_specs/ocient.py | 14 ++++++++------ tests/unit_tests/db_engine_specs/test_ocient.py | 5 ++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/requirements/testing.in b/requirements/testing.in index 439ee594dde03..9a40c90753da1 100644 --- a/requirements/testing.in +++ b/requirements/testing.in @@ -16,7 +16,7 @@ # -r development.in -r integration.in --e file:.[bigquery,hive,presto,trino,ocient] +-e file:.[bigquery,hive,presto,trino] docker flask-testing freezegun diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index f6f97a9576fbf..87bd2c2422340 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -27,7 +27,9 @@ try: # Ensure pyocient inherits Superset's logging level import pyocient + from superset import app + superset_log_level = app.config["LOG_LEVEL"] pyocient.logger.setLevel(superset_log_level) except ImportError as e: @@ -38,7 +40,6 @@ from superset.models.core import Database from superset.models.sql_lab import Query - # Regular expressions to catch custom errors CONNECTION_INVALID_USERNAME_REGEX = re.compile( @@ -83,7 +84,7 @@ def _to_hex(data: bytes) -> str: return data.hex() -def _polygon_to_json(polygon: "_STPolygon") -> str: +def _polygon_to_json(polygon: Any) -> str: """ Converts the _STPolygon object into its JSON representation. @@ -98,7 +99,7 @@ def _polygon_to_json(polygon: "_STPolygon") -> str: return json_value -def _linestring_to_json(linestring: "_STLinestring") -> str: +def _linestring_to_json(linestring: Any) -> str: """ Converts the _STLinestring object into its JSON representation. @@ -108,7 +109,7 @@ def _linestring_to_json(linestring: "_STLinestring") -> str: return f"{str([[p.long, p.lat] for p in linestring.points])}" -def _point_to_comma_delimited(point: "_STPoint") -> str: +def _point_to_comma_delimited(point: Any) -> str: """ Returns the x and y coordinates as a comma delimited string. @@ -141,6 +142,7 @@ def _point_to_comma_delimited(point: "_STPoint") -> str: # Need to try-catch here because pyocient may not be installed try: from pyocient import TypeCodes + _sanitized_ocient_type_codes: Dict[int, SanitizeFunc] = { TypeCodes.BINARY: _to_hex, TypeCodes.ST_POINT: _point_to_comma_delimited, @@ -150,7 +152,7 @@ def _point_to_comma_delimited(point: "_STPoint") -> str: TypeCodes.ST_POLYGON: _polygon_to_json, } except ImportError as e: - _sanitized_ocient_type_codes: Dict[int, SanitizeFunc] = {} + _sanitized_ocient_type_codes = {} def _find_columns_to_sanitize(cursor: Any) -> List[PlacedSanitizeFunc]: @@ -270,7 +272,7 @@ def fetch_data( raise exception # TODO: Unsure if we need to verify that we are receiving rows: - if len(rows) > 0 and type(rows[0]).__name__ == 'Row': + if len(rows) > 0 and type(rows[0]).__name__ == "Row": # Peek at the schema to determine which column values, if any, # require sanitization. columns_to_sanitize: List[PlacedSanitizeFunc] = _find_columns_to_sanitize( diff --git a/tests/unit_tests/db_engine_specs/test_ocient.py b/tests/unit_tests/db_engine_specs/test_ocient.py index 75acb68680db4..c5578fa93727d 100644 --- a/tests/unit_tests/db_engine_specs/test_ocient.py +++ b/tests/unit_tests/db_engine_specs/test_ocient.py @@ -24,7 +24,6 @@ from superset.errors import ErrorLevel, SupersetError, SupersetErrorType - # (msg,expected) MARSHALED_OCIENT_ERRORS: List[Tuple[str, SupersetError]] = [ ( @@ -132,7 +131,7 @@ ( "There is a syntax error in your statement (extraneous input 'foo bar baz' expecting {, 'trace', 'using'})", SupersetError( - message='Syntax Error: extraneous input "foo bar baz" expecting "{, \'trace\', \'using\'}', + message="Syntax Error: extraneous input \"foo bar baz\" expecting \"{, 'trace', 'using'}", error_type=SupersetErrorType.SYNTAX_ERROR, level=ErrorLevel.ERROR, extra={ @@ -149,7 +148,7 @@ ( "There is a syntax error in your statement (mismatched input 'to' expecting {, 'trace', 'using'})", SupersetError( - message='Syntax Error: mismatched input "to" expecting "{, \'trace\', \'using\'}', + message="Syntax Error: mismatched input \"to\" expecting \"{, 'trace', 'using'}", error_type=SupersetErrorType.SYNTAX_ERROR, level=ErrorLevel.ERROR, extra={