From 02a4f809ab6c7de253c1bde5cf34aa856f002d80 Mon Sep 17 00:00:00 2001 From: raghavsharma Date: Tue, 7 Sep 2021 11:55:43 +0530 Subject: [PATCH 01/14] New branch from superset for integration with firebolt sqlalchemy adapter --- .../firebolt_sqlalchemy_adapter/constants.py | 7 + .../firebolt_sqlalchemy_adapter/exceptions.py | 49 +++ .../firebolt_api_service.py | 220 +++++++++++ .../firebolt_connector.py | 370 ++++++++++++++++++ .../firebolt_dialect.py | 209 ++++++++++ .../test_sqlalchemy_adapter/constants.py | 4 + .../test_firebolt_api_service.py | 81 ++++ .../test_fireboltconnector.py | 75 ++++ .../test_fireboltdialect.py | 29 ++ 9 files changed, 1044 insertions(+) create mode 100644 superset/firebolt_sqlalchemy_adapter/constants.py create mode 100644 superset/firebolt_sqlalchemy_adapter/exceptions.py create mode 100644 superset/firebolt_sqlalchemy_adapter/firebolt_api_service.py create mode 100644 superset/firebolt_sqlalchemy_adapter/firebolt_connector.py create mode 100644 superset/firebolt_sqlalchemy_adapter/firebolt_dialect.py create mode 100644 superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/constants.py create mode 100644 superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_firebolt_api_service.py create mode 100644 superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltconnector.py create mode 100644 superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltdialect.py diff --git a/superset/firebolt_sqlalchemy_adapter/constants.py b/superset/firebolt_sqlalchemy_adapter/constants.py new file mode 100644 index 0000000000000..ebaa1dc4d37ca --- /dev/null +++ b/superset/firebolt_sqlalchemy_adapter/constants.py @@ -0,0 +1,7 @@ +token_url = "https://api.app.firebolt.io/auth/v1/login" +token_header = {"Content-Type": "application/json;charset=UTF-8"} +refresh_url = "https://api.app.firebolt.io/auth/v1/refresh" +query_engine_url = 'https://api.app.firebolt.io/core/v1/account/engines:getURLByDatabaseName' +engine_id_url = "https://api.app.firebolt.io/core/v1/account/engines:getIdbyName" +engine_start_url = 'https://api.app.firebolt.io/core/v1/account/engines/' +default_engine_name = 'sigmoid-alchemy-analytics' \ No newline at end of file diff --git a/superset/firebolt_sqlalchemy_adapter/exceptions.py b/superset/firebolt_sqlalchemy_adapter/exceptions.py new file mode 100644 index 0000000000000..a0ac3e5dcff63 --- /dev/null +++ b/superset/firebolt_sqlalchemy_adapter/exceptions.py @@ -0,0 +1,49 @@ +from sqlalchemy.exc import CompileError + + +class Error(Exception): + pass + + +class Warning(Exception): + pass + + +class InterfaceError(Error): + pass + + +class DatabaseError(Error): + pass + + +class InternalError(DatabaseError): + pass + + +class OperationalError(DatabaseError): + pass + + +class ProgrammingError(DatabaseError): + pass + + +class IntegrityError(DatabaseError): + pass + + +class DataError(DatabaseError): + pass + + +class NotSupportedError(CompileError): + pass + + +class InvalidCredentialsError(DatabaseError): + pass + + +class SchemaNotFoundError(DatabaseError): + pass diff --git a/superset/firebolt_sqlalchemy_adapter/firebolt_api_service.py b/superset/firebolt_sqlalchemy_adapter/firebolt_api_service.py new file mode 100644 index 0000000000000..3232cf1aa8c83 --- /dev/null +++ b/superset/firebolt_sqlalchemy_adapter/firebolt_api_service.py @@ -0,0 +1,220 @@ +import json + +import requests +from requests.exceptions import HTTPError + +from sqlalchemy_adapter import constants, exceptions + + +class FireboltApiService: + + @staticmethod + def get_connection(user_email, password, db_name): + # get access token + token_json = FireboltApiService.get_access_token({'username': user_email, 'password': password}) + access_token = token_json["access_token"] + refresh_token = token_json["refresh_token"] + + # get engine url + engine_url = FireboltApiService.get_engine_url_by_db(db_name, access_token) + return access_token, engine_url, refresh_token + + # retrieve authentication token + """ + This method uses the user email and the password to fire the API to generate access-token. + :input dictionary containing user-email and password + :returns access-token + """ + + @staticmethod + def get_access_token(data): + json_data = {} # base case + payload = {} + try: + + """ + General format of request: + curl --request POST 'https://api.app.firebolt.io/auth/v1/login' --header 'Content-Type: application/json;charset=UTF-8' --data-binary '{"username":"raghavs@sigmoidanalytics.com","password":"Sharma%1"}' + """ + token_response = requests.post(url=constants.token_url, data=json.dumps(data), + headers=constants.token_header) + token_response.raise_for_status() + + """ + General format of response: + + { + "access_token": "YOUR_ACCESS_TOKEN_VALUE", + "expires_in": 86400, + "refresh_token": "YOUR_REFRESH_TOKEN_VALUE", + "scope": "offline_access", + "token_type": "Bearer" + } + """ + + json_data = json.loads(token_response.text) + + except HTTPError as http_err: + payload = { + "error": "Access Token API Exception", + "errorMessage": http_err.response.text, + } + except Exception as err: + payload = { + "error": "Access Token API Exception", + "errorMessage": str(err), + } + + if payload != {}: + msg = "{error} : {errorMessage}".format(**payload) + raise exceptions.InvalidCredentialsError(msg) + + return json_data + + # refresh access token + """ + In case the token expires or the API throws a 401 HTTP error, then this method generates a fresh token + :input refresh api url, request type, authentication header and + the refresh token generated alongside the previous expired token + :returns new access-token + """ + + @staticmethod + def get_access_token_via_refresh(refresh_token): + refresh_access_token = "" + payload = {} + try: + """ + Request: + curl --request POST 'https://api.app.firebolt.io/auth/v1/refresh' \ + --header 'Content-Type: application/json;charset=UTF-8' \ + --data-binary '{"refresh_token":"YOUR_REFRESH_TOKEN_VALUE"}' + """ + data = {'refresh_token': refresh_token} + token_response = requests.post(url=constants.refresh_url, data=json.dumps(data), + headers=constants.token_header) + token_response.raise_for_status() + + """ + Response: + { + "access_token": "YOUR_REFRESHED_ACCESS_TOKEN_VALUE", + "scope": "offline_access", + "expires_in": 86400, + "token_type": "Bearer" + } + """ + + json_data = json.loads(token_response.text) + refresh_access_token = json_data["access_token"] + + except HTTPError as http_err: + payload = { + "error": "Refresh Access Token API Exception", + "errorMessage": http_err.response.text, + } + except Exception as err: + payload = { + "error": "Refresh Access Token API Exception", + "errorMessage": str(err), + } + if payload != {}: + msg = "{error} : {errorMessage}".format(**payload) + raise exceptions.InternalError(msg) + + return refresh_access_token + + # get engine url by db name + """ + This method generates engine url using db name and access-token + :input api url, request type, authentication header and access-token + :returns engine url + """ + + @staticmethod + def get_engine_url_by_db(db_name, access_token): + engine_url = "" # base case + payload = {} + try: + """ + Request: + curl --request GET 'https://api.app.firebolt.io/core/v1/account/engines:getURLByDatabaseName?database_name=YOUR_DATABASE_NAME' \ + --header 'Authorization: Bearer YOUR_ACCESS_TOKEN_VALUE' + """ + header = {'Authorization': "Bearer " + access_token} + query_engine_response = requests.get(constants.query_engine_url, params={'database_name': db_name}, + headers=header) + query_engine_response.raise_for_status() + + """ + Response: + {"engine_url": "YOUR_DATABASES_DEFAULT_ENGINE_URL"} + """ + json_data = json.loads(query_engine_response.text) + engine_url = json_data["engine_url"] + + except HTTPError as http_err: + payload = { + "error": "Engine Url API Exception", + "errorMessage": http_err.response.text, + } + except Exception as err: + payload = { + "error": "Engine Url API Exception", + "errorMessage": str(err), + } + if payload != {}: + msg = "{error} : {errorMessage}".format(**payload) + raise exceptions.SchemaNotFoundError(msg) + + return engine_url + + # run queries + """ + This method is used to submit a query to run to a running engine. + You can specify multiple queries separated by a semicolon (;).. + :input token url, request type of API and authentication header + :returns access-token + """ + + @staticmethod + def run_query(access_token, refresh_token, engine_url, db_name, query): + query_response = {} # base-case + payload = {} + try: + + """ + Request: + echo "SELECT * FROM lineitem LIMIT 1000" | curl + --request POST 'https://YOUR_ENGINE_ENDPOINT/?database=YOUR_DATABASE_NAME' \ + --header 'Authorization: Bearer YOUR_ACCESS_TOKEN_VALUE' \ + --data-binary @- + """ + + header = {'Authorization': "Bearer " + access_token} + query_response = requests.post(url="https://" + engine_url, params={'database': db_name}, + headers=header, files={"query": (None, query)}) + if type(query_response) == HTTPError and \ + query_response.response.status_code == 401: # check for access token expiry + access_token = FireboltApiService.get_access_token_via_refresh(refresh_token) + header = {'Authorization': "Bearer " + access_token} + query_response = requests.post(url="https://" + engine_url, params={'database': db_name}, + headers=header, files={"query": (None, query)}) + query_response.raise_for_status() + + except HTTPError as http_err: + payload = { + "error": "Run Query API Exception", + "errorMessage": http_err.response.text, + } + except Exception as err: + payload = { + "error": "Run Query API Exception", + "errorMessage": str(err), + } + if payload != {}: + msg = "{error} : {errorMessage}".format(**payload) + raise exceptions.InternalError(msg) + + # return json_data + return query_response diff --git a/superset/firebolt_sqlalchemy_adapter/firebolt_connector.py b/superset/firebolt_sqlalchemy_adapter/firebolt_connector.py new file mode 100644 index 0000000000000..d673f38dd1f33 --- /dev/null +++ b/superset/firebolt_sqlalchemy_adapter/firebolt_connector.py @@ -0,0 +1,370 @@ +#!/usr/bin/env python +# +# See http://www.python.org/dev/peps/pep-0249/ +# +# Many docstrings in this file are based on the PEP, which is in the public domain. + +# Built as per Python DB API Specification - PEP 249 +# Responsible for connection to Database and providing database cursor for query execution +# Connector is imported and used by Dialect to get connection + +import itertools +import json +from collections import namedtuple, OrderedDict +from requests.exceptions import HTTPError + +import requests +from sqlalchemy_adapter.firebolt_api_service import FireboltApiService + +from sqlalchemy_adapter import exceptions + + +class Type(object): + STRING = 1 + NUMBER = 2 + BOOLEAN = 3 + + +# @check_valid_connection +def connect(user_email, password, db_name): + """ + Constructor for creating a connection to the database. + + >>> connection = connect('user_email','password','db_name') + >>> cursor = connection.cursor() + >>> response = cursor.execute('select * from ').fetchall() + + """ + connection = Connection(user_email, password, db_name) + # if connection.engine_url == "": + # connection.errors.append(connection.access_token) + return connection + + +def check_closed(f): + """Decorator that checks if connection/cursor is closed.""" + + def g(self, *args, **kwargs): + if self.closed: + raise exceptions.Error( + "{klass} already closed".format(klass=self.__class__.__name__) + ) + return f(self, *args, **kwargs) + + return g + + +def check_result(f): + """Decorator that checks if the cursor has results from `execute`.""" + + def g(self, *args, **kwargs): + if self._results is None: + raise exceptions.Error("Called before `execute`") + return f(self, *args, **kwargs) + + return g + + +# def check_valid_connection(f): +# """Decorator that checks if connection has been created successfully.""" +# +# def g(self, *args, **kwargs): +# if type(self.access_token) != dict or type(self.engine_url) == "": +# raise exceptions.Error("Invalid connection parameters") +# return f(self, *args, **kwargs) +# +# return g + + +def get_description_from_row(row): + """ + Return description from a single row. + + We only return the name, type (inferred from the data) and if the values + can be NULL. String columns in Firebolt are NULLable. Numeric columns are NOT + NULL. + """ + return [ + ( + name, # name + get_type(value), # type_code + None, # [display_size] + None, # [internal_size] + None, # [precision] + None, # [scale] + get_type(value) == Type.STRING, # [null_ok] + ) + for name, value in row.items() + ] + + +def get_type(value): + """ + Infer type from value. + + Note that bool is a subclass of int so order of statements matter. + """ + + if isinstance(value, str) or value is None: + return Type.STRING + elif isinstance(value, bool): + return Type.BOOLEAN + elif isinstance(value, (int, float)): + return Type.NUMBER + + raise exceptions.Error("Value of unknown type: {value}".format(value=value)) + + +class Connection(object): + """Connection to a Firebolt database.""" + + def __init__(self, user_email, password, db_name): + self._user_email = user_email + self._password = password + self._db_name = db_name + + connection_details = FireboltApiService.get_connection(user_email, password, db_name) + + # if connection_details[1] == "": + # raise exceptions.InvalidCredentialsError("Invalid credentials or Database name") + self.access_token = connection_details[0] + self.engine_url = connection_details[1] + self.refresh_token = connection_details[2] + self.cursors = [] + self.closed = False + + @check_closed + def close(self): + """Close the connection now.""" + self.closed = True + for cursor in self.cursors: + try: + cursor.close() + except exceptions.Error: + pass # already closed + + @check_closed + def commit(self): + """ + Commit any pending transaction to the database. + + Not supported. + """ + pass + + @check_closed + def cursor(self): + """Return a new Cursor Object using the connection.""" + cursor = Cursor( + self._db_name, + self.access_token, + self.engine_url, + self.refresh_token + ) + + self.cursors.append(cursor) + + return cursor + + @check_closed + def execute(self, query): + cursor = self.cursor() + return cursor.execute(query) + + def __enter__(self): + return self.cursor() + + def __exit__(self, *exc): + self.close() + + +class Cursor(object): + """Connection cursor.""" + + def __init__(self, db_name, access_token, engine_url, refresh_token): + + self._db_name = db_name + self._access_token = access_token + self._engine_url = engine_url + self._refresh_token = refresh_token + self.closed = False + + # This read/write attribute specifies the number of rows to fetch at a + # time with .fetchmany(). It defaults to 1 meaning to fetch a single + # row at a time. + self.arraysize = 1 + + self.closed = False + + # this is updated only after a query + self.description = None + + # this is set to an iterator after a successfull query + self._results = None + self.header = False + + @property + @check_result + @check_closed + def rowcount(self): + # consume the iterator + results = list(self._results) + n = len(results) + self._results = iter(results) + return n + + @check_closed + def close(self): + """Close the cursor.""" + self.closed = True + + @check_closed + def execute(self, query): + # def execute(self, operation, parameters=None): + # query = apply_parameters(operation, parameters) + results = self._stream_query(query) + + """ + `_stream_query` returns a generator that produces the rows; we need to + consume the first row so that `description` is properly set, so let's + consume it and insert it back if it is not the header. + """ + try: + first_row = next(results) + self._results = ( + results if self.header else itertools.chain([first_row], results) + ) + except StopIteration: + self._results = iter([]) + return self + + @check_closed + def executemany(self, operation, seq_of_parameters=None): + raise exceptions.NotSupportedError( + "`executemany` is not supported, use `execute` instead" + ) + + @check_result + @check_closed + def fetchone(self): + """ + Fetch the next row of a query result set, returning a single sequence, + or `None` when no more data is available. + """ + try: + return self.next() + except StopIteration: + return None + + @check_result + @check_closed + def fetchmany(self, size=None): + """ + Fetch the next set of rows of a query result, returning a sequence of + sequences (e.g. a list of tuples). An empty sequence is returned when + no more rows are available. + """ + size = size or self.arraysize + return list(itertools.islice(self._results, size)) + + @check_result + @check_closed + def fetchall(self): + """ + Fetch all (remaining) rows of a query result, returning them as a + sequence of sequences (e.g. a list of tuples). Note that the cursor's + arraysize attribute can affect the performance of this operation. + """ + return list(self._results) + + @check_closed + def setinputsizes(self, sizes): + # not supported + pass + + @check_closed + def setoutputsizes(self, sizes): + # not supported + pass + + @check_closed + def __iter__(self): + return self + + @check_closed + def __next__(self): + return next(self._results) + + next = __next__ + + def _stream_query(self, query): + """ + Stream rows from a query. + + This method will yield rows as the data is returned in chunks from the + server. + """ + self.description = None + + r = FireboltApiService.run_query(self._access_token, self._refresh_token, self._engine_url, self._db_name, query) + + # Setting `chunk_size` to `None` makes it use the server size + chunks = r.iter_content(chunk_size=None, decode_unicode=True) + Row = None + for row in rows_from_chunks(chunks): + # TODO Check if row description has to be set + # # update description + # if self.description is None: + # self.description = ( + # list(row.items()) if self.header else get_description_from_row(row) + # ) + + # return row in namedtuple + if Row is None: + Row = namedtuple("Row", row.keys(), rename=True) + yield Row(*row.values()) + + +def rows_from_chunks(chunks): + """ + A generator that yields rows from JSON chunks. + + Firebolt will return the data in chunks, but they are not aligned with the + JSON objects. This function will parse all complete rows inside each chunk, + yielding them as soon as possible. + """ + body = "" + # count = 1 + squareBrackets = 0 + dataStartpos = 0 + dataEndPos = 0 + inString = False + for chunk in chunks: + # print("Chunk:", count) # Code for testing response being processed in + # count = count + 1 + + if chunk: + body = "".join((body, chunk)) + for i, char in enumerate(body): + if char == '"': + if not inString: + inString = True + else: + inString = False + + if not inString: + if char == '[': + squareBrackets +=1 + if squareBrackets == 2: + dataStartpos = i+1 + if char == ']' and squareBrackets == 2: + dataEndPos = i + break + + rows = body[dataStartpos:dataEndPos].lstrip().rstrip() + # print(rows) + + for row in json.loads( + "[{rows}]".format(rows=rows), object_pairs_hook=OrderedDict + ): + yield row diff --git a/superset/firebolt_sqlalchemy_adapter/firebolt_dialect.py b/superset/firebolt_sqlalchemy_adapter/firebolt_dialect.py new file mode 100644 index 0000000000000..9701fd1178531 --- /dev/null +++ b/superset/firebolt_sqlalchemy_adapter/firebolt_dialect.py @@ -0,0 +1,209 @@ +import json + +from sqlalchemy import types +from sqlalchemy.engine import default +from sqlalchemy.sql import compiler +import sqlalchemy_adapter + +# Firebolt data types compatibility with sqlalchemy.sql.types +type_map = { + "char": types.String, + "text": types.String, + "varchar": types.String, + "string": types.String, + "float": types.Float, + "double": types.Float, + "double precision": types.Float, + "boolean": types.Boolean, + "int": types.BigInteger, + "integer": types.BigInteger, + "bigint": types.BigInteger, + "long": types.BigInteger, + "timestamp": types.TIMESTAMP, + "datetime": types.TIMESTAMP, + "date": types.DATE, + "array": types.ARRAY, +} + + +class UniversalSet(object): + def __contains__(self, item): + return True + + +class FireboltIdentifierPreparer(compiler.IdentifierPreparer): + reserved_words = UniversalSet() + + +# TODO: Check if SQLCompiler is fine or any other compiler like postgres needs to be inherited +class FireboltCompiler(compiler.SQLCompiler): + pass + + +class FireboltTypeCompiler(compiler.GenericTypeCompiler): + def visit_DOUBLEPRECISION(self, type_, **kwargs): + return "FLOAT" + + def visit_ARRAY(self, type, **kwargs): + return "Array(%s)" % type + + +""" +FireboltDialect defines the behavior of Firebolt database and DB-API combination. +It is responsible for metadata definition and firing queries for receiving Database schema and table information. +""" + + +# TODO: check dialect attribute values + +class FireboltDialect(default.DefaultDialect): + name = "firebolt" + scheme = "http" + driver = "rest" + user = None + password = None + preparer = FireboltIdentifierPreparer + statement_compiler = FireboltCompiler + type_compiler = FireboltTypeCompiler + supports_alter = False + supports_pk_autoincrement = False + supports_default_values = False + supports_empty_insert = False + supports_unicode_statements = True + supports_unicode_binds = True + returns_unicode_strings = True + description_encoding = None + supports_native_boolean = True + + def __init__(self, context=None, *args, **kwargs): + super(FireboltDialect, self).__init__(*args, **kwargs) + self.context = context or {} + + @classmethod + def dbapi(cls): + return sqlalchemy_adapter + + # Build DB-API compatible connection arguments. + def create_connect_args(self, url): + kwargs = { + "host": url.host, + "port": url.port or 8082, + "user": url.username or None, + "password": url.password or None, + "path": url.database, + "scheme": self.scheme, + "context": self.context, + "header": url.query.get("header") == "true", + } + return ([], kwargs) + + def get_schema_names(self, connection, **kwargs): + result = connection.execute( + "SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.DATABASES" + ) + + return result + + def has_table(self, connection, table_name, schema=None): + query = """ + SELECT COUNT(*) > 0 AS exists_ + FROM INFORMATION_SCHEMA.TABLES + WHERE TABLE_NAME = '{table_name}' + """.format( + table_name=table_name + ) + + result = connection.execute(query) + return result.fetchone().exists_ + + def get_table_names(self, connection, schema=None, **kwargs): + query = "SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES" + if schema: + query = "{query} WHERE TABLE_SCHEMA = '{schema}'".format( + query=query, schema=schema + ) + + result = connection.execute(query) + return result + + def get_view_names(self, connection, schema=None, **kwargs): + return [] + + def get_table_options(self, connection, table_name, schema=None, **kwargs): + return {} + + def get_columns(self, connection, table_name, schema=None, **kwargs): + # TODO: Check alternative for column_default in below query + query = """ + SELECT COLUMN_NAME, + DATA_TYPE, + IS_NULLABLE + FROM INFORMATION_SCHEMA.COLUMNS + WHERE TABLE_NAME = '{table_name}' + """.format( + table_name=table_name + ) + if schema: + query = "{query} AND TABLE_SCHEMA = '{schema}'".format( + query=query, schema=schema + ) + + result = connection.execute(query) + # y = json.loads(result) + result = result["data"] + return [ + { + "name": row['column_name'], + "type": type_map[row['data_type'].lower()], + "nullable": get_is_nullable(row['is_nullable']) + # "default": get_default(row.COLUMN_DEFAULT), + } + for row in result + ] + + def get_pk_constraint(self, connection, table_name, schema=None, **kwargs): + return {"constrained_columns": [], "name": None} + + def get_foreign_keys(self, connection, table_name, schema=None, **kwargs): + return [] + + def get_check_constraints(self, connection, table_name, schema=None, **kwargs): + return [] + + def get_table_comment(self, connection, table_name, schema=None, **kwargs): + return {"text": ""} + + def get_indexes(self, connection, table_name, schema=None, **kwargs): + return [] + + def get_unique_constraints(self, connection, table_name, schema=None, **kwargs): + return [] + + def get_view_definition(self, connection, view_name, schema=None, **kwargs): + pass + + def do_rollback(self, dbapi_connection): + pass + + def _check_unicode_returns(self, connection, additional_tests=None): + return True + + def _check_unicode_description(self, connection): + return True + + +FireboltHTTPDialect = FireboltDialect + + +class FireboltHTTPSDialect(FireboltDialect): + scheme = "https" + + +def get_is_nullable(column_is_nullable): + return column_is_nullable.lower() == "yes" + + +# TODO check if this method is needed +def get_default(firebolt_column_default): + # currently unused, returns '' + return str(firebolt_column_default) if firebolt_column_default != "" else None diff --git a/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/constants.py b/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/constants.py new file mode 100644 index 0000000000000..551c97974133a --- /dev/null +++ b/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/constants.py @@ -0,0 +1,4 @@ +db_name = 'Sigmoid_Alchemy' +query = 'select * from joining_details' +username = 'aapurva@sigmoidanalytics.com' +password = 'Apurva111' \ No newline at end of file diff --git a/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_firebolt_api_service.py b/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_firebolt_api_service.py new file mode 100644 index 0000000000000..cd519b1fc3a2e --- /dev/null +++ b/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_firebolt_api_service.py @@ -0,0 +1,81 @@ +from sqlalchemy_adapter.firebolt_api_service import FireboltApiService +from sqlalchemy_adapter.test_sqlalchemy_adapter import constants +from requests.exceptions import HTTPError +from sqlalchemy_adapter import exceptions +import pytest + +access_token = FireboltApiService.get_access_token({'username': constants.username, + 'password': constants.password}) +engine_url = FireboltApiService.get_engine_url_by_db(constants.db_name, access_token["access_token"]) + + +class TestFireboltApiService: + + def test_get_connection_success(self): + response = FireboltApiService.get_connection(constants.username, constants.password, constants.db_name) + if type(response) == HTTPError: + assert response.response.status_code == 503 + else: + assert response != "" + + def test_get_connection_invalid_credentials(self): + with pytest.raises(Exception) as e_info: + response = FireboltApiService.get_connection('username', 'password', constants.db_name)[0] + + def test_get_connection_invalid_schema_name(self): + with pytest.raises(Exception) as e_info: + response = FireboltApiService.get_connection(constants.username, constants.password, 'db_name')[1] + + def test_get_access_token_success(self): + assert access_token["access_token"] != "" + + def test_get_access_token_invalid_credentials(self): + with pytest.raises(Exception) as e_info: + response = FireboltApiService.get_access_token({'username': 'username', 'password': 'password'}) + + def test_get_access_token_via_refresh_success(self): + assert FireboltApiService.get_access_token_via_refresh(access_token["refresh_token"]) != "" + + def test_get_access_token_via_refresh_invalid_token(self): + with pytest.raises(Exception) as e_info: + response = FireboltApiService.get_access_token_via_refresh({'refresh_token': 'refresh_token'}) + + def test_get_engine_url_by_db_success(self): + assert engine_url != "" + + def test_get_engine_url_by_db_invalid_schema(self): + with pytest.raises(Exception) as e_info: + response = FireboltApiService.get_engine_url_by_db('db_name', access_token["access_token"]) + + def test_get_engine_url_by_db_invalid_header(self): + with pytest.raises(Exception) as e_info: + response = FireboltApiService.get_engine_url_by_db(constants.db_name, 'header') != "" + + def test_run_query_success(self): + try: + response = FireboltApiService.run_query(access_token["access_token"], access_token["refresh_token"], + engine_url, constants.db_name, + constants.query) + assert response != "" + except exceptions.InternalError as http_err: + assert http_err != "" + + def test_run_query_invalid_url(self): + with pytest.raises(Exception) as e_info: + response = FireboltApiService.run_query(access_token["access_token"], access_token["refresh_token"], "", + constants.db_name, constants.query) != {} + + def test_run_query_invalid_schema(self): + with pytest.raises(Exception) as e_info: + response = FireboltApiService.run_query(access_token["access_token"], access_token["refresh_token"], + engine_url, 'db_name', constants.query) + + def test_run_query_invalid_header(self): + with pytest.raises(Exception) as e_info: + response = FireboltApiService.run_query('header', access_token["refresh_token"], engine_url, constants.db_name, + constants.query) != {} + + def test_run_query_invalid_query(self): + with pytest.raises(Exception) as e_info: + response = FireboltApiService.run_query(access_token["access_token"], access_token["refresh_token"], + engine_url, constants.db_name, 'query') diff --git a/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltconnector.py b/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltconnector.py new file mode 100644 index 0000000000000..f42e3ffa99643 --- /dev/null +++ b/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltconnector.py @@ -0,0 +1,75 @@ +from sqlalchemy_adapter import firebolt_connector + + +class Testfirebolt_connector: + + def test_connect_success(self): + user_email = "aapurva@sigmoidanalytics.com" + password = "Apurva111" + db_name = "Sigmoid_Alchemy" + connection = firebolt_connector.connect(user_email, password, db_name) + assert connection.access_token + assert connection.engine_url + + def test_connect_invalid_credentials(self): + user_email = "aapurva@sigmoidanalytics.com" + password = "wrongpassword" + db_name = "Sigmoid_Alchemy" + connection = firebolt_connector.connect(user_email, password, db_name) + assert not connection.access_token + assert not connection.engine_url + + def test_connect_invalid_database(self): + user_email = "aapurva@sigmoidanalytics.com" + password = "Apurva111" + db_name = "wrongdatabase" + connection = firebolt_connector.connect(user_email, password, db_name) + assert not connection.access_token + assert not connection.engine_url + + def test_get_type(self): + value_1 = "String Value" + value_2_1 = 5 + value_2_2 = 5.1 + value_3_1 = True + value_3_2 = False + assert firebolt_connector.get_type(value_1) == 1 + assert firebolt_connector.get_type(value_2_1) == 2 + assert firebolt_connector.get_type(value_2_2) == 2 + assert firebolt_connector.get_type(value_3_1) == 3 + assert firebolt_connector.get_type(value_3_2) == 3 + # TODO check how to assert/test exceptions + + def test_get_description_from_row(self): + row = {'id': 1, 'name': 'John', 'is_eligible': True} + result = firebolt_connector.get_description_from_row(row) + assert result[0][0] == 'id' + assert result[0][1] == firebolt_connector.Type.NUMBER + assert result[0][6] == False + assert result[1][0] == 'name' + assert result[1][1] == firebolt_connector.Type.STRING + assert result[1][6] == True + assert result[2][0] == 'is_eligible' + assert result[2][1] == firebolt_connector.Type.BOOLEAN + assert result[2][6] == False + + def test_connection_cursor(self): + user_email = "aapurva@sigmoidanalytics.com" + password = "Apurva111" + db_name = "Sigmoid_Alchemy" + connection = firebolt_connector.connect(user_email, password, db_name) + assert len(connection.cursors) == 0 + connection.cursor() + assert len(connection.cursors) > 0 + + def test_connection_execute(self): + user_email = "aapurva@sigmoidanalytics.com" + password = "Apurva111" + db_name = "Sigmoid_Alchemy" + query = "SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.DATABASES" + connection = firebolt_connector.connect(user_email, password, db_name) + result = connection.execute(query) + assert result['data'][0]['schema_name'] == 'Sigmoid_Alchemy' + + def test_escape(self): + assert firebolt_connector.escape("*") == "*" diff --git a/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltdialect.py b/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltdialect.py new file mode 100644 index 0000000000000..0e61677873ea4 --- /dev/null +++ b/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltdialect.py @@ -0,0 +1,29 @@ +from sqlalchemy_adapter import firebolt_connector +from sqlalchemy_adapter import firebolt_dialect + +class ConnectionDetails(): + user_email = "aapurva@sigmoidanalytics.com" + password = "Apurva111" + db_name = "Sigmoid_Alchemy" + connection = firebolt_connector.connect(user_email, password, db_name) + fireboltDialect = firebolt_dialect.FireboltDialect + +class TestFireboltDialect: + + + + def test_create_connect_args(self): + None + + def test_get_schema_names(self): + # result = fireboltDialect.get_schema_names() + None + + def test_has_table(self): + None + + def test_get_table_names(self): + None + + def get_columns(self): + None \ No newline at end of file From 296ac3d7185584b99318b4c8099e8457279b3e27 Mon Sep 17 00:00:00 2001 From: raghavsharma Date: Tue, 21 Sep 2021 17:33:09 +0530 Subject: [PATCH 02/14] Added db_engine_spec file for Firebolt --- superset/db_engine_specs/firebolt.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 superset/db_engine_specs/firebolt.py diff --git a/superset/db_engine_specs/firebolt.py b/superset/db_engine_specs/firebolt.py new file mode 100644 index 0000000000000..1a51716911abe --- /dev/null +++ b/superset/db_engine_specs/firebolt.py @@ -0,0 +1,8 @@ +from superset.db_engine_specs.base import BaseEngineSpec + +class FireboltEngineSpec(BaseEngineSpec): + """Engine spec for Firebolt""" + + engine = "firebolt" + engine_name = "Firebolt" + default_driver = "firebolt" From 21d5ab9d8b15917f0beb72815dff9949282d1501 Mon Sep 17 00:00:00 2001 From: raghavsharma Date: Tue, 21 Sep 2021 18:58:48 +0530 Subject: [PATCH 03/14] Removed firebolt code from superset repo --- .../webdriver/chrome/remote_connection.py | 28 ++ .../firebolt_sqlalchemy_adapter/constants.py | 7 - .../firebolt_sqlalchemy_adapter/exceptions.py | 49 --- .../firebolt_api_service.py | 220 ----------- .../firebolt_connector.py | 370 ------------------ .../firebolt_dialect.py | 209 ---------- .../test_sqlalchemy_adapter/constants.py | 4 - .../test_firebolt_api_service.py | 81 ---- .../test_fireboltconnector.py | 75 ---- .../test_fireboltdialect.py | 29 -- 10 files changed, 28 insertions(+), 1044 deletions(-) create mode 100644 myVenv/lib/python3.9/site-packages/selenium/webdriver/chrome/remote_connection.py delete mode 100644 superset/firebolt_sqlalchemy_adapter/constants.py delete mode 100644 superset/firebolt_sqlalchemy_adapter/exceptions.py delete mode 100644 superset/firebolt_sqlalchemy_adapter/firebolt_api_service.py delete mode 100644 superset/firebolt_sqlalchemy_adapter/firebolt_connector.py delete mode 100644 superset/firebolt_sqlalchemy_adapter/firebolt_dialect.py delete mode 100644 superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/constants.py delete mode 100644 superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_firebolt_api_service.py delete mode 100644 superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltconnector.py delete mode 100644 superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltdialect.py diff --git a/myVenv/lib/python3.9/site-packages/selenium/webdriver/chrome/remote_connection.py b/myVenv/lib/python3.9/site-packages/selenium/webdriver/chrome/remote_connection.py new file mode 100644 index 0000000000000..8774b1aad8efb --- /dev/null +++ b/myVenv/lib/python3.9/site-packages/selenium/webdriver/chrome/remote_connection.py @@ -0,0 +1,28 @@ +# Licensed to the Software Freedom Conservancy (SFC) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The SFC licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from selenium.webdriver.remote.remote_connection import RemoteConnection + + +class ChromeRemoteConnection(RemoteConnection): + + def __init__(self, remote_server_addr, keep_alive=True): + RemoteConnection.__init__(self, remote_server_addr, keep_alive) + self._commands["launchApp"] = ('POST', '/session/$sessionId/chromium/launch_app') + self._commands["setNetworkConditions"] = ('POST', '/session/$sessionId/chromium/network_conditions') + self._commands["getNetworkConditions"] = ('GET', '/session/$sessionId/chromium/network_conditions') + self._commands['executeCdpCommand'] = ('POST', '/session/$sessionId/goog/cdp/execute') diff --git a/superset/firebolt_sqlalchemy_adapter/constants.py b/superset/firebolt_sqlalchemy_adapter/constants.py deleted file mode 100644 index ebaa1dc4d37ca..0000000000000 --- a/superset/firebolt_sqlalchemy_adapter/constants.py +++ /dev/null @@ -1,7 +0,0 @@ -token_url = "https://api.app.firebolt.io/auth/v1/login" -token_header = {"Content-Type": "application/json;charset=UTF-8"} -refresh_url = "https://api.app.firebolt.io/auth/v1/refresh" -query_engine_url = 'https://api.app.firebolt.io/core/v1/account/engines:getURLByDatabaseName' -engine_id_url = "https://api.app.firebolt.io/core/v1/account/engines:getIdbyName" -engine_start_url = 'https://api.app.firebolt.io/core/v1/account/engines/' -default_engine_name = 'sigmoid-alchemy-analytics' \ No newline at end of file diff --git a/superset/firebolt_sqlalchemy_adapter/exceptions.py b/superset/firebolt_sqlalchemy_adapter/exceptions.py deleted file mode 100644 index a0ac3e5dcff63..0000000000000 --- a/superset/firebolt_sqlalchemy_adapter/exceptions.py +++ /dev/null @@ -1,49 +0,0 @@ -from sqlalchemy.exc import CompileError - - -class Error(Exception): - pass - - -class Warning(Exception): - pass - - -class InterfaceError(Error): - pass - - -class DatabaseError(Error): - pass - - -class InternalError(DatabaseError): - pass - - -class OperationalError(DatabaseError): - pass - - -class ProgrammingError(DatabaseError): - pass - - -class IntegrityError(DatabaseError): - pass - - -class DataError(DatabaseError): - pass - - -class NotSupportedError(CompileError): - pass - - -class InvalidCredentialsError(DatabaseError): - pass - - -class SchemaNotFoundError(DatabaseError): - pass diff --git a/superset/firebolt_sqlalchemy_adapter/firebolt_api_service.py b/superset/firebolt_sqlalchemy_adapter/firebolt_api_service.py deleted file mode 100644 index 3232cf1aa8c83..0000000000000 --- a/superset/firebolt_sqlalchemy_adapter/firebolt_api_service.py +++ /dev/null @@ -1,220 +0,0 @@ -import json - -import requests -from requests.exceptions import HTTPError - -from sqlalchemy_adapter import constants, exceptions - - -class FireboltApiService: - - @staticmethod - def get_connection(user_email, password, db_name): - # get access token - token_json = FireboltApiService.get_access_token({'username': user_email, 'password': password}) - access_token = token_json["access_token"] - refresh_token = token_json["refresh_token"] - - # get engine url - engine_url = FireboltApiService.get_engine_url_by_db(db_name, access_token) - return access_token, engine_url, refresh_token - - # retrieve authentication token - """ - This method uses the user email and the password to fire the API to generate access-token. - :input dictionary containing user-email and password - :returns access-token - """ - - @staticmethod - def get_access_token(data): - json_data = {} # base case - payload = {} - try: - - """ - General format of request: - curl --request POST 'https://api.app.firebolt.io/auth/v1/login' --header 'Content-Type: application/json;charset=UTF-8' --data-binary '{"username":"raghavs@sigmoidanalytics.com","password":"Sharma%1"}' - """ - token_response = requests.post(url=constants.token_url, data=json.dumps(data), - headers=constants.token_header) - token_response.raise_for_status() - - """ - General format of response: - - { - "access_token": "YOUR_ACCESS_TOKEN_VALUE", - "expires_in": 86400, - "refresh_token": "YOUR_REFRESH_TOKEN_VALUE", - "scope": "offline_access", - "token_type": "Bearer" - } - """ - - json_data = json.loads(token_response.text) - - except HTTPError as http_err: - payload = { - "error": "Access Token API Exception", - "errorMessage": http_err.response.text, - } - except Exception as err: - payload = { - "error": "Access Token API Exception", - "errorMessage": str(err), - } - - if payload != {}: - msg = "{error} : {errorMessage}".format(**payload) - raise exceptions.InvalidCredentialsError(msg) - - return json_data - - # refresh access token - """ - In case the token expires or the API throws a 401 HTTP error, then this method generates a fresh token - :input refresh api url, request type, authentication header and - the refresh token generated alongside the previous expired token - :returns new access-token - """ - - @staticmethod - def get_access_token_via_refresh(refresh_token): - refresh_access_token = "" - payload = {} - try: - """ - Request: - curl --request POST 'https://api.app.firebolt.io/auth/v1/refresh' \ - --header 'Content-Type: application/json;charset=UTF-8' \ - --data-binary '{"refresh_token":"YOUR_REFRESH_TOKEN_VALUE"}' - """ - data = {'refresh_token': refresh_token} - token_response = requests.post(url=constants.refresh_url, data=json.dumps(data), - headers=constants.token_header) - token_response.raise_for_status() - - """ - Response: - { - "access_token": "YOUR_REFRESHED_ACCESS_TOKEN_VALUE", - "scope": "offline_access", - "expires_in": 86400, - "token_type": "Bearer" - } - """ - - json_data = json.loads(token_response.text) - refresh_access_token = json_data["access_token"] - - except HTTPError as http_err: - payload = { - "error": "Refresh Access Token API Exception", - "errorMessage": http_err.response.text, - } - except Exception as err: - payload = { - "error": "Refresh Access Token API Exception", - "errorMessage": str(err), - } - if payload != {}: - msg = "{error} : {errorMessage}".format(**payload) - raise exceptions.InternalError(msg) - - return refresh_access_token - - # get engine url by db name - """ - This method generates engine url using db name and access-token - :input api url, request type, authentication header and access-token - :returns engine url - """ - - @staticmethod - def get_engine_url_by_db(db_name, access_token): - engine_url = "" # base case - payload = {} - try: - """ - Request: - curl --request GET 'https://api.app.firebolt.io/core/v1/account/engines:getURLByDatabaseName?database_name=YOUR_DATABASE_NAME' \ - --header 'Authorization: Bearer YOUR_ACCESS_TOKEN_VALUE' - """ - header = {'Authorization': "Bearer " + access_token} - query_engine_response = requests.get(constants.query_engine_url, params={'database_name': db_name}, - headers=header) - query_engine_response.raise_for_status() - - """ - Response: - {"engine_url": "YOUR_DATABASES_DEFAULT_ENGINE_URL"} - """ - json_data = json.loads(query_engine_response.text) - engine_url = json_data["engine_url"] - - except HTTPError as http_err: - payload = { - "error": "Engine Url API Exception", - "errorMessage": http_err.response.text, - } - except Exception as err: - payload = { - "error": "Engine Url API Exception", - "errorMessage": str(err), - } - if payload != {}: - msg = "{error} : {errorMessage}".format(**payload) - raise exceptions.SchemaNotFoundError(msg) - - return engine_url - - # run queries - """ - This method is used to submit a query to run to a running engine. - You can specify multiple queries separated by a semicolon (;).. - :input token url, request type of API and authentication header - :returns access-token - """ - - @staticmethod - def run_query(access_token, refresh_token, engine_url, db_name, query): - query_response = {} # base-case - payload = {} - try: - - """ - Request: - echo "SELECT * FROM lineitem LIMIT 1000" | curl - --request POST 'https://YOUR_ENGINE_ENDPOINT/?database=YOUR_DATABASE_NAME' \ - --header 'Authorization: Bearer YOUR_ACCESS_TOKEN_VALUE' \ - --data-binary @- - """ - - header = {'Authorization': "Bearer " + access_token} - query_response = requests.post(url="https://" + engine_url, params={'database': db_name}, - headers=header, files={"query": (None, query)}) - if type(query_response) == HTTPError and \ - query_response.response.status_code == 401: # check for access token expiry - access_token = FireboltApiService.get_access_token_via_refresh(refresh_token) - header = {'Authorization': "Bearer " + access_token} - query_response = requests.post(url="https://" + engine_url, params={'database': db_name}, - headers=header, files={"query": (None, query)}) - query_response.raise_for_status() - - except HTTPError as http_err: - payload = { - "error": "Run Query API Exception", - "errorMessage": http_err.response.text, - } - except Exception as err: - payload = { - "error": "Run Query API Exception", - "errorMessage": str(err), - } - if payload != {}: - msg = "{error} : {errorMessage}".format(**payload) - raise exceptions.InternalError(msg) - - # return json_data - return query_response diff --git a/superset/firebolt_sqlalchemy_adapter/firebolt_connector.py b/superset/firebolt_sqlalchemy_adapter/firebolt_connector.py deleted file mode 100644 index d673f38dd1f33..0000000000000 --- a/superset/firebolt_sqlalchemy_adapter/firebolt_connector.py +++ /dev/null @@ -1,370 +0,0 @@ -#!/usr/bin/env python -# -# See http://www.python.org/dev/peps/pep-0249/ -# -# Many docstrings in this file are based on the PEP, which is in the public domain. - -# Built as per Python DB API Specification - PEP 249 -# Responsible for connection to Database and providing database cursor for query execution -# Connector is imported and used by Dialect to get connection - -import itertools -import json -from collections import namedtuple, OrderedDict -from requests.exceptions import HTTPError - -import requests -from sqlalchemy_adapter.firebolt_api_service import FireboltApiService - -from sqlalchemy_adapter import exceptions - - -class Type(object): - STRING = 1 - NUMBER = 2 - BOOLEAN = 3 - - -# @check_valid_connection -def connect(user_email, password, db_name): - """ - Constructor for creating a connection to the database. - - >>> connection = connect('user_email','password','db_name') - >>> cursor = connection.cursor() - >>> response = cursor.execute('select * from ').fetchall() - - """ - connection = Connection(user_email, password, db_name) - # if connection.engine_url == "": - # connection.errors.append(connection.access_token) - return connection - - -def check_closed(f): - """Decorator that checks if connection/cursor is closed.""" - - def g(self, *args, **kwargs): - if self.closed: - raise exceptions.Error( - "{klass} already closed".format(klass=self.__class__.__name__) - ) - return f(self, *args, **kwargs) - - return g - - -def check_result(f): - """Decorator that checks if the cursor has results from `execute`.""" - - def g(self, *args, **kwargs): - if self._results is None: - raise exceptions.Error("Called before `execute`") - return f(self, *args, **kwargs) - - return g - - -# def check_valid_connection(f): -# """Decorator that checks if connection has been created successfully.""" -# -# def g(self, *args, **kwargs): -# if type(self.access_token) != dict or type(self.engine_url) == "": -# raise exceptions.Error("Invalid connection parameters") -# return f(self, *args, **kwargs) -# -# return g - - -def get_description_from_row(row): - """ - Return description from a single row. - - We only return the name, type (inferred from the data) and if the values - can be NULL. String columns in Firebolt are NULLable. Numeric columns are NOT - NULL. - """ - return [ - ( - name, # name - get_type(value), # type_code - None, # [display_size] - None, # [internal_size] - None, # [precision] - None, # [scale] - get_type(value) == Type.STRING, # [null_ok] - ) - for name, value in row.items() - ] - - -def get_type(value): - """ - Infer type from value. - - Note that bool is a subclass of int so order of statements matter. - """ - - if isinstance(value, str) or value is None: - return Type.STRING - elif isinstance(value, bool): - return Type.BOOLEAN - elif isinstance(value, (int, float)): - return Type.NUMBER - - raise exceptions.Error("Value of unknown type: {value}".format(value=value)) - - -class Connection(object): - """Connection to a Firebolt database.""" - - def __init__(self, user_email, password, db_name): - self._user_email = user_email - self._password = password - self._db_name = db_name - - connection_details = FireboltApiService.get_connection(user_email, password, db_name) - - # if connection_details[1] == "": - # raise exceptions.InvalidCredentialsError("Invalid credentials or Database name") - self.access_token = connection_details[0] - self.engine_url = connection_details[1] - self.refresh_token = connection_details[2] - self.cursors = [] - self.closed = False - - @check_closed - def close(self): - """Close the connection now.""" - self.closed = True - for cursor in self.cursors: - try: - cursor.close() - except exceptions.Error: - pass # already closed - - @check_closed - def commit(self): - """ - Commit any pending transaction to the database. - - Not supported. - """ - pass - - @check_closed - def cursor(self): - """Return a new Cursor Object using the connection.""" - cursor = Cursor( - self._db_name, - self.access_token, - self.engine_url, - self.refresh_token - ) - - self.cursors.append(cursor) - - return cursor - - @check_closed - def execute(self, query): - cursor = self.cursor() - return cursor.execute(query) - - def __enter__(self): - return self.cursor() - - def __exit__(self, *exc): - self.close() - - -class Cursor(object): - """Connection cursor.""" - - def __init__(self, db_name, access_token, engine_url, refresh_token): - - self._db_name = db_name - self._access_token = access_token - self._engine_url = engine_url - self._refresh_token = refresh_token - self.closed = False - - # This read/write attribute specifies the number of rows to fetch at a - # time with .fetchmany(). It defaults to 1 meaning to fetch a single - # row at a time. - self.arraysize = 1 - - self.closed = False - - # this is updated only after a query - self.description = None - - # this is set to an iterator after a successfull query - self._results = None - self.header = False - - @property - @check_result - @check_closed - def rowcount(self): - # consume the iterator - results = list(self._results) - n = len(results) - self._results = iter(results) - return n - - @check_closed - def close(self): - """Close the cursor.""" - self.closed = True - - @check_closed - def execute(self, query): - # def execute(self, operation, parameters=None): - # query = apply_parameters(operation, parameters) - results = self._stream_query(query) - - """ - `_stream_query` returns a generator that produces the rows; we need to - consume the first row so that `description` is properly set, so let's - consume it and insert it back if it is not the header. - """ - try: - first_row = next(results) - self._results = ( - results if self.header else itertools.chain([first_row], results) - ) - except StopIteration: - self._results = iter([]) - return self - - @check_closed - def executemany(self, operation, seq_of_parameters=None): - raise exceptions.NotSupportedError( - "`executemany` is not supported, use `execute` instead" - ) - - @check_result - @check_closed - def fetchone(self): - """ - Fetch the next row of a query result set, returning a single sequence, - or `None` when no more data is available. - """ - try: - return self.next() - except StopIteration: - return None - - @check_result - @check_closed - def fetchmany(self, size=None): - """ - Fetch the next set of rows of a query result, returning a sequence of - sequences (e.g. a list of tuples). An empty sequence is returned when - no more rows are available. - """ - size = size or self.arraysize - return list(itertools.islice(self._results, size)) - - @check_result - @check_closed - def fetchall(self): - """ - Fetch all (remaining) rows of a query result, returning them as a - sequence of sequences (e.g. a list of tuples). Note that the cursor's - arraysize attribute can affect the performance of this operation. - """ - return list(self._results) - - @check_closed - def setinputsizes(self, sizes): - # not supported - pass - - @check_closed - def setoutputsizes(self, sizes): - # not supported - pass - - @check_closed - def __iter__(self): - return self - - @check_closed - def __next__(self): - return next(self._results) - - next = __next__ - - def _stream_query(self, query): - """ - Stream rows from a query. - - This method will yield rows as the data is returned in chunks from the - server. - """ - self.description = None - - r = FireboltApiService.run_query(self._access_token, self._refresh_token, self._engine_url, self._db_name, query) - - # Setting `chunk_size` to `None` makes it use the server size - chunks = r.iter_content(chunk_size=None, decode_unicode=True) - Row = None - for row in rows_from_chunks(chunks): - # TODO Check if row description has to be set - # # update description - # if self.description is None: - # self.description = ( - # list(row.items()) if self.header else get_description_from_row(row) - # ) - - # return row in namedtuple - if Row is None: - Row = namedtuple("Row", row.keys(), rename=True) - yield Row(*row.values()) - - -def rows_from_chunks(chunks): - """ - A generator that yields rows from JSON chunks. - - Firebolt will return the data in chunks, but they are not aligned with the - JSON objects. This function will parse all complete rows inside each chunk, - yielding them as soon as possible. - """ - body = "" - # count = 1 - squareBrackets = 0 - dataStartpos = 0 - dataEndPos = 0 - inString = False - for chunk in chunks: - # print("Chunk:", count) # Code for testing response being processed in - # count = count + 1 - - if chunk: - body = "".join((body, chunk)) - for i, char in enumerate(body): - if char == '"': - if not inString: - inString = True - else: - inString = False - - if not inString: - if char == '[': - squareBrackets +=1 - if squareBrackets == 2: - dataStartpos = i+1 - if char == ']' and squareBrackets == 2: - dataEndPos = i - break - - rows = body[dataStartpos:dataEndPos].lstrip().rstrip() - # print(rows) - - for row in json.loads( - "[{rows}]".format(rows=rows), object_pairs_hook=OrderedDict - ): - yield row diff --git a/superset/firebolt_sqlalchemy_adapter/firebolt_dialect.py b/superset/firebolt_sqlalchemy_adapter/firebolt_dialect.py deleted file mode 100644 index 9701fd1178531..0000000000000 --- a/superset/firebolt_sqlalchemy_adapter/firebolt_dialect.py +++ /dev/null @@ -1,209 +0,0 @@ -import json - -from sqlalchemy import types -from sqlalchemy.engine import default -from sqlalchemy.sql import compiler -import sqlalchemy_adapter - -# Firebolt data types compatibility with sqlalchemy.sql.types -type_map = { - "char": types.String, - "text": types.String, - "varchar": types.String, - "string": types.String, - "float": types.Float, - "double": types.Float, - "double precision": types.Float, - "boolean": types.Boolean, - "int": types.BigInteger, - "integer": types.BigInteger, - "bigint": types.BigInteger, - "long": types.BigInteger, - "timestamp": types.TIMESTAMP, - "datetime": types.TIMESTAMP, - "date": types.DATE, - "array": types.ARRAY, -} - - -class UniversalSet(object): - def __contains__(self, item): - return True - - -class FireboltIdentifierPreparer(compiler.IdentifierPreparer): - reserved_words = UniversalSet() - - -# TODO: Check if SQLCompiler is fine or any other compiler like postgres needs to be inherited -class FireboltCompiler(compiler.SQLCompiler): - pass - - -class FireboltTypeCompiler(compiler.GenericTypeCompiler): - def visit_DOUBLEPRECISION(self, type_, **kwargs): - return "FLOAT" - - def visit_ARRAY(self, type, **kwargs): - return "Array(%s)" % type - - -""" -FireboltDialect defines the behavior of Firebolt database and DB-API combination. -It is responsible for metadata definition and firing queries for receiving Database schema and table information. -""" - - -# TODO: check dialect attribute values - -class FireboltDialect(default.DefaultDialect): - name = "firebolt" - scheme = "http" - driver = "rest" - user = None - password = None - preparer = FireboltIdentifierPreparer - statement_compiler = FireboltCompiler - type_compiler = FireboltTypeCompiler - supports_alter = False - supports_pk_autoincrement = False - supports_default_values = False - supports_empty_insert = False - supports_unicode_statements = True - supports_unicode_binds = True - returns_unicode_strings = True - description_encoding = None - supports_native_boolean = True - - def __init__(self, context=None, *args, **kwargs): - super(FireboltDialect, self).__init__(*args, **kwargs) - self.context = context or {} - - @classmethod - def dbapi(cls): - return sqlalchemy_adapter - - # Build DB-API compatible connection arguments. - def create_connect_args(self, url): - kwargs = { - "host": url.host, - "port": url.port or 8082, - "user": url.username or None, - "password": url.password or None, - "path": url.database, - "scheme": self.scheme, - "context": self.context, - "header": url.query.get("header") == "true", - } - return ([], kwargs) - - def get_schema_names(self, connection, **kwargs): - result = connection.execute( - "SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.DATABASES" - ) - - return result - - def has_table(self, connection, table_name, schema=None): - query = """ - SELECT COUNT(*) > 0 AS exists_ - FROM INFORMATION_SCHEMA.TABLES - WHERE TABLE_NAME = '{table_name}' - """.format( - table_name=table_name - ) - - result = connection.execute(query) - return result.fetchone().exists_ - - def get_table_names(self, connection, schema=None, **kwargs): - query = "SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES" - if schema: - query = "{query} WHERE TABLE_SCHEMA = '{schema}'".format( - query=query, schema=schema - ) - - result = connection.execute(query) - return result - - def get_view_names(self, connection, schema=None, **kwargs): - return [] - - def get_table_options(self, connection, table_name, schema=None, **kwargs): - return {} - - def get_columns(self, connection, table_name, schema=None, **kwargs): - # TODO: Check alternative for column_default in below query - query = """ - SELECT COLUMN_NAME, - DATA_TYPE, - IS_NULLABLE - FROM INFORMATION_SCHEMA.COLUMNS - WHERE TABLE_NAME = '{table_name}' - """.format( - table_name=table_name - ) - if schema: - query = "{query} AND TABLE_SCHEMA = '{schema}'".format( - query=query, schema=schema - ) - - result = connection.execute(query) - # y = json.loads(result) - result = result["data"] - return [ - { - "name": row['column_name'], - "type": type_map[row['data_type'].lower()], - "nullable": get_is_nullable(row['is_nullable']) - # "default": get_default(row.COLUMN_DEFAULT), - } - for row in result - ] - - def get_pk_constraint(self, connection, table_name, schema=None, **kwargs): - return {"constrained_columns": [], "name": None} - - def get_foreign_keys(self, connection, table_name, schema=None, **kwargs): - return [] - - def get_check_constraints(self, connection, table_name, schema=None, **kwargs): - return [] - - def get_table_comment(self, connection, table_name, schema=None, **kwargs): - return {"text": ""} - - def get_indexes(self, connection, table_name, schema=None, **kwargs): - return [] - - def get_unique_constraints(self, connection, table_name, schema=None, **kwargs): - return [] - - def get_view_definition(self, connection, view_name, schema=None, **kwargs): - pass - - def do_rollback(self, dbapi_connection): - pass - - def _check_unicode_returns(self, connection, additional_tests=None): - return True - - def _check_unicode_description(self, connection): - return True - - -FireboltHTTPDialect = FireboltDialect - - -class FireboltHTTPSDialect(FireboltDialect): - scheme = "https" - - -def get_is_nullable(column_is_nullable): - return column_is_nullable.lower() == "yes" - - -# TODO check if this method is needed -def get_default(firebolt_column_default): - # currently unused, returns '' - return str(firebolt_column_default) if firebolt_column_default != "" else None diff --git a/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/constants.py b/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/constants.py deleted file mode 100644 index 551c97974133a..0000000000000 --- a/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/constants.py +++ /dev/null @@ -1,4 +0,0 @@ -db_name = 'Sigmoid_Alchemy' -query = 'select * from joining_details' -username = 'aapurva@sigmoidanalytics.com' -password = 'Apurva111' \ No newline at end of file diff --git a/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_firebolt_api_service.py b/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_firebolt_api_service.py deleted file mode 100644 index cd519b1fc3a2e..0000000000000 --- a/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_firebolt_api_service.py +++ /dev/null @@ -1,81 +0,0 @@ -from sqlalchemy_adapter.firebolt_api_service import FireboltApiService -from sqlalchemy_adapter.test_sqlalchemy_adapter import constants -from requests.exceptions import HTTPError -from sqlalchemy_adapter import exceptions -import pytest - -access_token = FireboltApiService.get_access_token({'username': constants.username, - 'password': constants.password}) -engine_url = FireboltApiService.get_engine_url_by_db(constants.db_name, access_token["access_token"]) - - -class TestFireboltApiService: - - def test_get_connection_success(self): - response = FireboltApiService.get_connection(constants.username, constants.password, constants.db_name) - if type(response) == HTTPError: - assert response.response.status_code == 503 - else: - assert response != "" - - def test_get_connection_invalid_credentials(self): - with pytest.raises(Exception) as e_info: - response = FireboltApiService.get_connection('username', 'password', constants.db_name)[0] - - def test_get_connection_invalid_schema_name(self): - with pytest.raises(Exception) as e_info: - response = FireboltApiService.get_connection(constants.username, constants.password, 'db_name')[1] - - def test_get_access_token_success(self): - assert access_token["access_token"] != "" - - def test_get_access_token_invalid_credentials(self): - with pytest.raises(Exception) as e_info: - response = FireboltApiService.get_access_token({'username': 'username', 'password': 'password'}) - - def test_get_access_token_via_refresh_success(self): - assert FireboltApiService.get_access_token_via_refresh(access_token["refresh_token"]) != "" - - def test_get_access_token_via_refresh_invalid_token(self): - with pytest.raises(Exception) as e_info: - response = FireboltApiService.get_access_token_via_refresh({'refresh_token': 'refresh_token'}) - - def test_get_engine_url_by_db_success(self): - assert engine_url != "" - - def test_get_engine_url_by_db_invalid_schema(self): - with pytest.raises(Exception) as e_info: - response = FireboltApiService.get_engine_url_by_db('db_name', access_token["access_token"]) - - def test_get_engine_url_by_db_invalid_header(self): - with pytest.raises(Exception) as e_info: - response = FireboltApiService.get_engine_url_by_db(constants.db_name, 'header') != "" - - def test_run_query_success(self): - try: - response = FireboltApiService.run_query(access_token["access_token"], access_token["refresh_token"], - engine_url, constants.db_name, - constants.query) - assert response != "" - except exceptions.InternalError as http_err: - assert http_err != "" - - def test_run_query_invalid_url(self): - with pytest.raises(Exception) as e_info: - response = FireboltApiService.run_query(access_token["access_token"], access_token["refresh_token"], "", - constants.db_name, constants.query) != {} - - def test_run_query_invalid_schema(self): - with pytest.raises(Exception) as e_info: - response = FireboltApiService.run_query(access_token["access_token"], access_token["refresh_token"], - engine_url, 'db_name', constants.query) - - def test_run_query_invalid_header(self): - with pytest.raises(Exception) as e_info: - response = FireboltApiService.run_query('header', access_token["refresh_token"], engine_url, constants.db_name, - constants.query) != {} - - def test_run_query_invalid_query(self): - with pytest.raises(Exception) as e_info: - response = FireboltApiService.run_query(access_token["access_token"], access_token["refresh_token"], - engine_url, constants.db_name, 'query') diff --git a/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltconnector.py b/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltconnector.py deleted file mode 100644 index f42e3ffa99643..0000000000000 --- a/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltconnector.py +++ /dev/null @@ -1,75 +0,0 @@ -from sqlalchemy_adapter import firebolt_connector - - -class Testfirebolt_connector: - - def test_connect_success(self): - user_email = "aapurva@sigmoidanalytics.com" - password = "Apurva111" - db_name = "Sigmoid_Alchemy" - connection = firebolt_connector.connect(user_email, password, db_name) - assert connection.access_token - assert connection.engine_url - - def test_connect_invalid_credentials(self): - user_email = "aapurva@sigmoidanalytics.com" - password = "wrongpassword" - db_name = "Sigmoid_Alchemy" - connection = firebolt_connector.connect(user_email, password, db_name) - assert not connection.access_token - assert not connection.engine_url - - def test_connect_invalid_database(self): - user_email = "aapurva@sigmoidanalytics.com" - password = "Apurva111" - db_name = "wrongdatabase" - connection = firebolt_connector.connect(user_email, password, db_name) - assert not connection.access_token - assert not connection.engine_url - - def test_get_type(self): - value_1 = "String Value" - value_2_1 = 5 - value_2_2 = 5.1 - value_3_1 = True - value_3_2 = False - assert firebolt_connector.get_type(value_1) == 1 - assert firebolt_connector.get_type(value_2_1) == 2 - assert firebolt_connector.get_type(value_2_2) == 2 - assert firebolt_connector.get_type(value_3_1) == 3 - assert firebolt_connector.get_type(value_3_2) == 3 - # TODO check how to assert/test exceptions - - def test_get_description_from_row(self): - row = {'id': 1, 'name': 'John', 'is_eligible': True} - result = firebolt_connector.get_description_from_row(row) - assert result[0][0] == 'id' - assert result[0][1] == firebolt_connector.Type.NUMBER - assert result[0][6] == False - assert result[1][0] == 'name' - assert result[1][1] == firebolt_connector.Type.STRING - assert result[1][6] == True - assert result[2][0] == 'is_eligible' - assert result[2][1] == firebolt_connector.Type.BOOLEAN - assert result[2][6] == False - - def test_connection_cursor(self): - user_email = "aapurva@sigmoidanalytics.com" - password = "Apurva111" - db_name = "Sigmoid_Alchemy" - connection = firebolt_connector.connect(user_email, password, db_name) - assert len(connection.cursors) == 0 - connection.cursor() - assert len(connection.cursors) > 0 - - def test_connection_execute(self): - user_email = "aapurva@sigmoidanalytics.com" - password = "Apurva111" - db_name = "Sigmoid_Alchemy" - query = "SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.DATABASES" - connection = firebolt_connector.connect(user_email, password, db_name) - result = connection.execute(query) - assert result['data'][0]['schema_name'] == 'Sigmoid_Alchemy' - - def test_escape(self): - assert firebolt_connector.escape("*") == "*" diff --git a/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltdialect.py b/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltdialect.py deleted file mode 100644 index 0e61677873ea4..0000000000000 --- a/superset/firebolt_sqlalchemy_adapter/test_sqlalchemy_adapter/test_fireboltdialect.py +++ /dev/null @@ -1,29 +0,0 @@ -from sqlalchemy_adapter import firebolt_connector -from sqlalchemy_adapter import firebolt_dialect - -class ConnectionDetails(): - user_email = "aapurva@sigmoidanalytics.com" - password = "Apurva111" - db_name = "Sigmoid_Alchemy" - connection = firebolt_connector.connect(user_email, password, db_name) - fireboltDialect = firebolt_dialect.FireboltDialect - -class TestFireboltDialect: - - - - def test_create_connect_args(self): - None - - def test_get_schema_names(self): - # result = fireboltDialect.get_schema_names() - None - - def test_has_table(self): - None - - def test_get_table_names(self): - None - - def get_columns(self): - None \ No newline at end of file From 027913f161baeac57fb6a132df8e4f01ab9e2b40 Mon Sep 17 00:00:00 2001 From: raghavSharmaSigmoid <88667094+raghavSharmaSigmoid@users.noreply.github.com> Date: Tue, 21 Sep 2021 19:01:45 +0530 Subject: [PATCH 04/14] Deleted virtual env commit --- .../webdriver/chrome/remote_connection.py | 28 ------------------- 1 file changed, 28 deletions(-) delete mode 100644 myVenv/lib/python3.9/site-packages/selenium/webdriver/chrome/remote_connection.py diff --git a/myVenv/lib/python3.9/site-packages/selenium/webdriver/chrome/remote_connection.py b/myVenv/lib/python3.9/site-packages/selenium/webdriver/chrome/remote_connection.py deleted file mode 100644 index 8774b1aad8efb..0000000000000 --- a/myVenv/lib/python3.9/site-packages/selenium/webdriver/chrome/remote_connection.py +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Software Freedom Conservancy (SFC) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The SFC licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from selenium.webdriver.remote.remote_connection import RemoteConnection - - -class ChromeRemoteConnection(RemoteConnection): - - def __init__(self, remote_server_addr, keep_alive=True): - RemoteConnection.__init__(self, remote_server_addr, keep_alive) - self._commands["launchApp"] = ('POST', '/session/$sessionId/chromium/launch_app') - self._commands["setNetworkConditions"] = ('POST', '/session/$sessionId/chromium/network_conditions') - self._commands["getNetworkConditions"] = ('GET', '/session/$sessionId/chromium/network_conditions') - self._commands['executeCdpCommand'] = ('POST', '/session/$sessionId/goog/cdp/execute') From 5d9d581fc39cc85121c2a5c69178ca8c647ca3f3 Mon Sep 17 00:00:00 2001 From: apurva-sigmoid <89530372+apurva-sigmoid@users.noreply.github.com> Date: Thu, 23 Sep 2021 18:48:59 +0530 Subject: [PATCH 05/14] Adding time grain changes to firebolt.py --- superset/db_engine_specs/firebolt.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/superset/db_engine_specs/firebolt.py b/superset/db_engine_specs/firebolt.py index 1a51716911abe..98d8d30c3093c 100644 --- a/superset/db_engine_specs/firebolt.py +++ b/superset/db_engine_specs/firebolt.py @@ -6,3 +6,15 @@ class FireboltEngineSpec(BaseEngineSpec): engine = "firebolt" engine_name = "Firebolt" default_driver = "firebolt" + + _time_grain_expressions = { + None: "{col}", + "PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))", + "PT1M": "date_trunc('minute', CAST({col} AS TIMESTAMP))", + "PT1H": "date_trunc('hour', CAST({col} AS TIMESTAMP))", + "P1D": "date_trunc('day', CAST({col} AS TIMESTAMP))", + "P1W": "date_trunc('week', CAST({col} AS TIMESTAMP))", + "P1M": "date_trunc('month', CAST({col} AS TIMESTAMP))", + "P0.25Y": "date_trunc('quarter', CAST({col} AS TIMESTAMP))", + "P1Y": "date_trunc('year', CAST({col} AS TIMESTAMP))", + } From 45c5072649a7926fc4d15d2eb0b9cc5a89e5d7b2 Mon Sep 17 00:00:00 2001 From: apurva-sigmoid <89530372+apurva-sigmoid@users.noreply.github.com> Date: Tue, 28 Sep 2021 19:23:14 +0530 Subject: [PATCH 06/14] Updated README.md Added steps to install and run Superset with Firebolt SQLAlchemy Adapter --- README.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/README.md b/README.md index 443c48c6aabab..403c2f1bb93e4 100644 --- a/README.md +++ b/README.md @@ -147,6 +147,40 @@ Interested in contributing? Check out our to find resources around contributing along with a detailed guide on how to set up a development environment. +## Installation Steps for Superset+Firebolt SQLAlchemy Adapter + +### Pre-requisites + +1. Git - https://www.atlassian.com/git/tutorials/install-git +2. Docker - https://docs.docker.com/engine/install/ +3. Python3 + +### Firebolt SQLAlchemy installation steps + +````bash +# 1. In your terminal, run below commands to clone superset and switch to firebolt-integration branch +$ cd $HOME +$ git clone https://github.com/raghavSharmaSigmoid/superset.git +$ cd superset +$ git checkout firebolt-integration + +# 2. Run below commands to clone firebolt adapter into superset folder +$ cd superset +$ git clone https://github.com/raghavSharmaSigmoid/firebolt-sqlalchemy.git + +# 3. Run below commands to build the firebolt adapter +$ cd firebolt-sqlalchemy +$ python3 -m build + +# 4. Run below commands to create local requirements text file which has firebolt adapter path +$ cd $HOME/superset +$ touch ./docker/requirements-local.txt +$ echo "firebolt-sqlalchemy@file://localhost/app/superset/firebolt-sqlalchemy/dist/firebolt-sqlalchemy-0.0.1.tar.gz#egg=firebolt-sqlalchemy" >> ./docker/requirements-local.txt + +# 5. Run below command to start docker +$ docker-compose up +```` + ## Resources - Superset 1.0 From 5ed17c7a4545998ed07693c93738ebe7bff2580e Mon Sep 17 00:00:00 2001 From: apurva-sigmoid <89530372+apurva-sigmoid@users.noreply.github.com> Date: Wed, 29 Sep 2021 16:57:53 +0530 Subject: [PATCH 07/14] Update README.md Reduced installation steps. Using PyPi installation for adapter now --- README.md | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 403c2f1bb93e4..e42e33d0fe380 100644 --- a/README.md +++ b/README.md @@ -164,20 +164,12 @@ $ git clone https://github.com/raghavSharmaSigmoid/superset.git $ cd superset $ git checkout firebolt-integration -# 2. Run below commands to clone firebolt adapter into superset folder -$ cd superset -$ git clone https://github.com/raghavSharmaSigmoid/firebolt-sqlalchemy.git - -# 3. Run below commands to build the firebolt adapter -$ cd firebolt-sqlalchemy -$ python3 -m build - -# 4. Run below commands to create local requirements text file which has firebolt adapter path -$ cd $HOME/superset +# 2. Run below commands to create local requirements text file which has firebolt adapter path $ touch ./docker/requirements-local.txt -$ echo "firebolt-sqlalchemy@file://localhost/app/superset/firebolt-sqlalchemy/dist/firebolt-sqlalchemy-0.0.1.tar.gz#egg=firebolt-sqlalchemy" >> ./docker/requirements-local.txt +$ echo "firebolt-sqlalchemy" >> ./docker/requirements-local.txt -# 5. Run below command to start docker +# 3. Run below command to build and start docker +$ docker-compose build --force-rm $ docker-compose up ```` From acee35200ad0c7bda15bc07db2b92770dd3e5885 Mon Sep 17 00:00:00 2001 From: Apurva Anand Date: Wed, 29 Sep 2021 18:12:31 +0530 Subject: [PATCH 08/14] Revert "Update README.md" This reverts commit 5ed17c7a4545998ed07693c93738ebe7bff2580e. --- README.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e42e33d0fe380..403c2f1bb93e4 100644 --- a/README.md +++ b/README.md @@ -164,12 +164,20 @@ $ git clone https://github.com/raghavSharmaSigmoid/superset.git $ cd superset $ git checkout firebolt-integration -# 2. Run below commands to create local requirements text file which has firebolt adapter path +# 2. Run below commands to clone firebolt adapter into superset folder +$ cd superset +$ git clone https://github.com/raghavSharmaSigmoid/firebolt-sqlalchemy.git + +# 3. Run below commands to build the firebolt adapter +$ cd firebolt-sqlalchemy +$ python3 -m build + +# 4. Run below commands to create local requirements text file which has firebolt adapter path +$ cd $HOME/superset $ touch ./docker/requirements-local.txt -$ echo "firebolt-sqlalchemy" >> ./docker/requirements-local.txt +$ echo "firebolt-sqlalchemy@file://localhost/app/superset/firebolt-sqlalchemy/dist/firebolt-sqlalchemy-0.0.1.tar.gz#egg=firebolt-sqlalchemy" >> ./docker/requirements-local.txt -# 3. Run below command to build and start docker -$ docker-compose build --force-rm +# 5. Run below command to start docker $ docker-compose up ```` From 62e82ab8d60091465715d53705994d862887be0a Mon Sep 17 00:00:00 2001 From: Apurva Anand Date: Wed, 29 Sep 2021 18:12:53 +0530 Subject: [PATCH 09/14] Revert "Updated README.md" This reverts commit 45c5072649a7926fc4d15d2eb0b9cc5a89e5d7b2. --- README.md | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/README.md b/README.md index 403c2f1bb93e4..443c48c6aabab 100644 --- a/README.md +++ b/README.md @@ -147,40 +147,6 @@ Interested in contributing? Check out our to find resources around contributing along with a detailed guide on how to set up a development environment. -## Installation Steps for Superset+Firebolt SQLAlchemy Adapter - -### Pre-requisites - -1. Git - https://www.atlassian.com/git/tutorials/install-git -2. Docker - https://docs.docker.com/engine/install/ -3. Python3 - -### Firebolt SQLAlchemy installation steps - -````bash -# 1. In your terminal, run below commands to clone superset and switch to firebolt-integration branch -$ cd $HOME -$ git clone https://github.com/raghavSharmaSigmoid/superset.git -$ cd superset -$ git checkout firebolt-integration - -# 2. Run below commands to clone firebolt adapter into superset folder -$ cd superset -$ git clone https://github.com/raghavSharmaSigmoid/firebolt-sqlalchemy.git - -# 3. Run below commands to build the firebolt adapter -$ cd firebolt-sqlalchemy -$ python3 -m build - -# 4. Run below commands to create local requirements text file which has firebolt adapter path -$ cd $HOME/superset -$ touch ./docker/requirements-local.txt -$ echo "firebolt-sqlalchemy@file://localhost/app/superset/firebolt-sqlalchemy/dist/firebolt-sqlalchemy-0.0.1.tar.gz#egg=firebolt-sqlalchemy" >> ./docker/requirements-local.txt - -# 5. Run below command to start docker -$ docker-compose up -```` - ## Resources - Superset 1.0 From 86c260336c324ee58be2afeba0596100439173b2 Mon Sep 17 00:00:00 2001 From: Apurva Anand Date: Wed, 29 Sep 2021 22:17:11 +0530 Subject: [PATCH 10/14] added epoch methods, added test cases for firebolt db engine spec and edited setup.py --- setup.py | 1 + superset/db_engine_specs/firebolt.py | 20 ++++++++++++++++ .../db_engine_specs/firebolt_tests.py | 23 +++++++++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 tests/integration_tests/db_engine_specs/firebolt_tests.py diff --git a/setup.py b/setup.py index 567e31daf7736..c9e858124b400 100644 --- a/setup.py +++ b/setup.py @@ -134,6 +134,7 @@ def get_git_sha() -> str: "exasol": ["sqlalchemy-exasol>=2.1.0, <2.2"], "excel": ["xlrd>=1.2.0, <1.3"], "firebird": ["sqlalchemy-firebird>=0.7.0, <0.8"], + "firebolt": ["firebolt-sqlalchemy>=0.0.1"], "gsheets": ["shillelagh[gsheetsapi]>=1.0.3, <2"], "hana": ["hdbcli==2.4.162", "sqlalchemy_hana==0.4.0"], "hive": ["pyhive[hive]>=0.6.1", "tableschema", "thrift>=0.11.0, <1.0.0"], diff --git a/superset/db_engine_specs/firebolt.py b/superset/db_engine_specs/firebolt.py index 98d8d30c3093c..18f1a225e7d43 100644 --- a/superset/db_engine_specs/firebolt.py +++ b/superset/db_engine_specs/firebolt.py @@ -1,4 +1,9 @@ +from datetime import datetime +from typing import Optional + from superset.db_engine_specs.base import BaseEngineSpec +from superset.utils import core as utils + class FireboltEngineSpec(BaseEngineSpec): """Engine spec for Firebolt""" @@ -18,3 +23,18 @@ class FireboltEngineSpec(BaseEngineSpec): "P0.25Y": "date_trunc('quarter', CAST({col} AS TIMESTAMP))", "P1Y": "date_trunc('year', CAST({col} AS TIMESTAMP))", } + + @classmethod + def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]: + tt = target_type.upper() + if tt == utils.TemporalType.DATE: + return f"CAST('{dttm.date().isoformat()}' AS DATE)" + if tt == utils.TemporalType.DATETIME: + return f"""CAST('{dttm.isoformat(timespec="seconds")}' AS DATETIME)""" + if tt == utils.TemporalType.TIMESTAMP: + return f"""CAST('{dttm.isoformat(timespec="seconds")}' AS TIMESTAMP)""" + return None + + @classmethod + def epoch_to_dttm(cls) -> str: + return "from_unixtime({col})" diff --git a/tests/integration_tests/db_engine_specs/firebolt_tests.py b/tests/integration_tests/db_engine_specs/firebolt_tests.py new file mode 100644 index 0000000000000..df8017ed7c18a --- /dev/null +++ b/tests/integration_tests/db_engine_specs/firebolt_tests.py @@ -0,0 +1,23 @@ +from superset.db_engine_specs.firebolt import FireboltEngineSpec +from tests.integration_tests.db_engine_specs.base_tests import TestDbEngineSpec + + +class TestFireboltDbEngineSpec(TestDbEngineSpec): + def test_convert_dttm(self): + dttm = self.get_dttm() + test_cases = { + "DATE": "CAST('2019-01-02' AS DATE)", + "DATETIME": "CAST('2019-01-02T03:04:05' AS DATETIME)", + "TIMESTAMP": "CAST('2019-01-02T03:04:05' AS TIMESTAMP)", + "UNKNOWNTYPE": None, + } + + for target_type, expected in test_cases.items(): + actual = FireboltEngineSpec.convert_dttm(target_type, dttm) + self.assertEqual(actual, expected) + + def test_epoch_to_dttm(self): + assert ( + FireboltEngineSpec.epoch_to_dttm().format(col="timestamp_column") + == "from_unixtime(timestamp_column)" + ) From 33309e2eaf72c414f736a35d314bd902c7cce9cd Mon Sep 17 00:00:00 2001 From: Apurva Anand Date: Thu, 30 Sep 2021 14:33:09 +0530 Subject: [PATCH 11/14] Added license to files --- superset/db_engine_specs/firebolt.py | 16 ++++++++++++++++ .../db_engine_specs/firebolt_tests.py | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/superset/db_engine_specs/firebolt.py b/superset/db_engine_specs/firebolt.py index 18f1a225e7d43..db9a8a8932afb 100644 --- a/superset/db_engine_specs/firebolt.py +++ b/superset/db_engine_specs/firebolt.py @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. from datetime import datetime from typing import Optional diff --git a/tests/integration_tests/db_engine_specs/firebolt_tests.py b/tests/integration_tests/db_engine_specs/firebolt_tests.py index df8017ed7c18a..793b32970bddb 100644 --- a/tests/integration_tests/db_engine_specs/firebolt_tests.py +++ b/tests/integration_tests/db_engine_specs/firebolt_tests.py @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. from superset.db_engine_specs.firebolt import FireboltEngineSpec from tests.integration_tests.db_engine_specs.base_tests import TestDbEngineSpec From 01a1374477ca1876e298c8e859b6e28ad65e2519 Mon Sep 17 00:00:00 2001 From: Apurva Anand Date: Thu, 30 Sep 2021 15:24:01 +0530 Subject: [PATCH 12/14] Added documentation for Firebolt-SQLAlchemy --- .../docs/Connecting to Databases/firebolt.mdx | 24 +++++++++++++++++++ .../docs/Connecting to Databases/index.mdx | 1 + 2 files changed, 25 insertions(+) create mode 100644 docs/src/pages/docs/Connecting to Databases/firebolt.mdx diff --git a/docs/src/pages/docs/Connecting to Databases/firebolt.mdx b/docs/src/pages/docs/Connecting to Databases/firebolt.mdx new file mode 100644 index 0000000000000..c1d11ff10e3be --- /dev/null +++ b/docs/src/pages/docs/Connecting to Databases/firebolt.mdx @@ -0,0 +1,24 @@ +--- +name: Firebolt +menu: Connecting to Databases +route: /docs/databases/firebolt +index: 31 +version: 1 +--- + +## Firebolt + +The recommended connector library for Firebolt is [firebolt-sqlalchemy](https://pypi.org/project/firebolt-sqlalchemy/). +Superset has been tested on `firebolt-sqlalchemy>=0.0.1`. + +The recommended connection string is: + +``` +firebolt://{username}:{password}@{host}/{database} +``` + +Here's a connection string example of Superset connecting to a Firebolt database: + +``` +firebolt://email@domain:password@host/sample_database +``` diff --git a/docs/src/pages/docs/Connecting to Databases/index.mdx b/docs/src/pages/docs/Connecting to Databases/index.mdx index 0704dc7956940..5d7ec9f38e237 100644 --- a/docs/src/pages/docs/Connecting to Databases/index.mdx +++ b/docs/src/pages/docs/Connecting to Databases/index.mdx @@ -42,6 +42,7 @@ A list of some of the recommended packages. |[Elasticsearch](/docs/databases/elasticsearch)|```pip install elasticsearch-dbapi```|```elasticsearch+http://{user}:{password}@{host}:9200/```| |[Exasol](/docs/databases/exasol)|```pip install sqlalchemy-exasol```|```exa+pyodbc://{username}:{password}@{hostname}:{port}/my_schema?CONNECTIONLCALL=en_US.UTF-8&driver=EXAODBC```| |[Google Sheets](/docs/databases/google-sheets)|```pip install shillelagh[gsheetsapi]```|```gsheets://```| +|[Firebolt](/docs/databases/firebolt)|```pip install firebolt-sqlalchemy```|```firebolt://{username}:{password}@{host}/{database}```| |[Hologres](/docs/databases/hologres)|```pip install psycopg2```|```postgresql+psycopg2://:@/```| |[IBM Db2](/docs/databases/ibm-db2)|```pip install ibm_db_sa```|```db2+ibm_db://```| |[IBM Netezza Performance Server](/docs/databases/netezza)|```pip install nzalchemy```|```netezza+nzpy://:@/```| From 7b5d61ffbb87c7da3ec394c0c9d2f2d7e4253231 Mon Sep 17 00:00:00 2001 From: Apurva Anand Date: Fri, 1 Oct 2021 15:51:49 +0530 Subject: [PATCH 13/14] Removed trailing whitespace --- superset/db_engine_specs/firebolt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/db_engine_specs/firebolt.py b/superset/db_engine_specs/firebolt.py index db9a8a8932afb..ea5091f69ffa0 100644 --- a/superset/db_engine_specs/firebolt.py +++ b/superset/db_engine_specs/firebolt.py @@ -27,7 +27,7 @@ class FireboltEngineSpec(BaseEngineSpec): engine = "firebolt" engine_name = "Firebolt" default_driver = "firebolt" - + _time_grain_expressions = { None: "{col}", "PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))", From 1dd5a704b5524b06e81bbdbe0cd9673887c721fe Mon Sep 17 00:00:00 2001 From: raghavsharma Date: Wed, 27 Oct 2021 15:08:32 +0530 Subject: [PATCH 14/14] Updated connection string for Firebolt --- docs/src/pages/docs/Connecting to Databases/firebolt.mdx | 8 ++++++-- docs/src/pages/docs/Connecting to Databases/index.mdx | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/src/pages/docs/Connecting to Databases/firebolt.mdx b/docs/src/pages/docs/Connecting to Databases/firebolt.mdx index c1d11ff10e3be..39ef6c8a233b3 100644 --- a/docs/src/pages/docs/Connecting to Databases/firebolt.mdx +++ b/docs/src/pages/docs/Connecting to Databases/firebolt.mdx @@ -14,11 +14,15 @@ Superset has been tested on `firebolt-sqlalchemy>=0.0.1`. The recommended connection string is: ``` -firebolt://{username}:{password}@{host}/{database} +firebolt://{username}:{password}@{database} +or +firebolt://{username}:{password}@{database}/{engine_name} ``` Here's a connection string example of Superset connecting to a Firebolt database: ``` -firebolt://email@domain:password@host/sample_database +firebolt://email@domain:password@sample_database +or +firebolt://email@domain:password@sample_database/sample_engine ``` diff --git a/docs/src/pages/docs/Connecting to Databases/index.mdx b/docs/src/pages/docs/Connecting to Databases/index.mdx index 5d7ec9f38e237..d2d2cd6a77eba 100644 --- a/docs/src/pages/docs/Connecting to Databases/index.mdx +++ b/docs/src/pages/docs/Connecting to Databases/index.mdx @@ -42,7 +42,7 @@ A list of some of the recommended packages. |[Elasticsearch](/docs/databases/elasticsearch)|```pip install elasticsearch-dbapi```|```elasticsearch+http://{user}:{password}@{host}:9200/```| |[Exasol](/docs/databases/exasol)|```pip install sqlalchemy-exasol```|```exa+pyodbc://{username}:{password}@{hostname}:{port}/my_schema?CONNECTIONLCALL=en_US.UTF-8&driver=EXAODBC```| |[Google Sheets](/docs/databases/google-sheets)|```pip install shillelagh[gsheetsapi]```|```gsheets://```| -|[Firebolt](/docs/databases/firebolt)|```pip install firebolt-sqlalchemy```|```firebolt://{username}:{password}@{host}/{database}```| +|[Firebolt](/docs/databases/firebolt)|```pip install firebolt-sqlalchemy```|```firebolt://{username}:{password}@{database} or firebolt://{username}:{password}@{database}/{engine_name}```| |[Hologres](/docs/databases/hologres)|```pip install psycopg2```|```postgresql+psycopg2://:@/```| |[IBM Db2](/docs/databases/ibm-db2)|```pip install ibm_db_sa```|```db2+ibm_db://```| |[IBM Netezza Performance Server](/docs/databases/netezza)|```pip install nzalchemy```|```netezza+nzpy://:@/```|