From 579c359174ab00140aa35bf01f9e9d80084d3a62 Mon Sep 17 00:00:00 2001 From: Antonio Rivero Date: Mon, 3 Jul 2023 12:02:22 -0400 Subject: [PATCH 1/4] Database Filtering: - Add new configuration so we can inject extra filters to our databases when running the DatabaseFilter in base_filters - Add tests for our new config and its usage --- superset/config.py | 15 ++++ superset/databases/filters.py | 12 ++- .../integration_tests/databases/api_tests.py | 74 ++++++++++++++++ tests/unit_tests/databases/api_test.py | 86 +++++++++++++++++++ 4 files changed, 186 insertions(+), 1 deletion(-) diff --git a/superset/config.py b/superset/config.py index 0f9fafb68e238..af2816fc06d40 100644 --- a/superset/config.py +++ b/superset/config.py @@ -1582,6 +1582,21 @@ class ExtraRelatedQueryFilters(TypedDict, total=False): EXTRA_RELATED_QUERY_FILTERS: ExtraRelatedQueryFilters = {} +# Extra dynamic database filter make it possible to limit which databases are shown +# in the UI before any other filtering is applied. Useful for example when +# considering to filter using Feature Flags along with regular role filters +# that get applied by default in our base_filters. +# For example, to only show a database starting with the letter "b" +# in the "Database Connections" list, you could add the following in your config: +# def initial_database_filter(query: Query, *args, *kwargs): +# from superset.models.core import Database +# +# filter = Database.database_name.startswith('b') +# return query.filter(filter) +# +# EXTRA_DYNAMIC_DATABASE_FILTER = initial_database_filter +EXTRA_DYNAMIC_DATABASE_FILTER: Callable[[Query], Query] | None = None + # ------------------------------------------------------------------- # * WARNING: STOP EDITING HERE * diff --git a/superset/databases/filters.py b/superset/databases/filters.py index 2ca77b77d1c40..d5ce6aa50b135 100644 --- a/superset/databases/filters.py +++ b/superset/databases/filters.py @@ -16,7 +16,7 @@ # under the License. from typing import Any -from flask import g +from flask import current_app, g from flask_babel import lazy_gettext as _ from sqlalchemy import or_ from sqlalchemy.orm import Query @@ -41,6 +41,16 @@ class DatabaseFilter(BaseFilter): # pylint: disable=too-few-public-methods # TODO(bogdan): consider caching. def apply(self, query: Query, value: Any) -> Query: + # Dynamic Filters need to be applied to the Query before we filter + # databases with anything else. This way you can show/hide databases using + # Feature Flags for example in conjuction with the regular role filtering. + # If not, if an user has access to all Databases it would skip this dynamic + # filtering. + + if dynamic_filter := current_app.config["EXTRA_DYNAMIC_DATABASE_FILTER"]: + query = dynamic_filter(query) + + # We can proceed with default filtering now if security_manager.can_access_all_databases(): return query database_perms = security_manager.user_view_menu_names("database_access") diff --git a/tests/integration_tests/databases/api_tests.py b/tests/integration_tests/databases/api_tests.py index 568ba0593443d..bc07ae13166bb 100644 --- a/tests/integration_tests/databases/api_tests.py +++ b/tests/integration_tests/databases/api_tests.py @@ -3632,3 +3632,77 @@ def test_validate_sql_endpoint_failure(self, get_validator_by_name): return self.assertEqual(rv.status_code, 422) self.assertIn("Kaboom!", response["errors"][0]["message"]) + + def test_get_databases_with_extra_filters(self): + """ + API: Test get database with extra query filter + """ + self.login(username="admin") + extra = { + "metadata_params": {}, + "engine_params": {}, + "metadata_cache_timeout": {}, + "schemas_allowed_for_file_upload": [], + } + example_db = get_example_database() + + if example_db.backend == "sqlite": + return + database_data = { + "sqlalchemy_uri": example_db.sqlalchemy_uri_decrypted, + "configuration_method": ConfigurationMethod.SQLALCHEMY_FORM, + "server_cert": None, + "extra": json.dumps(extra), + } + + uri = "api/v1/database/" + rv = self.client.post( + uri, json={**database_data, "database_name": "dyntest-create-database-1"} + ) + first_response = json.loads(rv.data.decode("utf-8")) + self.assertEqual(rv.status_code, 201) + + uri = "api/v1/database/" + rv = self.client.post( + uri, json={**database_data, "database_name": "create-database-2"} + ) + second_response = json.loads(rv.data.decode("utf-8")) + self.assertEqual(rv.status_code, 201) + dbs = db.session.query(Database).all() + expected_names = [db.database_name for db in dbs] + expected_names.sort() + + uri = f"api/v1/database/" + rv = self.client.get(uri) + data = json.loads(rv.data.decode("utf-8")) + self.assertEqual(data["count"], len(dbs)) + database_names = [item["database_name"] for item in data["result"]] + database_names.sort() + # All Databases because we are an admin + self.assertEqual(database_names, expected_names) + assert rv.status_code == 200 + + def _base_filter(query): + from superset.models.core import Database + + return query.filter(Database.database_name.startswith("dyntest")) + + with patch.dict( + "superset.views.filters.current_app.config", + {"EXTRA_DYNAMIC_DATABASE_FILTER": _base_filter}, + ): + uri = f"api/v1/database/" + rv = self.client.get(uri) + data = json.loads(rv.data.decode("utf-8")) + self.assertEqual(data["count"], 1) + database_names = [item["database_name"] for item in data["result"]] + # Only the database that starts with tests, even if we are an admin + self.assertEqual(database_names, ["dyntest-create-database-1"]) + assert rv.status_code == 200 + + # Cleanup + first_model = db.session.query(Database).get(first_response.get("id")) + second_model = db.session.query(Database).get(second_response.get("id")) + db.session.delete(first_model) + db.session.delete(second_model) + db.session.commit() diff --git a/tests/unit_tests/databases/api_test.py b/tests/unit_tests/databases/api_test.py index 24fde88369594..6f34a3ff9eadb 100644 --- a/tests/unit_tests/databases/api_test.py +++ b/tests/unit_tests/databases/api_test.py @@ -23,6 +23,7 @@ from uuid import UUID import pytest +from flask import current_app from pytest_mock import MockFixture from sqlalchemy.orm.session import Session @@ -495,3 +496,88 @@ def test_delete_ssh_tunnel_not_found( response_tunnel = DatabaseDAO.get_ssh_tunnel(2) assert response_tunnel is None + + +def test_apply_dynamic_database_filter( + mocker: MockFixture, + app: Any, + session: Session, + client: Any, + full_api_access: None, +) -> None: + """ + Test that we canfilter the list of databases + """ + with app.app_context(): + from superset.daos.database import DatabaseDAO + from superset.databases.api import DatabaseRestApi + from superset.databases.ssh_tunnel.models import SSHTunnel + from superset.models.core import Database + + DatabaseRestApi.datamodel.session = session + + # create table for databases + Database.metadata.create_all(session.get_bind()) # pylint: disable=no-member + + # Create our First Database + database = Database( + database_name="first-database", + sqlalchemy_uri="gsheets://", + encrypted_extra=json.dumps( + { + "metadata_params": {}, + "engine_params": {}, + "metadata_cache_timeout": {}, + "schemas_allowed_for_file_upload": [], + } + ), + ) + session.add(database) + session.commit() + + # Create our Second Database + database = Database( + database_name="second-database", + sqlalchemy_uri="gsheets://", + encrypted_extra=json.dumps( + { + "metadata_params": {}, + "engine_params": {}, + "metadata_cache_timeout": {}, + "schemas_allowed_for_file_upload": [], + } + ), + ) + session.add(database) + session.commit() + + # mock the lookup so that we don't need to include the driver + mocker.patch("sqlalchemy.engine.URL.get_driver_name", return_value="gsheets") + mocker.patch("superset.utils.log.DBEventLogger.log") + mocker.patch( + "superset.databases.ssh_tunnel.commands.delete.is_feature_enabled", + return_value=False, + ) + + # Get our recently created Databases + response_databases = DatabaseDAO.find_all() + assert response_databases + expected_db_names = ["first-database", "second-database"] + actual_db_names = [db.database_name for db in response_databases] + assert actual_db_names == expected_db_names + + def _base_filter(query): + from superset.models.core import Database + + return query.filter(Database.database_name.startswith("second")) + + original_config = current_app.config.copy() + original_config["EXTRA_DYNAMIC_DATABASE_FILTER"] = _base_filter + + mocker.patch("superset.views.filters.current_app.config", new=original_config) + # Get filtered list + response_databases = DatabaseDAO.find_all() + assert response_databases + expected_db_names = ["second-database"] + actual_db_names = [db.database_name for db in response_databases] + assert actual_db_names == expected_db_names From eaddeaea4fb486cee670bf1ed07cb07129fd4420 Mon Sep 17 00:00:00 2001 From: Antonio Rivero Date: Wed, 5 Jul 2023 13:38:51 -0400 Subject: [PATCH 2/4] Database filtering: - Add explicit assertions to validate the filter is not called when not defined - Use docstring comment --- superset/databases/filters.py | 12 +++++----- .../integration_tests/databases/api_tests.py | 19 +++++++++++----- tests/unit_tests/databases/api_test.py | 22 ++++++++++++++----- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/superset/databases/filters.py b/superset/databases/filters.py index d5ce6aa50b135..3fc8a13a3804f 100644 --- a/superset/databases/filters.py +++ b/superset/databases/filters.py @@ -41,11 +41,13 @@ class DatabaseFilter(BaseFilter): # pylint: disable=too-few-public-methods # TODO(bogdan): consider caching. def apply(self, query: Query, value: Any) -> Query: - # Dynamic Filters need to be applied to the Query before we filter - # databases with anything else. This way you can show/hide databases using - # Feature Flags for example in conjuction with the regular role filtering. - # If not, if an user has access to all Databases it would skip this dynamic - # filtering. + """ + Dynamic Filters need to be applied to the Query before we filter + databases with anything else. This way you can show/hide databases using + Feature Flags for example in conjuction with the regular role filtering. + If not, if an user has access to all Databases it would skip this dynamic + filtering. + """ if dynamic_filter := current_app.config["EXTRA_DYNAMIC_DATABASE_FILTER"]: query = dynamic_filter(query) diff --git a/tests/integration_tests/databases/api_tests.py b/tests/integration_tests/databases/api_tests.py index bc07ae13166bb..ebdfb71b3fd18 100644 --- a/tests/integration_tests/databases/api_tests.py +++ b/tests/integration_tests/databases/api_tests.py @@ -28,6 +28,8 @@ import pytest import yaml +from unittest.mock import Mock + from sqlalchemy.engine.url import make_url from sqlalchemy.exc import DBAPIError from sqlalchemy.sql import func @@ -3668,6 +3670,14 @@ def test_get_databases_with_extra_filters(self): ) second_response = json.loads(rv.data.decode("utf-8")) self.assertEqual(rv.status_code, 201) + + def _base_filter(query): + from superset.models.core import Database + + return query.filter(Database.database_name.startswith("dyntest")) + + # Create the Mock + base_filter_mock = Mock(side_effect=_base_filter) dbs = db.session.query(Database).all() expected_names = [db.database_name for db in dbs] expected_names.sort() @@ -3681,15 +3691,11 @@ def test_get_databases_with_extra_filters(self): # All Databases because we are an admin self.assertEqual(database_names, expected_names) assert rv.status_code == 200 - - def _base_filter(query): - from superset.models.core import Database - - return query.filter(Database.database_name.startswith("dyntest")) + base_filter_mock.assert_not_called() with patch.dict( "superset.views.filters.current_app.config", - {"EXTRA_DYNAMIC_DATABASE_FILTER": _base_filter}, + {"EXTRA_DYNAMIC_DATABASE_FILTER": base_filter_mock}, ): uri = f"api/v1/database/" rv = self.client.get(uri) @@ -3699,6 +3705,7 @@ def _base_filter(query): # Only the database that starts with tests, even if we are an admin self.assertEqual(database_names, ["dyntest-create-database-1"]) assert rv.status_code == 200 + base_filter_mock.assert_called() # Cleanup first_model = db.session.query(Database).get(first_response.get("id")) diff --git a/tests/unit_tests/databases/api_test.py b/tests/unit_tests/databases/api_test.py index 6f34a3ff9eadb..92eb0708f2c50 100644 --- a/tests/unit_tests/databases/api_test.py +++ b/tests/unit_tests/databases/api_test.py @@ -20,6 +20,7 @@ import json from io import BytesIO from typing import Any +from unittest.mock import Mock from uuid import UUID import pytest @@ -506,7 +507,7 @@ def test_apply_dynamic_database_filter( full_api_access: None, ) -> None: """ - Test that we canfilter the list of databases + Test that we can filter the list of databases """ with app.app_context(): from superset.daos.database import DatabaseDAO @@ -559,6 +560,14 @@ def test_apply_dynamic_database_filter( return_value=False, ) + def _base_filter(query): + from superset.models.core import Database + + return query.filter(Database.database_name.startswith("second")) + + # Create a mock object + base_filter_mock = Mock(side_effect=_base_filter) + # Get our recently created Databases response_databases = DatabaseDAO.find_all() assert response_databases @@ -566,13 +575,11 @@ def test_apply_dynamic_database_filter( actual_db_names = [db.database_name for db in response_databases] assert actual_db_names == expected_db_names - def _base_filter(query): - from superset.models.core import Database - - return query.filter(Database.database_name.startswith("second")) + # Ensure that the filter has not been called because it's not in our config + assert base_filter_mock.call_count == 0 original_config = current_app.config.copy() - original_config["EXTRA_DYNAMIC_DATABASE_FILTER"] = _base_filter + original_config["EXTRA_DYNAMIC_DATABASE_FILTER"] = base_filter_mock mocker.patch("superset.views.filters.current_app.config", new=original_config) # Get filtered list @@ -581,3 +588,6 @@ def _base_filter(query): expected_db_names = ["second-database"] actual_db_names = [db.database_name for db in response_databases] assert actual_db_names == expected_db_names + + # Ensure that the filter has been called once + assert base_filter_mock.call_count == 1 From a8995d850826b57fca1ce9e2f091f7e2bab45f6c Mon Sep 17 00:00:00 2001 From: Antonio Rivero Date: Wed, 5 Jul 2023 14:51:22 -0400 Subject: [PATCH 3/4] Database filtering: - Generalize the new config so we can use it for other entities down the road. - Pull the new database key from the config so we apply any given filter method in the databases API - Adjust our tests with the new config name and structure --- superset/config.py | 11 ++++++++--- superset/databases/filters.py | 5 +++-- tests/integration_tests/databases/api_tests.py | 2 +- tests/unit_tests/databases/api_test.py | 2 +- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/superset/config.py b/superset/config.py index af2816fc06d40..85ba10167f793 100644 --- a/superset/config.py +++ b/superset/config.py @@ -1582,7 +1582,8 @@ class ExtraRelatedQueryFilters(TypedDict, total=False): EXTRA_RELATED_QUERY_FILTERS: ExtraRelatedQueryFilters = {} -# Extra dynamic database filter make it possible to limit which databases are shown + +# Extra dynamic query filters make it possible to limit which objects are shown # in the UI before any other filtering is applied. Useful for example when # considering to filter using Feature Flags along with regular role filters # that get applied by default in our base_filters. @@ -1594,8 +1595,12 @@ class ExtraRelatedQueryFilters(TypedDict, total=False): # filter = Database.database_name.startswith('b') # return query.filter(filter) # -# EXTRA_DYNAMIC_DATABASE_FILTER = initial_database_filter -EXTRA_DYNAMIC_DATABASE_FILTER: Callable[[Query], Query] | None = None +# EXTRA_DYNAMIC_QUERY_FILTERS = {"database": initial_database_filter} +class ExtraDynamicQueryFilters(TypedDict, total=False): + databases: Callable[[Query], Query] + + +EXTRA_DYNAMIC_QUERY_FILTERS: ExtraDynamicQueryFilters = {} # ------------------------------------------------------------------- diff --git a/superset/databases/filters.py b/superset/databases/filters.py index 3fc8a13a3804f..384a62c9d3b6f 100644 --- a/superset/databases/filters.py +++ b/superset/databases/filters.py @@ -49,8 +49,9 @@ def apply(self, query: Query, value: Any) -> Query: filtering. """ - if dynamic_filter := current_app.config["EXTRA_DYNAMIC_DATABASE_FILTER"]: - query = dynamic_filter(query) + if dynamic_filters := current_app.config["EXTRA_DYNAMIC_QUERY_FILTERS"]: + if dynamic_databases_filter := dynamic_filters.get("databases"): + query = dynamic_databases_filter(query) # We can proceed with default filtering now if security_manager.can_access_all_databases(): diff --git a/tests/integration_tests/databases/api_tests.py b/tests/integration_tests/databases/api_tests.py index ebdfb71b3fd18..6d6c832abcbc7 100644 --- a/tests/integration_tests/databases/api_tests.py +++ b/tests/integration_tests/databases/api_tests.py @@ -3695,7 +3695,7 @@ def _base_filter(query): with patch.dict( "superset.views.filters.current_app.config", - {"EXTRA_DYNAMIC_DATABASE_FILTER": base_filter_mock}, + {"EXTRA_DYNAMIC_QUERY_FILTERS": {"databases": base_filter_mock}}, ): uri = f"api/v1/database/" rv = self.client.get(uri) diff --git a/tests/unit_tests/databases/api_test.py b/tests/unit_tests/databases/api_test.py index 92eb0708f2c50..71c078f10c414 100644 --- a/tests/unit_tests/databases/api_test.py +++ b/tests/unit_tests/databases/api_test.py @@ -579,7 +579,7 @@ def _base_filter(query): assert base_filter_mock.call_count == 0 original_config = current_app.config.copy() - original_config["EXTRA_DYNAMIC_DATABASE_FILTER"] = base_filter_mock + original_config["EXTRA_DYNAMIC_QUERY_FILTERS"] = {"databases": base_filter_mock} mocker.patch("superset.views.filters.current_app.config", new=original_config) # Get filtered list From dac34ff1779352e83a8150d272870191c36f287d Mon Sep 17 00:00:00 2001 From: Antonio Rivero Date: Thu, 6 Jul 2023 16:18:05 -0400 Subject: [PATCH 4/4] Database Filtering: - Add more info to our comments in our tests --- tests/integration_tests/databases/api_tests.py | 14 +++++++++++++- tests/unit_tests/databases/api_test.py | 5 ++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/tests/integration_tests/databases/api_tests.py b/tests/integration_tests/databases/api_tests.py index 6d6c832abcbc7..ebf94219c3b0a 100644 --- a/tests/integration_tests/databases/api_tests.py +++ b/tests/integration_tests/databases/api_tests.py @@ -3637,7 +3637,11 @@ def test_validate_sql_endpoint_failure(self, get_validator_by_name): def test_get_databases_with_extra_filters(self): """ - API: Test get database with extra query filter + API: Test get database with extra query filter. + Here we are testing our default where all databases + must be returned if nothing is being set in the config. + Then, we're adding the patch for the config to add the filter function + and testing it's being applied. """ self.login(username="admin") extra = { @@ -3650,6 +3654,7 @@ def test_get_databases_with_extra_filters(self): if example_db.backend == "sqlite": return + # Create our two databases database_data = { "sqlalchemy_uri": example_db.sqlalchemy_uri_decrypted, "configuration_method": ConfigurationMethod.SQLALCHEMY_FORM, @@ -3671,6 +3676,7 @@ def test_get_databases_with_extra_filters(self): second_response = json.loads(rv.data.decode("utf-8")) self.assertEqual(rv.status_code, 201) + # The filter function def _base_filter(query): from superset.models.core import Database @@ -3683,16 +3689,20 @@ def _base_filter(query): expected_names.sort() uri = f"api/v1/database/" + # Get the list of databases without filter in the config rv = self.client.get(uri) data = json.loads(rv.data.decode("utf-8")) + # All databases must be returned if no filter is present self.assertEqual(data["count"], len(dbs)) database_names = [item["database_name"] for item in data["result"]] database_names.sort() # All Databases because we are an admin self.assertEqual(database_names, expected_names) assert rv.status_code == 200 + # Our filter function wasn't get called base_filter_mock.assert_not_called() + # Now we patch the config to include our filter function with patch.dict( "superset.views.filters.current_app.config", {"EXTRA_DYNAMIC_QUERY_FILTERS": {"databases": base_filter_mock}}, @@ -3700,11 +3710,13 @@ def _base_filter(query): uri = f"api/v1/database/" rv = self.client.get(uri) data = json.loads(rv.data.decode("utf-8")) + # Only one database start with dyntest self.assertEqual(data["count"], 1) database_names = [item["database_name"] for item in data["result"]] # Only the database that starts with tests, even if we are an admin self.assertEqual(database_names, ["dyntest-create-database-1"]) assert rv.status_code == 200 + # The filter function is called now that it's defined in our config base_filter_mock.assert_called() # Cleanup diff --git a/tests/unit_tests/databases/api_test.py b/tests/unit_tests/databases/api_test.py index 71c078f10c414..899e2b0234571 100644 --- a/tests/unit_tests/databases/api_test.py +++ b/tests/unit_tests/databases/api_test.py @@ -507,7 +507,10 @@ def test_apply_dynamic_database_filter( full_api_access: None, ) -> None: """ - Test that we can filter the list of databases + Test that we can filter the list of databases. + First test the default behavior without a filter and then + defining a filter function and patching the config to get + the filtered results. """ with app.app_context(): from superset.daos.database import DatabaseDAO