diff --git a/UPDATING.md b/UPDATING.md
index a422e425773fb..c66bd04f43140 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -28,7 +28,7 @@ assists people when migrating to a new version.
 - [20799](https://github.com/apache/superset/pull/20799): The Presto and Trino engines will now display a tracking URL for running queries in SQL Lab. If for some reason you don't want to show the tracking URL (for example, when your data warehouse hasn't enabled access to the Presto or Trino UI), update `TRACKING_URL_TRANSFORMER` in `config.py` to return `None`.
 - [21002](https://github.com/apache/superset/pull/21002): Support Python 3.10 and bump pandas to 1.4 and pyarrow to 6.
 - [21163](https://github.com/apache/superset/pull/21163): When the `GENERIC_CHART_AXES` feature flag is set to `True`, the Time Grain control will move below the X-Axis control.
-- [21284](https://github.com/apache/superset/pull/21284): The non-functional `MAX_TABLE_NAMES` config key has been removed.
+

 ### Breaking Changes
diff --git a/docs/static/resources/openapi.json b/docs/static/resources/openapi.json
index ce84f368c8fc4..9020e4ba7d289 100644
--- a/docs/static/resources/openapi.json
+++ b/docs/static/resources/openapi.json
@@ -3478,6 +3478,9 @@
       },
       "Database": {
         "properties": {
+          "allow_multi_schema_metadata_fetch": {
+            "type": "boolean"
+          },
           "allows_cost_estimate": {
             "type": "boolean"
           },
@@ -3614,6 +3617,10 @@
         "nullable": true,
         "type": "boolean"
       },
+      "allow_multi_schema_metadata_fetch": {
+        "nullable": true,
+        "type": "boolean"
+      },
       "allow_run_async": {
         "nullable": true,
         "type": "boolean"
       },
@@ -3699,6 +3706,10 @@
         "nullable": true,
         "type": "boolean"
       },
+      "allow_multi_schema_metadata_fetch": {
+        "nullable": true,
+        "type": "boolean"
+      },
       "allow_run_async": {
         "nullable": true,
         "type": "boolean"
       },
@@ -3794,6 +3805,10 @@
         "description": "Allow to upload CSV file data into this database. If selected, please set the schemas allowed for csv upload in Extra.",
         "type": "boolean"
       },
+      "allow_multi_schema_metadata_fetch": {
+        "description": "Allow SQL Lab to fetch a list of all tables and all views across all database schemas. For large data warehouses with thousands of tables, this can be expensive and put strain on the system.",
+        "type": "boolean"
+      },
       "allow_run_async": {
         "description": "Operate the database in asynchronous mode, meaning that the queries are executed on remote workers as opposed to on the web server itself. This assumes that you have a Celery worker setup as well as a results backend. Refer to the installation docs for more information.",
         "type": "boolean"
       },
@@ -3891,6 +3906,10 @@
         "description": "Allow to upload CSV file data into this database. If selected, please set the schemas allowed for csv upload in Extra.",
         "type": "boolean"
       },
+      "allow_multi_schema_metadata_fetch": {
+        "description": "Allow SQL Lab to fetch a list of all tables and all views across all database schemas. For large data warehouses with thousands of tables, this can be expensive and put strain on the system.",
+        "type": "boolean"
+      },
       "allow_run_async": {
         "description": "Operate the database in asynchronous mode, meaning that the queries are executed on remote workers as opposed to on the web server itself. This assumes that you have a Celery worker setup as well as a results backend. Refer to the installation docs for more information.",
         "type": "boolean"
       },
diff --git a/superset-frontend/packages/superset-ui-demo/storybook/stories/plugins/plugin-chart-table/birthNames.json b/superset-frontend/packages/superset-ui-demo/storybook/stories/plugins/plugin-chart-table/birthNames.json
index c35d3a80665fc..cecb37e02627e 100644
--- a/superset-frontend/packages/superset-ui-demo/storybook/stories/plugins/plugin-chart-table/birthNames.json
+++ b/superset-frontend/packages/superset-ui-demo/storybook/stories/plugins/plugin-chart-table/birthNames.json
@@ -13,6 +13,7 @@
     "id": 1,
     "name": "examples",
     "backend": "postgresql",
+    "allow_multi_schema_metadata_fetch": false,
     "allows_subquery": true,
     "allows_cost_estimate": null,
     "allows_virtual_table_explore": true,
diff --git a/superset-frontend/spec/fixtures/mockDatasource.js b/superset-frontend/spec/fixtures/mockDatasource.js
index 21a5805519b67..30513fc126748 100644
--- a/superset-frontend/spec/fixtures/mockDatasource.js
+++ b/superset-frontend/spec/fixtures/mockDatasource.js
@@ -171,6 +171,7 @@ export default {
     name: 'birth_names',
     owners: [{ first_name: 'joe', last_name: 'man', id: 1 }],
     database: {
+      allow_multi_schema_metadata_fetch: null,
       name: 'main',
       backend: 'sqlite',
     },
diff --git a/superset-frontend/src/SqlLab/components/SqlEditor/SqlEditor.test.jsx b/superset-frontend/src/SqlLab/components/SqlEditor/SqlEditor.test.jsx
index 163c6408ad637..f1b959c0fe005 100644
--- a/superset-frontend/src/SqlLab/components/SqlEditor/SqlEditor.test.jsx
+++ b/superset-frontend/src/SqlLab/components/SqlEditor/SqlEditor.test.jsx
@@ -64,6 +64,7 @@ const store = mockStore({
       allow_cvas: false,
       allow_dml: false,
       allow_file_upload: false,
+      allow_multi_schema_metadata_fetch: false,
       allow_run_async: false,
       backend: 'postgresql',
       database_name: 'examples',
diff --git a/superset-frontend/src/components/DatabaseSelector/DatabaseSelector.test.tsx b/superset-frontend/src/components/DatabaseSelector/DatabaseSelector.test.tsx
index 0b2a7b521f2a3..272249b549600 100644
--- a/superset-frontend/src/components/DatabaseSelector/DatabaseSelector.test.tsx
+++ b/superset-frontend/src/components/DatabaseSelector/DatabaseSelector.test.tsx
@@ -31,6 +31,7 @@ const createProps = (): DatabaseSelectorProps => ({
     id: 1,
     database_name: 'test',
     backend: 'test-postgresql',
+    allow_multi_schema_metadata_fetch: false,
   },
   formMode: false,
   isDatabaseSelectEnabled: true,
@@ -68,6 +69,8 @@ beforeEach(() => {
         allow_ctas: 'Allow Ctas',
         allow_cvas: 'Allow Cvas',
         allow_dml: 'Allow Dml',
+        allow_multi_schema_metadata_fetch:
+          'Allow Multi Schema Metadata Fetch',
         allow_run_async: 'Allow Run Async',
         allows_cost_estimate: 'Allows Cost Estimate',
         allows_subquery: 'Allows Subquery',
@@ -89,6 +92,7 @@ beforeEach(() => {
         'allow_ctas',
         'allow_cvas',
         'allow_dml',
+        'allow_multi_schema_metadata_fetch',
         'allow_run_async',
         'allows_cost_estimate',
         'allows_subquery',
@@ -122,6 +126,7 @@ beforeEach(() => {
           allow_ctas: false,
           allow_cvas: false,
           allow_dml: false,
+          allow_multi_schema_metadata_fetch: false,
           allow_run_async: false,
           allows_cost_estimate: null,
           allows_subquery: true,
@@ -142,6 +147,7 @@ beforeEach(() => {
           allow_ctas: false,
           allow_cvas: false,
           allow_dml: false,
+          allow_multi_schema_metadata_fetch: false,
           allow_run_async: false,
           allows_cost_estimate: null,
           allows_subquery: true,
@@ -266,6 +272,7 @@ test('Sends the correct db when changing the database', async () => {
         id: 2,
         database_name: 'test-mysql',
         backend: 'mysql',
+        allow_multi_schema_metadata_fetch: false,
       }),
     ),
   );
diff --git a/superset-frontend/src/components/DatabaseSelector/index.tsx b/superset-frontend/src/components/DatabaseSelector/index.tsx
index 59109d3b5c0ce..1df7f78a3bea9 100644
--- a/superset-frontend/src/components/DatabaseSelector/index.tsx
+++ b/superset-frontend/src/components/DatabaseSelector/index.tsx
@@ -74,12 +74,14 @@ type DatabaseValue = {
   id: number;
   database_name: string;
   backend: string;
+  allow_multi_schema_metadata_fetch: boolean;
 };

 export type DatabaseObject = {
   id: number;
   database_name: string;
   backend: string;
+  allow_multi_schema_metadata_fetch: boolean;
 };

 type SchemaValue = { label: string; value: string };
@@ -197,6 +199,8 @@ export default function DatabaseSelector({
           id: row.id,
           database_name: row.database_name,
           backend: row.backend,
+          allow_multi_schema_metadata_fetch:
+            row.allow_multi_schema_metadata_fetch,
         }));

         return {
diff --git a/superset-frontend/src/components/TableSelector/TableSelector.test.tsx b/superset-frontend/src/components/TableSelector/TableSelector.test.tsx
index 3f74e7781ed17..32d84c008605c 100644
--- a/superset-frontend/src/components/TableSelector/TableSelector.test.tsx
+++ b/superset-frontend/src/components/TableSelector/TableSelector.test.tsx
@@ -31,6 +31,7 @@ const createProps = (props = {}) => ({
     id: 1,
     database_name: 'main',
     backend: 'sqlite',
+    allow_multi_schema_metadata_fetch: false,
   },
   schema: 'test_schema',
   handleError: jest.fn(),
diff --git a/superset-frontend/src/components/TableSelector/index.tsx b/superset-frontend/src/components/TableSelector/index.tsx
index acf9f67f7cdb2..ecf657e52e7a5 100644
--- a/superset-frontend/src/components/TableSelector/index.tsx
+++ b/superset-frontend/src/components/TableSelector/index.tsx
@@ -112,9 +112,9 @@ export interface TableOption {
 }

 export const TableOption = ({ table }: { table: Table }) => {
-  const { value, type, extra } = table;
+  const { label, type, extra } = table;
   return (
-    [JSX wrapper line lost in extraction]
+    [JSX wrapper line lost in extraction]
     {type === 'view' ? (
     ) : (
@@ -133,7 +133,7 @@ export const TableOption = ({ table }: { table: Table }) => {
         size="l"
       />
     )}
-    {value}
+    {label}
   );
 };
@@ -286,7 +286,9 @@ const TableSelector: FunctionComponent<TableSelectorProps> = ({
   );

   function renderTableSelect() {
-    const disabled = (currentSchema && !formMode && readOnly) || !currentSchema;
+    const disabled =
+      (currentSchema && !formMode && readOnly) ||
+      (!currentSchema && !database?.allow_multi_schema_metadata_fetch);

     const header = sqlLabMode ? (
       {t('See table schema')}
diff --git a/superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx b/superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx
index 45504b3d5ee71..8283ff2509b81 100644
--- a/superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx
+++ b/superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx
@@ -148,6 +148,24 @@ const ExtraOptions = ({
         />
+        [~18 added JSX lines lost in extraction: the "Allow Multi Schema Metadata Fetch" checkbox and its tooltip]
diff --git a/superset-frontend/src/views/CRUD/data/database/DatabaseModal/DatabaseModal.test.jsx b/superset-frontend/src/views/CRUD/data/database/DatabaseModal/DatabaseModal.test.jsx
[index line, first hunk header, and leading context lost in extraction]
       name: /allow dml/i,
     });
     const allowDMLText = screen.getByText(/allow dml/i);
+    const allowMultiSchemaMDFetchCheckbox = screen.getByRole('checkbox', {
+      name: /allow multi schema metadata fetch/i,
+    });
+    const allowMultiSchemaMDFetchText = screen.getByText(
+      /allow multi schema metadata fetch/i,
+    );
     const enableQueryCostEstimationCheckbox = screen.getByRole('checkbox', {
       name: /enable query cost estimation/i,
     });
@@ -613,6 +619,7 @@
       checkboxOffSVGs[4],
       checkboxOffSVGs[5],
       checkboxOffSVGs[6],
+      checkboxOffSVGs[7],
       tooltipIcons[0],
       tooltipIcons[1],
       tooltipIcons[2],
@@ -620,6 +627,7 @@
       tooltipIcons[4],
       tooltipIcons[5],
       tooltipIcons[6],
+      tooltipIcons[7],
       exposeInSQLLabText,
       allowCTASText,
       allowCVASText,
@@ -627,6 +635,7 @@
       CTASCVASInput,
       CTASCVASHelperText,
       allowDMLText,
+      allowMultiSchemaMDFetchText,
       enableQueryCostEstimationText,
       allowDbExplorationText,
       disableSQLLabDataPreviewQueriesText,
@@ -637,6 +646,7 @@
       allowCTASCheckbox,
       allowCVASCheckbox,
       allowDMLCheckbox,
+      allowMultiSchemaMDFetchCheckbox,
       enableQueryCostEstimationCheckbox,
       allowDbExplorationCheckbox,
       disableSQLLabDataPreviewQueriesCheckbox,
@@ -648,8 +658,8 @@
     invisibleComponents.forEach(component => {
       expect(component).not.toBeVisible();
     });
-    expect(checkboxOffSVGs).toHaveLength(7);
-    expect(tooltipIcons).toHaveLength(7);
+    expect(checkboxOffSVGs).toHaveLength(8);
+    expect(tooltipIcons).toHaveLength(8);
   });

   test('renders the "Advanced" - PERFORMANCE tab correctly', async () => {
diff --git a/superset-frontend/src/views/CRUD/data/database/types.ts b/superset-frontend/src/views/CRUD/data/database/types.ts
index 92dd8e187851b..24ae99cad11b1 100644
--- a/superset-frontend/src/views/CRUD/data/database/types.ts
+++ b/superset-frontend/src/views/CRUD/data/database/types.ts
@@ -66,6 +66,7 @@ export type DatabaseObject = {
   allow_ctas?: boolean;
   allow_cvas?: boolean;
   allow_dml?: boolean;
+  allow_multi_schema_metadata_fetch?: boolean;
   force_ctas_schema?: string;

   // Security
diff --git a/superset/cli/update.py b/superset/cli/update.py
index e2054485c6367..ae4ad644c9a8c 100755
--- a/superset/cli/update.py
+++ b/superset/cli/update.py
@@ -31,6 +31,7 @@
 import superset.utils.database as database_utils
 from superset.extensions import db
+from superset.utils.core import override_user
 from superset.utils.encrypt import SecretsMigrator

 logger = logging.getLogger(__name__)
@@ -52,6 +53,38 @@ def set_database_uri(database_name: str, uri: str, skip_create: bool) -> None:
     database_utils.get_or_create_db(database_name, uri, not skip_create)


+@click.command()
+@with_appcontext
+@click.option(
+    "--username",
+    "-u",
+    default=None,
+    help=(
+        "Specify which user should execute the underlying SQL queries. If "
+        "undefined, it defaults to the user registered with the database connection."
+    ),
+)
+def update_datasources_cache(username: Optional[str]) -> None:
+    """Refresh sqllab datasources cache"""
+    # pylint: disable=import-outside-toplevel
+    from superset import security_manager
+    from superset.models.core import Database
+
+    with override_user(security_manager.find_user(username)):
+        for database in db.session.query(Database).all():
+            if database.allow_multi_schema_metadata_fetch:
+                print("Fetching {} datasources ...".format(database.name))
+                try:
+                    database.get_all_table_names_in_database(
+                        force=True, cache=True, cache_timeout=24 * 60 * 60
+                    )
+                    database.get_all_view_names_in_database(
+                        force=True, cache=True, cache_timeout=24 * 60 * 60
+                    )
+                except Exception as ex:  # pylint: disable=broad-except
+                    print("{}".format(str(ex)))
+
+
 @click.command()
 @with_appcontext
 def sync_tags() -> None:
diff --git a/superset/config.py b/superset/config.py
index e659d7ba83023..51458d9501e5b 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -751,6 +751,9 @@ def _try_json_readsha(filepath: str, length: int) -> Optional[str]:
 # the SQL Lab UI
 DEFAULT_SQLLAB_LIMIT = 1000

+# Maximum number of tables/views displayed in the dropdown window in SQL Lab.
+MAX_TABLE_NAMES = 3000
+
 # Adds a warning message on sqllab save query and schedule query modals.
 SQLLAB_SAVE_WARNING_MESSAGE = None
 SQLLAB_SCHEDULE_WARNING_MESSAGE = None
diff --git a/superset/dashboards/schemas.py b/superset/dashboards/schemas.py
index c8912d14331d6..1375f98dd71af 100644
--- a/superset/dashboards/schemas.py
+++ b/superset/dashboards/schemas.py
@@ -174,6 +174,7 @@ class DatabaseSchema(Schema):
     id = fields.Int()
     name = fields.String()
     backend = fields.String()
+    allow_multi_schema_metadata_fetch = fields.Bool()  # pylint: disable=invalid-name
     allows_subquery = fields.Bool()
     allows_cost_estimate = fields.Bool()
     allows_virtual_table_explore = fields.Bool()
diff --git a/superset/databases/api.py b/superset/databases/api.py
index e61bce68db481..d9d0c739be3cf 100644
--- a/superset/databases/api.py
+++ b/superset/databases/api.py
@@ -121,6 +121,7 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
         "allow_dml",
         "backend",
         "force_ctas_schema",
+        "allow_multi_schema_metadata_fetch",
         "impersonate_user",
         "masked_encrypted_extra",
         "extra",
@@ -135,6 +136,7 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
         "allow_ctas",
         "allow_cvas",
         "allow_dml",
+        "allow_multi_schema_metadata_fetch",
         "allow_run_async",
         "allows_cost_estimate",
         "allows_subquery",
@@ -165,6 +167,7 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
         "configuration_method",
         "force_ctas_schema",
         "impersonate_user",
+        "allow_multi_schema_metadata_fetch",
         "extra",
         "encrypted_extra",
         "server_cert",
diff --git a/superset/databases/schemas.py b/superset/databases/schemas.py
index 201e35cbfc881..61fa90ef923ea 100644
--- a/superset/databases/schemas.py
+++ b/superset/databases/schemas.py
@@ -67,6 +67,11 @@
     "(UPDATE, DELETE, CREATE, ...) "
     "in SQL Lab"
 )
+allow_multi_schema_metadata_fetch_description = (
+    "Allow SQL Lab to fetch a list of all tables and all views across "
+    "all database schemas. For large data warehouses with thousands of "
+    "tables, this can be expensive and put strain on the system."
+)  # pylint: disable=invalid-name
 configuration_method_description = (
     "Configuration_method is used on the frontend to "
     "inform the backend whether to explode parameters "
@@ -363,6 +368,9 @@ class Meta:  # pylint: disable=too-few-public-methods
         allow_none=True,
         validate=Length(0, 250),
     )
+    allow_multi_schema_metadata_fetch = fields.Boolean(
+        description=allow_multi_schema_metadata_fetch_description,
+    )
     impersonate_user = fields.Boolean(description=impersonate_user_description)
     masked_encrypted_extra = fields.String(
         description=encrypted_extra_description,
@@ -407,6 +415,9 @@ class Meta:  # pylint: disable=too-few-public-methods
         allow_none=True,
         validate=Length(0, 250),
     )
+    allow_multi_schema_metadata_fetch = fields.Boolean(
+        description=allow_multi_schema_metadata_fetch_description
+    )
     impersonate_user = fields.Boolean(description=impersonate_user_description)
     masked_encrypted_extra = fields.String(
         description=encrypted_extra_description,
@@ -575,7 +586,7 @@ class DatabaseFunctionNamesResponse(Schema):
 class ImportV1DatabaseExtraSchema(Schema):
     # pylint: disable=no-self-use, unused-argument
     @pre_load
-    def fix_schemas_allowed_for_csv_upload(  # pylint: disable=invalid-name
+    def fix_schemas_allowed_for_csv_upload(
         self, data: Dict[str, Any], **kwargs: Any
     ) -> Dict[str, Any]:
         """
diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
index 13b766c789e6e..52f79be82226b 100644
--- a/superset/db_engine_specs/base.py
+++ b/superset/db_engine_specs/base.py
@@ -917,6 +917,48 @@ def convert_dttm(  # pylint: disable=unused-argument
         """
         return None

+    @classmethod
+    def get_all_datasource_names(
+        cls, database: "Database", datasource_type: str
+    ) -> List[utils.DatasourceName]:
+        """Returns a list of all tables or views in the database.
+
+        :param database: Database instance
+        :param datasource_type: Datasource_type can be 'table' or 'view'
+        :return: List of all datasources in the database or schema
+        """
+        # TODO: Fix circular import caused by importing Database
+        schemas = database.get_all_schema_names(
+            cache=database.schema_cache_enabled,
+            cache_timeout=database.schema_cache_timeout,
+            force=True,
+        )
+        all_datasources: List[utils.DatasourceName] = []
+        for schema in schemas:
+            if datasource_type == "table":
+                all_datasources.extend(
+                    utils.DatasourceName(*datasource_name)
+                    for datasource_name in database.get_all_table_names_in_schema(
+                        schema=schema,
+                        force=True,
+                        cache=database.table_cache_enabled,
+                        cache_timeout=database.table_cache_timeout,
+                    )
+                )
+            elif datasource_type == "view":
+                all_datasources.extend(
+                    utils.DatasourceName(*datasource_name)
+                    for datasource_name in database.get_all_view_names_in_schema(
+                        schema=schema,
+                        force=True,
+                        cache=database.table_cache_enabled,
+                        cache_timeout=database.table_cache_timeout,
+                    )
+                )
+            else:
+                raise Exception(f"Unsupported datasource_type: {datasource_type}")
+        return all_datasources
+
     @classmethod
     def handle_cursor(cls, cursor: Any, query: "Query", session: Session) -> None:
         """Handle a live cursor between the execute and fetchall calls
diff --git a/superset/db_engine_specs/hive.py b/superset/db_engine_specs/hive.py
index b37348e911ece..8ea1bfddae686 100644
--- a/superset/db_engine_specs/hive.py
+++ b/superset/db_engine_specs/hive.py
@@ -145,6 +145,12 @@ def patch(cls) -> None:
         hive.ttypes = patched_ttypes
         hive.Cursor.fetch_logs = patched_hive.fetch_logs

+    @classmethod
+    def get_all_datasource_names(
+        cls, database: "Database", datasource_type: str
+    ) -> List[utils.DatasourceName]:
+        return BaseEngineSpec.get_all_datasource_names(database, datasource_type)
+
     @classmethod
     def fetch_data(
         cls, cursor: Any, limit: Optional[int] = None
diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py
index 22e4f7594ccf6..ab1854c4233e6 100644
--- a/superset/db_engine_specs/presto.py
+++ b/superset/db_engine_specs/presto.py
@@ -796,6 +796,26 @@ def select_star(  # pylint: disable=too-many-arguments
             presto_cols,
         )

+    @classmethod
+    def get_all_datasource_names(
+        cls, database: Database, datasource_type: str
+    ) -> List[utils.DatasourceName]:
+        datasource_df = database.get_df(
+            "SELECT table_schema, table_name FROM INFORMATION_SCHEMA.{}S "
+            "ORDER BY concat(table_schema, '.', table_name)".format(
+                datasource_type.upper()
+            ),
+            None,
+        )
+        datasource_names: List[utils.DatasourceName] = []
+        for _unused, row in datasource_df.iterrows():
+            datasource_names.append(
+                utils.DatasourceName(
+                    schema=row["table_schema"], table=row["table_name"]
+                )
+            )
+        return datasource_names
+
     @classmethod
     def expand_data(  # pylint: disable=too-many-locals
         cls, columns: List[ResultSetColumnType], data: List[Dict[Any, Any]]
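For reference, the Presto override above derives its SQL from a single template string; this snippet only prints the two queries it can emit:

```python
# Mirrors the string formatting inside PrestoEngineSpec.get_all_datasource_names.
query_template = (
    "SELECT table_schema, table_name FROM INFORMATION_SCHEMA.{}S "
    "ORDER BY concat(table_schema, '.', table_name)"
)
for datasource_type in ("table", "view"):
    # "table" -> INFORMATION_SCHEMA.TABLES, "view" -> INFORMATION_SCHEMA.VIEWS
    print(query_template.format(datasource_type.upper()))
```
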
datasource_type == "table": + return [ + utils.DatasourceName(*datasource_name) + for datasource_name in database.get_all_table_names_in_schema( + schema=schema, + force=True, + cache=database.table_cache_enabled, + cache_timeout=database.table_cache_timeout, + ) + ] + if datasource_type == "view": + return [ + utils.DatasourceName(*datasource_name) + for datasource_name in database.get_all_view_names_in_schema( + schema=schema, + force=True, + cache=database.table_cache_enabled, + cache_timeout=database.table_cache_timeout, + ) + ] + raise Exception(f"Unsupported datasource_type: {datasource_type}") + @classmethod def convert_dttm( cls, target_type: str, dttm: datetime, db_extra: Optional[Dict[str, Any]] = None diff --git a/superset/migrations/versions/2022-08-31_19-30_291f024254b5_drop_column_allow_multi_schema_metadata_fetch.py b/superset/migrations/versions/2022-08-31_19-30_291f024254b5_drop_column_allow_multi_schema_metadata_fetch.py deleted file mode 100644 index fadcb3dda24e5..0000000000000 --- a/superset/migrations/versions/2022-08-31_19-30_291f024254b5_drop_column_allow_multi_schema_metadata_fetch.py +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""drop_column_allow_multi_schema_metadata_fetch - - -Revision ID: 291f024254b5 -Revises: 6d3c6f9d665d -Create Date: 2022-08-31 19:30:33.665025 - -""" - -# revision identifiers, used by Alembic. 
-revision = "291f024254b5" -down_revision = "6d3c6f9d665d" - -import sqlalchemy as sa -from alembic import op - - -def upgrade(): - with op.batch_alter_table("dbs") as batch_op: - batch_op.drop_column("allow_multi_schema_metadata_fetch") - - -def downgrade(): - op.add_column( - "dbs", - sa.Column( - "allow_multi_schema_metadata_fetch", - sa.Boolean(), - nullable=True, - default=True, - ), - ) diff --git a/superset/models/core.py b/superset/models/core.py index f1c1e6bbdd102..822191cb72ff2 100755 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -134,6 +134,9 @@ class Database( allow_cvas = Column(Boolean, default=False) allow_dml = Column(Boolean, default=False) force_ctas_schema = Column(String(250)) + allow_multi_schema_metadata_fetch = Column( # pylint: disable=invalid-name + Boolean, default=False + ) extra = Column( Text, default=textwrap.dedent( @@ -225,6 +228,7 @@ def data(self) -> Dict[str, Any]: "name": self.database_name, "backend": self.backend, "configuration_method": self.configuration_method, + "allow_multi_schema_metadata_fetch": self.allow_multi_schema_metadata_fetch, "allows_subquery": self.allows_subquery, "allows_cost_estimate": self.allows_cost_estimate, "allows_virtual_table_explore": self.allows_virtual_table_explore, @@ -513,6 +517,46 @@ def inspector(self) -> Inspector: engine = self.get_sqla_engine() return sqla.inspect(engine) + @cache_util.memoized_func( + key="db:{self.id}:schema:None:table_list", + cache=cache_manager.cache, + ) + def get_all_table_names_in_database( # pylint: disable=unused-argument + self, + cache: bool = False, + cache_timeout: Optional[bool] = None, + force: bool = False, + ) -> List[Tuple[str, str]]: + """Parameters need to be passed as keyword arguments.""" + if not self.allow_multi_schema_metadata_fetch: + return [] + return [ + (datasource_name.table, datasource_name.schema) + for datasource_name in self.db_engine_spec.get_all_datasource_names( + self, "table" + ) + ] + + @cache_util.memoized_func( + key="db:{self.id}:schema:None:view_list", + cache=cache_manager.cache, + ) + def get_all_view_names_in_database( # pylint: disable=unused-argument + self, + cache: bool = False, + cache_timeout: Optional[bool] = None, + force: bool = False, + ) -> List[Tuple[str, str]]: + """Parameters need to be passed as keyword arguments.""" + if not self.allow_multi_schema_metadata_fetch: + return [] + return [ + (datasource_name.table, datasource_name.schema) + for datasource_name in self.db_engine_spec.get_all_datasource_names( + self, "view" + ) + ] + @cache_util.memoized_func( key="db:{self.id}:schema:{schema}:table_list", cache=cache_manager.cache, diff --git a/superset/views/core.py b/superset/views/core.py index 859b42ad97d7f..1e69c8a4a8fa9 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -179,6 +179,7 @@ "allow_ctas", "allow_cvas", "allow_dml", + "allow_multi_schema_metadata_fetch", "allow_run_async", "allows_subquery", "backend", @@ -1102,40 +1103,34 @@ def save_or_overwrite_slice( @api @has_access_api @event_logger.log_this - @expose("/tables///") - @expose("/tables////") - def tables( # pylint: disable=no-self-use + @expose("/tables////") + @expose("/tables/////") + @expose("/tables/////") + def tables( # pylint: disable=too-many-locals,no-self-use,too-many-arguments self, db_id: int, schema: str, + substr: str, force_refresh: str = "false", + exact_match: str = "false", ) -> FlaskResponse: """Endpoint to fetch the list of tables for given database""" - - force_refresh_parsed = force_refresh.lower() == "true" - 
diff --git a/superset/views/core.py b/superset/views/core.py
index 859b42ad97d7f..1e69c8a4a8fa9 100755
--- a/superset/views/core.py
+++ b/superset/views/core.py
@@ -179,6 +179,7 @@
     "allow_ctas",
     "allow_cvas",
     "allow_dml",
+    "allow_multi_schema_metadata_fetch",
     "allow_run_async",
     "allows_subquery",
     "backend",
@@ -1102,40 +1103,34 @@ def save_or_overwrite_slice(
     @api
     @has_access_api
     @event_logger.log_this
-    @expose("/tables/<int:db_id>/<schema>/")
-    @expose("/tables/<int:db_id>/<schema>/<force_refresh>/")
-    def tables(  # pylint: disable=no-self-use
+    @expose("/tables/<int:db_id>/<schema>/<substr>/")
+    @expose("/tables/<int:db_id>/<schema>/<substr>/<force_refresh>/")
+    @expose("/tables/<int:db_id>/<schema>/<substr>/<force_refresh>/<exact_match>/")
+    def tables(  # pylint: disable=too-many-locals,no-self-use,too-many-arguments
         self,
         db_id: int,
         schema: str,
+        substr: str,
         force_refresh: str = "false",
+        exact_match: str = "false",
     ) -> FlaskResponse:
         """Endpoint to fetch the list of tables for given database"""
-
-        force_refresh_parsed = force_refresh.lower() == "true"
-        schema_parsed = utils.parse_js_uri_path_item(schema, eval_undefined=True)
-
-        if not schema_parsed:
-            return json_error_response(_("Schema undefined"), status=422)
-
-        # Guarantees database filtering by security access
-        database = (
-            DatabaseFilter("id", SQLAInterface(Database, db.session))
-            .apply(
-                db.session.query(Database),
-                None,
-            )
-            .filter_by(id=db_id)
-            .one_or_none()
+        query = db.session.query(Database)
+        query = DatabaseFilter("id", SQLAInterface(Database, db.session)).apply(
+            query, None
         )
-
+        database = query.filter_by(id=db_id).one_or_none()
         if not database:
-            return json_error_response("Database not found", status=404)
+            return json_error_response("Not found", 404)

-        tables = security_manager.get_datasources_accessible_by_user(
-            database=database,
-            schema=schema_parsed,
-            datasource_names=[
+        force_refresh_parsed = force_refresh.lower() == "true"
+        exact_match_parsed = exact_match.lower() == "true"
+        schema_parsed = utils.parse_js_uri_path_item(schema, eval_undefined=True)
+        substr_parsed = utils.parse_js_uri_path_item(substr, eval_undefined=True)
+
+        if schema_parsed:
+            tables = [
                 utils.DatasourceName(*datasource_name)
                 for datasource_name in database.get_all_table_names_in_schema(
                     schema=schema_parsed,
@@ -1143,13 +1138,8 @@ def tables(  # pylint: disable=no-self-use
                     cache=database.table_cache_enabled,
                     cache_timeout=database.table_cache_timeout,
                 )
-            ],
-        )
-
-        views = security_manager.get_datasources_accessible_by_user(
-            database=database,
-            schema=schema_parsed,
-            datasource_names=[
+            ] or []
+            views = [
                 utils.DatasourceName(*datasource_name)
                 for datasource_name in database.get_all_view_names_in_schema(
                     schema=schema_parsed,
@@ -1157,36 +1147,95 @@ def tables(  # pylint: disable=no-self-use
                     cache=database.table_cache_enabled,
                     cache_timeout=database.table_cache_timeout,
                 )
-            ],
+            ] or []
+        else:
+            tables = [
+                utils.DatasourceName(*datasource_name)
+                for datasource_name in database.get_all_table_names_in_database(
+                    cache=True, force=False, cache_timeout=24 * 60 * 60
+                )
+            ]
+            views = [
+                utils.DatasourceName(*datasource_name)
+                for datasource_name in database.get_all_view_names_in_database(
+                    cache=True, force=False, cache_timeout=24 * 60 * 60
+                )
+            ]
+        tables = security_manager.get_datasources_accessible_by_user(
+            database, tables, schema_parsed
+        )
+        views = security_manager.get_datasources_accessible_by_user(
+            database, views, schema_parsed
         )

+        def get_datasource_label(ds_name: utils.DatasourceName) -> str:
+            return (
+                ds_name.table if schema_parsed else f"{ds_name.schema}.{ds_name.table}"
+            )
+
+        def is_match(src: str, target: utils.DatasourceName) -> bool:
+            target_label = get_datasource_label(target)
+            if exact_match_parsed:
+                return src == target_label
+            return src in target_label
+
+        if substr_parsed:
+            tables = [tn for tn in tables if is_match(substr_parsed, tn)]
+            views = [vn for vn in views if is_match(substr_parsed, vn)]
+
+        if not schema_parsed and database.default_schemas:
+            user_schemas = (
+                [g.user.email.split("@")[0]] if hasattr(g.user, "email") else []
+            )
+            valid_schemas = set(database.default_schemas + user_schemas)
+
+            tables = [tn for tn in tables if tn.schema in valid_schemas]
+            views = [vn for vn in views if vn.schema in valid_schemas]
+
+        max_items = config["MAX_TABLE_NAMES"] or len(tables)
+        total_items = len(tables) + len(views)
+        max_tables = len(tables)
+        max_views = len(views)
+        if total_items and substr_parsed:
+            max_tables = max_items * len(tables) // total_items
+            max_views = max_items * len(views) // total_items
+
         extra_dict_by_name = {
             table.name: table.extra_dict
             for table in (
-                db.session.query(SqlaTable).filter(SqlaTable.schema == schema_parsed)
+                db.session.query(SqlaTable).filter(
+                    SqlaTable.name.in_(  # pylint: disable=no-member
+                        f"{table.schema}.{table.table}" for table in tables
+                    )
+                )
             ).all()
         }

-        options = sorted(
+        table_options = [
+            {
+                "value": tn.table,
+                "schema": tn.schema,
+                "label": get_datasource_label(tn),
+                "title": get_datasource_label(tn),
+                "type": "table",
+                "extra": extra_dict_by_name.get(f"{tn.schema}.{tn.table}", None),
+            }
+            for tn in tables[:max_tables]
+        ]
+        table_options.extend(
             [
                 {
-                    "value": table.table,
-                    "type": "table",
-                    "extra": extra_dict_by_name.get(table.table, None),
-                }
-                for table in tables
-            ]
-            + [
-                {
-                    "value": view.table,
+                    "value": vn.table,
+                    "schema": vn.schema,
+                    "label": get_datasource_label(vn),
+                    "title": get_datasource_label(vn),
                     "type": "view",
                 }
-                for view in views
-            ],
-            key=lambda item: item["value"],
+                for vn in views[:max_views]
+            ]
         )
-
-        payload = {"tableLength": len(tables) + len(views), "options": options}
+        table_options.sort(key=lambda value: value["label"])
+        payload = {"tableLength": len(tables) + len(views), "options": table_options}
         return json_success(json.dumps(payload))

     @api
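A hedged sketch of exercising the widened `/superset/tables/` routes above with Flask's test client, as the integration tests later in this diff do; `client`, `db_id`, `schema`, and the `ab_role` substring are illustrative:

```python
# `client` is assumed to be an already logged-in Flask test client.
def fetch_table_options(client, db_id: int, schema: str):
    # "undefined" in the substr slot disables the substring filter.
    all_opts = client.get(f"/superset/tables/{db_id}/{schema}/undefined/")
    # Substring match against the table label:
    filtered = client.get(f"/superset/tables/{db_id}/{schema}/ab_role/")
    # force_refresh=false, exact_match=true:
    exact = client.get(f"/superset/tables/{db_id}/{schema}/ab_role/false/true/")
    return all_opts, filtered, exact
```
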
" @@ -197,6 +203,7 @@ class DatabaseMixin: "impersonate_user": _("Impersonate the logged on user"), "allow_file_upload": _("Allow Csv Upload"), "modified": _("Modified"), + "allow_multi_schema_metadata_fetch": _("Allow Multi Schema Metadata Fetch"), "backend": _("Backend"), } diff --git a/tests/integration_tests/core_tests.py b/tests/integration_tests/core_tests.py index 75314c1533303..8a58b7f1591d8 100644 --- a/tests/integration_tests/core_tests.py +++ b/tests/integration_tests/core_tests.py @@ -156,7 +156,7 @@ def test_get_superset_tables_not_allowed(self): example_db = superset.utils.database.get_example_database() schema_name = self.default_schema_backend_map[example_db.backend] self.login(username="gamma") - uri = f"superset/tables/{example_db.id}/{schema_name}/" + uri = f"superset/tables/{example_db.id}/{schema_name}/undefined/" rv = self.client.get(uri) self.assertEqual(rv.status_code, 404) @@ -185,7 +185,7 @@ def test_get_superset_tables_allowed(self): example_db = utils.get_example_database() schema_name = self.default_schema_backend_map[example_db.backend] - uri = f"superset/tables/{example_db.id}/{schema_name}/" + uri = f"superset/tables/{example_db.id}/{schema_name}/{table_name}/" rv = self.client.get(uri) self.assertEqual(rv.status_code, 200) @@ -197,6 +197,7 @@ def test_get_superset_tables_allowed(self): @pytest.mark.usefixtures("load_energy_table_with_slice") def test_get_superset_tables_not_allowed_with_out_permissions(self): session = db.session + table_name = "energy_usage" role_name = "dummy_role_no_table_access" self.logout() self.login(username="gamma") @@ -209,7 +210,7 @@ def test_get_superset_tables_not_allowed_with_out_permissions(self): example_db = utils.get_example_database() schema_name = self.default_schema_backend_map[example_db.backend] - uri = f"superset/tables/{example_db.id}/{schema_name}/" + uri = f"superset/tables/{example_db.id}/{schema_name}/{table_name}/" rv = self.client.get(uri) self.assertEqual(rv.status_code, 404) @@ -218,18 +219,38 @@ def test_get_superset_tables_not_allowed_with_out_permissions(self): gamma_user.roles.remove(security_manager.find_role(role_name)) session.commit() - def test_get_superset_tables_database_not_found(self): + def test_get_superset_tables_substr(self): + example_db = superset.utils.database.get_example_database() + if example_db.backend in {"presto", "hive", "sqlite"}: + # TODO: change table to the real table that is in examples. 
+ return self.login(username="admin") - uri = f"superset/tables/invalid/public/" + schema_name = self.default_schema_backend_map[example_db.backend] + uri = f"superset/tables/{example_db.id}/{schema_name}/ab_role/" rv = self.client.get(uri) - self.assertEqual(rv.status_code, 404) + response = json.loads(rv.data.decode("utf-8")) + self.assertEqual(rv.status_code, 200) - def test_get_superset_tables_schema_undefined(self): - example_db = superset.utils.database.get_example_database() - self.login(username="gamma") - uri = f"superset/tables/{example_db.id}/undefined/" + expected_response = { + "options": [ + { + "label": "ab_role", + "schema": schema_name, + "title": "ab_role", + "type": "table", + "value": "ab_role", + "extra": None, + } + ], + "tableLength": 1, + } + self.assertEqual(response, expected_response) + + def test_get_superset_tables_not_found(self): + self.login(username="admin") + uri = f"superset/tables/invalid/public/undefined/" rv = self.client.get(uri) - self.assertEqual(rv.status_code, 422) + self.assertEqual(rv.status_code, 404) def test_annotation_json_endpoint(self): # Set up an annotation layer and annotation diff --git a/tests/integration_tests/databases/api_tests.py b/tests/integration_tests/databases/api_tests.py index fab3708c9968d..4b8803ec759dd 100644 --- a/tests/integration_tests/databases/api_tests.py +++ b/tests/integration_tests/databases/api_tests.py @@ -185,6 +185,7 @@ def test_get_items(self): "allow_cvas", "allow_dml", "allow_file_upload", + "allow_multi_schema_metadata_fetch", "allow_run_async", "allows_cost_estimate", "allows_subquery", diff --git a/tests/integration_tests/db_engine_specs/presto_tests.py b/tests/integration_tests/db_engine_specs/presto_tests.py index 2d6cf7b8622c0..a991a4d22c049 100644 --- a/tests/integration_tests/db_engine_specs/presto_tests.py +++ b/tests/integration_tests/db_engine_specs/presto_tests.py @@ -851,6 +851,19 @@ def test_estimate_statement_cost_invalid_syntax(self): "DROP TABLE brth_names", mock_cursor ) + def test_get_all_datasource_names(self): + df = pd.DataFrame.from_dict( + {"table_schema": ["schema1", "schema2"], "table_name": ["name1", "name2"]} + ) + database = mock.MagicMock() + database.get_df.return_value = df + result = PrestoEngineSpec.get_all_datasource_names(database, "table") + expected_result = [ + DatasourceName(schema="schema1", table="name1"), + DatasourceName(schema="schema2", table="name2"), + ] + assert result == expected_result + def test_get_create_view(self): mock_execute = mock.MagicMock() mock_fetchall = mock.MagicMock(return_value=[["a", "b,", "c"], ["d", "e"]]) diff --git a/tests/unit_tests/db_engine_specs/test_sqlite.py b/tests/unit_tests/db_engine_specs/test_sqlite.py index 76ea4fdff9c69..03470173dc40e 100644 --- a/tests/unit_tests/db_engine_specs/test_sqlite.py +++ b/tests/unit_tests/db_engine_specs/test_sqlite.py @@ -42,6 +42,55 @@ def test_convert_dttm_invalid_type(dttm: datetime) -> None: assert SqliteEngineSpec.convert_dttm("other", dttm) is None +def test_get_all_datasource_names_table() -> None: + from superset.db_engine_specs.sqlite import SqliteEngineSpec + + database = mock.MagicMock() + database.get_all_schema_names.return_value = ["schema1"] + table_names = [("table1", "schema1"), ("table2", "schema1")] + get_tables = mock.MagicMock(return_value=table_names) + database.get_all_table_names_in_schema = get_tables + result = SqliteEngineSpec.get_all_datasource_names(database, "table") + + assert result == table_names + get_tables.assert_called_once_with( + schema="schema1", + 
diff --git a/tests/unit_tests/db_engine_specs/test_sqlite.py b/tests/unit_tests/db_engine_specs/test_sqlite.py
index 76ea4fdff9c69..03470173dc40e 100644
--- a/tests/unit_tests/db_engine_specs/test_sqlite.py
+++ b/tests/unit_tests/db_engine_specs/test_sqlite.py
@@ -42,6 +42,55 @@ def test_convert_dttm_invalid_type(dttm: datetime) -> None:
     assert SqliteEngineSpec.convert_dttm("other", dttm) is None


+def test_get_all_datasource_names_table() -> None:
+    from superset.db_engine_specs.sqlite import SqliteEngineSpec
+
+    database = mock.MagicMock()
+    database.get_all_schema_names.return_value = ["schema1"]
+    table_names = [("table1", "schema1"), ("table2", "schema1")]
+    get_tables = mock.MagicMock(return_value=table_names)
+    database.get_all_table_names_in_schema = get_tables
+    result = SqliteEngineSpec.get_all_datasource_names(database, "table")
+
+    assert result == table_names
+    get_tables.assert_called_once_with(
+        schema="schema1",
+        force=True,
+        cache=database.table_cache_enabled,
+        cache_timeout=database.table_cache_timeout,
+    )
+
+
+def test_get_all_datasource_names_view() -> None:
+    from superset.db_engine_specs.sqlite import SqliteEngineSpec
+
+    database = mock.MagicMock()
+    database.get_all_schema_names.return_value = ["schema1"]
+    views_names = [("view1", "schema1"), ("view2", "schema1")]
+    get_views = mock.MagicMock(return_value=views_names)
+    database.get_all_view_names_in_schema = get_views
+    result = SqliteEngineSpec.get_all_datasource_names(database, "view")
+
+    assert result == views_names
+    get_views.assert_called_once_with(
+        schema="schema1",
+        force=True,
+        cache=database.table_cache_enabled,
+        cache_timeout=database.table_cache_timeout,
+    )
+
+
+def test_get_all_datasource_names_invalid_type() -> None:
+    from superset.db_engine_specs.sqlite import SqliteEngineSpec
+
+    database = mock.MagicMock()
+    database.get_all_schema_names.return_value = ["schema1"]
+    invalid_type = "asdf"
+
+    with pytest.raises(Exception):
+        SqliteEngineSpec.get_all_datasource_names(database, invalid_type)
+
+
 @pytest.mark.parametrize(
     "dttm,grain,expected",
     [