From 931e1b21396c55a7da8fa8b026c64c63b333e3eb Mon Sep 17 00:00:00 2001 From: Daniel Vaz Gaspar Date: Wed, 23 Aug 2023 13:31:44 +0100 Subject: [PATCH] fix: dataset safe URL for explore_url (#24686) (cherry picked from commit a9efd4b2e307b0df68e88ebbd02d22d7032fa451) --- UPDATING.md | 1 + .../pages/DatasetList/DatasetList.test.tsx | 60 ++++++++++++++++++- .../src/pages/DatasetList/index.tsx | 27 +++++++-- superset/config.py | 2 +- superset/datasets/commands/exceptions.py | 17 ------ superset/datasets/commands/update.py | 12 ---- superset/utils/urls.py | 19 +----- superset/views/base.py | 1 + superset/views/datasource/views.py | 17 +----- tests/integration_tests/datasets/api_tests.py | 26 -------- tests/integration_tests/datasource_tests.py | 26 -------- tests/unit_tests/utils/urls_tests.py | 24 -------- 12 files changed, 85 insertions(+), 147 deletions(-) diff --git a/UPDATING.md b/UPDATING.md index 5a29c43dfad99..19c60a19b7439 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -51,6 +51,7 @@ assists people when migrating to a new version. ### Breaking Changes +- [24686]https://github.com/apache/superset/pull/24686): All dataset's custom explore_url are handled as relative URLs on the frontend, behaviour controlled by PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET. - [24262](https://github.com/apache/superset/pull/24262): Enabled `TALISMAN_ENABLED` flag by default and provided stricter default Content Security Policy - [24415](https://github.com/apache/superset/pull/24415): Removed the obsolete Druid NoSQL REGEX operator. - [24423](https://github.com/apache/superset/pull/24423): Removed deprecated APIs `/superset/slice_json/...`, `/superset/annotation_json/...` diff --git a/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx b/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx index 115a861bddf52..916dd0615bb8b 100644 --- a/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx +++ b/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx @@ -35,6 +35,7 @@ import IndeterminateCheckbox from 'src/components/IndeterminateCheckbox'; import waitForComponentToPaint from 'spec/helpers/waitForComponentToPaint'; import { act } from 'react-dom/test-utils'; import SubMenu from 'src/features/home/SubMenu'; +import * as reactRedux from 'react-redux'; // store needed for withToasts(DatasetList) const mockStore = configureStore([thunk]); @@ -47,13 +48,15 @@ const datasetsDuplicateEndpoint = 'glob:*/api/v1/dataset/duplicate*'; const databaseEndpoint = 'glob:*/api/v1/dataset/related/database*'; const datasetsEndpoint = 'glob:*/api/v1/dataset/?*'; +const useSelectorMock = jest.spyOn(reactRedux, 'useSelector'); + const mockdatasets = [...new Array(3)].map((_, i) => ({ changed_by_name: 'user', kind: i === 0 ? 'virtual' : 'physical', // ensure there is 1 virtual changed_by: 'user', changed_on: new Date().toISOString(), database_name: `db ${i}`, - explore_url: `/explore/?datasource_type=table&datasource_id=${i}`, + explore_url: `https://www.google.com?${i}`, id: i, schema: `schema ${i}`, table_name: `coolest table ${i}`, @@ -280,3 +283,58 @@ describe('RTL', () => { expect(importTooltip).toBeInTheDocument(); }); }); + +describe('Prevent unsafe URLs', () => { + const mockedProps = {}; + let wrapper: any; + + it('Check prevent unsafe is on renders relative links', async () => { + const tdColumnsNumber = 9; + useSelectorMock.mockReturnValue(true); + wrapper = await mountAndWait(mockedProps); + const tdElements = wrapper.find(ListView).find('td'); + expect( + tdElements + .at(0 * tdColumnsNumber + 1) + .find('a') + .prop('href'), + ).toBe('/https://www.google.com?0'); + expect( + tdElements + .at(1 * tdColumnsNumber + 1) + .find('a') + .prop('href'), + ).toBe('/https://www.google.com?1'); + expect( + tdElements + .at(2 * tdColumnsNumber + 1) + .find('a') + .prop('href'), + ).toBe('/https://www.google.com?2'); + }); + + it('Check prevent unsafe is off renders absolute links', async () => { + const tdColumnsNumber = 9; + useSelectorMock.mockReturnValue(false); + wrapper = await mountAndWait(mockedProps); + const tdElements = wrapper.find(ListView).find('td'); + expect( + tdElements + .at(0 * tdColumnsNumber + 1) + .find('a') + .prop('href'), + ).toBe('https://www.google.com?0'); + expect( + tdElements + .at(1 * tdColumnsNumber + 1) + .find('a') + .prop('href'), + ).toBe('https://www.google.com?1'); + expect( + tdElements + .at(2 * tdColumnsNumber + 1) + .find('a') + .prop('href'), + ).toBe('https://www.google.com?2'); + }); +}); diff --git a/superset-frontend/src/pages/DatasetList/index.tsx b/superset-frontend/src/pages/DatasetList/index.tsx index 7633edb01617a..0f3fb84ab181e 100644 --- a/superset-frontend/src/pages/DatasetList/index.tsx +++ b/superset-frontend/src/pages/DatasetList/index.tsx @@ -30,7 +30,7 @@ import React, { useMemo, useCallback, } from 'react'; -import { useHistory } from 'react-router-dom'; +import { Link, useHistory } from 'react-router-dom'; import rison from 'rison'; import { createFetchRelated, @@ -69,6 +69,7 @@ import { CONFIRM_OVERWRITE_MESSAGE, } from 'src/features/datasets/constants'; import DuplicateDatasetModal from 'src/features/datasets/DuplicateDatasetModal'; +import { useSelector } from 'react-redux'; const extensionsRegistry = getExtensionsRegistry(); const DatasetDeleteRelatedExtension = extensionsRegistry.get( @@ -181,6 +182,11 @@ const DatasetList: FunctionComponent = ({ setSSHTunnelPrivateKeyPasswordFields, ] = useState([]); + const PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = useSelector( + state => + state.common?.conf?.PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET || false, + ); + const openDatasetImportModal = () => { showImportModal(true); }; @@ -309,11 +315,20 @@ const DatasetList: FunctionComponent = ({ }, }, }: any) => { - const titleLink = ( - // exploreUrl can be a link to Explore or an external link - // in the first case use SPA routing, else use HTML anchor - {datasetTitle} - ); + let titleLink: JSX.Element; + if (PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET) { + titleLink = ( + + {datasetTitle} + + ); + } else { + titleLink = ( + // exploreUrl can be a link to Explore or an external link + // in the first case use SPA routing, else use HTML anchor + {datasetTitle} + ); + } try { const parsedExtra = JSON.parse(extra); return ( diff --git a/superset/config.py b/superset/config.py index 17f5bb72a2621..a2ce1174a16a0 100644 --- a/superset/config.py +++ b/superset/config.py @@ -1460,7 +1460,7 @@ def EMAIL_HEADER_MUTATOR( # pylint: disable=invalid-name,unused-argument # Typically these should not be allowed. PREVENT_UNSAFE_DB_CONNECTIONS = True -# Prevents unsafe default endpoints to be registered on datasets. +# If true all default urls on datasets will be handled as relative URLs by the frontend PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = True # Define a list of allowed URLs for dataset data imports (v1). diff --git a/superset/datasets/commands/exceptions.py b/superset/datasets/commands/exceptions.py index fe9fe94cc61a3..f135294980835 100644 --- a/superset/datasets/commands/exceptions.py +++ b/superset/datasets/commands/exceptions.py @@ -61,23 +61,6 @@ def __init__(self, table_name: str) -> None: ) -class DatasetEndpointUnsafeValidationError(ValidationError): - """ - Marshmallow validation error for unsafe dataset default endpoint - """ - - def __init__(self) -> None: - super().__init__( - [ - _( - "The submitted URL is not considered safe," - " only use URLs with the same domain as Superset." - ) - ], - field_name="default_endpoint", - ) - - class DatasetColumnNotFoundValidationError(ValidationError): """ Marshmallow validation error when dataset column for update does not exist diff --git a/superset/datasets/commands/update.py b/superset/datasets/commands/update.py index 1636805567b52..a38439fb7f235 100644 --- a/superset/datasets/commands/update.py +++ b/superset/datasets/commands/update.py @@ -18,7 +18,6 @@ from collections import Counter from typing import Any, Optional -from flask import current_app from flask_appbuilder.models.sqla import Model from marshmallow import ValidationError @@ -32,7 +31,6 @@ DatasetColumnNotFoundValidationError, DatasetColumnsDuplicateValidationError, DatasetColumnsExistsValidationError, - DatasetEndpointUnsafeValidationError, DatasetExistsValidationError, DatasetForbiddenError, DatasetInvalidError, @@ -43,7 +41,6 @@ DatasetUpdateFailedError, ) from superset.exceptions import SupersetSecurityException -from superset.utils.urls import is_safe_url logger = logging.getLogger(__name__) @@ -104,15 +101,6 @@ def validate(self) -> None: self._properties["owners"] = owners except ValidationError as ex: exceptions.append(ex) - # Validate default URL safety - default_endpoint = self._properties.get("default_endpoint") - if ( - default_endpoint - and not is_safe_url(default_endpoint) - and current_app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] - ): - exceptions.append(DatasetEndpointUnsafeValidationError()) - # Validate columns if columns := self._properties.get("columns"): self._validate_columns(columns, exceptions) diff --git a/superset/utils/urls.py b/superset/utils/urls.py index 31fbd89337af4..57a1b63dd41d3 100644 --- a/superset/utils/urls.py +++ b/superset/utils/urls.py @@ -14,12 +14,10 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import unicodedata import urllib from typing import Any -from urllib.parse import urlparse -from flask import current_app, request, url_for +from flask import current_app, url_for def get_url_host(user_friendly: bool = False) -> str: @@ -52,18 +50,3 @@ def modify_url_query(url: str, **kwargs: Any) -> str: f"{k}={urllib.parse.quote(str(v[0]))}" for k, v in params.items() ) return urllib.parse.urlunsplit(parts) - - -def is_safe_url(url: str) -> bool: - if url.startswith("///"): - return False - try: - ref_url = urlparse(request.host_url) - test_url = urlparse(url) - except ValueError: - return False - if unicodedata.category(url[0])[0] == "C": - return False - if test_url.scheme != ref_url.scheme or ref_url.netloc != test_url.netloc: - return False - return True diff --git a/superset/views/base.py b/superset/views/base.py index 250c16ebb2ca6..ab53ff07daa75 100644 --- a/superset/views/base.py +++ b/superset/views/base.py @@ -120,6 +120,7 @@ "ALERT_REPORTS_DEFAULT_RETENTION", "ALERT_REPORTS_DEFAULT_WORKING_TIMEOUT", "NATIVE_FILTER_DEFAULT_ROW_LIMIT", + "PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET", ) logger = logging.getLogger(__name__) diff --git a/superset/views/datasource/views.py b/superset/views/datasource/views.py index b2fd387379fd9..06dd37fbd9a35 100644 --- a/superset/views/datasource/views.py +++ b/superset/views/datasource/views.py @@ -18,7 +18,7 @@ from collections import Counter from typing import Any -from flask import current_app, redirect, request +from flask import redirect, request from flask_appbuilder import expose, permission_name from flask_appbuilder.api import rison from flask_appbuilder.security.decorators import has_access, has_access_api @@ -40,7 +40,6 @@ from superset.models.core import Database from superset.superset_typing import FlaskResponse from superset.utils.core import DatasourceType -from superset.utils.urls import is_safe_url from superset.views.base import ( api, BaseSupersetView, @@ -82,20 +81,6 @@ def save(self) -> FlaskResponse: datasource_id = datasource_dict.get("id") datasource_type = datasource_dict.get("type") database_id = datasource_dict["database"].get("id") - default_endpoint = datasource_dict["default_endpoint"] - if ( - default_endpoint - and not is_safe_url(default_endpoint) - and current_app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] - ): - return json_error_response( - _( - "The submitted URL is not considered safe," - " only use URLs with the same domain as Superset." - ), - status=400, - ) - orm_datasource = DatasourceDAO.get_datasource( db.session, DatasourceType(datasource_type), datasource_id ) diff --git a/tests/integration_tests/datasets/api_tests.py b/tests/integration_tests/datasets/api_tests.py index 8884b1171aa3f..2acefe48d0739 100644 --- a/tests/integration_tests/datasets/api_tests.py +++ b/tests/integration_tests/datasets/api_tests.py @@ -1446,32 +1446,6 @@ def test_update_dataset_item_uniqueness(self): db.session.delete(ab_user) db.session.commit() - def test_update_dataset_unsafe_default_endpoint(self): - """ - Dataset API: Test unsafe default endpoint - """ - if backend() == "sqlite": - return - - dataset = self.insert_default_dataset() - self.login(username="admin") - uri = f"api/v1/dataset/{dataset.id}" - table_data = {"default_endpoint": "http://www.google.com"} - rv = self.client.put(uri, json=table_data) - data = json.loads(rv.data.decode("utf-8")) - assert rv.status_code == 422 - expected_response = { - "message": { - "default_endpoint": [ - "The submitted URL is not considered safe," - " only use URLs with the same domain as Superset." - ] - } - } - assert data == expected_response - db.session.delete(dataset) - db.session.commit() - @patch("superset.daos.dataset.DatasetDAO.update") def test_update_dataset_sqlalchemy_error(self, mock_dao_update): """ diff --git a/tests/integration_tests/datasource_tests.py b/tests/integration_tests/datasource_tests.py index 4c05898cfe75e..12293325d118d 100644 --- a/tests/integration_tests/datasource_tests.py +++ b/tests/integration_tests/datasource_tests.py @@ -302,32 +302,6 @@ def test_save(self): print(k) self.assertEqual(resp[k], datasource_post[k]) - def test_save_default_endpoint_validation_fail(self): - self.login(username="admin") - tbl_id = self.get_table(name="birth_names").id - - datasource_post = get_datasource_post() - datasource_post["id"] = tbl_id - datasource_post["owners"] = [1] - datasource_post["default_endpoint"] = "http://www.google.com" - data = dict(data=json.dumps(datasource_post)) - resp = self.client.post("/datasource/save/", data=data) - assert resp.status_code == 400 - - def test_save_default_endpoint_validation_unsafe(self): - self.app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] = False - self.login(username="admin") - tbl_id = self.get_table(name="birth_names").id - - datasource_post = get_datasource_post() - datasource_post["id"] = tbl_id - datasource_post["owners"] = [1] - datasource_post["default_endpoint"] = "http://www.google.com" - data = dict(data=json.dumps(datasource_post)) - resp = self.client.post("/datasource/save/", data=data) - assert resp.status_code == 200 - self.app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] = True - def test_save_default_endpoint_validation_success(self): self.login(username="admin") tbl_id = self.get_table(name="birth_names").id diff --git a/tests/unit_tests/utils/urls_tests.py b/tests/unit_tests/utils/urls_tests.py index 287f346c3d99d..8ead2dacb4ed0 100644 --- a/tests/unit_tests/utils/urls_tests.py +++ b/tests/unit_tests/utils/urls_tests.py @@ -39,27 +39,3 @@ def test_convert_dashboard_link() -> None: def test_convert_dashboard_link_with_integer() -> None: test_url = modify_url_query(EXPLORE_DASHBOARD_LINK, standalone=0) assert test_url == "http://localhost:9000/superset/dashboard/3/?standalone=0" - - -@pytest.mark.parametrize( - "url,is_safe", - [ - ("http://localhost/", True), - ("http://localhost/superset/1", True), - ("https://localhost/", False), - ("https://localhost/superset/1", False), - ("localhost/superset/1", False), - ("ftp://localhost/superset/1", False), - ("http://external.com", False), - ("https://external.com", False), - ("external.com", False), - ("///localhost", False), - ("xpto://localhost:[3/1/", False), - ], -) -def test_is_safe_url(url: str, is_safe: bool) -> None: - from superset import app - from superset.utils.urls import is_safe_url - - with app.test_request_context("/"): - assert is_safe_url(url) == is_safe