Skip to content

Commit

Permalink
fix: dataset safe URL for explore_url (apache#24686)
Browse files Browse the repository at this point in the history
(cherry picked from commit a9efd4b)
  • Loading branch information
dpgaspar authored and michael-s-molina committed Aug 23, 2023
1 parent b5f7f54 commit 931e1b2
Show file tree
Hide file tree
Showing 12 changed files with 85 additions and 147 deletions.
1 change: 1 addition & 0 deletions UPDATING.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ assists people when migrating to a new version.

### Breaking Changes

- [24686]https://github.com/apache/superset/pull/24686): All dataset's custom explore_url are handled as relative URLs on the frontend, behaviour controlled by PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET.
- [24262](https://github.com/apache/superset/pull/24262): Enabled `TALISMAN_ENABLED` flag by default and provided stricter default Content Security Policy
- [24415](https://github.com/apache/superset/pull/24415): Removed the obsolete Druid NoSQL REGEX operator.
- [24423](https://github.com/apache/superset/pull/24423): Removed deprecated APIs `/superset/slice_json/...`, `/superset/annotation_json/...`
Expand Down
60 changes: 59 additions & 1 deletion superset-frontend/src/pages/DatasetList/DatasetList.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import IndeterminateCheckbox from 'src/components/IndeterminateCheckbox';
import waitForComponentToPaint from 'spec/helpers/waitForComponentToPaint';
import { act } from 'react-dom/test-utils';
import SubMenu from 'src/features/home/SubMenu';
import * as reactRedux from 'react-redux';

// store needed for withToasts(DatasetList)
const mockStore = configureStore([thunk]);
Expand All @@ -47,13 +48,15 @@ const datasetsDuplicateEndpoint = 'glob:*/api/v1/dataset/duplicate*';
const databaseEndpoint = 'glob:*/api/v1/dataset/related/database*';
const datasetsEndpoint = 'glob:*/api/v1/dataset/?*';

const useSelectorMock = jest.spyOn(reactRedux, 'useSelector');

const mockdatasets = [...new Array(3)].map((_, i) => ({
changed_by_name: 'user',
kind: i === 0 ? 'virtual' : 'physical', // ensure there is 1 virtual
changed_by: 'user',
changed_on: new Date().toISOString(),
database_name: `db ${i}`,
explore_url: `/explore/?datasource_type=table&datasource_id=${i}`,
explore_url: `https://www.google.com?${i}`,
id: i,
schema: `schema ${i}`,
table_name: `coolest table ${i}`,
Expand Down Expand Up @@ -280,3 +283,58 @@ describe('RTL', () => {
expect(importTooltip).toBeInTheDocument();
});
});

describe('Prevent unsafe URLs', () => {
const mockedProps = {};
let wrapper: any;

it('Check prevent unsafe is on renders relative links', async () => {
const tdColumnsNumber = 9;
useSelectorMock.mockReturnValue(true);
wrapper = await mountAndWait(mockedProps);
const tdElements = wrapper.find(ListView).find('td');
expect(
tdElements
.at(0 * tdColumnsNumber + 1)
.find('a')
.prop('href'),
).toBe('/https://www.google.com?0');
expect(
tdElements
.at(1 * tdColumnsNumber + 1)
.find('a')
.prop('href'),
).toBe('/https://www.google.com?1');
expect(
tdElements
.at(2 * tdColumnsNumber + 1)
.find('a')
.prop('href'),
).toBe('/https://www.google.com?2');
});

it('Check prevent unsafe is off renders absolute links', async () => {
const tdColumnsNumber = 9;
useSelectorMock.mockReturnValue(false);
wrapper = await mountAndWait(mockedProps);
const tdElements = wrapper.find(ListView).find('td');
expect(
tdElements
.at(0 * tdColumnsNumber + 1)
.find('a')
.prop('href'),
).toBe('https://www.google.com?0');
expect(
tdElements
.at(1 * tdColumnsNumber + 1)
.find('a')
.prop('href'),
).toBe('https://www.google.com?1');
expect(
tdElements
.at(2 * tdColumnsNumber + 1)
.find('a')
.prop('href'),
).toBe('https://www.google.com?2');
});
});
27 changes: 21 additions & 6 deletions superset-frontend/src/pages/DatasetList/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import React, {
useMemo,
useCallback,
} from 'react';
import { useHistory } from 'react-router-dom';
import { Link, useHistory } from 'react-router-dom';
import rison from 'rison';
import {
createFetchRelated,
Expand Down Expand Up @@ -69,6 +69,7 @@ import {
CONFIRM_OVERWRITE_MESSAGE,
} from 'src/features/datasets/constants';
import DuplicateDatasetModal from 'src/features/datasets/DuplicateDatasetModal';
import { useSelector } from 'react-redux';

const extensionsRegistry = getExtensionsRegistry();
const DatasetDeleteRelatedExtension = extensionsRegistry.get(
Expand Down Expand Up @@ -181,6 +182,11 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
setSSHTunnelPrivateKeyPasswordFields,
] = useState<string[]>([]);

const PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = useSelector<any, boolean>(
state =>
state.common?.conf?.PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET || false,
);

const openDatasetImportModal = () => {
showImportModal(true);
};
Expand Down Expand Up @@ -309,11 +315,20 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
},
},
}: any) => {
const titleLink = (
// exploreUrl can be a link to Explore or an external link
// in the first case use SPA routing, else use HTML anchor
<GenericLink to={exploreURL}>{datasetTitle}</GenericLink>
);
let titleLink: JSX.Element;
if (PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET) {
titleLink = (
<Link data-test="internal-link" to={exploreURL}>
{datasetTitle}
</Link>
);
} else {
titleLink = (
// exploreUrl can be a link to Explore or an external link
// in the first case use SPA routing, else use HTML anchor
<GenericLink to={exploreURL}>{datasetTitle}</GenericLink>
);
}
try {
const parsedExtra = JSON.parse(extra);
return (
Expand Down
2 changes: 1 addition & 1 deletion superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1460,7 +1460,7 @@ def EMAIL_HEADER_MUTATOR( # pylint: disable=invalid-name,unused-argument
# Typically these should not be allowed.
PREVENT_UNSAFE_DB_CONNECTIONS = True

# Prevents unsafe default endpoints to be registered on datasets.
# If true all default urls on datasets will be handled as relative URLs by the frontend
PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = True

# Define a list of allowed URLs for dataset data imports (v1).
Expand Down
17 changes: 0 additions & 17 deletions superset/datasets/commands/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,23 +61,6 @@ def __init__(self, table_name: str) -> None:
)


class DatasetEndpointUnsafeValidationError(ValidationError):
"""
Marshmallow validation error for unsafe dataset default endpoint
"""

def __init__(self) -> None:
super().__init__(
[
_(
"The submitted URL is not considered safe,"
" only use URLs with the same domain as Superset."
)
],
field_name="default_endpoint",
)


class DatasetColumnNotFoundValidationError(ValidationError):
"""
Marshmallow validation error when dataset column for update does not exist
Expand Down
12 changes: 0 additions & 12 deletions superset/datasets/commands/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from collections import Counter
from typing import Any, Optional

from flask import current_app
from flask_appbuilder.models.sqla import Model
from marshmallow import ValidationError

Expand All @@ -32,7 +31,6 @@
DatasetColumnNotFoundValidationError,
DatasetColumnsDuplicateValidationError,
DatasetColumnsExistsValidationError,
DatasetEndpointUnsafeValidationError,
DatasetExistsValidationError,
DatasetForbiddenError,
DatasetInvalidError,
Expand All @@ -43,7 +41,6 @@
DatasetUpdateFailedError,
)
from superset.exceptions import SupersetSecurityException
from superset.utils.urls import is_safe_url

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -104,15 +101,6 @@ def validate(self) -> None:
self._properties["owners"] = owners
except ValidationError as ex:
exceptions.append(ex)
# Validate default URL safety
default_endpoint = self._properties.get("default_endpoint")
if (
default_endpoint
and not is_safe_url(default_endpoint)
and current_app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"]
):
exceptions.append(DatasetEndpointUnsafeValidationError())

# Validate columns
if columns := self._properties.get("columns"):
self._validate_columns(columns, exceptions)
Expand Down
19 changes: 1 addition & 18 deletions superset/utils/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,10 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import unicodedata
import urllib
from typing import Any
from urllib.parse import urlparse

from flask import current_app, request, url_for
from flask import current_app, url_for


def get_url_host(user_friendly: bool = False) -> str:
Expand Down Expand Up @@ -52,18 +50,3 @@ def modify_url_query(url: str, **kwargs: Any) -> str:
f"{k}={urllib.parse.quote(str(v[0]))}" for k, v in params.items()
)
return urllib.parse.urlunsplit(parts)


def is_safe_url(url: str) -> bool:
if url.startswith("///"):
return False
try:
ref_url = urlparse(request.host_url)
test_url = urlparse(url)
except ValueError:
return False
if unicodedata.category(url[0])[0] == "C":
return False
if test_url.scheme != ref_url.scheme or ref_url.netloc != test_url.netloc:
return False
return True
1 change: 1 addition & 0 deletions superset/views/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@
"ALERT_REPORTS_DEFAULT_RETENTION",
"ALERT_REPORTS_DEFAULT_WORKING_TIMEOUT",
"NATIVE_FILTER_DEFAULT_ROW_LIMIT",
"PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET",
)

logger = logging.getLogger(__name__)
Expand Down
17 changes: 1 addition & 16 deletions superset/views/datasource/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from collections import Counter
from typing import Any

from flask import current_app, redirect, request
from flask import redirect, request
from flask_appbuilder import expose, permission_name
from flask_appbuilder.api import rison
from flask_appbuilder.security.decorators import has_access, has_access_api
Expand All @@ -40,7 +40,6 @@
from superset.models.core import Database
from superset.superset_typing import FlaskResponse
from superset.utils.core import DatasourceType
from superset.utils.urls import is_safe_url
from superset.views.base import (
api,
BaseSupersetView,
Expand Down Expand Up @@ -82,20 +81,6 @@ def save(self) -> FlaskResponse:
datasource_id = datasource_dict.get("id")
datasource_type = datasource_dict.get("type")
database_id = datasource_dict["database"].get("id")
default_endpoint = datasource_dict["default_endpoint"]
if (
default_endpoint
and not is_safe_url(default_endpoint)
and current_app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"]
):
return json_error_response(
_(
"The submitted URL is not considered safe,"
" only use URLs with the same domain as Superset."
),
status=400,
)

orm_datasource = DatasourceDAO.get_datasource(
db.session, DatasourceType(datasource_type), datasource_id
)
Expand Down
26 changes: 0 additions & 26 deletions tests/integration_tests/datasets/api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1446,32 +1446,6 @@ def test_update_dataset_item_uniqueness(self):
db.session.delete(ab_user)
db.session.commit()

def test_update_dataset_unsafe_default_endpoint(self):
"""
Dataset API: Test unsafe default endpoint
"""
if backend() == "sqlite":
return

dataset = self.insert_default_dataset()
self.login(username="admin")
uri = f"api/v1/dataset/{dataset.id}"
table_data = {"default_endpoint": "http://www.google.com"}
rv = self.client.put(uri, json=table_data)
data = json.loads(rv.data.decode("utf-8"))
assert rv.status_code == 422
expected_response = {
"message": {
"default_endpoint": [
"The submitted URL is not considered safe,"
" only use URLs with the same domain as Superset."
]
}
}
assert data == expected_response
db.session.delete(dataset)
db.session.commit()

@patch("superset.daos.dataset.DatasetDAO.update")
def test_update_dataset_sqlalchemy_error(self, mock_dao_update):
"""
Expand Down
26 changes: 0 additions & 26 deletions tests/integration_tests/datasource_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,32 +302,6 @@ def test_save(self):
print(k)
self.assertEqual(resp[k], datasource_post[k])

def test_save_default_endpoint_validation_fail(self):
self.login(username="admin")
tbl_id = self.get_table(name="birth_names").id

datasource_post = get_datasource_post()
datasource_post["id"] = tbl_id
datasource_post["owners"] = [1]
datasource_post["default_endpoint"] = "http://www.google.com"
data = dict(data=json.dumps(datasource_post))
resp = self.client.post("/datasource/save/", data=data)
assert resp.status_code == 400

def test_save_default_endpoint_validation_unsafe(self):
self.app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] = False
self.login(username="admin")
tbl_id = self.get_table(name="birth_names").id

datasource_post = get_datasource_post()
datasource_post["id"] = tbl_id
datasource_post["owners"] = [1]
datasource_post["default_endpoint"] = "http://www.google.com"
data = dict(data=json.dumps(datasource_post))
resp = self.client.post("/datasource/save/", data=data)
assert resp.status_code == 200
self.app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] = True

def test_save_default_endpoint_validation_success(self):
self.login(username="admin")
tbl_id = self.get_table(name="birth_names").id
Expand Down
24 changes: 0 additions & 24 deletions tests/unit_tests/utils/urls_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,27 +39,3 @@ def test_convert_dashboard_link() -> None:
def test_convert_dashboard_link_with_integer() -> None:
test_url = modify_url_query(EXPLORE_DASHBOARD_LINK, standalone=0)
assert test_url == "http://localhost:9000/superset/dashboard/3/?standalone=0"


@pytest.mark.parametrize(
"url,is_safe",
[
("http://localhost/", True),
("http://localhost/superset/1", True),
("https://localhost/", False),
("https://localhost/superset/1", False),
("localhost/superset/1", False),
("ftp://localhost/superset/1", False),
("http://external.com", False),
("https://external.com", False),
("external.com", False),
("///localhost", False),
("xpto://localhost:[3/1/", False),
],
)
def test_is_safe_url(url: str, is_safe: bool) -> None:
from superset import app
from superset.utils.urls import is_safe_url

with app.test_request_context("/"):
assert is_safe_url(url) == is_safe

0 comments on commit 931e1b2

Please sign in to comment.