Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix improper escaping of glob parameter, remove regex search #1062

Merged
merged 4 commits into from
Jun 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 2 additions & 49 deletions dandiapi/api/tests/test_asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1271,6 +1271,8 @@ def test_asset_direct_info(api_client, asset):
['a/b/c/d.txt', 'a/b/c/e.txt'],
),
('a/b/d/*', ['a/b/d/e.txt']),
('*b*e.txt', ['a/b/c/e.txt', 'a/b/d/e.txt']),
jjnesbitt marked this conversation as resolved.
Show resolved Hide resolved
('.*[a|b].txt', []), # regexes shouldn't be evaluated
],
)
def test_asset_rest_glob(api_client, asset_factory, version, glob_pattern, expected_paths):
Expand All @@ -1284,52 +1286,3 @@ def test_asset_rest_glob(api_client, asset_factory, version, glob_pattern, expec
)

assert expected_paths == [asset['path'] for asset in resp.json()['results']]


@pytest.mark.django_db
@pytest.mark.parametrize(
'regex_pattern,expected_paths',
[
(
'[0-9].txt',
['1.txt', '1/2/3.txt'],
),
(
'[a-z].txt',
['a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt'],
),
],
)
def test_asset_rest_regex_valid(api_client, asset_factory, version, regex_pattern, expected_paths):
paths = ('1.txt', '1/2/3.txt', 'a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt')
for path in paths:
version.assets.add(asset_factory(path=path))

resp = api_client.get(
f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/',
{'regex': regex_pattern},
)

assert expected_paths == [asset['path'] for asset in resp.json()['results']]


@pytest.mark.django_db
def test_asset_rest_regex_invalid(api_client, version):
resp = api_client.get(
f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/',
{'regex': '[[[[['}, # provide an invalid regex
)

assert resp.status_code == 400


@pytest.mark.django_db
def test_asset_rest_glob_regex_together(api_client, version):
"""Test that including both a glob and regex returns a 400 error."""
resp = api_client.get(
f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/',
{'regex': '[0-9].txt', 'glob': '*.txt'},
)

assert resp.status_code == 400
assert resp.json() == {'glob': ['Cannot specify both glob and regex']}
21 changes: 6 additions & 15 deletions dandiapi/api/views/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,23 +517,14 @@ def list(self, request, *args, **kwargs):

queryset = self.filter_queryset(self.get_queryset())
glob_pattern: str | None = serializer.validated_data.get('glob')
regex_pattern: str | None = serializer.validated_data.get('regex')

if regex_pattern is not None:
try:
# Validate the regex by calling re.compile on it
re.compile(regex_pattern)
queryset = queryset.filter(path__iregex=regex_pattern)
except re.error:
return Response(
data=f'{regex_pattern} is not a valid regex pattern.',
status=status.HTTP_400_BAD_REQUEST,
)

if glob_pattern is not None:
queryset = queryset.filter(
path__iregex=glob_pattern.replace('*', '.*').replace('.', '\\.')
)
# Escape special characters in the glob pattern. This is a security precaution taken
# since we are using postgres' regex search. A malicious user who knows this could
# include a regex as part of the glob expression, which postgres would happily parse
# and use if it's not escaped.
glob_pattern = re.escape(glob_pattern)
queryset = queryset.filter(path__iregex=glob_pattern.replace('\\*', '.*'))

page = self.paginate_queryset(queryset)
if page is not None:
Expand Down
8 changes: 0 additions & 8 deletions dandiapi/api/views/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,11 +201,3 @@ class AssetPathsQueryParameterSerializer(serializers.Serializer):

class AssetListSerializer(serializers.Serializer):
glob = serializers.CharField(required=False)
regex = serializers.CharField(required=False)

def validate(self, attrs):
if 'glob' in attrs and 'regex' in attrs:
raise serializers.ValidationError(
{'glob': 'Cannot specify both glob and regex'},
)
return super().validate(attrs)