diff --git a/dandiapi/api/tests/test_asset.py b/dandiapi/api/tests/test_asset.py index f9a6fc80f..680f9de53 100644 --- a/dandiapi/api/tests/test_asset.py +++ b/dandiapi/api/tests/test_asset.py @@ -1271,6 +1271,8 @@ def test_asset_direct_info(api_client, asset): ['a/b/c/d.txt', 'a/b/c/e.txt'], ), ('a/b/d/*', ['a/b/d/e.txt']), + ('*b*e.txt', ['a/b/c/e.txt', 'a/b/d/e.txt']), + ('.*[a|b].txt', []), # regexes shouldn't be evaluated ], ) def test_asset_rest_glob(api_client, asset_factory, version, glob_pattern, expected_paths): @@ -1284,52 +1286,3 @@ def test_asset_rest_glob(api_client, asset_factory, version, glob_pattern, expec ) assert expected_paths == [asset['path'] for asset in resp.json()['results']] - - -@pytest.mark.django_db -@pytest.mark.parametrize( - 'regex_pattern,expected_paths', - [ - ( - '[0-9].txt', - ['1.txt', '1/2/3.txt'], - ), - ( - '[a-z].txt', - ['a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt'], - ), - ], -) -def test_asset_rest_regex_valid(api_client, asset_factory, version, regex_pattern, expected_paths): - paths = ('1.txt', '1/2/3.txt', 'a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt') - for path in paths: - version.assets.add(asset_factory(path=path)) - - resp = api_client.get( - f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/', - {'regex': regex_pattern}, - ) - - assert expected_paths == [asset['path'] for asset in resp.json()['results']] - - -@pytest.mark.django_db -def test_asset_rest_regex_invalid(api_client, version): - resp = api_client.get( - f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/', - {'regex': '[[[[['}, # provide an invalid regex - ) - - assert resp.status_code == 400 - - -@pytest.mark.django_db -def test_asset_rest_glob_regex_together(api_client, version): - """Test that including both a glob and regex returns a 400 error.""" - resp = api_client.get( - f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/', - {'regex': '[0-9].txt', 'glob': '*.txt'}, - ) - - assert resp.status_code == 400 - assert resp.json() == {'glob': ['Cannot specify both glob and regex']} diff --git a/dandiapi/api/views/asset.py b/dandiapi/api/views/asset.py index d430cdaa2..152c91569 100644 --- a/dandiapi/api/views/asset.py +++ b/dandiapi/api/views/asset.py @@ -517,23 +517,14 @@ def list(self, request, *args, **kwargs): queryset = self.filter_queryset(self.get_queryset()) glob_pattern: str | None = serializer.validated_data.get('glob') - regex_pattern: str | None = serializer.validated_data.get('regex') - - if regex_pattern is not None: - try: - # Validate the regex by calling re.compile on it - re.compile(regex_pattern) - queryset = queryset.filter(path__iregex=regex_pattern) - except re.error: - return Response( - data=f'{regex_pattern} is not a valid regex pattern.', - status=status.HTTP_400_BAD_REQUEST, - ) if glob_pattern is not None: - queryset = queryset.filter( - path__iregex=glob_pattern.replace('*', '.*').replace('.', '\\.') - ) + # Escape special characters in the glob pattern. This is a security precaution taken + # since we are using postgres' regex search. A malicious user who knows this could + # include a regex as part of the glob expression, which postgres would happily parse + # and use if it's not escaped. + glob_pattern = re.escape(glob_pattern) + queryset = queryset.filter(path__iregex=glob_pattern.replace('\\*', '.*')) page = self.paginate_queryset(queryset) if page is not None: diff --git a/dandiapi/api/views/serializers.py b/dandiapi/api/views/serializers.py index 46cb9a0ca..eeb83f568 100644 --- a/dandiapi/api/views/serializers.py +++ b/dandiapi/api/views/serializers.py @@ -201,11 +201,3 @@ class AssetPathsQueryParameterSerializer(serializers.Serializer): class AssetListSerializer(serializers.Serializer): glob = serializers.CharField(required=False) - regex = serializers.CharField(required=False) - - def validate(self, attrs): - if 'glob' in attrs and 'regex' in attrs: - raise serializers.ValidationError( - {'glob': 'Cannot specify both glob and regex'}, - ) - return super().validate(attrs)