diff --git a/dandiapi/api/tests/test_asset.py b/dandiapi/api/tests/test_asset.py
index 8bef3adb9..e4bf8043a 100644
--- a/dandiapi/api/tests/test_asset.py
+++ b/dandiapi/api/tests/test_asset.py
@@ -1240,3 +1240,81 @@ def test_asset_direct_info(api_client, asset):
         'created': TIMESTAMP_RE,
         'modified': TIMESTAMP_RE,
     }
+
+
+@pytest.mark.django_db
+@pytest.mark.parametrize(
+    'glob_pattern,expected_paths',
+    [
+        (
+            '*.txt',
+            ['a/b.txt', 'a/b/c.txt', 'a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt'],
+        ),
+        (
+            'a/b/c/*',
+            ['a/b/c/d.txt', 'a/b/c/e.txt'],
+        ),
+        ('a/b/d/*', ['a/b/d/e.txt']),
+        ('a/*/e.txt', ['a/b/c/e.txt', 'a/b/d/e.txt']),
+    ],
+)
+def test_asset_rest_glob(api_client, asset_factory, version, glob_pattern, expected_paths):
+    paths = ('a/b.txt', 'a/b/c.txt', 'a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt')
+    for path in paths:
+        version.assets.add(asset_factory(path=path))
+
+    resp = api_client.get(
+        f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/',
+        {'glob': glob_pattern},
+    )
+
+    assert expected_paths == [asset['path'] for asset in resp.json()['results']]
+
+
+@pytest.mark.django_db
+@pytest.mark.parametrize(
+    'regex_pattern,expected_paths',
+    [
+        (
+            '[0-9].txt',
+            ['1.txt', '1/2/3.txt'],
+        ),
+        (
+            '[a-z].txt',
+            ['a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt'],
+        ),
+    ],
+)
+def test_asset_rest_regex_valid(api_client, asset_factory, version, regex_pattern, expected_paths):
+    paths = ('1.txt', '1/2/3.txt', 'a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt')
+    for path in paths:
+        version.assets.add(asset_factory(path=path))
+
+    resp = api_client.get(
+        f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/',
+        {'regex': regex_pattern},
+    )
+
+    assert expected_paths == [asset['path'] for asset in resp.json()['results']]
+
+
+@pytest.mark.django_db
+def test_asset_rest_regex_invalid(api_client, version):
+    resp = api_client.get(
+        f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/',
+        {'regex': '[[[[['},  # provide an invalid regex
+    )
+
+    assert resp.status_code == 400
+
+
+@pytest.mark.django_db
+def test_asset_rest_glob_regex_together(api_client, version):
+    """Test that including both a glob and regex returns a 400 error."""
+    resp = api_client.get(
+        f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/',
+        {'regex': '[0-9].txt', 'glob': '*.txt'},
+    )
+
+    assert resp.status_code == 400
+    assert resp.json() == {'glob': ['Cannot specify both glob and regex']}
diff --git a/dandiapi/api/views/asset.py b/dandiapi/api/views/asset.py
index 3cc821620..ced14f280 100644
--- a/dandiapi/api/views/asset.py
+++ b/dandiapi/api/views/asset.py
@@ -1,3 +1,7 @@
+from __future__ import annotations
+
+import re
+
 try:
     from storages.backends.s3boto3 import S3Boto3Storage
 except ImportError:
@@ -41,6 +45,7 @@
 )
 from dandiapi.api.views.serializers import (
     AssetDetailSerializer,
+    AssetListSerializer,
     AssetPathsQueryParameterSerializer,
     AssetPathsResponseSerializer,
     AssetSerializer,
@@ -505,6 +510,48 @@ def destroy(self, request, versions__dandiset__pk, versions__version, **kwargs):
 
         return Response(None, status=status.HTTP_204_NO_CONTENT)
 
+    @swagger_auto_schema(query_serializer=AssetListSerializer)
+    def list(self, request, *args, **kwargs):
+        """List the assets of a version, optionally filtered by path.
+
+        At most one of the `glob` and `regex` query parameters may be given.
+        Either one is applied as a case-insensitive regex filter on the asset
+        path; an invalid `regex` is answered with HTTP 400.
+        """
+        serializer = AssetListSerializer(data=request.query_params)
+        serializer.is_valid(raise_exception=True)
+
+        queryset = self.filter_queryset(self.get_queryset())
+        glob_pattern: str | None = serializer.validated_data.get('glob')
+        regex_pattern: str | None = serializer.validated_data.get('regex')
+
+        if regex_pattern is not None:
+            try:
+                # Validate the regex by calling re.compile on it
+                re.compile(regex_pattern)
+            except re.error:
+                return Response(
+                    data=f'{regex_pattern} is not a valid regex pattern.',
+                    status=status.HTTP_400_BAD_REQUEST,
+                )
+            queryset = queryset.filter(path__iregex=regex_pattern)
+
+        if glob_pattern is not None:
+            # Escape the whole pattern first so that every character other than
+            # `*` is matched literally, then expand the (now escaped) `*` into
+            # `.*`. Doing it the other way around turns `*` into `\.*`.
+            queryset = queryset.filter(
+                path__iregex=re.escape(glob_pattern).replace('\\*', '.*')
+            )
+
+        page = self.paginate_queryset(queryset)
+        if page is not None:
+            serializer = self.get_serializer(page, many=True)
+            return self.get_paginated_response(serializer.data)
+
+        serializer = self.get_serializer(queryset, many=True)
+        return Response(serializer.data)
+
     @swagger_auto_schema(
         manual_parameters=[PATH_PREFIX_PARAM],
         responses={200: AssetPathsResponseSerializer()},
diff --git a/dandiapi/api/views/serializers.py b/dandiapi/api/views/serializers.py
index 5affd7265..46cb9a0ca 100644
--- a/dandiapi/api/views/serializers.py
+++ b/dandiapi/api/views/serializers.py
@@ -197,3 +197,16 @@ class AssetPathsQueryParameterSerializer(serializers.Serializer):
     path_prefix = serializers.CharField(default='')
     page = serializers.IntegerField(default=1)
     page_size = serializers.IntegerField(default=DandiPagination.page_size)
+
+
+class AssetListSerializer(serializers.Serializer):
+    """Query parameters accepted by the asset list endpoint."""
+    glob = serializers.CharField(required=False)
+    regex = serializers.CharField(required=False)
+
+    def validate(self, attrs):
+        if 'glob' in attrs and 'regex' in attrs:
+            raise serializers.ValidationError(
+                {'glob': 'Cannot specify both glob and regex'},
+            )
+        return super().validate(attrs)