Skip to content

Commit

Permalink
Merge pull request #1022 from dandi/asset-glob-param
Browse files Browse the repository at this point in the history
Add support for glob/regex filtering on `NestedAsset` list endpoint
  • Loading branch information
mvandenburgh committed Apr 19, 2022
2 parents 99bb491 + fa2d3f4 commit a7f19f1
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 0 deletions.
77 changes: 77 additions & 0 deletions dandiapi/api/tests/test_asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1240,3 +1240,80 @@ def test_asset_direct_info(api_client, asset):
'created': TIMESTAMP_RE,
'modified': TIMESTAMP_RE,
}


@pytest.mark.django_db
@pytest.mark.parametrize(
    'glob_pattern,expected_paths',
    [
        ('*.txt', ['a/b.txt', 'a/b/c.txt', 'a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt']),
        ('a/b/c/*', ['a/b/c/d.txt', 'a/b/c/e.txt']),
        ('a/b/d/*', ['a/b/d/e.txt']),
    ],
)
def test_asset_rest_glob(api_client, asset_factory, version, glob_pattern, expected_paths):
    """Check that the ``glob`` query parameter filters the asset listing by path."""
    # Populate the version with a small tree of assets to match against.
    for asset_path in ('a/b.txt', 'a/b/c.txt', 'a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt'):
        new_asset = asset_factory(path=asset_path)
        version.assets.add(new_asset)

    endpoint = (
        f'/api/dandisets/{version.dandiset.identifier}'
        f'/versions/{version.version}/assets/'
    )
    response = api_client.get(endpoint, {'glob': glob_pattern})

    # Both the set of returned paths and their order are compared.
    returned_paths = [entry['path'] for entry in response.json()['results']]
    assert expected_paths == returned_paths


@pytest.mark.django_db
@pytest.mark.parametrize(
    'regex_pattern,expected_paths',
    [
        ('[0-9].txt', ['1.txt', '1/2/3.txt']),
        ('[a-z].txt', ['a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt']),
    ],
)
def test_asset_rest_regex_valid(api_client, asset_factory, version, regex_pattern, expected_paths):
    """Check that the ``regex`` query parameter filters the asset listing by path."""
    # Mix digit-named and letter-named files so each pattern selects a distinct subset.
    for asset_path in ('1.txt', '1/2/3.txt', 'a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt'):
        new_asset = asset_factory(path=asset_path)
        version.assets.add(new_asset)

    endpoint = (
        f'/api/dandisets/{version.dandiset.identifier}'
        f'/versions/{version.version}/assets/'
    )
    response = api_client.get(endpoint, {'regex': regex_pattern})

    # Both the set of returned paths and their order are compared.
    returned_paths = [entry['path'] for entry in response.json()['results']]
    assert expected_paths == returned_paths


@pytest.mark.django_db
def test_asset_rest_regex_invalid(api_client, version):
    """Check that a regex which does not compile is rejected with a 400."""
    endpoint = (
        f'/api/dandisets/{version.dandiset.identifier}'
        f'/versions/{version.version}/assets/'
    )
    # Unbalanced brackets: not a valid regular expression.
    malformed_query = {'regex': '[[[[['}

    response = api_client.get(endpoint, malformed_query)

    assert response.status_code == 400


@pytest.mark.django_db
def test_asset_rest_glob_regex_together(api_client, version):
    """Check that supplying glob and regex in the same request yields a 400 error."""
    endpoint = (
        f'/api/dandisets/{version.dandiset.identifier}'
        f'/versions/{version.version}/assets/'
    )
    conflicting_query = {'regex': '[0-9].txt', 'glob': '*.txt'}

    response = api_client.get(endpoint, conflicting_query)

    # The error is reported against the ``glob`` field.
    assert response.status_code == 400
    assert response.json() == {'glob': ['Cannot specify both glob and regex']}
38 changes: 38 additions & 0 deletions dandiapi/api/views/asset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from __future__ import annotations

import re

try:
from storages.backends.s3boto3 import S3Boto3Storage
except ImportError:
Expand Down Expand Up @@ -41,6 +45,7 @@
)
from dandiapi.api.views.serializers import (
AssetDetailSerializer,
AssetListSerializer,
AssetPathsQueryParameterSerializer,
AssetPathsResponseSerializer,
AssetSerializer,
Expand Down Expand Up @@ -505,6 +510,39 @@ def destroy(self, request, versions__dandiset__pk, versions__version, **kwargs):

return Response(None, status=status.HTTP_204_NO_CONTENT)

@swagger_auto_schema(query_serializer=AssetListSerializer)
def list(self, request, *args, **kwargs):
    """List assets, optionally filtered by a glob or regex applied to the asset path."""
    # Use a distinct name for the query-parameter serializer so it is not
    # confused with the response serializer created further down.
    query_serializer = AssetListSerializer(data=request.query_params)
    query_serializer.is_valid(raise_exception=True)

    queryset = self.filter_queryset(self.get_queryset())
    glob_pattern: str | None = query_serializer.validated_data.get('glob')
    regex_pattern: str | None = query_serializer.validated_data.get('regex')

    if regex_pattern is not None:
        # Validate the regex by compiling it; only the compile call can raise
        # re.error, so it is the only statement kept inside the try block.
        try:
            re.compile(regex_pattern)
        except re.error:
            return Response(
                data=f'{regex_pattern} is not a valid regex pattern.',
                status=status.HTTP_400_BAD_REQUEST,
            )
        queryset = queryset.filter(path__iregex=regex_pattern)

    if glob_pattern is not None:
        # Translate the glob into a regex. Escape every regex metacharacter
        # FIRST so the user's text is matched literally, and only then turn the
        # (now escaped) '*' wildcard into '.*'. Doing it in the opposite order
        # escapes the dot that was just inserted for the wildcard, producing
        # '\.*' ("zero or more literal dots") instead of "anything".
        # NOTE(review): as before, the resulting pattern is not anchored, so it
        # matches anywhere in the path — confirm whether full-path matching
        # ('^...$') is wanted.
        glob_regex = re.escape(glob_pattern).replace('\\*', '.*')
        queryset = queryset.filter(path__iregex=glob_regex)

    page = self.paginate_queryset(queryset)
    if page is not None:
        page_serializer = self.get_serializer(page, many=True)
        return self.get_paginated_response(page_serializer.data)

    # Pagination is not in effect: serialize the whole filtered queryset.
    result_serializer = self.get_serializer(queryset, many=True)
    return Response(result_serializer.data)

@swagger_auto_schema(
manual_parameters=[PATH_PREFIX_PARAM],
responses={200: AssetPathsResponseSerializer()},
Expand Down
12 changes: 12 additions & 0 deletions dandiapi/api/views/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,15 @@ class AssetPathsQueryParameterSerializer(serializers.Serializer):
path_prefix = serializers.CharField(default='')
page = serializers.IntegerField(default=1)
page_size = serializers.IntegerField(default=DandiPagination.page_size)


class AssetListSerializer(serializers.Serializer):
    """Query parameters for the asset list endpoint: optional ``glob`` and ``regex`` filters."""

    glob = serializers.CharField(required=False)
    regex = serializers.CharField(required=False)

    def validate(self, attrs):
        """Reject requests that supply ``glob`` and ``regex`` at the same time."""
        both_supplied = all(field in attrs for field in ('glob', 'regex'))
        if not both_supplied:
            return super().validate(attrs)

        # The two filters are mutually exclusive; report the error on ``glob``.
        raise serializers.ValidationError(
            {'glob': 'Cannot specify both glob and regex'},
        )

0 comments on commit a7f19f1

Please sign in to comment.