From a43d5977d1436f3675797b459b1bedb725f0f52a Mon Sep 17 00:00:00 2001 From: Mike VanDenburgh Date: Tue, 12 Apr 2022 13:03:16 -0400 Subject: [PATCH 1/6] Accept `glob` parameter on `NestedAsset` list endpoint --- dandiapi/api/views/asset.py | 22 ++++++++++++++++++++++ dandiapi/api/views/common.py | 7 +++++++ 2 files changed, 29 insertions(+) diff --git a/dandiapi/api/views/asset.py b/dandiapi/api/views/asset.py index 3cc821620..4a5bf84cb 100644 --- a/dandiapi/api/views/asset.py +++ b/dandiapi/api/views/asset.py @@ -1,3 +1,5 @@ +from __future__ import annotations + try: from storages.backends.s3boto3 import S3Boto3Storage except ImportError: @@ -33,6 +35,7 @@ from dandiapi.api.models.asset import BaseAssetBlob, EmbargoedAssetBlob from dandiapi.api.tasks import validate_asset_metadata from dandiapi.api.views.common import ( + ASSET_GLOB_PARAM, ASSET_ID_PARAM, PATH_PREFIX_PARAM, VERSIONS_DANDISET_PK_PARAM, @@ -505,6 +508,25 @@ def destroy(self, request, versions__dandiset__pk, versions__version, **kwargs): return Response(None, status=status.HTTP_204_NO_CONTENT) + @swagger_auto_schema(manual_parameters=[ASSET_GLOB_PARAM]) + def list(self, request, *args, **kwargs): + queryset = self.filter_queryset(self.get_queryset()) + + glob_pattern: str | None = self.request.query_params.get('glob') + + if glob_pattern is not None: + queryset = queryset.filter( + path__iregex=glob_pattern.replace("*", ".*").replace(".", "\\.") + ) + + page = self.paginate_queryset(queryset) + if page is not None: + serializer = self.get_serializer(page, many=True) + return self.get_paginated_response(serializer.data) + + serializer = self.get_serializer(queryset, many=True) + return Response(serializer.data) + @swagger_auto_schema( manual_parameters=[PATH_PREFIX_PARAM], responses={200: AssetPathsResponseSerializer()}, diff --git a/dandiapi/api/views/common.py b/dandiapi/api/views/common.py index dc520d5ab..78655c225 100644 --- a/dandiapi/api/views/common.py +++ b/dandiapi/api/views/common.py @@ -33,6 +33,13 @@ class DandiPagination(PageNumberPagination): PATH_PREFIX_PARAM = openapi.Parameter('path_prefix', openapi.IN_QUERY, type=openapi.TYPE_STRING) +ASSET_GLOB_PARAM = openapi.Parameter( + 'glob', + openapi.IN_QUERY, + type=openapi.TYPE_STRING, + description='Glob pattern to filter asset paths with.', +) + VERSION_PARAM = openapi.Parameter( 'version', openapi.IN_PATH, From 60f6fee15bb8890b7a3cd1a46d68bab276543754 Mon Sep 17 00:00:00 2001 From: Mike VanDenburgh Date: Tue, 12 Apr 2022 13:03:41 -0400 Subject: [PATCH 2/6] Add test for glob parameter --- dandiapi/api/tests/test_asset.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/dandiapi/api/tests/test_asset.py b/dandiapi/api/tests/test_asset.py index 8bef3adb9..377321e50 100644 --- a/dandiapi/api/tests/test_asset.py +++ b/dandiapi/api/tests/test_asset.py @@ -1240,3 +1240,31 @@ def test_asset_direct_info(api_client, asset): 'created': TIMESTAMP_RE, 'modified': TIMESTAMP_RE, } + + +@pytest.mark.django_db +@pytest.mark.parametrize( + 'glob_pattern,expected_paths', + [ + ( + '*.txt', + ['a/b.txt', 'a/b/c.txt', 'a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt'], + ), + ( + 'a/b/c/*', + ['a/b/c/d.txt', 'a/b/c/e.txt'], + ), + ('a/b/d/*', ['a/b/d/e.txt']), + ], +) +def test_asset_rest_glob(api_client, asset_factory, version, glob_pattern, expected_paths): + paths = ('a/b.txt', 'a/b/c.txt', 'a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt') + for path in paths: + version.assets.add(asset_factory(path=path)) + + resp = api_client.get( + f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/', + {'glob': glob_pattern}, + ) + + assert expected_paths == [asset['path'] for asset in resp.json()['results']] From 026d9791149741952f0c69a6c66759a07dc0a8bf Mon Sep 17 00:00:00 2001 From: Mike VanDenburgh Date: Tue, 12 Apr 2022 13:18:55 -0400 Subject: [PATCH 3/6] Accept `regex` parameter on NestedAsset list endpoint --- dandiapi/api/views/asset.py | 17 ++++++++++++++++- dandiapi/api/views/common.py | 7 +++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/dandiapi/api/views/asset.py b/dandiapi/api/views/asset.py index 4a5bf84cb..030b3b14b 100644 --- a/dandiapi/api/views/asset.py +++ b/dandiapi/api/views/asset.py @@ -1,5 +1,7 @@ from __future__ import annotations +import re + try: from storages.backends.s3boto3 import S3Boto3Storage except ImportError: @@ -37,6 +39,7 @@ from dandiapi.api.views.common import ( ASSET_GLOB_PARAM, ASSET_ID_PARAM, + ASSET_REGEX_PARAM, PATH_PREFIX_PARAM, VERSIONS_DANDISET_PK_PARAM, VERSIONS_VERSION_PARAM, @@ -508,11 +511,23 @@ def destroy(self, request, versions__dandiset__pk, versions__version, **kwargs): return Response(None, status=status.HTTP_204_NO_CONTENT) - @swagger_auto_schema(manual_parameters=[ASSET_GLOB_PARAM]) + @swagger_auto_schema(manual_parameters=[ASSET_GLOB_PARAM, ASSET_REGEX_PARAM]) def list(self, request, *args, **kwargs): queryset = self.filter_queryset(self.get_queryset()) glob_pattern: str | None = self.request.query_params.get('glob') + regex_pattern: str | None = self.request.query_params.get('regex') + + if regex_pattern is not None: + try: + # Validate the regex by calling re.compile on it + re.compile(regex_pattern) + queryset = queryset.filter(path__iregex=regex_pattern) + except re.error: + return Response( + data=f'{regex_pattern} is not a valid regex pattern.', + status=status.HTTP_400_BAD_REQUEST, + ) if glob_pattern is not None: queryset = queryset.filter( diff --git a/dandiapi/api/views/common.py b/dandiapi/api/views/common.py index 78655c225..98fa8f426 100644 --- a/dandiapi/api/views/common.py +++ b/dandiapi/api/views/common.py @@ -40,6 +40,13 @@ class DandiPagination(PageNumberPagination): description='Glob pattern to filter asset paths with.', ) +ASSET_REGEX_PARAM = openapi.Parameter( + 'regex', + openapi.IN_QUERY, + type=openapi.TYPE_STRING, + description='Regex pattern to filter asset paths with.', +) + VERSION_PARAM = openapi.Parameter( 'version', openapi.IN_PATH, From 5af04b91e1d08947a5295d7745f9d1f6063ac2b1 Mon Sep 17 00:00:00 2001 From: Mike VanDenburgh Date: Tue, 12 Apr 2022 14:29:53 -0400 Subject: [PATCH 4/6] Add test for regex parameter --- dandiapi/api/tests/test_asset.py | 37 ++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/dandiapi/api/tests/test_asset.py b/dandiapi/api/tests/test_asset.py index 377321e50..829e389bc 100644 --- a/dandiapi/api/tests/test_asset.py +++ b/dandiapi/api/tests/test_asset.py @@ -1268,3 +1268,40 @@ def test_asset_rest_glob(api_client, asset_factory, version, glob_pattern, expec ) assert expected_paths == [asset['path'] for asset in resp.json()['results']] + + +@pytest.mark.django_db +@pytest.mark.parametrize( + 'regex_pattern,expected_paths', + [ + ( + '[0-9].txt', + ['1.txt', '1/2/3.txt'], + ), + ( + '[a-z].txt', + ['a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt'], + ), + ], +) +def test_asset_rest_regex_valid(api_client, asset_factory, version, regex_pattern, expected_paths): + paths = ('1.txt', '1/2/3.txt', 'a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt') + for path in paths: + version.assets.add(asset_factory(path=path)) + + resp = api_client.get( + f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/', + {'regex': regex_pattern}, + ) + + assert expected_paths == [asset['path'] for asset in resp.json()['results']] + + +@pytest.mark.django_db +def test_asset_rest_regex_invalid(api_client, version): + resp = api_client.get( + f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/', + {'regex': '[[[[['}, # provide an invalid regex + ) + + assert resp.status_code == 400 From 679f94df64aeff05a31ff3bad563e0d82770e6d2 Mon Sep 17 00:00:00 2001 From: Mike VanDenburgh Date: Thu, 14 Apr 2022 12:15:15 -0400 Subject: [PATCH 5/6] Use serializer for GET parameters --- dandiapi/api/views/asset.py | 15 ++++++++------- dandiapi/api/views/common.py | 14 -------------- dandiapi/api/views/serializers.py | 5 +++++ 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/dandiapi/api/views/asset.py b/dandiapi/api/views/asset.py index 030b3b14b..ced14f280 100644 --- a/dandiapi/api/views/asset.py +++ b/dandiapi/api/views/asset.py @@ -37,9 +37,7 @@ from dandiapi.api.models.asset import BaseAssetBlob, EmbargoedAssetBlob from dandiapi.api.tasks import validate_asset_metadata from dandiapi.api.views.common import ( - ASSET_GLOB_PARAM, ASSET_ID_PARAM, - ASSET_REGEX_PARAM, PATH_PREFIX_PARAM, VERSIONS_DANDISET_PK_PARAM, VERSIONS_VERSION_PARAM, @@ -47,6 +45,7 @@ ) from dandiapi.api.views.serializers import ( AssetDetailSerializer, + AssetListSerializer, AssetPathsQueryParameterSerializer, AssetPathsResponseSerializer, AssetSerializer, @@ -511,12 +510,14 @@ def destroy(self, request, versions__dandiset__pk, versions__version, **kwargs): return Response(None, status=status.HTTP_204_NO_CONTENT) - @swagger_auto_schema(manual_parameters=[ASSET_GLOB_PARAM, ASSET_REGEX_PARAM]) + @swagger_auto_schema(query_serializer=AssetListSerializer) def list(self, request, *args, **kwargs): - queryset = self.filter_queryset(self.get_queryset()) + serializer = AssetListSerializer(data=request.query_params) + serializer.is_valid(raise_exception=True) - glob_pattern: str | None = self.request.query_params.get('glob') - regex_pattern: str | None = self.request.query_params.get('regex') + queryset = self.filter_queryset(self.get_queryset()) + glob_pattern: str | None = serializer.validated_data.get('glob') + regex_pattern: str | None = serializer.validated_data.get('regex') if regex_pattern is not None: try: @@ -531,7 +532,7 @@ def list(self, request, *args, **kwargs): if glob_pattern is not None: queryset = queryset.filter( - path__iregex=glob_pattern.replace("*", ".*").replace(".", "\\.") + path__iregex=glob_pattern.replace('*', '.*').replace('.', '\\.') ) page = self.paginate_queryset(queryset) diff --git a/dandiapi/api/views/common.py b/dandiapi/api/views/common.py index 98fa8f426..dc520d5ab 100644 --- a/dandiapi/api/views/common.py +++ b/dandiapi/api/views/common.py @@ -33,20 +33,6 @@ class DandiPagination(PageNumberPagination): PATH_PREFIX_PARAM = openapi.Parameter('path_prefix', openapi.IN_QUERY, type=openapi.TYPE_STRING) -ASSET_GLOB_PARAM = openapi.Parameter( - 'glob', - openapi.IN_QUERY, - type=openapi.TYPE_STRING, - description='Glob pattern to filter asset paths with.', -) - -ASSET_REGEX_PARAM = openapi.Parameter( - 'regex', - openapi.IN_QUERY, - type=openapi.TYPE_STRING, - description='Regex pattern to filter asset paths with.', -) - VERSION_PARAM = openapi.Parameter( 'version', openapi.IN_PATH, diff --git a/dandiapi/api/views/serializers.py b/dandiapi/api/views/serializers.py index 5affd7265..34f61aa93 100644 --- a/dandiapi/api/views/serializers.py +++ b/dandiapi/api/views/serializers.py @@ -197,3 +197,8 @@ class AssetPathsQueryParameterSerializer(serializers.Serializer): path_prefix = serializers.CharField(default='') page = serializers.IntegerField(default=1) page_size = serializers.IntegerField(default=DandiPagination.page_size) + + +class AssetListSerializer(serializers.Serializer): + glob = serializers.CharField(required=False) + regex = serializers.CharField(required=False) From fa2d3f47f9142bde1ccf69fcbffe5ea839cd8acf Mon Sep 17 00:00:00 2001 From: Mike VanDenburgh Date: Thu, 14 Apr 2022 14:09:49 -0400 Subject: [PATCH 6/6] Error if user supplies both `glob` and `regex` params --- dandiapi/api/tests/test_asset.py | 12 ++++++++++++ dandiapi/api/views/serializers.py | 7 +++++++ 2 files changed, 19 insertions(+) diff --git a/dandiapi/api/tests/test_asset.py b/dandiapi/api/tests/test_asset.py index 829e389bc..e4bf8043a 100644 --- a/dandiapi/api/tests/test_asset.py +++ b/dandiapi/api/tests/test_asset.py @@ -1305,3 +1305,15 @@ def test_asset_rest_regex_invalid(api_client, version): ) assert resp.status_code == 400 + + +@pytest.mark.django_db +def test_asset_rest_glob_regex_together(api_client, version): + """Test that including both a glob and regex returns a 400 error.""" + resp = api_client.get( + f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/', + {'regex': '[0-9].txt', 'glob': '*.txt'}, + ) + + assert resp.status_code == 400 + assert resp.json() == {'glob': ['Cannot specify both glob and regex']} diff --git a/dandiapi/api/views/serializers.py b/dandiapi/api/views/serializers.py index 34f61aa93..46cb9a0ca 100644 --- a/dandiapi/api/views/serializers.py +++ b/dandiapi/api/views/serializers.py @@ -202,3 +202,10 @@ class AssetPathsQueryParameterSerializer(serializers.Serializer): class AssetListSerializer(serializers.Serializer): glob = serializers.CharField(required=False) regex = serializers.CharField(required=False) + + def validate(self, attrs): + if 'glob' in attrs and 'regex' in attrs: + raise serializers.ValidationError( + {'glob': 'Cannot specify both glob and regex'}, + ) + return super().validate(attrs)