From e6f00e656a91ca20279935d596cdaed2149a5dc3 Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Wed, 7 Dec 2022 12:54:37 -0500 Subject: [PATCH 1/5] Use flat file listing in zarr file browser --- dandiapi/urls.py | 7 +- dandiapi/zarr/views/__init__.py | 223 ++++++++++++++------------------ 2 files changed, 100 insertions(+), 130 deletions(-) diff --git a/dandiapi/urls.py b/dandiapi/urls.py index d3a3dbe6d..40211c84d 100644 --- a/dandiapi/urls.py +++ b/dandiapi/urls.py @@ -26,7 +26,7 @@ users_me_view, users_search_view, ) -from dandiapi.zarr.views import ZarrViewSet, explore_zarr_archive +from dandiapi.zarr.views import ZarrViewSet router = ExtendedSimpleRouter() ( @@ -97,11 +97,6 @@ def to_url(self, value): re_path( r'^api/users/questionnaire-form/$', user_questionnaire_form_view, name='user-questionnaire' ), - re_path( - r'^api/zarr/(?P[0-9a-f\-]{36}).zarr/(?P.*)?$', - explore_zarr_archive, - name='zarr-explore', - ), path('accounts/', include('allauth.urls')), path('admin/', admin.site.urls), path('dashboard/', DashboardView.as_view(), name='dashboard-index'), diff --git a/dandiapi/zarr/views/__init__.py b/dandiapi/zarr/views/__init__.py index 97f307388..fec1b83a7 100644 --- a/dandiapi/zarr/views/__init__.py +++ b/dandiapi/zarr/views/__init__.py @@ -3,24 +3,21 @@ import logging from pathlib import Path -from django.conf import settings from django.db import IntegrityError, transaction from django.db.models import Exists, OuterRef from django.db.models.query import QuerySet -from django.http.response import HttpResponseRedirect -from django.urls import reverse -from drf_yasg import openapi +from django.http import HttpResponseRedirect from drf_yasg.utils import no_body, swagger_auto_schema from rest_framework import serializers, status -from rest_framework.decorators import action, api_view +from rest_framework.decorators import action from rest_framework.exceptions import PermissionDenied, ValidationError from rest_framework.generics import get_object_or_404 from rest_framework.response import Response from rest_framework.viewsets import ReadOnlyModelViewSet from dandiapi.api.models.dandiset import Dandiset +from dandiapi.api.storage import get_boto_client from dandiapi.api.views.common import DandiPagination -from dandiapi.zarr.checksums import ZarrChecksumFileUpdater from dandiapi.zarr.models import ZarrArchive, ZarrArchiveStatus, ZarrUploadFile from dandiapi.zarr.tasks import cancel_zarr_upload, ingest_zarr_archive @@ -97,13 +94,11 @@ class Meta: upload_in_progress = serializers.BooleanField(source='uploads_exist') -class ZarrExploreSerializer(serializers.Serializer): - directories = serializers.ListField(child=serializers.URLField()) - files = serializers.ListField(child=serializers.URLField()) - checksums = serializers.DictField( - child=serializers.RegexField('^[0-9a-f]{32}(-[0-9]+--[0-9]+)?$') - ) - checksum = serializers.RegexField('^[0-9a-f]{32}-[0-9]+--[0-9]+$') +class ZarrExploreQuerySerializer(serializers.Serializer): + after = serializers.CharField(default='') + prefix = serializers.CharField(default='') + limit = serializers.IntegerField(default=1000) + download = serializers.BooleanField(default=False) class ZarrListQuerySerializer(serializers.Serializer): @@ -252,38 +247,6 @@ def upload_cancel(self, request, zarr_id): cancel_zarr_upload.delay(zarr_id) return Response(None, status=status.HTTP_204_NO_CONTENT) - @swagger_auto_schema( - method='DELETE', - request_body=ZarrDeleteFileRequestSerializer(many=True), - responses={ - 200: ZarrSerializer(many=True), - 400: ZarrArchive.INGEST_ERROR_MSG, - }, - operation_summary='Delete files from a zarr archive.', - operation_description='', - ) - @action(methods=['DELETE'], url_path='files', detail=True) - def delete_files(self, request, zarr_id): - """Delete files from a zarr archive.""" - queryset = self.get_queryset().select_for_update() - with transaction.atomic(): - zarr_archive: ZarrArchive = get_object_or_404(queryset, zarr_id=zarr_id) - if zarr_archive.status == ZarrArchiveStatus.INGESTING: - return Response(ZarrArchive.INGEST_ERROR_MSG, status=status.HTTP_400_BAD_REQUEST) - - if not self.request.user.has_perm('owner', zarr_archive.dandiset): - # The user does not have ownership permission - raise PermissionDenied() - serializer = ZarrDeleteFileRequestSerializer(data=request.data, many=True) - serializer.is_valid(raise_exception=True) - paths = [file['path'] for file in serializer.validated_data] - zarr_archive.delete_files(paths) - - # Save any zarr assets to trigger metadata updates - for asset in zarr_archive.assets.all(): - asset.save() - return Response(None, status=status.HTTP_204_NO_CONTENT) - @swagger_auto_schema( method='POST', request_body=no_body, @@ -316,85 +279,97 @@ def ingest(self, request, zarr_id): return Response(None, status=status.HTTP_204_NO_CONTENT) - -@swagger_auto_schema( - method='GET', - responses={ - 200: ZarrExploreSerializer(), - 302: 'Redirect to an object in S3', - }, - manual_parameters=[ - openapi.Parameter( - 'path', - openapi.IN_PATH, - 'a file or directory path within the zarr file.', - type=openapi.TYPE_STRING, - required=True, - pattern=r'.*', - ) - ], - operation_id='zarr_content_read', -) -@api_view(['GET', 'HEAD']) -def explore_zarr_archive(request, zarr_id: str, path: str): - """ - Get information about files in a zarr archive. - - If the path ends with /, it is assumed to be a directory and metadata about the directory is returned. - If the path does not end with /, it is assumed to be a file and a redirect to that file in S3 is returned. - HEAD requests are all assumed to be files and will return redirects to that file in S3. - - This API is compatible with https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem. - """ # noqa: E501 - zarr_archive = get_object_or_404(ZarrArchive, zarr_id=zarr_id) - if path == '': - path = '/' - if request.method == 'HEAD': - # We cannot use storage.url because that presigns a GET request. - # Instead, we need to presign the HEAD request using the storage-appropriate client. - storage = ZarrUploadFile.blob.field.storage - url = storage.generate_presigned_head_object_url(zarr_archive.s3_path(path)) - return HttpResponseRedirect(url) - # If a path ends with a /, assume it is a directory. - # Return a JSON blob which contains URLs to the contents of the directory. - if path.endswith('/'): - # Strip off the trailing /, it confuses the ZarrChecksumFileUpdater - path = path.rstrip('/') - # We use the .checksum file to determine the directory contents, since S3 cannot. - listing = ZarrChecksumFileUpdater( - zarr_archive=zarr_archive, zarr_directory_path=path - ).read_checksum_file() - if listing is None: - return Response(status=status.HTTP_404_NOT_FOUND) - directories = [ - settings.DANDI_API_URL - + reverse('zarr-explore', args=[zarr_id, str(Path(path) / directory.name)]) - + '/' - for directory in listing.checksums.directories - ] - files = [ - settings.DANDI_API_URL - + reverse('zarr-explore', args=[zarr_id, str(Path(path) / file.name)]) - for file in listing.checksums.files - ] - checksums = { - **{directory.name: directory.digest for directory in listing.checksums.directories}, - **{file.name: file.digest for file in listing.checksums.files}, - } - serializer = ZarrExploreSerializer( - data={ - 'directories': directories, - 'files': files, - 'checksums': checksums, - 'checksum': listing.digest, - } - ) + @swagger_auto_schema( + method='GET', + responses={ + 200: 'Listing of s3 objects', + 302: 'Redirect to an object in S3', + }, + query_serializer=ZarrExploreQuerySerializer(), + ) + @action(methods=['HEAD', 'GET'], detail=True) + def files(self, request, zarr_id: str): + """List files in a zarr archive.""" + zarr_archive = get_object_or_404(ZarrArchive, zarr_id=zarr_id) + serializer = ZarrExploreQuerySerializer(data=request.query_params) serializer.is_valid(raise_exception=True) - return Response(serializer.data) - else: - # The path did not end in a /, so it was a file. - # Redirect to a presigned S3 URL. + + # The root path for this zarr in s3 + base_path = Path(zarr_archive.s3_path('')) + + # Retrieve and join query params + limit = serializer.validated_data['limit'] + download = serializer.validated_data['download'] + + raw_prefix = serializer.validated_data['prefix'] + full_prefix = str(base_path / raw_prefix) + + _after = serializer.validated_data['after'] + after = str(base_path / _after) if _after else '' + + # Handle head request redirects + if request.method == 'HEAD': + # We cannot use storage.url because that presigns a GET request. + # Instead, we need to presign the HEAD request using the storage-appropriate client. + storage = ZarrUploadFile.blob.field.storage + url = storage.generate_presigned_head_object_url(full_prefix) + return HttpResponseRedirect(url) + + # Return a redirect to the file, if requested # S3 will 404 if the file does not exist. - return HttpResponseRedirect( - ZarrUploadFile.blob.field.storage.url(zarr_archive.s3_path(path)) + if download: + return HttpResponseRedirect( + ZarrUploadFile.blob.field.storage.url(zarr_archive.s3_path(raw_prefix)) + ) + + # Retrieve file listing + client = get_boto_client() + listing = client.list_objects_v2( + Bucket=zarr_archive.storage.bucket_name, + Prefix=full_prefix, + StartAfter=after, + MaxKeys=limit, ) + + # Map/filter listing + results = [ + { + 'Key': str(Path(obj['Key']).relative_to(base_path)), + 'LastModified': obj['LastModified'], + 'ETag': obj['ETag'].strip('"'), + 'Size': obj['Size'], + } + for obj in listing.get('Contents', []) + ] + + return Response(results) + + @swagger_auto_schema( + request_body=ZarrDeleteFileRequestSerializer(many=True), + responses={ + 200: ZarrSerializer(many=True), + 400: ZarrArchive.INGEST_ERROR_MSG, + }, + operation_summary='Delete files from a zarr archive.', + ) + @files.mapping.delete + def delete_files(self, request, zarr_id): + """Delete files from a zarr archive.""" + queryset = self.get_queryset().select_for_update() + with transaction.atomic(): + zarr_archive: ZarrArchive = get_object_or_404(queryset, zarr_id=zarr_id) + if zarr_archive.status == ZarrArchiveStatus.INGESTING: + return Response(ZarrArchive.INGEST_ERROR_MSG, status=status.HTTP_400_BAD_REQUEST) + + if not self.request.user.has_perm('owner', zarr_archive.dandiset): + # The user does not have ownership permission + raise PermissionDenied() + serializer = ZarrDeleteFileRequestSerializer(data=request.data, many=True) + serializer.is_valid(raise_exception=True) + paths = [file['path'] for file in serializer.validated_data] + zarr_archive.delete_files(paths) + + # Save any zarr assets to trigger metadata updates + for asset in zarr_archive.assets.all(): + asset.save() + return Response(None, status=status.HTTP_204_NO_CONTENT) From 2734955697482fcfe174de3b3a6e14f2889a31b2 Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Wed, 7 Dec 2022 13:31:15 -0500 Subject: [PATCH 2/5] Update zarr file listing tests --- dandiapi/zarr/tests/test_zarr.py | 147 +++++++++---------------------- 1 file changed, 41 insertions(+), 106 deletions(-) diff --git a/dandiapi/zarr/tests/test_zarr.py b/dandiapi/zarr/tests/test_zarr.py index 99ebb4c03..b17aadf79 100644 --- a/dandiapi/zarr/tests/test_zarr.py +++ b/dandiapi/zarr/tests/test_zarr.py @@ -6,7 +6,7 @@ from dandiapi.api.models.dandiset import Dandiset from dandiapi.api.tests.fuzzy import UUID_RE -from dandiapi.zarr.checksums import ZarrChecksumFileUpdater, ZarrChecksumUpdater +from dandiapi.zarr.checksums import ZarrChecksumFileUpdater from dandiapi.zarr.models import ZarrArchive, ZarrArchiveStatus, ZarrUploadFile from dandiapi.zarr.tasks import ingest_zarr_archive @@ -413,131 +413,66 @@ def test_zarr_rest_delete_upload_in_progress( assert resp.json() == ['Cannot delete files while an upload is in progress.'] -@pytest.mark.parametrize( - ('path', 'directories', 'files'), - [ - ('', ['foo/'], []), - ('foo/', ['foo/bar/'], ['foo/a', 'foo/b']), - ('foo/bar/', [], ['foo/bar/c']), - ], - ids=[ - '/', - 'foo/', - 'foo/bar/', - ], -) @pytest.mark.django_db -def test_zarr_explore_directory( - api_client, - storage, - zarr_archive: ZarrArchive, - zarr_upload_file_factory, - path, - directories, - files, -): +def test_zarr_file_list(api_client, storage, zarr_archive: ZarrArchive, zarr_upload_file_factory): # Pretend like ZarrUploadFile was defined with the given storage ZarrUploadFile.blob.field.storage = storage - a: ZarrUploadFile = zarr_upload_file_factory(zarr_archive=zarr_archive, path='a') - b: ZarrUploadFile = zarr_upload_file_factory(zarr_archive=zarr_archive, path='b') - c: ZarrUploadFile = zarr_upload_file_factory(zarr_archive=zarr_archive, path='c') - # Write the checksum files - ZarrChecksumUpdater(zarr_archive).update_file_checksums( - { - 'foo/a': a.to_checksum(), - 'foo/b': b.to_checksum(), - 'foo/bar/c': c.to_checksum(), - } - ) - listing = ZarrChecksumFileUpdater(zarr_archive, path).read_checksum_file() + files = [ + 'foo/bar/a.txt', + 'foo/bar/b.txt', + 'foo/baz.txt', + # These two files are here just to demonstrate filtering + 'bar/a.txt', + 'bar/b.txt', + ] + for file in files: + zarr_upload_file_factory(zarr_archive=zarr_archive, path=file) - resp = api_client.get(f'/api/zarr/{zarr_archive.zarr_id}.zarr/{path}') - assert resp.status_code == 200 - assert resp.json() == { - 'directories': [ - f'http://localhost:8000/api/zarr/{zarr_archive.zarr_id}.zarr/{dirpath}' - for dirpath in directories - ], - 'files': [ - f'http://localhost:8000/api/zarr/{zarr_archive.zarr_id}.zarr/{filepath}' - for filepath in files - ], - 'checksums': { - **{directory.name: directory.digest for directory in listing.checksums.directories}, - **{file.name: file.digest for file in listing.checksums.files}, - }, - 'checksum': listing.digest, - } + # Check base listing + resp = api_client.get(f'/api/zarr/{zarr_archive.zarr_id}/files/') + assert [x['Key'] for x in resp.json()] == sorted(files) + # Check that prefix query param works as expected + resp = api_client.get( + f'/api/zarr/{zarr_archive.zarr_id}/files/', + {'prefix': 'foo/'}, + ) + assert [x['Key'] for x in resp.json()] == ['foo/bar/a.txt', 'foo/bar/b.txt', 'foo/baz.txt'] -@pytest.mark.django_db -def test_zarr_explore_directory_does_not_exist( - api_client, - storage, - zarr_archive: ZarrArchive, -): - # Pretend like ZarrUploadFile was defined with the given storage - ZarrUploadFile.blob.field.storage = storage + # Check that prefix and after work together resp = api_client.get( - f'/api/zarr/{zarr_archive.zarr_id}.zarr/does/not/exist/', + f'/api/zarr/{zarr_archive.zarr_id}/files/', + {'prefix': 'foo/', 'after': 'foo/bar/a.txt'}, ) - assert resp.status_code == 404 - - -@pytest.mark.parametrize( - 'path', - [ - 'foo', - 'foo/a', - 'foo/b', - 'foo/bar/c', - 'gibberish', - ], -) -@pytest.mark.django_db -def test_zarr_explore_file( - api_client, - storage, - zarr_archive: ZarrArchive, - path, -): - # Pretend like ZarrUploadFile was defined with the given storage - ZarrUploadFile.blob.field.storage = storage + assert [x['Key'] for x in resp.json()] == ['foo/bar/b.txt', 'foo/baz.txt'] + + # Use limit query param resp = api_client.get( - f'/api/zarr/{zarr_archive.zarr_id}.zarr/{path}', + f'/api/zarr/{zarr_archive.zarr_id}/files/', + {'prefix': 'foo/', 'after': 'foo/bar/a.txt', 'limit': 1}, + ) + assert [x['Key'] for x in resp.json()] == ['foo/bar/b.txt'] + + # Check download flag + resp = api_client.get( + f'/api/zarr/{zarr_archive.zarr_id}/files/', + {'prefix': 'foo/bar/a.txt', 'download': True}, ) assert resp.status_code == 302 assert resp.headers['Location'].startswith( - f'http://{settings.MINIO_STORAGE_ENDPOINT}/test-dandiapi-dandisets/test-prefix/test-zarr/{zarr_archive.zarr_id}/{path}?' # noqa: E501 + f'http://{settings.MINIO_STORAGE_ENDPOINT}/test-dandiapi-dandisets/test-prefix/test-zarr/{zarr_archive.zarr_id}/foo/bar/a.txt?' # noqa: E501 ) -@pytest.mark.parametrize( - 'path', - [ - 'foo', - 'foo/a', - 'foo/b', - 'foo/bar/c', - 'gibberish', - 'gibberish/', - ], -) @pytest.mark.django_db -def test_zarr_explore_head( - api_client, - storage, - zarr_archive: ZarrArchive, - path, -): +def test_zarr_explore_head(api_client, storage, zarr_archive: ZarrArchive): # Pretend like ZarrUploadFile was defined with the given storage ZarrUploadFile.blob.field.storage = storage - resp = api_client.head( - f'/api/zarr/{zarr_archive.zarr_id}.zarr/{path}', - ) + filepath = 'foo/bar.txt' + resp = api_client.head(f'/api/zarr/{zarr_archive.zarr_id}/files/', {'prefix': filepath}) assert resp.status_code == 302 assert resp.headers['Location'].startswith( - f'http://{settings.MINIO_STORAGE_ENDPOINT}/test-dandiapi-dandisets/test-prefix/test-zarr/{zarr_archive.zarr_id}/{path}?' # noqa: E501 + f'http://{settings.MINIO_STORAGE_ENDPOINT}/test-dandiapi-dandisets/test-prefix/test-zarr/{zarr_archive.zarr_id}/{filepath}?' # noqa: E501 ) From 3f677fa1d4d5903a2a0df46479da58e0ec4f96e2 Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Wed, 7 Dec 2022 17:19:27 -0500 Subject: [PATCH 3/5] Raise error in asset download endpoint if zarr --- dandiapi/api/tests/test_asset.py | 10 ++-------- dandiapi/api/views/asset.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/dandiapi/api/tests/test_asset.py b/dandiapi/api/tests/test_asset.py index aa55fbf4f..ee5ceac85 100644 --- a/dandiapi/api/tests/test_asset.py +++ b/dandiapi/api/tests/test_asset.py @@ -1246,10 +1246,7 @@ def test_asset_download_zarr(api_client, version, asset_factory, zarr_archive): f'/api/dandisets/{version.dandiset.identifier}/' f'versions/{version.version}/assets/{asset.asset_id}/download/' ) - - assert response.status_code == 302 - download_url = response.get('Location') - assert download_url == f'/api/zarr/{zarr_archive.zarr_id}.zarr/' + assert response.status_code == 400 @pytest.mark.django_db @@ -1281,10 +1278,7 @@ def test_asset_direct_download_zarr(api_client, version, asset_factory, zarr_arc version.assets.add(asset) response = api_client.get(f'/api/assets/{asset.asset_id}/download/') - - assert response.status_code == 302 - download_url = response.get('Location') - assert download_url == f'/api/zarr/{zarr_archive.zarr_id}.zarr/' + assert response.status_code == 400 @pytest.mark.django_db diff --git a/dandiapi/api/views/asset.py b/dandiapi/api/views/asset.py index d6dd5f099..b624e60ce 100644 --- a/dandiapi/api/views/asset.py +++ b/dandiapi/api/views/asset.py @@ -27,7 +27,6 @@ from django.conf import settings from django.db.models import QuerySet from django.http import HttpResponseRedirect -from django.urls import reverse from django.utils.decorators import method_decorator from django_filters import rest_framework as filters from drf_yasg.utils import swagger_auto_schema @@ -121,12 +120,16 @@ def retrieve(self, request, **kwargs): def download(self, *args, **kwargs): asset = self.get_object() - # Assign asset blob or redirect if zarr + # Raise error if zarr if asset.is_zarr: - return HttpResponseRedirect( - reverse('zarr-explore', kwargs={'zarr_id': asset.zarr.zarr_id, 'path': ''}) + return Response( + 'Unable to provide download link for zarr assets.' + ' Please browse the zarr files directly to do so.', + status=status.HTTP_400_BAD_REQUEST, ) - elif asset.is_blob: + + # Assign asset_blob + if asset.is_blob: asset_blob = asset.blob elif asset.is_embargoed_blob: asset_blob = asset.embargoed_blob From 3dea2462042f6dd48ba1d8a50b5d136f95bd3e8f Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Mon, 12 Dec 2022 17:22:57 -0500 Subject: [PATCH 4/5] Set min/max value for limit param --- dandiapi/zarr/views/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandiapi/zarr/views/__init__.py b/dandiapi/zarr/views/__init__.py index fec1b83a7..87e7dda19 100644 --- a/dandiapi/zarr/views/__init__.py +++ b/dandiapi/zarr/views/__init__.py @@ -97,7 +97,7 @@ class Meta: class ZarrExploreQuerySerializer(serializers.Serializer): after = serializers.CharField(default='') prefix = serializers.CharField(default='') - limit = serializers.IntegerField(default=1000) + limit = serializers.IntegerField(min_value=0, max_value=1000, default=1000) download = serializers.BooleanField(default=False) From 7301c6488cff151e9b464d9e05c47c3b5b0277cd Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Mon, 12 Dec 2022 17:43:15 -0500 Subject: [PATCH 5/5] Add next field to zarr file listing endpoint --- dandiapi/zarr/tests/test_zarr.py | 12 +++++++---- dandiapi/zarr/views/__init__.py | 34 ++++++++++++++++++++++++++++---- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/dandiapi/zarr/tests/test_zarr.py b/dandiapi/zarr/tests/test_zarr.py index b17aadf79..3e3987bb0 100644 --- a/dandiapi/zarr/tests/test_zarr.py +++ b/dandiapi/zarr/tests/test_zarr.py @@ -431,28 +431,32 @@ def test_zarr_file_list(api_client, storage, zarr_archive: ZarrArchive, zarr_upl # Check base listing resp = api_client.get(f'/api/zarr/{zarr_archive.zarr_id}/files/') - assert [x['Key'] for x in resp.json()] == sorted(files) + assert [x['Key'] for x in resp.json()['results']] == sorted(files) # Check that prefix query param works as expected resp = api_client.get( f'/api/zarr/{zarr_archive.zarr_id}/files/', {'prefix': 'foo/'}, ) - assert [x['Key'] for x in resp.json()] == ['foo/bar/a.txt', 'foo/bar/b.txt', 'foo/baz.txt'] + assert [x['Key'] for x in resp.json()['results']] == [ + 'foo/bar/a.txt', + 'foo/bar/b.txt', + 'foo/baz.txt', + ] # Check that prefix and after work together resp = api_client.get( f'/api/zarr/{zarr_archive.zarr_id}/files/', {'prefix': 'foo/', 'after': 'foo/bar/a.txt'}, ) - assert [x['Key'] for x in resp.json()] == ['foo/bar/b.txt', 'foo/baz.txt'] + assert [x['Key'] for x in resp.json()['results']] == ['foo/bar/b.txt', 'foo/baz.txt'] # Use limit query param resp = api_client.get( f'/api/zarr/{zarr_archive.zarr_id}/files/', {'prefix': 'foo/', 'after': 'foo/bar/a.txt', 'limit': 1}, ) - assert [x['Key'] for x in resp.json()] == ['foo/bar/b.txt'] + assert [x['Key'] for x in resp.json()['results']] == ['foo/bar/b.txt'] # Check download flag resp = api_client.get( diff --git a/dandiapi/zarr/views/__init__.py b/dandiapi/zarr/views/__init__.py index 87e7dda19..81e1ece23 100644 --- a/dandiapi/zarr/views/__init__.py +++ b/dandiapi/zarr/views/__init__.py @@ -13,6 +13,7 @@ from rest_framework.exceptions import PermissionDenied, ValidationError from rest_framework.generics import get_object_or_404 from rest_framework.response import Response +from rest_framework.utils.urls import replace_query_param from rest_framework.viewsets import ReadOnlyModelViewSet from dandiapi.api.models.dandiset import Dandiset @@ -94,13 +95,24 @@ class Meta: upload_in_progress = serializers.BooleanField(source='uploads_exist') -class ZarrExploreQuerySerializer(serializers.Serializer): +class ZarrExploreInputSerializer(serializers.Serializer): after = serializers.CharField(default='') prefix = serializers.CharField(default='') limit = serializers.IntegerField(min_value=0, max_value=1000, default=1000) download = serializers.BooleanField(default=False) +class ZarrExploreOutputSerializer(serializers.Serializer): + class ResultsSerializer(serializers.Serializer): + Key = serializers.CharField() + LastModified = serializers.CharField() + ETag = serializers.CharField() + Size = serializers.IntegerField(min_value=0) + + next = serializers.CharField(default=None) + results = serializers.ListField(child=ResultsSerializer()) + + class ZarrListQuerySerializer(serializers.Serializer): dandiset = serializers.RegexField(Dandiset.IDENTIFIER_REGEX, required=False) name = serializers.CharField(required=False) @@ -285,13 +297,13 @@ def ingest(self, request, zarr_id): 200: 'Listing of s3 objects', 302: 'Redirect to an object in S3', }, - query_serializer=ZarrExploreQuerySerializer(), + query_serializer=ZarrExploreInputSerializer(), ) @action(methods=['HEAD', 'GET'], detail=True) def files(self, request, zarr_id: str): """List files in a zarr archive.""" zarr_archive = get_object_or_404(ZarrArchive, zarr_id=zarr_id) - serializer = ZarrExploreQuerySerializer(data=request.query_params) + serializer = ZarrExploreInputSerializer(data=request.query_params) serializer.is_valid(raise_exception=True) # The root path for this zarr in s3 @@ -342,7 +354,21 @@ def files(self, request, zarr_id: str): for obj in listing.get('Contents', []) ] - return Response(results) + # Create next listing if necessary + next_link = None + if listing['IsTruncated']: + url = self.request.build_absolute_uri() + next_link = replace_query_param(url, 'after', results[-1]['Key']) + + # Construct serializer and return + return Response( + ZarrExploreOutputSerializer( + instance={ + 'next': next_link, + 'results': results, + } + ).data + ) @swagger_auto_schema( request_body=ZarrDeleteFileRequestSerializer(many=True),