diff --git a/dandiapi/api/storage.py b/dandiapi/api/storage.py index 820eda813..47c056f8e 100644 --- a/dandiapi/api/storage.py +++ b/dandiapi/api/storage.py @@ -143,13 +143,14 @@ def etag_from_blob_name(self, blob_name) -> str | None: return etag[1:-1] return etag - def generate_presigned_put_object_url(self, blob_name: str) -> str: + def generate_presigned_put_object_url(self, blob_name: str, md5: str) -> str: return self.connection.meta.client.generate_presigned_url( ClientMethod='put_object', Params={ 'Bucket': self.bucket_name, 'Key': blob_name, 'ACL': 'bucket-owner-full-control', + 'ContentMD5': md5, }, ExpiresIn=600, # TODO proper expiration ) @@ -190,7 +191,9 @@ def etag_from_blob_name(self, blob_name) -> str | None: else: return response.etag - def generate_presigned_put_object_url(self, blob_name: str) -> str: + def generate_presigned_put_object_url(self, blob_name: str, _: str) -> str: + # Note: minio-py doesn't support using Content-MD5 headers + # storage.client will generate URLs like `http://minio:9000/...` when running in # docker. To avoid this, use the secondary base_url_client which is configured to # generate URLs like `http://localhost:9000/...`. diff --git a/dandiapi/zarr/models.py b/dandiapi/zarr/models.py index 67b8e51c3..68355c6b1 100644 --- a/dandiapi/zarr/models.py +++ b/dandiapi/zarr/models.py @@ -68,9 +68,10 @@ def s3_url(self): s3_url = urlunparse((parsed[0], parsed[1], parsed[2], '', '', '')) return s3_url - def generate_upload_urls(self, paths: list[str]): + def generate_upload_urls(self, path_md5s: list[dict]): return [ - self.storage.generate_presigned_put_object_url(self.s3_path(path)) for path in paths + self.storage.generate_presigned_put_object_url(self.s3_path(o['path']), o['md5']) + for o in path_md5s ] def mark_pending(self): diff --git a/dandiapi/zarr/tests/test_zarr_upload.py b/dandiapi/zarr/tests/test_zarr_upload.py index ceac32b59..baf0e10ef 100644 --- a/dandiapi/zarr/tests/test_zarr_upload.py +++ b/dandiapi/zarr/tests/test_zarr_upload.py @@ -25,7 +25,7 @@ def test_zarr_rest_upload_start( # Request upload files resp = authenticated_api_client.post( f'/api/zarr/{zarr_archive.zarr_id}/files/', - ['foo/bar.txt'], + [{'path': 'foo/bar.txt', 'md5': '12345'}], format='json', ) assert resp.status_code == 200 @@ -41,7 +41,7 @@ def test_zarr_rest_upload_start( # Request more resp = authenticated_api_client.post( f'/api/zarr/{zarr_archive.zarr_id}/files/', - ['foo/bar2.txt'], + [{'path': 'foo/bar2.txt', 'md5': '12345'}], format='json', ) assert resp.status_code == 200 @@ -52,7 +52,7 @@ def test_zarr_rest_upload_start( def test_zarr_rest_upload_start_not_an_owner(authenticated_api_client, zarr_archive: ZarrArchive): resp = authenticated_api_client.post( f'/api/zarr/{zarr_archive.zarr_id}/files/', - ['foo/bar.txt'], + [{'path': 'foo/bar.txt', 'md5': '12345'}], format='json', ) assert resp.status_code == 403 diff --git a/dandiapi/zarr/views/__init__.py b/dandiapi/zarr/views/__init__.py index dad604ca3..dd422e700 100644 --- a/dandiapi/zarr/views/__init__.py +++ b/dandiapi/zarr/views/__init__.py @@ -24,8 +24,9 @@ logger = logging.getLogger(__name__) -class ZarrFileCreationSerializer(serializers.ListSerializer): - child = serializers.CharField() +class ZarrFileCreationSerializer(serializers.Serializer): + path = serializers.CharField() + md5 = serializers.CharField() class ZarrDeleteFileRequestSerializer(serializers.Serializer): @@ -271,7 +272,7 @@ def create_files(self, request, zarr_id): if not self.request.user.has_perm('owner', zarr_archive.dandiset): raise PermissionDenied() - serializer = ZarrFileCreationSerializer(data=request.data) + serializer = ZarrFileCreationSerializer(data=request.data, many=True) serializer.is_valid(raise_exception=True) # Generate presigned urls diff --git a/doc/design/zarr-performance-redesign.md b/doc/design/zarr-performance-redesign.md index afb0a3bf5..5955ec0a5 100644 --- a/doc/design/zarr-performance-redesign.md +++ b/doc/design/zarr-performance-redesign.md @@ -100,7 +100,7 @@ sequenceDiagram Server-->>-Client: PENDING Zarr Archive loop for each file - Client->>+Server: Request signed URLs + Client->>+Server: Request signed URLs (w/ path and md5) Server-->>-Client: A list of signed URLs Client->>+S3: Upload individual file using signed URL end @@ -120,7 +120,7 @@ sequenceDiagram Client->>+Client: Verify zarr checksum with local ``` -(Step 1): **`dandi-cli` computes the Zarr checksum locally**. (Note that this step can actually be taken any time before step 12; it is listed here arbitrarily.) +(Step 1): **`dandi-cli` computes the Zarr checksum locally**. (Steps 2 and 3): `dandi-cli` asks the server to create a new Zarr archive, which is put into the `PENDING` state.