Skip to content

Commit

Permalink
Merge pull request #1497 from dandi/add-zarr-md5-signing-urls
Browse files Browse the repository at this point in the history
Add back md5 to zarr object url signing
  • Loading branch information
danlamanna committed Feb 21, 2023
2 parents 774a4f6 + ec5f69e commit 9e7b39f
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 12 deletions.
7 changes: 5 additions & 2 deletions dandiapi/api/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,14 @@ def etag_from_blob_name(self, blob_name) -> str | None:
return etag[1:-1]
return etag

def generate_presigned_put_object_url(self, blob_name: str) -> str:
def generate_presigned_put_object_url(self, blob_name: str, md5: str) -> str:
return self.connection.meta.client.generate_presigned_url(
ClientMethod='put_object',
Params={
'Bucket': self.bucket_name,
'Key': blob_name,
'ACL': 'bucket-owner-full-control',
'ContentMD5': md5,
},
ExpiresIn=600, # TODO proper expiration
)
Expand Down Expand Up @@ -190,7 +191,9 @@ def etag_from_blob_name(self, blob_name) -> str | None:
else:
return response.etag

def generate_presigned_put_object_url(self, blob_name: str) -> str:
def generate_presigned_put_object_url(self, blob_name: str, _: str) -> str:
# Note: minio-py doesn't support using Content-MD5 headers

# storage.client will generate URLs like `http://minio:9000/...` when running in
# docker. To avoid this, use the secondary base_url_client which is configured to
# generate URLs like `http://localhost:9000/...`.
Expand Down
5 changes: 3 additions & 2 deletions dandiapi/zarr/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,10 @@ def s3_url(self):
s3_url = urlunparse((parsed[0], parsed[1], parsed[2], '', '', ''))
return s3_url

def generate_upload_urls(self, paths: list[str]):
def generate_upload_urls(self, path_md5s: list[dict]):
return [
self.storage.generate_presigned_put_object_url(self.s3_path(path)) for path in paths
self.storage.generate_presigned_put_object_url(self.s3_path(o['path']), o['md5'])
for o in path_md5s
]

def mark_pending(self):
Expand Down
6 changes: 3 additions & 3 deletions dandiapi/zarr/tests/test_zarr_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def test_zarr_rest_upload_start(
# Request upload files
resp = authenticated_api_client.post(
f'/api/zarr/{zarr_archive.zarr_id}/files/',
['foo/bar.txt'],
[{'path': 'foo/bar.txt', 'md5': '12345'}],
format='json',
)
assert resp.status_code == 200
Expand All @@ -41,7 +41,7 @@ def test_zarr_rest_upload_start(
# Request more
resp = authenticated_api_client.post(
f'/api/zarr/{zarr_archive.zarr_id}/files/',
['foo/bar2.txt'],
[{'path': 'foo/bar2.txt', 'md5': '12345'}],
format='json',
)
assert resp.status_code == 200
Expand All @@ -52,7 +52,7 @@ def test_zarr_rest_upload_start(
def test_zarr_rest_upload_start_not_an_owner(authenticated_api_client, zarr_archive: ZarrArchive):
resp = authenticated_api_client.post(
f'/api/zarr/{zarr_archive.zarr_id}/files/',
['foo/bar.txt'],
[{'path': 'foo/bar.txt', 'md5': '12345'}],
format='json',
)
assert resp.status_code == 403
Expand Down
7 changes: 4 additions & 3 deletions dandiapi/zarr/views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@
logger = logging.getLogger(__name__)


class ZarrFileCreationSerializer(serializers.ListSerializer):
child = serializers.CharField()
class ZarrFileCreationSerializer(serializers.Serializer):
path = serializers.CharField()
md5 = serializers.CharField()


class ZarrDeleteFileRequestSerializer(serializers.Serializer):
Expand Down Expand Up @@ -271,7 +272,7 @@ def create_files(self, request, zarr_id):
if not self.request.user.has_perm('owner', zarr_archive.dandiset):
raise PermissionDenied()

serializer = ZarrFileCreationSerializer(data=request.data)
serializer = ZarrFileCreationSerializer(data=request.data, many=True)
serializer.is_valid(raise_exception=True)

# Generate presigned urls
Expand Down
4 changes: 2 additions & 2 deletions doc/design/zarr-performance-redesign.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ sequenceDiagram
Server-->>-Client: PENDING Zarr Archive
loop for each file
Client->>+Server: Request signed URLs
Client->>+Server: Request signed URLs (w/ path and md5)
Server-->>-Client: A list of signed URLs
Client->>+S3: Upload individual file using signed URL
end
Expand All @@ -120,7 +120,7 @@ sequenceDiagram
Client->>+Client: Verify zarr checksum with local
```

(Step 1): **`dandi-cli` computes the Zarr checksum locally**. (Note that this step can actually be taken any time before step 12; it is listed here arbitrarily.)
(Step 1): **`dandi-cli` computes the Zarr checksum locally**.

(Steps 2 and 3): `dandi-cli` asks the server to create a new Zarr archive, which is put into the `PENDING` state.

Expand Down

0 comments on commit 9e7b39f

Please sign in to comment.