Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add back md5 to zarr object url signing #1497

Merged
merged 2 commits into from
Feb 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions dandiapi/api/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,14 @@ def etag_from_blob_name(self, blob_name) -> str | None:
return etag[1:-1]
return etag

def generate_presigned_put_object_url(self, blob_name: str) -> str:
def generate_presigned_put_object_url(self, blob_name: str, md5: str) -> str:
return self.connection.meta.client.generate_presigned_url(
ClientMethod='put_object',
Params={
'Bucket': self.bucket_name,
'Key': blob_name,
'ACL': 'bucket-owner-full-control',
'ContentMD5': md5,
},
ExpiresIn=600, # TODO proper expiration
)
Expand Down Expand Up @@ -190,7 +191,9 @@ def etag_from_blob_name(self, blob_name) -> str | None:
else:
return response.etag

def generate_presigned_put_object_url(self, blob_name: str) -> str:
def generate_presigned_put_object_url(self, blob_name: str, _: str) -> str:
# Note: minio-py doesn't support using Content-MD5 headers

# storage.client will generate URLs like `http://minio:9000/...` when running in
# docker. To avoid this, use the secondary base_url_client which is configured to
# generate URLs like `http://localhost:9000/...`.
Expand Down
5 changes: 3 additions & 2 deletions dandiapi/zarr/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,10 @@ def s3_url(self):
s3_url = urlunparse((parsed[0], parsed[1], parsed[2], '', '', ''))
return s3_url

def generate_upload_urls(self, paths: list[str]):
def generate_upload_urls(self, path_md5s: list[dict]):
return [
self.storage.generate_presigned_put_object_url(self.s3_path(path)) for path in paths
self.storage.generate_presigned_put_object_url(self.s3_path(o['path']), o['md5'])
for o in path_md5s
]

def mark_pending(self):
Expand Down
6 changes: 3 additions & 3 deletions dandiapi/zarr/tests/test_zarr_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def test_zarr_rest_upload_start(
# Request upload files
resp = authenticated_api_client.post(
f'/api/zarr/{zarr_archive.zarr_id}/files/',
['foo/bar.txt'],
[{'path': 'foo/bar.txt', 'md5': '12345'}],
format='json',
)
assert resp.status_code == 200
Expand All @@ -41,7 +41,7 @@ def test_zarr_rest_upload_start(
# Request more
resp = authenticated_api_client.post(
f'/api/zarr/{zarr_archive.zarr_id}/files/',
['foo/bar2.txt'],
[{'path': 'foo/bar2.txt', 'md5': '12345'}],
format='json',
)
assert resp.status_code == 200
Expand All @@ -52,7 +52,7 @@ def test_zarr_rest_upload_start(
def test_zarr_rest_upload_start_not_an_owner(authenticated_api_client, zarr_archive: ZarrArchive):
resp = authenticated_api_client.post(
f'/api/zarr/{zarr_archive.zarr_id}/files/',
['foo/bar.txt'],
[{'path': 'foo/bar.txt', 'md5': '12345'}],
format='json',
)
assert resp.status_code == 403
Expand Down
7 changes: 4 additions & 3 deletions dandiapi/zarr/views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@
logger = logging.getLogger(__name__)


class ZarrFileCreationSerializer(serializers.ListSerializer):
child = serializers.CharField()
class ZarrFileCreationSerializer(serializers.Serializer):
path = serializers.CharField()
md5 = serializers.CharField()


class ZarrDeleteFileRequestSerializer(serializers.Serializer):
Expand Down Expand Up @@ -271,7 +272,7 @@ def create_files(self, request, zarr_id):
if not self.request.user.has_perm('owner', zarr_archive.dandiset):
raise PermissionDenied()

serializer = ZarrFileCreationSerializer(data=request.data)
serializer = ZarrFileCreationSerializer(data=request.data, many=True)
serializer.is_valid(raise_exception=True)

# Generate presigned urls
Expand Down
4 changes: 2 additions & 2 deletions doc/design/zarr-performance-redesign.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ sequenceDiagram
Server-->>-Client: PENDING Zarr Archive

loop for each file
Client->>+Server: Request signed URLs
Client->>+Server: Request signed URLs (w/ path and md5)
Server-->>-Client: A list of signed URLs
Client->>+S3: Upload individual file using signed URL
end
Expand All @@ -120,7 +120,7 @@ sequenceDiagram
Client->>+Client: Verify zarr checksum with local
```

(Step 1): **`dandi-cli` computes the Zarr checksum locally**. (Note that this step can actually be taken any time before step 12; it is listed here arbitrarily.)
(Step 1): **`dandi-cli` computes the Zarr checksum locally**.

(Steps 2 and 3): `dandi-cli` asks the server to create a new Zarr archive, which is put into the `PENDING` state.

Expand Down