Skip to content

Commit

Permalink
Add new zarr checksum format to model
Browse files Browse the repository at this point in the history
  • Loading branch information
dchiquito committed Mar 16, 2022
1 parent 20f54c7 commit 02972c5
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 10 deletions.
3 changes: 2 additions & 1 deletion dandischema/digests/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

"""Passed to the json() method of pydantic models for serialization."""
ENCODING_KWARGS = {
    "separators": (",", ":"),
}
# Regex for a zarr directory digest. The three capture groups are the ones
# parse_directory_digest() returns, in order; their names below follow the
# parameters of generate_directory_digest(md5, file_count, size).
ZARR_CHECKSUM_PATTERN = (
    "([0-9a-f]{32})"  # group 1: 32 lowercase hex characters (md5)
    "-([0-9]+)"  # group 2: decimal digits (file count)
    "--([0-9]+)"  # group 3: decimal digits (size)
)


def generate_directory_digest(md5: str, file_count: int, size: int):
Expand All @@ -18,7 +19,7 @@ def generate_directory_digest(md5: str, file_count: int, size: int):

def parse_directory_digest(digest: str):
    """Parse a directory digest into its constituent parts.

    The digest is matched against ``ZARR_CHECKSUM_PATTERN``, i.e. 32
    lowercase hex characters, ``-``, a run of decimal digits, ``--`` and
    another run of decimal digits.

    :param digest: the directory digest string to parse
    :return: a 3-tuple ``(group 1 as str, group 2 as int, group 3 as int)``;
        callers unpack it as ``(checksum, file_count, size)``
    :raises ValueError: if ``digest`` does not match the pattern
    """
    # Match once against the shared module-level pattern so this parser and
    # any validator importing the same constant cannot drift apart.
    # NOTE: re.match anchors only at the start of the string, so characters
    # following the final digit run are ignored here; callers that need a
    # strict check should use re.fullmatch with the same pattern first.
    match = re.match(ZARR_CHECKSUM_PATTERN, digest)
    if match is None:
        raise ValueError(f"Cannot parse directory digest {digest}")
    return match.group(1), int(match.group(2)), int(match.group(3))
Expand Down
9 changes: 8 additions & 1 deletion dandischema/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

from .consts import DANDI_SCHEMA_VERSION
from .digests.dandietag import DandiETag
from .digests.zarr import ZARR_CHECKSUM_PATTERN, parse_directory_digest
from .model_types import (
AccessTypeDict,
AgeReferenceTypeDict,
Expand Down Expand Up @@ -1124,11 +1125,17 @@ def digest_check(cls, v, values, **kwargs):
if v.get(DigestType.dandi_etag):
raise ValueError("Digest cannot have both etag and zarr checksums.")
digest = v.get(DigestType.dandi_zarr_checksum)
if not re.fullmatch(MD5_PATTERN, digest):
if not re.fullmatch(ZARR_CHECKSUM_PATTERN, digest):
raise ValueError(
f"Digest must have an appropriate dandi-zarr-checksum value. "
f"Got {digest}"
)
_checksum, _file_count, zarr_size = parse_directory_digest(digest)
content_size = values.get("contentSize")
if content_size != zarr_size:
raise ValueError(
f"contentSize {content_size} is not equal to the checksum size {zarr_size}."
)
else:
if DigestType.dandi_etag not in v:
raise ValueError("A non-zarr asset must have a dandi-etag.")
Expand Down
25 changes: 17 additions & 8 deletions dandischema/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def test_asset_digest():
for el in exc.value.errors()
)

digest = 33 * "a"
digest = 32 * "a"
digest_model = {models.DigestType.dandi_zarr_checksum: digest}
with pytest.raises(pydantic.ValidationError) as exc:
models.BareAsset(
Expand All @@ -133,14 +133,23 @@ def test_asset_digest():
for val in set([el["msg"] for el in exc.value.errors()])
]
)
digest = 32 * "a"
digest = f"{32 * 'a'}-1--42"
digest_model = {models.DigestType.dandi_zarr_checksum: digest}
models.BareAsset(
contentSize=100,
encodingFormat="application/x-zarr",
digest=digest_model,
path="/",
with pytest.raises(pydantic.ValidationError) as exc:
models.BareAsset(
contentSize=100,
encodingFormat="application/x-zarr",
digest=digest_model,
path="/",
)
assert any(
[
"contentSize 100 is not equal to the checksum size 42." in val
for val in set([el["msg"] for el in exc.value.errors()])
]
)
digest = f"{32 * 'a'}-1--100"
digest_model = {models.DigestType.dandi_zarr_checksum: digest}
with pytest.raises(pydantic.ValidationError) as exc:
models.PublishedAsset(
contentSize=100,
Expand Down Expand Up @@ -357,7 +366,7 @@ def test_autogenerated_titles():


def test_dantimeta_1():
""" checking basic metadata for publishing"""
"""checking basic metadata for publishing"""
# meta data without doi, datePublished and publishedBy
meta_dict = {
"identifier": "DANDI:999999",
Expand Down

0 comments on commit 02972c5

Please sign in to comment.