-
Notifications
You must be signed in to change notification settings - Fork 10
/
models.py
122 lines (100 loc) · 4.18 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from __future__ import annotations
import logging
from urllib.parse import urlparse, urlunparse
from uuid import uuid4
from django.conf import settings
from django.db import models
from django_extensions.db.models import TimeStampedModel
from rest_framework.exceptions import ValidationError
from dandiapi.api.models import Dandiset
from dandiapi.api.storage import get_storage
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(name=__name__)
# The status of the zarr ingestion (checksums, size, file count)
class ZarrArchiveStatus(models.TextChoices):
    """Enumerate the values a zarr archive's ``status`` field may hold.

    The check constraint declared on ``BaseZarrArchive`` ties these values to the
    ``checksum`` column: ``COMPLETE`` requires a non-null checksum, while every other
    status requires the checksum to be null.
    """

    # Default for new archives; also the state ``BaseZarrArchive.mark_pending`` resets to.
    PENDING = 'Pending'
    # Checksum must still be null in this state.
    UPLOADED = 'Uploaded'
    # Checksum must still be null in this state.
    INGESTING = 'Ingesting'
    # The only state in which a checksum is present.
    COMPLETE = 'Complete'
class BaseZarrArchive(TimeStampedModel):
    """Abstract base model with the fields and behaviour shared by zarr archive models.

    This class relies on three members it does not define itself and which concrete
    subclasses must provide: a ``dandiset`` field (named in the unique constraint), a
    ``storage`` attribute, and an ``s3_path(zarr_path)`` method.
    """

    # Pattern for a lowercase, hyphenated UUID whose version nibble is 4 and whose
    # variant nibble is one of 8, 9, a or b.
    UUID_REGEX = r'[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}'
    INGEST_ERROR_MSG = 'Zarr archive is currently ingesting or has already ingested'

    class Meta:
        ordering = ['created']
        get_latest_by = 'modified'
        abstract = True
        constraints = [
            # An archive name may be used only once per dandiset.
            models.UniqueConstraint(
                name='%(app_label)s-%(class)s-unique-name',
                fields=['dandiset', 'name'],
            ),
            # A checksum is present if and only if the status is COMPLETE.
            models.CheckConstraint(
                name='%(app_label)s-%(class)s-consistent-checksum-status',
                check=models.Q(
                    checksum__isnull=True,
                    status__in=[
                        ZarrArchiveStatus.PENDING,
                        ZarrArchiveStatus.UPLOADED,
                        ZarrArchiveStatus.INGESTING,
                    ],
                )
                | models.Q(checksum__isnull=False, status=ZarrArchiveStatus.COMPLETE),
            ),
        ]

    zarr_id = models.UUIDField(unique=True, default=uuid4, db_index=True)
    name = models.CharField(max_length=512)
    file_count = models.BigIntegerField(default=0)
    size = models.BigIntegerField(default=0)
    checksum = models.CharField(max_length=512, null=True, default=None, blank=True)  # noqa: DJ001
    status = models.CharField(
        # Sized to fit exactly the longest stored status value.
        max_length=max(len(value) for value, _label in ZarrArchiveStatus.choices),
        choices=ZarrArchiveStatus.choices,
        default=ZarrArchiveStatus.PENDING,
    )

    @property
    def digest(self) -> dict[str, str]:
        """Return the checksum in a one-entry mapping keyed by ``dandi:dandi-zarr-checksum``."""
        checksum = self.checksum
        return {'dandi:dandi-zarr-checksum': checksum}

    @property
    def s3_url(self):
        """Return the storage URL of this archive's root path with no query string."""
        root_url = self.storage.url(self.s3_path(''))
        # Presigning appends query parameters that change on every call, so only the
        # scheme, network location and path are kept.
        scheme, netloc, path = urlparse(root_url)[:3]
        return urlunparse((scheme, netloc, path, '', '', ''))

    def generate_upload_urls(self, path_md5s: list[dict]):
        """Return one presigned PUT URL per entry of ``path_md5s``, in the same order.

        Each entry is a mapping with a ``'path'`` key (a path inside this archive) and a
        ``'base64md5'`` key, both of which are handed to the storage backend.
        """
        urls = []
        for entry in path_md5s:
            urls.append(
                self.storage.generate_presigned_put_object_url(
                    self.s3_path(entry['path']), entry['base64md5']
                )
            )
        return urls

    def mark_pending(self):
        """Reset status, checksum, file count and size in memory; nothing is saved here."""
        self.status = ZarrArchiveStatus.PENDING
        self.checksum = None
        self.file_count = 0
        self.size = 0

    def delete_files(self, paths: list[str]):
        """Delete the given archive-relative paths from storage and reset the archive.

        Every path is checked for existence before any deletion starts. Afterwards the
        archive is marked pending and saved.

        Raises:
            ValidationError: if any of ``paths`` does not exist in storage.
        """
        # First pass: reject the request as soon as a missing object is found.
        for path in paths:
            if self.storage.exists(self.s3_path(path)):
                continue
            raise ValidationError(f'File {self.s3_path(path)} does not exist.')

        # Second pass: all paths were present, so remove them one by one.
        for path in paths:
            self.storage.delete(self.s3_path(path))

        # The stored metadata no longer reflects the archive contents.
        self.mark_pending()
        self.save()
class ZarrArchive(BaseZarrArchive):
    """Concrete zarr archive model, reachable from a dandiset as ``zarr_archives``."""

    # Storage backend, obtained once when this class body is executed.
    storage = get_storage()
    dandiset = models.ForeignKey(
        Dandiset,
        related_name='zarr_archives',
        on_delete=models.CASCADE,
    )

    def s3_path(self, zarr_path: str) -> str:
        """Return the full S3 object key for ``zarr_path`` within this archive.

        The key is the configured bucket prefix and zarr prefix name, followed by this
        archive's ``zarr_id`` and then ``zarr_path``.
        """
        prefix = f'{settings.DANDI_DANDISETS_BUCKET_PREFIX}{settings.DANDI_ZARR_PREFIX_NAME}/'
        return prefix + f'{self.zarr_id}/{zarr_path}'
class EmbargoedZarrArchive(BaseZarrArchive):
    """Concrete zarr archive model, reachable from a dandiset as ``embargoed_zarr_archives``."""

    # Storage backend, obtained once when this class body is executed.
    storage = get_storage()
    dandiset = models.ForeignKey(
        Dandiset,
        related_name='embargoed_zarr_archives',
        on_delete=models.CASCADE,
    )

    def s3_path(self, zarr_path: str) -> str:
        """Return the full S3 object key for ``zarr_path`` within this archive.

        The key is the configured zarr prefix name, followed by the owning dandiset's
        identifier, this archive's ``zarr_id`` and then ``zarr_path``.
        """
        # NOTE(review): unlike ZarrArchive.s3_path, DANDI_DANDISETS_BUCKET_PREFIX is not
        # prepended here -- confirm this is intentional.
        prefix = f'{settings.DANDI_ZARR_PREFIX_NAME}/'
        return prefix + f'{self.dandiset.identifier}/{self.zarr_id}/{zarr_path}'