-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1312 from dandi/asset-paths-design
- Loading branch information
Showing 16 changed files with 881 additions and 366 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
from __future__ import annotations | ||
|
||
import os | ||
|
||
from django.db import transaction | ||
from django.db.models import Count, F, QuerySet | ||
|
||
from dandiapi.api.models import Asset, AssetPath, AssetPathRelation, Version | ||
|
||
#################################################################### | ||
# Dandiset and version deletion will cascade to asset path deletion. | ||
# Thus, no explicit action is needed for these. | ||
#################################################################### | ||
|
||
|
||
def extract_paths(path: str) -> list[str]:
    """
    Return every cumulative prefix of a ``/``-separated path.

    The result has one entry per path component, ordered from the first
    component down to the full path, e.g. ``'a/b/c'`` yields
    ``['a', 'a/b', 'a/b/c']``. A path without any ``/`` yields a
    single-element list containing the path itself.
    """
    # Asset paths always use '/' as their separator, so join with posixpath
    # instead of os.path, whose separator depends on the host platform.
    import posixpath

    nodepaths: list[str] = []
    for component in path.split('/'):
        # Each entry extends the previous prefix by one component.
        prefix = posixpath.join(nodepaths[-1], component) if nodepaths else component
        nodepaths.append(prefix)

    return nodepaths
|
||
|
||
def get_root_paths(version: Version) -> QuerySet[AssetPath]:
    """
    Return the root asset paths of a version, ordered by path.

    A path is treated as a root when it has exactly one ``parent_links`` entry.
    """
    # prefetch_related is used here instead of select_related,
    # as otherwise the resulting join is very large.
    related_fields = ('asset', 'asset__blob', 'asset__embargoed_blob', 'asset__zarr')
    version_paths = AssetPath.objects.prefetch_related(*related_fields).filter(version=version)

    # NOTE(review): presumably the single parent link of a root is the
    # depth-0 link to itself created by add_asset_paths -- confirm.
    roots = version_paths.alias(num_parents=Count('parent_links')).filter(num_parents=1)
    return roots.order_by('path')
|
||
|
||
def get_path_children(path: AssetPath) -> QuerySet[AssetPath]:
    """Return the direct children of an existing path, ordered by path."""
    # Direct children are the paths related to `path` at a depth of exactly 1.
    child_ids = (
        AssetPathRelation.objects.filter(parent=path, depth=1)
        .values_list('child', flat=True)
        .distinct()
    )

    related_fields = ('asset', 'asset__blob', 'asset__embargoed_blob', 'asset__zarr')
    children = AssetPath.objects.select_related(*related_fields).filter(id__in=child_ids)
    return children.order_by('path')
|
||
|
||
def search_asset_paths(query: str, version: Version) -> QuerySet[AssetPath] | None:
    """
    Return the direct children of the path named by ``query`` within a version.

    An empty query returns the version's root paths. ``None`` is returned
    when no path in the version matches the query.
    """
    if query:
        # Stored paths are looked up without a trailing slash.
        normalized_query = query.rstrip('/')

        matched = AssetPath.objects.filter(version=version, path=normalized_query).first()
        return None if matched is None else get_path_children(matched)

    return get_root_paths(version)
|
||
|
||
# TODO: Make idempotent
@transaction.atomic()
def add_asset_paths(asset: Asset, version: Version):
    """
    Record an asset's path, and each of its ancestor paths, for a version.

    Creates the leaf path for the asset, any missing ancestor paths, and the
    relations between all of them, then adds the asset's size and one file to
    the aggregates of every path related to the leaf as a parent. Returns
    early, changing nothing else, if the leaf path already exists.
    """
    # Get or create the leaf path; an existing leaf means there is nothing to add.
    leaf, is_new = AssetPath.objects.get_or_create(path=asset.path, asset=asset, version=version)
    if not is_new:
        return

    # Every prefix of the asset path except the full path itself.
    ancestor_paths = extract_paths(asset.path)[:-1]

    # Insert the ancestor nodes; ignore_conflicts skips rows that would
    # violate a constraint (e.g. nodes that already exist).
    ancestor_nodes = [
        AssetPath(path=ancestor, version=version, asset=None) for ancestor in ancestor_paths
    ]
    AssetPath.objects.bulk_create(ancestor_nodes, ignore_conflicts=True)

    # Re-read the stored ancestors ordered by path, then append the leaf.
    stored_ancestors = AssetPath.objects.filter(
        version=version, path__in=ancestor_paths
    ).order_by('path')
    chain = [*stored_ancestors, leaf]

    # Relate each node to itself (depth 0) and to every node after it in the
    # chain, with depth equal to their distance apart.
    relations = [
        AssetPathRelation(parent=ancestor, child=descendant, depth=distance)
        for start, ancestor in enumerate(chain)
        for distance, descendant in enumerate(chain[start:])
    ]
    AssetPathRelation.objects.bulk_create(relations, ignore_conflicts=True)

    # All nodes related to the leaf as a parent (including the leaf node itself).
    related_ids = (
        AssetPathRelation.objects.filter(child=leaf)
        .distinct('parent')
        .values_list('parent', flat=True)
    )

    # Update size + file count on each of them.
    AssetPath.objects.filter(id__in=related_ids).update(
        aggregate_size=F('aggregate_size') + asset.size,
        aggregate_files=F('aggregate_files') + 1,
    )
|
||
|
||
@transaction.atomic()
def delete_asset_paths(asset: Asset, version: Version):
    """
    Remove an asset's path from a version and update its ancestors' aggregates.

    Subtracts the asset's size and one file from every path related to the
    asset's leaf path as a parent, then deletes those paths that no longer
    contain any files.

    Raises ``AssetPath.DoesNotExist`` if the asset has no path in the version,
    and ``AssertionError`` if the leaf's aggregates are not zero after the
    update.
    """
    leaf: AssetPath = AssetPath.objects.get(asset=asset, version=version)

    # Fetch parents
    parent_ids = (
        AssetPathRelation.objects.filter(child=leaf)
        .distinct('parent')
        .values_list('parent', flat=True)
    )
    parent_paths = AssetPath.objects.filter(id__in=parent_ids)

    # Update parents
    parent_paths.update(
        aggregate_size=F('aggregate_size') - asset.size, aggregate_files=F('aggregate_files') - 1
    )

    # Ensure integrity. Raised explicitly (rather than via `assert`) so the
    # check still runs, and still rolls back the transaction, under `python -O`.
    leaf.refresh_from_db()
    if leaf.aggregate_size != 0 or leaf.aggregate_files != 0:
        raise AssertionError(
            f'Leaf path {leaf.path!r} has non-zero aggregates after removal '
            f'(size={leaf.aggregate_size}, files={leaf.aggregate_files})'
        )

    # Delete the leaf node and any of its parents that no longer contain files.
    # Only this leaf's parent chain is considered, so unrelated paths (e.g. in
    # other versions) are never deleted by this call.
    parent_paths.filter(aggregate_files=0).delete()
|
||
|
||
@transaction.atomic()
def update_asset_paths(old_asset: Asset, new_asset: Asset, version: Version):
    """Replace the path entries of ``old_asset`` with those of ``new_asset`` in a version."""
    # Removal and re-insertion run inside this function's atomic block.
    delete_asset_paths(asset=old_asset, version=version)
    add_asset_paths(asset=new_asset, version=version)
|
||
|
||
@transaction.atomic()
def add_version_asset_paths(version: Version):
    """Add the paths of every asset in a version."""
    version_assets = version.assets.iterator()
    for asset in version_assets:
        add_asset_paths(asset=asset, version=version)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import djclick as click | ||
|
||
from dandiapi.api.asset_paths import add_version_asset_paths | ||
from dandiapi.api.models import Version | ||
|
||
|
||
@click.command()
def ingest_asset_paths():
    """Add asset paths for every version, printing each version and its asset count."""
    versions = Version.objects.iterator()
    for version in versions:
        # Progress output: the version, then how many assets it holds.
        print(f'Version: {version}')
        print(f'\t {version.assets.count()} assets')

        add_version_asset_paths(version)
127 changes: 127 additions & 0 deletions
127
dandiapi/api/migrations/0038_assetpath_assetpathrelation_alter_asset_path_and_more.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
# Generated by Django 4.1.1 on 2022-10-19 16:07 | ||
|
||
from django.db import migrations, models | ||
import django.db.models.deletion | ||
|
||
import dandiapi.api.models.asset | ||
|
||
|
||
class Migration(migrations.Migration):
    """Create the AssetPath and AssetPathRelation models and constrain Asset.path."""

    dependencies = [
        ('api', '0037_alter_version_status'),
    ]

    operations = [
        # --- New models -------------------------------------------------
        migrations.CreateModel(
            name='AssetPath',
            fields=[
                (
                    'id',
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('path', models.CharField(max_length=512)),
                ('aggregate_files', models.PositiveBigIntegerField(default=0)),
                ('aggregate_size', models.PositiveBigIntegerField(default=0)),
            ],
        ),
        migrations.CreateModel(
            name='AssetPathRelation',
            fields=[
                (
                    'id',
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('depth', models.PositiveIntegerField()),
            ],
        ),
        # --- Asset.path validation --------------------------------------
        migrations.AlterField(
            model_name='asset',
            name='path',
            field=models.CharField(
                max_length=512,
                validators=[dandiapi.api.models.asset.validate_asset_path],
            ),
        ),
        # NOTE(review): the `A-z` range also matches the ASCII characters
        # between 'Z' and 'a' ([ \ ] ^ _ `) -- confirm this is intended. Any
        # change belongs in the model plus a new migration, not in this file.
        migrations.AddConstraint(
            model_name='asset',
            constraint=models.CheckConstraint(
                check=models.Q(
                    path__regex='^([A-z0-9(),&\\s#+~_=-]?\\/?\\.?[A-z0-9(),&\\s#+~_=-])+$'
                ),
                name='asset_path_regex',
            ),
        ),
        migrations.AddConstraint(
            model_name='asset',
            constraint=models.CheckConstraint(
                check=models.Q(path__startswith='/', _negated=True),
                name='asset_path_no_leading_slash',
            ),
        ),
        # --- Foreign keys on the new models -----------------------------
        migrations.AddField(
            model_name='assetpathrelation',
            name='child',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='parent_links',
                to='api.assetpath',
            ),
        ),
        migrations.AddField(
            model_name='assetpathrelation',
            name='parent',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='child_links',
                to='api.assetpath',
            ),
        ),
        migrations.AddField(
            model_name='assetpath',
            name='asset',
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.PROTECT,
                related_name='leaf_paths',
                to='api.asset',
            ),
        ),
        migrations.AddField(
            model_name='assetpath',
            name='version',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='asset_paths',
                to='api.version',
            ),
        ),
        # --- Constraints on the new models ------------------------------
        migrations.AddConstraint(
            model_name='assetpathrelation',
            constraint=models.UniqueConstraint(
                fields=('parent', 'child'),
                name='unique-relationship',
            ),
        ),
        migrations.AddConstraint(
            model_name='assetpath',
            constraint=models.CheckConstraint(
                check=models.Q(path__endswith='/', _negated=True),
                name='consistent-slash',
            ),
        ),
        migrations.AddConstraint(
            model_name='assetpath',
            constraint=models.UniqueConstraint(
                fields=('asset', 'version'),
                name='unique-asset-version',
            ),
        ),
        migrations.AddConstraint(
            model_name='assetpath',
            constraint=models.UniqueConstraint(
                fields=('version', 'path'),
                name='unique-version-path',
            ),
        ),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.