Skip to content

Commit

Permalink
Make asset paths operations idempotent
Browse files Browse the repository at this point in the history
  • Loading branch information
jjnesbitt committed Nov 3, 2022
1 parent daa1fa2 commit ff289b9
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 4 deletions.
13 changes: 10 additions & 3 deletions dandiapi/api/asset_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,13 @@ def insert_asset_paths(asset: Asset, version: Version):
return leaf


# TODO: Make idempotent
def _add_asset_paths(asset: Asset, version: Version):
leaf = insert_asset_paths(asset, version)

# Only increment if it hasn't been performed yet
if leaf.aggregate_files == 1:
return

# Increment the size and file count of all parent paths.
# Get all relations (including leaf node)
parent_ids = (
Expand All @@ -128,7 +131,9 @@ def _add_asset_paths(asset: Asset, version: Version):


def _delete_asset_paths(asset: Asset, version: Version):
leaf: AssetPath = AssetPath.objects.get(asset=asset, version=version)
leaf: AssetPath | None = AssetPath.objects.filter(asset=asset, version=version).first()
if leaf is None:
return

# Fetch parents
parent_ids = (
Expand Down Expand Up @@ -178,7 +183,9 @@ def add_version_asset_paths(version: Version):
# Compute aggregate file size + count for each asset path, instead of when each asset
# is added. This is done because updating the same row many times within a transaction is slow.
# https://stackoverflow.com/a/60221875
nodes = AssetPath.objects.filter(version=version)

# Get all nodes which haven't been computed yet
nodes = AssetPath.objects.filter(version=version, aggregate_files=0)
for node in nodes:
child_link_ids = node.child_links.distinct('child_id').values_list('child_id', flat=True)
child_leaves = AssetPath.objects.filter(id__in=child_link_ids, asset__isnull=False)
Expand Down
24 changes: 24 additions & 0 deletions dandiapi/api/migrations/0039_assetpath_consistent-leaf-paths.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Generated by Django 4.1.1 on 2022-11-03 18:54

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('api', '0038_assetpath_assetpathrelation_alter_asset_path_and_more'),
]

operations = [
migrations.AddConstraint(
model_name='assetpath',
constraint=models.CheckConstraint(
check=models.Q(
('asset__isnull', True),
models.Q(('aggregate_files__lte', 1), ('asset__isnull', False)),
_connector='OR',
),
name='consistent-leaf-paths',
),
),
]
8 changes: 8 additions & 0 deletions dandiapi/api/models/asset_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ class Meta:
models.CheckConstraint(check=~models.Q(path__endswith='/'), name='consistent-slash'),
models.UniqueConstraint(fields=['asset', 'version'], name='unique-asset-version'),
models.UniqueConstraint(fields=['version', 'path'], name='unique-version-path'),
# Ensure all leaf paths have at most 1 associated file
models.CheckConstraint(
check=(
models.Q(asset__isnull=True)
| models.Q(asset__isnull=False, aggregate_files__lte=1)
),
name='consistent-leaf-paths',
),
]


Expand Down
52 changes: 51 additions & 1 deletion dandiapi/api/tests/test_asset_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,23 @@ def test_asset_path_add_asset(draft_version_factory, asset_factory):
)


@pytest.mark.django_db
def test_asset_path_add_asset_idempotent(draft_version_factory, asset_factory):
# Create asset with version
asset: Asset = asset_factory()
version: Version = draft_version_factory()
version.assets.add(asset)

# Add asset to paths
add_asset_paths(asset, version)
add_asset_paths(asset, version)

# Get asset path
path: AssetPath = AssetPath.objects.get(asset=asset, version=version)
assert path.aggregate_files == 1
assert path.aggregate_size == asset.size


@pytest.mark.django_db
def test_asset_path_add_version_asset_paths(draft_version_factory, asset_factory):
# Create asset with version
Expand Down Expand Up @@ -113,6 +130,28 @@ def test_asset_path_add_version_asset_paths(draft_version_factory, asset_factory
)


@pytest.mark.django_db
def test_asset_path_add_version_asset_paths_idempotent(draft_version_factory, asset_factory):
# Create asset with version
version: Version = draft_version_factory()
version.assets.add(asset_factory(path='foo/bar/baz.txt'))
version.assets.add(asset_factory(path='foo/bar/baz2.txt'))
version.assets.add(asset_factory(path='foo/baz/file.txt'))
version.assets.add(asset_factory(path='top.txt'))

# Add verison asset paths
add_version_asset_paths(version)
add_version_asset_paths(version)

# Ensure no duplicate counts
leafpaths = AssetPath.objects.select_related('asset').filter(
version=version, asset__isnull=False
)
for path in leafpaths:
assert path.aggregate_files == 1