diff --git a/dandiapi/api/asset_paths.py b/dandiapi/api/asset_paths.py index f156a9a1c..d4456e81b 100644 --- a/dandiapi/api/asset_paths.py +++ b/dandiapi/api/asset_paths.py @@ -108,10 +108,13 @@ def insert_asset_paths(asset: Asset, version: Version): return leaf -# TODO: Make idempotent def _add_asset_paths(asset: Asset, version: Version): leaf = insert_asset_paths(asset, version) + # Only increment if it hasn't been performed yet + if leaf.aggregate_files == 1: + return + # Increment the size and file count of all parent paths. # Get all relations (including leaf node) parent_ids = ( @@ -128,7 +131,9 @@ def _add_asset_paths(asset: Asset, version: Version): def _delete_asset_paths(asset: Asset, version: Version): - leaf: AssetPath = AssetPath.objects.get(asset=asset, version=version) + leaf: AssetPath | None = AssetPath.objects.filter(asset=asset, version=version).first() + if leaf is None: + return # Fetch parents parent_ids = ( @@ -178,7 +183,9 @@ def add_version_asset_paths(version: Version): # Compute aggregate file size + count for each asset path, instead of when each asset # is added. This is done because updating the same row many times within a transaction is slow. # https://stackoverflow.com/a/60221875 - nodes = AssetPath.objects.filter(version=version) + + # Get all nodes which haven't been computed yet + nodes = AssetPath.objects.filter(version=version, aggregate_files=0) for node in nodes: child_link_ids = node.child_links.distinct('child_id').values_list('child_id', flat=True) child_leaves = AssetPath.objects.filter(id__in=child_link_ids, asset__isnull=False) diff --git a/dandiapi/api/migrations/0039_assetpath_consistent-leaf-paths.py b/dandiapi/api/migrations/0039_assetpath_consistent-leaf-paths.py new file mode 100644 index 000000000..0f410e892 --- /dev/null +++ b/dandiapi/api/migrations/0039_assetpath_consistent-leaf-paths.py @@ -0,0 +1,24 @@ +# Generated by Django 4.1.1 on 2022-11-03 18:54 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0038_assetpath_assetpathrelation_alter_asset_path_and_more'), + ] + + operations = [ + migrations.AddConstraint( + model_name='assetpath', + constraint=models.CheckConstraint( + check=models.Q( + ('asset__isnull', True), + models.Q(('aggregate_files__lte', 1), ('asset__isnull', False)), + _connector='OR', + ), + name='consistent-leaf-paths', + ), + ), + ] diff --git a/dandiapi/api/models/asset_paths.py b/dandiapi/api/models/asset_paths.py index 24b91aa75..5af6ad6f5 100644 --- a/dandiapi/api/models/asset_paths.py +++ b/dandiapi/api/models/asset_paths.py @@ -28,6 +28,14 @@ class Meta: models.CheckConstraint(check=~models.Q(path__endswith='/'), name='consistent-slash'), models.UniqueConstraint(fields=['asset', 'version'], name='unique-asset-version'), models.UniqueConstraint(fields=['version', 'path'], name='unique-version-path'), + # Ensure all leaf paths have at most 1 associated file + models.CheckConstraint( + check=( + models.Q(asset__isnull=True) + | models.Q(asset__isnull=False, aggregate_files__lte=1) + ), + name='consistent-leaf-paths', + ), ] diff --git a/dandiapi/api/tests/test_asset_paths.py b/dandiapi/api/tests/test_asset_paths.py index 943c40375..f782dea72 100644 --- a/dandiapi/api/tests/test_asset_paths.py +++ b/dandiapi/api/tests/test_asset_paths.py @@ -79,6 +79,23 @@ def test_asset_path_add_asset(draft_version_factory, asset_factory): ) +@pytest.mark.django_db +def test_asset_path_add_asset_idempotent(draft_version_factory, asset_factory): + # Create asset with version + asset: Asset = asset_factory() + version: Version = draft_version_factory() + version.assets.add(asset) + + # Add asset to paths + add_asset_paths(asset, version) + add_asset_paths(asset, version) + + # Get asset path + path: AssetPath = AssetPath.objects.get(asset=asset, version=version) + assert path.aggregate_files == 1 + assert path.aggregate_size == asset.size + + @pytest.mark.django_db def test_asset_path_add_version_asset_paths(draft_version_factory, asset_factory): # Create asset with version @@ -113,6 +130,28 @@ def test_asset_path_add_version_asset_paths(draft_version_factory, asset_factory ) +@pytest.mark.django_db +def test_asset_path_add_version_asset_paths_idempotent(draft_version_factory, asset_factory): + # Create asset with version + version: Version = draft_version_factory() + version.assets.add(asset_factory(path='foo/bar/baz.txt')) + version.assets.add(asset_factory(path='foo/bar/baz2.txt')) + version.assets.add(asset_factory(path='foo/baz/file.txt')) + version.assets.add(asset_factory(path='top.txt')) + + # Add verison asset paths + add_version_asset_paths(version) + add_version_asset_paths(version) + + # Ensure no duplicate counts + leafpaths = AssetPath.objects.select_related('asset').filter( + version=version, asset__isnull=False + ) + for path in leafpaths: + assert path.aggregate_files == 1 + assert path.aggregate_size == path.asset.size + + @pytest.mark.django_db def test_asset_path_add_asset_shared_paths(draft_version_factory, asset_factory): # Create asset with version @@ -145,6 +184,16 @@ def test_asset_path_delete_asset(ingested_asset): assert not AssetPath.objects.filter(path=asset.path, version=version).exists() +@pytest.mark.django_db +def test_asset_path_delete_asset_idempotent(ingested_asset): + asset = ingested_asset + version = ingested_asset.versions.first() + + # Try deleting twice, ensuring no error raised + delete_asset_paths(asset, version) + delete_asset_paths(asset, version) + + @pytest.mark.django_db def test_asset_path_update_asset(draft_version_factory, asset_factory): # Create asset with version @@ -158,7 +207,8 @@ def test_asset_path_update_asset(draft_version_factory, asset_factory): version.assets.add(new_asset) add_asset_paths(new_asset, version) - # Update asset + # Update asset (call twice to ensure idempotence) + update_asset_paths(old_asset=old_asset, new_asset=new_asset, version=version) update_asset_paths(old_asset=old_asset, new_asset=new_asset, version=version) # Assert none of old paths exist