Skip to content

Commit

Permalink
Merge pull request #1400 from dandi/batch-bulk-creates-publish
Browse files: browse the repository at this point in the history
  • Loading branch information
mvandenburgh authored Dec 12, 2022
2 parents 029fdb8 + c6e1ece commit 58c0287
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 8 deletions.
30 changes: 22 additions & 8 deletions dandiapi/api/services/publish/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from django.contrib.auth.models import User
from django.db import transaction
from django.db.models import QuerySet
from more_itertools import ichunked

from dandiapi.api import doi
from dandiapi.api.asset_paths import add_version_asset_paths
Expand Down Expand Up @@ -83,17 +84,30 @@ def _publish_dandiset(dandiset_id: int) -> None:

# Add a new many-to-many association directly to any already published assets
already_published_assets: QuerySet[Asset] = old_version.assets.filter(published=True)
AssetVersions.objects.bulk_create(
AssetVersions(asset_id=asset_id, version_id=new_version.id)
for asset_id in already_published_assets.values_list('id', flat=True).iterator()
)

# Batch bulk creates to avoid blowing up memory when there are a lot of assets
for asset_ids_batch in ichunked(
already_published_assets.values_list('id', flat=True).iterator(), 5_000
):
AssetVersions.objects.bulk_create(
[
AssetVersions(asset_id=asset_id, version_id=new_version.id)
for asset_id in asset_ids_batch
]
)

draft_assets: QuerySet[Asset] = old_version.assets.filter(published=False)

AssetVersions.objects.bulk_create(
AssetVersions(asset_id=asset.id, version_id=new_version.id)
for asset in draft_assets.iterator()
)
# Batch bulk creates to avoid blowing up memory when there are a lot of assets
for asset_ids_batch in ichunked(
draft_assets.values_list('id', flat=True).iterator(), 5_000
):
AssetVersions.objects.bulk_create(
[
AssetVersions(asset_id=asset_id, version_id=new_version.id)
for asset_id in asset_ids_batch
]
)

# Publish any draft assets
for draft_asset in draft_assets.iterator():
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
'jsonschema',
'pydantic',
'boto3[s3]',
'more_itertools',
# Production-only
'django-composed-configuration[prod]>=0.22.0',
# pin directly to a version since we're extending the private multipart interface
Expand Down

0 comments on commit 58c0287

Please sign in to comment.