Skip to content

Commit

Permalink
Add management command to re-extract for asset metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
jjnesbitt committed Sep 24, 2024
1 parent 99e55f4 commit d5b9668
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 0 deletions.
93 changes: 93 additions & 0 deletions dandiapi/api/management/commands/extract_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from __future__ import annotations

import logging
from pathlib import Path

from dandi.dandiapi import RemoteReadableAsset
from dandi.metadata.nwb import nwb2asset
from dandischema.models import get_schema_version
from django.contrib.auth.models import User
import djclick as click
from tqdm import tqdm

from dandiapi.api.models import Asset, Dandiset, Version
from dandiapi.api.services.asset import change_asset

logger = logging.getLogger(__name__)


@click.group()
def group():
pass


def extract_asset_metadata(asset: Asset, draft_version: Version):
readable_asset = RemoteReadableAsset(
asset.s3_url, size=asset.size, mtime=asset.modified, name=Path(asset.path).name
)

if not asset.path.lower().endswith('.nwb'):
logger.info('Asset %s: Not an NWB file, skipping...', asset.path)
return

new_metadata = nwb2asset(readable_asset, schema_version=get_schema_version()).json_dict()

# Use dandiset owner, default to some admin user
user = (
draft_version.dandiset.owners.first()
or User.objects.filter(is_superuser=True, is_staff=True).first()
)

# Replace old asset with new asset containing updated metadata
change_asset(
user=user,
asset=asset,
version=draft_version,
new_asset_blob=asset.blob,
new_zarr_archive=asset.zarr,
new_metadata=new_metadata,
)


def extract_dandiset_assets(dandiset: Dandiset):
# Only update assets which do not belong to a published version
assets = dandiset.draft_version.assets.filter(
published=False, path__iendswith='.nwb'
).select_related('blob', 'zarr')
if not assets:
logger.info('No draft NWB assets found in dandiset %s. Skipping...', dandiset)
return

for asset in tqdm(assets):
extract_asset_metadata(asset=asset, draft_version=dandiset.draft_version)


@group.command(help='Re-extracts the metadata of this asset')
@click.argument('asset_id')
def asset(asset_id: str):
asset = Asset.objects.get(asset_id=asset_id)
draft_versions = asset.versions.filter(version='draft')
if not draft_versions.exists():
raise click.ClickException(
'Cannot re-extract metadata of asset that has no associated draft versions.'
)

# Re-extract for every draft version
for version in draft_versions:
extract_asset_metadata(asset=asset, draft_version=version)


@group.command(
help='Re-extracts the metadata of all assets in the draft version of the provided dandiset'
)
@click.argument('dandiset_id')
def dandiset(dandiset_id: str):
dandiset = Dandiset.objects.get(id=int(dandiset_id))
extract_dandiset_assets(dandiset)


@group.command(name='all', help='Re-extracts the metadata of all assets in all draft versions')
def all_dandisets():
for dandiset in Dandiset.objects.all():
logger.info('DANDISET: %s', dandiset.identifier)
extract_dandiset_assets(dandiset)
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
include_package_data=True,
install_requires=[
'celery',
'dandi',
# Pin dandischema to exact version to make explicit which schema version is being used
'dandischema==0.10.2', # schema version 0.6.8
'django~=4.1.0',
Expand All @@ -59,6 +60,7 @@
'djangorestframework-yaml',
'drf-extensions',
'drf-yasg',
'fsspec[http]',
'jsonschema',
'boto3[s3]',
'more_itertools',
Expand Down

0 comments on commit d5b9668

Please sign in to comment.