Skip to content

Commit

Permalink
Merge pull request #1800 from dandi/fix-incorrect-assets-summary
Browse files Browse the repository at this point in the history
Don't use .save() in validate_version_metadata
  • Loading branch information
jjnesbitt committed Feb 14, 2024
2 parents cfa54d1 + 11bfd89 commit 4637cb3
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 34 deletions.
66 changes: 33 additions & 33 deletions dandiapi/api/services/metadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,28 @@ def _build_validatable_version_metadata(version: Version) -> dict:
}
return metadata_for_validation

version_id = version.id
def _get_version_validation_result(
version: Version,
) -> tuple[Version.Status, list[dict[str, str]]]:
try:
validate(
_build_validatable_version_metadata(version),
schema_key='PublishedDandiset',
json_validation=True,
)
except dandischema.exceptions.ValidationError as e:
logger.info('Error while validating version %s', version.id)
return (Version.Status.INVALID, _collect_validation_errors(e))
except ValueError as e:
# A bare ValueError is thrown when dandischema generates its own exceptions, like a
# mismatched schemaVersion.
logger.info('Error while validating version %s', version.id)
return (Version.Status.INVALID, [{'field': '', 'message': str(e)}])

logger.info('Successfully validated version %s', version.id)
return (Version.Status.VALID, [])

version_id = version.id
logger.info('Validating dandiset metadata for version %s', version_id)

# Published versions are immutable
Expand All @@ -140,43 +160,23 @@ def _build_validatable_version_metadata(version: Version) -> dict:
with transaction.atomic():
# validating version metadata needs to lock the version to avoid racing with
# other modifications e.g. aggregate_assets_summary.
version = (
Version.objects.filter(id=version.id, status=Version.Status.PENDING)
.select_for_update()
.first()
)
version_qs = Version.objects.filter(id=version_id).select_for_update()
current_version = version_qs.first()

# It's possible for this version to get deleted during execution of this function.
# If that happens *before* the select_for_update query, return early.
if version is None:
if current_version is None:
logger.info('Version %s no longer exists, skipping validation', version_id)
return

version.status = Version.Status.VALIDATING
version.save()

try:
validate(
_build_validatable_version_metadata(version),
schema_key='PublishedDandiset',
json_validation=True,
# If the version has since been modified, return early
if current_version.status != Version.Status.PENDING:
logger.info(
'Skipping validation for version %s due to concurrent modification', version_id
)
except dandischema.exceptions.ValidationError as e:
logger.info('Error while validating version %s', version.id)
version.status = Version.Status.INVALID

validation_errors = _collect_validation_errors(e)
version.validation_errors = validation_errors
version.save()
return
except ValueError as e:
# A bare ValueError is thrown when dandischema generates its own exceptions, like a
# mismatched schemaVersion.
version.status = Version.Status.INVALID
version.validation_errors = [{'field': '', 'message': str(e)}]
version.save()
return
logger.info('Successfully validated version %s', version.id)
version.status = Version.Status.VALID
version.validation_errors = []
version.save()

# Set to validating and continue
version_qs.update(status=Version.Status.VALIDATING)
status, errors = _get_version_validation_result(current_version)
version_qs.update(status=status, validation_errors=errors, modified=timezone.now())
33 changes: 32 additions & 1 deletion dandiapi/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from typing import TYPE_CHECKING

from django.conf import settings
from django.forms.models import model_to_dict
from django.utils import timezone
from guardian.shortcuts import assign_perm
import pytest

Expand Down Expand Up @@ -190,13 +192,42 @@ def test_validate_asset_metadata_saves_version(draft_asset: Asset, draft_version
def test_validate_version_metadata(draft_version: Version, asset: Asset):
draft_version.assets.add(asset)

tasks.validate_version_metadata_task(draft_version.id)
# Bypass .save to manually set an older timestamp
old_modified = timezone.now() - datetime.timedelta(minutes=10)
updated = Version.objects.filter(id=draft_version.id).update(modified=old_modified)
assert updated == 1

tasks.validate_version_metadata_task(draft_version.id)
draft_version.refresh_from_db()

assert draft_version.status == Version.Status.VALID
assert draft_version.validation_errors == []

# Ensure modified field was updated
assert draft_version.modified > old_modified


@pytest.mark.django_db()
def test_validate_version_metadata_no_side_effects(draft_version: Version, asset: Asset):
draft_version.assets.add(asset)

# Set the version `status` and `validation_errors` fields to something
# which can't be a result of validate_version_metadata
draft_version.status = Version.Status.PENDING
draft_version.validation_errors = [{'foo': 'bar'}]
draft_version.save()

# Validate version metadata, storing the model data before and after
old_data = model_to_dict(draft_version)
tasks.validate_version_metadata_task(draft_version.id)
draft_version.refresh_from_db()
new_data = model_to_dict(draft_version)

# Check that change is isolated to these two fields
assert old_data.pop('status') != new_data.pop('status')
assert old_data.pop('validation_errors') != new_data.pop('validation_errors')
assert old_data == new_data


@pytest.mark.django_db()
def test_validate_version_metadata_malformed_schema_version(draft_version: Version, asset: Asset):
Expand Down

0 comments on commit 4637cb3

Please sign in to comment.