diff --git a/dandiapi/api/services/metadata/__init__.py b/dandiapi/api/services/metadata/__init__.py index 022ee3379..f691b71b2 100644 --- a/dandiapi/api/services/metadata/__init__.py +++ b/dandiapi/api/services/metadata/__init__.py @@ -129,8 +129,28 @@ def _build_validatable_version_metadata(version: Version) -> dict: } return metadata_for_validation - version_id = version.id + def _get_version_validation_result( + version: Version, + ) -> tuple[Version.Status, list[dict[str, str]]]: + try: + validate( + _build_validatable_version_metadata(version), + schema_key='PublishedDandiset', + json_validation=True, + ) + except dandischema.exceptions.ValidationError as e: + logger.info('Error while validating version %s', version.id) + return (Version.Status.INVALID, _collect_validation_errors(e)) + except ValueError as e: + # A bare ValueError is thrown when dandischema generates its own exceptions, like a + # mismatched schemaVersion. + logger.info('Error while validating version %s', version.id) + return (Version.Status.INVALID, [{'field': '', 'message': str(e)}]) + logger.info('Successfully validated version %s', version.id) + return (Version.Status.VALID, []) + + version_id = version.id logger.info('Validating dandiset metadata for version %s', version_id) # Published versions are immutable @@ -140,43 +160,23 @@ def _build_validatable_version_metadata(version: Version) -> dict: with transaction.atomic(): # validating version metadata needs to lock the version to avoid racing with # other modifications e.g. aggregate_assets_summary. - version = ( - Version.objects.filter(id=version.id, status=Version.Status.PENDING) - .select_for_update() - .first() - ) + version_qs = Version.objects.filter(id=version_id).select_for_update() + current_version = version_qs.first() # It's possible for this version to get deleted during execution of this function. # If that happens *before* the select_for_update query, return early. - if version is None: + if current_version is None: logger.info('Version %s no longer exists, skipping validation', version_id) return - version.status = Version.Status.VALIDATING - version.save() - - try: - validate( - _build_validatable_version_metadata(version), - schema_key='PublishedDandiset', - json_validation=True, + # If the version has since been modified, return early + if current_version.status != Version.Status.PENDING: + logger.info( + 'Skipping validation for version %s due to concurrent modification', version_id ) - except dandischema.exceptions.ValidationError as e: - logger.info('Error while validating version %s', version.id) - version.status = Version.Status.INVALID - - validation_errors = _collect_validation_errors(e) - version.validation_errors = validation_errors - version.save() - return - except ValueError as e: - # A bare ValueError is thrown when dandischema generates its own exceptions, like a - # mismatched schemaVersion. - version.status = Version.Status.INVALID - version.validation_errors = [{'field': '', 'message': str(e)}] - version.save() return - logger.info('Successfully validated version %s', version.id) - version.status = Version.Status.VALID - version.validation_errors = [] - version.save() + + # Set to validating and continue + version_qs.update(status=Version.Status.VALIDATING) + status, errors = _get_version_validation_result(current_version) + version_qs.update(status=status, validation_errors=errors, modified=timezone.now()) diff --git a/dandiapi/api/tests/test_tasks.py b/dandiapi/api/tests/test_tasks.py index 15cdea730..80ea1d39e 100644 --- a/dandiapi/api/tests/test_tasks.py +++ b/dandiapi/api/tests/test_tasks.py @@ -5,6 +5,8 @@ from typing import TYPE_CHECKING from django.conf import settings +from django.forms.models import model_to_dict +from django.utils import timezone from guardian.shortcuts import assign_perm import pytest @@ -190,13 +192,42 @@ def test_validate_asset_metadata_saves_version(draft_asset: Asset, draft_version def test_validate_version_metadata(draft_version: Version, asset: Asset): draft_version.assets.add(asset) - tasks.validate_version_metadata_task(draft_version.id) + # Bypass .save to manually set an older timestamp + old_modified = timezone.now() - datetime.timedelta(minutes=10) + updated = Version.objects.filter(id=draft_version.id).update(modified=old_modified) + assert updated == 1 + tasks.validate_version_metadata_task(draft_version.id) draft_version.refresh_from_db() assert draft_version.status == Version.Status.VALID assert draft_version.validation_errors == [] + # Ensure modified field was updated + assert draft_version.modified > old_modified + + +@pytest.mark.django_db() +def test_validate_version_metadata_no_side_effects(draft_version: Version, asset: Asset): + draft_version.assets.add(asset) + + # Set the version `status` and `validation_errors` fields to something + # which can't be a result of validate_version_metadata + draft_version.status = Version.Status.PENDING + draft_version.validation_errors = [{'foo': 'bar'}] + draft_version.save() + + # Validate version metadata, storing the model data before and after + old_data = model_to_dict(draft_version) + tasks.validate_version_metadata_task(draft_version.id) + draft_version.refresh_from_db() + new_data = model_to_dict(draft_version) + + # Check that change is isolated to these two fields + assert old_data.pop('status') != new_data.pop('status') + assert old_data.pop('validation_errors') != new_data.pop('validation_errors') + assert old_data == new_data + @pytest.mark.django_db() def test_validate_version_metadata_malformed_schema_version(draft_version: Version, asset: Asset):