diff --git a/django/library/doi.py b/django/library/doi.py index 1ede754a0..e8f34c881 100644 --- a/django/library/doi.py +++ b/django/library/doi.py @@ -18,7 +18,7 @@ DataCiteRegistrationLog, ) -from datacite import DataCiteRESTClient, schema43 +from datacite import DataCiteRESTClient, schema45 from datacite.errors import ( DataCiteError, DataCiteNoContentError, @@ -183,8 +183,12 @@ def _datacite_heartbeat_url(self): def _validate_metadata(self, datacite_metadata: DataCiteSchema): metadata_dict = datacite_metadata.to_dict() - if not schema43.validate(metadata_dict): - logger.error("Invalid DataCite metadata: %s", metadata_dict) + try: + schema45.validator.validate(metadata_dict) + except Exception as e: + logger.error( + "Invalid DataCite metadata: %s", schema45.tostring(metadata_dict), e + ) raise DataCiteError(f"Invalid DataCite metadata: {metadata_dict}") return datacite_metadata, metadata_dict @@ -202,17 +206,22 @@ def mint_public_doi(self, codebase_or_release: Codebase | CodebaseRelease): return "XX.DRYXX/XXXX-XRUN", True if hasattr(codebase_or_release, "datacite"): del codebase_or_release.datacite - datacite_metadata, metadata_dict = self._validate_metadata( - codebase_or_release.datacite - ) + doi = "Unassigned" http_status = 200 message = "Minted new DOI successfully." + datacite_metadata = codebase_or_release.datacite + try: + datacite_metadata, metadata_dict = self._validate_metadata( + datacite_metadata + ) doi = self.datacite_client.public_doi( metadata_dict, url=codebase_or_release.permanent_url ) + codebase_or_release.doi = doi + codebase_or_release.save() except DataCiteError as e: logger.error(e) message = str(e) @@ -235,7 +244,27 @@ def mint_public_doi(self, codebase_or_release: Codebase | CodebaseRelease): self._save_log_record(**log_record_dict) return doi, http_status == 200 + @classmethod + def is_metadata_fresh(cls, codebase_or_release: Codebase | CodebaseRelease): + try: + newest_log_entry = DataCiteRegistrationLog.objects.latest_entry( + codebase_or_release + ) + # make sure item does not have stale datacite metadata + if hasattr(codebase_or_release, "datacite"): + del codebase_or_release.datacite + return newest_log_entry.metadata_hash == codebase_or_release.datacite.hash() + + except DataCiteRegistrationLog.DoesNotExist: + # no logs for this item, metadata is stale + logger.info("No registration logs available for %s", codebase_or_release) + + return False + def update_doi_metadata(self, codebase_or_release: Codebase | CodebaseRelease): + if self.is_metadata_fresh(codebase_or_release): + logger.info("No need to update DOI metadata for %s", codebase_or_release) + return True doi = codebase_or_release.doi if self.dry_run: logger.debug("DRY RUN") @@ -278,16 +307,10 @@ def update_doi_metadata(self, codebase_or_release: Codebase | CodebaseRelease): self._save_log_record(**log_record_dict) return http_status == 200 - def mint_new_doi_for_codebase(self, codebase: Codebase) -> str: - return self.mint_public_doi(codebase) - - def mint_new_doi_for_release(self, release: CodebaseRelease) -> str: - return self.mint_public_doi(release) - - def update_metadata_for_codebase(self, codebase: Codebase) -> bool: + def update_codebase_metadata(self, codebase: Codebase) -> bool: return self.update_doi_metadata(codebase) - def update_metadata_for_release(self, release: CodebaseRelease) -> bool: + def update_release_metadata(self, release: CodebaseRelease) -> bool: return self.update_doi_metadata(release) @staticmethod @@ -524,7 +547,7 @@ def mint_dois_for_peer_reviewed_releases_without_dois(interactive=True, dry_run= """ if not codebase_doi: # request to DataCite API - codebase_doi = datacite_api.mint_new_doi_for_codebase(codebase) + codebase_doi = datacite_api.mint_public_doi(codebase) if not codebase_doi: logger.error( @@ -544,7 +567,7 @@ def mint_dois_for_peer_reviewed_releases_without_dois(interactive=True, dry_run= Mint DOI for release """ # request to DataCite API - release_doi = datacite_api.mint_new_doi_for_release(release) + release_doi = datacite_api.mint_public_doi(release) if not release_doi: logger.error("Could not mint DOI for release %s. Skipping.", release.pk) if interactive: @@ -559,7 +582,7 @@ def mint_dois_for_peer_reviewed_releases_without_dois(interactive=True, dry_run= """ Since a new DOI has been minted for the release, we need to update it's parent's metadata (HasVersion) """ - ok = datacite_api.update_metadata_for_codebase(codebase) + ok = datacite_api.update_codebase_metadata(codebase) if not ok: logger.error("Failed to update metadata for codebase %s", codebase.pk) @@ -572,7 +595,7 @@ def mint_dois_for_peer_reviewed_releases_without_dois(interactive=True, dry_run= next_release = release.get_next_release() if previous_release and previous_release.doi: - ok = datacite_api.update_metadata_for_release(previous_release) + ok = datacite_api.update_release_metadata(previous_release) if not ok: logger.error( "Failed to update metadata for previous_release %s", @@ -580,7 +603,7 @@ def mint_dois_for_peer_reviewed_releases_without_dois(interactive=True, dry_run= ) if next_release and next_release.doi: - ok = datacite_api.update_metadata_for_release(next_release) + ok = datacite_api.update_release_metadata(next_release) if not ok: logger.error( "Failed to update metadata for next_release %s", next_release.pk @@ -619,7 +642,7 @@ def mint_dois_for_peer_reviewed_releases_without_dois(interactive=True, dry_run= if invalid_codebases: logger.error( "FAILURE: %s Codebases with invalid or missing DOIs: %s", - invalid_codebases.count(), + len(invalid_codebases), invalid_codebases, ) else: diff --git a/django/library/management/commands/clean_peer_reviewed_dois_02.py b/django/library/management/commands/clean_peer_reviewed_dois_02.py deleted file mode 100644 index 44ca82a29..000000000 --- a/django/library/management/commands/clean_peer_reviewed_dois_02.py +++ /dev/null @@ -1,64 +0,0 @@ -import logging -from django.core.management.base import BaseCommand -from library.doi import VERIFICATION_MESSAGE, get_welcome_message -from library.models import CodebaseRelease - -logger = logging.getLogger(__name__) - - -def remove_dois_from_unreviewed_releases(interactive=True, dry_run=True): - print(get_welcome_message(dry_run)) - - unreviewed_releases_with_dois = CodebaseRelease.objects.filter( - peer_reviewed=False, doi__isnull=False - ) - total_unreviewed_releases_with_dois = unreviewed_releases_with_dois.count() - - logger.info( - "Cleaning up DOIs for %s unreviewed CodebaseReleases with DOIs", - total_unreviewed_releases_with_dois, - ) - if interactive: - confirm = input( - "Deleting all DOIs for unreviewed CodebaseReleases. Enter 'DELETE' to continue or CTRL+C to quit: " - ) - if confirm.lower() == "delete": - unreviewed_releases_with_dois.update(doi=None) - - """ - assert correctness - """ - if not dry_run: - print(VERIFICATION_MESSAGE) - logger.info( - "Checking that DOIs for all not peer reviewed releases have been deleted..." - ) - assert ( - CodebaseRelease.objects.filter( - peer_reviewed=False, doi__isnull=False - ).count() - == 0 - ) - logger.info( - "All DOIs from not peer_reviewed CodebaseReleases %s with DOIs deleted successfully.", - total_unreviewed_releases_with_dois, - ) - - -class Command(BaseCommand): - - def add_arguments(self, parser): - parser.add_argument( - "--interactive", - action="store_true", - help="Wait for user to press enter to continue.", - default=True, - ) - parser.add_argument( - "--dry-run", action="store_true", help="Output what would have happened." - ) - - def handle(self, *args, **options): - interactive = options["interactive"] - dry_run = options["dry_run"] - remove_dois_from_unreviewed_releases(interactive, dry_run) diff --git a/django/library/management/commands/delete_all_existing_codebase_dois_01.py b/django/library/management/commands/delete_all_existing_codebase_dois_01.py deleted file mode 100644 index 613bc02e1..000000000 --- a/django/library/management/commands/delete_all_existing_codebase_dois_01.py +++ /dev/null @@ -1,62 +0,0 @@ -import csv -import logging -import sys -from django.core.management.base import BaseCommand -from library.doi import VERIFICATION_MESSAGE, get_welcome_message -from library.models import Codebase - -logger = logging.getLogger(__name__) - - -def remove_existing_codebase_dois(interactive=True, dry_run=True): - print(get_welcome_message(dry_run)) - codebases_with_dois = Codebase.objects.exclude(doi__isnull=True) - - logger.info( - f"Removing DOIs for {len(codebases_with_dois)} Codebases. Query: Codebase.objects.exclude(doi__isnull=True) ..." - ) - if interactive and codebases_with_dois.exists(): - confirm = input( - "WARNING: this will remove all existing codebase DOIs and is unrecoverable. Type 'DELETE' to continue or Ctrl+C to quit: " - ) - if confirm.lower() == "delete": - with open("codebases_with_dois.csv", "w") as f: - writer = csv.writer(f) - writer.writerow(["Codebase ID", "Codebase DOI"]) - for codebase in codebases_with_dois: - writer.writerow([codebase.pk, codebase.doi]) - codebases_with_dois.update(doi=None) - else: - logger.info("Aborting.") - sys.exit() - - logger.info( - "All DOIs from {len(codebases_with_dois)} codebases deleted successfully." - ) - - """ - assert correctness - """ - if not dry_run: - print(VERIFICATION_MESSAGE) - assert Codebase.objects.filter(doi__isnull=False).count() == 0 - logger.info("Success. All existing codebase DOIs deleted.") - - -class Command(BaseCommand): - - def add_arguments(self, parser): - parser.add_argument( - "--interactive", - action="store_true", - help="Wait for user to press enter to continue.", - default=True, - ) - parser.add_argument( - "--dry-run", action="store_true", help="Output what would have happened." - ) - - def handle(self, *args, **options): - interactive = options["interactive"] - dry_run = options["dry_run"] - remove_existing_codebase_dois(interactive, dry_run) diff --git a/django/library/management/commands/fix_existing_dois_03.py b/django/library/management/commands/doi_mint_parent_codebases.py similarity index 95% rename from django/library/management/commands/fix_existing_dois_03.py rename to django/library/management/commands/doi_mint_parent_codebases.py index 8a4563ce4..f9f09c2c5 100644 --- a/django/library/management/commands/fix_existing_dois_03.py +++ b/django/library/management/commands/doi_mint_parent_codebases.py @@ -29,7 +29,7 @@ def update_existing_dois(interactive=True, dry_run=True): total_peer_reviewed_releases_count = peer_reviewed_releases.count() logger.info( - "Updating DOIs for %s peer reviewed CodebaseReleases with DOIs", + "Updating DOIs for parent Codebases of %s peer reviewed CodebaseReleases with DOIs", total_peer_reviewed_releases_count, ) @@ -56,7 +56,7 @@ def update_existing_dois(interactive=True, dry_run=True): if not codebase_doi: # request to DataCite API logger.debug("Minting DOI for parent codebase: %s", codebase.pk) - codebase_doi, success = datacite_api.mint_new_doi_for_codebase(codebase) + codebase_doi, success = datacite_api.mint_public_doi(codebase) if not success: logger.error( @@ -108,7 +108,7 @@ def update_existing_dois(interactive=True, dry_run=True): release_doi, ) # set up DataCite API request to mint new DOI - release_doi, success = datacite_api.mint_new_doi_for_release(release) + release_doi, success = datacite_api.mint_public_doi(release) if not success: logger.error( "Could not mint DOI for release %s. DOI: %s. Skipping.", @@ -139,7 +139,7 @@ def update_existing_dois(interactive=True, dry_run=True): release_doi, ) # request to DataCite API: mint new DOI! - release_doi, success = datacite_api.mint_new_doi_for_release(release) + release_doi, success = datacite_api.mint_public_doi(release) if not success: logger.error( "Could not mint DOI for release %s. DOI: %s. Skipping.", @@ -214,7 +214,7 @@ def add_arguments(self, parser): "--interactive", action="store_true", help="Wait for user to press enter to continue.", - default=True, + default=False, ) parser.add_argument( "--dry-run", action="store_true", help="Output what would have happened." diff --git a/django/library/management/commands/doi_reset_production.py b/django/library/management/commands/doi_reset_production.py new file mode 100644 index 000000000..7c7fd966e --- /dev/null +++ b/django/library/management/commands/doi_reset_production.py @@ -0,0 +1,97 @@ +import csv +import logging +import sys +from django.core.management.base import BaseCommand +from library.doi import VERIFICATION_MESSAGE, get_welcome_message, DataCiteApi +from library.models import Codebase, CodebaseRelease + +logger = logging.getLogger(__name__) + + +def cleanup_existing_dois(interactive=True, dry_run=True): + print(get_welcome_message(dry_run)) + + api = DataCiteApi(dry_run=dry_run) + codebases_with_dois = Codebase.objects.with_doi() + logger.info("Removing all Codebase DOIs") + if interactive and codebases_with_dois.exists(): + confirm = input( + "WARNING: this will remove all existing codebase DOIs and is unrecoverable. Type 'DELETE' to continue or Ctrl+C to quit: " + ) + if not confirm.lower() == "delete": + logger.info("Aborting.") + sys.exit() + + """ + assert correctness + """ + if not dry_run: + print(VERIFICATION_MESSAGE) + with open("codebases_with_dois.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["Codebase ID", "Codebase DOI"]) + for codebase in codebases_with_dois: + writer.writerow([codebase.pk, codebase.doi]) + codebases_with_dois.update(doi=None) + assert not Codebase.objects.with_doi().exists() + logger.info("Success. All existing codebase DOIs deleted.") + + # clean up unreviewed release DOIs + + unreviewed_releases_with_dois = CodebaseRelease.objects.unreviewed().with_doi() + total_unreviewed_releases_with_dois = unreviewed_releases_with_dois.count() + logger.info( + "Removing %s unreviewed CodebaseRelease DOIs", + total_unreviewed_releases_with_dois, + ) + if interactive: + confirm = input( + f"Deleting all DOIs for {total_unreviewed_releases_with_dois} unreviewed CodebaseReleases. Enter 'DELETE' to continue or CTRL+C to quit: " + ) + if not confirm.lower() == "delete": + logger.debug("Aborting...") + sys.exit() + + if not dry_run: + with open("unreviewed_releases_with_dois.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["CodebaseRelease ID", "CodebaseRelease DOI"]) + for release in unreviewed_releases_with_dois: + writer.writerow([release.pk, release.doi]) + unreviewed_releases_with_dois.update(doi=None) + + reviewed_releases_without_dois = ( + CodebaseRelease.objects.reviewed().public().without_doi() + ) + invalid_releases = [] + for release in reviewed_releases_without_dois: + try: + api.mint_public_doi(release) + except Exception as e: + logger.error("Error minting DOI for release %s", release) + invalid_releases.append((release, e)) + + for release, error in invalid_releases: + with open("invalid_releases.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["CodebaseRelease ID", "Reason", "Datacite Metadata"]) + writer.writerow([release.pk, error, release.datacite.to_dict()]) + + +class Command(BaseCommand): + + def add_arguments(self, parser): + parser.add_argument( + "--interactive", + action="store_true", + help="Wait for user to press enter to continue.", + default=True, + ) + parser.add_argument( + "--dry-run", action="store_true", help="Output what would have happened." + ) + + def handle(self, *args, **options): + interactive = options["interactive"] + dry_run = options["dry_run"] + cleanup_existing_dois(interactive, dry_run) diff --git a/django/library/management/commands/doi_reset_staging.py b/django/library/management/commands/doi_reset_staging.py new file mode 100644 index 000000000..bddb8f060 --- /dev/null +++ b/django/library/management/commands/doi_reset_staging.py @@ -0,0 +1,83 @@ +import csv +import logging +import sys +from django.conf import settings +from django.core.management.base import BaseCommand +from library.doi import VERIFICATION_MESSAGE, get_welcome_message, DataCiteApi +from library.models import Codebase, CodebaseRelease + +logger = logging.getLogger(__name__) + + +def reset_all_dois(interactive=True, dry_run=True): + print(get_welcome_message(dry_run)) + if settings.DEPLOY_ENVIRONMENT.is_production: + logger.error("This command is not allowed in production.") + sys.exit() + logger.info("(ENV: %s) Removing all DOIs", settings.DEPLOY_ENVIRONMENT) + releases_with_dois = CodebaseRelease.objects.with_doi() + codebases_with_dois = Codebase.objects.with_doi() + confirm = input( + "WARNING: this will remove ALL existing DOIs and is unrecoverable. Type 'DELETE' to continue or Ctrl+C to quit: " + ) + if confirm.lower() == "delete": + with open("deleted_codebase_dois.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["Codebase ID", "Codebase DOI"]) + for codebase in codebases_with_dois: + writer.writerow([codebase.pk, codebase.doi]) + Codebase.objects.update(doi=None) + with open("deleted_release_dois.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["CodebaseRelease ID", "CodebaseRelease DOI"]) + for release in releases_with_dois: + writer.writerow([release.pk, release.doi]) + CodebaseRelease.objects.update(doi=None) + else: + logger.info("Aborting.") + sys.exit() + + """ + assert correctness + """ + if not dry_run: + print(VERIFICATION_MESSAGE) + assert Codebase.objects.with_doi().count() == 0 + assert CodebaseRelease.objects.with_doi().count() == 0 + logger.info("Success. All existing codebase DOIs deleted.") + + """ Mint DOIs for all new Peer Reviewed Releases""" + peer_reviewed_releases = CodebaseRelease.objects.reviewed().public() + datacite_api = DataCiteApi(dry_run=dry_run) + invalid_releases = [] + for release in peer_reviewed_releases: + try: + datacite_api.mint_public_doi(release) + except Exception as e: + logger.error("Error minting DOI for release %s", release) + invalid_releases.append((release, e)) + + for release, error in invalid_releases: + with open("invalid_releases.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["CodebaseRelease ID", "Reason", "Datacite Metadata"]) + writer.writerow([release.pk, error, release.datacite.to_dict()]) + + +class Command(BaseCommand): + + def add_arguments(self, parser): + parser.add_argument( + "--interactive", + action="store_true", + help="Wait for user to press enter to continue.", + default=True, + ) + parser.add_argument( + "--dry-run", action="store_true", help="Output what would have happened." + ) + + def handle(self, *args, **options): + interactive = options["interactive"] + dry_run = options["dry_run"] + reset_all_dois(interactive, dry_run) diff --git a/django/library/management/commands/update_metadata_for_all_existing_dois_04.py b/django/library/management/commands/doi_update_metadata.py similarity index 52% rename from django/library/management/commands/update_metadata_for_all_existing_dois_04.py rename to django/library/management/commands/doi_update_metadata.py index 1a691ca24..07840519e 100644 --- a/django/library/management/commands/update_metadata_for_all_existing_dois_04.py +++ b/django/library/management/commands/doi_update_metadata.py @@ -1,7 +1,7 @@ import logging from django.core.management.base import BaseCommand -from library.models import CodebaseRelease, Codebase, DataciteRegistrationLog +from library.models import CodebaseRelease, Codebase, DataCiteRegistrationLog from library.doi import DataCiteApi, VERIFICATION_MESSAGE, get_welcome_message logger = logging.getLogger(__name__) @@ -12,10 +12,11 @@ def update_doi_metadata(interactive=True, dry_run=True): datacite_api = DataCiteApi(dry_run=dry_run) all_codebases_with_dois = Codebase.objects.with_doi() + total_number_of_codebases_with_dois = all_codebases_with_dois.count() logger.info( "Updating metadata for all codebases (%s) with DOIs and their releases with DOIs. ...", - all_codebases_with_dois.count(), + total_number_of_codebases_with_dois, ) for i, codebase in enumerate(all_codebases_with_dois): @@ -23,18 +24,14 @@ def update_doi_metadata(interactive=True, dry_run=True): "Processing codebase %s - %s/%s", codebase.pk, i + 1, - all_codebases_with_dois.count(), + total_number_of_codebases_with_dois, ) if interactive: input("Press Enter to continue or CTRL+C to quit...") - if DataciteRegistrationLog.is_metadata_stale(codebase): - logger.debug("Metadata is stale. Updating metadata in DataCite...") - ok = datacite_api.update_metadata_for_codebase(codebase) - if not ok: - logger.error("Failed to update metadata for codebase {codebase.pk}") - else: - logger.debug("Metadata for codebase {codebase.pk} is in sync!") + ok = datacite_api.update_codebase_metadata(codebase) + if not ok: + logger.error("Failed to update metadata for codebase {codebase.pk}") for j, release in enumerate(codebase.releases.all()): logger.debug( @@ -47,60 +44,49 @@ def update_doi_metadata(interactive=True, dry_run=True): input("Press Enter to continue or CTRL+C to quit...") if release.peer_reviewed and release.doi: - if DataciteRegistrationLog.is_metadata_stale(release): - logger.debug("Metadata is stale. Updating metadata in DataCite...") - ok = datacite_api.update_metadata_for_release(release) - if not ok: - logger.error( - "Failed to update metadata for release %s", release.pk - ) - else: - logger.debug("Metadata for release %s is synced", release.pk) + ok = datacite_api.update_release_metadata(release) + if not ok: + logger.error("Failed to update metadata for release %s", release.pk) else: - if not release.doi: - logger.warning("Release has no DOI") - if not release.peer_reviewed: - logger.warning("Release is not peer reviewed") + logger.debug("Skipping unreviewed / no DOI release %s", release.pk) - logger.info("Metadata updated for all existing (Codebase & CodebaseRelease) DOIs.") + logger.info("Metadata updated for all existing Codebase + CodebaseRelease DOIs.") """ assert correctness """ if not dry_run: print(VERIFICATION_MESSAGE) - logger.info("Checking that Comses metadata is in sync with DataCite...") - invalid_codebases = [] + logger.info("Checking that local metadata is in sync with DataCite...") invalid_releases = [] results = datacite_api.threaded_metadata_check(all_codebases_with_dois) - for pk, is_meta_valid in results: - if not is_meta_valid: - invalid_codebases.append(pk) - + invalid_codebases = [ + pk for pk, is_valid_metadata in results if not is_valid_metadata + ] if invalid_codebases: logger.error( - "Failure. Metadata not in sync with DataCite for %s codebases: %s", - invalid_codebases.count(), - invalid_codebases, + "FAILURE: Metadata not in sync with DataCite for %s codebases", + len(invalid_codebases), ) else: logger.info( - "Success. Metadata in sync with DataCite for all codebases with DOI." + "SUCCESS: Metadata in sync with DataCite for all codebases with DOI." ) all_releases_with_dois = CodebaseRelease.objects.with_doi() results = datacite_api.threaded_metadata_check(all_releases_with_dois) - for pk, is_meta_valid in results: - if not is_meta_valid: - invalid_releases.append(pk) - + invalid_releases = [ + pk for pk, is_valid_metadata in results if not is_valid_metadata + ] if invalid_releases: logger.error( - f"Failure. Metadata not in sync with DataCite for {len(invalid_releases)} releases: {invalid_releases}" + "FAILURE: Metadata not in sync with DataCite for %s releases: %s", + len(invalid_releases), + invalid_releases, ) else: logger.info( - f"Success. Metadata in sync with DataCite for all releases with DOI." + "SUCCESS: Metadata in sync with DataCite for all releases with DOI." ) @@ -111,6 +97,7 @@ def add_arguments(self, parser): "--interactive", action="store_true", help="Wait for user to press enter to continue.", + default=False, ) parser.add_argument( "--dry-run", action="store_true", help="Output what would have happened." diff --git a/django/library/management/commands/sync_doi_metadata.py b/django/library/management/commands/sync_doi_metadata.py index 432d2740d..10c0a4cbe 100644 --- a/django/library/management/commands/sync_doi_metadata.py +++ b/django/library/management/commands/sync_doi_metadata.py @@ -29,15 +29,12 @@ def update_stale_metadata_for_all_codebases_with_dois(interactive=True, dry_run= if interactive: input("Press Enter to continue or CTRL+C to quit...") - if DataciteRegistrationLog.is_metadata_stale(codebase): - logger.debug("Metadata is stale. Updating metadata in DataCite...") - success = datacite_api.update_metadata_for_codebase(codebase) - if not success: - logger.error("Failed to update metadata for codebase %s", codebase.pk) - else: - logger.debug("Metadata successfully updated.") + logger.debug("Metadata is stale. Updating metadata in DataCite...") + success = datacite_api.update_codebase_metadata(codebase) + if not success: + logger.error("Failed to update metadata for codebase %s", codebase.pk) else: - logger.debug("Metadata is in sync. Skipping...") + logger.debug("Metadata successfully updated.") logger.info("Updated all codebases with stale metadata.") """ @@ -79,15 +76,12 @@ def update_stale_metadata_for_all_releases_with_dois(interactive=True, dry_run=T if interactive: input("Press Enter to continue or CTRL+C to quit...") - if DataciteRegistrationLog.is_metadata_stale(release): - logger.debug("Metadata is stale. Updating metadata in DataCite...") - ok = datacite_api.update_metadata_for_release(release) - if not ok: - logger.error("Failed to update metadata for release %s", release.pk) - else: - logger.debug("Metadata successfully updated.") + logger.debug("Metadata is stale. Updating metadata in DataCite...") + ok = datacite_api.update_release_metadata(release) + if not ok: + logger.error("Failed to update metadata for release %s", release.pk) else: - logger.debug("Metadata is up-to-date. Skipping...") + logger.debug("Metadata successfully updated.") if interactive: input("Press Enter to continue or CTRL+C to quit...") diff --git a/django/library/migrations/0031_dataciteregistrationlog_and_more.py b/django/library/migrations/0031_dataciteregistrationlog_and_more.py new file mode 100644 index 000000000..ef45abe60 --- /dev/null +++ b/django/library/migrations/0031_dataciteregistrationlog_and_more.py @@ -0,0 +1,96 @@ +# Generated by Django 4.2.16 on 2024-10-29 21:28 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("library", "0030_peerreviewinvitation"), + ] + + operations = [ + migrations.CreateModel( + name="DataCiteRegistrationLog", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "action", + models.CharField( + choices=[ + ("CREATE_RELEASE_DOI", "create release DOI"), + ("CREATE_CODEBASE_DOI", "create codebase DOI"), + ("UPDATE_RELEASE_METADATA", "update release metadata"), + ("UPDATE_CODEBASE_METADATA", "update codebase metadata"), + ], + max_length=50, + ), + ), + ("timestamp", models.DateTimeField(auto_now_add=True)), + ("http_status", models.IntegerField(default=None, null=True)), + ("message", models.TextField(default=None, null=True)), + ("metadata_hash", models.CharField(max_length=255)), + ("doi", models.CharField(blank=True, max_length=255, null=True)), + ], + ), + migrations.AlterModelOptions( + name="peerreviewinvitation", + options={"ordering": ["-date_sent"]}, + ), + migrations.RemoveField( + model_name="contributor", + name="affiliations", + ), + migrations.AlterField( + model_name="codebase", + name="date_created", + field=models.DateTimeField(auto_now_add=True), + ), + migrations.AlterField( + model_name="codebaserelease", + name="date_created", + field=models.DateTimeField(auto_now_add=True), + ), + migrations.AlterField( + model_name="codebasereleasedownload", + name="date_created", + field=models.DateTimeField(auto_now_add=True), + ), + migrations.AlterField( + model_name="peerreviewinvitation", + name="date_sent", + field=models.DateTimeField(auto_now=True), + ), + migrations.DeleteModel( + name="ContributorAffiliation", + ), + migrations.AddField( + model_name="dataciteregistrationlog", + name="codebase", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="datacite_logs", + to="library.codebase", + ), + ), + migrations.AddField( + model_name="dataciteregistrationlog", + name="release", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="datacite_logs", + to="library.codebaserelease", + ), + ), + ] diff --git a/django/library/models.py b/django/library/models.py index da4d784b7..86085fc77 100644 --- a/django/library/models.py +++ b/django/library/models.py @@ -7,8 +7,8 @@ import uuid from abc import ABC -from collections import OrderedDict -from datetime import date, datetime, timedelta +from collections import OrderedDict, defaultdict +from datetime import date, timedelta from typing import List from django.conf import settings @@ -171,7 +171,10 @@ def affiliations(self): @property def affiliation_ror_ids(self): - return [affiliation.get("ror_id") for affiliation in self.json_affiliations] + return [ + {"name": affiliation.get("name"), "ror_id": affiliation.get("ror_id")} + for affiliation in self.json_affiliations + ] @cached_property def json_affiliations_string(self): @@ -187,24 +190,12 @@ def to_affiliation_string(cls, afl): # e.g., "Arizona State University https://www.asu.edu ASU" return f"{afl.get('name')} {afl.get('url')} {afl.get('acronym')}" - @property - def codemeta_affiliation(self): - """ - For now codemeta affiliations appear to be a single https://schema.org/Organization - """ - if self.json_affiliations: - return CodeMetaSchema.convert_affiliation(self.json_affiliations[0]) - @property def primary_affiliation(self): return self.json_affiliations[0] if self.json_affiliations else {} @property def primary_affiliation_name(self): - return self.primary_json_affiliation_name - - @property - def primary_json_affiliation_name(self): return self.json_affiliations[0]["name"] if self.json_affiliations else "" @staticmethod @@ -1132,6 +1123,9 @@ def accessible(self, user): def reviewed(self, **kwargs): return self.filter(peer_reviewed=True, **kwargs) + def unreviewed(self, **kwargs): + return self.exclude(peer_reviewed=True).filter(**kwargs) + def with_doi(self, **kwargs): return self.exclude(Q(doi__isnull=True) | Q(doi="")).filter(**kwargs) @@ -2594,15 +2588,16 @@ def __init__(self, release: CodebaseRelease): ] if release.live: - # should not generate CodeMeta or DataCite for non-published releases self.first_published = release.first_published_at.date() self.last_published = release.last_published_on.date() - self.copyright_year = self.last_published.year else: - # FIXME: default values? - self.first_published = self.last_published = self.copyright_year = ( - date.today() + # FIXME: default to today for unpublished releases + # should not generate CodeMeta or DataCite for non-published releases but CodeMeta is generated even for unpublished + logger.warning( + "Generating CommonMetadata for an unpublished release: %s", release ) + self.first_published = self.last_published = date.today() + self.copyright_year = self.last_published.year if release.license: self.license = release.license else: @@ -2759,18 +2754,24 @@ def convert_authors(cls, common_metadata: CommonMetadata): ] @classmethod - def convert_ror_affiliation(cls, affiliation: dict): + def convert_affiliation(cls, affiliation: dict): + codemeta_affiliation = {} if affiliation: - return { + codemeta_affiliation = { # FIXME: may switch to https://schema.org/ResearchOrganization at some point "@type": "Organization", - "@id": affiliation.get("ror_id"), "name": affiliation.get("name"), "url": affiliation.get("url"), - "identifier": affiliation.get("ror_id"), - "sameAs": affiliation.get("ror_id"), } - return {} + if affiliation.get("ror_id"): + codemeta_affiliation.update( + { + "@id": affiliation.get("ror_id"), + "identifier": affiliation.get("ror_id"), + "sameAs": affiliation.get("ror_id"), + } + ) + return codemeta_affiliation @classmethod def convert_contributor(cls, contributor: Contributor): @@ -2782,8 +2783,8 @@ def convert_contributor(cls, contributor: Contributor): } if contributor.orcid_url: codemeta["@id"] = contributor.orcid_url - if contributor.json_affiliations: - codemeta["affiliation"] = cls.convert_ror_affiliation( + if contributor.affiliations: + codemeta["affiliation"] = cls.convert_affiliation( contributor.primary_affiliation ) if contributor.email: @@ -2837,8 +2838,9 @@ class DataCiteSchema(ABC): COMSES_PUBLISHER = { "publisherIdentifier": CommonMetadata.COMSES_ORGANIZATION["ror_id"], "publisherIdentifierScheme": "ROR", - "schemeURI": "https://ror.org", + "schemeUri": "https://ror.org", "name": CommonMetadata.COMSES_ORGANIZATION["name"], + "lang": "en", } INITIAL_DATA = { @@ -2884,32 +2886,47 @@ def convert_release_contributor(cls, release_contributor: ReleaseContributor): nameType="Personal", givenName=contributor.given_name, familyName=contributor.family_name, - creatorName=f"{contributor.family_name}, {contributor.given_name}", + name=f"{contributor.family_name}, {contributor.given_name}", ) if contributor.orcid_url: creator.update( - nameIdentifier=contributor.orcid_url, - nameIdentifierScheme="ORCID", - schemeURI="https://orcid.org", + nameIdentifiers=[ + { + "nameIdentifier": contributor.orcid_url, + "nameIdentifierScheme": "ORCID", + "schemeUri": "https://orcid.org", + } + ] ) else: creator.update(nameType="Organizational", creatorName=contributor.name) # check for ROR affiliations or freetext: https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/creator/#affiliation - affiliations = contributor.json_affiliations + affiliations = contributor.affiliations if affiliations: - ror_ids = contributor.affiliation_ror_ids - if ror_ids: - # set affiliationIdentifier to first ROR ID - creator.update( - affiliationIdentifier=ror_ids[0], + creator_affiliations = [ + cls.convert_affiliation(a) for a in affiliations if a + ] + creator.update(affiliation=creator_affiliations) + return creator + + @classmethod + def convert_affiliation(cls, affiliation: dict): + """ + Converts a CoMSES affiliation dict to a DataCite affiliation dict + """ + datacite_affiliation = {} + if affiliation.get("name"): + datacite_affiliation = { + "name": affiliation.get("name"), + } + # FIXME: should we validate the ror id + if affiliation.get("ror_id"): + affiliation.update( + affiliationIdentifier=affiliation.get("ror_id"), affiliationIdentifierScheme="ROR", - schemeURI="https://ror.org", ) - else: - # otherwise set to the first affiliation freetext name - creator.update(affiliation=contributor.primary_affiliation_name) - return creator + return datacite_affiliation @classmethod def to_citable_authors(cls, release_contributors): @@ -2932,44 +2949,44 @@ def to_contributors(cls, common_metadata: CommonMetadata): nonauthor_contributors = common_metadata.release_contributor_nonauthors contributors = [ - # FIXME: probably not the right way to bootstrap non author contributors - # perhaps this should be the provider institution, e.g., CML ROR + # bootstrap non author contributors { - "contributorName": common_metadata.code_repository, - "contributorType": "hostingInstitution", - } + "name": common_metadata.code_repository, + "contributorType": "HostingInstitution", + }, + { + "name": common_metadata.COMSES_ORGANIZATION["name"], + "contributorType": "Editor", + }, ] if nonauthor_contributors: - role_mapping = { - "copyrightHolder": "RightsHolder", - "editor": "Editor", - "funder": "Sponsor", - "pointOfContact": "ContactPerson", - "resourceProvider": "Distributor", - } - + role_mapping = defaultdict( + lambda: "Other", + { + "copyrightHolder": "RightsHolder", + "editor": "Editor", + "funder": "Sponsor", + "pointOfContact": "ContactPerson", + "resourceProvider": "Distributor", + }, + ) + has_other_role_already = False for release_contributor in nonauthor_contributors: - # FIXME: what is other_role_added for? - other_role_added = False for role in release_contributor.roles: contributor_type = role_mapping.get(role, "Other") - if contributor_type == "Other" and not other_role_added: - contributors.append( - { - "contributorName": release_contributor.contributor.name, - "contributorType": "Other", - } - ) - other_role_added = True - elif contributor_type != "Other": - contributors.append( - { - "contributorName": release_contributor.contributor.name, - "contributorType": contributor_type, - } - ) - + # only allow a single Other role per contributor + if contributor_type == "Other": + if has_other_role_already: + continue + else: + has_other_role_already = True + contributors.append( + { + "name": release_contributor.contributor.name, + "contributorType": contributor_type, + } + ) return contributors def to_dict(self): @@ -3012,17 +3029,16 @@ def convert(cls, common_metadata: CommonMetadata): common_metadata.release_contributor_authors ), "descriptions": common_metadata.descriptions, - "publicationYear": common_metadata.copyright_year, + "publicationYear": str(cls.to_publication_year(common_metadata)), "titles": [{"title": common_metadata.name}], "version": common_metadata.version, - "codeRepository": common_metadata.code_repository, "contributors": cls.to_contributors(common_metadata), "subjects": cls.convert_keywords(common_metadata), "rightsList": [ { "rights": common_metadata.license.name, "rightsIdentifier": common_metadata.license.name, - "rightsURI": common_metadata.license.url, + "rightsUri": common_metadata.license.url, } ], } @@ -3109,10 +3125,10 @@ def convert(cls, codebase: Codebase): "descriptionType": "Abstract", } ], - "publicationYear": codebase.publication_year, + "publicationYear": str(codebase.publication_year), } - """ + """ Set codebase relatedIdentifiers """ @@ -3207,20 +3223,6 @@ class DataCiteRegistrationLog(models.Model): objects = DataCiteRegistrationLogQuerySet.as_manager() - @classmethod - def is_metadata_stale(cls, item): - try: - newest_log_entry = DataCiteRegistrationLog.objects.latest_entry(item) - # make sure item does not have stale datacite metadata - del item.datacite - return newest_log_entry.metadata_hash != item.datacite.hash() - - except DataCiteRegistrationLog.DoesNotExist: - # no logs for this item, metadata is stale - logger.info("No registration logs available for this item %s", item) - - return True - @property def codebase_or_release_id(self): if self.codebase: diff --git a/django/library/serializers.py b/django/library/serializers.py index 510cce1aa..0707e1517 100644 --- a/django/library/serializers.py +++ b/django/library/serializers.py @@ -193,7 +193,7 @@ class Meta: "user", "type", "json_affiliations", - "primary_json_affiliation_name", + "primary_affiliation_name", "profile_url", ) diff --git a/django/requirements.txt b/django/requirements.txt index b5a7e1356..d67dbd045 100644 --- a/django/requirements.txt +++ b/django/requirements.txt @@ -1,6 +1,6 @@ bagit==1.8.1 bleach==6.1.0 -datacite==1.1.4 +datacite==1.2.0 dedupe==3.0.2 django-allauth==0.63.6 django-anymail[mailgun]==10.3 @@ -24,7 +24,7 @@ django-vite==2.1.3 # latest is 3.0.4 django-waffle==4.1.0 djangorestframework==3.15.2 djangorestframework-camel-case==1.4.2 -Django==4.2.15 +Django==4.2.16 elasticsearch-dsl>=7.0.0,<8.0.0 elasticsearch>=7.0.0,<8.0.0 html2text>=2016.9.19 diff --git a/frontend/src/components/UserSearch.vue b/frontend/src/components/UserSearch.vue index 28e106453..439400e98 100644 --- a/frontend/src/components/UserSearch.vue +++ b/frontend/src/components/UserSearch.vue @@ -51,8 +51,8 @@ ({{ option.email }}) - , {{ option.primaryJsonAffiliationName }}, {{ option.primaryAffiliationName }}