diff --git a/django/library/doi.py b/django/library/doi.py index 1ede754a0..6fdc092e2 100644 --- a/django/library/doi.py +++ b/django/library/doi.py @@ -18,7 +18,7 @@ DataCiteRegistrationLog, ) -from datacite import DataCiteRESTClient, schema43 +from datacite import DataCiteRESTClient, schema45 from datacite.errors import ( DataCiteError, DataCiteNoContentError, @@ -183,8 +183,12 @@ def _datacite_heartbeat_url(self): def _validate_metadata(self, datacite_metadata: DataCiteSchema): metadata_dict = datacite_metadata.to_dict() - if not schema43.validate(metadata_dict): - logger.error("Invalid DataCite metadata: %s", metadata_dict) + try: + schema45.validator.validate(metadata_dict) + except Exception: + logger.error( + "Invalid DataCite metadata: %s", schema45.tostring(metadata_dict) + ) raise DataCiteError(f"Invalid DataCite metadata: {metadata_dict}") return datacite_metadata, metadata_dict @@ -202,14 +206,17 @@ def mint_public_doi(self, codebase_or_release: Codebase | CodebaseRelease): return "XX.DRYXX/XXXX-XRUN", True if hasattr(codebase_or_release, "datacite"): del codebase_or_release.datacite - datacite_metadata, metadata_dict = self._validate_metadata( - codebase_or_release.datacite - ) + doi = "Unassigned" http_status = 200 message = "Minted new DOI successfully." 
+ datacite_metadata = codebase_or_release.datacite + try: + datacite_metadata, metadata_dict = self._validate_metadata( + datacite_metadata + ) doi = self.datacite_client.public_doi( metadata_dict, url=codebase_or_release.permanent_url ) diff --git a/django/library/management/commands/fix_existing_dois_03.py b/django/library/management/commands/doi_mint_parent_codebase_dois_03.py similarity index 100% rename from django/library/management/commands/fix_existing_dois_03.py rename to django/library/management/commands/doi_mint_parent_codebase_dois_03.py diff --git a/django/library/management/commands/delete_all_existing_codebase_dois_01.py b/django/library/management/commands/doi_remove_codebase_dois_01.py similarity index 84% rename from django/library/management/commands/delete_all_existing_codebase_dois_01.py rename to django/library/management/commands/doi_remove_codebase_dois_01.py index 613bc02e1..1f732bc16 100644 --- a/django/library/management/commands/delete_all_existing_codebase_dois_01.py +++ b/django/library/management/commands/doi_remove_codebase_dois_01.py @@ -10,11 +10,9 @@ def remove_existing_codebase_dois(interactive=True, dry_run=True): print(get_welcome_message(dry_run)) - codebases_with_dois = Codebase.objects.exclude(doi__isnull=True) + codebases_with_dois = Codebase.objects.with_doi() - logger.info( - f"Removing DOIs for {len(codebases_with_dois)} Codebases. Query: Codebase.objects.exclude(doi__isnull=True) ..." - ) + logger.info("Removing all Codebase DOIs") if interactive and codebases_with_dois.exists(): confirm = input( "WARNING: this will remove all existing codebase DOIs and is unrecoverable. Type 'DELETE' to continue or Ctrl+C to quit: " @@ -30,10 +28,6 @@ def remove_existing_codebase_dois(interactive=True, dry_run=True): logger.info("Aborting.") sys.exit() - logger.info( - "All DOIs from {len(codebases_with_dois)} codebases deleted successfully." 
- ) - """ assert correctness """ diff --git a/django/library/management/commands/clean_peer_reviewed_dois_02.py b/django/library/management/commands/doi_remove_unreviewed_dois_02.py similarity index 77% rename from django/library/management/commands/clean_peer_reviewed_dois_02.py rename to django/library/management/commands/doi_remove_unreviewed_dois_02.py index 44ca82a29..cfbac1402 100644 --- a/django/library/management/commands/clean_peer_reviewed_dois_02.py +++ b/django/library/management/commands/doi_remove_unreviewed_dois_02.py @@ -1,4 +1,5 @@ import logging +import sys from django.core.management.base import BaseCommand from library.doi import VERIFICATION_MESSAGE, get_welcome_message from library.models import CodebaseRelease @@ -9,9 +10,7 @@ def remove_dois_from_unreviewed_releases(interactive=True, dry_run=True): print(get_welcome_message(dry_run)) - unreviewed_releases_with_dois = CodebaseRelease.objects.filter( - peer_reviewed=False, doi__isnull=False - ) + unreviewed_releases_with_dois = CodebaseRelease.objects.unreviewed().with_doi() total_unreviewed_releases_with_dois = unreviewed_releases_with_dois.count() logger.info( @@ -24,6 +23,9 @@ def remove_dois_from_unreviewed_releases(interactive=True, dry_run=True): ) if confirm.lower() == "delete": unreviewed_releases_with_dois.update(doi=None) + else: + logger.debug("Aborting...") + sys.exit() """ assert correctness @@ -31,16 +33,11 @@ def remove_dois_from_unreviewed_releases(interactive=True, dry_run=True): if not dry_run: print(VERIFICATION_MESSAGE) logger.info( - "Checking that DOIs for all not peer reviewed releases have been deleted..." - ) - assert ( - CodebaseRelease.objects.filter( - peer_reviewed=False, doi__isnull=False - ).count() - == 0 + "Checking that DOIs for all unreviewed releases have been deleted..." 
) + assert not CodebaseRelease.objects.unreviewed().with_doi().exists() logger.info( - "All DOIs from not peer_reviewed CodebaseReleases %s with DOIs deleted successfully.", + "%s unreviewed CodebaseReleases with DOIs updated successfully.", total_unreviewed_releases_with_dois, ) diff --git a/django/library/management/commands/doi_reset_staging.py b/django/library/management/commands/doi_reset_staging.py new file mode 100644 index 000000000..f87ee2507 --- /dev/null +++ b/django/library/management/commands/doi_reset_staging.py @@ -0,0 +1,75 @@ +import csv +import logging +import sys +from django.conf import settings +from django.core.management.base import BaseCommand +from library.doi import VERIFICATION_MESSAGE, get_welcome_message, DataCiteApi +from library.models import Codebase, CodebaseRelease + +logger = logging.getLogger(__name__) + + +def reset_all_dois(interactive=True, dry_run=True): + print(get_welcome_message(dry_run)) + if settings.DEPLOY_ENVIRONMENT.is_production: + logger.error("This command is not allowed in production.") + sys.exit() + logger.info("(ENV: %s) Removing all DOIs", settings.DEPLOY_ENVIRONMENT) + releases_with_dois = CodebaseRelease.objects.with_doi() + codebases_with_dois = Codebase.objects.with_doi() + confirm = input( + "WARNING: this will remove ALL existing DOIs and is unrecoverable. 
Type 'DELETE' to continue or Ctrl+C to quit: " + ) + if confirm.lower() == "delete": + with open("codebase_dois.csv", "w", newline="") as f: + writer = csv.writer(f) + writer.writerow(["Codebase ID", "Codebase DOI"]) + for codebase in codebases_with_dois: + writer.writerow([codebase.pk, codebase.doi]) + Codebase.objects.update(doi=None) + with open("release_doi.csv", "w", newline="") as f: + writer = csv.writer(f) + writer.writerow(["CodebaseRelease ID", "CodebaseRelease DOI"]) + for release in releases_with_dois: + writer.writerow([release.pk, release.doi]) + CodebaseRelease.objects.update(doi=None) + else: + logger.info("Aborting.") + sys.exit() + + """ + assert correctness + """ + if not dry_run: + print(VERIFICATION_MESSAGE) + assert Codebase.objects.with_doi().count() == 0 + assert CodebaseRelease.objects.with_doi().count() == 0 + logger.info("Success. All existing codebase DOIs deleted.") + + """ Mint DOIs for all new Peer Reviewed Releases""" + peer_reviewed_releases = CodebaseRelease.objects.reviewed() + datacite_api = DataCiteApi(dry_run=dry_run) + for release in peer_reviewed_releases: + try: + datacite_api.mint_new_doi_for_release(release) + except Exception: + logger.exception("Error minting DOI for release %s", release) + + +class Command(BaseCommand): + + def add_arguments(self, parser): + parser.add_argument( + "--interactive", + action="store_true", + help="Wait for user to press enter to continue.", + default=True, + ) + parser.add_argument( + "--dry-run", action="store_true", help="Output what would have happened." 
+ ) + + def handle(self, *args, **options): + interactive = options["interactive"] + dry_run = options["dry_run"] + reset_all_dois(interactive, dry_run) diff --git a/django/library/management/commands/update_metadata_for_all_existing_dois_04.py b/django/library/management/commands/doi_update_doi_metadata_04.py similarity index 81% rename from django/library/management/commands/update_metadata_for_all_existing_dois_04.py rename to django/library/management/commands/doi_update_doi_metadata_04.py index 1a691ca24..d43ae0bfc 100644 --- a/django/library/management/commands/update_metadata_for_all_existing_dois_04.py +++ b/django/library/management/commands/doi_update_doi_metadata_04.py @@ -12,10 +12,11 @@ def update_doi_metadata(interactive=True, dry_run=True): datacite_api = DataCiteApi(dry_run=dry_run) all_codebases_with_dois = Codebase.objects.with_doi() + total_number_of_codebases_with_dois = all_codebases_with_dois.count() logger.info( "Updating metadata for all codebases (%s) with DOIs and their releases with DOIs. 
...", - all_codebases_with_dois.count(), + total_number_of_codebases_with_dois, ) for i, codebase in enumerate(all_codebases_with_dois): @@ -23,7 +24,7 @@ def update_doi_metadata(interactive=True, dry_run=True): "Processing codebase %s - %s/%s", codebase.pk, i + 1, - all_codebases_with_dois.count(), + total_number_of_codebases_with_dois, ) if interactive: input("Press Enter to continue or CTRL+C to quit...") @@ -68,39 +69,38 @@ def update_doi_metadata(interactive=True, dry_run=True): """ if not dry_run: print(VERIFICATION_MESSAGE) - logger.info("Checking that Comses metadata is in sync with DataCite...") - invalid_codebases = [] + logger.info("Checking that local metadata is in sync with DataCite...") invalid_releases = [] results = datacite_api.threaded_metadata_check(all_codebases_with_dois) - for pk, is_meta_valid in results: - if not is_meta_valid: - invalid_codebases.append(pk) - + invalid_codebases = [ + pk for pk, is_valid_metadata in results if not is_valid_metadata + ] if invalid_codebases: logger.error( - "Failure. Metadata not in sync with DataCite for %s codebases: %s", + "FAILURE: Metadata not in sync with DataCite for %s codebases: %s", - invalid_codebases.count(), + len(invalid_codebases), invalid_codebases, ) else: logger.info( - "Success. Metadata in sync with DataCite for all codebases with DOI." + "SUCCESS: Metadata in sync with DataCite for all codebases with DOI." ) all_releases_with_dois = CodebaseRelease.objects.with_doi() results = datacite_api.threaded_metadata_check(all_releases_with_dois) - for pk, is_meta_valid in results: - if not is_meta_valid: - invalid_releases.append(pk) - + invalid_releases = [ + pk for pk, is_valid_metadata in results if not is_valid_metadata + ] if invalid_releases: logger.error( - f"Failure. Metadata not in sync with DataCite for {len(invalid_releases)} releases: {invalid_releases}" + "FAILURE: Metadata not in sync with DataCite for %s releases: %s", + len(invalid_releases), + invalid_releases, ) else: logger.info( - f"Success. 
Metadata in sync with DataCite for all releases with DOI." + "SUCCESS: Metadata in sync with DataCite for all releases with DOI." ) diff --git a/django/library/migrations/0031_dataciteregistrationlog_and_more.py b/django/library/migrations/0031_dataciteregistrationlog_and_more.py new file mode 100644 index 000000000..ef45abe60 --- /dev/null +++ b/django/library/migrations/0031_dataciteregistrationlog_and_more.py @@ -0,0 +1,96 @@ +# Generated by Django 4.2.16 on 2024-10-29 21:28 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("library", "0030_peerreviewinvitation"), + ] + + operations = [ + migrations.CreateModel( + name="DataCiteRegistrationLog", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "action", + models.CharField( + choices=[ + ("CREATE_RELEASE_DOI", "create release DOI"), + ("CREATE_CODEBASE_DOI", "create codebase DOI"), + ("UPDATE_RELEASE_METADATA", "update release metadata"), + ("UPDATE_CODEBASE_METADATA", "update codebase metadata"), + ], + max_length=50, + ), + ), + ("timestamp", models.DateTimeField(auto_now_add=True)), + ("http_status", models.IntegerField(default=None, null=True)), + ("message", models.TextField(default=None, null=True)), + ("metadata_hash", models.CharField(max_length=255)), + ("doi", models.CharField(blank=True, max_length=255, null=True)), + ], + ), + migrations.AlterModelOptions( + name="peerreviewinvitation", + options={"ordering": ["-date_sent"]}, + ), + migrations.RemoveField( + model_name="contributor", + name="affiliations", + ), + migrations.AlterField( + model_name="codebase", + name="date_created", + field=models.DateTimeField(auto_now_add=True), + ), + migrations.AlterField( + model_name="codebaserelease", + name="date_created", + field=models.DateTimeField(auto_now_add=True), + ), + migrations.AlterField( + 
model_name="codebasereleasedownload", + name="date_created", + field=models.DateTimeField(auto_now_add=True), + ), + migrations.AlterField( + model_name="peerreviewinvitation", + name="date_sent", + field=models.DateTimeField(auto_now=True), + ), + migrations.DeleteModel( + name="ContributorAffiliation", + ), + migrations.AddField( + model_name="dataciteregistrationlog", + name="codebase", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="datacite_logs", + to="library.codebase", + ), + ), + migrations.AddField( + model_name="dataciteregistrationlog", + name="release", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="datacite_logs", + to="library.codebaserelease", + ), + ), + ] diff --git a/django/library/models.py b/django/library/models.py index da4d784b7..fce5faf8b 100644 --- a/django/library/models.py +++ b/django/library/models.py @@ -7,8 +7,8 @@ import uuid from abc import ABC -from collections import OrderedDict -from datetime import date, datetime, timedelta +from collections import OrderedDict, defaultdict +from datetime import date, timedelta from typing import List from django.conf import settings @@ -1132,6 +1132,9 @@ def accessible(self, user): def reviewed(self, **kwargs): return self.filter(peer_reviewed=True, **kwargs) + def unreviewed(self, **kwargs): + return self.exclude(peer_reviewed=True).filter(**kwargs) + def with_doi(self, **kwargs): return self.exclude(Q(doi__isnull=True) | Q(doi="")).filter(**kwargs) @@ -2594,15 +2597,16 @@ def __init__(self, release: CodebaseRelease): ] if release.live: - # should not generate CodeMeta or DataCite for non-published releases self.first_published = release.first_published_at.date() self.last_published = release.last_published_on.date() - self.copyright_year = self.last_published.year else: - # FIXME: default values? 
- self.first_published = self.last_published = self.copyright_year = ( - date.today() + # FIXME: default to today for unpublished releases + # should not generate CodeMeta or DataCite for non-published releases but CodeMeta is generated even for unpublished + logger.warning( + "Generating CommonMetadata for an unpublished release: %s", release ) + self.first_published = self.last_published = date.today() + self.copyright_year = self.last_published.year if release.license: self.license = release.license else: @@ -2837,7 +2841,7 @@ class DataCiteSchema(ABC): COMSES_PUBLISHER = { "publisherIdentifier": CommonMetadata.COMSES_ORGANIZATION["ror_id"], "publisherIdentifierScheme": "ROR", - "schemeURI": "https://ror.org", + "schemeUri": "https://ror.org", "name": CommonMetadata.COMSES_ORGANIZATION["name"], } @@ -2884,13 +2888,13 @@ def convert_release_contributor(cls, release_contributor: ReleaseContributor): nameType="Personal", givenName=contributor.given_name, familyName=contributor.family_name, - creatorName=f"{contributor.family_name}, {contributor.given_name}", + name=f"{contributor.family_name}, {contributor.given_name}", ) if contributor.orcid_url: creator.update( nameIdentifier=contributor.orcid_url, nameIdentifierScheme="ORCID", - schemeURI="https://orcid.org", + schemeUri="https://orcid.org", ) else: - creator.update(nameType="Organizational", creatorName=contributor.name) + creator.update(nameType="Organizational", name=contributor.name) @@ -2904,7 +2908,7 @@ def convert_release_contributor(cls, release_contributor: ReleaseContributor): creator.update( affiliationIdentifier=ror_ids[0], affiliationIdentifierScheme="ROR", - schemeURI="https://ror.org", + schemeUri="https://ror.org", ) else: # otherwise set to the first affiliation freetext name @@ -2932,44 +2936,44 @@ def to_contributors(cls, common_metadata: CommonMetadata): nonauthor_contributors = common_metadata.release_contributor_nonauthors contributors = [ - # FIXME: probably not the right way to bootstrap non author contributors - # perhaps this should be the provider 
institution, e.g., CML ROR + # bootstrap non author contributors { - "contributorName": common_metadata.code_repository, - "contributorType": "hostingInstitution", - } + "name": common_metadata.code_repository, + "contributorType": "HostingInstitution", + }, + { + "name": common_metadata.COMSES_ORGANIZATION["name"], + "contributorType": "Editor", + }, ] if nonauthor_contributors: - role_mapping = { - "copyrightHolder": "RightsHolder", - "editor": "Editor", - "funder": "Sponsor", - "pointOfContact": "ContactPerson", - "resourceProvider": "Distributor", - } - + role_mapping = defaultdict( + lambda: "Other", + { + "copyrightHolder": "RightsHolder", + "editor": "Editor", + "funder": "Sponsor", + "pointOfContact": "ContactPerson", + "resourceProvider": "Distributor", + }, + ) for release_contributor in nonauthor_contributors: - # FIXME: what is other_role_added for? - other_role_added = False + has_other_role_already = False for role in release_contributor.roles: contributor_type = role_mapping.get(role, "Other") - if contributor_type == "Other" and not other_role_added: - contributors.append( - { - "contributorName": release_contributor.contributor.name, - "contributorType": "Other", - } - ) - other_role_added = True - elif contributor_type != "Other": - contributors.append( - { - "contributorName": release_contributor.contributor.name, - "contributorType": contributor_type, - } - ) - + # only allow a single Other role per contributor + if contributor_type == "Other": + if has_other_role_already: + continue + else: + has_other_role_already = True + contributors.append( + { + "name": release_contributor.contributor.name, + "contributorType": contributor_type, + } + ) return contributors def to_dict(self): @@ -3012,17 +3016,16 @@ def convert(cls, common_metadata: CommonMetadata): common_metadata.release_contributor_authors ), "descriptions": common_metadata.descriptions, - "publicationYear": common_metadata.copyright_year, + "publicationYear": 
str(cls.to_publication_year(common_metadata)), "titles": [{"title": common_metadata.name}], "version": common_metadata.version, - "codeRepository": common_metadata.code_repository, "contributors": cls.to_contributors(common_metadata), "subjects": cls.convert_keywords(common_metadata), "rightsList": [ { "rights": common_metadata.license.name, "rightsIdentifier": common_metadata.license.name, - "rightsURI": common_metadata.license.url, + "rightsUri": common_metadata.license.url, } ], } @@ -3112,7 +3115,7 @@ def convert(cls, codebase: Codebase): "publicationYear": codebase.publication_year, } - """ + """ Set codebase relatedIdentifiers """ diff --git a/django/requirements.txt b/django/requirements.txt index b5a7e1356..d67dbd045 100644 --- a/django/requirements.txt +++ b/django/requirements.txt @@ -1,6 +1,6 @@ bagit==1.8.1 bleach==6.1.0 -datacite==1.1.4 +datacite==1.2.0 dedupe==3.0.2 django-allauth==0.63.6 django-anymail[mailgun]==10.3 @@ -24,7 +24,7 @@ django-vite==2.1.3 # latest is 3.0.4 django-waffle==4.1.0 djangorestframework==3.15.2 djangorestframework-camel-case==1.4.2 -Django==4.2.15 +Django==4.2.16 elasticsearch-dsl>=7.0.0,<8.0.0 elasticsearch>=7.0.0,<8.0.0 html2text>=2016.9.19