Skip to content

Commit

Permalink
refactor: prefix doi management commands
Browse files Browse the repository at this point in the history
- prefix all one-off destructive DOI commands with `doi_`
- add reset_staging to mint new DOIs on staging using the datacite
  sandbox, doi_reset_staging -> step 3, doi_mint_parent_codebase_dois
- bump deps for datacite schema 4.5 and django cve
  • Loading branch information
alee committed Nov 13, 2024
1 parent 9c2678f commit ec356fb
Show file tree
Hide file tree
Showing 9 changed files with 290 additions and 95 deletions.
19 changes: 13 additions & 6 deletions django/library/doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
DataCiteRegistrationLog,
)

from datacite import DataCiteRESTClient, schema43
from datacite import DataCiteRESTClient, schema45
from datacite.errors import (
DataCiteError,
DataCiteNoContentError,
Expand Down Expand Up @@ -183,8 +183,12 @@ def _datacite_heartbeat_url(self):

def _validate_metadata(self, datacite_metadata: DataCiteSchema):
    """Validate DataCite metadata against the DataCite 4.5 JSON schema.

    Args:
        datacite_metadata: metadata object exposing ``to_dict()``.

    Returns:
        A ``(datacite_metadata, metadata_dict)`` tuple, where ``metadata_dict``
        is the dictionary that was validated.

    Raises:
        DataCiteError: if the metadata does not validate; the underlying
            validation error is chained as ``__cause__``.
    """
    metadata_dict = datacite_metadata.to_dict()
    try:
        schema45.validator.validate(metadata_dict)
    except Exception as e:
        # One placeholder per argument: the previous call passed two arguments
        # to a single "%s", which makes the logging module fail to format the
        # record and the validation error never reaches the log.
        # NOTE(review): schema45.tostring() is invoked on metadata that just
        # failed validation -- confirm it cannot itself raise here.
        logger.error(
            "Invalid DataCite metadata: %s (%s)",
            schema45.tostring(metadata_dict),
            e,
        )
        # Chain the original error so callers can inspect the root cause.
        raise DataCiteError(f"Invalid DataCite metadata: {metadata_dict}") from e
    return datacite_metadata, metadata_dict

Expand All @@ -202,14 +206,17 @@ def mint_public_doi(self, codebase_or_release: Codebase | CodebaseRelease):
return "XX.DRYXX/XXXX-XRUN", True
if hasattr(codebase_or_release, "datacite"):
del codebase_or_release.datacite
datacite_metadata, metadata_dict = self._validate_metadata(
codebase_or_release.datacite
)

doi = "Unassigned"
http_status = 200
message = "Minted new DOI successfully."

datacite_metadata = codebase_or_release.datacite

try:
datacite_metadata, metadata_dict = self._validate_metadata(
datacite_metadata
)
doi = self.datacite_client.public_doi(
metadata_dict, url=codebase_or_release.permanent_url
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,9 @@

def remove_existing_codebase_dois(interactive=True, dry_run=True):
print(get_welcome_message(dry_run))
codebases_with_dois = Codebase.objects.exclude(doi__isnull=True)
codebases_with_dois = Codebase.objects.with_doi()

logger.info(
f"Removing DOIs for {len(codebases_with_dois)} Codebases. Query: Codebase.objects.exclude(doi__isnull=True) ..."
)
logger.info("Removing all Codebase DOIs")
if interactive and codebases_with_dois.exists():
confirm = input(
"WARNING: this will remove all existing codebase DOIs and is unrecoverable. Type 'DELETE' to continue or Ctrl+C to quit: "
Expand All @@ -30,10 +28,6 @@ def remove_existing_codebase_dois(interactive=True, dry_run=True):
logger.info("Aborting.")
sys.exit()

logger.info(
"All DOIs from {len(codebases_with_dois)} codebases deleted successfully."
)

"""
assert correctness
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import sys
from django.core.management.base import BaseCommand
from library.doi import VERIFICATION_MESSAGE, get_welcome_message
from library.models import CodebaseRelease
Expand All @@ -9,9 +10,7 @@
def remove_dois_from_unreviewed_releases(interactive=True, dry_run=True):
print(get_welcome_message(dry_run))

unreviewed_releases_with_dois = CodebaseRelease.objects.filter(
peer_reviewed=False, doi__isnull=False
)
unreviewed_releases_with_dois = CodebaseRelease.objects.unreviewed().with_doi()
total_unreviewed_releases_with_dois = unreviewed_releases_with_dois.count()

logger.info(
Expand All @@ -24,23 +23,21 @@ def remove_dois_from_unreviewed_releases(interactive=True, dry_run=True):
)
if confirm.lower() == "delete":
unreviewed_releases_with_dois.update(doi=None)
else:
logger.debug("Aborting...")
sys.exit()

"""
assert correctness
"""
if not dry_run:
print(VERIFICATION_MESSAGE)
logger.info(
"Checking that DOIs for all not peer reviewed releases have been deleted..."
)
assert (
CodebaseRelease.objects.filter(
peer_reviewed=False, doi__isnull=False
).count()
== 0
"Checking that DOIs for all unreviewed releases have been deleted..."
)
assert not CodebaseRelease.objects.unreviewed().with_doi().exists()
logger.info(
"All DOIs from not peer_reviewed CodebaseReleases %s with DOIs deleted successfully.",
"%s unreviewed CodebaseReleases with DOIs updated successfully.",
total_unreviewed_releases_with_dois,
)

Expand Down
83 changes: 83 additions & 0 deletions django/library/management/commands/doi_reset_staging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import csv
import logging
import sys
from django.conf import settings
from django.core.management.base import BaseCommand
from library.doi import VERIFICATION_MESSAGE, get_welcome_message, DataCiteApi
from library.models import Codebase, CodebaseRelease

logger = logging.getLogger(__name__)


def reset_all_dois(interactive=True, dry_run=True):
    """Remove every Codebase / CodebaseRelease DOI and mint fresh release DOIs.

    Refuses to run when ``settings.DEPLOY_ENVIRONMENT.is_production`` is true.
    After the operator confirms by typing ``DELETE``, the existing DOIs are
    backed up to ``codebase_dois.csv`` and ``release_doi.csv`` in the current
    working directory, cleared, and a new DOI is requested for each release
    returned by ``CodebaseRelease.objects.reviewed()``. Releases whose minting
    raised are recorded in ``invalid_releases.csv``.

    Args:
        interactive: accepted for parity with the other DOI commands.
            NOTE(review): it is not consulted here -- the confirmation prompt
            is always shown; confirm whether that is intended.
        dry_run: forwarded to ``get_welcome_message`` and ``DataCiteApi``; the
            post-delete verification only runs when it is false.
            NOTE(review): local DOIs are cleared regardless of this flag --
            confirm whether that is intended.
    """
    print(get_welcome_message(dry_run))
    if settings.DEPLOY_ENVIRONMENT.is_production:
        logger.error("This command is not allowed in production.")
        sys.exit()
    logger.info("(ENV: %s) Removing all DOIs", settings.DEPLOY_ENVIRONMENT)
    releases_with_dois = CodebaseRelease.objects.with_doi()
    codebases_with_dois = Codebase.objects.with_doi()
    confirm = input(
        "WARNING: this will remove ALL existing DOIs and is unrecoverable. Type 'DELETE' to continue or Ctrl+C to quit: "
    )
    if confirm.lower() != "delete":
        logger.info("Aborting.")
        sys.exit()

    # Back up the existing DOIs before clearing them. newline="" is what the
    # csv module expects from files it writes to.
    with open("codebase_dois.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["Codebase ID", "Codebase DOI"])
        for codebase in codebases_with_dois:
            writer.writerow([codebase.pk, codebase.doi])
    Codebase.objects.update(doi=None)

    with open("release_doi.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["CodebaseRelease ID", "CodebaseRelease DOI"])
        for release in releases_with_dois:
            writer.writerow([release.pk, release.doi])
    CodebaseRelease.objects.update(doi=None)

    # Verify that no DOI survived the reset.
    if not dry_run:
        print(VERIFICATION_MESSAGE)
        assert not Codebase.objects.with_doi().exists()
        assert not CodebaseRelease.objects.with_doi().exists()
        logger.info("Success. All existing codebase DOIs deleted.")

    # Mint DOIs for all peer reviewed releases.
    peer_reviewed_releases = CodebaseRelease.objects.reviewed()
    datacite_api = DataCiteApi(dry_run=dry_run)
    invalid_releases = []
    for release in peer_reviewed_releases:
        try:
            datacite_api.mint_new_doi_for_release(release)
        except Exception as e:
            # logger.exception keeps the traceback alongside the message.
            logger.exception("Error minting DOI for release %s", release)
            invalid_releases.append((release, e))

    # Open the report once: reopening it in "w" mode for every failure
    # truncated the file each time and kept only the last failed release.
    if invalid_releases:
        with open("invalid_releases.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["CodebaseRelease ID", "Reason", "Datacite Metadata"])
            for release, error in invalid_releases:
                writer.writerow([release.pk, error, release.datacite.to_dict()])


class Command(BaseCommand):
    """Management command entry point that delegates to ``reset_all_dois``."""

    def add_arguments(self, parser):
        """Register the ``--interactive`` and ``--dry-run`` switches."""
        # NOTE(review): store_true combined with default=True means this
        # option is True whether or not the flag is supplied.
        parser.add_argument(
            "--interactive",
            default=True,
            action="store_true",
            help="Wait for user to press enter to continue.",
        )
        parser.add_argument(
            "--dry-run",
            help="Output what would have happened.",
            action="store_true",
        )

    def handle(self, *args, **options):
        """Forward the parsed options to ``reset_all_dois``."""
        reset_all_dois(options["interactive"], options["dry_run"])
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,19 @@ def update_doi_metadata(interactive=True, dry_run=True):

datacite_api = DataCiteApi(dry_run=dry_run)
all_codebases_with_dois = Codebase.objects.with_doi()
total_number_of_codebases_with_dois = all_codebases_with_dois.count()

logger.info(
"Updating metadata for all codebases (%s) with DOIs and their releases with DOIs. ...",
all_codebases_with_dois.count(),
total_number_of_codebases_with_dois,
)

for i, codebase in enumerate(all_codebases_with_dois):
logger.debug(
"Processing codebase %s - %s/%s",
codebase.pk,
i + 1,
all_codebases_with_dois.count(),
total_number_of_codebases_with_dois,
)
if interactive:
input("Press Enter to continue or CTRL+C to quit...")
Expand Down Expand Up @@ -68,39 +69,38 @@ def update_doi_metadata(interactive=True, dry_run=True):
"""
if not dry_run:
print(VERIFICATION_MESSAGE)
logger.info("Checking that Comses metadata is in sync with DataCite...")
invalid_codebases = []
logger.info("Checking that local metadata is in sync with DataCite...")
invalid_releases = []

results = datacite_api.threaded_metadata_check(all_codebases_with_dois)
for pk, is_meta_valid in results:
if not is_meta_valid:
invalid_codebases.append(pk)

invalid_codebases = [
pk for pk, is_valid_metadata in results if not is_valid_metadata
]
if invalid_codebases:
logger.error(
"Failure. Metadata not in sync with DataCite for %s codebases: %s",
"FAILURE: Metadata not in sync with DataCite for %s codebases: %s",
invalid_codebases.count(),
invalid_codebases,
)
else:
logger.info(
"Success. Metadata in sync with DataCite for all codebases with DOI."
"SUCCESS: Metadata in sync with DataCite for all codebases with DOI."
)

all_releases_with_dois = CodebaseRelease.objects.with_doi()
results = datacite_api.threaded_metadata_check(all_releases_with_dois)
for pk, is_meta_valid in results:
if not is_meta_valid:
invalid_releases.append(pk)

invalid_releases = [
pk for pk, is_valid_metadata in results if not is_valid_metadata
]
if invalid_releases:
logger.error(
f"Failure. Metadata not in sync with DataCite for {len(invalid_releases)} releases: {invalid_releases}"
"FAILURE: Metadata not in sync with DataCite for %s releases: %s",
invalid_releases.count(),
invalid_releases,
)
else:
logger.info(
f"Success. Metadata in sync with DataCite for all releases with DOI."
"SUCCESS: Metadata in sync with DataCite for all releases with DOI."
)


Expand Down
96 changes: 96 additions & 0 deletions django/library/migrations/0031_dataciteregistrationlog_and_more.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Generated by Django 4.2.16 on 2024-10-29 21:28

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    # Schema migration for the "library" app: introduces the
    # DataCiteRegistrationLog model, drops the contributor affiliations
    # field and the ContributorAffiliation model, and alters several
    # date fields.

    # Must be applied after library migration 0030_peerreviewinvitation.
    dependencies = [
        ("library", "0030_peerreviewinvitation"),
    ]

    operations = [
        # New table holding one row per logged DataCite action.
        migrations.CreateModel(
            name="DataCiteRegistrationLog",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    # Which of the four create/update actions was logged.
                    "action",
                    models.CharField(
                        choices=[
                            ("CREATE_RELEASE_DOI", "create release DOI"),
                            ("CREATE_CODEBASE_DOI", "create codebase DOI"),
                            ("UPDATE_RELEASE_METADATA", "update release metadata"),
                            ("UPDATE_CODEBASE_METADATA", "update codebase metadata"),
                        ],
                        max_length=50,
                    ),
                ),
                # Set automatically when the row is first created.
                ("timestamp", models.DateTimeField(auto_now_add=True)),
                # Both nullable and default to NULL.
                ("http_status", models.IntegerField(default=None, null=True)),
                ("message", models.TextField(default=None, null=True)),
                ("metadata_hash", models.CharField(max_length=255)),
                ("doi", models.CharField(blank=True, max_length=255, null=True)),
            ],
        ),
        # Default ordering for peer review invitations: date_sent descending.
        migrations.AlterModelOptions(
            name="peerreviewinvitation",
            options={"ordering": ["-date_sent"]},
        ),
        # The affiliations field is removed from contributor before the
        # ContributorAffiliation model itself is deleted further down.
        migrations.RemoveField(
            model_name="contributor",
            name="affiliations",
        ),
        # date_created on these three models is now populated on insert.
        migrations.AlterField(
            model_name="codebase",
            name="date_created",
            field=models.DateTimeField(auto_now_add=True),
        ),
        migrations.AlterField(
            model_name="codebaserelease",
            name="date_created",
            field=models.DateTimeField(auto_now_add=True),
        ),
        migrations.AlterField(
            model_name="codebasereleasedownload",
            name="date_created",
            field=models.DateTimeField(auto_now_add=True),
        ),
        # auto_now: date_sent is refreshed on every save, not only on creation.
        migrations.AlterField(
            model_name="peerreviewinvitation",
            name="date_sent",
            field=models.DateTimeField(auto_now=True),
        ),
        migrations.DeleteModel(
            name="ContributorAffiliation",
        ),
        # Nullable foreign keys from a log row to a codebase and to a release;
        # CASCADE removes log rows together with the referenced object.
        migrations.AddField(
            model_name="dataciteregistrationlog",
            name="codebase",
            field=models.ForeignKey(
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name="datacite_logs",
                to="library.codebase",
            ),
        ),
        migrations.AddField(
            model_name="dataciteregistrationlog",
            name="release",
            field=models.ForeignKey(
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name="datacite_logs",
                to="library.codebaserelease",
            ),
        ),
    ]
Loading

0 comments on commit ec356fb

Please sign in to comment.