From b475f8cae1f31ab478a9a1cc96edbf8358412efa Mon Sep 17 00:00:00 2001 From: HoussemNasri Date: Thu, 5 Oct 2023 09:35:48 +0100 Subject: [PATCH 1/3] Infer DOI from ArXiv identifier --- .../org/jabref/logic/cleanup/DoiCleanup.java | 19 +++++++++++++++--- .../entry/identifier/ArXivIdentifier.java | 20 +++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/jabref/logic/cleanup/DoiCleanup.java b/src/main/java/org/jabref/logic/cleanup/DoiCleanup.java index 3657acabb5f..a208f47fcce 100644 --- a/src/main/java/org/jabref/logic/cleanup/DoiCleanup.java +++ b/src/main/java/org/jabref/logic/cleanup/DoiCleanup.java @@ -13,17 +13,19 @@ import org.jabref.model.entry.field.Field; import org.jabref.model.entry.field.StandardField; import org.jabref.model.entry.field.UnknownField; +import org.jabref.model.entry.identifier.ArXivIdentifier; import org.jabref.model.entry.identifier.DOI; /** - * Formats the DOI (e.g. removes http part) and also moves DOIs from note, url or ee field to the doi field. + * Formats the DOI (e.g. removes http part) and also infers DOIs from the note, url, eprint or ee fields. */ public class DoiCleanup implements CleanupJob { /** * Fields to check for DOIs. 
*/ - private static final List FIELDS = Arrays.asList(StandardField.NOTE, StandardField.URL, new UnknownField("ee")); + private static final List FIELDS = Arrays.asList(StandardField.NOTE, StandardField.URL, StandardField.EPRINT, + new UnknownField("ee")); @Override public List cleanup(BibEntry entry) { @@ -57,7 +59,9 @@ public List cleanup(BibEntry entry) { } else { // As the Doi field is empty we now check if note, url, or ee field contains a Doi for (Field field : FIELDS) { - Optional doi = entry.getField(field).flatMap(DOI::parse); + Optional fieldContentOpt = entry.getField(field); + + Optional doi = fieldContentOpt.flatMap(DOI::parse); if (doi.isPresent()) { // Update Doi @@ -65,6 +69,15 @@ public List cleanup(BibEntry entry) { change.ifPresent(changes::add); removeFieldValue(entry, field, changes); } + + if (StandardField.EPRINT == field && fieldContentOpt.isPresent()) { + fieldContentOpt.flatMap(ArXivIdentifier::parse) + .flatMap(ArXivIdentifier::inferDOI) + .ifPresent(inferredDoi -> { + Optional change = entry.setField(StandardField.DOI, inferredDoi.getDOI()); + change.ifPresent(changes::add); + }); + } } } return changes; diff --git a/src/main/java/org/jabref/model/entry/identifier/ArXivIdentifier.java b/src/main/java/org/jabref/model/entry/identifier/ArXivIdentifier.java index 4db64bad268..01a9e74270f 100644 --- a/src/main/java/org/jabref/model/entry/identifier/ArXivIdentifier.java +++ b/src/main/java/org/jabref/model/entry/identifier/ArXivIdentifier.java @@ -9,10 +9,14 @@ import org.jabref.model.strings.StringUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + /** * Identifier for the arXiv. 
See https://arxiv.org/help/arxiv_identifier */ public class ArXivIdentifier extends EprintIdentifier { + private static final Logger LOGGER = LoggerFactory.getLogger(ArXivIdentifier.class); private static final String ARXIV_PREFIX = "http(s)?://arxiv.org/(abs|pdf)/|arxiv|arXiv"; private final String identifier; @@ -71,6 +75,22 @@ public Optional getClassification() { } } + /** + * arXiv articles are assigned DOIs automatically, which start with a DOI prefix '10.48550/' followed by the arXiv + * ID (replacing the colon with a period). + *

+ * For more information: + * + * new-arxiv-articles-are-now-automatically-assigned-dois + * */ + public Optional inferDOI() { + if (StringUtil.isBlank(identifier)) { + return Optional.empty(); + } + + return DOI.parse("10.48550/arxiv." + identifier); + } + @Override public String toString() { return "ArXivIdentifier{" + From 9f75ade304498a3828575c22942a195b6abc78b7 Mon Sep 17 00:00:00 2001 From: HoussemNasri Date: Thu, 5 Oct 2023 09:44:00 +0100 Subject: [PATCH 2/3] Update Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68032150277..470fe431158 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv - The export formats `listrefs`, `tablerefs`, `tablerefsabsbib`, now use the ISO date format in the footer [#10383](https://github.com/JabRef/jabref/pull/10383). - When searching for an identifier in the "Web search", the title of the search window is now "Identifier-based Web Search". [#10391](https://github.com/JabRef/jabref/pull/10391) +- We modified the DOI cleanup to infer the DOI from an ArXiV ID if it's present. 
[#10426](https://github.com/JabRef/jabref/issues/10426) ### Fixed From f5bc1b2241540eef2adcda4636a252253e343d97 Mon Sep 17 00:00:00 2001 From: HoussemNasri Date: Thu, 5 Oct 2023 10:33:50 +0100 Subject: [PATCH 3/3] Remove extra isPresent() check --- src/main/java/org/jabref/logic/cleanup/DoiCleanup.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/jabref/logic/cleanup/DoiCleanup.java b/src/main/java/org/jabref/logic/cleanup/DoiCleanup.java index a208f47fcce..1cb3f2d04e3 100644 --- a/src/main/java/org/jabref/logic/cleanup/DoiCleanup.java +++ b/src/main/java/org/jabref/logic/cleanup/DoiCleanup.java @@ -70,7 +70,7 @@ public List cleanup(BibEntry entry) { removeFieldValue(entry, field, changes); } - if (StandardField.EPRINT == field && fieldContentOpt.isPresent()) { + if (StandardField.EPRINT == field) { fieldContentOpt.flatMap(ArXivIdentifier::parse) .flatMap(ArXivIdentifier::inferDOI) .ifPresent(inferredDoi -> {