diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c9853c1f24..725fe30477c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `# ### Changed +- We added a short DOI field formatter which shortens DOI to more human readable form. [koppor#343](https://github.com/koppor/jabref/issues/343) + ### Fixed ### Removed diff --git a/src/main/java/org/jabref/logic/formatter/Formatters.java b/src/main/java/org/jabref/logic/formatter/Formatters.java index 9e867ac23ed..7aea0a024a6 100644 --- a/src/main/java/org/jabref/logic/formatter/Formatters.java +++ b/src/main/java/org/jabref/logic/formatter/Formatters.java @@ -19,6 +19,7 @@ import org.jabref.logic.formatter.bibtexfields.OrdinalsToSuperscriptFormatter; import org.jabref.logic.formatter.bibtexfields.RegexFormatter; import org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter; +import org.jabref.logic.formatter.bibtexfields.ShortenDOIFormatter; import org.jabref.logic.formatter.bibtexfields.UnicodeToLatexFormatter; import org.jabref.logic.formatter.bibtexfields.UnitsToLatexFormatter; import org.jabref.logic.formatter.casechanger.CapitalizeFormatter; @@ -67,7 +68,8 @@ public static List getOthers() { new OrdinalsToSuperscriptFormatter(), new RemoveBracesFormatter(), new UnitsToLatexFormatter(), - new EscapeUnderscoresFormatter() + new EscapeUnderscoresFormatter(), + new ShortenDOIFormatter() ); } @@ -102,5 +104,4 @@ public static Optional getFormatterForModifier(String modifier) { return getAll().stream().filter(f -> f.getKey().equals(modifier)).findAny(); } } - } diff --git a/src/main/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatter.java b/src/main/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatter.java new file mode 100644 index 00000000000..75b9ba9a2ab --- /dev/null +++ b/src/main/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatter.java @@ -0,0 +1,59 @@ +package org.jabref.logic.formatter.bibtexfields; + +import java.util.Objects; +import java.util.Optional; + +import org.jabref.logic.importer.util.ShortDOIService; +import org.jabref.logic.importer.util.ShortDOIServiceException; +import org.jabref.logic.l10n.Localization; +import org.jabref.model.cleanup.Formatter; +import org.jabref.model.entry.identifier.DOI; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ShortenDOIFormatter extends Formatter { + + private static final Logger LOGGER = LoggerFactory.getLogger(ShortenDOIFormatter.class); + + @Override + public String getName() { + return Localization.lang("Shorten DOI"); + } + + @Override + public String getKey() { + return "short_doi"; + } + + @Override + public String format(String value) { + Objects.requireNonNull(value); + + ShortDOIService shortDOIService = new ShortDOIService(); + + Optional doi = Optional.empty(); + + try { + doi = DOI.parse(value); + + if (doi.isPresent()) { + return shortDOIService.getShortDOI(doi.get()).getDOI(); + } + } catch (ShortDOIServiceException e) { + LOGGER.error(e.getMessage(), e); + } + + return value; + } + + @Override + public String getDescription() { + return Localization.lang("Shortens DOI to more human readable form."); + } + + @Override + public String getExampleInput() { + return "10.1006/jmbi.1998.2354"; + } +} diff --git a/src/main/java/org/jabref/logic/importer/util/ShortDOIService.java b/src/main/java/org/jabref/logic/importer/util/ShortDOIService.java new file mode 100644 index 00000000000..bbb3dc7b4c6 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/util/ShortDOIService.java @@ -0,0 +1,63 @@ +package org.jabref.logic.importer.util; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; + +import org.jabref.logic.importer.ParseException; +import org.jabref.logic.net.URLDownload; +import org.jabref.model.entry.identifier.DOI; + +import org.apache.http.client.utils.URIBuilder; +import org.json.JSONException; +import org.json.JSONObject; + +/** + * Class for obtaining shortened DOI names. + * + * @see http://shortdoi.org + */ +public class ShortDOIService { + + private static final String BASIC_URL = "http://shortdoi.org/"; + + /** + * Obtains shortened DOI name for given DOI + * + * @param doi DOI + * @return A shortened DOI name + */ + public DOI getShortDOI(DOI doi) throws ShortDOIServiceException { + JSONObject responseJSON = makeRequest(doi); + String shortDoi = responseJSON.getString("ShortDOI"); + + return new DOI(shortDoi); + } + + private JSONObject makeRequest(DOI doi) throws ShortDOIServiceException { + + URIBuilder uriBuilder = null; + URL url = null; + + try { + uriBuilder = new URIBuilder(BASIC_URL); + uriBuilder.setPath(uriBuilder.getPath() + doi.getDOI()); + uriBuilder.addParameter("format", "json"); + + URI uri = uriBuilder.build(); + url = uri.toURL(); + } catch (URISyntaxException | MalformedURLException e) { + throw new ShortDOIServiceException("Cannot get short DOI", e); + } + + URLDownload urlDownload = new URLDownload(url); + + try { + return JsonReader.toJsonObject(urlDownload.asInputStream()); + } catch (ParseException | IOException | JSONException e) { + throw new ShortDOIServiceException("Cannot get short DOI", e); + } + } +} diff --git a/src/main/java/org/jabref/logic/importer/util/ShortDOIServiceException.java b/src/main/java/org/jabref/logic/importer/util/ShortDOIServiceException.java new file mode 100644 index 00000000000..cd7943eba03 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/util/ShortDOIServiceException.java @@ -0,0 +1,25 @@ +package org.jabref.logic.importer.util; + +import org.jabref.JabRefException; + +public class ShortDOIServiceException extends JabRefException { + public ShortDOIServiceException(String message) { + super(message); + } + + public ShortDOIServiceException(String message, Throwable cause) { + super(message, cause); + } + + public ShortDOIServiceException(String message, String localizedMessage) { + super(message, localizedMessage); + } + + public ShortDOIServiceException(String message, String localizedMessage, Throwable cause) { + super(message, localizedMessage, cause); + } + + public ShortDOIServiceException(Throwable cause) { + super(cause); + } +} diff --git a/src/main/java/org/jabref/model/entry/identifier/DOI.java b/src/main/java/org/jabref/model/entry/identifier/DOI.java index 310f81e543b..8c02f9a4696 100644 --- a/src/main/java/org/jabref/model/entry/identifier/DOI.java +++ b/src/main/java/org/jabref/model/entry/identifier/DOI.java @@ -14,14 +14,15 @@ import org.slf4j.LoggerFactory; /** - * Class for working with Digital object identifiers (DOIs) + * Class for working with Digital object identifiers (DOIs) and Short DOIs * * @see https://en.wikipedia.org/wiki/Digital_object_identifier + * @see http://shortdoi.org */ public class DOI implements Identifier { private static final Logger LOGGER = LoggerFactory.getLogger(DOI.class); - // DOI resolver + // DOI/Short DOI resolver private static final URI RESOLVER = URI.create("https://doi.org"); // Regex // (see http://www.doi.org/doi_handbook/2_Numbering.html) @@ -43,20 +44,46 @@ public class DOI implements Identifier { + "[/:]" // divider + "(?:[^\\s]+)" // suffix alphanumeric without space + ")"; // end group \1 + + // Regex (Short DOI) + private static final String SHORT_DOI_EXP = "" + + "(?:urn:)?" // optional urn + + "(?:doi:)?" // optional doi + + "(" // begin group \1 + + "10" // directory indicator + + "[/:%]" // divider + + "[a-zA-Z0-9]+" + + ")"; // end group \1 + private static final String FIND_SHORT_DOI_EXP = "" + + "(?:urn:)?" // optional urn + + "(?:doi:)?" // optional doi + + "(" // begin group \1 + + "10" // directory indicator + + "[/:]" // divider + + "[a-zA-Z0-9]+" + + "(?:[^\\s]+)" // suffix alphanumeric without space + + ")"; // end group \1 + private static final String HTTP_EXP = "https?://[^\\s]+?" + DOI_EXP; + private static final String SHORT_DOI_HTTP_EXP = "https?://[^\\s]+?" + SHORT_DOI_EXP; // Pattern private static final Pattern EXACT_DOI_PATT = Pattern.compile("^(?:https?://[^\\s]+?)?" + DOI_EXP + "$", Pattern.CASE_INSENSITIVE); private static final Pattern DOI_PATT = Pattern.compile("(?:https?://[^\\s]+?)?" + FIND_DOI_EXP, Pattern.CASE_INSENSITIVE); + // Pattern (short DOI) + private static final Pattern EXACT_SHORT_DOI_PATT = Pattern.compile("^(?:https?://[^\\s]+?)?" + SHORT_DOI_EXP, Pattern.CASE_INSENSITIVE); + private static final Pattern SHORT_DOI_PATT = Pattern.compile("(?:https?://[^\\s]+?)?" + FIND_SHORT_DOI_EXP, Pattern.CASE_INSENSITIVE); // DOI private final String doi; + // Short DOI + private boolean isShortDoi; /** - * Creates a DOI from various schemes including URL, URN, and plain DOIs. + * Creates a DOI from various schemes including URL, URN, and plain DOIs/Short DOIs. * - * @param doi the DOI string - * @throws NullPointerException if DOI is null - * @throws IllegalArgumentException if doi does not include a valid DOI + * @param doi the DOI/Short DOI string * @return an instance of the DOI class + * @throws NullPointerException if DOI/Short DOI is null + * @throws IllegalArgumentException if doi does not include a valid DOI/Short DOI */ public DOI(String doi) { Objects.requireNonNull(doi); @@ -65,33 +92,40 @@ public DOI(String doi) { String trimmedDoi = doi.trim(); // HTTP URL decoding - if (doi.matches(HTTP_EXP)) { + if (doi.matches(HTTP_EXP) || doi.matches(SHORT_DOI_HTTP_EXP)) { try { // decodes path segment URI url = new URI(trimmedDoi); trimmedDoi = url.getScheme() + "://" + url.getHost() + url.getPath(); } catch (URISyntaxException e) { - throw new IllegalArgumentException(doi + " is not a valid HTTP DOI."); + throw new IllegalArgumentException(doi + " is not a valid HTTP DOI/Short DOI."); } } - // Extract DOI + // Extract DOI/Short DOI Matcher matcher = EXACT_DOI_PATT.matcher(trimmedDoi); if (matcher.find()) { // match only group \1 this.doi = matcher.group(1); } else { - throw new IllegalArgumentException(trimmedDoi + " is not a valid DOI."); + // Short DOI + Matcher shortDoiMatcher = EXACT_SHORT_DOI_PATT.matcher(trimmedDoi); + if (shortDoiMatcher.find()) { + this.doi = shortDoiMatcher.group(1); + isShortDoi = true; + } else { + throw new IllegalArgumentException(trimmedDoi + " is not a valid DOI/Short DOI."); + } } } /** * Creates an Optional from various schemes including URL, URN, and plain DOIs. * - * Useful for suppressing the IllegalArgumentException of the Constructor - * and checking for Optional.isPresent() instead. + * Useful for suppressing the IllegalArgumentException of the Constructor and checking for + * Optional.isPresent() instead. * - * @param doi the DOI string + * @param doi the DOI/Short DOI string * @return an Optional containing the DOI or an empty Optional */ public static Optional parse(String doi) { @@ -105,9 +139,9 @@ public static Optional parse(String doi) { } /** - * Determines whether a DOI is valid or not + * Determines whether a DOI/Short DOI is valid or not * - * @param doi the DOI string + * @param doi the DOI/Short DOI string * @return true if DOI is valid, false otherwise */ public static boolean isValid(String doi) { @@ -115,9 +149,9 @@ public static boolean isValid(String doi) { } /** - * Tries to find a DOI inside the given text. + * Tries to find a DOI/Short DOI inside the given text. * - * @param text the Text which might contain a DOI + * @param text the Text which might contain a DOI/Short DOI * @return an Optional containing the DOI or an empty Optional */ public static Optional findInText(String text) { @@ -128,6 +162,12 @@ public static Optional findInText(String text) { // match only group \1 result = Optional.of(new DOI(matcher.group(1))); } + + matcher = SHORT_DOI_PATT.matcher(text); + if (matcher.find()) { + result = Optional.of(new DOI(matcher.group(1))); + } + return result; } @@ -139,18 +179,27 @@ public String toString() { } /** - * Return the plain DOI + * Return the plain DOI/Short DOI * - * @return the plain DOI value. + * @return the plain DOI/Short DOI value. */ public String getDOI() { return doi; } /** - * Return a URI presentation for the DOI + * Determines whether DOI is short DOI or not + * + * @return true if DOI is short DOI, false otherwise + */ + public boolean isShortDoi() { + return isShortDoi; + } + + /** + * Return a URI presentation for the DOI/Short DOI * - * @return an encoded URI representation of the DOI + * @return an encoded URI representation of the DOI/Short DOI */ @Override public Optional getExternalURI() { @@ -165,9 +214,9 @@ public Optional getExternalURI() { } /** - * Return an ASCII URL presentation for the DOI + * Return an ASCII URL presentation for the DOI/Short DOI * - * @return an encoded URL representation of the DOI + * @return an encoded URL representation of the DOI/Short DOI */ public String getURIAsASCIIString() { return getExternalURI().map(URI::toASCIIString).orElse(""); diff --git a/src/main/resources/l10n/JabRef_en.properties b/src/main/resources/l10n/JabRef_en.properties index d33984a30b1..943756d9e44 100644 --- a/src/main/resources/l10n/JabRef_en.properties +++ b/src/main/resources/l10n/JabRef_en.properties @@ -1557,6 +1557,8 @@ Add\ enclosing\ braces=Add enclosing braces Add\ braces\ encapsulating\ the\ complete\ field\ content.=Add braces encapsulating the complete field content. Remove\ enclosing\ braces=Remove enclosing braces Removes\ braces\ encapsulating\ the\ complete\ field\ content.=Removes braces encapsulating the complete field content. +Shorten\ DOI=Shorten DOI +Shortens\ DOI\ to\ more\ human\ readable\ form.=Shortens DOI to more human readable form. Sentence\ case=Sentence case Shortens\ lists\ of\ persons\ if\ there\ are\ more\ than\ 2\ persons\ to\ "et\ al.".=Shortens lists of persons if there are more than 2 persons to "et al.". Title\ case=Title case diff --git a/src/test/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatterTest.java b/src/test/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatterTest.java new file mode 100644 index 00000000000..1191014af41 --- /dev/null +++ b/src/test/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatterTest.java @@ -0,0 +1,24 @@ +package org.jabref.logic.formatter.bibtexfields; + +import org.jabref.testutils.category.FetcherTest; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +@FetcherTest +class ShortenDOIFormatterTest { + + private ShortenDOIFormatter formatter; + + @BeforeEach + public void setUp() { + formatter = new ShortenDOIFormatter(); + } + + @Test + public void formatDoi() { + assertEquals("10/adc", formatter.format("10.1006/jmbi.1998.2354")); + } +} diff --git a/src/test/java/org/jabref/logic/importer/util/ShortDOIServiceTest.java b/src/test/java/org/jabref/logic/importer/util/ShortDOIServiceTest.java new file mode 100644 index 00000000000..26ea4706030 --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/util/ShortDOIServiceTest.java @@ -0,0 +1,36 @@ +package org.jabref.logic.importer.util; + +import org.jabref.model.entry.identifier.DOI; +import org.jabref.testutils.category.FetcherTest; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +@FetcherTest +class ShortDOIServiceTest { + + private final DOI doi = new DOI("10.1109/ACCESS.2013.2260813"); + private final DOI notExistingDoi = new DOI("10.1109/ACCESS.2013.226081400"); + + private ShortDOIService sut; + + @BeforeEach + void setUp() { + sut = new ShortDOIService(); + } + + @Test + void getShortDOI() throws ShortDOIServiceException { + DOI shortDoi = sut.getShortDOI(doi); + + assertEquals("10/gf4gqc", shortDoi.getDOI()); + } + + @Test + void shouldThrowExceptionWhenDOIWasNotFound() throws ShortDOIServiceException { + assertThrows(ShortDOIServiceException.class, () -> sut.getShortDOI(notExistingDoi)); + } +} diff --git a/src/test/java/org/jabref/model/entry/identifier/DOITest.java b/src/test/java/org/jabref/model/entry/identifier/DOITest.java index 4d45bb239cf..44af0ec70a2 100644 --- a/src/test/java/org/jabref/model/entry/identifier/DOITest.java +++ b/src/test/java/org/jabref/model/entry/identifier/DOITest.java @@ -18,22 +18,42 @@ public void acceptPlainDoi() { assertEquals("10.1126/sciadv.1500214", new DOI("10.1126/sciadv.1500214").getDOI()); } + @Test + public void acceptPlainShortDoi() { + assertEquals("10/gf4gqc", new DOI("10/gf4gqc").getDOI()); + } + @Test public void ignoreLeadingAndTrailingWhitespaces() { assertEquals("10.1006/jmbi.1998.2354", new DOI(" 10.1006/jmbi.1998.2354 ").getDOI()); } + @Test + public void ignoreLeadingAndTrailingWhitespacesInShortDoi() { + assertEquals("10/gf4gqc", new DOI(" 10/gf4gqc ").getDOI()); + } + @Test public void rejectEmbeddedDoi() { assertThrows(IllegalArgumentException.class, () -> new DOI("other stuff 10.1006/jmbi.1998.2354 end")); } + @Test + public void rejectEmbeddedShortDoi() { + assertThrows(IllegalArgumentException.class, () -> new DOI("other stuff 10/gf4gqc end")); + } + @Test public void rejectInvalidDirectoryIndicator() { // wrong directory indicator assertThrows(IllegalArgumentException.class, () -> new DOI("12.1006/jmbi.1998.2354 end")); } + @Test + public void rejectInvalidDirectoryIndicatorInShortDoi() { + assertThrows(IllegalArgumentException.class, () -> new DOI("20/abcd")); + } + @Test public void rejectInvalidDoiUri() { assertThrows(IllegalArgumentException.class, () -> new DOI("https://thisisnouri")); @@ -45,12 +65,22 @@ public void rejectMissingDivider() { assertThrows(IllegalArgumentException.class, () -> new DOI("10.1006jmbi.1998.2354 end")); } + @Test + public void rejectMissingDividerInShortDoi() { + assertThrows(IllegalArgumentException.class, () -> new DOI("10gf4gqc end")); + } + @Test public void acceptDoiPrefix() { // Doi prefix assertEquals("10.1006/jmbi.1998.2354", new DOI("doi:10.1006/jmbi.1998.2354").getDOI()); } + @Test + public void acceptDoiPrefixInShortDoi() { + assertEquals("10/gf4gqc", new DOI("doi:10/gf4gqc").getDOI()); + } + @Test public void acceptURNPrefix() { assertEquals("10.123/456", new DOI("urn:10.123/456").getDOI()); @@ -60,6 +90,15 @@ public void acceptURNPrefix() { assertEquals("10.123:456ABC/zyz", new DOI("http://doi.org/urn:doi:10.123:456ABC%2Fzyz").getDOI()); } + @Test + public void acceptURNPrefixInShortDoi() { + assertEquals("10/gf4gqc", new DOI("urn:10/gf4gqc").getDOI()); + assertEquals("10/gf4gqc", new DOI("urn:doi:10/gf4gqc").getDOI()); + assertEquals("10/gf4gqc", new DOI("http://doi.org/urn:doi:10/gf4gqc").getDOI()); + // : is also allowed as divider, will be replaced by RESOLVER + assertEquals("10:gf4gqc", new DOI("http://doi.org/urn:doi:10:gf4gqc").getDOI()); + } + @Test public void acceptURLDoi() { // http @@ -87,6 +126,27 @@ public void acceptURLDoi() { new DOI("http://doi.ieeecomputersociety.org/10.1109/MIC.2012.43").getDOI()); } + @Test + public void acceptURLShortDoi() { + // http + assertEquals("10/gf4gqc", new DOI("http://doi.org/10/gf4gqc").getDOI()); + // https + assertEquals("10/gf4gqc", new DOI("https://doi.org/10/gf4gqc").getDOI()); + // https with % divider + assertEquals("10/gf4gqc", new DOI("https://dx.doi.org/10%2Fgf4gqc").getDOI()); + // other domains + assertEquals("10/gf4gqc", new DOI("http://doi.acm.org/10/gf4gqc").getDOI()); + assertEquals("10/gf4gqc", new DOI("http://doi.acm.net/10/gf4gqc").getDOI()); + assertEquals("10/gf4gqc", new DOI("http://doi.acm.com/10/gf4gqc").getDOI()); + assertEquals("10/gf4gqc", new DOI("http://doi.acm.de/10/gf4gqc").getDOI()); + assertEquals("10/gf4gqc", new DOI("http://dx.doi.org/10/gf4gqc").getDOI()); + assertEquals("10/gf4gqc", new DOI("http://dx.doi.net/10/gf4gqc").getDOI()); + assertEquals("10/gf4gqc", new DOI("http://dx.doi.com/10/gf4gqc").getDOI()); + assertEquals("10/gf4gqc", new DOI("http://dx.doi.de/10/gf4gqc").getDOI()); + assertEquals("10/gf4gqc", new DOI("http://dx.doi.org/10/gf4gqc").getDOI()); + assertEquals("10/gf4gqc", new DOI("http://doi.ieeecomputersociety.org/10/gf4gqc").getDOI()); + } + @Test public void correctlyDecodeHttpDOIs() { // See http://www.doi.org/doi_handbook/2_Numbering.html#2.5.2.4 @@ -133,12 +193,22 @@ public void constructCorrectURLForDoi() { new DOI("doi:10.1109/VLHCC.2004.20").getURIAsASCIIString()); } + @Test + public void constructCorrectURLForShortDoi() { + assertEquals("https://doi.org/10/gf4gqc", new DOI("10/gf4gqc").getURIAsASCIIString()); + } + @Test public void findDoiInsideArbitraryText() { assertEquals("10.1006/jmbi.1998.2354", DOI.findInText("other stuff 10.1006/jmbi.1998.2354 end").get().getDOI()); } + @Test + public void findShortDoiInsideArbitraryText() { + assertEquals("10/gf4gqc", DOI.findInText("other stuff 10/gf4gqc end").get().getDOI()); + } + @Test public void noDOIFoundInsideArbitraryText() { assertEquals(Optional.empty(), DOI.findInText("text without 28282 a doi")); @@ -149,4 +219,20 @@ public void parseDOIWithWhiteSpace() { String doiWithSpace = "https : / / doi.org / 10 .1109 /V LHCC.20 04.20"; assertEquals("https://doi.org/10.1109/VLHCC.2004.20", DOI.parse(doiWithSpace).get().getURIAsASCIIString()); } + + @Test + public void parseShortDOIWithWhiteSpace() { + String shortDoiWithSpace = "https : / / doi.org / 10 / gf4gqc"; + assertEquals("https://doi.org/10/gf4gqc", DOI.parse(shortDoiWithSpace).get().getURIAsASCIIString()); + } + + @Test + public void isShortDoiShouldReturnTrueWhenItIsShortDoi() { + assertEquals(true, new DOI("10/abcde").isShortDoi()); + } + + @Test + public void isShortDoiShouldReturnFalseWhenItIsDoi() { + assertEquals(false, new DOI("10.1006/jmbi.1998.2354").isShortDoi()); + } }