Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add shorten DOI field formatter #5276

Merged
merged 1 commit into from
Sep 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#

### Changed

- We added a short DOI field formatter which shortens a DOI to a more human-readable form. [koppor#343](https://github.com/koppor/jabref/issues/343)

### Fixed

### Removed
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/org/jabref/logic/formatter/Formatters.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.jabref.logic.formatter.bibtexfields.OrdinalsToSuperscriptFormatter;
import org.jabref.logic.formatter.bibtexfields.RegexFormatter;
import org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter;
import org.jabref.logic.formatter.bibtexfields.ShortenDOIFormatter;
import org.jabref.logic.formatter.bibtexfields.UnicodeToLatexFormatter;
import org.jabref.logic.formatter.bibtexfields.UnitsToLatexFormatter;
import org.jabref.logic.formatter.casechanger.CapitalizeFormatter;
Expand Down Expand Up @@ -67,7 +68,8 @@ public static List<Formatter> getOthers() {
new OrdinalsToSuperscriptFormatter(),
new RemoveBracesFormatter(),
new UnitsToLatexFormatter(),
new EscapeUnderscoresFormatter()
new EscapeUnderscoresFormatter(),
new ShortenDOIFormatter()
);
}

Expand Down Expand Up @@ -102,5 +104,4 @@ public static Optional<Formatter> getFormatterForModifier(String modifier) {
return getAll().stream().filter(f -> f.getKey().equals(modifier)).findAny();
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package org.jabref.logic.formatter.bibtexfields;

import java.util.Objects;
import java.util.Optional;

import org.jabref.logic.importer.util.ShortDOIService;
import org.jabref.logic.importer.util.ShortDOIServiceException;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.cleanup.Formatter;
import org.jabref.model.entry.identifier.DOI;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Field formatter that replaces a DOI by the short DOI name obtained from {@link ShortDOIService}.
 * <p>
 * Values that cannot be parsed as a DOI, or for which the service lookup fails, are returned unchanged.
 */
public class ShortenDOIFormatter extends Formatter {

    private static final Logger LOGGER = LoggerFactory.getLogger(ShortenDOIFormatter.class);

    @Override
    public String getName() {
        return Localization.lang("Shorten DOI");
    }

    @Override
    public String getKey() {
        return "short_doi";
    }

    /**
     * Looks up the short DOI for the given field value.
     *
     * @param value the field content, expected to hold a DOI
     * @return the short DOI, or {@code value} itself if it is no DOI or the lookup failed
     * @throws NullPointerException if {@code value} is null
     */
    @Override
    public String format(String value) {
        Objects.requireNonNull(value);

        Optional<DOI> parsedDoi = DOI.parse(value);
        if (!parsedDoi.isPresent()) {
            // Not a DOI: nothing to shorten
            return value;
        }

        try {
            DOI shortened = new ShortDOIService().getShortDOI(parsedDoi.get());
            return shortened.getDOI();
        } catch (ShortDOIServiceException e) {
            // Best effort: keep the original value when the service cannot be used
            LOGGER.error(e.getMessage(), e);
            return value;
        }
    }

    @Override
    public String getDescription() {
        return Localization.lang("Shortens DOI to more human readable form.");
    }

    @Override
    public String getExampleInput() {
        return "10.1006/jmbi.1998.2354";
    }
}
63 changes: 63 additions & 0 deletions src/main/java/org/jabref/logic/importer/util/ShortDOIService.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package org.jabref.logic.importer.util;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;

import org.jabref.logic.importer.ParseException;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.entry.identifier.DOI;

import org.apache.http.client.utils.URIBuilder;
import org.json.JSONException;
import org.json.JSONObject;

/**
 * Class for obtaining shortened DOI names from the shortDOI service.
 *
 * @see <a href="http://shortdoi.org">shortDOI Service</a>
 */
public class ShortDOIService {

    private static final String BASIC_URL = "http://shortdoi.org/";

    /**
     * Obtains the shortened DOI name for the given DOI.
     *
     * @param doi the DOI to shorten
     * @return the shortened DOI name reported by the service
     * @throws ShortDOIServiceException if the request URL cannot be built, the request or the parsing of its
     *                                  response fails, or the response does not contain a valid short DOI
     */
    public DOI getShortDOI(DOI doi) throws ShortDOIServiceException {
        JSONObject responseJSON = makeRequest(doi);

        try {
            return new DOI(responseJSON.getString("ShortDOI"));
        } catch (JSONException | IllegalArgumentException e) {
            // A response without a (valid) "ShortDOI" entry must surface as the declared checked exception
            // rather than as an unchecked one that callers do not expect.
            throw new ShortDOIServiceException("Cannot get short DOI", e);
        }
    }

    /**
     * Requests the JSON description of the given DOI from the service.
     *
     * @param doi the DOI to look up
     * @return the parsed JSON response
     * @throws ShortDOIServiceException if the URL cannot be built or the response cannot be fetched or parsed
     */
    private JSONObject makeRequest(DOI doi) throws ShortDOIServiceException {
        URL url;

        try {
            URIBuilder uriBuilder = new URIBuilder(BASIC_URL);
            // The DOI is appended to the base path; the service is asked for a JSON answer
            uriBuilder.setPath(uriBuilder.getPath() + doi.getDOI());
            uriBuilder.addParameter("format", "json");

            URI uri = uriBuilder.build();
            url = uri.toURL();
        } catch (URISyntaxException | MalformedURLException e) {
            throw new ShortDOIServiceException("Cannot get short DOI", e);
        }

        // try-with-resources ensures the response stream is closed once it has been read
        try (InputStream response = new URLDownload(url).asInputStream()) {
            return JsonReader.toJsonObject(response);
        } catch (ParseException | IOException | JSONException e) {
            throw new ShortDOIServiceException("Cannot get short DOI", e);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package org.jabref.logic.importer.util;

import org.jabref.JabRefException;

/**
 * Signals that a short DOI could not be obtained from the shortDOI service.
 * <p>
 * All constructors simply forward their arguments to the matching {@link JabRefException} constructor.
 */
public class ShortDOIServiceException extends JabRefException {

    // --- constructors without a cause ---

    /**
     * @param message description of the failure
     */
    public ShortDOIServiceException(String message) {
        super(message);
    }

    /**
     * @param message          description of the failure
     * @param localizedMessage localized description, passed on to {@link JabRefException}
     */
    public ShortDOIServiceException(String message, String localizedMessage) {
        super(message, localizedMessage);
    }

    // --- constructors wrapping a cause ---

    /**
     * @param cause the underlying failure
     */
    public ShortDOIServiceException(Throwable cause) {
        super(cause);
    }

    /**
     * @param message description of the failure
     * @param cause   the underlying failure
     */
    public ShortDOIServiceException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message          description of the failure
     * @param localizedMessage localized description, passed on to {@link JabRefException}
     * @param cause            the underlying failure
     */
    public ShortDOIServiceException(String message, String localizedMessage, Throwable cause) {
        super(message, localizedMessage, cause);
    }
}
95 changes: 72 additions & 23 deletions src/main/java/org/jabref/model/entry/identifier/DOI.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@
import org.slf4j.LoggerFactory;

/**
* Class for working with Digital object identifiers (DOIs)
* Class for working with Digital object identifiers (DOIs) and Short DOIs
*
* @see https://en.wikipedia.org/wiki/Digital_object_identifier
* @see http://shortdoi.org
*/
public class DOI implements Identifier {
private static final Logger LOGGER = LoggerFactory.getLogger(DOI.class);

// DOI resolver
// DOI/Short DOI resolver
private static final URI RESOLVER = URI.create("https://doi.org");
// Regex
// (see http://www.doi.org/doi_handbook/2_Numbering.html)
Expand All @@ -43,20 +44,46 @@ public class DOI implements Identifier {
+ "[/:]" // divider
+ "(?:[^\\s]+)" // suffix alphanumeric without space
+ ")"; // end group \1

// Regex (Short DOI)
private static final String SHORT_DOI_EXP = ""
+ "(?:urn:)?" // optional urn
+ "(?:doi:)?" // optional doi
+ "(" // begin group \1
+ "10" // directory indicator
+ "[/:%]" // divider
+ "[a-zA-Z0-9]+"
+ ")"; // end group \1
private static final String FIND_SHORT_DOI_EXP = ""
+ "(?:urn:)?" // optional urn
+ "(?:doi:)?" // optional doi
+ "(" // begin group \1
+ "10" // directory indicator
+ "[/:]" // divider
+ "[a-zA-Z0-9]+"
+ "(?:[^\\s]+)" // suffix alphanumeric without space
+ ")"; // end group \1

private static final String HTTP_EXP = "https?://[^\\s]+?" + DOI_EXP;
private static final String SHORT_DOI_HTTP_EXP = "https?://[^\\s]+?" + SHORT_DOI_EXP;
// Pattern
private static final Pattern EXACT_DOI_PATT = Pattern.compile("^(?:https?://[^\\s]+?)?" + DOI_EXP + "$", Pattern.CASE_INSENSITIVE);
private static final Pattern DOI_PATT = Pattern.compile("(?:https?://[^\\s]+?)?" + FIND_DOI_EXP, Pattern.CASE_INSENSITIVE);
// Pattern (short DOI)
private static final Pattern EXACT_SHORT_DOI_PATT = Pattern.compile("^(?:https?://[^\\s]+?)?" + SHORT_DOI_EXP, Pattern.CASE_INSENSITIVE);
private static final Pattern SHORT_DOI_PATT = Pattern.compile("(?:https?://[^\\s]+?)?" + FIND_SHORT_DOI_EXP, Pattern.CASE_INSENSITIVE);
// DOI
private final String doi;
// Short DOI
private boolean isShortDoi;

/**
* Creates a DOI from various schemes including URL, URN, and plain DOIs.
* Creates a DOI from various schemes including URL, URN, and plain DOIs/Short DOIs.
*
* @param doi the DOI string
* @throws NullPointerException if DOI is null
* @throws IllegalArgumentException if doi does not include a valid DOI
* @param doi the DOI/Short DOI string
* @return an instance of the DOI class
* @throws NullPointerException if DOI/Short DOI is null
* @throws IllegalArgumentException if doi does not include a valid DOI/Short DOI
*/
public DOI(String doi) {
Objects.requireNonNull(doi);
Expand All @@ -65,33 +92,40 @@ public DOI(String doi) {
String trimmedDoi = doi.trim();

// HTTP URL decoding
if (doi.matches(HTTP_EXP)) {
if (doi.matches(HTTP_EXP) || doi.matches(SHORT_DOI_HTTP_EXP)) {
try {
// decodes path segment
URI url = new URI(trimmedDoi);
trimmedDoi = url.getScheme() + "://" + url.getHost() + url.getPath();
} catch (URISyntaxException e) {
throw new IllegalArgumentException(doi + " is not a valid HTTP DOI.");
throw new IllegalArgumentException(doi + " is not a valid HTTP DOI/Short DOI.");
}
}

// Extract DOI
// Extract DOI/Short DOI
Matcher matcher = EXACT_DOI_PATT.matcher(trimmedDoi);
if (matcher.find()) {
// match only group \1
this.doi = matcher.group(1);
} else {
throw new IllegalArgumentException(trimmedDoi + " is not a valid DOI.");
// Short DOI
Matcher shortDoiMatcher = EXACT_SHORT_DOI_PATT.matcher(trimmedDoi);
if (shortDoiMatcher.find()) {
this.doi = shortDoiMatcher.group(1);
isShortDoi = true;
} else {
throw new IllegalArgumentException(trimmedDoi + " is not a valid DOI/Short DOI.");
}
}
}

/**
* Creates an Optional<DOI> from various schemes including URL, URN, and plain DOIs.
*
* Useful for suppressing the <c>IllegalArgumentException</c> of the Constructor
* and checking for Optional.isPresent() instead.
* Useful for suppressing the <c>IllegalArgumentException</c> of the Constructor and checking for
* Optional.isPresent() instead.
*
* @param doi the DOI string
* @param doi the DOI/Short DOI string
* @return an Optional containing the DOI or an empty Optional
*/
public static Optional<DOI> parse(String doi) {
Expand All @@ -105,19 +139,19 @@ public static Optional<DOI> parse(String doi) {
}

/**
* Determines whether a DOI is valid or not
* Determines whether a DOI/Short DOI is valid or not
*
* @param doi the DOI string
* @param doi the DOI/Short DOI string
* @return true if DOI is valid, false otherwise
*/
public static boolean isValid(String doi) {
return parse(doi).isPresent();
}

/**
* Tries to find a DOI inside the given text.
* Tries to find a DOI/Short DOI inside the given text.
*
* @param text the Text which might contain a DOI
* @param text the Text which might contain a DOI/Short DOI
* @return an Optional containing the DOI or an empty Optional
*/
public static Optional<DOI> findInText(String text) {
Expand All @@ -128,6 +162,12 @@ public static Optional<DOI> findInText(String text) {
// match only group \1
result = Optional.of(new DOI(matcher.group(1)));
}

matcher = SHORT_DOI_PATT.matcher(text);
if (matcher.find()) {
result = Optional.of(new DOI(matcher.group(1)));
}

return result;
}

Expand All @@ -139,18 +179,27 @@ public String toString() {
}

/**
* Return the plain DOI
* Return the plain DOI/Short DOI
*
* @return the plain DOI value.
* @return the plain DOI/Short DOI value.
*/
public String getDOI() {
return doi;
}

/**
* Return a URI presentation for the DOI
* Determines whether DOI is short DOI or not
*
* @return true if DOI is short DOI, false otherwise
*/
public boolean isShortDoi() {
return isShortDoi;
}

/**
* Return a URI presentation for the DOI/Short DOI
*
* @return an encoded URI representation of the DOI
* @return an encoded URI representation of the DOI/Short DOI
*/
@Override
public Optional<URI> getExternalURI() {
Expand All @@ -165,9 +214,9 @@ public Optional<URI> getExternalURI() {
}

/**
* Return an ASCII URL presentation for the DOI
* Return an ASCII URL presentation for the DOI/Short DOI
*
* @return an encoded URL representation of the DOI
* @return an encoded URL representation of the DOI/Short DOI
*/
public String getURIAsASCIIString() {
return getExternalURI().map(URI::toASCIIString).orElse("");
Expand Down
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1557,6 +1557,8 @@ Add\ enclosing\ braces=Add enclosing braces
Add\ braces\ encapsulating\ the\ complete\ field\ content.=Add braces encapsulating the complete field content.
Remove\ enclosing\ braces=Remove enclosing braces
Removes\ braces\ encapsulating\ the\ complete\ field\ content.=Removes braces encapsulating the complete field content.
Shorten\ DOI=Shorten DOI
Shortens\ DOI\ to\ more\ human\ readable\ form.=Shortens DOI to more human readable form.
Sentence\ case=Sentence case
Shortens\ lists\ of\ persons\ if\ there\ are\ more\ than\ 2\ persons\ to\ "et\ al.".=Shortens lists of persons if there are more than 2 persons to "et al.".
Title\ case=Title case
Expand Down
Loading