Skip to content

Commit

Permalink
Fixes #8787 Decode doi contains masked characters (#8812)
Browse files Browse the repository at this point in the history
Co-authored-by: Christoph <siedlerkiller@gmail.com>
  • Loading branch information
fly-ing-fish and Siedlerchr authored May 23, 2022
1 parent 4572ab5 commit 2d680ab
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/automerge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"
- name: Merge pull requests
uses: pascalgn/automerge-action@v0.15.3
uses: pascalgn/automerge-action@v0.15.2
if: steps.waitforstatuschecks.outputs.status == 'success'
env:
MERGE_METHOD: "merge"
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed the unnecessary horizontal scroll bar in group panel [#8467](https://github.com/JabRef/jabref/issues/8467)
- We fixed an issue where the notification bar message, icon and actions appeared to be invisible. [#8761](https://github.com/JabRef/jabref/issues/8761)
- We fixed an issue where deprecated fields tab is shown when the fields don't contain any values. [#8396](https://github.com/JabRef/jabref/issues/8396)
- We fixed an issue where an exception occurred during DOI search when the DOI contained URL-encoded characters. [#8787](https://github.com/JabRef/jabref/issues/8787)
- We fixed an issue so that identifiers can now be selected and opened from a popup list in the main table. [#8758](https://github.com/JabRef/jabref/issues/8758), [#8802](https://github.com/JabRef/jabref/issues/8802)
- We fixed an issue where the escape button had no functionality within the "Filter groups" textfield. [koppor#562](https://github.com/koppor/jabref/issues/562)
- We fixed an issue where right-clicking a group and choosing "remove selected entries from this group" led to an error when the BibTeX source tab was selected. [#8012](https://github.com/JabRef/jabref/issues/8012)
Expand Down
12 changes: 11 additions & 1 deletion src/main/java/org/jabref/logic/cleanup/DoiCleanup.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.jabref.logic.cleanup;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
Expand Down Expand Up @@ -31,6 +33,14 @@ public List<FieldChange> cleanup(BibEntry entry) {
if (entry.hasField(StandardField.DOI)) {
String doiFieldValue = entry.getField(StandardField.DOI).orElse(null);

String decodeDoiFieldValue = "";
try {
decodeDoiFieldValue = URLDecoder.decode(doiFieldValue, "UTF-8");
} catch (UnsupportedEncodingException e) {
decodeDoiFieldValue = doiFieldValue;
}
doiFieldValue = decodeDoiFieldValue;

Optional<DOI> doi = DOI.parse(doiFieldValue);

if (doi.isPresent()) {
Expand All @@ -45,7 +55,7 @@ public List<FieldChange> cleanup(BibEntry entry) {
// Doi field seems to contain Doi -> cleanup note, url, ee field
for (Field field : FIELDS) {
entry.getField(field).flatMap(DOI::parse)
.ifPresent(unused -> removeFieldValue(entry, field, changes));
.ifPresent(unused -> removeFieldValue(entry, field, changes));
}
}
} else {
Expand Down
16 changes: 14 additions & 2 deletions src/main/java/org/jabref/model/entry/identifier/DOI.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@

import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jabref.architecture.AllowedToUseLogic;
import org.jabref.logic.layout.format.LatexToUnicodeFormatter;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.StandardField;

Expand All @@ -17,6 +21,7 @@
/**
* Class for working with <a href="https://en.wikipedia.org/wiki/Digital_object_identifier">Digital object identifiers (DOIs)</a> and <a href="http://shortdoi.org">Short DOIs</a>
*/
@AllowedToUseLogic("because we want to have this class 'smart' an be able to parse obscure DOIs, too. For this, we need the LatexToUnicodeformatter.")
public class DOI implements Identifier {

public static final URI AGENCY_RESOLVER = URI.create("https://doi.org/doiRA");
Expand Down Expand Up @@ -93,8 +98,8 @@ public class DOI implements Identifier {
// See https://stackoverflow.com/questions/3203190/regex-any-ascii-character for the regexp that includes ASCII characters only
// Another reference for regexp for ASCII characters: https://howtodoinjava.com/java/regex/java-clean-ascii-text-non-printable-chars/
private static final String CHARS_TO_REMOVE = "[\\s+" // remove white space characters, i.e, \t, \n, \x0B, \f, \r . + is a greedy quantifier
+ "[^\\x00-\\x7F]" // strips off all non-ASCII characters
+ "]";
+ "[^\\x00-\\x7F]" // strips off all non-ASCII characters
+ "]";

// DOI
private final String doi;
Expand Down Expand Up @@ -159,9 +164,16 @@ public DOI(String doi) {
*/
public static Optional<DOI> parse(String doi) {
try {
LatexToUnicodeFormatter formatter = new LatexToUnicodeFormatter();
String cleanedDOI = doi;
cleanedDOI = URLDecoder.decode(cleanedDOI, StandardCharsets.UTF_8);
cleanedDOI = formatter.format(cleanedDOI);
cleanedDOI = cleanedDOI.replaceAll(CHARS_TO_REMOVE, "");

if (cleanedDOI.startsWith("_") && cleanedDOI.endsWith("_")) {
cleanedDOI = cleanedDOI.substring(1, cleanedDOI.length() - 1);
}

return Optional.of(new DOI(cleanedDOI));
} catch (IllegalArgumentException | NullPointerException e) {
return Optional.empty();
Expand Down
11 changes: 8 additions & 3 deletions src/test/java/org/jabref/logic/cleanup/DoiCleanupTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,12 @@ private static Stream<Arguments> provideDoiForAllLowers() {

// cleanup just ee field with URL
Arguments.of(doiResult, new BibEntry()
.withField(unknownField, "https://doi.org/10.1145/2594455"))
);
}
.withField(unknownField, "https://doi.org/10.1145/2594455")),

// cleanup of url encoded chars
Arguments.of(new BibEntry()
.withField(StandardField.DOI, "10.18726/2018_3"),
new BibEntry()
.withField(unknownField, "https://doi.org/10.18726/2018%7B%5Ctextunderscore%7D3")));
}
}
66 changes: 66 additions & 0 deletions src/test/java/org/jabref/logic/cleanup/DoiDecodeCleanupTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package org.jabref.logic.cleanup;

import java.util.stream.Stream;

import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.field.UnknownField;

import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Parameterized checks that {@link DoiCleanup} copes with DOIs whose underscore arrives as the
 * URL-encoded LaTeX command {@code %7B%5Ctextunderscore%7D} (i.e. <code>{\textunderscore}</code>).
 * Each case pairs the entry expected after cleanup with the entry handed to the cleanup.
 */
public class DoiDecodeCleanupTest {

    /**
     * Runs the DOI cleanup in place on {@code doiInfoField} and compares the mutated entry
     * with {@code expected}.
     */
    @ParameterizedTest
    @MethodSource("provideDoiForAllLowers")
    public void testChangeDoi(BibEntry expected, BibEntry doiInfoField) {
        new DoiCleanup().cleanup(doiInfoField);

        assertEquals(expected, doiInfoField);
    }

    /**
     * Supplies (expected entry, input entry) pairs for {@link #testChangeDoi(BibEntry, BibEntry)}.
     */
    private static Stream<Arguments> provideDoiForAllLowers() {
        // The same DOI in its plain form, its URL-encoded form, and as an encoded doi.org link.
        final String plainDoi = "10.18726/2018_3";
        final String encodedDoi = "10.18726/2018%7B%5Ctextunderscore%7D3";
        final String encodedDoiUrl = "https://doi.org/10.18726/2018%7B%5Ctextunderscore%7D3";

        final UnknownField eeField = new UnknownField("ee");

        // Expected outcome shared by most cases: only a DOI field holding the plain DOI.
        final BibEntry onlyPlainDoi = new BibEntry().withField(StandardField.DOI, plainDoi);

        // Entry whose note/ee fields hold free text rather than a DOI, plus its expected twin.
        final BibEntry expectedWithFreeText = new BibEntry()
                .withField(StandardField.DOI, plainDoi)
                .withField(StandardField.NOTE, "This is a random note to this Doi")
                .withField(eeField, "This is a random ee field for this Doi");
        final BibEntry inputWithFreeText = new BibEntry()
                .withField(StandardField.DOI, plainDoi)
                .withField(StandardField.NOTE, "This is a random note to this Doi")
                .withField(eeField, "This is a random ee field for this Doi");

        return Stream.of(
                // encoded DOI link present in the url field only
                Arguments.of(onlyPlainDoi, new BibEntry()
                        .withField(StandardField.URL, encodedDoiUrl)),

                // encoded DOI in the doi field and encoded links in url, note and ee
                Arguments.of(onlyPlainDoi, new BibEntry()
                        .withField(StandardField.DOI, encodedDoi)
                        .withField(StandardField.URL, encodedDoiUrl)
                        .withField(StandardField.NOTE, encodedDoiUrl)
                        .withField(eeField, encodedDoiUrl)),

                // plain DOI with free-text note/ee: the entry is expected to stay as it is
                Arguments.of(expectedWithFreeText, inputWithFreeText),

                // encoded DOI in the doi field only
                Arguments.of(onlyPlainDoi, new BibEntry()
                        .withField(StandardField.DOI, encodedDoi)),

                // encoded DOI link present in the note field only
                Arguments.of(onlyPlainDoi, new BibEntry()
                        .withField(StandardField.NOTE, encodedDoiUrl)),

                // encoded DOI link present in the ee field only
                Arguments.of(onlyPlainDoi, new BibEntry()
                        .withField(eeField, encodedDoiUrl))
        );
    }
}

0 comments on commit 2d680ab

Please sign in to comment.