From 5f7fe7e79922010d8ebbed998d87429006d1a08c Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Sat, 9 Nov 2019 21:14:14 +0100 Subject: [PATCH 1/7] Clean URLs in DBLPFetcher - Style RemoveLatexCommandsFormatter - Add tests for RemoveLatexCommandsFormatter - Add support for removing single and multiple whitespaces after a command - Split tests in RemoveBracketsTest - Fix casing in CleanupUrlFormatter (to match style and current name of test class CleanUpFormatterTest) - Fix casing in some ...Url.. method names - Format CleanupUrlFormatter --- .../jabref/gui/fieldeditors/UrlEditor.java | 9 ++- .../fieldeditors/contextmenu/EditorMenus.java | 6 +- .../jabref/logic/formatter/Formatters.java | 4 +- ...ormatter.java => CleanupUrlFormatter.java} | 18 +++--- .../bibtexfields/LatexCleanupFormatter.java | 3 + .../logic/importer/fetcher/DBLPFetcher.java | 11 ++-- .../format/RemoveLatexCommandsFormatter.java | 53 +++++++++------- .../bibtexfields/CleanupUrlFormatterTest.java | 9 ++- .../layout/format/RemoveBracketsTest.java | 14 ++++- .../RemoveLatexCommandsFormatterTest.java | 61 +++++++++++++++++++ 10 files changed, 139 insertions(+), 49 deletions(-) rename src/main/java/org/jabref/logic/formatter/bibtexfields/{CleanupURLFormatter.java => CleanupUrlFormatter.java} (84%) create mode 100644 src/test/java/org/jabref/logic/layout/format/RemoveLatexCommandsFormatterTest.java diff --git a/src/main/java/org/jabref/gui/fieldeditors/UrlEditor.java b/src/main/java/org/jabref/gui/fieldeditors/UrlEditor.java index 08d22698a8c..270770a2ea1 100644 --- a/src/main/java/org/jabref/gui/fieldeditors/UrlEditor.java +++ b/src/main/java/org/jabref/gui/fieldeditors/UrlEditor.java @@ -12,7 +12,7 @@ import org.jabref.gui.DialogService; import org.jabref.gui.autocompleter.AutoCompleteSuggestionProvider; import org.jabref.gui.fieldeditors.contextmenu.EditorMenus; -import org.jabref.logic.formatter.bibtexfields.CleanupURLFormatter; +import org.jabref.logic.formatter.bibtexfields.CleanupUrlFormatter; import org.jabref.logic.formatter.bibtexfields.TrimWhitespaceFormatter; import org.jabref.logic.integrity.FieldCheckers; import org.jabref.model.entry.BibEntry; @@ -34,12 +34,11 @@ public UrlEditor(Field field, DialogService dialogService, AutoCompleteSuggestio .load(); textArea.textProperty().bindBidirectional(viewModel.textProperty()); - Supplier> contextMenuSupplier = EditorMenus.getCleanupURLMenu(textArea); + Supplier> contextMenuSupplier = EditorMenus.getCleanupUrlMenu(textArea); textArea.addToContextMenu(contextMenuSupplier); - // init paste handler for URLEditor to format pasted url link in textArea - textArea.setPasteActionHandler(() -> textArea.setText(new CleanupURLFormatter().format(new TrimWhitespaceFormatter().format(textArea.getText())))); - + // init paste handler for UrlEditor to format pasted url link in textArea + textArea.setPasteActionHandler(() -> textArea.setText(new CleanupUrlFormatter().format(new TrimWhitespaceFormatter().format(textArea.getText())))); new EditorValidator(preferences).configureValidation(viewModel.getFieldValidator().getValidationStatus(), textArea); } diff --git a/src/main/java/org/jabref/gui/fieldeditors/contextmenu/EditorMenus.java b/src/main/java/org/jabref/gui/fieldeditors/contextmenu/EditorMenus.java index abdb9700909..b6b8df62bc3 100644 --- a/src/main/java/org/jabref/gui/fieldeditors/contextmenu/EditorMenus.java +++ b/src/main/java/org/jabref/gui/fieldeditors/contextmenu/EditorMenus.java @@ -16,7 +16,7 @@ import org.jabref.gui.actions.ActionFactory; import org.jabref.gui.actions.StandardActions; import org.jabref.gui.edit.CopyDoiUrlAction; -import org.jabref.logic.formatter.bibtexfields.CleanupURLFormatter; +import org.jabref.logic.formatter.bibtexfields.CleanupUrlFormatter; import org.jabref.logic.formatter.bibtexfields.NormalizeNamesFormatter; import org.jabref.logic.l10n.Localization; @@ -91,11 +91,11 @@ public static Supplier> getDOIMenu(TextArea textArea) { * @param textArea text-area that this menu will be connected to * @return menu containing items of the default menu and an item to cleanup a URL */ - public static Supplier> getCleanupURLMenu(TextArea textArea) { + public static Supplier> getCleanupUrlMenu(TextArea textArea) { return () -> { CustomMenuItem cleanupURL = new CustomMenuItem(new Label(Localization.lang("Cleanup URL link"))); cleanupURL.setDisable(textArea.textProperty().isEmpty().get()); - cleanupURL.setOnAction(event -> textArea.setText(new CleanupURLFormatter().format(textArea.getText()))); + cleanupURL.setOnAction(event -> textArea.setText(new CleanupUrlFormatter().format(textArea.getText()))); List menuItems = new ArrayList<>(); menuItems.add(cleanupURL); diff --git a/src/main/java/org/jabref/logic/formatter/Formatters.java b/src/main/java/org/jabref/logic/formatter/Formatters.java index 7aea0a024a6..55a91f7f86c 100644 --- a/src/main/java/org/jabref/logic/formatter/Formatters.java +++ b/src/main/java/org/jabref/logic/formatter/Formatters.java @@ -6,7 +6,7 @@ import java.util.Objects; import java.util.Optional; -import org.jabref.logic.formatter.bibtexfields.CleanupURLFormatter; +import org.jabref.logic.formatter.bibtexfields.CleanupUrlFormatter; import org.jabref.logic.formatter.bibtexfields.ClearFormatter; import org.jabref.logic.formatter.bibtexfields.EscapeUnderscoresFormatter; import org.jabref.logic.formatter.bibtexfields.HtmlToLatexFormatter; @@ -58,7 +58,7 @@ public static List getCaseChangers() { public static List getOthers() { return Arrays.asList( new ClearFormatter(), - new CleanupURLFormatter(), + new CleanupUrlFormatter(), new LatexCleanupFormatter(), new MinifyNameListFormatter(), new NormalizeDateFormatter(), diff --git a/src/main/java/org/jabref/logic/formatter/bibtexfields/CleanupURLFormatter.java b/src/main/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatter.java similarity index 84% rename from src/main/java/org/jabref/logic/formatter/bibtexfields/CleanupURLFormatter.java rename to src/main/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatter.java index e14b717dd64..62932ac31c2 100644 --- a/src/main/java/org/jabref/logic/formatter/bibtexfields/CleanupURLFormatter.java +++ b/src/main/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatter.java @@ -7,6 +7,7 @@ import java.util.regex.Pattern; import org.jabref.logic.l10n.Localization; +import org.jabref.logic.layout.format.RemoveLatexCommandsFormatter; import org.jabref.model.cleanup.Formatter; import org.apache.commons.logging.Log; @@ -15,9 +16,9 @@ /** * Cleanup URL link */ -public class CleanupURLFormatter extends Formatter { +public class CleanupUrlFormatter extends Formatter { - private static final Log LOGGER = LogFactory.getLog(CleanupURLFormatter.class); + private static final Log LOGGER = LogFactory.getLog(CleanupUrlFormatter.class); // This regexp find "url=" or "to=" parameter in full link and get text after them private static final Pattern PATTERN_URL = Pattern.compile("(?:url|to)=([^&]*)"); @@ -38,16 +39,16 @@ public String format(String value) { Matcher matcher = PATTERN_URL.matcher(value); if (matcher.find()) { - toDecode = matcher.group(1); - + toDecode = matcher.group(1); } try { decodedLink = URLDecoder.decode(toDecode, StandardCharsets.UTF_8.name()); - } - catch (UnsupportedEncodingException e) { + } catch (UnsupportedEncodingException e) { LOGGER.warn("Used unsupported character encoding", e); } - return decodedLink; + + String result = new RemoveLatexCommandsFormatter().format(decodedLink); + return result; } @Override @@ -61,6 +62,5 @@ public String getExampleInput() { "rja&uact=8&ved=0ahUKEwjg3ZrB_ZPXAhVGuhoKHYdOBOg4ChAWCCYwAA&url=" + "http%3A%2F%2Fwww.focus.de%2Fgesundheit%2Fratgeber%2Fherz%2Ftest%2" + "Flebenserwartung-werden-sie-100-jahre-alt_aid_363828.html" + "&usg=AOvVaw1G6m2jf-pTHYkXceii4hXU"; - } - + } } diff --git a/src/main/java/org/jabref/logic/formatter/bibtexfields/LatexCleanupFormatter.java b/src/main/java/org/jabref/logic/formatter/bibtexfields/LatexCleanupFormatter.java index 032914146b6..c9bf179d201 100644 --- a/src/main/java/org/jabref/logic/formatter/bibtexfields/LatexCleanupFormatter.java +++ b/src/main/java/org/jabref/logic/formatter/bibtexfields/LatexCleanupFormatter.java @@ -5,6 +5,9 @@ import org.jabref.logic.l10n.Localization; import org.jabref.model.cleanup.Formatter; +/** + * Simplifies LaTeX syntax. {@see org.jabref.logic.layout.format.RemoveLatexCommandsFormatter} for a formatter removing LaTeX commands completely. + */ public class LatexCleanupFormatter extends Formatter { private static final Pattern REMOVE_REDUNDANT = Pattern diff --git a/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java index d7515a638c4..76942fea1b7 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java @@ -7,6 +7,7 @@ import java.util.Optional; import org.jabref.logic.cleanup.DoiCleanup; +import org.jabref.logic.formatter.bibtexfields.CleanupUrlFormatter; import org.jabref.logic.formatter.bibtexfields.ClearFormatter; import org.jabref.logic.help.HelpFile; import org.jabref.logic.importer.FetcherException; @@ -17,6 +18,7 @@ import org.jabref.model.cleanup.FieldFormatterCleanup; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.InternalField; +import org.jabref.model.entry.field.StandardField; import org.jabref.model.util.DummyFileUpdateMonitor; import org.apache.http.client.utils.URIBuilder; @@ -57,13 +59,13 @@ public Parser getParser() { @Override public void doPostCleanup(BibEntry entry) { DoiCleanup doiCleaner = new DoiCleanup(); - - FieldFormatterCleanup clearTimestampFormatter = new FieldFormatterCleanup(InternalField.TIMESTAMP, - new ClearFormatter()); - doiCleaner.cleanup(entry); + + FieldFormatterCleanup clearTimestampFormatter = new FieldFormatterCleanup(InternalField.TIMESTAMP, new ClearFormatter()); clearTimestampFormatter.cleanup(entry); + FieldFormatterCleanup cleanUpUrlFormatter = new FieldFormatterCleanup(StandardField.URL, new CleanupUrlFormatter()); + cleanUpUrlFormatter.cleanup(entry); } @Override @@ -75,5 +77,4 @@ public String getName() { public Optional getHelpPage() { return Optional.of(HelpFile.FETCHER_DBLP); } - } diff --git a/src/main/java/org/jabref/logic/layout/format/RemoveLatexCommandsFormatter.java b/src/main/java/org/jabref/logic/layout/format/RemoveLatexCommandsFormatter.java index a538bdaf13e..e9681c4d698 100644 --- a/src/main/java/org/jabref/logic/layout/format/RemoveLatexCommandsFormatter.java +++ b/src/main/java/org/jabref/logic/layout/format/RemoveLatexCommandsFormatter.java @@ -7,51 +7,61 @@ public class RemoveLatexCommandsFormatter implements LayoutFormatter { @Override public String format(String field) { - StringBuilder sb = new StringBuilder(""); + StringBuilder cleanedField = new StringBuilder(); StringBuilder currentCommand = null; - char c; + char currentCharacter; boolean escaped = false; boolean incommand = false; - int i; - for (i = 0; i < field.length(); i++) { - c = field.charAt(i); - if (escaped && (c == '\\')) { - sb.append('\\'); + int currentFieldPosition; + for (currentFieldPosition = 0; currentFieldPosition < field.length(); currentFieldPosition++) { + currentCharacter = field.charAt(currentFieldPosition); + if (escaped && (currentCharacter == '\\')) { + cleanedField.append('\\'); escaped = false; - } else if (c == '\\') { + // \\ --> first \ begins the command, second \ ends the command + // \latexommand\\ -> \latexcommand is the command, terminated by \, which begins a new command + incommand = false; + } else if (currentCharacter == '\\') { escaped = true; incommand = true; currentCommand = new StringBuilder(); - } else if (!incommand && ((c == '{') || (c == '}'))) { + } else if (!incommand && ((currentCharacter == '{') || (currentCharacter == '}'))) { // Swallow the brace. - } else if (Character.isLetter(c) || StringUtil.SPECIAL_COMMAND_CHARS.contains(String.valueOf(c))) { + } else if (Character.isLetter(currentCharacter) || StringUtil.SPECIAL_COMMAND_CHARS.contains(String.valueOf(currentCharacter))) { escaped = false; if (incommand) { - currentCommand.append(c); + currentCommand.append(currentCharacter); if ((currentCommand.length() == 1) && StringUtil.SPECIAL_COMMAND_CHARS.contains(currentCommand.toString())) { // This indicates that we are in a command of the type \^o or \~{n} incommand = false; escaped = false; - } } else { - sb.append(c); + cleanedField.append(currentCharacter); } - } else if (Character.isLetter(c)) { + } else if (Character.isLetter(currentCharacter)) { escaped = false; if (incommand) { // We are in a command, and should not keep the letter. - currentCommand.append(c); + currentCommand.append(currentCharacter); } else { - sb.append(c); + cleanedField.append(currentCharacter); } } else { - if (!incommand || (!Character.isWhitespace(c) && (c != '{'))) { - sb.append(c); + if (!incommand || (!Character.isWhitespace(currentCharacter) && (currentCharacter != '{'))) { + cleanedField.append(currentCharacter); } else { - if (c != '{') { - sb.append(c); + if (!Character.isWhitespace(currentCharacter) && (currentCharacter != '{')) { + // do not append the opening brace of a command parameter + // do not append the whitespace character + cleanedField.append(currentCharacter); + } + if (incommand) { + // eat up all whitespace characters + while ((currentFieldPosition + 1 < field.length() && Character.isWhitespace(field.charAt(currentFieldPosition + 1)))) { + currentFieldPosition++; + } } } incommand = false; @@ -59,7 +69,6 @@ public String format(String field) { } } - return sb.toString(); + return cleanedField.toString(); } - } diff --git a/src/test/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatterTest.java b/src/test/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatterTest.java index 46e4bea3508..c6a838aabae 100644 --- a/src/test/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatterTest.java +++ b/src/test/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatterTest.java @@ -10,11 +10,11 @@ */ class CleanupUrlFormatterTest { - private CleanupURLFormatter formatter; + private CleanupUrlFormatter formatter; @BeforeEach void setUp() { - formatter = new CleanupURLFormatter(); + formatter = new CleanupUrlFormatter(); } @Test @@ -29,6 +29,11 @@ void extractURLFormLink() { formatter.format("away.php?to=http%3A%2F%2Fwikipedia.org&a=snippet")); } + @Test + void latexCommandsRemoved() { + assertEquals("http://pi.informatik.uni-siegen.de/stt/36_2/./03_Technische_Beitraege/ZEUS2016/beitrag_2.pdf", formatter.format("http://pi.informatik.uni-siegen.de/stt/36\\_2/./03\\_Technische\\_Beitraege/ZEUS2016/beitrag\\_2.pdf")); + } + @Test void formatExample() { assertEquals("http://www.focus.de/" + diff --git a/src/test/java/org/jabref/logic/layout/format/RemoveBracketsTest.java b/src/test/java/org/jabref/logic/layout/format/RemoveBracketsTest.java index 48c904c40bc..018d5354786 100644 --- a/src/test/java/org/jabref/logic/layout/format/RemoveBracketsTest.java +++ b/src/test/java/org/jabref/logic/layout/format/RemoveBracketsTest.java @@ -16,10 +16,22 @@ public void setUp() { } @Test - public void testFormat() throws Exception { + public void bracePairCorrectlyRemoved() throws Exception { assertEquals("some text", formatter.format("{some text}")); + } + + @Test + public void singleOpeningBraceCorrectlyRemoved() throws Exception { assertEquals("some text", formatter.format("{some text")); + } + + @Test + public void singleClosingBraceCorrectlyRemoved() throws Exception { assertEquals("some text", formatter.format("some text}")); + } + + @Test + public void bracePairWithEscapedBackslashCorrectlyRemoved() throws Exception { assertEquals("\\some text\\", formatter.format("\\{some text\\}")); } } diff --git a/src/test/java/org/jabref/logic/layout/format/RemoveLatexCommandsFormatterTest.java b/src/test/java/org/jabref/logic/layout/format/RemoveLatexCommandsFormatterTest.java new file mode 100644 index 00000000000..9eab6df5985 --- /dev/null +++ b/src/test/java/org/jabref/logic/layout/format/RemoveLatexCommandsFormatterTest.java @@ -0,0 +1,61 @@ +package org.jabref.logic.layout.format; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class RemoveLatexCommandsFormatterTest { + + private RemoveLatexCommandsFormatter formatter; + + @BeforeEach + public void setUp() { + formatter = new RemoveLatexCommandsFormatter(); + } + + @Test + public void withoutLatexCommandsUnmodified() { + assertEquals("some text", formatter.format("some text")); + } + + @Test + public void singleCommandWiped() { + assertEquals("", formatter.format("\\sometext")); + } + + @Test + public void singleSpaceAfterCommandRemoved() { + assertEquals("text", formatter.format("\\some text")); + } + + @Test + public void multipleSpacesAfterCommandRemoved() { + assertEquals("text", formatter.format("\\some text")); + } + + @Test + public void escapedBackslashBecomesBackslash() { + assertEquals("\\", formatter.format("\\\\")); + } + + @Test + public void escapedBackslashFollowedByTextBecomesBackslashFollowedByText() { + assertEquals("\\some text", formatter.format("\\\\some text")); + } + + @Test + public void escapedBackslashKept() { + assertEquals("\\some text\\", formatter.format("\\\\some text\\\\")); + } + + @Test + public void escapedUnderscoreReplaces() { + assertEquals("some_text", formatter.format("some\\_text")); + } + + @Test + public void exampleUrlCorrectlyCleaned() { + assertEquals("http://pi.informatik.uni-siegen.de/stt/36_2/./03_Technische_Beitraege/ZEUS2016/beitrag_2.pdf", formatter.format("http://pi.informatik.uni-siegen.de/stt/36\\_2/./03\\_Technische\\_Beitraege/ZEUS2016/beitrag\\_2.pdf")); + } +} From cbb383692b292c71e8ebf5d064bd59039cbfc508 Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Sun, 10 Nov 2019 00:43:28 +0100 Subject: [PATCH 2/7] Use RemoveLatexCommandsFormatter directly --- .../org/jabref/logic/importer/fetcher/DBLPFetcher.java | 8 +++++--- .../formatter/bibtexfields/CleanupUrlFormatterTest.java | 6 ++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java index 76942fea1b7..ec4ba9c353f 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java @@ -7,7 +7,6 @@ import java.util.Optional; import org.jabref.logic.cleanup.DoiCleanup; -import org.jabref.logic.formatter.bibtexfields.CleanupUrlFormatter; import org.jabref.logic.formatter.bibtexfields.ClearFormatter; import org.jabref.logic.help.HelpFile; import org.jabref.logic.importer.FetcherException; @@ -15,6 +14,7 @@ import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.SearchBasedParserFetcher; import org.jabref.logic.importer.fileformat.BibtexParser; +import org.jabref.logic.layout.format.RemoveLatexCommandsFormatter; import org.jabref.model.cleanup.FieldFormatterCleanup; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.InternalField; @@ -64,8 +64,10 @@ public void doPostCleanup(BibEntry entry) { FieldFormatterCleanup clearTimestampFormatter = new FieldFormatterCleanup(InternalField.TIMESTAMP, new ClearFormatter()); clearTimestampFormatter.cleanup(entry); - FieldFormatterCleanup cleanUpUrlFormatter = new FieldFormatterCleanup(StandardField.URL, new CleanupUrlFormatter()); - cleanUpUrlFormatter.cleanup(entry); + // unescape the the contents of the URL field + entry.getField(StandardField.URL) + .map(url -> new RemoveLatexCommandsFormatter().format(url)) + .ifPresent(url -> entry.setField(StandardField.URL, url)); } @Override diff --git a/src/test/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatterTest.java b/src/test/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatterTest.java index c6a838aabae..ed3274c224e 100644 --- a/src/test/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatterTest.java +++ b/src/test/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatterTest.java @@ -34,6 +34,12 @@ void latexCommandsRemoved() { assertEquals("http://pi.informatik.uni-siegen.de/stt/36_2/./03_Technische_Beitraege/ZEUS2016/beitrag_2.pdf", formatter.format("http://pi.informatik.uni-siegen.de/stt/36\\_2/./03\\_Technische\\_Beitraege/ZEUS2016/beitrag\\_2.pdf")); } + @Test + void urlencodedSlashesAreAlsoConverted() { + // the caller has to pay attention that this does not happen + assertEquals("jabref.org/test/test", formatter.format("jabref.org/test%2Ftest")); + } + @Test void formatExample() { assertEquals("http://www.focus.de/" + From 6070126c15cb6652e3fa15bb90952aade22ed090 Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Sun, 10 Nov 2019 00:52:35 +0100 Subject: [PATCH 3/7] Improve code of ShortenDoiFormatter --- .../bibtexfields/ShortenDOIFormatter.java | 26 +++++++------------ .../bibtexfields/ShortenDOIFormatterTest.java | 5 ++++ 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatter.java b/src/main/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatter.java index 75b9ba9a2ab..e689cc962da 100644 --- a/src/main/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatter.java +++ b/src/main/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatter.java @@ -1,7 +1,6 @@ package org.jabref.logic.formatter.bibtexfields; import java.util.Objects; -import java.util.Optional; import org.jabref.logic.importer.util.ShortDOIService; import org.jabref.logic.importer.util.ShortDOIServiceException; @@ -29,22 +28,15 @@ public String getKey() { @Override public String format(String value) { Objects.requireNonNull(value); - - ShortDOIService shortDOIService = new ShortDOIService(); - - Optional doi = Optional.empty(); - - try { - doi = DOI.parse(value); - - if (doi.isPresent()) { - return shortDOIService.getShortDOI(doi.get()).getDOI(); - } - } catch (ShortDOIServiceException e) { - LOGGER.error(e.getMessage(), e); - } - - return value; + return DOI.parse(value) + .map(doi -> { + try { + return new ShortDOIService().getShortDOI(doi).getDOI(); + } catch (ShortDOIServiceException e) { + LOGGER.error(e.getMessage(), e); + return value; + } + }).orElse(value); } @Override diff --git a/src/test/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatterTest.java b/src/test/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatterTest.java index 1191014af41..58de9c0e324 100644 --- a/src/test/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatterTest.java +++ b/src/test/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatterTest.java @@ -20,5 +20,10 @@ public void setUp() { @Test public void formatDoi() { assertEquals("10/adc", formatter.format("10.1006/jmbi.1998.2354")); + + } + @Test + public void invalidDoiIsKept() { + assertEquals("invalid-doi", formatter.format("invalid-doi")); } } From e3e22e38da9d6cf3d1c22fd4e44e3820242e74e5 Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Sun, 10 Nov 2019 00:58:36 +0100 Subject: [PATCH 4/7] Reformat code and more speaking variable name --- .../jabref/logic/layout/LayoutFormatter.java | 20 ++++++++----------- .../org/jabref/logic/layout/LayoutHelper.java | 20 +++++++------------ .../org/jabref/model/cleanup/Formatter.java | 18 ++++++++--------- 3 files changed, 24 insertions(+), 34 deletions(-) diff --git a/src/main/java/org/jabref/logic/layout/LayoutFormatter.java b/src/main/java/org/jabref/logic/layout/LayoutFormatter.java index 47184094402..25b701b3d12 100644 --- a/src/main/java/org/jabref/logic/layout/LayoutFormatter.java +++ b/src/main/java/org/jabref/logic/layout/LayoutFormatter.java @@ -2,14 +2,12 @@ /** * The LayoutFormatter is used for a Filter design-pattern. - * + *

* Implementing classes have to accept a String and returned a formatted version of it. - * + *

* Example: - * - * "John von Neumann" => "von Neumann, John" - * - * @version 1.2 - Documentation CO + *

+ * "John von Neumann" => "von Neumann, John" */ @FunctionalInterface public interface LayoutFormatter { @@ -17,14 +15,12 @@ public interface LayoutFormatter { /** * Failure Mode: *

- * Formatters should be robust in the sense that they always return some - * relevant string. + * Formatters should be robust in the sense that they always return some relevant string. *

- * If the formatter can detect an invalid input it should return the - * original string otherwise it may simply return a wrong output. + * If the formatter can detect an invalid input it should return the original string otherwise it may simply return + * a wrong output. * - * @param fieldText - * The text to layout. + * @param fieldText The text to layout. * @return The layouted text. */ String format(String fieldText); diff --git a/src/main/java/org/jabref/logic/layout/LayoutHelper.java b/src/main/java/org/jabref/logic/layout/LayoutHelper.java index 2502b285c7f..dde7ad47e24 100644 --- a/src/main/java/org/jabref/logic/layout/LayoutHelper.java +++ b/src/main/java/org/jabref/logic/layout/LayoutHelper.java @@ -67,27 +67,24 @@ public static void setCurrentGroup(String newGroup) { private void doBracketedField(final int field) throws IOException { StringBuilder buffer = null; - int c; + int currentCharacter; boolean start = false; while (!endOfFile) { - c = read(); + currentCharacter = read(); - if (c == -1) { + if (currentCharacter == -1) { endOfFile = true; - if (buffer != null) { parsedEntries.add(new StringInt(buffer.toString(), field)); } - return; } - if ((c == '{') || (c == '}')) { - if (c == '}') { + if ((currentCharacter == '{') || (currentCharacter == '}')) { + if (currentCharacter == '}') { if (buffer != null) { parsedEntries.add(new StringInt(buffer.toString(), field)); - return; } } else { @@ -98,16 +95,13 @@ private void doBracketedField(final int field) throws IOException { buffer = new StringBuilder(100); } - if (start && (c != '}')) { - buffer.append((char) c); + if (start && (currentCharacter != '}')) { + buffer.append((char) currentCharacter); } } } } - /** - * - */ private void doBracketedOptionField() throws IOException { StringBuilder buffer = null; int c; diff --git a/src/main/java/org/jabref/model/cleanup/Formatter.java b/src/main/java/org/jabref/model/cleanup/Formatter.java index 17eb2d262e2..fa149b720c3 100644 --- a/src/main/java/org/jabref/model/cleanup/Formatter.java +++ b/src/main/java/org/jabref/model/cleanup/Formatter.java @@ -1,13 +1,12 @@ package org.jabref.model.cleanup; /** - * The Formatter is used for a Filter design-pattern. Extending classes have to accept a String and returned a - * formatted version of it. Implementations have to reside in the logic package. - * + * The Formatter is used for a Filter design-pattern. Extending classes have to accept a String and returned a formatted + * version of it. Implementations have to reside in the logic package. + *

* Example: - * + *

* "John von Neumann" => "von Neumann, John" - * */ public abstract class Formatter { @@ -20,13 +19,14 @@ public abstract class Formatter { /** * Returns a unique key for the formatter that can be used for its identification + * * @return the key of the formatter, always not null */ public abstract String getKey(); /** * Formats a field value by with a particular formatter transformation. - * + *

* Calling this method with a null argument results in a NullPointerException. * * @param value the input String @@ -42,8 +42,8 @@ public abstract class Formatter { public abstract String getDescription(); /** - * Returns an example input string of the formatter. - * This example is used as input to the formatter to demonstrate its functionality + * Returns an example input string of the formatter. This example is used as input to the formatter to demonstrate + * its functionality * * @return the example input string, always non empty */ @@ -68,7 +68,7 @@ public int hashCode() { @Override public boolean equals(Object obj) { if (obj instanceof Formatter) { - return getKey().equals(((Formatter)obj).getKey()); + return getKey().equals(((Formatter) obj).getKey()); } else { return false; } From 72616992135d5a538259408bc722b60149b77f31 Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Sun, 10 Nov 2019 01:09:41 +0100 Subject: [PATCH 5/7] Introduce LayoutFormatterBasedFormatter --- .../logic/importer/fetcher/DBLPFetcher.java | 17 ++++---- .../java/org/jabref/logic/layout/Layout.java | 3 -- .../layout/LayoutFormatterBasedFormatter.java | 40 +++++++++++++++++++ .../model/cleanup/FieldFormatterCleanups.java | 2 +- 4 files changed, 51 insertions(+), 11 deletions(-) create mode 100644 src/main/java/org/jabref/logic/layout/LayoutFormatterBasedFormatter.java diff --git a/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java index ec4ba9c353f..2e419a8271d 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java @@ -3,6 +3,7 @@ import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; +import java.util.List; import java.util.Objects; import java.util.Optional; @@ -14,8 +15,10 @@ import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.SearchBasedParserFetcher; import org.jabref.logic.importer.fileformat.BibtexParser; +import org.jabref.logic.layout.LayoutFormatterBasedFormatter; import org.jabref.logic.layout.format.RemoveLatexCommandsFormatter; import org.jabref.model.cleanup.FieldFormatterCleanup; +import org.jabref.model.cleanup.FieldFormatterCleanups; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.InternalField; import org.jabref.model.entry.field.StandardField; @@ -61,13 +64,13 @@ public void doPostCleanup(BibEntry entry) { DoiCleanup doiCleaner = new DoiCleanup(); doiCleaner.cleanup(entry); - FieldFormatterCleanup clearTimestampFormatter = new FieldFormatterCleanup(InternalField.TIMESTAMP, new ClearFormatter()); - clearTimestampFormatter.cleanup(entry); - - // unescape the the contents of the URL field - entry.getField(StandardField.URL) - .map(url -> new RemoveLatexCommandsFormatter().format(url)) - .ifPresent(url -> entry.setField(StandardField.URL, url)); + FieldFormatterCleanups cleanups = new FieldFormatterCleanups(true, + List.of( + new FieldFormatterCleanup(InternalField.TIMESTAMP, new ClearFormatter()), + // unescape the the contents of the URL field, e.g., some\_url\_part becomes some_url_part + new FieldFormatterCleanup(StandardField.URL, new LayoutFormatterBasedFormatter(new RemoveLatexCommandsFormatter())) + )); + cleanups.applySaveActions(entry); } @Override diff --git a/src/main/java/org/jabref/logic/layout/Layout.java b/src/main/java/org/jabref/logic/layout/Layout.java index 873fb396de4..25ed99711d1 100644 --- a/src/main/java/org/jabref/logic/layout/Layout.java +++ b/src/main/java/org/jabref/logic/layout/Layout.java @@ -12,9 +12,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/** - * Main class for formatting DOCUMENT ME! - */ public class Layout { private static final Logger LOGGER = LoggerFactory.getLogger(Layout.class); diff --git a/src/main/java/org/jabref/logic/layout/LayoutFormatterBasedFormatter.java b/src/main/java/org/jabref/logic/layout/LayoutFormatterBasedFormatter.java new file mode 100644 index 00000000000..72762fc8142 --- /dev/null +++ b/src/main/java/org/jabref/logic/layout/LayoutFormatterBasedFormatter.java @@ -0,0 +1,40 @@ +package org.jabref.logic.layout; + +import org.jabref.model.cleanup.Formatter; + +/** + * When having to use a LayoutFormatter as Formatter, this class is helpful. One usecase is {@link org.jabref.model.cleanup.FieldFormatterCleanup} + */ +public class LayoutFormatterBasedFormatter extends Formatter { + + private final LayoutFormatter layoutFormatter; + + public LayoutFormatterBasedFormatter(LayoutFormatter layoutFormatter) { + this.layoutFormatter = layoutFormatter; + } + + @Override + public String getName() { + return layoutFormatter.getClass().getName(); + } + + @Override + public String getKey() { + return layoutFormatter.getClass().getName(); + } + + @Override + public String format(String value) { + return layoutFormatter.format(value); + } + + @Override + public String getDescription() { + return layoutFormatter.getClass().getName(); + } + + @Override + public String getExampleInput() { + return layoutFormatter.getClass().getName(); + } +} diff --git a/src/main/java/org/jabref/model/cleanup/FieldFormatterCleanups.java b/src/main/java/org/jabref/model/cleanup/FieldFormatterCleanups.java index ffae762c0dc..b0e43f0e756 100644 --- a/src/main/java/org/jabref/model/cleanup/FieldFormatterCleanups.java +++ b/src/main/java/org/jabref/model/cleanup/FieldFormatterCleanups.java @@ -62,7 +62,7 @@ public List applySaveActions(BibEntry entry) { if (enabled) { return applyAllActions(entry); } else { - return new ArrayList<>(); + return Collections.emptyList(); } } From 569667f555c67c99fcc9ae0d1ea6f0d7f4d530d1 Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Sun, 10 Nov 2019 08:19:19 +0100 Subject: [PATCH 6/7] Fix checkstyle --- .../logic/formatter/bibtexfields/ShortenDOIFormatterTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatterTest.java b/src/test/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatterTest.java index 58de9c0e324..1da025cdb4b 100644 --- a/src/test/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatterTest.java +++ b/src/test/java/org/jabref/logic/formatter/bibtexfields/ShortenDOIFormatterTest.java @@ -20,8 +20,8 @@ public void setUp() { @Test public void formatDoi() { assertEquals("10/adc", formatter.format("10.1006/jmbi.1998.2354")); - } + @Test public void invalidDoiIsKept() { assertEquals("invalid-doi", formatter.format("invalid-doi")); From 83cbb3d02ce679ce6f6e5daf88d8510b3a804ac3 Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Sun, 10 Nov 2019 14:00:13 +0100 Subject: [PATCH 7/7] Do not apply RemoveLatexCommandsFormatter in CleanupUrlFormatter --- .../logic/formatter/bibtexfields/CleanupUrlFormatter.java | 4 +--- .../logic/formatter/bibtexfields/CleanupUrlFormatterTest.java | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatter.java b/src/main/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatter.java index 62932ac31c2..1821760ab0a 100644 --- a/src/main/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatter.java +++ b/src/main/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatter.java @@ -7,7 +7,6 @@ import java.util.regex.Pattern; import org.jabref.logic.l10n.Localization; -import org.jabref.logic.layout.format.RemoveLatexCommandsFormatter; import org.jabref.model.cleanup.Formatter; import org.apache.commons.logging.Log; @@ -47,8 +46,7 @@ public String format(String value) { LOGGER.warn("Used unsupported character encoding", e); } - String result = new RemoveLatexCommandsFormatter().format(decodedLink); - return result; + return decodedLink; } @Override diff --git a/src/test/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatterTest.java b/src/test/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatterTest.java index ed3274c224e..0fb47acf98e 100644 --- a/src/test/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatterTest.java +++ b/src/test/java/org/jabref/logic/formatter/bibtexfields/CleanupUrlFormatterTest.java @@ -30,8 +30,8 @@ void extractURLFormLink() { } @Test - void latexCommandsRemoved() { - assertEquals("http://pi.informatik.uni-siegen.de/stt/36_2/./03_Technische_Beitraege/ZEUS2016/beitrag_2.pdf", formatter.format("http://pi.informatik.uni-siegen.de/stt/36\\_2/./03\\_Technische\\_Beitraege/ZEUS2016/beitrag\\_2.pdf")); + void latexCommandsNotRemoved() { + assertEquals("http://pi.informatik.uni-siegen.de/stt/36\\_2/./03\\_Technische\\_Beitraege/ZEUS2016/beitrag\\_2.pdf", formatter.format("http://pi.informatik.uni-siegen.de/stt/36\\_2/./03\\_Technische\\_Beitraege/ZEUS2016/beitrag\\_2.pdf")); } @Test