Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix DBLPFetcherTest -- dblp changed the format #5582

Merged
merged 7 commits into from
Nov 10, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions src/main/java/org/jabref/gui/fieldeditors/UrlEditor.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import org.jabref.gui.DialogService;
import org.jabref.gui.autocompleter.AutoCompleteSuggestionProvider;
import org.jabref.gui.fieldeditors.contextmenu.EditorMenus;
import org.jabref.logic.formatter.bibtexfields.CleanupURLFormatter;
import org.jabref.logic.formatter.bibtexfields.CleanupUrlFormatter;
import org.jabref.logic.formatter.bibtexfields.TrimWhitespaceFormatter;
import org.jabref.logic.integrity.FieldCheckers;
import org.jabref.model.entry.BibEntry;
Expand All @@ -34,12 +34,11 @@ public UrlEditor(Field field, DialogService dialogService, AutoCompleteSuggestio
.load();

textArea.textProperty().bindBidirectional(viewModel.textProperty());
Supplier<List<MenuItem>> contextMenuSupplier = EditorMenus.getCleanupURLMenu(textArea);
Supplier<List<MenuItem>> contextMenuSupplier = EditorMenus.getCleanupUrlMenu(textArea);
textArea.addToContextMenu(contextMenuSupplier);

// init paste handler for URLEditor to format pasted url link in textArea
textArea.setPasteActionHandler(() -> textArea.setText(new CleanupURLFormatter().format(new TrimWhitespaceFormatter().format(textArea.getText()))));

// init paste handler for UrlEditor to format pasted url link in textArea
textArea.setPasteActionHandler(() -> textArea.setText(new CleanupUrlFormatter().format(new TrimWhitespaceFormatter().format(textArea.getText()))));

new EditorValidator(preferences).configureValidation(viewModel.getFieldValidator().getValidationStatus(), textArea);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import org.jabref.gui.actions.ActionFactory;
import org.jabref.gui.actions.StandardActions;
import org.jabref.gui.edit.CopyDoiUrlAction;
import org.jabref.logic.formatter.bibtexfields.CleanupURLFormatter;
import org.jabref.logic.formatter.bibtexfields.CleanupUrlFormatter;
import org.jabref.logic.formatter.bibtexfields.NormalizeNamesFormatter;
import org.jabref.logic.l10n.Localization;

Expand Down Expand Up @@ -91,11 +91,11 @@ public static Supplier<List<MenuItem>> getDOIMenu(TextArea textArea) {
* @param textArea text-area that this menu will be connected to
* @return menu containing items of the default menu and an item to cleanup a URL
*/
public static Supplier<List<MenuItem>> getCleanupURLMenu(TextArea textArea) {
public static Supplier<List<MenuItem>> getCleanupUrlMenu(TextArea textArea) {
return () -> {
CustomMenuItem cleanupURL = new CustomMenuItem(new Label(Localization.lang("Cleanup URL link")));
cleanupURL.setDisable(textArea.textProperty().isEmpty().get());
cleanupURL.setOnAction(event -> textArea.setText(new CleanupURLFormatter().format(textArea.getText())));
cleanupURL.setOnAction(event -> textArea.setText(new CleanupUrlFormatter().format(textArea.getText())));

List<MenuItem> menuItems = new ArrayList<>();
menuItems.add(cleanupURL);
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/jabref/logic/formatter/Formatters.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import java.util.Objects;
import java.util.Optional;

import org.jabref.logic.formatter.bibtexfields.CleanupURLFormatter;
import org.jabref.logic.formatter.bibtexfields.CleanupUrlFormatter;
import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
import org.jabref.logic.formatter.bibtexfields.EscapeUnderscoresFormatter;
import org.jabref.logic.formatter.bibtexfields.HtmlToLatexFormatter;
Expand Down Expand Up @@ -58,7 +58,7 @@ public static List<Formatter> getCaseChangers() {
public static List<Formatter> getOthers() {
return Arrays.asList(
new ClearFormatter(),
new CleanupURLFormatter(),
new CleanupUrlFormatter(),
new LatexCleanupFormatter(),
new MinifyNameListFormatter(),
new NormalizeDateFormatter(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.util.regex.Pattern;

import org.jabref.logic.l10n.Localization;
import org.jabref.logic.layout.format.RemoveLatexCommandsFormatter;
import org.jabref.model.cleanup.Formatter;

import org.apache.commons.logging.Log;
Expand All @@ -15,9 +16,9 @@
/**
* Cleanup URL link
*/
public class CleanupURLFormatter extends Formatter {
public class CleanupUrlFormatter extends Formatter {

private static final Log LOGGER = LogFactory.getLog(CleanupURLFormatter.class);
private static final Log LOGGER = LogFactory.getLog(CleanupUrlFormatter.class);
// This regexp find "url=" or "to=" parameter in full link and get text after them
private static final Pattern PATTERN_URL = Pattern.compile("(?:url|to)=([^&]*)");

Expand All @@ -38,16 +39,16 @@ public String format(String value) {

Matcher matcher = PATTERN_URL.matcher(value);
if (matcher.find()) {
toDecode = matcher.group(1);

toDecode = matcher.group(1);
}
try {
decodedLink = URLDecoder.decode(toDecode, StandardCharsets.UTF_8.name());
}
catch (UnsupportedEncodingException e) {
} catch (UnsupportedEncodingException e) {
LOGGER.warn("Used unsupported character encoding", e);
}
return decodedLink;

String result = new RemoveLatexCommandsFormatter().format(decodedLink);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although somewhat of an edge case, but doesn't this lead to problems with the valid url jabref.org/test%5Ctest (which I guess would be reduced to jabref.org/test instead of the expected jabref.org/test\test?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not this LOC in particularly, but in general: Yes. I added a test case for that.

In the fetcher itself, I use the RemoveLatexCommandsFormatter directly. I did not know how to convert a LayoutFormatter to a Formatter. Thus, I "manually" updated the field in cbb3836. Moments later, I worked on LayoutFormatterBasedFormatter which wraps a LayoutFormatter in a Formatter and IMHO made the code more nice.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still unsure if we should really remove latex commands from the URL. I guess this is only an issue for the DBLP fetcher and nowhere else.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, now I got it. Reverted that in 83cbb3d

return result;
}

@Override
Expand All @@ -61,6 +62,5 @@ public String getExampleInput() {
"rja&uact=8&ved=0ahUKEwjg3ZrB_ZPXAhVGuhoKHYdOBOg4ChAWCCYwAA&url=" +
"http%3A%2F%2Fwww.focus.de%2Fgesundheit%2Fratgeber%2Fherz%2Ftest%2" +
"Flebenserwartung-werden-sie-100-jahre-alt_aid_363828.html" + "&usg=AOvVaw1G6m2jf-pTHYkXceii4hXU";
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import org.jabref.logic.l10n.Localization;
import org.jabref.model.cleanup.Formatter;

/**
* Simplifies LaTeX syntax. {@see org.jabref.logic.layout.format.RemoveLatexCommandsFormatter} for a formatter removing LaTeX commands completely.
*/
public class LatexCleanupFormatter extends Formatter {

private static final Pattern REMOVE_REDUNDANT = Pattern
Expand Down
11 changes: 6 additions & 5 deletions src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.util.Optional;

import org.jabref.logic.cleanup.DoiCleanup;
import org.jabref.logic.formatter.bibtexfields.CleanupUrlFormatter;
import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
import org.jabref.logic.help.HelpFile;
import org.jabref.logic.importer.FetcherException;
Expand All @@ -17,6 +18,7 @@
import org.jabref.model.cleanup.FieldFormatterCleanup;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.InternalField;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.util.DummyFileUpdateMonitor;

import org.apache.http.client.utils.URIBuilder;
Expand Down Expand Up @@ -57,13 +59,13 @@ public Parser getParser() {
@Override
public void doPostCleanup(BibEntry entry) {
DoiCleanup doiCleaner = new DoiCleanup();

FieldFormatterCleanup clearTimestampFormatter = new FieldFormatterCleanup(InternalField.TIMESTAMP,
new ClearFormatter());

doiCleaner.cleanup(entry);

FieldFormatterCleanup clearTimestampFormatter = new FieldFormatterCleanup(InternalField.TIMESTAMP, new ClearFormatter());
clearTimestampFormatter.cleanup(entry);

FieldFormatterCleanup cleanUpUrlFormatter = new FieldFormatterCleanup(StandardField.URL, new CleanupUrlFormatter());
cleanUpUrlFormatter.cleanup(entry);
}

@Override
Expand All @@ -75,5 +77,4 @@ public String getName() {
public Optional<HelpFile> getHelpPage() {
return Optional.of(HelpFile.FETCHER_DBLP);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -7,59 +7,68 @@ public class RemoveLatexCommandsFormatter implements LayoutFormatter {

@Override
public String format(String field) {
StringBuilder sb = new StringBuilder("");
StringBuilder cleanedField = new StringBuilder();
StringBuilder currentCommand = null;
char c;
char currentCharacter;
boolean escaped = false;
boolean incommand = false;
int i;
for (i = 0; i < field.length(); i++) {
c = field.charAt(i);
if (escaped && (c == '\\')) {
sb.append('\\');
int currentFieldPosition;
for (currentFieldPosition = 0; currentFieldPosition < field.length(); currentFieldPosition++) {
currentCharacter = field.charAt(currentFieldPosition);
if (escaped && (currentCharacter == '\\')) {
cleanedField.append('\\');
escaped = false;
} else if (c == '\\') {
// \\ --> first \ begins the command, second \ ends the command
// \latexommand\\ -> \latexcommand is the command, terminated by \, which begins a new command
incommand = false;
} else if (currentCharacter == '\\') {
escaped = true;
incommand = true;
currentCommand = new StringBuilder();
} else if (!incommand && ((c == '{') || (c == '}'))) {
} else if (!incommand && ((currentCharacter == '{') || (currentCharacter == '}'))) {
// Swallow the brace.
} else if (Character.isLetter(c) || StringUtil.SPECIAL_COMMAND_CHARS.contains(String.valueOf(c))) {
} else if (Character.isLetter(currentCharacter) || StringUtil.SPECIAL_COMMAND_CHARS.contains(String.valueOf(currentCharacter))) {
escaped = false;
if (incommand) {
currentCommand.append(c);
currentCommand.append(currentCharacter);
if ((currentCommand.length() == 1)
&& StringUtil.SPECIAL_COMMAND_CHARS.contains(currentCommand.toString())) {
// This indicates that we are in a command of the type \^o or \~{n}
incommand = false;
escaped = false;

}
} else {
sb.append(c);
cleanedField.append(currentCharacter);
}
} else if (Character.isLetter(c)) {
} else if (Character.isLetter(currentCharacter)) {
escaped = false;
if (incommand) {
// We are in a command, and should not keep the letter.
currentCommand.append(c);
currentCommand.append(currentCharacter);
} else {
sb.append(c);
cleanedField.append(currentCharacter);
}
} else {
if (!incommand || (!Character.isWhitespace(c) && (c != '{'))) {
sb.append(c);
if (!incommand || (!Character.isWhitespace(currentCharacter) && (currentCharacter != '{'))) {
cleanedField.append(currentCharacter);
} else {
if (c != '{') {
sb.append(c);
if (!Character.isWhitespace(currentCharacter) && (currentCharacter != '{')) {
// do not append the opening brace of a command parameter
// do not append the whitespace character
cleanedField.append(currentCharacter);
}
if (incommand) {
// eat up all whitespace characters
while ((currentFieldPosition + 1 < field.length() && Character.isWhitespace(field.charAt(currentFieldPosition + 1)))) {
currentFieldPosition++;
}
}
}
incommand = false;
escaped = false;
}
}

return sb.toString();
return cleanedField.toString();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
*/
class CleanupUrlFormatterTest {

private CleanupURLFormatter formatter;
private CleanupUrlFormatter formatter;

@BeforeEach
void setUp() {
formatter = new CleanupURLFormatter();
formatter = new CleanupUrlFormatter();
}

@Test
Expand All @@ -29,6 +29,11 @@ void extractURLFormLink() {
formatter.format("away.php?to=http%3A%2F%2Fwikipedia.org&a=snippet"));
}

@Test
void latexCommandsRemoved() {
assertEquals("http://pi.informatik.uni-siegen.de/stt/36_2/./03_Technische_Beitraege/ZEUS2016/beitrag_2.pdf", formatter.format("http://pi.informatik.uni-siegen.de/stt/36\\_2/./03\\_Technische\\_Beitraege/ZEUS2016/beitrag\\_2.pdf"));
}

@Test
void formatExample() {
assertEquals("http://www.focus.de/" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,22 @@ public void setUp() {
}

@Test
public void testFormat() throws Exception {
public void bracePairCorrectlyRemoved() throws Exception {
assertEquals("some text", formatter.format("{some text}"));
}

@Test
public void singleOpeningBraceCorrectlyRemoved() throws Exception {
assertEquals("some text", formatter.format("{some text"));
}

@Test
public void singleClosingBraceCorrectlyRemoved() throws Exception {
assertEquals("some text", formatter.format("some text}"));
}

@Test
public void bracePairWithEscapedBackslashCorrectlyRemoved() throws Exception {
assertEquals("\\some text\\", formatter.format("\\{some text\\}"));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package org.jabref.logic.layout.format;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

class RemoveLatexCommandsFormatterTest {

private RemoveLatexCommandsFormatter formatter;

@BeforeEach
public void setUp() {
formatter = new RemoveLatexCommandsFormatter();
}

@Test
public void withoutLatexCommandsUnmodified() {
assertEquals("some text", formatter.format("some text"));
}

@Test
public void singleCommandWiped() {
assertEquals("", formatter.format("\\sometext"));
}

@Test
public void singleSpaceAfterCommandRemoved() {
assertEquals("text", formatter.format("\\some text"));
}

@Test
public void multipleSpacesAfterCommandRemoved() {
assertEquals("text", formatter.format("\\some text"));
}

@Test
public void escapedBackslashBecomesBackslash() {
assertEquals("\\", formatter.format("\\\\"));
}

@Test
public void escapedBackslashFollowedByTextBecomesBackslashFollowedByText() {
assertEquals("\\some text", formatter.format("\\\\some text"));
}

@Test
public void escapedBackslashKept() {
assertEquals("\\some text\\", formatter.format("\\\\some text\\\\"));
}

@Test
public void escapedUnderscoreReplaces() {
assertEquals("some_text", formatter.format("some\\_text"));
}

@Test
public void exampleUrlCorrectlyCleaned() {
assertEquals("http://pi.informatik.uni-siegen.de/stt/36_2/./03_Technische_Beitraege/ZEUS2016/beitrag_2.pdf", formatter.format("http://pi.informatik.uni-siegen.de/stt/36\\_2/./03\\_Technische\\_Beitraege/ZEUS2016/beitrag\\_2.pdf"));
}
}