diff --git a/CHANGELOG.md b/CHANGELOG.md index d26da4f8e4e..398fa0b14c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - We added support for multiple languages for exporting to and importing references from MS Office. [#9699](https://github.com/JabRef/jabref/issues/9699) - We enabled scrolling in the groups list when dragging a group on another group. [#2869](https://github.com/JabRef/jabref/pull/2869) - We added the option to automatically download online files when a new entry is created from an existing ID (e.g. DOI). The option can be disabled in the preferences under "Import and Export" [#9756](https://github.com/JabRef/jabref/issues/9756) +- We added a new Integrity check for unscaped ampersands. [koppor#585](https://github.com/koppor/jabref/issues/585) ### Changed diff --git a/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java b/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java new file mode 100644 index 00000000000..9da189890c0 --- /dev/null +++ b/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java @@ -0,0 +1,41 @@ +package org.jabref.logic.integrity; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.regex.MatchResult; +import java.util.regex.Pattern; + +import org.jabref.logic.l10n.Localization; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.field.Field; + +import com.google.common.base.CharMatcher; + +/** + * Checks if the BibEntry contains unescaped ampersands. + */ +public class AmpersandChecker implements EntryChecker { + // matches for an & preceded by any number of \ + private static final Pattern BACKSLASH_PRECEDED_AMPERSAND = Pattern.compile("\\\\*&"); + + @Override + public List check(BibEntry entry) { + List results = new ArrayList<>(); + + for (Map.Entry field : entry.getFieldMap().entrySet()) { + // counts the number of even \ occurrences preceding an & + long unescapedAmpersands = BACKSLASH_PRECEDED_AMPERSAND.matcher(field.getValue()) + .results() + .map(MatchResult::group) + .filter(m -> CharMatcher.is('\\').countIn(m) % 2 == 0) + .count(); + + if (unescapedAmpersands > 0) { + results.add(new IntegrityMessage(Localization.lang("Found %0 unescaped '&'", unescapedAmpersands), entry, field.getKey())); + // note: when changing the message - also do so in tests + } + } + return results; + } +} diff --git a/src/main/java/org/jabref/logic/integrity/IntegrityCheck.java b/src/main/java/org/jabref/logic/integrity/IntegrityCheck.java index 6de2d71eb4f..b98ecb6a595 100644 --- a/src/main/java/org/jabref/logic/integrity/IntegrityCheck.java +++ b/src/main/java/org/jabref/logic/integrity/IntegrityCheck.java @@ -37,8 +37,9 @@ public IntegrityCheck(BibDatabaseContext bibDatabaseContext, new HTMLCharacterChecker(), new EntryLinkChecker(bibDatabaseContext.getDatabase()), new CitationKeyDeviationChecker(bibDatabaseContext, citationKeyPatternPreferences), - new CitationKeyDuplicationChecker(bibDatabaseContext.getDatabase()) - )); + new CitationKeyDuplicationChecker(bibDatabaseContext.getDatabase()), + new AmpersandChecker() + )); if (bibDatabaseContext.isBiblatexMode()) { entryCheckers.addAll(List.of( new JournalInAbbreviationListChecker(StandardField.JOURNALTITLE, journalAbbreviationRepository), diff --git a/src/main/resources/l10n/JabRef_de.properties b/src/main/resources/l10n/JabRef_de.properties index ef42cbe2ebf..9438a0a5937 100644 --- a/src/main/resources/l10n/JabRef_de.properties +++ b/src/main/resources/l10n/JabRef_de.properties @@ -1561,6 +1561,8 @@ Verse=Vers change\ entries\ of\ group=Ändere Einträge der Gruppe odd\ number\ of\ unescaped\ '\#'=Ungerade Anzahl an '\#' Maskierungszeichen +Found\ %0\ unescaped\ '\&'=%0 unmaskierte(s) '&' gefunden + Show\ diff=Zeige Änderungen Copy\ Version=Kopiere Version Maintainers=Maintainer diff --git a/src/main/resources/l10n/JabRef_en.properties b/src/main/resources/l10n/JabRef_en.properties index 6129349cb44..5ae1aec7593 100644 --- a/src/main/resources/l10n/JabRef_en.properties +++ b/src/main/resources/l10n/JabRef_en.properties @@ -1566,6 +1566,8 @@ Verse=Verse change\ entries\ of\ group=change entries of group odd\ number\ of\ unescaped\ '\#'=odd number of unescaped '#' +Found\ %0\ unescaped\ '\&'=Found %0 unescaped '&' + Show\ diff=Show diff Copy\ Version=Copy Version Maintainers=Maintainers diff --git a/src/test/java/org/jabref/logic/integrity/AmpersandCheckerTest.java b/src/test/java/org/jabref/logic/integrity/AmpersandCheckerTest.java new file mode 100644 index 00000000000..81d9942c162 --- /dev/null +++ b/src/test/java/org/jabref/logic/integrity/AmpersandCheckerTest.java @@ -0,0 +1,68 @@ +package org.jabref.logic.integrity; + +import java.util.Collections; +import java.util.List; +import java.util.stream.Stream; + +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.field.Field; +import org.jabref.model.entry.field.StandardField; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class AmpersandCheckerTest { + + private final AmpersandChecker checker = new AmpersandChecker(); + private final BibEntry entry = new BibEntry(); + + @ParameterizedTest + @MethodSource("provideAcceptedInputs") + void acceptsAllowedInputs(List expected, Field field, String value) { + entry.setField(field, value); + assertEquals(expected, checker.check(entry)); + } + + private static Stream provideAcceptedInputs() { + return Stream.of( + Arguments.of(Collections.emptyList(), StandardField.TITLE, "No ampersand at all"), + Arguments.of(Collections.emptyList(), StandardField.FOREWORD, "Properly escaped \\&"), + Arguments.of(Collections.emptyList(), StandardField.AUTHOR, "\\& Multiple properly escaped \\&"), + Arguments.of(Collections.emptyList(), StandardField.BOOKTITLE, "\\\\\\& With multiple backslashes"), + Arguments.of(Collections.emptyList(), StandardField.COMMENT, "\\\\\\& With multiple backslashes multiple times \\\\\\\\\\&"), + Arguments.of(Collections.emptyList(), StandardField.NOTE, "In the \\& middle of \\\\\\& something") + ); + } + + @ParameterizedTest + @MethodSource("provideUnacceptedInputs") + void rejectsDisallowedInputs(String expectedMessage, Field field, String value) { + entry.setField(field, value); + assertEquals(List.of(new IntegrityMessage(expectedMessage, entry, field)), checker.check(entry)); + } + + private static Stream provideUnacceptedInputs() { + return Stream.of( + Arguments.of("Found 1 unescaped '&'", StandardField.SUBTITLE, "A single &"), + Arguments.of("Found 2 unescaped '&'", StandardField.ABSTRACT, "Multiple \\\\& not properly & escaped"), + Arguments.of("Found 1 unescaped '&'", StandardField.AUTHOR, "To many backslashes \\\\&"), + Arguments.of("Found 2 unescaped '&'", StandardField.LABEL, "\\\\\\\\& Multiple times \\\\& multiple backslashes") + ); + } + + @Test + void entryWithEscapedAndUnescapedAmpersand() { + entry.setField(StandardField.TITLE, "Jack \\& Jill & more"); + assertEquals(List.of(new IntegrityMessage("Found 1 unescaped '&'", entry, StandardField.TITLE)), checker.check(entry)); + } + + @Test + void entryWithMultipleEscapedAndUnescapedAmpersands() { + entry.setField(StandardField.AFTERWORD, "May the force be with you & live long \\\\& prosper \\& to infinity \\\\\\& beyond & assemble \\\\\\\\& excelsior!"); + assertEquals(List.of(new IntegrityMessage("Found 4 unescaped '&'", entry, StandardField.AFTERWORD)), checker.check(entry)); + } +}