Skip to content

Commit

Permalink
Merge pull request #9758 from deybis17/quality-check-escaped-ampersand
Browse files Browse the repository at this point in the history
Quality check escaped ampersand
  • Loading branch information
Siedlerchr authored Apr 15, 2023
2 parents 461e915 + 38c0350 commit b0ac603
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We added support for multiple languages for exporting to and importing references from MS Office. [#9699](https://github.com/JabRef/jabref/issues/9699)
- We enabled scrolling in the groups list when dragging a group on another group. [#2869](https://github.com/JabRef/jabref/pull/2869)
- We added the option to automatically download online files when a new entry is created from an existing ID (e.g. DOI). The option can be disabled in the preferences under "Import and Export" [#9756](https://github.com/JabRef/jabref/issues/9756)
- We added a new Integrity check for unscaped ampersands. [koppor#585](https://github.com/koppor/jabref/issues/585)


### Changed
Expand Down
41 changes: 41 additions & 0 deletions src/main/java/org/jabref/logic/integrity/AmpersandChecker.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package org.jabref.logic.integrity;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;

import org.jabref.logic.l10n.Localization;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.Field;

import com.google.common.base.CharMatcher;

/**
* Checks if the BibEntry contains unescaped ampersands.
*/
public class AmpersandChecker implements EntryChecker {
// matches for an & preceded by any number of \
private static final Pattern BACKSLASH_PRECEDED_AMPERSAND = Pattern.compile("\\\\*&");

@Override
public List<IntegrityMessage> check(BibEntry entry) {
List<IntegrityMessage> results = new ArrayList<>();

for (Map.Entry<Field, String> field : entry.getFieldMap().entrySet()) {
// counts the number of even \ occurrences preceding an &
long unescapedAmpersands = BACKSLASH_PRECEDED_AMPERSAND.matcher(field.getValue())
.results()
.map(MatchResult::group)
.filter(m -> CharMatcher.is('\\').countIn(m) % 2 == 0)
.count();

if (unescapedAmpersands > 0) {
results.add(new IntegrityMessage(Localization.lang("Found %0 unescaped '&'", unescapedAmpersands), entry, field.getKey()));
// note: when changing the message - also do so in tests
}
}
return results;
}
}
5 changes: 3 additions & 2 deletions src/main/java/org/jabref/logic/integrity/IntegrityCheck.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ public IntegrityCheck(BibDatabaseContext bibDatabaseContext,
new HTMLCharacterChecker(),
new EntryLinkChecker(bibDatabaseContext.getDatabase()),
new CitationKeyDeviationChecker(bibDatabaseContext, citationKeyPatternPreferences),
new CitationKeyDuplicationChecker(bibDatabaseContext.getDatabase())
));
new CitationKeyDuplicationChecker(bibDatabaseContext.getDatabase()),
new AmpersandChecker()
));
if (bibDatabaseContext.isBiblatexMode()) {
entryCheckers.addAll(List.of(
new JournalInAbbreviationListChecker(StandardField.JOURNALTITLE, journalAbbreviationRepository),
Expand Down
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_de.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1561,6 +1561,8 @@ Verse=Vers
change\ entries\ of\ group=Ändere Einträge der Gruppe
odd\ number\ of\ unescaped\ '\#'=Ungerade Anzahl an '\#' Maskierungszeichen

Found\ %0\ unescaped\ '\&'=%0 unmaskierte(s) '&' gefunden

Show\ diff=Zeige Änderungen
Copy\ Version=Kopiere Version
Maintainers=Maintainer
Expand Down
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1566,6 +1566,8 @@ Verse=Verse
change\ entries\ of\ group=change entries of group
odd\ number\ of\ unescaped\ '\#'=odd number of unescaped '#'

Found\ %0\ unescaped\ '\&'=Found %0 unescaped '&'

Show\ diff=Show diff
Copy\ Version=Copy Version
Maintainers=Maintainers
Expand Down
68 changes: 68 additions & 0 deletions src/test/java/org/jabref/logic/integrity/AmpersandCheckerTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package org.jabref.logic.integrity;

import java.util.Collections;
import java.util.List;
import java.util.stream.Stream;

import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.StandardField;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertEquals;

public class AmpersandCheckerTest {

private final AmpersandChecker checker = new AmpersandChecker();
private final BibEntry entry = new BibEntry();

@ParameterizedTest
@MethodSource("provideAcceptedInputs")
void acceptsAllowedInputs(List<IntegrityMessage> expected, Field field, String value) {
entry.setField(field, value);
assertEquals(expected, checker.check(entry));
}

private static Stream<Arguments> provideAcceptedInputs() {
return Stream.of(
Arguments.of(Collections.emptyList(), StandardField.TITLE, "No ampersand at all"),
Arguments.of(Collections.emptyList(), StandardField.FOREWORD, "Properly escaped \\&"),
Arguments.of(Collections.emptyList(), StandardField.AUTHOR, "\\& Multiple properly escaped \\&"),
Arguments.of(Collections.emptyList(), StandardField.BOOKTITLE, "\\\\\\& With multiple backslashes"),
Arguments.of(Collections.emptyList(), StandardField.COMMENT, "\\\\\\& With multiple backslashes multiple times \\\\\\\\\\&"),
Arguments.of(Collections.emptyList(), StandardField.NOTE, "In the \\& middle of \\\\\\& something")
);
}

@ParameterizedTest
@MethodSource("provideUnacceptedInputs")
void rejectsDisallowedInputs(String expectedMessage, Field field, String value) {
entry.setField(field, value);
assertEquals(List.of(new IntegrityMessage(expectedMessage, entry, field)), checker.check(entry));
}

private static Stream<Arguments> provideUnacceptedInputs() {
return Stream.of(
Arguments.of("Found 1 unescaped '&'", StandardField.SUBTITLE, "A single &"),
Arguments.of("Found 2 unescaped '&'", StandardField.ABSTRACT, "Multiple \\\\& not properly & escaped"),
Arguments.of("Found 1 unescaped '&'", StandardField.AUTHOR, "To many backslashes \\\\&"),
Arguments.of("Found 2 unescaped '&'", StandardField.LABEL, "\\\\\\\\& Multiple times \\\\& multiple backslashes")
);
}

@Test
void entryWithEscapedAndUnescapedAmpersand() {
entry.setField(StandardField.TITLE, "Jack \\& Jill & more");
assertEquals(List.of(new IntegrityMessage("Found 1 unescaped '&'", entry, StandardField.TITLE)), checker.check(entry));
}

@Test
void entryWithMultipleEscapedAndUnescapedAmpersands() {
entry.setField(StandardField.AFTERWORD, "May the force be with you & live long \\\\& prosper \\& to infinity \\\\\\& beyond & assemble \\\\\\\\& excelsior!");
assertEquals(List.of(new IntegrityMessage("Found 4 unescaped '&'", entry, StandardField.AFTERWORD)), checker.check(entry));
}
}

0 comments on commit b0ac603

Please sign in to comment.