Fix integrity check for tilde accents in author names (#9097)

* Solving tilde error in author names
* Fixed checkstyle and removed superfluous new test class
* remove latex html conversion and rtf chart
* add test to authorlist parser
* parameterized tests
* fix test
* fix test
* Convert to parameterized test
* fix test and checkstyle
* fix checkstyle and tests
* Fixed tabstop
* add changelog

Co-authored-by: Carl Christian Snethlage <50491877+calixtus@users.noreply.github.com>
Co-authored-by: Siedlerchr <siedlerkiller@gmail.com>
JabRef · Sep 3, 2022 · ad9aa62 · ad9aa62
1 parent 6c19393
commit ad9aa62
Show file tree

Hide file tree

Showing 7 changed files with 58 additions and 83 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -30,6 +30,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
 
 ### Fixed
 
+- We fixed an issue where author names with tilde accents (for example ñ) were marked as "Names are not in the standard BibTeX format" [#8071](https://github.com/JabRef/jabref/issues/8071)
 - We fixed an issue where the possibility to generate a subdatabase from an aux file was writing empty files when called from the commandline [#9115](https://github.com/JabRef/jabref/issues/9115), [forum#3516](https://discourse.jabref.org/t/export-subdatabase-from-aux-file-on-macos-command-line/3516)
 - We fixed the display of issue, number, eid and pages fields in the entry preview. [#8607](https://github.com/JabRef/jabref/pull/8607), [#8372](https://github.com/JabRef/jabref/issues/8372), [Koppor#514](https://github.com/koppor/jabref/issues/514), [forum#2390](https://discourse.jabref.org/t/unable-to-edit-my-bibtex-file-that-i-used-before-vers-5-1/2390), [forum#3462](https://discourse.jabref.org/t/jabref-5-6-need-help-with-export-from-jabref-to-microsoft-word-entry-preview-of-apa-7-not-rendering-correctly/3462)
 - We fixed the page ranges checker to detect article numbers in the pages field (used at [Check Integrity](https://docs.jabref.org/finding-sorting-and-cleaning-entries/checkintegrity)). [#8607](https://github.com/JabRef/jabref/pull/8607)

diff --git a/src/main/java/org/jabref/logic/importer/AuthorListParser.java b/src/main/java/org/jabref/logic/importer/AuthorListParser.java
@@ -447,7 +447,7 @@ private Token getToken() {
             if (c == '\\') {
                 currentBackslash = tokenEnd;
             }
-            if ((bracesLevel == 0) && ((",;~-".indexOf(c) != -1) || Character.isWhitespace(c))) {
+            if ((bracesLevel == 0) && ((",;-".indexOf(c) != -1) || Character.isWhitespace(c))) {
                 break;
             }
             tokenEnd++;

diff --git a/src/main/java/org/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java b/src/main/java/org/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java
@@ -762,6 +762,7 @@ public class HTMLUnicodeConversionMaps {
             {"119978", "Oscr", "$\\mathcal{O}$"}, // script capital O -- possibly use \mathscr
             {"119984", "Uscr", "$\\mathcal{U}$"}, // script capital U -- possibly use \mathscr
             {"120598", "", "$\\epsilon$"}, // mathematical italic epsilon U+1D716 -- requires amsmath
+            {"120599", "", "{{\\˜{n}}}"}, // n with tilde
     };
 
     // List of combining accents
@@ -888,7 +889,6 @@ public class HTMLUnicodeConversionMaps {
         // Manual corrections
         LATEX_HTML_CONVERSION_MAP.put("AA", "&Aring;"); // Overwritten by &angst; which is less supported
         LATEX_UNICODE_CONVERSION_MAP.put("AA", "Å"); // Overwritten by Ångstrom symbol
-        LATEX_UNICODE_CONVERSION_MAP.put("'n", "ń");
 
         // Manual additions
         // Support relax to the extent that it is simply removed

diff --git a/src/main/java/org/jabref/logic/util/strings/RtfCharMap.java b/src/main/java/org/jabref/logic/util/strings/RtfCharMap.java
@@ -4,7 +4,7 @@
 
 public class RtfCharMap {
 
-    private HashMap<String, String> rtfMap = new HashMap<>();
+    private final HashMap<String, String> rtfMap = new HashMap<>();
 
     public RtfCharMap() {
         put("`a", "\\'e0");

diff --git a/src/test/java/org/jabref/logic/formatter/bibtexfields/HtmlToUnicodeFormatterTest.java b/src/test/java/org/jabref/logic/formatter/bibtexfields/HtmlToUnicodeFormatterTest.java
@@ -1,60 +1,40 @@
 package org.jabref.logic.formatter.bibtexfields;
 
+import java.util.stream.Stream;
+
 import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class HtmlToUnicodeFormatterTest {
 
     private HtmlToUnicodeFormatter formatter;
 
+    private static Stream<Arguments> data() {
+        return Stream.of(
+                         Arguments.of("abc", "abc"),
+                         Arguments.of("åäö", "&aring;&auml;&ouml;"),
+                         Arguments.of("í", "i&#x301;"),
+                         Arguments.of("Ε", "&Epsilon;"),
+                         Arguments.of("ä", "&auml;"),
+                         Arguments.of("ä", "&#228;"),
+                         Arguments.of("ä", "&#xe4;"),
+                         Arguments.of("ñ", "&#241;"),
+                         Arguments.of("aaa", "<p>aaa</p>"),
+                         Arguments.of("bread & butter", "<b>bread</b> &amp; butter"));
+    }
+
     @BeforeEach
     public void setUp() {
         formatter = new HtmlToUnicodeFormatter();
     }
 
-    @Test
-    public void formatWithoutHtmlCharactersReturnsSameString() {
-        assertEquals("abc", formatter.format("abc"));
-    }
-
-    @Test
-    public void formatMultipleHtmlCharacters() {
-        assertEquals("åäö", formatter.format("&aring;&auml;&ouml;"));
-    }
-
-    @Test
-    public void formatCombinedAccent() {
-        assertEquals("í", formatter.format("i&#x301;"));
-    }
-
-    @Test
-    public void testBasic() {
-        assertEquals("aaa", formatter.format("aaa"));
-    }
-
-    @Test
-    public void testUmlauts() {
-        assertEquals("ä", formatter.format("&auml;"));
-        assertEquals("ä", formatter.format("&#228;"));
-        assertEquals("ä", formatter.format("&#xe4;"));
-    }
-
-    @Test
-    public void testGreekLetter() {
-        assertEquals("Ε", formatter.format("&Epsilon;"));
-    }
-
-    @Test
-    public void testHTMLRemoveTags() {
-        assertEquals("aaa", formatter.format("<p>aaa</p>"));
-    }
-
-    @Test
-    public void formatExample() {
-        assertEquals("bread & butter", formatter.format(formatter.getExampleInput()));
+    @ParameterizedTest
+    @MethodSource("data")
+    void testFormatterWorksCorrectly(String expected, String input) {
+        assertEquals(expected, formatter.format(input));
     }
 }
-
-
diff --git a/src/test/java/org/jabref/logic/importer/AuthorListParserTest.java b/src/test/java/org/jabref/logic/importer/AuthorListParserTest.java
@@ -25,7 +25,8 @@ private static Stream<Arguments> data() {
                 Arguments.of("de la Vallée Poussin, Jean Charles Gabriel", new Author("Jean Charles Gabriel", "J. C. G.", "de la", "Vallée Poussin", null)),
                 Arguments.of("de la Vallée Poussin, J. C. G.", new Author("J. C. G.", "J. C. G.", "de la", "Vallée Poussin", null)),
                 Arguments.of("{K}ent-{B}oswell, E. S.", new Author("E. S.", "E. S.", null, "{K}ent-{B}oswell", null)),
-                Arguments.of("Uhlenhaut, N Henriette", new Author("N Henriette", "N. H.", null, "Uhlenhaut", null))
+                Arguments.of("Uhlenhaut, N Henriette", new Author("N Henriette", "N. H.", null, "Uhlenhaut", null)),
+                Arguments.of("Nu{\\~{n}}ez, Jose", new Author("Jose", "J.", null, "Nu{\\~{n}}ez", null))
         );
     }
 

diff --git a/src/test/java/org/jabref/logic/integrity/PersonNamesCheckerTest.java b/src/test/java/org/jabref/logic/integrity/PersonNamesCheckerTest.java
@@ -29,48 +29,35 @@ public void setUp() throws Exception {
         checkerb = new PersonNamesChecker(database);
     }
 
-    @Test
-    public void validNameFirstnameAuthor() throws Exception {
-        assertEquals(Optional.empty(), checker.checkValue("Kolb, Stefan"));
+    @ParameterizedTest
+    @MethodSource("provideValidNames")
+    public void validNames(String name) {
+        assertEquals(Optional.empty(), checker.checkValue(name));
     }
 
-    @Test
-    public void validNameFirstnameAuthors() throws Exception {
-        assertEquals(Optional.empty(), checker.checkValue("Kolb, Stefan and Harrer, Simon"));
-    }
+    private static Stream<String> provideValidNames() {
+        return Stream.of(
+                "Kolb, Stefan",                     // single [Name, Firstname]
+                "Kolb, Stefan and Harrer, Simon",   // multiple [Name, Firstname]
+                "Stefan Kolb",                      // single [Firstname Name]
+                "Stefan Kolb and Simon Harrer",     // multiple [Firstname Name]
 
-    @Test
-    public void validFirstnameNameAuthor() throws Exception {
-        assertEquals(Optional.empty(), checker.checkValue("Stefan Kolb"));
-    }
+                "M. J. Gotay",                      // second name in front
 
-    @Test
-    public void validFirstnameNameAuthors() throws Exception {
-        assertEquals(Optional.empty(), checker.checkValue("Stefan Kolb and Simon Harrer"));
+                "{JabRef}",                         // corporate name in brackets
+                "{JabRef} and Stefan Kolb",         // mixed corporate name with name
+                "{JabRef} and Kolb, Stefan",
+
+                "hugo Para{\\~n}os"                 // tilde in name
+        );
     }
 
     @Test
-    public void complainAboutPersonStringWithTwoManyCommas() throws Exception {
+    public void complainAboutPersonStringWithTwoManyCommas() {
         assertEquals(Optional.of("Names are not in the standard BibTeX format."),
                 checker.checkValue("Test1, Test2, Test3, Test4, Test5, Test6"));
     }
 
-    @Test
-    public void doNotComplainAboutSecondNameInFront() throws Exception {
-        assertEquals(Optional.empty(), checker.checkValue("M. J. Gotay"));
-    }
-
-    @Test
-    public void validCorporateNameInBrackets() throws Exception {
-        assertEquals(Optional.empty(), checker.checkValue("{JabRef}"));
-    }
-
-    @Test
-    public void validCorporateNameAndPerson() throws Exception {
-        assertEquals(Optional.empty(), checker.checkValue("{JabRef} and Stefan Kolb"));
-        assertEquals(Optional.empty(), checker.checkValue("{JabRef} and Kolb, Stefan"));
-    }
-
     @ParameterizedTest
     @MethodSource("provideCorrectFormats")
     public void authorNameInCorrectFormatsShouldNotComplain(String input) {
@@ -84,13 +71,19 @@ public void authorNameInIncorrectFormatsShouldComplain(String input) {
     }
 
     private static Stream<String> provideCorrectFormats() {
-        return Stream.of("", "Knuth", "Donald E. Knuth and Kurt Cobain and A. Einstein");
+        return Stream.of(
+                "",
+                "Knuth",
+                "Donald E. Knuth and Kurt Cobain and A. Einstein");
     }
 
     private static Stream<String> provideIncorrectFormats() {
-        return Stream.of("   Knuth, Donald E. ",
-                         "Knuth, Donald E. and Kurt Cobain and A. Einstein",
-                                      ", and Kurt Cobain and A. Einstein", "Donald E. Knuth and Kurt Cobain and ,",
-                         "and Kurt Cobain and A. Einstein", "Donald E. Knuth and Kurt Cobain and");
+        return Stream.of(
+                "   Knuth, Donald E. ",
+                "Knuth, Donald E. and Kurt Cobain and A. Einstein",
+                ", and Kurt Cobain and A. Einstein",
+                "Donald E. Knuth and Kurt Cobain and ,",
+                "and Kurt Cobain and A. Einstein",
+                "Donald E. Knuth and Kurt Cobain and");
     }
 }