Skip to content

Commit

Permalink
Fix integrity check for tilde accents in author names (#9097)
Browse files Browse the repository at this point in the history
* Solving tilde error in author names

* Fixed checkstyle and removed superfluous new test class

* remove latex html conversion and rtf char map

* add test to authorlist parser

* parameterized tests

* fix test

* fix test

* Convert to parameterized test

* fix test and checkstyle

* fix checkstyle and tests

* Fixed tabstop

* add changelog

Co-authored-by: Carl Christian Snethlage <50491877+calixtus@users.noreply.github.com>
Co-authored-by: Siedlerchr <siedlerkiller@gmail.com>
  • Loading branch information
3 people authored Sep 3, 2022
1 parent 6c19393 commit ad9aa62
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 83 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve

### Fixed

- We fixed an issue where author names with tilde accents (for example ñ) were marked as "Names are not in the standard BibTeX format" [#8071](https://github.com/JabRef/jabref/issues/8071)
- We fixed an issue where the possibility to generate a subdatabase from an aux file was writing empty files when called from the commandline [#9115](https://github.com/JabRef/jabref/issues/9115), [forum#3516](https://discourse.jabref.org/t/export-subdatabase-from-aux-file-on-macos-command-line/3516)
- We fixed the display of issue, number, eid and pages fields in the entry preview. [#8607](https://github.com/JabRef/jabref/pull/8607), [#8372](https://github.com/JabRef/jabref/issues/8372), [Koppor#514](https://github.com/koppor/jabref/issues/514), [forum#2390](https://discourse.jabref.org/t/unable-to-edit-my-bibtex-file-that-i-used-before-vers-5-1/2390), [forum#3462](https://discourse.jabref.org/t/jabref-5-6-need-help-with-export-from-jabref-to-microsoft-word-entry-preview-of-apa-7-not-rendering-correctly/3462)
- We fixed the page ranges checker to detect article numbers in the pages field (used at [Check Integrity](https://docs.jabref.org/finding-sorting-and-cleaning-entries/checkintegrity)). [#8607](https://github.com/JabRef/jabref/pull/8607)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ private Token getToken() {
if (c == '\\') {
currentBackslash = tokenEnd;
}
if ((bracesLevel == 0) && ((",;~-".indexOf(c) != -1) || Character.isWhitespace(c))) {
if ((bracesLevel == 0) && ((",;-".indexOf(c) != -1) || Character.isWhitespace(c))) {
break;
}
tokenEnd++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,7 @@ public class HTMLUnicodeConversionMaps {
{"119978", "Oscr", "$\\mathcal{O}$"}, // script capital O -- possibly use \mathscr
{"119984", "Uscr", "$\\mathcal{U}$"}, // script capital U -- possibly use \mathscr
{"120598", "", "$\\epsilon$"}, // mathematical italic epsilon U+1D716 -- requires amsmath
// NOTE(review): 120599 is the code point right after the U+1D716 entry above (i.e. U+1D717),
// whereas "ñ" is code point 241 (U+00F1); the LaTeX value also uses the character "˜" rather
// than the ASCII tilde of the usual \~{n} accent command -- confirm this entry is intended.
{"120599", "", "{{\\˜{n}}}"}, // n with tilde
};

// List of combining accents
Expand Down Expand Up @@ -888,7 +889,6 @@ public class HTMLUnicodeConversionMaps {
// Manual corrections
LATEX_HTML_CONVERSION_MAP.put("AA", "&Aring;"); // Overwritten by &angst; which is less supported
LATEX_UNICODE_CONVERSION_MAP.put("AA", "Å"); // Overwritten by Ångstrom symbol
LATEX_UNICODE_CONVERSION_MAP.put("'n", "ń");

// Manual additions
// Support relax to the extent that it is simply removed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

public class RtfCharMap {

private HashMap<String, String> rtfMap = new HashMap<>();
private final HashMap<String, String> rtfMap = new HashMap<>();

public RtfCharMap() {
put("`a", "\\'e0");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,60 +1,40 @@
package org.jabref.logic.formatter.bibtexfields;

import java.util.stream.Stream;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertEquals;

public class HtmlToUnicodeFormatterTest {

private HtmlToUnicodeFormatter formatter;

private static Stream<Arguments> data() {
return Stream.of(
Arguments.of("abc", "abc"),
Arguments.of("åäö", "&aring;&auml;&ouml;"),
Arguments.of("í", "i&#x301;"),
Arguments.of("Ε", "&Epsilon;"),
Arguments.of("ä", "&auml;"),
Arguments.of("ä", "&#228;"),
Arguments.of("ä", "&#xe4;"),
Arguments.of("ñ", "&#241;"),
Arguments.of("aaa", "<p>aaa</p>"),
Arguments.of("bread & butter", "<b>bread</b> &amp; butter"));
}

@BeforeEach
public void setUp() {
formatter = new HtmlToUnicodeFormatter();
}

@Test
public void formatWithoutHtmlCharactersReturnsSameString() {
assertEquals("abc", formatter.format("abc"));
}

@Test
public void formatMultipleHtmlCharacters() {
assertEquals("åäö", formatter.format("&aring;&auml;&ouml;"));
}

@Test
public void formatCombinedAccent() {
assertEquals("í", formatter.format("i&#x301;"));
}

@Test
public void testBasic() {
assertEquals("aaa", formatter.format("aaa"));
}

@Test
public void testUmlauts() {
assertEquals("ä", formatter.format("&auml;"));
assertEquals("ä", formatter.format("&#228;"));
assertEquals("ä", formatter.format("&#xe4;"));
}

@Test
public void testGreekLetter() {
assertEquals("Ε", formatter.format("&Epsilon;"));
}

@Test
public void testHTMLRemoveTags() {
assertEquals("aaa", formatter.format("<p>aaa</p>"));
}

@Test
public void formatExample() {
assertEquals("bread & butter", formatter.format(formatter.getExampleInput()));
@ParameterizedTest
@MethodSource("data")
void testFormatterWorksCorrectly(String expected, String input) {
assertEquals(expected, formatter.format(input));
}
}


Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ private static Stream<Arguments> data() {
Arguments.of("de la Vallée Poussin, Jean Charles Gabriel", new Author("Jean Charles Gabriel", "J. C. G.", "de la", "Vallée Poussin", null)),
Arguments.of("de la Vallée Poussin, J. C. G.", new Author("J. C. G.", "J. C. G.", "de la", "Vallée Poussin", null)),
Arguments.of("{K}ent-{B}oswell, E. S.", new Author("E. S.", "E. S.", null, "{K}ent-{B}oswell", null)),
Arguments.of("Uhlenhaut, N Henriette", new Author("N Henriette", "N. H.", null, "Uhlenhaut", null))
Arguments.of("Uhlenhaut, N Henriette", new Author("N Henriette", "N. H.", null, "Uhlenhaut", null)),
Arguments.of("Nu{\\~{n}}ez, Jose", new Author("Jose", "J.", null, "Nu{\\~{n}}ez", null))
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,48 +29,35 @@ public void setUp() throws Exception {
checkerb = new PersonNamesChecker(database);
}

@Test
public void validNameFirstnameAuthor() throws Exception {
assertEquals(Optional.empty(), checker.checkValue("Kolb, Stefan"));
@ParameterizedTest
@MethodSource("provideValidNames")
public void validNames(String name) {
assertEquals(Optional.empty(), checker.checkValue(name));
}

@Test
public void validNameFirstnameAuthors() throws Exception {
assertEquals(Optional.empty(), checker.checkValue("Kolb, Stefan and Harrer, Simon"));
}
private static Stream<String> provideValidNames() {
return Stream.of(
"Kolb, Stefan", // single [Name, Firstname]
"Kolb, Stefan and Harrer, Simon", // multiple [Name, Firstname]
"Stefan Kolb", // single [Firstname Name]
"Stefan Kolb and Simon Harrer", // multiple [Firstname Name]

@Test
public void validFirstnameNameAuthor() throws Exception {
assertEquals(Optional.empty(), checker.checkValue("Stefan Kolb"));
}
"M. J. Gotay", // second name in front

@Test
public void validFirstnameNameAuthors() throws Exception {
assertEquals(Optional.empty(), checker.checkValue("Stefan Kolb and Simon Harrer"));
"{JabRef}", // corporate name in brackets
"{JabRef} and Stefan Kolb", // mixed corporate name with name
"{JabRef} and Kolb, Stefan",

"hugo Para{\\~n}os" // tilde in name
);
}

@Test
public void complainAboutPersonStringWithTwoManyCommas() throws Exception {
public void complainAboutPersonStringWithTwoManyCommas() {
assertEquals(Optional.of("Names are not in the standard BibTeX format."),
checker.checkValue("Test1, Test2, Test3, Test4, Test5, Test6"));
}

@Test
public void doNotComplainAboutSecondNameInFront() throws Exception {
assertEquals(Optional.empty(), checker.checkValue("M. J. Gotay"));
}

@Test
public void validCorporateNameInBrackets() throws Exception {
assertEquals(Optional.empty(), checker.checkValue("{JabRef}"));
}

@Test
public void validCorporateNameAndPerson() throws Exception {
assertEquals(Optional.empty(), checker.checkValue("{JabRef} and Stefan Kolb"));
assertEquals(Optional.empty(), checker.checkValue("{JabRef} and Kolb, Stefan"));
}

@ParameterizedTest
@MethodSource("provideCorrectFormats")
public void authorNameInCorrectFormatsShouldNotComplain(String input) {
Expand All @@ -84,13 +71,19 @@ public void authorNameInIncorrectFormatsShouldComplain(String input) {
}

private static Stream<String> provideCorrectFormats() {
return Stream.of("", "Knuth", "Donald E. Knuth and Kurt Cobain and A. Einstein");
return Stream.of(
"",
"Knuth",
"Donald E. Knuth and Kurt Cobain and A. Einstein");
}

private static Stream<String> provideIncorrectFormats() {
return Stream.of(" Knuth, Donald E. ",
"Knuth, Donald E. and Kurt Cobain and A. Einstein",
", and Kurt Cobain and A. Einstein", "Donald E. Knuth and Kurt Cobain and ,",
"and Kurt Cobain and A. Einstein", "Donald E. Knuth and Kurt Cobain and");
return Stream.of(
" Knuth, Donald E. ",
"Knuth, Donald E. and Kurt Cobain and A. Einstein",
", and Kurt Cobain and A. Einstein",
"Donald E. Knuth and Kurt Cobain and ,",
"and Kurt Cobain and A. Einstein",
"Donald E. Knuth and Kurt Cobain and");
}
}

0 comments on commit ad9aa62

Please sign in to comment.