Skip to content

Commit

Permalink
Fix #2775: Hyphens in last names are properly parsed (#3209)
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiasdiez authored and LinusDietz committed Sep 12, 2017
1 parent 58fec29 commit c9445d4
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 33 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- We fixed an issue where metadata syncing between the local and the shared database was unstable. This also fixes the syncing of groups and sub-groups in the database. [#2284](https://github.com/JabRef/jabref/issues/2284)
- We fixed an issue where it was possible to leave the entry editor with an imbalance of braces. [#3167](https://github.com/JabRef/jabref/issues/3167)
- Renaming files now truncates the filename to not exceed the limit of 255 chars [#2622](https://github.com/JabRef/jabref/issues/2622)
- We improved the handling of hyphens in names. [#2775](https://github.com/JabRef/jabref/issues/2775)

### Removed
- We removed support for LatexEditor, as it is not under active development. [#3199](https://github.com/JabRef/jabref/issues/3199)
Expand Down
75 changes: 46 additions & 29 deletions src/main/java/org/jabref/model/entry/AuthorListParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,25 +32,6 @@ public class AuthorListParser {
// Constant HashSet containing names of TeX special characters
private static final Set<String> TEX_NAMES = new HashSet<>();

/** the raw bibtex author/editor field */
private String original;

/** index of the start in original, for example to point to 'abc' in 'abc xyz', tokenStart=2 */
private int tokenStart;

/** index of the end in original, for example to point to 'abc' in 'abc xyz', tokenEnd=5 */
private int tokenEnd;

/** end of token abbreviation (always: tokenStart < tokenAbbr <= tokenEnd), only valid if getToken returns TOKEN_WORD */
private int tokenAbbr;


/** either space of dash */
private char tokenTerm;

/** true if upper-case token, false if lower-case */
private boolean tokenCase;

static {
TEX_NAMES.add("aa");
TEX_NAMES.add("ae");
Expand All @@ -66,6 +47,32 @@ public class AuthorListParser {
TEX_NAMES.add("j");
}

/**
* the raw bibtex author/editor field
*/
private String original;
/**
* index of the start in original, for example to point to 'abc' in 'abc xyz', tokenStart=2
*/
private int tokenStart;
/**
* index of the end in original, for example to point to 'abc' in 'abc xyz', tokenEnd=5
*/
private int tokenEnd;
/**
* end of token abbreviation (always: tokenStart < tokenAbbrEnd <= tokenEnd), only valid if getToken returns
* TOKEN_WORD
*/
private int tokenAbbrEnd;
/**
* either space or dash
*/
private char tokenTerm;
/**
* true if upper-case token, false if lower-case
*/
private boolean tokenCase;

/**
* Parses the String containing person names and returns a list of person information.
*
Expand Down Expand Up @@ -121,7 +128,7 @@ private Optional<Author> getAuthor() {
break;
case TOKEN_WORD:
tokens.add(original.substring(tokenStart, tokenEnd));
tokens.add(original.substring(tokenStart, tokenAbbr));
tokens.add(original.substring(tokenStart, tokenAbbrEnd));
tokens.add(tokenTerm);
tokens.add(tokenCase);
if (commaFirst >= 0) {
Expand All @@ -137,6 +144,13 @@ private Optional<Author> getAuthor() {
// We are in a first name which contained a hyphen
break;
}

int thisTermToken = previousTermToken + TOKEN_GROUP_LENGTH;
if ((thisTermToken >= 0) && tokens.get(thisTermToken).equals('-')) {
// We are in a name which contained a hyphen
break;
}

vonStart = tokens.size() - TOKEN_GROUP_LENGTH;
break;
}
Expand Down Expand Up @@ -194,14 +208,16 @@ private Optional<Author> getAuthor() {
firstPartStart = 0;
}
}
} else { // commas are present: it affects only 'first part' and
// 'junior part'
} else {
// commas are present: it affects only 'first part' and 'junior part'
firstPartEnd = tokens.size();
if (commaSecond < 0) { // one comma
if (commaSecond < 0) {
// one comma
if (commaFirst < firstPartEnd) {
firstPartStart = commaFirst;
}
} else { // two or more commas
} else {
// two or more commas
if (commaSecond < firstPartEnd) {
firstPartStart = commaSecond;
}
Expand Down Expand Up @@ -342,7 +358,7 @@ private int getToken() {
tokenEnd++;
return TOKEN_AND;
}
tokenAbbr = -1;
tokenAbbrEnd = -1;
tokenTerm = ' ';
tokenCase = true;
int bracesLevel = 0;
Expand All @@ -353,8 +369,9 @@ private int getToken() {
if (c == '{') {
bracesLevel++;
}
if (firstLetterIsFound && (tokenAbbr < 0) && ((bracesLevel == 0) || (c == '{'))) {
tokenAbbr = tokenEnd;

if (firstLetterIsFound && (tokenAbbrEnd < 0) && ((bracesLevel == 0) || (c == '{'))) {
tokenAbbrEnd = tokenEnd;
}
if ((c == '}') && (bracesLevel > 0)) {
bracesLevel--;
Expand Down Expand Up @@ -388,8 +405,8 @@ private int getToken() {
}
tokenEnd++;
}
if (tokenAbbr < 0) {
tokenAbbr = tokenEnd;
if (tokenAbbrEnd < 0) {
tokenAbbrEnd = tokenEnd;
}
if ((tokenEnd < original.length()) && (original.charAt(tokenEnd) == '-')) {
tokenTerm = '-';
Expand Down
27 changes: 23 additions & 4 deletions src/test/java/org/jabref/model/entry/AuthorListTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@

public class AuthorListTest {

/**
 * Parses the given author field and reports how many authors it contains.
 *
 * @param bibtex the raw author string handed to {@code AuthorList.parse}
 * @return the value of {@code getNumberOfAuthors()} on the parsed list
 */
public static int size(String bibtex) {
    AuthorList parsedAuthors = AuthorList.parse(bibtex);
    return parsedAuthors.getNumberOfAuthors();
}

@Test
public void testFixAuthorNatbib() {
Assert.assertEquals("", AuthorList.fixAuthorNatbib(""));
Expand Down Expand Up @@ -286,10 +290,6 @@ public void testFixAuthorForAlphabetization() {
.fixAuthorForAlphabetization("John von Neumann and John Smith and de Black Brown, Jr., Peter"));
}

public static int size(String bibtex) {
return AuthorList.parse(bibtex).getNumberOfAuthors();
}

@Test
public void testSize() {

Expand Down Expand Up @@ -625,6 +625,25 @@ public void parseNameWithHyphenInLastName() throws Exception {
Assert.assertEquals(new AuthorList(expected), AuthorList.parse("Firstname Bailey-Jones"));
}

/**
 * Two initials followed by a hyphenated last name: the part after the hyphen
 * (here starting with a braced letter) must stay in the last name.
 */
@Test
public void parseNameWithHyphenInLastNameWithInitials() throws Exception {
    String input = "E. S. El-{M}allah";
    Author hyphenatedAuthor = new Author("E. S.", "E. S.", null, "El-{M}allah", null);
    Assert.assertEquals(new AuthorList(hyphenatedAuthor), AuthorList.parse(input));
}

/**
 * Two initials followed by a hyphenated last name where both halves start
 * with a braced letter: the whole hyphenated token must be the last name.
 */
@Test
public void parseNameWithHyphenInLastNameWithEscaped() throws Exception {
    String input = "E. S. {K}ent-{B}oswell";
    Author hyphenatedAuthor = new Author("E. S.", "E. S.", null, "{K}ent-{B}oswell", null);
    Assert.assertEquals(new AuthorList(hyphenatedAuthor), AuthorList.parse(input));
}

/**
 * "Last, First" input whose last name contains a hyphen: the text before the
 * comma must be kept as one hyphenated last name.
 */
@Test
public void parseNameWithHyphenInLastNameWhenLastNameGivenFirst() throws Exception {
    String input = "al-Ṣāliḥ, ʿAbdallāh";
    // TODO: Fix abbreviation to be "A."
    Author hyphenatedAuthor = new Author("ʿAbdallāh", "ʿ.", null, "al-Ṣāliḥ", null);
    Assert.assertEquals(new AuthorList(hyphenatedAuthor), AuthorList.parse(input));
}

@Test
public void parseNameWithBraces() throws Exception {
Author expected = new Author("H{e}lene", "H.", null, "Fiaux", null);
Expand Down

0 comments on commit c9445d4

Please sign in to comment.