Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Normalize pages formatter not replacing dashes #7243

Merged
merged 16 commits into from
Jan 4, 2021
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve

### Fixed

- We fixed an issue where the "Normalize page numbers" formatter did not replace en-dashes or em-dashes with a hyphen-minus sign. [#7239](https://github.com/JabRef/jabref/issues/7239)
- We fixed an issue with the style of highlighted check boxes while searching in preferences. [#7226](https://github.com/JabRef/jabref/issues/7226)

### Removed
Expand Down
Original file line number Diff line number Diff line change
@@ -1,31 +1,33 @@
package org.jabref.logic.formatter.bibtexfields;

import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.formatter.casechanger.UnprotectTermsFormatter;
import org.jabref.logic.l10n.Localization;

import com.google.common.base.Strings;

/**
* This class includes sensible defaults for consistent formatting of BibTeX page numbers.
*
* <p>
* From BibTeX manual:
* One or more page numbers or range of numbers, such as 42--111 or 7,41,73--97 or 43+
* (the '+' in this last example indicates pages following that don't form a simple range).
* To make it easier to maintain Scribe-compatible databases, the standard styles convert
* a single dash (as in 7-33) to the double dash used in TEX to denote number ranges (as in 7--33).
* <p>
* Examples:
*
* <ul>
* <li><code>1-2</code> <code>1--2</code></li>
* <li><code>1---2</code> <code>1--2</code></li>
* </ul>
*/
public class NormalizePagesFormatter extends Formatter {

// "startpage" and "endpage" are named groups. See http://stackoverflow.com/a/415635/873282 for a documentation
private static final Pattern PAGES_DETECT_PATTERN = Pattern.compile("\\A(?<startpage>(\\d+:)?\\d+)(?:-{1,2}(?<endpage>(\\d+:)?\\d+))?\\Z");
private static final Pattern EM_EN_DASH_PATTERN = Pattern.compile("\u2013|\u2014");

private static final String REJECT_LITERALS = "[^a-zA-Z0-9,\\-\\+,:]";
private static final String PAGES_REPLACE_PATTERN = "${startpage}--${endpage}";
private static final String SINGLE_PAGE_REPLACE_PATTERN = "$1";
private final Formatter unprotectTermsFormatter = new UnprotectTermsFormatter();

@Override
public String getName() {
Expand All @@ -44,11 +46,11 @@ public String getKey() {
* Keeps the existing String if the resulting field does not match the expected Regex.
*
* <example>
* 1-2 -> 1--2
* 1,2,3 -> 1,2,3
* {1}-{2} -> 1--2
* 43+ -> 43+
* Invalid -> Invalid
* 1-2 -> 1--2
* 1,2,3 -> 1,2,3
* {1}-{2} -> 1--2
* 43+ -> 43+
* Invalid -> Invalid
* </example>
*/
@Override
Expand All @@ -63,19 +65,9 @@ public String format(String value) {
// Remove pages prefix
String cleanValue = value.replace("pp.", "").replace("p.", "");
// remove unwanted literals including en dash, em dash, and whitespace
cleanValue = cleanValue.replaceAll("\u2013|\u2014", "-").replaceAll(REJECT_LITERALS, "");
// try to find pages pattern
Matcher matcher = PAGES_DETECT_PATTERN.matcher(cleanValue);
if (matcher.matches()) {
// replace
if (Strings.isNullOrEmpty(matcher.group("endpage"))) {
return matcher.replaceFirst(SINGLE_PAGE_REPLACE_PATTERN);
} else {
return matcher.replaceFirst(PAGES_REPLACE_PATTERN);
}
}
// no replacement
return value;
value = EM_EN_DASH_PATTERN.matcher(cleanValue).replaceAll("-")
.replaceAll("[ ]*[-]+[ ]*", "--");
return unprotectTermsFormatter.format(value.trim());
Copy link
Member

@tobiasdiez tobiasdiez Dec 29, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm also not sure if I-X is correct or I--X for roman page numbers.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Result of test-driven-development 😇. I will think more about it now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm also not sure if I-X is correct or I--X for roman page numbers.

This reads more as a page range, too. Roman 9 is written IX (and not I-X), isn't it?

}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
import org.jabref.logic.protectedterms.ProtectedTermsLoader;
import org.jabref.logic.util.strings.StringLengthComparator;

/**
* Adds {} brackets around acronyms, month names and countries to preserve their case.
*
* Related formatter: {@link org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter}
*/
public class ProtectTermsFormatter extends Formatter {

private final ProtectedTermsLoader protectedTermsLoader;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package org.jabref.logic.formatter.casechanger;

import java.util.Objects;

import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.l10n.Localization;

/**
* Remove {} brackets around words
*
* Related formatter: {@link ProtectTermsFormatter}
*/
public class UnprotectTermsFormatter extends Formatter {

@Override
public String format(String text) {
// similar implementation at {@link org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter.hasNegativeBraceCount}
Objects.requireNonNull(text);
if (text.isEmpty()) {
return text;
}
StringBuilder result = new StringBuilder();
int level = 0;
int index = 0;
do {
char charAtIndex = text.charAt(index);
if (charAtIndex == '{') {
level++;
} else if (charAtIndex == '}') {
level--;
} else {
result.append(charAtIndex);
}
index++;
} while (index < text.length() && level >= 0);
if (level != 0) {
// in case of unbalanced braces, the original text is returned unmodified
return text;
}
return result.toString();
}

@Override
public String getDescription() {
return Localization.lang(
"Removes all {} brackets around words.");
koppor marked this conversation as resolved.
Show resolved Hide resolved
}

@Override
public String getExampleInput() {
return "{In} {CDMA}";
}

@Override
public String getName() {
return Localization.lang("Unprotect terms");
}

@Override
public String getKey() {
return "unprotect_terms";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.l10n.Localization;

/**
* Converts all characters of the given string to upper case, but does not change words starting with "{"
*/
public class UpperCaseFormatter extends Formatter {

@Override
Expand All @@ -15,9 +18,6 @@ public String getKey() {
return "upper_case";
}

/**
* Converts all characters of the given string to upper case, but does not change words starting with "{"
*/
@Override
public String format(String input) {
Title title = new Title(input);
Expand Down
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2284,3 +2284,5 @@ Regular\ expression=Regular expression

Error\ importing.\ See\ the\ error\ log\ for\ details.=Error importing. See the error log for details.

Removes\ all\ {}\ brackets\ around\ words.=Removes all {} brackets around words.
Unprotect\ terms=Unprotect terms
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package org.jabref.logic.formatter.bibtexfields;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.stream.Stream;

import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertEquals;

Expand All @@ -10,102 +13,76 @@
*/
public class NormalizePagesFormatterTest {

private NormalizePagesFormatter formatter;
private final NormalizePagesFormatter formatter = new NormalizePagesFormatter();

@BeforeEach
public void setUp() {
formatter = new NormalizePagesFormatter();
}
private static Stream<Arguments> tests() {
return Stream.of(
// formatSinglePageResultsInNoChange
Arguments.of("1", "1"),

@Test
public void formatSinglePageResultsInNoChange() {
assertEquals("1", formatter.format("1"));
}
// formatPageNumbers
Arguments.of("1--2", "1-2"),

@Test
public void formatPageNumbers() {
assertEquals("1--2", formatter.format("1-2"));
}
// formatPageNumbersCommaSeparated
Arguments.of("1,2,3", "1,2,3"),

@Test
public void formatPageNumbersCommaSeparated() {
assertEquals("1,2,3", formatter.format("1,2,3"));
}
// formatPageNumbersPlusRange
Arguments.of("43+", "43+"),

@Test
public void formatPageNumbersPlusRange() {
assertEquals("43+", formatter.format("43+"));
}
// ignoreWhitespaceInPageNumbers
Arguments.of("1--2", " 1 - 2 "),

@Test
public void ignoreWhitespaceInPageNumbers() {
assertEquals("1--2", formatter.format(" 1 - 2 "));
}
// removeWhitespaceSinglePage
Arguments.of("1", " 1 "),

@Test
public void removeWhitespaceSinglePage() {
assertEquals("1", formatter.format(" 1 "));
}
// removeWhitespacePageRange
Arguments.of("1--2", " 1 -- 2 "),

@Test
public void removeWhitespacePageRange() {
assertEquals("1--2", formatter.format(" 1 -- 2 "));
}
// ignoreWhitespaceInPageNumbersWithDoubleDash
Arguments.of("43--103", "43 -- 103"),

@Test
public void ignoreWhitespaceInPageNumbersWithDoubleDash() {
assertEquals("43--103", formatter.format("43 -- 103"));
}
// keepCorrectlyFormattedPageNumbers
Arguments.of("1--2", "1--2"),

@Test
public void keepCorrectlyFormattedPageNumbers() {
assertEquals("1--2", formatter.format("1--2"));
}
// formatPageNumbersRemoveUnexpectedLiterals
Arguments.of("1--2", "{1}-{2}"),

@Test
public void formatPageNumbersRemoveUnexpectedLiterals() {
assertEquals("1--2", formatter.format("{1}-{2}"));
}
// formatPageNumbersRegexNotMatching
Arguments.of("12", "12"),

@Test
public void formatPageNumbersRegexNotMatching() {
assertEquals("12", formatter.format("12"));
}
// doNotRemoveLetters
Arguments.of("R1--R50", "R1-R50"),

@Test
public void doNotRemoveLetters() {
assertEquals("R1-R50", formatter.format("R1-R50"));
}
// replaceLongDashWithDoubleDash
Arguments.of("1--50", "1 \u2014 50"),

@Test
public void replaceLongDashWithDoubleDash() {
assertEquals("1--50", formatter.format("1 \u2014 50"));
}
// removePagePrefix
Arguments.of("50", "p.50"),

@Test
public void removePagePrefix() {
assertEquals("50", formatter.format("p.50"));
}
// removePagesPrefix
Arguments.of("50", "pp.50"),

@Test
public void removePagesPrefix() {
assertEquals("50", formatter.format("pp.50"));
}
// keep &
Arguments.of("40&50", "40&50"),

@Test
public void formatACMPages() {
// This appears in https://doi.org/10.1145/1658373.1658375
assertEquals("2:1--2:33", formatter.format("2:1-2:33"));
}
// formatACMPages
// This appears in https://doi.org/10.1145/1658373.1658375
Arguments.of("2:1--2:33", "2:1-2:33"),

// keepFormattedACMPages
// This appears in https://doi.org/10.1145/1658373.1658375
Arguments.of("2:1--2:33", "2:1--2:33"),

// formatExample
Arguments.of("1--2", new NormalizePagesFormatter().getExampleInput()),

@Test
public void keepFormattedACMPages() {
// This appears in https://doi.org/10.1145/1658373.1658375
assertEquals("2:1--2:33", formatter.format("2:1--2:33"));
// replaceDashWithMinus
Arguments.of("R404--R405", "R404–R405"));
}

@Test
public void formatExample() {
assertEquals("1--2", formatter.format(formatter.getExampleInput()));
@ParameterizedTest
@MethodSource("tests")
public void test(String expected, String toFormat) {
koppor marked this conversation as resolved.
Show resolved Hide resolved
assertEquals(expected, formatter.format(toFormat));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package org.jabref.logic.formatter.casechanger;

import java.io.IOException;
import java.util.stream.Stream;

import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
* Tests in addition to the general tests from {@link org.jabref.logic.formatter.FormatterTest}
*/
public class UnprotectTermsFormatterTest {

private UnprotectTermsFormatter formatter = new UnprotectTermsFormatter();

private static Stream<Arguments> terms() throws IOException {
return Stream.of(
Arguments.of("VLSI", "{VLSI}"),
Arguments.of("VLsI", "VLsI"),
Arguments.of("VLSI", "VLSI"),
Arguments.of("VLSI VLSI", "{VLSI} {VLSI}"),
Arguments.of("BPEL", "{BPEL}"),
Arguments.of("3GPP 3G", "{3GPP} {3G}"),
Arguments.of("{A} and {B}}", "{A} and {B}}"),
Arguments.of("Testing BPEL Engine Performance: A Survey", "{Testing BPEL Engine Performance: A Survey}"),
Arguments.of("Testing BPEL Engine Performance: A Survey", "Testing {BPEL} Engine Performance: A Survey"),
Arguments.of("Testing BPEL Engine Performance: A Survey", "{Testing {BPEL} Engine Performance: A Survey}"),
Arguments.of("In CDMA", new UnprotectTermsFormatter().getExampleInput()));
}

@ParameterizedTest
@MethodSource("terms")
public void test(String expected, String toFormat) {
assertEquals(expected, formatter.format(toFormat));
}
}