JabRef · calixtus · Jan 4, 2021 · Dec 25, 2020 · Dec 26, 2020 · Dec 28, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,6 +17,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
 
 ### Fixed
 
+- We fixed an issue where the "Normalize page numbers" formatter did not replace en-dashes or em-dashes with a hyphen-minus sign. [#7239](https://github.com/JabRef/jabref/issues/7239)
 - We fixed an issue with the style of highlighted check boxes while searching in preferences. [#7226](https://github.com/JabRef/jabref/issues/7226)
 
 ### Removed

diff --git a/src/main/java/org/jabref/logic/formatter/bibtexfields/NormalizePagesFormatter.java b/src/main/java/org/jabref/logic/formatter/bibtexfields/NormalizePagesFormatter.java
@@ -1,31 +1,33 @@
 package org.jabref.logic.formatter.bibtexfields;
 
 import java.util.Objects;
-import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.jabref.logic.cleanup.Formatter;
+import org.jabref.logic.formatter.casechanger.UnprotectTermsFormatter;
 import org.jabref.logic.l10n.Localization;
 
-import com.google.common.base.Strings;
-
 /**
  * This class includes sensible defaults for consistent formatting of BibTeX page numbers.
- *
+ * <p>
  * From BibTeX manual:
  * One or more page numbers or range of numbers, such as 42--111 or 7,41,73--97 or 43+
  * (the '+' in this last example indicates pages following that don't form a simple range).
  * To make it easier to maintain Scribe-compatible databases, the standard styles convert
  * a single dash (as in 7-33) to the double dash used in TEX to denote number ranges (as in 7--33).
+ * <p>
+ * Examples:
+ *
+ * <ul>
+ *     <li><code>1-2</code> <code>1--2</code></li>
+ *     <li><code>1---2</code> <code>1--2</code></li>
+ * </ul>
  */
 public class NormalizePagesFormatter extends Formatter {
 
-    // "startpage" and "endpage" are named groups. See http://stackoverflow.com/a/415635/873282 for a documentation
-    private static final Pattern PAGES_DETECT_PATTERN = Pattern.compile("\\A(?<startpage>(\\d+:)?\\d+)(?:-{1,2}(?<endpage>(\\d+:)?\\d+))?\\Z");
+    private static final Pattern EM_EN_DASH_PATTERN = Pattern.compile("\u2013|\u2014");
 
-    private static final String REJECT_LITERALS = "[^a-zA-Z0-9,\\-\\+,:]";
-    private static final String PAGES_REPLACE_PATTERN = "${startpage}--${endpage}";
-    private static final String SINGLE_PAGE_REPLACE_PATTERN = "$1";
+    private final Formatter unprotectTermsFormatter = new UnprotectTermsFormatter();
 
     @Override
     public String getName() {
@@ -44,11 +46,11 @@ public String getKey() {
      * Keeps the existing String if the resulting field does not match the expected Regex.
      *
      * <example>
-     *     1-2 -> 1--2
-     *     1,2,3 -> 1,2,3
-     *     {1}-{2} -> 1--2
-     *     43+ -> 43+
-     *     Invalid -> Invalid
+     * 1-2 -> 1--2
+     * 1,2,3 -> 1,2,3
+     * {1}-{2} -> 1--2
+     * 43+ -> 43+
+     * Invalid -> Invalid
      * </example>
      */
     @Override
@@ -63,19 +65,9 @@ public String format(String value) {
         // Remove pages prefix
         String cleanValue = value.replace("pp.", "").replace("p.", "");
         // remove unwanted literals including en dash, em dash, and whitespace
-        cleanValue = cleanValue.replaceAll("\u2013|\u2014", "-").replaceAll(REJECT_LITERALS, "");
-        // try to find pages pattern
-        Matcher matcher = PAGES_DETECT_PATTERN.matcher(cleanValue);
-        if (matcher.matches()) {
-            // replace
-            if (Strings.isNullOrEmpty(matcher.group("endpage"))) {
-                return matcher.replaceFirst(SINGLE_PAGE_REPLACE_PATTERN);
-            } else {
-                return matcher.replaceFirst(PAGES_REPLACE_PATTERN);
-            }
-        }
-        // no replacement
-        return value;
+        value = EM_EN_DASH_PATTERN.matcher(cleanValue).replaceAll("-")
+                                  .replaceAll("[ ]*[-]+[ ]*", "--");
+        return unprotectTermsFormatter.format(value.trim());
     }
 
     @Override

diff --git a/src/main/java/org/jabref/logic/formatter/casechanger/ProtectTermsFormatter.java b/src/main/java/org/jabref/logic/formatter/casechanger/ProtectTermsFormatter.java
@@ -8,6 +8,11 @@
 import org.jabref.logic.protectedterms.ProtectedTermsLoader;
 import org.jabref.logic.util.strings.StringLengthComparator;
 
+/**
+ * Adds {} brackets around acronyms, month names and countries to preserve their case.
+ *
+ * Related formatter: {@link org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter}
+ */
 public class ProtectTermsFormatter extends Formatter {
 
     private final ProtectedTermsLoader protectedTermsLoader;

diff --git a/src/main/java/org/jabref/logic/formatter/casechanger/UnprotectTermsFormatter.java b/src/main/java/org/jabref/logic/formatter/casechanger/UnprotectTermsFormatter.java
@@ -0,0 +1,63 @@
+package org.jabref.logic.formatter.casechanger;
+
+import java.util.Objects;
+
+import org.jabref.logic.cleanup.Formatter;
+import org.jabref.logic.l10n.Localization;
+
+/**
+ * Remove {} brackets around words
+ *
+ * Related formatter: {@link ProtectTermsFormatter}
+ */
+public class UnprotectTermsFormatter extends Formatter {
+
+    @Override
+    public String format(String text) {
+        // similar implementation at {@link org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter.hasNegativeBraceCount}
+        Objects.requireNonNull(text);
+        if (text.isEmpty()) {
+            return text;
+        }
+        StringBuilder result = new StringBuilder();
+        int level = 0;
+        int index = 0;
+        do {
+            char charAtIndex = text.charAt(index);
+            if (charAtIndex == '{') {
+                level++;
+            } else if (charAtIndex == '}') {
+                level--;
+            } else {
+                result.append(charAtIndex);
+            }
+            index++;
+        } while (index < text.length() && level >= 0);
+        if (level != 0) {
+            // in case of unbalanced braces, the original text is returned unmodified
+            return text;
+        }
+        return result.toString();
+    }
+
+    @Override
+    public String getDescription() {
+        return Localization.lang(
+                "Removes all {} brackets around words.");
+    }
+
+    @Override
+    public String getExampleInput() {
+        return "{In} {CDMA}";
+    }
+
+    @Override
+    public String getName() {
+        return Localization.lang("Unprotect terms");
+    }
+
+    @Override
+    public String getKey() {
+        return "unprotect_terms";
+    }
+}
diff --git a/src/main/java/org/jabref/logic/formatter/casechanger/UpperCaseFormatter.java b/src/main/java/org/jabref/logic/formatter/casechanger/UpperCaseFormatter.java
@@ -3,6 +3,9 @@
 import org.jabref.logic.cleanup.Formatter;
 import org.jabref.logic.l10n.Localization;
 
+/**
+ * Converts all characters of the given string to upper case, but does not change words starting with "{"
+ */
 public class UpperCaseFormatter extends Formatter {
 
     @Override
@@ -15,9 +18,6 @@ public String getKey() {
         return "upper_case";
     }
 
-    /**
-     * Converts all characters of the given string to upper case, but does not change words starting with "{"
-     */
     @Override
     public String format(String input) {
         Title title = new Title(input);

diff --git a/src/main/resources/l10n/JabRef_en.properties b/src/main/resources/l10n/JabRef_en.properties
@@ -2284,3 +2284,5 @@ Regular\ expression=Regular expression
 
 Error\ importing.\ See\ the\ error\ log\ for\ details.=Error importing. See the error log for details.
 
+Removes\ all\ {}\ brackets\ around\ words.=Removes all {} brackets around words.
+Unprotect\ terms=Unprotect terms
diff --git a/src/test/java/org/jabref/logic/formatter/bibtexfields/NormalizePagesFormatterTest.java b/src/test/java/org/jabref/logic/formatter/bibtexfields/NormalizePagesFormatterTest.java
@@ -1,7 +1,10 @@
 package org.jabref.logic.formatter.bibtexfields;
 
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
+import java.util.stream.Stream;
+
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
@@ -10,102 +13,76 @@
  */
 public class NormalizePagesFormatterTest {
 
-    private NormalizePagesFormatter formatter;
+    private final NormalizePagesFormatter formatter = new NormalizePagesFormatter();
 
-    @BeforeEach
-    public void setUp() {
-        formatter = new NormalizePagesFormatter();
-    }
+    private static Stream<Arguments> tests() {
+        return Stream.of(
+                // formatSinglePageResultsInNoChange
+                Arguments.of("1", "1"),
 
-    @Test
-    public void formatSinglePageResultsInNoChange() {
-        assertEquals("1", formatter.format("1"));
-    }
+                // formatPageNumbers
+                Arguments.of("1--2", "1-2"),
 
-    @Test
-    public void formatPageNumbers() {
-        assertEquals("1--2", formatter.format("1-2"));
-    }
+                // formatPageNumbersCommaSeparated
+                Arguments.of("1,2,3", "1,2,3"),
 
-    @Test
-    public void formatPageNumbersCommaSeparated() {
-        assertEquals("1,2,3", formatter.format("1,2,3"));
-    }
+                // formatPageNumbersPlusRange
+                Arguments.of("43+", "43+"),
 
-    @Test
-    public void formatPageNumbersPlusRange() {
-        assertEquals("43+", formatter.format("43+"));
-    }
+                // ignoreWhitespaceInPageNumbers
+                Arguments.of("1--2", "   1  - 2 "),
 
-    @Test
-    public void ignoreWhitespaceInPageNumbers() {
-        assertEquals("1--2", formatter.format("   1  - 2 "));
-    }
+                // removeWhitespaceSinglePage
+                Arguments.of("1", "   1  "),
 
-    @Test
-    public void removeWhitespaceSinglePage() {
-        assertEquals("1", formatter.format("   1  "));
-    }
+                // removeWhitespacePageRange
+                Arguments.of("1--2", "   1 -- 2  "),
 
-    @Test
-    public void removeWhitespacePageRange() {
-        assertEquals("1--2", formatter.format("   1 -- 2  "));
-    }
+                // ignoreWhitespaceInPageNumbersWithDoubleDash
+                Arguments.of("43--103", "43 -- 103"),
 
-    @Test
-    public void ignoreWhitespaceInPageNumbersWithDoubleDash() {
-        assertEquals("43--103", formatter.format("43 -- 103"));
-    }
+                // keepCorrectlyFormattedPageNumbers
+                Arguments.of("1--2", "1--2"),
 
-    @Test
-    public void keepCorrectlyFormattedPageNumbers() {
-        assertEquals("1--2", formatter.format("1--2"));
-    }
+                // formatPageNumbersRemoveUnexpectedLiterals
+                Arguments.of("1--2", "{1}-{2}"),
 
-    @Test
-    public void formatPageNumbersRemoveUnexpectedLiterals() {
-        assertEquals("1--2", formatter.format("{1}-{2}"));
-    }
+                // formatPageNumbersRegexNotMatching
+                Arguments.of("12", "12"),
 
-    @Test
-    public void formatPageNumbersRegexNotMatching() {
-        assertEquals("12", formatter.format("12"));
-    }
+                // doNotRemoveLetters
+                Arguments.of("R1--R50", "R1-R50"),
 
-    @Test
-    public void doNotRemoveLetters() {
-        assertEquals("R1-R50", formatter.format("R1-R50"));
-    }
+                // replaceLongDashWithDoubleDash
+                Arguments.of("1--50", "1 \u2014 50"),
 
-    @Test
-    public void replaceLongDashWithDoubleDash() {
-        assertEquals("1--50", formatter.format("1 \u2014 50"));
-    }
+                // removePagePrefix
+                Arguments.of("50", "p.50"),
 
-    @Test
-    public void removePagePrefix() {
-        assertEquals("50", formatter.format("p.50"));
-    }
+                // removePagesPrefix
+                Arguments.of("50", "pp.50"),
 
-    @Test
-    public void removePagesPrefix() {
-        assertEquals("50", formatter.format("pp.50"));
-    }
+                // keep &
+                Arguments.of("40&50", "40&50"),
 
-    @Test
-    public void formatACMPages() {
-        // This appears in https://doi.org/10.1145/1658373.1658375
-        assertEquals("2:1--2:33", formatter.format("2:1-2:33"));
-    }
+                // formatACMPages
+                // This appears in https://doi.org/10.1145/1658373.1658375
+                Arguments.of("2:1--2:33", "2:1-2:33"),
+
+                // keepFormattedACMPages
+                // This appears in https://doi.org/10.1145/1658373.1658375
+                Arguments.of("2:1--2:33", "2:1--2:33"),
+
+                // formatExample
+                Arguments.of("1--2", new NormalizePagesFormatter().getExampleInput()),
 
-    @Test
-    public void keepFormattedACMPages() {
-        // This appears in https://doi.org/10.1145/1658373.1658375
-        assertEquals("2:1--2:33", formatter.format("2:1--2:33"));
+                // replaceDashWithMinus
+                Arguments.of("R404--R405", "R404–R405"));
     }
 
-    @Test
-    public void formatExample() {
-        assertEquals("1--2", formatter.format(formatter.getExampleInput()));
+    @ParameterizedTest
+    @MethodSource("tests")
+    public void test(String expected, String toFormat) {
+        assertEquals(expected, formatter.format(toFormat));
     }
 }
diff --git a/src/test/java/org/jabref/logic/formatter/casechanger/UnprotectTermsFormatterTest.java b/src/test/java/org/jabref/logic/formatter/casechanger/UnprotectTermsFormatterTest.java
@@ -0,0 +1,39 @@
+package org.jabref.logic.formatter.casechanger;
+
+import java.io.IOException;
+import java.util.stream.Stream;
+
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Tests in addition to the general tests from {@link org.jabref.logic.formatter.FormatterTest}
+ */
+public class UnprotectTermsFormatterTest {
+
+    private UnprotectTermsFormatter formatter = new UnprotectTermsFormatter();
+
+    private static Stream<Arguments> terms() throws IOException {
+        return Stream.of(
+                Arguments.of("VLSI", "{VLSI}"),
+                Arguments.of("VLsI", "VLsI"),
+                Arguments.of("VLSI", "VLSI"),
+                Arguments.of("VLSI VLSI", "{VLSI} {VLSI}"),
+                Arguments.of("BPEL", "{BPEL}"),
+                Arguments.of("3GPP 3G", "{3GPP} {3G}"),
+                Arguments.of("{A} and {B}}", "{A} and {B}}"),
+                Arguments.of("Testing BPEL Engine Performance: A Survey", "{Testing BPEL Engine Performance: A Survey}"),
+                Arguments.of("Testing BPEL Engine Performance: A Survey", "Testing {BPEL} Engine Performance: A Survey"),
+                Arguments.of("Testing BPEL Engine Performance: A Survey", "{Testing {BPEL} Engine Performance: A Survey}"),
+                Arguments.of("In CDMA", new UnprotectTermsFormatter().getExampleInput()));
+    }
+
+    @ParameterizedTest
+    @MethodSource("terms")
+    public void test(String expected, String toFormat) {
+        assertEquals(expected, formatter.format(toFormat));
+    }
+}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -2284,3 +2284,5 @@ Regular\ expression=Regular expression

		Error\ importing.\ See\ the\ error\ log\ for\ details.=Error importing. See the error log for details.

		Removes\ all\ {}\ brackets\ around\ words.=Removes all {} brackets around words.
		Unprotect\ terms=Unprotect terms