From e54499f9f039fc0cf541734ac6baa5cccb8461ed Mon Sep 17 00:00:00 2001 From: Ronald Brill Date: Thu, 22 Dec 2022 09:40:37 +0100 Subject: [PATCH] Support for unicode point escapes added when converting regex from js to java (issue #535) Minor fixes for unicode handling when converting regex from js to java. --- src/changes/changes.xml | 6 + .../regexp/RegExpJsToJavaConverter.java | 22 +++- .../regexp/RegExpJsToJavaConverter2Test.java | 113 ++++++++++++++++++ .../regexp/RegExpJsToJavaConverterTest.java | 22 +++- 4 files changed, 156 insertions(+), 7 deletions(-) create mode 100644 src/test/java/com/gargoylesoftware/htmlunit/javascript/regexp/RegExpJsToJavaConverter2Test.java diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 11a621939e8..8bcca052ba6 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -8,6 +8,12 @@ + + Support for unicode point escapes added when converting regex from js to java. + + + Minor fixes for unicode handling when converting regex from js to java. + RequestParameter normalization should not replace KeyDataPair by NameValuePair. diff --git a/src/main/java/com/gargoylesoftware/htmlunit/javascript/regexp/RegExpJsToJavaConverter.java b/src/main/java/com/gargoylesoftware/htmlunit/javascript/regexp/RegExpJsToJavaConverter.java index f299f083ad9..2440d984094 100644 --- a/src/main/java/com/gargoylesoftware/htmlunit/javascript/regexp/RegExpJsToJavaConverter.java +++ b/src/main/java/com/gargoylesoftware/htmlunit/javascript/regexp/RegExpJsToJavaConverter.java @@ -348,9 +348,25 @@ private void processEscapeSequence() { } if ('u' == escapeSequence) { - // Unicode (e.g. 
\u0009) - // Read the for char unicode - tape_.move(4); + int next = tape_.read(); + if (next > -1) { + if (next == '{') { + final int uPos = tape_.currentPos_ - 2; + // Unicode point escapes + do { + next = tape_.read(); + } + while (next > -1 && next != '}'); + if (next == '}') { + tape_.tape_.replace(uPos, uPos + 1, "x"); + } + return; + } + + // Unicode (e.g. \u0009) + // we have nothing to convert here + } + return; } diff --git a/src/test/java/com/gargoylesoftware/htmlunit/javascript/regexp/RegExpJsToJavaConverter2Test.java b/src/test/java/com/gargoylesoftware/htmlunit/javascript/regexp/RegExpJsToJavaConverter2Test.java new file mode 100644 index 00000000000..de64c072deb --- /dev/null +++ b/src/test/java/com/gargoylesoftware/htmlunit/javascript/regexp/RegExpJsToJavaConverter2Test.java @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2002-2022 Gargoyle Software Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.gargoylesoftware.htmlunit.javascript.regexp; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.openqa.selenium.By; +import org.openqa.selenium.WebDriver; + +import com.gargoylesoftware.htmlunit.WebDriverTestCase; +import com.gargoylesoftware.htmlunit.html.HtmlTextInput; +import com.gargoylesoftware.htmlunit.junit.BrowserRunner; +import com.gargoylesoftware.htmlunit.junit.BrowserRunner.Alerts; + +/** + * Tests for {@link HtmlTextInput} validation based on regex. 
+ * + * @author Ronald Brill + */ +@RunWith(BrowserRunner.class) +public class RegExpJsToJavaConverter2Test extends WebDriverTestCase { + + /** + * @throws Exception if an error occurs + */ + @Test + @Alerts("true") + public void validationPattern() throws Exception { + validation("1234*", "123"); + } + + /** + * @throws Exception if an error occurs + */ + @Test + @Alerts("false") + public void validationPatternFailed() throws Exception { + validation("1234*", "1235"); + } + + /** + * @throws Exception if an error occurs + */ + @Test + @Alerts("true") + public void validationPatternUnicode() throws Exception { + validation("123\\u0077*", "123\u0077\u0077"); + } + + /** + * @throws Exception if an error occurs + */ + @Test + @Alerts("true") + public void validationPatternUnicodeWrong() throws Exception { + validation("123\\u77 a*", "123\u0077 "); + validation("123\\u77a*", "123\u0077aaaa"); + } + + /** + * @throws Exception if an error occurs + */ + @Test + @Alerts("true") + public void validationPatternUnicodeCodePointEscapes() throws Exception { + validation("123\\u{1D306}", "123𝌆"); + } + + /** + * @throws Exception if an error occurs + */ + @Test + @Alerts("false") + public void validationPatternUnicodeCodePointEscapesFails() throws Exception { + validation("123\\u{1D306}", "123𝌇"); + } + + private void validation(final String pattern, final String value) throws Exception { + final String html = + "\n" + + " \n" + + "\n" + + "\n" + + " \n" + + " \n" + + ""; + + expandExpectedAlertsVariables(URL_FIRST); + + final WebDriver driver = loadPage2(html, URL_FIRST); + + driver.findElement(By.id("myTest")).click(); + verifyTitle2(driver, getExpectedAlerts()[0]); + } +} diff --git a/src/test/java/com/gargoylesoftware/htmlunit/javascript/regexp/RegExpJsToJavaConverterTest.java b/src/test/java/com/gargoylesoftware/htmlunit/javascript/regexp/RegExpJsToJavaConverterTest.java index b4825b66725..a553b13ccc3 100644 --- 
a/src/test/java/com/gargoylesoftware/htmlunit/javascript/regexp/RegExpJsToJavaConverterTest.java +++ b/src/test/java/com/gargoylesoftware/htmlunit/javascript/regexp/RegExpJsToJavaConverterTest.java @@ -97,10 +97,9 @@ public void escapeHex() { public void escapeUnicode() { final RegExpJsToJavaConverter regExpJsToJavaConverter = new RegExpJsToJavaConverter(); - final String in = "\\u0074"; - final String out = regExpJsToJavaConverter.convert(in); - - assertEquals("\\u0074", out); + assertEquals("\\u0074", regExpJsToJavaConverter.convert("\\u0074")); + assertEquals("\\u0074 \\{", regExpJsToJavaConverter.convert("\\u0074 {")); + assertEquals("\\u74 \\{", regExpJsToJavaConverter.convert("\\u74 {")); } /** @@ -475,4 +474,19 @@ public void squareBracket() { assertEquals(".", regExpJsToJavaConverter.convert("[^]")); assertEquals("x.y", regExpJsToJavaConverter.convert("x[^]y")); } + + /** + * Verifies that JS unicode code point escapes are converted to the Java hex code point notation. + */ + @Test + public void unicode() { + final RegExpJsToJavaConverter regExpJsToJavaConverter = new RegExpJsToJavaConverter(); + + assertEquals("[\\x{F0000}-\\x{FFFFD}]*", regExpJsToJavaConverter.convert("[\\u{F0000}-\\u{FFFFD}]*")); + assertEquals("\\x{F0000}-\\x{FFFFD}", regExpJsToJavaConverter.convert("\\u{F0000}-\\u{FFFFD}")); + assertEquals("\\x{000000000061}", regExpJsToJavaConverter.convert("\\u{000000000061}")); + + assertEquals("\\u{FFFFD", regExpJsToJavaConverter.convert("\\u{FFFFD")); + assertEquals("\\x{FFFFD}\\}", regExpJsToJavaConverter.convert("\\u{FFFFD}}")); + } }