Skip to content

Commit

Permalink
Support for unicode point escapes added when converting regex from js…
Browse files Browse the repository at this point in the history
… to java (issue #535)

Minor fixes for unicode handling when converting regex from js to java.
  • Loading branch information
rbri committed Dec 22, 2022
1 parent c4c068b commit e54499f
Show file tree
Hide file tree
Showing 4 changed files with 156 additions and 7 deletions.
6 changes: 6 additions & 0 deletions src/changes/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@

<body>
<release version="2.68.0" date="December xx, 2022" description="Chrome/Edge 108, Firefox 108, Bugfixes, neko-html without xerces">
<action type="add" dev="rbri" issue="535">
Support for unicode code point escapes added when converting regex from js to java.
</action>
<action type="fix" dev="rbri">
Minor fixes for unicode handling when converting regex from js to java.
</action>
<action type="fix" dev="Michael Lueck" issue="533">
RequestParameter normalization should not replace KeyDataPair by NameValuePair.
</action>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -348,9 +348,25 @@ private void processEscapeSequence() {
}

if ('u' == escapeSequence) {
// Unicode (e.g. \u0009)
// Read the four chars of the unicode escape
tape_.move(4);
int next = tape_.read();
if (next > -1) {
if (next == '{') {
final int uPos = tape_.currentPos_ - 2;
// Unicode point escapes
do {
next = tape_.read();
}
while (next > -1 && next != '}');
if (next == '}') {
tape_.tape_.replace(uPos, uPos + 1, "x");
}
return;
}

// Unicode (e.g. \u0009)
// we have nothing to convert here
}

return;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* Copyright (c) 2002-2022 Gargoyle Software Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.gargoylesoftware.htmlunit.javascript.regexp;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;

import com.gargoylesoftware.htmlunit.WebDriverTestCase;
import com.gargoylesoftware.htmlunit.html.HtmlTextInput;
import com.gargoylesoftware.htmlunit.junit.BrowserRunner;
import com.gargoylesoftware.htmlunit.junit.BrowserRunner.Alerts;

/**
 * Browser based tests for the {@code pattern} validation of an {@link HtmlTextInput}.
 * Every test renders a text input with a given pattern and value and compares the
 * reported {@code validity.valid} flag with the expected alert.
 *
 * @author Ronald Brill
 */
@RunWith(BrowserRunner.class)
public class RegExpJsToJavaConverter2Test extends WebDriverTestCase {

    /**
     * A value that matches a plain pattern is expected to be valid.
     *
     * @throws Exception if an error occurs
     */
    @Test
    @Alerts("true")
    public void validationPattern() throws Exception {
        validation("1234*", "123");
    }

    /**
     * A value that does not match a plain pattern is expected to be invalid.
     *
     * @throws Exception if an error occurs
     */
    @Test
    @Alerts("false")
    public void validationPatternFailed() throws Exception {
        validation("1234*", "1235");
    }

    /**
     * The pattern contains a four digit unicode escape (code 0077, the letter 'w');
     * a value made of matching characters is expected to be valid.
     *
     * @throws Exception if an error occurs
     */
    @Test
    @Alerts("true")
    public void validationPatternUnicode() throws Exception {
        validation("123\\u0077*", "123\u0077\u0077");
    }

    /**
     * The pattern contains a unicode escape with only two hex digits.
     * Both checks are expected to report the input as valid.
     *
     * @throws Exception if an error occurs
     */
    @Test
    @Alerts("true")
    public void validationPatternUnicodeWrong() throws Exception {
        validation("123\\u77 a*", "123\u0077 ");
        validation("123\\u77a*", "123\u0077aaaa");
    }

    /**
     * The pattern contains a unicode code point escape (hex value in curly braces);
     * the value provides the same code point as html character reference and is
     * expected to be valid.
     *
     * @throws Exception if an error occurs
     */
    @Test
    @Alerts("true")
    public void validationPatternUnicodeCodePointEscapes() throws Exception {
        validation("123\\u{1D306}", "123&#x1D306;");
    }

    /**
     * The pattern contains a unicode code point escape (hex value in curly braces);
     * the value provides a different code point and is expected to be invalid.
     *
     * @throws Exception if an error occurs
     */
    @Test
    @Alerts("false")
    public void validationPatternUnicodeCodePointEscapesFails() throws Exception {
        validation("123\\u{1D306}", "123&#x1D307;");
    }

    /**
     * Loads a page with one text input using the given pattern and value, clicks the
     * test button (this logs {@code validity.valid} of the input) and compares the
     * page title with the first expected alert.
     *
     * @param pattern the content of the pattern attribute
     * @param value the content of the value attribute
     * @throws Exception if an error occurs
     */
    private void validation(final String pattern, final String value) throws Exception {
        final StringBuilder page = new StringBuilder();

        // head including the script that reports the validity of the input
        page.append("<html><head>\n")
            .append(" <script>\n")
            .append(LOG_TITLE_FUNCTION)
            .append(" function test() {\n")
            .append(" var elem = document.getElementById('e1');\n")
            .append(" log(elem.validity.valid);\n")
            .append(" }\n")
            .append(" </script>\n")
            .append("</head>\n");

        // body with the input under test and the button that triggers the check
        page.append("<body>\n")
            .append(" <input type='text' id='e1' name='k' pattern='")
            .append(pattern)
            .append("' value='")
            .append(value)
            .append("'>\n")
            .append(" <button id='myTest' type='button' onclick='test()'>Test</button>\n")
            .append("</body></html>");

        expandExpectedAlertsVariables(URL_FIRST);

        final WebDriver webDriver = loadPage2(page.toString(), URL_FIRST);
        webDriver.findElement(By.id("myTest")).click();

        final String expected = getExpectedAlerts()[0];
        verifyTitle2(webDriver, expected);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,9 @@ public void escapeHex() {
public void escapeUnicode() {
    final RegExpJsToJavaConverter converter = new RegExpJsToJavaConverter();

    // a complete four digit escape is expected to come back unchanged
    final String fourDigitEscape = "\\u0074";
    final String converted = converter.convert(fourDigitEscape);
    assertEquals("\\u0074", converted);

    final String convertedAgain = converter.convert("\\u0074");
    assertEquals("\\u0074", convertedAgain);

    // a curly brace that does not directly follow the 'u' has to be escaped
    final String withBrace = converter.convert("\\u0074 {");
    assertEquals("\\u0074 \\{", withBrace);

    // same for an escape with only two hex digits
    final String shortWithBrace = converter.convert("\\u74 {");
    assertEquals("\\u74 \\{", shortWithBrace);
}

/**
Expand Down Expand Up @@ -475,4 +474,19 @@ public void squareBracket() {
assertEquals(".", regExpJsToJavaConverter.convert("[^]"));
assertEquals("x.y", regExpJsToJavaConverter.convert("x[^]y"));
}

/**
 * Verifies the conversion of unicode code point escapes (a hex value in curly
 * braces directly after the 'u' escape). Terminated escapes are expected to be
 * rewritten to the {@code \x{...}} form; an escape without closing brace is
 * expected to stay untouched and an additional closing brace gets escaped.
 */
@Test
public void unicode() {
    final RegExpJsToJavaConverter converter = new RegExpJsToJavaConverter();

    // terminated escapes, inside and outside of a character class
    final String insideCharClass = converter.convert("[\\u{F0000}-\\u{FFFFD}]*");
    assertEquals("[\\x{F0000}-\\x{FFFFD}]*", insideCharClass);

    final String outsideCharClass = converter.convert("\\u{F0000}-\\u{FFFFD}");
    assertEquals("\\x{F0000}-\\x{FFFFD}", outsideCharClass);

    final String leadingZeros = converter.convert("\\u{000000000061}");
    assertEquals("\\x{000000000061}", leadingZeros);

    // missing closing brace
    final String unterminated = converter.convert("\\u{FFFFD");
    assertEquals("\\u{FFFFD", unterminated);

    // one closing brace too many
    final String extraBrace = converter.convert("\\u{FFFFD}}");
    assertEquals("\\x{FFFFD}\\}", extraBrace);
}
}

0 comments on commit e54499f

Please sign in to comment.