Skip to content

Commit

Permalink
Tests for Unicode property names in RegExp
Browse files Browse the repository at this point in the history
  • Loading branch information
Jonathan Jacobs committed Feb 6, 2024
1 parent 285106b commit 040070d
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 1 deletion.
4 changes: 4 additions & 0 deletions test/hermes/regexp-icase.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,7 @@ print(/[\u03B1-\u03C9]/i.exec("\u03D1"));
// CHECK-NEXT: ϑ
print(/(.+)(ςΣ)(.+)(σ)/i.exec("Ὀδυσσεύς"));
// CHECK-NEXT: Ὀδυσσεύς,Ὀδυ,σσ,εύ,ς
// With the `u` and `i` flags combined, \p{Lu} is expected to match letters of
// either case: the whole mixed-case input matches, both for the bare escape
// and for the escape placed inside a character class.
print(/\p{Lu}+/ui.exec("aBc"));
// CHECK-NEXT: aBc
print(/[\p{Lu}]+/ui.exec("aBc"));
// CHECK-NEXT: aBc
142 changes: 142 additions & 0 deletions test/hermes/regexp_unicode_properties.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

// RUN: LC_ALL=en_US.UTF-8 %hermes -non-strict -O -target=HBC %s | %FileCheck --match-full-lines %s

// Banner line printed first; the label directive below matches it, and every
// later expectation in this file is checked relative to the output after it.
print('RegExp Unicode Properties');
// CHECK-LABEL: RegExp Unicode Properties

// Returns the code point found at position `idx` of string `s`, formatted as
// an uppercase hexadecimal string with no prefix and no zero padding.
// Several callers in this file omit `idx`; codePointAt then reads position 0.
function hexCodePointAt(s, idx) {
  const codePoint = s.codePointAt(idx);
  const lowerHex = codePoint.toString(16);
  return lowerHex.toUpperCase();
}

// Property names that do not exist, and a `Script=` key with an empty value,
// must be rejected when the RegExp is constructed. Each pattern is tried both
// as a bare escape and inside a character class; all four are expected to
// report the same error message. If no exception were thrown nothing would be
// printed, and the following expectation would fail.
try { new RegExp("\\p{PropertyThatDoesNotExist}", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name
try { new RegExp("[\\p{PropertyThatDoesNotExist}]", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name
try { new RegExp("\\p{Script=}", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name
try { new RegExp("[\\p{Script=}]", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name

// With the `i` flag the Lu / Ll distinction is ignored: lowercase-then-
// uppercase input still matches an uppercase-then-lowercase pattern.
print(/\p{Lu}\p{Ll}/ui.exec("aB"));
// CHECK-NEXT: aB

// The long form of a General_Category value name is accepted.
print(/\p{Lowercase_Letter}/u.exec("a"));
// CHECK-NEXT: a

// Property names are case-sensitive: the all-lowercase spelling is rejected.
try { new RegExp("\\p{lowercase_letter}", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name

// Long-form name for decimal digits, bare and inside a character class.
print(/\p{Decimal_Number}/u.exec("1"));
// CHECK-NEXT: 1
print(/[\p{Decimal_Number}]+/u.exec("123"));
// CHECK-NEXT: 123

// Without the `u` flag, \p is not a property escape: the pattern matches the
// literal text "p{Nd}" instead.
print(/\p{Nd}/.exec("p{Nd}"));
// CHECK-NEXT: p{Nd}

// Same for the negated form \P without the `u` flag.
print(/\P{Nd}/.exec("P{Nd}"));
// CHECK-NEXT: P{Nd}

// U+FFFF is unassigned (as of Unicode 15.1.0).
// The match is printed as a hex code point rather than as the raw character.
print(hexCodePointAt(/\p{Unassigned}/u.exec("\u{FFFF}")[0]));
// CHECK-NEXT: FFFF
// Assigned is expected to be the complement of Unassigned, so no match here.
print(/\p{Assigned}/u.exec("\u{FFFF}"));
// CHECK-NEXT: null
// Script=Zzzz behaves like Unassigned / Cn.
print(hexCodePointAt(/\p{Script=Zzzz}/u.exec("\u{FFFF}")[0]));
// CHECK-NEXT: FFFF

// U+FFFD is the replacement character, which is assigned.
// The negated escapes \P{...} are exercised here: "not Unassigned" matches,
// "not Assigned" does not.
print(hexCodePointAt(/\P{Unassigned}/u.exec("\u{FFFD}")[0]));
// CHECK-NEXT: FFFD
print(/\P{Assigned}/u.exec("\u{FFFD}"));
// CHECK-NEXT: null
// Script=Zzzz behaves like Unassigned / Cn.
print(/\p{Script=Zzzz}/u.exec("\u{FFFD}"));
// CHECK-NEXT: null

// Latin text is expected to match under both the Script and the
// Script_Extensions property.
print(/\p{Script=Latin}+/u.exec("Ave"));
// CHECK-NEXT: Ave

print(/\p{Script_Extensions=Latin}+/u.exec("Ave"));
// CHECK-NEXT: Ave

// The code points U+4F60 U+597D are expected to match neither Latin property.
print(/\p{Script=Latin}+/u.exec("\u{4f60}\u{597d}"));
// CHECK-NEXT: null

print(/\p{Script_Extensions=Latin}+/u.exec("\u{4f60}\u{597d}"));
// CHECK-NEXT: null

// Malformed property escapes in Unicode mode. Every case below is tried both
// on its own and inside a character class, and every one is expected to fail
// at construction time with the same "Invalid property name" message.

// Empty braces after \P.
try { new RegExp("\\P{}", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name
try { new RegExp("[\\P{}]", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name

// Empty braces after \p.
try { new RegExp("\\p{}", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name
try { new RegExp("[\\p{}]", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name

// Missing closing brace.
try { new RegExp("\\P{Ll", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name
try { new RegExp("[\\P{Ll]", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name

// Closing brace with no opening brace, after \P.
try { new RegExp("\\P}", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name
try { new RegExp("[\\P}]", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name

// Closing brace with no opening brace, after \p.
try { new RegExp("\\p}", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name
try { new RegExp("[\\p}]", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name

// Bare \p with no braces at all.
try { new RegExp("\\p", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name
try { new RegExp("[\\p]", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name

// Bare \P with no braces at all.
try { new RegExp("\\P", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name
try { new RegExp("[\\P]", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid property name

// A lone brace with no property escape in front of it is rejected in Unicode
// mode, but with a quantifier error rather than a property-name error.
try { new RegExp("{", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid quantifier bracket

try { new RegExp("}", "u"); } catch (e) { print(e.message); }
// CHECK-NEXT: Invalid RegExp: Invalid quantifier bracket

// Without the `u` flag the same patterns are accepted, and `source` returns
// the brace unchanged.
print((new RegExp("{", "")).source);
// CHECK-NEXT: {

print((new RegExp("}", "")).source);
// CHECK-NEXT: }

// Here the match has length 2, because this emoji must be encoded via
// a surrogate pair.
// (`length` counts UTF-16 code units, so one matched code point reports 2.)
print(/\p{Emoji}/u.exec("\u{1F600}")[0].length);
// CHECK-NEXT: 2

// Hex or non-hex codepoints, i.e. all codepoints.
// A property and its negation are combined in one character class; the input
// is a single code point written with the \u{...} escape.
print(/[\p{Hex}\P{Hex}]/u.exec('\u{1D306}'));
// CHECK-NEXT: 𝌆

// Not uppercase letters.
// A negated property escape inside a (non-negated) character class.
print(/[\P{Lu}]+/u.exec('abc'));
// CHECK-NEXT: abc
print(/[\P{Lu}]+/u.exec('ABC'));
// CHECK-NEXT: null

// Not not uppercase letters.
// Negating the class around a negated escape gives back the plain property,
// so the results are the reverse of the pair above.
print(/[^\P{Lu}]+/u.exec('ABC'));
// CHECK-NEXT: ABC
print(/[^\P{Lu}]+/u.exec('abc'));
// CHECK-NEXT: null
2 changes: 1 addition & 1 deletion utils/testsuite/testsuite_skiplist.py
Original file line number Diff line number Diff line change
Expand Up @@ -2054,7 +2054,7 @@
"json-superset",
"let",
"new.target",
"regexp-unicode-property-escapes",
"regexp-v-flag",
"resizable-arraybuffer",
"string-trimming",
"super",
Expand Down

0 comments on commit 040070d

Please sign in to comment.