From df703021948ba33777c59163087e88ea6864ceeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Juli=C3=A1n=20Espina?= Date: Tue, 11 Oct 2022 18:01:02 +0000 Subject: [PATCH] First prototype for new `JsString` using UTF-16 (#1659) I think it's time to address the elephant in the room. This Pull Request will (hopefully!) solve part of #736. This is a complete rewrite of `JsString`, but instead of storing `u8` bytes it stores `u16` words. The `encode!` macro (renamed to `utf16!` for simplicity) from the `const-utf16` crate allows us to create UTF-16 encoded arrays at compilation time. `JsString` implements `Deref` to unlock the slice methods and possibly make some manipulations easier. However, we would need to create our own library of utilities for `JsString`. --- Cargo.lock | 158 ++- Cargo.toml | 2 + boa_engine/Cargo.toml | 3 + boa_engine/src/builtins/array/mod.rs | 22 +- boa_engine/src/builtins/console/mod.rs | 42 +- boa_engine/src/builtins/date/mod.rs | 45 +- boa_engine/src/builtins/error/mod.rs | 14 +- boa_engine/src/builtins/eval/mod.rs | 3 +- boa_engine/src/builtins/function/mod.rs | 68 +- boa_engine/src/builtins/function/tests.rs | 17 +- .../src/builtins/intl/date_time_format.rs | 37 +- boa_engine/src/builtins/intl/mod.rs | 147 +- boa_engine/src/builtins/intl/tests.rs | 126 +- boa_engine/src/builtins/json/mod.rs | 287 ++-- boa_engine/src/builtins/number/mod.rs | 37 +- .../src/builtins/object/for_in_iterator.rs | 6 +- boa_engine/src/builtins/object/mod.rs | 37 +- boa_engine/src/builtins/regexp/mod.rs | 217 ++- boa_engine/src/builtins/string/mod.rs | 868 +++++------- .../src/builtins/string/string_iterator.rs | 18 +- boa_engine/src/builtins/string/tests.rs | 24 +- boa_engine/src/builtins/typed_array/mod.rs | 11 +- boa_engine/src/builtins/uri/mod.rs | 71 +- boa_engine/src/bytecompiler/mod.rs | 14 +- boa_engine/src/class.rs | 2 +- boa_engine/src/context/mod.rs | 30 +- boa_engine/src/environments/compile.rs | 5 +- boa_engine/src/lib.rs | 6 +- 
.../src/object/internal_methods/array.rs | 3 +- .../src/object/internal_methods/string.rs | 10 +- boa_engine/src/object/jsobject.rs | 4 +- boa_engine/src/object/mod.rs | 18 +- boa_engine/src/property/mod.rs | 24 +- boa_engine/src/string.rs | 1227 ----------------- boa_engine/src/string/common.rs | 500 +++++++ boa_engine/src/string/mod.rs | 1015 ++++++++++++++ boa_engine/src/symbol.rs | 16 +- boa_engine/src/syntax/ast/keyword.rs | 96 +- .../src/syntax/ast/node/identifier/mod.rs | 15 +- boa_engine/src/syntax/ast/node/object/mod.rs | 8 +- .../src/syntax/ast/node/template/mod.rs | 9 +- boa_engine/src/syntax/lexer/identifier.rs | 2 +- .../src/syntax/lexer/private_identifier.rs | 4 +- boa_engine/src/syntax/lexer/regex.rs | 2 +- boa_engine/src/syntax/lexer/string.rs | 6 +- boa_engine/src/syntax/lexer/template.rs | 14 +- boa_engine/src/syntax/lexer/tests.rs | 76 +- boa_engine/src/syntax/lexer/token.rs | 6 +- .../parser/cursor/buffered_lexer/tests.rs | 61 +- .../parser/expression/left_hand_side/tests.rs | 82 +- .../primary/array_initializer/tests.rs | 11 +- .../async_function_expression/tests.rs | 23 +- .../async_generator_expression/tests.rs | 23 +- .../primary/function_expression/tests.rs | 74 +- .../primary/generator_expression/tests.rs | 20 +- .../primary/object_initializer/mod.rs | 44 +- .../primary/object_initializer/tests.rs | 133 +- .../syntax/parser/expression/primary/tests.rs | 7 +- .../src/syntax/parser/expression/tests.rs | 224 +-- boa_engine/src/syntax/parser/function/mod.rs | 31 +- .../src/syntax/parser/function/tests.rs | 239 +++- boa_engine/src/syntax/parser/mod.rs | 11 +- .../syntax/parser/statement/block/tests.rs | 29 +- .../parser/statement/break_stm/tests.rs | 41 +- .../parser/statement/continue_stm/tests.rs | 41 +- .../hoistable/async_function_decl/tests.rs | 15 +- .../hoistable/async_generator_decl/tests.rs | 11 +- .../declaration/hoistable/class_decl/tests.rs | 31 +- .../hoistable/function_decl/tests.rs | 49 +- .../hoistable/generator_decl/tests.rs | 
11 +- .../parser/statement/declaration/tests.rs | 63 +- .../parser/statement/iteration/tests.rs | 61 +- .../syntax/parser/statement/switch/tests.rs | 27 +- .../syntax/parser/statement/throw/tests.rs | 14 +- .../syntax/parser/statement/try_stm/tests.rs | 65 +- boa_engine/src/syntax/parser/tests.rs | 60 +- boa_engine/src/tests.rs | 27 +- boa_engine/src/value/display.rs | 18 +- boa_engine/src/value/equality.rs | 4 +- boa_engine/src/value/mod.rs | 15 +- boa_engine/src/value/operations.rs | 52 +- boa_engine/src/value/serde_json.rs | 4 +- boa_engine/src/value/tests.rs | 12 +- boa_engine/src/vm/code_block.rs | 33 +- boa_engine/src/vm/mod.rs | 90 +- boa_examples/src/bin/classes.rs | 18 +- boa_examples/src/bin/closures.rs | 18 +- boa_examples/src/bin/jsarray.rs | 7 +- boa_examples/src/bin/loadfile.rs | 5 +- boa_examples/src/bin/loadstring.rs | 5 +- boa_examples/src/bin/modulehandler.rs | 2 +- boa_interner/Cargo.toml | 4 + boa_interner/src/fixed_string.rs | 50 +- boa_interner/src/interned_str.rs | 64 +- boa_interner/src/lib.rs | 378 +++-- boa_interner/src/raw.rs | 196 +++ boa_interner/src/sym.rs | 104 +- boa_interner/src/tests.rs | 84 +- boa_macros/Cargo.toml | 18 + boa_macros/src/lib.rs | 15 + boa_macros/tests/utf16.rs | 15 + boa_tester/src/exec/js262.rs | 4 +- boa_tester/src/exec/mod.rs | 3 +- 103 files changed, 4660 insertions(+), 3458 deletions(-) delete mode 100644 boa_engine/src/string.rs create mode 100644 boa_engine/src/string/common.rs create mode 100644 boa_engine/src/string/mod.rs create mode 100644 boa_interner/src/raw.rs create mode 100644 boa_macros/Cargo.toml create mode 100644 boa_macros/src/lib.rs create mode 100644 boa_macros/tests/utf16.rs diff --git a/Cargo.lock b/Cargo.lock index 4dcb7a68b59..f609eb856ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -78,6 +78,7 @@ dependencies = [ "bitflags", "boa_gc", "boa_interner", + "boa_macros", "boa_profiler", "boa_unicode", "chrono", @@ -104,6 +105,8 @@ dependencies = [ "ryu-js", "serde", "serde_json", + "sptr", + 
"static_assertions", "sys-locale", "tap", "unicode-normalization", @@ -130,12 +133,23 @@ dependencies = [ name = "boa_interner" version = "0.16.0" dependencies = [ + "boa_macros", + "indexmap", + "once_cell", "phf", "rustc-hash", "serde", "static_assertions", ] +[[package]] +name = "boa_macros" +version = "0.16.0" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "boa_profiler" version = "0.16.0" @@ -303,6 +317,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width", +] + [[package]] name = "colored" version = "2.0.0" @@ -379,26 +403,24 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.10" +version = "0.9.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1" +checksum = "f916dfc5d356b0ed9dae65f1db9fc9770aa2851d2662b988ccf4fe3516e86348" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", "memoffset", - "once_cell", "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.11" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" +checksum = "edbafec5fa1f196ca66527c1b12c2ec4745ca14b50f1ad8f9f6f720b55d11fac" dependencies = [ "cfg-if", - "once_cell", ] [[package]] @@ -417,6 +439,50 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b365fabc795046672053e29c954733ec3b05e4be654ab130fe8f1f94d7051f35" +[[package]] +name = "cxx" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f39818dcfc97d45b03953c1292efc4e80954e1583c4aa770bac1383e2310a4" +dependencies = [ + "cc", + "cxxbridge-flags", + 
"cxxbridge-macro", + "link-cplusplus", +] + +[[package]] +name = "cxx-build" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e580d70777c116df50c390d1211993f62d40302881e54d4b79727acb83d0199" +dependencies = [ + "cc", + "codespan-reporting", + "once_cell", + "proc-macro2", + "quote", + "scratch", + "syn", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56a46460b88d1cec95112c8c363f0e2c39afdb237f60583b0b36343bf627ea9c" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "747b608fecf06b0d72d440f27acc99288207324b793be2c17991839f3d4995ea" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -614,17 +680,28 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.50" +version = "0.1.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd911b35d940d2bd0bea0f9100068e5b97b51a1cbe13d13382f132e0365257a0" +checksum = "f5a6ef98976b22b3b7f2f3a806f858cb862044cfa66805aa3ad84cb3d3b785ed" dependencies = [ "android_system_properties", "core-foundation-sys", + "iana-time-zone-haiku", "js-sys", "wasm-bindgen", "winapi", ] +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fde6edd6cef363e9359ed3c98ba64590ba9eecba2293eb5a723ab32aee8926aa" +dependencies = [ + "cxx", + "cxx-build", +] + [[package]] name = "icu_calendar" version = "0.6.0" @@ -790,15 +867,15 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" +checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" [[package]] 
name = "jemalloc-sys" -version = "0.5.1+5.3.0-patched" +version = "0.5.2+5.3.0-patched" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7c2b313609b95939cb0c5a5c6917fb9b7c9394562aa3ef44eb66ffa51736432" +checksum = "134163979b6eed9564c98637b710b40979939ba351f59952708234ea11b5f3f8" dependencies = [ "cc", "fs_extra", @@ -832,9 +909,18 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.133" +version = "0.2.135" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0f80d65747a3e43d1596c7c5492d95d5edddaabd45a7fcdb02b95f644164966" +checksum = "68783febc7782c6c5cb401fbda4de5a9898be1762314da0bb2c10ced61f18b0c" + +[[package]] +name = "link-cplusplus" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9272ab7b96c9046fbc5bc56c06c117cb639fe2d509df0c421cad82d2915cf369" +dependencies = [ + "cc", +] [[package]] name = "linux-raw-sys" @@ -1140,9 +1226,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.43" +version = "1.0.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab" +checksum = "94e2ef8dbfc347b10c094890f778ee2e36ca9bb4262e86dc99cd217e35f3470b" dependencies = [ "unicode-ident", ] @@ -1274,9 +1360,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.35.10" +version = "0.35.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af895b90e5c071badc3136fc10ff0bcfc98747eadbaf43ed8f214e07ba8f8477" +checksum = "fbb2fda4666def1433b1b05431ab402e42a1084285477222b72d6c564c417cef" dependencies = [ "bitflags", "errno", @@ -1347,6 +1433,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 
+[[package]] +name = "scratch" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8132065adcfd6e02db789d9285a0deb2f3fcb04002865ab67d5fb103533898" + [[package]] name = "serde" version = "1.0.145" @@ -1369,9 +1461,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.85" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44" +checksum = "41feea4228a6f1cd09ec7a3593a682276702cd67b5273544757dae23c096f074" dependencies = [ "itoa", "ryu", @@ -1399,13 +1491,19 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" [[package]] name = "smallvec" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1" +checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" dependencies = [ "serde", ] +[[package]] +name = "sptr" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b9b39299b249ad65f3b7e96443bad61c02ca5cd3589f46cb6d610a0fd6c0d6a" + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -1432,9 +1530,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "1.0.100" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52205623b1b0f064a4e71182c3b18ae902267282930c6d5462c91b859668426e" +checksum = "3fcd952facd492f9be3ef0d0b7032a6e442ee9b361d4acc2b1d0c4aaa5f613a1" dependencies = [ "proc-macro2", "quote", @@ -1491,18 +1589,18 @@ checksum = "949517c0cf1bf4ee812e2e07e08ab448e3ae0d23472aee8a06c985f0c8815b16" [[package]] name = "thiserror" -version = "1.0.35" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c53f98874615aea268107765aa1ed8f6116782501d18e53d08b471733bea6c85" +checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.35" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8b463991b4eab2d801e724172285ec4195c650e8ec79b149e6c2a8e6dd3f783" +checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb" dependencies = [ "proc-macro2", "quote", @@ -1565,9 +1663,9 @@ checksum = "2281c8c1d221438e373249e065ca4989c4c36952c211ff21a0ee91c44a3869e7" [[package]] name = "unicode-ident" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd" +checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" [[package]] name = "unicode-normalization" diff --git a/Cargo.toml b/Cargo.toml index 8dc992bcb77..bbb50e7f50c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ members = [ "boa_unicode", "boa_wasm", "boa_examples", + "boa_macros", ] [workspace.package] @@ -26,6 +27,7 @@ boa_interner = { version = "0.16.0", path = "boa_interner" } boa_gc = { version = "0.16.0", path = "boa_gc" } boa_profiler = { version = "0.16.0", path = "boa_profiler" } boa_unicode = { version = "0.16.0", path = "boa_unicode" } +boa_macros = { version = "0.16.0", path = "boa_macros" } [workspace.metadata.workspaces] allow_branch = "main" diff --git a/boa_engine/Cargo.toml b/boa_engine/Cargo.toml index 1f79e1126ce..55e1f4444dd 100644 --- a/boa_engine/Cargo.toml +++ b/boa_engine/Cargo.toml @@ -32,6 +32,7 @@ boa_unicode.workspace = true boa_interner.workspace = true boa_gc.workspace = true boa_profiler.workspace = true +boa_macros.workspace = true gc = "0.4.1" serde = { version = "1.0.145", features = ["derive", "rc"] } serde_json = "1.0.85" @@ -50,6 +51,8 @@ unicode-normalization = "0.1.22" 
dyn-clone = "1.0.9" once_cell = "1.15.0" tap = "1.0.1" +sptr = "0.3.2" +static_assertions = "1.1.0" icu_locale_canonicalizer = { version = "0.6.0", features = ["serde"], optional = true } icu_locid = { version = "0.6.0", features = ["serde"], optional = true } icu_datetime = { version = "0.6.0", features = ["serde"], optional = true } diff --git a/boa_engine/src/builtins/array/mod.rs b/boa_engine/src/builtins/array/mod.rs index cf325236366..6aeea6020a4 100644 --- a/boa_engine/src/builtins/array/mod.rs +++ b/boa_engine/src/builtins/array/mod.rs @@ -23,6 +23,7 @@ use crate::{ builtins::BuiltIn, builtins::Number, context::intrinsics::StandardConstructors, + js_string, object::{ internal_methods::get_prototype_from_constructor, ConstructorBuilder, FunctionBuilder, JsFunction, JsObject, ObjectData, @@ -30,7 +31,7 @@ use crate::{ property::{Attribute, PropertyDescriptor, PropertyNameKind}, symbol::WellKnownSymbols, value::{IntegerOrInfinity, JsValue}, - Context, JsResult, JsString, + Context, JsResult, }; use std::cmp::{max, min, Ordering}; @@ -412,7 +413,12 @@ impl Array { let mapping = match mapfn { JsValue::Undefined => None, JsValue::Object(o) if o.is_callable() => Some(o), - _ => return context.throw_type_error(format!("{} is not a function", mapfn.type_of())), + _ => { + return context.throw_type_error(format!( + "{} is not a function", + mapfn.type_of().to_std_string_escaped() + )) + } }; // 4. Let usingIterator be ? GetMethod(items, @@iterator). @@ -864,34 +870,34 @@ impl Array { // 4. Else, let sep be ? ToString(separator). let separator = args.get_or_undefined(0); let separator = if separator.is_undefined() { - JsString::new(",") + js_string!(",") } else { separator.to_string(context)? }; // 5. Let R be the empty String. - let mut r = String::new(); + let mut r = Vec::new(); // 6. Let k be 0. // 7. Repeat, while k < len, for k in 0..len { // a. If k > 0, set R to the string-concatenation of R and sep. 
if k > 0 { - r.push_str(&separator); + r.extend_from_slice(&separator); } // b. Let element be ? Get(O, ! ToString(𝔽(k))). let element = o.get(k, context)?; // c. If element is undefined or null, let next be the empty String; otherwise, let next be ? ToString(element). let next = if element.is_null_or_undefined() { - JsString::new("") + js_string!() } else { element.to_string(context)? }; // d. Set R to the string-concatenation of R and next. - r.push_str(&next); + r.extend_from_slice(&next); // e. Set k to k + 1. } // 8. Return R. - Ok(r.into()) + Ok(js_string!(&r[..]).into()) } /// `Array.prototype.toString( separator )` diff --git a/boa_engine/src/builtins/console/mod.rs b/boa_engine/src/builtins/console/mod.rs index dd2687c474c..997219a92cc 100644 --- a/boa_engine/src/builtins/console/mod.rs +++ b/boa_engine/src/builtins/console/mod.rs @@ -60,10 +60,11 @@ pub fn formatter(data: &[JsValue], context: &mut Context) -> JsResult { match data.len() { 0 => Ok(String::new()), - 1 => Ok(target.to_string()), + 1 => Ok(target.to_std_string_escaped()), _ => { let mut formatted = String::new(); let mut arg_index = 1; + let target = target.to_std_string_escaped(); let mut chars = target.chars(); while let Some(c) = chars.next() { if c == '%' { @@ -96,7 +97,8 @@ pub fn formatter(data: &[JsValue], context: &mut Context) -> JsResult { .get(arg_index) .cloned() .unwrap_or_default() - .to_string(context)?; + .to_string(context)? 
+ .to_std_string_escaped(); formatted.push_str(&arg); arg_index += 1; } @@ -114,7 +116,10 @@ pub fn formatter(data: &[JsValue], context: &mut Context) -> JsResult { /* unformatted data */ for rest in data.iter().skip(arg_index) { - formatted.push_str(&format!(" {}", rest.to_string(context)?)); + formatted.push_str(&format!( + " {}", + rest.to_string(context)?.to_std_string_escaped() + )); } Ok(formatted) @@ -300,7 +305,7 @@ impl Console { context .interner() .resolve_expect(frame.code.name) - .to_owned(), + .to_string(), ); } @@ -365,7 +370,7 @@ impl Console { None => "default".into(), }; - let msg = format!("count {label}:"); + let msg = format!("count {}:", label.to_std_string_escaped()); let c = context.console_mut().count_map.entry(label).or_insert(0); *c += 1; @@ -396,7 +401,7 @@ impl Console { context.console_mut().count_map.remove(&label); logger( - LogMessage::Warn(format!("countReset {label}")), + LogMessage::Warn(format!("countReset {}", label.to_std_string_escaped())), context.console(), ); @@ -429,7 +434,10 @@ impl Console { if context.console().timer_map.get(&label).is_some() { logger( - LogMessage::Warn(format!("Timer '{label}' already exist")), + LogMessage::Warn(format!( + "Timer '{}' already exist", + label.to_std_string_escaped() + )), context.console(), ); } else { @@ -462,14 +470,17 @@ impl Console { if let Some(t) = context.console().timer_map.get(&label) { let time = Self::system_time_in_ms(); - let mut concat = format!("{label}: {} ms", time - t); + let mut concat = format!("{}: {} ms", label.to_std_string_escaped(), time - t); for msg in args.iter().skip(1) { concat = concat + " " + &msg.display().to_string(); } logger(LogMessage::Log(concat), context.console()); } else { logger( - LogMessage::Warn(format!("Timer '{label}' doesn't exist")), + LogMessage::Warn(format!( + "Timer '{}' doesn't exist", + label.to_std_string_escaped() + )), context.console(), ); } @@ -497,15 +508,22 @@ impl Console { None => "default".into(), }; - if let Some(t) = 
context.console_mut().timer_map.remove(label.as_str()) { + if let Some(t) = context.console_mut().timer_map.remove(&label) { let time = Self::system_time_in_ms(); logger( - LogMessage::Info(format!("{label}: {} ms - timer removed", time - t)), + LogMessage::Info(format!( + "{}: {} ms - timer removed", + label.to_std_string_escaped(), + time - t + )), context.console(), ); } else { logger( - LogMessage::Warn(format!("Timer '{label}' doesn't exist")), + LogMessage::Warn(format!( + "Timer '{}' doesn't exist", + label.to_std_string_escaped() + )), context.console(), ); } diff --git a/boa_engine/src/builtins/date/mod.rs b/boa_engine/src/builtins/date/mod.rs index 95fc226d9ee..7166fd19e59 100644 --- a/boa_engine/src/builtins/date/mod.rs +++ b/boa_engine/src/builtins/date/mod.rs @@ -5,12 +5,14 @@ use super::JsArgs; use crate::{ builtins::BuiltIn, context::intrinsics::StandardConstructors, + js_string, object::{ internal_methods::get_prototype_from_constructor, ConstructorBuilder, JsObject, ObjectData, }, + string::utf16, symbol::WellKnownSymbols, value::{JsValue, PreferredType}, - Context, JsResult, JsString, + Context, JsResult, }; use boa_profiler::Profiler; use chrono::{prelude::*, Duration, LocalResult}; @@ -393,10 +395,11 @@ impl Date { let tv = match this_time_value(value, context) { Ok(dt) => dt.0, _ => match value.to_primitive(context, PreferredType::Default)? { - JsValue::String(ref str) => match chrono::DateTime::parse_from_rfc3339(str) { - Ok(dt) => Some(dt.naive_utc()), - _ => None, - }, + JsValue::String(ref str) => str + .to_std_string() + .ok() + .and_then(|s| chrono::DateTime::parse_from_rfc3339(s.as_str()).ok()) + .map(|dt| dt.naive_utc()), tv => { let tv = tv.to_number(context)?; if tv.is_nan() { @@ -512,13 +515,15 @@ impl Date { let hint = args.get_or_undefined(0); - let try_first = match hint.as_string().map(JsString::as_str) { + let try_first = match hint.as_string() { // 3. If hint is "string" or "default", then // a. Let tryFirst be string. 
- Some("string" | "default") => PreferredType::String, + Some(string) if string == utf16!("string") || string == utf16!("default") => { + PreferredType::String + } // 4. Else if hint is "number", then // a. Let tryFirst be number. - Some("number") => PreferredType::Number, + Some(number) if number == utf16!("number") => PreferredType::Number, // 5. Else, throw a TypeError exception. _ => { return context @@ -1638,7 +1643,7 @@ impl Date { .to_string() .into()) } else { - Ok(JsString::from("Invalid Date").into()) + Ok(js_string!("Invalid Date").into()) } } @@ -1740,7 +1745,7 @@ impl Date { .to_string() .into()) } else { - Ok(JsString::from("Invalid Date").into()) + Ok(js_string!("Invalid Date").into()) } } @@ -1776,7 +1781,7 @@ impl Date { .to_string() .into()) } else { - Ok(JsString::from("Invalid Date").into()) + Ok(js_string!("Invalid Date").into()) } } @@ -1842,14 +1847,20 @@ impl Date { // This method is implementation-defined and discouraged, so we just require the same format as the string // constructor. - if args.is_empty() { + let date = if let Some(arg) = args.get(0) { + arg + } else { return Ok(JsValue::nan()); - } + }; - match DateTime::parse_from_rfc3339(&args[0].to_string(context)?) 
{ - Ok(v) => Ok(JsValue::new(v.naive_utc().timestamp_millis() as f64)), - _ => Ok(JsValue::new(f64::NAN)), - } + let date = date.to_string(context)?; + + Ok(JsValue::new( + date.to_std_string() + .ok() + .and_then(|s| DateTime::parse_from_rfc3339(s.as_str()).ok()) + .map_or(f64::NAN, |v| v.naive_utc().timestamp_millis() as f64), + )) } /// `Date.UTC()` diff --git a/boa_engine/src/builtins/error/mod.rs b/boa_engine/src/builtins/error/mod.rs index c68d1854446..ac6accf31ca 100644 --- a/boa_engine/src/builtins/error/mod.rs +++ b/boa_engine/src/builtins/error/mod.rs @@ -13,11 +13,13 @@ use crate::{ builtins::BuiltIn, context::intrinsics::StandardConstructors, + js_string, object::{ internal_methods::get_prototype_from_constructor, ConstructorBuilder, JsObject, ObjectData, }, property::Attribute, - Context, JsResult, JsString, JsValue, + string::utf16, + Context, JsResult, JsValue, }; use boa_profiler::Profiler; use tap::{Conv, Pipe}; @@ -151,21 +153,21 @@ impl Error { }; // 3. Let name be ? Get(O, "name"). - let name = o.get("name", context)?; + let name = o.get(js_string!("name"), context)?; // 4. If name is undefined, set name to "Error"; otherwise set name to ? ToString(name). let name = if name.is_undefined() { - JsString::new("Error") + js_string!("Error") } else { name.to_string(context)? }; // 5. Let msg be ? Get(O, "message"). - let msg = o.get("message", context)?; + let msg = o.get(js_string!("message"), context)?; // 6. If msg is undefined, set msg to the empty String; otherwise set msg to ? ToString(msg). let msg = if msg.is_undefined() { - JsString::empty() + js_string!() } else { msg.to_string(context)? }; @@ -182,6 +184,6 @@ impl Error { // 9. Return the string-concatenation of name, the code unit 0x003A (COLON), // the code unit 0x0020 (SPACE), and msg. 
- Ok(format!("{name}: {msg}").into()) + Ok(js_string!(&name, utf16!(": "), &msg).into()) } } diff --git a/boa_engine/src/builtins/eval/mod.rs b/boa_engine/src/builtins/eval/mod.rs index 71b82951fad..a511b683ad1 100644 --- a/boa_engine/src/builtins/eval/mod.rs +++ b/boa_engine/src/builtins/eval/mod.rs @@ -78,9 +78,10 @@ impl Eval { }; // Because of implementation details the following code differs from the spec. + // TODO: rework parser to take an iterator of `u32` unicode codepoints // Parse the script body and handle early errors (6 - 11) - let body = match context.parse_eval(x.as_bytes(), direct, strict) { + let body = match context.parse_eval(x.to_std_string_escaped().as_bytes(), direct, strict) { Ok(body) => body, Err(e) => return context.throw_syntax_error(e.to_string()), }; diff --git a/boa_engine/src/builtins/function/mod.rs b/boa_engine/src/builtins/function/mod.rs index d5c23ea1bcd..9b286b35246 100644 --- a/boa_engine/src/builtins/function/mod.rs +++ b/boa_engine/src/builtins/function/mod.rs @@ -16,12 +16,14 @@ use crate::{ bytecompiler::{FunctionCompiler, FunctionKind}, context::intrinsics::StandardConstructors, environments::DeclarativeEnvironmentStack, + js_string, object::{ internal_methods::get_prototype_from_constructor, JsObject, NativeObject, Object, ObjectData, }, object::{ConstructorBuilder, FunctionBuilder, JsFunction, PrivateElement, Ref, RefMut}, property::{Attribute, PropertyDescriptor, PropertyKey}, + string::utf16, symbol::WellKnownSymbols, syntax::{ ast::node::{FormalParameterList, StatementList}, @@ -36,7 +38,6 @@ use boa_profiler::Profiler; use dyn_clone::DynClone; use std::{ any::Any, - borrow::Cow, fmt, ops::{Deref, DerefMut}, }; @@ -498,16 +499,15 @@ impl BuiltInFunctionObject { } else { let mut parameters = Vec::with_capacity(args.len()); for arg in args { - parameters.push(arg.to_string(context)?); + parameters.push(arg.to_string(context)?.as_slice().to_owned()); } - let mut parameters = parameters.join(","); - parameters.push(')'); 
- - let parameters = match Parser::new(parameters.as_bytes()).parse_formal_parameters( - context.interner_mut(), - generator, - r#async, - ) { + let mut parameters = parameters.join(utf16!(",")); + parameters.push(u16::from(b')')); + + // TODO: make parser generic to u32 iterators + let parameters = match Parser::new(String::from_utf16_lossy(&parameters).as_bytes()) + .parse_formal_parameters(context.interner_mut(), generator, r#async) + { Ok(parameters) => parameters, Err(e) => { return context.throw_syntax_error(format!( @@ -541,11 +541,10 @@ impl BuiltInFunctionObject { let body_arg = body_arg.to_string(context)?; - let body = match Parser::new(body_arg.as_bytes()).parse_function_body( - context.interner_mut(), - generator, - r#async, - ) { + // TODO: make parser generic to u32 iterators + let body = match Parser::new(body_arg.to_std_string_escaped().as_bytes()) + .parse_function_body(context.interner_mut(), generator, r#async) + { Ok(statement_list) => statement_list, Err(e) => { return context @@ -767,12 +766,10 @@ impl BuiltInFunctionObject { let target_name = target.get("name", context)?; // 9. If Type(targetName) is not String, set targetName to the empty String. - let target_name = target_name - .as_string() - .map_or(JsString::new(""), Clone::clone); + let target_name = target_name.as_string().map_or(js_string!(), Clone::clone); // 10. Perform SetFunctionName(F, targetName, "bound"). - set_function_name(&f, &target_name.into(), Some("bound"), context); + set_function_name(&f, &target_name.into(), Some(js_string!("bound")), context); // 11. Return F. Ok(f.into()) @@ -832,16 +829,23 @@ impl BuiltInFunctionObject { constructor: _, }, Some(name), - ) => Ok(format!("function {name}() {{\n [native Code]\n}}").into()), + ) => Ok(js_string!( + utf16!("function "), + &name, + utf16!("() {{\n [native Code]\n}}") + ) + .into()), (Function::Ordinary { .. 
}, Some(name)) if name.is_empty() => { - Ok("[Function (anonymous)]".into()) + Ok(js_string!("[Function (anonymous)]").into()) + } + (Function::Ordinary { .. }, Some(name)) => { + Ok(js_string!(utf16!("[Function: "), &name, utf16!("]")).into()) } - (Function::Ordinary { .. }, Some(name)) => Ok(format!("[Function: {name}]").into()), - (Function::Ordinary { .. }, None) => Ok("[Function (anonymous)]".into()), + (Function::Ordinary { .. }, None) => Ok(js_string!("[Function (anonymous)]").into()), (Function::Generator { .. }, Some(name)) => { - Ok(format!("[Function*: {}]", &name).into()) + Ok(js_string!(utf16!("[Function*: "), &name, utf16!("]")).into()) } - (Function::Generator { .. }, None) => Ok("[Function* (anonymous)]".into()), + (Function::Generator { .. }, None) => Ok(js_string!("[Function* (anonymous)]").into()), _ => Ok("TODO".into()), } } @@ -930,7 +934,7 @@ impl BuiltIn for BuiltInFunctionObject { fn set_function_name( function: &JsObject, name: &PropertyKey, - prefix: Option<&str>, + prefix: Option<JsString>, context: &mut Context, ) { // 1. Assert: F is an extensible object that does not have a "name" own property. @@ -940,14 +944,14 @@ fn set_function_name( // a. Let description be name's [[Description]] value. if let Some(desc) = sym.description() { // c. Else, set name to the string-concatenation of "[", description, and "]". - Cow::Owned(JsString::concat_array(&["[", &desc, "]"])) + js_string!(utf16!("["), &desc, utf16!("]")) } else { // b. If description is undefined, set name to the empty String. - Cow::Owned(JsString::new("")) + js_string!() } } - PropertyKey::String(string) => Cow::Borrowed(string), - PropertyKey::Index(index) => Cow::Owned(JsString::new(index.to_string())), + PropertyKey::String(string) => string.clone(), + PropertyKey::Index(index) => js_string!(format!("{}", index)), }; // 3. Else if name is a Private Name, then @@ -960,7 +964,7 @@ fn set_function_name( // 5. 
If prefix is present, then if let Some(prefix) = prefix { - name = Cow::Owned(JsString::concat_array(&[prefix, " ", &name])); + name = js_string!(&prefix, utf16!(" "), &name); // b. If F has an [[InitialName]] internal slot, then // i. Optionally, set F.[[InitialName]] to name. // todo: implement [[InitialName]] for builtins @@ -972,7 +976,7 @@ fn set_function_name( .define_property_or_throw( "name", PropertyDescriptor::builder() - .value(name.into_owned()) + .value(name) .writable(false) .enumerable(false) .configurable(true), diff --git a/boa_engine/src/builtins/function/tests.rs b/boa_engine/src/builtins/function/tests.rs index f1833439739..ca975413ec4 100644 --- a/boa_engine/src/builtins/function/tests.rs +++ b/boa_engine/src/builtins/function/tests.rs @@ -1,8 +1,9 @@ use crate::{ - forward, forward_val, + forward, forward_val, js_string, object::FunctionBuilder, property::{Attribute, PropertyDescriptor}, - Context, JsString, + string::utf16, + Context, }; #[allow(clippy::float_cmp)] @@ -129,7 +130,7 @@ fn function_prototype_call() { "#; let value = forward_val(&mut context, func).unwrap(); assert!(value.is_string()); - assert_eq!(value.as_string().unwrap(), "[object Error]"); + assert_eq!(value.as_string().unwrap(), utf16!("[object Error]")); } #[test] @@ -142,7 +143,7 @@ fn function_prototype_call_throw() { let value = forward_val(&mut context, throw).unwrap_err(); assert!(value.is_object()); let string = value.to_string(&mut context).unwrap(); - assert!(string.starts_with("TypeError")); + assert!(string.starts_with(utf16!("TypeError"))); } #[test] @@ -222,7 +223,7 @@ fn function_prototype_apply_on_object() { fn closure_capture_clone() { let mut context = Context::default(); - let string = JsString::from("Hello"); + let string = js_string!("Hello"); let object = context.construct_object(); object .define_property_or_throw( @@ -241,13 +242,13 @@ fn closure_capture_clone() { |_, _, captures, context| { let (string, object) = &captures; - let hw = 
JsString::concat( + let hw = js_string!( string, - object + &object .__get_own_property__(&"key".into(), context)? .and_then(|prop| prop.value().cloned()) .and_then(|val| val.as_string().cloned()) - .ok_or_else(|| context.construct_type_error("invalid `key` property"))?, + .ok_or_else(|| context.construct_type_error("invalid `key` property"))? ); Ok(hw.into()) }, diff --git a/boa_engine/src/builtins/intl/date_time_format.rs b/boa_engine/src/builtins/intl/date_time_format.rs index 333c224bc41..fd2ecddabfa 100644 --- a/boa_engine/src/builtins/intl/date_time_format.rs +++ b/boa_engine/src/builtins/intl/date_time_format.rs @@ -9,6 +9,7 @@ use crate::{ context::intrinsics::StandardConstructors, + js_string, object::{ internal_methods::get_prototype_from_constructor, ConstructorBuilder, JsFunction, JsObject, ObjectData, @@ -84,24 +85,24 @@ impl DateTimeFormat { prototype, ObjectData::date_time_format(Box::new(Self { initialized_date_time_format: true, - locale: JsString::from("en-US"), - calendar: JsString::from("gregory"), - numbering_system: JsString::from("arab"), - time_zone: JsString::from("UTC"), - weekday: JsString::from("narrow"), - era: JsString::from("narrow"), - year: JsString::from("numeric"), - month: JsString::from("narrow"), - day: JsString::from("numeric"), - day_period: JsString::from("narrow"), - hour: JsString::from("numeric"), - minute: JsString::from("numeric"), - second: JsString::from("numeric"), - fractional_second_digits: JsString::from(""), - time_zone_name: JsString::from(""), - hour_cycle: JsString::from("h24"), - pattern: JsString::from("{hour}:{minute}"), - bound_format: JsString::from("undefined"), + locale: js_string!("en-US"), + calendar: js_string!("gregory"), + numbering_system: js_string!("arab"), + time_zone: js_string!("UTC"), + weekday: js_string!("narrow"), + era: js_string!("narrow"), + year: js_string!("numeric"), + month: js_string!("narrow"), + day: js_string!("numeric"), + day_period: js_string!("narrow"), + hour: 
js_string!("numeric"), + minute: js_string!("numeric"), + second: js_string!("numeric"), + fractional_second_digits: js_string!(""), + time_zone_name: js_string!(""), + hour_cycle: js_string!("h24"), + pattern: js_string!("{hour}:{minute}"), + bound_format: js_string!("undefined"), })), ); diff --git a/boa_engine/src/builtins/intl/mod.rs b/boa_engine/src/builtins/intl/mod.rs index 116dbfbe702..74af600460e 100644 --- a/boa_engine/src/builtins/intl/mod.rs +++ b/boa_engine/src/builtins/intl/mod.rs @@ -13,7 +13,7 @@ use crate::{ object::{JsObject, ObjectInitializer}, property::Attribute, symbol::WellKnownSymbols, - Context, JsResult, JsString, JsValue, + Context, JsResult, JsValue, }; pub mod date_time_format; @@ -89,8 +89,8 @@ impl Intl { /// This is a return value for `lookup_matcher` and `best_fit_matcher` subroutines. #[derive(Debug)] struct MatcherRecord { - locale: JsString, - extension: JsString, + locale: String, + extension: String, } /// Abstract operation `DefaultLocale ( )` @@ -121,9 +121,9 @@ fn default_locale(canonicalizer: &LocaleCanonicalizer) -> Locale { /// - [ECMAScript reference][spec] /// /// [spec]: https://tc39.es/ecma402/#sec-bestavailablelocale -fn best_available_locale(available_locales: &[JsString], locale: &JsString) -> Option { +fn best_available_locale<'a>(available_locales: &'_ [&'_ str], locale: &'a str) -> Option<&'a str> { // 1. Let candidate be locale. - let mut candidate = locale.clone(); + let mut candidate = locale; // 2. Repeat loop { // a. If availableLocales contains an element equal to candidate, return candidate. @@ -144,7 +144,7 @@ fn best_available_locale(available_locales: &[JsString], locale: &JsString) -> O ind }; // d. Let candidate be the substring of candidate from position 0, inclusive, to position pos, exclusive. 
- candidate = JsString::new(&candidate[..trim_ind]); + candidate = &candidate[..trim_ind]; } None => return None, } @@ -162,8 +162,8 @@ fn best_available_locale(available_locales: &[JsString], locale: &JsString) -> O /// /// [spec]: https://tc39.es/ecma402/#sec-lookupmatcher fn lookup_matcher( - available_locales: &[JsString], - requested_locales: &[JsString], + available_locales: &[&str], + requested_locales: &[&str], canonicalizer: &LocaleCanonicalizer, ) -> MatcherRecord { // 1. Let result be a new Record. @@ -171,9 +171,8 @@ fn lookup_matcher( for locale_str in requested_locales { // a. Let noExtensionsLocale be the String value that is locale with any Unicode locale // extension sequences removed. - let parsed_locale = - Locale::from_bytes(locale_str.as_bytes()).expect("Locale parsing failed"); - let no_extensions_locale = JsString::new(parsed_locale.id.to_string()); + let locale: Locale = locale_str.parse().expect("Locale parsing failed"); + let no_extensions_locale = locale.id.to_string(); // b. Let availableLocale be ! BestAvailableLocale(availableLocales, noExtensionsLocale). let available_locale = best_available_locale(available_locales, &no_extensions_locale); @@ -184,17 +183,17 @@ fn lookup_matcher( // Assignment deferred. See return statement below. // ii. If locale and noExtensionsLocale are not the same String value, then let maybe_ext = if locale_str.eq(&no_extensions_locale) { - JsString::empty() + "".into() } else { // 1. Let extension be the String value consisting of the substring of the Unicode // locale extension sequence within locale. // 2. Set result.[[extension]] to extension. - JsString::new(parsed_locale.extensions.to_string()) + locale.extensions.to_string() }; // iii. Return result. return MatcherRecord { - locale: available_locale, + locale: available_locale.into(), extension: maybe_ext, }; } @@ -204,8 +203,8 @@ fn lookup_matcher( // 4. Set result.[[locale]] to defLocale. // 5. Return result. 
MatcherRecord { - locale: default_locale(canonicalizer).to_string().into(), - extension: JsString::empty(), + locale: default_locale(canonicalizer).to_string(), + extension: "".into(), } } @@ -222,8 +221,8 @@ fn lookup_matcher( /// /// [spec]: https://tc39.es/ecma402/#sec-bestfitmatcher fn best_fit_matcher( - available_locales: &[JsString], - requested_locales: &[JsString], + available_locales: &[&str], + requested_locales: &[&str], canonicalizer: &LocaleCanonicalizer, ) -> MatcherRecord { lookup_matcher(available_locales, requested_locales, canonicalizer) @@ -232,8 +231,8 @@ fn best_fit_matcher( /// `Keyword` structure is a pair of keyword key and keyword value. #[derive(Debug)] struct Keyword { - key: JsString, - value: JsString, + key: String, + value: String, } /// `UniExtRecord` structure represents unicode extension records. @@ -246,7 +245,7 @@ struct Keyword { #[allow(dead_code)] #[derive(Debug)] struct UniExtRecord { - attributes: Vec, // never read at this point + attributes: Vec, // never read at this point keywords: Vec, } @@ -259,12 +258,12 @@ struct UniExtRecord { /// - [ECMAScript reference][spec] /// /// [spec]: https://tc39.es/ecma402/#sec-unicode-extension-components -fn unicode_extension_components(extension: &JsString) -> UniExtRecord { +fn unicode_extension_components(extension: &str) -> UniExtRecord { // 1. Let attributes be a new empty List. - let mut attributes = Vec::::new(); + let mut attributes: Vec = Vec::new(); // 2. Let keywords be a new empty List. - let mut keywords = Vec::::new(); + let mut keywords: Vec = Vec::new(); // 3. Let keyword be undefined. let mut keyword: Option = None; @@ -278,24 +277,24 @@ fn unicode_extension_components(extension: &JsString) -> UniExtRecord { // 6. Repeat, while k < size, while k < size { // a. Let e be ! StringIndexOf(extension, "-", k). - let e = extension.index_of(&JsString::new("-"), k); + let e = extension[k..].find('-'); // b. If e = -1, let len be size - k; else let len be e - k. 
let len = match e { - Some(pos) => pos - k, + Some(pos) => pos, None => size - k, }; // c. Let subtag be the String value equal to the substring of extension consisting of the // code units at indices k (inclusive) through k + len (exclusive). - let subtag = JsString::new(&extension[k..k + len]); + let subtag = &extension[k..k + len]; // d. If keyword is undefined and len ≠ 2, then if keyword.is_none() && len != 2 { // i. If subtag is not an element of attributes, then - if !attributes.contains(&subtag) { + if !attributes.iter().any(|s| s == subtag) { // 1. Append subtag to attributes. - attributes.push(subtag); + attributes.push(subtag.to_string()); } // e. Else if len = 2, then } else if len == 2 { @@ -311,8 +310,8 @@ fn unicode_extension_components(extension: &JsString) -> UniExtRecord { // ii. Set keyword to the Record { [[Key]]: subtag, [[Value]]: "" }. keyword = Some(Keyword { - key: subtag, - value: JsString::empty(), + key: subtag.into(), + value: "".into(), }); // f. Else, } else { @@ -322,9 +321,9 @@ fn unicode_extension_components(extension: &JsString) -> UniExtRecord { // 1. Set keyword.[[Value]] to the string-concatenation of keyword.[[Value]], "-", and subtag. if let Some(keyword_val) = keyword { let new_keyword_val = if keyword_val.value.is_empty() { - subtag + subtag.into() } else { - JsString::new(format!("{}-{subtag}", keyword_val.value)) + format!("{}-{subtag}", keyword_val.value) }; keyword = Some(Keyword { @@ -369,7 +368,7 @@ fn insert_unicode_extension_and_canonicalize( locale: &str, extension: &str, canonicalizer: &LocaleCanonicalizer, -) -> JsString { +) -> String { // TODO 1. Assert: locale does not contain a substring that is a Unicode locale extension sequence. // TODO 2. Assert: extension is a Unicode locale extension sequence. // TODO 3. Assert: tag matches the unicode_locale_id production. @@ -404,7 +403,7 @@ fn insert_unicode_extension_and_canonicalize( // 8. Return ! CanonicalizeUnicodeLocaleId(locale). 
canonicalize_unicode_locale_id(&mut new_locale, canonicalizer); - new_locale.to_string().into() + new_locale.to_string() } /// Abstract operation `CanonicalizeLocaleList ( locales )` @@ -466,11 +465,15 @@ fn canonicalize_locale_list(args: &[JsValue], context: &mut Context) -> JsResult // 1. Let tag be kValue.[[Locale]]. // iv. Else, // 1. Let tag be ? ToString(kValue). - let tag = k_value.to_string(context)?; // v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. - let mut tag = tag.parse().map_err(|_| { - context.construct_range_error("locale is not a structurally valid language tag") - })?; + let mut tag = k_value + .to_string(context)? + .to_std_string() + .ok() + .and_then(|tag| tag.parse().ok()) + .ok_or_else(|| { + context.construct_range_error("locale is not a structurally valid language tag") + })?; // vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). canonicalize_unicode_locale_id(&mut tag, context.icu().locale_canonicalizer()); @@ -489,15 +492,15 @@ fn canonicalize_locale_list(args: &[JsValue], context: &mut Context) -> JsResult /// It is an alias for a map where key is a string and value is another map. /// /// Value of that inner map is a vector of strings representing locale parameters. -type LocaleDataRecord = FxHashMap>>; +type LocaleDataRecord = FxHashMap>>; /// `DateTimeFormatRecord` type aggregates `locale_matcher` selector and `properties` map. /// /// It is used as a type of `options` parameter in `resolve_locale` subroutine. #[derive(Debug)] struct DateTimeFormatRecord { - pub(crate) locale_matcher: JsString, - pub(crate) properties: FxHashMap, + pub(crate) locale_matcher: String, + pub(crate) properties: FxHashMap, } /// `ResolveLocaleRecord` type consists of unicode `locale` string, `data_locale` string and `properties` map. @@ -505,9 +508,9 @@ struct DateTimeFormatRecord { /// This is a return value for `resolve_locale` subroutine. 
#[derive(Debug)] struct ResolveLocaleRecord { - pub(crate) locale: JsString, - pub(crate) properties: FxHashMap, - pub(crate) data_locale: JsString, + pub(crate) locale: String, + pub(crate) properties: FxHashMap, + pub(crate) data_locale: String, } /// Abstract operation `ResolveLocale ( availableLocales, requestedLocales, options, relevantExtensionKeys, localeData )` @@ -523,10 +526,10 @@ struct ResolveLocaleRecord { /// [spec]: https://tc39.es/ecma402/#sec-resolvelocale #[allow(dead_code)] fn resolve_locale( - available_locales: &[JsString], - requested_locales: &[JsString], + available_locales: &[&str], + requested_locales: &[&str], options: &DateTimeFormatRecord, - relevant_extension_keys: &[JsString], + relevant_extension_keys: &[&str], locale_data: &LocaleDataRecord, context: &mut Context, ) -> ResolveLocaleRecord { @@ -536,7 +539,7 @@ fn resolve_locale( // a. Let r be ! LookupMatcher(availableLocales, requestedLocales). // 3. Else, // a. Let r be ! BestFitMatcher(availableLocales, requestedLocales). - let r = if matcher.eq(&JsString::new("lookup")) { + let r = if matcher == "lookup" { lookup_matcher( available_locales, requested_locales, @@ -555,9 +558,9 @@ fn resolve_locale( // 5. Let result be a new Record. let mut result = ResolveLocaleRecord { - locale: JsString::empty(), + locale: "".into(), properties: FxHashMap::default(), - data_locale: JsString::empty(), + data_locale: "".into(), }; // 6. Set result.[[dataLocale]] to foundLocale. @@ -574,10 +577,10 @@ fn resolve_locale( }; // 8. Let supportedExtension be "-u". - let mut supported_extension = JsString::new("-u"); + let mut supported_extension = String::from("-u"); // 9. For each element key of relevantExtensionKeys, do - for key in relevant_extension_keys { + for &key in relevant_extension_keys { // a. Let foundLocaleData be localeData.[[<foundLocale>]]. // TODO b. Assert: Type(foundLocaleData) is Record. 
let found_locale_data = match locale_data.get(&found_locale) { @@ -595,12 +598,12 @@ fn resolve_locale( // e. Let value be keyLocaleData[0]. // TODO f. Assert: Type(value) is either String or Null. let mut value = match key_locale_data.get(0) { - Some(first_elt) => JsValue::String(first_elt.clone()), + Some(first_elt) => first_elt.clone().into(), None => JsValue::null(), }; // g. Let supportedExtensionAddition be "". - let mut supported_extension_addition = JsString::empty(); + let mut supported_extension_addition = "".into(); // h. If r has an [[extension]] field, then if !r.extension.is_empty() { @@ -614,20 +617,19 @@ fn resolve_locale( // 3. If requestedValue is not the empty String, then if !requested_value.is_empty() { // a. If keyLocaleData contains requestedValue, then - if key_locale_data.contains(requested_value) { + if key_locale_data.iter().any(|s| s == requested_value) { // i. Let value be requestedValue. - value = JsValue::String(JsString::new(requested_value)); + value = requested_value.clone().into(); // ii. Let supportedExtensionAddition be the string-concatenation // of "-", key, "-", and value. - supported_extension_addition = - JsString::concat_array(&["-", key, "-", requested_value]); + supported_extension_addition = format!("-{key}-{requested_value}"); } // 4. Else if keyLocaleData contains "true", then - } else if key_locale_data.contains(&JsString::new("true")) { + } else if key_locale_data.iter().any(|s| s == "true") { // a. Let value be "true". - value = JsValue::String(JsString::new("true")); + value = "true".into(); // b. Let supportedExtensionAddition be the string-concatenation of "-" and key. - supported_extension_addition = JsString::concat_array(&["-", key]); + supported_extension_addition = format!("-{key}"); } } } @@ -659,7 +661,7 @@ fn resolve_locale( if let Some(options_val_str) = options_value.as_string() { if options_val_str.is_empty() { // a. Let optionsValue be "true". 
- options_value = JsValue::String(JsString::new("true")); + options_value = "true".into(); } } } @@ -667,24 +669,25 @@ fn resolve_locale( // iv. If keyLocaleData contains optionsValue, then let options_val_str = options_value .to_string(context) - .unwrap_or_else(|_| JsString::empty()); - if key_locale_data.contains(&options_val_str) { + .unwrap_or_else(|_| "".into()) + .to_std_string_escaped(); + if key_locale_data.iter().any(|s| s == &options_val_str) { // 1. If SameValue(optionsValue, value) is false, then if !options_value.eq(&value) { // a. Let value be optionsValue. value = options_value; // b. Let supportedExtensionAddition be "". - supported_extension_addition = JsString::empty(); + supported_extension_addition = "".into(); } } } // j. Set result.[[<key>]] to value. - result.properties.insert(key.clone(), value); + result.properties.insert(key.to_string(), value); // k. Append supportedExtensionAddition to supportedExtension. - supported_extension = JsString::concat(supported_extension, &supported_extension_addition); + supported_extension.push_str(&supported_extension_addition); } // 10. 
If the number of elements in supportedExtension is greater than 2, then @@ -726,7 +729,7 @@ pub(crate) fn get_option( options: &JsObject, property: &str, r#type: &GetOptionType, - values: &[JsString], + values: &[&str], fallback: &JsValue, context: &mut Context, ) -> JsResult { @@ -749,11 +752,11 @@ pub(crate) fn get_option( value = match r#type { GetOptionType::Boolean => JsValue::Boolean(value.to_boolean()), GetOptionType::String => { - let string_value = value.to_string(context)?; - if !values.is_empty() && !values.contains(&string_value) { + let string_value = value.to_string(context)?.to_std_string_escaped(); + if !values.is_empty() && !values.contains(&string_value.as_str()) { return context.throw_range_error("GetOption: values array does not contain value"); } - JsValue::String(string_value) + JsValue::String(string_value.into()) } }; diff --git a/boa_engine/src/builtins/intl/tests.rs b/boa_engine/src/builtins/intl/tests.rs index 6abfb0c6002..1a68701ee20 100644 --- a/boa_engine/src/builtins/intl/tests.rs +++ b/boa_engine/src/builtins/intl/tests.rs @@ -6,7 +6,7 @@ use crate::{ resolve_locale, unicode_extension_components, DateTimeFormatRecord, GetOptionType, }, object::JsObject, - Context, JsString, JsValue, + Context, JsValue, }; use icu_locale_canonicalizer::LocaleCanonicalizer; @@ -14,31 +14,31 @@ use rustc_hash::FxHashMap; #[test] fn best_avail_loc() { - let no_extensions_locale = JsString::new("en-US"); - let available_locales = Vec::::new(); + let no_extensions_locale = "en-US"; + let available_locales = Vec::new(); assert_eq!( best_available_locale(&available_locales, &no_extensions_locale,), None ); - let no_extensions_locale = JsString::new("de-DE"); + let no_extensions_locale = "de-DE"; let available_locales = vec![no_extensions_locale.clone()]; assert_eq!( best_available_locale(&available_locales, &no_extensions_locale,), Some(no_extensions_locale) ); - let locale_part = "fr".to_string(); - let no_extensions_locale = 
JsString::new(locale_part.clone() + "-CA"); - let available_locales = vec![JsString::new(locale_part.clone())]; + let locale_part = "fr"; + let no_extensions_locale = locale_part.to_string() + "-CA"; + let available_locales = vec![locale_part]; assert_eq!( best_available_locale(&available_locales, &no_extensions_locale,), - Some(JsString::new(locale_part)) + Some(locale_part) ); - let ja_kana_t = JsString::new("ja-Kana-JP-t"); - let ja_kana = JsString::new("ja-Kana-JP"); - let no_extensions_locale = JsString::new("ja-Kana-JP-t-it-latn-it"); + let ja_kana_t = "ja-Kana-JP-t"; + let ja_kana = "ja-Kana-JP"; + let no_extensions_locale = "ja-Kana-JP-t-it-latn-it"; let available_locales = vec![ja_kana_t, ja_kana.clone()]; assert_eq!( best_available_locale(&available_locales, &no_extensions_locale,), @@ -52,8 +52,8 @@ fn lookup_match() { let canonicalizer = LocaleCanonicalizer::new(&provider).expect("Could not create canonicalizer"); // available: [], requested: [] - let available_locales = Vec::::new(); - let requested_locales = Vec::::new(); + let available_locales = Vec::new(); + let requested_locales = Vec::new(); let matcher = lookup_matcher(&available_locales, &requested_locales, &canonicalizer); assert_eq!( @@ -63,8 +63,8 @@ fn lookup_match() { assert_eq!(matcher.extension, ""); // available: [de-DE], requested: [] - let available_locales = vec![JsString::new("de-DE")]; - let requested_locales = Vec::::new(); + let available_locales = vec!["de-DE"]; + let requested_locales = Vec::new(); let matcher = lookup_matcher(&available_locales, &requested_locales, &canonicalizer); assert_eq!( @@ -74,16 +74,16 @@ fn lookup_match() { assert_eq!(matcher.extension, ""); // available: [fr-FR], requested: [fr-FR-u-hc-h12] - let available_locales = vec![JsString::new("fr-FR")]; - let requested_locales = vec![JsString::new("fr-FR-u-hc-h12")]; + let available_locales = vec!["fr-FR"]; + let requested_locales = vec!["fr-FR-u-hc-h12"]; let matcher = lookup_matcher(&available_locales, 
&requested_locales, &canonicalizer); assert_eq!(matcher.locale, "fr-FR"); assert_eq!(matcher.extension, "u-hc-h12"); // available: [es-ES], requested: [es-ES] - let available_locales = vec![JsString::new("es-ES")]; - let requested_locales = vec![JsString::new("es-ES")]; + let available_locales = vec!["es-ES"]; + let requested_locales = vec!["es-ES"]; let matcher = best_fit_matcher(&available_locales, &requested_locales, &canonicalizer); assert_eq!(matcher.locale, "es-ES"); @@ -95,31 +95,31 @@ fn insert_unicode_ext() { let provider = icu_testdata::get_provider(); let canonicalizer = LocaleCanonicalizer::new(&provider).expect("Could not create canonicalizer"); - let locale = JsString::new("hu-HU"); - let ext = JsString::empty(); + let locale = "hu-HU"; + let ext = ""; assert_eq!( insert_unicode_extension_and_canonicalize(&locale, &ext, &canonicalizer), locale ); - let locale = JsString::new("hu-HU"); - let ext = JsString::new("-u-hc-h12"); + let locale = "hu-HU"; + let ext = "-u-hc-h12"; assert_eq!( insert_unicode_extension_and_canonicalize(&locale, &ext, &canonicalizer), - JsString::new("hu-HU-u-hc-h12") + "hu-HU-u-hc-h12" ); - let locale = JsString::new("hu-HU-x-PRIVATE"); - let ext = JsString::new("-u-hc-h12"); + let locale = "hu-HU-x-PRIVATE"; + let ext = "-u-hc-h12"; assert_eq!( insert_unicode_extension_and_canonicalize(&locale, &ext, &canonicalizer), - JsString::new("hu-HU-u-hc-h12-x-private") + "hu-HU-u-hc-h12-x-private" ); } #[test] fn uni_ext_comp() { - let ext = JsString::new("-u-ca-japanese-hc-h12"); + let ext = "-u-ca-japanese-hc-h12"; let components = unicode_extension_components(&ext); assert!(components.attributes.is_empty()); assert_eq!(components.keywords.len(), 2); @@ -128,16 +128,16 @@ fn uni_ext_comp() { assert_eq!(components.keywords[1].key, "hc"); assert_eq!(components.keywords[1].value, "h12"); - let ext = JsString::new("-u-alias-co-phonebk-ka-shifted"); + let ext = "-u-alias-co-phonebk-ka-shifted"; let components = 
unicode_extension_components(&ext); - assert_eq!(components.attributes, vec![JsString::new("alias")]); + assert_eq!(components.attributes, vec![String::from("alias")]); assert_eq!(components.keywords.len(), 2); assert_eq!(components.keywords[0].key, "co"); assert_eq!(components.keywords[0].value, "phonebk"); assert_eq!(components.keywords[1].key, "ka"); assert_eq!(components.keywords[1].value, "shifted"); - let ext = JsString::new("-u-ca-buddhist-kk-nu-thai"); + let ext = "-u-ca-buddhist-kk-nu-thai"; let components = unicode_extension_components(&ext); assert!(components.attributes.is_empty()); assert_eq!(components.keywords.len(), 3); @@ -148,7 +148,7 @@ fn uni_ext_comp() { assert_eq!(components.keywords[2].key, "nu"); assert_eq!(components.keywords[2].value, "thai"); - let ext = JsString::new("-u-ca-islamic-civil"); + let ext = "-u-ca-islamic-civil"; let components = unicode_extension_components(&ext); assert!(components.attributes.is_empty()); assert_eq!(components.keywords.len(), 1); @@ -161,12 +161,12 @@ fn locale_resolution() { let mut context = Context::default(); // test lookup - let available_locales = Vec::::new(); - let requested_locales = Vec::::new(); - let relevant_extension_keys = Vec::::new(); + let available_locales = Vec::new(); + let requested_locales = Vec::new(); + let relevant_extension_keys = Vec::new(); let locale_data = FxHashMap::default(); let options = DateTimeFormatRecord { - locale_matcher: JsString::new("lookup"), + locale_matcher: "lookup".into(), properties: FxHashMap::default(), }; @@ -193,12 +193,12 @@ fn locale_resolution() { assert!(locale_record.properties.is_empty()); // test best fit - let available_locales = Vec::::new(); - let requested_locales = Vec::::new(); - let relevant_extension_keys = Vec::::new(); + let available_locales = Vec::new(); + let requested_locales = Vec::new(); + let relevant_extension_keys = Vec::new(); let locale_data = FxHashMap::default(); let options = DateTimeFormatRecord { - locale_matcher: 
JsString::new("best-fit"), + locale_matcher: "best-fit".into(), properties: FxHashMap::default(), }; @@ -225,12 +225,12 @@ fn locale_resolution() { assert!(locale_record.properties.is_empty()); // available: [es-ES], requested: [es-ES] - let available_locales = vec![JsString::new("es-ES")]; - let requested_locales = vec![JsString::new("es-ES")]; - let relevant_extension_keys = Vec::::new(); + let available_locales = vec!["es-ES"]; + let requested_locales = vec!["es-ES"]; + let relevant_extension_keys = Vec::new(); let locale_data = FxHashMap::default(); let options = DateTimeFormatRecord { - locale_matcher: JsString::new("lookup"), + locale_matcher: "lookup".into(), properties: FxHashMap::default(), }; @@ -247,12 +247,12 @@ fn locale_resolution() { assert!(locale_record.properties.is_empty()); // available: [zh-CN], requested: [] - let available_locales = vec![JsString::new("zh-CN")]; - let requested_locales = Vec::::new(); - let relevant_extension_keys = Vec::::new(); + let available_locales = vec!["zh-CN"]; + let requested_locales = Vec::new(); + let relevant_extension_keys = Vec::new(); let locale_data = FxHashMap::default(); let options = DateTimeFormatRecord { - locale_matcher: JsString::new("lookup"), + locale_matcher: "lookup".into(), properties: FxHashMap::default(), }; @@ -283,8 +283,8 @@ fn locale_resolution() { fn get_opt() { let mut context = Context::default(); - let values = Vec::::new(); - let fallback = JsValue::String(JsString::new("fallback")); + let values = Vec::new(); + let fallback = JsValue::String("fallback".into()); let options_obj = JsObject::empty(); let option_type = GetOptionType::String; let get_option_result = get_option( @@ -298,10 +298,10 @@ fn get_opt() { .expect("GetOption should not fail on fallback test"); assert_eq!(get_option_result, fallback); - let values = Vec::::new(); - let fallback = JsValue::String(JsString::new("fallback")); + let values = Vec::new(); + let fallback = JsValue::String("fallback".into()); let options_obj 
= JsObject::empty(); - let locale_value = JsValue::String(JsString::new("en-US")); + let locale_value = JsValue::String("en-US".into()); options_obj .set("Locale", locale_value.clone(), true, &mut context) .expect("Setting a property should not fail"); @@ -317,10 +317,10 @@ fn get_opt() { .expect("GetOption should not fail on string test"); assert_eq!(get_option_result, locale_value); - let fallback = JsValue::String(JsString::new("fallback")); + let fallback = JsValue::String("fallback".into()); let options_obj = JsObject::empty(); - let locale_string = JsString::new("en-US"); - let locale_value = JsValue::String(locale_string.clone()); + let locale_string = "en-US"; + let locale_value = JsValue::String(locale_string.into()); let values = vec![locale_string]; options_obj .set("Locale", locale_value.clone(), true, &mut context) @@ -340,7 +340,7 @@ fn get_opt() { let fallback = JsValue::new(false); let options_obj = JsObject::empty(); let boolean_value = JsValue::new(true); - let values = Vec::::new(); + let values = Vec::new(); options_obj .set("boolean_val", boolean_value.clone(), true, &mut context) .expect("Setting a property should not fail"); @@ -356,10 +356,10 @@ fn get_opt() { .expect("GetOption should not fail on boolean test"); assert_eq!(get_option_result, boolean_value); - let fallback = JsValue::String(JsString::new("fallback")); + let fallback = JsValue::String("fallback".into()); let options_obj = JsObject::empty(); - let locale_value = JsValue::String(JsString::new("en-US")); - let other_locale_str = JsString::new("de-DE"); + let locale_value = JsValue::String("en-US".into()); + let other_locale_str = "de-DE"; let values = vec![other_locale_str]; options_obj .set("Locale", locale_value, true, &mut context) @@ -473,7 +473,7 @@ fn to_date_time_opts() { ) .expect("toDateTimeOptions should not fail in date test"); - let numeric_jsstring = JsValue::String(JsString::new("numeric")); + let numeric_jsstring = JsValue::String("numeric".into()); assert_eq!( 
date_time_opts.get("year", &mut context), Ok(numeric_jsstring.clone()) @@ -495,7 +495,7 @@ fn to_date_time_opts() { ) .expect("toDateTimeOptions should not fail in time test"); - let numeric_jsstring = JsValue::String(JsString::new("numeric")); + let numeric_jsstring = JsValue::String("numeric".into()); assert_eq!( date_time_opts.get("hour", &mut context), Ok(numeric_jsstring.clone()) @@ -517,7 +517,7 @@ fn to_date_time_opts() { ) .expect("toDateTimeOptions should not fail when testing required = 'any'"); - let numeric_jsstring = JsValue::String(JsString::new("numeric")); + let numeric_jsstring = JsValue::String("numeric".into()); assert_eq!( date_time_opts.get("year", &mut context), Ok(numeric_jsstring.clone()) diff --git a/boa_engine/src/builtins/json/mod.rs b/boa_engine/src/builtins/json/mod.rs index e51e10110cf..952bb9ad289 100644 --- a/boa_engine/src/builtins/json/mod.rs +++ b/boa_engine/src/builtins/json/mod.rs @@ -14,24 +14,122 @@ //! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON use super::JsArgs; +use std::{ + borrow::Cow, + iter::{once, FusedIterator}, +}; + use crate::{ - builtins::{ - string::{is_leading_surrogate, is_trailing_surrogate}, - BuiltIn, - }, + builtins::BuiltIn, + js_string, object::{JsObject, ObjectInitializer, RecursionLimiter}, property::{Attribute, PropertyNameKind}, + string::{utf16, CodePoint}, symbol::WellKnownSymbols, value::IntegerOrInfinity, Context, JsResult, JsString, JsValue, }; use boa_profiler::Profiler; -use serde_json::{self, Value as JSONValue}; use tap::{Conv, Pipe}; #[cfg(test)] mod tests; +// `Intersperse` impl taken from `itertools` +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone, Debug)] +struct Intersperse +where + I: Iterator, +{ + element: I::Item, + iter: std::iter::Fuse, + peek: Option, +} + +fn intersperse(iter: I, element: I::Item) -> Intersperse +where + I: Iterator, +{ + let mut iter = iter.fuse(); + Intersperse { + 
peek: iter.next(), + iter, + element, + } +} + +impl Iterator for Intersperse +where + I: Iterator, + I::Item: Clone, +{ + type Item = I::Item; + #[inline] + fn next(&mut self) -> Option { + if self.peek.is_some() { + self.peek.take() + } else { + self.peek = self.iter.next(); + if self.peek.is_some() { + Some(self.element.clone()) + } else { + None + } + } + } + + fn size_hint(&self) -> (usize, Option) { + type SizeHint = (usize, Option); + fn add(a: SizeHint, b: SizeHint) -> SizeHint { + let min = a.0.saturating_add(b.0); + let max = match (a.1, b.1) { + (Some(x), Some(y)) => x.checked_add(y), + _ => None, + }; + + (min, max) + } + + fn add_scalar(sh: SizeHint, x: usize) -> SizeHint { + let (mut low, mut hi) = sh; + low = low.saturating_add(x); + hi = hi.and_then(|elt| elt.checked_add(x)); + (low, hi) + } + // 2 * SH + { 1 or 0 } + let has_peek = usize::from(self.peek.is_some()); + let sh = self.iter.size_hint(); + add_scalar(add(sh, sh), has_peek) + } + + fn fold(mut self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + let mut accum = init; + + if let Some(x) = self.peek.take() { + accum = f(accum, x); + } + + let element = &mut self.element; + + self.iter.fold(accum, |accum, x| { + let accum = f(accum, element.clone()); + f(accum, x) + }) + } +} + +impl FusedIterator for Intersperse +where + I: Iterator, + I::Item: Clone, +{ +} + /// JavaScript `JSON` global object. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub(crate) struct Json; @@ -74,16 +172,19 @@ impl Json { .get(0) .cloned() .unwrap_or_default() - .to_string(context)?; + .to_string(context)? + .to_std_string() + .map_err(|e| context.construct_syntax_error(e.to_string()))?; // 2. Parse ! StringToCodePoints(jsonString) as a JSON text as specified in ECMA-404. // Throw a SyntaxError exception if it is not a valid JSON text as defined in that specification. 
- if let Err(e) = serde_json::from_str::(&json_string) { + if let Err(e) = serde_json::from_str::(&json_string) { return context.throw_syntax_error(e.to_string()); } // 3. Let scriptString be the string-concatenation of "(", jsonString, and ");". - let script_string = JsString::concat_array(&["(", json_string.as_str(), ");"]); + // TODO: fix script read for eval + let script_string = format!("({json_string});"); // 4. Let script be ParseText(! StringToCodePoints(scriptString), Script). // 5. NOTE: The early error rules defined in 13.2.5.1 have special handling for the above invocation of ParseText. @@ -169,7 +270,8 @@ impl Json { // This is safe, because EnumerableOwnPropertyNames with 'key' type only returns strings. let p = p .as_string() - .expect("EnumerableOwnPropertyNames only returns strings"); + .expect("EnumerableOwnPropertyNames only returns strings") + .clone(); // 1. Let newElement be ? InternalizeJSONProperty(val, P, reviver). let new_element = @@ -178,12 +280,12 @@ impl Json { // 2. If newElement is undefined, then if new_element.is_undefined() { // a. Perform ? val.[[Delete]](P). - obj.__delete__(&p.clone().into(), context)?; + obj.__delete__(&p.into(), context)?; } // 3. Else, else { // a. Perform ? CreateDataProperty(val, P, newElement). - obj.create_data_property(p.as_str(), new_element, context)?; + obj.create_data_property(p, new_element, context)?; } } } @@ -218,7 +320,7 @@ impl Json { let stack = Vec::new(); // 2. Let indent be the empty String. - let indent = JsString::new(""); + let indent = js_string!(); // 3. Let PropertyList and ReplacerFunction be undefined. 
let mut property_list = None; @@ -305,9 +407,9 @@ impl Json { .to_integer_or_infinity(context) .expect("ToIntegerOrInfinity cannot fail on number") { - IntegerOrInfinity::PositiveInfinity => JsString::new(" "), - IntegerOrInfinity::NegativeInfinity => JsString::new(""), - IntegerOrInfinity::Integer(i) if i < 1 => JsString::new(""), + IntegerOrInfinity::PositiveInfinity => js_string!(" "), + IntegerOrInfinity::NegativeInfinity => js_string!(), + IntegerOrInfinity::Integer(i) if i < 1 => js_string!(), IntegerOrInfinity::Integer(i) => { let mut s = String::new(); let i = std::cmp::min(10, i); @@ -320,11 +422,11 @@ impl Json { // 7. Else if Type(space) is String, then } else if let Some(s) = space.as_string() { // a. If the length of space is 10 or less, let gap be space; otherwise let gap be the substring of space from 0 to 10. - String::from_utf16_lossy(&s.encode_utf16().take(10).collect::>()).into() + js_string!(s.get(..10).unwrap_or(s)) // 8. Else, } else { // a. Let gap be the empty String. - JsString::new("") + js_string!() }; // 9. Let wrapper be ! OrdinaryObjectCreate(%Object.prototype%). @@ -346,7 +448,7 @@ impl Json { // 12. Return ? SerializeJSONProperty(state, the empty String, wrapper). Ok( - Self::serialize_json_property(&mut state, JsString::new(""), &wrapper, context)? + Self::serialize_json_property(&mut state, js_string!(), &wrapper, context)? .map(Into::into) .unwrap_or_default(), ) @@ -413,13 +515,13 @@ impl Json { // 5. If value is null, return "null". if value.is_null() { - return Ok(Some(JsString::new("null"))); + return Ok(Some(js_string!("null"))); } // 6. If value is true, return "true". // 7. If value is false, return "false". if value.is_boolean() { - return Ok(Some(JsString::new(if value.to_boolean() { + return Ok(Some(js_string!(if value.to_boolean() { "true" } else { "false" @@ -443,7 +545,7 @@ impl Json { } // b. Return "null". - return Ok(Some(JsString::new("null"))); + return Ok(Some(js_string!("null"))); } // 10. 
If Type(value) is BigInt, throw a TypeError exception. @@ -476,45 +578,50 @@ impl Json { /// /// [spec]: https://tc39.es/ecma262/#sec-quotejsonstring fn quote_json_string(value: &JsString) -> JsString { + let mut buf = [0; 2]; // 1. Let product be the String value consisting solely of the code unit 0x0022 (QUOTATION MARK). - let mut product = String::from('"'); + let mut product = vec!['"' as u16]; // 2. For each code point C of ! StringToCodePoints(value), do - for code_point in value.encode_utf16() { + for code_point in value.code_points() { match code_point { // a. If C is listed in the “Code Point” column of Table 73, then - // i. Set product to the string-concatenation of product and the escape sequence for C as specified in the “Escape Sequence” column of the corresponding row. - 0x8 => product.push_str("\\b"), - 0x9 => product.push_str("\\t"), - 0xA => product.push_str("\\n"), - 0xC => product.push_str("\\f"), - 0xD => product.push_str("\\r"), - 0x22 => product.push_str("\\\""), - 0x5C => product.push_str("\\\\"), - // b. Else if C has a numeric value less than 0x0020 (SPACE), or if C has the same numeric value as a leading surrogate or trailing surrogate, then - code_point - if is_leading_surrogate(code_point) || is_trailing_surrogate(code_point) => - { - // i. Let unit be the code unit whose numeric value is that of C. - // ii. Set product to the string-concatenation of product and UnicodeEscape(unit). - product.push_str(&format!("\\\\uAA{code_point:x}")); + // i. Set product to the string-concatenation of product and the + // escape sequence for C as specified in the “Escape Sequence” + // column of the corresponding row. 
+ CodePoint::Unicode('\u{0008}') => product.extend_from_slice(utf16!(r"\b")), + CodePoint::Unicode('\u{0009}') => product.extend_from_slice(utf16!(r"\t")), + CodePoint::Unicode('\u{000A}') => product.extend_from_slice(utf16!(r"\n")), + CodePoint::Unicode('\u{000C}') => product.extend_from_slice(utf16!(r"\f")), + CodePoint::Unicode('\u{000D}') => product.extend_from_slice(utf16!(r"\r")), + CodePoint::Unicode('\u{0022}') => product.extend_from_slice(utf16!(r#"\""#)), + CodePoint::Unicode('\u{005C}') => product.extend_from_slice(utf16!(r"\\")), + // b. Else if C has a numeric value less than 0x0020 (SPACE), or + // if C has the same numeric value as a leading surrogate or + // trailing surrogate, then + // i. Let unit be the code unit whose numeric value is that + // of C. + // ii. Set product to the string-concatenation of product + // and UnicodeEscape(unit). + CodePoint::Unicode(c) if c < '\u{0020}' => { + product.extend(format!("\\u{:04x}", c as u32).encode_utf16()); + } + CodePoint::UnpairedSurrogate(surr) => { + product.extend(format!("\\u{surr:04x}").encode_utf16()); } // c. Else, - code_point => { + CodePoint::Unicode(c) => { // i. Set product to the string-concatenation of product and ! UTF16EncodeCodePoint(C). - product.push( - char::from_u32(u32::from(code_point)) - .expect("char from code point cannot fail here"), - ); + product.extend_from_slice(c.encode_utf16(&mut buf)); } } } // 3. Set product to the string-concatenation of product and the code unit 0x0022 (QUOTATION MARK). - product.push('"'); + product.push('"' as u16); // 4. Return product. - product.into() + js_string!(&product[..]) } /// `25.5.2.4 SerializeJSONObject ( state, value )` @@ -541,7 +648,7 @@ impl Json { let stepback = state.indent.clone(); // 4. Set state.[[Indent]] to the string-concatenation of state.[[Indent]] and state.[[Gap]]. - state.indent = JsString::concat(&state.indent, &state.gap); + state.indent = js_string!(&state.indent, &state.gap); // 5. 
If state.[[PropertyList]] is not undefined, then let k = if let Some(p) = &state.property_list { @@ -571,19 +678,19 @@ impl Json { // b. If strP is not undefined, then if let Some(str_p) = str_p { // i. Let member be QuoteJSONString(P). + let mut member = Self::quote_json_string(p).to_vec(); + // ii. Set member to the string-concatenation of member and ":". + member.push(':' as u16); + // iii. If state.[[Gap]] is not the empty String, then - // 1. Set member to the string-concatenation of member and the code unit 0x0020 (SPACE). + if !state.gap.is_empty() { + // 1. Set member to the string-concatenation of member and the code unit 0x0020 (SPACE). + member.push(' ' as u16); + } + // iv. Set member to the string-concatenation of member and strP. - let member = if state.gap.is_empty() { - format!("{}:{}", Self::quote_json_string(p).as_str(), str_p.as_str()) - } else { - format!( - "{}: {}", - Self::quote_json_string(p).as_str(), - str_p.as_str() - ) - }; + member.extend_from_slice(&str_p); // v. Append member to partial. partial.push(member); @@ -593,7 +700,7 @@ impl Json { // 9. If partial is empty, then let r#final = if partial.is_empty() { // a. Let final be "{}". - JsString::new("{}") + js_string!("{}") // 10. Else, } else { // a. If state.[[Gap]] is the empty String, then @@ -602,23 +709,34 @@ impl Json { // with each adjacent pair of Strings separated with the code unit 0x002C (COMMA). // A comma is not inserted either before the first String or after the last String. // ii. Let final be the string-concatenation of "{", properties, and "}". - format!("{{{}}}", partial.join(",")).into() + let separator = utf16!(","); + let result = once(utf16!("{")) + .chain(intersperse(partial.iter().map(Vec::as_slice), separator)) + .chain(once(utf16!("}"))) + .flatten() + .copied() + .collect::>(); + js_string!(&result[..]) // b. Else, } else { // i. 
Let separator be the string-concatenation of the code unit 0x002C (COMMA), // the code unit 0x000A (LINE FEED), and state.[[Indent]]. - let separator = format!(",\n{}", state.indent.as_str()); + let mut separator = utf16!(",\n").to_vec(); + separator.extend_from_slice(&state.indent); // ii. Let properties be the String value formed by concatenating all the element Strings of partial // with each adjacent pair of Strings separated with separator. // The separator String is not inserted either before the first String or after the last String. - let properties = partial.join(&separator); - // iii. Let final be the string-concatenation of "{", the code unit 0x000A (LINE FEED), state.[[Indent]], properties, the code unit 0x000A (LINE FEED), stepback, and "}". - format!( - "{{\n{}{properties}\n{}}}", - state.indent.as_str(), - stepback.as_str() - ) - .into() + // iii. Let final be the string-concatenation of "{", the code + // unit 0x000A (LINE FEED), state.[[Indent]], properties, + // the code unit 0x000A (LINE FEED), stepback, and "}". + let result = [utf16!("{\n"), &state.indent[..]] + .into_iter() + .chain(intersperse(partial.iter().map(Vec::as_slice), &separator)) + .chain([utf16!("\n"), &stepback[..], utf16!("}")].into_iter()) + .flatten() + .copied() + .collect::>(); + js_string!(&result[..]) } }; @@ -656,7 +774,7 @@ impl Json { let stepback = state.indent.clone(); // 4. Set state.[[Indent]] to the string-concatenation of state.[[Indent]] and state.[[Gap]]. - state.indent = JsString::concat(&state.indent, &state.gap); + state.indent = js_string!(&state.indent, &state.gap); // 5. Let partial be a new empty List. let mut partial = Vec::new(); @@ -676,11 +794,11 @@ impl Json { // b. If strP is undefined, then if let Some(str_p) = str_p { // i. Append strP to partial. - partial.push(str_p); + partial.push(Cow::Owned(str_p.to_vec())); // c. Else, } else { // i. Append "null" to partial. 
- partial.push("null".into()); + partial.push(Cow::Borrowed(utf16!("null"))); } // d. Set index to index + 1. @@ -690,7 +808,7 @@ impl Json { // 9. If partial is empty, then let r#final = if partial.is_empty() { // a. Let final be "[]". - JsString::from("[]") + js_string!("[]") // 10. Else, } else { // a. If state.[[Gap]] is the empty String, then @@ -699,23 +817,32 @@ impl Json { // with each adjacent pair of Strings separated with the code unit 0x002C (COMMA). // A comma is not inserted either before the first String or after the last String. // ii. Let final be the string-concatenation of "[", properties, and "]". - format!("[{}]", partial.join(",")).into() + let separator = utf16!(","); + let result = once(utf16!("[")) + .chain(intersperse(partial.iter().map(Cow::as_ref), separator)) + .chain(once(utf16!("]"))) + .flatten() + .copied() + .collect::>(); + js_string!(&result[..]) // b. Else, } else { // i. Let separator be the string-concatenation of the code unit 0x002C (COMMA), // the code unit 0x000A (LINE FEED), and state.[[Indent]]. - let separator = format!(",\n{}", state.indent.as_str()); + let mut separator = utf16!(",\n").to_vec(); + separator.extend_from_slice(&state.indent); // ii. Let properties be the String value formed by concatenating all the element Strings of partial // with each adjacent pair of Strings separated with separator. // The separator String is not inserted either before the first String or after the last String. - let properties = partial.join(&separator); // iii. Let final be the string-concatenation of "[", the code unit 0x000A (LINE FEED), state.[[Indent]], properties, the code unit 0x000A (LINE FEED), stepback, and "]". 
- format!( - "[\n{}{properties}\n{}]", - state.indent.as_str(), - stepback.as_str() - ) - .into() + let result = [utf16!("[\n"), &state.indent[..]] + .into_iter() + .chain(intersperse(partial.iter().map(Cow::as_ref), &separator)) + .chain([utf16!("\n"), &stepback[..], utf16!("]")].into_iter()) + .flatten() + .copied() + .collect::>(); + js_string!(&result[..]) } }; diff --git a/boa_engine/src/builtins/number/mod.rs b/boa_engine/src/builtins/number/mod.rs index e39ade669c2..f142402e7ea 100644 --- a/boa_engine/src/builtins/number/mod.rs +++ b/boa_engine/src/builtins/number/mod.rs @@ -21,6 +21,8 @@ use crate::{ JsObject, ObjectData, }, property::Attribute, + string::utf16, + string::Utf16Trim, value::{AbstractRelation, IntegerOrInfinity, JsValue}, Context, JsResult, }; @@ -130,7 +132,7 @@ impl Number { /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/MAX_SAFE_INTEGER pub(crate) const MAX_SAFE_INTEGER: f64 = 9_007_199_254_740_991_f64; - /// The `Number.MIN_SAFE_INTEGER` constant represents the minimum safe integer in JavaScript (`-(253 - 1)`). + /// The `Number.MIN_SAFE_INTEGER` constant represents the minimum safe integer in JavaScript (`-(2^53 - 1)`). /// /// More information: /// - [ECMAScript reference][spec] @@ -761,12 +763,12 @@ impl Number { let input_string = val.to_string(context)?; // 2. Let S be ! TrimString(inputString, start). - let mut var_s = input_string.trim_start_matches(is_trimmable_whitespace); + let mut var_s = input_string.trim_start(); // 3. Let sign be 1. // 4. If S is not empty and the first code unit of S is the code unit 0x002D (HYPHEN-MINUS), // set sign to -1. - let sign = if !var_s.is_empty() && var_s.starts_with('\u{002D}') { + let sign = if !var_s.is_empty() && var_s.starts_with(utf16!("-")) { -1 } else { 1 @@ -774,10 +776,10 @@ impl Number { // 5. 
If S is not empty and the first code unit of S is the code unit 0x002B (PLUS SIGN) or // the code unit 0x002D (HYPHEN-MINUS), remove the first code unit from S. - if !var_s.is_empty() { - var_s = var_s - .strip_prefix(&['\u{002B}', '\u{002D}'][..]) - .unwrap_or(var_s); + if !var_s.is_empty() + && (var_s.starts_with(utf16!("+")) || var_s.starts_with(utf16!("-"))) + { + var_s = &var_s[1..]; } // 6. Let R be ℝ(? ToInt32(radix)). @@ -810,23 +812,21 @@ impl Number { // ii. Set R to 16. if strip_prefix && var_s.len() >= 2 - && (var_s.starts_with("0x") || var_s.starts_with("0X")) + && (var_s.starts_with(utf16!("0x")) || var_s.starts_with(utf16!("0X"))) { - var_s = var_s.split_at(2).1; + var_s = &var_s[2..]; var_r = 16; } // 11. If S contains a code unit that is not a radix-R digit, let end be the index within S of the // first such code unit; otherwise, let end be the length of S. - let end = if let Some(index) = var_s.find(|c: char| !c.is_digit(var_r as u32)) { - index - } else { - var_s.len() - }; + let end = char::decode_utf16(var_s.iter().copied()) + .position(|code| !code.map(|c| c.is_digit(var_r as u32)).unwrap_or_default()) + .unwrap_or(var_s.len()); // 12. Let Z be the substring of S from 0 to end. - let var_z = var_s.split_at(end).0; + let var_z = String::from_utf16_lossy(&var_s[..end]); // 13. If Z is empty, return NaN. if var_z.is_empty() { @@ -839,8 +839,8 @@ impl Number { // 0 digit, at the option of the implementation; and if R is not 2, 4, 8, 10, 16, or 32, then // mathInt may be an implementation-approximated value representing the integer value that is // represented by Z in radix-R notation.) 
- let math_int = u64::from_str_radix(var_z, var_r as u32).map_or_else( - |_| f64::from_str_radix(var_z, var_r as u32).expect("invalid_float_conversion"), + let math_int = u64::from_str_radix(&var_z, var_r as u32).map_or_else( + |_| f64::from_str_radix(&var_z, var_r as u32).expect("invalid_float_conversion"), |i| i as f64, ); @@ -884,7 +884,8 @@ impl Number { context: &mut Context, ) -> JsResult { if let Some(val) = args.get(0) { - let input_string = val.to_string(context)?; + // TODO: parse float with optimal utf16 algorithm + let input_string = val.to_string(context)?.to_std_string_escaped(); let s = input_string.trim_start_matches(is_trimmable_whitespace); let s_prefix_lower = s.chars().take(4).collect::().to_ascii_lowercase(); diff --git a/boa_engine/src/builtins/object/for_in_iterator.rs b/boa_engine/src/builtins/object/for_in_iterator.rs index 612daa0b499..da89afec4b2 100644 --- a/boa_engine/src/builtins/object/for_in_iterator.rs +++ b/boa_engine/src/builtins/object/for_in_iterator.rs @@ -96,11 +96,7 @@ impl ForInIterator { { iterator.visited_keys.insert(r.clone()); if desc.expect_enumerable() { - return Ok(create_iter_result_object( - JsValue::new(r.to_string()), - false, - context, - )); + return Ok(create_iter_result_object(JsValue::new(r), false, context)); } } } diff --git a/boa_engine/src/builtins/object/mod.rs b/boa_engine/src/builtins/object/mod.rs index 891360ded7e..8e277b72d59 100644 --- a/boa_engine/src/builtins/object/mod.rs +++ b/boa_engine/src/builtins/object/mod.rs @@ -17,14 +17,16 @@ use super::Array; use crate::{ builtins::{map, BuiltIn, JsArgs}, context::intrinsics::StandardConstructors, + js_string, object::{ internal_methods::get_prototype_from_constructor, ConstructorBuilder, FunctionBuilder, IntegrityLevel, JsObject, ObjectData, ObjectKind, }, property::{Attribute, PropertyDescriptor, PropertyKey, PropertyNameKind}, + string::utf16, symbol::WellKnownSymbols, value::JsValue, - Context, JsResult, JsString, + Context, JsResult, }; use 
boa_profiler::Profiler; use tap::{Conv, Pipe}; @@ -603,8 +605,10 @@ impl Object { JsValue::Null => None, // 2. If Type(proto) is neither Object nor Null, throw a TypeError exception. val => { - return ctx - .throw_type_error(format!("expected an object or null, got {}", val.type_of())) + return ctx.throw_type_error(format!( + "expected an object or null, got {}", + val.type_of().to_std_string_escaped() + )) } }; @@ -747,8 +751,8 @@ impl Object { // 4. Let isArray be ? IsArray(O). // 5. If isArray is true, let builtinTag be "Array". - let builtin_tag = if JsValue::from(o.clone()).is_array(context)? { - "Array" + let builtin_tag = if o.is_array_abstract(context)? { + js_string!("Array") } else { // 6. Else if O has a [[ParameterMap]] internal slot, let builtinTag be "Arguments". // 7. Else if O has a [[Call]] internal method, let builtinTag be "Function". @@ -761,16 +765,15 @@ impl Object { // 14. Else, let builtinTag be "Object". let o = o.borrow(); match o.kind() { - ObjectKind::Array => "Array", - ObjectKind::Arguments(_) => "Arguments", - ObjectKind::Function(_) => "Function", - ObjectKind::Error => "Error", - ObjectKind::Boolean(_) => "Boolean", - ObjectKind::Number(_) => "Number", - ObjectKind::String(_) => "String", - ObjectKind::Date(_) => "Date", - ObjectKind::RegExp(_) => "RegExp", - _ => "Object", + ObjectKind::Arguments(_) => js_string!("Arguments"), + ObjectKind::Function(_) => js_string!("Function"), + ObjectKind::Error => js_string!("Error"), + ObjectKind::Boolean(_) => js_string!("Boolean"), + ObjectKind::Number(_) => js_string!("Number"), + ObjectKind::String(_) => js_string!("String"), + ObjectKind::Date(_) => js_string!("Date"), + ObjectKind::RegExp(_) => js_string!("RegExp"), + _ => js_string!("Object"), } }; @@ -778,10 +781,10 @@ impl Object { let tag = o.get(WellKnownSymbols::to_string_tag(), context)?; // 16. If Type(tag) is not String, set tag to builtinTag. 
- let tag_str = tag.as_string().map_or(builtin_tag, JsString::as_str); + let tag_str = tag.as_string().unwrap_or(&builtin_tag); // 17. Return the string-concatenation of "[object ", tag, and "]". - Ok(format!("[object {tag_str}]").into()) + Ok(js_string!(utf16!("[object "), tag_str, utf16!("]")).into()) } /// `Object.prototype.toLocaleString( [ reserved1 [ , reserved2 ] ] )` diff --git a/boa_engine/src/builtins/regexp/mod.rs b/boa_engine/src/builtins/regexp/mod.rs index 98064bda9d1..b11bb67e0f5 100644 --- a/boa_engine/src/builtins/regexp/mod.rs +++ b/boa_engine/src/builtins/regexp/mod.rs @@ -16,14 +16,16 @@ use super::JsArgs; use crate::{ builtins::{array::Array, string, BuiltIn}, context::intrinsics::StandardConstructors, + js_string, object::{ internal_methods::get_prototype_from_constructor, ConstructorBuilder, FunctionBuilder, JsObject, ObjectData, }, property::{Attribute, PropertyDescriptorBuilder}, + string::{utf16, CodePoint}, symbol::WellKnownSymbols, syntax::lexer::regex::RegExpFlags, - value::{IntegerOrInfinity, JsValue}, + value::JsValue, Context, JsResult, JsString, }; use boa_profiler::Profiler; @@ -266,7 +268,7 @@ impl RegExp { // 1. If pattern is undefined, let P be the empty String. // 2. Else, let P be ? ToString(pattern). let p = if pattern.is_undefined() { - JsString::new("") + js_string!() } else { pattern.to_string(context)? }; @@ -274,14 +276,15 @@ impl RegExp { // 3. If flags is undefined, let F be the empty String. // 4. Else, let F be ? ToString(flags). let f = if flags.is_undefined() { - JsString::new("") + js_string!() } else { flags.to_string(context)? }; // 5. If F contains any code unit other than "g", "i", "m", "s", "u", or "y" // or if it contains the same code unit more than once, throw a SyntaxError exception. 
- let flags = match RegExpFlags::from_str(&f) { + // TODO: Should directly parse the JsString instead of converting to String + let flags = match RegExpFlags::from_str(&f.to_std_string_escaped()) { Err(msg) => return context.throw_syntax_error(msg), Ok(result) => result, }; @@ -295,7 +298,10 @@ impl RegExp { // 13. Set obj.[[OriginalFlags]] to F. // 14. NOTE: The definitions of DotAll, IgnoreCase, Multiline, and Unicode in 22.2.2.1 refer to this value of obj.[[OriginalFlags]]. // 15. Set obj.[[RegExpMatcher]] to CompilePattern of parseResult. - let matcher = match Regex::with_flags(&p, f.as_ref()) { + // TODO: add support for utf16 regex to remove this conversions. + let ps = p.to_std_string_escaped(); + let fs = f.to_std_string_escaped(); + let matcher = match Regex::with_flags(&ps, fs.as_ref()) { Err(error) => { return context .throw_syntax_error(format!("failed to create matcher: {}", error.text)); @@ -644,22 +650,23 @@ impl RegExp { /// - [ECMAScript reference][spec] /// /// [spec]: https://tc39.es/ecma262/#sec-escaperegexppattern - fn escape_pattern(src: &str, _flags: &str) -> JsValue { + fn escape_pattern(src: &JsString, _flags: &JsString) -> JsValue { if src.is_empty() { - JsValue::new("(?:)") + js_string!("(?:)").into() } else { - let mut s = String::from(""); - - for c in src.chars() { + let mut s = Vec::with_capacity(src.len()); + let mut buf = [0; 2]; + for c in src.code_points() { match c { - '/' => s.push_str("\\/"), - '\n' => s.push_str("\\\\n"), - '\r' => s.push_str("\\\\r"), - _ => s.push(c), + CodePoint::Unicode('/') => s.extend_from_slice(utf16!(r"\/")), + CodePoint::Unicode('\n') => s.extend_from_slice(utf16!(r"\\n")), + CodePoint::Unicode('\r') => s.extend_from_slice(utf16!(r"\\r")), + CodePoint::Unicode(c) => s.extend_from_slice(c.encode_utf16(&mut buf)), + CodePoint::UnpairedSurrogate(surr) => s.push(surr), } } - JsValue::new(s) + JsValue::new(js_string!(&s[..])) } } @@ -806,7 +813,7 @@ impl RegExp { // 2. Assert: Type(S) is String. // 3. 
Let length be the number of code units in S. - let length = input.encode_utf16().count() as u64; + let length = input.len() as u64; // 4. Let lastIndex be ℝ(? ToLength(? Get(R, "lastIndex"))). let mut last_index = this.get("lastIndex", context)?.to_length(context)?; @@ -815,10 +822,10 @@ impl RegExp { let flags = &rx.original_flags; // 6. If flags contains "g", let global be true; else let global be false. - let global = flags.contains('g'); + let global = flags.contains(&('g' as u16)); // 7. If flags contains "y", let sticky be true; else let sticky be false. - let sticky = flags.contains('y'); + let sticky = flags.contains(&('y' as u16)); // 8. If global is false and sticky is false, set lastIndex to 0. if !global && !sticky { @@ -829,7 +836,7 @@ impl RegExp { let matcher = &rx.matcher; // 10. If flags contains "u", let fullUnicode be true; else let fullUnicode be false. - let unicode = flags.contains('u'); + let unicode = flags.contains(&('u' as u16)); // 11. Let matchSucceeded be false. // 12. Repeat, while matchSucceeded is false, @@ -848,19 +855,16 @@ impl RegExp { // b. Let r be matcher(S, lastIndex). // Check if last_index is a valid utf8 index into input. - let last_byte_index = match String::from_utf16( - &input - .encode_utf16() - .take(last_index as usize) - .collect::>(), - ) { + // TODO: avoid converting to String + let last_byte_index = match String::from_utf16(&input[..last_index as usize]) { Ok(s) => s.len(), Err(_) => { - return context - .throw_type_error("Failed to get byte index from utf16 encoded string") + return Ok(None); } }; - let r = matcher.find_from(input, last_byte_index).next(); + let r = matcher + .find_from(input.to_std_string_escaped().as_str(), last_byte_index) + .next(); match r { // c. If r is failure, then @@ -905,20 +909,27 @@ impl RegExp { // 13. Let e be r's endIndex value. let mut e = match_value.end(); + let lossy_input = input.to_std_string_escaped(); // 14. 
If fullUnicode is true, then - if unicode { + // TODO: disabled for now until we have UTF-16 support + if false { // e is an index into the Input character list, derived from S, matched by matcher. // Let eUTF be the smallest index into S that corresponds to the character at element e of Input. // If e is greater than or equal to the number of elements in Input, then eUTF is the number of code units in S. // b. Set e to eUTF. - e = input.split_at(e).0.encode_utf16().count(); + e = input.get(..e).map_or_else(|| input.len(), <[u16]>::len); } // 15. If global is true or sticky is true, then if global || sticky { // a. Perform ? Set(R, "lastIndex", 𝔽(e), true). - this.set("lastIndex", e, true, context)?; + this.set( + "lastIndex", + lossy_input[..e].encode_utf16().count(), + true, + context, + )?; } // 16. Let n be the number of elements in r's captures List. (This is the same value as 22.2.2.1's NcapturingParens.) @@ -939,11 +950,7 @@ impl RegExp { .expect("this CreateDataPropertyOrThrow call must not fail"); // 22. Let matchedSubstr be the substring of S from lastIndex to e. - let matched_substr = if let Some(s) = input.get(match_value.range()) { - s - } else { - "" - }; + let matched_substr = js_string!(&lossy_input[last_index as usize..e]); // 23. Perform ! CreateDataPropertyOrThrow(A, "0", matchedSubstr). a.create_data_property_or_throw(0, matched_substr, context) @@ -954,7 +961,7 @@ impl RegExp { let named_groups = match_value.named_groups(); let groups = if named_groups.clone().count() > 0 { // a. Let groups be ! OrdinaryObjectCreate(null). - let groups = JsValue::from(JsObject::empty()); + let groups = JsObject::empty(); // Perform 27.f here // f. If the ith capture of R was defined with a GroupName, then @@ -962,19 +969,15 @@ impl RegExp { // ii. Perform ! CreateDataPropertyOrThrow(groups, s, capturedValue). 
for (name, range) in named_groups { if let Some(range) = range { - let value = if let Some(s) = input.get(range.clone()) { - s - } else { - "" - }; + // TODO: Full UTF-16 regex support + let value = js_string!(&lossy_input[range.clone()]); groups - .to_object(context)? .create_data_property_or_throw(name, value, context) .expect("this CreateDataPropertyOrThrow call must not fail"); } } - groups + groups.into() } else { // a. Let groups be undefined. JsValue::undefined() @@ -994,13 +997,8 @@ impl RegExp { None => JsValue::undefined(), // c. Else if fullUnicode is true, then // d. Else, - Some(range) => { - if let Some(s) = input.get(range) { - s.into() - } else { - "".into() - } - } + // TODO: Full UTF-16 regex support + Some(range) => js_string!(&lossy_input[range]).into(), }; // e. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), capturedValue). @@ -1037,11 +1035,7 @@ impl RegExp { }; // 3. Let S be ? ToString(string). - let arg_str = args - .get(0) - .cloned() - .unwrap_or_default() - .to_string(context)?; + let arg_str = args.get_or_undefined(0).to_string(context)?; // 4. Let global be ! ToBoolean(? Get(rx, "global")). let global = rx.get("global", context)?.to_boolean(); @@ -1144,7 +1138,7 @@ impl RegExp { this.display() )); }; - Ok(format!("/{body}/{flags}").into()) + Ok(js_string!(utf16!("/"), &body, utf16!("/"), &flags).into()) } /// `RegExp.prototype[ @@matchAll ]( string )` @@ -1190,11 +1184,11 @@ impl RegExp { // 9. If flags contains "g", let global be true. // 10. Else, let global be false. - let global = flags.contains('g'); + let global = flags.contains(&('g' as u16)); // 11. If flags contains "u", let fullUnicode be true. // 12. Else, let fullUnicode be false. - let unicode = flags.contains('u'); + let unicode = flags.contains(&('u' as u16)); // 13. Return ! CreateRegExpStringIterator(matcher, S, global, fullUnicode). Ok(RegExpStringIterator::create_regexp_string_iterator( @@ -1234,14 +1228,10 @@ impl RegExp { }; // 3. Let S be ? 
ToString(string). - let arg_str = args - .get(0) - .cloned() - .unwrap_or_default() - .to_string(context)?; + let arg_str = args.get_or_undefined(0).to_string(context)?; // 4. Let lengthS be the number of code unit elements in S. - let length_arg_str = arg_str.encode_utf16().count(); + let length_arg_str = arg_str.len(); // 5. Let functionalReplace be IsCallable(replaceValue). let mut replace_value = args.get_or_undefined(1).clone(); @@ -1310,7 +1300,7 @@ impl RegExp { } // 12. Let accumulatedResult be the empty String. - let mut accumulated_result = JsString::new(""); + let mut accumulated_result = vec![]; // 13. Let nextSourcePosition be 0. let mut next_source_position = 0; @@ -1327,7 +1317,7 @@ impl RegExp { let matched = result.get("0", context)?.to_string(context)?; // d. Let matchLength be the number of code units in matched. - let match_length = matched.encode_utf16().count(); + let match_length = matched.len(); // e. Let position be ? ToIntegerOrInfinity(? Get(result, "index")). let position = result @@ -1336,19 +1326,7 @@ impl RegExp { // f. Set position to the result of clamping position between 0 and lengthS. //position = position. - let position = match position { - IntegerOrInfinity::Integer(i) => { - if i < 0 { - 0 - } else if i as usize > length_arg_str { - length_arg_str - } else { - i as usize - } - } - IntegerOrInfinity::PositiveInfinity => length_arg_str, - IntegerOrInfinity::NegativeInfinity => 0, - }; + let position = position.clamp_finite(0, length_arg_str as i64) as usize; // h. Let captures be a new empty List. let mut captures = Vec::new(); @@ -1357,7 +1335,7 @@ impl RegExp { // i. Repeat, while n ≤ nCaptures, for n in 1..=n_captures { // i. Let capN be ? Get(result, ! ToString(𝔽(n))). - let mut cap_n = result.get(n.to_string(), context)?; + let mut cap_n = result.get(n, context)?; // ii. If capN is not undefined, then if !cap_n.is_undefined() { @@ -1409,8 +1387,8 @@ impl RegExp { // ii. Let replacement be ? 
GetSubstitution(matched, S, position, captures, namedCaptures, replaceValue). replacement = string::get_substitution( - matched.as_str(), - arg_str.as_str(), + &matched, + &arg_str, position, &captures, &named_captures, @@ -1427,13 +1405,8 @@ impl RegExp { // In such cases, the corresponding substitution is ignored. // ii. Set accumulatedResult to the string-concatenation of accumulatedResult, // the substring of S from nextSourcePosition to position, and replacement. - accumulated_result = format!( - "{accumulated_result}{}{replacement}", - arg_str - .get(next_source_position..position) - .expect("index of a regexp match cannot be greater than the input string"), - ) - .into(); + accumulated_result.extend_from_slice(&arg_str[next_source_position..position]); + accumulated_result.extend_from_slice(&replacement); // iii. Set nextSourcePosition to position + matchLength. next_source_position = position + match_length; @@ -1442,18 +1415,11 @@ impl RegExp { // 15. If nextSourcePosition ≥ lengthS, return accumulatedResult. if next_source_position >= length_arg_str { - return Ok(accumulated_result.into()); + return Ok(js_string!(accumulated_result).into()); } // 16. Return the string-concatenation of accumulatedResult and the substring of S from nextSourcePosition. - Ok(format!( - "{}{}", - accumulated_result, - arg_str - .get(next_source_position..) - .expect("next_source_position cannot be greater than the input string") - ) - .into()) + Ok(js_string!(&accumulated_result[..], &arg_str[next_source_position..]).into()) } /// `RegExp.prototype[ @@search ]( string )` @@ -1482,11 +1448,7 @@ impl RegExp { }; // 3. Let S be ? ToString(string). - let arg_str = args - .get(0) - .cloned() - .unwrap_or_default() - .to_string(context)?; + let arg_str = args.get_or_undefined(0).to_string(context)?; // 4. Let previousLastIndex be ? Get(rx, "lastIndex"). let previous_last_index = rx.get("lastIndex", context)?; @@ -1543,11 +1505,7 @@ impl RegExp { }; // 3. Let S be ? ToString(string). 
- let arg_str = args - .get(0) - .cloned() - .unwrap_or_default() - .to_string(context)?; + let arg_str = args.get_or_undefined(0).to_string(context)?; // 4. Let C be ? SpeciesConstructor(rx, %RegExp%). let constructor = rx.species_constructor(StandardConstructors::regexp, context)?; @@ -1557,14 +1515,14 @@ impl RegExp { // 6. If flags contains "u", let unicodeMatching be true. // 7. Else, let unicodeMatching be false. - let unicode = flags.contains('u'); + let unicode = flags.contains(&('u' as u16)); // 8. If flags contains "y", let newFlags be flags. // 9. Else, let newFlags be the string-concatenation of flags and "y". - let new_flags = if flags.contains('y') { - flags.to_string() + let new_flags = if flags.contains(&('y' as u16)) { + flags } else { - format!("{flags}y") + js_string!(&flags, utf16!("y")) }; // 10. Let splitter be ? Construct(C, « rx, newFlags »). @@ -1594,7 +1552,7 @@ impl RegExp { } // 15. Let size be the length of S. - let size = arg_str.encode_utf16().count() as u64; + let size = arg_str.len() as u64; // 16. If size is 0, then if size == 0 { @@ -1642,13 +1600,7 @@ impl RegExp { q = advance_string_index(&arg_str, q, unicode); } else { // 1. Let T be the substring of S from p to q. - let arg_str_substring = String::from_utf16_lossy( - &arg_str - .encode_utf16() - .skip(p as usize) - .take((q - p) as usize) - .collect::>(), - ); + let arg_str_substring = js_string!(&arg_str[p as usize..q as usize]); // 2. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), T). a.create_data_property_or_throw(length_a, arg_str_substring, context) @@ -1669,17 +1621,13 @@ impl RegExp { let mut number_of_captures = result.length_of_array_like(context)? as isize; // 7. Set numberOfCaptures to max(numberOfCaptures - 1, 0). - number_of_captures = if number_of_captures == 0 { - 0 - } else { - std::cmp::max(number_of_captures - 1, 0) - }; + number_of_captures = std::cmp::max(number_of_captures - 1, 0); // 8. Let i be 1. // 9. 
Repeat, while i ≤ numberOfCaptures, for i in 1..=number_of_captures { // a. Let nextCapture be ? Get(z, ! ToString(𝔽(i))). - let next_capture = result.get(i.to_string(), context)?; + let next_capture = result.get(i, context)?; // b. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), nextCapture). a.create_data_property_or_throw(length_a, next_capture, context) @@ -1703,13 +1651,7 @@ impl RegExp { } // 20. Let T be the substring of S from p to size. - let arg_str_substring = String::from_utf16_lossy( - &arg_str - .encode_utf16() - .skip(p as usize) - .take((size - p) as usize) - .collect::>(), - ); + let arg_str_substring = js_string!(&arg_str[p as usize..size as usize]); // 21. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), T). a.create_data_property_or_throw(length_a, arg_str_substring, context) @@ -1737,7 +1679,7 @@ fn advance_string_index(s: &JsString, index: u64, unicode: bool) -> u64 { } // 3. Let length be the number of code units in S. - let length = s.encode_utf16().count() as u64; + let length = s.len() as u64; // 4. If index + 1 ≥ length, return index + 1. if index + 1 > length { @@ -1745,8 +1687,7 @@ fn advance_string_index(s: &JsString, index: u64, unicode: bool) -> u64 { } // 5. Let cp be ! CodePointAt(S, index). - let cp = crate::builtins::string::code_point_at(s, index); + let code_point = s.code_point_at(index as usize); - // 6. Return index + cp.[[CodeUnitCount]]. 
- index + u64::from(cp.code_unit_count) + index + code_point.code_unit_count() as u64 } diff --git a/boa_engine/src/builtins/string/mod.rs b/boa_engine/src/builtins/string/mod.rs index 900346fc3d3..c3a97da9bb4 100644 --- a/boa_engine/src/builtins/string/mod.rs +++ b/boa_engine/src/builtins/string/mod.rs @@ -17,22 +17,20 @@ use super::JsArgs; use crate::{ builtins::{string::string_iterator::StringIterator, Array, BuiltIn, Number, RegExp}, context::intrinsics::StandardConstructors, + js_string, object::{ internal_methods::get_prototype_from_constructor, ConstructorBuilder, JsObject, ObjectData, }, property::{Attribute, PropertyDescriptor}, + string::utf16, + string::{CodePoint, Utf16Trim}, symbol::WellKnownSymbols, value::IntegerOrInfinity, Context, JsResult, JsString, JsValue, }; use boa_profiler::Profiler; -use std::{ - char::from_u32, - cmp::{max, min}, - string::String as StdString, -}; +use std::cmp::{max, min}; use tap::{Conv, Pipe}; -use unicode_normalization::UnicodeNormalization; #[derive(Clone, Copy, Eq, PartialEq)] pub(crate) enum Placement { @@ -40,89 +38,6 @@ pub(crate) enum Placement { End, } -/// Code point information for the `CodePointAt` abstract operation. -#[derive(Debug, Clone, Copy)] -pub(crate) struct CodePointInfo { - pub(crate) code_point: u32, - pub(crate) code_unit_count: u8, - pub(crate) is_unpaired_surrogate: bool, -} - -/// The `CodePointAt ( string, position )` abstract operation. -/// -/// The abstract operation `CodePointAt` takes arguments `string` (a String) and `position` (a -/// non-negative integer) and returns a Record with fields `[[CodePoint]]` (a code point), -/// `[[CodeUnitCount]]` (a positive integer), and `[[IsUnpairedSurrogate]]` (a Boolean). It -/// interprets string as a sequence of UTF-16 encoded code points, as described in 6.1.4, and reads -/// from it a single code point starting with the code unit at index `position`. 
-/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma262/#sec-codepointat -pub(crate) fn code_point_at(string: &JsString, position: u64) -> CodePointInfo { - let mut encoded = string.encode_utf16(); - - // 1. Let size be the length of string. - let size = encoded.clone().count() as u64; - - // 2. Assert: position ≥ 0 and position < size. - assert!(position < size); - - // 3. Let first be the code unit at index position within string. - let first = encoded - .nth(position as usize) - .expect("The callers of this function must've already checked bounds."); - - // 4. Let cp be the code point whose numeric value is that of first. - let cp = u32::from(first); - - // 5. If first is not a leading surrogate or trailing surrogate, then - if !is_leading_surrogate(first) && !is_trailing_surrogate(first) { - // a. Return the Record { [[CodePoint]]: cp, [[CodeUnitCount]]: 1, [[IsUnpairedSurrogate]]: false }. - return CodePointInfo { - code_point: cp, - code_unit_count: 1, - is_unpaired_surrogate: false, - }; - } - - // 6. If first is a trailing surrogate or position + 1 = size, then - if is_trailing_surrogate(first) || position + 1 == size { - // a. Return the Record { [[CodePoint]]: cp, [[CodeUnitCount]]: 1, [[IsUnpairedSurrogate]]: true }. - return CodePointInfo { - code_point: cp, - code_unit_count: 1, - is_unpaired_surrogate: true, - }; - } - - // 7. Let second be the code unit at index position + 1 within string. - let second = encoded - .next() - .expect("The callers of this function must've already checked bounds."); - - // 8. If second is not a trailing surrogate, then - if !is_trailing_surrogate(second) { - // a. Return the Record { [[CodePoint]]: cp, [[CodeUnitCount]]: 1, [[IsUnpairedSurrogate]]: true }. - return CodePointInfo { - code_point: cp, - code_unit_count: 1, - is_unpaired_surrogate: true, - }; - } - - // 9. Set cp to UTF16SurrogatePairToCodePoint(first, second). 
- let cp = (u32::from(first) - 0xD800) * 0x400 + (u32::from(second) - 0xDC00) + 0x10000; - - // 10. Return the Record { [[CodePoint]]: cp, [[CodeUnitCount]]: 2, [[IsUnpairedSurrogate]]: false }. - CodePointInfo { - code_point: cp, - code_unit_count: 2, - is_unpaired_surrogate: false, - } -} - /// Helper function to check if a `char` is trimmable. #[inline] pub(crate) fn is_trimmable_whitespace(c: char) -> bool { @@ -144,26 +59,6 @@ pub(crate) fn is_trimmable_whitespace(c: char) -> bool { ) } -/// Checks if the given code unit is a leading surrogate. -/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma262/#leading-surrogate -pub(crate) fn is_leading_surrogate(value: u16) -> bool { - (0xD800..=0xDBFF).contains(&value) -} - -/// Checks if the given code unit is a trailing surrogate. -/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma262/#trailing-surrogate -pub(crate) fn is_trailing_surrogate(value: u16) -> bool { - (0xDC00..=0xDFFF).contains(&value) -} - /// JavaScript `String` implementation. #[derive(Debug, Clone, Copy)] pub(crate) struct String; @@ -236,7 +131,7 @@ impl String { /// which can differ in JavaScript engines. In Boa it is `2^32 - 1` pub(crate) const MAX_STRING_LENGTH: usize = u32::MAX as usize; - /// `String( value )` + /// Constructor `String( value )` /// /// pub(crate) fn constructor( @@ -255,7 +150,7 @@ impl String { // b. Let s be ? ToString(value). Some(value) => value.to_string(context)?, // 1. If value is not present, let s be the empty String. - None => JsString::default(), + None => js_string!(), }; // 3. If NewTarget is undefined, return s. @@ -265,6 +160,7 @@ impl String { let prototype = get_prototype_from_constructor(new_target, StandardConstructors::string, context)?; + // 4. Return ! StringCreate(s, ? GetPrototypeFromConstructor(NewTarget, "%String.prototype%")). 
Ok(Self::string_create(string, prototype, context).into()) } @@ -278,7 +174,7 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-stringcreate fn string_create(value: JsString, prototype: JsObject, context: &mut Context) -> JsObject { // 7. Let length be the number of code unit elements in value. - let len = value.encode_utf16().count(); + let len = value.len(); // 1. Let S be ! MakeBasicObject(« [[Prototype]], [[Extensible]], [[StringData]] »). // 2. Set S.[[Prototype]] to prototype. @@ -340,7 +236,9 @@ impl String { context: &mut Context, ) -> JsResult { // 1. Let result be the empty String. - let mut result = StdString::new(); + let mut result = Vec::with_capacity(args.len()); + + let mut buf = [0; 2]; // 2. For each element next of codePoints, do for arg in args.iter() { @@ -357,17 +255,19 @@ impl String { return Err(context.construct_range_error(format!("invalid code point: {nextcp}"))); } - // TODO: Full UTF-16 support + let nextcp = + char::from_u32(nextcp as u32).expect("Checked above the range of `nextcp`"); + // d. Set result to the string-concatenation of result and ! UTF16EncodeCodePoint(ℝ(nextCP)). - result.push(char::try_from(nextcp as u32).unwrap_or('\u{FFFD}' /* replacement char */)); + result.extend_from_slice(nextcp.encode_utf16(&mut buf)); } // 3. Assert: If codePoints is empty, then result is the empty String. // 4. Return result. - Ok(result.into()) + Ok(js_string!(&result[..]).into()) } - /// `String.prototype.raw( template, ...substitutions )` + /// `String.raw( template, ...substitutions )` /// /// More information: /// - [ECMAScript reference][spec] @@ -393,7 +293,7 @@ impl String { // 5. If literalSegments ≤ 0, return the empty String. // This is not <= because a `usize` is always positive. if literal_segments == 0 { - return Ok(JsString::empty().into()); + return Ok(js_string!().into()); } // 6. Let stringElements be a new empty List. 
@@ -410,13 +310,13 @@ impl String { let next_seg = raw.get(next_key, context)?.to_string(context)?; // c. Append the code unit elements of nextSeg to the end of stringElements. - string_elements.extend(next_seg.encode_utf16()); + string_elements.extend(next_seg.iter().copied()); // d. If nextIndex + 1 = literalSegments, then if next_index + 1 == literal_segments { // i. Return the String value whose code units are the elements in the List stringElements. // If stringElements has no elements, the empty String is returned. - return Ok(StdString::from_utf16_lossy(&string_elements).into()); + return Ok(js_string!(string_elements).into()); } // e. If nextIndex < numberOfSubstitutions, let next be substitutions[nextIndex]. @@ -425,14 +325,14 @@ impl String { // f. Else, let next be the empty String. } else { - JsString::empty().into() + js_string!().into() }; // g. Let nextSub be ? ToString(next). let next_sub = next.to_string(context)?; // h. Append the code unit elements of nextSub to the end of stringElements. - string_elements.extend(next_sub.encode_utf16()); + string_elements.extend(next_sub.iter().copied()); // i. Set nextIndex to nextIndex + 1. next_index += 1; @@ -464,9 +364,7 @@ impl String { } // 3. Return result. - Ok(JsValue::String(JsString::new( - std::string::String::from_utf16_lossy(&result), - ))) + Ok(js_string!(result).into()) } /// `String.prototype.toString ( )` @@ -513,26 +411,18 @@ impl String { // 2. Let S be ? ToString(O). let string = this.to_string(context)?; - // 4. Let size be the length of S. - let size = string.encode_utf16().count() as i64; - // 3. Let position be ? ToIntegerOrInfinity(pos). - match args.get_or_undefined(0).to_integer_or_infinity(context)? { - IntegerOrInfinity::Integer(position) if (0..size).contains(&position) => { - // 6. Return the substring of S from position to position + 1. 
- let char = string - .encode_utf16() - .nth(position as usize) - .expect("Already checked bounds above"); - - Ok(char::try_from(u32::from(char)) - .unwrap_or('\u{FFFD}' /* replacement char */) - .into()) - } - _ => { - // 5. If position < 0 or position ≥ size, return the empty String. - Ok("".into()) + let position = args.get_or_undefined(0).to_integer_or_infinity(context)?; + + match position { + // 4. Let size be the length of S. + // 6. Return the substring of S from position to position + 1. + IntegerOrInfinity::Integer(i) if i >= 0 && i < string.len() as i64 => { + let i = i as usize; + Ok(js_string!(&string[i..=i]).into()) } + // 5. If position < 0 or position ≥ size, return the empty String. + _ => Ok(js_string!().into()), } } @@ -555,7 +445,7 @@ impl String { let s = this.to_string(context)?; // 3. Let len be the length of S. - let len = s.encode_utf16().count() as i64; + let len = s.len() as i64; // 4. Let relativeIndex be ? ToIntegerOrInfinity(index). let relative_index = args.get_or_undefined(0).to_integer_or_infinity(context)?; @@ -571,13 +461,7 @@ impl String { }; // 8. Return the substring of S from k to k + 1. - if let Some(utf16_val) = s.encode_utf16().nth(k) { - Ok(JsValue::new( - from_u32(u32::from(utf16_val)).expect("invalid utf-16 character"), - )) - } else { - Ok(JsValue::undefined()) - } + Ok(js_string!(&s[k..=k]).into()) } /// `String.prototype.codePointAt( index )` @@ -608,14 +492,15 @@ impl String { // 3. Let position be ? ToIntegerOrInfinity(pos). let position = args.get_or_undefined(0).to_integer_or_infinity(context)?; - // 4. Let size be the length of S. - let size = string.encode_utf16().count() as i64; - match position { - IntegerOrInfinity::Integer(position) if (0..size).contains(&position) => { + // 4. Let size be the length of S. + IntegerOrInfinity::Integer(i) if i >= 0 && i < string.len() as i64 => { // 6. Let cp be ! CodePointAt(S, position). // 7. Return 𝔽(cp.[[CodePoint]]). 
- Ok(code_point_at(&string, position as u64).code_point.into()) + Ok(string + .code_point_at(usize::try_from(i).expect("already checked that i >= 0")) + .as_u32() + .into()) } // 5. If position < 0 or position ≥ size, return undefined. _ => Ok(JsValue::undefined()), @@ -650,19 +535,11 @@ impl String { // 3. Let position be ? ToIntegerOrInfinity(pos). let position = args.get_or_undefined(0).to_integer_or_infinity(context)?; - // 4. Let size be the length of S. - let size = string.encode_utf16().count() as i64; - match position { - IntegerOrInfinity::Integer(position) if (0..size).contains(&position) => { + // 4. Let size be the length of S. + IntegerOrInfinity::Integer(i) if i >= 0 && i < string.len() as i64 => { // 6. Return the Number value for the numeric value of the code unit at index position within the String S. - let char_code = u32::from( - string - .encode_utf16() - .nth(position as usize) - .expect("Already checked bounds above."), - ); - Ok(char_code.into()) + Ok(u32::from(string[i as usize]).into()) } // 5. If position < 0 or position ≥ size, return NaN. _ => Ok(JsValue::nan()), @@ -692,14 +569,14 @@ impl String { let this = this.require_object_coercible(context)?; // 2. Let S be ? ToString(O). - let mut string = this.to_string(context)?.to_string(); + let mut string = this.to_string(context)?; // 3. Let R be S. // 4. For each element next of args, do for arg in args { // a. Let nextString be ? ToString(next). // b. Set R to the string-concatenation of R and nextString. - string.push_str(&arg.to_string(context)?); + string = js_string!(&string, &arg.to_string(context)?); } // 5. Return R. @@ -728,7 +605,7 @@ impl String { // 2. Let S be ? ToString(O). let string = this.to_string(context)?; - let len = string.encode_utf16().count(); + let len = string.len(); // 3. Let n be ? ToIntegerOrInfinity(count). match args.get_or_undefined(0).to_integer_or_infinity(context)? 
{ @@ -736,20 +613,20 @@ impl String { if n > 0 && (n as usize) * len <= Self::MAX_STRING_LENGTH => { if string.is_empty() { - return Ok("".into()); + return Ok(js_string!().into()); } let n = n as usize; - let mut result = std::string::String::with_capacity(n * len); + let mut result = Vec::with_capacity(n * len); std::iter::repeat(&string[..]) .take(n) - .for_each(|s| result.push_str(s)); + .for_each(|s| result.extend_from_slice(s)); // 6. Return the String value that is made from n copies of S appended together. - Ok(result.into()) + Ok(js_string!(result).into()) } // 5. If n is 0, return the empty String. - IntegerOrInfinity::Integer(n) if n == 0 => Ok("".into()), + IntegerOrInfinity::Integer(n) if n == 0 => Ok(js_string!().into()), // 4. If n < 0 or n is +∞, throw a RangeError exception. _ => context.throw_range_error( "repeat count must be a positive finite number \ @@ -780,7 +657,7 @@ impl String { let string = this.to_string(context)?; // 3. Let len be the length of S. - let len = string.encode_utf16().count() as i64; + let len = string.len() as i64; // 4. Let intStart be ? ToIntegerOrInfinity(start). let from = match args.get_or_undefined(0).to_integer_or_infinity(context)? { @@ -816,13 +693,10 @@ impl String { // 12. If from ≥ to, return the empty String. if from >= to { - Ok("".into()) + Ok(js_string!().into()) } else { // 13. Return the substring of S from from to to. - let span = to - from; - let substring_utf16: Vec = string.encode_utf16().skip(from).take(span).collect(); - let substring_lossy = StdString::from_utf16_lossy(&substring_utf16); - Ok(substring_lossy.into()) + Ok(js_string!(&string[from..to]).into()) } } @@ -861,7 +735,7 @@ impl String { let search_string = search_string.to_string(context)?; // 6. Let len be the length of S. - let len = string.encode_utf16().count() as i64; + let len = string.len() as i64; // 7. If position is undefined, let pos be 0; else let pos be ? ToIntegerOrInfinity(position). 
let pos = match args.get_or_undefined(1) { @@ -873,7 +747,7 @@ impl String { let start = pos.clamp_finite(0, len) as usize; // 9. Let searchLength be the length of searchStr. - let search_length = search_string.encode_utf16().count(); + let search_length = search_string.len(); // 10. If searchLength = 0, return true. if search_length == 0 { @@ -891,9 +765,7 @@ impl String { // 14. Return ! SameValueNonNumeric(substring, searchStr). // `SameValueNonNumeric` forwards to `==`, so directly check // equality to avoid converting to `JsValue` - let substring_utf16 = string.encode_utf16().skip(start).take(search_length); - let search_str_utf16 = search_string.encode_utf16(); - Ok(JsValue::new(substring_utf16.eq(search_str_utf16))) + Ok(JsValue::new(search_string == string[start..end])) } } @@ -931,7 +803,7 @@ impl String { }; // 6. Let len be the length of S. - let len = string.encode_utf16().count() as i64; + let len = string.len() as i64; // 7. If endPosition is undefined, let pos be len; else let pos be ? ToIntegerOrInfinity(endPosition). let end = match args.get_or_undefined(1) { @@ -943,7 +815,7 @@ impl String { let end = end.clamp_finite(0, len) as usize; // 9. Let searchLength be the length of searchStr. - let search_length = search_str.encode_utf16().count(); + let search_length = search_str.len(); // 10. If searchLength = 0, return true. if search_length == 0 { @@ -956,11 +828,7 @@ impl String { // 14. Return ! SameValueNonNumeric(substring, searchStr). // `SameValueNonNumeric` forwards to `==`, so directly check // equality to avoid converting to `JsValue` - - let substring_utf16 = string.encode_utf16().skip(start).take(search_length); - let search_str_utf16 = search_str.encode_utf16(); - - Ok(JsValue::new(substring_utf16.eq(search_str_utf16))) + Ok(JsValue::new(search_str == string[start..end])) } else { // 12. If start < 0, return false. Ok(false.into()) @@ -1006,7 +874,7 @@ impl String { // 8. Let len be the length of S. // 9. 
Let start be the result of clamping pos between 0 and len. - let start = pos.clamp_finite(0, string.encode_utf16().count() as i64) as usize; + let start = pos.clamp_finite(0, string.len() as i64) as usize; // 10. Let index be ! StringIndexOf(S, searchStr, start). // 11. If index is not -1, return true. @@ -1084,9 +952,7 @@ impl String { }; // 10. Let preserved be the substring of string from 0 to position. - let preserved = StdString::from_utf16_lossy( - &this_str.encode_utf16().take(position).collect::>(), - ); + let preserved = &this_str[..position]; // 11. If functionalReplace is true, then // 12. Else, @@ -1106,8 +972,8 @@ impl String { // c. Let replacement be ! GetSubstitution(searchString, string, position, captures, undefined, replaceValue). get_substitution( - search_str.as_str(), - this_str.as_str(), + &search_str, + &this_str, position, &captures, &JsValue::undefined(), @@ -1117,14 +983,10 @@ impl String { }; // 13. Return the string-concatenation of preserved, replacement, and the substring of string from position + searchLength. - Ok(format!( - "{preserved}{replacement}{}", - StdString::from_utf16_lossy( - &this_str - .encode_utf16() - .skip(position + search_length) - .collect::>() - ) + Ok(js_string!( + preserved, + &replacement, + &this_str[position + search_length..] ) .into()) } @@ -1169,7 +1031,7 @@ impl String { flags.require_object_coercible(context)?; // iii. If ? ToString(flags) does not contain "g", throw a TypeError exception. - if !flags.to_string(context)?.contains('g') { + if !flags.to_string(context)?.contains(&('g' as u16)) { return context.throw_type_error( "String.prototype.replaceAll called with a non-global RegExp argument", ); @@ -1208,7 +1070,7 @@ impl String { }; // 7. Let searchLength be the length of searchString. - let search_length = search_string.encode_utf16().count(); + let search_length = search_string.len(); // 8. Let advanceBy be max(1, searchLength). 
let advance_by = max(1, search_length); @@ -1232,19 +1094,12 @@ impl String { let mut end_of_last_match = 0; // 13. Let result be the empty String. - let mut result = JsString::new(""); + let mut result = Vec::with_capacity(string.len()); // 14. For each element p of matchPositions, do for p in match_positions { // a. Let preserved be the substring of string from endOfLastMatch to p. - let preserved = StdString::from_utf16_lossy( - &string - .clone() - .encode_utf16() - .skip(end_of_last_match) - .take(p - end_of_last_match) - .collect::>(), - ); + let preserved = &string[end_of_last_match..p]; // c. Else, let replacement = if let Some(ref replace_value) = replace_value_string { @@ -1279,29 +1134,21 @@ impl String { }; // d. Set result to the string-concatenation of result, preserved, and replacement. - result = JsString::new(format!("{}{preserved}{replacement}", result.as_str())); + result.extend_from_slice(preserved); + result.extend_from_slice(&replacement); // e. Set endOfLastMatch to p + searchLength. end_of_last_match = p + search_length; } // 15. If endOfLastMatch < the length of string, then - if end_of_last_match < string.encode_utf16().count() { + if end_of_last_match < string.len() { // a. Set result to the string-concatenation of result and the substring of string from endOfLastMatch. - result = JsString::new(format!( - "{}{}", - result.as_str(), - &StdString::from_utf16_lossy( - &string - .encode_utf16() - .skip(end_of_last_match) - .collect::>() - ) - )); + result.extend_from_slice(&string[end_of_last_match..]); } // 16. Return result. - Ok(result.into()) + Ok(js_string!(result).into()) } /// `String.prototype.indexOf( searchValue[, fromIndex] )` @@ -1336,7 +1183,7 @@ impl String { let pos = args.get_or_undefined(1).to_integer_or_infinity(context)?; // 6. Let len be the length of S. - let len = string.encode_utf16().count() as i64; + let len = string.len() as i64; // 7. Let start be the result of clamping pos between 0 and len. 
let start = pos.clamp_finite(0, len) as usize; @@ -1389,7 +1236,7 @@ impl String { }; // 7. Let len be the length of S. - let len = string.encode_utf16().count(); + let len = string.len(); // 8. Let start be the result of clamping pos between 0 and len. let start = pos.clamp_finite(0, len as i64) as usize; @@ -1399,17 +1246,19 @@ impl String { } // 10. Let searchLen be the length of searchStr. - let search_len = search_str.encode_utf16().count(); - - // 11. For each non-negative integer i starting with start such that i ≤ len - searchLen, in descending order, do - // a. Let candidate be the substring of S from i to i + searchLen. - let substring_utf16: Vec = string.encode_utf16().take(start + search_len).collect(); - let substring_lossy = StdString::from_utf16_lossy(&substring_utf16); - if let Some(position) = substring_lossy.rfind(search_str.as_str()) { - // b. If candidate is the same sequence of code units as searchStr, return 𝔽(i). - return Ok(JsValue::new( - substring_lossy[..position].encode_utf16().count(), - )); + let search_len = search_str.len(); + + if let Some(end) = len.checked_sub(search_len) { + // 11. For each non-negative integer i starting with start such that i ≤ len - searchLen, in descending order, do + for i in (0..=min(start, end)).rev() { + // a. Let candidate be the substring of S from i to i + searchLen. + let candidate = &string[i..i + search_len]; + + // b. If candidate is the same sequence of code units as searchStr, return 𝔽(i). + if candidate == &search_str { + return Ok(i.into()); + } + } } // 12. Return -1𝔽. @@ -1465,7 +1314,7 @@ impl String { /// - [ECMAScript reference][spec] /// /// [spec]: https://tc39.es/ecma262/#sec-stringpad - fn string_pad( + pub(crate) fn string_pad( object: &JsValue, max_length: &JsValue, fill_string: &JsValue, @@ -1479,7 +1328,7 @@ impl String { let int_max_length = max_length.to_length(context)?; // 3. Let stringLength be the length of S. 
- let string_length = string.encode_utf16().count() as u64; + let string_length = string.len() as u64; // 4. If intMaxLength ≤ stringLength, return S. if int_max_length <= string_length { @@ -1488,7 +1337,7 @@ impl String { // 5. If fillString is undefined, let filler be the String value consisting solely of the code unit 0x0020 (SPACE). let filler = if fill_string.is_undefined() { - "\u{0020}".into() + js_string!("\u{0020}") } else { // 6. Else, let filler be ? ToString(fillString). fill_string.to_string(context)? @@ -1501,7 +1350,7 @@ impl String { // 8. Let fillLen be intMaxLength - stringLength. let fill_len = int_max_length - string_length; - let filler_len = filler.encode_utf16().count() as u64; + let filler_len = filler.len() as u64; // 9. Let truncatedStringFiller be the String value consisting of repeated // concatenations of filler truncated to length fillLen. @@ -1515,20 +1364,15 @@ impl String { } }; - let truncated_string_filler = filler - .repeat(repetitions as usize) - .encode_utf16() - .take(fill_len as usize) - .collect::>(); - let truncated_string_filler = - std::string::String::from_utf16_lossy(truncated_string_filler.as_slice()); + let truncated_string_filler = filler.repeat(repetitions as usize); + let truncated_string_filler = &truncated_string_filler[..fill_len as usize]; // 10. If placement is start, return the string-concatenation of truncatedStringFiller and S. if placement == Placement::Start { - Ok(format!("{truncated_string_filler}{string}").into()) + Ok(js_string!(truncated_string_filler, &string).into()) } else { // 11. Else, return the string-concatenation of S and truncatedStringFiller. - Ok(format!("{string}{truncated_string_filler}").into()) + Ok(js_string!(&string, truncated_string_filler).into()) } } @@ -1582,7 +1426,7 @@ impl String { let max_length = args.get_or_undefined(0); let fill_string = args.get_or_undefined(1); - // 2. Return ? StringPad(O, maxLength, fillString, start). + // 2. Return ? 
StringPad(O, maxLength, fillString, end). Self::string_pad(this, max_length, fill_string, Placement::Start, context) } @@ -1599,9 +1443,11 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.trim /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/trim pub(crate) fn trim(this: &JsValue, _: &[JsValue], context: &mut Context) -> JsResult { + // 1. Let S be the this value. + // 2. Return ? TrimString(S, start+end). let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; - Ok(JsValue::new(string.trim_matches(is_trimmable_whitespace))) + Ok(js_string!(string.trim()).into()) } /// `String.prototype.trimStart()` @@ -1621,11 +1467,11 @@ impl String { _: &[JsValue], context: &mut Context, ) -> JsResult { - let this = this.require_object_coercible(context)?; - let string = this.to_string(context)?; - Ok(JsValue::new( - string.trim_start_matches(is_trimmable_whitespace), - )) + // 1. Let S be the this value. + // 2. Return ? TrimString(S, start). + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; + Ok(js_string!(string.trim_start()).into()) } /// String.prototype.trimEnd() @@ -1645,11 +1491,11 @@ impl String { _: &[JsValue], context: &mut Context, ) -> JsResult { - let this = this.require_object_coercible(context)?; - let string = this.to_string(context)?; - Ok(JsValue::new( - string.trim_end_matches(is_trimmable_whitespace), - )) + // 1. Let S be the this value. + // 2. Return ? TrimString(S, end). + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; + Ok(js_string!(string.trim_end()).into()) } /// `String.prototype.toLowerCase()` @@ -1674,12 +1520,38 @@ impl String { // 2. Let S be ? ToString(O). 
let string = this.to_string(context)?; + let mut code_points = string.code_points(); + let mut lower_text = Vec::with_capacity(string.len()); + let mut next_unpaired_surrogate = None; + // 3. Let sText be ! StringToCodePoints(S). // 4. Let lowerText be the result of toLowercase(sText), according to // the Unicode Default Case Conversion algorithm. + loop { + let only_chars = code_points + .by_ref() + .map_while(|cpoint| match cpoint { + CodePoint::Unicode(c) => Some(c), + CodePoint::UnpairedSurrogate(s) => { + next_unpaired_surrogate = Some(s); + None + } + }) + .collect::() + .to_lowercase(); + + lower_text.extend(only_chars.encode_utf16()); + + if let Some(surr) = next_unpaired_surrogate.take() { + lower_text.push(surr); + } else { + break; + } + } + // 5. Let L be ! CodePointsToString(lowerText). // 6. Return L. - Ok(JsValue::new(string.to_lowercase())) + Ok(js_string!(lower_text).into()) } /// `String.prototype.toUpperCase()` @@ -1711,12 +1583,38 @@ impl String { // 2. Let S be ? ToString(O). let string = this.to_string(context)?; + let mut code_points = string.code_points(); + let mut upper_text = Vec::with_capacity(string.len()); + let mut next_unpaired_surrogate = None; + // 3. Let sText be ! StringToCodePoints(S). // 4. Let upperText be the result of toUppercase(sText), according to // the Unicode Default Case Conversion algorithm. + loop { + let only_chars = code_points + .by_ref() + .map_while(|cpoint| match cpoint { + CodePoint::Unicode(c) => Some(c), + CodePoint::UnpairedSurrogate(s) => { + next_unpaired_surrogate = Some(s); + None + } + }) + .collect::() + .to_uppercase(); + + upper_text.extend(only_chars.encode_utf16()); + + if let Some(surr) = next_unpaired_surrogate.take() { + upper_text.push(surr); + } else { + break; + } + } + // 5. Let L be ! CodePointsToString(upperText). // 6. Return L. 
- Ok(JsValue::new(string.to_uppercase())) + Ok(js_string!(upper_text).into()) } /// `String.prototype.substring( indexStart[, indexEnd] )` @@ -1741,7 +1639,7 @@ impl String { let string = this.to_string(context)?; // 3. Let len be the length of S. - let len = string.encode_utf16().count() as i64; + let len = string.len() as i64; // 4. Let intStart be ? ToIntegerOrInfinity(start). let int_start = args.get_or_undefined(0).to_integer_or_infinity(context)?; @@ -1765,13 +1663,7 @@ impl String { let to = max(final_start, final_end); // 10. Return the substring of S from from to to. - // Extract the part of the string contained between the from index and the to index - // where from is guaranteed to be smaller or equal to to - // TODO: Full UTF-16 support - let substring_utf16: Vec = string.encode_utf16().skip(from).take(to - from).collect(); - let substring = StdString::from_utf16_lossy(&substring_utf16); - - Ok(substring.into()) + Ok(js_string!(&string[from..to]).into()) } /// `String.prototype.substr( start[, length] )` @@ -1797,7 +1689,7 @@ impl String { let string = this.to_string(context)?; // 3. Let size be the length of S. - let size = string.encode_utf16().count() as i64; + let size = string.len() as i64; // 4. Let intStart be ? ToIntegerOrInfinity(start). let int_start = args.get_or_undefined(0).to_integer_or_infinity(context)?; @@ -1814,7 +1706,7 @@ impl String { IntegerOrInfinity::Integer(i) if i < 0 => max(size + i, 0), IntegerOrInfinity::Integer(i) => i, // 8. If intStart is +∞, ... return the empty String - IntegerOrInfinity::PositiveInfinity => return Ok("".into()), + IntegerOrInfinity::PositiveInfinity => return Ok(js_string!().into()), // 5. If intStart is -∞, set intStart to 0. IntegerOrInfinity::NegativeInfinity => 0, } as usize; @@ -1822,22 +1714,19 @@ impl String { // 8. If ... intLength ≤ 0, or intLength is +∞, return the empty String. 
let int_length = match int_length { IntegerOrInfinity::Integer(i) if i > 0 => i, - _ => return Ok("".into()), + _ => return Ok(js_string!().into()), } as usize; // 9. Let intEnd be min(intStart + intLength, size). let int_end = min(int_start + int_length, size as usize); - // 11. Return the substring of S from intStart to intEnd. - // 10. If intStart ≥ intEnd, return the empty String. - let substring_utf16: Vec = string - .encode_utf16() - .skip(int_start) - .take(int_end - int_start) - .collect(); - let substring = StdString::from_utf16_lossy(&substring_utf16); - - Ok(substring.into()) + if let Some(substr) = string.get(int_start..int_end) { + // 11. Return the substring of S from intStart to intEnd. + Ok(js_string!(substr).into()) + } else { + // 10. If intStart ≥ intEnd, return the empty String. + Ok(js_string!().into()) + } } /// `String.prototype.split ( separator, limit )` @@ -1876,120 +1765,88 @@ impl String { // 3. Let S be ? ToString(O). let this_str = this.to_string(context)?; - // 4. Let A be ! ArrayCreate(0). - let a = Array::array_create(0, None, context)?; - - // 5. Let lengthA be 0. - let mut length_a = 0; - - // 6. If limit is undefined, let lim be 2^32 - 1; else let lim be ℝ(? ToUint32(limit)). + // 4. If limit is undefined, let lim be 2^32 - 1; else let lim be ℝ(? ToUint32(limit)). let lim = if limit.is_undefined() { u32::MAX } else { limit.to_u32(context)? - }; + } as usize; - // 7. Let R be ? ToString(separator). + // 5. Let R be ? ToString(separator). let separator_str = separator.to_string(context)?; - // 8. If lim = 0, return A. + // 6. If lim = 0, return A. if lim == 0 { - return Ok(a.into()); + // a. Return ! CreateArrayFromList(« »). + return Ok(Array::create_array_from_list([], context).into()); } - // 9. If separator is undefined, then + // 7. If separator is undefined, then if separator.is_undefined() { - // a. Perform ! CreateDataPropertyOrThrow(A, "0", S). 
- a.create_data_property_or_throw(0, this_str, context) - .expect("this CreateDataPropertyOrThrow call must not fail"); - - // b. Return A. - return Ok(a.into()); + // a. Return ! CreateArrayFromList(« S »). + return Ok(Array::create_array_from_list([this_str.into()], context).into()); } - // 10. Let s be the length of S. - let this_str_length = this_str.encode_utf16().count(); - - // 11. If s = 0, then - if this_str_length == 0 { - // a. If R is not the empty String, then - if !separator_str.is_empty() { - // i. Perform ! CreateDataPropertyOrThrow(A, "0", S). - a.create_data_property_or_throw(0, this_str, context) - .expect("this CreateDataPropertyOrThrow call must not fail"); - } - - // b. Return A. - return Ok(a.into()); + // 8. Let separatorLength be the length of R. + let separator_length = separator_str.len(); + + // 9. If separatorLength is 0, then + if separator_length == 0 { + // a. Let head be the substring of S from 0 to lim. + // b. Let codeUnits be a List consisting of the sequence of code units that are the elements of head. + let head = this_str + .get(..lim) + .unwrap_or(&this_str[..]) + .iter() + .map(|code| js_string!(std::slice::from_ref(code)).into()); + // c. Return ! CreateArrayFromList(codeUnits). + return Ok(Array::create_array_from_list(head, context).into()); } - // 12. Let p be 0. - // 13. Let q be p. - let mut p = 0; - let mut q = p; - - // 14. Repeat, while q ≠ s, - while q != this_str_length { - // a. Let e be SplitMatch(S, q, R). - let e = split_match(&this_str, q, &separator_str); - - match e { - // b. If e is not-matched, set q to q + 1. - None => q += 1, - // c. Else, - Some(e) => { - // i. Assert: e is a non-negative integer ≤ s. - // ii. If e = p, set q to q + 1. - // iii. Else, - if e == p { - q += 1; - } else { - // 1. Let T be the substring of S from p to q. - let this_str_substring = StdString::from_utf16_lossy( - &this_str - .encode_utf16() - .skip(p) - .take(q - p) - .collect::>(), - ); + // 10. 
If S is the empty String, return ! CreateArrayFromList(« S »). + if this_str.is_empty() { + return Ok(Array::create_array_from_list([this_str.into()], context).into()); + } - // 2. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), T). - a.create_data_property_or_throw(length_a, this_str_substring, context) - .expect("this CreateDataPropertyOrThrow call must not fail"); + // 11. Let substrings be a new empty List. + let mut substrings = vec![]; - // 3. Set lengthA to lengthA + 1. - length_a += 1; + // 12. Let i be 0. + let mut i = 0; - // 4. If lengthA = lim, return A. - if length_a == lim { - return Ok(a.into()); - } + // 13. Let j be ! StringIndexOf(S, R, 0). + let mut j = this_str.index_of(&separator_str, 0); - // 5. Set p to e. - p = e; + // 14. Repeat, while j is not -1 + while let Some(index) = j { + // a. Let T be the substring of S from i to j. + // b. Append T as the last element of substrings. + substrings.push(js_string!(&this_str[i..index])); - // 6. Set q to p. - q = p; - } - } + // c. If the number of elements of substrings is lim, return ! CreateArrayFromList(substrings). + if substrings.len() == lim { + return Ok(Array::create_array_from_list( + substrings.into_iter().map(JsValue::from), + context, + ) + .into()); } + // d. Set i to j + separatorLength. + i = index + separator_length; + + // e. Set j to ! StringIndexOf(S, R, i). + j = this_str.index_of(&separator_str, i); } - // 15. Let T be the substring of S from p to s. - let this_str_substring = StdString::from_utf16_lossy( - &this_str - .encode_utf16() - .skip(p) - .take(this_str_length - p) - .collect::>(), - ); - - // 16. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), T). - a.create_data_property_or_throw(length_a, this_str_substring, context) - .expect("this CreateDataPropertyOrThrow call must not fail"); - - // 17. Return A. - Ok(a.into()) + // 15. Let T be the substring of S from i. + // 16. Append T to substrings. 
+ substrings.push(js_string!(&this_str[i..])); + + // 17. Return ! CreateArrayFromList(substrings). + Ok( + Array::create_array_from_list(substrings.into_iter().map(JsValue::from), context) + .into(), + ) } /// String.prototype.valueOf() @@ -2004,7 +1861,7 @@ impl String { /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/valueOf pub(crate) fn value_of( this: &JsValue, - _: &[JsValue], + _args: &[JsValue], context: &mut Context, ) -> JsResult { // 1. Return ? thisStringValue(this value). @@ -2036,18 +1893,20 @@ impl String { if !regexp.is_null_or_undefined() { // a. Let isRegExp be ? IsRegExp(regexp). // b. If isRegExp is true, then - if let Some(regexp_obj) = regexp.as_object().filter(|obj| obj.is_regexp()) { - // i. Let flags be ? Get(regexp, "flags"). - let flags = regexp_obj.get("flags", context)?; - - // ii. Perform ? RequireObjectCoercible(flags). - flags.require_object_coercible(context)?; - - // iii. If ? ToString(flags) does not contain "g", throw a TypeError exception. - if !flags.to_string(context)?.contains('g') { - return context.throw_type_error( - "String.prototype.matchAll called with a non-global RegExp argument", - ); + if let Some(regexp_obj) = regexp.as_object() { + if is_reg_exp_object(regexp_obj, context)? { + // i. Let flags be ? Get(regexp, "flags"). + let flags = regexp_obj.get("flags", context)?; + + // ii. Perform ? RequireObjectCoercible(flags). + flags.require_object_coercible(context)?; + + // iii. If ? ToString(flags) does not contain "g", throw a TypeError exception. + if !flags.to_string(context)?.contains(&('g' as u16)) { + return context.throw_type_error( + "String.prototype.matchAll called with a non-global RegExp argument", + ); + } } } // c. Let matcher be ? GetMethod(regexp, @@matchAll). @@ -2062,7 +1921,7 @@ impl String { let s = o.to_string(context)?; // 4. Let rx be ? RegExpCreate(regexp, "g"). 
- let rx = RegExp::create(regexp, &JsValue::new("g"), context)?; + let rx = RegExp::create(regexp, &JsValue::new(js_string!("g")), context)?; // 5. Return ? Invoke(rx, @@matchAll, « S »). rx.invoke(WellKnownSymbols::match_all(), &[JsValue::new(s)], context) @@ -2083,38 +1942,91 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + use unicode_normalization::UnicodeNormalization; + /// Represents the type of normalization applied to a [`JsString`] + #[derive(Clone, Copy)] + pub(crate) enum Normalization { + Nfc, + Nfd, + Nfkc, + Nfkd, + } // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; // 2. Let S be ? ToString(O). let s = this.to_string(context)?; - let form = args.get_or_undefined(0); - - let f_str; - - let f = if form.is_undefined() { + let f = match args.get_or_undefined(0) { // 3. If form is undefined, let f be "NFC". - "NFC" - } else { + &JsValue::Undefined => js_string!("NFC"), // 4. Else, let f be ? ToString(form). - f_str = form.to_string(context)?; - f_str.as_str() + form => form.to_string(context)?, }; // 6. Let ns be the String value that is the result of normalizing S // into the normalization form named by f as specified in // https://unicode.org/reports/tr15/. - // 7. Return ns. - match f { - "NFC" => Ok(JsValue::new(s.nfc().collect::())), - "NFD" => Ok(JsValue::new(s.nfd().collect::())), - "NFKC" => Ok(JsValue::new(s.nfkc().collect::())), - "NFKD" => Ok(JsValue::new(s.nfkd().collect::())), + let normalization = match f { + ntype if &ntype == utf16!("NFC") => Normalization::Nfc, + ntype if &ntype == utf16!("NFD") => Normalization::Nfd, + ntype if &ntype == utf16!("NFKC") => Normalization::Nfkc, + ntype if &ntype == utf16!("NFKD") => Normalization::Nfkd, // 5. If f is not one of "NFC", "NFD", "NFKC", or "NFKD", throw a RangeError exception. 
- _ => context - .throw_range_error("The normalization form should be one of NFC, NFD, NFKC, NFKD."), + _ => { + return context.throw_range_error( + "The normalization form should be one of NFC, NFD, NFKC, NFKD.", + ) + } + }; + + let mut code_points = s.code_points(); + let mut result = Vec::with_capacity(s.len()); + + let mut next_unpaired_surrogate = None; + let mut buf = [0; 2]; + + loop { + let only_chars = code_points.by_ref().map_while(|cpoint| match cpoint { + CodePoint::Unicode(c) => Some(c), + CodePoint::UnpairedSurrogate(s) => { + next_unpaired_surrogate = Some(s); + None + } + }); + + match normalization { + Normalization::Nfc => { + for mapped in only_chars.nfc() { + result.extend_from_slice(mapped.encode_utf16(&mut buf)); + } + } + Normalization::Nfd => { + for mapped in only_chars.nfd() { + result.extend_from_slice(mapped.encode_utf16(&mut buf)); + } + } + Normalization::Nfkc => { + for mapped in only_chars.nfkc() { + result.extend_from_slice(mapped.encode_utf16(&mut buf)); + } + } + Normalization::Nfkd => { + for mapped in only_chars.nfkd() { + result.extend_from_slice(mapped.encode_utf16(&mut buf)); + } + } + } + + if let Some(surr) = next_unpaired_surrogate.take() { + result.push(surr); + } else { + break; + } } + + // 7. Return ns. + Ok(js_string!(result).into()) } /// `String.prototype.search( regexp )` @@ -2173,23 +2085,24 @@ impl String { /// /// [spec]: https://tc39.es/ecma262/#sec-getsubstitution pub(crate) fn get_substitution( - matched: &str, - str: &str, + matched: &JsString, + str: &JsString, position: usize, captures: &[JsValue], named_captures: &JsValue, replacement: &JsString, context: &mut Context, ) -> JsResult { + let mut buf = [0; 2]; // 1. Assert: Type(matched) is String. // 2. Let matchLength be the number of code units in matched. - let match_length = matched.encode_utf16().count(); + let match_length = matched.len(); // 3. Assert: Type(str) is String. // 4. Let stringLength be the number of code units in str. 
- let str_length = str.encode_utf16().count(); + let str_length = str.len(); // 5. Assert: position ≤ stringLength. // 6. Assert: captures is a possibly empty List of Strings. @@ -2205,47 +2118,55 @@ pub(crate) fn get_substitution( // from replacement to result while performing replacements as specified in Table 58. // These $ replacements are done left-to-right, and, once such a replacement is performed, // the new replacement text is not subject to further replacements. - let mut result = StdString::new(); - let mut chars = replacement.chars().peekable(); + let mut result = vec![]; + let mut chars = replacement.code_points().peekable(); while let Some(first) = chars.next() { - if first == '$' { + if first == CodePoint::Unicode('$') { let second = chars.next(); - let second_is_digit = second.as_ref().map_or(false, char::is_ascii_digit); + let second_is_digit = second + .and_then(CodePoint::as_char) + .as_ref() + .map_or(false, char::is_ascii_digit); // we use peek so that it is still in the iterator if not used - let third = if second_is_digit { chars.peek() } else { None }; - let third_is_digit = third.map_or(false, char::is_ascii_digit); + let third = if second_is_digit { + chars.peek().copied() + } else { + None + }; + let third_is_digit = third + .and_then(CodePoint::as_char) + .as_ref() + .map_or(false, char::is_ascii_digit); match (second, third) { // $$ - (Some('$'), _) => { + (Some(CodePoint::Unicode('$')), _) => { // $ - result.push('$'); + result.push('$' as u16); } // $& - (Some('&'), _) => { + (Some(CodePoint::Unicode('&')), _) => { // matched - result.push_str(matched); + result.extend_from_slice(matched); } // $` - (Some('`'), _) => { + (Some(CodePoint::Unicode('`')), _) => { // The replacement is the substring of str from 0 to position. 
- result.push_str(&StdString::from_utf16_lossy( - &str.encode_utf16().take(position).collect::>(), - )); + result.extend_from_slice(&str[..position]); } // $' - (Some('\''), _) => { + (Some(CodePoint::Unicode('\'')), _) => { // If tailPos ≥ stringLength, the replacement is the empty String. // Otherwise the replacement is the substring of str from tailPos. if tail_pos < str_length { - result.push_str(&StdString::from_utf16_lossy( - &str.encode_utf16().skip(tail_pos).collect::>(), - )); + result.extend_from_slice(&str[tail_pos..]); } } // $nn - (Some(second), Some(third)) if second_is_digit && third_is_digit => { + (Some(CodePoint::Unicode(second)), Some(CodePoint::Unicode(third))) + if second_is_digit && third_is_digit => + { // The nnth element of captures, where nn is a two-digit decimal number in the range 01 to 99. let tens = second .to_digit(10) @@ -2260,19 +2181,17 @@ pub(crate) fn get_substitution( // If nn ≤ m and the nnth element of captures is undefined, use the empty String instead. // If nn is 00 or nn > m, no replacement is done. if nn == 0 || nn > m { - result.push('$'); - result.push(second); - result.push(*third); + result.extend_from_slice(&['$' as u16, second as u16, third as u16]); } else if let Some(capture) = captures.get(nn - 1) { if let Some(s) = capture.as_string() { - result.push_str(s); + result.extend_from_slice(s); } } chars.next(); } // $n - (Some(second), _) if second_is_digit => { + (Some(CodePoint::Unicode(second)), _) if second_is_digit => { // The nth element of captures, where n is a single digit in the range 1 to 9. let n = second .to_digit(10) @@ -2282,20 +2201,19 @@ pub(crate) fn get_substitution( // If n ≤ m and the nth element of captures is undefined, use the empty String instead. // If n > m, no replacement is done. 
if n == 0 || n > m { - result.push('$'); - result.push(second); + result.extend_from_slice(&['$' as u16, second as u16]); } else if let Some(capture) = captures.get(n - 1) { if let Some(s) = capture.as_string() { - result.push_str(s); + result.extend_from_slice(s); } } } // $< - (Some('<'), _) => { + (Some(CodePoint::Unicode('<')), _) => { // 1. If namedCaptures is undefined, the replacement text is the String "$<". // 2. Else, if named_captures.is_undefined() { - result.push_str("$<"); + result.extend_from_slice(utf16!("$<")); } else { // a. Assert: Type(namedCaptures) is Object. let named_captures = named_captures @@ -2303,15 +2221,15 @@ pub(crate) fn get_substitution( .expect("should be an object according to spec"); // b. Scan until the next > U+003E (GREATER-THAN SIGN). - let mut group_name = StdString::new(); + let mut group_name = vec![]; let mut found = false; loop { match chars.next() { - Some('>') => { + Some(CodePoint::Unicode('>')) => { found = true; break; } - Some(c) => group_name.push(c), + Some(c) => group_name.extend_from_slice(c.encode_utf16(&mut buf)), None => break, } } @@ -2319,72 +2237,38 @@ pub(crate) fn get_substitution( // c. If none is found, the replacement text is the String "$<". #[allow(clippy::if_not_else)] if !found { - result.push_str("$<"); - result.push_str(&group_name); + result.extend_from_slice(utf16!("$<")); + result.extend_from_slice(&group_name); // d. Else, } else { // i. Let groupName be the enclosed substring. + let group_name = js_string!(group_name); // ii. Let capture be ? Get(namedCaptures, groupName). let capture = named_captures.get(group_name, context)?; // iii. If capture is undefined, replace the text through > with the empty String. // iv. Otherwise, replace the text through > with ? ToString(capture). if !capture.is_undefined() { - result.push_str(capture.to_string(context)?.as_str()); + result.extend_from_slice(&capture.to_string(context)?); } } } } // $?, ? 
is none of the above _ => { - result.push('$'); + result.push('$' as u16); if let Some(second) = second { - result.push(second); + result.extend_from_slice(second.encode_utf16(&mut buf)); } } } } else { - result.push(first); + result.extend_from_slice(first.encode_utf16(&mut buf)); } } // 11. Return result. - Ok(result.into()) -} - -/// `22.1.3.21.1 SplitMatch ( S, q, R )` -/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma262/#sec-splitmatch -fn split_match(s_str: &str, q: usize, r_str: &str) -> Option { - // 1. Let r be the number of code units in R. - let r = r_str.encode_utf16().count(); - - // 2. Let s be the number of code units in S. - let s = s_str.encode_utf16().count(); - - // 3. If q + r > s, return not-matched. - if q + r > s { - return None; - } - - // 4. If there exists an integer i between 0 (inclusive) and r (exclusive) - // such that the code unit at index q + i within S is different from the code unit at index i within R, - // return not-matched. - for i in 0..r { - if let Some(s_char) = s_str.encode_utf16().nth(q + i) { - if let Some(r_char) = r_str.encode_utf16().nth(i) { - if s_char != r_char { - return None; - } - } - } - } - - // 5. Return q + r. 
- Some(q + r) + Ok(js_string!(result)) } /// Abstract operation `IsRegExp( argument )` diff --git a/boa_engine/src/builtins/string/string_iterator.rs b/boa_engine/src/builtins/string/string_iterator.rs index 21b1568ad6e..7b3357dc075 100644 --- a/boa_engine/src/builtins/string/string_iterator.rs +++ b/boa_engine/src/builtins/string/string_iterator.rs @@ -1,7 +1,5 @@ use crate::{ - builtins::{ - function::make_builtin_fn, iterable::create_iter_result_object, string::code_point_at, - }, + builtins::{function::make_builtin_fn, iterable::create_iter_result_object}, object::{JsObject, ObjectData}, property::PropertyDescriptor, symbol::WellKnownSymbols, @@ -10,12 +8,10 @@ use crate::{ use boa_gc::{Finalize, Trace}; use boa_profiler::Profiler; -use super::CodePointInfo; - #[derive(Debug, Clone, Finalize, Trace)] pub struct StringIterator { string: JsValue, - next_index: i32, + next_index: usize, } impl StringIterator { @@ -53,7 +49,7 @@ impl StringIterator { )); } let native_string = string_iterator.string.to_string(context)?; - let len = native_string.encode_utf16().count() as i32; + let len = native_string.len(); let position = string_iterator.next_index; if position >= len { string_iterator.string = JsValue::undefined(); @@ -63,12 +59,8 @@ impl StringIterator { context, )); } - let CodePointInfo { - code_point: _, - code_unit_count, - is_unpaired_surrogate: _, - } = code_point_at(&native_string, position as u64); - string_iterator.next_index += i32::from(code_unit_count); + let code_point = native_string.code_point_at(position); + string_iterator.next_index += code_point.code_unit_count(); let result_string = crate::builtins::string::String::substring( &string_iterator.string, &[position.into(), string_iterator.next_index.into()], diff --git a/boa_engine/src/builtins/string/tests.rs b/boa_engine/src/builtins/string/tests.rs index cefd29a8d87..96e303a1170 100644 --- a/boa_engine/src/builtins/string/tests.rs +++ b/boa_engine/src/builtins/string/tests.rs @@ -1,4 +1,3 @@ 
-use super::{is_leading_surrogate, is_trailing_surrogate}; use crate::{forward, forward_val, Context}; #[test] @@ -682,13 +681,14 @@ fn split() { forward(&mut context, "['']") ); - // TODO: Support keeping invalid code point in string assert_eq!( forward( &mut context, "\'\u{1d7d8}\u{1d7d9}\u{1d7da}\u{1d7db}\'.split(\'\')" ), - forward(&mut context, "['�','�','�','�','�','�','�','�']") + // TODO: modify interner to store UTF-16 surrogates from string literals + // forward(&mut context, "['�','�','�','�','�','�','�','�']") + "[ \"\\uD835\", \"\\uDFD8\", \"\\uD835\", \"\\uDFD9\", \"\\uD835\", \"\\uDFDA\", \"\\uD835\", \"\\uDFDB\" ]" ); } @@ -966,7 +966,7 @@ fn char_at() { assert_eq!(forward(&mut context, "'abc'.charAt(9)"), "\"\""); assert_eq!(forward(&mut context, "'abc'.charAt()"), "\"a\""); assert_eq!(forward(&mut context, "'abc'.charAt(null)"), "\"a\""); - assert_eq!(forward(&mut context, "'\\uDBFF'.charAt(0)"), "\"\u{FFFD}\""); + assert_eq!(forward(&mut context, "'\\uDBFF'.charAt(0)"), r#""\uDBFF""#); } #[test] @@ -1139,7 +1139,7 @@ fn string_get_property() { assert_eq!(forward(&mut context, "'abc'[2]"), "\"c\""); assert_eq!(forward(&mut context, "'abc'[3]"), "undefined"); assert_eq!(forward(&mut context, "'abc'['foo']"), "undefined"); - assert_eq!(forward(&mut context, "'😀'[0]"), "\"�\""); + assert_eq!(forward(&mut context, "'😀'[0]"), "\"\\uD83D\""); } #[test] @@ -1151,17 +1151,3 @@ fn search() { assert_eq!(forward(&mut context, "'aa'.search(/a/g)"), "0"); assert_eq!(forward(&mut context, "'ba'.search(/a/)"), "1"); } - -#[test] -fn ut_is_leading_surrogate() { - for cp in 0xD800..=0xDBFF { - assert!(is_leading_surrogate(cp), "failed: {cp:X}"); - } -} - -#[test] -fn ut_is_trailing_surrogate() { - for cp in 0xDC00..=0xDFFF { - assert!(is_trailing_surrogate(cp), "failed: {cp:X}"); - } -} diff --git a/boa_engine/src/builtins/typed_array/mod.rs b/boa_engine/src/builtins/typed_array/mod.rs index 0315cf0693c..6e7da8b1bed 100644 --- 
a/boa_engine/src/builtins/typed_array/mod.rs +++ b/boa_engine/src/builtins/typed_array/mod.rs @@ -20,6 +20,7 @@ use crate::{ Array, ArrayIterator, BuiltIn, JsArgs, }, context::intrinsics::{StandardConstructor, StandardConstructors}, + js_string, object::{ internal_methods::get_prototype_from_constructor, ConstructorBuilder, FunctionBuilder, JsObject, ObjectData, @@ -27,7 +28,7 @@ use crate::{ property::{Attribute, PropertyNameKind}, symbol::WellKnownSymbols, value::{IntegerOrInfinity, JsValue}, - Context, JsResult, JsString, + Context, JsResult, }; use boa_profiler::Profiler; use num_traits::{Signed, Zero}; @@ -1482,21 +1483,21 @@ impl TypedArray { // 4. If separator is undefined, let sep be the single-element String ",". let separator = args.get_or_undefined(0); let sep = if separator.is_undefined() { - JsString::new(",") + js_string!(",") // 5. Else, let sep be ? ToString(separator). } else { separator.to_string(context)? }; // 6. Let R be the empty String. - let mut r = JsString::new(""); + let mut r = js_string!(); // 7. Let k be 0. // 8. Repeat, while k < len, for k in 0..len { // a. If k > 0, set R to the string-concatenation of R and sep. if k > 0 { - r = JsString::concat(r, sep.clone()); + r = js_string!(&r, &sep); } // b. Let element be ! Get(O, ! ToString(𝔽(k))). @@ -1505,7 +1506,7 @@ impl TypedArray { // c. If element is undefined, let next be the empty String; otherwise, let next be ! ToString(element). // d. Set R to the string-concatenation of R and next. 
if !element.is_undefined() { - r = JsString::concat(r, element.to_string(context)?); + r = js_string!(&r, &element.to_string(context)?); } } diff --git a/boa_engine/src/builtins/uri/mod.rs b/boa_engine/src/builtins/uri/mod.rs index 19fb39d9486..e5c1fa73963 100644 --- a/boa_engine/src/builtins/uri/mod.rs +++ b/boa_engine/src/builtins/uri/mod.rs @@ -17,10 +17,10 @@ use self::consts::{ is_uri_unescaped, }; -use super::{string::code_point_at, BuiltIn}; +use super::BuiltIn; use crate::{ - builtins::JsArgs, object::FunctionBuilder, property::Attribute, Context, JsResult, JsString, - JsValue, + builtins::JsArgs, js_string, object::FunctionBuilder, property::Attribute, string::CodePoint, + Context, JsResult, JsString, JsValue, }; /// URI Handling Functions @@ -228,17 +228,15 @@ impl Uri { /// - [ECMAScript reference][spec] /// /// [spec]: https://tc39.es/ecma262/#sec-encode -fn encode(context: &mut Context, string: &JsString, unescaped_set: F) -> JsResult +fn encode(context: &mut Context, string: &JsString, unescaped_set: F) -> JsResult where F: Fn(u16) -> bool, { - let code_units = string.encode_utf16().collect::>(); - // 1. Let strLen be the length of string. - let str_len = code_units.len(); + let str_len = string.len(); // 2. Let R be the empty String. - let mut r = String::new(); + let mut r = Vec::new(); // 3. Let k be 0. let mut k = 0; @@ -246,11 +244,11 @@ where loop { // a. If k = strLen, return R. if k == str_len { - return Ok(r); + return Ok(js_string!(r)); } // b. Let C be the code unit at index k within string. - let c = code_units[k]; + let c = string[k]; // c. If C is in unescapedSet, then if unescaped_set(c) { @@ -258,27 +256,27 @@ where k += 1; // ii. Set R to the string-concatenation of R and C. - r.push(char::from_u32(u32::from(c)).expect("char from code point cannot fail here")); + r.push(c); } else { // d. Else, // i. Let cp be CodePointAt(string, k). - let cp = code_point_at(string, k as u64); + let cp = string.code_point_at(k); // ii. 
If cp.[[IsUnpairedSurrogate]] is true, throw a URIError exception. - if cp.is_unpaired_surrogate { - context.throw_uri_error("trying to encode an invalid string")?; - } + let ch = if let CodePoint::Unicode(ch) = cp { + ch + } else { + return Err(context.construct_uri_error("trying to encode an invalid string")); + }; // iii. Set k to k + cp.[[CodeUnitCount]]. - k += cp.code_unit_count as usize; + k += cp.code_unit_count(); // iv. Let Octets be the List of octets resulting by applying the UTF-8 transformation // to cp.[[CodePoint]]. let mut buff = [0_u8; 4]; // Will never be more than 4 bytes - let octets = char::from_u32(cp.code_point) - .expect("valid unicode code point to char conversion failed") - .encode_utf8(&mut buff); + let octets = ch.encode_utf8(&mut buff); // v. For each element octet of Octets, do for octet in octets.bytes() { @@ -287,7 +285,7 @@ where // "%" // the String representation of octet, formatted as a two-digit uppercase // hexadecimal number, padded to the left with a zero if necessary - r = format!("{r}%{octet:0>2X}"); + r.extend(format!("%{octet:0>2X}").encode_utf16()); } } } @@ -304,28 +302,28 @@ where /// /// [spec]: https://tc39.es/ecma262/#sec-decode #[allow(clippy::many_single_char_names)] -fn decode(context: &mut Context, string: &JsString, reserved_set: F) -> JsResult +fn decode(context: &mut Context, string: &JsString, reserved_set: F) -> JsResult where F: Fn(u16) -> bool, { - let code_units = string.encode_utf16().collect::>(); - // 1. Let strLen be the length of string. - let str_len = code_units.len(); + let str_len = string.len(); // 2. Let R be the empty String. let mut r = Vec::new(); + let mut octets = Vec::with_capacity(4); + // 3. Let k be 0. let mut k = 0; // 4. Repeat, loop { // a. If k = strLen, return R. if k == str_len { - return Ok(String::from_utf16(&r).expect("invalid UTF-16 characters found")); + return Ok(js_string!(r)); } // b. Let C be the code unit at index k within string. 
- let c = code_units[k]; + let c = string[k]; // c. If C is not the code unit 0x0025 (PERCENT SIGN), then #[allow(clippy::if_not_else)] @@ -345,7 +343,7 @@ where // iii. If the code units at index (k + 1) and (k + 2) within string do not represent // hexadecimal digits, throw a URIError exception. // iv. Let B be the 8-bit value represented by the two hexadecimal digits at index (k + 1) and (k + 2). - let b = decode_hex_byte(code_units[k + 1], code_units[k + 2]) + let b = decode_hex_byte(string[k + 1], string[k + 2]) .ok_or_else(|| context.construct_uri_error("invalid hexadecimal digit found"))?; // v. Set k to k + 2. @@ -366,7 +364,7 @@ where } else { // 3. Else, // a. Let S be the substring of string from start to k + 1. - Vec::from(&code_units[start..=k]) + Vec::from(&string[start..=k]) } } else { // viii. Else, @@ -381,7 +379,7 @@ where } // 3. Let Octets be « B ». - let mut octets = Vec::from([b]); + octets.push(b); // 4. Let j be 1. // 5. Repeat, while j < n, @@ -390,17 +388,16 @@ where k += 1; // b. If the code unit at index k within string is not the code unit 0x0025 (PERCENT SIGN), throw a URIError exception. - if code_units[k] != 0x0025 { + if string[k] != 0x0025 { context .throw_uri_error("escape characters must be preceded with a % sign")?; } // c. If the code units at index (k + 1) and (k + 2) within string do not represent hexadecimal digits, throw a URIError exception. // d. Let B be the 8-bit value represented by the two hexadecimal digits at index (k + 1) and (k + 2). - let b = - decode_hex_byte(code_units[k + 1], code_units[k + 2]).ok_or_else(|| { - context.construct_uri_error("invalid hexadecimal digit found") - })?; + let b = decode_hex_byte(string[k + 1], string[k + 2]).ok_or_else(|| { + context.construct_uri_error("invalid hexadecimal digit found") + })?; // e. Set k to k + 2. k += 2; @@ -415,7 +412,7 @@ where assert_eq!(octets.len(), n); // 7. 
If Octets does not contain a valid UTF-8 encoding of a Unicode code point, throw a URIError exception. - match String::from_utf8(octets) { + match std::str::from_utf8(&octets) { Err(_) => { return Err(context.construct_uri_error("invalid UTF-8 encoding found")) } @@ -424,7 +421,9 @@ where // 9. Let S be UTF16EncodeCodePoint(V). // utf16_encode_codepoint(v) - v.encode_utf16().collect::>() + let s = v.encode_utf16().collect::>(); + octets.clear(); + s } } } diff --git a/boa_engine/src/bytecompiler/mod.rs b/boa_engine/src/bytecompiler/mod.rs index f7faef4243c..48c278f7b50 100644 --- a/boa_engine/src/bytecompiler/mod.rs +++ b/boa_engine/src/bytecompiler/mod.rs @@ -644,7 +644,7 @@ impl<'b> ByteCompiler<'b> { Node::Const(c) => { match c { Const::String(v) => self.emit_push_literal(Literal::String( - self.interner().resolve_expect(*v).into(), + self.interner().resolve_expect(*v).into_common(false), )), Const::Int(v) => self.emit_push_integer(*v), Const::Num(v) => self.emit_push_rational(*v), @@ -1148,7 +1148,7 @@ impl<'b> ByteCompiler<'b> { for element in template_literal.elements() { match element { TemplateElement::String(s) => self.emit_push_literal(Literal::String( - self.interner().resolve_expect(*s).into(), + self.interner().resolve_expect(*s).into_common(false), )), TemplateElement::Expr(expr) => { self.compile_expr(expr, true)?; @@ -1240,7 +1240,7 @@ impl<'b> ByteCompiler<'b> { for cooked in template.cookeds() { if let Some(cooked) = cooked { self.emit_push_literal(Literal::String( - self.interner().resolve_expect(*cooked).into(), + self.interner().resolve_expect(*cooked).into_common(false), )); } else { self.emit_opcode(Opcode::PushUndefined); @@ -1252,7 +1252,7 @@ impl<'b> ByteCompiler<'b> { self.emit_opcode(Opcode::PushNewArray); for raw in template.raws() { self.emit_push_literal(Literal::String( - self.interner().resolve_expect(*raw).into(), + self.interner().resolve_expect(*raw).into_common(false), )); self.emit_opcode(Opcode::PushValueToArray); } @@ -2336,7 
+2336,7 @@ impl<'b> ByteCompiler<'b> { for key in excluded_keys { self.emit_push_literal(Literal::String( - self.interner().resolve_expect(*key).into(), + self.interner().resolve_expect(*key).into_common(false), )); } @@ -2354,7 +2354,7 @@ impl<'b> ByteCompiler<'b> { self.emit_opcode(Opcode::PushEmptyObject); for key in excluded_keys { self.emit_push_literal(Literal::String( - self.interner().resolve_expect(*key).into(), + self.interner().resolve_expect(*key).into_common(false), )); } self.emit(Opcode::CopyDataProperties, &[excluded_keys.len() as u32, 0]); @@ -2943,7 +2943,7 @@ impl<'b> ByteCompiler<'b> { match name { PropertyName::Literal(name) => { self.emit_push_literal(Literal::String( - self.interner().resolve_expect(*name).into(), + self.interner().resolve_expect(*name).into_common(false), )); } PropertyName::Computed(name) => { diff --git a/boa_engine/src/class.rs b/boa_engine/src/class.rs index cbbb8a2eda4..bd2dacdf107 100644 --- a/boa_engine/src/class.rs +++ b/boa_engine/src/class.rs @@ -30,7 +30,7 @@ //! // This is equivalent to `String(arg)`. //! let kind = args.get_or_undefined(0).to_string(context)?; //! -//! let animal = match kind.as_str() { +//! let animal = match kind.to_std_string_escaped().as_str() { //! "cat" => Self::Cat, //! "dog" => Self::Dog, //! 
_ => Self::Other, diff --git a/boa_engine/src/context/mod.rs b/boa_engine/src/context/mod.rs index 82503797d25..505725c11b8 100644 --- a/boa_engine/src/context/mod.rs +++ b/boa_engine/src/context/mod.rs @@ -21,7 +21,7 @@ use crate::{ realm::Realm, syntax::{ast::node::StatementList, parser::ParseError, Parser}, vm::{CallFrame, CodeBlock, FinallyReturn, GeneratorResumeKind, Vm}, - JsResult, JsValue, + JsResult, JsString, JsValue, }; use boa_gc::Gc; @@ -230,7 +230,7 @@ impl Context { #[inline] pub fn construct_error(&mut self, message: M) -> JsValue where - M: Into>, + M: Into, { crate::builtins::error::Error::constructor( &self @@ -249,7 +249,7 @@ impl Context { #[inline] pub fn throw_error(&mut self, message: M) -> JsResult where - M: Into>, + M: Into, { Err(self.construct_error(message)) } @@ -258,7 +258,7 @@ impl Context { #[inline] pub fn construct_range_error(&mut self, message: M) -> JsValue where - M: Into>, + M: Into, { crate::builtins::error::RangeError::constructor( &self @@ -277,7 +277,7 @@ impl Context { #[inline] pub fn throw_range_error(&mut self, message: M) -> JsResult where - M: Into>, + M: Into, { Err(self.construct_range_error(message)) } @@ -286,7 +286,7 @@ impl Context { #[inline] pub fn construct_type_error(&mut self, message: M) -> JsValue where - M: Into>, + M: Into, { crate::builtins::error::TypeError::constructor( &self @@ -305,7 +305,7 @@ impl Context { #[inline] pub fn throw_type_error(&mut self, message: M) -> JsResult where - M: Into>, + M: Into, { Err(self.construct_type_error(message)) } @@ -314,7 +314,7 @@ impl Context { #[inline] pub fn construct_reference_error(&mut self, message: M) -> JsValue where - M: Into>, + M: Into, { crate::builtins::error::ReferenceError::constructor( &self @@ -333,7 +333,7 @@ impl Context { #[inline] pub fn throw_reference_error(&mut self, message: M) -> JsResult where - M: Into>, + M: Into, { Err(self.construct_reference_error(message)) } @@ -342,7 +342,7 @@ impl Context { #[inline] pub fn 
construct_syntax_error(&mut self, message: M) -> JsValue where - M: Into>, + M: Into, { crate::builtins::error::SyntaxError::constructor( &self @@ -361,7 +361,7 @@ impl Context { #[inline] pub fn throw_syntax_error(&mut self, message: M) -> JsResult where - M: Into>, + M: Into, { Err(self.construct_syntax_error(message)) } @@ -369,7 +369,7 @@ impl Context { /// Constructs a `EvalError` with the specified message. pub fn construct_eval_error(&mut self, message: M) -> JsValue where - M: Into>, + M: Into, { crate::builtins::error::EvalError::constructor( &self @@ -387,7 +387,7 @@ impl Context { /// Constructs a `URIError` with the specified message. pub fn construct_uri_error(&mut self, message: M) -> JsValue where - M: Into>, + M: Into, { crate::builtins::error::UriError::constructor( &self @@ -405,7 +405,7 @@ impl Context { /// Throws a `EvalError` with the specified message. pub fn throw_eval_error(&mut self, message: M) -> JsResult where - M: Into>, + M: Into, { Err(self.construct_eval_error(message)) } @@ -413,7 +413,7 @@ impl Context { /// Throws a `URIError` with the specified message. 
pub fn throw_uri_error(&mut self, message: M) -> JsResult where - M: Into>, + M: Into, { Err(self.construct_uri_error(message)) } diff --git a/boa_engine/src/environments/compile.rs b/boa_engine/src/environments/compile.rs index 9bea4ba6f28..99bec72afce 100644 --- a/boa_engine/src/environments/compile.rs +++ b/boa_engine/src/environments/compile.rs @@ -290,7 +290,10 @@ impl Context { .borrow_mut() .create_mutable_binding(name, function_scope) { - let name_str = JsString::from(self.interner().resolve_expect(name)); + let name_str = self + .interner() + .resolve_expect(name) + .into_common::(false); let desc = self .realm .global_property_map diff --git a/boa_engine/src/lib.rs b/boa_engine/src/lib.rs index 9e6e3ea26ce..4b3c44034e0 100644 --- a/boa_engine/src/lib.rs +++ b/boa_engine/src/lib.rs @@ -67,14 +67,16 @@ clippy::missing_errors_doc, clippy::as_conversions, clippy::let_unit_value, + // TODO deny once false positive is fixed (https://github.com/rust-lang/rust-clippy/issues/9626). + clippy::trait_duplication_in_bounds, // Ignore because `write!(string, ...)` instead of `string.push_str(&format!(...))` can fail. // We only use it in `ToInternedString` where performance is not an issue. clippy::format_push_string, - // TODO deny once false positive are fixed (https://github.com/rust-lang/rust-clippy/issues/9076). - clippy::trait_duplication_in_bounds, rustdoc::missing_doc_code_examples )] +extern crate static_assertions as sa; + pub mod bigint; pub mod builtins; pub mod bytecompiler; diff --git a/boa_engine/src/object/internal_methods/array.rs b/boa_engine/src/object/internal_methods/array.rs index 2abc0b2de6a..976bf0cdcec 100644 --- a/boa_engine/src/object/internal_methods/array.rs +++ b/boa_engine/src/object/internal_methods/array.rs @@ -1,6 +1,7 @@ use crate::{ object::JsObject, property::{PropertyDescriptor, PropertyKey}, + string::utf16, Context, JsResult, }; @@ -32,7 +33,7 @@ pub(crate) fn array_exotic_define_own_property( // 1. 
Assert: IsPropertyKey(P) is true. match key { // 2. If P is "length", then - PropertyKey::String(ref s) if s == "length" => { + PropertyKey::String(ref s) if s == utf16!("length") => { // a. Return ? ArraySetLength(A, Desc). array_set_length(obj, desc, context) diff --git a/boa_engine/src/object/internal_methods/string.rs b/boa_engine/src/object/internal_methods/string.rs index 2cf9af5ba80..650b01eee29 100644 --- a/boa_engine/src/object/internal_methods/string.rs +++ b/boa_engine/src/object/internal_methods/string.rs @@ -1,7 +1,8 @@ use crate::{ + js_string, object::JsObject, property::{PropertyDescriptor, PropertyKey}, - Context, JsResult, JsValue, + Context, JsResult, }; use super::{InternalObjectMethods, ORDINARY_INTERNAL_METHODS}; @@ -97,7 +98,7 @@ pub(crate) fn string_exotic_own_property_keys( .as_string() .expect("string exotic method should only be callable from string objects"); // 4. Let len be the length of str. - let len = string.encode_utf16().count(); + let len = string.len(); // 1. Let keys be a new empty List. 
let mut keys = Vec::with_capacity(len); @@ -172,10 +173,7 @@ fn string_get_own_property(obj: &JsObject, key: &PropertyKey) -> Option FunctionBuilder<'context> { function, constructor: None, }, - name: JsString::default(), + name: js_string!(), length: 0, } } @@ -1646,7 +1648,7 @@ impl<'context> FunctionBuilder<'context> { constructor: None, captures: Captures::new(()), }, - name: JsString::default(), + name: js_string!(), length: 0, } } @@ -1680,7 +1682,7 @@ impl<'context> FunctionBuilder<'context> { constructor: None, captures: Captures::new(captures), }, - name: JsString::default(), + name: js_string!(), length: 0, } } @@ -1692,9 +1694,9 @@ impl<'context> FunctionBuilder<'context> { #[must_use] pub fn name(mut self, name: N) -> Self where - N: AsRef, + N: Into, { - self.name = name.as_ref().into(); + self.name = name.into(); self } @@ -1915,7 +1917,7 @@ impl<'context> ConstructorBuilder<'context> { object: JsObject::empty(), prototype: JsObject::empty(), length: 0, - name: JsString::default(), + name: js_string!(), callable: true, constructor: Some(ConstructorKind::Base), inherit: None, @@ -1937,7 +1939,7 @@ impl<'context> ConstructorBuilder<'context> { has_prototype_property: true, prototype: standard_constructor.prototype, length: 0, - name: JsString::default(), + name: js_string!(), callable: true, constructor: Some(ConstructorKind::Base), inherit: None, diff --git a/boa_engine/src/property/mod.rs b/boa_engine/src/property/mod.rs index 7f56395c9b6..d6f313e99c5 100644 --- a/boa_engine/src/property/mod.rs +++ b/boa_engine/src/property/mod.rs @@ -15,7 +15,7 @@ //! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/defineProperty //! 
[section]: https://tc39.es/ecma262/#sec-property-attributes -use crate::{JsString, JsSymbol, JsValue}; +use crate::{js_string, JsString, JsSymbol, JsValue}; use boa_gc::{Finalize, Trace}; use std::fmt; @@ -498,7 +498,7 @@ pub enum PropertyKey { impl From for PropertyKey { #[inline] fn from(string: JsString) -> Self { - if let Ok(index) = string.parse() { + if let Some(index) = string.to_std_string().ok().and_then(|s| s.parse().ok()) { Self::Index(index) } else { Self::String(string) @@ -534,7 +534,7 @@ impl From> for PropertyKey { if let Ok(index) = string.parse() { Self::Index(index) } else { - Self::String(string.into()) + Self::String(string.as_ref().into()) } } } @@ -550,8 +550,8 @@ impl fmt::Display for PropertyKey { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::String(ref string) => string.fmt(f), - Self::Symbol(ref symbol) => symbol.fmt(f), + Self::String(ref string) => string.to_std_string_escaped().fmt(f), + Self::Symbol(ref symbol) => symbol.descriptive_string().to_std_string_escaped().fmt(f), Self::Index(index) => index.fmt(f), } } @@ -608,7 +608,7 @@ impl From for PropertyKey { if let Ok(index) = u32::try_from(value) { Self::Index(index) } else { - Self::String(JsString::from(value.to_string())) + Self::String(js_string!(value.to_string())) } } } @@ -618,7 +618,7 @@ impl From for PropertyKey { if let Ok(index) = u32::try_from(value) { Self::Index(index) } else { - Self::String(JsString::from(value.to_string())) + Self::String(js_string!(value.to_string())) } } } @@ -628,7 +628,7 @@ impl From for PropertyKey { if let Ok(index) = u32::try_from(value) { Self::Index(index) } else { - Self::String(JsString::from(value.to_string())) + Self::String(js_string!(value.to_string())) } } } @@ -638,7 +638,7 @@ impl From for PropertyKey { if let Ok(index) = u32::try_from(value) { Self::Index(index) } else { - Self::String(JsString::from(value.to_string())) + Self::String(js_string!(value.to_string())) } } } @@ -648,7 +648,7 
@@ impl From for PropertyKey { if let Ok(index) = u32::try_from(value) { Self::Index(index) } else { - Self::String(JsString::from(value.to_string())) + Self::String(js_string!(value.to_string())) } } } @@ -664,8 +664,8 @@ impl From for PropertyKey { } } -impl PartialEq<&str> for PropertyKey { - fn eq(&self, other: &&str) -> bool { +impl PartialEq<[u16]> for PropertyKey { + fn eq(&self, other: &[u16]) -> bool { match self { Self::String(ref string) => string == other, _ => false, diff --git a/boa_engine/src/string.rs b/boa_engine/src/string.rs deleted file mode 100644 index b8c76e631c8..00000000000 --- a/boa_engine/src/string.rs +++ /dev/null @@ -1,1227 +0,0 @@ -use crate::builtins::string::is_trimmable_whitespace; -use boa_gc::{unsafe_empty_trace, Finalize, Trace}; -use rustc_hash::{FxHashMap, FxHasher}; -use std::{ - alloc::{alloc, dealloc, handle_alloc_error, Layout}, - borrow::Borrow, - cell::Cell, - hash::BuildHasherDefault, - hash::{Hash, Hasher}, - marker::PhantomData, - ops::Deref, - ptr::{copy_nonoverlapping, NonNull}, - rc::Rc, -}; - -const CONSTANTS_ARRAY: [&str; 419] = [ - // Empty string - "", - // Misc - ",", - ":", - // Generic use - "name", - "length", - "arguments", - "prototype", - "constructor", - "return", - "throw", - "global", - "globalThis", - // typeof - "null", - "undefined", - "number", - "string", - "symbol", - "bigint", - "object", - "function", - // Property descriptor - "value", - "get", - "set", - "writable", - "enumerable", - "configurable", - // Object object - "Object", - "assign", - "create", - "toString", - "valueOf", - "is", - "seal", - "isSealed", - "freeze", - "isFrozen", - "isExtensible", - "hasOwnProperty", - "isPrototypeOf", - "setPrototypeOf", - "getPrototypeOf", - "defineProperty", - "defineProperties", - "deleteProperty", - "construct", - "hasOwn", - "ownKeys", - "keys", - "values", - "entries", - "fromEntries", - // Function object - "Function", - "apply", - "bind", - "call", - // Generator object - "Generator", - // 
Array object - "Array", - "at", - "from", - "isArray", - "of", - "copyWithin", - "entries", - "every", - "fill", - "filter", - "find", - "findIndex", - "findLast", - "findLastIndex", - "flat", - "flatMap", - "forEach", - "includes", - "indexOf", - "join", - "map", - "next", - "reduce", - "reduceRight", - "reverse", - "shift", - "slice", - "splice", - "some", - "sort", - "unshift", - "push", - "pop", - // String object - "String", - "charAt", - "charCodeAt", - "codePointAt", - "concat", - "endsWith", - "fromCharCode", - "fromCodePoint", - "includes", - "indexOf", - "lastIndexOf", - "match", - "matchAll", - "normalize", - "padEnd", - "padStart", - "raw", - "repeat", - "replace", - "replaceAll", - "search", - "slice", - "split", - "startsWith", - "substr", - "substring", - "toLocaleString", - "toLowerCase", - "toUpperCase", - "trim", - "trimEnd", - "trimStart", - // Number object - "Number", - "Infinity", - "NaN", - "parseInt", - "parseFloat", - "isFinite", - "isNaN", - "parseInt", - "EPSILON", - "MAX_SAFE_INTEGER", - "MIN_SAFE_INTEGER", - "MAX_VALUE", - "MIN_VALUE", - "isSafeInteger", - "isInteger", - "toExponential", - "toFixed", - "toPrecision", - // Boolean object - "Boolean", - // BigInt object - "BigInt", - "asIntN", - "asUintN", - // RegExp object - "RegExp", - "exec", - "test", - "flags", - "index", - "lastIndex", - "hasIndices", - "ignoreCase", - "multiline", - "dotAll", - "unicode", - "sticky", - "source", - "get hasIndices", - "get global", - "get ignoreCase", - "get multiline", - "get dotAll", - "get unicode", - "get sticky", - "get flags", - "get source", - // Symbol object - "Symbol", - "for", - "keyFor", - "description", - "asyncIterator", - "hasInstance", - "species", - "Symbol.species", - "unscopables", - "iterator", - "Symbol.iterator", - "Symbol.match", - "[Symbol.match]", - "Symbol.matchAll", - "Symbol.replace", - "[Symbol.replace]", - "Symbol.search", - "[Symbol.search]", - "Symbol.split", - "[Symbol.split]", - "toStringTag", - "toPrimitive", - 
"get description", - // Map object - "Map", - "clear", - "delete", - "get", - "has", - "set", - "size", - // Set object - "Set", - "add", - // Reflect object - "Reflect", - // Proxy object - "Proxy", - "revocable", - // Error objects - "Error", - "AggregateError", - "TypeError", - "RangeError", - "SyntaxError", - "ReferenceError", - "EvalError", - "ThrowTypeError", - "URIError", - "message", - // Date object - "Date", - "toJSON", - "getDate", - "getDay", - "getFullYear", - "getHours", - "getMilliseconds", - "getMinutes", - "getMonth", - "getSeconds", - "getTime", - "getYear", - "getUTCDate", - "getUTCDay", - "getUTCFullYear", - "getUTCHours", - "getUTCMinutes", - "getUTCMonth", - "getUTCSeconds", - "setDate", - "setFullYear", - "setHours", - "setMilliseconds", - "setMinutes", - "setMonth", - "setSeconds", - "setYear", - "setTime", - "setUTCDate", - "setUTCFullYear", - "setUTCHours", - "setUTCMinutes", - "setUTCMonth", - "setUTCSeconds", - "toDateString", - "toGMTString", - "toISOString", - "toTimeString", - "toUTCString", - "now", - "UTC", - // JSON object - "JSON", - "parse", - "stringify", - // Iterator object - "Array Iterator", - "Set Iterator", - "String Iterator", - "Map Iterator", - "For In Iterator", - // Math object - "Math", - "LN10", - "LN2", - "LOG10E", - "LOG2E", - "PI", - "SQRT1_2", - "SQRT2", - "abs", - "acos", - "acosh", - "asin", - "asinh", - "atan", - "atanh", - "atan2", - "cbrt", - "ceil", - "clz32", - "cos", - "cosh", - "exp", - "expm1", - "floor", - "fround", - "hypot", - "imul", - "log", - "log1p", - "log10", - "log2", - "max", - "min", - "pow", - "random", - "round", - "sign", - "sin", - "sinh", - "sqrt", - "tan", - "tanh", - "trunc", - // Intl object - "Intl", - "DateTimeFormat", - // TypedArray object - "TypedArray", - "ArrayBuffer", - "Int8Array", - "Uint8Array", - "Int16Array", - "Uint16Array", - "Int32Array", - "Uint32Array", - "BigInt64Array", - "BigUint64Array", - "Float32Array", - "Float64Array", - "buffer", - "byteLength", - 
"byteOffset", - "isView", - "subarray", - "get byteLength", - "get buffer", - "get byteOffset", - "get size", - "get length", - // DataView object - "DataView", - "getBigInt64", - "getBigUint64", - "getFloat32", - "getFloat64", - "getInt8", - "getInt16", - "getInt32", - "getUint8", - "getUint16", - "getUint32", - "setBigInt64", - "setBigUint64", - "setFloat32", - "setFloat64", - "setInt8", - "setInt16", - "setInt32", - "setUint8", - "setUint16", - "setUint32", - // Console object - "console", - "assert", - "debug", - "error", - "info", - "trace", - "warn", - "exception", - "count", - "countReset", - "group", - "groupCollapsed", - "groupEnd", - "time", - "timeLog", - "timeEnd", - "dir", - "dirxml", - // Minified name - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "H", - "I", - "J", - "K", - "L", - "M", - "N", - "O", - "P", - "Q", - "R", - "S", - "T", - "U", - "V", - "W", - "X", - "Y", - "Z", - "_", - "$", -]; - -const MAX_CONSTANT_STRING_LENGTH: usize = { - let mut max = 0; - let mut i = 0; - while i < CONSTANTS_ARRAY.len() { - let len = CONSTANTS_ARRAY[i].len(); - if len > max { - max = len; - } - i += 1; - } - max -}; - -unsafe fn try_alloc(layout: Layout) -> *mut u8 { - let ptr = alloc(layout); - if ptr.is_null() { - handle_alloc_error(layout); - } - ptr -} - -thread_local! { - static CONSTANTS: FxHashMap<&'static str, JsString> = { - let mut constants = FxHashMap::with_capacity_and_hasher( - CONSTANTS_ARRAY.len(), - BuildHasherDefault::::default(), - ); - - for (idx, &s) in CONSTANTS_ARRAY.iter().enumerate() { - // Safety: We already know it's an index of [`CONSTANTS_ARRAY`]. - let v = unsafe { JsString::new_static(idx) }; - constants.insert(s, v); - } - - constants - }; -} - -/// The inner representation of a [`JsString`]. 
-#[repr(C)] -struct Inner { - /// The utf8 length, the number of bytes. - len: usize, - - /// The number of references to the string. - /// - /// When this reaches `0` the string is deallocated. - refcount: Cell, - - /// An empty array which is used to get the offset of string data. - data: [u8; 0], -} - -impl Inner { - /// Create a new `Inner` from `&str`. - #[inline] - fn new(s: &str) -> NonNull { - // We get the layout of the `Inner` type and we extend by the size - // of the string array. - let inner_layout = Layout::new::(); - let (layout, offset) = inner_layout - .extend(Layout::array::(s.len()).expect("failed to create memory layout")) - .expect("failed to extend memory layout"); - - let inner = unsafe { - let inner = try_alloc(layout).cast::(); - - // Write the first part, the Inner. - inner.write(Self { - len: s.len(), - refcount: Cell::new(1), - data: [0; 0], - }); - - // Get offset into the string data. - let data = (*inner).data.as_mut_ptr(); - - debug_assert!(std::ptr::eq(inner.cast::().add(offset), data)); - - // Copy string data into data offset. - copy_nonoverlapping(s.as_ptr(), data, s.len()); - - inner - }; - - // Safety: We already know it's not null, so this is safe. - unsafe { NonNull::new_unchecked(inner) } - } - - /// Concatenate array of strings. - #[inline] - fn concat_array(strings: &[&str]) -> NonNull { - let mut total_string_size = 0; - for string in strings { - total_string_size += string.len(); - } - - // We get the layout of the `Inner` type and we extend by the size - // of the string array. - let inner_layout = Layout::new::(); - let (layout, offset) = inner_layout - .extend(Layout::array::(total_string_size).expect("failed to create memory layout")) - .expect("failed to extend memory layout"); - - let inner = unsafe { - let inner = try_alloc(layout).cast::(); - - // Write the first part, the Inner. - inner.write(Self { - len: total_string_size, - refcount: Cell::new(1), - data: [0; 0], - }); - - // Get offset into the string data. 
- let data = (*inner).data.as_mut_ptr(); - - debug_assert!(std::ptr::eq(inner.cast::().add(offset), data)); - - // Copy the two string data into data offset. - let mut offset = 0; - for string in strings { - copy_nonoverlapping(string.as_ptr(), data.add(offset), string.len()); - offset += string.len(); - } - - inner - }; - - // Safety: We already know it's not null, so this is safe. - unsafe { NonNull::new_unchecked(inner) } - } - - /// Deallocate inner type with string data. - #[inline] - unsafe fn dealloc(x: NonNull) { - let len = (*x.as_ptr()).len; - - let inner_layout = Layout::new::(); - let (layout, _offset) = inner_layout - .extend(Layout::array::(len).expect("failed to create memory layout")) - .expect("failed to extend memory layout"); - - dealloc(x.as_ptr().cast::<_>(), layout); - } - - #[inline] - fn as_str(&self) -> &str { - unsafe { - let slice = std::slice::from_raw_parts(self.data.as_ptr(), self.len); - std::str::from_utf8_unchecked(slice) - } - } -} - -/// This represents a JavaScript primitive string. -/// -/// This is similar to `Rc`. But unlike `Rc` which stores the length -/// on the stack and a pointer to the data (this is also known as fat pointers). -/// The `JsString` length and data is stored on the heap. and just an non-null -/// pointer is kept, so its size is the size of a pointer. -/// -/// We define some commonly used string constants in an interner. For these -/// strings, we no longer allocate memory on the heap to reduce the overhead of -/// memory allocation and reference counting. -#[derive(Finalize)] -pub struct JsString { - inner: TaggedInner, - _marker: PhantomData>, -} - -// Safety: JsString does not contain any objects which needs to be traced, -// so this is safe. 
-unsafe impl Trace for JsString { - unsafe_empty_trace!(); -} - -/// This struct uses a technique called tagged pointer to benefit from the fact that newly -/// allocated pointers are always word aligned on 64-bits platforms, making it impossible -/// to have a LSB equal to 1. More details about this technique on the article of Wikipedia -/// about [tagged pointers][tagged_wp]. -/// -/// # Representation -/// -/// If the LSB of the internal [`NonNull`] is set (1), then the pointer address represents -/// an index value for [`CONSTANTS_ARRAY`], where the remaining MSBs store the index. -/// Otherwise, the whole pointer represents the address of a heap allocated [`Inner`]. -/// -/// It uses [`NonNull`], which guarantees that `TaggedInner` (and subsequently [`JsString`]) -/// can use the "null pointer optimization" to optimize the size of [`Option`]. -/// -/// # Provenance -/// -/// This struct stores a [`NonNull`] instead of a [`NonZeroUsize`][std::num::NonZeroUsize] -/// in order to preserve the provenance of our valid heap pointers. -/// On the other hand, all index values are just casted to invalid pointers, -/// because we don't need to preserve the provenance of [`usize`] indices. -/// -/// [tagged_wp]: https://en.wikipedia.org/wiki/Tagged_pointer -#[repr(transparent)] -#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] -struct TaggedInner(NonNull); - -impl TaggedInner { - #[inline] - unsafe fn new_heap(inner: NonNull) -> Self { - Self(inner) - } - - /// Create a new static `TaggedInner` from the index of an element inside - /// [`CONSTANTS_ARRAY`]. - #[inline] - const unsafe fn new_static(idx: usize) -> Self { - // Safety: We already know it's not null, so this is safe. - Self(NonNull::new_unchecked(((idx << 1) | 1) as *mut _)) - } - - /// Check if `TaggedInner` contains an index for [`CONSTANTS_ARRAY`]. 
- #[inline] - fn is_static(self) -> bool { - (self.0.as_ptr() as usize) & 1 == 1 - } - - /// Returns a reference to a string stored on the heap, - /// without checking if its internal pointer is valid. - /// - /// # Safety - /// - /// Calling this method with a static `TaggedInner` results in Undefined Behaviour. - #[inline] - const unsafe fn get_heap_unchecked(self) -> NonNull { - self.0 - } - - /// Returns the string inside [`CONSTANTS_ARRAY`] corresponding to the - /// index inside `TaggedInner`, without checking its validity. - /// - /// # Safety - /// - /// Calling this method with a `TaggedInner` storing an out of bounds index - /// for [`CONSTANTS_ARRAY`] or a valid pointer to a heap allocated [`Inner`] - /// results in Undefined Behaviour. - #[inline] - unsafe fn get_static_unchecked(self) -> &'static str { - // shift right to get the index. - CONSTANTS_ARRAY.get_unchecked((self.0.as_ptr() as usize) >> 1) - } -} - -impl Default for JsString { - #[inline] - fn default() -> Self { - // Safety: We already know it's an index of [`CONSTANTS_ARRAY`]. - unsafe { Self::new_static(0) } - } -} - -/// Enum representing either a reference to a heap allocated [`Inner`] -/// or a static reference to a [`str`] inside [`CONSTANTS_ARRAY`]. -enum InnerKind<'a> { - // A string allocated on the heap. - Heap(&'a Inner), - // A static string slice. - Static(&'static str), -} - -impl JsString { - /// Create a new JavaScript string from an index of [`CONSTANTS_ARRAY`]. - #[inline] - unsafe fn new_static(idx: usize) -> Self { - Self { - inner: TaggedInner::new_static(idx), - _marker: PhantomData, - } - } - - /// Create an empty string, same as calling default. - #[inline] - pub fn empty() -> Self { - Self::default() - } - - /// Create a new JavaScript string. 
- #[inline] - pub fn new>(s: S) -> Self { - let s = s.as_ref(); - - if s.len() <= MAX_CONSTANT_STRING_LENGTH { - if let Some(constant) = CONSTANTS.with(|c| c.get(s).cloned()) { - return constant; - } - } - - Self { - // Safety: We already know it's a valid heap pointer. - inner: unsafe { TaggedInner::new_heap(Inner::new(s)) }, - _marker: PhantomData, - } - } - - /// Concatenate two string. - pub fn concat(x: T, y: U) -> Self - where - T: AsRef, - U: AsRef, - { - let x = x.as_ref(); - let y = y.as_ref(); - - let inner = Inner::concat_array(&[x, y]); - let s = unsafe { inner.as_ref() }.as_str(); - - if s.len() <= MAX_CONSTANT_STRING_LENGTH { - if let Some(constant) = CONSTANTS.with(|c| c.get(s).cloned()) { - unsafe { Inner::dealloc(inner) }; - return constant; - } - } - - Self { - // Safety: We already know it's a valid heap pointer. - inner: unsafe { TaggedInner::new_heap(inner) }, - _marker: PhantomData, - } - } - - /// Concatenate array of string. - pub fn concat_array(strings: &[&str]) -> Self { - let inner = Inner::concat_array(strings); - let s = unsafe { inner.as_ref() }.as_str(); - - if s.len() <= MAX_CONSTANT_STRING_LENGTH { - if let Some(constant) = CONSTANTS.with(|c| c.get(s).cloned()) { - unsafe { Inner::dealloc(inner) }; - return constant; - } - } - - Self { - // Safety: We already know it's a valid heap pointer. - inner: unsafe { TaggedInner::new_heap(inner) }, - _marker: PhantomData, - } - } - - /// Return the inner representation. - #[inline] - fn inner(&self) -> InnerKind<'_> { - // Check the first bit to 1. - if self.inner.is_static() { - // Safety: We already checked. - InnerKind::Static(unsafe { self.inner.get_static_unchecked() }) - } else { - // Safety: We already checked. - InnerKind::Heap(unsafe { self.inner.get_heap_unchecked().as_ref() }) - } - } - - /// Return the JavaScript string as a rust `&str`. 
- #[inline] - pub fn as_str(&self) -> &str { - match self.inner() { - InnerKind::Heap(inner) => inner.as_str(), - InnerKind::Static(inner) => inner, - } - } - - /// Gets the number of `JsString`s which point to this allocation. - #[inline] - pub fn refcount(this: &Self) -> Option { - match this.inner() { - InnerKind::Heap(inner) => Some(inner.refcount.get()), - InnerKind::Static(_inner) => None, - } - } - - /// Returns `true` if the two `JsString`s point to the same allocation (in a vein similar to [`ptr::eq`]). - /// - /// [`ptr::eq`]: std::ptr::eq - #[inline] - pub fn ptr_eq(x: &Self, y: &Self) -> bool { - x.inner == y.inner - } - - /// `6.1.4.1 StringIndexOf ( string, searchValue, fromIndex )` - /// - /// Note: Instead of returning an isize with `-1` as the "not found" value, - /// We make use of the type system and return Option with None as the "not found" value. - /// - /// More information: - /// - [ECMAScript reference][spec] - /// - /// [spec]: https://tc39.es/ecma262/#sec-stringindexof - pub(crate) fn index_of(&self, search_value: &Self, from_index: usize) -> Option { - // 1. Assert: Type(string) is String. - // 2. Assert: Type(searchValue) is String. - // 3. Assert: fromIndex is a non-negative integer. - - // 4. Let len be the length of string. - let len = self.encode_utf16().count(); - - // 5. If searchValue is the empty String and fromIndex ≤ len, return fromIndex. - if search_value.is_empty() && from_index <= len { - return Some(from_index); - } - - // 6. Let searchLen be the length of searchValue. - let search_len = search_value.encode_utf16().count(); - - // 7. For each integer i starting with fromIndex such that i ≤ len - searchLen, in ascending order, do - for i in from_index..=len { - if i as isize > (len as isize - search_len as isize) { - break; - } - - // a. Let candidate be the substring of string from i to i + searchLen. 
- let candidate = String::from_utf16_lossy( - &self - .encode_utf16() - .skip(i) - .take(search_len) - .collect::>(), - ); - - // b. If candidate is the same sequence of code units as searchValue, return i. - if candidate == search_value.as_str() { - return Some(i); - } - } - - // 8. Return -1. - None - } - - pub(crate) fn string_to_number(&self) -> f64 { - let string = self.trim_matches(is_trimmable_whitespace); - - match string { - "" => return 0.0, - "-Infinity" => return f64::NEG_INFINITY, - "Infinity" | "+Infinity" => return f64::INFINITY, - _ => {} - } - - let mut s = string.bytes(); - let base = match (s.next(), s.next()) { - (Some(b'0'), Some(b'b' | b'B')) => Some(2), - (Some(b'0'), Some(b'o' | b'O')) => Some(8), - (Some(b'0'), Some(b'x' | b'X')) => Some(16), - _ => None, - }; - - // Parse numbers that begin with `0b`, `0o` and `0x`. - if let Some(base) = base { - let string = &string[2..]; - if string.is_empty() { - return f64::NAN; - } - - // Fast path - if let Ok(value) = u32::from_str_radix(string, base) { - return f64::from(value); - } - - // Slow path - let mut value = 0.0; - for c in s { - if let Some(digit) = char::from(c).to_digit(base) { - value = value * f64::from(base) + f64::from(digit); - } else { - return f64::NAN; - } - } - return value; - } - - match string { - // Handle special cases so `fast_float` does not return infinity. 
- "inf" | "+inf" | "-inf" => f64::NAN, - string => fast_float::parse(string).unwrap_or(f64::NAN), - } - } -} - -impl Clone for JsString { - #[inline] - fn clone(&self) -> Self { - if let InnerKind::Heap(inner) = self.inner() { - inner.refcount.set(inner.refcount.get() + 1); - } - Self { - inner: self.inner, - _marker: PhantomData, - } - } -} - -impl Drop for JsString { - #[inline] - fn drop(&mut self) { - if let InnerKind::Heap(inner) = self.inner() { - if inner.refcount.get() == 1 { - // Safety: If refcount is 1 and we call drop, that means this is the last - // JsString which points to this memory allocation, so deallocating it is safe. - unsafe { - Inner::dealloc(self.inner.get_heap_unchecked()); - } - } else { - inner.refcount.set(inner.refcount.get() - 1); - } - } - } -} - -impl std::fmt::Debug for JsString { - #[inline] - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.as_str().fmt(f) - } -} - -impl std::fmt::Display for JsString { - #[inline] - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.as_str().fmt(f) - } -} - -impl From<&str> for JsString { - #[inline] - fn from(s: &str) -> Self { - Self::new(s) - } -} - -impl From> for JsString { - #[inline] - fn from(s: Box) -> Self { - Self::new(s) - } -} - -impl From for JsString { - #[inline] - fn from(s: String) -> Self { - Self::new(s) - } -} - -impl AsRef for JsString { - #[inline] - fn as_ref(&self) -> &str { - self.as_str() - } -} - -impl Borrow for JsString { - #[inline] - fn borrow(&self) -> &str { - self.as_str() - } -} - -impl Deref for JsString { - type Target = str; - - #[inline] - fn deref(&self) -> &Self::Target { - self.as_str() - } -} - -impl PartialEq for JsString { - #[inline] - fn eq(&self, other: &Self) -> bool { - // If they point at the same memory allocation, then they are equal. 
- if Self::ptr_eq(self, other) { - return true; - } - - self.as_str() == other.as_str() - } -} - -impl Eq for JsString {} - -impl Hash for JsString { - #[inline] - fn hash(&self, state: &mut H) { - self.as_str().hash(state); - } -} - -impl PartialOrd for JsString { - #[inline] - fn partial_cmp(&self, other: &Self) -> Option { - self.as_str().partial_cmp(other.as_str()) - } -} - -impl Ord for JsString { - #[inline] - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.as_str().cmp(other) - } -} - -impl PartialEq for JsString { - #[inline] - fn eq(&self, other: &str) -> bool { - self.as_str() == other - } -} - -impl PartialEq for str { - #[inline] - fn eq(&self, other: &JsString) -> bool { - self == other.as_str() - } -} - -impl PartialEq<&str> for JsString { - #[inline] - fn eq(&self, other: &&str) -> bool { - self.as_str() == *other - } -} - -impl PartialEq for &str { - #[inline] - fn eq(&self, other: &JsString) -> bool { - *self == other.as_str() - } -} - -#[cfg(test)] -mod tests { - use super::JsString; - use std::mem::size_of; - - #[test] - fn empty() { - let _empty = JsString::new(""); - } - - #[test] - fn pointer_size() { - assert_eq!(size_of::(), size_of::<*const u8>()); - assert_eq!(size_of::>(), size_of::<*const u8>()); - } - - #[test] - fn refcount() { - let x = JsString::new("Hello wrold"); - assert_eq!(JsString::refcount(&x), Some(1)); - - { - let y = x.clone(); - assert_eq!(JsString::refcount(&x), Some(2)); - assert_eq!(JsString::refcount(&y), Some(2)); - - { - let z = y.clone(); - assert_eq!(JsString::refcount(&x), Some(3)); - assert_eq!(JsString::refcount(&y), Some(3)); - assert_eq!(JsString::refcount(&z), Some(3)); - } - - assert_eq!(JsString::refcount(&x), Some(2)); - assert_eq!(JsString::refcount(&y), Some(2)); - } - - assert_eq!(JsString::refcount(&x), Some(1)); - } - - #[test] - fn static_refcount() { - let x = JsString::new(""); - assert_eq!(JsString::refcount(&x), None); - - { - let y = x.clone(); - assert_eq!(JsString::refcount(&x), 
None); - assert_eq!(JsString::refcount(&y), None); - }; - - assert_eq!(JsString::refcount(&x), None); - } - - #[test] - fn ptr_eq() { - let x = JsString::new("Hello"); - let y = x.clone(); - - assert!(JsString::ptr_eq(&x, &y)); - - let z = JsString::new("Hello"); - assert!(!JsString::ptr_eq(&x, &z)); - assert!(!JsString::ptr_eq(&y, &z)); - } - - #[test] - fn static_ptr_eq() { - let x = JsString::new(""); - let y = x.clone(); - - assert!(JsString::ptr_eq(&x, &y)); - - let z = JsString::new(""); - assert!(JsString::ptr_eq(&x, &z)); - assert!(JsString::ptr_eq(&y, &z)); - } - - #[test] - fn as_str() { - let s = "Hello"; - let x = JsString::new(s); - - assert_eq!(x.as_str(), s); - } - - #[test] - fn hash() { - use std::collections::hash_map::DefaultHasher; - use std::hash::{Hash, Hasher}; - - let s = "Hello, world!"; - let x = JsString::new(s); - - assert_eq!(x.as_str(), s); - - let mut hasher = DefaultHasher::new(); - s.hash(&mut hasher); - let s_hash = hasher.finish(); - let mut hasher = DefaultHasher::new(); - x.hash(&mut hasher); - let x_hash = hasher.finish(); - - assert_eq!(s_hash, x_hash); - } - - #[test] - fn concat() { - let x = JsString::new("hello"); - let y = ", "; - let z = JsString::new("world"); - let w = String::from("!"); - - let xy = JsString::concat(x, y); - assert_eq!(xy, "hello, "); - assert_eq!(JsString::refcount(&xy), Some(1)); - - let xyz = JsString::concat(xy, z); - assert_eq!(xyz, "hello, world"); - assert_eq!(JsString::refcount(&xyz), Some(1)); - - let xyzw = JsString::concat(xyz, w); - assert_eq!(xyzw, "hello, world!"); - assert_eq!(JsString::refcount(&xyzw), Some(1)); - } -} diff --git a/boa_engine/src/string/common.rs b/boa_engine/src/string/common.rs new file mode 100644 index 00000000000..5382fae3302 --- /dev/null +++ b/boa_engine/src/string/common.rs @@ -0,0 +1,500 @@ +use std::hash::BuildHasherDefault; + +use boa_macros::utf16; +use rustc_hash::{FxHashMap, FxHasher}; + +use super::{JsString, TaggedJsString}; + +/// List of commonly used 
strings in Javascript code. +/// +/// Any string defined here is used as a static [`JsString`] instead of allocating on the heap. +pub(super) const COMMON_STRINGS: &[&[u16]] = &[ + // Empty string + utf16!(""), + // Misc + utf16!(","), + utf16!(":"), + // Generic use + utf16!("name"), + utf16!("length"), + utf16!("arguments"), + utf16!("prototype"), + utf16!("constructor"), + utf16!("return"), + utf16!("throw"), + utf16!("global"), + utf16!("globalThis"), + // typeof + utf16!("null"), + utf16!("undefined"), + utf16!("number"), + utf16!("string"), + utf16!("symbol"), + utf16!("bigint"), + utf16!("object"), + utf16!("function"), + // Property descriptor + utf16!("value"), + utf16!("get"), + utf16!("set"), + utf16!("writable"), + utf16!("enumerable"), + utf16!("configurable"), + // Object object + utf16!("Object"), + utf16!("assign"), + utf16!("create"), + utf16!("toString"), + utf16!("valueOf"), + utf16!("is"), + utf16!("seal"), + utf16!("isSealed"), + utf16!("freeze"), + utf16!("isFrozen"), + utf16!("isExtensible"), + utf16!("hasOwnProperty"), + utf16!("isPrototypeOf"), + utf16!("setPrototypeOf"), + utf16!("getPrototypeOf"), + utf16!("defineProperty"), + utf16!("defineProperties"), + utf16!("deleteProperty"), + utf16!("construct"), + utf16!("hasOwn"), + utf16!("ownKeys"), + utf16!("keys"), + utf16!("values"), + utf16!("entries"), + utf16!("fromEntries"), + // Function object + utf16!("Function"), + utf16!("apply"), + utf16!("bind"), + utf16!("call"), + // Generator object + utf16!("Generator"), + // Array object + utf16!("Array"), + utf16!("at"), + utf16!("from"), + utf16!("isArray"), + utf16!("of"), + utf16!("copyWithin"), + utf16!("entries"), + utf16!("every"), + utf16!("fill"), + utf16!("filter"), + utf16!("find"), + utf16!("findIndex"), + utf16!("findLast"), + utf16!("findLastIndex"), + utf16!("flat"), + utf16!("flatMap"), + utf16!("forEach"), + utf16!("includes"), + utf16!("indexOf"), + utf16!("join"), + utf16!("map"), + utf16!("next"), + utf16!("reduce"), + 
utf16!("reduceRight"), + utf16!("reverse"), + utf16!("shift"), + utf16!("slice"), + utf16!("splice"), + utf16!("some"), + utf16!("sort"), + utf16!("unshift"), + utf16!("push"), + utf16!("pop"), + // String object + utf16!("String"), + utf16!("charAt"), + utf16!("charCodeAt"), + utf16!("codePointAt"), + utf16!("concat"), + utf16!("endsWith"), + utf16!("fromCharCode"), + utf16!("fromCodePoint"), + utf16!("includes"), + utf16!("indexOf"), + utf16!("lastIndexOf"), + utf16!("match"), + utf16!("matchAll"), + utf16!("normalize"), + utf16!("padEnd"), + utf16!("padStart"), + utf16!("raw"), + utf16!("repeat"), + utf16!("replace"), + utf16!("replaceAll"), + utf16!("search"), + utf16!("slice"), + utf16!("split"), + utf16!("startsWith"), + utf16!("substr"), + utf16!("substring"), + utf16!("toLocaleString"), + utf16!("toLowerCase"), + utf16!("toUpperCase"), + utf16!("trim"), + utf16!("trimEnd"), + utf16!("trimStart"), + // Number object + utf16!("Number"), + utf16!("Infinity"), + utf16!("NaN"), + utf16!("parseInt"), + utf16!("parseFloat"), + utf16!("isFinite"), + utf16!("isNaN"), + utf16!("parseInt"), + utf16!("EPSILON"), + utf16!("MAX_SAFE_INTEGER"), + utf16!("MIN_SAFE_INTEGER"), + utf16!("MAX_VALUE"), + utf16!("MIN_VALUE"), + utf16!("isSafeInteger"), + utf16!("isInteger"), + utf16!("toExponential"), + utf16!("toFixed"), + utf16!("toPrecision"), + // Boolean object + utf16!("Boolean"), + // BigInt object + utf16!("BigInt"), + utf16!("asIntN"), + utf16!("asUintN"), + // RegExp object + utf16!("RegExp"), + utf16!("exec"), + utf16!("test"), + utf16!("flags"), + utf16!("index"), + utf16!("lastIndex"), + utf16!("hasIndices"), + utf16!("ignoreCase"), + utf16!("multiline"), + utf16!("dotAll"), + utf16!("unicode"), + utf16!("sticky"), + utf16!("source"), + utf16!("get hasIndices"), + utf16!("get global"), + utf16!("get ignoreCase"), + utf16!("get multiline"), + utf16!("get dotAll"), + utf16!("get unicode"), + utf16!("get sticky"), + utf16!("get flags"), + utf16!("get source"), + // 
Symbol object + utf16!("Symbol"), + utf16!("for"), + utf16!("keyFor"), + utf16!("description"), + utf16!("asyncIterator"), + utf16!("hasInstance"), + utf16!("species"), + utf16!("Symbol.species"), + utf16!("unscopables"), + utf16!("iterator"), + utf16!("Symbol.iterator"), + utf16!("Symbol.match"), + utf16!("[Symbol.match]"), + utf16!("Symbol.matchAll"), + utf16!("Symbol.replace"), + utf16!("[Symbol.replace]"), + utf16!("Symbol.search"), + utf16!("[Symbol.search]"), + utf16!("Symbol.split"), + utf16!("[Symbol.split]"), + utf16!("toStringTag"), + utf16!("toPrimitive"), + utf16!("get description"), + // Map object + utf16!("Map"), + utf16!("clear"), + utf16!("delete"), + utf16!("has"), + utf16!("size"), + // Set object + utf16!("Set"), + utf16!("add"), + // Reflect object + utf16!("Reflect"), + // Proxy object + utf16!("Proxy"), + utf16!("revocable"), + // Error objects + utf16!("Error"), + utf16!("AggregateError"), + utf16!("TypeError"), + utf16!("RangeError"), + utf16!("SyntaxError"), + utf16!("ReferenceError"), + utf16!("EvalError"), + utf16!("ThrowTypeError"), + utf16!("URIError"), + utf16!("message"), + // Date object + utf16!("Date"), + utf16!("toJSON"), + utf16!("getDate"), + utf16!("getDay"), + utf16!("getFullYear"), + utf16!("getHours"), + utf16!("getMilliseconds"), + utf16!("getMinutes"), + utf16!("getMonth"), + utf16!("getSeconds"), + utf16!("getTime"), + utf16!("getYear"), + utf16!("getUTCDate"), + utf16!("getUTCDay"), + utf16!("getUTCFullYear"), + utf16!("getUTCHours"), + utf16!("getUTCMinutes"), + utf16!("getUTCMonth"), + utf16!("getUTCSeconds"), + utf16!("setDate"), + utf16!("setFullYear"), + utf16!("setHours"), + utf16!("setMilliseconds"), + utf16!("setMinutes"), + utf16!("setMonth"), + utf16!("setSeconds"), + utf16!("setYear"), + utf16!("setTime"), + utf16!("setUTCDate"), + utf16!("setUTCFullYear"), + utf16!("setUTCHours"), + utf16!("setUTCMinutes"), + utf16!("setUTCMonth"), + utf16!("setUTCSeconds"), + utf16!("toDateString"), + utf16!("toGMTString"), 
+ utf16!("toISOString"), + utf16!("toTimeString"), + utf16!("toUTCString"), + utf16!("now"), + utf16!("UTC"), + // JSON object + utf16!("JSON"), + utf16!("parse"), + utf16!("stringify"), + // Iterator object + utf16!("Array Iterator"), + utf16!("Set Iterator"), + utf16!("String Iterator"), + utf16!("Map Iterator"), + utf16!("For In Iterator"), + // Math object + utf16!("Math"), + utf16!("LN10"), + utf16!("LN2"), + utf16!("LOG10E"), + utf16!("LOG2E"), + utf16!("PI"), + utf16!("SQRT1_2"), + utf16!("SQRT2"), + utf16!("abs"), + utf16!("acos"), + utf16!("acosh"), + utf16!("asin"), + utf16!("asinh"), + utf16!("atan"), + utf16!("atanh"), + utf16!("atan2"), + utf16!("cbrt"), + utf16!("ceil"), + utf16!("clz32"), + utf16!("cos"), + utf16!("cosh"), + utf16!("exp"), + utf16!("expm1"), + utf16!("floor"), + utf16!("fround"), + utf16!("hypot"), + utf16!("imul"), + utf16!("log"), + utf16!("log1p"), + utf16!("log10"), + utf16!("log2"), + utf16!("max"), + utf16!("min"), + utf16!("pow"), + utf16!("random"), + utf16!("round"), + utf16!("sign"), + utf16!("sin"), + utf16!("sinh"), + utf16!("sqrt"), + utf16!("tan"), + utf16!("tanh"), + utf16!("trunc"), + // Intl object + utf16!("Intl"), + utf16!("DateTimeFormat"), + // TypedArray object + utf16!("TypedArray"), + utf16!("ArrayBuffer"), + utf16!("Int8Array"), + utf16!("Uint8Array"), + utf16!("Int16Array"), + utf16!("Uint16Array"), + utf16!("Int32Array"), + utf16!("Uint32Array"), + utf16!("BigInt64Array"), + utf16!("BigUint64Array"), + utf16!("Float32Array"), + utf16!("Float64Array"), + utf16!("buffer"), + utf16!("byteLength"), + utf16!("byteOffset"), + utf16!("isView"), + utf16!("subarray"), + utf16!("get byteLength"), + utf16!("get buffer"), + utf16!("get byteOffset"), + utf16!("get size"), + utf16!("get length"), + // DataView object + utf16!("DataView"), + utf16!("getBigInt64"), + utf16!("getBigUint64"), + utf16!("getFloat32"), + utf16!("getFloat64"), + utf16!("getInt8"), + utf16!("getInt16"), + utf16!("getInt32"), + utf16!("getUint8"), 
+ utf16!("getUint16"), + utf16!("getUint32"), + utf16!("setBigInt64"), + utf16!("setBigUint64"), + utf16!("setFloat32"), + utf16!("setFloat64"), + utf16!("setInt8"), + utf16!("setInt16"), + utf16!("setInt32"), + utf16!("setUint8"), + utf16!("setUint16"), + utf16!("setUint32"), + // Console object + utf16!("console"), + utf16!("assert"), + utf16!("debug"), + utf16!("error"), + utf16!("info"), + utf16!("trace"), + utf16!("warn"), + utf16!("exception"), + utf16!("count"), + utf16!("countReset"), + utf16!("group"), + utf16!("groupCollapsed"), + utf16!("groupEnd"), + utf16!("time"), + utf16!("timeLog"), + utf16!("timeEnd"), + utf16!("dir"), + utf16!("dirxml"), + // Minified name + utf16!("a"), + utf16!("b"), + utf16!("c"), + utf16!("d"), + utf16!("e"), + utf16!("f"), + utf16!("g"), + utf16!("h"), + utf16!("i"), + utf16!("j"), + utf16!("k"), + utf16!("l"), + utf16!("m"), + utf16!("n"), + utf16!("o"), + utf16!("p"), + utf16!("q"), + utf16!("r"), + utf16!("s"), + utf16!("t"), + utf16!("u"), + utf16!("v"), + utf16!("w"), + utf16!("x"), + utf16!("y"), + utf16!("z"), + utf16!("A"), + utf16!("B"), + utf16!("C"), + utf16!("D"), + utf16!("E"), + utf16!("F"), + utf16!("G"), + utf16!("H"), + utf16!("I"), + utf16!("J"), + utf16!("K"), + utf16!("L"), + utf16!("M"), + utf16!("N"), + utf16!("O"), + utf16!("P"), + utf16!("Q"), + utf16!("R"), + utf16!("S"), + utf16!("T"), + utf16!("U"), + utf16!("V"), + utf16!("W"), + utf16!("X"), + utf16!("Y"), + utf16!("Z"), + utf16!("_"), + utf16!("$"), +]; + +/// The maximum length of a string within [`COMMON_STRINGS`]. +/// +/// This is useful to skip checks for strings with lengths > `MAX_COMMON_STRING_LENGTH` and directly +/// allocate on the heap. +pub(super) const MAX_COMMON_STRING_LENGTH: usize = { + let mut max = 0; + let mut i = 0; + while i < COMMON_STRINGS.len() { + let len = COMMON_STRINGS[i].len(); + if len > max { + max = len; + } + i += 1; + } + max +}; + +thread_local! 
{ + /// Map from a string inside [`COMMON_STRINGS`] to its corresponding static [`JsString`]. + pub(super) static COMMON_STRINGS_CACHE: FxHashMap<&'static [u16], JsString> = { + let mut constants = FxHashMap::with_capacity_and_hasher( + COMMON_STRINGS.len(), + BuildHasherDefault::::default(), + ); + + for (idx, &s) in COMMON_STRINGS.iter().enumerate() { + // Safety: + // As we're just building a cache of `JsString` indices to access the stored + // `COMMON_STRINGS`, this cannot generate invalid `TaggedJsString`s, since `idx` is + // always a valid index in `COMMON_STRINGS`. + let v = unsafe { + JsString { + ptr: TaggedJsString::new_static(idx), + } + }; + constants.insert(s, v); + } + + constants + }; +} diff --git a/boa_engine/src/string/mod.rs b/boa_engine/src/string/mod.rs new file mode 100644 index 00000000000..7b0d9888c5b --- /dev/null +++ b/boa_engine/src/string/mod.rs @@ -0,0 +1,1015 @@ +//! A UTF-16–encoded, reference counted, immutable string. +//! +//! This module contains the [`JsString`] type, the [`js_string`][crate::js_string] macro and the +//! [`utf16`] macro. +//! +//! The [`js_string`][crate::js_string] macro is used when you need to create a new [`JsString`], +//! and the [`utf16`] macro is used for const conversions of string literals to UTF-16. + +// Required per unsafe code standards to ensure every unsafe usage is properly documented. +// - `unsafe_op_in_unsafe_fn` will be warn-by-default in edition 2024: +// https://github.com/rust-lang/rust/issues/71668#issuecomment-1189396860 +// - `undocumented_unsafe_blocks` and `missing_safety_doc` requires a `Safety:` section in the +// comment or doc of the unsafe block or function, respectively. +#![deny( + unsafe_op_in_unsafe_fn, + clippy::undocumented_unsafe_blocks, + clippy::missing_safety_doc +)] +// Remove when/if https://github.com/rust-lang/rust/issues/95228 stabilizes. 
+// Right now this allows us to use the stable polyfill from the `sptr` crate, which uses +// the same names from the unstable functions of the `std::ptr` module. +#![allow(unstable_name_collisions)] + +mod common; + +use crate::{builtins::string::is_trimmable_whitespace, JsBigInt}; +use boa_gc::{unsafe_empty_trace, Finalize, Trace}; +pub use boa_macros::utf16; + +use std::{ + alloc::{alloc, dealloc, Layout}, + borrow::Borrow, + cell::Cell, + hash::{Hash, Hasher}, + ops::{Deref, Index}, + ptr::{self, NonNull}, + slice::SliceIndex, +}; + +use self::common::{COMMON_STRINGS, COMMON_STRINGS_CACHE, MAX_COMMON_STRING_LENGTH}; + +/// Utility macro to create a [`JsString`]. +/// +/// # Examples +/// +/// You can call the macro without arguments to create an empty `JsString`: +/// +/// ``` +/// use boa_engine::js_string; +/// use boa_engine::string::utf16; +/// +/// let empty_str = js_string!(); +/// assert!(empty_str.is_empty()); +/// ``` +/// +/// +/// You can create a `JsString` from a string literal, which completely skips the runtime +/// conversion from [`&str`] to [`&\[u16\]`]: +/// +/// ``` +/// # use boa_engine::js_string; +/// # use boa_engine::string::utf16; +/// let hw = js_string!("Hello, world!"); +/// assert_eq!(&hw, utf16!("Hello, world!")); +/// ``` +/// +/// Any `&[u16]` slice is a valid `JsString`, including unpaired surrogates: +/// +/// ``` +/// # use boa_engine::js_string; +/// let array = js_string!(&[0xD8AFu16, 0x00A0, 0xD8FF, 0x00F0]); +/// ``` +/// +/// You can also pass it any number of `&[u16]` as arguments to create a new `JsString` with +/// the concatenation of every slice: +/// +/// ``` +/// # use boa_engine::js_string; +/// # use boa_engine::string::utf16; +/// const NAME: &[u16] = utf16!("human! "); +/// let greeting = js_string!("Hello, "); +/// let msg = js_string!(&greeting, &NAME, utf16!("Nice to meet you!")); +/// +/// assert_eq!(&msg, utf16!("Hello, human! Nice to meet you!")); +/// ``` +#[macro_export] +macro_rules! 
js_string { + () => { + $crate::JsString::default() + }; + ($s:literal) => { + $crate::JsString::from($crate::string::utf16!($s)) + }; + ($s:expr) => { + $crate::JsString::from($s) + }; + ( $x:expr, $y:expr ) => { + $crate::JsString::concat($x, $y) + }; + ( $( $s:expr ),+ ) => { + $crate::JsString::concat_array(&[ $( $s ),+ ]) + }; +} + +/// Represents a Unicode codepoint within a [`JsString`], which could be a valid +/// '[Unicode scalar value]', or an unpaired surrogate. +/// +/// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum CodePoint { + Unicode(char), + UnpairedSurrogate(u16), +} + +impl CodePoint { + /// Get the number of UTF-16 code units needed to encode this code point. + pub fn code_unit_count(self) -> usize { + match self { + Self::Unicode(c) => c.len_utf16(), + Self::UnpairedSurrogate(_) => 1, + } + } + + /// Convert the code point to its [`u32`] representation. + pub fn as_u32(self) -> u32 { + match self { + Self::Unicode(c) => u32::from(c), + Self::UnpairedSurrogate(surr) => u32::from(surr), + } + } + + /// If the code point represents a valid 'Unicode scalar value', returns its [`char`] + /// representation, otherwise returns [`None`] on unpaired surrogates. + pub fn as_char(self) -> Option { + match self { + Self::Unicode(c) => Some(c), + Self::UnpairedSurrogate(_) => None, + } + } + + /// Encodes this code point as UTF-16 into the provided u16 buffer, and then returns the subslice + /// of the buffer that contains the encoded character. + /// + /// # Panics + /// + /// Panics if the buffer is not large enough. A buffer of length 2 is large enough to encode any + /// code point. + pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] { + match self { + CodePoint::Unicode(c) => c.encode_utf16(dst), + CodePoint::UnpairedSurrogate(surr) => { + dst[0] = surr; + &mut dst[0..=0] + } + } + } +} + +/// The raw representation of a [`JsString`] in the heap. 
+#[repr(C)] +struct RawJsString { + /// The UTF-16 length. + len: usize, + + /// The number of references to the string. + /// + /// When this reaches `0` the string is deallocated. + refcount: Cell, + + /// An empty array which is used to get the offset of string data. + data: [u16; 0], +} + +/// This struct uses a technique called tagged pointer to benefit from the fact that newly allocated +/// pointers are always word aligned on 64-bit platforms, making it impossible to have an LSB equal +/// to 1. More details about this technique can be found in the Wikipedia article on [tagged pointers][tagged_wp]. +/// +/// # Representation +/// +/// If the LSB of the internal [`NonNull`] is set (1), then the pointer address represents +/// an index value for [`COMMON_STRINGS`], where the remaining MSBs store the index. +/// Otherwise, the whole pointer represents the address of a heap allocated [`RawJsString`]. +/// +/// It uses [`NonNull`], which guarantees that [`TaggedJsString`] (and subsequently [`JsString`]) can +/// use the "null pointer optimization" to optimize the size of [`Option`]. +/// +/// # Provenance +/// +/// This struct stores a [`NonNull`] instead of a [`NonZeroUsize`][std::num::NonZeroUsize] +/// in order to preserve the provenance of our valid heap pointers. +/// On the other hand, all index values are just cast to invalid pointers, because we don't need to +/// preserve the provenance of [`usize`] indices. +/// +/// [tagged_wp]: https://en.wikipedia.org/wiki/Tagged_pointer +#[repr(transparent)] +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +struct TaggedJsString(NonNull); + +impl TaggedJsString { + /// Creates a new [`TaggedJsString`] from a pointer to a valid [`RawJsString`]. + /// + /// # Safety + /// + /// `inner` must point to a valid instance of [`RawJsString`], which should be deallocated only + /// by [`JsString`].
+ #[inline] + unsafe fn new_heap(inner: NonNull) -> Self { + Self(inner) + } + + /// Creates a new static [`TaggedJsString`] from the index of an element inside + /// [`COMMON_STRINGS`]. + /// + /// # Safety + /// + /// `idx` must be a valid index on [`COMMON_STRINGS`]. + #[inline] + const unsafe fn new_static(idx: usize) -> Self { + // SAFETY: + // The operation `(idx << 1) | 1` sets the least significant bit to 1, meaning any pointer + // (valid or invalid) created using this address cannot be null. + unsafe { Self(NonNull::new_unchecked(sptr::invalid_mut((idx << 1) | 1))) } + } + + /// Checks if [`TaggedJsString`] contains an index for [`COMMON_STRINGS`]. + #[inline] + fn is_static(self) -> bool { + (self.0.as_ptr() as usize) & 1 == 1 + } + + /// Returns a reference to a string stored on the heap, without checking if its internal pointer + /// is valid. + /// + /// # Safety + /// + /// `self` must be a heap allocated [`RawJsString`]. + #[inline] + const unsafe fn get_heap_unchecked(self) -> NonNull { + self.0 + } + + /// Returns the string inside [`COMMON_STRINGS`] corresponding to the index inside + /// [`TaggedJsString`], without checking its validity. + /// + /// # Safety + /// + /// `self` must not be a pointer to a heap allocated [`RawJsString`], and it must be a valid + /// index inside [`COMMON_STRINGS`]. + #[inline] + unsafe fn get_static_unchecked(self) -> &'static [u16] { + // SAFETY: + // The caller must ensure `self` is a valid index inside `COMMON_STRINGS`. + unsafe { COMMON_STRINGS.get_unchecked((self.0.as_ptr() as usize) >> 1) } + } +} + +/// Enum representing either a reference to a heap allocated [`RawJsString`] or a static reference to +/// a [`\[u16\]`][std::slice] inside [`COMMON_STRINGS`]. +enum JsStringPtrKind<'a> { + // A string allocated on the heap. + Heap(&'a mut RawJsString), + // A static string slice. + Static(&'static [u16]), +} + +/// A UTF-16–encoded, reference counted, immutable string. 
+/// +/// This is pretty similar to a [Rc][std::rc::Rc]\<[\[u16\]][std::slice]\>, but without +/// the length metadata associated with the [`Rc`][std::rc::Rc] fat pointer. Instead, the length of +/// every string is stored on the heap, along with its reference counter and its data. +/// +/// We define some commonly used string constants in an interner. For these strings, we don't allocate +/// memory on the heap to reduce the overhead of memory allocation and reference counting. +/// +/// # Deref +/// +/// [`JsString`] implements [Deref], inheriting all of +/// [`\[u16\]`][std::slice]'s methods. +#[derive(Finalize)] +pub struct JsString { + ptr: TaggedJsString, +} + +// Safety: `JsString` does not contain any objects which need to be traced, so this is safe. +unsafe impl Trace for JsString { + unsafe_empty_trace!(); +} + +impl JsString { + /// Obtains the underlying [`&[u16]`][std::slice] slice of a [`JsString`]. + pub fn as_slice(&self) -> &[u16] { + self + } + + /// Creates a new [`JsString`] from the concatenation of `x` and `y`. + pub fn concat(x: &[u16], y: &[u16]) -> Self { + Self::concat_array(&[x, y]) + } + + /// Creates a new [`JsString`] from the concatenation of every element of + /// `strings`. + pub fn concat_array(strings: &[&[u16]]) -> Self { + let full_count = strings.iter().fold(0, |len, s| len + s.len()); + + let ptr = Self::allocate_inner(full_count); + + let string = { + // SAFETY: + // `allocate_inner` must return a valid pointer to a newly allocated `RawJsString`, so dereferencing `ptr` is valid (`NonNull` alone only guarantees it is non-null). + let mut data = unsafe { (*ptr.as_ptr()).data.as_mut_ptr() }; + for string in strings { + let count = string.len(); + // SAFETY: + // The sum of all `count` for each `string` equals `full_count`, and since we're + // iteratively writing each of them to `data`, `copy_nonoverlapping` always stays + // in-bounds for `count` reads of each string and `full_count` writes to `data`.
+ // + // Each `string` must be properly aligned to be a valid slice, and `data` must be + // properly aligned by `allocate_inner`. + // + // `allocate_inner` must return a valid pointer to newly allocated memory, meaning + // `ptr` and all `string`s should never overlap. + unsafe { + ptr::copy_nonoverlapping(string.as_ptr(), data, count); + data = data.add(count); + } + } + Self { + // Safety: We already know it's a valid heap pointer. + ptr: unsafe { TaggedJsString::new_heap(ptr) }, + } + }; + + if string.len() <= MAX_COMMON_STRING_LENGTH { + if let Some(constant) = COMMON_STRINGS_CACHE.with(|c| c.get(&string[..]).cloned()) { + return constant; + } + } + + string + } + + /// Decodes a [`JsString`] into a [`String`], replacing invalid data with its escaped representation + /// in 4 digit hexadecimal. + pub fn to_std_string_escaped(&self) -> String { + self.to_string_escaped() + } + + /// Decodes a [`JsString`] into a [`String`], returning + /// [`FromUtf16Error`][std::string::FromUtf16Error] if it contains any invalid data. + pub fn to_std_string(&self) -> Result { + String::from_utf16(self) + } + + /// Gets an iterator of all the Unicode codepoints of a [`JsString`]. + pub fn code_points(&self) -> impl Iterator + '_ { + char::decode_utf16(self.iter().copied()).map(|res| match res { + Ok(c) => CodePoint::Unicode(c), + Err(e) => CodePoint::UnpairedSurrogate(e.unpaired_surrogate()), + }) + } + + /// Abstract operation `StringIndexOf ( string, searchValue, fromIndex )` + /// + /// Note: Instead of returning an isize with `-1` as the "not found" value, we make use of the + /// type system and return [Option]\ with [`None`] as the "not found" value. + /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#sec-stringindexof + pub(crate) fn index_of(&self, search_value: &[u16], from_index: usize) -> Option { + // 1. Assert: Type(string) is String. + // 2. Assert: Type(searchValue) is String. + // 3. 
Assert: fromIndex is a non-negative integer. + + // 4. Let len be the length of string. + let len = self.len(); + + // 5. If searchValue is the empty String and fromIndex ≤ len, return fromIndex. + if search_value.is_empty() { + return if from_index <= len { + Some(from_index) + } else { + None + }; + } + + // 6. Let searchLen be the length of searchValue. + // 7. For each integer i starting with fromIndex such that i ≤ len - searchLen, in ascending order, do + // a. Let candidate be the substring of string from i to i + searchLen. + // b. If candidate is the same sequence of code units as searchValue, return i. + // 8. Return -1. + self.windows(search_value.len()) + .skip(from_index) + .position(|s| s == search_value) + .map(|i| i + from_index) + } + + /// Abstract operation `CodePointAt( string, position )`. + /// + /// The abstract operation `CodePointAt` takes arguments `string` (a String) and `position` (a + /// non-negative integer) and returns a Record with fields `[[CodePoint]]` (a code point), + /// `[[CodeUnitCount]]` (a positive integer), and `[[IsUnpairedSurrogate]]` (a Boolean). It + /// interprets string as a sequence of UTF-16 encoded code points, as described in 6.1.4, and reads + /// from it a single code point starting with the code unit at index `position`. + /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#sec-codepointat + pub(crate) fn code_point_at(&self, position: usize) -> CodePoint { + // 1. Let size be the length of string. + let size = self.len(); + + // 2. Assert: position ≥ 0 and position < size. + // position >= 0 ensured by position: usize + assert!(position < size); + + // 3. Let first be the code unit at index position within string. + // 4. Let cp be the code point whose numeric value is that of first. + // 5. If first is not a leading surrogate or trailing surrogate, then + // a. 
Return the Record { [[CodePoint]]: cp, [[CodeUnitCount]]: 1, [[IsUnpairedSurrogate]]: false }. + // 6. If first is a trailing surrogate or position + 1 = size, then + // a. Return the Record { [[CodePoint]]: cp, [[CodeUnitCount]]: 1, [[IsUnpairedSurrogate]]: true }. + // 7. Let second be the code unit at index position + 1 within string. + // 8. If second is not a trailing surrogate, then + // a. Return the Record { [[CodePoint]]: cp, [[CodeUnitCount]]: 1, [[IsUnpairedSurrogate]]: true }. + // 9. Set cp to ! UTF16SurrogatePairToCodePoint(first, second). + + // We can skip the checks and instead use the `char::decode_utf16` function to take care of that for us. + let code_point = self + .get(position..=position + 1) + .unwrap_or(&self[position..=position]); + + match char::decode_utf16(code_point.iter().copied()) + .next() + .expect("code_point always has a value") + { + Ok(c) => CodePoint::Unicode(c), + Err(e) => CodePoint::UnpairedSurrogate(e.unpaired_surrogate()), + } + } + + /// Abstract operation `StringToNumber ( str )` + /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#sec-stringtonumber + #[allow(clippy::question_mark)] + pub(crate) fn to_number(&self) -> f64 { + // 1. Let text be ! StringToCodePoints(str). + // 2. Let literal be ParseText(text, StringNumericLiteral). + let string = if let Ok(string) = self.to_std_string() { + string + } else { + // 3. If literal is a List of errors, return NaN. + return f64::NAN; + }; + // 4. Return StringNumericValue of literal. 
+ let string = string.trim_matches(is_trimmable_whitespace); + match string { + "" => return 0.0, + "-Infinity" => return f64::NEG_INFINITY, + "Infinity" | "+Infinity" => return f64::INFINITY, + _ => {} + } + + let mut s = string.bytes(); + let base = match (s.next(), s.next()) { + (Some(b'0'), Some(b'b' | b'B')) => Some(2), + (Some(b'0'), Some(b'o' | b'O')) => Some(8), + (Some(b'0'), Some(b'x' | b'X')) => Some(16), + _ => None, + }; + + // Parse numbers that begin with `0b`, `0o` and `0x`. + if let Some(base) = base { + let string = &string[2..]; + if string.is_empty() { + return f64::NAN; + } + + // Fast path + if let Ok(value) = u32::from_str_radix(string, base) { + return f64::from(value); + } + + // Slow path + let mut value = 0.0; + for c in s { + if let Some(digit) = char::from(c).to_digit(base) { + value = value * f64::from(base) + f64::from(digit); + } else { + return f64::NAN; + } + } + return value; + } + + match string { + // Handle special cases so `fast_float` does not return infinity. + "inf" | "+inf" | "-inf" => f64::NAN, + string => fast_float::parse(string).unwrap_or(f64::NAN), + } + } + + /// Abstract operation `StringToBigInt ( str )` + /// + /// More information: + /// - [ECMAScript reference][spec] + /// + /// [spec]: https://tc39.es/ecma262/#sec-stringtobigint + pub(crate) fn to_big_int(&self) -> Option { + // 1. Let text be ! StringToCodePoints(str). + // 2. Let literal be ParseText(text, StringIntegerLiteral). + // 3. If literal is a List of errors, return undefined. + // 4. Let mv be the MV of literal. + // 5. Assert: mv is an integer. + // 6. Return ℤ(mv). + JsBigInt::from_string(self.to_std_string().ok().as_ref()?) + } + + /// Returns the inner pointer data, unwrapping its tagged data if the pointer contains a static + /// index for [`COMMON_STRINGS`]. + #[inline] + fn ptr(&self) -> JsStringPtrKind<'_> { + // Check the first bit to 1. + if self.ptr.is_static() { + // Safety: We already checked. 
+ JsStringPtrKind::Static(unsafe { self.ptr.get_static_unchecked() }) + } else { + // Safety: We already checked. + JsStringPtrKind::Heap(unsafe { self.ptr.get_heap_unchecked().as_mut() }) + } + } + + // This is marked as safe because it is always valid to call this function to request any number + // of `u16`, since this function ought to fail on an OOM error. + /// Allocates a new [`RawJsString`] with an internal capacity of `str_len` chars. + fn allocate_inner(str_len: usize) -> NonNull { + let (layout, offset) = Layout::array::(str_len) + .and_then(|arr| Layout::new::().extend(arr)) + .map(|(layout, offset)| (layout.pad_to_align(), offset)) + .expect("failed to create memory layout"); + + // SAFETY: + // The layout size of `RawJsString` is never zero, since it has to store + // the length of the string and the reference count. + let inner = unsafe { alloc(layout).cast::() }; + + // We need to verify that the pointer returned by `alloc` is not null, otherwise + // we should abort, since an allocation error is pretty unrecoverable for us + // right now. + let inner = NonNull::new(inner).unwrap_or_else(|| std::alloc::handle_alloc_error(layout)); + + // SAFETY: + // `NonNull` verified for us that the pointer returned by `alloc` is valid, + // meaning we can write to its pointed memory. + unsafe { + // Write the first part, the `RawJsString`. + inner.as_ptr().write(RawJsString { + len: str_len, + refcount: Cell::new(1), + data: [0; 0], + }); + } + + debug_assert!({ + let inner = inner.as_ptr(); + // SAFETY: + // - `inner` must be a valid pointer, since it comes from a `NonNull`, + // meaning we can safely dereference it to `RawJsString`. + // - `offset` should point us to the beginning of the array, + // and since we requested an `RawJsString` layout with a trailing + // `[u16; str_len]`, the memory of the array must be in the `usize` + // range for the allocation to succeed. 
+ unsafe { + let data = (*inner).data.as_ptr(); + ptr::eq(inner.cast::().add(offset).cast(), data) + } + }); + + inner + } + + /// Creates a new [`JsString`] from `data`, without checking if the string is in the interner. + fn from_slice_skip_interning(data: &[u16]) -> Self { + let count = data.len(); + let ptr = Self::allocate_inner(count); + // SAFETY: + // - We read `count = data.len()` elements from `data`, which is within the bounds of the slice. + // - `allocate_inner` must allocate at least `count` elements, which allows us to safely + // write at least `count` elements. + // - `allocate_inner` should already take care of the alignment of `ptr`, and `data` must be + // aligned to be a valid slice. + // - `allocate_inner` must return a valid pointer to newly allocated memory, meaning `ptr` + // and `data` should never overlap. + unsafe { + ptr::copy_nonoverlapping(data.as_ptr(), (*ptr.as_ptr()).data.as_mut_ptr(), count); + } + Self { + // Safety: We already know it's a valid heap pointer. + ptr: unsafe { TaggedJsString::new_heap(ptr) }, + } + } +} + +impl AsRef<[u16]> for JsString { + fn as_ref(&self) -> &[u16] { + self + } +} + +impl Borrow<[u16]> for JsString { + fn borrow(&self) -> &[u16] { + self + } +} + +impl Clone for JsString { + #[inline] + fn clone(&self) -> Self { + if let JsStringPtrKind::Heap(inner) = self.ptr() { + inner.refcount.set(inner.refcount.get() + 1); + } + Self { ptr: self.ptr } + } +} + +impl Default for JsString { + #[inline] + fn default() -> Self { + sa::const_assert!(!COMMON_STRINGS.is_empty()); + // Safety: + // `COMMON_STRINGS` must not be empty for this to be safe. + // The static assertion above verifies this. 
+ unsafe { + Self { + ptr: TaggedJsString::new_static(0), + } + } + } +} + +impl Drop for JsString { + #[inline] + fn drop(&mut self) { + if let JsStringPtrKind::Heap(inner) = self.ptr() { + inner.refcount.set(inner.refcount.get() - 1); + if inner.refcount.get() == 0 { + // SAFETY: + // All the checks for the validity of the layout have already been made on `alloc_inner`, + // so we can skip the unwrap. + let layout = unsafe { + Layout::for_value(inner) + .extend(Layout::array::(inner.len).unwrap_unchecked()) + .unwrap_unchecked() + .0 + .pad_to_align() + }; + + // Safety: + // If refcount is 0 and we call drop, that means this is the last `JsString` which + // points to this memory allocation, so deallocating it is safe. + unsafe { + dealloc((inner as *mut RawJsString).cast(), layout); + } + } + } + } +} + +impl std::fmt::Debug for JsString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::char::decode_utf16(self.as_slice().to_owned()) + .map(|r| { + r.map_or_else( + |err| format!("<0x{:04x}>", err.unpaired_surrogate()), + String::from, + ) + }) + .collect::() + .fmt(f) + } +} + +impl Deref for JsString { + type Target = [u16]; + + fn deref(&self) -> &Self::Target { + match self.ptr() { + JsStringPtrKind::Heap(h) => { + // SAFETY: + // - The `RawJsString` type has all the necessary information to reconstruct a valid + // slice (length and starting pointer). + // + // - We aligned `h.data` on allocation, and the block is of size `h.len`, so this + // should only generate valid reads. + // + // - The lifetime of `&Self::Target` is shorter than the lifetime of `self`, as seen + // by its signature, so this doesn't outlive `self`. 
+ unsafe { std::slice::from_raw_parts(h.data.as_ptr(), h.len) } + } + JsStringPtrKind::Static(s) => s, + } + } +} + +impl Eq for JsString {} + +impl From<&[u16]> for JsString { + fn from(s: &[u16]) -> Self { + if s.len() <= MAX_COMMON_STRING_LENGTH { + if let Some(constant) = COMMON_STRINGS_CACHE.with(|c| c.get(s).cloned()) { + return constant; + } + } + Self::from_slice_skip_interning(s) + } +} + +impl From> for JsString { + fn from(vec: Vec) -> Self { + JsString::from(&vec[..]) + } +} + +impl From<&str> for JsString { + #[inline] + fn from(s: &str) -> Self { + let s = s.encode_utf16().collect::>(); + + Self::from(&s[..]) + } +} + +impl From for JsString { + #[inline] + fn from(s: String) -> Self { + Self::from(s.as_str()) + } +} + +impl From<&[u16; N]> for JsString { + #[inline] + fn from(s: &[u16; N]) -> Self { + Self::from(&s[..]) + } +} + +impl Hash for JsString { + fn hash(&self, state: &mut H) { + self[..].hash(state); + } +} + +impl> Index for JsString { + type Output = I::Output; + + #[inline] + fn index(&self, index: I) -> &Self::Output { + Index::index(&**self, index) + } +} + +impl Ord for JsString { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self[..].cmp(other) + } +} + +impl PartialEq for JsString { + fn eq(&self, other: &Self) -> bool { + if self.ptr == other.ptr { + return true; + } + + self[..] == other[..] + } +} + +impl PartialEq for [u16] { + fn eq(&self, other: &JsString) -> bool { + self == &**other + } +} + +impl PartialEq for [u16; N] { + fn eq(&self, other: &JsString) -> bool { + self[..] == *other + } +} + +impl PartialEq<[u16]> for JsString { + fn eq(&self, other: &[u16]) -> bool { + &**self == other + } +} + +impl PartialEq<[u16; N]> for JsString { + fn eq(&self, other: &[u16; N]) -> bool { + *self == other[..] + } +} + +impl PartialOrd for JsString { + fn partial_cmp(&self, other: &Self) -> Option { + self[..].partial_cmp(other) + } +} + +/// Utility trait that adds trimming functionality to every `UTF-16` string. 
+pub(crate) trait Utf16Trim { + /// Trims both leading and trailing space from `self`. + fn trim(&self) -> &Self { + self.trim_start().trim_end() + } + + /// Trims all leading space from `self`. + fn trim_start(&self) -> &Self; + + /// Trims all trailing space from `self`. + fn trim_end(&self) -> &Self; +} + +impl Utf16Trim for [u16] { + fn trim_start(&self) -> &Self { + if let Some(left) = self.iter().copied().position(|r| { + !char::from_u32(u32::from(r)) + .map(is_trimmable_whitespace) + .unwrap_or_default() + }) { + &self[left..] + } else { + &[] + } + } + fn trim_end(&self) -> &Self { + if let Some(right) = self.iter().copied().rposition(|r| { + !char::from_u32(u32::from(r)) + .map(is_trimmable_whitespace) + .unwrap_or_default() + }) { + &self[..=right] + } else { + &[] + } + } +} + +/// Utility trait that adds a `UTF-16` escaped representation to every +/// [`[u16]`][std::slice]. +pub(crate) trait ToStringEscaped { + /// Decodes `self` as an `UTF-16` encoded string, escaping any unpaired surrogates by its + /// codepoint value. + fn to_string_escaped(&self) -> String; +} + +impl ToStringEscaped for [u16] { + fn to_string_escaped(&self) -> String { + char::decode_utf16(self.iter().copied()) + .map(|r| match r { + Ok(c) => String::from(c), + Err(e) => format!("\\u{:04X}", e.unpaired_surrogate()), + }) + .collect() + } +} +#[cfg(test)] +mod tests { + use super::utf16; + use super::{JsString, JsStringPtrKind}; + use std::mem::size_of; + + impl JsString { + /// Gets the number of `JsString`s which point to this allocation. 
+ #[inline] + fn refcount(&self) -> Option { + match self.ptr() { + JsStringPtrKind::Heap(inner) => Some(inner.refcount.get()), + JsStringPtrKind::Static(_inner) => None, + } + } + } + + #[test] + fn empty() { + let s = js_string!(); + assert_eq!(*s, "".encode_utf16().collect::>()); + } + + #[test] + fn pointer_size() { + assert_eq!(size_of::(), size_of::<*const ()>()); + assert_eq!(size_of::>(), size_of::<*const ()>()); + } + + #[test] + fn refcount() { + let x = js_string!("Hello world"); + assert_eq!(x.refcount(), Some(1)); + + { + let y = x.clone(); + assert_eq!(x.refcount(), Some(2)); + assert_eq!(y.refcount(), Some(2)); + + { + let z = y.clone(); + assert_eq!(x.refcount(), Some(3)); + assert_eq!(y.refcount(), Some(3)); + assert_eq!(z.refcount(), Some(3)); + } + + assert_eq!(x.refcount(), Some(2)); + assert_eq!(y.refcount(), Some(2)); + } + + assert_eq!(x.refcount(), Some(1)); + } + + #[test] + fn static_refcount() { + let x = js_string!(); + assert_eq!(x.refcount(), None); + + { + let y = x.clone(); + assert_eq!(x.refcount(), None); + assert_eq!(y.refcount(), None); + }; + + assert_eq!(x.refcount(), None); + } + + #[test] + fn ptr_eq() { + let x = js_string!("Hello"); + let y = x.clone(); + + assert!(!x.ptr.is_static()); + + assert_eq!(x.ptr, y.ptr); + + let z = js_string!("Hello"); + assert_ne!(x.ptr, z.ptr); + assert_ne!(y.ptr, z.ptr); + } + + #[test] + fn static_ptr_eq() { + let x = js_string!(); + let y = x.clone(); + + assert!(x.ptr.is_static()); + + assert_eq!(x.ptr, y.ptr); + + let z = js_string!(); + assert_eq!(x.ptr, z.ptr); + assert_eq!(y.ptr, z.ptr); + } + + #[test] + fn as_str() { + const HELLO: &str = "Hello"; + let x = js_string!(HELLO); + + assert_eq!(*x, HELLO.encode_utf16().collect::>()); + } + + #[test] + fn hash() { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + const HELLOWORLD: &[u16] = utf16!("Hello World!"); + let x = js_string!(HELLOWORLD); + + assert_eq!(&*x, HELLOWORLD); + + let mut hasher = 
DefaultHasher::new(); + HELLOWORLD.hash(&mut hasher); + let s_hash = hasher.finish(); + + let mut hasher = DefaultHasher::new(); + x.hash(&mut hasher); + let x_hash = hasher.finish(); + + assert_eq!(s_hash, x_hash); + } + + #[test] + fn concat() { + const Y: &[u16] = utf16!(", "); + const W: &[u16] = utf16!("!"); + + let x = js_string!("hello"); + let z = js_string!("world"); + + let xy = js_string!(&x, Y); + assert_eq!(&xy, utf16!("hello, ")); + assert_eq!(xy.refcount(), Some(1)); + + let xyz = js_string!(&xy, &z); + assert_eq!(&xyz, utf16!("hello, world")); + assert_eq!(xyz.refcount(), Some(1)); + + let xyzw = js_string!(&xyz, W); + assert_eq!(&xyzw, utf16!("hello, world!")); + assert_eq!(xyzw.refcount(), Some(1)); + } +} diff --git a/boa_engine/src/symbol.rs b/boa_engine/src/symbol.rs index bbd013978f3..f500b86077a 100644 --- a/boa_engine/src/symbol.rs +++ b/boa_engine/src/symbol.rs @@ -15,11 +15,10 @@ //! [spec]: https://tc39.es/ecma262/#sec-symbol-value //! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol -use crate::JsString; +use crate::{js_string, string::utf16, JsString}; use boa_gc::{unsafe_empty_trace, Finalize, Trace}; use std::{ cell::Cell, - fmt::{self, Display}, hash::{Hash, Hasher}, rc::Rc, }; @@ -31,7 +30,7 @@ use std::{ ///# use boa_engine::symbol::WellKnownSymbols; /// /// let iterator = WellKnownSymbols::iterator(); -/// assert_eq!(iterator.description().as_deref(), Some("Symbol.iterator")); +/// assert_eq!(iterator.description().unwrap().to_std_string_escaped(), "Symbol.iterator"); /// ``` /// This is equivalent to `let iterator = Symbol.iterator` in JavaScript. 
#[derive(Debug, Clone)] @@ -303,15 +302,18 @@ impl JsSymbol { /// /// [spec]: https://tc39.es/ecma262/#sec-symboldescriptivestring pub fn descriptive_string(&self) -> JsString { - self.to_string().into() + self.inner.description.as_ref().map_or_else( + || js_string!("Symbol()"), + |desc| js_string!(utf16!("Symbol("), desc, utf16!(")")), + ) } } -impl Display for JsSymbol { +impl std::fmt::Display for JsSymbol { #[inline] - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match &self.inner.description { - Some(desc) => write!(f, "Symbol({desc})"), + Some(desc) => write!(f, "Symbol({})", desc.to_std_string_escaped()), None => write!(f, "Symbol()"), } } diff --git a/boa_engine/src/syntax/ast/keyword.rs b/boa_engine/src/syntax/ast/keyword.rs index 920ac1cbfdc..b5c754da5c0 100644 --- a/boa_engine/src/syntax/ast/keyword.rs +++ b/boa_engine/src/syntax/ast/keyword.rs @@ -7,7 +7,10 @@ //! [spec]: https://tc39.es/ecma262/#sec-keywords-and-reserved-words //! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#Keywords -use crate::syntax::ast::op::{BinOp, CompOp}; +use crate::{ + string::utf16, + syntax::ast::op::{BinOp, CompOp}, +}; use boa_interner::{Interner, Sym}; use std::{convert::TryInto, error, fmt, str::FromStr}; @@ -491,56 +494,57 @@ impl Keyword { } } - /// Gets the keyword as a string. - pub fn as_str(self) -> &'static str { + /// Gets the keyword as a tuple of strings. 
+ pub fn as_str(self) -> (&'static str, &'static [u16]) { match self { - Self::Await => "await", - Self::Async => "async", - Self::Break => "break", - Self::Case => "case", - Self::Catch => "catch", - Self::Class => "class", - Self::Continue => "continue", - Self::Const => "const", - Self::Debugger => "debugger", - Self::Default => "default", - Self::Delete => "delete", - Self::Do => "do", - Self::Else => "else", - Self::Enum => "enum", - Self::Extends => "extends", - Self::Export => "export", - Self::False => "false", - Self::Finally => "finally", - Self::For => "for", - Self::Function => "function", - Self::If => "if", - Self::In => "in", - Self::InstanceOf => "instanceof", - Self::Import => "import", - Self::Let => "let", - Self::New => "new", - Self::Null => "null", - Self::Of => "of", - Self::Return => "return", - Self::Super => "super", - Self::Switch => "switch", - Self::This => "this", - Self::Throw => "throw", - Self::True => "true", - Self::Try => "try", - Self::TypeOf => "typeof", - Self::Var => "var", - Self::Void => "void", - Self::While => "while", - Self::With => "with", - Self::Yield => "yield", + Self::Await => ("await", utf16!("await")), + Self::Async => ("async", utf16!("async")), + Self::Break => ("break", utf16!("break")), + Self::Case => ("case", utf16!("case")), + Self::Catch => ("catch", utf16!("catch")), + Self::Class => ("class", utf16!("class")), + Self::Continue => ("continue", utf16!("continue")), + Self::Const => ("const", utf16!("const")), + Self::Debugger => ("debugger", utf16!("debugger")), + Self::Default => ("default", utf16!("default")), + Self::Delete => ("delete", utf16!("delete")), + Self::Do => ("do", utf16!("do")), + Self::Else => ("else", utf16!("else")), + Self::Enum => ("enum", utf16!("enum")), + Self::Extends => ("extends", utf16!("extends")), + Self::Export => ("export", utf16!("export")), + Self::False => ("false", utf16!("false")), + Self::Finally => ("finally", utf16!("finally")), + Self::For => ("for", 
utf16!("for")), + Self::Function => ("function", utf16!("function")), + Self::If => ("if", utf16!("if")), + Self::In => ("in", utf16!("in")), + Self::InstanceOf => ("instanceof", utf16!("instanceof")), + Self::Import => ("import", utf16!("import")), + Self::Let => ("let", utf16!("let")), + Self::New => ("new", utf16!("new")), + Self::Null => ("null", utf16!("null")), + Self::Of => ("of", utf16!("of")), + Self::Return => ("return", utf16!("return")), + Self::Super => ("super", utf16!("super")), + Self::Switch => ("switch", utf16!("switch")), + Self::This => ("this", utf16!("this")), + Self::Throw => ("throw", utf16!("throw")), + Self::True => ("true", utf16!("true")), + Self::Try => ("try", utf16!("try")), + Self::TypeOf => ("typeof", utf16!("typeof")), + Self::Var => ("var", utf16!("var")), + Self::Void => ("void", utf16!("void")), + Self::While => ("while", utf16!("while")), + Self::With => ("with", utf16!("with")), + Self::Yield => ("yield", utf16!("yield")), } } /// Converts the keyword to a symbol in the given interner. pub fn to_sym(self, interner: &mut Interner) -> Sym { - interner.get_or_intern_static(self.as_str()) + let (utf8, utf16) = self.as_str(); + interner.get_or_intern_static(utf8, utf16) } } @@ -623,6 +627,6 @@ impl FromStr for Keyword { impl fmt::Display for Keyword { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self.as_str(), f) + fmt::Display::fmt(self.as_str().0, f) } } diff --git a/boa_engine/src/syntax/ast/node/identifier/mod.rs b/boa_engine/src/syntax/ast/node/identifier/mod.rs index 16d93fb2d7d..62d7fbef80e 100644 --- a/boa_engine/src/syntax/ast/node/identifier/mod.rs +++ b/boa_engine/src/syntax/ast/node/identifier/mod.rs @@ -1,8 +1,11 @@ //! Local identifier node. 
-use crate::syntax::{ - ast::{node::Node, Position}, - parser::ParseError, +use crate::{ + string::ToStringEscaped, + syntax::{ + ast::{node::Node, Position}, + parser::ParseError, + }, }; use boa_interner::{Interner, Sym, ToInternedString}; @@ -63,7 +66,11 @@ impl Identifier { impl ToInternedString for Identifier { fn to_interned_string(&self, interner: &Interner) -> String { - interner.resolve_expect(self.ident).to_owned() + interner.resolve_expect(self.ident).join( + String::from, + ToStringEscaped::to_string_escaped, + true, + ) } } diff --git a/boa_engine/src/syntax/ast/node/object/mod.rs b/boa_engine/src/syntax/ast/node/object/mod.rs index 89d920c411c..e9156dfde5b 100644 --- a/boa_engine/src/syntax/ast/node/object/mod.rs +++ b/boa_engine/src/syntax/ast/node/object/mod.rs @@ -1,5 +1,5 @@ //! Object node. - +use crate::string::ToStringEscaped; use crate::syntax::ast::{ node::{ declaration::block_to_string, join_nodes, AsyncFunctionExpr, AsyncGeneratorExpr, @@ -420,7 +420,11 @@ impl PropertyName { impl ToInternedString for PropertyName { fn to_interned_string(&self, interner: &Interner) -> String { match self { - PropertyName::Literal(key) => interner.resolve_expect(*key).to_owned(), + PropertyName::Literal(key) => interner.resolve_expect(*key).join( + String::from, + ToStringEscaped::to_string_escaped, + true, + ), PropertyName::Computed(key) => key.to_interned_string(interner), } } diff --git a/boa_engine/src/syntax/ast/node/template/mod.rs b/boa_engine/src/syntax/ast/node/template/mod.rs index e306cf2f046..75c003d3e4f 100644 --- a/boa_engine/src/syntax/ast/node/template/mod.rs +++ b/boa_engine/src/syntax/ast/node/template/mod.rs @@ -1,5 +1,7 @@ //! Template literal node. 
+use crate::string::ToStringEscaped; + use super::Node; use boa_interner::{Interner, Sym, ToInternedString}; @@ -44,7 +46,12 @@ impl ToInternedString for TemplateLit { for elt in self.elements.iter() { match elt { - TemplateElement::String(s) => buf.push_str(interner.resolve_expect(*s)), + TemplateElement::String(s) => interner.resolve_expect(*s).join_with_context( + |s, buf| buf.push_str(s), + |js, buf| buf.push_str(&js.to_string_escaped()), + &mut buf, + true, + ), TemplateElement::Expr(n) => { buf.push_str(&format!("${{{}}}", n.to_interned_string(interner))); } diff --git a/boa_engine/src/syntax/lexer/identifier.rs b/boa_engine/src/syntax/lexer/identifier.rs index 0722c6bf17d..e57c42dcfac 100644 --- a/boa_engine/src/syntax/lexer/identifier.rs +++ b/boa_engine/src/syntax/lexer/identifier.rs @@ -85,7 +85,7 @@ impl Tokenizer for Identifier { _ => TokenKind::Keyword((keyword, contains_escaped_chars)), } } else { - TokenKind::identifier(interner.get_or_intern(identifier_name)) + TokenKind::identifier(interner.get_or_intern(identifier_name.as_str())) }; Ok(Token::new(token_kind, Span::new(start_pos, cursor.pos()))) diff --git a/boa_engine/src/syntax/lexer/private_identifier.rs b/boa_engine/src/syntax/lexer/private_identifier.rs index d3ef44cbcbd..867461c8baf 100644 --- a/boa_engine/src/syntax/lexer/private_identifier.rs +++ b/boa_engine/src/syntax/lexer/private_identifier.rs @@ -43,14 +43,14 @@ impl Tokenizer for PrivateIdentifier { '\\' if cursor.peek()? 
== Some(b'u') => { let (name, _) = Identifier::take_identifier_name(cursor, start_pos, c)?; Ok(Token::new( - TokenKind::PrivateIdentifier(interner.get_or_intern(&name)), + TokenKind::PrivateIdentifier(interner.get_or_intern(name.as_str())), Span::new(start_pos, cursor.pos()), )) } _ if Identifier::is_identifier_start(c as u32) => { let (name, _) = Identifier::take_identifier_name(cursor, start_pos, c)?; Ok(Token::new( - TokenKind::PrivateIdentifier(interner.get_or_intern(&name)), + TokenKind::PrivateIdentifier(interner.get_or_intern(name.as_str())), Span::new(start_pos, cursor.pos()), )) } diff --git a/boa_engine/src/syntax/lexer/regex.rs b/boa_engine/src/syntax/lexer/regex.rs index ddee084f91f..5f695f42605 100644 --- a/boa_engine/src/syntax/lexer/regex.rs +++ b/boa_engine/src/syntax/lexer/regex.rs @@ -184,7 +184,7 @@ impl FromStr for RegExpFlags { fn parse_regex_flags(s: &str, start: Position, interner: &mut Interner) -> Result { match RegExpFlags::from_str(s) { Err(message) => Err(Error::Syntax(message.into(), start)), - Ok(flags) => Ok(interner.get_or_intern(flags.to_string())), + Ok(flags) => Ok(interner.get_or_intern(flags.to_string().as_str())), } } diff --git a/boa_engine/src/syntax/lexer/string.rs b/boa_engine/src/syntax/lexer/string.rs index c38e10f4267..ea5e58eec5b 100644 --- a/boa_engine/src/syntax/lexer/string.rs +++ b/boa_engine/src/syntax/lexer/string.rs @@ -91,7 +91,7 @@ impl Tokenizer for StringLiteral { Self::take_string_characters(cursor, start_pos, self.terminator, cursor.strict_mode())?; Ok(Token::new( - TokenKind::string_literal(interner.get_or_intern(lit)), + TokenKind::string_literal(interner.get_or_intern(&lit[..])), span, )) } @@ -118,7 +118,7 @@ impl StringLiteral { start_pos: Position, terminator: StringTerminator, is_strict_mode: bool, - ) -> Result<(String, Span), Error> + ) -> Result<(Vec, Span), Error> where R: Read, { @@ -157,7 +157,7 @@ impl StringLiteral { } } - Ok((buf.to_string_lossy(), Span::new(start_pos, cursor.pos()))) + 
Ok((buf, Span::new(start_pos, cursor.pos()))) } #[inline] diff --git a/boa_engine/src/syntax/lexer/template.rs b/boa_engine/src/syntax/lexer/template.rs index 023579aacf1..68d9df180f3 100644 --- a/boa_engine/src/syntax/lexer/template.rs +++ b/boa_engine/src/syntax/lexer/template.rs @@ -49,8 +49,8 @@ impl TemplateString { /// /// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings pub fn to_owned_cooked(self, interner: &mut Interner) -> Result { - let mut cursor = - Cursor::with_position(interner.resolve_expect(self.raw).as_bytes(), self.start_pos); + let string = interner.resolve_expect(self.raw).to_string(); + let mut cursor = Cursor::with_position(string.as_bytes(), self.start_pos); let mut buf: Vec = Vec::new(); loop { @@ -80,9 +80,7 @@ impl TemplateString { } } - let str = buf.to_string_lossy(); - - Ok(interner.get_or_intern(&str)) + Ok(interner.get_or_intern(&buf[..])) } } @@ -123,8 +121,7 @@ impl Tokenizer for TemplateLiteral { match ch { // ` 0x0060 => { - let raw = buf.to_string_lossy(); - let raw_sym = interner.get_or_intern(raw); + let raw_sym = interner.get_or_intern(&buf[..]); let template_string = TemplateString::new(raw_sym, start_pos); return Ok(Token::new( @@ -134,8 +131,7 @@ impl Tokenizer for TemplateLiteral { } // $ 0x0024 if cursor.next_is(b'{')? 
=> { - let raw = buf.to_string_lossy(); - let raw_sym = interner.get_or_intern(raw); + let raw_sym = interner.get_or_intern(&buf[..]); let template_string = TemplateString::new(raw_sym, start_pos); return Ok(Token::new( diff --git a/boa_engine/src/syntax/lexer/tests.rs b/boa_engine/src/syntax/lexer/tests.rs index 9c975fe23cc..13db696c146 100644 --- a/boa_engine/src/syntax/lexer/tests.rs +++ b/boa_engine/src/syntax/lexer/tests.rs @@ -7,8 +7,10 @@ use boa_interner::Sym; use super::token::Numeric; use super::*; use super::{Error, Position}; -use crate::syntax::ast::Keyword; -use crate::syntax::lexer::template::TemplateString; +use crate::{ + string::utf16, + syntax::{ast::Keyword, lexer::template::TemplateString}, +}; use std::str; fn span(start: (u32, u32), end: (u32, u32)) -> Span { @@ -67,7 +69,7 @@ fn check_multi_line_comment() { let mut lexer = Lexer::new(s.as_bytes()); let mut interner = Interner::default(); - let sym = interner.get_or_intern_static("x"); + let sym = interner.get_or_intern_static("x", utf16!("x")); let expected = [ TokenKind::Keyword((Keyword::Var, false)), TokenKind::LineTerminator, @@ -84,18 +86,20 @@ fn check_identifier() { let mut interner = Interner::default(); let expected = [ - TokenKind::identifier(interner.get_or_intern_static("x")), - TokenKind::identifier(interner.get_or_intern_static("x1")), - TokenKind::identifier(interner.get_or_intern_static("_x")), - TokenKind::identifier(interner.get_or_intern_static("$x")), - TokenKind::identifier(interner.get_or_intern_static("__")), - TokenKind::identifier(interner.get_or_intern_static("$$")), - TokenKind::identifier(interner.get_or_intern_static("Ѐ")), - TokenKind::identifier(interner.get_or_intern_static("ЀЀ")), - TokenKind::identifier(interner.get_or_intern_static("x\u{200C}\u{200D}")), - TokenKind::identifier(interner.get_or_intern_static("x")), - TokenKind::identifier(interner.get_or_intern_static("xx")), - TokenKind::identifier(interner.get_or_intern_static("xxx")), + 
TokenKind::identifier(interner.get_or_intern_static("x", utf16!("x"))), + TokenKind::identifier(interner.get_or_intern_static("x1", utf16!("x1"))), + TokenKind::identifier(interner.get_or_intern_static("_x", utf16!("_x"))), + TokenKind::identifier(interner.get_or_intern_static("$x", utf16!("$x"))), + TokenKind::identifier(interner.get_or_intern_static("__", utf16!("__"))), + TokenKind::identifier(interner.get_or_intern_static("$$", utf16!("$$"))), + TokenKind::identifier(interner.get_or_intern_static("Ѐ", utf16!("Ѐ"))), + TokenKind::identifier(interner.get_or_intern_static("ЀЀ", utf16!("ЀЀ"))), + TokenKind::identifier( + interner.get_or_intern_static("x\u{200C}\u{200D}", utf16!("x\u{200C}\u{200D}")), + ), + TokenKind::identifier(interner.get_or_intern_static("x", utf16!("x"))), + TokenKind::identifier(interner.get_or_intern_static("xx", utf16!("xx"))), + TokenKind::identifier(interner.get_or_intern_static("xxx", utf16!("xxx"))), ]; expect_tokens(&mut lexer, &expected, &mut interner); @@ -119,7 +123,7 @@ fn check_invalid_identifier_part() { let invalid_identifier_parts = [" ", "\n", ".", "*", "😀", "\u{007F}"]; let mut interner = Interner::default(); - let sym = interner.get_or_intern_static("x"); + let sym = interner.get_or_intern_static("x", utf16!("x")); for part in &invalid_identifier_parts { let s = String::from("x") + part; let mut lexer = Lexer::new(s.as_bytes()); @@ -137,8 +141,8 @@ fn check_string() { let mut lexer = Lexer::new(s.as_bytes()); let mut interner = Interner::default(); - let a_sym = interner.get_or_intern_static("aaa"); - let b_sym = interner.get_or_intern_static("bbb"); + let a_sym = interner.get_or_intern_static("aaa", utf16!("aaa")); + let b_sym = interner.get_or_intern_static("bbb", utf16!("bbb")); let expected = [ TokenKind::string_literal(a_sym), TokenKind::string_literal(b_sym), @@ -153,7 +157,8 @@ fn check_template_literal_simple() { let mut lexer = Lexer::new(s.as_bytes()); let mut interner = Interner::default(); - let sym = 
interner.get_or_intern_static("I'm a template literal"); + let sym = + interner.get_or_intern_static("I'm a template literal", utf16!("I'm a template literal")); assert_eq!( lexer.next(&mut interner).unwrap().unwrap().kind(), @@ -296,8 +301,8 @@ fn check_variable_definition_tokens() { let mut lexer = Lexer::new(s.as_bytes()); let mut interner = Interner::default(); - let a_sym = interner.get_or_intern_static("a"); - let hello_sym = interner.get_or_intern_static("hello"); + let a_sym = interner.get_or_intern_static("a", utf16!("a")); + let hello_sym = interner.get_or_intern_static("hello", utf16!("hello")); let expected = [ TokenKind::Keyword((Keyword::Let, false)), TokenKind::identifier(a_sym), @@ -585,7 +590,7 @@ fn hexadecimal_edge_case() { let mut lexer = Lexer::new(&b"0xffff.ff 0xffffff"[..]); let mut interner = Interner::default(); - let sym = interner.get_or_intern_static("ff"); + let sym = interner.get_or_intern_static("ff", utf16!("ff")); let expected = [ TokenKind::numeric_literal(0xffff), TokenKind::Punctuator(Punctuator::Dot), @@ -627,7 +632,7 @@ fn regex_literal() { let mut interner = Interner::default(); let expected = [TokenKind::regular_expression_literal( - interner.get_or_intern_static("(?:)"), + interner.get_or_intern_static("(?:)", utf16!("(?:)")), Sym::EMPTY_STRING, )]; @@ -641,10 +646,10 @@ fn regex_equals_following_assignment() { let expected = [ TokenKind::Keyword((Keyword::Const, false)), - TokenKind::identifier(interner.get_or_intern_static("myRegex")), + TokenKind::identifier(interner.get_or_intern_static("myRegex", utf16!("myRegex"))), TokenKind::Punctuator(Punctuator::Assign), TokenKind::regular_expression_literal( - interner.get_or_intern_static("="), + interner.get_or_intern_static("=", utf16!("=")), Sym::EMPTY_STRING, ), TokenKind::Punctuator(Punctuator::Semicolon), @@ -659,8 +664,8 @@ fn regex_literal_flags() { let mut interner = Interner::default(); let expected = [TokenKind::regular_expression_literal( - 
interner.get_or_intern_static("\\/[^\\/]*\\/*"), - interner.get_or_intern_static("gim"), + interner.get_or_intern_static("\\/[^\\/]*\\/*", utf16!("\\/[^\\/]*\\/*")), + interner.get_or_intern_static("gim", utf16!("gim")), )]; expect_tokens(&mut lexer, &expected, &mut interner); @@ -940,7 +945,7 @@ fn string_unicode() { let mut lexer = Lexer::new(s.as_bytes()); let mut interner = Interner::default(); - let sym = interner.get_or_intern_static("中文"); + let sym = interner.get_or_intern_static("中文", utf16!("中文")); let expected = [ TokenKind::StringLiteral(sym), TokenKind::Punctuator(Punctuator::Semicolon), @@ -954,7 +959,8 @@ fn string_unicode_escape_with_braces() { let mut lexer = Lexer::new(&br#"'{\u{20ac}\u{a0}\u{a0}}'"#[..]); let mut interner = Interner::default(); - let sym = interner.get_or_intern_static("{\u{20ac}\u{a0}\u{a0}}"); + let sym = + interner.get_or_intern_static("{\u{20ac}\u{a0}\u{a0}}", utf16!("{\u{20ac}\u{a0}\u{a0}}")); let expected = [TokenKind::StringLiteral(sym)]; expect_tokens(&mut lexer, &expected, &mut interner); @@ -989,7 +995,7 @@ fn string_unicode_escape_with_braces_2() { let mut lexer = Lexer::new(s.as_bytes()); let mut interner = Interner::default(); - let sym = interner.get_or_intern_static("\u{20ac}\u{a0}\u{a0}"); + let sym = interner.get_or_intern_static("\u{20ac}\u{a0}\u{a0}", utf16!("\u{20ac}\u{a0}\u{a0}")); let expected = [TokenKind::StringLiteral(sym)]; expect_tokens(&mut lexer, &expected, &mut interner); @@ -1002,7 +1008,7 @@ fn string_with_single_escape() { let mut lexer = Lexer::new(s.as_bytes()); let mut interner = Interner::default(); - let sym = interner.get_or_intern_static("Б"); + let sym = interner.get_or_intern_static("Б", utf16!("Б")); let expected = [TokenKind::StringLiteral(sym)]; expect_tokens(&mut lexer, &expected, &mut interner); @@ -1024,7 +1030,7 @@ fn string_legacy_octal_escape() { let mut lexer = Lexer::new(s.as_bytes()); let mut interner = Interner::default(); - let sym = interner.get_or_intern(expected); + let 
sym = interner.get_or_intern(expected.encode_utf16().collect::>().as_slice()); let expected_tokens = [TokenKind::StringLiteral(sym)]; expect_tokens(&mut lexer, &expected_tokens, &mut interner); @@ -1054,7 +1060,7 @@ fn string_zero_escape() { let mut lexer = Lexer::new(s.as_bytes()); let mut interner = Interner::default(); - let sym = interner.get_or_intern(expected); + let sym = interner.get_or_intern(expected.encode_utf16().collect::>().as_slice()); let expected_tokens = [TokenKind::StringLiteral(sym)]; expect_tokens(&mut lexer, &expected_tokens, &mut interner); @@ -1069,7 +1075,7 @@ fn string_non_octal_decimal_escape() { let mut lexer = Lexer::new(s.as_bytes()); let mut interner = Interner::default(); - let sym = interner.get_or_intern(expected); + let sym = interner.get_or_intern(expected.encode_utf16().collect::>().as_slice()); let expected_tokens = [TokenKind::StringLiteral(sym)]; expect_tokens(&mut lexer, &expected_tokens, &mut interner); @@ -1098,7 +1104,7 @@ fn string_line_continuation() { let mut lexer = Lexer::new(s.as_bytes()); let mut interner = Interner::default(); - let sym = interner.get_or_intern_static("hello world"); + let sym = interner.get_or_intern_static("hello world", utf16!("hello world")); let expected_tokens = [TokenKind::StringLiteral(sym)]; expect_tokens(&mut lexer, &expected_tokens, &mut interner); diff --git a/boa_engine/src/syntax/lexer/token.rs b/boa_engine/src/syntax/lexer/token.rs index 0cdae5ec61a..ae836db3f57 100644 --- a/boa_engine/src/syntax/lexer/token.rs +++ b/boa_engine/src/syntax/lexer/token.rs @@ -222,7 +222,7 @@ impl TokenKind { match *self { Self::BooleanLiteral(val) => val.to_string(), Self::EOF => "end of file".to_owned(), - Self::Identifier(ident) => interner.resolve_expect(ident).to_owned(), + Self::Identifier(ident) => interner.resolve_expect(ident).to_string(), Self::PrivateIdentifier(ident) => format!("#{}", interner.resolve_expect(ident)), Self::Keyword((word, _)) => word.to_string(), Self::NullLiteral => 
"null".to_owned(), @@ -230,9 +230,9 @@ impl TokenKind { Self::NumericLiteral(Numeric::Integer(num)) => num.to_string(), Self::NumericLiteral(Numeric::BigInt(ref num)) => format!("{num}n"), Self::Punctuator(punc) => punc.to_string(), - Self::StringLiteral(lit) => interner.resolve_expect(lit).to_owned(), + Self::StringLiteral(lit) => interner.resolve_expect(lit).to_string(), Self::TemplateNoSubstitution(ts) | Self::TemplateMiddle(ts) => { - interner.resolve_expect(ts.as_raw()).to_owned() + interner.resolve_expect(ts.as_raw()).to_string() } Self::RegularExpressionLiteral(body, flags) => { format!( diff --git a/boa_engine/src/syntax/parser/cursor/buffered_lexer/tests.rs b/boa_engine/src/syntax/parser/cursor/buffered_lexer/tests.rs index 399dd7aded2..d122329ac60 100644 --- a/boa_engine/src/syntax/parser/cursor/buffered_lexer/tests.rs +++ b/boa_engine/src/syntax/parser/cursor/buffered_lexer/tests.rs @@ -1,5 +1,8 @@ use super::BufferedLexer; -use crate::syntax::lexer::{Token, TokenKind}; +use crate::{ + string::utf16, + syntax::lexer::{Token, TokenKind}, +}; use boa_interner::Interner; #[test] @@ -12,42 +15,42 @@ fn peek_skip_accending() { .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("a")) + TokenKind::identifier(interner.get_or_intern_static("a", utf16!("a"))) ); assert_eq!( *cur.peek(1, false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("b")) + TokenKind::identifier(interner.get_or_intern_static("b", utf16!("b"))) ); assert_eq!( *cur.peek(2, false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("c")) + TokenKind::identifier(interner.get_or_intern_static("c", utf16!("c"))) ); assert_eq!( *cur.peek(2, false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("c")) + 
TokenKind::identifier(interner.get_or_intern_static("c", utf16!("c"))) ); assert_eq!( *cur.peek(1, false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("b")) + TokenKind::identifier(interner.get_or_intern_static("b", utf16!("b"))) ); assert_eq!( *cur.peek(0, false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("a")) + TokenKind::identifier(interner.get_or_intern_static("a", utf16!("a"))) ); } @@ -61,77 +64,77 @@ fn peek_skip_next() { .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("a")) + TokenKind::identifier(interner.get_or_intern_static("a", utf16!("a"))) ); assert_eq!( *cur.peek(1, false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("b")) + TokenKind::identifier(interner.get_or_intern_static("b", utf16!("b"))) ); assert_eq!( *cur.peek(2, false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("c")) + TokenKind::identifier(interner.get_or_intern_static("c", utf16!("c"))) ); assert_eq!( *cur.next(false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("a")) + TokenKind::identifier(interner.get_or_intern_static("a", utf16!("a"))) ); assert_eq!( *cur.next(false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("b")) + TokenKind::identifier(interner.get_or_intern_static("b", utf16!("b"))) ); assert_eq!( *cur.next(false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("c")) + TokenKind::identifier(interner.get_or_intern_static("c", utf16!("c"))) ); assert_eq!( *cur.next(false, &mut interner) .unwrap() .expect("Some value 
expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("d")) + TokenKind::identifier(interner.get_or_intern_static("d", utf16!("d"))) ); assert_eq!( *cur.next(false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("e")) + TokenKind::identifier(interner.get_or_intern_static("e", utf16!("e"))) ); assert_eq!( *cur.peek(0, false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("f")) + TokenKind::identifier(interner.get_or_intern_static("f", utf16!("f"))) ); assert_eq!( *cur.peek(1, false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("g")) + TokenKind::identifier(interner.get_or_intern_static("g", utf16!("g"))) ); assert_eq!( *cur.peek(2, false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("h")) + TokenKind::identifier(interner.get_or_intern_static("h", utf16!("h"))) ); } @@ -145,49 +148,49 @@ fn peek_skip_next_alternating() { .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("a")) + TokenKind::identifier(interner.get_or_intern_static("a", utf16!("a"))) ); assert_eq!( *cur.next(false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("a")) + TokenKind::identifier(interner.get_or_intern_static("a", utf16!("a"))) ); assert_eq!( *cur.peek(1, false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("c")) + TokenKind::identifier(interner.get_or_intern_static("c", utf16!("c"))) ); assert_eq!( *cur.next(false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("b")) + 
TokenKind::identifier(interner.get_or_intern_static("b", utf16!("b"))) ); assert_eq!( *cur.peek(1, false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("d")) + TokenKind::identifier(interner.get_or_intern_static("d", utf16!("d"))) ); assert_eq!( *cur.next(false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("c")) + TokenKind::identifier(interner.get_or_intern_static("c", utf16!("c"))) ); assert_eq!( *cur.peek(2, false, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("f")) + TokenKind::identifier(interner.get_or_intern_static("f", utf16!("f"))) ); } @@ -241,14 +244,14 @@ fn skip_peeked_terminators() { .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("A")) + TokenKind::identifier(interner.get_or_intern_static("A", utf16!("A"))) ); assert_eq!( *cur.peek(0, true, &mut interner) .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("A")) + TokenKind::identifier(interner.get_or_intern_static("A", utf16!("A"))) ); assert_eq!( @@ -263,7 +266,7 @@ fn skip_peeked_terminators() { .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("B")) // This value is after the line terminator + TokenKind::identifier(interner.get_or_intern_static("B", utf16!("B"))) // This value is after the line terminator ); assert_eq!( @@ -271,7 +274,7 @@ fn skip_peeked_terminators() { .unwrap() .expect("Some value expected") .kind(), - TokenKind::identifier(interner.get_or_intern_static("B")) + TokenKind::identifier(interner.get_or_intern_static("B", utf16!("B"))) ); // End of stream assert!(cur.peek(2, true, &mut interner).unwrap().is_none()); diff --git a/boa_engine/src/syntax/parser/expression/left_hand_side/tests.rs 
b/boa_engine/src/syntax/parser/expression/left_hand_side/tests.rs index 5d41bca3346..8e511879ab4 100644 --- a/boa_engine/src/syntax/parser/expression/left_hand_side/tests.rs +++ b/boa_engine/src/syntax/parser/expression/left_hand_side/tests.rs @@ -1,51 +1,59 @@ -use crate::syntax::{ - ast::node::{field::GetConstField, Call, Identifier}, - parser::tests::check_parser, +use crate::{ + string::utf16, + syntax::{ + ast::node::{field::GetConstField, Call, Identifier}, + parser::tests::check_parser, + }, }; use boa_interner::Interner; -#[track_caller] -fn check_call_property_identifier(property_name: &'static str) { - let mut interner = Interner::default(); - check_parser( - format!("a().{}", property_name).as_str(), - vec![GetConstField::new( - Call::new(Identifier::new(interner.get_or_intern_static("a")), vec![]), - interner.get_or_intern_static(property_name), - ) - .into()], - interner, - ); +macro_rules! check_call_property_identifier { + ($property:literal) => {{ + let mut interner = Interner::default(); + check_parser( + format!("a().{}", $property).as_str(), + vec![GetConstField::new( + Call::new( + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + vec![], + ), + interner.get_or_intern_static($property, utf16!($property)), + ) + .into()], + interner, + ); + }}; } #[test] fn check_call_properties() { - check_call_property_identifier("prop"); - check_call_property_identifier("true"); - check_call_property_identifier("false"); - check_call_property_identifier("null"); - check_call_property_identifier("let"); + check_call_property_identifier!("prop"); + check_call_property_identifier!("true"); + check_call_property_identifier!("false"); + check_call_property_identifier!("null"); + check_call_property_identifier!("let"); } -#[track_caller] -fn check_member_property_identifier(property_name: &'static str) { - let mut interner = Interner::default(); - check_parser( - format!("a.{}", property_name).as_str(), - vec![GetConstField::new( - 
Identifier::new(interner.get_or_intern_static("a")), - interner.get_or_intern_static(property_name), - ) - .into()], - interner, - ); +macro_rules! check_member_property_identifier { + ($property:literal) => {{ + let mut interner = Interner::default(); + check_parser( + format!("a.{}", $property).as_str(), + vec![GetConstField::new( + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + interner.get_or_intern_static($property, utf16!($property)), + ) + .into()], + interner, + ); + }}; } #[test] fn check_member_properties() { - check_member_property_identifier("prop"); - check_member_property_identifier("true"); - check_member_property_identifier("false"); - check_member_property_identifier("null"); - check_member_property_identifier("let"); + check_member_property_identifier!("prop"); + check_member_property_identifier!("true"); + check_member_property_identifier!("false"); + check_member_property_identifier!("null"); + check_member_property_identifier!("let"); } diff --git a/boa_engine/src/syntax/parser/expression/primary/array_initializer/tests.rs b/boa_engine/src/syntax/parser/expression/primary/array_initializer/tests.rs index 88b8d34a40e..063dca51550 100644 --- a/boa_engine/src/syntax/parser/expression/primary/array_initializer/tests.rs +++ b/boa_engine/src/syntax/parser/expression/primary/array_initializer/tests.rs @@ -1,8 +1,11 @@ // ! Tests for array initializer parsing. 
-use crate::syntax::{ - ast::{node::ArrayDecl, Const, Node}, - parser::tests::check_parser, +use crate::{ + string::utf16, + syntax::{ + ast::{node::ArrayDecl, Const, Node}, + parser::tests::check_parser, + }, }; use boa_interner::{Interner, Sym}; @@ -97,7 +100,7 @@ fn check_combined() { "[1, \"a\", 2]", vec![ArrayDecl::from(vec![ Const::from(1).into(), - Const::from(interner.get_or_intern_static("a")).into(), + Const::from(interner.get_or_intern_static("a", utf16!("a"))).into(), Const::from(2).into(), ]) .into()], diff --git a/boa_engine/src/syntax/parser/expression/primary/async_function_expression/tests.rs b/boa_engine/src/syntax/parser/expression/primary/async_function_expression/tests.rs index fbb0512799d..349dca5e20d 100644 --- a/boa_engine/src/syntax/parser/expression/primary/async_function_expression/tests.rs +++ b/boa_engine/src/syntax/parser/expression/primary/async_function_expression/tests.rs @@ -1,12 +1,15 @@ -use crate::syntax::{ - ast::{ - node::{ - AsyncFunctionExpr, Declaration, DeclarationList, FormalParameterList, Return, - StatementList, +use crate::{ + string::utf16, + syntax::{ + ast::{ + node::{ + AsyncFunctionExpr, Declaration, DeclarationList, FormalParameterList, Return, + StatementList, + }, + Const, }, - Const, + parser::tests::check_parser, }, - parser::tests::check_parser, }; use boa_interner::{Interner, Sym}; @@ -14,7 +17,7 @@ use boa_interner::{Interner, Sym}; #[test] fn check_async_expression() { let mut interner = Interner::default(); - let add = interner.get_or_intern_static("add"); + let add = interner.get_or_intern_static("add", utf16!("add")); check_parser( "const add = async function() { return 1; @@ -42,8 +45,8 @@ fn check_async_expression() { #[test] fn check_nested_async_expression() { let mut interner = Interner::default(); - let a = interner.get_or_intern_static("a"); - let b = interner.get_or_intern_static("b"); + let a = interner.get_or_intern_static("a", utf16!("a")); + let b = interner.get_or_intern_static("b", 
utf16!("b")); check_parser( "const a = async function() { const b = async function() { diff --git a/boa_engine/src/syntax/parser/expression/primary/async_generator_expression/tests.rs b/boa_engine/src/syntax/parser/expression/primary/async_generator_expression/tests.rs index ae2c5a6cef2..b8236d2b36f 100644 --- a/boa_engine/src/syntax/parser/expression/primary/async_generator_expression/tests.rs +++ b/boa_engine/src/syntax/parser/expression/primary/async_generator_expression/tests.rs @@ -1,12 +1,15 @@ -use crate::syntax::{ - ast::{ - node::{ - AsyncGeneratorExpr, Declaration, DeclarationList, FormalParameterList, Return, - StatementList, +use crate::{ + string::utf16, + syntax::{ + ast::{ + node::{ + AsyncGeneratorExpr, Declaration, DeclarationList, FormalParameterList, Return, + StatementList, + }, + Const, }, - Const, + parser::tests::check_parser, }, - parser::tests::check_parser, }; use boa_interner::{Interner, Sym}; @@ -15,7 +18,7 @@ use boa_interner::{Interner, Sym}; #[test] fn check_async_generator_expr() { let mut interner = Interner::default(); - let add = interner.get_or_intern_static("add"); + let add = interner.get_or_intern_static("add", utf16!("add")); check_parser( "const add = async function*(){ return 1; @@ -43,8 +46,8 @@ fn check_async_generator_expr() { #[test] fn check_nested_async_generator_expr() { let mut interner = Interner::default(); - let a = interner.get_or_intern_static("a"); - let b = interner.get_or_intern_static("b"); + let a = interner.get_or_intern_static("a", utf16!("a")); + let b = interner.get_or_intern_static("b", utf16!("b")); check_parser( "const a = async function*() { const b = async function*() { diff --git a/boa_engine/src/syntax/parser/expression/primary/function_expression/tests.rs b/boa_engine/src/syntax/parser/expression/primary/function_expression/tests.rs index c535cbbfbac..9d309999c15 100644 --- a/boa_engine/src/syntax/parser/expression/primary/function_expression/tests.rs +++ 
b/boa_engine/src/syntax/parser/expression/primary/function_expression/tests.rs @@ -1,11 +1,15 @@ -use crate::syntax::{ - ast::{ - node::{ - Declaration, DeclarationList, FormalParameterList, FunctionExpr, Return, StatementList, +use crate::{ + string::utf16, + syntax::{ + ast::{ + node::{ + Declaration, DeclarationList, FormalParameterList, FunctionExpr, Return, + StatementList, + }, + Const, }, - Const, + parser::tests::check_parser, }, - parser::tests::check_parser, }; use boa_interner::{Interner, Sym}; @@ -13,7 +17,7 @@ use boa_interner::{Interner, Sym}; #[test] fn check_function_expression() { let mut interner = Interner::default(); - let add = interner.get_or_intern_static("add"); + let add = interner.get_or_intern_static("add", utf16!("add")); check_parser( "const add = function() { return 1; @@ -41,8 +45,8 @@ fn check_function_expression() { #[test] fn check_nested_function_expression() { let mut interner = Interner::default(); - let a = interner.get_or_intern_static("a"); - let b = interner.get_or_intern_static("b"); + let a = interner.get_or_intern_static("a", utf16!("a")); + let b = interner.get_or_intern_static("b", utf16!("b")); check_parser( "const a = function() { const b = function() { @@ -91,54 +95,56 @@ fn check_nested_function_expression() { #[test] fn check_function_non_reserved_keyword() { - let genast = |keyword, interner: &mut Interner| { - vec![DeclarationList::Const( - vec![Declaration::new_with_identifier( - interner.get_or_intern_static("add"), - Some( - FunctionExpr::new::<_, _, StatementList>( - Some(interner.get_or_intern_static(keyword)), - FormalParameterList::default(), - vec![Return::new::<_, _, Option>(Const::from(1), None).into()].into(), - ) - .into(), - ), - )] - .into(), - ) - .into()] - }; + macro_rules! 
genast { + ($keyword:literal, $interner:expr) => { + vec![DeclarationList::Const( + vec![Declaration::new_with_identifier( + $interner.get_or_intern_static("add", utf16!("add")), + Some( + FunctionExpr::new::<_, _, StatementList>( + Some($interner.get_or_intern_static($keyword, utf16!($keyword))), + FormalParameterList::default(), + vec![Return::new::<_, _, Option>(Const::from(1), None).into()].into(), + ) + .into(), + ), + )] + .into(), + ) + .into()] + }; + } let mut interner = Interner::default(); - let ast = genast("as", &mut interner); + let ast = genast!("as", interner); check_parser("const add = function as() { return 1; };", ast, interner); let mut interner = Interner::default(); - let ast = genast("async", &mut interner); + let ast = genast!("async", interner); check_parser("const add = function async() { return 1; };", ast, interner); let mut interner = Interner::default(); - let ast = genast("from", &mut interner); + let ast = genast!("from", interner); check_parser("const add = function from() { return 1; };", ast, interner); let mut interner = Interner::default(); - let ast = genast("get", &mut interner); + let ast = genast!("get", interner); check_parser("const add = function get() { return 1; };", ast, interner); let mut interner = Interner::default(); - let ast = genast("meta", &mut interner); + let ast = genast!("meta", interner); check_parser("const add = function meta() { return 1; };", ast, interner); let mut interner = Interner::default(); - let ast = genast("of", &mut interner); + let ast = genast!("of", interner); check_parser("const add = function of() { return 1; };", ast, interner); let mut interner = Interner::default(); - let ast = genast("set", &mut interner); + let ast = genast!("set", interner); check_parser("const add = function set() { return 1; };", ast, interner); let mut interner = Interner::default(); - let ast = genast("target", &mut interner); + let ast = genast!("target", interner); check_parser( "const add = function 
target() { return 1; };", ast, diff --git a/boa_engine/src/syntax/parser/expression/primary/generator_expression/tests.rs b/boa_engine/src/syntax/parser/expression/primary/generator_expression/tests.rs index 6b8b58a0450..f3cff0e604d 100644 --- a/boa_engine/src/syntax/parser/expression/primary/generator_expression/tests.rs +++ b/boa_engine/src/syntax/parser/expression/primary/generator_expression/tests.rs @@ -1,18 +1,22 @@ -use crate::syntax::{ - ast::{ - node::{ - Declaration, DeclarationList, FormalParameterList, GeneratorExpr, StatementList, Yield, +use crate::{ + string::utf16, + syntax::{ + ast::{ + node::{ + Declaration, DeclarationList, FormalParameterList, GeneratorExpr, StatementList, + Yield, + }, + Const, }, - Const, + parser::tests::check_parser, }, - parser::tests::check_parser, }; use boa_interner::Interner; #[test] fn check_generator_function_expression() { let mut interner = Interner::default(); - let gen = interner.get_or_intern_static("gen"); + let gen = interner.get_or_intern_static("gen", utf16!("gen")); check_parser( "const gen = function*() { yield 1; @@ -40,7 +44,7 @@ fn check_generator_function_expression() { #[test] fn check_generator_function_delegate_yield_expression() { let mut interner = Interner::default(); - let gen = interner.get_or_intern_static("gen"); + let gen = interner.get_or_intern_static("gen", utf16!("gen")); check_parser( "const gen = function*() { yield* 1; diff --git a/boa_engine/src/syntax/parser/expression/primary/object_initializer/mod.rs b/boa_engine/src/syntax/parser/expression/primary/object_initializer/mod.rs index d3870e4a167..a5ae2a11c59 100644 --- a/boa_engine/src/syntax/parser/expression/primary/object_initializer/mod.rs +++ b/boa_engine/src/syntax/parser/expression/primary/object_initializer/mod.rs @@ -10,21 +10,24 @@ #[cfg(test)] mod tests; -use crate::syntax::{ - ast::{ - node::{ - function_contains_super, has_direct_super, - object::{self, MethodDefinition}, - AsyncFunctionExpr, AsyncGeneratorExpr, 
FormalParameterList, FunctionExpr, - GeneratorExpr, Node, Object, +use crate::{ + string::utf16, + syntax::{ + ast::{ + node::{ + function_contains_super, has_direct_super, + object::{self, MethodDefinition}, + AsyncFunctionExpr, AsyncGeneratorExpr, FormalParameterList, FunctionExpr, + GeneratorExpr, Node, Object, + }, + Const, Keyword, Punctuator, + }, + lexer::{token::Numeric, Error as LexError, TokenKind}, + parser::{ + expression::{identifiers::IdentifierReference, AssignmentExpression}, + function::{FormalParameter, FormalParameters, FunctionBody, UniqueFormalParameters}, + AllowAwait, AllowIn, AllowYield, Cursor, ParseError, ParseResult, TokenParser, }, - Const, Keyword, Punctuator, - }, - lexer::{token::Numeric, Error as LexError, TokenKind}, - parser::{ - expression::{identifiers::IdentifierReference, AssignmentExpression}, - function::{FormalParameter, FormalParameters, FunctionBody, UniqueFormalParameters}, - AllowAwait, AllowIn, AllowYield, Cursor, ParseError, ParseResult, TokenParser, }, }; use boa_interner::{Interner, Sym}; @@ -539,12 +542,19 @@ where Numeric::BigInt(num) => Node::Const(Const::from(num.clone())).into(), }, TokenKind::Keyword((word, _)) => { - Node::Const(Const::from(interner.get_or_intern_static(word.as_str()))).into() + let (utf8, utf16) = word.as_str(); + Node::Const(Const::from(interner.get_or_intern_static(utf8, utf16))).into() } TokenKind::NullLiteral => Node::Const(Const::from(Sym::NULL)).into(), TokenKind::BooleanLiteral(bool) => match bool { - true => Node::Const(Const::from(interner.get_or_intern_static("true"))).into(), - false => Node::Const(Const::from(interner.get_or_intern_static("false"))).into(), + true => Node::Const(Const::from( + interner.get_or_intern_static("true", utf16!("true")), + )) + .into(), + false => Node::Const(Const::from( + interner.get_or_intern_static("false", utf16!("false")), + )) + .into(), }, _ => return Err(ParseError::AbruptEnd), }; diff --git 
a/boa_engine/src/syntax/parser/expression/primary/object_initializer/tests.rs b/boa_engine/src/syntax/parser/expression/primary/object_initializer/tests.rs index 592d21df628..028323c7193 100644 --- a/boa_engine/src/syntax/parser/expression/primary/object_initializer/tests.rs +++ b/boa_engine/src/syntax/parser/expression/primary/object_initializer/tests.rs @@ -1,13 +1,17 @@ -use crate::syntax::{ - ast::{ - node::{ - object::{MethodDefinition, PropertyDefinition}, - AsyncFunctionExpr, AsyncGeneratorExpr, Declaration, DeclarationList, FormalParameter, - FormalParameterList, FormalParameterListFlags, FunctionExpr, Identifier, Node, Object, +use crate::{ + string::utf16, + syntax::{ + ast::{ + node::{ + object::{MethodDefinition, PropertyDefinition}, + AsyncFunctionExpr, AsyncGeneratorExpr, Declaration, DeclarationList, + FormalParameter, FormalParameterList, FormalParameterListFlags, FunctionExpr, + Identifier, Node, Object, + }, + Const, }, - Const, + parser::tests::{check_invalid, check_parser}, }, - parser::tests::{check_invalid, check_parser}, }; use boa_interner::Interner; @@ -17,8 +21,14 @@ fn check_object_literal() { let mut interner = Interner::default(); let object_properties = vec![ - PropertyDefinition::property(interner.get_or_intern_static("a"), Const::from(true)), - PropertyDefinition::property(interner.get_or_intern_static("b"), Const::from(false)), + PropertyDefinition::property( + interner.get_or_intern_static("a", utf16!("a")), + Const::from(true), + ), + PropertyDefinition::property( + interner.get_or_intern_static("b", utf16!("b")), + Const::from(false), + ), ]; check_parser( @@ -29,7 +39,7 @@ fn check_object_literal() { ", vec![DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Object::from(object_properties).into()), )] .into(), @@ -45,14 +55,17 @@ fn check_object_short_function() { let mut interner = Interner::default(); let object_properties 
= vec![ - PropertyDefinition::property(interner.get_or_intern_static("a"), Const::from(true)), + PropertyDefinition::property( + interner.get_or_intern_static("a", utf16!("a")), + Const::from(true), + ), PropertyDefinition::method_definition( MethodDefinition::Ordinary(FunctionExpr::new( None, FormalParameterList::default(), vec![], )), - interner.get_or_intern_static("b"), + interner.get_or_intern_static("b", utf16!("b")), ), ]; @@ -64,7 +77,7 @@ fn check_object_short_function() { ", vec![DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Object::from(object_properties).into()), )] .into(), @@ -80,14 +93,17 @@ fn check_object_short_function_arguments() { let mut interner = Interner::default(); let object_properties = vec![ - PropertyDefinition::property(interner.get_or_intern_static("a"), Const::from(true)), + PropertyDefinition::property( + interner.get_or_intern_static("a", utf16!("a")), + Const::from(true), + ), PropertyDefinition::method_definition( MethodDefinition::Ordinary(FunctionExpr::new( None, FormalParameterList { parameters: Box::new([FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("test"), + interner.get_or_intern_static("test", utf16!("test")), None, ), false, @@ -97,7 +113,7 @@ fn check_object_short_function_arguments() { }, vec![], )), - interner.get_or_intern_static("b"), + interner.get_or_intern_static("b", utf16!("b")), ), ]; @@ -109,7 +125,7 @@ fn check_object_short_function_arguments() { ", vec![DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Object::from(object_properties).into()), )] .into(), @@ -124,14 +140,17 @@ fn check_object_getter() { let mut interner = Interner::default(); let object_properties = vec![ - PropertyDefinition::property(interner.get_or_intern_static("a"), 
Const::from(true)), + PropertyDefinition::property( + interner.get_or_intern_static("a", utf16!("a")), + Const::from(true), + ), PropertyDefinition::method_definition( MethodDefinition::Get(FunctionExpr::new( None, FormalParameterList::default(), vec![], )), - interner.get_or_intern_static("b"), + interner.get_or_intern_static("b", utf16!("b")), ), ]; @@ -143,7 +162,7 @@ fn check_object_getter() { ", vec![DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Object::from(object_properties).into()), )] .into(), @@ -158,14 +177,17 @@ fn check_object_setter() { let mut interner = Interner::default(); let object_properties = vec![ - PropertyDefinition::property(interner.get_or_intern_static("a"), Const::from(true)), + PropertyDefinition::property( + interner.get_or_intern_static("a", utf16!("a")), + Const::from(true), + ), PropertyDefinition::method_definition( MethodDefinition::Set(FunctionExpr::new( None, FormalParameterList { parameters: Box::new([FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("test"), + interner.get_or_intern_static("test", utf16!("test")), None, ), false, @@ -175,7 +197,7 @@ fn check_object_setter() { }, vec![], )), - interner.get_or_intern_static("b"), + interner.get_or_intern_static("b", utf16!("b")), ), ]; @@ -187,7 +209,7 @@ fn check_object_setter() { ", vec![DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Object::from(object_properties).into()), )] .into(), @@ -207,7 +229,7 @@ fn check_object_short_function_get() { FormalParameterList::default(), vec![], )), - interner.get_or_intern_static("get"), + interner.get_or_intern_static("get", utf16!("get")), )]; check_parser( @@ -217,7 +239,7 @@ fn check_object_short_function_get() { ", vec![DeclarationList::Const( 
vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Object::from(object_properties).into()), )] .into(), @@ -237,7 +259,7 @@ fn check_object_short_function_set() { FormalParameterList::default(), vec![], )), - interner.get_or_intern_static("set"), + interner.get_or_intern_static("set", utf16!("set")), )]; check_parser( @@ -247,7 +269,7 @@ fn check_object_short_function_set() { ", vec![DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Object::from(object_properties).into()), )] .into(), @@ -262,8 +284,8 @@ fn check_object_shorthand_property_names() { let mut interner = Interner::default(); let object_properties = vec![PropertyDefinition::property( - interner.get_or_intern_static("a"), - Identifier::new(interner.get_or_intern_static("a")), + interner.get_or_intern_static("a", utf16!("a")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), )]; check_parser( @@ -273,7 +295,7 @@ fn check_object_shorthand_property_names() { vec![ DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), Some(Const::from(true).into()), )] .into(), @@ -281,7 +303,7 @@ fn check_object_shorthand_property_names() { .into(), DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Object::from(object_properties).into()), )] .into(), @@ -298,12 +320,12 @@ fn check_object_shorthand_multiple_properties() { let object_properties = vec![ PropertyDefinition::property( - interner.get_or_intern_static("a"), - Identifier::new(interner.get_or_intern_static("a")), + interner.get_or_intern_static("a", utf16!("a")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), ), 
PropertyDefinition::property( - interner.get_or_intern_static("b"), - Identifier::new(interner.get_or_intern_static("b")), + interner.get_or_intern_static("b", utf16!("b")), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ), ]; @@ -315,7 +337,7 @@ fn check_object_shorthand_multiple_properties() { vec![ DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), Some(Const::from(true).into()), )] .into(), @@ -323,7 +345,7 @@ fn check_object_shorthand_multiple_properties() { .into(), DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("b"), + interner.get_or_intern_static("b", utf16!("b")), Some(Const::from(false).into()), )] .into(), @@ -331,7 +353,7 @@ fn check_object_shorthand_multiple_properties() { .into(), DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Object::from(object_properties).into()), )] .into(), @@ -347,8 +369,13 @@ fn check_object_spread() { let mut interner = Interner::default(); let object_properties = vec![ - PropertyDefinition::property(interner.get_or_intern_static("a"), Const::from(1)), - PropertyDefinition::spread_object(Identifier::new(interner.get_or_intern_static("b"))), + PropertyDefinition::property( + interner.get_or_intern_static("a", utf16!("a")), + Const::from(1), + ), + PropertyDefinition::spread_object(Identifier::new( + interner.get_or_intern_static("b", utf16!("b")), + )), ]; check_parser( @@ -356,7 +383,7 @@ fn check_object_spread() { ", vec![DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Object::from(object_properties).into()), )] .into(), @@ -376,7 +403,7 @@ fn check_async_method() { FormalParameterList::default(), vec![], )), - 
interner.get_or_intern_static("dive"), + interner.get_or_intern_static("dive", utf16!("dive")), )]; check_parser( @@ -386,7 +413,7 @@ fn check_async_method() { ", vec![DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Object::from(object_properties).into()), )] .into(), @@ -406,7 +433,7 @@ fn check_async_generator_method() { FormalParameterList::default(), vec![], )), - interner.get_or_intern_static("vroom"), + interner.get_or_intern_static("vroom", utf16!("vroom")), )]; check_parser( @@ -416,7 +443,7 @@ fn check_async_generator_method() { ", vec![DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Object::from(object_properties).into()), )] .into(), @@ -458,7 +485,9 @@ fn check_async_ordinary_method() { FormalParameterList::default(), vec![], )), - Node::Const(Const::from(interner.get_or_intern_static("async"))), + Node::Const(Const::from( + interner.get_or_intern_static("async", utf16!("async")), + )), )]; check_parser( @@ -468,7 +497,7 @@ fn check_async_ordinary_method() { ", vec![DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Object::from(object_properties).into()), )] .into(), @@ -483,7 +512,9 @@ fn check_async_property() { let mut interner = Interner::default(); let object_properties = vec![PropertyDefinition::property( - Node::Const(Const::from(interner.get_or_intern_static("async"))), + Node::Const(Const::from( + interner.get_or_intern_static("async", utf16!("async")), + )), Const::from(true), )]; @@ -494,7 +525,7 @@ fn check_async_property() { ", vec![DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), 
Some(Object::from(object_properties).into()), )] .into(), diff --git a/boa_engine/src/syntax/parser/expression/primary/tests.rs b/boa_engine/src/syntax/parser/expression/primary/tests.rs index 2c308fd621e..a754fc196a5 100644 --- a/boa_engine/src/syntax/parser/expression/primary/tests.rs +++ b/boa_engine/src/syntax/parser/expression/primary/tests.rs @@ -1,4 +1,7 @@ -use crate::syntax::{ast::Const, parser::tests::check_parser}; +use crate::{ + string::utf16, + syntax::{ast::Const, parser::tests::check_parser}, +}; use boa_interner::{Interner, Sym}; #[test] @@ -14,7 +17,7 @@ fn check_string() { let mut interner = Interner::default(); check_parser( "\"hello\"", - vec![Const::from(interner.get_or_intern_static("hello")).into()], + vec![Const::from(interner.get_or_intern_static("hello", utf16!("hello"))).into()], interner, ); } diff --git a/boa_engine/src/syntax/parser/expression/tests.rs b/boa_engine/src/syntax/parser/expression/tests.rs index f17bde69b14..56351e6d59f 100644 --- a/boa_engine/src/syntax/parser/expression/tests.rs +++ b/boa_engine/src/syntax/parser/expression/tests.rs @@ -1,10 +1,13 @@ -use crate::syntax::{ - ast::op::{AssignOp, BitOp, CompOp, LogOp, NumOp}, - ast::{ - node::{BinOp, Call, Declaration, DeclarationList, Identifier, New}, - Const, Node, +use crate::{ + string::utf16, + syntax::{ + ast::op::{AssignOp, BitOp, CompOp, LogOp, NumOp}, + ast::{ + node::{BinOp, Call, Declaration, DeclarationList, Identifier, New}, + Const, Node, + }, + parser::tests::{check_invalid, check_parser}, }, - parser::tests::{check_invalid, check_parser}, }; use boa_interner::{Interner, Sym}; @@ -16,8 +19,8 @@ fn check_numeric_operations() { "a + b", vec![BinOp::new( NumOp::Add, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -28,7 +31,7 @@ fn 
check_numeric_operations() { "a+1", vec![BinOp::new( NumOp::Add, - Identifier::new(interner.get_or_intern_static("a")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), Const::from(1), ) .into()], @@ -40,8 +43,8 @@ fn check_numeric_operations() { "a - b", vec![BinOp::new( NumOp::Sub, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -52,7 +55,7 @@ fn check_numeric_operations() { "a-1", vec![BinOp::new( NumOp::Sub, - Identifier::new(interner.get_or_intern_static("a")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), Const::from(1), ) .into()], @@ -64,8 +67,8 @@ fn check_numeric_operations() { "a / b", vec![BinOp::new( NumOp::Div, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -76,7 +79,7 @@ fn check_numeric_operations() { "a/2", vec![BinOp::new( NumOp::Div, - Identifier::new(interner.get_or_intern_static("a")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), Const::from(2), ) .into()], @@ -88,11 +91,11 @@ fn check_numeric_operations() { "let myRegex = /=/;", vec![DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("myRegex"), + interner.get_or_intern_static("myRegex", utf16!("myRegex")), Node::from(New::from(Call::new( Identifier::new(Sym::REGEXP), vec![ - Node::from(Const::from(interner.get_or_intern_static("="))), + Node::from(Const::from(interner.get_or_intern_static("=", utf16!("=")))), Node::from(Const::from(Sym::EMPTY_STRING)), ], ))), @@ -108,8 +111,8 @@ fn check_numeric_operations() { "a * b", vec![BinOp::new( NumOp::Mul, - 
Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -120,7 +123,7 @@ fn check_numeric_operations() { "a*2", vec![BinOp::new( NumOp::Mul, - Identifier::new(interner.get_or_intern_static("a")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), Const::from(2), ) .into()], @@ -132,8 +135,8 @@ fn check_numeric_operations() { "a ** b", vec![BinOp::new( NumOp::Exp, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -144,7 +147,7 @@ fn check_numeric_operations() { "a**2", vec![BinOp::new( NumOp::Exp, - Identifier::new(interner.get_or_intern_static("a")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), Const::from(2), ) .into()], @@ -156,8 +159,8 @@ fn check_numeric_operations() { "a % b", vec![BinOp::new( NumOp::Mod, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -168,7 +171,7 @@ fn check_numeric_operations() { "a%2", vec![BinOp::new( NumOp::Mod, - Identifier::new(interner.get_or_intern_static("a")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), Const::from(2), ) .into()], @@ -186,13 +189,13 @@ fn check_complex_numeric_operations() { NumOp::Add, BinOp::new( NumOp::Add, - Identifier::new(interner.get_or_intern_static("a")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), BinOp::new( NumOp::Mul, - 
Identifier::new(interner.get_or_intern_static("d")), + Identifier::new(interner.get_or_intern_static("d", utf16!("d"))), BinOp::new( NumOp::Sub, - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), Const::from(3), ), ), @@ -212,8 +215,8 @@ fn check_bitwise_operations() { "a & b", vec![BinOp::new( BitOp::And, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -224,8 +227,8 @@ fn check_bitwise_operations() { "a&b", vec![BinOp::new( BitOp::And, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -236,8 +239,8 @@ fn check_bitwise_operations() { "a | b", vec![BinOp::new( BitOp::Or, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -248,8 +251,8 @@ fn check_bitwise_operations() { "a|b", vec![BinOp::new( BitOp::Or, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -260,8 +263,8 @@ fn check_bitwise_operations() { "a ^ b", vec![BinOp::new( BitOp::Xor, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + 
Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -272,8 +275,8 @@ fn check_bitwise_operations() { "a^b", vec![BinOp::new( BitOp::Xor, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -284,8 +287,8 @@ fn check_bitwise_operations() { "a << b", vec![BinOp::new( BitOp::Shl, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -296,8 +299,8 @@ fn check_bitwise_operations() { "a<> b", vec![BinOp::new( BitOp::Shr, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -320,8 +323,8 @@ fn check_bitwise_operations() { "a>>b", vec![BinOp::new( BitOp::Shr, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -336,8 +339,8 @@ fn check_assign_operations() { "a += b", vec![BinOp::new( AssignOp::Add, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -348,8 +351,8 @@ fn check_assign_operations() { "a -= b", vec![BinOp::new( AssignOp::Sub, - 
Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -360,8 +363,8 @@ fn check_assign_operations() { "a *= b", vec![BinOp::new( AssignOp::Mul, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -372,8 +375,8 @@ fn check_assign_operations() { "a **= b", vec![BinOp::new( AssignOp::Exp, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -384,8 +387,8 @@ fn check_assign_operations() { "a /= b", vec![BinOp::new( AssignOp::Div, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -396,8 +399,8 @@ fn check_assign_operations() { "a %= b", vec![BinOp::new( AssignOp::Mod, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -408,8 +411,8 @@ fn check_assign_operations() { "a &= b", vec![BinOp::new( AssignOp::And, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + 
Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -420,8 +423,8 @@ fn check_assign_operations() { "a |= b", vec![BinOp::new( AssignOp::Or, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -432,8 +435,8 @@ fn check_assign_operations() { "a ^= b", vec![BinOp::new( AssignOp::Xor, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -444,8 +447,8 @@ fn check_assign_operations() { "a <<= b", vec![BinOp::new( AssignOp::Shl, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -456,8 +459,8 @@ fn check_assign_operations() { "a >>= b", vec![BinOp::new( AssignOp::Shr, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -468,8 +471,8 @@ fn check_assign_operations() { "a >>>= b", vec![BinOp::new( AssignOp::Ushr, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -480,7 +483,7 @@ fn check_assign_operations() { "a %= 10 / 2", vec![BinOp::new( AssignOp::Mod, - 
Identifier::new(interner.get_or_intern_static("a")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), BinOp::new(NumOp::Div, Const::from(10), Const::from(2)), ) .into()], @@ -492,8 +495,8 @@ fn check_assign_operations() { "a ??= b", vec![BinOp::new( AssignOp::Coalesce, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -507,8 +510,8 @@ fn check_relational_operations() { "a < b", vec![BinOp::new( CompOp::LessThan, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -519,8 +522,8 @@ fn check_relational_operations() { "a > b", vec![BinOp::new( CompOp::GreaterThan, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -531,8 +534,8 @@ fn check_relational_operations() { "a <= b", vec![BinOp::new( CompOp::LessThanOrEqual, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -543,8 +546,8 @@ fn check_relational_operations() { "a >= b", vec![BinOp::new( CompOp::GreaterThanOrEqual, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + 
Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ) .into()], interner, @@ -555,8 +558,8 @@ fn check_relational_operations() { "p in o", vec![BinOp::new( CompOp::In, - Identifier::new(interner.get_or_intern_static("p")), - Identifier::new(interner.get_or_intern_static("o")), + Identifier::new(interner.get_or_intern_static("p", utf16!("p"))), + Identifier::new(interner.get_or_intern_static("o", utf16!("o"))), ) .into()], interner, @@ -572,17 +575,17 @@ fn check_logical_expressions() { LogOp::Or, BinOp::new( LogOp::And, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ), BinOp::new( LogOp::Or, BinOp::new( LogOp::And, - Identifier::new(interner.get_or_intern_static("c")), - Identifier::new(interner.get_or_intern_static("d")), + Identifier::new(interner.get_or_intern_static("c", utf16!("c"))), + Identifier::new(interner.get_or_intern_static("d", utf16!("d"))), ), - Identifier::new(interner.get_or_intern_static("e")), + Identifier::new(interner.get_or_intern_static("e", utf16!("e"))), ), ) .into()], @@ -596,10 +599,10 @@ fn check_logical_expressions() { LogOp::Coalesce, BinOp::new( LogOp::Coalesce, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ), - Identifier::new(interner.get_or_intern_static("c")), + Identifier::new(interner.get_or_intern_static("c", utf16!("c"))), ) .into()], interner, @@ -611,14 +614,15 @@ fn check_logical_expressions() { check_invalid("a || b ?? 
c"); } -#[track_caller] -fn check_non_reserved_identifier(keyword: &'static str) { - let mut interner = Interner::default(); - check_parser( - format!("({})", keyword).as_str(), - vec![Identifier::new(interner.get_or_intern_static(keyword)).into()], - interner, - ); +macro_rules! check_non_reserved_identifier { + ($keyword:literal) => {{ + let mut interner = Interner::default(); + check_parser( + format!("({})", $keyword).as_str(), + vec![Identifier::new(interner.get_or_intern_static($keyword, utf16!($keyword))).into()], + interner, + ); + }}; } #[test] @@ -629,12 +633,12 @@ fn check_non_reserved_identifiers() { // Identifier is not allowed: as, async, from, get, meta, of, set, // and target. - check_non_reserved_identifier("as"); - check_non_reserved_identifier("async"); - check_non_reserved_identifier("from"); - check_non_reserved_identifier("get"); - check_non_reserved_identifier("meta"); - check_non_reserved_identifier("of"); - check_non_reserved_identifier("set"); - check_non_reserved_identifier("target"); + check_non_reserved_identifier!("as"); + check_non_reserved_identifier!("async"); + check_non_reserved_identifier!("from"); + check_non_reserved_identifier!("get"); + check_non_reserved_identifier!("meta"); + check_non_reserved_identifier!("of"); + check_non_reserved_identifier!("set"); + check_non_reserved_identifier!("target"); } diff --git a/boa_engine/src/syntax/parser/function/mod.rs b/boa_engine/src/syntax/parser/function/mod.rs index 45cb1b33ec1..aebec3ecb74 100644 --- a/boa_engine/src/syntax/parser/function/mod.rs +++ b/boa_engine/src/syntax/parser/function/mod.rs @@ -10,17 +10,20 @@ #[cfg(test)] mod tests; -use crate::syntax::{ - ast::{ - node::{self, FormalParameterList}, - node::{declaration::Declaration, FormalParameterListFlags}, - Punctuator, - }, - lexer::{Error as LexError, InputElement, TokenKind}, - parser::{ - expression::{BindingIdentifier, Initializer}, - statement::{ArrayBindingPattern, ObjectBindingPattern, StatementList}, - 
AllowAwait, AllowYield, Cursor, ParseError, TokenParser, +use crate::{ + string::utf16, + syntax::{ + ast::{ + node::{self, FormalParameterList}, + node::{declaration::Declaration, FormalParameterListFlags}, + Punctuator, + }, + lexer::{Error as LexError, InputElement, TokenKind}, + parser::{ + expression::{BindingIdentifier, Initializer}, + statement::{ArrayBindingPattern, ObjectBindingPattern, StatementList}, + AllowAwait, AllowYield, Cursor, ParseError, TokenParser, + }, }, }; use boa_interner::{Interner, Sym}; @@ -529,7 +532,11 @@ where return Ok(Vec::new().into()); } TokenKind::StringLiteral(string) - if interner.resolve_expect(*string) == "use strict" => + if interner.resolve_expect(*string).join( + |s| s == "use strict", + |g| g == utf16!("use strict"), + true, + ) => { cursor.set_strict_mode(true); strict = true; diff --git a/boa_engine/src/syntax/parser/function/tests.rs b/boa_engine/src/syntax/parser/function/tests.rs index 9fda69e5b85..29dad43b181 100644 --- a/boa_engine/src/syntax/parser/function/tests.rs +++ b/boa_engine/src/syntax/parser/function/tests.rs @@ -1,4 +1,5 @@ use crate::{ + string::utf16, syntax::{ ast::node::{ ArrowFunctionDecl, BinOp, Declaration, DeclarationList, FormalParameter, @@ -18,16 +19,23 @@ fn check_basic() { check_parser( "function foo(a) { return a; }", vec![FunctionDecl::new( - interner.get_or_intern_static("foo"), + interner.get_or_intern_static("foo", utf16!("foo")), FormalParameterList { parameters: Box::new([FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("a"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("a", utf16!("a")), + None, + ), false, )]), flags: FormalParameterListFlags::default(), length: 1, }, - vec![Return::new(Identifier::from(interner.get_or_intern_static("a")), None).into()], + vec![Return::new( + Identifier::from(interner.get_or_intern_static("a", utf16!("a"))), + None, + ) + .into()], ) .into()], interner, @@ -41,15 +49,21 @@ fn 
check_duplicates_strict_off() { check_parser( "function foo(a, a) { return a; }", vec![FunctionDecl::new( - interner.get_or_intern_static("foo"), + interner.get_or_intern_static("foo", utf16!("foo")), FormalParameterList { parameters: Box::new([ FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("a"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("a", utf16!("a")), + None, + ), false, ), FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("a"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("a", utf16!("a")), + None, + ), false, ), ]), @@ -57,7 +71,11 @@ fn check_duplicates_strict_off() { .union(FormalParameterListFlags::HAS_DUPLICATES), length: 2, }, - vec![Return::new(Identifier::from(interner.get_or_intern_static("a")), None).into()], + vec![Return::new( + Identifier::from(interner.get_or_intern_static("a", utf16!("a"))), + None, + ) + .into()], ) .into()], interner, @@ -81,16 +99,23 @@ fn check_basic_semicolon_insertion() { check_parser( "function foo(a) { return a }", vec![FunctionDecl::new( - interner.get_or_intern_static("foo"), + interner.get_or_intern_static("foo", utf16!("foo")), FormalParameterList { parameters: Box::new([FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("a"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("a", utf16!("a")), + None, + ), false, )]), flags: FormalParameterListFlags::default(), length: 1, }, - vec![Return::new(Identifier::from(interner.get_or_intern_static("a")), None).into()], + vec![Return::new( + Identifier::from(interner.get_or_intern_static("a", utf16!("a"))), + None, + ) + .into()], ) .into()], interner, @@ -104,10 +129,13 @@ fn check_empty_return() { check_parser( "function foo(a) { return; }", vec![FunctionDecl::new( - interner.get_or_intern_static("foo"), + interner.get_or_intern_static("foo", utf16!("foo")), 
FormalParameterList { parameters: Box::new([FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("a"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("a", utf16!("a")), + None, + ), false, )]), flags: FormalParameterListFlags::default(), @@ -127,10 +155,13 @@ fn check_empty_return_semicolon_insertion() { check_parser( "function foo(a) { return }", vec![FunctionDecl::new( - interner.get_or_intern_static("foo"), + interner.get_or_intern_static("foo", utf16!("foo")), FormalParameterList { parameters: Box::new([FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("a"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("a", utf16!("a")), + None, + ), false, )]), flags: FormalParameterListFlags::default(), @@ -150,15 +181,21 @@ fn check_rest_operator() { check_parser( "function foo(a, ...b) {}", vec![FunctionDecl::new( - interner.get_or_intern_static("foo"), + interner.get_or_intern_static("foo", utf16!("foo")), FormalParameterList { parameters: Box::new([ FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("a"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("a", utf16!("a")), + None, + ), false, ), FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("b"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("b", utf16!("b")), + None, + ), true, ), ]), @@ -183,7 +220,10 @@ fn check_arrow_only_rest() { None, FormalParameterList { parameters: Box::new([FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("a"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("a", utf16!("a")), + None, + ), true, )]), flags: FormalParameterListFlags::empty() @@ -208,15 +248,24 @@ fn check_arrow_rest() { FormalParameterList { parameters: Box::new([ FormalParameter::new( - 
Declaration::new_with_identifier(interner.get_or_intern_static("a"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("a", utf16!("a")), + None, + ), false, ), FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("b"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("b", utf16!("b")), + None, + ), false, ), FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("c"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("c", utf16!("c")), + None, + ), true, ), ]), @@ -242,11 +291,17 @@ fn check_arrow() { FormalParameterList { parameters: Box::new([ FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("a"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("a", utf16!("a")), + None, + ), false, ), FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("b"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("b", utf16!("b")), + None, + ), false, ), ]), @@ -256,8 +311,8 @@ fn check_arrow() { vec![Return::new( BinOp::new( NumOp::Add, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ), None, ) @@ -279,11 +334,17 @@ fn check_arrow_semicolon_insertion() { FormalParameterList { parameters: Box::new([ FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("a"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("a", utf16!("a")), + None, + ), false, ), FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("b"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("b", utf16!("b")), + None, + ), false, ), ]), @@ -293,8 
+354,8 @@ fn check_arrow_semicolon_insertion() { vec![Return::new( BinOp::new( NumOp::Add, - Identifier::new(interner.get_or_intern_static("a")), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ), None, ) @@ -316,11 +377,17 @@ fn check_arrow_epty_return() { FormalParameterList { parameters: Box::new([ FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("a"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("a", utf16!("a")), + None, + ), false, ), FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("b"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("b", utf16!("b")), + None, + ), false, ), ]), @@ -345,11 +412,17 @@ fn check_arrow_empty_return_semicolon_insertion() { FormalParameterList { parameters: Box::new([ FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("a"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("a", utf16!("a")), + None, + ), false, ), FormalParameter::new( - Declaration::new_with_identifier(interner.get_or_intern_static("b"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("b", utf16!("b")), + None, + ), false, ), ]), @@ -370,14 +443,14 @@ fn check_arrow_assignment() { "let foo = (a) => { return a };", vec![DeclarationList::Let( vec![Declaration::new_with_identifier( - Identifier::new(interner.get_or_intern_static("foo")), + Identifier::new(interner.get_or_intern_static("foo", utf16!("foo"))), Some( ArrowFunctionDecl::new( - Some(interner.get_or_intern_static("foo")), + Some(interner.get_or_intern_static("foo", utf16!("foo"))), FormalParameterList { parameters: Box::new([FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + 
interner.get_or_intern_static("a", utf16!("a")), None, ), false, @@ -386,7 +459,10 @@ fn check_arrow_assignment() { length: 1, }, vec![Return::new::, Option<_>>( - Some(Identifier::new(interner.get_or_intern_static("a")).into()), + Some( + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))) + .into(), + ), None, ) .into()], @@ -408,14 +484,14 @@ fn check_arrow_assignment_nobrackets() { "let foo = (a) => a;", vec![DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("foo"), + interner.get_or_intern_static("foo", utf16!("foo")), Some( ArrowFunctionDecl::new( - interner.get_or_intern_static("foo"), + interner.get_or_intern_static("foo", utf16!("foo")), FormalParameterList { parameters: Box::new([FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), None, ), false, @@ -424,7 +500,10 @@ fn check_arrow_assignment_nobrackets() { length: 1, }, vec![Return::new::, Option<_>>( - Some(Identifier::new(interner.get_or_intern_static("a")).into()), + Some( + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))) + .into(), + ), None, ) .into()], @@ -446,14 +525,14 @@ fn check_arrow_assignment_noparenthesis() { "let foo = a => { return a };", vec![DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("foo"), + interner.get_or_intern_static("foo", utf16!("foo")), Some( ArrowFunctionDecl::new( - Some(interner.get_or_intern_static("foo")), + Some(interner.get_or_intern_static("foo", utf16!("foo"))), FormalParameterList { parameters: Box::new([FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), None, ), false, @@ -462,7 +541,10 @@ fn check_arrow_assignment_noparenthesis() { length: 1, }, vec![Return::new::, Option<_>>( - Some(Identifier::new(interner.get_or_intern_static("a")).into()), + Some( + 
Identifier::new(interner.get_or_intern_static("a", utf16!("a"))) + .into(), + ), None, ) .into()], @@ -484,14 +566,14 @@ fn check_arrow_assignment_noparenthesis_nobrackets() { "let foo = a => a;", vec![DeclarationList::Let( vec![Declaration::new_with_identifier( - Identifier::new(interner.get_or_intern_static("foo")), + Identifier::new(interner.get_or_intern_static("foo", utf16!("foo"))), Some( ArrowFunctionDecl::new( - Some(interner.get_or_intern_static("foo")), + Some(interner.get_or_intern_static("foo", utf16!("foo"))), FormalParameterList { parameters: Box::new([FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), None, ), false, @@ -500,7 +582,10 @@ fn check_arrow_assignment_noparenthesis_nobrackets() { length: 1, }, vec![Return::new::, Option<_>>( - Some(Identifier::new(interner.get_or_intern_static("a")).into()), + Some( + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))) + .into(), + ), None, ) .into()], @@ -522,22 +607,22 @@ fn check_arrow_assignment_2arg() { "let foo = (a, b) => { return a };", vec![DeclarationList::Let( vec![Declaration::new_with_identifier( - Identifier::new(interner.get_or_intern_static("foo")), + Identifier::new(interner.get_or_intern_static("foo", utf16!("foo"))), Some( ArrowFunctionDecl::new( - Some(interner.get_or_intern_static("foo")), + Some(interner.get_or_intern_static("foo", utf16!("foo"))), FormalParameterList { parameters: Box::new([ FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), None, ), false, ), FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("b"), + interner.get_or_intern_static("b", utf16!("b")), None, ), false, @@ -547,7 +632,10 @@ fn check_arrow_assignment_2arg() { length: 2, }, vec![Return::new::, Option<_>>( - Some(Identifier::new(interner.get_or_intern_static("a")).into()), 
+ Some( + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))) + .into(), + ), None, ) .into()], @@ -569,22 +657,22 @@ fn check_arrow_assignment_2arg_nobrackets() { "let foo = (a, b) => a;", vec![DeclarationList::Let( vec![Declaration::new_with_identifier( - Identifier::new(interner.get_or_intern_static("foo")), + Identifier::new(interner.get_or_intern_static("foo", utf16!("foo"))), Some( ArrowFunctionDecl::new( - Some(interner.get_or_intern_static("foo")), + Some(interner.get_or_intern_static("foo", utf16!("foo"))), FormalParameterList { parameters: Box::new([ FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), None, ), false, ), FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("b"), + interner.get_or_intern_static("b", utf16!("b")), None, ), false, @@ -594,7 +682,10 @@ fn check_arrow_assignment_2arg_nobrackets() { length: 2, }, vec![Return::new::, Option<_>>( - Some(Identifier::new(interner.get_or_intern_static("a")).into()), + Some( + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))) + .into(), + ), None, ) .into()], @@ -616,29 +707,29 @@ fn check_arrow_assignment_3arg() { "let foo = (a, b, c) => { return a };", vec![DeclarationList::Let( vec![Declaration::new_with_identifier( - Identifier::new(interner.get_or_intern_static("foo")), + Identifier::new(interner.get_or_intern_static("foo", utf16!("foo"))), Some( ArrowFunctionDecl::new( - Some(interner.get_or_intern_static("foo")), + Some(interner.get_or_intern_static("foo", utf16!("foo"))), FormalParameterList { parameters: Box::new([ FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), None, ), false, ), FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("b"), + interner.get_or_intern_static("b", utf16!("b")), None, ), false, ), 
FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("c"), + interner.get_or_intern_static("c", utf16!("c")), None, ), false, @@ -648,7 +739,10 @@ fn check_arrow_assignment_3arg() { length: 3, }, vec![Return::new::, Option<_>>( - Some(Identifier::new(interner.get_or_intern_static("a")).into()), + Some( + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))) + .into(), + ), None, ) .into()], @@ -670,29 +764,29 @@ fn check_arrow_assignment_3arg_nobrackets() { "let foo = (a, b, c) => a;", vec![DeclarationList::Let( vec![Declaration::new_with_identifier( - Identifier::new(interner.get_or_intern_static("foo")), + Identifier::new(interner.get_or_intern_static("foo", utf16!("foo"))), Some( ArrowFunctionDecl::new( - Some(interner.get_or_intern_static("foo")), + Some(interner.get_or_intern_static("foo", utf16!("foo"))), FormalParameterList { parameters: Box::new([ FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), None, ), false, ), FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("b"), + interner.get_or_intern_static("b", utf16!("b")), None, ), false, ), FormalParameter::new( Declaration::new_with_identifier( - interner.get_or_intern_static("c"), + interner.get_or_intern_static("c", utf16!("c")), None, ), false, @@ -702,7 +796,10 @@ fn check_arrow_assignment_3arg_nobrackets() { length: 3, }, vec![Return::new::, Option<_>>( - Some(Identifier::new(interner.get_or_intern_static("a")).into()), + Some( + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))) + .into(), + ), None, ) .into()], diff --git a/boa_engine/src/syntax/parser/mod.rs b/boa_engine/src/syntax/parser/mod.rs index b7f9a7d00be..64ae7d0f7c1 100644 --- a/boa_engine/src/syntax/parser/mod.rs +++ b/boa_engine/src/syntax/parser/mod.rs @@ -12,6 +12,7 @@ pub mod error; mod tests; use crate::{ + string::utf16, syntax::{ ast::{ 
node::{ContainsSymbol, FormalParameterList, StatementList}, @@ -23,7 +24,7 @@ use crate::{ function::{FormalParameters, FunctionStatementList}, }, }, - Context, + Context, JsString, }; use boa_interner::{Interner, Sym}; use rustc_hash::{FxHashMap, FxHashSet}; @@ -268,7 +269,11 @@ impl Script { match tok.kind() { // Set the strict mode TokenKind::StringLiteral(string) - if context.interner_mut().resolve_expect(*string) == "use strict" => + if context.interner_mut().resolve_expect(*string).join( + |s| s == "use strict", + |g| g == utf16!("use strict"), + true, + ) => { cursor.set_strict_mode(true); strict = true; @@ -316,7 +321,7 @@ impl Script { .realm .global_property_map .string_property_map() - .get(name_str); + .get(&name_str.into_common::(false)); let non_configurable_binding_exists = match desc { Some(desc) => !matches!(desc.configurable(), Some(true)), None => false, diff --git a/boa_engine/src/syntax/parser/statement/block/tests.rs b/boa_engine/src/syntax/parser/statement/block/tests.rs index 8a84ee9b33a..c5167218be8 100644 --- a/boa_engine/src/syntax/parser/statement/block/tests.rs +++ b/boa_engine/src/syntax/parser/statement/block/tests.rs @@ -1,14 +1,17 @@ //! Block statement parsing tests. 
-use crate::syntax::{ - ast::{ - node::{ - Assign, Block, Call, Declaration, DeclarationList, FormalParameterList, FunctionDecl, - Identifier, Node, Return, UnaryOp, +use crate::{ + string::utf16, + syntax::{ + ast::{ + node::{ + Assign, Block, Call, Declaration, DeclarationList, FormalParameterList, + FunctionDecl, Identifier, Node, Return, UnaryOp, + }, + op, Const, }, - op, Const, + parser::tests::check_parser, }, - parser::tests::check_parser, }; use boa_interner::Interner; @@ -29,7 +32,7 @@ fn empty() { #[test] fn non_empty() { let mut interner = Interner::default(); - let a = interner.get_or_intern_static("a"); + let a = interner.get_or_intern_static("a", utf16!("a")); check_block( r"{ var a = 10; @@ -50,8 +53,8 @@ fn non_empty() { ); let mut interner = Interner::default(); - let hello = interner.get_or_intern_static("hello"); - let a = interner.get_or_intern_static("a"); + let hello = interner.get_or_intern_static("hello", utf16!("hello")); + let a = interner.get_or_intern_static("a", utf16!("a")); check_block( r"{ function hello() { @@ -85,8 +88,8 @@ fn non_empty() { #[test] fn hoisting() { let mut interner = Interner::default(); - let hello = interner.get_or_intern_static("hello"); - let a = interner.get_or_intern_static("a"); + let hello = interner.get_or_intern_static("hello", utf16!("hello")); + let a = interner.get_or_intern_static("a", utf16!("a")); check_block( r"{ var a = hello(); @@ -115,7 +118,7 @@ fn hoisting() { ); let mut interner = Interner::default(); - let a = interner.get_or_intern_static("a"); + let a = interner.get_or_intern_static("a", utf16!("a")); check_block( r"{ a = 10; diff --git a/boa_engine/src/syntax/parser/statement/break_stm/tests.rs b/boa_engine/src/syntax/parser/statement/break_stm/tests.rs index 30d2845b18c..764293a4587 100644 --- a/boa_engine/src/syntax/parser/statement/break_stm/tests.rs +++ b/boa_engine/src/syntax/parser/statement/break_stm/tests.rs @@ -1,9 +1,12 @@ -use crate::syntax::{ - ast::{ - node::{Block, Break, 
Node, WhileLoop}, - Const, +use crate::{ + string::utf16, + syntax::{ + ast::{ + node::{Block, Break, Node, WhileLoop}, + Const, + }, + parser::tests::check_parser, }, - parser::tests::check_parser, }; use boa_interner::Interner; @@ -48,9 +51,10 @@ fn new_line_semicolon_insertion() { }", vec![WhileLoop::new( Const::from(true), - Block::from(vec![ - Break::new(interner.get_or_intern_static("test")).into() - ]), + Block::from(vec![Break::new( + interner.get_or_intern_static("test", utf16!("test")), + ) + .into()]), ) .into()], interner, @@ -79,9 +83,10 @@ fn new_line_block() { }", vec![WhileLoop::new( Const::from(true), - Block::from(vec![ - Break::new(interner.get_or_intern_static("test")).into() - ]), + Block::from(vec![Break::new( + interner.get_or_intern_static("test", utf16!("test")), + ) + .into()]), ) .into()], interner, @@ -97,9 +102,10 @@ fn reserved_label() { }", vec![WhileLoop::new( Const::from(true), - Block::from(vec![ - Break::new(interner.get_or_intern_static("await")).into() - ]), + Block::from(vec![Break::new( + interner.get_or_intern_static("await", utf16!("await")), + ) + .into()]), ) .into()], interner, @@ -112,9 +118,10 @@ fn reserved_label() { }", vec![WhileLoop::new( Const::from(true), - Block::from(vec![ - Break::new(interner.get_or_intern_static("yield")).into() - ]), + Block::from(vec![Break::new( + interner.get_or_intern_static("yield", utf16!("yield")), + ) + .into()]), ) .into()], interner, diff --git a/boa_engine/src/syntax/parser/statement/continue_stm/tests.rs b/boa_engine/src/syntax/parser/statement/continue_stm/tests.rs index ae3f4230bdb..f8a14c99340 100644 --- a/boa_engine/src/syntax/parser/statement/continue_stm/tests.rs +++ b/boa_engine/src/syntax/parser/statement/continue_stm/tests.rs @@ -1,9 +1,12 @@ -use crate::syntax::{ - ast::{ - node::{Block, Continue, WhileLoop}, - Const, +use crate::{ + string::utf16, + syntax::{ + ast::{ + node::{Block, Continue, WhileLoop}, + Const, + }, + parser::tests::check_parser, }, - 
parser::tests::check_parser, }; use boa_interner::Interner; @@ -48,9 +51,10 @@ fn new_line_semicolon_insertion() { }", vec![WhileLoop::new( Const::from(true), - Block::from(vec![ - Continue::new(interner.get_or_intern_static("test")).into() - ]), + Block::from(vec![Continue::new( + interner.get_or_intern_static("test", utf16!("test")), + ) + .into()]), ) .into()], interner, @@ -79,9 +83,10 @@ fn new_line_block() { }", vec![WhileLoop::new( Const::from(true), - Block::from(vec![ - Continue::new(interner.get_or_intern_static("test")).into() - ]), + Block::from(vec![Continue::new( + interner.get_or_intern_static("test", utf16!("test")), + ) + .into()]), ) .into()], interner, @@ -97,9 +102,10 @@ fn reserved_label() { }", vec![WhileLoop::new( Const::from(true), - Block::from(vec![ - Continue::new(interner.get_or_intern_static("await")).into() - ]), + Block::from(vec![Continue::new( + interner.get_or_intern_static("await", utf16!("await")), + ) + .into()]), ) .into()], interner, @@ -112,9 +118,10 @@ fn reserved_label() { }", vec![WhileLoop::new( Const::from(true), - Block::from(vec![ - Continue::new(interner.get_or_intern_static("yield")).into() - ]), + Block::from(vec![Continue::new( + interner.get_or_intern_static("yield", utf16!("yield")), + ) + .into()]), ) .into()], interner, diff --git a/boa_engine/src/syntax/parser/statement/declaration/hoistable/async_function_decl/tests.rs b/boa_engine/src/syntax/parser/statement/declaration/hoistable/async_function_decl/tests.rs index 2d184720e68..643f1c32d68 100644 --- a/boa_engine/src/syntax/parser/statement/declaration/hoistable/async_function_decl/tests.rs +++ b/boa_engine/src/syntax/parser/statement/declaration/hoistable/async_function_decl/tests.rs @@ -1,6 +1,9 @@ -use crate::syntax::{ - ast::node::{AsyncFunctionDecl, FormalParameterList}, - parser::tests::check_parser, +use crate::{ + string::utf16, + syntax::{ + ast::node::{AsyncFunctionDecl, FormalParameterList}, + parser::tests::check_parser, + }, }; use 
boa_interner::Interner; @@ -11,7 +14,7 @@ fn async_function_declaration() { check_parser( "async function hello() {}", vec![AsyncFunctionDecl::new( - interner.get_or_intern_static("hello"), + interner.get_or_intern_static("hello", utf16!("hello")), FormalParameterList::default(), vec![], ) @@ -27,7 +30,7 @@ fn async_function_declaration_keywords() { check_parser( "async function yield() {}", vec![AsyncFunctionDecl::new( - interner.get_or_intern_static("yield"), + interner.get_or_intern_static("yield", utf16!("yield")), FormalParameterList::default(), vec![], ) @@ -39,7 +42,7 @@ fn async_function_declaration_keywords() { check_parser( "async function await() {}", vec![AsyncFunctionDecl::new( - interner.get_or_intern_static("await"), + interner.get_or_intern_static("await", utf16!("await")), FormalParameterList::default(), vec![], ) diff --git a/boa_engine/src/syntax/parser/statement/declaration/hoistable/async_generator_decl/tests.rs b/boa_engine/src/syntax/parser/statement/declaration/hoistable/async_generator_decl/tests.rs index 72c7198f7ba..ef80f70398c 100644 --- a/boa_engine/src/syntax/parser/statement/declaration/hoistable/async_generator_decl/tests.rs +++ b/boa_engine/src/syntax/parser/statement/declaration/hoistable/async_generator_decl/tests.rs @@ -1,6 +1,9 @@ -use crate::syntax::{ - ast::node::{AsyncGeneratorDecl, FormalParameterList}, - parser::tests::check_parser, +use crate::{ + string::utf16, + syntax::{ + ast::node::{AsyncGeneratorDecl, FormalParameterList}, + parser::tests::check_parser, + }, }; use boa_interner::Interner; @@ -10,7 +13,7 @@ fn async_generator_function_declaration() { check_parser( "async function* gen() {}", vec![AsyncGeneratorDecl::new( - interner.get_or_intern_static("gen"), + interner.get_or_intern_static("gen", utf16!("gen")), FormalParameterList::default(), vec![], ) diff --git a/boa_engine/src/syntax/parser/statement/declaration/hoistable/class_decl/tests.rs 
b/boa_engine/src/syntax/parser/statement/declaration/hoistable/class_decl/tests.rs index 660e9245c9b..52edab5830f 100644 --- a/boa_engine/src/syntax/parser/statement/declaration/hoistable/class_decl/tests.rs +++ b/boa_engine/src/syntax/parser/statement/declaration/hoistable/class_decl/tests.rs @@ -1,13 +1,16 @@ -use crate::syntax::{ - ast::{ - node::{ - declaration::class_decl::ClassElement as ClassElementNode, - object::{MethodDefinition, PropertyName}, - Class, FormalParameterList, FunctionExpr, Node, +use crate::{ + string::utf16, + syntax::{ + ast::{ + node::{ + declaration::class_decl::ClassElement as ClassElementNode, + object::{MethodDefinition, PropertyName}, + Class, FormalParameterList, FunctionExpr, Node, + }, + Const, }, - Const, + parser::tests::check_parser, }, - parser::tests::check_parser, }; use boa_interner::Interner; @@ -17,7 +20,7 @@ fn check_async_ordinary_method() { let elements = vec![ClassElementNode::MethodDefinition( PropertyName::Computed(Node::Const(Const::from( - interner.get_or_intern_static("async"), + interner.get_or_intern_static("async", utf16!("async")), ))), MethodDefinition::Ordinary(FunctionExpr::new( None, @@ -32,7 +35,7 @@ fn check_async_ordinary_method() { } ", [Node::ClassDecl(Class::new( - interner.get_or_intern_static("A"), + interner.get_or_intern_static("A", utf16!("A")), None, None, elements, @@ -47,7 +50,7 @@ fn check_async_field_initialization() { let elements = vec![ClassElementNode::FieldDefinition( PropertyName::Computed(Node::Const(Const::from( - interner.get_or_intern_static("async"), + interner.get_or_intern_static("async", utf16!("async")), ))), Some(Node::Const(Const::from(1))), )]; @@ -59,7 +62,7 @@ fn check_async_field_initialization() { } ", [Node::ClassDecl(Class::new( - interner.get_or_intern_static("A"), + interner.get_or_intern_static("A", utf16!("A")), None, None, elements, @@ -74,7 +77,7 @@ fn check_async_field() { let elements = vec![ClassElementNode::FieldDefinition( 
PropertyName::Computed(Node::Const(Const::from( - interner.get_or_intern_static("async"), + interner.get_or_intern_static("async", utf16!("async")), ))), None, )]; @@ -85,7 +88,7 @@ fn check_async_field() { } ", [Node::ClassDecl(Class::new( - interner.get_or_intern_static("A"), + interner.get_or_intern_static("A", utf16!("A")), None, None, elements, diff --git a/boa_engine/src/syntax/parser/statement/declaration/hoistable/function_decl/tests.rs b/boa_engine/src/syntax/parser/statement/declaration/hoistable/function_decl/tests.rs index ff5b5e2a7a4..fe121a24a7c 100644 --- a/boa_engine/src/syntax/parser/statement/declaration/hoistable/function_decl/tests.rs +++ b/boa_engine/src/syntax/parser/statement/declaration/hoistable/function_decl/tests.rs @@ -1,6 +1,9 @@ -use crate::syntax::{ - ast::node::{FormalParameterList, FunctionDecl}, - parser::tests::check_parser, +use crate::{ + string::utf16, + syntax::{ + ast::node::{FormalParameterList, FunctionDecl}, + parser::tests::check_parser, + }, }; use boa_interner::Interner; @@ -11,7 +14,7 @@ fn function_declaration() { check_parser( "function hello() {}", vec![FunctionDecl::new( - interner.get_or_intern_static("hello"), + interner.get_or_intern_static("hello", utf16!("hello")), FormalParameterList::default(), vec![], ) @@ -23,52 +26,54 @@ fn function_declaration() { /// Function declaration parsing with keywords. #[test] fn function_declaration_keywords() { - let genast = |keyword, interner: &mut Interner| { - vec![FunctionDecl::new( - interner.get_or_intern_static(keyword), - FormalParameterList::default(), - vec![], - ) - .into()] - }; + macro_rules! 
genast { + ($keyword:literal, $interner:expr) => { + vec![FunctionDecl::new( + $interner.get_or_intern_static($keyword, utf16!($keyword)), + FormalParameterList::default(), + vec![], + ) + .into()] + }; + } let mut interner = Interner::default(); - let ast = genast("yield", &mut interner); + let ast = genast!("yield", interner); check_parser("function yield() {}", ast, interner); let mut interner = Interner::default(); - let ast = genast("await", &mut interner); + let ast = genast!("await", interner); check_parser("function await() {}", ast, interner); let mut interner = Interner::default(); - let ast = genast("as", &mut interner); + let ast = genast!("as", interner); check_parser("function as() {}", ast, interner); let mut interner = Interner::default(); - let ast = genast("async", &mut interner); + let ast = genast!("async", interner); check_parser("function async() {}", ast, interner); let mut interner = Interner::default(); - let ast = genast("from", &mut interner); + let ast = genast!("from", interner); check_parser("function from() {}", ast, interner); let mut interner = Interner::default(); - let ast = genast("get", &mut interner); + let ast = genast!("get", interner); check_parser("function get() {}", ast, interner); let mut interner = Interner::default(); - let ast = genast("meta", &mut interner); + let ast = genast!("meta", interner); check_parser("function meta() {}", ast, interner); let mut interner = Interner::default(); - let ast = genast("of", &mut interner); + let ast = genast!("of", interner); check_parser("function of() {}", ast, interner); let mut interner = Interner::default(); - let ast = genast("set", &mut interner); + let ast = genast!("set", interner); check_parser("function set() {}", ast, interner); let mut interner = Interner::default(); - let ast = genast("target", &mut interner); + let ast = genast!("target", interner); check_parser("function target() {}", ast, interner); } diff --git 
a/boa_engine/src/syntax/parser/statement/declaration/hoistable/generator_decl/tests.rs b/boa_engine/src/syntax/parser/statement/declaration/hoistable/generator_decl/tests.rs index c517a217009..55a46cc2214 100644 --- a/boa_engine/src/syntax/parser/statement/declaration/hoistable/generator_decl/tests.rs +++ b/boa_engine/src/syntax/parser/statement/declaration/hoistable/generator_decl/tests.rs @@ -1,6 +1,9 @@ -use crate::syntax::{ - ast::node::{FormalParameterList, GeneratorDecl}, - parser::tests::check_parser, +use crate::{ + string::utf16, + syntax::{ + ast::node::{FormalParameterList, GeneratorDecl}, + parser::tests::check_parser, + }, }; use boa_interner::Interner; @@ -10,7 +13,7 @@ fn generator_function_declaration() { check_parser( "function* gen() {}", vec![GeneratorDecl::new( - interner.get_or_intern_static("gen"), + interner.get_or_intern_static("gen", utf16!("gen")), FormalParameterList::default(), vec![], ) diff --git a/boa_engine/src/syntax/parser/statement/declaration/tests.rs b/boa_engine/src/syntax/parser/statement/declaration/tests.rs index ec9cd874445..d5461180919 100644 --- a/boa_engine/src/syntax/parser/statement/declaration/tests.rs +++ b/boa_engine/src/syntax/parser/statement/declaration/tests.rs @@ -1,9 +1,12 @@ -use crate::syntax::{ - ast::{ - node::{Declaration, DeclarationList, Node}, - Const, +use crate::{ + string::utf16, + syntax::{ + ast::{ + node::{Declaration, DeclarationList, Node}, + Const, + }, + parser::tests::{check_invalid, check_parser}, }, - parser::tests::{check_invalid, check_parser}, }; use boa_interner::Interner; @@ -15,7 +18,7 @@ fn var_declaration() { "var a = 5;", vec![DeclarationList::Var( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), Some(Const::from(5).into()), )] .into(), @@ -33,7 +36,7 @@ fn var_declaration_keywords() { "var yield = 5;", vec![DeclarationList::Var( vec![Declaration::new_with_identifier( - 
interner.get_or_intern_static("yield"), + interner.get_or_intern_static("yield", utf16!("yield")), Some(Const::from(5).into()), )] .into(), @@ -47,7 +50,7 @@ fn var_declaration_keywords() { "var await = 5;", vec![DeclarationList::Var( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("await"), + interner.get_or_intern_static("await", utf16!("await")), Some(Const::from(5).into()), )] .into(), @@ -65,7 +68,7 @@ fn var_declaration_no_spaces() { "var a=5;", vec![DeclarationList::Var( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), Some(Const::from(5).into()), )] .into(), @@ -83,7 +86,7 @@ fn empty_var_declaration() { "var a;", vec![DeclarationList::Var( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), None, )] .into(), @@ -102,12 +105,15 @@ fn multiple_var_declaration() { vec![DeclarationList::Var( vec![ Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), Some(Const::from(5).into()), ), - Declaration::new_with_identifier(interner.get_or_intern_static("b"), None), Declaration::new_with_identifier( - interner.get_or_intern_static("c"), + interner.get_or_intern_static("b", utf16!("b")), + None, + ), + Declaration::new_with_identifier( + interner.get_or_intern_static("c", utf16!("c")), Some(Const::from(6).into()), ), ] @@ -126,7 +132,7 @@ fn let_declaration() { "let a = 5;", vec![DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), Node::from(Const::from(5)), )] .into(), @@ -144,7 +150,7 @@ fn let_declaration_keywords() { "let yield = 5;", vec![DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("yield"), + interner.get_or_intern_static("yield", utf16!("yield")), Node::from(Const::from(5)), 
)] .into(), @@ -158,7 +164,7 @@ fn let_declaration_keywords() { "let await = 5;", vec![DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("await"), + interner.get_or_intern_static("await", utf16!("await")), Node::from(Const::from(5)), )] .into(), @@ -176,7 +182,7 @@ fn let_declaration_no_spaces() { "let a=5;", vec![DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), Node::from(Const::from(5)), )] .into(), @@ -194,7 +200,7 @@ fn empty_let_declaration() { "let a;", vec![DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), None, )] .into(), @@ -213,12 +219,15 @@ fn multiple_let_declaration() { vec![DeclarationList::Let( vec![ Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), Node::from(Const::from(5)), ), - Declaration::new_with_identifier(interner.get_or_intern_static("b"), None), Declaration::new_with_identifier( - interner.get_or_intern_static("c"), + interner.get_or_intern_static("b", utf16!("b")), + None, + ), + Declaration::new_with_identifier( + interner.get_or_intern_static("c", utf16!("c")), Node::from(Const::from(6)), ), ] @@ -237,7 +246,7 @@ fn const_declaration() { "const a = 5;", vec![DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), Node::from(Const::from(5)), )] .into(), @@ -255,7 +264,7 @@ fn const_declaration_keywords() { "const yield = 5;", vec![DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("yield"), + interner.get_or_intern_static("yield", utf16!("yield")), Node::from(Const::from(5)), )] .into(), @@ -269,7 +278,7 @@ fn const_declaration_keywords() { "const await = 5;", vec![DeclarationList::Const( 
vec![Declaration::new_with_identifier( - interner.get_or_intern_static("await"), + interner.get_or_intern_static("await", utf16!("await")), Node::from(Const::from(5)), )] .into(), @@ -287,7 +296,7 @@ fn const_declaration_no_spaces() { "const a=5;", vec![DeclarationList::Const( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), Node::from(Const::from(5)), )] .into(), @@ -312,11 +321,11 @@ fn multiple_const_declaration() { vec![DeclarationList::Const( vec![ Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), Node::from(Const::from(5)), ), Declaration::new_with_identifier( - interner.get_or_intern_static("c"), + interner.get_or_intern_static("c", utf16!("c")), Node::from(Const::from(6)), ), ] diff --git a/boa_engine/src/syntax/parser/statement/iteration/tests.rs b/boa_engine/src/syntax/parser/statement/iteration/tests.rs index b212f16d670..f1cff6009b5 100644 --- a/boa_engine/src/syntax/parser/statement/iteration/tests.rs +++ b/boa_engine/src/syntax/parser/statement/iteration/tests.rs @@ -1,13 +1,16 @@ -use crate::syntax::{ - ast::{ - node::{ - field::GetConstField, BinOp, Block, Break, Call, Declaration, DeclarationList, - DoWhileLoop, Identifier, UnaryOp, WhileLoop, +use crate::{ + string::utf16, + syntax::{ + ast::{ + node::{ + field::GetConstField, BinOp, Block, Break, Call, Declaration, DeclarationList, + DoWhileLoop, Identifier, UnaryOp, WhileLoop, + }, + op::{self, AssignOp, CompOp}, + Const, }, - op::{self, AssignOp, CompOp}, - Const, + parser::tests::{check_invalid, check_parser}, }, - parser::tests::{check_invalid, check_parser}, }; use boa_interner::Interner; @@ -22,7 +25,7 @@ fn check_do_while() { vec![DoWhileLoop::new( Block::from(vec![BinOp::new( AssignOp::Add, - Identifier::new(interner.get_or_intern_static("a")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), Const::from(1), ) .into()]), 
@@ -43,7 +46,7 @@ fn check_do_while_semicolon_insertion() { vec![ DeclarationList::Var( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("i"), + interner.get_or_intern_static("i", utf16!("i")), Some(Const::from(0).into()), )] .into(), @@ -52,17 +55,21 @@ fn check_do_while_semicolon_insertion() { DoWhileLoop::new( Block::from(vec![Call::new( GetConstField::new( - Identifier::new(interner.get_or_intern_static("console")), - interner.get_or_intern_static("log"), + Identifier::new( + interner.get_or_intern_static("console", utf16!("console")), + ), + interner.get_or_intern_static("log", utf16!("log")), ), - vec![Const::from(interner.get_or_intern_static("hello")).into()], + vec![ + Const::from(interner.get_or_intern_static("hello", utf16!("hello"))).into(), + ], ) .into()]), BinOp::new( CompOp::LessThan, UnaryOp::new( op::UnaryOp::IncrementPost, - Identifier::new(interner.get_or_intern_static("i")), + Identifier::new(interner.get_or_intern_static("i", utf16!("i"))), ), Const::from(10), ), @@ -70,10 +77,10 @@ fn check_do_while_semicolon_insertion() { .into(), Call::new( GetConstField::new( - Identifier::new(interner.get_or_intern_static("console")), - interner.get_or_intern_static("log"), + Identifier::new(interner.get_or_intern_static("console", utf16!("console"))), + interner.get_or_intern_static("log", utf16!("log")), ), - vec![Const::from(interner.get_or_intern_static("end")).into()], + vec![Const::from(interner.get_or_intern_static("end", utf16!("end"))).into()], ) .into(), ], @@ -92,7 +99,7 @@ fn check_do_while_semicolon_insertion_no_space() { vec![ DeclarationList::Var( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("i"), + interner.get_or_intern_static("i", utf16!("i")), Some(Const::from(0).into()), )] .into(), @@ -101,17 +108,21 @@ fn check_do_while_semicolon_insertion_no_space() { DoWhileLoop::new( Block::from(vec![Call::new( GetConstField::new( - Identifier::new(interner.get_or_intern_static("console")), - 
interner.get_or_intern_static("log"), + Identifier::new( + interner.get_or_intern_static("console", utf16!("console")), + ), + interner.get_or_intern_static("log", utf16!("log")), ), - vec![Const::from(interner.get_or_intern_static("hello")).into()], + vec![ + Const::from(interner.get_or_intern_static("hello", utf16!("hello"))).into(), + ], ) .into()]), BinOp::new( CompOp::LessThan, UnaryOp::new( op::UnaryOp::IncrementPost, - Identifier::new(interner.get_or_intern_static("i")), + Identifier::new(interner.get_or_intern_static("i", utf16!("i"))), ), Const::from(10), ), @@ -119,10 +130,10 @@ fn check_do_while_semicolon_insertion_no_space() { .into(), Call::new( GetConstField::new( - Identifier::new(interner.get_or_intern_static("console")), - interner.get_or_intern_static("log"), + Identifier::new(interner.get_or_intern_static("console", utf16!("console"))), + interner.get_or_intern_static("log", utf16!("log")), ), - vec![Const::from(interner.get_or_intern_static("end")).into()], + vec![Const::from(interner.get_or_intern_static("end", utf16!("end"))).into()], ) .into(), ], diff --git a/boa_engine/src/syntax/parser/statement/switch/tests.rs b/boa_engine/src/syntax/parser/statement/switch/tests.rs index f99e1f03f5a..c2c05fe8230 100644 --- a/boa_engine/src/syntax/parser/statement/switch/tests.rs +++ b/boa_engine/src/syntax/parser/statement/switch/tests.rs @@ -1,12 +1,15 @@ -use crate::syntax::{ - ast::{ - node::{ - Break, Call, Case, Declaration, DeclarationList, GetConstField, Identifier, Node, - Switch, +use crate::{ + string::utf16, + syntax::{ + ast::{ + node::{ + Break, Call, Case, Declaration, DeclarationList, GetConstField, Identifier, Node, + Switch, + }, + Const, }, - Const, + parser::tests::{check_invalid, check_parser}, }, - parser::tests::{check_invalid, check_parser}, }; use boa_interner::Interner; @@ -110,7 +113,7 @@ fn check_switch_seperated_defaults() { /// Example of JS code . 
#[test] -fn check_seperated_switch() { +fn check_separated_switch() { let s = r#" let a = 10; @@ -150,9 +153,9 @@ fn check_seperated_switch() { "#; let mut interner = Interner::default(); - let log = interner.get_or_intern_static("log"); - let console = interner.get_or_intern_static("console"); - let a = interner.get_or_intern_static("a"); + let log = interner.get_or_intern_static("log", utf16!("log")); + let console = interner.get_or_intern_static("console", utf16!("console")); + let a = interner.get_or_intern_static("a", utf16!("a")); check_parser( s, @@ -194,7 +197,7 @@ fn check_seperated_switch() { Some(vec![Call::new( GetConstField::new(Identifier::new(console), log), vec![Node::from(Const::from( - interner.get_or_intern_static("Default"), + interner.get_or_intern_static("Default", utf16!("Default")), ))], ) .into()]), diff --git a/boa_engine/src/syntax/parser/statement/throw/tests.rs b/boa_engine/src/syntax/parser/statement/throw/tests.rs index 2e95dfdb4ad..c8de7b98947 100644 --- a/boa_engine/src/syntax/parser/statement/throw/tests.rs +++ b/boa_engine/src/syntax/parser/statement/throw/tests.rs @@ -1,6 +1,9 @@ -use crate::syntax::{ - ast::{node::Throw, Const}, - parser::tests::check_parser, +use crate::{ + string::utf16, + syntax::{ + ast::{node::Throw, Const}, + parser::tests::check_parser, + }, }; use boa_interner::Interner; @@ -9,7 +12,10 @@ fn check_throw_parsing() { let mut interner = Interner::default(); check_parser( "throw 'error';", - vec![Throw::new(Const::from(interner.get_or_intern_static("error"))).into()], + vec![Throw::new(Const::from( + interner.get_or_intern_static("error", utf16!("error")), + )) + .into()], interner, ); } diff --git a/boa_engine/src/syntax/parser/statement/try_stm/tests.rs b/boa_engine/src/syntax/parser/statement/try_stm/tests.rs index a858ad86a34..23587b17375 100644 --- a/boa_engine/src/syntax/parser/statement/try_stm/tests.rs +++ b/boa_engine/src/syntax/parser/statement/try_stm/tests.rs @@ -1,13 +1,16 @@ -use 
crate::syntax::{ - ast::{ - node::{ - declaration::{BindingPatternTypeArray, BindingPatternTypeObject}, - object::PropertyName, - Block, Catch, Declaration, DeclarationList, Finally, Identifier, Try, +use crate::{ + string::utf16, + syntax::{ + ast::{ + node::{ + declaration::{BindingPatternTypeArray, BindingPatternTypeObject}, + object::PropertyName, + Block, Catch, Declaration, DeclarationList, Finally, Identifier, Try, + }, + Const, }, - Const, + parser::tests::{check_invalid, check_parser}, }, - parser::tests::{check_invalid, check_parser}, }; use boa_interner::Interner; @@ -19,7 +22,10 @@ fn check_inline_with_empty_try_catch() { vec![Try::new( vec![], Some(Catch::new( - Declaration::new_with_identifier(interner.get_or_intern_static("e"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("e", utf16!("e")), + None, + ), vec![], )), None, @@ -37,14 +43,17 @@ fn check_inline_with_var_decl_inside_try() { vec![Try::new( vec![DeclarationList::Var( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Const::from(1).into()), )] .into(), ) .into()], Some(Catch::new( - Declaration::new_with_identifier(interner.get_or_intern_static("e"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("e", utf16!("e")), + None, + ), vec![], )), None, @@ -62,17 +71,20 @@ fn check_inline_with_var_decl_inside_catch() { vec![Try::new( vec![DeclarationList::Var( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Const::from(1).into()), )] .into(), ) .into()], Some(Catch::new( - Declaration::new_with_identifier(interner.get_or_intern_static("e"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("e", utf16!("e")), + None, + ), vec![DeclarationList::Var( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + 
interner.get_or_intern_static("x", utf16!("x")), Some(Const::from(1).into()), )] .into(), @@ -94,7 +106,10 @@ fn check_inline_with_empty_try_catch_finally() { vec![Try::new( vec![], Some(Catch::new( - Declaration::new_with_identifier(interner.get_or_intern_static("e"), None), + Declaration::new_with_identifier( + interner.get_or_intern_static("e", utf16!("e")), + None, + ), vec![], )), Some(Finally::from(vec![])), @@ -123,7 +138,7 @@ fn check_inline_with_empty_try_var_decl_in_finally() { None, Some(Finally::from(vec![DeclarationList::Var( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Const::from(1).into()), )] .into(), @@ -146,7 +161,7 @@ fn check_inline_empty_try_paramless_catch() { None, vec![DeclarationList::Var( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Const::from(1).into()), )] .into(), @@ -163,7 +178,7 @@ fn check_inline_empty_try_paramless_catch() { #[test] fn check_inline_with_binding_pattern_object() { let mut interner = Interner::default(); - let a = interner.get_or_intern_static("a"); + let a = interner.get_or_intern_static("a", utf16!("a")); check_parser( "try {} catch ({ a, b: c }) {}", vec![Try::new( @@ -177,9 +192,9 @@ fn check_inline_with_binding_pattern_object() { default_init: None, }, BindingPatternTypeObject::SingleName { - ident: interner.get_or_intern_static("c"), + ident: interner.get_or_intern_static("c", utf16!("c")), property_name: PropertyName::Literal( - interner.get_or_intern_static("b"), + interner.get_or_intern_static("b", utf16!("b")), ), default_init: None, }, @@ -206,11 +221,11 @@ fn check_inline_with_binding_pattern_array() { Some(Declaration::new_with_array_pattern( vec![ BindingPatternTypeArray::SingleName { - ident: interner.get_or_intern_static("a"), + ident: interner.get_or_intern_static("a", utf16!("a")), default_init: None, }, 
BindingPatternTypeArray::SingleName { - ident: interner.get_or_intern_static("b"), + ident: interner.get_or_intern_static("b", utf16!("b")), default_init: None, }, ], @@ -234,13 +249,13 @@ fn check_catch_with_var_redeclaration() { Block::from(vec![]), Some(Catch::new::<_, Declaration, _>( Some(Declaration::new_with_identifier( - Identifier::new(interner.get_or_intern_static("e")), + Identifier::new(interner.get_or_intern_static("e", utf16!("e"))), None, )), vec![DeclarationList::Var( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("e"), - Some(Const::from(interner.get_or_intern_static("oh")).into()), + interner.get_or_intern_static("e", utf16!("e")), + Some(Const::from(interner.get_or_intern_static("oh", utf16!("oh"))).into()), )] .into(), ) diff --git a/boa_engine/src/syntax/parser/tests.rs b/boa_engine/src/syntax/parser/tests.rs index 5ccc45a0cf5..6a07f3a47d8 100644 --- a/boa_engine/src/syntax/parser/tests.rs +++ b/boa_engine/src/syntax/parser/tests.rs @@ -3,6 +3,7 @@ use super::Parser; use crate::{ context::ContextBuilder, + string::utf16, syntax::ast::{ node::{ field::GetConstField, object::PropertyDefinition, ArrowFunctionDecl, Assign, BinOp, @@ -49,10 +50,10 @@ fn check_construct_call_precedence() { vec![Node::from(Call::new( GetConstField::new( New::from(Call::new( - Identifier::new(interner.get_or_intern_static("Date")), + Identifier::new(interner.get_or_intern_static("Date", utf16!("Date"))), vec![], )), - interner.get_or_intern_static("getTime"), + interner.get_or_intern_static("getTime", utf16!("getTime")), ), vec![], ))], @@ -66,10 +67,10 @@ fn assign_operator_precedence() { check_parser( "a = a + 1", vec![Assign::new( - Identifier::new(interner.get_or_intern_static("a")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), BinOp::new( NumOp::Add, - Identifier::new(interner.get_or_intern_static("a")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), Const::from(1), ), ) @@ -81,8 +82,8 @@ fn 
assign_operator_precedence() { #[test] fn hoisting() { let mut interner = Interner::default(); - let hello = interner.get_or_intern_static("hello"); - let a = interner.get_or_intern_static("a"); + let hello = interner.get_or_intern_static("hello", utf16!("hello")); + let a = interner.get_or_intern_static("a", utf16!("a")); check_parser( r" var a = hello(); @@ -110,7 +111,7 @@ fn hoisting() { ); let mut interner = Interner::default(); - let a = interner.get_or_intern_static("a"); + let a = interner.get_or_intern_static("a", utf16!("a")); check_parser( r" a = 10; @@ -138,12 +139,12 @@ fn ambigous_regex_divide_expression() { BinOp::new( NumOp::Div, Const::Int(1), - Identifier::new(interner.get_or_intern_static("a")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), ), BinOp::new( NumOp::Div, Const::Int(1), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ), ) .into()], @@ -156,7 +157,7 @@ fn two_divisions_in_expression() { let s = "a !== 0 || 1 / a === 1 / b;"; let mut interner = Interner::default(); - let a = interner.get_or_intern_static("a"); + let a = interner.get_or_intern_static("a", utf16!("a")); check_parser( s, vec![BinOp::new( @@ -168,7 +169,7 @@ fn two_divisions_in_expression() { BinOp::new( NumOp::Div, Const::Int(1), - Identifier::new(interner.get_or_intern_static("b")), + Identifier::new(interner.get_or_intern_static("b", utf16!("b"))), ), ), ) @@ -190,7 +191,7 @@ fn comment_semi_colon_insertion() { vec![ DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), Some(Const::Int(10).into()), )] .into(), @@ -198,7 +199,7 @@ fn comment_semi_colon_insertion() { .into(), DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("b"), + interner.get_or_intern_static("b", utf16!("b")), Some(Const::Int(20).into()), )] .into(), @@ -224,7 +225,7 @@ 
fn multiline_comment_semi_colon_insertion() { vec![ DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), Some(Const::Int(10).into()), )] .into(), @@ -232,7 +233,7 @@ fn multiline_comment_semi_colon_insertion() { .into(), DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("b"), + interner.get_or_intern_static("b", utf16!("b")), Some(Const::Int(20).into()), )] .into(), @@ -255,7 +256,7 @@ fn multiline_comment_no_lineterminator() { vec![ DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), Some(Const::Int(10).into()), )] .into(), @@ -263,7 +264,7 @@ fn multiline_comment_no_lineterminator() { .into(), DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("b"), + interner.get_or_intern_static("b", utf16!("b")), Some(Const::Int(20).into()), )] .into(), @@ -289,14 +290,14 @@ fn assignment_line_terminator() { vec![ DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("a"), + interner.get_or_intern_static("a", utf16!("a")), Some(Const::Int(3).into()), )] .into(), ) .into(), Assign::new( - Identifier::new(interner.get_or_intern_static("a")), + Identifier::new(interner.get_or_intern_static("a", utf16!("a"))), Const::from(5), ) .into(), @@ -318,7 +319,7 @@ fn assignment_multiline_terminator() { "#; let mut interner = Interner::default(); - let a = interner.get_or_intern_static("a"); + let a = interner.get_or_intern_static("a", utf16!("a")); check_parser( s, vec![ @@ -343,7 +344,7 @@ fn bracketed_expr() { let mut interner = Interner::default(); check_parser( s, - vec![Identifier::new(interner.get_or_intern_static("b")).into()], + vec![Identifier::new(interner.get_or_intern_static("b", utf16!("b"))).into()], interner, ); } @@ -353,7 +354,7 @@ fn increment_in_comma_op() 
{ let s = r#"(b++, b)"#; let mut interner = Interner::default(); - let b = interner.get_or_intern_static("b"); + let b = interner.get_or_intern_static("b", utf16!("b")); check_parser( s, vec![BinOp::new::<_, Node, Node>( @@ -378,15 +379,20 @@ fn spread_in_object() { let mut interner = Interner::default(); let object_properties = vec![ - PropertyDefinition::property(interner.get_or_intern_static("a"), Const::from(1)), - PropertyDefinition::spread_object(Identifier::new(interner.get_or_intern_static("b"))), + PropertyDefinition::property( + interner.get_or_intern_static("a", utf16!("a")), + Const::from(1), + ), + PropertyDefinition::spread_object(Identifier::new( + interner.get_or_intern_static("b", utf16!("b")), + )), ]; check_parser( s, vec![DeclarationList::Let( vec![Declaration::new_with_identifier( - interner.get_or_intern_static("x"), + interner.get_or_intern_static("x", utf16!("x")), Some(Object::from(object_properties).into()), )] .into(), @@ -405,7 +411,7 @@ fn spread_in_arrow_function() { "#; let mut interner = Interner::default(); - let b = interner.get_or_intern_static("b"); + let b = interner.get_or_intern_static("b", utf16!("b")); check_parser( s, vec![ArrowFunctionDecl::new( @@ -429,7 +435,7 @@ fn spread_in_arrow_function() { #[test] fn empty_statement() { let mut interner = Interner::default(); - let a = interner.get_or_intern_static("a"); + let a = interner.get_or_intern_static("a", utf16!("a")); check_parser( r" ;;var a = 10; diff --git a/boa_engine/src/tests.rs b/boa_engine/src/tests.rs index 6aceff0cf60..d02657f2edf 100644 --- a/boa_engine/src/tests.rs +++ b/boa_engine/src/tests.rs @@ -1,6 +1,6 @@ use crate::{ - builtins::Number, check_output, exec, forward, forward_val, value::IntegerOrInfinity, Context, - JsValue, TestAction, + builtins::Number, check_output, exec, forward, forward_val, string::utf16, + value::IntegerOrInfinity, Context, JsValue, TestAction, }; #[test] @@ -1201,16 +1201,25 @@ fn to_int32() { fn to_string() { let mut context = 
Context::default(); - assert_eq!(JsValue::null().to_string(&mut context).unwrap(), "null"); assert_eq!( - JsValue::undefined().to_string(&mut context).unwrap(), - "undefined" + &JsValue::null().to_string(&mut context).unwrap(), + utf16!("null") ); - assert_eq!(JsValue::new(55).to_string(&mut context).unwrap(), "55"); - assert_eq!(JsValue::new(55.0).to_string(&mut context).unwrap(), "55"); assert_eq!( - JsValue::new("hello").to_string(&mut context).unwrap(), - "hello" + &JsValue::undefined().to_string(&mut context).unwrap(), + utf16!("undefined") + ); + assert_eq!( + &JsValue::new(55).to_string(&mut context).unwrap(), + utf16!("55") + ); + assert_eq!( + &JsValue::new(55.0).to_string(&mut context).unwrap(), + utf16!("55") + ); + assert_eq!( + &JsValue::new("hello").to_string(&mut context).unwrap(), + utf16!("hello") ); } diff --git a/boa_engine/src/value/display.rs b/boa_engine/src/value/display.rs index 5430cc09bb4..8431557a951 100644 --- a/boa_engine/src/value/display.rs +++ b/boa_engine/src/value/display.rs @@ -1,4 +1,4 @@ -use crate::{object::ObjectKind, property::PropertyDescriptor}; +use crate::{js_string, object::ObjectKind, property::PropertyDescriptor}; use super::{fmt, Display, HashSet, JsValue, PropertyKey}; @@ -102,7 +102,9 @@ pub(crate) fn log_string_from(x: &JsValue, print_internals: bool, print_children // Can use the private "type" field of an Object to match on // which type of Object it represents for special printing match v.borrow().kind() { - ObjectKind::String(ref string) => format!("String {{ \"{string}\" }}"), + ObjectKind::String(ref string) => { + format!("String {{ \"{}\" }}", string.to_std_string_escaped()) + } ObjectKind::Boolean(boolean) => format!("Boolean {{ {boolean} }}"), ObjectKind::Number(rational) => { if rational.is_sign_negative() && *rational == 0.0 { @@ -116,7 +118,7 @@ pub(crate) fn log_string_from(x: &JsValue, print_internals: bool, print_children let len = v .borrow() .properties() - .get(&PropertyKey::from("length")) + 
.get(&PropertyKey::from(js_string!("length"))) .expect("array object must have 'length' property") // FIXME: handle accessor descriptors .expect_value() @@ -196,7 +198,6 @@ pub(crate) fn log_string_from(x: &JsValue, print_internals: bool, print_children _ => display_obj(x, print_internals), } } - JsValue::Symbol(ref symbol) => symbol.to_string(), _ => x.display().to_string(), } } @@ -283,11 +284,10 @@ impl Display for ValueDisplay<'_> { JsValue::Null => write!(f, "null"), JsValue::Undefined => write!(f, "undefined"), JsValue::Boolean(v) => write!(f, "{v}"), - JsValue::Symbol(ref symbol) => match symbol.description() { - Some(description) => write!(f, "Symbol({description})"), - None => write!(f, "Symbol()"), - }, - JsValue::String(ref v) => write!(f, "\"{v}\""), + JsValue::Symbol(ref symbol) => { + write!(f, "{}", symbol.descriptive_string().to_std_string_escaped()) + } + JsValue::String(ref v) => write!(f, "\"{}\"", v.to_std_string_escaped()), JsValue::Rational(v) => format_rational(*v, f), JsValue::Object(_) => { write!(f, "{}", log_string_from(self.value, self.internals, true)) diff --git a/boa_engine/src/value/equality.rs b/boa_engine/src/value/equality.rs index c7d4d65235c..58f6cee9de1 100644 --- a/boa_engine/src/value/equality.rs +++ b/boa_engine/src/value/equality.rs @@ -64,13 +64,13 @@ impl JsValue { // a. Let n be ! StringToBigInt(y). // b. If n is NaN, return false. // c. Return the result of the comparison x == n. - (Self::BigInt(ref a), Self::String(ref b)) => match JsBigInt::from_string(b) { + (Self::BigInt(ref a), Self::String(ref b)) => match b.to_big_int() { Some(ref b) => a == b, None => false, }, // 7. If Type(x) is String and Type(y) is BigInt, return the result of the comparison y == x. 
- (Self::String(ref a), Self::BigInt(ref b)) => match JsBigInt::from_string(a) { + (Self::String(ref a), Self::BigInt(ref b)) => match a.to_big_int() { Some(ref a) => a == b, None => false, }, diff --git a/boa_engine/src/value/mod.rs b/boa_engine/src/value/mod.rs index f41258de086..41e72ba7a79 100644 --- a/boa_engine/src/value/mod.rs +++ b/boa_engine/src/value/mod.rs @@ -10,6 +10,7 @@ use crate::{ number::{f64_to_int32, f64_to_uint32}, Number, }, + js_string, object::{JsObject, ObjectData}, property::{PropertyDescriptor, PropertyKey}, symbol::{JsSymbol, WellKnownSymbols}, @@ -25,7 +26,6 @@ use std::{ collections::HashSet, fmt::{self, Display}, ops::Sub, - str::FromStr, }; mod conversions; @@ -64,7 +64,7 @@ pub enum JsValue { Undefined, /// `boolean` - A `true` / `false` value, for if a certain criteria is met. Boolean(bool), - /// `String` - A UTF-8 string, such as `"Hello, world"`. + /// `String` - A UTF-16 string, such as `"Hello, world"`. String(JsString), /// `Number` - A 64-bit floating point number, such as `3.1415` Rational(f64), @@ -406,11 +406,12 @@ impl JsValue { Self::Null => context.throw_type_error("cannot convert null to a BigInt"), Self::Undefined => context.throw_type_error("cannot convert undefined to a BigInt"), Self::String(ref string) => { - if let Some(value) = JsBigInt::from_string(string) { + if let Some(value) = string.to_big_int() { Ok(value) } else { context.throw_syntax_error(format!( - "cannot convert string '{string}' to bigint primitive", + "cannot convert string '{}' to bigint primitive", + string.to_std_string_escaped() )) } } @@ -508,9 +509,9 @@ impl JsValue { JsObject::from_proto_and_data(prototype, ObjectData::string(string.clone())); // Make sure the correct length is set on our new string object object.insert_property( - "length", + js_string!("length"), PropertyDescriptor::builder() - .value(string.encode_utf16().count()) + .value(string.len()) .writable(false) .enumerable(false) .configurable(false), @@ -880,7 +881,7 @@ impl 
JsValue { Self::Null => Ok(0.0), Self::Undefined => Ok(f64::NAN), Self::Boolean(b) => Ok(if b { 1.0 } else { 0.0 }), - Self::String(ref string) => Ok(string.string_to_number()), + Self::String(ref string) => Ok(string.to_number()), Self::Rational(number) => Ok(number), Self::Integer(integer) => Ok(f64::from(integer)), Self::Symbol(_) => context.throw_type_error("argument must not be a symbol"), diff --git a/boa_engine/src/value/operations.rs b/boa_engine/src/value/operations.rs index 4e45f6790e8..0a836f4a155 100644 --- a/boa_engine/src/value/operations.rs +++ b/boa_engine/src/value/operations.rs @@ -1,8 +1,12 @@ -use super::{ - Context, FromStr, JsBigInt, JsResult, JsString, JsValue, Numeric, PreferredType, - WellKnownSymbols, +use crate::{ + builtins::{ + number::{f64_to_int32, f64_to_uint32}, + Number, + }, + js_string, }; -use crate::builtins::number::{f64_to_int32, f64_to_uint32, Number}; + +use super::{Context, JsBigInt, JsResult, JsValue, Numeric, PreferredType, WellKnownSymbols}; impl JsValue { #[inline] @@ -19,21 +23,17 @@ impl JsValue { (Self::BigInt(ref x), Self::BigInt(ref y)) => Self::new(JsBigInt::add(x, y)), // String concat - (Self::String(ref x), Self::String(ref y)) => Self::from(JsString::concat(x, y)), - (Self::String(ref x), y) => Self::from(JsString::concat(x, y.to_string(context)?)), - (x, Self::String(ref y)) => Self::from(JsString::concat(x.to_string(context)?, y)), + (Self::String(ref x), Self::String(ref y)) => Self::from(js_string!(x, y)), + (Self::String(ref x), y) => Self::from(js_string!(x, &y.to_string(context)?)), + (x, Self::String(ref y)) => Self::from(js_string!(&x.to_string(context)?, y)), // Slow path: (_, _) => match ( self.to_primitive(context, PreferredType::Default)?, other.to_primitive(context, PreferredType::Default)?, ) { - (Self::String(ref x), ref y) => { - Self::from(JsString::concat(x, y.to_string(context)?)) - } - (ref x, Self::String(ref y)) => { - Self::from(JsString::concat(x.to_string(context)?, y)) - } + 
(Self::String(ref x), ref y) => Self::from(js_string!(x, &y.to_string(context)?)), + (ref x, Self::String(ref y)) => Self::from(js_string!(&x.to_string(context)?, y)), (x, y) => match (x.to_numeric(context)?, y.to_numeric(context)?) { (Numeric::Number(x), Numeric::Number(y)) => Self::new(x + y), (Numeric::BigInt(ref x), Numeric::BigInt(ref y)) => { @@ -417,7 +417,7 @@ impl JsValue { if !target.is_object() { return context.throw_type_error(format!( "right-hand side of 'instanceof' should be an object, got {}", - target.type_of() + target.type_of().to_std_string_escaped() )); } @@ -449,10 +449,7 @@ impl JsValue { Ok(num) => -num, Err(_) => f64::NAN, }), - Self::String(ref str) => Self::new(match f64::from_str(str) { - Ok(num) => -num, - Err(_) => f64::NAN, - }), + Self::String(ref str) => Self::new(-str.to_number()), Self::Rational(num) => Self::new(-num), Self::Integer(num) if num == 0 => Self::new(-f64::from(0)), Self::Integer(num) => Self::new(-num), @@ -512,29 +509,16 @@ impl JsValue { }; match (px, py) { - (Self::String(ref x), Self::String(ref y)) => { - if x.starts_with(y.as_str()) { - return Ok(AbstractRelation::False); - } - if y.starts_with(x.as_str()) { - return Ok(AbstractRelation::True); - } - for (x, y) in x.chars().zip(y.chars()) { - if x != y { - return Ok((x < y).into()); - } - } - unreachable!() - } + (Self::String(ref x), Self::String(ref y)) => (x < y).into(), (Self::BigInt(ref x), Self::String(ref y)) => { - if let Some(y) = JsBigInt::from_string(y) { + if let Some(y) = y.to_big_int() { (*x < y).into() } else { AbstractRelation::Undefined } } (Self::String(ref x), Self::BigInt(ref y)) => { - if let Some(x) = JsBigInt::from_string(x) { + if let Some(x) = x.to_big_int() { (x < *y).into() } else { AbstractRelation::Undefined diff --git a/boa_engine/src/value/serde_json.rs b/boa_engine/src/value/serde_json.rs index 645e7ae3619..4b0cca514b7 100644 --- a/boa_engine/src/value/serde_json.rs +++ b/boa_engine/src/value/serde_json.rs @@ -108,7 +108,7 @@ 
impl JsValue { Self::Null => Ok(Value::Null), Self::Undefined => todo!("undefined to JSON"), &Self::Boolean(b) => Ok(b.into()), - Self::String(string) => Ok(string.as_str().into()), + Self::String(string) => Ok(string.to_std_string_escaped().into()), &Self::Rational(rat) => Ok(rat.into()), &Self::Integer(int) => Ok(int.into()), Self::BigInt(_bigint) => context.throw_type_error("cannot convert bigint to JSON"), @@ -131,7 +131,7 @@ impl JsValue { let mut map = Map::new(); for (key, property) in obj.borrow().properties().iter() { let key = match &key { - PropertyKey::String(string) => string.as_str().to_owned(), + PropertyKey::String(string) => string.to_std_string_escaped(), PropertyKey::Index(i) => i.to_string(), PropertyKey::Symbol(_sym) => { return context.throw_type_error("cannot convert Symbol to JSON") diff --git a/boa_engine/src/value/tests.rs b/boa_engine/src/value/tests.rs index dce5bb5a0b4..77c030a832e 100644 --- a/boa_engine/src/value/tests.rs +++ b/boa_engine/src/value/tests.rs @@ -1,7 +1,7 @@ #![allow(clippy::float_cmp)] use super::*; -use crate::{check_output, forward, forward_val, Context, TestAction}; +use crate::{check_output, forward, forward_val, string::utf16, Context, TestAction}; use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; @@ -279,7 +279,7 @@ fn add_number_and_string() { let value = forward_val(&mut context, "1 + \" + 2 = 3\"").unwrap(); let value = value.to_string(&mut context).unwrap(); - assert_eq!(value, "1 + 2 = 3"); + assert_eq!(&value, utf16!("1 + 2 = 3")); } #[test] @@ -288,7 +288,7 @@ fn add_string_and_string() { let value = forward_val(&mut context, "\"Hello\" + \", world\"").unwrap(); let value = value.to_string(&mut context).unwrap(); - assert_eq!(value, "Hello, world"); + assert_eq!(&value, utf16!("Hello, world")); } #[test] @@ -306,7 +306,7 @@ fn add_number_object_and_string_object() { let value = forward_val(&mut context, "new Number(10) + new String(\"0\")").unwrap(); let value = 
value.to_string(&mut context).unwrap(); - assert_eq!(value, "100"); + assert_eq!(&value, utf16!("100")); } #[test] @@ -694,7 +694,7 @@ mod cyclic_conversions { let value = forward_val(&mut context, src).unwrap(); let result = value.as_string().unwrap(); - assert_eq!(result, "[[],[]]",); + assert_eq!(result, utf16!("[[],[]]")); } // These tests don't throw errors. Instead we mirror Chrome / Firefox behavior for these @@ -711,7 +711,7 @@ mod cyclic_conversions { let value = forward_val(&mut context, src).unwrap(); let result = value.as_string().unwrap(); - assert_eq!(result, ""); + assert_eq!(result, utf16!("")); } #[test] diff --git a/boa_engine/src/vm/code_block.rs b/boa_engine/src/vm/code_block.rs index d23f8481db3..38e8707f650 100644 --- a/boa_engine/src/vm/code_block.rs +++ b/boa_engine/src/vm/code_block.rs @@ -13,6 +13,7 @@ use crate::{ }, context::intrinsics::StandardConstructors, environments::{BindingLocator, CompileTimeEnvironment}, + js_string, object::{ internal_methods::get_prototype_from_constructor, JsObject, ObjectData, PrivateElement, }, @@ -20,7 +21,7 @@ use crate::{ syntax::ast::node::FormalParameterList, vm::call_frame::GeneratorResumeKind, vm::{call_frame::FinallyReturn, CallFrame, Opcode}, - Context, JsResult, JsValue, + Context, JsResult, JsString, JsValue, }; use boa_gc::{Cell, Finalize, Gc, Trace}; use boa_interner::{Interner, Sym, ToInternedString}; @@ -417,7 +418,7 @@ impl ToInternedString for CodeBlock { for (i, value) in self.literals.iter().enumerate() { f.push_str(&format!( " {i:04}: <{}> {}\n", - value.type_of(), + value.type_of().to_std_string_escaped(), value.display() )); } @@ -476,7 +477,12 @@ pub(crate) fn create_function_object( let prototype = context.construct_object(); let name_property = PropertyDescriptor::builder() - .value(context.interner().resolve_expect(code.name)) + .value( + context + .interner() + .resolve_expect(code.name) + .into_common::(false), + ) .writable(false) .enumerable(false) .configurable(true) @@ -528,7 
+534,7 @@ pub(crate) fn create_function_object( .build(); prototype - .define_property_or_throw("constructor", constructor_property, context) + .define_property_or_throw(js_string!("constructor"), constructor_property, context) .expect("failed to define the constructor property of the function"); let prototype_property = PropertyDescriptor::builder() @@ -539,14 +545,14 @@ pub(crate) fn create_function_object( .build(); constructor - .define_property_or_throw("length", length_property, context) + .define_property_or_throw(js_string!("length"), length_property, context) .expect("failed to define the length property of the function"); constructor - .define_property_or_throw("name", name_property, context) + .define_property_or_throw(js_string!("name"), name_property, context) .expect("failed to define the name property of the function"); if !r#async { constructor - .define_property_or_throw("prototype", prototype_property, context) + .define_property_or_throw(js_string!("prototype"), prototype_property, context) .expect("failed to define the prototype property of the function"); } @@ -574,7 +580,12 @@ pub(crate) fn create_generator_function_object( }; let name_property = PropertyDescriptor::builder() - .value(context.interner().resolve_expect(code.name)) + .value( + context + .interner() + .resolve_expect(code.name) + .into_common::(false), + ) .writable(false) .enumerable(false) .configurable(true) @@ -625,13 +636,13 @@ pub(crate) fn create_generator_function_object( .build(); constructor - .define_property_or_throw("prototype", prototype_property, context) + .define_property_or_throw(js_string!("prototype"), prototype_property, context) .expect("failed to define the prototype property of the generator function"); constructor - .define_property_or_throw("name", name_property, context) + .define_property_or_throw(js_string!("name"), name_property, context) .expect("failed to define the name property of the generator function"); constructor - 
.define_property_or_throw("length", length_property, context) + .define_property_or_throw(js_string!("length"), length_property, context) .expect("failed to define the length property of the generator function"); constructor diff --git a/boa_engine/src/vm/mod.rs b/boa_engine/src/vm/mod.rs index 1b6d516ec22..774f86442b7 100644 --- a/boa_engine/src/vm/mod.rs +++ b/boa_engine/src/vm/mod.rs @@ -388,7 +388,7 @@ impl Context { if !rhs.is_object() { return self.throw_type_error(format!( "right-hand side of 'in' should be an object, got {}", - rhs.type_of() + rhs.type_of().to_std_string_escaped() )); } let key = lhs.to_property_key(self)?; @@ -481,7 +481,7 @@ impl Context { let key = self .interner() .resolve_expect(binding_locator.name()) - .into(); + .into_common(false); self.global_bindings_mut().entry(key).or_insert( PropertyDescriptor::builder() .value(JsValue::Undefined) @@ -508,6 +508,7 @@ impl Context { let key = self .interner() .resolve_expect(binding_locator.name()) + .into_common::(false) .into(); crate::object::internal_methods::global::global_set_no_receiver( &key, value, self, @@ -555,7 +556,7 @@ impl Context { let key: JsString = self .interner() .resolve_expect(binding_locator.name()) - .into(); + .into_common(false); match self.global_bindings_mut().get(&key) { Some(desc) => match desc.kind() { DescriptorKind::Data { @@ -568,12 +569,17 @@ impl Context { self.call(&get, &self.global_object().clone().into(), &[])? 
} _ => { - return self - .throw_reference_error(format!("{key} is not defined")) + return self.throw_reference_error(format!( + "{} is not defined", + key.to_std_string_escaped() + )) } }, _ => { - return self.throw_reference_error(format!("{key} is not defined")) + return self.throw_reference_error(format!( + "{} is not defined", + key.to_std_string_escaped() + )) } } } @@ -584,9 +590,11 @@ impl Context { ) { value } else { - let name = - JsString::from(self.interner().resolve_expect(binding_locator.name())); - return self.throw_reference_error(format!("{name} is not initialized")); + let name = self + .interner() + .resolve_expect(binding_locator.name()) + .to_string(); + return self.throw_reference_error(format!("{name} is not initialized",)); }; self.vm.push(value); @@ -606,7 +614,7 @@ impl Context { let key: JsString = self .interner() .resolve_expect(binding_locator.name()) - .into(); + .into_common(false); match self.global_bindings_mut().get(&key) { Some(desc) => match desc.kind() { DescriptorKind::Data { @@ -650,12 +658,13 @@ impl Context { let key: JsString = self .interner() .resolve_expect(binding_locator.name()) - .into(); + .into_common(false); let exists = self.global_bindings_mut().contains_key(&key); if !exists && self.vm.frame().code.strict { return self.throw_reference_error(format!( - "assignment to undeclared variable {key}" + "assignment to undeclared variable {}", + key.to_std_string_escaped() )); } @@ -668,7 +677,8 @@ impl Context { if !success && self.vm.frame().code.strict { return self.throw_type_error(format!( - "cannot set non-writable property: {key}", + "cannot set non-writable property: {}", + key.to_std_string_escaped() )); } } @@ -741,7 +751,11 @@ impl Context { }; let name = self.vm.frame().code.names[index as usize]; - let name: PropertyKey = self.interner().resolve_expect(name).into(); + let name: PropertyKey = self + .interner() + .resolve_expect(name) + .into_common::(false) + .into(); let result = object.get(name, self)?; 
self.vm.push(result); @@ -787,7 +801,11 @@ impl Context { }; let name = self.vm.frame().code.names[index as usize]; - let name: PropertyKey = self.interner().resolve_expect(name).into(); + let name: PropertyKey = self + .interner() + .resolve_expect(name) + .into_common::(false) + .into(); object.set(name, value, self.vm.frame().code.strict, self)?; } @@ -801,7 +819,10 @@ impl Context { object.to_object(self)? }; let name = self.vm.frame().code.names[index as usize]; - let name = self.interner().resolve_expect(name); + let name = self + .interner() + .resolve_expect(name) + .into_common::(false); object.__define_own_property__( name.into(), PropertyDescriptor::builder() @@ -832,7 +853,7 @@ impl Context { let name = self.vm.frame().code.names[index as usize]; let name = self.interner().resolve_expect(name); object.__define_own_property__( - name.into(), + name.into_common::(false).into(), PropertyDescriptor::builder() .value(value) .writable(true) @@ -910,7 +931,11 @@ impl Context { let value = self.vm.pop(); let object = object.to_object(self)?; let name = self.vm.frame().code.names[index as usize]; - let name = self.interner().resolve_expect(name).into(); + let name = self + .interner() + .resolve_expect(name) + .into_common::(false) + .into(); let set = object .__get_own_property__(&name, self)? .as_ref() @@ -940,7 +965,11 @@ impl Context { .expect("method must be function object") .set_home_object(object.clone()); let name = self.vm.frame().code.names[index as usize]; - let name = self.interner().resolve_expect(name).into(); + let name = self + .interner() + .resolve_expect(name) + .into_common::(false) + .into(); let set = object .__get_own_property__(&name, self)? 
.as_ref() @@ -1014,7 +1043,11 @@ impl Context { let value = self.vm.pop(); let object = object.to_object(self)?; let name = self.vm.frame().code.names[index as usize]; - let name = self.interner().resolve_expect(name).into(); + let name = self + .interner() + .resolve_expect(name) + .into_common::(false) + .into(); let get = object .__get_own_property__(&name, self)? .as_ref() @@ -1044,7 +1077,11 @@ impl Context { .expect("method must be function object") .set_home_object(object.clone()); let name = self.vm.frame().code.names[index as usize]; - let name = self.interner().resolve_expect(name).into(); + let name = self + .interner() + .resolve_expect(name) + .into_common::(false) + .into(); let get = object .__get_own_property__(&name, self)? .as_ref() @@ -1347,7 +1384,11 @@ impl Context { Opcode::DeletePropertyByName => { let index = self.vm.read::(); let key = self.vm.frame().code.names[index as usize]; - let key = self.interner().resolve_expect(key).into(); + let key = self + .interner() + .resolve_expect(key) + .into_common::(false) + .into(); let object = self.vm.pop(); let result = object.to_object(self)?.__delete__(&key, self)?; if !result && self.vm.frame().code.strict { @@ -2177,7 +2218,10 @@ impl Context { } strings.reverse(); let s = JsString::concat_array( - &strings.iter().map(JsString::as_str).collect::>(), + &strings + .iter() + .map(JsString::as_slice) + .collect::>(), ); self.vm.push(s); } diff --git a/boa_examples/src/bin/classes.rs b/boa_examples/src/bin/classes.rs index 951284ce813..a45eb3f5906 100644 --- a/boa_examples/src/bin/classes.rs +++ b/boa_examples/src/bin/classes.rs @@ -1,8 +1,9 @@ // NOTE: this example requires the `console` feature to run correctly. 
use boa_engine::{ + builtins::JsArgs, class::{Class, ClassBuilder}, property::Attribute, - Context, JsResult, JsValue, + Context, JsResult, JsString, JsValue, }; use boa_gc::{Finalize, Trace}; @@ -17,7 +18,7 @@ use boa_gc::{Finalize, Trace}; #[derive(Debug, Trace, Finalize)] struct Person { /// The name of the person. - name: String, + name: JsString, /// The age of the person. age: u32, } @@ -36,7 +37,7 @@ impl Person { // and print a message to stdout. println!( "Hello my name is {}, I'm {} years old", - person.name, + person.name.to_std_string_escaped(), person.age // Here we can access the native rust fields of the struct. ); return Ok(JsValue::undefined()); @@ -64,11 +65,7 @@ impl Class for Person { // and then we call `to_string()`. // // This is equivalent to `String(arg)`. - let name = args - .get(0) - .cloned() - .unwrap_or_default() - .to_string(context)?; + let name = args.get_or_undefined(0).to_string(context)?; // We get the second argument. If it is unavailable we default to `undefined`, // and then we call `to_u32`. // @@ -76,10 +73,7 @@ impl Class for Person { let age = args.get(1).cloned().unwrap_or_default().to_u32(context)?; // We construct a new native struct `Person` - let person = Person { - name: name.to_string(), - age, - }; + let person = Person { name, age }; Ok(person) // and we return it. } diff --git a/boa_examples/src/bin/closures.rs b/boa_examples/src/bin/closures.rs index 2c453e40702..412336cd0d5 100644 --- a/boa_examples/src/bin/closures.rs +++ b/boa_examples/src/bin/closures.rs @@ -2,8 +2,10 @@ // inside Rust and call them from Javascript. use boa_engine::{ + js_string, object::{FunctionBuilder, JsObject}, property::{Attribute, PropertyDescriptor}, + string::utf16, Context, JsString, JsValue, }; use boa_gc::{Finalize, Trace}; @@ -67,17 +69,17 @@ fn main() -> Result<(), JsValue> { let name = captures.object.get("name", context)?; // We create a new message from our captured variable. 
- let message = JsString::concat_array(&[ - "message from `", - name.to_string(context)?.as_str(), - "`: ", - captures.greeting.as_str(), - ]); + let message = js_string!( + utf16!("message from `"), + &name.to_string(context)?, + utf16!("`: "), + &captures.greeting + ); // We can also mutate the moved data inside the closure. - captures.greeting = format!("{} Hello!", captures.greeting).into(); + captures.greeting = js_string!(&captures.greeting, utf16!(" Hello!")); - println!("{message}"); + println!("{}", message.to_std_string_escaped()); println!(); // We convert `message` into `JsValue` to be able to return it. diff --git a/boa_examples/src/bin/jsarray.rs b/boa_examples/src/bin/jsarray.rs index 1159a57dbd2..ae79ceddd6b 100644 --- a/boa_examples/src/bin/jsarray.rs +++ b/boa_examples/src/bin/jsarray.rs @@ -2,6 +2,7 @@ use boa_engine::{ object::{builtins::JsArray, FunctionBuilder}, + string::utf16, Context, JsResult, JsValue, }; @@ -52,12 +53,12 @@ fn main() -> JsResult<()> { // Join the array with an optional separator (default ","). let joined_array = array.join(None, context)?; - assert_eq!(joined_array, "14,13,12,11,10"); + assert_eq!(&joined_array, utf16!("14,13,12,11,10")); array.fill(false, Some(1), Some(4), context)?; let joined_array = array.join(Some("::".into()), context)?; - assert_eq!(joined_array, "14::false::false::false::10"); + assert_eq!(&joined_array, utf16!("14::false::false::false::10")); let filter_callback = FunctionBuilder::native(context, |_this, args, _context| { Ok(args.get(0).cloned().unwrap_or_default().is_number().into()) @@ -85,7 +86,7 @@ fn main() -> JsResult<()> { .concat(&[another_array.into()], context)? 
// [ 100, 196, 1, 2, 3, 4, 5 ] .slice(Some(1), Some(5), context)?; // [ 196, 1, 2, 3 ] - assert_eq!(chained_array.join(None, context)?, "196,1,2,3"); + assert_eq!(&chained_array.join(None, context)?, utf16!("196,1,2,3")); let reduce_callback = FunctionBuilder::native(context, |_this, args, context| { let accumulator = args.get(0).cloned().unwrap_or_default(); diff --git a/boa_examples/src/bin/loadfile.rs b/boa_examples/src/bin/loadfile.rs index 1692aecf92c..f6c92a1dce3 100644 --- a/boa_examples/src/bin/loadfile.rs +++ b/boa_examples/src/bin/loadfile.rs @@ -15,7 +15,10 @@ fn main() { // Parse the source code match context.eval(src) { Ok(res) => { - println!("{}", res.to_string(&mut context).unwrap()); + println!( + "{}", + res.to_string(&mut context).unwrap().to_std_string_escaped() + ); } Err(e) => { // Pretty print the error diff --git a/boa_examples/src/bin/loadstring.rs b/boa_examples/src/bin/loadstring.rs index f3c50098e61..ed9e9843d9d 100644 --- a/boa_examples/src/bin/loadstring.rs +++ b/boa_examples/src/bin/loadstring.rs @@ -11,7 +11,10 @@ fn main() { // Parse the source code match context.eval(js_code) { Ok(res) => { - println!("{}", res.to_string(&mut context).unwrap()); + println!( + "{}", + res.to_string(&mut context).unwrap().to_std_string_escaped() + ); } Err(e) => { // Pretty print the error diff --git a/boa_examples/src/bin/modulehandler.rs b/boa_examples/src/bin/modulehandler.rs index e482447e331..1bd4b76514b 100644 --- a/boa_examples/src/bin/modulehandler.rs +++ b/boa_examples/src/bin/modulehandler.rs @@ -39,7 +39,7 @@ fn require(_: &JsValue, args: &[JsValue], ctx: &mut Context) -> JsResult { + inner: Vec, } -impl FixedString { +impl Default for FixedString { + fn default() -> Self { + Self { + inner: Vec::default(), + } + } +} + +impl FixedString { /// Creates a new, pinned [`FixedString`]. 
pub(super) fn new(capacity: usize) -> Self { Self { - inner: String::with_capacity(capacity), + inner: Vec::with_capacity(capacity), } } @@ -23,7 +31,12 @@ impl FixedString { pub(super) fn is_empty(&self) -> bool { self.inner.is_empty() } +} +impl FixedString +where + Char: Clone, +{ /// Tries to push `string` to the [`FixedString`], and returns /// an [`InternedStr`] pointer to the stored `string`, or /// `None` if the capacity is not enough to store `string`. @@ -31,32 +44,35 @@ impl FixedString { /// # Safety /// /// The caller is responsible for ensuring `self` outlives the returned - /// `InternedStr`. - pub(super) unsafe fn push(&mut self, string: &str) -> Option { + /// [`InternedStr`]. + pub(super) unsafe fn push(&mut self, string: &[Char]) -> Option> { let capacity = self.inner.capacity(); (capacity >= self.inner.len() + string.len()).then(|| { - let old_len = self.inner.len(); - self.inner.push_str(string); - // SAFETY: The caller is responsible for extending the lifetime + // SAFETY: + // The caller is responsible for extending the lifetime // of `self` to outlive the return value. - unsafe { InternedStr::new(self.inner[old_len..self.inner.len()].into()) } + unsafe { self.push_unchecked(string) } }) } /// Pushes `string` to the [`FixedString`], and returns /// an [`InternedStr`] pointer to the stored `string`, without - /// checking if the total `capacity` is enough to store `string`. + /// checking if the total `capacity` is enough to store `string`, + /// and without checking if the string is correctly aligned. /// /// # Safety /// /// The caller is responsible for ensuring that `self` outlives the returned - /// `InternedStr` and that it has enough capacity to store `string` without + /// [`InternedStr`] and that it has enough capacity to store `string` without /// reallocating. 
- pub(super) unsafe fn push_unchecked(&mut self, string: &str) -> InternedStr { + pub(super) unsafe fn push_unchecked(&mut self, string: &[Char]) -> InternedStr { let old_len = self.inner.len(); - self.inner.push_str(string); + self.inner.extend_from_slice(string); + // SAFETY: The caller is responsible for extending the lifetime - // of `self` to outlive the return value. - unsafe { InternedStr::new(self.inner[old_len..self.inner.len()].into()) } + // of `self` to outlive the return value, and for ensuring + // the alignment of `string` is correct. + let ptr = &self.inner[old_len..self.inner.len()]; + unsafe { InternedStr::new(ptr.into()) } } } diff --git a/boa_interner/src/interned_str.rs b/boa_interner/src/interned_str.rs index f8db8154300..7f5654a22cf 100644 --- a/boa_interner/src/interned_str.rs +++ b/boa_interner/src/interned_str.rs @@ -1,4 +1,4 @@ -use std::{borrow::Borrow, ptr::NonNull}; +use std::{hash::Hash, ptr::NonNull}; /// Wrapper for an interned str pointer, required to /// quickly check using a hash if a string is inside an [`Interner`][`super::Interner`]. @@ -7,23 +7,27 @@ use std::{borrow::Borrow, ptr::NonNull}; /// /// This struct could cause Undefined Behaviour on: /// - Use without ensuring the referenced memory is still allocated. -/// - Construction of an [`InternedStr`] from an invalid [`NonNull`]. +/// - Construction of an [`InternedStr`] from an invalid [`NonNull`] pointer. +/// - Construction of an [`InternedStr`] from a [`NonNull`] pointer +/// without checking if the pointed memory of the [`NonNull`] outlives +/// the [`InternedStr`]. /// /// In general, this should not be used outside of an [`Interner`][`super::Interner`]. -#[derive(Debug, Clone)] -pub(super) struct InternedStr { - ptr: NonNull, +#[derive(Debug)] +pub(super) struct InternedStr { + ptr: NonNull<[Char]>, } -impl InternedStr { - /// Create a new interned string from the given `str`. 
+impl InternedStr { + /// Create a new interned string from the given [`NonNull`] pointer + /// to a slice of `Char`s. /// /// # Safety /// /// Not maintaining the invariants specified on the struct definition /// could cause Undefined Behaviour. #[inline] - pub(super) unsafe fn new(ptr: NonNull) -> Self { + pub(super) unsafe fn new(ptr: NonNull<[Char]>) -> Self { Self { ptr } } @@ -34,37 +38,45 @@ impl InternedStr { /// Not maintaining the invariants specified on the struct definition /// could cause Undefined Behaviour. #[inline] - pub(super) unsafe fn as_str(&self) -> &str { - // SAFETY: The caller must verify the invariants - // specified on the struct definition. + pub(super) unsafe fn as_ref(&self) -> &[Char] { + // SAFETY: + // The caller must ensure `ptr` is still valid throughout the + // lifetime of `self`. unsafe { self.ptr.as_ref() } } } -impl std::hash::Hash for InternedStr { - fn hash(&self, state: &mut H) { - // SAFETY: The caller must verify the invariants - // specified in the struct definition. - unsafe { - self.as_str().hash(state); - } +impl Clone for InternedStr { + fn clone(&self) -> Self { + Self { ptr: self.ptr } } } -impl Eq for InternedStr {} +impl Copy for InternedStr {} -impl PartialEq for InternedStr { +impl Eq for InternedStr where Char: Eq {} + +impl PartialEq for InternedStr +where + Char: PartialEq, +{ fn eq(&self, other: &Self) -> bool { // SAFETY: The caller must verify the invariants // specified in the struct definition. - unsafe { self.as_str() == other.as_str() } + unsafe { self.as_ref() == other.as_ref() } } } -impl Borrow for InternedStr { - fn borrow(&self) -> &str { - // SAFETY: The caller must verify the invariants - // specified in the struct definition. - unsafe { self.as_str() } +impl Hash for InternedStr +where + Char: Hash, +{ + fn hash(&self, state: &mut H) { + // SAFETY: + // The caller must ensure `ptr` is still valid throughout the + // lifetime of `self`.
+ unsafe { + self.as_ref().hash(state); + } } } diff --git a/boa_interner/src/lib.rs b/boa_interner/src/lib.rs index 94e1bddb3c9..079e0582282 100644 --- a/boa_interner/src/lib.rs +++ b/boa_interner/src/lib.rs @@ -67,44 +67,150 @@ clippy::missing_errors_doc, clippy::as_conversions, clippy::let_unit_value, - rustdoc::missing_doc_code_examples + // TODO deny once false positive is fixed (https://github.com/rust-lang/rust-clippy/issues/9626). + clippy::trait_duplication_in_bounds, + rustdoc::missing_doc_code_examples, )] extern crate static_assertions as sa; mod fixed_string; mod interned_str; +mod raw; mod sym; #[cfg(test)] mod tests; -use fixed_string::FixedString; +use std::borrow::Cow; + +use raw::RawInterner; pub use sym::*; -use std::fmt::{Debug, Display}; +/// An enumeration of all slice types [`Interner`] can internally store. +/// +/// This enum allows us to intern either `UTF-8` or `UTF-16` str references, which are the two +/// encodings [`Interner`] can store. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum JStrRef<'a> { + Utf8(&'a str), + Utf16(&'a [u16]), +} + +impl<'a> From<&'a str> for JStrRef<'a> { + fn from(s: &'a str) -> Self { + JStrRef::Utf8(s) + } +} + +impl<'a> From<&'a [u16]> for JStrRef<'a> { + fn from(s: &'a [u16]) -> Self { + JStrRef::Utf16(s) + } +} + +impl<'a, const N: usize> From<&'a [u16; N]> for JStrRef<'a> { + fn from(s: &'a [u16; N]) -> Self { + JStrRef::Utf16(s) + } +} + +/// A double reference to an interned string inside [`Interner`]. +/// +/// [`JSInternedStrRef::utf8`] returns an [`Option`], since not every `UTF-16` string is fully +/// representable as a `UTF-8` string (because of unpaired surrogates). However, every `UTF-8` +/// string is representable as a `UTF-16` string, so `JSInternedStrRef::utf16` returns a +/// [&\[u16\]][std::slice].
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct JSInternedStrRef<'a, 'b> { + utf8: Option<&'a str>, + utf16: &'b [u16], +} + +impl<'a, 'b> JSInternedStrRef<'a, 'b> { + /// Returns the inner reference to the interned string in `UTF-8` encoding. + /// if the string is not representable in `UTF-8`, returns [`None`] + pub fn utf8(&self) -> Option<&'a str> { + self.utf8 + } + + /// Returns the inner reference to the interned string in `UTF-16` encoding. + pub fn utf16(&self) -> &'b [u16] { + self.utf16 + } -use interned_str::InternedStr; -use rustc_hash::FxHashMap; + /// Joins the result of both possible strings into a common type. + /// + /// If `self` is representable by a `UTF-8` string and the `prioritize_utf8` argument is set, + /// it will prioritize calling `f`, and will only call `g` if `self` is only representable by a + /// `UTF-16` string. Otherwise, it will directly call `g`. + pub fn join(self, f: F, g: G, prioritize_utf8: bool) -> T + where + F: FnOnce(&'a str) -> T, + G: FnOnce(&'b [u16]) -> T, + { + if prioritize_utf8 { + if let Some(str) = self.utf8 { + return f(str); + } + } + g(self.utf16) + } + + /// Same as [`join`][`JSInternedStrRef::join`], but where you can pass an additional context. + /// + /// Useful when you have a `&mut Context` context that cannot be borrowed by both closures at + /// the same time. + pub fn join_with_context(self, f: F, g: G, ctx: C, prioritize_utf8: bool) -> T + where + F: FnOnce(&'a str, C) -> T, + G: FnOnce(&'b [u16], C) -> T, + { + if prioritize_utf8 { + if let Some(str) = self.utf8 { + return f(str, ctx); + } + } + g(self.utf16, ctx) + } + + /// Converts both string types into a common type `C`. + /// + /// If `self` is representable by a `UTF-8` string and the `prioritize_utf8` argument is set, it + /// will prioritize converting its `UTF-8` representation first, and will only convert its + /// `UTF-16` representation if it is only representable by a `UTF-16` string. 
Otherwise, it will + /// directly convert its `UTF-16` representation. + pub fn into_common(self, prioritize_utf8: bool) -> C + where + C: From<&'a str> + From<&'b [u16]>, + { + self.join(Into::into, Into::into, prioritize_utf8) + } +} + +impl<'a, 'b> std::fmt::Display for JSInternedStrRef<'a, 'b> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.join_with_context( + std::fmt::Display::fmt, + |js, f| { + char::decode_utf16(js.iter().copied()) + .map(|r| match r { + Ok(c) => String::from(c), + Err(e) => format!("\\u{:04X}", e.unpaired_surrogate()), + }) + .collect::() + .fmt(f) + }, + f, + true, + ) + } +} /// The string interner for Boa. #[derive(Debug, Default)] pub struct Interner { - // COMMENT FOR DEVS: - // This interner works on the assumption that - // `head` won't ever be reallocated, since this could invalidate - // some of our stored pointers inside `spans`. - // This means that any operation on `head` and `full` should be carefully - // reviewed to not cause Undefined Behaviour. - // `get_or_intern` has a more thorough explanation on this. - // - // Also, if you want to implement `shrink_to_fit` (and friends), - // please check out https://github.com/Robbepop/string-interner/pull/47 first. - // This doesn't implement that method, since implementing it increases - // our memory footprint. - symbols: FxHashMap, - spans: Vec, - head: FixedString, - full: Vec, + utf8_interner: RawInterner, + utf16_interner: RawInterner, } impl Interner { @@ -118,34 +224,44 @@ impl Interner { #[inline] pub fn with_capacity(capacity: usize) -> Self { Self { - symbols: FxHashMap::default(), - spans: Vec::with_capacity(capacity), - head: FixedString::new(capacity), - full: Vec::new(), + utf8_interner: RawInterner::with_capacity(capacity), + utf16_interner: RawInterner::with_capacity(capacity), } } /// Returns the number of strings interned by the interner. 
#[inline] pub fn len(&self) -> usize { - COMMON_STRINGS.len() + self.spans.len() + // `utf16_interner.len()` == `utf8_interner.len()`, + // so we can use any of them. + COMMON_STRINGS_UTF8.len() + self.utf16_interner.len() } /// Returns `true` if the [`Interner`] contains no interned strings. #[inline] pub fn is_empty(&self) -> bool { - COMMON_STRINGS.is_empty() && self.spans.is_empty() + COMMON_STRINGS_UTF8.is_empty() && self.utf16_interner.is_empty() } /// Returns the symbol for the given string if any. /// /// Can be used to query if a string has already been interned without interning. - pub fn get(&self, string: T) -> Option + pub fn get<'a, T>(&self, string: T) -> Option where - T: AsRef, + T: Into>, { - let string = string.as_ref(); - Self::get_common(string).or_else(|| self.symbols.get(string).copied()) + let string = string.into(); + Self::get_common(string).or_else(|| { + let index = match string { + JStrRef::Utf8(s) => self.utf8_interner.get(s.as_bytes()), + JStrRef::Utf16(s) => self.utf16_interner.get(s), + }; + // SAFETY: + // `get_or_intern/get_or_intern_static` already have checks to avoid returning indices + // that could cause overflows, meaning the indices returned by + // `idx + 1 + COMMON_STRINGS_UTF8.len()` cannot cause overflows. + unsafe { index.map(|i| Sym::new_unchecked(i + 1 + COMMON_STRINGS_UTF8.len())) } + }) } /// Interns the given string. @@ -155,65 +271,43 @@ impl Interner { /// # Panics /// /// If the interner already interns the maximum number of strings possible by the chosen symbol type. 
- pub fn get_or_intern(&mut self, string: T) -> Sym + pub fn get_or_intern<'a, T>(&mut self, string: T) -> Sym where - T: AsRef, + T: Into>, { - let string = string.as_ref(); - if let Some(sym) = self.get(string) { - return sym; - } + let string = string.into(); + self.get(string).unwrap_or_else(|| { + let (utf8, utf16) = match string { + JStrRef::Utf8(s) => ( + Some(Cow::Borrowed(s)), + Cow::Owned(s.encode_utf16().collect()), + ), + JStrRef::Utf16(s) => (String::from_utf16(s).ok().map(Cow::Owned), Cow::Borrowed(s)), + }; + + // We need a way to check for the strings that can be interned by `utf16_interner` but + // not by `utf8_interner` (since there are some UTF-16 strings with surrogates that are + // not representable in UTF-8), so we use the sentinel value `""` as a marker indicating + // that the `Sym` corresponding to that string is only available in `utf16_interner`. + // + // We don't need to worry about matches with `""` inside `get`, because + // `COMMON_STRINGS_UTF8` filters all the empty strings before interning. + let index = if let Some(utf8) = utf8 { + self.utf8_interner.intern(utf8.as_bytes()) + } else { + self.utf8_interner.intern_static(b"") + }; + + let utf16_index = self.utf16_interner.intern(&utf16); - // SAFETY: - // - // Firstly, this interner works on the assumption that the allocated - // memory by `head` won't ever be moved from its position on the heap, - // which is an important point to understand why manipulating it like - // this is safe. - // - // `String` (which is simply a `Vec` with additional invariants) - // is essentially a pointer to heap memory that can be moved without - // any problems, since copying a pointer cannot invalidate the memory - // that it points to. - // - // However, `String` CAN be invalidated when pushing, extending or - // shrinking it, since all those operations reallocate on the heap. 
- // - // To prevent that, we HAVE to ensure the capacity will succeed without - // having to reallocate, and the only way to do that without invalidating - // any other alive `InternedStr` is to create a brand new `head` with - // enough capacity and push the old `head` to `full` to keep it alive - // throughout the lifetime of the whole `Interner`. - // - // `FixedString` encapsulates this by only allowing checked `push`es - // to the internal string, but we still have to ensure the memory - // of `head` is not deallocated until the whole `Interner` deallocates, - // which we can do by moving it inside the `Interner` itself, specifically - // on the `full` vector, where every other old `head` also lives. - let interned_str = unsafe { - self.head.push(string).unwrap_or_else(|| { - let new_cap = - (usize::max(self.head.capacity(), string.len()) + 1).next_power_of_two(); - let new_head = FixedString::new(new_cap); - let old_head = std::mem::replace(&mut self.head, new_head); - - // If the user creates an `Interner` - // with `Interner::with_capacity(BIG_NUMBER)` and - // the first interned string's length is bigger than `BIG_NUMBER`, - // `self.full.push(old_head)` would push a big, empty string of - // allocated size `BIG_NUMBER` into `full`. - // This prevents that case. - if !old_head.is_empty() { - self.full.push(old_head); - } - self.head.push_unchecked(string) - }) - }; - - // SAFETY: We are obtaining a pointer to the internal memory of - // `head`, which is alive through the whole life of `Interner`, so - // this is safe. - unsafe { self.generate_symbol(interned_str) } + // Just to check everything is okay + assert_eq!(index, utf16_index); + + index + .checked_add(1 + COMMON_STRINGS_UTF8.len()) + .and_then(Sym::new) + .expect("Cannot intern new string: integer overflow") + }) } /// Interns the given `'static` string. 
@@ -222,32 +316,68 @@ impl Interner { /// /// # Note /// - /// This is more efficient than [`Interner::get_or_intern`], since it - /// avoids storing `string` inside the [`Interner`]. + /// This is more efficient than [`Interner::get_or_intern`], since it avoids allocating space + /// for one `string` inside the [`Interner`], with the disadvantage that you need to provide + /// both the `UTF-8` and the `UTF-16` representation of the string. /// /// # Panics /// - /// If the interner already interns the maximum number of strings possible - /// by the chosen symbol type. - pub fn get_or_intern_static(&mut self, string: &'static str) -> Sym { - self.get(string).unwrap_or_else(|| { - // SAFETY: a static `str` is always alive, so its pointer - // should therefore always be valid. - unsafe { self.generate_symbol(InternedStr::new(string.into())) } + /// If the interner already interns the maximum number of strings possible by the chosen symbol type. + pub fn get_or_intern_static(&mut self, utf8: &'static str, utf16: &'static [u16]) -> Sym { + // Uses the utf8 because it's quicker to check inside `COMMON_STRINGS_UTF8` + // (which is a perfect hash set) than to check inside `COMMON_STRINGS_UTF16` + // (which is a lazy static hash set). + self.get(utf8).unwrap_or_else(|| { + let index = self.utf8_interner.intern(utf8.as_bytes()); + let utf16_index = self.utf16_interner.intern(utf16); + + // Just to check everything is okay + debug_assert_eq!(index, utf16_index); + + index + .checked_add(1 + COMMON_STRINGS_UTF8.len()) + .and_then(Sym::new) + .expect("Cannot intern new string: integer overflow") }) } /// Returns the string for the given symbol if any. 
#[inline] - pub fn resolve(&self, symbol: Sym) -> Option<&str> { + pub fn resolve(&self, symbol: Sym) -> Option> { let index = symbol.get() - 1; - COMMON_STRINGS.index(index).copied().or_else(|| { - self.spans.get(index - COMMON_STRINGS.len()).map(|ptr| - // SAFETY: We always ensure the stored `InternedStr`s always - // reference memory inside `head` and `full` - unsafe {ptr.as_str()}) - }) + if let Some(utf8) = COMMON_STRINGS_UTF8.index(index).copied() { + let utf16 = COMMON_STRINGS_UTF16 + .get_index(index) + .copied() + .expect("The sizes of both statics must be equal"); + return Some(JSInternedStrRef { + utf8: Some(utf8), + utf16, + }); + } + + let index = index - COMMON_STRINGS_UTF8.len(); + + if let Some(utf16) = self.utf16_interner.index(index) { + let index = index - (self.utf16_interner.len() - self.utf8_interner.len()); + // SAFETY: + // We only manipulate valid UTF-8 `str`s and convert them to `[u8]` for convenience, + // so converting back to a `str` is safe. + let utf8 = unsafe { + std::str::from_utf8_unchecked( + self.utf8_interner + .index(index) + .expect("both interners must have the same size"), + ) + }; + return Some(JSInternedStrRef { + utf8: if utf8.is_empty() { None } else { Some(utf8) }, + utf16, + }); + } + + None } /// Returns the string for the given symbol. @@ -256,34 +386,26 @@ impl Interner { /// /// If the interner cannot resolve the given symbol. #[inline] - pub fn resolve_expect(&self, symbol: Sym) -> &str { + pub fn resolve_expect(&self, symbol: Sym) -> JSInternedStrRef<'_, '_> { self.resolve(symbol).expect("string disappeared") } /// Gets the symbol of the common string if one of them - fn get_common(string: &str) -> Option { - COMMON_STRINGS.get_index(string).map(|idx| - // SAFETY: `idx >= 0`, since it's an `usize`, and `idx + 1 > 0`. - // In this case, we don't need to worry about overflows - // because we have a static assertion in place checking that - // `COMMON_STRINGS.len() < usize::MAX`. 
- unsafe { - Sym::new_unchecked(idx + 1) - }) - } - - /// Generates a new symbol for the provided [`str`] pointer. - /// - /// # Safety - /// - /// The caller must ensure `string` points to a valid - /// memory inside `head` and that it won't be invalidated - /// by allocations and deallocations. - unsafe fn generate_symbol(&mut self, string: InternedStr) -> Sym { - let next = Sym::new(self.len() + 1).expect("cannot get interner symbol: integer overflow"); - self.spans.push(string.clone()); - self.symbols.insert(string, next); - next + fn get_common(string: JStrRef<'_>) -> Option { + match string { + JStrRef::Utf8(s) => COMMON_STRINGS_UTF8.get_index(s).map(|idx| { + // SAFETY: `idx >= 0`, since it's an `usize`, and `idx + 1 > 0`. + // In this case, we don't need to worry about overflows because we have a static + // assertion in place checking that `COMMON_STRINGS.len() < usize::MAX`. + unsafe { Sym::new_unchecked(idx + 1) } + }), + JStrRef::Utf16(s) => COMMON_STRINGS_UTF16.get_index_of(&s).map(|idx| { + // SAFETY: `idx >= 0`, since it's an `usize`, and `idx + 1 > 0`. + // In this case, we don't need to worry about overflows because we have a static + // assertion in place checking that `COMMON_STRINGS.len() < usize::MAX`. + unsafe { Sym::new_unchecked(idx + 1) } + }), + } } } @@ -295,7 +417,7 @@ pub trait ToInternedString { impl ToInternedString for T where - T: Display, + T: std::fmt::Display, { fn to_interned_string(&self, _interner: &Interner) -> String { self.to_string() diff --git a/boa_interner/src/raw.rs b/boa_interner/src/raw.rs new file mode 100644 index 00000000000..9604f9bff77 --- /dev/null +++ b/boa_interner/src/raw.rs @@ -0,0 +1,196 @@ +use std::hash::Hash; + +use rustc_hash::FxHashMap; + +use crate::{fixed_string::FixedString, interned_str::InternedStr}; + +/// Raw string interner, generic by a char type. 
+#[derive(Debug)] +pub(super) struct RawInterner { + // COMMENT FOR DEVS: + // This interner works on the assumption that + // `head` won't ever be reallocated, since this could invalidate + // some of our stored pointers inside `spans`. + // This means that any operation on `head` and `full` should be carefully + // reviewed to not cause Undefined Behaviour. + // `intern` has a more thorough explanation on this. + // + // Also, if you want to implement `shrink_to_fit` (and friends), + // please check out https://github.com/Robbepop/string-interner/pull/47 first. + // This doesn't implement that method, since implementing it increases + // our memory footprint. + symbol_cache: FxHashMap, usize>, + spans: Vec>, + head: FixedString, + full: Vec>, +} + +impl Default for RawInterner { + fn default() -> Self { + Self { + symbol_cache: FxHashMap::default(), + spans: Vec::default(), + head: FixedString::default(), + full: Vec::default(), + } + } +} + +impl RawInterner { + /// Creates a new `RawInterner` with the specified capacity. + #[inline] + pub(super) fn with_capacity(capacity: usize) -> Self { + Self { + symbol_cache: FxHashMap::default(), + spans: Vec::with_capacity(capacity), + head: FixedString::new(capacity), + full: Vec::new(), + } + } + + /// Returns the number of strings interned by the interner. + #[inline] + pub(super) fn len(&self) -> usize { + self.spans.len() + } + + /// Returns `true` if the interner contains no interned strings. + #[inline] + pub(super) fn is_empty(&self) -> bool { + self.spans.is_empty() + } +} + +impl RawInterner +where + Char: Hash + Eq, +{ + /// Returns the index position for the given string if any. + /// + /// Can be used to query if a string has already been interned without interning. + pub(super) fn get(&self, string: &[Char]) -> Option { + // SAFETY: + // `string` is a valid slice that doesn't outlive the + // created `InternedStr`, so this is safe. 
+ unsafe { + self.symbol_cache + .get(&InternedStr::new(string.into())) + .copied() + } + } + + /// Interns the given `'static` string. + /// + /// Returns the index of `string` within the interner. + /// + /// # Note + /// + /// This is more efficient than [`RawInterner::intern`], since it + /// avoids storing `string` inside the interner. + /// + /// # Panics + /// + /// If the interner already interns the maximum number of strings possible + /// by the chosen symbol type. + pub(super) fn intern_static(&mut self, string: &'static [Char]) -> usize { + // SAFETY: + // A static string reference is always valid, meaning it cannot outlive + // the lifetime of the created `InternedStr`. This makes this + // operation safe. + let string = unsafe { InternedStr::new(string.into()) }; + + // SAFETY: + // A `InternedStr` created from a static reference + // cannot be invalidated by allocations and deallocations, + // so this is safe. + unsafe { self.next_index(string) } + } + + /// Returns the string for the given index if any. + #[inline] + pub(super) fn index(&self, index: usize) -> Option<&[Char]> { + self.spans.get(index).map(|ptr| + // SAFETY: We always ensure the stored `InternedStr`s always + // reference memory inside `head` and `full` + unsafe {ptr.as_ref()}) + } + + /// Inserts a new string pointer into `spans` and returns its index. + /// + /// # Safety + /// + /// The caller must ensure `string` points to a valid + /// memory inside `head` (or only valid in the case of statics) + /// and that it won't be invalidated by allocations and deallocations. + unsafe fn next_index(&mut self, string: InternedStr) -> usize { + let next = self.len(); + self.spans.push(string); + self.symbol_cache.insert(string, next); + next + } +} + +impl RawInterner +where + Char: Hash + Eq + Clone, +{ + /// Interns the given string. + /// + /// Returns the index of `string` within the interner. 
+ /// + /// # Panics + /// + /// If the interner already interns the maximum number of strings possible by the chosen symbol type. + pub(super) fn intern(&mut self, string: &[Char]) -> usize { + // SAFETY: + // + // Firstly, this interner works on the assumption that the allocated + // memory by `head` won't ever be moved from its position on the heap, + // which is an important point to understand why manipulating it like + // this is safe. + // + // `String` (which is simply a `Vec` with additional invariants) + // is essentially a pointer to heap memory that can be moved without + // any problems, since copying a pointer cannot invalidate the memory + // that it points to. + // + // However, `String` CAN be invalidated when pushing, extending or + // shrinking it, since all those operations reallocate on the heap. + // + // To prevent that, we HAVE to ensure the capacity will succeed without + // having to reallocate, and the only way to do that without invalidating + // any other alive `InternedStr` is to create a brand new `head` with + // enough capacity and push the old `head` to `full` to keep it alive + // throughout the lifetime of the whole interner. + // + // `FixedString` encapsulates this by only allowing checked `push`es + // to the internal string, but we still have to ensure the memory + // of `head` is not deallocated until the whole interner deallocates, + // which we can do by moving it inside the interner itself, specifically + // on the `full` vector, where every other old `head` also lives. 
+ let interned_str = unsafe { + self.head.push(string).unwrap_or_else(|| { + let new_cap = + (usize::max(self.head.capacity(), string.len()) + 1).next_power_of_two(); + let new_head = FixedString::new(new_cap); + let old_head = std::mem::replace(&mut self.head, new_head); + + // If the user creates an `Interner` + // with `Interner::with_capacity(BIG_NUMBER)` and + // the first interned string's length is bigger than `BIG_NUMBER`, + // `self.full.push(old_head)` would push a big, empty string of + // allocated size `BIG_NUMBER` into `full`. + // This prevents that case. + if !old_head.is_empty() { + self.full.push(old_head); + } + self.head.push_unchecked(string) + }) + }; + + // SAFETY: We are obtaining a pointer to the internal memory of + // `head`, which is alive through the whole life of the interner, so + // this is safe. + unsafe { self.next_index(interned_str) } + } +} diff --git a/boa_interner/src/sym.rs b/boa_interner/src/sym.rs index fa30cd26131..ce154e176ee 100644 --- a/boa_interner/src/sym.rs +++ b/boa_interner/src/sym.rs @@ -1,3 +1,6 @@ +use boa_macros::utf16; +use indexmap::IndexSet; +use once_cell::sync::Lazy; use std::num::NonZeroUsize; #[cfg(feature = "serde")] @@ -129,44 +132,65 @@ impl Sym { } } -/// Ordered set of commonly used static strings. -/// -/// # Note -/// -/// `COMMON_STRINGS` and the constants defined in [`Sym`] must always -/// be in sync. -pub(super) static COMMON_STRINGS: phf::OrderedSet<&'static str> = { - const COMMON_STRINGS: phf::OrderedSet<&'static str> = phf::phf_ordered_set! { - "", - "arguments", - "await", - "yield", - "eval", - "default", - "null", - "RegExp", - "get", - "set", - "
", - "raw", - "static", - "prototype", - "constructor", - "implements", - "interface", - "let", - "package", - "private", - "protected", - "public", - "anonymous", - "true", - "false", - "async", - "of", - "target", +macro_rules! create_static_strings { + ( $( $s:literal ),+ ) => { + /// Ordered set of commonly used static `UTF-8` strings. + /// + /// # Note + /// + /// `COMMON_STRINGS_UTF8`, `COMMON_STRINGS_UTF16` and the constants + /// defined in [`Sym`] must always be in sync. + pub(super) static COMMON_STRINGS_UTF8: phf::OrderedSet<&'static str> = { + const COMMON_STRINGS: phf::OrderedSet<&'static str> = phf::phf_ordered_set! { + $( $s ),+ + }; + // A `COMMON_STRINGS` of size `usize::MAX` would cause an overflow on our `Interner` + sa::const_assert!(COMMON_STRINGS.len() < usize::MAX); + COMMON_STRINGS + }; + + /// Ordered set of commonly used static `UTF-16` strings. + /// + /// # Note + /// + /// `COMMON_STRINGS_UTF8`, `COMMON_STRINGS_UTF16` and the constants + /// defined in [`Sym`] must always be in sync. + // FIXME: use phf when const expressions are allowed. https://github.com/rust-phf/rust-phf/issues/188 + pub(super) static COMMON_STRINGS_UTF16: Lazy> = Lazy::new(|| { + let mut set = IndexSet::with_capacity(COMMON_STRINGS_UTF8.len()); + $( set.insert(utf16!($s)); )+ + set + }); }; - // A `COMMON_STRINGS` of size `usize::MAX` would cause an overflow on our `Interner` - sa::const_assert!(COMMON_STRINGS.len() < usize::MAX); - COMMON_STRINGS -}; +} + +create_static_strings! { + "", + "arguments", + "await", + "yield", + "eval", + "default", + "null", + "RegExp", + "get", + "set", + "
", + "raw", + "static", + "prototype", + "constructor", + "implements", + "interface", + "let", + "package", + "private", + "protected", + "public", + "anonymous", + "true", + "false", + "async", + "of", + "target" +} diff --git a/boa_interner/src/tests.rs b/boa_interner/src/tests.rs index d42d5b9bf2f..3e5b46f733d 100644 --- a/boa_interner/src/tests.rs +++ b/boa_interner/src/tests.rs @@ -1,4 +1,5 @@ -use crate::{Interner, Sym, COMMON_STRINGS}; +use crate::{Interner, Sym, COMMON_STRINGS_UTF16, COMMON_STRINGS_UTF8}; +use boa_macros::utf16; #[track_caller] fn sym_from_usize(index: usize) -> Sym { @@ -9,7 +10,7 @@ fn sym_from_usize(index: usize) -> Sym { fn check_static_strings() { let mut interner = Interner::default(); - for (i, str) in COMMON_STRINGS.into_iter().enumerate() { + for (i, &str) in COMMON_STRINGS_UTF8.into_iter().enumerate() { assert_eq!(interner.get_or_intern(str), sym_from_usize(i + 1)); } } @@ -18,22 +19,29 @@ fn check_static_strings() { fn check_new_string() { let mut interner = Interner::default(); - assert!(interner.get_or_intern("my test string").get() > COMMON_STRINGS.len()); + assert!(interner.get_or_intern("my test string").get() > COMMON_STRINGS_UTF8.len()); } #[test] fn check_resolve() { let mut interner = Interner::default(); - let strings = ["test string", "arguments", "hello"]; + let utf_8_strings = ["test string", "arguments", "hello"]; + let utf_8_strings = utf_8_strings.into_iter(); + let utf_16_strings = [utf16!("TEST STRING"), utf16!("ARGUMENTS"), utf16!("HELLO")]; + let utf_16_strings = utf_16_strings.into_iter(); - for string in strings { - let sym = interner.get_or_intern(string); + for (s8, s16) in utf_8_strings.zip(utf_16_strings) { + let sym = interner.get_or_intern(s8); let resolved = interner.resolve(sym).unwrap(); - assert_eq!(string, resolved); - - let new_sym = interner.get_or_intern(string); + assert_eq!(Some(s8), resolved.utf8()); + let new_sym = interner.get_or_intern(s8); + assert_eq!(sym, new_sym); + let sym = 
interner.get_or_intern(s16); + let resolved = interner.resolve(sym).unwrap(); + assert_eq!(s16, resolved.utf16()); + let new_sym = interner.get_or_intern(s16); assert_eq!(sym, new_sym); } } @@ -42,17 +50,65 @@ fn check_resolve() { fn check_static_resolve() { let mut interner = Interner::default(); - for string in COMMON_STRINGS + for (utf8, utf16) in COMMON_STRINGS_UTF8 .into_iter() .copied() - .chain(["my test str", "hello world", ";"].into_iter()) + .zip(COMMON_STRINGS_UTF16.iter().copied()) + .chain( + [ + ("my test str", utf16!("my test str")), + ("hello world", utf16!("hello world")), + (";", utf16!(";")), + ] + .into_iter(), + ) { - let sym = interner.get_or_intern_static(string); + let sym = interner.get_or_intern_static(utf8, utf16); let resolved = interner.resolve(sym).unwrap(); - assert_eq!(string, resolved); + assert_eq!(Some(utf8), resolved.utf8()); + assert_eq!(utf16, resolved.utf16()); - let new_sym = interner.get_or_intern(string); + let new_sym = interner.get_or_intern(utf8); assert_eq!(sym, new_sym); } } + +#[test] +fn check_unpaired_surrogates() { + let mut interner = Interner::default(); + + let unp = &[ + 0xDC15u16, 0xDC19, 'h' as u16, 'e' as u16, 'l' as u16, 'l' as u16, 'o' as u16, + ]; + let unp2 = &[ + 0xDC01u16, 'w' as u16, 'o' as u16, 'r' as u16, 0xDCF4, 'l' as u16, 'd' as u16, + ]; + + let sym = interner.get_or_intern("abc"); + let sym2 = interner.get_or_intern("def"); + + let sym3 = interner.get_or_intern(unp); + let sym4 = interner.get_or_intern(utf16!("ghi")); + let sym5 = interner.get_or_intern(unp2); + + let sym6 = interner.get_or_intern("jkl"); + + assert_eq!(interner.resolve_expect(sym).utf8(), Some("abc")); + assert_eq!(interner.resolve_expect(sym).utf16(), utf16!("abc")); + + assert_eq!(interner.resolve_expect(sym2).utf8(), Some("def")); + assert_eq!(interner.resolve_expect(sym2).utf16(), utf16!("def")); + + assert!(interner.resolve_expect(sym3).utf8().is_none()); + assert_eq!(interner.resolve_expect(sym3).utf16(), unp); + + 
assert_eq!(interner.resolve_expect(sym4).utf8(), Some("ghi")); + assert_eq!(interner.resolve_expect(sym4).utf16(), utf16!("ghi")); + + assert!(interner.resolve_expect(sym5).utf8().is_none()); + assert_eq!(interner.resolve_expect(sym5).utf16(), unp2); + + assert_eq!(interner.resolve_expect(sym6).utf8(), Some("jkl")); + assert_eq!(interner.resolve_expect(sym6).utf16(), utf16!("jkl")); +} diff --git a/boa_macros/Cargo.toml b/boa_macros/Cargo.toml new file mode 100644 index 00000000000..80968bcf620 --- /dev/null +++ b/boa_macros/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "boa_macros" +description = "Macros for the Boa JavaScript engine." +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + + +[lib] +proc-macro = true + +[dependencies] +quote = "1.0.21" +syn = "1.0.101" + diff --git a/boa_macros/src/lib.rs b/boa_macros/src/lib.rs new file mode 100644 index 00000000000..205905786c9 --- /dev/null +++ b/boa_macros/src/lib.rs @@ -0,0 +1,15 @@ +use proc_macro::TokenStream; +use quote::quote; +use syn::{parse_macro_input, LitStr}; + +/// Construct a utf-16 array literal from a utf-8 [`str`] literal. +#[proc_macro] +pub fn utf16(input: TokenStream) -> TokenStream { + let literal = parse_macro_input!(input as LitStr); + let utf8 = literal.value(); + let utf16 = utf8.encode_utf16().collect::>(); + quote! 
{ + [#(#utf16),*].as_slice() + } + .into() +} diff --git a/boa_macros/tests/utf16.rs b/boa_macros/tests/utf16.rs new file mode 100644 index 00000000000..c3ece6489b1 --- /dev/null +++ b/boa_macros/tests/utf16.rs @@ -0,0 +1,15 @@ +use boa_macros::utf16; + +#[test] +fn literal() { + let utf16 = utf16!("hello!"); + let manual = "hello!".encode_utf16().collect::>(); + assert_eq!(manual, utf16); +} + +#[test] +fn utf16() { + let utf16 = utf16!("hello!😁😁😁"); + let manual = "hello!😁😁😁".encode_utf16().collect::>(); + assert_eq!(manual, utf16); +} diff --git a/boa_tester/src/exec/js262.rs b/boa_tester/src/exec/js262.rs index f6ad73dd210..57eb9bf08d1 100644 --- a/boa_tester/src/exec/js262.rs +++ b/boa_tester/src/exec/js262.rs @@ -83,12 +83,12 @@ fn detach_array_buffer( /// Accepts a string value as its first argument and executes it as an ECMAScript script. fn eval_script(_this: &JsValue, args: &[JsValue], context: &mut Context) -> JsResult { if let Some(source_text) = args.get(0).and_then(JsValue::as_string) { - match context.parse(source_text.as_str()) { + match context.parse(source_text.to_std_string_escaped()) { // TODO: check strict Err(e) => context.throw_type_error(format!("Uncaught Syntax Error: {e}")), // Calling eval here parses the code a second time. // TODO: We can fix this after we have have defined the public api for the vm executer. 
- Ok(_) => context.eval(source_text.as_str()), + Ok(_) => context.eval(source_text.to_std_string_escaped()), } } else { Ok(JsValue::undefined()) diff --git a/boa_tester/src/exec/mod.rs b/boa_tester/src/exec/mod.rs index 3c519645211..8a38ebda42e 100644 --- a/boa_tester/src/exec/mod.rs +++ b/boa_tester/src/exec/mod.rs @@ -394,7 +394,8 @@ fn test262_print( _context: &mut Context, ) -> JsResult { if let Some(message) = args.get_or_undefined(0).as_string() { - *captures.result.borrow_mut() = Some(message.as_str() == "Test262:AsyncTestComplete"); + *captures.result.borrow_mut() = + Some(message.to_std_string_escaped() == "Test262:AsyncTestComplete"); } else { *captures.result.borrow_mut() = Some(false); }