diff --git a/boa/src/builtins/regexp/mod.rs b/boa/src/builtins/regexp/mod.rs index c587ca56810..e1ca4daab50 100644 --- a/boa/src/builtins/regexp/mod.rs +++ b/boa/src/builtins/regexp/mod.rs @@ -1760,7 +1760,7 @@ fn advance_string_index(s: &JsString, index: usize, unicode: bool) -> usize { // 5. Let cp be ! CodePointAt(S, index). let (_, offset, _) = - crate::builtins::string::code_point_at(s, index as i32).expect("Failed to get code point"); + crate::builtins::string::code_point_at(s, index as i64).expect("Failed to get code point"); index + offset as usize } diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index fd89627c7ea..c0252ecf01b 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -15,7 +15,7 @@ mod tests; use super::JsArgs; use crate::{ - builtins::{string::string_iterator::StringIterator, Array, BuiltIn, RegExp, Symbol}, + builtins::{string::string_iterator::StringIterator, Array, BuiltIn, Number, RegExp, Symbol}, context::StandardObjects, object::{ internal_methods::get_prototype_from_constructor, ConstructorBuilder, JsObject, ObjectData, @@ -24,24 +24,21 @@ use crate::{ symbol::WellKnownSymbols, BoaProfiler, Context, JsResult, JsString, JsValue, }; -use std::{ - char::{decode_utf16, from_u32}, - cmp::{max, min}, - string::String as StdString, -}; +use std::{char::from_u32, cmp::max, string::String as StdString}; use unicode_normalization::UnicodeNormalization; -pub(crate) fn code_point_at(string: &JsString, position: i32) -> Option<(u32, u8, bool)> { - let size = string.encode_utf16().count() as i32; - if position < 0 || position >= size { +pub(crate) fn code_point_at(string: &JsString, position: i64) -> Option<(u32, u8, bool)> { + let size = string.encode_utf16().count(); + if position < 0 || position >= size as i64 { return None; } + let mut encoded = string.encode_utf16(); let first = encoded.nth(position as usize)?; if !is_leading_surrogate(first) && !is_trailing_surrogate(first) { return Some((u32::from(first), 1, false)); } - if is_trailing_surrogate(first) || position + 1 == size { + if is_trailing_surrogate(first) || position + 1 == size as i64 { return Some((u32::from(first), 1, true)); } let second = encoded.next()?; @@ -108,6 +105,7 @@ impl BuiltIn for String { .property("length", 0, attribute) .static_method(Self::raw, "raw", 1) .static_method(Self::from_char_code, "fromCharCode", 1) + .static_method(Self::from_code_point, "fromCodePoint", 1) .method(Self::char_at, "charAt", 1) .method(Self::char_code_at, "charCodeAt", 1) .method(Self::code_point_at, "codePointAt", 1) @@ -230,6 +228,49 @@ impl String { .ok_or_else(|| context.construct_type_error("'this' is not a string")) } + /// `String.fromCodePoint(num1[, ...[, numN]])` + /// + /// The static `String.fromCodePoint()` method returns a string created by using the specified sequence of code points. + /// + /// More information: + /// - [ECMAScript reference][spec] + /// - [MDN documentation][mdn] + /// + /// [spec]: https://tc39.es/ecma262/#sec-string.fromcodepoint + /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/fromCodePoint + pub(crate) fn from_code_point( + _: &JsValue, + args: &[JsValue], + context: &mut Context, + ) -> JsResult { + // 1. Let result be the empty String. + let mut result = StdString::new(); + + // 2. For each element next of codePoints, do + for arg in args.iter() { + // a. Let nextCP be ? ToNumber(next). + let nextcp = arg.to_number(context)?; + + // b. If ! IsIntegralNumber(nextCP) is false, throw a RangeError exception. + if !Number::is_float_integer(nextcp) { + return Err(context.construct_range_error(format!("invalid code point: {nextcp}"))); + } + + // c. If ℝ(nextCP) < 0 or ℝ(nextCP) > 0x10FFFF, throw a RangeError exception. + if nextcp < 0.0 || nextcp > f64::from(0x10FFFF) { + return Err(context.construct_range_error(format!("invalid code point: {nextcp}"))); + } + + // TODO: Full UTF-16 support + // d. Set result to the string-concatenation of result and ! UTF16EncodeCodePoint(ℝ(nextCP)). + result.push(char::try_from(nextcp as u32).unwrap_or('\u{FFFD}' /* replacement char */)); + } + + // 3. Assert: If codePoints is empty, then result is the empty String. + // 4. Return result. + Ok(result.into()) + } + /// `String.prototype.raw( template, ...substitutions )` /// /// More information: @@ -364,17 +405,16 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - let pos = args + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; + let position = args .get(0) .cloned() .unwrap_or_else(JsValue::undefined) - .to_integer(context)? as i32; + .to_integer(context)?; // Fast path returning empty string when pos is obviously out of range - if pos < 0 || pos >= primitive_val.len() as i32 { + if position < 0.0 { return Ok("".into()); } @@ -382,8 +422,10 @@ impl String { // unicode code points // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of // bytes is an O(1) operation. - if let Some(utf16_val) = primitive_val.encode_utf16().nth(pos as usize) { - Ok(JsValue::new(from_u32(u32::from(utf16_val)).unwrap())) + if let Some(utf16_val) = string.encode_utf16().nth(position as usize) { + Ok(char::try_from(u32::from(utf16_val)) + .unwrap_or('\u{FFFD}' /* replacement char */) + .into()) } else { Ok("".into()) } @@ -445,19 +487,20 @@ impl String { ) -> JsResult { // First we get it the actual string a private field stored on the object only the context has access to. // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - let pos = args + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; + let position = args .get(0) .cloned() .unwrap_or_else(JsValue::undefined) - .to_integer(context)? as i32; + .to_integer(context)?; // Fast path returning undefined when pos is obviously out of range - if pos < 0 || pos >= primitive_val.len() as i32 { + if position < 0.0 { return Ok(JsValue::undefined()); } - if let Some((code_point, _, _)) = code_point_at(&primitive_val, pos) { + if let Some((code_point, _, _)) = code_point_at(&string, position as i64) { Ok(JsValue::new(code_point)) } else { Ok(JsValue::undefined()) @@ -485,22 +528,23 @@ impl String { ) -> JsResult { // First we get it the actual string a private field stored on the object only the context has access to. // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - let pos = args + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; + let position = args .get(0) .cloned() .unwrap_or_else(JsValue::undefined) - .to_integer(context)? as i32; + .to_integer(context)?; // Fast path returning NaN when pos is obviously out of range - if pos < 0 || pos >= primitive_val.len() as i32 { + if position < 0.0 || position >= string.len() as f64 { return Ok(JsValue::nan()); } // Calling .len() on a string would give the wrong result, as they are bytes not the number of unicode code points // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of bytes is an O(1) operation. // If there is no element at that index, the result is NaN - if let Some(utf16_val) = primitive_val.encode_utf16().nth(pos as usize) { + if let Some(utf16_val) = string.encode_utf16().nth(position as usize) { Ok(JsValue::new(f64::from(utf16_val))) } else { Ok(JsValue::nan()) @@ -526,8 +570,8 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - let object = this.require_object_coercible(context)?; - let mut string = object.to_string(context)?.to_string(); + let this = this.require_object_coercible(context)?; + let mut string = this.to_string(context)?.to_string(); for arg in args { string.push_str(&arg.to_string(context)?); @@ -552,8 +596,8 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - let object = this.require_object_coercible(context)?; - let string = object.to_string(context)?; + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; if let Some(arg) = args.get(0) { let n = arg.to_integer(context)?; @@ -590,44 +634,43 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; // Calling .len() on a string would give the wrong result, as they are bytes not the number of unicode code points // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of bytes is an O(1) operation. - let length = primitive_val.chars().count() as i32; - - let start = args + let len = string.encode_utf16().count(); + let from = match args .get(0) .cloned() .unwrap_or_else(JsValue::undefined) - .to_integer(context)? as i32; - let end = args + .to_integer(context)? + { + int_start if int_start.is_infinite() && int_start.is_sign_negative() => 0.0, + int_start if int_start < 0.0 => (len as f64 + int_start).max(0.0), + int_start => int_start.min(len as f64), + } as usize; + + let to = match args .get(1) - .cloned() - .unwrap_or_else(|| JsValue::new(length)) - .to_integer(context)? as i32; - - let from = if start < 0 { - max(length.wrapping_add(start), 0) - } else { - min(start, length) - }; - let to = if end < 0 { - max(length.wrapping_add(end), 0) + .filter(|end| !end.is_undefined()) + .map(|end| end.to_integer(context)) + .transpose()? + .unwrap_or(len as f64) + { + int_end if int_end.is_infinite() && int_end.is_sign_negative() => 0.0, + int_end if int_end < 0.0 => (len as f64 + int_end).max(0.0), + int_end => int_end.min(len as f64), + } as usize; + + if from >= to { + Ok("".into()) } else { - min(end, length) - }; - - let span = max(to.wrapping_sub(from), 0); - - let new_str: StdString = primitive_val - .chars() - .skip(from as usize) - .take(span as usize) - .collect(); - Ok(JsValue::new(new_str)) + let span = to - from; + let substring_utf16: Vec = string.encode_utf16().skip(from).take(span).collect(); + let substring_lossy = StdString::from_utf16_lossy(&substring_utf16); + Ok(substring_lossy.into()) + } } /// `String.prototype.startWith( searchString[, position] )` @@ -645,41 +688,40 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; - let arg = args.get_or_undefined(0); + let search_string = args.get(0).cloned().unwrap_or_else(JsValue::undefined); - if Self::is_regexp_object(arg) { + if Self::is_regexp_object(&search_string) { context.throw_type_error( "First argument to String.prototype.startsWith must not be a regular expression", )?; } - let search_string = arg.to_string(context)?; + let search_str = search_string.to_string(context)?; - let length = primitive_val.chars().count() as i32; - let search_length = search_string.chars().count() as i32; + let len = string.encode_utf16().count(); + let search_length = search_str.encode_utf16().count(); // If less than 2 args specified, position is 'undefined', defaults to 0 - let position = if let Some(integer) = args.get(1) { - integer.to_integer(context)? as i32 - } else { - 0 + let pos = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) { + position if position.is_undefined() => 0.0, + position => position.to_integer(context)?, }; - let start = min(max(position, 0), length); - let end = start.wrapping_add(search_length); + let start = pos.min(len as f64).max(0.0); + let end = start + search_length as f64; - if end > length { + if end > len as f64 { Ok(JsValue::new(false)) } else { - // Only use the part of the string from "start" - let this_string: StdString = primitive_val.chars().skip(start as usize).collect(); - Ok(JsValue::new( - this_string.starts_with(search_string.as_str()), - )) + let substring_utf16 = string + .encode_utf16() + .skip(start as usize) + .take(search_length); + let search_str_utf16 = search_str.encode_utf16(); + Ok(JsValue::new(substring_utf16.eq(search_str_utf16))) } } @@ -698,40 +740,44 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; - let arg = args.get_or_undefined(0); + let search_str = match args.get(0).cloned().unwrap_or_else(JsValue::undefined) { + search_string if Self::is_regexp_object(&search_string) => { + return context.throw_type_error( + "First argument to String.prototype.endsWith must not be a regular expression", + ); + } + search_string => search_string.to_string(context)?, + }; - if Self::is_regexp_object(arg) { - context.throw_type_error( - "First argument to String.prototype.endsWith must not be a regular expression", - )?; - } + let len = string.encode_utf16().count(); - let search_string = arg.to_string(context)?; + let pos = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) { + end_position if end_position.is_undefined() => len as f64, + end_position => end_position.to_integer(context)?, + }; - let length = primitive_val.chars().count() as i32; - let search_length = search_string.chars().count() as i32; + let end = pos.max(0.0).min(len as f64) as usize; // If less than 2 args specified, end_position is 'undefined', defaults to // length of this - let end_position = if let Some(integer) = args.get(1) { - integer.to_integer(context)? as i32 - } else { - length - }; + if search_str.is_empty() { + return Ok(JsValue::new(true)); + } - let end = min(max(end_position, 0), length); - let start = end.wrapping_sub(search_length); + let search_length = search_str.encode_utf16().count(); - if start < 0 { + if end < search_length { Ok(JsValue::new(false)) } else { - // Only use the part of the string up to "end" - let this_string: StdString = primitive_val.chars().take(end as usize).collect(); - Ok(JsValue::new(this_string.ends_with(search_string.as_str()))) + let start = end - search_length; + + let substring_utf16 = string.encode_utf16().skip(start).take(search_length); + let search_str_utf16 = search_str.encode_utf16(); + + Ok(JsValue::new(substring_utf16.eq(search_str_utf16))) } } @@ -750,36 +796,32 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - - let arg = args.get_or_undefined(0); - - if Self::is_regexp_object(arg) { - context.throw_type_error( - "First argument to String.prototype.includes must not be a regular expression", - )?; - } - - let search_string = arg.to_string(context)?; + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; - let length = primitive_val.chars().count() as i32; + let search_str = match args.get(0).cloned().unwrap_or_else(JsValue::undefined) { + search_string if Self::is_regexp_object(&search_string) => { + return context.throw_type_error( + "First argument to String.prototype.includes must not be a regular expression", + ); + } + search_string => search_string.to_string(context)?, + }; - // If less than 2 args specified, position is 'undefined', defaults to 0 + let pos = args + .get(1) + .cloned() + .unwrap_or_else(JsValue::undefined) + .to_integer(context)?; + let start = pos.max(0.0) as usize; - let position = if let Some(integer) = args.get(1) { - integer.to_integer(context)? as i32 + let substring_lossy = if start > 0 { + let substring_utf16: Vec = string.encode_utf16().skip(start).collect(); + StdString::from_utf16_lossy(&substring_utf16) } else { - 0 + string.to_string() }; - - let start = min(max(position, 0), length); - - // Take the string from "this" and use only the part of it after "start" - let this_string: StdString = primitive_val.chars().skip(start as usize).collect(); - - Ok(JsValue::new(this_string.contains(search_string.as_str()))) + Ok(substring_lossy.contains(search_str.as_str()).into()) } fn is_regexp_object(value: &JsValue) -> bool { @@ -1099,29 +1141,41 @@ impl String { let this = this.require_object_coercible(context)?; let string = this.to_string(context)?; - let search_string = args + let search_str = args .get(0) .cloned() .unwrap_or_else(JsValue::undefined) .to_string(context)?; - let length = string.chars().count(); - let start = args + let pos = args .get(1) - .map(|position| position.to_integer(context)) - .transpose()? - .map_or(0, |position| position.max(0.0).min(length as f64) as usize); + .cloned() + .unwrap_or_else(JsValue::undefined) + .to_integer(context)?; + + let len = string.encode_utf16().count(); + let start = pos.max(0.0); - if search_string.is_empty() { - return Ok(start.min(length).into()); + if search_str.is_empty() { + return Ok(JsValue::new(start.min(len as f64))); } - if start < length { - if let Some(position) = string.find(search_string.as_str()) { - return Ok(string[..position].chars().count().into()); + if start < len as f64 { + let start = start as usize; + + let substring_lossy = if start > 0 { + let substring_utf16: Vec = string.encode_utf16().skip(start).collect(); + StdString::from_utf16_lossy(&substring_utf16) + } else { + string.to_string() + }; + + if let Some(position) = substring_lossy.find(search_str.as_str()) { + return Ok(JsValue::new( + substring_lossy[..position].encode_utf16().count() + start, + )); } } - Ok(JsValue::new(-1)) } @@ -1143,32 +1197,58 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + // 2. Let S be ? ToString(O). let string = this.to_string(context)?; - let search_string = args + // 3. Let searchStr be ? ToString(searchString). + let search_str = args .get(0) .cloned() .unwrap_or_else(JsValue::undefined) .to_string(context)?; - let length = string.chars().count(); - let start = args + // 4. Let numPos be ? ToNumber(position). + // 5. Assert: If position is undefined, then numPos is NaN. + let num_pos = args .get(1) - .map(|position| position.to_integer(context)) - .transpose()? - .map_or(0, |position| position.max(0.0).min(length as f64) as usize); + .cloned() + .unwrap_or_else(JsValue::undefined) + .to_number(context)?; + + // 6. If numPos is NaN, let pos be +∞; otherwise, let pos be ! ToIntegerOrInfinity(numPos). + let pos = if num_pos.is_nan() { + f64::INFINITY + } else { + JsValue::new(num_pos).to_integer(context)? + }; + + // 7. Let len be the length of S. + let len = string.encode_utf16().count(); + // 8. Let start be the result of clamping pos between 0 and len. + let start = pos.max(0.0).min(len as f64) as usize; - if search_string.is_empty() { - return Ok(start.min(length).into()); + // 9. If searchStr is the empty String, return 𝔽(start). + if search_str.is_empty() { + return Ok(JsValue::new(start as f64)); } - if start < length { - if let Some(position) = string.rfind(search_string.as_str()) { - return Ok(string[..position].chars().count().into()); - } + // TODO: Full UTF-16 support + // 10. Let searchLen be the length of searchStr. + let search_len = search_str.encode_utf16().count(); + // 11. For each non-negative integer i starting with start such that i ≤ len - searchLen, in descending order, do + // a. Let candidate be the substring of S from i to i + searchLen. + let substring_utf16: Vec = string.encode_utf16().take(start + search_len).collect(); + let substring_lossy = StdString::from_utf16_lossy(&substring_utf16); + if let Some(position) = substring_lossy.rfind(search_str.as_str()) { + // b. If candidate is the same sequence of code units as searchStr, return 𝔽(i). + return Ok(JsValue::new( + substring_lossy[..position].encode_utf16().count(), + )); } + // 12. Return -1𝔽. Ok(JsValue::new(-1)) } @@ -1218,36 +1298,59 @@ impl String { /// Performs the actual string padding for padStart/End. /// fn string_pad( - primitive: JsString, - max_length: i32, - fill_string: Option<&JsString>, + object: &JsValue, + max_length: &JsValue, + fill_string: &JsValue, at_start: bool, - ) -> JsValue { - let primitive_length = primitive.len() as i32; + context: &mut Context, + ) -> JsResult { + let string = object.to_string(context)?; + + let int_max_length = max_length.to_length(context)?; + let string_length = string.encode_utf16().count(); - if max_length <= primitive_length { - return JsValue::new(primitive); + if int_max_length <= string_length { + return Ok(string.into()); } - let filler = fill_string.map_or(" ", JsString::as_str); + let filler = if fill_string.is_undefined() { + "\u{0020}".into() + } else { + fill_string.to_string(context)? + }; + let filler_utf16: Vec = filler.encode_utf16().collect(); if filler.is_empty() { - return JsValue::new(primitive); + return Ok(string.into()); } - let fill_len = max_length.wrapping_sub(primitive_length); - let mut fill_str = StdString::new(); + let fill_len = int_max_length - string_length; + let filler_len = filler_utf16.len(); - while fill_str.len() < fill_len as usize { - fill_str.push_str(filler); - } - // Cut to size max_length - let concat_fill_str: StdString = fill_str.chars().take(fill_len as usize).collect(); + let mut truncated_string_filler = StdString::new(); + let mut truncated_string_filler_len: usize = 0; + while truncated_string_filler_len < fill_len { + if truncated_string_filler_len.wrapping_add(filler_len) <= fill_len { + truncated_string_filler.push_str(&filler); + truncated_string_filler_len += filler_len; + } else { + truncated_string_filler.push_str( + StdString::from_utf16_lossy( + &filler_utf16[..fill_len - truncated_string_filler_len], + ) + .as_str(), + ); + truncated_string_filler_len = fill_len; + } + } if at_start { - JsValue::new(format!("{concat_fill_str}{primitive}")) + truncated_string_filler.push_str(&string); + Ok(truncated_string_filler.into()) } else { - JsValue::new(format!("{primitive}{concat_fill_str}")) + let mut string = string.to_string(); + string.push_str(&truncated_string_filler); + Ok(string.into()) } } @@ -1268,23 +1371,12 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - let primitive = this.to_string(context)?; - if args.is_empty() { - return Err(JsValue::new("padEnd requires maxLength argument")); - } - let max_length = args - .get(0) - .expect("failed to get argument for String method") - .to_integer(context)? as i32; + let this = this.require_object_coercible(context)?; - let fill_string = args.get(1).map(|arg| arg.to_string(context)).transpose()?; + let max_length = args.get(0).cloned().unwrap_or_else(JsValue::undefined); + let fill_string = args.get(1).cloned().unwrap_or_else(JsValue::undefined); - Ok(Self::string_pad( - primitive, - max_length, - fill_string.as_ref(), - false, - )) + Self::string_pad(this, &max_length, &fill_string, false, context) } /// `String.prototype.padStart( targetLength [, padString] )` @@ -1304,23 +1396,12 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - let primitive = this.to_string(context)?; - if args.is_empty() { - return Err(JsValue::new("padStart requires maxLength argument")); - } - let max_length = args - .get(0) - .expect("failed to get argument for String method") - .to_integer(context)? as i32; + let this = this.require_object_coercible(context)?; - let fill_string = args.get(1).map(|arg| arg.to_string(context)).transpose()?; + let max_length = args.get(0).cloned().unwrap_or_else(JsValue::undefined); + let fill_string = args.get(1).cloned().unwrap_or_else(JsValue::undefined); - Ok(Self::string_pad( - primitive, - max_length, - fill_string.as_ref(), - true, - )) + Self::string_pad(this, &max_length, &fill_string, true, context) } /// String.prototype.trim() @@ -1336,8 +1417,8 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.trim /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/trim pub(crate) fn trim(this: &JsValue, _: &[JsValue], context: &mut Context) -> JsResult { - let this = this.require_object_coercible(context)?; - let string = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; Ok(JsValue::new(string.trim_matches(is_trimmable_whitespace))) } @@ -1358,6 +1439,7 @@ impl String { _: &[JsValue], context: &mut Context, ) -> JsResult { + let this = this.require_object_coercible(context)?; let string = this.to_string(context)?; Ok(JsValue::new( string.trim_start_matches(is_trimmable_whitespace), @@ -1404,12 +1486,11 @@ impl String { _: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let this_str = this.to_string(context)?; + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; // The Rust String is mapped to uppercase using the builtin .to_lowercase(). // There might be corner cases where it does not behave exactly like Javascript expects - Ok(JsValue::new(this_str.to_lowercase())) + Ok(JsValue::new(string.to_lowercase())) } /// `String.prototype.toUpperCase()` @@ -1430,12 +1511,11 @@ impl String { _: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let this_str = this.to_string(context)?; + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; // The Rust String is mapped to uppercase using the builtin .to_uppercase(). // There might be corner cases where it does not behave exactly like Javascript expects - Ok(JsValue::new(this_str.to_uppercase())) + Ok(JsValue::new(string.to_uppercase())) } /// `String.prototype.substring( indexStart[, indexEnd] )` @@ -1453,39 +1533,36 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - // If no args are specified, start is 'undefined', defaults to 0 - let start = if let Some(integer) = args.get(0) { - integer.to_integer(context)? as i32 - } else { - 0 - }; - let length = primitive_val.encode_utf16().count() as i32; - // If less than 2 args specified, end is the length of the this object converted to a String - let end = if let Some(integer) = args.get(1) { - integer.to_integer(context)? as i32 - } else { - length + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; + + let len = string.len(); + let int_start = args + .get(0) + .cloned() + .unwrap_or_else(JsValue::undefined) + .to_integer(context)?; + + let int_end = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) { + end if end.is_undefined() => len as f64, + end => end.to_integer(context)?, }; + // Both start and end args replaced by 0 if they were negative // or by the length of the String if they were greater - let final_start = min(max(start, 0), length); - let final_end = min(max(end, 0), length); - // Start and end are swapped if start is greater than end - let from = min(final_start, final_end) as usize; - let to = max(final_start, final_end) as usize; - // Extract the part of the string contained between the start index and the end index - // where start is guaranteed to be smaller or equals to end - let extracted_string: Result = decode_utf16( - primitive_val - .encode_utf16() - .skip(from) - .take(to.wrapping_sub(from)), - ) - .collect(); - Ok(JsValue::new(extracted_string.expect("Invalid string"))) + let final_start = int_start.max(0.0).min(len as f64); + let final_end = int_end.max(0.0).min(len as f64); + + let from = final_start.min(final_end) as usize; + let to = final_start.max(final_end) as usize; + + // Extract the part of the string contained between the from index and the to index + // where from is guaranteed to be smaller or equal to to + // TODO: Full UTF-16 support + let substring_utf16: Vec = string.encode_utf16().skip(from).take(to - from).collect(); + let substring = StdString::from_utf16_lossy(&substring_utf16); + + Ok(substring.into()) } /// `String.prototype.substr( start[, length] )` @@ -1504,44 +1581,39 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - // If no args are specified, start is 'undefined', defaults to 0 - let mut start = if let Some(integer) = args.get(0) { - integer.to_integer(context)? as i32 - } else { - 0 + let this = this.require_object_coercible(context)?; + let string: Vec = this.to_string(context)?.encode_utf16().collect(); + let size = string.len(); + + let int_start = match args + .get(0) + .cloned() + .unwrap_or_else(JsValue::undefined) + .to_integer(context)? + { + int_start if int_start.is_infinite() && int_start.is_sign_negative() => 0.0, + int_start if int_start < 0.0 => (int_start + size as f64).max(0.0), + int_start => int_start, }; - let length = primitive_val.chars().count() as i32; - // If less than 2 args specified, end is +infinity, the maximum number value. - // Using i32::max_value() should be safe because the final length used is at most - // the number of code units from start to the end of the string, - // which should always be smaller or equals to both +infinity and i32::max_value - let end = if let Some(integer) = args.get(1) { - integer.to_integer(context)? as i32 - } else { - i32::MAX + + let int_length = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) { + length if length.is_undefined() => size as f64, + length => length.to_integer(context)?, }; - // If start is negative it become the number of code units from the end of the string - if start < 0 { - start = max(length.wrapping_add(start), 0); + + if int_start.is_infinite() || int_length <= 0.0 || int_length.is_infinite() { + return Ok("".into()); } - // length replaced by 0 if it was negative - // or by the number of code units from start to the end of the string if it was greater - let result_length = min(max(end, 0), length.wrapping_sub(start)); - // If length is negative we return an empty string - // otherwise we extract the part of the string from start and is length code units long - if result_length <= 0 { - Ok(JsValue::new("")) - } else { - let extracted_string: StdString = primitive_val - .chars() - .skip(start as usize) - .take(result_length as usize) - .collect(); - Ok(JsValue::new(extracted_string)) + let int_end = (int_start + int_length).min(size as f64) as usize; + let int_start = int_start as usize; + + if int_start >= int_end { + Ok("".into()) + } else { + let substring_utf16 = &string[int_start..int_end]; + let substring = StdString::from_utf16_lossy(substring_utf16); + Ok(substring.into()) } } diff --git a/boa/src/builtins/string/string_iterator.rs b/boa/src/builtins/string/string_iterator.rs index 18c218118a5..60df8b77a20 100644 --- a/boa/src/builtins/string/string_iterator.rs +++ b/boa/src/builtins/string/string_iterator.rs @@ -56,8 +56,8 @@ impl StringIterator { context, )); } - let (_, code_unit_count, _) = - code_point_at(&native_string, position).expect("Invalid code point position"); + let (_, code_unit_count, _) = code_point_at(&native_string, i64::from(position)) + .expect("Invalid code point position"); string_iterator.next_index += i32::from(code_unit_count); let result_string = crate::builtins::string::String::substring( &string_iterator.string, diff --git a/boa/src/builtins/string/tests.rs b/boa/src/builtins/string/tests.rs index 7d6c78b69ee..c3118869f29 100644 --- a/boa/src/builtins/string/tests.rs +++ b/boa/src/builtins/string/tests.rs @@ -893,14 +893,14 @@ fn last_index_of_with_non_string_search_string_argument() { fn last_index_of_with_from_index_argument() { let mut context = Context::default(); assert_eq!(forward(&mut context, "''.lastIndexOf('x', 2)"), "-1"); - assert_eq!(forward(&mut context, "'x'.lastIndexOf('x', 2)"), "-1"); - assert_eq!(forward(&mut context, "'abcxx'.lastIndexOf('x', 2)"), "4"); - assert_eq!(forward(&mut context, "'x'.lastIndexOf('x', 2)"), "-1"); - assert_eq!(forward(&mut context, "'µµµxµµµ'.lastIndexOf('x', 2)"), "3"); + assert_eq!(forward(&mut context, "'x'.lastIndexOf('x', 2)"), "0"); + assert_eq!(forward(&mut context, "'abcxx'.lastIndexOf('x', 2)"), "-1"); + assert_eq!(forward(&mut context, "'x'.lastIndexOf('x', 2)"), "0"); + assert_eq!(forward(&mut context, "'µµµxµµµ'.lastIndexOf('x', 2)"), "-1"); assert_eq!( forward(&mut context, "'µµµxµµµ'.lastIndexOf('x', 10000000)"), - "-1" + "3" ); } @@ -945,13 +945,13 @@ fn last_index_non_integer_position_argument() { ); assert_eq!( forward(&mut context, "'abcx'.lastIndexOf('x', new String('1'))"), - "3" + "-1" ); assert_eq!( forward(&mut context, "'abcx'.lastIndexOf('x', new String('100'))"), - "-1" + "3" ); - assert_eq!(forward(&mut context, "'abcx'.lastIndexOf('x', null)"), "3"); + assert_eq!(forward(&mut context, "'abcx'.lastIndexOf('x', null)"), "-1"); } #[test]