Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(es/parser): Reduce allocations while lexing numbers #9057

Merged
merged 2 commits into from
Jun 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions crates/swc_ecma_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::{cell::RefCell, char, iter::FusedIterator, rc::Rc};

use either::Either::{Left, Right};
use smallvec::{smallvec, SmallVec};
use smartstring::SmartString;
use swc_atoms::{Atom, AtomStoreCell};
use swc_common::{comments::Comments, input::StringInput, BytePos, Span};
use swc_ecma_ast::{op, AssignOp, EsVersion};
Expand Down Expand Up @@ -472,7 +471,7 @@ impl<'a> Lexer<'a> {
'x' => {
self.bump(); // 'x'

match self.read_int_u32::<16>(2, &mut Raw(None))? {
match self.read_int_u32::<16>(2)? {
Some(val) => return Ok(Some(vec![Char::from(val)])),
None => self.error(
start,
Expand Down Expand Up @@ -880,7 +879,7 @@ impl<'a> Lexer<'a> {
}

let state = self.input.cur_pos();
let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }, &mut Raw(None)) {
let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }) {
Ok(Some(val)) => {
if 0x0010_ffff >= val {
char::from_u32(val)
Expand Down
82 changes: 30 additions & 52 deletions crates/swc_ecma_parser/src/lexer/number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@
//!
//!
//! See https://tc39.github.io/ecma262/#sec-literals-numeric-literals
use std::{borrow::Cow, fmt::Write};
use std::borrow::Cow;

use either::Either;
use num_bigint::BigInt as BigIntValue;
use num_traits::{Num as NumTrait, ToPrimitive};
use smartstring::LazyCompact;
use swc_common::SyntaxContext;
use tracing::trace;

Expand Down Expand Up @@ -46,7 +45,6 @@ impl<'a> Lexer<'a> {
}

let start = self.cur_pos();
let mut raw_val = SmartString::<LazyCompact>::new();

let val = if starts_with_dot {
// first char is '.'
Expand All @@ -70,8 +68,6 @@ impl<'a> Lexer<'a> {
)));
}

write!(raw_val, "{}", &s.value).unwrap();

if starts_with_zero {
// TODO: I guess it would be okay if I don't use -ffast-math
// (or something like that), but needs review.
Expand Down Expand Up @@ -146,29 +142,28 @@ impl<'a> Lexer<'a> {
//
// `.1.a`, `.1e-4.a` are valid,
if self.cur() == Some('.') {
raw_val.push('.');

self.bump();

if starts_with_dot {
debug_assert!(self.cur().is_some());
debug_assert!(self.cur().unwrap().is_ascii_digit());
}

let mut raw = Raw(Some(Default::default()));
// Read numbers after dot
let dec_val = self.read_int::<10>(0, &mut raw)?;
self.read_int::<10>(0)?;

val = {
if dec_val.is_some() {
raw_val.push_str(raw.0.as_ref().unwrap());
}
let end = self.cur_pos();
let raw = unsafe {
// Safety: We got both start and end position from `self.input`
self.input.slice(start, end)
};

// Remove number separator from number
if raw_val.contains('_') {
Cow::Owned(raw_val.replace('_', ""))
if raw.contains('_') {
Cow::Owned(raw.replace('_', ""))
} else {
Cow::Borrowed(&*raw_val)
Cow::Borrowed(raw)
}
.parse()
.expect("failed to parse float using rust's impl")
Expand All @@ -193,8 +188,6 @@ impl<'a> Lexer<'a> {
}
};

raw_val.push('e');

let positive = if next == '+' || next == '-' {
self.bump(); // remove '+', '-'

Expand All @@ -203,8 +196,7 @@ impl<'a> Lexer<'a> {
true
};

let mut raw = Raw(Some(Default::default()));
let exp = self.read_number_no_dot::<10>(&mut raw)?;
let exp = self.read_number_no_dot::<10>()?;

val = if exp == f64::INFINITY {
if positive && val != 0.0 {
Expand All @@ -213,16 +205,16 @@ impl<'a> Lexer<'a> {
0.0
}
} else {
let flag = if positive { '+' } else { '-' };

raw_val.push(flag);

write!(raw_val, "{}", exp).unwrap();

if raw_val.contains('_') {
Cow::Owned(raw_val.replace('_', ""))
let end = self.cur_pos();
let raw = unsafe {
// Safety: We got both start and end position from `self.input`
self.input.slice(start, end)
};

if raw.contains('_') {
Cow::Owned(raw.replace('_', ""))
} else {
Cow::Borrowed(&*raw_val)
Cow::Borrowed(raw)
}
.parse()
.expect("failed to parse float literal")
Expand Down Expand Up @@ -293,7 +285,7 @@ impl<'a> Lexer<'a> {

/// This can read long integers like
/// "13612536612375123612312312312312312312312".
fn read_number_no_dot<const RADIX: u8>(&mut self, raw: &mut Raw) -> LexResult<f64> {
fn read_number_no_dot<const RADIX: u8>(&mut self) -> LexResult<f64> {
debug_assert!(
RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16,
"radix for read_number_no_dot should be one of 2, 8, 10, 16, but got {}",
Expand All @@ -309,7 +301,6 @@ impl<'a> Lexer<'a> {

Ok((f64::mul_add(total, radix as f64, v as f64), true))
},
raw,
true,
);

Expand All @@ -336,8 +327,6 @@ impl<'a> Lexer<'a> {
let mut non_octal = false;
let mut read_any = false;

let mut raw = Raw(Some(Default::default()));

self.read_digits::<_, f64, RADIX>(
|total, radix, v| {
read_any = true;
Expand All @@ -348,17 +337,20 @@ impl<'a> Lexer<'a> {

Ok((f64::mul_add(total, radix as f64, v as f64), true))
},
&mut raw,
true,
)?;

if !read_any {
self.error(start, SyntaxError::ExpectedDigit { radix: RADIX })?;
}

let raw_str = raw.0.take().unwrap();
let end = self.cur_pos();
let raw = unsafe {
// Safety: We got both start and end position from `self.input`
self.input.slice(start, end)
};
// Remove number separator from number
let raw_number_str = raw_str.replace('_', "");
let raw_number_str = raw.replace('_', "");
let parsed_float = BigIntValue::from_str_radix(&raw_number_str, RADIX as u32)
.expect("failed to parse float using BigInt")
.to_f64()
Expand All @@ -381,11 +373,7 @@ impl<'a> Lexer<'a> {
/// were read, the integer value otherwise.
/// When `len` is not zero, this
/// will return `None` unless the integer has exactly `len` digits.
pub(super) fn read_int<const RADIX: u8>(
&mut self,
len: u8,
raw: &mut Raw,
) -> LexResult<Option<f64>> {
pub(super) fn read_int<const RADIX: u8>(&mut self, len: u8) -> LexResult<Option<f64>> {
let mut count = 0u16;
let v = self.read_digits::<_, Option<f64>, RADIX>(
|opt: Option<f64>, radix, val| {
Expand All @@ -394,7 +382,6 @@ impl<'a> Lexer<'a> {

Ok((Some(total), count != len as u16))
},
raw,
true,
)?;
if len != 0 && count != len as u16 {
Expand All @@ -404,11 +391,7 @@ impl<'a> Lexer<'a> {
}
}

pub(super) fn read_int_u32<const RADIX: u8>(
&mut self,
len: u8,
raw: &mut Raw,
) -> LexResult<Option<u32>> {
pub(super) fn read_int_u32<const RADIX: u8>(&mut self, len: u8) -> LexResult<Option<u32>> {
let start = self.state.start;

let mut count = 0;
Expand All @@ -427,7 +410,6 @@ impl<'a> Lexer<'a> {

Ok((Some(total), count != len))
},
raw,
true,
)?;
if len != 0 && count != len {
Expand All @@ -441,7 +423,6 @@ impl<'a> Lexer<'a> {
fn read_digits<F, Ret, const RADIX: u8>(
&mut self,
mut op: F,
raw: &mut Raw,
allow_num_separator: bool,
) -> LexResult<Ret>
where
Expand Down Expand Up @@ -499,7 +480,6 @@ impl<'a> Lexer<'a> {
// Safety: cur() returns Some(c) where c is a valid char
self.input.bump();
}
raw.push(c);

continue;
}
Expand All @@ -511,8 +491,6 @@ impl<'a> Lexer<'a> {
return Ok(total);
};

raw.push(c);

self.bump();

let (t, cont) = op(total, RADIX, val)?;
Expand Down Expand Up @@ -574,7 +552,7 @@ mod tests {

fn int<const RADIX: u8>(s: &'static str) -> u32 {
lex(s, |l| {
l.read_int_u32::<RADIX>(0, &mut Raw(None))
l.read_int_u32::<RADIX>(0)
.unwrap()
.expect("read_int returned None")
})
Expand Down
22 changes: 0 additions & 22 deletions crates/swc_ecma_parser/src/lexer/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
//! [babylon/util/identifier.js]:https://github.com/babel/babel/blob/master/packages/babylon/src/util/identifier.js
use std::char;

use smartstring::{LazyCompact, SmartString};
use swc_common::{
comments::{Comment, CommentKind},
BytePos, Span, SyntaxContext,
Expand All @@ -22,27 +21,6 @@ use crate::{
Tokens,
};

/// Collector for the raw text of a token.
///
/// Wraps an optional string buffer. The methods of this struct are no-ops
/// when the wrapped value is [None], so callers that do not need the raw text
/// can pass `Raw(None)` and pay nothing for collection.
pub(super) struct Raw(pub Option<SmartString<LazyCompact>>);

impl Raw {
#[inline]
pub fn push(&mut self, c: char) {
if let Some(ref mut st) = self.0 {
st.push(c)
}
}
}

// pub const BACKSPACE: char = 8 as char;
// pub const SHIFT_OUT: char = 14 as char;
// pub const OGHAM_SPACE_MARK: char = '\u{1680}'; // ' '
// pub const LINE_FEED: char = '\n';
// pub const LINE_SEPARATOR: char = '\u{2028}';
// pub const PARAGRAPH_SEPARATOR: char = '\u{2029}';

impl<'a> Lexer<'a> {
pub(super) fn span(&self, start: BytePos) -> Span {
let end = self.last_pos();
Expand Down
Loading