Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(codegen): check last char with byte methods #6509

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 75 additions & 9 deletions crates/oxc_codegen/src/code_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,17 +125,55 @@ impl CodeBuffer {
/// let mut code = CodeBuffer::new();
/// code.print_str("foo");
///
/// assert_eq!(code.peek_nth_back(0), Some('o'));
/// assert_eq!(code.peek_nth_back(2), Some('f'));
/// assert_eq!(code.peek_nth_back(3), None);
/// assert_eq!(code.peek_nth_char_back(0), Some('o'));
/// assert_eq!(code.peek_nth_char_back(2), Some('f'));
/// assert_eq!(code.peek_nth_char_back(3), None);
/// ```
#[inline]
#[must_use = "Peeking is pointless if the peeked char isn't used"]
pub fn peek_nth_back(&self, n: usize) -> Option<char> {
pub fn peek_nth_char_back(&self, n: usize) -> Option<char> {
// SAFETY: All methods of `CodeBuffer` ensure `buf` is valid UTF-8
unsafe { std::str::from_utf8_unchecked(&self.buf) }.chars().nth_back(n)
}

/// Peek the `n`th byte from the end of the buffer.
///
/// When `n` is zero, the last byte is returned.
/// Returns [`None`] if `n` exceeds the length of the buffer.
///
/// # Example
/// ```
/// use oxc_codegen::CodeBuffer;
/// let mut code = CodeBuffer::new();
/// code.print_str("foo");
///
/// assert_eq!(code.peek_nth_byte_back(0), Some(b'o'));
/// assert_eq!(code.peek_nth_byte_back(2), Some(b'f'));
/// assert_eq!(code.peek_nth_byte_back(3), None);
/// ```
#[inline]
#[must_use = "Peeking is pointless if the peeked char isn't used"]
pub fn peek_nth_byte_back(&self, n: usize) -> Option<u8> {
let len = self.len();
if n < len {
Some(self.buf[len - 1 - n])
} else {
None
}
}

/// Peek the last byte from the end of the buffer.
#[inline]
pub fn last_byte(&self) -> Option<u8> {
self.buf.last().copied()
}

/// Peek the last char from the end of the buffer.
#[inline]
pub fn last_char(&self) -> Option<char> {
self.peek_nth_char_back(0)
}

/// Push a single ASCII byte into the buffer.
///
/// # Panics
Expand Down Expand Up @@ -460,12 +498,40 @@ mod test {
}

#[test]
fn peek_nth_back() {
fn peek_nth_char_back() {
let mut code = CodeBuffer::new();
code.print_str("bar");

assert_eq!(code.peek_nth_char_back(0), Some('r'));
assert_eq!(code.peek_nth_char_back(1), Some('a'));
assert_eq!(code.peek_nth_char_back(2), Some('b'));
assert_eq!(code.peek_nth_char_back(3), None);
}

#[test]
fn peek_nth_byte_back() {
let mut code = CodeBuffer::new();
code.print_str("foo");
code.print_str("bar");

assert_eq!(code.peek_nth_back(0), Some('o'));
assert_eq!(code.peek_nth_back(2), Some('f'));
assert_eq!(code.peek_nth_back(3), None);
assert_eq!(code.peek_nth_byte_back(0), Some(b'r'));
assert_eq!(code.peek_nth_byte_back(1), Some(b'a'));
assert_eq!(code.peek_nth_byte_back(2), Some(b'b'));
assert_eq!(code.peek_nth_byte_back(3), None);
}

#[test]
fn last_byte() {
let mut code = CodeBuffer::new();
assert_eq!(code.last_byte(), None);
code.print_str("bar");
assert_eq!(code.last_byte(), Some(b'r'));
}

#[test]
fn last_char() {
let mut code = CodeBuffer::new();
assert_eq!(code.last_char(), None);
code.print_str("bar");
assert_eq!(code.last_char(), Some('r'));
}
}
2 changes: 1 addition & 1 deletion crates/oxc_codegen/src/comment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ impl<'a> Codegen<'a> {

if comments.first().is_some_and(|c| c.preceded_by_newline) {
// Skip printing newline if this comment is already on a newline.
if self.peek_nth_back(0).is_some_and(|c| c != '\n' && c != '\t') {
if self.last_byte().is_some_and(|b| b != b'\n' && b != b'\t') {
self.print_hard_newline();
self.print_indent();
}
Expand Down
6 changes: 3 additions & 3 deletions crates/oxc_codegen/src/gen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1194,11 +1194,11 @@ impl<'a> Gen for BigIntLiteral<'a> {
impl<'a> Gen for RegExpLiteral<'a> {
fn gen(&self, p: &mut Codegen, _ctx: Context) {
p.add_source_mapping(self.span.start);
let last = p.peek_nth_back(0);
let last = p.last_byte();
let pattern_text = self.regex.pattern.source_text(p.source_text);
// Avoid forming a single-line comment or "</script" sequence
if Some('/') == last
|| (Some('<') == last && pattern_text.cow_to_lowercase().starts_with("script"))
if last == Some(b'/')
|| (last == Some(b'<') && pattern_text.cow_to_lowercase().starts_with("script"))
{
p.print_hard_space();
}
Expand Down
36 changes: 27 additions & 9 deletions crates/oxc_codegen/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use oxc_ast::ast::{
use oxc_mangler::Mangler;
use oxc_span::{GetSpan, Span};
use oxc_syntax::{
identifier::is_identifier_part,
identifier::{is_identifier_part, is_identifier_part_ascii},
operator::{BinaryOperator, UnaryOperator, UpdateOperator},
precedence::Precedence,
};
Expand Down Expand Up @@ -292,17 +292,31 @@ impl<'a> Codegen<'a> {

#[inline]
fn print_space_before_identifier(&mut self) {
if self
.peek_nth_back(0)
.is_some_and(|ch| is_identifier_part(ch) || self.prev_reg_exp_end == self.code.len())
{
self.print_hard_space();
let Some(byte) = self.last_byte() else { return };

if self.prev_reg_exp_end != self.code.len() {
let is_identifier = if byte.is_ascii() {
// Fast path for ASCII (very common case)
is_identifier_part_ascii(byte as char)
} else {
is_identifier_part(self.last_char().unwrap())
};
if !is_identifier {
return;
}
}

self.print_hard_space();
}

#[inline]
fn last_byte(&self) -> Option<u8> {
self.code.last_byte()
}

#[inline]
fn peek_nth_back(&self, n: usize) -> Option<char> {
self.code.peek_nth_back(n)
fn last_char(&self) -> Option<char> {
self.code.last_char()
}

#[inline]
Expand Down Expand Up @@ -533,7 +547,11 @@ impl<'a> Codegen<'a> {
|| ((prev == bin_op_sub || prev == un_op_neg)
&& (next == bin_op_sub || next == un_op_neg || next == un_op_pre_dec))
|| (prev == un_op_post_dec && next == bin_op_gt)
|| (prev == un_op_not && next == un_op_pre_dec && self.peek_nth_back(1) == Some('<'))
|| (prev == un_op_not
&& next == un_op_pre_dec
// `prev == UnaryOperator::LogicalNot` which means last byte is ASCII,
// and therefore previous character is 1 byte from end of buffer
&& self.code.peek_nth_byte_back(1) == Some(b'<'))
{
self.print_hard_space();
}
Expand Down
Loading