Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(lexer): tighten safety of lexer by always including lifetime on SourcePosition #8293

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions crates/oxc_parser/src/lexer/comment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ static LINE_BREAK_TABLE: SafeByteMatchTable =
static MULTILINE_COMMENT_START_TABLE: SafeByteMatchTable =
safe_byte_match_table!(|b| matches!(b, b'*' | b'\r' | b'\n' | LS_OR_PS_FIRST));

impl Lexer<'_> {
impl<'a> Lexer<'a> {
/// Section 12.4 Single Line Comment
pub(super) fn skip_single_line_comment(&mut self) -> Kind {
byte_search! {
Expand Down Expand Up @@ -151,7 +151,7 @@ impl Lexer<'_> {
Kind::Skip
}

fn skip_multi_line_comment_after_line_break(&mut self, pos: SourcePosition) -> Kind {
fn skip_multi_line_comment_after_line_break(&mut self, pos: SourcePosition<'a>) -> Kind {
// Can use `memchr` here as only searching for 1 pattern.
// Cache `Finder` instance on `Lexer` as there's a significant cost to creating it.
// `Finder::new` isn't a const function, so can't make it a `static`, and `lazy_static!`
Expand Down
9 changes: 6 additions & 3 deletions crates/oxc_parser/src/lexer/identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ impl<'a> Lexer<'a> {
/// Handle rest of identifier after first byte of a multi-byte Unicode char found.
/// Any number of characters can have already been consumed from `self.source` prior to it.
/// `self.source` should be positioned at start of Unicode character.
fn identifier_tail_unicode(&mut self, start_pos: SourcePosition) -> &'a str {
fn identifier_tail_unicode(&mut self, start_pos: SourcePosition<'a>) -> &'a str {
let c = self.peek_char().unwrap();
if is_identifier_part_unicode(c) {
self.consume_char();
Expand All @@ -113,7 +113,10 @@ impl<'a> Lexer<'a> {
///
/// First char should have been consumed from `self.source` prior to calling this.
/// `start_pos` should be position of the start of the identifier (before first char was consumed).
pub(super) fn identifier_tail_after_unicode(&mut self, start_pos: SourcePosition) -> &'a str {
pub(super) fn identifier_tail_after_unicode(
&mut self,
start_pos: SourcePosition<'a>,
) -> &'a str {
// Identifier contains a Unicode chars, so probably contains more.
// So just iterate over chars now, instead of bytes.
while let Some(c) = self.peek_char() {
Expand Down Expand Up @@ -146,7 +149,7 @@ impl<'a> Lexer<'a> {
///
/// The `\` must not have be consumed from `lexer.source`.
/// `start_pos` must be position of start of identifier.
fn identifier_backslash(&mut self, start_pos: SourcePosition, is_start: bool) -> &'a str {
fn identifier_backslash(&mut self, start_pos: SourcePosition<'a>, is_start: bool) -> &'a str {
// Create arena string to hold unescaped identifier.
// We don't know how long identifier will end up being. Take a guess that total length
// will be double what we've seen so far, or `MIN_ESCAPED_STR_LEN` minimum.
Expand Down
22 changes: 14 additions & 8 deletions crates/oxc_parser/src/lexer/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ impl<'a> Source<'a> {

/// Move current position.
#[inline]
pub(super) fn set_position(&mut self, pos: SourcePosition) {
pub(super) fn set_position(&mut self, pos: SourcePosition<'a>) {
// `SourcePosition` always upholds the invariants of `Source`, as long as it's created
// from this `Source`. `SourcePosition`s can only be created from a `Source`.
// `Source::new` takes a `UniquePromise`, which guarantees that it's the only `Source`
Expand Down Expand Up @@ -216,7 +216,7 @@ impl<'a> Source<'a> {
}

/// Get string slice from a `SourcePosition` up to the current position of `Source`.
pub(super) fn str_from_pos_to_current(&self, pos: SourcePosition) -> &'a str {
pub(super) fn str_from_pos_to_current(&self, pos: SourcePosition<'a>) -> &'a str {
assert!(pos.ptr <= self.ptr);
// SAFETY: The above assertion satisfies `str_from_pos_to_current_unchecked`'s requirements
unsafe { self.str_from_pos_to_current_unchecked(pos) }
Expand All @@ -230,7 +230,10 @@ impl<'a> Source<'a> {
/// 1. `Source::set_position` has not been called since `pos` was created.
/// 2. `pos` has not been advanced with `SourcePosition::add`.
#[inline]
pub(super) unsafe fn str_from_pos_to_current_unchecked(&self, pos: SourcePosition) -> &'a str {
pub(super) unsafe fn str_from_pos_to_current_unchecked(
&self,
pos: SourcePosition<'a>,
) -> &'a str {
// SAFETY: Caller guarantees `pos` is not after current position of `Source`.
// `self.ptr` is always a valid `SourcePosition` due to invariants of `Source`.
self.str_between_positions_unchecked(pos, SourcePosition::new(self.ptr))
Expand All @@ -244,15 +247,18 @@ impl<'a> Source<'a> {
/// 1. `Source::set_position` has not been called since `pos` was created.
/// 2. `pos` has not been moved backwards with `SourcePosition::sub`.
#[inline]
pub(super) unsafe fn str_from_current_to_pos_unchecked(&self, pos: SourcePosition) -> &'a str {
pub(super) unsafe fn str_from_current_to_pos_unchecked(
&self,
pos: SourcePosition<'a>,
) -> &'a str {
// SAFETY: Caller guarantees `pos` is not before current position of `Source`.
// `self.ptr` is always a valid `SourcePosition` due to invariants of `Source`.
self.str_between_positions_unchecked(SourcePosition::new(self.ptr), pos)
}

/// Get string slice from a `SourcePosition` up to the end of `Source`.
#[inline]
pub(super) fn str_from_pos_to_end(&self, pos: SourcePosition) -> &'a str {
pub(super) fn str_from_pos_to_end(&self, pos: SourcePosition<'a>) -> &'a str {
// SAFETY: Invariants of `SourcePosition` is that it cannot be after end of `Source`,
// and always on a UTF-8 character boundary.
// `self.end` is always a valid `SourcePosition` due to invariants of `Source`.
Expand All @@ -266,8 +272,8 @@ impl<'a> Source<'a> {
#[inline]
pub(super) unsafe fn str_between_positions_unchecked(
&self,
start: SourcePosition,
end: SourcePosition,
start: SourcePosition<'a>,
end: SourcePosition<'a>,
) -> &'a str {
// Check `start` is not after `end`
debug_assert!(start.ptr <= end.ptr);
Expand Down Expand Up @@ -304,7 +310,7 @@ impl<'a> Source<'a> {
/// Get offset of `pos`.
#[expect(clippy::cast_possible_truncation)]
#[inline]
pub(super) fn offset_of(&self, pos: SourcePosition) -> u32 {
pub(super) fn offset_of(&self, pos: SourcePosition<'a>) -> u32 {
// Cannot overflow `u32` because of `MAX_LEN` check in `Source::new`
(pos.addr() - self.start as usize) as u32
}
Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_parser/src/lexer/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ impl<'a> Lexer<'a> {
/// # SAFETY
/// `pos` must not be before `self.source.position()`
#[expect(clippy::unnecessary_safety_comment)]
unsafe fn template_literal_create_string(&self, pos: SourcePosition) -> String<'a> {
unsafe fn template_literal_create_string(&self, pos: SourcePosition<'a>) -> String<'a> {
// Create arena string to hold modified template literal.
// We don't know how long template literal will end up being. Take a guess that total length
// will be double what we've seen so far, or `MIN_ESCAPED_TEMPLATE_LIT_LEN` minimum.
Expand Down
Loading