Skip to content

Commit

Permalink
Optimize more
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexWaygood committed Mar 15, 2024
1 parent 1646f0f commit d9a68ba
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 20 deletions.
35 changes: 15 additions & 20 deletions crates/ruff_python_parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,26 +199,8 @@ impl<'source> Lexer<'source> {
}

let mut is_ascii = ascii_first_char;

loop {
let c = self.cursor.first();
// Arrange things such that ASCII codepoints never
// result in the slower `is_xid_continue` getting called.
if c.is_ascii() {
if !matches!(c, 'a'..='z' | 'A'..='Z' | '_' | '0'..='9') {
break;
}
} else {
if !is_xid_continue(c) {
break;
}
is_ascii = false;
}
if self.cursor.is_eof() {
break;
}
self.cursor.bump();
}
self.cursor
.eat_while(|c| is_identifier_continuation(c, &mut is_ascii));

let keyword = match self.token_text() {
"False" => Tok::False,
Expand Down Expand Up @@ -1607,6 +1589,19 @@ fn is_unicode_identifier_start(c: char) -> bool {
is_xid_start(c)
}

/// Returns `true` if `c` is a valid identifier continuation character as described in
/// <https://docs.python.org/3/reference/lexical_analysis.html#identifiers>.
///
/// `ascii_only_identifier` is set to `false` only when `c` is a non-ASCII character that is
/// accepted as part of the identifier. A non-ASCII character that is rejected terminates the
/// identifier without belonging to it, so it leaves the flag untouched; otherwise an ASCII-only
/// identifier followed by such a character would be misreported as containing non-ASCII text.
fn is_identifier_continuation(c: char, ascii_only_identifier: &mut bool) -> bool {
    // Arrange things such that ASCII codepoints never
    // result in the slower `is_xid_continue` getting called.
    if c.is_ascii() {
        matches!(c, 'a'..='z' | 'A'..='Z' | '_' | '0'..='9')
    } else if is_xid_continue(c) {
        // Only an accepted non-ASCII character makes the identifier non-ASCII.
        *ascii_only_identifier = false;
        true
    } else {
        false
    }
}

/// Returns `true` for [whitespace](https://docs.python.org/3/reference/lexical_analysis.html#whitespace-between-tokens)
/// characters.
///
Expand Down
10 changes: 10 additions & 0 deletions crates/ruff_python_parser/src/lexer/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,16 @@ impl<'a> Cursor<'a> {
}
}

/// Consumes characters for as long as `predicate` returns `true`, stopping early
/// once the end of the file is reached.
#[inline]
pub(super) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
    // Hand-optimized variants (e.g. for line comments) were tried, but LLVM can
    // inline all of this and compile it down to a fast iteration over bytes.
    loop {
        // The predicate is consulted first; the end-of-file check only runs
        // when the predicate accepted the character returned by `first()`.
        if !predicate(self.first()) || self.is_eof() {
            break;
        }
        self.bump();
    }
}

/// Skips the next `count` bytes.
///
/// ## Panics
Expand Down

0 comments on commit d9a68ba

Please sign in to comment.