swc-project · kdy1 · Jun 22, 2024 · Jun 21, 2024 · Jun 21, 2024 · Jun 21, 2024
@@ -26,6 +26,7 @@ verify        = ["swc_ecma_visit"]
 
 [dependencies]
 either      = { workspace = true }
+memchr      = { workspace = true, features = ["use_std"] }
 num-bigint  = { workspace = true }
 num-traits  = { workspace = true }
 serde       = { workspace = true, features = ["derive"] }

@@ -26,7 +26,7 @@ impl<'a> Lexer<'a> {
 
                     self.emit_error_span(span, SyntaxError::TS1185);
                     self.skip_line_comment(6);
-                    self.skip_space::<true>()?;
+                    self.skip_space::<true>();
                     return self.read_token();
                 }
                 '<' | '{' => {

@@ -379,7 +379,7 @@ impl<'a> Lexer<'a> {
                 let span = fixed_len_span(start, 7);
                 self.emit_error_span(span, SyntaxError::TS1185);
                 self.skip_line_comment(5);
-                self.skip_space::<true>()?;
+                self.skip_space::<true>();
                 return self.error_span(span, SyntaxError::TS1185);
             }
 
@@ -572,7 +572,7 @@ impl<'a> Lexer<'a> {
             if self.state.had_line_break && c == b'-' && self.eat(b'>') {
                 self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
                 self.skip_line_comment(0);
-                self.skip_space::<true>()?;
+                self.skip_space::<true>();
                 return self.read_token();
             }
 
@@ -617,7 +617,7 @@ impl<'a> Lexer<'a> {
                     if had_line_break_before_last && self.is_str("====") {
                         self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
                         self.skip_line_comment(4);
-                        self.skip_space::<true>()?;
+                        self.skip_space::<true>();
                         return self.read_token();
                     }
 
@@ -676,7 +676,7 @@ impl<'a> Lexer<'a> {
         // XML style comment. `<!--`
         if c == '<' && self.is(b'!') && self.peek() == Some('-') && self.peek_ahead() == Some('-') {
             self.skip_line_comment(3);
-            self.skip_space::<true>()?;
+            self.skip_space::<true>();
             self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
 
             return self.read_token();
@@ -732,7 +732,7 @@ impl<'a> Lexer<'a> {
         {
             self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
             self.skip_line_comment(5);
-            self.skip_space::<true>()?;
+            self.skip_space::<true>();
             return self.read_token();
         }
 

@@ -194,6 +194,50 @@ impl Tokens for Lexer<'_> {
 }
 
 impl Lexer<'_> {
+    /// Flush every buffered comment into the user-supplied comment store.
+    ///
+    /// `next_token` calls this once the input has no characters left. The
+    /// function is marked `#[cold]` and is never inlined.
+    #[cold]
+    #[inline(never)]
+    fn consume_pending_comments(&mut self) {
+        let comments = match self.comments.as_mut() {
+            Some(comments) => comments,
+            // No comment store was supplied, so there is nothing to flush.
+            None => return,
+        };
+        let buffer = self.comments_buffer.as_mut().unwrap();
+        let prev_hi = self.state.prev_hi;
+
+        // Re-file the pending comments. If the file had no tokens and no
+        // shebang (`prev_hi` is still `start_pos`) they remain leading
+        // comments; otherwise they become trailing comments at `prev_hi`.
+        for pending in buffer.take_pending_leading() {
+            let kind = if prev_hi == self.start_pos {
+                BufferedCommentKind::Leading
+            } else {
+                BufferedCommentKind::Trailing
+            };
+            buffer.push(BufferedComment {
+                kind,
+                pos: prev_hi,
+                comment: pending,
+            });
+        }
+
+        // Hand every buffered comment over to the caller's comment store.
+        for buffered in buffer.take_comments() {
+            match buffered.kind {
+                BufferedCommentKind::Leading => {
+                    comments.add_leading(buffered.pos, buffered.comment);
+                }
+                BufferedCommentKind::Trailing => {
+                    comments.add_trailing(buffered.pos, buffered.comment);
+                }
+            }
+        }
+    }
+
     fn next_token(&mut self, start: &mut BytePos) -> Result<Option<Token>, Error> {
         if let Some(start) = self.state.next_regexp {
             return Ok(Some(self.read_regexp(start)?));
@@ -210,50 +254,15 @@ impl Lexer<'_> {
 
         // skip spaces before getting next character, if we are allowed to.
         if self.state.can_skip_space() {
-            self.skip_space::<true>()?;
+            self.skip_space::<true>();
             *start = self.input.cur_pos();
         };
 
         match self.input.cur() {
             Some(..) => {}
             // End of input.
             None => {
-                if let Some(comments) = self.comments.as_mut() {
-                    let comments_buffer = self.comments_buffer.as_mut().unwrap();
-                    let last = self.state.prev_hi;
-
-                    // move the pending to the leading or trailing
-                    for c in comments_buffer.take_pending_leading() {
-                        // if the file had no tokens and no shebang, then treat any
-                        // comments in the leading comments buffer as leading.
-                        // Otherwise treat them as trailing.
-                        if last == self.start_pos {
-                            comments_buffer.push(BufferedComment {
-                                kind: BufferedCommentKind::Leading,
-                                pos: last,
-                                comment: c,
-                            });
-                        } else {
-                            comments_buffer.push(BufferedComment {
-                                kind: BufferedCommentKind::Trailing,
-                                pos: last,
-                                comment: c,
-                            });
-                        }
-                    }
-
-                    // now fill the user's passed in comments
-                    for comment in comments_buffer.take_comments() {
-                        match comment.kind {
-                            BufferedCommentKind::Leading => {
-                                comments.add_leading(comment.pos, comment.comment);
-                            }
-                            BufferedCommentKind::Trailing => {
-                                comments.add_trailing(comment.pos, comment.comment);
-                            }
-                        }
-                    }
-                }
+                self.consume_pending_comments();
 
                 return Ok(None);
             }
@@ -311,7 +320,7 @@ impl Lexer<'_> {
 
                         self.emit_error_span(span, SyntaxError::TS1185);
                         self.skip_line_comment(6);
-                        self.skip_space::<true>()?;
+                        self.skip_space::<true>();
                         return self.read_token();
                     }
 

@@ -4,6 +4,7 @@
 //! [babylon/util/identifier.js]:https://github.com/babel/babel/blob/master/packages/babylon/src/util/identifier.js
 use std::char;
 
+use ::memchr::memmem;
 use swc_common::{
     comments::{Comment, CommentKind},
     BytePos, Span, SyntaxContext,
@@ -160,7 +161,7 @@ impl<'a> Lexer<'a> {
     ///
     /// See https://tc39.github.io/ecma262/#sec-white-space
     #[inline(never)]
-    pub(super) fn skip_space<const LEX_COMMENTS: bool>(&mut self) -> LexResult<()> {
+    pub(super) fn skip_space<const LEX_COMMENTS: bool>(&mut self) {
         loop {
             let (offset, newline) = {
                 let mut skip = SkipWhitespace {
@@ -182,15 +183,13 @@ impl<'a> Lexer<'a> {
                     self.skip_line_comment(2);
                     continue;
                 } else if self.peek() == Some('*') {
-                    self.skip_block_comment()?;
+                    self.skip_block_comment();
                     continue;
                 }
             }
 
             break;
         }
-
-        Ok(())
     }
 
     #[inline(never)]
@@ -250,7 +249,7 @@ impl<'a> Lexer<'a> {
 
     /// Expects current char to be '/' and next char to be '*'.
     #[inline(never)]
-    pub(super) fn skip_block_comment(&mut self) -> LexResult<()> {
+    pub(super) fn skip_block_comment(&mut self) {
         let start = self.cur_pos();
 
         debug_assert_eq!(self.cur(), Some('/'));
@@ -260,41 +259,36 @@ impl<'a> Lexer<'a> {
 
         // jsdoc
         let slice_start = self.cur_pos();
-        let mut was_star = if self.input.is_byte(b'*') {
-            self.bump();
-            true
-        } else {
-            false
-        };
 
         let mut is_for_next = self.state.had_line_break || !self.state.can_have_trailing_comment();
 
-        while let Some(c) = self.cur() {
-            if was_star && c == '/' {
-                debug_assert_eq!(self.cur(), Some('/'));
-                self.bump(); // '/'
-
-                let end = self.cur_pos();
-
-                self.skip_space::<false>()?;
+        if let Some(idx) = memmem::find(self.input.as_str().as_bytes(), b"*/") {
+            if !self.state.had_line_break {
+                self.state.had_line_break = self.input.as_str()[0..idx]
+                    .chars()
+                    .any(|c| c.is_line_terminator());
+            }
 
-                if self.input.is_byte(b';') {
-                    is_for_next = false;
-                }
+            self.input.bump_bytes(idx + 2);
+            let end = self.cur_pos();
 
-                self.store_comment(is_for_next, start, end, slice_start);
+            self.skip_space::<false>();
 
-                return Ok(());
-            }
-            if c.is_line_terminator() {
-                self.state.had_line_break = true;
+            if self.input.is_byte(b';') {
+                is_for_next = false;
             }
 
-            was_star = c == '*';
-            self.bump();
+            self.store_comment(is_for_next, start, end, slice_start);
+
+            return;
         }
 
-        self.error(start, SyntaxError::UnterminatedBlockComment)?
+        let len = self.input.as_str().bytes().len();
+        self.input.bump_bytes(len);
+
+        let span = self.span(start);
+
+        self.emit_error_span(span, SyntaxError::UnterminatedBlockComment)
     }
 
     #[inline(never)]