From b7db235065381cfef9fcc039fa6bfffdfdc84aa5 Mon Sep 17 00:00:00 2001 From: IWANABETHATGUY Date: Tue, 20 Aug 2024 18:57:12 +0800 Subject: [PATCH] fix: comments gen regression (#5003) try to fix: https://github.com/rolldown/rolldown/issues/2013 1. Before we only considering the ast is untouched, but considering the scenario. ```js const a = /*__PURE__*/ test(), // ^^^ ^^^^^^ is removed during transform b = a(); ``` Then according to the previous algorithm, `PURE` will attach to `b = a()` 2. Now, we try to attach comments as much as possible unless the comments are separated by comments, for the case above, `PURE` will not be attached to `a()` since the content between `b = a()` and `/* __PURE__*/` is not all whitespace. 3. we added back `MoveMap`, for the special case ```js /*__NODE_SIDE_EFFECTS__*/ export const c = 100; // ^^^^^^^^^^^^^^^^^^^^^ should be attached to first declarator, // ^^^^^^ are not whitespace ``` --- Cargo.lock | 1 + crates/oxc_ast/src/trivia.rs | 14 ++++++ crates/oxc_codegen/Cargo.toml | 9 ++-- crates/oxc_codegen/src/annotation_comment.rs | 43 ++++++++++++------- crates/oxc_codegen/src/gen.rs | 35 +++++++++------ crates/oxc_codegen/src/lib.rs | 45 +++++++++++++++++++- 6 files changed, 113 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4866f2263108d..56af861d04a31 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1504,6 +1504,7 @@ dependencies = [ "oxc_span", "oxc_syntax", "pico-args", + "rustc-hash", ] [[package]] diff --git a/crates/oxc_ast/src/trivia.rs b/crates/oxc_ast/src/trivia.rs index 1f0a82e31d0f5..387d7b503bd03 100644 --- a/crates/oxc_ast/src/trivia.rs +++ b/crates/oxc_ast/src/trivia.rs @@ -22,6 +22,20 @@ impl Comment { let span = Span::new(start, end); Self { kind, span } } + + pub fn real_span_end(&self) -> u32 { + match self.kind { + CommentKind::SingleLine => self.span.end, + // length of `*/` + CommentKind::MultiLine => self.span.end + 2, + } + } + + pub fn real_span_start(&self) -> u32 { + match self.kind { + CommentKind::SingleLine | CommentKind::MultiLine => self.span.start - 2, + } + } } #[derive(Debug, Clone, Copy, Eq, PartialEq)] diff --git a/crates/oxc_codegen/Cargo.toml b/crates/oxc_codegen/Cargo.toml index f49dc584e98db..a250dfb67ee1f 100644 --- a/crates/oxc_codegen/Cargo.toml +++ b/crates/oxc_codegen/Cargo.toml @@ -28,10 +28,11 @@ oxc_sourcemap = { workspace = true } oxc_mangler = { workspace = true } oxc_index = { workspace = true } -bitflags = { workspace = true } -nonmax = { workspace = true } -once_cell = { workspace = true } -daachorse = { workspace = true } +bitflags = { workspace = true } +nonmax = { workspace = true } +once_cell = { workspace = true } +daachorse = { workspace = true } +rustc-hash = { workspace = true } [dev-dependencies] oxc_parser = { workspace = true } diff --git a/crates/oxc_codegen/src/annotation_comment.rs b/crates/oxc_codegen/src/annotation_comment.rs index 395bf564d53a7..048bb5b069bf0 100644 --- a/crates/oxc_codegen/src/annotation_comment.rs +++ b/crates/oxc_codegen/src/annotation_comment.rs @@ -53,7 +53,19 @@ impl<'a> Codegen<'a> { if !self.comment_options.preserve_annotate_comments { return vec![]; } - self.get_leading_comments(self.latest_consumed_comment_end, node_start) + let mut latest_comment_start = node_start; + let mut ret = self + .get_leading_comments(self.latest_consumed_comment_end, node_start) + .rev() + // each comment should be separated by whitespaces + .take_while(|comment| { + let comment_end = comment.real_span_end(); + let range_content = + &self.source_text[comment_end as usize..latest_comment_start as usize]; + let all_whitespace = range_content.chars().all(char::is_whitespace); + latest_comment_start = comment.real_span_start(); + all_whitespace + }) .filter_map(|comment| { let source_code = self.source_text; let comment_content = @@ -68,7 +80,9 @@ impl<'a> Codegen<'a> { } None }) - .collect::>() + .collect::>(); + ret.reverse(); + ret } pub(crate) fn print_comment(&mut self, comment: AnnotationComment) { @@ -85,11 +99,12 @@ impl<'a> Codegen<'a> { // in this example, `Object.getOwnPropertyNames(Symbol)` and `Object.getOwnPropertyNames(Symbol).filter()`, `Object.getOwnPropertyNames(Symbol).filter().map()` // share the same leading comment. since they both are call expr and has same span start, we need to avoid print the same comment multiple times. let comment_span = comment.span(); - - if self.latest_consumed_comment_end >= comment_span.end { + let real_span_end = comment.comment.real_span_end(); + if self.latest_consumed_comment_end >= real_span_end { return; } - let real_comment_end = match comment.kind() { + self.update_last_consumed_comment_end(real_span_end); + match comment.kind() { CommentKind::SingleLine => { self.print_str("//"); self.print_range_of_source_code( @@ -97,7 +112,6 @@ impl<'a> Codegen<'a> { ); self.print_soft_newline(); self.print_indent(); - comment_span.end } CommentKind::MultiLine => { self.print_str("/*"); @@ -106,10 +120,8 @@ impl<'a> Codegen<'a> { ); self.print_str("*/"); self.print_soft_space(); - comment_span.end + 2 } - }; - self.update_last_consumed_comment_end(real_comment_end); + } // FIXME: esbuild function `restoreExprStartFlags` self.start_of_default_export = self.code_len(); } @@ -118,25 +130,26 @@ impl<'a> Codegen<'a> { if !self.comment_options.preserve_annotate_comments { return; } - let annotation_kind_set = AnnotationKind::empty(); - + let mut annotation_kind_set = AnnotationKind::empty(); + if let Some(comments) = self.try_take_moved_comment(node_start) { + self.print_comments(&comments, &mut annotation_kind_set); + } let leading_annotate_comments = self.get_leading_annotate_comments(node_start); - self.print_comments(&leading_annotate_comments, annotation_kind_set); + self.print_comments(&leading_annotate_comments, &mut annotation_kind_set); } #[inline] pub(crate) fn print_comments( &mut self, leading_annotate_comment: &Vec, - mut annotation_kind_set: AnnotationKind, + annotation_kind_set: &mut AnnotationKind, ) { for &comment in leading_annotate_comment { let kind = comment.annotation_kind(); - if !annotation_kind_set.intersects(kind) { + if !annotation_kind_set.contains(kind) { annotation_kind_set.insert(kind); self.print_comment(comment); } - self.update_last_consumed_comment_end(comment.span().end); } } diff --git a/crates/oxc_codegen/src/gen.rs b/crates/oxc_codegen/src/gen.rs index aaea2df95574c..ec63891ce58f3 100644 --- a/crates/oxc_codegen/src/gen.rs +++ b/crates/oxc_codegen/src/gen.rs @@ -155,10 +155,6 @@ impl<'a> Gen for Statement<'a> { p.print_semicolon_after_statement(); } } - - if p.comment_options.preserve_annotate_comments { - p.update_last_consumed_comment_end(self.span().end); - } } } @@ -592,9 +588,17 @@ impl<'a> Gen for VariableDeclaration<'a> { } if p.comment_options.preserve_annotate_comments - && !matches!(self.kind, VariableDeclarationKind::Const) + && matches!(self.kind, VariableDeclarationKind::Const) { - p.update_last_consumed_comment_end(self.span.start); + if let Some(declarator) = self.declarations.first() { + if let Some(ref init) = declarator.init { + let leading_annotate_comments = + p.get_leading_annotate_comments(self.span.start); + if !leading_annotate_comments.is_empty() { + p.move_comments(init.span().start, leading_annotate_comments); + } + } + } } p.print_str(match self.kind { VariableDeclarationKind::Const => "const", @@ -864,9 +868,17 @@ impl<'a> Gen for ExportNamedDeclaration<'a> { p.gen_comments(self.span.start); } Some(Declaration::VariableDeclaration(var_decl)) - if !matches!(var_decl.kind, VariableDeclarationKind::Const) => + if matches!(var_decl.kind, VariableDeclarationKind::Const) => { - p.update_last_consumed_comment_end(self.span.start); + if let Some(declarator) = var_decl.declarations.first() { + if let Some(ref init) = declarator.init { + let leading_annotate_comments = + p.get_leading_annotate_comments(self.span.start); + if !leading_annotate_comments.is_empty() { + p.move_comments(init.span().start, leading_annotate_comments); + } + } + } } _ => {} }; @@ -1062,9 +1074,6 @@ impl<'a> GenExpr for Expression<'a> { Self::TSNonNullExpression(e) => e.gen_expr(p, precedence, ctx), Self::TSInstantiationExpression(e) => e.gen_expr(p, precedence, ctx), } - if p.comment_options.preserve_annotate_comments { - p.update_last_consumed_comment_end(self.span().end); - } } } @@ -1409,7 +1418,7 @@ impl<'a> GenExpr for CallExpression<'a> { wrap = true; } p.wrap(wrap, |p| { - p.print_comments(&annotate_comments, AnnotationKind::empty()); + p.print_comments(&annotate_comments, &mut AnnotationKind::empty()); p.add_source_mapping(self.span.start); self.callee.gen_expr(p, Precedence::Postfix, Context::empty()); if self.optional { @@ -2078,7 +2087,7 @@ impl<'a> GenExpr for NewExpression<'a> { wrap = true; } p.wrap(wrap, |p| { - p.print_comments(&annotate_comment, AnnotationKind::empty()); + p.print_comments(&annotate_comment, &mut AnnotationKind::empty()); p.print_space_before_identifier(); p.add_source_mapping(self.span.start); p.print_str("new "); diff --git a/crates/oxc_codegen/src/lib.rs b/crates/oxc_codegen/src/lib.rs index 8e0c16c811453..36072583d69fd 100644 --- a/crates/oxc_codegen/src/lib.rs +++ b/crates/oxc_codegen/src/lib.rs @@ -10,7 +10,7 @@ mod gen; mod operator; mod sourcemap_builder; -use std::{borrow::Cow, ops::Range}; +use std::{borrow::Cow, collections::hash_map::Entry, ops::Range}; use oxc_ast::{ ast::{BindingIdentifier, BlockStatement, Expression, IdentifierReference, Program, Statement}, @@ -23,6 +23,7 @@ use oxc_syntax::{ operator::{BinaryOperator, UnaryOperator, UpdateOperator}, precedence::Precedence, }; +use rustc_hash::FxHashMap; use crate::{ binary_expr_visitor::BinaryExpressionVisitor, operator::Operator, @@ -33,6 +34,8 @@ pub use crate::{ gen::{Gen, GenExpr}, }; +use self::annotation_comment::AnnotationComment; + /// Code generator without whitespace removal. pub type CodeGenerator<'a> = Codegen<'a>; @@ -95,7 +98,13 @@ pub struct Codegen<'a> { sourcemap_builder: Option, latest_consumed_comment_end: u32, + + /// The key of map is the node start position, + /// the first element of value is the start of the comment + /// the second element of value includes the end of the comment and comment kind. + move_comment_map: MoveCommentMap, } +pub(crate) type MoveCommentMap = FxHashMap>; impl<'a> Default for Codegen<'a> { fn default() -> Self { @@ -140,6 +149,7 @@ impl<'a> Codegen<'a> { quote: b'"', sourcemap_builder: None, latest_consumed_comment_end: 0, + move_comment_map: MoveCommentMap::default(), } } @@ -516,7 +526,38 @@ impl<'a> Codegen<'a> { self.code.extend_from_slice(self.source_text[range].as_bytes()); } - fn get_leading_comments(&self, start: u32, end: u32) -> impl Iterator + '_ { + fn get_leading_comments( + &self, + start: u32, + end: u32, + ) -> impl DoubleEndedIterator + '_ { self.trivias.comments_range(start..end) } + /// In some scenario, we want to move the comment that should be codegened to another position. + /// ```js + /// /* @__NO_SIDE_EFFECTS__ */ export const a = function() { + /// + /// }, b = 10000; + /// + /// ``` + /// should generate such output: + /// ```js + /// export const /* @__NO_SIDE_EFFECTS__ */ a = function() { + /// + /// }, b = 10000; + /// ``` + fn move_comments(&mut self, position: u32, full_comment_infos: Vec) { + match self.move_comment_map.entry(position) { + Entry::Occupied(mut occ) => { + occ.get_mut().extend(full_comment_infos); + } + Entry::Vacant(vac) => { + vac.insert(full_comment_infos); + } + } + } + + fn try_take_moved_comment(&mut self, node_start: u32) -> Option> { + self.move_comment_map.remove(&node_start) + } }