diff --git a/crates/rome_formatter/src/format_element.rs b/crates/rome_formatter/src/format_element.rs index 92452d93bbc..0b200311312 100644 --- a/crates/rome_formatter/src/format_element.rs +++ b/crates/rome_formatter/src/format_element.rs @@ -1,6 +1,6 @@ -use crate::format_elements; use crate::intersperse::{Intersperse, IntersperseFn}; -use rome_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTriviaPieceComments, TextRange}; +use crate::{format_elements, TextSize}; +use rome_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTriviaPieceComments}; use std::borrow::Cow; use std::fmt::{self, Debug, Formatter}; use std::ops::Deref; @@ -823,46 +823,29 @@ where L: Language, { /// Get the number of line breaks between two consecutive SyntaxNodes in the tree - fn get_lines_between_nodes( - prev_node: &SyntaxNode, - next_node: &SyntaxNode, - ) -> usize { - // Ensure the two nodes are actually siblings on debug - debug_assert_eq!(prev_node.next_sibling().as_ref(), Some(next_node)); - debug_assert_eq!(next_node.prev_sibling().as_ref(), Some(prev_node)); - - // Count the lines separating the two statements, - // starting with the trailing trivia of the previous node - let mut line_count = prev_node - .last_trailing_trivia() - .and_then(|prev_token| { - // Newline pieces can only come last in trailing trivias, skip to it directly - prev_token.pieces().next_back()?.as_newline() - }) - .is_some() as usize; - - // Then add the newlines in the leading trivia of the next node + fn get_lines_before(next_node: &SyntaxNode) -> usize { + // Count the newlines in the leading trivia of the next node if let Some(leading_trivia) = next_node.first_leading_trivia() { - for piece in leading_trivia.pieces() { - if piece.is_newline() { - line_count += 1; - } else if piece.is_comments() { + leading_trivia + .pieces() + .take_while(|piece| { // Stop at the first comment piece, the comment printer // will handle newlines between the comment and the node - break; - } - } + !piece.is_comments() + }) + .filter(|piece| piece.is_newline()) + .count() + } else { + 0 } - - line_count } concat_elements(IntersperseFn::new( elements.into_iter(), - |prev_node, next_node, next_elem| { + |_, next_node, next_elem| { if next_elem.is_empty() { empty_element() - } else if get_lines_between_nodes(prev_node, next_node) > 1 { + } else if get_lines_before(next_node) > 1 { empty_line() } else { separator() @@ -928,10 +911,8 @@ pub enum VerbatimKind { Unknown, Suppressed, Verbatim { - /// the range that belongs to the node/token formatted verbatim - range: TextRange, - /// the text that belongs to the node/token formatted verbatim - text: String, + /// the length of the formatted node + length: TextSize, }, } @@ -945,10 +926,10 @@ pub struct Verbatim { } impl Verbatim { - pub fn new_verbatim(element: FormatElement, text: String, range: TextRange) -> Self { + pub fn new_verbatim(element: FormatElement, length: TextSize) -> Self { Self { element: Box::new(element), - kind: VerbatimKind::Verbatim { range, text }, + kind: VerbatimKind::Verbatim { length }, } } @@ -1139,7 +1120,12 @@ pub enum Token { Static { text: &'static str }, /// Token constructed from the input source as a dynamics /// string and a range of the input source - Dynamic { text: String, source: TextRange }, + Dynamic { + // There's no need for the text to be mutable, using `Box` safes 8 bytes over `String`. + text: Box, + // The position of the dynamic token in the unformatted source code + source_position: TextSize, + }, } impl Debug for Token { @@ -1147,8 +1133,8 @@ impl Debug for Token { // This does not use debug_tuple so the tokens are // written on a single line even when pretty-printing match self { - Token::Static { text } => write!(fmt, "Token({:?})", text), - Token::Dynamic { text, source } => write!(fmt, "Token({:?}, {:?})", text, source), + Token::Static { text } => write!(fmt, "StaticToken({:?})", text), + Token::Dynamic { text, .. } => write!(fmt, "DynamicToken({:?})", text), } } } @@ -1160,17 +1146,22 @@ impl Token { } /// Create a token from a dynamic string and a range of the input source - pub fn new_dynamic(text: String, source: TextRange) -> Self { + pub fn new_dynamic(text: String, position: TextSize) -> Self { debug_assert!(!text.contains('\r'), "The content '{}' contains an unsupported '\\r' line terminator character but string tokens must only use line feeds '\\n' as line separator. Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings.", text); - Self::Dynamic { text, source } + Self::Dynamic { + text: text.into_boxed_str(), + source_position: position, + } } /// Get the range of the input source covered by this token, /// or None if the token was synthesized by the formatter - pub fn source(&self) -> Option<&TextRange> { + pub fn source(&self) -> Option<&TextSize> { match self { Token::Static { .. } => None, - Token::Dynamic { source, .. } => Some(source), + Token::Dynamic { + source_position, .. + } => Some(source_position), } } } @@ -1190,7 +1181,10 @@ impl From> for Token { impl<'a, L: Language> From<&'a SyntaxToken> for Token { fn from(token: &'a SyntaxToken) -> Self { - Self::new_dynamic(token.text_trimmed().into(), token.text_trimmed_range()) + Self::new_dynamic( + token.text_trimmed().into(), + token.text_trimmed_range().start(), + ) } } @@ -1230,7 +1224,7 @@ impl From> for Token { fn from(trivia: SyntaxTriviaPieceComments) -> Self { Self::new_dynamic( normalize_newlines(trivia.text().trim(), LINE_TERMINATORS).into_owned(), - trivia.text_range(), + trivia.text_range().start(), ) } } @@ -1398,9 +1392,10 @@ impl From> for FormatElement { mod tests { use crate::format_element::{ - empty_element, join_elements, normalize_newlines, List, LINE_TERMINATORS, + empty_element, join_elements, normalize_newlines, ConditionalGroupContent, List, + VerbatimKind, LINE_TERMINATORS, }; - use crate::{concat_elements, space_token, token, FormatElement}; + use crate::{concat_elements, space_token, token, FormatElement, TextRange, Token, Verbatim}; #[test] fn concat_elements_returns_a_list_token_containing_the_passed_in_elements() { @@ -1516,4 +1511,19 @@ mod tests { assert_eq!(normalize_newlines("a\u{2028}b", LINE_TERMINATORS), "a\nb"); assert_eq!(normalize_newlines("a\u{2029}b", LINE_TERMINATORS), "a\nb"); } + + #[test] + fn sizes() { + assert_eq!(8, std::mem::size_of::()); + assert_eq!(8, std::mem::size_of::()); + assert_eq!(16, std::mem::size_of::()); + assert_eq!(24, std::mem::size_of::()); + assert_eq!(16, std::mem::size_of::()); + assert_eq!(24, std::mem::size_of::()); + // Increasing the size of FormatElement has serious consequences on runtime performance and memory footprint. + // Is there a more efficient way to encode the data to avoid increasing its size? Can the information + // be recomputed at a later point in time? + // You reduced the size of a format element? Excellent work! + assert_eq!(32, std::mem::size_of::()); + } } diff --git a/crates/rome_formatter/src/lib.rs b/crates/rome_formatter/src/lib.rs index f6e51e5c866..09205d205cc 100644 --- a/crates/rome_formatter/src/lib.rs +++ b/crates/rome_formatter/src/lib.rs @@ -203,7 +203,7 @@ pub struct Formatted { code: String, range: Option, sourcemap: Vec, - verbatim_source: Vec<(String, TextRange)>, + verbatim_ranges: Vec, } impl Formatted { @@ -211,13 +211,13 @@ impl Formatted { code: String, range: Option, sourcemap: Vec, - verbatim_source: Vec<(String, TextRange)>, + verbatim_source: Vec, ) -> Self { Self { code, range, sourcemap, - verbatim_source, + verbatim_ranges: verbatim_source, } } @@ -227,7 +227,7 @@ impl Formatted { code: String::new(), range: None, sourcemap: Vec::new(), - verbatim_source: Vec::new(), + verbatim_ranges: Vec::new(), } } @@ -259,12 +259,16 @@ impl Formatted { self.code } - pub fn verbatim(&self) -> &[(String, TextRange)] { - &self.verbatim_source + /// The text in the formatted code that has been formatted as verbatim. + pub fn verbatim(&self) -> impl Iterator { + self.verbatim_ranges + .iter() + .map(|range| (*range, &self.code[*range])) } - pub fn into_verbatim(self) -> Vec<(String, TextRange)> { - self.verbatim_source + /// Ranges of the formatted code that have been formatted as verbatim. + pub fn verbatim_ranges(&self) -> &[TextRange] { + &self.verbatim_ranges } } diff --git a/crates/rome_formatter/src/printer.rs b/crates/rome_formatter/src/printer.rs index 8481d3c3832..d8392d030f2 100644 --- a/crates/rome_formatter/src/printer.rs +++ b/crates/rome_formatter/src/printer.rs @@ -171,9 +171,9 @@ impl<'a> Printer<'a> { self.state.pending_space = false; } - if let Some(range) = token.source() { + if let Some(source) = token.source() { self.state.source_markers.push(SourceMarker { - source: range.start(), + source: *source, dest: TextSize::from(self.state.buffer.len() as u32), }); } @@ -271,8 +271,11 @@ impl<'a> Printer<'a> { } FormatElement::Verbatim(verbatim) => { - if let VerbatimKind::Verbatim { range, text } = &verbatim.kind { - self.state.verbatim_markers.push((text.clone(), *range)); + if let VerbatimKind::Verbatim { length } = &verbatim.kind { + self.state.verbatim_markers.push(TextRange::at( + TextSize::from(self.state.buffer.len() as u32), + *length, + )); } queue.enqueue(PrintElementCall::new(&verbatim.element, args)); @@ -499,9 +502,8 @@ struct PrinterState<'a> { generated_column: usize, line_width: usize, has_empty_line: bool, - // mappings: Mapping[]; line_suffixes: Vec>, - verbatim_markers: Vec<(String, TextRange)>, + verbatim_markers: Vec, } impl<'a> PrinterState<'a> { diff --git a/crates/rome_js_formatter/src/formatter.rs b/crates/rome_js_formatter/src/formatter.rs index bc8f33d74a9..60375a89600 100644 --- a/crates/rome_js_formatter/src/formatter.rs +++ b/crates/rome_js_formatter/src/formatter.rs @@ -433,7 +433,7 @@ impl Formatter { let text = &token.text()[relative_skipped_range]; elements.push(FormatElement::from(Token::new_dynamic( text.to_string(), - skipped_trivia_range, + skipped_trivia_range.start(), ))); // `print_trailing_trivia_pieces` and `format_leading_trivia_pieces` remove any whitespace except @@ -604,17 +604,13 @@ impl Formatter { /// "mess up" the developers, yet incomplete, work or accidentally introduce new syntax errors. /// /// You may be inclined to call `node.text` directly. However, using `text` doesn't track the nodes - ///nor its children source mapping information, resulting in incorrect source maps for this subtree. + /// nor its children source mapping information, resulting in incorrect source maps for this subtree. /// /// These nodes and tokens get tracked as [FormatElement::Verbatim], useful to understand /// if these nodes still need to have their own implementation. pub fn format_verbatim(&self, node: &JsSyntaxNode) -> FormatElement { let verbatim = self.format_verbatim_node_or_token(node); - FormatElement::Verbatim(Verbatim::new_verbatim( - verbatim, - node.to_string(), - node.text_range(), - )) + FormatElement::Verbatim(Verbatim::new_verbatim(verbatim, node.text_range().len())) } /// Formats unknown nodes. The difference between this method and `format_verbatim` is that this method @@ -653,7 +649,7 @@ impl Formatter { fn trivia_token(piece: SyntaxTriviaPiece) -> Token { Token::new_dynamic( normalize_newlines(piece.text(), LINE_TERMINATORS).into_owned(), - piece.text_range(), + piece.text_range().start(), ) } @@ -666,7 +662,7 @@ impl Formatter { let content = Token::new_dynamic( normalize_newlines(&node.text_trimmed().to_string(), LINE_TERMINATORS).into_owned(), - node.text_trimmed_range(), + node.text_trimmed_range().start(), ); // Clippy false positive: SkipWhile does not implement DoubleEndedIterator diff --git a/crates/rome_js_formatter/src/lib.rs b/crates/rome_js_formatter/src/lib.rs index d9fa68ee08a..0b0317658d9 100644 --- a/crates/rome_js_formatter/src/lib.rs +++ b/crates/rome_js_formatter/src/lib.rs @@ -222,13 +222,13 @@ pub fn format_range( let input_range = TextRange::new(start_source, end_source); let output_range = TextRange::new(start_dest, end_dest); let sourcemap = Vec::from(formatted.sourcemap()); - let verbatim = Vec::from(formatted.verbatim()); + let verbatim_ranges = Vec::from(formatted.verbatim_ranges()); let code = &formatted.into_code()[output_range]; Ok(Formatted::new( code.into(), Some(input_range), sourcemap, - verbatim, + verbatim_ranges, )) } @@ -294,12 +294,12 @@ pub fn format_node(options: FormatOptions, root: &JsSyntaxNode) -> FormatResult< let element = Formatter::new(options).format_root(root)?; let formatted = Printer::new(options).print_with_indent(&element, initial_indent); let sourcemap = Vec::from(formatted.sourcemap()); - let verbatim = Vec::from(formatted.verbatim()); + let verbatim_ranges = Vec::from(formatted.verbatim_ranges()); Ok(Formatted::new( formatted.into_code(), Some(root.text_range()), sourcemap, - verbatim, + verbatim_ranges, )) } diff --git a/crates/rome_js_formatter/src/ts/types/intersection_type.rs b/crates/rome_js_formatter/src/ts/types/intersection_type.rs index 997ed79a78f..695f1dc79b9 100644 --- a/crates/rome_js_formatter/src/ts/types/intersection_type.rs +++ b/crates/rome_js_formatter/src/ts/types/intersection_type.rs @@ -21,7 +21,7 @@ impl ToFormatElement for TsIntersectionType { // if_group_breaks block to avoid removing comments when the // group does not break let replaced = - if_group_breaks(format_elements![Token::from(token.clone()), space_token()]); + if_group_breaks(format_elements![Token::from(&token), space_token()]); formatter.format_replaced(&token, replaced) } None => if_group_breaks(format_elements![token("&"), space_token()]), diff --git a/crates/rome_js_formatter/src/ts/types/union_type.rs b/crates/rome_js_formatter/src/ts/types/union_type.rs index 07bac292787..96a019a089f 100644 --- a/crates/rome_js_formatter/src/ts/types/union_type.rs +++ b/crates/rome_js_formatter/src/ts/types/union_type.rs @@ -21,7 +21,7 @@ impl ToFormatElement for TsUnionType { // if_group_breaks block to avoid removing comments when the // group does not break let replaced = - if_group_breaks(format_elements![Token::from(token.clone()), space_token()]); + if_group_breaks(format_elements![Token::from(&token), space_token()]); formatter.format_replaced(&token, replaced) } None => if_group_breaks(format_elements![token("|"), space_token()]), diff --git a/crates/rome_js_formatter/src/utils/mod.rs b/crates/rome_js_formatter/src/utils/mod.rs index e909266f13c..2a9f55b418b 100644 --- a/crates/rome_js_formatter/src/utils/mod.rs +++ b/crates/rome_js_formatter/src/utils/mod.rs @@ -390,7 +390,7 @@ pub(crate) fn format_template_chunk( &chunk, FormatElement::from(Token::new_dynamic( normalize_newlines(chunk.text_trimmed(), ['\r']).into_owned(), - chunk.text_trimmed_range(), + chunk.text_trimmed_range().start(), )), )) } @@ -629,6 +629,6 @@ pub(crate) fn format_string_literal_token( formatter.format_replaced( &token, - Token::new_dynamic(content, token.text_trimmed_range()).into(), + Token::new_dynamic(content, token.text_trimmed_range().start()).into(), ) } diff --git a/crates/rome_js_formatter/tests/spec_test.rs b/crates/rome_js_formatter/tests/spec_test.rs index 2ba71bc6d92..ed57074d797 100644 --- a/crates/rome_js_formatter/tests/spec_test.rs +++ b/crates/rome_js_formatter/tests/spec_test.rs @@ -88,13 +88,12 @@ impl SnapshotContent { fn add_output(&mut self, formatted: Formatted, options: FormatOptions) { let code = formatted.as_code(); let mut output: String = code.to_string(); - if !formatted.verbatim().is_empty() { + if !formatted.verbatim_ranges().is_empty() { output.push_str("\n\n"); output.push_str("## Unimplemented nodes/tokens"); output.push_str("\n\n"); - for (text, range) in formatted.verbatim() { - let string = format!("{:?} => {:?}\n", text, range); - output.push_str(string.as_str()); + for (range, text) in formatted.verbatim() { + output.push_str(&format!("{:?} => {:?}\n", text, range)); } }