From 3c88c8e42adf7faf769151aed2048ee2b55960fc Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Fri, 15 Apr 2022 10:32:51 +0200 Subject: [PATCH] perf(rome_js_formatter): Reduce `FormatElement` size This PR reduces the size of a `FormatElement` from 56 to 32 bytes by: * Using a `Box` to store a dynamic token's string as it doesn't need to be mutable (safes 8 bytes for the `capacity`) * Only storing the start position for a `DynamicToken` because the length isn't used by the printer * Change the semantics of `verbatim_ranges` to store the ranges in the **formatted** string. The ranges in the formatted output should be sufficient for debugging and it allows to resolve the `String` rather than having to store it on the `FormatElement` (range can be computed in the printer). --- crates/rome_formatter/src/format_element.rs | 132 +++++++++++------- crates/rome_formatter/src/lib.rs | 20 +-- crates/rome_formatter/src/printer.rs | 67 ++++----- crates/rome_js_formatter/src/formatter.rs | 14 +- .../rome_js_formatter/src/formatter_traits.rs | 3 +- crates/rome_js_formatter/src/lib.rs | 8 +- .../src/ts/types/intersection_type.rs | 2 +- .../src/ts/types/union_type.rs | 2 +- crates/rome_js_formatter/src/utils/mod.rs | 8 +- crates/rome_js_formatter/tests/spec_test.rs | 10 +- 10 files changed, 147 insertions(+), 119 deletions(-) diff --git a/crates/rome_formatter/src/format_element.rs b/crates/rome_formatter/src/format_element.rs index 92452d93bbc7..2219dbc87fe1 100644 --- a/crates/rome_formatter/src/format_element.rs +++ b/crates/rome_formatter/src/format_element.rs @@ -1,6 +1,6 @@ -use crate::format_elements; use crate::intersperse::{Intersperse, IntersperseFn}; -use rome_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTriviaPieceComments, TextRange}; +use crate::{format_elements, TextSize}; +use rome_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTriviaPieceComments}; use std::borrow::Cow; use std::fmt::{self, Debug, Formatter}; use std::ops::Deref; @@ -245,27 +245,34 @@ pub fn concat_elements(elements: I) -> FormatElement where I: IntoIterator, { - let mut elements = elements.into_iter(); + let elements = elements.into_iter(); let (lower_bound, upper_bound) = elements.size_hint(); let size_hint = upper_bound.unwrap_or(lower_bound); + let mut elements = elements.filter(|element| !element.is_empty()).peekable(); + + let first = match elements.next() { + Some(element) => element, + None => return empty_element(), + }; + + if elements.peek().is_none() { + return first; + } + // If the first non empty element is a vec, use it, // otherwise create a new one with the current element - let mut concatenated = loop { - match elements.next() { - Some(FormatElement::Empty) => continue, - Some(FormatElement::List(list)) => { - let mut v = list.content; - v.reserve(size_hint); - break v; - } - Some(element) => { - let mut v = Vec::with_capacity(size_hint); - v.push(element); - break v; - } - None => return empty_element(), + let mut concatenated = match first { + FormatElement::List(list) => { + let mut v = list.content; + v.reserve(size_hint - 1); + v + } + element => { + let mut v = Vec::with_capacity(size_hint); + v.push(element); + v } }; @@ -273,18 +280,11 @@ where for element in elements { match element { FormatElement::List(list) => concatenated.extend(list.content), - FormatElement::Empty => {} element => concatenated.push(element), } } - if concatenated.is_empty() { - empty_element() - } else if concatenated.len() == 1 { - concatenated.pop().unwrap() - } else { - FormatElement::from(List::new(concatenated)) - } + FormatElement::from(List::new(concatenated)) } /// Concatenates a list of [FormatElement]s with spaces and line breaks to fit @@ -928,10 +928,8 @@ pub enum VerbatimKind { Unknown, Suppressed, Verbatim { - /// the range that belongs to the node/token formatted verbatim - range: TextRange, - /// the text that belongs to the node/token formatted verbatim - text: String, + /// the length of the formatted node + length: TextSize, }, } @@ -945,10 +943,10 @@ pub struct Verbatim { } impl Verbatim { - pub fn new_verbatim(element: FormatElement, text: String, range: TextRange) -> Self { + pub fn new_verbatim(element: FormatElement, length: TextSize) -> Self { Self { element: Box::new(element), - kind: VerbatimKind::Verbatim { range, text }, + kind: VerbatimKind::Verbatim { length }, } } @@ -1132,25 +1130,18 @@ impl ConditionalGroupContent { } } -/// See [token] for documentation #[derive(Eq, Clone)] pub enum Token { /// Token constructed by the formatter from a static string Static { text: &'static str }, /// Token constructed from the input source as a dynamics /// string and a range of the input source - Dynamic { text: String, source: TextRange }, -} - -impl Debug for Token { - fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { - // This does not use debug_tuple so the tokens are - // written on a single line even when pretty-printing - match self { - Token::Static { text } => write!(fmt, "Token({:?})", text), - Token::Dynamic { text, source } => write!(fmt, "Token({:?}, {:?})", text, source), - } - } + Dynamic { + // There's no need for the text to be mutable, using `Box` safes 8 bytes over `String`. + text: Box, + // The position of the dynamic token in the unformatted source code + source_position: TextSize, + }, } impl Token { @@ -1160,17 +1151,33 @@ impl Token { } /// Create a token from a dynamic string and a range of the input source - pub fn new_dynamic(text: String, source: TextRange) -> Self { + pub fn new_dynamic(text: String, position: TextSize) -> Self { debug_assert!(!text.contains('\r'), "The content '{}' contains an unsupported '\\r' line terminator character but string tokens must only use line feeds '\\n' as line separator. Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings.", text); - Self::Dynamic { text, source } + Self::Dynamic { + text: text.into_boxed_str(), + source_position: position, + } } /// Get the range of the input source covered by this token, /// or None if the token was synthesized by the formatter - pub fn source(&self) -> Option<&TextRange> { + pub fn source(&self) -> Option<&TextSize> { match self { Token::Static { .. } => None, - Token::Dynamic { source, .. } => Some(source), + Token::Dynamic { + source_position, .. + } => Some(source_position), + } + } +} + +impl Debug for Token { + fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { + // This does not use debug_tuple so the tokens are + // written on a single line even when pretty-printing + match self { + Token::Static { text } => write!(fmt, "StaticToken({:?})", text), + Token::Dynamic { text, .. } => write!(fmt, "DynamicToken({:?})", text), } } } @@ -1190,7 +1197,10 @@ impl From> for Token { impl<'a, L: Language> From<&'a SyntaxToken> for Token { fn from(token: &'a SyntaxToken) -> Self { - Self::new_dynamic(token.text_trimmed().into(), token.text_trimmed_range()) + Self::new_dynamic( + token.text_trimmed().into(), + token.text_trimmed_range().start(), + ) } } @@ -1230,7 +1240,7 @@ impl From> for Token { fn from(trivia: SyntaxTriviaPieceComments) -> Self { Self::new_dynamic( normalize_newlines(trivia.text().trim(), LINE_TERMINATORS).into_owned(), - trivia.text_range(), + trivia.text_range().start(), ) } } @@ -1239,8 +1249,8 @@ impl Deref for Token { type Target = str; fn deref(&self) -> &Self::Target { match self { - Token::Static { text } => text, Token::Dynamic { text, .. } => text, + Token::Static { text } => text, } } } @@ -1398,9 +1408,10 @@ impl From> for FormatElement { mod tests { use crate::format_element::{ - empty_element, join_elements, normalize_newlines, List, LINE_TERMINATORS, + empty_element, join_elements, normalize_newlines, ConditionalGroupContent, List, + VerbatimKind, LINE_TERMINATORS, }; - use crate::{concat_elements, space_token, token, FormatElement}; + use crate::{concat_elements, space_token, token, FormatElement, TextRange, Token, Verbatim}; #[test] fn concat_elements_returns_a_list_token_containing_the_passed_in_elements() { @@ -1516,4 +1527,19 @@ mod tests { assert_eq!(normalize_newlines("a\u{2028}b", LINE_TERMINATORS), "a\nb"); assert_eq!(normalize_newlines("a\u{2029}b", LINE_TERMINATORS), "a\nb"); } + + #[test] + fn sizes() { + assert_eq!(8, std::mem::size_of::()); + assert_eq!(8, std::mem::size_of::()); + assert_eq!(16, std::mem::size_of::()); + assert_eq!(24, std::mem::size_of::()); + assert_eq!(16, std::mem::size_of::()); + assert_eq!(24, std::mem::size_of::()); + // Increasing the size of FormatElement has serious consequences on runtime performance and memory footprint. + // Is there a more efficient way to encode the data to avoid increasing its size? Can the information + // be recomputed at a later point in time? + // You reduced the size of a format element? Excellent work! + assert_eq!(32, std::mem::size_of::()); + } } diff --git a/crates/rome_formatter/src/lib.rs b/crates/rome_formatter/src/lib.rs index f6e51e5c866c..09205d205cc5 100644 --- a/crates/rome_formatter/src/lib.rs +++ b/crates/rome_formatter/src/lib.rs @@ -203,7 +203,7 @@ pub struct Formatted { code: String, range: Option, sourcemap: Vec, - verbatim_source: Vec<(String, TextRange)>, + verbatim_ranges: Vec, } impl Formatted { @@ -211,13 +211,13 @@ impl Formatted { code: String, range: Option, sourcemap: Vec, - verbatim_source: Vec<(String, TextRange)>, + verbatim_source: Vec, ) -> Self { Self { code, range, sourcemap, - verbatim_source, + verbatim_ranges: verbatim_source, } } @@ -227,7 +227,7 @@ impl Formatted { code: String::new(), range: None, sourcemap: Vec::new(), - verbatim_source: Vec::new(), + verbatim_ranges: Vec::new(), } } @@ -259,12 +259,16 @@ impl Formatted { self.code } - pub fn verbatim(&self) -> &[(String, TextRange)] { - &self.verbatim_source + /// The text in the formatted code that has been formatted as verbatim. + pub fn verbatim(&self) -> impl Iterator { + self.verbatim_ranges + .iter() + .map(|range| (*range, &self.code[*range])) } - pub fn into_verbatim(self) -> Vec<(String, TextRange)> { - self.verbatim_source + /// Ranges of the formatted code that have been formatted as verbatim. + pub fn verbatim_ranges(&self) -> &[TextRange] { + &self.verbatim_ranges } } diff --git a/crates/rome_formatter/src/printer.rs b/crates/rome_formatter/src/printer.rs index 8481d3c38320..df096108a5c5 100644 --- a/crates/rome_formatter/src/printer.rs +++ b/crates/rome_formatter/src/printer.rs @@ -153,34 +153,7 @@ impl<'a> Printer<'a> { } } FormatElement::Empty => {} - FormatElement::Token(token) => { - // Print pending indention - if self.state.pending_indent > 0 { - self.print_str( - self.options - .indent_string - .repeat(self.state.pending_indent as usize) - .as_str(), - ); - self.state.pending_indent = 0; - } - - // Print pending spaces - if self.state.pending_space { - self.print_str(" "); - self.state.pending_space = false; - } - - if let Some(range) = token.source() { - self.state.source_markers.push(SourceMarker { - source: range.start(), - dest: TextSize::from(self.state.buffer.len() as u32), - }); - } - - self.print_str(token); - } - + FormatElement::Token(token) => self.print_token(token, token.source()), FormatElement::HardGroup(group) => queue.enqueue(PrintElementCall::new( group.content.as_ref(), args.with_hard_group(true), @@ -271,8 +244,11 @@ impl<'a> Printer<'a> { } FormatElement::Verbatim(verbatim) => { - if let VerbatimKind::Verbatim { range, text } = &verbatim.kind { - self.state.verbatim_markers.push((text.clone(), *range)); + if let VerbatimKind::Verbatim { length } = &verbatim.kind { + self.state.verbatim_markers.push(TextRange::at( + TextSize::from(self.state.buffer.len() as u32), + *length, + )); } queue.enqueue(PrintElementCall::new(&verbatim.element, args)); @@ -280,6 +256,34 @@ impl<'a> Printer<'a> { } } + fn print_token(&mut self, token: &str, source: Option<&TextSize>) { + // Print pending indention + if self.state.pending_indent > 0 { + self.print_str( + self.options + .indent_string + .repeat(self.state.pending_indent as usize) + .as_str(), + ); + self.state.pending_indent = 0; + } + + // Print pending spaces + if self.state.pending_space { + self.print_str(" "); + self.state.pending_space = false; + } + + if let Some(source) = source { + self.state.source_markers.push(SourceMarker { + source: *source, + dest: TextSize::from(self.state.buffer.len() as u32), + }); + } + + self.print_str(token); + } + /// Tries to print an element without any line breaks. Reverts any made `state` changes (by this function) /// and returns with a [LineBreakRequiredError] if the `element` contains any hard line breaks /// or printing the group exceeds the configured maximal print width. @@ -499,9 +503,8 @@ struct PrinterState<'a> { generated_column: usize, line_width: usize, has_empty_line: bool, - // mappings: Mapping[]; line_suffixes: Vec>, - verbatim_markers: Vec<(String, TextRange)>, + verbatim_markers: Vec, } impl<'a> PrinterState<'a> { diff --git a/crates/rome_js_formatter/src/formatter.rs b/crates/rome_js_formatter/src/formatter.rs index 183fe310877f..48fb9f0eda5a 100644 --- a/crates/rome_js_formatter/src/formatter.rs +++ b/crates/rome_js_formatter/src/formatter.rs @@ -433,7 +433,7 @@ impl Formatter { let text = &token.text()[relative_skipped_range]; elements.push(FormatElement::from(Token::new_dynamic( text.to_string(), - skipped_trivia_range, + skipped_trivia_range.start(), ))); // `print_trailing_trivia_pieces` and `format_leading_trivia_pieces` remove any whitespace except @@ -604,17 +604,13 @@ impl Formatter { /// "mess up" the developers, yet incomplete, work or accidentally introduce new syntax errors. /// /// You may be inclined to call `node.text` directly. However, using `text` doesn't track the nodes - ///nor its children source mapping information, resulting in incorrect source maps for this subtree. + /// nor its children source mapping information, resulting in incorrect source maps for this subtree. /// /// These nodes and tokens get tracked as [FormatElement::Verbatim], useful to understand /// if these nodes still need to have their own implementation. pub fn format_verbatim(&self, node: &JsSyntaxNode) -> FormatElement { let verbatim = self.format_verbatim_node_or_token(node); - FormatElement::Verbatim(Verbatim::new_verbatim( - verbatim, - node.to_string(), - node.text_range(), - )) + FormatElement::Verbatim(Verbatim::new_verbatim(verbatim, node.text_range().len())) } /// Formats unknown nodes. The difference between this method and `format_verbatim` is that this method @@ -653,7 +649,7 @@ impl Formatter { fn trivia_token(piece: SyntaxTriviaPiece) -> Token { Token::new_dynamic( normalize_newlines(piece.text(), LINE_TERMINATORS).into_owned(), - piece.text_range(), + piece.text_range().start(), ) } @@ -666,7 +662,7 @@ impl Formatter { let content = Token::new_dynamic( normalize_newlines(&node.text_trimmed().to_string(), LINE_TERMINATORS).into_owned(), - node.text_trimmed_range(), + node.text_trimmed_range().start(), ); // Clippy false positive: SkipWhile does not implement DoubleEndedIterator diff --git a/crates/rome_js_formatter/src/formatter_traits.rs b/crates/rome_js_formatter/src/formatter_traits.rs index a76d6dd65427..877f0c208195 100644 --- a/crates/rome_js_formatter/src/formatter_traits.rs +++ b/crates/rome_js_formatter/src/formatter_traits.rs @@ -1,8 +1,7 @@ use crate::formatter::TriviaPrintMode; use crate::utils::has_formatter_suppressions; -use crate::Token; use crate::{ - empty_element, format_elements, FormatElement, FormatResult, Formatter, ToFormatElement, + empty_element, format_elements, FormatElement, FormatResult, Formatter, ToFormatElement, Token, }; use rome_js_syntax::{JsLanguage, JsSyntaxToken}; use rome_rowan::{AstNode, SyntaxResult}; diff --git a/crates/rome_js_formatter/src/lib.rs b/crates/rome_js_formatter/src/lib.rs index d9fa68ee08af..0b0317658d99 100644 --- a/crates/rome_js_formatter/src/lib.rs +++ b/crates/rome_js_formatter/src/lib.rs @@ -222,13 +222,13 @@ pub fn format_range( let input_range = TextRange::new(start_source, end_source); let output_range = TextRange::new(start_dest, end_dest); let sourcemap = Vec::from(formatted.sourcemap()); - let verbatim = Vec::from(formatted.verbatim()); + let verbatim_ranges = Vec::from(formatted.verbatim_ranges()); let code = &formatted.into_code()[output_range]; Ok(Formatted::new( code.into(), Some(input_range), sourcemap, - verbatim, + verbatim_ranges, )) } @@ -294,12 +294,12 @@ pub fn format_node(options: FormatOptions, root: &JsSyntaxNode) -> FormatResult< let element = Formatter::new(options).format_root(root)?; let formatted = Printer::new(options).print_with_indent(&element, initial_indent); let sourcemap = Vec::from(formatted.sourcemap()); - let verbatim = Vec::from(formatted.verbatim()); + let verbatim_ranges = Vec::from(formatted.verbatim_ranges()); Ok(Formatted::new( formatted.into_code(), Some(root.text_range()), sourcemap, - verbatim, + verbatim_ranges, )) } diff --git a/crates/rome_js_formatter/src/ts/types/intersection_type.rs b/crates/rome_js_formatter/src/ts/types/intersection_type.rs index 997ed79a78f3..695f1dc79b95 100644 --- a/crates/rome_js_formatter/src/ts/types/intersection_type.rs +++ b/crates/rome_js_formatter/src/ts/types/intersection_type.rs @@ -21,7 +21,7 @@ impl ToFormatElement for TsIntersectionType { // if_group_breaks block to avoid removing comments when the // group does not break let replaced = - if_group_breaks(format_elements![Token::from(token.clone()), space_token()]); + if_group_breaks(format_elements![Token::from(&token), space_token()]); formatter.format_replaced(&token, replaced) } None => if_group_breaks(format_elements![token("&"), space_token()]), diff --git a/crates/rome_js_formatter/src/ts/types/union_type.rs b/crates/rome_js_formatter/src/ts/types/union_type.rs index 07bac292787b..96a019a089fb 100644 --- a/crates/rome_js_formatter/src/ts/types/union_type.rs +++ b/crates/rome_js_formatter/src/ts/types/union_type.rs @@ -21,7 +21,7 @@ impl ToFormatElement for TsUnionType { // if_group_breaks block to avoid removing comments when the // group does not break let replaced = - if_group_breaks(format_elements![Token::from(token.clone()), space_token()]); + if_group_breaks(format_elements![Token::from(&token), space_token()]); formatter.format_replaced(&token, replaced) } None => if_group_breaks(format_elements![token("|"), space_token()]), diff --git a/crates/rome_js_formatter/src/utils/mod.rs b/crates/rome_js_formatter/src/utils/mod.rs index a7f49dcbcca5..a3775fd5d3ac 100644 --- a/crates/rome_js_formatter/src/utils/mod.rs +++ b/crates/rome_js_formatter/src/utils/mod.rs @@ -7,12 +7,12 @@ mod simple; use crate::formatter_traits::{FormatOptionalTokenAndNode, FormatTokenAndNode}; use crate::{ empty_element, empty_line, format_elements, hard_group_elements, space_token, token, - FormatElement, FormatResult, Formatter, QuoteStyle, Token, + FormatElement, FormatResult, Formatter, QuoteStyle, }; pub(crate) use binary_like_expression::{format_binary_like_expression, JsAnyBinaryLikeExpression}; pub(crate) use call_expression::format_call_expression; pub(crate) use format_conditional::{format_conditional, Conditional}; -use rome_formatter::normalize_newlines; +use rome_formatter::{normalize_newlines, Token}; use rome_js_syntax::{ JsAnyExpression, JsAnyFunction, JsAnyRoot, JsAnyStatement, JsInitializerClause, JsLanguage, JsTemplateElement, JsTemplateElementFields, Modifiers, TsTemplateElement, @@ -390,7 +390,7 @@ pub(crate) fn format_template_chunk( &chunk, FormatElement::from(Token::new_dynamic( normalize_newlines(chunk.text_trimmed(), ['\r']).into_owned(), - chunk.text_trimmed_range(), + chunk.text_trimmed_range().start(), )), )) } @@ -625,6 +625,6 @@ pub(crate) fn format_string_literal_token( formatter.format_replaced( &token, - Token::new_dynamic(content, token.text_trimmed_range()).into(), + Token::new_dynamic(content, token.text_trimmed_range().start()).into(), ) } diff --git a/crates/rome_js_formatter/tests/spec_test.rs b/crates/rome_js_formatter/tests/spec_test.rs index af9b1c372913..76541bac2712 100644 --- a/crates/rome_js_formatter/tests/spec_test.rs +++ b/crates/rome_js_formatter/tests/spec_test.rs @@ -86,14 +86,14 @@ struct SnapshotContent { impl SnapshotContent { pub fn add_output(&mut self, formatted: Formatted, options: FormatOptions) { - let mut output: String = formatted.as_code().into(); - if !formatted.verbatim().is_empty() { + let code = formatted.as_code(); + let mut output: String = String::from(code); + if !formatted.verbatim_ranges().is_empty() { output.push_str("\n\n"); output.push_str("## Unimplemented nodes/tokens"); output.push_str("\n\n"); - for (text, range) in formatted.verbatim() { - let string = format!("{:?} => {:?}\n", text, range); - output.push_str(string.as_str()); + for (range, text) in formatted.verbatim() { + output.push_str(&format!("{:?} => {:?}\n", text, range)); } } self.output.push((output, options));