Skip to content
This repository has been archived by the owner on Aug 31, 2023. It is now read-only.

Commit

Permalink
perf(rome_js_formatter): Reduce FormatElement size
Browse files Browse the repository at this point in the history
This PR reduces the size of a `FormatElement` from 56 to 32 bytes by:

 * Using a `Box<str>` to store a dynamic token's string as it doesn't need to be mutable (safes 8 bytes for the `capacity`)
 * Only storing the start position for a `DynamicToken` because the length isn't used by the printer
 * Change the semantics of `verbatim_ranges` to store the ranges in the **formatted** string. The ranges in the formatted output should be sufficient for debugging and it allows to resolve the `String` rather than having to store it on the `FormatElement` (range can be computed in the printer).
  • Loading branch information
MichaReiser authored and Micha Reiser committed Apr 15, 2022
1 parent 03c946e commit 3c88c8e
Show file tree
Hide file tree
Showing 10 changed files with 147 additions and 119 deletions.
132 changes: 79 additions & 53 deletions crates/rome_formatter/src/format_element.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::format_elements;
use crate::intersperse::{Intersperse, IntersperseFn};
use rome_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTriviaPieceComments, TextRange};
use crate::{format_elements, TextSize};
use rome_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTriviaPieceComments};
use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};
use std::ops::Deref;
Expand Down Expand Up @@ -245,46 +245,46 @@ pub fn concat_elements<I>(elements: I) -> FormatElement
where
I: IntoIterator<Item = FormatElement>,
{
let mut elements = elements.into_iter();
let elements = elements.into_iter();

let (lower_bound, upper_bound) = elements.size_hint();
let size_hint = upper_bound.unwrap_or(lower_bound);

let mut elements = elements.filter(|element| !element.is_empty()).peekable();

let first = match elements.next() {
Some(element) => element,
None => return empty_element(),
};

if elements.peek().is_none() {
return first;
}

// If the first non empty element is a vec, use it,
// otherwise create a new one with the current element
let mut concatenated = loop {
match elements.next() {
Some(FormatElement::Empty) => continue,
Some(FormatElement::List(list)) => {
let mut v = list.content;
v.reserve(size_hint);
break v;
}
Some(element) => {
let mut v = Vec::with_capacity(size_hint);
v.push(element);
break v;
}
None => return empty_element(),
let mut concatenated = match first {
FormatElement::List(list) => {
let mut v = list.content;
v.reserve(size_hint - 1);
v
}
element => {
let mut v = Vec::with_capacity(size_hint);
v.push(element);
v
}
};

// continue to the rest of the list
for element in elements {
match element {
FormatElement::List(list) => concatenated.extend(list.content),
FormatElement::Empty => {}
element => concatenated.push(element),
}
}

if concatenated.is_empty() {
empty_element()
} else if concatenated.len() == 1 {
concatenated.pop().unwrap()
} else {
FormatElement::from(List::new(concatenated))
}
FormatElement::from(List::new(concatenated))
}

/// Concatenates a list of [FormatElement]s with spaces and line breaks to fit
Expand Down Expand Up @@ -928,10 +928,8 @@ pub enum VerbatimKind {
Unknown,
Suppressed,
Verbatim {
/// the range that belongs to the node/token formatted verbatim
range: TextRange,
/// the text that belongs to the node/token formatted verbatim
text: String,
/// the length of the formatted node
length: TextSize,
},
}

Expand All @@ -945,10 +943,10 @@ pub struct Verbatim {
}

impl Verbatim {
pub fn new_verbatim(element: FormatElement, text: String, range: TextRange) -> Self {
pub fn new_verbatim(element: FormatElement, length: TextSize) -> Self {
Self {
element: Box::new(element),
kind: VerbatimKind::Verbatim { range, text },
kind: VerbatimKind::Verbatim { length },
}
}

Expand Down Expand Up @@ -1132,25 +1130,18 @@ impl ConditionalGroupContent {
}
}

/// See [token] for documentation
#[derive(Eq, Clone)]
pub enum Token {
/// Token constructed by the formatter from a static string
Static { text: &'static str },
/// Token constructed from the input source as a dynamics
/// string and a range of the input source
Dynamic { text: String, source: TextRange },
}

impl Debug for Token {
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
// This does not use debug_tuple so the tokens are
// written on a single line even when pretty-printing
match self {
Token::Static { text } => write!(fmt, "Token({:?})", text),
Token::Dynamic { text, source } => write!(fmt, "Token({:?}, {:?})", text, source),
}
}
Dynamic {
// There's no need for the text to be mutable, using `Box<str>` safes 8 bytes over `String`.
text: Box<str>,
// The position of the dynamic token in the unformatted source code
source_position: TextSize,
},
}

impl Token {
Expand All @@ -1160,17 +1151,33 @@ impl Token {
}

/// Create a token from a dynamic string and a range of the input source
pub fn new_dynamic(text: String, source: TextRange) -> Self {
pub fn new_dynamic(text: String, position: TextSize) -> Self {
debug_assert!(!text.contains('\r'), "The content '{}' contains an unsupported '\\r' line terminator character but string tokens must only use line feeds '\\n' as line separator. Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings.", text);
Self::Dynamic { text, source }
Self::Dynamic {
text: text.into_boxed_str(),
source_position: position,
}
}

/// Get the range of the input source covered by this token,
/// or None if the token was synthesized by the formatter
pub fn source(&self) -> Option<&TextRange> {
pub fn source(&self) -> Option<&TextSize> {
match self {
Token::Static { .. } => None,
Token::Dynamic { source, .. } => Some(source),
Token::Dynamic {
source_position, ..
} => Some(source_position),
}
}
}

impl Debug for Token {
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
// This does not use debug_tuple so the tokens are
// written on a single line even when pretty-printing
match self {
Token::Static { text } => write!(fmt, "StaticToken({:?})", text),
Token::Dynamic { text, .. } => write!(fmt, "DynamicToken({:?})", text),
}
}
}
Expand All @@ -1190,7 +1197,10 @@ impl<L: Language> From<SyntaxToken<L>> for Token {

impl<'a, L: Language> From<&'a SyntaxToken<L>> for Token {
fn from(token: &'a SyntaxToken<L>) -> Self {
Self::new_dynamic(token.text_trimmed().into(), token.text_trimmed_range())
Self::new_dynamic(
token.text_trimmed().into(),
token.text_trimmed_range().start(),
)
}
}

Expand Down Expand Up @@ -1230,7 +1240,7 @@ impl<L: Language> From<SyntaxTriviaPieceComments<L>> for Token {
fn from(trivia: SyntaxTriviaPieceComments<L>) -> Self {
Self::new_dynamic(
normalize_newlines(trivia.text().trim(), LINE_TERMINATORS).into_owned(),
trivia.text_range(),
trivia.text_range().start(),
)
}
}
Expand All @@ -1239,8 +1249,8 @@ impl Deref for Token {
type Target = str;
fn deref(&self) -> &Self::Target {
match self {
Token::Static { text } => text,
Token::Dynamic { text, .. } => text,
Token::Static { text } => text,
}
}
}
Expand Down Expand Up @@ -1398,9 +1408,10 @@ impl From<Option<FormatElement>> for FormatElement {
mod tests {

use crate::format_element::{
empty_element, join_elements, normalize_newlines, List, LINE_TERMINATORS,
empty_element, join_elements, normalize_newlines, ConditionalGroupContent, List,
VerbatimKind, LINE_TERMINATORS,
};
use crate::{concat_elements, space_token, token, FormatElement};
use crate::{concat_elements, space_token, token, FormatElement, TextRange, Token, Verbatim};

#[test]
fn concat_elements_returns_a_list_token_containing_the_passed_in_elements() {
Expand Down Expand Up @@ -1516,4 +1527,19 @@ mod tests {
assert_eq!(normalize_newlines("a\u{2028}b", LINE_TERMINATORS), "a\nb");
assert_eq!(normalize_newlines("a\u{2029}b", LINE_TERMINATORS), "a\nb");
}

#[test]
fn sizes() {
assert_eq!(8, std::mem::size_of::<TextRange>());
assert_eq!(8, std::mem::size_of::<VerbatimKind>());
assert_eq!(16, std::mem::size_of::<Verbatim>());
assert_eq!(24, std::mem::size_of::<Token>());
assert_eq!(16, std::mem::size_of::<ConditionalGroupContent>());
assert_eq!(24, std::mem::size_of::<List>());
// Increasing the size of FormatElement has serious consequences on runtime performance and memory footprint.
// Is there a more efficient way to encode the data to avoid increasing its size? Can the information
// be recomputed at a later point in time?
// You reduced the size of a format element? Excellent work!
assert_eq!(32, std::mem::size_of::<FormatElement>());
}
}
20 changes: 12 additions & 8 deletions crates/rome_formatter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,21 +203,21 @@ pub struct Formatted {
code: String,
range: Option<TextRange>,
sourcemap: Vec<SourceMarker>,
verbatim_source: Vec<(String, TextRange)>,
verbatim_ranges: Vec<TextRange>,
}

impl Formatted {
pub fn new(
code: String,
range: Option<TextRange>,
sourcemap: Vec<SourceMarker>,
verbatim_source: Vec<(String, TextRange)>,
verbatim_source: Vec<TextRange>,
) -> Self {
Self {
code,
range,
sourcemap,
verbatim_source,
verbatim_ranges: verbatim_source,
}
}

Expand All @@ -227,7 +227,7 @@ impl Formatted {
code: String::new(),
range: None,
sourcemap: Vec::new(),
verbatim_source: Vec::new(),
verbatim_ranges: Vec::new(),
}
}

Expand Down Expand Up @@ -259,12 +259,16 @@ impl Formatted {
self.code
}

pub fn verbatim(&self) -> &[(String, TextRange)] {
&self.verbatim_source
/// The text in the formatted code that has been formatted as verbatim.
pub fn verbatim(&self) -> impl Iterator<Item = (TextRange, &str)> {
self.verbatim_ranges
.iter()
.map(|range| (*range, &self.code[*range]))
}

pub fn into_verbatim(self) -> Vec<(String, TextRange)> {
self.verbatim_source
/// Ranges of the formatted code that have been formatted as verbatim.
pub fn verbatim_ranges(&self) -> &[TextRange] {
&self.verbatim_ranges
}
}

Expand Down
67 changes: 35 additions & 32 deletions crates/rome_formatter/src/printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,34 +153,7 @@ impl<'a> Printer<'a> {
}
}
FormatElement::Empty => {}
FormatElement::Token(token) => {
// Print pending indention
if self.state.pending_indent > 0 {
self.print_str(
self.options
.indent_string
.repeat(self.state.pending_indent as usize)
.as_str(),
);
self.state.pending_indent = 0;
}

// Print pending spaces
if self.state.pending_space {
self.print_str(" ");
self.state.pending_space = false;
}

if let Some(range) = token.source() {
self.state.source_markers.push(SourceMarker {
source: range.start(),
dest: TextSize::from(self.state.buffer.len() as u32),
});
}

self.print_str(token);
}

FormatElement::Token(token) => self.print_token(token, token.source()),
FormatElement::HardGroup(group) => queue.enqueue(PrintElementCall::new(
group.content.as_ref(),
args.with_hard_group(true),
Expand Down Expand Up @@ -271,15 +244,46 @@ impl<'a> Printer<'a> {
}

FormatElement::Verbatim(verbatim) => {
if let VerbatimKind::Verbatim { range, text } = &verbatim.kind {
self.state.verbatim_markers.push((text.clone(), *range));
if let VerbatimKind::Verbatim { length } = &verbatim.kind {
self.state.verbatim_markers.push(TextRange::at(
TextSize::from(self.state.buffer.len() as u32),
*length,
));
}

queue.enqueue(PrintElementCall::new(&verbatim.element, args));
}
}
}

fn print_token(&mut self, token: &str, source: Option<&TextSize>) {
// Print pending indention
if self.state.pending_indent > 0 {
self.print_str(
self.options
.indent_string
.repeat(self.state.pending_indent as usize)
.as_str(),
);
self.state.pending_indent = 0;
}

// Print pending spaces
if self.state.pending_space {
self.print_str(" ");
self.state.pending_space = false;
}

if let Some(source) = source {
self.state.source_markers.push(SourceMarker {
source: *source,
dest: TextSize::from(self.state.buffer.len() as u32),
});
}

self.print_str(token);
}

/// Tries to print an element without any line breaks. Reverts any made `state` changes (by this function)
/// and returns with a [LineBreakRequiredError] if the `element` contains any hard line breaks
/// or printing the group exceeds the configured maximal print width.
Expand Down Expand Up @@ -499,9 +503,8 @@ struct PrinterState<'a> {
generated_column: usize,
line_width: usize,
has_empty_line: bool,
// mappings: Mapping[];
line_suffixes: Vec<PrintElementCall<'a>>,
verbatim_markers: Vec<(String, TextRange)>,
verbatim_markers: Vec<TextRange>,
}

impl<'a> PrinterState<'a> {
Expand Down
Loading

0 comments on commit 3c88c8e

Please sign in to comment.