Skip to content

Commit

Permalink
Memoize text width (astral-sh#6552)
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaReiser authored Sep 6, 2023
1 parent fa6bff0 commit 5f59101
Show file tree
Hide file tree
Showing 14 changed files with 213 additions and 184 deletions.
2 changes: 1 addition & 1 deletion crates/ruff_formatter/src/arguments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ mod tests {

#[test]
fn test_nesting() {
let mut context = FormatState::new(());
let mut context = FormatState::new(SimpleFormatContext::default());
let mut buffer = VecBuffer::new(&mut context);

write!(
Expand Down
56 changes: 13 additions & 43 deletions crates/ruff_formatter/src/builders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ use Tag::*;
use crate::format_element::tag::{Condition, Tag};
use crate::prelude::tag::{DedentMode, GroupMode, LabelId};
use crate::prelude::*;
use crate::{format_element, write, Argument, Arguments, FormatContext, GroupId, TextSize};
use crate::{
format_element, write, Argument, Arguments, FormatContext, FormatOptions, GroupId, TextSize,
};
use crate::{Buffer, VecBuffer};

/// A line break that only gets printed if the enclosing `Group` doesn't fit on a single line.
Expand Down Expand Up @@ -348,14 +350,18 @@ pub struct Text<'a> {
position: Option<TextSize>,
}

impl<Context> Format<Context> for Text<'_> {
impl<Context> Format<Context> for Text<'_>
where
Context: FormatContext,
{
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
if let Some(source_position) = self.position {
f.write_element(FormatElement::SourcePosition(source_position));
}

f.write_element(FormatElement::Text {
text: self.text.to_string().into_boxed_str(),
text_width: TextWidth::from_text(self.text, f.options().tab_width()),
});

Ok(())
Expand All @@ -369,31 +375,13 @@ impl std::fmt::Debug for Text<'_> {
}

/// Emits a text as it is written in the source document. Optimized to avoid allocations.
pub const fn source_text_slice(
range: TextRange,
newlines: ContainsNewlines,
) -> SourceTextSliceBuilder {
SourceTextSliceBuilder {
range,
new_lines: newlines,
}
}

#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum ContainsNewlines {
/// The string contains newline characters
Yes,
/// The string contains no newline characters
No,

/// The string may contain newline characters, search the string to determine if there are any newlines.
Detect,
pub const fn source_text_slice(range: TextRange) -> SourceTextSliceBuilder {
SourceTextSliceBuilder { range }
}

#[derive(Eq, PartialEq, Debug)]
pub struct SourceTextSliceBuilder {
range: TextRange,
new_lines: ContainsNewlines,
}

impl<Context> Format<Context> for SourceTextSliceBuilder
Expand All @@ -405,28 +393,10 @@ where
let slice = source_code.slice(self.range);
debug_assert_no_newlines(slice.text(source_code));

let contains_newlines = match self.new_lines {
ContainsNewlines::Yes => {
debug_assert!(
slice.text(source_code).contains('\n'),
"Text contains no new line characters but the caller specified that it does."
);
true
}
ContainsNewlines::No => {
debug_assert!(
!slice.text(source_code).contains('\n'),
"Text contains new line characters but the caller specified that it does not."
);
false
}
ContainsNewlines::Detect => slice.text(source_code).contains('\n'),
};
let text_width =
TextWidth::from_text(slice.text(source_code), f.context().options().tab_width());

f.write_element(FormatElement::SourceCodeSlice {
slice,
contains_newlines,
});
f.write_element(FormatElement::SourceCodeSlice { slice, text_width });

Ok(())
}
Expand Down
95 changes: 77 additions & 18 deletions crates/ruff_formatter/src/format_element.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ pub mod tag;

use std::borrow::Cow;
use std::hash::{Hash, Hasher};
use std::num::NonZeroU32;
use std::ops::Deref;
use std::rc::Rc;
use unicode_width::UnicodeWidthChar;

use crate::format_element::tag::{GroupMode, LabelId, Tag};
use crate::source_code::SourceCodeSlice;
use crate::TagKind;
use crate::{TabWidth, TagKind};
use ruff_text_size::TextSize;

/// Language agnostic IR for formatting source code.
Expand Down Expand Up @@ -37,13 +39,13 @@ pub enum FormatElement {
Text {
/// There's no need for the text to be mutable, using `Box<str>` safes 8 bytes over `String`.
text: Box<str>,
text_width: TextWidth,
},

/// Text that gets emitted as it is in the source code. Optimized to avoid any allocations.
SourceCodeSlice {
slice: SourceCodeSlice,
/// Whether the string contains any new line characters
contains_newlines: bool,
text_width: TextWidth,
},

/// Prevents that line suffixes move past this boundary. Forces the printer to print any pending
Expand Down Expand Up @@ -73,13 +75,10 @@ impl std::fmt::Debug for FormatElement {
FormatElement::ExpandParent => write!(fmt, "ExpandParent"),
FormatElement::Token { text } => fmt.debug_tuple("Token").field(text).finish(),
FormatElement::Text { text, .. } => fmt.debug_tuple("DynamicText").field(text).finish(),
FormatElement::SourceCodeSlice {
slice,
contains_newlines,
} => fmt
FormatElement::SourceCodeSlice { slice, text_width } => fmt
.debug_tuple("Text")
.field(slice)
.field(contains_newlines)
.field(text_width)
.finish(),
FormatElement::LineSuffixBoundary => write!(fmt, "LineSuffixBoundary"),
FormatElement::BestFitting { variants, mode } => fmt
Expand Down Expand Up @@ -255,11 +254,8 @@ impl FormatElements for FormatElement {
FormatElement::ExpandParent => true,
FormatElement::Tag(Tag::StartGroup(group)) => !group.mode().is_flat(),
FormatElement::Line(line_mode) => matches!(line_mode, LineMode::Hard | LineMode::Empty),

FormatElement::Text { text, .. } => text.contains('\n'),
FormatElement::SourceCodeSlice {
contains_newlines, ..
} => *contains_newlines,
FormatElement::Text { text_width, .. } => text_width.is_multiline(),
FormatElement::SourceCodeSlice { text_width, .. } => text_width.is_multiline(),
FormatElement::Interned(interned) => interned.will_break(),
// Traverse into the most flat version because the content is guaranteed to expand when even
// the most flat version contains some content that forces a break.
Expand Down Expand Up @@ -403,6 +399,67 @@ pub trait FormatElements {
fn end_tag(&self, kind: TagKind) -> Option<&Tag>;
}

/// New-type wrapper for a single-line text unicode width.
/// Mainly to prevent access to the inner value.
///
/// ## Representation
///
/// Represents the width by adding 1 to the actual width so that the width can be represented by a [`NonZeroU32`],
/// allowing [`TextWidth`] or [`Option<Width>`] fit in 4 bytes rather than 8.
///
/// This means that 2^32 can not be precisely represented and instead has the same value as 2^32-1.
/// This imprecision shouldn't matter in practice because either text are longer than any configured line width
/// and thus, the text should break.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Width(NonZeroU32);

impl Width {
pub(crate) const fn new(width: u32) -> Self {
Width(NonZeroU32::MIN.saturating_add(width))
}

pub const fn value(self) -> u32 {
self.0.get() - 1
}
}

/// The pre-computed unicode width of a text if it is a single-line text or a marker
/// that it is a multiline text if it contains a line feed.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum TextWidth {
Width(Width),
Multiline,
}

impl TextWidth {
pub fn from_text(text: &str, tab_width: TabWidth) -> TextWidth {
let mut width = 0u32;

for c in text.chars() {
let char_width = match c {
'\t' => tab_width.value(),
'\n' => return TextWidth::Multiline,
#[allow(clippy::cast_possible_truncation)]
c => c.width().unwrap_or(0) as u32,
};
width += char_width;
}

Self::Width(Width::new(width))
}

pub const fn width(self) -> Option<Width> {
match self {
TextWidth::Width(width) => Some(width),
TextWidth::Multiline => None,
}
}

pub(crate) const fn is_multiline(self) -> bool {
matches!(self, TextWidth::Multiline)
}
}

#[cfg(test)]
mod tests {

Expand Down Expand Up @@ -430,19 +487,21 @@ mod sizes {
// be recomputed at a later point in time?
// You reduced the size of a format element? Excellent work!

use super::{BestFittingVariants, Interned, TextWidth};
use static_assertions::assert_eq_size;

assert_eq_size!(ruff_text_size::TextRange, [u8; 8]);
assert_eq_size!(crate::prelude::tag::VerbatimKind, [u8; 8]);
assert_eq_size!(crate::prelude::Interned, [u8; 16]);
assert_eq_size!(crate::format_element::BestFittingVariants, [u8; 16]);
assert_eq_size!(TextWidth, [u8; 4]);
assert_eq_size!(super::tag::VerbatimKind, [u8; 8]);
assert_eq_size!(Interned, [u8; 16]);
assert_eq_size!(BestFittingVariants, [u8; 16]);

#[cfg(not(debug_assertions))]
assert_eq_size!(crate::SourceCodeSlice, [u8; 8]);

#[cfg(not(debug_assertions))]
assert_eq_size!(crate::format_element::Tag, [u8; 16]);
assert_eq_size!(super::Tag, [u8; 16]);

#[cfg(not(debug_assertions))]
assert_eq_size!(crate::FormatElement, [u8; 24]);
assert_eq_size!(super::FormatElement, [u8; 24]);
}
35 changes: 18 additions & 17 deletions crates/ruff_formatter/src/format_element/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,11 @@ impl Document {
expands = false;
continue;
}
FormatElement::Text { text, .. } => text.contains('\n'),
FormatElement::SourceCodeSlice {
contains_newlines, ..
} => *contains_newlines,
FormatElement::Text {
text: _,
text_width,
} => text_width.is_multiline(),
FormatElement::SourceCodeSlice { text_width, .. } => text_width.is_multiline(),
FormatElement::ExpandParent
| FormatElement::Line(LineMode::Hard | LineMode::Empty) => true,
_ => false,
Expand Down Expand Up @@ -259,18 +260,24 @@ impl Format<IrFormatContext<'_>> for &[FormatElement] {
| FormatElement::Text { .. }
| FormatElement::SourceCodeSlice { .. }) => {
fn write_escaped(element: &FormatElement, f: &mut Formatter<IrFormatContext>) {
let text = match element {
FormatElement::Token { text } => text,
FormatElement::Text { text } => text.as_ref(),
FormatElement::SourceCodeSlice { slice, .. } => {
slice.text(f.context().source_code())
let (text, text_width) = match element {
#[allow(clippy::cast_possible_truncation)]
FormatElement::Token { text } => {
(*text, TextWidth::Width(Width::new(text.len() as u32)))
}
FormatElement::Text { text, text_width } => {
(text.as_ref(), *text_width)
}
FormatElement::SourceCodeSlice { slice, text_width } => {
(slice.text(f.context().source_code()), *text_width)
}
_ => unreachable!(),
};

if text.contains('"') {
f.write_element(FormatElement::Text {
text: text.replace('"', r#"\""#).into(),
text_width,
});
} else {
f.write_element(element.clone());
Expand Down Expand Up @@ -854,15 +861,9 @@ mod tests {
[group(&format_args![
token("("),
soft_block_indent(&format_args![
source_text_slice(
TextRange::at(TextSize::new(0), TextSize::new(19)),
ContainsNewlines::No
),
source_text_slice(TextRange::at(TextSize::new(0), TextSize::new(19)),),
space(),
source_text_slice(
TextRange::at(TextSize::new(20), TextSize::new(28)),
ContainsNewlines::No
),
source_text_slice(TextRange::at(TextSize::new(20), TextSize::new(28)),),
])
])]
)
Expand Down
8 changes: 4 additions & 4 deletions crates/ruff_formatter/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,15 +343,15 @@ mod tests {

struct TestFormat;

impl Format<()> for TestFormat {
fn fmt(&self, f: &mut Formatter<()>) -> FormatResult<()> {
impl Format<SimpleFormatContext> for TestFormat {
fn fmt(&self, f: &mut Formatter<SimpleFormatContext>) -> FormatResult<()> {
write!(f, [token("test")])
}
}

#[test]
fn test_single_element() {
let mut state = FormatState::new(());
let mut state = FormatState::new(SimpleFormatContext::default());
let mut buffer = VecBuffer::new(&mut state);

write![&mut buffer, [TestFormat]].unwrap();
Expand All @@ -364,7 +364,7 @@ mod tests {

#[test]
fn test_multiple_elements() {
let mut state = FormatState::new(());
let mut state = FormatState::new(SimpleFormatContext::default());
let mut buffer = VecBuffer::new(&mut state);

write![
Expand Down
Loading

0 comments on commit 5f59101

Please sign in to comment.