From c6b90489d490ab678b170bb1433a74ca1982ab1c Mon Sep 17 00:00:00 2001 From: Matthew Toohey Date: Sun, 30 Oct 2022 18:30:47 -0400 Subject: [PATCH] Pass `tab_width` to functions instead of storing it in `Word` --- src/core.rs | 41 +++++++++++++----------------- src/fill.rs | 7 ++--- src/word_splitters.rs | 22 ++++++++-------- src/wrap.rs | 13 +++++----- src/wrap_algorithms.rs | 20 ++++++++------- src/wrap_algorithms/optimal_fit.rs | 23 +++++++++-------- 6 files changed, 59 insertions(+), 67 deletions(-) diff --git a/src/core.rs b/src/core.rs index 2a13dc3b..e493b63d 100644 --- a/src/core.rs +++ b/src/core.rs @@ -206,7 +206,7 @@ pub trait Fragment: std::fmt::Debug { /// Displayed width of the whitespace that must follow the word /// when the word is not at the end of a line. - fn whitespace_width(&self) -> f64; + fn whitespace_width(&self, tab_width: u8) -> f64; /// Displayed width of the penalty that must be inserted if the /// word falls at the end of a line. @@ -227,8 +227,6 @@ pub struct Word<'a> { pub penalty: &'a str, // Cached width in columns. pub(crate) width: usize, - // Width of a tab character within this word. - pub(crate) tab_width: u8, } impl std::ops::Deref for Word<'_> { @@ -245,22 +243,14 @@ impl<'a> Word<'a> { /// All trailing whitespace is automatically taken to be the whitespace part /// of the word. pub fn from(word: &str) -> Word<'_> { - Self::with_tab_width(word, 0) - } - - /// Construct a `Word` from a string, with a customizable tab width - /// to calculate the total width with. - /// - /// All trailing whitespace is automatically removed to be the - /// whitespace part of the word. - pub fn with_tab_width(word: &str, tab_width: u8) -> Word<'_> { let trimmed = word.trim_end(); Word { word: trimmed, - width: display_width(trimmed, tab_width), + // trimmed shouldn't contain whitespace, so we don't need to pass + // an accurate tab_width. + width: display_width(trimmed, 0), whitespace: &word[trimmed.len()..], penalty: "", - tab_width, } } @@ -273,11 +263,15 @@ impl<'a> Word<'a> { /// ``` /// use textwrap::core::Word; /// assert_eq!( - /// Word::from("Hello! ").break_apart(3).collect::>(), + /// Word::from("Hello! ").break_apart(3, 0).collect::>(), /// vec![Word::from("Hel"), Word::from("lo! ")] /// ); /// ``` - pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator> + 'b { + pub fn break_apart<'b>( + &'b self, + line_width: usize, + tab_width: u8, + ) -> impl Iterator> + 'b { let mut char_indices = self.word.char_indices(); let mut offset = 0; let mut width = 0; @@ -288,20 +282,19 @@ impl<'a> Word<'a> { continue; } - if width > 0 && width + ch_width(ch, self.tab_width) > line_width { + if width > 0 && width + ch_width(ch, tab_width) > line_width { let word = Word { word: &self.word[offset..idx], whitespace: "", penalty: "", - tab_width: self.tab_width, width, }; offset = idx; - width = ch_width(ch, self.tab_width); + width = ch_width(ch, tab_width); return Some(word); } - width += ch_width(ch, self.tab_width); + width += ch_width(ch, tab_width); } if offset < self.word.len() { @@ -326,8 +319,8 @@ impl Fragment for Word<'_> { } #[inline] - fn whitespace_width(&self) -> f64 { - display_width(self.whitespace, self.tab_width) as f64 + fn whitespace_width(&self, tab_width: u8) -> f64 { + display_width(self.whitespace, tab_width) as f64 } // We assume the penalty is `""` or `"-"`. This allows us to @@ -343,14 +336,14 @@ impl Fragment for Word<'_> { /// This simply calls [`Word::break_apart`] on words that are too /// wide. This means that no extra `'-'` is inserted, the word is /// simply broken into smaller pieces. -pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec> +pub fn break_words<'a, I>(words: I, line_width: usize, tab_width: u8) -> Vec> where I: IntoIterator>, { let mut shortened_words = Vec::new(); for word in words { if word.width() > line_width as f64 { - shortened_words.extend(word.break_apart(line_width)); + shortened_words.extend(word.break_apart(line_width, tab_width)); } else { shortened_words.push(word); } diff --git a/src/fill.rs b/src/fill.rs index 7490bebc..fb46b81b 100644 --- a/src/fill.rs +++ b/src/fill.rs @@ -128,12 +128,9 @@ where for line in text.split('\n') { let words = WordSeparator::AsciiSpace .find_words(line) - .map(|mut w| { - w.tab_width = options.tab_width; - w - }) .collect::>(); - let wrapped_words = wrap_algorithms::wrap_first_fit(&words, &[options.width as f64]); + let wrapped_words = + wrap_algorithms::wrap_first_fit(&words, &[options.width as f64], options.tab_width); let mut line_offset = offset; for words in &wrapped_words[..wrapped_words.len() - 1] { diff --git a/src/word_splitters.rs b/src/word_splitters.rs index e9eb3438..07f4e5c0 100644 --- a/src/word_splitters.rs +++ b/src/word_splitters.rs @@ -193,10 +193,12 @@ where let need_hyphen = !word[..idx].ends_with('-'); let w = Word { word: &word.word[prev..idx], - width: display_width(&word[prev..idx], word.tab_width), + // word[prev..idx] is a subset of the original word.word, + // which is trimmed of whitespace in Word::from, so we + // don't need an accurate tab_width. + width: display_width(&word[prev..idx], 0), whitespace: "", penalty: if need_hyphen { "-" } else { "" }, - tab_width: word.tab_width, }; prev = idx; return Some(w); @@ -205,7 +207,9 @@ where if prev < word.word.len() || prev == 0 { let w = Word { word: &word.word[prev..], - width: display_width(&word[prev..], word.tab_width), + // see comment by display_width call above regarding + // tab_width. + width: display_width(&word[prev..], 0), ..word }; prev = word.word.len() + 1; @@ -279,15 +283,13 @@ mod tests { word: "foo", width: 3, whitespace: "", - penalty: "-", - tab_width: 0 + penalty: "-" }, Word { word: "bar", width: 3, whitespace: "", - penalty: "", - tab_width: 0 + penalty: "" } ] ); @@ -302,15 +304,13 @@ mod tests { word: "fo-", width: 3, whitespace: "", - penalty: "", - tab_width: 0 + penalty: "" }, Word { word: "bar", width: 3, whitespace: "", - penalty: "", - tab_width: 0 + penalty: "" } ] ); diff --git a/src/wrap.rs b/src/wrap.rs index d3821918..1c40fb09 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -225,27 +225,26 @@ pub(crate) fn wrap_single_line_slow_path<'a>( .saturating_sub(display_width(options.subsequent_indent, options.tab_width)); let line_widths = [initial_width, subsequent_width]; - let words = options.word_separator.find_words(line).map(|mut w| { - w.tab_width = options.tab_width; - w - }); + let words = options.word_separator.find_words(line); let split_words = split_words(words, &options.word_splitter); let broken_words = if options.break_words { - let mut broken_words = break_words(split_words, line_widths[1]); + let mut broken_words = break_words(split_words, line_widths[1], options.tab_width); if !options.initial_indent.is_empty() { // Without this, the first word will always go into the // first line. However, since we break words based on the // _second_ line width, it can be wrong to unconditionally // put the first word onto the first line. An empty // zero-width word fixed this. - broken_words.insert(0, Word::with_tab_width("", options.tab_width)); + broken_words.insert(0, Word::from("")); } broken_words } else { split_words.collect::>() }; - let wrapped_words = options.wrap_algorithm.wrap(&broken_words, &line_widths); + let wrapped_words = options + .wrap_algorithm + .wrap(&broken_words, &line_widths, options.tab_width); let mut idx = 0; for words in wrapped_words { diff --git a/src/wrap_algorithms.rs b/src/wrap_algorithms.rs index e97f8bd6..f3a89a58 100644 --- a/src/wrap_algorithms.rs +++ b/src/wrap_algorithms.rs @@ -168,6 +168,7 @@ impl WrapAlgorithm { &self, words: &'b [Word<'a>], line_widths: &'b [usize], + tab_width: u8, ) -> Vec<&'b [Word<'a>]> { // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53 // = 9_007_199_254_740_992 can be represented without loss by @@ -176,13 +177,13 @@ impl WrapAlgorithm { let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::>(); match self { - WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths), + WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths, tab_width), #[cfg(feature = "smawk")] WrapAlgorithm::OptimalFit(penalties) => { // The computation cannnot overflow when the line // widths are restricted to usize. - wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap() + wrap_optimal_fit(words, &f64_line_widths, tab_width, penalties).unwrap() } WrapAlgorithm::Custom(func) => func(words, line_widths), @@ -231,7 +232,7 @@ impl Default for WrapAlgorithm { /// /// let text = "These few words will unfortunately not wrap nicely."; /// let words = WordSeparator::AsciiSpace.find_words(text).collect::>(); -/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])), +/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0], 0)), /// vec!["These few words", /// "will", // <-- short line /// "unfortunately", @@ -242,7 +243,7 @@ impl Default for WrapAlgorithm { /// #[cfg(feature = "smawk")] /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties}; /// #[cfg(feature = "smawk")] -/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()), +/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], 0, &Penalties::new()).unwrap()), /// vec!["These few", /// "words will", /// "unfortunately", @@ -284,7 +285,7 @@ impl Default for WrapAlgorithm { /// /// impl Fragment for Task<'_> { /// fn width(&self) -> f64 { self.hours } -/// fn whitespace_width(&self) -> f64 { self.sweep } +/// fn whitespace_width(&self, tab_width: u8) -> f64 { self.sweep } /// fn penalty_width(&self) -> f64 { self.cleanup } /// } /// @@ -308,7 +309,7 @@ impl Default for WrapAlgorithm { /// let mut days = Vec::new(); /// // Assign tasks to days. The assignment is a vector of slices, /// // with a slice per day. -/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]); +/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length], 0); /// for day in assigned_days.iter() { /// let last = day.last().unwrap(); /// let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum(); @@ -347,6 +348,7 @@ impl Default for WrapAlgorithm { pub fn wrap_first_fit<'a, 'b, T: Fragment>( fragments: &'a [T], line_widths: &'b [f64], + tab_width: u8, ) -> Vec<&'a [T]> { // The final line width is used for all remaining lines. let default_line_width = line_widths.last().copied().unwrap_or(0.0); @@ -364,7 +366,7 @@ pub fn wrap_first_fit<'a, 'b, T: Fragment>( start = idx; width = 0.0; } - width += fragment.width() + fragment.whitespace_width(); + width += fragment.width() + fragment.whitespace_width(tab_width); } lines.push(&fragments[start..]); lines @@ -380,7 +382,7 @@ mod tests { #[rustfmt::skip] impl Fragment for Word { fn width(&self) -> f64 { self.0 } - fn whitespace_width(&self) -> f64 { 1.0 } + fn whitespace_width(&self, _: u8) -> f64 { 1.0 } fn penalty_width(&self) -> f64 { 0.0 } } @@ -397,7 +399,7 @@ mod tests { // Wrap at just under f64::MAX (~19e307). The tiny // whitespace_widths disappear because of loss of precision. assert_eq!( - wrap_first_fit(&words, &[15e307]), + wrap_first_fit(&words, &[15e307], 0), &[ vec![ Word(1e307), diff --git a/src/wrap_algorithms/optimal_fit.rs b/src/wrap_algorithms/optimal_fit.rs index bdc03345..5f128080 100644 --- a/src/wrap_algorithms/optimal_fit.rs +++ b/src/wrap_algorithms/optimal_fit.rs @@ -44,20 +44,20 @@ pub struct Penalties { /// let penalties = Penalties::new(); /// /// // Perfect fit, both words are on a single line with no overflow. - /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap(); + /// let wrapped = wrap_optimal_fit(&fragments, &[length], 0, &penalties).unwrap(); /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); /// /// // The words no longer fit, yet we get a single line back. While /// // the cost of overflow (`1 * 2500`) is the same as the cost of the /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty` /// // which makes it cheaper to overflow than to use two lines. - /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap(); + /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], 0, &penalties).unwrap(); /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); /// /// // The cost of overflow would be 2 * 2500, whereas the cost of /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 = /// // 3401`. We therefore get two lines. - /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap(); + /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], 0, &penalties).unwrap(); /// assert_eq!(wrapped, vec![&[Word::from(short)], /// &[Word::from(&long)]]); /// ``` @@ -283,13 +283,13 @@ impl std::error::Error for OverflowError {} /// /// impl Fragment for Word { /// fn width(&self) -> f64 { self.0 } -/// fn whitespace_width(&self) -> f64 { 1.0 } +/// fn whitespace_width(&self, tab_width: u8) -> f64 { 1.0 } /// fn penalty_width(&self) -> f64 { 0.0 } /// } /// /// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is /// // larger than f64::MAX: -/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()), +/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], 0, &Penalties::default()), /// Err(OverflowError)); /// ``` /// @@ -302,6 +302,7 @@ impl std::error::Error for OverflowError {} pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( fragments: &'a [T], line_widths: &'b [f64], + tab_width: u8, penalties: &'b Penalties, ) -> Result, OverflowError> { // The final line width is used for all remaining lines. @@ -310,7 +311,7 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( let mut width = 0.0; widths.push(width); for fragment in fragments { - width += fragment.width() + fragment.whitespace_width(); + width += fragment.width() + fragment.whitespace_width(tab_width); widths.push(width); } @@ -328,7 +329,7 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( // Compute the width of a line spanning fragments[i..j] in // constant time. We need to adjust widths[j] by subtracting // the whitespace of fragment[j-1] and then add the penalty. - let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width() + let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width(tab_width) + fragments[j - 1].penalty_width(); // We compute cost of the line containing fragments[i..j]. We @@ -398,7 +399,7 @@ mod tests { #[rustfmt::skip] impl Fragment for Word { fn width(&self) -> f64 { self.0 } - fn whitespace_width(&self) -> f64 { 1.0 } + fn whitespace_width(&self, _: u8) -> f64 { 1.0 } fn penalty_width(&self) -> f64 { 0.0 } } @@ -406,7 +407,7 @@ mod tests { fn wrap_fragments_with_infinite_widths() { let words = vec![Word(f64::INFINITY)]; assert_eq!( - wrap_optimal_fit(&words, &[0.0], &Penalties::default()), + wrap_optimal_fit(&words, &[0.0], 0, &Penalties::default()), Err(OverflowError) ); } @@ -415,7 +416,7 @@ mod tests { fn wrap_fragments_with_huge_widths() { let words = vec![Word(1e200), Word(1e250), Word(1e300)]; assert_eq!( - wrap_optimal_fit(&words, &[1e300], &Penalties::default()), + wrap_optimal_fit(&words, &[1e300], 0, &Penalties::default()), Err(OverflowError) ); } @@ -426,7 +427,7 @@ mod tests { // makes the `gap * gap` cost fit comfortably in a f64. let words = vec![Word(1e25), Word(1e50), Word(1e75)]; assert_eq!( - wrap_optimal_fit(&words, &[1e100], &Penalties::default()), + wrap_optimal_fit(&words, &[1e100], 0, &Penalties::default()), Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]]) ); }