From 8d7b0c2e233c0bc0f83ed84ec1cfd0fb913e94a8 Mon Sep 17 00:00:00 2001 From: Matthew Toohey Date: Fri, 28 Oct 2022 22:46:22 -0400 Subject: [PATCH] Add `tab_width` support --- benchmarks/Cargo.toml | 5 ++ benchmarks/display_width.rs | 26 ++++++ examples/wasm/src/lib.rs | 8 +- fuzz/fuzz_targets/wrap_first_fit.rs | 4 +- fuzz/fuzz_targets/wrap_optimal_fit.rs | 4 +- fuzz/fuzz_targets/wrap_optimal_fit_usize.rs | 4 +- src/columns.rs | 28 ++++-- src/core.rs | 97 +++++++++++---------- src/fill.rs | 52 ++++++++++- src/options.rs | 27 ++++++ src/refill.rs | 4 +- src/word_splitters.rs | 31 ++++--- src/wrap.rs | 18 ++-- src/wrap_algorithms.rs | 20 +++-- src/wrap_algorithms/optimal_fit.rs | 23 ++--- 15 files changed, 248 insertions(+), 103 deletions(-) create mode 100644 benchmarks/display_width.rs diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index d9de6ccc..e89afee5 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -23,6 +23,11 @@ name = "unfill" harness = false path = "unfill.rs" +[[bench]] +name = "display_width" +harness = false +path = "display_width.rs" + [dependencies] textwrap = { path = "../", features = ["hyphenation"] } diff --git a/benchmarks/display_width.rs b/benchmarks/display_width.rs new file mode 100644 index 00000000..caa252d6 --- /dev/null +++ b/benchmarks/display_width.rs @@ -0,0 +1,26 @@ +use criterion::{criterion_group, criterion_main, Criterion}; + +pub fn benchmark(c: &mut Criterion) { + let words_per_line = [ + 5, 10, 15, 5, 5, 10, 5, 5, 5, 10, // 10 lines + 10, 10, 5, 5, 5, 5, 15, 10, 5, 5, // 20 lines + 10, 5, 5, 5, 15, 10, 10, 5, 5, 5, // 30 lines + 15, 5, 5, 10, 5, 5, 5, 15, 5, 10, // 40 lines + 5, 15, 5, 5, 15, 5, 10, 10, 5, 5, // 50 lines + ]; + let mut text = String::new(); + for (line_no, word_count) in words_per_line.iter().enumerate() { + text.push_str("\t\t\t"); + text.push_str(&lipsum::lipsum_words_from_seed(*word_count, line_no as u64)); + text.push('\n'); + } + text.push_str("\n\n\n\n"); + 
assert_eq!(text.len(), 2800); // The size for reference. + + c.bench_function("display_width", |b| { + b.iter(|| textwrap::core::display_width(&text, 2)) + }); +} + +criterion_group!(benches, benchmark); +criterion_main!(benches); diff --git a/examples/wasm/src/lib.rs b/examples/wasm/src/lib.rs index 5c9bba9a..96b21a16 100644 --- a/examples/wasm/src/lib.rs +++ b/examples/wasm/src/lib.rs @@ -152,7 +152,7 @@ impl textwrap::core::Fragment for CanvasWord<'_> { } #[inline] - fn whitespace_width(&self) -> f64 { + fn whitespace_width(&self, _: u8) -> f64 { self.whitespace_width } @@ -351,7 +351,7 @@ pub fn draw_wrapped_text( let mut lineno = 0; for line in text.split('\n') { let words = word_separator.find_words(line); - let split_words = split_words(words, &word_splitter); + let split_words = split_words(words, &word_splitter, 0); let canvas_words = split_words .flat_map(|word| { @@ -366,10 +366,10 @@ pub fn draw_wrapped_text( let line_lengths = [options.width]; let wrapped_words = match options.wrap_algorithm { - WasmWrapAlgorithm::FirstFit => wrap_first_fit(&canvas_words, &line_lengths), + WasmWrapAlgorithm::FirstFit => wrap_first_fit(&canvas_words, &line_lengths, 0), WasmWrapAlgorithm::OptimalFit => { let penalties = options.penalties.into(); - wrap_optimal_fit(&canvas_words, &line_lengths, &penalties).unwrap() + wrap_optimal_fit(&canvas_words, &line_lengths, 0, &penalties).unwrap() } _ => Err("WasmOptions has an invalid wrap_algorithm field")?, }; diff --git a/fuzz/fuzz_targets/wrap_first_fit.rs b/fuzz/fuzz_targets/wrap_first_fit.rs index bc7136ee..a5681641 100644 --- a/fuzz/fuzz_targets/wrap_first_fit.rs +++ b/fuzz/fuzz_targets/wrap_first_fit.rs @@ -14,12 +14,12 @@ struct Word { #[rustfmt::skip] impl core::Fragment for Word { fn width(&self) -> f64 { self.width } - fn whitespace_width(&self) -> f64 { self.whitespace_width } + fn whitespace_width(&self, _: u8) -> f64 { self.whitespace_width } fn penalty_width(&self) -> f64 { self.penalty_width } } 
fuzz_target!(|input: (f64, Vec)| { let width = input.0; let words = input.1; - let _ = wrap_first_fit(&words, &[width]); + let _ = wrap_first_fit(&words, &[width], 0); }); diff --git a/fuzz/fuzz_targets/wrap_optimal_fit.rs b/fuzz/fuzz_targets/wrap_optimal_fit.rs index 7f09d39a..e17ac137 100644 --- a/fuzz/fuzz_targets/wrap_optimal_fit.rs +++ b/fuzz/fuzz_targets/wrap_optimal_fit.rs @@ -35,7 +35,7 @@ struct Word { #[rustfmt::skip] impl core::Fragment for Word { fn width(&self) -> f64 { self.width } - fn whitespace_width(&self) -> f64 { self.whitespace_width } + fn whitespace_width(&self, _: u8) -> f64 { self.whitespace_width } fn penalty_width(&self) -> f64 { self.penalty_width } } @@ -57,5 +57,5 @@ fuzz_target!(|input: (usize, Vec, Penalties)| { } } - let _ = wrap_optimal_fit(&words, &[width as f64], &penalties); + let _ = wrap_optimal_fit(&words, &[width as f64], 0, &penalties); }); diff --git a/fuzz/fuzz_targets/wrap_optimal_fit_usize.rs b/fuzz/fuzz_targets/wrap_optimal_fit_usize.rs index 162cde19..196f88fa 100644 --- a/fuzz/fuzz_targets/wrap_optimal_fit_usize.rs +++ b/fuzz/fuzz_targets/wrap_optimal_fit_usize.rs @@ -35,7 +35,7 @@ struct Word { #[rustfmt::skip] impl core::Fragment for Word { fn width(&self) -> f64 { self.width as f64 } - fn whitespace_width(&self) -> f64 { self.whitespace_width as f64 } + fn whitespace_width(&self, _: u8) -> f64 { self.whitespace_width as f64 } fn penalty_width(&self) -> f64 { self.penalty_width as f64 } } @@ -45,5 +45,5 @@ fuzz_target!(|input: (usize, Vec, Penalties)| { let width = input.0; let words = input.1; let penalties = input.2.into(); - let _ = wrap_optimal_fit(&words, &[width as f64], &penalties); + let _ = wrap_optimal_fit(&words, &[width as f64], 0, &penalties); }); diff --git a/src/columns.rs b/src/columns.rs index d14d5588..8ee23d1c 100644 --- a/src/columns.rs +++ b/src/columns.rs @@ -1,7 +1,6 @@ //! Functionality for wrapping text into columns. 
-use crate::core::display_width; -use crate::{wrap, Options}; +use crate::{core, wrap, Options}; /// Wrap text into columns with a given total width. /// @@ -23,9 +22,9 @@ use crate::{wrap, Options}; /// # let columns = 2; /// # let options = textwrap::Options::new(80); /// let inner_width = options.width -/// - textwrap::core::display_width(left_gap) -/// - textwrap::core::display_width(right_gap) -/// - textwrap::core::display_width(middle_gap) * (columns - 1); +/// - textwrap::core::display_width(left_gap, options.tab_width) +/// - textwrap::core::display_width(right_gap, options.tab_width) +/// - textwrap::core::display_width(middle_gap, options.tab_width) * (columns - 1); /// let column_width = inner_width / columns; /// ``` /// @@ -74,6 +73,8 @@ where assert!(columns > 0); let mut options: Options = total_width_or_options.into(); + let tab_width = options.tab_width; + let display_width = |text| core::display_width(text, tab_width); let inner_width = options .width @@ -190,4 +191,21 @@ mod tests { fn wrap_columns_panic_with_zero_columns() { wrap_columns("", 0, 10, "", "", ""); } + + #[test] + fn wrap_columns_with_tabs() { + let options = Options::new(23).tab_width(4); + + #[cfg(feature = "smawk")] + let expected = vec!["|hello |is\tlong|", "|this |yeah |"]; + #[cfg(all(not(feature = "smawk"), feature = "unicode-linebreak"))] + let expected = vec!["|hello |long |", "|this\tis|yeah |"]; + #[cfg(not(any(feature = "smawk", feature = "unicode-linebreak")))] + let expected = vec!["|hello\tt|\tlong |", "|his\tis |\tyeah |"]; + + assert_eq!( + wrap_columns("hello\tthis\tis\tlong\tyeah", 2, options, "|", "|", "|"), + expected + ) + } } diff --git a/src/core.rs b/src/core.rs index 52c204ec..6075a361 100644 --- a/src/core.rs +++ b/src/core.rs @@ -61,8 +61,12 @@ pub(crate) fn skip_ansi_escape_sequence>(ch: char, char #[cfg(feature = "unicode-width")] #[inline] -fn ch_width(ch: char) -> usize { - unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0) +fn ch_width(ch: 
char, tab_width: u8) -> usize { + if ch == '\t' { + tab_width as usize + } else { + unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0) + } } /// First character which [`ch_width`] will classify as double-width. @@ -72,8 +76,10 @@ const DOUBLE_WIDTH_CUTOFF: char = '\u{1100}'; #[cfg(not(feature = "unicode-width"))] #[inline] -fn ch_width(ch: char) -> usize { - if ch < DOUBLE_WIDTH_CUTOFF { +fn ch_width(ch: char, tab_width: u8) -> usize { + if ch == '\t' { + tab_width as usize + } else if ch < DOUBLE_WIDTH_CUTOFF { 1 } else { 2 @@ -88,8 +94,8 @@ fn ch_width(ch: char) -> usize { /// ``` /// use textwrap::core::display_width; /// -/// assert_eq!(display_width("Café Plain"), 10); -/// assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10); +/// assert_eq!(display_width("Café Plain", 0), 10); +/// assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m", 0), 10); /// ``` /// /// **Note:** When the `unicode-width` Cargo feature is disabled, the @@ -107,11 +113,11 @@ fn ch_width(ch: char) -> usize { /// ``` /// use textwrap::core::display_width; /// -/// assert_eq!(display_width("Cafe Plain"), 10); +/// assert_eq!(display_width("Cafe Plain", 0), 10); /// #[cfg(feature = "unicode-width")] -/// assert_eq!(display_width("Cafe\u{301} Plain"), 10); +/// assert_eq!(display_width("Cafe\u{301} Plain", 0), 10); /// #[cfg(not(feature = "unicode-width"))] -/// assert_eq!(display_width("Cafe\u{301} Plain"), 11); +/// assert_eq!(display_width("Cafe\u{301} Plain", 0), 11); /// ``` /// /// ## Emojis and CJK Characters @@ -123,8 +129,8 @@ fn ch_width(ch: char) -> usize { /// ``` /// use textwrap::core::display_width; /// -/// assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20); -/// assert_eq!(display_width("你好"), 4); // “Nǐ hǎo” or “Hello” in Chinese +/// assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥", 0), 20); +/// assert_eq!(display_width("你好", 0), 4); // “Nǐ hǎo” or “Hello” in Chinese /// ``` /// /// # 
Limitations @@ -150,9 +156,9 @@ fn ch_width(ch: char) -> usize { /// /// assert_eq!("👨‍🦰".chars().collect::>(), ['\u{1f468}', '\u{200d}', '\u{1f9b0}']); /// #[cfg(feature = "unicode-width")] -/// assert_eq!(display_width("👨‍🦰"), 4); +/// assert_eq!(display_width("👨‍🦰", 0), 4); /// #[cfg(not(feature = "unicode-width"))] -/// assert_eq!(display_width("👨‍🦰"), 6); +/// assert_eq!(display_width("👨‍🦰", 0), 6); /// ``` /// /// This happens because the grapheme consists of three code points: @@ -172,14 +178,14 @@ fn ch_width(ch: char) -> usize { /// [Unicode equivalence]: https://en.wikipedia.org/wiki/Unicode_equivalence /// [CJK characters]: https://en.wikipedia.org/wiki/CJK_characters /// [emoji modifier sequences]: https://unicode.org/emoji/charts/full-emoji-modifiers.html -pub fn display_width(text: &str) -> usize { +pub fn display_width(text: &str, tab_width: u8) -> usize { let mut chars = text.chars(); let mut width = 0; while let Some(ch) = chars.next() { if skip_ansi_escape_sequence(ch, &mut chars) { continue; } - width += ch_width(ch); + width += ch_width(ch, tab_width); } width } @@ -200,7 +206,7 @@ pub trait Fragment: std::fmt::Debug { /// Displayed width of the whitespace that must follow the word /// when the word is not at the end of a line. - fn whitespace_width(&self) -> f64; + fn whitespace_width(&self, tab_width: u8) -> f64; /// Displayed width of the penalty that must be inserted if the /// word falls at the end of a line. @@ -234,13 +240,15 @@ impl std::ops::Deref for Word<'_> { impl<'a> Word<'a> { /// Construct a `Word` from a string. /// - /// A trailing stretch of `' '` is automatically taken to be the - /// whitespace part of the word. + /// All trailing whitespace is automatically taken to be the whitespace part + /// of the word. 
pub fn from(word: &str) -> Word<'_> { - let trimmed = word.trim_end_matches(' '); + let trimmed = word.trim_end_matches(&[' ', '\t']); Word { word: trimmed, - width: display_width(trimmed), + // trimmed shouldn't contain whitespace, so we don't need to pass + // an accurate tab_width. + width: display_width(trimmed, 0), whitespace: &word[trimmed.len()..], penalty: "", } @@ -255,11 +263,15 @@ impl<'a> Word<'a> { /// ``` /// use textwrap::core::Word; /// assert_eq!( - /// Word::from("Hello! ").break_apart(3).collect::>(), + /// Word::from("Hello! ").break_apart(3, 0).collect::>(), /// vec![Word::from("Hel"), Word::from("lo! ")] /// ); /// ``` - pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator> + 'b { + pub fn break_apart<'b>( + &'b self, + line_width: usize, + tab_width: u8, + ) -> impl Iterator> + 'b { let mut char_indices = self.word.char_indices(); let mut offset = 0; let mut width = 0; @@ -270,27 +282,26 @@ impl<'a> Word<'a> { continue; } - if width > 0 && width + ch_width(ch) > line_width { + if width > 0 && width + ch_width(ch, tab_width) > line_width { let word = Word { word: &self.word[offset..idx], - width: width, whitespace: "", penalty: "", + width, }; offset = idx; - width = ch_width(ch); + width = ch_width(ch, tab_width); return Some(word); } - width += ch_width(ch); + width += ch_width(ch, tab_width); } if offset < self.word.len() { let word = Word { word: &self.word[offset..], - width: width, - whitespace: self.whitespace, - penalty: self.penalty, + width, + ..*self }; offset = self.word.len(); return Some(word); @@ -307,11 +318,9 @@ impl Fragment for Word<'_> { self.width as f64 } - // We assume the whitespace consist of ' ' only. This allows us to - // compute the display width in constant time. #[inline] - fn whitespace_width(&self) -> f64 { - self.whitespace.len() as f64 + fn whitespace_width(&self, tab_width: u8) -> f64 { + display_width(self.whitespace, tab_width) as f64 } // We assume the penalty is `""` or `"-"`. 
This allows us to @@ -327,14 +336,14 @@ impl Fragment for Word<'_> { /// This simply calls [`Word::break_apart`] on words that are too /// wide. This means that no extra `'-'` is inserted, the word is /// simply broken into smaller pieces. -pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec> +pub fn break_words<'a, I>(words: I, line_width: usize, tab_width: u8) -> Vec> where I: IntoIterator>, { let mut shortened_words = Vec::new(); for word in words { if word.width() > line_width as f64 { - shortened_words.extend(word.break_apart(line_width)); + shortened_words.extend(word.break_apart(line_width, tab_width)); } else { shortened_words.push(word); } @@ -373,7 +382,7 @@ mod tests { assert_eq!(ch.width().unwrap(), 1, "char: {}", desc); #[cfg(not(feature = "unicode-width"))] - assert_eq!(ch_width(ch), 1, "char: {}", desc); + assert_eq!(ch_width(ch, 0), 1, "char: {}", desc); } } @@ -391,7 +400,7 @@ mod tests { assert!(ch.width().unwrap() <= 2, "char: {}", desc); #[cfg(not(feature = "unicode-width"))] - assert_eq!(ch_width(ch), 2, "char: {}", desc); + assert_eq!(ch_width(ch, 0), 2, "char: {}", desc); } } @@ -402,32 +411,32 @@ mod tests { #[test] fn display_width_works() { assert_eq!("Café Plain".len(), 11); // “é” is two bytes - assert_eq!(display_width("Café Plain"), 10); - assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10); + assert_eq!(display_width("Café Plain", 0), 10); + assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m", 0), 10); } #[test] fn display_width_narrow_emojis() { #[cfg(feature = "unicode-width")] - assert_eq!(display_width("⁉"), 1); + assert_eq!(display_width("⁉", 0), 1); // The ⁉ character is above DOUBLE_WIDTH_CUTOFF. 
#[cfg(not(feature = "unicode-width"))] - assert_eq!(display_width("⁉"), 2); + assert_eq!(display_width("⁉", 0), 2); } #[test] fn display_width_narrow_emojis_variant_selector() { #[cfg(feature = "unicode-width")] - assert_eq!(display_width("⁉\u{fe0f}"), 1); + assert_eq!(display_width("⁉\u{fe0f}", 0), 1); // The variant selector-16 is also counted. #[cfg(not(feature = "unicode-width"))] - assert_eq!(display_width("⁉\u{fe0f}"), 4); + assert_eq!(display_width("⁉\u{fe0f}", 0), 4); } #[test] fn display_width_emojis() { - assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20); + assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥", 0), 20); } } diff --git a/src/fill.rs b/src/fill.rs index fbcaab9e..1f183f96 100644 --- a/src/fill.rs +++ b/src/fill.rs @@ -39,8 +39,12 @@ where { let options = width_or_options.into(); - if text.len() < options.width && !text.contains('\n') && options.initial_indent.is_empty() { - String::from(text.trim_end_matches(' ')) + if text.len() < options.width + && options.tab_width <= 1 + && !text.contains('\n') + && options.initial_indent.is_empty() + { + String::from(text.trim_end_matches(&[' ', '\t'])) } else { fill_slow_path(text, options) } @@ -117,7 +121,11 @@ pub(crate) fn fill_slow_path(text: &str, options: Options<'_>) -> String { /// [`fill()`]. Please see the [`linear` /// benchmark](https://github.com/mgeisler/textwrap/blob/master/benchmarks/linear.rs) /// for details. 
-pub fn fill_inplace(text: &mut String, width: usize) { +pub fn fill_inplace<'a, Opt>(text: &mut String, width_or_options: Opt) +where + Opt: Into>, +{ + let options: Options = width_or_options.into(); let mut indices = Vec::new(); let mut offset = 0; @@ -125,7 +133,8 @@ pub fn fill_inplace(text: &mut String, width: usize) { let words = WordSeparator::AsciiSpace .find_words(line) .collect::>(); - let wrapped_words = wrap_algorithms::wrap_first_fit(&words, &[width as f64]); + let wrapped_words = + wrap_algorithms::wrap_first_fit(&words, &[options.width as f64], options.tab_width); let mut line_offset = offset; for words in &wrapped_words[..wrapped_words.len() - 1] { @@ -210,6 +219,21 @@ mod tests { ); } + #[test] + fn fill_tabs() { + let options = Options::new(10).tab_width(4); + assert_eq!( + fill("Hello\t there\t friends", options), + "Hello\nthere\nfriends" + ); + } + + #[test] + fn fill_tabs_prevent_fast_path() { + let options = Options::new(10).tab_width(4); + assert_eq!(fill("Hey\tworld", options), "Hey\nworld"); + } + #[test] fn break_words_line_breaks() { assert_eq!(fill("ab\ncdefghijkl", 5), "ab\ncdefg\nhijkl"); @@ -295,4 +319,24 @@ mod tests { fill_inplace(&mut text, 10); assert_eq!(text, "foo bar \nbaz"); } + + #[test] + fn fill_inplace_only_tabs() { + let mut text = String::from("Hello\tWorld"); + fill_inplace(&mut text, 8); + + // fill_inplace shouldn't change the text, since it uses + // the ASCII space as the word separator. 
+ assert_eq!(text, "Hello\tWorld"); + } + + #[test] + fn fill_inplace_tabs() { + let options = Options::new(10).tab_width(4); + let mut text = String::from("Hello\t there\t friends"); + + fill_inplace(&mut text, options); + + assert_eq!(text, "Hello\t\nthere\t\nfriends"); + } } diff --git a/src/options.rs b/src/options.rs index 2336816c..96e964da 100644 --- a/src/options.rs +++ b/src/options.rs @@ -8,6 +8,8 @@ use crate::{LineEnding, WordSeparator, WordSplitter, WrapAlgorithm}; pub struct Options<'a> { /// The width in columns at which the text will be wrapped. pub width: usize, + /// How wide a tab character should be considered to be. + pub tab_width: u8, /// Line ending used for breaking lines. pub line_ending: LineEnding, /// Indentation used for the first line of output. See the @@ -36,6 +38,7 @@ impl<'a> From<&'a Options<'a>> for Options<'a> { fn from(options: &'a Options<'a>) -> Self { Self { width: options.width, + tab_width: options.tab_width, line_ending: options.line_ending, initial_indent: options.initial_indent, subsequent_indent: options.subsequent_indent, @@ -86,6 +89,7 @@ impl<'a> Options<'a> { pub const fn new(width: usize) -> Self { Options { width, + tab_width: 0, line_ending: LineEnding::LF, initial_indent: "", subsequent_indent: "", @@ -118,6 +122,26 @@ impl<'a> Options<'a> { } } + /// Change [`self.tab_width`]. The tab width is how wide + /// a tab character should be considered. By default, a tab + /// character is considered to have a width of 0. + /// + /// # Examples + /// + /// ``` + /// use textwrap::{Options, wrap}; + /// + /// assert_eq!(wrap("foo\tbar baz", Options::new(12).tab_width(0)), + /// vec!["foo\tbar baz"]); + /// assert_eq!(wrap("foo\tbar baz", Options::new(12).tab_width(4)), + /// vec!["foo\tbar", "baz"]); + /// ``` + /// + /// [`self.tab_width`]: #structfield.tab_width + pub fn tab_width(self, tab_width: u8) -> Self { + Options { tab_width, ..self } + } + /// Change [`self.initial_indent`]. 
The initial indentation is /// used on the very first line of output. /// @@ -217,6 +241,7 @@ impl<'a> Options<'a> { pub fn word_separator(self, word_separator: WordSeparator) -> Options<'a> { Options { width: self.width, + tab_width: self.tab_width, line_ending: self.line_ending, initial_indent: self.initial_indent, subsequent_indent: self.subsequent_indent, @@ -235,6 +260,7 @@ impl<'a> Options<'a> { pub fn wrap_algorithm(self, wrap_algorithm: WrapAlgorithm) -> Options<'a> { Options { width: self.width, + tab_width: self.tab_width, line_ending: self.line_ending, initial_indent: self.initial_indent, subsequent_indent: self.subsequent_indent, @@ -279,6 +305,7 @@ impl<'a> Options<'a> { pub fn word_splitter(self, word_splitter: WordSplitter) -> Options<'a> { Options { width: self.width, + tab_width: self.tab_width, line_ending: self.line_ending, initial_indent: self.initial_indent, subsequent_indent: self.subsequent_indent, diff --git a/src/refill.rs b/src/refill.rs index ae9448bd..14ad64d3 100644 --- a/src/refill.rs +++ b/src/refill.rs @@ -60,11 +60,11 @@ use crate::{fill, LineEnding, Options}; /// assert_eq!(options.line_ending, LineEnding::LF); /// ``` pub fn unfill(text: &str) -> (String, Options<'_>) { - let prefix_chars: &[_] = &[' ', '-', '+', '*', '>', '#', '/']; + let prefix_chars: &[_] = &[' ', '\t', '-', '+', '*', '>', '#', '/']; let mut options = Options::new(0); for (idx, line) in text.lines().enumerate() { - options.width = std::cmp::max(options.width, display_width(line)); + options.width = std::cmp::max(options.width, display_width(line, options.tab_width)); let without_prefix = line.trim_start_matches(prefix_chars); let prefix = &line[..line.len() - without_prefix.len()]; diff --git a/src/word_splitters.rs b/src/word_splitters.rs index 69e246f0..b4e4bedf 100644 --- a/src/word_splitters.rs +++ b/src/word_splitters.rs @@ -181,6 +181,7 @@ impl WordSplitter { pub fn split_words<'a, I>( words: I, word_splitter: &'a WordSplitter, + tab_width: u8, ) -> impl 
Iterator> where I: IntoIterator>, @@ -193,7 +194,7 @@ where let need_hyphen = !word[..idx].ends_with('-'); let w = Word { word: &word.word[prev..idx], - width: display_width(&word[prev..idx]), + width: display_width(&word[prev..idx], tab_width), whitespace: "", penalty: if need_hyphen { "-" } else { "" }, }; @@ -204,9 +205,8 @@ where if prev < word.word.len() || prev == 0 { let w = Word { word: &word.word[prev..], - width: display_width(&word[prev..]), - whitespace: word.whitespace, - penalty: word.penalty, + width: display_width(&word[prev..], tab_width), + ..word }; prev = word.word.len() + 1; return Some(w); @@ -230,13 +230,16 @@ mod tests { #[test] fn split_words_no_words() { - assert_iter_eq!(split_words(vec![], &WordSplitter::HyphenSplitter), vec![]); + assert_iter_eq!( + split_words(vec![], &WordSplitter::HyphenSplitter, 0), + vec![] + ); } #[test] fn split_words_empty_word() { assert_iter_eq!( - split_words(vec![Word::from(" ")], &WordSplitter::HyphenSplitter), + split_words(vec![Word::from(" ")], &WordSplitter::HyphenSplitter, 0), vec![Word::from(" ")] ); } @@ -244,7 +247,7 @@ mod tests { #[test] fn split_words_single_word() { assert_iter_eq!( - split_words(vec![Word::from("foobar")], &WordSplitter::HyphenSplitter), + split_words(vec![Word::from("foobar")], &WordSplitter::HyphenSplitter, 0), vec![Word::from("foobar")] ); } @@ -252,7 +255,11 @@ mod tests { #[test] fn split_words_hyphen_splitter() { assert_iter_eq!( - split_words(vec![Word::from("foo-bar")], &WordSplitter::HyphenSplitter), + split_words( + vec![Word::from("foo-bar")], + &WordSplitter::HyphenSplitter, + 0 + ), vec![Word::from("foo-"), Word::from("bar")] ); } @@ -260,7 +267,7 @@ mod tests { #[test] fn split_words_no_hyphenation() { assert_iter_eq!( - split_words(vec![Word::from("foo-bar")], &WordSplitter::NoHyphenation), + split_words(vec![Word::from("foo-bar")], &WordSplitter::NoHyphenation, 0), vec![Word::from("foo-bar")] ); } @@ -272,7 +279,8 @@ mod tests { assert_iter_eq!( split_words( 
vec![Word::from("foobar")].into_iter(), - &WordSplitter::Custom(fixed_split_point) + &WordSplitter::Custom(fixed_split_point), + 0, ), vec![ Word { @@ -293,7 +301,8 @@ mod tests { assert_iter_eq!( split_words( vec![Word::from("fo-bar")].into_iter(), - &WordSplitter::Custom(fixed_split_point) + &WordSplitter::Custom(fixed_split_point), + 0 ), vec![ Word { diff --git a/src/wrap.rs b/src/wrap.rs index a7f2ccf2..dbd35cdb 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -202,8 +202,8 @@ pub(crate) fn wrap_single_line<'a>( } else { options.subsequent_indent }; - if line.len() < options.width && indent.is_empty() { - lines.push(Cow::from(line.trim_end_matches(' '))); + if line.len() < options.width && options.tab_width <= 1 && indent.is_empty() { + lines.push(Cow::from(line.trim_end_matches(&[' ', '\t']))); } else { wrap_single_line_slow_path(line, options, lines) } @@ -217,18 +217,20 @@ pub(crate) fn wrap_single_line_slow_path<'a>( options: &Options<'_>, lines: &mut Vec>, ) { + let line = line.trim_end_matches(&[' ', '\t']); + let initial_width = options .width - .saturating_sub(display_width(options.initial_indent)); + .saturating_sub(display_width(options.initial_indent, options.tab_width)); let subsequent_width = options .width - .saturating_sub(display_width(options.subsequent_indent)); + .saturating_sub(display_width(options.subsequent_indent, options.tab_width)); let line_widths = [initial_width, subsequent_width]; let words = options.word_separator.find_words(line); - let split_words = split_words(words, &options.word_splitter); + let split_words = split_words(words, &options.word_splitter, options.tab_width); let broken_words = if options.break_words { - let mut broken_words = break_words(split_words, line_widths[1]); + let mut broken_words = break_words(split_words, line_widths[1], options.tab_width); if !options.initial_indent.is_empty() { // Without this, the first word will always go into the // first line. 
However, since we break words based on the @@ -242,7 +244,9 @@ pub(crate) fn wrap_single_line_slow_path<'a>( split_words.collect::>() }; - let wrapped_words = options.wrap_algorithm.wrap(&broken_words, &line_widths); + let wrapped_words = options + .wrap_algorithm + .wrap(&broken_words, &line_widths, options.tab_width); let mut idx = 0; for words in wrapped_words { diff --git a/src/wrap_algorithms.rs b/src/wrap_algorithms.rs index 1e5f08a1..565ddde5 100644 --- a/src/wrap_algorithms.rs +++ b/src/wrap_algorithms.rs @@ -168,6 +168,7 @@ impl WrapAlgorithm { &self, words: &'b [Word<'a>], line_widths: &'b [usize], + tab_width: u8, ) -> Vec<&'b [Word<'a>]> { // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53 // = 9_007_199_254_740_992 can be represented without loss by @@ -176,13 +177,13 @@ impl WrapAlgorithm { let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::>(); match self { - WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths), + WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths, tab_width), #[cfg(feature = "smawk")] WrapAlgorithm::OptimalFit(penalties) => { // The computation cannnot overflow when the line // widths are restricted to usize. 
- wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap() + wrap_optimal_fit(words, &f64_line_widths, tab_width, penalties).unwrap() } WrapAlgorithm::Custom(func) => func(words, line_widths), @@ -231,7 +232,7 @@ impl Default for WrapAlgorithm { /// /// let text = "These few words will unfortunately not wrap nicely."; /// let words = WordSeparator::AsciiSpace.find_words(text).collect::>(); -/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])), +/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0], 0)), /// vec!["These few words", /// "will", // <-- short line /// "unfortunately", @@ -242,7 +243,7 @@ impl Default for WrapAlgorithm { /// #[cfg(feature = "smawk")] /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties}; /// #[cfg(feature = "smawk")] -/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()), +/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], 0, &Penalties::new()).unwrap()), /// vec!["These few", /// "words will", /// "unfortunately", @@ -284,7 +285,7 @@ impl Default for WrapAlgorithm { /// /// impl Fragment for Task<'_> { /// fn width(&self) -> f64 { self.hours } -/// fn whitespace_width(&self) -> f64 { self.sweep } +/// fn whitespace_width(&self, tab_width: u8) -> f64 { self.sweep } /// fn penalty_width(&self) -> f64 { self.cleanup } /// } /// @@ -308,7 +309,7 @@ impl Default for WrapAlgorithm { /// let mut days = Vec::new(); /// // Assign tasks to days. The assignment is a vector of slices, /// // with a slice per day. 
-/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]); +/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length], 0); /// for day in assigned_days.iter() { /// let last = day.last().unwrap(); /// let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum(); @@ -347,6 +348,7 @@ impl Default for WrapAlgorithm { pub fn wrap_first_fit<'a, T: Fragment>( fragments: &'a [T], line_widths: &[f64], + tab_width: u8, ) -> Vec<&'a [T]> { // The final line width is used for all remaining lines. let default_line_width = line_widths.last().copied().unwrap_or(0.0); @@ -364,7 +366,7 @@ pub fn wrap_first_fit<'a, T: Fragment>( start = idx; width = 0.0; } - width += fragment.width() + fragment.whitespace_width(); + width += fragment.width() + fragment.whitespace_width(tab_width); } lines.push(&fragments[start..]); lines @@ -380,7 +382,7 @@ mod tests { #[rustfmt::skip] impl Fragment for Word { fn width(&self) -> f64 { self.0 } - fn whitespace_width(&self) -> f64 { 1.0 } + fn whitespace_width(&self, _: u8) -> f64 { 1.0 } fn penalty_width(&self) -> f64 { 0.0 } } @@ -397,7 +399,7 @@ mod tests { // Wrap at just under f64::MAX (~19e307). The tiny // whitespace_widths disappear because of loss of precision. assert_eq!( - wrap_first_fit(&words, &[15e307]), + wrap_first_fit(&words, &[15e307], 0), &[ vec![ Word(1e307), diff --git a/src/wrap_algorithms/optimal_fit.rs b/src/wrap_algorithms/optimal_fit.rs index bdc03345..5f128080 100644 --- a/src/wrap_algorithms/optimal_fit.rs +++ b/src/wrap_algorithms/optimal_fit.rs @@ -44,20 +44,20 @@ pub struct Penalties { /// let penalties = Penalties::new(); /// /// // Perfect fit, both words are on a single line with no overflow. 
- /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap(); + /// let wrapped = wrap_optimal_fit(&fragments, &[length], 0, &penalties).unwrap(); /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); /// /// // The words no longer fit, yet we get a single line back. While /// // the cost of overflow (`1 * 2500`) is the same as the cost of the /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty` /// // which makes it cheaper to overflow than to use two lines. - /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap(); + /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], 0, &penalties).unwrap(); /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); /// /// // The cost of overflow would be 2 * 2500, whereas the cost of /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 = /// // 3401`. We therefore get two lines. - /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap(); + /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], 0, &penalties).unwrap(); /// assert_eq!(wrapped, vec![&[Word::from(short)], /// &[Word::from(&long)]]); /// ``` @@ -283,13 +283,13 @@ impl std::error::Error for OverflowError {} /// /// impl Fragment for Word { /// fn width(&self) -> f64 { self.0 } -/// fn whitespace_width(&self) -> f64 { 1.0 } +/// fn whitespace_width(&self, tab_width: u8) -> f64 { 1.0 } /// fn penalty_width(&self) -> f64 { 0.0 } /// } /// /// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is /// // larger than f64::MAX: -/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()), +/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], 0, &Penalties::default()), /// Err(OverflowError)); /// ``` /// @@ -302,6 +302,7 @@ impl std::error::Error for OverflowError {} pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( fragments: &'a [T], line_widths: &'b [f64], + tab_width: u8, 
penalties: &'b Penalties, ) -> Result, OverflowError> { // The final line width is used for all remaining lines. @@ -310,7 +311,7 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( let mut width = 0.0; widths.push(width); for fragment in fragments { - width += fragment.width() + fragment.whitespace_width(); + width += fragment.width() + fragment.whitespace_width(tab_width); widths.push(width); } @@ -328,7 +329,7 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( // Compute the width of a line spanning fragments[i..j] in // constant time. We need to adjust widths[j] by subtracting // the whitespace of fragment[j-1] and then add the penalty. - let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width() + let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width(tab_width) + fragments[j - 1].penalty_width(); // We compute cost of the line containing fragments[i..j]. We @@ -398,7 +399,7 @@ mod tests { #[rustfmt::skip] impl Fragment for Word { fn width(&self) -> f64 { self.0 } - fn whitespace_width(&self) -> f64 { 1.0 } + fn whitespace_width(&self, _: u8) -> f64 { 1.0 } fn penalty_width(&self) -> f64 { 0.0 } } @@ -406,7 +407,7 @@ mod tests { fn wrap_fragments_with_infinite_widths() { let words = vec![Word(f64::INFINITY)]; assert_eq!( - wrap_optimal_fit(&words, &[0.0], &Penalties::default()), + wrap_optimal_fit(&words, &[0.0], 0, &Penalties::default()), Err(OverflowError) ); } @@ -415,7 +416,7 @@ mod tests { fn wrap_fragments_with_huge_widths() { let words = vec![Word(1e200), Word(1e250), Word(1e300)]; assert_eq!( - wrap_optimal_fit(&words, &[1e300], &Penalties::default()), + wrap_optimal_fit(&words, &[1e300], 0, &Penalties::default()), Err(OverflowError) ); } @@ -426,7 +427,7 @@ mod tests { // makes the `gap * gap` cost fit comfortably in a f64. 
let words = vec![Word(1e25), Word(1e50), Word(1e75)]; assert_eq!( - wrap_optimal_fit(&words, &[1e100], &Penalties::default()), + wrap_optimal_fit(&words, &[1e100], 0, &Penalties::default()), Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]]) ); }