Skip to content

Commit

Permalink
Pass tab_width to functions instead of storing it in Word
Browse files Browse the repository at this point in the history
  • Loading branch information
mtoohey31 committed Oct 30, 2022
1 parent 489f3b0 commit c6b9048
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 67 deletions.
41 changes: 17 additions & 24 deletions src/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ pub trait Fragment: std::fmt::Debug {

/// Displayed width of the whitespace that must follow the word
/// when the word is not at the end of a line.
fn whitespace_width(&self) -> f64;
fn whitespace_width(&self, tab_width: u8) -> f64;

/// Displayed width of the penalty that must be inserted if the
/// word falls at the end of a line.
Expand All @@ -227,8 +227,6 @@ pub struct Word<'a> {
pub penalty: &'a str,
// Cached width in columns.
pub(crate) width: usize,
// Width of a tab character within this word.
pub(crate) tab_width: u8,
}

impl std::ops::Deref for Word<'_> {
Expand All @@ -245,22 +243,14 @@ impl<'a> Word<'a> {
/// All trailing whitespace is automatically taken to be the whitespace part
/// of the word.
pub fn from(word: &str) -> Word<'_> {
Self::with_tab_width(word, 0)
}

/// Construct a `Word` from a string, with a customizable tab width
/// to calculate the total width with.
///
/// All trailing whitespace is automatically removed to be the
/// whitespace part of the word.
pub fn with_tab_width(word: &str, tab_width: u8) -> Word<'_> {
let trimmed = word.trim_end();
Word {
word: trimmed,
width: display_width(trimmed, tab_width),
// trimmed shouldn't contain whitespace, so we don't need to pass
// an accurate tab_width.
width: display_width(trimmed, 0),
whitespace: &word[trimmed.len()..],
penalty: "",
tab_width,
}
}

Expand All @@ -273,11 +263,15 @@ impl<'a> Word<'a> {
/// ```
/// use textwrap::core::Word;
/// assert_eq!(
/// Word::from("Hello! ").break_apart(3).collect::<Vec<_>>(),
/// Word::from("Hello! ").break_apart(3, 0).collect::<Vec<_>>(),
/// vec![Word::from("Hel"), Word::from("lo! ")]
/// );
/// ```
pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator<Item = Word<'a>> + 'b {
pub fn break_apart<'b>(
&'b self,
line_width: usize,
tab_width: u8,
) -> impl Iterator<Item = Word<'a>> + 'b {
let mut char_indices = self.word.char_indices();
let mut offset = 0;
let mut width = 0;
Expand All @@ -288,20 +282,19 @@ impl<'a> Word<'a> {
continue;
}

if width > 0 && width + ch_width(ch, self.tab_width) > line_width {
if width > 0 && width + ch_width(ch, tab_width) > line_width {
let word = Word {
word: &self.word[offset..idx],
whitespace: "",
penalty: "",
tab_width: self.tab_width,
width,
};
offset = idx;
width = ch_width(ch, self.tab_width);
width = ch_width(ch, tab_width);
return Some(word);
}

width += ch_width(ch, self.tab_width);
width += ch_width(ch, tab_width);
}

if offset < self.word.len() {
Expand All @@ -326,8 +319,8 @@ impl Fragment for Word<'_> {
}

#[inline]
fn whitespace_width(&self) -> f64 {
display_width(self.whitespace, self.tab_width) as f64
fn whitespace_width(&self, tab_width: u8) -> f64 {
display_width(self.whitespace, tab_width) as f64
}

// We assume the penalty is `""` or `"-"`. This allows us to
Expand All @@ -343,14 +336,14 @@ impl Fragment for Word<'_> {
/// This simply calls [`Word::break_apart`] on words that are too
/// wide. This means that no extra `'-'` is inserted, the word is
/// simply broken into smaller pieces.
pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec<Word<'a>>
pub fn break_words<'a, I>(words: I, line_width: usize, tab_width: u8) -> Vec<Word<'a>>
where
I: IntoIterator<Item = Word<'a>>,
{
let mut shortened_words = Vec::new();
for word in words {
if word.width() > line_width as f64 {
shortened_words.extend(word.break_apart(line_width));
shortened_words.extend(word.break_apart(line_width, tab_width));
} else {
shortened_words.push(word);
}
Expand Down
7 changes: 2 additions & 5 deletions src/fill.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,9 @@ where
for line in text.split('\n') {
let words = WordSeparator::AsciiSpace
.find_words(line)
.map(|mut w| {
w.tab_width = options.tab_width;
w
})
.collect::<Vec<_>>();
let wrapped_words = wrap_algorithms::wrap_first_fit(&words, &[options.width as f64]);
let wrapped_words =
wrap_algorithms::wrap_first_fit(&words, &[options.width as f64], options.tab_width);

let mut line_offset = offset;
for words in &wrapped_words[..wrapped_words.len() - 1] {
Expand Down
22 changes: 11 additions & 11 deletions src/word_splitters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,12 @@ where
let need_hyphen = !word[..idx].ends_with('-');
let w = Word {
word: &word.word[prev..idx],
width: display_width(&word[prev..idx], word.tab_width),
// word[prev..idx] is a subset of the original word.word,
// which is trimmed of whitespace in Word::from, so we

This comment has been minimized.

Copy link
@mgeisler

mgeisler Nov 1, 2022

Owner

Ah nice! I had to read it twice to get it: can you instead say something like

subset of the word.word, which is guaranteed to not contain any TABs

I think that's the crucial point which allows us to skip looking for tabs.

This comment has been minimized.

Copy link
@mtoohey31

mtoohey31 Nov 2, 2022

Author

Actually, now that I'm looking at this again, I think this may be incorrect. If we're separating using WordSeparator::AsciiSpace, a word might actually contain a tab inside it, right? And if we're using WordSeparator::Custom, we have no guarantees at all.

I've switched things so that an accurate tab_width value gets passed in 603f4cf, but if it turns out that there's a reason I was correct before and am wrong now, let me know and I can revert that.

This comment has been minimized.

Copy link
@mgeisler

mgeisler Nov 6, 2022

Owner

Actually, now that I'm looking at this again, I think this may be incorrect. If we're separating using WordSeparator::AsciiSpace, a word might actually contain a tab inside it, right?

If we only split on ' ', then yeah for sure. However, I think we discussed somewhere that we should now also split on TAB characters. I suppose we should also change the name of the enum variant then...

Let's discuss more on the fresh #490 PR 😃

// don't need an accurate tab_width.
width: display_width(&word[prev..idx], 0),
whitespace: "",
penalty: if need_hyphen { "-" } else { "" },
tab_width: word.tab_width,
};
prev = idx;
return Some(w);
Expand All @@ -205,7 +207,9 @@ where
if prev < word.word.len() || prev == 0 {
let w = Word {
word: &word.word[prev..],
width: display_width(&word[prev..], word.tab_width),
// see comment by display_width call above regarding
// tab_width.
width: display_width(&word[prev..], 0),
..word
};
prev = word.word.len() + 1;
Expand Down Expand Up @@ -279,15 +283,13 @@ mod tests {
word: "foo",
width: 3,
whitespace: "",
penalty: "-",
tab_width: 0
penalty: "-"
},
Word {
word: "bar",
width: 3,
whitespace: "",
penalty: "",
tab_width: 0
penalty: ""
}
]
);
Expand All @@ -302,15 +304,13 @@ mod tests {
word: "fo-",
width: 3,
whitespace: "",
penalty: "",
tab_width: 0
penalty: ""
},
Word {
word: "bar",
width: 3,
whitespace: "",
penalty: "",
tab_width: 0
penalty: ""
}
]
);
Expand Down
13 changes: 6 additions & 7 deletions src/wrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,27 +225,26 @@ pub(crate) fn wrap_single_line_slow_path<'a>(
.saturating_sub(display_width(options.subsequent_indent, options.tab_width));
let line_widths = [initial_width, subsequent_width];

let words = options.word_separator.find_words(line).map(|mut w| {
w.tab_width = options.tab_width;
w
});
let words = options.word_separator.find_words(line);
let split_words = split_words(words, &options.word_splitter);
let broken_words = if options.break_words {
let mut broken_words = break_words(split_words, line_widths[1]);
let mut broken_words = break_words(split_words, line_widths[1], options.tab_width);
if !options.initial_indent.is_empty() {
// Without this, the first word will always go into the
// first line. However, since we break words based on the
// _second_ line width, it can be wrong to unconditionally
// put the first word onto the first line. An empty
// zero-width word fixed this.
broken_words.insert(0, Word::with_tab_width("", options.tab_width));
broken_words.insert(0, Word::from(""));
}
broken_words
} else {
split_words.collect::<Vec<_>>()
};

let wrapped_words = options.wrap_algorithm.wrap(&broken_words, &line_widths);
let wrapped_words = options
.wrap_algorithm
.wrap(&broken_words, &line_widths, options.tab_width);

let mut idx = 0;
for words in wrapped_words {
Expand Down
20 changes: 11 additions & 9 deletions src/wrap_algorithms.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ impl WrapAlgorithm {
&self,
words: &'b [Word<'a>],
line_widths: &'b [usize],
tab_width: u8,
) -> Vec<&'b [Word<'a>]> {
// Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53
// = 9_007_199_254_740_992 can be represented without loss by
Expand All @@ -176,13 +177,13 @@ impl WrapAlgorithm {
let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>();

match self {
WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths),
WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths, tab_width),

#[cfg(feature = "smawk")]
WrapAlgorithm::OptimalFit(penalties) => {
// The computation cannnot overflow when the line
// widths are restricted to usize.
wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap()
wrap_optimal_fit(words, &f64_line_widths, tab_width, penalties).unwrap()
}

WrapAlgorithm::Custom(func) => func(words, line_widths),
Expand Down Expand Up @@ -231,7 +232,7 @@ impl Default for WrapAlgorithm {
///
/// let text = "These few words will unfortunately not wrap nicely.";
/// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>();
/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])),
/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0], 0)),
/// vec!["These few words",
/// "will", // <-- short line
/// "unfortunately",
Expand All @@ -242,7 +243,7 @@ impl Default for WrapAlgorithm {
/// #[cfg(feature = "smawk")]
/// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
/// #[cfg(feature = "smawk")]
/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()),
/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], 0, &Penalties::new()).unwrap()),
/// vec!["These few",
/// "words will",
/// "unfortunately",
Expand Down Expand Up @@ -284,7 +285,7 @@ impl Default for WrapAlgorithm {
///
/// impl Fragment for Task<'_> {
/// fn width(&self) -> f64 { self.hours }
/// fn whitespace_width(&self) -> f64 { self.sweep }
/// fn whitespace_width(&self, tab_width: u8) -> f64 { self.sweep }
/// fn penalty_width(&self) -> f64 { self.cleanup }
/// }
///
Expand All @@ -308,7 +309,7 @@ impl Default for WrapAlgorithm {
/// let mut days = Vec::new();
/// // Assign tasks to days. The assignment is a vector of slices,
/// // with a slice per day.
/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]);
/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length], 0);
/// for day in assigned_days.iter() {
/// let last = day.last().unwrap();
/// let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum();
Expand Down Expand Up @@ -347,6 +348,7 @@ impl Default for WrapAlgorithm {
pub fn wrap_first_fit<'a, 'b, T: Fragment>(
fragments: &'a [T],
line_widths: &'b [f64],
tab_width: u8,
) -> Vec<&'a [T]> {
// The final line width is used for all remaining lines.
let default_line_width = line_widths.last().copied().unwrap_or(0.0);
Expand All @@ -364,7 +366,7 @@ pub fn wrap_first_fit<'a, 'b, T: Fragment>(
start = idx;
width = 0.0;
}
width += fragment.width() + fragment.whitespace_width();
width += fragment.width() + fragment.whitespace_width(tab_width);
}
lines.push(&fragments[start..]);
lines
Expand All @@ -380,7 +382,7 @@ mod tests {
#[rustfmt::skip]
impl Fragment for Word {
fn width(&self) -> f64 { self.0 }
fn whitespace_width(&self) -> f64 { 1.0 }
fn whitespace_width(&self, _: u8) -> f64 { 1.0 }
fn penalty_width(&self) -> f64 { 0.0 }
}

Expand All @@ -397,7 +399,7 @@ mod tests {
// Wrap at just under f64::MAX (~19e307). The tiny
// whitespace_widths disappear because of loss of precision.
assert_eq!(
wrap_first_fit(&words, &[15e307]),
wrap_first_fit(&words, &[15e307], 0),
&[
vec![
Word(1e307),
Expand Down
Loading

0 comments on commit c6b9048

Please sign in to comment.