Skip to content

Commit

Permalink
Auto-detect line ending in unfill
Browse files Browse the repository at this point in the history
  • Loading branch information
koiuo committed Jun 9, 2022
1 parent 69a5e66 commit 9ab0112
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 42 deletions.
97 changes: 70 additions & 27 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -623,19 +623,18 @@ where
/// * This is an
/// example of
/// a list item.
/// ", LineEnding::LF);
/// ");
///
/// assert_eq!(text, "This is an example of a list item.\n");
/// assert_eq!(options.initial_indent, "* ");
/// assert_eq!(options.subsequent_indent, " ");
/// ```
pub fn unfill(text: &str, line_ending: LineEnding) -> (String, Options<'_>) {
let line_ending_pat = line_ending.as_str();
let trimmed = text.trim_end_matches(line_ending_pat);
pub fn unfill(text: &str) -> (String, Options<'_>) {
let trimmed = text.trim_end_matches(&['\r', '\n']);
let prefix_chars: &[_] = &[' ', '-', '+', '*', '>', '#', '/'];

let mut options = Options::new(0).line_ending(line_ending);
for (idx, line) in trimmed.split(line_ending_pat).enumerate() {
let mut options = Options::new(0);
for (idx, line) in trimmed.split('\n').enumerate() {
options.width = std::cmp::max(options.width, core::display_width(line));
let without_prefix = line.trim_start_matches(prefix_chars);
let prefix = &line[..line.len() - without_prefix.len()];
Expand All @@ -658,16 +657,24 @@ pub fn unfill(text: &str, line_ending: LineEnding) -> (String, Options<'_>) {
}

let mut unfilled = String::with_capacity(text.len());
for (idx, line) in trimmed.split(line_ending_pat).enumerate() {
if idx == 0 {
let mut detected_line_ending = None;

for (line, ending) in line_ending::NonEmptyLines(text) {
if unfilled.is_empty() {
unfilled.push_str(&line[options.initial_indent.len()..]);
} else {
unfilled.push(' ');
unfilled.push_str(&line[options.subsequent_indent.len()..]);
}
match (detected_line_ending, ending) {
(None, Some(_)) => detected_line_ending = ending,
(Some(LineEnding::CRLF), Some(LineEnding::LF)) => detected_line_ending = ending,
_ => (),
}
}

unfilled.push_str(&text[trimmed.len()..]);

options.line_ending = detected_line_ending.unwrap_or(LineEnding::LF);
(unfilled, options)
}

Expand Down Expand Up @@ -731,7 +738,7 @@ where
{
let mut new_options = new_width_or_options.into();
let trimmed = filled_text.trim_end_matches(new_options.line_ending.as_str());
let (text, options) = unfill(trimmed, new_options.line_ending);
let (text, options) = unfill(trimmed);
new_options.initial_indent = options.initial_indent;
new_options.subsequent_indent = options.subsequent_indent;
let mut refilled = fill(&text, new_options);
Expand Down Expand Up @@ -1319,7 +1326,7 @@ mod tests {
assert_eq!(
wrap(
"Hello, World!",
Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties)
Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties),
),
vec!["Hello, W", "orld!"]
);
Expand Down Expand Up @@ -1721,29 +1728,73 @@ mod tests {

#[test]
fn unfill_simple() {
let (text, options) = unfill("foo\nbar", LineEnding::LF);
let (text, options) = unfill("foo\nbar");
assert_eq!(text, "foo bar");
assert_eq!(options.width, 3);
assert_eq!(options.line_ending, LineEnding::LF);
}

/// Input without any line break comes back unchanged, and the reported
/// line ending is `LineEnding::LF`.
#[test]
fn unfill_no_new_line() {
    let (unfilled, detected) = unfill("foo bar");
    assert_eq!(unfilled, "foo bar");
    assert_eq!(detected.width, 7);
    assert_eq!(detected.line_ending, LineEnding::LF);
}

/// Two lines separated by `\r\n` are joined with a single space and the
/// reported line ending is `LineEnding::CRLF`.
#[test]
fn unfill_simple_crlf() {
    let (unfilled, detected) = unfill("foo\r\nbar");
    assert_eq!(unfilled, "foo bar");
    assert_eq!(detected.width, 3);
    assert_eq!(detected.line_ending, LineEnding::CRLF);
}

/// When the input mixes `\r\n` and `\n`, the reported line ending falls back
/// to `\n`, because:
///
/// 1. `\n` is the default line ending, and
/// 2. `\n` still matches both `\n` and `\r\n`, whereas `\r\n` would not
///    match a bare `\n`.
#[test]
fn unfill_mixed_new_lines() {
    let (unfilled, detected) = unfill("foo\r\nbar\nbaz");
    assert_eq!(unfilled, "foo bar baz");
    assert_eq!(detected.width, 3);
    assert_eq!(detected.line_ending, LineEnding::LF);
}

#[test]
fn unfill_trailing_newlines() {
let (text, options) = unfill("foo\nbar\n\n\n", LineEnding::LF);
let (text, options) = unfill("foo\nbar\n\n\n");
assert_eq!(text, "foo bar\n\n\n");
assert_eq!(options.width, 3);
}

/// A run of trailing line breaks is kept verbatim even when it mixes `\n`
/// and `\r\n`; the mixed input is reported as `LineEnding::LF`.
#[test]
fn unfill_mixed_trailing_newlines() {
    let (unfilled, detected) = unfill("foo\r\nbar\n\r\n\n");
    assert_eq!(unfilled, "foo bar\n\r\n\n");
    assert_eq!(detected.width, 3);
    assert_eq!(detected.line_ending, LineEnding::LF);
}

/// A single line terminated by `\r\n` keeps its terminator, and the
/// reported line ending is `LineEnding::CRLF`.
#[test]
fn unfill_trailing_crlf() {
    let (unfilled, detected) = unfill("foo bar\r\n");
    assert_eq!(unfilled, "foo bar\r\n");
    assert_eq!(detected.width, 7);
    assert_eq!(detected.line_ending, LineEnding::CRLF);
}

#[test]
fn unfill_initial_indent() {
let (text, options) = unfill(" foo\nbar\nbaz", LineEnding::LF);
let (text, options) = unfill(" foo\nbar\nbaz");
assert_eq!(text, "foo bar baz");
assert_eq!(options.width, 5);
assert_eq!(options.initial_indent, " ");
}

#[test]
fn unfill_differing_indents() {
let (text, options) = unfill(" foo\n bar\n baz", LineEnding::LF);
let (text, options) = unfill(" foo\n bar\n baz");
assert_eq!(text, "foo bar baz");
assert_eq!(options.width, 7);
assert_eq!(options.initial_indent, " ");
Expand All @@ -1752,7 +1803,7 @@ mod tests {

#[test]
fn unfill_list_item() {
let (text, options) = unfill("* foo\n bar\n baz", LineEnding::LF);
let (text, options) = unfill("* foo\n bar\n baz");
assert_eq!(text, "foo bar baz");
assert_eq!(options.width, 5);
assert_eq!(options.initial_indent, "* ");
Expand All @@ -1761,7 +1812,7 @@ mod tests {

#[test]
fn unfill_multiple_char_prefix() {
let (text, options) = unfill(" // foo bar\n // baz\n // quux", LineEnding::LF);
let (text, options) = unfill(" // foo bar\n // baz\n // quux");
assert_eq!(text, "foo bar baz quux");
assert_eq!(options.width, 14);
assert_eq!(options.initial_indent, " // ");
Expand All @@ -1770,7 +1821,7 @@ mod tests {

#[test]
fn unfill_block_quote() {
let (text, options) = unfill("> foo\n> bar\n> baz", LineEnding::LF);
let (text, options) = unfill("> foo\n> bar\n> baz");
assert_eq!(text, "foo bar baz");
assert_eq!(options.width, 5);
assert_eq!(options.initial_indent, "> ");
Expand All @@ -1779,15 +1830,7 @@ mod tests {

#[test]
fn unfill_whitespace() {
assert_eq!(unfill("foo bar", LineEnding::LF).0, "foo bar");
}

#[test]
fn unfill_crlf() {
let (text, options) = unfill("foo\r\nbar", LineEnding::CRLF);
assert_eq!(text, "foo bar");
assert_eq!(options.width, 3);
assert_eq!(options.line_ending, LineEnding::CRLF);
assert_eq!(unfill("foo bar").0, "foo bar");
}

#[test]
Expand Down
47 changes: 32 additions & 15 deletions src/line_ending.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,19 @@ use std::str::FromStr;
/// TODO doc
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum LineEnding {
/// TODO
CR,
/// TODO
CRLF,
/// TODO
LF,
}

impl LineEnding {
/// TODO
#[inline]
pub const fn len_chars(&self) -> usize {
match self {
Self::CRLF => 2,
_ => 1,
}
}

/// TODO
#[inline]
pub const fn as_str(&self) -> &'static str {
match self {
Self::CRLF => "\u{000D}\u{000A}",
Self::LF => "\u{000A}",
Self::CR => "\u{000D}",
Self::CRLF => "\r\n",
Self::LF => "\n",
}
}
}
Expand All @@ -43,8 +31,37 @@ impl FromStr for LineEnding {
match s {
"\u{000D}\u{000A}" => Result::Ok(LineEnding::CRLF),
"\u{000A}" => Result::Ok(LineEnding::LF),
"\u{000D}" => Result::Ok(LineEnding::CR),
_ => Result::Err(()),
}
}
}

/// Iterator over the non-empty lines of a string slice.
///
/// The wrapped string is split at every `\n`. Each item is the line content
/// together with the ending that terminated it:
///
/// * `Some(LineEnding::CRLF)` when the `\n` was directly preceded by `\r`
///   (the `\r` is stripped from the yielded content),
/// * `Some(LineEnding::LF)` for any other `\n`,
/// * `None` for a final segment that is not terminated by `\n`.
///
/// Lines with no content (a bare `\n` or `\r\n`) are skipped and never
/// yielded. The wrapped slice is advanced past each consumed line.
#[derive(Debug, Clone, Copy)]
pub struct NonEmptyLines<'a>(pub &'a str);

impl<'a> Iterator for NonEmptyLines<'a> {
    type Item = (&'a str, Option<LineEnding>);

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let newline_at = match self.0.find('\n') {
                Some(pos) => pos,
                None => {
                    // No terminator left: hand out whatever remains exactly
                    // once, then report exhaustion.
                    let rest = self.0;
                    self.0 = "";
                    return if rest.is_empty() {
                        None
                    } else {
                        Some((rest, None))
                    };
                }
            };

            // `tail` starts with the `\n` that was found; step over it so the
            // next call resumes on the following line.
            let (head, tail) = self.0.split_at(newline_at);
            self.0 = &tail[1..];

            // A `\r` immediately before the `\n` belongs to the terminator.
            let (content, ending) = match head.strip_suffix('\r') {
                Some(stripped) => (stripped, LineEnding::CRLF),
                None => (head, LineEnding::LF),
            };

            // Blank lines are skipped; keep scanning for the next one.
            if !content.is_empty() {
                return Some((content, Some(ending)));
            }
        }
    }
}

0 comments on commit 9ab0112

Please sign in to comment.