From 7f096ccd3edc89b8da0f6bd0e76002b37ff0e909 Mon Sep 17 00:00:00 2001 From: Torsten Long Date: Thu, 15 Aug 2024 15:40:01 +0200 Subject: [PATCH 1/7] First functioning version of block quote formatting --- src/features.rs | 4 ++ src/main.rs | 70 ++++++++++++++++++++++---------- src/parse.rs | 105 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+), 22 deletions(-) diff --git a/src/features.rs b/src/features.rs index 66478b3..9f4650b 100644 --- a/src/features.rs +++ b/src/features.rs @@ -23,6 +23,7 @@ use crate::parse::ParseCfg; #[derive(Debug, PartialEq)] pub struct FeatureCfg { pub keep_spaces_in_links: bool, + pub format_block_quotes: bool, pub break_cfg: BreakCfg, pub parse_cfg: ParseCfg, } @@ -31,6 +32,7 @@ impl Default for FeatureCfg { fn default() -> Self { FeatureCfg { keep_spaces_in_links: false, + format_block_quotes: false, parse_cfg: ParseCfg { keep_linebreaks: false, }, @@ -57,6 +59,7 @@ impl std::str::FromStr for FeatureCfg { { match feature { "keep-spaces-in-links" => cfg.keep_spaces_in_links = true, + "format-block-quotes" => cfg.format_block_quotes = true, "keep-linebreaks" => { cfg.parse_cfg.keep_linebreaks = true; cfg.break_cfg.keep_linebreaks = true; @@ -86,6 +89,7 @@ mod test { let default = FeatureCfg::default(); let swapped = FeatureCfg { keep_spaces_in_links: !default.keep_spaces_in_links, + format_block_quotes: !default.format_block_quotes, parse_cfg: ParseCfg { keep_linebreaks: !default.parse_cfg.keep_linebreaks, }, diff --git a/src/main.rs b/src/main.rs index 9b5e23b..ea2aedb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -69,6 +69,48 @@ fn generate_report( } } +struct Processor { + feature_cfg: features::FeatureCfg, + detector: detect::BreakDetector, + max_width: Option, +} + +impl Processor { + fn process(&self, text: String, width_reduction: usize) -> String { + // At first, process all block quotes. + eprintln!("REDUCTION {}", width_reduction); + let text = parse::BlockQuotes::new(&text).map_to_matches(|t| self.process(t, 2)); + // Then process the actual text. + let ends_on_linebreak = text.ends_with('\n'); + let after_space_replace = if self.feature_cfg.keep_spaces_in_links { + log::debug!("not replacing spaces in links by non-breaking spaces"); + text + } else { + log::debug!("replacing spaces in links by non-breaking spaces"); + replace::replace_spaces_in_links_by_nbsp(text) + }; + let parsed = parse::parse_markdown(&after_space_replace, &self.feature_cfg.parse_cfg); + let filled = ranges::fill_markdown_ranges(parsed, &after_space_replace); + let formatted = wrap::add_linebreaks_and_wrap( + filled, + &self + .max_width + .map(|el| el.checked_sub(width_reduction).unwrap_or(el)), + &self.detector, + &after_space_replace, + ); + + // Keep newlines at the end of the file in tact. They disappear sometimes. + let file_end = if !formatted.ends_with('\n') && ends_on_linebreak { + log::debug!("adding missing trailing newline character"); + "\n" + } else { + "" + }; + format!("{}{}", formatted, file_end) + } +} + fn process( document: String, file_dir: &PathBuf, @@ -95,6 +137,11 @@ fn process( log::debug!("limiting line length to {} characters", cfg.max_width); Some(cfg.max_width) }; + let processor = Processor { + feature_cfg, + detector, + max_width, + }; // Actually process the text. let (frontmatter, text) = frontmatter::split_frontmatter(document.clone()); @@ -107,28 +154,7 @@ fn process( text }; - let after_space_replace = if feature_cfg.keep_spaces_in_links { - log::debug!("not replacing spaces in links by non-breaking spaces"); - after_upstream - } else { - log::debug!("replacing spaces in links by non-breaking spaces"); - replace::replace_spaces_in_links_by_nbsp(after_upstream) - }; - - let parsed = parse::parse_markdown(&after_space_replace, &feature_cfg.parse_cfg); - let filled = ranges::fill_markdown_ranges(parsed, &after_space_replace); - let formatted = - wrap::add_linebreaks_and_wrap(filled, &max_width, &detector, &after_space_replace); - - // Keep newlines at the end of the file in tact. They disappear sometimes. - let file_end = if !formatted.ends_with('\n') && document.ends_with('\n') { - log::debug!("adding missing trailing newline character"); - "\n" - } else { - "" - }; - - let processed = format!("{}{}{}", frontmatter, formatted, file_end); + let processed = format!("{}{}", frontmatter, processor.process(after_upstream, 0)); Ok((processed, document)) } diff --git a/src/parse.rs b/src/parse.rs index 2f44492..8c88586 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -18,6 +18,7 @@ along with this program. If not, see . use core::ops::Range; use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd}; use std::collections::HashMap; +use std::fmt::Write; use crate::detect::WhitespaceDetector; use crate::ignore::IgnoreByHtmlComment; @@ -162,6 +163,110 @@ fn to_be_wrapped( .collect::>() } +#[derive(Debug)] +enum RangeMatch<'a> { + Matches(String), + NoMatch(&'a str), +} + +pub struct BlockQuotes<'a>(Vec>); + +impl<'a> BlockQuotes<'a> { + fn strip_prefix(text: &str) -> String { + text.split_inclusive('\n') + .map(|t| { + t.strip_prefix('>') + .map(|el| el.trim_start_matches(' ')) + .unwrap_or(t) + }) + .collect::() + } + + pub fn new(text: &'a str) -> Self { + let mut level: usize = 0; + // In case we ever need to iterate over other kinds of syntax, the tag as well as the + // function stripping prefixes will have to be adjusted. + let tag = Tag::BlockQuote; + + let mut opts = Options::empty(); + opts.insert(Options::ENABLE_TABLES); + opts.insert(Options::ENABLE_FOOTNOTES); + opts.insert(Options::ENABLE_TASKLISTS); + opts.insert(Options::ENABLE_HEADING_ATTRIBUTES); + opts.insert(Options::ENABLE_SMART_PUNCTUATION); + opts.insert(Options::ENABLE_STRIKETHROUGH); + + let ranges = Parser::new_ext(text, opts) + .into_offset_iter() + .filter_map(|(event, range)| match event { + Event::Start(start) => { + level += 1; + if level == 1 { + Some((start == tag, range)) + } else { + None + } + } + Event::End(_) => { + level -= 1; + None + } + _ => { + if level == 0 { + Some((false, range)) + } else { + None + } + } + }) + // .map(|(matches, range)| { + // if matches { + // RangeMatch::Matches(range) + // } else { + // RangeMatch::NoMatch(range) + // } + // }) + .map(|(matches, range)| { + if matches { + eprintln!("\n\nRANGE {}\n\n", Self::strip_prefix(&text[range.clone()])); + RangeMatch::Matches(Self::strip_prefix(&text[range])) + } else { + RangeMatch::NoMatch(&text[range]) + } + }) + .collect::>(); + + Self(ranges) + } + + pub fn map_to_matches(self, func: MapFn) -> String + where + MapFn: Fn(String) -> String, + { + self.0 + .into_iter() + .map(|el| match el { + RangeMatch::Matches(s) => { + // The "write!" calls should never fail since we write to a String. + let mut result = String::from("\n"); + func(s.to_string()).split_inclusive('\n').for_each(|line| { + let prefix = if line.len() == 1 { "" } else { " " }; + write!(result, ">{}{}", prefix, line) + .expect("building block-quote formated result"); + }); + writeln!(result).expect("building block-quote formated result"); + result + } + // func(s.to_string()) + // .split_inclusive('\n') + // .map(|el| format!("> {}", el)) + // .collect::(), + RangeMatch::NoMatch(s) => s.to_string(), + }) + .collect::() + } +} + /// Check whether there is nothing but whitespace between the end of the previous range and the /// start of the next one, if the ranges do not connect directly anyway. Note that we still keep /// paragraphs separated by keeping ranges separate that are separated by more linebreaks than one. From 867137db0ee1a84f8ef81d94b03ff020a4b83810 Mon Sep 17 00:00:00 2001 From: Torsten Long Date: Thu, 15 Aug 2024 16:34:12 +0200 Subject: [PATCH 2/7] Misc improvements to the formatting of block quotes --- src/main.rs | 4 +++- src/parse.rs | 53 +++++++++++++++++++++++++--------------------------- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/src/main.rs b/src/main.rs index ea2aedb..328ef6a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -79,7 +79,9 @@ impl Processor { fn process(&self, text: String, width_reduction: usize) -> String { // At first, process all block quotes. eprintln!("REDUCTION {}", width_reduction); - let text = parse::BlockQuotes::new(&text).map_to_matches(|t| self.process(t, 2)); + let text = parse::BlockQuotes::new(&text).apply_to_matches_and_join(|t| { + self.process(t, width_reduction + parse::BlockQuotes::FULL_PREFIX_LEN) + }); // Then process the actual text. let ends_on_linebreak = text.ends_with('\n'); let after_space_replace = if self.feature_cfg.keep_spaces_in_links { diff --git a/src/parse.rs b/src/parse.rs index 8c88586..3922099 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -165,23 +165,42 @@ fn to_be_wrapped( #[derive(Debug)] enum RangeMatch<'a> { - Matches(String), + Matches(&'a str), NoMatch(&'a str), } pub struct BlockQuotes<'a>(Vec>); impl<'a> BlockQuotes<'a> { + pub const FULL_PREFIX: &'static str = "> "; + pub const FULL_PREFIX_LEN: usize = Self::FULL_PREFIX.len(); + pub const SHORT_PREFIX: &'static str = ">"; + fn strip_prefix(text: &str) -> String { text.split_inclusive('\n') .map(|t| { - t.strip_prefix('>') - .map(|el| el.trim_start_matches(' ')) + t.strip_prefix(Self::SHORT_PREFIX) + .map(|el| el.strip_prefix(' ').unwrap_or(el)) .unwrap_or(t) }) .collect::() } + fn add_prefix(text: String) -> String { + // The "write!" calls should never fail since we write to a String that we create here. + let mut result = String::from("\n"); + text.split_inclusive('\n').for_each(|line| { + let prefix = if line.len() == 1 { + Self::SHORT_PREFIX + } else { + Self::FULL_PREFIX + }; + write!(result, "{}{}", prefix, line).expect("building block-quote formated result"); + }); + writeln!(result).expect("building block-quote formated result"); + result + } + pub fn new(text: &'a str) -> Self { let mut level: usize = 0; // In case we ever need to iterate over other kinds of syntax, the tag as well as the @@ -219,17 +238,9 @@ impl<'a> BlockQuotes<'a> { } } }) - // .map(|(matches, range)| { - // if matches { - // RangeMatch::Matches(range) - // } else { - // RangeMatch::NoMatch(range) - // } - // }) .map(|(matches, range)| { if matches { - eprintln!("\n\nRANGE {}\n\n", Self::strip_prefix(&text[range.clone()])); - RangeMatch::Matches(Self::strip_prefix(&text[range])) + RangeMatch::Matches(&text[range]) } else { RangeMatch::NoMatch(&text[range]) } @@ -239,29 +250,15 @@ impl<'a> BlockQuotes<'a> { Self(ranges) } - pub fn map_to_matches(self, func: MapFn) -> String + pub fn apply_to_matches_and_join(self, func: MapFn) -> String where MapFn: Fn(String) -> String, { self.0 .into_iter() .map(|el| match el { - RangeMatch::Matches(s) => { - // The "write!" calls should never fail since we write to a String. - let mut result = String::from("\n"); - func(s.to_string()).split_inclusive('\n').for_each(|line| { - let prefix = if line.len() == 1 { "" } else { " " }; - write!(result, ">{}{}", prefix, line) - .expect("building block-quote formated result"); - }); - writeln!(result).expect("building block-quote formated result"); - result - } - // func(s.to_string()) - // .split_inclusive('\n') - // .map(|el| format!("> {}", el)) - // .collect::(), RangeMatch::NoMatch(s) => s.to_string(), + RangeMatch::Matches(s) => Self::add_prefix(func(Self::strip_prefix(s))), }) .collect::() } From 8cd59b6f8de6d624003b199524ab4b2db0a88a2c Mon Sep 17 00:00:00 2001 From: Torsten Long Date: Thu, 15 Aug 2024 16:35:48 +0200 Subject: [PATCH 3/7] Bump version number --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7505b50..35437e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -337,7 +337,7 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "mdslw" -version = "0.11.1" +version = "0.12.0" dependencies = [ "anyhow", "clap", diff --git a/Cargo.toml b/Cargo.toml index 635bd84..07afa28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mdslw" -version = "0.11.1" +version = "0.12.0" edition = "2021" [profile.release] From f5267edb6323ca243dc520cae31ed6ce320a60f4 Mon Sep 17 00:00:00 2001 From: Torsten Long Date: Thu, 15 Aug 2024 17:47:45 +0200 Subject: [PATCH 4/7] Fix missing text or too many line breaks when using block quotes --- src/features.rs | 3 +- src/main.rs | 1 - src/parse.rs | 151 +++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 137 insertions(+), 18 deletions(-) diff --git a/src/features.rs b/src/features.rs index 9f4650b..7659b1f 100644 --- a/src/features.rs +++ b/src/features.rs @@ -98,7 +98,8 @@ mod test { }, }; - let parsed = "keep-spaces-in-links , keep-linebreaks".parse::()?; + let parsed = + "keep-spaces-in-links , keep-linebreaks ,format-block-quotes".parse::()?; assert_eq!(parsed, swapped); Ok(()) diff --git a/src/main.rs b/src/main.rs index 328ef6a..612ce0a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -78,7 +78,6 @@ struct Processor { impl Processor { fn process(&self, text: String, width_reduction: usize) -> String { // At first, process all block quotes. - eprintln!("REDUCTION {}", width_reduction); let text = parse::BlockQuotes::new(&text).apply_to_matches_and_join(|t| { self.process(t, width_reduction + parse::BlockQuotes::FULL_PREFIX_LEN) }); diff --git a/src/parse.rs b/src/parse.rs index 3922099..7bdc9d3 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -188,7 +188,7 @@ impl<'a> BlockQuotes<'a> { fn add_prefix(text: String) -> String { // The "write!" calls should never fail since we write to a String that we create here. - let mut result = String::from("\n"); + let mut result = String::new(); text.split_inclusive('\n').for_each(|line| { let prefix = if line.len() == 1 { Self::SHORT_PREFIX @@ -197,7 +197,6 @@ impl<'a> BlockQuotes<'a> { }; write!(result, "{}{}", prefix, line).expect("building block-quote formated result"); }); - writeln!(result).expect("building block-quote formated result"); result } @@ -215,13 +214,21 @@ impl<'a> BlockQuotes<'a> { opts.insert(Options::ENABLE_SMART_PUNCTUATION); opts.insert(Options::ENABLE_STRIKETHROUGH); - let ranges = Parser::new_ext(text, opts) + let mut start = 0; + + let mut ranges = Parser::new_ext(text, opts) .into_offset_iter() .filter_map(|(event, range)| match event { Event::Start(start) => { level += 1; - if level == 1 { - Some((start == tag, range)) + if level == 1 && start == tag { + // Using a CharRange here to prevent the flat_map below from flattening + // all the ranges, since Range supports flattening but our + // CharRange does not. + Some(CharRange { + start: range.start, + end: range.end, + }) } else { None } @@ -230,26 +237,32 @@ impl<'a> BlockQuotes<'a> { level -= 1; None } - _ => { - if level == 0 { - Some((false, range)) - } else { - None - } - } + _ => None, }) - .map(|(matches, range)| { - if matches { - RangeMatch::Matches(&text[range]) + .flat_map(|range| { + let prev_start = start; + let this_start = range.start; + start = range.end; + + let this = RangeMatch::Matches(&text[range]); + if this_start == prev_start { + vec![this] } else { - RangeMatch::NoMatch(&text[range]) + let missing = RangeMatch::NoMatch(&text[prev_start..this_start]); + vec![missing, this] } }) .collect::>(); + if start != text.len() { + ranges.push(RangeMatch::NoMatch(&text[start..text.len()])); + } + Self(ranges) } + /// The argument `func` should keep a line break at the end if its arguments ends in one. In + /// most cases, it ends in a line break. pub fn apply_to_matches_and_join(self, func: MapFn) -> String where MapFn: Fn(String) -> String, @@ -424,4 +437,110 @@ some code assert_eq!(expected, parsed); } + + #[test] + fn applying_to_no_block_quotes_remains_unchanged() { + let text = r#" +## Some Heading + +Some text without block quotes. + + + +- More text. +- More text. + - Even more text. + - Some text with a [link]. + +```code +some code +``` + +[link]: https://something.com "some link" +"#; + + let unchanged = BlockQuotes::new(text).apply_to_matches_and_join(|_| String::new()); + assert_eq!(text.to_string(), unchanged); + } + + #[test] + fn applying_to_block_quotes() { + let text = r#" +## Some Heading + +Some text with block quotes. + +> This first text is block quoted. +> +>> This text is quoted at the second level. +> +> Some more quotes at the first level. + + + +- More text. +- More text. + - Even more text. + - Some text with a [link]. + +> This second text is also block quoted. +> +> > This text is quoted at the second level. +> +> Some more quotes at the first level. + +```code +some code +``` + +[link]: https://something.com "some link" +"#; + + let expected = r#" +## Some Heading + +Some text with block quotes. + +> 115 + + + +- More text. +- More text. + - Even more text. + - Some text with a [link]. + +> 121 + +```code +some code +``` + +[link]: https://something.com "some link" +"#; + + let changed = + BlockQuotes::new(text).apply_to_matches_and_join(|s| format!("{}\n", s.len())); + assert_eq!(expected, changed); + } + + #[test] + fn flattening_vecs_of_char_ranges_retains_ranges() { + let to_be_flattened = vec![ + vec![CharRange { start: 0, end: 10 }], + vec![ + CharRange { + start: 100, + end: 110, + }, + CharRange { + start: 200, + end: 210, + }, + ], + ]; + let flat = to_be_flattened.into_iter().flatten().collect::>(); + let expected = vec![(0..10), (100..110), (200..210)]; + assert_eq!(expected, flat); + } } From a4d1a9bcc74d1b9f1452e9d8fe51216a62b9b30e Mon Sep 17 00:00:00 2001 From: Torsten Long Date: Thu, 15 Aug 2024 17:54:03 +0200 Subject: [PATCH 5/7] Honour feature format-block-quotes --- src/main.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/main.rs b/src/main.rs index 612ce0a..0f5e481 100644 --- a/src/main.rs +++ b/src/main.rs @@ -78,9 +78,15 @@ struct Processor { impl Processor { fn process(&self, text: String, width_reduction: usize) -> String { // At first, process all block quotes. - let text = parse::BlockQuotes::new(&text).apply_to_matches_and_join(|t| { - self.process(t, width_reduction + parse::BlockQuotes::FULL_PREFIX_LEN) - }); + let text = if self.feature_cfg.format_block_quotes { + log::debug!("formatting text in block quotes"); + parse::BlockQuotes::new(&text).apply_to_matches_and_join(|t| { + self.process(t, width_reduction + parse::BlockQuotes::FULL_PREFIX_LEN) + }) + } else { + log::debug!("not formatting text in block quotes"); + text + }; // Then process the actual text. let ends_on_linebreak = text.ends_with('\n'); let after_space_replace = if self.feature_cfg.keep_spaces_in_links { From d0b9a2f076c379e6db624a3943914ca879f8d5ea Mon Sep 17 00:00:00 2001 From: Torsten Long Date: Thu, 15 Aug 2024 17:55:12 +0200 Subject: [PATCH 6/7] Document feature of formatting in block quotes --- README.md | 2 ++ src/cfg.rs | 1 + 2 files changed, 3 insertions(+) diff --git a/README.md b/README.md index 009705c..f069caf 100644 --- a/README.md +++ b/README.md @@ -246,6 +246,8 @@ Values are resolved in the following order: Do not replace spaces in link texts by [non-breaking spaces][wiki nbsp]. - `keep-linebreaks`: Do not remove existing linebreaks during the line-wrapping process. + - `format-block-quotes`: + Format text in block quotes. - `--completion `: Output shell completion file for the given shell to stdout and exit. The following shells are supported: diff --git a/src/cfg.rs b/src/cfg.rs index 02849d7..02d4d46 100644 --- a/src/cfg.rs +++ b/src/cfg.rs @@ -220,6 +220,7 @@ pub struct CliArgs { /// {n} * keep-spaces-in-links => do not replace spaces in link texts by non-breaking spaces /// {n} * keep-linebreaks => do not remove existing linebreaks during the line-wrapping /// process + /// {n} * format-block-quotes => format text in block quotes /// {n} . #[arg(long, env = "MDSLW_FEATURES", default_value = "\u{200b}")] pub features: ValueWOrigin, From 45a0806565426c64306a103979ce8f461b057fcb Mon Sep 17 00:00:00 2001 From: Torsten Long Date: Thu, 15 Aug 2024 18:14:55 +0200 Subject: [PATCH 7/7] Reduce indentation --- src/main.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/main.rs b/src/main.rs index 0f5e481..407c680 100644 --- a/src/main.rs +++ b/src/main.rs @@ -98,14 +98,11 @@ impl Processor { }; let parsed = parse::parse_markdown(&after_space_replace, &self.feature_cfg.parse_cfg); let filled = ranges::fill_markdown_ranges(parsed, &after_space_replace); - let formatted = wrap::add_linebreaks_and_wrap( - filled, - &self - .max_width - .map(|el| el.checked_sub(width_reduction).unwrap_or(el)), - &self.detector, - &after_space_replace, - ); + let width = &self + .max_width + .map(|el| el.checked_sub(width_reduction).unwrap_or(el)); + let formatted = + wrap::add_linebreaks_and_wrap(filled, width, &self.detector, &after_space_replace); // Keep newlines at the end of the file in tact. They disappear sometimes. let file_end = if !formatted.ends_with('\n') && ends_on_linebreak {