diff --git a/Cargo.lock b/Cargo.lock index 7505b50..35437e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -337,7 +337,7 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "mdslw" -version = "0.11.1" +version = "0.12.0" dependencies = [ "anyhow", "clap", diff --git a/Cargo.toml b/Cargo.toml index 635bd84..07afa28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mdslw" -version = "0.11.1" +version = "0.12.0" edition = "2021" [profile.release] diff --git a/README.md b/README.md index 009705c..f069caf 100644 --- a/README.md +++ b/README.md @@ -246,6 +246,8 @@ Values are resolved in the following order: Do not replace spaces in link texts by [non-breaking spaces][wiki nbsp]. - `keep-linebreaks`: Do not remove existing linebreaks during the line-wrapping process. + - `format-block-quotes`: + Format text in block quotes. - `--completion `: Output shell completion file for the given shell to stdout and exit. The following shells are supported: diff --git a/src/cfg.rs b/src/cfg.rs index 02849d7..02d4d46 100644 --- a/src/cfg.rs +++ b/src/cfg.rs @@ -220,6 +220,7 @@ pub struct CliArgs { /// {n} * keep-spaces-in-links => do not replace spaces in link texts by non-breaking spaces /// {n} * keep-linebreaks => do not remove existing linebreaks during the line-wrapping /// process + /// {n} * format-block-quotes => format text in block quotes /// {n} . #[arg(long, env = "MDSLW_FEATURES", default_value = "\u{200b}")] pub features: ValueWOrigin, diff --git a/src/features.rs b/src/features.rs index 66478b3..7659b1f 100644 --- a/src/features.rs +++ b/src/features.rs @@ -23,6 +23,7 @@ use crate::parse::ParseCfg; #[derive(Debug, PartialEq)] pub struct FeatureCfg { pub keep_spaces_in_links: bool, + pub format_block_quotes: bool, pub break_cfg: BreakCfg, pub parse_cfg: ParseCfg, } @@ -31,6 +32,7 @@ impl Default for FeatureCfg { fn default() -> Self { FeatureCfg { keep_spaces_in_links: false, + format_block_quotes: false, parse_cfg: ParseCfg { keep_linebreaks: false, }, @@ -57,6 +59,7 @@ impl std::str::FromStr for FeatureCfg { { match feature { "keep-spaces-in-links" => cfg.keep_spaces_in_links = true, + "format-block-quotes" => cfg.format_block_quotes = true, "keep-linebreaks" => { cfg.parse_cfg.keep_linebreaks = true; cfg.break_cfg.keep_linebreaks = true; @@ -86,6 +89,7 @@ mod test { let default = FeatureCfg::default(); let swapped = FeatureCfg { keep_spaces_in_links: !default.keep_spaces_in_links, + format_block_quotes: !default.format_block_quotes, parse_cfg: ParseCfg { keep_linebreaks: !default.parse_cfg.keep_linebreaks, }, @@ -94,7 +98,8 @@ mod test { }, }; - let parsed = "keep-spaces-in-links , keep-linebreaks".parse::()?; + let parsed = + "keep-spaces-in-links , keep-linebreaks ,format-block-quotes".parse::()?; assert_eq!(parsed, swapped); Ok(()) diff --git a/src/main.rs b/src/main.rs index 9b5e23b..407c680 100644 --- a/src/main.rs +++ b/src/main.rs @@ -69,6 +69,52 @@ fn generate_report( } } +struct Processor { + feature_cfg: features::FeatureCfg, + detector: detect::BreakDetector, + max_width: Option, +} + +impl Processor { + fn process(&self, text: String, width_reduction: usize) -> String { + // At first, process all block quotes. + let text = if self.feature_cfg.format_block_quotes { + log::debug!("formatting text in block quotes"); + parse::BlockQuotes::new(&text).apply_to_matches_and_join(|t| { + self.process(t, width_reduction + parse::BlockQuotes::FULL_PREFIX_LEN) + }) + } else { + log::debug!("not formatting text in block quotes"); + text + }; + // Then process the actual text. + let ends_on_linebreak = text.ends_with('\n'); + let after_space_replace = if self.feature_cfg.keep_spaces_in_links { + log::debug!("not replacing spaces in links by non-breaking spaces"); + text + } else { + log::debug!("replacing spaces in links by non-breaking spaces"); + replace::replace_spaces_in_links_by_nbsp(text) + }; + let parsed = parse::parse_markdown(&after_space_replace, &self.feature_cfg.parse_cfg); + let filled = ranges::fill_markdown_ranges(parsed, &after_space_replace); + let width = &self + .max_width + .map(|el| el.checked_sub(width_reduction).unwrap_or(el)); + let formatted = + wrap::add_linebreaks_and_wrap(filled, width, &self.detector, &after_space_replace); + + // Keep newlines at the end of the file in tact. They disappear sometimes. + let file_end = if !formatted.ends_with('\n') && ends_on_linebreak { + log::debug!("adding missing trailing newline character"); + "\n" + } else { + "" + }; + format!("{}{}", formatted, file_end) + } +} + fn process( document: String, file_dir: &PathBuf, @@ -95,6 +141,11 @@ fn process( log::debug!("limiting line length to {} characters", cfg.max_width); Some(cfg.max_width) }; + let processor = Processor { + feature_cfg, + detector, + max_width, + }; // Actually process the text. let (frontmatter, text) = frontmatter::split_frontmatter(document.clone()); @@ -107,28 +158,7 @@ fn process( text }; - let after_space_replace = if feature_cfg.keep_spaces_in_links { - log::debug!("not replacing spaces in links by non-breaking spaces"); - after_upstream - } else { - log::debug!("replacing spaces in links by non-breaking spaces"); - replace::replace_spaces_in_links_by_nbsp(after_upstream) - }; - - let parsed = parse::parse_markdown(&after_space_replace, &feature_cfg.parse_cfg); - let filled = ranges::fill_markdown_ranges(parsed, &after_space_replace); - let formatted = - wrap::add_linebreaks_and_wrap(filled, &max_width, &detector, &after_space_replace); - - // Keep newlines at the end of the file in tact. They disappear sometimes. - let file_end = if !formatted.ends_with('\n') && document.ends_with('\n') { - log::debug!("adding missing trailing newline character"); - "\n" - } else { - "" - }; - - let processed = format!("{}{}{}", frontmatter, formatted, file_end); + let processed = format!("{}{}", frontmatter, processor.process(after_upstream, 0)); Ok((processed, document)) } diff --git a/src/parse.rs b/src/parse.rs index 2f44492..7bdc9d3 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -18,6 +18,7 @@ along with this program. If not, see . use core::ops::Range; use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd}; use std::collections::HashMap; +use std::fmt::Write; use crate::detect::WhitespaceDetector; use crate::ignore::IgnoreByHtmlComment; @@ -162,6 +163,120 @@ fn to_be_wrapped( .collect::>() } +#[derive(Debug)] +enum RangeMatch<'a> { + Matches(&'a str), + NoMatch(&'a str), +} + +pub struct BlockQuotes<'a>(Vec>); + +impl<'a> BlockQuotes<'a> { + pub const FULL_PREFIX: &'static str = "> "; + pub const FULL_PREFIX_LEN: usize = Self::FULL_PREFIX.len(); + pub const SHORT_PREFIX: &'static str = ">"; + + fn strip_prefix(text: &str) -> String { + text.split_inclusive('\n') + .map(|t| { + t.strip_prefix(Self::SHORT_PREFIX) + .map(|el| el.strip_prefix(' ').unwrap_or(el)) + .unwrap_or(t) + }) + .collect::() + } + + fn add_prefix(text: String) -> String { + // The "write!" calls should never fail since we write to a String that we create here. + let mut result = String::new(); + text.split_inclusive('\n').for_each(|line| { + let prefix = if line.len() == 1 { + Self::SHORT_PREFIX + } else { + Self::FULL_PREFIX + }; + write!(result, "{}{}", prefix, line).expect("building block-quote formated result"); + }); + result + } + + pub fn new(text: &'a str) -> Self { + let mut level: usize = 0; + // In case we ever need to iterate over other kinds of syntax, the tag as well as the + // function stripping prefixes will have to be adjusted. + let tag = Tag::BlockQuote; + + let mut opts = Options::empty(); + opts.insert(Options::ENABLE_TABLES); + opts.insert(Options::ENABLE_FOOTNOTES); + opts.insert(Options::ENABLE_TASKLISTS); + opts.insert(Options::ENABLE_HEADING_ATTRIBUTES); + opts.insert(Options::ENABLE_SMART_PUNCTUATION); + opts.insert(Options::ENABLE_STRIKETHROUGH); + + let mut start = 0; + + let mut ranges = Parser::new_ext(text, opts) + .into_offset_iter() + .filter_map(|(event, range)| match event { + Event::Start(start) => { + level += 1; + if level == 1 && start == tag { + // Using a CharRange here to prevent the flat_map below from flattening + // all the ranges, since Range supports flattening but our + // CharRange does not. + Some(CharRange { + start: range.start, + end: range.end, + }) + } else { + None + } + } + Event::End(_) => { + level -= 1; + None + } + _ => None, + }) + .flat_map(|range| { + let prev_start = start; + let this_start = range.start; + start = range.end; + + let this = RangeMatch::Matches(&text[range]); + if this_start == prev_start { + vec![this] + } else { + let missing = RangeMatch::NoMatch(&text[prev_start..this_start]); + vec![missing, this] + } + }) + .collect::>(); + + if start != text.len() { + ranges.push(RangeMatch::NoMatch(&text[start..text.len()])); + } + + Self(ranges) + } + + /// The argument `func` should keep a line break at the end if its arguments ends in one. In + /// most cases, it ends in a line break. + pub fn apply_to_matches_and_join(self, func: MapFn) -> String + where + MapFn: Fn(String) -> String, + { + self.0 + .into_iter() + .map(|el| match el { + RangeMatch::NoMatch(s) => s.to_string(), + RangeMatch::Matches(s) => Self::add_prefix(func(Self::strip_prefix(s))), + }) + .collect::() + } +} + /// Check whether there is nothing but whitespace between the end of the previous range and the /// start of the next one, if the ranges do not connect directly anyway. Note that we still keep /// paragraphs separated by keeping ranges separate that are separated by more linebreaks than one. @@ -322,4 +437,110 @@ some code assert_eq!(expected, parsed); } + + #[test] + fn applying_to_no_block_quotes_remains_unchanged() { + let text = r#" +## Some Heading + +Some text without block quotes. + + + +- More text. +- More text. + - Even more text. + - Some text with a [link]. + +```code +some code +``` + +[link]: https://something.com "some link" +"#; + + let unchanged = BlockQuotes::new(text).apply_to_matches_and_join(|_| String::new()); + assert_eq!(text.to_string(), unchanged); + } + + #[test] + fn applying_to_block_quotes() { + let text = r#" +## Some Heading + +Some text with block quotes. + +> This first text is block quoted. +> +>> This text is quoted at the second level. +> +> Some more quotes at the first level. + + + +- More text. +- More text. + - Even more text. + - Some text with a [link]. + +> This second text is also block quoted. +> +> > This text is quoted at the second level. +> +> Some more quotes at the first level. + +```code +some code +``` + +[link]: https://something.com "some link" +"#; + + let expected = r#" +## Some Heading + +Some text with block quotes. + +> 115 + + + +- More text. +- More text. + - Even more text. + - Some text with a [link]. + +> 121 + +```code +some code +``` + +[link]: https://something.com "some link" +"#; + + let changed = + BlockQuotes::new(text).apply_to_matches_and_join(|s| format!("{}\n", s.len())); + assert_eq!(expected, changed); + } + + #[test] + fn flattening_vecs_of_char_ranges_retains_ranges() { + let to_be_flattened = vec![ + vec![CharRange { start: 0, end: 10 }], + vec![ + CharRange { + start: 100, + end: 110, + }, + CharRange { + start: 200, + end: 210, + }, + ], + ]; + let flat = to_be_flattened.into_iter().flatten().collect::>(); + let expected = vec![(0..10), (100..110), (200..210)]; + assert_eq!(expected, flat); + } }