From 77bfaffda27b6d26f2b861b2aa3869084a460816 Mon Sep 17 00:00:00 2001 From: Tshepang Mbambo Date: Tue, 18 Oct 2022 07:46:09 +0200 Subject: [PATCH] add tool to "enforce" semantic line breaks See #1132 --- .gitignore | 1 + ci/semantic-line-breaks/Cargo.lock | 193 ++++++++++++++++++++++++++++ ci/semantic-line-breaks/Cargo.toml | 12 ++ ci/semantic-line-breaks/src/main.rs | 125 ++++++++++++++++++ 4 files changed, 331 insertions(+) create mode 100644 ci/semantic-line-breaks/Cargo.lock create mode 100644 ci/semantic-line-breaks/Cargo.toml create mode 100644 ci/semantic-line-breaks/src/main.rs diff --git a/.gitignore b/.gitignore index 96034e514..dc1a4b17c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ book book.toml ci/date-check/target/ +ci/semantic-line-breaks/target/ # Generated by check-in.sh pulls.json diff --git a/ci/semantic-line-breaks/Cargo.lock b/ci/semantic-line-breaks/Cargo.lock new file mode 100644 index 000000000..b84ce4a23 --- /dev/null +++ b/ci/semantic-line-breaks/Cargo.lock @@ -0,0 +1,193 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1485d4d2cc45e7b201ee3767015c96faa5904387c9d87c6efdd0fb511f12d305" + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "memchr", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crossbeam-utils" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "globset" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a1e17342619edbc21a964c2afbeb6c820c6a2560032872f397bb97ea127bd0a" +dependencies = [ + "aho-corasick", + "bstr", + "fnv", + "log", + "regex", +] + +[[package]] +name = "ignore" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "713f1b139373f96a2e0ce3ac931cd01ee973c3c5dd7c40c0c2efe96ad2b6751d" +dependencies = [ + "crossbeam-utils", + "globset", + "lazy_static", + "log", + "memchr", + "regex", + "same-file", + "thread_local", + "walkdir", + "winapi-util", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "once_cell" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "074864da206b4973b84eb91683020dbefd6a8c3f0f38e054d93954e891935e4e" + +[[package]] +name = "regex" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "semantic-line-breaks" +version = "0.0.0" +dependencies = [ + "anyhow", + "ignore", + "regex", +] + +[[package]] +name = "thread_local" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" +dependencies = [ + "once_cell", +] + +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/ci/semantic-line-breaks/Cargo.toml b/ci/semantic-line-breaks/Cargo.toml new file mode 100644 index 000000000..656b4a215 --- /dev/null +++ b/ci/semantic-line-breaks/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "semantic-line-breaks" +version = "0.0.0" +edition = "2021" + +[dependencies] +anyhow = "1" +ignore = "0.4" + +[dependencies.regex] +version = "1" +features = ["pattern"] diff --git a/ci/semantic-line-breaks/src/main.rs b/ci/semantic-line-breaks/src/main.rs new file mode 100644 index 000000000..1e7c8ba7e --- /dev/null +++ b/ci/semantic-line-breaks/src/main.rs @@ -0,0 +1,125 @@ +use std::{env, fs, process}; + +use anyhow::Result; +use ignore::Walk; +use regex::Regex; + +fn main() -> Result<()> { + let mut args = env::args(); + if args.len() == 1 { + eprintln!("error: expected root Markdown directory as CLI argument"); + process::exit(1); + } + let root_dir = args.nth(1).unwrap(); + for result in Walk::new(root_dir) { + let entry = result?; + if entry.file_type().expect("no stdin").is_dir() { + continue; + } + let path = entry.path(); + if let Some(extension) = path.extension() { + if extension != "md" { + continue; + } + } else { + continue; + } + let old = fs::read_to_string(path)?; + let new = comply(&old)?; + if new != old { + fs::write(path, new)?; + } + } + Ok(()) +} + +fn comply(content: &str) -> Result { + let content: Vec<_> = content.lines().map(|line| line.to_owned()).collect(); + let mut new_content = content.clone(); + let mut new_n = 0; + let mut in_code_block = false; + let split_re = Regex::new(r"(\.|\?|;|!)\s+")?; + let ignore_re = Regex::new(r"(\d\.|\-|\*|r\?)\s+")?; + for (n, line) in content.iter().enumerate() { + if n != 0 { + new_n += 1; + } + if ignore_re.is_match(line) { + continue; + } + // headings + if line.starts_with('#') { + continue; + } + let line = line.trim_end(); + if line.is_empty() { + continue; + } + // not eol + if line.contains("e.g.") { + continue; + } + // not eol + if line.contains("i.e.") { + continue; + } + // tables + if line.contains(" | ") { + continue; + } + // code blocks + if line.starts_with("```") { + if in_code_block { + in_code_block = false; + } else { + in_code_block = true; + continue; + } + } + if in_code_block { + continue; + } + if split_re.is_match(line) { + let indent = line.find(|ch: char| !ch.is_whitespace()).unwrap(); + let new_lines: Vec<_> = line + .split_inclusive(&split_re) + .map(|portion| format!("{:indent$}{}", "", portion.trim())) + .collect(); + new_content.splice(new_n..new_n + 1, new_lines.clone()); + new_n += new_lines.len() - 1; + } + } + Ok(new_content.join("\n") + "\n") +} + +#[test] +fn test() { + let original = "\ +# some heading + +must! be; split? now. +1. ignore numbered +ignore | tables +ignore e.g. and i.e. for realsies +``` +some code. block +``` +some more text. +"; + let reformatted = "\ +# some heading + +must! +be; +split? +now. +1. ignore numbered +ignore | tables +ignore e.g. and i.e. for realsies +``` +some code. block +``` +some more text. +"; + assert_eq!(comply(original).unwrap(), reformatted); +}