diff --git a/Cargo.lock b/Cargo.lock
index fd5943d..958fb70 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -110,6 +110,7 @@ dependencies = [
  "serde",
  "serde_derive",
  "serde_json",
+ "toml",
  "ureq",
  "url",
  "walkdir",
@@ -1141,6 +1142,15 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
 
+[[package]]
+name = "toml"
+version = "0.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "treeline"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index a65c850..397df77 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -36,6 +36,7 @@ regex = { version = "1", default-features = false, features = ["std", "perf"] }
 ureq = { version = "2.0.1", features = ["tls"], default-features = false }
 serde = "1.0"
 serde_derive = "1.0"
+toml = "0.5"
 url = "2"
 # Try to keep this in sync with `url`'s version
 percent-encoding = "2"
diff --git a/src/bin/deadlinks.rs b/src/bin/deadlinks.rs
index d7b85b0..ddc23cf 100644
--- a/src/bin/deadlinks.rs
+++ b/src/bin/deadlinks.rs
@@ -17,6 +17,7 @@ Options:
     --check-http          Check 'http' and 'https' scheme links
     --forbid-http         Give an error if HTTP links are found. This is incompatible with --check-http.
     --ignore-fragments    Don't check URL fragments.
+    --ignore-file         Path to a file with ignores. Defaults to `deadlinks.toml`.
     --debug               Use debug output
     -v --verbose          Use verbose output
     -V --version          Print version info and exit.
@@ -25,6 +26,7 @@
 #[derive(Debug, Deserialize)]
 struct MainArgs {
     arg_directory: Vec<PathBuf>,
+    arg_ignore_file: Option<PathBuf>,
     flag_verbose: bool,
     flag_debug: bool,
     flag_check_http: bool,
@@ -41,11 +43,20 @@
         } else {
             HttpCheck::Ignored
         };
+        let (ignored_links, ignored_intra_doc_links) = match shared::parse_ignore_file(args.arg_ignore_file.clone()) {
+            Ok(x) => x,
+            Err(err) => {
+                eprintln!("error: {}", err);
+                std::process::exit(1);
+            }
+        };
         CheckContext {
             check_http,
             verbose: args.flag_debug,
             check_fragments: !args.flag_ignore_fragments,
             check_intra_doc_links: false,
+            ignored_links,
+            ignored_intra_doc_links,
         }
     }
 }
@@ -65,6 +76,7 @@ fn parse_args() -> Result {
         flag_ignore_fragments: args.contains("--ignore-fragments"),
         flag_check_http: args.contains("--check-http"),
         flag_forbid_http: args.contains("--forbid-http"),
+        arg_ignore_file: args.opt_value_from_os_str("--ignore-file", |os_str| Result::<_, std::convert::Infallible>::Ok(PathBuf::from(os_str))).unwrap(),
         arg_directory: args.free_os()?.into_iter().map(Into::into).collect(),
     };
     if args.flag_forbid_http && args.flag_check_http {
diff --git a/src/bin/shared.rs b/src/bin/shared.rs
index 6e7f11c..9a9aa85 100644
--- a/src/bin/shared.rs
+++ b/src/bin/shared.rs
@@ -1,6 +1,66 @@
+use cargo_deadlinks::IgnoredFile;
 use log::LevelFilter;
 use pico_args::Error;
-use std::fmt::{self, Display};
+use toml::Value;
+use std::{fmt::{self, Display}, path::PathBuf};
+
+pub fn parse_ignore_file(path: Option<PathBuf>) -> Result<(Vec<IgnoredFile>, Vec<IgnoredFile>), Box<dyn std::error::Error>> {
+    let is_required = path.is_some();
+    let path = path.unwrap_or("deadlinks.toml".into());
+
+    let contents = match std::fs::read_to_string(path) {
+        Ok(toml) => toml,
+        Err(err) => {
+            return if is_required {
+                Err(err.into())
+            } else {
+                // We proactively looked for `deadlinks.toml`, but it wasn't there.
+                // Pretend it was an empty file.
+                Ok((vec![], vec![]))
+            };
+        }
+    };
+    let val: Value = contents.parse()?;
+    let ignores = match val {
+        Value::Table(values) => values,
+        _ => Err(format!("invalid TOML format: expected a top-level table, got {:?}", val))?,
+    };
+
+    let parse = |val: Value, files: &mut Vec<_>| -> Result<_, Box<dyn std::error::Error>> {
+        let map = match val {
+            Value::Table(map) => map,
+            _ => Err(format!("invalid TOML format: expected a table, got {}", val))?,
+        };
+        for (file, val) in map {
+            let links = match val {
+                Value::Array(links) => links,
+                _ => Err(format!("invalid TOML format: expected a list of links, got {}", val))?,
+            };
+            let links = links.into_iter().map(|val| match val {
+                Value::String(link) => Ok(link),
+                _ => Err(format!("invalid TOML format: expected a string, got {}", val)),
+            }).collect::<Result<_, _>>()?;
+            files.push(IgnoredFile {
+                path: PathBuf::from(file),
+                links,
+            });
+        }
+        Ok(())
+    };
+
+    let (mut ignored_links, mut ignored_intra_doc_links) = (vec![], vec![]);
+    for (key, val) in ignores {
+        if key == "fragments" {
+            parse(val, &mut ignored_links)
+        } else if key == "intra_doc_links" {
+            parse(val, &mut ignored_intra_doc_links)
+        } else {
+            Err(format!("invalid TOML format: expected 'fragments' or 'intra_doc_links', got {}", key).into())
+        }?
+    }
+
+    Ok((ignored_links, ignored_intra_doc_links))
+}
 
 /// Initalizes the logger according to the provided config flags.
 pub fn init_logger(debug: bool, verbose: bool, krate: &str) {
diff --git a/src/check.rs b/src/check.rs
index b37869f..f3ba85c 100644
--- a/src/check.rs
+++ b/src/check.rs
@@ -84,6 +84,8 @@ pub enum CheckError {
     /// An HTTP URL was encountered, but HTTP checking was forbidden
     HttpForbidden(Url),
     /// The linked file existed, but was missing the linked HTML anchor
+    ///
+    /// (`link`, `fragment`, `missing range`)
     Fragment(Link, String, Option<Range<usize>>),
     /// An error occured while trying to find whether the file or URL existed
     Io(Box),
diff --git a/src/lib.rs b/src/lib.rs
index 6ec0237..07f83ab 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,7 +4,7 @@ use std::{
     path::{Path, PathBuf},
 };
 
-use log::info;
+use log::{info, debug};
 use rayon::prelude::*;
 use rayon::ThreadPoolBuilder;
 use url::Url;
@@ -20,9 +20,9 @@ mod parse;
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 /// What behavior should deadlinks use for HTTP links?
 pub enum HttpCheck {
-    /// Make an internet request to ensure the link works
+    /// Make an internet request to ensure the link works.
     Enabled,
-    /// Do nothing when encountering a link
+    /// Do nothing when encountering a link.
     Ignored,
     /// Give an error when encountering a link.
     ///
@@ -32,12 +32,33 @@
 }
 
 // NOTE: this could be Copy, but we intentionally choose not to guarantee that.
+/// Link-checking options.
 #[derive(Clone, Debug)]
 pub struct CheckContext {
+    /// Should deadlinks give more detail when checking links?
+    ///
+    /// Currently, 'more detail' just means not to abbreviate file paths when printing errors.
     pub verbose: bool,
+    /// What behavior should deadlinks use for HTTP links?
     pub check_http: HttpCheck,
+    /// Should fragments in URLs be checked?
     pub check_fragments: bool,
     pub check_intra_doc_links: bool,
+    /// A list of files with ignored link fragments.
+    pub ignored_links: Vec<IgnoredFile>,
+    /// A list of files with ignored intra-doc links.
+    pub ignored_intra_doc_links: Vec<IgnoredFile>,
+}
+
+/// A file to ignore.
+#[derive(Clone, Debug)]
+pub struct IgnoredFile {
+    /// What file path should be ignored?
+    pub path: PathBuf,
+    /// What links in the file should be ignored?
+    ///
+    /// An empty list means all links should be ignored.
+    pub links: Vec<String>,
 }
 
 impl Default for CheckContext {
@@ -47,6 +68,8 @@
             verbose: false,
             check_fragments: true,
             check_intra_doc_links: false,
+            ignored_links: Vec::new(),
+            ignored_intra_doc_links: Vec::new(),
         }
     }
 }
@@ -72,6 +95,9 @@ impl fmt::Display for FileError {
 /// For each error that occurred, print an error message.
 /// Returns whether an error occurred.
 pub fn walk_dir(dir_path: &Path, ctx: &CheckContext) -> bool {
+    debug!("ignored_links: {:?}", ctx.ignored_links);
+    debug!("ignored_intra_doc_links: {:?}", ctx.ignored_intra_doc_links);
+
     let pool = ThreadPoolBuilder::new()
         .num_threads(num_cpus::get())
         .build()
@@ -79,12 +105,57 @@
 
     pool.install(|| {
         unavailable_urls(dir_path, ctx)
-            .map(|mut err| {
+            .filter_map(|mut file_err| {
+                let shortened_path = file_err.path.strip_prefix(dir_path).unwrap_or(dir_path);
+                debug!("file_err={:?}, shortened_path={:?}", file_err, shortened_path);
+
+                // First, filter out ignored errors
+                if let Some(ignore) = ctx.ignored_links.iter().find(|ignore| ignore.path == shortened_path) {
+                    file_err.errors.retain(|err| {
+                        let should_ignore = if ignore.links.is_empty() {
+                            // Ignore all links
+                            matches!(err, CheckError::Http(_) | CheckError::File(_) | CheckError::Fragment(..))
+                        } else {
+                            // Ignore links that are present in the list
+                            match err {
+                                CheckError::Fragment(_, fragment, _) => ignore.links.iter().any(|link| {
+                                    let link = if link.starts_with('#') { &link[1..] } else { link.as_str() };
+                                    link == fragment
+                                }),
+                                CheckError::File(path) => ignore.links.iter().any(|link| Path::new(link) == path),
+                                CheckError::Http(url) => ignore.links.iter().any(|link| link == url.as_str()),
+                                CheckError::IntraDocLink(_) | CheckError::HttpForbidden(_) | CheckError::Io(_) => false,
+                            }
+                        };
+                        !should_ignore
+                    });
+                }
+                if let Some(ignore) = ctx.ignored_intra_doc_links.iter().find(|ignore| ignore.path == shortened_path) {
+                    file_err.errors.retain(|err| {
+                        let should_ignore = if ignore.links.is_empty() {
+                            // Ignore all links
+                            matches!(err, CheckError::IntraDocLink(_))
+                        } else {
+                            // Ignore links that are present in the list
+                            match err {
+                                CheckError::IntraDocLink(link) => ignore.links.contains(link),
+                                _ => false,
+                            }
+                        };
+                        !should_ignore
+                    });
+                }
+
+                if file_err.errors.is_empty() {
+                    return None;
+                }
+
+                // Next, print the error for display
                 if !ctx.verbose {
-                    err.shorten_all(dir_path);
+                    file_err.shorten_all(dir_path);
                 }
-                println!("{}", err);
-                true
+                println!("{}", file_err);
+                Some(true)
             })
             .reduce(|| false, |initial, new| initial || new)
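
For reference, here is a minimal sketch of an ignore file that the `parse_ignore_file` function added above would accept, assuming the default `deadlinks.toml` location. The top-level tables are `fragments` and `intra_doc_links`; each maps a file path (relative to the directory being checked) to a list of links to ignore, and an empty list ignores every matching error for that file. The crate name, file paths, and links shown are hypothetical examples, not part of this patch:

```toml
# deadlinks.toml (hypothetical example)
[fragments]
# Ignore only these fragment links in this file (a leading '#' is optional).
"my_crate/struct.Foo.html" = ["#method.bar"]
# An empty list ignores all of this file's link errors (HTTP, file, and fragment).
"my_crate/index.html" = []

[intra_doc_links]
"my_crate/fn.baz.html" = ["crate::does_not_exist"]
```

With no `--ignore-file` argument the binaries fall back to `deadlinks.toml` and silently treat a missing file as empty, so passing the flag (e.g. `deadlinks target/doc --ignore-file my-ignores.toml`) is only needed for a non-default path.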