Skip to content

Commit

Permalink
Initial prototype for ignore files
Browse files Browse the repository at this point in the history
  • Loading branch information
jyn514 committed Jan 18, 2021
1 parent 36c3a9d commit 23baa46
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 8 deletions.
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ regex = { version = "1", default-features = false, features = ["std", "perf"] }
ureq = { version = "2.0.1", features = ["tls"], default-features = false }
serde = "1.0"
serde_derive = "1.0"
toml = "0.5"
url = "2"
# Try to keep this in sync with `url`'s version
percent-encoding = "2"
Expand Down
12 changes: 12 additions & 0 deletions src/bin/deadlinks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Options:
--check-http Check 'http' and 'https' scheme links
--forbid-http Give an error if HTTP links are found. This is incompatible with --check-http.
--ignore-fragments Don't check URL fragments.
--ignore-file Path to a file with ignores. Defaults to `deadlinks.toml`.
--debug Use debug output
-v --verbose Use verbose output
-V --version Print version info and exit.
Expand All @@ -25,6 +26,7 @@ Options:
#[derive(Debug, Deserialize)]
struct MainArgs {
arg_directory: Vec<PathBuf>,
arg_ignore_file: Option<PathBuf>,
flag_verbose: bool,
flag_debug: bool,
flag_check_http: bool,
Expand All @@ -41,11 +43,20 @@ impl From<&MainArgs> for CheckContext {
} else {
HttpCheck::Ignored
};
let (ignored_links, ignored_intra_doc_links) = match shared::parse_ignore_file(args.arg_ignore_file.clone()) {
Ok(x) => x,
Err(err) => {
eprintln!("error: {}", err);
std::process::exit(1);
}
};
CheckContext {
check_http,
verbose: args.flag_debug,
check_fragments: !args.flag_ignore_fragments,
check_intra_doc_links: false,
ignored_links,
ignored_intra_doc_links,
}
}
}
Expand All @@ -65,6 +76,7 @@ fn parse_args() -> Result<MainArgs, shared::PicoError> {
flag_ignore_fragments: args.contains("--ignore-fragments"),
flag_check_http: args.contains("--check-http"),
flag_forbid_http: args.contains("--forbid-http"),
arg_ignore_file: args.opt_value_from_os_str("--ignore-file", |os_str| Result::<_, std::convert::Infallible>::Ok(PathBuf::from(os_str))).unwrap(),
arg_directory: args.free_os()?.into_iter().map(Into::into).collect(),
};
if args.flag_forbid_http && args.flag_check_http {
Expand Down
62 changes: 61 additions & 1 deletion src/bin/shared.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,66 @@
use cargo_deadlinks::IgnoredFile;
use log::LevelFilter;
use pico_args::Error;
use std::fmt::{self, Display};
use toml::Value;
use std::{fmt::{self, Display}, path::PathBuf};

pub fn parse_ignore_file(path: Option<PathBuf>) -> Result<(Vec<IgnoredFile>, Vec<IgnoredFile>), Box<dyn std::error::Error>> {
let is_required = path.is_some();
let path = path.unwrap_or("deadlinks.toml".into());

let contents = match std::fs::read_to_string(path) {
Ok(toml) => toml,
Err(err) => {
return if is_required {
Err(err.into())
} else {
// We proactively looked for `deadlinks.toml`, but it wasn't there.
// Pretend it was an empty file.
Ok((vec![], vec![]))
};
}
};
let val: Value = contents.parse()?;
let ignores = match val {
Value::Table(values) => values,
_ => Err(format!("invalid TOML format: expected a top-level table, got {:?}", val))?,
};

let parse = |val: Value, files: &mut Vec<_>| -> Result<_, Box<dyn std::error::Error>> {
let map = match val {
Value::Table(map) => map,
_ => Err(format!("invalid TOML format: expected a table, got {}", val))?,
};
for (file, val) in map {
let links = match val {
Value::Array(links) => links,
_ => Err(format!("invalid TOML format: expected a list of links, got {}", val))?,
};
let links = links.into_iter().map(|val| match val {
Value::String(link) => Ok(link),
_ => Err(format!("invalid TOML format: expected a string, got {}", val)),
}).collect::<Result<_, _>>()?;
files.push(IgnoredFile {
path: PathBuf::from(file),
links,
});
}
Ok(())
};

let (mut ignored_links, mut ignored_intra_doc_links) = (vec![], vec![]);
for (key, val) in ignores {
if key == "fragments" {
parse(val, &mut ignored_links)
} else if key == "intra_doc_links" {
parse(val, &mut ignored_intra_doc_links)
} else {
Err(format!("invalid TOML format: expected 'fragments' or 'intra_doc_links', got {}", key).into())
}?
}

Ok((ignored_links, ignored_intra_doc_links))
}

/// Initializes the logger according to the provided config flags.
pub fn init_logger(debug: bool, verbose: bool, krate: &str) {
Expand Down
2 changes: 2 additions & 0 deletions src/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ pub enum CheckError {
/// An HTTP URL was encountered, but HTTP checking was forbidden
HttpForbidden(Url),
/// The linked file existed, but was missing the linked HTML anchor
///
/// (`link`, `fragment`, `missing range`)
Fragment(Link, String, Option<Vec<String>>),
/// An error occurred while trying to find whether the file or URL existed
Io(Box<IoError>),
Expand Down
85 changes: 78 additions & 7 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::{
path::{Path, PathBuf},
};

use log::info;
use log::{info, debug};
use rayon::prelude::*;
use rayon::ThreadPoolBuilder;
use url::Url;
Expand All @@ -20,9 +20,9 @@ mod parse;
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
/// What behavior should deadlinks use for HTTP links?
pub enum HttpCheck {
/// Make an internet request to ensure the link works
/// Make an internet request to ensure the link works.
Enabled,
/// Do nothing when encountering a link
/// Do nothing when encountering a link.
Ignored,
/// Give an error when encountering a link.
///
Expand All @@ -32,12 +32,33 @@ pub enum HttpCheck {
}

// NOTE: this is intentionally not `Copy`; now that it owns `Vec`s it could not be anyway.
/// Link-checking options.
#[derive(Clone, Debug)]
pub struct CheckContext {
/// Should deadlinks give more detail when checking links?
///
/// Currently, 'more detail' just means not to abbreviate file paths when printing errors.
pub verbose: bool,
/// What behavior should deadlinks use for HTTP links?
pub check_http: HttpCheck,
/// Should fragments in URLs be checked?
pub check_fragments: bool,
/// Presumably: should intra-doc links be checked?
// NOTE(review): the code that reads this flag is not visible here — confirm the exact meaning.
pub check_intra_doc_links: bool,
/// A list of files with ignored links.
///
/// `walk_dir` uses these entries to drop HTTP, file and fragment errors
/// reported for the matching file.
pub ignored_links: Vec<IgnoredFile>,
/// A list of files with ignored intra-doc links.
///
/// `walk_dir` uses these entries to drop intra-doc link errors reported for the matching file.
pub ignored_intra_doc_links: Vec<IgnoredFile>,
}

/// A file whose broken links (all of them, or a listed subset) should not be reported.
#[derive(Clone, Debug)]
pub struct IgnoredFile {
/// What file path should be ignored?
///
/// `walk_dir` compares this for equality against the erroring file's path
/// with the checked directory (`dir_path`) prefix stripped.
pub path: PathBuf,
/// What links in the file should be ignored?
///
/// An empty list means all links should be ignored.
/// When matching fragment errors, a leading `#` on an entry is skipped before comparing.
pub links: Vec<String>,
}

impl Default for CheckContext {
Expand All @@ -47,6 +68,8 @@ impl Default for CheckContext {
verbose: false,
check_fragments: true,
check_intra_doc_links: false,
ignored_links: Vec::new(),
ignored_intra_doc_links: Vec::new(),
}
}
}
Expand All @@ -72,19 +95,67 @@ impl fmt::Display for FileError {
/// For each error that occurred, print an error message.
/// Returns whether an error occurred.
pub fn walk_dir(dir_path: &Path, ctx: &CheckContext) -> bool {
debug!("ignored_links: {:?}", ctx.ignored_links);
debug!("ignored_intra_doc_links: {:?}", ctx.ignored_intra_doc_links);

let pool = ThreadPoolBuilder::new()
.num_threads(num_cpus::get())
.build()
.unwrap();

pool.install(|| {
unavailable_urls(dir_path, ctx)
.map(|mut err| {
.filter_map(|mut file_err| {
let shortened_path = file_err.path.strip_prefix(dir_path).unwrap_or(dir_path);
debug!("file_err={:?}, shortened_path={:?}", file_err, shortened_path);

// First, filter out ignored errors
if let Some(ignore) = ctx.ignored_links.iter().find(|ignore| ignore.path == shortened_path) {
file_err.errors.retain(|err| {
let should_ignore = if ignore.links.is_empty() {
// Ignore all links
matches!(err, CheckError::Http(_) | CheckError::File(_) | CheckError::Fragment(..))
} else {
// Ignore links that are present in the list
match err {
CheckError::Fragment(_, fragment, _) => ignore.links.iter().any(|link| {
let link = if link.starts_with('#') { &link[1..] } else { link.as_str() };
link == fragment
}),
CheckError::File(path) => ignore.links.iter().any(|link| Path::new(link) == path),
CheckError::Http(url) => ignore.links.iter().any(|link| link == url.as_str()),
CheckError::IntraDocLink(_) | CheckError::HttpForbidden(_) | CheckError::Io(_) => false,
}
};
!should_ignore
});
}
if let Some(ignore) = ctx.ignored_intra_doc_links.iter().find(|ignore| ignore.path == shortened_path) {
file_err.errors.retain(|err| {
let should_ignore = if ignore.links.is_empty() {
// Ignore all links
matches!(err, CheckError::IntraDocLink(_))
} else {
// Ignore links that are present in the list
match err {
CheckError::IntraDocLink(link) => ignore.links.contains(link),
_ => false,
}
};
!should_ignore
});
}

if file_err.errors.is_empty() {
return None;
}

// Next, print the error for display
if !ctx.verbose {
err.shorten_all(dir_path);
file_err.shorten_all(dir_path);
}
println!("{}", err);
true
println!("{}", file_err);
Some(true)
})
// ||||||
.reduce(|| false, |initial, new| initial || new)
Expand Down

0 comments on commit 23baa46

Please sign in to comment.