Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Initial prototype for ignore files #144

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ regex = { version = "1", default-features = false, features = ["std", "perf"] }
ureq = { version = "2.0.1", features = ["tls"], default-features = false }
serde = "1.0"
serde_derive = "1.0"
toml = "0.5"
url = "2"
# Try to keep this in sync with `url`'s version
percent-encoding = "2"
Expand Down
16 changes: 16 additions & 0 deletions src/bin/cargo-deadlinks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ CARGO_ARGS will be passed verbatim to `cargo doc` (as long as `--no-build` is no
struct MainArgs {
arg_directory: Option<String>,
arg_cargo_directory: Option<OsString>,
arg_ignore_file: Option<PathBuf>,
flag_verbose: bool,
flag_debug: bool,
flag_check_http: bool,
Expand All @@ -56,11 +57,21 @@ impl From<&MainArgs> for CheckContext {
} else {
HttpCheck::Ignored
};
let (ignored_links, ignored_intra_doc_links) =
match shared::parse_ignore_file(args.arg_ignore_file.clone()) {
Ok(x) => x,
Err(err) => {
eprintln!("error: {}", err);
std::process::exit(1);
}
};
CheckContext {
check_http,
verbose: args.flag_debug,
check_fragments: !args.flag_ignore_fragments,
check_intra_doc_links: args.flag_check_intra_doc_links,
ignored_links,
ignored_intra_doc_links,
}
}
}
Expand Down Expand Up @@ -96,6 +107,11 @@ fn parse_args() -> Result<MainArgs, shared::PicoError> {
}
let main_args = MainArgs {
arg_directory: args.opt_value_from_str("--dir")?,
arg_ignore_file: args
.opt_value_from_os_str("--ignore-file", |os_str| {
Result::<_, std::convert::Infallible>::Ok(PathBuf::from(os_str))
})
.unwrap(),
arg_cargo_directory: args
.opt_value_from_os_str("--cargo-dir", |s| Result::<_, Error>::Ok(s.to_owned()))?,
flag_verbose: args.contains(["-v", "--verbose"]),
Expand Down
17 changes: 17 additions & 0 deletions src/bin/deadlinks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Options:
--check-http Check 'http' and 'https' scheme links
--forbid-http Give an error if HTTP links are found. This is incompatible with --check-http.
--ignore-fragments Don't check URL fragments.
--ignore-file Path to a file with ignores. Defaults to `deadlinks.toml`.
--debug Use debug output
-v --verbose Use verbose output
-V --version Print version info and exit.
Expand All @@ -25,6 +26,7 @@ Options:
#[derive(Debug, Deserialize)]
struct MainArgs {
arg_directory: Vec<PathBuf>,
arg_ignore_file: Option<PathBuf>,
flag_verbose: bool,
flag_debug: bool,
flag_check_http: bool,
Expand All @@ -41,11 +43,21 @@ impl From<&MainArgs> for CheckContext {
} else {
HttpCheck::Ignored
};
let (ignored_links, ignored_intra_doc_links) =
match shared::parse_ignore_file(args.arg_ignore_file.clone()) {
Ok(x) => x,
Err(err) => {
eprintln!("error: {}", err);
std::process::exit(1);
}
};
CheckContext {
check_http,
verbose: args.flag_debug,
check_fragments: !args.flag_ignore_fragments,
check_intra_doc_links: false,
ignored_links,
ignored_intra_doc_links,
}
}
}
Expand All @@ -65,6 +77,11 @@ fn parse_args() -> Result<MainArgs, shared::PicoError> {
flag_ignore_fragments: args.contains("--ignore-fragments"),
flag_check_http: args.contains("--check-http"),
flag_forbid_http: args.contains("--forbid-http"),
arg_ignore_file: args
.opt_value_from_os_str("--ignore-file", |os_str| {
Result::<_, std::convert::Infallible>::Ok(PathBuf::from(os_str))
})
.unwrap(),
arg_directory: args.free_os()?.into_iter().map(Into::into).collect(),
};
if args.flag_forbid_http && args.flag_check_http {
Expand Down
88 changes: 87 additions & 1 deletion src/bin/shared.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,92 @@
use cargo_deadlinks::IgnoredFile;
use log::LevelFilter;
use pico_args::Error;
use std::fmt::{self, Display};
use std::{
fmt::{self, Display},
path::PathBuf,
};
use toml::Value;

/// Loads the ignore file and returns `(ignored_links, ignored_intra_doc_links)`.
///
/// `path` is the file given on the command line, if any. When it is `None`,
/// `deadlinks.toml` in the current directory is tried instead; if that default
/// file does not exist, two empty lists are returned.
///
/// The expected layout is a top-level table whose only keys are `fragments`
/// and `intra_doc_links`. Each of those is a table mapping a file path to an
/// array of link strings:
///
/// ```toml
/// [fragments]
/// "some/file.html" = ["#anchor", "other.html"]
///
/// [intra_doc_links]
/// "some/file.html" = []
/// ```
///
/// # Errors
///
/// Returns an error if:
/// - an explicitly requested file cannot be read,
/// - the default file exists but cannot be read (for example because of
///   permissions or invalid UTF-8),
/// - the contents are not valid TOML, or
/// - the TOML does not match the layout described above.
pub fn parse_ignore_file(
    path: Option<PathBuf>,
) -> Result<(Vec<IgnoredFile>, Vec<IgnoredFile>), Box<dyn std::error::Error>> {
    let is_required = path.is_some();
    let path = path.unwrap_or_else(|| "deadlinks.toml".into());

    let contents = match std::fs::read_to_string(path) {
        Ok(toml) => toml,
        // We proactively looked for `deadlinks.toml`, but it wasn't there.
        // Pretend it was an empty file. Any other failure (permissions, bad
        // encoding, ...) means the file exists but is unusable, so it must be
        // reported rather than silently dropping the user's ignore rules.
        Err(err) if !is_required && err.kind() == std::io::ErrorKind::NotFound => {
            return Ok((vec![], vec![]));
        }
        Err(err) => return Err(err.into()),
    };

    let ignores = match contents.parse::<Value>()? {
        // top-level should always be a table
        Value::Table(values) => values,
        other => {
            return Err(format!(
                "invalid TOML format: expected a top-level table, got {:?}",
                other
            )
            .into())
        }
    };

    let (mut ignored_links, mut ignored_intra_doc_links) = (vec![], vec![]);
    for (key, val) in ignores {
        match key.as_str() {
            "fragments" => parse_ignore_section(val, &mut ignored_links)?,
            "intra_doc_links" => parse_ignore_section(val, &mut ignored_intra_doc_links)?,
            _ => {
                return Err(format!(
                    "invalid TOML format: expected 'fragments' or 'intra_doc_links', got {}",
                    key
                )
                .into())
            }
        }
    }

    Ok((ignored_links, ignored_intra_doc_links))
}

/// Parses one section of the ignore file (`file = [link, ...]` entries) and
/// appends one `IgnoredFile` per entry to `files`.
fn parse_ignore_section(
    val: Value,
    files: &mut Vec<IgnoredFile>,
) -> Result<(), Box<dyn std::error::Error>> {
    let map = match val {
        Value::Table(map) => map,
        other => {
            return Err(format!("invalid TOML format: expected a table, got {:?}", other).into())
        }
    };

    for (file, val) in map {
        let links = match val {
            Value::Array(links) => links,
            other => {
                return Err(format!(
                    "invalid TOML format: expected a list of links, got {:?}",
                    other
                )
                .into())
            }
        };
        // Every element of the array must be a string; stop at the first one that is not.
        let links = links
            .into_iter()
            .map(|val| match val {
                Value::String(link) => Ok(link),
                other => Err(format!(
                    "invalid TOML format: expected a string, got {:?}",
                    other
                )),
            })
            .collect::<Result<Vec<String>, String>>()?;
        files.push(IgnoredFile {
            path: PathBuf::from(file),
            links,
        });
    }

    Ok(())
}

/// Initializes the logger according to the provided config flags.
pub fn init_logger(debug: bool, verbose: bool, krate: &str) {
Expand Down
2 changes: 2 additions & 0 deletions src/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ pub enum CheckError {
/// An HTTP URL was encountered, but HTTP checking was forbidden
HttpForbidden(Url),
/// The linked file existed, but was missing the linked HTML anchor
///
/// (`link`, `fragment`, `missing range`)
Fragment(Link, String, Option<Vec<String>>),
/// An error occurred while trying to find whether the file or URL existed
Io(Box<IoError>),
Expand Down
86 changes: 79 additions & 7 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::{
path::{Path, PathBuf},
};

use log::info;
use log::{debug, info};
use rayon::prelude::*;
use rayon::ThreadPoolBuilder;
use url::Url;
Expand All @@ -20,9 +20,9 @@ mod parse;
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
/// What behavior should deadlinks use for HTTP links?
pub enum HttpCheck {
/// Make an internet request to ensure the link works
/// Make an internet request to ensure the link works.
Enabled,
/// Do nothing when encountering a link
/// Do nothing when encountering a link.
Ignored,
/// Give an error when encountering a link.
///
Expand All @@ -32,12 +32,33 @@ pub enum HttpCheck {
}

// NOTE: this cannot be `Copy`: it owns the `Vec`s of ignore rules below.
/// Link-checking options.
#[derive(Clone, Debug)]
pub struct CheckContext {
/// Should deadlinks give more detail when checking links?
///
/// Currently, 'more detail' just means not to abbreviate file paths when printing errors.
pub verbose: bool,
/// What behavior should deadlinks use for HTTP links?
pub check_http: HttpCheck,
/// Should fragments in URLs be checked?
pub check_fragments: bool,
/// Should intra-doc links be checked?
pub check_intra_doc_links: bool,
/// Files whose link errors should not be reported.
///
/// `walk_dir` applies these entries to missing-file, HTTP, and missing-fragment errors.
pub ignored_links: Vec<IgnoredFile>,
/// Files whose intra-doc link errors should not be reported.
pub ignored_intra_doc_links: Vec<IgnoredFile>,
}

/// A file in which some or all link errors should be ignored.
#[derive(Clone, Debug)]
pub struct IgnoredFile {
/// What file path should be ignored?
///
/// `walk_dir` compares this against the path of the file containing the error, with the
/// directory being walked stripped from the front.
pub path: PathBuf,
/// What links in the file should be ignored?
///
/// An empty list means all links should be ignored.
pub links: Vec<String>,
}

impl Default for CheckContext {
Expand All @@ -47,6 +68,8 @@ impl Default for CheckContext {
verbose: false,
check_fragments: true,
check_intra_doc_links: false,
ignored_links: Vec::new(),
ignored_intra_doc_links: Vec::new(),
}
}
}
Expand All @@ -72,19 +95,68 @@ impl fmt::Display for FileError {
/// For each error that occurred, print an error message.
/// Returns whether an error occurred.
pub fn walk_dir(dir_path: &Path, ctx: &CheckContext) -> bool {
debug!("ignored_links: {:?}", ctx.ignored_links);
debug!("ignored_intra_doc_links: {:?}", ctx.ignored_intra_doc_links);

let pool = ThreadPoolBuilder::new()
.num_threads(num_cpus::get())
.build()
.unwrap();

pool.install(|| {
unavailable_urls(dir_path, ctx)
.map(|mut err| {
.filter_map(|mut file_err| {
let shortened_path = file_err.path.strip_prefix(dir_path).unwrap_or(dir_path);
debug!("file_err={:?}, shortened_path={:?}", file_err, shortened_path);

// First, filter out ignored errors
if let Some(ignore) = ctx.ignored_links.iter().find(|ignore| ignore.path == shortened_path) {
file_err.errors.retain(|err| {
let should_ignore = if ignore.links.is_empty() {
// Ignore all links
matches!(err, CheckError::Http(_) | CheckError::File(_) | CheckError::Fragment(..))
} else {
// Ignore links that are present in the list
match err {
CheckError::Fragment(_, fragment, _) => ignore.links.iter().any(|link| {
#[allow(clippy::or_fun_call)]
let link = link.strip_prefix('#').unwrap_or(link.as_str());
link == fragment
}),
CheckError::File(path) => ignore.links.iter().any(|link| Path::new(link) == path),
CheckError::Http(url) => ignore.links.iter().any(|link| link == url.as_str()),
CheckError::IntraDocLink(_) | CheckError::HttpForbidden(_) | CheckError::Io(_) => false,
}
};
!should_ignore
});
}
if let Some(ignore) = ctx.ignored_intra_doc_links.iter().find(|ignore| ignore.path == shortened_path) {
file_err.errors.retain(|err| {
let should_ignore = if ignore.links.is_empty() {
// Ignore all links
matches!(err, CheckError::IntraDocLink(_))
} else {
// Ignore links that are present in the list
match err {
CheckError::IntraDocLink(link) => ignore.links.contains(link),
_ => false,
}
};
!should_ignore
});
}

if file_err.errors.is_empty() {
return None;
}

// Next, print the error for display
if !ctx.verbose {
err.shorten_all(dir_path);
file_err.shorten_all(dir_path);
}
println!("{}", err);
true
println!("{}", file_err);
Some(true)
})
// ||||||
.reduce(|| false, |initial, new| initial || new)
Expand Down