Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable ignoring files based on what git ignores #94

Merged
merged 1 commit into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ Another example is to call *mlc* on a certain directory or file:
mlc ./docs
```

Alternatively, you may want to ignore all files currently ignored by `git` (requires the `git` binary to be found on `$PATH`) and set a root-dir for relative links:

```bash
mlc --gitignore --root-dir .
```

Call *mlc* with the `--help` flag to display all available cli arguments:

``` bash
Expand All @@ -120,6 +126,7 @@ The following arguments are available:
| `--match-file-extension` | `-e` | Set the flag, if the file extension shall be checked as well. For example the following markup link `[link](dir/file)` matches if for example a file called `file.md` exists in `dir`, but would fail when the `--match-file-extension` flag is set. |
| `--version` | `-V` | Print current version of mlc |
| `--ignore-path` | `-p` | Comma separated list of directories or files which shall be ignored. For example `--ignore-path ./ignore-me,./src`. |
| `--gitignore` | `-g` | Ignore all files currently ignored by git (requires `git` binary to be available on $PATH). |
| `--ignore-links` | `-i` | Comma separated list of links which shall be ignored. Use simple `?` and `*` wildcards. For example `--ignore-links "http*://crates.io*"` will skip all links to the crates.io website. See the [used lib](https://github.com/becheran/wildmatch) for more information. |
| `--markup-types` | `-t` | Comma separated list of markup types which shall be checked [possible values: md, html] |
| `--root-dir` | `-r` | All links to the file system starting with a slash on linux or backslash on windows will use another virtual root dir. For example the link in a file `[link](/dir/other/file.md)` checked with the cli arg `--root-dir /env/another/dir` will let *mlc* check the existence of `/env/another/dir/dir/other/file.md`. |
Expand All @@ -138,6 +145,8 @@ offline = true
match-file-extension= true
# List of files and directories which will be ignored
ignore-path=["./ignore-me","./src"]
# Ignore all files ignored by git
gitignore = true
# List of links which will be ignored
ignore-links=["http://ignore-me.de/*","http://*.ignoresub-domain/*"]
# List of markup types which shall be checked
Expand Down
29 changes: 22 additions & 7 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ pub fn parse_args() -> Config {
.help("Path to the root folder used to resolve all relative paths")
.required(false),
)

.arg(
Arg::new("gitignore")
.long("gitignore")
.short('g')
.value_name("GIT")
.help("Ignore all files ignored by git")
.action(ArgAction::SetTrue)
.required(false),
)
.get_matches();

let default_dir = format!(".{}", &MAIN_SEPARATOR);
Expand Down Expand Up @@ -148,18 +158,23 @@ pub fn parse_args() -> Config {
}

if let Some(ignore_path) = matches.get_many::<String>("ignore-path") {
opt.ignore_path = Some(ignore_path.map(|x| Path::new(x).to_path_buf()).collect());
}
if opt.ignore_path.is_some() {
opt.ignore_path.as_mut().unwrap().iter_mut().for_each(|p| {
let mut paths: Vec<_> = ignore_path.map(|x| Path::new(x).to_path_buf()).collect();
for p in paths.iter_mut() {
match fs::canonicalize(&p) {
Ok(p) => &p,
Ok(canonical_path) => {
*p = canonical_path;
}
Err(e) => {
println!("⚠ Warn: Ignore path {:?} not found. {:?}.", p, e);
&p
panic!("Exiting due to invalid ignore path.");
}
};
});
}
opt.ignore_path = Some(paths);
}

if matches.get_flag("gitignore") {
opt.gitignore = Some(true);
}

if let Some(root_dir) = matches.get_one::<String>("root-dir") {
Expand Down
84 changes: 76 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ use serde::Deserialize;
use std::collections::HashMap;
use std::env;
use std::fmt;
use std::fs;
use std::path::Path;
use std::path::PathBuf;
use std::process::Command;
use std::sync::Arc;
use tokio::sync::Mutex;
use tokio::time::{sleep_until, Duration, Instant};
Expand Down Expand Up @@ -49,6 +52,8 @@ pub struct OptionalConfig {
pub ignore_path: Option<Vec<PathBuf>>,
#[serde(rename(deserialize = "root-dir"))]
pub root_dir: Option<PathBuf>,
#[serde(rename(deserialize = "gitignore"))]
pub gitignore: Option<bool>,
pub throttle: Option<u32>,
}

Expand Down Expand Up @@ -80,13 +85,14 @@ impl fmt::Display for Config {
f,
"
Debug: {:?}
Dir: {}
Dir: {}
DoNotWarnForRedirectTo: {:?}
Types: {:?}
Types: {:?}
Offline: {}
MatchExt: {}
RootDir: {}
IgnoreLinks: {}
Gitignore: {}
IgnoreLinks: {}
IgnorePath: {:?}
Throttle: {} ms",
self.optional.debug.unwrap_or(false),
Expand All @@ -96,6 +102,7 @@ Throttle: {} ms",
self.optional.offline.unwrap_or_default(),
self.optional.match_file_extension.unwrap_or_default(),
root_dir_str,
self.optional.gitignore.unwrap_or_default(),
ignore_str.join(","),
ignore_path_str,
self.optional.throttle.unwrap_or(0)
Expand Down Expand Up @@ -125,6 +132,33 @@ fn find_all_links(config: &Config) -> Vec<MarkupLink> {
links
}

/// Collects the markup files (`.md` / `.html`) that git currently ignores.
///
/// Runs `git ls-files --ignored --others --exclude-standard -z` in the current
/// working directory and canonicalizes every reported path whose name ends
/// with `.md` or `.html`. Entries that cannot be canonicalized are dropped.
///
/// Returns `None` (after printing a message to stderr) when the `git` binary
/// cannot be started or when the command exits with a non-zero status.
fn find_git_ignored_files() -> Option<Vec<PathBuf>> {
    let output = match Command::new("git")
        .arg("ls-files")
        .arg("--ignored")
        .arg("--others")
        .arg("--exclude-standard")
        // NUL-terminated output: without `-z` git C-quotes "unusual" path
        // names, which would break both the suffix filter and canonicalize.
        .arg("-z")
        .output()
    {
        Ok(output) => output,
        Err(e) => {
            // A missing `git` binary is an environment problem, not a bug:
            // report it the same way as a failing git invocation.
            eprintln!("Failed to execute 'git' command: {}", e);
            return None;
        }
    };

    if !output.status.success() {
        eprintln!(
            "git ls-files command failed: {}",
            String::from_utf8_lossy(&output.stderr)
        );
        return None;
    }

    // Lossy decoding: a single non-UTF-8 file name must not abort the run.
    let stdout = String::from_utf8_lossy(&output.stdout);
    let ignored_files = stdout
        .split('\0')
        .filter(|entry| entry.ends_with(".md") || entry.ends_with(".html"))
        .filter_map(|entry| fs::canonicalize(Path::new(entry)).ok())
        .collect::<Vec<_>>();
    Some(ignored_files)
}


fn print_helper(
link: &MarkupLink,
status_code: &colored::ColoredString,
Expand Down Expand Up @@ -168,7 +202,41 @@ pub async fn run(config: &Config) -> Result<(), ()> {
Some(s) => s.iter().map(|m| WildMatch::new(m)).collect(),
None => vec![],
};

let gitignored_files: Option<Vec<PathBuf>> = if config.optional.gitignore.is_some() {
let files = find_git_ignored_files();
debug!("Found gitignored files: {:?}", files);
files
} else {
None
};

let is_gitignore_enabled = gitignored_files.is_some();

for link in &links {
let canonical_link_source = match fs::canonicalize(&link.source) {
Ok(path) => path,
Err(e) => {
warn!("Failed to canonicalize link source: {}. Error: {:?}", link.source, e);
continue;
}
};

if is_gitignore_enabled {
if let Some(ref gif) = gitignored_files {
if gif.iter().any(|path| path == &canonical_link_source) {
print_helper(
link,
&"Skip".green(),
"Ignore link because it is ignored by git.",
false,
);
skipped += 1;
continue;
}
}
}

if ignore_links.iter().any(|m| m.matches(&link.target)) {
print_helper(
link,
Expand All @@ -179,6 +247,7 @@ pub async fn run(config: &Config) -> Result<(), ()> {
skipped += 1;
continue;
}

let link_type = get_link_type(&link.target);
let target = resolve_target_link(link, &link_type, config).await;
let t = Target { target, link_type };
Expand All @@ -190,11 +259,10 @@ pub async fn run(config: &Config) -> Result<(), ()> {
}
}

let do_not_warn_for_redirect_to: Arc<Vec<WildMatch>> =
Arc::new(match &config.optional.do_not_warn_for_redirect_to {
Some(s) => s.iter().map(|m| WildMatch::new(m)).collect(),
None => vec![],
});
let do_not_warn_for_redirect_to: Arc<Vec<WildMatch>> = Arc::new(match &config.optional.do_not_warn_for_redirect_to {
Some(s) => s.iter().map(|m| WildMatch::new(m)).collect(),
None => vec![],
});

let throttle = config.optional.throttle.unwrap_or_default() > 0;
info!("Throttle HTTP requests to same host: {:?}", throttle);
Expand Down
2 changes: 2 additions & 0 deletions tests/end_to_end.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ async fn end_to_end() {
fs::canonicalize("./benches/benchmark/markdown/ignore_me_dir").unwrap(),
]),
root_dir: None,
gitignore: None,
},
};
if let Err(e) = mlc::run(&config).await {
Expand All @@ -46,6 +47,7 @@ async fn end_to_end_different_root() {
ignore_path: None,
throttle: None,
root_dir: Some(test_files),
gitignore: None,
},
};
if let Err(e) = mlc::run(&config).await {
Expand Down
Loading