From ac16ae7ec23612a1259d3f05b181ecf5b02fc2fb Mon Sep 17 00:00:00 2001 From: Damien Mathieu <42@dmathieu.com> Date: Mon, 14 Oct 2024 02:41:56 +0200 Subject: [PATCH] Allow excluding cache based on status code (#1403) This introduces an option `--cache-exclude-status`, which allows specifying a range of HTTP status codes which will be ignored from the cache. Closes #1400. --- README.md | 16 ++ lychee-bin/src/commands/check.rs | 94 ++++++- lychee-bin/src/options.rs | 45 +++- lychee-bin/tests/cli.rs | 59 +++++ lychee-lib/src/lib.rs | 5 +- lychee-lib/src/types/accept/mod.rs | 2 - lychee-lib/src/types/accept/range.rs | 4 +- lychee-lib/src/types/error.rs | 10 +- lychee-lib/src/types/mod.rs | 2 + lychee-lib/src/types/status_code/excluder.rs | 242 ++++++++++++++++++ lychee-lib/src/types/status_code/mod.rs | 5 + .../types/{accept => status_code}/selector.rs | 54 ++-- 12 files changed, 491 insertions(+), 47 deletions(-) create mode 100644 lychee-lib/src/types/status_code/excluder.rs create mode 100644 lychee-lib/src/types/status_code/mod.rs rename lychee-lib/src/types/{accept => status_code}/selector.rs (79%) diff --git a/README.md b/README.md index 689a5737d6..763eb22327 100644 --- a/README.md +++ b/README.md @@ -335,6 +335,22 @@ Options: [default: 1d] + --cache-exclude-status + A list of status codes that will be ignored from the cache + + The following accept range syntax is supported: [start]..[=]end|code. Some valid + examples are: + + - 429 + - 500..=599 + - 500.. + + Use "lychee --cache-exclude-status '429, 500..502' ..." to provide a comma- separated + list of excluded status codes. This example will not cache results with a status code of 429, 500, + 501 and 502. + + [default: ] + --dump Don't perform any link checking. Instead, dump all the links extracted from inputs that would be checked diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 23d0c2ac76..f828f7840f 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -10,7 +10,7 @@ use reqwest::Url; use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; -use lychee_lib::{Client, ErrorKind, Request, Response}; +use lychee_lib::{Client, ErrorKind, Request, Response, Uri}; use lychee_lib::{InputSource, Result}; use lychee_lib::{ResponseBody, Status}; @@ -46,6 +46,7 @@ where let client = params.client; let cache = params.cache; + let cache_exclude_status = params.cfg.cache_exclude_status.into_set(); let accept = params.cfg.accept.into_set(); let pb = if params.cfg.no_progress || params.cfg.verbose.log_level() >= log::Level::Info { @@ -61,6 +62,7 @@ where max_concurrency, client, cache, + cache_exclude_status, accept, )); @@ -219,6 +221,7 @@ async fn request_channel_task( max_concurrency: usize, client: Client, cache: Arc, + cache_exclude_status: HashSet, accept: HashSet, ) { StreamExt::for_each_concurrent( @@ -226,7 +229,14 @@ async fn request_channel_task( max_concurrency, |request: Result| async { let request = request.expect("cannot read request"); - let response = handle(&client, cache.clone(), request, accept.clone()).await; + let response = handle( + &client, + cache.clone(), + cache_exclude_status.clone(), + request, + accept.clone(), + ) + .await; send_resp .send(response) @@ -260,6 +270,7 @@ async fn check_url(client: &Client, request: Request) -> Response { async fn handle( client: &Client, cache: Arc, + cache_exclude_status: HashSet, request: Request, accept: HashSet, ) -> Response { @@ -287,9 +298,10 @@ async fn handle( // benefit. // - Skip caching unsupported URLs as they might be supported in a // future run. - // - Skip caching excluded links; they might not be excluded in the next run + // - Skip caching excluded links; they might not be excluded in the next run. + // - Skip caching links for which the status code has been explicitly excluded from the cache. let status = response.status(); - if uri.is_file() || status.is_excluded() || status.is_unsupported() || status.is_unknown() { + if ignore_cache(&uri, status, &cache_exclude_status) { return response; } @@ -297,6 +309,26 @@ async fn handle( response } +/// Returns `true` if the response should be ignored in the cache. +/// +/// The response should be ignored if: +/// - The URI is a file URI. +/// - The status is excluded. +/// - The status is unsupported. +/// - The status is unknown. +/// - The status code is excluded from the cache. +fn ignore_cache(uri: &Uri, status: &Status, cache_exclude_status: &HashSet) -> bool { + let status_code_excluded = status + .code() + .map_or(false, |code| cache_exclude_status.contains(&code.as_u16())); + + uri.is_file() + || status.is_excluded() + || status.is_unsupported() + || status.is_unknown() + || status_code_excluded +} + fn show_progress( output: &mut dyn Write, progress_bar: &Option, @@ -352,8 +384,9 @@ fn get_failed_urls(stats: &mut ResponseStats) -> Vec<(InputSource, Url)> { #[cfg(test)] mod tests { use crate::{formatters::get_response_formatter, options}; + use http::StatusCode; use log::info; - use lychee_lib::{CacheStatus, ClientBuilder, InputSource, Uri}; + use lychee_lib::{CacheStatus, ClientBuilder, ErrorKind, InputSource, Uri}; use super::*; @@ -414,4 +447,55 @@ mod tests { Status::Error(ErrorKind::InvalidURI(_)) )); } + + #[test] + fn test_cache_by_default() { + assert!(!ignore_cache( + &Uri::try_from("https://[::1]").unwrap(), + &Status::Ok(StatusCode::OK), + &HashSet::default() + )); + } + + #[test] + // Cache is ignored for file URLs + fn test_cache_ignore_file_urls() { + assert!(ignore_cache( + &Uri::try_from("file:///home").unwrap(), + &Status::Ok(StatusCode::OK), + &HashSet::default() + )); + } + + #[test] + // Cache is ignored for unsupported status + fn test_cache_ignore_unsupported_status() { + assert!(ignore_cache( + &Uri::try_from("https://[::1]").unwrap(), + &Status::Unsupported(ErrorKind::EmptyUrl), + &HashSet::default() + )); + } + + #[test] + // Cache is ignored for unknown status + fn test_cache_ignore_unknown_status() { + assert!(ignore_cache( + &Uri::try_from("https://[::1]").unwrap(), + &Status::UnknownStatusCode(StatusCode::IM_A_TEAPOT), + &HashSet::default() + )); + } + + #[test] + fn test_cache_ignore_excluded_status() { + // Cache is ignored for excluded status codes + let exclude = [StatusCode::OK.as_u16()].iter().copied().collect(); + + assert!(ignore_cache( + &Uri::try_from("https://[::1]").unwrap(), + &Status::Ok(StatusCode::OK), + &exclude + )); + } } diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index b8c23be7f4..988dc9aa60 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -6,8 +6,8 @@ use clap::builder::PossibleValuesParser; use clap::{arg, builder::TypedValueParser, Parser}; use const_format::{concatcp, formatcp}; use lychee_lib::{ - AcceptSelector, Base, BasicAuthSelector, Input, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES, - DEFAULT_RETRY_WAIT_TIME_SECS, DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT, + Base, BasicAuthSelector, Input, StatusCodeExcluder, StatusCodeSelector, DEFAULT_MAX_REDIRECTS, + DEFAULT_MAX_RETRIES, DEFAULT_RETRY_WAIT_TIME_SECS, DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT, }; use secrecy::{ExposeSecret, SecretString}; use serde::Deserialize; @@ -145,7 +145,8 @@ default_function! { retry_wait_time: usize = DEFAULT_RETRY_WAIT_TIME_SECS; method: String = DEFAULT_METHOD.to_string(); verbosity: Verbosity = Verbosity::default(); - accept_selector: AcceptSelector = AcceptSelector::default(); + cache_exclude_selector: StatusCodeExcluder = StatusCodeExcluder::new(); + accept_selector: StatusCodeSelector = StatusCodeSelector::default(); } // Macro for merging configuration values @@ -231,6 +232,26 @@ pub(crate) struct Config { #[serde(with = "humantime_serde")] pub(crate) max_cache_age: Duration, + /// A list of status codes that will be excluded from the cache + #[arg( + long, + default_value_t, + long_help = "A list of status codes that will be ignored from the cache + +The following accept range syntax is supported: [start]..[=]end|code. Some valid +examples are: + +- 429 +- 500..=599 +- 500.. + +Use \"lychee --cache-exclude-status '429, 500..502' ...\" to provide a comma- separated +list of excluded status codes. This example will not cache results with a status code of 429, 500, +501 and 502." + )] + #[serde(default = "cache_exclude_selector")] + pub(crate) cache_exclude_status: StatusCodeExcluder, + /// Don't perform any link checking. /// Instead, dump all the links extracted from inputs that would be checked #[arg(long)] @@ -394,7 +415,7 @@ separated list of accepted status codes. This example will accept 200, 201, 202, 203, 204, 429, and 500 as valid status codes." )] #[serde(default = "accept_selector")] - pub(crate) accept: AcceptSelector, + pub(crate) accept: StatusCodeSelector, /// Enable the checking of fragments in links. #[arg(long)] @@ -509,6 +530,7 @@ impl Config { max_retries: DEFAULT_MAX_RETRIES; max_concurrency: DEFAULT_MAX_CONCURRENCY; max_cache_age: humantime::parse_duration(DEFAULT_MAX_CACHE_AGE).unwrap(); + cache_exclude_status: StatusCodeExcluder::default(); threads: None; user_agent: DEFAULT_USER_AGENT; insecure: false; @@ -538,7 +560,7 @@ impl Config { require_https: false; cookie_jar: None; include_fragments: false; - accept: AcceptSelector::default(); + accept: StatusCodeSelector::default(); } if self @@ -564,7 +586,7 @@ mod tests { #[test] fn test_accept_status_codes() { let toml = Config { - accept: AcceptSelector::from_str("200..=204, 429, 500").unwrap(), + accept: StatusCodeSelector::from_str("200..=204, 429, 500").unwrap(), ..Default::default() }; @@ -577,4 +599,15 @@ mod tests { assert!(cli.accept.contains(204)); assert!(!cli.accept.contains(205)); } + + #[test] + fn test_default() { + let cli = Config::default(); + + assert_eq!( + cli.accept, + StatusCodeSelector::from_str("100..=103,200..=299").expect("no error") + ); + assert_eq!(cli.cache_exclude_status, StatusCodeExcluder::new()); + } } diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 1188f9af81..89bcd4cd16 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -895,6 +895,65 @@ mod cli { Ok(()) } + #[tokio::test] + async fn test_lycheecache_exclude_custom_status_codes() -> Result<()> { + let base_path = fixtures_path().join("cache"); + let cache_file = base_path.join(LYCHEE_CACHE_FILE); + + // Unconditionally remove cache file if it exists + let _ = fs::remove_file(&cache_file); + + let mock_server_ok = mock_server!(StatusCode::OK); + let mock_server_no_content = mock_server!(StatusCode::NO_CONTENT); + let mock_server_too_many_requests = mock_server!(StatusCode::TOO_MANY_REQUESTS); + + let dir = tempfile::tempdir()?; + let mut file = File::create(dir.path().join("c.md"))?; + + writeln!(file, "{}", mock_server_ok.uri().as_str())?; + writeln!(file, "{}", mock_server_no_content.uri().as_str())?; + writeln!(file, "{}", mock_server_too_many_requests.uri().as_str())?; + + let mut cmd = main_command(); + let test_cmd = cmd + .current_dir(&base_path) + .arg(dir.path().join("c.md")) + .arg("--verbose") + .arg("--no-progress") + .arg("--cache") + .arg("--cache-exclude-status") + .arg("204,429"); + + assert!( + !cache_file.exists(), + "cache file should not exist before this test" + ); + + // run first without cache to generate the cache file + test_cmd + .assert() + .stderr(contains(format!("[200] {}/\n", mock_server_ok.uri()))) + .stderr(contains(format!( + "[204] {}/ | OK (204 No Content): No Content\n", + mock_server_no_content.uri() + ))) + .stderr(contains(format!( + "[429] {}/ | Failed: Network error: Too Many Requests\n", + mock_server_too_many_requests.uri() + ))); + + // check content of cache file + let data = fs::read_to_string(&cache_file)?; + assert!(data.contains(&format!("{}/,200", mock_server_ok.uri()))); + assert!(!data.contains(&format!("{}/,204", mock_server_no_content.uri()))); + assert!(!data.contains(&format!("{}/,429", mock_server_too_many_requests.uri()))); + + // clear the cache file + fs::remove_file(&cache_file)?; + + Ok(()) + } + #[tokio::test] async fn test_lycheecache_accept_custom_status_codes() -> Result<()> { let base_path = fixtures_path().join("cache_accept_custom_status_codes"); diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs index e414808b85..93df6d0db8 100644 --- a/lychee-lib/src/lib.rs +++ b/lychee-lib/src/lib.rs @@ -95,8 +95,9 @@ pub use crate::{ collector::Collector, filter::{Excludes, Filter, Includes}, types::{ - uri::valid::Uri, AcceptRange, AcceptRangeError, AcceptSelector, Base, BasicAuthCredentials, + uri::valid::Uri, AcceptRange, AcceptRangeError, Base, BasicAuthCredentials, BasicAuthSelector, CacheStatus, CookieJar, ErrorKind, FileType, Input, InputContent, - InputSource, Request, Response, ResponseBody, Result, Status, + InputSource, Request, Response, ResponseBody, Result, Status, StatusCodeExcluder, + StatusCodeSelector, }, }; diff --git a/lychee-lib/src/types/accept/mod.rs b/lychee-lib/src/types/accept/mod.rs index dc50b69384..665c832ea1 100644 --- a/lychee-lib/src/types/accept/mod.rs +++ b/lychee-lib/src/types/accept/mod.rs @@ -1,5 +1,3 @@ mod range; -mod selector; pub use range::*; -pub use selector::*; diff --git a/lychee-lib/src/types/accept/range.rs b/lychee-lib/src/types/accept/range.rs index 2f4c7b914b..3707404b6b 100644 --- a/lychee-lib/src/types/accept/range.rs +++ b/lychee-lib/src/types/accept/range.rs @@ -7,8 +7,8 @@ use thiserror::Error; static RANGE_PATTERN: Lazy = Lazy::new(|| Regex::new(r"^([0-9]{3})?\.\.(=?)([0-9]{3})+$|^([0-9]{3})$").unwrap()); -/// The [`AcceptRangeParseError`] indicates that the parsing process of an -/// [`AcceptRange`] from a string failed due to various underlying reasons. +/// Indicates that the parsing process of an [`AcceptRange`] from a string +/// failed due to various underlying reasons. #[derive(Debug, Error, PartialEq)] pub enum AcceptRangeError { /// The string input didn't contain any range pattern. diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index d5b0efe47f..808a5513b2 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -6,7 +6,7 @@ use thiserror::Error; use tokio::task::JoinError; use super::InputContent; -use crate::types::AcceptSelectorError; +use crate::types::StatusCodeSelectorError; use crate::{basic_auth::BasicAuthExtractorError, utils, Uri}; /// Kinds of status errors @@ -142,9 +142,9 @@ pub enum ErrorKind { #[error("Cannot load cookies")] Cookies(String), - /// Accept selector parse error - #[error("Accept range error")] - AcceptSelectorError(#[from] AcceptSelectorError), + /// Status code selector parse error + #[error("Status code range error")] + StatusCodeSelectorError(#[from] StatusCodeSelectorError), } impl ErrorKind { @@ -301,7 +301,7 @@ impl Hash for ErrorKind { Self::TooManyRedirects(e) => e.to_string().hash(state), Self::BasicAuthExtractorError(e) => e.to_string().hash(state), Self::Cookies(e) => e.to_string().hash(state), - Self::AcceptSelectorError(e) => e.to_string().hash(state), + Self::StatusCodeSelectorError(e) => e.to_string().hash(state), } } } diff --git a/lychee-lib/src/types/mod.rs b/lychee-lib/src/types/mod.rs index 6d0ee51cd7..5bb933fe0b 100644 --- a/lychee-lib/src/types/mod.rs +++ b/lychee-lib/src/types/mod.rs @@ -12,6 +12,7 @@ pub(crate) mod mail; mod request; mod response; mod status; +mod status_code; pub(crate) mod uri; pub use accept::*; @@ -25,6 +26,7 @@ pub use input::{Input, InputContent, InputSource}; pub use request::Request; pub use response::{Response, ResponseBody}; pub use status::Status; +pub use status_code::*; /// The lychee `Result` type pub type Result = std::result::Result; diff --git a/lychee-lib/src/types/status_code/excluder.rs b/lychee-lib/src/types/status_code/excluder.rs new file mode 100644 index 0000000000..34068eb203 --- /dev/null +++ b/lychee-lib/src/types/status_code/excluder.rs @@ -0,0 +1,242 @@ +use std::{collections::HashSet, fmt::Display, str::FromStr}; + +use serde::{de::Visitor, Deserialize}; + +use crate::{ + types::accept::AcceptRange, types::status_code::StatusCodeSelectorError, AcceptRangeError, +}; + +/// A [`StatusCodeExcluder`] holds ranges of HTTP status codes, and determines +/// whether a specific code is matched, so the link can be counted as valid (not +/// broken) or excluded. `StatusCodeExcluder` differs from +/// [`StatusCodeSelector`](super::selector::StatusCodeSelector) in the defaults +/// it provides. As this is meant to exclude status codes, the default is to +/// keep everything. +#[derive(Clone, Debug, PartialEq)] +pub struct StatusCodeExcluder { + ranges: Vec, +} + +impl FromStr for StatusCodeExcluder { + type Err = StatusCodeSelectorError; + + fn from_str(input: &str) -> Result { + let input = input.trim(); + + if input.is_empty() { + return Ok(Self::new()); + } + + let ranges = input + .split(',') + .map(|part| AcceptRange::from_str(part.trim())) + .collect::, AcceptRangeError>>()?; + + Ok(Self::new_from(ranges)) + } +} + +impl Default for StatusCodeExcluder { + fn default() -> Self { + Self::new() + } +} + +impl Display for StatusCodeExcluder { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let ranges: Vec<_> = self.ranges.iter().map(ToString::to_string).collect(); + write!(f, "{}", ranges.join(",")) + } +} + +impl StatusCodeExcluder { + /// Creates a new empty [`StatusCodeExcluder`]. + #[must_use] + pub const fn new() -> Self { + Self { ranges: Vec::new() } + } + + /// Creates a new [`StatusCodeExcluder`] prefilled with `ranges`. + #[must_use] + pub fn new_from(ranges: Vec) -> Self { + let mut selector = Self::new(); + + for range in ranges { + selector.add_range(range); + } + + selector + } + + /// Adds a range of HTTP status codes to this [`StatusCodeExcluder`]. + /// This method merges the new and existing ranges if they overlap. + pub fn add_range(&mut self, range: AcceptRange) -> &mut Self { + // Merge with previous range if possible + if let Some(last) = self.ranges.last_mut() { + if last.merge(&range) { + return self; + } + } + + // If neither is the case, the ranges have no overlap at all. Just add + // to the list of ranges. + self.ranges.push(range); + self + } + + /// Returns whether this [`StatusCodeExcluder`] contains `value`. + #[must_use] + pub fn contains(&self, value: u16) -> bool { + self.ranges.iter().any(|range| range.contains(value)) + } + + /// Consumes self and creates a [`HashSet`] which contains all + /// accepted status codes. + #[must_use] + pub fn into_set(self) -> HashSet { + let mut set = HashSet::new(); + + for range in self.ranges { + for value in range.inner() { + set.insert(value); + } + } + + set + } + + #[cfg(test)] + pub(crate) fn len(&self) -> usize { + self.ranges.len() + } +} + +struct StatusCodeExcluderVisitor; + +impl<'de> Visitor<'de> for StatusCodeExcluderVisitor { + type Value = StatusCodeExcluder; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("a string or a sequence of strings") + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + StatusCodeExcluder::from_str(v).map_err(serde::de::Error::custom) + } + + fn visit_i64(self, v: i64) -> Result + where + E: serde::de::Error, + { + let value = u16::try_from(v).map_err(serde::de::Error::custom)?; + Ok(StatusCodeExcluder::new_from(vec![AcceptRange::new( + value, value, + )])) + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: serde::de::SeqAccess<'de>, + { + let mut selector = StatusCodeExcluder::new(); + while let Some(v) = seq.next_element::()? { + if let Some(v) = v.as_integer() { + let value = u16::try_from(v).map_err(serde::de::Error::custom)?; + selector.add_range(AcceptRange::new(value, value)); + continue; + } + + if let Some(s) = v.as_str() { + let range = AcceptRange::from_str(s).map_err(serde::de::Error::custom)?; + selector.add_range(range); + continue; + } + + return Err(serde::de::Error::custom( + "failed to parse sequence of accept ranges", + )); + } + Ok(selector) + } +} + +impl<'de> Deserialize<'de> for StatusCodeExcluder { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + deserializer.deserialize_any(StatusCodeExcluderVisitor) + } +} + +#[cfg(test)] +mod test { + use super::*; + use rstest::rstest; + + #[rstest] + #[case("", vec![], vec![100, 110, 150, 200, 300, 175, 350], 0)] + #[case("100..=150,200..=300", vec![100, 110, 150, 200, 300], vec![175, 350], 2)] + #[case("200..=300,100..=250", vec![100, 150, 200, 250, 300], vec![350], 1)] + #[case("100..=200,150..=200", vec![100, 150, 200], vec![250, 300], 1)] + #[case("100..=200,300", vec![100, 110, 200, 300], vec![250, 350], 2)] + fn test_from_str( + #[case] input: &str, + #[case] valid_values: Vec, + #[case] invalid_values: Vec, + #[case] length: usize, + ) { + let selector = StatusCodeExcluder::from_str(input).unwrap(); + assert_eq!(selector.len(), length); + + for valid in valid_values { + assert!(selector.contains(valid)); + } + + for invalid in invalid_values { + assert!(!selector.contains(invalid)); + } + } + + #[rstest] + #[case(r"accept = ['200..204', '429']", vec![200, 203, 429], vec![204, 404], 2)] + #[case(r"accept = '200..204, 429'", vec![200, 203, 429], vec![204, 404], 2)] + #[case(r"accept = ['200', '429']", vec![200, 429], vec![404], 2)] + #[case(r"accept = '200, 429'", vec![200, 429], vec![404], 2)] + #[case(r"accept = [200, 429]", vec![200, 429], vec![404], 2)] + #[case(r"accept = '200'", vec![200], vec![404], 1)] + #[case(r"accept = 200", vec![200], vec![404], 1)] + fn test_deserialize( + #[case] input: &str, + #[case] valid_values: Vec, + #[case] invalid_values: Vec, + #[case] length: usize, + ) { + #[derive(Deserialize)] + struct Config { + accept: StatusCodeExcluder, + } + + let config: Config = toml::from_str(input).unwrap(); + assert_eq!(config.accept.len(), length); + + for valid in valid_values { + assert!(config.accept.contains(valid)); + } + + for invalid in invalid_values { + assert!(!config.accept.contains(invalid)); + } + } + + #[rstest] + #[case("100..=150,200..=300", "100..=150,200..=300")] + #[case("100..=150,300", "100..=150,300..=300")] + fn test_display(#[case] input: &str, #[case] display: &str) { + let selector = StatusCodeExcluder::from_str(input).unwrap(); + assert_eq!(selector.to_string(), display); + } +} diff --git a/lychee-lib/src/types/status_code/mod.rs b/lychee-lib/src/types/status_code/mod.rs new file mode 100644 index 0000000000..b58b8cf346 --- /dev/null +++ b/lychee-lib/src/types/status_code/mod.rs @@ -0,0 +1,5 @@ +mod excluder; +mod selector; + +pub use excluder::*; +pub use selector::*; diff --git a/lychee-lib/src/types/accept/selector.rs b/lychee-lib/src/types/status_code/selector.rs similarity index 79% rename from lychee-lib/src/types/accept/selector.rs rename to lychee-lib/src/types/status_code/selector.rs index f3ab55250d..060e097227 100644 --- a/lychee-lib/src/types/accept/selector.rs +++ b/lychee-lib/src/types/status_code/selector.rs @@ -6,7 +6,7 @@ use thiserror::Error; use crate::{types::accept::AcceptRange, AcceptRangeError}; #[derive(Debug, Error)] -pub enum AcceptSelectorError { +pub enum StatusCodeSelectorError { #[error("invalid/empty input")] InvalidInput, @@ -14,21 +14,25 @@ pub enum AcceptSelectorError { AcceptRangeError(#[from] AcceptRangeError), } -/// An [`AcceptSelector`] determines if a returned HTTP status code should be -/// accepted and thus counted as a valid (not broken) link. +/// A [`StatusCodeSelector`] holds ranges of HTTP status codes, and determines +/// whether a specific code is matched, so the link can be counted as valid (not +/// broken) or excluded. `StatusCodeSelector` differs from +/// [`StatusCodeExcluder`](super::excluder::StatusCodeExcluder) +/// in the defaults it provides. As this is meant to +/// select valid status codes, the default includes every successful status. #[derive(Clone, Debug, PartialEq)] -pub struct AcceptSelector { +pub struct StatusCodeSelector { ranges: Vec, } -impl FromStr for AcceptSelector { - type Err = AcceptSelectorError; +impl FromStr for StatusCodeSelector { + type Err = StatusCodeSelectorError; fn from_str(input: &str) -> Result { let input = input.trim(); if input.is_empty() { - return Err(AcceptSelectorError::InvalidInput); + return Err(StatusCodeSelectorError::InvalidInput); } let ranges = input @@ -40,27 +44,27 @@ impl FromStr for AcceptSelector { } } -impl Default for AcceptSelector { +impl Default for StatusCodeSelector { fn default() -> Self { Self::new_from(vec![AcceptRange::new(100, 103), AcceptRange::new(200, 299)]) } } -impl Display for AcceptSelector { +impl Display for StatusCodeSelector { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let ranges: Vec<_> = self.ranges.iter().map(ToString::to_string).collect(); write!(f, "{}", ranges.join(",")) } } -impl AcceptSelector { - /// Creates a new empty [`AcceptSelector`]. +impl StatusCodeSelector { + /// Creates a new empty [`StatusCodeSelector`]. #[must_use] pub const fn new() -> Self { Self { ranges: Vec::new() } } - /// Creates a new [`AcceptSelector`] prefilled with `ranges`. + /// Creates a new [`StatusCodeSelector`] prefilled with `ranges`. #[must_use] pub fn new_from(ranges: Vec) -> Self { let mut selector = Self::new(); @@ -72,7 +76,7 @@ impl AcceptSelector { selector } - /// Adds a range of accepted HTTP status codes to this [`AcceptSelector`]. + /// Adds a range of HTTP status codes to this [`StatusCodeSelector`]. /// This method merges the new and existing ranges if they overlap. pub fn add_range(&mut self, range: AcceptRange) -> &mut Self { // Merge with previous range if possible @@ -88,7 +92,7 @@ impl AcceptSelector { self } - /// Returns whether this [`AcceptSelector`] contains `value`. + /// Returns whether this [`StatusCodeSelector`] contains `value`. #[must_use] pub fn contains(&self, value: u16) -> bool { self.ranges.iter().any(|range| range.contains(value)) @@ -115,10 +119,10 @@ impl AcceptSelector { } } -struct AcceptSelectorVisitor; +struct StatusCodeSelectorVisitor; -impl<'de> Visitor<'de> for AcceptSelectorVisitor { - type Value = AcceptSelector; +impl<'de> Visitor<'de> for StatusCodeSelectorVisitor { + type Value = StatusCodeSelector; fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { formatter.write_str("a string or a sequence of strings") @@ -128,7 +132,7 @@ impl<'de> Visitor<'de> for AcceptSelectorVisitor { where E: serde::de::Error, { - AcceptSelector::from_str(v).map_err(serde::de::Error::custom) + StatusCodeSelector::from_str(v).map_err(serde::de::Error::custom) } fn visit_i64(self, v: i64) -> Result @@ -136,7 +140,7 @@ impl<'de> Visitor<'de> for AcceptSelectorVisitor { E: serde::de::Error, { let value = u16::try_from(v).map_err(serde::de::Error::custom)?; - Ok(AcceptSelector::new_from(vec![AcceptRange::new( + Ok(StatusCodeSelector::new_from(vec![AcceptRange::new( value, value, )])) } @@ -145,7 +149,7 @@ impl<'de> Visitor<'de> for AcceptSelectorVisitor { where A: serde::de::SeqAccess<'de>, { - let mut selector = AcceptSelector::new(); + let mut selector = StatusCodeSelector::new(); while let Some(v) = seq.next_element::()? { if let Some(v) = v.as_integer() { let value = u16::try_from(v).map_err(serde::de::Error::custom)?; @@ -167,12 +171,12 @@ impl<'de> Visitor<'de> for AcceptSelectorVisitor { } } -impl<'de> Deserialize<'de> for AcceptSelector { +impl<'de> Deserialize<'de> for StatusCodeSelector { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { - deserializer.deserialize_any(AcceptSelectorVisitor) + deserializer.deserialize_any(StatusCodeSelectorVisitor) } } @@ -192,7 +196,7 @@ mod test { #[case] invalid_values: Vec, #[case] length: usize, ) { - let selector = AcceptSelector::from_str(input).unwrap(); + let selector = StatusCodeSelector::from_str(input).unwrap(); assert_eq!(selector.len(), length); for valid in valid_values { @@ -220,7 +224,7 @@ mod test { ) { #[derive(Deserialize)] struct Config { - accept: AcceptSelector, + accept: StatusCodeSelector, } let config: Config = toml::from_str(input).unwrap(); @@ -239,7 +243,7 @@ mod test { #[case("100..=150,200..=300", "100..=150,200..=300")] #[case("100..=150,300", "100..=150,300..=300")] fn test_display(#[case] input: &str, #[case] display: &str) { - let selector = AcceptSelector::from_str(input).unwrap(); + let selector = StatusCodeSelector::from_str(input).unwrap(); assert_eq!(selector.to_string(), display); } }