diff --git a/src/http/reqwest/async_reqwest.rs b/src/http/reqwest/async_reqwest.rs
index ea87d5f2a..24e5b6bf6 100644
--- a/src/http/reqwest/async_reqwest.rs
+++ b/src/http/reqwest/async_reqwest.rs
@@ -6,6 +6,7 @@ use reqwest::header::USER_AGENT;
 use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
 use crate::parser::{ParseResult, parse_fetched_robots_txt};
 use crate::model::FetchedRobotsTxt;
+use crate::model::{RobotparserError, ErrorKind};
 use std::pin::Pin;
 use futures::task::{Context, Poll};
 use futures::Future;
@@ -15,10 +16,10 @@ use futures::future::ok as future_ok;
 type FetchFuture = Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>;
 
 impl RobotsTxtClient for Client {
-    type Result = RobotsTxtResponse;
+    type Result = Result<RobotsTxtResponse, RobotparserError>;
     fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
         let url = format!("{}/robots.txt", origin.unicode_serialization());
-        let url = Url::parse(&url).expect("Unable to parse robots.txt url");
+        let url = Url::parse(&url).map_err(|err| RobotparserError {kind: ErrorKind::Url(err)})?;
         let mut request = Request::new(Method::GET, url);
         let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
         let response = self
@@ -30,10 +31,10 @@ impl RobotsTxtClient for Client {
                 });
             });
         let response: Pin<Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>> = Box::pin(response);
-        return RobotsTxtResponse {
+        Ok(RobotsTxtResponse {
             origin,
             response,
-        }
+        })
     }
 }
@@ -73,4 +74,4 @@ impl Future for RobotsTxtResponse {
             },
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/http/reqwest/sync_reqwest.rs b/src/http/reqwest/sync_reqwest.rs
index 0365d66db..a8e433490 100644
--- a/src/http/reqwest/sync_reqwest.rs
+++ b/src/http/reqwest/sync_reqwest.rs
@@ -1,23 +1,24 @@
 use reqwest::blocking::{Client, Request};
-use reqwest::{Method, Error};
+use reqwest::Method;
 use reqwest::header::HeaderValue;
 use url::{Origin, Url};
 use reqwest::header::USER_AGENT;
 use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
 use crate::parser::{ParseResult, parse_fetched_robots_txt};
 use crate::model::FetchedRobotsTxt;
+use crate::model::{RobotparserError, ErrorKind};
 
 impl RobotsTxtClient for Client {
-    type Result = Result<ParseResult<FetchedRobotsTxt>, Error>;
+    type Result = Result<ParseResult<FetchedRobotsTxt>, RobotparserError>;
     fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
         let url = format!("{}/robots.txt", origin.unicode_serialization());
-        let url = Url::parse(&url).expect("Unable to parse robots.txt url");
+        let url = Url::parse(&url).map_err(|err| RobotparserError {kind: ErrorKind::Url(err)})?;
         let mut request = Request::new(Method::GET, url);
         let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
-        let response = self.execute(request)?;
+        let response = self.execute(request).map_err(|err| RobotparserError {kind: ErrorKind::HttpClient(err)})?;
         let status_code = response.status().as_u16();
-        let text = response.text()?;
+        let text = response.text().map_err(|err| RobotparserError {kind: ErrorKind::HttpClient(err)})?;
         let robots_txt = parse_fetched_robots_txt(origin, status_code, &text);
         return Ok(robots_txt);
     }
-}
\ No newline at end of file
+}
diff --git a/src/model.rs b/src/model.rs
index 483385d4b..16f1885f4 100644
--- a/src/model.rs
+++ b/src/model.rs
@@ -14,4 +14,6 @@ pub (crate) use self::fetched_robots_txt::FetchedRobotsTxtContainer;
 mod fetched_robots_txt;
 pub use self::robots_txt::RobotsTxt;
 mod path;
-pub (crate) use self::path::Path;
\ No newline at end of file
+pub (crate) use self::path::Path;
+mod errors;
+pub use self::errors::{RobotparserError, ErrorKind};
diff --git a/src/model/errors.rs b/src/model/errors.rs
new file mode 100644
index 000000000..a0ef4a8da
--- /dev/null
+++ b/src/model/errors.rs
@@ -0,0 +1,21 @@
+use std::fmt;
+
+#[derive(Debug)]
+pub struct RobotparserError {
+    pub kind: ErrorKind,
+}
+
+#[derive(Debug)]
+pub enum ErrorKind {
+    Url(url::ParseError),
+    HttpClient(reqwest::Error),
+}
+
+impl fmt::Display for RobotparserError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self.kind {
+            ErrorKind::Url(ref err) => err.fmt(f),
+            ErrorKind::HttpClient(ref err) => err.fmt(f),
+        }
+    }
+}
diff --git a/tests/test_reqwest_async.rs b/tests/test_reqwest_async.rs
index 5da6b0f50..3701b2b52 100644
--- a/tests/test_reqwest_async.rs
+++ b/tests/test_reqwest_async.rs
@@ -10,7 +10,7 @@ fn test_reqwest_async() {
     let mut runtime = Runtime::new().unwrap();
     let client = Client::new();
     let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
-    let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()));
+    let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()).unwrap());
     let robots_txt = robots_txt_response.unwrap().get_result();
     let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     assert!(robots_txt.can_fetch("*", &fetch_url));
@@ -19,10 +19,12 @@
 }
 
 #[test]
-#[should_panic]
 fn test_reqwest_blocking_panic_url() {
     let client = Client::new();
     let host = Host::Domain("python.org::".into());
     let origin = Origin::Tuple("https".into(), host, 80);
-    client.fetch_robots_txt(origin);
+    match client.fetch_robots_txt(origin) {
+        Ok(_) => assert!(false),
+        Err(_) => assert!(true)
+    }
 }
diff --git a/tests/test_reqwest_blocking.rs b/tests/test_reqwest_blocking.rs
index 42e529df2..b82681127 100644
--- a/tests/test_reqwest_blocking.rs
+++ b/tests/test_reqwest_blocking.rs
@@ -16,10 +16,12 @@ fn test_reqwest_blocking() {
 }
 
 #[test]
-#[should_panic]
 fn test_reqwest_blocking_panic_url() {
     let client = Client::new();
     let host = Host::Domain("python.org::".into());
     let origin = Origin::Tuple("https".into(), host, 80);
-    client.fetch_robots_txt(origin).unwrap().get_result();
+    match client.fetch_robots_txt(origin) {
+        Ok(_) => assert!(false),
+        Err(_) => assert!(true)
+    }
 }