Initial error handling

ref #22

spk committed Mar 7, 2020
1 parent 503d351 commit e150c1f
Showing 6 changed files with 46 additions and 17 deletions.
src/http/reqwest/async_reqwest.rs (11 changes: 6 additions & 5 deletions)
@@ -6,6 +6,7 @@ use reqwest::header::USER_AGENT;
 use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
 use crate::parser::{ParseResult, parse_fetched_robots_txt};
 use crate::model::FetchedRobotsTxt;
+use crate::model::{RobotparserError, ErrorKind};
 use std::pin::Pin;
 use futures::task::{Context, Poll};
 use futures::Future;
@@ -15,10 +16,10 @@ use futures::future::ok as future_ok;
 type FetchFuture = Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>;
 
 impl RobotsTxtClient for Client {
-    type Result = RobotsTxtResponse;
+    type Result = Result<RobotsTxtResponse, RobotparserError>;
     fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
         let url = format!("{}/robots.txt", origin.unicode_serialization());
-        let url = Url::parse(&url).expect("Unable to parse robots.txt url");
+        let url = Url::parse(&url).map_err(|err| RobotparserError {kind: ErrorKind::Url(err)})?;
         let mut request = Request::new(Method::GET, url);
         let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
         let response = self
@@ -30,10 +31,10 @@ impl RobotsTxtClient for Client {
                 });
             });
         let response: Pin<Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>> = Box::pin(response);
-        return RobotsTxtResponse {
+        Ok(RobotsTxtResponse {
             origin,
             response,
-        }
+        })
     }
 }
 
@@ -73,4 +74,4 @@ impl Future for RobotsTxtResponse {
             },
         }
     }
-}
\ No newline at end of file
+}
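With this change the async client reports a bad robots.txt URL immediately, while network failures still surface when the returned future resolves. A minimal sketch of the new call pattern, modeled on the existing async test; the robotparser crate paths and the tokio Runtime are assumptions carried over from that test, not part of this diff:

use reqwest::Client;
use robotparser::http::RobotsTxtClient;
use tokio::runtime::Runtime;
use url::Url;

fn main() {
    let mut runtime = Runtime::new().unwrap();
    let client = Client::new();
    let url = Url::parse("https://www.python.org/robots.txt").unwrap();
    // fetch_robots_txt now returns Result<RobotsTxtResponse, RobotparserError>,
    // so a malformed origin is rejected before any request is sent.
    match client.fetch_robots_txt(url.origin()) {
        Ok(response_future) => {
            // The future itself still resolves to a Result; HTTP errors land here.
            let robots_txt = runtime.block_on(response_future).unwrap().get_result();
            assert!(robots_txt.can_fetch("*", &url));
        }
        Err(err) => eprintln!("could not build robots.txt request: {}", err),
    }
}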
src/http/reqwest/sync_reqwest.rs (13 changes: 7 additions & 6 deletions)
@@ -1,23 +1,24 @@
 use reqwest::blocking::{Client, Request};
-use reqwest::{Method, Error};
+use reqwest::Method;
 use reqwest::header::HeaderValue;
 use url::{Origin, Url};
 use reqwest::header::USER_AGENT;
 use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
 use crate::parser::{ParseResult, parse_fetched_robots_txt};
 use crate::model::FetchedRobotsTxt;
+use crate::model::{RobotparserError, ErrorKind};
 
 impl RobotsTxtClient for Client {
-    type Result = Result<ParseResult<FetchedRobotsTxt>, Error>;
+    type Result = Result<ParseResult<FetchedRobotsTxt>, RobotparserError>;
     fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
         let url = format!("{}/robots.txt", origin.unicode_serialization());
-        let url = Url::parse(&url).expect("Unable to parse robots.txt url");
+        let url = Url::parse(&url).map_err(|err| RobotparserError {kind: ErrorKind::Url(err)})?;
         let mut request = Request::new(Method::GET, url);
         let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
-        let response = self.execute(request)?;
+        let response = self.execute(request).map_err(|err| RobotparserError {kind: ErrorKind::HttpClient(err)})?;
         let status_code = response.status().as_u16();
-        let text = response.text()?;
+        let text = response.text().map_err(|err| RobotparserError {kind: ErrorKind::HttpClient(err)})?;
         let robots_txt = parse_fetched_robots_txt(origin, status_code, &text);
         return Ok(robots_txt);
     }
-}
\ No newline at end of file
+}
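On the blocking side, URL construction, the HTTP round trip, and body decoding now all funnel into a single RobotparserError instead of a panic or a raw reqwest::Error. A hedged usage sketch under the same assumed crate paths as above:

use reqwest::blocking::Client;
use robotparser::http::RobotsTxtClient;
use url::Url;

fn main() {
    let client = Client::new();
    let url = Url::parse("https://www.python.org/robots.txt").unwrap();
    // A single Result now covers URL parsing, the request, and reading the body.
    match client.fetch_robots_txt(url.origin()) {
        Ok(parse_result) => {
            let robots_txt = parse_result.get_result();
            println!("allowed: {}", robots_txt.can_fetch("*", &url));
        }
        Err(err) => eprintln!("robots.txt fetch failed: {}", err),
    }
}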
src/model.rs (4 changes: 3 additions & 1 deletion)
@@ -14,4 +14,6 @@ pub (crate) use self::fetched_robots_txt::FetchedRobotsTxtContainer;
 mod fetched_robots_txt;
 pub use self::robots_txt::RobotsTxt;
 mod path;
-pub (crate) use self::path::Path;
\ No newline at end of file
+pub (crate) use self::path::Path;
+mod errors;
+pub use self::errors::{RobotparserError, ErrorKind};
src/model/errors.rs (21 changes: 21 additions & 0 deletions)
@@ -0,0 +1,21 @@
+use std::fmt;
+
+#[derive(Debug)]
+pub struct RobotparserError {
+    pub kind: ErrorKind,
+}
+
+#[derive(Debug)]
+pub enum ErrorKind {
+    Url(url::ParseError),
+    HttpClient(reqwest::Error),
+}
+
+impl fmt::Display for RobotparserError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self.kind {
+            ErrorKind::Url(ref err) => err.fmt(f),
+            ErrorKind::HttpClient(ref err) => err.fmt(f),
+        }
+    }
+}
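RobotparserError implements Display but not yet std::error::Error, so it does not compose with Box<dyn Error> or expose its cause through source(). One possible follow-up, sketched here as an assumption rather than anything in this commit, would extend src/model/errors.rs like this:

use std::error::Error;

// Hypothetical addition, not part of this commit: expose the wrapped
// error as the source so callers can walk the error chain.
impl Error for RobotparserError {
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        match self.kind {
            ErrorKind::Url(ref err) => Some(err),
            ErrorKind::HttpClient(ref err) => Some(err),
        }
    }
}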
tests/test_reqwest_async.rs (8 changes: 5 additions & 3 deletions)
@@ -10,7 +10,7 @@ fn test_reqwest_async() {
     let mut runtime = Runtime::new().unwrap();
     let client = Client::new();
     let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
-    let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()));
+    let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()).unwrap());
     let robots_txt = robots_txt_response.unwrap().get_result();
     let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     assert!(robots_txt.can_fetch("*", &fetch_url));
@@ -19,10 +19,12 @@ fn test_reqwest_async() {
 }
 
 #[test]
-#[should_panic]
 fn test_reqwest_blocking_panic_url() {
     let client = Client::new();
     let host = Host::Domain("python.org::".into());
     let origin = Origin::Tuple("https".into(), host, 80);
-    client.fetch_robots_txt(origin);
+    match client.fetch_robots_txt(origin) {
+        Ok(_) => assert!(false),
+        Err(_) => assert!(true)
+    }
 }
tests/test_reqwest_blocking.rs (6 changes: 4 additions & 2 deletions)
@@ -16,10 +16,12 @@ fn test_reqwest_blocking() {
 }
 
 #[test]
-#[should_panic]
 fn test_reqwest_blocking_panic_url() {
     let client = Client::new();
     let host = Host::Domain("python.org::".into());
     let origin = Origin::Tuple("https".into(), host, 80);
-    client.fetch_robots_txt(origin).unwrap().get_result();
+    match client.fetch_robots_txt(origin) {
+        Ok(_) => assert!(false),
+        Err(_) => assert!(true)
+    }
 }
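Because the malformed-origin case now comes back as a value rather than a panic, a test could also assert on which error variant it is. A hedged variant of the test above, assuming the new types are reachable from integration tests as robotparser::model::{RobotparserError, ErrorKind}:

use reqwest::blocking::Client;
use robotparser::http::RobotsTxtClient;
use robotparser::model::{ErrorKind, RobotparserError};
use url::{Host, Origin};

#[test]
fn test_reqwest_blocking_bad_origin_is_url_error() {
    let client = Client::new();
    let host = Host::Domain("python.org::".into());
    let origin = Origin::Tuple("https".into(), host, 80);
    match client.fetch_robots_txt(origin) {
        // The bad host should fail URL construction, not the HTTP request.
        Err(RobotparserError { kind: ErrorKind::Url(_) }) => {}
        Err(other) => panic!("expected a URL parse error, got {}", other),
        Ok(_) => panic!("expected an error for a malformed origin"),
    }
}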
