Skip to content

Commit

Permalink
Error handling (#24)
Browse files Browse the repository at this point in the history
* Add a test for when the URL is invalid and parsing panics

* Initial error handling

ref #22

* Rename ErrorKind::HttpClient => ErrorKind::Http

* Implement std::error::Error and rename to Error
  • Loading branch information
spk authored Mar 8, 2020
1 parent 1474a8c commit df49f6b
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 17 deletions.
20 changes: 11 additions & 9 deletions src/http/reqwest/async_reqwest.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
use reqwest::{Client, Request};
use reqwest::{Method, Error};
use reqwest::Method;
use reqwest::Error as ReqwestError;
use reqwest::header::HeaderValue;
use url::{Origin, Url};
use reqwest::header::USER_AGENT;
use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
use crate::parser::{ParseResult, parse_fetched_robots_txt};
use crate::model::FetchedRobotsTxt;
use crate::model::{Error, ErrorKind};
use std::pin::Pin;
use futures::task::{Context, Poll};
use futures::Future;
use futures::future::TryFutureExt;
use futures::future::ok as future_ok;

type FetchFuture = Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>;
type FetchFuture = Box<dyn Future<Output=Result<(ResponseInfo, String), ReqwestError>>>;

impl RobotsTxtClient for Client {
type Result = RobotsTxtResponse;
type Result = Result<RobotsTxtResponse, Error>;
fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
let url = format!("{}/robots.txt", origin.unicode_serialization());
let url = Url::parse(&url).expect("Unable to parse robots.txt url");
let url = Url::parse(&url).map_err(|err| Error {kind: ErrorKind::Url(err)})?;
let mut request = Request::new(Method::GET, url);
let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
let response = self
Expand All @@ -29,11 +31,11 @@ impl RobotsTxtClient for Client {
return future_ok((response_info, response_text));
});
});
let response: Pin<Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>> = Box::pin(response);
return RobotsTxtResponse {
let response: Pin<Box<dyn Future<Output=Result<(ResponseInfo, String), ReqwestError>>>> = Box::pin(response);
Ok(RobotsTxtResponse {
origin,
response,
}
})
}
}

Expand All @@ -55,7 +57,7 @@ impl RobotsTxtResponse {
}

impl Future for RobotsTxtResponse {
type Output = Result<ParseResult<FetchedRobotsTxt>, Error>;
type Output = Result<ParseResult<FetchedRobotsTxt>, ReqwestError>;

fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
let self_mut = self.get_mut();
Expand All @@ -73,4 +75,4 @@ impl Future for RobotsTxtResponse {
},
}
}
}
}
11 changes: 6 additions & 5 deletions src/http/reqwest/sync_reqwest.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,24 @@
use reqwest::blocking::{Client, Request};
use reqwest::{Method, Error};
use reqwest::Method;
use reqwest::header::HeaderValue;
use url::{Origin, Url};
use reqwest::header::USER_AGENT;
use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
use crate::parser::{ParseResult, parse_fetched_robots_txt};
use crate::model::FetchedRobotsTxt;
use crate::model::{Error, ErrorKind};

impl RobotsTxtClient for Client {
type Result = Result<ParseResult<FetchedRobotsTxt>, Error>;
fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
let url = format!("{}/robots.txt", origin.unicode_serialization());
let url = Url::parse(&url).expect("Unable to parse robots.txt url");
let url = Url::parse(&url).map_err(|err| Error {kind: ErrorKind::Url(err)})?;
let mut request = Request::new(Method::GET, url);
let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
let response = self.execute(request)?;
let response = self.execute(request).map_err(|err| Error {kind: ErrorKind::Http(err)})?;
let status_code = response.status().as_u16();
let text = response.text()?;
let text = response.text().map_err(|err| Error {kind: ErrorKind::Http(err)})?;
let robots_txt = parse_fetched_robots_txt(origin, status_code, &text);
return Ok(robots_txt);
}
}
}
4 changes: 3 additions & 1 deletion src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,6 @@ pub (crate) use self::fetched_robots_txt::FetchedRobotsTxtContainer;
mod fetched_robots_txt;
pub use self::robots_txt::RobotsTxt;
mod path;
pub (crate) use self::path::Path;
pub (crate) use self::path::Path;
mod errors;
pub use self::errors::{Error, ErrorKind};
23 changes: 23 additions & 0 deletions src/model/errors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
use std::fmt;

/// Error returned when fetching or parsing a robots.txt file fails.
#[derive(Debug)]
pub struct Error {
    /// The specific category of failure.
    pub kind: ErrorKind,
}

/// The kind of failure that produced an [`Error`].
#[derive(Debug)]
pub enum ErrorKind {
    /// The robots.txt URL could not be parsed.
    Url(url::ParseError),
    /// The HTTP request for robots.txt failed.
    Http(reqwest::Error),
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Delegate to the underlying error's Display output so the
        // message matches what the source error would print.
        match self.kind {
            ErrorKind::Url(ref err) => err.fmt(f),
            ErrorKind::Http(ref err) => err.fmt(f),
        }
    }
}

impl std::error::Error for Error {
    /// Expose the underlying cause so callers can walk the error chain
    /// (e.g. via `Error::source()` loops or error-reporting crates).
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self.kind {
            ErrorKind::Url(ref err) => Some(err),
            ErrorKind::Http(ref err) => Some(err),
        }
    }
}
18 changes: 16 additions & 2 deletions tests/test_reqwest_async.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,28 @@ use robotparser::service::RobotsTxtService;
use reqwest::Client;
use url::Url;
use tokio::runtime::Runtime;
use url::{Host, Origin};

#[test]
fn test_reqwest_async() {
let mut runtime = Runtime::new().unwrap();
let client = Client::new();
let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()));
let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()).unwrap());
let robots_txt = robots_txt_response.unwrap().get_result();
let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
assert!(robots_txt.can_fetch("*", &fetch_url));
}
let fetch_url = Url::parse("http://www.python.org/webstats/").unwrap();
assert!(!robots_txt.can_fetch("*", &fetch_url));
}

#[test]
// NOTE(review): despite "blocking" in the name, this test lives in the
// async suite (tests/test_reqwest_async.rs); the name appears copied from
// tests/test_reqwest_blocking.rs — confirm and consider renaming.
fn test_reqwest_blocking_panic_url() {
    let client = Client::new();
    // "python.org::" is not a valid host, so building the robots.txt URL
    // must fail with an Err instead of panicking (regression test for #22).
    let host = Host::Domain("python.org::".into());
    let origin = Origin::Tuple("https".into(), host, 80);
    assert!(client.fetch_robots_txt(origin).is_err());
}
14 changes: 14 additions & 0 deletions tests/test_reqwest_blocking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use robotparser::http::RobotsTxtClient;
use robotparser::service::RobotsTxtService;
use reqwest::blocking::Client;
use url::Url;
use url::{Host, Origin};

#[test]
fn test_reqwest_blocking() {
Expand All @@ -10,4 +11,17 @@ fn test_reqwest_blocking() {
let robots_txt = client.fetch_robots_txt(robots_txt_url.origin()).unwrap().get_result();
let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
assert!(robots_txt.can_fetch("*", &fetch_url));
let fetch_url = Url::parse("https://www.python.org/webstats/").unwrap();
assert!(!robots_txt.can_fetch("*", &fetch_url));
}

#[test]
fn test_reqwest_blocking_panic_url() {
    let client = Client::new();
    // "python.org::" is not a valid host, so building the robots.txt URL
    // must fail with an Err instead of panicking (regression test for #22).
    let host = Host::Domain("python.org::".into());
    let origin = Origin::Tuple("https".into(), host, 80);
    assert!(client.fetch_robots_txt(origin).is_err());
}

0 comments on commit df49f6b

Please sign in to comment.