-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ref #22
- Loading branch information
Showing 6 changed files with 46 additions and 17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,24 @@ | ||
use reqwest::blocking::{Client, Request}; | ||
use reqwest::{Method, Error}; | ||
use reqwest::Method; | ||
use reqwest::header::HeaderValue; | ||
use url::{Origin, Url}; | ||
use reqwest::header::USER_AGENT; | ||
use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT}; | ||
use crate::parser::{ParseResult, parse_fetched_robots_txt}; | ||
use crate::model::FetchedRobotsTxt; | ||
use crate::model::{RobotparserError, ErrorKind}; | ||
|
||
// Implements `RobotsTxtClient` for the blocking reqwest `Client`.
// NOTE(review): this is a rendered diff. Where two near-identical statements
// appear back to back, the first is presumably the removed line and the second
// its replacement, which reports failures as `RobotparserError` - confirm
// against the actual commit.
impl RobotsTxtClient for Client { | ||
// Result type of a fetch: the first line uses reqwest's `Error`, the second
// uses `RobotparserError` as the error type.
type Result = Result<ParseResult<FetchedRobotsTxt>, Error>; | ||
type Result = Result<ParseResult<FetchedRobotsTxt>, RobotparserError>; | ||
// Fetches `<origin>/robots.txt` with a GET request and passes the origin,
// the numeric status code and the response body to `parse_fetched_robots_txt`.
fn fetch_robots_txt(&self, origin: Origin) -> Self::Result { | ||
let url = format!("{}/robots.txt", origin.unicode_serialization()); | ||
// The first variant panics (via `expect`) when the URL cannot be parsed;
// the second maps the parse error into `ErrorKind::Url` and returns it with `?`.
let url = Url::parse(&url).expect("Unable to parse robots.txt url"); | ||
let url = Url::parse(&url).map_err(|err| RobotparserError {kind: ErrorKind::Url(err)})?; | ||
let mut request = Request::new(Method::GET, url); | ||
// Sets the User-Agent header to `DEFAULT_USER_AGENT`; the value returned by
// `insert` is explicitly discarded.
let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT)); | ||
// The first variant propagates the client error as-is; the second wraps it
// in `ErrorKind::HttpClient`.
let response = self.execute(request)?; | ||
let response = self.execute(request).map_err(|err| RobotparserError {kind: ErrorKind::HttpClient(err)})?; | ||
// The status code is read before `text()` is called on the response.
let status_code = response.status().as_u16(); | ||
// Errors while reading the body are handled the same way as errors from `execute`.
let text = response.text()?; | ||
let text = response.text().map_err(|err| RobotparserError {kind: ErrorKind::HttpClient(err)})?; | ||
let robots_txt = parse_fetched_robots_txt(origin, status_code, &text); | ||
return Ok(robots_txt); | ||
} | ||
// NOTE(review): the two closing braces below look like the old and new
// versions of the file's last line (e.g. a trailing-newline change) - confirm.
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
use std::fmt; | ||
|
||
/// Error value whose single public field `kind` holds the `ErrorKind`
/// describing what failed.
#[derive(Debug)] | ||
pub struct RobotparserError { | ||
// The concrete failure category, including the wrapped underlying error.
pub kind: ErrorKind, | ||
} | ||
|
||
/// Failure categories carried by `RobotparserError`; each variant wraps the
/// underlying error value.
#[derive(Debug)] | ||
pub enum ErrorKind { | ||
// Wraps a `url::ParseError`.
Url(url::ParseError), | ||
// Wraps a `reqwest::Error`.
HttpClient(reqwest::Error), | ||
} | ||
|
||
// `Display` implementation that adds no text of its own: it forwards to the
// `fmt` method of whichever error is wrapped in `self.kind`.
impl fmt::Display for RobotparserError { | ||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
// `ref` borrows the wrapped error instead of moving it out of `self.kind`.
// NOTE(review): only `std::fmt` is imported in the visible lines, so which
// trait's `fmt` these calls resolve to is not shown here - confirm.
match self.kind { | ||
ErrorKind::Url(ref err) => err.fmt(f), | ||
ErrorKind::HttpClient(ref err) => err.fmt(f), | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters