Skip to content

Commit

Permalink
feat(client): Implement RawRequestBuilder for raw HTTP requests (#257)
Browse files Browse the repository at this point in the history
  • Loading branch information
surajk-m authored Aug 31, 2024
1 parent ed0ba36 commit f106027
Showing 1 changed file with 268 additions and 88 deletions.
356 changes: 268 additions & 88 deletions src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,268 @@ impl Client {
self.issue(Cmd::Persist).await?;
Ok(())
}

/// Create a new raw request builder.
///
/// This method allows to build a direct HTTP request to a remote site without routing
/// through the WebDriver host. It preserves the cookies and user agent from the current
/// WebDriver session, enabling you to maintain the session context while making external
/// requests.
///
/// This can be useful for operations where direct access is needed or when
/// interacting with third-party services that require the same session cookies.
pub fn raw_request(
&self,
) -> RawRequestBuilder<
'_,
fn(http::request::Builder) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>>,
> {
RawRequestBuilder::new(self)
}
}

/// A builder for constructing raw HTTP requests with optional cookies.
///
/// ```no_run
/// # use fantoccini::{ClientBuilder, Locator};
/// # #[tokio::main]
/// # async fn main() -> Result<(), fantoccini::error::CmdError> {
/// # #[cfg(all(feature = "native-tls", not(feature = "rustls-tls")))]
/// # let client = ClientBuilder::native().connect("http://localhost:4444").await.expect("failed to connect to WebDriver");
/// # #[cfg(feature = "rustls-tls")]
/// # let client = ClientBuilder::rustls().expect("rustls initialization").connect("http://localhost:4444").await.expect("failed to connect to WebDriver");
/// # #[cfg(all(not(feature = "native-tls"), not(feature = "rustls-tls")))]
/// # let client: fantoccini::Client = unreachable!("no tls provider available");
/// // go back to the frontpage
/// client.goto("https://www.wikipedia.org/").await?;
/// // find the source for the Wikipedia globe
/// let img = client.find(Locator::Css("img.central-featured-logo")).await?;
/// let src = img.attr("src").await?.expect("image should have a src");
/// // now build a raw HTTP client request
/// // we could just use client.raw_client_for() here,
/// // but let's use the builder to show how it works:
/// let mut builder = client.raw_request();
/// builder.method(hyper::Method::GET).url(&src);
/// // we don't need cookies for this request
/// builder.skip_cookie_navigation();
/// let raw = builder.send().await?;
///
/// // we then read out the image bytes
/// use futures_util::TryStreamExt;
/// use http_body_util::BodyExt;
/// let pixels = raw
/// .into_body()
/// .collect()
/// .await
/// .map_err(fantoccini::error::CmdError::from)?
/// .to_bytes();
/// // and voilla, we now have the bytes for the Wikipedia logo!
/// assert!(pixels.len() > 0);
/// println!("Wikipedia logo is {}b", pixels.len());
/// # client.close().await
/// # }
/// ```
#[derive(Clone, Debug)]
pub struct RawRequestBuilder<'a, F> {
client: &'a Client,
method: Method,
url: String,
cookie_url: Option<String>,
request_modifier: F,
}

fn empty_body(
req: http::request::Builder,
) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>> {
req.body(BoxBody::new(http_body_util::Empty::new()))
.unwrap()
}

impl<'a>
RawRequestBuilder<
'a,
fn(http::request::Builder) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>>,
>
{
/// Create a new raw request builder.
pub fn new(client: &'a Client) -> Self {
RawRequestBuilder {
client,
method: Method::GET,
url: String::new(),
cookie_url: Some("/please_give_me_your_cookies".to_string()),
request_modifier: empty_body,
}
}
}

impl<'a, F> RawRequestBuilder<'a, F> {
/// Set the HTTP method for the request.
pub fn method(&mut self, method: Method) -> &mut Self {
self.method = method;
self
}

/// Set the URL for the request.
pub fn url(&mut self, url: &str) -> &mut Self {
self.url = url.to_string();
self
}

/// Set the URL for retrieving cookies.
///
/// The WebDriver specification requires that cookies can only be retrieved or set for the
/// current domain of the active WebDriver session. This method sets a `cookie_url` which
/// the WebDriver client will navigate to in order to retrieve the cookies needed for
/// the raw HTTP request.
///
/// This approach is necessary due to the WebDriver limitation discussed in
/// [w3c/webdriver#1238](https://github.com/w3c/webdriver/issues/1238),
/// which prevents setting cookies for a domain that the WebDriver is not currently on.
///
/// By setting this URL, you can ensure that the appropriate cookies are included in the
/// raw HTTP request. This can be particularly useful for scenarios where you need to
/// reuse cookies from a previous session to avoid redundant login operations or share
/// WebDriver sessions across different threads with distinct cookies.
///
/// - [Issue #148](https://github.com/jonhoo/fantoccini/issues/148)
pub fn cookie_url(&mut self, url: &str) -> &mut Self {
self.cookie_url = Some(url.to_string());
self
}

/// Opt out of the cookie navigation process.
///
/// This allows to skip the navigation to a cookie URL, if you don't want to retrieve cookies.
pub fn skip_cookie_navigation(&mut self) -> &mut Self {
self.cookie_url = None;
self
}
}

impl<'a, F> RawRequestBuilder<'a, F> {
/// Set a function to modify the request.
pub fn map_request<F2>(self, f: F2) -> RawRequestBuilder<'a, F2>
where
F2: FnOnce(
http::request::Builder,
) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>>,
{
RawRequestBuilder {
client: self.client,
method: self.method,
url: self.url,
cookie_url: self.cookie_url,
request_modifier: f,
}
}
}

impl<'a, F> RawRequestBuilder<'a, F>
where
F: FnOnce(http::request::Builder) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>>,
{
/// Send the constructed request.
pub async fn send(self) -> Result<hyper::Response<hyper::body::Incoming>, error::CmdError> {
// Allow navigation by relative URL:
let old_url = self.client.current_url_().await?;
let url = old_url.join(&self.url)?;

// We need to do some trickiness here. GetCookies will only give us the cookies for the
// *current* domain, whereas we want the cookies for `url`'s domain. So, we navigate to the
// URL in question, fetch its cookies, and then navigate back. *Except* that we can't do
// that either (what if `url` is some huge file?). So we *actually* navigate to some weird
// url that's unlikely to exist on the target domain, and which won't resolve into the
// actual content, but will still give the same cookies.
//
// The fact that cookies can have /path and security constraints makes this even more of a
// pain. /path in particular is tricky, because you could have a URL like:
//
// example.com/download/some_identifier/ignored_filename_just_for_show
//
// Imagine if a cookie is set with path=/download/some_identifier. How do we get that
// cookie without triggering a request for the (large) file? I don't know. Hence: TODO.
//
let cookies = if let Some(cookie_url) = self.cookie_url {
let cookie_url = url.join(&cookie_url)?;
self.client.goto(cookie_url.as_str()).await?;

// TODO: go back before we return if this call errors:
let cookies = self.client.issue(WebDriverCommand::GetCookies).await?;
self.client.back().await?;

if !cookies.is_array() {
return Err(error::CmdError::NotW3C(cookies));
}

// now add all the cookies
let mut all_ok = true;
let mut jar = Vec::new();
for cookie in cookies.as_array().unwrap() {
if !cookie.is_object() {
all_ok = false;
break;
}

// https://w3c.github.io/webdriver/webdriver-spec.html#cookies
let cookie = cookie.as_object().unwrap();
if !cookie.contains_key("name") || !cookie.contains_key("value") {
all_ok = false;
break;
}

if !cookie["name"].is_string() || !cookie["value"].is_string() {
all_ok = false;
break;
}

// Note that since we're sending these cookies, all that matters is the mapping
// from name to value. The other fields only matter when deciding whether to
// include a cookie or not, and the driver has already decided that for us
// (GetCookies is for a particular URL).
jar.push(
cookie::Cookie::new(
cookie["name"].as_str().unwrap().to_owned(),
cookie["value"].as_str().unwrap().to_owned(),
)
.encoded()
.to_string(),
);
}

if !all_ok {
return Err(error::CmdError::NotW3C(cookies));
}

Some(jar.join("; "))
} else {
None
};

let mut req = hyper::Request::builder();
req = req
.method(self.method)
.uri(http::Uri::try_from(url.as_str()).unwrap());

if let Some(cookies) = cookies {
req = req.header(hyper::header::COOKIE, cookies);
}

let ua = self.client.get_ua().await?;
if let Some(ua) = ua {
req = req.header(hyper::header::USER_AGENT, ua);
}

let req = (self.request_modifier)(req);

let (tx, rx) = oneshot::channel();
self.client.issue(Cmd::Raw { req, rsp: tx }).await?;
match rx.await {
Ok(Ok(r)) => Ok(r),
Ok(Err(e)) => Err(e.into()),
Err(e) => unreachable!("Session ended prematurely: {:?}", e),
}
}
}

// NOTE: new impl block to keep related methods together.
Expand Down Expand Up @@ -903,11 +1165,9 @@ impl Client {
method: Method,
url: &str,
) -> Result<hyper::Response<hyper::body::Incoming>, error::CmdError> {
self.with_raw_client_for(method, url, |req| {
req.body(BoxBody::new(http_body_util::Empty::new()))
.unwrap()
})
.await
let mut builder = self.raw_request();
builder.method(method).url(url);
builder.send().await
}

/// Build and issue an HTTP request to the given `url` with all the same cookies as the current
Expand All @@ -926,89 +1186,9 @@ impl Client {
http::request::Builder,
) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>>,
{
let url = url.to_owned();
// We need to do some trickiness here. GetCookies will only give us the cookies for the
// *current* domain, whereas we want the cookies for `url`'s domain. So, we navigate to the
// URL in question, fetch its cookies, and then navigate back. *Except* that we can't do
// that either (what if `url` is some huge file?). So we *actually* navigate to some weird
// url that's unlikely to exist on the target domain, and which won't resolve into the
// actual content, but will still give the same cookies.
//
// The fact that cookies can have /path and security constraints makes this even more of a
// pain. /path in particular is tricky, because you could have a URL like:
//
// example.com/download/some_identifier/ignored_filename_just_for_show
//
// Imagine if a cookie is set with path=/download/some_identifier. How do we get that
// cookie without triggering a request for the (large) file? I don't know. Hence: TODO.
let old_url = self.current_url_().await?;
let url = old_url.clone().join(&url)?;
let cookie_url = url.clone().join("/please_give_me_your_cookies")?;
self.goto(cookie_url.as_str()).await?;

// TODO: go back before we return if this call errors:
let cookies = self.issue(WebDriverCommand::GetCookies).await?;
if !cookies.is_array() {
return Err(error::CmdError::NotW3C(cookies));
}
self.back().await?;
let ua = self.get_ua().await?;

// now add all the cookies
let mut all_ok = true;
let mut jar = Vec::new();
for cookie in cookies.as_array().unwrap() {
if !cookie.is_object() {
all_ok = false;
break;
}

// https://w3c.github.io/webdriver/webdriver-spec.html#cookies
let cookie = cookie.as_object().unwrap();
if !cookie.contains_key("name") || !cookie.contains_key("value") {
all_ok = false;
break;
}

if !cookie["name"].is_string() || !cookie["value"].is_string() {
all_ok = false;
break;
}

// Note that since we're sending these cookies, all that matters is the mapping
// from name to value. The other fields only matter when deciding whether to
// include a cookie or not, and the driver has already decided that for us
// (GetCookies is for a particular URL).
jar.push(
cookie::Cookie::new(
cookie["name"].as_str().unwrap().to_owned(),
cookie["value"].as_str().unwrap().to_owned(),
)
.encoded()
.to_string(),
);
}

if !all_ok {
return Err(error::CmdError::NotW3C(cookies));
}

let mut req = hyper::Request::builder();
req = req
.method(method)
.uri(http::Uri::try_from(url.as_str()).unwrap());
req = req.header(hyper::header::COOKIE, jar.join("; "));
if let Some(s) = ua {
req = req.header(hyper::header::USER_AGENT, s);
}
let req = before(req);
let (tx, rx) = oneshot::channel();
self.issue(Cmd::Raw { req, rsp: tx }).await?;
match rx.await {
Ok(Ok(r)) => Ok(r),
Ok(Err(e)) => Err(e.into()),
Err(e) => unreachable!("Session ended prematurely: {:?}", e),
}
let mut builder = self.raw_request();
builder.method(method).url(url).clone().map_request(before);
builder.send().await
}
}

Expand Down

0 comments on commit f106027

Please sign in to comment.