Skip to content

Commit

Permalink
Merge pull request #884 from epi052/878-support-raw-urls
Browse files Browse the repository at this point in the history
878 support raw urls
  • Loading branch information
epi052 authored Apr 26, 2023
2 parents ec78ec3 + 9876759 commit 1cf37e3
Show file tree
Hide file tree
Showing 13 changed files with 392 additions and 65 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "feroxbuster"
version = "2.9.4"
version = "2.9.5"
authors = ["Ben 'epi' Risher (@epi052)"]
license = "MIT"
edition = "2021"
Expand Down
4 changes: 2 additions & 2 deletions shell_completions/_feroxbuster
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ _feroxbuster() {
'--replay-proxy=[Send only unfiltered requests through a Replay Proxy, instead of all requests]:REPLAY_PROXY:_urls' \
'*-R+[Status Codes to send through a Replay Proxy when found (default: --status-codes value)]:REPLAY_CODE: ' \
'*--replay-codes=[Status Codes to send through a Replay Proxy when found (default: --status-codes value)]:REPLAY_CODE: ' \
'-a+[Sets the User-Agent (default: feroxbuster/2.9.4)]:USER_AGENT: ' \
'--user-agent=[Sets the User-Agent (default: feroxbuster/2.9.4)]:USER_AGENT: ' \
'-a+[Sets the User-Agent (default: feroxbuster/2.9.5)]:USER_AGENT: ' \
'--user-agent=[Sets the User-Agent (default: feroxbuster/2.9.5)]:USER_AGENT: ' \
'*-x+[File extension(s) to search for (ex: -x php -x pdf js)]:FILE_EXTENSION: ' \
'*--extensions=[File extension(s) to search for (ex: -x php -x pdf js)]:FILE_EXTENSION: ' \
'*-m+[Which HTTP request method(s) should be sent (default: GET)]:HTTP_METHODS: ' \
Expand Down
4 changes: 2 additions & 2 deletions shell_completions/_feroxbuster.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock {
[CompletionResult]::new('--replay-proxy', 'replay-proxy', [CompletionResultType]::ParameterName, 'Send only unfiltered requests through a Replay Proxy, instead of all requests')
[CompletionResult]::new('-R', 'R', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)')
[CompletionResult]::new('--replay-codes', 'replay-codes', [CompletionResultType]::ParameterName, 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)')
[CompletionResult]::new('-a', 'a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.9.4)')
[CompletionResult]::new('--user-agent', 'user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.9.4)')
[CompletionResult]::new('-a', 'a', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.9.5)')
[CompletionResult]::new('--user-agent', 'user-agent', [CompletionResultType]::ParameterName, 'Sets the User-Agent (default: feroxbuster/2.9.5)')
[CompletionResult]::new('-x', 'x', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js)')
[CompletionResult]::new('--extensions', 'extensions', [CompletionResultType]::ParameterName, 'File extension(s) to search for (ex: -x php -x pdf js)')
[CompletionResult]::new('-m', 'm', [CompletionResultType]::ParameterName, 'Which HTTP request method(s) should be sent (default: GET)')
Expand Down
4 changes: 2 additions & 2 deletions shell_completions/feroxbuster.elv
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ set edit:completion:arg-completer[feroxbuster] = {|@words|
cand --replay-proxy 'Send only unfiltered requests through a Replay Proxy, instead of all requests'
cand -R 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)'
cand --replay-codes 'Status Codes to send through a Replay Proxy when found (default: --status-codes value)'
cand -a 'Sets the User-Agent (default: feroxbuster/2.9.4)'
cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.9.4)'
cand -a 'Sets the User-Agent (default: feroxbuster/2.9.5)'
cand --user-agent 'Sets the User-Agent (default: feroxbuster/2.9.5)'
cand -x 'File extension(s) to search for (ex: -x php -x pdf js)'
cand --extensions 'File extension(s) to search for (ex: -x php -x pdf js)'
cand -m 'Which HTTP request method(s) should be sent (default: GET)'
Expand Down
5 changes: 2 additions & 3 deletions src/banner/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@ use super::entry::BannerEntry;
use crate::{
config::Configuration,
event_handlers::Handles,
utils::{logged_request, status_colorizer},
utils::{logged_request, parse_url_with_raw_path, status_colorizer},
DEFAULT_IGNORED_EXTENSIONS, DEFAULT_METHOD, DEFAULT_STATUS_CODES, VERSION,
};
use anyhow::{bail, Result};
use console::{style, Emoji};
use reqwest::Url;
use serde_json::Value;
use std::{io::Write, sync::Arc};

Expand Down Expand Up @@ -478,7 +477,7 @@ by Ben "epi" Risher {} ver: {}"#,
pub async fn check_for_updates(&mut self, url: &str, handles: Arc<Handles>) -> Result<()> {
log::trace!("enter: needs_update({}, {:?})", url, handles);

let api_url = Url::parse(url)?;
let api_url = parse_url_with_raw_path(url)?;

let result = logged_request(&api_url, DEFAULT_METHOD, None, handles.clone()).await?;
let body = result.text().await?;
Expand Down
7 changes: 5 additions & 2 deletions src/config/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ use super::utils::{
use crate::config::determine_output_level;
use crate::config::utils::determine_requester_policy;
use crate::{
client, parser, scan_manager::resume_scan, traits::FeroxSerialize, utils::fmt_err,
client, parser,
scan_manager::resume_scan,
traits::FeroxSerialize,
utils::{fmt_err, parse_url_with_raw_path},
DEFAULT_CONFIG_NAME,
};
use anyhow::{anyhow, Context, Result};
Expand Down Expand Up @@ -673,7 +676,7 @@ impl Configuration {
for denier in arg {
// could be an absolute url or a regex, need to determine which and populate the
// appropriate vector
match Url::parse(denier.trim_end_matches('/')) {
match parse_url_with_raw_path(denier.trim_end_matches('/')) {
Ok(absolute) => {
// denier is an absolute url and can be parsed as such
config.url_denylist.push(absolute);
Expand Down
6 changes: 4 additions & 2 deletions src/event_handlers/scans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::{
use super::command::Command::AddToUsizeField;
use super::*;
use crate::statistics::StatField;
use reqwest::Url;
use crate::utils::parse_url_with_raw_path;
use tokio::time::Duration;

#[derive(Debug)]
Expand Down Expand Up @@ -325,7 +325,9 @@ impl ScanHandler {
self.data.add_directory_scan(&target, order).1 // add the new target; return FeroxScan
};

if should_test_deny && should_deny_url(&Url::parse(&target)?, self.handles.clone())? {
if should_test_deny
&& should_deny_url(&parse_url_with_raw_path(&target)?, self.handles.clone())?
{
// response was caught by a user-provided deny list
// checking this last, since it's most susceptible to longer runtimes due to what
// input is received
Expand Down
13 changes: 8 additions & 5 deletions src/extractor/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ use crate::{
StatField::{LinksExtracted, TotalExpected},
},
url::FeroxUrl,
utils::{logged_request, make_request, send_try_recursion_command, should_deny_url},
utils::{
logged_request, make_request, parse_url_with_raw_path, send_try_recursion_command,
should_deny_url,
},
ExtractionResult, DEFAULT_METHOD,
};
use anyhow::{bail, Context, Result};
Expand Down Expand Up @@ -122,7 +125,7 @@ impl<'a> Extractor<'a> {
) -> Result<()> {
log::trace!("enter: parse_url_and_add_subpaths({:?})", links);

match Url::parse(url_to_parse) {
match parse_url_with_raw_path(url_to_parse) {
Ok(absolute) => {
if absolute.domain() != original_url.domain()
|| absolute.host() != original_url.host()
Expand Down Expand Up @@ -475,7 +478,7 @@ impl<'a> Extractor<'a> {
ExtractionTarget::ResponseBody | ExtractionTarget::DirectoryListing => {
self.response.unwrap().url().clone()
}
ExtractionTarget::RobotsTxt => match Url::parse(&self.url) {
ExtractionTarget::RobotsTxt => match parse_url_with_raw_path(&self.url) {
Ok(u) => u,
Err(e) => {
bail!("Could not parse {}: {}", self.url, e);
Expand Down Expand Up @@ -524,7 +527,7 @@ impl<'a> Extractor<'a> {

for capture in self.robots_regex.captures_iter(body) {
if let Some(new_path) = capture.name("url_path") {
let mut new_url = Url::parse(&self.url)?;
let mut new_url = parse_url_with_raw_path(&self.url)?;

new_url.set_path(new_path.as_str());

Expand Down Expand Up @@ -654,7 +657,7 @@ impl<'a> Extractor<'a> {
&client
};

let mut url = Url::parse(&self.url)?;
let mut url = parse_url_with_raw_path(&self.url)?;
url.set_path(location); // overwrite existing path

// purposefully not using logged_request here due to using the special client
Expand Down
5 changes: 2 additions & 3 deletions src/filters/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@ use crate::event_handlers::Handles;
use crate::filters::similarity::SIM_HASHER;
use crate::nlp::preprocess;
use crate::response::FeroxResponse;
use crate::utils::logged_request;
use crate::utils::{logged_request, parse_url_with_raw_path};
use crate::DEFAULT_METHOD;
use anyhow::Result;
use regex::Regex;
use reqwest::Url;
use std::sync::Arc;

/// wrapper around logic necessary to create a SimilarityFilter
Expand All @@ -23,7 +22,7 @@ pub(crate) async fn create_similarity_filter(
handles: Arc<Handles>,
) -> Result<SimilarityFilter> {
// url as-is based on input, ignores user-specified url manipulation options (add-slash etc)
let url = Url::parse(similarity_filter)?;
let url = parse_url_with_raw_path(similarity_filter)?;

// attempt to request the given url
let resp = logged_request(&url, DEFAULT_METHOD, None, handles.clone()).await?;
Expand Down
6 changes: 3 additions & 3 deletions src/response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::{
event_handlers::{Command, Handles},
traits::FeroxSerialize,
url::FeroxUrl,
utils::{self, fmt_err, status_colorizer},
utils::{self, fmt_err, parse_url_with_raw_path, status_colorizer},
CommandSender,
};

Expand Down Expand Up @@ -140,7 +140,7 @@ impl FeroxResponse {

/// Set `FeroxResponse`'s `url` attribute, has no affect if an error occurs
pub fn set_url(&mut self, url: &str) {
match Url::parse(url) {
match parse_url_with_raw_path(url) {
Ok(url) => {
self.url = url;
}
Expand Down Expand Up @@ -599,7 +599,7 @@ impl<'de> Deserialize<'de> for FeroxResponse {
match key.as_str() {
"url" => {
if let Some(url) = value.as_str() {
if let Ok(parsed) = Url::parse(url) {
if let Ok(parsed) = parse_url_with_raw_path(url) {
response.url = parsed;
}
}
Expand Down
25 changes: 13 additions & 12 deletions src/url.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::utils::parse_url_with_raw_path;
use crate::{event_handlers::Handles, statistics::StatError::UrlFormat, Command::AddError};
use anyhow::{anyhow, bail, Result};
use reqwest::Url;
Expand Down Expand Up @@ -142,19 +143,19 @@ impl FeroxUrl {
word = word.trim_start_matches('/').to_string();
};

let base_url = Url::parse(&url)?;
let joined = base_url.join(&word)?;
let base_url = parse_url_with_raw_path(&url)?;
let mut joined = base_url.join(&word)?;

if self.handles.config.queries.is_empty() {
// no query params to process
log::trace!("exit: format -> {}", joined);
Ok(joined)
} else {
let with_params =
Url::parse_with_params(joined.as_str(), &self.handles.config.queries)?;
log::trace!("exit: format_url -> {}", with_params);
Ok(with_params) // request with params attached
if !self.handles.config.queries.is_empty() {
// if called, this adds a '?' to the url, whether or not there are queries to be added
// so we need to check if there are queries to be added before blindly adding the '?'
joined
.query_pairs_mut()
.extend_pairs(self.handles.config.queries.iter());
}

log::trace!("exit: format_url -> {}", joined);
Ok(joined)
}

/// Simple helper to abstract away adding a forward-slash to a url if not present
Expand Down Expand Up @@ -189,7 +190,7 @@ impl FeroxUrl {

let target = self.normalize();

let parsed = Url::parse(&target)?;
let parsed = parse_url_with_raw_path(&target)?;
let parts = parsed
.path_segments()
.ok_or_else(|| anyhow!("No path segments found"))?;
Expand Down
Loading

0 comments on commit 1cf37e3

Please sign in to comment.