diff --git a/src/lib.rs b/src/lib.rs index a4d9abf2..2bc7404e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,13 +8,11 @@ pub mod logger; pub mod parser; pub mod progress; pub mod reporter; +pub mod scan_manager; pub mod scanner; pub mod utils; -use reqwest::{ - header::HeaderMap, - {Response, StatusCode, Url}, -}; +use reqwest::{header::HeaderMap, Response, StatusCode, Url}; use std::{error, fmt}; use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender}; diff --git a/src/main.rs b/src/main.rs index ce1db26e..baf17110 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,8 @@ use feroxbuster::{ banner, config::{CONFIGURATION, PROGRESS_BAR, PROGRESS_PRINTER}, heuristics, logger, reporter, - scanner::{self, scan_url, PAUSE_SCAN}, + scan_manager::PAUSE_SCAN, + scanner::{self, scan_url}, utils::{ferox_print, get_current_depth, module_colorizer, status_colorizer}, FeroxError, FeroxResponse, FeroxResult, SLEEP_DURATION, VERSION, }; @@ -253,7 +254,7 @@ async fn clean_up( save_output: bool, ) { log::trace!( - "enter: clean_up({:?}, {:?}, {:?}, {:?}, {}", + "enter: clean_up({:?}, {:?}, {:?}, {:?}, {})", tx_term, term_handle, tx_file, diff --git a/src/scan_manager.rs b/src/scan_manager.rs new file mode 100644 index 00000000..7c3c1384 --- /dev/null +++ b/src/scan_manager.rs @@ -0,0 +1,531 @@ +use crate::{config::PROGRESS_PRINTER, progress, scanner::NUMBER_OF_REQUESTS, SLEEP_DURATION}; +use console::style; +use indicatif::{ProgressBar, ProgressStyle}; +use lazy_static::lazy_static; +use std::{ + cmp::PartialEq, + fmt, + sync::{Arc, Mutex, RwLock}, +}; +use std::{ + io::{stderr, Write}, + sync::atomic::{AtomicBool, AtomicUsize, Ordering}, +}; +use tokio::{task::JoinHandle, time}; +use uuid::Uuid; + +lazy_static! { + /// A clock spinner protected with a RwLock to allow for a single thread to use at a time + // todo remove this when issue #107 is resolved + static ref SINGLE_SPINNER: RwLock = RwLock::new(get_single_spinner()); +} + +/// Single atomic number that gets incremented once, used to track first thread to interact with +/// when pausing a scan +static INTERACTIVE_BARRIER: AtomicUsize = AtomicUsize::new(0); + +/// Atomic boolean flag, used to determine whether or not a scan should pause or resume +pub static PAUSE_SCAN: AtomicBool = AtomicBool::new(false); + +/// Simple enum used to flag a `FeroxScan` as likely a directory or file +#[derive(Debug)] +pub enum ScanType { + File, + Directory, +} + +/// Struct to hold scan-related state +/// +/// The purpose of this container is to open up the pathway to aborting currently running tasks and +/// serialization of all scan state into a state file in order to resume scans that were cut short +#[derive(Debug)] +pub struct FeroxScan { + /// UUID that uniquely ID's the scan + pub id: String, + + /// The URL that to be scanned + pub url: String, + + /// The type of scan + pub scan_type: ScanType, + + /// Whether or not this scan has completed + pub complete: bool, + + /// The spawned tokio task performing this scan + pub task: Option>, + + /// The progress bar associated with this scan + pub progress_bar: Option, +} + +/// Implementation of FeroxScan +impl FeroxScan { + /// Stop a currently running scan + pub fn abort(&self) { + self.stop_progress_bar(); + + if let Some(_task) = &self.task { + // task.abort(); todo uncomment once upgraded to tokio 0.3 + } + } + + /// Create a default FeroxScan, populates ID with a new UUID + fn default() -> Self { + let new_id = Uuid::new_v4().to_simple().to_string(); + + FeroxScan { + id: new_id, + task: None, + complete: false, + url: String::new(), + progress_bar: None, + scan_type: ScanType::File, + } + } + + /// Simple helper to call .finish on the scan's progress bar + fn stop_progress_bar(&self) { + if let Some(pb) = &self.progress_bar { + pb.finish(); + } + } + + /// Simple helper get a progress bar + pub fn progress_bar(&mut self) -> ProgressBar { + if let Some(pb) = &self.progress_bar { + pb.clone() + } else { + let num_requests = NUMBER_OF_REQUESTS.load(Ordering::Relaxed); + let pb = progress::add_bar(&self.url, num_requests, false); + + pb.reset_elapsed(); + + self.progress_bar = Some(pb.clone()); + + pb + } + } + + /// Given a URL and ProgressBar, create a new FeroxScan, wrap it in an Arc and return it + pub fn new(url: &str, scan_type: ScanType, pb: Option) -> Arc> { + let mut me = Self::default(); + + me.url = url.to_string(); + me.scan_type = scan_type; + me.progress_bar = pb; + + Arc::new(Mutex::new(me)) + } + + /// Mark the scan as complete and stop the scan's progress bar + pub fn finish(&mut self) { + self.complete = true; + self.stop_progress_bar(); + } +} + +/// Display implementation +impl fmt::Display for FeroxScan { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let complete = if self.complete { + style("complete").green() + } else { + style("incomplete").red() + }; + + write!(f, "{:10} {}", complete, self.url) + } +} + +/// PartialEq implementation; uses FeroxScan.id for comparison +impl PartialEq for FeroxScan { + fn eq(&self, other: &Self) -> bool { + self.id == other.id + } +} + +/// Container around a locked hashset of `FeroxScan`s, adds wrappers for insertion and searching +#[derive(Debug, Default)] +pub struct FeroxScans { + /// Internal structure: locked hashset of `FeroxScan`s + pub scans: Mutex>>>, +} + +/// Implementation of `FeroxScans` +impl FeroxScans { + /// Add a `FeroxScan` to the internal container + /// + /// If the internal container did NOT contain the scan, true is returned; else false + pub fn insert(&self, scan: Arc>) -> bool { + let sentry = match scan.lock() { + Ok(locked_scan) => { + // If the container did contain the scan, set sentry to false + // If the container did not contain the scan, set sentry to true + !self.contains(&locked_scan.url) + } + Err(e) => { + // poisoned lock + log::error!("FeroxScan's ({:?}) mutex is poisoned: {}", self, e); + false + } + }; + + if sentry { + // can't update the internal container while the scan itself is locked, so first + // lock the scan and check the container for the scan's presence, then add if + // not found + match self.scans.lock() { + Ok(mut scans) => { + scans.push(scan); + } + Err(e) => { + log::error!("FeroxScans' container's mutex is poisoned: {}", e); + return false; + } + } + } + + sentry + } + + /// Simple check for whether or not a FeroxScan is contained within the inner container based + /// on the given URL + pub fn contains(&self, url: &str) -> bool { + match self.scans.lock() { + Ok(scans) => { + for scan in scans.iter() { + if let Ok(locked_scan) = scan.lock() { + if locked_scan.url == url { + return true; + } + } + } + } + Err(e) => { + log::error!("FeroxScans' container's mutex is poisoned: {}", e); + } + } + false + } + + /// Find and return a `FeroxScan` based on the given URL + pub fn get_scan_by_url(&self, url: &str) -> Option>> { + if let Ok(scans) = self.scans.lock() { + for scan in scans.iter() { + if let Ok(locked_scan) = scan.lock() { + if locked_scan.url == url { + return Some(scan.clone()); + } + } + } + } + None + } + + /// Print all FeroxScans of type Directory + /// + /// Example: + /// 0: complete https://10.129.45.20 + /// 9: complete https://10.129.45.20/images + /// 10: complete https://10.129.45.20/assets + pub fn display_scans(&self) { + if let Ok(scans) = self.scans.lock() { + for (i, scan) in scans.iter().enumerate() { + if let Ok(unlocked_scan) = scan.lock() { + match unlocked_scan.scan_type { + ScanType::Directory => { + PROGRESS_PRINTER.println(format!("{:3}: {}", i, unlocked_scan)); + } + ScanType::File => { + // we're only interested in displaying directory scans, as those are + // the only ones that make sense to be stopped + } + } + } + } + } + } + + /// Forced the calling thread into a busy loop + /// + /// Every `SLEEP_DURATION` milliseconds, the function examines the result stored in `PAUSE_SCAN` + /// + /// When the value stored in `PAUSE_SCAN` becomes `false`, the function returns, exiting the busy + /// loop + pub async fn pause(&self, get_user_input: bool) { + // function uses tokio::time, not std + + // local testing showed a pretty slow increase (less than linear) in CPU usage as # of + // concurrent scans rose when SLEEP_DURATION was set to 500, using that as the default for now + let mut interval = time::interval(time::Duration::from_millis(SLEEP_DURATION)); + + // ignore any error returned + let _ = stderr().flush(); + + if INTERACTIVE_BARRIER.load(Ordering::Relaxed) == 0 { + INTERACTIVE_BARRIER.fetch_add(1, Ordering::Relaxed); + + if get_user_input { + self.display_scans(); + + let mut user_input = String::new(); + std::io::stdin().read_line(&mut user_input).unwrap(); + // todo actual logic for parsing user input in a way that allows for + // calling .abort on the scan retrieved based on the input (issue #107) + } + } + + if SINGLE_SPINNER.read().unwrap().is_finished() { + // todo remove this when issue #107 is resolved + + // in order to not leave draw artifacts laying around in the terminal, we call + // finish_and_clear on the progress bar when resuming scans. For this reason, we need to + // check if the spinner is finished, and repopulate the RwLock with a new spinner if + // necessary + if let Ok(mut guard) = SINGLE_SPINNER.write() { + *guard = get_single_spinner(); + } + } + + if let Ok(spinner) = SINGLE_SPINNER.write() { + spinner.enable_steady_tick(120); + } + + loop { + // first tick happens immediately, all others wait the specified duration + interval.tick().await; + + if !PAUSE_SCAN.load(Ordering::Acquire) { + // PAUSE_SCAN is false, so we can exit the busy loop + + if INTERACTIVE_BARRIER.load(Ordering::Relaxed) == 1 { + INTERACTIVE_BARRIER.fetch_sub(1, Ordering::Relaxed); + } + + if let Ok(spinner) = SINGLE_SPINNER.write() { + // todo remove this when issue #107 is resolved + spinner.finish_and_clear(); + } + + let _ = stderr().flush(); + + log::trace!("exit: pause_scan"); + return; + } + } + } + + /// Given a url, create a new `FeroxScan` and add it to `FeroxScans` + /// + /// If `FeroxScans` did not already contain the scan, return true; otherwise return false + /// + /// Also return a reference to the new `FeroxScan` + fn add_scan(&self, url: &str, scan_type: ScanType) -> (bool, Arc>) { + let bar = match scan_type { + ScanType::Directory => { + let progress_bar = + progress::add_bar(&url, NUMBER_OF_REQUESTS.load(Ordering::Relaxed), false); + + progress_bar.reset_elapsed(); + + Some(progress_bar) + } + ScanType::File => None, + }; + + let ferox_scan = FeroxScan::new(&url, scan_type, bar); + + // If the set did not contain the scan, true is returned. + // If the set did contain the scan, false is returned. + let response = self.insert(ferox_scan.clone()); + + (response, ferox_scan) + } + + /// Given a url, create a new `FeroxScan` and add it to `FeroxScans` as a Directory Scan + /// + /// If `FeroxScans` did not already contain the scan, return true; otherwise return false + /// + /// Also return a reference to the new `FeroxScan` + pub fn add_directory_scan(&self, url: &str) -> (bool, Arc>) { + self.add_scan(&url, ScanType::Directory) + } + + /// Given a url, create a new `FeroxScan` and add it to `FeroxScans` as a File Scan + /// + /// If `FeroxScans` did not already contain the scan, return true; otherwise return false + /// + /// Also return a reference to the new `FeroxScan` + pub fn add_file_scan(&self, url: &str) -> (bool, Arc>) { + self.add_scan(&url, ScanType::File) + } +} + +/// Return a clock spinner, used when scans are paused +// todo remove this when issue #107 is resolved +fn get_single_spinner() -> ProgressBar { + log::trace!("enter: get_single_spinner"); + + let spinner = ProgressBar::new_spinner().with_style( + ProgressStyle::default_spinner() + .tick_strings(&[ + "🕛", "🕐", "🕑", "🕒", "🕓", "🕔", "🕕", "🕖", "🕗", "🕘", "🕙", "🕚", + ]) + .template(&format!( + "\t-= All Scans {{spinner}} {} =-", + style("Paused").red() + )), + ); + + log::trace!("exit: get_single_spinner -> {:?}", spinner); + spinner +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + /// test that get_single_spinner returns the correct spinner + // todo remove this when issue #107 is resolved + fn scanner_get_single_spinner_returns_spinner() { + let spinner = get_single_spinner(); + assert!(!spinner.is_finished()); + } + + #[tokio::test(core_threads = 1)] + /// tests that pause_scan pauses execution and releases execution when PAUSE_SCAN is toggled + /// the spinner used during the test has had .finish_and_clear called on it, meaning that + /// a new one will be created, taking the if branch within the function + async fn scanner_pause_scan_with_finished_spinner() { + let now = time::Instant::now(); + let urls = FeroxScans::default(); + + PAUSE_SCAN.store(true, Ordering::Relaxed); + + let expected = time::Duration::from_secs(2); + + tokio::spawn(async move { + time::delay_for(expected).await; + PAUSE_SCAN.store(false, Ordering::Relaxed); + }); + + urls.pause(false).await; + + assert!(now.elapsed() > expected); + } + + #[test] + /// add an unknown url to the hashset, expect true + fn add_url_to_list_of_scanned_urls_with_unknown_url() { + let urls = FeroxScans::default(); + let url = "http://unknown_url"; + let (result, _scan) = urls.add_scan(url, ScanType::Directory); + assert_eq!(result, true); + } + + #[test] + /// add a known url to the hashset, with a trailing slash, expect false + fn add_url_to_list_of_scanned_urls_with_known_url() { + let urls = FeroxScans::default(); + let pb = ProgressBar::new(1); + let url = "http://unknown_url/"; + let scan = FeroxScan::new(url, ScanType::Directory, Some(pb)); + + assert_eq!(urls.insert(scan), true); + + let (result, _scan) = urls.add_scan(url, ScanType::Directory); + + assert_eq!(result, false); + } + + #[test] + /// abort should call stop_progress_bar, marking it as finished + fn abort_stops_progress_bar() { + let pb = ProgressBar::new(1); + let url = "http://unknown_url/"; + let scan = FeroxScan::new(url, ScanType::Directory, Some(pb)); + + assert_eq!( + scan.lock() + .unwrap() + .progress_bar + .as_ref() + .unwrap() + .is_finished(), + false + ); + + scan.lock().unwrap().abort(); + + assert_eq!( + scan.lock() + .unwrap() + .progress_bar + .as_ref() + .unwrap() + .is_finished(), + true + ); + } + + #[test] + /// add a known url to the hashset, without a trailing slash, expect false + fn add_url_to_list_of_scanned_urls_with_known_url_without_slash() { + let urls = FeroxScans::default(); + let url = "http://unknown_url"; + let scan = FeroxScan::new(url, ScanType::File, None); + + assert_eq!(urls.insert(scan), true); + + let (result, _scan) = urls.add_scan(url, ScanType::File); + + assert_eq!(result, false); + } + + #[test] + /// just increasing coverage, no real expectations + fn call_display_scans() { + let urls = FeroxScans::default(); + let pb = ProgressBar::new(1); + let pb_two = ProgressBar::new(2); + let url = "http://unknown_url/"; + let url_two = "http://unknown_url/fa"; + let scan = FeroxScan::new(url, ScanType::Directory, Some(pb)); + let scan_two = FeroxScan::new(url_two, ScanType::Directory, Some(pb_two)); + + scan_two.lock().unwrap().finish(); // one complete, one incomplete + + assert_eq!(urls.insert(scan), true); + + urls.display_scans(); + } + + #[test] + /// ensure that PartialEq compares FeroxScan.id fields + fn partial_eq_compares_the_id_field() { + let url = "http://unknown_url/"; + let scan = FeroxScan::new(url, ScanType::Directory, None); + let scan_two = FeroxScan::new(url, ScanType::Directory, None); + + assert!(!scan.lock().unwrap().eq(&scan_two.lock().unwrap())); + + scan_two.lock().unwrap().id = scan.lock().unwrap().id.clone(); + + assert!(scan.lock().unwrap().eq(&scan_two.lock().unwrap())); + } + + #[test] + /// show that a new progress bar is created if one doesn't exist + fn ferox_scan_get_progress_bar_when_none_is_set() { + let mut scan = FeroxScan::default(); + + assert!(scan.progress_bar.is_none()); // no pb exists + + let pb = scan.progress_bar(); + + assert!(scan.progress_bar.is_some()); // new pb created + assert!(!pb.is_finished()) // not finished + } +} diff --git a/src/scanner.rs b/src/scanner.rs index 1811dfe3..271c3ab5 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -4,24 +4,22 @@ use crate::{ filters::{ FeroxFilter, LinesFilter, SizeFilter, StatusCodeFilter, WildcardFilter, WordsFilter, }, - heuristics, progress, + heuristics, + scan_manager::{FeroxScans, PAUSE_SCAN}, utils::{format_url, get_current_depth, make_request}, - FeroxChannel, FeroxResponse, SLEEP_DURATION, + FeroxChannel, FeroxResponse, }; -use console::style; use futures::{ future::{BoxFuture, FutureExt}, stream, StreamExt, }; -use indicatif::{ProgressBar, ProgressStyle}; use lazy_static::lazy_static; use reqwest::Url; use std::{ collections::HashSet, convert::TryInto, - io::{stderr, Write}, ops::Deref, - sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, + sync::atomic::{AtomicU64, AtomicUsize, Ordering}, sync::{Arc, RwLock}, }; use tokio::{ @@ -30,7 +28,6 @@ use tokio::{ Semaphore, }, task::JoinHandle, - time, }; /// Single atomic number that gets incremented once, used to track first scan vs. all others @@ -39,15 +36,9 @@ static CALL_COUNT: AtomicUsize = AtomicUsize::new(0); /// Single atomic number that gets holds the number of requests to be sent per directory scanned pub static NUMBER_OF_REQUESTS: AtomicU64 = AtomicU64::new(0); -/// Atomic boolean flag, used to determine whether or not a scan should pause or resume -pub static PAUSE_SCAN: AtomicBool = AtomicBool::new(false); - lazy_static! { /// Set of urls that have been sent to [scan_url](fn.scan_url.html), used for deduplication - static ref SCANNED_URLS: RwLock> = RwLock::new(HashSet::new()); - - /// A clock spinner protected with a RwLock to allow for a single thread to use at a time - static ref SINGLE_SPINNER: RwLock = RwLock::new(get_single_spinner()); + pub static ref SCANNED_URLS: FeroxScans = FeroxScans::default(); /// Vector of implementors of the FeroxFilter trait static ref FILTERS: Arc>>> = Arc::new(RwLock::new(Vec::>::new())); @@ -56,101 +47,6 @@ lazy_static! { static ref SCAN_LIMITER: Semaphore = Semaphore::new(CONFIGURATION.scan_limit); } -/// Return a clock spinner, used when scans are paused -fn get_single_spinner() -> ProgressBar { - log::trace!("enter: get_single_spinner"); - - let spinner = ProgressBar::new_spinner().with_style( - ProgressStyle::default_spinner() - .tick_strings(&[ - "🕛", "🕐", "🕑", "🕒", "🕓", "🕔", "🕕", "🕖", "🕗", "🕘", "🕙", "🕚", - ]) - .template(&format!( - "\t-= All Scans {{spinner}} {} =-", - style("Paused").red() - )), - ); - - log::trace!("exit: get_single_spinner -> {:?}", spinner); - spinner -} - -/// Forced the calling thread into a busy loop -/// -/// Every `SLEEP_DURATION` milliseconds, the function examines the result stored in `PAUSE_SCAN` -/// -/// When the value stored in `PAUSE_SCAN` becomes `false`, the function returns, exiting the busy -/// loop -async fn pause_scan() { - log::trace!("enter: pause_scan"); - // function uses tokio::time, not std - - // local testing showed a pretty slow increase (less than linear) in CPU usage as # of - // concurrent scans rose when SLEEP_DURATION was set to 500, using that as the default for now - let mut interval = time::interval(time::Duration::from_millis(SLEEP_DURATION)); - - // ignore any error returned - let _ = stderr().flush(); - - if SINGLE_SPINNER.read().unwrap().is_finished() { - // in order to not leave draw artifacts laying around in the terminal, we call - // finish_and_clear on the progress bar when resuming scans. For this reason, we need to - // check if the spinner is finished, and repopulate the RwLock with a new spinner if - // necessary - if let Ok(mut guard) = SINGLE_SPINNER.write() { - *guard = get_single_spinner(); - } - } - - if let Ok(spinner) = SINGLE_SPINNER.write() { - spinner.enable_steady_tick(120); - } - - loop { - // first tick happens immediately, all others wait the specified duration - interval.tick().await; - - if !PAUSE_SCAN.load(Ordering::Acquire) { - // PAUSE_SCAN is false, so we can exit the busy loop - if let Ok(spinner) = SINGLE_SPINNER.write() { - spinner.finish_and_clear(); - } - let _ = stderr().flush(); - log::trace!("exit: pause_scan"); - return; - } - } -} - -/// Adds the given url to `SCANNED_URLS` -/// -/// If `SCANNED_URLS` did not already contain the url, return true; otherwise return false -fn add_url_to_list_of_scanned_urls(resp: &str, scanned_urls: &RwLock>) -> bool { - log::trace!( - "enter: add_url_to_list_of_scanned_urls({}, {:?})", - resp, - scanned_urls - ); - - match scanned_urls.write() { - // check new url against what's already been scanned - Ok(mut urls) => { - // If the set did not contain resp, true is returned. - // If the set did contain resp, false is returned. - let response = urls.insert(resp.to_string()); - - log::trace!("exit: add_url_to_list_of_scanned_urls -> {}", response); - response - } - Err(e) => { - // poisoned lock - log::error!("Set of scanned urls poisoned: {}", e); - log::trace!("exit: add_url_to_list_of_scanned_urls -> false"); - false - } - } -} - /// Adds the given FeroxFilter to the given list of FeroxFilter implementors /// /// If the given list did not already contain the filter, return true; otherwise return false @@ -210,7 +106,7 @@ fn spawn_recursion_handler( let mut scans = vec![]; while let Some(resp) = recursion_channel.recv().await { - let unknown = add_url_to_list_of_scanned_urls(&resp, &SCANNED_URLS); + let (unknown, _) = SCANNED_URLS.add_directory_scan(&resp); if !unknown { // not unknown, i.e. we've seen the url before and don't need to scan again @@ -224,7 +120,7 @@ fn spawn_recursion_handler( let resp_clone = resp.clone(); let list_clone = wordlist.clone(); - scans.push(tokio::spawn(async move { + let future = tokio::spawn(async move { scan_url( resp_clone.to_owned().as_str(), list_clone, @@ -233,7 +129,9 @@ fn spawn_recursion_handler( file_clone, ) .await - })); + }); + + scans.push(future); } scans } @@ -480,13 +378,6 @@ async fn make_requests( let new_links = get_links(&ferox_response).await; for new_link in new_links { - let unknown = add_url_to_list_of_scanned_urls(&new_link, &SCANNED_URLS); - - if !unknown { - // not unknown, i.e. we've seen the url before and don't need to scan again - continue; - } - // create a url based on the given command line options, continue on error let new_url = match format_url( &new_link, @@ -499,6 +390,11 @@ async fn make_requests( Err(_) => continue, }; + if SCANNED_URLS.get_scan_by_url(&new_url.to_string()).is_some() { + //we've seen the url before and don't need to scan again + continue; + } + // make the request and store the response let new_response = match make_request(&CONFIGURATION.client, &new_url).await { Ok(resp) => resp, @@ -516,6 +412,8 @@ async fn make_requests( // very likely a file, simply request and report log::debug!("Singular extraction: {}", new_ferox_response); + SCANNED_URLS.add_file_scan(&new_url.to_string()); + send_report(report_chan.clone(), new_ferox_response); continue; @@ -583,21 +481,33 @@ pub async fn scan_url( let (tx_dir, rx_dir): FeroxChannel = mpsc::unbounded_channel(); - let progress_bar = progress::add_bar( - &target_url, - NUMBER_OF_REQUESTS.load(Ordering::Relaxed), - false, - ); - progress_bar.reset_elapsed(); - if CALL_COUNT.load(Ordering::Relaxed) == 0 { CALL_COUNT.fetch_add(1, Ordering::Relaxed); // this protection allows us to add the first scanned url to SCANNED_URLS // from within the scan_url function instead of the recursion handler - add_url_to_list_of_scanned_urls(&target_url, &SCANNED_URLS); + SCANNED_URLS.add_directory_scan(&target_url); } + let ferox_scan = match SCANNED_URLS.get_scan_by_url(&target_url) { + Some(scan) => scan, + None => { + log::error!( + "Could not find FeroxScan associated with {}; this shouldn't happen... exiting", + target_url + ); + return; + } + }; + + let progress_bar = match ferox_scan.lock() { + Ok(mut scan) => scan.progress_bar(), + Err(e) => { + log::error!("FeroxScan's ({:?}) mutex is poisoned: {}", ferox_scan, e); + return; + } + }; + // When acquire is called and the semaphore has remaining permits, the function immediately // returns a permit. However, if no remaining permits are available, acquire (asynchronously) // waits until an outstanding permit is dropped. At this point, the freed permit is assigned @@ -645,7 +555,9 @@ pub async fn scan_url( // for every word in the wordlist, check to see if PAUSE_SCAN is set to true // when true; enter a busy loop that only exits by setting PAUSE_SCAN back // to false - pause_scan().await; + + // todo change to true when issue #107 is resolved + SCANNED_URLS.pause(false).await; } make_requests(&tgt, &word, base_depth, txd, txr).await }), @@ -671,7 +583,9 @@ pub async fn scan_url( // drop the current permit so the semaphore will allow another scan to proceed drop(permit); - progress_bar.finish(); + if let Ok(mut scan) = ferox_scan.lock() { + scan.finish(); + } // manually drop tx in order for the rx task's while loops to eval to false log::trace!("dropped recursion handler's transmitter"); @@ -860,68 +774,4 @@ mod tests { let result = reached_max_depth(&url, 0, 2); assert!(result); } - - #[test] - /// add an unknown url to the hashset, expect true - fn add_url_to_list_of_scanned_urls_with_unknown_url() { - let urls = RwLock::new(HashSet::::new()); - let url = "http://unknown_url"; - assert_eq!(add_url_to_list_of_scanned_urls(url, &urls), true); - } - - #[test] - /// add a known url to the hashset, with a trailing slash, expect false - fn add_url_to_list_of_scanned_urls_with_known_url() { - let urls = RwLock::new(HashSet::::new()); - let url = "http://unknown_url/"; - - assert_eq!(urls.write().unwrap().insert(url.to_string()), true); - - assert_eq!(add_url_to_list_of_scanned_urls(url, &urls), false); - } - - #[test] - /// add a known url to the hashset, without a trailing slash, expect false - fn add_url_to_list_of_scanned_urls_with_known_url_without_slash() { - let urls = RwLock::new(HashSet::::new()); - let url = "http://unknown_url"; - - assert_eq!( - urls.write() - .unwrap() - .insert("http://unknown_url".to_string()), - true - ); - - assert_eq!(add_url_to_list_of_scanned_urls(url, &urls), false); - } - - #[test] - /// test that get_single_spinner returns the correct spinner - fn scanner_get_single_spinner_returns_spinner() { - let spinner = get_single_spinner(); - assert!(!spinner.is_finished()); - } - - #[tokio::test(core_threads = 1)] - /// tests that pause_scan pauses execution and releases execution when PAUSE_SCAN is toggled - /// the spinner used during the test has had .finish_and_clear called on it, meaning that - /// a new one will be created, taking the if branch within the function - async fn scanner_pause_scan_with_finished_spinner() { - let now = time::Instant::now(); - - PAUSE_SCAN.store(true, Ordering::Relaxed); - SINGLE_SPINNER.write().unwrap().finish_and_clear(); - - let expected = time::Duration::from_secs(2); - - tokio::spawn(async move { - time::delay_for(expected).await; - PAUSE_SCAN.store(false, Ordering::Relaxed); - }); - - pause_scan().await; - - assert!(now.elapsed() > expected); - } } diff --git a/src/utils.rs b/src/utils.rs index 7927b189..3e3f6af1 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -23,13 +23,7 @@ use std::convert::TryInto; pub fn get_current_depth(target: &str) -> usize { log::trace!("enter: get_current_depth({})", target); - let target = if !target.ends_with('/') { - // target url doesn't end with a /, for the purposes of determining depth, we'll normalize - // all urls to end in a / and then calculate accordingly - format!("{}/", target) - } else { - String::from(target) - }; + let target = normalize_url(target); match Url::parse(&target) { Ok(url) => { @@ -350,6 +344,22 @@ pub fn set_open_file_limit(limit: usize) -> bool { false } +/// Simple helper to abstract away adding a forward-slash to a url if not present +/// +/// used mostly for deduplication purposes and url state tracking +pub fn normalize_url(url: &str) -> String { + log::trace!("enter: normalize_url({})", url); + + let normalized = if url.ends_with('/') { + url.to_string() + } else { + format!("{}/", url) + }; + + log::trace!("exit: normalize_url -> {}", normalized); + normalized +} + #[cfg(test)] mod tests { use super::*;