diff --git a/Cargo.lock b/Cargo.lock
index 9166fcde3..201dea533 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3366,7 +3366,7 @@ dependencies = [
 
 [[package]]
 name = "spider"
-version = "1.93.11"
+version = "1.93.13"
 dependencies = [
  "ahash",
  "async-openai",
@@ -3417,7 +3417,7 @@ dependencies = [
 
 [[package]]
 name = "spider_cli"
-version = "1.93.11"
+version = "1.93.13"
 dependencies = [
  "clap",
  "env_logger",
@@ -3440,7 +3440,7 @@ dependencies = [
 
 [[package]]
 name = "spider_worker"
-version = "1.93.11"
+version = "1.93.13"
 dependencies = [
  "env_logger",
  "lazy_static",
diff --git a/spider/Cargo.toml b/spider/Cargo.toml
index 2e1ff2c2f..cab7377f6 100644
--- a/spider/Cargo.toml
+++ b/spider/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "spider"
-version = "1.93.11"
+version = "1.93.13"
 authors = [
     "j-mendez "
 ]
diff --git a/spider/README.md b/spider/README.md
index 22d071cc9..8df548376 100644
--- a/spider/README.md
+++ b/spider/README.md
@@ -16,7 +16,7 @@ This is a basic async example crawling a web page, add spider to your `Cargo.tom
 
 ```toml
 [dependencies]
-spider = "1.93.11"
+spider = "1.93.13"
 ```
 
 And then the code:
@@ -93,7 +93,7 @@ We have the following optional feature flags.
 
 ```toml
 [dependencies]
-spider = { version = "1.93.11", features = ["regex", "ua_generator"] }
+spider = { version = "1.93.13", features = ["regex", "ua_generator"] }
 ```
 
 1. `ua_generator`: Enables auto generating a random real User-Agent.
@@ -138,7 +138,7 @@ Move processing to a worker, drastically increases performance even if worker is
 
 ```toml
 [dependencies]
-spider = { version = "1.93.11", features = ["decentralized"] }
+spider = { version = "1.93.13", features = ["decentralized"] }
 ```
 
 ```sh
@@ -169,7 +169,7 @@ Use the subscribe method to get a broadcast channel.
 
 ```toml
 [dependencies]
-spider = { version = "1.93.11", features = ["sync"] }
+spider = { version = "1.93.13", features = ["sync"] }
 ```
 
 ```rust,no_run
@@ -199,7 +199,7 @@ Allow regex for blacklisting routes
 
 ```toml
 [dependencies]
-spider = { version = "1.93.11", features = ["regex"] }
+spider = { version = "1.93.13", features = ["regex"] }
 ```
 
 ```rust,no_run
@@ -226,7 +226,7 @@ If you are performing large workloads you may need to control the crawler by ena
 
 ```toml
 [dependencies]
-spider = { version = "1.93.11", features = ["control"] }
+spider = { version = "1.93.13", features = ["control"] }
 ```
 
 ```rust
@@ -296,7 +296,7 @@ Use cron jobs to run crawls continuously at anytime.
 
 ```toml
 [dependencies]
-spider = { version = "1.93.11", features = ["sync", "cron"] }
+spider = { version = "1.93.13", features = ["sync", "cron"] }
 ```
 
 ```rust,no_run
@@ -335,7 +335,7 @@ the feature flag [`chrome_intercept`] to possibly speed up request using Network
 
 ```toml
 [dependencies]
-spider = { version = "1.93.11", features = ["chrome", "chrome_intercept"] }
+spider = { version = "1.93.13", features = ["chrome", "chrome_intercept"] }
 ```
 
 You can use `website.crawl_concurrent_raw` to perform a crawl without chromium when needed. Use the feature flag `chrome_headed` to enable headful browser usage if needed to debug.
@@ -365,7 +365,7 @@ Enabling HTTP cache can be done with the feature flag [`cache`] or [`cache_mem`]
 
 ```toml
 [dependencies]
-spider = { version = "1.93.11", features = ["cache"] }
+spider = { version = "1.93.13", features = ["cache"] }
 ```
 
 You need to set `website.cache` to true to enable as well.
@@ -396,7 +396,7 @@ Intelligently run crawls using HTTP and JavaScript Rendering when needed. The be
 
 ```toml
 [dependencies]
-spider = { version = "1.93.11", features = ["smart"] }
+spider = { version = "1.93.13", features = ["smart"] }
 ```
 
 ```rust,no_run
@@ -422,7 +422,7 @@ Use OpenAI to generate dynamic scripts to drive the browser done with the featur
 
 ```toml
 [dependencies]
-spider = { version = "1.93.11", features = ["openai"] }
+spider = { version = "1.93.13", features = ["openai"] }
 ```
 
 ```rust
@@ -448,7 +448,7 @@ Set a depth limit to prevent forwarding.
 
 ```toml
 [dependencies]
-spider = { version = "1.93.11", features = ["budget"] }
+spider = { version = "1.93.13", features = ["budget"] }
 ```
 
 ```rust,no_run
@@ -511,7 +511,7 @@ async fn main() -> Result<(), Error> {
     {
         Ok(mut website) => {
             let handle = tokio::spawn(async move {
-                println!("Starting Crawl - {:?}", website.get_domain().inner());
+                println!("Starting Crawl - {:?}", website.get_url().inner());
 
                 let start = Instant::now();
                 website.crawl().await;
@@ -525,7 +525,7 @@ async fn main() -> Result<(), Error> {
 
                 println!(
                     "{:?} - Time elapsed in website.crawl() is: {:?} for total pages: {:?}",
-                    website.get_domain().inner(),
+                    website.get_url().inner(),
                     duration,
                     links.len()
                 );
diff --git a/spider/src/website.rs b/spider/src/website.rs
index 65e4ba615..71bd000a8 100644
--- a/spider/src/website.rs
+++ b/spider/src/website.rs
@@ -558,12 +558,12 @@ impl Website {
     }
 
     /// Domain parsed url getter.
-    pub fn get_domain_parsed(&self) -> &Option<Box<Url>> {
+    pub fn get_url_parsed(&self) -> &Option<Box<Url>> {
         &self.domain_parsed
     }
 
     /// Domain name getter.
-    pub fn get_domain(&self) -> &CaseInsensitiveString {
+    pub fn get_url(&self) -> &CaseInsensitiveString {
         &self.url
     }
 
@@ -4563,7 +4563,7 @@ impl Job for Website {
     async fn handle(&mut self) {
         log::info!(
             "CRON: {} - cron job running {}",
-            self.get_domain().as_ref(),
+            self.get_url().as_ref(),
             self.now()
         );
         if self.configuration.cron_type == CronType::Crawl {
diff --git a/spider_cli/Cargo.toml b/spider_cli/Cargo.toml
index bf0880ec5..c3396e32e 100644
--- a/spider_cli/Cargo.toml
+++ b/spider_cli/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "spider_cli"
-version = "1.93.11"
+version = "1.93.13"
 authors = [
     "madeindjs ",
     "j-mendez ",
@@ -29,7 +29,7 @@ quote = "1.0.18"
 failure_derive = "0.1.8"
 
 [dependencies.spider]
-version = "1.93.11"
+version = "1.93.13"
 path = "../spider"
 
 [[bin]]
diff --git a/spider_worker/Cargo.toml b/spider_worker/Cargo.toml
index e9fe43576..b0cbce735 100644
--- a/spider_worker/Cargo.toml
+++ b/spider_worker/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "spider_worker"
-version = "1.93.11"
+version = "1.93.13"
 authors = [
     "madeindjs ",
     "j-mendez ",
@@ -25,7 +25,7 @@ lazy_static = "1.4.0"
 env_logger = "0.11.3"
 
 [dependencies.spider]
-version = "1.93.11"
+version = "1.93.13"
 path = "../spider"
 features = ["serde", "flexbuffers"]
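
The getter renames in `spider/src/website.rs` (`get_domain` to `get_url`, `get_domain_parsed` to `get_url_parsed`) are breaking for downstream callers. A minimal migration sketch, assuming the published `spider` crate with its re-exported `tokio` runtime; the target URL is a placeholder and not part of this diff:

```rust
use spider::tokio;
use spider::website::Website;

#[tokio::main]
async fn main() {
    // Placeholder crawl target for illustration only.
    let mut website: Website = Website::new("https://example.com");
    website.crawl().await;

    // Before this change: website.get_domain() / website.get_domain_parsed()
    // After this change:  website.get_url()    / website.get_url_parsed()
    println!("Crawled: {:?}", website.get_url().inner());
    println!("Parsed base url: {:?}", website.get_url_parsed());
}
```

Pinning `spider = "1.93.13"` (or later) in `Cargo.toml` picks up the renamed getters.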