From f8a9fca79aeaf24c51b78ba46c4d1749301e884b Mon Sep 17 00:00:00 2001 From: j-mendez Date: Sat, 30 Nov 2024 20:07:42 -0500 Subject: [PATCH] chore(page): fix page link return full urls --- Cargo.lock | 28 ++-- spider/Cargo.toml | 6 +- spider/src/page.rs | 174 ++++++++++++++++--------- spider/src/website.rs | 208 +++++++++++------------------- spider_chrome/Cargo.toml | 2 +- spider_cli/Cargo.toml | 2 +- spider_cli/src/main.rs | 2 +- spider_transformations/Cargo.toml | 2 +- spider_utils/Cargo.toml | 2 +- spider_worker/Cargo.toml | 2 +- 10 files changed, 210 insertions(+), 218 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7655bddd4..a1e825a60 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1709,15 +1709,6 @@ dependencies = [ "ahash", ] -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", -] - [[package]] name = "hashbrown" version = "0.15.2" @@ -4304,7 +4295,7 @@ dependencies = [ [[package]] name = "spider" -version = "2.13.84" +version = "2.13.88" dependencies = [ "ahash", "aho-corasick", @@ -4366,7 +4357,7 @@ dependencies = [ [[package]] name = "spider_chrome" -version = "2.13.84" +version = "2.13.88" dependencies = [ "adblock", "async-tungstenite", @@ -4401,7 +4392,7 @@ dependencies = [ [[package]] name = "spider_cli" -version = "2.13.84" +version = "2.13.88" dependencies = [ "clap", "env_logger", @@ -4426,7 +4417,7 @@ dependencies = [ [[package]] name = "spider_transformations" -version = "2.13.84" +version = "2.13.88" dependencies = [ "aho-corasick", "fast_html2md", @@ -4448,7 +4439,7 @@ dependencies = [ [[package]] name = "spider_utils" -version = "2.13.84" +version = "2.13.88" dependencies = [ "indexmap 1.9.3", "serde", @@ -4460,7 +4451,7 @@ dependencies = [ [[package]] name = "spider_worker" -version = "2.13.84" +version = "2.13.88" dependencies = [ "env_logger", "lazy_static", @@ -4517,12 +4508,11 @@ dependencies = [ [[package]] name = "string-interner" -version = "0.17.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c6a0d765f5807e98a091107bae0a56ea3799f66a5de47b2c84c94a39c09974e" +checksum = "1a3275464d7a9f2d4cac57c89c2ef96a8524dba2864c8d6f82e3980baf136f9b" dependencies = [ - "cfg-if", - "hashbrown 0.14.5", + "hashbrown 0.15.2", "serde", ] diff --git a/spider/Cargo.toml b/spider/Cargo.toml index 86566635a..d6c082d93 100644 --- a/spider/Cargo.toml +++ b/spider/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider" -version = "2.13.84" +version = "2.13.88" authors = [ "j-mendez " ] @@ -62,7 +62,7 @@ phf = "0.11" phf_codegen = "0.11" auto_encoder = { version = "0.1" } base64 = { version = "0.22", optional = true } -string-interner = { version = "0.17", default-features = false, features = ["std", "inline-more", "backends"] } +string-interner = { version = "0.18", default-features = false, features = ["std", "inline-more", "backends"] } httpdate = { version = "1", optional = true } rand = { version = "0.8", optional = true } serde_regex = { version = "1", optional = true } @@ -123,7 +123,7 @@ time = [] adblock = ["spider_chrome/adblock"] sync = ["tokio/sync"] flexbuffers = ["dep:flexbuffers"] -serde = ["dep:serde", "hashbrown/serde", "string-interner/serde-1", "dep:serde_regex", "smallvec/serde"] +serde = ["dep:serde", "hashbrown/serde", "string-interner/serde", "dep:serde_regex", "smallvec/serde"] fs = ["tokio/fs"] full_resources = [] socks = 
["reqwest/socks"] diff --git a/spider/src/page.rs b/spider/src/page.rs index 1f934c0ce..571a82c53 100644 --- a/spider/src/page.rs +++ b/spider/src/page.rs @@ -158,7 +158,7 @@ pub struct Page { #[cfg(feature = "openai")] /// The extra data from the AI, example extracting data etc... pub extra_ai_data: Option>, - /// The links found on the page. + /// The links found on the page. This includes all links that have an href url. pub page_links: Option>>, /// The request should retry pub should_retry: bool, @@ -996,13 +996,10 @@ impl Page { #[cfg(all(not(feature = "decentralized"), feature = "chrome"))] /// Close the chrome page used. Useful when storing the page with subscription usage. The feature flag `chrome_store_page` is required. pub async fn close_page(&mut self) { - match self.chrome_page.as_mut() { - Some(page) => { - let _ = page - .execute(chromiumoxide::cdp::browser_protocol::page::CloseParams::default()) - .await; - } - _ => (), + if let Some(page) = self.chrome_page.as_mut() { + let _ = page + .execute(chromiumoxide::cdp::browser_protocol::page::CloseParams::default()) + .await; } } @@ -1155,9 +1152,9 @@ impl Page { #[cfg(all(not(feature = "decentralized")))] #[cfg_attr(feature = "tracing", tracing::instrument(skip_all))] pub async fn links_stream_xml_links_stream_base< - A: PartialEq + Eq + Sync + Send + Clone + Default + std::hash::Hash + From, + A: PartialEq + Eq + Sync + Send + Clone + Default + ToString + std::hash::Hash + From, >( - &self, + &mut self, selectors: &RelativeSelectors, xml: &str, map: &mut HashSet, @@ -1177,6 +1174,11 @@ impl Page { let sub_matcher = &selectors.0; let mut is_link_tag = false; + let mut links_pages = if self.page_links.is_some() { + Some(map.clone()) + } else { + None + }; loop { match reader.read_event_into_async(&mut buf).await { @@ -1202,7 +1204,7 @@ impl Page { sub_matcher, &self.external_domains_caseless, false, - &mut None, + &mut links_pages, ); } } @@ -1223,19 +1225,32 @@ impl Page { } buf.clear(); } + + if let Some(lp) = links_pages { + let page_links = self.page_links.get_or_insert_with(Default::default); + page_links.extend( + lp.into_iter() + .map(|item| CaseInsensitiveString::from(item.to_string())), + ); + } } /// Find the links as a stream using string resource validation #[inline(always)] #[cfg(all(not(feature = "decentralized")))] pub async fn links_stream_base< - A: PartialEq + Eq + Sync + Send + Clone + Default + std::hash::Hash + From, + A: PartialEq + Eq + Sync + Send + Clone + Default + ToString + std::hash::Hash + From, >( - &self, + &mut self, selectors: &RelativeSelectors, html: &str, ) -> HashSet { - let mut map = HashSet::new(); + let mut map: HashSet = HashSet::new(); + let mut links_pages = if self.page_links.is_some() { + Some(map.clone()) + } else { + None + }; if !html.is_empty() { if html.starts_with(", + A: PartialEq + Eq + Sync + Send + Clone + Default + ToString + std::hash::Hash + From, >( - &self, + &mut self, selectors: &RelativeSelectors, html: &str, client: &Client, ) -> HashSet { use auto_encoder::auto_encode_bytes; - let mut map = HashSet::new(); - let mut map_ssg = HashSet::new(); - + let mut map: HashSet = HashSet::new(); + let mut map_ssg: HashSet = HashSet::new(); + let mut links_pages = if self.page_links.is_some() { + Some(map.clone()) + } else { + None + }; if !html.is_empty() { if html.starts_with(", + A: PartialEq + Eq + Sync + Send + Clone + Default + ToString + std::hash::Hash + From, >( - &self, + &mut self, selectors: &RelativeSelectors, client: &Client, ) -> HashSet { @@ -1487,7 +1522,7 @@ 
impl Page { #[inline(always)] #[cfg(all(not(feature = "decentralized")))] pub async fn links_ssg( - &self, + &mut self, selectors: &RelativeSelectors, client: &Client, ) -> HashSet { @@ -1504,9 +1539,9 @@ impl Page { #[inline(always)] #[cfg(all(not(feature = "decentralized"), not(feature = "full_resources")))] pub async fn links_stream< - A: PartialEq + Eq + Sync + Send + Clone + Default + std::hash::Hash + From, + A: PartialEq + Eq + Sync + Send + Clone + Default + ToString + std::hash::Hash + From, >( - &self, + &mut self, selectors: &RelativeSelectors, ) -> HashSet { if auto_encoder::is_binary_file(self.get_html_bytes_u8()) { @@ -1526,9 +1561,9 @@ impl Page { #[inline(always)] #[cfg_attr(feature = "tracing", tracing::instrument(skip_all))] pub async fn links_stream_smart< - A: PartialEq + Eq + Sync + Send + Clone + Default + std::hash::Hash + From, + A: PartialEq + Eq + Sync + Send + Clone + Default + ToString + std::hash::Hash + From, >( - &self, + &mut self, selectors: &RelativeSelectors, browser: &std::sync::Arc, configuration: &crate::configuration::Configuration, @@ -1541,6 +1576,11 @@ impl Page { let mut map = HashSet::new(); let mut inner_map: HashSet = map.clone(); + let mut links_pages = if self.page_links.is_some() { + Some(map.clone()) + } else { + None + }; if !self.is_empty() { let html_resource = Box::new(self.get_html()); @@ -1614,6 +1654,7 @@ impl Page { sub_matcher, &external_domains_caseless, false, + &mut links_pages, ); } @@ -1758,7 +1799,6 @@ impl Page { }, ) .await; - map.extend(extended_map) } Err(e) => { @@ -1767,9 +1807,22 @@ impl Page { }; } } + map.extend(inner_map); } + if let Some(lp) = links_pages { + let page_links = self.page_links.get_or_insert_with(Default::default); + page_links.extend( + lp.into_iter() + .map(|item| CaseInsensitiveString::from(item.to_string())), + ); + page_links.extend( + map.iter() + .map(|item| CaseInsensitiveString::from(item.to_string())), + ); + } + map } @@ -1778,12 +1831,17 @@ impl Page { #[cfg(all(not(feature = "decentralized")))] #[cfg_attr(feature = "tracing", tracing::instrument(skip_all,))] pub async fn links_stream_full_resource< - A: PartialEq + Eq + Sync + Send + Clone + Default + std::hash::Hash + From, + A: PartialEq + Eq + Sync + Send + Clone + Default + ToString + std::hash::Hash + From, >( - &self, + &mut self, selectors: &RelativeSelectors, ) -> HashSet { let mut map = HashSet::new(); + let mut links_pages = if self.page_links.is_some() { + Some(map.clone()) + } else { + None + }; if !self.is_empty() { let html = Box::new(self.get_html()); @@ -1821,7 +1879,7 @@ impl Page { sub_matcher, &external_domains_caseless, true, - &mut None, + &mut links_pages, ); } Ok(()) @@ -1865,9 +1923,9 @@ impl Page { #[inline(always)] #[cfg(all(not(feature = "decentralized"), feature = "full_resources"))] pub async fn links_stream< - A: PartialEq + Eq + Sync + Send + Clone + Default + std::hash::Hash + From, + A: PartialEq + Eq + Sync + Send + Clone + Default + ToString + std::hash::Hash + From, >( - &self, + &mut self, selectors: &RelativeSelectors, ) -> HashSet { if auto_encoder::is_binary_file(self.get_html_bytes_u8()) { @@ -1881,9 +1939,9 @@ impl Page { #[cfg(feature = "decentralized")] /// Find the links as a stream using string resource validation pub async fn links_stream< - A: PartialEq + Eq + Sync + Send + Clone + Default + std::hash::Hash + From, + A: PartialEq + Eq + Sync + Send + Clone + Default + ToString + std::hash::Hash + From, >( - &self, + &mut self, _: &RelativeSelectors, ) -> HashSet { Default::default() @@ 
-1892,7 +1950,7 @@ impl Page { /// Find all href links and return them using CSS selectors. #[cfg(not(feature = "decentralized"))] #[inline(always)] - pub async fn links(&self, selectors: &RelativeSelectors) -> HashSet { + pub async fn links(&mut self, selectors: &RelativeSelectors) -> HashSet { match self.html.is_some() { false => Default::default(), true => self.links_stream::(selectors).await, @@ -1903,7 +1961,7 @@ impl Page { #[inline(always)] #[cfg(all(not(feature = "decentralized")))] pub async fn links_full( - &self, + &mut self, selectors: &RelativeSelectors, ) -> HashSet { match self.html.is_some() { @@ -1922,7 +1980,7 @@ impl Page { #[cfg(all(not(feature = "decentralized"), feature = "smart"))] #[inline(always)] pub async fn smart_links( - &self, + &mut self, selectors: &RelativeSelectors, page: &std::sync::Arc, configuration: &crate::configuration::Configuration, @@ -1993,27 +2051,27 @@ pub fn get_html_encoded(html: &Option, _label: &str) -> String { } } -/// Rewrite a string without encoding it. -#[cfg(all( - not(feature = "decentralized"), - not(feature = "full_resources"), - feature = "smart" -))] -pub(crate) fn rewrite_str_as_bytes<'h, 's>( - html: &str, - settings: impl Into>, -) -> Result, lol_html::errors::RewritingError> { - let mut output = vec![]; +// /// Rewrite a string without encoding it. +// #[cfg(all( +// not(feature = "decentralized"), +// not(feature = "full_resources"), +// feature = "smart" +// ))] +// pub(crate) fn rewrite_str_as_bytes<'h, 's>( +// html: &str, +// settings: impl Into>, +// ) -> Result, lol_html::errors::RewritingError> { +// let mut output = vec![]; - let mut rewriter = lol_html::HtmlRewriter::new(settings.into(), |c: &[u8]| { - output.extend_from_slice(c); - }); +// let mut rewriter = lol_html::HtmlRewriter::new(settings.into(), |c: &[u8]| { +// output.extend_from_slice(c); +// }); - rewriter.write(html.as_bytes())?; - rewriter.end()?; +// rewriter.write(html.as_bytes())?; +// rewriter.end()?; - Ok(output) -} +// Ok(output) +// } #[cfg(test)] pub const TEST_AGENT_NAME: &str = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")); @@ -2066,7 +2124,7 @@ async fn parse_links() { .unwrap(); let link_result = "https://choosealicense.com/"; - let page = Page::new(link_result, &client).await; + let mut page = Page::new(link_result, &client).await; let selector = get_page_selectors(link_result, false, false); let links = page.links(&selector.unwrap()).await; diff --git a/spider/src/website.rs b/spider/src/website.rs index 9c027de82..b27193608 100644 --- a/spider/src/website.rs +++ b/spider/src/website.rs @@ -1442,14 +1442,11 @@ impl Website { if let Some(s) = self.setup_selectors() { base.0 = s.0; base.1 = s.1; - match prior_domain { - Some(prior_domain) => match prior_domain.host_str() { - Some(dname) => { - base.2 = dname.into(); - } - _ => (), - }, - _ => (), + + if let Some(pdname) = prior_domain { + if let Some(dname) = pdname.host_str() { + base.2 = dname.into(); + } } } } @@ -1481,13 +1478,6 @@ impl Website { } else if page.status_code.is_server_error() { self.status = CrawlStatus::ServerError; } - if self.configuration.return_page_links { - page.page_links = if links.is_empty() { - None - } else { - Some(Box::new(links.clone())) - }; - } channel_send_page(&self.channel, page, &self.channel_guard); @@ -1730,16 +1720,14 @@ impl Website { } else if page.status_code.is_server_error() { self.status = CrawlStatus::ServerError; } - let links = HashSet::from(page.links.clone()); + // todo: pass full links to the worker to return. 
if self.configuration.return_page_links { - page.page_links = if links.is_empty() { - None - } else { - Some(Box::new(links.clone())) - }; + page.page_links = Some(page.links.clone().into()); } + let links = HashSet::from(page.links.clone()); + channel_send_page(&self.channel, page, &self.channel_guard); links @@ -1793,11 +1781,7 @@ impl Website { self.links_visited.insert(link_result.0); if self.configuration.return_page_links { - page.page_links = if links.is_empty() { - None - } else { - Some(Box::new(links.clone())) - }; + page.page_links = Some(Default::default()); } channel_send_page(&self.channel, page.clone(), &self.channel_guard); @@ -1851,23 +1835,17 @@ impl Website { self.links_visited.insert(link_result.0); if self.configuration.return_page_links { - let links = HashSet::from(page.links(&base).await); - - page.page_links = if links.is_empty() { - None - } else { - Some(Box::new(links.clone())) - }; + page.page_links = Some(Default::default()); + let next_links = HashSet::from(page.links(&base).await); channel_send_page(&self.channel, page.clone(), &self.channel_guard); - links.extend(links); + links.extend(next_links); } else { channel_send_page(&self.channel, page.clone(), &self.channel_guard); + let next_links = HashSet::from(page.links(&base).await); - let links = HashSet::from(page.links(&base).await); - - links.extend(links); + links.extend(next_links); } } @@ -1930,6 +1908,9 @@ impl Website { self.links_visited.insert(link_result.0); if !page.is_empty() { + if self.configuration.return_page_links { + page.page_links = Some(Default::default()); + } let page_links = HashSet::from(page.links(&base).await); links.extend(page_links); @@ -1937,14 +1918,6 @@ impl Website { self.status = CrawlStatus::Empty; }; - if self.configuration.return_page_links { - page.page_links = if links.is_empty() { - None - } else { - Some(Box::new(links.clone())) - }; - } - channel_send_page(&self.channel, page, &self.channel_guard); } @@ -2339,6 +2312,7 @@ impl Website { let allowed = self.is_allowed(&s); if allowed.eq(&ProcessLinkStatus::BudgetExceeded) { + exceeded_budget = true; break; } if allowed.eq(&ProcessLinkStatus::Blocked) { @@ -2417,10 +2391,8 @@ impl Website { } else { let semaphore = self.setup_semaphore(); - let mut q = match &self.channel_queue { - Some(q) => Some(q.0.subscribe()), - _ => None, - }; + let mut q = self.channel_queue.as_ref().map(|q| q.0.subscribe()); + let mut links: HashSet = self.drain_extra_links().collect(); @@ -2551,7 +2523,6 @@ impl Website { if let Some(timeout) = page.get_timeout() { tokio::time::sleep(timeout).await; } - if page.status_code == StatusCode::GATEWAY_TIMEOUT { if let Err(elasped) = tokio::time::timeout(BACKOFF_MAX_DURATION, async { let p = Page::new( @@ -2570,12 +2541,9 @@ impl Website { page.clone_from(&p); }).await { - log::info!("backoff gateway timeout exceeded {elasped}"); + log::info!("{target_url} backoff gateway timeout exceeded {elasped}"); } - - } else { - page.clone_from( &Page::new( &target_url, @@ -2589,7 +2557,6 @@ impl Website { &shared.6.automation_scripts, &shared.6.viewport, &shared.6.request_timeout, - ) .await, ); @@ -2612,6 +2579,10 @@ impl Website { page.base = shared.9.as_deref().cloned(); + if return_page_links { + page.page_links = Some(Default::default()); + } + let links = if full_resources { page.links_full(&shared.1).await } else { @@ -2620,14 +2591,6 @@ impl Website { page.base = prev_domain; - if return_page_links { - page.page_links = if links.is_empty() { - None - } else { - Some(Box::new(links.clone())) - }; - } 
- channel_send_page( &shared.2, page, &shared.4, ); @@ -2730,10 +2693,8 @@ impl Website { async fn crawl_concurrent(&mut self, client: &Client, handle: &Option>) { match url::Url::parse(&self.url.inner()) { Ok(_) => { - let mut q = match &self.channel_queue { - Some(q) => Some(q.0.subscribe()), - _ => None, - }; + let mut q = self.channel_queue.as_ref().map(|q| q.0.subscribe()); + self.configuration.configure_allowlist(); let domain = self.url.inner().as_str(); let mut interval = Box::pin(tokio::time::interval(Duration::from_millis(10))); @@ -2789,57 +2750,48 @@ impl Website { self.links_visited.insert(link.clone()); - match SEM.acquire().await { - Ok(permit) => { - let client = client.clone(); + if let Ok(permit) = SEM.acquire().await { + let client = client.clone(); - spawn_set("page_fetch", &mut set, async move { - let link_results = match on_link_find_callback { - Some(cb) => cb(link, None), - _ => (link, None), - }; - let link_results = link_results.0.as_ref(); - let page = Page::new_links_only( - &if http_worker && link_results.starts_with("https") - { - link_results - .replacen("https", "http", 1) - .to_string() - } else { - link_results.to_string() - }, - &client, - ) - .await; - - drop(permit); + spawn_set("page_fetch", &mut set, async move { + let link_results = match on_link_find_callback { + Some(cb) => cb(link, None), + _ => (link, None), + }; + let link_results = link_results.0.as_ref(); + let page = Page::new_links_only( + &if http_worker && link_results.starts_with("https") { + link_results + .replacen("https", "http", 1) + .to_string() + } else { + link_results.to_string() + }, + &client, + ) + .await; - page.links - }); + drop(permit); - match q.as_mut() { - Some(q) => { - while let Ok(link) = q.try_recv() { - let s = link.into(); - let allowed = self.is_allowed(&s); + page.links + }); - if allowed - .eq(&ProcessLinkStatus::BudgetExceeded) - { - break; - } - if allowed.eq(&ProcessLinkStatus::Blocked) { - continue; - } + if let Some(q) = q.as_mut() { + while let Ok(link) = q.try_recv() { + let s = link.into(); + let allowed = self.is_allowed(&s); - self.links_visited - .extend_with_new_links(&mut links, s); - } + if allowed.eq(&ProcessLinkStatus::BudgetExceeded) { + exceeded_budget = true; + break; } - _ => (), + if allowed.eq(&ProcessLinkStatus::Blocked) { + continue; + } + + self.links_visited.extend_with_new_links(&mut links, s); } } - _ => (), } } _ => break, @@ -2896,10 +2848,8 @@ impl Website { ) .await; } else { - let mut q = match &self.channel_queue { - Some(q) => Some(q.0.subscribe()), - _ => None, - }; + let mut q = self.channel_queue.as_ref().map(|q| q.0.subscribe()); + let mut links: HashSet = self.drain_extra_links().collect(); @@ -3025,7 +2975,6 @@ impl Website { log::info!("backoff gateway timeout exceeded {elasped}"); } - } else { if retry_count.is_power_of_two() { @@ -3056,6 +3005,10 @@ impl Website { page.base = shared.7.as_deref().cloned(); + if return_page_links { + page.page_links = Some(Default::default()); + } + let links = page .smart_links( &shared.1, &shared.4, &shared.5, @@ -3065,14 +3018,6 @@ impl Website { page.base = prev_domain; - if return_page_links { - page.page_links = if links.is_empty() { - None - } else { - Some(Box::new(links.clone())) - }; - } - channel_send_page(&shared.2, page, &shared.3); drop(permit); @@ -3174,10 +3119,7 @@ impl Website { match self.setup_selectors() { Some(selectors) => { - let mut q = match &self.channel_queue { - Some(q) => Some(q.0.subscribe()), - _ => None, - }; + let mut q = 
self.channel_queue.as_ref().map(|q| q.0.subscribe()); let domain = self.url.inner().as_str(); self.domain_parsed = parse_absolute_url(&domain); @@ -3361,18 +3303,20 @@ impl Website { drop(tx); if let Ok(mut handle) = handles.await { - for page in handle.iter_mut() { + for mut page in handle.iter_mut() { let prev_domain = page.base.take(); page.base = self.domain_parsed.as_deref().cloned(); + if self.configuration.return_page_links { + page.page_links = Some(Default::default()); + } let links = page.links(&selectors).await; page.base = prev_domain; self.extra_links.extend(links) } if scrape { - match self.pages.as_mut() { - Some(p) => p.extend(handle), - _ => (), - }; + if let Some(p) = self.pages.as_mut() { + p.extend(handle); + } } match q.as_mut() { diff --git a/spider_chrome/Cargo.toml b/spider_chrome/Cargo.toml index b7261dbee..91cc466cd 100644 --- a/spider_chrome/Cargo.toml +++ b/spider_chrome/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_chrome" -version = "2.13.84" +version = "2.13.88" rust-version = "1.70" authors = [ "j-mendez " diff --git a/spider_cli/Cargo.toml b/spider_cli/Cargo.toml index 04d42afab..7c96f4f1a 100644 --- a/spider_cli/Cargo.toml +++ b/spider_cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_cli" -version = "2.13.84" +version = "2.13.88" authors = [ "j-mendez " ] diff --git a/spider_cli/src/main.rs b/spider_cli/src/main.rs index f84e7f5e2..91488a39b 100644 --- a/spider_cli/src/main.rs +++ b/spider_cli/src/main.rs @@ -179,7 +179,7 @@ async fn main() { website.crawl().await; }); - while let Ok(res) = rx2.recv().await { + while let Ok(mut res) = rx2.recv().await { let page_json = json!({ "url": res.get_url(), "html": if output_html { diff --git a/spider_transformations/Cargo.toml b/spider_transformations/Cargo.toml index c306a697b..8e2e7c039 100644 --- a/spider_transformations/Cargo.toml +++ b/spider_transformations/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_transformations" -version = "2.13.84" +version = "2.13.88" authors = [ "j-mendez " ] diff --git a/spider_utils/Cargo.toml b/spider_utils/Cargo.toml index 338d3d518..7a690f055 100644 --- a/spider_utils/Cargo.toml +++ b/spider_utils/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_utils" -version = "2.13.84" +version = "2.13.88" authors = [ "j-mendez " ] diff --git a/spider_worker/Cargo.toml b/spider_worker/Cargo.toml index 325d2f4e0..06dec1a78 100644 --- a/spider_worker/Cargo.toml +++ b/spider_worker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_worker" -version = "2.13.84" +version = "2.13.88" authors = [ "j-mendez " ]
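
Usage note (not part of the patch): a minimal consumer sketch illustrating the intent of this change. After this patch, when `return_page_links` is enabled the crawler pre-seeds `page.page_links` before link extraction, so each streamed `Page` carries the full absolute URLs found on that page rather than a copy of the relative link set built afterward. The sketch below assumes the `spider` crate is built with its `sync` feature (for `subscribe`) and that the builder method `with_return_page_links` maps to the `configuration.return_page_links` flag touched in this diff; if that builder is not available in your version, set the flag on `website.configuration` directly.

// Sketch only: demonstrates reading full-URL page links from subscribed pages.
// `with_return_page_links` and the subscription capacity are assumptions, not
// something introduced by this patch.
use spider::tokio;
use spider::website::Website;

#[tokio::main]
async fn main() {
    let mut website: Website = Website::new("https://choosealicense.com");
    // Ask the crawler to populate `page.page_links` on every emitted page.
    website.with_return_page_links(true);

    // Subscribe before crawling so no pages are missed.
    let mut rx = website.subscribe(16).unwrap();

    let handle = tokio::spawn(async move {
        while let Ok(page) = rx.recv().await {
            if let Some(links) = page.page_links.as_ref() {
                // With this patch the entries are absolute URLs, e.g.
                // "https://choosealicense.com/licenses/", not bare paths.
                println!("{} -> {} links", page.get_url(), links.len());
            }
        }
    });

    website.crawl().await;
    website.unsubscribe();
    let _ = handle.await;
}

Design note: rather than cloning the already-resolved link set into `page_links` after extraction (the removed `Some(Box::new(links.clone()))` blocks in website.rs), the patch initializes `page_links = Some(Default::default())` up front and lets the `links_stream_*` routines in page.rs extend it as they resolve each href, which is why those methods now take `&mut self`.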