diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index 59266282113c..57d29ee71de0 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -200,14 +200,12 @@ fn read_config( let content = std::str::from_utf8(buf.as_ref()).ok()?; for (pattern, key) in keys.iter() { - let local = std::mem::take(builder); - if builder.get_config_value(key).is_none() { let reg = Regex::new(pattern).unwrap(); let cap = reg.captures(content)?; let m = cap.get(1)?; let parsed = m.as_str(); - *builder = local.with_config(*key, parsed) + *builder = std::mem::take(builder).with_config(*key, parsed); } } } diff --git a/crates/polars-io/src/file_cache/utils.rs b/crates/polars-io/src/file_cache/utils.rs index b239c2792e54..fec85a0a5128 100644 --- a/crates/polars-io/src/file_cache/utils.rs +++ b/crates/polars-io/src/file_cache/utils.rs @@ -52,12 +52,10 @@ pub(super) fn update_last_accessed(file: &std::fs::File) { } } -pub fn init_entries_from_uri_list]>>( - uri_list: A, +pub fn init_entries_from_uri_list( + uri_list: &[Arc], cloud_options: Option<&CloudOptions>, ) -> PolarsResult>> { - let uri_list = uri_list.as_ref(); - if uri_list.is_empty() { return Ok(Default::default()); } @@ -69,13 +67,27 @@ pub fn init_entries_from_uri_list]>>( .unwrap_or_else(get_env_file_cache_ttl); if is_cloud_url(first_uri) { - let (_, object_store) = pl_async::get_runtime() - .block_on_potential_spawn(build_object_store(first_uri, cloud_options))?; - let object_store = PolarsObjectStore::new(object_store); + let object_stores = pl_async::get_runtime().block_on_potential_spawn(async { + futures::future::try_join_all( + (0..if first_uri.starts_with("http") { + // Object stores for http are tied to the path. 
+ uri_list.len() + } else { + 1 + }) + .map(|i| async move { + let (_, object_store) = + build_object_store(&uri_list[i], cloud_options).await?; + PolarsResult::Ok(PolarsObjectStore::new(object_store)) + }), + ) + .await + })?; uri_list .iter() - .map(|uri| { + .enumerate() + .map(|(i, uri)| { FILE_CACHE.init_entry( uri.clone(), || { @@ -88,7 +100,8 @@ pub fn init_entries_from_uri_list]>>( object_path_from_string(prefix)? }; - let object_store = object_store.clone(); + let object_store = + object_stores[std::cmp::min(i, object_stores.len() - 1)].clone(); let uri = uri.clone(); Ok(Arc::new(CloudFileFetcher { diff --git a/crates/polars-io/src/utils/path.rs b/crates/polars-io/src/utils/path.rs index 91bc74ed2700..84e02022bd85 100644 --- a/crates/polars-io/src/utils/path.rs +++ b/crates/polars-io/src/utils/path.rs @@ -132,6 +132,10 @@ pub fn expand_paths( cloud_options: Option<&CloudOptions>| -> PolarsResult<(usize, Vec)> { crate::pl_async::get_runtime().block_on_potential_spawn(async { + if path.starts_with("http") { + return Ok((0, vec![PathBuf::from(path)])); + } + let (cloud_location, store) = crate::cloud::build_object_store(path, cloud_options).await?; diff --git a/crates/polars-plan/src/plans/conversion/scans.rs b/crates/polars-plan/src/plans/conversion/scans.rs index 68e17d278898..5908d085fd15 100644 --- a/crates/polars-plan/src/plans/conversion/scans.rs +++ b/crates/polars-plan/src/plans/conversion/scans.rs @@ -156,7 +156,8 @@ pub(super) fn csv_file_info( paths .iter() .map(|path| Arc::from(path.to_str().unwrap())) - .collect::>(), + .collect::>() + .as_slice(), cloud_options, )?) } else {