From 3ed1743becd2d40b20fd0092551b693f44b20b92 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Mon, 22 Jul 2024 17:16:10 +1000 Subject: [PATCH 1/3] perf: Don't send HEAD request; require explicit trailing slash to scan a directory --- crates/polars-io/src/path_utils/mod.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/crates/polars-io/src/path_utils/mod.rs b/crates/polars-io/src/path_utils/mod.rs index f6025ef78aea..cc6c57e13ad1 100644 --- a/crates/polars-io/src/path_utils/mod.rs +++ b/crates/polars-io/src/path_utils/mod.rs @@ -174,10 +174,7 @@ pub fn expand_paths_hive( let prefix = object_path_from_string(cloud_location.prefix.clone())?; - let out = if !path.ends_with("/") - && cloud_location.expansion.is_none() - && store.head(&prefix).await.is_ok() - { + let out = if !path.ends_with("/") && cloud_location.expansion.is_none() { ( 0, vec![PathBuf::from(format_path( From 759579c2ca0efd714baabfdd5d2d20af15ab4373 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Mon, 22 Jul 2024 17:39:49 +1000 Subject: [PATCH 2/3] c --- crates/polars-io/src/path_utils/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/polars-io/src/path_utils/mod.rs b/crates/polars-io/src/path_utils/mod.rs index cc6c57e13ad1..59129ca6349e 100644 --- a/crates/polars-io/src/path_utils/mod.rs +++ b/crates/polars-io/src/path_utils/mod.rs @@ -174,7 +174,9 @@ pub fn expand_paths_hive( let prefix = object_path_from_string(cloud_location.prefix.clone())?; - let out = if !path.ends_with("/") && cloud_location.expansion.is_none() { + let out = if !path.ends_with("/") && cloud_location.expansion.is_none() && { + is_cloud || PathBuf::from(path).is_file() + } { ( 0, vec![PathBuf::from(format_path( From af1b949ad41fea48c68cd83e9f65efdcf9895624 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Mon, 22 Jul 2024 19:28:35 +1000 Subject: [PATCH 3/3] add comment --- crates/polars-io/src/path_utils/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/crates/polars-io/src/path_utils/mod.rs b/crates/polars-io/src/path_utils/mod.rs index 59129ca6349e..be1f0c947df7 100644 --- a/crates/polars-io/src/path_utils/mod.rs +++ b/crates/polars-io/src/path_utils/mod.rs @@ -175,6 +175,10 @@ pub fn expand_paths_hive( let prefix = object_path_from_string(cloud_location.prefix.clone())?; let out = if !path.ends_with("/") && cloud_location.expansion.is_none() && { + // We need to check if it is a directory for local paths (we can be here due + // to FORCE_ASYNC). For cloud paths the convention is that the user must add + // a trailing slash `/` to scan directories. We don't infer it as that would + // mean sending one network request per path serially (very slow). is_cloud || PathBuf::from(path).is_file() } { (