diff --git a/common/io/src/operator.rs b/common/io/src/operator.rs index 17d6223a9260..04dae898ae82 100644 --- a/common/io/src/operator.rs +++ b/common/io/src/operator.rs @@ -120,5 +120,10 @@ pub async fn init_s3_operator(cfg: &StorageS3Config) -> Result { builder.disable_credential_loader(); } + // Enable virtual host style + if cfg.enable_virtual_host_style { + builder.enable_virtual_host_style(); + } + Ok(Operator::new(builder.finish().await?)) } diff --git a/query/src/interpreters/interpreter_common.rs b/query/src/interpreters/interpreter_common.rs index 7d21db183f55..c3d68f19b292 100644 --- a/query/src/interpreters/interpreter_common.rs +++ b/query/src/interpreters/interpreter_common.rs @@ -23,8 +23,8 @@ use common_meta_types::GrantObject; use common_meta_types::StageFile; use common_meta_types::StageType; use common_meta_types::UserStageInfo; +use common_tracing::tracing::warn; use futures::TryStreamExt; -use opendal::ObjectMode; use regex::Regex; use crate::sessions::QueryContext; @@ -79,6 +79,11 @@ pub async fn list_files( } } +/// List files from DAL in recursive way. +/// +/// - If input path is a dir, we will list it recursively. +/// - Or, we will append the file itself, and try to list `path/`. +/// - If not exist, we will try to list `path/` too. pub async fn list_files_from_dal( ctx: &Arc, stage: &UserStageInfo, @@ -86,40 +91,39 @@ pub async fn list_files_from_dal( pattern: &str, ) -> Result> { let op = StageSource::get_op(ctx, stage).await?; - let files = if path.ends_with('/') { - let mut list = vec![]; - let mut dirs = vec![path.to_string()]; - while let Some(dir) = dirs.pop() { - let mut objects = op.object(&dir).list().await?; - while let Some(de) = objects.try_next().await? { - let meta = de.metadata().await?; - let path = de.path().to_string(); - match de.mode() { - ObjectMode::FILE => { - list.push((path, meta)); - } - ObjectMode::DIR => { - dirs.push(path); - } - ObjectMode::Unknown => continue, - } - } + let mut files = Vec::new(); + + // - If the path itself is a dir, return directly. + // - Otherwise, return a path suffix by `/` + // - If other errors happen, we will ignore them by returning None. + let dir_path = match op.object(path).metadata().await { + Ok(meta) if meta.mode().is_dir() => Some(path.to_string()), + Ok(meta) if !meta.mode().is_dir() => { + files.push((path.to_string(), meta)); + + Some(format!("{path}/")) } - list - } else { - let o = op.object(path); - match o.metadata().await { - Ok(meta) => { - if meta.mode().is_dir() { - vec![] - } else { - vec![(o.path().to_string(), meta)] + Err(e) if e.kind() == io::ErrorKind::NotFound => Some(format!("{path}/")), + Err(e) => return Err(e.into()), + _ => None, + }; + + // Check the if this dir valid and list it recursively. + if let Some(dir) = dir_path { + match op.object(&dir).metadata().await { + Ok(_) => { + let mut ds = op.batch().walk_top_down(&dir)?; + while let Some(de) = ds.try_next().await? { + if de.mode().is_file() { + let path = de.path().to_string(); + let meta = de.metadata().await?; + files.push((path, meta)); + } } } - Err(e) if e.kind() == io::ErrorKind::NotFound => vec![], - Err(e) => return Err(e.into()), - } - }; + Err(e) => warn!("ignore listing {path}/, because: {:?}", e), + }; + } let regex = if !pattern.is_empty() { Some(Regex::new(pattern).map_err(|e| { @@ -201,7 +205,7 @@ pub async fn list_files_from_meta_api( if path.ends_with('/') { name.starts_with(path) } else { - name == path + name.starts_with(&format!("{path}/")) || name == path } }) .filter(|file| { diff --git a/query/tests/it/interpreters/interpreter_list.rs b/query/tests/it/interpreters/interpreter_list.rs index b7d5d6ed9bf5..4d3276643353 100644 --- a/query/tests/it/interpreters/interpreter_list.rs +++ b/query/tests/it/interpreters/interpreter_list.rs @@ -118,10 +118,11 @@ async fn test_list_stage_interpreter() -> Result<()> { let stream = executor.execute(None).await?; let result = stream.try_collect::>().await?; let expected = vec![ - "+------+------+-----+---------------+---------+", - "| name | size | md5 | last_modified | creator |", - "+------+------+-----+---------------+---------+", - "+------+------+-----+---------------+---------+", + "+----------------+------+------+-------------------------------+-----------------+", + "| name | size | md5 | last_modified | creator |", + "+----------------+------+------+-------------------------------+-----------------+", + "| test/books.csv | 100 | NULL | 1970-01-01 00:00:00.000 +0000 | 'test_user'@'%' |", + "+----------------+------+------+-------------------------------+-----------------+", ]; common_datablocks::assert_blocks_sorted_eq(expected, result.as_slice()); }