From a6e80ce0ce66c8010130d095902bd4ba0ca60964 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 17 Jun 2022 15:03:49 +0800 Subject: [PATCH 01/40] Implement copy into statement Signed-off-by: Xuanwo --- common/ast/src/ast/mod.rs | 17 +++- common/ast/src/ast/statement.rs | 93 +++++++++++++++++++ common/ast/src/parser/statement.rs | 68 ++++++++++++++ query/src/interpreters/interpreter_copy_v2.rs | 52 +++++++++++ query/src/interpreters/mod.rs | 1 + 5 files changed, 230 insertions(+), 1 deletion(-) create mode 100644 query/src/interpreters/interpreter_copy_v2.rs diff --git a/common/ast/src/ast/mod.rs b/common/ast/src/ast/mod.rs index fb2b81e36cc7..b734bc4292dd 100644 --- a/common/ast/src/ast/mod.rs +++ b/common/ast/src/ast/mod.rs @@ -58,6 +58,7 @@ fn write_period_separated_list( Ok(()) } +/// Write input items into `a, b, c` fn write_comma_separated_list( f: &mut Formatter<'_>, items: impl IntoIterator, @@ -66,7 +67,21 @@ fn write_comma_separated_list( if i > 0 { write!(f, ", ")?; } - write!(f, "{}", item)?; + write!(f, "{item}")?; + } + Ok(()) +} + +/// Write input items into `'a', 'b', 'c'` +fn write_quoted_comma_separated_list( + f: &mut Formatter<'_>, + items: impl IntoIterator, +) -> std::fmt::Result { + for (i, item) in items.into_iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "'{item}'")?; } Ok(()) } diff --git a/common/ast/src/ast/statement.rs b/common/ast/src/ast/statement.rs index f0613c805d5c..aeed49102440 100644 --- a/common/ast/src/ast/statement.rs +++ b/common/ast/src/ast/statement.rs @@ -29,6 +29,7 @@ use crate::ast::expr::Literal; use crate::ast::expr::TypeName; use crate::ast::write_comma_separated_list; use crate::ast::write_period_separated_list; +use crate::ast::write_quoted_comma_separated_list; use crate::ast::Identifier; use crate::ast::Query; use crate::parser::token::Token; @@ -51,6 +52,16 @@ pub enum Statement<'a> { overwrite: bool, }, + CopyInto { + src: CopyTarget<'a>, + dst: CopyTarget<'a>, + files: Vec, + pattern: String, + file_format: BTreeMap, + validation_mode: String, + size_limit: usize, + }, + ShowSettings, ShowProcessList, ShowMetrics, @@ -347,6 +358,29 @@ pub enum DatabaseEngine { Github(String), } +/// CopyTarget is the target that can be used in `COPY INTO`. +#[derive(Debug, Clone, PartialEq)] +pub enum CopyTarget<'a> { + /// Table can be used in `INTO` or `FROM`. + /// + /// While table used as `FROM`, it will be rewrite as `(SELECT * FROM table)` + Table(Option>, Identifier<'a>), + /// Location can be used in `INTO` or `FROM`. + /// + /// Location could be + /// + /// - internal stage: `@internal_stage/path/to/dir/` + /// - external stage: `@s3_external_stage/path/to/dir/` + /// - external location: `s3://bucket/path/to/dir/` + /// + /// We only parse them into `String` and leave the location parser in further. + Location(String), + /// Query can only be used as `FROM`. 
+ /// + /// For example:`(SELECT field_a,field_b FROM table)` + Query(Box>), +} + #[derive(Debug, Clone, PartialEq)] pub struct CreateViewStmt<'a> { pub if_not_exists: bool, @@ -549,6 +583,26 @@ impl Display for KillTarget { } } +impl Display for CopyTarget<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + CopyTarget::Table(database, table) => { + if let Some(database) = database { + write!(f, "{database}.{table}") + } else { + write!(f, "{table}") + } + } + CopyTarget::Location(location) => { + write!(f, "{location}") + } + CopyTarget::Query(query) => { + write!(f, "({query})") + } + } + } +} + impl Display for RoleOption { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { match self { @@ -606,6 +660,45 @@ impl<'a> Display for Statement<'a> { InsertSource::Select { query } => write!(f, " {query}")?, } } + Statement::CopyInto { + src, + dst, + files, + pattern, + file_format, + validation_mode, + size_limit, + } => { + write!(f, "COPY")?; + write!(f, " INTO {dst}")?; + write!(f, " FROM {src}")?; + + if !file_format.is_empty() { + write!(f, " FILE_FORMAT = (")?; + for (k, v) in file_format.iter() { + write!(f, " {} = '{}'", k, v)?; + } + write!(f, " )")?; + } + + if !files.is_empty() { + write!(f, " FILES = (")?; + write_quoted_comma_separated_list(f, files)?; + write!(f, " )")?; + } + + if !pattern.is_empty() { + write!(f, " PATTERN = '{pattern}'")?; + } + + if *size_limit != 0 { + write!(f, " SIZE_LIMIT = {size_limit}")?; + } + + if !validation_mode.is_empty() { + write!(f, "VALIDATION_MODE = {validation_mode}")?; + } + } Statement::ShowSettings => { write!(f, "SHOW SETTINGS")?; } diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs index bd7ed2b1c734..20d9ab9373f6 100644 --- a/common/ast/src/parser/statement.rs +++ b/common/ast/src/parser/statement.rs @@ -13,6 +13,7 @@ // limitations under the License. use std::collections::BTreeMap; +use std::fmt::format; use common_meta_types::AuthType; use common_meta_types::UserIdentity; @@ -591,6 +592,30 @@ pub fn statement(i: Input) -> IResult { }, ); + let copy_into = map( + rule! { + COPY + ~ INTO ~ #copy_target + ~ FROM ~ #copy_target + ~ ( FILES ~ "=" ~ "(" ~ #comma_separated_list0(literal_string) ~ ")")? + ~ ( PATTERN ~ "=" ~ #literal_string)? + ~ ( FILE_FORMAT ~ "=" ~ #options)? + ~ ( VALIDATION_MODE ~ "=" ~ #literal_string)? + ~ ( SIZE_LIMIT ~ "=" ~ #literal_u64)? + }, + |(_, _, dst, _, src, files, pattern, file_format, validation_mode, size_limit)| { + Statement::CopyInto { + src, + dst, + files: files.map(|v| v.3).unwrap_or_default(), + pattern: pattern.map(|v| v.2).unwrap_or_default(), + file_format: file_format.map(|v| v.2).unwrap_or_default(), + validation_mode: validation_mode.map(|v| v.2).unwrap_or_default(), + size_limit: size_limit.map(|v| v.2).unwrap_or_default() as usize, + } + }, + ); + alt(( rule!( #map(query, |query| Statement::Query(Box::new(query))) @@ -646,6 +671,16 @@ pub fn statement(i: Input) -> IResult { | #remove_stage: "`REMOVE @ [pattern = '']`" | #drop_stage: "`DROP STAGE `" ), + rule! ( + #copy_into: "`COPY + INTO { internalStage | externalStage | externalLocation | [.] } + FROM { internalStage | externalStage | externalLocation | [.] | ( ) } + [ FILE_FORMAT = ( { TYPE = { CSV | JSON | PARQUET } [ formatTypeOptions ] } ) ] + [ FILES = ( '' [ , '' ] [ , ... 
] ) ]
+                [ PATTERN = '<regex_pattern>' ]
+                [ VALIDATION_MODE = RETURN_ROWS ]
+                [ copyOptions ]`"
+        ),
     ))(i)
 }

@@ -801,6 +836,39 @@ pub fn kill_target(i: Input) -> IResult<KillTarget> {
     ))(i)
 }

+/// Parse input into `CopyTarget`
+pub fn copy_target(i: Input) -> IResult<CopyTarget> {
+    let table = map(
+        rule! {
+            #peroid_separated_idents_1_to_2
+        },
+        |(database, table)| CopyTarget::Table(database, table),
+    );
+    let query = map(
+        rule! {
+            #parenthesized_query
+        },
+        |query| CopyTarget::Query(Box::new(query)),
+    );
+    let stage_location = map(
+        rule! {
+            "@" ~ #literal_string
+        },
+        |(_, location)| CopyTarget::Location(format!("@{location}")),
+    );
+    let uri_location = map(
+        rule! {
+            #literal_string ~ "://" ~ #literal_string
+        },
+        // TODO(xuanwo): Maybe we can check the proposal during parse?
+        |(protocol, _, location)| CopyTarget::Location(format!("{protocol}://{location}")),
+    );
+
+    rule!(
+        #table | #query | #stage_location | #uri_location
+    )(i)
+}
+
 pub fn show_limit(i: Input) -> IResult<ShowLimit> {
     let limit_like = map(
         rule! {
diff --git a/query/src/interpreters/interpreter_copy_v2.rs b/query/src/interpreters/interpreter_copy_v2.rs
new file mode 100644
index 000000000000..a16dde2f00f3
--- /dev/null
+++ b/query/src/interpreters/interpreter_copy_v2.rs
@@ -0,0 +1,52 @@
+// Copyright 2022 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
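+
+//! Interpreter for `COPY` on the new (v2) planner. This first commit only
+//! adds the skeleton: `execute` is still `todo!()` here and is implemented
+//! later in this series.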
+use std::sync::Arc; + +use common_ast::ast::ExplainKind; +use common_datablocks::DataBlock; +use common_datavalues::prelude::*; +use common_exception::ErrorCode; +use common_exception::Result; +use common_streams::DataBlockStream; +use common_streams::SendableDataBlockStream; + +use crate::interpreters::Interpreter; +use crate::interpreters::InterpreterPtr; +use crate::sessions::QueryContext; +use crate::sql::exec::PipelineBuilder; +use crate::sql::optimizer::SExpr; +use crate::sql::plans::Plan; +use crate::sql::BindContext; +use crate::sql::MetadataRef; + +pub struct CopyInterpreterV2 { + ctx: Arc, + schema: DataSchemaRef, + kind: ExplainKind, + plan: Plan, +} + +#[async_trait::async_trait] +impl Interpreter for CopyInterpreterV2 { + fn name(&self) -> &str { + "CopyInterpreterV2" + } + + async fn execute( + &self, + _input_stream: Option, + ) -> Result { + todo!() + } +} diff --git a/query/src/interpreters/mod.rs b/query/src/interpreters/mod.rs index 75602a407602..24bb944070b0 100644 --- a/query/src/interpreters/mod.rs +++ b/query/src/interpreters/mod.rs @@ -20,6 +20,7 @@ mod interpreter_cluster_key_alter; mod interpreter_cluster_key_drop; mod interpreter_common; mod interpreter_copy; +mod interpreter_copy_v2; mod interpreter_database_create; mod interpreter_database_drop; mod interpreter_database_rename; From cc2acaf8f13893e935621e209a34d0c6807b50ab Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 17 Jun 2022 17:04:09 +0800 Subject: [PATCH 02/40] Refactor CopyStatement Signed-off-by: Xuanwo --- common/ast/src/ast/statement.rs | 62 ++++++++++--------- common/ast/src/parser/statement.rs | 4 +- .../interpreters/interpreter_factory_v2.rs | 2 + query/src/sql/planner/binder/copy.rs | 41 ++++++++++++ query/src/sql/planner/binder/mod.rs | 6 ++ query/src/sql/planner/format/display_plan.rs | 2 + query/src/sql/planner/plans/mod.rs | 3 + 7 files changed, 88 insertions(+), 32 deletions(-) create mode 100644 query/src/sql/planner/binder/copy.rs diff --git a/common/ast/src/ast/statement.rs b/common/ast/src/ast/statement.rs index aeed49102440..455a33261b0c 100644 --- a/common/ast/src/ast/statement.rs +++ b/common/ast/src/ast/statement.rs @@ -52,15 +52,7 @@ pub enum Statement<'a> { overwrite: bool, }, - CopyInto { - src: CopyTarget<'a>, - dst: CopyTarget<'a>, - files: Vec, - pattern: String, - file_format: BTreeMap, - validation_mode: String, - size_limit: usize, - }, + Copy(CopyStmt<'a>), ShowSettings, ShowProcessList, @@ -175,6 +167,24 @@ pub enum ExplainKind { Pipeline, } +/// CopyStmt is the parsed statement of `COPY`. 
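+///
+/// Both `src` and `dst` are [`CopyTarget`]s, so a `COPY` can load from a
+/// stage or location into a table, or unload from a table (or query) out to
+/// a location.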
+/// +/// ## Examples +/// +/// ```sql +/// COPY INTO table from s3://bucket/path/to/x.csv +/// ``` +#[derive(Debug, Clone, PartialEq)] +pub struct CopyStmt<'a> { + pub src: CopyTarget<'a>, + pub dst: CopyTarget<'a>, + pub files: Vec, + pub pattern: String, + pub file_format: BTreeMap, + pub validation_mode: String, + pub size_limit: usize, +} + #[derive(Debug, Clone, PartialEq)] // Databases pub struct ShowDatabasesStmt<'a> { pub limit: Option>, @@ -660,43 +670,35 @@ impl<'a> Display for Statement<'a> { InsertSource::Select { query } => write!(f, " {query}")?, } } - Statement::CopyInto { - src, - dst, - files, - pattern, - file_format, - validation_mode, - size_limit, - } => { + Statement::Copy(stmt) => { write!(f, "COPY")?; - write!(f, " INTO {dst}")?; - write!(f, " FROM {src}")?; + write!(f, " INTO {}", stmt.dst)?; + write!(f, " FROM {}", stmt.src)?; - if !file_format.is_empty() { + if !stmt.file_format.is_empty() { write!(f, " FILE_FORMAT = (")?; - for (k, v) in file_format.iter() { + for (k, v) in stmt.file_format.iter() { write!(f, " {} = '{}'", k, v)?; } write!(f, " )")?; } - if !files.is_empty() { + if !stmt.files.is_empty() { write!(f, " FILES = (")?; - write_quoted_comma_separated_list(f, files)?; + write_quoted_comma_separated_list(f, &stmt.files)?; write!(f, " )")?; } - if !pattern.is_empty() { - write!(f, " PATTERN = '{pattern}'")?; + if !stmt.pattern.is_empty() { + write!(f, " PATTERN = '{}'", stmt.pattern)?; } - if *size_limit != 0 { - write!(f, " SIZE_LIMIT = {size_limit}")?; + if stmt.size_limit != 0 { + write!(f, " SIZE_LIMIT = {}", stmt.size_limit)?; } - if !validation_mode.is_empty() { - write!(f, "VALIDATION_MODE = {validation_mode}")?; + if !stmt.validation_mode.is_empty() { + write!(f, "VALIDATION_MODE = {}", stmt.validation_mode)?; } } Statement::ShowSettings => { diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs index 20d9ab9373f6..a85522adbee6 100644 --- a/common/ast/src/parser/statement.rs +++ b/common/ast/src/parser/statement.rs @@ -604,7 +604,7 @@ pub fn statement(i: Input) -> IResult { ~ ( SIZE_LIMIT ~ "=" ~ #literal_u64)? }, |(_, _, dst, _, src, files, pattern, file_format, validation_mode, size_limit)| { - Statement::CopyInto { + Statement::Copy(CopyStmt { src, dst, files: files.map(|v| v.3).unwrap_or_default(), @@ -612,7 +612,7 @@ pub fn statement(i: Input) -> IResult { file_format: file_format.map(|v| v.2).unwrap_or_default(), validation_mode: validation_mode.map(|v| v.2).unwrap_or_default(), size_limit: size_limit.map(|v| v.2).unwrap_or_default() as usize, - } + }) }, ); diff --git a/query/src/interpreters/interpreter_factory_v2.rs b/query/src/interpreters/interpreter_factory_v2.rs index e9dbc8228f2b..9b819c256cb0 100644 --- a/query/src/interpreters/interpreter_factory_v2.rs +++ b/query/src/interpreters/interpreter_factory_v2.rs @@ -68,6 +68,8 @@ impl InterpreterFactoryV2 { ExplainInterpreterV2::try_create(ctx, *plan.clone(), kind.clone()) } + Plan::Copy(_) => todo!(), + Plan::ShowMetrics => ShowMetricsInterpreter::try_create(ctx), Plan::ShowProcessList => ShowProcessListInterpreter::try_create(ctx), Plan::ShowSettings => ShowSettingsInterpreter::try_create(ctx), diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs new file mode 100644 index 000000000000..811100039397 --- /dev/null +++ b/query/src/sql/planner/binder/copy.rs @@ -0,0 +1,41 @@ +// Copyright 2022 Datafuse Labs. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::str::FromStr; + +use common_ast::ast::CopyStmt; +use common_ast::ast::CreateStageStmt; +use common_exception::ErrorCode; +use common_exception::Result; +use common_meta_types::OnErrorMode; +use common_meta_types::StageType; +use common_meta_types::UserStageInfo; +use common_planners::CreateUserStagePlan; +use common_planners::ListPlan; +use common_planners::RemoveUserStagePlan; + +use crate::sql::binder::Binder; +use crate::sql::plans::Plan; +use crate::sql::statements::parse_copy_file_format_options; +use crate::sql::statements::parse_stage_location; +use crate::sql::statements::parse_uri_location; + +impl<'a> Binder { + pub(in crate::sql::planner::binder) async fn bind_copy( + &mut self, + stmt: &CopyStmt<'a>, + ) -> Result { + todo!() + } +} diff --git a/query/src/sql/planner/binder/mod.rs b/query/src/sql/planner/binder/mod.rs index dee34785b12e..77c286a3f37e 100644 --- a/query/src/sql/planner/binder/mod.rs +++ b/query/src/sql/planner/binder/mod.rs @@ -39,6 +39,7 @@ use crate::storages::Table; mod aggregate; mod bind_context; +mod copy; mod ddl; mod distinct; mod join; @@ -110,6 +111,11 @@ impl<'a> Binder { }) } + Statement::Copy(stmt) => { + let plan = self.bind_copy(stmt).await?; + Ok(plan) + } + Statement::ShowMetrics => Ok(Plan::ShowMetrics), Statement::ShowProcessList => Ok(Plan::ShowProcessList), Statement::ShowSettings => Ok(Plan::ShowSettings), diff --git a/query/src/sql/planner/format/display_plan.rs b/query/src/sql/planner/format/display_plan.rs index c6bf9481c3bd..b0af98ea1269 100644 --- a/query/src/sql/planner/format/display_plan.rs +++ b/query/src/sql/planner/format/display_plan.rs @@ -27,6 +27,8 @@ impl Plan { Ok(format!("{:?}:\n{}", kind, result)) } + Plan::Copy(_) => todo!(), + Plan::ShowMetrics => Ok("SHOW METRICS".to_string()), Plan::ShowProcessList => Ok("SHOW PROCESSLIST".to_string()), Plan::ShowSettings => Ok("SHOW SETTINGS".to_string()), diff --git a/query/src/sql/planner/plans/mod.rs b/query/src/sql/planner/plans/mod.rs index a061ad555218..729a168e3702 100644 --- a/query/src/sql/planner/plans/mod.rs +++ b/query/src/sql/planner/plans/mod.rs @@ -67,6 +67,9 @@ pub enum Plan { plan: Box, }, + // Copy + Copy(Box), + // System ShowMetrics, ShowProcessList, From b02d6c268af7d7db0f2862aa6718bfae09242213 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 17 Jun 2022 18:12:03 +0800 Subject: [PATCH 03/40] Fix support for catalog Signed-off-by: Xuanwo --- common/ast/src/ast/statement.rs | 16 +++++++++++++--- common/ast/src/parser/statement.rs | 4 ++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/common/ast/src/ast/statement.rs b/common/ast/src/ast/statement.rs index 455a33261b0c..62df5b063d76 100644 --- a/common/ast/src/ast/statement.rs +++ b/common/ast/src/ast/statement.rs @@ -374,7 +374,11 @@ pub enum CopyTarget<'a> { /// Table can be used in `INTO` or `FROM`. 
/// /// While table used as `FROM`, it will be rewrite as `(SELECT * FROM table)` - Table(Option>, Identifier<'a>), + Table( + Option>, + Option>, + Identifier<'a>, + ), /// Location can be used in `INTO` or `FROM`. /// /// Location could be @@ -596,8 +600,14 @@ impl Display for KillTarget { impl Display for CopyTarget<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - CopyTarget::Table(database, table) => { - if let Some(database) = database { + CopyTarget::Table(catalog, database, table) => { + if let Some(catalog) = catalog { + write!( + f, + "{catalog}.{}.{table}", + database.as_ref().expect("database must be valid") + ) + } else if let Some(database) = database { write!(f, "{database}.{table}") } else { write!(f, "{table}") diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs index a85522adbee6..1c867b17cac0 100644 --- a/common/ast/src/parser/statement.rs +++ b/common/ast/src/parser/statement.rs @@ -840,9 +840,9 @@ pub fn kill_target(i: Input) -> IResult { pub fn copy_target(i: Input) -> IResult { let table = map( rule! { - #peroid_separated_idents_1_to_2 + #peroid_separated_idents_1_to_3 }, - |(database, table)| CopyTarget::Table(database, table), + |(catalog, database, table)| CopyTarget::Table(catalog, database, table), ); let query = map( rule! { From 313fb5773b622f112e08a06de2ad46befc55f8a1 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 17 Jun 2022 19:03:58 +0800 Subject: [PATCH 04/40] Implement bind statement for copy Signed-off-by: Xuanwo --- query/src/sql/planner/binder/copy.rs | 175 ++++++++++++++++++++++++++- 1 file changed, 174 insertions(+), 1 deletion(-) diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs index 811100039397..8474c3e877a7 100644 --- a/query/src/sql/planner/binder/copy.rs +++ b/query/src/sql/planner/binder/copy.rs @@ -12,18 +12,31 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::BTreeMap; use std::str::FromStr; use common_ast::ast::CopyStmt; +use common_ast::ast::CopyTarget; use common_ast::ast::CreateStageStmt; +use common_ast::ast::Query; +use common_ast::ast::Statement; +use common_ast::parser::error::Backtrace; +use common_ast::parser::parse_sql; +use common_ast::parser::tokenize_sql; use common_exception::ErrorCode; use common_exception::Result; use common_meta_types::OnErrorMode; use common_meta_types::StageType; use common_meta_types::UserStageInfo; +use common_planners::CopyMode; +use common_planners::CopyPlan; use common_planners::CreateUserStagePlan; use common_planners::ListPlan; +use common_planners::ReadDataSourcePlan; use common_planners::RemoveUserStagePlan; +use common_planners::SourceInfo; +use common_planners::StageTableInfo; +use common_planners::ValidationMode; use crate::sql::binder::Binder; use crate::sql::plans::Plan; @@ -36,6 +49,166 @@ impl<'a> Binder { &mut self, stmt: &CopyStmt<'a>, ) -> Result { - todo!() + match &stmt.dst { + CopyTarget::Table(catalog, database, table) => { + let (mut stage_info, path) = match &stmt.src { + CopyTarget::Table(_, _, _) => { + return Err(ErrorCode::SyntaxException( + "COPY INTO FROM
is invalid",
+                        ))
+                    }
+                    CopyTarget::Query(_) => {
+                        return Err(ErrorCode::SyntaxException(
+                            "COPY INTO <table>
FROM is invalid", + )) + } + // TODO(xuanwo): we need to parse credential and encryption. + CopyTarget::Location(location) => self.bind_stage(stmt, location).await?, + }; + + let catalog_name = catalog + .map(|v| v.to_string()) + .unwrap_or(self.ctx.get_current_catalog()); + let database_name = database + .map(|v| v.to_string()) + .unwrap_or(self.ctx.get_current_database()); + let table_name = table.to_string(); + let table = self + .ctx + .get_table(&catalog_name, &database_name, &table_name) + .await?; + let mut schema = table.schema(); + let table_id = table.get_id(); + + // TODO(xuanwo): we need to support columns in COPY. + + // Read Source plan. + let from = ReadDataSourcePlan { + catalog: catalog_name.clone(), + source_info: SourceInfo::StageSource(StageTableInfo { + schema: schema.clone(), + stage_info, + path, + files: vec![], + }), + scan_fields: None, + parts: vec![], + statistics: Default::default(), + description: "".to_string(), + tbl_args: None, + push_downs: None, + }; + + // Pattern. + let pattern = self.pattern.clone(); + + Ok(Plan::Copy(Box::new(CopyPlan { + validation_mode, + copy_mode: CopyMode::IntoTable { + catalog_name, + db_name, + tbl_name, + tbl_id, + schema, + from, + files: self.files.clone(), + pattern, + }, + }))) + } + CopyTarget::Location(location) => { + let (mut stage_info, path) = self.bind_stage(stmt, location).await?; + let query = match &stmt.src { + CopyTarget::Table(catalog, database, table) => { + let catalog_name = catalog + .map(|v| v.to_string()) + .unwrap_or(self.ctx.get_current_catalog()); + let database_name = database + .map(|v| v.to_string()) + .unwrap_or(self.ctx.get_current_database()); + let table_name = table.to_string(); + let subquery = format!( + "SELECT * FROM {}.{}.{}", + catalog_name, database_name, table_name + ); + let tokens = tokenize_sql(sql)?; + let backtrace = Backtrace::new(); + let stmts = parse_sql(&tokens, &backtrace)?; + if stmts.len() > 1 { + return Err(ErrorCode::UnImplement("unsupported multiple statements")); + } + match &stmts[0] { + Statement::Query(query) => query, + _ => { + return Err(ErrorCode::SyntaxException( + "COPY INTO FROM is invalid", + )) + } + } + } + CopyTarget::Query(query) => query, + CopyTarget::Location(_) => { + return Err(ErrorCode::SyntaxException( + "COPY INTO FROM is invalid", + )) + } + }; + + Ok(Plan::Copy(Box::new(CopyPlan { + validation_mode, + copy_mode: CopyMode::IntoStage { + stage_table_info: StageTableInfo { + schema: query.schema(), + stage_info, + path, + files: vec![], + }, + // TODO(xuanwo): we need to convert query to Plan. + query: Box::new(query), + }, + }))) + } + CopyTarget::Query(_) => { + return Err(ErrorCode::SyntaxException("COPY INTO is invalid")) + } + } + } + + async fn bind_stage( + &mut self, + stmt: &CopyStmt<'a>, + location: &str, + ) -> Result<(UserStageInfo, String)> { + let (mut stage_info, path) = if location.starts_with('@') { + parse_stage_location(&self.ctx, &location).await? + } else { + parse_uri_location(&location, &BTreeMap::new(), &BTreeMap::new())? + }; + + if !stmt.file_format.is_empty() { + stage_info.file_format_options = + parse_copy_file_format_options(&self.file_format_options)?; + } + + // Copy options. + { + // TODO(xuanwo): COPY should handle on error. + // on_error. + // if !stmt.on_error.is_empty() { + // stage_info.copy_options.on_error = + // OnErrorMode::from_str(&self.on_error).map_err(ErrorCode::SyntaxException)?; + // } + + // size_limit. 
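+            // 0 is the parser default (`unwrap_or_default()`), meaning no
+            // explicit SIZE_LIMIT was given, so only non-zero values are
+            // forwarded to the stage's copy options.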
+            if stmt.size_limit != 0 {
+                stage_info.copy_options.size_limit = stmt.size_limit;
+            }
+        }
+
+        // Validation mode.
+        let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str())
+            .map_err(ErrorCode::SyntaxException)?;
+
+        Ok((stage_info, path))
+    }
+}

From eb7963ea76e020e21abe47bb300866bb3bd7a439 Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Sat, 18 Jun 2022 15:04:38 +0800
Subject: [PATCH 05/40] Add copy plan v2

Signed-off-by: Xuanwo

---
 query/src/sql/planner/binder/copy.rs   |  81 +++++++++--------
 query/src/sql/planner/binder/mod.rs    |   2 +-
 query/src/sql/planner/plans/copy_v2.rs | 117 +++++++++++++++++++++
 query/src/sql/planner/plans/mod.rs     |   5 +-
 4 files changed, 165 insertions(+), 40 deletions(-)
 create mode 100644 query/src/sql/planner/plans/copy_v2.rs

diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs
index 8474c3e877a7..ffb109c698d0 100644
--- a/query/src/sql/planner/binder/copy.rs
+++ b/query/src/sql/planner/binder/copy.rs
@@ -36,22 +36,25 @@ use common_planners::ReadDataSourcePlan;
 use common_planners::RemoveUserStagePlan;
 use common_planners::SourceInfo;
 use common_planners::StageTableInfo;

 use crate::sql::binder::Binder;
+use crate::sql::plans::CopyPlanV2;
 use crate::sql::plans::Plan;
+use crate::sql::plans::ValidationMode;
 use crate::sql::statements::parse_copy_file_format_options;
 use crate::sql::statements::parse_stage_location;
 use crate::sql::statements::parse_uri_location;
+use crate::sql::BindContext;

 impl<'a> Binder {
     pub(in crate::sql::planner::binder) async fn bind_copy(
         &mut self,
+        bind_context: &BindContext,
         stmt: &CopyStmt<'a>,
     ) -> Result<Plan> {
         match &stmt.dst {
             CopyTarget::Table(catalog, database, table) => {
-                let (mut stage_info, path) = match &stmt.src {
+                let (stage_info, path) = match &stmt.src {
                     CopyTarget::Table(_, _, _) => {
                         return Err(ErrorCode::SyntaxException(
                             "COPY INTO <table>
FROM <table>
is invalid", @@ -66,10 +69,15 @@ impl<'a> Binder { CopyTarget::Location(location) => self.bind_stage(stmt, location).await?, }; + let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str()) + .map_err(ErrorCode::SyntaxException)?; + let catalog_name = catalog + .as_ref() .map(|v| v.to_string()) .unwrap_or(self.ctx.get_current_catalog()); let database_name = database + .as_ref() .map(|v| v.to_string()) .unwrap_or(self.ctx.get_current_database()); let table_name = table.to_string(); @@ -77,7 +85,7 @@ impl<'a> Binder { .ctx .get_table(&catalog_name, &database_name, &table_name) .await?; - let mut schema = table.schema(); + let schema = table.schema(); let table_id = table.get_id(); // TODO(xuanwo): we need to support columns in COPY. @@ -99,46 +107,47 @@ impl<'a> Binder { push_downs: None, }; - // Pattern. - let pattern = self.pattern.clone(); - - Ok(Plan::Copy(Box::new(CopyPlan { + Ok(Plan::Copy(Box::new(CopyPlanV2::IntoTable { + catalog_name, + database_name, + table_name, + table_id, + schema, + from, + files: stmt.files.clone(), + pattern: stmt.pattern.clone(), validation_mode, - copy_mode: CopyMode::IntoTable { - catalog_name, - db_name, - tbl_name, - tbl_id, - schema, - from, - files: self.files.clone(), - pattern, - }, }))) } CopyTarget::Location(location) => { - let (mut stage_info, path) = self.bind_stage(stmt, location).await?; + let (stage_info, path) = self.bind_stage(stmt, location).await?; let query = match &stmt.src { CopyTarget::Table(catalog, database, table) => { let catalog_name = catalog + .as_ref() .map(|v| v.to_string()) .unwrap_or(self.ctx.get_current_catalog()); let database_name = database + .as_ref() .map(|v| v.to_string()) .unwrap_or(self.ctx.get_current_database()); let table_name = table.to_string(); + let subquery = format!( "SELECT * FROM {}.{}.{}", catalog_name, database_name, table_name ); - let tokens = tokenize_sql(sql)?; + let tokens = tokenize_sql(&subquery)?; let backtrace = Backtrace::new(); let stmts = parse_sql(&tokens, &backtrace)?; if stmts.len() > 1 { return Err(ErrorCode::UnImplement("unsupported multiple statements")); } match &stmts[0] { - Statement::Query(query) => query, + Statement::Query(query) => { + self.bind_statement(bind_context, &Statement::Query(query.clone())) + .await? + } _ => { return Err(ErrorCode::SyntaxException( "COPY INTO FROM is invalid", @@ -146,7 +155,10 @@ impl<'a> Binder { } } } - CopyTarget::Query(query) => query, + CopyTarget::Query(query) => { + self.bind_statement(bind_context, &Statement::Query(query.clone())) + .await? + } CopyTarget::Location(_) => { return Err(ErrorCode::SyntaxException( "COPY INTO FROM is invalid", @@ -154,18 +166,16 @@ impl<'a> Binder { } }; - Ok(Plan::Copy(Box::new(CopyPlan { + // Validation mode. + let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str()) + .map_err(ErrorCode::SyntaxException)?; + + Ok(Plan::Copy(Box::new(CopyPlanV2::IntoStage { + stage: stage_info, + path, validation_mode, - copy_mode: CopyMode::IntoStage { - stage_table_info: StageTableInfo { - schema: query.schema(), - stage_info, - path, - files: vec![], - }, - // TODO(xuanwo): we need to convert query to Plan. - query: Box::new(query), - }, + // TODO(xuanwo): we need to convert query to Plan. 
+ query: Box::new(query), }))) } CopyTarget::Query(_) => { @@ -186,8 +196,7 @@ impl<'a> Binder { }; if !stmt.file_format.is_empty() { - stage_info.file_format_options = - parse_copy_file_format_options(&self.file_format_options)?; + stage_info.file_format_options = parse_copy_file_format_options(&stmt.file_format)?; } // Copy options. @@ -205,10 +214,6 @@ impl<'a> Binder { } } - // Validation mode. - let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str()) - .map_err(ErrorCode::SyntaxException)?; - Ok((stage_info, path)) } } diff --git a/query/src/sql/planner/binder/mod.rs b/query/src/sql/planner/binder/mod.rs index 77c286a3f37e..2af7f5789d32 100644 --- a/query/src/sql/planner/binder/mod.rs +++ b/query/src/sql/planner/binder/mod.rs @@ -112,7 +112,7 @@ impl<'a> Binder { } Statement::Copy(stmt) => { - let plan = self.bind_copy(stmt).await?; + let plan = self.bind_copy(bind_context, stmt).await?; Ok(plan) } diff --git a/query/src/sql/planner/plans/copy_v2.rs b/query/src/sql/planner/plans/copy_v2.rs new file mode 100644 index 000000000000..4ebb565759a3 --- /dev/null +++ b/query/src/sql/planner/plans/copy_v2.rs @@ -0,0 +1,117 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Debug; +use std::fmt::Formatter; +use std::str::FromStr; + +use common_datavalues::DataSchemaRef; +use common_meta_types::MetaId; +use common_meta_types::UserStageInfo; +use common_planners::ReadDataSourcePlan; +use common_planners::StageTableInfo; + +use crate::sql::plans::Plan; + +#[derive(serde::Serialize, serde::Deserialize, PartialEq, Clone, Debug)] +pub enum ValidationMode { + None, + ReturnNRows(u64), + ReturnErrors, + ReturnAllErrors, +} + +impl FromStr for ValidationMode { + type Err = String; + fn from_str(s: &str) -> std::result::Result { + match s.to_uppercase().as_str() { + "" => Ok(ValidationMode::None), + "RETURN_ERRORS" => Ok(ValidationMode::ReturnErrors), + "RETURN_ALL_ERRORS" => Ok(ValidationMode::ReturnAllErrors), + v => { + let rows_str = v.replace("RETURN_", "").replace("_ROWS", ""); + let rows = rows_str.parse::(); + match rows { + Ok(v) => { Ok(ValidationMode::ReturnNRows(v)) } + Err(_) => { + Err( + format!("Unknown validation mode:{:?}, must one of {{ RETURN__ROWS | RETURN_ERRORS | RETURN_ALL_ERRORS}}", v) + ) + } + } + } + } + } +} + +/// CopyPlan supports CopyIntoTable & CopyIntoStage +#[derive(Clone)] +pub enum CopyPlanV2 { + IntoTable { + catalog_name: String, + database_name: String, + table_name: String, + table_id: MetaId, + files: Vec, + pattern: String, + schema: DataSchemaRef, + validation_mode: ValidationMode, + from: ReadDataSourcePlan, + }, + IntoStage { + stage: UserStageInfo, + path: String, + validation_mode: ValidationMode, + query: Box, + }, +} + +impl Debug for CopyPlanV2 { + // Ignore the schema. + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &self { + CopyPlanV2::IntoTable { + database_name, + table_name, + files, + pattern, + from, + validation_mode, + .. 
+ } => { + write!(f, "Copy into {:}.{:}", database_name, table_name)?; + if !files.is_empty() { + write!(f, ", files: {:?}", files)?; + + if !pattern.is_empty() { + write!(f, ", pattern: {:?}", pattern)?; + } + write!(f, ", validation_mode: {:?}", validation_mode)?; + write!(f, ", {:?}", from)?; + } + } + CopyPlanV2::IntoStage { + stage, + path, + validation_mode, + query, + } => { + write!(f, "Copy into {:?}", stage)?; + write!(f, ", path: {:?}", path)?; + write!(f, ", validation_mode: {:?}", validation_mode)?; + } + } + Ok(()) + } +} diff --git a/query/src/sql/planner/plans/mod.rs b/query/src/sql/planner/plans/mod.rs index 729a168e3702..33dbd8b207a7 100644 --- a/query/src/sql/planner/plans/mod.rs +++ b/query/src/sql/planner/plans/mod.rs @@ -14,6 +14,7 @@ mod aggregate; mod apply; +mod copy_v2; mod eval_scalar; mod filter; mod hash_join; @@ -32,6 +33,8 @@ pub use aggregate::Aggregate; pub use apply::CrossApply; use common_ast::ast::ExplainKind; use common_planners::*; +pub use copy_v2::CopyPlanV2; +pub use copy_v2::ValidationMode; pub use eval_scalar::EvalScalar; pub use eval_scalar::ScalarItem; pub use filter::Filter; @@ -68,7 +71,7 @@ pub enum Plan { }, // Copy - Copy(Box), + Copy(Box), // System ShowMetrics, From 074470c5caf6de83ecaf826cbbea4beb37aacca5 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 20 Jun 2022 15:24:04 +0800 Subject: [PATCH 06/40] Add debug log for plan Signed-off-by: Xuanwo --- query/src/interpreters/interpreter_copy_v2.rs | 5 ++--- query/src/lib.rs | 1 + query/src/sql/planner/binder/copy.rs | 7 +++++++ query/src/sql/planner/plans/copy_v2.rs | 1 + 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/query/src/interpreters/interpreter_copy_v2.rs b/query/src/interpreters/interpreter_copy_v2.rs index a16dde2f00f3..ee5a2905be5e 100644 --- a/query/src/interpreters/interpreter_copy_v2.rs +++ b/query/src/interpreters/interpreter_copy_v2.rs @@ -26,15 +26,14 @@ use crate::interpreters::InterpreterPtr; use crate::sessions::QueryContext; use crate::sql::exec::PipelineBuilder; use crate::sql::optimizer::SExpr; +use crate::sql::plans::CopyPlanV2; use crate::sql::plans::Plan; use crate::sql::BindContext; use crate::sql::MetadataRef; pub struct CopyInterpreterV2 { ctx: Arc, - schema: DataSchemaRef, - kind: ExplainKind, - plan: Plan, + plan: CopyPlanV2, } #[async_trait::async_trait] diff --git a/query/src/lib.rs b/query/src/lib.rs index 41e13b4959d9..4e19de83db3d 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -17,6 +17,7 @@ #![feature(arbitrary_self_types)] #![feature(generic_associated_types)] #![feature(type_alias_impl_trait)] +#![feature(assert_matches)] pub mod api; pub mod catalogs; diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs index ffb109c698d0..b71ca06fcdef 100644 --- a/query/src/sql/planner/binder/copy.rs +++ b/query/src/sql/planner/binder/copy.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::assert_matches::debug_assert_matches; use std::collections::BTreeMap; use std::str::FromStr; @@ -37,6 +38,7 @@ use common_planners::RemoveUserStagePlan; use common_planners::SourceInfo; use common_planners::StageTableInfo; +use crate::interpreters::SelectInterpreterV2; use crate::sql::binder::Binder; use crate::sql::plans::CopyPlanV2; use crate::sql::plans::Plan; @@ -166,6 +168,11 @@ impl<'a> Binder { } }; + debug_assert!( + matches!(query, Plan::Query { .. }), + "input sql must be Plan::Query, but it's not" + ); + // Validation mode. 
let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str()) .map_err(ErrorCode::SyntaxException)?; diff --git a/query/src/sql/planner/plans/copy_v2.rs b/query/src/sql/planner/plans/copy_v2.rs index 4ebb565759a3..810e59e0b39d 100644 --- a/query/src/sql/planner/plans/copy_v2.rs +++ b/query/src/sql/planner/plans/copy_v2.rs @@ -22,6 +22,7 @@ use common_meta_types::UserStageInfo; use common_planners::ReadDataSourcePlan; use common_planners::StageTableInfo; +use crate::interpreters::SelectInterpreterV2; use crate::sql::plans::Plan; #[derive(serde::Serialize, serde::Deserialize, PartialEq, Clone, Debug)] From e5638d5879afdb2fdf5b0dc9633b34417cfc5d08 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 20 Jun 2022 15:25:54 +0800 Subject: [PATCH 07/40] Rename query to from Signed-off-by: Xuanwo --- query/src/sql/planner/binder/copy.rs | 3 +-- query/src/sql/planner/plans/copy_v2.rs | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs index b71ca06fcdef..21107704c4a4 100644 --- a/query/src/sql/planner/binder/copy.rs +++ b/query/src/sql/planner/binder/copy.rs @@ -181,8 +181,7 @@ impl<'a> Binder { stage: stage_info, path, validation_mode, - // TODO(xuanwo): we need to convert query to Plan. - query: Box::new(query), + from: Box::new(query), }))) } CopyTarget::Query(_) => { diff --git a/query/src/sql/planner/plans/copy_v2.rs b/query/src/sql/planner/plans/copy_v2.rs index 810e59e0b39d..9563b72a009d 100644 --- a/query/src/sql/planner/plans/copy_v2.rs +++ b/query/src/sql/planner/plans/copy_v2.rs @@ -74,7 +74,7 @@ pub enum CopyPlanV2 { stage: UserStageInfo, path: String, validation_mode: ValidationMode, - query: Box, + from: Box, }, } @@ -106,7 +106,7 @@ impl Debug for CopyPlanV2 { stage, path, validation_mode, - query, + from: query, } => { write!(f, "Copy into {:?}", stage)?; write!(f, ", path: {:?}", path)?; From 098a68a62fe1ca289690abe4d202f4d491362e7e Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 20 Jun 2022 16:11:38 +0800 Subject: [PATCH 08/40] Implement interpreters Signed-off-by: Xuanwo --- query/src/interpreters/interpreter_copy_v2.rs | 277 +++++++++++++++++- query/src/sql/planner/plans/mod.rs | 32 ++ 2 files changed, 308 insertions(+), 1 deletion(-) diff --git a/query/src/interpreters/interpreter_copy_v2.rs b/query/src/interpreters/interpreter_copy_v2.rs index ee5a2905be5e..d59244b38850 100644 --- a/query/src/interpreters/interpreter_copy_v2.rs +++ b/query/src/interpreters/interpreter_copy_v2.rs @@ -1,3 +1,4 @@ +use std::path::Path; // Copyright 2022 Datafuse Labs. 
// // Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,11 +19,24 @@ use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; +use common_meta_types::UserStageInfo; +use common_planners::ReadDataSourcePlan; +use common_planners::SourceInfo; +use common_planners::StageTableInfo; use common_streams::DataBlockStream; use common_streams::SendableDataBlockStream; +use common_tracing::tracing; +use futures::TryStreamExt; +use regex::Regex; +use crate::interpreters::stream::ProcessorExecutorStream; use crate::interpreters::Interpreter; use crate::interpreters::InterpreterPtr; +use crate::interpreters::SelectInterpreter; +use crate::interpreters::SelectInterpreterV2; +use crate::pipelines::new::executor::PipelineCompleteExecutor; +use crate::pipelines::new::executor::PipelinePullingExecutor; +use crate::pipelines::new::NewPipeline; use crate::sessions::QueryContext; use crate::sql::exec::PipelineBuilder; use crate::sql::optimizer::SExpr; @@ -30,22 +44,283 @@ use crate::sql::plans::CopyPlanV2; use crate::sql::plans::Plan; use crate::sql::BindContext; use crate::sql::MetadataRef; +use crate::storages::stage::StageSource; +use crate::storages::stage::StageTable; pub struct CopyInterpreterV2 { ctx: Arc, plan: CopyPlanV2, } +impl CopyInterpreterV2 { + /// Create a CopyInterpreterV2 with context and [`CopyPlanV2`]. + pub fn try_create(ctx: Arc, plan: CopyPlanV2) -> Result { + Ok(Arc::new(CopyInterpreterV2 { ctx, plan })) + } + + /// List the files. + /// There are two cases here: + /// 1. If the plan.files is not empty, we already set the files sets to the COPY command with: `files=(, )` syntax, only need to add the prefix to the file. + /// 2. If the plan.files is empty, there are also two case: + /// 2.1 If the path is a file like /path/to/path/file, S3File::list() will return the same file path. + /// 2.2 If the path is a folder, S3File::list() will return all the files in it. + /// + /// TODO(xuanwo): Align with interpreters/interpreter_common.rs `list_files` + async fn list_files( + &self, + from: &ReadDataSourcePlan, + files: &Vec, + ) -> Result> { + let files = match &from.source_info { + SourceInfo::StageSource(table_info) => { + let path = &table_info.path; + // Here we add the path to the file: /path/to/path/file1. + let files_with_path = if !files.is_empty() { + let mut files_with_path = vec![]; + for file in files { + let new_path = Path::new(path).join(file); + files_with_path.push(new_path.to_string_lossy().to_string()); + } + files_with_path + } else if !path.ends_with('/') { + let op = StageSource::get_op(&self.ctx, &table_info.stage_info).await?; + if op.object(path).is_exist().await? { + vec![path.to_string()] + } else { + vec![] + } + } else { + let op = StageSource::get_op(&self.ctx, &table_info.stage_info).await?; + let mut list = vec![]; + + // TODO: we could rewrite into try_collect. + let mut objects = op.object(path).list().await?; + while let Some(de) = objects.try_next().await? { + list.push(de.path().to_string()); + } + + list + }; + + Ok(files_with_path) + } + other => Err(ErrorCode::LogicalError(format!( + "Cannot list files for the source info: {:?}", + other + ))), + }; + + files + } + + /// Rewrite the ReadDataSourcePlan.S3StageSource.file_name to new file name. 
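+    ///
+    /// Only `SourceInfo::StageSource` is rewritten: its `files` list is
+    /// replaced with the listed (and pattern-filtered) file names; any other
+    /// source kind is returned unchanged.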
+ fn rewrite_read_plan_file_name( + mut plan: ReadDataSourcePlan, + files: Vec, + ) -> ReadDataSourcePlan { + if let SourceInfo::StageSource(ref mut s3) = plan.source_info { + s3.files = files + } + plan + } + + // Read a file and commit it to the table. + // Progress: + // 1. Build a select pipeline + // 2. Execute the pipeline and get the stream + // 3. Read from the stream and write to the table. + // Note: + // We parse the `s3://` to ReadSourcePlan instead of to a SELECT plan is that: + #[tracing::instrument(level = "debug", name = "copy_files_to_table", skip(self), fields(ctx.id = self.ctx.get_id().as_str()))] + async fn copy_files_to_table( + &self, + catalog_name: &String, + db_name: &String, + tbl_name: &String, + from: &ReadDataSourcePlan, + files: Vec, + ) -> Result> { + let ctx = self.ctx.clone(); + let settings = self.ctx.get_settings(); + + let mut pipeline = NewPipeline::create(); + let read_source_plan = from.clone(); + let read_source_plan = Self::rewrite_read_plan_file_name(read_source_plan, files); + tracing::info!("copy_files_to_table: source plan:{:?}", read_source_plan); + let table = ctx.build_table_from_source_plan(&read_source_plan)?; + let res = table.read2(ctx.clone(), &read_source_plan, &mut pipeline); + if let Err(e) = res { + return Err(e); + } + + let table = ctx.get_table(catalog_name, db_name, tbl_name).await?; + + if ctx.get_settings().get_enable_new_processor_framework()? != 0 + && self.ctx.get_cluster().is_empty() + { + table.append2(ctx.clone(), &mut pipeline)?; + pipeline.set_max_threads(settings.get_max_threads()? as usize); + + let async_runtime = ctx.get_storage_runtime(); + let executor = PipelineCompleteExecutor::try_create(async_runtime, pipeline)?; + executor.execute()?; + + return Ok(ctx.consume_precommit_blocks()); + } + + pipeline.set_max_threads(settings.get_max_threads()? as usize); + + let async_runtime = ctx.get_storage_runtime(); + let executor = PipelinePullingExecutor::try_create(async_runtime, pipeline)?; + let (handler, stream) = ProcessorExecutorStream::create(executor)?; + self.ctx.add_source_abort_handle(handler); + + let operations = table + .append_data(ctx.clone(), Box::pin(stream)) + .await? + .try_collect() + .await?; + + Ok(operations) + } + + async fn execute_copy_into_stage( + &self, + stage: &UserStageInfo, + query: &Plan, + ) -> Result { + let (s_expr, metadata, bind_context) = match query { + Plan::Query { + s_expr, + metadata, + bind_context, + } => (s_expr, metadata, bind_context), + v => unreachable!("Input plan must be Query, but it's {}", v.name()), + }; + + let select_interpreter = SelectInterpreterV2::try_create( + self.ctx.clone(), + *(bind_context.clone()), + s_expr.clone(), + metadata.clone(), + )?; + + // Building data schema from bind_context columns + // TODO(leiyskey): Extract the following logic as new API of BindContext. 
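+        // A possible shape for that API (hypothetical, not part of
+        // BindContext yet):
+        //
+        //     fn output_schema(&self) -> DataSchemaRef
+        //
+        // returning the same DataField-per-ColumnBinding schema built below.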
+ let fields = bind_context + .columns + .iter() + .map(|column_binding| { + DataField::new( + &column_binding.column_name, + column_binding.data_type.clone(), + ) + }) + .collect(); + let data_schema = DataSchemaRefExt::create(fields); + let stage_table_info = StageTableInfo { + schema: data_schema, + stage_info: stage.clone(), + path: "".to_string(), + files: vec![], + }; + + let table = StageTable::try_create(stage_table_info)?; + + let stream = select_interpreter.execute(None).await?; + let results = table.append_data(self.ctx.clone(), stream).await?; + + table + .commit_insertion( + self.ctx.clone(), + &self.ctx.get_current_catalog(), + results.try_collect().await?, + false, + ) + .await?; + + Ok(Box::pin(DataBlockStream::create( + // TODO(xuanwo): Is this correct? + Arc::new(DataSchema::new(vec![])), + None, + vec![], + ))) + } +} + #[async_trait::async_trait] impl Interpreter for CopyInterpreterV2 { fn name(&self) -> &str { "CopyInterpreterV2" } + #[tracing::instrument(level = "debug", name = "copy_interpreter_execute_v2", skip(self, _input_stream), fields(ctx.id = self.ctx.get_id().as_str()))] async fn execute( &self, _input_stream: Option, ) -> Result { - todo!() + match &self.plan { + // TODO(xuanwo): extract them as a separate function. + CopyPlanV2::IntoTable { + catalog_name, + database_name, + table_name, + table_id, + files, + pattern, + schema, + validation_mode, + from, + } => { + let mut files = self.list_files(from, files).await?; + + // Pattern match check. + let pattern = &pattern; + if !pattern.is_empty() { + let regex = Regex::new(pattern).map_err(|e| { + ErrorCode::SyntaxException(format!( + "Pattern format invalid, got:{}, error:{:?}", + pattern, e + )) + })?; + + let matched_files = files + .iter() + .filter(|file| regex.is_match(file)) + .cloned() + .collect(); + files = matched_files; + } + + tracing::info!("copy file list:{:?}, pattern:{}", &files, pattern,); + + let write_results = self + .copy_files_to_table(catalog_name, database_name, table_name, from, files) + .await?; + + let table = self + .ctx + .get_table(catalog_name, database_name, table_name) + .await?; + + // Commit. + table + .commit_insertion(self.ctx.clone(), catalog_name, write_results, false) + .await?; + + Ok(Box::pin(DataBlockStream::create( + // TODO(xuanwo): Is this correct? + Arc::new(DataSchema::new(vec![])), + None, + vec![], + ))) + } + CopyPlanV2::IntoStage { + stage, + path, + validation_mode, + from, + } => self.execute_copy_into_stage(stage, from).await, + } } } diff --git a/query/src/sql/planner/plans/mod.rs b/query/src/sql/planner/plans/mod.rs index 33dbd8b207a7..b9b6cebfb898 100644 --- a/query/src/sql/planner/plans/mod.rs +++ b/query/src/sql/planner/plans/mod.rs @@ -106,3 +106,35 @@ pub enum Plan { DropStage(Box), RemoveStage(Box), } + +impl Plan { + /// Returning this plan's name. + pub fn name(&self) -> &str { + match self { + Plan::Query { .. } => "Query", + Plan::Explain { .. 
} => "Explain", + Plan::Copy(_) => "Copy", + Plan::ShowMetrics => "ShowMetrics", + Plan::ShowProcessList => "ShowProcessList", + Plan::ShowSettings => "ShowSettings", + Plan::CreateDatabase(_) => "CreateDatabase", + Plan::DropDatabase(_) => "DropDatabase", + Plan::RenameDatabase(_) => "RenameDatabase", + Plan::CreateTable(_) => "CreateTable", + Plan::CreateView(_) => "CreateView", + Plan::AlterView(_) => "AlterView", + Plan::DropView(_) => "DropView", + Plan::AlterUser(_) => "AlterUser", + Plan::CreateUser(_) => "CreateUser", + Plan::DropUser(_) => "DropUser", + Plan::CreateRole(_) => "CreateRole", + Plan::DropRole(_) => "DropRole", + Plan::ShowStages => "ShowStages", + Plan::ListStage(_) => "ListStage", + Plan::DescribeStage(_) => "DescribeStage", + Plan::CreateStage(_) => "CreateStage", + Plan::DropStage(_) => "DropStage", + Plan::RemoveStage(_) => "RemoveStage", + } + } +} From ced3b8eea57ab0dc01d45e3adda080f32f7c9abe Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 20 Jun 2022 16:29:32 +0800 Subject: [PATCH 09/40] Make cargo check happy Signed-off-by: Xuanwo --- common/ast/src/parser/statement.rs | 1 - query/src/interpreters/interpreter_copy_v2.rs | 19 ++++--------------- .../interpreters/interpreter_factory_v2.rs | 3 ++- query/src/sql/planner/binder/copy.rs | 11 ----------- query/src/sql/planner/plans/copy_v2.rs | 4 +--- 5 files changed, 7 insertions(+), 31 deletions(-) diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs index 1c867b17cac0..ebbade842266 100644 --- a/common/ast/src/parser/statement.rs +++ b/common/ast/src/parser/statement.rs @@ -13,7 +13,6 @@ // limitations under the License. use std::collections::BTreeMap; -use std::fmt::format; use common_meta_types::AuthType; use common_meta_types::UserIdentity; diff --git a/query/src/interpreters/interpreter_copy_v2.rs b/query/src/interpreters/interpreter_copy_v2.rs index d59244b38850..d3491c37703b 100644 --- a/query/src/interpreters/interpreter_copy_v2.rs +++ b/query/src/interpreters/interpreter_copy_v2.rs @@ -14,7 +14,6 @@ use std::path::Path; // limitations under the License. use std::sync::Arc; -use common_ast::ast::ExplainKind; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::ErrorCode; @@ -32,18 +31,13 @@ use regex::Regex; use crate::interpreters::stream::ProcessorExecutorStream; use crate::interpreters::Interpreter; use crate::interpreters::InterpreterPtr; -use crate::interpreters::SelectInterpreter; use crate::interpreters::SelectInterpreterV2; use crate::pipelines::new::executor::PipelineCompleteExecutor; use crate::pipelines::new::executor::PipelinePullingExecutor; use crate::pipelines::new::NewPipeline; use crate::sessions::QueryContext; -use crate::sql::exec::PipelineBuilder; -use crate::sql::optimizer::SExpr; use crate::sql::plans::CopyPlanV2; use crate::sql::plans::Plan; -use crate::sql::BindContext; -use crate::sql::MetadataRef; use crate::storages::stage::StageSource; use crate::storages::stage::StageTable; @@ -265,12 +259,10 @@ impl Interpreter for CopyInterpreterV2 { catalog_name, database_name, table_name, - table_id, files, pattern, - schema, - validation_mode, from, + .. } => { let mut files = self.list_files(from, files).await?; @@ -315,12 +307,9 @@ impl Interpreter for CopyInterpreterV2 { vec![], ))) } - CopyPlanV2::IntoStage { - stage, - path, - validation_mode, - from, - } => self.execute_copy_into_stage(stage, from).await, + CopyPlanV2::IntoStage { stage, from, .. 
} => { + self.execute_copy_into_stage(stage, from).await + } } } } diff --git a/query/src/interpreters/interpreter_factory_v2.rs b/query/src/interpreters/interpreter_factory_v2.rs index 9b819c256cb0..dcfed23f3d62 100644 --- a/query/src/interpreters/interpreter_factory_v2.rs +++ b/query/src/interpreters/interpreter_factory_v2.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use common_exception::Result; use super::*; +use crate::interpreters::interpreter_copy_v2::CopyInterpreterV2; use crate::sessions::QueryContext; use crate::sql::plans::Plan; use crate::sql::DfStatement; @@ -68,7 +69,7 @@ impl InterpreterFactoryV2 { ExplainInterpreterV2::try_create(ctx, *plan.clone(), kind.clone()) } - Plan::Copy(_) => todo!(), + Plan::Copy(copy_plan) => CopyInterpreterV2::try_create(ctx, *copy_plan.clone()), Plan::ShowMetrics => ShowMetricsInterpreter::try_create(ctx), Plan::ShowProcessList => ShowProcessListInterpreter::try_create(ctx), diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs index 21107704c4a4..7b84344b2808 100644 --- a/query/src/sql/planner/binder/copy.rs +++ b/query/src/sql/planner/binder/copy.rs @@ -12,33 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::assert_matches::debug_assert_matches; use std::collections::BTreeMap; use std::str::FromStr; use common_ast::ast::CopyStmt; use common_ast::ast::CopyTarget; -use common_ast::ast::CreateStageStmt; -use common_ast::ast::Query; use common_ast::ast::Statement; use common_ast::parser::error::Backtrace; use common_ast::parser::parse_sql; use common_ast::parser::tokenize_sql; use common_exception::ErrorCode; use common_exception::Result; -use common_meta_types::OnErrorMode; -use common_meta_types::StageType; use common_meta_types::UserStageInfo; -use common_planners::CopyMode; -use common_planners::CopyPlan; -use common_planners::CreateUserStagePlan; -use common_planners::ListPlan; use common_planners::ReadDataSourcePlan; -use common_planners::RemoveUserStagePlan; use common_planners::SourceInfo; use common_planners::StageTableInfo; -use crate::interpreters::SelectInterpreterV2; use crate::sql::binder::Binder; use crate::sql::plans::CopyPlanV2; use crate::sql::plans::Plan; diff --git a/query/src/sql/planner/plans/copy_v2.rs b/query/src/sql/planner/plans/copy_v2.rs index 9563b72a009d..6509d77f070d 100644 --- a/query/src/sql/planner/plans/copy_v2.rs +++ b/query/src/sql/planner/plans/copy_v2.rs @@ -20,9 +20,7 @@ use common_datavalues::DataSchemaRef; use common_meta_types::MetaId; use common_meta_types::UserStageInfo; use common_planners::ReadDataSourcePlan; -use common_planners::StageTableInfo; -use crate::interpreters::SelectInterpreterV2; use crate::sql::plans::Plan; #[derive(serde::Serialize, serde::Deserialize, PartialEq, Clone, Debug)] @@ -106,7 +104,7 @@ impl Debug for CopyPlanV2 { stage, path, validation_mode, - from: query, + .. 
} => { write!(f, "Copy into {:?}", stage)?; write!(f, ", path: {:?}", path)?; From 06bc4c0529c54712ee2f0d7f57f654e6b5ed80db Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 20 Jun 2022 16:40:43 +0800 Subject: [PATCH 10/40] Make clippy happy Signed-off-by: Xuanwo --- query/src/sql/planner/binder/copy.rs | 18 ++++++++---------- query/src/sql/planner/plans/copy_v2.rs | 2 +- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs index 7b84344b2808..228a0a92cb8f 100644 --- a/query/src/sql/planner/binder/copy.rs +++ b/query/src/sql/planner/binder/copy.rs @@ -66,11 +66,11 @@ impl<'a> Binder { let catalog_name = catalog .as_ref() .map(|v| v.to_string()) - .unwrap_or(self.ctx.get_current_catalog()); + .unwrap_or_else(|| self.ctx.get_current_catalog()); let database_name = database .as_ref() .map(|v| v.to_string()) - .unwrap_or(self.ctx.get_current_database()); + .unwrap_or_else(|| self.ctx.get_current_database()); let table_name = table.to_string(); let table = self .ctx @@ -104,7 +104,7 @@ impl<'a> Binder { table_name, table_id, schema, - from, + from: Box::new(from), files: stmt.files.clone(), pattern: stmt.pattern.clone(), validation_mode, @@ -117,11 +117,11 @@ impl<'a> Binder { let catalog_name = catalog .as_ref() .map(|v| v.to_string()) - .unwrap_or(self.ctx.get_current_catalog()); + .unwrap_or_else(|| self.ctx.get_current_catalog()); let database_name = database .as_ref() .map(|v| v.to_string()) - .unwrap_or(self.ctx.get_current_database()); + .unwrap_or_else(|| self.ctx.get_current_database()); let table_name = table.to_string(); let subquery = format!( @@ -173,9 +173,7 @@ impl<'a> Binder { from: Box::new(query), }))) } - CopyTarget::Query(_) => { - return Err(ErrorCode::SyntaxException("COPY INTO is invalid")) - } + CopyTarget::Query(_) => Err(ErrorCode::SyntaxException("COPY INTO is invalid")), } } @@ -185,9 +183,9 @@ impl<'a> Binder { location: &str, ) -> Result<(UserStageInfo, String)> { let (mut stage_info, path) = if location.starts_with('@') { - parse_stage_location(&self.ctx, &location).await? + parse_stage_location(&self.ctx, location).await? } else { - parse_uri_location(&location, &BTreeMap::new(), &BTreeMap::new())? + parse_uri_location(location, &BTreeMap::new(), &BTreeMap::new())? 
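+            // The two empty maps are the credential and encryption options;
+            // bind_copy does not parse them yet (see the `TODO(xuanwo)` note
+            // above on parsing credentials and encryption).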
}; if !stmt.file_format.is_empty() { diff --git a/query/src/sql/planner/plans/copy_v2.rs b/query/src/sql/planner/plans/copy_v2.rs index 6509d77f070d..ec27d764482e 100644 --- a/query/src/sql/planner/plans/copy_v2.rs +++ b/query/src/sql/planner/plans/copy_v2.rs @@ -66,7 +66,7 @@ pub enum CopyPlanV2 { pattern: String, schema: DataSchemaRef, validation_mode: ValidationMode, - from: ReadDataSourcePlan, + from: Box, }, IntoStage { stage: UserStageInfo, From af4fd8c580b6e8f652b4c02df8b53bc73ad626a2 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 20 Jun 2022 17:06:03 +0800 Subject: [PATCH 11/40] Resolve merge conflicts Signed-off-by: Xuanwo --- query/src/interpreters/interpreter_copy_v2.rs | 2 +- query/src/sql/planner/binder/copy.rs | 7 ++----- query/src/sql/planner/binder/mod.rs | 5 +---- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/query/src/interpreters/interpreter_copy_v2.rs b/query/src/interpreters/interpreter_copy_v2.rs index d3491c37703b..551d375dedcd 100644 --- a/query/src/interpreters/interpreter_copy_v2.rs +++ b/query/src/interpreters/interpreter_copy_v2.rs @@ -189,7 +189,7 @@ impl CopyInterpreterV2 { metadata, bind_context, } => (s_expr, metadata, bind_context), - v => unreachable!("Input plan must be Query, but it's {}", v.name()), + v => unreachable!("Input plan must be Query, but it's {v}"), }; let select_interpreter = SelectInterpreterV2::try_create( diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs index 228a0a92cb8f..8044e1375534 100644 --- a/query/src/sql/planner/binder/copy.rs +++ b/query/src/sql/planner/binder/copy.rs @@ -130,11 +130,8 @@ impl<'a> Binder { ); let tokens = tokenize_sql(&subquery)?; let backtrace = Backtrace::new(); - let stmts = parse_sql(&tokens, &backtrace)?; - if stmts.len() > 1 { - return Err(ErrorCode::UnImplement("unsupported multiple statements")); - } - match &stmts[0] { + let sub_stmt = parse_sql(&tokens, &backtrace)?; + match &sub_stmt { Statement::Query(query) => { self.bind_statement(bind_context, &Statement::Query(query.clone())) .await? diff --git a/query/src/sql/planner/binder/mod.rs b/query/src/sql/planner/binder/mod.rs index 5da904cab780..1aabf2c9d09d 100644 --- a/query/src/sql/planner/binder/mod.rs +++ b/query/src/sql/planner/binder/mod.rs @@ -109,10 +109,7 @@ impl<'a> Binder { self.bind_show_functions(bind_context, limit).await? } - Statement::Copy(stmt) => { - let plan = self.bind_copy(bind_context, stmt).await?; - Ok(plan) - } + Statement::Copy(stmt) => self.bind_copy(bind_context, stmt).await?, Statement::ShowMetrics => Plan::ShowMetrics, Statement::ShowProcessList => Plan::ShowProcessList, From cfb598787a6a088c11379ca9e01a256edb800f7c Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 20 Jun 2022 17:46:40 +0800 Subject: [PATCH 12/40] Add unit test for ast Signed-off-by: Xuanwo --- common/ast/src/parser/statement.rs | 20 +-- common/ast/tests/it/parser.rs | 36 +++++ common/ast/tests/it/testdata/statement.txt | 180 +++++++++++++++++++++ 3 files changed, 224 insertions(+), 12 deletions(-) diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs index 57b68301c143..727d81590847 100644 --- a/common/ast/src/parser/statement.rs +++ b/common/ast/src/parser/statement.rs @@ -993,22 +993,16 @@ pub fn copy_target(i: Input) -> IResult { }, |query| CopyTarget::Query(Box::new(query)), ); - let stage_location = map( + let location = map( rule! 
{ - "@" ~ #literal_string + #literal_string }, - |(_, location)| CopyTarget::Location(format!("@{location}")), - ); - let uri_location = map( - rule! { - #literal_string ~ "://" ~ #literal_string - }, - // TODO(xuanwo): Maybe we can check the proposal during parse? - |(protocol, _, location)| CopyTarget::Location(format!("{protocol}://{location}")), + // TODO(xuanwo): Maybe we can check the protocol during parse? + |location| CopyTarget::Location(location), ); rule!( - #table | #query | #stage_location | #uri_location + #table | #query | #location )(i) } @@ -1129,6 +1123,8 @@ pub fn options(i: Input) -> IResult> { })(i) }; + let u64_to_string = |i| map_res(literal_u64, |v| Ok(v.to_string()))(i); + let ident_with_format = alt(( ident_to_string, map(rule! { FORMAT }, |_| "FORMAT".to_string()), @@ -1136,7 +1132,7 @@ pub fn options(i: Input) -> IResult> { map( rule! { - "(" ~ ( #ident_with_format ~ "=" ~ (#ident_to_string | #literal_string) )* ~ ")" + "(" ~ ( #ident_with_format ~ "=" ~ (#ident_to_string | #u64_to_string | #literal_string) )* ~ ")" }, |(_, opts, _)| BTreeMap::from_iter(opts.iter().map(|(k, _, v)| (k.clone(), v.clone()))), )(i) diff --git a/common/ast/tests/it/parser.rs b/common/ast/tests/it/parser.rs index 67cd3b466bcb..a512f2e1339f 100644 --- a/common/ast/tests/it/parser.rs +++ b/common/ast/tests/it/parser.rs @@ -145,6 +145,42 @@ fn test_statement() { r#"SHOW GRANTS FOR 'test-grant'@'localhost';"#, r#"SHOW GRANTS FOR USER 'test-grant'@'localhost';"#, r#"SHOW GRANTS FOR ROLE 'role1';"#, + r#"COPY INTO mytable + FROM 's3://mybucket/data.csv' + FILE_FORMAT = ( + type = 'CSV' + field_delimiter = ',' + record_delimiter = '\n' + skip_header = 1 + ) + size_limit=10;"#, + r#"COPY INTO mytable + FROM @my_stage + FILE_FORMAT = ( + type = 'CSV' + field_delimiter = ',' + record_delimiter = '\n' + skip_header = 1 + ) + size_limit=10;"#, + r#"COPY INTO 's3://mybucket/data.csv' + FROM mytable + FILE_FORMAT = ( + type = 'CSV' + field_delimiter = ',' + record_delimiter = '\n' + skip_header = 1 + ) + size_limit=10;"#, + r#"COPY INTO @my_stage + FROM mytable + FILE_FORMAT = ( + type = 'CSV' + field_delimiter = ',' + record_delimiter = '\n' + skip_header = 1 + ) + size_limit=10;"#, ]; for case in cases { diff --git a/common/ast/tests/it/testdata/statement.txt b/common/ast/tests/it/testdata/statement.txt index c1b29993caef..f0ee9de517bc 100644 --- a/common/ast/tests/it/testdata/statement.txt +++ b/common/ast/tests/it/testdata/statement.txt @@ -3779,3 +3779,183 @@ ShowGrants { } +---------- Input ---------- +COPY INTO mytable + FROM 's3://mybucket/data.csv' + FILE_FORMAT = ( + type = 'CSV' + field_delimiter = ',' + record_delimiter = '\n' + skip_header = 1 + ) + size_limit=10; +---------- Output --------- +COPY INTO mytable FROM s3://mybucket/data.csv FILE_FORMAT = ( field_delimiter = ',' record_delimiter = ' +' skip_header = '1' type = 'CSV' ) SIZE_LIMIT = 10 +---------- AST ------------ +Copy( + CopyStmt { + src: Location( + "s3://mybucket/data.csv", + ), + dst: Table( + None, + None, + Identifier { + name: "mytable", + quote: None, + span: Ident(10..17), + }, + ), + files: [], + pattern: "", + file_format: { + "field_delimiter": ",", + "record_delimiter": "\n", + "skip_header": "1", + "type": "CSV", + }, + validation_mode: "", + size_limit: 10, + }, +) + + +---------- Input ---------- +COPY INTO mytable + FROM @my_stage + FILE_FORMAT = ( + type = 'CSV' + field_delimiter = ',' + record_delimiter = '\n' + skip_header = 1 + ) + size_limit=10; +---------- Output --------- +COPY INTO mytable FROM 
@my_stage FILE_FORMAT = ( field_delimiter = ',' record_delimiter = ' +' skip_header = '1' type = 'CSV' ) SIZE_LIMIT = 10 +---------- AST ------------ +Copy( + CopyStmt { + src: Table( + None, + None, + Identifier { + name: "@my_stage", + quote: None, + span: AtString(39..48), + }, + ), + dst: Table( + None, + None, + Identifier { + name: "mytable", + quote: None, + span: Ident(10..17), + }, + ), + files: [], + pattern: "", + file_format: { + "field_delimiter": ",", + "record_delimiter": "\n", + "skip_header": "1", + "type": "CSV", + }, + validation_mode: "", + size_limit: 10, + }, +) + + +---------- Input ---------- +COPY INTO 's3://mybucket/data.csv' + FROM mytable + FILE_FORMAT = ( + type = 'CSV' + field_delimiter = ',' + record_delimiter = '\n' + skip_header = 1 + ) + size_limit=10; +---------- Output --------- +COPY INTO s3://mybucket/data.csv FROM mytable FILE_FORMAT = ( field_delimiter = ',' record_delimiter = ' +' skip_header = '1' type = 'CSV' ) SIZE_LIMIT = 10 +---------- AST ------------ +Copy( + CopyStmt { + src: Table( + None, + None, + Identifier { + name: "mytable", + quote: None, + span: Ident(56..63), + }, + ), + dst: Location( + "s3://mybucket/data.csv", + ), + files: [], + pattern: "", + file_format: { + "field_delimiter": ",", + "record_delimiter": "\n", + "skip_header": "1", + "type": "CSV", + }, + validation_mode: "", + size_limit: 10, + }, +) + + +---------- Input ---------- +COPY INTO @my_stage + FROM mytable + FILE_FORMAT = ( + type = 'CSV' + field_delimiter = ',' + record_delimiter = '\n' + skip_header = 1 + ) + size_limit=10; +---------- Output --------- +COPY INTO @my_stage FROM mytable FILE_FORMAT = ( field_delimiter = ',' record_delimiter = ' +' skip_header = '1' type = 'CSV' ) SIZE_LIMIT = 10 +---------- AST ------------ +Copy( + CopyStmt { + src: Table( + None, + None, + Identifier { + name: "mytable", + quote: None, + span: Ident(41..48), + }, + ), + dst: Table( + None, + None, + Identifier { + name: "@my_stage", + quote: None, + span: AtString(10..19), + }, + ), + files: [], + pattern: "", + file_format: { + "field_delimiter": ",", + "record_delimiter": "\n", + "skip_header": "1", + "type": "CSV", + }, + validation_mode: "", + size_limit: 10, + }, +) + + From 7d5117a4e57b050daed0e2ab3d6ae7daed42c881 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 20 Jun 2022 17:49:10 +0800 Subject: [PATCH 13/40] Move UserStageInfo into box Signed-off-by: Xuanwo --- query/src/sql/planner/binder/copy.rs | 2 +- query/src/sql/planner/plans/copy_v2.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs index 8044e1375534..aee5076bea54 100644 --- a/query/src/sql/planner/binder/copy.rs +++ b/query/src/sql/planner/binder/copy.rs @@ -164,7 +164,7 @@ impl<'a> Binder { .map_err(ErrorCode::SyntaxException)?; Ok(Plan::Copy(Box::new(CopyPlanV2::IntoStage { - stage: stage_info, + stage: Box::new(stage_info), path, validation_mode, from: Box::new(query), diff --git a/query/src/sql/planner/plans/copy_v2.rs b/query/src/sql/planner/plans/copy_v2.rs index ec27d764482e..45255e15855a 100644 --- a/query/src/sql/planner/plans/copy_v2.rs +++ b/query/src/sql/planner/plans/copy_v2.rs @@ -69,7 +69,7 @@ pub enum CopyPlanV2 { from: Box, }, IntoStage { - stage: UserStageInfo, + stage: Box, path: String, validation_mode: ValidationMode, from: Box, From 56b4344400ac922adb04ca756c03ea01709d362d Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 20 Jun 2022 22:06:46 +0800 Subject: [PATCH 14/40] Make clippy happy 
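The closure `|location| CopyTarget::Location(location)` only forwards its
argument, so clippy's `redundant_closure` lint asks for the variant
constructor to be passed directly, roughly:

    // before
    map(rule! { #literal_string }, |location| CopyTarget::Location(location))
    // after
    map(rule! { #literal_string }, CopyTarget::Location)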
Signed-off-by: Xuanwo --- common/ast/src/parser/statement.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs index 727d81590847..256f254e0e6d 100644 --- a/common/ast/src/parser/statement.rs +++ b/common/ast/src/parser/statement.rs @@ -998,7 +998,7 @@ pub fn copy_target(i: Input) -> IResult { #literal_string }, // TODO(xuanwo): Maybe we can check the protocol during parse? - |location| CopyTarget::Location(location), + CopyTarget::Location, ); rule!( From 2f142df97532cbda5a924bc733d29d29efc31336 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 21 Jun 2022 11:25:22 +0800 Subject: [PATCH 15/40] Extend support for CopyTarget Signed-off-by: Xuanwo --- Cargo.lock | 1 + common/ast/Cargo.toml | 1 + common/ast/src/ast/mod.rs | 14 +++ common/ast/src/ast/statement.rs | 50 ++++++++-- common/ast/src/parser/statement.rs | 101 +++++++++++++++++---- common/ast/tests/it/testdata/statement.txt | 50 +++++----- query/src/sql/planner/binder/copy.rs | 6 +- 7 files changed, 167 insertions(+), 56 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 45f48e2d8b8f..d7ce91f30532 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1008,6 +1008,7 @@ dependencies = [ "serde", "sqlparser", "thiserror", + "url", ] [[package]] diff --git a/common/ast/Cargo.toml b/common/ast/Cargo.toml index 94c8db072e78..51be25927803 100644 --- a/common/ast/Cargo.toml +++ b/common/ast/Cargo.toml @@ -33,6 +33,7 @@ nom-rule = "0.3.0" pratt = "0.3.0" serde = { version = "1.0.136", features = ["derive"] } thiserror = "1.0.30" +url = "2.2.2" [dev-dependencies] common-base = { path = "../base" } diff --git a/common/ast/src/ast/mod.rs b/common/ast/src/ast/mod.rs index b734bc4292dd..fd4436cb69ef 100644 --- a/common/ast/src/ast/mod.rs +++ b/common/ast/src/ast/mod.rs @@ -98,3 +98,17 @@ fn write_space_seperated_list( } Ok(()) } + +/// Write input map items into `field_a=x field_b=y` +fn write_space_seperated_map( + f: &mut Formatter<'_>, + items: impl IntoIterator, +) -> std::fmt::Result { + for (i, (k, v)) in items.into_iter().enumerate() { + if i > 0 { + write!(f, " ")?; + } + write!(f, "{k}='{v}'")?; + } + Ok(()) +} diff --git a/common/ast/src/ast/statement.rs b/common/ast/src/ast/statement.rs index ab8b2b5cb26e..663ea1cc567e 100644 --- a/common/ast/src/ast/statement.rs +++ b/common/ast/src/ast/statement.rs @@ -32,6 +32,7 @@ use crate::ast::expr::TypeName; use crate::ast::write_comma_separated_list; use crate::ast::write_period_separated_list; use crate::ast::write_quoted_comma_separated_list; +use crate::ast::write_space_seperated_map; use crate::ast::Identifier; use crate::ast::Query; use crate::parser::token::Token; @@ -383,16 +384,31 @@ pub enum CopyTarget<'a> { Option>, Identifier<'a>, ), - /// Location can be used in `INTO` or `FROM`. + /// StageLocation (a.k.a internal and external stage) can be used + /// in `INTO` or `FROM`. /// - /// Location could be + /// For examples: /// /// - internal stage: `@internal_stage/path/to/dir/` /// - external stage: `@s3_external_stage/path/to/dir/` - /// - external location: `s3://bucket/path/to/dir/` + StageLocation { + /// The name of the stage. + name: String, + path: String, + }, + /// UriLocation (a.k.a external location) can be used in `INTO` or `FROM`. + /// + /// For examples: `'s3://example/path/to/dir' CREDENTIALS = (AWS_ACCESS_ID="admin" AWS_SECRET_KEY="admin")` /// - /// We only parse them into `String` and leave the location parser in further. 
- Location(String), + /// TODO(xuanwo): Add endpoint_url support. + /// TODO(xuanwo): We can check if we support this protocol during parsing. + UriLocation { + protocol: String, + name: String, + path: String, + credentials: BTreeMap, + encryption: BTreeMap, + }, /// Query can only be used as `FROM`. /// /// For example:`(SELECT field_a,field_b FROM table)` @@ -716,8 +732,28 @@ impl Display for CopyTarget<'_> { write!(f, "{table}") } } - CopyTarget::Location(location) => { - write!(f, "{location}") + CopyTarget::StageLocation { name, path } => { + write!(f, "@{name}{path}") + } + CopyTarget::UriLocation { + protocol, + name, + path, + credentials, + encryption, + } => { + write!(f, "{protocol}://{name}{path}")?; + if !credentials.is_empty() { + write!(f, " CREDENTIALS = ( ")?; + write_space_seperated_map(f, credentials)?; + write!(f, " )")?; + } + if !encryption.is_empty() { + write!(f, " ENCRYPTION = ( ")?; + write_space_seperated_map(f, encryption)?; + write!(f, " )")?; + } + Ok(()) } CopyTarget::Query(query) => { write!(f, "({query})") diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs index 7054dc59bebe..2bbc456bb715 100644 --- a/common/ast/src/parser/statement.rs +++ b/common/ast/src/parser/statement.rs @@ -22,6 +22,7 @@ use nom::branch::alt; use nom::combinator::map; use nom::combinator::value; use nom::Slice; +use url::Url; use super::error::ErrorKind; use crate::ast::*; @@ -995,29 +996,89 @@ pub fn kill_target(i: Input) -> IResult { } /// Parse input into `CopyTarget` +/// +/// # Notes +/// +/// It's required to parse stage location first. Or stage could be parsed as table. pub fn copy_target(i: Input) -> IResult { - let table = map( - rule! { - #peroid_separated_idents_1_to_3 - }, - |(catalog, database, table)| CopyTarget::Table(catalog, database, table), - ); - let query = map( - rule! { - #parenthesized_query - }, - |query| CopyTarget::Query(Box::new(query)), - ); - let location = map( - rule! { - #literal_string - }, - // TODO(xuanwo): Maybe we can check the protocol during parse? - CopyTarget::Location, - ); + // Parse input like `@my_stage/path/to/dir` + let stage_location = |i| { + map_res( + rule! { + #at_string + }, + |location| { + let parsed = location.splitn(2, '/').collect::>(); + if parsed.len() == 1 { + Ok(CopyTarget::StageLocation { + name: parsed[0].to_string(), + path: "/".to_string(), + }) + } else { + Ok(CopyTarget::StageLocation { + name: parsed[0].to_string(), + path: format!("/{}", parsed[1]), + }) + } + }, + )(i) + }; + + // Parse input like `mytable` + let table = |i| { + map_res( + rule! { + #peroid_separated_idents_1_to_3 + }, + |(catalog, database, table)| Ok(CopyTarget::Table(catalog, database, table)), + )(i) + }; + + // Parse input like `( SELECT * from mytable )` + let query = |i| { + map_res( + rule! { + #parenthesized_query + }, + |query| Ok(CopyTarget::Query(Box::new(query))), + )(i) + }; + + // Parse input like `'s3://example/path/to/dir' CREDENTIALS = (AWS_ACCESS_ID="admin" AWS_SECRET_KEY="admin")` + let uri_location = |i| { + map_res( + rule! { + #literal_string + ~ (CREDENTIALS ~ "=" ~ #options)? + ~ (ENCRYPTION ~ "=" ~ #options)? + }, + |(location, credentials_opt, encryption_opt)| { + let parsed = Url::parse(&location) + .map_err(|_| ErrorKind::Other("Unexpected invalid url"))?; + + Ok(CopyTarget::UriLocation { + protocol: parsed.scheme().to_string(), + name: parsed + .host_str() + .ok_or(ErrorKind::Other("Unexpected invalid url for name missing"))? 
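+                    // host_str() carries the bucket / container part of the URI,
+                    // e.g. "mybucket" for 's3://mybucket/data.csv' in the tests below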
+                    .to_string(),
+                path: if parsed.path().is_empty() {
+                    "/".to_string()
+                } else {
+                    parsed.path().to_string()
+                },
+                credentials: credentials_opt.map(|v| v.2).unwrap_or_default(),
+                encryption: encryption_opt.map(|v| v.2).unwrap_or_default(),
+            })
+        },
+        )(i)
+    };
 
     rule!(
-        #table | #query | #location
+        #stage_location: "@<stage_name> { <path> }"
+        | #uri_location: "'<protocol>://<name> {<path>} { CREDENTIALS = ({ AWS_ACCESS_KEY = 'aws_access_key' }) } '"
+        | #table: "{ { <catalog>. } <database>. }<table>
" + | #query: "( )" )(i) } diff --git a/common/ast/tests/it/testdata/statement.txt b/common/ast/tests/it/testdata/statement.txt index c9d9174a9e15..257bd4aff5a6 100644 --- a/common/ast/tests/it/testdata/statement.txt +++ b/common/ast/tests/it/testdata/statement.txt @@ -3915,9 +3915,13 @@ COPY INTO mytable FROM s3://mybucket/data.csv FILE_FORMAT = ( field_delimiter = ---------- AST ------------ Copy( CopyStmt { - src: Location( - "s3://mybucket/data.csv", - ), + src: UriLocation { + protocol: "s3", + name: "mybucket", + path: "/data.csv", + credentials: {}, + encryption: {}, + }, dst: Table( None, None, @@ -3952,20 +3956,15 @@ COPY INTO mytable ) size_limit=10; ---------- Output --------- -COPY INTO mytable FROM @my_stage FILE_FORMAT = ( field_delimiter = ',' record_delimiter = ' +COPY INTO mytable FROM @my_stage/ FILE_FORMAT = ( field_delimiter = ',' record_delimiter = ' ' skip_header = '1' type = 'CSV' ) SIZE_LIMIT = 10 ---------- AST ------------ Copy( CopyStmt { - src: Table( - None, - None, - Identifier { - name: "@my_stage", - quote: None, - span: AtString(39..48), - }, - ), + src: StageLocation { + name: "my_stage", + path: "/", + }, dst: Table( None, None, @@ -4014,9 +4013,13 @@ Copy( span: Ident(56..63), }, ), - dst: Location( - "s3://mybucket/data.csv", - ), + dst: UriLocation { + protocol: "s3", + name: "mybucket", + path: "/data.csv", + credentials: {}, + encryption: {}, + }, files: [], pattern: "", file_format: { @@ -4042,7 +4045,7 @@ COPY INTO @my_stage ) size_limit=10; ---------- Output --------- -COPY INTO @my_stage FROM mytable FILE_FORMAT = ( field_delimiter = ',' record_delimiter = ' +COPY INTO @my_stage/ FROM mytable FILE_FORMAT = ( field_delimiter = ',' record_delimiter = ' ' skip_header = '1' type = 'CSV' ) SIZE_LIMIT = 10 ---------- AST ------------ Copy( @@ -4056,15 +4059,10 @@ Copy( span: Ident(41..48), }, ), - dst: Table( - None, - None, - Identifier { - name: "@my_stage", - quote: None, - span: AtString(10..19), - }, - ), + dst: StageLocation { + name: "my_stage", + path: "/", + }, files: [], pattern: "", file_format: { diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs index aee5076bea54..88c9709403f6 100644 --- a/query/src/sql/planner/binder/copy.rs +++ b/query/src/sql/planner/binder/copy.rs @@ -57,7 +57,7 @@ impl<'a> Binder { )) } // TODO(xuanwo): we need to parse credential and encryption. - CopyTarget::Location(location) => self.bind_stage(stmt, location).await?, + CopyTarget::UriLocation(location) => self.bind_stage(stmt, location).await?, }; let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str()) @@ -110,7 +110,7 @@ impl<'a> Binder { validation_mode, }))) } - CopyTarget::Location(location) => { + CopyTarget::UriLocation(location) => { let (stage_info, path) = self.bind_stage(stmt, location).await?; let query = match &stmt.src { CopyTarget::Table(catalog, database, table) => { @@ -147,7 +147,7 @@ impl<'a> Binder { self.bind_statement(bind_context, &Statement::Query(query.clone())) .await? 
} - CopyTarget::Location(_) => { + CopyTarget::UriLocation(_) => { return Err(ErrorCode::SyntaxException( "COPY INTO FROM is invalid", )) From 397722eaa5190734ba20c66a56c3f8d1ced6fffb Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 21 Jun 2022 11:36:05 +0800 Subject: [PATCH 16/40] Add test for new parser Signed-off-by: Xuanwo --- common/ast/src/ast/statement.rs | 3 +- common/ast/tests/it/parser.rs | 18 ++++++ .../ast/tests/it/testdata/statement-error.txt | 20 ++++++ common/ast/tests/it/testdata/statement.txt | 62 ++++++++++++++++++- 4 files changed, 100 insertions(+), 3 deletions(-) diff --git a/common/ast/src/ast/statement.rs b/common/ast/src/ast/statement.rs index 663ea1cc567e..02a7c9967be9 100644 --- a/common/ast/src/ast/statement.rs +++ b/common/ast/src/ast/statement.rs @@ -402,6 +402,7 @@ pub enum CopyTarget<'a> { /// /// TODO(xuanwo): Add endpoint_url support. /// TODO(xuanwo): We can check if we support this protocol during parsing. + /// TODO(xuanwo): Maybe we can introduce more strict (friendly) report for credentials and encryption, like parsed into StorageConfig? UriLocation { protocol: String, name: String, @@ -742,7 +743,7 @@ impl Display for CopyTarget<'_> { credentials, encryption, } => { - write!(f, "{protocol}://{name}{path}")?; + write!(f, "'{protocol}://{name}{path}'")?; if !credentials.is_empty() { write!(f, " CREDENTIALS = ( ")?; write_space_seperated_map(f, credentials)?; diff --git a/common/ast/tests/it/parser.rs b/common/ast/tests/it/parser.rs index 9be813957b4a..40e2fbb2557a 100644 --- a/common/ast/tests/it/parser.rs +++ b/common/ast/tests/it/parser.rs @@ -186,6 +186,22 @@ fn test_statement() { skip_header = 1 ) size_limit=10;"#, + r#"COPY INTO mytable + FROM 's3://mybucket/data.csv' + CREDENTIALS = ( + AWS_KEY_ID = 'access_key' + AWS_SECRET_KEY = 'secret_key' + ) + ENCRYPTION = ( + MASTER_KEY = 'master_key' + ) + FILE_FORMAT = ( + type = 'CSV' + field_delimiter = ',' + record_delimiter = '\n' + skip_header = 1 + ) + size_limit=10;"#, ]; for case in cases { @@ -230,6 +246,8 @@ fn test_statement_error() { r#"SHOW GRANT FOR ROLE role1;"#, r#"REVOKE SELECT, CREATE, ALL PRIVILEGES ON * FROM 'test-grant'@'localhost';"#, r#"REVOKE SELECT, CREATE ON * TO 'test-grant'@'localhost';"#, + r#"COPY INTO mytable FROM 's3://bucket' CREDENTIAL = ();"#, + r#"COPY INTO mytable FROM @mystage CREDENTIALS = ();"#, ]; for case in cases { diff --git a/common/ast/tests/it/testdata/statement-error.txt b/common/ast/tests/it/testdata/statement-error.txt index 1da6fac1e561..d432ea55163d 100644 --- a/common/ast/tests/it/testdata/statement-error.txt +++ b/common/ast/tests/it/testdata/statement-error.txt @@ -250,3 +250,23 @@ error: | while parsing `REVOKE { ROLE | schemaObjectPrivileges | ALL [ PRIVILEGES ] ON } FROM { [ROLE ] | [USER] }` +---------- Input ---------- +COPY INTO mytable FROM 's3://bucket' CREDENTIAL = (); +---------- Output --------- +error: + --> SQL:1:38 + | +1 | COPY INTO mytable FROM 's3://bucket' CREDENTIAL = (); + | ^^^^^^^^^^ expected `CREDENTIALS`, `ENCRYPTION`, `FILES`, `PATTERN`, `FILE_FORMAT`, `VALIDATION_MODE`, or 2 more ... 
+ + +---------- Input ---------- +COPY INTO mytable FROM @mystage CREDENTIALS = (); +---------- Output --------- +error: + --> SQL:1:33 + | +1 | COPY INTO mytable FROM @mystage CREDENTIALS = (); + | ^^^^^^^^^^^ expected `FILES`, `PATTERN`, `FILE_FORMAT`, `VALIDATION_MODE`, `SIZE_LIMIT`, or `;` + + diff --git a/common/ast/tests/it/testdata/statement.txt b/common/ast/tests/it/testdata/statement.txt index 257bd4aff5a6..f41bd298f25f 100644 --- a/common/ast/tests/it/testdata/statement.txt +++ b/common/ast/tests/it/testdata/statement.txt @@ -3910,7 +3910,7 @@ COPY INTO mytable ) size_limit=10; ---------- Output --------- -COPY INTO mytable FROM s3://mybucket/data.csv FILE_FORMAT = ( field_delimiter = ',' record_delimiter = ' +COPY INTO mytable FROM 's3://mybucket/data.csv' FILE_FORMAT = ( field_delimiter = ',' record_delimiter = ' ' skip_header = '1' type = 'CSV' ) SIZE_LIMIT = 10 ---------- AST ------------ Copy( @@ -3999,7 +3999,7 @@ COPY INTO 's3://mybucket/data.csv' ) size_limit=10; ---------- Output --------- -COPY INTO s3://mybucket/data.csv FROM mytable FILE_FORMAT = ( field_delimiter = ',' record_delimiter = ' +COPY INTO 's3://mybucket/data.csv' FROM mytable FILE_FORMAT = ( field_delimiter = ',' record_delimiter = ' ' skip_header = '1' type = 'CSV' ) SIZE_LIMIT = 10 ---------- AST ------------ Copy( @@ -4077,3 +4077,61 @@ Copy( ) +---------- Input ---------- +COPY INTO mytable + FROM 's3://mybucket/data.csv' + CREDENTIALS = ( + AWS_KEY_ID = 'access_key' + AWS_SECRET_KEY = 'secret_key' + ) + ENCRYPTION = ( + MASTER_KEY = 'master_key' + ) + FILE_FORMAT = ( + type = 'CSV' + field_delimiter = ',' + record_delimiter = '\n' + skip_header = 1 + ) + size_limit=10; +---------- Output --------- +COPY INTO mytable FROM 's3://mybucket/data.csv' CREDENTIALS = ( AWS_KEY_ID='access_key' AWS_SECRET_KEY='secret_key' ) ENCRYPTION = ( MASTER_KEY='master_key' ) FILE_FORMAT = ( field_delimiter = ',' record_delimiter = ' +' skip_header = '1' type = 'CSV' ) SIZE_LIMIT = 10 +---------- AST ------------ +Copy( + CopyStmt { + src: UriLocation { + protocol: "s3", + name: "mybucket", + path: "/data.csv", + credentials: { + "AWS_KEY_ID": "access_key", + "AWS_SECRET_KEY": "secret_key", + }, + encryption: { + "MASTER_KEY": "master_key", + }, + }, + dst: Table( + None, + None, + Identifier { + name: "mytable", + quote: None, + span: Ident(10..17), + }, + ), + files: [], + pattern: "", + file_format: { + "field_delimiter": ",", + "record_delimiter": "\n", + "skip_header": "1", + "type": "CSV", + }, + validation_mode: "", + size_limit: 10, + }, +) + + From 6d597537076e8acc795c1c378c9ae19bfa2995da Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 21 Jun 2022 13:12:52 +0800 Subject: [PATCH 17/40] Refactor binder Signed-off-by: Xuanwo --- common/ast/src/ast/statement.rs | 12 + query/src/sql/planner/binder/copy.rs | 540 ++++++++++++++----- query/src/sql/statements/statement_common.rs | 77 ++- 3 files changed, 490 insertions(+), 139 deletions(-) diff --git a/common/ast/src/ast/statement.rs b/common/ast/src/ast/statement.rs index 02a7c9967be9..416822e18419 100644 --- a/common/ast/src/ast/statement.rs +++ b/common/ast/src/ast/statement.rs @@ -185,6 +185,7 @@ pub struct CopyStmt<'a> { pub files: Vec, pub pattern: String, pub file_format: BTreeMap, + /// TODO(xuanwo): parse into validation_mode directly. 
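+    /// An empty string means "no validation"; anything else is interpreted by
+    /// `ValidationMode::from_str` in the binder, which raises a SyntaxException
+    /// for unknown values.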
pub validation_mode: String, pub size_limit: usize, } @@ -416,6 +417,17 @@ pub enum CopyTarget<'a> { Query(Box>), } +impl CopyTarget<'_> { + pub fn target(&self) -> &str { + match self { + CopyTarget::Table(_, _, _) => "Table", + CopyTarget::StageLocation { .. } => "StageLocation", + CopyTarget::UriLocation { .. } => "UriLocation", + CopyTarget::Query(_) => "Query", + } + } +} + #[derive(Debug, Clone, PartialEq)] pub struct CreateViewStmt<'a> { pub if_not_exists: bool, diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs index 88c9709403f6..0f1f59be51bf 100644 --- a/query/src/sql/planner/binder/copy.rs +++ b/query/src/sql/planner/binder/copy.rs @@ -17,6 +17,7 @@ use std::str::FromStr; use common_ast::ast::CopyStmt; use common_ast::ast::CopyTarget; +use common_ast::ast::Query; use common_ast::ast::Statement; use common_ast::parser::error::Backtrace; use common_ast::parser::parse_sql; @@ -34,7 +35,9 @@ use crate::sql::plans::Plan; use crate::sql::plans::ValidationMode; use crate::sql::statements::parse_copy_file_format_options; use crate::sql::statements::parse_stage_location; +use crate::sql::statements::parse_stage_location_v2; use crate::sql::statements::parse_uri_location; +use crate::sql::statements::parse_uri_location_v2; use crate::sql::BindContext; impl<'a> Binder { @@ -43,150 +46,433 @@ impl<'a> Binder { bind_context: &BindContext, stmt: &CopyStmt<'a>, ) -> Result { - match &stmt.dst { - CopyTarget::Table(catalog, database, table) => { - let (stage_info, path) = match &stmt.src { - CopyTarget::Table(_, _, _) => { - return Err(ErrorCode::SyntaxException( - "COPY INTO
<table> FROM <table>
is invalid",
-                        ))
-                    }
-                    CopyTarget::Query(_) => {
-                        return Err(ErrorCode::SyntaxException(
-                            "COPY INTO <table>
FROM is invalid", - )) - } - // TODO(xuanwo): we need to parse credential and encryption. - CopyTarget::UriLocation(location) => self.bind_stage(stmt, location).await?, - }; - - let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str()) - .map_err(ErrorCode::SyntaxException)?; + match (&stmt.src, &stmt.dst) { + ( + &CopyTarget::StageLocation { name, path }, + &CopyTarget::Table(catalog, database, table), + ) => { + let catalog_name = catalog + .as_ref() + .map(|v| v.to_string()) + .unwrap_or_else(|| self.ctx.get_current_catalog()); + let database_name = catalog + .as_ref() + .map(|v| v.to_string()) + .unwrap_or_else(|| self.ctx.get_current_database()); + let table = table.to_string(); + + self.bind_copy_from_stage_into_table( + bind_context, + stmt, + &name, + &path, + &catalog_name, + &database_name, + &table, + ) + .await + } + ( + &CopyTarget::UriLocation { + protocol, + name, + path, + credentials, + encryption, + }, + &CopyTarget::Table(catalog, database, table), + ) => { + let catalog_name = catalog + .as_ref() + .map(|v| v.to_string()) + .unwrap_or_else(|| self.ctx.get_current_catalog()); + let database_name = catalog + .as_ref() + .map(|v| v.to_string()) + .unwrap_or_else(|| self.ctx.get_current_database()); + let table = table.to_string(); + self.bind_copy_from_uri_into_table( + bind_context, + stmt, + &protocol, + &name, + &path, + &credentials, + &encryption, + &catalog_name, + &database_name, + &table, + ) + .await + } + ( + &CopyTarget::Table(catalog, database, table), + &CopyTarget::StageLocation { name, path }, + ) => { let catalog_name = catalog .as_ref() .map(|v| v.to_string()) .unwrap_or_else(|| self.ctx.get_current_catalog()); - let database_name = database + let database_name = catalog .as_ref() .map(|v| v.to_string()) .unwrap_or_else(|| self.ctx.get_current_database()); - let table_name = table.to_string(); - let table = self - .ctx - .get_table(&catalog_name, &database_name, &table_name) - .await?; - let schema = table.schema(); - let table_id = table.get_id(); - - // TODO(xuanwo): we need to support columns in COPY. - - // Read Source plan. 
- let from = ReadDataSourcePlan { - catalog: catalog_name.clone(), - source_info: SourceInfo::StageSource(StageTableInfo { - schema: schema.clone(), - stage_info, - path, - files: vec![], - }), - scan_fields: None, - parts: vec![], - statistics: Default::default(), - description: "".to_string(), - tbl_args: None, - push_downs: None, - }; - - Ok(Plan::Copy(Box::new(CopyPlanV2::IntoTable { - catalog_name, - database_name, - table_name, - table_id, - schema, - from: Box::new(from), - files: stmt.files.clone(), - pattern: stmt.pattern.clone(), - validation_mode, - }))) + let table = table.to_string(); + + self.bind_copy_from_table_into_stage( + bind_context, + stmt, + &catalog_name, + &database_name, + &table, + &name, + &path, + ) + .await } - CopyTarget::UriLocation(location) => { - let (stage_info, path) = self.bind_stage(stmt, location).await?; - let query = match &stmt.src { - CopyTarget::Table(catalog, database, table) => { - let catalog_name = catalog - .as_ref() - .map(|v| v.to_string()) - .unwrap_or_else(|| self.ctx.get_current_catalog()); - let database_name = database - .as_ref() - .map(|v| v.to_string()) - .unwrap_or_else(|| self.ctx.get_current_database()); - let table_name = table.to_string(); - - let subquery = format!( - "SELECT * FROM {}.{}.{}", - catalog_name, database_name, table_name - ); - let tokens = tokenize_sql(&subquery)?; - let backtrace = Backtrace::new(); - let sub_stmt = parse_sql(&tokens, &backtrace)?; - match &sub_stmt { - Statement::Query(query) => { - self.bind_statement(bind_context, &Statement::Query(query.clone())) - .await? - } - _ => { - return Err(ErrorCode::SyntaxException( - "COPY INTO FROM is invalid", - )) - } - } - } - CopyTarget::Query(query) => { - self.bind_statement(bind_context, &Statement::Query(query.clone())) - .await? - } - CopyTarget::UriLocation(_) => { - return Err(ErrorCode::SyntaxException( - "COPY INTO FROM is invalid", - )) - } - }; - - debug_assert!( - matches!(query, Plan::Query { .. }), - "input sql must be Plan::Query, but it's not" - ); - - // Validation mode. 
- let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str()) - .map_err(ErrorCode::SyntaxException)?; - - Ok(Plan::Copy(Box::new(CopyPlanV2::IntoStage { - stage: Box::new(stage_info), + ( + &CopyTarget::Table(catalog, database, table), + &CopyTarget::UriLocation { + protocol, + name, path, - validation_mode, - from: Box::new(query), - }))) + credentials, + encryption, + }, + ) => { + let catalog_name = catalog + .as_ref() + .map(|v| v.to_string()) + .unwrap_or_else(|| self.ctx.get_current_catalog()); + let database_name = catalog + .as_ref() + .map(|v| v.to_string()) + .unwrap_or_else(|| self.ctx.get_current_database()); + let table = table.to_string(); + + self.bind_copy_from_table_into_uri( + bind_context, + stmt, + &catalog_name, + &database_name, + &table, + &protocol, + &name, + &path, + &credentials, + &encryption, + ) + .await + } + (&CopyTarget::Query(query), &CopyTarget::StageLocation { name, path }) => { + self.bind_copy_from_query_into_stage(bind_context, stmt, &query, &name, &path) + .await + } + ( + &CopyTarget::Query(query), + &CopyTarget::UriLocation { + protocol, + name, + path, + credentials, + encryption, + }, + ) => { + self.bind_copy_from_query_into_uri( + bind_context, + stmt, + &query, + &protocol, + &name, + &path, + &credentials, + &encryption, + ) + .await } - CopyTarget::Query(_) => Err(ErrorCode::SyntaxException("COPY INTO is invalid")), + (src, dst) => Err(ErrorCode::SyntaxException(format!( + "COPY INTO <{}> FROM <{}> is invalid", + dst.target(), + src.target() + ))), } } - async fn bind_stage( + /// Bind COPY INFO
FROM + async fn bind_copy_from_stage_into_table( &mut self, + bind_context: &BindContext, stmt: &CopyStmt<'a>, - location: &str, - ) -> Result<(UserStageInfo, String)> { - let (mut stage_info, path) = if location.starts_with('@') { - parse_stage_location(&self.ctx, location).await? - } else { - parse_uri_location(location, &BTreeMap::new(), &BTreeMap::new())? + src_stage: &str, + src_path: &str, + dst_catalog_name: &str, + dst_database_name: &str, + dst_table_name: &str, + ) -> Result { + let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str()) + .map_err(ErrorCode::SyntaxException)?; + + let table = self + .ctx + .get_table(dst_catalog_name, dst_database_name, dst_table_name) + .await?; + + let (mut stage_info, path) = + parse_stage_location_v2(&self.ctx, src_stage, src_path).await?; + self.apply_stage_options(stmt, &mut stage_info); + + let from = ReadDataSourcePlan { + catalog: dst_catalog_name.to_string(), + source_info: SourceInfo::StageSource(StageTableInfo { + schema: table.schema.clone(), + stage_info, + path, + files: vec![], + }), + scan_fields: None, + parts: vec![], + statistics: Default::default(), + description: "".to_string(), + tbl_args: None, + push_downs: None, }; + Ok(Plan::Copy(Box::new(CopyPlanV2::IntoTable { + catalog_name: dst_catalog_name.to_string(), + database_name: dst_database_name.to_string(), + table_name: dst_table_name.to_string(), + table_id: table.get_id(), + schema: table.schema(), + from: Box::new(from), + files: stmt.files.clone(), + pattern: stmt.pattern.clone(), + validation_mode, + }))) + } + + /// Bind COPY INFO
FROM + async fn bind_copy_from_uri_into_table( + &mut self, + bind_context: &BindContext, + stmt: &CopyStmt<'a>, + src_protocol: &str, + src_name: &str, + src_path: &str, + src_credentials: &BTreeMap, + src_encryption: &BTreeMap, + dst_catalog_name: &str, + dst_database_name: &str, + dst_table_name: &str, + ) -> Result { + let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str()) + .map_err(ErrorCode::SyntaxException)?; + + let table = self + .ctx + .get_table(dst_catalog_name, dst_database_name, dst_table_name) + .await?; + + let (mut stage_info, path) = parse_uri_location_v2( + &src_protocol, + src_name, + src_path, + src_credentials, + src_encryption, + )?; + self.apply_stage_options(stmt, &mut stage_info); + + let from = ReadDataSourcePlan { + catalog: dst_catalog_name.to_string(), + source_info: SourceInfo::StageSource(StageTableInfo { + schema: table.schema.clone(), + stage_info, + path, + files: vec![], + }), + scan_fields: None, + parts: vec![], + statistics: Default::default(), + description: "".to_string(), + tbl_args: None, + push_downs: None, + }; + + Ok(Plan::Copy(Box::new(CopyPlanV2::IntoTable { + catalog_name, + database_name, + table_name, + table_id, + schema, + from: Box::new(from), + files: stmt.files.clone(), + pattern: stmt.pattern.clone(), + validation_mode, + }))) + } + + /// Bind COPY INFO FROM
+ async fn bind_copy_from_table_into_stage( + &mut self, + bind_context: &BindContext, + stmt: &CopyStmt<'a>, + src_catalog_name: &str, + src_database_name: &str, + src_table_name: &str, + dst_stage: &str, + dst_path: &str, + ) -> Result { + let subquery = + format!("SELECT * FROM {src_catalog_name}.{src_database_name}.{src_table_name}"); + let tokens = tokenize_sql(&subquery)?; + let backtrace = Backtrace::new(); + let sub_stmt = parse_sql(&tokens, &backtrace)?; + + let query = match &sub_stmt { + Statement::Query(query) => { + self.bind_statement(bind_context, &Statement::Query(query.clone())) + .await? + } + _ => { + return Err(ErrorCode::SyntaxException( + "COPY INTO FROM is invalid", + )) + } + }; + + // Validation mode. + let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str()) + .map_err(ErrorCode::SyntaxException)?; + + let (mut stage_info, path) = + parse_stage_location_v2(&self.ctx, dst_stage, dst_path).await?; + self.apply_stage_options(stmt, &mut stage_info); + + Ok(Plan::Copy(Box::new(CopyPlanV2::IntoStage { + stage: Box::new(stage_info), + path, + validation_mode, + from: Box::new(query), + }))) + } + + /// Bind COPY INFO FROM
+ async fn bind_copy_from_table_into_uri( + &mut self, + bind_context: &BindContext, + stmt: &CopyStmt<'a>, + src_catalog_name: &str, + src_database_name: &str, + src_table_name: &str, + dst_protocol: &str, + dst_name: &str, + dst_path: &str, + dst_credentials: &BTreeMap, + dst_encryption: &BTreeMap, + ) -> Result { + let subquery = + format!("SELECT * FROM {src_catalog_name}.{src_database_name}.{src_table_name}"); + let tokens = tokenize_sql(&subquery)?; + let backtrace = Backtrace::new(); + let sub_stmt = parse_sql(&tokens, &backtrace)?; + + let query = match &sub_stmt { + Statement::Query(query) => { + self.bind_statement(bind_context, &Statement::Query(query.clone())) + .await? + } + _ => { + return Err(ErrorCode::SyntaxException( + "COPY INTO FROM is invalid", + )) + } + }; + + // Validation mode. + let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str()) + .map_err(ErrorCode::SyntaxException)?; + + let (mut stage_info, path) = + parse_stage_location_v2(&self.ctx, dst_stage, dst_path).await?; + self.apply_stage_options(stmt, &mut stage_info); + + Ok(Plan::Copy(Box::new(CopyPlanV2::IntoStage { + stage: Box::new(stage_info), + path, + validation_mode, + from: Box::new(query), + }))) + } + + /// Bind COPY INFO FROM + async fn bind_copy_from_query_into_stage( + &mut self, + bind_context: &BindContext, + stmt: &CopyStmt<'a>, + src_query: &Box, + dst_stage: &str, + dst_path: &str, + ) -> Result { + let query = self + .bind_statement(bind_context, &Statement::Query(src_query.clone())) + .await?; + + // Validation mode. + let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str()) + .map_err(ErrorCode::SyntaxException)?; + + let (mut stage_info, path) = + parse_stage_location_v2(&self.ctx, dst_stage, dst_path).await?; + self.apply_stage_options(stmt, &mut stage_info); + + Ok(Plan::Copy(Box::new(CopyPlanV2::IntoStage { + stage: Box::new(stage_info), + path, + validation_mode, + from: Box::new(query), + }))) + } + + /// Bind COPY INFO FROM + async fn bind_copy_from_query_into_uri( + &mut self, + bind_context: &BindContext, + stmt: &CopyStmt<'a>, + src_query: &Box, + dst_protocol: &str, + dst_name: &str, + dst_path: &str, + dst_credentials: &BTreeMap, + dst_encryption: &BTreeMap, + ) -> Result { + let query = self + .bind_statement(bind_context, &Statement::Query(src_query.clone())) + .await?; + + // Validation mode. + let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str()) + .map_err(ErrorCode::SyntaxException)?; + + let (mut stage_info, path) = parse_uri_location_v2( + dst_protocol, + dst_name, + dst_path, + dst_credentials, + dst_encryption, + )?; + self.apply_stage_options(stmt, &mut stage_info); + + Ok(Plan::Copy(Box::new(CopyPlanV2::IntoStage { + stage: Box::new(stage_info), + path, + validation_mode, + from: Box::new(query), + }))) + } + + async fn apply_stage_options(&mut self, stmt: &CopyStmt<'a>, stage: &mut UserStageInfo) { if !stmt.file_format.is_empty() { - stage_info.file_format_options = parse_copy_file_format_options(&stmt.file_format)?; + stage.file_format_options = parse_copy_file_format_options(&stmt.file_format)?; } // Copy options. @@ -200,10 +486,8 @@ impl<'a> Binder { // size_limit. 
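         // 0 is the parser's `unwrap_or_default()` value, i.e. no SIZE_LIMIT clause
         // was given, so only a non-zero limit overrides the stage's copy options.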
if stmt.size_limit != 0 { - stage_info.copy_options.size_limit = stmt.size_limit; + stage.copy_options.size_limit = stmt.size_limit; } } - - Ok((stage_info, path)) } } diff --git a/query/src/sql/statements/statement_common.rs b/query/src/sql/statements/statement_common.rs index 895202067143..5081d823c556 100644 --- a/query/src/sql/statements/statement_common.rs +++ b/query/src/sql/statements/statement_common.rs @@ -55,6 +55,8 @@ use crate::sessions::QueryContext; /// For internal stage, we will also add prefix `/stage//` /// /// - @internal/abc => (internal, "/stage/internal/abc") +/// +/// TODO(xuanwo): Move those logic into parser. pub async fn parse_stage_location( ctx: &Arc, location: &str, @@ -74,6 +76,31 @@ pub async fn parse_stage_location( Ok((stage, relative_path)) } +/// parse_stage_location_v2 work similar to parse_stage_location. +/// +/// Difference is input location has already been parsed by parser. +/// +/// TODO(xuanwo): Move this logic into parser +pub async fn parse_stage_location_v2( + ctx: &Arc, + name: &str, + path: &str, +) -> Result<(UserStageInfo, String)> { + debug_assert!(path.starts_with('/'), "path should starts with '/'"); + + let mgr = ctx.get_user_manager(); + let stage = mgr.get_stage(&ctx.get_tenant(), names[0]).await?; + + let prefix = stage.get_prefix(); + debug_assert!(prefix.ends_with('/'), "prefix should ends with '/'"); + + // prefix must be endswith `/`, so we should trim path here. + let relative_path = format!("{prefix}{}", path.trim_start_matches('/')); + + debug!("parsed stage: {stage:?}, path: {relative_path}"); + Ok((stage, relative_path)) +} + /// External stage(location starts without `@`): /// /// ```sql @@ -82,6 +109,8 @@ pub async fn parse_stage_location( /// encryption=(master_key = 'my_master_key') /// file_format = (type = csv field_delimiter = '|' skip_header = 1)" /// ``` +/// +/// TODO(xuanwo): Move this logic into parser pub fn parse_uri_location( location: &str, credential_options: &BTreeMap, @@ -113,13 +142,44 @@ pub fn parse_uri_location( path = "/".to_string(); } + parse_uri_location_v2( + uri.scheme_str().ok_or(Err(ErrorCode::SyntaxException( + "File location scheme must be specified", + )))?, + &bucket, + &path, + credential_options, + encryption_options, + ) +} + +/// External stage(location starts without `@`): +/// +/// ```sql +/// copy into table from 's3://mybucket/data/files' +/// credentials=(aws_key_id='my_key_id' aws_secret_key='my_secret_key') +/// encryption=(master_key = 'my_master_key') +/// file_format = (type = csv field_delimiter = '|' skip_header = 1)" +/// ``` +/// +/// This function works similar with parse_uri_location. +/// Different is input location has been parsed. +/// +/// TODO(xuanwo): Move this logic into parser +pub fn parse_uri_location_v2( + protocol: &str, + name: &str, + path: &str, + credentials: &BTreeMap, + encryption: &BTreeMap, +) -> Result<(UserStageInfo, String)> { // Path endswith `/` means it's a directory, otherwise it's a file. // If the path is a directory, we will use this path as root. // If the path is a file, we will use `/` as root (which is the default value) let (root, path) = if path.ends_with('/') { - (path.as_str(), "/") + (path, "/") } else { - ("/", path.as_str()) + ("/", path) }; // File storage plan. 
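     // (a trailing '/' marks a directory: it becomes the operator's root below,
     // while a plain object key keeps root '/' and is treated as a single file)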
@@ -133,18 +193,12 @@ pub fn parse_uri_location( let cfg = StorageS3Config { bucket, root: root.to_string(), - access_key_id: credential_options - .get("aws_key_id") - .cloned() - .unwrap_or_default(), - secret_access_key: credential_options + access_key_id: credentials.get("aws_key_id").cloned().unwrap_or_default(), + secret_access_key: credentials .get("aws_secret_key") .cloned() .unwrap_or_default(), - master_key: encryption_options - .get("master_key") - .cloned() - .unwrap_or_default(), + master_key: encryption.get("master_key").cloned().unwrap_or_default(), disable_credential_loader: true, ..Default::default() }; @@ -171,6 +225,7 @@ pub fn parse_uri_location( Ok((stage, path)) } +/// TODO(xuanwo): Move those logic into parser pub fn parse_copy_file_format_options( file_format_options: &BTreeMap, ) -> Result { From 7694da661716f3f74b98646322b64cdf12df9118 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 21 Jun 2022 13:38:46 +0800 Subject: [PATCH 18/40] Make check happy Signed-off-by: Xuanwo --- query/src/sql/planner/binder/copy.rs | 83 +++++++++++--------- query/src/sql/statements/statement_common.rs | 57 ++++++-------- 2 files changed, 72 insertions(+), 68 deletions(-) diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs index 0f1f59be51bf..c9534ad8a295 100644 --- a/query/src/sql/planner/binder/copy.rs +++ b/query/src/sql/planner/binder/copy.rs @@ -34,9 +34,7 @@ use crate::sql::plans::CopyPlanV2; use crate::sql::plans::Plan; use crate::sql::plans::ValidationMode; use crate::sql::statements::parse_copy_file_format_options; -use crate::sql::statements::parse_stage_location; use crate::sql::statements::parse_stage_location_v2; -use crate::sql::statements::parse_uri_location; use crate::sql::statements::parse_uri_location_v2; use crate::sql::BindContext; @@ -48,14 +46,14 @@ impl<'a> Binder { ) -> Result { match (&stmt.src, &stmt.dst) { ( - &CopyTarget::StageLocation { name, path }, - &CopyTarget::Table(catalog, database, table), + CopyTarget::StageLocation { name, path }, + CopyTarget::Table(catalog, database, table), ) => { let catalog_name = catalog .as_ref() .map(|v| v.to_string()) .unwrap_or_else(|| self.ctx.get_current_catalog()); - let database_name = catalog + let database_name = database .as_ref() .map(|v| v.to_string()) .unwrap_or_else(|| self.ctx.get_current_database()); @@ -73,20 +71,20 @@ impl<'a> Binder { .await } ( - &CopyTarget::UriLocation { + CopyTarget::UriLocation { protocol, name, path, credentials, encryption, }, - &CopyTarget::Table(catalog, database, table), + CopyTarget::Table(catalog, database, table), ) => { let catalog_name = catalog .as_ref() .map(|v| v.to_string()) .unwrap_or_else(|| self.ctx.get_current_catalog()); - let database_name = catalog + let database_name = database .as_ref() .map(|v| v.to_string()) .unwrap_or_else(|| self.ctx.get_current_database()); @@ -107,14 +105,14 @@ impl<'a> Binder { .await } ( - &CopyTarget::Table(catalog, database, table), - &CopyTarget::StageLocation { name, path }, + CopyTarget::Table(catalog, database, table), + CopyTarget::StageLocation { name, path }, ) => { let catalog_name = catalog .as_ref() .map(|v| v.to_string()) .unwrap_or_else(|| self.ctx.get_current_catalog()); - let database_name = catalog + let database_name = database .as_ref() .map(|v| v.to_string()) .unwrap_or_else(|| self.ctx.get_current_database()); @@ -132,8 +130,8 @@ impl<'a> Binder { .await } ( - &CopyTarget::Table(catalog, database, table), - &CopyTarget::UriLocation { + CopyTarget::Table(catalog, database, table), 
+ CopyTarget::UriLocation { protocol, name, path, @@ -145,7 +143,7 @@ impl<'a> Binder { .as_ref() .map(|v| v.to_string()) .unwrap_or_else(|| self.ctx.get_current_catalog()); - let database_name = catalog + let database_name = database .as_ref() .map(|v| v.to_string()) .unwrap_or_else(|| self.ctx.get_current_database()); @@ -165,13 +163,13 @@ impl<'a> Binder { ) .await } - (&CopyTarget::Query(query), &CopyTarget::StageLocation { name, path }) => { + (CopyTarget::Query(query), CopyTarget::StageLocation { name, path }) => { self.bind_copy_from_query_into_stage(bind_context, stmt, &query, &name, &path) .await } ( - &CopyTarget::Query(query), - &CopyTarget::UriLocation { + CopyTarget::Query(query), + CopyTarget::UriLocation { protocol, name, path, @@ -202,7 +200,7 @@ impl<'a> Binder { /// Bind COPY INFO
FROM async fn bind_copy_from_stage_into_table( &mut self, - bind_context: &BindContext, + _: &BindContext, stmt: &CopyStmt<'a>, src_stage: &str, src_path: &str, @@ -220,12 +218,12 @@ impl<'a> Binder { let (mut stage_info, path) = parse_stage_location_v2(&self.ctx, src_stage, src_path).await?; - self.apply_stage_options(stmt, &mut stage_info); + self.apply_stage_options(stmt, &mut stage_info)?; let from = ReadDataSourcePlan { catalog: dst_catalog_name.to_string(), source_info: SourceInfo::StageSource(StageTableInfo { - schema: table.schema.clone(), + schema: table.schema().clone(), stage_info, path, files: vec![], @@ -254,7 +252,7 @@ impl<'a> Binder { /// Bind COPY INFO
FROM async fn bind_copy_from_uri_into_table( &mut self, - bind_context: &BindContext, + _: &BindContext, stmt: &CopyStmt<'a>, src_protocol: &str, src_name: &str, @@ -280,12 +278,12 @@ impl<'a> Binder { src_credentials, src_encryption, )?; - self.apply_stage_options(stmt, &mut stage_info); + self.apply_stage_options(stmt, &mut stage_info)?; let from = ReadDataSourcePlan { catalog: dst_catalog_name.to_string(), source_info: SourceInfo::StageSource(StageTableInfo { - schema: table.schema.clone(), + schema: table.schema().clone(), stage_info, path, files: vec![], @@ -299,11 +297,11 @@ impl<'a> Binder { }; Ok(Plan::Copy(Box::new(CopyPlanV2::IntoTable { - catalog_name, - database_name, - table_name, - table_id, - schema, + catalog_name: dst_catalog_name.to_string(), + database_name: dst_database_name.to_string(), + table_name: dst_table_name.to_string(), + table_id: table.get_id(), + schema: table.schema(), from: Box::new(from), files: stmt.files.clone(), pattern: stmt.pattern.clone(), @@ -346,7 +344,7 @@ impl<'a> Binder { let (mut stage_info, path) = parse_stage_location_v2(&self.ctx, dst_stage, dst_path).await?; - self.apply_stage_options(stmt, &mut stage_info); + self.apply_stage_options(stmt, &mut stage_info)?; Ok(Plan::Copy(Box::new(CopyPlanV2::IntoStage { stage: Box::new(stage_info), @@ -392,9 +390,14 @@ impl<'a> Binder { let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str()) .map_err(ErrorCode::SyntaxException)?; - let (mut stage_info, path) = - parse_stage_location_v2(&self.ctx, dst_stage, dst_path).await?; - self.apply_stage_options(stmt, &mut stage_info); + let (mut stage_info, path) = parse_uri_location_v2( + dst_protocol, + dst_name, + dst_path, + dst_credentials, + dst_encryption, + )?; + self.apply_stage_options(stmt, &mut stage_info)?; Ok(Plan::Copy(Box::new(CopyPlanV2::IntoStage { stage: Box::new(stage_info), @@ -409,7 +412,7 @@ impl<'a> Binder { &mut self, bind_context: &BindContext, stmt: &CopyStmt<'a>, - src_query: &Box, + src_query: &Box>, dst_stage: &str, dst_path: &str, ) -> Result { @@ -423,7 +426,7 @@ impl<'a> Binder { let (mut stage_info, path) = parse_stage_location_v2(&self.ctx, dst_stage, dst_path).await?; - self.apply_stage_options(stmt, &mut stage_info); + self.apply_stage_options(stmt, &mut stage_info)?; Ok(Plan::Copy(Box::new(CopyPlanV2::IntoStage { stage: Box::new(stage_info), @@ -438,7 +441,7 @@ impl<'a> Binder { &mut self, bind_context: &BindContext, stmt: &CopyStmt<'a>, - src_query: &Box, + src_query: &Box>, dst_protocol: &str, dst_name: &str, dst_path: &str, @@ -460,7 +463,7 @@ impl<'a> Binder { dst_credentials, dst_encryption, )?; - self.apply_stage_options(stmt, &mut stage_info); + self.apply_stage_options(stmt, &mut stage_info)?; Ok(Plan::Copy(Box::new(CopyPlanV2::IntoStage { stage: Box::new(stage_info), @@ -470,7 +473,11 @@ impl<'a> Binder { }))) } - async fn apply_stage_options(&mut self, stmt: &CopyStmt<'a>, stage: &mut UserStageInfo) { + fn apply_stage_options( + &mut self, + stmt: &CopyStmt<'a>, + stage: &mut UserStageInfo, + ) -> Result<()> { if !stmt.file_format.is_empty() { stage.file_format_options = parse_copy_file_format_options(&stmt.file_format)?; } @@ -489,5 +496,7 @@ impl<'a> Binder { stage.copy_options.size_limit = stmt.size_limit; } } + + Ok(()) } } diff --git a/query/src/sql/statements/statement_common.rs b/query/src/sql/statements/statement_common.rs index 5081d823c556..02be568c3c26 100644 --- a/query/src/sql/statements/statement_common.rs +++ b/query/src/sql/statements/statement_common.rs @@ -89,7 +89,7 @@ pub 
async fn parse_stage_location_v2( debug_assert!(path.starts_with('/'), "path should starts with '/'"); let mgr = ctx.get_user_manager(); - let stage = mgr.get_stage(&ctx.get_tenant(), names[0]).await?; + let stage = mgr.get_stage(&ctx.get_tenant(), name).await?; let prefix = stage.get_prefix(); debug_assert!(prefix.ends_with('/'), "prefix should ends with '/'"); @@ -143,9 +143,9 @@ pub fn parse_uri_location( } parse_uri_location_v2( - uri.scheme_str().ok_or(Err(ErrorCode::SyntaxException( + uri.scheme_str().ok_or(ErrorCode::SyntaxException( "File location scheme must be specified", - )))?, + ))?, &bucket, &path, credential_options, @@ -183,38 +183,33 @@ pub fn parse_uri_location_v2( }; // File storage plan. - let (stage_storage, path) = match uri.scheme_str() { - None => Err(ErrorCode::SyntaxException( - "File location scheme must be specified", - )), - Some(v) => match v { - // AWS s3 plan. - "s3" => { - let cfg = StorageS3Config { - bucket, - root: root.to_string(), - access_key_id: credentials.get("aws_key_id").cloned().unwrap_or_default(), - secret_access_key: credentials - .get("aws_secret_key") - .cloned() - .unwrap_or_default(), - master_key: encryption.get("master_key").cloned().unwrap_or_default(), - disable_credential_loader: true, - ..Default::default() - }; + let stage_storage = match protocol { + // AWS s3 plan. + "s3" => { + let cfg = StorageS3Config { + bucket: name.to_string(), + root: root.to_string(), + access_key_id: credentials.get("aws_key_id").cloned().unwrap_or_default(), + secret_access_key: credentials + .get("aws_secret_key") + .cloned() + .unwrap_or_default(), + master_key: encryption.get("master_key").cloned().unwrap_or_default(), + disable_credential_loader: true, + ..Default::default() + }; - Ok((StorageParams::S3(cfg), path.to_string())) - } + Ok(StorageParams::S3(cfg)) + } - // Others. - _ => Err(ErrorCode::SyntaxException( - "File location uri unsupported, must be one of [s3, @stage]", - )), - }, + // Others. 
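+        // e.g. 'gcs://bucket/path' or a misspelled scheme lands here; only the
+        // s3 protocol is wired up at this point.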
+ _ => Err(ErrorCode::SyntaxException( + "File location uri unsupported, must be one of [s3]", + )), }?; let stage = UserStageInfo { - stage_name: location.to_string(), + stage_name: format!("{protocol}://{name}{path}"), stage_type: StageType::External, stage_params: StageParams { storage: stage_storage, @@ -222,7 +217,7 @@ pub fn parse_uri_location_v2( ..Default::default() }; - Ok((stage, path)) + Ok((stage, path.to_string())) } /// TODO(xuanwo): Move those logic into parser From 957da6aaabfc6f0f4fbf215a91a0dfcde7037a45 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 21 Jun 2022 14:03:39 +0800 Subject: [PATCH 19/40] Enable stateful tests Signed-off-by: Xuanwo --- .../00_0000_copy_from_s3_location.result | 11 +++ .../00_0000_copy_from_s3_location.sh | 71 ++++++++++++++++++ .../00_copy_v2/00_0000_copy_from_stage.result | 21 ++++++ .../00_copy_v2/00_0000_copy_from_stage.sh | 75 +++++++++++++++++++ .../00_copy_v2/00_0000_copy_from_stage2.py | 50 +++++++++++++ .../00_0000_copy_from_stage2.result | 0 .../00_copy_v2/00_0000_copy_into_stage.result | 1 + .../00_copy_v2/00_0000_copy_into_stage.sh | 28 +++++++ .../01_load_v2/01_0000_streaming_load.result | 6 ++ .../01_load_v2/01_0000_streaming_load.sh | 68 +++++++++++++++++ .../01_load_v2/01_0001_upload_to_stage.result | 2 + .../01_load_v2/01_0001_upload_to_stage.sh | 20 +++++ .../01_0002_remove_external_stage.result | 21 ++++++ .../01_0002_remove_external_stage.sh | 40 ++++++++++ .../01_0002_remove_internal_stage.result | 18 +++++ .../01_0002_remove_internal_stage.sh | 38 ++++++++++ tests/suites/1_stateful/01_load_v2/books.csv | 2 + 17 files changed, 472 insertions(+) create mode 100644 tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_s3_location.result create mode 100755 tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_s3_location.sh create mode 100755 tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage.result create mode 100755 tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage.sh create mode 100755 tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage2.py create mode 100755 tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage2.result create mode 100755 tests/suites/1_stateful/00_copy_v2/00_0000_copy_into_stage.result create mode 100755 tests/suites/1_stateful/00_copy_v2/00_0000_copy_into_stage.sh create mode 100644 tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.result create mode 100755 tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.sh create mode 100755 tests/suites/1_stateful/01_load_v2/01_0001_upload_to_stage.result create mode 100755 tests/suites/1_stateful/01_load_v2/01_0001_upload_to_stage.sh create mode 100644 tests/suites/1_stateful/01_load_v2/01_0002_remove_external_stage.result create mode 100755 tests/suites/1_stateful/01_load_v2/01_0002_remove_external_stage.sh create mode 100644 tests/suites/1_stateful/01_load_v2/01_0002_remove_internal_stage.result create mode 100755 tests/suites/1_stateful/01_load_v2/01_0002_remove_internal_stage.sh create mode 100644 tests/suites/1_stateful/01_load_v2/books.csv diff --git a/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_s3_location.result b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_s3_location.result new file mode 100644 index 000000000000..fc81bdf440b0 --- /dev/null +++ b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_s3_location.result @@ -0,0 +1,11 @@ +Test copy from file +199 2020.0 769 +Test copy from gzip file +199 2020.0 769 +Test copy from zstd file +199 2020.0 769 +Test copy from bzip2 file +199 2020.0 769 
+398 2020.0 1538 +398 2020.0 1538 +398 2020.0 1538 diff --git a/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_s3_location.sh b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_s3_location.sh new file mode 100755 index 000000000000..a3426a6993e2 --- /dev/null +++ b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_s3_location.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../../../shell_env.sh + +echo "set enable_planner_v2 = 1;" | $MYSQL_CLIENT_CONNECT +echo "drop table if exists ontime200;" | $MYSQL_CLIENT_CONNECT +## Create table +cat $CURDIR/../ontime/create_table.sql | sed 's/ontime/ontime200/g' | $MYSQL_CLIENT_CONNECT + +## Copy from s3. +echo "Test copy from file" +echo "copy into ontime200 from 's3://testbucket/admin/data/ontime_200.csv' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT + +## Result. +echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT + +# Truncate the ontime table. +echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT + +## Copy from s3 with compression gzip. +echo "Test copy from gzip file" +echo "copy into ontime200 from 's3://testbucket/admin/data/ontime_200.csv.gz' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'gzip' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT + +## Result. +echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT + +# Truncate the ontime table. +echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT + +## Copy from s3 with compression zstd. +echo "Test copy from zstd file" +echo "copy into ontime200 from 's3://testbucket/admin/data/ontime_200.csv.zst' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'zstd' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT + +## Result. +echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT + +# Truncate the ontime table. +echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT + +## Copy from s3 with compression bz2. +echo "Test copy from bzip2 file" +echo "copy into ontime200 from 's3://testbucket/admin/data/ontime_200.csv.bz2' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'bz2' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT + +## Result. +echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT + +# Truncate the ontime table. +echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT + +## Copy from s3 with files. +echo "copy into ontime200 from 's3://testbucket/admin/data/' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') FILES = ('ontime_200.csv', 'ontime_200_v1.csv') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT +## Result. +echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT +echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT + + +## Copy from s3 by directory with pattern. 
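Before the pattern-based cases below: a minimal sketch of what these COPY-from-URI tests exercise server-side. Following the `parse_uri_location_v2` change earlier in this series, an `s3://bucket/path` location plus its inline `credentials=(...)` options are folded into a transient external stage. `S3Config` and `ExternalStage` are simplified stand-ins for the real `StorageS3Config`/`UserStageInfo`, so treat this as an illustration under those assumptions rather than the actual implementation:

```rust
use std::collections::BTreeMap;

/// Simplified stand-ins; the real code also carries root, master_key,
/// disable_credential_loader, etc.
#[derive(Debug)]
struct S3Config {
    bucket: String,
    access_key_id: String,
    secret_access_key: String,
}

#[derive(Debug)]
struct ExternalStage {
    stage_name: String,
    storage: S3Config,
}

fn parse_s3_location(
    protocol: &str,
    name: &str, // bucket
    path: &str, // must start with '/'
    credentials: &BTreeMap<String, String>,
) -> Result<ExternalStage, String> {
    match protocol {
        "s3" => Ok(ExternalStage {
            // Mirrors the `format!("{protocol}://{name}{path}")` stage name
            // in the patch above.
            stage_name: format!("{protocol}://{name}{path}"),
            storage: S3Config {
                bucket: name.to_string(),
                access_key_id: credentials.get("aws_key_id").cloned().unwrap_or_default(),
                secret_access_key: credentials
                    .get("aws_secret_key")
                    .cloned()
                    .unwrap_or_default(),
            },
        }),
        // Any other scheme is rejected, matching the SyntaxException arm above.
        _ => Err(format!("unsupported location scheme: {protocol}")),
    }
}

fn main() {
    let creds = BTreeMap::from([
        ("aws_key_id".to_string(), "minioadmin".to_string()),
        ("aws_secret_key".to_string(), "minioadmin".to_string()),
    ]);
    let stage = parse_s3_location("s3", "testbucket", "/admin/data/", &creds).unwrap();
    println!("{stage:?}");
}
```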
+echo "copy into ontime200 from 's3://testbucket/admin/data/' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') PATTERN = 'ontime.*csv$' FILE_FORMAT = (type = 'CSV' field_delimiter = ',' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT +## Result. +echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT +echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT + + +## Copy from parquet +echo "copy into ontime200 from 's3://testbucket/admin/data/' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') PATTERN = 'ontime.*parquet' FILE_FORMAT = (type = 'PARQUET')" | $MYSQL_CLIENT_CONNECT +echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT +echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT +echo "drop table if exists ontime200;" | $MYSQL_CLIENT_CONNECT + + diff --git a/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage.result b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage.result new file mode 100755 index 000000000000..cf669803ebf7 --- /dev/null +++ b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage.result @@ -0,0 +1,21 @@ +ontime_200.csv +ontime_200.csv.bz2 +ontime_200.csv.gz +ontime_200.csv.zst +ontime_200.parquet +199 2020.0 769 +199 2020.0 769 +199 2020.0 769 +199 2020.0 769 +597 2020.0 2307 +ontime_200.parquet +ontime_200_v1.parquet +398 2020.0 1538 +199 2020.0 769 +199 2020.0 769 +199 2020.0 769 +597 2020.0 2307 +ontime_200.csv +ontime_200.csv +ontime_200.csv +ontime_200.csv diff --git a/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage.sh b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage.sh new file mode 100755 index 000000000000..240a8ccc858d --- /dev/null +++ b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
"$CURDIR"/../../../shell_env.sh + +echo "set enable_planner_v2 = 1;" | $MYSQL_CLIENT_CONNECT +echo "drop table if exists ontime200;" | $MYSQL_CLIENT_CONNECT +echo "drop stage if exists s1" | $MYSQL_CLIENT_CONNECT +echo "drop stage if exists named_external_stage" | $MYSQL_CLIENT_CONNECT + +## Create table +cat $CURDIR/../ontime/create_table.sql | sed 's/ontime/ontime200/g' | $MYSQL_CLIENT_CONNECT + +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv s3://testbucket/admin/stage/s1/ontime_200.csv >/dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv.gz s3://testbucket/admin/stage/s1/ontime_200.csv.gz >/dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv.zst s3://testbucket/admin/stage/s1/ontime_200.csv.zst >/dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv.bz2 s3://testbucket/admin/stage/s1/ontime_200.csv.bz2 >/dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.parquet s3://testbucket/admin/stage/s1/ontime_200.parquet >/dev/null 2>&1 + +## Copy from internal stage +echo "CREATE STAGE s1;" | $MYSQL_CLIENT_CONNECT +echo "list @s1 PATTERN = 'ontime.*'" | $MYSQL_CLIENT_CONNECT | awk '{print $1}' | sort + +copy_from_stage_cases=( + # copy parquet + "copy into ontime200 from '@s1' PATTERN = 'ontime.*parquet$' FILE_FORMAT = (type = 'PARQUET');" + # copy gzip csv + "copy into ontime200 from '@s1' FILES = ('ontime_200.csv.gz') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'gzip' record_delimiter = '\n' skip_header = 1);" + # copy zstd csv + "copy into ontime200 from '@s1' FILES = ('ontime_200.csv.zst') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'zstd' record_delimiter = '\n' skip_header = 1);" + # copy bz2 csv + "copy into ontime200 from '@s1' FILES = ('ontime_200.csv.bz2') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'bz2' record_delimiter = '\n' skip_header = 1);" + # copy auto csv + "copy into ontime200 from '@s1' FILES = ('ontime_200.csv.gz', 'ontime_200.csv.zst', 'ontime_200.csv.bz2') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = AUTO record_delimiter = '\n' skip_header = 1);" +) + +for i in "${copy_from_stage_cases[@]}"; do + echo "$i" | $MYSQL_CLIENT_CONNECT + echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT + echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT +done + +## Copy from named external stage +echo "CREATE STAGE named_external_stage url = 's3://testbucket/admin/data/' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin');" | $MYSQL_CLIENT_CONNECT +echo "list @named_external_stage PATTERN = 'ontime.*parquet$'" | $MYSQL_CLIENT_CONNECT | awk '{print $1}' | sort + +copy_from_named_external_stage_cases=( + # copy parquet + "copy into ontime200 from '@named_external_stage' PATTERN = 'ontime.*parquet$' FILE_FORMAT = (type = 'PARQUET')" + # copy gzip csv + "copy into ontime200 from '@named_external_stage' FILES = ('ontime_200.csv.gz') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'gzip' record_delimiter = '\n' skip_header = 1);" + # copy zstd csv + "copy into ontime200 from '@named_external_stage' FILES = ('ontime_200.csv.zst') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'zstd' record_delimiter = '\n' skip_header = 1);" + # copy bz2 csv + "copy into ontime200 from 
'@named_external_stage' FILES = ('ontime_200.csv.bz2') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'bz2' record_delimiter = '\n' skip_header = 1);" + # copy auto csv + "copy into ontime200 from '@named_external_stage' FILES = ('ontime_200.csv.gz','ontime_200.csv.bz2','ontime_200.csv.zst') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'auto' record_delimiter = '\n' skip_header = 1);" +) + +for i in "${copy_from_named_external_stage_cases[@]}"; do + echo "$i" | $MYSQL_CLIENT_CONNECT + echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT + echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT +done + + +## List stage use http API + +curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "list @s1;"}' | grep -o 'ontime_200.csv' + + +## Drop table. +echo "drop table ontime200" | $MYSQL_CLIENT_CONNECT +echo "drop stage if exists named_external_stage" | $MYSQL_CLIENT_CONNECT +echo "drop stage if exists s1" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage2.py b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage2.py new file mode 100755 index 000000000000..e4c8d4980d54 --- /dev/null +++ b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage2.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 + +import os +import sys +import signal + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, '../../../helpers')) + +from native_client import NativeClient +from native_client import prompt + +log = None + +with NativeClient(name='client1>') as client1: + client1.expect(prompt) + + client1.send("set enable_planner_v2 = 1;") + client1.send("drop table if exists ontime200;") + client1.expect(prompt) + + client1.send("drop stage if exists named_external_stage;") + client1.expect(prompt) + + create_sql_f = os.path.join(CURDIR, '../ontime/create_table.sql') + read = open(create_sql_f, 'r') + create_sql = read.read().replace("ontime", "ontime200") + read.close() + + client1.send(create_sql) + client1.expect(prompt) + + client1.send( + "CREATE STAGE named_external_stage url = 's3://testbucket/admin/data/' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin');" + ) + client1.expect(prompt) + + client1.send('SET max_block_size = 50;') + client1.expect(prompt) + + client1.send( + "copy into ontime200 from '@named_external_stage' PATTERN = 'ontime_200.csv$' FILE_FORMAT = (type = CSV field_delimiter = ',' record_delimiter = '\n' skip_header = 1);" + ) + client1.expect(prompt) + + client1.send("select count() from ontime200;") + client1.expect("199") + + client1.send("drop table ontime200;") + client1.expect(prompt) diff --git a/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage2.result b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage2.result new file mode 100755 index 000000000000..e69de29bb2d1 diff --git a/tests/suites/1_stateful/00_copy_v2/00_0000_copy_into_stage.result b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_into_stage.result new file mode 100755 index 000000000000..0cfbf08886fc --- /dev/null +++ b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_into_stage.result @@ -0,0 +1 @@ +2 diff --git a/tests/suites/1_stateful/00_copy_v2/00_0000_copy_into_stage.sh b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_into_stage.sh new file mode 100755 index 000000000000..9aa54d05b800 --- /dev/null +++ 
b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_into_stage.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../../../shell_env.sh + +echo "set enable_planner_v2 = 1;" | $MYSQL_CLIENT_CONNECT +echo "drop table if exists test_table;" | $MYSQL_CLIENT_CONNECT +echo "drop STAGE if exists s2;" | $MYSQL_CLIENT_CONNECT +echo "CREATE STAGE s2;" | $MYSQL_CLIENT_CONNECT + +echo "CREATE TABLE test_table ( + id INTEGER, + name VARCHAR, + age INT +);" | $MYSQL_CLIENT_CONNECT + +for i in `seq 1 10`;do + echo "insert into test_table (id,name,age) values(1,'2',3), (4, '5', 6);" | $MYSQL_CLIENT_CONNECT +done + +echo "copy into @s2 from test_table FILE_FORMAT = (type = 'CSV');" | $MYSQL_CLIENT_CONNECT +echo "copy into @s2 from (select name, age, id from test_table limit 100) FILE_FORMAT = (type = 'PARQUET');" | $MYSQL_CLIENT_CONNECT +echo "list @s2;" | $MYSQL_CLIENT_CONNECT | wc -l | sed 's/ //g' +echo "drop STAGE s2;" | $MYSQL_CLIENT_CONNECT +echo "drop table test_table;" | $MYSQL_CLIENT_CONNECT + +aws --endpoint-url http://127.0.0.1:9900/ s3 rm s3://testbucket/admin/stage/s2 --recursive > /dev/null 2>&1 + diff --git a/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.result b/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.result new file mode 100644 index 000000000000..66c521198a54 --- /dev/null +++ b/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.result @@ -0,0 +1,6 @@ +199 2020.0 769 +199 2020.0 769 +199 2020.0 769 +199 2020.0 769 +199 2020.0 769 +199 2020.0 769 diff --git a/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.sh b/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.sh new file mode 100755 index 000000000000..8585996591cb --- /dev/null +++ b/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../../../shell_env.sh + +echo "set enable_planner_v2 = 1;" | $MYSQL_CLIENT_CONNECT +echo "drop table if exists ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT +## create ontime table +cat $CURDIR/../ontime/create_table.sql | sed 's/ontime/ontime_streaming_load/g' | $MYSQL_CLIENT_CONNECT + +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv /tmp/ontime_200.csv > /dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv.gz /tmp/ontime_200.csv.gz > /dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv.zst /tmp/ontime_200.csv.zst > /dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv.bz2 /tmp/ontime_200.csv.bz2 > /dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.parquet /tmp/ontime_200.parquet > /dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.ndjson /tmp/ontime_200.ndjson > /dev/null 2>&1 + + +# do the Data integrity check +echo "33b1243ecd881e701a1c33cc8d621ecbf9817be006dce8722cfc6dd7ef0637f9 /tmp/ontime_200.csv" | sha256sum --check > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "The downloaded dataset has been corrupted, please remove and fetch it again." + exit 1 +fi + +echo "e90086f5a25ef8bdb2469030a90e5ae30e967e41ed38b71f386f2a1bdc24efc8 /tmp/ontime_200.parquet" | sha256sum --check > /dev/null 2>&1 +if [ $? 
-ne 0 ]; then + echo "The downloaded dataset has been corrupted, please remove and fetch it again." + exit 1 +fi + +echo "8e6e663cf6fdaedf99516f8969512e40954bfa863822e8ef2d61e50c182c8d91 /tmp/ontime_200.ndjson" | sha256sum --check > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "The downloaded dataset has been corrupted, please remove and fetch it again." + exit 1 +fi + +# load csv +curl -H "insert_sql:insert into ontime_streaming_load format Csv" -H "skip_header:1" -F "upload=@/tmp/ontime_200.csv" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1 +echo "select count(1) ,avg(Year), sum(DayOfWeek) from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT +echo "truncate table ontime_streaming_load" | $MYSQL_CLIENT_CONNECT + +# load csv gz +curl -H "insert_sql:insert into ontime_streaming_load format Csv" -H "skip_header:1" -H "compression:gzip" -F "upload=@/tmp/ontime_200.csv.gz" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1 +echo "select count(1) ,avg(Year), sum(DayOfWeek) from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT +echo "truncate table ontime_streaming_load" | $MYSQL_CLIENT_CONNECT + +# load csv zstd +curl -H "insert_sql:insert into ontime_streaming_load format Csv" -H "skip_header:1" -H "compression:zstd" -F "upload=@/tmp/ontime_200.csv.zst" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1 +echo "select count(1) ,avg(Year), sum(DayOfWeek) from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT +echo "truncate table ontime_streaming_load" | $MYSQL_CLIENT_CONNECT + +# load csv bz2 +curl -H "insert_sql:insert into ontime_streaming_load format Csv" -H "skip_header:1" -H "compression:bz2" -F "upload=@/tmp/ontime_200.csv.bz2" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1 +echo "select count(1) ,avg(Year), sum(DayOfWeek) from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT +echo "truncate table ontime_streaming_load" | $MYSQL_CLIENT_CONNECT + +# load parquet +curl -H "insert_sql:insert into ontime_streaming_load format Parquet" -H "skip_header:1" -F "upload=@/tmp/ontime_200.parquet" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1 +echo "select count(1) ,avg(Year), sum(DayOfWeek) from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT +echo "truncate table ontime_streaming_load" | $MYSQL_CLIENT_CONNECT + +# load ndjson +curl -H "insert_sql:insert into ontime_streaming_load format NdJson" -H "skip_header:1" -F "upload=@/tmp/ontime_200.ndjson" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1 +echo "select count(1) ,avg(Year), sum(DayOfWeek) from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT +echo "truncate table ontime_streaming_load" | $MYSQL_CLIENT_CONNECT + +echo "drop table ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/01_load_v2/01_0001_upload_to_stage.result b/tests/suites/1_stateful/01_load_v2/01_0001_upload_to_stage.result new file mode 100755 index 000000000000..a4ba87fe9797 --- /dev/null +++ b/tests/suites/1_stateful/01_load_v2/01_0001_upload_to_stage.result @@ -0,0 +1,2 @@ +books.csv 91 NULL +test/books.csv 91 NULL diff --git a/tests/suites/1_stateful/01_load_v2/01_0001_upload_to_stage.sh b/tests/suites/1_stateful/01_load_v2/01_0001_upload_to_stage.sh new file mode 100755 index 000000000000..cb518258d829 --- /dev/null +++ 
b/tests/suites/1_stateful/01_load_v2/01_0001_upload_to_stage.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../../../shell_env.sh + +echo "set enable_planner_v2 = 1;" | $MYSQL_CLIENT_CONNECT +echo "drop stage if exists s2;" | $MYSQL_CLIENT_CONNECT +echo "CREATE STAGE if not exists s2;" | $MYSQL_CLIENT_CONNECT +echo "list @s2" | $MYSQL_CLIENT_CONNECT + +curl -u root: -XPUT -H "stage_name:s2" -F "upload=@${CURDIR}/books.csv" "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/upload_to_stage" > /dev/null 2>&1 +curl -u root: -XPUT -H "stage_name:s2" -H "relative_path:test" -F "upload=@${CURDIR}/books.csv" "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/upload_to_stage" > /dev/null 2>&1 + +echo "list @s2" | $MYSQL_CLIENT_CONNECT | awk '{print $1,$2,$3}' +echo "drop stage s2;" | $MYSQL_CLIENT_CONNECT + +# test drop stage +echo "CREATE STAGE if not exists s2;" | $MYSQL_CLIENT_CONNECT +echo "list @s2" | $MYSQL_CLIENT_CONNECT +echo "drop stage s2;" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/01_load_v2/01_0002_remove_external_stage.result b/tests/suites/1_stateful/01_load_v2/01_0002_remove_external_stage.result new file mode 100644 index 000000000000..0ada267da153 --- /dev/null +++ b/tests/suites/1_stateful/01_load_v2/01_0002_remove_external_stage.result @@ -0,0 +1,21 @@ +=== List files in external stage === +dir/ontime_200.csv +dir/ontime_200.csv.gz +dir/ontime_200.csv.zst +ontime_200.csv +ontime_200.csv.gz +ontime_200.csv.zst +=== Test remove external stage file === +dir/ontime_200.csv +dir/ontime_200.csv.gz +dir/ontime_200.csv.zst +ontime_200.csv +ontime_200.csv.zst +dir/ontime_200.csv +dir/ontime_200.csv.zst +=== Test remove external stage file with pattern === +dir/ontime_200.csv +ontime_200.csv +ontime_200.csv.zst +dir/ontime_200.csv +ontime_200.csv diff --git a/tests/suites/1_stateful/01_load_v2/01_0002_remove_external_stage.sh b/tests/suites/1_stateful/01_load_v2/01_0002_remove_external_stage.sh new file mode 100755 index 000000000000..f949ca737de1 --- /dev/null +++ b/tests/suites/1_stateful/01_load_v2/01_0002_remove_external_stage.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
"$CURDIR"/../../../shell_env.sh + +echo "set enable_planner_v2 = 1;" | $MYSQL_CLIENT_CONNECT +echo "drop stage if exists named_external_stage" | $MYSQL_CLIENT_CONNECT + +## tempdate/ +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv s3://testbucket/admin/tempdata/ontime_200.csv >/dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv.gz s3://testbucket/admin/tempdata/ontime_200.csv.gz >/dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv.zst s3://testbucket/admin/tempdata/ontime_200.csv.zst >/dev/null 2>&1 + +## tempdate/dir/ +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv s3://testbucket/admin/tempdata/dir/ontime_200.csv >/dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv.gz s3://testbucket/admin/tempdata/dir/ontime_200.csv.gz >/dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv.zst s3://testbucket/admin/tempdata/dir/ontime_200.csv.zst >/dev/null 2>&1 + +## Copy from named external stage +echo "CREATE STAGE named_external_stage url = 's3://testbucket/admin/tempdata/' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin');" | $MYSQL_CLIENT_CONNECT + +## List files in internal stage +echo "=== List files in external stage ===" +echo "list @named_external_stage" | $MYSQL_CLIENT_CONNECT | awk '{print $1}' | sort + +## Remove external stage file +echo "=== Test remove external stage file ===" +echo "remove @named_external_stage/ontime_200.csv.gz" | $MYSQL_CLIENT_CONNECT +echo "list @named_external_stage" | $MYSQL_CLIENT_CONNECT | awk '{print $1}' | sort +echo "remove @named_external_stage/dir/ontime_200.csv.gz" | $MYSQL_CLIENT_CONNECT +echo "list @named_external_stage/dir/" | $MYSQL_CLIENT_CONNECT | awk '{print $1}' | sort + +## Remove external stage file with pattern +echo "=== Test remove external stage file with pattern ===" +echo "remove @named_external_stage/dir/ PATTERN = '.*zst'" | $MYSQL_CLIENT_CONNECT +echo "list @named_external_stage" | $MYSQL_CLIENT_CONNECT | awk '{print $1}' | sort +echo "remove @named_external_stage PATTERN = '.*zst'" | $MYSQL_CLIENT_CONNECT +echo "list @named_external_stage" | $MYSQL_CLIENT_CONNECT | awk '{print $1}' | sort + +echo "drop stage named_external_stage" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/01_load_v2/01_0002_remove_internal_stage.result b/tests/suites/1_stateful/01_load_v2/01_0002_remove_internal_stage.result new file mode 100644 index 000000000000..b22c26728a3e --- /dev/null +++ b/tests/suites/1_stateful/01_load_v2/01_0002_remove_internal_stage.result @@ -0,0 +1,18 @@ +=== List files in internal stage === +dir/ontime_200.csv +dir/ontime_200.csv.gz +dir/ontime_200.csv.zst +ontime_200.csv +ontime_200.csv.gz +ontime_200.csv.zst +=== Test remove internal stage file === +dir/ontime_200.csv +dir/ontime_200.csv.zst +dir/ontime_200.csv +dir/ontime_200.csv.zst +ontime_200.csv +ontime_200.csv.zst +=== Test remove internal stage file with pattern === +dir/ontime_200.csv +ontime_200.csv +ontime_200.csv.zst diff --git a/tests/suites/1_stateful/01_load_v2/01_0002_remove_internal_stage.sh b/tests/suites/1_stateful/01_load_v2/01_0002_remove_internal_stage.sh new file mode 100755 index 000000000000..bbbdd63d51b1 --- /dev/null +++ b/tests/suites/1_stateful/01_load_v2/01_0002_remove_internal_stage.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + 
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../../../shell_env.sh + +echo "set enable_planner_v2 = 1;" | $MYSQL_CLIENT_CONNECT +echo "drop stage if exists s1" | $MYSQL_CLIENT_CONNECT + +## Copy from internal stage +echo "CREATE STAGE s1;" | $MYSQL_CLIENT_CONNECT + +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv s3://testbucket/admin/stage/s1/ontime_200.csv >/dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv.gz s3://testbucket/admin/stage/s1/ontime_200.csv.gz >/dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv.zst s3://testbucket/admin/stage/s1/ontime_200.csv.zst >/dev/null 2>&1 + +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv s3://testbucket/admin/stage/s1/dir/ontime_200.csv >/dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv.gz s3://testbucket/admin/stage/s1/dir/ontime_200.csv.gz >/dev/null 2>&1 +aws --endpoint-url http://127.0.0.1:9900/ s3 cp s3://testbucket/admin/data/ontime_200.csv.zst s3://testbucket/admin/stage/s1/dir/ontime_200.csv.zst >/dev/null 2>&1 + +## List files in internal stage +echo "=== List files in internal stage ===" +echo "list @s1" | $MYSQL_CLIENT_CONNECT | awk '{print $1}' | sort + +## Remove internal stage file +echo "=== Test remove internal stage file ===" +echo "remove @s1/ontime_200.csv.gz" | $MYSQL_CLIENT_CONNECT +echo "remove @s1/dir/ontime_200.csv.gz" | $MYSQL_CLIENT_CONNECT +echo "list @s1/dir/" | $MYSQL_CLIENT_CONNECT | awk '{print $1}'| sort +echo "list @s1" | $MYSQL_CLIENT_CONNECT | awk '{print $1}' | sort + +## Remove internal stage file with pattern +echo "=== Test remove internal stage file with pattern ===" +echo "remove @s1/dir/ PATTERN = '.*zst'" | $MYSQL_CLIENT_CONNECT +echo "list @s1" | $MYSQL_CLIENT_CONNECT | awk '{print $1}' | sort +echo "remove @s1 PATTERN = 'ontime.*'" | $MYSQL_CLIENT_CONNECT +echo "list @s1" | $MYSQL_CLIENT_CONNECT | awk '{print $1}' | sort + +echo "drop stage s1" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/01_load_v2/books.csv b/tests/suites/1_stateful/01_load_v2/books.csv new file mode 100644 index 000000000000..a282467f8969 --- /dev/null +++ b/tests/suites/1_stateful/01_load_v2/books.csv @@ -0,0 +1,2 @@ +Transaction Processing,Jim Gray,1992 +Readings in Database Systems,Michael Stonebraker,2004 From 0f15f8b5c6d55a3e1365c06b891a4f7281f01f38 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 21 Jun 2022 14:03:51 +0800 Subject: [PATCH 20/40] Make clippy happy Signed-off-by: Xuanwo --- query/src/sql/planner/binder/copy.rs | 48 ++++++++++++++-------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs index c9534ad8a295..00a539bbdb43 100644 --- a/query/src/sql/planner/binder/copy.rs +++ b/query/src/sql/planner/binder/copy.rs @@ -62,8 +62,8 @@ impl<'a> Binder { self.bind_copy_from_stage_into_table( bind_context, stmt, - &name, - &path, + name, + path, &catalog_name, &database_name, &table, @@ -93,11 +93,11 @@ impl<'a> Binder { self.bind_copy_from_uri_into_table( bind_context, stmt, - &protocol, - &name, - &path, - &credentials, - &encryption, + protocol, + name, + path, + credentials, + encryption, &catalog_name, &database_name, &table, @@ -124,8 +124,8 @@ impl<'a> Binder { &catalog_name, &database_name, &table, - &name, - &path, + name, + path, ) 
.await } @@ -155,16 +155,16 @@ impl<'a> Binder { &catalog_name, &database_name, &table, - &protocol, - &name, - &path, - &credentials, - &encryption, + protocol, + name, + path, + credentials, + encryption, ) .await } (CopyTarget::Query(query), CopyTarget::StageLocation { name, path }) => { - self.bind_copy_from_query_into_stage(bind_context, stmt, &query, &name, &path) + self.bind_copy_from_query_into_stage(bind_context, stmt, query, name, path) .await } ( @@ -180,12 +180,12 @@ impl<'a> Binder { self.bind_copy_from_query_into_uri( bind_context, stmt, - &query, - &protocol, - &name, - &path, - &credentials, - &encryption, + query, + protocol, + name, + path, + credentials, + encryption, ) .await } @@ -223,7 +223,7 @@ impl<'a> Binder { let from = ReadDataSourcePlan { catalog: dst_catalog_name.to_string(), source_info: SourceInfo::StageSource(StageTableInfo { - schema: table.schema().clone(), + schema: table.schema(), stage_info, path, files: vec![], @@ -272,7 +272,7 @@ impl<'a> Binder { .await?; let (mut stage_info, path) = parse_uri_location_v2( - &src_protocol, + src_protocol, src_name, src_path, src_credentials, @@ -283,7 +283,7 @@ impl<'a> Binder { let from = ReadDataSourcePlan { catalog: dst_catalog_name.to_string(), source_info: SourceInfo::StageSource(StageTableInfo { - schema: table.schema().clone(), + schema: table.schema(), stage_info, path, files: vec![], From f0057d8a5ac52dbb50dd573d3c41aa0e9e55418c Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 21 Jun 2022 14:30:31 +0800 Subject: [PATCH 21/40] Make clippy happy Signed-off-by: Xuanwo --- query/src/sql/planner/binder/copy.rs | 13 +++++++++---- query/src/sql/statements/statement_common.rs | 5 ++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs index 00a539bbdb43..1735790b10fe 100644 --- a/query/src/sql/planner/binder/copy.rs +++ b/query/src/sql/planner/binder/copy.rs @@ -198,6 +198,7 @@ impl<'a> Binder { } /// Bind COPY INFO
<table> FROM <stage_location>
+    #[allow(clippy::too_many_arguments)]
     async fn bind_copy_from_stage_into_table(
         &mut self,
         _: &BindContext,
@@ -250,6 +251,7 @@
     }
 
     /// Bind COPY INFO <table> FROM <uri_location>
+    #[allow(clippy::too_many_arguments)]
     async fn bind_copy_from_uri_into_table(
         &mut self,
         _: &BindContext,
@@ -310,6 +312,7 @@
     }
 
     /// Bind COPY INFO <stage_location> FROM <table>
+    #[allow(clippy::too_many_arguments)]
     async fn bind_copy_from_table_into_stage(
         &mut self,
         bind_context: &BindContext,
@@ -355,6 +358,7 @@
     }
 
     /// Bind COPY INFO <uri_location> FROM <table>
+    #[allow(clippy::too_many_arguments)]
     async fn bind_copy_from_table_into_uri(
         &mut self,
         bind_context: &BindContext,
@@ -412,12 +416,12 @@
         &mut self,
         bind_context: &BindContext,
         stmt: &CopyStmt<'a>,
-        src_query: &Box<Query<'a>>,
+        src_query: &Query<'_>,
         dst_stage: &str,
         dst_path: &str,
     ) -> Result<Plan> {
         let query = self
-            .bind_statement(bind_context, &Statement::Query(src_query.clone()))
+            .bind_statement(bind_context, &Statement::Query(Box::new(src_query.clone())))
             .await?;
 
         // Validation mode.
@@ -437,11 +441,12 @@
     }
 
     /// Bind COPY INFO <uri_location> FROM <query>
+    #[allow(clippy::too_many_arguments)]
     async fn bind_copy_from_query_into_uri(
         &mut self,
         bind_context: &BindContext,
         stmt: &CopyStmt<'a>,
-        src_query: &Box<Query<'a>>,
+        src_query: &Query<'_>,
         dst_protocol: &str,
         dst_name: &str,
         dst_path: &str,
@@ -449,7 +454,7 @@
         dst_encryption: &BTreeMap<String, String>,
     ) -> Result<Plan> {
         let query = self
-            .bind_statement(bind_context, &Statement::Query(src_query.clone()))
+            .bind_statement(bind_context, &Statement::Query(Box::new(src_query.clone())))
             .await?;
 
         // Validation mode.
diff --git a/query/src/sql/statements/statement_common.rs b/query/src/sql/statements/statement_common.rs
index 02be568c3c26..5b4429e781a0 100644
--- a/query/src/sql/statements/statement_common.rs
+++ b/query/src/sql/statements/statement_common.rs
@@ -143,9 +143,8 @@ pub fn parse_uri_location(
     }
 
     parse_uri_location_v2(
-        uri.scheme_str().ok_or(ErrorCode::SyntaxException(
-            "File location scheme must be specified",
-        ))?,
+        uri.scheme_str()
+            .ok_or_else(|| ErrorCode::SyntaxException("File location scheme must be specified"))?,
         &bucket,
         &path,
         credential_options,
From e8c2151babd54c6bfad6af2cba6280c9a6138827 Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Tue, 21 Jun 2022 14:55:11 +0800
Subject: [PATCH 22/40] Fix unit tests

Signed-off-by: Xuanwo
---
 query/src/sql/statements/statement_common.rs    | 2 +-
 query/tests/it/sql/statements/statement_copy.rs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/query/src/sql/statements/statement_common.rs b/query/src/sql/statements/statement_common.rs
index 5b4429e781a0..eb5a213df526 100644
--- a/query/src/sql/statements/statement_common.rs
+++ b/query/src/sql/statements/statement_common.rs
@@ -208,7 +208,7 @@ pub fn parse_uri_location_v2(
     }?;
 
     let stage = UserStageInfo {
-        stage_name: format!("{protocol}://{name}{path}"),
+        stage_name: format!("{protocol}://{name}{}{path}", root.trim_end_matches('/')),
         stage_type: StageType::External,
         stage_params: StageParams {
             storage: stage_storage,
diff --git a/query/tests/it/sql/statements/statement_copy.rs b/query/tests/it/sql/statements/statement_copy.rs
index 71035c1c697f..c34428c72846 100644
--- a/query/tests/it/sql/statements/statement_copy.rs
+++ b/query/tests/it/sql/statements/statement_copy.rs
@@ -143,7 +143,7 @@ async fn test_statement_copy() -> Result<()> {
             encryption=(master_key = 'my_master_key')
             file_format = (type = csv field_delimiter = '|' skip_header = 1)",
         expect: "",
-        err: "Code: 1005, displayText = File location uri unsupported, must be one of [s3, @stage].",
+        err: "Code: 1005, displayText = File location uri unsupported, must be one of [s3].",
     },
     TestCase {
         name: "copy-internal-ok",
From 058136b33f2e8736192a3d8045cbf9f46e909cbc Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Tue, 21 Jun 2022 15:14:11 +0800
Subject: [PATCH 23/40] Enable planner

Signed-off-by: Xuanwo
---
 .../00_copy_v2/00_0000_copy_from_s3_location.sh | 13 ++++++-------
 .../00_copy_v2/00_0000_copy_from_stage.sh       |  4 ++--
.../00_copy_v2/00_0000_copy_into_stage.sh | 4 ++-- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_s3_location.sh b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_s3_location.sh index a3426a6993e2..41f1ba76651a 100755 --- a/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_s3_location.sh +++ b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_s3_location.sh @@ -3,7 +3,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../../../shell_env.sh -echo "set enable_planner_v2 = 1;" | $MYSQL_CLIENT_CONNECT echo "drop table if exists ontime200;" | $MYSQL_CLIENT_CONNECT ## Create table cat $CURDIR/../ontime/create_table.sql | sed 's/ontime/ontime200/g' | $MYSQL_CLIENT_CONNECT @@ -20,7 +19,7 @@ echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT ## Copy from s3 with compression gzip. echo "Test copy from gzip file" -echo "copy into ontime200 from 's3://testbucket/admin/data/ontime_200.csv.gz' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'gzip' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT +echo "set enable_planner_v2 = 1; copy into ontime200 from 's3://testbucket/admin/data/ontime_200.csv.gz' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'gzip' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT ## Result. echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT @@ -30,7 +29,7 @@ echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT ## Copy from s3 with compression zstd. echo "Test copy from zstd file" -echo "copy into ontime200 from 's3://testbucket/admin/data/ontime_200.csv.zst' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'zstd' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT +echo "set enable_planner_v2 = 1; copy into ontime200 from 's3://testbucket/admin/data/ontime_200.csv.zst' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'zstd' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT ## Result. echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT @@ -40,7 +39,7 @@ echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT ## Copy from s3 with compression bz2. echo "Test copy from bzip2 file" -echo "copy into ontime200 from 's3://testbucket/admin/data/ontime_200.csv.bz2' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'bz2' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT +echo "set enable_planner_v2 = 1; copy into ontime200 from 's3://testbucket/admin/data/ontime_200.csv.bz2' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' compression = 'bz2' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT ## Result. echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT @@ -49,21 +48,21 @@ echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIEN echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT ## Copy from s3 with files. 
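# Note: each `echo "..." | $MYSQL_CLIENT_CONNECT` in these scripts opens a
# fresh session, and enable_planner_v2 is a session-level setting — which is
# presumably why this patch drops the one-shot "set enable_planner_v2 = 1;"
# and prefixes the setting to every COPY statement instead.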
-echo "copy into ontime200 from 's3://testbucket/admin/data/' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') FILES = ('ontime_200.csv', 'ontime_200_v1.csv') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT +echo "set enable_planner_v2 = 1; copy into ontime200 from 's3://testbucket/admin/data/' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') FILES = ('ontime_200.csv', 'ontime_200_v1.csv') FILE_FORMAT = (type = 'CSV' field_delimiter = ',' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT ## Result. echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT ## Copy from s3 by directory with pattern. -echo "copy into ontime200 from 's3://testbucket/admin/data/' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') PATTERN = 'ontime.*csv$' FILE_FORMAT = (type = 'CSV' field_delimiter = ',' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT +echo "set enable_planner_v2 = 1; copy into ontime200 from 's3://testbucket/admin/data/' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') PATTERN = 'ontime.*csv$' FILE_FORMAT = (type = 'CSV' field_delimiter = ',' record_delimiter = '\n' skip_header = 1)" | $MYSQL_CLIENT_CONNECT ## Result. echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT ## Copy from parquet -echo "copy into ontime200 from 's3://testbucket/admin/data/' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') PATTERN = 'ontime.*parquet' FILE_FORMAT = (type = 'PARQUET')" | $MYSQL_CLIENT_CONNECT +echo "set enable_planner_v2 = 1; copy into ontime200 from 's3://testbucket/admin/data/' credentials=(aws_key_id='minioadmin' aws_secret_key='minioadmin') PATTERN = 'ontime.*parquet' FILE_FORMAT = (type = 'PARQUET')" | $MYSQL_CLIENT_CONNECT echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT echo "drop table if exists ontime200;" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage.sh b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage.sh index 240a8ccc858d..41d994eba161 100755 --- a/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage.sh +++ b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_from_stage.sh @@ -35,7 +35,7 @@ copy_from_stage_cases=( ) for i in "${copy_from_stage_cases[@]}"; do - echo "$i" | $MYSQL_CLIENT_CONNECT + echo "set enable_planner_v2 = 1; $i" | $MYSQL_CLIENT_CONNECT echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT done @@ -58,7 +58,7 @@ copy_from_named_external_stage_cases=( ) for i in "${copy_from_named_external_stage_cases[@]}"; do - echo "$i" | $MYSQL_CLIENT_CONNECT + echo "set enable_planner_v2 = 1; $i" | $MYSQL_CLIENT_CONNECT echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT done diff --git a/tests/suites/1_stateful/00_copy_v2/00_0000_copy_into_stage.sh b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_into_stage.sh index 9aa54d05b800..80d46705c4f5 100755 --- a/tests/suites/1_stateful/00_copy_v2/00_0000_copy_into_stage.sh +++ b/tests/suites/1_stateful/00_copy_v2/00_0000_copy_into_stage.sh @@ 
-18,8 +18,8 @@ for i in `seq 1 10`;do echo "insert into test_table (id,name,age) values(1,'2',3), (4, '5', 6);" | $MYSQL_CLIENT_CONNECT done -echo "copy into @s2 from test_table FILE_FORMAT = (type = 'CSV');" | $MYSQL_CLIENT_CONNECT -echo "copy into @s2 from (select name, age, id from test_table limit 100) FILE_FORMAT = (type = 'PARQUET');" | $MYSQL_CLIENT_CONNECT +echo "set enable_planner_v2 = 1; copy into @s2 from test_table FILE_FORMAT = (type = 'CSV');" | $MYSQL_CLIENT_CONNECT +echo "set enable_planner_v2 = 1; copy into @s2 from (select name, age, id from test_table limit 100) FILE_FORMAT = (type = 'PARQUET');" | $MYSQL_CLIENT_CONNECT echo "list @s2;" | $MYSQL_CLIENT_CONNECT | wc -l | sed 's/ //g' echo "drop STAGE s2;" | $MYSQL_CLIENT_CONNECT echo "drop table test_table;" | $MYSQL_CLIENT_CONNECT From c1cbaf467dc36e3b61b9dc40e743a3ffbcae580e Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 21 Jun 2022 15:42:49 +0800 Subject: [PATCH 24/40] Fix copy plan not display Signed-off-by: Xuanwo --- query/src/sql/planner/format/display_plan.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/query/src/sql/planner/format/display_plan.rs b/query/src/sql/planner/format/display_plan.rs index 3d5ec58f9a2a..b41225cc20f3 100644 --- a/query/src/sql/planner/format/display_plan.rs +++ b/query/src/sql/planner/format/display_plan.rs @@ -27,7 +27,7 @@ impl Plan { Ok(format!("{:?}:\n{}", kind, result)) } - Plan::Copy(_) => todo!(), + Plan::Copy(plan) => Ok(format!("{:?}", plan)), // Databases Plan::ShowDatabases(show_databases) => Ok(format!("{:?}", show_databases)), From ec4214d8ccb9be137ecb0bca3fa1e55246b1e4b2 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 21 Jun 2022 15:50:19 +0800 Subject: [PATCH 25/40] Fix unit test Signed-off-by: Xuanwo --- query/src/sql/statements/statement_common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/query/src/sql/statements/statement_common.rs b/query/src/sql/statements/statement_common.rs index eb5a213df526..76457f4e86f4 100644 --- a/query/src/sql/statements/statement_common.rs +++ b/query/src/sql/statements/statement_common.rs @@ -208,7 +208,7 @@ pub fn parse_uri_location_v2( }?; let stage = UserStageInfo { - stage_name: format!("{protocol}://{name}{}{path}", root.trim_end_matches('/')), + stage_name: format!("{protocol}://{name}{root}{}", path.trim_start_matches('/')), stage_type: StageType::External, stage_params: StageParams { storage: stage_storage, From 45b43b04b92bc1f19cc5990d950a53599bd3c3ff Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 21 Jun 2022 15:55:28 +0800 Subject: [PATCH 26/40] Fix unit test Signed-off-by: Xuanwo --- common/ast/src/parser/statement.rs | 4 +++- common/ast/tests/it/testdata/statement.txt | 12 ++++++------ query/tests/it/sql/statements/statement_common.rs | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs index 2bbc456bb715..0e417bdc71fa 100644 --- a/common/ast/src/parser/statement.rs +++ b/common/ast/src/parser/statement.rs @@ -1210,6 +1210,8 @@ pub fn options(i: Input) -> IResult> { rule! 
{ "(" ~ ( #ident_with_format ~ "=" ~ (#ident_to_string | #u64_to_string | #literal_string) )* ~ ")" }, - |(_, opts, _)| BTreeMap::from_iter(opts.iter().map(|(k, _, v)| (k.clone(), v.clone()))), + |(_, opts, _)| { + BTreeMap::from_iter(opts.iter().map(|(k, _, v)| (k.to_lowercase(), v.clone()))) + }, )(i) } diff --git a/common/ast/tests/it/testdata/statement.txt b/common/ast/tests/it/testdata/statement.txt index f41bd298f25f..6cd910be1ec4 100644 --- a/common/ast/tests/it/testdata/statement.txt +++ b/common/ast/tests/it/testdata/statement.txt @@ -3044,7 +3044,7 @@ Query( ---------- Input ---------- CREATE STAGE IF NOT EXISTS test_stage url='s3://load/files/' credentials=(aws_key_id='1a2b3c' aws_secret_key='4x5y6z') file_format=(FORMAT = CSV compression = GZIP record_delimiter=',') ---------- Output --------- -CREATE STAGE IF NOT EXISTS test_stage URL = 's3://load/files/' CREDENTIALS = ( aws_key_id = '1a2b3c' aws_secret_key = '4x5y6z' ) FILE_FORMAT = ( FORMAT = 'CSV' compression = 'GZIP' record_delimiter = ',' ) +CREATE STAGE IF NOT EXISTS test_stage URL = 's3://load/files/' CREDENTIALS = ( aws_key_id = '1a2b3c' aws_secret_key = '4x5y6z' ) FILE_FORMAT = ( compression = 'GZIP' format = 'CSV' record_delimiter = ',' ) ---------- AST ------------ CreateStage( CreateStageStmt { @@ -3057,8 +3057,8 @@ CreateStage( }, encryption_options: {}, file_format_options: { - "FORMAT": "CSV", "compression": "GZIP", + "format": "CSV", "record_delimiter": ",", }, on_error: "", @@ -4095,7 +4095,7 @@ COPY INTO mytable ) size_limit=10; ---------- Output --------- -COPY INTO mytable FROM 's3://mybucket/data.csv' CREDENTIALS = ( AWS_KEY_ID='access_key' AWS_SECRET_KEY='secret_key' ) ENCRYPTION = ( MASTER_KEY='master_key' ) FILE_FORMAT = ( field_delimiter = ',' record_delimiter = ' +COPY INTO mytable FROM 's3://mybucket/data.csv' CREDENTIALS = ( aws_key_id='access_key' aws_secret_key='secret_key' ) ENCRYPTION = ( master_key='master_key' ) FILE_FORMAT = ( field_delimiter = ',' record_delimiter = ' ' skip_header = '1' type = 'CSV' ) SIZE_LIMIT = 10 ---------- AST ------------ Copy( @@ -4105,11 +4105,11 @@ Copy( name: "mybucket", path: "/data.csv", credentials: { - "AWS_KEY_ID": "access_key", - "AWS_SECRET_KEY": "secret_key", + "aws_key_id": "access_key", + "aws_secret_key": "secret_key", }, encryption: { - "MASTER_KEY": "master_key", + "master_key": "master_key", }, }, dst: Table( diff --git a/query/tests/it/sql/statements/statement_common.rs b/query/tests/it/sql/statements/statement_common.rs index 889edd7798ec..43e6b1da7800 100644 --- a/query/tests/it/sql/statements/statement_common.rs +++ b/query/tests/it/sql/statements/statement_common.rs @@ -116,7 +116,7 @@ fn test_parse_uri_location() -> Result<()> { ]), BTreeMap::from([("master_key".into(), "test_master_key".into())]), UserStageInfo { - stage_name: "s3://test".to_string(), + stage_name: "s3://test/".to_string(), stage_type: StageType::External, stage_params: StageParams { storage: StorageParams::S3(StorageS3Config { From bd932cb11b1e251a743e06cdbb3a133f75775560 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 21 Jun 2022 17:01:00 +0800 Subject: [PATCH 27/40] Update common/ast/src/ast/statement.rs Co-authored-by: Andy Lok --- common/ast/src/ast/statement.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/ast/src/ast/statement.rs b/common/ast/src/ast/statement.rs index 416822e18419..def9a6b3dd8f 100644 --- a/common/ast/src/ast/statement.rs +++ b/common/ast/src/ast/statement.rs @@ -418,7 +418,7 @@ pub enum CopyTarget<'a> { } impl 
CopyTarget<'_> { - pub fn target(&self) -> &str { + pub fn target(&self) -> &'static str { match self { CopyTarget::Table(_, _, _) => "Table", CopyTarget::StageLocation { .. } => "StageLocation", From b1d08ca820add425fa6a9f0ba7a93b7cfd83947a Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 21 Jun 2022 17:01:06 +0800 Subject: [PATCH 28/40] Update common/ast/src/parser/statement.rs Co-authored-by: Andy Lok --- common/ast/src/parser/statement.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs index 0e417bdc71fa..0ace76b71c9d 100644 --- a/common/ast/src/parser/statement.rs +++ b/common/ast/src/parser/statement.rs @@ -1027,9 +1027,7 @@ pub fn copy_target(i: Input) -> IResult { // Parse input like `mytable` let table = |i| { map_res( - rule! { - #peroid_separated_idents_1_to_3 - }, + peroid_separated_idents_1_to_3, |(catalog, database, table)| Ok(CopyTarget::Table(catalog, database, table)), )(i) }; From 3feb8b31c1b45dd13cea80c5bd9cee32eeab1755 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 21 Jun 2022 17:11:45 +0800 Subject: [PATCH 29/40] Address comments Signed-off-by: Xuanwo --- common/ast/src/ast/statement.rs | 18 +++++++++++------- common/ast/src/parser/statement.rs | 8 +++++++- query/src/sql/planner/binder/copy.rs | 24 ++++++++++++++++++++---- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/common/ast/src/ast/statement.rs b/common/ast/src/ast/statement.rs index def9a6b3dd8f..02392fb541d7 100644 --- a/common/ast/src/ast/statement.rs +++ b/common/ast/src/ast/statement.rs @@ -380,11 +380,11 @@ pub enum CopyTarget<'a> { /// Table can be used in `INTO` or `FROM`. /// /// While table used as `FROM`, it will be rewrite as `(SELECT * FROM table)` - Table( - Option>, - Option>, - Identifier<'a>, - ), + Table { + catalog: Option>, + database: Option>, + table: Identifier<'a>, + }, /// StageLocation (a.k.a internal and external stage) can be used /// in `INTO` or `FROM`. /// @@ -420,7 +420,7 @@ pub enum CopyTarget<'a> { impl CopyTarget<'_> { pub fn target(&self) -> &'static str { match self { - CopyTarget::Table(_, _, _) => "Table", + CopyTarget::Table { .. } => "Table", CopyTarget::StageLocation { .. } => "StageLocation", CopyTarget::UriLocation { .. 
} => "UriLocation", CopyTarget::Query(_) => "Query", @@ -732,7 +732,11 @@ impl Display for KillTarget { impl Display for CopyTarget<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - CopyTarget::Table(catalog, database, table) => { + CopyTarget::Table { + catalog, + database, + table, + } => { if let Some(catalog) = catalog { write!( f, diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs index 0ace76b71c9d..9d4b2e7980a8 100644 --- a/common/ast/src/parser/statement.rs +++ b/common/ast/src/parser/statement.rs @@ -1028,7 +1028,13 @@ pub fn copy_target(i: Input) -> IResult { let table = |i| { map_res( peroid_separated_idents_1_to_3, - |(catalog, database, table)| Ok(CopyTarget::Table(catalog, database, table)), + |(catalog, database, table)| { + Ok(CopyTarget::Table { + catalog, + database, + table, + }) + }, )(i) }; diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs index 1735790b10fe..3effe4dcd929 100644 --- a/query/src/sql/planner/binder/copy.rs +++ b/query/src/sql/planner/binder/copy.rs @@ -47,7 +47,11 @@ impl<'a> Binder { match (&stmt.src, &stmt.dst) { ( CopyTarget::StageLocation { name, path }, - CopyTarget::Table(catalog, database, table), + CopyTarget::Table { + catalog, + database, + table, + }, ) => { let catalog_name = catalog .as_ref() @@ -78,7 +82,11 @@ impl<'a> Binder { credentials, encryption, }, - CopyTarget::Table(catalog, database, table), + CopyTarget::Table { + catalog, + database, + table, + }, ) => { let catalog_name = catalog .as_ref() @@ -105,7 +113,11 @@ impl<'a> Binder { .await } ( - CopyTarget::Table(catalog, database, table), + CopyTarget::Table { + catalog, + database, + table, + }, CopyTarget::StageLocation { name, path }, ) => { let catalog_name = catalog @@ -130,7 +142,11 @@ impl<'a> Binder { .await } ( - CopyTarget::Table(catalog, database, table), + CopyTarget::Table { + catalog, + database, + table, + }, CopyTarget::UriLocation { protocol, name, From fdf555edb84e882ccdc57e6fab54e4e8cefab2cc Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 21 Jun 2022 17:29:18 +0800 Subject: [PATCH 30/40] Fix ast unit tests Signed-off-by: Xuanwo --- common/ast/tests/it/testdata/statement.txt | 50 +++++++++++----------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/common/ast/tests/it/testdata/statement.txt b/common/ast/tests/it/testdata/statement.txt index 6cd910be1ec4..3c759bbdeb7b 100644 --- a/common/ast/tests/it/testdata/statement.txt +++ b/common/ast/tests/it/testdata/statement.txt @@ -3922,15 +3922,15 @@ Copy( credentials: {}, encryption: {}, }, - dst: Table( - None, - None, - Identifier { + dst: Table { + catalog: None, + database: None, + table: Identifier { name: "mytable", quote: None, span: Ident(10..17), }, - ), + }, files: [], pattern: "", file_format: { @@ -3965,15 +3965,15 @@ Copy( name: "my_stage", path: "/", }, - dst: Table( - None, - None, - Identifier { + dst: Table { + catalog: None, + database: None, + table: Identifier { name: "mytable", quote: None, span: Ident(10..17), }, - ), + }, files: [], pattern: "", file_format: { @@ -4004,15 +4004,15 @@ COPY INTO 's3://mybucket/data.csv' FROM mytable FILE_FORMAT = ( field_delimiter ---------- AST ------------ Copy( CopyStmt { - src: Table( - None, - None, - Identifier { + src: Table { + catalog: None, + database: None, + table: Identifier { name: "mytable", quote: None, span: Ident(56..63), }, - ), + }, dst: UriLocation { protocol: "s3", name: "mybucket", @@ -4050,15 +4050,15 @@ COPY INTO @my_stage/ 
FROM mytable FILE_FORMAT = ( field_delimiter = ',' record_d ---------- AST ------------ Copy( CopyStmt { - src: Table( - None, - None, - Identifier { + src: Table { + catalog: None, + database: None, + table: Identifier { name: "mytable", quote: None, span: Ident(41..48), }, - ), + }, dst: StageLocation { name: "my_stage", path: "/", @@ -4112,15 +4112,15 @@ Copy( "master_key": "master_key", }, }, - dst: Table( - None, - None, - Identifier { + dst: Table { + catalog: None, + database: None, + table: Identifier { name: "mytable", quote: None, span: Ident(10..17), }, - ), + }, files: [], pattern: "", file_format: { From d4b18664f31168a37408dfb3de76f2f114cc24aa Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 21 Jun 2022 18:41:38 +0800 Subject: [PATCH 31/40] Rename CopyTarget to CopyUnit Signed-off-by: Xuanwo --- common/ast/src/ast/statement.rs | 28 ++++++++++++++-------------- common/ast/src/parser/statement.rs | 12 ++++++------ query/src/sql/planner/binder/copy.rs | 24 ++++++++++++------------ 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/common/ast/src/ast/statement.rs b/common/ast/src/ast/statement.rs index 02392fb541d7..c172f3672bcb 100644 --- a/common/ast/src/ast/statement.rs +++ b/common/ast/src/ast/statement.rs @@ -180,8 +180,8 @@ pub enum ExplainKind { /// ``` #[derive(Debug, Clone, PartialEq)] pub struct CopyStmt<'a> { - pub src: CopyTarget<'a>, - pub dst: CopyTarget<'a>, + pub src: CopyUnit<'a>, + pub dst: CopyUnit<'a>, pub files: Vec, pub pattern: String, pub file_format: BTreeMap, @@ -374,9 +374,9 @@ pub enum DatabaseEngine { Github(String), } -/// CopyTarget is the target that can be used in `COPY INTO`. +/// CopyUnit is the unit that can be used in `COPY`. #[derive(Debug, Clone, PartialEq)] -pub enum CopyTarget<'a> { +pub enum CopyUnit<'a> { /// Table can be used in `INTO` or `FROM`. /// /// While table used as `FROM`, it will be rewrite as `(SELECT * FROM table)` @@ -417,13 +417,13 @@ pub enum CopyTarget<'a> { Query(Box>), } -impl CopyTarget<'_> { +impl CopyUnit<'_> { pub fn target(&self) -> &'static str { match self { - CopyTarget::Table { .. } => "Table", - CopyTarget::StageLocation { .. } => "StageLocation", - CopyTarget::UriLocation { .. } => "UriLocation", - CopyTarget::Query(_) => "Query", + CopyUnit::Table { .. } => "Table", + CopyUnit::StageLocation { .. } => "StageLocation", + CopyUnit::UriLocation { .. } => "UriLocation", + CopyUnit::Query(_) => "Query", } } } @@ -729,10 +729,10 @@ impl Display for KillTarget { } } -impl Display for CopyTarget<'_> { +impl Display for CopyUnit<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - CopyTarget::Table { + CopyUnit::Table { catalog, database, table, @@ -749,10 +749,10 @@ impl Display for CopyTarget<'_> { write!(f, "{table}") } } - CopyTarget::StageLocation { name, path } => { + CopyUnit::StageLocation { name, path } => { write!(f, "@{name}{path}") } - CopyTarget::UriLocation { + CopyUnit::UriLocation { protocol, name, path, @@ -772,7 +772,7 @@ impl Display for CopyTarget<'_> { } Ok(()) } - CopyTarget::Query(query) => { + CopyUnit::Query(query) => { write!(f, "({query})") } } diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs index 9d4b2e7980a8..bfab9dd44e73 100644 --- a/common/ast/src/parser/statement.rs +++ b/common/ast/src/parser/statement.rs @@ -1000,7 +1000,7 @@ pub fn kill_target(i: Input) -> IResult { /// # Notes /// /// It's required to parse stage location first. Or stage could be parsed as table. 
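To make the ordering note above concrete, here is a toy, dependency-free sketch of ordered choice; `classify_copy_unit` is an invented helper for illustration only, not the nom-based grammar that follows:

```rust
/// Toy ordered-choice classifier mirroring `copy_target`'s branch order:
/// the most distinctive shapes are tried first, and the bare-identifier
/// table form acts as the catch-all, so it must be tried last.
fn classify_copy_unit(input: &str) -> &'static str {
    if input.starts_with('@') {
        "StageLocation" // `@my_stage/path/to/dir`
    } else if input.contains("://") {
        "UriLocation" // `s3://bucket/path/to/dir`
    } else if input.starts_with('(') {
        "Query" // `(SELECT ...)`
    } else {
        "Table" // `mytable` or `db.mytable`
    }
}

fn main() {
    assert_eq!(classify_copy_unit("@s1/dir/"), "StageLocation");
    assert_eq!(classify_copy_unit("s3://testbucket/admin/data/"), "UriLocation");
    assert_eq!(classify_copy_unit("(SELECT name, age FROM t)"), "Query");
    assert_eq!(classify_copy_unit("mydb.ontime200"), "Table");
}
```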
From 001861871a3152c4110e161ac26a36a532bdfc55 Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Tue, 21 Jun 2022 18:51:38 +0800
Subject: [PATCH 32/40] Fix format

Signed-off-by: Xuanwo
---
 query/src/interpreters/interpreter_copy_v2.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/query/src/interpreters/interpreter_copy_v2.rs b/query/src/interpreters/interpreter_copy_v2.rs
index 551d375dedcd..17b64037c9fe 100644
--- a/query/src/interpreters/interpreter_copy_v2.rs
+++ b/query/src/interpreters/interpreter_copy_v2.rs
@@ -1,4 +1,3 @@
-use std::path::Path;
 // Copyright 2022 Datafuse Labs.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,6 +11,7 @@ use std::path::Path;
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+use std::path::Path;
 use std::sync::Arc;
 
 use common_datablocks::DataBlock;
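The next several patches replace nom's `map_res` with `map` wherever the conversion closure cannot fail. A standalone illustration of the difference, using plain nom parsers rather than Databend's `rule!`/`ErrorKind` wrappers:

    use nom::character::complete::digit1;
    use nom::combinator::{map, map_res};
    use nom::IResult;

    // `map` applies an infallible conversion to the parser's output.
    fn digits_as_string(i: &str) -> IResult<&str, String> {
        map(digit1, |s: &str| s.to_string())(i)
    }

    // `map_res` is for conversions that return a Result and may fail,
    // turning the error into a parse failure.
    fn digits_as_u64(i: &str) -> IResult<&str, u64> {
        map_res(digit1, |s: &str| s.parse::<u64>())(i)
    }

    fn main() {
        assert_eq!(digits_as_string("42rest"), Ok(("rest", "42".to_string())));
        assert_eq!(digits_as_u64("42rest"), Ok(("rest", 42)));
    }

Using `map` where nothing can fail also removes the `Ok(...)` wrapping that the later patches keep cleaning up.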
From 4453cc73a379aa32bd9634f01c325dedd0267966 Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Tue, 21 Jun 2022 18:54:19 +0800
Subject: [PATCH 33/40] Update common/ast/src/parser/statement.rs

Co-authored-by: Andy Lok
---
 common/ast/src/parser/statement.rs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs
index bfab9dd44e73..5d1ad8ba8df4 100644
--- a/common/ast/src/parser/statement.rs
+++ b/common/ast/src/parser/statement.rs
@@ -1003,10 +1003,7 @@ pub fn kill_target(i: Input) -> IResult<KillTarget> {
 pub fn copy_target(i: Input) -> IResult<CopyUnit> {
     // Parse input like `@my_stage/path/to/dir`
     let stage_location = |i| {
-        map_res(
-            rule! {
-                #at_string
-            },
+        map(at_string,
             |location| {
                 let parsed = location.splitn(2, '/').collect::<Vec<&str>>();
                 if parsed.len() == 1 {

From c8e55105c3c254864caf792e802deef4140c9f9a Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Tue, 21 Jun 2022 18:54:27 +0800
Subject: [PATCH 34/40] Update common/ast/src/parser/statement.rs

Co-authored-by: Andy Lok
---
 common/ast/src/parser/statement.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs
index 5d1ad8ba8df4..15f49c5e5616 100644
--- a/common/ast/src/parser/statement.rs
+++ b/common/ast/src/parser/statement.rs
@@ -1055,7 +1055,7 @@ pub fn copy_target(i: Input) -> IResult<CopyUnit> {
         },
         |(location, credentials_opt, encryption_opt)| {
             let parsed = Url::parse(&location)
-                .map_err(|_| ErrorKind::Other("Unexpected invalid url"))?;
+                .map_err(|_| ErrorKind::Other("invalid url"))?;
 
             Ok(CopyUnit::UriLocation {
                 protocol: parsed.scheme().to_string(),

From f2e55b77d5c9546743bd73f155148d27bc971cbd Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Tue, 21 Jun 2022 18:55:11 +0800
Subject: [PATCH 35/40] Update common/ast/src/parser/statement.rs

Co-authored-by: Andy Lok
---
 common/ast/src/parser/statement.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs
index 15f49c5e5616..ff7614e45171 100644
--- a/common/ast/src/parser/statement.rs
+++ b/common/ast/src/parser/statement.rs
@@ -1200,7 +1200,7 @@ pub fn options(i: Input) -> IResult<BTreeMap<String, String>> {
         })(i)
     };
 
-    let u64_to_string = |i| map_res(literal_u64, |v| Ok(v.to_string()))(i);
+    let u64_to_string = |i| map(literal_u64, u64::to_string)(i);
 
     let ident_with_format = alt((
         ident_to_string,
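Patch 34 only rewords the error around `Url::parse`, but the surrounding logic is worth seeing in one place: an external location splits into a protocol, a host name, and an absolute path. A self-contained approximation assuming the `url` crate (function name hypothetical; the real parser also collects the CREDENTIALS and ENCRYPTION options):

    use url::Url;

    fn split_uri_location(location: &str) -> Result<(String, String, String), String> {
        let parsed = Url::parse(location).map_err(|_| "invalid uri location".to_string())?;
        let protocol = parsed.scheme().to_string();
        let name = parsed
            .host_str()
            .ok_or_else(|| "invalid uri location".to_string())?
            .to_string();
        // Normalize an empty path to "/" so later stages always see a path.
        let path = if parsed.path().is_empty() {
            "/".to_string()
        } else {
            parsed.path().to_string()
        };
        Ok((protocol, name, path))
    }

    fn main() {
        let (protocol, name, path) = split_uri_location("s3://mybucket/data.csv").unwrap();
        assert_eq!(protocol, "s3");
        assert_eq!(name, "mybucket");
        assert_eq!(path, "/data.csv");
    }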
From 22f47d8613e4b36b2ad6bcc076e7c94a8cfdf12c Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Tue, 21 Jun 2022 18:55:20 +0800
Subject: [PATCH 36/40] Update common/ast/src/parser/statement.rs

Co-authored-by: Andy Lok
---
 common/ast/src/parser/statement.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs
index ff7614e45171..eb41a48c3cf5 100644
--- a/common/ast/src/parser/statement.rs
+++ b/common/ast/src/parser/statement.rs
@@ -1023,7 +1023,7 @@ pub fn copy_target(i: Input) -> IResult<CopyUnit> {
     // Parse input like `mytable`
     let table = |i| {
-        map_res(
+        map(
             peroid_separated_idents_1_to_3,
             |(catalog, database, table)| {
                 Ok(CopyUnit::Table {

From cc301094edeb992af123315fb756e9d9e0b911f4 Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Tue, 21 Jun 2022 18:56:32 +0800
Subject: [PATCH 37/40] Remove unneeded rule

Signed-off-by: Xuanwo
---
 common/ast/src/parser/statement.rs | 42 +++++++++++++-----------------
 1 file changed, 18 insertions(+), 24 deletions(-)

diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs
index eb41a48c3cf5..333c8bb1e7f4 100644
--- a/common/ast/src/parser/statement.rs
+++ b/common/ast/src/parser/statement.rs
@@ -1003,22 +1003,20 @@ pub fn kill_target(i: Input) -> IResult<KillTarget> {
 pub fn copy_target(i: Input) -> IResult<CopyUnit> {
     // Parse input like `@my_stage/path/to/dir`
     let stage_location = |i| {
-        map(at_string,
-            |location| {
-                let parsed = location.splitn(2, '/').collect::<Vec<&str>>();
-                if parsed.len() == 1 {
-                    Ok(CopyUnit::StageLocation {
-                        name: parsed[0].to_string(),
-                        path: "/".to_string(),
-                    })
-                } else {
-                    Ok(CopyUnit::StageLocation {
-                        name: parsed[0].to_string(),
-                        path: format!("/{}", parsed[1]),
-                    })
-                }
-            },
-        )(i)
+        map(at_string, |location| {
+            let parsed = location.splitn(2, '/').collect::<Vec<&str>>();
+            if parsed.len() == 1 {
+                Ok(CopyUnit::StageLocation {
+                    name: parsed[0].to_string(),
+                    path: "/".to_string(),
+                })
+            } else {
+                Ok(CopyUnit::StageLocation {
+                    name: parsed[0].to_string(),
+                    path: format!("/{}", parsed[1]),
+                })
+            }
+        })(i)
     };
 
     // Parse input like `mytable`
@@ -1037,12 +1035,9 @@ pub fn copy_target(i: Input) -> IResult<CopyUnit> {
 
     // Parse input like `( SELECT * from mytable )`
     let query = |i| {
-        map_res(
-            rule! {
-                #parenthesized_query
-            },
-            |query| Ok(CopyUnit::Query(Box::new(query))),
-        )(i)
+        map_res(parenthesized_query, |query| {
+            Ok(CopyUnit::Query(Box::new(query)))
+        })(i)
     };
 
     // Parse input like `'s3://example/path/to/dir' CREDENTIALS = (AWS_ACCESS_ID="admin" AWS_SECRET_KEY="admin")`
@@ -1054,8 +1049,7 @@ pub fn copy_target(i: Input) -> IResult<CopyUnit> {
             ~ (ENCRYPTION ~ "=" ~ #options)?
         },
         |(location, credentials_opt, encryption_opt)| {
-            let parsed = Url::parse(&location)
-                .map_err(|_| ErrorKind::Other("invalid url"))?;
+            let parsed = Url::parse(&location).map_err(|_| ErrorKind::Other("invalid url"))?;
 
             Ok(CopyUnit::UriLocation {
                 protocol: parsed.scheme().to_string(),
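The stage-location closure that patches 38 and 39 below keep reshaping reduces to a small string split. The same logic as a standalone function (name hypothetical), where `at_string` has already consumed the leading `@`:

    fn split_stage_location(location: &str) -> (String, String) {
        // Split on the first '/' only: the stage name never contains one,
        // and everything after it becomes an absolute path (default "/").
        let parsed = location.splitn(2, '/').collect::<Vec<&str>>();
        if parsed.len() == 1 {
            (parsed[0].to_string(), "/".to_string())
        } else {
            (parsed[0].to_string(), format!("/{}", parsed[1]))
        }
    }

    fn main() {
        assert_eq!(
            split_stage_location("my_stage"),
            ("my_stage".to_string(), "/".to_string())
        );
        assert_eq!(
            split_stage_location("my_stage/path/to/dir"),
            ("my_stage".to_string(), "/path/to/dir".to_string())
        );
    }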
From a16912461d0a1325438f19a7b6a6a51d5f6ad421 Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Tue, 21 Jun 2022 19:12:38 +0800
Subject: [PATCH 38/40] Address comments

Signed-off-by: Xuanwo
---
 common/ast/src/parser/statement.rs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs
index 333c8bb1e7f4..711960312fc5 100644
--- a/common/ast/src/parser/statement.rs
+++ b/common/ast/src/parser/statement.rs
@@ -1003,7 +1003,7 @@ pub fn kill_target(i: Input) -> IResult<KillTarget> {
 pub fn copy_target(i: Input) -> IResult<CopyUnit> {
     // Parse input like `@my_stage/path/to/dir`
     let stage_location = |i| {
-        map(at_string, |location| {
+        map_res(at_string, |location| {
             let parsed = location.splitn(2, '/').collect::<Vec<&str>>();
             if parsed.len() == 1 {
                 Ok(CopyUnit::StageLocation {
@@ -1021,7 +1021,7 @@ pub fn copy_target(i: Input) -> IResult<CopyUnit> {
 
     // Parse input like `mytable`
     let table = |i| {
-        map(
+        map_res(
             peroid_separated_idents_1_to_3,
             |(catalog, database, table)| {
                 Ok(CopyUnit::Table {
@@ -1049,13 +1049,14 @@ pub fn copy_target(i: Input) -> IResult<CopyUnit> {
             ~ (ENCRYPTION ~ "=" ~ #options)?
         },
         |(location, credentials_opt, encryption_opt)| {
-            let parsed = Url::parse(&location).map_err(|_| ErrorKind::Other("invalid url"))?;
+            let parsed =
+                Url::parse(&location).map_err(|_| ErrorKind::Other("invalid uri location"))?;
 
             Ok(CopyUnit::UriLocation {
                 protocol: parsed.scheme().to_string(),
                 name: parsed
                     .host_str()
-                    .ok_or(ErrorKind::Other("Unexpected invalid url for name missing"))?
+                    .ok_or(ErrorKind::Other("invalid uri location"))?
                     .to_string(),
                 path: if parsed.path().is_empty() {
                     "/".to_string()
                 } else {
@@ -1194,7 +1195,7 @@ pub fn options(i: Input) -> IResult<BTreeMap<String, String>> {
         })(i)
     };
 
-    let u64_to_string = |i| map(literal_u64, u64::to_string)(i);
+    let u64_to_string = |i| map_res(literal_u64, |v| Ok(v.to_string()))(i);
 
     let ident_with_format = alt((
         ident_to_string,

From cc66b1ce579aabb1e70edab3dd13c4fced3191dd Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Tue, 21 Jun 2022 19:49:25 +0800
Subject: [PATCH 39/40] Remove unneeded map_res

Signed-off-by: Xuanwo
---
 common/ast/src/parser/statement.rs | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs
index 711960312fc5..4094b3258646 100644
--- a/common/ast/src/parser/statement.rs
+++ b/common/ast/src/parser/statement.rs
@@ -1003,40 +1003,38 @@ pub fn kill_target(i: Input) -> IResult<KillTarget> {
 pub fn copy_target(i: Input) -> IResult<CopyUnit> {
     // Parse input like `@my_stage/path/to/dir`
     let stage_location = |i| {
-        map_res(at_string, |location| {
+        map(at_string, |location| {
             let parsed = location.splitn(2, '/').collect::<Vec<&str>>();
             if parsed.len() == 1 {
-                Ok(CopyUnit::StageLocation {
+                CopyUnit::StageLocation {
                     name: parsed[0].to_string(),
                     path: "/".to_string(),
-                })
+                }
             } else {
-                Ok(CopyUnit::StageLocation {
+                CopyUnit::StageLocation {
                     name: parsed[0].to_string(),
                     path: format!("/{}", parsed[1]),
-                })
+                }
             }
         })(i)
     };
 
     // Parse input like `mytable`
     let table = |i| {
-        map_res(
+        map(
             peroid_separated_idents_1_to_3,
-            |(catalog, database, table)| {
-                Ok(CopyUnit::Table {
-                    catalog,
-                    database,
-                    table,
-                })
+            |(catalog, database, table)| CopyUnit::Table {
+                catalog,
+                database,
+                table,
             },
         )(i)
     };
 
     // Parse input like `( SELECT * from mytable )`
     let query = |i| {
-        map_res(parenthesized_query, |query| {
-            Ok(CopyUnit::Query(Box::new(query)))
+        map(parenthesized_query, |query| {
+            CopyUnit::Query(Box::new(query))
         })(i)
     };

From 37da1b1e0663804fcc6dca4d08cbef0d6e68db65 Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Tue, 21 Jun 2022 19:54:19 +0800
Subject: [PATCH 40/40] Return u64_string directly

Signed-off-by: Xuanwo
---
 common/ast/src/parser/statement.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs
index 4094b3258646..a4cec1c12e6e 100644
--- a/common/ast/src/parser/statement.rs
+++ b/common/ast/src/parser/statement.rs
@@ -1193,7 +1193,7 @@ pub fn options(i: Input) -> IResult<BTreeMap<String, String>> {
         })(i)
     };
 
-    let u64_to_string = |i| map_res(literal_u64, |v| Ok(v.to_string()))(i);
+    let u64_to_string = |i| map(literal_u64, |v| v.to_string())(i);
 
     let ident_with_format = alt((
         ident_to_string,
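The series ends where patch 35 started. `map(literal_u64, u64::to_string)` presumably failed to compile because `u64::to_string` takes `&u64` while nom's `map` hands the closure an owned `u64`; hence the detour back through `map_res` in patch 38 and the final `|v| v.to_string()` above. A minimal reproduction of the final shape, with `literal_u64` re-implemented here as a stand-in for Databend's token-based parser:

    use nom::character::complete::digit1;
    use nom::combinator::{map, map_res};
    use nom::IResult;

    // Stand-in for the document's literal_u64.
    fn literal_u64(i: &str) -> IResult<&str, u64> {
        map_res(digit1, |s: &str| s.parse::<u64>())(i)
    }

    // Patch 40's shape: a plain closure converts the owned u64,
    // so `map` suffices and no Ok(...) wrapping is needed.
    fn u64_to_string(i: &str) -> IResult<&str, String> {
        map(literal_u64, |v| v.to_string())(i)
    }

    fn main() {
        assert_eq!(u64_to_string("10 )"), Ok((" )", "10".to_string())));
    }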