Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(query/planner): Migrate COPY to new planner #6074

Merged
merged 47 commits into from
Jun 21, 2022
Merged
Show file tree
Hide file tree
Changes from 46 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
a6e80ce
Implement copy into statement
Xuanwo Jun 17, 2022
cc2acaf
Refactor CopyStatement
Xuanwo Jun 17, 2022
b02d6c2
Fix support for catalog
Xuanwo Jun 17, 2022
313fb57
Implement bind statement for copy
Xuanwo Jun 17, 2022
eb7963e
Add copy plan v2
Xuanwo Jun 18, 2022
074470c
Add debug log for plan
Xuanwo Jun 20, 2022
e5638d5
Rename query to from
Xuanwo Jun 20, 2022
098a68a
Implement interpreters
Xuanwo Jun 20, 2022
ced3b8e
Make cargo check happy
Xuanwo Jun 20, 2022
06bc4c0
Make clippy happy
Xuanwo Jun 20, 2022
eaad2fa
Merge remote-tracking branch 'origin/main' into migrate-copy-into-to-…
Xuanwo Jun 20, 2022
af4fd8c
Resolve merge conflicts
Xuanwo Jun 20, 2022
cfb5987
Add unit test for ast
Xuanwo Jun 20, 2022
7d5117a
Move UserStageInfo into box
Xuanwo Jun 20, 2022
56b4344
Make clippy happy
Xuanwo Jun 20, 2022
3ed5c5b
Merge remote-tracking branch 'origin/main' into migrate-copy-into-to-…
Xuanwo Jun 20, 2022
9c534c7
Merge remote-tracking branch 'origin/main' into migrate-copy-into-to-…
Xuanwo Jun 20, 2022
2f142df
Extend support for CopyTarget
Xuanwo Jun 21, 2022
397722e
Add test for new parser
Xuanwo Jun 21, 2022
6d59753
Refactor binder
Xuanwo Jun 21, 2022
7694da6
Make check happy
Xuanwo Jun 21, 2022
9294a9e
Merge remote-tracking branch 'origin/main' into migrate-copy-into-to-…
Xuanwo Jun 21, 2022
957da6a
Enable stateful tests
Xuanwo Jun 21, 2022
0f15f8b
Make clippy happy
Xuanwo Jun 21, 2022
f0057d8
Make clippy happy
Xuanwo Jun 21, 2022
e8c2151
Fix unit tests
Xuanwo Jun 21, 2022
058136b
Enable planner
Xuanwo Jun 21, 2022
c1cbaf4
Fix copy plan not display
Xuanwo Jun 21, 2022
ec4214d
Fix unit test
Xuanwo Jun 21, 2022
45b43b0
Fix unit test
Xuanwo Jun 21, 2022
7a0ef09
Merge remote-tracking branch 'origin/main' into migrate-copy-into-to-…
Xuanwo Jun 21, 2022
bd932cb
Update common/ast/src/ast/statement.rs
Xuanwo Jun 21, 2022
b1d08ca
Update common/ast/src/parser/statement.rs
Xuanwo Jun 21, 2022
3feb8b3
Address comments
Xuanwo Jun 21, 2022
fdf555e
Fix ast unit tests
Xuanwo Jun 21, 2022
d4b1866
Rename CopyTarget to CopyUnit
Xuanwo Jun 21, 2022
0018618
Fix format
Xuanwo Jun 21, 2022
4453cc7
Update common/ast/src/parser/statement.rs
Xuanwo Jun 21, 2022
c8e5510
Update common/ast/src/parser/statement.rs
Xuanwo Jun 21, 2022
f2e55b7
Update common/ast/src/parser/statement.rs
Xuanwo Jun 21, 2022
22f47d8
Update common/ast/src/parser/statement.rs
Xuanwo Jun 21, 2022
cc30109
Remove not needed rule
Xuanwo Jun 21, 2022
a169124
Address comments
Xuanwo Jun 21, 2022
cc66b1c
Remove not needed map_res
Xuanwo Jun 21, 2022
37da1b1
Return u64_string directly
Xuanwo Jun 21, 2022
cb58994
Merge remote-tracking branch 'origin/main' into migrate-copy-into-to-…
Xuanwo Jun 21, 2022
336917a
Merge branch 'main' into migrate-copy-into-to-new-planner
mergify[bot] Jun 21, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions common/ast/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ nom-rule = "0.3.0"
pratt = "0.3.0"
serde = { version = "1.0.136", features = ["derive"] }
thiserror = "1.0.30"
url = "2.2.2"

[dev-dependencies]
common-base = { path = "../base" }
Expand Down
31 changes: 30 additions & 1 deletion common/ast/src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ fn write_period_separated_list(
Ok(())
}

/// Write input items into `a, b, c`
fn write_comma_separated_list(
f: &mut Formatter<'_>,
items: impl IntoIterator<Item = impl Display>,
Expand All @@ -66,7 +67,21 @@ fn write_comma_separated_list(
if i > 0 {
write!(f, ", ")?;
}
write!(f, "{}", item)?;
write!(f, "{item}")?;
}
Ok(())
}

/// Write input items into `'a', 'b', 'c'`.
///
/// Every item is wrapped in single quotes and consecutive items are joined
/// with `", "`. An empty input writes nothing. Items are written verbatim:
/// quotes that occur inside an item are not escaped.
fn write_quoted_comma_separated_list(
    f: &mut Formatter<'_>,
    items: impl IntoIterator<Item = impl Display>,
) -> std::fmt::Result {
    // Empty before the first item, `", "` before every later one.
    let mut separator = "";
    for entry in items {
        write!(f, "{separator}'{entry}'")?;
        separator = ", ";
    }
    Ok(())
}
Expand All @@ -83,3 +98,17 @@ fn write_space_seperated_list(
}
Ok(())
}

/// Write input map items into `field_a='x' field_b='y'`.
///
/// Each `(key, value)` pair is rendered as `key='value'`: the key is written
/// bare and the value is always wrapped in single quotes. Pairs are joined
/// with a single space, in the iteration order of `items`. An empty input
/// writes nothing. Values are written verbatim: quotes inside a value are not
/// escaped.
fn write_space_seperated_map(
    f: &mut Formatter<'_>,
    items: impl IntoIterator<Item = (impl Display, impl Display)>,
) -> std::fmt::Result {
    for (i, (k, v)) in items.into_iter().enumerate() {
        // Separator goes between pairs only, never before the first one.
        if i > 0 {
            write!(f, " ")?;
        }
        write!(f, "{k}='{v}'")?;
    }
    Ok(())
}
158 changes: 158 additions & 0 deletions common/ast/src/ast/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ use crate::ast::expr::Literal;
use crate::ast::expr::TypeName;
use crate::ast::write_comma_separated_list;
use crate::ast::write_period_separated_list;
use crate::ast::write_quoted_comma_separated_list;
use crate::ast::write_space_seperated_map;
use crate::ast::Identifier;
use crate::ast::Query;
use crate::parser::token::Token;
Expand All @@ -53,6 +55,8 @@ pub enum Statement<'a> {
overwrite: bool,
},

Copy(CopyStmt<'a>),

ShowSettings,
ShowProcessList,
ShowMetrics,
Expand Down Expand Up @@ -167,6 +171,25 @@ pub enum ExplainKind {
Pipeline,
}

/// CopyStmt is the parsed statement of `COPY`.
///
/// ## Examples
///
/// ```sql
/// COPY INTO table from s3://bucket/path/to/x.csv
/// ```
#[derive(Debug, Clone, PartialEq)]
pub struct CopyStmt<'a> {
pub src: CopyUnit<'a>,
pub dst: CopyUnit<'a>,
pub files: Vec<String>,
Xuanwo marked this conversation as resolved.
Show resolved Hide resolved
pub pattern: String,
pub file_format: BTreeMap<String, String>,
/// TODO(xuanwo): parse into validation_mode directly.
pub validation_mode: String,
pub size_limit: usize,
}

#[derive(Debug, Clone, PartialEq)] // Databases
pub struct ShowDatabasesStmt<'a> {
pub limit: Option<ShowLimit<'a>>,
Expand Down Expand Up @@ -351,6 +374,60 @@ pub enum DatabaseEngine {
Github(String),
}

/// CopyUnit is the unit that can be used in `COPY`.
///
/// A unit describes one side of the statement: either the source (`FROM`)
/// or the destination (`INTO`).
#[derive(Debug, Clone, PartialEq)]
pub enum CopyUnit<'a> {
/// Table can be used in `INTO` or `FROM`.
///
/// When a table is used as `FROM`, it will be rewritten as `(SELECT * FROM table)`.
Table {
/// Optional catalog qualifier. The `Display` impl of this type expects
/// `database` to be set whenever `catalog` is set.
catalog: Option<Identifier<'a>>,
/// Optional database qualifier.
database: Option<Identifier<'a>>,
/// The table name.
table: Identifier<'a>,
},
/// StageLocation (a.k.a. internal and external stage) can be used
/// in `INTO` or `FROM`.
///
/// For example:
///
/// - internal stage: `@internal_stage/path/to/dir/`
/// - external stage: `@s3_external_stage/path/to/dir/`
StageLocation {
/// The name of the stage.
name: String,
/// The path inside the stage. It is displayed directly after `name`
/// with no separator, so it presumably carries its own leading `/`
/// (TODO confirm against the parser).
path: String,
},
/// UriLocation (a.k.a. external location) can be used in `INTO` or `FROM`.
///
/// For example: `'s3://example/path/to/dir' CREDENTIALS = (AWS_ACCESS_ID="admin" AWS_SECRET_KEY="admin")`
///
/// TODO(xuanwo): Add endpoint_url support.
/// TODO(xuanwo): We can check if we support this protocol during parsing.
/// TODO(xuanwo): Maybe we can introduce more strict (friendly) report for credentials and encryption, like parsed into StorageConfig?
UriLocation {
/// The part of the URI before `://`, e.g. `s3`.
protocol: String,
/// The part of the URI right after `://`; presumably the bucket or
/// host name (TODO confirm against the parser).
name: String,
/// The rest of the URI, displayed directly after `name`.
path: String,
/// Key/value pairs of the `CREDENTIALS = (...)` clause; empty when absent.
credentials: BTreeMap<String, String>,
/// Key/value pairs of the `ENCRYPTION = (...)` clause; empty when absent.
encryption: BTreeMap<String, String>,
},
/// Query can only be used as `FROM`.
///
/// For example: `(SELECT field_a,field_b FROM table)`
Query(Box<Query<'a>>),
}

impl CopyUnit<'_> {
pub fn target(&self) -> &'static str {
match self {
CopyUnit::Table { .. } => "Table",
CopyUnit::StageLocation { .. } => "StageLocation",
CopyUnit::UriLocation { .. } => "UriLocation",
CopyUnit::Query(_) => "Query",
}
}
}

#[derive(Debug, Clone, PartialEq)]
pub struct CreateViewStmt<'a> {
pub if_not_exists: bool,
Expand Down Expand Up @@ -652,6 +729,56 @@ impl Display for KillTarget {
}
}

impl Display for CopyUnit<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
CopyUnit::Table {
catalog,
database,
table,
} => {
if let Some(catalog) = catalog {
write!(
f,
"{catalog}.{}.{table}",
database.as_ref().expect("database must be valid")
)
} else if let Some(database) = database {
write!(f, "{database}.{table}")
} else {
write!(f, "{table}")
}
}
CopyUnit::StageLocation { name, path } => {
write!(f, "@{name}{path}")
}
CopyUnit::UriLocation {
protocol,
name,
path,
credentials,
encryption,
} => {
write!(f, "'{protocol}://{name}{path}'")?;
if !credentials.is_empty() {
write!(f, " CREDENTIALS = ( ")?;
write_space_seperated_map(f, credentials)?;
write!(f, " )")?;
}
if !encryption.is_empty() {
write!(f, " ENCRYPTION = ( ")?;
write_space_seperated_map(f, encryption)?;
write!(f, " )")?;
}
Ok(())
}
CopyUnit::Query(query) => {
write!(f, "({query})")
}
}
}
}

impl Display for RoleOption {
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
match self {
Expand Down Expand Up @@ -717,6 +844,37 @@ impl<'a> Display for Statement<'a> {
InsertSource::Select { query } => write!(f, " {query}")?,
}
}
Statement::Copy(stmt) => {
    // Rendered as `COPY INTO <dst> FROM <src>` followed by the optional
    // clauses. Every optional clause starts with its own leading space so
    // that clauses never fuse together, and is skipped entirely when its
    // field holds the "unset" value (empty collection/string, or 0).
    write!(f, "COPY")?;
    write!(f, " INTO {}", stmt.dst)?;
    write!(f, " FROM {}", stmt.src)?;

    if !stmt.file_format.is_empty() {
        write!(f, " FILE_FORMAT = (")?;
        for (k, v) in stmt.file_format.iter() {
            write!(f, " {k} = '{v}'")?;
        }
        write!(f, " )")?;
    }

    if !stmt.files.is_empty() {
        write!(f, " FILES = (")?;
        write_quoted_comma_separated_list(f, &stmt.files)?;
        write!(f, " )")?;
    }

    if !stmt.pattern.is_empty() {
        write!(f, " PATTERN = '{}'", stmt.pattern)?;
    }

    if stmt.size_limit != 0 {
        write!(f, " SIZE_LIMIT = {}", stmt.size_limit)?;
    }

    if !stmt.validation_mode.is_empty() {
        // The leading space was missing here, which glued this clause onto
        // the previous one (e.g. `SIZE_LIMIT = 10VALIDATION_MODE = ...`).
        write!(f, " VALIDATION_MODE = {}", stmt.validation_mode)?;
    }
}
Statement::ShowSettings => {
write!(f, "SHOW SETTINGS")?;
}
Expand Down
Loading