Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: hilbert clustering #16296

Merged
merged 17 commits into from
Sep 2, 2024
15 changes: 12 additions & 3 deletions src/query/ast/src/ast/format/syntax/ddl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use crate::ast::AddColumnOption;
use crate::ast::AlterTableAction;
use crate::ast::AlterTableStmt;
use crate::ast::AlterViewStmt;
use crate::ast::ClusterType;
use crate::ast::CreateDictionaryStmt;
use crate::ast::CreateOption;
use crate::ast::CreateStreamStmt;
Expand Down Expand Up @@ -79,11 +80,15 @@ pub(crate) fn pretty_create_table(stmt: CreateTableStmt) -> RcDoc<'static> {
} else {
RcDoc::nil()
})
.append(if !stmt.cluster_by.is_empty() {
.append(if let Some(cluster_by) = stmt.cluster_by {
RcDoc::line()
.append(RcDoc::text("CLUSTER BY "))
.append(match cluster_by.cluster_type {
ClusterType::Linear => RcDoc::text("LINEAR"),
ClusterType::Hilbert => RcDoc::text("HILBERT"),
})
.append(parenthesized(
interweave_comma(stmt.cluster_by.into_iter().map(pretty_expr)).group(),
interweave_comma(cluster_by.cluster_exprs.into_iter().map(pretty_expr)).group(),
))
} else {
RcDoc::nil()
Expand Down Expand Up @@ -209,8 +214,12 @@ pub(crate) fn pretty_alter_table_action(action: AlterTableAction) -> RcDoc<'stat
.append(RcDoc::text(column.to_string())),
AlterTableAction::AlterTableClusterKey { cluster_by } => RcDoc::line()
.append(RcDoc::text("CLUSTER BY "))
.append(match cluster_by.cluster_type {
ClusterType::Linear => RcDoc::text("LINEAR"),
ClusterType::Hilbert => RcDoc::text("HILBERT"),
})
.append(parenthesized(
interweave_comma(cluster_by.into_iter().map(pretty_expr)).group(),
interweave_comma(cluster_by.cluster_exprs.into_iter().map(pretty_expr)).group(),
)),
AlterTableAction::DropTableClusterKey => {
RcDoc::line().append(RcDoc::text("DROP CLUSTER KEY"))
Expand Down
11 changes: 4 additions & 7 deletions src/query/ast/src/ast/statements/dynamic_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,11 @@ use std::fmt::Formatter;
use derive_visitor::Drive;
use derive_visitor::DriveMut;

use crate::ast::write_comma_separated_list;
use crate::ast::write_dot_separated_list;
use crate::ast::write_space_separated_string_map;
use crate::ast::ClusterOption;
use crate::ast::CreateOption;
use crate::ast::CreateTableSource;
use crate::ast::Expr;
use crate::ast::Identifier;
use crate::ast::Query;
use crate::ast::WarehouseOptions;
Expand Down Expand Up @@ -98,7 +97,7 @@ pub struct CreateDynamicTableStmt {
pub database: Option<Identifier>,
pub table: Identifier,
pub source: Option<CreateTableSource>,
pub cluster_by: Vec<Expr>,
pub cluster_by: Option<ClusterOption>,

pub target_lag: TargetLag,
pub warehouse_opts: WarehouseOptions,
Expand Down Expand Up @@ -134,10 +133,8 @@ impl Display for CreateDynamicTableStmt {
write!(f, " {source}")?;
}

if !self.cluster_by.is_empty() {
write!(f, " CLUSTER BY (")?;
write_comma_separated_list(f, &self.cluster_by)?;
write!(f, ")")?
if let Some(cluster_by) = &self.cluster_by {
write!(f, " {cluster_by}")?;
}

write!(f, " TARGET_LAG = {}", self.target_lag)?;
Expand Down
54 changes: 45 additions & 9 deletions src/query/ast/src/ast/statements/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,46 @@ impl Display for ShowDropTablesStmt {
}
}

#[derive(Debug, Clone, PartialEq, Drive, DriveMut)]
pub enum ClusterType {
Linear,
Hilbert,
}

impl Display for ClusterType {
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
match self {
ClusterType::Linear => write!(f, "LINEAR"),
ClusterType::Hilbert => write!(f, "HILBERT"),
}
}
}

impl std::str::FromStr for ClusterType {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"linear" => Ok(ClusterType::Linear),
"hilbert" => Ok(ClusterType::Hilbert),
_ => Err(()),
}
}
}

#[derive(Debug, Clone, PartialEq, Drive, DriveMut)]
pub struct ClusterOption {
pub cluster_type: ClusterType,
pub cluster_exprs: Vec<Expr>,
}

impl Display for ClusterOption {
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
write!(f, "CLUSTER BY {}(", self.cluster_type)?;
write_comma_separated_list(f, &self.cluster_exprs)?;
write!(f, ")")
}
}

#[derive(Debug, Clone, PartialEq, Drive, DriveMut)]
pub struct CreateTableStmt {
pub create_option: CreateOption,
Expand All @@ -137,7 +177,7 @@ pub struct CreateTableStmt {
pub source: Option<CreateTableSource>,
pub engine: Option<Engine>,
pub uri_location: Option<UriLocation>,
pub cluster_by: Vec<Expr>,
pub cluster_by: Option<ClusterOption>,
pub table_options: BTreeMap<String, String>,
pub as_query: Option<Box<Query>>,
pub table_type: TableType,
Expand Down Expand Up @@ -185,10 +225,8 @@ impl Display for CreateTableStmt {
write!(f, " {uri_location}")?;
}

if !self.cluster_by.is_empty() {
write!(f, " CLUSTER BY (")?;
write_comma_separated_list(f, &self.cluster_by)?;
write!(f, ")")?
if let Some(cluster_by) = &self.cluster_by {
write!(f, " {cluster_by}")?;
}

// Format table options
Expand Down Expand Up @@ -374,7 +412,7 @@ pub enum AlterTableAction {
column: Identifier,
},
AlterTableClusterKey {
cluster_by: Vec<Expr>,
cluster_by: ClusterOption,
},
DropTableClusterKey,
ReclusterTable {
Expand Down Expand Up @@ -420,9 +458,7 @@ impl Display for AlterTableAction {
write!(f, "DROP COLUMN {column}")?;
}
AlterTableAction::AlterTableClusterKey { cluster_by } => {
write!(f, "CLUSTER BY (")?;
write_comma_separated_list(f, cluster_by)?;
write!(f, ")")?;
write!(f, " {cluster_by}")?;
}
AlterTableAction::DropTableClusterKey => {
write!(f, "DROP CLUSTER KEY")?;
Expand Down
12 changes: 8 additions & 4 deletions src/query/ast/src/parser/dynamic_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ use nom::branch::permutation;
use nom::combinator::map;
use nom::combinator::value;

use crate::ast::ClusterOption;
use crate::ast::ClusterType;
use crate::ast::CreateDynamicTableStmt;
use crate::ast::InitializeMode;
use crate::ast::RefreshMode;
Expand All @@ -30,6 +32,7 @@ use crate::parser::common::IResult;
use crate::parser::expr::expr;
use crate::parser::expr::literal_u64;
use crate::parser::query::query;
use crate::parser::statement::cluster_type;
use crate::parser::statement::create_table_source;
use crate::parser::statement::parse_create_option;
use crate::parser::statement::table_option;
Expand Down Expand Up @@ -58,7 +61,7 @@ fn create_dynamic_table(i: Input) -> IResult<Statement> {
CREATE ~ ( OR ~ ^REPLACE )? ~ TRANSIENT? ~ DYNAMIC ~ TABLE ~ ( IF ~ ^NOT ~ ^EXISTS )?
~ #dot_separated_idents_1_to_3
~ #create_table_source?
~ ( CLUSTER ~ ^BY ~ ^"(" ~ ^#comma_separated_list1(expr) ~ ^")" )?
~ ( CLUSTER ~ ^BY ~ ( #cluster_type )? ~ ^"(" ~ ^#comma_separated_list1(expr) ~ ^")" )?
~ #dynamic_table_options
~ (#table_option)?
~ (AS ~ ^#query)
Expand Down Expand Up @@ -86,9 +89,10 @@ fn create_dynamic_table(i: Input) -> IResult<Statement> {
database,
table,
source,
cluster_by: opt_cluster_by
.map(|(_, _, _, exprs, _)| exprs)
.unwrap_or_default(),
cluster_by: opt_cluster_by.map(|(_, _, typ, _, cluster_exprs, _)| ClusterOption {
cluster_type: typ.unwrap_or(ClusterType::Linear),
cluster_exprs,
}),
target_lag,
warehouse_opts,
refresh_mode: refresh_mode_opt.unwrap_or(RefreshMode::Auto),
Expand Down
25 changes: 19 additions & 6 deletions src/query/ast/src/parser/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,7 @@ pub fn statement_body(i: Input) -> IResult<Statement> {
~ #create_table_source?
~ ( #engine )?
~ ( #uri_location )?
~ ( CLUSTER ~ ^BY ~ ^"(" ~ ^#comma_separated_list1(expr) ~ ^")" )?
~ ( CLUSTER ~ ^BY ~ ( #cluster_type )? ~ ^"(" ~ ^#comma_separated_list1(expr) ~ ^")" )?
~ ( #table_option )?
~ ( AS ~ ^#query )?
},
Expand Down Expand Up @@ -729,9 +729,10 @@ pub fn statement_body(i: Input) -> IResult<Statement> {
source,
engine,
uri_location,
cluster_by: opt_cluster_by
.map(|(_, _, _, exprs, _)| exprs)
.unwrap_or_default(),
cluster_by: opt_cluster_by.map(|(_, _, typ, _, exprs, _)| ClusterOption {
cluster_type: typ.unwrap_or(ClusterType::Linear),
cluster_exprs: exprs,
}),
table_options: opt_table_options.unwrap_or_default(),
as_query: opt_as_query.map(|(_, query)| Box::new(query)),
table_type,
Expand Down Expand Up @@ -3456,9 +3457,14 @@ pub fn alter_table_action(i: Input) -> IResult<AlterTableAction> {
);
let alter_table_cluster_key = map(
rule! {
CLUSTER ~ ^BY ~ ^"(" ~ ^#comma_separated_list1(expr) ~ ^")"
CLUSTER ~ ^BY ~ ( #cluster_type )? ~ ^"(" ~ ^#comma_separated_list1(expr) ~ ^")"
},
|(_, _, typ, _, cluster_exprs, _)| AlterTableAction::AlterTableClusterKey {
cluster_by: ClusterOption {
cluster_type: typ.unwrap_or(ClusterType::Linear),
cluster_exprs,
},
},
|(_, _, _, cluster_by, _)| AlterTableAction::AlterTableClusterKey { cluster_by },
);

let drop_table_cluster_key = map(
Expand Down Expand Up @@ -3898,6 +3904,13 @@ pub fn switch(i: Input) -> IResult<bool> {
))(i)
}

pub fn cluster_type(i: Input) -> IResult<ClusterType> {
alt((
value(ClusterType::Linear, rule! { LINEAR }),
value(ClusterType::Hilbert, rule! { HILBERT }),
))(i)
}

pub fn limit_where(i: Input) -> IResult<ShowLimit> {
map(
rule! {
Expand Down
4 changes: 4 additions & 0 deletions src/query/ast/src/parser/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,8 @@ pub enum TokenKind {
HAVING,
#[token("HIGH", ignore(ascii_case))]
HIGH,
#[token("HILBERT", ignore(ascii_case))]
HILBERT,
#[token("HISTORY", ignore(ascii_case))]
HISTORY,
#[token("HIVE", ignore(ascii_case))]
Expand Down Expand Up @@ -728,6 +730,8 @@ pub enum TokenKind {
KILL,
#[token("LATERAL", ignore(ascii_case))]
LATERAL,
#[token("LINEAR", ignore(ascii_case))]
LINEAR,
#[token("LOCATION_PREFIX", ignore(ascii_case))]
LOCATION_PREFIX,
#[token("LOCKS", ignore(ascii_case))]
Expand Down
Loading
Loading