diff --git a/src/query/ast/src/ast/expr.rs b/src/query/ast/src/ast/expr.rs index 57a3d874f1e7..a845806671fa 100644 --- a/src/query/ast/src/ast/expr.rs +++ b/src/query/ast/src/ast/expr.rs @@ -789,6 +789,23 @@ pub enum Literal { Null, } +impl Literal { + pub fn as_double(&self) -> Result { + match self { + Literal::UInt64(val) => Ok(*val as f64), + Literal::Float64(val) => Ok(*val), + Literal::Decimal256 { value, scale, .. } => { + let div = 10_f64.powi(*scale as i32); + Ok(value.as_f64() / div) + } + _ => Err(ParseError( + None, + format!("Cannot convert {:?} to double", self), + )), + } + } +} + impl Display for Literal { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { match self { diff --git a/src/query/ast/src/ast/format/ast_format.rs b/src/query/ast/src/ast/format/ast_format.rs index a6350fff2540..d73089791ae8 100644 --- a/src/query/ast/src/ast/format/ast_format.rs +++ b/src/query/ast/src/ast/format/ast_format.rs @@ -3327,6 +3327,7 @@ impl<'ast> Visitor<'ast> for AstFormatVisitor { consume, pivot, unpivot, + sample, } => { let mut name = String::new(); name.push_str("TableIdentifier "); @@ -3354,6 +3355,11 @@ impl<'ast> Visitor<'ast> for AstFormatVisitor { name.push_str(&unpivot.to_string()); } + if let Some(sample) = sample { + name.push(' '); + name.push_str(&sample.to_string()); + } + let mut children = Vec::new(); if let Some(temporal) = temporal { diff --git a/src/query/ast/src/ast/format/syntax/query.rs b/src/query/ast/src/ast/format/syntax/query.rs index 57de3ae1ff65..9e264477ad3a 100644 --- a/src/query/ast/src/ast/format/syntax/query.rs +++ b/src/query/ast/src/ast/format/syntax/query.rs @@ -322,6 +322,7 @@ pub(crate) fn pretty_table(table: TableReference) -> RcDoc<'static> { consume, pivot, unpivot, + sample, } => if let Some(catalog) = catalog { RcDoc::text(catalog.to_string()).append(RcDoc::text(".")) } else { @@ -353,6 +354,11 @@ pub(crate) fn pretty_table(table: TableReference) -> RcDoc<'static> { } else { RcDoc::nil() }) + .append(if let Some(sample) = sample { + RcDoc::text(format!(" {sample}")) + } else { + RcDoc::nil() + }) .append(if let Some(alias) = alias { RcDoc::text(format!(" AS {alias}")) } else { diff --git a/src/query/ast/src/ast/query.rs b/src/query/ast/src/ast/query.rs index 01c51711d0f6..e30b7949479a 100644 --- a/src/query/ast/src/ast/query.rs +++ b/src/query/ast/src/ast/query.rs @@ -19,6 +19,7 @@ use derive_visitor::Drive; use derive_visitor::DriveMut; use super::Lambda; +use super::Literal; use crate::ast::write_comma_separated_list; use crate::ast::write_dot_separated_list; use crate::ast::Expr; @@ -608,6 +609,39 @@ impl Display for TemporalClause { } } +#[derive(Debug, Clone, PartialEq, Drive, DriveMut)] +pub enum SampleLevel { + ROW, + BLOCK, +} + +#[derive(Debug, Clone, PartialEq, Drive, DriveMut)] +pub enum SampleConfig { + Probability(Literal), + RowsNum(Literal), +} + +#[derive(Debug, Clone, PartialEq, Drive, DriveMut)] +pub struct Sample { + pub sample_level: SampleLevel, + pub sample_conf: SampleConfig, +} + +impl Display for Sample { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "SAMPLE ")?; + match self.sample_level { + SampleLevel::ROW => write!(f, "ROW ")?, + SampleLevel::BLOCK => write!(f, "BLOCK ")?, + } + match &self.sample_conf { + SampleConfig::Probability(prob) => write!(f, "({})", prob)?, + SampleConfig::RowsNum(rows) => write!(f, "({} ROWS)", rows)?, + } + Ok(()) + } +} + /// A table name or a parenthesized subquery with an optional alias #[derive(Debug, Clone, PartialEq, Drive, DriveMut)] pub enum TableReference { @@ -623,6 +657,7 @@ pub enum TableReference { consume: bool, pivot: Option>, unpivot: Option>, + sample: Option, }, // `TABLE(expr)[ AS alias ]` TableFunction { @@ -697,6 +732,7 @@ impl Display for TableReference { consume, pivot, unpivot, + sample, } => { write_dot_separated_list( f, @@ -721,6 +757,10 @@ impl Display for TableReference { if let Some(unpivot) = unpivot { write!(f, " {unpivot}")?; } + + if let Some(sample) = sample { + write!(f, " {sample}")?; + } } TableReference::TableFunction { span: _, diff --git a/src/query/ast/src/ast/statements/merge_into.rs b/src/query/ast/src/ast/statements/merge_into.rs index 554d4ae1220b..8755a02cafbf 100644 --- a/src/query/ast/src/ast/statements/merge_into.rs +++ b/src/query/ast/src/ast/statements/merge_into.rs @@ -218,6 +218,7 @@ impl MergeSource { consume: false, pivot: None, unpivot: None, + sample: None, }, } } diff --git a/src/query/ast/src/parser/query.rs b/src/query/ast/src/parser/query.rs index 2b5bfa5f4c88..4e1d5862b62f 100644 --- a/src/query/ast/src/parser/query.rs +++ b/src/query/ast/src/parser/query.rs @@ -685,6 +685,7 @@ pub enum TableReferenceElement { consume: bool, pivot: Option>, unpivot: Option>, + sample: Option, }, // `TABLE(expr)[ AS alias ]` TableFunction { @@ -741,9 +742,43 @@ pub fn table_reference_element(i: Input) -> IResult match level.kind { + ROW => SampleLevel::ROW, + BLOCK => SampleLevel::BLOCK, + _ => unreachable!(), + }, + None => SampleLevel::ROW, + }; + let mut default_sample_conf = SampleConfig::Probability(Literal::Float64(100.0)); + if let Some((_, Expr::Literal { value, .. }, rows, _)) = sample_conf { + default_sample_conf = if rows.is_some() { + SampleConfig::RowsNum(value) + } else { + SampleConfig::Probability(value) + }; + } + table_sample = Some(Sample { + sample_level, + sample_conf: default_sample_conf, + }) + }; TableReferenceElement::Table { catalog, database, @@ -753,6 +788,7 @@ pub fn table_reference_element(i: Input) -> IResult>> PrattParser consume, pivot, unpivot, + sample, } => TableReference::Table { span: transform_span(input.span.tokens), catalog, @@ -874,6 +911,7 @@ impl<'a, I: Iterator>> PrattParser consume, pivot, unpivot, + sample, }, TableReferenceElement::TableFunction { lateral, diff --git a/src/query/ast/src/parser/statement.rs b/src/query/ast/src/parser/statement.rs index 2b41ab833cc9..63c7d6e75a32 100644 --- a/src/query/ast/src/parser/statement.rs +++ b/src/query/ast/src/parser/statement.rs @@ -4140,6 +4140,7 @@ pub fn table_reference_with_alias(i: Input) -> IResult { consume: false, pivot: None, unpivot: None, + sample: None, }, )(i) } @@ -4159,6 +4160,7 @@ pub fn table_reference_only(i: Input) -> IResult { consume: false, pivot: None, unpivot: None, + sample: None, }, )(i) } diff --git a/src/query/ast/src/parser/token.rs b/src/query/ast/src/parser/token.rs index 5d154f98a6d4..277b111f6d02 100644 --- a/src/query/ast/src/parser/token.rs +++ b/src/query/ast/src/parser/token.rs @@ -405,6 +405,8 @@ pub enum TokenKind { BROTLI, #[token("BZ2", ignore(ascii_case))] BZ2, + #[token("BLOCK", ignore(ascii_case))] + BLOCK, #[token("CALL", ignore(ascii_case))] CALL, #[token("CASE", ignore(ascii_case))] @@ -924,6 +926,8 @@ pub enum TokenKind { RETURN_FAILED_ONLY, #[token("REVERSE", ignore(ascii_case))] REVERSE, + #[token("SAMPLE", ignore(ascii_case))] + SAMPLE, #[token("MERGE", ignore(ascii_case))] MERGE, #[token("MATCHED", ignore(ascii_case))] @@ -1567,6 +1571,7 @@ impl TokenKind { // | TokenKind::AUTHORIZATION // | TokenKind::BINARY | TokenKind::BOTH + | TokenKind::BLOCK | TokenKind::CASE | TokenKind::CAST // | TokenKind::CHECK @@ -1624,6 +1629,7 @@ impl TokenKind { // | TokenKind::SIMILAR | TokenKind::SOME | TokenKind::SEMI + | TokenKind::SAMPLE // | TokenKind::SYMMETRIC // | TokenKind::TABLESAMPLE | TokenKind::THEN diff --git a/src/query/ast/tests/it/parser.rs b/src/query/ast/tests/it/parser.rs index efc1a55b1df3..eef305c9e6ad 100644 --- a/src/query/ast/tests/it/parser.rs +++ b/src/query/ast/tests/it/parser.rs @@ -229,6 +229,10 @@ fn test_statement() { r#"select * FROM t where ((a));"#, r#"select * FROM t where ((select 1) > 1);"#, r#"select ((t1.a)>=(((((t2.b)<=(t3.c))) IS NOT NULL)::INTEGER));"#, + r#"select * from t sample row (99);"#, + r#"select * from t sample block (99);"#, + r#"select * from t sample row (10 rows);"#, + r#"select * from t sample block (10 rows);"#, r#"insert into t (c1, c2) values (1, 2), (3, 4);"#, r#"insert into t (c1, c2) values (1, 2);"#, r#"insert into table t select * from t2;"#, diff --git a/src/query/ast/tests/it/testdata/query.txt b/src/query/ast/tests/it/testdata/query.txt index d4d8daf0753c..6b2a5fccb269 100644 --- a/src/query/ast/tests/it/testdata/query.txt +++ b/src/query/ast/tests/it/testdata/query.txt @@ -162,6 +162,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -182,6 +183,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -384,6 +386,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -483,6 +486,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -568,6 +572,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -693,6 +698,7 @@ Query { consume: true, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -766,6 +772,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -786,6 +793,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -861,6 +869,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -881,6 +890,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -1001,6 +1011,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -1021,6 +1032,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -1141,6 +1153,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -1161,6 +1174,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -1290,6 +1304,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -1310,6 +1325,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -1403,6 +1419,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -1423,6 +1440,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -1516,6 +1534,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -1536,6 +1555,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -1558,6 +1578,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -1674,6 +1695,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -1753,6 +1775,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -1910,6 +1933,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -1989,6 +2013,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -2137,6 +2162,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -2216,6 +2242,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -2373,6 +2400,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -2449,6 +2477,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -2537,6 +2566,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -2712,6 +2742,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, Table { span: Some( @@ -2732,6 +2763,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, Table { span: Some( @@ -2752,6 +2784,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -2916,6 +2949,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -2977,6 +3011,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -3058,6 +3093,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -3286,6 +3322,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -3458,6 +3495,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, Table { span: Some( @@ -3490,6 +3528,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, Subquery { span: Some( @@ -3687,6 +3726,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -3707,6 +3747,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -3963,6 +4004,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4012,6 +4054,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4087,6 +4130,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4136,6 +4180,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4218,6 +4263,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4267,6 +4313,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4318,6 +4365,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4400,6 +4448,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4449,6 +4498,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4500,6 +4550,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4575,6 +4626,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4631,6 +4683,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4680,6 +4733,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4764,6 +4818,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4813,6 +4868,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4864,6 +4920,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4939,6 +4996,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4995,6 +5053,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -5044,6 +5103,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -5425,6 +5485,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -5650,6 +5711,7 @@ Query { }, ), unpivot: None, + sample: None, }, ], selection: None, @@ -5793,6 +5855,7 @@ Query { ], }, ), + sample: None, }, ], selection: None, @@ -6012,6 +6075,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -6330,6 +6394,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -6570,6 +6635,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -6984,6 +7050,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, right: Subquery { span: Some( @@ -7144,6 +7211,7 @@ Query { consume: false, pivot: None, unpivot: None, + sample: None, }, TableFunction { span: Some( diff --git a/src/query/ast/tests/it/testdata/raw-insert.txt b/src/query/ast/tests/it/testdata/raw-insert.txt index fd95f384079a..e0159b91af26 100644 --- a/src/query/ast/tests/it/testdata/raw-insert.txt +++ b/src/query/ast/tests/it/testdata/raw-insert.txt @@ -156,6 +156,7 @@ Insert( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, diff --git a/src/query/ast/tests/it/testdata/script.txt b/src/query/ast/tests/it/testdata/script.txt index b0dba73efa5c..edaabc11de9a 100644 --- a/src/query/ast/tests/it/testdata/script.txt +++ b/src/query/ast/tests/it/testdata/script.txt @@ -311,6 +311,7 @@ Return { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -1606,6 +1607,7 @@ Loop { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( diff --git a/src/query/ast/tests/it/testdata/statement.txt b/src/query/ast/tests/it/testdata/statement.txt index 63d1264c5eb8..e604ac30e29a 100644 --- a/src/query/ast/tests/it/testdata/statement.txt +++ b/src/query/ast/tests/it/testdata/statement.txt @@ -630,6 +630,7 @@ Replace( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -740,6 +741,7 @@ Explain { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -878,6 +880,7 @@ Explain { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -989,6 +992,7 @@ Explain { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -1065,6 +1069,7 @@ Explain { consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -1280,6 +1285,7 @@ CreateIndex( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -1474,6 +1480,7 @@ CreateIndex( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -2008,6 +2015,7 @@ CreateTable( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -4995,6 +5003,7 @@ CreateTable( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -5346,6 +5355,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -5570,6 +5580,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -5640,6 +5651,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -5717,6 +5729,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -5785,6 +5798,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, Table { span: Some( @@ -5805,6 +5819,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, Table { span: Some( @@ -5825,6 +5840,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -5893,6 +5909,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, Table { span: Some( @@ -5913,6 +5930,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, Table { span: Some( @@ -5933,6 +5951,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -6119,6 +6138,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -6139,6 +6159,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -6279,6 +6300,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -6299,6 +6321,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -6439,6 +6462,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -6459,6 +6483,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -6599,6 +6624,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -6619,6 +6645,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -6759,6 +6786,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -6779,6 +6807,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -6919,6 +6948,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -6939,6 +6969,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -7079,6 +7110,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -7099,6 +7131,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -7239,6 +7272,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -7259,6 +7293,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -7399,6 +7434,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -7419,6 +7455,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -7559,6 +7596,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -7579,6 +7617,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -7719,6 +7758,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -7739,6 +7779,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -7827,6 +7868,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -7847,6 +7889,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -7935,6 +7978,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -7955,6 +7999,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -8043,6 +8088,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -8063,6 +8109,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -8151,6 +8198,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, right: Table { span: Some( @@ -8171,6 +8219,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -8241,6 +8290,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -8350,6 +8400,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -8432,6 +8483,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -8541,6 +8593,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -8623,6 +8676,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -8732,6 +8786,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -8814,6 +8869,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -8921,6 +8977,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -9453,6 +9510,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -9541,6 +9599,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -9767,6 +9826,318 @@ Query( ) +---------- Input ---------- +select * from t sample row (99); +---------- Output --------- +SELECT * FROM t SAMPLE ROW (99) +---------- AST ------------ +Query( + Query { + span: Some( + 0..31, + ), + with: None, + body: Select( + SelectStmt { + span: Some( + 0..31, + ), + hints: None, + distinct: false, + top_n: None, + select_list: [ + StarColumns { + qualified: [ + Star( + Some( + 7..8, + ), + ), + ], + column_filter: None, + }, + ], + from: [ + Table { + span: Some( + 14..31, + ), + catalog: None, + database: None, + table: Identifier { + span: Some( + 14..15, + ), + name: "t", + quote: None, + is_hole: false, + }, + alias: None, + temporal: None, + consume: false, + pivot: None, + unpivot: None, + sample: Some( + Sample { + sample_level: ROW, + sample_conf: Probability( + UInt64( + 99, + ), + ), + }, + ), + }, + ], + selection: None, + group_by: None, + having: None, + window_list: None, + qualify: None, + }, + ), + order_by: [], + limit: [], + offset: None, + ignore_result: false, + }, +) + + +---------- Input ---------- +select * from t sample block (99); +---------- Output --------- +SELECT * FROM t SAMPLE BLOCK (99) +---------- AST ------------ +Query( + Query { + span: Some( + 0..33, + ), + with: None, + body: Select( + SelectStmt { + span: Some( + 0..33, + ), + hints: None, + distinct: false, + top_n: None, + select_list: [ + StarColumns { + qualified: [ + Star( + Some( + 7..8, + ), + ), + ], + column_filter: None, + }, + ], + from: [ + Table { + span: Some( + 14..33, + ), + catalog: None, + database: None, + table: Identifier { + span: Some( + 14..15, + ), + name: "t", + quote: None, + is_hole: false, + }, + alias: None, + temporal: None, + consume: false, + pivot: None, + unpivot: None, + sample: Some( + Sample { + sample_level: BLOCK, + sample_conf: Probability( + UInt64( + 99, + ), + ), + }, + ), + }, + ], + selection: None, + group_by: None, + having: None, + window_list: None, + qualify: None, + }, + ), + order_by: [], + limit: [], + offset: None, + ignore_result: false, + }, +) + + +---------- Input ---------- +select * from t sample row (10 rows); +---------- Output --------- +SELECT * FROM t SAMPLE ROW (10 ROWS) +---------- AST ------------ +Query( + Query { + span: Some( + 0..36, + ), + with: None, + body: Select( + SelectStmt { + span: Some( + 0..36, + ), + hints: None, + distinct: false, + top_n: None, + select_list: [ + StarColumns { + qualified: [ + Star( + Some( + 7..8, + ), + ), + ], + column_filter: None, + }, + ], + from: [ + Table { + span: Some( + 14..36, + ), + catalog: None, + database: None, + table: Identifier { + span: Some( + 14..15, + ), + name: "t", + quote: None, + is_hole: false, + }, + alias: None, + temporal: None, + consume: false, + pivot: None, + unpivot: None, + sample: Some( + Sample { + sample_level: ROW, + sample_conf: RowsNum( + UInt64( + 10, + ), + ), + }, + ), + }, + ], + selection: None, + group_by: None, + having: None, + window_list: None, + qualify: None, + }, + ), + order_by: [], + limit: [], + offset: None, + ignore_result: false, + }, +) + + +---------- Input ---------- +select * from t sample block (10 rows); +---------- Output --------- +SELECT * FROM t SAMPLE BLOCK (10 ROWS) +---------- AST ------------ +Query( + Query { + span: Some( + 0..38, + ), + with: None, + body: Select( + SelectStmt { + span: Some( + 0..38, + ), + hints: None, + distinct: false, + top_n: None, + select_list: [ + StarColumns { + qualified: [ + Star( + Some( + 7..8, + ), + ), + ], + column_filter: None, + }, + ], + from: [ + Table { + span: Some( + 14..38, + ), + catalog: None, + database: None, + table: Identifier { + span: Some( + 14..15, + ), + name: "t", + quote: None, + is_hole: false, + }, + alias: None, + temporal: None, + consume: false, + pivot: None, + unpivot: None, + sample: Some( + Sample { + sample_level: BLOCK, + sample_conf: RowsNum( + UInt64( + 10, + ), + ), + }, + ), + }, + ], + selection: None, + group_by: None, + having: None, + window_list: None, + qualify: None, + }, + ), + order_by: [], + limit: [], + offset: None, + ignore_result: false, + }, +) + + ---------- Input ---------- insert into t (c1, c2) values (1, 2), (3, 4); ---------- Output --------- @@ -9979,6 +10350,7 @@ Insert( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -10829,6 +11201,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: AlterTableClusterKey { cluster_by: [ @@ -10884,6 +11257,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: ModifyTableComment { new_comment: "t1-commnet", @@ -10919,6 +11293,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: DropTableClusterKey, }, @@ -10952,6 +11327,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: ReclusterTable { is_final: true, @@ -11025,6 +11401,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: AddColumn { column: ColumnDefinition { @@ -11075,6 +11452,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: AddColumn { column: ColumnDefinition { @@ -11125,6 +11503,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: AddColumn { column: ColumnDefinition { @@ -11188,6 +11567,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: AddColumn { column: ColumnDefinition { @@ -11256,6 +11636,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: RenameColumn { old_column: Identifier { @@ -11306,6 +11687,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: DropColumn { column: Identifier { @@ -11348,6 +11730,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: DropColumn { column: Identifier { @@ -11390,6 +11773,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: ModifyColumn { action: SetMaskingPolicy( @@ -11435,6 +11819,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: ModifyColumn { action: UnsetMaskingPolicy( @@ -11479,6 +11864,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: ModifyColumn { action: SetDataType( @@ -11554,6 +11940,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: ModifyColumn { action: SetDataType( @@ -11635,6 +12022,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: ModifyColumn { action: SetDataType( @@ -11686,6 +12074,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: ModifyColumn { action: SetDataType( @@ -11737,6 +12126,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: ModifyColumn { action: ConvertStoredComputedColumn( @@ -11781,6 +12171,7 @@ AlterTable( consume: false, pivot: None, unpivot: None, + sample: None, }, action: SetOptions { set_options: { @@ -15510,6 +15901,7 @@ Update( consume: false, pivot: None, unpivot: None, + sample: None, }, update_list: [ UpdateExpr { @@ -16492,6 +16884,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, }, }, @@ -16737,6 +17130,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -16894,6 +17288,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -17049,6 +17444,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -17223,6 +17619,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -17377,6 +17774,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -17507,6 +17905,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -18403,6 +18802,7 @@ CreateDynamicTable( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -18548,6 +18948,7 @@ CreateDynamicTable( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -18746,6 +19147,7 @@ CreateDynamicTable( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -18950,6 +19352,7 @@ CreateDynamicTable( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -19186,6 +19589,7 @@ CreateDynamicTable( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -20824,6 +21228,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -20946,6 +21351,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -21070,6 +21476,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -21194,6 +21601,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -21318,6 +21726,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -21435,6 +21844,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, @@ -22356,6 +22766,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: Some( @@ -22584,6 +22995,7 @@ Query( consume: false, pivot: None, unpivot: None, + sample: None, }, ], selection: None, diff --git a/src/query/sql/src/planner/binder/bind_mutation/merge.rs b/src/query/sql/src/planner/binder/bind_mutation/merge.rs index a1b7a3179d23..78c29fea4aa4 100644 --- a/src/query/sql/src/planner/binder/bind_mutation/merge.rs +++ b/src/query/sql/src/planner/binder/bind_mutation/merge.rs @@ -59,6 +59,7 @@ impl Binder { consume: false, pivot: None, unpivot: None, + sample: None, }; let source_reference = stmt.source.transform_table_reference(); diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind.rs index 15a108fc39ff..56adad136a97 100644 --- a/src/query/sql/src/planner/binder/bind_table_reference/bind.rs +++ b/src/query/sql/src/planner/binder/bind_table_reference/bind.rs @@ -36,6 +36,7 @@ impl Binder { pivot: _, unpivot: _, consume, + sample, } => self.bind_table( bind_context, span, @@ -45,6 +46,7 @@ impl Binder { alias, temporal, *consume, + sample, ), TableReference::TableFunction { span, diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs index fc5bc5be428e..78e70e4b20f8 100644 --- a/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs +++ b/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs @@ -13,6 +13,7 @@ // limitations under the License. use databend_common_ast::ast::Identifier; +use databend_common_ast::ast::Sample; use databend_common_ast::ast::Statement; use databend_common_ast::ast::TableAlias; use databend_common_ast::ast::TemporalClause; @@ -44,6 +45,7 @@ impl Binder { alias: &Option, temporal: &Option, consume: bool, + sample: &Option, ) -> Result<(SExpr, BindContext)> { let table_identifier = TableIdentifier::new(self, catalog, database, table, alias); let (catalog, database, table_name, table_name_alias) = ( @@ -142,6 +144,7 @@ impl Binder { database.as_str(), table_index, change_type, + sample, )?; if let Some(alias) = alias { @@ -247,8 +250,13 @@ impl Binder { false, ); - let (s_expr, mut bind_context) = - self.bind_base_table(bind_context, database.as_str(), table_index, None)?; + let (s_expr, mut bind_context) = self.bind_base_table( + bind_context, + database.as_str(), + table_index, + None, + sample, + )?; if let Some(alias) = alias { bind_context.apply_table_alias(alias, &self.name_resolution_ctx)?; } diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind_table_function.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind_table_function.rs index 9e506f14e60c..ebc422b5d656 100644 --- a/src/query/sql/src/planner/binder/bind_table_reference/bind_table_function.rs +++ b/src/query/sql/src/planner/binder/bind_table_reference/bind_table_function.rs @@ -152,7 +152,7 @@ impl Binder { ); let (s_expr, mut bind_context) = - self.bind_base_table(bind_context, "system", table_index, None)?; + self.bind_base_table(bind_context, "system", table_index, None, &None)?; if let Some(alias) = alias { bind_context.apply_table_alias(alias, &self.name_resolution_ctx)?; } @@ -220,7 +220,7 @@ impl Binder { ); let (s_expr, mut bind_context) = - self.bind_base_table(bind_context, "system", table_index, None)?; + self.bind_base_table(bind_context, "system", table_index, None, &None)?; if let Some(alias) = alias { bind_context.apply_table_alias(alias, &self.name_resolution_ctx)?; } diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs index 7e0b8d0b7292..ad52053be435 100644 --- a/src/query/sql/src/planner/binder/table.rs +++ b/src/query/sql/src/planner/binder/table.rs @@ -21,6 +21,9 @@ use chrono::Utc; use dashmap::DashMap; use databend_common_ast::ast::Identifier; use databend_common_ast::ast::Indirection; +use databend_common_ast::ast::Sample; +use databend_common_ast::ast::SampleConfig; +use databend_common_ast::ast::SampleLevel; use databend_common_ast::ast::SelectTarget; use databend_common_ast::ast::SetExpr; use databend_common_ast::ast::SetOperator; @@ -138,7 +141,7 @@ impl Binder { ); let (s_expr, mut bind_context) = - self.bind_base_table(bind_context, "system", table_index, None)?; + self.bind_base_table(bind_context, "system", table_index, None, &None)?; if let Some(alias) = alias { bind_context.apply_table_alias(alias, &self.name_resolution_ctx)?; } @@ -415,6 +418,7 @@ impl Binder { database_name: &str, table_index: IndexType, change_type: Option, + sample: &Option, ) -> Result<(SExpr, BindContext)> { let mut bind_context = BindContext::with_parent(Box::new(bind_context.clone())); @@ -468,6 +472,7 @@ impl Binder { columns: columns.into_iter().map(|col| col.index()).collect(), statistics: Arc::new(Statistics::default()), change_type, + sample_conf: table_sample(sample)?, ..Default::default() } .into(), @@ -657,3 +662,15 @@ impl Binder { Ok(index_metas) } } + +fn table_sample(sample: &Option) -> Result> { + if let Some(sample) = sample { + if sample.sample_level == SampleLevel::BLOCK { + return Err(ErrorCode::SyntaxException( + "BLOCK sampling is not supported.".to_string(), + )); + } + return Ok(Some(sample.sample_conf.clone())); + } + Ok(None) +} diff --git a/src/query/sql/src/planner/dataframe.rs b/src/query/sql/src/planner/dataframe.rs index 8f263dc674cf..aab71c714470 100644 --- a/src/query/sql/src/planner/dataframe.rs +++ b/src/query/sql/src/planner/dataframe.rs @@ -65,6 +65,7 @@ impl Dataframe { consume: false, pivot: None, unpivot: None, + sample: None, }; let settings = query_ctx.get_settings(); @@ -104,7 +105,7 @@ impl Dataframe { false, ); - binder.bind_base_table(&bind_context, database, table_index, None) + binder.bind_base_table(&bind_context, database, table_index, None, &None) } else { binder.bind_table_reference(&mut bind_context, &table) }?; @@ -469,6 +470,7 @@ impl Dataframe { consume: false, pivot: None, unpivot: None, + sample: None, }; table_ref.push(table); } diff --git a/src/query/sql/src/planner/optimizer/statistics/collect_statistics.rs b/src/query/sql/src/planner/optimizer/statistics/collect_statistics.rs index 33c67d10aee8..0a358895316e 100644 --- a/src/query/sql/src/planner/optimizer/statistics/collect_statistics.rs +++ b/src/query/sql/src/planner/optimizer/statistics/collect_statistics.rs @@ -23,6 +23,7 @@ use log::info; use crate::optimizer::RelExpr; use crate::optimizer::SExpr; use crate::optimizer::StatInfo; +use crate::plans::Filter; use crate::plans::RelOperator; use crate::plans::Statistics; use crate::BaseTableColumn; @@ -68,6 +69,8 @@ impl CollectStatisticsOptimizer { .table_statistics(self.table_ctx.clone(), true, scan.change_type.clone()) .await?; + let sample_filter = scan.sample_filter(&table_stats)?; + let mut column_stats = HashMap::new(); let mut histograms = HashMap::new(); for column in columns.iter() { @@ -104,8 +107,14 @@ impl CollectStatisticsOptimizer { column_stats, histograms, }); - - Ok(s_expr.replace_plan(Arc::new(RelOperator::Scan(scan)))) + let mut s_expr = s_expr.replace_plan(Arc::new(RelOperator::Scan(scan))); + if let Some(sample_filter) = sample_filter { + let filter = Filter { + predicates: vec![sample_filter], + }; + s_expr = SExpr::create_unary(Arc::new(filter.into()), Arc::new(s_expr)) + } + Ok(s_expr) } RelOperator::MaterializedCte(materialized_cte) => { // Collect the common table expression statistics first. diff --git a/src/query/sql/src/planner/plans/scan.rs b/src/query/sql/src/planner/plans/scan.rs index 1c9606a1b40d..7c55fd59b4c0 100644 --- a/src/query/sql/src/planner/plans/scan.rs +++ b/src/query/sql/src/planner/plans/scan.rs @@ -16,12 +16,17 @@ use std::collections::HashMap; use std::collections::HashSet; use std::sync::Arc; +use databend_common_ast::ast::Literal; +use databend_common_ast::ast::SampleConfig; use databend_common_catalog::plan::InvertedIndexInfo; use databend_common_catalog::statistics::BasicColumnStatistics; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::types::NumberScalar; +use databend_common_expression::types::F64; +use databend_common_expression::Scalar; use databend_common_expression::TableSchemaRef; use databend_common_storage::Histogram; use databend_common_storage::DEFAULT_HISTOGRAM_BUCKETS; @@ -42,6 +47,8 @@ use crate::optimizer::SelectivityEstimator; use crate::optimizer::StatInfo; use crate::optimizer::Statistics as OpStatistics; use crate::optimizer::MAX_SELECTIVITY; +use crate::plans::ConstantExpr; +use crate::plans::FunctionCall; use crate::plans::Operator; use crate::plans::RelOp; use crate::plans::ScalarExpr; @@ -105,6 +112,7 @@ pub struct Scan { pub inverted_index: Option, // Lazy row fetch. pub is_lazy_table: bool, + pub sample_conf: Option, pub statistics: Arc, } @@ -144,6 +152,7 @@ impl Scan { update_stream_columns: self.update_stream_columns, inverted_index: self.inverted_index.clone(), is_lazy_table: self.is_lazy_table, + sample_conf: self.sample_conf.clone(), } } @@ -165,6 +174,57 @@ impl Scan { used_columns.extend(self.columns.iter()); used_columns } + + pub fn sample_filter(&self, stats: &Option) -> Result> { + if let Some(sample_conf) = &self.sample_conf { + let rand = match sample_conf { + SampleConfig::Probability(probability) => probability.as_double()? / 100.0, + SampleConfig::RowsNum(rows) => { + let rows = if let Literal::UInt64(rows) = rows { + *rows + } else { + return Err(ErrorCode::SyntaxException( + "Sample rows should be a positive integer".to_string(), + )); + }; + if let Some(stats) = stats { + if let Some(row_num) = stats.num_rows + && row_num > 0 + { + rows as f64 / row_num as f64 + } else { + return Err(ErrorCode::Internal( + "Number of rows in stats is invalid".to_string(), + )); + } + } else { + return Err(ErrorCode::Internal( + "Table statistics is not available".to_string(), + )); + } + } + }; + let rand_expr = ScalarExpr::FunctionCall(FunctionCall { + span: None, + func_name: "rand".to_string(), + params: vec![], + arguments: vec![], + }); + return Ok(Some(ScalarExpr::FunctionCall(FunctionCall { + span: None, + func_name: "lte".to_string(), + params: vec![], + arguments: vec![ + rand_expr, + ScalarExpr::ConstantExpr(ConstantExpr { + span: None, + value: Scalar::Number(NumberScalar::Float64(F64::from(rand))), + }), + ], + }))); + } + Ok(None) + } } impl PartialEq for Scan { diff --git a/src/query/sql/src/planner/semantic/view_rewriter.rs b/src/query/sql/src/planner/semantic/view_rewriter.rs index cf9d06363b6c..0cf47078fe01 100644 --- a/src/query/sql/src/planner/semantic/view_rewriter.rs +++ b/src/query/sql/src/planner/semantic/view_rewriter.rs @@ -34,6 +34,7 @@ impl ViewRewriter { consume, pivot, unpivot, + sample, } = table_ref { // Must rewrite view query when table_ref::database is none. If not: @@ -53,6 +54,7 @@ impl ViewRewriter { consume: *consume, pivot: pivot.clone(), unpivot: unpivot.clone(), + sample: sample.clone(), } } } diff --git a/src/tests/sqlsmith/src/sql_gen/dml.rs b/src/tests/sqlsmith/src/sql_gen/dml.rs index 4d8003db4449..487b34ebd677 100644 --- a/src/tests/sqlsmith/src/sql_gen/dml.rs +++ b/src/tests/sqlsmith/src/sql_gen/dml.rs @@ -302,6 +302,7 @@ impl<'a, R: Rng + 'a> SqlGenerator<'a, R> { consume: false, pivot: None, unpivot: None, + sample: None, }; (table, table_reference) } @@ -505,6 +506,7 @@ impl<'a, R: Rng + 'a> SqlGenerator<'a, R> { consume: false, pivot: None, unpivot: None, + sample: None, }; Some(( AlterTableStmt { diff --git a/src/tests/sqlsmith/src/sql_gen/query.rs b/src/tests/sqlsmith/src/sql_gen/query.rs index d98825f76e3c..b6e40c106a17 100644 --- a/src/tests/sqlsmith/src/sql_gen/query.rs +++ b/src/tests/sqlsmith/src/sql_gen/query.rs @@ -483,6 +483,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { pivot: None, // TODO unpivot: None, + sample: None, }; (table_ref, schema) } diff --git a/tests/sqllogictests/suites/mode/standalone/explain/table_sample.test b/tests/sqllogictests/suites/mode/standalone/explain/table_sample.test new file mode 100644 index 000000000000..f6d19a62add4 --- /dev/null +++ b/tests/sqllogictests/suites/mode/standalone/explain/table_sample.test @@ -0,0 +1,42 @@ +statement ok +create or replace table t as select number as a from numbers(1000); + +query T +explain select * from t sample row (10 rows); +---- +Filter +├── output columns: [t.a (#0)] +├── filters: [rand() <= 0.01] +├── estimated rows: 200.00 +└── TableScan + ├── table: default.default.t + ├── output columns: [a (#0)] + ├── read rows: 1000 + ├── read size: 1.40 KiB + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [rand() <= 0.01], limit: NONE] + └── estimated rows: 1000.00 + +query T +explain select * from t sample row (99.1); +---- +Filter +├── output columns: [t.a (#0)] +├── filters: [rand() <= 0.991] +├── estimated rows: 200.00 +└── TableScan + ├── table: default.default.t + ├── output columns: [a (#0)] + ├── read rows: 1000 + ├── read size: 1.40 KiB + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [rand() <= 0.991], limit: NONE] + └── estimated rows: 1000.00 + + +statement ok +drop table t; diff --git a/tests/suites/1_stateful/07_stage_attachment/07_0000_insert_with_stage.sh b/tests/suites/1_stateful/07_stage_attachment/07_0000_insert_with_stage.sh index e98e729f896e..fbc0af5caa1c 100755 --- a/tests/suites/1_stateful/07_stage_attachment/07_0000_insert_with_stage.sh +++ b/tests/suites/1_stateful/07_stage_attachment/07_0000_insert_with_stage.sh @@ -3,12 +3,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../../../shell_env.sh -echo "drop table if exists sample" | $BENDSQL_CLIENT_CONNECT +echo "drop table if exists sample_table" | $BENDSQL_CLIENT_CONNECT echo "drop stage if exists s1" | $BENDSQL_CLIENT_CONNECT ## Create table cat </dev/null -curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "insert into sample (Id, City, Score) values (?,?,?)", "stage_attachment": {"location": "@s1/sample1.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.state, .stats.scan_progress.bytes, .error' -echo "select * from sample" | $BENDSQL_CLIENT_CONNECT +curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "insert into sample_table (Id, City, Score) values (?,?,?)", "stage_attachment": {"location": "@s1/sample1.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.state, .stats.scan_progress.bytes, .error' +echo "select * from sample_table" | $BENDSQL_CLIENT_CONNECT # use placeholder (?, ?, 1+1) -echo "truncate table sample" | $BENDSQL_CLIENT_CONNECT +echo "truncate table sample_table" | $BENDSQL_CLIENT_CONNECT aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 cp s3://testbucket/admin/data/csv/sample_2_columns.csv s3://testbucket/admin/stage/internal/s1/sample2.csv >/dev/null -curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "insert into sample (Id, City, Score) values (?,?,1+1)", "stage_attachment": {"location": "@s1/sample2.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.state, .stats.scan_progress.bytes, .error' -echo "select * from sample" | $BENDSQL_CLIENT_CONNECT +curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "insert into sample_table (Id, City, Score) values (?,?,1+1)", "stage_attachment": {"location": "@s1/sample2.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.state, .stats.scan_progress.bytes, .error' +echo "select * from sample_table" | $BENDSQL_CLIENT_CONNECT # ### Drop table. -echo "drop table sample" | $BENDSQL_CLIENT_CONNECT +echo "drop table sample_table" | $BENDSQL_CLIENT_CONNECT echo "drop stage if exists s1" | $BENDSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.sh b/tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.sh index bf3ae653b21c..c40c690fff76 100755 --- a/tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.sh +++ b/tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.sh @@ -3,12 +3,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../../../shell_env.sh -echo "drop table if exists sample" | $BENDSQL_CLIENT_CONNECT +echo "drop table if exists sample_table" | $BENDSQL_CLIENT_CONNECT echo "drop stage if exists s1" | $BENDSQL_CLIENT_CONNECT ## Create table cat </dev/null -curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample (Id, City, Score) ON(Id) values (?,?,?)", "stage_attachment": {"location": "@s1/sample1.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .error' -echo "select * from sample order by id" | $BENDSQL_CLIENT_CONNECT +curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample_table (Id, City, Score) ON(Id) values (?,?,?)", "stage_attachment": {"location": "@s1/sample1.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .error' +echo "select * from sample_table order by id" | $BENDSQL_CLIENT_CONNECT # use placeholder (?, ?, 1+1) aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 cp s3://testbucket/admin/data/csv/sample_2_columns.csv s3://testbucket/admin/stage/internal/s1/sample2.csv >/dev/null -curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample (Id, City, Score) ON(Id) values (?,?,1+1)", "stage_attachment": {"location": "@s1/sample2.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .error' -echo "select * from sample order by id" | $BENDSQL_CLIENT_CONNECT +curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample_table (Id, City, Score) ON(Id) values (?,?,1+1)", "stage_attachment": {"location": "@s1/sample2.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .error' +echo "select * from sample_table order by id" | $BENDSQL_CLIENT_CONNECT aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 cp s3://testbucket/admin/data/csv/sample_3_replace.csv s3://testbucket/admin/stage/internal/s1/sample3.csv >/dev/null -curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample (Id, City, Score) ON(Id) values (?,?,?)", "stage_attachment": {"location": "@s1/sample3.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .error' -echo "select * from sample order by id" | $BENDSQL_CLIENT_CONNECT +curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample_table (Id, City, Score) ON(Id) values (?,?,?)", "stage_attachment": {"location": "@s1/sample3.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .error' +echo "select * from sample_table order by id" | $BENDSQL_CLIENT_CONNECT # duplicate value would show error and would not take effect aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 cp s3://testbucket/admin/data/csv/sample_3_duplicate.csv s3://testbucket/admin/stage/internal/s1/sample4.csv >/dev/null -curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample (Id, City, Score) ON(Id) values (?,?,?)", "stage_attachment": {"location": "@s1/sample4.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.error' -echo "select * from sample order by id" | $BENDSQL_CLIENT_CONNECT +curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample_table (Id, City, Score) ON(Id) values (?,?,?)", "stage_attachment": {"location": "@s1/sample4.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.error' +echo "select * from sample_table order by id" | $BENDSQL_CLIENT_CONNECT ### Drop table. -echo "drop table sample" | $BENDSQL_CLIENT_CONNECT +echo "drop table sample_table" | $BENDSQL_CLIENT_CONNECT echo "drop stage if exists s1" | $BENDSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/07_stage_attachment/07_0002_insert_with_stage_deduplicate.sh b/tests/suites/1_stateful/07_stage_attachment/07_0002_insert_with_stage_deduplicate.sh index b8d0e0eb7bb6..b6ecbafee537 100755 --- a/tests/suites/1_stateful/07_stage_attachment/07_0002_insert_with_stage_deduplicate.sh +++ b/tests/suites/1_stateful/07_stage_attachment/07_0002_insert_with_stage_deduplicate.sh @@ -3,12 +3,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../../../shell_env.sh -echo "drop table if exists sample" | $BENDSQL_CLIENT_CONNECT +echo "drop table if exists sample_table" | $BENDSQL_CLIENT_CONNECT echo "drop stage if exists s1" | $BENDSQL_CLIENT_CONNECT ## Create table cat <