From 76a0cab407e0ed89f08207e914927135379f2048 Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 27 Oct 2022 18:50:20 +0800 Subject: [PATCH 01/47] merge --- Cargo.lock | 18 +- src/query/catalog/Cargo.toml | 2 +- src/query/catalog/src/table.rs | 16 +- src/query/catalog/src/table_args.rs | 4 +- src/query/catalog/src/table_context.rs | 6 +- src/query/legacy-planners/src/lib.rs | 8 - src/query/planner/Cargo.toml | 6 +- src/query/planner/src/extras.rs | 158 +++++++++ src/query/planner/src/lib.rs | 19 +- src/query/planner/src/metadata.rs | 317 ------------------ .../src/partition.rs} | 0 .../src}/physical_scalar.rs | 41 +-- src/query/planner/src/plan_partition.rs | 50 +++ .../src/plan_read_datasource.rs | 15 +- src/query/planner/src/plans/delete.rs | 54 +++ src/query/planner/src/plans/mod.rs | 7 + .../src/plans/projection.rs} | 50 +-- src/query/planner/src/plans/setting.rs | 36 ++ src/query/planner/src/stage_table.rs | 44 +++ .../interpreters/fragments/v2/fragmenter.rs | 2 +- .../interpreter_user_stage_remove.rs | 2 +- .../service/src/servers/http/v1/download.rs | 2 +- .../src/table_functions/async_crash_me.rs | 2 +- .../src/table_functions/numbers_table.rs | 10 +- .../src/table_functions/sync_crash_me.rs | 4 +- .../table_functions/table_function_factory.rs | 4 +- src/query/sql/Cargo.toml | 12 - .../sql/src/evaluator/physical_scalar.rs | 3 +- .../sql/src/executor/expression_builder.rs | 191 ----------- src/query/sql/src/executor/format.rs | 26 +- src/query/sql/src/executor/mod.rs | 12 +- src/query/sql/src/executor/physical_plan.rs | 9 +- .../sql/src/executor/physical_plan_builder.rs | 74 ++-- .../sql/src/executor/physical_plan_display.rs | 2 +- src/query/sql/src/executor/plan_extras.rs | 74 ++++ .../src/executor/plan_read_datasource.rs} | 104 +++++- src/query/sql/src/lib.rs | 2 - src/query/sql/src/planner/binder/copy.rs | 7 +- src/query/sql/src/planner/binder/ddl/table.rs | 4 +- src/query/sql/src/planner/binder/delete.rs | 4 +- src/query/sql/src/planner/binder/join.rs | 2 +- src/query/sql/src/planner/binder/scalar.rs | 2 +- src/query/sql/src/planner/binder/setting.rs | 4 +- src/query/sql/src/planner/binder/table.rs | 9 +- src/query/sql/src/planner/metadata.rs | 305 +++++++++++++++++ src/query/sql/src/planner/mod.rs | 1 + .../planner/optimizer/heuristic/heuristic.rs | 2 +- src/query/sql/src/planner/planner.rs | 2 +- src/query/sql/src/planner/plans/copy_v2.rs | 2 +- src/query/sql/src/planner/plans/mod.rs | 19 ++ src/query/sql/src/planner/plans/plan.rs | 5 +- .../sql/src/planner/semantic/type_check.rs | 34 +- src/query/storages/factory/src/lib.rs | 2 - .../factory/src/result/block_buffer.rs | 2 +- .../factory/src/result/result_table.rs | 2 +- .../factory/src/result/result_table_sink.rs | 2 +- .../factory/src/result/result_table_source.rs | 2 +- .../storages/factory/src/result/writer.rs | 2 +- .../storages/factory/src/stage/stage_table.rs | 2 +- .../factory/src/stage/stage_table_sink.rs | 2 +- .../factory/src/system/clusters_table.rs | 2 +- .../clustering_information_table.rs | 2 +- .../fuse_blocks/fuse_block_table.rs | 2 +- .../fuse_segments/fuse_segment_table.rs | 3 +- .../fuse_snapshots/fuse_snapshot_table.rs | 2 +- .../fuse/src/table_functions/table_args.rs | 17 +- src/query/storages/hive/src/hive_table.rs | 2 +- 67 files changed, 1063 insertions(+), 771 deletions(-) create mode 100644 src/query/planner/src/extras.rs delete mode 100644 src/query/planner/src/metadata.rs rename src/query/{legacy-planners/src/plan_partition.rs => planner/src/partition.rs} (100%) rename src/query/{sql/src/executor => 
planner/src}/physical_scalar.rs (71%) create mode 100644 src/query/planner/src/plan_partition.rs rename src/query/{legacy-planners => planner}/src/plan_read_datasource.rs (93%) create mode 100644 src/query/planner/src/plans/delete.rs rename src/query/{legacy-planners/src/plan_node_extras.rs => planner/src/plans/projection.rs} (59%) create mode 100644 src/query/planner/src/plans/setting.rs create mode 100644 src/query/planner/src/stage_table.rs delete mode 100644 src/query/sql/src/executor/expression_builder.rs create mode 100644 src/query/sql/src/executor/plan_extras.rs rename src/query/{storages/factory/src/storage_table_read_plan.rs => sql/src/executor/plan_read_datasource.rs} (55%) diff --git a/Cargo.lock b/Cargo.lock index f22d5b79aeda..1564c55a8cff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1266,10 +1266,10 @@ dependencies = [ "common-functions", "common-io", "common-legacy-expression", - "common-legacy-planners", "common-meta-app", "common-meta-types", "common-pipeline-core", + "common-planner", "common-settings", "common-storage", "dyn-clone", @@ -1927,11 +1927,14 @@ dependencies = [ name = "common-planner" version = "0.1.0" dependencies = [ - "common-catalog", "common-datavalues", "common-meta-app", "common-meta-types", + "once_cell", "parking_lot 0.12.1", + "serde", + "serde_json", + "typetag", ] [[package]] @@ -2013,7 +2016,6 @@ dependencies = [ "bytes", "chrono", "chrono-tz", - "common-arrow", "common-ast", "common-base", "common-catalog", @@ -2025,13 +2027,9 @@ dependencies = [ "common-functions", "common-fuse-meta", "common-grpc", - "common-hashtable", "common-hive-meta-store", "common-http", "common-io", - "common-legacy-expression", - "common-legacy-parser", - "common-legacy-planners", "common-management", "common-meta-api", "common-meta-app", @@ -2046,13 +2044,7 @@ dependencies = [ "common-settings", "common-storage", "common-storages-constants", - "common-storages-factory", - "common-storages-fuse", - "common-storages-hive", - "common-storages-index", "common-storages-preludes", - "common-storages-share", - "common-streams", "common-tracing", "common-users", "futures", diff --git a/src/query/catalog/Cargo.toml b/src/query/catalog/Cargo.toml index 581b0b9cdd05..ca72bbcdc989 100644 --- a/src/query/catalog/Cargo.toml +++ b/src/query/catalog/Cargo.toml @@ -17,10 +17,10 @@ common-exception = { path = "../../common/exception" } common-functions = { path = "../functions" } common-io = { path = "../../common/io" } common-legacy-expression = { path = "../legacy-expression" } -common-legacy-planners = { path = "../legacy-planners" } common-meta-app = { path = "../../meta/app" } common-meta-types = { path = "../../meta/types" } common-pipeline-core = { path = "../pipeline/core" } +common-planner = { path = "../planner" } common-settings = { path = "../settings" } common-storage = { path = "../../common/storage" } diff --git a/src/query/catalog/src/table.rs b/src/query/catalog/src/table.rs index 23a7473efcbe..a61cb97f65cf 100644 --- a/src/query/catalog/src/table.rs +++ b/src/query/catalog/src/table.rs @@ -24,12 +24,12 @@ use common_datavalues::DataSchemaRef; use common_datavalues::DataValue; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_expression::LegacyExpression; -use common_legacy_planners::DeletePlan; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::PhysicalScalar; +use common_planner::plans::DeletePlan; 
+use common_planner::Partitions;
+use common_planner::ReadDataSourcePlan;
+use common_planner::extras::Extras;
+use common_planner::extras::Statistics;
 use common_meta_app::schema::TableInfo;
 use common_meta_types::MetaId;
 use common_pipeline_core::Pipeline;
@@ -90,7 +90,7 @@ pub trait Table: Sync + Send {
         false
     }
 
-    fn cluster_keys(&self) -> Vec<LegacyExpression> {
+    fn cluster_keys(&self) -> Vec<PhysicalScalar> {
         vec![]
     }
 
@@ -136,7 +136,7 @@ pub trait Table: Sync + Send {
         )))
     }
 
-    fn table_args(&self) -> Option<Vec<LegacyExpression>> {
+    fn table_args(&self) -> Option<Vec<DataValue>> {
         None
     }
 
diff --git a/src/query/catalog/src/table_args.rs b/src/query/catalog/src/table_args.rs
index 010b0465fd42..e74f5fcada3c 100644
--- a/src/query/catalog/src/table_args.rs
+++ b/src/query/catalog/src/table_args.rs
@@ -12,6 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use common_legacy_expression::LegacyExpression;
+use common_datavalues::DataValue;
 
-pub type TableArgs = Option<Vec<LegacyExpression>>;
+pub type TableArgs = Option<Vec<DataValue>>;
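+
+// Illustrative example only: with this alias, a table function call like
+// `numbers(10)` surfaces its argument as `Some(vec![DataValue::UInt64(10)])`
+// rather than as a `LegacyExpression` literal.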
diff --git a/src/query/catalog/src/table_context.rs b/src/query/catalog/src/table_context.rs
index 3a8d2ef9e082..435bec482367 100644
--- a/src/query/catalog/src/table_context.rs
+++ b/src/query/catalog/src/table_context.rs
@@ -24,11 +24,11 @@ use common_datablocks::DataBlock;
 use common_exception::Result;
 use common_functions::scalars::FunctionContext;
 use common_io::prelude::FormatSettings;
-use common_legacy_planners::PartInfoPtr;
-use common_legacy_planners::Partitions;
-use common_legacy_planners::ReadDataSourcePlan;
 use common_meta_types::RoleInfo;
 use common_meta_types::UserInfo;
+use common_planner::PartInfoPtr;
+use common_planner::Partitions;
+use common_planner::ReadDataSourcePlan;
 use common_settings::Settings;
 use common_storage::DataOperator;
 use common_storage::StorageMetrics;
diff --git a/src/query/legacy-planners/src/lib.rs b/src/query/legacy-planners/src/lib.rs
index 0acbe7973ad1..76f0c3e9b671 100644
--- a/src/query/legacy-planners/src/lib.rs
+++ b/src/query/legacy-planners/src/lib.rs
@@ -13,27 +13,19 @@
 // limitations under the License.
 
 mod plan_delete;
-mod plan_node_extras;
 mod plan_node_stage;
 mod plan_node_stage_table;
 mod plan_node_statistics;
-mod plan_partition;
-mod plan_read_datasource;
 mod plan_setting;
 mod plan_sink;
 
 pub use plan_delete::DeletePlan;
-pub use plan_node_extras::Extras;
-pub use plan_node_extras::PrewhereInfo;
-pub use plan_node_extras::Projection;
 pub use plan_node_stage::StageKind;
 pub use plan_node_stage_table::StageTableInfo;
 pub use plan_node_statistics::Statistics;
 pub use plan_partition::PartInfo;
 pub use plan_partition::PartInfoPtr;
 pub use plan_partition::Partitions;
-pub use plan_read_datasource::ReadDataSourcePlan;
-pub use plan_read_datasource::SourceInfo;
 pub use plan_setting::SettingPlan;
 pub use plan_setting::VarValue;
 pub use plan_sink::SINK_SCHEMA;
diff --git a/src/query/planner/Cargo.toml b/src/query/planner/Cargo.toml
index 7e4ec664f4b2..be7a897ddbe3 100644
--- a/src/query/planner/Cargo.toml
+++ b/src/query/planner/Cargo.toml
@@ -7,9 +7,13 @@ publish = { workspace = true }
 edition = { workspace = true }
 
 [dependencies]
-common-catalog = { path = "../catalog" }
 common-datavalues = { path = "../datavalues" }
 common-meta-app = { path = "../../meta/app" }
 common-meta-types = { path = "../../meta/types" }
+once_cell = "1.15.0"
 parking_lot = "0.12"
+typetag = "0.2.3"
+
+serde = { workspace = true }
+serde_json = { workspace = true }
diff --git a/src/query/planner/src/extras.rs b/src/query/planner/src/extras.rs
new file mode 100644
index 000000000000..51d92f51b7cb
--- /dev/null
+++ b/src/query/planner/src/extras.rs
@@ -0,0 +1,158 @@
+// Copyright 2021 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::Debug;
+
+use common_datavalues::prelude::*;
+use common_meta_app::schema::TableInfo;
+use once_cell::sync::Lazy;
+
+use crate::plans::Projection;
+use crate::PhysicalScalar;
+
+#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)]
+pub enum StageKind {
+    Normal,
+    Expansive,
+    Merge,
+}
+
+#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq)]
+pub struct PrewhereInfo {
+    /// columns to be output by the prewhere scan
+    pub output_columns: Projection,
+    /// columns used for the prewhere condition
+    pub prewhere_columns: Projection,
+    /// remain_columns = scan.columns - need_columns
+    pub remain_columns: Projection,
+    /// filter for prewhere
+    pub filter: PhysicalScalar,
+}
+
+/// Extras is a wrapper for push down items.
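+///
+/// A hypothetical example (the field values are illustrative, not taken from
+/// any real plan): reading columns 0 and 2 with a `LIMIT 10` pushed down to
+/// storage could be expressed as
+/// `Extras { projection: Some(Projection::Columns(vec![0, 2])), limit: Some(10), ..Extras::default() }`.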
+#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Default)]
+pub struct Extras {
+    /// Optional column indices to use as a projection
+    pub projection: Option<Projection>,
+    /// Optional filter expression plan
+    /// split_conjunctions by `and` operator
+    pub filters: Vec<PhysicalScalar>,
+    /// Optional prewhere information
+    /// used for prewhere optimization
+    pub prewhere: Option<PrewhereInfo>,
+    /// Optional limit to skip read
+    pub limit: Option<usize>,
+    /// Optional order_by expression plan
+    pub order_by: Vec<PhysicalScalar>,
+}
+
+impl Extras {
+    pub fn default() -> Self {
+        Extras {
+            projection: None,
+            filters: vec![],
+            prewhere: None,
+            limit: None,
+            order_by: vec![],
+        }
+    }
+}
+
+#[derive(serde::Serialize, serde::Deserialize, PartialEq, Eq, Clone, Debug, Default)]
+pub struct Statistics {
+    /// Total rows of the query read.
+    pub read_rows: usize,
+    /// Total bytes of the query read.
+    pub read_bytes: usize,
+    /// Number of partitions scanned, (after pruning)
+    pub partitions_scanned: usize,
+    /// Number of partitions, (before pruning)
+    pub partitions_total: usize,
+    /// Whether the statistics are exact.
+    pub is_exact: bool,
+}
+
+impl Statistics {
+    pub fn new_estimated(
+        read_rows: usize,
+        read_bytes: usize,
+        partitions_scanned: usize,
+        partitions_total: usize,
+    ) -> Self {
+        Statistics {
+            read_rows,
+            read_bytes,
+            partitions_scanned,
+            partitions_total,
+            is_exact: false,
+        }
+    }
+
+    pub fn new_exact(
+        read_rows: usize,
+        read_bytes: usize,
+        partitions_scanned: usize,
+        partitions_total: usize,
+    ) -> Self {
+        Statistics {
+            read_rows,
+            read_bytes,
+            partitions_scanned,
+            partitions_total,
+            is_exact: true,
+        }
+    }
+
+    pub fn default_exact() -> Self {
+        Self {
+            is_exact: true,
+            ..Default::default()
+        }
+    }
+
+    pub fn clear(&mut self) {
+        *self = Self::default();
+    }
+
+    pub fn get_description(&self, table_info: &TableInfo) -> String {
+        if self.read_rows > 0 {
+            format!(
+                "(Read from {} table, {} Read Rows:{}, Read Bytes:{}, Partitions Scanned:{}, Partitions Total:{})",
+                table_info.desc,
+                if self.is_exact {
+                    "Exactly"
+                } else {
+                    "Approximately"
+                },
+                self.read_rows,
+                self.read_bytes,
+                self.partitions_scanned,
+                self.partitions_total,
+            )
+        } else {
+            format!("(Read from {} table)", table_info.desc)
+        }
+    }
+}
+
+pub static SINK_SCHEMA: Lazy<DataSchemaRef> = Lazy::new(|| {
+    DataSchemaRefExt::create(vec![
+        DataField::new("seg_loc", Vu8::to_data_type()),
+        DataField::new("seg_info", Vu8::to_data_type()),
+    ])
+});
diff --git a/src/query/planner/src/lib.rs b/src/query/planner/src/lib.rs
index fac6a454027b..1248f7e8e524 100644
--- a/src/query/planner/src/lib.rs
+++ b/src/query/planner/src/lib.rs
@@ -21,14 +21,17 @@
 //! After all the planners work, `Interpreter` will use `PhysicalPlan` to
 //! build pipelines, then our processes will produce result data blocks.
 
-mod metadata;
-pub use metadata::ColumnEntry;
-pub use metadata::ColumnSet;
-pub use metadata::IndexType;
-pub use metadata::Metadata;
-pub use metadata::MetadataRef;
-pub use metadata::TableEntry;
-pub use metadata::DUMMY_TABLE_INDEX;
+mod partition;
+mod physical_scalar;
+
+pub mod extras;
+pub mod plan_partition;
+pub mod plan_read_datasource;
+pub mod stage_table;
+
+pub use physical_scalar::*;
+pub use plan_partition::*;
+pub use plan_read_datasource::*;
 
 // Plan will be used publicly.
 pub mod plans;
diff --git a/src/query/planner/src/metadata.rs b/src/query/planner/src/metadata.rs
deleted file mode 100644
index 6325946875e5..000000000000
--- a/src/query/planner/src/metadata.rs
+++ /dev/null
@@ -1,317 +0,0 @@
-// Copyright 2022 Datafuse Labs.
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::HashSet; -use std::collections::VecDeque; -use std::fmt::Debug; -use std::fmt::Formatter; -use std::sync::Arc; - -use common_catalog::table::Table; -use common_datavalues::DataField; -use common_datavalues::DataType; -use common_datavalues::DataTypeImpl; -use common_datavalues::StructType; -use common_datavalues::TypeID; -use parking_lot::RwLock; - -/// Planner use [`usize`] as it's index type. -/// -/// This type will be used across the whole planner. -pub type IndexType = usize; - -/// Use IndexType::MAX to represent dummy table. -pub static DUMMY_TABLE_INDEX: IndexType = IndexType::MAX; - -/// ColumnSet represents a set of columns identified by its IndexType. -pub type ColumnSet = HashSet; - -/// A Send & Send version of [`Metadata`]. -/// -/// Callers can clone this ref safely and cheaply. -pub type MetadataRef = Arc>; - -/// Metadata stores information about columns and tables used in a query. -/// Tables and columns are identified with its unique index. -/// Notice that index value of a column can be same with that of a table. -#[derive(Clone, Debug, Default)] -pub struct Metadata { - tables: Vec, - columns: Vec, -} - -impl Metadata { - pub fn table(&self, index: IndexType) -> &TableEntry { - self.tables.get(index).expect("metadata must contain table") - } - - pub fn tables(&self) -> &[TableEntry] { - self.tables.as_slice() - } - - pub fn table_index_by_column_indexes(&self, column_indexes: &ColumnSet) -> Option { - self.columns - .iter() - .find(|v| column_indexes.contains(&v.column_index)) - .and_then(|v| v.table_index) - } - - pub fn column(&self, index: IndexType) -> &ColumnEntry { - self.columns - .get(index) - .expect("metadata must contain column") - } - - pub fn columns(&self) -> &[ColumnEntry] { - self.columns.as_slice() - } - - pub fn columns_by_table_index(&self, index: IndexType) -> Vec { - self.columns - .iter() - .filter(|v| v.table_index == Some(index)) - .cloned() - .collect() - } - - pub fn add_column( - &mut self, - name: String, - data_type: DataTypeImpl, - table_index: Option, - path_indices: Option>, - ) -> IndexType { - let column_index = self.columns.len(); - let column_entry = - ColumnEntry::new(name, data_type, column_index, table_index, path_indices); - self.columns.push(column_entry); - column_index - } - - pub fn add_table( - &mut self, - catalog: String, - database: String, - table_meta: Arc, - ) -> IndexType { - let table_name = table_meta.name().to_string(); - let table_index = self.tables.len(); - let table_entry = TableEntry { - index: table_index, - name: table_name, - database, - catalog, - table: table_meta.clone(), - }; - self.tables.push(table_entry); - let mut struct_fields = VecDeque::new(); - for (i, field) in table_meta.schema().fields().iter().enumerate() { - self.add_column( - field.name().clone(), - field.data_type().clone(), - Some(table_index), - None, - ); - if field.data_type().data_type_id() == TypeID::Struct { - struct_fields.push_back((vec![i], 
field.clone())); - } - } - // add inner columns of struct column - while !struct_fields.is_empty() { - let (path_indices, field) = struct_fields.pop_front().unwrap(); - let struct_type: StructType = field.data_type().clone().try_into().unwrap(); - - let inner_types = struct_type.types(); - let inner_names = match struct_type.names() { - Some(inner_names) => inner_names - .iter() - .map(|name| format!("{}:{}", field.name(), name)) - .collect::>(), - None => (0..inner_types.len()) - .map(|i| format!("{}:{}", field.name(), i + 1)) - .collect::>(), - }; - for ((i, inner_name), inner_type) in - inner_names.into_iter().enumerate().zip(inner_types.iter()) - { - let mut inner_path_indices = path_indices.clone(); - inner_path_indices.push(i); - - self.add_column( - inner_name.clone(), - inner_type.clone(), - Some(table_index), - Some(inner_path_indices.clone()), - ); - if inner_type.data_type_id() == TypeID::Struct { - let inner_field = DataField::new(&inner_name, inner_type.clone()); - struct_fields.push_back((inner_path_indices, inner_field)); - } - } - } - table_index - } - - /// find_smallest_column in given indices. - pub fn find_smallest_column(&self, indices: &[usize]) -> usize { - let mut smallest_index = indices.iter().min().expect("indices must be valid"); - let mut smallest_size = usize::MAX; - for idx in indices.iter() { - let entry = self.column(*idx); - if let Ok(bytes) = entry.data_type.data_type_id().numeric_byte_size() { - if smallest_size > bytes { - smallest_size = bytes; - smallest_index = &entry.column_index; - } - } - } - *smallest_index - } - - /// find_smallest_column_by_table_index by given table_index - pub fn find_smallest_column_by_table_index(&self, table_index: IndexType) -> usize { - let indices: Vec = self - .columns - .iter() - .filter(|v| v.table_index == Some(table_index)) - .map(|v| v.column_index) - .collect(); - - self.find_smallest_column(&indices) - } -} - -#[derive(Clone)] -pub struct TableEntry { - catalog: String, - database: String, - name: String, - index: IndexType, - - table: Arc, -} - -impl Debug for TableEntry { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("TableEntry") - .field("catalog", &self.catalog) - .field("database", &self.database) - .field("name", &self.name) - .field("index", &self.index) - .finish_non_exhaustive() - } -} - -impl TableEntry { - pub fn new( - index: IndexType, - name: String, - catalog: String, - database: String, - table: Arc, - ) -> Self { - TableEntry { - index, - name, - catalog, - database, - table, - } - } - - /// Get the catalog name of this table entry. - pub fn catalog(&self) -> &str { - &self.catalog - } - - /// Get the database name of this table entry. - pub fn database(&self) -> &str { - &self.database - } - - /// Get the name of this table entry. - pub fn name(&self) -> &str { - &self.name - } - - /// Get the index this table entry. - pub fn index(&self) -> IndexType { - self.index - } - - /// Get the table of this table entry. - pub fn table(&self) -> Arc { - self.table.clone() - } -} - -#[derive(Clone, Debug)] -pub struct ColumnEntry { - column_index: IndexType, - name: String, - data_type: DataTypeImpl, - - /// Table index of column entry. None if column is derived from a subquery. - table_index: Option, - /// Path indices for inner column of struct data type. 
- path_indices: Option>, -} - -impl ColumnEntry { - pub fn new( - name: String, - data_type: DataTypeImpl, - column_index: IndexType, - table_index: Option, - path_indices: Option>, - ) -> Self { - ColumnEntry { - column_index, - name, - data_type, - table_index, - path_indices, - } - } - - /// Get the name of this column entry. - pub fn name(&self) -> &str { - &self.name - } - - /// Get the index of this column entry. - pub fn index(&self) -> IndexType { - self.column_index - } - - /// Get the data type of this column entry. - pub fn data_type(&self) -> &DataTypeImpl { - &self.data_type - } - - /// Get the table index of this column entry. - pub fn table_index(&self) -> Option { - self.table_index - } - - /// Get the path indices of this column entry. - pub fn path_indices(&self) -> Option<&[IndexType]> { - self.path_indices.as_deref() - } - - /// Check if this column entry contains path_indices - pub fn has_path_indices(&self) -> bool { - self.path_indices.is_some() - } -} diff --git a/src/query/legacy-planners/src/plan_partition.rs b/src/query/planner/src/partition.rs similarity index 100% rename from src/query/legacy-planners/src/plan_partition.rs rename to src/query/planner/src/partition.rs diff --git a/src/query/sql/src/executor/physical_scalar.rs b/src/query/planner/src/physical_scalar.rs similarity index 71% rename from src/query/sql/src/executor/physical_scalar.rs rename to src/query/planner/src/physical_scalar.rs index 8c199a8a233f..f86b06b41a0b 100644 --- a/src/query/sql/src/executor/physical_scalar.rs +++ b/src/query/planner/src/physical_scalar.rs @@ -15,11 +15,9 @@ use common_datavalues::format_data_type_sql; use common_datavalues::DataTypeImpl; use common_datavalues::DataValue; -use common_exception::Result; -use common_planner::IndexType; -use common_planner::MetadataRef; -use super::ColumnID; +type ColumnID = String; +type IndexType = usize; /// Serializable and desugared representation of `Scalar`. #[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)] @@ -57,23 +55,23 @@ impl PhysicalScalar { } /// Display with readable variable name. - pub fn pretty_display(&self, _metadata: &MetadataRef) -> Result { + pub fn pretty_display(&self) -> String { match self { - PhysicalScalar::Constant { value, .. } => Ok(value.to_string()), + PhysicalScalar::Constant { value, .. } => value.to_string(), PhysicalScalar::Function { name, args, .. } => { let args = args .iter() - .map(|(arg, _)| arg.pretty_display(_metadata)) - .collect::>>()? + .map(|(arg, _)| arg.pretty_display()) + .collect::>() .join(", "); - Ok(format!("{}({})", name, args)) + format!("{}({})", name, args) } - PhysicalScalar::Cast { input, target } => Ok(format!( + PhysicalScalar::Cast { input, target } => format!( "CAST({} AS {})", - input.pretty_display(_metadata)?, + input.pretty_display(), format_data_type_sql(target) - )), - PhysicalScalar::IndexedVariable { display_name, .. } => Ok(display_name.clone()), + ), + PhysicalScalar::IndexedVariable { display_name, .. } => display_name.clone(), } } } @@ -88,23 +86,6 @@ pub struct AggregateFunctionDesc { pub arg_indices: Vec, } -impl AggregateFunctionDesc { - pub fn pretty_display(&self, metadata: &MetadataRef) -> Result { - Ok(format!( - "{}({})", - self.sig.name, - self.arg_indices - .iter() - .map(|&index| { - let column = metadata.read().column(index).clone(); - Ok(column.name().to_string()) - }) - .collect::>>()? 
-            .join(", ")
-        ))
-    }
-}
-
 #[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
 pub struct AggregateFunctionSignature {
     pub name: String,
diff --git a/src/query/planner/src/plan_partition.rs b/src/query/planner/src/plan_partition.rs
new file mode 100644
index 000000000000..27434c3c29e4
--- /dev/null
+++ b/src/query/planner/src/plan_partition.rs
@@ -0,0 +1,50 @@
+// Copyright 2021 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::any::Any;
+use std::fmt::Debug;
+use std::fmt::Formatter;
+use std::sync::Arc;
+
+#[typetag::serde(tag = "type")]
+pub trait PartInfo: Send + Sync {
+    fn as_any(&self) -> &dyn Any;
+
+    #[allow(clippy::borrowed_box)]
+    fn equals(&self, info: &Box<dyn PartInfo>) -> bool;
+}
+
+impl Debug for Box<dyn PartInfo> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match serde_json::to_string(self) {
+            Ok(str) => write!(f, "{}", str),
+            Err(_cause) => Err(std::fmt::Error {}),
+        }
+    }
+}
+
+impl PartialEq for Box<dyn PartInfo> {
+    fn eq(&self, other: &Self) -> bool {
+        let this_type_id = self.as_any().type_id();
+        let other_type_id = other.as_any().type_id();
+
+        match this_type_id == other_type_id {
+            true => self.equals(other),
+            false => false,
+        }
+    }
+}
+
+pub type PartInfoPtr = Arc<Box<dyn PartInfo>>;
+pub type Partitions = Vec<PartInfoPtr>;
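+
+// A minimal sketch of an implementor, for illustration only (the type name and
+// the `typetag` name below are hypothetical; real part infos live in the
+// storage crates). Kept commented out so it adds no code to this crate:
+//
+// #[derive(serde::Serialize, serde::Deserialize, PartialEq)]
+// struct ExamplePartInfo {
+//     start: u64,
+//     end: u64,
+// }
+//
+// #[typetag::serde(name = "example")]
+// impl PartInfo for ExamplePartInfo {
+//     fn as_any(&self) -> &dyn Any {
+//         self
+//     }
+//
+//     fn equals(&self, info: &Box<dyn PartInfo>) -> bool {
+//         info.as_any()
+//             .downcast_ref::<ExamplePartInfo>()
+//             .map_or(false, |other| self == other)
+//     }
+// }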
diff --git a/src/query/legacy-planners/src/plan_read_datasource.rs b/src/query/planner/src/plan_read_datasource.rs
similarity index 93%
rename from src/query/legacy-planners/src/plan_read_datasource.rs
rename to src/query/planner/src/plan_read_datasource.rs
index 6b0487e97e95..b78c05f529d1 100644
--- a/src/query/legacy-planners/src/plan_read_datasource.rs
+++ b/src/query/planner/src/plan_read_datasource.rs
@@ -18,14 +18,15 @@ use std::sync::Arc;
 use common_datavalues::DataField;
 use common_datavalues::DataSchema;
 use common_datavalues::DataSchemaRef;
-use common_legacy_expression::LegacyExpression;
+use common_datavalues::DataValue;
 use common_meta_app::schema::TableInfo;
 
-use crate::Extras;
-use crate::Partitions;
-use crate::Projection;
-use crate::StageTableInfo;
-use crate::Statistics;
+use crate::extras::Extras;
+use crate::extras::Statistics;
+use crate::plan_partition::Partitions;
+use crate::plans::Projection;
+use crate::stage_table::StageTableInfo;
 
 #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)]
 pub enum SourceInfo {
@@ -71,7 +72,7 @@ pub struct ReadDataSourcePlan {
     pub statistics: Statistics,
     pub description: String,
 
-    pub tbl_args: Option<Vec<LegacyExpression>>,
+    pub tbl_args: Option<Vec<DataValue>>,
 
     pub push_downs: Option<Extras>,
 }
diff --git a/src/query/planner/src/plans/delete.rs b/src/query/planner/src/plans/delete.rs
new file mode 100644
index 000000000000..b1769f633870
--- /dev/null
+++ b/src/query/planner/src/plans/delete.rs
@@ -0,0 +1,54 @@
+// Copyright 2021 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use common_datavalues::DataSchema;
+use common_datavalues::DataSchemaRef;
+use common_meta_app::schema::TableIdent;
+
+use super::Projection;
+
+/// # TODO
+///
+/// From @xuanwo
+///
+/// Ideally, we need to use `Scalar` in `DeletePlan.selection`. But we hit a
+/// cyclic dependency here, so we keep `selection` as a `String` for now and
+/// will change it into `Scalar` once our `Planner` has been moved out.
+///
+/// At this stage, DeletePlan's selection expr will be parsed twice:
+///
+/// - Parsed during `bind` to get column index and projection index.
+/// - Parsed during `execution` to get the correct columns.
+///
+/// It's an ugly but necessary price to pay. Without this, we would sink in
+/// hell forever.
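+///
+/// For example (illustrative values): `DELETE FROM t WHERE c > 1` carries
+/// `selection: Some("c > 1".to_string())` here, and that text is re-parsed
+/// at execution time.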
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct DeletePlan {
+    pub catalog_name: String,
+    pub database_name: String,
+    pub table_name: String,
+    pub table_id: TableIdent,
+    pub selection: Option<String>,
+    pub projection: Projection,
+}
+
+impl DeletePlan {
+    pub fn schema(&self) -> DataSchemaRef {
+        Arc::new(DataSchema::empty())
+    }
+}
diff --git a/src/query/planner/src/plans/mod.rs b/src/query/planner/src/plans/mod.rs
index 0b17f19963e8..acca28e892c5 100644
--- a/src/query/planner/src/plans/mod.rs
+++ b/src/query/planner/src/plans/mod.rs
@@ -51,6 +51,9 @@ mod truncate_table;
 mod undrop_database;
 mod undrop_table;
 mod use_database;
+mod delete;
+mod projection;
+mod setting;
 
 pub use alter_table_cluster_key::AlterTableClusterKeyPlan;
 pub use alter_udf::AlterUDFPlan;
@@ -94,3 +97,7 @@ pub use truncate_table::TruncateTablePlan;
 pub use undrop_database::UndropDatabasePlan;
 pub use undrop_table::UndropTablePlan;
 pub use use_database::UseDatabasePlan;
+
+pub use delete::*;
+pub use projection::*;
+pub use setting::*;
diff --git a/src/query/legacy-planners/src/plan_node_extras.rs b/src/query/planner/src/plans/projection.rs
similarity index 59%
rename from src/query/legacy-planners/src/plan_node_extras.rs
rename to src/query/planner/src/plans/projection.rs
index a1855e69d2b1..b1aae3bb9d5d 100644
--- a/src/query/legacy-planners/src/plan_node_extras.rs
+++ b/src/query/planner/src/plans/projection.rs
@@ -12,12 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
 use std::collections::BTreeMap;
 use std::fmt::Debug;
 use std::fmt::Formatter;
 
 use common_datavalues::DataSchema;
-use common_legacy_expression::LegacyExpression;
 
 #[derive(serde::Serialize, serde::Deserialize, Clone, PartialEq, Eq)]
 pub enum Projection {
@@ -63,45 +60,4 @@ impl Debug for Projection {
         }
     }
 }
-
-#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq)]
-pub struct PrewhereInfo {
-    /// columns to be ouput be prewhere scan
-    pub output_columns: Projection,
-    /// columns used for prewhere
-    pub prewhere_columns: Projection,
-    /// remain_columns = scan.columns - need_columns
-    pub remain_columns: Projection,
-    /// filter for prewhere
-    pub filter: LegacyExpression,
-}
-
-/// Extras is a wrapper for push down items.
-#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Default)]
-pub struct Extras {
-    /// Optional column indices to use as a projection
-    pub projection: Option<Projection>,
-    /// Optional filter expression plan
-    /// split_conjunctions by `and` operator
-    pub filters: Vec<LegacyExpression>,
-    /// Optional prewhere information
-    /// used for prewhere optimization
-    pub prewhere: Option<PrewhereInfo>,
-    /// Optional limit to skip read
-    pub limit: Option<usize>,
-    /// Optional order_by expression plan
-    pub order_by: Vec<LegacyExpression>,
-}
-
-impl Extras {
-    pub fn default() -> Self {
-        Extras {
-            projection: None,
-            filters: vec![],
-            prewhere: None,
-            limit: None,
-            order_by: vec![],
-        }
-    }
-}
diff --git a/src/query/planner/src/plans/setting.rs b/src/query/planner/src/plans/setting.rs
new file mode 100644
index 000000000000..3c2b1359dfe4
--- /dev/null
+++ b/src/query/planner/src/plans/setting.rs
@@ -0,0 +1,36 @@
+// Copyright 2021 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use common_datavalues::DataSchema;
+use common_datavalues::DataSchemaRef;
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct VarValue {
+    pub is_global: bool,
+    pub variable: String,
+    pub value: String,
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct SettingPlan {
+    pub vars: Vec<VarValue>,
+}
+
+impl SettingPlan {
+    pub fn schema(&self) -> DataSchemaRef {
+        Arc::new(DataSchema::empty())
+    }
+}
diff --git a/src/query/planner/src/stage_table.rs b/src/query/planner/src/stage_table.rs
new file mode 100644
index 000000000000..3d6826747733
--- /dev/null
+++ b/src/query/planner/src/stage_table.rs
@@ -0,0 +1,44 @@
+// Copyright 2022 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::Debug;
+use std::fmt::Formatter;
+
+use common_datavalues::DataSchemaRef;
+use common_meta_types::UserStageInfo;
+
+#[derive(serde::Serialize, serde::Deserialize, Clone, PartialEq, Eq)]
+pub struct StageTableInfo {
+    pub schema: DataSchemaRef,
+    pub stage_info: UserStageInfo,
+    pub path: String,
+    pub files: Vec<String>,
+}
+
+impl StageTableInfo {
+    pub fn schema(&self) -> DataSchemaRef {
+        self.schema.clone()
+    }
+
+    pub fn desc(&self) -> String {
+        self.stage_info.stage_name.clone()
+    }
+}
+
+impl Debug for StageTableInfo {
+    // Ignore the schema.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:?}", self.stage_info)
+    }
+}
diff --git a/src/query/service/src/interpreters/fragments/v2/fragmenter.rs b/src/query/service/src/interpreters/fragments/v2/fragmenter.rs
index 26e9c4238677..eb1dfe033222 100644
--- a/src/query/service/src/interpreters/fragments/v2/fragmenter.rs
+++ b/src/query/service/src/interpreters/fragments/v2/fragmenter.rs
@@ -16,7 +16,7 @@ use std::sync::Arc;
 
 use common_catalog::table_context::TableContext;
 use common_exception::Result;
-use common_legacy_planners::StageKind;
+use common_sql::executor::StageKind;
 
 use super::FragmentType;
 use super::PlanFragment;
diff --git a/src/query/service/src/interpreters/interpreter_user_stage_remove.rs b/src/query/service/src/interpreters/interpreter_user_stage_remove.rs
index 273f667b7fb0..67bdcccba42c 100644
--- a/src/query/service/src/interpreters/interpreter_user_stage_remove.rs
+++ b/src/query/service/src/interpreters/interpreter_user_stage_remove.rs
@@ -14,10 +14,10 @@
 
 use std::sync::Arc;
 
+use common_catalog::table_context::TableContext;
 use common_exception::ErrorCode;
 use common_exception::Result;
 use common_planner::plans::RemoveStagePlan;
-use common_storages_fuse::TableContext;
 use regex::Regex;
 
 use crate::interpreters::interpreter_common::list_files;
diff --git a/src/query/service/src/servers/http/v1/download.rs b/src/query/service/src/servers/http/v1/download.rs
index f127eeaeca8e..0893c9cec9f4 100644
--- a/src/query/service/src/servers/http/v1/download.rs
+++ b/src/query/service/src/servers/http/v1/download.rs
@@ -15,12 +15,12 @@
 use std::sync::Arc;
 
 use async_stream::stream;
+use common_catalog::table_context::TableContext;
 use common_exception::Result;
 use common_formats::output_format::OutputFormatType;
 use common_legacy_planners::Extras;
 use common_legacy_planners::ReadDataSourcePlan;
 use common_legacy_planners::SourceInfo;
-use common_storages_fuse::TableContext;
 use futures::StreamExt;
 
 use crate::sessions::QueryContext;
diff --git a/src/query/service/src/table_functions/async_crash_me.rs b/src/query/service/src/table_functions/async_crash_me.rs
index 20bd8fcd6a1b..380c5c83cc48 100644
--- a/src/query/service/src/table_functions/async_crash_me.rs
+++ b/src/query/service/src/table_functions/async_crash_me.rs
@@ -113,7 +113,7 @@ impl Table for AsyncCrashMeTable {
         Ok((Statistics::new_exact(1, 1, 1, 1), vec![]))
     }
 
-    fn table_args(&self) -> Option<Vec<LegacyExpression>> {
-        Some(vec![LegacyExpression::create_literal(DataValue::UInt64(0))])
+    fn table_args(&self) -> Option<Vec<DataValue>> {
+        Some(vec![DataValue::UInt64(0)])
     }
 
diff --git 
a/src/query/service/src/table_functions/numbers_table.rs b/src/query/service/src/table_functions/numbers_table.rs
index 6d08c252a6b2..d7e7d913a4a8 100644
--- a/src/query/service/src/table_functions/numbers_table.rs
+++ b/src/query/service/src/table_functions/numbers_table.rs
@@ -66,9 +66,7 @@ impl NumbersTable {
         if let Some(args) = &table_args {
             if args.len() == 1 {
                 let arg = &args[0];
-                if let LegacyExpression::Literal { value, .. } = arg {
-                    total = Some(value.as_u64()?);
-                }
+                total = Some(arg.as_u64()?);
             }
         }
 
@@ -172,10 +170,8 @@ impl Table for NumbersTable {
         Ok((statistics, parts))
     }
 
-    fn table_args(&self) -> Option<Vec<LegacyExpression>> {
-        Some(vec![LegacyExpression::create_literal(DataValue::UInt64(
-            self.total,
-        ))])
+    fn table_args(&self) -> Option<Vec<DataValue>> {
+        Some(vec![DataValue::UInt64(self.total)])
     }
 
     fn read_data(
diff --git a/src/query/service/src/table_functions/sync_crash_me.rs b/src/query/service/src/table_functions/sync_crash_me.rs
index 2b58c98a4bc1..354df7359726 100644
--- a/src/query/service/src/table_functions/sync_crash_me.rs
+++ b/src/query/service/src/table_functions/sync_crash_me.rs
@@ -113,8 +113,8 @@ impl Table for SyncCrashMeTable {
         Ok((Statistics::new_exact(1, 1, 1, 1), vec![]))
     }
 
-    fn table_args(&self) -> Option<Vec<LegacyExpression>> {
-        Some(vec![LegacyExpression::create_literal(DataValue::UInt64(0))])
+    fn table_args(&self) -> Option<Vec<DataValue>> {
+        Some(vec![DataValue::UInt64(0)])
     }
 
     fn read_data(
diff --git a/src/query/service/src/table_functions/table_function_factory.rs b/src/query/service/src/table_functions/table_function_factory.rs
index c8d388250819..26f2151c2abd 100644
--- a/src/query/service/src/table_functions/table_function_factory.rs
+++ b/src/query/service/src/table_functions/table_function_factory.rs
@@ -15,9 +15,9 @@
 use std::collections::HashMap;
 use std::sync::Arc;
 
+use common_datavalues::DataValue;
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_legacy_expression::LegacyExpression;
 use common_meta_types::MetaId;
 use parking_lot::RwLock;
 
@@ -32,7 +32,7 @@ use crate::table_functions::sync_crash_me::SyncCrashMeTable;
 use crate::table_functions::NumbersTable;
 use crate::table_functions::TableFunction;
 
-pub type TableArgs = Option<Vec<LegacyExpression>>;
+pub type TableArgs = Option<Vec<DataValue>>;
 type TableFunctionCreators = RwLock<HashMap<String, (MetaId, Arc<dyn TableFunctionCreator>)>>;
 
 pub trait TableFunctionCreator: Send + Sync {
diff --git a/src/query/sql/Cargo.toml b/src/query/sql/Cargo.toml
index a013b7a89080..a11a66378690 100644
--- a/src/query/sql/Cargo.toml
+++ b/src/query/sql/Cargo.toml
@@ -12,7 +12,6 @@ test = false
 
 [dependencies] # In alphabetical order
 # Workspace dependencies
-common-arrow = { path = "../../common/arrow" }
 common-ast = { path = "../ast" }
 common-base = { path = "../../common/base" }
 common-catalog = { path = "../catalog" }
@@ -24,17 +23,12 @@ common-formats = { path = "../formats" }
 common-functions = { path = "../functions" }
 common-fuse-meta = { path = "../storages/fuse-meta" }
 common-grpc = { path = "../../common/grpc" }
-common-hashtable = { path = "../../common/hashtable" }
 common-hive-meta-store = { path = "../storages/hive-meta-store", optional = true }
 common-http = { path = "../../common/http" }
 common-io = { path = "../../common/io" }
-common-legacy-expression = { path = "../legacy-expression" }
-common-legacy-parser = { path = "../legacy-parser" }
-common-legacy-planners = { path = "../legacy-planners" }
 common-management = { path = "../management" }
 common-meta-api = { path = "../../meta/api" }
 common-meta-app = { path = "../../meta/app" }
-# common-meta-embedded = { path = "../../meta/embedded" }
 common-meta-store = 
{ path = "../../meta/store" } common-meta-types = { path = "../../meta/types" } common-metrics = { path = "../../common/metrics" } @@ -46,13 +40,7 @@ common-planner = { path = "../planner" } common-settings = { path = "../settings" } common-storage = { path = "../../common/storage" } common-storages-constants = { path = "../storages/constants" } -common-storages-factory = { path = "../storages/factory" } -common-storages-fuse = { path = "../storages/fuse" } -common-storages-hive = { path = "../storages/hive", optional = true } -common-storages-index = { path = "../storages/index" } common-storages-preludes = { path = "../storages/preludes" } -common-storages-share = { path = "../storages/share" } -common-streams = { path = "../streams" } common-tracing = { path = "../../common/tracing" } common-users = { path = "../users" } diff --git a/src/query/sql/src/evaluator/physical_scalar.rs b/src/query/sql/src/evaluator/physical_scalar.rs index b2b7e1a61178..98051276c77f 100644 --- a/src/query/sql/src/evaluator/physical_scalar.rs +++ b/src/query/sql/src/evaluator/physical_scalar.rs @@ -23,7 +23,8 @@ use common_functions::scalars::FunctionFactory; use crate::evaluator::eval_node::EvalNode; use crate::evaluator::Evaluator; -use crate::executor::PhysicalScalar; +use common_planner::PhysicalScalar; + impl Evaluator { pub fn eval_physical_scalars(physical_scalars: &[PhysicalScalar]) -> Result> { diff --git a/src/query/sql/src/executor/expression_builder.rs b/src/query/sql/src/executor/expression_builder.rs deleted file mode 100644 index 2f2b8dba2fa5..000000000000 --- a/src/query/sql/src/executor/expression_builder.rs +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use common_datavalues::DataTypeImpl; -use common_datavalues::DataValue; -use common_exception::ErrorCode; -use common_exception::Result; -use common_legacy_expression::LegacyExpression; -use common_planner::IndexType; -use common_planner::MetadataRef; - -use crate::executor::util::format_field_name; -use crate::plans::AggregateFunction; -use crate::plans::AndExpr; -use crate::plans::BoundColumnRef; -use crate::plans::CastExpr; -use crate::plans::ComparisonExpr; -use crate::plans::ConstantExpr; -use crate::plans::FunctionCall; -use crate::plans::OrExpr; -use crate::plans::Scalar; - -pub trait FiledNameFormat { - fn format(display_name: &str, index: IndexType) -> String; -} - -impl FiledNameFormat for ExpressionBuilder { - fn format(display_name: &str, index: IndexType) -> String { - format_field_name(display_name, index) - } -} - -impl FiledNameFormat for ExpressionBuilder { - fn format(display_name: &str, _index: IndexType) -> String { - display_name.to_owned() - } -} - -pub struct ExpressionBuilder { - metadata: MetadataRef, -} - -pub type ExpressionBuilderWithoutRenaming = ExpressionBuilder; -pub type ExpressionBuilderWithRenaming = ExpressionBuilder; - -impl ExpressionBuilder -where ExpressionBuilder: FiledNameFormat -{ - pub fn create(metadata: MetadataRef) -> Self { - ExpressionBuilder { metadata } - } - - pub fn build_and_rename(&self, scalar: &Scalar, index: IndexType) -> Result { - let expr = self.build(scalar)?; - let metadata = self.metadata.read(); - let name = metadata.column(index).name(); - Ok(LegacyExpression::Alias( - Self::format(name, index), - Box::new(expr), - )) - } - - pub fn build(&self, scalar: &Scalar) -> Result { - match scalar { - Scalar::BoundColumnRef(BoundColumnRef { column }) => { - self.build_column_ref(column.index) - } - Scalar::ConstantExpr(ConstantExpr { value, data_type }) => { - self.build_literal(value, data_type) - } - Scalar::ComparisonExpr(ComparisonExpr { - op, left, right, .. - }) => self.build_binary_operator(left, right, op.to_func_name()), - Scalar::AggregateFunction(AggregateFunction { - func_name, - distinct, - params, - args, - .. - }) => self.build_aggr_function(func_name.clone(), *distinct, params.clone(), args), - Scalar::AndExpr(AndExpr { left, right, .. }) => { - let left = self.build(left)?; - let right = self.build(right)?; - Ok(LegacyExpression::BinaryExpression { - left: Box::new(left), - op: "and".to_string(), - right: Box::new(right), - }) - } - Scalar::OrExpr(OrExpr { left, right, .. }) => { - let left = self.build(left)?; - let right = self.build(right)?; - Ok(LegacyExpression::BinaryExpression { - left: Box::new(left), - op: "or".to_string(), - right: Box::new(right), - }) - } - Scalar::FunctionCall(FunctionCall { - arguments, - func_name, - .. - }) => { - let args = arguments - .iter() - .map(|arg| self.build(arg)) - .collect::>>()?; - Ok(LegacyExpression::ScalarFunction { - op: func_name.clone(), - args, - }) - } - Scalar::CastExpr(CastExpr { - argument, - target_type, - .. 
- }) => { - let arg = self.build(argument)?; - Ok(LegacyExpression::Cast { - expr: Box::new(arg), - data_type: *target_type.clone(), - pg_style: false, - }) - } - Scalar::SubqueryExpr(_) => Err(ErrorCode::UnImplement("Unsupported subquery expr")), - } - } - - pub fn build_column_ref(&self, index: IndexType) -> Result { - let metadata = self.metadata.read(); - let name = metadata.column(index).name(); - Ok(LegacyExpression::Column(Self::format(name, index))) - } - - pub fn build_literal( - &self, - data_value: &DataValue, - data_type: &DataTypeImpl, - ) -> Result { - Ok(LegacyExpression::Literal { - value: data_value.clone(), - column_name: None, - data_type: data_type.clone(), - }) - } - - pub fn build_binary_operator( - &self, - left: &Scalar, - right: &Scalar, - op: String, - ) -> Result { - let left_child = self.build(left)?; - let right_child = self.build(right)?; - Ok(LegacyExpression::BinaryExpression { - left: Box::new(left_child), - op, - right: Box::new(right_child), - }) - } - - pub fn build_aggr_function( - &self, - op: String, - distinct: bool, - params: Vec, - args: &Vec, - ) -> Result { - let mut arg_exprs = Vec::with_capacity(args.len()); - for arg in args.iter() { - arg_exprs.push(self.build(arg)?); - } - Ok(LegacyExpression::AggregateFunction { - op, - distinct, - params, - args: arg_exprs, - }) - } -} diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs index 38072af9ed0f..777ec3227e3c 100644 --- a/src/query/sql/src/executor/format.rs +++ b/src/query/sql/src/executor/format.rs @@ -15,10 +15,10 @@ use common_ast::ast::FormatTreeNode; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_planners::StageKind; -use common_planner::IndexType; -use common_planner::MetadataRef; -use common_planner::DUMMY_TABLE_INDEX; +use common_planner::AggregateFunctionDesc; +use crate::planner::IndexType; +use crate::planner::MetadataRef; +use crate::planner::DUMMY_TABLE_INDEX; use itertools::Itertools; use super::AggregateFinal; @@ -31,6 +31,7 @@ use super::Limit; use super::PhysicalPlan; use super::Project; use super::Sort; +use super::StageKind; use super::TableScan; use super::UnionAll; @@ -40,6 +41,8 @@ impl PhysicalPlan { } } + + fn to_format_tree(plan: &PhysicalPlan, metadata: &MetadataRef) -> Result> { match plan { PhysicalPlan::TableScan(plan) => table_scan_to_format_tree(plan, metadata), @@ -163,6 +166,21 @@ fn eval_scalar_to_format_tree( )) } +pub fn pretty_display_agg_desc(desc: &AggregateFunctionDesc, metadata: &MetadataRef) -> Result { + Ok(format!( + "{}({})", + desc.sig.name, + desc.arg_indices + .iter() + .map(|&index| { + let column = metadata.read().column(index).clone(); + Ok(column.name().to_string()) + }) + .collect::>>()? + .join(", ") + )) +} + fn aggregate_partial_to_format_tree( plan: &AggregatePartial, metadata: &MetadataRef, diff --git a/src/query/sql/src/executor/mod.rs b/src/query/sql/src/executor/mod.rs index 1d9acbebbf5d..6c60ad22e2af 100644 --- a/src/query/sql/src/executor/mod.rs +++ b/src/query/sql/src/executor/mod.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-mod expression_builder;
 mod format;
 mod physical_plan;
 mod physical_plan_builder;
@@ -20,13 +19,18 @@ mod physical_plan_display;
 mod physical_plan_visitor;
 mod physical_scalar;
+mod plan_extras;
+mod plan_read_datasource;
 mod util;
 
-pub use expression_builder::ExpressionBuilder;
-pub use expression_builder::ExpressionBuilderWithRenaming;
-pub use expression_builder::ExpressionBuilderWithoutRenaming;
 pub use physical_plan::*;
 pub use physical_plan_builder::PhysicalPlanBuilder;
 pub use physical_plan_builder::PhysicalScalarBuilder;
 pub use physical_plan_visitor::PhysicalPlanReplacer;
 pub use physical_scalar::*;
+pub use plan_extras::*;
+pub use plan_read_datasource::ReadDataSourcePlan;
+pub use plan_read_datasource::SourceInfo;
+pub use plan_read_datasource::ToReadDataSourcePlan;
 pub use util::*;
diff --git a/src/query/sql/src/executor/physical_plan.rs b/src/query/sql/src/executor/physical_plan.rs
index 0b5b4b302d57..c1d87cdc407b 100644
--- a/src/query/sql/src/executor/physical_plan.rs
+++ b/src/query/sql/src/executor/physical_plan.rs
@@ -24,13 +24,14 @@ use common_datavalues::NullableType;
 use common_datavalues::ToDataType;
 use common_datavalues::Vu8;
 use common_exception::Result;
-use common_legacy_planners::ReadDataSourcePlan;
-use common_legacy_planners::StageKind;
-use common_legacy_planners::SINK_SCHEMA;
 use common_meta_app::schema::TableInfo;
 use common_planner::IndexType;
+use common_planner::PhysicalScalar;
 
-use super::physical_scalar::PhysicalScalar;
 use super::AggregateFunctionDesc;
+use super::ReadDataSourcePlan;
 use super::SortDesc;
+use super::StageKind;
 use crate::optimizer::ColumnSet;
+use crate::planner::plans::SINK_SCHEMA;
diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs
index 0a54a056b73f..b648f76ba60a 100644
--- a/src/query/sql/src/executor/physical_plan_builder.rs
+++ b/src/query/sql/src/executor/physical_plan_builder.rs
@@ -18,23 +18,20 @@ use std::sync::Arc;
 
 use common_catalog::catalog::CatalogManager;
 use common_catalog::catalog::CATALOG_DEFAULT;
+use common_catalog::table_context::TableContext;
 use common_datavalues::DataSchemaRef;
 use common_datavalues::DataSchemaRefExt;
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_legacy_expression::LegacyExpression;
-use common_legacy_planners::Extras;
-use common_legacy_planners::PrewhereInfo;
-use common_legacy_planners::Projection;
-use common_legacy_planners::StageKind;
+use common_functions::scalars::FunctionFactory;
 use common_planner::IndexType;
 use common_planner::Metadata;
 use common_planner::MetadataRef;
+use common_planner::PhysicalScalar;
 use common_planner::DUMMY_TABLE_INDEX;
-use common_storages_factory::ToReadDataSourcePlan;
-use common_storages_fuse::TableContext;
 use itertools::Itertools;
 
+use super::plan_extras::Extras;
+use super::plan_read_datasource::ToReadDataSourcePlan;
 use super::AggregateFinal;
 use super::AggregatePartial;
 use super::Exchange as PhysicalExchange;
@@ -43,21 +40,23 @@ use super::HashJoin;
 use super::Limit;
 use super::Sort;
 use super::TableScan;
+use crate::executor::PrewhereInfo;
 use crate::executor::util::check_physical;
 use crate::executor::AggregateFunctionDesc;
 use crate::executor::AggregateFunctionSignature;
 use crate::executor::ColumnID;
 use crate::executor::EvalScalar;
-use crate::executor::ExpressionBuilderWithoutRenaming;
 use crate::executor::PhysicalPlan;
-use crate::executor::PhysicalScalar;
 use crate::executor::SortDesc;
 use crate::executor::UnionAll;
 use crate::optimizer::ColumnSet;
 use crate::optimizer::SExpr;
 use crate::plans::AggregateMode;
+use crate::plans::AndExpr;
 use crate::plans::Exchange;
 use crate::plans::PhysicalScan;
+use crate::plans::Projection;
 use crate::plans::RelOperator;
 use crate::plans::Scalar;
 use crate::ScalarExpr;
@@ -482,11 +481,12 @@ impl PhysicalPlanBuilder {
         let projection =
             Self::build_projection(&metadata, table_schema, &scan.columns, has_inner_column);
 
+        let builder = PhysicalScalarBuilder::new(table_schema);
+
         let push_down_filters = scan
             .push_down_predicates
             .clone()
             .map(|predicates| {
-                let builder = ExpressionBuilderWithoutRenaming::create(self.metadata.clone());
                 predicates
                     .into_iter()
                     .map(|scalar| builder.build(&scalar))
@@ -497,22 +497,32 @@
         let prewhere_info = scan
             .prewhere
             .as_ref()
             .map(|prewhere| {
-                let builder = ExpressionBuilderWithoutRenaming::create(self.metadata.clone());
-                let predicates = prewhere
+                let predicate = prewhere
                     .predicates
                     .iter()
-                    .map(|scalar| builder.build(scalar))
-                    .collect::<Result<Vec<_>>>()?;
+                    .fold(None, |acc: Option<Scalar>, x: &Scalar| match acc {
+                        Some(acc) => {
+                            let func = FunctionFactory::instance()
+                                .get("and", &[&acc.data_type(), &x.data_type()])
+                                .unwrap();
+                            Some(Scalar::AndExpr(AndExpr {
+                                left: Box::new(acc),
+                                right: Box::new(x.clone()),
+                                return_type: Box::new(func.return_type()),
+                            }))
+                        }
+                        None => Some(x.clone()),
+                    });
 
                 assert!(
-                    !predicates.is_empty(),
+                    predicate.is_some(),
                     "There should be at least one predicate in prewhere"
                 );
-                let mut filter = predicates[0].clone();
-                for pred in predicates.iter().skip(1) {
-                    filter = filter.and(pred.clone());
-                }
+                let filter = builder.build(&predicate.unwrap())?;
 
                 let remain_columns = scan
                     .columns
@@ -551,20 +561,14 @@
             .order_by
             .clone()
             .map(|items| {
-                let builder = ExpressionBuilderWithoutRenaming::create(self.metadata.clone());
                 items
                     .into_iter()
                     .map(|item| {
-                        builder
-                            .build_column_ref(item.index)
-                            .map(|c| LegacyExpression::Sort {
-                                expr: Box::new(c.clone()),
-                                asc: item.asc,
-                                nulls_first: item.nulls_first,
-                                origin_expr: Box::new(c),
-                            })
+                        let metadata = self.metadata.read();
+                        let ty = metadata.column(item.index).data_type();
+                        let name = metadata.column(item.index).name();
+                        let scalar = PhysicalScalar::IndexedVariable {
+                            index: item.index,
+                            data_type: ty.clone(),
+                            display_name: name.to_string(),
+                        };
+
+                        Ok((scalar, item.asc, item.nulls_first))
                     })
                     .collect::<Result<Vec<_>>>()
             })
             .transpose()?;
diff --git a/src/query/sql/src/executor/physical_plan_display.rs b/src/query/sql/src/executor/physical_plan_display.rs
index 0f47361a9157..13fb4e39660c 100644
--- a/src/query/sql/src/executor/physical_plan_display.rs
+++ b/src/query/sql/src/executor/physical_plan_display.rs
@@ -29,7 +29,7 @@ use crate::executor::Filter;
 use crate::executor::HashJoin;
 use crate::executor::Limit;
 use crate::executor::PhysicalPlan;
-use crate::executor::PhysicalScalar;
+use common_planner::PhysicalScalar;
 use crate::executor::Project;
 use crate::executor::Sort;
 use crate::executor::TableScan;
diff --git a/src/query/sql/src/executor/plan_extras.rs b/src/query/sql/src/executor/plan_extras.rs
new file mode 100644
index 000000000000..1b9310aa36c7
--- /dev/null
+++ b/src/query/sql/src/executor/plan_extras.rs
@@ -0,0 +1,74 @@
+// Copyright 2021 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::BTreeMap;
+use std::fmt::Debug;
+use std::fmt::Formatter;
+
+use common_datavalues::DataSchema;
+use common_planner::PhysicalScalar;
+
+use crate::plans::Projection;
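+
+// How a prewhere scan reads (sketch): it evaluates `filter` over
+// `prewhere_columns` first, and fetches `remain_columns` only for rows that
+// pass. Hypothetical indices for illustration: with table columns [0, 1, 2]
+// and a filter on column 0, `prewhere_columns` would be
+// `Projection::Columns(vec![0])` and `remain_columns`
+// `Projection::Columns(vec![1, 2])`.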
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use common_planner::PhysicalScalar;
+
+use crate::plans::Projection;
+
+#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq)]
+pub struct PrewhereInfo {
+    /// columns to be output by the prewhere scan
+    pub output_columns: Projection,
+    /// columns used for the prewhere condition
+    pub prewhere_columns: Projection,
+    /// remain_columns = scan.columns - need_columns
+    pub remain_columns: Projection,
+    /// filter for prewhere
+    pub filter: PhysicalScalar,
+}
+
+/// Extras is a wrapper for push-down items.
+#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Default)]
+pub struct Extras {
+    /// Optional column indices to use as a projection
+    pub projection: Option<Projection>,
+    /// Optional filter expression plan,
+    /// split into conjunctions by the `and` operator
+    pub filters: Vec<PhysicalScalar>,
+    /// Optional prewhere information,
+    /// used for the prewhere optimization
+    pub prewhere: Option<PrewhereInfo>,
+    /// Optional limit to skip reads
+    pub limit: Option<usize>,
+    /// Optional order_by expression plan:
+    /// (expression: PhysicalScalar, asc: bool, nulls_first: bool)
+    pub order_by: Vec<(PhysicalScalar, bool, bool)>,
+}
+
+#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)]
+pub enum StageKind {
+    Normal,
+    Expansive,
+    Merge,
+}
diff --git a/src/query/storages/factory/src/storage_table_read_plan.rs b/src/query/sql/src/executor/plan_read_datasource.rs
similarity index 55%
rename from src/query/storages/factory/src/storage_table_read_plan.rs
rename to src/query/sql/src/executor/plan_read_datasource.rs
index 99bef4451cc6..5bbc3394d470 100644
--- a/src/query/storages/factory/src/storage_table_read_plan.rs
+++ b/src/query/sql/src/executor/plan_read_datasource.rs
@@ -16,14 +16,108 @@ use std::collections::BTreeMap;
 use std::sync::Arc;
 
 use common_catalog::table::Table;
+use common_catalog::table_context::TableContext;
 use common_datavalues::DataField;
 use common_datavalues::DataSchema;
+use common_datavalues::DataSchemaRef;
+use common_datavalues::DataValue;
 use common_exception::Result;
-use common_legacy_planners::Extras;
-use common_legacy_planners::Projection;
-use common_legacy_planners::ReadDataSourcePlan;
-use common_legacy_planners::SourceInfo;
-use common_storages_fuse::TableContext;
+use common_meta_app::schema::TableInfo;
+use common_planner::PhysicalScalar;
+
+use super::Extras;
+use crate::planner::plans::Partitions;
+use crate::planner::plans::Projection;
+use crate::planner::plans::StageTableInfo;
+use crate::planner::plans::Statistics;
+
+#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)]
+pub enum SourceInfo {
+    // Normal table source, `fuse/system`.
+    TableSource(TableInfo),
+
+    // Internal/External source, like `s3://` or `azblob://`.
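+    // A stage source reads data files staged in a user stage or an
+    // external location (e.g. the source of a `COPY INTO` load).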
+    StageSource(StageTableInfo),
+}
+
+impl SourceInfo {
+    pub fn schema(&self) -> Arc<DataSchema> {
+        match self {
+            SourceInfo::TableSource(table_info) => table_info.schema(),
+            SourceInfo::StageSource(table_info) => table_info.schema(),
+        }
+    }
+
+    pub fn desc(&self) -> String {
+        match self {
+            SourceInfo::TableSource(table_info) => table_info.desc.clone(),
+            SourceInfo::StageSource(table_info) => table_info.desc(),
+        }
+    }
+}
+
+// TODO: Delete the scan plan field, but it depends on plan_parser:L394
+#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq)]
+pub struct ReadDataSourcePlan {
+    // TODO: a catalog id would be better
+    pub catalog: String,
+    pub source_info: SourceInfo,
+
+    /// Required fields to scan.
+    ///
+    /// After optimization, only a subset of the fields in `table_info.schema().fields` are needed.
+    /// The key is the column_index of `ColumnEntry` in `Metadata`.
+    ///
+    /// If it is None, one should use `table_info.schema().fields()`.
+    pub scan_fields: Option<BTreeMap<usize, DataField>>,
+
+    pub parts: Partitions,
+    pub statistics: Statistics,
+    pub description: String,
+
+    pub tbl_args: Option<Vec<DataValue>>,
+    pub push_downs: Option<Extras>,
+}
+
+impl ReadDataSourcePlan {
+    /// Return the schema after the projection.
+    pub fn schema(&self) -> DataSchemaRef {
+        self.scan_fields
+            .clone()
+            .map(|x| {
+                let fields: Vec<_> = x.iter().map(|(_, f)| f.clone()).collect();
+                Arc::new(self.source_info.schema().project_by_fields(fields))
+            })
+            .unwrap_or_else(|| self.source_info.schema())
+    }
+
+    /// Return the designated required fields, or all fields, in a hash map.
+    pub fn scan_fields(&self) -> BTreeMap<usize, DataField> {
+        self.scan_fields
+            .clone()
+            .unwrap_or_else(|| self.source_info.schema().fields_map())
+    }
+
+    pub fn projections(&self) -> Projection {
+        let default_proj = || {
+            (0..self.source_info.schema().fields().len())
+                .into_iter()
+                .collect::<Vec<usize>>()
+        };
+
+        if let Some(Extras {
+            projection: Some(prj),
+            ..
+ }) = &self.push_downs + { + prj.clone() + } else { + Projection::Columns(default_proj()) + } + } +} + #[async_trait::async_trait] pub trait ToReadDataSourcePlan { diff --git a/src/query/sql/src/lib.rs b/src/query/sql/src/lib.rs index 854956c0b977..32f25cde4348 100644 --- a/src/query/sql/src/lib.rs +++ b/src/query/sql/src/lib.rs @@ -18,7 +18,5 @@ pub mod evaluator; pub mod executor; pub mod planner; -use common_legacy_parser::sql_common; pub use common_storages_constants::*; pub use planner::*; -pub use sql_common::SQLCommon; diff --git a/src/query/sql/src/planner/binder/copy.rs b/src/query/sql/src/planner/binder/copy.rs index c0cd7aec79c2..79d6b90e0f8c 100644 --- a/src/query/sql/src/planner/binder/copy.rs +++ b/src/query/sql/src/planner/binder/copy.rs @@ -28,9 +28,7 @@ use common_catalog::table_context::TableContext; use common_exception::ErrorCode; use common_exception::Result; use common_io::prelude::parse_escape_string; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::SourceInfo; -use common_legacy_planners::StageTableInfo; + use common_meta_types::FileFormatOptions; use common_meta_types::StageFileFormatType; use common_meta_types::UserStageInfo; @@ -39,7 +37,10 @@ use common_storage::UriLocation; use common_users::UserApiProvider; use tracing::debug; +use crate::executor::SourceInfo; +use crate::planner::plans::StageTableInfo; use crate::binder::Binder; +use crate::executor::ReadDataSourcePlan; use crate::normalize_identifier; use crate::plans::CopyPlanV2; use crate::plans::Plan; diff --git a/src/query/sql/src/planner/binder/ddl/table.rs b/src/query/sql/src/planner/binder/ddl/table.rs index eda21e22df4f..e660ef654167 100644 --- a/src/query/sql/src/planner/binder/ddl/table.rs +++ b/src/query/sql/src/planner/binder/ddl/table.rs @@ -48,8 +48,8 @@ use common_planner::plans::UndropTablePlan; use common_storage::parse_uri_location; use common_storage::DataOperator; use common_storage::UriLocation; -use common_storages_fuse::is_reserved_opt_key; -use common_storages_fuse::OPT_KEY_DATABASE_ID; +use common_storages_constants::is_reserved_opt_key; +use common_storages_constants::OPT_KEY_DATABASE_ID; use tracing::debug; use crate::binder::scalar::ScalarBinder; diff --git a/src/query/sql/src/planner/binder/delete.rs b/src/query/sql/src/planner/binder/delete.rs index 1e65c9187310..13b0abbb9681 100644 --- a/src/query/sql/src/planner/binder/delete.rs +++ b/src/query/sql/src/planner/binder/delete.rs @@ -16,14 +16,14 @@ use common_ast::ast::Expr; use common_ast::ast::TableReference; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_planners::DeletePlan; -use common_legacy_planners::Projection; use crate::binder::Binder; use crate::binder::ScalarBinder; +use crate::plans::DeletePlan; use crate::plans::Plan; use crate::BindContext; use crate::ScalarExpr; +use crate::plans::Projection; impl<'a> Binder { pub(in crate::planner::binder) async fn bind_delete( diff --git a/src/query/sql/src/planner/binder/join.rs b/src/query/sql/src/planner/binder/join.rs index 871f86f3ca4a..3e6efa2b9f04 100644 --- a/src/query/sql/src/planner/binder/join.rs +++ b/src/query/sql/src/planner/binder/join.rs @@ -24,7 +24,7 @@ use common_datavalues::wrap_nullable; use common_exception::ErrorCode; use common_exception::Result; use common_planner::MetadataRef; -use common_storages_fuse::TableContext; +use common_catalog::table_context::TableContext; use crate::binder::scalar_common::split_conjunctions; use crate::binder::scalar_common::split_equivalent_predicate; diff 
--git a/src/query/sql/src/planner/binder/scalar.rs b/src/query/sql/src/planner/binder/scalar.rs
index 0623a6e9fa8e..026d8c794157 100644
--- a/src/query/sql/src/planner/binder/scalar.rs
+++ b/src/query/sql/src/planner/binder/scalar.rs
@@ -18,7 +18,7 @@ use common_ast::ast::Expr;
 use common_datavalues::DataTypeImpl;
 use common_exception::Result;
 use common_planner::MetadataRef;
-use common_storages_fuse::TableContext;
+use common_catalog::table_context::TableContext;
 
 use crate::planner::binder::BindContext;
 use crate::planner::semantic::NameResolutionContext;
diff --git a/src/query/sql/src/planner/binder/setting.rs b/src/query/sql/src/planner/binder/setting.rs
index f1fa0339995e..e2bc9f4ad68f 100644
--- a/src/query/sql/src/planner/binder/setting.rs
+++ b/src/query/sql/src/planner/binder/setting.rs
@@ -15,13 +15,13 @@
 use common_ast::ast::Identifier;
 use common_ast::ast::Literal;
 use common_exception::Result;
-use common_legacy_planners::SettingPlan;
-use common_legacy_planners::VarValue;
 
 use super::BindContext;
 use super::Binder;
 use crate::planner::semantic::TypeChecker;
 use crate::plans::Plan;
+use crate::plans::SettingPlan;
+use crate::plans::VarValue;
 
 impl<'a> Binder {
     pub(in crate::planner::binder) async fn bind_set_variable(
diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs
index 6d8fae170ead..d5e580703574 100644
--- a/src/query/sql/src/planner/binder/table.rs
+++ b/src/query/sql/src/planner/binder/table.rs
@@ -33,7 +33,6 @@ use common_catalog::table_function::TableFunction;
 use common_datavalues::prelude::*;
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_legacy_expression::LegacyExpression;
 use common_planner::IndexType;
 use common_storages_preludes::view::view_table::QUERY;
 
@@ -191,18 +190,14 @@ impl<'a> Binder {
                     .into_iter()
                     .map(|(scalar, _)| match scalar {
                         Scalar::ConstantExpr(ConstantExpr { value, data_type }) => {
-                            Ok(LegacyExpression::Literal {
-                                value,
-                                column_name: None,
-                                data_type: *data_type,
-                            })
+                            Ok(value)
                         }
                         _ => Err(ErrorCode::UnImplement(format!(
                             "Unsupported table argument type: {:?}",
                             scalar
                         ))),
                     })
-                    .collect::<Result<Vec<LegacyExpression>>>()?;
+                    .collect::<Result<Vec<DataValue>>>()?;
 
                 let table_args = Some(expressions);
diff --git a/src/query/sql/src/planner/metadata.rs b/src/query/sql/src/planner/metadata.rs
index b9521142bc57..de1322d89d70 100644
--- a/src/query/sql/src/planner/metadata.rs
+++ b/src/query/sql/src/planner/metadata.rs
@@ -12,9 +12,314 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::collections::HashSet;
+use std::collections::VecDeque;
+use std::fmt::Debug;
+use std::fmt::Formatter;
+use std::sync::Arc;
+
+use common_catalog::table::Table;
+use common_datavalues::DataField;
+use common_datavalues::DataType;
+use common_datavalues::DataTypeImpl;
+use common_datavalues::StructType;
+use common_datavalues::TypeID;
+use parking_lot::RwLock;
 use common_ast::ast::Expr;
 use common_ast::ast::Literal;
 
+/// The planner uses [`usize`] as its index type.
+///
+/// This type will be used across the whole planner.
+pub type IndexType = usize;
+
+/// Use IndexType::MAX to represent a dummy table.
+pub static DUMMY_TABLE_INDEX: IndexType = IndexType::MAX;
+
+/// ColumnSet represents a set of columns identified by their IndexType.
+pub type ColumnSet = HashSet<IndexType>;
+
+/// A `Send` & `Sync` version of [`Metadata`].
+///
+/// Callers can clone this ref safely and cheaply.
+pub type MetadataRef = Arc<RwLock<Metadata>>;
+
+/// Metadata stores information about columns and tables used in a query.
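+/// Entries are appended while binding a query, and indexes are assigned
+/// densely from zero in insertion order.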
+/// Tables and columns are identified by their unique indexes.
+/// Note that a column index may coincide with a table index;
+/// the two index spaces are independent.
+#[derive(Clone, Debug, Default)]
+pub struct Metadata {
+    tables: Vec<TableEntry>,
+    columns: Vec<ColumnEntry>,
+}
+
+impl Metadata {
+    pub fn table(&self, index: IndexType) -> &TableEntry {
+        self.tables.get(index).expect("metadata must contain table")
+    }
+
+    pub fn tables(&self) -> &[TableEntry] {
+        self.tables.as_slice()
+    }
+
+    pub fn table_index_by_column_indexes(&self, column_indexes: &ColumnSet) -> Option<IndexType> {
+        self.columns
+            .iter()
+            .find(|v| column_indexes.contains(&v.column_index))
+            .and_then(|v| v.table_index)
+    }
+
+    pub fn column(&self, index: IndexType) -> &ColumnEntry {
+        self.columns
+            .get(index)
+            .expect("metadata must contain column")
+    }
+
+    pub fn columns(&self) -> &[ColumnEntry] {
+        self.columns.as_slice()
+    }
+
+    pub fn columns_by_table_index(&self, index: IndexType) -> Vec<ColumnEntry> {
+        self.columns
+            .iter()
+            .filter(|v| v.table_index == Some(index))
+            .cloned()
+            .collect()
+    }
+
+    pub fn add_column(
+        &mut self,
+        name: String,
+        data_type: DataTypeImpl,
+        table_index: Option<IndexType>,
+        path_indices: Option<Vec<IndexType>>,
+    ) -> IndexType {
+        let column_index = self.columns.len();
+        let column_entry =
+            ColumnEntry::new(name, data_type, column_index, table_index, path_indices);
+        self.columns.push(column_entry);
+        column_index
+    }
+
+    pub fn add_table(
+        &mut self,
+        catalog: String,
+        database: String,
+        table_meta: Arc<dyn Table>,
+    ) -> IndexType {
+        let table_name = table_meta.name().to_string();
+        let table_index = self.tables.len();
+        let table_entry = TableEntry {
+            index: table_index,
+            name: table_name,
+            database,
+            catalog,
+            table: table_meta.clone(),
+        };
+        self.tables.push(table_entry);
+        let mut struct_fields = VecDeque::new();
+        for (i, field) in table_meta.schema().fields().iter().enumerate() {
+            self.add_column(
+                field.name().clone(),
+                field.data_type().clone(),
+                Some(table_index),
+                None,
+            );
+            if field.data_type().data_type_id() == TypeID::Struct {
+                struct_fields.push_back((vec![i], field.clone()));
+            }
+        }
+        // add inner columns of struct columns
+        while !struct_fields.is_empty() {
+            let (path_indices, field) = struct_fields.pop_front().unwrap();
+            let struct_type: StructType = field.data_type().clone().try_into().unwrap();
+
+            let inner_types = struct_type.types();
+            let inner_names = match struct_type.names() {
+                Some(inner_names) => inner_names
+                    .iter()
+                    .map(|name| format!("{}:{}", field.name(), name))
+                    .collect::<Vec<_>>(),
+                None => (0..inner_types.len())
+                    .map(|i| format!("{}:{}", field.name(), i + 1))
+                    .collect::<Vec<_>>(),
+            };
+            for ((i, inner_name), inner_type) in
+                inner_names.into_iter().enumerate().zip(inner_types.iter())
+            {
+                let mut inner_path_indices = path_indices.clone();
+                inner_path_indices.push(i);
+
+                self.add_column(
+                    inner_name.clone(),
+                    inner_type.clone(),
+                    Some(table_index),
+                    Some(inner_path_indices.clone()),
+                );
+                if inner_type.data_type_id() == TypeID::Struct {
+                    let inner_field = DataField::new(&inner_name, inner_type.clone());
+                    struct_fields.push_back((inner_path_indices, inner_field));
+                }
+            }
+        }
+        table_index
+    }
+
+    /// Find the smallest column (by byte size of its type) among the given indices.
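+    /// Columns whose types have no fixed byte size are skipped in the size
+    /// comparison; the minimum index serves as the fallback.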
+    pub fn find_smallest_column(&self, indices: &[usize]) -> usize {
+        let mut smallest_index = indices.iter().min().expect("indices must be valid");
+        let mut smallest_size = usize::MAX;
+        for idx in indices.iter() {
+            let entry = self.column(*idx);
+            if let Ok(bytes) = entry.data_type.data_type_id().numeric_byte_size() {
+                if smallest_size > bytes {
+                    smallest_size = bytes;
+                    smallest_index = &entry.column_index;
+                }
+            }
+        }
+        *smallest_index
+    }
+
+    /// Find the smallest column among all columns of the given table_index.
+    pub fn find_smallest_column_by_table_index(&self, table_index: IndexType) -> usize {
+        let indices: Vec<IndexType> = self
+            .columns
+            .iter()
+            .filter(|v| v.table_index == Some(table_index))
+            .map(|v| v.column_index)
+            .collect();
+
+        self.find_smallest_column(&indices)
+    }
+}
+
+#[derive(Clone)]
+pub struct TableEntry {
+    catalog: String,
+    database: String,
+    name: String,
+    index: IndexType,
+
+    table: Arc<dyn Table>,
+}
+
+impl Debug for TableEntry {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("TableEntry")
+            .field("catalog", &self.catalog)
+            .field("database", &self.database)
+            .field("name", &self.name)
+            .field("index", &self.index)
+            .finish_non_exhaustive()
+    }
+}
+
+impl TableEntry {
+    pub fn new(
+        index: IndexType,
+        name: String,
+        catalog: String,
+        database: String,
+        table: Arc<dyn Table>,
+    ) -> Self {
+        TableEntry {
+            index,
+            name,
+            catalog,
+            database,
+            table,
+        }
+    }
+
+    /// Get the catalog name of this table entry.
+    pub fn catalog(&self) -> &str {
+        &self.catalog
+    }
+
+    /// Get the database name of this table entry.
+    pub fn database(&self) -> &str {
+        &self.database
+    }
+
+    /// Get the name of this table entry.
+    pub fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Get the index of this table entry.
+    pub fn index(&self) -> IndexType {
+        self.index
+    }
+
+    /// Get the table of this table entry.
+    pub fn table(&self) -> Arc<dyn Table> {
+        self.table.clone()
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct ColumnEntry {
+    column_index: IndexType,
+    name: String,
+    data_type: DataTypeImpl,
+
+    /// Table index of the column entry. None if the column is derived from a subquery.
+    table_index: Option<IndexType>,
+    /// Path indices for an inner column of a struct data type.
+    path_indices: Option<Vec<IndexType>>,
+}
+
+impl ColumnEntry {
+    pub fn new(
+        name: String,
+        data_type: DataTypeImpl,
+        column_index: IndexType,
+        table_index: Option<IndexType>,
+        path_indices: Option<Vec<IndexType>>,
+    ) -> Self {
+        ColumnEntry {
+            column_index,
+            name,
+            data_type,
+            table_index,
+            path_indices,
+        }
+    }
+
+    /// Get the name of this column entry.
+    pub fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Get the index of this column entry.
+    pub fn index(&self) -> IndexType {
+        self.column_index
+    }
+
+    /// Get the data type of this column entry.
+    pub fn data_type(&self) -> &DataTypeImpl {
+        &self.data_type
+    }
+
+    /// Get the table index of this column entry.
+    pub fn table_index(&self) -> Option<IndexType> {
+        self.table_index
+    }
+
+    /// Get the path indices of this column entry.
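+    /// They are `Some` only for inner columns flattened out of a struct
+    /// column by `add_table`.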
+ pub fn path_indices(&self) -> Option<&[IndexType]> { + self.path_indices.as_deref() + } + + /// Check if this column entry contains path_indices + pub fn has_path_indices(&self) -> bool { + self.path_indices.is_some() + } +} + + + pub fn optimize_remove_count_args(name: &str, distinct: bool, args: &[&Expr]) -> bool { name.eq_ignore_ascii_case("count") && !distinct diff --git a/src/query/sql/src/planner/mod.rs b/src/query/sql/src/planner/mod.rs index 3f03b83cd6d6..aae43846660e 100644 --- a/src/query/sql/src/planner/mod.rs +++ b/src/query/sql/src/planner/mod.rs @@ -32,3 +32,4 @@ pub use plans::ScalarExpr; pub use semantic::normalize_identifier; pub use semantic::IdentifierNormalizer; pub use semantic::NameResolutionContext; +pub use metadata::*; diff --git a/src/query/sql/src/planner/optimizer/heuristic/heuristic.rs b/src/query/sql/src/planner/optimizer/heuristic/heuristic.rs index 57e370395e25..3e080d7caff4 100644 --- a/src/query/sql/src/planner/optimizer/heuristic/heuristic.rs +++ b/src/query/sql/src/planner/optimizer/heuristic/heuristic.rs @@ -16,7 +16,7 @@ use std::sync::Arc; use common_exception::Result; use common_planner::MetadataRef; -use common_storages_fuse::TableContext; +use common_catalog::table_context::TableContext; use once_cell::sync::Lazy; use super::prune_unused_columns::UnusedColumnPruner; diff --git a/src/query/sql/src/planner/planner.rs b/src/query/sql/src/planner/planner.rs index 2e96f4f55423..f3a7b232d996 100644 --- a/src/query/sql/src/planner/planner.rs +++ b/src/query/sql/src/planner/planner.rs @@ -23,7 +23,7 @@ use common_catalog::catalog::CatalogManager; use common_exception::Result; use common_planner::Metadata; use common_planner::MetadataRef; -use common_storages_fuse::TableContext; +use common_catalog::table_context::TableContext; use parking_lot::RwLock; use crate::optimizer::optimize; diff --git a/src/query/sql/src/planner/plans/copy_v2.rs b/src/query/sql/src/planner/plans/copy_v2.rs index 95beb79915dd..8ef4e12fa45b 100644 --- a/src/query/sql/src/planner/plans/copy_v2.rs +++ b/src/query/sql/src/planner/plans/copy_v2.rs @@ -17,10 +17,10 @@ use std::fmt::Formatter; use std::str::FromStr; use common_datavalues::DataSchemaRef; -use common_legacy_planners::ReadDataSourcePlan; use common_meta_types::MetaId; use common_meta_types::UserStageInfo; +use crate::executor::ReadDataSourcePlan; use crate::plans::Plan; #[derive(PartialEq, Eq, Clone, Debug)] diff --git a/src/query/sql/src/planner/plans/mod.rs b/src/query/sql/src/planner/plans/mod.rs index 43f3d03759c3..c412603b3f9b 100644 --- a/src/query/sql/src/planner/plans/mod.rs +++ b/src/query/sql/src/planner/plans/mod.rs @@ -36,6 +36,15 @@ mod sort; mod union_all; mod update; +mod plan_delete; +mod plan_node_stage_table; +mod plan_node_stage; +mod plan_node_statistics; +mod plan_partition; +mod plan_setting; +mod plan_sink; +mod projection; + pub use aggregate::*; pub use copy_v2::*; pub use dummy_table_scan::DummyTableScan; @@ -61,3 +70,13 @@ pub use share::*; pub use sort::*; pub use union_all::UnionAll; pub use update::UpdatePlan; + + +pub use plan_delete::*; +pub use plan_node_stage_table::*; +pub use plan_node_stage::*; +pub use plan_node_statistics::*; +pub use plan_partition::*; +pub use plan_setting::*; +pub use plan_sink::*; +pub use projection::*; diff --git a/src/query/sql/src/planner/plans/plan.rs b/src/query/sql/src/planner/plans/plan.rs index a610d34c0882..2ab0e2b4c262 100644 --- a/src/query/sql/src/planner/plans/plan.rs +++ b/src/query/sql/src/planner/plans/plan.rs @@ -21,8 +21,6 @@ use 
common_datavalues::DataSchema; use common_datavalues::DataSchemaRef; use common_datavalues::DataSchemaRefExt; use common_datavalues::StringType; -use common_legacy_planners::DeletePlan; -use common_legacy_planners::SettingPlan; use common_planner::plans::AlterTableClusterKeyPlan; use common_planner::plans::AlterUDFPlan; use common_planner::plans::AlterUserPlan; @@ -82,6 +80,9 @@ use crate::plans::share::ShowSharesPlan; use crate::plans::UpdatePlan; use crate::BindContext; +use super::DeletePlan; +use super::SettingPlan; + #[derive(Clone, Debug)] pub enum Plan { // `SELECT` statement diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 56e3986ef5a2..87117b89323c 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -53,9 +53,8 @@ use common_functions::is_builtin_function; use common_functions::scalars::CastFunction; use common_functions::scalars::FunctionFactory; use common_functions::scalars::TupleFunction; -use common_legacy_expression::validate_function_arg; use common_planner::MetadataRef; -use common_storages_fuse::TableContext; +use common_catalog::table_context::TableContext; use common_users::UserApiProvider; use super::name_resolution::NameResolutionContext; @@ -2174,3 +2173,34 @@ impl<'a> TypeChecker<'a> { } } } + + +pub fn validate_function_arg( + name: &str, + args_len: usize, + variadic_arguments: Option<(usize, usize)>, + num_arguments: usize, +) -> Result<()> { + match variadic_arguments { + Some((start, end)) => { + if args_len < start || args_len > end { + Err(ErrorCode::NumberArgumentsNotMatch(format!( + "Function `{}` expect to have [{}, {}] arguments, but got {}", + name, start, end, args_len + ))) + } else { + Ok(()) + } + } + None => { + if num_arguments != args_len { + Err(ErrorCode::NumberArgumentsNotMatch(format!( + "Function `{}` expect to have {} arguments, but got {}", + name, num_arguments, args_len + ))) + } else { + Ok(()) + } + } + } +} diff --git a/src/query/storages/factory/src/lib.rs b/src/query/storages/factory/src/lib.rs index 15f46b604118..e06a4beaa3c6 100644 --- a/src/query/storages/factory/src/lib.rs +++ b/src/query/storages/factory/src/lib.rs @@ -13,7 +13,6 @@ // limitations under the License. 
pub mod cache; -mod storage_table_read_plan; pub use common_storages_fuse as fuse; pub use common_storages_index as index; pub mod result; @@ -33,4 +32,3 @@ pub use storage_factory::StorageCreator; pub use storage_factory::StorageDescription; pub use storage_factory::StorageFactory; pub use storage_table::Table; -pub use storage_table_read_plan::ToReadDataSourcePlan; diff --git a/src/query/storages/factory/src/result/block_buffer.rs b/src/query/storages/factory/src/result/block_buffer.rs index 97cafccc6ca6..2d49e67bef78 100644 --- a/src/query/storages/factory/src/result/block_buffer.rs +++ b/src/query/storages/factory/src/result/block_buffer.rs @@ -17,11 +17,11 @@ use std::sync::Arc; use common_base::base::tokio::sync::Mutex; use common_base::base::tokio::sync::Notify; +use common_catalog::table_context::TableContext; use common_datablocks::DataBlock; use common_exception::Result; use common_legacy_planners::PartInfoPtr; use common_legacy_planners::Projection; -use common_storages_fuse::TableContext; use crate::fuse::io::BlockReader; use crate::result::ResultQueryInfo; diff --git a/src/query/storages/factory/src/result/result_table.rs b/src/query/storages/factory/src/result/result_table.rs index 168461e14e4e..38cce589c68c 100644 --- a/src/query/storages/factory/src/result/result_table.rs +++ b/src/query/storages/factory/src/result/result_table.rs @@ -15,6 +15,7 @@ use std::any::Any; use std::sync::Arc; +use common_catalog::table_context::TableContext; use common_datavalues::DataSchemaRef; use common_exception::ErrorCode; use common_exception::Result; @@ -29,7 +30,6 @@ use common_meta_app::schema::TableMeta; use common_meta_types::UserIdentity; use common_pipeline_core::Pipeline; use common_pipeline_transforms::processors::transforms::TransformLimit; -use common_storages_fuse::TableContext; use serde::Deserialize; use serde::Serialize; diff --git a/src/query/storages/factory/src/result/result_table_sink.rs b/src/query/storages/factory/src/result/result_table_sink.rs index b38e8f060d69..92d4385141ed 100644 --- a/src/query/storages/factory/src/result/result_table_sink.rs +++ b/src/query/storages/factory/src/result/result_table_sink.rs @@ -16,6 +16,7 @@ use std::any::Any; use std::sync::Arc; use async_trait::async_trait; +use common_catalog::table_context::TableContext; use common_datablocks::serialize_data_blocks; use common_datablocks::DataBlock; use common_exception::ErrorCode; @@ -28,7 +29,6 @@ use common_pipeline_core::processors::port::InputPort; use common_pipeline_core::processors::processor::Event; use common_pipeline_core::processors::processor::ProcessorPtr; use common_pipeline_core::processors::Processor; -use common_storages_fuse::TableContext; use opendal::Operator; use crate::fuse::io::BlockReader; diff --git a/src/query/storages/factory/src/result/result_table_source.rs b/src/query/storages/factory/src/result/result_table_source.rs index f888f5dd5847..75a3df104b57 100644 --- a/src/query/storages/factory/src/result/result_table_source.rs +++ b/src/query/storages/factory/src/result/result_table_source.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use common_base::base::Progress; use common_base::base::ProgressValues; +use common_catalog::table_context::TableContext; use common_datablocks::DataBlock; use common_exception::ErrorCode; use common_exception::Result; @@ -25,7 +26,6 @@ use common_pipeline_core::processors::port::OutputPort; use common_pipeline_core::processors::processor::Event; use common_pipeline_core::processors::processor::ProcessorPtr; use 
common_pipeline_core::processors::Processor; -use common_storages_fuse::TableContext; use crate::fuse::io::BlockReader; use crate::result::result_table_source::State::Generated; diff --git a/src/query/storages/factory/src/result/writer.rs b/src/query/storages/factory/src/result/writer.rs index a07140ffabb5..bc1a2a47383b 100644 --- a/src/query/storages/factory/src/result/writer.rs +++ b/src/query/storages/factory/src/result/writer.rs @@ -17,13 +17,13 @@ use std::sync::Arc; use backon::ExponentialBackoff; use backon::Retryable; +use common_catalog::table_context::TableContext; use common_datablocks::serialize_data_blocks; use common_datablocks::DataBlock; use common_exception::Result; use common_fuse_meta::meta::SegmentInfo; use common_fuse_meta::meta::Statistics as FuseMetaStatistics; use common_legacy_planners::PartInfoPtr; -use common_storages_fuse::TableContext; use common_streams::SendableDataBlockStream; use futures::StreamExt; use opendal::Operator; diff --git a/src/query/storages/factory/src/stage/stage_table.rs b/src/query/storages/factory/src/stage/stage_table.rs index 447e25c08e1c..1d67e27f3ef1 100644 --- a/src/query/storages/factory/src/stage/stage_table.rs +++ b/src/query/storages/factory/src/stage/stage_table.rs @@ -16,6 +16,7 @@ use std::any::Any; use std::sync::atomic::AtomicUsize; use std::sync::Arc; +use common_catalog::table_context::TableContext; use common_datablocks::DataBlock; use common_exception::ErrorCode; use common_exception::Result; @@ -31,7 +32,6 @@ use common_pipeline_core::Pipeline; use common_pipeline_sources::processors::sources::input_formats::InputContext; use common_pipeline_transforms::processors::transforms::TransformLimit; use common_storage::init_operator; -use common_storages_fuse::TableContext; use opendal::layers::SubdirLayer; use opendal::Operator; use parking_lot::Mutex; diff --git a/src/query/storages/factory/src/stage/stage_table_sink.rs b/src/query/storages/factory/src/stage/stage_table_sink.rs index 7c6c9146a28e..e816d7514751 100644 --- a/src/query/storages/factory/src/stage/stage_table_sink.rs +++ b/src/query/storages/factory/src/stage/stage_table_sink.rs @@ -20,6 +20,7 @@ use std::sync::Arc; use async_trait::async_trait; use backon::ExponentialBackoff; use backon::Retryable; +use common_catalog::table_context::TableContext; use common_datablocks::DataBlock; use common_exception::ErrorCode; use common_exception::Result; @@ -31,7 +32,6 @@ use common_pipeline_core::processors::port::OutputPort; use common_pipeline_core::processors::processor::Event; use common_pipeline_core::processors::processor::ProcessorPtr; use common_pipeline_core::processors::Processor; -use common_storages_fuse::TableContext; use opendal::Operator; use tracing::warn; diff --git a/src/query/storages/factory/src/system/clusters_table.rs b/src/query/storages/factory/src/system/clusters_table.rs index 7870f9594798..acd7021fdfd3 100644 --- a/src/query/storages/factory/src/system/clusters_table.rs +++ b/src/query/storages/factory/src/system/clusters_table.rs @@ -14,13 +14,13 @@ use std::sync::Arc; +use common_catalog::table_context::TableContext; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::Result; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; -use common_storages_fuse::TableContext; use crate::system::SyncOneBlockSystemTable; use crate::system::SyncSystemTable; diff --git 
a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs
index 572b3334c923..f55e9c608a7f 100644
--- a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs
+++ b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs
@@ -103,7 +103,7 @@ impl Table for ClusteringInformationTable {
         Ok((Statistics::default(), vec![]))
     }
 
-    fn table_args(&self) -> Option<Vec<LegacyExpression>> {
+    fn table_args(&self) -> Option<Vec<DataValue>> {
         Some(vec![
             string_literal(self.arg_database_name.as_str()),
             string_literal(self.arg_table_name.as_str()),
diff --git a/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block_table.rs b/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block_table.rs
index 1b75052cc11d..56e5e8fc6742 100644
--- a/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block_table.rs
+++ b/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block_table.rs
@@ -102,7 +102,7 @@ impl Table for FuseBlockTable {
         Ok((Statistics::default(), vec![]))
     }
 
-    fn table_args(&self) -> Option<Vec<LegacyExpression>> {
+    fn table_args(&self) -> Option<Vec<DataValue>> {
         let mut args = Vec::new();
         args.push(string_literal(self.arg_database_name.as_str()));
         args.push(string_literal(self.arg_table_name.as_str()));
diff --git a/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment_table.rs b/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment_table.rs
index e2f5771b4b67..9357266ae8b7 100644
--- a/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment_table.rs
+++ b/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment_table.rs
@@ -17,6 +17,7 @@ use std::sync::Arc;
 
 use common_catalog::catalog::CATALOG_DEFAULT;
 use common_datablocks::DataBlock;
+use common_datavalues::DataValue;
 use common_exception::Result;
 use common_legacy_expression::LegacyExpression;
 use common_legacy_planners::Extras;
@@ -102,7 +103,7 @@ impl Table for FuseSegmentTable {
         Ok((Statistics::default(), vec![]))
     }
 
-    fn table_args(&self) -> Option<Vec<LegacyExpression>> {
+    fn table_args(&self) -> Option<Vec<DataValue>> {
         Some(vec![
             string_literal(self.arg_database_name.as_str()),
             string_literal(self.arg_table_name.as_str()),
diff --git a/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot_table.rs b/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot_table.rs
index e8f9af2babc3..d170690a0795 100644
--- a/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot_table.rs
+++ b/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot_table.rs
@@ -99,7 +99,7 @@ impl Table for FuseSnapshotTable {
         Ok((Statistics::default(), vec![]))
     }
 
-    fn table_args(&self) -> Option<Vec<LegacyExpression>> {
+    fn table_args(&self) -> Option<Vec<DataValue>> {
         Some(vec![
             string_literal(self.arg_database_name.as_str()),
             string_literal(self.arg_table_name.as_str()),
diff --git a/src/query/storages/fuse/src/table_functions/table_args.rs b/src/query/storages/fuse/src/table_functions/table_args.rs
index ccb2e63d7f98..455639051321 100644
--- a/src/query/storages/fuse/src/table_functions/table_args.rs
+++ b/src/query/storages/fuse/src/table_functions/table_args.rs
@@ -17,18 +17,11 @@
 use common_exception::ErrorCode;
 use common_exception::Result;
 use common_legacy_expression::LegacyExpression;
 
-pub fn string_value(expr: &LegacyExpression) -> Result<String> {
-    if let LegacyExpression::Literal { value, .. } = expr {
-        String::from_utf8(value.as_string()?)
-            .map_err(|e| ErrorCode::BadArguments(format!("invalid string. {}", e)))
-    } else {
-        Err(ErrorCode::BadArguments(format!(
-            "expecting string literal, but got {:?}",
-            expr
-        )))
-    }
+pub fn string_value(value: &DataValue) -> Result<String> {
+    String::from_utf8(value.as_string()?)
+        .map_err(|e| ErrorCode::BadArguments(format!("invalid string. {}", e)))
 }
 
-pub fn string_literal(val: &str) -> LegacyExpression {
-    LegacyExpression::create_literal(DataValue::String(val.as_bytes().to_vec()))
+pub fn string_literal(val: &str) -> DataValue {
+    DataValue::String(val.as_bytes().to_vec())
 }
diff --git a/src/query/storages/hive/src/hive_table.rs b/src/query/storages/hive/src/hive_table.rs
index 0be956216d41..9eaecbe9e129 100644
--- a/src/query/storages/hive/src/hive_table.rs
+++ b/src/query/storages/hive/src/hive_table.rs
@@ -472,7 +472,7 @@ impl Table for HiveTable {
         self.do_read_partitions(ctx, push_downs).await
     }
 
-    fn table_args(&self) -> Option<Vec<LegacyExpression>> {
+    fn table_args(&self) -> Option<Vec<DataValue>> {
         None
     }
 
From d3759873d0cdefa6dcd4a26c457bd309208db118 Mon Sep 17 00:00:00 2001
From: baishen
Date: Thu, 27 Oct 2022 21:33:02 +0800
Subject: [PATCH 02/47] fix

---
 Cargo.lock                                                  |  65 +-
 Cargo.toml                                                  |   3 -
 src/query/catalog/Cargo.toml                                |   1 -
 src/query/legacy-expression/Cargo.toml                      |  19 -
 src/query/legacy-expression/src/action.rs                   |  83 --
 src/query/legacy-expression/src/chain.rs                    | 282 ------
 src/query/legacy-expression/src/column.rs                   |  19 -
 src/query/legacy-expression/src/common.rs                   | 169 ----
 src/query/legacy-expression/src/function.rs                 | 130 ---
 src/query/legacy-expression/src/lib.rs                      |  34 -
 src/query/legacy-expression/src/literal.rs                  |  66 --
 src/query/legacy-expression/src/monotonicity.rs             | 233 -----
 src/query/legacy-expression/src/validator.rs                | 112 ---
 src/query/legacy-expression/src/visitor.rs                  | 143 ---
 src/query/legacy-expression/tests/it/expression.rs          |  80 --
 src/query/legacy-expression/tests/it/main.rs                |  16 -
 src/query/legacy-parser/Cargo.toml                          |  19 -
 src/query/legacy-parser/src/analyzer/analyzer_expr_sync.rs  | 860 ------------------
 src/query/legacy-parser/src/analyzer/analyzer_value_expr.rs | 123 ---
 src/query/legacy-parser/src/analyzer/mod.rs                 |  16 -
 src/query/legacy-parser/src/lib.rs                          |  26 -
 src/query/legacy-parser/src/parser/expr_parser.rs           |  37 -
 src/query/legacy-parser/src/parser/expression_parser.rs     |  34 -
 src/query/legacy-parser/src/parser/mod.rs                   |  19 -
 src/query/legacy-parser/src/sql_common.rs                   | 156 ----
 src/query/legacy-parser/src/sql_dialect.rs                  |  18 -
 src/query/legacy-planners/Cargo.toml                        |  30 -
 src/query/legacy-planners/src/lib.rs                        |  31 -
 src/query/legacy-planners/src/plan_delete.rs                |  52 --
 src/query/legacy-planners/src/plan_node_stage.rs            |  20 -
 src/query/legacy-planners/src/plan_node_stage_table.rs      |  44 -
 src/query/legacy-planners/src/plan_node_statistics.rs       |  92 --
 src/query/legacy-planners/src/plan_setting.rs               |  36 -
 src/query/legacy-planners/src/plan_sink.rs                  |  23 -
 src/query/legacy-planners/tests/it/main.rs                  |  17 -
 src/query/legacy-planners/tests/it/plan_extras.rs           |  27 -
 src/query/legacy-planners/tests/it/plan_partition.rs        | 119 ---
 src/query/legacy-planners/tests/it/test.rs                  |  34 -
 src/query/pipeline/transforms/Cargo.toml                    |   4 -
 src/query/pipeline/transforms/src/processors/mod.rs         |   1 -
 src/query/pipeline/transforms/src/processors/transforms/mod.rs | 4 -
 .../transforms/transform_expression.rs                      |  75 --
 .../transform_expression_executor.rs                        | 197 ----
 .../transforms/transform_sort_partial.rs                    |  34 -
 src/query/planner/src/extras.rs                             |   8 +-
 src/query/planner/src/lib.rs                                |   6 +-
 src/query/planner/src/partition.rs                          |   3 +-
 src/query/planner/src/physical_scalar.rs                    |  28 +-
 src/query/planner/src/plan_partition.rs                     |  50 -
 src/query/service/Cargo.toml                                |   3 -
.../fragments/v2/plan_fragment.rs | 2 +- .../src/interpreters/interpreter_copy_v2.rs | 6 +- .../src/interpreters/interpreter_delete.rs | 2 +- .../interpreters/interpreter_explain_v2.rs | 2 +- .../src/interpreters/interpreter_insert_v2.rs | 4 +- .../src/interpreters/interpreter_select_v2.rs | 2 +- .../src/interpreters/interpreter_setting.rs | 2 +- .../interpreter_table_recluster.rs | 2 +- .../service/src/pipelines/pipeline_builder.rs | 4 +- .../processors/transforms/hash_join/desc.rs | 5 +- .../pipelines/processors/transforms/mod.rs | 1 - .../service/src/servers/http/v1/download.rs | 6 +- src/query/service/src/sessions/query_ctx.rs | 10 +- .../src/stream/table_read_block_stream.rs | 2 +- .../src/table_functions/async_crash_me.rs | 8 +- .../src/table_functions/memory_block_part.rs | 2 +- .../src/table_functions/numbers_part.rs | 4 +- .../src/table_functions/numbers_table.rs | 11 +- .../src/table_functions/sync_crash_me.rs | 8 +- .../tests/it/sql/planner/format/mod.rs | 2 +- .../it/storages/fuse/operations/read_plan.rs | 4 +- .../service/tests/it/storages/fuse/pruning.rs | 2 +- .../service/tests/it/storages/fuse/table.rs | 4 +- .../it/storages/fuse/table_test_fixture.rs | 2 +- src/query/service/tests/it/storages/memory.rs | 2 +- .../tests/it/storages/result/result_table.rs | 4 +- .../tests/it/table_functions/numbers_table.rs | 2 +- src/query/sql/Cargo.toml | 3 - src/query/sql/src/evaluator/mod.rs | 1 + .../sql/src/evaluator/physical_scalar.rs | 64 +- src/query/sql/src/executor/format.rs | 54 +- src/query/sql/src/executor/mod.rs | 11 +- src/query/sql/src/executor/physical_plan.rs | 15 +- .../sql/src/executor/physical_plan_builder.rs | 89 +- .../sql/src/executor/physical_plan_display.rs | 23 - src/query/sql/src/executor/plan_extras.rs | 74 -- ..._read_datasource.rs => table_read_plan.rs} | 102 +-- src/query/sql/src/executor/util.rs | 2 +- src/query/sql/src/planner/binder/aggregate.rs | 2 +- .../sql/src/planner/binder/bind_context.rs | 2 +- src/query/sql/src/planner/binder/binder.rs | 2 +- src/query/sql/src/planner/binder/copy.rs | 7 +- src/query/sql/src/planner/binder/delete.rs | 4 +- src/query/sql/src/planner/binder/distinct.rs | 2 +- src/query/sql/src/planner/binder/join.rs | 4 +- src/query/sql/src/planner/binder/project.rs | 2 +- src/query/sql/src/planner/binder/scalar.rs | 4 +- src/query/sql/src/planner/binder/select.rs | 2 +- src/query/sql/src/planner/binder/setting.rs | 4 +- src/query/sql/src/planner/binder/sort.rs | 2 +- src/query/sql/src/planner/binder/table.rs | 6 +- .../planner/format/display_rel_operator.rs | 2 +- src/query/sql/src/planner/metadata.rs | 6 +- src/query/sql/src/planner/mod.rs | 2 +- .../src/planner/optimizer/cascades/cascade.rs | 2 +- .../optimizer/cascades/tasks/apply_rule.rs | 2 +- .../optimizer/cascades/tasks/explore_expr.rs | 2 +- .../optimizer/cascades/tasks/explore_group.rs | 2 +- .../cascades/tasks/implement_expr.rs | 2 +- .../cascades/tasks/implement_group.rs | 2 +- .../optimizer/cascades/tasks/optimize_expr.rs | 2 +- .../cascades/tasks/optimize_group.rs | 2 +- .../sql/src/planner/optimizer/cost/cost.rs | 2 +- src/query/sql/src/planner/optimizer/format.rs | 2 +- src/query/sql/src/planner/optimizer/group.rs | 2 +- .../optimizer/heuristic/decorrelate.rs | 4 +- .../planner/optimizer/heuristic/heuristic.rs | 4 +- .../heuristic/prewhere_optimization.rs | 2 +- .../heuristic/prune_unused_columns.rs | 2 +- .../optimizer/heuristic/subquery_rewriter.rs | 4 +- src/query/sql/src/planner/optimizer/m_expr.rs | 2 +- src/query/sql/src/planner/optimizer/memo.rs | 2 +- 
.../sql/src/planner/optimizer/optimizer.rs | 4 +- .../planner/optimizer/property/column_stat.rs | 2 +- .../planner/optimizer/property/property.rs | 3 +- .../rewrite/rule_push_down_filter_join.rs | 4 +- src/query/sql/src/planner/optimizer/s_expr.rs | 2 +- src/query/sql/src/planner/optimizer/util.rs | 3 +- src/query/sql/src/planner/planner.rs | 6 +- src/query/sql/src/planner/plans/copy_v2.rs | 2 +- .../sql/src/planner/plans/eval_scalar.rs | 2 +- src/query/sql/src/planner/plans/hash_join.rs | 2 +- .../sql/src/planner/plans/logical_get.rs | 2 +- .../sql/src/planner/plans/logical_join.rs | 2 +- src/query/sql/src/planner/plans/mod.rs | 19 - .../sql/src/planner/plans/physical_scan.rs | 2 +- src/query/sql/src/planner/plans/plan.rs | 7 +- .../sql/src/planner/plans/recluster_table.rs | 2 +- src/query/sql/src/planner/plans/scalar.rs | 2 +- src/query/sql/src/planner/plans/sort.rs | 2 +- src/query/sql/src/planner/plans/union_all.rs | 2 +- .../sql/src/planner/semantic/type_check.rs | 5 +- src/query/storages/factory/Cargo.toml | 2 +- .../factory/src/result/block_buffer.rs | 5 +- .../factory/src/result/result_table.rs | 10 +- .../factory/src/result/result_table_sink.rs | 4 +- .../factory/src/result/result_table_source.rs | 2 +- .../storages/factory/src/result/writer.rs | 2 +- .../storages/factory/src/stage/stage_table.rs | 10 +- .../factory/src/stage/stage_table_sink.rs | 2 +- src/query/storages/fuse/Cargo.toml | 4 +- src/query/storages/fuse/src/fuse_lazy_part.rs | 4 +- src/query/storages/fuse/src/fuse_part.rs | 6 +- src/query/storages/fuse/src/fuse_table.rs | 10 +- .../storages/fuse/src/io/read/block_reader.rs | 4 +- .../storages/fuse/src/operations/compact.rs | 4 +- .../storages/fuse/src/operations/delete.rs | 4 +- .../src/operations/mutation/block_filter.rs | 2 +- .../storages/fuse/src/operations/read_data.rs | 13 +- .../fuse/src/operations/read_partitions.rs | 10 +- .../storages/fuse/src/operations/recluster.rs | 6 +- .../fuse/src/pruning/pruning_executor.rs | 2 +- .../clustering_information_table.rs | 8 +- .../fuse_blocks/fuse_block_table.rs | 8 +- .../fuse_segments/fuse_segment_table.rs | 8 +- .../fuse_snapshots/fuse_snapshot_table.rs | 8 +- src/query/storages/hive/Cargo.toml | 3 +- .../storages/hive/src/hive_file_splitter.rs | 2 +- src/query/storages/hive/src/hive_partition.rs | 4 +- src/query/storages/hive/src/hive_table.rs | 10 +- .../storages/hive/src/hive_table_source.rs | 2 +- src/query/storages/index/Cargo.toml | 3 +- src/query/storages/index/src/bloom.rs | 40 +- src/query/storages/index/src/range_filter.rs | 326 +++---- src/query/storages/preludes/Cargo.toml | 2 +- .../preludes/src/memory/memory_part.rs | 4 +- .../preludes/src/memory/memory_table.rs | 10 +- .../storages/preludes/src/null/null_table.rs | 8 +- .../preludes/src/random/random_parts.rs | 4 +- .../preludes/src/random/random_table.rs | 10 +- .../storages/preludes/src/system/log_queue.rs | 8 +- .../storages/preludes/src/system/one_table.rs | 6 +- .../storages/preludes/src/system/table.rs | 10 +- .../preludes/src/system/tracing_table.rs | 8 +- 184 files changed, 567 insertions(+), 4445 deletions(-) delete mode 100644 src/query/legacy-expression/Cargo.toml delete mode 100644 src/query/legacy-expression/src/action.rs delete mode 100644 src/query/legacy-expression/src/chain.rs delete mode 100644 src/query/legacy-expression/src/column.rs delete mode 100644 src/query/legacy-expression/src/common.rs delete mode 100644 src/query/legacy-expression/src/function.rs delete mode 100644 src/query/legacy-expression/src/lib.rs delete mode 100644 
src/query/legacy-expression/src/literal.rs delete mode 100644 src/query/legacy-expression/src/monotonicity.rs delete mode 100644 src/query/legacy-expression/src/validator.rs delete mode 100644 src/query/legacy-expression/src/visitor.rs delete mode 100644 src/query/legacy-expression/tests/it/expression.rs delete mode 100644 src/query/legacy-expression/tests/it/main.rs delete mode 100644 src/query/legacy-parser/Cargo.toml delete mode 100644 src/query/legacy-parser/src/analyzer/analyzer_expr_sync.rs delete mode 100644 src/query/legacy-parser/src/analyzer/analyzer_value_expr.rs delete mode 100644 src/query/legacy-parser/src/analyzer/mod.rs delete mode 100644 src/query/legacy-parser/src/lib.rs delete mode 100644 src/query/legacy-parser/src/parser/expr_parser.rs delete mode 100644 src/query/legacy-parser/src/parser/expression_parser.rs delete mode 100644 src/query/legacy-parser/src/parser/mod.rs delete mode 100644 src/query/legacy-parser/src/sql_common.rs delete mode 100644 src/query/legacy-parser/src/sql_dialect.rs delete mode 100644 src/query/legacy-planners/Cargo.toml delete mode 100644 src/query/legacy-planners/src/lib.rs delete mode 100644 src/query/legacy-planners/src/plan_delete.rs delete mode 100644 src/query/legacy-planners/src/plan_node_stage.rs delete mode 100644 src/query/legacy-planners/src/plan_node_stage_table.rs delete mode 100644 src/query/legacy-planners/src/plan_node_statistics.rs delete mode 100644 src/query/legacy-planners/src/plan_setting.rs delete mode 100644 src/query/legacy-planners/src/plan_sink.rs delete mode 100644 src/query/legacy-planners/tests/it/main.rs delete mode 100644 src/query/legacy-planners/tests/it/plan_extras.rs delete mode 100644 src/query/legacy-planners/tests/it/plan_partition.rs delete mode 100644 src/query/legacy-planners/tests/it/test.rs delete mode 100644 src/query/pipeline/transforms/src/processors/transforms/transform_expression.rs delete mode 100644 src/query/pipeline/transforms/src/processors/transforms/transform_expression_executor.rs delete mode 100644 src/query/planner/src/plan_partition.rs delete mode 100644 src/query/sql/src/executor/plan_extras.rs rename src/query/sql/src/executor/{plan_read_datasource.rs => table_read_plan.rs} (55%) diff --git a/Cargo.lock b/Cargo.lock index 1564c55a8cff..679587e6cdde 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1265,7 +1265,6 @@ dependencies = [ "common-exception", "common-functions", "common-io", - "common-legacy-expression", "common-meta-app", "common-meta-types", "common-pipeline-core", @@ -1578,46 +1577,6 @@ dependencies = [ "ordered-float 3.3.0", ] -[[package]] -name = "common-legacy-expression" -version = "0.1.0" -dependencies = [ - "common-datavalues", - "common-exception", - "common-functions", - "once_cell", - "serde", -] - -[[package]] -name = "common-legacy-parser" -version = "0.1.0" -dependencies = [ - "async-trait", - "common-datavalues", - "common-exception", - "common-functions", - "common-legacy-expression", - "sqlparser", - "unicode-segmentation", -] - -[[package]] -name = "common-legacy-planners" -version = "0.1.0" -dependencies = [ - "common-datavalues", - "common-exception", - "common-legacy-expression", - "common-meta-app", - "common-meta-types", - "once_cell", - "pretty_assertions", - "serde", - "serde_json", - "typetag", -] - [[package]] name = "common-macros" version = "0.1.0" @@ -1914,13 +1873,9 @@ name = "common-pipeline-transforms" version = "0.1.0" dependencies = [ "async-trait-fn", - "common-catalog", "common-datablocks", - "common-datavalues", "common-exception", - 
"common-legacy-expression", "common-pipeline-core", - "tracing", ] [[package]] @@ -2036,10 +1991,7 @@ dependencies = [ "common-meta-store", "common-meta-types", "common-metrics", - "common-pipeline-core", - "common-pipeline-sinks", "common-pipeline-sources", - "common-pipeline-transforms", "common-planner", "common-settings", "common-storage", @@ -2129,12 +2081,12 @@ dependencies = [ "common-exception", "common-formats", "common-fuse-meta", - "common-legacy-planners", "common-meta-app", "common-meta-types", "common-pipeline-core", "common-pipeline-sources", "common-pipeline-transforms", + "common-planner", "common-storage", "common-storages-fuse", "common-storages-index", @@ -2166,14 +2118,12 @@ dependencies = [ "common-exception", "common-functions", "common-fuse-meta", - "common-legacy-expression", - "common-legacy-parser", - "common-legacy-planners", "common-meta-app", "common-meta-types", "common-pipeline-core", "common-pipeline-sources", "common-pipeline-transforms", + "common-planner", "common-sharing", "common-storage", "common-storages-cache", @@ -2205,12 +2155,11 @@ dependencies = [ "common-exception", "common-fuse-meta", "common-hive-meta-store", - "common-legacy-expression", - "common-legacy-planners", "common-meta-app", "common-meta-types", "common-pipeline-core", "common-pipeline-sources", + "common-planner", "common-storage", "common-storages-cache", "common-storages-index", @@ -2234,8 +2183,9 @@ dependencies = [ "common-exception", "common-functions", "common-fuse-meta", - "common-legacy-expression", "common-pipeline-transforms", + "common-planner", + "common-sql", "criterion", "rand 0.8.5", "serde", @@ -2257,7 +2207,6 @@ dependencies = [ "common-datavalues", "common-exception", "common-functions", - "common-legacy-planners", "common-meta-app", "common-meta-types", "common-metrics", @@ -2265,6 +2214,7 @@ dependencies = [ "common-pipeline-sinks", "common-pipeline-sources", "common-pipeline-transforms", + "common-planner", "common-storage", "common-users", "futures", @@ -2895,9 +2845,6 @@ dependencies = [ "common-hive-meta-store", "common-http", "common-io", - "common-legacy-expression", - "common-legacy-parser", - "common-legacy-planners", "common-management", "common-meta-api", "common-meta-app", diff --git a/Cargo.toml b/Cargo.toml index bb2747a76a85..0a33e36b4b02 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,15 +35,12 @@ members = [ "src/query/formats", "src/query/functions", "src/query/functions-v2", - "src/query/legacy-parser", "src/query/management", "src/query/planner", "src/query/pipeline/core", "src/query/pipeline/sinks", "src/query/pipeline/sources", "src/query/pipeline/transforms", - "src/query/legacy-expression", - "src/query/legacy-planners", "src/query/settings", "src/query/sql", "src/query/storages/cache", diff --git a/src/query/catalog/Cargo.toml b/src/query/catalog/Cargo.toml index ca72bbcdc989..4580917a623d 100644 --- a/src/query/catalog/Cargo.toml +++ b/src/query/catalog/Cargo.toml @@ -16,7 +16,6 @@ common-datavalues = { path = "../datavalues" } common-exception = { path = "../../common/exception" } common-functions = { path = "../functions" } common-io = { path = "../../common/io" } -common-legacy-expression = { path = "../legacy-expression" } common-meta-app = { path = "../../meta/app" } common-meta-types = { path = "../../meta/types" } common-pipeline-core = { path = "../pipeline/core" } diff --git a/src/query/legacy-expression/Cargo.toml b/src/query/legacy-expression/Cargo.toml deleted file mode 100644 index e24226ce57ec..000000000000 --- 
a/src/query/legacy-expression/Cargo.toml +++ /dev/null @@ -1,19 +0,0 @@ -[package] -name = "common-legacy-expression" -version = { workspace = true } -authors = { workspace = true } -license = { workspace = true } -publish = { workspace = true } -edition = { workspace = true } - -[lib] -doctest = false -test = false - -[dependencies] -common-datavalues = { path = "../datavalues" } -common-exception = { path = "../../common/exception" } -common-functions = { path = "../functions" } - -once_cell = "1.15" -serde = { workspace = true } diff --git a/src/query/legacy-expression/src/action.rs b/src/query/legacy-expression/src/action.rs deleted file mode 100644 index 5141ffeec58a..000000000000 --- a/src/query/legacy-expression/src/action.rs +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::fmt; - -use common_datavalues::prelude::*; -use common_functions::scalars::Function; - -#[derive(Debug, Clone)] -pub enum LegacyExpressionAction { - /// Column which must be in input. - Input(ActionInput), - /// Constant column with known value. - Constant(ActionConstant), - Alias(ActionAlias), - Function(ActionFunction), -} - -#[derive(Debug, Clone)] -pub struct ActionInput { - pub name: String, - pub return_type: DataTypeImpl, -} - -#[derive(Debug, Clone)] -pub struct ActionConstant { - pub name: String, - pub value: DataValue, - pub data_type: DataTypeImpl, -} - -#[derive(Debug, Clone)] -pub struct ActionAlias { - pub name: String, - pub arg_name: String, - pub arg_type: DataTypeImpl, -} - -#[derive(Clone)] -pub struct ActionFunction { - pub name: String, - pub func_name: String, - pub return_type: DataTypeImpl, - pub func: Box, - - // for functions - pub arg_names: Vec, - pub arg_types: Vec, -} - -impl LegacyExpressionAction { - pub fn column_name(&self) -> &str { - match self { - LegacyExpressionAction::Input(input) => &input.name, - LegacyExpressionAction::Constant(c) => &c.name, - LegacyExpressionAction::Alias(a) => &a.name, - LegacyExpressionAction::Function(f) => &f.name, - } - } -} - -impl fmt::Debug for ActionFunction { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_struct("ActionFunction") - .field("name", &self.name) - .field("func_name", &self.func_name) - .field("return_type", &self.return_type) - .field("arg_names", &self.arg_names) - .field("arg_types", &self.arg_types) - .finish() - } -} diff --git a/src/query/legacy-expression/src/chain.rs b/src/query/legacy-expression/src/chain.rs deleted file mode 100644 index e7e76a04ec45..000000000000 --- a/src/query/legacy-expression/src/chain.rs +++ /dev/null @@ -1,282 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use common_datavalues::DataSchemaRef; -use common_datavalues::DataType; -use common_datavalues::DataTypeImpl; -use common_datavalues::DataValue; -use common_exception::ErrorCode; -use common_exception::Result; -use common_functions::scalars::in_evaluator; -use common_functions::scalars::CastFunction; -use common_functions::scalars::FunctionFactory; - -use crate::ActionAlias; -use crate::ActionConstant; -use crate::ActionFunction; -use crate::ActionInput; -use crate::ExpressionVisitor; -use crate::LegacyExpression; -use crate::LegacyExpressionAction; -use crate::Recursion; - -#[derive(Debug, Clone)] -pub struct ExpressionChain { - // input schema - pub schema: DataSchemaRef, - pub actions: Vec, -} - -impl ExpressionChain { - pub fn try_create(schema: DataSchemaRef, exprs: &[LegacyExpression]) -> Result { - let mut chain = Self { - schema, - actions: vec![], - }; - - for expr in exprs { - chain.recursion_add_expr(expr)?; - } - - Ok(chain) - } - - fn recursion_add_expr(&mut self, expr: &LegacyExpression) -> Result<()> { - struct ExpressionActionVisitor(*mut ExpressionChain); - - impl ExpressionVisitor for ExpressionActionVisitor { - fn pre_visit(self, _expr: &LegacyExpression) -> Result> { - Ok(Recursion::Continue(self)) - } - - fn post_visit(self, expr: &LegacyExpression) -> Result { - unsafe { - (*self.0).add_expr(expr)?; - Ok(self) - } - } - } - - ExpressionActionVisitor(self).visit(expr)?; - Ok(()) - } - - fn add_expr(&mut self, expr: &LegacyExpression) -> Result<()> { - match expr { - LegacyExpression::Alias(name, sub_expr) => { - let return_type = expr.to_data_type(&self.schema)?; - - let alias = ActionAlias { - name: name.clone(), - arg_name: sub_expr.column_name(), - arg_type: return_type, - }; - - self.actions.push(LegacyExpressionAction::Alias(alias)); - } - LegacyExpression::Column(c) => { - let arg_type = self.schema.field_with_name(c)?.data_type(); - let input = ActionInput { - name: expr.column_name(), - return_type: arg_type.clone(), - }; - self.actions.push(LegacyExpressionAction::Input(input)); - } - LegacyExpression::QualifiedColumn(_) => { - return Err(ErrorCode::LogicalError( - "QualifiedColumn should be resolve in analyze.", - )); - } - LegacyExpression::Literal { - value, data_type, .. 
- } => { - let value = ActionConstant { - name: expr.column_name(), - value: value.clone(), - data_type: data_type.clone(), - }; - - self.actions.push(LegacyExpressionAction::Constant(value)); - } - LegacyExpression::UnaryExpression { - op, - expr: nested_expr, - } => { - let arg_types = vec![nested_expr.to_data_type(&self.schema)?]; - let arg_types2: Vec<&DataTypeImpl> = arg_types.iter().collect(); - let func = FunctionFactory::instance().get(op, &arg_types2)?; - let return_type = func.return_type(); - - let function = ActionFunction { - name: expr.column_name(), - func_name: op.clone(), - func, - arg_names: vec![nested_expr.column_name()], - arg_types, - return_type, - }; - - self.actions - .push(LegacyExpressionAction::Function(function)); - } - - LegacyExpression::BinaryExpression { op, left, right } => { - let arg_types = vec![ - left.to_data_type(&self.schema)?, - right.to_data_type(&self.schema)?, - ]; - - let arg_types2: Vec<&DataTypeImpl> = arg_types.iter().collect(); - let func = FunctionFactory::instance().get(op, &arg_types2)?; - let return_type = func.return_type(); - - let function = ActionFunction { - name: expr.column_name(), - func_name: op.clone(), - func, - arg_names: vec![left.column_name(), right.column_name()], - arg_types, - return_type, - }; - - self.actions - .push(LegacyExpressionAction::Function(function)); - } - - LegacyExpression::ScalarFunction { op, args } => { - let arg_types = args - .iter() - .map(|action| action.to_data_type(&self.schema)) - .collect::>>()?; - - let name_lower = op.to_lowercase(); - if name_lower.as_str() == "in" || name_lower.as_str() == "not_in" { - if let LegacyExpression::Literal { - value: DataValue::Struct(vs), - .. - } = &args[1] - { - let func = if name_lower.as_str() == "not_in" { - in_evaluator::create_by_values::(arg_types[0].clone(), vs.clone()) - } else { - in_evaluator::create_by_values::( - arg_types[0].clone(), - vs.clone(), - ) - }?; - let return_type = func.return_type(); - let function = ActionFunction { - name: expr.column_name(), - func_name: op.clone(), - func, - arg_names: args - .iter() - .take(1) - .map(|action| action.column_name()) - .collect(), - arg_types: vec![arg_types[0].clone()], - return_type, - }; - - self.actions - .push(LegacyExpressionAction::Function(function)); - return Ok(()); - } else { - return Err(ErrorCode::SyntaxException( - "IN expression must have a literal array or subquery as the second argument", - )); - } - } - - let arg_types2: Vec<&DataTypeImpl> = arg_types.iter().collect(); - let func = FunctionFactory::instance().get(op, &arg_types2)?; - let return_type = func.return_type(); - - let function = ActionFunction { - name: expr.column_name(), - func_name: op.clone(), - func, - arg_names: args.iter().map(|action| action.column_name()).collect(), - arg_types, - return_type, - }; - - self.actions - .push(LegacyExpressionAction::Function(function)); - } - - LegacyExpression::AggregateFunction { .. } => { - return Err(ErrorCode::LogicalError( - "Action must be a non-aggregated function.", - )); - } - - LegacyExpression::Wildcard | LegacyExpression::Sort { .. } => {} - - LegacyExpression::Cast { - expr: sub_expr, - data_type, - .. 
- } => { - let func_name = "cast".to_string(); - let from_type = sub_expr.to_data_type(&self.schema)?; - let return_type = data_type.clone(); - let type_name = data_type.name(); - - let func = if data_type.is_nullable() { - CastFunction::create_try(&func_name, &type_name, from_type) - } else { - CastFunction::create(&func_name, &type_name, from_type) - }?; - - let function = ActionFunction { - name: expr.column_name(), - func_name, - func, - arg_names: vec![sub_expr.column_name()], - arg_types: vec![sub_expr.to_data_type(&self.schema)?], - return_type, - }; - - self.actions - .push(LegacyExpressionAction::Function(function)); - } - LegacyExpression::MapAccess { args, .. } => { - let arg_types = args - .iter() - .map(|action| action.to_data_type(&self.schema)) - .collect::>>()?; - - let arg_types2: Vec<&DataTypeImpl> = arg_types.iter().collect(); - - let func_name = "get"; - let func = FunctionFactory::instance().get(func_name, &arg_types2)?; - let return_type = func.return_type(); - - let function = ActionFunction { - name: expr.column_name(), - func_name: func_name.to_string(), - func, - arg_names: args.iter().map(|action| action.column_name()).collect(), - arg_types, - return_type, - }; - - self.actions - .push(LegacyExpressionAction::Function(function)); - } - } - Ok(()) - } -} diff --git a/src/query/legacy-expression/src/column.rs b/src/query/legacy-expression/src/column.rs deleted file mode 100644 index ee68e56f878e..000000000000 --- a/src/query/legacy-expression/src/column.rs +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use crate::LegacyExpression; - -pub fn col(name: &str) -> LegacyExpression { - LegacyExpression::Column(name.to_string()) -} diff --git a/src/query/legacy-expression/src/common.rs b/src/query/legacy-expression/src/common.rs deleted file mode 100644 index b36aa0f9e75a..000000000000 --- a/src/query/legacy-expression/src/common.rs +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
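The ExpressionChain removed above lowers an expression tree into a flat, post-order list of actions, so evaluation becomes one linear pass in which every action only reads results emitted before it. A minimal, self-contained sketch of that lowering; the Expr and Action types here are illustrative stand-ins, not the deleted LegacyExpression/LegacyExpressionAction API:

// Stand-in for LegacyExpression: columns, constants, and binary calls.
enum Expr {
    Column(String),
    Literal(i64),
    Binary { op: String, left: Box<Expr>, right: Box<Expr> },
}

// Stand-in for LegacyExpressionAction; `name` is the computed column name.
#[derive(Debug)]
enum Action {
    Input { name: String },
    Constant { name: String, value: i64 },
    Function { name: String, op: String, args: Vec<String> },
}

// Post-order walk: children are pushed before their parent, the invariant
// add_expr relied on so each ActionFunction can refer to earlier outputs.
fn flatten(expr: &Expr, out: &mut Vec<Action>) -> String {
    match expr {
        Expr::Column(name) => {
            out.push(Action::Input { name: name.clone() });
            name.clone()
        }
        Expr::Literal(v) => {
            let name = v.to_string();
            out.push(Action::Constant { name: name.clone(), value: *v });
            name
        }
        Expr::Binary { op, left, right } => {
            let l = flatten(left, out);
            let r = flatten(right, out);
            let name = format!("({} {} {})", l, op, r);
            out.push(Action::Function {
                name: name.clone(),
                op: op.clone(),
                args: vec![l, r],
            });
            name
        }
    }
}

fn main() {
    // (number + 5) lowers to: Input, Constant, then the Function using both.
    let expr = Expr::Binary {
        op: "+".to_string(),
        left: Box::new(Expr::Column("number".to_string())),
        right: Box::new(Expr::Literal(5)),
    };
    let mut actions = Vec::new();
    flatten(&expr, &mut actions);
    println!("{:#?}", actions);
}

The deleted code achieved the same effect through ExpressionVisitor::post_visit rather than direct recursion, which is why add_expr never has to descend into sub-expressions itself.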
- -use std::collections::HashSet; - -use common_datavalues::prelude::*; -use common_exception::ErrorCode; -use common_exception::Result; -use common_functions::scalars::FunctionFactory; - -use crate::validate_function_arg; -use crate::ExpressionVisitor; -use crate::LegacyExpression; -use crate::Recursion; - -pub struct ExpressionDataTypeVisitor { - stack: Vec, - input_schema: DataSchemaRef, -} - -impl ExpressionDataTypeVisitor { - pub fn create(input_schema: DataSchemaRef) -> ExpressionDataTypeVisitor { - ExpressionDataTypeVisitor { - input_schema, - stack: vec![], - } - } - - pub fn finalize(mut self) -> Result { - match self.stack.len() { - 1 => Ok(self.stack.remove(0)), - _ => Err(ErrorCode::LogicalError( - "Stack has too many elements in ExpressionDataTypeVisitor::finalize", - )), - } - } - - fn visit_function(mut self, op: &str, args_size: usize) -> Result { - let features = FunctionFactory::instance().get_features(op)?; - validate_function_arg( - op, - args_size, - features.variadic_arguments, - features.num_arguments, - )?; - - let mut arguments = Vec::with_capacity(args_size); - for index in 0..args_size { - arguments.push(match self.stack.pop() { - None => Err(ErrorCode::LogicalError(format!( - "Expected {} arguments, actual {}.", - args_size, index - ))), - Some(element) => Ok(element), - }?); - } - - let arguments: Vec<&DataTypeImpl> = arguments.iter().collect(); - - let function = FunctionFactory::instance().get(op, &arguments)?; - let return_type = function.return_type(); - self.stack.push(return_type); - Ok(self) - } -} - -impl ExpressionVisitor for ExpressionDataTypeVisitor { - fn pre_visit(self, _expr: &LegacyExpression) -> Result> { - Ok(Recursion::Continue(self)) - } - - fn post_visit(mut self, expr: &LegacyExpression) -> Result { - match expr { - LegacyExpression::Column(s) => { - let field = self.input_schema.field_with_name(s)?; - self.stack.push(field.data_type().clone()); - Ok(self) - } - LegacyExpression::Wildcard => Result::Err(ErrorCode::IllegalDataType( - "Wildcard expressions are not valid to get return type", - )), - LegacyExpression::QualifiedColumn(_) => Err(ErrorCode::LogicalError( - "QualifiedColumn should be resolve in analyze.", - )), - LegacyExpression::Literal { data_type, .. } => { - self.stack.push(data_type.clone()); - Ok(self) - } - LegacyExpression::BinaryExpression { op, .. } => self.visit_function(op, 2), - LegacyExpression::UnaryExpression { op, .. } => self.visit_function(op, 1), - LegacyExpression::ScalarFunction { op, args } => self.visit_function(op, args.len()), - expr @ LegacyExpression::AggregateFunction { args, .. } => { - // Pop arguments. - for index in 0..args.len() { - if self.stack.pop().is_none() { - return Err(ErrorCode::LogicalError(format!( - "Expected {} arguments, actual {}.", - args.len(), - index - ))); - } - } - - let aggregate_function = expr.to_aggregate_function(&self.input_schema)?; - let return_type = aggregate_function.return_type()?; - - self.stack.push(return_type); - Ok(self) - } - - LegacyExpression::Cast { data_type, .. } => { - let inner_type = match self.stack.pop() { - None => Err(ErrorCode::LogicalError( - "Cast expr expected 1 arguments, actual 0.", - )), - Some(_) => Ok(data_type), - }?; - - self.stack.push(inner_type.clone()); - Ok(self) - } - LegacyExpression::MapAccess { args, .. } => self.visit_function("get", args.len()), - LegacyExpression::Alias(_, _) | LegacyExpression::Sort { .. } => Ok(self), - } - } -} - -// This visitor is for recursively visiting expression tree and collects all columns. 
-pub struct RequireColumnsVisitor { - pub required_columns: HashSet, -} - -impl RequireColumnsVisitor { - pub fn default() -> Self { - Self { - required_columns: HashSet::new(), - } - } - - pub fn collect_columns_from_expr(expr: &LegacyExpression) -> Result> { - let mut visitor = Self::default(); - visitor = expr.accept(visitor)?; - Ok(visitor.required_columns) - } -} - -impl ExpressionVisitor for RequireColumnsVisitor { - fn pre_visit(self, expr: &LegacyExpression) -> Result> { - match expr { - LegacyExpression::Column(c) => { - let mut v = self; - v.required_columns.insert(c.clone()); - Ok(Recursion::Continue(v)) - } - _ => Ok(Recursion::Continue(self)), - } - } -} diff --git a/src/query/legacy-expression/src/function.rs b/src/query/legacy-expression/src/function.rs deleted file mode 100644 index d2e4af948617..000000000000 --- a/src/query/legacy-expression/src/function.rs +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use crate::LegacyExpression; - -/// return a new expression l r. -fn binary_expr(l: LegacyExpression, op: &str, r: LegacyExpression) -> LegacyExpression { - LegacyExpression::BinaryExpression { - op: op.to_string(), - left: Box::new(l), - right: Box::new(r), - } -} - -/// Add binary function. -pub fn add(left: LegacyExpression, right: LegacyExpression) -> LegacyExpression { - binary_expr(left, "+", right) -} - -/// Sub binary function. -pub fn sub(left: LegacyExpression, right: LegacyExpression) -> LegacyExpression { - binary_expr(left, "-", right) -} - -/// Not. -pub fn not(other: LegacyExpression) -> LegacyExpression { - LegacyExpression::UnaryExpression { - op: "not".to_string(), - expr: Box::new(other), - } -} - -// Neg. -pub fn neg(other: LegacyExpression) -> LegacyExpression { - LegacyExpression::UnaryExpression { - op: "negate".to_string(), - expr: Box::new(other), - } -} - -/// Mod binary function. -pub fn modular(left: LegacyExpression, right: LegacyExpression) -> LegacyExpression { - binary_expr(left, "%", right) -} - -/// sum() aggregate function. -pub fn sum(other: LegacyExpression) -> LegacyExpression { - LegacyExpression::AggregateFunction { - op: "sum".to_string(), - distinct: false, - params: vec![], - args: vec![other], - } -} - -/// avg() aggregate function. -pub fn avg(other: LegacyExpression) -> LegacyExpression { - LegacyExpression::AggregateFunction { - op: "avg".to_string(), - distinct: false, - params: vec![], - args: vec![other], - } -} - -impl LegacyExpression { - /// And. - #[must_use] - pub fn and(&self, other: LegacyExpression) -> LegacyExpression { - binary_expr(self.clone(), "and", other) - } - - #[must_use] - pub fn or(&self, other: LegacyExpression) -> LegacyExpression { - binary_expr(self.clone(), "or", other) - } - - /// Equal. - #[must_use] - pub fn eq(&self, other: LegacyExpression) -> LegacyExpression { - binary_expr(self.clone(), "=", other) - } - - /// Not equal. 
- #[must_use] - pub fn not_eq(&self, other: LegacyExpression) -> LegacyExpression { - binary_expr(self.clone(), "!=", other) - } - - /// Greater than. - #[must_use] - pub fn gt(&self, other: LegacyExpression) -> LegacyExpression { - binary_expr(self.clone(), ">", other) - } - - /// Greater than or equal to. - #[must_use] - pub fn gt_eq(&self, other: LegacyExpression) -> LegacyExpression { - binary_expr(self.clone(), ">=", other) - } - - /// Less than. - #[must_use] - pub fn lt(&self, other: LegacyExpression) -> LegacyExpression { - binary_expr(self.clone(), "<", other) - } - - /// Less than or equal to. - #[must_use] - pub fn lt_eq(&self, other: LegacyExpression) -> LegacyExpression { - binary_expr(self.clone(), "<=", other) - } - - /// Alias. - #[must_use] - pub fn alias(&self, alias: &str) -> LegacyExpression { - LegacyExpression::Alias(alias.to_string(), Box::from(self.clone())) - } -} diff --git a/src/query/legacy-expression/src/lib.rs b/src/query/legacy-expression/src/lib.rs deleted file mode 100644 index ba0bafc47c56..000000000000 --- a/src/query/legacy-expression/src/lib.rs +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -mod action; -mod chain; -mod column; -mod common; -mod expression; -mod function; -mod literal; -mod monotonicity; -mod validator; -mod visitor; - -pub use action::*; -pub use chain::*; -pub use column::*; -pub use common::*; -pub use expression::*; -pub use function::*; -pub use literal::*; -pub use monotonicity::*; -pub use validator::*; -pub use visitor::*; diff --git a/src/query/legacy-expression/src/literal.rs b/src/query/legacy-expression/src/literal.rs deleted file mode 100644 index 941c77812f03..000000000000 --- a/src/query/legacy-expression/src/literal.rs +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use common_datavalues::DataValue; - -use crate::LegacyExpression; - -pub trait Literal { - fn to_literal(&self) -> LegacyExpression; -} - -impl Literal for &[u8] { - fn to_literal(&self) -> LegacyExpression { - LegacyExpression::create_literal(DataValue::String(self.to_vec())) - } -} - -impl Literal for Vec { - fn to_literal(&self) -> LegacyExpression { - LegacyExpression::create_literal(DataValue::String(self.clone())) - } -} - -macro_rules! 
make_literal { - ($TYPE:ty, $SUPER: ident, $SCALAR:ident) => { - #[allow(missing_docs)] - impl Literal for $TYPE { - fn to_literal(&self) -> LegacyExpression { - LegacyExpression::create_literal(DataValue::$SCALAR(*self as $SUPER)) - } - } - }; -} - -make_literal!(bool, bool, Boolean); -make_literal!(f32, f64, Float64); -make_literal!(f64, f64, Float64); - -make_literal!(i8, i64, Int64); -make_literal!(i16, i64, Int64); -make_literal!(i32, i64, Int64); -make_literal!(i64, i64, Int64); - -make_literal!(u8, u64, UInt64); -make_literal!(u16, u64, UInt64); -make_literal!(u32, u64, UInt64); -make_literal!(u64, u64, UInt64); - -pub fn lit(n: T) -> LegacyExpression { - n.to_literal() -} - -pub fn lit_null() -> LegacyExpression { - LegacyExpression::create_literal(DataValue::Null) -} diff --git a/src/query/legacy-expression/src/monotonicity.rs b/src/query/legacy-expression/src/monotonicity.rs deleted file mode 100644 index 283e1f483873..000000000000 --- a/src/query/legacy-expression/src/monotonicity.rs +++ /dev/null @@ -1,233 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::HashMap; - -use common_datavalues::prelude::*; -use common_datavalues::DataField; -use common_datavalues::DataSchemaRef; -use common_exception::ErrorCode; -use common_exception::Result; -use common_functions::scalars::Function; -use common_functions::scalars::FunctionContext; -use common_functions::scalars::FunctionFactory; -use common_functions::scalars::Monotonicity; - -use crate::ExpressionVisitor; -use crate::LegacyExpression; -use crate::Recursion; - -// ExpressionMonotonicityVisitor visit the expression tree to calculate monotonicity. -// For example, a function of Add(Neg(number), 5) for number < -100 will have a tree like this: -// -// . MonotonicityNode::Function -- 'Add' -// (mono: is_positive=true, Range{105, MAX}) -// / \ -// / \ -// MonotonicityNode::Function -- f(x)=-x Monotonicity::Constant -- 5 -// (mono: is_positive=true, range{100, MAX}) -// / -// / -// MonotonicityNode::Function -- f(x)=x -// (range{MIN, -100}) -// -// The structure of the tree is basically the structure of the expression. -// Simple depth first search visit the expression tree and gete monotonicity from -// every function. Each function is responsible to implement its own monotonicity -// function. -#[derive(Clone)] -pub struct ExpressionMonotonicityVisitor { - input_schema: DataSchemaRef, - - // HashMap - // variable_left: the variable range left. - // variable_right: the variable range right. 
- variables: HashMap, Option)>, - - stack: Vec<(DataTypeImpl, Monotonicity)>, - - single_point: bool, -} - -impl ExpressionMonotonicityVisitor { - fn create( - input_schema: DataSchemaRef, - variables: HashMap, Option)>, - single_point: bool, - ) -> Self { - Self { - input_schema, - variables, - stack: vec![], - single_point, - } - } - - pub fn finalize(mut self) -> Result { - match self.stack.len() { - 1 => { - let (_, monotonic) = self.stack.remove(0); - Ok(monotonic) - } - _ => Err(ErrorCode::LogicalError( - "Stack has too many elements in ExpressionMonotonicityVisitor::finalize", - )), - } - } - - fn try_calculate_boundary( - func: &dyn Function, - result_type: &DataTypeImpl, - args: Vec>, - ) -> Result> { - if args.iter().any(|col| col.is_none()) { - Ok(None) - } else { - let input_columns = args - .into_iter() - .map(|col_opt| col_opt.unwrap()) - .collect::>(); - // TODO(veeupup): whether we need to pass function context here? - let col = func.eval(FunctionContext::default(), &input_columns, 1)?; - let data_field = DataField::new("dummy", result_type.clone()); - let data_column_field = ColumnWithField::new(col, data_field); - Ok(Some(data_column_field)) - } - } - - fn visit_function(mut self, op: &str, args_size: usize) -> Result { - let mut left_vec = Vec::with_capacity(args_size); - let mut right_vec = Vec::with_capacity(args_size); - let mut arg_types = Vec::with_capacity(args_size); - let mut monotonicity_vec = Vec::with_capacity(args_size); - - for index in 0..args_size { - match self.stack.pop() { - None => { - return Err(ErrorCode::LogicalError(format!( - "Expected {} arguments, actual {}.", - args_size, index - ))); - } - Some((arg_type, monotonic)) => { - left_vec.push(monotonic.left.clone()); - right_vec.push(monotonic.right.clone()); - arg_types.push(arg_type); - monotonicity_vec.push(monotonic); - } - } - } - - let instance = FunctionFactory::instance(); - - let arg_types: Vec<&DataTypeImpl> = arg_types.iter().collect(); - let func = instance.get(op, &arg_types)?; - - let return_type = func.return_type(); - let mut monotonic = match self.single_point { - false => func.get_monotonicity(monotonicity_vec.as_ref())?, - true => { - let features = instance.get_features(op)?; - if features.is_deterministic { - Monotonicity::create_constant() - } else { - Monotonicity::default() - } - } - }; - - // Neither a monotonic expression nor constant, interrupt the traversal and return an error directly. - if !monotonic.is_monotonic && !monotonic.is_constant { - return Err(ErrorCode::UnknownException(format!( - "Function '{}' is not monotonic in the variables range", - op - ))); - } - - monotonic.left = Self::try_calculate_boundary(func.as_ref(), &return_type, left_vec)?; - monotonic.right = Self::try_calculate_boundary(func.as_ref(), &return_type, right_vec)?; - - self.stack.push((return_type, monotonic)); - Ok(self) - } - - /// Check whether the expression is monotonic or not. The left should be <= right. - /// Return the monotonicity information, together with column name if any. 
- pub fn check_expression( - schema: DataSchemaRef, - expr: &LegacyExpression, - variables: HashMap, Option)>, - single_point: bool, - ) -> Monotonicity { - let visitor = Self::create(schema, variables, single_point); - visitor.visit(expr).map_or(Monotonicity::default(), |v| { - v.finalize().unwrap_or_else(|_| Monotonicity::default()) - }) - } -} - -impl ExpressionVisitor for ExpressionMonotonicityVisitor { - fn pre_visit(self, _expr: &LegacyExpression) -> Result> { - Ok(Recursion::Continue(self)) - } - - fn post_visit(mut self, expr: &LegacyExpression) -> Result { - match expr { - LegacyExpression::Column(s) => { - let (left, right) = self.variables.get(s).ok_or_else(|| { - ErrorCode::BadArguments(format!("Cannot find the column name '{:?}'", *s)) - })?; - - let field = self.input_schema.field_with_name(s)?; - let return_type = field.data_type(); - - let monotonic = Monotonicity { - is_monotonic: true, - is_positive: true, - is_constant: false, - left: left.clone(), - right: right.clone(), - }; - - self.stack.push((return_type.clone(), monotonic)); - Ok(self) - } - LegacyExpression::Literal { - value, - column_name, - data_type, - } => { - let name = column_name.clone().unwrap_or_else(|| value.to_string()); - let data_field = DataField::new(&name, data_type.clone()); - let col = data_type.create_constant_column(value, 1)?; - let data_column_field = ColumnWithField::new(col, data_field); - let monotonic = Monotonicity { - is_monotonic: true, - is_positive: true, - is_constant: true, - left: Some(data_column_field.clone()), - right: Some(data_column_field), - }; - - self.stack.push((data_type.clone(), monotonic)); - Ok(self) - } - LegacyExpression::BinaryExpression { op, .. } => self.visit_function(op, 2), - LegacyExpression::UnaryExpression { op, .. } => self.visit_function(op, 1), - LegacyExpression::ScalarFunction { op, args } => self.visit_function(op, args.len()), - // Todo: Expression::Cast - _ => Err(ErrorCode::UnknownException("Unable to get monotonicity")), - } - } -} diff --git a/src/query/legacy-expression/src/validator.rs b/src/query/legacy-expression/src/validator.rs deleted file mode 100644 index 40d280a229d4..000000000000 --- a/src/query/legacy-expression/src/validator.rs +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
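The monotonicity visitor deleted above pushes a (data type, Monotonicity) pair per node and, for each function, both combines the children's is_monotonic/is_positive flags and re-evaluates the function at the range endpoints to obtain the output range. A stripped-down sketch of that flag-and-endpoint algebra for a single variable; Mono, negate and add_const are simplifications of the deleted Monotonicity API, and in this sketch is_positive tracks the mathematical direction while endpoints are kept ordered as left <= right:

// Simplified monotonicity state over a single variable's range.
#[derive(Clone, Copy, Debug)]
struct Mono {
    is_monotonic: bool,
    is_positive: bool, // increasing in the variable
    left: f64,         // f evaluated at the range's low end
    right: f64,        // f evaluated at the range's high end
}

// f(x) = -x flips the direction; endpoints are negated and swapped so the
// stored range stays ordered, echoing try_calculate_boundary's re-evaluation.
fn negate(m: Mono) -> Mono {
    Mono {
        is_monotonic: m.is_monotonic,
        is_positive: !m.is_positive,
        left: -m.right,
        right: -m.left,
    }
}

// f(x) = g(x) + c keeps the direction and shifts both endpoints.
fn add_const(m: Mono, c: f64) -> Mono {
    Mono { left: m.left + c, right: m.right + c, ..m }
}

fn main() {
    // The example from the deleted doc comment: Add(Neg(number), 5)
    // for number in (MIN, -100].
    let number = Mono {
        is_monotonic: true,
        is_positive: true,
        left: f64::NEG_INFINITY,
        right: -100.0,
    };
    let result = add_const(negate(number), 5.0);
    // Still monotonic, with the output range [105, MAX).
    println!("{:?}", result);
}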
- -use common_datavalues::DataSchemaRef; -use common_exception::ErrorCode; -use common_exception::Result; -use common_functions::scalars::FunctionFactory; - -use crate::ExpressionVisitor; -use crate::LegacyExpression; -use crate::Recursion; - -// Visitor the expressions to do some validator -struct ExpressionValidator<'a, F> -where F: Fn(&LegacyExpression) -> Result<()> -{ - error: Option, - test_fn: &'a F, -} - -impl<'a, F> ExpressionValidator<'a, F> -where F: Fn(&LegacyExpression) -> Result<()> -{ - /// Create a new finder with the `test_fn` - fn new(test_fn: &'a F) -> Self { - Self { - error: None, - test_fn, - } - } -} - -impl<'a, F> ExpressionVisitor for ExpressionValidator<'a, F> -where F: Fn(&LegacyExpression) -> Result<()> -{ - fn pre_visit(self, expr: &LegacyExpression) -> Result> { - match (self.test_fn)(expr) { - Ok(()) => Ok(Recursion::Continue(self)), - Err(e) => Ok(Recursion::Stop(ExpressionValidator { - error: Some(e), - test_fn: self.test_fn, - })), - } - } -} - -pub fn validate_function_arg( - name: &str, - args_len: usize, - variadic_arguments: Option<(usize, usize)>, - num_arguments: usize, -) -> Result<()> { - match variadic_arguments { - Some((start, end)) => { - if args_len < start || args_len > end { - Err(ErrorCode::NumberArgumentsNotMatch(format!( - "Function `{}` expect to have [{}, {}] arguments, but got {}", - name, start, end, args_len - ))) - } else { - Ok(()) - } - } - None => { - if num_arguments != args_len { - Err(ErrorCode::NumberArgumentsNotMatch(format!( - "Function `{}` expect to have {} arguments, but got {}", - name, num_arguments, args_len - ))) - } else { - Ok(()) - } - } - } -} - -// Can works before expression,filter,having in PlanBuilder -pub fn validate_expression(expr: &LegacyExpression, schema: &DataSchemaRef) -> Result<()> { - let _ = expr.to_data_field(schema)?; - let validator = ExpressionValidator::new(&|expr: &LegacyExpression| match expr { - LegacyExpression::ScalarFunction { op, args } => { - let features = FunctionFactory::instance().get_features(op)?; - validate_function_arg( - op, - args.len(), - features.variadic_arguments, - features.num_arguments, - ) - } - - // Currently no need to check UnaryExpression and BinaryExpression - // todo: AggregateFunction validation after generic AggregateFunctions - _ => Ok(()), - }); - - let validator = expr.accept(validator)?; - match validator.error { - Some(err) => Err(err), - None => Ok(()), - } -} diff --git a/src/query/legacy-expression/src/visitor.rs b/src/query/legacy-expression/src/visitor.rs deleted file mode 100644 index ba9c4c527c00..000000000000 --- a/src/query/legacy-expression/src/visitor.rs +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use common_exception::Result; - -use crate::LegacyExpression; - -/// Controls how the visitor recursion should proceed. -pub enum Recursion { - /// Attempt to visit all the children, recursively, of this expression. 
- Continue(V), - /// Do not visit the children of this expression, though the walk - /// of parents of this expression will not be affected - Stop(V), -} - -/// Encode the traversal of an expression tree. When passed to -/// `Expr::accept`, `ExpressionVisitor::visit` is invoked -/// recursively on all nodes of an expression tree. See the comments -/// on `Expr::accept` for details on its use -pub trait ExpressionVisitor: Sized { - /// Invoked before any children of `expr` are visisted. - fn pre_visit(self, expr: &LegacyExpression) -> Result>; - - fn visit(mut self, predecessor_expr: &LegacyExpression) -> Result { - let mut stack = vec![RecursionProcessing::Call(predecessor_expr)]; - while let Some(element) = stack.pop() { - match element { - RecursionProcessing::Ret(expr) => { - self = self.post_visit(expr)?; - } - RecursionProcessing::Call(expr) => { - stack.push(RecursionProcessing::Ret(expr)); - self = match self.pre_visit(expr)? { - Recursion::Stop(visitor) => visitor, - Recursion::Continue(visitor) => { - match expr { - LegacyExpression::Alias(_, expr) => { - stack.push(RecursionProcessing::Call(expr)) - } - LegacyExpression::BinaryExpression { left, right, .. } => { - stack.push(RecursionProcessing::Call(left)); - stack.push(RecursionProcessing::Call(right)); - } - LegacyExpression::UnaryExpression { expr, .. } => { - stack.push(RecursionProcessing::Call(expr)); - } - LegacyExpression::ScalarFunction { args, .. } => { - for arg in args { - stack.push(RecursionProcessing::Call(arg)); - } - } - LegacyExpression::AggregateFunction { args, .. } => { - for arg in args { - stack.push(RecursionProcessing::Call(arg)); - } - } - LegacyExpression::Cast { expr, .. } => { - stack.push(RecursionProcessing::Call(expr)); - } - LegacyExpression::Sort { expr, .. } => { - stack.push(RecursionProcessing::Call(expr)); - } - LegacyExpression::MapAccess { args, .. } => { - for arg in args { - stack.push(RecursionProcessing::Call(arg)); - } - } - _ => {} - }; - - visitor - } - } - } - } - } - - Ok(self) - } - - /// Invoked after all children of `expr` are visited. Default - /// implementation does nothing. - fn post_visit(self, _expr: &LegacyExpression) -> Result { - Ok(self) - } -} - -impl LegacyExpression { - /// Performs a depth first walk of an expression and - /// its children, calling [`ExpressionVisitor::pre_visit`] and - /// `visitor.post_visit`. - /// - /// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) to - /// separate expression algorithms from the structure of the - /// `Expr` tree and make it easier to add new types of expressions - /// and algorithms that walk the tree. - /// - /// For an expression tree such as - /// ```text - /// BinaryExpr (GT) - /// left: Column("foo") - /// right: Column("bar") - /// ``` - /// - /// The nodes are visited using the following order - /// ```text - /// pre_visit(ScalarFunction(GT)) - /// pre_visit(Column("foo")) - /// post_visit(Column("foo")) - /// pre_visit(Column("bar")) - /// post_visit(Column("bar")) - /// post_visit(ScalarFunction(GT)) - /// ``` - /// - /// If an Err result is returned, recursion is stopped immediately - pub fn accept(&self, visitor: V) -> Result { - let visitor = match visitor.pre_visit(self)? 
{ - Recursion::Continue(visitor) => visitor, - // If the recursion should stop, do not visit children - Recursion::Stop(visitor) => return Ok(visitor), - }; - - let visitor = visitor.visit(self)?; - visitor.post_visit(self) - } -} - -enum RecursionProcessing<'a> { - Call(&'a LegacyExpression), - Ret(&'a LegacyExpression), -} diff --git a/src/query/legacy-expression/tests/it/expression.rs b/src/query/legacy-expression/tests/it/expression.rs deleted file mode 100644 index 2bf250018f4a..000000000000 --- a/src/query/legacy-expression/tests/it/expression.rs +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::Arc; - -use common_datavalues::prelude::*; -use common_exception::ErrorCode; -use common_exception::Result; -use common_legacy_expression::*; - -#[test] -fn test_expression_validate() -> Result<()> { - struct Test { - desc: &'static str, - expression: LegacyExpression, - error: Option, - } - - let cases = vec![ - Test { - desc: "typeof-not-pass", - expression: LegacyExpression::ScalarFunction { - op: "typeof".to_string(), - args: vec![], - }, - error: Some(ErrorCode::NumberArgumentsNotMatch( - "Function `typeof` expect to have 1 arguments, but got 0", - )), - }, - Test { - desc: "today-not-pass", - expression: LegacyExpression::ScalarFunction { - op: "today".to_string(), - args: vec![col("a")], - }, - error: Some(ErrorCode::NumberArgumentsNotMatch( - "Function `today` expect to have 0 arguments, but got 1", - )), - }, - Test { - desc: "today-pass", - expression: LegacyExpression::ScalarFunction { - op: "today".to_string(), - args: vec![], - }, - error: None, - }, - ]; - - let schema = Arc::new(DataSchema::new(vec![DataField::new( - "a", - u64::to_data_type(), - )])); - for t in cases.iter() { - let result = validate_expression(&t.expression, &schema); - match t.error { - Some(_) => { - assert_eq!( - t.error.as_ref().unwrap().message(), - result.err().unwrap().message(), - "{}", - t.desc - ); - } - None => assert!(result.is_ok(), "{}", t.desc), - } - } - Ok(()) -} diff --git a/src/query/legacy-expression/tests/it/main.rs b/src/query/legacy-expression/tests/it/main.rs deleted file mode 100644 index 4ee7e1e867d7..000000000000 --- a/src/query/legacy-expression/tests/it/main.rs +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
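The visit method deleted above in visitor.rs trades native recursion for an explicit Vec of RecursionProcessing::Call/Ret frames: Call fires pre_visit and schedules the children, Ret fires post_visit once the whole subtree is done, so post-order is preserved and a deep expression tree cannot overflow the call stack. The same trick in isolation on a toy tree; Node and Frame below are illustrative, not the deleted types:

// Toy expression tree.
enum Node {
    Leaf(i64),
    Add(Box<Node>, Box<Node>),
}

// Mirror of RecursionProcessing: Call schedules a node, Ret marks that all
// of its children have already been processed.
enum Frame<'a> {
    Call(&'a Node),
    Ret(&'a Node),
}

// Post-order fold without native recursion: a second stack collects child
// results, exactly where the deleted code invoked post_visit.
fn post_order_sum(root: &Node) -> i64 {
    let mut frames = vec![Frame::Call(root)];
    let mut values: Vec<i64> = Vec::new();
    while let Some(frame) = frames.pop() {
        match frame {
            Frame::Call(node) => {
                frames.push(Frame::Ret(node));
                if let Node::Add(left, right) = node {
                    // Push right first so the left child pops (visits) first.
                    frames.push(Frame::Call(&**right));
                    frames.push(Frame::Call(&**left));
                }
            }
            Frame::Ret(node) => match node {
                Node::Leaf(v) => values.push(*v),
                Node::Add(_, _) => {
                    let r = values.pop().unwrap();
                    let l = values.pop().unwrap();
                    values.push(l + r);
                }
            },
        }
    }
    values.pop().unwrap()
}

fn main() {
    // (1 + 2) + 3
    let tree = Node::Add(
        Box::new(Node::Add(Box::new(Node::Leaf(1)), Box::new(Node::Leaf(2)))),
        Box::new(Node::Leaf(3)),
    );
    assert_eq!(post_order_sum(&tree), 6);
}

One subtlety visible in the deleted code: it pushes left before right, so children are actually popped right-to-left; that is harmless for visitors that do not depend on sibling order.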
- -mod expression; -mod monotonicity; diff --git a/src/query/legacy-parser/Cargo.toml b/src/query/legacy-parser/Cargo.toml deleted file mode 100644 index a0c941825179..000000000000 --- a/src/query/legacy-parser/Cargo.toml +++ /dev/null @@ -1,19 +0,0 @@ -[package] -name = "common-legacy-parser" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[lib] -doctest = false -test = false - -[dependencies] -common-datavalues = { path = "../datavalues" } -common-exception = { path = "../../common/exception" } -common-functions = { path = "../functions" } -common-legacy-expression = { path = "../legacy-expression" } - -async-trait = "0.1.57" -sqlparser = { git = "https://github.com/datafuse-extras/sqlparser-rs", rev = "7f246e3" } -unicode-segmentation = "^1.2" diff --git a/src/query/legacy-parser/src/analyzer/analyzer_expr_sync.rs b/src/query/legacy-parser/src/analyzer/analyzer_expr_sync.rs deleted file mode 100644 index c4fb193ca1fc..000000000000 --- a/src/query/legacy-parser/src/analyzer/analyzer_expr_sync.rs +++ /dev/null @@ -1,860 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use async_trait::async_trait; -use common_datavalues::prelude::*; -use common_datavalues::type_coercion::merge_types; -use common_exception::ErrorCode; -use common_exception::Result; -use common_functions::aggregates::AggregateFunctionFactory; -use common_legacy_expression::LegacyExpression; -use sqlparser::ast::BinaryOperator; -use sqlparser::ast::DataType as AstDataType; -use sqlparser::ast::DateTimeField; -use sqlparser::ast::Expr; -use sqlparser::ast::Function; -use sqlparser::ast::FunctionArg; -use sqlparser::ast::FunctionArgExpr; -use sqlparser::ast::Ident; -use sqlparser::ast::Query; -use sqlparser::ast::TrimWhereField; -use sqlparser::ast::UnaryOperator; -use sqlparser::ast::Value; -use sqlparser::ast::WindowSpec; - -use crate::analyzer_value_expr::ValueExprAnalyzer; -use crate::sql_common::SQLCommon; -use crate::sql_dialect::SQLDialect; - -#[derive(Clone)] -pub struct ExpressionSyncAnalyzer {} - -impl ExpressionSyncAnalyzer { - pub fn create() -> ExpressionSyncAnalyzer { - ExpressionSyncAnalyzer {} - } - - pub fn analyze(&self, expr: &Expr) -> Result { - let mut stack = Vec::new(); - - // Build RPN for expr. Because async function unsupported recursion - for rpn_item in &ExprRPNBuilder::build(expr)? 
{ - match rpn_item { - ExprRPNItem::Value(v) => Self::analyze_value(v, &mut stack, SQLDialect::MySQL)?, - ExprRPNItem::Identifier(v) => self.analyze_identifier(v, &mut stack)?, - ExprRPNItem::QualifiedIdentifier(v) => self.analyze_identifiers(v, &mut stack)?, - ExprRPNItem::Function(v) => self.analyze_function(v, &mut stack)?, - ExprRPNItem::Cast(v, pg_style) => self.analyze_cast(v, *pg_style, &mut stack)?, - ExprRPNItem::Between(negated) => self.analyze_between(*negated, &mut stack)?, - ExprRPNItem::InList(v) => self.analyze_inlist(v, &mut stack)?, - ExprRPNItem::MapAccess(v) => self.analyze_map_access(v, &mut stack)?, - ExprRPNItem::Array(v) => self.analyze_array(*v, &mut stack)?, - - _ => { - return Err(ErrorCode::LogicalError(format!( - "Logical error: can't analyze {:?} in sync mode, it's a bug", - expr - ))); - } - } - } - - match stack.len() { - 1 => Ok(stack.remove(0)), - _ => Err(ErrorCode::LogicalError( - "Logical error: this is expr rpn bug.", - )), - } - } - - pub fn analyze_function_arg(&self, arg_expr: &FunctionArgExpr) -> Result { - match arg_expr { - FunctionArgExpr::Expr(expr) => self.analyze(expr), - FunctionArgExpr::Wildcard => Ok(LegacyExpression::Wildcard), - FunctionArgExpr::QualifiedWildcard(_) => Err(ErrorCode::SyntaxException(std::format!( - "Unsupported arg statement: {}", - arg_expr - ))), - } - } - - fn analyze_value( - value: &Value, - args: &mut Vec, - typ: impl Into, - ) -> Result<()> { - args.push(ValueExprAnalyzer::analyze(value, typ)?); - Ok(()) - } - - fn analyze_inlist(&self, info: &InListInfo, args: &mut Vec) -> Result<()> { - let mut list = Vec::with_capacity(info.list_size); - for _ in 0..info.list_size { - match args.pop() { - None => { - return Err(ErrorCode::LogicalError("It's a bug.")); - } - Some(arg) => { - list.insert(0, arg); - } - } - } - - let expr = args - .pop() - .ok_or_else(|| ErrorCode::LogicalError("It's a bug."))?; - list.insert(0, expr); - - let op = if info.negated { - "NOT_IN".to_string() - } else { - "IN".to_string() - }; - - args.push(LegacyExpression::ScalarFunction { op, args: list }); - Ok(()) - } - - fn analyze_function( - &self, - info: &FunctionExprInfo, - args: &mut Vec, - ) -> Result<()> { - let mut arguments = Vec::with_capacity(info.args_count); - for _ in 0..info.args_count { - match args.pop() { - None => { - return Err(ErrorCode::LogicalError("It's a bug.")); - } - Some(arg) => { - arguments.insert(0, arg); - } - } - } - - args.push( - match AggregateFunctionFactory::instance().check(&info.name) { - true => { - return Err(ErrorCode::LogicalError( - "Unsupport aggregate function, it's a bug.", - )); - } - false => match info.kind { - OperatorKind::Unary => Self::unary_function(info, &arguments), - OperatorKind::Binary => Self::binary_function(info, &arguments), - OperatorKind::Other => Self::other_function(info, &arguments), - }, - }?, - ); - Ok(()) - } - - fn other_function( - info: &FunctionExprInfo, - args: &[LegacyExpression], - ) -> Result { - let op = info.name.clone(); - let arguments = args.to_owned(); - Ok(LegacyExpression::ScalarFunction { - op, - args: arguments, - }) - } - - fn unary_function( - info: &FunctionExprInfo, - args: &[LegacyExpression], - ) -> Result { - match args.is_empty() { - true => Err(ErrorCode::LogicalError("Unary operator must be one child.")), - false => Ok(LegacyExpression::UnaryExpression { - op: info.name.clone(), - expr: Box::new(args[0].to_owned()), - }), - } - } - - fn binary_function( - info: &FunctionExprInfo, - args: &[LegacyExpression], - ) -> Result { - let op = 
info.name.clone(); - match args.len() < 2 { - true => Err(ErrorCode::LogicalError( - "Binary operator must be two children.", - )), - false => Ok(LegacyExpression::BinaryExpression { - op, - left: Box::new(args[0].to_owned()), - right: Box::new(args[1].to_owned()), - }), - } - } - - fn analyze_identifier( - &self, - ident: &Ident, - arguments: &mut Vec, - ) -> Result<()> { - let column_name = ident.clone().value; - arguments.push(LegacyExpression::Column(column_name)); - Ok(()) - } - - fn analyze_identifiers( - &self, - idents: &[Ident], - arguments: &mut Vec, - ) -> Result<()> { - let mut names = Vec::with_capacity(idents.len()); - - for ident in idents { - names.push(ident.clone().value); - } - - arguments.push(LegacyExpression::QualifiedColumn(names)); - Ok(()) - } - - fn analyze_cast( - &self, - data_type: &DataTypeImpl, - pg_style: bool, - args: &mut Vec, - ) -> Result<()> { - match args.pop() { - None => Err(ErrorCode::LogicalError( - "Cast operator must be one children.", - )), - Some(inner_expr) => { - args.push(LegacyExpression::Cast { - expr: Box::new(inner_expr), - data_type: data_type.clone(), - pg_style, - }); - Ok(()) - } - } - } - - fn analyze_between(&self, negated: bool, args: &mut Vec) -> Result<()> { - if args.len() < 3 { - return Err(ErrorCode::SyntaxException( - "Between must be a ternary expression.", - )); - } - - let s_args = args.split_off(args.len() - 3); - let expression = s_args[0].clone(); - let low_expression = s_args[1].clone(); - let high_expression = s_args[2].clone(); - - match negated { - false => args.push( - expression - .gt_eq(low_expression) - .and(expression.lt_eq(high_expression)), - ), - true => args.push( - expression - .lt(low_expression) - .or(expression.gt(high_expression)), - ), - }; - - Ok(()) - } - - fn analyze_map_access(&self, keys: &[Value], args: &mut Vec) -> Result<()> { - match args.pop() { - None => Err(ErrorCode::LogicalError( - "MapAccess operator must be one children.", - )), - Some(inner_expr) => { - let path_name: String = keys - .iter() - .enumerate() - .map(|(i, k)| match k { - k @ Value::Number(_, _) => format!("[{}]", k), - Value::SingleQuotedString(s) => format!("[\"{}\"]", s), - Value::ColonString(s) => { - if i == 0 { - s.to_string() - } else { - format!(":{}", s) - } - } - Value::PeriodString(s) => format!(".{}", s), - _ => format!("[{}]", k), - }) - .collect(); - - let name = match keys[0] { - Value::ColonString(_) => format!("{}:{}", inner_expr.column_name(), path_name), - _ => format!("{}{}", inner_expr.column_name(), path_name), - }; - let path = LegacyExpression::create_literal(DataValue::String( - path_name.as_bytes().to_vec(), - )); - let arguments = vec![inner_expr, path]; - - args.push(LegacyExpression::MapAccess { - name, - args: arguments, - }); - Ok(()) - } - } - } - - fn analyze_array(&self, nums: usize, args: &mut Vec) -> Result<()> { - let mut values = Vec::with_capacity(nums); - let mut types = Vec::with_capacity(nums); - for _ in 0..nums { - match args.pop() { - None => { - break; - } - Some(inner_expr) => { - if let LegacyExpression::Literal { - value, data_type, .. - } = inner_expr - { - values.push(value); - types.push(data_type); - } - } - }; - } - if values.len() != nums { - return Err(ErrorCode::LogicalError(format!( - "Array must have {} children.", - nums - ))); - } - let inner_type = if types.is_empty() { - NullType::new_impl() - } else { - types - .iter() - .fold(Ok(types[0].clone()), |acc, v| merge_types(&acc?, v)) - .map_err(|e| ErrorCode::LogicalError(e.message()))? 
- }; - values.reverse(); - - let array_value = LegacyExpression::create_literal_with_type( - DataValue::Array(values), - ArrayType::new_impl(inner_type), - ); - args.push(array_value); - Ok(()) - } -} - -pub enum OperatorKind { - Unary, - Binary, - Other, -} - -pub struct FunctionExprInfo { - pub name: String, - pub distinct: bool, - pub args_count: usize, - pub kind: OperatorKind, - pub parameters: Vec, - pub over: Option, -} - -pub struct InListInfo { - pub list_size: usize, - pub negated: bool, -} - -pub enum ExprRPNItem { - Value(Value), - Identifier(Ident), - QualifiedIdentifier(Vec), - Function(FunctionExprInfo), - Wildcard, - Exists(Box), - Subquery(Box), - Cast(DataTypeImpl, bool), - Between(bool), - InList(InListInfo), - MapAccess(Vec), - Array(usize), -} - -impl ExprRPNItem { - pub fn function(name: String, args_count: usize) -> ExprRPNItem { - ExprRPNItem::Function(FunctionExprInfo { - name, - distinct: false, - args_count, - kind: OperatorKind::Other, - parameters: Vec::new(), - over: None, - }) - } - - pub fn binary_operator(name: String) -> ExprRPNItem { - ExprRPNItem::Function(FunctionExprInfo { - name, - distinct: false, - args_count: 2, - kind: OperatorKind::Binary, - parameters: Vec::new(), - over: None, - }) - } - - pub fn unary_operator(name: String) -> ExprRPNItem { - ExprRPNItem::Function(FunctionExprInfo { - name, - distinct: false, - args_count: 1, - kind: OperatorKind::Unary, - parameters: Vec::new(), - over: None, - }) - } -} - -pub struct ExprRPNBuilder { - rpn: Vec, -} - -impl ExprRPNBuilder { - pub fn build(expr: &Expr) -> Result> { - let mut builder = ExprRPNBuilder { rpn: Vec::new() }; - ExprTraverser::accept(expr, &mut builder)?; - Ok(builder.rpn) - } - - fn process_expr(&mut self, expr: &Expr) -> Result<()> { - match expr { - Expr::Value(value) => { - self.rpn.push(ExprRPNItem::Value(value.clone())); - } - Expr::Identifier(ident) => { - self.rpn.push(ExprRPNItem::Identifier(ident.clone())); - } - Expr::CompoundIdentifier(idents) => { - self.rpn - .push(ExprRPNItem::QualifiedIdentifier(idents.to_vec())); - } - Expr::IsNull(_) => { - self.rpn - .push(ExprRPNItem::function(String::from("is_null"), 1)); - } - Expr::IsNotNull(_) => { - self.rpn - .push(ExprRPNItem::function(String::from("is_not_null"), 1)); - } - Expr::UnaryOp { op, .. } => { - match op { - UnaryOperator::Plus => {} - // In order to distinguish it from binary addition. - UnaryOperator::Minus => self - .rpn - .push(ExprRPNItem::unary_operator("NEGATE".to_string())), - _ => self.rpn.push(ExprRPNItem::unary_operator(op.to_string())), - } - } - Expr::BinaryOp { op, .. } => { - self.rpn.push(ExprRPNItem::binary_operator(op.to_string())); - } - Expr::Exists(subquery) => { - self.rpn.push(ExprRPNItem::Exists(subquery.clone())); - } - Expr::Subquery(subquery) => { - self.rpn.push(ExprRPNItem::Subquery(subquery.clone())); - } - Expr::Function(function) => { - self.rpn.push(ExprRPNItem::Function(FunctionExprInfo { - name: function.name.to_string(), - args_count: function.args.len(), - distinct: function.distinct, - kind: OperatorKind::Other, - parameters: function.params.to_owned(), - over: function.over.clone(), - })); - } - Expr::Cast { - data_type, - pg_style, - .. - } => { - self.rpn.push(ExprRPNItem::Cast( - SQLCommon::make_data_type(data_type)?, - *pg_style, - )); - } - Expr::TryCast { data_type, .. 
} => { - let mut ty = SQLCommon::make_data_type(data_type)?; - if ty.can_inside_nullable() { - ty = NullableType::new_impl(ty) - } - self.rpn.push(ExprRPNItem::Cast(ty, false)); - } - Expr::TypedString { data_type, value } => { - self.rpn.push(ExprRPNItem::Value(Value::SingleQuotedString( - value.to_string(), - ))); - self.rpn.push(ExprRPNItem::Cast( - SQLCommon::make_data_type(data_type)?, - false, - )); - } - Expr::Position { .. } => { - let name = String::from("position"); - self.rpn.push(ExprRPNItem::function(name, 2)); - } - Expr::Substring { - substring_from, - substring_for, - .. - } => { - if substring_from.is_none() { - self.rpn - .push(ExprRPNItem::Value(Value::Number(String::from("1"), false))); - } - - let name = String::from("substring"); - match substring_for { - None => self.rpn.push(ExprRPNItem::function(name, 2)), - Some(_) => { - self.rpn.push(ExprRPNItem::function(name, 3)); - } - } - } - Expr::Between { negated, .. } => { - self.rpn.push(ExprRPNItem::Between(*negated)); - } - Expr::Tuple(exprs) => { - let len = exprs.len(); - - if len > 1 { - self.rpn - .push(ExprRPNItem::function(String::from("tuple"), len)); - } - } - Expr::InList { - expr: _, - list, - negated, - } => self.rpn.push(ExprRPNItem::InList(InListInfo { - list_size: list.len(), - negated: *negated, - })), - Expr::Extract { field, .. } => match field { - DateTimeField::Year => self - .rpn - .push(ExprRPNItem::function(String::from("to_year"), 1)), - DateTimeField::Month => self - .rpn - .push(ExprRPNItem::function(String::from("to_month"), 1)), - DateTimeField::Day => self - .rpn - .push(ExprRPNItem::function(String::from("to_day_of_month"), 1)), - DateTimeField::Hour => self - .rpn - .push(ExprRPNItem::function(String::from("to_hour"), 1)), - DateTimeField::Minute => self - .rpn - .push(ExprRPNItem::function(String::from("to_minute"), 1)), - DateTimeField::Second => self - .rpn - .push(ExprRPNItem::function(String::from("to_second"), 1)), - }, - Expr::MapAccess { keys, .. } => { - self.rpn.push(ExprRPNItem::MapAccess(keys.to_owned())); - } - Expr::Trim { trim_where, .. 
} => match trim_where { - None => self - .rpn - .push(ExprRPNItem::function(String::from("trim"), 1)), - Some(_) => { - self.rpn - .push(ExprRPNItem::function(String::from("trim"), 2)); - } - }, - Expr::Array(exprs) => { - self.rpn.push(ExprRPNItem::Array(exprs.len())); - } - _ => (), - } - - Ok(()) - } -} - -pub struct ExprTraverser; - -impl ExprTraverser { - pub fn accept(expr: &Expr, visitor: &mut V) -> Result<()> { - let expr = visitor.pre_visit(expr)?; - visitor.visit(&expr)?; - visitor.post_visit(&expr) - } -} - -#[async_trait] -pub trait ExprVisitor: Sized + Send { - fn pre_visit(&mut self, expr: &Expr) -> Result { - Ok(expr.clone()) - } - - fn visit(&mut self, expr: &Expr) -> Result<()> { - match expr { - Expr::Nested(expr) => ExprTraverser::accept(expr, self), - Expr::Value(value) => self.visit_value(value), - Expr::Identifier(ident) => self.visit_identifier(ident), - Expr::CompoundIdentifier(idents) => self.visit_identifiers(idents), - Expr::IsNull(expr) => self.visit_simple_function(expr, "is_null"), - Expr::IsNotNull(expr) => self.visit_simple_function(expr, "is_not_null"), - Expr::UnaryOp { op, expr } => self.visit_unary_expr(op, expr), - Expr::BinaryOp { left, op, right } => self.visit_binary_expr(left, op, right), - Expr::Exists(subquery) => self.visit_exists(subquery), - Expr::Subquery(subquery) => self.visit_subquery(subquery), - Expr::Function(function) => self.visit_function(function), - Expr::TryCast { expr, data_type } => self.visit_try_cast(expr, data_type), - Expr::Cast { - expr, - data_type, - pg_style, - } => self.visit_cast(expr, data_type, pg_style), - Expr::TypedString { data_type, value } => self.visit_typed_string(data_type, value), - Expr::Position { - substr_expr, - str_expr, - } => self.visit_position(substr_expr, str_expr), - Expr::Substring { - expr, - substring_from, - substring_for, - } => self.visit_substring(expr, substring_from, substring_for), - Expr::Between { - expr, - negated, - low, - high, - } => self.visit_between(expr, negated, low, high), - Expr::Tuple(exprs) => self.visit_tuple(exprs), - Expr::InList { expr, list, .. 
} => self.visit_inlist(expr, list), - Expr::Extract { field, expr } => self.visit_extract(field, expr), - Expr::MapAccess { column, keys } => self.visit_map_access(column, keys), - Expr::Trim { expr, trim_where } => self.visit_trim(expr, trim_where), - Expr::Array(exprs) => self.visit_array(exprs), - other => Result::Err(ErrorCode::SyntaxException(format!( - "Unsupported expression: {}, type: {:?}", - expr, other - ))), - } - } - - fn post_visit(&mut self, _expr: &Expr) -> Result<()> { - Ok(()) - } - - fn visit_inlist(&mut self, expr: &Expr, list: &[Expr]) -> Result<()> { - ExprTraverser::accept(expr, self)?; - for expr in list { - ExprTraverser::accept(expr, self)?; - } - Ok(()) - } - - fn visit_tuple(&mut self, exprs: &[Expr]) -> Result<()> { - match exprs.len() { - 0 => Err(ErrorCode::SyntaxException( - "Tuple must have at least one element.", - )), - 1 => ExprTraverser::accept(&exprs[0], self), - _ => { - for expr in exprs { - ExprTraverser::accept(expr, self)?; - } - - Ok(()) - } - } - } - - fn visit_wildcard(&mut self) -> Result<()> { - Ok(()) - } - - fn visit_value(&mut self, _value: &Value) -> Result<()> { - Ok(()) - } - - fn visit_identifier(&mut self, _ident: &Ident) -> Result<()> { - Ok(()) - } - - fn visit_identifiers(&mut self, _idents: &[Ident]) -> Result<()> { - Ok(()) - } - - fn visit_exists(&mut self, _subquery: &Query) -> Result<()> { - Ok(()) - } - - fn visit_subquery(&mut self, _subquery: &Query) -> Result<()> { - Ok(()) - } - - fn visit_function_arg(&mut self, arg_expr: &FunctionArgExpr) -> Result<()> { - match arg_expr { - FunctionArgExpr::Expr(expr) => ExprTraverser::accept(expr, self), - FunctionArgExpr::Wildcard => self.visit_wildcard(), - FunctionArgExpr::QualifiedWildcard(_) => Err(ErrorCode::SyntaxException(std::format!( - "Unsupported QualifiedWildcard: {}", - arg_expr - ))), - } - } - - fn visit_function(&mut self, function: &Function) -> Result<()> { - for function_arg in &function.args { - match function_arg { - FunctionArg::Named { arg, .. 
} => self.visit_function_arg(arg)?, - FunctionArg::Unnamed(arg) => self.visit_function_arg(arg)?, - }; - } - - if let Some(over) = &function.over { - for partition_by in &over.partition_by { - ExprTraverser::accept(partition_by, self)?; - } - for order_by in &over.order_by { - ExprTraverser::accept(&order_by.expr, self)?; - } - } - - Ok(()) - } - - fn visit_cast( - &mut self, - expr: &Expr, - _data_type: &AstDataType, - _pg_style: &bool, - ) -> Result<()> { - ExprTraverser::accept(expr, self) - } - - fn visit_try_cast(&mut self, expr: &Expr, _data_type: &AstDataType) -> Result<()> { - ExprTraverser::accept(expr, self) - } - - fn visit_typed_string(&mut self, _data_type: &AstDataType, _value: &str) -> Result<()> { - Ok(()) - } - - fn visit_simple_function(&mut self, expr: &Expr, _name: &str) -> Result<()> { - ExprTraverser::accept(expr, self) - } - - fn visit_unary_expr(&mut self, _op: &UnaryOperator, expr: &Expr) -> Result<()> { - ExprTraverser::accept(expr, self) - } - - fn visit_binary_expr(&mut self, left: &Expr, _op: &BinaryOperator, right: &Expr) -> Result<()> { - ExprTraverser::accept(left, self)?; - ExprTraverser::accept(right, self) - } - - fn visit_between( - &mut self, - expr: &Expr, - _negated: &bool, - low: &Expr, - high: &Expr, - ) -> Result<()> { - ExprTraverser::accept(expr, self)?; - ExprTraverser::accept(low, self)?; - ExprTraverser::accept(high, self) - } - - fn visit_position(&mut self, substr_expr: &Expr, str_expr: &Expr) -> Result<()> { - ExprTraverser::accept(substr_expr, self)?; - ExprTraverser::accept(str_expr, self) - } - - fn visit_substring( - &mut self, - expr: &Expr, - from: &Option>, - length: &Option>, - ) -> Result<()> { - ExprTraverser::accept(expr, self)?; - - if let Some(from) = from { - ExprTraverser::accept(from, self)?; - } - - if let Some(length) = length { - ExprTraverser::accept(length, self)?; - } - - Ok(()) - } - - fn visit_extract(&mut self, _field: &DateTimeField, expr: &Expr) -> Result<()> { - ExprTraverser::accept(expr, self) - } - - fn visit_map_access(&mut self, expr: &Expr, _keys: &[Value]) -> Result<()> { - ExprTraverser::accept(expr, self) - } - - fn visit_trim( - &mut self, - expr: &Expr, - trim_where: &Option<(TrimWhereField, Box)>, - ) -> Result<()> { - ExprTraverser::accept(expr, self)?; - - if let Some(trim_where) = trim_where { - ExprTraverser::accept(&trim_where.1, self)?; - } - Ok(()) - } - - fn visit_array(&mut self, exprs: &[Expr]) -> Result<()> { - for expr in exprs { - ExprTraverser::accept(expr, self)?; - } - Ok(()) - } -} - -#[async_trait] -impl ExprVisitor for ExprRPNBuilder { - fn pre_visit(&mut self, expr: &Expr) -> Result { - Ok(expr.clone()) - } - - fn post_visit(&mut self, expr: &Expr) -> Result<()> { - self.process_expr(expr) - } - - fn visit_wildcard(&mut self) -> Result<()> { - self.rpn.push(ExprRPNItem::Wildcard); - Ok(()) - } -} diff --git a/src/query/legacy-parser/src/analyzer/analyzer_value_expr.rs b/src/query/legacy-parser/src/analyzer/analyzer_value_expr.rs deleted file mode 100644 index b2ca3e871bf1..000000000000 --- a/src/query/legacy-parser/src/analyzer/analyzer_value_expr.rs +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
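ExpressionSyncAnalyzer above never recurses over the AST at analysis time: ExprRPNBuilder first serializes the tree into reverse-Polish order (the comment in analyze() notes this is because the async variant cannot recurse), and the analyzer then replays that token sequence against a value stack, each operator popping its operands and pushing one result. The replay loop in miniature; Token and the operator table are toys, not the deleted ExprRPNItem:

// Toy RPN stream: operands push a value, operators consume `argc` of them.
enum Token {
    Num(i64),
    Op { name: &'static str, argc: usize },
}

fn eval_rpn(tokens: &[Token]) -> Result<i64, String> {
    let mut stack: Vec<i64> = Vec::new();
    for token in tokens {
        match token {
            Token::Num(v) => stack.push(*v),
            Token::Op { name, argc } => {
                let (name, argc) = (*name, *argc);
                if stack.len() < argc {
                    return Err(format!("operator `{}` expects {} arguments", name, argc));
                }
                // Pop operands while preserving their original order, as the
                // deleted analyze_function did with arguments.insert(0, arg).
                let args = stack.split_off(stack.len() - argc);
                let result = match (name, args.as_slice()) {
                    ("+", [l, r]) => l + r,
                    ("negate", [x]) => -x,
                    _ => return Err(format!("unknown operator `{}`", name)),
                };
                stack.push(result);
            }
        }
    }
    match stack.len() {
        1 => Ok(stack[0]),
        _ => Err("malformed RPN".to_string()),
    }
}

fn main() {
    // Post-order emission of -(2 + 3): 2 3 + negate
    let rpn = [
        Token::Num(2),
        Token::Num(3),
        Token::Op { name: "+", argc: 2 },
        Token::Op { name: "negate", argc: 1 },
    ];
    assert_eq!(eval_rpn(&rpn), Ok(-5));
}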
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use common_datavalues::prelude::*; -use common_exception::ErrorCode; -use common_exception::Result; -use common_legacy_expression::LegacyExpression; -use sqlparser::ast::DateTimeField; -use sqlparser::ast::Value; - -use crate::sql_dialect::SQLDialect; - -pub struct ValueExprAnalyzer; - -impl ValueExprAnalyzer { - pub fn analyze(value: &Value, typ: impl Into<SQLDialect>) -> Result<LegacyExpression> { - match value { - Value::Null => Self::analyze_null_value(), - Value::Boolean(value) => Self::analyze_bool_value(value), - Value::Number(value, _) => Self::analyze_number_value(value, None), - Value::HexStringLiteral(value) => Self::analyze_number_value(value, Some(16)), - Value::SingleQuotedString(value) => Self::analyze_string_value(value), - Value::DoubleQuotedString(value) => { - // Only the MySQL dialect supports INSERT SQL like this: - // INSERT INTO t VALUES("val"); - // https://github.com/datafuselabs/databend/issues/4861 - if let SQLDialect::MySQL = typ.into() { - Self::analyze_string_value(value) - } else { - Result::Err(ErrorCode::SyntaxException(format!( - "Unsupported value expression: {}, type: {:?}", - value, - Value::DoubleQuotedString(value.to_string()) - ))) - } - } - Value::Interval { - leading_precision: Some(_), - .. - } - | Value::Interval { - fractional_seconds_precision: Some(_), - .. - } - | Value::Interval { - last_field: Some(_), - .. - } => Self::unsupported_interval(value), - Value::Interval { - value, - leading_field, - ..
} => Self::analyze_interval(value, leading_field), - other => Result::Err(ErrorCode::SyntaxException(format!( - "Unsupported value expression: {}, type: {:?}", - value, other - ))), - } - } - - fn analyze_null_value() -> Result<LegacyExpression> { - Ok(LegacyExpression::create_literal(DataValue::Null)) - } - - fn analyze_bool_value(value: &bool) -> Result<LegacyExpression> { - Ok(LegacyExpression::create_literal(DataValue::Boolean(*value))) - } - - fn analyze_number_value(value: &str, radix: Option<u32>) -> Result<LegacyExpression> { - let literal = DataValue::try_from_literal(value, radix)?; - Ok(LegacyExpression::create_literal(literal)) - } - - fn analyze_string_value(value: &str) -> Result<LegacyExpression> { - let data_value = DataValue::String(value.to_string().into_bytes()); - Ok(LegacyExpression::create_literal(data_value)) - } - - fn unsupported_interval(interval: &Value) -> Result<LegacyExpression> { - // TODO: support parsing literal interval like '1 hour' - Err(ErrorCode::SyntaxException(format!( - "Unsupported interval expression: {}.", - interval - ))) - } - - fn analyze_interval(value: &str, unit: &Option<DateTimeField>) -> Result<LegacyExpression> { - // We only accept i32 for number in "interval [num] [year|month|day|hour|minute|second]" - let num = value.parse::<i32>()?; - - // TODO: support default unit for interval - match unit { - None => Err(ErrorCode::SyntaxException( - "Interval must have unit, e.g: '1 HOUR'", - )), - Some(DateTimeField::Year) => Self::interval_literal(num, IntervalKind::Year), - Some(DateTimeField::Month) => Self::interval_literal(num, IntervalKind::Month), - Some(DateTimeField::Day) => Self::interval_literal(num, IntervalKind::Day), - Some(DateTimeField::Hour) => Self::interval_literal(num, IntervalKind::Hour), - Some(DateTimeField::Minute) => Self::interval_literal(num, IntervalKind::Minute), - Some(DateTimeField::Second) => Self::interval_literal(num, IntervalKind::Second), - } - } - - fn interval_literal(num: i32, kind: IntervalKind) -> Result<LegacyExpression> { - Ok(LegacyExpression::Literal { - value: DataValue::Int64(num as i64), - column_name: Some(num.to_string()), - data_type: IntervalType::new_impl(kind), - }) - } -} diff --git a/src/query/legacy-parser/src/analyzer/mod.rs b/src/query/legacy-parser/src/analyzer/mod.rs deleted file mode 100644 index 5a080369a8db..000000000000 --- a/src/query/legacy-parser/src/analyzer/mod.rs +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -pub mod analyzer_expr_sync; -pub mod analyzer_value_expr; diff --git a/src/query/legacy-parser/src/lib.rs b/src/query/legacy-parser/src/lib.rs deleted file mode 100644 index cecc2f6d8bd2..000000000000 --- a/src/query/legacy-parser/src/lib.rs +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#![deny(unused_crate_dependencies)] - -pub use analyzer::analyzer_expr_sync; -pub use analyzer::analyzer_value_expr; -mod analyzer; -mod parser; -pub mod sql_common; -mod sql_dialect; - -pub use parser::ExprParser; -pub use parser::ExpressionParser; -pub use sql_dialect::SQLDialect; diff --git a/src/query/legacy-parser/src/parser/expr_parser.rs b/src/query/legacy-parser/src/parser/expr_parser.rs deleted file mode 100644 index 1d4c88d20196..000000000000 --- a/src/query/legacy-parser/src/parser/expr_parser.rs +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use sqlparser::ast::Expr; -use sqlparser::dialect::MySqlDialect; -use sqlparser::parser::Parser; -use sqlparser::parser::ParserError; -use sqlparser::tokenizer::Token; -use sqlparser::tokenizer::Tokenizer; - -pub struct ExprParser; - -impl ExprParser { - pub fn parse_exprs(expr: &str) -> Result<Vec<Expr>, ParserError> { - let dialect = &MySqlDialect {}; - let mut tokenizer = Tokenizer::new(dialect, expr); - let (tokens, position_map) = tokenizer.tokenize()?; - let mut parser = Parser::new(tokens, position_map, dialect); - - parser.expect_token(&Token::LParen)?; - let exprs = parser.parse_comma_separated(Parser::parse_expr)?; - parser.expect_token(&Token::RParen)?; - - Ok(exprs) - } -} diff --git a/src/query/legacy-parser/src/parser/expression_parser.rs b/src/query/legacy-parser/src/parser/expression_parser.rs deleted file mode 100644 index f51bc972ede1..000000000000 --- a/src/query/legacy-parser/src/parser/expression_parser.rs +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
- -use common_exception::Result; -use common_legacy_expression::LegacyExpression; - -use crate::analyzer_expr_sync::ExpressionSyncAnalyzer; -use crate::ExprParser; - -pub struct ExpressionParser; - -impl ExpressionParser { - pub fn parse_exprs(expr: &str) -> Result<Vec<LegacyExpression>> { - let exprs = ExprParser::parse_exprs(expr)?; - let analyzer = ExpressionSyncAnalyzer::create(); - - let results = exprs - .iter() - .map(|expr| analyzer.analyze(expr)) - .collect::<Result<Vec<LegacyExpression>>>(); - results - } -} diff --git a/src/query/legacy-parser/src/parser/mod.rs b/src/query/legacy-parser/src/parser/mod.rs deleted file mode 100644 index 7374eb03f5bd..000000000000 --- a/src/query/legacy-parser/src/parser/mod.rs +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -mod expr_parser; -mod expression_parser; - -pub use expr_parser::ExprParser; -pub use expression_parser::ExpressionParser; diff --git a/src/query/legacy-parser/src/sql_common.rs b/src/query/legacy-parser/src/sql_common.rs deleted file mode 100644 index 2ec020bd6728..000000000000 --- a/src/query/legacy-parser/src/sql_common.rs +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
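[Editor's note, not part of the patch] The `SQLCommon::short_sql` helper in the file deleted below truncates long INSERT statements on grapheme-cluster boundaries rather than byte offsets, so multi-byte characters such as 'é' are never split. A minimal sketch of the same idea, using the `unicode-segmentation` crate as the deleted code does; the helper name `truncate_graphemes` is ours:

```rust
use unicode_segmentation::UnicodeSegmentation;

/// Keep at most `n` grapheme clusters, appending "..." when truncated.
/// Byte slicing like `&s[..n]` would panic if `n` landed inside a
/// multi-byte character; `graphemes()` never splits one.
fn truncate_graphemes(s: &str, n: usize) -> String {
    let mut out: String = s.graphemes(true).take(n).collect();
    if out.len() < s.len() {
        out.push_str("...");
    }
    out
}

fn main() {
    let q = "INSERT INTO `test` VALUES ('abcé', 'def');";
    // Safe even when the cut falls near the multi-byte 'é'.
    println!("{}", truncate_graphemes(q, 30));
}
```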
- -use std::collections::HashSet; - -use common_datavalues::prelude::*; -use common_exception::ErrorCode; -use common_exception::Result; -use sqlparser::ast::DataType as SQLDataType; -use unicode_segmentation::UnicodeSegmentation; - -pub struct SQLCommon; - -impl SQLCommon { - /// Maps the SQL type to the corresponding Arrow `DataType` - pub fn make_data_type(sql_type: &SQLDataType) -> Result<DataTypeImpl> { - match sql_type { - SQLDataType::TinyInt(_) => Ok(i8::to_data_type()), - SQLDataType::UnsignedTinyInt(_) => Ok(u8::to_data_type()), - SQLDataType::SmallInt(_) => Ok(i16::to_data_type()), - SQLDataType::UnsignedSmallInt(_) => Ok(u16::to_data_type()), - SQLDataType::Int(_) => Ok(i32::to_data_type()), - SQLDataType::UnsignedInt(_) => Ok(u32::to_data_type()), - SQLDataType::BigInt(_) => Ok(i64::to_data_type()), - SQLDataType::UnsignedBigInt(_) => Ok(u64::to_data_type()), - SQLDataType::Char(_) - | SQLDataType::Varchar(_) - | SQLDataType::String - | SQLDataType::Text => Ok(Vu8::to_data_type()), - - SQLDataType::Float(_) => Ok(f32::to_data_type()), - SQLDataType::Decimal(_, _) => Ok(f64::to_data_type()), - SQLDataType::Real | SQLDataType::Double => Ok(f64::to_data_type()), - SQLDataType::Boolean => Ok(bool::to_data_type()), - SQLDataType::Date => Ok(DateType::new_impl()), - // default precision is 6, microseconds - SQLDataType::Timestamp(_) | SQLDataType::DateTime(_) => Ok(TimestampType::new_impl()), - - SQLDataType::Array(sql_type, nullable) => { - let inner_data_type = Self::make_data_type(sql_type)?; - if *nullable { - if inner_data_type.is_null() { - return Result::Err(ErrorCode::IllegalDataType( - "The SQL data type ARRAY(NULL, NULL) is invalid", - )); - } - Ok(ArrayType::new_impl(NullableType::new_impl(inner_data_type))) - } else { - Ok(ArrayType::new_impl(inner_data_type)) - } - } - SQLDataType::Tuple(names, sql_types) => { - let mut inner_data_types = Vec::with_capacity(sql_types.len()); - for sql_type in sql_types { - let inner_data_type = Self::make_data_type(sql_type)?; - inner_data_types.push(inner_data_type); - } - match names { - Some(names) => { - let mut names_set = HashSet::with_capacity(names.len()); - for name in names.iter() { - if !names_set.insert(name.value.clone()) { - return Result::Err(ErrorCode::IllegalDataType( - "The names of tuple elements must be unique", - )); - } - } - let inner_names = names.iter().map(|v| v.value.clone()).collect::<Vec<String>>(); - Ok(StructType::new_impl(Some(inner_names), inner_data_types)) - } - None => Ok(StructType::new_impl(None, inner_data_types)), - } - } - - // Custom types for databend: - // Custom(ObjectName([Ident { value: "uint8", quote_style: None }]) - SQLDataType::Custom(obj) if !obj.0.is_empty() => { - match obj.0[0].value.to_uppercase().as_str() { - "SIGNED" => Ok(i64::to_data_type()), - "UNSIGNED" => Ok(u64::to_data_type()), - - name => { - let factory = TypeFactory::instance(); - let data_type = factory.get(name)?; - Ok(data_type) - } - } - } - _ => Result::Err(ErrorCode::IllegalDataType(format!( - "The SQL data type {sql_type:?} is not implemented", - ))), - } - } - - pub fn short_sql(query: &str) -> String { - let query = query.trim_start(); - if query.len() >= 64 && query[..6].eq_ignore_ascii_case("INSERT") { - // keep first 64 graphemes - String::from_utf8( - query - .graphemes(true) - .take(64) - .flat_map(|g| g.as_bytes().iter()) - .copied() // copied converts &u8 into u8 - .chain(b"...".iter().copied()) - .collect::<Vec<u8>>(), - ) - .unwrap() // by construction, this cannot panic as we extracted unicode grapheme - } else { - query.to_string() -
} - } -} - -#[cfg(test)] -mod test { - use crate::sql_common::SQLCommon; - - const LONG_INSERT_WITH_UNICODE_AT_TRUNCATION_POINT: &str = - "INSERT INTO `test` VALUES ('abcd', 'def'),('abcd', 'def'),('abcé', 'def');"; - - #[test] - #[should_panic] - fn test_invalid_string_truncation() { - // This test checks the INSERT statement did panic with byte truncated string. - // We need to do this to validate that the code of short_sql has fixed this panic! - format!("{}...", &LONG_INSERT_WITH_UNICODE_AT_TRUNCATION_POINT[..64]); - } - - #[test] - fn test_short_sql_truncation_on_unicode() { - // short insert into statements are not truncated - assert_eq!( - SQLCommon::short_sql("INSERT INTO `test` VALUES('abcd', 'def');"), - "INSERT INTO `test` VALUES('abcd', 'def');" - ); - // long one are at 64th char... - let shortned = SQLCommon::short_sql(LONG_INSERT_WITH_UNICODE_AT_TRUNCATION_POINT); - assert_eq!(shortned.len(), 68); // 64 chars with a multibyte one (é) + ... - assert_eq!( - shortned, - "INSERT INTO `test` VALUES ('abcd', 'def'),('abcd', 'def'),('abcé..." - ); - } -} diff --git a/src/query/legacy-parser/src/sql_dialect.rs b/src/query/legacy-parser/src/sql_dialect.rs deleted file mode 100644 index cf795248cffc..000000000000 --- a/src/query/legacy-parser/src/sql_dialect.rs +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -pub enum SQLDialect { - MySQL, - Other, -} diff --git a/src/query/legacy-planners/Cargo.toml b/src/query/legacy-planners/Cargo.toml deleted file mode 100644 index 372188f7645d..000000000000 --- a/src/query/legacy-planners/Cargo.toml +++ /dev/null @@ -1,30 +0,0 @@ -[package] -name = "common-legacy-planners" -version = { workspace = true } -authors = { workspace = true } -license = { workspace = true } -publish = { workspace = true } -edition = { workspace = true } - -[lib] -doctest = false -test = false - -[dependencies] # In alphabetical order -# Workspace dependencies -common-datavalues = { path = "../datavalues" } -common-exception = { path = "../../common/exception" } -common-legacy-expression = { path = "../legacy-expression" } -common-meta-app = { path = "../../meta/app" } -common-meta-types = { path = "../../meta/types" } - -# Github dependencies - -# Crates.io dependencies -once_cell = "1.15.0" -serde = { workspace = true } -serde_json = { workspace = true } -typetag = "0.2.3" - -[dev-dependencies] -pretty_assertions = "1.3.0" diff --git a/src/query/legacy-planners/src/lib.rs b/src/query/legacy-planners/src/lib.rs deleted file mode 100644 index 76f0c3e9b671..000000000000 --- a/src/query/legacy-planners/src/lib.rs +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -mod plan_delete; -mod plan_node_stage; -mod plan_node_stage_table; -mod plan_node_statistics; -mod plan_setting; -mod plan_sink; - -pub use plan_delete::DeletePlan; -pub use plan_node_stage::StageKind; -pub use plan_node_stage_table::StageTableInfo; -pub use plan_node_statistics::Statistics; -pub use plan_partition::PartInfo; -pub use plan_partition::PartInfoPtr; -pub use plan_partition::Partitions; -pub use plan_setting::SettingPlan; -pub use plan_setting::VarValue; -pub use plan_sink::SINK_SCHEMA; diff --git a/src/query/legacy-planners/src/plan_delete.rs b/src/query/legacy-planners/src/plan_delete.rs deleted file mode 100644 index 15edeadd55cd..000000000000 --- a/src/query/legacy-planners/src/plan_delete.rs +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::Arc; - -use common_datavalues::DataSchema; -use common_datavalues::DataSchemaRef; -use common_meta_app::schema::TableIdent; - -use crate::Projection; - -/// # TODO -/// -/// From @xuanwo -/// -/// Ideally, we need to use `Scalar` in DeletePlan.selection. But we met a -/// cycle deps here. So we have to change `selection` into `String` first, and -/// change it into `Scalar` when our `Planner` has been moved out. -/// -/// At this stage, DeletePlan's selection expr will be parsed twice: -/// -/// - Parsed during `bind` to get column index and projection index. -/// - Parsed during `execution` to get the correct columns -/// -/// It's an ugly but necessary price to pay. Without this, we would sink in -/// hell forever. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct DeletePlan { - pub catalog_name: String, - pub database_name: String, - pub table_name: String, - pub table_id: TableIdent, - pub selection: Option<String>, - pub projection: Projection, -} - -impl DeletePlan { - pub fn schema(&self) -> DataSchemaRef { - Arc::new(DataSchema::empty()) - } -} diff --git a/src/query/legacy-planners/src/plan_node_stage.rs b/src/query/legacy-planners/src/plan_node_stage.rs deleted file mode 100644 index 204b47ddc2e0..000000000000 --- a/src/query/legacy-planners/src/plan_node_stage.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] -pub enum StageKind { - Normal, - Expansive, - Merge, -} diff --git a/src/query/legacy-planners/src/plan_node_stage_table.rs b/src/query/legacy-planners/src/plan_node_stage_table.rs deleted file mode 100644 index 3d6826747733..000000000000 --- a/src/query/legacy-planners/src/plan_node_stage_table.rs +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::fmt::Debug; -use std::fmt::Formatter; - -use common_datavalues::DataSchemaRef; -use common_meta_types::UserStageInfo; - -#[derive(serde::Serialize, serde::Deserialize, Clone, PartialEq, Eq)] -pub struct StageTableInfo { - pub schema: DataSchemaRef, - pub stage_info: UserStageInfo, - pub path: String, - pub files: Vec<String>, -} - -impl StageTableInfo { - pub fn schema(&self) -> DataSchemaRef { - self.schema.clone() - } - - pub fn desc(&self) -> String { - self.stage_info.stage_name.clone() - } -} - -impl Debug for StageTableInfo { - // Ignore the schema. - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.stage_info) - } -} diff --git a/src/query/legacy-planners/src/plan_node_statistics.rs b/src/query/legacy-planners/src/plan_node_statistics.rs deleted file mode 100644 index 3a54c946de73..000000000000 --- a/src/query/legacy-planners/src/plan_node_statistics.rs +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use common_meta_app::schema::TableInfo; - -#[derive(serde::Serialize, serde::Deserialize, PartialEq, Eq, Clone, Debug, Default)] -pub struct Statistics { - /// Total rows of the query read. - pub read_rows: usize, - /// Total bytes of the query read. - pub read_bytes: usize, - /// Number of partitions scanned, (after pruning) - pub partitions_scanned: usize, - /// Number of partitions, (before pruning) - pub partitions_total: usize, - /// Is the statistics exact.
- pub is_exact: bool, -} - -impl Statistics { - pub fn new_estimated( - read_rows: usize, - read_bytes: usize, - partitions_scanned: usize, - partitions_total: usize, - ) -> Self { - Statistics { - read_rows, - read_bytes, - partitions_scanned, - partitions_total, - is_exact: false, - } - } - - pub fn new_exact( - read_rows: usize, - read_bytes: usize, - partitions_scanned: usize, - partitions_total: usize, - ) -> Self { - Statistics { - read_rows, - read_bytes, - partitions_scanned, - partitions_total, - is_exact: true, - } - } - - pub fn default_exact() -> Self { - Self { - is_exact: true, - ..Default::default() - } - } - - pub fn clear(&mut self) { - *self = Self::default(); - } - - pub fn get_description(&self, table_info: &TableInfo) -> String { - if self.read_rows > 0 { - format!( - "(Read from {} table, {} Read Rows:{}, Read Bytes:{}, Partitions Scanned:{}, Partitions Total:{})", - table_info.desc, - if self.is_exact { - "Exactly" - } else { - "Approximately" - }, - self.read_rows, - self.read_bytes, - self.partitions_scanned, - self.partitions_total, - ) - } else { - format!("(Read from {} table)", table_info.desc) - } - } -} diff --git a/src/query/legacy-planners/src/plan_setting.rs b/src/query/legacy-planners/src/plan_setting.rs deleted file mode 100644 index 3c2b1359dfe4..000000000000 --- a/src/query/legacy-planners/src/plan_setting.rs +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::Arc; - -use common_datavalues::DataSchema; -use common_datavalues::DataSchemaRef; - -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct VarValue { - pub is_global: bool, - pub variable: String, - pub value: String, -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct SettingPlan { - pub vars: Vec<VarValue>, -} - -impl SettingPlan { - pub fn schema(&self) -> DataSchemaRef { - Arc::new(DataSchema::empty()) - } -} diff --git a/src/query/legacy-planners/src/plan_sink.rs b/src/query/legacy-planners/src/plan_sink.rs deleted file mode 100644 index 9e8b88671c90..000000000000 --- a/src/query/legacy-planners/src/plan_sink.rs +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
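[Editor's note, not part of the patch] `SINK_SCHEMA` in the file deleted below is a process-wide `once_cell::sync::Lazy` value: the schema is built on first access and shared afterwards. A minimal sketch of the pattern (names here are illustrative, not from the codebase):

```rust
use once_cell::sync::Lazy;

// Initialized once, on first dereference; later reads reuse the value.
static DEFAULT_COLUMNS: Lazy<Vec<String>> =
    Lazy::new(|| vec!["seg_loc".to_string(), "seg_info".to_string()]);

fn main() {
    println!("{:?}", *DEFAULT_COLUMNS); // triggers initialization
    println!("{:?}", *DEFAULT_COLUMNS); // reuses the cached value
}
```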
- -use common_datavalues::prelude::*; -use once_cell::sync::Lazy; - -pub static SINK_SCHEMA: Lazy<DataSchemaRef> = Lazy::new(|| { - DataSchemaRefExt::create(vec![ - DataField::new("seg_loc", Vu8::to_data_type()), - DataField::new("seg_info", Vu8::to_data_type()), - ]) -}); diff --git a/src/query/legacy-planners/tests/it/main.rs b/src/query/legacy-planners/tests/it/main.rs deleted file mode 100644 index 1a8376b01aab..000000000000 --- a/src/query/legacy-planners/tests/it/main.rs +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -mod plan_extras; -mod plan_partition; -mod test; diff --git a/src/query/legacy-planners/tests/it/plan_extras.rs b/src/query/legacy-planners/tests/it/plan_extras.rs deleted file mode 100644 index 6742d8eb6288..000000000000 --- a/src/query/legacy-planners/tests/it/plan_extras.rs +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use common_exception::Result; -use common_legacy_planners::*; -use pretty_assertions::assert_eq; - -#[test] -fn test_plan_extras() -> Result<()> { - let extras = Extras::default(); - let expect = - "Extras { projection: None, filters: [], prewhere: None, limit: None, order_by: [] }"; - let actual = format!("{:?}", extras); - assert_eq!(expect, actual); - Ok(()) -} diff --git a/src/query/legacy-planners/tests/it/plan_partition.rs b/src/query/legacy-planners/tests/it/plan_partition.rs deleted file mode 100644 index 8c6e33f027f4..000000000000 --- a/src/query/legacy-planners/tests/it/plan_partition.rs +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
- -use std::any::Any; - -use common_exception::Result; -use common_legacy_planners::PartInfo; - -#[derive(serde::Serialize, serde::Deserialize, PartialEq)] -struct TestPartInfoA { - field_a: usize, - field_b: String, -} - -#[typetag::serde(name = "TestA")] -impl PartInfo for TestPartInfoA { - fn as_any(&self) -> &dyn Any { - self - } - - fn equals(&self, info: &Box<dyn PartInfo>) -> bool { - match info.as_any().downcast_ref::<TestPartInfoA>() { - None => false, - Some(other) => self == other, - } - } -} - -#[derive(serde::Serialize, serde::Deserialize, PartialEq)] -struct TestPartInfoB { - field_a: String, - field_b: u64, -} - -#[typetag::serde(name = "TestB")] -impl PartInfo for TestPartInfoB { - fn as_any(&self) -> &dyn Any { - self - } - - fn equals(&self, info: &Box<dyn PartInfo>) -> bool { - match info.as_any().downcast_ref::<TestPartInfoB>() { - None => false, - Some(other) => self == other, - } - } -} - -#[test] -fn test_serialize_part_info() -> Result<()> { - let info_a: Box<dyn PartInfo> = Box::new(TestPartInfoA { - field_a: 123, - field_b: String::from("456"), - }); - - let info_b: Box<dyn PartInfo> = Box::new(TestPartInfoB { - field_a: String::from("123"), - field_b: 456, - }); - - assert_eq!( - serde_json::to_string(&info_a)?, - "{\"type\":\"TestA\",\"field_a\":123,\"field_b\":\"456\"}" - ); - - assert_eq!( - serde_json::to_string(&info_b)?, - "{\"type\":\"TestB\",\"field_a\":\"123\",\"field_b\":456}" - ); - - Ok(()) -} - -#[test] -fn test_deserialize_part_info() -> Result<()> { - let info_a: Box<dyn PartInfo> = - serde_json::from_str("{\"type\":\"TestA\",\"field_a\":123,\"field_b\":\"456\"}")?; - let test_part_a = info_a.as_any().downcast_ref::<TestPartInfoA>().unwrap(); - assert_eq!(test_part_a.field_a, 123); - assert_eq!(test_part_a.field_b, String::from("456")); - - let info_b = serde_json::from_str::<Box<dyn PartInfo>>( - "{\"type\":\"TestB\",\"field_a\":\"123\",\"field_b\":456}", - )?; - let test_part_a = info_b.as_any().downcast_ref::<TestPartInfoB>().unwrap(); - assert_eq!(test_part_a.field_a, String::from("123")); - assert_eq!(test_part_a.field_b, 456); - - Ok(()) -} - -#[test] -fn test_partial_equals_part_info() -> Result<()> { - let info_a: Box<dyn PartInfo> = Box::new(TestPartInfoA { - field_a: 123, - field_b: String::from("456"), - }); - - let info_b: Box<dyn PartInfo> = Box::new(TestPartInfoB { - field_a: String::from("123"), - field_b: 456, - }); - - assert_ne!(&info_a, &info_b); - assert_eq!(&info_a, &info_a); - assert_eq!(&info_b, &info_b); - Ok(()) -} diff --git a/src/query/legacy-planners/tests/it/test.rs b/src/query/legacy-planners/tests/it/test.rs deleted file mode 100644 index 142f659e15df..000000000000 --- a/src/query/legacy-planners/tests/it/test.rs +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
- -use std::any::Any; - -use common_legacy_planners::PartInfo; - -#[derive(serde::Serialize, serde::Deserialize, PartialEq)] -struct PlannerTestPartInfo {} - -#[typetag::serde(name = "planner_test")] -impl PartInfo for PlannerTestPartInfo { - fn as_any(&self) -> &dyn Any { - self - } - - fn equals(&self, info: &Box<dyn PartInfo>) -> bool { - match info.as_any().downcast_ref::<PlannerTestPartInfo>() { - None => false, - Some(other) => self == other, - } - } -} diff --git a/src/query/pipeline/transforms/Cargo.toml b/src/query/pipeline/transforms/Cargo.toml index 15115d5a8af7..aa4a915aea60 100644 --- a/src/query/pipeline/transforms/Cargo.toml +++ b/src/query/pipeline/transforms/Cargo.toml @@ -9,12 +9,8 @@ doctest = false test = false [dependencies] -common-catalog = { path = "../../catalog" } common-datablocks = { path = "../../datablocks" } -common-datavalues = { path = "../../datavalues" } common-exception = { path = "../../../common/exception" } -common-legacy-expression = { path = "../legacy-expression" } common-pipeline-core = { path = "../core" } async-trait = { version = "0.1.57", package = "async-trait-fn" } -tracing = "0.1.36" diff --git a/src/query/pipeline/transforms/src/processors/mod.rs b/src/query/pipeline/transforms/src/processors/mod.rs index 0080140ed6c4..561f7ae62a48 100644 --- a/src/query/pipeline/transforms/src/processors/mod.rs +++ b/src/query/pipeline/transforms/src/processors/mod.rs @@ -14,4 +14,3 @@ pub mod transforms; pub use transforms::Aborting; -pub use transforms::ExpressionExecutor; diff --git a/src/query/pipeline/transforms/src/processors/transforms/mod.rs b/src/query/pipeline/transforms/src/processors/transforms/mod.rs index 201056e5e72c..84475f6c28f1 100644 --- a/src/query/pipeline/transforms/src/processors/transforms/mod.rs +++ b/src/query/pipeline/transforms/src/processors/transforms/mod.rs @@ -15,8 +15,6 @@ pub mod transform; pub mod transform_block_compact; pub mod transform_compact; -pub mod transform_expression; -pub mod transform_expression_executor; pub mod transform_limit; pub mod transform_sort_merge; pub mod transform_sort_partial; @@ -24,8 +22,6 @@ pub mod transform_sort_partial; pub use transform::*; pub use transform_block_compact::*; pub use transform_compact::*; -pub use transform_expression::*; -pub use transform_expression_executor::ExpressionExecutor; pub use transform_limit::*; pub use transform_sort_merge::*; pub use transform_sort_partial::*; diff --git a/src/query/pipeline/transforms/src/processors/transforms/transform_expression.rs b/src/query/pipeline/transforms/src/processors/transforms/transform_expression.rs deleted file mode 100644 index 987e7f65db0c..000000000000 --- a/src/query/pipeline/transforms/src/processors/transforms/transform_expression.rs +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
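[Editor's note, not part of the patch] The file deleted below drives one struct, `ExpressionTransformImpl<const ALIAS_PROJECT: bool>`, through two `Transform` impls specialized on the const value, giving each instantiation its own `NAME`. A minimal sketch of that const-generic specialization pattern (trait and names here are illustrative):

```rust
trait Transform {
    const NAME: &'static str;
    fn transform(&self, input: &str) -> String;
}

struct Stage<const UPPERCASE: bool>;

impl Transform for Stage<true> {
    const NAME: &'static str = "UppercaseTransform";
    fn transform(&self, input: &str) -> String {
        input.to_uppercase()
    }
}

impl Transform for Stage<false> {
    const NAME: &'static str = "IdentityTransform";
    fn transform(&self, input: &str) -> String {
        input.to_string()
    }
}

fn main() {
    // One generic struct, two concrete behaviors chosen at compile time.
    println!("{}: {}", <Stage<true>>::NAME, Stage::<true>.transform("abc"));
    println!("{}: {}", <Stage<false>>::NAME, Stage::<false>.transform("abc"));
}
```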
- -use std::sync::Arc; - -use common_catalog::table_context::TableContext; -use common_datablocks::DataBlock; -use common_datavalues::DataSchemaRef; -use common_exception::Result; -use common_legacy_expression::LegacyExpression; -use common_pipeline_core::processors::port::InputPort; -use common_pipeline_core::processors::port::OutputPort; -use common_pipeline_core::processors::processor::ProcessorPtr; - -use crate::processors::transforms::transform::Transform; -use crate::processors::transforms::transform::Transformer; -use crate::processors::transforms::ExpressionExecutor; - -pub type ExpressionTransform = ExpressionTransformImpl<false>; - -pub struct ExpressionTransformImpl<const ALIAS_PROJECT: bool> { - executor: ExpressionExecutor, -} - -impl<const ALIAS_PROJECT: bool> ExpressionTransformImpl<ALIAS_PROJECT> -where Self: Transform -{ - pub fn try_create( - input: Arc<InputPort>, - output: Arc<OutputPort>, - input_schema: DataSchemaRef, - output_schema: DataSchemaRef, - exprs: Vec<LegacyExpression>, - ctx: Arc<dyn TableContext>, - ) -> Result<ProcessorPtr> { - let executor = ExpressionExecutor::try_create( - ctx, - "expression executor", - input_schema, - output_schema, - exprs, - ALIAS_PROJECT, - )?; - executor.validate()?; - - Ok(Transformer::create(input, output, Self { executor })) - } -} - -impl Transform for ExpressionTransformImpl<true> { - const NAME: &'static str = "ProjectionTransform"; - - fn transform(&mut self, data: DataBlock) -> Result<DataBlock> { - self.executor.execute(&data) - } -} - -impl Transform for ExpressionTransformImpl<false> { - const NAME: &'static str = "ExpressionTransform"; - - fn transform(&mut self, data: DataBlock) -> Result<DataBlock> { - self.executor.execute(&data) - } -} diff --git a/src/query/pipeline/transforms/src/processors/transforms/transform_expression_executor.rs b/src/query/pipeline/transforms/src/processors/transforms/transform_expression_executor.rs deleted file mode 100644 index e6f20e2adb43..000000000000 --- a/src/query/pipeline/transforms/src/processors/transforms/transform_expression_executor.rs +++ /dev/null @@ -1,197 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::HashMap; -use std::sync::Arc; - -use common_catalog::table_context::TableContext; -use common_datablocks::DataBlock; -use common_datavalues::prelude::*; -use common_exception::ErrorCode; -use common_exception::Result; -use common_legacy_expression::*; -use tracing::debug; - -/// ExpressionExecutor is a helper struct for expressions and projections -/// Aggregate functions are not covered, because all expressions in aggregate functions are executed.
-#[derive(Clone)] -pub struct ExpressionExecutor { - // description of this executor - description: String, - _input_schema: DataSchemaRef, - output_schema: DataSchemaRef, - chain: Arc<ExpressionChain>, - // whether to perform alias action in executor - alias_project: bool, - ctx: Arc<dyn TableContext>, -} - -impl ExpressionExecutor { - pub fn try_create( - ctx: Arc<dyn TableContext>, - description: &str, - input_schema: DataSchemaRef, - output_schema: DataSchemaRef, - exprs: Vec<LegacyExpression>, - alias_project: bool, - ) -> Result<Self> { - let chain = ExpressionChain::try_create(input_schema.clone(), &exprs)?; - - Ok(Self { - description: description.to_string(), - _input_schema: input_schema, - output_schema, - chain: Arc::new(chain), - alias_project, - ctx, - }) - } - - pub fn validate(&self) -> Result<()> { - Ok(()) - } - - #[tracing::instrument( - level = "debug", - name = "transform_expression_execute", - skip(self, block) - )] - pub fn execute(&self, block: &DataBlock) -> Result<DataBlock> { - debug!( - "({:#}) execute, actions: {:?}", - self.description, self.chain.actions - ); - - let mut column_map: HashMap<&str, ColumnWithField> = HashMap::new(); - - let mut alias_map: HashMap<&str, &ColumnWithField> = HashMap::new(); - - // supported a + 1 as b, a + 1 as c - // supported a + 1 as a, a as b - // !currently not supported a+1 as c, b+1 as c - let mut alias_action_map: HashMap<&str, Vec<&str>> = HashMap::new(); - - for f in block.schema().fields().iter() { - let column = - ColumnWithField::new(block.try_column_by_name(f.name())?.clone(), f.clone()); - column_map.insert(f.name(), column); - } - - let rows = block.num_rows(); - for action in self.chain.actions.iter() { - if let LegacyExpressionAction::Alias(alias) = action { - if let Some(v) = alias_action_map.get_mut(alias.arg_name.as_str()) { - v.push(alias.name.as_str()); - } else { - alias_action_map.insert(alias.arg_name.as_str(), vec![alias.name.as_str()]); - } - } - - if column_map.contains_key(action.column_name()) { - continue; - } - - match action { - LegacyExpressionAction::Input(input) => { - let column = block.try_column_by_name(&input.name)?.clone(); - let column = ColumnWithField::new( - column, - block.schema().field_with_name(&input.name)?.clone(), - ); - column_map.insert(input.name.as_str(), column); - } - LegacyExpressionAction::Function(f) => { - let column_with_field = self.execute_function(&mut column_map, f, rows)?; - column_map.insert(f.name.as_str(), column_with_field); - } - LegacyExpressionAction::Constant(constant) => { - let column = constant - .data_type - .create_constant_column(&constant.value, rows)?; - - let column = ColumnWithField::new( - column, - DataField::new(constant.name.as_str(), constant.data_type.clone()), - ); - - column_map.insert(constant.name.as_str(), column); - } - _ => {} - } - } - - if self.alias_project { - for (k, v) in alias_action_map.iter() { - let column = column_map.get(k).ok_or_else(|| { - ErrorCode::LogicalError("Arguments must be prepared before alias transform") - })?; - - for name in v.iter() { - match alias_map.insert(name, column) { - Some(_) => Err(ErrorCode::UnImplement(format!( - "Duplicate alias name :{}", - name - ))), - _ => Ok(()), - }?; - } - } - } - - let mut project_columns = Vec::with_capacity(self.output_schema.fields().len()); - for f in self.output_schema.fields() { - let column = match alias_map.get(f.name().as_str()) { - Some(data_column) => data_column, - None => column_map.get(f.name().as_str()).ok_or_else(|| { - ErrorCode::LogicalError(format!( - "Projection column: {} not exists in {:?}, there are bugs!", - f.name(), -
column_map.keys() - )) - })?, - }; - project_columns.push(column.column().clone()); - } - // projection to remove unused columns - Ok(DataBlock::create( - self.output_schema.clone(), - project_columns, - )) - } - - #[inline] - fn execute_function( - &self, - column_map: &mut HashMap<&str, ColumnWithField>, - f: &ActionFunction, - rows: usize, - ) -> Result<ColumnWithField> { - // check if it's cached - let mut arg_columns = Vec::with_capacity(f.arg_names.len()); - - for arg in f.arg_names.iter() { - let column = column_map.get(arg.as_str()).cloned().ok_or_else(|| { - ErrorCode::LogicalError("Arguments must be prepared before function transform") - })?; - arg_columns.push(column); - } - - let func_ctx = self.ctx.try_get_function_context()?; - let column = f.func.eval(func_ctx, &arg_columns, rows)?; - Ok(ColumnWithField::new( - column, - DataField::new(&f.name, f.return_type.clone()), - )) - } -} diff --git a/src/query/pipeline/transforms/src/processors/transforms/transform_sort_partial.rs b/src/query/pipeline/transforms/src/processors/transforms/transform_sort_partial.rs index f1407779f71d..c1577eee677e 100644 --- a/src/query/pipeline/transforms/src/processors/transforms/transform_sort_partial.rs +++ b/src/query/pipeline/transforms/src/processors/transforms/transform_sort_partial.rs @@ -16,10 +16,7 @@ use std::sync::Arc; use common_datablocks::DataBlock; use common_datablocks::SortColumnDescription; -use common_datavalues::DataSchemaRef; -use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_expression::LegacyExpression; use common_pipeline_core::processors::port::InputPort; use common_pipeline_core::processors::port::OutputPort; use common_pipeline_core::processors::processor::ProcessorPtr; @@ -54,34 +51,3 @@ impl Transform for TransformSortPartial { DataBlock::sort_block(&block, &self.sort_columns_descriptions, self.limit) } } - -pub fn get_sort_descriptions( - schema: &DataSchemaRef, - exprs: &[LegacyExpression], -) -> Result<Vec<SortColumnDescription>> { - let mut sort_columns_descriptions = vec![]; - for x in exprs { - match *x { - LegacyExpression::Sort { - ref expr, - asc, - nulls_first, - .. - } => { - let column_name = expr.to_data_field(schema)?.name().clone(); - sort_columns_descriptions.push(SortColumnDescription { - column_name, - asc, - nulls_first, - }); - } - _ => { - return Result::Err(ErrorCode::BadTransformType(format!( - "Sort expression must be ExpressionPlan::Sort, but got: {:?}", - x - ))); - } - } - } - Ok(sort_columns_descriptions) -} diff --git a/src/query/planner/src/extras.rs b/src/query/planner/src/extras.rs index 51d92f51b7cb..2b5a979d0734 100644 --- a/src/query/planner/src/extras.rs +++ b/src/query/planner/src/extras.rs @@ -19,9 +19,7 @@ use common_datavalues::prelude::*; use once_cell::sync::Lazy; -use crate::PhysicalScalar; -use crate::plans::Projection; - +use crate::{PhysicalScalar, plans::Projection}; #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] pub enum StageKind { @@ -56,8 +54,8 @@ pub struct Extras { pub prewhere: Option<PrewhereInfo>, /// Optional limit to skip read pub limit: Option<usize>, - /// Optional order_by expression plan - pub order_by: Vec<PhysicalScalar>, + /// Optional order_by expression plan, asc, null_first + pub order_by: Vec<(PhysicalScalar, bool, bool)>, } impl Extras { diff --git a/src/query/planner/src/lib.rs b/src/query/planner/src/lib.rs index 1248f7e8e524..76fdfe17fce3 100644 --- a/src/query/planner/src/lib.rs +++ b/src/query/planner/src/lib.rs @@ -22,16 +22,16 @@ //! build pipelines, then our processes will produce result data blocks.
mod physical_scalar; +mod partition; pub mod extras; pub mod stage_table; -pub mod plan_partition; pub mod plan_read_datasource; -mod partition; pub use physical_scalar::*; -pub use plan_partition::*; +pub use partition::*; pub use plan_read_datasource::*; // Plan will be used publicly. pub mod plans; + diff --git a/src/query/planner/src/partition.rs b/src/query/planner/src/partition.rs index 27434c3c29e4..6c703c40a29a 100644 --- a/src/query/planner/src/partition.rs +++ b/src/query/planner/src/partition.rs @@ -46,5 +46,6 @@ impl PartialEq for Box<dyn PartInfo> { } } +#[allow(dead_code)] pub type PartInfoPtr = Arc<Box<dyn PartInfo>>; -pub type Partitions = Vec<Arc<Box<dyn PartInfo>>>; +pub type Partitions = Vec<PartInfoPtr>; diff --git a/src/query/planner/src/physical_scalar.rs b/src/query/planner/src/physical_scalar.rs index f86b06b41a0b..375b40f94134 100644 --- a/src/query/planner/src/physical_scalar.rs +++ b/src/query/planner/src/physical_scalar.rs @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::fmt::Display; +use std::fmt::Formatter; + use common_datavalues::format_data_type_sql; use common_datavalues::DataTypeImpl; use common_datavalues::DataValue; @@ -34,7 +37,7 @@ pub enum PhysicalScalar { }, Function { name: String, - args: Vec<(PhysicalScalar, DataTypeImpl)>, + args: Vec<PhysicalScalar>, return_type: DataTypeImpl, }, @@ -61,7 +64,7 @@ impl PhysicalScalar { PhysicalScalar::Function { name, args, .. } => { let args = args .iter() - .map(|(arg, _)| arg.pretty_display()) + .map(|arg| arg.pretty_display()) .collect::<Vec<String>>() .join(", "); format!("{}({})", name, args) @@ -76,6 +79,27 @@ impl PhysicalScalar { } } +impl Display for PhysicalScalar { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &self { + PhysicalScalar::Constant { value, .. } => write!(f, "{}", value), + PhysicalScalar::Function { name, args, .. } => write!( + f, + "{}({})", + name, + args.iter() + .map(|arg| format!("{}", arg)) + .collect::<Vec<String>>() + .join(", ") + ), + PhysicalScalar::Cast { input, target } => { + write!(f, "CAST({} AS {})", input, format_data_type_sql(target)) + } + PhysicalScalar::IndexedVariable { index, .. } => write!(f, "${index}"), + } + } +} + #[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)] pub struct AggregateFunctionDesc { pub sig: AggregateFunctionSignature, diff --git a/src/query/planner/src/plan_partition.rs b/src/query/planner/src/plan_partition.rs deleted file mode 100644 index 27434c3c29e4..000000000000 --- a/src/query/planner/src/plan_partition.rs +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
- -use std::any::Any; -use std::fmt::Debug; -use std::fmt::Formatter; -use std::sync::Arc; - -#[typetag::serde(tag = "type")] -pub trait PartInfo: Send + Sync { - fn as_any(&self) -> &dyn Any; - - #[allow(clippy::borrowed_box)] - fn equals(&self, info: &Box<dyn PartInfo>) -> bool; -} - -impl Debug for Box<dyn PartInfo> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match serde_json::to_string(self) { - Ok(str) => write!(f, "{}", str), - Err(_cause) => Err(std::fmt::Error {}), - } - } -} - -impl PartialEq for Box<dyn PartInfo> { - fn eq(&self, other: &Self) -> bool { - let this_type_id = self.as_any().type_id(); - let other_type_id = other.as_any().type_id(); - - match this_type_id == other_type_id { - true => self.equals(other), - false => false, - } - } -} - -pub type PartInfoPtr = Arc<Box<dyn PartInfo>>; -pub type Partitions = Vec<Arc<Box<dyn PartInfo>>>; diff --git a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml index a0c8999f4af0..28f15928ca53 100644 --- a/src/query/service/Cargo.toml +++ b/src/query/service/Cargo.toml @@ -43,9 +43,6 @@ common-hashtable = { path = "../../common/hashtable" } common-hive-meta-store = { path = "../storages/hive-meta-store", optional = true } common-http = { path = "../../common/http" } common-io = { path = "../../common/io" } -common-legacy-expression = { path = "../legacy-expression" } -common-legacy-parser = { path = "../legacy-parser" } -common-legacy-planners = { path = "../legacy-planners" } common-management = { path = "../management" } common-meta-api = { path = "../../meta/api" } common-meta-app = { path = "../../meta/app" } diff --git a/src/query/service/src/interpreters/fragments/v2/plan_fragment.rs b/src/query/service/src/interpreters/fragments/v2/plan_fragment.rs index 442774dcf1c6..2807d168f809 100644 --- a/src/query/service/src/interpreters/fragments/v2/plan_fragment.rs +++ b/src/query/service/src/interpreters/fragments/v2/plan_fragment.rs @@ -16,7 +16,7 @@ use std::sync::Arc; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_planners::ReadDataSourcePlan; +use common_planner::ReadDataSourcePlan; use super::Fragmenter; use crate::api::DataExchange; diff --git a/src/query/service/src/interpreters/interpreter_copy_v2.rs b/src/query/service/src/interpreters/interpreter_copy_v2.rs index 3826bd6d7ec3..13ba55db56d4 100644 --- a/src/query/service/src/interpreters/interpreter_copy_v2.rs +++ b/src/query/service/src/interpreters/interpreter_copy_v2.rs @@ -19,9 +19,9 @@ use common_base::base::GlobalIORuntime; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::SourceInfo; -use common_legacy_planners::StageTableInfo; +use common_planner::ReadDataSourcePlan; +use common_planner::SourceInfo; +use common_planner::StageTableInfo; use common_meta_app::schema::GetTableCopiedFileReq; use common_meta_app::schema::TableCopiedFileInfo; use common_meta_app::schema::UpsertTableCopiedFileReq; diff --git a/src/query/service/src/interpreters/interpreter_delete.rs b/src/query/service/src/interpreters/interpreter_delete.rs index 3a5aa7fdec4b..c590b19cb84f 100644 --- a/src/query/service/src/interpreters/interpreter_delete.rs +++ b/src/query/service/src/interpreters/interpreter_delete.rs @@ -16,7 +16,7 @@ use std::sync::Arc; use common_datavalues::DataSchemaRef; use common_exception::Result; -use common_legacy_planners::DeletePlan; +use common_planner::DeletePlan; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; diff --git
a/src/query/service/src/interpreters/interpreter_explain_v2.rs b/src/query/service/src/interpreters/interpreter_explain_v2.rs index bf0ba5daca4d..b31f2c974747 100644 --- a/src/query/service/src/interpreters/interpreter_explain_v2.rs +++ b/src/query/service/src/interpreters/interpreter_explain_v2.rs @@ -18,7 +18,7 @@ use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::MetadataRef; +use common_sql::MetadataRef; use super::fragments::Fragmenter; use super::QueryFragmentsActions; diff --git a/src/query/service/src/interpreters/interpreter_insert_v2.rs b/src/query/service/src/interpreters/interpreter_insert_v2.rs index 815730fc972a..1411ae69454d 100644 --- a/src/query/service/src/interpreters/interpreter_insert_v2.rs +++ b/src/query/service/src/interpreters/interpreter_insert_v2.rs @@ -32,8 +32,8 @@ use common_io::prelude::NestedCheckpointReader; use common_pipeline_sources::processors::sources::AsyncSource; use common_pipeline_sources::processors::sources::AsyncSourcer; use common_pipeline_transforms::processors::transforms::Transform; -use common_planner::Metadata; -use common_planner::MetadataRef; +use crate::Metadata; +use common_sql::MetadataRef; use parking_lot::Mutex; use parking_lot::RwLock; diff --git a/src/query/service/src/interpreters/interpreter_select_v2.rs b/src/query/service/src/interpreters/interpreter_select_v2.rs index 51fd6a9bb435..07047bc9cbae 100644 --- a/src/query/service/src/interpreters/interpreter_select_v2.rs +++ b/src/query/service/src/interpreters/interpreter_select_v2.rs @@ -16,7 +16,7 @@ use std::sync::Arc; use common_datavalues::DataSchemaRef; use common_exception::Result; -use common_planner::MetadataRef; +use common_sql::MetadataRef; use super::plan_schedulers::schedule_query_v2; use crate::interpreters::Interpreter; diff --git a/src/query/service/src/interpreters/interpreter_setting.rs b/src/query/service/src/interpreters/interpreter_setting.rs index 7662bfc5a11d..a407d8a3159f 100644 --- a/src/query/service/src/interpreters/interpreter_setting.rs +++ b/src/query/service/src/interpreters/interpreter_setting.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use chrono_tz::Tz; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_planners::SettingPlan; +use common_planner::SettingPlan; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; diff --git a/src/query/service/src/interpreters/interpreter_table_recluster.rs b/src/query/service/src/interpreters/interpreter_table_recluster.rs index a43abee89de0..d1c6b6fbb11e 100644 --- a/src/query/service/src/interpreters/interpreter_table_recluster.rs +++ b/src/query/service/src/interpreters/interpreter_table_recluster.rs @@ -16,7 +16,7 @@ use std::sync::Arc; use std::time::SystemTime; use common_exception::Result; -use common_legacy_planners::Extras; +use common_planner::extras::Extras; use crate::interpreters::Interpreter; use crate::interpreters::InterpreterClusteringHistory; diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index dafea4cb50e7..6015943e93f3 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -269,8 +269,8 @@ impl PipelineBuilder { predicate = PhysicalScalar::Function { name: "and_filters".to_string(), args: vec![ - (predicate.clone(), predicate.data_type()), - (pred.clone(), pred.data_type()), + predicate.clone() , + 
diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs index 4f9cd483af01..0b6ad6053bb8 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs @@ -101,10 +101,7 @@ impl HashJoinDesc { let func = FunctionFactory::instance().get("and", &data_types)?; condition = PhysicalScalar::Function { name: "and".to_string(), - args: vec![ - (condition, left_type), - (other_condition.clone(), right_type), - ], + args: vec![condition, other_condition.clone()], return_type: func.return_type(), }; } diff --git a/src/query/service/src/pipelines/processors/transforms/mod.rs b/src/query/service/src/pipelines/processors/transforms/mod.rs index 5ef252b6d850..8984c51724e1 100644 --- a/src/query/service/src/pipelines/processors/transforms/mod.rs +++ b/src/query/service/src/pipelines/processors/transforms/mod.rs @@ -69,5 +69,4 @@ pub use transform_right_semi_anti_join::RightSemiAntiJoinCompactor; pub use transform_right_semi_anti_join::TransformRightSemiAntiJoin; pub use transform_sort_merge::SortMergeCompactor; pub use transform_sort_merge::TransformSortMerge; -pub use transform_sort_partial::get_sort_descriptions; pub use transform_sort_partial::TransformSortPartial; diff --git a/src/query/service/src/servers/http/v1/download.rs b/src/query/service/src/servers/http/v1/download.rs index 0893c9cec9f4..c18ba9fcbbe2 100644 --- a/src/query/service/src/servers/http/v1/download.rs +++ b/src/query/service/src/servers/http/v1/download.rs @@ -18,9 +18,9 @@ use async_stream::stream; use common_catalog::table_context::TableContext; use common_exception::Result; use common_formats::output_format::OutputFormatType; -use common_legacy_planners::Extras; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::SourceInfo; +use common_planner::extras::Extras; +use common_planner::ReadDataSourcePlan; +use common_planner::SourceInfo; use futures::StreamExt; use crate::sessions::QueryContext; diff --git a/src/query/service/src/sessions/query_ctx.rs b/src/query/service/src/sessions/query_ctx.rs index 5b66a61d0950..b113cb04d303 100644 --- a/src/query/service/src/sessions/query_ctx.rs +++ b/src/query/service/src/sessions/query_ctx.rs @@ -35,11 +35,11 @@ use common_exception::Result; use common_functions::scalars::FunctionContext; use common_io::prelude::FormatSettings; use common_legacy_expression::LegacyExpression; -use common_legacy_planners::PartInfoPtr; -use common_legacy_planners::Partitions; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::SourceInfo; -use common_legacy_planners::StageTableInfo; +use common_planner::PartInfoPtr; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; +use common_planner::SourceInfo; +use common_planner::StageTableInfo; use common_meta_app::schema::TableInfo; use common_meta_types::RoleInfo; use common_meta_types::UserInfo; diff --git a/src/query/service/src/stream/table_read_block_stream.rs b/src/query/service/src/stream/table_read_block_stream.rs index 66da319d4267..41f7258c7f47 100644 --- a/src/query/service/src/stream/table_read_block_stream.rs +++ b/src/query/service/src/stream/table_read_block_stream.rs @@ -15,7 +15,7 @@ use std::sync::Arc; use common_exception::Result; -use common_legacy_planners::ReadDataSourcePlan; +use common_planner::ReadDataSourcePlan; use
common_streams::SendableDataBlockStream; use crate::pipelines::executor::ExecutorSettings; diff --git a/src/query/service/src/table_functions/async_crash_me.rs b/src/query/service/src/table_functions/async_crash_me.rs index 380c5c83cc48..0815e0226cb7 100644 --- a/src/query/service/src/table_functions/async_crash_me.rs +++ b/src/query/service/src/table_functions/async_crash_me.rs @@ -26,10 +26,10 @@ use common_datavalues::chrono::Utc; use common_datavalues::prelude::*; use common_exception::Result; use common_legacy_expression::LegacyExpression; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; +use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; diff --git a/src/query/service/src/table_functions/memory_block_part.rs b/src/query/service/src/table_functions/memory_block_part.rs index cbafce7bcc46..c8d585b9057a 100644 --- a/src/query/service/src/table_functions/memory_block_part.rs +++ b/src/query/service/src/table_functions/memory_block_part.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use common_legacy_planners::Partitions; +use common_planner::Partitions; use crate::table_functions::numbers_part::NumbersPartInfo; diff --git a/src/query/service/src/table_functions/numbers_part.rs b/src/query/service/src/table_functions/numbers_part.rs index 97a1cd0988ae..8b6d5c2a1702 100644 --- a/src/query/service/src/table_functions/numbers_part.rs +++ b/src/query/service/src/table_functions/numbers_part.rs @@ -17,8 +17,8 @@ use std::sync::Arc; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_planners::PartInfo; -use common_legacy_planners::PartInfoPtr; +use common_planner::PartInfo; +use common_planner::PartInfoPtr; #[derive(serde::Serialize, serde::Deserialize, PartialEq, Eq)] pub struct NumbersPartInfo { diff --git a/src/query/service/src/table_functions/numbers_table.rs b/src/query/service/src/table_functions/numbers_table.rs index d7e7d913a4a8..c3008ed585a6 100644 --- a/src/query/service/src/table_functions/numbers_table.rs +++ b/src/query/service/src/table_functions/numbers_table.rs @@ -25,18 +25,17 @@ use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; use common_legacy_expression::LegacyExpression; -use common_legacy_planners::Extras; -use common_legacy_planners::PartInfoPtr; -use common_legacy_planners::Partitions; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::PartInfoPtr; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; +use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; use crate::pipelines::processors::port::OutputPort; use crate::pipelines::processors::processor::ProcessorPtr; -use crate::pipelines::processors::transforms::get_sort_descriptions; use crate::pipelines::processors::EmptySource; use crate::pipelines::processors::SyncSource; use crate::pipelines::processors::SyncSourcer; diff --git a/src/query/service/src/table_functions/sync_crash_me.rs 
b/src/query/service/src/table_functions/sync_crash_me.rs index 354df7359726..6b4197496bc3 100644 --- a/src/query/service/src/table_functions/sync_crash_me.rs +++ b/src/query/service/src/table_functions/sync_crash_me.rs @@ -25,10 +25,10 @@ use common_datavalues::chrono::Utc; use common_datavalues::prelude::*; use common_exception::Result; use common_legacy_expression::LegacyExpression; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; +use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; diff --git a/src/query/service/tests/it/sql/planner/format/mod.rs b/src/query/service/tests/it/sql/planner/format/mod.rs index df414e6bd61f..767552872684 100644 --- a/src/query/service/tests/it/sql/planner/format/mod.rs +++ b/src/query/service/tests/it/sql/planner/format/mod.rs @@ -20,7 +20,7 @@ use common_datavalues::DataValue; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; -use common_planner::Metadata; +use common_sql::Metadata; use databend_query::sql::optimizer::SExpr; use databend_query::sql::planner::plans::JoinType; use databend_query::sql::plans::BoundColumnRef; diff --git a/src/query/service/tests/it/storages/fuse/operations/read_plan.rs b/src/query/service/tests/it/storages/fuse/operations/read_plan.rs index ba7bf06c5077..f763239beeea 100644 --- a/src/query/service/tests/it/storages/fuse/operations/read_plan.rs +++ b/src/query/service/tests/it/storages/fuse/operations/read_plan.rs @@ -23,8 +23,8 @@ use common_exception::Result; use common_fuse_meta::meta::BlockMeta; use common_fuse_meta::meta::ColumnMeta; use common_fuse_meta::meta::ColumnStatistics; -use common_legacy_planners::Extras; -use common_legacy_planners::Projection; +use common_planner::extras::Extras; +use common_planner::plans::Projection; use common_storages_fuse::ColumnLeaves; use databend_query::storages::fuse::ColumnLeaf; use databend_query::storages::fuse::FuseTable; diff --git a/src/query/service/tests/it/storages/fuse/pruning.rs b/src/query/service/tests/it/storages/fuse/pruning.rs index b3e72062fd35..456f98fb523a 100644 --- a/src/query/service/tests/it/storages/fuse/pruning.rs +++ b/src/query/service/tests/it/storages/fuse/pruning.rs @@ -26,7 +26,7 @@ use common_legacy_expression::col; use common_legacy_expression::lit; use common_legacy_expression::sub; use common_legacy_expression::LegacyExpression; -use common_legacy_planners::Extras; +use common_planner::extras::Extras; use common_storages_fuse::FuseTable; use databend_query::interpreters::CreateTableInterpreterV2; use databend_query::interpreters::Interpreter; diff --git a/src/query/service/tests/it/storages/fuse/table.rs b/src/query/service/tests/it/storages/fuse/table.rs index a0ddbd53802a..c2c2e1aa47f1 100644 --- a/src/query/service/tests/it/storages/fuse/table.rs +++ b/src/query/service/tests/it/storages/fuse/table.rs @@ -17,8 +17,8 @@ use std::default::Default; use common_ast::ast::Engine; use common_base::base::tokio; use common_exception::Result; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::SourceInfo; +use common_planner::ReadDataSourcePlan; +use common_planner::SourceInfo; use common_meta_app::schema::TableInfo; use
common_planner::plans::AlterTableClusterKeyPlan; use common_planner::plans::DropTableClusterKeyPlan; diff --git a/src/query/service/tests/it/storages/fuse/table_test_fixture.rs b/src/query/service/tests/it/storages/fuse/table_test_fixture.rs index 4eb73c135d90..a3f7b0c5702a 100644 --- a/src/query/service/tests/it/storages/fuse/table_test_fixture.rs +++ b/src/query/service/tests/it/storages/fuse/table_test_fixture.rs @@ -22,7 +22,7 @@ use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::Result; use common_legacy_expression::LegacyExpression; -use common_legacy_planners::Extras; +use common_planner::extras::Extras; use common_meta_app::schema::DatabaseMeta; use common_planner::plans::CreateDatabasePlan; use common_storage::StorageFsConfig; diff --git a/src/query/service/tests/it/storages/memory.rs b/src/query/service/tests/it/storages/memory.rs index cd06e3bcab71..02ed228e3dd1 100644 --- a/src/query/service/tests/it/storages/memory.rs +++ b/src/query/service/tests/it/storages/memory.rs @@ -17,7 +17,7 @@ use common_datablocks::assert_blocks_sorted_eq; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::Result; -use common_legacy_planners::*; +use common_planner::*; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; use databend_query::sessions::TableContext; diff --git a/src/query/service/tests/it/storages/result/result_table.rs b/src/query/service/tests/it/storages/result/result_table.rs index da46804a91bb..4defda991951 100644 --- a/src/query/service/tests/it/storages/result/result_table.rs +++ b/src/query/service/tests/it/storages/result/result_table.rs @@ -23,8 +23,8 @@ use common_datavalues::DataField; use common_datavalues::DataSchema; use common_datavalues::ToDataType; use common_exception::Result; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::SourceInfo; +use common_planner::ReadDataSourcePlan; +use common_planner::SourceInfo; use common_meta_types::UserIdentity; use databend_query::sessions::TableContext; use databend_query::storages::result::ResultQueryInfo; diff --git a/src/query/service/tests/it/table_functions/numbers_table.rs b/src/query/service/tests/it/table_functions/numbers_table.rs index 35b7111e8b74..72ac048034a1 100644 --- a/src/query/service/tests/it/table_functions/numbers_table.rs +++ b/src/query/service/tests/it/table_functions/numbers_table.rs @@ -16,7 +16,7 @@ use common_base::base::tokio; use common_datavalues::prelude::*; use common_exception::Result; use common_legacy_expression::*; -use common_legacy_planners::*; +use common_planner::*; use databend_query::interpreters::InterpreterFactory; use databend_query::sessions::SessionManager; use databend_query::sessions::SessionType; diff --git a/src/query/sql/Cargo.toml b/src/query/sql/Cargo.toml index a11a66378690..045cb64957e8 100644 --- a/src/query/sql/Cargo.toml +++ b/src/query/sql/Cargo.toml @@ -32,10 +32,7 @@ common-meta-app = { path = "../../meta/app" } common-meta-store = { path = "../../meta/store" } common-meta-types = { path = "../../meta/types" } common-metrics = { path = "../../common/metrics" } -common-pipeline-core = { path = "../pipeline/core" } -common-pipeline-sinks = { path = "../pipeline/sinks" } common-pipeline-sources = { path = "../pipeline/sources" } -common-pipeline-transforms = { path = "../pipeline/transforms" } common-planner = { path = "../planner" } common-settings = { path = "../settings" } common-storage = { path = "../../common/storage" } diff --git 
a/src/query/sql/src/evaluator/mod.rs b/src/query/sql/src/evaluator/mod.rs index b6765cf24db4..368c22840deb 100644 --- a/src/query/sql/src/evaluator/mod.rs +++ b/src/query/sql/src/evaluator/mod.rs @@ -19,6 +19,7 @@ mod scalar; use common_datavalues::ColumnRef; use common_datavalues::DataTypeImpl; pub use eval_node::EvalNode; +pub use physical_scalar::PhysicalScalarOp; pub struct Evaluator; diff --git a/src/query/sql/src/evaluator/physical_scalar.rs b/src/query/sql/src/evaluator/physical_scalar.rs index 98051276c77f..2300eac38b82 100644 --- a/src/query/sql/src/evaluator/physical_scalar.rs +++ b/src/query/sql/src/evaluator/physical_scalar.rs @@ -20,11 +20,63 @@ use common_exception::Result; use common_functions::scalars::in_evaluator; use common_functions::scalars::CastFunction; use common_functions::scalars::FunctionFactory; +use common_planner::PhysicalScalar; use crate::evaluator::eval_node::EvalNode; use crate::evaluator::Evaluator; -use common_planner::PhysicalScalar; +pub trait PhysicalScalarOp { + fn binary_op(&self, name: &str, other: &Self) -> Result<PhysicalScalar>; + + fn and(&self, other: &Self) -> Result<PhysicalScalar> { + self.binary_op("and", other) + } + + fn or(&self, other: &Self) -> Result<PhysicalScalar> { + self.binary_op("or", other) + } + + fn eq(&self, other: &Self) -> Result<PhysicalScalar> { + self.binary_op("=", other) + } + + fn not_eq(&self, other: &Self) -> Result<PhysicalScalar> { + self.binary_op("!=", other) + } + + fn gt_eq(&self, other: &Self) -> Result<PhysicalScalar> { + self.binary_op(">=", other) + } + + fn gt(&self, other: &Self) -> Result<PhysicalScalar> { + self.binary_op(">", other) + } + + fn lt_eq(&self, other: &Self) -> Result<PhysicalScalar> { + self.binary_op("<=", other) + } + + fn lt(&self, other: &Self) -> Result<PhysicalScalar> { + self.binary_op("<", other) + } +} + +impl PhysicalScalarOp for PhysicalScalar { + fn binary_op(&self, name: &str, other: &PhysicalScalar) -> Result<PhysicalScalar> { + let func = + FunctionFactory::instance().get(name, &[&self.data_type(), &other.data_type()])?; + + Ok(PhysicalScalar::Function { + name: name.to_owned(), + args: vec![self.clone(), other.clone()], + return_type: func.return_type(), + }) + } +} impl Evaluator { pub fn eval_physical_scalars(physical_scalars: &[PhysicalScalar]) -> Result<Vec<EvalNode>> {
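The new `PhysicalScalarOp` helpers make predicate composition point-free at call sites. A minimal usage sketch (hypothetical `range_predicate` helper, not part of this patch; assumes the trait is reachable through the `pub use physical_scalar::PhysicalScalarOp;` re-export added above):

    use common_exception::Result;
    use common_planner::PhysicalScalar;
    use common_sql::evaluator::PhysicalScalarOp;

    // (col >= lo) AND (col < hi), composed from the binary_op defaults.
    fn range_predicate(
        col: &PhysicalScalar,
        lo: &PhysicalScalar,
        hi: &PhysicalScalar,
    ) -> Result<PhysicalScalar> {
        col.gt_eq(lo)?.and(&col.lt(hi)?)
    }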
@@ -43,7 +95,7 @@ impl Evaluator { PhysicalScalar::Function { name, args, .. } => { let eval_args: Vec<EvalNode> = args .iter() - .map(|(v, _)| Self::eval_physical_scalar(v)) + .map(|v| Self::eval_physical_scalar(v)) .collect::<Result<_>>()?; // special case for in function @@ -55,9 +107,9 @@ } = &eval_args[1] { let func = if name_lower.as_str() == "not_in" { - in_evaluator::create_by_values::<true>(args[0].1.clone(), vs.clone()) + in_evaluator::create_by_values::<true>(args[0].data_type(), vs.clone()) } else { - in_evaluator::create_by_values::<false>(args[0].1.clone(), vs.clone()) + in_evaluator::create_by_values::<false>(args[0].data_type(), vs.clone()) }?; return Ok(EvalNode::Function { @@ -71,7 +123,9 @@ } } - let data_types: Vec<&DataTypeImpl> = args.iter().map(|(_, v)| v).collect(); + let data_types: Vec<DataTypeImpl> = args.iter().map(|v| v.data_type()).collect(); + let data_types: Vec<&DataTypeImpl> = data_types.iter().collect(); + let func = FunctionFactory::instance().get(name, &data_types)?; Ok(EvalNode::Function { func, diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs index 777ec3227e3c..bd04bc2ff83b 100644 --- a/src/query/sql/src/executor/format.rs +++ b/src/query/sql/src/executor/format.rs @@ -15,10 +15,8 @@ use common_ast::ast::FormatTreeNode; use common_exception::ErrorCode; use common_exception::Result; +use common_planner::extras::StageKind; use common_planner::AggregateFunctionDesc; -use crate::planner::IndexType; -use crate::planner::MetadataRef; -use crate::planner::DUMMY_TABLE_INDEX; use itertools::Itertools; use super::AggregateFinal; @@ -31,9 +29,11 @@ use super::Limit; use super::PhysicalPlan; use super::Project; use super::Sort; -use super::StageKind; use super::TableScan; use super::UnionAll; +use crate::planner::IndexType; +use crate::planner::MetadataRef; +use crate::planner::DUMMY_TABLE_INDEX; impl PhysicalPlan { pub fn format(&self, metadata: MetadataRef) -> Result<String> { @@ -41,8 +41,6 @@ } } - - fn to_format_tree(plan: &PhysicalPlan, metadata: &MetadataRef) -> Result<FormatTreeNode<String>> { match plan { PhysicalPlan::TableScan(plan) => table_scan_to_format_tree(plan, metadata), @@ -81,7 +79,7 @@ fn table_scan_to_format_tree( extras .filters .iter() - .map(|f| f.column_name()) + .map(|f| f.pretty_display()) .collect::<Vec<String>>() .join(", ") }); @@ -121,8 +119,8 @@ fn filter_to_format_tree(plan: &Filter, metadata: &MetadataRef) -> Result<FormatTreeNode<String>> { let filter = plan .predicates .iter() - .map(|scalar| scalar.pretty_display(metadata)) - .collect::<Result<Vec<String>>>()? + .map(|scalar| scalar.pretty_display()) + .collect::<Vec<String>>() .join(", "); Ok(FormatTreeNode::with_children("Filter".to_string(), vec![ FormatTreeNode::new(format!("filters: [{filter}]")), @@ -154,8 +152,8 @@ fn eval_scalar_to_format_tree( let scalars = plan .scalars .iter() - .map(|(scalar, _)| scalar.pretty_display(metadata)) - .collect::<Result<Vec<String>>>()? + .map(|(scalar, _)| scalar.pretty_display()) + .collect::<Vec<String>>() .join(", "); Ok(FormatTreeNode::with_children( "EvalScalar".to_string(), )) } -pub fn pretty_display_agg_desc(desc: &AggregateFunctionDesc, metadata: &MetadataRef) -> Result<String> { - Ok(format!( +pub fn pretty_display_agg_desc(desc: &AggregateFunctionDesc, metadata: &MetadataRef) -> String { + format!( "{}({})", desc.sig.name, desc.arg_indices .iter() .map(|&index| { let column = metadata.read().column(index).clone(); - Ok(column.name().to_string()) + column.name().to_string() }) - .collect::<Result<Vec<String>>>()? + .collect::<Vec<String>>() .join(", ") - )) + ) } fn aggregate_partial_to_format_tree( @@ -199,8 +197,8 @@ let agg_funcs = plan .agg_funcs .iter() - .map(|agg| agg.pretty_display(metadata)) - .collect::<Result<Vec<String>>>()?
+ .map(|agg| pretty_display_agg_desc(agg, metadata)) + .collect::<Vec<String>>() .join(", "); Ok(FormatTreeNode::with_children( "AggregatePartial".to_string(), @@ -230,8 +228,8 @@ fn aggregate_final_to_format_tree( let agg_funcs = plan .agg_funcs .iter() - .map(|agg| agg.pretty_display(metadata)) - .collect::<Result<Vec<String>>>()? + .map(|agg| pretty_display_agg_desc(agg, metadata)) + .collect::<Vec<String>>() .join(", "); Ok(FormatTreeNode::with_children( "AggregateFinal".to_string(), @@ -288,20 +286,20 @@ fn hash_join_to_format_tree( let build_keys = plan .build_keys .iter() - .map(|scalar| scalar.pretty_display(metadata)) - .collect::<Result<Vec<String>>>()? + .map(|scalar| scalar.pretty_display()) + .collect::<Vec<String>>() .join(", "); let probe_keys = plan .probe_keys .iter() - .map(|scalar| scalar.pretty_display(metadata)) - .collect::<Result<Vec<String>>>()? + .map(|scalar| scalar.pretty_display()) + .collect::<Vec<String>>() .join(", "); let filters = plan .non_equi_conditions .iter() - .map(|filter| filter.pretty_display(metadata)) - .collect::<Result<Vec<String>>>()? + .map(|filter| filter.pretty_display()) + .collect::<Vec<String>>() .join(", "); let mut build_child = to_format_tree(&plan.build, metadata)?; @@ -330,8 +328,8 @@ fn exchange_to_format_tree( "Hash({})", plan.keys .iter() - .map(|scalar| { scalar.pretty_display(metadata) }) - .collect::<Result<Vec<String>>>()? + .map(|scalar| { scalar.pretty_display() }) + .collect::<Vec<String>>() .join(", ") ), StageKind::Expansive => "Broadcast".to_string(), diff --git a/src/query/sql/src/executor/mod.rs b/src/query/sql/src/executor/mod.rs index 6c60ad22e2af..07294d99e2af 100644 --- a/src/query/sql/src/executor/mod.rs +++ b/src/query/sql/src/executor/mod.rs @@ -17,20 +17,11 @@ mod physical_plan; mod physical_plan_builder; mod physical_plan_display; mod physical_plan_visitor; -mod physical_scalar; +pub mod table_read_plan; mod util; -mod plan_extras; -mod plan_read_datasource; - pub use physical_plan::*; pub use physical_plan_builder::PhysicalPlanBuilder; pub use physical_plan_builder::PhysicalScalarBuilder; pub use physical_plan_visitor::PhysicalPlanReplacer; -pub use physical_scalar::*; pub use util::*; -pub use plan_extras::*; - -pub use plan_read_datasource::ReadDataSourcePlan; -pub use plan_read_datasource::ToReadDataSourcePlan; -pub use plan_read_datasource::SourceInfo; diff --git a/src/query/sql/src/executor/physical_plan.rs b/src/query/sql/src/executor/physical_plan.rs index c1d87cdc407b..9c64e22abcc8 100644 --- a/src/query/sql/src/executor/physical_plan.rs +++ b/src/query/sql/src/executor/physical_plan.rs @@ -24,19 +24,18 @@ use common_datavalues::NullableType; use common_datavalues::ToDataType; use common_datavalues::Vu8; use common_exception::Result; -use common_planner::PhysicalScalar; -use super::ReadDataSourcePlan; use common_meta_app::schema::TableInfo; -use common_planner::IndexType; - -use crate::planner::plans::SINK_SCHEMA; +use common_planner::extras::StageKind; +use common_planner::extras::SINK_SCHEMA; +use common_planner::AggregateFunctionDesc; +use common_planner::PhysicalScalar; +use common_planner::ReadDataSourcePlan; +use common_planner::SortDesc; -use super::StageKind; -use super::AggregateFunctionDesc; -use super::SortDesc; use crate::optimizer::ColumnSet; use crate::plans::JoinType; use crate::ColumnBinding; +use crate::IndexType; pub type ColumnID = String; diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs index b648f76ba60a..7f62a71cfe06 100644 --- a/src/query/sql/src/executor/physical_plan_builder.rs +++ b/src/query/sql/src/executor/physical_plan_builder.rs @@ -24,14 +24,16 @@ use
common_datavalues::DataSchemaRefExt; use common_exception::ErrorCode; use common_exception::Result; use common_functions::scalars::FunctionFactory; -use super::plan_extras::Extras; -use common_planner::IndexType; -use common_planner::Metadata; -use common_planner::MetadataRef; -use common_planner::DUMMY_TABLE_INDEX; +use common_planner::extras::Extras; +use common_planner::extras::PrewhereInfo; +use common_planner::extras::StageKind; +use common_planner::plans::Projection; +use common_planner::AggregateFunctionDesc; +use common_planner::AggregateFunctionSignature; +use common_planner::PhysicalScalar; +use common_planner::SortDesc; use itertools::Itertools; -use super::plan_read_datasource::ToReadDataSourcePlan; use super::AggregateFinal; use super::AggregatePartial; use super::Exchange as PhysicalExchange; use super::Filter; use super::HashJoin; use super::Limit; use super::Sort; use super::TableScan; -use crate::executor::PrewhereInfo; +use crate::executor::table_read_plan::ToReadDataSourcePlan; use crate::executor::util::check_physical; -use crate::executor::AggregateFunctionDesc; -use crate::executor::AggregateFunctionSignature; use crate::executor::ColumnID; use crate::executor::EvalScalar; use crate::executor::PhysicalPlan; -use common_planner::PhysicalScalar; -use crate::executor::SortDesc; use crate::executor::UnionAll; use crate::optimizer::ColumnSet; use crate::optimizer::SExpr; @@ -56,10 +54,13 @@ use crate::plans::AggregateMode; use crate::plans::AndExpr; use crate::plans::Exchange; use crate::plans::PhysicalScan; -use crate::plans::Projection; use crate::plans::RelOperator; use crate::plans::Scalar; +use crate::IndexType; +use crate::Metadata; +use crate::MetadataRef; use crate::ScalarExpr; +use crate::DUMMY_TABLE_INDEX; pub struct PhysicalPlanBuilder { metadata: MetadataRef, @@ -481,7 +482,7 @@ impl PhysicalPlanBuilder { let projection = Self::build_projection(&metadata, table_schema, &scan.columns, has_inner_column); - let builder = PhysicalScalarBuilder::new(table_schema); + let mut builder = PhysicalScalarBuilder::new(table_schema); let push_down_filters = scan .push_down_predicates .as_ref() let prewhere_info = scan .prewhere .as_ref() - .map(|prewhere| { - let predicate = prewhere - .predicates - .iter().fold(None, |acc: Option<Scalar>, &x: &Scalar| { - match acc { + .map(|prewhere| { + let predicate = + prewhere + .predicates + .iter() + .fold(None, |acc: Option<Scalar>, x: &Scalar| match acc { Some(acc) => { let func = FunctionFactory::instance() - .get("and", &[&acc.data_type(), &x.data_type()]) - .unwrap(); + .get("and", &[&acc.data_type(), &x.data_type()]) + .unwrap(); Some(Scalar::AndExpr(AndExpr { - left: Box::new(acc), + left: Box::new(acc), right: Box::new(x.clone()), - return_type: Box::new(func.return_type()) , + return_type: Box::new(func.return_type()), })) - }, + } None => Some(x.clone()), - } - }); - + }); assert!( predicate.is_some(), "there should be at least one predicate in prewhere" ); @@ -561,14 +561,21 @@ let order_by = scan .order_by .clone() .map(|items| { - items.into_iter().map(|item| { - let metadata = self.metadata.read(); - let ty = metadata.column(item.index).data_type(); - let name = metadata.column(item.index).name(); - let scalar = PhysicalScalar::IndexedVariable { index: item.index, data_type: ty.clone(), display_name: name.to_string() }; - - Ok((scalar, item.asc, item.nulls_first)) - }).collect::<Result<Vec<_>>>() + items + .into_iter() + .map(|item| { + let metadata = self.metadata.read(); + let ty = metadata.column(item.index).data_type(); + let name =
metadata.column(item.index).name(); + let scalar = PhysicalScalar::IndexedVariable { + index: item.index, + data_type: ty.clone(), + display_name: name.to_string(), + }; + + Ok((scalar, item.asc, item.nulls_first)) + }) + .collect::<Result<Vec<_>>>() }) .transpose()?; @@ -620,24 +627,24 @@ impl<'a> PhysicalScalarBuilder<'a> { Scalar::AndExpr(and) => Ok(PhysicalScalar::Function { name: "and".to_string(), args: vec![ - (self.build(&and.left)?, and.left.data_type()), - (self.build(&and.right)?, and.right.data_type()), + self.build(&and.left)?, + self.build(&and.right)? ], return_type: and.data_type(), }), Scalar::OrExpr(or) => Ok(PhysicalScalar::Function { name: "or".to_string(), args: vec![ - (self.build(&or.left)?, or.left.data_type()), - (self.build(&or.right)?, or.right.data_type()), + self.build(&or.left)?, + self.build(&or.right)? ], return_type: or.data_type(), }), Scalar::ComparisonExpr(comp) => Ok(PhysicalScalar::Function { name: comp.op.to_func_name(), args: vec![ - (self.build(&comp.left)?, comp.left.data_type()), - (self.build(&comp.right)?, comp.right.data_type()), + self.build(&comp.left)?, + self.build(&comp.right)? ], return_type: comp.data_type(), }), @@ -647,7 +654,7 @@ .arguments .iter() .zip(func.arg_types.iter()) - .map(|(arg, typ)| Ok((self.build(arg)?, typ.clone()))) + .map(|(arg, _)| Ok(self.build(arg)?)) .collect::<Result<_>>()?, return_type: *func.return_type.clone(), }),
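The prewhere fold earlier in this physical_plan_builder.rs diff rebuilds the filter as a left-deep AND chain over `Scalar::AndExpr`. A minimal sketch of the same reduction in isolation (hypothetical `conjoin` helper, not part of this patch; assumes the `Scalar`, `AndExpr`, and `FunctionFactory` items used in that hunk):

    use common_exception::Result;
    use common_functions::scalars::FunctionFactory;

    use crate::plans::AndExpr;
    use crate::plans::Scalar;
    use crate::ScalarExpr;

    // Reduce a predicate list to one left-deep AND chain, as the fold does.
    fn conjoin(predicates: &[Scalar]) -> Result<Option<Scalar>> {
        let mut acc: Option<Scalar> = None;
        for x in predicates {
            acc = Some(match acc {
                Some(acc) => {
                    let func = FunctionFactory::instance()
                        .get("and", &[&acc.data_type(), &x.data_type()])?;
                    Scalar::AndExpr(AndExpr {
                        left: Box::new(acc),
                        right: Box::new(x.clone()),
                        return_type: Box::new(func.return_type()),
                    })
                }
                None => x.clone(),
            });
        }
        Ok(acc)
    }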
diff --git a/src/query/sql/src/executor/physical_plan_display.rs b/src/query/sql/src/executor/physical_plan_display.rs index 13fb4e39660c..2b6832464030 100644 --- a/src/query/sql/src/executor/physical_plan_display.rs +++ b/src/query/sql/src/executor/physical_plan_display.rs @@ -15,7 +15,6 @@ use std::fmt::Display; use std::fmt::Formatter; -use common_datavalues::format_data_type_sql; use itertools::Itertools; use super::DistributedInsertSelect; @@ -29,7 +28,6 @@ use crate::executor::Filter; use crate::executor::HashJoin; use crate::executor::Limit; use crate::executor::PhysicalPlan; -use common_planner::PhysicalScalar; use crate::executor::Project; use crate::executor::Sort; use crate::executor::TableScan; @@ -95,27 +93,6 @@ impl Display for Filter { } } -impl Display for PhysicalScalar { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match &self { - PhysicalScalar::Constant { value, .. } => write!(f, "{}", value), - PhysicalScalar::Function { name, args, .. } => write!( - f, - "{}({})", - name, - args.iter() - .map(|(arg, _)| format!("{}", arg)) - .collect::<Vec<String>>() - .join(", ") - ), - PhysicalScalar::Cast { input, target } => { - write!(f, "CAST({} AS {})", input, format_data_type_sql(target)) - } - PhysicalScalar::IndexedVariable { index, .. } => write!(f, "${index}"), - } - } -} - impl Display for Project { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { if let Ok(input_schema) = self.input.output_schema() { diff --git a/src/query/sql/src/executor/plan_extras.rs b/src/query/sql/src/executor/plan_extras.rs deleted file mode 100644 index 1b9310aa36c7..000000000000 --- a/src/query/sql/src/executor/plan_extras.rs +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::BTreeMap; -use std::fmt::Debug; -use std::fmt::Formatter; - -use common_datavalues::DataSchema; - -use crate::plans::Projection; - -use common_planner::PhysicalScalar; - -#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq)] -pub struct PrewhereInfo { - /// columns to be ouput be prewhere scan - pub output_columns: Projection, - /// columns used for prewhere - pub prewhere_columns: Projection, - /// remain_columns = scan.columns - need_columns - pub remain_columns: Projection, - /// filter for prewhere - pub filter: PhysicalScalar, -} - -/// Extras is a wrapper for push down items. -#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Default)] -pub struct Extras { - /// Optional column indices to use as a projection - pub projection: Option<Projection>, - /// Optional filter expression plan - /// split_conjunctions by `and` operator - pub filters: Vec<PhysicalScalar>, - /// Optional prewhere information - /// used for prewhere optimization - pub prewhere: Option<PrewhereInfo>, - /// Optional limit to skip read - pub limit: Option<usize>, - /// Optional order_by expression plan, - /// expression: PhysicalScalar, asc: bool, nulls_first - pub order_by: Vec<(PhysicalScalar, bool, bool)>, -} - -impl Extras { - pub fn default() -> Self { - Extras { - projection: None, - filters: vec![], - prewhere: None, - limit: None, - order_by: vec![], - } - } -} - - - -#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] -pub enum StageKind { - Normal, - Expansive, - Merge, -}
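None of the deleted push-down types disappear: `Extras`, `PrewhereInfo`, and `StageKind` move to `common_planner::extras`, which the import rewrites throughout this patch point at. A minimal sketch of building push-downs against the relocated struct (hypothetical `limit_only_push_downs` helper, not part of this patch; field set as in the deleted definition above):

    use common_planner::extras::Extras;

    // Push down only a LIMIT; every other push-down slot keeps its default.
    fn limit_only_push_downs(limit: usize) -> Extras {
        Extras {
            limit: Some(limit),
            ..Extras::default()
        }
    }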
diff --git a/src/query/sql/src/executor/plan_read_datasource.rs b/src/query/sql/src/executor/table_read_plan.rs similarity index 55% rename from src/query/sql/src/executor/plan_read_datasource.rs rename to src/query/sql/src/executor/table_read_plan.rs index 5bbc3394d470..3128454de03a 100644 --- a/src/query/sql/src/executor/plan_read_datasource.rs +++ b/src/query/sql/src/executor/table_read_plan.rs @@ -19,105 +19,11 @@ use common_catalog::table::Table; use common_catalog::table_context::TableContext; use common_datavalues::DataField; use common_datavalues::DataSchema; -use common_datavalues::DataSchemaRef; use common_exception::Result; -use common_planner::PhysicalScalar; -use common_meta_app::schema::TableInfo; - -use super::Extras; - -use crate::planner::plans::Partitions; -use crate::planner::plans::Projection; -use crate::planner::plans::StageTableInfo; -use crate::planner::plans::Statistics; - - -#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] -pub enum SourceInfo { - // Normal table source, `fuse/system`. - TableSource(TableInfo), - - // Internal/External source, like `s3://` or `azblob://`. - StageSource(StageTableInfo), -} - -impl SourceInfo { - pub fn schema(&self) -> Arc<DataSchema> { - match self { - SourceInfo::TableSource(table_info) => table_info.schema(), - SourceInfo::StageSource(table_info) => table_info.schema(), - } - } - - pub fn desc(&self) -> String { - match self { - SourceInfo::TableSource(table_info) => table_info.desc.clone(), - SourceInfo::StageSource(table_info) => table_info.desc(), - } - } -} - -// TODO: Delete the scan plan field, but it depends on plan_parser:L394 -#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq)] -pub struct ReadDataSourcePlan { - // TODO catalog id is better - pub catalog: String, - pub source_info: SourceInfo, - - /// Required fields to scan. - /// - /// After optimization, only a sub set of the fields in `table_info.schema().fields` are needed. - /// The key is the column_index of `ColumnEntry` in `Metadata`. - /// - /// If it is None, one should use `table_info.schema().fields()`. - pub scan_fields: Option<BTreeMap<usize, DataField>>, - - pub parts: Partitions, - pub statistics: Statistics, - pub description: String, - - pub tbl_args: Option<Vec<PhysicalScalar>>, - pub push_downs: Option<Extras>, -} - -impl ReadDataSourcePlan { - /// Return schema after the projection - pub fn schema(&self) -> DataSchemaRef { - self.scan_fields - .clone() - .map(|x| { - let fields: Vec<_> = x.iter().map(|(_, f)| f.clone()).collect(); - Arc::new(self.source_info.schema().project_by_fields(fields)) - }) - .unwrap_or_else(|| self.source_info.schema()) - } - - /// Return designated required fields or all fields in a hash map. - pub fn scan_fields(&self) -> BTreeMap<usize, DataField> { - self.scan_fields - .clone() - .unwrap_or_else(|| self.source_info.schema().fields_map()) - } - - pub fn projections(&self) -> Projection { - let default_proj = || { - (0..self.source_info.schema().fields().len()) - .into_iter() - .collect::<Vec<usize>>() - }; - - if let Some(Extras { - projection: Some(prj), - ..
- }) = &self.push_downs - { - prj.clone() - } else { - Projection::Columns(default_proj()) - } - } -} - +use common_planner::extras::Extras; +use common_planner::plans::Projection; +use common_planner::ReadDataSourcePlan; +use common_planner::SourceInfo; #[async_trait::async_trait] pub trait ToReadDataSourcePlan { diff --git a/src/query/sql/src/executor/util.rs b/src/query/sql/src/executor/util.rs index 7d5eb86ad5ff..f230fa09bc37 100644 --- a/src/query/sql/src/executor/util.rs +++ b/src/query/sql/src/executor/util.rs @@ -14,12 +14,12 @@ use common_exception::ErrorCode; use common_exception::Result; -use common_planner::IndexType; use once_cell::sync::Lazy; use regex::Regex; use crate::optimizer::SExpr; use crate::plans::Operator; +use crate::IndexType; /// Check if all plans in an expression are physical plans pub fn check_physical(expression: &SExpr) -> bool { diff --git a/src/query/sql/src/planner/binder/aggregate.rs b/src/query/sql/src/planner/binder/aggregate.rs index fbdfd8dc8317..9af2219cacbf 100644 --- a/src/query/sql/src/planner/binder/aggregate.rs +++ b/src/query/sql/src/planner/binder/aggregate.rs @@ -22,7 +22,6 @@ use common_ast::DisplayError; use common_datavalues::DataTypeImpl; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::MetadataRef; use crate::binder::scalar::ScalarBinder; use crate::binder::select::SelectList; @@ -44,6 +43,7 @@ use crate::plans::Scalar; use crate::plans::ScalarExpr; use crate::plans::ScalarItem; use crate::BindContext; +use crate::MetadataRef; #[derive(Default, Clone, PartialEq, Eq, Debug)] pub struct AggregateInfo { diff --git a/src/query/sql/src/planner/binder/bind_context.rs b/src/query/sql/src/planner/binder/bind_context.rs index 49639b389f84..c007e1cbe434 100644 --- a/src/query/sql/src/planner/binder/bind_context.rs +++ b/src/query/sql/src/planner/binder/bind_context.rs @@ -25,13 +25,13 @@ use common_datavalues::DataSchemaRefExt; use common_datavalues::DataTypeImpl; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::IndexType; use parking_lot::RwLock; use super::AggregateInfo; use crate::normalize_identifier; use crate::optimizer::SExpr; use crate::plans::Scalar; +use crate::IndexType; use crate::NameResolutionContext; #[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)] diff --git a/src/query/sql/src/planner/binder/binder.rs b/src/query/sql/src/planner/binder/binder.rs index 0c585ed18839..6dd2561e3455 100644 --- a/src/query/sql/src/planner/binder/binder.rs +++ b/src/query/sql/src/planner/binder/binder.rs @@ -36,12 +36,12 @@ use common_planner::plans::DropUDFPlan; use common_planner::plans::DropUserPlan; use common_planner::plans::ShowGrantsPlan; use common_planner::plans::UseDatabasePlan; -use common_planner::MetadataRef; use crate::plans::Plan; use crate::plans::RewriteKind; use crate::BindContext; use crate::ColumnBinding; +use crate::MetadataRef; use crate::NameResolutionContext; use crate::Visibility; diff --git a/src/query/sql/src/planner/binder/copy.rs b/src/query/sql/src/planner/binder/copy.rs index 79d6b90e0f8c..e257da3db241 100644 --- a/src/query/sql/src/planner/binder/copy.rs +++ b/src/query/sql/src/planner/binder/copy.rs @@ -28,19 +28,18 @@ use common_catalog::table_context::TableContext; use common_exception::ErrorCode; use common_exception::Result; use common_io::prelude::parse_escape_string; - use common_meta_types::FileFormatOptions; use common_meta_types::StageFileFormatType; use common_meta_types::UserStageInfo; +use 
common_planner::stage_table::StageTableInfo; +use common_planner::ReadDataSourcePlan; +use common_planner::SourceInfo; use common_storage::parse_uri_location; use common_storage::UriLocation; use common_users::UserApiProvider; use tracing::debug; -use crate::executor::SourceInfo; -use crate::planner::plans::StageTableInfo; use crate::binder::Binder; -use crate::executor::ReadDataSourcePlan; use crate::normalize_identifier; use crate::plans::CopyPlanV2; use crate::plans::Plan; diff --git a/src/query/sql/src/planner/binder/delete.rs b/src/query/sql/src/planner/binder/delete.rs index 13b0abbb9681..c2f60e05277f 100644 --- a/src/query/sql/src/planner/binder/delete.rs +++ b/src/query/sql/src/planner/binder/delete.rs @@ -16,14 +16,14 @@ use common_ast::ast::Expr; use common_ast::ast::TableReference; use common_exception::ErrorCode; use common_exception::Result; +use common_planner::plans::DeletePlan; +use common_planner::plans::Projection; use crate::binder::Binder; use crate::binder::ScalarBinder; -use crate::plans::DeletePlan; use crate::plans::Plan; use crate::BindContext; use crate::ScalarExpr; -use crate::plans::Projection; impl<'a> Binder { pub(in crate::planner::binder) async fn bind_delete( diff --git a/src/query/sql/src/planner/binder/distinct.rs b/src/query/sql/src/planner/binder/distinct.rs index cc1fb990b76f..ad8085d09937 100644 --- a/src/query/sql/src/planner/binder/distinct.rs +++ b/src/query/sql/src/planner/binder/distinct.rs @@ -15,7 +15,6 @@ use std::collections::HashMap; use common_exception::Result; -use common_planner::IndexType; use crate::binder::Binder; use crate::binder::ColumnBinding; @@ -28,6 +27,7 @@ use crate::plans::EvalScalar; use crate::plans::Scalar; use crate::plans::ScalarItem; use crate::BindContext; +use crate::IndexType; impl Binder { pub(super) fn bind_distinct( diff --git a/src/query/sql/src/planner/binder/join.rs b/src/query/sql/src/planner/binder/join.rs index 3e6efa2b9f04..1a79520a9741 100644 --- a/src/query/sql/src/planner/binder/join.rs +++ b/src/query/sql/src/planner/binder/join.rs @@ -19,12 +19,11 @@ use common_ast::ast::Expr; use common_ast::ast::Join; use common_ast::ast::JoinCondition; use common_ast::ast::JoinOperator; +use common_catalog::table_context::TableContext; use common_datavalues::type_coercion::compare_coercion; use common_datavalues::wrap_nullable; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::MetadataRef; -use common_catalog::table_context::TableContext; use crate::binder::scalar_common::split_conjunctions; use crate::binder::scalar_common::split_equivalent_predicate; @@ -45,6 +44,7 @@ use crate::plans::LogicalInnerJoin; use crate::plans::Scalar; use crate::plans::ScalarExpr; use crate::BindContext; +use crate::MetadataRef; pub struct JoinConditions { pub(crate) left_conditions: Vec, diff --git a/src/query/sql/src/planner/binder/project.rs b/src/query/sql/src/planner/binder/project.rs index 83e62326d268..15691f92bd03 100644 --- a/src/query/sql/src/planner/binder/project.rs +++ b/src/query/sql/src/planner/binder/project.rs @@ -18,7 +18,6 @@ use common_ast::ast::Indirection; use common_ast::ast::SelectTarget; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::IndexType; use super::bind_context::NameResolutionResult; use crate::binder::select::SelectItem; @@ -38,6 +37,7 @@ use crate::plans::ScalarExpr; use crate::plans::ScalarItem; use crate::plans::SubqueryExpr; use crate::plans::SubqueryType; +use crate::IndexType; impl<'a> Binder { pub(super) fn analyze_projection( 
diff --git a/src/query/sql/src/planner/binder/scalar.rs b/src/query/sql/src/planner/binder/scalar.rs index 026d8c794157..39e9b6f481eb 100644 --- a/src/query/sql/src/planner/binder/scalar.rs +++ b/src/query/sql/src/planner/binder/scalar.rs @@ -15,15 +15,15 @@ use std::sync::Arc; use common_ast::ast::Expr; +use common_catalog::table_context::TableContext; use common_datavalues::DataTypeImpl; use common_exception::Result; -use common_planner::MetadataRef; -use common_catalog::table_context::TableContext; use crate::planner::binder::BindContext; use crate::planner::semantic::NameResolutionContext; use crate::planner::semantic::TypeChecker; use crate::plans::Scalar; +use crate::MetadataRef; /// Helper for binding scalar expression with `BindContext`. pub struct ScalarBinder<'a> { diff --git a/src/query/sql/src/planner/binder/select.rs b/src/query/sql/src/planner/binder/select.rs index 647cd8c6ecb5..8cf63f99ab04 100644 --- a/src/query/sql/src/planner/binder/select.rs +++ b/src/query/sql/src/planner/binder/select.rs @@ -30,7 +30,6 @@ use common_datavalues::type_coercion::compare_coercion; use common_datavalues::DataTypeImpl; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::IndexType; use crate::binder::join::JoinConditions; use crate::binder::scalar_common::split_conjunctions; @@ -49,6 +48,7 @@ use crate::plans::Scalar; use crate::plans::ScalarItem; use crate::plans::UnionAll; use crate::ColumnBinding; +use crate::IndexType; // A normalized IR for `SELECT` clause. #[derive(Debug, Default)] diff --git a/src/query/sql/src/planner/binder/setting.rs b/src/query/sql/src/planner/binder/setting.rs index e2bc9f4ad68f..90532b16c677 100644 --- a/src/query/sql/src/planner/binder/setting.rs +++ b/src/query/sql/src/planner/binder/setting.rs @@ -15,13 +15,13 @@ use common_ast::ast::Identifier; use common_ast::ast::Literal; use common_exception::Result; +use common_planner::plans::SettingPlan; +use common_planner::plans::VarValue; use super::BindContext; use super::Binder; use crate::planner::semantic::TypeChecker; use crate::plans::Plan; -use crate::plans::SettingPlan; -use crate::plans::VarValue; impl<'a> Binder { pub(in crate::planner::binder) async fn bind_set_variable( diff --git a/src/query/sql/src/planner/binder/sort.rs b/src/query/sql/src/planner/binder/sort.rs index a57efe66f8b7..9a21b7427cf9 100644 --- a/src/query/sql/src/planner/binder/sort.rs +++ b/src/query/sql/src/planner/binder/sort.rs @@ -21,7 +21,6 @@ use common_ast::ast::OrderByExpr; use common_ast::DisplayError; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::IndexType; use super::bind_context::NameResolutionResult; use crate::binder::scalar::ScalarBinder; @@ -44,6 +43,7 @@ use crate::plans::ScalarItem; use crate::plans::Sort; use crate::plans::SortItem; use crate::BindContext; +use crate::IndexType; use crate::ScalarExpr; pub struct OrderItems<'a> { diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs index d5e580703574..3aa6cb1225f1 100644 --- a/src/query/sql/src/planner/binder/table.rs +++ b/src/query/sql/src/planner/binder/table.rs @@ -33,7 +33,6 @@ use common_catalog::table_function::TableFunction; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::IndexType; use common_storages_preludes::view::view_table::QUERY; use crate::binder::scalar::ScalarBinder; @@ -48,6 +47,7 @@ use crate::plans::ConstantExpr; use crate::plans::LogicalGet; use 
crate::plans::Scalar; use crate::BindContext; +use crate::IndexType; impl<'a> Binder { pub(super) async fn bind_one_table( @@ -189,9 +189,7 @@ impl<'a> Binder { let expressions = args .into_iter() .map(|(scalar, _)| match scalar { - Scalar::ConstantExpr(ConstantExpr { value, data_type }) => { - Ok(value) - } + Scalar::ConstantExpr(ConstantExpr { value, .. }) => Ok(value), _ => Err(ErrorCode::UnImplement(format!( "Unsupported table argument type: {:?}", scalar diff --git a/src/query/sql/src/planner/format/display_rel_operator.rs b/src/query/sql/src/planner/format/display_rel_operator.rs index ddb04e680f29..15435261f153 100644 --- a/src/query/sql/src/planner/format/display_rel_operator.rs +++ b/src/query/sql/src/planner/format/display_rel_operator.rs @@ -17,7 +17,6 @@ use std::fmt::Display; use common_ast::ast::FormatTreeNode; use common_datavalues::format_data_type_sql; use common_functions::scalars::FunctionFactory; -use common_planner::MetadataRef; use itertools::Itertools; use crate::optimizer::SExpr; @@ -38,6 +37,7 @@ use crate::plans::PhysicalScan; use crate::plans::RelOperator; use crate::plans::Scalar; use crate::plans::Sort; +use crate::MetadataRef; use crate::ScalarExpr; #[derive(Clone)] diff --git a/src/query/sql/src/planner/metadata.rs b/src/query/sql/src/planner/metadata.rs index de1322d89d70..9b23fea09265 100644 --- a/src/query/sql/src/planner/metadata.rs +++ b/src/query/sql/src/planner/metadata.rs @@ -18,6 +18,8 @@ use std::fmt::Debug; use std::fmt::Formatter; use std::sync::Arc; +use common_ast::ast::Expr; +use common_ast::ast::Literal; use common_catalog::table::Table; use common_datavalues::DataField; use common_datavalues::DataType; @@ -25,8 +27,6 @@ use common_datavalues::DataTypeImpl; use common_datavalues::StructType; use common_datavalues::TypeID; use parking_lot::RwLock; -use common_ast::ast::Expr; -use common_ast::ast::Literal; /// Planner use [`usize`] as it's index type. 
/// @@ -318,8 +318,6 @@ impl ColumnEntry { } } - - pub fn optimize_remove_count_args(name: &str, distinct: bool, args: &[&Expr]) -> bool { name.eq_ignore_ascii_case("count") && !distinct diff --git a/src/query/sql/src/planner/mod.rs b/src/query/sql/src/planner/mod.rs index aae43846660e..e7096bb2a3b9 100644 --- a/src/query/sql/src/planner/mod.rs +++ b/src/query/sql/src/planner/mod.rs @@ -27,9 +27,9 @@ pub use binder::Binder; pub use binder::ColumnBinding; pub use binder::ScalarBinder; pub use binder::Visibility; +pub use metadata::*; pub use planner::Planner; pub use plans::ScalarExpr; pub use semantic::normalize_identifier; pub use semantic::IdentifierNormalizer; pub use semantic::NameResolutionContext; -pub use metadata::*; diff --git a/src/query/sql/src/planner/optimizer/cascades/cascade.rs b/src/query/sql/src/planner/optimizer/cascades/cascade.rs index a120136971eb..81016892841b 100644 --- a/src/query/sql/src/planner/optimizer/cascades/cascade.rs +++ b/src/query/sql/src/planner/optimizer/cascades/cascade.rs @@ -18,7 +18,6 @@ use std::sync::Arc; use common_catalog::table_context::TableContext; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::IndexType; use crate::optimizer::cascades::explore_rules::get_explore_rule_set; use crate::optimizer::cascades::implement_rules::get_implement_rule_set; @@ -33,6 +32,7 @@ use crate::optimizer::memo::Memo; use crate::optimizer::rule::RuleSet; use crate::optimizer::rule::TransformResult; use crate::optimizer::SExpr; +use crate::IndexType; /// A cascades-style search engine to enumerate possible alternations of a relational expression and /// find the optimal one. diff --git a/src/query/sql/src/planner/optimizer/cascades/tasks/apply_rule.rs b/src/query/sql/src/planner/optimizer/cascades/tasks/apply_rule.rs index 24cbc7dc826c..dff070d4544d 100644 --- a/src/query/sql/src/planner/optimizer/cascades/tasks/apply_rule.rs +++ b/src/query/sql/src/planner/optimizer/cascades/tasks/apply_rule.rs @@ -15,13 +15,13 @@ use std::rc::Rc; use common_exception::Result; -use common_planner::IndexType; use crate::optimizer::cascades::tasks::SharedCounter; use crate::optimizer::cascades::CascadesOptimizer; use crate::optimizer::rule::TransformResult; use crate::optimizer::RuleFactory; use crate::optimizer::RuleID; +use crate::IndexType; #[derive(Debug)] pub struct ApplyRuleTask { diff --git a/src/query/sql/src/planner/optimizer/cascades/tasks/explore_expr.rs b/src/query/sql/src/planner/optimizer/cascades/tasks/explore_expr.rs index a802a3c8380e..905f0313fb48 100644 --- a/src/query/sql/src/planner/optimizer/cascades/tasks/explore_expr.rs +++ b/src/query/sql/src/planner/optimizer/cascades/tasks/explore_expr.rs @@ -15,7 +15,6 @@ use std::rc::Rc; use common_exception::Result; -use common_planner::IndexType; use super::apply_rule::ApplyRuleTask; use super::explore_group::ExploreGroupTask; @@ -23,6 +22,7 @@ use super::Task; use crate::optimizer::cascades::scheduler::Scheduler; use crate::optimizer::cascades::tasks::SharedCounter; use crate::optimizer::cascades::CascadesOptimizer; +use crate::IndexType; #[derive(Clone, Copy, Debug)] pub enum ExploreExprState { diff --git a/src/query/sql/src/planner/optimizer/cascades/tasks/explore_group.rs b/src/query/sql/src/planner/optimizer/cascades/tasks/explore_group.rs index 5d03190e83f6..2c526613ff82 100644 --- a/src/query/sql/src/planner/optimizer/cascades/tasks/explore_group.rs +++ b/src/query/sql/src/planner/optimizer/cascades/tasks/explore_group.rs @@ -15,7 +15,6 @@ use std::rc::Rc; use 
common_exception::Result; -use common_planner::IndexType; use super::explore_expr::ExploreExprTask; use super::Task; @@ -23,6 +22,7 @@ use crate::optimizer::cascades::scheduler::Scheduler; use crate::optimizer::cascades::tasks::SharedCounter; use crate::optimizer::cascades::CascadesOptimizer; use crate::optimizer::group::GroupState; +use crate::IndexType; #[derive(Clone, Copy, Debug)] pub enum ExploreGroupState { diff --git a/src/query/sql/src/planner/optimizer/cascades/tasks/implement_expr.rs b/src/query/sql/src/planner/optimizer/cascades/tasks/implement_expr.rs index 8d01bdfd8540..2f25be3ba81d 100644 --- a/src/query/sql/src/planner/optimizer/cascades/tasks/implement_expr.rs +++ b/src/query/sql/src/planner/optimizer/cascades/tasks/implement_expr.rs @@ -15,7 +15,6 @@ use std::rc::Rc; use common_exception::Result; -use common_planner::IndexType; use super::apply_rule::ApplyRuleTask; use super::Task; @@ -23,6 +22,7 @@ use crate::optimizer::cascades::scheduler::Scheduler; use crate::optimizer::cascades::tasks::ImplementGroupTask; use crate::optimizer::cascades::tasks::SharedCounter; use crate::optimizer::cascades::CascadesOptimizer; +use crate::IndexType; #[derive(Clone, Copy, Debug)] pub enum ImplementExprState { diff --git a/src/query/sql/src/planner/optimizer/cascades/tasks/implement_group.rs b/src/query/sql/src/planner/optimizer/cascades/tasks/implement_group.rs index e563256b7780..2150734991e1 100644 --- a/src/query/sql/src/planner/optimizer/cascades/tasks/implement_group.rs +++ b/src/query/sql/src/planner/optimizer/cascades/tasks/implement_group.rs @@ -16,7 +16,6 @@ use std::rc::Rc; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::IndexType; use super::explore_group::ExploreGroupTask; use super::implement_expr::ImplementExprTask; @@ -25,6 +24,7 @@ use crate::optimizer::cascades::scheduler::Scheduler; use crate::optimizer::cascades::tasks::SharedCounter; use crate::optimizer::cascades::CascadesOptimizer; use crate::optimizer::group::GroupState; +use crate::IndexType; #[derive(Clone, Copy, Debug)] pub enum ImplementGroupState { diff --git a/src/query/sql/src/planner/optimizer/cascades/tasks/optimize_expr.rs b/src/query/sql/src/planner/optimizer/cascades/tasks/optimize_expr.rs index ae9da0f0c991..979571be2d12 100644 --- a/src/query/sql/src/planner/optimizer/cascades/tasks/optimize_expr.rs +++ b/src/query/sql/src/planner/optimizer/cascades/tasks/optimize_expr.rs @@ -17,7 +17,6 @@ use std::rc::Rc; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::IndexType; use super::optimize_group::OptimizeGroupTask; use super::Task; @@ -26,6 +25,7 @@ use crate::optimizer::cascades::tasks::SharedCounter; use crate::optimizer::cascades::CascadesOptimizer; use crate::optimizer::cost::Cost; use crate::optimizer::cost::CostContext; +use crate::IndexType; #[derive(Clone, Copy, Debug)] pub enum OptimizeExprState { diff --git a/src/query/sql/src/planner/optimizer/cascades/tasks/optimize_group.rs b/src/query/sql/src/planner/optimizer/cascades/tasks/optimize_group.rs index d04434c8c224..04536a464941 100644 --- a/src/query/sql/src/planner/optimizer/cascades/tasks/optimize_group.rs +++ b/src/query/sql/src/planner/optimizer/cascades/tasks/optimize_group.rs @@ -15,7 +15,6 @@ use std::rc::Rc; use common_exception::Result; -use common_planner::IndexType; use super::implement_group::ImplementGroupTask; use super::optimize_expr::OptimizeExprTask; @@ -25,6 +24,7 @@ use crate::optimizer::cascades::tasks::SharedCounter; use 
crate::optimizer::cascades::CascadesOptimizer; use crate::optimizer::group::GroupState; use crate::plans::Operator; +use crate::IndexType; #[derive(Clone, Copy, Debug)] pub enum OptimizeGroupState { diff --git a/src/query/sql/src/planner/optimizer/cost/cost.rs b/src/query/sql/src/planner/optimizer/cost/cost.rs index bc3ea4d9e1e2..48ca52fada17 100644 --- a/src/query/sql/src/planner/optimizer/cost/cost.rs +++ b/src/query/sql/src/planner/optimizer/cost/cost.rs @@ -16,10 +16,10 @@ use std::fmt::Display; use std::ops::Add; use common_exception::Result; -use common_planner::IndexType; use crate::optimizer::MExpr; use crate::optimizer::Memo; +use crate::IndexType; #[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] pub struct Cost(pub f64); diff --git a/src/query/sql/src/planner/optimizer/format.rs b/src/query/sql/src/planner/optimizer/format.rs index 257e6f63fce3..4e865ede4ebf 100644 --- a/src/query/sql/src/planner/optimizer/format.rs +++ b/src/query/sql/src/planner/optimizer/format.rs @@ -16,13 +16,13 @@ use std::collections::HashMap; use common_ast::ast::FormatTreeNode; use common_exception::Result; -use common_planner::IndexType; use super::cost::CostContext; use crate::optimizer::group::Group; use crate::optimizer::MExpr; use crate::optimizer::Memo; use crate::plans::RelOperator; +use crate::IndexType; pub fn display_memo(memo: &Memo, cost_map: &HashMap) -> Result { Ok(memo diff --git a/src/query/sql/src/planner/optimizer/group.rs b/src/query/sql/src/planner/optimizer/group.rs index 18b3f1b5d1fc..c8cbdd0a0ed8 100644 --- a/src/query/sql/src/planner/optimizer/group.rs +++ b/src/query/sql/src/planner/optimizer/group.rs @@ -14,10 +14,10 @@ use common_exception::ErrorCode; use common_exception::Result; -use common_planner::IndexType; use crate::optimizer::m_expr::MExpr; use crate::optimizer::property::RelationalProperty; +use crate::IndexType; /// State of a `Group` #[derive(Copy, Clone, Debug, PartialEq, Eq)] diff --git a/src/query/sql/src/planner/optimizer/heuristic/decorrelate.rs b/src/query/sql/src/planner/optimizer/heuristic/decorrelate.rs index c3d70fc77a04..535b54021d80 100644 --- a/src/query/sql/src/planner/optimizer/heuristic/decorrelate.rs +++ b/src/query/sql/src/planner/optimizer/heuristic/decorrelate.rs @@ -20,8 +20,6 @@ use common_datavalues::DataTypeImpl; use common_datavalues::NullableType; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::IndexType; -use common_planner::MetadataRef; use crate::binder::wrap_cast; use crate::binder::JoinPredicate; @@ -56,6 +54,8 @@ use crate::plans::ScalarItem; use crate::plans::SubqueryExpr; use crate::plans::SubqueryType; use crate::ColumnBinding; +use crate::IndexType; +use crate::MetadataRef; use crate::ScalarExpr; /// Decorrelate subqueries inside `s_expr`. 
diff --git a/src/query/sql/src/planner/optimizer/heuristic/heuristic.rs b/src/query/sql/src/planner/optimizer/heuristic/heuristic.rs
index 3e080d7caff4..995fa744dc9f 100644
--- a/src/query/sql/src/planner/optimizer/heuristic/heuristic.rs
+++ b/src/query/sql/src/planner/optimizer/heuristic/heuristic.rs
@@ -14,9 +14,8 @@
 use std::sync::Arc;
-use common_exception::Result;
-use common_planner::MetadataRef;
 use common_catalog::table_context::TableContext;
+use common_exception::Result;
 use once_cell::sync::Lazy;
 use super::prune_unused_columns::UnusedColumnPruner;
@@ -29,6 +28,7 @@ use crate::optimizer::ColumnSet;
 use crate::optimizer::RuleID;
 use crate::optimizer::SExpr;
 use crate::BindContext;
+use crate::MetadataRef;
 pub static DEFAULT_REWRITE_RULES: Lazy<Vec<RuleID>> = Lazy::new(|| {
     vec![
diff --git a/src/query/sql/src/planner/optimizer/heuristic/prewhere_optimization.rs b/src/query/sql/src/planner/optimizer/heuristic/prewhere_optimization.rs
index 54957e03843e..7cd53a9ff2c0 100644
--- a/src/query/sql/src/planner/optimizer/heuristic/prewhere_optimization.rs
+++ b/src/query/sql/src/planner/optimizer/heuristic/prewhere_optimization.rs
@@ -13,7 +13,6 @@
 // limitations under the License.
 use common_exception::Result;
-use common_planner::MetadataRef;
 use crate::optimizer::ColumnSet;
 use crate::optimizer::SExpr;
@@ -23,6 +22,7 @@ use crate::plans::PatternPlan;
 use crate::plans::Prewhere;
 use crate::plans::RelOp;
 use crate::plans::Scalar;
+use crate::MetadataRef;
 pub struct PrewhereOptimizer {
     metadata: MetadataRef,
diff --git a/src/query/sql/src/planner/optimizer/heuristic/prune_unused_columns.rs b/src/query/sql/src/planner/optimizer/heuristic/prune_unused_columns.rs
index 8aaa294f1cb9..7e19d702ac10 100644
--- a/src/query/sql/src/planner/optimizer/heuristic/prune_unused_columns.rs
+++ b/src/query/sql/src/planner/optimizer/heuristic/prune_unused_columns.rs
@@ -14,7 +14,6 @@
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_planner::MetadataRef;
 use itertools::Itertools;
 use crate::optimizer::ColumnSet;
@@ -24,6 +23,7 @@ use crate::plans::Aggregate;
 use crate::plans::EvalScalar;
 use crate::plans::LogicalGet;
 use crate::plans::RelOperator;
+use crate::MetadataRef;
 use crate::ScalarExpr;
 pub struct UnusedColumnPruner {
diff --git a/src/query/sql/src/planner/optimizer/heuristic/subquery_rewriter.rs b/src/query/sql/src/planner/optimizer/heuristic/subquery_rewriter.rs
index 914b29b01252..f1d2e10b499e 100644
--- a/src/query/sql/src/planner/optimizer/heuristic/subquery_rewriter.rs
+++ b/src/query/sql/src/planner/optimizer/heuristic/subquery_rewriter.rs
@@ -22,8 +22,6 @@ use common_datavalues::UInt64Type;
 use common_exception::ErrorCode;
 use common_exception::Result;
 use common_functions::aggregates::AggregateFunctionFactory;
-use common_planner::IndexType;
-use common_planner::MetadataRef;
 use crate::binder::ColumnBinding;
 use crate::binder::Visibility;
@@ -49,6 +47,8 @@ use crate::plans::Scalar;
 use crate::plans::ScalarItem;
 use crate::plans::SubqueryExpr;
 use crate::plans::SubqueryType;
+use crate::IndexType;
+use crate::MetadataRef;
 use crate::ScalarExpr;
 #[allow(clippy::enum_variant_names)]
diff --git a/src/query/sql/src/planner/optimizer/m_expr.rs b/src/query/sql/src/planner/optimizer/m_expr.rs
index 399892670aad..ab96291254ec 100644
--- a/src/query/sql/src/planner/optimizer/m_expr.rs
+++ b/src/query/sql/src/planner/optimizer/m_expr.rs
@@ -14,7 +14,6 @@
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_planner::IndexType;
 use super::group::Group;
 use 
crate::optimizer::memo::Memo;
@@ -24,6 +23,7 @@ use crate::optimizer::rule::TransformResult;
 use crate::optimizer::SExpr;
 use crate::plans::Operator;
 use crate::plans::RelOperator;
+use crate::IndexType;
 /// `MExpr` is abbreviation of multiple expression, which is the representation of relational
 /// expressions inside `Memo`.
diff --git a/src/query/sql/src/planner/optimizer/memo.rs b/src/query/sql/src/planner/optimizer/memo.rs
index f78ff242518f..0a60431582ef 100644
--- a/src/query/sql/src/planner/optimizer/memo.rs
+++ b/src/query/sql/src/planner/optimizer/memo.rs
@@ -16,7 +16,6 @@ use std::collections::HashMap;
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_planner::IndexType;
 use super::group::GroupState;
 use super::RelExpr;
@@ -25,6 +24,7 @@ use crate::optimizer::group::Group;
 use crate::optimizer::m_expr::MExpr;
 use crate::optimizer::s_expr::SExpr;
 use crate::plans::RelOperator;
+use crate::IndexType;
 /// `Memo` is a search space which memoize possible plans of a query.
 /// The plans inside `Memo` are organized with `Group`s.
diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs
index 40c81fadec3d..e010835b1cdb 100644
--- a/src/query/sql/src/planner/optimizer/optimizer.rs
+++ b/src/query/sql/src/planner/optimizer/optimizer.rs
@@ -19,8 +19,6 @@ use common_ast::ast::ExplainKind;
 use common_catalog::table_context::TableContext;
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_planner::IndexType;
-use common_planner::MetadataRef;
 use super::cost::CostContext;
 use super::format::display_memo;
@@ -36,6 +34,8 @@ use crate::optimizer::DEFAULT_REWRITE_RULES;
 use crate::plans::CopyPlanV2;
 use crate::plans::Plan;
 use crate::BindContext;
+use crate::IndexType;
+use crate::MetadataRef;
 #[derive(Debug, Clone, Default)]
 pub struct OptimizerConfig {
diff --git a/src/query/sql/src/planner/optimizer/property/column_stat.rs b/src/query/sql/src/planner/optimizer/property/column_stat.rs
index 74576a75117c..d889b76cd184 100644
--- a/src/query/sql/src/planner/optimizer/property/column_stat.rs
+++ b/src/query/sql/src/planner/optimizer/property/column_stat.rs
@@ -14,7 +14,7 @@
 use std::collections::HashMap;
-use common_planner::IndexType;
+use crate::IndexType;
 pub type ColumnStatSet = HashMap<IndexType, ColumnStat>;
diff --git a/src/query/sql/src/planner/optimizer/property/property.rs b/src/query/sql/src/planner/optimizer/property/property.rs
index ca79e639e642..70e613f2f53d 100644
--- a/src/query/sql/src/planner/optimizer/property/property.rs
+++ b/src/query/sql/src/planner/optimizer/property/property.rs
@@ -14,10 +14,9 @@
 use std::collections::HashSet;
-use common_planner::IndexType;
-
 use super::column_stat::ColumnStatSet;
 use crate::plans::Scalar;
+use crate::IndexType;
 pub type ColumnSet = HashSet<IndexType>;
diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_join.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_join.rs
index 4eaf5c7de078..3be40977cc34 100644
--- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_join.rs
+++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_join.rs
@@ -14,8 +14,6 @@
 use common_datavalues::type_coercion::compare_coercion;
 use common_exception::Result;
-use common_planner::ColumnSet;
-use common_planner::IndexType;
 use crate::binder::wrap_cast;
 use crate::binder::JoinPredicate;
@@ -31,6 +29,8 @@ use crate::plans::PatternPlan;
 use crate::plans::RelOp;
 use crate::plans::Scalar;
 use 
crate::plans::ScalarExpr; +use crate::ColumnSet; +use crate::IndexType; pub struct RulePushDownFilterJoin { id: RuleID, diff --git a/src/query/sql/src/planner/optimizer/s_expr.rs b/src/query/sql/src/planner/optimizer/s_expr.rs index e1373acd7eb2..ffb9d8dcd030 100644 --- a/src/query/sql/src/planner/optimizer/s_expr.rs +++ b/src/query/sql/src/planner/optimizer/s_expr.rs @@ -14,7 +14,6 @@ use common_exception::ErrorCode; use common_exception::Result; -use common_planner::IndexType; use super::RelationalProperty; use crate::optimizer::rule::AppliedRules; @@ -23,6 +22,7 @@ use crate::plans::Operator; use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::RelOperator; +use crate::IndexType; /// `SExpr` is abbreviation of single expression, which is a tree of relational operators. #[derive(Clone, Debug)] diff --git a/src/query/sql/src/planner/optimizer/util.rs b/src/query/sql/src/planner/optimizer/util.rs index 5b402082316f..13a107eeab9a 100644 --- a/src/query/sql/src/planner/optimizer/util.rs +++ b/src/query/sql/src/planner/optimizer/util.rs @@ -12,11 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use common_planner::MetadataRef; - use super::SExpr; use crate::plans::JoinType; use crate::plans::RelOperator; +use crate::MetadataRef; /// Check if a query will read data from local tables(e.g. system tables). pub fn contains_local_table_scan(s_expr: &SExpr, metadata: &MetadataRef) -> bool { diff --git a/src/query/sql/src/planner/planner.rs b/src/query/sql/src/planner/planner.rs index f3a7b232d996..1c9080ffc260 100644 --- a/src/query/sql/src/planner/planner.rs +++ b/src/query/sql/src/planner/planner.rs @@ -20,10 +20,8 @@ use common_ast::parser::token::TokenKind; use common_ast::parser::token::Tokenizer; use common_ast::Backtrace; use common_catalog::catalog::CatalogManager; -use common_exception::Result; -use common_planner::Metadata; -use common_planner::MetadataRef; use common_catalog::table_context::TableContext; +use common_exception::Result; use parking_lot::RwLock; use crate::optimizer::optimize; @@ -31,6 +29,8 @@ use crate::optimizer::OptimizerConfig; use crate::optimizer::OptimizerContext; use crate::plans::Plan; use crate::Binder; +use crate::Metadata; +use crate::MetadataRef; use crate::NameResolutionContext; const PROBE_INSERT_INITIAL_TOKENS: usize = 128; diff --git a/src/query/sql/src/planner/plans/copy_v2.rs b/src/query/sql/src/planner/plans/copy_v2.rs index 8ef4e12fa45b..c52e9a3a9dd6 100644 --- a/src/query/sql/src/planner/plans/copy_v2.rs +++ b/src/query/sql/src/planner/plans/copy_v2.rs @@ -19,8 +19,8 @@ use std::str::FromStr; use common_datavalues::DataSchemaRef; use common_meta_types::MetaId; use common_meta_types::UserStageInfo; +use common_planner::ReadDataSourcePlan; -use crate::executor::ReadDataSourcePlan; use crate::plans::Plan; #[derive(PartialEq, Eq, Clone, Debug)] diff --git a/src/query/sql/src/planner/plans/eval_scalar.rs b/src/query/sql/src/planner/plans/eval_scalar.rs index a37062a88585..be5a954d02ec 100644 --- a/src/query/sql/src/planner/plans/eval_scalar.rs +++ b/src/query/sql/src/planner/plans/eval_scalar.rs @@ -13,7 +13,6 @@ // limitations under the License.#[derive(Clone, Debug)] use common_exception::Result; -use common_planner::IndexType; use crate::optimizer::ColumnSet; use crate::optimizer::PhysicalProperty; @@ -26,6 +25,7 @@ use crate::plans::PhysicalOperator; use crate::plans::RelOp; use crate::plans::Scalar; use crate::plans::ScalarExpr; +use crate::IndexType; /// Evaluate 
scalar expression #[derive(Clone, Debug, PartialEq, Eq, Hash)] diff --git a/src/query/sql/src/planner/plans/hash_join.rs b/src/query/sql/src/planner/plans/hash_join.rs index 3105a8811e93..5df8e6404880 100644 --- a/src/query/sql/src/planner/plans/hash_join.rs +++ b/src/query/sql/src/planner/plans/hash_join.rs @@ -13,7 +13,6 @@ // limitations under the License. use common_exception::Result; -use common_planner::IndexType; use super::JoinType; use crate::optimizer::Distribution; @@ -25,6 +24,7 @@ use crate::plans::Operator; use crate::plans::PhysicalOperator; use crate::plans::RelOp; use crate::plans::Scalar; +use crate::IndexType; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct PhysicalHashJoin { diff --git a/src/query/sql/src/planner/plans/logical_get.rs b/src/query/sql/src/planner/plans/logical_get.rs index 49c315ba7121..7692b74acdbe 100644 --- a/src/query/sql/src/planner/plans/logical_get.rs +++ b/src/query/sql/src/planner/plans/logical_get.rs @@ -14,7 +14,6 @@ use common_catalog::table::TableStatistics; use common_exception::Result; -use common_planner::IndexType; use itertools::Itertools; use crate::optimizer::ColumnSet; @@ -26,6 +25,7 @@ use crate::plans::PhysicalOperator; use crate::plans::RelOp; use crate::plans::Scalar; use crate::plans::SortItem; +use crate::IndexType; use crate::ScalarExpr; #[derive(Clone, Debug, PartialEq, Eq)] diff --git a/src/query/sql/src/planner/plans/logical_join.rs b/src/query/sql/src/planner/plans/logical_join.rs index f40b730a9419..bec9c87846d6 100644 --- a/src/query/sql/src/planner/plans/logical_join.rs +++ b/src/query/sql/src/planner/plans/logical_join.rs @@ -16,7 +16,6 @@ use std::fmt::Display; use std::fmt::Formatter; use common_exception::Result; -use common_planner::IndexType; use super::ScalarExpr; use crate::optimizer::ColumnSet; @@ -27,6 +26,7 @@ use crate::plans::Operator; use crate::plans::PhysicalOperator; use crate::plans::RelOp; use crate::plans::Scalar; +use crate::IndexType; #[derive(Clone, Debug, Eq, PartialEq, Hash, serde::Serialize, serde::Deserialize)] pub enum JoinType { diff --git a/src/query/sql/src/planner/plans/mod.rs b/src/query/sql/src/planner/plans/mod.rs index c412603b3f9b..43f3d03759c3 100644 --- a/src/query/sql/src/planner/plans/mod.rs +++ b/src/query/sql/src/planner/plans/mod.rs @@ -36,15 +36,6 @@ mod sort; mod union_all; mod update; -mod plan_delete; -mod plan_node_stage_table; -mod plan_node_stage; -mod plan_node_statistics; -mod plan_partition; -mod plan_setting; -mod plan_sink; -mod projection; - pub use aggregate::*; pub use copy_v2::*; pub use dummy_table_scan::DummyTableScan; @@ -70,13 +61,3 @@ pub use share::*; pub use sort::*; pub use union_all::UnionAll; pub use update::UpdatePlan; - - -pub use plan_delete::*; -pub use plan_node_stage_table::*; -pub use plan_node_stage::*; -pub use plan_node_statistics::*; -pub use plan_partition::*; -pub use plan_setting::*; -pub use plan_sink::*; -pub use projection::*; diff --git a/src/query/sql/src/planner/plans/physical_scan.rs b/src/query/sql/src/planner/plans/physical_scan.rs index 109c882a8091..16587f4b9bdb 100644 --- a/src/query/sql/src/planner/plans/physical_scan.rs +++ b/src/query/sql/src/planner/plans/physical_scan.rs @@ -15,7 +15,6 @@ use std::hash::Hash; use common_exception::Result; -use common_planner::IndexType; use itertools::Itertools; use super::logical_get::Prewhere; @@ -30,6 +29,7 @@ use crate::plans::PhysicalOperator; use crate::plans::RelOp; use crate::plans::Scalar; use crate::plans::SortItem; +use crate::IndexType; #[derive(Clone, Debug, 
PartialEq, Eq)] pub struct PhysicalScan { diff --git a/src/query/sql/src/planner/plans/plan.rs b/src/query/sql/src/planner/plans/plan.rs index 2ab0e2b4c262..dda734e1cc49 100644 --- a/src/query/sql/src/planner/plans/plan.rs +++ b/src/query/sql/src/planner/plans/plan.rs @@ -32,6 +32,7 @@ use common_planner::plans::CreateStagePlan; use common_planner::plans::CreateUDFPlan; use common_planner::plans::CreateUserPlan; use common_planner::plans::CreateViewPlan; +use common_planner::plans::DeletePlan; use common_planner::plans::DescribeTablePlan; use common_planner::plans::DropDatabasePlan; use common_planner::plans::DropRolePlan; @@ -53,6 +54,7 @@ use common_planner::plans::RenameTablePlan; use common_planner::plans::RevokePrivilegePlan; use common_planner::plans::RevokeRolePlan; use common_planner::plans::SetRolePlan; +use common_planner::plans::SettingPlan; use common_planner::plans::ShowCreateDatabasePlan; use common_planner::plans::ShowCreateTablePlan; use common_planner::plans::ShowGrantsPlan; @@ -60,7 +62,6 @@ use common_planner::plans::TruncateTablePlan; use common_planner::plans::UndropDatabasePlan; use common_planner::plans::UndropTablePlan; use common_planner::plans::UseDatabasePlan; -use common_planner::MetadataRef; use crate::optimizer::SExpr; use crate::plans::copy_v2::CopyPlanV2; @@ -79,9 +80,7 @@ use crate::plans::share::ShowObjectGrantPrivilegesPlan; use crate::plans::share::ShowSharesPlan; use crate::plans::UpdatePlan; use crate::BindContext; - -use super::DeletePlan; -use super::SettingPlan; +use crate::MetadataRef; #[derive(Clone, Debug)] pub enum Plan { diff --git a/src/query/sql/src/planner/plans/recluster_table.rs b/src/query/sql/src/planner/plans/recluster_table.rs index 846f4fe369c8..9d4d3565fbff 100644 --- a/src/query/sql/src/planner/plans/recluster_table.rs +++ b/src/query/sql/src/planner/plans/recluster_table.rs @@ -15,9 +15,9 @@ use std::sync::Arc; use common_datavalues::DataSchema; use common_datavalues::DataSchemaRef; -use common_planner::MetadataRef; use crate::plans::Scalar; +use crate::MetadataRef; #[derive(Clone, Debug)] pub struct ReclusterTablePlan { diff --git a/src/query/sql/src/planner/plans/scalar.rs b/src/query/sql/src/planner/plans/scalar.rs index 60a7197f9539..6c0bb5e02c65 100644 --- a/src/query/sql/src/planner/plans/scalar.rs +++ b/src/query/sql/src/planner/plans/scalar.rs @@ -22,11 +22,11 @@ use common_datavalues::NullableType; use common_exception::ErrorCode; use common_exception::Result; use common_functions::scalars::FunctionFactory; -use common_planner::IndexType; use crate::binder::ColumnBinding; use crate::optimizer::ColumnSet; use crate::optimizer::SExpr; +use crate::IndexType; pub trait ScalarExpr { /// Get return type and nullability diff --git a/src/query/sql/src/planner/plans/sort.rs b/src/query/sql/src/planner/plans/sort.rs index 642d3d6d69e6..7cf064a7e228 100644 --- a/src/query/sql/src/planner/plans/sort.rs +++ b/src/query/sql/src/planner/plans/sort.rs @@ -13,7 +13,6 @@ // limitations under the License. 
use common_exception::Result; -use common_planner::IndexType; use crate::optimizer::ColumnSet; use crate::optimizer::Distribution; @@ -25,6 +24,7 @@ use crate::plans::LogicalOperator; use crate::plans::Operator; use crate::plans::PhysicalOperator; use crate::plans::RelOp; +use crate::IndexType; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Sort { diff --git a/src/query/sql/src/planner/plans/union_all.rs b/src/query/sql/src/planner/plans/union_all.rs index 1d81ebe893be..cf612a68c1ce 100644 --- a/src/query/sql/src/planner/plans/union_all.rs +++ b/src/query/sql/src/planner/plans/union_all.rs @@ -13,7 +13,6 @@ // limitations under the License. use common_exception::Result; -use common_planner::IndexType; use crate::optimizer::ColumnSet; use crate::optimizer::Distribution; @@ -25,6 +24,7 @@ use crate::plans::LogicalOperator; use crate::plans::Operator; use crate::plans::PhysicalOperator; use crate::plans::RelOp; +use crate::IndexType; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct UnionAll { diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 87117b89323c..c10dca540291 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -32,6 +32,7 @@ use common_ast::parser::tokenize_sql; use common_ast::Backtrace; use common_ast::DisplayError; use common_catalog::catalog::CatalogManager; +use common_catalog::table_context::TableContext; use common_datavalues::type_coercion::merge_types; use common_datavalues::ArrayType; use common_datavalues::DataField; @@ -53,8 +54,6 @@ use common_functions::is_builtin_function; use common_functions::scalars::CastFunction; use common_functions::scalars::FunctionFactory; use common_functions::scalars::TupleFunction; -use common_planner::MetadataRef; -use common_catalog::table_context::TableContext; use common_users::UserApiProvider; use super::name_resolution::NameResolutionContext; @@ -78,6 +77,7 @@ use crate::plans::Scalar; use crate::plans::SubqueryExpr; use crate::plans::SubqueryType; use crate::BindContext; +use crate::MetadataRef; use crate::ScalarExpr; /// A helper for type checking. 
@@ -2174,7 +2174,6 @@ impl<'a> TypeChecker<'a> { } } - pub fn validate_function_arg( name: &str, args_len: usize, diff --git a/src/query/storages/factory/Cargo.toml b/src/query/storages/factory/Cargo.toml index b4cfc51897e1..6784d606dc72 100644 --- a/src/query/storages/factory/Cargo.toml +++ b/src/query/storages/factory/Cargo.toml @@ -20,12 +20,12 @@ common-datavalues = { path = "../../datavalues" } common-exception = { path = "../../../common/exception" } common-formats = { path = "../../formats" } common-fuse-meta = { path = "../fuse-meta" } -common-legacy-planners = { path = "../../legacy-planners" } common-meta-app = { path = "../../../meta/app" } common-meta-types = { path = "../../../meta/types" } common-pipeline-core = { path = "../../pipeline/core" } common-pipeline-sources = { path = "../../pipeline/sources" } common-pipeline-transforms = { path = "../../pipeline/transforms" } +common-planner = { path = "../../planner" } common-storage = { path = "../../../common/storage" } common-storages-fuse = { path = "../fuse" } common-storages-index = { path = "../index" } diff --git a/src/query/storages/factory/src/result/block_buffer.rs b/src/query/storages/factory/src/result/block_buffer.rs index 2d49e67bef78..c9df591c0342 100644 --- a/src/query/storages/factory/src/result/block_buffer.rs +++ b/src/query/storages/factory/src/result/block_buffer.rs @@ -20,8 +20,9 @@ use common_base::base::tokio::sync::Notify; use common_catalog::table_context::TableContext; use common_datablocks::DataBlock; use common_exception::Result; -use common_legacy_planners::PartInfoPtr; -use common_legacy_planners::Projection; +use common_planner::PartInfoPtr; +use common_planner::plans::Projection; + use crate::fuse::io::BlockReader; use crate::result::ResultQueryInfo; diff --git a/src/query/storages/factory/src/result/result_table.rs b/src/query/storages/factory/src/result/result_table.rs index 38cce589c68c..2938be8cec82 100644 --- a/src/query/storages/factory/src/result/result_table.rs +++ b/src/query/storages/factory/src/result/result_table.rs @@ -20,11 +20,11 @@ use common_datavalues::DataSchemaRef; use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::SegmentInfo; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::Projection; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::Partitions; +use common_planner::plans::Projection; +use common_planner::ReadDataSourcePlan; +use common_planner::extras::Statistics; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; use common_meta_types::UserIdentity; diff --git a/src/query/storages/factory/src/result/result_table_sink.rs b/src/query/storages/factory/src/result/result_table_sink.rs index 92d4385141ed..36ef0cc4d5e3 100644 --- a/src/query/storages/factory/src/result/result_table_sink.rs +++ b/src/query/storages/factory/src/result/result_table_sink.rs @@ -23,8 +23,8 @@ use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::SegmentInfo; use common_fuse_meta::meta::Statistics as FuseMetaStatistics; -use common_legacy_planners::PartInfoPtr; -use common_legacy_planners::Projection; +use common_planner::PartInfoPtr; +use common_planner::plans::Projection; use common_pipeline_core::processors::port::InputPort; use common_pipeline_core::processors::processor::Event; use 
common_pipeline_core::processors::processor::ProcessorPtr; diff --git a/src/query/storages/factory/src/result/result_table_source.rs b/src/query/storages/factory/src/result/result_table_source.rs index 75a3df104b57..9f8836880cd3 100644 --- a/src/query/storages/factory/src/result/result_table_source.rs +++ b/src/query/storages/factory/src/result/result_table_source.rs @@ -21,7 +21,7 @@ use common_catalog::table_context::TableContext; use common_datablocks::DataBlock; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_planners::PartInfoPtr; +use common_planner::PartInfoPtr; use common_pipeline_core::processors::port::OutputPort; use common_pipeline_core::processors::processor::Event; use common_pipeline_core::processors::processor::ProcessorPtr; diff --git a/src/query/storages/factory/src/result/writer.rs b/src/query/storages/factory/src/result/writer.rs index bc1a2a47383b..71a6cdb34779 100644 --- a/src/query/storages/factory/src/result/writer.rs +++ b/src/query/storages/factory/src/result/writer.rs @@ -23,7 +23,7 @@ use common_datablocks::DataBlock; use common_exception::Result; use common_fuse_meta::meta::SegmentInfo; use common_fuse_meta::meta::Statistics as FuseMetaStatistics; -use common_legacy_planners::PartInfoPtr; +use common_planner::PartInfoPtr; use common_streams::SendableDataBlockStream; use futures::StreamExt; use opendal::Operator; diff --git a/src/query/storages/factory/src/stage/stage_table.rs b/src/query/storages/factory/src/stage/stage_table.rs index 1d67e27f3ef1..b51e932553ef 100644 --- a/src/query/storages/factory/src/stage/stage_table.rs +++ b/src/query/storages/factory/src/stage/stage_table.rs @@ -20,11 +20,11 @@ use common_catalog::table_context::TableContext; use common_datablocks::DataBlock; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::StageTableInfo; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; +use common_planner::StageTableInfo; +use common_planner::extras::Statistics; use common_meta_app::schema::TableInfo; use common_meta_types::StageType; use common_meta_types::UserStageInfo; diff --git a/src/query/storages/factory/src/stage/stage_table_sink.rs b/src/query/storages/factory/src/stage/stage_table_sink.rs index e816d7514751..cd946414008e 100644 --- a/src/query/storages/factory/src/stage/stage_table_sink.rs +++ b/src/query/storages/factory/src/stage/stage_table_sink.rs @@ -26,7 +26,7 @@ use common_exception::ErrorCode; use common_exception::Result; use common_formats::output_format::OutputFormat; use common_formats::output_format::OutputFormatType; -use common_legacy_planners::StageTableInfo; +use common_planner::StageTableInfo; use common_pipeline_core::processors::port::InputPort; use common_pipeline_core::processors::port::OutputPort; use common_pipeline_core::processors::processor::Event; diff --git a/src/query/storages/fuse/Cargo.toml b/src/query/storages/fuse/Cargo.toml index c4edd56f1e3c..36ca18d0558f 100644 --- a/src/query/storages/fuse/Cargo.toml +++ b/src/query/storages/fuse/Cargo.toml @@ -21,14 +21,12 @@ common-datavalues = { path = "../../datavalues" } common-exception = { path = "../../../common/exception" } common-functions = { path = "../../functions" } common-fuse-meta = { path = "../fuse-meta" } -common-legacy-expression = { path = 
"../../legacy-expression" } -common-legacy-parser = { path = "../../legacy-parser" } -common-legacy-planners = { path = "../../legacy-planners" } common-meta-app = { path = "../../../meta/app" } common-meta-types = { path = "../../../meta/types" } common-pipeline-core = { path = "../../pipeline/core" } common-pipeline-sources = { path = "../../pipeline/sources" } common-pipeline-transforms = { path = "../../pipeline/transforms" } +common-planner = { path = "../../planner" } common-sharing = { path = "../../sharing" } common-storage = { path = "../../../common/storage" } common-storages-cache = { path = "../cache" } diff --git a/src/query/storages/fuse/src/fuse_lazy_part.rs b/src/query/storages/fuse/src/fuse_lazy_part.rs index c6f50015fd97..6c2c4beac405 100644 --- a/src/query/storages/fuse/src/fuse_lazy_part.rs +++ b/src/query/storages/fuse/src/fuse_lazy_part.rs @@ -16,8 +16,8 @@ use std::any::Any; use std::sync::Arc; use common_fuse_meta::meta::Location; -use common_legacy_planners::PartInfo; -use common_legacy_planners::PartInfoPtr; +use common_planner::PartInfo; +use common_planner::PartInfoPtr; #[derive(serde::Serialize, serde::Deserialize, PartialEq, Eq)] pub struct FuseLazyPartInfo { diff --git a/src/query/storages/fuse/src/fuse_part.rs b/src/query/storages/fuse/src/fuse_part.rs index b8e08553e70f..5d5cf7f0a643 100644 --- a/src/query/storages/fuse/src/fuse_part.rs +++ b/src/query/storages/fuse/src/fuse_part.rs @@ -22,9 +22,9 @@ use common_arrow::arrow::datatypes::Schema as ArrowSchema; use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::Compression; -use common_legacy_planners::PartInfo; -use common_legacy_planners::PartInfoPtr; -use common_legacy_planners::Projection; +use common_planner::PartInfo; +use common_planner::PartInfoPtr; +use common_planner::plans::Projection; #[derive(serde::Serialize, serde::Deserialize, PartialEq, Eq)] pub struct ColumnMeta { diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index 8b9499d26cae..8c7ca2eabdb7 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -35,11 +35,11 @@ use common_fuse_meta::meta::TableSnapshot; use common_fuse_meta::meta::Versioned; use common_legacy_expression::LegacyExpression; use common_legacy_parser::ExpressionParser; -use common_legacy_planners::DeletePlan; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::DeletePlan; +use common_planner::extras::Extras; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; +use common_planner::extras::Statistics; use common_meta_app::schema::TableInfo; use common_sharing::create_share_table_operator; use common_storage::init_operator; diff --git a/src/query/storages/fuse/src/io/read/block_reader.rs b/src/query/storages/fuse/src/io/read/block_reader.rs index f80caaeb57a7..a2ea04a3baf5 100644 --- a/src/query/storages/fuse/src/io/read/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block_reader.rs @@ -34,8 +34,8 @@ use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::BlockMeta; use common_fuse_meta::meta::Compression; -use common_legacy_planners::PartInfoPtr; -use common_legacy_planners::Projection; +use common_planner::PartInfoPtr; +use common_planner::plans::Projection; use futures::AsyncReadExt; use futures::StreamExt; use futures::TryStreamExt; 
diff --git a/src/query/storages/fuse/src/operations/compact.rs b/src/query/storages/fuse/src/operations/compact.rs index 744e027d3d31..c5da23e979b8 100644 --- a/src/query/storages/fuse/src/operations/compact.rs +++ b/src/query/storages/fuse/src/operations/compact.rs @@ -17,8 +17,8 @@ use std::sync::Arc; use common_catalog::table::CompactTarget; use common_exception::Result; use common_fuse_meta::meta::TableSnapshot; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::SourceInfo; +use common_planner::ReadDataSourcePlan; +use common_planner::SourceInfo; use common_pipeline_core::Pipeline; use common_pipeline_transforms::processors::transforms::TransformCompact; diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index 2efa316a114b..b8a776393f8b 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -21,8 +21,8 @@ use common_exception::Result; use common_fuse_meta::meta::TableSnapshot; use common_legacy_expression::LegacyExpression; use common_legacy_parser::ExpressionParser; -use common_legacy_planners::DeletePlan; -use common_legacy_planners::Extras; +use common_planner::DeletePlan; +use common_planner::extras::Extras; use common_pipeline_transforms::processors::ExpressionExecutor; use tracing::debug; diff --git a/src/query/storages/fuse/src/operations/mutation/block_filter.rs b/src/query/storages/fuse/src/operations/mutation/block_filter.rs index 94036130e937..af5970e445fd 100644 --- a/src/query/storages/fuse/src/operations/mutation/block_filter.rs +++ b/src/query/storages/fuse/src/operations/mutation/block_filter.rs @@ -23,7 +23,7 @@ use common_datavalues::Series; use common_exception::Result; use common_fuse_meta::meta::BlockMeta; use common_legacy_expression::LegacyExpression; -use common_legacy_planners::Projection; +use common_planner::plans::Projection; use crate::operations::mutation::deletion_mutator::Deletion; use crate::pipelines::processors::transforms::ExpressionExecutor; diff --git a/src/query/storages/fuse/src/operations/read_data.rs b/src/query/storages/fuse/src/operations/read_data.rs index 51eaaf6e100c..5e7910646669 100644 --- a/src/query/storages/fuse/src/operations/read_data.rs +++ b/src/query/storages/fuse/src/operations/read_data.rs @@ -19,10 +19,15 @@ use common_catalog::table_context::TableContext; use common_datavalues::DataSchemaRefExt; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_planners::Extras; -use common_legacy_planners::PrewhereInfo; -use common_legacy_planners::Projection; -use common_legacy_planners::ReadDataSourcePlan; +use common_planner::extras::Extras; +use common_planner::PartInfoPtr; +use common_planner::PrewhereInfo; +use common_planner::plans::Projection; +use common_planner::ReadDataSourcePlan; +use common_pipeline_core::processors::port::OutputPort; +use common_pipeline_core::processors::processor::Event; +use common_pipeline_core::processors::processor::ProcessorPtr; +use common_pipeline_core::processors::Processor; use common_pipeline_core::Pipeline; use common_pipeline_transforms::processors::ExpressionExecutor; use tracing::info; diff --git a/src/query/storages/fuse/src/operations/read_partitions.rs b/src/query/storages/fuse/src/operations/read_partitions.rs index b99fef36047c..312a57d042f8 100644 --- a/src/query/storages/fuse/src/operations/read_partitions.rs +++ b/src/query/storages/fuse/src/operations/read_partitions.rs @@ -22,11 +22,11 @@ use 
common_exception::Result; use common_fuse_meta::meta::BlockMeta; use common_fuse_meta::meta::Location; use common_fuse_meta::meta::TableSnapshot; -use common_legacy_planners::Extras; -use common_legacy_planners::PartInfoPtr; -use common_legacy_planners::Partitions; -use common_legacy_planners::Projection; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::PartInfoPtr; +use common_planner::Partitions; +use common_planner::plans::Projection; +use common_planner::extras::Statistics; use common_meta_app::schema::TableInfo; use opendal::Operator; use tracing::debug; diff --git a/src/query/storages/fuse/src/operations/recluster.rs b/src/query/storages/fuse/src/operations/recluster.rs index 6c9ff232fc05..59a2f334f7b3 100644 --- a/src/query/storages/fuse/src/operations/recluster.rs +++ b/src/query/storages/fuse/src/operations/recluster.rs @@ -20,9 +20,9 @@ use common_catalog::table_context::TableContext; use common_datablocks::SortColumnDescription; use common_exception::Result; use common_fuse_meta::meta::BlockMeta; -use common_legacy_planners::Extras; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::SourceInfo; +use common_planner::extras::Extras; +use common_planner::ReadDataSourcePlan; +use common_planner::SourceInfo; use common_pipeline_core::Pipeline; use common_pipeline_transforms::processors::transforms::SortMergeCompactor; use common_pipeline_transforms::processors::transforms::TransformCompact; diff --git a/src/query/storages/fuse/src/pruning/pruning_executor.rs b/src/query/storages/fuse/src/pruning/pruning_executor.rs index 4a103d616bfa..d25e3eb79399 100644 --- a/src/query/storages/fuse/src/pruning/pruning_executor.rs +++ b/src/query/storages/fuse/src/pruning/pruning_executor.rs @@ -27,7 +27,7 @@ use common_exception::Result; use common_fuse_meta::meta::BlockMeta; use common_fuse_meta::meta::Location; use common_fuse_meta::meta::SegmentInfo; -use common_legacy_planners::Extras; +use common_planner::extras::Extras; use futures::future; use opendal::Operator; use tracing::warn; diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs index f55e9c608a7f..04741db8bcf6 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs @@ -19,10 +19,10 @@ use common_catalog::catalog::CATALOG_DEFAULT; use common_datablocks::DataBlock; use common_exception::Result; use common_legacy_expression::LegacyExpression; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; +use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; diff --git a/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block_table.rs b/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block_table.rs index 56e5e8fc6742..041d7451e19c 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block_table.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block_table.rs @@ 
-19,10 +19,10 @@ use common_catalog::catalog::CATALOG_DEFAULT; use common_datablocks::DataBlock; use common_exception::Result; use common_legacy_expression::LegacyExpression; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; +use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; diff --git a/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment_table.rs b/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment_table.rs index 9357266ae8b7..c2bc27489600 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment_table.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment_table.rs @@ -20,10 +20,10 @@ use common_datablocks::DataBlock; use common_datavalues::DataValue; use common_exception::Result; use common_legacy_expression::LegacyExpression; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; +use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; diff --git a/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot_table.rs b/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot_table.rs index d170690a0795..0a6a9775f247 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot_table.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot_table.rs @@ -19,10 +19,10 @@ use common_catalog::catalog::CATALOG_DEFAULT; use common_datablocks::DataBlock; use common_exception::Result; use common_legacy_expression::LegacyExpression; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; +use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; diff --git a/src/query/storages/hive/Cargo.toml b/src/query/storages/hive/Cargo.toml index 95359d140b23..9cb9d428203f 100644 --- a/src/query/storages/hive/Cargo.toml +++ b/src/query/storages/hive/Cargo.toml @@ -20,12 +20,11 @@ common-datavalues = { path = "../../datavalues" } common-exception = { path = "../../../common/exception" } common-fuse-meta = { path = "../fuse-meta" } common-hive-meta-store = { path = "../hive-meta-store" } -common-legacy-expression = { path = "../../legacy-expression" } -common-legacy-planners = { path = "../../legacy-planners" } common-meta-app = { path = "../../../meta/app" } common-meta-types = { path = "../../../meta/types" } common-pipeline-core = { path = "../../pipeline/core" } common-pipeline-sources = { path = "../../pipeline/sources" } +common-planner = { path = "../../planner" } common-storage = { path = "../../../common/storage" } common-storages-cache = { path = "../cache" } 
common-storages-index = { path = "../index" } diff --git a/src/query/storages/hive/src/hive_file_splitter.rs b/src/query/storages/hive/src/hive_file_splitter.rs index e2b8ac520106..cd6bf2ad71ea 100644 --- a/src/query/storages/hive/src/hive_file_splitter.rs +++ b/src/query/storages/hive/src/hive_file_splitter.rs @@ -15,7 +15,7 @@ use std::ops::Range; use std::sync::Arc; -use common_legacy_planners::PartInfo; +use common_planner::PartInfo; use crate::HiveFileInfo; use crate::HivePartInfo; diff --git a/src/query/storages/hive/src/hive_partition.rs b/src/query/storages/hive/src/hive_partition.rs index 62fe7002a4d6..98cb23ccb8ef 100644 --- a/src/query/storages/hive/src/hive_partition.rs +++ b/src/query/storages/hive/src/hive_partition.rs @@ -19,8 +19,8 @@ use std::sync::Arc; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_planners::PartInfo; -use common_legacy_planners::PartInfoPtr; +use common_planner::PartInfo; +use common_planner::PartInfoPtr; #[derive(serde::Serialize, serde::Deserialize, PartialEq, Eq, Debug, Clone)] pub struct HivePartInfo { diff --git a/src/query/storages/hive/src/hive_table.rs b/src/query/storages/hive/src/hive_table.rs index 9eaecbe9e129..84eadbf13140 100644 --- a/src/query/storages/hive/src/hive_table.rs +++ b/src/query/storages/hive/src/hive_table.rs @@ -30,11 +30,11 @@ use common_exception::ErrorCode; use common_exception::Result; use common_legacy_expression::LegacyExpression; use common_legacy_expression::RequireColumnsVisitor; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::Projection; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::Partitions; +use common_planner::plans::Projection; +use common_planner::ReadDataSourcePlan; +use common_planner::extras::Statistics; use common_meta_app::schema::TableInfo; use common_pipeline_core::processors::port::OutputPort; use common_pipeline_core::processors::processor::ProcessorPtr; diff --git a/src/query/storages/hive/src/hive_table_source.rs b/src/query/storages/hive/src/hive_table_source.rs index af7a1b216acd..d7de00adcea4 100644 --- a/src/query/storages/hive/src/hive_table_source.rs +++ b/src/query/storages/hive/src/hive_table_source.rs @@ -23,7 +23,7 @@ use common_catalog::table_context::TableContext; use common_datablocks::DataBlock; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_planners::PartInfoPtr; +use common_planner::PartInfoPtr; use common_pipeline_core::processors::port::OutputPort; use common_pipeline_core::processors::processor::Event; use common_pipeline_core::processors::processor::ProcessorPtr; diff --git a/src/query/storages/index/Cargo.toml b/src/query/storages/index/Cargo.toml index b6f2ce5980cc..327c90962e3d 100644 --- a/src/query/storages/index/Cargo.toml +++ b/src/query/storages/index/Cargo.toml @@ -21,8 +21,9 @@ common-datavalues = { path = "../../datavalues" } common-exception = { path = "../../../common/exception" } common-functions = { path = "../../functions" } common-fuse-meta = { path = "../fuse-meta" } -common-legacy-expression = { path = "../../legacy-expression" } common-pipeline-transforms = { path = "../../pipeline/transforms" } +common-planner = { path = "../../planner" } +common-sql = { path = "../../sql" } xorfilter-rs = { git = "https://github.com/datafuse-extras/xorfilter", features = [ "cbordata", diff --git a/src/query/storages/index/src/bloom.rs 
b/src/query/storages/index/src/bloom.rs
index d5ec23350430..e65757801417 100644
--- a/src/query/storages/index/src/bloom.rs
+++ b/src/query/storages/index/src/bloom.rs
@@ -18,7 +18,7 @@ use common_datablocks::DataBlock;
 use common_datavalues::prelude::*;
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_legacy_expression::LegacyExpression;
+use common_planner::PhysicalScalar;
 use crate::filters::Filter;
 use crate::filters::FilterBuilder;
@@ -179,7 +179,7 @@ impl BlockFilter {
     /// Returns false when the expression must be false, otherwise true.
     /// The 'true' doesn't really mean the expression is true, but 'maybe true'.
     /// That is to say, you still need the load all data and run the execution.
-    pub fn maybe_true(&self, expr: &LegacyExpression) -> Result<bool> {
+    pub fn maybe_true(&self, expr: &PhysicalScalar) -> Result<bool> {
         Ok(self.eval(expr)? != FilterEvalResult::False)
     }
@@ -189,17 +189,19 @@ impl BlockFilter {
     ///
     /// Otherwise return either Maybe or NotApplicable.
     #[tracing::instrument(level = "debug", name = "block_filter_index_eval", skip_all)]
-    pub fn eval(&self, expr: &LegacyExpression) -> Result<FilterEvalResult> {
+    pub fn eval(&self, expr: &PhysicalScalar) -> Result<FilterEvalResult> {
         // TODO: support multiple columns and other ops like 'in' ...
         match expr {
-            LegacyExpression::BinaryExpression { left, op, right } => {
-                match op.to_lowercase().as_str() {
-                    "=" => self.eval_equivalent_expression(left, right),
-                    "and" => self.eval_logical_and(left, right),
-                    "or" => self.eval_logical_or(left, right),
-                    _ => Ok(FilterEvalResult::NotApplicable),
-                }
-            }
+            PhysicalScalar::Function {
+                name,
+                args,
+                return_type,
+            } if args.len() == 2 => match name.to_lowercase().as_str() {
+                "=" => self.eval_equivalent_expression(&args[0], &args[1]),
+                "and" => self.eval_logical_and(&args[0], &args[1]),
+                "or" => self.eval_logical_or(&args[0], &args[1]),
+                _ => Ok(FilterEvalResult::NotApplicable),
+            },
             _ => Ok(FilterEvalResult::NotApplicable),
         }
     }
@@ -207,16 +209,16 @@ impl BlockFilter {
     // Evaluate the equivalent expression like "name='Alice'"
     fn eval_equivalent_expression(
         &self,
-        left: &LegacyExpression,
-        right: &LegacyExpression,
+        left: &PhysicalScalar,
+        right: &PhysicalScalar,
     ) -> Result<FilterEvalResult> {
         let schema: &DataSchemaRef = &self.source_schema;
         // For now only support single column like "name = 'Alice'"
         match (left, right) {
             // match the expression of 'column_name = literal constant'
-            (LegacyExpression::Column(column), LegacyExpression::Literal { value, .. })
-            | (LegacyExpression::Literal { value, .. }, LegacyExpression::Column(column)) => {
+            // PhysicalScalar carries columns as IndexedVariable and literals as
+            // Constant, so match on those variants here.
+            (PhysicalScalar::IndexedVariable { display_name, .. }, PhysicalScalar::Constant { value, .. })
+            | (PhysicalScalar::Constant { value, ..
}, PhysicalScalar::IndexedVariable { display_name, .. }) => {
                 // find the corresponding column from source table
                 match schema.column_with_name(display_name) {
                     Some((_index, data_field)) => {
@@ -244,8 +246,8 @@ impl BlockFilter {
     // Evaluate the logical and expression
     fn eval_logical_and(
         &self,
-        left: &LegacyExpression,
-        right: &LegacyExpression,
+        left: &PhysicalScalar,
+        right: &PhysicalScalar,
     ) -> Result<FilterEvalResult> {
         let left_result = self.eval(left)?;
         if left_result == FilterEvalResult::False {
@@ -269,8 +271,8 @@ impl BlockFilter {
     // Evaluate the logical or expression
     fn eval_logical_or(
         &self,
-        left: &LegacyExpression,
-        right: &LegacyExpression,
+        left: &PhysicalScalar,
+        right: &PhysicalScalar,
     ) -> Result<FilterEvalResult> {
         let left_result = self.eval(left)?;
         let right_result = self.eval(right)?;
diff --git a/src/query/storages/index/src/range_filter.rs b/src/query/storages/index/src/range_filter.rs
index 8f05903cf39d..214ab10453ec 100644
--- a/src/query/storages/index/src/range_filter.rs
+++ b/src/query/storages/index/src/range_filter.rs
@@ -23,64 +23,51 @@ use common_datavalues::prelude::*;
 use common_exception::ErrorCode;
 use common_exception::Result;
 use common_functions::scalars::check_pattern_type;
+use common_functions::scalars::FunctionContext;
 use common_functions::scalars::FunctionFactory;
 use common_functions::scalars::PatternType;
 use common_fuse_meta::meta::StatisticsOfColumns;
-use common_legacy_expression::lit;
-use common_legacy_expression::ExpressionMonotonicityVisitor;
-use common_legacy_expression::LegacyExpression;
-use common_legacy_expression::LegacyExpressions;
-use common_legacy_expression::RequireColumnsVisitor;
 use common_pipeline_transforms::processors::transforms::ExpressionExecutor;
+use common_planner::PhysicalScalar;
+use common_sql::evaluator::EvalNode;
+use common_sql::evaluator::Evaluator;
+use common_sql::evaluator::PhysicalScalarOp;
 #[derive(Clone)]
 pub struct RangeFilter {
     origin: DataSchemaRef,
     schema: DataSchemaRef,
-    executor: Arc<ExpressionExecutor>,
+    executor: EvalNode,
     stat_columns: StatColumns,
+    func_ctx: FunctionContext,
 }
 impl RangeFilter {
     pub fn try_create(
         ctx: Arc<dyn TableContext>,
-        exprs: &[LegacyExpression],
+        exprs: &[PhysicalScalar],
         schema: DataSchemaRef,
     ) -> Result<Self> {
         debug_assert!(!exprs.is_empty());
         let mut stat_columns: StatColumns = Vec::new();
         let verifiable_expr = exprs
             .iter()
-            .fold(None, |acc: Option<LegacyExpression>, expr| {
+            .fold(None, |acc: Option<PhysicalScalar>, expr| {
                 let verifiable_expr = build_verifiable_expr(expr, &schema, &mut stat_columns);
                 match acc {
-                    Some(acc) => Some(acc.and(verifiable_expr)),
+                    Some(acc) => Some(acc.and(&verifiable_expr).unwrap()),
                     None => Some(verifiable_expr),
                 }
             })
             .unwrap();
         let input_fields = stat_columns
             .iter()
             .map(|c| c.stat_field.clone())
             .collect::<Vec<_>>();
         let input_schema = Arc::new(DataSchema::new(input_fields));
-        let output_fields = vec![verifiable_expr.to_data_field(&input_schema)?];
-        let output_schema = DataSchemaRefExt::create(output_fields);
-        let expr_executor = ExpressionExecutor::try_create(
-            ctx,
-            "verifiable expression executor in RangeFilter",
-            input_schema.clone(),
-            output_schema,
-            vec![verifiable_expr],
-            false,
-        )?;
+        let executor = Evaluator::eval_physical_scalar(&verifiable_expr)?;
+        // The stats schema and function context are still used below;
+        // `try_get_function_context` is assumed available on TableContext.
+        let func_ctx = ctx.try_get_function_context()?;
         Ok(Self {
             origin: schema,
             schema: input_schema,
-            executor: Arc::new(expr_executor),
+            executor,
             stat_columns,
+            func_ctx,
         })
     }
@@ -96,9 +83,10 @@ impl RangeFilter {
         let const_col = ConstColumn::new(Series::from_data(vec![1u8]), 1);
         let dummy_columns = vec![Arc::new(const_col) as ColumnRef];
         let data_block = DataBlock::create(input_schema, dummy_columns);
-        let executed_data_block = self.executor.execute(&data_block)?;
-        match executed_data_block.column(0).get(0) {
+        let executed_data_block = self.executor.eval(&self.func_ctx, &data_block)?;
+
+        match executed_data_block.vector.get(0) {
             DataValue::Null => Ok(false),
             other => other.as_bool(),
         }
@@ -119,9 +107,9 @@ impl RangeFilter {
             }
         }
         let data_block = DataBlock::create(self.schema.clone(), columns);
-        let executed_data_block = self.executor.execute(&data_block)?;
+        let executed_data_block = self.executor.eval(&self.func_ctx, &data_block)?;
-        match executed_data_block.column(0).get(0) {
+        match executed_data_block.vector.get(0) {
             DataValue::Null => Ok(false),
             other => other.as_bool(),
         }
@@ -131,38 +119,19 @@
 /// convert expr to Verifiable Expression
 /// Rules: (section 5.2 of http://vldb.org/pvldb/vol14/p3083-edara.pdf)
 pub fn build_verifiable_expr(
-    expr: &LegacyExpression,
+    expr: &PhysicalScalar,
     schema: &DataSchemaRef,
     stat_columns: &mut StatColumns,
-) -> LegacyExpression {
-    let unhandled = lit(true);
-
-    let (exprs, op) = match expr {
-        LegacyExpression::Literal { .. } => return expr.clone(),
-        LegacyExpression::ScalarFunction { op, args } => try_convert_is_null(op, args.clone()),
-        LegacyExpression::BinaryExpression { left, op, right } => {
-            match op.to_lowercase().as_str() {
-                "and" => {
-                    let left = build_verifiable_expr(left, schema, stat_columns);
-                    let right = build_verifiable_expr(right, schema, stat_columns);
-                    return left.and(right);
-                }
-                "or" => {
-                    let left = build_verifiable_expr(left, schema, stat_columns);
-                    let right = build_verifiable_expr(right, schema, stat_columns);
-                    return left.or(right);
-                }
-                _ => (
-                    vec![left.as_ref().clone(), right.as_ref().clone()],
-                    op.clone(),
-                ),
-            }
-        }
-        _ => return unhandled,
+) -> PhysicalScalar {
+    let unhandled = PhysicalScalar::Constant {
+        value: DataValue::Boolean(true),
+        data_type: bool::to_data_type(),
     };
-    VerifiableExprBuilder::try_create(exprs, op.to_lowercase().as_str(), schema, stat_columns)
-        .map_or(unhandled.clone(), |mut v| v.build().unwrap_or(unhandled))
+    // TODO(sundy)
+    todo!()
+    // VerifiableExprBuilder::try_create(exprs, op.to_lowercase().as_str(), schema, stat_columns)
+    //     .map_or(unhandled.clone(), |mut v| v.build().unwrap_or(unhandled))
 }
 fn inverse_operator(op: &str) -> Result<&str> {
@@ -180,22 +149,23 @@
 }
 /// Try to convert `not(is_not_null)` to `is_null`.
-fn try_convert_is_null(op: &str, args: Vec<LegacyExpression>) -> (Vec<LegacyExpression>, String) {
-    // `is null` will be converted to `not(is not null)` in the parser.
-    // we should convert it back to `is null` here.
-    if op == "not" && args.len() == 1 {
-        if let LegacyExpression::ScalarFunction {
-            op: inner_op,
-            args: inner_args,
-        } = &args[0]
-        {
-            if inner_op == "is_not_null" {
-                return (inner_args.clone(), String::from("is_null"));
-            }
-        }
-    }
-    (args, String::from(op))
-}
+// TODO(sundy)
+// fn try_convert_is_null(op: &str, args: Vec<PhysicalScalar>) -> (Vec<PhysicalScalar>, String) {
+//     // `is null` will be converted to `not(is not null)` in the parser.
+//     // we should convert it back to `is null` here.
+//     if op == "not" && args.len() == 1 {
+//         if let PhysicalScalar::ScalarFunction {
+//             op: inner_op,
+//             args: inner_args,
+//         } = &args[0]
+//         {
+//             if inner_op == "is_not_null" {
+//                 return (inner_args.clone(), String::from("is_null"));
+//             }
+//         }
+//     }
+//     (args, String::from(op))
+// }
 #[derive(Debug, Copy, Clone, PartialEq)]
 enum StatType {
@@ -224,7 +194,7 @@
 pub struct StatColumn {
     column_fields: ColumnFields,
     stat_type: StatType,
     stat_field: DataField,
-    expr: LegacyExpression,
+    expr: PhysicalScalar,
 }
 impl StatColumn {
@@ -232,7 +202,7 @@
         column_fields: ColumnFields,
         stat_type: StatType,
         field: &DataField,
-        expr: LegacyExpression,
+        expr: PhysicalScalar,
     ) -> Self {
         let column_new = format!("{}_{}", stat_type, field.name());
         let data_type = if matches!(stat_type, StatType::Nulls | StatType::RowCount) {
@@ -323,119 +293,36 @@
 struct VerifiableExprBuilder<'a> {
     op: &'a str,
-    args: LegacyExpressions,
+    args: Vec<PhysicalScalar>,
     fields: Vec<(DataField, ColumnFields)>,
     stat_columns: &'a mut StatColumns,
 }
 impl<'a> VerifiableExprBuilder<'a> {
     fn try_create(
-        exprs: LegacyExpressions,
+        exprs: Vec<PhysicalScalar>,
         op: &'a str,
         schema: &'a DataSchemaRef,
         stat_columns: &'a mut StatColumns,
     ) -> Result<Self> {
-        let (args, cols, op) = match exprs.len() {
-            1 => {
-                let cols = RequireColumnsVisitor::collect_columns_from_expr(&exprs[0])?;
-                match cols.len() {
-                    1 => (exprs, vec![cols], op),
-                    _ => {
-                        return Err(ErrorCode::UnknownException(
-                            "Multi-column expressions are not currently supported",
-                        ));
-                    }
-                }
-            }
-            2 => {
-                let lhs_cols = RequireColumnsVisitor::collect_columns_from_expr(&exprs[0])?;
-                let rhs_cols = RequireColumnsVisitor::collect_columns_from_expr(&exprs[1])?;
-                match (lhs_cols.len(), rhs_cols.len()) {
-                    (0, 0) => {
-                        return Err(ErrorCode::UnknownException(
-                            "Constant expression donot need to be handled",
-                        ));
-                    }
-                    (_, 0) => (vec![exprs[0].clone(), exprs[1].clone()], vec![lhs_cols], op),
-                    (0, _) => {
-                        let op = inverse_operator(op)?;
-                        (vec![exprs[1].clone(), exprs[0].clone()], vec![rhs_cols], op)
-                    }
-                    _ => {
-                        if !lhs_cols.is_disjoint(&rhs_cols) {
-                            return Err(ErrorCode::UnknownException(
-                                "Unsupported condition for left and right have same columns",
-                            ));
-                        }
-
-                        if !matches!(op, "=" | "<" | "<=" | ">" | ">=") {
-                            return Err(ErrorCode::UnknownException(format!(
-                                "Unsupported operator '{:?}' for multi-column expression",
-                                op
-                            )));
-                        }
-
-                        if !check_maybe_monotonic(&exprs[1])? {
-                            return Err(ErrorCode::UnknownException(
-                                "Only support the monotonic expression",
-                            ));
-                        }
-
-                        (
-                            vec![exprs[0].clone(), exprs[1].clone()],
-                            vec![lhs_cols, rhs_cols],
-                            op,
-                        )
-                    }
-                }
-            }
-            _ => {
-                return Err(ErrorCode::UnknownException(
-                    "Expressions with more than two args are not currently supported",
-                ));
-            }
-        };
-
-        if !check_maybe_monotonic(&args[0])? {
-            return Err(ErrorCode::UnknownException(
-                "Only support the monotonic expression",
-            ));
-        }
-
-        let mut fields = Vec::with_capacity(cols.len());
-
-        let left_cols = get_column_fields(schema, cols[0].clone())?;
-        let left_field = args[0].to_data_field(schema)?;
-        fields.push((left_field, left_cols));
-
-        if cols.len() > 1 {
-            let right_cols = get_column_fields(schema, cols[1].clone())?;
-            let right_field = args[1].to_data_field(schema)?;
-            fields.push((right_field, right_cols));
-        }
-
-        Ok(Self {
-            op,
-            args,
-            fields,
-            stat_columns,
-        })
+        // TODO(sundy)
+        todo!()
     }

     fn build(&mut self) -> Result<PhysicalScalar> {
         // TODO: support in/not in.
        match self.op {
            "is_null" => {
                // should_keep: col.null_count > 0
                let nulls_expr = self.nulls_column_expr(0)?;
-                let scalar_expr = lit(0u64);
-                Ok(nulls_expr.gt(scalar_expr))
+                let scalar_expr = PhysicalScalar::Constant { value: DataValue::UInt64(0), data_type: u64::to_data_type() };
+                nulls_expr.gt(&scalar_expr)
            }
            "is_not_null" => {
                // should_keep: col.null_count != col.row_count
                let nulls_expr = self.nulls_column_expr(0)?;
                let row_count_expr = self.row_count_column_expr(0)?;
-                Ok(nulls_expr.not_eq(row_count_expr))
+                nulls_expr.not_eq(&row_count_expr)
            }
            "=" => {
                // left = right => min_left <= max_right and max_left >= min_right
                let left_min = self.min_column_expr(0)?;
                let left_max = self.max_column_expr(0)?;
@@ -453,14 +340,16 @@ impl<'a> VerifiableExprBuilder<'a> {
                    self.max_column_expr(1)?
                };
 
-                Ok(left_min.lt_eq(right_max).and(left_max.gt_eq(right_min)))
+                left_min
+                    .lt_eq(&right_max)?
+                    .and(&left_max.gt_eq(&right_min)?)
            }
            "<>" | "!=" => {
                let left_min = self.min_column_expr(0)?;
                let left_max = self.max_column_expr(0)?;
-                Ok(left_min
-                    .not_eq(self.args[1].clone())
-                    .or(left_max.not_eq(self.args[1].clone())))
+                left_min
+                    .not_eq(&self.args[1])?
+                    .or(&left_max.not_eq(&self.args[1])?)
            }
            ">" => {
                // left > right => max_left > min_right
                let left_max = self.max_column_expr(0)?;
@@ -472,7 +361,7 @@ impl<'a> VerifiableExprBuilder<'a> {
                    self.min_column_expr(1)?
                };
 
-                Ok(left_max.gt(right_min))
+                left_max.gt(&right_min)
            }
            ">=" => {
                // left >= right => max_left >= min_right
                let left_max = self.max_column_expr(0)?;
@@ -484,7 +373,7 @@ impl<'a> VerifiableExprBuilder<'a> {
                    self.min_column_expr(1)?
                };
 
-                Ok(left_max.gt_eq(right_min))
+                left_max.gt_eq(&right_min)
            }
            "<" => {
                // left < right => min_left < max_right
                let left_min = self.min_column_expr(0)?;
@@ -496,7 +385,7 @@ impl<'a> VerifiableExprBuilder<'a> {
                    self.max_column_expr(1)?
                };
 
-                Ok(left_min.lt(right_max))
+                left_min.lt(&right_max)
            }
            "<=" => {
                // left <= right => min_left <= max_right
                let left_min = self.min_column_expr(0)?;
@@ -508,10 +397,10 @@ impl<'a> VerifiableExprBuilder<'a> {
                    self.max_column_expr(1)?
                };
 
-                Ok(left_min.lt_eq(right_max))
+                left_min.lt_eq(&right_max)
            }
            "like" => {
-                if let LegacyExpression::Literal {
+                if let PhysicalScalar::Constant {
                    value: DataValue::String(v),
                    ..
                } = &self.args[1]
@@ -519,13 +408,24 @@ impl<'a> VerifiableExprBuilder<'a> {
                    // e.g. col like 'a%' => max_col >= 'a' and min_col < 'b'
                    let left = left_bound_for_like_pattern(v);
                    if !left.is_empty() {
+                        let left_scalar = PhysicalScalar::Constant {
+                            value: DataValue::String(left),
+                            data_type: Vu8::to_data_type(),
+                        };
                        let right = right_bound_for_like_pattern(left.clone());
+                        let right_scalar = PhysicalScalar::Constant {
+                            value: DataValue::String(right),
+                            data_type: Vu8::to_data_type(),
+                        };
+
                        let max_expr = self.max_column_expr(0)?;
                        if right.is_empty() {
-                            return Ok(max_expr.gt_eq(lit(left)));
+                            return max_expr.gt_eq(&left_scalar);
                        } else {
                            let min_expr = self.min_column_expr(0)?;
-                            return Ok(max_expr.gt_eq(lit(left)).and(min_expr.lt(lit(right))));
+                            return max_expr
+                                .gt_eq(&left_scalar)?
+                                .and(&min_expr.lt(&right_scalar)?);
                        }
                    }
                }
@@ -534,7 +434,7 @@ impl<'a> VerifiableExprBuilder<'a> {
                ))
            }
            "not like" => {
-                if let LegacyExpression::Literal {
+                if let PhysicalScalar::Constant {
                    value: DataValue::String(v),
                    ..
                } = &self.args[1]
@@ -544,25 +444,42 @@ impl<'a> VerifiableExprBuilder<'a> {
                    // e.g. col not like 'abc' => min_col != 'abc' or max_col != 'abc'
                    PatternType::OrdinalStr => {
                        let const_arg = left_bound_for_like_pattern(v);
+                        let const_arg_scalar = PhysicalScalar::Constant {
+                            value: DataValue::String(const_arg),
+                            data_type: Vu8::to_data_type(),
+                        };
+
+                        let max_expr = self.max_column_expr(0)?;
                        let min_expr = self.min_column_expr(0)?;
-                        return Ok(min_expr
-                            .not_eq(lit(const_arg.clone()))
-                            .or(max_expr.not_eq(lit(const_arg))));
+
+                        return min_expr
+                            .not_eq(&const_arg_scalar)?
+                            .or(&max_expr.not_eq(&const_arg_scalar)?);
                    }
                    // e.g. col not like 'ab%' => min_col < 'ab' or max_col >= 'ac'
                    PatternType::EndOfPercent => {
                        let left = left_bound_for_like_pattern(v);
                        if !left.is_empty() {
                            let right = right_bound_for_like_pattern(left.clone());
+
+                            let left_scalar = PhysicalScalar::Constant {
+                                value: DataValue::String(left),
+                                data_type: Vu8::to_data_type(),
+                            };
+
+                            let right_scalar = PhysicalScalar::Constant {
+                                value: DataValue::String(right),
+                                data_type: Vu8::to_data_type(),
+                            };
+
                            let min_expr = self.min_column_expr(0)?;
                            if right.is_empty() {
-                                return Ok(min_expr.lt(lit(left)));
+                                return min_expr.lt(&left_scalar);
                            } else {
                                let max_expr = self.max_column_expr(0)?;
-                                return Ok(min_expr
-                                    .lt(lit(left))
-                                    .or(max_expr.gt_eq(lit(right))));
+                                return min_expr
+                                    .lt(&left_scalar)?
+                                    .or(&max_expr.gt_eq(&right_scalar)?);
                            }
                        }
                    }
@@ -580,7 +497,7 @@ impl<'a> VerifiableExprBuilder<'a> {
        }
    }
 
-    fn stat_column_expr(&mut self, stat_type: StatType, index: usize) -> Result<LegacyExpression> {
+    fn stat_column_expr(&mut self, stat_type: StatType, index: usize) -> Result<PhysicalScalar> {
        let (data_field, column_fields) = self.fields[index].clone();
        let stat_col = StatColumn::create(
            column_fields,
@@ -595,24 +512,27 @@ impl<'a> VerifiableExprBuilder<'a> {
        {
            self.stat_columns.push(stat_col.clone());
        }
-        Ok(LegacyExpression::Column(
-            stat_col.stat_field.name().to_owned(),
-        ))
+
+        Ok(PhysicalScalar::IndexedVariable {
+            index,
+            data_type: stat_col.stat_field.data_type().clone(),
+            display_name: stat_col.stat_field.name().to_string(),
+        })
    }
 
-    fn min_column_expr(&mut self, index: usize) -> Result<LegacyExpression> {
+    fn min_column_expr(&mut self, index: usize) -> Result<PhysicalScalar> {
        self.stat_column_expr(StatType::Min, index)
    }
 
-    fn max_column_expr(&mut self, index: usize) -> Result<LegacyExpression> {
+    fn max_column_expr(&mut self, index: usize) -> Result<PhysicalScalar> {
        self.stat_column_expr(StatType::Max, index)
    }
 
-    fn nulls_column_expr(&mut self, index: usize) -> Result<LegacyExpression> {
+    fn nulls_column_expr(&mut self, index: usize) -> Result<PhysicalScalar> {
        self.stat_column_expr(StatType::Nulls, index)
    }
 
-    fn row_count_column_expr(&mut self, index: usize) -> Result<LegacyExpression> {
+    fn row_count_column_expr(&mut self, index: usize) -> Result<PhysicalScalar> {
        self.stat_column_expr(StatType::RowCount, index)
    }
 }
@@ -659,7 +579,7 @@ pub fn right_bound_for_like_pattern(prefix: Vec<u8>) -> Vec<u8> {
    res
 }
 
-fn get_maybe_monotonic(op: &str, args: LegacyExpressions) -> Result<bool> {
+fn get_maybe_monotonic(op: &str, args: &Vec<PhysicalScalar>) -> Result<bool> {
    let factory = FunctionFactory::instance();
    let function_features = factory.get_features(op)?;
    if !function_features.maybe_monotonic {
@@ -674,18 +594,12 @@ fn get_maybe_monotonic(op: &str, args: LegacyExpressions) -> Result<bool> {
    Ok(true)
 }
 
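Constants and plain columns always pass the monotonicity check that follows; a function only passes when the factory flags it maybe_monotonic. A small sketch of the intended behaviour, assuming arithmetic functions such as "plus" keep that flag as they had it in the legacy registry:

let column = PhysicalScalar::IndexedVariable {
    index: 0,
    data_type: u64::to_data_type(),
    display_name: "number".to_string(),
};
let four = PhysicalScalar::Constant {
    value: DataValue::UInt64(4),
    data_type: u64::to_data_type(),
};
// `number + 4` stays monotonic in `number`, so min/max pruning can still be
// driven through it; an opaque function without the flag yields Ok(false).
let expr = PhysicalScalar::Function {
    name: "plus".to_string(),
    args: vec![column, four],
    return_type: u64::to_data_type(),
};
assert!(matches!(check_maybe_monotonic(&expr), Ok(true)));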
-pub fn check_maybe_monotonic(expr: &LegacyExpression) -> Result<bool> {
+pub fn check_maybe_monotonic(expr: &PhysicalScalar) -> Result<bool> {
    match expr {
-        LegacyExpression::Literal { .. } => Ok(true),
-        LegacyExpression::Column { .. } => Ok(true),
-        LegacyExpression::BinaryExpression { op, left, right } => {
-            get_maybe_monotonic(op, vec![left.as_ref().clone(), right.as_ref().clone()])
-        }
-        LegacyExpression::UnaryExpression { op, expr } => {
-            get_maybe_monotonic(op, vec![expr.as_ref().clone()])
-        }
-        LegacyExpression::ScalarFunction { op, args } => get_maybe_monotonic(op, args.clone()),
-        LegacyExpression::Cast { expr, .. } => check_maybe_monotonic(expr),
+        PhysicalScalar::Constant { .. } => Ok(true),
+        PhysicalScalar::IndexedVariable { .. } => Ok(true),
+        PhysicalScalar::Function { name, args, .. } => get_maybe_monotonic(name, args),
+        PhysicalScalar::Cast { input, .. } => check_maybe_monotonic(input),
        _ => Ok(false),
    }
 }
diff --git a/src/query/storages/preludes/Cargo.toml b/src/query/storages/preludes/Cargo.toml
index 915f20a09ae3..cd65e85a3540 100644
--- a/src/query/storages/preludes/Cargo.toml
+++ b/src/query/storages/preludes/Cargo.toml
@@ -18,7 +18,6 @@ common-datablocks = { path = "../../datablocks" }
 common-datavalues = { path = "../../datavalues" }
 common-exception = { path = "../../../common/exception" }
 common-functions = { path = "../../functions" }
-common-legacy-planners = { path = "../../legacy-planners" }
 common-meta-app = { path = "../../../meta/app" }
 common-meta-types = { path = "../../../meta/types" }
 common-metrics = { path = "../../../common/metrics" }
@@ -26,6 +25,7 @@ common-pipeline-core = { path = "../../pipeline/core" }
 common-pipeline-sinks = { path = "../../pipeline/sinks" }
 common-pipeline-sources = { path = "../../pipeline/sources" }
 common-pipeline-transforms = { path = "../../pipeline/transforms" }
+common-planner = { path = "../../planner" }
 common-storage = { path = "../../../common/storage" }
 common-users = { path = "../../users" }
diff --git a/src/query/storages/preludes/src/memory/memory_part.rs b/src/query/storages/preludes/src/memory/memory_part.rs
index 4e3e66698d6c..c2ea80e5bd14 100644
--- a/src/query/storages/preludes/src/memory/memory_part.rs
+++ b/src/query/storages/preludes/src/memory/memory_part.rs
@@ -17,8 +17,8 @@ use std::sync::Arc;
 
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_legacy_planners::PartInfo;
-use common_legacy_planners::PartInfoPtr;
+use common_planner::PartInfo;
+use common_planner::PartInfoPtr;
 
 #[derive(serde::Serialize, serde::Deserialize, PartialEq, Eq)]
 pub struct MemoryPartInfo {
diff --git a/src/query/storages/preludes/src/memory/memory_table.rs b/src/query/storages/preludes/src/memory/memory_table.rs
index 152396ff0959..f4efbcc83108 100644
--- a/src/query/storages/preludes/src/memory/memory_table.rs
+++ b/src/query/storages/preludes/src/memory/memory_table.rs
@@ -26,11 +26,11 @@ use common_datavalues::StructColumn;
 use common_datavalues::TypeID;
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_legacy_planners::Extras;
-use common_legacy_planners::Partitions;
-use common_legacy_planners::Projection;
-use common_legacy_planners::ReadDataSourcePlan;
-use common_legacy_planners::Statistics;
+use common_planner::extras::Extras;
+use common_planner::Partitions;
+use common_planner::plans::Projection;
+use common_planner::ReadDataSourcePlan;
+use common_planner::extras::Statistics;
 use common_meta_app::schema::TableInfo;
 use common_storage::StorageMetrics;
 use once_cell::sync::Lazy;
diff --git a/src/query/storages/preludes/src/null/null_table.rs b/src/query/storages/preludes/src/null/null_table.rs
index 7f03f5bb9ae2..ab1cd5403bc6 100644
--- a/src/query/storages/preludes/src/null/null_table.rs
+++
b/src/query/storages/preludes/src/null/null_table.rs @@ -18,10 +18,10 @@ use std::sync::Arc; use common_datablocks::DataBlock; use common_datavalues::DataSchemaRef; use common_exception::Result; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; +use common_planner::extras::Statistics; use common_meta_app::schema::TableInfo; use crate::pipelines::processors::port::OutputPort; diff --git a/src/query/storages/preludes/src/random/random_parts.rs b/src/query/storages/preludes/src/random/random_parts.rs index 94c7d9288aea..45d40bba9c73 100644 --- a/src/query/storages/preludes/src/random/random_parts.rs +++ b/src/query/storages/preludes/src/random/random_parts.rs @@ -17,8 +17,8 @@ use std::sync::Arc; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_planners::PartInfo; -use common_legacy_planners::PartInfoPtr; +use common_planner::PartInfo; +use common_planner::PartInfoPtr; #[derive(serde::Serialize, serde::Deserialize, PartialEq, Eq)] pub struct RandomPartInfo { diff --git a/src/query/storages/preludes/src/random/random_table.rs b/src/query/storages/preludes/src/random/random_table.rs index f348dd1d0e6a..5437ed6b9d75 100644 --- a/src/query/storages/preludes/src/random/random_table.rs +++ b/src/query/storages/preludes/src/random/random_table.rs @@ -19,11 +19,11 @@ use common_datablocks::DataBlock; use common_datavalues::DataSchemaRef; use common_datavalues::DataType; use common_exception::Result; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::Projection; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::Partitions; +use common_planner::plans::Projection; +use common_planner::ReadDataSourcePlan; +use common_planner::extras::Statistics; use common_meta_app::schema::TableInfo; use super::RandomPartInfo; diff --git a/src/query/storages/preludes/src/system/log_queue.rs b/src/query/storages/preludes/src/system/log_queue.rs index 053e73e20610..3edd5221024f 100644 --- a/src/query/storages/preludes/src/system/log_queue.rs +++ b/src/query/storages/preludes/src/system/log_queue.rs @@ -26,10 +26,10 @@ use common_datavalues::DataType; use common_datavalues::MutableColumn; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; +use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; diff --git a/src/query/storages/preludes/src/system/one_table.rs b/src/query/storages/preludes/src/system/one_table.rs index d48edf639112..968ae4926ccb 100644 --- a/src/query/storages/preludes/src/system/one_table.rs +++ b/src/query/storages/preludes/src/system/one_table.rs @@ -17,9 +17,9 @@ use std::sync::Arc; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::Result; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::Statistics; +use 
common_planner::extras::Extras; +use common_planner::Partitions; +use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; diff --git a/src/query/storages/preludes/src/system/table.rs b/src/query/storages/preludes/src/system/table.rs index 6b89ff6dc67b..d09444e26953 100644 --- a/src/query/storages/preludes/src/system/table.rs +++ b/src/query/storages/preludes/src/system/table.rs @@ -17,11 +17,11 @@ use std::sync::Arc; use common_datablocks::DataBlock; use common_exception::Result; -use common_legacy_planners::Extras; -use common_legacy_planners::PartInfo; -use common_legacy_planners::Partitions; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::PartInfo; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; +use common_planner::extras::Statistics; use common_meta_app::schema::TableInfo; use common_pipeline_sources::processors::sources::EmptySource; diff --git a/src/query/storages/preludes/src/system/tracing_table.rs b/src/query/storages/preludes/src/system/tracing_table.rs index 0d3485a79e93..ca549290ac10 100644 --- a/src/query/storages/preludes/src/system/tracing_table.rs +++ b/src/query/storages/preludes/src/system/tracing_table.rs @@ -23,10 +23,10 @@ use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_planners::Extras; -use common_legacy_planners::Partitions; -use common_legacy_planners::ReadDataSourcePlan; -use common_legacy_planners::Statistics; +use common_planner::extras::Extras; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; +use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; From 226b719be68ae25dd9f825ee56365ad7d9841104 Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 27 Oct 2022 23:26:49 +0800 Subject: [PATCH 03/47] merge --- Cargo.lock | 2 + src/query/catalog/src/table.rs | 12 +- src/query/planner/src/extras.rs | 8 +- src/query/planner/src/lib.rs | 7 +- src/query/planner/src/plan_read_datasource.rs | 1 - src/query/planner/src/plans/delete.rs | 2 - src/query/planner/src/plans/mod.rs | 13 +- src/query/planner/src/plans/projection.rs | 5 +- src/query/service/Cargo.toml | 1 + src/query/service/src/api/http/v1/logs.rs | 2 +- .../src/api/rpc/exchange/data_exchange.rs | 8 +- .../src/api/rpc/flight_scatter_hash.rs | 58 ++-- .../src/api/rpc/flight_scatter_hash_v2.rs | 2 +- src/query/service/src/context_function.rs | 30 +- .../interpreters/fragments/v2/fragmenter.rs | 2 +- .../src/interpreters/interpreter_copy_v2.rs | 6 +- .../src/interpreters/interpreter_delete.rs | 2 +- .../src/interpreters/interpreter_insert_v2.rs | 2 +- .../src/interpreters/interpreter_setting.rs | 2 +- .../interpreter_table_describe.rs | 2 +- .../interpreter_table_recluster.rs | 9 +- .../interpreter_table_show_create.rs | 2 +- .../service/src/pipelines/pipeline_builder.rs | 9 +- .../service/src/pipelines/processors/mod.rs | 2 - .../processors/transforms/hash_join/desc.rs | 4 +- .../transforms/hash_join/join_hash_table.rs | 2 +- .../pipelines/processors/transforms/mod.rs | 3 - src/query/service/src/procedures/procedure.rs | 7 - src/query/service/src/sessions/query_ctx.rs | 10 +- .../service/src/sessions/query_ctx_shared.rs | 23 +- 
.../src/table_functions/async_crash_me.rs | 15 +- .../src/table_functions/numbers_table.rs | 23 +- .../src/table_functions/sync_crash_me.rs | 13 +- .../tests/it/sql/planner/format/mod.rs | 3 +- .../service/tests/it/storages/fuse/table.rs | 4 +- .../it/storages/fuse/table_test_fixture.rs | 2 +- src/query/service/tests/it/storages/memory.rs | 2 +- .../tests/it/storages/result/result_table.rs | 2 +- .../sql/src/evaluator/physical_scalar.rs | 8 +- .../sql/src/executor/physical_plan_builder.rs | 17 +- .../factory/src/result/block_buffer.rs | 3 +- .../factory/src/result/result_table.rs | 10 +- .../factory/src/result/result_table_sink.rs | 4 +- .../factory/src/result/result_table_source.rs | 2 +- .../storages/factory/src/stage/stage_table.rs | 10 +- .../factory/src/stage/stage_table_sink.rs | 2 +- src/query/storages/fuse/Cargo.toml | 1 + src/query/storages/fuse/src/fuse_part.rs | 2 +- src/query/storages/fuse/src/fuse_table.rs | 22 +- .../storages/fuse/src/io/read/block_reader.rs | 2 +- .../storages/fuse/src/operations/append.rs | 66 +---- .../storages/fuse/src/operations/compact.rs | 4 +- .../storages/fuse/src/operations/delete.rs | 68 +---- .../src/operations/mutation/block_filter.rs | 21 +- .../fuse/src/operations/operation_log.rs | 3 +- .../storages/fuse/src/operations/read_data.rs | 267 ++++++++++++++++-- .../fuse/src/operations/read_partitions.rs | 6 +- .../storages/fuse/src/operations/recluster.rs | 9 +- src/query/storages/fuse/src/pruning/pruner.rs | 71 ++--- .../storages/fuse/src/pruning/range_pruner.rs | 4 +- .../storages/fuse/src/pruning/topn_pruner.rs | 31 +- .../fuse/src/statistics/cluster_statistics.rs | 32 ++- .../clustering_information.rs | 8 +- .../clustering_information_table.rs | 10 +- .../clustering_informations/table_args.rs | 44 +-- .../fuse_blocks/fuse_block_table.rs | 10 +- .../fuse_segments/fuse_segment_table.rs | 9 +- .../fuse_snapshots/fuse_snapshot_table.rs | 10 +- .../fuse/src/table_functions/table_args.rs | 1 - .../hive/src/hive_partition_pruner.rs | 6 +- src/query/storages/hive/src/hive_table.rs | 28 +- .../storages/hive/src/hive_table_source.rs | 2 +- src/query/storages/index/src/bloom.rs | 51 ++-- src/query/storages/index/src/range_filter.rs | 63 +++-- .../preludes/src/memory/memory_table.rs | 6 +- .../storages/preludes/src/null/null_table.rs | 4 +- .../preludes/src/random/random_table.rs | 6 +- .../storages/preludes/src/system/log_queue.rs | 8 +- .../storages/preludes/src/system/one_table.rs | 6 +- .../storages/preludes/src/system/table.rs | 6 +- .../preludes/src/system/tracing_table.rs | 8 +- 81 files changed, 657 insertions(+), 596 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 679587e6cdde..4de783068f8b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2125,6 +2125,7 @@ dependencies = [ "common-pipeline-transforms", "common-planner", "common-sharing", + "common-sql", "common-storage", "common-storages-cache", "common-storages-constants", @@ -2911,6 +2912,7 @@ dependencies = [ "tonic", "tracing", "typetag", + "unicode-segmentation", "url", "uuid", "walkdir", diff --git a/src/query/catalog/src/table.rs b/src/query/catalog/src/table.rs index a61cb97f65cf..8c660658c189 100644 --- a/src/query/catalog/src/table.rs +++ b/src/query/catalog/src/table.rs @@ -24,15 +24,15 @@ use common_datavalues::DataSchemaRef; use common_datavalues::DataValue; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::PhysicalScalar; -use common_planner::plans::DeletePlan; -use common_planner::Partitions; -use common_planner::ReadDataSourcePlan; -use 
common_planner::extras::Statistics; -use common_planner::extras::Extras; use common_meta_app::schema::TableInfo; use common_meta_types::MetaId; use common_pipeline_core::Pipeline; +use common_planner::extras::Extras; +use common_planner::extras::Statistics; +use common_planner::plans::DeletePlan; +use common_planner::Partitions; +use common_planner::PhysicalScalar; +use common_planner::ReadDataSourcePlan; use common_storage::StorageMetrics; use crate::table::column_stats_provider_impls::DummyColumnStatisticsProvider; diff --git a/src/query/planner/src/extras.rs b/src/query/planner/src/extras.rs index 2b5a979d0734..a4c293ca209f 100644 --- a/src/query/planner/src/extras.rs +++ b/src/query/planner/src/extras.rs @@ -14,12 +14,12 @@ use std::fmt::Debug; -use common_meta_app::schema::TableInfo; use common_datavalues::prelude::*; +use common_meta_app::schema::TableInfo; use once_cell::sync::Lazy; - -use crate::{PhysicalScalar, plans::Projection}; +use crate::plans::Projection; +use crate::PhysicalScalar; #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] pub enum StageKind { @@ -28,7 +28,6 @@ pub enum StageKind { Merge, } - #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq)] pub struct PrewhereInfo { /// columns to be ouput be prewhere scan @@ -70,7 +69,6 @@ impl Extras { } } - #[derive(serde::Serialize, serde::Deserialize, PartialEq, Eq, Clone, Debug, Default)] pub struct Statistics { /// Total rows of the query read. diff --git a/src/query/planner/src/lib.rs b/src/query/planner/src/lib.rs index 76fdfe17fce3..c12956026675 100644 --- a/src/query/planner/src/lib.rs +++ b/src/query/planner/src/lib.rs @@ -21,17 +21,16 @@ //! After all the planners work, `Interpreter` will use `PhysicalPlan` to //! build pipelines, then our processes will produce result data blocks. -mod physical_scalar; mod partition; +mod physical_scalar; pub mod extras; -pub mod stage_table; pub mod plan_read_datasource; +pub mod stage_table; -pub use physical_scalar::*; pub use partition::*; +pub use physical_scalar::*; pub use plan_read_datasource::*; // Plan will be used publicly. 
pub mod plans; - diff --git a/src/query/planner/src/plan_read_datasource.rs b/src/query/planner/src/plan_read_datasource.rs index b78c05f529d1..b269dcd5aa43 100644 --- a/src/query/planner/src/plan_read_datasource.rs +++ b/src/query/planner/src/plan_read_datasource.rs @@ -23,7 +23,6 @@ use common_meta_app::schema::TableInfo; use crate::extras::Extras; use crate::extras::Statistics; - use crate::partition::Partitions; use crate::plans::Projection; use crate::stage_table::StageTableInfo; diff --git a/src/query/planner/src/plans/delete.rs b/src/query/planner/src/plans/delete.rs index b1769f633870..59d32302448a 100644 --- a/src/query/planner/src/plans/delete.rs +++ b/src/query/planner/src/plans/delete.rs @@ -50,5 +50,3 @@ impl DeletePlan { Arc::new(DataSchema::empty()) } } - - diff --git a/src/query/planner/src/plans/mod.rs b/src/query/planner/src/plans/mod.rs index acca28e892c5..3b073027efc6 100644 --- a/src/query/planner/src/plans/mod.rs +++ b/src/query/planner/src/plans/mod.rs @@ -23,6 +23,7 @@ mod create_stage; mod create_udf; mod create_user; mod create_view; +mod delete; mod describe_table; mod drop_database; mod drop_role; @@ -38,12 +39,14 @@ mod grant_role; mod kill; mod list; mod optimize_table; +mod projection; mod remove_stage; mod rename_database; mod rename_table; mod revoke_privilege; mod revoke_role; mod set_role; +mod setting; mod show_create_database; mod show_create_table; mod show_grants; @@ -51,9 +54,6 @@ mod truncate_table; mod undrop_database; mod undrop_table; mod use_database; -mod delete; -mod setting; -mod projection; pub use alter_table_cluster_key::AlterTableClusterKeyPlan; pub use alter_udf::AlterUDFPlan; @@ -66,6 +66,7 @@ pub use create_stage::CreateStagePlan; pub use create_udf::CreateUDFPlan; pub use create_user::CreateUserPlan; pub use create_view::CreateViewPlan; +pub use delete::*; pub use describe_table::DescribeTablePlan; pub use drop_database::DropDatabasePlan; pub use drop_role::DropRolePlan; @@ -82,6 +83,7 @@ pub use kill::KillPlan; pub use list::ListPlan; pub use optimize_table::OptimizeTableAction; pub use optimize_table::OptimizeTablePlan; +pub use projection::*; pub use remove_stage::RemoveStagePlan; pub use rename_database::RenameDatabaseEntity; pub use rename_database::RenameDatabasePlan; @@ -90,6 +92,7 @@ pub use rename_table::RenameTablePlan; pub use revoke_privilege::RevokePrivilegePlan; pub use revoke_role::RevokeRolePlan; pub use set_role::SetRolePlan; +pub use setting::*; pub use show_create_database::ShowCreateDatabasePlan; pub use show_create_table::ShowCreateTablePlan; pub use show_grants::ShowGrantsPlan; @@ -97,7 +100,3 @@ pub use truncate_table::TruncateTablePlan; pub use undrop_database::UndropDatabasePlan; pub use undrop_table::UndropTablePlan; pub use use_database::UseDatabasePlan; - -pub use delete::*; -pub use setting::*; -pub use projection::*; diff --git a/src/query/planner/src/plans/projection.rs b/src/query/planner/src/plans/projection.rs index b1aae3bb9d5d..deda09d8e561 100644 --- a/src/query/planner/src/plans/projection.rs +++ b/src/query/planner/src/plans/projection.rs @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::{collections::BTreeMap, fmt::Formatter}; +use std::collections::BTreeMap; +use std::fmt::Formatter; use common_datavalues::DataSchema; @@ -60,4 +61,4 @@ impl core::fmt::Debug for Projection { } } } -} \ No newline at end of file +} diff --git a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml index 28f15928ca53..65294434480f 100644 --- a/src/query/service/Cargo.toml +++ b/src/query/service/Cargo.toml @@ -83,6 +83,7 @@ backon = "0.2" bumpalo = "3.11.0" byteorder = "1.4.3" bytes = "1.2.1" +unicode-segmentation = "1.10.0" chrono = "0.4.22" chrono-tz = "0.6.3" futures = "0.3.24" diff --git a/src/query/service/src/api/http/v1/logs.rs b/src/query/service/src/api/http/v1/logs.rs index 44a98b65ba90..5acdbd0bd136 100644 --- a/src/query/service/src/api/http/v1/logs.rs +++ b/src/query/service/src/api/http/v1/logs.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use common_exception::ErrorCode; use common_exception::Result; +use common_sql::executor::table_read_plan::ToReadDataSourcePlan; use common_streams::SendableDataBlockStream; use poem::http::StatusCode; use poem::Body; @@ -26,7 +27,6 @@ use crate::sessions::QueryContext; use crate::sessions::SessionManager; use crate::sessions::SessionType; use crate::sessions::TableContext; -use crate::storages::ToReadDataSourcePlan; use crate::stream::DataBlockStream; // read log files from cfg.log.log_dir diff --git a/src/query/service/src/api/rpc/exchange/data_exchange.rs b/src/query/service/src/api/rpc/exchange/data_exchange.rs index 38d8139233d2..e52a19bd41f6 100644 --- a/src/query/service/src/api/rpc/exchange/data_exchange.rs +++ b/src/query/service/src/api/rpc/exchange/data_exchange.rs @@ -12,9 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use common_legacy_expression::LegacyExpression;
-
-use crate::sql::executor::PhysicalScalar;
+use common_planner::PhysicalScalar;
 
 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
 pub enum DataExchange {
@@ -47,13 +45,13 @@ impl DataExchange {
 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
 pub struct ShuffleDataExchange {
     pub destination_ids: Vec<String>,
-    pub exchange_expression: LegacyExpression,
+    pub exchange_expression: PhysicalScalar,
 }
 
 impl ShuffleDataExchange {
     pub fn create(
         destination_ids: Vec<String>,
-        exchange_expression: LegacyExpression,
+        exchange_expression: PhysicalScalar,
     ) -> DataExchange {
         DataExchange::ShuffleDataExchange(ShuffleDataExchange {
             destination_ids,
diff --git a/src/query/service/src/api/rpc/flight_scatter_hash.rs b/src/query/service/src/api/rpc/flight_scatter_hash.rs
index a721ae587cf2..60f398026c2e 100644
--- a/src/query/service/src/api/rpc/flight_scatter_hash.rs
+++ b/src/query/service/src/api/rpc/flight_scatter_hash.rs
@@ -18,16 +18,17 @@ use common_datablocks::DataBlock;
 use common_datavalues::prelude::*;
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_legacy_expression::LegacyExpression;
+use common_functions::scalars::FunctionContext;
+use common_planner::PhysicalScalar;
+use common_sql::evaluator::EvalNode;
+use common_sql::evaluator::Evaluator;
 
 use crate::api::rpc::flight_scatter::FlightScatter;
-use crate::pipelines::processors::transforms::ExpressionExecutor;
 use crate::sessions::QueryContext;
 
 #[derive(Clone)]
 pub struct HashFlightScatter {
-    scatter_expression_executor: Arc<ExpressionExecutor>,
-    scatter_expression_name: String,
+    scatter_expression_executor: Arc<EvalNode>,
     scattered_size: usize,
 }
 
@@ -35,7 +36,7 @@ impl HashFlightScatter {
     pub fn try_create(
         ctx: Arc<QueryContext>,
         schema: DataSchemaRef,
-        expr: Option<LegacyExpression>,
+        expr: Option<PhysicalScalar>,
         num: usize,
     ) -> Result<Self> {
         match expr {
@@ -50,10 +51,11 @@ impl FlightScatter for HashFlightScatter {
     fn execute(&self, data_block: &DataBlock, _num: usize) -> Result<Vec<DataBlock>> {
         let expression_executor = self.scatter_expression_executor.clone();
-        let evaluated_data_block = expression_executor.execute(data_block)?;
-        let indices = evaluated_data_block.try_column_by_name(&self.scatter_expression_name)?;
+        let indices = expression_executor
+            .eval(&FunctionContext::default(), data_block)?
+            .vector;
 
-        let col: &PrimitiveColumn<u64> = Series::check_get(indices)?;
+        let col: &PrimitiveColumn<u64> = Series::check_get(&indices)?;
         let indices: Vec<usize> = col.iter().map(|c| *c as usize).collect();
         DataBlock::scatter_block(data_block, &indices, self.scattered_size)
     }
@@ -63,16 +65,14 @@ impl HashFlightScatter {
     fn try_create_impl(
         schema: DataSchemaRef,
         num: usize,
-        expr: LegacyExpression,
+        expr: PhysicalScalar,
         ctx: Arc<QueryContext>,
     ) -> Result<Self> {
         let expression = Self::expr_action(num, expr);
-        let indices_expr_executor = Self::expr_executor(ctx, schema, &expression)?;
-        indices_expr_executor.validate()?;
+        let indices_expr_executor = Evaluator::eval_physical_scalar(&expression)?;
 
         Ok(HashFlightScatter {
             scatter_expression_executor: Arc::new(indices_expr_executor),
-            scatter_expression_name: expression.column_name(),
             scattered_size: num,
         })
     }
@@ -81,35 +81,17 @@ impl HashFlightScatter {
         DataSchemaRefExt::create(vec![DataField::new(output_name, u64::to_data_type())])
     }
 
-    fn expr_executor(
-        ctx: Arc<QueryContext>,
-        schema: DataSchemaRef,
-        expr: &LegacyExpression,
-    ) -> Result<ExpressionExecutor> {
-        ExpressionExecutor::try_create(
-            ctx,
-            "indices expression in FlightScatterByHash",
-            schema,
-            Self::indices_expr_schema(&expr.column_name()),
-            vec![expr.clone()],
-            false,
-        )
-    }
-
-    fn expr_action(num: usize, expr: LegacyExpression) -> LegacyExpression {
-        LegacyExpression::ScalarFunction {
-            op: String::from("modulo"),
+    fn expr_action(num: usize, expr: PhysicalScalar) -> PhysicalScalar {
+        PhysicalScalar::Function {
+            name: String::from("modulo"),
             args: vec![
-                LegacyExpression::Cast {
-                    expr: Box::new(expr),
-                    data_type: u64::to_data_type(),
-                    pg_style: false,
+                PhysicalScalar::Cast {
+                    input: Box::new(expr),
+                    target: u64::to_data_type(),
                 },
-                LegacyExpression::create_literal_with_type(
-                    DataValue::UInt64(num as u64),
-                    u64::to_data_type(),
-                ),
+                PhysicalScalar::Constant { value: DataValue::UInt64(num as u64), data_type: u64::to_data_type() }
             ],
+            return_type: u64::to_data_type(),
         }
     }
 }
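With ExpressionExecutor gone, the scatter key is just another PhysicalScalar. As a worked example of what expr_action above produces, for a two-destination exchange over a shuffle expression c (a placeholder name for the caller-supplied scalar), the router evaluates modulo(cast(c as u64), 2):

// Sketch of the value returned by expr_action(2, c); `c` is hypothetical.
let scatter_key = PhysicalScalar::Function {
    name: String::from("modulo"),
    args: vec![
        PhysicalScalar::Cast {
            input: Box::new(c),
            target: u64::to_data_type(),
        },
        PhysicalScalar::Constant {
            value: DataValue::UInt64(2),
            data_type: u64::to_data_type(),
        },
    ],
    return_type: u64::to_data_type(),
};
// execute() evaluates this once per block, reads the resulting u64 column,
// and hands it to DataBlock::scatter_block as the per-row destination index.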
diff --git a/src/query/service/src/api/rpc/flight_scatter_hash_v2.rs b/src/query/service/src/api/rpc/flight_scatter_hash_v2.rs
index 9e07dc590fc6..a10e5e3a24e1 100644
--- a/src/query/service/src/api/rpc/flight_scatter_hash_v2.rs
+++ b/src/query/service/src/api/rpc/flight_scatter_hash_v2.rs
@@ -22,12 +22,12 @@ use common_exception::Result;
 use common_functions::scalars::Function;
 use common_functions::scalars::FunctionContext;
 use common_functions::scalars::FunctionFactory;
+use common_planner::PhysicalScalar;
 
 use crate::api::rpc::flight_scatter::FlightScatter;
 use crate::sql::evaluator::EvalNode;
 use crate::sql::evaluator::Evaluator;
 use crate::sql::evaluator::TypedVector;
-use crate::sql::executor::PhysicalScalar;
 
 #[derive(Clone)]
 pub struct HashFlightScatterV2 {
diff --git a/src/query/service/src/context_function.rs b/src/query/service/src/context_function.rs
index 328a17c87000..e4cac694be36 100644
--- a/src/query/service/src/context_function.rs
+++ b/src/query/service/src/context_function.rs
@@ -18,7 +18,6 @@ use common_datavalues::DataValue;
 use common_exception::ErrorCode;
 use common_exception::Result;
 use common_functions::is_builtin_function;
-use common_legacy_expression::LegacyExpression;
 
 use crate::sessions::QueryContext;
 use crate::sessions::TableContext;
@@ -28,10 +27,7 @@ pub struct ContextFunction;
 impl ContextFunction {
     // Some function args need from context
     // such as `SELECT database()`, the arg is ctx.get_default_db()
-    pub fn build_args_from_ctx(
-        ctx: Arc<QueryContext>,
-        name: &str,
-    ) -> Result<Vec<LegacyExpression>> {
+    pub fn build_args_from_ctx(ctx: Arc<QueryContext>, name: &str) -> Result<Vec<DataValue>> {
         // Check the function is supported in common functions.
         if !is_builtin_function(name) {
             return Result::Err(ErrorCode::UnknownFunction(format!(
@@ -42,28 +38,22 @@ impl ContextFunction {
         Ok(match name.to_lowercase().as_str() {
             "database" | "currentdatabase" | "current_database" => {
-                vec![LegacyExpression::create_literal(DataValue::String(
-                    ctx.get_current_database().into_bytes(),
-                ))]
+                vec![DataValue::String(ctx.get_current_database().into_bytes())]
             }
-            "version" => vec![LegacyExpression::create_literal(DataValue::String(
-                ctx.get_fuse_version().into_bytes(),
-            ))],
-            "user" | "currentuser" | "current_user" => vec![LegacyExpression::create_literal(
-                DataValue::String(ctx.get_current_user()?.identity().to_string().into_bytes()),
+            "version" => vec![DataValue::String(ctx.get_fuse_version().into_bytes())],
+            "user" | "currentuser" | "current_user" => vec![DataValue::String(
+                ctx.get_current_user()?.identity().to_string().into_bytes(),
             )],
-            "current_role" => vec![LegacyExpression::create_literal(DataValue::String(
+            "current_role" => vec![DataValue::String(
                 ctx.get_current_role()
                     .map(|r| r.name)
                     .unwrap_or_else(|| "".to_string())
                     .into_bytes(),
-            ))],
-            "connection_id" => vec![LegacyExpression::create_literal(DataValue::String(
-                ctx.get_connection_id().into_bytes(),
-            ))],
-            "timezone" => vec![LegacyExpression::create_literal(DataValue::String(
+            )],
+            "connection_id" => vec![DataValue::String(ctx.get_connection_id().into_bytes())],
+            "timezone" => vec![DataValue::String(
                 ctx.get_settings().get_timezone()?.into_bytes(),
-            ))],
+            )],
             _ => vec![],
         })
     }
diff --git a/src/query/service/src/interpreters/fragments/v2/fragmenter.rs b/src/query/service/src/interpreters/fragments/v2/fragmenter.rs
index eb1dfe033222..7ca105bae788 100644
--- a/src/query/service/src/interpreters/fragments/v2/fragmenter.rs
+++ b/src/query/service/src/interpreters/fragments/v2/fragmenter.rs
@@ -16,7 +16,7 @@ use std::sync::Arc;
 
 use common_catalog::table_context::TableContext;
 use common_exception::Result;
-use common_sql::executor::StageKind;
+use common_planner::extras::StageKind;
 
 use super::FragmentType;
 use super::PlanFragment;
diff --git a/src/query/service/src/interpreters/interpreter_copy_v2.rs b/src/query/service/src/interpreters/interpreter_copy_v2.rs
index 13ba55db56d4..96ae3e479bbf 100644
--- a/src/query/service/src/interpreters/interpreter_copy_v2.rs
+++ b/src/query/service/src/interpreters/interpreter_copy_v2.rs
@@ -19,13 +19,13 @@ use common_base::base::GlobalIORuntime;
 use common_datavalues::prelude::*;
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_planner::ReadDataSourcePlan;
-use common_planner::SourceInfo;
-use common_planner::StageTableInfo;
 use common_meta_app::schema::GetTableCopiedFileReq;
 use common_meta_app::schema::TableCopiedFileInfo;
 use common_meta_app::schema::UpsertTableCopiedFileReq;
 use common_meta_types::UserStageInfo;
+use common_planner::ReadDataSourcePlan;
+use common_planner::SourceInfo;
+use common_planner::stage_table::StageTableInfo;
 use regex::Regex;
 
 use super::append2table;
diff --git a/src/query/service/src/interpreters/interpreter_delete.rs b/src/query/service/src/interpreters/interpreter_delete.rs
index c590b19cb84f..e0a4eba4637d 100644
--- a/src/query/service/src/interpreters/interpreter_delete.rs
+++ b/src/query/service/src/interpreters/interpreter_delete.rs
@@ -16,7 +16,7 @@ use std::sync::Arc;
 
 use common_datavalues::DataSchemaRef;
 use common_exception::Result;
-use common_planner::DeletePlan;
+use
common_planner::plans::DeletePlan; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; diff --git a/src/query/service/src/interpreters/interpreter_insert_v2.rs b/src/query/service/src/interpreters/interpreter_insert_v2.rs index 1411ae69454d..3277f548245e 100644 --- a/src/query/service/src/interpreters/interpreter_insert_v2.rs +++ b/src/query/service/src/interpreters/interpreter_insert_v2.rs @@ -32,7 +32,7 @@ use common_io::prelude::NestedCheckpointReader; use common_pipeline_sources::processors::sources::AsyncSource; use common_pipeline_sources::processors::sources::AsyncSourcer; use common_pipeline_transforms::processors::transforms::Transform; -use crate::Metadata; +use common_sql::Metadata; use common_sql::MetadataRef; use parking_lot::Mutex; use parking_lot::RwLock; diff --git a/src/query/service/src/interpreters/interpreter_setting.rs b/src/query/service/src/interpreters/interpreter_setting.rs index a407d8a3159f..a64ee229ce9d 100644 --- a/src/query/service/src/interpreters/interpreter_setting.rs +++ b/src/query/service/src/interpreters/interpreter_setting.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use chrono_tz::Tz; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::SettingPlan; +use common_planner::plans::SettingPlan; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; diff --git a/src/query/service/src/interpreters/interpreter_table_describe.rs b/src/query/service/src/interpreters/interpreter_table_describe.rs index 91742ad6b36d..5bb50d0c17b5 100644 --- a/src/query/service/src/interpreters/interpreter_table_describe.rs +++ b/src/query/service/src/interpreters/interpreter_table_describe.rs @@ -19,12 +19,12 @@ use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; use common_planner::plans::DescribeTablePlan; +use common_planner::PhysicalScalar; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; use crate::sessions::TableContext; -use crate::sql::executor::PhysicalScalar; use crate::sql::Planner; use crate::storages::view::view_table::QUERY; use crate::storages::view::view_table::VIEW_ENGINE; diff --git a/src/query/service/src/interpreters/interpreter_table_recluster.rs b/src/query/service/src/interpreters/interpreter_table_recluster.rs index d1c6b6fbb11e..7ece7634453a 100644 --- a/src/query/service/src/interpreters/interpreter_table_recluster.rs +++ b/src/query/service/src/interpreters/interpreter_table_recluster.rs @@ -26,7 +26,6 @@ use crate::pipelines::Pipeline; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; use crate::sessions::TableContext; -use crate::sql::executor::ExpressionBuilderWithoutRenaming; use crate::sql::plans::ReclusterTablePlan; pub struct ReclusterTableInterpreter { @@ -57,12 +56,8 @@ impl Interpreter for ReclusterTableInterpreter { let extras = match &plan.push_downs { None => None, Some(scalar) => { - let eb = ExpressionBuilderWithoutRenaming::create(plan.metadata.clone()); - let pred_expr = eb.build(scalar)?; - Some(Extras { - filters: vec![pred_expr], - ..Extras::default() - }) + // todo(sundy) + todo!() } }; loop { diff --git a/src/query/service/src/interpreters/interpreter_table_show_create.rs b/src/query/service/src/interpreters/interpreter_table_show_create.rs index 3bf4ff98d338..557ce842667e 100644 --- a/src/query/service/src/interpreters/interpreter_table_show_create.rs +++ 
b/src/query/service/src/interpreters/interpreter_table_show_create.rs @@ -18,13 +18,13 @@ use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::Result; use common_planner::plans::ShowCreateTablePlan; +use common_planner::PhysicalScalar; use tracing::debug; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; use crate::sessions::TableContext; -use crate::sql::executor::PhysicalScalar; use crate::sql::is_internal_opt_key; pub struct ShowCreateTableInterpreter { diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index 6015943e93f3..624c06441d29 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -30,6 +30,8 @@ use common_functions::scalars::FunctionContext; use common_functions::scalars::FunctionFactory; use common_pipeline_core::Pipe; use common_pipeline_sinks::processors::sinks::UnionReceiveSink; +use common_planner::AggregateFunctionDesc; +use common_planner::PhysicalScalar; use crate::interpreters::fill_missing_columns; use crate::pipelines::processors::port::InputPort; @@ -61,7 +63,6 @@ use crate::sessions::QueryContext; use crate::sessions::TableContext; use crate::sql::evaluator::Evaluator; use crate::sql::executor::AggregateFinal; -use crate::sql::executor::AggregateFunctionDesc; use crate::sql::executor::AggregatePartial; use crate::sql::executor::ColumnID; use crate::sql::executor::DistributedInsertSelect; @@ -72,7 +73,6 @@ use crate::sql::executor::Filter; use crate::sql::executor::HashJoin; use crate::sql::executor::Limit; use crate::sql::executor::PhysicalPlan; -use crate::sql::executor::PhysicalScalar; use crate::sql::executor::Project; use crate::sql::executor::Sort; use crate::sql::executor::TableScan; @@ -268,10 +268,7 @@ impl PipelineBuilder { let func = FunctionFactory::instance().get("and_filters", &data_types)?; predicate = PhysicalScalar::Function { name: "and_filters".to_string(), - args: vec![ - predicate.clone() , - pred.clone() , - ], + args: vec![predicate.clone(), pred.clone()], return_type: func.return_type(), }; } diff --git a/src/query/service/src/pipelines/processors/mod.rs b/src/query/service/src/pipelines/processors/mod.rs index 40803ae3beb1..bb2d3e7ca05a 100644 --- a/src/query/service/src/pipelines/processors/mod.rs +++ b/src/query/service/src/pipelines/processors/mod.rs @@ -35,8 +35,6 @@ pub use sources::SyncSourcer; pub use transforms::AggregatorParams; pub use transforms::AggregatorTransformParams; pub use transforms::BlockCompactor; -pub use transforms::ExpressionTransform; -pub use transforms::FixedKeyHashTable; pub use transforms::HashJoinDesc; pub use transforms::HashJoinState; pub use transforms::HashTable; diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs index 0b6ad6053bb8..ed769af2f4a0 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs @@ -17,7 +17,8 @@ use std::sync::Arc; use common_catalog::table_context::TableContext; use common_exception::Result; use common_functions::scalars::FunctionFactory; -use common_planner::IndexType; +use common_planner::PhysicalScalar; +use common_sql::IndexType; use parking_lot::RwLock; use crate::pipelines::processors::transforms::hash_join::row::RowPtr; @@ 
-25,7 +26,6 @@ use crate::sessions::QueryContext;
 use crate::sql::evaluator::EvalNode;
 use crate::sql::evaluator::Evaluator;
 use crate::sql::executor::HashJoin;
-use crate::sql::executor::PhysicalScalar;
 use crate::sql::plans::JoinType;
 
 #[derive(Clone, Copy, Eq, PartialEq, Debug, Hash)]
diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/join_hash_table.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/join_hash_table.rs
index 8ec6a05c3570..d420fcd05978 100644
--- a/src/query/service/src/pipelines/processors/transforms/hash_join/join_hash_table.rs
+++ b/src/query/service/src/pipelines/processors/transforms/hash_join/join_hash_table.rs
@@ -31,6 +31,7 @@ use common_exception::Result;
 use common_hashtable::HashMap;
 use common_hashtable::HashtableKeyable;
 use common_hashtable::UnsizedHashMap;
+use common_planner::PhysicalScalar;
 use parking_lot::RwLock;
 use primitive_types::U256;
 use primitive_types::U512;
@@ -43,7 +44,6 @@ use crate::pipelines::processors::transforms::hash_join::util::build_schema_wrap
 use crate::pipelines::processors::transforms::hash_join::util::probe_schema_wrap_nullable;
 use crate::sessions::QueryContext;
 use crate::sessions::TableContext;
-use crate::sql::executor::PhysicalScalar;
 use crate::sql::planner::plans::JoinType;
 
 pub struct SerializerHashTable {
diff --git a/src/query/service/src/pipelines/processors/transforms/mod.rs b/src/query/service/src/pipelines/processors/transforms/mod.rs
index 8984c51724e1..c584a4fd171c 100644
--- a/src/query/service/src/pipelines/processors/transforms/mod.rs
+++ b/src/query/service/src/pipelines/processors/transforms/mod.rs
@@ -36,10 +36,8 @@ pub use chunk_operator::CompoundChunkOperator;
 use common_pipeline_transforms::processors::transforms::transform;
 use common_pipeline_transforms::processors::transforms::transform_block_compact;
 use common_pipeline_transforms::processors::transforms::transform_compact;
-use common_pipeline_transforms::processors::transforms::transform_expression;
 use common_pipeline_transforms::processors::transforms::transform_sort_merge;
 use common_pipeline_transforms::processors::transforms::transform_sort_partial;
-pub use common_pipeline_transforms::processors::ExpressionExecutor;
 pub use hash_join::FixedKeyHashTable;
 pub use hash_join::HashJoinDesc;
 pub use hash_join::HashJoinState;
@@ -56,7 +54,6 @@ pub use transform_compact::TransformCompact;
 pub use transform_create_sets::SubqueryReceiver;
 pub use transform_create_sets::TransformCreateSets;
 pub use transform_dummy::TransformDummy;
-pub use transform_expression::ExpressionTransform;
 pub use transform_hash_join::SinkBuildHashTable;
 pub use transform_hash_join::TransformHashJoinProbe;
 pub use transform_limit::TransformLimit;
diff --git a/src/query/service/src/procedures/procedure.rs b/src/query/service/src/procedures/procedure.rs
index ee5f91ef946a..daf89c9040f3 100644
--- a/src/query/service/src/procedures/procedure.rs
+++ b/src/query/service/src/procedures/procedure.rs
@@ -18,7 +18,6 @@ use common_datablocks::DataBlock;
 use common_datavalues::DataSchema;
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_legacy_expression::validate_function_arg;
 use common_pipeline_core::processors::port::OutputPort;
 use common_pipeline_core::Pipe;
 use common_pipeline_core::Pipeline;
@@ -39,12 +38,6 @@ pub trait Procedure: Sync + Send {
     fn validate(&self, ctx: Arc<QueryContext>, args: &[String]) -> Result<()> {
         let features = self.features();
-        validate_function_arg(
-            self.name(),
-            args.len(),
-            features.variadic_arguments,
-            features.num_arguments,
-        )?;
         if features.management_mode_required && !ctx.get_config().query.management_mode {
             return Err(ErrorCode::ManagementModePermissionDenied(format!(
                 "Access denied: '{}' only used in management-mode",
diff --git a/src/query/service/src/sessions/query_ctx.rs b/src/query/service/src/sessions/query_ctx.rs
index b113cb04d303..a2a13f00b465 100644
--- a/src/query/service/src/sessions/query_ctx.rs
+++ b/src/query/service/src/sessions/query_ctx.rs
@@ -30,16 +30,18 @@ use common_base::base::TrySpawn;
 use common_config::Config;
 use common_config::DATABEND_COMMIT_VERSION;
 use common_datablocks::DataBlock;
+use common_datavalues::DataValue;
 use common_exception::ErrorCode;
 use common_exception::Result;
 use common_functions::scalars::FunctionContext;
 use common_io::prelude::FormatSettings;
-use common_legacy_expression::LegacyExpression;
+use common_meta_app::schema::TableInfo;
+use common_meta_types::UserInfo;
 use common_planner::PartInfoPtr;
 use common_planner::Partitions;
 use common_planner::ReadDataSourcePlan;
 use common_planner::SourceInfo;
-use common_planner::StageTableInfo;
+use common_planner::stage_table::StageTableInfo;
 use common_meta_app::schema::TableInfo;
 use common_meta_types::RoleInfo;
 use common_meta_types::UserInfo;
@@ -93,7 +95,7 @@ impl QueryContext {
         &self,
         catalog_name: &str,
         table_info: &TableInfo,
-        table_args: Option<Vec<LegacyExpression>>,
+        table_args: Option<Vec<DataValue>>,
     ) -> Result<Arc<dyn Table>> {
         let catalog = self.get_catalog(catalog_name)?;
         if table_args.is_none() {
@@ -112,7 +114,7 @@ impl QueryContext {
         &self,
         _catalog: &str,
         table_info: &StageTableInfo,
-        _table_args: Option<Vec<LegacyExpression>>,
+        _table_args: Option<Vec<DataValue>>,
     ) -> Result<Arc<dyn Table>> {
         StageTable::try_create(table_info.clone())
     }
diff --git a/src/query/service/src/sessions/query_ctx_shared.rs b/src/query/service/src/sessions/query_ctx_shared.rs
index 098032b27c53..29db7af06324 100644
--- a/src/query/service/src/sessions/query_ctx_shared.rs
+++ b/src/query/service/src/sessions/query_ctx_shared.rs
@@ -43,7 +43,6 @@
 use crate::servers::http::v1::HttpQueryHandle;
 use crate::sessions::query_affect::QueryAffect;
 use crate::sessions::Session;
 use crate::sessions::Settings;
-use crate::sql::SQLCommon;
 use crate::storages::Table;
 
 type DatabaseAndTable = (String, String, String);
@@ -274,7 +273,7 @@ pub fn attach_query_str(&self, kind: String, query: &str) {
         {
             let mut running_query = self.running_query.write();
-            *running_query = Some(SQLCommon::short_sql(query));
+            *running_query = Some(short_sql(query));
         }
 
         {
@@ -336,3 +335,23 @@
         self.created_time
     }
 }
+
+pub fn short_sql(query: &str) -> String {
+    use unicode_segmentation::UnicodeSegmentation;
+    let query = query.trim_start();
+    if query.len() >= 64 && query[..6].eq_ignore_ascii_case("INSERT") {
+        // keep first 64 graphemes
+        String::from_utf8(
+            query
+                .graphemes(true)
+                .take(64)
+                .flat_map(|g| g.as_bytes().iter())
+                .copied() // copied converts &u8 into u8
+                .chain(b"...".iter().copied())
+                .collect::<Vec<u8>>(),
+        )
+        .unwrap() // by construction, this cannot panic as we extracted unicode graphemes
+    } else {
+        query.to_string()
+    }
+}
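A small property check for the new helper could look like the following (a hypothetical test, assuming it sits in the same module as short_sql; only INSERT statements of 64 bytes or more are truncated, everything else passes through):

#[test]
fn short_sql_truncates_long_inserts_only() {
    let long_insert = format!("INSERT INTO t VALUES {}", "(1),".repeat(64));
    let short = short_sql(&long_insert);
    // 64 graphemes plus the "..." marker (input is ASCII, so chars == graphemes).
    assert!(short.ends_with("..."));
    assert_eq!(short.chars().count(), 64 + 3);
    // Non-INSERT and short statements are returned untouched.
    assert_eq!(short_sql("SELECT 1"), "SELECT 1");
}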
diff --git a/src/query/service/src/table_functions/async_crash_me.rs b/src/query/service/src/table_functions/async_crash_me.rs
index 0815e0226cb7..a14b38b16551 100644
--- a/src/query/service/src/table_functions/async_crash_me.rs
+++ b/src/query/service/src/table_functions/async_crash_me.rs
@@ -25,14 +25,13 @@ use common_datavalues::chrono::TimeZone;
 use common_datavalues::chrono::Utc;
 use common_datavalues::prelude::*;
 use common_exception::Result;
-use common_legacy_expression::LegacyExpression;
-use common_planner::extras::Extras;
-use common_planner::Partitions;
-use common_planner::ReadDataSourcePlan;
-use common_planner::extras::Statistics;
 use common_meta_app::schema::TableIdent;
 use common_meta_app::schema::TableInfo;
 use common_meta_app::schema::TableMeta;
+use common_planner::extras::Extras;
+use common_planner::extras::Statistics;
+use common_planner::Partitions;
+use common_planner::ReadDataSourcePlan;
 use futures::Stream;
 
 use crate::pipelines::processors::port::OutputPort;
@@ -61,9 +60,7 @@ impl AsyncCrashMeTable {
         if let Some(args) = &table_args {
             if args.len() == 1 {
                 let arg = &args[0];
-                if let LegacyExpression::Literal { value, .. } = arg {
-                    panic_message = Some(String::from_utf8(value.as_string()?)?);
-                }
+                panic_message = Some(String::from_utf8(arg.as_string()?)?);
             }
         }
 
@@ -114,7 +111,7 @@ impl Table for AsyncCrashMeTable {
     }
 
     fn table_args(&self) -> Option<Vec<DataValue>> {
-        Some(vec![LegacyExpression::create_literal(DataValue::UInt64(0))])
+        Some(vec![DataValue::UInt64(0)])
     }
 
     fn read_data(
diff --git a/src/query/service/src/table_functions/numbers_table.rs b/src/query/service/src/table_functions/numbers_table.rs
index c3008ed585a6..ee957d36f5bd 100644
--- a/src/query/service/src/table_functions/numbers_table.rs
+++ b/src/query/service/src/table_functions/numbers_table.rs
@@ -24,15 +24,14 @@ use common_datavalues::chrono::Utc;
 use common_datavalues::prelude::*;
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_legacy_expression::LegacyExpression;
+use common_meta_app::schema::TableIdent;
+use common_meta_app::schema::TableInfo;
+use common_meta_app::schema::TableMeta;
 use common_planner::extras::Extras;
+use common_planner::extras::Statistics;
 use common_planner::PartInfoPtr;
 use common_planner::Partitions;
 use common_planner::ReadDataSourcePlan;
-use common_planner::extras::Statistics;
-use common_meta_app::schema::TableIdent;
-use common_meta_app::schema::TableInfo;
-use common_meta_app::schema::TableMeta;
 
 use crate::pipelines::processors::port::OutputPort;
 use crate::pipelines::processors::processor::ProcessorPtr;
@@ -65,7 +64,7 @@ impl NumbersTable {
         if let Some(args) = &table_args {
             if args.len() == 1 {
                 let arg = &args[0];
-                total = Some(value.as_u64()?);
+                total = Some(arg.as_u64()?);
             }
         }
 
@@ -129,22 +128,14 @@ impl Table for NumbersTable {
         let mut limit = None;
 
         if let Some(extras) = &push_downs {
-            if extras.limit.is_some() && extras.filters.is_empty() {
-                let sort_descriptions_result =
-                    get_sort_descriptions(&self.table_info.schema(), &extras.order_by);
-
+            if extras.limit.is_some() && extras.filters.is_empty() && extras.order_by.is_empty() {
                 // It is allowed to have an error when we can't get sort columns from the expression. For
                 // example 'select number from numbers(10) order by number+4 limit 10', the column 'number+4'
                 // doesn't exist in the numbers table.
                 // For case like that, we ignore the error and don't apply any optimization.
// No order by case - match sort_descriptions_result { - Ok(v) if v.is_empty() => { - limit = extras.limit; - } - _ => {} - } + limit = extras.limit; } } let total = match limit { diff --git a/src/query/service/src/table_functions/sync_crash_me.rs b/src/query/service/src/table_functions/sync_crash_me.rs index 6b4197496bc3..f02ce69904b0 100644 --- a/src/query/service/src/table_functions/sync_crash_me.rs +++ b/src/query/service/src/table_functions/sync_crash_me.rs @@ -24,14 +24,13 @@ use common_datavalues::chrono::TimeZone; use common_datavalues::chrono::Utc; use common_datavalues::prelude::*; use common_exception::Result; -use common_legacy_expression::LegacyExpression; -use common_planner::extras::Extras; -use common_planner::Partitions; -use common_planner::ReadDataSourcePlan; -use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; +use common_planner::extras::Extras; +use common_planner::extras::Statistics; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; use futures::Stream; use crate::pipelines::processors::port::OutputPort; @@ -61,9 +60,7 @@ impl SyncCrashMeTable { if let Some(args) = &table_args { if args.len() == 1 { let arg = &args[0]; - if let LegacyExpression::Literal { value, .. } = arg { - panic_message = Some(String::from_utf8(value.as_string()?)?); - } + panic_message = Some(String::from_utf8(arg.as_string()?)?); } } diff --git a/src/query/service/tests/it/sql/planner/format/mod.rs b/src/query/service/tests/it/sql/planner/format/mod.rs index 767552872684..6aeef4f12bbe 100644 --- a/src/query/service/tests/it/sql/planner/format/mod.rs +++ b/src/query/service/tests/it/sql/planner/format/mod.rs @@ -20,7 +20,6 @@ use common_datavalues::DataValue; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; -use crate::Metadata; use databend_query::sql::optimizer::SExpr; use databend_query::sql::planner::plans::JoinType; use databend_query::sql::plans::BoundColumnRef; @@ -34,6 +33,8 @@ use databend_query::sql::Visibility; use databend_query::storages::Table; use parking_lot::RwLock; +use crate::Metadata; + struct DummyTable { table_info: TableInfo, } diff --git a/src/query/service/tests/it/storages/fuse/table.rs b/src/query/service/tests/it/storages/fuse/table.rs index c2c2e1aa47f1..b1fd16685b41 100644 --- a/src/query/service/tests/it/storages/fuse/table.rs +++ b/src/query/service/tests/it/storages/fuse/table.rs @@ -17,11 +17,11 @@ use std::default::Default; use common_ast::ast::Engine; use common_base::base::tokio; use common_exception::Result; -use common_planner::ReadDataSourcePlan; -use common_planner::SourceInfo; use common_meta_app::schema::TableInfo; use common_planner::plans::AlterTableClusterKeyPlan; use common_planner::plans::DropTableClusterKeyPlan; +use common_planner::ReadDataSourcePlan; +use common_planner::SourceInfo; use databend_query::interpreters::AlterTableClusterKeyInterpreter; use databend_query::interpreters::CreateTableInterpreterV2; use databend_query::interpreters::DropTableClusterKeyInterpreter; diff --git a/src/query/service/tests/it/storages/fuse/table_test_fixture.rs b/src/query/service/tests/it/storages/fuse/table_test_fixture.rs index a3f7b0c5702a..0cac77f82b8e 100644 --- a/src/query/service/tests/it/storages/fuse/table_test_fixture.rs +++ b/src/query/service/tests/it/storages/fuse/table_test_fixture.rs @@ -22,8 +22,8 @@ use common_datablocks::DataBlock; use 
common_datavalues::prelude::*; use common_exception::Result; use common_legacy_expression::LegacyExpression; -use common_planner::extras::Extras; use common_meta_app::schema::DatabaseMeta; +use common_planner::extras::Extras; use common_planner::plans::CreateDatabasePlan; use common_storage::StorageFsConfig; use common_storage::StorageParams; diff --git a/src/query/service/tests/it/storages/memory.rs b/src/query/service/tests/it/storages/memory.rs index 02ed228e3dd1..b26382b71352 100644 --- a/src/query/service/tests/it/storages/memory.rs +++ b/src/query/service/tests/it/storages/memory.rs @@ -17,9 +17,9 @@ use common_datablocks::assert_blocks_sorted_eq; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::Result; -use common_planner::*; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; +use common_planner::*; use databend_query::sessions::TableContext; use databend_query::sql::plans::create_table_v2::TableOptions; use databend_query::storages::memory::MemoryTable; diff --git a/src/query/service/tests/it/storages/result/result_table.rs b/src/query/service/tests/it/storages/result/result_table.rs index 4defda991951..5cf1c64c04a6 100644 --- a/src/query/service/tests/it/storages/result/result_table.rs +++ b/src/query/service/tests/it/storages/result/result_table.rs @@ -23,9 +23,9 @@ use common_datavalues::DataField; use common_datavalues::DataSchema; use common_datavalues::ToDataType; use common_exception::Result; +use common_meta_types::UserIdentity; use common_planner::ReadDataSourcePlan; use common_planner::SourceInfo; -use common_meta_types::UserIdentity; use databend_query::sessions::TableContext; use databend_query::storages::result::ResultQueryInfo; use databend_query::storages::result::ResultTable; diff --git a/src/query/sql/src/evaluator/physical_scalar.rs b/src/query/sql/src/evaluator/physical_scalar.rs index 2300eac38b82..2612c841049b 100644 --- a/src/query/sql/src/evaluator/physical_scalar.rs +++ b/src/query/sql/src/evaluator/physical_scalar.rs @@ -40,11 +40,10 @@ pub trait PhysicalScalarOp { self.binary_op("=", other) } - fn not_eq(&self, other: &Self) -> Result { + fn not_eq(&self, other: &Self) -> Result { self.binary_op("!=", other) } - fn gt_eq(&self, other: &Self) -> Result { self.binary_op(">=", other) } @@ -69,10 +68,7 @@ impl PhysicalScalarOp for PhysicalScalar { Ok(PhysicalScalar::Function { name: name.to_owned(), - args: vec![ - self.clone(), - other.clone() - ], + args: vec![self.clone(), other.clone()], return_type: func.return_type(), }) } diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs index 7f62a71cfe06..a932f013a047 100644 --- a/src/query/sql/src/executor/physical_plan_builder.rs +++ b/src/query/sql/src/executor/physical_plan_builder.rs @@ -626,26 +626,17 @@ impl<'a> PhysicalScalarBuilder<'a> { }), Scalar::AndExpr(and) => Ok(PhysicalScalar::Function { name: "and".to_string(), - args: vec![ - self.build(&and.left)?, - self.build(&and.right)? - ], + args: vec![self.build(&and.left)?, self.build(&and.right)?], return_type: and.data_type(), }), Scalar::OrExpr(or) => Ok(PhysicalScalar::Function { name: "or".to_string(), - args: vec![ - self.build(&or.left)? , - self.build(&or.right)? - ], + args: vec![self.build(&or.left)?, self.build(&or.right)?], return_type: or.data_type(), }), Scalar::ComparisonExpr(comp) => Ok(PhysicalScalar::Function { name: comp.op.to_func_name(), - args: vec![ - self.build(&comp.left)? 
, - self.build(&comp.right)? - ], + args: vec![self.build(&comp.left)?, self.build(&comp.right)?], return_type: comp.data_type(), }), Scalar::FunctionCall(func) => Ok(PhysicalScalar::Function { @@ -654,7 +645,7 @@ impl<'a> PhysicalScalarBuilder<'a> { .arguments .iter() .zip(func.arg_types.iter()) - .map(|(arg, typ)| Ok(self.build(arg)?)) + .map(|(arg, _)| Ok(self.build(arg)?)) .collect::>()?, return_type: *func.return_type.clone(), }), diff --git a/src/query/storages/factory/src/result/block_buffer.rs b/src/query/storages/factory/src/result/block_buffer.rs index c9df591c0342..8dbad0e361c3 100644 --- a/src/query/storages/factory/src/result/block_buffer.rs +++ b/src/query/storages/factory/src/result/block_buffer.rs @@ -20,9 +20,8 @@ use common_base::base::tokio::sync::Notify; use common_catalog::table_context::TableContext; use common_datablocks::DataBlock; use common_exception::Result; -use common_planner::PartInfoPtr; use common_planner::plans::Projection; - +use common_planner::PartInfoPtr; use crate::fuse::io::BlockReader; use crate::result::ResultQueryInfo; diff --git a/src/query/storages/factory/src/result/result_table.rs b/src/query/storages/factory/src/result/result_table.rs index 2938be8cec82..2e33320949d3 100644 --- a/src/query/storages/factory/src/result/result_table.rs +++ b/src/query/storages/factory/src/result/result_table.rs @@ -20,16 +20,16 @@ use common_datavalues::DataSchemaRef; use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::SegmentInfo; -use common_planner::extras::Extras; -use common_planner::Partitions; -use common_planner::plans::Projection; -use common_planner::ReadDataSourcePlan; -use common_planner::extras::Statistics; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; use common_meta_types::UserIdentity; use common_pipeline_core::Pipeline; use common_pipeline_transforms::processors::transforms::TransformLimit; +use common_planner::extras::Extras; +use common_planner::extras::Statistics; +use common_planner::plans::Projection; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; use serde::Deserialize; use serde::Serialize; diff --git a/src/query/storages/factory/src/result/result_table_sink.rs b/src/query/storages/factory/src/result/result_table_sink.rs index 36ef0cc4d5e3..495542526ff1 100644 --- a/src/query/storages/factory/src/result/result_table_sink.rs +++ b/src/query/storages/factory/src/result/result_table_sink.rs @@ -23,12 +23,12 @@ use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::SegmentInfo; use common_fuse_meta::meta::Statistics as FuseMetaStatistics; -use common_planner::PartInfoPtr; -use common_planner::plans::Projection; use common_pipeline_core::processors::port::InputPort; use common_pipeline_core::processors::processor::Event; use common_pipeline_core::processors::processor::ProcessorPtr; use common_pipeline_core::processors::Processor; +use common_planner::plans::Projection; +use common_planner::PartInfoPtr; use opendal::Operator; use crate::fuse::io::BlockReader; diff --git a/src/query/storages/factory/src/result/result_table_source.rs b/src/query/storages/factory/src/result/result_table_source.rs index 9f8836880cd3..0c6d6e198e4d 100644 --- a/src/query/storages/factory/src/result/result_table_source.rs +++ b/src/query/storages/factory/src/result/result_table_source.rs @@ -21,11 +21,11 @@ use common_catalog::table_context::TableContext; use common_datablocks::DataBlock; use common_exception::ErrorCode; use 
common_exception::Result; -use common_planner::PartInfoPtr; use common_pipeline_core::processors::port::OutputPort; use common_pipeline_core::processors::processor::Event; use common_pipeline_core::processors::processor::ProcessorPtr; use common_pipeline_core::processors::Processor; +use common_planner::PartInfoPtr; use crate::fuse::io::BlockReader; use crate::result::result_table_source::State::Generated; diff --git a/src/query/storages/factory/src/stage/stage_table.rs b/src/query/storages/factory/src/stage/stage_table.rs index b51e932553ef..b1f66f715b14 100644 --- a/src/query/storages/factory/src/stage/stage_table.rs +++ b/src/query/storages/factory/src/stage/stage_table.rs @@ -20,17 +20,17 @@ use common_catalog::table_context::TableContext; use common_datablocks::DataBlock; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::extras::Extras; -use common_planner::Partitions; -use common_planner::ReadDataSourcePlan; -use common_planner::StageTableInfo; -use common_planner::extras::Statistics; use common_meta_app::schema::TableInfo; use common_meta_types::StageType; use common_meta_types::UserStageInfo; use common_pipeline_core::Pipeline; use common_pipeline_sources::processors::sources::input_formats::InputContext; use common_pipeline_transforms::processors::transforms::TransformLimit; +use common_planner::extras::Extras; +use common_planner::extras::Statistics; +use common_planner::stage_table::StageTableInfo; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; use common_storage::init_operator; use opendal::layers::SubdirLayer; use opendal::Operator; diff --git a/src/query/storages/factory/src/stage/stage_table_sink.rs b/src/query/storages/factory/src/stage/stage_table_sink.rs index cd946414008e..ddfee9b52366 100644 --- a/src/query/storages/factory/src/stage/stage_table_sink.rs +++ b/src/query/storages/factory/src/stage/stage_table_sink.rs @@ -26,12 +26,12 @@ use common_exception::ErrorCode; use common_exception::Result; use common_formats::output_format::OutputFormat; use common_formats::output_format::OutputFormatType; -use common_planner::StageTableInfo; use common_pipeline_core::processors::port::InputPort; use common_pipeline_core::processors::port::OutputPort; use common_pipeline_core::processors::processor::Event; use common_pipeline_core::processors::processor::ProcessorPtr; use common_pipeline_core::processors::Processor; +use common_planner::stage_table::StageTableInfo; use opendal::Operator; use tracing::warn; diff --git a/src/query/storages/fuse/Cargo.toml b/src/query/storages/fuse/Cargo.toml index 36ca18d0558f..ebb42d248059 100644 --- a/src/query/storages/fuse/Cargo.toml +++ b/src/query/storages/fuse/Cargo.toml @@ -16,6 +16,7 @@ common-arrow = { path = "../../../common/arrow" } common-base = { path = "../../../common/base" } common-cache = { path = "../../../common/cache" } common-catalog = { path = "../../../common/../query/catalog" } +common-sql = { path = "../../../common/../query/sql" } common-datablocks = { path = "../../datablocks" } common-datavalues = { path = "../../datavalues" } common-exception = { path = "../../../common/exception" } diff --git a/src/query/storages/fuse/src/fuse_part.rs b/src/query/storages/fuse/src/fuse_part.rs index 5d5cf7f0a643..253c6888d2eb 100644 --- a/src/query/storages/fuse/src/fuse_part.rs +++ b/src/query/storages/fuse/src/fuse_part.rs @@ -22,9 +22,9 @@ use common_arrow::arrow::datatypes::Schema as ArrowSchema; use common_exception::ErrorCode; use common_exception::Result; use 
common_fuse_meta::meta::Compression;
+use common_planner::plans::Projection;
 use common_planner::PartInfo;
 use common_planner::PartInfoPtr;
-use common_planner::plans::Projection;
 
 #[derive(serde::Serialize, serde::Deserialize, PartialEq, Eq)]
 pub struct ColumnMeta {
diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs
index 8c7ca2eabdb7..995d2629a62b 100644
--- a/src/query/storages/fuse/src/fuse_table.rs
+++ b/src/query/storages/fuse/src/fuse_table.rs
@@ -33,14 +33,13 @@ use common_fuse_meta::meta::ColumnStatistics as FuseColumnStatistics;
 use common_fuse_meta::meta::Statistics as FuseStatistics;
 use common_fuse_meta::meta::TableSnapshot;
 use common_fuse_meta::meta::Versioned;
-use common_legacy_expression::LegacyExpression;
-use common_legacy_parser::ExpressionParser;
-use common_planner::DeletePlan;
+use common_meta_app::schema::TableInfo;
 use common_planner::extras::Extras;
+use common_planner::extras::Statistics;
+use common_planner::plans::DeletePlan;
 use common_planner::Partitions;
+use common_planner::PhysicalScalar;
 use common_planner::ReadDataSourcePlan;
-use common_planner::extras::Statistics;
-use common_meta_app::schema::TableInfo;
 use common_sharing::create_share_table_operator;
 use common_storage::init_operator;
 use common_storage::DataOperator;
@@ -73,7 +72,7 @@ pub struct FuseTable {
     pub(crate) table_info: TableInfo,
     pub(crate) meta_location_generator: TableMetaLocationGenerator,
 
-    pub(crate) cluster_keys: Vec<LegacyExpression>,
+    pub(crate) cluster_keys: Vec<PhysicalScalar>,
     pub(crate) cluster_key_meta: Option<ClusterKey>,
     pub(crate) read_only: bool,
 
@@ -87,7 +86,14 @@ impl FuseTable {
         Ok(r)
     }
 
-    fn init_operator(table_info: &TableInfo) -> Result<Operator> {
+    pub fn do_create(table_info: TableInfo, read_only: bool) -> Result<Box<FuseTable>> {
+        let storage_prefix = Self::parse_storage_prefix(&table_info)?;
+        let cluster_key_meta = table_info.meta.cluster_key();
+        let mut cluster_keys = Vec::new();
+        if let Some((_, order)) = &cluster_key_meta {
+            // todo(sundy)
+            // sync_type_checker or block_on
+        }
         let operator = match table_info.from_share {
             Some(ref from_share) => create_share_table_operator(
                 ShareTableConfig::share_endpoint_address(),
@@ -264,7 +270,7 @@ impl Table for FuseTable {
         true
     }
 
-    fn cluster_keys(&self) -> Vec<LegacyExpression> {
+    fn cluster_keys(&self) -> Vec<PhysicalScalar> {
         self.cluster_keys.clone()
     }
 
diff --git a/src/query/storages/fuse/src/io/read/block_reader.rs b/src/query/storages/fuse/src/io/read/block_reader.rs
index a2ea04a3baf5..aa177d632f3e 100644
--- a/src/query/storages/fuse/src/io/read/block_reader.rs
+++ b/src/query/storages/fuse/src/io/read/block_reader.rs
@@ -34,8 +34,8 @@ use common_exception::ErrorCode;
 use common_exception::Result;
 use common_fuse_meta::meta::BlockMeta;
 use common_fuse_meta::meta::Compression;
-use common_planner::PartInfoPtr;
 use common_planner::plans::Projection;
+use common_planner::PartInfoPtr;
 use futures::AsyncReadExt;
 use futures::StreamExt;
 use futures::TryStreamExt;
diff --git a/src/query/storages/fuse/src/operations/append.rs b/src/query/storages/fuse/src/operations/append.rs
index 20f009964fa2..77c1c650d739 100644
--- a/src/query/storages/fuse/src/operations/append.rs
+++ b/src/query/storages/fuse/src/operations/append.rs
@@ -17,14 +17,12 @@ use std::sync::Arc;
 
 use common_catalog::table_context::TableContext;
 use common_datablocks::SortColumnDescription;
+use common_datavalues::DataField;
 use common_datavalues::DataSchemaRefExt;
 use common_exception::Result;
-use common_legacy_expression::LegacyExpression;
 use common_pipeline_core::Pipeline;
-use
common_pipeline_transforms::processors::transforms::ExpressionTransform; use common_pipeline_transforms::processors::transforms::TransformCompact; use common_pipeline_transforms::processors::transforms::TransformSortPartial; -use common_pipeline_transforms::processors::ExpressionExecutor; use crate::io::BlockCompactor; use crate::operations::FuseTableSink; @@ -60,7 +58,8 @@ impl FuseTable { .cluster_keys .iter() .map(|expr| SortColumnDescription { - column_name: expr.column_name(), + // todo(sundy): use index instead + column_name: expr.pretty_display(), asc: true, nulls_first: false, }) @@ -114,63 +113,8 @@ impl FuseTable { if self.cluster_keys.is_empty() { return Ok(ClusterStatsGenerator::default()); } - - let input_schema = self.table_info.schema(); - let mut merged = input_schema.fields().clone(); - - let mut cluster_key_index = Vec::with_capacity(self.cluster_keys.len()); - for expr in &self.cluster_keys { - let cname = expr.column_name(); - let index = match merged.iter().position(|x| x.name() == &cname) { - None => { - merged.push(expr.to_data_field(&input_schema)?); - merged.len() - 1 - } - Some(idx) => idx, - }; - cluster_key_index.push(index); - } - - let output_schema = DataSchemaRefExt::create(merged); - - let mut expression_executor = None; - if output_schema != input_schema { - pipeline.add_transform(|transform_input_port, transform_output_port| { - ExpressionTransform::try_create( - transform_input_port, - transform_output_port, - input_schema.clone(), - output_schema.clone(), - self.cluster_keys.clone(), - ctx.clone(), - ) - })?; - - let exprs: Vec = output_schema - .fields() - .iter() - .map(|f| LegacyExpression::Column(f.name().to_owned())) - .collect(); - - let executor = ExpressionExecutor::try_create( - ctx.clone(), - "remove unused columns", - output_schema.clone(), - input_schema.clone(), - exprs, - true, - )?; - executor.validate()?; - expression_executor = Some(executor); - } - - Ok(ClusterStatsGenerator::new( - self.cluster_key_meta.as_ref().unwrap().0, - cluster_key_index, - expression_executor, - level, - block_compactor, - )) + // todo(sundy) project transform + todo!() } pub fn get_option(&self, opt_key: &str, default: T) -> T { diff --git a/src/query/storages/fuse/src/operations/compact.rs b/src/query/storages/fuse/src/operations/compact.rs index c5da23e979b8..d2f21265c2b3 100644 --- a/src/query/storages/fuse/src/operations/compact.rs +++ b/src/query/storages/fuse/src/operations/compact.rs @@ -17,10 +17,10 @@ use std::sync::Arc; use common_catalog::table::CompactTarget; use common_exception::Result; use common_fuse_meta::meta::TableSnapshot; -use common_planner::ReadDataSourcePlan; -use common_planner::SourceInfo; use common_pipeline_core::Pipeline; use common_pipeline_transforms::processors::transforms::TransformCompact; +use common_planner::ReadDataSourcePlan; +use common_planner::SourceInfo; use super::FuseTableSink; use crate::operations::mutation::SegmentCompactMutator; diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index b8a776393f8b..83cd47b334a1 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -15,15 +15,14 @@ use std::sync::Arc; use common_catalog::table_context::TableContext; +use common_datavalues::DataField; use common_datavalues::DataSchemaRefExt; use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::TableSnapshot; -use common_legacy_expression::LegacyExpression; -use 
common_legacy_parser::ExpressionParser; -use common_planner::DeletePlan; use common_planner::extras::Extras; -use common_pipeline_transforms::processors::ExpressionExecutor; +use common_planner::plans::DeletePlan; +use common_planner::PhysicalScalar; use tracing::debug; use crate::operations::mutation::delete_from_block; @@ -52,12 +51,8 @@ impl FuseTable { // check if unconditional deletion if let Some(filter) = &plan.selection { - let expr = ExpressionParser::parse_exprs(filter)?; - if expr.is_empty() { - return Err(ErrorCode::IndexOutOfBounds( - "expression should be valid, but not", - )); - } + // todo(sundy): use type_checker + let expr = vec![]; self.delete_rows(ctx, &snapshot, &expr[0], plan).await } else { // deleting the whole table... just a truncate @@ -74,7 +69,7 @@ impl FuseTable { &self, ctx: Arc, snapshot: &Arc, - filter: &LegacyExpression, + filter: &PhysicalScalar, plan: &DeletePlan, ) -> Result<()> { let cluster_stats_gen = self.cluster_stats_gen(ctx.clone())?; @@ -150,54 +145,7 @@ impl FuseTable { } fn cluster_stats_gen(&self, ctx: Arc) -> Result { - if self.cluster_key_meta.is_none() { - return Ok(ClusterStatsGenerator::default()); - } - - let len = self.cluster_keys.len(); - let cluster_key_id = self.cluster_key_meta.clone().unwrap().0; - - let input_schema = self.table_info.schema(); - let input_fields = input_schema.fields().clone(); - - let mut cluster_key_index = Vec::with_capacity(len); - let mut output_fields = Vec::with_capacity(len); - let mut exists = true; - for expr in &self.cluster_keys { - output_fields.push(expr.to_data_field(&input_schema)?); - - if exists { - match input_fields - .iter() - .position(|x| x.name() == &expr.column_name()) - { - None => exists = false, - Some(idx) => cluster_key_index.push(idx), - }; - } - } - - let mut expression_executor = None; - if !exists { - cluster_key_index = (0..len).collect(); - let output_schema = DataSchemaRefExt::create(output_fields); - let executor = ExpressionExecutor::try_create( - ctx, - "expression executor for generator cluster statistics", - input_schema, - output_schema, - self.cluster_keys.clone(), - true, - )?; - expression_executor = Some(executor); - } - - Ok(ClusterStatsGenerator::new( - cluster_key_id, - cluster_key_index, - expression_executor, - 0, - self.get_block_compactor(), - )) + // todo:(sundy) + todo!() } } diff --git a/src/query/storages/fuse/src/operations/mutation/block_filter.rs b/src/query/storages/fuse/src/operations/mutation/block_filter.rs index af5970e445fd..1faa083a473e 100644 --- a/src/query/storages/fuse/src/operations/mutation/block_filter.rs +++ b/src/query/storages/fuse/src/operations/mutation/block_filter.rs @@ -22,11 +22,10 @@ use common_datavalues::DataSchemaRefExt; use common_datavalues::Series; use common_exception::Result; use common_fuse_meta::meta::BlockMeta; -use common_legacy_expression::LegacyExpression; use common_planner::plans::Projection; +use common_planner::PhysicalScalar; use crate::operations::mutation::deletion_mutator::Deletion; -use crate::pipelines::processors::transforms::ExpressionExecutor; use crate::FuseTable; pub async fn delete_from_block( @@ -34,7 +33,7 @@ pub async fn delete_from_block( block_meta: &BlockMeta, ctx: &Arc, filter_column_proj: Projection, - filter_expr: &LegacyExpression, + filter_expr: &PhysicalScalar, ) -> Result { let mut filtering_whole_block = false; @@ -62,20 +61,8 @@ pub async fn delete_from_block( let reader = table.create_block_reader(proj)?; let data_block = reader.read_with_block_meta(block_meta).await?; - let schema = 
table.table_info.schema();
-    let expr_field = filter_expr.to_data_field(&schema)?;
-    let expr_schema = DataSchemaRefExt::create(vec![expr_field]);
-
-    // get the filter
-    let expr_exec = ExpressionExecutor::try_create(
-        ctx.clone(),
-        "filter expression executor (delete) ",
-        schema.clone(),
-        expr_schema,
-        vec![filter_expr.clone()],
-        false,
-    )?;
-    let filter_result = expr_exec.execute(&data_block)?;
+    // todo(sundy)
+    let filter_result = DataBlock::empty();
 
     let predicates = DataBlock::cast_to_nonull_boolean(filter_result.column(0))?;
     // shortcut, if predicates is const boolean (or can be cast to boolean)
diff --git a/src/query/storages/fuse/src/operations/operation_log.rs b/src/query/storages/fuse/src/operations/operation_log.rs
index 70609e88ea61..b826e156d9eb 100644
--- a/src/query/storages/fuse/src/operations/operation_log.rs
+++ b/src/query/storages/fuse/src/operations/operation_log.rs
@@ -22,6 +22,7 @@ use common_datavalues::DataSchemaRef;
 use common_datavalues::DataValue;
 use common_exception::ErrorCode;
 use common_fuse_meta::meta::SegmentInfo;
+use common_planner::extras::SINK_SCHEMA;
 
 // currently, only support append,
 pub type TableOperationLog = Vec<AppendOperationLogEntry>;
@@ -34,7 +35,7 @@ pub struct AppendOperationLogEntry {
 
 impl AppendOperationLogEntry {
     pub fn schema() -> DataSchemaRef {
-        common_legacy_planners::SINK_SCHEMA.clone()
+        SINK_SCHEMA.clone()
     }
 
     pub fn new(segment_location: String, segment_info: Arc<SegmentInfo>) -> Self {
diff --git a/src/query/storages/fuse/src/operations/read_data.rs b/src/query/storages/fuse/src/operations/read_data.rs
index 5e7910646669..d59c6ac46227 100644
--- a/src/query/storages/fuse/src/operations/read_data.rs
+++ b/src/query/storages/fuse/src/operations/read_data.rs
@@ -19,17 +19,18 @@ use common_catalog::table_context::TableContext;
 use common_datavalues::DataSchemaRefExt;
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_planner::extras::Extras;
-use common_planner::PartInfoPtr;
-use common_planner::PrewhereInfo;
-use common_planner::plans::Projection;
-use common_planner::ReadDataSourcePlan;
+use common_functions::scalars::FunctionContext;
 use common_pipeline_core::processors::port::OutputPort;
 use common_pipeline_core::processors::processor::Event;
 use common_pipeline_core::processors::processor::ProcessorPtr;
 use common_pipeline_core::processors::Processor;
 use common_pipeline_core::Pipeline;
-use common_pipeline_transforms::processors::ExpressionExecutor;
+use common_planner::extras::Extras;
+use common_planner::extras::PrewhereInfo;
+use common_planner::plans::Projection;
+use common_planner::PartInfoPtr;
+use common_planner::ReadDataSourcePlan;
+use common_sql::evaluator::EvalNode;
 use tracing::info;
 
 use crate::fuse_lazy_part::FuseLazyPartInfo;
@@ -93,24 +94,12 @@ impl FuseTable {
         &self,
         ctx: Arc<dyn TableContext>,
         plan: &ReadDataSourcePlan,
-    ) -> Result<Arc<Option<ExpressionExecutor>>> {
+    ) -> Result<Arc<Option<EvalNode>>> {
         Ok(match self.prewhere_of_push_downs(&plan.push_downs) {
             None => Arc::new(None),
             Some(v) => {
-                let table_schema = self.table_info.schema();
-                let prewhere_schema = Arc::new(v.prewhere_columns.project_schema(&table_schema));
-                let expr_field = v.filter.to_data_field(&prewhere_schema)?;
-                let expr_schema = DataSchemaRefExt::create(vec![expr_field]);
-
-                let executor = ExpressionExecutor::try_create(
-                    ctx,
-                    "filter expression executor (prewhere) ",
-                    prewhere_schema,
-                    expr_schema,
-                    vec![v.filter],
-                    false,
-                )?;
-                Arc::new(Some(executor))
+                // todo(sundy)
+                todo!()
             }
         })
     }
@@ -214,3 +203,239 @@ impl FuseTable {
         pipeline.resize(resize_to)
     }
 }
+
+type DataChunks = Vec<(usize, Vec<u8>)>;
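// A possible shape for the prewhere todo!() above, as a sketch only: it assumes
// Evaluator::eval_physical_scalar (used the same way in range_filter.rs) can
// compile the PrewhereInfo filter straight into an EvalNode.
//
//     Some(v) => {
//         let filter = Evaluator::eval_physical_scalar(&v.filter)?;
//         Arc::new(Some(filter))
//     }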
+
+struct PrewhereData {
+    data_block: DataBlock,
+    filter: ColumnRef,
+}
+
+enum State {
+    ReadDataPrewhere(Option<PartInfoPtr>),
+    ReadDataRemain(PartInfoPtr, PrewhereData),
+    PrewhereFilter(PartInfoPtr, DataChunks),
+    Deserialize(PartInfoPtr, DataChunks, Option<PrewhereData>),
+    Generated(Option<PartInfoPtr>, DataBlock),
+    Finish,
+}
+
+struct FuseTableSource {
+    state: State,
+    ctx: Arc<dyn TableContext>,
+    scan_progress: Arc<Progress>,
+    output: Arc<OutputPort>,
+    output_reader: Arc<BlockReader>,
+
+    prewhere_reader: Arc<BlockReader>,
+    prewhere_filter: Arc<Option<EvalNode>>,
+    remain_reader: Arc<Option<BlockReader>>,
+}
+
+impl FuseTableSource {
+    pub fn create(
+        ctx: Arc<dyn TableContext>,
+        output: Arc<OutputPort>,
+        output_reader: Arc<BlockReader>,
+        prewhere_reader: Arc<BlockReader>,
+        prewhere_filter: Arc<Option<EvalNode>>,
+        remain_reader: Arc<Option<BlockReader>>,
+    ) -> Result<ProcessorPtr> {
+        let scan_progress = ctx.get_scan_progress();
+        Ok(ProcessorPtr::create(Box::new(FuseTableSource {
+            ctx,
+            output,
+            scan_progress,
+            state: State::ReadDataPrewhere(None),
+            output_reader,
+            prewhere_reader,
+            prewhere_filter,
+            remain_reader,
+        })))
+    }
+
+    fn generate_one_block(&mut self, block: DataBlock) -> Result<()> {
+        let new_part = self.ctx.try_get_part();
+        // resort and prune columns
+        let block = block.resort(self.output_reader.schema())?;
+        self.state = State::Generated(new_part, block);
+        Ok(())
+    }
+
+    fn generate_one_empty_block(&mut self) -> Result<()> {
+        let schema = self.output_reader.schema();
+        let new_part = self.ctx.try_get_part();
+        self.state = State::Generated(new_part, DataBlock::empty_with_schema(schema));
+        Ok(())
+    }
+}
+
+#[async_trait::async_trait]
+impl Processor for FuseTableSource {
+    fn name(&self) -> String {
+        "FuseEngineSource".to_string()
+    }
+
+    fn as_any(&mut self) -> &mut dyn Any {
+        self
+    }
+
+    fn event(&mut self) -> Result<Event> {
+        if matches!(self.state, State::ReadDataPrewhere(None)) {
+            self.state = match self.ctx.try_get_part() {
+                None => State::Finish,
+                Some(part) => State::ReadDataPrewhere(Some(part)),
+            }
+        }
+
+        if matches!(self.state, State::Finish) {
+            self.output.finish();
+            return Ok(Event::Finished);
+        }
+
+        if self.output.is_finished() {
+            return Ok(Event::Finished);
+        }
+
+        if !self.output.can_push() {
+            return Ok(Event::NeedConsume);
+        }
+
+        if matches!(self.state, State::Generated(_, _)) {
+            if let State::Generated(part, data_block) =
+                std::mem::replace(&mut self.state, State::Finish)
+            {
+                self.state = match part {
+                    None => State::Finish,
+                    Some(part) => State::ReadDataPrewhere(Some(part)),
+                };
+
+                self.output.push_data(Ok(data_block));
+                return Ok(Event::NeedConsume);
+            }
+        }
+
+        match self.state {
+            State::Finish => Ok(Event::Finished),
+            State::ReadDataPrewhere(_) => Ok(Event::Async),
+            State::ReadDataRemain(_, _) => Ok(Event::Async),
+            State::PrewhereFilter(_, _) => Ok(Event::Sync),
+            State::Deserialize(_, _, _) => Ok(Event::Sync),
+            State::Generated(_, _) => Err(ErrorCode::LogicalError("It's a bug.")),
+        }
+    }
+
+    fn process(&mut self) -> Result<()> {
+        match std::mem::replace(&mut self.state, State::Finish) {
+            State::Deserialize(part, chunks, prewhere_data) => {
+                let data_block = if let Some(PrewhereData {
+                    data_block: mut prewhere_blocks,
+                    filter,
+                }) = prewhere_data
+                {
+                    let block = if chunks.is_empty() {
+                        prewhere_blocks
+                    } else if let Some(remain_reader) = self.remain_reader.as_ref() {
+                        let remain_block = remain_reader.deserialize(part, chunks)?;
+                        for (col, field) in remain_block
+                            .columns()
+                            .iter()
+                            .zip(remain_block.schema().fields())
+                        {
+                            prewhere_blocks =
+                                prewhere_blocks.add_column(col.clone(), field.clone())?;
+                        }
+                        prewhere_blocks
+                    } else {
+                        return Err(ErrorCode::LogicalError("It's a bug.
Need remain reader")); + }; + // the last step of prewhere + let progress_values = ProgressValues { + rows: block.num_rows(), + bytes: block.memory_size(), + }; + self.scan_progress.incr(&progress_values); + DataBlock::filter_block(block, &filter)? + } else { + let block = self.output_reader.deserialize(part, chunks)?; + let progress_values = ProgressValues { + rows: block.num_rows(), + bytes: block.memory_size(), + }; + self.scan_progress.incr(&progress_values); + + block + }; + + self.generate_one_block(data_block)?; + Ok(()) + } + State::PrewhereFilter(part, chunks) => { + // deserialize prewhere data block first + let data_block = self.prewhere_reader.deserialize(part.clone(), chunks)?; + if let Some(filter) = self.prewhere_filter.as_ref() { + // do filter + let res = filter + .eval(&FunctionContext::default(), &data_block)? + .vector; + let filter = DataBlock::cast_to_nonull_boolean(&res)?; + // shortcut, if predicates is const boolean (or can be cast to boolean) + if !DataBlock::filter_exists(&filter)? { + // all rows in this block are filtered out + // turn to read next part + let progress_values = ProgressValues { + rows: data_block.num_rows(), + bytes: data_block.memory_size(), + }; + self.scan_progress.incr(&progress_values); + self.generate_one_empty_block()?; + return Ok(()); + } + if self.remain_reader.is_none() { + // shortcut, we don't need to read remain data + let progress_values = ProgressValues { + rows: data_block.num_rows(), + bytes: data_block.memory_size(), + }; + self.scan_progress.incr(&progress_values); + let block = DataBlock::filter_block(data_block, &filter)?; + self.generate_one_block(block)?; + } else { + self.state = + State::ReadDataRemain(part, PrewhereData { data_block, filter }); + } + Ok(()) + } else { + Err(ErrorCode::LogicalError( + "It's a bug. No need to do prewhere filter", + )) + } + } + _ => Err(ErrorCode::LogicalError("It's a bug.")), + } + } + + async fn async_process(&mut self) -> Result<()> { + match std::mem::replace(&mut self.state, State::Finish) { + State::ReadDataPrewhere(Some(part)) => { + let chunks = self.prewhere_reader.read_columns_data(part.clone()).await?; + + if self.prewhere_filter.is_some() { + self.state = State::PrewhereFilter(part, chunks); + } else { + // all needed columns are read. + self.state = State::Deserialize(part, chunks, None) + } + Ok(()) + } + State::ReadDataRemain(part, prewhere_data) => { + if let Some(remain_reader) = self.remain_reader.as_ref() { + let chunks = remain_reader.read_columns_data(part.clone()).await?; + self.state = State::Deserialize(part, chunks, Some(prewhere_data)); + Ok(()) + } else { + return Err(ErrorCode::LogicalError("It's a bug. 
No remain reader")); + } + } + _ => Err(ErrorCode::LogicalError("It's a bug.")), + } + } +} diff --git a/src/query/storages/fuse/src/operations/read_partitions.rs b/src/query/storages/fuse/src/operations/read_partitions.rs index 312a57d042f8..e02210ac09b9 100644 --- a/src/query/storages/fuse/src/operations/read_partitions.rs +++ b/src/query/storages/fuse/src/operations/read_partitions.rs @@ -22,12 +22,12 @@ use common_exception::Result; use common_fuse_meta::meta::BlockMeta; use common_fuse_meta::meta::Location; use common_fuse_meta::meta::TableSnapshot; +use common_meta_app::schema::TableInfo; use common_planner::extras::Extras; +use common_planner::extras::Statistics; +use common_planner::plans::Projection; use common_planner::PartInfoPtr; use common_planner::Partitions; -use common_planner::plans::Projection; -use common_planner::extras::Statistics; -use common_meta_app::schema::TableInfo; use opendal::Operator; use tracing::debug; use tracing::info; diff --git a/src/query/storages/fuse/src/operations/recluster.rs b/src/query/storages/fuse/src/operations/recluster.rs index 59a2f334f7b3..132701b4b30c 100644 --- a/src/query/storages/fuse/src/operations/recluster.rs +++ b/src/query/storages/fuse/src/operations/recluster.rs @@ -20,14 +20,14 @@ use common_catalog::table_context::TableContext; use common_datablocks::SortColumnDescription; use common_exception::Result; use common_fuse_meta::meta::BlockMeta; -use common_planner::extras::Extras; -use common_planner::ReadDataSourcePlan; -use common_planner::SourceInfo; use common_pipeline_core::Pipeline; use common_pipeline_transforms::processors::transforms::SortMergeCompactor; use common_pipeline_transforms::processors::transforms::TransformCompact; use common_pipeline_transforms::processors::transforms::TransformSortMerge; use common_pipeline_transforms::processors::transforms::TransformSortPartial; +use common_planner::extras::Extras; +use common_planner::ReadDataSourcePlan; +use common_planner::SourceInfo; use crate::operations::FuseTableSink; use crate::operations::ReclusterMutator; @@ -149,11 +149,12 @@ impl FuseTable { .cluster_keys .iter() .map(|expr| SortColumnDescription { - column_name: expr.column_name(), + column_name: "_cluster_key".to_string(), asc: true, nulls_first: false, }) .collect(); + pipeline.add_transform(|transform_input_port, transform_output_port| { TransformSortPartial::try_create( transform_input_port, diff --git a/src/query/storages/fuse/src/pruning/pruner.rs b/src/query/storages/fuse/src/pruning/pruner.rs index 1913880ee593..3d49a933c161 100644 --- a/src/query/storages/fuse/src/pruning/pruner.rs +++ b/src/query/storages/fuse/src/pruning/pruner.rs @@ -19,9 +19,8 @@ use common_catalog::table_context::TableContext; use common_datavalues::DataSchemaRef; use common_exception::Result; use common_fuse_meta::meta::Location; -use common_legacy_expression::ExpressionVisitor; -use common_legacy_expression::LegacyExpression; -use common_legacy_expression::Recursion; +use common_planner::PhysicalScalar; +use common_sql::evaluator::PhysicalScalarOp; use common_storages_index::BlockFilter; use opendal::Operator; @@ -40,7 +39,7 @@ struct FilterPruner { index_columns: Vec, /// the expression that would be evaluate - filter_expression: LegacyExpression, + filter_expression: PhysicalScalar, /// the data accessor dal: Operator, @@ -53,7 +52,7 @@ impl FilterPruner { pub fn new( ctx: Arc, index_columns: Vec, - filter_expression: LegacyExpression, + filter_expression: PhysicalScalar, dal: Operator, data_schema: DataSchemaRef, ) -> Self { 
@@ -104,7 +103,7 @@ impl Pruner for FilterPruner { /// otherwise, a [Filter] backed pruner will be return pub fn new_filter_pruner( ctx: &Arc, - filter_exprs: Option<&[LegacyExpression]>, + filter_exprs: Option<&[PhysicalScalar]>, schema: &DataSchemaRef, dal: Operator, ) -> Result>> { @@ -115,8 +114,8 @@ pub fn new_filter_pruner( // check if there were applicable filter conditions let expr = exprs .iter() - .fold(None, |acc: Option, item| match acc { - Some(acc) => Some(acc.and(item.clone())), + .fold(None, |acc: Option, item| match acc { + Some(acc) => Some(acc.and(item).unwrap()), None => Some(item.clone()), }) .unwrap(); @@ -124,9 +123,12 @@ pub fn new_filter_pruner( let point_query_cols = columns_names_of_eq_expressions(&expr)?; if !point_query_cols.is_empty() { // convert to filter column names + + // todo(sundy) + let idx = 0; let filter_block_cols = point_query_cols .into_iter() - .map(|n| BlockFilter::build_filter_column_name(&n)) + .map(|n| BlockFilter::build_filter_column(idx)) .collect(); return Ok(Some(Arc::new(FilterPruner::new( @@ -152,7 +154,7 @@ mod util { ctx: Arc, dal: Operator, schema: &DataSchemaRef, - filter_expr: &LegacyExpression, + filter_expr: &PhysicalScalar, filter_col_names: &[String], index_location: &Location, index_length: u64, @@ -181,34 +183,33 @@ mod util { columns: HashSet, } - impl ExpressionVisitor for PointQueryVisitor { - fn pre_visit(mut self, expr: &LegacyExpression) -> Result> { - // TODO - // 1. only binary op "=" is considered, which is NOT enough - // 2. should combine this logic with Filter - match expr { - LegacyExpression::BinaryExpression { left, op, right } if op.as_str() == "=" => { - match (left.as_ref(), right.as_ref()) { - (LegacyExpression::Column(column), LegacyExpression::Literal { .. }) - | (LegacyExpression::Literal { .. }, LegacyExpression::Column(column)) => { - self.columns.insert(column.clone()); - Ok(Recursion::Stop(self)) - } - _ => Ok(Recursion::Continue(self)), - } - } - _ => Ok(Recursion::Continue(self)), - } - } - } - - pub fn columns_names_of_eq_expressions(filter_expr: &LegacyExpression) -> Result> { + // todo(sundy): + // impl ExpressionVisitor for PointQueryVisitor { + // fn pre_visit(mut self, expr: &PhysicalScalar) -> Result> { + // // TODO + // // 1. only binary op "=" is considered, which is NOT enough + // // 2. should combine this logic with Filter + // match expr { + // PhysicalScalar::BinaryExpression { left, op, right } if op.as_str() == "=" => { + // match (left.as_ref(), right.as_ref()) { + // (PhysicalScalar::Column(column), PhysicalScalar::Literal { .. }) + // | (PhysicalScalar::Literal { .. 
}, PhysicalScalar::Column(column)) => { + // self.columns.insert(column.clone()); + // Ok(Recursion::Stop(self)) + // } + // _ => Ok(Recursion::Continue(self)), + // } + // } + // _ => Ok(Recursion::Continue(self)), + // } + // } + // } + + pub fn columns_names_of_eq_expressions(filter_expr: &PhysicalScalar) -> Result> { let visitor = PointQueryVisitor { columns: HashSet::new(), }; - filter_expr - .accept(visitor) - .map(|r| r.columns.into_iter().collect()) + todo!() } } diff --git a/src/query/storages/fuse/src/pruning/range_pruner.rs b/src/query/storages/fuse/src/pruning/range_pruner.rs index bb191c6e1313..f5e39f9fe194 100644 --- a/src/query/storages/fuse/src/pruning/range_pruner.rs +++ b/src/query/storages/fuse/src/pruning/range_pruner.rs @@ -18,7 +18,7 @@ use common_catalog::table_context::TableContext; use common_datavalues::DataSchemaRef; use common_exception::Result; use common_fuse_meta::meta::StatisticsOfColumns; -use common_legacy_expression::LegacyExpression; +use common_planner::PhysicalScalar; use common_storages_index::RangeFilter; pub trait RangePruner { @@ -57,7 +57,7 @@ impl RangePruner for RangeFilter { pub fn new_range_pruner<'a>( ctx: &Arc, - filter_expr: Option<&'a [LegacyExpression]>, + filter_expr: Option<&'a [PhysicalScalar]>, schema: &'a DataSchemaRef, ) -> Result> { Ok(match filter_expr { diff --git a/src/query/storages/fuse/src/pruning/topn_pruner.rs b/src/query/storages/fuse/src/pruning/topn_pruner.rs index 4bea1269430a..41f6b6ce4e11 100644 --- a/src/query/storages/fuse/src/pruning/topn_pruner.rs +++ b/src/query/storages/fuse/src/pruning/topn_pruner.rs @@ -17,16 +17,20 @@ use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::BlockMeta; use common_fuse_meta::meta::ColumnStatistics; -use common_legacy_expression::LegacyExpression; +use common_planner::PhysicalScalar; pub(crate) struct TopNPrunner { schema: DataSchemaRef, - sort: Vec, + sort: Vec<(PhysicalScalar, bool, bool)>, limit: usize, } impl TopNPrunner { - pub(crate) fn new(schema: DataSchemaRef, sort: Vec, limit: usize) -> Self { + pub(crate) fn new( + schema: DataSchemaRef, + sort: Vec<(PhysicalScalar, bool, bool)>, + limit: usize, + ) -> Self { Self { schema, sort, @@ -45,26 +49,11 @@ impl TopNPrunner { return Ok(metas); } - let (sort, asc, nulls_first) = match &self.sort[0] { - LegacyExpression::Sort { - expr, - asc, - nulls_first, - .. - } => (expr, asc, nulls_first), - _ => unreachable!(), - }; - + let (sort, asc, nulls_first) = &self.sort[0]; // Currently, we only support topn on single-column sort. // TODO: support monadic + multi expression + order by cluster key sort. - let column = if let LegacyExpression::Column(c) = sort.as_ref() { - c - } else { - return Ok(metas); - }; - - let sort_idx = if let Ok(index) = self.schema.index_of(column.as_str()) { - index as u32 + let sort_idx = if let PhysicalScalar::IndexedVariable { index, .. } = sort { + *index as u32 } else { return Ok(metas); }; diff --git a/src/query/storages/fuse/src/statistics/cluster_statistics.rs b/src/query/storages/fuse/src/statistics/cluster_statistics.rs index 5c3c5f70e0bb..9472fc5be16a 100644 --- a/src/query/storages/fuse/src/statistics/cluster_statistics.rs +++ b/src/query/storages/fuse/src/statistics/cluster_statistics.rs @@ -13,10 +13,12 @@ // limitations under the License. 
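// The generator below derives the cluster key by evaluating an EvalNode and
// appending the result as a "_cluster_key" column, replacing the old
// ExpressionExecutor rewrite. The core step, as it appears in the hunks that
// follow (a sketch; it assumes the default FunctionContext is sufficient):
//
//     let column = executor.eval(&FunctionContext::default(), &block)?.vector;
//     let field = DataField::new("_cluster_key", column.data_type());
//     block = block.add_column(column, field)?;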
use common_datablocks::DataBlock;
+use common_datavalues::DataField;
 use common_datavalues::DataValue;
 use common_exception::Result;
+use common_functions::scalars::FunctionContext;
 use common_fuse_meta::meta::ClusterStatistics;
-use common_pipeline_transforms::processors::ExpressionExecutor;
+use common_sql::evaluator::EvalNode;
 
 use crate::io::BlockCompactor;
 
@@ -24,7 +26,7 @@ pub struct ClusterStatsGenerator {
     cluster_key_id: u32,
     cluster_key_index: Vec<usize>,
-    expression_executor: Option<ExpressionExecutor>,
+    expression_executor: Option<EvalNode>,
     level: i32,
     block_compactor: BlockCompactor,
 }
@@ -33,7 +35,7 @@ impl ClusterStatsGenerator {
     pub fn new(
         cluster_key_id: u32,
         cluster_key_index: Vec<usize>,
-        expression_executor: Option<ExpressionExecutor>,
+        expression_executor: Option<EvalNode>,
         level: i32,
         block_compactor: BlockCompactor,
     ) -> Self {
@@ -53,12 +55,12 @@ impl ClusterStatsGenerator {
         data_block: &DataBlock,
     ) -> Result<(Option<ClusterStatistics>, DataBlock)> {
         let cluster_stats = self.clusters_statistics(data_block, self.level)?;
         let mut block = data_block.clone();
 
-        // Remove unused columns.
         if let Some(executor) = &self.expression_executor {
-            block = executor.execute(&block)?;
+            let func_ctx = FunctionContext::default();
+            let column = executor.eval(&func_ctx, &block)?.vector;
+            let field = DataField::new("_cluster_key", column.data_type());
+            block = block.add_column(column, field)?;
         }
 
         Ok((cluster_stats, block))
@@ -79,17 +81,19 @@ impl ClusterStatsGenerator {
             return Ok(None);
         }
 
-        let block = if let Some(executor) = &self.expression_executor {
+        let mut data_block = data_block.clone();
+        if let Some(executor) = &self.expression_executor {
             // For a clustered table, data_block has been sorted, but may not contain cluster key.
             // So only need to get the first and the last row for execute.
+            let indices = vec![0u32, data_block.num_rows() as u32 - 1];
-            let input = DataBlock::block_take_by_indices(data_block, &indices)?;
-            executor.execute(&input)?
- } else { - data_block.clone() - }; + let input = DataBlock::block_take_by_indices(&data_block, &indices)?; + let func_ctx = FunctionContext::default(); + let column = executor.eval(&func_ctx, &input)?.vector; + let field = DataField::new("_cluster_key", column.data_type()); + data_block = data_block.add_column(column, field)?; + } - self.clusters_statistics(&block, origin_stats.level) + self.clusters_statistics(&data_block, origin_stats.level) } fn clusters_statistics( diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information.rs index 47b7e91e28e7..b1cb3cbd9b34 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information.rs @@ -22,7 +22,7 @@ use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::BlockMeta; -use common_legacy_expression::LegacyExpression; +use common_planner::PhysicalScalar; use serde_json::json; use crate::io::MetaReaders; @@ -33,7 +33,7 @@ use crate::Table; pub struct ClusteringInformation<'a> { pub ctx: Arc, pub table: &'a FuseTable, - pub cluster_keys: Vec, + pub cluster_keys: Vec, } struct ClusteringStatistics { @@ -48,7 +48,7 @@ impl<'a> ClusteringInformation<'a> { pub fn new( ctx: Arc, table: &'a FuseTable, - cluster_keys: Vec, + cluster_keys: Vec, ) -> Self { Self { ctx, @@ -75,7 +75,7 @@ impl<'a> ClusteringInformation<'a> { let names = self .cluster_keys .iter() - .map(|x| x.column_name()) + .map(|x| x.pretty_display()) .collect::>() .join(", "); let cluster_by_keys = format!("({})", names); diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs index 04741db8bcf6..d5e4b006e258 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs @@ -17,15 +17,15 @@ use std::sync::Arc; use common_catalog::catalog::CATALOG_DEFAULT; use common_datablocks::DataBlock; +use common_datavalues::DataValue; use common_exception::Result; -use common_legacy_expression::LegacyExpression; -use common_planner::extras::Extras; -use common_planner::Partitions; -use common_planner::ReadDataSourcePlan; -use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; +use common_planner::extras::Extras; +use common_planner::extras::Statistics; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; use super::clustering_information::ClusteringInformation; use super::table_args::get_cluster_keys; diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs index 0be1a8496b5a..e9058af3f4b1 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs @@ -14,9 +14,7 @@ use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_expression::validate_expression; 
-use common_legacy_expression::LegacyExpression; -use common_legacy_parser::ExpressionParser; +use common_planner::PhysicalScalar; use crate::table_functions::string_value; use crate::table_functions::TableArgs; @@ -38,24 +36,26 @@ pub fn parse_func_table_args(table_args: &TableArgs) -> Result<(String, String)> } } -pub fn get_cluster_keys(table: &FuseTable, definition: &str) -> Result> { - let cluster_keys = if !definition.is_empty() { - let schema = table.schema(); - let exprs = ExpressionParser::parse_exprs(definition)?; - for expr in exprs.iter() { - validate_expression(expr, &schema)?; - } - exprs - } else { - table.cluster_keys() - }; - - if cluster_keys.is_empty() { - return Err(ErrorCode::InvalidClusterKeys(format!( - "Invalid clustering keys or table {} is not clustered", - table.name() - ))); - } +pub fn get_cluster_keys(table: &FuseTable, definition: &str) -> Result> { + // todo(sundy) + todo!() + // let cluster_keys = if !definition.is_empty() { + // let schema = table.schema(); + // let exprs = ExpressionParser::parse_exprs(definition)?; + // for expr in exprs.iter() { + // validate_expression(expr, &schema)?; + // } + // exprs + // } else { + // table.cluster_keys() + // }; - Ok(cluster_keys) + // if cluster_keys.is_empty() { + // return Err(ErrorCode::InvalidClusterKeys(format!( + // "Invalid clustering keys or table {} is not clustered", + // table.name() + // ))); + // } + // + // Ok(cluster_keys) } diff --git a/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block_table.rs b/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block_table.rs index 041d7451e19c..c1512c4b59f2 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block_table.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block_table.rs @@ -17,15 +17,15 @@ use std::sync::Arc; use common_catalog::catalog::CATALOG_DEFAULT; use common_datablocks::DataBlock; +use common_datavalues::DataValue; use common_exception::Result; -use common_legacy_expression::LegacyExpression; -use common_planner::extras::Extras; -use common_planner::Partitions; -use common_planner::ReadDataSourcePlan; -use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; +use common_planner::extras::Extras; +use common_planner::extras::Statistics; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; use super::table_args::parse_func_table_args; use crate::pipelines::processors::port::OutputPort; diff --git a/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment_table.rs b/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment_table.rs index c2bc27489600..ddf8f1f04fc7 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment_table.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment_table.rs @@ -19,14 +19,13 @@ use common_catalog::catalog::CATALOG_DEFAULT; use common_datablocks::DataBlock; use common_datavalues::DataValue; use common_exception::Result; -use common_legacy_expression::LegacyExpression; -use common_planner::extras::Extras; -use common_planner::Partitions; -use common_planner::ReadDataSourcePlan; -use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; +use common_planner::extras::Extras; +use common_planner::extras::Statistics; +use common_planner::Partitions; 
+use common_planner::ReadDataSourcePlan;
 
 use super::fuse_segment::FuseSegment;
 use super::table_args::parse_func_history_args;
diff --git a/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot_table.rs b/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot_table.rs
index 0a6a9775f247..cd6fb0bc9d44 100644
--- a/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot_table.rs
+++ b/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot_table.rs
@@ -17,18 +17,18 @@ use std::sync::Arc;
 
 use common_catalog::catalog::CATALOG_DEFAULT;
 use common_datablocks::DataBlock;
+use common_datavalues::DataValue;
 use common_exception::Result;
-use common_legacy_expression::LegacyExpression;
-use common_planner::extras::Extras;
-use common_planner::Partitions;
-use common_planner::ReadDataSourcePlan;
-use common_planner::extras::Statistics;
 use common_meta_app::schema::TableIdent;
 use common_meta_app::schema::TableInfo;
 use common_meta_app::schema::TableMeta;
 use common_pipeline_core::processors::processor::ProcessorPtr;
 use common_pipeline_sources::processors::sources::AsyncSource;
 use common_pipeline_sources::processors::sources::AsyncSourcer;
+use common_planner::extras::Extras;
+use common_planner::extras::Statistics;
+use common_planner::Partitions;
+use common_planner::ReadDataSourcePlan;
 
 use super::fuse_snapshot::FuseSnapshot;
 use super::table_args::parse_func_history_args;
diff --git a/src/query/storages/fuse/src/table_functions/table_args.rs b/src/query/storages/fuse/src/table_functions/table_args.rs
index 455639051321..cf1307ac0c5e 100644
--- a/src/query/storages/fuse/src/table_functions/table_args.rs
+++ b/src/query/storages/fuse/src/table_functions/table_args.rs
@@ -15,7 +15,6 @@
 use common_datavalues::DataValue;
 use common_exception::ErrorCode;
 use common_exception::Result;
-use common_legacy_expression::LegacyExpression;
 
 pub fn string_value(value: &DataValue) -> Result<String> {
     String::from_utf8(value.as_string()?)
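With LegacyExpression gone, table-function arguments arrive as plain DataValues,
so helpers like string_value above reduce to direct conversions. A minimal usage
sketch (the argument value is illustrative only):

    use common_datavalues::DataValue;

    let arg = DataValue::String(b"system".to_vec());
    let name = string_value(&arg)?; // "system"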
diff --git a/src/query/storages/hive/src/hive_partition_pruner.rs b/src/query/storages/hive/src/hive_partition_pruner.rs index f361412f7f43..d4b8d8f85425 100644 --- a/src/query/storages/hive/src/hive_partition_pruner.rs +++ b/src/query/storages/hive/src/hive_partition_pruner.rs @@ -24,12 +24,12 @@ use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::ColumnStatistics; use common_fuse_meta::meta::StatisticsOfColumns; -use common_legacy_expression::LegacyExpression; +use common_planner::PhysicalScalar; use common_storages_index::range_filter::RangeFilter; pub struct HivePartitionPruner { pub ctx: Arc, - pub filters: Vec, + pub filters: Vec, // pub partitions: Vec, pub partition_schema: Arc, } @@ -37,7 +37,7 @@ pub struct HivePartitionPruner { impl HivePartitionPruner { pub fn create( ctx: Arc, - filters: Vec, + filters: Vec, partition_schema: Arc, ) -> Self { HivePartitionPruner { diff --git a/src/query/storages/hive/src/hive_table.rs b/src/query/storages/hive/src/hive_table.rs index 84eadbf13140..7ab1054e8f3c 100644 --- a/src/query/storages/hive/src/hive_table.rs +++ b/src/query/storages/hive/src/hive_table.rs @@ -26,15 +26,9 @@ use common_datablocks::DataBlock; use common_datavalues::DataField; use common_datavalues::DataSchema; use common_datavalues::DataSchemaRef; +use common_datavalues::DataValue; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_expression::LegacyExpression; -use common_legacy_expression::RequireColumnsVisitor; -use common_planner::extras::Extras; -use common_planner::Partitions; -use common_planner::plans::Projection; -use common_planner::ReadDataSourcePlan; -use common_planner::extras::Statistics; use common_meta_app::schema::TableInfo; use common_pipeline_core::processors::port::OutputPort; use common_pipeline_core::processors::processor::ProcessorPtr; @@ -42,6 +36,12 @@ use common_pipeline_core::Pipeline; use common_pipeline_core::SourcePipeBuilder; use common_pipeline_sources::processors::sources::sync_source::SyncSource; use common_pipeline_sources::processors::sources::sync_source::SyncSourcer; +use common_planner::extras::Extras; +use common_planner::extras::Statistics; +use common_planner::plans::Projection; +use common_planner::Partitions; +use common_planner::PhysicalScalar; +use common_planner::ReadDataSourcePlan; use common_storage::init_operator; use common_storage::DataOperator; use common_storages_index::RangeFilter; @@ -227,11 +227,13 @@ impl HiveTable { } } - fn get_columns_from_expressions(expressions: &[LegacyExpression]) -> HashSet { - expressions - .iter() - .flat_map(|e| RequireColumnsVisitor::collect_columns_from_expr(e).unwrap()) - .collect::>() + fn get_columns_from_expressions(expressions: &[PhysicalScalar]) -> HashSet { + // todo(sundy) + todo!() + // expressions + // .iter() + // .flat_map(|e| RequireColumnsVisitor::collect_columns_from_expr(e).unwrap()) + // .collect::>() } fn get_projections(&self, push_downs: &Option) -> Result> { @@ -293,7 +295,7 @@ impl HiveTable { &self, ctx: Arc, partition_keys: Vec, - filter_expressions: Vec, + filter_expressions: Vec, ) -> Result)>> { let hive_catalog = ctx.get_catalog(CATALOG_HIVE)?; let hive_catalog = hive_catalog.as_any().downcast_ref::().unwrap(); diff --git a/src/query/storages/hive/src/hive_table_source.rs b/src/query/storages/hive/src/hive_table_source.rs index d7de00adcea4..620e743292d2 100644 --- a/src/query/storages/hive/src/hive_table_source.rs +++ b/src/query/storages/hive/src/hive_table_source.rs @@ -23,11 +23,11 @@ 
use common_catalog::table_context::TableContext; use common_datablocks::DataBlock; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::PartInfoPtr; use common_pipeline_core::processors::port::OutputPort; use common_pipeline_core::processors::processor::Event; use common_pipeline_core::processors::processor::ProcessorPtr; use common_pipeline_core::processors::Processor; +use common_planner::PartInfoPtr; use opendal::Operator; use crate::hive_parquet_block_reader::DataBlockDeserializer; diff --git a/src/query/storages/index/src/bloom.rs b/src/query/storages/index/src/bloom.rs index e65757801417..11459dbafda4 100644 --- a/src/query/storages/index/src/bloom.rs +++ b/src/query/storages/index/src/bloom.rs @@ -74,17 +74,17 @@ pub enum FilterEvalResult { impl BlockFilter { /// For every applicable column, we will create a filter. /// The filter will be stored with field name 'Bloom(column_name)' - pub fn build_filter_column_name(column_name: &str) -> String { - format!("Bloom({})", column_name) + pub fn build_filter_column(index: usize) -> String { + format!("Bloom({})", index) } pub fn build_filter_schema(data_schema: &DataSchema) -> DataSchema { let mut filter_fields = vec![]; let fields = data_schema.fields(); - for field in fields.iter() { + for (index, field) in fields.iter().enumerate() { if Xor8Filter::is_supported_type(field.data_type()) { // create field for applicable ones - let column_name = Self::build_filter_column_name(field.name()); + let column_name = Self::build_filter_column(index); let filter_field = DataField::new(&column_name, Vu8::to_data_type()); filter_fields.push(filter_field); @@ -154,11 +154,11 @@ impl BlockFilter { pub fn find( &self, - column_name: &str, + index: usize, target: DataValue, typ: &DataTypeImpl, ) -> Result { - let filter_column = Self::build_filter_column_name(column_name); + let filter_column = Self::build_filter_column(index); if !self.filter_block.schema().has_field(&filter_column) || !Xor8Filter::is_supported_type(typ) || target.is_null() @@ -217,27 +217,26 @@ impl BlockFilter { // For now only support single column like "name = 'Alice'" match (left, right) { // match the expression of 'column_name = literal constant' - (PhysicalScalar::Column(column), PhysicalScalar::Literal { value, .. }) - | (PhysicalScalar::Literal { value, .. }, PhysicalScalar::Column(column)) => { + ( + PhysicalScalar::IndexedVariable { index, .. }, + PhysicalScalar::Constant { value, .. }, + ) + | ( + PhysicalScalar::Constant { value, .. }, + PhysicalScalar::IndexedVariable { index, .. }, + ) => { // find the corresponding column from source table - match schema.column_with_name(column) { - Some((_index, data_field)) => { - let data_type = data_field.data_type(); - - // check if cast needed - let value = if &value.data_type() != data_type { - let col = value.as_const_column(data_type, 1)?; - col.get_checked(0)? - } else { - value.clone() - }; - self.find(column, value, data_type) - } - None => Err(ErrorCode::BadArguments(format!( - "Column '{}' not found in schema", - column - ))), - } + let data_field = schema.field(*index); + let data_type = data_field.data_type(); + + // check if cast needed + let value = if &value.data_type() != data_type { + let col = value.as_const_column(data_type, 1)?; + col.get_checked(0)? 
+ } else { + value.clone() + }; + self.find(*index, value, data_type) } _ => Ok(FilterEvalResult::NotApplicable), } diff --git a/src/query/storages/index/src/range_filter.rs b/src/query/storages/index/src/range_filter.rs index 214ab10453ec..dbdd4cff1040 100644 --- a/src/query/storages/index/src/range_filter.rs +++ b/src/query/storages/index/src/range_filter.rs @@ -25,9 +25,9 @@ use common_exception::Result; use common_functions::scalars::check_pattern_type; use common_functions::scalars::FunctionContext; use common_functions::scalars::FunctionFactory; +use common_functions::scalars::Monotonicity; use common_functions::scalars::PatternType; use common_fuse_meta::meta::StatisticsOfColumns; -use common_pipeline_transforms::processors::transforms::ExpressionExecutor; use common_planner::PhysicalScalar; use common_sql::evaluator::EvalNode; use common_sql::evaluator::Evaluator; @@ -61,7 +61,15 @@ impl RangeFilter { }) .unwrap(); + let input_fields = stat_columns + .iter() + .map(|c| c.stat_field.clone()) + .collect::>(); + let input_schema = Arc::new(DataSchema::new(input_fields)); + let executor = Evaluator::eval_physical_scalar(&verifiable_expr)?; + let func_ctx = ctx.try_get_function_context()?; + Ok(Self { origin: schema, schema: input_schema, @@ -84,7 +92,7 @@ impl RangeFilter { let dummy_columns = vec![Arc::new(const_col) as ColumnRef]; let data_block = DataBlock::create(input_schema, dummy_columns); - let executed_data_block = self.executor.eval(&func_ctx, &data_block)?; + let executed_data_block = self.executor.eval(&self.func_ctx, &data_block)?; match executed_data_block.vector.get(0) { DataValue::Null => Ok(false), @@ -131,7 +139,7 @@ pub fn build_verifiable_expr( // TODO(sundy) todo!() // VerifiableExprBuilder::try_create(exprs, op.to_lowercase().as_str(), schema, stat_columns) - // .map_or(unhandled.clone(), |mut v| v.build().unwrap_or(unhandled)) + // .map_or(unhandled.clone(), |mut v| v.build().unwrap_or(unhandled)) } fn inverse_operator(op: &str) -> Result<&str> { @@ -259,15 +267,18 @@ impl StatColumn { variables.insert(v.name().clone(), (variable_left, variable_right)); } - let monotonicity = ExpressionMonotonicityVisitor::check_expression( - schema, - &self.expr, - variables, - single_point, - ); - if !monotonicity.is_monotonic { - return Ok(None); - } + // TODO: sundy + + let monotonicity = Monotonicity::default(); + // let monotonicity = ExpressionMonotonicityVisitor::check_expression( + // schema, + // &self.expr, + // variables, + // single_point, + // ); + // if !monotonicity.is_monotonic { + // return Ok(None); + // } let column_with_field_opt = match self.stat_type { StatType::Min => { @@ -315,7 +326,10 @@ impl<'a> VerifiableExprBuilder<'a> { "is_null" => { // should_keep: col.null_count > 0 let nulls_expr = self.nulls_column_expr(0)?; - let scalar_expr = PhysicalScalar::Constant { value: DataValue::UInt64(0), data_type: u64::to_data_type() }; + let scalar_expr = PhysicalScalar::Constant { + value: DataValue::UInt64(0), + data_type: u64::to_data_type(), + }; nulls_expr.gt(&scalar_expr) } "is_not_null" => { @@ -408,20 +422,22 @@ impl<'a> VerifiableExprBuilder<'a> { // e.g. 
col like 'a%' => max_col >= 'a' and min_col < 'b' let left = left_bound_for_like_pattern(v); if !left.is_empty() { + let right = right_bound_for_like_pattern(left.clone()); + let left_scalar = PhysicalScalar::Constant { value: DataValue::String(left), data_type: Vu8::to_data_type(), }; - let right = right_bound_for_like_pattern(left.clone()); - let right_scalar = PhysicalScalar::Constant { - value: DataValue::String(right), - data_type: Vu8::to_data_type(), - }; let max_expr = self.max_column_expr(0)?; if right.is_empty() { return max_expr.gt_eq(&left_scalar); } else { + let right_scalar = PhysicalScalar::Constant { + value: DataValue::String(right), + data_type: Vu8::to_data_type(), + }; + let min_expr = self.min_column_expr(0)?; return max_expr .gt_eq(&left_scalar)? @@ -467,15 +483,14 @@ impl<'a> VerifiableExprBuilder<'a> { data_type: Vu8::to_data_type(), }; - let right_scalar = PhysicalScalar::Constant { - value: DataValue::String(right), - data_type: Vu8::to_data_type(), - }; - let min_expr = self.min_column_expr(0)?; if right.is_empty() { return min_expr.lt(&left_scalar); } else { + let right_scalar = PhysicalScalar::Constant { + value: DataValue::String(right), + data_type: Vu8::to_data_type(), + }; let max_expr = self.max_column_expr(0)?; return min_expr .lt(&left_scalar)? @@ -599,7 +614,7 @@ pub fn check_maybe_monotonic(expr: &PhysicalScalar) -> Result { PhysicalScalar::Constant { .. } => Ok(true), PhysicalScalar::IndexedVariable { .. } => Ok(true), PhysicalScalar::Function { name, args, .. } => get_maybe_monotonic(name, args), - PhysicalScalar::Cast { input, .. } => check_maybe_monotonic(expr), + PhysicalScalar::Cast { input, .. } => check_maybe_monotonic(input.as_ref()), _ => Ok(false), } } diff --git a/src/query/storages/preludes/src/memory/memory_table.rs b/src/query/storages/preludes/src/memory/memory_table.rs index f4efbcc83108..a325e49b171b 100644 --- a/src/query/storages/preludes/src/memory/memory_table.rs +++ b/src/query/storages/preludes/src/memory/memory_table.rs @@ -26,12 +26,12 @@ use common_datavalues::StructColumn; use common_datavalues::TypeID; use common_exception::ErrorCode; use common_exception::Result; +use common_meta_app::schema::TableInfo; use common_planner::extras::Extras; -use common_planner::Partitions; +use common_planner::extras::Statistics; use common_planner::plans::Projection; +use common_planner::Partitions; use common_planner::ReadDataSourcePlan; -use common_planner::extras::Statistics; -use common_meta_app::schema::TableInfo; use common_storage::StorageMetrics; use once_cell::sync::Lazy; use parking_lot::Mutex; diff --git a/src/query/storages/preludes/src/null/null_table.rs b/src/query/storages/preludes/src/null/null_table.rs index ab1cd5403bc6..67b64ae1bf71 100644 --- a/src/query/storages/preludes/src/null/null_table.rs +++ b/src/query/storages/preludes/src/null/null_table.rs @@ -18,11 +18,11 @@ use std::sync::Arc; use common_datablocks::DataBlock; use common_datavalues::DataSchemaRef; use common_exception::Result; +use common_meta_app::schema::TableInfo; use common_planner::extras::Extras; +use common_planner::extras::Statistics; use common_planner::Partitions; use common_planner::ReadDataSourcePlan; -use common_planner::extras::Statistics; -use common_meta_app::schema::TableInfo; use crate::pipelines::processors::port::OutputPort; use crate::pipelines::processors::processor::ProcessorPtr; diff --git a/src/query/storages/preludes/src/random/random_table.rs b/src/query/storages/preludes/src/random/random_table.rs index 5437ed6b9d75..da39a62e5076 
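[editor's note] The prefix bounds used here generalize the `'a%' => ['a', 'b')` example in the comment: the right bound increments the last byte of the prefix, after dropping trailing 0xFF bytes that cannot be incremented. A standalone sketch of one plausible `right_bound_for_like_pattern` (the real implementation may differ in edge cases):

// Compute an exclusive upper bound for strings starting with `prefix`:
// every s with prefix p satisfies p <= s < right_bound(p). Trailing 0xFF
// bytes cannot be incremented, so they are dropped first; an empty result
// means the range has no finite upper bound.
fn right_bound(mut prefix: Vec<u8>) -> Vec<u8> {
    while prefix.last() == Some(&0xFF) {
        prefix.pop();
    }
    if let Some(last) = prefix.last_mut() {
        *last += 1;
    }
    prefix
}

fn main() {
    assert_eq!(right_bound(b"a".to_vec()), b"b".to_vec());    // 'a%' scans ['a', 'b')
    assert_eq!(right_bound(vec![b'a', 0xFF]), b"b".to_vec()); // 0xFF tail dropped first
    assert!(right_bound(vec![0xFF]).is_empty());              // no finite upper bound
}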
100644 --- a/src/query/storages/preludes/src/random/random_table.rs +++ b/src/query/storages/preludes/src/random/random_table.rs @@ -19,12 +19,12 @@ use common_datablocks::DataBlock; use common_datavalues::DataSchemaRef; use common_datavalues::DataType; use common_exception::Result; +use common_meta_app::schema::TableInfo; use common_planner::extras::Extras; -use common_planner::Partitions; +use common_planner::extras::Statistics; use common_planner::plans::Projection; +use common_planner::Partitions; use common_planner::ReadDataSourcePlan; -use common_planner::extras::Statistics; -use common_meta_app::schema::TableInfo; use super::RandomPartInfo; use crate::pipelines::processors::port::OutputPort; diff --git a/src/query/storages/preludes/src/system/log_queue.rs b/src/query/storages/preludes/src/system/log_queue.rs index 3edd5221024f..9f78e4dfdd25 100644 --- a/src/query/storages/preludes/src/system/log_queue.rs +++ b/src/query/storages/preludes/src/system/log_queue.rs @@ -26,10 +26,6 @@ use common_datavalues::DataType; use common_datavalues::MutableColumn; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::extras::Extras; -use common_planner::Partitions; -use common_planner::ReadDataSourcePlan; -use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; @@ -38,6 +34,10 @@ use common_pipeline_core::processors::processor::ProcessorPtr; use common_pipeline_core::Pipeline; use common_pipeline_sources::processors::sources::SyncSource; use common_pipeline_sources::processors::sources::SyncSourcer; +use common_planner::extras::Extras; +use common_planner::extras::Statistics; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; use once_cell::sync::OnceCell; use parking_lot::RwLock; diff --git a/src/query/storages/preludes/src/system/one_table.rs b/src/query/storages/preludes/src/system/one_table.rs index 968ae4926ccb..874848fd7164 100644 --- a/src/query/storages/preludes/src/system/one_table.rs +++ b/src/query/storages/preludes/src/system/one_table.rs @@ -17,12 +17,12 @@ use std::sync::Arc; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::Result; -use common_planner::extras::Extras; -use common_planner::Partitions; -use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; +use common_planner::extras::Extras; +use common_planner::extras::Statistics; +use common_planner::Partitions; use super::table::SystemTablePart; use crate::sessions::TableContext; diff --git a/src/query/storages/preludes/src/system/table.rs b/src/query/storages/preludes/src/system/table.rs index d09444e26953..830d690adb31 100644 --- a/src/query/storages/preludes/src/system/table.rs +++ b/src/query/storages/preludes/src/system/table.rs @@ -17,13 +17,13 @@ use std::sync::Arc; use common_datablocks::DataBlock; use common_exception::Result; +use common_meta_app::schema::TableInfo; +use common_pipeline_sources::processors::sources::EmptySource; use common_planner::extras::Extras; +use common_planner::extras::Statistics; use common_planner::PartInfo; use common_planner::Partitions; use common_planner::ReadDataSourcePlan; -use common_planner::extras::Statistics; -use common_meta_app::schema::TableInfo; -use common_pipeline_sources::processors::sources::EmptySource; use crate::pipelines::processors::port::OutputPort; use 
crate::pipelines::processors::processor::ProcessorPtr; diff --git a/src/query/storages/preludes/src/system/tracing_table.rs b/src/query/storages/preludes/src/system/tracing_table.rs index ca549290ac10..c1849820e37f 100644 --- a/src/query/storages/preludes/src/system/tracing_table.rs +++ b/src/query/storages/preludes/src/system/tracing_table.rs @@ -23,13 +23,13 @@ use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::extras::Extras; -use common_planner::Partitions; -use common_planner::ReadDataSourcePlan; -use common_planner::extras::Statistics; use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; +use common_planner::extras::Extras; +use common_planner::extras::Statistics; +use common_planner::Partitions; +use common_planner::ReadDataSourcePlan; use tracing::debug; use walkdir::WalkDir; From 614f613251a514556983400adf10d4a55512e2bb Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Wed, 26 Oct 2022 12:17:56 +0800 Subject: [PATCH 04/47] refactor(query): add comments --- src/query/sql/src/executor/physical_plan_builder.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs index a932f013a047..9683989274fa 100644 --- a/src/query/sql/src/executor/physical_plan_builder.rs +++ b/src/query/sql/src/executor/physical_plan_builder.rs @@ -567,6 +567,8 @@ impl PhysicalPlanBuilder { let metadata = self.metadata.read(); let ty = metadata.column(item.index).data_type(); let name = metadata.column(item.index).name(); + + // sort item is already a column let scalar = PhysicalScalar::IndexedVariable { index: item.index, data_type: ty.clone(), From ea61ca651e1d90ecef3de2a232c0ac4a566460a2 Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 27 Oct 2022 18:58:01 +0800 Subject: [PATCH 05/47] merge --- Cargo.lock | 1 + src/query/planner/Cargo.toml | 1 + src/query/planner/src/lib.rs | 2 + .../src/api/rpc/flight_scatter_hash.rs | 5 +- .../src/interpreters/interpreter_copy_v2.rs | 2 +- src/query/service/src/procedures/procedure.rs | 10 ++ src/query/service/src/sessions/query_ctx.rs | 1 + .../src/table_functions/numbers_table.rs | 4 +- src/query/sql/src/planner/mod.rs | 1 + src/query/sql/src/planner/semantic/mod.rs | 1 + src/query/storages/index/src/range_filter.rs | 136 ++++++++++++++---- 11 files changed, 129 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4de783068f8b..048e4f155869 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1883,6 +1883,7 @@ name = "common-planner" version = "0.1.0" dependencies = [ "common-datavalues", + "common-exception", "common-meta-app", "common-meta-types", "once_cell", diff --git a/src/query/planner/Cargo.toml b/src/query/planner/Cargo.toml index be7a897ddbe3..662d50d121c5 100644 --- a/src/query/planner/Cargo.toml +++ b/src/query/planner/Cargo.toml @@ -8,6 +8,7 @@ edition = { workspace = true } [dependencies] common-datavalues = { path = "../datavalues" } +common-exception = { path = "../../common/exception" } common-meta-app = { path = "../../meta/app" } common-meta-types = { path = "../../meta/types" } diff --git a/src/query/planner/src/lib.rs b/src/query/planner/src/lib.rs index c12956026675..9951b07ec7e8 100644 --- a/src/query/planner/src/lib.rs +++ b/src/query/planner/src/lib.rs @@ -25,11 +25,13 @@ mod partition; mod physical_scalar; pub mod extras; +mod 
physical_scalar_visitor; pub mod plan_read_datasource; pub mod stage_table; pub use partition::*; pub use physical_scalar::*; +pub use physical_scalar_visitor::*; pub use plan_read_datasource::*; // Plan will be used publicly. diff --git a/src/query/service/src/api/rpc/flight_scatter_hash.rs b/src/query/service/src/api/rpc/flight_scatter_hash.rs index 60f398026c2e..3c078e0a6073 100644 --- a/src/query/service/src/api/rpc/flight_scatter_hash.rs +++ b/src/query/service/src/api/rpc/flight_scatter_hash.rs @@ -89,7 +89,10 @@ impl HashFlightScatter { input: Box::new(expr), target: u64::to_data_type(), }, - PhysicalScalar::Constant { value: DataValue::UInt64(num as u64), data_type: u64::to_data_type() } + PhysicalScalar::Constant { + value: DataValue::UInt64(num as u64), + data_type: u64::to_data_type(), + }, ], return_type: u64::to_data_type(), } diff --git a/src/query/service/src/interpreters/interpreter_copy_v2.rs b/src/query/service/src/interpreters/interpreter_copy_v2.rs index 96ae3e479bbf..6a1c5db9e6e5 100644 --- a/src/query/service/src/interpreters/interpreter_copy_v2.rs +++ b/src/query/service/src/interpreters/interpreter_copy_v2.rs @@ -23,9 +23,9 @@ use common_meta_app::schema::GetTableCopiedFileReq; use common_meta_app::schema::TableCopiedFileInfo; use common_meta_app::schema::UpsertTableCopiedFileReq; use common_meta_types::UserStageInfo; +use common_planner::stage_table::StageTableInfo; use common_planner::ReadDataSourcePlan; use common_planner::SourceInfo; -use common_planner::stage_table::StageTableInfo; use regex::Regex; use super::append2table; diff --git a/src/query/service/src/procedures/procedure.rs b/src/query/service/src/procedures/procedure.rs index daf89c9040f3..ffdd3462b4d1 100644 --- a/src/query/service/src/procedures/procedure.rs +++ b/src/query/service/src/procedures/procedure.rs @@ -22,6 +22,7 @@ use common_pipeline_core::processors::port::OutputPort; use common_pipeline_core::Pipe; use common_pipeline_core::Pipeline; use common_pipeline_sources::processors::sources::StreamSource; +use common_sql::validate_function_arg; use common_streams::DataBlockStream; use common_streams::SendableDataBlockStream; use futures::StreamExt; @@ -38,12 +39,21 @@ pub trait Procedure: Sync + Send { fn validate(&self, ctx: Arc, args: &[String]) -> Result<()> { let features = self.features(); + + validate_function_arg( + self.name(), + args.len(), + features.variadic_arguments, + features.num_arguments, + )?; + if features.management_mode_required && !ctx.get_config().query.management_mode { return Err(ErrorCode::ManagementModePermissionDenied(format!( "Access denied: '{}' only used in management-mode", self.name() ))); } + if let Some(user_option_flag) = features.user_option_flag { let user_info = ctx.get_current_user()?; if !user_info.has_option_flag(user_option_flag) { diff --git a/src/query/service/src/sessions/query_ctx.rs b/src/query/service/src/sessions/query_ctx.rs index a2a13f00b465..1997d4f62e49 100644 --- a/src/query/service/src/sessions/query_ctx.rs +++ b/src/query/service/src/sessions/query_ctx.rs @@ -37,6 +37,7 @@ use common_functions::scalars::FunctionContext; use common_io::prelude::FormatSettings; use common_meta_app::schema::TableInfo; use common_meta_types::UserInfo; +use common_planner::stage_table::StageTableInfo; use common_planner::PartInfoPtr; use common_planner::Partitions; use common_planner::ReadDataSourcePlan; diff --git a/src/query/service/src/table_functions/numbers_table.rs b/src/query/service/src/table_functions/numbers_table.rs index ee957d36f5bd..b6566e084664 
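[editor's note] `validate_function_arg` is re-exported from `common_sql` further down in this commit; its signature here is taken from the call site in `Procedure::validate`, while the body is an assumed sketch of standard arity checking (in particular, the `(min, max)` shape of `variadic_arguments` is an assumption, not confirmed by the patch):

// Hedged sketch of an arity check matching the call above:
// validate_function_arg(name, args.len(), variadic_arguments, num_arguments).
fn validate_function_arg(
    name: &str,
    args_len: usize,
    variadic_arguments: Option<(usize, usize)>, // assumed inclusive (min, max)
    num_arguments: usize,
) -> Result<(), String> {
    match variadic_arguments {
        Some((min, max)) if args_len < min || args_len > max => Err(format!(
            "function `{name}` expects between {min} and {max} arguments, got {args_len}"
        )),
        None if args_len != num_arguments => Err(format!(
            "function `{name}` expects {num_arguments} arguments, got {args_len}"
        )),
        _ => Ok(()),
    }
}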
100644 --- a/src/query/service/src/table_functions/numbers_table.rs +++ b/src/query/service/src/table_functions/numbers_table.rs @@ -128,14 +128,14 @@ impl Table for NumbersTable { let mut limit = None; if let Some(extras) = &push_downs { - if extras.limit.is_some() && extras.filters.is_empty() && extras.order_by.is_empty() { + if extras.limit.is_some() && extras.filters.is_empty() && extras.order_by.is_empty() { // It is allowed to have an error when we can't get sort columns from the expression. For // example 'select number from numbers(10) order by number+4 limit 10', the column 'number+4' // doesn't exist in the numbers table. // For case like that, we ignore the error and don't apply any optimization. // No order by case - limit = extras.limit; + limit = extras.limit; } } let total = match limit { diff --git a/src/query/sql/src/planner/mod.rs b/src/query/sql/src/planner/mod.rs index e7096bb2a3b9..00512f607cc7 100644 --- a/src/query/sql/src/planner/mod.rs +++ b/src/query/sql/src/planner/mod.rs @@ -31,5 +31,6 @@ pub use metadata::*; pub use planner::Planner; pub use plans::ScalarExpr; pub use semantic::normalize_identifier; +pub use semantic::validate_function_arg; pub use semantic::IdentifierNormalizer; pub use semantic::NameResolutionContext; diff --git a/src/query/sql/src/planner/semantic/mod.rs b/src/query/sql/src/planner/semantic/mod.rs index 2f25bce4369d..3274cacdf174 100644 --- a/src/query/sql/src/planner/semantic/mod.rs +++ b/src/query/sql/src/planner/semantic/mod.rs @@ -20,4 +20,5 @@ pub use grouping_check::GroupingChecker; pub use name_resolution::normalize_identifier; pub use name_resolution::IdentifierNormalizer; pub use name_resolution::NameResolutionContext; +pub use type_check::validate_function_arg; pub use type_check::TypeChecker; diff --git a/src/query/storages/index/src/range_filter.rs b/src/query/storages/index/src/range_filter.rs index dbdd4cff1040..daebe7874026 100644 --- a/src/query/storages/index/src/range_filter.rs +++ b/src/query/storages/index/src/range_filter.rs @@ -29,6 +29,7 @@ use common_functions::scalars::Monotonicity; use common_functions::scalars::PatternType; use common_fuse_meta::meta::StatisticsOfColumns; use common_planner::PhysicalScalar; +use common_planner::RequireColumnsVisitor; use common_sql::evaluator::EvalNode; use common_sql::evaluator::Evaluator; use common_sql::evaluator::PhysicalScalarOp; @@ -50,6 +51,7 @@ impl RangeFilter { ) -> Result { debug_assert!(!exprs.is_empty()); let mut stat_columns: StatColumns = Vec::new(); + let verifiable_expr = exprs .iter() .fold(None, |acc: Option, expr| { @@ -136,10 +138,35 @@ pub fn build_verifiable_expr( data_type: bool::to_data_type(), }; - // TODO(sundy) - todo!() - // VerifiableExprBuilder::try_create(exprs, op.to_lowercase().as_str(), schema, stat_columns) - // .map_or(unhandled.clone(), |mut v| v.build().unwrap_or(unhandled)) + /// TODO: Try to convert `not(is_not_null)` to `is_null`. + let (exprs, op) = match expr { + PhysicalScalar::Constant { .. 
} => return expr.clone(),
+        PhysicalScalar::Function {
+            name,
+            args,
+            return_type,
+        } if args.len() == 2 => {
+            let left = &args[0];
+            let right = &args[1];
+            match name.to_lowercase().as_str() {
+                "and" => {
+                    let left = build_verifiable_expr(left, schema, stat_columns);
+                    let right = build_verifiable_expr(right, schema, stat_columns);
+                    return left.and(&right).unwrap();
+                }
+                "or" => {
+                    let left = build_verifiable_expr(left, schema, stat_columns);
+                    let right = build_verifiable_expr(right, schema, stat_columns);
+                    return left.or(&right).unwrap();
+                }
+                _ => (vec![left.clone(), right.clone()], name.clone()),
+            }
+        }
+        _ => return unhandled,
+    };
+
+    VerifiableExprBuilder::try_create(exprs, op.to_lowercase().as_str(), schema, stat_columns)
+        .map_or(unhandled.clone(), |mut v| v.build().unwrap_or(unhandled))
 }

 fn inverse_operator(op: &str) -> Result<&str> {
@@ -156,25 +183,6 @@
     }
 }

-/// Try to convert `not(is_not_null)` to `is_null`.
-// TODO(sundy)
-// fn try_convert_is_null(op: &str, args: Vec<PhysicalScalar>) -> (Vec<PhysicalScalar>, String) {
-//     // `is null` will be converted to `not(is not null)` in the parser.
-//     // we should convert it back to `is null` here.
-//     if op == "not" && args.len() == 1 {
-//         if let PhysicalScalar::ScalarFunction {
-//             op: inner_op,
-//             args: inner_args,
-//         } = &args[0]
-//         {
-//             if inner_op == "is_not_null" {
-//                 return (inner_args.clone(), String::from("is_null"));
-//             }
-//         }
-//     }
-//     (args, String::from(op))
-// }
-
 #[derive(Debug, Copy, Clone, PartialEq)]
 enum StatType {
     Min,
@@ -316,8 +324,77 @@ impl<'a> VerifiableExprBuilder<'a> {
         schema: &'a DataSchemaRef,
         stat_columns: &'a mut StatColumns,
     ) -> Result<Self> {
-        // TODO(sundy)
-        todo!()
+        // collect stat columns
+        // exprs' length must be 2
+        let lhs_cols = RequireColumnsVisitor::collect_columns_from_expr(&exprs[0])?;
+        let rhs_cols = RequireColumnsVisitor::collect_columns_from_expr(&exprs[1])?;
+        let (args, cols, op) = match (lhs_cols.len(), rhs_cols.len()) {
+            (0, 0) => {
+                return Err(ErrorCode::UnknownException(
+                    "Constant expressions do not need to be handled",
+                ));
+            }
+            (_, 0) => (vec![exprs[0].clone(), exprs[1].clone()], vec![lhs_cols], op),
+            (0, _) => {
+                let op = inverse_operator(op)?;
+                (vec![exprs[1].clone(), exprs[0].clone()], vec![rhs_cols], op)
+            }
+            _ => {
+                if !lhs_cols.is_disjoint(&rhs_cols) {
+                    return Err(ErrorCode::UnknownException(
+                        "Unsupported condition: left and right sides share columns",
+                    ));
+                }
+
+                if !matches!(op, "=" | "<" | "<=" | ">" | ">=") {
+                    return Err(ErrorCode::UnknownException(format!(
+                        "Unsupported operator '{:?}' for multi-column expression",
+                        op
+                    )));
+                }
+
+                if !check_maybe_monotonic(&exprs[1])? {
+                    return Err(ErrorCode::UnknownException(
+                        "Only support the monotonic expression",
+                    ));
+                }
+
+                (
+                    vec![exprs[0].clone(), exprs[1].clone()],
+                    vec![lhs_cols, rhs_cols],
+                    op,
+                )
+            }
+        };
+
+        if !check_maybe_monotonic(&args[0])?
{ + return Err(ErrorCode::UnknownException( + "Only support the monotonic expression", + )); + } + + let mut fields = Vec::with_capacity(cols.len()); + + let left_cols = get_column_fields(schema, cols[0].clone())?; + + let left_name = args[0].pretty_display(); + let left_field = DataField::new(&left_name, args[0].data_type().clone()); + + fields.push((left_field, left_cols)); + + if cols.len() > 1 { + let right_cols = get_column_fields(schema, cols[1].clone())?; + let right_name = args[1].pretty_display(); + let right_field = DataField::new(&right_name, args[1].data_type().clone()); + fields.push((right_field, right_cols)); + } + + Ok(Self { + op, + args, + fields, + stat_columns, + }) } fn build(&mut self) -> Result { @@ -615,17 +692,14 @@ pub fn check_maybe_monotonic(expr: &PhysicalScalar) -> Result { PhysicalScalar::IndexedVariable { .. } => Ok(true), PhysicalScalar::Function { name, args, .. } => get_maybe_monotonic(name, args), PhysicalScalar::Cast { input, .. } => check_maybe_monotonic(input.as_ref()), - _ => Ok(false), } } -fn get_column_fields(schema: &DataSchemaRef, cols: HashSet) -> Result { +fn get_column_fields(schema: &DataSchemaRef, cols: HashSet) -> Result { let mut column_fields = HashMap::with_capacity(cols.len()); + for col in &cols { - let (index, field) = schema - .column_with_name(col.as_str()) - .ok_or_else(|| ErrorCode::UnknownException("Unable to find the column name"))?; - column_fields.insert(index as u32, field.clone()); + column_fields.insert(*col as u32, schema.field(*col).clone()); } Ok(column_fields) } From 71f01b5b748575aca67132d2f12503db79b47c0f Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Wed, 26 Oct 2022 17:11:37 +0800 Subject: [PATCH 06/47] refactor(query): add visitor --- .../planner/src/physical_scalar_visitor.rs | 154 ++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 src/query/planner/src/physical_scalar_visitor.rs diff --git a/src/query/planner/src/physical_scalar_visitor.rs b/src/query/planner/src/physical_scalar_visitor.rs new file mode 100644 index 000000000000..cd93653495ec --- /dev/null +++ b/src/query/planner/src/physical_scalar_visitor.rs @@ -0,0 +1,154 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashSet; + +use common_exception::Result; + +use crate::PhysicalScalar; + +/// Controls how the visitor recursion should proceed. +pub enum Recursion { + /// Attempt to visit all the children, recursively, of this expression. + Continue(V), + /// Do not visit the children of this expression, though the walk + /// of parents of this expression will not be affected + Stop(V), +} + +/// Encode the traversal of an expression tree. When passed to +/// `PhysicalScalarVisitor::accept`, `PhysicalScalarVisitor::visit` is invoked +/// recursively on all nodes of an expression tree. 
See the comments
+/// on `PhysicalScalarVisitor::accept` for details on its use
+pub trait PhysicalScalarVisitor: Sized {
+    /// Invoked before any children of `expr` are visited.
+    fn pre_visit(self, expr: &PhysicalScalar) -> Result<Recursion<Self>>;
+
+    fn visit(mut self, predecessor_expr: &PhysicalScalar) -> Result<Self> {
+        let mut stack = vec![RecursionProcessing::Call(predecessor_expr)];
+        while let Some(element) = stack.pop() {
+            match element {
+                RecursionProcessing::Ret(expr) => {
+                    self = self.post_visit(expr)?;
+                }
+                RecursionProcessing::Call(expr) => {
+                    stack.push(RecursionProcessing::Ret(expr));
+                    self = match self.pre_visit(expr)? {
+                        Recursion::Stop(visitor) => visitor,
+                        Recursion::Continue(visitor) => {
+                            match expr {
+                                PhysicalScalar::Function { args, .. } => {
+                                    for arg in args {
+                                        stack.push(RecursionProcessing::Call(arg));
+                                    }
+                                }
+                                PhysicalScalar::Cast { input, .. } => {
+                                    stack.push(RecursionProcessing::Call(input));
+                                }
+                                _ => {}
+                            };
+
+                            visitor
+                        }
+                    }
+                }
+            }
+        }
+
+        Ok(self)
+    }
+
+    /// Invoked after all children of `scalar` are visited. Default
+    /// implementation does nothing.
+    fn post_visit(self, _scalar: &PhysicalScalar) -> Result<Self> {
+        Ok(self)
+    }
+}
+
+impl PhysicalScalar {
+    /// Performs a depth first walk of an expression and
+    /// its children, calling [`PhysicalScalarVisitor::pre_visit`] and
+    /// `visitor.post_visit`.
+    ///
+    /// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) to
+    /// separate expression algorithms from the structure of the
+    /// `Expr` tree and make it easier to add new types of expressions
+    /// and algorithms that walk the tree.
+    ///
+    /// For an expression tree such as
+    /// ```text
+    /// BinaryExpr (GT)
+    ///    left: Column("foo")
+    ///    right: Column("bar")
+    /// ```
+    ///
+    /// The nodes are visited using the following order
+    /// ```text
+    /// pre_visit(ScalarFunction(GT))
+    /// pre_visit(Column("foo"))
+    /// post_visit(Column("foo"))
+    /// pre_visit(Column("bar"))
+    /// post_visit(Column("bar"))
+    /// post_visit(ScalarFunction(GT))
+    /// ```
+    ///
+    /// If an Err result is returned, recursion is stopped immediately
+    pub fn accept<V: PhysicalScalarVisitor>(&self, visitor: V) -> Result<V> {
+        let visitor = match visitor.pre_visit(self)? {
+            Recursion::Continue(visitor) => visitor,
+            // If the recursion should stop, do not visit children
+            Recursion::Stop(visitor) => return Ok(visitor),
+        };
+
+        let visitor = visitor.visit(self)?;
+        visitor.post_visit(self)
+    }
+}
+
+enum RecursionProcessing<'a> {
+    Call(&'a PhysicalScalar),
+    Ret(&'a PhysicalScalar),
+}
+
+// This visitor recursively visits the expression tree and collects all referenced columns.
+pub struct RequireColumnsVisitor {
+    pub required_columns: HashSet<usize>,
+}
+
+impl RequireColumnsVisitor {
+    pub fn default() -> Self {
+        Self {
+            required_columns: HashSet::new(),
+        }
+    }
+
+    pub fn collect_columns_from_expr(expr: &PhysicalScalar) -> Result<HashSet<usize>> {
+        let mut visitor = Self::default();
+        visitor = expr.accept(visitor)?;
+        Ok(visitor.required_columns)
+    }
+}
+
+impl PhysicalScalarVisitor for RequireColumnsVisitor {
+    fn pre_visit(self, expr: &PhysicalScalar) -> Result<Recursion<Self>> {
+        match expr {
+            PhysicalScalar::IndexedVariable { index, ..
} => { + let mut v = self; + v.required_columns.insert(*index); + Ok(Recursion::Continue(v)) + } + _ => Ok(Recursion::Continue(self)), + } + } +} From c874c1464e9f6f0a5b9089937622a6a4866e8f0e Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Wed, 26 Oct 2022 21:42:45 +0800 Subject: [PATCH 07/47] refactor(query): add monotonicity check --- src/query/sql/src/evaluator/mod.rs | 2 + src/query/storages/index/src/bloom.rs | 19 +++--- src/query/storages/index/src/range_filter.rs | 64 +++++++++---------- .../index/tests/it/filters/bloom_filter.rs | 9 ++- 4 files changed, 46 insertions(+), 48 deletions(-) diff --git a/src/query/sql/src/evaluator/mod.rs b/src/query/sql/src/evaluator/mod.rs index 368c22840deb..9d61a2fff10d 100644 --- a/src/query/sql/src/evaluator/mod.rs +++ b/src/query/sql/src/evaluator/mod.rs @@ -13,12 +13,14 @@ // limitations under the License. mod eval_node; +mod monotonicity; mod physical_scalar; mod scalar; use common_datavalues::ColumnRef; use common_datavalues::DataTypeImpl; pub use eval_node::EvalNode; +pub use monotonicity::PhysicalScalarMonotonicityVisitor; pub use physical_scalar::PhysicalScalarOp; pub struct Evaluator; diff --git a/src/query/storages/index/src/bloom.rs b/src/query/storages/index/src/bloom.rs index 11459dbafda4..1032dbe1745d 100644 --- a/src/query/storages/index/src/bloom.rs +++ b/src/query/storages/index/src/bloom.rs @@ -83,7 +83,6 @@ impl BlockFilter { for (index, field) in fields.iter().enumerate() { if Xor8Filter::is_supported_type(field.data_type()) { // create field for applicable ones - let column_name = Self::build_filter_column(index); let filter_field = DataField::new(&column_name, Vu8::to_data_type()); @@ -192,16 +191,14 @@ impl BlockFilter { pub fn eval(&self, expr: &PhysicalScalar) -> Result { // TODO: support multiple columns and other ops like 'in' ... match expr { - PhysicalScalar::Function { - name, - args, - return_type, - } if args.len() == 2 => match name.to_lowercase().as_str() { - "=" => self.eval_equivalent_expression(&args[0], &args[1]), - "and" => self.eval_logical_and(&args[0], &args[1]), - "or" => self.eval_logical_or(&args[0], &args[1]), - _ => Ok(FilterEvalResult::NotApplicable), - }, + PhysicalScalar::Function { name, args, .. 
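[editor's note] For the `and`/`or` dispatch above, the useful property is that a bloom probe can only ever prove "definitely absent". A sketch of the conservative combination rules, with illustrative variant names rather than the real `FilterEvalResult`:

// Three-state outcome of probing the bloom index for one operand.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Probe {
    MustFalse,     // index proves no row in the block can match
    NotApplicable, // index cannot decide for this operand
    Possible,      // a match may exist
}

// `and` prunes when either side is definitely false.
fn probe_and(a: Probe, b: Probe) -> Probe {
    match (a, b) {
        (Probe::MustFalse, _) | (_, Probe::MustFalse) => Probe::MustFalse,
        (Probe::NotApplicable, x) | (x, Probe::NotApplicable) => x,
        _ => Probe::Possible,
    }
}

// `or` prunes only when both sides are definitely false.
fn probe_or(a: Probe, b: Probe) -> Probe {
    match (a, b) {
        (Probe::MustFalse, Probe::MustFalse) => Probe::MustFalse,
        _ => Probe::Possible, // otherwise keep the block conservatively
    }
}

fn main() {
    assert_eq!(probe_and(Probe::MustFalse, Probe::Possible), Probe::MustFalse);
    assert_eq!(probe_or(Probe::MustFalse, Probe::Possible), Probe::Possible);
}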
} if args.len() == 2 => { + match name.to_lowercase().as_str() { + "=" => self.eval_equivalent_expression(&args[0], &args[1]), + "and" => self.eval_logical_and(&args[0], &args[1]), + "or" => self.eval_logical_or(&args[0], &args[1]), + _ => Ok(FilterEvalResult::NotApplicable), + } + } _ => Ok(FilterEvalResult::NotApplicable), } } diff --git a/src/query/storages/index/src/range_filter.rs b/src/query/storages/index/src/range_filter.rs index daebe7874026..5ea69dbc32e2 100644 --- a/src/query/storages/index/src/range_filter.rs +++ b/src/query/storages/index/src/range_filter.rs @@ -25,13 +25,13 @@ use common_exception::Result; use common_functions::scalars::check_pattern_type; use common_functions::scalars::FunctionContext; use common_functions::scalars::FunctionFactory; -use common_functions::scalars::Monotonicity; use common_functions::scalars::PatternType; use common_fuse_meta::meta::StatisticsOfColumns; use common_planner::PhysicalScalar; use common_planner::RequireColumnsVisitor; use common_sql::evaluator::EvalNode; use common_sql::evaluator::Evaluator; +use common_sql::evaluator::PhysicalScalarMonotonicityVisitor; use common_sql::evaluator::PhysicalScalarOp; #[derive(Clone)] @@ -138,14 +138,10 @@ pub fn build_verifiable_expr( data_type: bool::to_data_type(), }; - /// TODO: Try to convert `not(is_not_null)` to `is_null`. + // TODO: Try to convert `not(is_not_null)` to `is_null`. let (exprs, op) = match expr { PhysicalScalar::Constant { .. } => return expr.clone(), - PhysicalScalar::Function { - name, - args, - return_type, - } if args.len() == 2 => { + PhysicalScalar::Function { name, args, .. } if args.len() == 2 => { let left = &args[0]; let right = &args[1]; match name.to_lowercase().as_str() { @@ -203,7 +199,7 @@ impl fmt::Display for StatType { } pub type StatColumns = Vec; -pub type ColumnFields = HashMap; +pub type ColumnFields = HashMap; #[derive(Debug, Clone)] pub struct StatColumn { @@ -244,7 +240,8 @@ impl StatColumn { if self.stat_type == StatType::Nulls { // The len of column_fields is 1. 
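[editor's note] For orientation, the whole `StatColumn` machinery exists so that a predicate over real columns can be decided from per-block statistics alone. A toy model of the decision, with invented numbers for illustration:

// Toy per-block column statistics, as kept in segment metadata.
struct ColumnStats {
    min: i64,
    max: i64,
    null_count: u64,
}

// `col > lit` can possibly match a block only if the block's max exceeds lit;
// this is the kind of rewrite build_verifiable_expr produces (col > 5 becomes
// max(col) > 5 over the stats schema).
fn block_may_match_gt(stats: &ColumnStats, lit: i64) -> bool {
    stats.max > lit
}

fn main() {
    let block = ColumnStats { min: 1, max: 5, null_count: 0 };
    assert!(!block_may_match_gt(&block, 5)); // prune: no row can satisfy col > 5
    assert!(block_may_match_gt(&block, 3));  // keep: some row may satisfy col > 3
}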
let (k, _) = self.column_fields.iter().next().unwrap(); - let stat = stats.get(k).ok_or_else(|| { + let k = *k as u32; + let stat = stats.get(&k).ok_or_else(|| { ErrorCode::UnknownException(format!( "Unable to get the colStats by ColumnId: {}", k @@ -256,10 +253,11 @@ impl StatColumn { let mut single_point = true; let mut variables = HashMap::with_capacity(self.column_fields.len()); for (k, v) in &self.column_fields { - let stat = stats.get(k).ok_or_else(|| { + let k32 = *k as u32; + let stat = stats.get(&k32).ok_or_else(|| { ErrorCode::UnknownException(format!( "Unable to get the colStats by ColumnId: {}", - k + k32 )) })?; @@ -272,21 +270,18 @@ impl StatColumn { let max_col = v.data_type().create_constant_column(&stat.max, 1)?; let variable_right = Some(ColumnWithField::new(max_col, v.clone())); - variables.insert(v.name().clone(), (variable_left, variable_right)); + variables.insert(*k, (variable_left, variable_right)); } - // TODO: sundy - - let monotonicity = Monotonicity::default(); - // let monotonicity = ExpressionMonotonicityVisitor::check_expression( - // schema, - // &self.expr, - // variables, - // single_point, - // ); - // if !monotonicity.is_monotonic { - // return Ok(None); - // } + let monotonicity = PhysicalScalarMonotonicityVisitor::check_expression( + schema, + &self.expr, + variables, + single_point, + ); + if !monotonicity.is_monotonic { + return Ok(None); + } let column_with_field_opt = match self.stat_type { StatType::Min => { @@ -597,16 +592,21 @@ impl<'a> VerifiableExprBuilder<'a> { &data_field, self.args[index].clone(), ); - if !self - .stat_columns - .iter() - .any(|c| c.stat_type == stat_type && c.stat_field.name() == data_field.name()) - { + + let column_index = + self.stat_columns.iter().enumerate().find(|&(_, c)| { + c.stat_type == stat_type && c.stat_field.name() == data_field.name() + }); + + let column_index = if let Some((column_index, _)) = column_index { + column_index + } else { self.stat_columns.push(stat_col.clone()); - } + self.stat_columns.len() - 1 + }; Ok(PhysicalScalar::IndexedVariable { - index, + index: column_index, data_type: stat_col.stat_field.data_type().clone(), display_name: stat_col.stat_field.name().to_string(), }) @@ -699,7 +699,7 @@ fn get_column_fields(schema: &DataSchemaRef, cols: HashSet) -> Result Result<()> { assert_eq!(supported_types.len(), index.filter_block.columns().len()); // check index columns - schema.fields().iter().for_each(|field| { - let col_name = BlockFilter::build_filter_column_name(field.name()); + schema.fields().iter().enumerate().for_each(|(i, field)| { + let col_name = BlockFilter::build_filter_column(i); let maybe_index_col = index.filter_block.try_column_by_name(&col_name); if supported_types.contains(field.data_type()) { assert!(maybe_index_col.is_ok(), "check field {}", field.name()) @@ -82,12 +82,11 @@ fn test_column_type_support() -> Result<()> { }); // check applicable - schema.fields().iter().for_each(|field| { + schema.fields().iter().enumerate().for_each(|(i, field)| { // type of input data value does not matter here, will be casted during filtering let value = DataValue::Boolean(true); - let col_name = field.name().as_str(); let data_type = field.data_type(); - let r = index.find(col_name, value, data_type).unwrap(); + let r = index.find(i, value, data_type).unwrap(); if supported_types.contains(field.data_type()) { assert_ne!( r, From 5675401b44c8a3a323a1e6abf320c8b804daffff Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Wed, 26 Oct 2022 21:42:50 +0800 Subject: [PATCH 08/47] 
refactor(query): add monotonicity check

---
 src/query/sql/src/evaluator/monotonicity.rs | 224 ++++++++++++++++++++
 1 file changed, 224 insertions(+)
 create mode 100644 src/query/sql/src/evaluator/monotonicity.rs

diff --git a/src/query/sql/src/evaluator/monotonicity.rs b/src/query/sql/src/evaluator/monotonicity.rs
new file mode 100644
index 000000000000..9e39d767fdb0
--- /dev/null
+++ b/src/query/sql/src/evaluator/monotonicity.rs
@@ -0,0 +1,224 @@
+// Copyright 2022 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+
+use common_datavalues::prelude::*;
+use common_datavalues::DataField;
+use common_datavalues::DataSchemaRef;
+use common_exception::ErrorCode;
+use common_exception::Result;
+use common_functions::scalars::Function;
+use common_functions::scalars::FunctionContext;
+use common_functions::scalars::FunctionFactory;
+use common_functions::scalars::Monotonicity;
+use common_planner::PhysicalScalar;
+use common_planner::PhysicalScalarVisitor;
+use common_planner::Recursion;
+
+// PhysicalScalarMonotonicityVisitor visits the expression tree to calculate monotonicity.
+// For example, a function of Add(Neg(number), 5) for number < -100 will have a tree like this:
+//
+// . MonotonicityNode::Function -- 'Add'
+//   (mono: is_positive=true, Range{105, MAX})
+//        /          \
+//       /            \
+// MonotonicityNode::Function -- f(x)=-x     Monotonicity::Constant -- 5
+// (mono: is_positive=true, range{100, MAX})
+//     /
+//    /
+// MonotonicityNode::Function -- f(x)=x
+// (range{MIN, -100})
+//
+// The structure of the tree is basically the structure of the expression.
+// A simple depth-first search visits the expression tree and gets monotonicity from
+// every function. Each function is responsible to implement its own monotonicity
+// function.
+#[derive(Clone)]
+pub struct PhysicalScalarMonotonicityVisitor {
+    input_schema: DataSchemaRef,
+    // HashMap<column index, (variable_left, variable_right)>
+    // variable_left: the variable range left.
+    // variable_right: the variable range right.
+    variables: HashMap<usize, (Option<ColumnWithField>, Option<ColumnWithField>)>,
+    stack: Vec<(DataTypeImpl, Monotonicity)>,
+
+    single_point: bool,
+}
+
+impl PhysicalScalarMonotonicityVisitor {
+    fn create(
+        input_schema: DataSchemaRef,
+        variables: HashMap<usize, (Option<ColumnWithField>, Option<ColumnWithField>)>,
+        single_point: bool,
+    ) -> Self {
+        Self {
+            input_schema,
+            variables,
+            stack: vec![],
+            single_point,
+        }
+    }
+
+    pub fn finalize(mut self) -> Result<Monotonicity> {
+        match self.stack.len() {
+            1 => {
+                let (_, monotonic) = self.stack.remove(0);
+                Ok(monotonic)
+            }
+            _ => Err(ErrorCode::LogicalError(
+                "Stack has too many elements in PhysicalScalarMonotonicityVisitor::finalize",
+            )),
+        }
+    }
+
+    fn try_calculate_boundary(
+        func: &dyn Function,
+        result_type: &DataTypeImpl,
+        args: Vec<Option<ColumnWithField>>,
+    ) -> Result<Option<ColumnWithField>> {
+        if args.iter().any(|col| col.is_none()) {
+            Ok(None)
+        } else {
+            let input_columns = args
+                .into_iter()
+                .map(|col_opt| col_opt.unwrap())
+                .collect::<Vec<_>>();
+            // TODO(veeupup): whether we need to pass function context here?
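[editor's note] The point of tracking monotonicity: if an expression is monotonic over a column's [min, max] interval, its own min/max over the block can be computed by evaluating only the interval endpoints, which is exactly what the boundary calculation here does. A self-contained sketch of that idea:

// If f is monotonic on [lo, hi], its image there is [f(lo), f(hi)],
// swapped when f is decreasing.
fn monotone_image(f: impl Fn(i64) -> i64, lo: i64, hi: i64, increasing: bool) -> (i64, i64) {
    if increasing { (f(lo), f(hi)) } else { (f(hi), f(lo)) }
}

fn main() {
    // Mirrors the Add(Neg(number), 5) walkthrough above: for number in
    // [-200, -100], -number + 5 ranges over [105, 205].
    let (lo, hi) = monotone_image(|x| -x + 5, -200, -100, false);
    assert_eq!((lo, hi), (105, 205));
}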
+ let col = func.eval(FunctionContext::default(), &input_columns, 1)?; + let data_field = DataField::new("dummy", result_type.clone()); + let data_column_field = ColumnWithField::new(col, data_field); + Ok(Some(data_column_field)) + } + } + + fn visit_function(mut self, op: &str, args_size: usize) -> Result { + let mut left_vec = Vec::with_capacity(args_size); + let mut right_vec = Vec::with_capacity(args_size); + let mut arg_types = Vec::with_capacity(args_size); + let mut monotonicity_vec = Vec::with_capacity(args_size); + + for index in 0..args_size { + match self.stack.pop() { + None => { + return Err(ErrorCode::LogicalError(format!( + "Expected {} arguments, actual {}.", + args_size, index + ))); + } + Some((arg_type, monotonic)) => { + left_vec.push(monotonic.left.clone()); + right_vec.push(monotonic.right.clone()); + arg_types.push(arg_type); + monotonicity_vec.push(monotonic); + } + } + } + + let instance = FunctionFactory::instance(); + + let arg_types: Vec<&DataTypeImpl> = arg_types.iter().collect(); + let func = instance.get(op, &arg_types)?; + + let return_type = func.return_type(); + let mut monotonic = match self.single_point { + false => func.get_monotonicity(monotonicity_vec.as_ref())?, + true => { + let features = instance.get_features(op)?; + if features.is_deterministic { + Monotonicity::create_constant() + } else { + Monotonicity::default() + } + } + }; + + // Neither a monotonic expression nor constant, interrupt the traversal and return an error directly. + if !monotonic.is_monotonic && !monotonic.is_constant { + return Err(ErrorCode::UnknownException(format!( + "Function '{}' is not monotonic in the variables range", + op + ))); + } + + monotonic.left = Self::try_calculate_boundary(func.as_ref(), &return_type, left_vec)?; + monotonic.right = Self::try_calculate_boundary(func.as_ref(), &return_type, right_vec)?; + + self.stack.push((return_type, monotonic)); + Ok(self) + } + + /// Check whether the expression is monotonic or not. The left should be <= right. + /// Return the monotonicity information, together with column name if any. + pub fn check_expression( + schema: DataSchemaRef, + expr: &PhysicalScalar, + variables: HashMap, Option)>, + single_point: bool, + ) -> Monotonicity { + let visitor = Self::create(schema, variables, single_point); + visitor.visit(expr).map_or(Monotonicity::default(), |v| { + v.finalize().unwrap_or_else(|_| Monotonicity::default()) + }) + } +} + +impl PhysicalScalarVisitor for PhysicalScalarMonotonicityVisitor { + fn pre_visit(self, _expr: &PhysicalScalar) -> Result> { + Ok(Recursion::Continue(self)) + } + + fn post_visit(mut self, expr: &PhysicalScalar) -> Result { + match expr { + PhysicalScalar::IndexedVariable { index, .. 
} => { + let (left, right) = self.variables.get(index).ok_or_else(|| { + ErrorCode::BadArguments(format!("Cannot find the column: '{:?}'", *index)) + })?; + + let field = self.input_schema.field(*index); + let return_type = field.data_type(); + + let monotonic = Monotonicity { + is_monotonic: true, + is_positive: true, + is_constant: false, + left: left.clone(), + right: right.clone(), + }; + + self.stack.push((return_type.clone(), monotonic)); + Ok(self) + } + PhysicalScalar::Constant { value, data_type } => { + let name = value.to_string(); + let data_field = DataField::new(&name, data_type.clone()); + + let col = data_type.create_constant_column(value, 1)?; + let data_column_field = ColumnWithField::new(col, data_field); + let monotonic = Monotonicity { + is_monotonic: true, + is_positive: true, + is_constant: true, + left: Some(data_column_field.clone()), + right: Some(data_column_field), + }; + + self.stack.push((data_type.clone(), monotonic)); + Ok(self) + } + PhysicalScalar::Function { name, args, .. } => self.visit_function(name, args.len()), + _ => Err(ErrorCode::UnknownException("Unable to get monotonicity")), + } + } +} From 516ff051af88601dccc9f8c7889357738163d5ee Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 27 Oct 2022 18:58:35 +0800 Subject: [PATCH 09/47] merge --- .../storages/fuse/src/operations/append.rs | 2 - .../storages/fuse/src/operations/delete.rs | 5 +- .../src/operations/mutation/block_filter.rs | 8 +-- .../storages/fuse/src/operations/read_data.rs | 8 ++- .../storages/fuse/src/operations/recluster.rs | 2 +- src/query/storages/fuse/src/pruning/pruner.rs | 66 +++++++++---------- .../fuse/src/pruning/pruning_executor.rs | 2 +- .../storages/fuse/src/pruning/topn_pruner.rs | 3 - .../clustering_informations/table_args.rs | 1 - 9 files changed, 45 insertions(+), 52 deletions(-) diff --git a/src/query/storages/fuse/src/operations/append.rs b/src/query/storages/fuse/src/operations/append.rs index 77c1c650d739..ab8e90698b73 100644 --- a/src/query/storages/fuse/src/operations/append.rs +++ b/src/query/storages/fuse/src/operations/append.rs @@ -17,8 +17,6 @@ use std::sync::Arc; use common_catalog::table_context::TableContext; use common_datablocks::SortColumnDescription; -use common_datavalues::DataField; -use common_datavalues::DataSchemaRefExt; use common_exception::Result; use common_pipeline_core::Pipeline; use common_pipeline_transforms::processors::transforms::TransformCompact; diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index 83cd47b334a1..aea4049495e6 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -15,9 +15,6 @@ use std::sync::Arc; use common_catalog::table_context::TableContext; -use common_datavalues::DataField; -use common_datavalues::DataSchemaRefExt; -use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::TableSnapshot; use common_planner::extras::Extras; @@ -145,7 +142,7 @@ impl FuseTable { } fn cluster_stats_gen(&self, ctx: Arc) -> Result { - // todo:(sundy) + // todo(sundy) todo!() } } diff --git a/src/query/storages/fuse/src/operations/mutation/block_filter.rs b/src/query/storages/fuse/src/operations/mutation/block_filter.rs index 1faa083a473e..73ce6b2824c4 100644 --- a/src/query/storages/fuse/src/operations/mutation/block_filter.rs +++ b/src/query/storages/fuse/src/operations/mutation/block_filter.rs @@ -18,12 +18,12 @@ use std::sync::Arc; use 
common_catalog::table_context::TableContext; use common_datablocks::DataBlock; use common_datavalues::BooleanColumn; -use common_datavalues::DataSchemaRefExt; use common_datavalues::Series; use common_exception::Result; use common_fuse_meta::meta::BlockMeta; use common_planner::plans::Projection; use common_planner::PhysicalScalar; +use common_sql::evaluator::Evaluator; use crate::operations::mutation::deletion_mutator::Deletion; use crate::FuseTable; @@ -61,10 +61,10 @@ pub async fn delete_from_block( let reader = table.create_block_reader(proj)?; let data_block = reader.read_with_block_meta(block_meta).await?; - // todo(sundy) - let filter_result = DataBlock::empty(); + let eval_node = Evaluator::eval_physical_scalar(filter_expr)?; + let filter_result = eval_node.eval(&ctx.try_get_function_context()?, &data_block)?.vector; + let predicates = DataBlock::cast_to_nonull_boolean(&filter_result)?; - let predicates = DataBlock::cast_to_nonull_boolean(filter_result.column(0))?; // shortcut, if predicates is const boolean (or can be cast to boolean) if let Some(const_bool) = DataBlock::try_as_const_bool(&predicates)? { return if const_bool { diff --git a/src/query/storages/fuse/src/operations/read_data.rs b/src/query/storages/fuse/src/operations/read_data.rs index d59c6ac46227..b9be2ee47252 100644 --- a/src/query/storages/fuse/src/operations/read_data.rs +++ b/src/query/storages/fuse/src/operations/read_data.rs @@ -16,7 +16,8 @@ use std::sync::Arc; use common_base::base::Runtime; use common_catalog::table_context::TableContext; -use common_datavalues::DataSchemaRefExt; +use common_datablocks::DataBlock; +use common_datavalues::ColumnRef; use common_exception::ErrorCode; use common_exception::Result; use common_functions::scalars::FunctionContext; @@ -31,6 +32,7 @@ use common_planner::plans::Projection; use common_planner::PartInfoPtr; use common_planner::ReadDataSourcePlan; use common_sql::evaluator::EvalNode; +use common_sql::evaluator::Evaluator; use tracing::info; use crate::fuse_lazy_part::FuseLazyPartInfo; @@ -98,8 +100,8 @@ impl FuseTable { Ok(match self.prewhere_of_push_downs(&plan.push_downs) { None => Arc::new(None), Some(v) => { - // todo(sundy) - todo!() + let executor = Evaluator::eval_physical_scalar(&v.filter)?; + Arc::new(Some(executor)) } }) } diff --git a/src/query/storages/fuse/src/operations/recluster.rs b/src/query/storages/fuse/src/operations/recluster.rs index 132701b4b30c..63ba699527cb 100644 --- a/src/query/storages/fuse/src/operations/recluster.rs +++ b/src/query/storages/fuse/src/operations/recluster.rs @@ -149,7 +149,7 @@ impl FuseTable { .cluster_keys .iter() .map(|expr| SortColumnDescription { - column_name: "_cluster_key".to_string(), + column_name: expr.pretty_display(), asc: true, nulls_first: false, }) diff --git a/src/query/storages/fuse/src/pruning/pruner.rs b/src/query/storages/fuse/src/pruning/pruner.rs index 3d49a933c161..230329c49ff9 100644 --- a/src/query/storages/fuse/src/pruning/pruner.rs +++ b/src/query/storages/fuse/src/pruning/pruner.rs @@ -35,7 +35,7 @@ pub trait Pruner { struct FilterPruner { ctx: Arc, - /// columns that should be loaded from filter block + /// indices that should be loaded from filter block index_columns: Vec, /// the expression that would be evaluate @@ -120,15 +120,12 @@ pub fn new_filter_pruner( }) .unwrap(); - let point_query_cols = columns_names_of_eq_expressions(&expr)?; + let point_query_cols = columns_indices_of_eq_expressions(&expr)?; if !point_query_cols.is_empty() { // convert to filter column names - - // todo(sundy) - 
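[editor's note] The constant-predicate shortcut in `delete_from_block` above avoids rewriting blocks when the filter folds to a constant boolean. A toy model of the three outcomes, assuming `true` in the mask marks a deleted row (variant names are illustrative, not the real `Deletion` enum):

// Outcome of applying a deletion predicate to one block.
#[derive(Debug, PartialEq)]
enum Outcome {
    NothingDeleted,     // predicate constant-false: keep the block untouched
    WholeBlockDeleted,  // predicate constant-true: drop the block, rewrite nothing
    Partial(Vec<bool>), // mixed: rewrite the block keeping rows where the mask is false
}

fn plan_deletion(mask: Vec<bool>) -> Outcome {
    if mask.iter().all(|&d| d) {
        Outcome::WholeBlockDeleted
    } else if !mask.iter().any(|&d| d) {
        Outcome::NothingDeleted
    } else {
        Outcome::Partial(mask)
    }
}

fn main() {
    assert_eq!(plan_deletion(vec![true, true]), Outcome::WholeBlockDeleted);
    assert_eq!(plan_deletion(vec![false, false]), Outcome::NothingDeleted);
}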
let idx = 0; let filter_block_cols = point_query_cols .into_iter() - .map(|n| BlockFilter::build_filter_column(idx)) + .map(|index| BlockFilter::build_filter_column(index)) .collect(); return Ok(Some(Arc::new(FilterPruner::new( @@ -147,6 +144,7 @@ pub fn new_filter_pruner( mod util { use common_exception::ErrorCode; + use common_planner::PhysicalScalarVisitor; use super::*; #[tracing::instrument(level = "debug", skip_all)] @@ -161,7 +159,7 @@ mod util { ) -> Result { // load the relevant index columns let maybe_filter = index_location - .read_filter(ctx.clone(), dal, filter_col_names, index_length) + .read_filter(ctx.clone(), dal, &filter_col_names, index_length) .await; match maybe_filter { @@ -179,37 +177,39 @@ mod util { } struct PointQueryVisitor { - // names of columns which used by point query kept here - columns: HashSet, + // indices of columns which used by point query kept here + columns: HashSet, + } + + impl PhysicalScalarVisitor for PointQueryVisitor { + fn pre_visit(mut self, expr: &PhysicalScalar) -> Result> { + // 1. only binary op "=" is considered, which is NOT enough + // 2. should combine this logic with Filter + match expr { + PhysicalScalar::Function { name, args, .. } if name.as_str() == "=" && args.len() == 2 => { + match (&args[0], &args[1]) { + (PhysicalScalar::IndexedVariable { index, ..}, PhysicalScalar::Constant { .. }) + | (PhysicalScalar::Constant { .. }, PhysicalScalar::IndexedVariable { index, .. }) => { + self.columns.insert(*index); + Ok(common_planner::Recursion::Stop(self)) + } + _ => Ok(common_planner::Recursion::Continue(self)), + } + } + _ => Ok(common_planner::Recursion::Continue(self)), + } + } } - // todo(sundy): - // impl ExpressionVisitor for PointQueryVisitor { - // fn pre_visit(mut self, expr: &PhysicalScalar) -> Result> { - // // TODO - // // 1. only binary op "=" is considered, which is NOT enough - // // 2. should combine this logic with Filter - // match expr { - // PhysicalScalar::BinaryExpression { left, op, right } if op.as_str() == "=" => { - // match (left.as_ref(), right.as_ref()) { - // (PhysicalScalar::Column(column), PhysicalScalar::Literal { .. }) - // | (PhysicalScalar::Literal { .. 
}, PhysicalScalar::Column(column)) => { - // self.columns.insert(column.clone()); - // Ok(Recursion::Stop(self)) - // } - // _ => Ok(Recursion::Continue(self)), - // } - // } - // _ => Ok(Recursion::Continue(self)), - // } - // } - // } - - pub fn columns_names_of_eq_expressions(filter_expr: &PhysicalScalar) -> Result> { + + + pub fn columns_indices_of_eq_expressions(filter_expr: &PhysicalScalar) -> Result> { let visitor = PointQueryVisitor { columns: HashSet::new(), }; - todo!() + filter_expr + .accept(visitor) + .map(|r| r.columns.into_iter().collect()) } } diff --git a/src/query/storages/fuse/src/pruning/pruning_executor.rs b/src/query/storages/fuse/src/pruning/pruning_executor.rs index d25e3eb79399..ae0aeb0fbb40 100644 --- a/src/query/storages/fuse/src/pruning/pruning_executor.rs +++ b/src/query/storages/fuse/src/pruning/pruning_executor.rs @@ -157,7 +157,7 @@ impl BlockPruner { let push_down = push_down.as_ref().unwrap(); let limit = push_down.limit.unwrap(); let sort = push_down.order_by.clone(); - let tpruner = topn_pruner::TopNPrunner::new(schema, sort, limit); + let tpruner = topn_pruner::TopNPrunner::new(sort, limit); return tpruner.prune(metas); } diff --git a/src/query/storages/fuse/src/pruning/topn_pruner.rs b/src/query/storages/fuse/src/pruning/topn_pruner.rs index 41f6b6ce4e11..ce19e5c2fff3 100644 --- a/src/query/storages/fuse/src/pruning/topn_pruner.rs +++ b/src/query/storages/fuse/src/pruning/topn_pruner.rs @@ -20,19 +20,16 @@ use common_fuse_meta::meta::ColumnStatistics; use common_planner::PhysicalScalar; pub(crate) struct TopNPrunner { - schema: DataSchemaRef, sort: Vec<(PhysicalScalar, bool, bool)>, limit: usize, } impl TopNPrunner { pub(crate) fn new( - schema: DataSchemaRef, sort: Vec<(PhysicalScalar, bool, bool)>, limit: usize, ) -> Self { Self { - schema, sort, limit, } diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs index e9058af3f4b1..bc34691c9ac6 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs @@ -19,7 +19,6 @@ use common_planner::PhysicalScalar; use crate::table_functions::string_value; use crate::table_functions::TableArgs; use crate::FuseTable; -use crate::Table; pub fn parse_func_table_args(table_args: &TableArgs) -> Result<(String, String)> { match table_args { From 4bc43636cafa3a9ef3d9653967ef98dfcc80cbd8 Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 27 Oct 2022 09:07:39 +0800 Subject: [PATCH 10/47] implement get_cluster_keys --- Cargo.lock | 2 + .../systems/clustering_information.rs | 2 +- src/query/storages/fuse/Cargo.toml | 2 + .../src/operations/mutation/block_filter.rs | 4 +- src/query/storages/fuse/src/pruning/pruner.rs | 16 ++-- .../storages/fuse/src/pruning/topn_pruner.rs | 10 +-- .../clustering_information_table.rs | 2 +- .../clustering_informations/table_args.rs | 78 +++++++++++++------ 8 files changed, 78 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 048e4f155869..b5ee58b8fcd7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2111,6 +2111,7 @@ dependencies = [ "backon", "chrono", "common-arrow", + "common-ast", "common-base", "common-cache", "common-catalog", @@ -2136,6 +2137,7 @@ dependencies = [ "itertools", "metrics", "opendal", + "parking_lot 0.12.1", "serde", "serde_json", "tracing", diff --git 
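[editor's note] On the `TopNPrunner` touched above: one sound strategy for top-n block pruning under `ORDER BY c ASC LIMIT n`, given per-block (row_count, min, max) statistics for `c`. This is the general idea rather than necessarily the exact algorithm in the patch:

// Returns the indices of blocks that may contain the n smallest values of c.
fn prune_topn(blocks: &[(usize, i64, i64)], n: usize) -> Vec<usize> {
    // Find a cutoff guaranteed to cover the n smallest rows: walk blocks by
    // ascending max until their row counts reach n; every counted row is <= cutoff.
    let mut by_max: Vec<&(usize, i64, i64)> = blocks.iter().collect();
    by_max.sort_by_key(|b| b.2);
    let mut rows = 0;
    let mut cutoff = i64::MAX;
    for b in &by_max {
        rows += b.0;
        if rows >= n {
            cutoff = b.2;
            break;
        }
    }
    // Any block whose min exceeds the cutoff cannot contribute to the top n.
    (0..blocks.len()).filter(|&i| blocks[i].1 <= cutoff).collect()
}

fn main() {
    // Two blocks of 10 rows each; LIMIT 5 is fully covered by the first block.
    let blocks = [(10, 0, 4), (10, 5, 9)];
    assert_eq!(prune_topn(&blocks, 5), vec![0]);
}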
a/src/query/service/src/procedures/systems/clustering_information.rs b/src/query/service/src/procedures/systems/clustering_information.rs index 5615c7b50b4d..ce5e370a1f90 100644 --- a/src/query/service/src/procedures/systems/clustering_information.rs +++ b/src/query/service/src/procedures/systems/clustering_information.rs @@ -61,7 +61,7 @@ impl OneBlockProcedure for ClusteringInformationProcedure { let tbl = FuseTable::try_from_table(tbl.as_ref())?; let definition = if args.len() > 2 { &args[2] } else { "" }; - let cluster_keys = get_cluster_keys(tbl, definition)?; + let cluster_keys = get_cluster_keys(ctx.clone(), tbl, definition).await?; Ok(ClusteringInformation::new(ctx, tbl, cluster_keys) .get_clustering_info() diff --git a/src/query/storages/fuse/Cargo.toml b/src/query/storages/fuse/Cargo.toml index ebb42d248059..c8a1a1e92cf9 100644 --- a/src/query/storages/fuse/Cargo.toml +++ b/src/query/storages/fuse/Cargo.toml @@ -12,6 +12,7 @@ doctest = false test = false [dependencies] +common-ast = { path = "../../ast" } common-arrow = { path = "../../../common/arrow" } common-base = { path = "../../../common/base" } common-cache = { path = "../../../common/cache" } @@ -43,6 +44,7 @@ futures-util = "0.3.24" itertools = "0.10.5" metrics = "0.20.1" opendal = { version = "0.19", features = ["layers-retry"] } +parking_lot = "0.12.1" serde = { workspace = true } serde_json = { workspace = true } tracing = "0.1.36" diff --git a/src/query/storages/fuse/src/operations/mutation/block_filter.rs b/src/query/storages/fuse/src/operations/mutation/block_filter.rs index 73ce6b2824c4..f4a1795163cc 100644 --- a/src/query/storages/fuse/src/operations/mutation/block_filter.rs +++ b/src/query/storages/fuse/src/operations/mutation/block_filter.rs @@ -62,7 +62,9 @@ pub async fn delete_from_block( let data_block = reader.read_with_block_meta(block_meta).await?; let eval_node = Evaluator::eval_physical_scalar(filter_expr)?; - let filter_result = eval_node.eval(&ctx.try_get_function_context()?, &data_block)?.vector; + let filter_result = eval_node + .eval(&ctx.try_get_function_context()?, &data_block)? + .vector; let predicates = DataBlock::cast_to_nonull_boolean(&filter_result)?; // shortcut, if predicates is const boolean (or can be cast to boolean) diff --git a/src/query/storages/fuse/src/pruning/pruner.rs b/src/query/storages/fuse/src/pruning/pruner.rs index 230329c49ff9..4d792c206b9a 100644 --- a/src/query/storages/fuse/src/pruning/pruner.rs +++ b/src/query/storages/fuse/src/pruning/pruner.rs @@ -186,10 +186,18 @@ mod util { // 1. only binary op "=" is considered, which is NOT enough // 2. should combine this logic with Filter match expr { - PhysicalScalar::Function { name, args, .. } if name.as_str() == "=" && args.len() == 2 => { + PhysicalScalar::Function { name, args, .. } + if name.as_str() == "=" && args.len() == 2 => + { match (&args[0], &args[1]) { - (PhysicalScalar::IndexedVariable { index, ..}, PhysicalScalar::Constant { .. }) - | (PhysicalScalar::Constant { .. }, PhysicalScalar::IndexedVariable { index, .. }) => { + ( + PhysicalScalar::IndexedVariable { index, .. }, + PhysicalScalar::Constant { .. }, + ) + | ( + PhysicalScalar::Constant { .. }, + PhysicalScalar::IndexedVariable { index, .. 
}, + ) => { + self.columns.insert(*index); + Ok(common_planner::Recursion::Stop(self)) + } @@ -201,8 +209,6 @@ mod util { } } - - pub fn columns_indices_of_eq_expressions(filter_expr: &PhysicalScalar) -> Result<Vec<usize>> { let visitor = PointQueryVisitor { columns: HashSet::new(), diff --git a/src/query/storages/fuse/src/pruning/topn_pruner.rs b/src/query/storages/fuse/src/pruning/topn_pruner.rs index ce19e5c2fff3..d722531626bc 100644 --- a/src/query/storages/fuse/src/pruning/topn_pruner.rs +++ b/src/query/storages/fuse/src/pruning/topn_pruner.rs @@ -25,14 +25,8 @@ pub(crate) struct TopNPrunner { } impl TopNPrunner { - pub(crate) fn new( - sort: Vec<(PhysicalScalar, bool, bool)>, - limit: usize, - ) -> Self { - Self { - sort, - limit, - } + pub(crate) fn new(sort: Vec<(PhysicalScalar, bool, bool)>, limit: usize) -> Self { + Self { sort, limit } } } diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs index d5e4b006e258..a54319cee424 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs @@ -182,7 +182,7 @@ impl AsyncSource for ClusteringInformationSource { .await?; let tbl = FuseTable::try_from_table(tbl.as_ref())?; - let cluster_keys = get_cluster_keys(tbl, &self.arg_cluster_keys)?; + let cluster_keys = get_cluster_keys(self.ctx.clone(), tbl, &self.arg_cluster_keys).await?; Ok(Some( ClusteringInformation::new(self.ctx.clone(), tbl, cluster_keys) diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs index bc34691c9ac6..0b644b2ba6b9 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs @@ -12,9 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License.
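// Editor's note: an illustrative, self-contained sketch (not part of the patch) of the
// visitor idea behind `columns_indices_of_eq_expressions` above: walk the expression
// tree and record the column index of every `column = constant` (or the reversed
// `constant = column`) equality, stopping the descent at a match just as
// `Recursion::Stop` does. The `Expr` enum below is a simplified stand-in for
// `PhysicalScalar`, not a Databend type.
use std::collections::HashSet;

enum Expr {
    Column(usize),
    Constant(i64),
    Function { name: String, args: Vec<Expr> },
}

fn collect_eq_columns(expr: &Expr, out: &mut HashSet<usize>) {
    if let Expr::Function { name, args } = expr {
        if name.as_str() == "=" && args.len() == 2 {
            if let (Expr::Column(i), Expr::Constant(_)) | (Expr::Constant(_), Expr::Column(i)) =
                (&args[0], &args[1])
            {
                out.insert(*i); // mirrors Recursion::Stop(self)
                return;
            }
        }
        for arg in args {
            collect_eq_columns(arg, out); // mirrors Recursion::Continue(self)
        }
    }
}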
+use std::sync::Arc; + +use common_ast::parser::parse_comma_separated_exprs; +use common_ast::parser::tokenize_sql; +use common_ast::Backtrace; +use common_ast::Dialect; +use common_catalog::table_context::TableContext; use common_exception::ErrorCode; use common_exception::Result; use common_planner::PhysicalScalar; +use common_sql::executor::PhysicalScalarBuilder; +use common_sql::BindContext; +use common_sql::Metadata; +use common_sql::NameResolutionContext; +use common_sql::ScalarBinder; +use parking_lot::RwLock; use crate::table_functions::string_value; use crate::table_functions::TableArgs; @@ -35,26 +48,47 @@ pub fn parse_func_table_args(table_args: &TableArgs) -> Result<(String, String)> } } -pub fn get_cluster_keys(table: &FuseTable, definition: &str) -> Result> { - // todo(sundy) - todo!() - // let cluster_keys = if !definition.is_empty() { - // let schema = table.schema(); - // let exprs = ExpressionParser::parse_exprs(definition)?; - // for expr in exprs.iter() { - // validate_expression(expr, &schema)?; - // } - // exprs - // } else { - // table.cluster_keys() - // }; - - // if cluster_keys.is_empty() { - // return Err(ErrorCode::InvalidClusterKeys(format!( - // "Invalid clustering keys or table {} is not clustered", - // table.name() - // ))); - // } - // - // Ok(cluster_keys) +pub async fn get_cluster_keys( + ctx: Arc, + table: &FuseTable, + definition: &str, +) -> Result> { + let cluster_keys = if !definition.is_empty() { + let schema = table.schema(); + + let sql_dialect = Dialect::MySQL; + let tokens = tokenize_sql(definition)?; + let backtrace = Backtrace::new(); + let exprs = parse_comma_separated_exprs( + &tokens[1..tokens.len() as usize], + sql_dialect, + &backtrace, + )?; + + let settings = ctx.get_settings(); + let bind_context = BindContext::new(); + let name_resolution_ctx = NameResolutionContext::try_from(settings.as_ref())?; + let metadata = Arc::new(RwLock::new(Metadata::default())); + let mut scalar_binder = + ScalarBinder::new(&bind_context, ctx, &name_resolution_ctx, metadata, &[]); + let mut physical_scalars = Vec::with_capacity(exprs.len()); + let mut physical_scalar_builder = PhysicalScalarBuilder::new(&schema); + for expr in exprs.iter() { + let (scalar, _) = scalar_binder.bind(expr).await?; + let physical_scalar = physical_scalar_builder.build(&scalar)?; + physical_scalars.push(physical_scalar); + } + physical_scalars + } else { + table.cluster_keys() + }; + + if cluster_keys.is_empty() { + return Err(ErrorCode::InvalidClusterKeys(format!( + "Invalid clustering keys or table {} is not clustered", + table.name() + ))); + } + + Ok(cluster_keys) } From deb6f3dc2f2d839ec6b7311a2d0c2788ce3ed98b Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Thu, 27 Oct 2022 11:13:08 +0800 Subject: [PATCH 11/47] refactor(query): add append --- Cargo.lock | 2 + src/query/datablocks/src/data_block.rs | 9 +- .../src/interpreters/interpreter_insert_v2.rs | 2 + .../service/src/pipelines/pipeline_builder.rs | 1 + .../processors/transforms/chunk_operator.rs | 147 ------------------ .../pipelines/processors/transforms/mod.rs | 3 - .../processors/transforms/transform_addon.rs | 1 + src/query/sql/Cargo.toml | 4 + src/query/sql/src/evaluator/mod.rs | 3 + src/query/storages/fuse/Cargo.toml | 4 +- .../storages/fuse/src/operations/append.rs | 54 ++++++- .../storages/fuse/src/operations/read_data.rs | 2 +- .../fuse/src/statistics/cluster_statistics.rs | 35 ++--- .../clustering_informations/table_args.rs | 1 + 14 files changed, 92 insertions(+), 176 deletions(-) delete mode 
100644 src/query/service/src/pipelines/processors/transforms/chunk_operator.rs diff --git a/Cargo.lock b/Cargo.lock index b5ee58b8fcd7..305ebbd11df6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1992,7 +1992,9 @@ dependencies = [ "common-meta-store", "common-meta-types", "common-metrics", + "common-pipeline-core", "common-pipeline-sources", + "common-pipeline-transforms", "common-planner", "common-settings", "common-storage", diff --git a/src/query/datablocks/src/data_block.rs b/src/query/datablocks/src/data_block.rs index 87995cbd04f9..fd89b18fd560 100644 --- a/src/query/datablocks/src/data_block.rs +++ b/src/query/datablocks/src/data_block.rs @@ -195,11 +195,10 @@ impl DataBlock { } #[inline] - pub fn remove_column(self, name: &str) -> Result { + pub fn remove_column_index(self, idx: usize) -> Result { let mut columns = self.columns.clone(); let mut fields = self.schema().fields().clone(); - let idx = self.schema.index_of(name)?; columns.remove(idx); fields.remove(idx); let new_schema = Arc::new(DataSchema::new(fields)); @@ -220,6 +219,12 @@ impl DataBlock { }) } + #[inline] + pub fn remove_column(self, name: &str) -> Result { + let idx = self.schema.index_of(name)?; + self.remove_column_index(idx) + } + #[inline] pub fn resort(self, schema: DataSchemaRef) -> Result { let mut columns = Vec::with_capacity(self.num_columns()); diff --git a/src/query/service/src/interpreters/interpreter_insert_v2.rs b/src/query/service/src/interpreters/interpreter_insert_v2.rs index 3277f548245e..2b8146036294 100644 --- a/src/query/service/src/interpreters/interpreter_insert_v2.rs +++ b/src/query/service/src/interpreters/interpreter_insert_v2.rs @@ -32,6 +32,8 @@ use common_io::prelude::NestedCheckpointReader; use common_pipeline_sources::processors::sources::AsyncSource; use common_pipeline_sources::processors::sources::AsyncSourcer; use common_pipeline_transforms::processors::transforms::Transform; +use common_sql::evaluator::ChunkOperator; +use common_sql::evaluator::CompoundChunkOperator; use common_sql::Metadata; use common_sql::MetadataRef; use parking_lot::Mutex; diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index 624c06441d29..cddf710e5277 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -32,6 +32,7 @@ use common_pipeline_core::Pipe; use common_pipeline_sinks::processors::sinks::UnionReceiveSink; use common_planner::AggregateFunctionDesc; use common_planner::PhysicalScalar; +use common_sql::evaluator::CompoundChunkOperator; use crate::interpreters::fill_missing_columns; use crate::pipelines::processors::port::InputPort; diff --git a/src/query/service/src/pipelines/processors/transforms/chunk_operator.rs b/src/query/service/src/pipelines/processors/transforms/chunk_operator.rs deleted file mode 100644 index 7edf62d88f34..000000000000 --- a/src/query/service/src/pipelines/processors/transforms/chunk_operator.rs +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::Arc; - -use common_datablocks::DataBlock; -use common_datavalues::DataField; -use common_datavalues::DataSchemaRef; -use common_exception::Result; -use common_functions::scalars::FunctionContext; -use common_pipeline_core::processors::port::InputPort; -use common_pipeline_core::processors::port::OutputPort; -use common_pipeline_core::processors::processor::ProcessorPtr; -use common_pipeline_transforms::processors::transforms::Transform; -use common_pipeline_transforms::processors::transforms::Transformer; - -use crate::sql::evaluator::EvalNode; - -/// `ChunkOperator` takes a `DataBlock` as input and produces a `DataBlock` as output. -#[derive(Clone)] -pub enum ChunkOperator { - /// Evaluate expression and append result column to the end. - Map { - /// Name of column in `DataBlock`, should be deprecated later. - name: String, - eval: EvalNode, - }, - - /// Filter the input `DataBlock` with the predicate `eval`. - Filter { eval: EvalNode }, - - /// Reorganize the input `DataBlock` with `offsets`. - Project { offsets: Vec }, - - /// Replace name of `DataField`s of input `DataBlock`. - Rename { output_schema: DataSchemaRef }, -} - -impl ChunkOperator { - pub fn execute(&self, func_ctx: &FunctionContext, input: DataBlock) -> Result { - match self { - ChunkOperator::Map { name, eval } => { - let result = eval.eval(func_ctx, &input)?; - input.add_column(result.vector, DataField::new(name, result.logical_type)) - } - - ChunkOperator::Filter { eval } => { - let result = eval.eval(func_ctx, &input)?; - let predicate = result.vector; - DataBlock::filter_block(input, &predicate) - } - - ChunkOperator::Project { offsets } => { - let mut result = DataBlock::empty(); - for offset in offsets { - result = result.add_column( - input.column(*offset).clone(), - input.schema().field(*offset).clone(), - )?; - } - Ok(result) - } - - ChunkOperator::Rename { output_schema } => Ok(DataBlock::create( - output_schema.clone(), - input.columns().to_vec(), - )), - } - } -} - -/// `CompoundChunkOperator` is a pipeline of `ChunkOperator`s -pub struct CompoundChunkOperator { - pub operators: Vec, - pub ctx: FunctionContext, -} - -impl CompoundChunkOperator { - pub fn create( - input_port: Arc, - output_port: Arc, - ctx: FunctionContext, - operators: Vec, - ) -> ProcessorPtr { - Transformer::::create(input_port, output_port, Self { operators, ctx }) - } - - #[allow(dead_code)] - pub fn append(self, operator: ChunkOperator) -> Self { - let mut result = self; - result.operators.push(operator); - result - } - - #[allow(dead_code)] - pub fn merge(self, other: Self) -> Self { - let mut operators = self.operators; - operators.extend(other.operators); - Self { - operators, - ctx: self.ctx, - } - } -} - -impl Transform for CompoundChunkOperator { - const NAME: &'static str = "CompoundChunkOperator"; - - const SKIP_EMPTY_DATA_BLOCK: bool = true; - - fn transform(&mut self, data: DataBlock) -> Result { - self.operators - .iter() - .try_fold(data, |input, op| op.execute(&self.ctx, input)) - } - - fn name(&self) -> String { - format!( - "{}({})", - Self::NAME, - self.operators - .iter() - .map(|op| { - match op { - ChunkOperator::Map { .. } => "Map", - ChunkOperator::Filter { .. } => "Filter", - ChunkOperator::Project { .. } => "Project", - ChunkOperator::Rename { .. 
} => "Rename", - } - .to_string() - }) - .collect::>() - .join("->") - ) - } -} diff --git a/src/query/service/src/pipelines/processors/transforms/mod.rs b/src/query/service/src/pipelines/processors/transforms/mod.rs index c584a4fd171c..687a2f9bf8a2 100644 --- a/src/query/service/src/pipelines/processors/transforms/mod.rs +++ b/src/query/service/src/pipelines/processors/transforms/mod.rs @@ -13,7 +13,6 @@ // limitations under the License. mod aggregator; -mod chunk_operator; pub(crate) mod hash_join; mod transform_addon; mod transform_aggregator; @@ -31,8 +30,6 @@ mod transform_right_semi_anti_join; pub use aggregator::AggregatorParams; pub use aggregator::AggregatorTransformParams; -pub use chunk_operator::ChunkOperator; -pub use chunk_operator::CompoundChunkOperator; use common_pipeline_transforms::processors::transforms::transform; use common_pipeline_transforms::processors::transforms::transform_block_compact; use common_pipeline_transforms::processors::transforms::transform_compact; diff --git a/src/query/service/src/pipelines/processors/transforms/transform_addon.rs b/src/query/service/src/pipelines/processors/transforms/transform_addon.rs index 77ae985617b6..2f4b38c2330b 100644 --- a/src/query/service/src/pipelines/processors/transforms/transform_addon.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_addon.rs @@ -20,6 +20,7 @@ use common_datavalues::DataField; use common_datavalues::DataSchemaRef; use common_datavalues::DataType; use common_exception::Result; +use common_sql::evaluator::ChunkOperator; use super::ChunkOperator; use super::CompoundChunkOperator; diff --git a/src/query/sql/Cargo.toml b/src/query/sql/Cargo.toml index 045cb64957e8..bfc41d4957b5 100644 --- a/src/query/sql/Cargo.toml +++ b/src/query/sql/Cargo.toml @@ -32,7 +32,11 @@ common-meta-app = { path = "../../meta/app" } common-meta-store = { path = "../../meta/store" } common-meta-types = { path = "../../meta/types" } common-metrics = { path = "../../common/metrics" } + +common-pipeline-core = { path = "../pipeline/core" } common-pipeline-sources = { path = "../pipeline/sources" } +common-pipeline-transforms = { path = "../pipeline/transforms" } + common-planner = { path = "../planner" } common-settings = { path = "../settings" } common-storage = { path = "../../common/storage" } diff --git a/src/query/sql/src/evaluator/mod.rs b/src/query/sql/src/evaluator/mod.rs index 9d61a2fff10d..fd476a2997ad 100644 --- a/src/query/sql/src/evaluator/mod.rs +++ b/src/query/sql/src/evaluator/mod.rs @@ -12,11 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
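// Editor's note: `ChunkOperator` / `CompoundChunkOperator` move from the service crate
// into `common_sql::evaluator` so that storage crates can build small row pipelines
// too. A minimal, self-contained model (not part of the patch) of how the compound
// operator applies its chain: fold the input block through each operator in order,
// exactly like `operators.iter().try_fold(data, |input, op| op.execute(&ctx, input))`.
// A "block" is modeled here as a plain Vec of rows.
type Block = Vec<i64>;

enum Op {
    Map(fn(i64) -> i64),      // like ChunkOperator::Map: derive a value per row
    Filter(fn(&i64) -> bool), // like ChunkOperator::Filter: keep matching rows
}

fn execute(op: &Op, input: Block) -> Result<Block, String> {
    Ok(match op {
        Op::Map(f) => input.into_iter().map(f).collect(),
        Op::Filter(p) => input.into_iter().filter(|v| p(v)).collect(),
    })
}

fn transform(ops: &[Op], data: Block) -> Result<Block, String> {
    ops.iter().try_fold(data, |input, op| execute(op, input))
}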
+mod chunk_operator; mod eval_node; mod monotonicity; mod physical_scalar; mod scalar; +pub use chunk_operator::ChunkOperator; +pub use chunk_operator::CompoundChunkOperator; use common_datavalues::ColumnRef; use common_datavalues::DataTypeImpl; pub use eval_node::EvalNode; diff --git a/src/query/storages/fuse/Cargo.toml b/src/query/storages/fuse/Cargo.toml index c8a1a1e92cf9..e19df3e6db80 100644 --- a/src/query/storages/fuse/Cargo.toml +++ b/src/query/storages/fuse/Cargo.toml @@ -12,12 +12,11 @@ doctest = false test = false [dependencies] -common-ast = { path = "../../ast" } common-arrow = { path = "../../../common/arrow" } +common-ast = { path = "../../ast" } common-base = { path = "../../../common/base" } common-cache = { path = "../../../common/cache" } common-catalog = { path = "../../../common/../query/catalog" } -common-sql = { path = "../../../common/../query/sql" } common-datablocks = { path = "../../datablocks" } common-datavalues = { path = "../../datavalues" } common-exception = { path = "../../../common/exception" } @@ -30,6 +29,7 @@ common-pipeline-sources = { path = "../../pipeline/sources" } common-pipeline-transforms = { path = "../../pipeline/transforms" } common-planner = { path = "../../planner" } common-sharing = { path = "../../sharing" } +common-sql = { path = "../../../common/../query/sql" } common-storage = { path = "../../../common/storage" } common-storages-cache = { path = "../cache" } common-storages-constants = { path = "../constants" } diff --git a/src/query/storages/fuse/src/operations/append.rs b/src/query/storages/fuse/src/operations/append.rs index ab8e90698b73..3bb08a9a8859 100644 --- a/src/query/storages/fuse/src/operations/append.rs +++ b/src/query/storages/fuse/src/operations/append.rs @@ -17,10 +17,14 @@ use std::sync::Arc; use common_catalog::table_context::TableContext; use common_datablocks::SortColumnDescription; +use common_datavalues::DataField; use common_exception::Result; use common_pipeline_core::Pipeline; use common_pipeline_transforms::processors::transforms::TransformCompact; use common_pipeline_transforms::processors::transforms::TransformSortPartial; +use common_sql::evaluator::ChunkOperator; +use common_sql::evaluator::CompoundChunkOperator; +use common_sql::evaluator::Evaluator; use crate::io::BlockCompactor; use crate::operations::FuseTableSink; @@ -50,6 +54,7 @@ impl FuseTable { let cluster_stats_gen = self.get_cluster_stats_gen(ctx.clone(), pipeline, 0, block_compactor)?; + if !self.cluster_keys.is_empty() { // sort let sort_descs: Vec = self @@ -111,8 +116,53 @@ impl FuseTable { if self.cluster_keys.is_empty() { return Ok(ClusterStatsGenerator::default()); } - // todo(sundy) project transform - todo!() + + let input_schema = self.table_info.schema(); + let mut merged = input_schema.fields().clone(); + + let mut cluster_key_index = Vec::with_capacity(self.cluster_keys.len()); + let mut extra_key_index = Vec::with_capacity(self.cluster_keys.len()); + + let mut operators = Vec::with_capacity(self.cluster_keys.len()); + + for expr in &self.cluster_keys { + let cname = expr.pretty_display(); + + let index = match merged.iter().position(|x| x.name() == &cname) { + None => { + let field = DataField::new(&cname, expr.data_type()); + operators.push(ChunkOperator::Map { + eval: Evaluator::eval_physical_scalar(expr)?, + name: field.name().to_string(), + }); + extra_key_index.push(merged.len() - 1); + + merged.push(field); + merged.len() - 1 + } + Some(idx) => idx, + }; + cluster_key_index.push(index); + } + if !operators.is_empty() { + let 
func_ctx = ctx.try_get_function_context()?; + pipeline.add_transform(move |input, output| { + Ok(CompoundChunkOperator::create( + input, + output, + func_ctx.clone(), + operators.clone(), + )) + })?; + } + + Ok(ClusterStatsGenerator::new( + self.cluster_key_meta.as_ref().unwrap().0, + cluster_key_index, + extra_key_index, + level, + block_compactor, + )) } pub fn get_option(&self, opt_key: &str, default: T) -> T { diff --git a/src/query/storages/fuse/src/operations/read_data.rs b/src/query/storages/fuse/src/operations/read_data.rs index b9be2ee47252..e2248d5237cc 100644 --- a/src/query/storages/fuse/src/operations/read_data.rs +++ b/src/query/storages/fuse/src/operations/read_data.rs @@ -94,7 +94,7 @@ impl FuseTable { // Build the prewhere filter executor. fn build_prewhere_filter_executor( &self, - ctx: Arc, + _ctx: Arc, plan: &ReadDataSourcePlan, ) -> Result>> { Ok(match self.prewhere_of_push_downs(&plan.push_downs) { diff --git a/src/query/storages/fuse/src/statistics/cluster_statistics.rs b/src/query/storages/fuse/src/statistics/cluster_statistics.rs index 9472fc5be16a..dbfe810bbd65 100644 --- a/src/query/storages/fuse/src/statistics/cluster_statistics.rs +++ b/src/query/storages/fuse/src/statistics/cluster_statistics.rs @@ -26,7 +26,7 @@ use crate::io::BlockCompactor; pub struct ClusterStatsGenerator { cluster_key_id: u32, cluster_key_index: Vec, - expression_executor: Option, + extra_key_index: Vec, level: i32, block_compactor: BlockCompactor, } @@ -35,14 +35,14 @@ impl ClusterStatsGenerator { pub fn new( cluster_key_id: u32, cluster_key_index: Vec, - expression_executor: Option, + extra_key_index: Vec, level: i32, block_compactor: BlockCompactor, ) -> Self { Self { cluster_key_id, cluster_key_index, - expression_executor, + extra_key_index, level, block_compactor, } @@ -56,11 +56,9 @@ impl ClusterStatsGenerator { ) -> Result<(Option, DataBlock)> { let cluster_stats = self.clusters_statistics(data_block, self.level)?; let mut block = data_block.clone(); - if let Some(executor) = &self.expression_executor { - let func_ctx = FunctionContext::default(); - let column = executor.eval(&func_ctx, &block)?.vector; - let field = DataField::new("_cluster_key", column.data_type()); - block = block.add_column(column, field)?; + + for id in self.extra_key_index.iter() { + block = block.remove_column_index(*id)?; } Ok((cluster_stats, block)) @@ -81,19 +79,18 @@ impl ClusterStatsGenerator { return Ok(None); } - let mut data_block = data_block.clone(); - if let Some(executor) = &self.expression_executor { - // For a clustered table, data_block has been sorted, but may not contain cluster key. - // So only need to get the first and the last row for execute. 
- let indices = vec![0u32, data_block.num_rows() as u32 - 1]; - let input = DataBlock::block_take_by_indices(&data_block, &indices)?; - let func_ctx = FunctionContext::default(); - let column = executor.eval(&func_ctx, &input)?.vector; - let field = DataField::new("_cluster_key", column.data_type()); - data_block = data_block.add_column(column, field)?; + let mut block = data_block.clone(); + + for id in self.extra_key_index.iter() { + block = block.remove_column_index(*id)?; + } + + if !self.cluster_key_index.is_empty() { + let indices = vec![0u32, block.num_rows() as u32 - 1]; + block = DataBlock::block_take_by_indices(&block, &indices)?; } - self.clusters_statistics(&data_block, origin_stats.level) + self.clusters_statistics(&block, origin_stats.level) } fn clusters_statistics( diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs index 0b644b2ba6b9..cd70e6d4979b 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs @@ -18,6 +18,7 @@ use common_ast::parser::parse_comma_separated_exprs; use common_ast::parser::tokenize_sql; use common_ast::Backtrace; use common_ast::Dialect; +use common_catalog::table::Table; use common_catalog::table_context::TableContext; use common_exception::ErrorCode; use common_exception::Result; From 7787d3d96ddfd8c60ee4e177bfd5d6ff35bc95e0 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Thu, 27 Oct 2022 11:15:36 +0800 Subject: [PATCH 12/47] refactor(query): add append --- .../src/interpreters/interpreter_insert_v2.rs | 2 - .../service/src/pipelines/pipeline_builder.rs | 3 +- .../processors/transforms/transform_addon.rs | 3 +- src/query/sql/src/evaluator/chunk_operator.rs | 147 ++++++++++++++++++ 4 files changed, 149 insertions(+), 6 deletions(-) create mode 100644 src/query/sql/src/evaluator/chunk_operator.rs diff --git a/src/query/service/src/interpreters/interpreter_insert_v2.rs b/src/query/service/src/interpreters/interpreter_insert_v2.rs index 2b8146036294..c6338d7f6309 100644 --- a/src/query/service/src/interpreters/interpreter_insert_v2.rs +++ b/src/query/service/src/interpreters/interpreter_insert_v2.rs @@ -43,8 +43,6 @@ use super::interpreter_common::append2table; use super::plan_schedulers::build_schedule_pipeline; use crate::interpreters::Interpreter; use crate::interpreters::InterpreterPtr; -use crate::pipelines::processors::transforms::ChunkOperator; -use crate::pipelines::processors::transforms::CompoundChunkOperator; use crate::pipelines::PipelineBuildResult; use crate::pipelines::PipelineBuilder; use crate::pipelines::SourcePipeBuilder; diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index cddf710e5277..ba2fcb7842a0 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -32,12 +32,11 @@ use common_pipeline_core::Pipe; use common_pipeline_sinks::processors::sinks::UnionReceiveSink; use common_planner::AggregateFunctionDesc; use common_planner::PhysicalScalar; +use common_sql::evaluator::ChunkOperator; use common_sql::evaluator::CompoundChunkOperator; use crate::interpreters::fill_missing_columns; use crate::pipelines::processors::port::InputPort; -use crate::pipelines::processors::transforms::ChunkOperator; -use 
crate::pipelines::processors::transforms::CompoundChunkOperator; use crate::pipelines::processors::transforms::HashJoinDesc; use crate::pipelines::processors::transforms::RightSemiAntiJoinCompactor; use crate::pipelines::processors::transforms::TransformMarkJoin; diff --git a/src/query/service/src/pipelines/processors/transforms/transform_addon.rs b/src/query/service/src/pipelines/processors/transforms/transform_addon.rs index 2f4b38c2330b..ef2a561b59b7 100644 --- a/src/query/service/src/pipelines/processors/transforms/transform_addon.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_addon.rs @@ -21,9 +21,8 @@ use common_datavalues::DataSchemaRef; use common_datavalues::DataType; use common_exception::Result; use common_sql::evaluator::ChunkOperator; +use common_sql::evaluator::CompoundChunkOperator; -use super::ChunkOperator; -use super::CompoundChunkOperator; use crate::pipelines::processors::port::InputPort; use crate::pipelines::processors::port::OutputPort; use crate::pipelines::processors::processor::ProcessorPtr; diff --git a/src/query/sql/src/evaluator/chunk_operator.rs b/src/query/sql/src/evaluator/chunk_operator.rs new file mode 100644 index 000000000000..e7ddfa8f83ba --- /dev/null +++ b/src/query/sql/src/evaluator/chunk_operator.rs @@ -0,0 +1,147 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use common_datablocks::DataBlock; +use common_datavalues::DataField; +use common_datavalues::DataSchemaRef; +use common_exception::Result; +use common_functions::scalars::FunctionContext; +use common_pipeline_core::processors::port::InputPort; +use common_pipeline_core::processors::port::OutputPort; +use common_pipeline_core::processors::processor::ProcessorPtr; +use common_pipeline_transforms::processors::transforms::Transform; +use common_pipeline_transforms::processors::transforms::Transformer; + +use crate::evaluator::EvalNode; + +/// `ChunkOperator` takes a `DataBlock` as input and produces a `DataBlock` as output. +#[derive(Clone)] +pub enum ChunkOperator { + /// Evaluate expression and append result column to the end. + Map { + /// Name of column in `DataBlock`, should be deprecated later. + name: String, + eval: EvalNode, + }, + + /// Filter the input `DataBlock` with the predicate `eval`. + Filter { eval: EvalNode }, + + /// Reorganize the input `DataBlock` with `offsets`. + Project { offsets: Vec }, + + /// Replace name of `DataField`s of input `DataBlock`. 
+ Rename { output_schema: DataSchemaRef }, +} + +impl ChunkOperator { + pub fn execute(&self, func_ctx: &FunctionContext, input: DataBlock) -> Result { + match self { + ChunkOperator::Map { name, eval } => { + let result = eval.eval(func_ctx, &input)?; + input.add_column(result.vector, DataField::new(name, result.logical_type)) + } + + ChunkOperator::Filter { eval } => { + let result = eval.eval(func_ctx, &input)?; + let predicate = result.vector; + DataBlock::filter_block(input, &predicate) + } + + ChunkOperator::Project { offsets } => { + let mut result = DataBlock::empty(); + for offset in offsets { + result = result.add_column( + input.column(*offset).clone(), + input.schema().field(*offset).clone(), + )?; + } + Ok(result) + } + + ChunkOperator::Rename { output_schema } => Ok(DataBlock::create( + output_schema.clone(), + input.columns().to_vec(), + )), + } + } +} + +/// `CompoundChunkOperator` is a pipeline of `ChunkOperator`s +pub struct CompoundChunkOperator { + pub operators: Vec, + pub ctx: FunctionContext, +} + +impl CompoundChunkOperator { + pub fn create( + input_port: Arc, + output_port: Arc, + ctx: FunctionContext, + operators: Vec, + ) -> ProcessorPtr { + Transformer::::create(input_port, output_port, Self { operators, ctx }) + } + + #[allow(dead_code)] + pub fn append(self, operator: ChunkOperator) -> Self { + let mut result = self; + result.operators.push(operator); + result + } + + #[allow(dead_code)] + pub fn merge(self, other: Self) -> Self { + let mut operators = self.operators; + operators.extend(other.operators); + Self { + operators, + ctx: self.ctx, + } + } +} + +impl Transform for CompoundChunkOperator { + const NAME: &'static str = "CompoundChunkOperator"; + + const SKIP_EMPTY_DATA_BLOCK: bool = true; + + fn transform(&mut self, data: DataBlock) -> Result { + self.operators + .iter() + .try_fold(data, |input, op| op.execute(&self.ctx, input)) + } + + fn name(&self) -> String { + format!( + "{}({})", + Self::NAME, + self.operators + .iter() + .map(|op| { + match op { + ChunkOperator::Map { .. } => "Map", + ChunkOperator::Filter { .. } => "Filter", + ChunkOperator::Project { .. } => "Project", + ChunkOperator::Rename { .. 
} => "Rename", + } + .to_string() + }) + .collect::>() + .join("->") + ) + } +} From 210269f1ec1c0c06fd4612879db7a3fa6604e9ec Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 27 Oct 2022 18:59:18 +0800 Subject: [PATCH 13/47] merge --- Cargo.lock | 2 - src/query/sql/src/planner/mod.rs | 2 + .../sql/src/planner/physical_scalar_parser.rs | 65 +++++++++++++++++++ src/query/storages/fuse/Cargo.toml | 1 - .../storages/fuse/src/operations/delete.rs | 14 +++- .../clustering_informations/table_args.rs | 37 +---------- 6 files changed, 81 insertions(+), 40 deletions(-) create mode 100644 src/query/sql/src/planner/physical_scalar_parser.rs diff --git a/Cargo.lock b/Cargo.lock index 305ebbd11df6..8be6baa187a8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2113,7 +2113,6 @@ dependencies = [ "backon", "chrono", "common-arrow", - "common-ast", "common-base", "common-cache", "common-catalog", @@ -2139,7 +2138,6 @@ dependencies = [ "itertools", "metrics", "opendal", - "parking_lot 0.12.1", "serde", "serde_json", "tracing", diff --git a/src/query/sql/src/planner/mod.rs b/src/query/sql/src/planner/mod.rs index 00512f607cc7..f4b68e553e49 100644 --- a/src/query/sql/src/planner/mod.rs +++ b/src/query/sql/src/planner/mod.rs @@ -14,6 +14,7 @@ mod format; mod metadata; +mod physical_scalar_parser; #[allow(clippy::module_inception)] mod planner; mod semantic; @@ -28,6 +29,7 @@ pub use binder::ColumnBinding; pub use binder::ScalarBinder; pub use binder::Visibility; pub use metadata::*; +pub use physical_scalar_parser::PhysicalScalarParser; pub use planner::Planner; pub use plans::ScalarExpr; pub use semantic::normalize_identifier; diff --git a/src/query/sql/src/planner/physical_scalar_parser.rs b/src/query/sql/src/planner/physical_scalar_parser.rs new file mode 100644 index 000000000000..20b699f33cff --- /dev/null +++ b/src/query/sql/src/planner/physical_scalar_parser.rs @@ -0,0 +1,65 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
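// Editor's note: a self-contained model (not part of the patch) of the pipeline the
// new `PhysicalScalarParser` below implements: parse the SQL fragment into AST
// expressions, bind/type-check each one into a scalar, then lower it to an executable
// `PhysicalScalar`. All names here are illustrative stand-ins, not Databend APIs.
struct Ast;      // stands in for a parsed `common_ast` expression
struct Scalar;   // stands in for a bound, type-checked scalar
struct Physical; // stands in for `PhysicalScalar`

fn parse(sql: &str) -> Result<Vec<Ast>, String> {
    // stands in for tokenize_sql + parse_comma_separated_exprs
    Ok(sql.split(',').map(|_| Ast).collect())
}
fn bind(_expr: &Ast) -> Result<Scalar, String> {
    Ok(Scalar) // stands in for ScalarBinder::bind
}
fn build(_scalar: &Scalar) -> Result<Physical, String> {
    Ok(Physical) // stands in for PhysicalScalarBuilder::build
}

fn parse_exprs(sql: &str) -> Result<Vec<Physical>, String> {
    parse(sql)?
        .iter()
        .map(|e| bind(e).and_then(|s| build(&s)))
        .collect()
}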
+ +use std::sync::Arc; + +use common_ast::parser::parse_comma_separated_exprs; +use common_ast::parser::tokenize_sql; +use common_ast::Backtrace; +use common_ast::Dialect; +use common_catalog::table_context::TableContext; +use common_datavalues::DataSchemaRef; +use common_exception::Result; +use common_planner::PhysicalScalar; +use parking_lot::RwLock; + +use crate::executor::PhysicalScalarBuilder; +use crate::BindContext; +use crate::Metadata; +use crate::NameResolutionContext; +use crate::ScalarBinder; + +pub struct PhysicalScalarParser; + +impl PhysicalScalarParser { + pub async fn parse_exprs( + ctx: Arc, + schema: DataSchemaRef, + sql: &str, + ) -> Result> { + let sql_dialect = Dialect::MySQL; + let tokens = tokenize_sql(sql)?; + let backtrace = Backtrace::new(); + let exprs = parse_comma_separated_exprs( + &tokens[1..tokens.len() as usize], + sql_dialect, + &backtrace, + )?; + + let settings = ctx.get_settings(); + let bind_context = BindContext::new(); + let name_resolution_ctx = NameResolutionContext::try_from(settings.as_ref())?; + let metadata = Arc::new(RwLock::new(Metadata::default())); + let mut scalar_binder = + ScalarBinder::new(&bind_context, ctx, &name_resolution_ctx, metadata, &[]); + let mut physical_scalars = Vec::with_capacity(exprs.len()); + let mut physical_scalar_builder = PhysicalScalarBuilder::new(&schema); + for expr in exprs.iter() { + let (scalar, _) = scalar_binder.bind(expr).await?; + let physical_scalar = physical_scalar_builder.build(&scalar)?; + physical_scalars.push(physical_scalar); + } + Ok(physical_scalars) + } +} diff --git a/src/query/storages/fuse/Cargo.toml b/src/query/storages/fuse/Cargo.toml index e19df3e6db80..33c3a2ec1e19 100644 --- a/src/query/storages/fuse/Cargo.toml +++ b/src/query/storages/fuse/Cargo.toml @@ -44,7 +44,6 @@ futures-util = "0.3.24" itertools = "0.10.5" metrics = "0.20.1" opendal = { version = "0.19", features = ["layers-retry"] } -parking_lot = "0.12.1" serde = { workspace = true } serde_json = { workspace = true } tracing = "0.1.36" diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index aea4049495e6..f6ab9c96e3b9 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -15,11 +15,13 @@ use std::sync::Arc; use common_catalog::table_context::TableContext; +use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::TableSnapshot; use common_planner::extras::Extras; use common_planner::plans::DeletePlan; use common_planner::PhysicalScalar; +use common_sql::PhysicalScalarParser; use tracing::debug; use crate::operations::mutation::delete_from_block; @@ -48,9 +50,15 @@ impl FuseTable { // check if unconditional deletion if let Some(filter) = &plan.selection { - // todo(sundy): use type_checker - let expr = vec![]; - self.delete_rows(ctx, &snapshot, &expr[0], plan).await + let physical_scalars = + PhysicalScalarParser::parse_exprs(ctx.clone(), plan.schema(), filter).await?; + if physical_scalars.is_empty() { + return Err(ErrorCode::IndexOutOfBounds( + "expression should be valid, but not", + )); + } + self.delete_rows(ctx.clone(), &snapshot, &physical_scalars[0], plan) + .await } else { // deleting the whole table... 
just a truncate let purge = false; diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs index cd70e6d4979b..d84ea2e60b5b 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs @@ -14,21 +14,12 @@ use std::sync::Arc; -use common_ast::parser::parse_comma_separated_exprs; -use common_ast::parser::tokenize_sql; -use common_ast::Backtrace; -use common_ast::Dialect; use common_catalog::table::Table; use common_catalog::table_context::TableContext; use common_exception::ErrorCode; use common_exception::Result; use common_planner::PhysicalScalar; -use common_sql::executor::PhysicalScalarBuilder; -use common_sql::BindContext; -use common_sql::Metadata; -use common_sql::NameResolutionContext; -use common_sql::ScalarBinder; -use parking_lot::RwLock; +use common_sql::PhysicalScalarParser; use crate::table_functions::string_value; use crate::table_functions::TableArgs; @@ -55,30 +46,8 @@ pub async fn get_cluster_keys( definition: &str, ) -> Result> { let cluster_keys = if !definition.is_empty() { - let schema = table.schema(); - - let sql_dialect = Dialect::MySQL; - let tokens = tokenize_sql(definition)?; - let backtrace = Backtrace::new(); - let exprs = parse_comma_separated_exprs( - &tokens[1..tokens.len() as usize], - sql_dialect, - &backtrace, - )?; - - let settings = ctx.get_settings(); - let bind_context = BindContext::new(); - let name_resolution_ctx = NameResolutionContext::try_from(settings.as_ref())?; - let metadata = Arc::new(RwLock::new(Metadata::default())); - let mut scalar_binder = - ScalarBinder::new(&bind_context, ctx, &name_resolution_ctx, metadata, &[]); - let mut physical_scalars = Vec::with_capacity(exprs.len()); - let mut physical_scalar_builder = PhysicalScalarBuilder::new(&schema); - for expr in exprs.iter() { - let (scalar, _) = scalar_binder.bind(expr).await?; - let physical_scalar = physical_scalar_builder.build(&scalar)?; - physical_scalars.push(physical_scalar); - } + let physical_scalars = + PhysicalScalarParser::parse_exprs(ctx, table.schema(), definition).await?; physical_scalars } else { table.cluster_keys() From aea1f74bd57b2ef5d6265fe369b239757dc04faa Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 27 Oct 2022 19:02:37 +0800 Subject: [PATCH 14/47] merge --- src/query/planner/src/physical_scalar.rs | 26 +++++++++++ .../interpreter_table_recluster.rs | 10 +++- .../sql/src/planner/physical_scalar_parser.rs | 4 +- .../storages/fuse/src/operations/delete.rs | 46 ++++++++++++++++++- .../fuse/src/statistics/cluster_statistics.rs | 3 -- 5 files changed, 80 insertions(+), 9 deletions(-) diff --git a/src/query/planner/src/physical_scalar.rs b/src/query/planner/src/physical_scalar.rs index 375b40f94134..045ab953d806 100644 --- a/src/query/planner/src/physical_scalar.rs +++ b/src/query/planner/src/physical_scalar.rs @@ -16,6 +16,9 @@ use std::fmt::Display; use std::fmt::Formatter; use common_datavalues::format_data_type_sql; +use common_datavalues::format_datavalue_sql; +use common_datavalues::DataField; +use common_datavalues::DataType; use common_datavalues::DataTypeImpl; use common_datavalues::DataValue; @@ -48,6 +51,23 @@ pub enum PhysicalScalar { } impl PhysicalScalar { + pub fn column_name(&self) -> String { + match self { + PhysicalScalar::IndexedVariable { display_name, .. 
} => display_name.clone(), + PhysicalScalar::Constant { value, .. } => format_datavalue_sql(value), + PhysicalScalar::Function { name, args, .. } => { + let args_column_name = args + .iter() + .map(PhysicalScalar::column_name) + .collect::>(); + format!("{}({})", name, args_column_name.join(", ")) + } + PhysicalScalar::Cast { input, target } => { + format!("{}::{}", input.column_name(), target.sql_name()) + } + } + } + pub fn data_type(&self) -> DataTypeImpl { match self { PhysicalScalar::Constant { data_type, .. } => data_type.clone(), @@ -57,6 +77,12 @@ impl PhysicalScalar { } } + pub fn to_data_field(&self) -> DataField { + let name = self.column_name(); + let data_type = self.data_type(); + DataField::new(&name, data_type) + } + /// Display with readable variable name. pub fn pretty_display(&self) -> String { match self { diff --git a/src/query/service/src/interpreters/interpreter_table_recluster.rs b/src/query/service/src/interpreters/interpreter_table_recluster.rs index 7ece7634453a..3f74d290816e 100644 --- a/src/query/service/src/interpreters/interpreter_table_recluster.rs +++ b/src/query/service/src/interpreters/interpreter_table_recluster.rs @@ -26,6 +26,7 @@ use crate::pipelines::Pipeline; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; use crate::sessions::TableContext; +use crate::sql::executor::PhysicalScalarBuilder; use crate::sql::plans::ReclusterTablePlan; pub struct ReclusterTableInterpreter { @@ -56,8 +57,13 @@ impl Interpreter for ReclusterTableInterpreter { let extras = match &plan.push_downs { None => None, Some(scalar) => { - // todo(sundy) - todo!() + let schema = self.plan.schema(); + let mut builder = PhysicalScalarBuilder::new(&schema); + let physical_scalar = builder.build(&scalar)?; + Some(Extras { + filters: vec![physical_scalar], + ..Extras::default() + }) } }; loop { diff --git a/src/query/sql/src/planner/physical_scalar_parser.rs b/src/query/sql/src/planner/physical_scalar_parser.rs index 20b699f33cff..121342a9769c 100644 --- a/src/query/sql/src/planner/physical_scalar_parser.rs +++ b/src/query/sql/src/planner/physical_scalar_parser.rs @@ -54,10 +54,10 @@ impl PhysicalScalarParser { let mut scalar_binder = ScalarBinder::new(&bind_context, ctx, &name_resolution_ctx, metadata, &[]); let mut physical_scalars = Vec::with_capacity(exprs.len()); - let mut physical_scalar_builder = PhysicalScalarBuilder::new(&schema); + let mut builder = PhysicalScalarBuilder::new(&schema); for expr in exprs.iter() { let (scalar, _) = scalar_binder.bind(expr).await?; - let physical_scalar = physical_scalar_builder.build(&scalar)?; + let physical_scalar = builder.build(&scalar)?; physical_scalars.push(physical_scalar); } Ok(physical_scalars) diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index f6ab9c96e3b9..3332772f963a 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -15,12 +15,14 @@ use std::sync::Arc; use common_catalog::table_context::TableContext; +use common_datavalues::DataSchemaRefExt; use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::TableSnapshot; use common_planner::extras::Extras; use common_planner::plans::DeletePlan; use common_planner::PhysicalScalar; +use common_sql::evaluator::Evaluator; use common_sql::PhysicalScalarParser; use tracing::debug; @@ -150,7 +152,47 @@ impl FuseTable { } fn cluster_stats_gen(&self, ctx: Arc) -> Result { - // todo(sundy) - todo!() + 
if self.cluster_key_meta.is_none() { + return Ok(ClusterStatsGenerator::default()); + } + + let len = self.cluster_keys.len(); + let cluster_key_id = self.cluster_key_meta.clone().unwrap().0; + + let input_schema = self.table_info.schema(); + let input_fields = input_schema.fields().clone(); + + let mut cluster_key_index = Vec::with_capacity(len); + let mut output_fields = Vec::with_capacity(len); + let mut exists = true; + for expr in &self.cluster_keys { + output_fields.push(expr.to_data_field()); + + if exists { + match input_fields + .iter() + .position(|x| x.name() == &expr.column_name()) + { + None => exists = false, + Some(idx) => cluster_key_index.push(idx), + }; + } + } + + let mut expression_executor = None; + if !exists { + cluster_key_index = (0..len).collect(); + + let executor = Evaluator::eval_physical_scalars(&self.cluster_keys)?; + expression_executor = Some(executor); + } + + Ok(ClusterStatsGenerator::new( + cluster_key_id, + cluster_key_index, + expression_executor, + 0, + self.get_block_compactor(), + )) } } diff --git a/src/query/storages/fuse/src/statistics/cluster_statistics.rs b/src/query/storages/fuse/src/statistics/cluster_statistics.rs index dbfe810bbd65..ef2d9e429f29 100644 --- a/src/query/storages/fuse/src/statistics/cluster_statistics.rs +++ b/src/query/storages/fuse/src/statistics/cluster_statistics.rs @@ -13,12 +13,9 @@ // limitations under the License. use common_datablocks::DataBlock; -use common_datavalues::DataField; use common_datavalues::DataValue; use common_exception::Result; -use common_functions::scalars::FunctionContext; use common_fuse_meta::meta::ClusterStatistics; -use common_sql::evaluator::EvalNode; use crate::io::BlockCompactor; From 04f8f1534d52b35e0d81d6fbddb2527128bfd924 Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 27 Oct 2022 14:20:47 +0800 Subject: [PATCH 15/47] refactor(query): remove column_name --- src/query/planner/src/physical_scalar.rs | 19 +------------------ .../storages/fuse/src/operations/delete.rs | 2 +- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/src/query/planner/src/physical_scalar.rs b/src/query/planner/src/physical_scalar.rs index 045ab953d806..89b3f468ae80 100644 --- a/src/query/planner/src/physical_scalar.rs +++ b/src/query/planner/src/physical_scalar.rs @@ -51,23 +51,6 @@ pub enum PhysicalScalar { } impl PhysicalScalar { - pub fn column_name(&self) -> String { - match self { - PhysicalScalar::IndexedVariable { display_name, .. } => display_name.clone(), - PhysicalScalar::Constant { value, .. } => format_datavalue_sql(value), - PhysicalScalar::Function { name, args, .. } => { - let args_column_name = args - .iter() - .map(PhysicalScalar::column_name) - .collect::>(); - format!("{}({})", name, args_column_name.join(", ")) - } - PhysicalScalar::Cast { input, target } => { - format!("{}::{}", input.column_name(), target.sql_name()) - } - } - } - pub fn data_type(&self) -> DataTypeImpl { match self { PhysicalScalar::Constant { data_type, .. 
} => data_type.clone(), @@ -78,7 +61,7 @@ impl PhysicalScalar { } pub fn to_data_field(&self) -> DataField { - let name = self.column_name(); + let name = self.pretty_display(); let data_type = self.data_type(); DataField::new(&name, data_type) } diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index 3332772f963a..a9c33cf251ca 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -171,7 +171,7 @@ impl FuseTable { if exists { match input_fields .iter() - .position(|x| x.name() == &expr.column_name()) + .position(|x| x.name() == &expr.pretty_display()) { None => exists = false, Some(idx) => cluster_key_index.push(idx), From 355713b76cdb725887ae904c1ed39597801aa241 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Thu, 27 Oct 2022 15:29:06 +0800 Subject: [PATCH 16/47] refactor(query): update hive --- .../src/api/rpc/flight_scatter_hash.rs | 6 +---- src/query/storages/hive/src/hive_table.rs | 23 ++++++++++++------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/query/service/src/api/rpc/flight_scatter_hash.rs b/src/query/service/src/api/rpc/flight_scatter_hash.rs index 3c078e0a6073..35c1b0d6a052 100644 --- a/src/query/service/src/api/rpc/flight_scatter_hash.rs +++ b/src/query/service/src/api/rpc/flight_scatter_hash.rs @@ -66,7 +66,7 @@ impl HashFlightScatter { schema: DataSchemaRef, num: usize, expr: PhysicalScalar, - ctx: Arc, + _ctx: Arc, ) -> Result { let expression = Self::expr_action(num, expr); let indices_expr_executor = Evaluator::eval_physical_scalar(&expression)?; @@ -77,10 +77,6 @@ }) } - fn indices_expr_schema(output_name: &str) -> DataSchemaRef { - DataSchemaRefExt::create(vec![DataField::new(output_name, u64::to_data_type())]) - } - fn expr_action(num: usize, expr: PhysicalScalar) -> PhysicalScalar { PhysicalScalar::Function { name: String::from("modulo"), diff --git a/src/query/storages/hive/src/hive_table.rs b/src/query/storages/hive/src/hive_table.rs index 7ab1054e8f3c..c74493136643 100644 --- a/src/query/storages/hive/src/hive_table.rs +++ b/src/query/storages/hive/src/hive_table.rs @@ -42,6 +42,7 @@ use common_planner::plans::Projection; use common_planner::Partitions; use common_planner::PhysicalScalar; use common_planner::ReadDataSourcePlan; +use common_planner::RequireColumnsVisitor; use common_storage::init_operator; use common_storage::DataOperator; use common_storages_index::RangeFilter; @@ -212,7 +213,7 @@ impl HiveTable { // filter out the partition column related expressions let partition_keys = self.get_partition_key_sets(); - let columns = Self::get_columns_from_expressions(f); + let columns = Self::get_columns_from_expressions(f, &plan.schema()); if columns.difference(&partition_keys).count() == 0 { return true; } @@ -227,13 +228,19 @@ } } - fn get_columns_from_expressions(expressions: &[PhysicalScalar]) -> HashSet<String> { - // todo(sundy) - todo!() - // expressions - // .iter() - // .flat_map(|e| RequireColumnsVisitor::collect_columns_from_expr(e).unwrap()) - // .collect::<HashSet<String>>() + fn get_columns_from_expressions( + expressions: &[PhysicalScalar], + schema: &DataSchemaRef, + ) -> HashSet<String> { + let result = expressions + .iter() + .flat_map(|e| RequireColumnsVisitor::collect_columns_from_expr(e).unwrap()) + .collect::<HashSet<usize>>(); + + result + .iter() + .map(|index| schema.field(*index).name().clone()) + .collect() } fn get_projections(&self, push_downs: &Option<Extras>) -> Result<Vec<usize>> { 
From c5fdd486b4f00eda1ea733965188893eebd4d739 Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 27 Oct 2022 19:04:53 +0800 Subject: [PATCH 17/47] merge --- src/query/planner/src/physical_scalar.rs | 2 - src/query/storages/fuse/Cargo.toml | 1 - .../storages/fuse/src/operations/delete.rs | 61 +++++++------------ .../storages/fuse/src/pruning/topn_pruner.rs | 1 - 4 files changed, 22 insertions(+), 43 deletions(-) diff --git a/src/query/planner/src/physical_scalar.rs b/src/query/planner/src/physical_scalar.rs index 89b3f468ae80..198571203fe5 100644 --- a/src/query/planner/src/physical_scalar.rs +++ b/src/query/planner/src/physical_scalar.rs @@ -16,9 +16,7 @@ use std::fmt::Display; use std::fmt::Formatter; use common_datavalues::format_data_type_sql; -use common_datavalues::format_datavalue_sql; use common_datavalues::DataField; -use common_datavalues::DataType; use common_datavalues::DataTypeImpl; use common_datavalues::DataValue; diff --git a/src/query/storages/fuse/Cargo.toml b/src/query/storages/fuse/Cargo.toml index 33c3a2ec1e19..c22d0e518d8c 100644 --- a/src/query/storages/fuse/Cargo.toml +++ b/src/query/storages/fuse/Cargo.toml @@ -13,7 +13,6 @@ test = false [dependencies] common-arrow = { path = "../../../common/arrow" } -common-ast = { path = "../../ast" } common-base = { path = "../../../common/base" } common-cache = { path = "../../../common/cache" } common-catalog = { path = "../../../common/../query/catalog" } diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index a9c33cf251ca..f45977ba2f38 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -15,14 +15,13 @@ use std::sync::Arc; use common_catalog::table_context::TableContext; -use common_datavalues::DataSchemaRefExt; +use common_datavalues::DataField; use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::TableSnapshot; use common_planner::extras::Extras; use common_planner::plans::DeletePlan; use common_planner::PhysicalScalar; -use common_sql::evaluator::Evaluator; use common_sql::PhysicalScalarParser; use tracing::debug; @@ -35,7 +34,7 @@ use crate::FuseTable; impl FuseTable { pub async fn do_delete(&self, ctx: Arc, plan: &DeletePlan) -> Result<()> { - let snapshot_opt = self.read_table_snapshot().await?; + let snapshot_opt = self.read_table_snapshot(ctx.clone()).await?; // check if table is empty let snapshot = if let Some(val) = snapshot_opt { @@ -52,8 +51,7 @@ impl FuseTable { // check if unconditional deletion if let Some(filter) = &plan.selection { - let physical_scalars = - PhysicalScalarParser::parse_exprs(ctx.clone(), plan.schema(), filter).await?; + let physical_scalars = PhysicalScalarParser::parse_exprs(plan.schema(), filter)?; if physical_scalars.is_empty() { return Err(ErrorCode::IndexOutOfBounds( "expression should be valid, but not", @@ -79,7 +77,7 @@ impl FuseTable { filter: &PhysicalScalar, plan: &DeletePlan, ) -> Result<()> { - let cluster_stats_gen = self.cluster_stats_gen(ctx.clone())?; + let cluster_stats_gen = self.cluster_stats_gen()?; let mut deletion_collector = DeletionMutator::try_create( ctx.clone(), self.get_operator(), @@ -151,46 +149,31 @@ impl FuseTable { .await } - fn cluster_stats_gen(&self, ctx: Arc) -> Result { - if self.cluster_key_meta.is_none() { - return Ok(ClusterStatsGenerator::default()); - } - - let len = self.cluster_keys.len(); - let cluster_key_id = self.cluster_key_meta.clone().unwrap().0; - + fn 
cluster_stats_gen(&self) -> Result { let input_schema = self.table_info.schema(); - let input_fields = input_schema.fields().clone(); + let mut merged = input_schema.fields().clone(); - let mut cluster_key_index = Vec::with_capacity(len); - let mut output_fields = Vec::with_capacity(len); - let mut exists = true; + let mut cluster_key_index = Vec::with_capacity(self.cluster_keys.len()); + let mut extra_key_index = Vec::with_capacity(self.cluster_keys.len()); for expr in &self.cluster_keys { - output_fields.push(expr.to_data_field()); - - if exists { - match input_fields - .iter() - .position(|x| x.name() == &expr.pretty_display()) - { - None => exists = false, - Some(idx) => cluster_key_index.push(idx), - }; - } - } - - let mut expression_executor = None; - if !exists { - cluster_key_index = (0..len).collect(); - - let executor = Evaluator::eval_physical_scalars(&self.cluster_keys)?; - expression_executor = Some(executor); + let cname = expr.pretty_display(); + let index = match merged.iter().position(|x| x.name() == &cname) { + None => { + let field = DataField::new(&cname, expr.data_type()); + merged.push(field); + + extra_key_index.push(merged.len() - 1); + merged.len() - 1 + } + Some(idx) => idx, + }; + cluster_key_index.push(index); } Ok(ClusterStatsGenerator::new( - cluster_key_id, + self.cluster_key_meta.as_ref().unwrap().0, cluster_key_index, - expression_executor, + extra_key_index, 0, self.get_block_compactor(), )) diff --git a/src/query/storages/fuse/src/pruning/topn_pruner.rs b/src/query/storages/fuse/src/pruning/topn_pruner.rs index d722531626bc..54a4de191437 100644 --- a/src/query/storages/fuse/src/pruning/topn_pruner.rs +++ b/src/query/storages/fuse/src/pruning/topn_pruner.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
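// Editor's note: a self-contained sketch (not part of the patch) of the index
// bookkeeping `cluster_stats_gen` performs above: each cluster key either reuses an
// existing column with the same (pretty-printed) name, or a new field is appended and
// remembered in `extra_key_index` so the helper column can be stripped again after
// statistics are computed.
fn derive_indices(
    mut fields: Vec<String>, // field names of the block schema
    cluster_keys: &[String], // pretty-printed cluster key expressions
) -> (Vec<usize>, Vec<usize>, Vec<String>) {
    let mut cluster_key_index = Vec::with_capacity(cluster_keys.len());
    let mut extra_key_index = Vec::new();
    for key in cluster_keys {
        let index = match fields.iter().position(|f| f == key) {
            Some(idx) => idx, // the key is already a plain column of the block
            None => {
                fields.push(key.clone()); // a Map operator will append this column
                extra_key_index.push(fields.len() - 1);
                fields.len() - 1
            }
        };
        cluster_key_index.push(index);
    }
    (cluster_key_index, extra_key_index, fields)
}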
-use common_datavalues::DataSchemaRef; use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::BlockMeta; From 2162adeb25b77e57ca6bc1bbc547a9f7a98b710a Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 27 Oct 2022 18:44:27 +0800 Subject: [PATCH 18/47] refactor(query): use SyncTypeChecker --- .../src/api/rpc/flight_scatter_hash.rs | 2 +- .../systems/clustering_information.rs | 2 +- .../sql/src/planner/physical_scalar_parser.rs | 22 +- src/query/sql/src/planner/semantic/mod.rs | 2 + .../src/planner/semantic/sync_type_checker.rs | 1446 +++++++++++++++++ src/query/storages/fuse/src/fuse_table.rs | 5 +- .../clustering_information_table.rs | 2 +- .../clustering_informations/table_args.rs | 12 +- 8 files changed, 1462 insertions(+), 31 deletions(-) create mode 100644 src/query/sql/src/planner/semantic/sync_type_checker.rs diff --git a/src/query/service/src/api/rpc/flight_scatter_hash.rs b/src/query/service/src/api/rpc/flight_scatter_hash.rs index 35c1b0d6a052..f47bff90059d 100644 --- a/src/query/service/src/api/rpc/flight_scatter_hash.rs +++ b/src/query/service/src/api/rpc/flight_scatter_hash.rs @@ -63,7 +63,7 @@ impl FlightScatter for HashFlightScatter { impl HashFlightScatter { fn try_create_impl( - schema: DataSchemaRef, + _schema: DataSchemaRef, num: usize, expr: PhysicalScalar, _ctx: Arc, diff --git a/src/query/service/src/procedures/systems/clustering_information.rs b/src/query/service/src/procedures/systems/clustering_information.rs index ce5e370a1f90..5615c7b50b4d 100644 --- a/src/query/service/src/procedures/systems/clustering_information.rs +++ b/src/query/service/src/procedures/systems/clustering_information.rs @@ -61,7 +61,7 @@ impl OneBlockProcedure for ClusteringInformationProcedure { let tbl = FuseTable::try_from_table(tbl.as_ref())?; let definition = if args.len() > 2 { &args[2] } else { "" }; - let cluster_keys = get_cluster_keys(ctx.clone(), tbl, definition).await?; + let cluster_keys = get_cluster_keys(tbl, definition)?; Ok(ClusteringInformation::new(ctx, tbl, cluster_keys) .get_clustering_info() diff --git a/src/query/sql/src/planner/physical_scalar_parser.rs b/src/query/sql/src/planner/physical_scalar_parser.rs index 121342a9769c..5c183e76ffcf 100644 --- a/src/query/sql/src/planner/physical_scalar_parser.rs +++ b/src/query/sql/src/planner/physical_scalar_parser.rs @@ -12,32 +12,24 @@ // See the License for the specific language governing permissions and // limitations under the License. 
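// Editor's note: an illustrative, self-contained sketch (not part of the patch) of the
// idea behind `HashFlightScatter::expr_action` touched above: the scatter expression is
// wrapped in `modulo(expr, num)` so every row is routed to one of `num` output streams.
// Here the already-evaluated hash values are bucketed directly.
fn scatter(hashes: &[u64], num: usize) -> Vec<Vec<u64>> {
    let mut buckets = vec![Vec::new(); num];
    for &h in hashes {
        buckets[(h % num as u64) as usize].push(h); // modulo(expr, num)
    }
    buckets
}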
-use std::sync::Arc; - use common_ast::parser::parse_comma_separated_exprs; use common_ast::parser::tokenize_sql; use common_ast::Backtrace; use common_ast::Dialect; -use common_catalog::table_context::TableContext; use common_datavalues::DataSchemaRef; use common_exception::Result; use common_planner::PhysicalScalar; -use parking_lot::RwLock; +use common_settings::Settings; use crate::executor::PhysicalScalarBuilder; +use crate::planner::semantic::SyncTypeChecker; use crate::BindContext; -use crate::Metadata; use crate::NameResolutionContext; -use crate::ScalarBinder; pub struct PhysicalScalarParser; impl PhysicalScalarParser { - pub async fn parse_exprs( - ctx: Arc, - schema: DataSchemaRef, - sql: &str, - ) -> Result> { + pub fn parse_exprs(schema: DataSchemaRef, sql: &str) -> Result> { let sql_dialect = Dialect::MySQL; let tokens = tokenize_sql(sql)?; let backtrace = Backtrace::new(); @@ -47,16 +39,14 @@ impl PhysicalScalarParser { &backtrace, )?; - let settings = ctx.get_settings(); + let settings = Settings::default_settings(""); let bind_context = BindContext::new(); let name_resolution_ctx = NameResolutionContext::try_from(settings.as_ref())?; - let metadata = Arc::new(RwLock::new(Metadata::default())); - let mut scalar_binder = - ScalarBinder::new(&bind_context, ctx, &name_resolution_ctx, metadata, &[]); + let mut type_checker = SyncTypeChecker::new(&bind_context, &name_resolution_ctx, &[]); let mut physical_scalars = Vec::with_capacity(exprs.len()); let mut builder = PhysicalScalarBuilder::new(&schema); for expr in exprs.iter() { - let (scalar, _) = scalar_binder.bind(expr).await?; + let (scalar, _) = *type_checker.resolve(expr, None)?; let physical_scalar = builder.build(&scalar)?; physical_scalars.push(physical_scalar); } diff --git a/src/query/sql/src/planner/semantic/mod.rs b/src/query/sql/src/planner/semantic/mod.rs index 3274cacdf174..62c42c3d4ccb 100644 --- a/src/query/sql/src/planner/semantic/mod.rs +++ b/src/query/sql/src/planner/semantic/mod.rs @@ -14,11 +14,13 @@ mod grouping_check; mod name_resolution; +mod sync_type_checker; mod type_check; pub use grouping_check::GroupingChecker; pub use name_resolution::normalize_identifier; pub use name_resolution::IdentifierNormalizer; pub use name_resolution::NameResolutionContext; +pub use sync_type_checker::SyncTypeChecker; pub use type_check::validate_function_arg; pub use type_check::TypeChecker; diff --git a/src/query/sql/src/planner/semantic/sync_type_checker.rs b/src/query/sql/src/planner/semantic/sync_type_checker.rs new file mode 100644 index 000000000000..301c0b0c40c4 --- /dev/null +++ b/src/query/sql/src/planner/semantic/sync_type_checker.rs @@ -0,0 +1,1446 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
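+
+// Scope note: `SyncTypeChecker` mirrors `TypeChecker` minus everything that
+// would need a `TableContext` or an await point. Subqueries (`EXISTS`,
+// `IN (SELECT ...)`, scalar subqueries) and UDFs are rejected with a
+// `SemanticError`, while constant folding, aggregate-function rewrites and
+// map-access pushdown are preserved.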
+ +use std::vec; + +use common_ast::ast::BinaryOperator; +use common_ast::ast::Expr; +use common_ast::ast::Identifier; +use common_ast::ast::IntervalKind as ASTIntervalKind; +use common_ast::ast::Literal; +use common_ast::ast::MapAccessor; +use common_ast::ast::SubqueryModifier; +use common_ast::ast::TrimWhere; +use common_ast::ast::UnaryOperator; +use common_ast::parser::token::Token; +use common_ast::DisplayError; +use common_datavalues::type_coercion::merge_types; +use common_datavalues::ArrayType; +use common_datavalues::DataField; +use common_datavalues::DataType; +use common_datavalues::DataTypeImpl; +use common_datavalues::DataValue; +use common_datavalues::IntervalKind; +use common_datavalues::IntervalType; +use common_datavalues::NullType; +use common_datavalues::NullableType; +use common_datavalues::StringType; +use common_datavalues::StructType; +use common_datavalues::TimestampType; +use common_datavalues::TypeID; +use common_exception::ErrorCode; +use common_exception::Result; +use common_functions::aggregates::AggregateFunctionFactory; +use common_functions::is_builtin_function; +use common_functions::scalars::CastFunction; +use common_functions::scalars::FunctionContext; +use common_functions::scalars::FunctionFactory; +use common_functions::scalars::TupleFunction; + +use super::name_resolution::NameResolutionContext; +use super::normalize_identifier; +use crate::binder::NameResolutionResult; +use crate::evaluator::Evaluator; +use crate::planner::metadata::optimize_remove_count_args; +use crate::plans::AggregateFunction; +use crate::plans::AndExpr; +use crate::plans::BoundColumnRef; +use crate::plans::CastExpr; +use crate::plans::ComparisonExpr; +use crate::plans::ComparisonOp; +use crate::plans::ConstantExpr; +use crate::plans::FunctionCall; +use crate::plans::OrExpr; +use crate::plans::Scalar; +use crate::plans::SubqueryType; +use crate::validate_function_arg; +use crate::BindContext; +use crate::ScalarExpr; + +// todo refine this +// `SyncTypeChecker` is the synchronous `TypeChecker` +pub struct SyncTypeChecker<'a> { + bind_context: &'a BindContext, + name_resolution_ctx: &'a NameResolutionContext, + + aliases: &'a [(String, Scalar)], + + // true if current expr is inside an aggregate function. + // This is used to check if there is nested aggregate function. + in_aggregate_function: bool, +} + +impl<'a> SyncTypeChecker<'a> { + pub fn new( + bind_context: &'a BindContext, + name_resolution_ctx: &'a NameResolutionContext, + aliases: &'a [(String, Scalar)], + ) -> Self { + Self { + bind_context, + name_resolution_ctx, + aliases, + in_aggregate_function: false, + } + } + + fn post_resolve( + &mut self, + scalar: &Scalar, + data_type: &DataTypeImpl, + ) -> Result<(Scalar, DataTypeImpl)> { + // Try constant folding + if let Ok((value, value_type)) = Evaluator::eval_scalar(scalar).and_then(|evaluator| { + let func_ctx = FunctionContext::default(); + if scalar.is_deterministic() { + evaluator.try_eval_const(&func_ctx) + } else { + Err(ErrorCode::LogicalError( + "Constant folding requires the function deterministic", + )) + } + }) { + Ok(( + ConstantExpr { + value, + data_type: Box::new(value_type), + } + .into(), + data_type.clone(), + )) + } else { + Ok((scalar.clone(), data_type.clone())) + } + } + + pub fn resolve( + &mut self, + expr: &Expr<'_>, + required_type: Option, + ) -> Result> { + let box (scalar, data_type): Box<(Scalar, DataTypeImpl)> = match expr { + Expr::ColumnRef { + database, + table, + column: ident, + .. 
+ } => { + let database = database + .as_ref() + .map(|ident| normalize_identifier(ident, self.name_resolution_ctx).name); + let table = table + .as_ref() + .map(|ident| normalize_identifier(ident, self.name_resolution_ctx).name); + let column = normalize_identifier(ident, self.name_resolution_ctx).name; + let result = self.bind_context.resolve_name( + database.as_deref(), + table.as_deref(), + column.as_str(), + &ident.span, + self.aliases, + )?; + let (scalar, data_type) = match result { + NameResolutionResult::Column(column) => { + let data_type = *column.data_type.clone(); + (BoundColumnRef { column }.into(), data_type) + } + NameResolutionResult::Alias { scalar, .. } => { + (scalar.clone(), scalar.data_type()) + } + }; + + Box::new((scalar, data_type)) + } + + Expr::IsNull { + span, expr, not, .. + } => { + let args = &[expr.as_ref()]; + if *not { + self.resolve_function(span, "is_not_null", args, required_type)? + } else { + self.resolve_function( + span, + "not", + &[&Expr::FunctionCall { + span, + distinct: false, + name: Identifier { + name: "is_not_null".to_string(), + quote: None, + span: span[0].clone(), + }, + args: vec![(args[0]).clone()], + params: vec![], + }], + None, + )? + } + } + + Expr::IsDistinctFrom { + span, + left, + right, + not, + } => { + let left_null_expr = Box::new(Expr::IsNull { + span, + expr: left.clone(), + not: false, + }); + let right_null_expr = Box::new(Expr::IsNull { + span, + expr: right.clone(), + not: false, + }); + let op = if *not { + BinaryOperator::Eq + } else { + BinaryOperator::NotEq + }; + let box (scalar, data_type) = self.resolve_function( + span, + "multi_if", + &[ + &Expr::BinaryOp { + span, + op: BinaryOperator::And, + left: left_null_expr.clone(), + right: right_null_expr.clone(), + }, + &Expr::Literal { + span, + lit: Literal::Boolean(*not), + }, + &Expr::BinaryOp { + span, + op: BinaryOperator::Or, + left: left_null_expr.clone(), + right: right_null_expr.clone(), + }, + &Expr::Literal { + span, + lit: Literal::Boolean(!*not), + }, + &Expr::BinaryOp { + span, + op, + left: left.clone(), + right: right.clone(), + }, + ], + None, + )?; + self.resolve_scalar_function_call( + span, + "assume_not_null", + vec![scalar], + vec![data_type], + required_type, + )? + } + + Expr::InList { + span, + expr, + list, + not, + .. + } => { + if list.len() > 3 + && list + .iter() + .all(|e| matches!(e, Expr::Literal { lit, .. } if lit != &Literal::Null)) + { + let tuple_expr = Expr::Tuple { + span, + exprs: list.clone(), + }; + let args = vec![expr.as_ref(), &tuple_expr]; + if *not { + self.resolve_function( + span, + "not", + &[&Expr::FunctionCall { + span, + distinct: false, + name: Identifier { + name: "in".to_string(), + quote: None, + span: span[0].clone(), + }, + args: args.iter().copied().cloned().collect(), + params: vec![], + }], + None, + )? + } else { + self.resolve_function(span, "in", &args, required_type)? + } + } else { + let mut result = list + .iter() + .map(|e| Expr::BinaryOp { + span, + op: BinaryOperator::Eq, + left: expr.clone(), + right: Box::new(e.clone()), + }) + .fold(None, |mut acc, e| { + match acc.as_mut() { + None => acc = Some(e), + Some(acc) => { + *acc = Expr::BinaryOp { + span, + op: BinaryOperator::Or, + left: Box::new(acc.clone()), + right: Box::new(e.clone()), + } + } + } + acc + }) + .unwrap(); + + if *not { + result = Expr::UnaryOp { + span, + op: UnaryOperator::Not, + expr: Box::new(result), + }; + } + self.resolve(&result, required_type)? + } + } + + Expr::Between { + span, + expr, + low, + high, + not, + .. 
+ } => { + if !*not { + // Rewrite `expr BETWEEN low AND high` + // into `expr >= low AND expr <= high` + let box (ge_func, left_type) = + self.resolve_function(span, ">=", &[expr.as_ref(), low.as_ref()], None)?; + let box (le_func, right_type) = + self.resolve_function(span, "<=", &[expr.as_ref(), high.as_ref()], None)?; + let func = + FunctionFactory::instance().get("and", &[&left_type, &right_type])?; + Box::new(( + AndExpr { + left: Box::new(ge_func), + right: Box::new(le_func), + return_type: Box::new(func.return_type()), + } + .into(), + func.return_type(), + )) + } else { + // Rewrite `expr NOT BETWEEN low AND high` + // into `expr < low OR expr > high` + let box (lt_func, left_type) = + self.resolve_function(span, "<", &[expr.as_ref(), low.as_ref()], None)?; + let box (gt_func, right_type) = + self.resolve_function(span, ">", &[expr.as_ref(), high.as_ref()], None)?; + let func = FunctionFactory::instance().get("or", &[&left_type, &right_type])?; + Box::new(( + OrExpr { + left: Box::new(lt_func), + right: Box::new(gt_func), + return_type: Box::new(func.return_type()), + } + .into(), + func.return_type(), + )) + } + } + + Expr::BinaryOp { + span, + op, + left, + right, + .. + } => { + if let Expr::Subquery { + subquery, modifier, .. + } = &**right + { + if let Some(subquery_modifier) = modifier { + match subquery_modifier { + SubqueryModifier::Any | SubqueryModifier::Some => { + return Err(ErrorCode::SemanticError( + expr.span() + .display_error("not support subquery".to_string()), + )); + } + SubqueryModifier::All => { + let contrary_op = op.to_contrary()?; + let rewritten_subquery = Expr::Subquery { + span: right.span(), + modifier: Some(SubqueryModifier::Any), + subquery: (*subquery).clone(), + }; + self.resolve_function( + span, + "not", + &[&Expr::BinaryOp { + span, + op: contrary_op, + left: (*left).clone(), + right: Box::new(rewritten_subquery), + }], + None, + )? + } + } + } else { + self.resolve_binary_op( + span, + op, + left.as_ref(), + right.as_ref(), + required_type, + )? + } + } else { + self.resolve_binary_op(span, op, left.as_ref(), right.as_ref(), required_type)? + } + } + + Expr::UnaryOp { span, op, expr, .. } => { + self.resolve_unary_op(span, op, expr.as_ref(), required_type)? + } + + Expr::Cast { + expr, target_type, .. 
+ } => { + let box (scalar, data_type) = self.resolve(expr, required_type)?; + let cast_func = + CastFunction::create("", target_type.to_string().as_str(), data_type.clone())?; + Box::new(( + CastExpr { + argument: Box::new(scalar), + from_type: Box::new(data_type), + target_type: Box::new(cast_func.return_type()), + } + .into(), + cast_func.return_type(), + )) + } + + Expr::Case { + span, + operand, + conditions, + results, + else_result, + } => { + let mut arguments = Vec::with_capacity(conditions.len() * 2 + 1); + for (c, r) in conditions.iter().zip(results.iter()) { + match operand { + Some(operand) => { + // compare case operand with each conditions until one of them is equal + let equal_expr = Expr::FunctionCall { + span, + distinct: false, + name: Identifier { + name: "=".to_string(), + quote: None, + span: span[0].clone(), + }, + args: vec![*operand.clone(), c.clone()], + params: vec![], + }; + arguments.push(equal_expr) + } + None => arguments.push(c.clone()), + } + arguments.push(r.clone()); + } + let null_arg = Expr::Literal { + span: &[], + lit: Literal::Null, + }; + + if let Some(expr) = else_result { + arguments.push(*expr.clone()); + } else { + arguments.push(null_arg) + } + let args_ref: Vec<&Expr> = arguments.iter().collect(); + self.resolve_function(span, "multi_if", &args_ref, required_type)? + } + + Expr::Substring { + span, + expr, + substring_from, + substring_for, + .. + } => { + let mut arguments = vec![expr.as_ref(), substring_from.as_ref()]; + if let Some(substring_for) = substring_for { + arguments.push(substring_for.as_ref()); + } + self.resolve_function(span, "substring", &arguments, required_type)? + } + + Expr::Literal { lit, .. } => { + let box (value, data_type) = self.resolve_literal(lit, required_type)?; + Box::new(( + ConstantExpr { + value, + data_type: Box::new(data_type.clone()), + } + .into(), + data_type, + )) + } + + Expr::FunctionCall { + span, + distinct, + name, + args, + params, + .. 
+ } => { + let func_name = name.name.as_str(); + if !is_builtin_function(func_name) + && !Self::is_rewritable_scalar_function(func_name) + { + return Err(ErrorCode::SemanticError( + expr.span() + .display_error("not support UDF functions".to_string()), + )); + } + + let args: Vec<&Expr> = args.iter().collect(); + + if AggregateFunctionFactory::instance().check(func_name) { + if self.in_aggregate_function { + // Reset the state + self.in_aggregate_function = false; + return Err(ErrorCode::SemanticError(expr.span().display_error( + "aggregate function calls cannot be nested".to_string(), + ))); + } + + // Check aggregate function + let params = params + .iter() + .map(|literal| { + self.resolve_literal(literal, None) + .map(|box (value, _)| value) + }) + .collect::>>()?; + + self.in_aggregate_function = true; + let mut arguments = vec![]; + for arg in args.iter() { + arguments.push(self.resolve(arg, None)?); + } + self.in_aggregate_function = false; + + let data_fields = arguments + .iter() + .map(|box (_, data_type)| DataField::new("", data_type.clone())) + .collect(); + + // Rewrite `xxx(distinct)` to `xxx_distinct(...)` + let (func_name, distinct) = + if func_name.eq_ignore_ascii_case("count") && *distinct { + ("count_distinct", false) + } else { + (func_name, *distinct) + }; + + let func_name = if distinct { + format!("{}_distinct", func_name) + } else { + func_name.to_string() + }; + + let agg_func = AggregateFunctionFactory::instance() + .get(&func_name, params.clone(), data_fields) + .map_err(|e| ErrorCode::SemanticError(span.display_error(e.message())))?; + + let args = if optimize_remove_count_args(&func_name, distinct, args.as_slice()) + { + vec![] + } else { + arguments.into_iter().map(|box (arg, _)| arg).collect() + }; + + Box::new(( + AggregateFunction { + display_name: format!("{:#}", expr), + func_name, + distinct: false, + params, + args, + return_type: Box::new(agg_func.return_type()?), + } + .into(), + agg_func.return_type()?, + )) + } else { + // Scalar function + self.resolve_function(span, func_name, &args, required_type)? + } + } + + Expr::CountAll { .. } => { + let agg_func = AggregateFunctionFactory::instance().get("count", vec![], vec![])?; + + Box::new(( + AggregateFunction { + display_name: format!("{:#}", expr), + func_name: "count".to_string(), + distinct: false, + params: vec![], + args: vec![], + return_type: Box::new(agg_func.return_type()?), + } + .into(), + agg_func.return_type()?, + )) + } + + Expr::Exists { .. } => { + return Err(ErrorCode::SemanticError( + expr.span() + .display_error("not support subquery".to_string()), + )); + } + + Expr::Subquery { .. } => { + return Err(ErrorCode::SemanticError( + expr.span() + .display_error("not support subquery".to_string()), + )); + } + + Expr::InSubquery { .. } => { + return Err(ErrorCode::SemanticError( + expr.span() + .display_error("not support subquery".to_string()), + )); + } + + expr @ Expr::MapAccess { + span, + expr: inner_expr, + accessor, + } => { + // If it's map accessors to a tuple column, pushdown the map accessors to storage. + let mut accessors = Vec::new(); + let mut expr = expr; + loop { + match expr { + Expr::MapAccess { + expr: inner_expr, + accessor: + accessor @ (MapAccessor::Period { .. } + | MapAccessor::PeriodNumber { .. } + | MapAccessor::Colon { .. } + | MapAccessor::Bracket { + key: + box Expr::Literal { + lit: Literal::String(..), + .. + }, + }), + .. + } => { + accessors.push(accessor.clone()); + expr = &**inner_expr; + } + Expr::ColumnRef { + database, + table, + column, + .. 
+ } => { + let (_, data_type) = *self.resolve(expr, None)?; + if data_type.data_type_id() != TypeID::Struct { + break; + } + return self.resolve_map_access_pushdown( + data_type, + accessors, + database.clone(), + table.clone(), + column.clone(), + ); + } + _ => { + break; + } + } + } + + // Otherwise, desugar it into a `get` function. + let arg = match accessor { + MapAccessor::Bracket { key } => (**key).clone(), + MapAccessor::Period { key } | MapAccessor::Colon { key } => Expr::Literal { + span, + lit: Literal::String(key.name.clone()), + }, + MapAccessor::PeriodNumber { .. } => unimplemented!(), + }; + self.resolve_function(span, "get", &[inner_expr, &arg], None)? + } + + Expr::TryCast { + span, + expr, + target_type, + .. + } => { + let box (scalar, data_type) = self.resolve(expr, required_type)?; + let cast_func = CastFunction::create_try( + "", + target_type.to_string().as_str(), + data_type.clone(), + ) + .map_err(|e| ErrorCode::SemanticError(span.display_error(e.message())))?; + Box::new(( + CastExpr { + argument: Box::new(scalar), + from_type: Box::new(data_type), + target_type: Box::new(cast_func.return_type()), + } + .into(), + cast_func.return_type(), + )) + } + + Expr::Extract { + span, kind, expr, .. + } => self.resolve_extract_expr(span, kind, expr, required_type)?, + + Expr::Interval { + span, expr, unit, .. + } => self.resolve_interval(span, expr, unit, required_type)?, + + Expr::DateAdd { + span, + unit, + interval, + date, + .. + } => self.resolve_date_add(span, unit, interval, date, required_type)?, + Expr::DateSub { + span, + unit, + interval, + date, + .. + } => self.resolve_date_add( + span, + unit, + &Expr::UnaryOp { + span, + op: UnaryOperator::Minus, + expr: interval.clone(), + }, + date, + required_type, + )?, + Expr::DateTrunc { + span, unit, date, .. + } => self.resolve_date_trunc(span, date, unit, required_type)?, + Expr::Trim { + span, + expr, + trim_where, + .. + } => self.resolve_trim_function(span, expr, trim_where)?, + + Expr::Array { span, exprs, .. } => self.resolve_array(span, exprs)?, + + Expr::Position { + substr_expr, + str_expr, + span, + .. + } => self.resolve_function( + span, + "locate", + &[substr_expr.as_ref(), str_expr.as_ref()], + None, + )?, + + Expr::Tuple { span, exprs, .. } => self.resolve_tuple(span, exprs)?, + }; + + Ok(Box::new(self.post_resolve(&scalar, &data_type)?)) + } + + /// Resolve function call. + pub fn resolve_function( + &mut self, + span: &[Token<'_>], + func_name: &str, + arguments: &[&Expr<'_>], + _required_type: Option, + ) -> Result> { + let mut args = vec![]; + let mut arg_types = vec![]; + + for argument in arguments { + let box (arg, mut arg_type) = self.resolve(argument, None)?; + if let Scalar::SubqueryExpr(subquery) = &arg { + if subquery.typ == SubqueryType::Scalar && !arg.data_type().is_nullable() { + arg_type = NullableType::new_impl(arg_type); + } + } + args.push(arg); + arg_types.push(arg_type); + } + + let arg_types_ref: Vec<&DataTypeImpl> = arg_types.iter().collect(); + + // Validate function arguments. + // TODO(leiysky): should be done in `FunctionFactory::get`. 
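+        // Arity is validated up front from the factory's metadata
+        // (`variadic_arguments` / `num_arguments`) so that a wrong argument
+        // count fails before the function is instantiated.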
+ let feature = FunctionFactory::instance().get_features(func_name)?; + validate_function_arg( + func_name, + arguments.len(), + feature.variadic_arguments, + feature.num_arguments, + )?; + + let func = FunctionFactory::instance() + .get(func_name, &arg_types_ref) + .map_err(|e| ErrorCode::SemanticError(span.display_error(e.message())))?; + Ok(Box::new(( + FunctionCall { + arguments: args, + func_name: func_name.to_string(), + arg_types: arg_types.to_vec(), + return_type: Box::new(func.return_type()), + } + .into(), + func.return_type(), + ))) + } + + pub fn resolve_scalar_function_call( + &mut self, + span: &[Token<'_>], + func_name: &str, + arguments: Vec, + arguments_types: Vec, + _required_type: Option, + ) -> Result> { + let arg_types_ref: Vec<&DataTypeImpl> = arguments_types.iter().collect(); + let func = FunctionFactory::instance() + .get(func_name, &arg_types_ref) + .map_err(|e| ErrorCode::SemanticError(span.display_error(e.message())))?; + + Ok(Box::new(( + FunctionCall { + arguments, + func_name: func_name.to_string(), + arg_types: arguments_types.to_vec(), + return_type: Box::new(func.return_type()), + } + .into(), + func.return_type(), + ))) + } + + /// Resolve binary expressions. Most of the binary expressions + /// would be transformed into `FunctionCall`, except comparison + /// expressions, conjunction(`AND`) and disjunction(`OR`). + pub fn resolve_binary_op( + &mut self, + span: &[Token<'_>], + op: &BinaryOperator, + left: &Expr<'_>, + right: &Expr<'_>, + required_type: Option, + ) -> Result> { + match op { + BinaryOperator::Plus + | BinaryOperator::Minus + | BinaryOperator::Multiply + | BinaryOperator::Div + | BinaryOperator::Divide + | BinaryOperator::Modulo + | BinaryOperator::StringConcat + | BinaryOperator::Like + | BinaryOperator::NotLike + | BinaryOperator::Regexp + | BinaryOperator::RLike + | BinaryOperator::NotRegexp + | BinaryOperator::NotRLike + | BinaryOperator::BitwiseOr + | BinaryOperator::BitwiseAnd + | BinaryOperator::BitwiseXor + | BinaryOperator::Xor => { + self.resolve_function(span, op.to_string().as_str(), &[left, right], required_type) + } + BinaryOperator::Gt + | BinaryOperator::Lt + | BinaryOperator::Gte + | BinaryOperator::Lte + | BinaryOperator::Eq + | BinaryOperator::NotEq => { + let op = ComparisonOp::try_from(op)?; + let box (left, _) = self.resolve(left, None)?; + let box (right, _) = self.resolve(right, None)?; + let func = FunctionFactory::instance() + .get(op.to_func_name(), &[&left.data_type(), &right.data_type()])?; + Ok(Box::new(( + ComparisonExpr { + op, + left: Box::new(left), + right: Box::new(right), + return_type: Box::new(func.return_type()), + } + .into(), + func.return_type(), + ))) + } + BinaryOperator::And => { + let box (left, _) = self.resolve(left, None)?; + let box (right, _) = self.resolve(right, None)?; + let func = FunctionFactory::instance() + .get("and", &[&left.data_type(), &right.data_type()])?; + Ok(Box::new(( + AndExpr { + left: Box::new(left), + right: Box::new(right), + return_type: Box::new(func.return_type()), + } + .into(), + func.return_type(), + ))) + } + BinaryOperator::Or => { + let box (left, _) = self.resolve(left, None)?; + let box (right, _) = self.resolve(right, None)?; + let func = FunctionFactory::instance() + .get("or", &[&left.data_type(), &right.data_type()])?; + Ok(Box::new(( + OrExpr { + left: Box::new(left), + right: Box::new(right), + return_type: Box::new(func.return_type()), + } + .into(), + func.return_type(), + ))) + } + } + } + + /// Resolve unary expressions. 
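+    /// Unary `+` is a no-op and simply resolves its operand; `-` lowers to
+    /// the `negate` scalar function and `NOT` to the `not` function.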
+ pub fn resolve_unary_op( + &mut self, + span: &[Token<'_>], + op: &UnaryOperator, + child: &Expr<'_>, + required_type: Option, + ) -> Result> { + match op { + UnaryOperator::Plus => { + // Omit unary + operator + self.resolve(child, required_type) + } + + UnaryOperator::Minus => self.resolve_function(span, "negate", &[child], required_type), + + UnaryOperator::Not => self.resolve_function(span, "not", &[child], required_type), + } + } + + pub fn resolve_extract_expr( + &mut self, + span: &[Token<'_>], + interval_kind: &ASTIntervalKind, + arg: &Expr<'_>, + _required_type: Option, + ) -> Result> { + match interval_kind { + ASTIntervalKind::Year => { + self.resolve_function(span, "to_year", &[arg], Some(TimestampType::new_impl())) + } + ASTIntervalKind::Quarter => { + self.resolve_function(span, "to_quarter", &[arg], Some(TimestampType::new_impl())) + } + ASTIntervalKind::Month => { + self.resolve_function(span, "to_month", &[arg], Some(TimestampType::new_impl())) + } + ASTIntervalKind::Day => self.resolve_function( + span, + "to_day_of_month", + &[arg], + Some(TimestampType::new_impl()), + ), + ASTIntervalKind::Hour => { + self.resolve_function(span, "to_hour", &[arg], Some(TimestampType::new_impl())) + } + ASTIntervalKind::Minute => { + self.resolve_function(span, "to_minute", &[arg], Some(TimestampType::new_impl())) + } + ASTIntervalKind::Second => { + self.resolve_function(span, "to_second", &[arg], Some(TimestampType::new_impl())) + } + ASTIntervalKind::Doy => self.resolve_function( + span, + "to_day_of_year", + &[arg], + Some(TimestampType::new_impl()), + ), + ASTIntervalKind::Dow => self.resolve_function( + span, + "to_day_of_week", + &[arg], + Some(TimestampType::new_impl()), + ), + } + } + + pub fn resolve_interval( + &mut self, + span: &[Token<'_>], + arg: &Expr<'_>, + interval_kind: &ASTIntervalKind, + _required_type: Option, + ) -> Result> { + match interval_kind { + ASTIntervalKind::Year => self.resolve_function( + span, + "to_interval_year", + &[arg], + Some(IntervalType::new_impl(IntervalKind::Year)), + ), + ASTIntervalKind::Quarter => self.resolve_function( + span, + "to_interval_quarter", + &[arg], + Some(IntervalType::new_impl(IntervalKind::Quarter)), + ), + ASTIntervalKind::Month => self.resolve_function( + span, + "to_interval_month", + &[arg], + Some(IntervalType::new_impl(IntervalKind::Month)), + ), + ASTIntervalKind::Day => self.resolve_function( + span, + "to_interval_day", + &[arg], + Some(IntervalType::new_impl(IntervalKind::Day)), + ), + ASTIntervalKind::Hour => self.resolve_function( + span, + "to_interval_hour", + &[arg], + Some(IntervalType::new_impl(IntervalKind::Hour)), + ), + ASTIntervalKind::Minute => self.resolve_function( + span, + "to_interval_minute", + &[arg], + Some(IntervalType::new_impl(IntervalKind::Minute)), + ), + ASTIntervalKind::Second => self.resolve_function( + span, + "to_interval_second", + &[arg], + Some(IntervalType::new_impl(IntervalKind::Second)), + ), + ASTIntervalKind::Doy => self.resolve_function( + span, + "to_interval_doy", + &[arg], + Some(IntervalType::new_impl(IntervalKind::Doy)), + ), + ASTIntervalKind::Dow => self.resolve_function( + span, + "to_interval_dow", + &[arg], + Some(IntervalType::new_impl(IntervalKind::Dow)), + ), + } + } + + pub fn resolve_date_add( + &mut self, + span: &[Token<'_>], + interval_kind: &ASTIntervalKind, + interval: &Expr<'_>, + date: &Expr<'_>, + _required_type: Option, + ) -> Result> { + let mut args = vec![]; + let mut arg_types = vec![]; + + let box (date, date_type) = self.resolve(date, None)?; + 
args.push(date); + arg_types.push(date_type); + + let box (interval, interval_type) = + self.resolve_interval(span, interval, interval_kind, None)?; + args.push(interval); + arg_types.push(interval_type); + + let arg_types_ref: Vec<&DataTypeImpl> = arg_types.iter().collect(); + + let func = FunctionFactory::instance() + .get("date_add", &arg_types_ref) + .map_err(|e| ErrorCode::SemanticError(span.display_error(e.message())))?; + Ok(Box::new(( + FunctionCall { + arguments: args, + func_name: "date_add".to_string(), + arg_types: arg_types.to_vec(), + return_type: Box::new(func.return_type()), + } + .into(), + func.return_type(), + ))) + } + + pub fn resolve_date_trunc( + &mut self, + span: &[Token<'_>], + date: &Expr<'_>, + kind: &ASTIntervalKind, + _required_type: Option, + ) -> Result> { + match kind { + ASTIntervalKind::Year => { + self.resolve_function( + span, + "to_start_of_year", + &[date], + Some(TimestampType::new_impl()), + ) + } + ASTIntervalKind::Quarter => { + self.resolve_function( + span, + "to_start_of_quarter", + &[date], + Some(TimestampType::new_impl()), + ) + } + ASTIntervalKind::Month => { + self.resolve_function( + span, + "to_start_of_month", + &[date], + Some(TimestampType::new_impl()), + ) + } + ASTIntervalKind::Day => { + self.resolve_function( + span, + "to_start_of_day", + &[date], + Some(TimestampType::new_impl()), + ) + } + ASTIntervalKind::Hour => { + self.resolve_function( + span, + "to_start_of_hour", + &[date], + Some(TimestampType::new_impl()), + ) + } + ASTIntervalKind::Minute => { + self.resolve_function( + span, + "to_start_of_minute", + &[date], + Some(TimestampType::new_impl()), + ) + } + ASTIntervalKind::Second => { + self.resolve_function( + span, + "to_start_of_second", + &[date], + Some(TimestampType::new_impl()), + ) + } + _ => Err(ErrorCode::SemanticError(span.display_error("Only these interval types are currently supported: [year, month, day, hour, minute, second]".to_string()))), + } + } + + fn is_rewritable_scalar_function(func_name: &str) -> bool { + matches!( + func_name.to_lowercase().as_str(), + "database" + | "currentdatabase" + | "current_database" + | "version" + | "user" + | "currentuser" + | "current_user" + | "connection_id" + | "timezone" + | "nullif" + | "ifnull" + | "coalesce" + ) + } + + fn resolve_trim_function( + &mut self, + span: &[Token<'_>], + expr: &Expr<'_>, + trim_where: &Option<(TrimWhere, Box>)>, + ) -> Result> { + let (func_name, trim_scalar, trim_type) = if let Some((trim_type, trim_expr)) = trim_where { + let func_name = match trim_type { + TrimWhere::Leading => "trim_leading", + TrimWhere::Trailing => "trim_trailing", + TrimWhere::Both => "trim_both", + }; + + let box (trim_scalar, trim_type) = self.resolve(trim_expr, None)?; + (func_name, trim_scalar, trim_type) + } else { + let trim_scalar = ConstantExpr { + value: DataValue::String(" ".as_bytes().to_vec()), + data_type: Box::new(StringType::new_impl()), + } + .into(); + ("trim_both", trim_scalar, StringType::new_impl()) + }; + + let box (trim_source, source_type) = self.resolve(expr, None)?; + let args = vec![trim_source, trim_scalar]; + let func = FunctionFactory::instance() + .get(func_name, &[&source_type, &trim_type]) + .map_err(|e| ErrorCode::SemanticError(span.display_error(e.message())))?; + + Ok(Box::new(( + FunctionCall { + arguments: args, + func_name: func_name.to_string(), + arg_types: vec![source_type, trim_type], + return_type: Box::new(func.return_type()), + } + .into(), + func.return_type(), + ))) + } + + /// Resolve literal values. 
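+    /// Integers map to `DataValue::UInt64`, floats to `Float64`, strings to
+    /// raw byte vectors; the `_required_type` hint is accepted but not yet
+    /// applied (see the TODO in the body).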
+ pub fn resolve_literal( + &self, + literal: &Literal, + _required_type: Option, + ) -> Result> { + // TODO(leiysky): try cast value to required type + let value = match literal { + Literal::Integer(uint) => DataValue::UInt64(*uint), + Literal::Float(float) => DataValue::Float64(*float), + Literal::String(string) => DataValue::String(string.as_bytes().to_vec()), + Literal::Boolean(boolean) => DataValue::Boolean(*boolean), + Literal::Null => DataValue::Null, + _ => Err(ErrorCode::SemanticError(format!( + "Unsupported literal value: {literal}" + )))?, + }; + + let data_type = value.data_type(); + + Ok(Box::new((value, data_type))) + } + + // TODO(leiysky): use an array builder function instead, since we should allow declaring + // an array with variable as element. + fn resolve_array( + &mut self, + span: &[Token<'_>], + exprs: &[Expr<'_>], + ) -> Result> { + let mut elems = Vec::with_capacity(exprs.len()); + let mut types = Vec::with_capacity(exprs.len()); + for expr in exprs.iter() { + let box (arg, data_type) = self.resolve(expr, None)?; + types.push(data_type); + if let Scalar::ConstantExpr(elem) = arg { + elems.push(elem.value); + } else { + return Err(ErrorCode::SemanticError( + expr.span() + .display_error("Array element must be literal".to_string()), + )); + } + } + let element_type = if elems.is_empty() { + NullType::new_impl() + } else { + types + .iter() + .fold(Ok(types[0].clone()), |acc, v| merge_types(&acc?, v)) + .map_err(|e| ErrorCode::SemanticError(span.display_error(e.message())))? + }; + Ok(Box::new(( + ConstantExpr { + value: DataValue::Array(elems), + data_type: Box::new(ArrayType::new_impl(element_type.clone())), + } + .into(), + ArrayType::new_impl(element_type), + ))) + } + + fn resolve_tuple( + &mut self, + span: &[Token<'_>], + exprs: &[Expr<'_>], + ) -> Result> { + let mut args = Vec::with_capacity(exprs.len()); + let mut arg_types = Vec::with_capacity(exprs.len()); + for expr in exprs { + let box (arg, data_type) = self.resolve(expr, None)?; + args.push(arg); + arg_types.push(data_type); + } + let arg_types_ref: Vec<&DataTypeImpl> = arg_types.iter().collect(); + let tuple_func = TupleFunction::try_create_func("", &arg_types_ref) + .map_err(|e| ErrorCode::SemanticError(span.display_error(e.message())))?; + Ok(Box::new(( + FunctionCall { + arguments: args, + func_name: "tuple".to_string(), + arg_types, + return_type: Box::new(tuple_func.return_type()), + } + .into(), + tuple_func.return_type(), + ))) + } + + fn resolve_map_access_pushdown( + &mut self, + data_type: DataTypeImpl, + mut accessors: Vec, + database: Option, + table: Option, + column: Identifier, + ) -> Result> { + let mut names = Vec::new(); + let column_name = normalize_identifier(&column, self.name_resolution_ctx).name; + names.push(column_name); + let mut data_types = Vec::new(); + data_types.push(data_type.clone()); + + while !accessors.is_empty() { + let data_type = data_types.pop().unwrap(); + let struct_type: StructType = data_type.try_into()?; + let inner_types = struct_type.types(); + let inner_names = match struct_type.names() { + Some(inner_names) => inner_names.clone(), + None => (0..inner_types.len()) + .map(|i| format!("{}", i + 1)) + .collect::>(), + }; + + let accessor = accessors.pop().unwrap(); + let accessor_lit = match accessor { + MapAccessor::Bracket { + key: + box Expr::Literal { + lit: lit @ Literal::String(_), + .. 
+ }, + } => lit, + MapAccessor::Period { key } | MapAccessor::Colon { key } => { + Literal::String(key.name.clone()) + } + MapAccessor::PeriodNumber { key } => Literal::Integer(key), + _ => unreachable!(), + }; + + match accessor_lit { + Literal::Integer(idx) => { + if idx == 0 { + return Err(ErrorCode::SemanticError( + "tuple index is starting from 1, but 0 is found".to_string(), + )); + } + if idx as usize > inner_types.len() { + return Err(ErrorCode::SemanticError(format!( + "tuple index {} is out of bounds for length {}", + idx, + inner_types.len() + ))); + } + let inner_name = inner_names.get(idx as usize - 1).unwrap(); + let inner_type = inner_types.get(idx as usize - 1).unwrap(); + names.push(inner_name.clone()); + data_types.push(inner_type.clone()); + } + Literal::String(name) => match inner_names.iter().position(|k| k == &name) { + Some(idx) => { + let inner_name = inner_names.get(idx).unwrap(); + let inner_type = inner_types.get(idx).unwrap(); + names.push(inner_name.clone()); + data_types.push(inner_type.clone()); + } + None => { + return Err(ErrorCode::SemanticError(format!( + "tuple name `{}` does not exist, available names are: {:?}", + name, &inner_names + ))); + } + }, + _ => unreachable!(), + } + } + + let database = database + .as_ref() + .map(|ident| normalize_identifier(ident, self.name_resolution_ctx).name); + let table = table + .as_ref() + .map(|ident| normalize_identifier(ident, self.name_resolution_ctx).name); + let inner_column_name = names.join(":"); + + let result = self.bind_context.resolve_name( + database.as_deref(), + table.as_deref(), + inner_column_name.as_str(), + &column.span, + self.aliases, + )?; + let (scalar, data_type) = match result { + NameResolutionResult::Column(column) => { + let data_type = *column.data_type.clone(); + (BoundColumnRef { column }.into(), data_type) + } + NameResolutionResult::Alias { scalar, .. 
} => (scalar.clone(), scalar.data_type()), + }; + Ok(Box::new((scalar, data_type))) + } +} diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index 995d2629a62b..754070805560 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -41,6 +41,7 @@ use common_planner::Partitions; use common_planner::PhysicalScalar; use common_planner::ReadDataSourcePlan; use common_sharing::create_share_table_operator; +use common_sql::PhysicalScalarParser; use common_storage::init_operator; use common_storage::DataOperator; use common_storage::ShareTableConfig; @@ -89,10 +90,10 @@ impl FuseTable { pub fn do_create(table_info: TableInfo, read_only: bool) -> Result> { let storage_prefix = Self::parse_storage_prefix(&table_info)?; let cluster_key_meta = table_info.meta.cluster_key(); + let schema = table_info.schema(); let mut cluster_keys = Vec::new(); if let Some((_, order)) = &cluster_key_meta { - // todo(sundy) - // sync_type_checker or block_on + cluster_keys = PhysicalScalarParser::parse_exprs(schema, order)?; } let operator = match table_info.from_share { Some(ref from_share) => create_share_table_operator( diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs index a54319cee424..d5e4b006e258 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information_table.rs @@ -182,7 +182,7 @@ impl AsyncSource for ClusteringInformationSource { .await?; let tbl = FuseTable::try_from_table(tbl.as_ref())?; - let cluster_keys = get_cluster_keys(self.ctx.clone(), tbl, &self.arg_cluster_keys).await?; + let cluster_keys = get_cluster_keys(tbl, &self.arg_cluster_keys)?; Ok(Some( ClusteringInformation::new(self.ctx.clone(), tbl, cluster_keys) diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs index d84ea2e60b5b..846ddc979557 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs @@ -12,10 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
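Editor's note: the final hunk of this patch makes `get_cluster_keys` synchronous too, keeping its precedence rule: an explicit `definition` argument wins, otherwise the table's stored cluster keys are used. A sketch of that fallback with keys reduced to strings (the real code parses the definition with `PhysicalScalarParser`, not `split`):

```rust
fn cluster_keys_of(definition: &str, table_defaults: Vec<String>) -> Vec<String> {
    if definition.is_empty() {
        // No override given: fall back to the keys stored in table meta.
        table_defaults
    } else {
        definition.split(',').map(|s| s.trim().to_string()).collect()
    }
}

fn main() {
    assert_eq!(cluster_keys_of("", vec!["a".into()]), vec!["a"]);
    assert_eq!(cluster_keys_of("b, c", vec!["a".into()]), vec!["b", "c"]);
}
```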
-use std::sync::Arc; - use common_catalog::table::Table; -use common_catalog::table_context::TableContext; use common_exception::ErrorCode; use common_exception::Result; use common_planner::PhysicalScalar; @@ -40,14 +37,9 @@ pub fn parse_func_table_args(table_args: &TableArgs) -> Result<(String, String)> } } -pub async fn get_cluster_keys( - ctx: Arc, - table: &FuseTable, - definition: &str, -) -> Result> { +pub fn get_cluster_keys(table: &FuseTable, definition: &str) -> Result> { let cluster_keys = if !definition.is_empty() { - let physical_scalars = - PhysicalScalarParser::parse_exprs(ctx, table.schema(), definition).await?; + let physical_scalars = PhysicalScalarParser::parse_exprs(table.schema(), definition)?; physical_scalars } else { table.cluster_keys() From 13d363e8fb5864faaed7ffc31f50443f1d0f423a Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 27 Oct 2022 21:29:56 +0800 Subject: [PATCH 19/47] fix --- src/query/planner/src/extras.rs | 4 +- src/query/service/src/sessions/query_ctx.rs | 5 +- .../sql/src/evaluator/physical_scalar.rs | 2 +- .../sql/src/executor/physical_plan_builder.rs | 4 +- .../src/planner/semantic/sync_type_checker.rs | 8 +- src/query/storages/fuse/src/fuse_table.rs | 24 +- .../storages/fuse/src/operations/delete.rs | 2 +- .../fuse/src/operations/fuse_source.rs | 304 ------------------ src/query/storages/fuse/src/operations/mod.rs | 3 - .../storages/fuse/src/operations/read_data.rs | 5 +- 10 files changed, 18 insertions(+), 343 deletions(-) delete mode 100644 src/query/storages/fuse/src/operations/fuse_source.rs diff --git a/src/query/planner/src/extras.rs b/src/query/planner/src/extras.rs index a4c293ca209f..df73dbc37941 100644 --- a/src/query/planner/src/extras.rs +++ b/src/query/planner/src/extras.rs @@ -28,7 +28,7 @@ pub enum StageKind { Merge, } -#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq)] +#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] pub struct PrewhereInfo { /// columns to be ouput be prewhere scan pub output_columns: Projection, @@ -41,7 +41,7 @@ pub struct PrewhereInfo { } /// Extras is a wrapper for push down items. 
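Editor's note: patch 19 opens with mechanical cleanups, likely clippy-driven: deriving `Eq` next to `PartialEq` (the hunks just above and below this comment), rewriting `!x.is_some()` as `x.is_none()`, and dropping redundant closures. The `Eq` derive is not cosmetic once such plan structures land in hash-based collections; a minimal illustration with a toy type (not the real `Extras`):

```rust
use std::collections::HashSet;

// `HashSet` requires `Eq + Hash`, so `PartialEq` alone would not compile.
#[derive(PartialEq, Eq, Hash)]
struct Projection(Vec<usize>);

fn main() {
    let mut cached = HashSet::new();
    cached.insert(Projection(vec![0, 2]));
    assert!(cached.contains(&Projection(vec![0, 2])));
}
```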
-#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Default)] +#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq, Default)] pub struct Extras { /// Optional column indices to use as a projection pub projection: Option, diff --git a/src/query/service/src/sessions/query_ctx.rs b/src/query/service/src/sessions/query_ctx.rs index 1997d4f62e49..c563edf73dbc 100644 --- a/src/query/service/src/sessions/query_ctx.rs +++ b/src/query/service/src/sessions/query_ctx.rs @@ -36,16 +36,13 @@ use common_exception::Result; use common_functions::scalars::FunctionContext; use common_io::prelude::FormatSettings; use common_meta_app::schema::TableInfo; +use common_meta_types::RoleInfo; use common_meta_types::UserInfo; use common_planner::stage_table::StageTableInfo; use common_planner::PartInfoPtr; use common_planner::Partitions; use common_planner::ReadDataSourcePlan; use common_planner::SourceInfo; -use common_planner::stage_table::StageTableInfo; -use common_meta_app::schema::TableInfo; -use common_meta_types::RoleInfo; -use common_meta_types::UserInfo; use common_storage::DataOperator; use common_storage::StorageMetrics; use parking_lot::RwLock; diff --git a/src/query/sql/src/evaluator/physical_scalar.rs b/src/query/sql/src/evaluator/physical_scalar.rs index 2612c841049b..30007418b02a 100644 --- a/src/query/sql/src/evaluator/physical_scalar.rs +++ b/src/query/sql/src/evaluator/physical_scalar.rs @@ -91,7 +91,7 @@ impl Evaluator { PhysicalScalar::Function { name, args, .. } => { let eval_args: Vec = args .iter() - .map(|v| Self::eval_physical_scalar(v)) + .map(Self::eval_physical_scalar) .collect::>()?; // special case for in function diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs index 9683989274fa..4785d21d1303 100644 --- a/src/query/sql/src/executor/physical_plan_builder.rs +++ b/src/query/sql/src/executor/physical_plan_builder.rs @@ -518,7 +518,7 @@ impl PhysicalPlanBuilder { }); assert!( - !predicate.is_some(), + predicate.is_none(), "There should be at least one predicate in prewhere" ); @@ -647,7 +647,7 @@ impl<'a> PhysicalScalarBuilder<'a> { .arguments .iter() .zip(func.arg_types.iter()) - .map(|(arg, _)| Ok(self.build(arg)?)) + .map(|(arg, _)| self.build(arg)) .collect::>()?, return_type: *func.return_type.clone(), }), diff --git a/src/query/sql/src/planner/semantic/sync_type_checker.rs b/src/query/sql/src/planner/semantic/sync_type_checker.rs index 301c0b0c40c4..dd699855499e 100644 --- a/src/query/sql/src/planner/semantic/sync_type_checker.rs +++ b/src/query/sql/src/planner/semantic/sync_type_checker.rs @@ -1344,10 +1344,10 @@ impl<'a> SyncTypeChecker<'a> { fn resolve_map_access_pushdown( &mut self, data_type: DataTypeImpl, - mut accessors: Vec, - database: Option, - table: Option, - column: Identifier, + mut accessors: Vec>, + database: Option>, + table: Option>, + column: Identifier<'async_recursion>, ) -> Result> { let mut names = Vec::new(); let column_name = normalize_identifier(&column, self.name_resolution_ctx).name; diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index 754070805560..8306c9d6fa6e 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -95,7 +95,7 @@ impl FuseTable { if let Some((_, order)) = &cluster_key_meta { cluster_keys = PhysicalScalarParser::parse_exprs(schema, order)?; } - let operator = match table_info.from_share { + let mut operator = match 
table_info.from_share { Some(ref from_share) => create_share_table_operator( ShareTableConfig::share_endpoint_address(), &table_info.tenant, @@ -114,27 +114,9 @@ impl FuseTable { } } }; - Ok(operator) - } - - pub fn do_create(table_info: TableInfo, read_only: bool) -> Result> { - let operator = Self::init_operator(&table_info)?; - Self::do_create_with_operator(table_info, operator, read_only) - } - - pub fn do_create_with_operator( - table_info: TableInfo, - operator: Operator, - read_only: bool, - ) -> Result> { - let storage_prefix = Self::parse_storage_prefix(&table_info)?; - let cluster_key_meta = table_info.meta.cluster_key(); - let mut cluster_keys = Vec::new(); - if let Some((_, order)) = &cluster_key_meta { - cluster_keys = ExpressionParser::parse_exprs(order)?; - } let data_metrics = Arc::new(StorageMetrics::default()); - let operator = operator.layer(StorageMetricsLayer::new(data_metrics.clone())); + operator = operator.layer(StorageMetricsLayer::new(data_metrics.clone())); + Ok(Box::new(FuseTable { table_info, cluster_keys, diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index f45977ba2f38..0c2fd7d096b3 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -34,7 +34,7 @@ use crate::FuseTable; impl FuseTable { pub async fn do_delete(&self, ctx: Arc, plan: &DeletePlan) -> Result<()> { - let snapshot_opt = self.read_table_snapshot(ctx.clone()).await?; + let snapshot_opt = self.read_table_snapshot().await?; // check if table is empty let snapshot = if let Some(val) = snapshot_opt { diff --git a/src/query/storages/fuse/src/operations/fuse_source.rs b/src/query/storages/fuse/src/operations/fuse_source.rs deleted file mode 100644 index 1d3432b25afb..000000000000 --- a/src/query/storages/fuse/src/operations/fuse_source.rs +++ /dev/null @@ -1,304 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
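Editor's note: the deletion below is not lost logic; the same patch rewires `operations/read_data.rs` to host the state machine (note its new `read_data::State::Generated` import). The trick worth keeping in mind is the prewhere short-circuit: evaluate the predicate on the cheap columns first and fetch the remaining columns only when some rows survive. A dependency-free reduction with toy types, one prewhere column and one "expensive" remaining column:

```rust
fn scan_part(
    prewhere_col: &[i64],
    fetch_rest: impl FnOnce() -> Vec<&'static str>,
) -> Vec<(i64, &'static str)> {
    let mask: Vec<bool> = prewhere_col.iter().map(|v| *v > 0).collect();
    if !mask.iter().any(|m| *m) {
        return Vec::new(); // every row filtered out: skip the expensive read
    }
    let rest = fetch_rest(); // only now pay for the remaining columns
    prewhere_col
        .iter()
        .zip(rest)
        .zip(mask)
        .filter_map(|((v, s), keep)| keep.then_some((*v, s)))
        .collect()
}

fn main() {
    // The closure is never called when nothing passes the predicate.
    assert!(scan_part(&[-1, -2], || unreachable!()).is_empty());
    assert_eq!(scan_part(&[-1, 2], || vec!["a", "b"]), vec![(2, "b")]);
}
```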
- -use std::any::Any; -use std::sync::Arc; - -use common_base::base::Progress; -use common_base::base::ProgressValues; -use common_catalog::table_context::TableContext; -use common_datablocks::DataBlock; -use common_datavalues::ColumnRef; -use common_exception::ErrorCode; -use common_exception::Result; -use common_legacy_planners::PartInfoPtr; -use common_pipeline_core::processors::port::OutputPort; -use common_pipeline_core::processors::processor::Event; -use common_pipeline_core::processors::processor::ProcessorPtr; -use common_pipeline_core::processors::Processor; -use common_pipeline_transforms::processors::ExpressionExecutor; - -use crate::io::BlockReader; -use crate::operations::State::Generated; - -type DataChunks = Vec<(usize, Vec)>; - -pub struct PrewhereData { - data_block: DataBlock, - filter: ColumnRef, -} - -pub enum State { - ReadDataPrewhere(Option), - ReadDataRemain(PartInfoPtr, PrewhereData), - PrewhereFilter(PartInfoPtr, DataChunks), - Deserialize(PartInfoPtr, DataChunks, Option), - Generated(Option, DataBlock), - Finish, -} - -pub struct FuseTableSource { - state: State, - ctx: Arc, - scan_progress: Arc, - output: Arc, - output_reader: Arc, - - prewhere_reader: Arc, - prewhere_filter: Arc>, - remain_reader: Arc>, - - support_blocking: bool, -} - -impl FuseTableSource { - pub fn create( - ctx: Arc, - output: Arc, - output_reader: Arc, - prewhere_reader: Arc, - prewhere_filter: Arc>, - remain_reader: Arc>, - ) -> Result { - let scan_progress = ctx.get_scan_progress(); - let support_blocking = prewhere_reader.support_blocking_api(); - Ok(ProcessorPtr::create(Box::new(FuseTableSource { - ctx, - output, - scan_progress, - state: State::ReadDataPrewhere(None), - output_reader, - prewhere_reader, - prewhere_filter, - remain_reader, - support_blocking, - }))) - } - - fn generate_one_block(&mut self, block: DataBlock) -> Result<()> { - let new_part = self.ctx.try_get_part(); - // resort and prune columns - let block = block.resort(self.output_reader.schema())?; - self.state = State::Generated(new_part, block); - Ok(()) - } - - fn generate_one_empty_block(&mut self) -> Result<()> { - let schema = self.output_reader.schema(); - let new_part = self.ctx.try_get_part(); - self.state = Generated(new_part, DataBlock::empty_with_schema(schema)); - Ok(()) - } -} - -#[async_trait::async_trait] -impl Processor for FuseTableSource { - fn name(&self) -> String { - "FuseEngineSource".to_string() - } - - fn as_any(&mut self) -> &mut dyn Any { - self - } - - fn event(&mut self) -> Result { - if matches!(self.state, State::ReadDataPrewhere(None)) { - self.state = match self.ctx.try_get_part() { - None => State::Finish, - Some(part) => State::ReadDataPrewhere(Some(part)), - } - } - - if matches!(self.state, State::Finish) { - self.output.finish(); - return Ok(Event::Finished); - } - - if self.output.is_finished() { - return Ok(Event::Finished); - } - - if !self.output.can_push() { - return Ok(Event::NeedConsume); - } - - if matches!(self.state, State::Generated(_, _)) { - if let Generated(part, data_block) = std::mem::replace(&mut self.state, State::Finish) { - self.state = match part { - None => State::Finish, - Some(part) => State::ReadDataPrewhere(Some(part)), - }; - - self.output.push_data(Ok(data_block)); - return Ok(Event::NeedConsume); - } - } - - match self.state { - State::Finish => Ok(Event::Finished), - State::ReadDataPrewhere(_) => { - if self.support_blocking { - Ok(Event::Sync) - } else { - Ok(Event::Async) - } - } - State::ReadDataRemain(_, _) => { - if self.support_blocking { - 
Ok(Event::Sync) - } else { - Ok(Event::Async) - } - } - State::PrewhereFilter(_, _) => Ok(Event::Sync), - State::Deserialize(_, _, _) => Ok(Event::Sync), - State::Generated(_, _) => Err(ErrorCode::LogicalError("It's a bug.")), - } - } - - fn process(&mut self) -> Result<()> { - match std::mem::replace(&mut self.state, State::Finish) { - State::Deserialize(part, chunks, prewhere_data) => { - let data_block = if let Some(PrewhereData { - data_block: mut prewhere_blocks, - filter, - }) = prewhere_data - { - let block = if chunks.is_empty() { - prewhere_blocks - } else if let Some(remain_reader) = self.remain_reader.as_ref() { - let remain_block = remain_reader.deserialize(part, chunks)?; - for (col, field) in remain_block - .columns() - .iter() - .zip(remain_block.schema().fields()) - { - prewhere_blocks = - prewhere_blocks.add_column(col.clone(), field.clone())?; - } - prewhere_blocks - } else { - return Err(ErrorCode::LogicalError("It's a bug. Need remain reader")); - }; - // the last step of prewhere - let progress_values = ProgressValues { - rows: block.num_rows(), - bytes: block.memory_size(), - }; - self.scan_progress.incr(&progress_values); - DataBlock::filter_block(block, &filter)? - } else { - let block = self.output_reader.deserialize(part, chunks)?; - let progress_values = ProgressValues { - rows: block.num_rows(), - bytes: block.memory_size(), - }; - self.scan_progress.incr(&progress_values); - - block - }; - - self.generate_one_block(data_block)?; - Ok(()) - } - State::PrewhereFilter(part, chunks) => { - // deserialize prewhere data block first - let data_block = self.prewhere_reader.deserialize(part.clone(), chunks)?; - if let Some(filter) = self.prewhere_filter.as_ref() { - // do filter - let res = filter.execute(&data_block)?; - let filter = DataBlock::cast_to_nonull_boolean(res.column(0))?; - // shortcut, if predicates is const boolean (or can be cast to boolean) - if !DataBlock::filter_exists(&filter)? { - // all rows in this block are filtered out - // turn to read next part - let progress_values = ProgressValues { - rows: data_block.num_rows(), - bytes: data_block.memory_size(), - }; - self.scan_progress.incr(&progress_values); - self.generate_one_empty_block()?; - return Ok(()); - } - if self.remain_reader.is_none() { - // shortcut, we don't need to read remain data - let progress_values = ProgressValues { - rows: data_block.num_rows(), - bytes: data_block.memory_size(), - }; - self.scan_progress.incr(&progress_values); - let block = DataBlock::filter_block(data_block, &filter)?; - self.generate_one_block(block)?; - } else { - self.state = - State::ReadDataRemain(part, PrewhereData { data_block, filter }); - } - Ok(()) - } else { - Err(ErrorCode::LogicalError( - "It's a bug. No need to do prewhere filter", - )) - } - } - - State::ReadDataPrewhere(Some(part)) => { - let chunks = self.prewhere_reader.sync_read_columns_data(part.clone())?; - - if self.prewhere_filter.is_some() { - self.state = State::PrewhereFilter(part, chunks); - } else { - // all needed columns are read. - self.state = State::Deserialize(part, chunks, None) - } - Ok(()) - } - State::ReadDataRemain(part, prewhere_data) => { - if let Some(remain_reader) = self.remain_reader.as_ref() { - let chunks = remain_reader.sync_read_columns_data(part.clone())?; - self.state = State::Deserialize(part, chunks, Some(prewhere_data)); - Ok(()) - } else { - Err(ErrorCode::LogicalError("It's a bug. 
No remain reader")) - } - } - _ => Err(ErrorCode::LogicalError("It's a bug.")), - } - } - - async fn async_process(&mut self) -> Result<()> { - match std::mem::replace(&mut self.state, State::Finish) { - State::ReadDataPrewhere(Some(part)) => { - let chunks = self.prewhere_reader.read_columns_data(part.clone()).await?; - - if self.prewhere_filter.is_some() { - self.state = State::PrewhereFilter(part, chunks); - } else { - // all needed columns are read. - self.state = State::Deserialize(part, chunks, None) - } - Ok(()) - } - State::ReadDataRemain(part, prewhere_data) => { - if let Some(remain_reader) = self.remain_reader.as_ref() { - let chunks = remain_reader.read_columns_data(part.clone()).await?; - self.state = State::Deserialize(part, chunks, Some(prewhere_data)); - Ok(()) - } else { - Err(ErrorCode::LogicalError("It's a bug. No remain reader")) - } - } - _ => Err(ErrorCode::LogicalError("It's a bug.")), - } - } -} diff --git a/src/query/storages/fuse/src/operations/mod.rs b/src/query/storages/fuse/src/operations/mod.rs index cfab90d3ef54..548984401a3d 100644 --- a/src/query/storages/fuse/src/operations/mod.rs +++ b/src/query/storages/fuse/src/operations/mod.rs @@ -26,13 +26,10 @@ mod read_partitions; mod recluster; mod truncate; -mod fuse_source; pub mod util; pub(crate) use compact::CompactOptions; pub use fuse_sink::FuseTableSink; -pub use fuse_source::FuseTableSource; -pub use fuse_source::State; pub use mutation::delete_from_block; pub use mutation::DeletionMutator; pub use mutation::FullCompactMutator; diff --git a/src/query/storages/fuse/src/operations/read_data.rs b/src/query/storages/fuse/src/operations/read_data.rs index e2248d5237cc..f42ed959bb2b 100644 --- a/src/query/storages/fuse/src/operations/read_data.rs +++ b/src/query/storages/fuse/src/operations/read_data.rs @@ -12,8 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::any::Any; use std::sync::Arc; +use common_base::base::Progress; +use common_base::base::ProgressValues; use common_base::base::Runtime; use common_catalog::table_context::TableContext; use common_datablocks::DataBlock; @@ -37,7 +40,7 @@ use tracing::info; use crate::fuse_lazy_part::FuseLazyPartInfo; use crate::io::BlockReader; -use crate::operations::FuseTableSource; +use crate::operations::read_data::State::Generated; use crate::FuseTable; impl FuseTable { From b7aa4f2a15f698919e614683457d116454286fbc Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 27 Oct 2022 23:17:51 +0800 Subject: [PATCH 20/47] refactor(query): PhysicalScalarParser add table --- .../sql/src/planner/physical_scalar_parser.rs | 41 ++++++++++++++++++- .../src/planner/semantic/sync_type_checker.rs | 8 ++-- src/query/storages/fuse/src/fuse_table.rs | 18 ++++---- .../storages/fuse/src/operations/append.rs | 18 ++++---- .../storages/fuse/src/operations/delete.rs | 12 ++++-- .../storages/fuse/src/operations/recluster.rs | 2 +- .../clustering_informations/table_args.rs | 6 ++- 7 files changed, 76 insertions(+), 29 deletions(-) diff --git a/src/query/sql/src/planner/physical_scalar_parser.rs b/src/query/sql/src/planner/physical_scalar_parser.rs index 5c183e76ffcf..946f4732cbd3 100644 --- a/src/query/sql/src/planner/physical_scalar_parser.rs +++ b/src/query/sql/src/planner/physical_scalar_parser.rs @@ -12,24 +12,36 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::sync::Arc; + use common_ast::parser::parse_comma_separated_exprs; use common_ast::parser::tokenize_sql; use common_ast::Backtrace; use common_ast::Dialect; +use common_catalog::catalog::CATALOG_DEFAULT; +use common_catalog::table::Table; use common_datavalues::DataSchemaRef; use common_exception::Result; use common_planner::PhysicalScalar; use common_settings::Settings; +use parking_lot::RwLock; use crate::executor::PhysicalScalarBuilder; use crate::planner::semantic::SyncTypeChecker; use crate::BindContext; +use crate::ColumnBinding; +use crate::Metadata; use crate::NameResolutionContext; +use crate::Visibility; pub struct PhysicalScalarParser; impl PhysicalScalarParser { - pub fn parse_exprs(schema: DataSchemaRef, sql: &str) -> Result> { + pub fn parse_exprs( + schema: DataSchemaRef, + table_meta: Arc, + sql: &str, + ) -> Result> { let sql_dialect = Dialect::MySQL; let tokens = tokenize_sql(sql)?; let backtrace = Backtrace::new(); @@ -40,7 +52,32 @@ impl PhysicalScalarParser { )?; let settings = Settings::default_settings(""); - let bind_context = BindContext::new(); + let mut bind_context = BindContext::new(); + let metadata = Arc::new(RwLock::new(Metadata::default())); + let table_index = metadata.write().add_table( + CATALOG_DEFAULT.to_owned(), + "default".to_string(), + table_meta, + ); + + let columns = metadata.read().columns_by_table_index(table_index); + let table = metadata.read().table(table_index).clone(); + for column in columns.iter() { + let column_binding = ColumnBinding { + database_name: Some("default".to_string()), + table_name: Some(table.name().to_string()), + column_name: column.name().to_string(), + index: column.index(), + data_type: Box::new(column.data_type().clone()), + visibility: if column.has_path_indices() { + Visibility::InVisible + } else { + Visibility::Visible + }, + }; + bind_context.add_column_binding(column_binding); + } + let name_resolution_ctx = NameResolutionContext::try_from(settings.as_ref())?; let mut type_checker = SyncTypeChecker::new(&bind_context, &name_resolution_ctx, &[]); let mut physical_scalars = Vec::with_capacity(exprs.len()); diff --git a/src/query/sql/src/planner/semantic/sync_type_checker.rs b/src/query/sql/src/planner/semantic/sync_type_checker.rs index dd699855499e..301c0b0c40c4 100644 --- a/src/query/sql/src/planner/semantic/sync_type_checker.rs +++ b/src/query/sql/src/planner/semantic/sync_type_checker.rs @@ -1344,10 +1344,10 @@ impl<'a> SyncTypeChecker<'a> { fn resolve_map_access_pushdown( &mut self, data_type: DataTypeImpl, - mut accessors: Vec>, - database: Option>, - table: Option>, - column: Identifier<'async_recursion>, + mut accessors: Vec, + database: Option, + table: Option, + column: Identifier, ) -> Result> { let mut names = Vec::new(); let column_name = normalize_identifier(&column, self.name_resolution_ctx).name; diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index 8306c9d6fa6e..63550b4e5cb8 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -73,7 +73,6 @@ pub struct FuseTable { pub(crate) table_info: TableInfo, pub(crate) meta_location_generator: TableMetaLocationGenerator, - pub(crate) cluster_keys: Vec, pub(crate) cluster_key_meta: Option, pub(crate) read_only: bool, @@ -90,11 +89,6 @@ impl FuseTable { pub fn do_create(table_info: TableInfo, read_only: bool) -> Result> { let storage_prefix = Self::parse_storage_prefix(&table_info)?; let cluster_key_meta = table_info.meta.cluster_key(); - let 
schema = table_info.schema(); - let mut cluster_keys = Vec::new(); - if let Some((_, order)) = &cluster_key_meta { - cluster_keys = PhysicalScalarParser::parse_exprs(schema, order)?; - } let mut operator = match table_info.from_share { Some(ref from_share) => create_share_table_operator( ShareTableConfig::share_endpoint_address(), @@ -119,9 +113,8 @@ impl FuseTable { Ok(Box::new(FuseTable { table_info, - cluster_keys, - cluster_key_meta, meta_location_generator: TableMetaLocationGenerator::with_prefix(storage_prefix), + cluster_key_meta, read_only, operator, data_metrics, @@ -254,7 +247,14 @@ impl Table for FuseTable { } fn cluster_keys(&self) -> Vec { - self.cluster_keys.clone() + let schema = self.table_info.schema(); + let table_meta = Arc::new(self.clone()); + if let Some((_, order)) = &self.cluster_key_meta { + let cluster_keys = + PhysicalScalarParser::parse_exprs(schema, table_meta, order).unwrap(); + return cluster_keys; + } + vec![] } fn support_prewhere(&self) -> bool { diff --git a/src/query/storages/fuse/src/operations/append.rs b/src/query/storages/fuse/src/operations/append.rs index 3bb08a9a8859..1d3d2a07efda 100644 --- a/src/query/storages/fuse/src/operations/append.rs +++ b/src/query/storages/fuse/src/operations/append.rs @@ -15,6 +15,7 @@ use std::str::FromStr; use std::sync::Arc; +use common_catalog::table::Table; use common_catalog::table_context::TableContext; use common_datablocks::SortColumnDescription; use common_datavalues::DataField; @@ -55,10 +56,10 @@ impl FuseTable { let cluster_stats_gen = self.get_cluster_stats_gen(ctx.clone(), pipeline, 0, block_compactor)?; - if !self.cluster_keys.is_empty() { + let cluster_keys = self.cluster_keys(); + if !cluster_keys.is_empty() { // sort - let sort_descs: Vec = self - .cluster_keys + let sort_descs: Vec = cluster_keys .iter() .map(|expr| SortColumnDescription { // todo(sundy): use index instead @@ -113,19 +114,20 @@ impl FuseTable { level: i32, block_compactor: BlockCompactor, ) -> Result { - if self.cluster_keys.is_empty() { + let cluster_keys = self.cluster_keys(); + if cluster_keys.is_empty() { return Ok(ClusterStatsGenerator::default()); } let input_schema = self.table_info.schema(); let mut merged = input_schema.fields().clone(); - let mut cluster_key_index = Vec::with_capacity(self.cluster_keys.len()); - let mut extra_key_index = Vec::with_capacity(self.cluster_keys.len()); + let mut cluster_key_index = Vec::with_capacity(cluster_keys.len()); + let mut extra_key_index = Vec::with_capacity(cluster_keys.len()); - let mut operators = Vec::with_capacity(self.cluster_keys.len()); + let mut operators = Vec::with_capacity(cluster_keys.len()); - for expr in &self.cluster_keys { + for expr in &cluster_keys { let cname = expr.pretty_display(); let index = match merged.iter().position(|x| x.name() == &cname) { diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index 0c2fd7d096b3..8586e768cc25 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -14,6 +14,7 @@ use std::sync::Arc; +use common_catalog::table::Table; use common_catalog::table_context::TableContext; use common_datavalues::DataField; use common_exception::ErrorCode; @@ -51,7 +52,9 @@ impl FuseTable { // check if unconditional deletion if let Some(filter) = &plan.selection { - let physical_scalars = PhysicalScalarParser::parse_exprs(plan.schema(), filter)?; + let table_meta = Arc::new(self.clone()); + let physical_scalars = + 
PhysicalScalarParser::parse_exprs(plan.schema(), table_meta, filter)?; if physical_scalars.is_empty() { return Err(ErrorCode::IndexOutOfBounds( "expression should be valid, but not", @@ -153,9 +156,10 @@ impl FuseTable { let input_schema = self.table_info.schema(); let mut merged = input_schema.fields().clone(); - let mut cluster_key_index = Vec::with_capacity(self.cluster_keys.len()); - let mut extra_key_index = Vec::with_capacity(self.cluster_keys.len()); - for expr in &self.cluster_keys { + let cluster_keys = self.cluster_keys(); + let mut cluster_key_index = Vec::with_capacity(cluster_keys.len()); + let mut extra_key_index = Vec::with_capacity(cluster_keys.len()); + for expr in &cluster_keys { let cname = expr.pretty_display(); let index = match merged.iter().position(|x| x.name() == &cname) { None => { diff --git a/src/query/storages/fuse/src/operations/recluster.rs b/src/query/storages/fuse/src/operations/recluster.rs index 63ba699527cb..4344c92b783f 100644 --- a/src/query/storages/fuse/src/operations/recluster.rs +++ b/src/query/storages/fuse/src/operations/recluster.rs @@ -146,7 +146,7 @@ impl FuseTable { // sort let sort_descs: Vec = self - .cluster_keys + .cluster_keys() .iter() .map(|expr| SortColumnDescription { column_name: expr.pretty_display(), diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs index 846ddc979557..52811d8e1971 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
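+//! A minimal sketch of how `get_cluster_keys` below behaves; `fuse_table`
+//! stands in for a real `FuseTable` and the definition string is illustrative.
+//!
+//! ```ignore
+//! // A non-empty definition is parsed against the table schema ...
+//! let keys = get_cluster_keys(&fuse_table, "(id % 3)")?;
+//! // ... while an empty definition falls back to the table's own cluster keys.
+//! let keys = get_cluster_keys(&fuse_table, "")?;
+//! ```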
+use std::sync::Arc; + use common_catalog::table::Table; use common_exception::ErrorCode; use common_exception::Result; @@ -39,7 +41,9 @@ pub fn parse_func_table_args(table_args: &TableArgs) -> Result<(String, String)> pub fn get_cluster_keys(table: &FuseTable, definition: &str) -> Result> { let cluster_keys = if !definition.is_empty() { - let physical_scalars = PhysicalScalarParser::parse_exprs(table.schema(), definition)?; + let table_meta = Arc::new(table.clone()); + let physical_scalars = + PhysicalScalarParser::parse_exprs(table.schema(), table_meta, definition)?; physical_scalars } else { table.cluster_keys() From 05822862dde1d649634025b0ad98b0e582c0ae78 Mon Sep 17 00:00:00 2001 From: baishen Date: Fri, 28 Oct 2022 08:41:32 +0800 Subject: [PATCH 21/47] fix clippy --- .../interpreter_table_recluster.rs | 2 +- .../tests/it/sql/planner/format/mod.rs | 3 +- .../sql/src/evaluator/physical_scalar.rs | 3 +- .../src/planner/semantic/sync_type_checker.rs | 64 ++++++++----------- src/query/storages/fuse/src/pruning/pruner.rs | 4 +- .../clustering_informations/table_args.rs | 4 +- src/query/storages/index/src/range_filter.rs | 6 +- 7 files changed, 36 insertions(+), 50 deletions(-) diff --git a/src/query/service/src/interpreters/interpreter_table_recluster.rs b/src/query/service/src/interpreters/interpreter_table_recluster.rs index 3f74d290816e..d2f7e4ec4f38 100644 --- a/src/query/service/src/interpreters/interpreter_table_recluster.rs +++ b/src/query/service/src/interpreters/interpreter_table_recluster.rs @@ -59,7 +59,7 @@ impl Interpreter for ReclusterTableInterpreter { Some(scalar) => { let schema = self.plan.schema(); let mut builder = PhysicalScalarBuilder::new(&schema); - let physical_scalar = builder.build(&scalar)?; + let physical_scalar = builder.build(scalar)?; Some(Extras { filters: vec![physical_scalar], ..Extras::default() diff --git a/src/query/service/tests/it/sql/planner/format/mod.rs b/src/query/service/tests/it/sql/planner/format/mod.rs index 6aeef4f12bbe..9f3ccfc919df 100644 --- a/src/query/service/tests/it/sql/planner/format/mod.rs +++ b/src/query/service/tests/it/sql/planner/format/mod.rs @@ -21,6 +21,7 @@ use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; use databend_query::sql::optimizer::SExpr; +use databend_query::sql::planner::Metadata; use databend_query::sql::planner::plans::JoinType; use databend_query::sql::plans::BoundColumnRef; use databend_query::sql::plans::ConstantExpr; @@ -33,8 +34,6 @@ use databend_query::sql::Visibility; use databend_query::storages::Table; use parking_lot::RwLock; -use crate::Metadata; - struct DummyTable { table_info: TableInfo, } diff --git a/src/query/sql/src/evaluator/physical_scalar.rs b/src/query/sql/src/evaluator/physical_scalar.rs index 30007418b02a..d8fd9bac7c05 100644 --- a/src/query/sql/src/evaluator/physical_scalar.rs +++ b/src/query/sql/src/evaluator/physical_scalar.rs @@ -120,8 +120,7 @@ impl Evaluator { } let data_types: Vec = args.iter().map(|v| v.data_type()).collect(); - let data_types: Vec<&DataTypeImpl> = data_types.iter().map(|v| v).collect(); - + let data_types: Vec<&DataTypeImpl> = data_types.iter().collect(); let func = FunctionFactory::instance().get(name, &data_types)?; Ok(EvalNode::Function { func, diff --git a/src/query/sql/src/planner/semantic/sync_type_checker.rs b/src/query/sql/src/planner/semantic/sync_type_checker.rs index 301c0b0c40c4..0d0b0a44dc4e 100644 --- a/src/query/sql/src/planner/semantic/sync_type_checker.rs +++ 
b/src/query/sql/src/planner/semantic/sync_type_checker.rs @@ -382,46 +382,36 @@ impl<'a> SyncTypeChecker<'a> { subquery, modifier, .. } = &**right { - if let Some(subquery_modifier) = modifier { - match subquery_modifier { - SubqueryModifier::Any | SubqueryModifier::Some => { - return Err(ErrorCode::SemanticError( - expr.span() - .display_error("not support subquery".to_string()), - )); - } - SubqueryModifier::All => { - let contrary_op = op.to_contrary()?; - let rewritten_subquery = Expr::Subquery { - span: right.span(), - modifier: Some(SubqueryModifier::Any), - subquery: (*subquery).clone(), - }; - self.resolve_function( + match modifier { + Some(SubqueryModifier::Any) | Some(SubqueryModifier::Some) => { + return Err(ErrorCode::SemanticError( + expr.span() + .display_error("not support subquery".to_string()), + )); + } + Some(SubqueryModifier::All) => { + let contrary_op = op.to_contrary()?; + let rewritten_subquery = Expr::Subquery { + span: right.span(), + modifier: Some(SubqueryModifier::Any), + subquery: (*subquery).clone(), + }; + return self.resolve_function( + span, + "not", + &[&Expr::BinaryOp { span, - "not", - &[&Expr::BinaryOp { - span, - op: contrary_op, - left: (*left).clone(), - right: Box::new(rewritten_subquery), - }], - None, - )? - } + op: contrary_op, + left: (*left).clone(), + right: Box::new(rewritten_subquery), + }], + None, + ); } - } else { - self.resolve_binary_op( - span, - op, - left.as_ref(), - right.as_ref(), - required_type, - )? + None => {} } - } else { - self.resolve_binary_op(span, op, left.as_ref(), right.as_ref(), required_type)? } + self.resolve_binary_op(span, op, left.as_ref(), right.as_ref(), required_type)? } Expr::UnaryOp { span, op, expr, .. } => { @@ -1353,7 +1343,7 @@ impl<'a> SyncTypeChecker<'a> { let column_name = normalize_identifier(&column, self.name_resolution_ctx).name; names.push(column_name); let mut data_types = Vec::new(); - data_types.push(data_type.clone()); + data_types.push(data_type); while !accessors.is_empty() { let data_type = data_types.pop().unwrap(); diff --git a/src/query/storages/fuse/src/pruning/pruner.rs b/src/query/storages/fuse/src/pruning/pruner.rs index 4d792c206b9a..47bf1c317796 100644 --- a/src/query/storages/fuse/src/pruning/pruner.rs +++ b/src/query/storages/fuse/src/pruning/pruner.rs @@ -125,7 +125,7 @@ pub fn new_filter_pruner( // convert to filter column names let filter_block_cols = point_query_cols .into_iter() - .map(|index| BlockFilter::build_filter_column(index)) + .map(BlockFilter::build_filter_column) .collect(); return Ok(Some(Arc::new(FilterPruner::new( @@ -159,7 +159,7 @@ mod util { ) -> Result { // load the relevant index columns let maybe_filter = index_location - .read_filter(ctx.clone(), dal, &filter_col_names, index_length) + .read_filter(ctx.clone(), dal, filter_col_names, index_length) .await; match maybe_filter { diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs index 52811d8e1971..ff3464c7949d 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs @@ -42,9 +42,7 @@ pub fn parse_func_table_args(table_args: &TableArgs) -> Result<(String, String)> pub fn get_cluster_keys(table: &FuseTable, definition: &str) -> Result> { let cluster_keys = if !definition.is_empty() { let table_meta = Arc::new(table.clone()); - let physical_scalars = - 
PhysicalScalarParser::parse_exprs(table.schema(), table_meta, definition)?; - physical_scalars + PhysicalScalarParser::parse_exprs(table.schema(), table_meta, definition)? } else { table.cluster_keys() }; diff --git a/src/query/storages/index/src/range_filter.rs b/src/query/storages/index/src/range_filter.rs index 5ea69dbc32e2..041ce6fc504c 100644 --- a/src/query/storages/index/src/range_filter.rs +++ b/src/query/storages/index/src/range_filter.rs @@ -373,14 +373,14 @@ impl<'a> VerifiableExprBuilder<'a> { let left_cols = get_column_fields(schema, cols[0].clone())?; let left_name = args[0].pretty_display(); - let left_field = DataField::new(&left_name, args[0].data_type().clone()); + let left_field = DataField::new(&left_name, args[0].data_type()); fields.push((left_field, left_cols)); if cols.len() > 1 { let right_cols = get_column_fields(schema, cols[1].clone())?; let right_name = args[1].pretty_display(); - let right_field = DataField::new(&right_name, args[1].data_type().clone()); + let right_field = DataField::new(&right_name, args[1].data_type()); fields.push((right_field, right_cols)); } @@ -679,7 +679,7 @@ fn get_maybe_monotonic(op: &str, args: &Vec) -> Result { } for arg in args { - if !check_maybe_monotonic(&arg)? { + if !check_maybe_monotonic(arg)? { return Ok(false); } } From 580f249d16adee0d5537697938879527ff3f5bdc Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 28 Oct 2022 10:54:57 +0800 Subject: [PATCH 22/47] refactor(query): add expression --- src/query/catalog/src/table.rs | 4 +- src/query/planner/src/extras.rs | 8 +- src/query/planner/src/lib.rs | 8 +- src/query/planner/src/physical_scalar.rs | 133 --------------- .../planner/src/physical_scalar_visitor.rs | 154 ------------------ src/query/sql/src/evaluator/mod.rs | 1 - src/query/sql/src/evaluator/monotonicity.rs | 7 +- .../sql/src/evaluator/physical_scalar.rs | 51 +----- src/query/sql/src/executor/format.rs | 4 +- src/query/sql/src/executor/mod.rs | 6 + src/query/sql/src/executor/physical_plan.rs | 6 +- .../sql/src/executor/physical_plan_builder.rs | 20 ++- .../sql/src/planner/physical_scalar_parser.rs | 2 +- 13 files changed, 38 insertions(+), 366 deletions(-) delete mode 100644 src/query/planner/src/physical_scalar.rs delete mode 100644 src/query/planner/src/physical_scalar_visitor.rs diff --git a/src/query/catalog/src/table.rs b/src/query/catalog/src/table.rs index 8c660658c189..6a9150cf4ffe 100644 --- a/src/query/catalog/src/table.rs +++ b/src/query/catalog/src/table.rs @@ -31,7 +31,7 @@ use common_planner::extras::Extras; use common_planner::extras::Statistics; use common_planner::plans::DeletePlan; use common_planner::Partitions; -use common_planner::PhysicalScalar; +use common_planner::Expression; use common_planner::ReadDataSourcePlan; use common_storage::StorageMetrics; @@ -90,7 +90,7 @@ pub trait Table: Sync + Send { false } - fn cluster_keys(&self) -> Vec { + fn cluster_keys(&self) -> Vec { vec![] } diff --git a/src/query/planner/src/extras.rs b/src/query/planner/src/extras.rs index df73dbc37941..67bdaaf75e94 100644 --- a/src/query/planner/src/extras.rs +++ b/src/query/planner/src/extras.rs @@ -19,7 +19,7 @@ use common_meta_app::schema::TableInfo; use once_cell::sync::Lazy; use crate::plans::Projection; -use crate::PhysicalScalar; +use crate::Expression; #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] pub enum StageKind { @@ -37,7 +37,7 @@ pub struct PrewhereInfo { /// remain_columns = scan.columns - need_columns pub remain_columns: Projection, /// filter for 
prewhere - pub filter: PhysicalScalar, + pub filter: Expression, } /// Extras is a wrapper for push down items. @@ -47,14 +47,14 @@ pub struct Extras { pub projection: Option, /// Optional filter expression plan /// split_conjunctions by `and` operator - pub filters: Vec, + pub filters: Vec, /// Optional prewhere information /// used for prewhere optimization pub prewhere: Option, /// Optional limit to skip read pub limit: Option, /// Optional order_by expression plan, asc, null_first - pub order_by: Vec<(PhysicalScalar, bool, bool)>, + pub order_by: Vec<(Expression, bool, bool)>, } impl Extras { diff --git a/src/query/planner/src/lib.rs b/src/query/planner/src/lib.rs index 9951b07ec7e8..d733a1a044aa 100644 --- a/src/query/planner/src/lib.rs +++ b/src/query/planner/src/lib.rs @@ -22,16 +22,16 @@ //! build pipelines, then our processes will produce result data blocks. mod partition; -mod physical_scalar; +mod expression; pub mod extras; -mod physical_scalar_visitor; +mod expression_visitor; pub mod plan_read_datasource; pub mod stage_table; pub use partition::*; -pub use physical_scalar::*; -pub use physical_scalar_visitor::*; +pub use expression::*; +pub use expression_visitor::*; pub use plan_read_datasource::*; // Plan will be used publicly. diff --git a/src/query/planner/src/physical_scalar.rs b/src/query/planner/src/physical_scalar.rs deleted file mode 100644 index 198571203fe5..000000000000 --- a/src/query/planner/src/physical_scalar.rs +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::fmt::Display; -use std::fmt::Formatter; - -use common_datavalues::format_data_type_sql; -use common_datavalues::DataField; -use common_datavalues::DataTypeImpl; -use common_datavalues::DataValue; - -type ColumnID = String; -type IndexType = usize; - -/// Serializable and desugared representation of `Scalar`. -#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)] -pub enum PhysicalScalar { - IndexedVariable { - index: usize, - data_type: DataTypeImpl, - - display_name: String, - }, - Constant { - value: DataValue, - data_type: DataTypeImpl, - }, - Function { - name: String, - args: Vec, - return_type: DataTypeImpl, - }, - - Cast { - input: Box, - target: DataTypeImpl, - }, -} - -impl PhysicalScalar { - pub fn data_type(&self) -> DataTypeImpl { - match self { - PhysicalScalar::Constant { data_type, .. } => data_type.clone(), - PhysicalScalar::Function { return_type, .. } => return_type.clone(), - PhysicalScalar::Cast { target, .. } => target.clone(), - PhysicalScalar::IndexedVariable { data_type, .. } => data_type.clone(), - } - } - - pub fn to_data_field(&self) -> DataField { - let name = self.pretty_display(); - let data_type = self.data_type(); - DataField::new(&name, data_type) - } - - /// Display with readable variable name. - pub fn pretty_display(&self) -> String { - match self { - PhysicalScalar::Constant { value, .. } => value.to_string(), - PhysicalScalar::Function { name, args, .. 
} => { - let args = args - .iter() - .map(|arg| arg.pretty_display()) - .collect::>() - .join(", "); - format!("{}({})", name, args) - } - PhysicalScalar::Cast { input, target } => format!( - "CAST({} AS {})", - input.pretty_display(), - format_data_type_sql(target) - ), - PhysicalScalar::IndexedVariable { display_name, .. } => display_name.clone(), - } - } -} - -impl Display for PhysicalScalar { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match &self { - PhysicalScalar::Constant { value, .. } => write!(f, "{}", value), - PhysicalScalar::Function { name, args, .. } => write!( - f, - "{}({})", - name, - args.iter() - .map(|arg| format!("{}", arg)) - .collect::>() - .join(", ") - ), - PhysicalScalar::Cast { input, target } => { - write!(f, "CAST({} AS {})", input, format_data_type_sql(target)) - } - PhysicalScalar::IndexedVariable { index, .. } => write!(f, "${index}"), - } - } -} - -#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)] -pub struct AggregateFunctionDesc { - pub sig: AggregateFunctionSignature, - pub column_id: ColumnID, - pub args: Vec, - - /// Only used for debugging - pub arg_indices: Vec, -} - -#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)] -pub struct AggregateFunctionSignature { - pub name: String, - pub args: Vec, - pub params: Vec, - pub return_type: DataTypeImpl, -} - -#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)] -pub struct SortDesc { - pub asc: bool, - pub nulls_first: bool, - pub order_by: ColumnID, -} diff --git a/src/query/planner/src/physical_scalar_visitor.rs b/src/query/planner/src/physical_scalar_visitor.rs deleted file mode 100644 index cd93653495ec..000000000000 --- a/src/query/planner/src/physical_scalar_visitor.rs +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::HashSet; - -use common_exception::Result; - -use crate::PhysicalScalar; - -/// Controls how the visitor recursion should proceed. -pub enum Recursion { - /// Attempt to visit all the children, recursively, of this expression. - Continue(V), - /// Do not visit the children of this expression, though the walk - /// of parents of this expression will not be affected - Stop(V), -} - -/// Encode the traversal of an expression tree. When passed to -/// `PhysicalScalarVisitor::accept`, `PhysicalScalarVisitor::visit` is invoked -/// recursively on all nodes of an expression tree. See the comments -/// on `PhysicalScalarVisitor::accept` for details on its use -pub trait PhysicalScalarVisitor: Sized { - /// Invoked before any children of `expr` are visisted. 
- fn pre_visit(self, expr: &PhysicalScalar) -> Result>; - - fn visit(mut self, predecessor_expr: &PhysicalScalar) -> Result { - let mut stack = vec![RecursionProcessing::Call(predecessor_expr)]; - while let Some(element) = stack.pop() { - match element { - RecursionProcessing::Ret(expr) => { - self = self.post_visit(expr)?; - } - RecursionProcessing::Call(expr) => { - stack.push(RecursionProcessing::Ret(expr)); - self = match self.pre_visit(expr)? { - Recursion::Stop(visitor) => visitor, - Recursion::Continue(visitor) => { - match expr { - PhysicalScalar::Function { args, .. } => { - for arg in args { - stack.push(RecursionProcessing::Call(arg)); - } - } - PhysicalScalar::Cast { input, .. } => { - stack.push(RecursionProcessing::Call(input)); - } - _ => {} - }; - - visitor - } - } - } - } - } - - Ok(self) - } - - /// Invoked after all children of `scalar` are visited. Default - /// implementation does nothing. - fn post_visit(self, _scalar: &PhysicalScalar) -> Result { - Ok(self) - } -} - -impl PhysicalScalar { - /// Performs a depth first walk of an expression and - /// its children, calling [`PhysicalScalarVisitor::pre_visit`] and - /// `visitor.post_visit`. - /// - /// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) to - /// separate expression algorithms from the structure of the - /// `Expr` tree and make it easier to add new types of expressions - /// and algorithms that walk the tree. - /// - /// For an expression tree such as - /// ```text - /// BinaryExpr (GT) - /// left: Column("foo") - /// right: Column("bar") - /// ``` - /// - /// The nodes are visited using the following order - /// ```text - /// pre_visit(ScalarFunction(GT)) - /// pre_visit(Column("foo")) - /// post_visit(Column("foo")) - /// pre_visit(Column("bar")) - /// post_visit(Column("bar")) - /// post_visit(ScalarFunction(GT)) - /// ``` - /// - /// If an Err result is returned, recursion is stopped immediately - pub fn accept(&self, visitor: V) -> Result { - let visitor = match visitor.pre_visit(self)? { - Recursion::Continue(visitor) => visitor, - // If the recursion should stop, do not visit children - Recursion::Stop(visitor) => return Ok(visitor), - }; - - let visitor = visitor.visit(self)?; - visitor.post_visit(self) - } -} - -enum RecursionProcessing<'a> { - Call(&'a PhysicalScalar), - Ret(&'a PhysicalScalar), -} - -// This visitor is for recursively visiting expression tree and collects all columns. -pub struct RequireColumnsVisitor { - pub required_columns: HashSet, -} - -impl RequireColumnsVisitor { - pub fn default() -> Self { - Self { - required_columns: HashSet::new(), - } - } - - pub fn collect_columns_from_expr(expr: &PhysicalScalar) -> Result> { - let mut visitor = Self::default(); - visitor = expr.accept(visitor)?; - Ok(visitor.required_columns) - } -} - -impl PhysicalScalarVisitor for RequireColumnsVisitor { - fn pre_visit(self, expr: &PhysicalScalar) -> Result> { - match expr { - PhysicalScalar::IndexedVariable { index, .. 
} => { - let mut v = self; - v.required_columns.insert(*index); - Ok(Recursion::Continue(v)) - } - _ => Ok(Recursion::Continue(self)), - } - } -} diff --git a/src/query/sql/src/evaluator/mod.rs b/src/query/sql/src/evaluator/mod.rs index fd476a2997ad..2d099821bcbb 100644 --- a/src/query/sql/src/evaluator/mod.rs +++ b/src/query/sql/src/evaluator/mod.rs @@ -24,7 +24,6 @@ use common_datavalues::ColumnRef; use common_datavalues::DataTypeImpl; pub use eval_node::EvalNode; pub use monotonicity::PhysicalScalarMonotonicityVisitor; -pub use physical_scalar::PhysicalScalarOp; pub struct Evaluator; diff --git a/src/query/sql/src/evaluator/monotonicity.rs b/src/query/sql/src/evaluator/monotonicity.rs index 9e39d767fdb0..9fbd90e798a4 100644 --- a/src/query/sql/src/evaluator/monotonicity.rs +++ b/src/query/sql/src/evaluator/monotonicity.rs @@ -23,9 +23,10 @@ use common_functions::scalars::Function; use common_functions::scalars::FunctionContext; use common_functions::scalars::FunctionFactory; use common_functions::scalars::Monotonicity; -use common_planner::PhysicalScalar; -use common_planner::PhysicalScalarVisitor; -use common_planner::Recursion; + +use crate::executor::PhysicalScalar; +use crate::executor::PhysicalScalarVisitor; +use crate::executor::Recursion; // PhysicalScalarMonotonicityVisitor visit the expression tree to calculate monotonicity. // For example, a function of Add(Neg(number), 5) for number < -100 will have a tree like this: diff --git a/src/query/sql/src/evaluator/physical_scalar.rs b/src/query/sql/src/evaluator/physical_scalar.rs index d8fd9bac7c05..1c81cbf6761a 100644 --- a/src/query/sql/src/evaluator/physical_scalar.rs +++ b/src/query/sql/src/evaluator/physical_scalar.rs @@ -20,59 +20,10 @@ use common_exception::Result; use common_functions::scalars::in_evaluator; use common_functions::scalars::CastFunction; use common_functions::scalars::FunctionFactory; -use common_planner::PhysicalScalar; use crate::evaluator::eval_node::EvalNode; use crate::evaluator::Evaluator; - -pub trait PhysicalScalarOp { - fn binary_op(&self, name: &str, other: &Self) -> Result; - - fn and(&self, other: &Self) -> Result { - self.binary_op("and", other) - } - - fn or(&self, other: &Self) -> Result { - self.binary_op("or", other) - } - - fn eq(&self, other: &Self) -> Result { - self.binary_op("=", other) - } - - fn not_eq(&self, other: &Self) -> Result { - self.binary_op("!=", other) - } - - fn gt_eq(&self, other: &Self) -> Result { - self.binary_op(">=", other) - } - - fn gt(&self, other: &Self) -> Result { - self.binary_op(">", other) - } - - fn lt_eq(&self, other: &Self) -> Result { - self.binary_op("<=", other) - } - - fn lt(&self, other: &Self) -> Result { - self.binary_op("=", other) - } -} - -impl PhysicalScalarOp for PhysicalScalar { - fn binary_op(&self, name: &str, other: &PhysicalScalar) -> Result { - let func = - FunctionFactory::instance().get(name, &[&self.data_type(), &other.data_type()])?; - - Ok(PhysicalScalar::Function { - name: name.to_owned(), - args: vec![self.clone(), other.clone()], - return_type: func.return_type(), - }) - } -} +use crate::executor::PhysicalScalar; impl Evaluator { pub fn eval_physical_scalars(physical_scalars: &[PhysicalScalar]) -> Result> { diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs index bd04bc2ff83b..f3edaf5ce316 100644 --- a/src/query/sql/src/executor/format.rs +++ b/src/query/sql/src/executor/format.rs @@ -16,10 +16,10 @@ use common_ast::ast::FormatTreeNode; use common_exception::ErrorCode; use 
common_exception::Result; use common_planner::extras::StageKind; -use common_planner::AggregateFunctionDesc; use itertools::Itertools; use super::AggregateFinal; +use super::AggregateFunctionDesc; use super::AggregatePartial; use super::EvalScalar; use super::Exchange; @@ -79,7 +79,7 @@ fn table_scan_to_format_tree( extras .filters .iter() - .map(|f| f.pretty_display()) + .map(|f| f.column_name()) .collect::>() .join(", ") }); diff --git a/src/query/sql/src/executor/mod.rs b/src/query/sql/src/executor/mod.rs index 07294d99e2af..6d39dcc6ceb6 100644 --- a/src/query/sql/src/executor/mod.rs +++ b/src/query/sql/src/executor/mod.rs @@ -12,16 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. +mod expression_builder; mod format; mod physical_plan; mod physical_plan_builder; mod physical_plan_display; mod physical_plan_visitor; +mod physical_scalar; +mod physical_scalar_visitor; pub mod table_read_plan; mod util; +pub use expression_builder::*; pub use physical_plan::*; pub use physical_plan_builder::PhysicalPlanBuilder; pub use physical_plan_builder::PhysicalScalarBuilder; pub use physical_plan_visitor::PhysicalPlanReplacer; +pub use physical_scalar::*; +pub use physical_scalar_visitor::*; pub use util::*; diff --git a/src/query/sql/src/executor/physical_plan.rs b/src/query/sql/src/executor/physical_plan.rs index 9c64e22abcc8..da904f356e8f 100644 --- a/src/query/sql/src/executor/physical_plan.rs +++ b/src/query/sql/src/executor/physical_plan.rs @@ -27,11 +27,11 @@ use common_exception::Result; use common_meta_app::schema::TableInfo; use common_planner::extras::StageKind; use common_planner::extras::SINK_SCHEMA; -use common_planner::AggregateFunctionDesc; -use common_planner::PhysicalScalar; use common_planner::ReadDataSourcePlan; -use common_planner::SortDesc; +use super::AggregateFunctionDesc; +use super::SortDesc; +use crate::executor::PhysicalScalar; use crate::optimizer::ColumnSet; use crate::plans::JoinType; use crate::ColumnBinding; diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs index 4785d21d1303..37181e7e24f2 100644 --- a/src/query/sql/src/executor/physical_plan_builder.rs +++ b/src/query/sql/src/executor/physical_plan_builder.rs @@ -28,13 +28,12 @@ use common_planner::extras::Extras; use common_planner::extras::PrewhereInfo; use common_planner::extras::StageKind; use common_planner::plans::Projection; -use common_planner::AggregateFunctionDesc; -use common_planner::AggregateFunctionSignature; -use common_planner::PhysicalScalar; -use common_planner::SortDesc; +use common_planner::Expression; use itertools::Itertools; use super::AggregateFinal; +use super::AggregateFunctionDesc; +use super::AggregateFunctionSignature; use super::AggregatePartial; use super::Exchange as PhysicalExchange; use super::Filter; @@ -46,7 +45,10 @@ use crate::executor::table_read_plan::ToReadDataSourcePlan; use crate::executor::util::check_physical; use crate::executor::ColumnID; use crate::executor::EvalScalar; +use crate::executor::ExpressionBuilderWithoutRenaming; use crate::executor::PhysicalPlan; +use crate::executor::PhysicalScalar; +use crate::executor::SortDesc; use crate::executor::UnionAll; use crate::optimizer::ColumnSet; use crate::optimizer::SExpr; @@ -482,12 +484,11 @@ impl PhysicalPlanBuilder { let projection = Self::build_projection(&metadata, table_schema, &scan.columns, has_inner_column); - let mut builder = PhysicalScalarBuilder::new(table_schema); - let 
push_down_filters = scan .push_down_predicates .clone() .map(|predicates| { + let builder = ExpressionBuilderWithoutRenaming::create(self.metadata.clone()); predicates .into_iter() .map(|scalar| builder.build(&scalar)) @@ -522,6 +523,7 @@ impl PhysicalPlanBuilder { "There should be at least one predicate in prewhere" ); + let builder = ExpressionBuilderWithoutRenaming::create(self.metadata.clone()); let filter = builder.build(&predicate.unwrap())?; let remain_columns = scan @@ -529,6 +531,7 @@ impl PhysicalPlanBuilder { .difference(&prewhere.prewhere_columns) .copied() .collect::>(); + let output_columns = Self::build_projection( &metadata, table_schema, @@ -569,10 +572,9 @@ impl PhysicalPlanBuilder { let name = metadata.column(item.index).name(); // sort item is already a column - let scalar = PhysicalScalar::IndexedVariable { - index: item.index, + let scalar = Expression::IndexedVariable { + name: name.to_string(), data_type: ty.clone(), - display_name: name.to_string(), }; Ok((scalar, item.asc, item.nulls_first)) diff --git a/src/query/sql/src/planner/physical_scalar_parser.rs b/src/query/sql/src/planner/physical_scalar_parser.rs index 946f4732cbd3..33f80a97394a 100644 --- a/src/query/sql/src/planner/physical_scalar_parser.rs +++ b/src/query/sql/src/planner/physical_scalar_parser.rs @@ -22,10 +22,10 @@ use common_catalog::catalog::CATALOG_DEFAULT; use common_catalog::table::Table; use common_datavalues::DataSchemaRef; use common_exception::Result; -use common_planner::PhysicalScalar; use common_settings::Settings; use parking_lot::RwLock; +use crate::executor::PhysicalScalar; use crate::executor::PhysicalScalarBuilder; use crate::planner::semantic::SyncTypeChecker; use crate::BindContext; From 7b5cf92864681323944b5a734c4910f0e44fda69 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 28 Oct 2022 10:55:00 +0800 Subject: [PATCH 23/47] refactor(query): add expression --- src/query/planner/src/expression.rs | 103 ++++++++++++ src/query/planner/src/expression_visitor.rs | 154 +++++++++++++++++ .../sql/src/executor/expression_builder.rs | 155 ++++++++++++++++++ src/query/sql/src/executor/physical_scalar.rs | 126 ++++++++++++++ .../src/executor/physical_scalar_visitor.rs | 154 +++++++++++++++++ 5 files changed, 692 insertions(+) create mode 100644 src/query/planner/src/expression.rs create mode 100644 src/query/planner/src/expression_visitor.rs create mode 100644 src/query/sql/src/executor/expression_builder.rs create mode 100644 src/query/sql/src/executor/physical_scalar.rs create mode 100644 src/query/sql/src/executor/physical_scalar_visitor.rs diff --git a/src/query/planner/src/expression.rs b/src/query/planner/src/expression.rs new file mode 100644 index 000000000000..19617ce4f77a --- /dev/null +++ b/src/query/planner/src/expression.rs @@ -0,0 +1,103 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
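+//! A small sketch of how the `Expression` tree defined below composes. The
+//! helpers (`u64::to_data_type`, `DataValue::UInt64`) come from
+//! `common_datavalues`; the values are illustrative.
+//!
+//! ```ignore
+//! // Builds the tree for `number + 1` by hand.
+//! let add = Expression::Function {
+//!     name: "+".to_string(),
+//!     args: vec![
+//!         Expression::IndexedVariable {
+//!             name: "number".to_string(),
+//!             data_type: u64::to_data_type(),
+//!         },
+//!         Expression::Constant {
+//!             value: DataValue::UInt64(1),
+//!             data_type: u64::to_data_type(),
+//!         },
+//!     ],
+//!     return_type: u64::to_data_type(),
+//! };
+//! assert_eq!(add.column_name(), "+(number, 1)");
+//! ```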
+
+use std::fmt::Display;
+use std::fmt::Formatter;
+
+use common_datavalues::format_data_type_sql;
+use common_datavalues::DataField;
+use common_datavalues::DataTypeImpl;
+use common_datavalues::DataValue;
+
+/// Serializable and desugared representation of `Scalar`.
+#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
+pub enum Expression {
+    IndexedVariable {
+        name: String,
+        data_type: DataTypeImpl,
+    },
+    Constant {
+        value: DataValue,
+        data_type: DataTypeImpl,
+    },
+    Function {
+        name: String,
+        args: Vec<Expression>,
+        return_type: DataTypeImpl,
+    },
+
+    Cast {
+        input: Box<Expression>,
+        target: DataTypeImpl,
+    },
+}
+
+impl Expression {
+    pub fn data_type(&self) -> DataTypeImpl {
+        match self {
+            Expression::Constant { data_type, .. } => data_type.clone(),
+            Expression::Function { return_type, .. } => return_type.clone(),
+            Expression::Cast { target, .. } => target.clone(),
+            Expression::IndexedVariable { data_type, .. } => data_type.clone(),
+        }
+    }
+
+    pub fn to_data_field(&self) -> DataField {
+        let name = self.column_name();
+        let data_type = self.data_type();
+        DataField::new(&name, data_type)
+    }
+
+    /// Display with readable variable name.
+    pub fn column_name(&self) -> String {
+        match self {
+            Expression::Constant { value, .. } => value.to_string(),
+            Expression::Function { name, args, .. } => {
+                let args = args
+                    .iter()
+                    .map(|arg| arg.column_name())
+                    .collect::<Vec<String>>()
+                    .join(", ");
+                format!("{}({})", name, args)
+            }
+            Expression::Cast { input, target } => format!(
+                "CAST({} AS {})",
+                input.column_name(),
+                format_data_type_sql(target)
+            ),
+            Expression::IndexedVariable { name, .. } => name.clone(),
+        }
+    }
+}
+
+impl Display for Expression {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match &self {
+            Expression::Constant { value, .. } => write!(f, "{}", value),
+            Expression::Function { name, args, .. } => write!(
+                f,
+                "{}({})",
+                name,
+                args.iter()
+                    .map(|arg| format!("{}", arg))
+                    .collect::<Vec<String>>()
+                    .join(", ")
+            ),
+            Expression::Cast { input, target } => {
+                write!(f, "CAST({} AS {})", input, format_data_type_sql(target))
+            }
+            Expression::IndexedVariable { name, .. } => write!(f, "${name}"),
+        }
+    }
+}
diff --git a/src/query/planner/src/expression_visitor.rs b/src/query/planner/src/expression_visitor.rs
new file mode 100644
index 000000000000..1f162e82c0e4
--- /dev/null
+++ b/src/query/planner/src/expression_visitor.rs
@@ -0,0 +1,154 @@
+// Copyright 2022 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashSet;
+
+use common_exception::Result;
+
+use crate::Expression;
+
+/// Controls how the visitor recursion should proceed.
+pub enum Recursion<V: ExpressionVisitor> {
+    /// Attempt to visit all the children, recursively, of this expression.
+    Continue(V),
+    /// Do not visit the children of this expression, though the walk
+    /// of parents of this expression will not be affected
+    Stop(V),
+}
+
+/// Encode the traversal of an expression tree. When passed to
+/// `ExpressionVisitor::accept`, `ExpressionVisitor::visit` is invoked
+/// recursively on all nodes of an expression tree. See the comments
+/// on `ExpressionVisitor::accept` for details on its use
+pub trait ExpressionVisitor: Sized {
+    /// Invoked before any children of `expr` are visited.
+    fn pre_visit(self, expr: &Expression) -> Result<Recursion<Self>>;
+
+    fn visit(mut self, predecessor_expr: &Expression) -> Result<Self> {
+        let mut stack = vec![RecursionProcessing::Call(predecessor_expr)];
+        while let Some(element) = stack.pop() {
+            match element {
+                RecursionProcessing::Ret(expr) => {
+                    self = self.post_visit(expr)?;
+                }
+                RecursionProcessing::Call(expr) => {
+                    stack.push(RecursionProcessing::Ret(expr));
+                    self = match self.pre_visit(expr)? {
+                        Recursion::Stop(visitor) => visitor,
+                        Recursion::Continue(visitor) => {
+                            match expr {
+                                Expression::Function { args, .. } => {
+                                    for arg in args {
+                                        stack.push(RecursionProcessing::Call(arg));
+                                    }
+                                }
+                                Expression::Cast { input, .. } => {
+                                    stack.push(RecursionProcessing::Call(input));
+                                }
+                                _ => {}
+                            };
+
+                            visitor
+                        }
+                    }
+                }
+            }
+        }
+
+        Ok(self)
+    }
+
+    /// Invoked after all children of `scalar` are visited. Default
+    /// implementation does nothing.
+    fn post_visit(self, _scalar: &Expression) -> Result<Self> {
+        Ok(self)
+    }
+}
+
+impl Expression {
+    /// Performs a depth first walk of an expression and
+    /// its children, calling [`ExpressionVisitor::pre_visit`] and
+    /// `visitor.post_visit`.
+    ///
+    /// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) to
+    /// separate expression algorithms from the structure of the
+    /// `Expr` tree and make it easier to add new types of expressions
+    /// and algorithms that walk the tree.
+    ///
+    /// For an expression tree such as
+    /// ```text
+    /// BinaryExpr (GT)
+    ///    left: Column("foo")
+    ///    right: Column("bar")
+    /// ```
+    ///
+    /// The nodes are visited using the following order
+    /// ```text
+    /// pre_visit(ScalarFunction(GT))
+    /// pre_visit(Column("foo"))
+    /// post_visit(Column("foo"))
+    /// pre_visit(Column("bar"))
+    /// post_visit(Column("bar"))
+    /// post_visit(ScalarFunction(GT))
+    /// ```
+    ///
+    /// If an Err result is returned, recursion is stopped immediately
+    pub fn accept<V: ExpressionVisitor>(&self, visitor: V) -> Result<V> {
+        let visitor = match visitor.pre_visit(self)? {
+            Recursion::Continue(visitor) => visitor,
+            // If the recursion should stop, do not visit children
+            Recursion::Stop(visitor) => return Ok(visitor),
+        };
+
+        let visitor = visitor.visit(self)?;
+        visitor.post_visit(self)
+    }
+}
+
+enum RecursionProcessing<'a> {
+    Call(&'a Expression),
+    Ret(&'a Expression),
+}
+
+// This visitor is for recursively visiting expression tree and collects all columns.
+pub struct RequireColumnsVisitor {
+    pub required_columns: HashSet<String>,
+}
+
+impl RequireColumnsVisitor {
+    pub fn default() -> Self {
+        Self {
+            required_columns: HashSet::new(),
+        }
+    }
+
+    pub fn collect_columns_from_expr(expr: &Expression) -> Result<HashSet<String>> {
+        let mut visitor = Self::default();
+        visitor = expr.accept(visitor)?;
+        Ok(visitor.required_columns)
+    }
+}
+
+impl ExpressionVisitor for RequireColumnsVisitor {
+    fn pre_visit(self, expr: &Expression) -> Result<Recursion<Self>> {
+        match expr {
+            Expression::IndexedVariable { name, .. } => {
+                let mut v = self;
+                v.required_columns.insert(name.clone());
+                Ok(Recursion::Continue(v))
+            }
+            _ => Ok(Recursion::Continue(self)),
+        }
+    }
+}
diff --git a/src/query/sql/src/executor/expression_builder.rs b/src/query/sql/src/executor/expression_builder.rs
new file mode 100644
index 000000000000..cb22b7375e17
--- /dev/null
+++ b/src/query/sql/src/executor/expression_builder.rs
@@ -0,0 +1,155 @@
+// Copyright 2021 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use common_exception::ErrorCode;
+use common_exception::Result;
+use common_functions::scalars::FunctionFactory;
+use common_planner::Expression;
+
+use crate::executor::util::format_field_name;
+use crate::plans::Scalar;
+use crate::IndexType;
+use crate::MetadataRef;
+use crate::ScalarExpr;
+
+pub trait FiledNameFormat {
+    fn format(display_name: &str, index: IndexType) -> String;
+}
+
+impl FiledNameFormat for ExpressionBuilder<true> {
+    fn format(display_name: &str, index: IndexType) -> String {
+        format_field_name(display_name, index)
+    }
+}
+
+impl FiledNameFormat for ExpressionBuilder<false> {
+    fn format(display_name: &str, _index: IndexType) -> String {
+        display_name.to_owned()
+    }
+}
+
+pub struct ExpressionBuilder<const FORMAT_WITH_INDEX: bool> {
+    metadata: MetadataRef,
+}
+
+pub type ExpressionBuilderWithoutRenaming = ExpressionBuilder<false>;
+pub type ExpressionBuilderWithRenaming = ExpressionBuilder<true>;
+
+impl<const FORMAT_WITH_INDEX: bool> ExpressionBuilder<FORMAT_WITH_INDEX>
+where ExpressionBuilder<FORMAT_WITH_INDEX>: FiledNameFormat
+{
+    pub fn create(metadata: MetadataRef) -> Self {
+        ExpressionBuilder { metadata }
+    }
+
+    pub fn build(&self, scalar: &Scalar) -> Result<Expression> {
+        match scalar {
+            Scalar::BoundColumnRef(column_ref) => {
+                let metadata = self.metadata.read();
+                let name = metadata.column(column_ref.column.index).name();
+                Ok(Expression::IndexedVariable {
+                    name: name.to_string(),
+                    data_type: (*column_ref.column.data_type).clone(),
+                })
+            }
+            Scalar::ConstantExpr(constant) => Ok(Expression::Constant {
+                value: constant.value.clone(),
+                data_type: *constant.data_type.clone(),
+            }),
+            Scalar::AndExpr(and) => Ok(Expression::Function {
+                name: "and".to_string(),
+                args: vec![self.build(&and.left)?, self.build(&and.right)?],
+                return_type: and.data_type(),
+            }),
+            Scalar::OrExpr(or) => Ok(Expression::Function {
+                name: "or".to_string(),
+                args: vec![self.build(&or.left)?, self.build(&or.right)?],
+                return_type: or.data_type(),
+            }),
+            Scalar::ComparisonExpr(comp) => Ok(Expression::Function {
+                name: comp.op.to_func_name(),
+                args: vec![self.build(&comp.left)?, self.build(&comp.right)?],
+                return_type: comp.data_type(),
+            }),
+            Scalar::FunctionCall(func) => Ok(Expression::Function {
+                name: func.func_name.clone(),
+                args: func
+                    .arguments
+                    .iter()
+                    .zip(func.arg_types.iter())
+                    .map(|(arg, _)| self.build(arg))
+                    .collect::<Result<Vec<_>>>()?,
+                return_type: *func.return_type.clone(),
+            }),
+            Scalar::CastExpr(cast) => Ok(Expression::Cast {
+                input: Box::new(self.build(&cast.argument)?),
+                target: *cast.target_type.clone(),
+            }),
+
+            _ => Err(ErrorCode::LogicalError(format!(
+                "Unsupported physical scalar: {:?}",
+                scalar
+            ))),
+        }
+    }
+}
+
+pub trait ExpressionOp {
+    fn binary_op(&self, name: &str, other: &Self) -> Result<Expression>;
+
+    fn and(&self, other: &Self) -> Result<Expression> {
+        self.binary_op("and", other)
+    }
+
+    fn or(&self, other: &Self) -> Result<Expression> {
+        self.binary_op("or", other)
+    }
+
+    fn eq(&self, other: &Self) -> Result<Expression> {
+        self.binary_op("=", other)
+    }
+
+    fn not_eq(&self, other: &Self) -> Result<Expression> {
+        self.binary_op("!=", other)
+    }
+
+    fn gt_eq(&self, other: &Self) -> Result<Expression> {
+        self.binary_op(">=", other)
+    }
+
+    fn gt(&self, other: &Self) -> Result<Expression> {
+        self.binary_op(">", other)
+    }
+
+    fn lt_eq(&self, other: &Self) -> Result<Expression> {
+        self.binary_op("<=", other)
+    }
+
+    fn lt(&self, other: &Self) -> Result<Expression> {
+        self.binary_op("<", other)
+    }
+}
+
+impl ExpressionOp for Expression {
+    fn binary_op(&self, name: &str, other: &Expression) -> Result<Expression> {
+        let func =
+            FunctionFactory::instance().get(name, &[&self.data_type(), &other.data_type()])?;
+
+        Ok(Expression::Function {
+            name: name.to_owned(),
+            args: vec![self.clone(), other.clone()],
+            return_type: func.return_type(),
+        })
+    }
+}
diff --git a/src/query/sql/src/executor/physical_scalar.rs b/src/query/sql/src/executor/physical_scalar.rs
new file mode 100644
index 000000000000..375b40f94134
--- /dev/null
+++ b/src/query/sql/src/executor/physical_scalar.rs
@@ -0,0 +1,126 @@
+// Copyright 2022 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::Display;
+use std::fmt::Formatter;
+
+use common_datavalues::format_data_type_sql;
+use common_datavalues::DataTypeImpl;
+use common_datavalues::DataValue;
+
+type ColumnID = String;
+type IndexType = usize;
+
+/// Serializable and desugared representation of `Scalar`.
+#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
+pub enum PhysicalScalar {
+    IndexedVariable {
+        index: usize,
+        data_type: DataTypeImpl,
+
+        display_name: String,
+    },
+    Constant {
+        value: DataValue,
+        data_type: DataTypeImpl,
+    },
+    Function {
+        name: String,
+        args: Vec<PhysicalScalar>,
+        return_type: DataTypeImpl,
+    },
+
+    Cast {
+        input: Box<PhysicalScalar>,
+        target: DataTypeImpl,
+    },
+}
+
+impl PhysicalScalar {
+    pub fn data_type(&self) -> DataTypeImpl {
+        match self {
+            PhysicalScalar::Constant { data_type, .. } => data_type.clone(),
+            PhysicalScalar::Function { return_type, .. } => return_type.clone(),
+            PhysicalScalar::Cast { target, .. } => target.clone(),
+            PhysicalScalar::IndexedVariable { data_type, .. } => data_type.clone(),
+        }
+    }
+
+    /// Display with readable variable name.
+    pub fn pretty_display(&self) -> String {
+        match self {
+            PhysicalScalar::Constant { value, .. } => value.to_string(),
+            PhysicalScalar::Function { name, args, .. } => {
+                let args = args
+                    .iter()
+                    .map(|arg| arg.pretty_display())
+                    .collect::<Vec<String>>()
+                    .join(", ");
+                format!("{}({})", name, args)
+            }
+            PhysicalScalar::Cast { input, target } => format!(
+                "CAST({} AS {})",
+                input.pretty_display(),
+                format_data_type_sql(target)
+            ),
+            PhysicalScalar::IndexedVariable { display_name, .. } => display_name.clone(),
+        }
+    }
+}
+
+impl Display for PhysicalScalar {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match &self {
+            PhysicalScalar::Constant { value, .. } => write!(f, "{}", value),
+            PhysicalScalar::Function { name, args, .. } => write!(
+                f,
+                "{}({})",
+                name,
+                args.iter()
+                    .map(|arg| format!("{}", arg))
+                    .collect::<Vec<String>>()
+                    .join(", ")
+            ),
+            PhysicalScalar::Cast { input, target } => {
+                write!(f, "CAST({} AS {})", input, format_data_type_sql(target))
+            }
+            PhysicalScalar::IndexedVariable { index, .. } => write!(f, "${index}"),
+        }
+    }
+}
+
+#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
+pub struct AggregateFunctionDesc {
+    pub sig: AggregateFunctionSignature,
+    pub column_id: ColumnID,
+    pub args: Vec<ColumnID>,
+
+    /// Only used for debugging
+    pub arg_indices: Vec<IndexType>,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
+pub struct AggregateFunctionSignature {
+    pub name: String,
+    pub args: Vec<DataTypeImpl>,
+    pub params: Vec<DataValue>,
+    pub return_type: DataTypeImpl,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
+pub struct SortDesc {
+    pub asc: bool,
+    pub nulls_first: bool,
+    pub order_by: ColumnID,
+}
diff --git a/src/query/sql/src/executor/physical_scalar_visitor.rs b/src/query/sql/src/executor/physical_scalar_visitor.rs
new file mode 100644
index 000000000000..37c5cbb3344b
--- /dev/null
+++ b/src/query/sql/src/executor/physical_scalar_visitor.rs
@@ -0,0 +1,154 @@
+// Copyright 2022 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashSet;
+
+use common_exception::Result;
+
+use super::PhysicalScalar;
+
+/// Controls how the visitor recursion should proceed.
+pub enum Recursion<V: PhysicalScalarVisitor> {
+    /// Attempt to visit all the children, recursively, of this expression.
+    Continue(V),
+    /// Do not visit the children of this expression, though the walk
+    /// of parents of this expression will not be affected
+    Stop(V),
+}
+
+/// Encode the traversal of an expression tree. When passed to
+/// `PhysicalScalarVisitor::accept`, `PhysicalScalarVisitor::visit` is invoked
+/// recursively on all nodes of an expression tree. See the comments
+/// on `PhysicalScalarVisitor::accept` for details on its use
+pub trait PhysicalScalarVisitor: Sized {
+    /// Invoked before any children of `expr` are visited.
+    fn pre_visit(self, expr: &PhysicalScalar) -> Result<Recursion<Self>>;
+
+    fn visit(mut self, predecessor_expr: &PhysicalScalar) -> Result<Self> {
+        let mut stack = vec![RecursionProcessing::Call(predecessor_expr)];
+        while let Some(element) = stack.pop() {
+            match element {
+                RecursionProcessing::Ret(expr) => {
+                    self = self.post_visit(expr)?;
+                }
+                RecursionProcessing::Call(expr) => {
+                    stack.push(RecursionProcessing::Ret(expr));
+                    self = match self.pre_visit(expr)? {
+                        Recursion::Stop(visitor) => visitor,
+                        Recursion::Continue(visitor) => {
+                            match expr {
+                                PhysicalScalar::Function { args, .. } => {
} => { + for arg in args { + stack.push(RecursionProcessing::Call(arg)); + } + } + PhysicalScalar::Cast { input, .. } => { + stack.push(RecursionProcessing::Call(input)); + } + _ => {} + }; + + visitor + } + } + } + } + } + + Ok(self) + } + + /// Invoked after all children of `scalar` are visited. Default + /// implementation does nothing. + fn post_visit(self, _scalar: &PhysicalScalar) -> Result<Self> { + Ok(self) + } +} + +impl PhysicalScalar { + /// Performs a depth first walk of an expression and + /// its children, calling [`PhysicalScalarVisitor::pre_visit`] and + /// `visitor.post_visit`. + /// + /// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) to + /// separate expression algorithms from the structure of the + /// `PhysicalScalar` tree and make it easier to add new types of expressions + /// and algorithms that walk the tree. + /// + /// For an expression tree such as + /// ```text + /// BinaryExpr (GT) + /// left: Column("foo") + /// right: Column("bar") + /// ``` + /// + /// The nodes are visited using the following order + /// ```text + /// pre_visit(ScalarFunction(GT)) + /// pre_visit(Column("foo")) + /// post_visit(Column("foo")) + /// pre_visit(Column("bar")) + /// post_visit(Column("bar")) + /// post_visit(ScalarFunction(GT)) + /// ``` + /// + /// If an Err result is returned, recursion is stopped immediately + pub fn accept<V: PhysicalScalarVisitor>(&self, visitor: V) -> Result<V> { + let visitor = match visitor.pre_visit(self)? { + Recursion::Continue(visitor) => visitor, + // If the recursion should stop, do not visit children + Recursion::Stop(visitor) => return Ok(visitor), + }; + + let visitor = visitor.visit(self)?; + visitor.post_visit(self) + } +} + +enum RecursionProcessing<'a> { + Call(&'a PhysicalScalar), + Ret(&'a PhysicalScalar), +} + +// This visitor recursively walks an expression tree and collects all referenced column indices. +pub struct RequireColumnsVisitor { + pub required_columns: HashSet<usize>, +} + +impl RequireColumnsVisitor { + pub fn default() -> Self { + Self { + required_columns: HashSet::new(), + } + } + + pub fn collect_columns_from_expr(expr: &PhysicalScalar) -> Result<HashSet<usize>> { + let mut visitor = Self::default(); + visitor = expr.accept(visitor)?; + Ok(visitor.required_columns) + } +} + +impl PhysicalScalarVisitor for RequireColumnsVisitor { + fn pre_visit(self, expr: &PhysicalScalar) -> Result<Recursion<Self>> { + match expr { + PhysicalScalar::IndexedVariable { index, ..
} => { + let mut v = self; + v.required_columns.insert(*index); + Ok(Recursion::Continue(v)) + } + _ => Ok(Recursion::Continue(self)), + } + } +} From ee36eb82de74bbe73ac3b82b3bbdea1cd03ba08d Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 28 Oct 2022 11:07:45 +0800 Subject: [PATCH 24/47] refactor(query): add expression --- src/query/catalog/src/table.rs | 2 +- src/query/planner/src/lib.rs | 6 ++-- .../tests/it/sql/planner/format/mod.rs | 2 +- .../sql/src/evaluator/physical_scalar.rs | 7 ++++ .../sql/src/executor/expression_builder.rs | 2 +- src/query/sql/src/executor/physical_scalar.rs | 34 +++++++++++++++++++ 6 files changed, 47 insertions(+), 6 deletions(-) diff --git a/src/query/catalog/src/table.rs b/src/query/catalog/src/table.rs index 6a9150cf4ffe..02e04ab7f0ea 100644 --- a/src/query/catalog/src/table.rs +++ b/src/query/catalog/src/table.rs @@ -30,8 +30,8 @@ use common_pipeline_core::Pipeline; use common_planner::extras::Extras; use common_planner::extras::Statistics; use common_planner::plans::DeletePlan; -use common_planner::Partitions; use common_planner::Expression; +use common_planner::Partitions; use common_planner::ReadDataSourcePlan; use common_storage::StorageMetrics; diff --git a/src/query/planner/src/lib.rs b/src/query/planner/src/lib.rs index d733a1a044aa..65a5d6123ebb 100644 --- a/src/query/planner/src/lib.rs +++ b/src/query/planner/src/lib.rs @@ -21,17 +21,17 @@ //! After all the planners work, `Interpreter` will use `PhysicalPlan` to //! build pipelines, then our processes will produce result data blocks. -mod partition; mod expression; +mod partition; -pub mod extras; mod expression_visitor; +pub mod extras; pub mod plan_read_datasource; pub mod stage_table; -pub use partition::*; pub use expression::*; pub use expression_visitor::*; +pub use partition::*; pub use plan_read_datasource::*; // Plan will be used publicly. diff --git a/src/query/service/tests/it/sql/planner/format/mod.rs b/src/query/service/tests/it/sql/planner/format/mod.rs index 9f3ccfc919df..f1f669987e2d 100644 --- a/src/query/service/tests/it/sql/planner/format/mod.rs +++ b/src/query/service/tests/it/sql/planner/format/mod.rs @@ -21,8 +21,8 @@ use common_meta_app::schema::TableIdent; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; use databend_query::sql::optimizer::SExpr; -use databend_query::sql::planner::Metadata; use databend_query::sql::planner::plans::JoinType; +use databend_query::sql::planner::Metadata; use databend_query::sql::plans::BoundColumnRef; use databend_query::sql::plans::ConstantExpr; use databend_query::sql::plans::Filter; diff --git a/src/query/sql/src/evaluator/physical_scalar.rs b/src/query/sql/src/evaluator/physical_scalar.rs index 1c81cbf6761a..c6ba756cac3e 100644 --- a/src/query/sql/src/evaluator/physical_scalar.rs +++ b/src/query/sql/src/evaluator/physical_scalar.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
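The `RequireColumnsVisitor` added above is easiest to read through a usage sketch. This is a minimal, hypothetical example (the predicate, column index, and types are illustrative only, not part of this patch):

use common_datavalues::prelude::*;
use common_exception::Result;

fn collect_columns_demo() -> Result<()> {
    // Build `number = 1`, with `number` bound to input column 0.
    let pred = PhysicalScalar::Function {
        name: "=".to_string(),
        args: vec![
            PhysicalScalar::IndexedVariable {
                index: 0,
                data_type: u64::to_data_type(),
                display_name: "number".to_string(),
            },
            PhysicalScalar::Constant {
                value: DataValue::UInt64(1),
                data_type: u64::to_data_type(),
            },
        ],
        return_type: bool::to_data_type(),
    };
    // `accept` drives pre_visit/post_visit over the whole tree.
    let cols = RequireColumnsVisitor::collect_columns_from_expr(&pred)?;
    assert_eq!(cols, std::iter::once(0).collect());
    Ok(())
}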
+use common_datavalues::DataSchema; use common_datavalues::DataType; use common_datavalues::DataTypeImpl; use common_datavalues::DataValue; @@ -20,12 +21,18 @@ use common_exception::Result; use common_functions::scalars::in_evaluator; use common_functions::scalars::CastFunction; use common_functions::scalars::FunctionFactory; +use common_planner::Expression; use crate::evaluator::eval_node::EvalNode; use crate::evaluator::Evaluator; use crate::executor::PhysicalScalar; impl Evaluator { + pub fn eval_expression(expression: &Expression, schema: &DataSchema) -> Result { + let physical_scalar = PhysicalScalar::from_expression(&expression, schema)?; + Self::eval_physical_scalar(&physical_scalar) + } + pub fn eval_physical_scalars(physical_scalars: &[PhysicalScalar]) -> Result> { physical_scalars .iter() diff --git a/src/query/sql/src/executor/expression_builder.rs b/src/query/sql/src/executor/expression_builder.rs index cb22b7375e17..5f97e9520c50 100644 --- a/src/query/sql/src/executor/expression_builder.rs +++ b/src/query/sql/src/executor/expression_builder.rs @@ -60,7 +60,7 @@ where ExpressionBuilder: FiledNameFormat let name = metadata.column(column_ref.column.index).name(); Ok(Expression::IndexedVariable { name: name.to_string(), - data_type: (*column_ref.column.data_type).clone(), + data_type: (*column_ref.column.data_type).clone(), }) } Scalar::ConstantExpr(constant) => Ok(Expression::Constant { diff --git a/src/query/sql/src/executor/physical_scalar.rs b/src/query/sql/src/executor/physical_scalar.rs index 375b40f94134..cf49f0049380 100644 --- a/src/query/sql/src/executor/physical_scalar.rs +++ b/src/query/sql/src/executor/physical_scalar.rs @@ -16,8 +16,11 @@ use std::fmt::Display; use std::fmt::Formatter; use common_datavalues::format_data_type_sql; +use common_datavalues::DataSchema; use common_datavalues::DataTypeImpl; use common_datavalues::DataValue; +use common_exception::Result; +use common_planner::Expression; type ColumnID = String; type IndexType = usize; @@ -77,6 +80,37 @@ impl PhysicalScalar { PhysicalScalar::IndexedVariable { display_name, .. 
} => display_name.clone(), } } + + // todo(sundy) + pub fn from_expression(expression: &Expression, schema: &DataSchema) -> Result { + match expression { + Expression::IndexedVariable { name, data_type } => todo!(), + Expression::Constant { value, data_type } => todo!(), + Expression::Function { + name, + args, + return_type, + } => todo!(), + Expression::Cast { input, target } => todo!(), + } + } + + pub fn to_expression(&self, schema: &DataSchema) -> Result { + match self { + PhysicalScalar::IndexedVariable { + index, + data_type, + display_name, + } => todo!(), + PhysicalScalar::Constant { value, data_type } => todo!(), + PhysicalScalar::Function { + name, + args, + return_type, + } => todo!(), + PhysicalScalar::Cast { input, target } => todo!(), + } + } } impl Display for PhysicalScalar { From 7378bfa5daea7d7a84adafa8f4c4b0da42290087 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 28 Oct 2022 11:40:42 +0800 Subject: [PATCH 25/47] refactor(query): rename to expression_parser --- src/query/sql/src/evaluator/mod.rs | 2 +- src/query/sql/src/evaluator/monotonicity.rs | 41 ++++--- ..._scalar_parser.rs => expression_parser.rs} | 22 ++-- src/query/sql/src/planner/mod.rs | 4 +- src/query/storages/fuse/src/fuse_table.rs | 4 +- .../storages/fuse/src/operations/delete.rs | 2 +- .../src/operations/mutation/block_filter.rs | 2 +- src/query/storages/fuse/src/pruning/pruner.rs | 4 +- .../storages/fuse/src/pruning/range_pruner.rs | 2 +- .../storages/fuse/src/pruning/topn_pruner.rs | 2 +- .../clustering_information.rs | 2 +- .../clustering_informations/table_args.rs | 6 +- src/query/storages/index/src/bloom.rs | 50 +++----- src/query/storages/index/src/range_filter.rs | 107 ++++++++---------- 14 files changed, 116 insertions(+), 134 deletions(-) rename src/query/sql/src/planner/{physical_scalar_parser.rs => expression_parser.rs} (86%) diff --git a/src/query/sql/src/evaluator/mod.rs b/src/query/sql/src/evaluator/mod.rs index 2d099821bcbb..cf2d9d13db13 100644 --- a/src/query/sql/src/evaluator/mod.rs +++ b/src/query/sql/src/evaluator/mod.rs @@ -23,7 +23,7 @@ pub use chunk_operator::CompoundChunkOperator; use common_datavalues::ColumnRef; use common_datavalues::DataTypeImpl; pub use eval_node::EvalNode; -pub use monotonicity::PhysicalScalarMonotonicityVisitor; +pub use monotonicity::ExpressionMonotonicityVisitor; pub struct Evaluator; diff --git a/src/query/sql/src/evaluator/monotonicity.rs b/src/query/sql/src/evaluator/monotonicity.rs index 9fbd90e798a4..92082c7b7b01 100644 --- a/src/query/sql/src/evaluator/monotonicity.rs +++ b/src/query/sql/src/evaluator/monotonicity.rs @@ -23,12 +23,11 @@ use common_functions::scalars::Function; use common_functions::scalars::FunctionContext; use common_functions::scalars::FunctionFactory; use common_functions::scalars::Monotonicity; +use common_planner::Expression; +use common_planner::ExpressionVisitor; +use common_planner::Recursion; -use crate::executor::PhysicalScalar; -use crate::executor::PhysicalScalarVisitor; -use crate::executor::Recursion; - -// PhysicalScalarMonotonicityVisitor visit the expression tree to calculate monotonicity. +// ExpressionMonotonicityVisitor visit the expression tree to calculate monotonicity. // For example, a function of Add(Neg(number), 5) for number < -100 will have a tree like this: // // . MonotonicityNode::Function -- 'Add' @@ -47,21 +46,21 @@ use crate::executor::Recursion; // every function. Each function is responsible to implement its own monotonicity // function. 
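As a sketch of how the renamed visitor below is driven: this assumes the `Expression` variants from this series; the schema, the "plus" function name, and the unbounded `(None, None)` range are illustrative assumptions, not code from this patch.

use std::collections::HashMap;
use std::sync::Arc;

use common_datavalues::prelude::*;
use common_planner::Expression;

fn monotonicity_demo() {
    // `number + 5` over a single-column schema.
    let schema = Arc::new(DataSchema::new(vec![DataField::new(
        "number",
        i64::to_data_type(),
    )]));
    let expr = Expression::Function {
        name: "plus".to_string(),
        args: vec![
            Expression::IndexedVariable {
                name: "number".to_string(),
                data_type: i64::to_data_type(),
            },
            Expression::Constant {
                value: DataValue::Int64(5),
                data_type: i64::to_data_type(),
            },
        ],
        return_type: i64::to_data_type(),
    };
    // No known range for the variable: pass (None, None) as its bounds.
    let mut variables = HashMap::new();
    variables.insert("number".to_string(), (None, None));
    let mono = ExpressionMonotonicityVisitor::check_expression(schema, &expr, variables, false);
    assert!(mono.is_monotonic);
}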
#[derive(Clone)] -pub struct PhysicalScalarMonotonicityVisitor { +pub struct ExpressionMonotonicityVisitor { input_schema: DataSchemaRef, // HashMap // variable_left: the variable range left. // variable_right: the variable range right. - variables: HashMap, Option)>, + variables: HashMap, Option)>, stack: Vec<(DataTypeImpl, Monotonicity)>, single_point: bool, } -impl PhysicalScalarMonotonicityVisitor { +impl ExpressionMonotonicityVisitor { fn create( input_schema: DataSchemaRef, - variables: HashMap, Option)>, + variables: HashMap, Option)>, single_point: bool, ) -> Self { Self { @@ -79,7 +78,7 @@ impl PhysicalScalarMonotonicityVisitor { Ok(monotonic) } _ => Err(ErrorCode::LogicalError( - "Stack has too many elements in PhysicalScalarMonotonicityVisitor::finalize", + "Stack has too many elements in ExpressionMonotonicityVisitor::finalize", )), } } @@ -164,8 +163,8 @@ impl PhysicalScalarMonotonicityVisitor { /// Return the monotonicity information, together with column name if any. pub fn check_expression( schema: DataSchemaRef, - expr: &PhysicalScalar, - variables: HashMap, Option)>, + expr: &Expression, + variables: HashMap, Option)>, single_point: bool, ) -> Monotonicity { let visitor = Self::create(schema, variables, single_point); @@ -175,19 +174,19 @@ impl PhysicalScalarMonotonicityVisitor { } } -impl PhysicalScalarVisitor for PhysicalScalarMonotonicityVisitor { - fn pre_visit(self, _expr: &PhysicalScalar) -> Result> { +impl ExpressionVisitor for ExpressionMonotonicityVisitor { + fn pre_visit(self, _expr: &Expression) -> Result> { Ok(Recursion::Continue(self)) } - fn post_visit(mut self, expr: &PhysicalScalar) -> Result { + fn post_visit(mut self, expr: &Expression) -> Result { match expr { - PhysicalScalar::IndexedVariable { index, .. } => { - let (left, right) = self.variables.get(index).ok_or_else(|| { - ErrorCode::BadArguments(format!("Cannot find the column: '{:?}'", *index)) + Expression::IndexedVariable { name, .. } => { + let (left, right) = self.variables.get(name).ok_or_else(|| { + ErrorCode::BadArguments(format!("Cannot find the column: '{:?}'", name)) })?; - let field = self.input_schema.field(*index); + let field = self.input_schema.field_with_name(name)?; let return_type = field.data_type(); let monotonic = Monotonicity { @@ -201,7 +200,7 @@ impl PhysicalScalarVisitor for PhysicalScalarMonotonicityVisitor { self.stack.push((return_type.clone(), monotonic)); Ok(self) } - PhysicalScalar::Constant { value, data_type } => { + Expression::Constant { value, data_type } => { let name = value.to_string(); let data_field = DataField::new(&name, data_type.clone()); @@ -218,7 +217,7 @@ impl PhysicalScalarVisitor for PhysicalScalarMonotonicityVisitor { self.stack.push((data_type.clone(), monotonic)); Ok(self) } - PhysicalScalar::Function { name, args, .. } => self.visit_function(name, args.len()), + Expression::Function { name, args, .. 
} => self.visit_function(name, args.len()), _ => Err(ErrorCode::UnknownException("Unable to get monotonicity")), } } diff --git a/src/query/sql/src/planner/physical_scalar_parser.rs b/src/query/sql/src/planner/expression_parser.rs similarity index 86% rename from src/query/sql/src/planner/physical_scalar_parser.rs rename to src/query/sql/src/planner/expression_parser.rs index 33f80a97394a..12809c0300ff 100644 --- a/src/query/sql/src/planner/physical_scalar_parser.rs +++ b/src/query/sql/src/planner/expression_parser.rs @@ -22,11 +22,11 @@ use common_catalog::catalog::CATALOG_DEFAULT; use common_catalog::table::Table; use common_datavalues::DataSchemaRef; use common_exception::Result; +use common_planner::Expression; use common_settings::Settings; use parking_lot::RwLock; -use crate::executor::PhysicalScalar; -use crate::executor::PhysicalScalarBuilder; +use crate::executor::ExpressionBuilderWithoutRenaming; use crate::planner::semantic::SyncTypeChecker; use crate::BindContext; use crate::ColumnBinding; @@ -34,14 +34,14 @@ use crate::Metadata; use crate::NameResolutionContext; use crate::Visibility; -pub struct PhysicalScalarParser; +pub struct ExpressionParser; -impl PhysicalScalarParser { +impl ExpressionParser { pub fn parse_exprs( schema: DataSchemaRef, table_meta: Arc, sql: &str, - ) -> Result> { + ) -> Result> { let sql_dialect = Dialect::MySQL; let tokens = tokenize_sql(sql)?; let backtrace = Backtrace::new(); @@ -80,13 +80,15 @@ impl PhysicalScalarParser { let name_resolution_ctx = NameResolutionContext::try_from(settings.as_ref())?; let mut type_checker = SyncTypeChecker::new(&bind_context, &name_resolution_ctx, &[]); - let mut physical_scalars = Vec::with_capacity(exprs.len()); - let mut builder = PhysicalScalarBuilder::new(&schema); + let mut expressions = Vec::with_capacity(exprs.len()); + + let builder = ExpressionBuilderWithoutRenaming::create(metadata.clone()); + for expr in exprs.iter() { let (scalar, _) = *type_checker.resolve(expr, None)?; - let physical_scalar = builder.build(&scalar)?; - physical_scalars.push(physical_scalar); + let expr = builder.build(&scalar)?; + expressions.push(expr); } - Ok(physical_scalars) + Ok(expressions) } } diff --git a/src/query/sql/src/planner/mod.rs b/src/query/sql/src/planner/mod.rs index f4b68e553e49..9890934d165d 100644 --- a/src/query/sql/src/planner/mod.rs +++ b/src/query/sql/src/planner/mod.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
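For reference, a hedged sketch of calling the renamed parser above (the table handle and the SQL fragment are hypothetical; note that a later commit in this series drops the `schema` argument from `parse_exprs`):

use std::sync::Arc;

use common_catalog::table::Table;
use common_exception::Result;
use common_planner::Expression;
use common_sql::ExpressionParser;

fn parse_cluster_keys(table: Arc<dyn Table>) -> Result<Vec<Expression>> {
    // Parse a cluster-key definition string into bound Expressions.
    ExpressionParser::parse_exprs(table.schema(), table.clone(), "(user_id, created_at)")
}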
+mod expression_parser; mod format; mod metadata; -mod physical_scalar_parser; #[allow(clippy::module_inception)] mod planner; mod semantic; @@ -28,8 +28,8 @@ pub use binder::Binder; pub use binder::ColumnBinding; pub use binder::ScalarBinder; pub use binder::Visibility; +pub use expression_parser::ExpressionParser; pub use metadata::*; -pub use physical_scalar_parser::PhysicalScalarParser; pub use planner::Planner; pub use plans::ScalarExpr; pub use semantic::normalize_identifier; diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index 63550b4e5cb8..8dd98239bd4f 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -38,7 +38,7 @@ use common_planner::extras::Extras; use common_planner::extras::Statistics; use common_planner::plans::DeletePlan; use common_planner::Partitions; -use common_planner::PhysicalScalar; +use common_planner::Expression; use common_planner::ReadDataSourcePlan; use common_sharing::create_share_table_operator; use common_sql::PhysicalScalarParser; @@ -246,7 +246,7 @@ impl Table for FuseTable { true } - fn cluster_keys(&self) -> Vec { + fn cluster_keys(&self) -> Vec { let schema = self.table_info.schema(); let table_meta = Arc::new(self.clone()); if let Some((_, order)) = &self.cluster_key_meta { diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index 8586e768cc25..77c84e1f87d2 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -22,7 +22,7 @@ use common_exception::Result; use common_fuse_meta::meta::TableSnapshot; use common_planner::extras::Extras; use common_planner::plans::DeletePlan; -use common_planner::PhysicalScalar; +use common_planner::Expression; use common_sql::PhysicalScalarParser; use tracing::debug; diff --git a/src/query/storages/fuse/src/operations/mutation/block_filter.rs b/src/query/storages/fuse/src/operations/mutation/block_filter.rs index f4a1795163cc..3e2a7b87a478 100644 --- a/src/query/storages/fuse/src/operations/mutation/block_filter.rs +++ b/src/query/storages/fuse/src/operations/mutation/block_filter.rs @@ -22,7 +22,7 @@ use common_datavalues::Series; use common_exception::Result; use common_fuse_meta::meta::BlockMeta; use common_planner::plans::Projection; -use common_planner::PhysicalScalar; +use common_planner::Expression; use common_sql::evaluator::Evaluator; use crate::operations::mutation::deletion_mutator::Deletion; diff --git a/src/query/storages/fuse/src/pruning/pruner.rs b/src/query/storages/fuse/src/pruning/pruner.rs index 47bf1c317796..feb958da37c1 100644 --- a/src/query/storages/fuse/src/pruning/pruner.rs +++ b/src/query/storages/fuse/src/pruning/pruner.rs @@ -19,8 +19,8 @@ use common_catalog::table_context::TableContext; use common_datavalues::DataSchemaRef; use common_exception::Result; use common_fuse_meta::meta::Location; -use common_planner::PhysicalScalar; -use common_sql::evaluator::PhysicalScalarOp; +use common_planner::Expression; +use common_sql::executor::ExpressionOp; use common_storages_index::BlockFilter; use opendal::Operator; diff --git a/src/query/storages/fuse/src/pruning/range_pruner.rs b/src/query/storages/fuse/src/pruning/range_pruner.rs index f5e39f9fe194..73042955a266 100644 --- a/src/query/storages/fuse/src/pruning/range_pruner.rs +++ b/src/query/storages/fuse/src/pruning/range_pruner.rs @@ -18,7 +18,7 @@ use common_catalog::table_context::TableContext; use 
common_datavalues::DataSchemaRef; use common_exception::Result; use common_fuse_meta::meta::StatisticsOfColumns; -use common_planner::PhysicalScalar; +use common_planner::Expression; use common_storages_index::RangeFilter; pub trait RangePruner { diff --git a/src/query/storages/fuse/src/pruning/topn_pruner.rs b/src/query/storages/fuse/src/pruning/topn_pruner.rs index 54a4de191437..b72bdbc8ca57 100644 --- a/src/query/storages/fuse/src/pruning/topn_pruner.rs +++ b/src/query/storages/fuse/src/pruning/topn_pruner.rs @@ -16,7 +16,7 @@ use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::BlockMeta; use common_fuse_meta::meta::ColumnStatistics; -use common_planner::PhysicalScalar; +use common_planner::Expression; pub(crate) struct TopNPrunner { sort: Vec<(PhysicalScalar, bool, bool)>, diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information.rs index b1cb3cbd9b34..2ffe36c988ae 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information.rs @@ -22,7 +22,7 @@ use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::BlockMeta; -use common_planner::PhysicalScalar; +use common_planner::Expression; use serde_json::json; use crate::io::MetaReaders; diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs index ff3464c7949d..ec62c6b8e092 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use common_catalog::table::Table; use common_exception::ErrorCode; use common_exception::Result; -use common_planner::PhysicalScalar; +use common_planner::Expression; use common_sql::PhysicalScalarParser; use crate::table_functions::string_value; @@ -39,7 +39,7 @@ pub fn parse_func_table_args(table_args: &TableArgs) -> Result<(String, String)> } } -pub fn get_cluster_keys(table: &FuseTable, definition: &str) -> Result> { +pub fn get_cluster_keys(table: &FuseTable, definition: &str) -> Result> { let cluster_keys = if !definition.is_empty() { let table_meta = Arc::new(table.clone()); PhysicalScalarParser::parse_exprs(table.schema(), table_meta, definition)? 
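The `get_cluster_keys` helper above resolves cluster keys from either an explicit definition or the table itself. A minimal sketch of the two paths, assuming a `FuseTable` value (`fuse_table`) and a definition string that are hypothetical:

use common_exception::Result;
use common_planner::Expression;

fn cluster_keys_demo(fuse_table: &FuseTable) -> Result<Vec<Expression>> {
    // Non-empty definition: parse it against the table schema.
    let explicit = get_cluster_keys(fuse_table, "(id, name)")?;
    assert!(!explicit.is_empty());
    // Empty definition: fall back to the table's own cluster keys.
    get_cluster_keys(fuse_table, "")
}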
@@ -54,5 +54,7 @@ pub fn get_cluster_keys(table: &FuseTable, definition: &str) -> Result String { - format!("Bloom({})", index) + pub fn build_filter_column(name: &str) -> String { + format!("Bloom({})", name) } pub fn build_filter_schema(data_schema: &DataSchema) -> DataSchema { let mut filter_fields = vec![]; let fields = data_schema.fields(); - for (index, field) in fields.iter().enumerate() { + for field in fields.iter() { if Xor8Filter::is_supported_type(field.data_type()) { // create field for applicable ones - let column_name = Self::build_filter_column(index); + let column_name = Self::build_filter_column(field.name()); let filter_field = DataField::new(&column_name, Vu8::to_data_type()); filter_fields.push(filter_field); @@ -153,11 +153,11 @@ impl BlockFilter { pub fn find( &self, - index: usize, + name: &str, target: DataValue, typ: &DataTypeImpl, ) -> Result { - let filter_column = Self::build_filter_column(index); + let filter_column = Self::build_filter_column(name); if !self.filter_block.schema().has_field(&filter_column) || !Xor8Filter::is_supported_type(typ) || target.is_null() @@ -178,7 +178,7 @@ impl BlockFilter { /// Returns false when the expression must be false, otherwise true. /// The 'true' doesn't really mean the expression is true, but 'maybe true'. /// That is to say, you still need the load all data and run the execution. - pub fn maybe_true(&self, expr: &PhysicalScalar) -> Result { + pub fn maybe_true(&self, expr: &Expression) -> Result { Ok(self.eval(expr)? != FilterEvalResult::False) } @@ -188,10 +188,10 @@ impl BlockFilter { /// /// Otherwise return either Maybe or NotApplicable. #[tracing::instrument(level = "debug", name = "block_filter_index_eval", skip_all)] - pub fn eval(&self, expr: &PhysicalScalar) -> Result { + pub fn eval(&self, expr: &Expression) -> Result { // TODO: support multiple columns and other ops like 'in' ... match expr { - PhysicalScalar::Function { name, args, .. } if args.len() == 2 => { + Expression::Function { name, args, .. } if args.len() == 2 => { match name.to_lowercase().as_str() { "=" => self.eval_equivalent_expression(&args[0], &args[1]), "and" => self.eval_logical_and(&args[0], &args[1]), @@ -206,24 +206,18 @@ impl BlockFilter { // Evaluate the equivalent expression like "name='Alice'" fn eval_equivalent_expression( &self, - left: &PhysicalScalar, - right: &PhysicalScalar, + left: &Expression, + right: &Expression, ) -> Result { let schema: &DataSchemaRef = &self.source_schema; // For now only support single column like "name = 'Alice'" match (left, right) { // match the expression of 'column_name = literal constant' - ( - PhysicalScalar::IndexedVariable { index, .. }, - PhysicalScalar::Constant { value, .. }, - ) - | ( - PhysicalScalar::Constant { value, .. }, - PhysicalScalar::IndexedVariable { index, .. }, - ) => { + (Expression::IndexedVariable { name, .. }, Expression::Constant { value, .. }) + | (Expression::Constant { value, .. }, Expression::IndexedVariable { name, .. 
}) => { // find the corresponding column from source table - let data_field = schema.field(*index); + let data_field = schema.field_with_name(name)?; let data_type = data_field.data_type(); // check if cast needed @@ -233,18 +227,14 @@ impl BlockFilter { } else { value.clone() }; - self.find(*index, value, data_type) + self.find(name, value, data_type) } _ => Ok(FilterEvalResult::NotApplicable), } } // Evaluate the logical and expression - fn eval_logical_and( - &self, - left: &PhysicalScalar, - right: &PhysicalScalar, - ) -> Result { + fn eval_logical_and(&self, left: &Expression, right: &Expression) -> Result { let left_result = self.eval(left)?; if left_result == FilterEvalResult::False { return Ok(FilterEvalResult::False); @@ -265,11 +255,7 @@ impl BlockFilter { } // Evaluate the logical or expression - fn eval_logical_or( - &self, - left: &PhysicalScalar, - right: &PhysicalScalar, - ) -> Result { + fn eval_logical_or(&self, left: &Expression, right: &Expression) -> Result { let left_result = self.eval(left)?; let right_result = self.eval(right)?; match (&left_result, &right_result) { diff --git a/src/query/storages/index/src/range_filter.rs b/src/query/storages/index/src/range_filter.rs index 041ce6fc504c..dadba392b1c2 100644 --- a/src/query/storages/index/src/range_filter.rs +++ b/src/query/storages/index/src/range_filter.rs @@ -27,12 +27,12 @@ use common_functions::scalars::FunctionContext; use common_functions::scalars::FunctionFactory; use common_functions::scalars::PatternType; use common_fuse_meta::meta::StatisticsOfColumns; -use common_planner::PhysicalScalar; +use common_planner::Expression; use common_planner::RequireColumnsVisitor; use common_sql::evaluator::EvalNode; use common_sql::evaluator::Evaluator; -use common_sql::evaluator::PhysicalScalarMonotonicityVisitor; -use common_sql::evaluator::PhysicalScalarOp; +use common_sql::evaluator::ExpressionMonotonicityVisitor; +use common_sql::executor::ExpressionOp; #[derive(Clone)] pub struct RangeFilter { @@ -46,7 +46,7 @@ pub struct RangeFilter { impl RangeFilter { pub fn try_create( ctx: Arc, - exprs: &[PhysicalScalar], + exprs: &[Expression], schema: DataSchemaRef, ) -> Result { debug_assert!(!exprs.is_empty()); @@ -54,7 +54,7 @@ impl RangeFilter { let verifiable_expr = exprs .iter() - .fold(None, |acc: Option, expr| { + .fold(None, |acc: Option, expr| { let verifiable_expr = build_verifiable_expr(expr, &schema, &mut stat_columns); match acc { Some(acc) => Some(acc.and(&verifiable_expr).unwrap()), @@ -68,8 +68,8 @@ impl RangeFilter { .map(|c| c.stat_field.clone()) .collect::>(); let input_schema = Arc::new(DataSchema::new(input_fields)); + let executor = Evaluator::eval_expression(&verifiable_expr, &input_schema)?; - let executor = Evaluator::eval_physical_scalar(&verifiable_expr)?; let func_ctx = ctx.try_get_function_context()?; Ok(Self { @@ -129,19 +129,19 @@ impl RangeFilter { /// convert expr to Verifiable Expression /// Rules: (section 5.2 of http://vldb.org/pvldb/vol14/p3083-edara.pdf) pub fn build_verifiable_expr( - expr: &PhysicalScalar, + expr: &Expression, schema: &DataSchemaRef, stat_columns: &mut StatColumns, -) -> PhysicalScalar { - let unhandled = PhysicalScalar::Constant { +) -> Expression { + let unhandled = Expression::Constant { value: DataValue::Boolean(true), data_type: bool::to_data_type(), }; // TODO: Try to convert `not(is_not_null)` to `is_null`. let (exprs, op) = match expr { - PhysicalScalar::Constant { .. } => return expr.clone(), - PhysicalScalar::Function { name, args, .. 
} if args.len() == 2 => { + Expression::Constant { .. } => return expr.clone(), + Expression::Function { name, args, .. } if args.len() == 2 => { let left = &args[0]; let right = &args[1]; match name.to_lowercase().as_str() { @@ -206,7 +206,7 @@ pub struct StatColumn { column_fields: ColumnFields, stat_type: StatType, stat_field: DataField, - expr: PhysicalScalar, + expr: Expression, } impl StatColumn { @@ -214,7 +214,7 @@ impl StatColumn { column_fields: ColumnFields, stat_type: StatType, field: &DataField, - expr: PhysicalScalar, + expr: Expression, ) -> Self { let column_new = format!("{}_{}", stat_type, field.name()); let data_type = if matches!(stat_type, StatType::Nulls | StatType::RowCount) { @@ -270,10 +270,10 @@ impl StatColumn { let max_col = v.data_type().create_constant_column(&stat.max, 1)?; let variable_right = Some(ColumnWithField::new(max_col, v.clone())); - variables.insert(*k, (variable_left, variable_right)); + variables.insert(v.name().clone(), (variable_left, variable_right)); } - let monotonicity = PhysicalScalarMonotonicityVisitor::check_expression( + let monotonicity = ExpressionMonotonicityVisitor::check_expression( schema, &self.expr, variables, @@ -307,14 +307,14 @@ impl StatColumn { struct VerifiableExprBuilder<'a> { op: &'a str, - args: Vec, + args: Vec, fields: Vec<(DataField, ColumnFields)>, stat_columns: &'a mut StatColumns, } impl<'a> VerifiableExprBuilder<'a> { fn try_create( - exprs: Vec, + exprs: Vec, op: &'a str, schema: &'a DataSchemaRef, stat_columns: &'a mut StatColumns, @@ -371,16 +371,13 @@ impl<'a> VerifiableExprBuilder<'a> { let mut fields = Vec::with_capacity(cols.len()); let left_cols = get_column_fields(schema, cols[0].clone())?; - - let left_name = args[0].pretty_display(); - let left_field = DataField::new(&left_name, args[0].data_type()); + let left_field = args[0].to_data_field(); fields.push((left_field, left_cols)); if cols.len() > 1 { let right_cols = get_column_fields(schema, cols[1].clone())?; - let right_name = args[1].pretty_display(); - let right_field = DataField::new(&right_name, args[1].data_type()); + let right_field = args[1].to_data_field(); fields.push((right_field, right_cols)); } @@ -392,13 +389,13 @@ impl<'a> VerifiableExprBuilder<'a> { }) } - fn build(&mut self) -> Result { + fn build(&mut self) -> Result { // TODO: support in/not in. match self.op { "is_null" => { // should_keep: col.null_count > 0 let nulls_expr = self.nulls_column_expr(0)?; - let scalar_expr = PhysicalScalar::Constant { + let scalar_expr = Expression::Constant { value: DataValue::UInt64(0), data_type: u64::to_data_type(), }; @@ -486,7 +483,7 @@ impl<'a> VerifiableExprBuilder<'a> { left_min.lt_eq(&right_max) } "like" => { - if let PhysicalScalar::Constant { + if let Expression::Constant { value: DataValue::String(v), .. 
} = &self.args[1] @@ -496,7 +493,7 @@ impl<'a> VerifiableExprBuilder<'a> { if !left.is_empty() { let right = right_bound_for_like_pattern(left.clone()); - let left_scalar = PhysicalScalar::Constant { + let left_scalar = Expression::Constant { value: DataValue::String(left), data_type: Vu8::to_data_type(), }; @@ -505,7 +502,7 @@ impl<'a> VerifiableExprBuilder<'a> { if right.is_empty() { return max_expr.gt_eq(&left_scalar); } else { - let right_scalar = PhysicalScalar::Constant { + let right_scalar = Expression::Constant { value: DataValue::String(right), data_type: Vu8::to_data_type(), }; @@ -522,7 +519,7 @@ impl<'a> VerifiableExprBuilder<'a> { )) } "not like" => { - if let PhysicalScalar::Constant { + if let Expression::Constant { value: DataValue::String(v), .. } = &self.args[1] @@ -532,7 +529,7 @@ impl<'a> VerifiableExprBuilder<'a> { // e.g. col not like 'abc' => min_col != 'abc' or max_col != 'abc' PatternType::OrdinalStr => { let const_arg = left_bound_for_like_pattern(v); - let const_arg_scalar = PhysicalScalar::Constant { + let const_arg_scalar = Expression::Constant { value: DataValue::String(const_arg), data_type: Vu8::to_data_type(), }; @@ -550,7 +547,7 @@ impl<'a> VerifiableExprBuilder<'a> { if !left.is_empty() { let right = right_bound_for_like_pattern(left.clone()); - let left_scalar = PhysicalScalar::Constant { + let left_scalar = Expression::Constant { value: DataValue::String(left), data_type: Vu8::to_data_type(), }; @@ -559,7 +556,7 @@ impl<'a> VerifiableExprBuilder<'a> { if right.is_empty() { return min_expr.lt(&left_scalar); } else { - let right_scalar = PhysicalScalar::Constant { + let right_scalar = Expression::Constant { value: DataValue::String(right), data_type: Vu8::to_data_type(), }; @@ -584,7 +581,7 @@ impl<'a> VerifiableExprBuilder<'a> { } } - fn stat_column_expr(&mut self, stat_type: StatType, index: usize) -> Result { + fn stat_column_expr(&mut self, stat_type: StatType, index: usize) -> Result { let (data_field, column_fields) = self.fields[index].clone(); let stat_col = StatColumn::create( column_fields, @@ -593,38 +590,33 @@ impl<'a> VerifiableExprBuilder<'a> { self.args[index].clone(), ); - let column_index = - self.stat_columns.iter().enumerate().find(|&(_, c)| { - c.stat_type == stat_type && c.stat_field.name() == data_field.name() - }); - - let column_index = if let Some((column_index, _)) = column_index { - column_index - } else { + if !self + .stat_columns + .iter() + .any(|c| c.stat_type == stat_type && c.stat_field.name() == data_field.name()) + { self.stat_columns.push(stat_col.clone()); - self.stat_columns.len() - 1 - }; + } - Ok(PhysicalScalar::IndexedVariable { - index: column_index, + Ok(Expression::IndexedVariable { + name: stat_col.stat_field.name().to_string(), data_type: stat_col.stat_field.data_type().clone(), - display_name: stat_col.stat_field.name().to_string(), }) } - fn min_column_expr(&mut self, index: usize) -> Result { + fn min_column_expr(&mut self, index: usize) -> Result { self.stat_column_expr(StatType::Min, index) } - fn max_column_expr(&mut self, index: usize) -> Result { + fn max_column_expr(&mut self, index: usize) -> Result { self.stat_column_expr(StatType::Max, index) } - fn nulls_column_expr(&mut self, index: usize) -> Result { + fn nulls_column_expr(&mut self, index: usize) -> Result { self.stat_column_expr(StatType::Nulls, index) } - fn row_count_column_expr(&mut self, index: usize) -> Result { + fn row_count_column_expr(&mut self, index: usize) -> Result { self.stat_column_expr(StatType::RowCount, index) } } @@ -671,7 
+663,7 @@ pub fn right_bound_for_like_pattern(prefix: Vec) -> Vec { res } -fn get_maybe_monotonic(op: &str, args: &Vec) -> Result { +fn get_maybe_monotonic(op: &str, args: &Vec) -> Result { let factory = FunctionFactory::instance(); let function_features = factory.get_features(op)?; if !function_features.maybe_monotonic { @@ -686,20 +678,21 @@ fn get_maybe_monotonic(op: &str, args: &Vec) -> Result { Ok(true) } -pub fn check_maybe_monotonic(expr: &PhysicalScalar) -> Result { +pub fn check_maybe_monotonic(expr: &Expression) -> Result { match expr { - PhysicalScalar::Constant { .. } => Ok(true), - PhysicalScalar::IndexedVariable { .. } => Ok(true), - PhysicalScalar::Function { name, args, .. } => get_maybe_monotonic(name, args), - PhysicalScalar::Cast { input, .. } => check_maybe_monotonic(input.as_ref()), + Expression::Constant { .. } => Ok(true), + Expression::IndexedVariable { .. } => Ok(true), + Expression::Function { name, args, .. } => get_maybe_monotonic(name, args), + Expression::Cast { input, .. } => check_maybe_monotonic(input.as_ref()), } } -fn get_column_fields(schema: &DataSchemaRef, cols: HashSet) -> Result { +fn get_column_fields(schema: &DataSchemaRef, cols: HashSet) -> Result { let mut column_fields = HashMap::with_capacity(cols.len()); for col in &cols { - column_fields.insert(*col, schema.field(*col).clone()); + let index = schema.index_of(col)?; + column_fields.insert(index, schema.field(index).clone()); } Ok(column_fields) } From 97fac585ff8347897056bf7beba385d45de5b9c8 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 28 Oct 2022 11:55:26 +0800 Subject: [PATCH 26/47] refactor(query): refactor fuse --- src/query/storages/fuse/src/fuse_table.rs | 7 ++-- .../storages/fuse/src/operations/append.rs | 6 +-- .../storages/fuse/src/operations/delete.rs | 8 ++-- .../src/operations/mutation/block_filter.rs | 4 +- .../storages/fuse/src/operations/read_data.rs | 7 +++- .../storages/fuse/src/operations/recluster.rs | 2 +- src/query/storages/fuse/src/pruning/pruner.rs | 41 ++++++++----------- .../fuse/src/pruning/pruning_executor.rs | 2 +- .../storages/fuse/src/pruning/range_pruner.rs | 2 +- .../storages/fuse/src/pruning/topn_pruner.rs | 29 ++++++++++--- .../clustering_information.rs | 6 +-- .../clustering_informations/table_args.rs | 6 +-- 12 files changed, 67 insertions(+), 53 deletions(-) diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index 8dd98239bd4f..8f0dfbd72676 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -37,11 +37,11 @@ use common_meta_app::schema::TableInfo; use common_planner::extras::Extras; use common_planner::extras::Statistics; use common_planner::plans::DeletePlan; -use common_planner::Partitions; use common_planner::Expression; +use common_planner::Partitions; use common_planner::ReadDataSourcePlan; use common_sharing::create_share_table_operator; -use common_sql::PhysicalScalarParser; +use common_sql::ExpressionParser; use common_storage::init_operator; use common_storage::DataOperator; use common_storage::ShareTableConfig; @@ -250,8 +250,7 @@ impl Table for FuseTable { let schema = self.table_info.schema(); let table_meta = Arc::new(self.clone()); if let Some((_, order)) = &self.cluster_key_meta { - let cluster_keys = - PhysicalScalarParser::parse_exprs(schema, table_meta, order).unwrap(); + let cluster_keys = ExpressionParser::parse_exprs(schema, table_meta, order).unwrap(); return cluster_keys; } vec![] diff --git 
a/src/query/storages/fuse/src/operations/append.rs b/src/query/storages/fuse/src/operations/append.rs index 1d3d2a07efda..ab3cec3e14b9 100644 --- a/src/query/storages/fuse/src/operations/append.rs +++ b/src/query/storages/fuse/src/operations/append.rs @@ -63,7 +63,7 @@ impl FuseTable { .iter() .map(|expr| SortColumnDescription { // todo(sundy): use index instead - column_name: expr.pretty_display(), + column_name: expr.column_name(), asc: true, nulls_first: false, }) @@ -128,13 +128,13 @@ impl FuseTable { let mut operators = Vec::with_capacity(cluster_keys.len()); for expr in &cluster_keys { - let cname = expr.pretty_display(); + let cname = expr.column_name(); let index = match merged.iter().position(|x| x.name() == &cname) { None => { let field = DataField::new(&cname, expr.data_type()); operators.push(ChunkOperator::Map { - eval: Evaluator::eval_physical_scalar(expr)?, + eval: Evaluator::eval_expression(expr, &input_schema)?, name: field.name().to_string(), }); extra_key_index.push(merged.len() - 1); diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index 77c84e1f87d2..7903699e474b 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -23,7 +23,7 @@ use common_fuse_meta::meta::TableSnapshot; use common_planner::extras::Extras; use common_planner::plans::DeletePlan; use common_planner::Expression; -use common_sql::PhysicalScalarParser; +use common_sql::ExpressionParser; use tracing::debug; use crate::operations::mutation::delete_from_block; @@ -54,7 +54,7 @@ impl FuseTable { if let Some(filter) = &plan.selection { let table_meta = Arc::new(self.clone()); let physical_scalars = - PhysicalScalarParser::parse_exprs(plan.schema(), table_meta, filter)?; + ExpressionParser::parse_exprs(plan.schema(), table_meta, filter)?; if physical_scalars.is_empty() { return Err(ErrorCode::IndexOutOfBounds( "expression should be valid, but not", @@ -77,7 +77,7 @@ impl FuseTable { &self, ctx: Arc, snapshot: &Arc, - filter: &PhysicalScalar, + filter: &Expression, plan: &DeletePlan, ) -> Result<()> { let cluster_stats_gen = self.cluster_stats_gen()?; @@ -160,7 +160,7 @@ impl FuseTable { let mut cluster_key_index = Vec::with_capacity(cluster_keys.len()); let mut extra_key_index = Vec::with_capacity(cluster_keys.len()); for expr in &cluster_keys { - let cname = expr.pretty_display(); + let cname = expr.column_name(); let index = match merged.iter().position(|x| x.name() == &cname) { None => { let field = DataField::new(&cname, expr.data_type()); diff --git a/src/query/storages/fuse/src/operations/mutation/block_filter.rs b/src/query/storages/fuse/src/operations/mutation/block_filter.rs index 3e2a7b87a478..52b437d0f0db 100644 --- a/src/query/storages/fuse/src/operations/mutation/block_filter.rs +++ b/src/query/storages/fuse/src/operations/mutation/block_filter.rs @@ -33,7 +33,7 @@ pub async fn delete_from_block( block_meta: &BlockMeta, ctx: &Arc, filter_column_proj: Projection, - filter_expr: &PhysicalScalar, + filter_expr: &Expression, ) -> Result { let mut filtering_whole_block = false; @@ -61,7 +61,7 @@ pub async fn delete_from_block( let reader = table.create_block_reader(proj)?; let data_block = reader.read_with_block_meta(block_meta).await?; - let eval_node = Evaluator::eval_physical_scalar(filter_expr)?; + let eval_node = Evaluator::eval_expression(filter_expr, data_block.schema().as_ref())?; let filter_result = eval_node .eval(&ctx.try_get_function_context()?, &data_block)? 
.vector; diff --git a/src/query/storages/fuse/src/operations/read_data.rs b/src/query/storages/fuse/src/operations/read_data.rs index f42ed959bb2b..fa16566da214 100644 --- a/src/query/storages/fuse/src/operations/read_data.rs +++ b/src/query/storages/fuse/src/operations/read_data.rs @@ -21,6 +21,7 @@ use common_base::base::Runtime; use common_catalog::table_context::TableContext; use common_datablocks::DataBlock; use common_datavalues::ColumnRef; +use common_datavalues::DataSchemaRef; use common_exception::ErrorCode; use common_exception::Result; use common_functions::scalars::FunctionContext; @@ -99,11 +100,12 @@ impl FuseTable { &self, _ctx: Arc, plan: &ReadDataSourcePlan, + schema: DataSchemaRef, ) -> Result>> { Ok(match self.prewhere_of_push_downs(&plan.push_downs) { None => Arc::new(None), Some(v) => { - let executor = Evaluator::eval_physical_scalar(&v.filter)?; + let executor = Evaluator::eval_expression(&v.filter, schema.as_ref())?; Arc::new(Some(executor)) } }) @@ -178,7 +180,8 @@ impl FuseTable { let block_reader = self.build_block_reader(plan)?; let prewhere_reader = self.build_prewhere_reader(plan)?; - let prewhere_filter = self.build_prewhere_filter_executor(ctx.clone(), plan)?; + let prewhere_filter = + self.build_prewhere_filter_executor(ctx.clone(), plan, prewhere_reader.schema())?; let remain_reader = self.build_remain_reader(plan)?; info!("read block data adjust max io requests:{}", max_io_requests); diff --git a/src/query/storages/fuse/src/operations/recluster.rs b/src/query/storages/fuse/src/operations/recluster.rs index 4344c92b783f..eb1f87de2f6f 100644 --- a/src/query/storages/fuse/src/operations/recluster.rs +++ b/src/query/storages/fuse/src/operations/recluster.rs @@ -149,7 +149,7 @@ impl FuseTable { .cluster_keys() .iter() .map(|expr| SortColumnDescription { - column_name: expr.pretty_display(), + column_name: expr.column_name(), asc: true, nulls_first: false, }) diff --git a/src/query/storages/fuse/src/pruning/pruner.rs b/src/query/storages/fuse/src/pruning/pruner.rs index feb958da37c1..329828f8a482 100644 --- a/src/query/storages/fuse/src/pruning/pruner.rs +++ b/src/query/storages/fuse/src/pruning/pruner.rs @@ -39,7 +39,7 @@ struct FilterPruner { index_columns: Vec, /// the expression that would be evaluate - filter_expression: PhysicalScalar, + filter_expression: Expression, /// the data accessor dal: Operator, @@ -52,7 +52,7 @@ impl FilterPruner { pub fn new( ctx: Arc, index_columns: Vec, - filter_expression: PhysicalScalar, + filter_expression: Expression, dal: Operator, data_schema: DataSchemaRef, ) -> Self { @@ -103,7 +103,7 @@ impl Pruner for FilterPruner { /// otherwise, a [Filter] backed pruner will be return pub fn new_filter_pruner( ctx: &Arc, - filter_exprs: Option<&[PhysicalScalar]>, + filter_exprs: Option<&[Expression]>, schema: &DataSchemaRef, dal: Operator, ) -> Result>> { @@ -114,18 +114,18 @@ pub fn new_filter_pruner( // check if there were applicable filter conditions let expr = exprs .iter() - .fold(None, |acc: Option, item| match acc { + .fold(None, |acc: Option, item| match acc { Some(acc) => Some(acc.and(item).unwrap()), None => Some(item.clone()), }) .unwrap(); - let point_query_cols = columns_indices_of_eq_expressions(&expr)?; + let point_query_cols = columns_of_eq_expressions(&expr)?; if !point_query_cols.is_empty() { // convert to filter column names let filter_block_cols = point_query_cols - .into_iter() - .map(BlockFilter::build_filter_column) + .iter() + .map(|c| BlockFilter::build_filter_column(c.as_str())) .collect(); return 
Ok(Some(Arc::new(FilterPruner::new( @@ -144,7 +144,7 @@ pub fn new_filter_pruner( mod util { use common_exception::ErrorCode; - use common_planner::PhysicalScalarVisitor; + use common_planner::ExpressionVisitor; use super::*; #[tracing::instrument(level = "debug", skip_all)] @@ -152,7 +152,7 @@ mod util { ctx: Arc, dal: Operator, schema: &DataSchemaRef, - filter_expr: &PhysicalScalar, + filter_expr: &Expression, filter_col_names: &[String], index_location: &Location, index_length: u64, @@ -178,27 +178,22 @@ mod util { struct PointQueryVisitor { // indices of columns which used by point query kept here - columns: HashSet, + columns: HashSet, } - impl PhysicalScalarVisitor for PointQueryVisitor { - fn pre_visit(mut self, expr: &PhysicalScalar) -> Result> { + impl ExpressionVisitor for PointQueryVisitor { + fn pre_visit(mut self, expr: &Expression) -> Result> { // 1. only binary op "=" is considered, which is NOT enough // 2. should combine this logic with Filter match expr { - PhysicalScalar::Function { name, args, .. } + Expression::Function { name, args, .. } if name.as_str() == "=" && args.len() == 2 => { match (&args[0], &args[1]) { - ( - PhysicalScalar::IndexedVariable { index, .. }, - PhysicalScalar::Constant { .. }, - ) - | ( - PhysicalScalar::Constant { .. }, - PhysicalScalar::IndexedVariable { index, .. }, - ) => { - self.columns.insert(*index); + (Expression::IndexedVariable { name, .. }, Expression::Constant { .. }) + | (Expression::Constant { .. }, Expression::IndexedVariable { name, .. }) => + { + self.columns.insert(name.clone()); Ok(common_planner::Recursion::Stop(self)) } _ => Ok(common_planner::Recursion::Continue(self)), @@ -209,7 +204,7 @@ mod util { } } - pub fn columns_indices_of_eq_expressions(filter_expr: &PhysicalScalar) -> Result> { + pub fn columns_of_eq_expressions(filter_expr: &Expression) -> Result> { let visitor = PointQueryVisitor { columns: HashSet::new(), }; diff --git a/src/query/storages/fuse/src/pruning/pruning_executor.rs b/src/query/storages/fuse/src/pruning/pruning_executor.rs index ae0aeb0fbb40..d25e3eb79399 100644 --- a/src/query/storages/fuse/src/pruning/pruning_executor.rs +++ b/src/query/storages/fuse/src/pruning/pruning_executor.rs @@ -157,7 +157,7 @@ impl BlockPruner { let push_down = push_down.as_ref().unwrap(); let limit = push_down.limit.unwrap(); let sort = push_down.order_by.clone(); - let tpruner = topn_pruner::TopNPrunner::new(sort, limit); + let tpruner = topn_pruner::TopNPrunner::new(schema, sort, limit); return tpruner.prune(metas); } diff --git a/src/query/storages/fuse/src/pruning/range_pruner.rs b/src/query/storages/fuse/src/pruning/range_pruner.rs index 73042955a266..fdaceddfc33e 100644 --- a/src/query/storages/fuse/src/pruning/range_pruner.rs +++ b/src/query/storages/fuse/src/pruning/range_pruner.rs @@ -57,7 +57,7 @@ impl RangePruner for RangeFilter { pub fn new_range_pruner<'a>( ctx: &Arc, - filter_expr: Option<&'a [PhysicalScalar]>, + filter_expr: Option<&'a [Expression]>, schema: &'a DataSchemaRef, ) -> Result> { Ok(match filter_expr { diff --git a/src/query/storages/fuse/src/pruning/topn_pruner.rs b/src/query/storages/fuse/src/pruning/topn_pruner.rs index b72bdbc8ca57..7fb7640602cb 100644 --- a/src/query/storages/fuse/src/pruning/topn_pruner.rs +++ b/src/query/storages/fuse/src/pruning/topn_pruner.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
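The point-query detection above only recognizes `column = constant` shapes. A minimal sketch of that behavior, assuming the `ExpressionOp::binary_op` helper from an earlier commit in this series and hypothetical column/constant values (`columns_of_eq_expressions` is module-private, so this would live beside it):

use std::collections::HashSet;

use common_datavalues::prelude::*;
use common_exception::Result;
use common_planner::Expression;

fn point_query_demo() -> Result<()> {
    let name_col = Expression::IndexedVariable {
        name: "name".to_string(),
        data_type: Vu8::to_data_type(),
    };
    let alice = Expression::Constant {
        value: DataValue::String("Alice".as_bytes().to_vec()),
        data_type: Vu8::to_data_type(),
    };
    // `name = 'Alice'` is a prunable point query, so "name" is collected.
    let filter = name_col.binary_op("=", &alice)?;
    let cols = columns_of_eq_expressions(&filter)?;
    assert_eq!(cols, HashSet::from(["name".to_string()]));
    Ok(())
}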
+use common_datavalues::DataSchemaRef; use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::BlockMeta; @@ -19,13 +20,22 @@ use common_fuse_meta::meta::ColumnStatistics; use common_planner::Expression; pub(crate) struct TopNPrunner { - sort: Vec<(PhysicalScalar, bool, bool)>, + schema: DataSchemaRef, + sort: Vec<(Expression, bool, bool)>, limit: usize, } impl TopNPrunner { - pub(crate) fn new(sort: Vec<(PhysicalScalar, bool, bool)>, limit: usize) -> Self { - Self { sort, limit } + pub(crate) fn new( + schema: DataSchemaRef, + sort: Vec<(Expression, bool, bool)>, + limit: usize, + ) -> Self { + Self { + schema, + sort, + limit, + } + } } @@ -42,8 +52,17 @@ impl TopNPrunner { let (sort, asc, nulls_first) = &self.sort[0]; // Currently, we only support topn on single-column sort. // TODO: support monadic + multi expression + order by cluster key sort. + + let column = if let Expression::IndexedVariable { name, .. } = sort { + name + } else { + return Ok(metas); + }; + + let sort_idx = if let Ok(index) = self.schema.index_of(column.as_str()) { + index as u32 - let sort_idx = if let PhysicalScalar::IndexedVariable { index, .. } = sort { - *index as u32 } else { return Ok(metas); }; diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information.rs index 2ffe36c988ae..f4f65d781fb3 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/clustering_information.rs @@ -33,7 +33,7 @@ use crate::Table; pub struct ClusteringInformation<'a> { pub ctx: Arc<dyn TableContext>, pub table: &'a FuseTable, - pub cluster_keys: Vec<PhysicalScalar>, + pub cluster_keys: Vec<Expression>, } struct ClusteringStatistics { @@ -48,7 +48,7 @@ impl<'a> ClusteringInformation<'a> { pub fn new( ctx: Arc<dyn TableContext>, table: &'a FuseTable, - cluster_keys: Vec<PhysicalScalar>, + cluster_keys: Vec<Expression>, ) -> Self { Self { ctx, @@ -75,7 +75,7 @@ impl<'a> ClusteringInformation<'a> { let names = self .cluster_keys .iter() - .map(|x| x.pretty_display()) + .map(|x| x.column_name()) .collect::<Vec<String>>() .join(", "); let cluster_by_keys = format!("({})", names); diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs index ec62c6b8e092..2c71a46adbc4 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs @@ -18,7 +18,7 @@ use common_catalog::table::Table; use common_exception::ErrorCode; use common_exception::Result; use common_planner::Expression; -use common_sql::PhysicalScalarParser; +use common_sql::ExpressionParser; use crate::table_functions::string_value; use crate::table_functions::TableArgs; @@ -42,7 +42,7 @@ pub fn parse_func_table_args(table_args: &TableArgs) -> Result<(String, String)> pub fn get_cluster_keys(table: &FuseTable, definition: &str) -> Result<Vec<Expression>> { let cluster_keys = if !definition.is_empty() { let table_meta = Arc::new(table.clone()); - PhysicalScalarParser::parse_exprs(table.schema(), table_meta, definition)? + ExpressionParser::parse_exprs(table.schema(), table_meta, definition)?
} else { table.cluster_keys() }; @@ -54,7 +54,5 @@ pub fn get_cluster_keys(table: &FuseTable, definition: &str) -> Result Date: Fri, 28 Oct 2022 12:12:05 +0800 Subject: [PATCH 27/47] refactor(query): refactor services --- .../service/src/api/rpc/exchange/data_exchange.rs | 2 +- src/query/service/src/api/rpc/flight_scatter_hash.rs | 2 +- .../service/src/api/rpc/flight_scatter_hash_v2.rs | 2 +- .../src/interpreters/interpreter_table_describe.rs | 2 +- .../src/interpreters/interpreter_table_recluster.rs | 10 +++++----- .../src/interpreters/interpreter_table_show_create.rs | 2 +- src/query/service/src/pipelines/pipeline_builder.rs | 4 ++-- .../pipelines/processors/transforms/hash_join/desc.rs | 2 +- .../processors/transforms/hash_join/join_hash_table.rs | 2 +- src/query/sql/src/planner/expression_parser.rs | 6 +----- src/query/storages/fuse/src/fuse_table.rs | 3 +-- src/query/storages/fuse/src/operations/delete.rs | 3 +-- .../clustering_informations/table_args.rs | 2 +- 13 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/query/service/src/api/rpc/exchange/data_exchange.rs b/src/query/service/src/api/rpc/exchange/data_exchange.rs index e52a19bd41f6..a48ba39dabe4 100644 --- a/src/query/service/src/api/rpc/exchange/data_exchange.rs +++ b/src/query/service/src/api/rpc/exchange/data_exchange.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use common_planner::PhysicalScalar; +use common_sql::executor::PhysicalScalar; #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum DataExchange { diff --git a/src/query/service/src/api/rpc/flight_scatter_hash.rs b/src/query/service/src/api/rpc/flight_scatter_hash.rs index f47bff90059d..b0dd73ef119a 100644 --- a/src/query/service/src/api/rpc/flight_scatter_hash.rs +++ b/src/query/service/src/api/rpc/flight_scatter_hash.rs @@ -19,9 +19,9 @@ use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; use common_functions::scalars::FunctionContext; -use common_planner::PhysicalScalar; use common_sql::evaluator::EvalNode; use common_sql::evaluator::Evaluator; +use common_sql::executor::PhysicalScalar; use crate::api::rpc::flight_scatter::FlightScatter; use crate::sessions::QueryContext; diff --git a/src/query/service/src/api/rpc/flight_scatter_hash_v2.rs b/src/query/service/src/api/rpc/flight_scatter_hash_v2.rs index a10e5e3a24e1..528e61d6af34 100644 --- a/src/query/service/src/api/rpc/flight_scatter_hash_v2.rs +++ b/src/query/service/src/api/rpc/flight_scatter_hash_v2.rs @@ -22,7 +22,7 @@ use common_exception::Result; use common_functions::scalars::Function; use common_functions::scalars::FunctionContext; use common_functions::scalars::FunctionFactory; -use common_planner::PhysicalScalar; +use common_sql::executor::PhysicalScalar; use crate::api::rpc::flight_scatter::FlightScatter; use crate::sql::evaluator::EvalNode; diff --git a/src/query/service/src/interpreters/interpreter_table_describe.rs b/src/query/service/src/interpreters/interpreter_table_describe.rs index 5bb50d0c17b5..bb44b36a0cd3 100644 --- a/src/query/service/src/interpreters/interpreter_table_describe.rs +++ b/src/query/service/src/interpreters/interpreter_table_describe.rs @@ -19,7 +19,7 @@ use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; use common_planner::plans::DescribeTablePlan; -use common_planner::PhysicalScalar; +use common_sql::executor::PhysicalScalar; use crate::interpreters::Interpreter; use 
crate::pipelines::PipelineBuildResult; diff --git a/src/query/service/src/interpreters/interpreter_table_recluster.rs b/src/query/service/src/interpreters/interpreter_table_recluster.rs index d2f7e4ec4f38..860fc465df08 100644 --- a/src/query/service/src/interpreters/interpreter_table_recluster.rs +++ b/src/query/service/src/interpreters/interpreter_table_recluster.rs @@ -17,6 +17,7 @@ use std::time::SystemTime; use common_exception::Result; use common_planner::extras::Extras; +use common_sql::executor::ExpressionBuilderWithoutRenaming; use crate::interpreters::Interpreter; use crate::interpreters::InterpreterClusteringHistory; @@ -26,7 +27,6 @@ use crate::pipelines::Pipeline; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; use crate::sessions::TableContext; -use crate::sql::executor::PhysicalScalarBuilder; use crate::sql::plans::ReclusterTablePlan; pub struct ReclusterTableInterpreter { @@ -57,15 +57,15 @@ impl Interpreter for ReclusterTableInterpreter { let extras = match &plan.push_downs { None => None, Some(scalar) => { - let schema = self.plan.schema(); - let mut builder = PhysicalScalarBuilder::new(&schema); - let physical_scalar = builder.build(scalar)?; + let eb = ExpressionBuilderWithoutRenaming::create(plan.metadata.clone()); + let pred_expr = eb.build(scalar)?; Some(Extras { - filters: vec![physical_scalar], + filters: vec![pred_expr], ..Extras::default() }) } }; + loop { let table = self .ctx diff --git a/src/query/service/src/interpreters/interpreter_table_show_create.rs b/src/query/service/src/interpreters/interpreter_table_show_create.rs index 557ce842667e..9000bf203897 100644 --- a/src/query/service/src/interpreters/interpreter_table_show_create.rs +++ b/src/query/service/src/interpreters/interpreter_table_show_create.rs @@ -18,7 +18,7 @@ use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::Result; use common_planner::plans::ShowCreateTablePlan; -use common_planner::PhysicalScalar; +use common_sql::executor::PhysicalScalar; use tracing::debug; use crate::interpreters::Interpreter; diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index ba2fcb7842a0..366e79ec55bd 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -30,10 +30,10 @@ use common_functions::scalars::FunctionContext; use common_functions::scalars::FunctionFactory; use common_pipeline_core::Pipe; use common_pipeline_sinks::processors::sinks::UnionReceiveSink; -use common_planner::AggregateFunctionDesc; -use common_planner::PhysicalScalar; use common_sql::evaluator::ChunkOperator; use common_sql::evaluator::CompoundChunkOperator; +use common_sql::executor::AggregateFunctionDesc; +use common_sql::executor::PhysicalScalar; use crate::interpreters::fill_missing_columns; use crate::pipelines::processors::port::InputPort; diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs index ed769af2f4a0..013fa2ab5b6c 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use common_catalog::table_context::TableContext; use common_exception::Result; use common_functions::scalars::FunctionFactory; -use common_planner::PhysicalScalar; +use common_sql::executor::PhysicalScalar; use 
common_sql::IndexType; use parking_lot::RwLock; diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/join_hash_table.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/join_hash_table.rs index d420fcd05978..6e06d5e0bec2 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/join_hash_table.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/join_hash_table.rs @@ -31,7 +31,7 @@ use common_exception::Result; use common_hashtable::HashMap; use common_hashtable::HashtableKeyable; use common_hashtable::UnsizedHashMap; -use common_planner::PhysicalScalar; +use common_sql::executor::PhysicalScalar; use parking_lot::RwLock; use primitive_types::U256; use primitive_types::U512; diff --git a/src/query/sql/src/planner/expression_parser.rs b/src/query/sql/src/planner/expression_parser.rs index 12809c0300ff..9cd2dc77d35d 100644 --- a/src/query/sql/src/planner/expression_parser.rs +++ b/src/query/sql/src/planner/expression_parser.rs @@ -37,11 +37,7 @@ use crate::Visibility; pub struct ExpressionParser; impl ExpressionParser { - pub fn parse_exprs( - schema: DataSchemaRef, - table_meta: Arc, - sql: &str, - ) -> Result> { + pub fn parse_exprs(table_meta: Arc, sql: &str) -> Result> { let sql_dialect = Dialect::MySQL; let tokens = tokenize_sql(sql)?; let backtrace = Backtrace::new(); diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index 8f0dfbd72676..a17d5efab4af 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -247,10 +247,9 @@ impl Table for FuseTable { } fn cluster_keys(&self) -> Vec { - let schema = self.table_info.schema(); let table_meta = Arc::new(self.clone()); if let Some((_, order)) = &self.cluster_key_meta { - let cluster_keys = ExpressionParser::parse_exprs(schema, table_meta, order).unwrap(); + let cluster_keys = ExpressionParser::parse_exprs(table_meta, order).unwrap(); return cluster_keys; } vec![] diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index 7903699e474b..863d799bbae2 100644 --- a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -53,8 +53,7 @@ impl FuseTable { // check if unconditional deletion if let Some(filter) = &plan.selection { let table_meta = Arc::new(self.clone()); - let physical_scalars = - ExpressionParser::parse_exprs(plan.schema(), table_meta, filter)?; + let physical_scalars = ExpressionParser::parse_exprs(table_meta, filter)?; if physical_scalars.is_empty() { return Err(ErrorCode::IndexOutOfBounds( "expression should be valid, but not", diff --git a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs index 2c71a46adbc4..8f9aef4348b3 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_informations/table_args.rs @@ -42,7 +42,7 @@ pub fn parse_func_table_args(table_args: &TableArgs) -> Result<(String, String)> pub fn get_cluster_keys(table: &FuseTable, definition: &str) -> Result> { let cluster_keys = if !definition.is_empty() { let table_meta = Arc::new(table.clone()); - ExpressionParser::parse_exprs(table.schema(), table_meta, definition)? + ExpressionParser::parse_exprs(table_meta, definition)? 
} else { table.cluster_keys() }; From 16d6ee85413436696f6fe1c416a84622aa93b181 Mon Sep 17 00:00:00 2001 From: baishen Date: Fri, 28 Oct 2022 12:44:48 +0800 Subject: [PATCH 28/47] merge --- src/query/sql/src/executor/physical_scalar.rs | 62 ++++++++++++++++--- .../hive/src/hive_partition_pruner.rs | 6 +- src/query/storages/hive/src/hive_table.rs | 6 +- 3 files changed, 58 insertions(+), 16 deletions(-) diff --git a/src/query/sql/src/executor/physical_scalar.rs b/src/query/sql/src/executor/physical_scalar.rs index cf49f0049380..e7dd5655cbf1 100644 --- a/src/query/sql/src/executor/physical_scalar.rs +++ b/src/query/sql/src/executor/physical_scalar.rs @@ -81,34 +81,76 @@ impl PhysicalScalar { } } - // todo(sundy) pub fn from_expression(expression: &Expression, schema: &DataSchema) -> Result { match expression { - Expression::IndexedVariable { name, data_type } => todo!(), - Expression::Constant { value, data_type } => todo!(), + Expression::IndexedVariable { name, data_type } => { + Ok(PhysicalScalar::IndexedVariable { + index: schema.index_of(name)?, + display_name: name.to_string(), + data_type: data_type.clone(), + }) + } + Expression::Constant { value, data_type } => Ok(PhysicalScalar::Constant { + value: value.clone(), + data_type: data_type.clone(), + }), Expression::Function { name, args, return_type, - } => todo!(), - Expression::Cast { input, target } => todo!(), + } => { + let mut new_args = Vec::with_capacity(args.len()); + for arg in args.iter() { + let new_arg = Self::from_expression(arg, schema)?; + new_args.push(new_arg); + } + Ok(PhysicalScalar::Function { + name: name.to_string(), + args: new_args, + return_type: return_type.clone(), + }) + } + Expression::Cast { input, target } => Ok(PhysicalScalar::Cast { + input: Box::new(Self::from_expression(input, schema)?), + target: target.clone(), + }), } } pub fn to_expression(&self, schema: &DataSchema) -> Result { match self { PhysicalScalar::IndexedVariable { - index, data_type, display_name, - } => todo!(), - PhysicalScalar::Constant { value, data_type } => todo!(), + .. 
+ } => Ok(Expression::IndexedVariable { + data_type: data_type.clone(), + name: display_name.to_string(), + }), + PhysicalScalar::Constant { value, data_type } => Ok(Expression::Constant { + value: value.clone(), + data_type: data_type.clone(), + }), PhysicalScalar::Function { name, args, return_type, - } => todo!(), - PhysicalScalar::Cast { input, target } => todo!(), + } => { + let mut new_args = Vec::with_capacity(args.len()); + for arg in args.iter() { + let new_arg = arg.to_expression(schema)?; + new_args.push(new_arg); + } + Ok(Expression::Function { + name: name.to_string(), + args: new_args, + return_type: return_type.clone(), + }) + } + PhysicalScalar::Cast { input, target } => Ok(Expression::Cast { + input: Box::new(input.to_expression(schema)?), + target: target.clone(), + }), } } } diff --git a/src/query/storages/hive/src/hive_partition_pruner.rs b/src/query/storages/hive/src/hive_partition_pruner.rs index d4b8d8f85425..6d4c6f307289 100644 --- a/src/query/storages/hive/src/hive_partition_pruner.rs +++ b/src/query/storages/hive/src/hive_partition_pruner.rs @@ -24,12 +24,12 @@ use common_exception::ErrorCode; use common_exception::Result; use common_fuse_meta::meta::ColumnStatistics; use common_fuse_meta::meta::StatisticsOfColumns; -use common_planner::PhysicalScalar; +use common_planner::Expression; use common_storages_index::range_filter::RangeFilter; pub struct HivePartitionPruner { pub ctx: Arc, - pub filters: Vec, + pub filters: Vec, // pub partitions: Vec, pub partition_schema: Arc, } @@ -37,7 +37,7 @@ pub struct HivePartitionPruner { impl HivePartitionPruner { pub fn create( ctx: Arc, - filters: Vec, + filters: Vec, partition_schema: Arc, ) -> Self { HivePartitionPruner { diff --git a/src/query/storages/hive/src/hive_table.rs b/src/query/storages/hive/src/hive_table.rs index c74493136643..98ed777584d4 100644 --- a/src/query/storages/hive/src/hive_table.rs +++ b/src/query/storages/hive/src/hive_table.rs @@ -39,8 +39,8 @@ use common_pipeline_sources::processors::sources::sync_source::SyncSourcer; use common_planner::extras::Extras; use common_planner::extras::Statistics; use common_planner::plans::Projection; +use common_planner::Expression; use common_planner::Partitions; -use common_planner::PhysicalScalar; use common_planner::ReadDataSourcePlan; use common_planner::RequireColumnsVisitor; use common_storage::init_operator; @@ -229,7 +229,7 @@ impl HiveTable { } fn get_columns_from_expressions( - expressions: &[PhysicalScalar], + expressions: &[Expression], schema: &DataSchemaRef, ) -> HashSet { let result = expressions @@ -302,7 +302,7 @@ impl HiveTable { &self, ctx: Arc, partition_keys: Vec, - filter_expressions: Vec, + filter_expressions: Vec, ) -> Result)>> { let hive_catalog = ctx.get_catalog(CATALOG_HIVE)?; let hive_catalog = hive_catalog.as_any().downcast_ref::().unwrap(); From 923358eeb97dd9e56ad8b2b66382176ede005287 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 28 Oct 2022 13:00:44 +0800 Subject: [PATCH 29/47] refactor(query): refactor --- src/query/sql/src/evaluator/physical_scalar.rs | 2 +- .../sql/src/executor/physical_plan_builder.rs | 2 +- src/query/sql/src/executor/physical_scalar.rs | 15 ++++++++------- src/query/sql/src/planner/expression_parser.rs | 1 - 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/query/sql/src/evaluator/physical_scalar.rs b/src/query/sql/src/evaluator/physical_scalar.rs index c6ba756cac3e..cbc60d7df308 100644 --- a/src/query/sql/src/evaluator/physical_scalar.rs +++ 
b/src/query/sql/src/evaluator/physical_scalar.rs @@ -29,7 +29,7 @@ use crate::executor::PhysicalScalar; impl Evaluator { pub fn eval_expression(expression: &Expression, schema: &DataSchema) -> Result { - let physical_scalar = PhysicalScalar::from_expression(&expression, schema)?; + let physical_scalar = PhysicalScalar::from_expression(expression, schema)?; Self::eval_physical_scalar(&physical_scalar) } diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs index 37181e7e24f2..f07cb5a56273 100644 --- a/src/query/sql/src/executor/physical_plan_builder.rs +++ b/src/query/sql/src/executor/physical_plan_builder.rs @@ -519,7 +519,7 @@ impl PhysicalPlanBuilder { }); assert!( - predicate.is_none(), + predicate.is_some(), "There should be at least one predicate in prewhere" ); diff --git a/src/query/sql/src/executor/physical_scalar.rs b/src/query/sql/src/executor/physical_scalar.rs index e7dd5655cbf1..5f2209cb15c8 100644 --- a/src/query/sql/src/executor/physical_scalar.rs +++ b/src/query/sql/src/executor/physical_scalar.rs @@ -120,13 +120,14 @@ impl PhysicalScalar { pub fn to_expression(&self, schema: &DataSchema) -> Result { match self { PhysicalScalar::IndexedVariable { - data_type, - display_name, - .. - } => Ok(Expression::IndexedVariable { - data_type: data_type.clone(), - name: display_name.to_string(), - }), + index, data_type, .. + } => { + let name = schema.field(*index); + Ok(Expression::IndexedVariable { + data_type: data_type.clone(), + name: name.name().clone(), + }) + } PhysicalScalar::Constant { value, data_type } => Ok(Expression::Constant { value: value.clone(), data_type: data_type.clone(), diff --git a/src/query/sql/src/planner/expression_parser.rs b/src/query/sql/src/planner/expression_parser.rs index 9cd2dc77d35d..8637c95431e9 100644 --- a/src/query/sql/src/planner/expression_parser.rs +++ b/src/query/sql/src/planner/expression_parser.rs @@ -20,7 +20,6 @@ use common_ast::Backtrace; use common_ast::Dialect; use common_catalog::catalog::CATALOG_DEFAULT; use common_catalog::table::Table; -use common_datavalues::DataSchemaRef; use common_exception::Result; use common_planner::Expression; use common_settings::Settings; From a60ede8047f6fffb8a2ea7c755a3b1d65cbd61c0 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 28 Oct 2022 13:04:40 +0800 Subject: [PATCH 30/47] refactor(query): refactor --- src/query/storages/hive/src/hive_table.rs | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/query/storages/hive/src/hive_table.rs b/src/query/storages/hive/src/hive_table.rs index 98ed777584d4..52eb35a7cf9b 100644 --- a/src/query/storages/hive/src/hive_table.rs +++ b/src/query/storages/hive/src/hive_table.rs @@ -228,19 +228,11 @@ impl HiveTable { } } - fn get_columns_from_expressions( - expressions: &[Expression], - schema: &DataSchemaRef, - ) -> HashSet { - let result = expressions + fn get_columns_from_expressions(expressions: &[Expression]) -> HashSet { + expressions .iter() .flat_map(|e| RequireColumnsVisitor::collect_columns_from_expr(e).unwrap()) - .collect::>(); - - result - .iter() - .map(|index| schema.field(*index).name().clone()) - .collect() + .collect::>() } fn get_projections(&self, push_downs: &Option) -> Result> { From 36eb297cfed51f80de7b96707d07dec8ac901762 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 28 Oct 2022 13:12:15 +0800 Subject: [PATCH 31/47] refactor(query): refactor --- Cargo.lock | 1 - 
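Taken together, patches 28 and 29 make Expression -> PhysicalScalar -> Expression a genuine round trip: from_expression resolves a column name to its schema index, and to_expression resolves the index back to the field name through the same schema. A minimal sketch of that invariant, using stand-in types rather than the crate's real DataSchema and DataValue:

// Sketch only: simplified stand-ins for the crate's Expression/PhysicalScalar,
// to show the name <-> index round trip that PATCH 29 makes exact.
struct Schema {
    fields: Vec<String>,
}

impl Schema {
    fn index_of(&self, name: &str) -> Option<usize> {
        self.fields.iter().position(|f| f.as_str() == name)
    }
}

#[derive(Debug, PartialEq)]
struct Column {
    name: String,
}

struct Indexed {
    index: usize,
}

fn from_expression(e: &Column, schema: &Schema) -> Option<Indexed> {
    // Resolve the column name to its position, as from_expression does.
    Some(Indexed { index: schema.index_of(&e.name)? })
}

fn to_expression(p: &Indexed, schema: &Schema) -> Column {
    // Resolve the index back through the same schema instead of trusting a
    // stored display name (the PATCH 29 fix).
    Column { name: schema.fields[p.index].clone() }
}

fn main() {
    let schema = Schema { fields: vec!["a".into(), "b".into()] };
    let expr = Column { name: "b".into() };
    let physical = from_expression(&expr, &schema).unwrap();
    assert_eq!(to_expression(&physical, &schema), expr);
}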
src/query/storages/hive/src/hive_table.rs | 2 +- src/query/storages/index/Cargo.toml | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8be6baa187a8..50161971dd02 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2187,7 +2187,6 @@ dependencies = [ "common-exception", "common-functions", "common-fuse-meta", - "common-pipeline-transforms", "common-planner", "common-sql", "criterion", diff --git a/src/query/storages/hive/src/hive_table.rs b/src/query/storages/hive/src/hive_table.rs index 52eb35a7cf9b..5526ffbd8cb2 100644 --- a/src/query/storages/hive/src/hive_table.rs +++ b/src/query/storages/hive/src/hive_table.rs @@ -213,7 +213,7 @@ impl HiveTable { // filter out the partition column related expressions let partition_keys = self.get_partition_key_sets(); - let columns = Self::get_columns_from_expressions(f, &plan.schema()); + let columns = Self::get_columns_from_expressions(f); if columns.difference(&partition_keys).count() == 0 { return true; } diff --git a/src/query/storages/index/Cargo.toml b/src/query/storages/index/Cargo.toml index 327c90962e3d..68f50afea3fd 100644 --- a/src/query/storages/index/Cargo.toml +++ b/src/query/storages/index/Cargo.toml @@ -21,7 +21,6 @@ common-datavalues = { path = "../../datavalues" } common-exception = { path = "../../../common/exception" } common-functions = { path = "../../functions" } common-fuse-meta = { path = "../fuse-meta" } -common-pipeline-transforms = { path = "../../pipeline/transforms" } common-planner = { path = "../../planner" } common-sql = { path = "../../sql" } From 0d1fb5bea612651f7a9df76697264457b6bc48f2 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 28 Oct 2022 15:13:48 +0800 Subject: [PATCH 32/47] refactor(query): merge main --- src/query/sql/src/planner/expression_parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/sql/src/planner/expression_parser.rs b/src/query/sql/src/planner/expression_parser.rs index 8637c95431e9..ab21b2121a1c 100644 --- a/src/query/sql/src/planner/expression_parser.rs +++ b/src/query/sql/src/planner/expression_parser.rs @@ -77,7 +77,7 @@ impl ExpressionParser { let mut type_checker = SyncTypeChecker::new(&bind_context, &name_resolution_ctx, &[]); let mut expressions = Vec::with_capacity(exprs.len()); - let builder = ExpressionBuilderWithoutRenaming::create(metadata.clone()); + let builder = ExpressionBuilderWithoutRenaming::create(metadata); for expr in exprs.iter() { let (scalar, _) = *type_checker.resolve(expr, None)?; From d92409d5de77b34a1a54b355937be10e9ede2bf0 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 28 Oct 2022 15:34:12 +0800 Subject: [PATCH 33/47] refactor(query): fix bug --- src/query/sql/src/executor/expression_builder.rs | 2 +- src/query/storages/fuse/src/operations/delete.rs | 4 ++++ src/query/storages/index/src/range_filter.rs | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/query/sql/src/executor/expression_builder.rs b/src/query/sql/src/executor/expression_builder.rs index 5f97e9520c50..55d3f7f128bd 100644 --- a/src/query/sql/src/executor/expression_builder.rs +++ b/src/query/sql/src/executor/expression_builder.rs @@ -137,7 +137,7 @@ pub trait ExpressionOp { } fn lt(&self, other: &Self) -> Result { - self.binary_op("=", other) + self.binary_op("<", other) } } diff --git a/src/query/storages/fuse/src/operations/delete.rs b/src/query/storages/fuse/src/operations/delete.rs index 863d799bbae2..f02793398c73 100644 --- 
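Dropping the schema parameter works because the new Expression carries column names directly, so the visitor yields names without a field lookup, and the Hive pruning guard only needs those names: a filter can be evaluated against partition values alone exactly when every column it references is a partition key. A hedged sketch of that check (illustrative names, not the real RequireColumnsVisitor API):

use std::collections::HashSet;

// Sketch: a filter may prune Hive partitions only when every column it
// references is a partition key, i.e. the set difference is empty.
fn prunable_by_partition(
    filter_columns: &HashSet<String>,
    partition_keys: &HashSet<String>,
) -> bool {
    filter_columns.difference(partition_keys).count() == 0
}

fn main() {
    let keys = HashSet::from(["ds".to_string()]);
    let used = HashSet::from(["ds".to_string()]);
    assert!(prunable_by_partition(&used, &keys));
}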
a/src/query/storages/fuse/src/operations/delete.rs +++ b/src/query/storages/fuse/src/operations/delete.rs @@ -152,6 +152,10 @@ impl FuseTable { } fn cluster_stats_gen(&self) -> Result { + if self.cluster_key_meta.is_none() { + return Ok(ClusterStatsGenerator::default()); + } + let input_schema = self.table_info.schema(); let mut merged = input_schema.fields().clone(); diff --git a/src/query/storages/index/src/range_filter.rs b/src/query/storages/index/src/range_filter.rs index dadba392b1c2..431ce2f292fb 100644 --- a/src/query/storages/index/src/range_filter.rs +++ b/src/query/storages/index/src/range_filter.rs @@ -63,6 +63,7 @@ impl RangeFilter { }) .unwrap(); + tracing::debug!("verifiable_expr: {}", verifiable_expr); let input_fields = stat_columns .iter() .map(|c| c.stat_field.clone()) From 66ee67ab0dfded20cabcda7f5e19e2cc9f2bd276 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 28 Oct 2022 22:25:52 +0800 Subject: [PATCH 34/47] refactor(query): fix bugs --- src/query/planner/src/expression.rs | 27 +++++++--- .../sql/src/planner/semantic/type_check.rs | 3 +- .../storages/fuse/src/operations/append.rs | 5 +- .../issues/rigger/division_by_zero_null.test | 50 ------------------- 4 files changed, 24 insertions(+), 61 deletions(-) delete mode 100644 tests/logictest/suites/duckdb/issues/rigger/division_by_zero_null.test diff --git a/src/query/planner/src/expression.rs b/src/query/planner/src/expression.rs index 19617ce4f77a..0cacf6b0aca8 100644 --- a/src/query/planner/src/expression.rs +++ b/src/query/planner/src/expression.rs @@ -63,14 +63,25 @@ impl Expression { pub fn column_name(&self) -> String { match self { Expression::Constant { value, .. } => value.to_string(), - Expression::Function { name, args, .. } => { - let args = args - .iter() - .map(|arg| arg.column_name()) - .collect::>() - .join(", "); - format!("{}({})", name, args) - } + Expression::Function { name, args, .. 
} => match name.as_str() { + "+" | "-" | "*" | "/" | "%" => { + format!( + "({})", + args.iter() + .map(|arg| arg.column_name()) + .collect::>() + .join(name) + ) + } + _ => { + let args = args + .iter() + .map(|arg| arg.column_name()) + .collect::>() + .join(", "); + format!("{}({})", name, args) + } + }, Expression::Cast { input, target } => format!( "CAST({} AS {})", input.column_name(), diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index c10dca540291..c938a8e8d8c8 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -1426,7 +1426,8 @@ impl<'a> TypeChecker<'a> { outer_columns: rel_prop.outer_columns, }; - Ok(Box::new((subquery_expr.into(), *data_type))) + let data_type = subquery_expr.data_type(); + Ok(Box::new((subquery_expr.into(), data_type))) } fn is_rewritable_scalar_function(func_name: &str) -> bool { diff --git a/src/query/storages/fuse/src/operations/append.rs b/src/query/storages/fuse/src/operations/append.rs index ab3cec3e14b9..d27a471173ab 100644 --- a/src/query/storages/fuse/src/operations/append.rs +++ b/src/query/storages/fuse/src/operations/append.rs @@ -137,15 +137,16 @@ impl FuseTable { eval: Evaluator::eval_expression(expr, &input_schema)?, name: field.name().to_string(), }); - extra_key_index.push(merged.len() - 1); - merged.push(field); + + extra_key_index.push(merged.len() - 1); merged.len() - 1 } Some(idx) => idx, }; cluster_key_index.push(index); } + if !operators.is_empty() { let func_ctx = ctx.try_get_function_context()?; pipeline.add_transform(move |input, output| { diff --git a/tests/logictest/suites/duckdb/issues/rigger/division_by_zero_null.test b/tests/logictest/suites/duckdb/issues/rigger/division_by_zero_null.test deleted file mode 100644 index f0d526b9c93d..000000000000 --- a/tests/logictest/suites/duckdb/issues/rigger/division_by_zero_null.test +++ /dev/null @@ -1,50 +0,0 @@ -statement ok -drop table if exists t1; - -statement ok -drop table if exists t0; - -statement ok -CREATE TABLE t0(c0 DOUBLE, c1 DOUBLE NULL); - -statement ok -INSERT INTO t0 VALUES(0.0,0.0); - -statement ok -INSERT INTO t0 VALUES(0.0,NULL); - -statement error 1006 -SELECT t0.c1, (1/t0.c1) IS NULL FROM t0 order by c1; - -statement query T -SELECT t0.c1 FROM t0 WHERE (((1/(t0.c1))) IS NULL) order by c1; - ----- -NULL - -statement ok -CREATE TABLE t1(c0 DOUBLE); - -statement ok -INSERT INTO t1 VALUES(10000.0); - -statement ok -INSERT INTO t1 VALUES(0.0); - -statement query FI -SELECT c0, exp(c0) IS NULL FROM t1 order by c0; - ----- -0.0 0 -10000.0 0 - -statement query FF -SELECT c0, exp(c0) FROM t1 order by c0; - ----- -0.0 1.0 -10000.0 inf - -statement ok -SELECT c0 FROM t1 WHERE exp(c0) IS NULL; - From 73bb7fe34b86f95e3de85ad7ae33a6693f9b7c3b Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Fri, 28 Oct 2022 22:25:58 +0800 Subject: [PATCH 35/47] refactor(query): fix tests --- .../issues/rigger/division_by_zero_error.test | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 tests/logictest/suites/duckdb/issues/rigger/division_by_zero_error.test diff --git a/tests/logictest/suites/duckdb/issues/rigger/division_by_zero_error.test b/tests/logictest/suites/duckdb/issues/rigger/division_by_zero_error.test new file mode 100644 index 000000000000..b83c5849a7cc --- /dev/null +++ b/tests/logictest/suites/duckdb/issues/rigger/division_by_zero_error.test @@ -0,0 +1,50 @@ +statement ok +drop table if exists t1; + +statement 
ok +drop table if exists t0; + +statement ok +CREATE TABLE t0(c0 DOUBLE, c1 DOUBLE NULL); + +statement ok +INSERT INTO t0 VALUES(0.0,0.0); + +statement ok +INSERT INTO t0 VALUES(0.0,NULL); + +statement error 1006 +SELECT t0.c1, (1/t0.c1) IS NULL FROM t0 order by c1; + +statement error 1006 +SELECT t0.c1 FROM t0 WHERE (((1/(t0.c1))) IS NULL) order by c1; + +---- +NULL + +statement ok +CREATE TABLE t1(c0 DOUBLE); + +statement ok +INSERT INTO t1 VALUES(10000.0); + +statement ok +INSERT INTO t1 VALUES(0.0); + +statement query FI +SELECT c0, exp(c0) IS NULL FROM t1 order by c0; + +---- +0.0 0 +10000.0 0 + +statement query FF +SELECT c0, exp(c0) FROM t1 order by c0; + +---- +0.0 1.0 +10000.0 inf + +statement ok +SELECT c0 FROM t1 WHERE exp(c0) IS NULL; + From 870c3e00f872bf6a77ba14746e0494b0d4ac5246 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sat, 29 Oct 2022 12:47:49 +0800 Subject: [PATCH 36/47] refactor(query): fix nullable filter --- .../sql/src/executor/expression_builder.rs | 39 +++++++++++++++++++ .../sql/src/executor/physical_plan_builder.rs | 12 +++++- 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/query/sql/src/executor/expression_builder.rs b/src/query/sql/src/executor/expression_builder.rs index 55d3f7f128bd..a71cce0e3bf8 100644 --- a/src/query/sql/src/executor/expression_builder.rs +++ b/src/query/sql/src/executor/expression_builder.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_datavalues::DataSchema; use common_exception::ErrorCode; use common_exception::Result; use common_functions::scalars::FunctionFactory; @@ -103,6 +104,44 @@ where ExpressionBuilder: FiledNameFormat ))), } } + + // the data type may be wrong if the expression is pushed down from an upper node (e.g. a join) + // todo(leisky) + pub fn normalize_schema(expression: &Expression, schema: &DataSchema) -> Result { + match expression { + Expression::IndexedVariable { name, .. } => { + let data_type = schema.field_with_name(name)?.data_type().clone(); + Ok(Expression::IndexedVariable { + name: name.clone(), + data_type, + }) + } + Expression::Function { name, args, .. } => { + let args = args + .iter() + .map(|arg| Self::normalize_schema(arg, schema)) + .collect::>>(); + + let args = args?; + + let types = args.iter().map(|arg| arg.data_type()).collect::>(); + let types = types.iter().collect::>(); + let func = FunctionFactory::instance().get(name, &types)?; + + Ok(Expression::Function { + name: name.clone(), + args, + return_type: func.return_type(), + }) + } + + Expression::Cast { input, target } => Ok(Expression::Cast { + input: Box::new(Self::normalize_schema(input.as_ref(), schema)?), + target: target.clone(), + }), + Expression::Constant { .. 
} => Ok(expression.clone()), + } + } } pub trait ExpressionOp { diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs index f07cb5a56273..d45e2c2f6523 100644 --- a/src/query/sql/src/executor/physical_plan_builder.rs +++ b/src/query/sql/src/executor/physical_plan_builder.rs @@ -491,7 +491,13 @@ impl PhysicalPlanBuilder { let builder = ExpressionBuilderWithoutRenaming::create(self.metadata.clone()); predicates .into_iter() - .map(|scalar| builder.build(&scalar)) + .map(|scalar| { + let expression = builder.build(&scalar)?; + ExpressionBuilderWithoutRenaming::normalize_schema( + &expression, + table_schema.as_ref(), + ) + }) .collect::>>() }) .transpose()?; @@ -525,6 +531,10 @@ impl PhysicalPlanBuilder { let builder = ExpressionBuilderWithoutRenaming::create(self.metadata.clone()); let filter = builder.build(&predicate.unwrap())?; + let filter = ExpressionBuilderWithoutRenaming::normalize_schema( + &filter, + table_schema.as_ref(), + )?; let remain_columns = scan .columns From b20dd9ce49a9005f035590f1a01a4ac1e9abc6eb Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sat, 29 Oct 2022 13:34:34 +0800 Subject: [PATCH 37/47] refactor(query): fix pretty name --- src/query/planner/src/expression.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/query/planner/src/expression.rs b/src/query/planner/src/expression.rs index 0cacf6b0aca8..4aadf2b965d3 100644 --- a/src/query/planner/src/expression.rs +++ b/src/query/planner/src/expression.rs @@ -64,7 +64,7 @@ impl Expression { match self { Expression::Constant { value, .. } => value.to_string(), Expression::Function { name, args, .. } => match name.as_str() { - "+" | "-" | "*" | "/" | "%" => { + "+" | "-" | "*" | "/" | "%" if args.len() == 2 => { format!( "({})", args.iter() @@ -73,6 +73,14 @@ impl Expression { .join(name) ) } + ">=" | "<=" | "=" | ">" | "<" if args.len() == 2 => { + format!( + "({} {} {})", + args[0].column_name(), + name, + args[1].column_name() + ) + } _ => { let args = args .iter() From 8f01427654707cf4e46f0dedbffbe11ca60f00f4 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sat, 29 Oct 2022 14:47:20 +0800 Subject: [PATCH 38/47] refactor(query): fix tests --- src/query/planner/src/expression.rs | 4 ++-- src/query/sql/src/executor/physical_plan_builder.rs | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/query/planner/src/expression.rs b/src/query/planner/src/expression.rs index 4aadf2b965d3..8acfdd536d5e 100644 --- a/src/query/planner/src/expression.rs +++ b/src/query/planner/src/expression.rs @@ -62,7 +62,7 @@ impl Expression { /// Display with readable variable name. pub fn column_name(&self) -> String { match self { - Expression::Constant { value, .. } => value.to_string(), + Expression::Constant { value, .. } => common_datavalues::format_datavalue_sql(value), Expression::Function { name, args, .. 
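The normalize_schema pass wired in above exists because a predicate pushed below a join can carry stale types, typically nullability, for columns of the scanned table; the pass walks the expression bottom-up, re-reading each column's type from the projected schema and re-resolving function return types from the corrected arguments. A simplified sketch of the shape of that walk, with stand-in types (the real code consults FunctionFactory and the projected DataSchema):

use std::collections::HashMap;

// Sketch with stand-in types: re-derive column types from the scan's
// projected schema, then recompute each function's return type.
#[derive(Clone, Debug, PartialEq)]
enum Ty {
    Bool,
    Int64,
    Nullable(Box<Ty>),
}

#[derive(Clone)]
enum Expr {
    Col { name: String, ty: Ty },
    Func { name: String, args: Vec<Expr>, ret: Ty },
}

// Toy return-type rule (assumes at least one argument); the patch asks
// FunctionFactory for the real one.
fn return_type(name: &str, args: &[Expr]) -> Ty {
    match name {
        "and" | "or" | "=" | "<" | ">" => Ty::Bool,
        _ => match &args[0] {
            Expr::Col { ty, .. } | Expr::Func { ret: ty, .. } => ty.clone(),
        },
    }
}

fn normalize(e: &Expr, schema: &HashMap<String, Ty>) -> Expr {
    match e {
        Expr::Col { name, ty } => Expr::Col {
            name: name.clone(),
            // Prefer the type the scan will actually produce.
            ty: schema.get(name).cloned().unwrap_or_else(|| ty.clone()),
        },
        Expr::Func { name, args, .. } => {
            // Recurse first, then recompute the return type from fixed args.
            let args: Vec<Expr> = args.iter().map(|a| normalize(a, schema)).collect();
            let ret = return_type(name, &args);
            Expr::Func { name: name.clone(), args, ret }
        }
    }
}

fn main() {
    let schema = HashMap::from([("a".to_string(), Ty::Nullable(Box::new(Ty::Int64)))]);
    let pred = Expr::Func {
        name: "<".into(),
        args: vec![Expr::Col { name: "a".into(), ty: Ty::Int64 }],
        ret: Ty::Bool,
    };
    if let Expr::Func { args, .. } = normalize(&pred, &schema) {
        if let Expr::Col { ty, .. } = &args[0] {
            assert_eq!(*ty, Ty::Nullable(Box::new(Ty::Int64)));
        }
    }
}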
} => match name.as_str() { "+" | "-" | "*" | "/" | "%" if args.len() == 2 => { format!( @@ -73,7 +73,7 @@ impl Expression { .join(name) ) } - ">=" | "<=" | "=" | ">" | "<" if args.len() == 2 => { + ">=" | "<=" | "=" | ">" | "<" | "or" | "and" if args.len() == 2 => { format!( "({} {} {})", args[0].column_name(), diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs index d45e2c2f6523..34bb8096b482 100644 --- a/src/query/sql/src/executor/physical_plan_builder.rs +++ b/src/query/sql/src/executor/physical_plan_builder.rs @@ -483,6 +483,7 @@ impl PhysicalPlanBuilder { let projection = Self::build_projection(&metadata, table_schema, &scan.columns, has_inner_column); + let project_schema = projection.project_schema(table_schema); let push_down_filters = scan .push_down_predicates @@ -495,7 +496,7 @@ impl PhysicalPlanBuilder { let expression = builder.build(&scalar)?; ExpressionBuilderWithoutRenaming::normalize_schema( &expression, - table_schema.as_ref(), + &project_schema, ) }) .collect::>>() @@ -531,10 +532,8 @@ impl PhysicalPlanBuilder { let builder = ExpressionBuilderWithoutRenaming::create(self.metadata.clone()); let filter = builder.build(&predicate.unwrap())?; - let filter = ExpressionBuilderWithoutRenaming::normalize_schema( - &filter, - table_schema.as_ref(), - )?; + let filter = + ExpressionBuilderWithoutRenaming::normalize_schema(&filter, &project_schema)?; let remain_columns = scan .columns From d3e56af88d25936017b5b7a54a5a6c2b8ad2a425 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sat, 29 Oct 2022 17:51:55 +0800 Subject: [PATCH 39/47] refactor(query): fix tests --- src/query/sql/src/executor/expression_builder.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/query/sql/src/executor/expression_builder.rs b/src/query/sql/src/executor/expression_builder.rs index a71cce0e3bf8..64c32ce96dd8 100644 --- a/src/query/sql/src/executor/expression_builder.rs +++ b/src/query/sql/src/executor/expression_builder.rs @@ -110,7 +110,10 @@ where ExpressionBuilder: FiledNameFormat pub fn normalize_schema(expression: &Expression, schema: &DataSchema) -> Result { match expression { Expression::IndexedVariable { name, .. 
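With patches 37 and 38 combined, Expression::column_name renders two-argument arithmetic, comparison, and boolean functions infix, and falls back to call syntax for everything else. A compact sketch of the final rule, operating on pre-rendered argument strings:

// Sketch of the display rule after PATCHes 37/38: binary arithmetic,
// comparison, and boolean functions print infix; all others print as calls.
fn display(name: &str, args: &[String]) -> String {
    match name {
        "+" | "-" | "*" | "/" | "%" if args.len() == 2 => format!("({})", args.join(name)),
        ">=" | "<=" | "=" | ">" | "<" | "or" | "and" if args.len() == 2 => {
            format!("({} {} {})", args[0], name, args[1])
        }
        _ => format!("{}({})", name, args.join(", ")),
    }
}

fn main() {
    let args = vec!["a".to_string(), "b".to_string()];
    assert_eq!(display("+", &args), "(a+b)");
    assert_eq!(display("and", &args), "(a and b)");
    assert_eq!(display("max", &args), "max(a, b)");
}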
} => { - let data_type = schema.field_with_name(name)?.data_type().clone(); + let data_type = match schema.field_with_name(name) { + Ok(f) => f.data_type().clone(), + Err(_) => return Ok(expression.clone()), + }; Ok(Expression::IndexedVariable { name: name.clone(), data_type, From 22e4b0f7ca7576a780c19947ee3df899eafe1031 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sat, 29 Oct 2022 21:32:16 +0800 Subject: [PATCH 40/47] refactor(query): fix main --- src/query/storages/fuse/src/operations/mod.rs | 3 + .../storages/fuse/src/operations/read_data.rs | 249 +----------------- 2 files changed, 4 insertions(+), 248 deletions(-) diff --git a/src/query/storages/fuse/src/operations/mod.rs b/src/query/storages/fuse/src/operations/mod.rs index 548984401a3d..cfab90d3ef54 100644 --- a/src/query/storages/fuse/src/operations/mod.rs +++ b/src/query/storages/fuse/src/operations/mod.rs @@ -26,10 +26,13 @@ mod read_partitions; mod recluster; mod truncate; +mod fuse_source; pub mod util; pub(crate) use compact::CompactOptions; pub use fuse_sink::FuseTableSink; +pub use fuse_source::FuseTableSource; +pub use fuse_source::State; pub use mutation::delete_from_block; pub use mutation::DeletionMutator; pub use mutation::FullCompactMutator; diff --git a/src/query/storages/fuse/src/operations/read_data.rs b/src/query/storages/fuse/src/operations/read_data.rs index fa16566da214..33cff8e43071 100644 --- a/src/query/storages/fuse/src/operations/read_data.rs +++ b/src/query/storages/fuse/src/operations/read_data.rs @@ -12,28 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::any::Any; use std::sync::Arc; -use common_base::base::Progress; -use common_base::base::ProgressValues; use common_base::base::Runtime; use common_catalog::table_context::TableContext; -use common_datablocks::DataBlock; -use common_datavalues::ColumnRef; use common_datavalues::DataSchemaRef; use common_exception::ErrorCode; use common_exception::Result; -use common_functions::scalars::FunctionContext; -use common_pipeline_core::processors::port::OutputPort; -use common_pipeline_core::processors::processor::Event; -use common_pipeline_core::processors::processor::ProcessorPtr; -use common_pipeline_core::processors::Processor; use common_pipeline_core::Pipeline; use common_planner::extras::Extras; use common_planner::extras::PrewhereInfo; use common_planner::plans::Projection; -use common_planner::PartInfoPtr; use common_planner::ReadDataSourcePlan; use common_sql::evaluator::EvalNode; use common_sql::evaluator::Evaluator; @@ -41,7 +30,7 @@ use tracing::info; use crate::fuse_lazy_part::FuseLazyPartInfo; use crate::io::BlockReader; -use crate::operations::read_data::State::Generated; +use crate::operations::FuseTableSource; use crate::FuseTable; impl FuseTable { @@ -211,239 +200,3 @@ impl FuseTable { pipeline.resize(resize_to) } } - -type DataChunks = Vec<(usize, Vec)>; - -struct PrewhereData { - data_block: DataBlock, - filter: ColumnRef, -} - -enum State { - ReadDataPrewhere(Option), - ReadDataRemain(PartInfoPtr, PrewhereData), - PrewhereFilter(PartInfoPtr, DataChunks), - Deserialize(PartInfoPtr, DataChunks, Option), - Generated(Option, DataBlock), - Finish, -} - -struct FuseTableSource { - state: State, - ctx: Arc, - scan_progress: Arc, - output: Arc, - output_reader: Arc, - - prewhere_reader: Arc, - prewhere_filter: Arc>, - remain_reader: Arc>, -} - -impl FuseTableSource { - pub fn create( - ctx: Arc, - output: Arc, - output_reader: Arc, - prewhere_reader: 
Arc, - prewhere_filter: Arc>, - remain_reader: Arc>, - ) -> Result { - let scan_progress = ctx.get_scan_progress(); - Ok(ProcessorPtr::create(Box::new(FuseTableSource { - ctx, - output, - scan_progress, - state: State::ReadDataPrewhere(None), - output_reader, - prewhere_reader, - prewhere_filter, - remain_reader, - }))) - } - - fn generate_one_block(&mut self, block: DataBlock) -> Result<()> { - let new_part = self.ctx.try_get_part(); - // resort and prune columns - let block = block.resort(self.output_reader.schema())?; - self.state = State::Generated(new_part, block); - Ok(()) - } - - fn generate_one_empty_block(&mut self) -> Result<()> { - let schema = self.output_reader.schema(); - let new_part = self.ctx.try_get_part(); - self.state = Generated(new_part, DataBlock::empty_with_schema(schema)); - Ok(()) - } -} - -#[async_trait::async_trait] -impl Processor for FuseTableSource { - fn name(&self) -> String { - "FuseEngineSource".to_string() - } - - fn as_any(&mut self) -> &mut dyn Any { - self - } - - fn event(&mut self) -> Result { - if matches!(self.state, State::ReadDataPrewhere(None)) { - self.state = match self.ctx.try_get_part() { - None => State::Finish, - Some(part) => State::ReadDataPrewhere(Some(part)), - } - } - - if matches!(self.state, State::Finish) { - self.output.finish(); - return Ok(Event::Finished); - } - - if self.output.is_finished() { - return Ok(Event::Finished); - } - - if !self.output.can_push() { - return Ok(Event::NeedConsume); - } - - if matches!(self.state, State::Generated(_, _)) { - if let Generated(part, data_block) = std::mem::replace(&mut self.state, State::Finish) { - self.state = match part { - None => State::Finish, - Some(part) => State::ReadDataPrewhere(Some(part)), - }; - - self.output.push_data(Ok(data_block)); - return Ok(Event::NeedConsume); - } - } - - match self.state { - State::Finish => Ok(Event::Finished), - State::ReadDataPrewhere(_) => Ok(Event::Async), - State::ReadDataRemain(_, _) => Ok(Event::Async), - State::PrewhereFilter(_, _) => Ok(Event::Sync), - State::Deserialize(_, _, _) => Ok(Event::Sync), - State::Generated(_, _) => Err(ErrorCode::LogicalError("It's a bug.")), - } - } - - fn process(&mut self) -> Result<()> { - match std::mem::replace(&mut self.state, State::Finish) { - State::Deserialize(part, chunks, prewhere_data) => { - let data_block = if let Some(PrewhereData { - data_block: mut prewhere_blocks, - filter, - }) = prewhere_data - { - let block = if chunks.is_empty() { - prewhere_blocks - } else if let Some(remain_reader) = self.remain_reader.as_ref() { - let remain_block = remain_reader.deserialize(part, chunks)?; - for (col, field) in remain_block - .columns() - .iter() - .zip(remain_block.schema().fields()) - { - prewhere_blocks = - prewhere_blocks.add_column(col.clone(), field.clone())?; - } - prewhere_blocks - } else { - return Err(ErrorCode::LogicalError("It's a bug. Need remain reader")); - }; - // the last step of prewhere - let progress_values = ProgressValues { - rows: block.num_rows(), - bytes: block.memory_size(), - }; - self.scan_progress.incr(&progress_values); - DataBlock::filter_block(block, &filter)? 
- } else { - let block = self.output_reader.deserialize(part, chunks)?; - let progress_values = ProgressValues { - rows: block.num_rows(), - bytes: block.memory_size(), - }; - self.scan_progress.incr(&progress_values); - - block - }; - - self.generate_one_block(data_block)?; - Ok(()) - } - State::PrewhereFilter(part, chunks) => { - // deserialize prewhere data block first - let data_block = self.prewhere_reader.deserialize(part.clone(), chunks)?; - if let Some(filter) = self.prewhere_filter.as_ref() { - // do filter - let res = filter - .eval(&FunctionContext::default(), &data_block)? - .vector; - let filter = DataBlock::cast_to_nonull_boolean(&res)?; - // shortcut, if predicates is const boolean (or can be cast to boolean) - if !DataBlock::filter_exists(&filter)? { - // all rows in this block are filtered out - // turn to read next part - let progress_values = ProgressValues { - rows: data_block.num_rows(), - bytes: data_block.memory_size(), - }; - self.scan_progress.incr(&progress_values); - self.generate_one_empty_block()?; - return Ok(()); - } - if self.remain_reader.is_none() { - // shortcut, we don't need to read remain data - let progress_values = ProgressValues { - rows: data_block.num_rows(), - bytes: data_block.memory_size(), - }; - self.scan_progress.incr(&progress_values); - let block = DataBlock::filter_block(data_block, &filter)?; - self.generate_one_block(block)?; - } else { - self.state = - State::ReadDataRemain(part, PrewhereData { data_block, filter }); - } - Ok(()) - } else { - Err(ErrorCode::LogicalError( - "It's a bug. No need to do prewhere filter", - )) - } - } - _ => Err(ErrorCode::LogicalError("It's a bug.")), - } - } - - async fn async_process(&mut self) -> Result<()> { - match std::mem::replace(&mut self.state, State::Finish) { - State::ReadDataPrewhere(Some(part)) => { - let chunks = self.prewhere_reader.read_columns_data(part.clone()).await?; - - if self.prewhere_filter.is_some() { - self.state = State::PrewhereFilter(part, chunks); - } else { - // all needed columns are read. - self.state = State::Deserialize(part, chunks, None) - } - Ok(()) - } - State::ReadDataRemain(part, prewhere_data) => { - if let Some(remain_reader) = self.remain_reader.as_ref() { - let chunks = remain_reader.read_columns_data(part.clone()).await?; - self.state = State::Deserialize(part, chunks, Some(prewhere_data)); - Ok(()) - } else { - return Err(ErrorCode::LogicalError("It's a bug. No remain reader")); - } - } - _ => Err(ErrorCode::LogicalError("It's a bug.")), - } - } -} From e0ec48b595199143d15c05cc0a15545ffa3f8841 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sat, 29 Oct 2022 21:32:23 +0800 Subject: [PATCH 41/47] refactor(query): fix main --- .../fuse/src/operations/fuse_source.rs | 307 ++++++++++++++++++ 1 file changed, 307 insertions(+) create mode 100644 src/query/storages/fuse/src/operations/fuse_source.rs diff --git a/src/query/storages/fuse/src/operations/fuse_source.rs b/src/query/storages/fuse/src/operations/fuse_source.rs new file mode 100644 index 000000000000..6910b4effa35 --- /dev/null +++ b/src/query/storages/fuse/src/operations/fuse_source.rs @@ -0,0 +1,307 @@ +// Copyright 2021 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::any::Any; +use std::sync::Arc; + +use common_base::base::Progress; +use common_base::base::ProgressValues; +use common_catalog::table_context::TableContext; +use common_datablocks::DataBlock; +use common_datavalues::ColumnRef; +use common_exception::ErrorCode; +use common_exception::Result; +use common_functions::scalars::FunctionContext; +use common_pipeline_core::processors::port::OutputPort; +use common_pipeline_core::processors::processor::Event; +use common_pipeline_core::processors::processor::ProcessorPtr; +use common_pipeline_core::processors::Processor; +use common_planner::PartInfoPtr; +use common_sql::evaluator::EvalNode; + +use crate::io::BlockReader; +use crate::operations::State::Generated; + +type DataChunks = Vec<(usize, Vec)>; + +pub struct PrewhereData { + data_block: DataBlock, + filter: ColumnRef, +} + +pub enum State { + ReadDataPrewhere(Option), + ReadDataRemain(PartInfoPtr, PrewhereData), + PrewhereFilter(PartInfoPtr, DataChunks), + Deserialize(PartInfoPtr, DataChunks, Option), + Generated(Option, DataBlock), + Finish, +} + +pub struct FuseTableSource { + state: State, + ctx: Arc, + scan_progress: Arc, + output: Arc, + output_reader: Arc, + + prewhere_reader: Arc, + prewhere_filter: Arc>, + remain_reader: Arc>, + + support_blocking: bool, +} + +impl FuseTableSource { + pub fn create( + ctx: Arc, + output: Arc, + output_reader: Arc, + prewhere_reader: Arc, + prewhere_filter: Arc>, + remain_reader: Arc>, + ) -> Result { + let scan_progress = ctx.get_scan_progress(); + let support_blocking = prewhere_reader.support_blocking_api(); + Ok(ProcessorPtr::create(Box::new(FuseTableSource { + ctx, + output, + scan_progress, + state: State::ReadDataPrewhere(None), + output_reader, + prewhere_reader, + prewhere_filter, + remain_reader, + support_blocking, + }))) + } + + fn generate_one_block(&mut self, block: DataBlock) -> Result<()> { + let new_part = self.ctx.try_get_part(); + // resort and prune columns + let block = block.resort(self.output_reader.schema())?; + self.state = State::Generated(new_part, block); + Ok(()) + } + + fn generate_one_empty_block(&mut self) -> Result<()> { + let schema = self.output_reader.schema(); + let new_part = self.ctx.try_get_part(); + self.state = Generated(new_part, DataBlock::empty_with_schema(schema)); + Ok(()) + } +} + +#[async_trait::async_trait] +impl Processor for FuseTableSource { + fn name(&self) -> String { + "FuseEngineSource".to_string() + } + + fn as_any(&mut self) -> &mut dyn Any { + self + } + + fn event(&mut self) -> Result { + if matches!(self.state, State::ReadDataPrewhere(None)) { + self.state = match self.ctx.try_get_part() { + None => State::Finish, + Some(part) => State::ReadDataPrewhere(Some(part)), + } + } + + if matches!(self.state, State::Finish) { + self.output.finish(); + return Ok(Event::Finished); + } + + if self.output.is_finished() { + return Ok(Event::Finished); + } + + if !self.output.can_push() { + return Ok(Event::NeedConsume); + } + + if matches!(self.state, State::Generated(_, _)) { + if let Generated(part, data_block) = std::mem::replace(&mut self.state, State::Finish) { + 
self.state = match part { + None => State::Finish, + Some(part) => State::ReadDataPrewhere(Some(part)), + }; + + self.output.push_data(Ok(data_block)); + return Ok(Event::NeedConsume); + } + } + + match self.state { + State::Finish => Ok(Event::Finished), + State::ReadDataPrewhere(_) => { + if self.support_blocking { + Ok(Event::Sync) + } else { + Ok(Event::Async) + } + } + State::ReadDataRemain(_, _) => { + if self.support_blocking { + Ok(Event::Sync) + } else { + Ok(Event::Async) + } + } + State::PrewhereFilter(_, _) => Ok(Event::Sync), + State::Deserialize(_, _, _) => Ok(Event::Sync), + State::Generated(_, _) => Err(ErrorCode::LogicalError("It's a bug.")), + } + } + + fn process(&mut self) -> Result<()> { + match std::mem::replace(&mut self.state, State::Finish) { + State::Deserialize(part, chunks, prewhere_data) => { + let data_block = if let Some(PrewhereData { + data_block: mut prewhere_blocks, + filter, + }) = prewhere_data + { + let block = if chunks.is_empty() { + prewhere_blocks + } else if let Some(remain_reader) = self.remain_reader.as_ref() { + let remain_block = remain_reader.deserialize(part, chunks)?; + for (col, field) in remain_block + .columns() + .iter() + .zip(remain_block.schema().fields()) + { + prewhere_blocks = + prewhere_blocks.add_column(col.clone(), field.clone())?; + } + prewhere_blocks + } else { + return Err(ErrorCode::LogicalError("It's a bug. Need remain reader")); + }; + // the last step of prewhere + let progress_values = ProgressValues { + rows: block.num_rows(), + bytes: block.memory_size(), + }; + self.scan_progress.incr(&progress_values); + DataBlock::filter_block(block, &filter)? + } else { + let block = self.output_reader.deserialize(part, chunks)?; + let progress_values = ProgressValues { + rows: block.num_rows(), + bytes: block.memory_size(), + }; + self.scan_progress.incr(&progress_values); + + block + }; + + self.generate_one_block(data_block)?; + Ok(()) + } + State::PrewhereFilter(part, chunks) => { + // deserialize prewhere data block first + let data_block = self.prewhere_reader.deserialize(part.clone(), chunks)?; + if let Some(filter) = self.prewhere_filter.as_ref() { + // do filter + let res = filter + .eval(&FunctionContext::default(), &data_block)? + .vector; + let filter = DataBlock::cast_to_nonull_boolean(&res)?; + // shortcut, if predicates is const boolean (or can be cast to boolean) + if !DataBlock::filter_exists(&filter)? { + // all rows in this block are filtered out + // turn to read next part + let progress_values = ProgressValues { + rows: data_block.num_rows(), + bytes: data_block.memory_size(), + }; + self.scan_progress.incr(&progress_values); + self.generate_one_empty_block()?; + return Ok(()); + } + if self.remain_reader.is_none() { + // shortcut, we don't need to read remain data + let progress_values = ProgressValues { + rows: data_block.num_rows(), + bytes: data_block.memory_size(), + }; + self.scan_progress.incr(&progress_values); + let block = DataBlock::filter_block(data_block, &filter)?; + self.generate_one_block(block)?; + } else { + self.state = + State::ReadDataRemain(part, PrewhereData { data_block, filter }); + } + Ok(()) + } else { + Err(ErrorCode::LogicalError( + "It's a bug. No need to do prewhere filter", + )) + } + } + + State::ReadDataPrewhere(Some(part)) => { + let chunks = self.prewhere_reader.sync_read_columns_data(part.clone())?; + + if self.prewhere_filter.is_some() { + self.state = State::PrewhereFilter(part, chunks); + } else { + // all needed columns are read. 
+ self.state = State::Deserialize(part, chunks, None) + } + Ok(()) + } + State::ReadDataRemain(part, prewhere_data) => { + if let Some(remain_reader) = self.remain_reader.as_ref() { + let chunks = remain_reader.sync_read_columns_data(part.clone())?; + self.state = State::Deserialize(part, chunks, Some(prewhere_data)); + Ok(()) + } else { + Err(ErrorCode::LogicalError("It's a bug. No remain reader")) + } + } + _ => Err(ErrorCode::LogicalError("It's a bug.")), + } + } + + async fn async_process(&mut self) -> Result<()> { + match std::mem::replace(&mut self.state, State::Finish) { + State::ReadDataPrewhere(Some(part)) => { + let chunks = self.prewhere_reader.read_columns_data(part.clone()).await?; + + if self.prewhere_filter.is_some() { + self.state = State::PrewhereFilter(part, chunks); + } else { + // all needed columns are read. + self.state = State::Deserialize(part, chunks, None) + } + Ok(()) + } + State::ReadDataRemain(part, prewhere_data) => { + if let Some(remain_reader) = self.remain_reader.as_ref() { + let chunks = remain_reader.read_columns_data(part.clone()).await?; + self.state = State::Deserialize(part, chunks, Some(prewhere_data)); + Ok(()) + } else { + Err(ErrorCode::LogicalError("It's a bug. No remain reader")) + } + } + _ => Err(ErrorCode::LogicalError("It's a bug.")), + } + } +} From 1ac020045681227e9c36351d6953e82cd8767614 Mon Sep 17 00:00:00 2001 From: baishen Date: Sat, 29 Oct 2022 23:15:10 +0800 Subject: [PATCH 42/47] fix --- src/query/storages/fuse/src/pruning/pruner.rs | 2 +- src/query/storages/index/src/bloom.rs | 11 ++++++----- .../storages/index/tests/it/filters/bloom_filter.rs | 9 +++++---- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/query/storages/fuse/src/pruning/pruner.rs b/src/query/storages/fuse/src/pruning/pruner.rs index 329828f8a482..3c6d000ef0bd 100644 --- a/src/query/storages/fuse/src/pruning/pruner.rs +++ b/src/query/storages/fuse/src/pruning/pruner.rs @@ -125,7 +125,7 @@ pub fn new_filter_pruner( // convert to filter column names let filter_block_cols = point_query_cols .iter() - .map(|c| BlockFilter::build_filter_column(c.as_str())) + .map(|n| BlockFilter::build_filter_column_name(&n)) .collect(); return Ok(Some(Arc::new(FilterPruner::new( diff --git a/src/query/storages/index/src/bloom.rs b/src/query/storages/index/src/bloom.rs index 5a615df3fe8a..2464671f5b42 100644 --- a/src/query/storages/index/src/bloom.rs +++ b/src/query/storages/index/src/bloom.rs @@ -74,8 +74,8 @@ pub enum FilterEvalResult { impl BlockFilter { /// For every applicable column, we will create a filter. 
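The relocated FuseTableSource is easiest to read as a state machine: prewhere columns are read first, the filter is applied, and remaining columns are fetched only for parts where rows survive; support_blocking merely decides whether each read step is scheduled as Event::Sync or Event::Async. A stripped-down sketch of the transitions, with the data payloads omitted:

// Sketch: the prewhere read loop as a bare state machine. The real states
// carry partitions, column chunks, and filter results.
enum State {
    ReadDataPrewhere,
    PrewhereFilter,
    ReadDataRemain,
    Deserialize,
    Generated,
    Finish,
}

fn next(state: State, has_filter: bool, rows_survive: bool, need_remain: bool) -> State {
    match state {
        State::ReadDataPrewhere if has_filter => State::PrewhereFilter,
        State::ReadDataPrewhere => State::Deserialize,
        // All rows filtered out: emit an empty block and move to the next part.
        State::PrewhereFilter if !rows_survive => State::Generated,
        State::PrewhereFilter if need_remain => State::ReadDataRemain,
        // Shortcut: no remaining columns to read, filter and emit directly.
        State::PrewhereFilter => State::Generated,
        State::ReadDataRemain => State::Deserialize,
        State::Deserialize => State::Generated,
        // Generated hands a block downstream, then starts over (or becomes
        // Finish once try_get_part returns no more partitions).
        State::Generated => State::ReadDataPrewhere,
        State::Finish => State::Finish,
    }
}

fn main() {
    // One part with a prewhere filter, surviving rows, and remaining columns.
    let mut s = State::ReadDataPrewhere;
    for _ in 0..5 {
        s = next(s, true, true, true);
    }
    assert!(matches!(s, State::ReadDataPrewhere));
}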
/// The filter will be stored with field name 'Bloom(column_name)' - pub fn build_filter_column(name: &str) -> String { - format!("Bloom({})", name) + pub fn build_filter_column_name(column_name: &str) -> String { + format!("Bloom({})", column_name) } pub fn build_filter_schema(data_schema: &DataSchema) -> DataSchema { let mut filter_fields = vec![]; @@ -83,7 +83,8 @@ impl BlockFilter { for field in fields.iter() { if Xor8Filter::is_supported_type(field.data_type()) { // create field for applicable ones - let column_name = Self::build_filter_column(field.name()); + + let column_name = Self::build_filter_column_name(field.name()); let filter_field = DataField::new(&column_name, Vu8::to_data_type()); filter_fields.push(filter_field); @@ -153,11 +154,11 @@ impl BlockFilter { pub fn find( &self, - name: &str, + column_name: &str, target: DataValue, typ: &DataTypeImpl, ) -> Result { - let filter_column = Self::build_filter_column(name); + let filter_column = Self::build_filter_column_name(column_name); if !self.filter_block.schema().has_field(&filter_column) || !Xor8Filter::is_supported_type(typ) || target.is_null() diff --git a/src/query/storages/index/tests/it/filters/bloom_filter.rs b/src/query/storages/index/tests/it/filters/bloom_filter.rs index ee6106699d5b..e655babc2d2c 100644 --- a/src/query/storages/index/tests/it/filters/bloom_filter.rs +++ b/src/query/storages/index/tests/it/filters/bloom_filter.rs @@ -71,8 +71,8 @@ fn test_column_type_support() -> Result<()> { assert_eq!(supported_types.len(), index.filter_block.columns().len()); // check index columns - schema.fields().iter().enumerate().for_each(|(i, field)| { - let col_name = BlockFilter::build_filter_column(i); + schema.fields().iter().for_each(|field| { + let col_name = BlockFilter::build_filter_column_name(field.name()); let maybe_index_col = index.filter_block.try_column_by_name(&col_name); if supported_types.contains(field.data_type()) { assert!(maybe_index_col.is_ok(), "check field {}", field.name()) @@ -82,11 +82,12 @@ fn test_column_type_support() -> Result<()> { }); // check applicable - schema.fields().iter().enumerate().for_each(|(i, field)| { + schema.fields().iter().for_each(|field| { // type of input data value does not matter here, will be casted during filtering let value = DataValue::Boolean(true); + let col_name = field.name().as_str(); let data_type = field.data_type(); - let r = index.find(i, value, data_type).unwrap(); + let r = index.find(col_name, value, data_type).unwrap(); if supported_types.contains(field.data_type()) { assert_ne!( r, From 05f945c0c21359dea86dc9f22332d7bc4f5dc205 Mon Sep 17 00:00:00 2001 From: baishen Date: Sun, 30 Oct 2022 19:11:43 +0800 Subject: [PATCH 43/47] refactor(query): fix unit test --- .../service/tests/it/storages/fuse/pruning.rs | 36 ++-- .../tests/it/storages/fuse/statistics.rs | 36 ++-- .../service/tests/it/storages/fuse/table.rs | 2 +- .../clustering_information_table.rs | 5 +- .../table_functions/fuse_snapshot_table.rs | 5 +- .../it/storages/fuse/table_test_fixture.rs | 8 +- .../tests/it/storages/index/range_filter.rs | 170 +++++++++--------- src/query/service/tests/it/storages/memory.rs | 6 +- src/query/service/tests/it/storages/null.rs | 2 +- src/query/service/tests/it/storages/system.rs | 2 +- .../tests/it/table_functions/numbers_table.rs | 7 +- .../sql/src/executor/expression_builder.rs | 130 ++++++++++++++ 12 files changed, 259 insertions(+), 150 deletions(-) diff --git a/src/query/service/tests/it/storages/fuse/pruning.rs 
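Renaming build_filter_column to build_filter_column_name also settles the convention both sides must share: the write path stores each applicable column under a derived field name, and the read path (the pruner and find) derives the same name to look it up, so the two cannot drift. The rule itself is one line; a sketch:

// Sketch of the PATCH 42 convention: each indexed source column "c" is kept
// in the filter block under the companion field name "Bloom(c)".
fn build_filter_column_name(column_name: &str) -> String {
    format!("Bloom({})", column_name)
}

fn main() {
    assert_eq!(build_filter_column_name("user_id"), "Bloom(user_id)");
}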
b/src/query/service/tests/it/storages/fuse/pruning.rs index 456f98fb523a..be1f36ab7ffd 100644 --- a/src/query/service/tests/it/storages/fuse/pruning.rs +++ b/src/query/service/tests/it/storages/fuse/pruning.rs @@ -21,12 +21,12 @@ use common_datavalues::prelude::*; use common_exception::Result; use common_fuse_meta::meta::BlockMeta; use common_fuse_meta::meta::TableSnapshot; -use common_legacy_expression::add; -use common_legacy_expression::col; -use common_legacy_expression::lit; -use common_legacy_expression::sub; -use common_legacy_expression::LegacyExpression; use common_planner::extras::Extras; +use common_sql::executor::add; +use common_sql::executor::col; +use common_sql::executor::lit; +use common_sql::executor::sub; +use common_sql::executor::ExpressionOp; use common_storages_fuse::FuseTable; use databend_query::interpreters::CreateTableInterpreterV2; use databend_query::interpreters::Interpreter; @@ -153,32 +153,26 @@ async fn test_block_pruner() -> Result<()> { // nothing is pruned let mut e1 = Extras::default(); - e1.filters = vec![col("a").gt(lit(30u64))]; + e1.filters = vec![col("a", u64::to_data_type()).gt(&lit(30u64))?]; // some blocks pruned let mut e2 = Extras::default(); let max_val_of_b = 6u64; - e2.filters = vec![col("a").gt(lit(0u64)).and(col("b").gt(lit(max_val_of_b)))]; + e2.filters = vec![ + col("a", u64::to_data_type()) + .gt(&lit(0u64))? + .and(&col("b", u64::to_data_type()).gt(&lit(max_val_of_b))?)?, + ]; let b2 = num_blocks - max_val_of_b as usize - 1; // Sort asc Limit let mut e3 = Extras::default(); - e3.order_by = vec![LegacyExpression::Sort { - expr: Box::new(col("b")), - asc: true, - nulls_first: false, - origin_expr: Box::new(col("b")), - }]; + e3.order_by = vec![(col("b", u64::to_data_type()), true, false)]; e3.limit = Some(3); // Sort desc Limit let mut e4 = Extras::default(); - e4.order_by = vec![LegacyExpression::Sort { - expr: Box::new(col("b")), - asc: false, - nulls_first: false, - origin_expr: Box::new(col("b")), - }]; + e4.order_by = vec![(col("b", u64::to_data_type()), false, false)]; e4.limit = Some(4); let extras = vec![ @@ -298,7 +292,7 @@ async fn test_block_pruner_monotonic() -> Result<()> { // a + b > 20; some blocks pruned let mut extra = Extras::default(); - let pred = add(col("a"), col("b")).gt(lit(20u64)); + let pred = add(col("a", u64::to_data_type()), col("b", u64::to_data_type())).gt(&lit(20u64))?; extra.filters = vec![pred]; let blocks = apply_block_pruning( @@ -314,7 +308,7 @@ async fn test_block_pruner_monotonic() -> Result<()> { // b - a < 20; nothing will be pruned. 
let mut extra = Extras::default(); - let pred = sub(col("b"), col("a")).lt(lit(20u64)); + let pred = sub(col("b", u64::to_data_type()), col("a", u64::to_data_type())).lt(&lit(20u64))?; extra.filters = vec![pred]; let blocks = apply_block_pruning( diff --git a/src/query/service/tests/it/storages/fuse/statistics.rs b/src/query/service/tests/it/storages/fuse/statistics.rs index f9425bb1471c..566f0874f73e 100644 --- a/src/query/service/tests/it/storages/fuse/statistics.rs +++ b/src/query/service/tests/it/storages/fuse/statistics.rs @@ -18,14 +18,15 @@ use common_base::base::tokio; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_functions::aggregates::eval_aggr; +use common_functions::scalars::FunctionContext; use common_fuse_meta::meta::BlockMeta; use common_fuse_meta::meta::ClusterStatistics; use common_fuse_meta::meta::ColumnStatistics; use common_fuse_meta::meta::Statistics; -use common_legacy_expression::add; -use common_legacy_expression::col; -use common_legacy_expression::lit; -use common_pipeline_transforms::processors::ExpressionExecutor; +use common_sql::evaluator::Evaluator; +use common_sql::executor::add; +use common_sql::executor::col; +use common_sql::executor::lit; use common_storages_fuse::statistics::reducers::reduce_block_metas; use common_storages_fuse::statistics::Trim; use common_storages_fuse::statistics::STATS_REPLACEMENT_CHAR; @@ -179,14 +180,14 @@ fn test_ft_stats_cluster_stats() -> common_exception::Result<()> { ]); let block_compactor = BlockCompactor::new(1_000_000, 800_000, 100 * 1024 * 1024); - let stats_gen = ClusterStatsGenerator::new(0, vec![0], None, 0, block_compactor.clone()); + let stats_gen = ClusterStatsGenerator::new(0, vec![0], vec![], 0, block_compactor.clone()); let (stats, _) = stats_gen.gen_stats_for_append(&blocks)?; assert!(stats.is_some()); let stats = stats.unwrap(); assert_eq!(vec![DataValue::Int64(1)], stats.min); assert_eq!(vec![DataValue::Int64(3)], stats.max); - let stats_gen = ClusterStatsGenerator::new(1, vec![1], None, 0, block_compactor); + let stats_gen = ClusterStatsGenerator::new(1, vec![1], vec![], 0, block_compactor); let (stats, _) = stats_gen.gen_stats_for_append(&blocks)?; assert!(stats.is_some()); let stats = stats.unwrap(); @@ -208,7 +209,7 @@ async fn test_ft_cluster_stats_with_stats() -> common_exception::Result<()> { }); let block_compactor = BlockCompactor::new(1_000_000, 800_000, 100 * 1024 * 1024); - let stats_gen = ClusterStatsGenerator::new(0, vec![0], None, 0, block_compactor.clone()); + let stats_gen = ClusterStatsGenerator::new(0, vec![0], vec![], 0, block_compactor.clone()); let stats = stats_gen.gen_with_origin_stats(&blocks, origin.clone())?; assert!(stats.is_some()); let stats = stats.unwrap(); @@ -216,20 +217,15 @@ async fn test_ft_cluster_stats_with_stats() -> common_exception::Result<()> { assert_eq!(vec![DataValue::Int64(3)], stats.max); // add expression executor. 
- let fixture = TestFixture::new().await; - let ctx = fixture.ctx(); + let expr = add(col("a", i64::to_data_type()), lit(1)); + let eval_node = Evaluator::eval_expression(&expr, &schema)?; + let func_ctx = FunctionContext::default(); + let result = eval_node.eval(&func_ctx, &blocks)?; let output_schema = DataSchemaRefExt::create(vec![DataField::new("(a + 1)", i64::to_data_type())]); - let executor = ExpressionExecutor::try_create( - ctx, - "expression executor for generator cluster statistics", - schema, - output_schema, - vec![add(col("a"), lit(1))], - true, - )?; - let stats_gen = - ClusterStatsGenerator::new(0, vec![0], Some(executor), 0, block_compactor.clone()); + let blocks = DataBlock::create(output_schema, vec![result.vector]); + + let stats_gen = ClusterStatsGenerator::new(0, vec![0], vec![], 0, block_compactor.clone()); let stats = stats_gen.gen_with_origin_stats(&blocks, origin.clone())?; assert!(stats.is_some()); let stats = stats.unwrap(); @@ -237,7 +233,7 @@ async fn test_ft_cluster_stats_with_stats() -> common_exception::Result<()> { assert_eq!(vec![DataValue::Int64(4)], stats.max); // different cluster_key_id. - let stats_gen = ClusterStatsGenerator::new(1, vec![0], None, 0, block_compactor); + let stats_gen = ClusterStatsGenerator::new(1, vec![0], vec![], 0, block_compactor); let stats = stats_gen.gen_with_origin_stats(&blocks, origin)?; assert!(stats.is_none()); diff --git a/src/query/service/tests/it/storages/fuse/table.rs b/src/query/service/tests/it/storages/fuse/table.rs index b1fd16685b41..f4160aaea0b2 100644 --- a/src/query/service/tests/it/storages/fuse/table.rs +++ b/src/query/service/tests/it/storages/fuse/table.rs @@ -22,6 +22,7 @@ use common_planner::plans::AlterTableClusterKeyPlan; use common_planner::plans::DropTableClusterKeyPlan; use common_planner::ReadDataSourcePlan; use common_planner::SourceInfo; +use common_sql::executor::table_read_plan::ToReadDataSourcePlan; use databend_query::interpreters::AlterTableClusterKeyInterpreter; use databend_query::interpreters::CreateTableInterpreterV2; use databend_query::interpreters::DropTableClusterKeyInterpreter; @@ -34,7 +35,6 @@ use databend_query::sql::OPT_KEY_DATABASE_ID; use databend_query::sql::OPT_KEY_SNAPSHOT_LOCATION; use databend_query::storages::fuse::io::MetaReaders; use databend_query::storages::fuse::FuseTable; -use databend_query::storages::ToReadDataSourcePlan; use databend_query::stream::DataBlockStream; use futures::TryStreamExt; diff --git a/src/query/service/tests/it/storages/fuse/table_functions/clustering_information_table.rs b/src/query/service/tests/it/storages/fuse/table_functions/clustering_information_table.rs index d807726670ff..02d452efd164 100644 --- a/src/query/service/tests/it/storages/fuse/table_functions/clustering_information_table.rs +++ b/src/query/service/tests/it/storages/fuse/table_functions/clustering_information_table.rs @@ -17,7 +17,6 @@ use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_expression::*; use tokio_stream::StreamExt; use crate::storages::fuse::table_test_fixture::*; @@ -33,8 +32,8 @@ async fn test_clustering_information_table_read() -> Result<()> { fixture.create_default_table().await?; // func args - let arg_db = LegacyExpression::create_literal(DataValue::String(db.as_bytes().to_vec())); - let arg_tbl = LegacyExpression::create_literal(DataValue::String(tbl.as_bytes().to_vec())); + let arg_db = DataValue::String(db.as_bytes().to_vec()); + let arg_tbl = 
DataValue::String(tbl.as_bytes().to_vec()); { let expected = vec![ diff --git a/src/query/service/tests/it/storages/fuse/table_functions/fuse_snapshot_table.rs b/src/query/service/tests/it/storages/fuse/table_functions/fuse_snapshot_table.rs index 1043656c5cfa..1074608238b0 100644 --- a/src/query/service/tests/it/storages/fuse/table_functions/fuse_snapshot_table.rs +++ b/src/query/service/tests/it/storages/fuse/table_functions/fuse_snapshot_table.rs @@ -17,7 +17,6 @@ use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; -use common_legacy_expression::*; use tokio_stream::StreamExt; use crate::storages::fuse::table_test_fixture::*; @@ -51,14 +50,14 @@ async fn test_fuse_snapshot_table_args() -> Result<()> { test_drive_with_args(query_ctx.clone(), Some(vec![])).await, ); - let arg_db = LegacyExpression::create_literal(DataValue::String(test_db.as_bytes().to_vec())); + let arg_db = DataValue::String(test_db.as_bytes().to_vec()); expects_err( "bad argument (no table)", ErrorCode::bad_arguments_code(), test_drive_with_args(query_ctx.clone(), Some(vec![arg_db])).await, ); - let arg_db = LegacyExpression::create_literal(DataValue::String(test_db.as_bytes().to_vec())); + let arg_db = DataValue::String(test_db.as_bytes().to_vec()); expects_err( "bad argument (too many args)", ErrorCode::bad_arguments_code(), diff --git a/src/query/service/tests/it/storages/fuse/table_test_fixture.rs b/src/query/service/tests/it/storages/fuse/table_test_fixture.rs index 0cac77f82b8e..89f82786aa76 100644 --- a/src/query/service/tests/it/storages/fuse/table_test_fixture.rs +++ b/src/query/service/tests/it/storages/fuse/table_test_fixture.rs @@ -21,10 +21,10 @@ use common_datablocks::assert_blocks_sorted_eq_with_name; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::Result; -use common_legacy_expression::LegacyExpression; use common_meta_app::schema::DatabaseMeta; use common_planner::extras::Extras; use common_planner::plans::CreateDatabasePlan; +use common_sql::executor::table_read_plan::ToReadDataSourcePlan; use common_storage::StorageFsConfig; use common_storage::StorageParams; use common_storages_fuse::FUSE_TBL_XOR_BLOOM_INDEX_PREFIX; @@ -47,7 +47,6 @@ use databend_query::storages::fuse::FUSE_TBL_BLOCK_PREFIX; use databend_query::storages::fuse::FUSE_TBL_SEGMENT_PREFIX; use databend_query::storages::fuse::FUSE_TBL_SNAPSHOT_PREFIX; use databend_query::storages::Table; -use databend_query::storages::ToReadDataSourcePlan; use databend_query::stream::DataBlockStream; use databend_query::table_functions::TableArgs; use futures::TryStreamExt; @@ -321,10 +320,7 @@ pub async fn test_drive( None => DataValue::Null, }; - let tbl_args = Some(vec![ - LegacyExpression::create_literal(arg_db), - LegacyExpression::create_literal(arg_tbl), - ]); + let tbl_args = Some(vec![arg_db, arg_tbl]); test_drive_with_args(ctx, tbl_args).await } diff --git a/src/query/service/tests/it/storages/index/range_filter.rs b/src/query/service/tests/it/storages/index/range_filter.rs index 3e408ea70fa2..aacd80b7d930 100644 --- a/src/query/service/tests/it/storages/index/range_filter.rs +++ b/src/query/service/tests/it/storages/index/range_filter.rs @@ -19,7 +19,15 @@ use common_datavalues::prelude::*; use common_exception::Result; use common_fuse_meta::meta::ColumnStatistics; use common_fuse_meta::meta::StatisticsOfColumns; -use common_legacy_expression::*; +use common_planner::Expression; +use common_sql::executor::add; +use 
common_sql::executor::col; +use common_sql::executor::func; +use common_sql::executor::lit; +use common_sql::executor::lit_null; +use common_sql::executor::neg; +use common_sql::executor::sub; +use common_sql::executor::ExpressionOp; use databend_query::storages::index::range_filter::build_verifiable_expr; use databend_query::storages::index::range_filter::left_bound_for_like_pattern; use databend_query::storages::index::range_filter::right_bound_for_like_pattern; @@ -59,129 +67,135 @@ async fn test_range_filter() -> Result<()> { let tests: Vec<Test> = vec![ Test { name: "a < 1 and b > 3", - expr: col("a").lt(lit(1)).and(col("b").gt(lit(3i32))), + expr: col("a", i64::to_data_type()) + .lt(&lit(1))? + .and(&col("b", i32::to_data_type()).gt(&lit(3i32))?)?, expect: false, error: "", }, Test { name: "1 > -a or 3 >= b", - expr: lit(1).gt(neg(col("a"))).or(lit(3i32).gt_eq(col("b"))), + expr: lit(1) + .gt(&neg(col("a", i64::to_data_type())))? + .or(&lit(3i32).gt_eq(&col("b", i32::to_data_type()))?)?, expect: true, error: "", }, Test { name: "a = 1 and b != 3", - expr: col("a").eq(lit(1)).and(col("b").not_eq(lit(3))), + expr: ExpressionOp::eq(&col("a", i64::to_data_type()), &lit(1))? + .and(&col("b", i32::to_data_type()).not_eq(&lit(3))?)?, expect: true, error: "", }, Test { name: "a is null", - expr: LegacyExpression::create_scalar_function("is_null", vec![col("a")]), + expr: col("a", i64::to_data_type()).unary_op("is_null")?, expect: true, error: "", }, Test { name: "a is not null", - expr: LegacyExpression::create_scalar_function("is_not_null", vec![col("a")]), + expr: col("a", i64::to_data_type()).unary_op("is_not_null")?, expect: false, error: "", }, Test { name: "b is not null", - expr: LegacyExpression::create_scalar_function("is_not_null", vec![col("b")]), + expr: col("b", i32::to_data_type()).unary_op("is_not_null")?, expect: true, error: "", }, Test { name: "null", - expr: LegacyExpression::create_literal(DataValue::Null), + expr: lit_null(), expect: false, error: "", }, Test { name: "b >= 0 and c like '%sys%'", - expr: col("b") - .gt_eq(lit(0)) - .and(LegacyExpression::create_binary_expression("like", vec![ - col("c"), - lit("%sys%".as_bytes()), - ])), + expr: col("b", i32::to_data_type()) + .gt_eq(&lit(0))? 
+ .and(&col("c", Vu8::to_data_type()).binary_op("like", &lit("%sys%".as_bytes()))?)?, expect: true, error: "", }, Test { name: "c like 'ab_'", - expr: LegacyExpression::create_binary_expression("like", vec![ - col("c"), - lit("ab_".as_bytes()), - ]), + expr: col("c", Vu8::to_data_type()).binary_op("like", &lit("ab_".as_bytes()))?, expect: true, error: "", }, Test { name: "c like 'bcdf'", - expr: LegacyExpression::create_binary_expression("like", vec![ - col("c"), - lit("bcdf".as_bytes()), - ]), + expr: col("c", Vu8::to_data_type()).binary_op("like", &lit("bcdf".as_bytes()))?, expect: false, error: "", }, Test { name: "c not like 'ac%'", - expr: LegacyExpression::create_binary_expression("not like", vec![ - col("c"), - lit("ac%".as_bytes()), - ]), + expr: col("c", Vu8::to_data_type()).binary_op("not_like", &lit("ac%".as_bytes()))?, expect: true, error: "", }, Test { name: "a + b > 30", - expr: add(col("a"), col("b")).gt(lit(30i32)), + expr: add(col("a", i64::to_data_type()), col("b", i32::to_data_type())) + .gt(&lit(30i32))?, expect: false, error: "", }, Test { name: "a + b < 10", - expr: add(col("a"), col("b")).lt(lit(10i32)), + expr: add(col("a", i64::to_data_type()), col("b", i32::to_data_type())) + .lt(&lit(10i32))?, expect: true, error: "", }, Test { name: "a - b <= -10", - expr: sub(col("a"), col("b")).lt_eq(lit(-10i32)), + expr: sub(col("a", i64::to_data_type()), col("b", i32::to_data_type())) + .lt_eq(&lit(-10i32))?, expect: true, error: "Code: 1067, displayText = Function '-' is not monotonic in the variables range.", }, Test { name: "a < b", - expr: col("a").lt(col("b")), + expr: col("a", i64::to_data_type()).lt(&col("b", i32::to_data_type()))?, expect: true, error: "", }, Test { name: "a + 9 < b", - expr: add(col("a"), lit(9)).lt(col("b")), + expr: add(col("a", i64::to_data_type()), lit(9)).lt(&col("b", i32::to_data_type()))?, expect: false, error: "", }, Test { name: "a > 1 and d > 2", - expr: col("a").gt(lit(1)).and(col("d").gt(lit(2))), + expr: col("a", i64::to_data_type()) + .gt(&lit(1))? + .and(&col("d", i32::to_data_type()).gt(&lit(2))?)?, expect: true, error: "", }, Test { name: "a > 100 or d > 2", - expr: col("a").gt(lit(100)).or(col("d").gt(lit(2))), + expr: col("a", i64::to_data_type()).gt(&lit(100))?.or(&col( + "d", + i32::to_data_type(), + ) + .gt(&lit(2))?)?, expect: true, error: "", }, Test { name: "a > 100 or a + d > 2", - expr: col("a").gt(lit(100)).or(add(col("a"), col("d")).gt(lit(2))), + expr: col("a", i64::to_data_type()).gt(&lit(100))?.or(&add( + col("a", i64::to_data_type()), + col("d", i32::to_data_type()), + ) + .gt(&lit(2))?)?, expect: true, error: "", }, @@ -210,134 +224,114 @@ fn test_build_verifiable_function() -> Result<()> { struct Test { name: &'static str, - expr: LegacyExpression, + expr: Expression, expect: &'static str, } let tests: Vec = vec![ Test { name: "a < 1 and b > 3", - expr: col("a").lt(lit(1)).and(col("b").gt(lit(3))), + expr: col("a", i64::to_data_type()) + .lt(&lit(1))? + .and(&col("b", i32::to_data_type()).gt(&lit(3))?)?, expect: "((min_a < 1) and (max_b > 3))", }, Test { name: "1 > -a or 3 >= b", - expr: lit(1).gt(neg(col("a"))).or(lit(3).gt_eq(col("b"))), + expr: lit(1) + .gt(&neg(col("a", i64::to_data_type())))? + .or(&lit(3).gt_eq(&col("b", i32::to_data_type()))?)?, expect: "((min_(negate a) < 1) or (min_b <= 3))", }, Test { name: "a = 1 and b != 3", - expr: col("a").eq(lit(1)).and(col("b").not_eq(lit(3))), + expr: ExpressionOp::eq(&col("a", i64::to_data_type()), &lit(1))? 
+ .and(&col("b", i32::to_data_type()).not_eq(&lit(3))?)?, expect: "(((min_a <= 1) and (max_a >= 1)) and ((min_b != 3) or (max_b != 3)))", }, Test { name: "a is null", - expr: LegacyExpression::create_scalar_function("is_null", vec![col("a")]), + expr: col("a", i64::to_data_type()).unary_op("is_null")?, expect: "(nulls_a > 0)", }, Test { name: "a is not null", - expr: LegacyExpression::create_scalar_function("is_not_null", vec![col("a")]), + expr: col("a", i64::to_data_type()).unary_op("is_not_null")?, expect: "(nulls_a != row_count_a)", }, Test { name: "b >= 0 and c like 0xffffff", - expr: col("b") - .gt_eq(lit(0)) - .and(LegacyExpression::create_binary_expression("like", vec![ - col("c"), - lit(vec![255u8, 255, 255]), - ])), + expr: col("b", i32::to_data_type()).gt_eq(&lit(0))?.and( + &col("c", Vu8::to_data_type()).binary_op("like", &lit(vec![255u8, 255, 255]))?, + )?, expect: "((max_b >= 0) and (max_c >= ffffff))", }, Test { name: "c like 'sys_'", - expr: LegacyExpression::create_binary_expression("like", vec![ - col("c"), - lit("sys_".as_bytes()), - ]), + expr: col("c", Vu8::to_data_type()).binary_op("like", &lit("sys_".as_bytes()))?, expect: "((max_c >= sys) and (min_c < syt))", }, Test { name: "c like 'sys\\%'", - expr: LegacyExpression::create_binary_expression("like", vec![ - col("c"), - lit("sys\\%".as_bytes()), - ]), + expr: col("c", Vu8::to_data_type()).binary_op("like", &lit("sys\\%".as_bytes()))?, expect: "((max_c >= sys%) and (min_c < sys&))", }, Test { name: "c like 'sys\\t'", - expr: LegacyExpression::create_binary_expression("like", vec![ - col("c"), - lit("sys\\t".as_bytes()), - ]), + expr: col("c", Vu8::to_data_type()).binary_op("like", &lit("sys\\t".as_bytes()))?, expect: "((max_c >= sys\\t) and (min_c < sys\\u))", }, Test { name: "c not like 'sys\\%'", - expr: LegacyExpression::create_binary_expression("not like", vec![ - col("c"), - lit("sys\\%".as_bytes()), - ]), + expr: col("c", Vu8::to_data_type()).binary_op("not_like", &lit("sys\\%".as_bytes()))?, expect: "((min_c != sys%) or (max_c != sys%))", }, Test { name: "c not like 'sys\\s'", - expr: LegacyExpression::create_binary_expression("not like", vec![ - col("c"), - lit("sys\\s".as_bytes()), - ]), + expr: col("c", Vu8::to_data_type()).binary_op("not_like", &lit("sys\\s".as_bytes()))?, expect: "((min_c != sys\\s) or (max_c != sys\\s))", }, Test { name: "c not like 'sys%'", - expr: LegacyExpression::create_binary_expression("not like", vec![ - col("c"), - lit("sys%".as_bytes()), - ]), + expr: col("c", Vu8::to_data_type()).binary_op("not_like", &lit("sys%".as_bytes()))?, expect: "((min_c < sys) or (max_c >= syt))", }, Test { name: "c not like 'sys%a'", - expr: LegacyExpression::create_binary_expression("not like", vec![ - col("c"), - lit("sys%a".as_bytes()), - ]), + expr: col("c", Vu8::to_data_type()).binary_op("not_like", &lit("sys%a".as_bytes()))?, expect: "true", }, Test { name: "c not like 0xffffff%", - expr: LegacyExpression::create_binary_expression("not like", vec![ - col("c"), - lit(vec![255u8, 255, 255, 37]), - ]), + expr: col("c", Vu8::to_data_type()) + .binary_op("not_like", &lit(vec![255u8, 255, 255, 37]))?, expect: "(min_c < ffffff)", }, Test { name: "abs(a) = b - 3", - expr: LegacyExpression::create_scalar_function("abs", vec![col("a")]) - .eq(add(col("b"), lit(3))), + expr: ExpressionOp::eq( + &col("a", i64::to_data_type()).unary_op("abs")?, + &add(col("b", i32::to_data_type()), lit(3)), + )?, expect: "((min_abs(a) <= max_(b + 3)) and (max_abs(a) >= min_(b + 3)))", }, Test { name: "a + b <= 3", - expr: 
add(col("a"), col("b")).lt_eq(lit(3)), + expr: add(col("a", i64::to_data_type()), col("b", i32::to_data_type())) + .lt_eq(&lit(3))?, expect: "(min_(a + b) <= 3)", }, Test { name: "a + b <= 10 - a", - expr: add(col("a"), col("b")).lt_eq(sub(lit(10), col("a"))), + expr: add(col("a", i64::to_data_type()), col("b", i32::to_data_type())) + .lt_eq(&sub(lit(10), col("a", i64::to_data_type())))?, expect: "true", }, Test { name: "a <= b + rand()", expr: add( - col("a"), - add( - col("b"), - LegacyExpression::create_scalar_function("rand", vec![]), - ), + col("a", i64::to_data_type()), + add(col("b", i32::to_data_type()), func("rand")?), ), expect: "true", }, @@ -419,7 +413,7 @@ fn test_bound_for_like_pattern() -> Result<()> { struct Test { name: &'static str, - expr: LegacyExpression, + expr: Expression, expect: bool, error: &'static str, } diff --git a/src/query/service/tests/it/storages/memory.rs b/src/query/service/tests/it/storages/memory.rs index b26382b71352..37c8383b8b1a 100644 --- a/src/query/service/tests/it/storages/memory.rs +++ b/src/query/service/tests/it/storages/memory.rs @@ -19,11 +19,13 @@ use common_datavalues::prelude::*; use common_exception::Result; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; -use common_planner::*; +use common_planner::extras::Extras; +use common_planner::extras::Statistics; +use common_planner::plans::Projection; +use common_sql::executor::table_read_plan::ToReadDataSourcePlan; use databend_query::sessions::TableContext; use databend_query::sql::plans::create_table_v2::TableOptions; use databend_query::storages::memory::MemoryTable; -use databend_query::storages::ToReadDataSourcePlan; use databend_query::stream::DataBlockStream; use futures::TryStreamExt; diff --git a/src/query/service/tests/it/storages/null.rs b/src/query/service/tests/it/storages/null.rs index d822d64267ee..de40d11b1ccd 100644 --- a/src/query/service/tests/it/storages/null.rs +++ b/src/query/service/tests/it/storages/null.rs @@ -17,9 +17,9 @@ use common_datavalues::prelude::*; use common_exception::Result; use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; +use common_sql::executor::table_read_plan::ToReadDataSourcePlan; use databend_query::sql::plans::create_table_v2::TableOptions; use databend_query::storages::null::NullTable; -use databend_query::storages::ToReadDataSourcePlan; use databend_query::stream::DataBlockStream; use futures::TryStreamExt; diff --git a/src/query/service/tests/it/storages/system.rs b/src/query/service/tests/it/storages/system.rs index d5a14b6ca243..8c6adbc70b78 100644 --- a/src/query/service/tests/it/storages/system.rs +++ b/src/query/service/tests/it/storages/system.rs @@ -27,6 +27,7 @@ use common_meta_types::UserInfo; use common_meta_types::UserOption; use common_meta_types::UserQuota; use common_metrics::init_default_metrics_recorder; +use common_sql::executor::table_read_plan::ToReadDataSourcePlan; use common_storage::StorageParams; use common_storage::StorageS3Config; use common_storages_factory::system::CatalogsTable; @@ -47,7 +48,6 @@ use databend_query::storages::system::SettingsTable; use databend_query::storages::system::TablesTableWithoutHistory; use databend_query::storages::system::TracingTable; use databend_query::storages::system::UsersTable; -use databend_query::storages::ToReadDataSourcePlan; use databend_query::stream::DataBlockStream; use futures::TryStreamExt; use goldenfile::Mint; diff --git a/src/query/service/tests/it/table_functions/numbers_table.rs 
b/src/query/service/tests/it/table_functions/numbers_table.rs index 72ac048034a1..a81f1678a3e8 100644 --- a/src/query/service/tests/it/table_functions/numbers_table.rs +++ b/src/query/service/tests/it/table_functions/numbers_table.rs @@ -15,14 +15,13 @@ use common_base::base::tokio; use common_datavalues::prelude::*; use common_exception::Result; -use common_legacy_expression::*; -use common_planner::*; +use common_planner::extras::Extras; +use common_sql::executor::table_read_plan::ToReadDataSourcePlan; use databend_query::interpreters::InterpreterFactory; use databend_query::sessions::SessionManager; use databend_query::sessions::SessionType; use databend_query::sessions::TableContext; use databend_query::sql::Planner; -use databend_query::storages::ToReadDataSourcePlan; use databend_query::stream::DataBlockStream; use databend_query::table_functions::NumbersTable; use futures::TryStreamExt; @@ -33,7 +32,7 @@ use crate::tests::TestGlobalServices; #[tokio::test] async fn test_number_table() -> Result<()> { - let tbl_args = Some(vec![LegacyExpression::create_literal(DataValue::UInt64(8))]); + let tbl_args = Some(vec![DataValue::UInt64(8)]); let (_guard, ctx) = crate::tests::create_query_context().await?; let table = NumbersTable::create("system", "numbers_mt", 1, tbl_args)?; diff --git a/src/query/sql/src/executor/expression_builder.rs b/src/query/sql/src/executor/expression_builder.rs index 64c32ce96dd8..9d17a482810c 100644 --- a/src/query/sql/src/executor/expression_builder.rs +++ b/src/query/sql/src/executor/expression_builder.rs @@ -13,6 +13,11 @@ // limitations under the License. use common_datavalues::DataSchema; +use common_datavalues::DataTypeImpl; +use common_datavalues::DataValue; +use common_datavalues::NullType; +use common_datavalues::StringType; +use common_datavalues::ToDataType; use common_exception::ErrorCode; use common_exception::Result; use common_functions::scalars::FunctionFactory; @@ -150,6 +155,8 @@ where ExpressionBuilder: FiledNameFormat pub trait ExpressionOp { fn binary_op(&self, name: &str, other: &Self) -> Result<Expression>; + fn unary_op(&self, name: &str) -> Result<Expression>; + fn and(&self, other: &Self) -> Result<Expression> { self.binary_op("and", other) } @@ -194,4 +201,127 @@ impl ExpressionOp for Expression { return_type: func.return_type(), }) } + + fn unary_op(&self, name: &str) -> Result<Expression> { + let func = FunctionFactory::instance().get(name, &[&self.data_type()])?; + + Ok(Expression::Function { + name: name.to_owned(), + args: vec![self.clone()], + return_type: func.return_type(), + }) + } +} + +/// Add binary function. +pub fn add(left: Expression, right: Expression) -> Expression { + left.binary_op("+", &right).unwrap() +} + +/// Sub binary function. +pub fn sub(left: Expression, right: Expression) -> Expression { + left.binary_op("-", &right).unwrap() +} + +/// Not. +pub fn not(other: Expression) -> Expression { + other.unary_op("not").unwrap() +} + +/// Neg. +pub fn neg(other: Expression) -> Expression { + other.unary_op("negate").unwrap() +} + +/// Mod binary function. +pub fn modular(left: Expression, right: Expression) -> Expression { + left.binary_op("%", &right).unwrap() +} + +/// sum() aggregate function. +pub fn sum(other: Expression) -> Expression { + other.unary_op("sum").unwrap() +}
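Together with the `col` and `lit` constructors defined just below, these free functions let tests assemble an `Expression` tree in a single line; for example (a sketch that reuses only helpers from this file):

```rust
use common_datavalues::prelude::*;
use common_exception::Result;
use common_planner::Expression;
use common_sql::executor::add;
use common_sql::executor::col;
use common_sql::executor::lit;
use common_sql::executor::neg;
use common_sql::executor::sub;
use common_sql::executor::ExpressionOp;

// Builds `(a + b) > (10 - (-a))`. `gt` resolves ">" through the
// FunctionFactory, so it returns a Result rather than unwrapping.
fn sample_filter() -> Result<Expression> {
    let a = col("a", i64::to_data_type());
    let b = col("b", i64::to_data_type());
    add(a.clone(), b).gt(&sub(lit(10i64), neg(a)))
}
```

Note the asymmetry visible above: the arithmetic helpers `unwrap()` their factory lookup, while the `ExpressionOp` combinators return `Result` and leave error handling to the caller.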
+ +/// avg() aggregate function. +pub fn avg(other: Expression) -> Expression { + other.unary_op("avg").unwrap() +} + +pub fn func(name: &str) -> Result<Expression> { + let func = FunctionFactory::instance().get(name, &[])?; + + Ok(Expression::Function { + name: name.to_owned(), + args: vec![], + return_type: func.return_type(), + }) +} + +pub fn col(name: &str, data_type: DataTypeImpl) -> Expression { + Expression::IndexedVariable { + name: name.to_string(), + data_type, + } +} + +pub trait Literal { + fn to_literal(&self) -> Expression; +} + +impl Literal for &[u8] { + fn to_literal(&self) -> Expression { + Expression::Constant { + value: DataValue::String(self.to_vec()), + data_type: *Box::new(StringType::new_impl()), + } + } +} + +impl Literal for Vec<u8> { + fn to_literal(&self) -> Expression { + Expression::Constant { + value: DataValue::String(self.clone()), + data_type: *Box::new(StringType::new_impl()), + } + } +} + +macro_rules! make_literal { + ($TYPE: ty, $SUPER: ident, $SCALAR: ident) => { + #[allow(missing_docs)] + impl Literal for $TYPE { + fn to_literal(&self) -> Expression { + Expression::Constant { + value: DataValue::$SCALAR(*self as $SUPER), + data_type: *Box::new($SUPER::to_data_type()), + } + } + } + }; +} + +make_literal!(bool, bool, Boolean); +make_literal!(f32, f64, Float64); +make_literal!(f64, f64, Float64); + +make_literal!(i8, i64, Int64); +make_literal!(i16, i64, Int64); +make_literal!(i32, i64, Int64); +make_literal!(i64, i64, Int64); + +make_literal!(u8, u64, UInt64); +make_literal!(u16, u64, UInt64); +make_literal!(u32, u64, UInt64); +make_literal!(u64, u64, UInt64); + +pub fn lit<T: Literal>(n: T) -> Expression { + n.to_literal() +} + +pub fn lit_null() -> Expression { + Expression::Constant { + value: DataValue::Null, + data_type: *Box::new(NullType::new_impl()), + } } From 3ab9e25c69e0f1c5d896056a2a57368a252b6e1c Mon Sep 17 00:00:00 2001 From: baishen Date: Sun, 30 Oct 2022 19:22:29 +0800 Subject: [PATCH 44/47] refactor(query): fix taplo --- src/query/service/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml index 65294434480f..368d7371bb96 100644 --- a/src/query/service/Cargo.toml +++ b/src/query/service/Cargo.toml @@ -83,7 +83,6 @@ backon = "0.2" bumpalo = "3.11.0" byteorder = "1.4.3" bytes = "1.2.1" -unicode-segmentation = "1.10.0" chrono = "0.4.22" chrono-tz = "0.6.3" futures = "0.3.24" @@ -114,6 +113,7 @@ tokio-stream = { version = "0.1.10", features = ["net"] } tonic = "0.8.1" tracing = "0.1.36" typetag = "0.2.3" +unicode-segmentation = "1.10.0" uuid = { version = "1.1.2", features = ["serde", "v4"] } [dev-dependencies] From a760be3a98dac01dc9ce390c63880170ccb8f0fa Mon Sep 17 00:00:00 2001 From: baishen Date: Sun, 30 Oct 2022 20:08:47 +0800 Subject: [PATCH 45/47] refactor(query): fix clippy --- Cargo.lock | 2 +- src/query/README.md | 1 - src/query/legacy-expression/src/expression.rs | 376 ------------ .../tests/it/monotonicity.rs | 581 ------------------ .../tests/it/storages/index/range_filter.rs | 12 +- src/query/storages/fuse/src/pruning/pruner.rs | 2 +- 6 files changed, 8 insertions(+), 966 deletions(-) delete mode 100644 src/query/legacy-expression/src/expression.rs delete mode 100644 src/query/legacy-expression/tests/it/monotonicity.rs diff --git a/Cargo.lock b/Cargo.lock index 50161971dd02..440479666110 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8949,7 +8949,7 @@ dependencies = [ [[package]] name = "xorfilter-rs" version = "0.5.1" -source = 
"git+https://github.com/datafuse-extras/xorfilter?rev=databend-alpha.1#008ca3ccdf8b13b2b123749d56d6179c4aac33f1" +source = "git+https://github.com/datafuse-extras/xorfilter?tag=databend-alpha.1#008ca3ccdf8b13b2b123749d56d6179c4aac33f1" dependencies = [ "cbordata", ] diff --git a/src/query/README.md b/src/query/README.md index 5ce0bfec1b5d..b755d96bc7b1 100644 --- a/src/query/README.md +++ b/src/query/README.md @@ -12,7 +12,6 @@ Databend Query is a Distributed Query Engine at scale. - [`formats`](./formats/), the serialization and deserialization of data in various formats to the outside. - [`functions`](./functions/), scalar functions and aggregate functions, etc., will be gradually migrated to `functions-v2`. - [`functions-v2`](./functions-v2/), scalar functions and aggregate functions, etc., based on `expression`. -- [`legacy-parser`](./legacy-parser/), the old parser, which will be replaced by ast, built with sqlparser-rs. - [`menagement`](./menagement/) for clusters, quotas, etc. - [`pipeline`](./pipeline/) implements the scheduling framework for physical operators. - [`planners`](./planners/) builds an execution plan from the user's SQL statement and represents the query with different types of relational operators. diff --git a/src/query/legacy-expression/src/expression.rs b/src/query/legacy-expression/src/expression.rs deleted file mode 100644 index 3c2ee5da22e8..000000000000 --- a/src/query/legacy-expression/src/expression.rs +++ /dev/null @@ -1,376 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::HashSet; -use std::fmt; - -use common_datavalues::prelude::*; -use common_exception::ErrorCode; -use common_exception::Result; -use common_functions::aggregates::AggregateFunctionFactory; -use common_functions::aggregates::AggregateFunctionRef; -use once_cell::sync::Lazy; - -use crate::ExpressionDataTypeVisitor; -use crate::ExpressionVisitor; - -static OP_SET: Lazy> = Lazy::new(|| { - [ - "database", - "version", - "current_user", - "current_role", - "user", - ] - .iter() - .copied() - .collect() -}); - -/// REMOVE ME: LegacyExpression should be removed. -#[derive(serde::Serialize, serde::Deserialize, Clone, PartialEq)] -pub enum LegacyExpression { - /// An expression with a alias name. - Alias(String, Box), - - /// Column name. - Column(String), - - /// Qualified column name. - QualifiedColumn(Vec), - - /// Constant value. - /// Note: When literal represents a column, its column_name will not be None - Literal { - value: DataValue, - column_name: Option, - - // Logic data_type for this literal - data_type: DataTypeImpl, - }, - - /// A unary expression such as "NOT foo" - UnaryExpression { - op: String, - expr: Box, - }, - - /// A binary expression such as "age > 40" - BinaryExpression { - left: Box, - op: String, - right: Box, - }, - - /// ScalarFunction with a set of arguments. 
- /// Note: BinaryFunction is a also kind of functions function - ScalarFunction { - op: String, - args: Vec, - }, - - /// AggregateFunction with a set of arguments. - AggregateFunction { - op: String, - distinct: bool, - params: Vec, - args: Vec, - }, - - /// A sort expression, that can be used to sort values. - Sort { - /// The expression to sort on - expr: Box, - /// The direction of the sort - asc: bool, - /// Whether to put Nulls before all other data values - nulls_first: bool, - /// The original expression from parser. Because sort 'expr' field maybe overwritten by a Column expression, like - /// from BinaryExpression { +, number, number} to Column(number+number), the orig_expr is for keeping the original - /// one that is before overwritten. This field is mostly for function monotonicity optimization purpose. - origin_expr: Box, - }, - - /// All fields(*) in a schema. - Wildcard, - - /// Casts the expression to a given type and will return a runtime error if the expression cannot be cast. - /// This expression is guaranteed to have a fixed type. - Cast { - /// The expression being cast - expr: Box, - /// The `DataType` the expression will yield - data_type: DataTypeImpl, - /// The PostgreSQL style cast `expr::datatype` - pg_style: bool, - }, - - /// Access elements of `Array`, `Object` and `Variant` by index or key, like `arr[0][1]`, or `obj:k1:k2` - MapAccess { - name: String, - args: Vec, - }, -} - -impl LegacyExpression { - pub fn create_literal(value: DataValue) -> LegacyExpression { - let data_type = value.data_type(); - LegacyExpression::Literal { - value, - column_name: None, - data_type, - } - } - - pub fn create_literal_with_type(value: DataValue, data_type: DataTypeImpl) -> LegacyExpression { - LegacyExpression::Literal { - value, - data_type, - column_name: None, - } - } - - pub fn column_name(&self) -> String { - match self { - LegacyExpression::Alias(name, _expr) => name.clone(), - LegacyExpression::Column(name) => name.clone(), - LegacyExpression::Literal { - value, column_name, .. - } => match column_name { - Some(name) => name.clone(), - None => format_datavalue_sql(value), - }, - LegacyExpression::UnaryExpression { op, expr } => { - format!("({} {})", op.to_lowercase(), expr.column_name()) - } - LegacyExpression::BinaryExpression { op, left, right } => { - format!( - "({} {} {})", - left.column_name(), - op.to_lowercase(), - right.column_name() - ) - } - LegacyExpression::ScalarFunction { op, args } => { - match OP_SET.get(&op.to_lowercase().as_ref()) { - Some(_) => format!("{}()", op), - None => { - let args_column_name = args - .iter() - .map(LegacyExpression::column_name) - .collect::>(); - - format!("{}({})", op, args_column_name.join(", ")) - } - } - } - LegacyExpression::AggregateFunction { - op, - distinct, - params, - args, - } => { - let args_column_name = args - .iter() - .map(LegacyExpression::column_name) - .collect::>(); - let params_name = params - .iter() - .map(|v| DataValue::custom_display(v, true)) - .collect::>(); - - let prefix = if params.is_empty() { - op.to_string() - } else { - format!("{}({})", op, params_name.join(", ")) - }; - - match distinct { - true => format!("{}(distinct {})", prefix, args_column_name.join(", ")), - false => format!("{}({})", prefix, args_column_name.join(", ")), - } - } - LegacyExpression::Sort { expr, .. 
} => expr.column_name(), - LegacyExpression::Cast { - expr, - data_type, - pg_style, - } => { - if *pg_style { - format!("{}::{}", expr.column_name(), data_type.sql_name()) - } else if data_type.is_nullable() { - let ty: NullableType = data_type.to_owned().try_into().unwrap(); - format!( - "try_cast({} as {})", - expr.column_name(), - ty.inner_type().sql_name() - ) - } else { - format!("cast({} as {})", expr.column_name(), data_type.sql_name()) - } - } - LegacyExpression::MapAccess { name, .. } => name.clone(), - _ => format!("{:?}", self), - } - } - - pub fn to_data_field(&self, input_schema: &DataSchemaRef) -> Result { - let name = self.column_name(); - self.to_data_type(input_schema) - .map(|return_type| DataField::new(&name, return_type)) - } - - pub fn to_data_type(&self, input_schema: &DataSchemaRef) -> Result { - let visitor = ExpressionDataTypeVisitor::create(input_schema.clone()); - visitor.visit(self)?.finalize() - } - - pub fn to_aggregate_function(&self, schema: &DataSchemaRef) -> Result { - match self { - LegacyExpression::AggregateFunction { - op, - distinct, - params, - args, - } => { - let mut func_name = op.clone(); - if *distinct { - func_name += "_distinct"; - } - - let mut fields = Vec::with_capacity(args.len()); - for arg in args.iter() { - fields.push(arg.to_data_field(schema)?); - } - AggregateFunctionFactory::instance().get(&func_name, params.clone(), fields) - } - _ => Err(ErrorCode::LogicalError(format!( - "Expression must be aggregated function, {:?}", - self - ))), - } - } - - pub fn to_aggregate_function_names(&self) -> Result> { - match self { - LegacyExpression::AggregateFunction { args, .. } => { - let mut names = Vec::with_capacity(args.len()); - for arg in args.iter() { - names.push(arg.column_name()); - } - Ok(names) - } - _ => Err(ErrorCode::LogicalError( - "Expression must be aggregated function", - )), - } - } - - pub fn create_scalar_function(op: &str, args: LegacyExpressions) -> LegacyExpression { - let op = op.to_string(); - LegacyExpression::ScalarFunction { op, args } - } - - pub fn create_unary_expression(op: &str, mut args: LegacyExpressions) -> LegacyExpression { - let op = op.to_string(); - let expr = Box::new(args.remove(0)); - LegacyExpression::UnaryExpression { op, expr } - } - - pub fn create_binary_expression(op: &str, mut args: LegacyExpressions) -> LegacyExpression { - let op = op.to_string(); - let left = Box::new(args.remove(0)); - let right = Box::new(args.remove(0)); - LegacyExpression::BinaryExpression { op, left, right } - } -} - -// Also used as expression column name -impl fmt::Debug for LegacyExpression { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - LegacyExpression::Alias(alias, v) => write!(f, "{:?} as {:#}", v, alias), - LegacyExpression::Column(ref v) => write!(f, "{:#}", v), - LegacyExpression::QualifiedColumn(v) => write!(f, "{:?}", v.join(".")), - LegacyExpression::Literal { ref value, .. 
} => write!(f, "{:#}", value), - LegacyExpression::BinaryExpression { op, left, right } => { - write!(f, "({:?} {} {:?})", left, op, right,) - } - - LegacyExpression::UnaryExpression { op, expr } => { - write!(f, "({} {:?})", op, expr) - } - - LegacyExpression::ScalarFunction { op, args } => { - write!(f, "{}(", op)?; - - for (i, _) in args.iter().enumerate() { - if i > 0 { - write!(f, ", ")?; - } - write!(f, "{:?}", args[i],)?; - } - write!(f, ")") - } - - LegacyExpression::AggregateFunction { - op, - distinct, - params, - args, - } => { - let args_column_name = args - .iter() - .map(LegacyExpression::column_name) - .collect::>(); - let params_name = params - .iter() - .map(|v| DataValue::custom_display(v, true)) - .collect::>(); - - if params.is_empty() { - write!(f, "{}", op)?; - } else { - write!(f, "{}({})", op, params_name.join(", "))?; - }; - - match distinct { - true => write!(f, "(distinct {})", args_column_name.join(", "))?, - false => write!(f, "({})", args_column_name.join(", "))?, - } - Ok(()) - } - - LegacyExpression::Sort { expr, .. } => write!(f, "{:?}", expr), - LegacyExpression::Wildcard => write!(f, "*"), - LegacyExpression::Cast { - expr, - data_type, - pg_style, - } => { - if *pg_style { - write!(f, "{:?}::{}", expr, data_type.name()) - } else if data_type.is_nullable() { - let ty: NullableType = data_type.to_owned().try_into().unwrap(); - write!(f, "try_cast({:?} as {})", expr, ty.inner_type().name()) - } else { - write!(f, "cast({:?} as {})", expr, data_type.name()) - } - } - LegacyExpression::MapAccess { name, .. } => write!(f, "{}", name), - } - } -} - -pub type LegacyExpressions = Vec; diff --git a/src/query/legacy-expression/tests/it/monotonicity.rs b/src/query/legacy-expression/tests/it/monotonicity.rs deleted file mode 100644 index 5759683b0b77..000000000000 --- a/src/query/legacy-expression/tests/it/monotonicity.rs +++ /dev/null @@ -1,581 +0,0 @@ -// Copyright 2021 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::collections::HashMap; - -use common_datavalues::prelude::*; -use common_exception::Result; -use common_functions::scalars::Monotonicity; -use common_legacy_expression::*; - -struct Test { - name: &'static str, - expr: LegacyExpression, - column: &'static str, - left: Option, - right: Option, - expect_mono: Monotonicity, -} - -fn create_f64(d: f64) -> Option { - let data_field = DataField::new("x", f64::to_data_type()); - let col = data_field - .data_type() - .create_constant_column(&DataValue::Float64(d), 1) - .unwrap(); - Some(ColumnWithField::new(col, data_field)) -} - -fn create_u8(d: u8) -> Option { - let data_field = DataField::new("x", u8::to_data_type()); - let col = data_field - .data_type() - .create_constant_column(&DataValue::UInt64(d as u64), 1) - .unwrap(); - - Some(ColumnWithField::new(col, data_field)) -} - -fn create_datetime(d: i64) -> Option { - let data_field = DataField::new("x", TimestampType::new_impl()); - let col = data_field - .data_type() - .create_constant_column(&DataValue::Int64(d), 1) - .unwrap(); - - Some(ColumnWithField::new(col, data_field)) -} - -fn verify_test(t: Test) -> Result<()> { - let schema = DataSchemaRefExt::create(vec![ - DataField::new("x", f64::to_data_type()), - DataField::new("y", i64::to_data_type()), - DataField::new("z", TimestampType::new_impl()), - ]); - - let mut variables = HashMap::new(); - variables.insert(t.column.to_string(), (t.left.clone(), t.right.clone())); - - let mut single_point = false; - if t.left.is_some() && t.right.is_some() { - let left = t.left.unwrap().column().get_checked(0)?; - let right = t.right.unwrap().column().get_checked(0)?; - if left == right { - single_point = true; - } - } - - let mono = - ExpressionMonotonicityVisitor::check_expression(schema, &t.expr, variables, single_point); - - assert_eq!( - mono.is_monotonic, t.expect_mono.is_monotonic, - "{} is_monotonic", - t.name - ); - assert_eq!( - mono.is_constant, t.expect_mono.is_constant, - "{} is_constant", - t.name - ); - - if t.expect_mono.is_monotonic { - assert_eq!( - mono.is_positive, t.expect_mono.is_positive, - "{} is_positive", - t.name - ); - } - - if t.expect_mono.is_monotonic || t.expect_mono.is_constant { - let left = mono.left; - let right = mono.right; - - let expected_left = t.expect_mono.left; - let expected_right = t.expect_mono.right; - - if expected_left.is_none() { - assert!(left.is_none(), "{} left", t.name); - } else { - let left_val = left.unwrap().column().get_checked(0)?; - let expected_left_val = expected_left.unwrap().column().get_checked(0)?; - assert!(left_val == expected_left_val, "{}", t.name); - } - - if expected_right.is_none() { - assert!(right.is_none(), "{} right", t.name); - } else { - let right_val = right.unwrap().column().get_checked(0)?; - let expected_right_val = expected_right.unwrap().column().get_checked(0)?; - assert!(right_val == expected_right_val, "{}", t.name); - } - } - Ok(()) -} - -#[test] -fn test_arithmetic_plus_minus() -> Result<()> { - let test_suite = vec![ - Test { - name: "f(x) = x + 12", - expr: add(col("x"), lit(12i32)), - column: "x", - left: None, - right: None, - expect_mono: Monotonicity { - is_monotonic: true, - is_positive: true, - is_constant: false, - left: None, - right: None, - }, - }, - Test { - name: "f(x) = -x + 12", - expr: add(neg(col("x")), lit(12)), - column: "x", - left: None, - right: None, - expect_mono: Monotonicity { - is_monotonic: true, - is_positive: false, - is_constant: false, - left: None, - right: None, - }, - }, - Test { - // Cannot find the column name 
'y'. - name: "f(x,y) = x + y", - expr: add(col("x"), col("y")), - column: "x", - left: None, - right: None, - expect_mono: Monotonicity { - is_monotonic: false, - is_positive: true, - is_constant: false, - left: None, - right: None, - }, - }, - Test { - name: "f(x) = (-x + 12) - x + (1 - x)", - expr: add( - sub(add(neg(col("x")), lit(12)), col("x")), - sub(lit(1), col("x")), - ), - column: "x", - left: None, - right: None, - expect_mono: Monotonicity { - is_monotonic: true, - is_positive: false, - is_constant: false, - left: None, - right: None, - }, - }, - Test { - // Function '-' is not monotonic in the variables range. - name: "f(x) = (x + 12) - x + (1 - x)", - expr: add(sub(add(col("x"), lit(12)), col("x")), sub(lit(1), col("x"))), - column: "x", - left: None, - right: None, - expect_mono: Monotonicity::default(), - }, - ]; - - for t in test_suite.into_iter() { - verify_test(t)?; - } - Ok(()) -} - -#[test] -fn test_arithmetic_mul_div() -> Result<()> { - let test_suite = vec![ - Test { - name: "f(x) = -5 * x", - expr: LegacyExpression::create_binary_expression("*", vec![lit(-5_i8), col("x")]), - column: "x", - left: None, - right: None, - expect_mono: Monotonicity { - is_monotonic: true, - is_positive: false, - is_constant: false, - left: None, - right: None, - }, - }, - Test { - name: "f(x) = -1/x", - expr: LegacyExpression::create_binary_expression("/", vec![lit(-1_i8), col("x")]), - column: "x", - left: create_f64(5.0), - right: create_f64(10.0), - expect_mono: Monotonicity { - is_monotonic: false, - is_positive: true, - is_constant: false, - left: create_f64(-0.2), - right: create_f64(-0.1), - }, - }, - Test { - name: "f(x) = x/10", - expr: LegacyExpression::create_binary_expression("/", vec![col("x"), lit(10_i8)]), - column: "x", - left: None, - right: None, - expect_mono: Monotonicity { - is_monotonic: false, - is_positive: true, - is_constant: false, - left: None, - right: None, - }, - }, - Test { - // Function '*' is not monotonic in the variables range. - name: "f(x) = x * (x-12) where x in [10-1000]", - expr: LegacyExpression::create_binary_expression("*", vec![ - col("x"), - sub(col("x"), lit(12_i64)), - ]), - column: "x", - left: create_f64(10.0), - right: create_f64(1000.0), - expect_mono: Monotonicity::default(), - }, - Test { - name: "f(x) = x * (x-12) where x in [12, 100]", - expr: LegacyExpression::create_binary_expression("*", vec![ - col("x"), - sub(col("x"), lit(12_i64)), - ]), - column: "x", - left: create_f64(12.0), - right: create_f64(100.0), - expect_mono: Monotonicity { - is_monotonic: true, - is_positive: true, - is_constant: false, - left: create_f64(0.0), - right: create_f64(8800.0), - }, - }, - Test { - name: "f(x) = x/(1/x) where x >= 1", - expr: LegacyExpression::create_binary_expression("/", vec![ - col("x"), - LegacyExpression::create_binary_expression("/", vec![lit(1_i8), col("x")]), - ]), - column: "x", - left: create_f64(1.0), - right: create_f64(2.0), - expect_mono: Monotonicity { - is_monotonic: false, - is_positive: true, - is_constant: false, - left: create_f64(1.0), - right: create_f64(4.0), - }, - }, - Test { - // Function '/' is not monotonic in the variables range. 
- name: "f(x) = -x/(2/(x-2)) where x in [0-10]", - expr: LegacyExpression::create_binary_expression("/", vec![ - neg(col("x")), - LegacyExpression::create_binary_expression("/", vec![ - lit(2_i8), - sub(col("x"), lit(2_i8)), - ]), - ]), - column: "x", - left: create_f64(0.0), - right: create_f64(10.0), - expect_mono: Monotonicity::default(), - }, - Test { - name: "f(x) = -x/(2/(x-2)) where x in [4-10]", - expr: LegacyExpression::create_binary_expression("/", vec![ - neg(col("x")), - LegacyExpression::create_binary_expression("/", vec![ - lit(2_i8), - sub(col("x"), lit(2_i8)), - ]), - ]), - column: "x", - left: create_f64(4.0), - right: create_f64(10.0), - expect_mono: Monotonicity { - is_monotonic: false, - is_positive: false, - is_constant: false, - left: create_f64(-4.0), - right: create_f64(-40.0), - }, - }, - ]; - - for t in test_suite.into_iter() { - verify_test(t)?; - } - Ok(()) -} - -#[test] -fn test_abs_function() -> Result<()> { - let test_suite = vec![ - Test { - // Function 'abs' is not monotonic in the variables range. - name: "f(x) = abs(x + 12)", - expr: LegacyExpression::create_scalar_function("abs", vec![add(col("x"), lit(12i32))]), - column: "x", - left: None, - right: None, - expect_mono: Monotonicity::default(), - }, - Test { - name: "f(x) = abs(x) where 0 <= x <= 10", - expr: LegacyExpression::create_scalar_function("abs", vec![col("x")]), - column: "x", - left: create_f64(0.0), - right: create_f64(10.0), - expect_mono: Monotonicity { - is_monotonic: true, - is_positive: true, - is_constant: false, - left: create_f64(0.0), - right: create_f64(10.0), - }, - }, - Test { - name: "f(x) = abs(x) where -10 <= x <= -2", - expr: LegacyExpression::create_scalar_function("abs", vec![col("x")]), - column: "x", - left: create_f64(-10.0), - right: create_f64(-2.0), - expect_mono: Monotonicity { - is_monotonic: true, - is_positive: false, - is_constant: false, - left: create_f64(10.0), - right: create_f64(2.0), - }, - }, - Test { - // Function 'abs' is not monotonic in the variables range. - name: "f(x) = abs(x) where -5 <= x <= 5", - expr: LegacyExpression::create_scalar_function("abs", vec![col("x")]), - column: "x", - left: create_f64(-5.0), - right: create_f64(5.0), - expect_mono: Monotonicity::default(), - }, - Test { - name: "f(x) = abs(x + 12) where -12 <= x <= 1000", - expr: LegacyExpression::create_scalar_function("abs", vec![add(col("x"), lit(12i32))]), - column: "x", - left: create_f64(-12.0), - right: create_f64(1000.0), - expect_mono: Monotonicity { - is_monotonic: true, - is_positive: true, - is_constant: false, - left: create_f64(0.0), - right: create_f64(1012.0), - }, - }, - Test { - // Function 'abs' is not monotonic in the variables range. 
- name: "f(x) = abs(x + 12) where -14 <= x <= 20", - expr: LegacyExpression::create_scalar_function("abs", vec![add(col("x"), lit(12i32))]), - column: "x", - left: create_f64(-14.0), - right: create_f64(20.0), - expect_mono: Monotonicity::default(), - }, - Test { - name: "f(x) = abs( (x - 7) + (x - 3) ) where 5 <= x <= 100", - expr: LegacyExpression::create_scalar_function("abs", vec![add( - sub(col("x"), lit(7_i32)), - sub(col("x"), lit(3_i32)), - )]), - column: "x", - left: create_f64(5.0), - right: create_f64(100.0), - expect_mono: Monotonicity { - is_monotonic: true, - is_positive: true, - is_constant: false, - left: create_f64(0.0), - right: create_f64(190.0), - }, - }, - Test { - name: "f(x) = abs( (-x + 8) - x) where -100 <= x <= 4", - expr: LegacyExpression::create_scalar_function("abs", vec![sub( - add(neg(col("x")), lit(8)), - col("x"), - )]), - column: "x", - left: create_f64(-100.0), - right: create_f64(4.0), - expect_mono: Monotonicity { - is_monotonic: true, - is_positive: false, - is_constant: false, - left: create_f64(208.0), - right: create_f64(0.0), - }, - }, - ]; - - for t in test_suite.into_iter() { - verify_test(t)?; - } - Ok(()) -} - -#[test] -fn test_dates_function() -> Result<()> { - let test_suite = vec![ - Test { - name: "f(x) = to_start_of_week(z+12)", - expr: LegacyExpression::create_scalar_function("to_start_of_week", vec![add( - col("z"), - lit(12i32), - )]), - column: "z", - left: None, - right: None, - expect_mono: Monotonicity { - is_monotonic: true, - is_positive: true, - is_constant: false, - left: None, - right: None, - }, - }, - Test { - name: "f(x) = to_monday(x)", - expr: LegacyExpression::create_scalar_function("to_monday", vec![col("x")]), - column: "x", - left: None, - right: None, - expect_mono: Monotonicity { - is_monotonic: true, - is_positive: true, - is_constant: false, - left: None, - right: None, - }, - }, - Test { - // Function 'to_second' is not monotonic in the variables range. - name: "f(x) = to_second(x)", - expr: LegacyExpression::create_scalar_function("to_second", vec![col("x")]), - column: "x", - left: None, - right: None, - expect_mono: Monotonicity::default(), - }, - Test { - name: "f(z) = to_second(z)", - expr: LegacyExpression::create_scalar_function("to_second", vec![col("z")]), - column: "z", - left: create_datetime(1638288000000000), - right: create_datetime(1638288059000000), - expect_mono: Monotonicity { - is_monotonic: true, - is_positive: true, - is_constant: false, - left: create_u8(0), - right: create_u8(59), - }, - }, - Test { - // Function 'to_day_of_year' is not monotonic in the variables range. - name: "f(z) = to_day_of_year(z)", - expr: LegacyExpression::create_scalar_function("to_day_of_year", vec![col("z")]), - column: "z", - left: create_datetime(1606752119000000), - right: create_datetime(1638288059000000), - expect_mono: Monotonicity::default(), - }, - Test { - name: "f(z) = to_start_of_hour(z)", - expr: LegacyExpression::create_scalar_function("to_start_of_hour", vec![col("z")]), - column: "z", - left: None, - right: None, - expect_mono: Monotonicity { - is_monotonic: true, - is_positive: true, - is_constant: false, - left: None, - right: None, - }, - }, - ]; - - for t in test_suite.into_iter() { - verify_test(t)?; - } - Ok(()) -} - -#[test] -fn test_single_point() -> Result<()> { - let test_suite = vec![ - Test { - // Function 'rand' is not monotonic in the variables range. 
- name: "f(x) = x + rand()", - expr: add( - col("x"), - LegacyExpression::create_scalar_function("rand", vec![]), - ), - column: "x", - left: create_f64(1.0), - right: create_f64(1.0), - expect_mono: Monotonicity::default(), - }, - Test { - name: "f(x) = x * (12 - x)", - expr: LegacyExpression::create_binary_expression("*", vec![ - col("x"), - sub(lit(12_i64), col("x")), - ]), - column: "x", - left: create_f64(1.0), - right: create_f64(1.0), - expect_mono: Monotonicity { - is_monotonic: true, - is_positive: true, - is_constant: true, - left: create_f64(11.0), - right: create_f64(11.0), - }, - }, - ]; - - for t in test_suite.into_iter() { - verify_test(t)?; - } - Ok(()) -} diff --git a/src/query/service/tests/it/storages/index/range_filter.rs b/src/query/service/tests/it/storages/index/range_filter.rs index aacd80b7d930..6b95d2f35506 100644 --- a/src/query/service/tests/it/storages/index/range_filter.rs +++ b/src/query/service/tests/it/storages/index/range_filter.rs @@ -134,7 +134,7 @@ async fn test_range_filter() -> Result<()> { }, Test { name: "c not like 'ac%'", - expr: col("c", Vu8::to_data_type()).binary_op("not_like", &lit("ac%".as_bytes()))?, + expr: col("c", Vu8::to_data_type()).binary_op("not like", &lit("ac%".as_bytes()))?, expect: true, error: "", }, @@ -283,28 +283,28 @@ fn test_build_verifiable_function() -> Result<()> { }, Test { name: "c not like 'sys\\%'", - expr: col("c", Vu8::to_data_type()).binary_op("not_like", &lit("sys\\%".as_bytes()))?, + expr: col("c", Vu8::to_data_type()).binary_op("not like", &lit("sys\\%".as_bytes()))?, expect: "((min_c != sys%) or (max_c != sys%))", }, Test { name: "c not like 'sys\\s'", - expr: col("c", Vu8::to_data_type()).binary_op("not_like", &lit("sys\\s".as_bytes()))?, + expr: col("c", Vu8::to_data_type()).binary_op("not like", &lit("sys\\s".as_bytes()))?, expect: "((min_c != sys\\s) or (max_c != sys\\s))", }, Test { name: "c not like 'sys%'", - expr: col("c", Vu8::to_data_type()).binary_op("not_like", &lit("sys%".as_bytes()))?, + expr: col("c", Vu8::to_data_type()).binary_op("not like", &lit("sys%".as_bytes()))?, expect: "((min_c < sys) or (max_c >= syt))", }, Test { name: "c not like 'sys%a'", - expr: col("c", Vu8::to_data_type()).binary_op("not_like", &lit("sys%a".as_bytes()))?, + expr: col("c", Vu8::to_data_type()).binary_op("not like", &lit("sys%a".as_bytes()))?, expect: "true", }, Test { name: "c not like 0xffffff%", expr: col("c", Vu8::to_data_type()) - .binary_op("not_like", &lit(vec![255u8, 255, 255, 37]))?, + .binary_op("not like", &lit(vec![255u8, 255, 255, 37]))?, expect: "(min_c < ffffff)", }, Test { diff --git a/src/query/storages/fuse/src/pruning/pruner.rs b/src/query/storages/fuse/src/pruning/pruner.rs index 3c6d000ef0bd..a82bdb2c6d49 100644 --- a/src/query/storages/fuse/src/pruning/pruner.rs +++ b/src/query/storages/fuse/src/pruning/pruner.rs @@ -125,7 +125,7 @@ pub fn new_filter_pruner( // convert to filter column names let filter_block_cols = point_query_cols .iter() - .map(|n| BlockFilter::build_filter_column_name(&n)) + .map(|n| BlockFilter::build_filter_column_name(n)) .collect(); return Ok(Some(Arc::new(FilterPruner::new( From 96dce85a5de18558269468cf25559ec9bf4a7e75 Mon Sep 17 00:00:00 2001 From: baishen Date: Mon, 31 Oct 2022 00:49:03 +0800 Subject: [PATCH 46/47] refactor(query): fix test --- src/query/planner/src/expression.rs | 47 ++++-- .../tests/it/storages/fuse/statistics.rs | 2 +- .../tests/it/storages/index/range_filter.rs | 2 +- src/query/storages/index/src/range_filter.rs | 141 ++++++++++++------ 4 files 
changed, 130 insertions(+), 62 deletions(-) diff --git a/src/query/planner/src/expression.rs b/src/query/planner/src/expression.rs index 8acfdd536d5e..14b0229a820d 100644 --- a/src/query/planner/src/expression.rs +++ b/src/query/planner/src/expression.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::fmt::Debug; use std::fmt::Display; use std::fmt::Formatter; @@ -21,7 +22,7 @@ use common_datavalues::DataTypeImpl; use common_datavalues::DataValue; /// Serializable and desugared representation of `Scalar`. -#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)] +#[derive(Clone, Eq, PartialEq, serde::Serialize, serde::Deserialize)] pub enum Expression { IndexedVariable { name: String, @@ -64,16 +65,10 @@ impl Expression { match self { Expression::Constant { value, .. } => common_datavalues::format_datavalue_sql(value), Expression::Function { name, args, .. } => match name.as_str() { - "+" | "-" | "*" | "/" | "%" if args.len() == 2 => { - format!( - "({})", - args.iter() - .map(|arg| arg.column_name()) - .collect::>() - .join(name) - ) - } - ">=" | "<=" | "=" | ">" | "<" | "or" | "and" if args.len() == 2 => { + "+" | "-" | "*" | "/" | "%" | ">=" | "<=" | "=" | "!=" | ">" | "<" | "or" + | "and" + if args.len() == 2 => + { format!( "({} {} {})", args[0].column_name(), @@ -120,3 +115,33 @@ impl Display for Expression { } } } + +impl Debug for Expression { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &self { + Expression::Constant { value, .. } => write!(f, "{:#}", value), + Expression::Function { name, args, .. } => match name.as_str() { + "+" | "-" | "*" | "/" | "%" | ">=" | "<=" | "=" | "!=" | ">" | "<" | "or" + | "and" + if args.len() == 2 => + { + write!(f, "({:?} {} {:?})", args[0], name, args[1]) + } + _ => { + write!(f, "{}(", name)?; + for (i, _) in args.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{:?}", args[i])?; + } + write!(f, ")") + } + }, + Expression::Cast { input, target } => { + write!(f, "cast({:?} as {})", input, format_data_type_sql(target)) + } + Expression::IndexedVariable { name, .. } => write!(f, "{:#}", name), + } + } +} diff --git a/src/query/service/tests/it/storages/fuse/statistics.rs b/src/query/service/tests/it/storages/fuse/statistics.rs index 566f0874f73e..91c0cf1608a5 100644 --- a/src/query/service/tests/it/storages/fuse/statistics.rs +++ b/src/query/service/tests/it/storages/fuse/statistics.rs @@ -217,7 +217,7 @@ async fn test_ft_cluster_stats_with_stats() -> common_exception::Result<()> { assert_eq!(vec![DataValue::Int64(3)], stats.max); // add expression executor. - let expr = add(col("a", i64::to_data_type()), lit(1)); + let expr = add(col("a", i32::to_data_type()), lit(1)); let eval_node = Evaluator::eval_expression(&expr, &schema)?; let func_ctx = FunctionContext::default(); let result = eval_node.eval(&func_ctx, &blocks)?; diff --git a/src/query/service/tests/it/storages/index/range_filter.rs b/src/query/service/tests/it/storages/index/range_filter.rs index 6b95d2f35506..b2babfb038a6 100644 --- a/src/query/service/tests/it/storages/index/range_filter.rs +++ b/src/query/service/tests/it/storages/index/range_filter.rs @@ -241,7 +241,7 @@ fn test_build_verifiable_function() -> Result<()> { expr: lit(1) .gt(&neg(col("a", i64::to_data_type())))? 
diff --git a/src/query/service/tests/it/storages/fuse/statistics.rs b/src/query/service/tests/it/storages/fuse/statistics.rs
index 566f0874f73e..91c0cf1608a5 100644
--- a/src/query/service/tests/it/storages/fuse/statistics.rs
+++ b/src/query/service/tests/it/storages/fuse/statistics.rs
@@ -217,7 +217,7 @@ async fn test_ft_cluster_stats_with_stats() -> common_exception::Result<()> {
     assert_eq!(vec![DataValue::Int64(3)], stats.max);
 
     // add expression executor.
-    let expr = add(col("a", i64::to_data_type()), lit(1));
+    let expr = add(col("a", i32::to_data_type()), lit(1));
     let eval_node = Evaluator::eval_expression(&expr, &schema)?;
     let func_ctx = FunctionContext::default();
     let result = eval_node.eval(&func_ctx, &blocks)?;
diff --git a/src/query/service/tests/it/storages/index/range_filter.rs b/src/query/service/tests/it/storages/index/range_filter.rs
index 6b95d2f35506..b2babfb038a6 100644
--- a/src/query/service/tests/it/storages/index/range_filter.rs
+++ b/src/query/service/tests/it/storages/index/range_filter.rs
@@ -241,7 +241,7 @@ fn test_build_verifiable_function() -> Result<()> {
             expr: lit(1)
                 .gt(&neg(col("a", i64::to_data_type())))?
                 .or(&lit(3).gt_eq(&col("b", i32::to_data_type()))?)?,
-            expect: "((min_(negate a) < 1) or (min_b <= 3))",
+            expect: "((min_negate(a) < 1) or (min_b <= 3))",
         },
         Test {
             name: "a = 1 and b != 3",
diff --git a/src/query/storages/index/src/range_filter.rs b/src/query/storages/index/src/range_filter.rs
index 431ce2f292fb..f4756af9fd85 100644
--- a/src/query/storages/index/src/range_filter.rs
+++ b/src/query/storages/index/src/range_filter.rs
@@ -139,24 +139,27 @@ pub fn build_verifiable_expr(
         data_type: bool::to_data_type(),
     };
 
-    // TODO: Try to convert `not(is_not_null)` to `is_null`.
     let (exprs, op) = match expr {
         Expression::Constant { .. } => return expr.clone(),
-        Expression::Function { name, args, .. } if args.len() == 2 => {
-            let left = &args[0];
-            let right = &args[1];
-            match name.to_lowercase().as_str() {
-                "and" => {
-                    let left = build_verifiable_expr(left, schema, stat_columns);
-                    let right = build_verifiable_expr(right, schema, stat_columns);
-                    return left.and(&right).unwrap();
-                }
-                "or" => {
-                    let left = build_verifiable_expr(left, schema, stat_columns);
-                    let right = build_verifiable_expr(right, schema, stat_columns);
-                    return left.or(&right).unwrap();
+        Expression::Function { name, args, .. } => {
+            if args.len() == 2 {
+                let left = &args[0];
+                let right = &args[1];
+                match name.to_lowercase().as_str() {
+                    "and" => {
+                        let left = build_verifiable_expr(left, schema, stat_columns);
+                        let right = build_verifiable_expr(right, schema, stat_columns);
+                        return left.and(&right).unwrap();
+                    }
+                    "or" => {
+                        let left = build_verifiable_expr(left, schema, stat_columns);
+                        let right = build_verifiable_expr(right, schema, stat_columns);
+                        return left.or(&right).unwrap();
+                    }
+                    _ => (vec![left.clone(), right.clone()], name.clone()),
                 }
-                _ => (vec![left.clone(), right.clone()], name.clone()),
+            } else {
+                try_convert_is_null(name.to_lowercase().as_str(), args.clone())
             }
         }
         _ => return unhandled,
@@ -180,6 +183,25 @@ fn inverse_operator(op: &str) -> Result<&str> {
     }
 }
 
+/// Try to convert `not(is_not_null)` to `is_null`.
+fn try_convert_is_null(name: &str, args: Vec<Expression>) -> (Vec<Expression>, String) {
+    // `is null` will be converted to `not(is not null)` in the parser.
+    // we should convert it back to `is null` here.
+    if name == "not" && args.len() == 1 {
+        if let Expression::Function {
+            name: inner_name,
+            args: inner_args,
+            ..
+        } = &args[0]
+        {
+            if inner_name == "is_not_null" {
+                return (inner_args.clone(), String::from("is_null"));
+            }
+        }
+    }
+    (args, String::from(name))
+}
+
 #[derive(Debug, Copy, Clone, PartialEq)]
 enum StatType {
     Min,
@@ -322,44 +344,65 @@ impl<'a> VerifiableExprBuilder<'a> {
     ) -> Result<Self> {
         // collect state columns
         // exprs's len must be 2
-        let lhs_cols = RequireColumnsVisitor::collect_columns_from_expr(&exprs[0])?;
-        let rhs_cols = RequireColumnsVisitor::collect_columns_from_expr(&exprs[1])?;
-        let (args, cols, op) = match (lhs_cols.len(), rhs_cols.len()) {
-            (0, 0) => {
-                return Err(ErrorCode::UnknownException(
-                    "Constant expression donot need to be handled",
-                ));
-            }
-            (_, 0) => (vec![exprs[0].clone(), exprs[1].clone()], vec![lhs_cols], op),
-            (0, _) => {
-                let op = inverse_operator(op)?;
-                (vec![exprs[1].clone(), exprs[0].clone()], vec![rhs_cols], op)
-            }
-            _ => {
-                if !lhs_cols.is_disjoint(&rhs_cols) {
-                    return Err(ErrorCode::UnknownException(
-                        "Unsupported condition for left and right have same columns",
-                    ));
+        let (args, cols, op) = match exprs.len() {
+            1 => {
+                let cols = RequireColumnsVisitor::collect_columns_from_expr(&exprs[0])?;
+                match cols.len() {
+                    1 => (exprs, vec![cols], op),
+                    _ => {
+                        return Err(ErrorCode::UnknownException(
+                            "Multi-column expressions are not currently supported",
+                        ));
+                    }
                 }
+            }
+            2 => {
+                let lhs_cols = RequireColumnsVisitor::collect_columns_from_expr(&exprs[0])?;
+                let rhs_cols = RequireColumnsVisitor::collect_columns_from_expr(&exprs[1])?;
+
+                match (lhs_cols.len(), rhs_cols.len()) {
+                    (0, 0) => {
+                        return Err(ErrorCode::UnknownException(
+                            "Constant expression donot need to be handled",
+                        ));
+                    }
+                    (_, 0) => (vec![exprs[0].clone(), exprs[1].clone()], vec![lhs_cols], op),
+                    (0, _) => {
+                        let op = inverse_operator(op)?;
+                        (vec![exprs[1].clone(), exprs[0].clone()], vec![rhs_cols], op)
+                    }
+                    _ => {
+                        if !lhs_cols.is_disjoint(&rhs_cols) {
+                            return Err(ErrorCode::UnknownException(
+                                "Unsupported condition for left and right have same columns",
+                            ));
+                        }
 
-                if !matches!(op, "=" | "<" | "<=" | ">" | ">=") {
-                    return Err(ErrorCode::UnknownException(format!(
-                        "Unsupported operator '{:?}' for multi-column expression",
-                        op
-                    )));
-                }
+                        if !matches!(op, "=" | "<" | "<=" | ">" | ">=") {
+                            return Err(ErrorCode::UnknownException(format!(
+                                "Unsupported operator '{:?}' for multi-column expression",
+                                op
+                            )));
+                        }
 
-                if !check_maybe_monotonic(&exprs[1])? {
-                    return Err(ErrorCode::UnknownException(
-                        "Only support the monotonic expression",
-                    ));
-                }
+                        if !check_maybe_monotonic(&exprs[1])? {
+                            return Err(ErrorCode::UnknownException(
+                                "Only support the monotonic expression",
+                            ));
+                        }
 
-                (
-                    vec![exprs[0].clone(), exprs[1].clone()],
-                    vec![lhs_cols, rhs_cols],
-                    op,
-                )
+                        (
+                            vec![exprs[0].clone(), exprs[1].clone()],
+                            vec![lhs_cols, rhs_cols],
+                            op,
+                        )
+                    }
+                }
+            }
+            _ => {
+                return Err(ErrorCode::UnknownException(
+                    "Expressions with more than two args are not currently supported",
+                ));
            }
        };
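Aside: the `try_convert_is_null` rewrite above exists because the parser desugars `c IS NULL` into `not(is_not_null(c))`, which the range index cannot match against min/max statistics directly. A self-contained sketch of the same rewrite follows, again with a simplified `Expr` stand-in rather than the real `Expression` type.

#[derive(Clone, Debug, PartialEq)]
enum Expr {
    Column(String),
    Function { name: String, args: Vec<Expr> },
}

fn try_convert_is_null(name: &str, args: Vec<Expr>) -> (Vec<Expr>, String) {
    // The parser turns `c IS NULL` into `not(is_not_null(c))`; undo that here
    // so the verifiable-expression builder can handle it as a single `is_null`.
    if name == "not" && args.len() == 1 {
        if let Expr::Function { name: inner, args: inner_args } = &args[0] {
            if inner == "is_not_null" {
                return (inner_args.clone(), "is_null".to_string());
            }
        }
    }
    (args, name.to_string())
}

fn main() {
    let not_not_null = vec![Expr::Function {
        name: "is_not_null".to_string(),
        args: vec![Expr::Column("c".to_string())],
    }];
    let (args, op) = try_convert_is_null("not", not_not_null);
    assert_eq!(op, "is_null");
    assert_eq!(args, vec![Expr::Column("c".to_string())]);
}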
From aacb4cd9a8c79b0d55c7d633f3bca5d44aab6650 Mon Sep 17 00:00:00 2001
From: sundy-li <543950155@qq.com>
Date: Mon, 31 Oct 2022 08:26:26 +0800
Subject: [PATCH 47/47] refactor(query): fix tests

---
 .../datavalues/src/columns/primitive/primitive.rs | 14 +++++++++++++-
 .../issues/rigger/division_by_zero_error.test     |  2 +-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/query/datavalues/src/columns/primitive/primitive.rs b/src/query/datavalues/src/columns/primitive/primitive.rs
index 5617d45f9847..386d32ffd3e6 100644
--- a/src/query/datavalues/src/columns/primitive/primitive.rs
+++ b/src/query/datavalues/src/columns/primitive/primitive.rs
@@ -87,10 +87,17 @@ impl<T: PrimitiveType> PrimitiveColumn<T> {
             // for all the timestamp column we will cast to int64 with microsecond precision
             ArrowDataType::Timestamp(x, _) => {
                 let p = convert_precision_to_micros(x);
+
+                if p == (1, 1) {
+                    let array = array.as_any().downcast_ref::<PrimitiveArray<i64>>().unwrap();
+                    return Self::new(array.clone());
+                }
+
                 let array = array
                     .as_any()
                     .downcast_ref::<PrimitiveArray<i64>>()
                     .expect("primitive cast should be ok");
+
                 let array = unary(array, |x| x * p.0 / p.1, expected_arrow);
 
                 Self::from_arrow_array(&array)
@@ -104,11 +111,16 @@
                     .expect("primitive cast should be ok");
 
                 let array = unary(array, |x| x as i64 * p.0 / p.1, expected_arrow);
-
                 Self::from_arrow_array(&array)
             }
             ArrowDataType::Time64(x) => {
                 let p = convert_precision_to_micros(x);
+
+                if p == (1, 1) {
+                    let array = array.as_any().downcast_ref::<PrimitiveArray<i64>>().unwrap();
+                    return Self::new(array.clone());
+                }
+
                 let array = array
                     .as_any()
                     .downcast_ref::<PrimitiveArray<i64>>()
diff --git a/tests/logictest/suites/duckdb/issues/rigger/division_by_zero_error.test b/tests/logictest/suites/duckdb/issues/rigger/division_by_zero_error.test
index b83c5849a7cc..1af7db256ca4 100644
--- a/tests/logictest/suites/duckdb/issues/rigger/division_by_zero_error.test
+++ b/tests/logictest/suites/duckdb/issues/rigger/division_by_zero_error.test
@@ -16,7 +16,7 @@ INSERT INTO t0 VALUES(0.0,NULL);
 statement error 1006
 SELECT t0.c1, (1/t0.c1) IS NULL FROM t0 order by c1;
 
-statement error 1006
+statement ok
 SELECT t0.c1 FROM t0 WHERE (((1/(t0.c1))) IS NULL) order by c1;

----
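
Closing note on the precision fast path in PATCH 47: `convert_precision_to_micros` returns a `(multiplier, divisor)` pair, and when that pair is `(1, 1)` the column is already in microseconds, so the patch clones the arrow array instead of mapping `unary` over every value. A rough model of the arithmetic is sketched below; the `TimeUnit`/`to_micros` names are illustrative stand-ins, not the crate's actual API.

enum TimeUnit { Second, Millisecond, Microsecond, Nanosecond }

// Plausible (multiplier, divisor) factors for normalizing to microseconds,
// mirroring the `x * p.0 / p.1` arithmetic in the patch.
fn to_micros(unit: &TimeUnit) -> (i64, i64) {
    match unit {
        TimeUnit::Second => (1_000_000, 1),
        TimeUnit::Millisecond => (1_000, 1),
        TimeUnit::Microsecond => (1, 1),    // no-op: take the clone fast path
        TimeUnit::Nanosecond => (1, 1_000), // divide by 1000
    }
}

fn main() {
    // Nanoseconds -> microseconds: multiply by 1, divide by 1000.
    let p = to_micros(&TimeUnit::Nanosecond);
    let value: i64 = 1_500_000; // 1.5ms expressed in nanoseconds
    assert_eq!(value * p.0 / p.1, 1_500);

    // Microseconds are already the target unit: the (1, 1) fast path applies.
    assert_eq!(to_micros(&TimeUnit::Microsecond), (1, 1));
}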