From 8777e77ab9c539b8c53a72c4fb8aa8ac0b5123c0 Mon Sep 17 00:00:00 2001 From: coldWater Date: Mon, 8 Jul 2024 14:52:03 +0800 Subject: [PATCH 01/21] feat: Reducing column.clone overhead in transform aggregate by introducing InputColumns (#15991) * input columns Signed-off-by: coldWater * clean up Signed-off-by: coldWater --------- Signed-off-by: coldWater Co-authored-by: sundyli <543950155@qq.com> --- .../src/aggregate/aggregate_function.rs | 107 +++++++++++++++++- .../src/aggregate/aggregate_hashtable.rs | 2 +- .../adaptors/aggregate_null_unary_adaptor.rs | 24 ++-- .../aggregate_null_variadic_adaptor.rs | 23 ++-- .../adaptors/aggregate_ornull_adaptor.rs | 8 +- .../src/aggregates/aggregate_arg_min_max.rs | 8 +- .../src/aggregates/aggregate_array_agg.rs | 7 +- .../src/aggregates/aggregate_array_moving.rs | 13 ++- .../src/aggregates/aggregate_bitmap.rs | 21 ++-- .../aggregate_combinator_distinct.rs | 29 +++-- .../src/aggregates/aggregate_combinator_if.rs | 19 ++-- .../aggregates/aggregate_combinator_state.rs | 8 +- .../src/aggregates/aggregate_count.rs | 7 +- .../src/aggregates/aggregate_covariance.rs | 8 +- .../src/aggregates/aggregate_null_result.rs | 8 +- .../aggregates/aggregate_quantile_tdigest.rs | 8 +- .../aggregate_quantile_tdigest_weighted.rs | 8 +- .../src/aggregates/aggregate_retention.rs | 8 +- .../src/aggregates/aggregate_string_agg.rs | 8 +- .../src/aggregates/aggregate_unary.rs | 8 +- .../src/aggregates/aggregate_window_funnel.rs | 8 +- .../src/aggregates/aggregator_common.rs | 3 +- .../functions/tests/it/aggregates/mod.rs | 3 +- .../aggregator/transform_aggregate_partial.rs | 2 +- .../aggregator/transform_single_key.rs | 16 +-- .../transforms/window/transform_window.rs | 2 +- .../transforms/window/window_function.rs | 17 +-- 27 files changed, 241 insertions(+), 142 deletions(-) diff --git a/src/query/expression/src/aggregate/aggregate_function.rs b/src/query/expression/src/aggregate/aggregate_function.rs index b5ab4f99a1856..bb31d2f1afe58 100755 --- a/src/query/expression/src/aggregate/aggregate_function.rs +++ b/src/query/expression/src/aggregate/aggregate_function.rs @@ -14,6 +14,8 @@ use std::alloc::Layout; use std::fmt; +use std::ops::Index; +use std::ops::Range; use std::sync::Arc; use databend_common_arrow::arrow::bitmap::Bitmap; @@ -24,6 +26,7 @@ use crate::types::binary::BinaryColumnBuilder; use crate::types::DataType; use crate::Column; use crate::ColumnBuilder; +use crate::DataBlock; use crate::Scalar; pub type AggregateFunctionRef = Arc; @@ -47,7 +50,7 @@ pub trait AggregateFunction: fmt::Display + Sync + Send { fn accumulate( &self, _place: StateAddr, - _columns: &[Column], + _columns: InputColumns, _validity: Option<&Bitmap>, _input_rows: usize, ) -> Result<()>; @@ -57,7 +60,7 @@ pub trait AggregateFunction: fmt::Display + Sync + Send { &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { for (row, place) in places.iter().enumerate() { @@ -67,7 +70,7 @@ pub trait AggregateFunction: fmt::Display + Sync + Send { } // Used in aggregate_null_adaptor - fn accumulate_row(&self, _place: StateAddr, _columns: &[Column], _row: usize) -> Result<()>; + fn accumulate_row(&self, _place: StateAddr, _columns: InputColumns, _row: usize) -> Result<()>; // serialize the state into binary array fn batch_serialize( @@ -157,7 +160,7 @@ pub trait AggregateFunction: fmt::Display + Sync + Send { Ok(None) } - fn get_if_condition(&self, _columns: &[Column]) -> Option { + fn get_if_condition(&self, _columns: 
InputColumns) -> Option { None } @@ -166,3 +169,99 @@ pub trait AggregateFunction: fmt::Display + Sync + Send { true } } + +#[derive(Copy, Clone)] +pub enum InputColumns<'a> { + Slice(&'a [Column]), + Block(BlockProxy<'a>), +} + +impl Index for InputColumns<'_> { + type Output = Column; + + fn index(&self, index: usize) -> &Self::Output { + match self { + Self::Slice(slice) => slice.index(index), + Self::Block(BlockProxy { args, data }) => { + data.get_by_offset(args[index]).value.as_column().unwrap() + } + } + } +} + +impl<'a> InputColumns<'a> { + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn len(&self) -> usize { + match self { + Self::Slice(s) => s.len(), + Self::Block(BlockProxy { args, .. }) => args.len(), + } + } + + pub fn slice(&self, index: Range) -> InputColumns<'_> { + match self { + Self::Slice(s) => Self::Slice(&s[index]), + Self::Block(BlockProxy { args, data }) => Self::Block(BlockProxy { + args: &args[index], + data, + }), + } + } + + pub fn iter(&self) -> InputColumnsIter { + match self { + Self::Slice(s) => InputColumnsIter { + iter: 0..s.len(), + this: self, + }, + Self::Block(BlockProxy { args, .. }) => InputColumnsIter { + iter: 0..args.len(), + this: self, + }, + } + } + + pub fn new_block_proxy(args: &'a [usize], data: &'a DataBlock) -> InputColumns<'a> { + Self::Block(BlockProxy { args, data }) + } +} + +pub struct InputColumnsIter<'a> { + iter: Range, + this: &'a InputColumns<'a>, +} + +impl<'a> Iterator for InputColumnsIter<'a> { + type Item = &'a Column; + + fn next(&mut self) -> Option { + self.iter.next().map(|index| self.this.index(index)) + } +} + +impl<'a> From<&'a [Column]> for InputColumns<'a> { + fn from(value: &'a [Column]) -> Self { + InputColumns::Slice(value) + } +} + +impl<'a, const N: usize> From<&'a [Column; N]> for InputColumns<'a> { + fn from(value: &'a [Column; N]) -> Self { + InputColumns::Slice(value.as_slice()) + } +} + +impl<'a> From<&'a Vec> for InputColumns<'a> { + fn from(value: &'a Vec) -> Self { + InputColumns::Slice(value) + } +} + +#[derive(Copy, Clone)] +pub struct BlockProxy<'a> { + args: &'a [usize], + data: &'a DataBlock, +} diff --git a/src/query/expression/src/aggregate/aggregate_hashtable.rs b/src/query/expression/src/aggregate/aggregate_hashtable.rs index 3f118cd8ac092..7e1873b092680 100644 --- a/src/query/expression/src/aggregate/aggregate_hashtable.rs +++ b/src/query/expression/src/aggregate/aggregate_hashtable.rs @@ -205,7 +205,7 @@ impl AggregateHashTable { .zip(params.iter()) .zip(self.payload.state_addr_offsets.iter()) { - aggr.accumulate_keys(state_places, *addr_offset, params, row_count)?; + aggr.accumulate_keys(state_places, *addr_offset, params.into(), row_count)?; } } else { for ((aggr, agg_state), addr_offset) in self diff --git a/src/query/functions/src/aggregates/adaptors/aggregate_null_unary_adaptor.rs b/src/query/functions/src/aggregates/adaptors/aggregate_null_unary_adaptor.rs index a36e27a6caba6..609a0c7a9a146 100644 --- a/src/query/functions/src/aggregates/adaptors/aggregate_null_unary_adaptor.rs +++ b/src/query/functions/src/aggregates/adaptors/aggregate_null_unary_adaptor.rs @@ -20,8 +20,8 @@ use databend_common_arrow::arrow::bitmap::Bitmap; use databend_common_exception::Result; use databend_common_expression::types::DataType; use databend_common_expression::utils::column_merge_validity; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; +use databend_common_expression::InputColumns; use databend_common_io::prelude::BinaryWrite; use 
crate::aggregates::AggregateFunction; @@ -109,16 +109,16 @@ impl AggregateFunction for AggregateNullUnaryAdapto fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, input_rows: usize, ) -> Result<()> { let col = &columns[0]; let validity = column_merge_validity(col, validity.cloned()); - let not_null_column = col.remove_nullable(); - + let not_null_column = &[col.remove_nullable()]; + let not_null_column = not_null_column.into(); self.nested - .accumulate(place, &[not_null_column], validity.as_ref(), input_rows)?; + .accumulate(place, not_null_column, validity.as_ref(), input_rows)?; if validity .as_ref() @@ -135,13 +135,13 @@ impl AggregateFunction for AggregateNullUnaryAdapto &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, input_rows: usize, ) -> Result<()> { let col = &columns[0]; let validity = column_merge_validity(col, None); - let not_null_columns = vec![col.remove_nullable()]; - let not_null_columns = ¬_null_columns; + let not_null_columns = &[col.remove_nullable()]; + let not_null_columns = not_null_columns.into(); match validity { Some(v) if v.unset_bits() > 0 => { @@ -170,11 +170,11 @@ impl AggregateFunction for AggregateNullUnaryAdapto Ok(()) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let col = &columns[0]; let validity = column_merge_validity(col, None); - let not_null_columns = vec![col.remove_nullable()]; - let not_null_columns = ¬_null_columns; + let not_null_columns = &[col.remove_nullable()]; + let not_null_columns = not_null_columns.into(); match validity { Some(v) if v.unset_bits() > 0 => { @@ -270,7 +270,7 @@ impl AggregateFunction for AggregateNullUnaryAdapto self.nested.convert_const_to_full() } - fn get_if_condition(&self, columns: &[Column]) -> Option { + fn get_if_condition(&self, columns: InputColumns) -> Option { self.nested.get_if_condition(columns) } } diff --git a/src/query/functions/src/aggregates/adaptors/aggregate_null_variadic_adaptor.rs b/src/query/functions/src/aggregates/adaptors/aggregate_null_variadic_adaptor.rs index 3ff84082a9a6c..b02b2fdff89b1 100644 --- a/src/query/functions/src/aggregates/adaptors/aggregate_null_variadic_adaptor.rs +++ b/src/query/functions/src/aggregates/adaptors/aggregate_null_variadic_adaptor.rs @@ -20,8 +20,8 @@ use databend_common_arrow::arrow::bitmap::Bitmap; use databend_common_exception::Result; use databend_common_expression::types::DataType; use databend_common_expression::utils::column_merge_validity; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; +use databend_common_expression::InputColumns; use databend_common_io::prelude::BinaryWrite; use crate::aggregates::AggregateFunction; @@ -110,7 +110,7 @@ impl AggregateFunction fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, input_rows: usize, ) -> Result<()> { @@ -120,9 +120,10 @@ impl AggregateFunction validity = column_merge_validity(col, validity); not_null_columns.push(col.remove_nullable()); } + let not_null_columns = (¬_null_columns).into(); self.nested - .accumulate(place, ¬_null_columns, validity.as_ref(), input_rows)?; + .accumulate(place, not_null_columns, validity.as_ref(), input_rows)?; if validity .as_ref() @@ -138,7 +139,7 @@ impl AggregateFunction &self, places: &[StateAddr], offset: usize, - columns: 
&[Column], + columns: InputColumns, input_rows: usize, ) -> Result<()> { let mut not_null_columns = Vec::with_capacity(columns.len()); @@ -147,6 +148,7 @@ impl AggregateFunction validity = column_merge_validity(col, validity); not_null_columns.push(col.remove_nullable()); } + let not_null_columns = (¬_null_columns).into(); match validity { Some(v) if v.unset_bits() > 0 => { @@ -158,13 +160,13 @@ impl AggregateFunction if valid { self.set_flag(place.next(offset), 1); self.nested - .accumulate_row(place.next(offset), ¬_null_columns, row)?; + .accumulate_row(place.next(offset), not_null_columns, row)?; } } } _ => { self.nested - .accumulate_keys(places, offset, ¬_null_columns, input_rows)?; + .accumulate_keys(places, offset, not_null_columns, input_rows)?; places .iter() .for_each(|place| self.set_flag(place.next(offset), 1)); @@ -173,13 +175,14 @@ impl AggregateFunction Ok(()) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let mut not_null_columns = Vec::with_capacity(columns.len()); let mut validity = None; for col in columns.iter() { validity = column_merge_validity(col, validity); not_null_columns.push(col.remove_nullable()); } + let not_null_columns = (¬_null_columns).into(); match validity { Some(v) if v.unset_bits() > 0 => { @@ -190,11 +193,11 @@ impl AggregateFunction if unsafe { v.get_bit_unchecked(row) } { self.set_flag(place, 1); - self.nested.accumulate_row(place, ¬_null_columns, row)?; + self.nested.accumulate_row(place, not_null_columns, row)?; } } _ => { - self.nested.accumulate_row(place, ¬_null_columns, row)?; + self.nested.accumulate_row(place, not_null_columns, row)?; self.set_flag(place, 1); } } @@ -273,7 +276,7 @@ impl AggregateFunction self.nested.convert_const_to_full() } - fn get_if_condition(&self, columns: &[Column]) -> Option { + fn get_if_condition(&self, columns: InputColumns) -> Option { self.nested.get_if_condition(columns) } } diff --git a/src/query/functions/src/aggregates/adaptors/aggregate_ornull_adaptor.rs b/src/query/functions/src/aggregates/adaptors/aggregate_ornull_adaptor.rs index 8333689fb8d3d..95dcdd0a6d561 100644 --- a/src/query/functions/src/aggregates/adaptors/aggregate_ornull_adaptor.rs +++ b/src/query/functions/src/aggregates/adaptors/aggregate_ornull_adaptor.rs @@ -19,8 +19,8 @@ use std::sync::Arc; use databend_common_arrow::arrow::bitmap::Bitmap; use databend_common_exception::Result; use databend_common_expression::types::DataType; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use databend_common_io::prelude::BinaryWrite; @@ -101,7 +101,7 @@ impl AggregateFunction for AggregateFunctionOrNullAdaptor { fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, input_rows: usize, ) -> Result<()> { @@ -134,7 +134,7 @@ impl AggregateFunction for AggregateFunctionOrNullAdaptor { &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, input_rows: usize, ) -> Result<()> { self.inner @@ -165,7 +165,7 @@ impl AggregateFunction for AggregateFunctionOrNullAdaptor { } #[inline] - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { self.inner.accumulate_row(place, 
columns, row)?; self.set_flag(place, 1); Ok(()) diff --git a/src/query/functions/src/aggregates/aggregate_arg_min_max.rs b/src/query/functions/src/aggregates/aggregate_arg_min_max.rs index e671b1a74a216..ce14a28d9b5d1 100644 --- a/src/query/functions/src/aggregates/aggregate_arg_min_max.rs +++ b/src/query/functions/src/aggregates/aggregate_arg_min_max.rs @@ -25,8 +25,8 @@ use databend_common_exception::Result; use databend_common_expression::types::number::*; use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use super::aggregate_function_factory::AggregateFunctionDescription; @@ -232,7 +232,7 @@ where fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, _input_rows: usize, ) -> Result<()> { @@ -246,7 +246,7 @@ where &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { let arg_col = A::try_downcast_column(&columns[0]).unwrap(); @@ -268,7 +268,7 @@ where Ok(()) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let arg_col = A::try_downcast_column(&columns[0]).unwrap(); let val_col = V::try_downcast_column(&columns[1]).unwrap(); let state = place.get::(); diff --git a/src/query/functions/src/aggregates/aggregate_array_agg.rs b/src/query/functions/src/aggregates/aggregate_array_agg.rs index 10c29ee104890..5596b16fadb78 100644 --- a/src/query/functions/src/aggregates/aggregate_array_agg.rs +++ b/src/query/functions/src/aggregates/aggregate_array_agg.rs @@ -29,6 +29,7 @@ use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use databend_common_expression::ScalarRef; use ethnum::i256; @@ -256,7 +257,7 @@ where fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, _validity: Option<&Bitmap>, _input_rows: usize, ) -> Result<()> { @@ -277,7 +278,7 @@ where &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { match &columns[0] { @@ -310,7 +311,7 @@ where Ok(()) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let state = place.get::(); match &columns[0] { Column::Nullable(box nullable_column) => { diff --git a/src/query/functions/src/aggregates/aggregate_array_moving.rs b/src/query/functions/src/aggregates/aggregate_array_moving.rs index fcaa66b70dad3..3ca323a355362 100644 --- a/src/query/functions/src/aggregates/aggregate_array_moving.rs +++ b/src/query/functions/src/aggregates/aggregate_array_moving.rs @@ -41,6 +41,7 @@ use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; use databend_common_expression::Expr; use databend_common_expression::FunctionContext; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use databend_common_expression::ScalarRef; use ethnum::i256; @@ -413,7 
+414,7 @@ where State: SumState fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, _input_rows: usize, ) -> Result<()> { @@ -425,13 +426,13 @@ where State: SumState &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { State::accumulate_keys(places, offset, &columns[0]) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let state = place.get::(); state.accumulate_row(&columns[0], row) } @@ -607,7 +608,7 @@ where State: SumState fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, _input_rows: usize, ) -> Result<()> { @@ -619,13 +620,13 @@ where State: SumState &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { State::accumulate_keys(places, offset, &columns[0]) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let state = place.get::(); state.accumulate_row(&columns[0], row) } diff --git a/src/query/functions/src/aggregates/aggregate_bitmap.rs b/src/query/functions/src/aggregates/aggregate_bitmap.rs index 38f7f6bc0165e..c8ef9322ebce1 100644 --- a/src/query/functions/src/aggregates/aggregate_bitmap.rs +++ b/src/query/functions/src/aggregates/aggregate_bitmap.rs @@ -30,10 +30,10 @@ use databend_common_expression::type_check::check_number; use databend_common_expression::types::decimal::DecimalType; use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; use databend_common_expression::Expr; use databend_common_expression::FunctionContext; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use databend_common_io::prelude::BinaryWrite; use ethnum::i256; @@ -225,7 +225,7 @@ where fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, _input_rows: usize, ) -> Result<()> { @@ -262,7 +262,7 @@ where &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { let column = BitmapType::try_downcast_column(&columns[0]).unwrap(); @@ -276,7 +276,7 @@ where Ok(()) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let column = BitmapType::try_downcast_column(&columns[0]).unwrap(); let state = place.get::(); if let Some(data) = BitmapType::index_column(&column, row) { @@ -372,7 +372,7 @@ where Ok(Arc::new(func)) } - fn get_filter_bitmap(&self, columns: &[Column]) -> Bitmap { + fn get_filter_bitmap(&self, columns: InputColumns) -> Bitmap { let filter_col = T::try_downcast_column(&columns[1]).unwrap(); let mut result = MutableBitmap::from_len_zeroed(columns[0].len()); @@ -390,7 +390,7 @@ where Bitmap::from(result) } - fn filter_row(&self, columns: &[Column], row: usize) -> Result { + fn filter_row(&self, columns: InputColumns, row: usize) -> Result { let check_col = T::try_downcast_column(&columns[1]).unwrap(); let check_val_opt = 
T::index_column(&check_col, row); @@ -443,7 +443,7 @@ where fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, input_rows: usize, ) -> Result<()> { @@ -460,7 +460,7 @@ where &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { let predicate = self.get_filter_bitmap(columns); @@ -470,11 +470,12 @@ where let new_places_slice = new_places.as_slice(); let row_size = predicate.len() - predicate.unset_bits(); + let input = [column]; self.inner - .accumulate_keys(new_places_slice, offset, vec![column].as_slice(), row_size) + .accumulate_keys(new_places_slice, offset, input.as_slice().into(), row_size) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { if self.filter_row(columns, row)? { return self.inner.accumulate_row(place, columns, row); } diff --git a/src/query/functions/src/aggregates/aggregate_combinator_distinct.rs b/src/query/functions/src/aggregates/aggregate_combinator_distinct.rs index edc0c1f6eb0ba..9aac40302a6f7 100644 --- a/src/query/functions/src/aggregates/aggregate_combinator_distinct.rs +++ b/src/query/functions/src/aggregates/aggregate_combinator_distinct.rs @@ -23,8 +23,8 @@ use databend_common_expression::types::number::NumberColumnBuilder; use databend_common_expression::types::DataType; use databend_common_expression::types::NumberDataType; use databend_common_expression::with_number_mapped_type; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use super::aggregate_distinct_state::AggregateDistinctNumberState; @@ -78,17 +78,29 @@ where State: DistinctStateFunc fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, input_rows: usize, ) -> Result<()> { let state = place.get::(); - state.batch_add(columns, validity, input_rows) + match columns { + InputColumns::Slice(s) => state.batch_add(s, validity, input_rows), + _ => { + let columns = columns.iter().cloned().collect::>(); + state.batch_add(columns.as_slice(), validity, input_rows) + } + } } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let state = place.get::(); - state.add(columns, row) + match columns { + InputColumns::Slice(s) => state.add(s, row), + _ => { + let columns = columns.iter().cloned().collect::>(); + state.add(columns.as_slice(), row) + } + } } fn serialize(&self, place: StateAddr, writer: &mut Vec) -> Result<()> { @@ -129,10 +141,9 @@ where State: DistinctStateFunc if state.is_empty() { return self.nested.merge_result(nested_place, builder); } - let columns = state.build_columns(&self.arguments).unwrap(); - + let columns = &state.build_columns(&self.arguments).unwrap(); self.nested - .accumulate(nested_place, &columns, None, state.len())?; + .accumulate(nested_place, columns.into(), None, state.len())?; // merge_result self.nested.merge_result(nested_place, builder) } @@ -153,7 +164,7 @@ where State: DistinctStateFunc } } - fn get_if_condition(&self, columns: &[Column]) -> Option { + fn get_if_condition(&self, columns: InputColumns) -> Option { self.nested.get_if_condition(columns) } } diff --git 
a/src/query/functions/src/aggregates/aggregate_combinator_if.rs b/src/query/functions/src/aggregates/aggregate_combinator_if.rs index 2b8e438cc4072..9dd1971b889b9 100644 --- a/src/query/functions/src/aggregates/aggregate_combinator_if.rs +++ b/src/query/functions/src/aggregates/aggregate_combinator_if.rs @@ -24,6 +24,7 @@ use databend_common_expression::types::DataType; use databend_common_expression::types::ValueType; use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use super::StateAddr; @@ -102,7 +103,7 @@ impl AggregateFunction for AggregateIfCombinator { fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, input_rows: usize, ) -> Result<()> { @@ -115,7 +116,7 @@ impl AggregateFunction for AggregateIfCombinator { }; self.nested.accumulate( place, - &columns[0..self.argument_len - 1], + columns.slice(0..self.argument_len - 1), Some(&bitmap), input_rows, ) @@ -125,26 +126,26 @@ impl AggregateFunction for AggregateIfCombinator { &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { let predicate: Bitmap = BooleanType::try_downcast_column(&columns[self.argument_len - 1]).unwrap(); let (columns, row_size) = - self.filter_column(&columns[0..self.argument_len - 1], &predicate); + self.filter_column(columns.slice(0..self.argument_len - 1), &predicate); let new_places = Self::filter_place(places, &predicate); let new_places_slice = new_places.as_slice(); self.nested - .accumulate_keys(new_places_slice, offset, &columns, row_size) + .accumulate_keys(new_places_slice, offset, (&columns).into(), row_size) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let predicate: Bitmap = BooleanType::try_downcast_column(&columns[self.argument_len - 1]).unwrap(); if predicate.get_bit(row) { self.nested - .accumulate_row(place, &columns[0..self.argument_len - 1], row)?; + .accumulate_row(place, columns.slice(0..self.argument_len - 1), row)?; } Ok(()) } @@ -173,7 +174,7 @@ impl AggregateFunction for AggregateIfCombinator { self.nested.drop_state(place); } - fn get_if_condition(&self, columns: &[Column]) -> Option { + fn get_if_condition(&self, columns: InputColumns) -> Option { let condition_col = &columns[self.argument_len - 1]; let predicate: Bitmap = BooleanType::try_downcast_column(&condition_col.remove_nullable()).unwrap(); @@ -189,7 +190,7 @@ impl fmt::Display for AggregateIfCombinator { impl AggregateIfCombinator { #[inline] - fn filter_column(&self, columns: &[Column], predicate: &Bitmap) -> (Vec, usize) { + fn filter_column(&self, columns: InputColumns, predicate: &Bitmap) -> (Vec, usize) { let columns = columns .iter() .map(|c| c.filter(predicate)) diff --git a/src/query/functions/src/aggregates/aggregate_combinator_state.rs b/src/query/functions/src/aggregates/aggregate_combinator_state.rs index d26190e21228f..413a8b2293ce4 100644 --- a/src/query/functions/src/aggregates/aggregate_combinator_state.rs +++ b/src/query/functions/src/aggregates/aggregate_combinator_state.rs @@ -19,8 +19,8 @@ use std::sync::Arc; use databend_common_arrow::arrow::bitmap::Bitmap; use databend_common_exception::Result; use databend_common_expression::types::DataType; -use databend_common_expression::Column; use 
databend_common_expression::ColumnBuilder; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use super::AggregateFunctionFactory; @@ -85,7 +85,7 @@ impl AggregateFunction for AggregateStateCombinator { fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, input_rows: usize, ) -> Result<()> { @@ -96,14 +96,14 @@ impl AggregateFunction for AggregateStateCombinator { &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, input_rows: usize, ) -> Result<()> { self.nested .accumulate_keys(places, offset, columns, input_rows) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { self.nested.accumulate_row(place, columns, row) } diff --git a/src/query/functions/src/aggregates/aggregate_count.rs b/src/query/functions/src/aggregates/aggregate_count.rs index c3b9de9ed2445..c3eec671e6ac0 100644 --- a/src/query/functions/src/aggregates/aggregate_count.rs +++ b/src/query/functions/src/aggregates/aggregate_count.rs @@ -24,6 +24,7 @@ use databend_common_expression::types::NumberDataType; use databend_common_expression::utils::column_merge_validity; use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use super::aggregate_function::AggregateFunction; @@ -86,7 +87,7 @@ impl AggregateFunction for AggregateCountFunction { fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, input_rows: usize, ) -> Result<()> { @@ -110,7 +111,7 @@ impl AggregateFunction for AggregateCountFunction { &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { let validity = columns @@ -142,7 +143,7 @@ impl AggregateFunction for AggregateCountFunction { Ok(()) } - fn accumulate_row(&self, place: StateAddr, _columns: &[Column], _row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, _columns: InputColumns, _row: usize) -> Result<()> { let state = place.get::(); state.count += 1; Ok(()) diff --git a/src/query/functions/src/aggregates/aggregate_covariance.rs b/src/query/functions/src/aggregates/aggregate_covariance.rs index aaeb0f243506c..dd4c99e9429bc 100644 --- a/src/query/functions/src/aggregates/aggregate_covariance.rs +++ b/src/query/functions/src/aggregates/aggregate_covariance.rs @@ -29,8 +29,8 @@ use databend_common_expression::types::NumberDataType; use databend_common_expression::types::NumberType; use databend_common_expression::types::ValueType; use databend_common_expression::with_number_mapped_type; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use num_traits::AsPrimitive; @@ -165,7 +165,7 @@ where fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, _input_rows: usize, ) -> Result<()> { @@ -198,7 +198,7 @@ where &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { let left = NumberType::::try_downcast_column(&columns[0]).unwrap(); @@ -214,7 +214,7 @@ where Ok(()) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], 
row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let left = NumberType::::try_downcast_column(&columns[0]).unwrap(); let right = NumberType::::try_downcast_column(&columns[1]).unwrap(); diff --git a/src/query/functions/src/aggregates/aggregate_null_result.rs b/src/query/functions/src/aggregates/aggregate_null_result.rs index 4cdb2dd3a09da..b3f67f7794136 100644 --- a/src/query/functions/src/aggregates/aggregate_null_result.rs +++ b/src/query/functions/src/aggregates/aggregate_null_result.rs @@ -21,8 +21,8 @@ use databend_common_exception::Result; use databend_common_expression::types::AnyType; use databend_common_expression::types::DataType; use databend_common_expression::types::ValueType; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; +use databend_common_expression::InputColumns; use super::aggregate_function::AggregateFunction; use super::StateAddr; @@ -56,7 +56,7 @@ impl AggregateFunction for AggregateNullResultFunction { fn accumulate( &self, __place: StateAddr, - _columns: &[Column], + _columns: InputColumns, _validity: Option<&Bitmap>, _input_rows: usize, ) -> Result<()> { @@ -67,13 +67,13 @@ impl AggregateFunction for AggregateNullResultFunction { &self, _places: &[StateAddr], _offset: usize, - _columns: &[Column], + _columns: InputColumns, _input_rows: usize, ) -> Result<()> { Ok(()) } - fn accumulate_row(&self, _place: StateAddr, _columns: &[Column], _row: usize) -> Result<()> { + fn accumulate_row(&self, _place: StateAddr, _columns: InputColumns, _row: usize) -> Result<()> { Ok(()) } diff --git a/src/query/functions/src/aggregates/aggregate_quantile_tdigest.rs b/src/query/functions/src/aggregates/aggregate_quantile_tdigest.rs index e4110eee9aa54..fa03827aa9e6a 100644 --- a/src/query/functions/src/aggregates/aggregate_quantile_tdigest.rs +++ b/src/query/functions/src/aggregates/aggregate_quantile_tdigest.rs @@ -28,10 +28,10 @@ use databend_common_expression::type_check::check_number; use databend_common_expression::types::number::*; use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; use databend_common_expression::Expr; use databend_common_expression::FunctionContext; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use databend_common_expression::ScalarRef; use itertools::Itertools; @@ -311,7 +311,7 @@ where T: Number + AsPrimitive fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, _input_rows: usize, ) -> Result<()> { @@ -334,7 +334,7 @@ where T: Number + AsPrimitive Ok(()) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let column = NumberType::::try_downcast_column(&columns[0]).unwrap(); let v = NumberType::::index_column(&column, row); if let Some(v) = v { @@ -347,7 +347,7 @@ where T: Number + AsPrimitive &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { let column = NumberType::::try_downcast_column(&columns[0]).unwrap(); diff --git a/src/query/functions/src/aggregates/aggregate_quantile_tdigest_weighted.rs b/src/query/functions/src/aggregates/aggregate_quantile_tdigest_weighted.rs index 
5f68596770ac5..c6f3dbaf6b637 100644 --- a/src/query/functions/src/aggregates/aggregate_quantile_tdigest_weighted.rs +++ b/src/query/functions/src/aggregates/aggregate_quantile_tdigest_weighted.rs @@ -26,10 +26,10 @@ use databend_common_expression::types::number::*; use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; use databend_common_expression::with_unsigned_integer_mapped_type; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; use databend_common_expression::Expr; use databend_common_expression::FunctionContext; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use num_traits::AsPrimitive; @@ -86,7 +86,7 @@ where fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, _input_rows: usize, ) -> Result<()> { @@ -112,7 +112,7 @@ where Ok(()) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let column = NumberType::::try_downcast_column(&columns[0]).unwrap(); let weighted = NumberType::::try_downcast_column(&columns[1]).unwrap(); let value = unsafe { column.get_unchecked(row) }; @@ -126,7 +126,7 @@ where &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { let column = NumberType::::try_downcast_column(&columns[0]).unwrap(); diff --git a/src/query/functions/src/aggregates/aggregate_retention.rs b/src/query/functions/src/aggregates/aggregate_retention.rs index 37e6e3d2088c2..98380e69857a3 100644 --- a/src/query/functions/src/aggregates/aggregate_retention.rs +++ b/src/query/functions/src/aggregates/aggregate_retention.rs @@ -25,8 +25,8 @@ use databend_common_expression::types::BooleanType; use databend_common_expression::types::DataType; use databend_common_expression::types::NumberDataType; use databend_common_expression::types::ValueType; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use super::aggregate_function::AggregateFunction; @@ -81,7 +81,7 @@ impl AggregateFunction for AggregateRetentionFunction { fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, _validity: Option<&Bitmap>, input_rows: usize, ) -> Result<()> { @@ -104,7 +104,7 @@ impl AggregateFunction for AggregateRetentionFunction { &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { let new_columns = columns @@ -123,7 +123,7 @@ impl AggregateFunction for AggregateRetentionFunction { Ok(()) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let state = place.get::(); let new_columns = columns .iter() diff --git a/src/query/functions/src/aggregates/aggregate_string_agg.rs b/src/query/functions/src/aggregates/aggregate_string_agg.rs index 354e1660b4d9d..60ff6fd4cf3ff 100644 --- a/src/query/functions/src/aggregates/aggregate_string_agg.rs +++ b/src/query/functions/src/aggregates/aggregate_string_agg.rs @@ -24,8 +24,8 @@ use databend_common_exception::Result; use databend_common_expression::types::DataType; use databend_common_expression::types::StringType; use 
databend_common_expression::types::ValueType; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use super::aggregate_function_factory::AggregateFunctionDescription; @@ -68,7 +68,7 @@ impl AggregateFunction for AggregateStringAggFunction { fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, _input_rows: usize, ) -> Result<()> { @@ -97,7 +97,7 @@ impl AggregateFunction for AggregateStringAggFunction { &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { let column = StringType::try_downcast_column(&columns[0]).unwrap(); @@ -111,7 +111,7 @@ impl AggregateFunction for AggregateStringAggFunction { Ok(()) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let column = StringType::try_downcast_column(&columns[0]).unwrap(); let v = StringType::index_column(&column, row); if let Some(v) = v { diff --git a/src/query/functions/src/aggregates/aggregate_unary.rs b/src/query/functions/src/aggregates/aggregate_unary.rs index 8a817edf13d13..32dfa7315d236 100644 --- a/src/query/functions/src/aggregates/aggregate_unary.rs +++ b/src/query/functions/src/aggregates/aggregate_unary.rs @@ -27,8 +27,8 @@ use databend_common_expression::types::DecimalSize; use databend_common_expression::types::ValueType; use databend_common_expression::AggregateFunction; use databend_common_expression::AggregateFunctionRef; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use databend_common_expression::StateAddr; @@ -189,7 +189,7 @@ where fn accumulate( &self, place: StateAddr, - columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, _input_rows: usize, ) -> Result<()> { @@ -214,7 +214,7 @@ where Ok(()) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let column = T::try_downcast_column(&columns[0]).unwrap(); let value = T::index_column(&column, row); @@ -227,7 +227,7 @@ where &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { let column = T::try_downcast_column(&columns[0]).unwrap(); diff --git a/src/query/functions/src/aggregates/aggregate_window_funnel.rs b/src/query/functions/src/aggregates/aggregate_window_funnel.rs index 9493a8dc5bd4a..4fc76b93872df 100644 --- a/src/query/functions/src/aggregates/aggregate_window_funnel.rs +++ b/src/query/functions/src/aggregates/aggregate_window_funnel.rs @@ -36,10 +36,10 @@ use databend_common_expression::types::NumberType; use databend_common_expression::types::TimestampType; use databend_common_expression::types::ValueType; use databend_common_expression::with_integer_mapped_type; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; use databend_common_expression::Expr; use databend_common_expression::FunctionContext; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use num_traits::AsPrimitive; @@ -188,7 +188,7 @@ where fn accumulate( &self, place: StateAddr, - 
columns: &[Column], + columns: InputColumns, validity: Option<&Bitmap>, _input_rows: usize, ) -> Result<()> { @@ -236,7 +236,7 @@ where &self, places: &[StateAddr], offset: usize, - columns: &[Column], + columns: InputColumns, _input_rows: usize, ) -> Result<()> { let mut dcolumns = Vec::with_capacity(self.event_size); @@ -259,7 +259,7 @@ where Ok(()) } - fn accumulate_row(&self, place: StateAddr, columns: &[Column], row: usize) -> Result<()> { + fn accumulate_row(&self, place: StateAddr, columns: InputColumns, row: usize) -> Result<()> { let tcolumn = T::try_downcast_column(&columns[0]).unwrap(); let timestamp = unsafe { T::index_column_unchecked(&tcolumn, row) }; let timestamp = T::to_owned_scalar(timestamp); diff --git a/src/query/functions/src/aggregates/aggregator_common.rs b/src/query/functions/src/aggregates/aggregator_common.rs index d68e198125a44..775a057c7112b 100644 --- a/src/query/functions/src/aggregates/aggregator_common.rs +++ b/src/query/functions/src/aggregates/aggregator_common.rs @@ -143,14 +143,13 @@ pub fn eval_aggr( rows: usize, ) -> Result<(Column, DataType)> { let factory = AggregateFunctionFactory::instance(); - let cols: Vec = columns.to_owned(); let arguments = columns.iter().map(|x| x.data_type()).collect(); let func = factory.get(name, params, arguments)?; let data_type = func.return_type()?; let eval = EvalAggr::new(func.clone()); - func.accumulate(eval.addr, &cols, None, rows)?; + func.accumulate(eval.addr, columns.into(), None, rows)?; let mut builder = ColumnBuilder::with_capacity(&data_type, 1024); func.merge_result(eval.addr, &mut builder)?; Ok((builder.build(), data_type)) diff --git a/src/query/functions/tests/it/aggregates/mod.rs b/src/query/functions/tests/it/aggregates/mod.rs index 26974e70bc953..813c59de6871a 100644 --- a/src/query/functions/tests/it/aggregates/mod.rs +++ b/src/query/functions/tests/it/aggregates/mod.rs @@ -187,7 +187,6 @@ pub fn simulate_two_groups_group_by( ) -> databend_common_exception::Result<(Column, DataType)> { let factory = AggregateFunctionFactory::instance(); let arguments: Vec = columns.iter().map(|c| c.data_type()).collect(); - let cols: Vec = columns.to_owned(); let func = factory.get(name, params, arguments)?; let data_type = func.return_type()?; @@ -210,7 +209,7 @@ pub fn simulate_two_groups_group_by( }) .collect::>(); - func.accumulate_keys(&places, 0, &cols, rows)?; + func.accumulate_keys(&places, 0, columns.into(), rows)?; let mut builder = ColumnBuilder::with_capacity(&data_type, 1024); func.merge_result(addr1.into(), &mut builder)?; diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_partial.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_partial.rs index 1e580c63ce71d..d6be772031d2a 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_partial.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_partial.rs @@ -216,7 +216,7 @@ impl TransformPartialAggregate { let function = &aggregate_functions[index]; let state_offset = offsets_aggregate_states[index]; let function_arguments = &aggr_arg_columns_slice[index]; - function.accumulate_keys(places, state_offset, function_arguments, rows)?; + function.accumulate_keys(places, state_offset, function_arguments.into(), rows)?; } Ok(()) diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_single_key.rs 
b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_single_key.rs index b440d2453001b..de05ea7d2e316 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_single_key.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_single_key.rs @@ -27,6 +27,7 @@ use databend_common_expression::BlockMetaInfoDowncast; use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; use databend_common_expression::DataBlock; +use databend_common_expression::InputColumns; use databend_common_expression::Scalar; use databend_common_expression::Value; use databend_common_functions::aggregates::AggregateFunctionRef; @@ -92,17 +93,6 @@ impl AccumulatingTransform for PartialSingleStateAggregator { let block = block.convert_to_full(); for (idx, func) in self.funcs.iter().enumerate() { - let mut arg_columns = vec![]; - for index in self.arg_indices[idx].iter() { - arg_columns.push( - block - .get_by_offset(*index) - .value - .as_column() - .unwrap() - .clone(), - ); - } let place = self.places[idx]; if is_agg_index_block { // Aggregation states are in the back of the block. @@ -111,7 +101,9 @@ impl AccumulatingTransform for PartialSingleStateAggregator { func.batch_merge_single(place, agg_state)?; } else { - func.accumulate(place, &arg_columns, None, block.num_rows())?; + let columns = + InputColumns::new_block_proxy(self.arg_indices[idx].as_slice(), &block); + func.accumulate(place, columns, None, block.num_rows())?; } } diff --git a/src/query/service/src/pipelines/processors/transforms/window/transform_window.rs b/src/query/service/src/pipelines/processors/transforms/window/transform_window.rs index 302a72596f7fa..7cb2e4378d229 100644 --- a/src/query/service/src/pipelines/processors/transforms/window/transform_window.rs +++ b/src/query/service/src/pipelines/processors/transforms/window/transform_window.rs @@ -500,7 +500,7 @@ impl TransformWindow { }; let cols = agg.arg_columns(data); for row in start_row..end_row { - agg.accumulate_row(&cols, row)?; + agg.accumulate_row(cols, row)?; } } diff --git a/src/query/service/src/pipelines/processors/transforms/window/window_function.rs b/src/query/service/src/pipelines/processors/transforms/window/window_function.rs index 99aac5ea754a7..dde5a2521b091 100644 --- a/src/query/service/src/pipelines/processors/transforms/window/window_function.rs +++ b/src/query/service/src/pipelines/processors/transforms/window/window_function.rs @@ -18,10 +18,10 @@ use databend_common_base::runtime::drop_guard; use databend_common_exception::Result; use databend_common_expression::types::DataType; use databend_common_expression::types::NumberDataType; -use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; use databend_common_expression::DataBlock; use databend_common_expression::DataSchema; +use databend_common_expression::InputColumns; use databend_common_functions::aggregates::get_layout_offsets; use databend_common_functions::aggregates::AggregateFunction; use databend_common_functions::aggregates::AggregateFunctionFactory; @@ -60,21 +60,12 @@ impl WindowFuncAggImpl { } #[inline] - pub fn arg_columns(&self, data: &DataBlock) -> Vec { - self.args - .iter() - .map(|index| { - data.get_by_offset(*index) - .value - .as_column() - .cloned() - .unwrap() - }) - .collect() + pub fn arg_columns<'a>(&'a self, data: &'a DataBlock) -> InputColumns { + InputColumns::new_block_proxy(&self.args, data) } #[inline] - pub fn accumulate_row(&self, args: 
&[Column], row: usize) -> Result<()> { + pub fn accumulate_row(&self, args: InputColumns, row: usize) -> Result<()> { self.agg.accumulate_row(self.place, args, row) } From ad2ccae161e468071e58c9acbfd74128924695d0 Mon Sep 17 00:00:00 2001 From: Liuqing Yue Date: Mon, 8 Jul 2024 15:19:41 +0800 Subject: [PATCH 02/21] feat: support real-time retrieval of profiles from admin API (part 2) (#15975) * fix(executor): fix empty query profiles for admin api * feat: support real-time retrieval of profiles from admin API (part 2) * add test --------- Co-authored-by: zhang2014 --- src/common/exception/src/exception_code.rs | 1 + src/query/config/src/config.rs | 5 + src/query/config/src/inner.rs | 2 + src/query/service/src/global_services.rs | 3 + .../service/src/interpreters/interpreter.rs | 10 +- .../src/servers/admin/v1/query_profiling.rs | 118 +++++++++++++----- .../servers/flight/v1/actions/get_profile.rs | 23 ++-- src/query/service/src/sessions/query_ctx.rs | 2 +- src/query/service/src/sessions/session_mgr.rs | 40 +++++- .../storages/testdata/configs_table_basic.txt | 1 + src/query/storages/system/src/log_queue.rs | 6 +- .../02_query/02_0007_get_profile.result | 2 + .../02_query/02_0007_get_profile.sh | 23 ++++ 13 files changed, 193 insertions(+), 43 deletions(-) create mode 100644 tests/suites/1_stateful/02_query/02_0007_get_profile.result create mode 100755 tests/suites/1_stateful/02_query/02_0007_get_profile.sh diff --git a/src/common/exception/src/exception_code.rs b/src/common/exception/src/exception_code.rs index 88d5668bbc909..62e81f631b9c0 100644 --- a/src/common/exception/src/exception_code.rs +++ b/src/common/exception/src/exception_code.rs @@ -157,6 +157,7 @@ build_exceptions! { OutofSequenceRange(1124), WrongSequenceCount(1125), UnknownSequence(1126), + UnknownQuery(1127), // Data Related Errors diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index 3ba0ba678e664..276d8bfacb430 100644 --- a/src/query/config/src/config.rs +++ b/src/query/config/src/config.rs @@ -1669,6 +1669,9 @@ pub struct QueryConfig { #[clap(long, value_name = "VALUE", default_value = "0")] pub cloud_control_grpc_timeout: u64, + #[clap(long, value_name = "VALUE", default_value = "50")] + pub max_cached_queries_profiles: usize, + #[clap(skip)] pub settings: HashMap, } @@ -1754,6 +1757,7 @@ impl TryInto for QueryConfig { udf_server_allow_list: self.udf_server_allow_list, cloud_control_grpc_server_address: self.cloud_control_grpc_server_address, cloud_control_grpc_timeout: self.cloud_control_grpc_timeout, + max_cached_queries_profiles: self.max_cached_queries_profiles, settings: self .settings .into_iter() @@ -1852,6 +1856,7 @@ impl From for QueryConfig { udf_server_allow_list: inner.udf_server_allow_list, cloud_control_grpc_server_address: inner.cloud_control_grpc_server_address, cloud_control_grpc_timeout: inner.cloud_control_grpc_timeout, + max_cached_queries_profiles: inner.max_cached_queries_profiles, settings: HashMap::new(), } } diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index 317b236319404..499539b5e5636 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -235,6 +235,7 @@ pub struct QueryConfig { pub cloud_control_grpc_server_address: Option, pub cloud_control_grpc_timeout: u64, + pub max_cached_queries_profiles: usize, pub settings: HashMap, } @@ -307,6 +308,7 @@ impl Default for QueryConfig { cloud_control_grpc_server_address: None, cloud_control_grpc_timeout: 0, data_retention_time_in_days_max: 90, + 
max_cached_queries_profiles: 50, settings: HashMap::new(), } } diff --git a/src/query/service/src/global_services.rs b/src/query/service/src/global_services.rs index 7604a4b51b24d..12faef0d9cf2e 100644 --- a/src/query/service/src/global_services.rs +++ b/src/query/service/src/global_services.rs @@ -44,6 +44,7 @@ use crate::clusters::ClusterDiscovery; use crate::locks::LockManager; #[cfg(feature = "enable_queries_executor")] use crate::pipelines::executor::GlobalQueriesExecutor; +use crate::servers::admin::v1::query_profiling::ProfilesCacheQueue; use crate::servers::flight::v1::exchange::DataExchangeManager; use crate::servers::http::v1::HttpQueryManager; use crate::sessions::QueriesQueueManager; @@ -150,6 +151,8 @@ impl GlobalServices { CloudControlApiProvider::init(addr, config.query.cloud_control_grpc_timeout).await?; } + ProfilesCacheQueue::init(config.query.max_cached_queries_profiles); + #[cfg(feature = "enable_queries_executor")] { GlobalQueriesExecutor::init()?; diff --git a/src/query/service/src/interpreters/interpreter.rs b/src/query/service/src/interpreters/interpreter.rs index ad98b95f0b12e..2615b9b192bc3 100644 --- a/src/query/service/src/interpreters/interpreter.rs +++ b/src/query/service/src/interpreters/interpreter.rs @@ -49,6 +49,8 @@ use crate::pipelines::executor::ExecutorSettings; use crate::pipelines::executor::PipelineCompleteExecutor; use crate::pipelines::executor::PipelinePullingExecutor; use crate::pipelines::PipelineBuildResult; +use crate::servers::admin::v1::query_profiling::ProfilesCacheElement; +use crate::servers::admin::v1::query_profiling::ProfilesCacheQueue; use crate::sessions::QueryContext; use crate::sessions::SessionManager; use crate::stream::DataBlockStream; @@ -132,10 +134,16 @@ pub trait Interpreter: Sync + Send { "{}", serde_json::to_string(&QueryProfiles { query_id: query_ctx.get_id(), - profiles: query_profiles, + profiles: query_profiles.clone(), statistics_desc: get_statistics_desc(), })? 
); + let profiles_queue = ProfilesCacheQueue::instance()?; + + profiles_queue.append_data(ProfilesCacheElement { + query_id: query_ctx.get_id(), + profiles: query_profiles, + })?; } hook_vacuum_temp_files(&query_ctx)?; diff --git a/src/query/service/src/servers/admin/v1/query_profiling.rs b/src/query/service/src/servers/admin/v1/query_profiling.rs index 78d69a7623c68..24585c4daf88a 100644 --- a/src/query/service/src/servers/admin/v1/query_profiling.rs +++ b/src/query/service/src/servers/admin/v1/query_profiling.rs @@ -19,47 +19,73 @@ use std::sync::Arc; use databend_common_base::runtime::profile::get_statistics_desc; use databend_common_base::runtime::profile::ProfileDesc; use databend_common_base::runtime::profile::ProfileStatisticsName; -use databend_common_catalog::table_context::TableContext; +use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; +use databend_common_expression::ColumnBuilder; +use databend_common_expression::TableSchemaRef; use databend_common_pipeline_core::PlanProfile; +use databend_common_storages_system::SystemLogElement; +use databend_common_storages_system::SystemLogQueue; use http::StatusCode; use poem::web::Json; use poem::web::Path; use poem::IntoResponse; +use crate::clusters::ClusterDiscovery; use crate::clusters::ClusterHelper; use crate::servers::flight::v1::actions::GET_PROFILE; use crate::sessions::SessionManager; -use crate::sessions::SessionType; #[poem::handler] #[async_backtrace::framed] pub async fn query_profiling_handler( Path(query_id): Path, ) -> poem::Result { - let session_manager = SessionManager::instance(); #[derive(serde::Serialize)] struct QueryProfiles { query_id: String, profiles: Vec, statistics_desc: Arc>, } - let res = match session_manager.get_session_by_id(&query_id) { - Some(session) => { - // can get profile from current node - session.get_query_profiles() + match get_profile_from_cache(&query_id) { + Ok(profiles) => { + return Ok(Json(QueryProfiles { + query_id: query_id.clone(), + profiles, + statistics_desc: get_statistics_desc(), + })); } - None => { - // need get profile from clusters - get_cluster_profile(&session_manager, &query_id) - .await - .map_err(|cause| { - poem::Error::from_string( - format!("Failed to fetch cluster node profile. cause: {cause}"), - StatusCode::INTERNAL_SERVER_ERROR, - ) - })? + Err(cause) => { + if cause.code() != ErrorCode::UNKNOWN_QUERY { + return Err(poem::Error::from_string( + format!("Failed to fetch profile from cache queue. cause: {cause}"), + StatusCode::INTERNAL_SERVER_ERROR, + )); + } } + } + let res = match SessionManager::instance().get_query_profiles(&query_id) { + Ok(profiles) => profiles, + Err(cause) => match cause.code() == ErrorCode::UNKNOWN_QUERY { + true => match get_cluster_profile(&query_id).await { + Ok(profiles) => profiles, + Err(cause) => { + return Err(match cause.code() == ErrorCode::UNKNOWN_QUERY { + true => poem::Error::from_string(cause.message(), StatusCode::NOT_FOUND), + false => poem::Error::from_string( + format!("Failed to fetch cluster node profile. cause: {cause}"), + StatusCode::INTERNAL_SERVER_ERROR, + ), + }); + } + }, + false => { + return Err(poem::Error::from_string( + format!("Failed to fetch cluster node profile. 
cause: {cause}"), + StatusCode::INTERNAL_SERVER_ERROR, + )); + } + }, }; Ok(Json(QueryProfiles { @@ -69,16 +95,10 @@ pub async fn query_profiling_handler( })) } -async fn get_cluster_profile( - session_manager: &SessionManager, - query_id: &str, -) -> Result, ErrorCode> { - let session = session_manager - .create_session(SessionType::HTTPAPI("QueryProfiling".to_string())) - .await?; +async fn get_cluster_profile(query_id: &str) -> Result, ErrorCode> { + let config = GlobalConfig::instance(); + let cluster = ClusterDiscovery::instance().discover(&config).await?; - let ctx = Arc::new(session).create_query_context().await?; - let cluster = ctx.get_cluster(); let mut message = HashMap::with_capacity(cluster.nodes.len()); for node_info in &cluster.nodes { @@ -87,10 +107,48 @@ async fn get_cluster_profile( } } - let settings = ctx.get_settings(); - let timeout = settings.get_flight_client_timeout()?; let res = cluster - .do_action::>(GET_PROFILE, message, timeout) + .do_action::<_, Option>>(GET_PROFILE, message, 60) .await?; - Ok(res.into_iter().flat_map(|(_key, value)| value).collect()) + + match res.into_values().find(Option::is_some) { + None => Err(ErrorCode::UnknownQuery(format!( + "Not found query {}", + query_id + ))), + Some(profiles) => Ok(profiles.unwrap()), + } +} + +pub fn get_profile_from_cache(target: &str) -> Result, ErrorCode> { + let profiles_queue = ProfilesCacheQueue::instance()?; + for element in profiles_queue.data.read().event_queue.iter().flatten() { + if element.query_id == target { + return Ok(element.profiles.clone()); + } + } + Err(ErrorCode::UnknownQuery(format!( + "Not found query {}", + target + ))) +} +#[derive(Clone)] +pub struct ProfilesCacheElement { + pub query_id: String, + pub profiles: Vec, } + +impl SystemLogElement for ProfilesCacheElement { + const TABLE_NAME: &'static str = "profiles_cache_not_table"; + fn schema() -> TableSchemaRef { + unreachable!() + } + fn fill_to_data_block( + &self, + _: &mut Vec, + ) -> databend_common_exception::Result<()> { + unreachable!() + } +} + +pub type ProfilesCacheQueue = SystemLogQueue; diff --git a/src/query/service/src/servers/flight/v1/actions/get_profile.rs b/src/query/service/src/servers/flight/v1/actions/get_profile.rs index 0ba171744964f..f24325ea22909 100644 --- a/src/query/service/src/servers/flight/v1/actions/get_profile.rs +++ b/src/query/service/src/servers/flight/v1/actions/get_profile.rs @@ -12,18 +12,27 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_pipeline_core::PlanProfile; -use crate::servers::flight::v1::actions::create_session; +use crate::servers::admin::v1::query_profiling::get_profile_from_cache; +use crate::sessions::SessionManager; pub static GET_PROFILE: &str = "/actions/get_profile"; -pub async fn get_profile(query_id: String) -> Result> { - let session = create_session()?; - let query_context = session.create_query_context().await?; - match query_context.get_session_by_id(&query_id) { - Some(session) => Ok(session.get_query_profiles()), - None => Ok(vec![]), +pub async fn get_profile(query_id: String) -> Result>> { + match SessionManager::instance().get_query_profiles(&query_id) { + Ok(profiles) => Ok(Some(profiles)), + Err(cause) => match cause.code() == ErrorCode::UNKNOWN_QUERY { + true => match get_profile_from_cache(&query_id) { + Ok(profiles) => Ok(Some(profiles)), + Err(cause) => match cause.code() == ErrorCode::UNKNOWN_QUERY { + true => Ok(None), + false => Err(cause), + }, + }, + false => Err(cause), + }, } } diff --git a/src/query/service/src/sessions/query_ctx.rs b/src/query/service/src/sessions/query_ctx.rs index 0422aa17bbf5f..9867928c3650c 100644 --- a/src/query/service/src/sessions/query_ctx.rs +++ b/src/query/service/src/sessions/query_ctx.rs @@ -1022,7 +1022,7 @@ impl TableContext for QueryContext { } fn get_queries_profile(&self) -> HashMap> { - SessionManager::instance().get_queries_profile() + SessionManager::instance().get_queries_profiles() } fn set_merge_into_join(&self, join: MergeIntoJoin) { diff --git a/src/query/service/src/sessions/session_mgr.rs b/src/query/service/src/sessions/session_mgr.rs index 834db92f258c1..00323ba15f34e 100644 --- a/src/query/service/src/sessions/session_mgr.rs +++ b/src/query/service/src/sessions/session_mgr.rs @@ -357,7 +357,7 @@ impl SessionManager { status_t } - pub fn get_queries_profile(&self) -> HashMap> { + pub fn get_queries_profiles(&self) -> HashMap> { let active_sessions = { // Here the situation is the same of method `graceful_shutdown`: // @@ -393,4 +393,42 @@ impl SessionManager { queries_profiles } + + pub fn get_query_profiles(&self, query_id: &str) -> Result> { + let active_sessions = { + // Here the situation is the same of method `graceful_shutdown`: + // + // We should drop the read lock before + // - acquiring upgraded session reference: the Arc, + // - extracting the ProcessInfo from it + // - and then drop the Arc + // Since there are chances that we are the last one that holding the reference, and the + // destruction of session need to acquire the write lock of `active_sessions`, which leads + // to dead lock. 
+ // + // Although online expression can also do this, to make this clearer, we wrap it in a block + + let active_sessions_guard = self.active_sessions.read(); + active_sessions_guard.values().cloned().collect::>() + }; + + for weak_ptr in active_sessions { + let Some(arc_session) = weak_ptr.upgrade() else { + continue; + }; + + let session_ctx = arc_session.session_ctx.as_ref(); + + if let Some(context_shared) = session_ctx.get_query_context_shared() { + if query_id == *context_shared.init_query_id.as_ref().read() { + return Ok(context_shared.get_query_profiles()); + } + } + } + + Err(ErrorCode::UnknownQuery(format!( + "Unknown query {}", + query_id + ))) + } } diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index a5123e1750257..5990874119c10 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -101,6 +101,7 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo | 'query' | 'jwt_key_files' | '' | '' | | 'query' | 'management_mode' | 'false' | '' | | 'query' | 'max_active_sessions' | '256' | '' | +| 'query' | 'max_cached_queries_profiles' | '50' | '' | | 'query' | 'max_memory_limit_enabled' | 'false' | '' | | 'query' | 'max_query_log_size' | '10000' | '' | | 'query' | 'max_running_queries' | '8' | '' | diff --git a/src/query/storages/system/src/log_queue.rs b/src/query/storages/system/src/log_queue.rs index 91999644f94aa..586bba395e957 100644 --- a/src/query/storages/system/src/log_queue.rs +++ b/src/query/storages/system/src/log_queue.rs @@ -52,9 +52,9 @@ pub trait SystemLogElement: Send + Sync + Clone { fn fill_to_data_block(&self, columns: &mut Vec) -> Result<()>; } -struct Data { +pub struct Data { index: usize, - event_queue: Vec>, + pub event_queue: Vec>, } impl Data { @@ -68,7 +68,7 @@ impl Data { pub struct SystemLogQueue { max_rows: usize, - data: Arc>>, + pub data: Arc>>, } static INSTANCES_MAP: OnceCell>>> = diff --git a/tests/suites/1_stateful/02_query/02_0007_get_profile.result b/tests/suites/1_stateful/02_query/02_0007_get_profile.result new file mode 100644 index 0000000000000..bb101b641b9bd --- /dev/null +++ b/tests/suites/1_stateful/02_query/02_0007_get_profile.result @@ -0,0 +1,2 @@ +true +true diff --git a/tests/suites/1_stateful/02_query/02_0007_get_profile.sh b/tests/suites/1_stateful/02_query/02_0007_get_profile.sh new file mode 100755 index 0000000000000..3200031cf1481 --- /dev/null +++ b/tests/suites/1_stateful/02_query/02_0007_get_profile.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +response=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d '{"sql": "select sleep(2)"}') + +query_id=$(echo $response | jq -r '.id') + +# test running status +http_code=$(curl -s -o /dev/null -w "%{http_code}" -u root: "http://localhost:8080/v1/queries/${query_id}/profiling") +if [ "$http_code" -eq 200 ]; then + echo "true" +else + echo "false" +fi + +sleep 3 + +#test finished status +http_code=$(curl -s -o /dev/null -w "%{http_code}" -u root: "http://localhost:8080/v1/queries/${query_id}/profiling") +if [ "$http_code" -eq 200 ]; then + echo "true" +else + echo "false" +fi + From 71b951a070045244bb87ac62ade4be92c8f413be Mon Sep 17 00:00:00 2001 From: Sky Fan <3374614481@qq.com> Date: Mon, 8 Jul 2024 15:52:30 +0800 Subject: [PATCH 03/21] refactor: unify transaction related code (#15966) * prepare replace 
update_table_meta * modify multi table insert commit * modify CommitInterpreter * simplify select with consume * simplify select with consume * make lint * simplify update_stream_metas * refactor update table meta * respect share table * replace update table meta * fix ut * revert unify of query_build_update_stream_req and dml_build_update_stream_req * fix ut * fix ut * fix comment * adjust log --- src/meta/api/src/schema_api.rs | 13 - src/meta/api/src/schema_api_impl.rs | 217 ++------------ src/meta/api/src/schema_api_test_suite.rs | 268 ++++++++++++------ src/meta/app/src/schema/table.rs | 11 +- src/query/catalog/src/catalog/interface.rs | 54 +++- .../catalog/src/catalog/session_catalog.rs | 37 +-- src/query/catalog/src/database.rs | 23 +- .../src/catalogs/default/database_catalog.rs | 29 +- .../src/catalogs/default/immutable_catalog.rs | 14 - .../src/catalogs/default/mutable_catalog.rs | 66 ++--- .../src/databases/default/default_database.rs | 19 +- .../src/databases/share/share_database.rs | 21 +- .../service/src/interpreters/common/mod.rs | 1 - .../service/src/interpreters/common/stream.rs | 22 +- .../interpreter_copy_into_location.rs | 2 +- .../src/interpreters/interpreter_select.rs | 76 ++--- .../interpreter_table_add_column.rs | 5 +- .../interpreter_table_drop_column.rs | 5 +- .../interpreter_table_modify_column.rs | 15 +- .../interpreter_table_modify_comment.rs | 5 +- .../interpreter_table_rename_column.rs | 5 +- .../interpreters/interpreter_txn_commit.rs | 5 +- .../tests/it/sql/exec/get_table_bind_test.rs | 20 +- .../it/storages/fuse/operations/commit.rs | 27 +- src/query/storages/common/txn/src/manager.rs | 55 ++-- .../storages/fuse/src/operations/commit.rs | 13 +- .../processors/multi_table_insert_commit.rs | 211 ++++++-------- .../storages/fuse/src/operations/revert.rs | 5 +- .../storages/hive/hive/src/hive_catalog.rs | 13 - src/query/storages/iceberg/src/catalog.rs | 11 - 30 files changed, 506 insertions(+), 762 deletions(-) diff --git a/src/meta/api/src/schema_api.rs b/src/meta/api/src/schema_api.rs index f0afe96c24a36..b562c9626874a 100644 --- a/src/meta/api/src/schema_api.rs +++ b/src/meta/api/src/schema_api.rs @@ -91,9 +91,6 @@ use databend_common_meta_app::schema::UpdateIndexReply; use databend_common_meta_app::schema::UpdateIndexReq; use databend_common_meta_app::schema::UpdateMultiTableMetaReq; use databend_common_meta_app::schema::UpdateMultiTableMetaResult; -use databend_common_meta_app::schema::UpdateStreamMetaReq; -use databend_common_meta_app::schema::UpdateTableMetaReply; -use databend_common_meta_app::schema::UpdateTableMetaReq; use databend_common_meta_app::schema::UpdateVirtualColumnReply; use databend_common_meta_app::schema::UpdateVirtualColumnReq; use databend_common_meta_app::schema::UpsertTableOptionReply; @@ -246,11 +243,6 @@ pub trait SchemaApi: Send + Sync { req: UpsertTableOptionReq, ) -> Result; - async fn update_table_meta( - &self, - req: UpdateTableMetaReq, - ) -> Result; - async fn update_multi_table_meta( &self, req: UpdateMultiTableMetaReq, @@ -312,9 +304,4 @@ pub trait SchemaApi: Send + Sync { async fn get_table_lvt(&self, req: GetLVTReq) -> Result; fn name(&self) -> String; - - async fn update_stream_metas( - &self, - update_stream_meta_reqs: &[UpdateStreamMetaReq], - ) -> Result<(), KVAppError>; } diff --git a/src/meta/api/src/schema_api_impl.rs b/src/meta/api/src/schema_api_impl.rs index 3dd88aaf02c69..a628a4018ed4b 100644 --- a/src/meta/api/src/schema_api_impl.rs +++ b/src/meta/api/src/schema_api_impl.rs @@ -36,7 +36,6 @@ use 
databend_common_meta_app::app_error::DropDbWithDropTime; use databend_common_meta_app::app_error::DropIndexWithDropTime; use databend_common_meta_app::app_error::DropTableWithDropTime; use databend_common_meta_app::app_error::DuplicatedIndexColumnId; -use databend_common_meta_app::app_error::DuplicatedUpsertFiles; use databend_common_meta_app::app_error::GetIndexWithDropTime; use databend_common_meta_app::app_error::IndexAlreadyExists; use databend_common_meta_app::app_error::IndexColumnIdNotFound; @@ -58,7 +57,6 @@ use databend_common_meta_app::app_error::UnknownIndex; use databend_common_meta_app::app_error::UnknownStreamId; use databend_common_meta_app::app_error::UnknownTable; use databend_common_meta_app::app_error::UnknownTableId; -use databend_common_meta_app::app_error::UpdateStreamMetasFailed; use databend_common_meta_app::app_error::ViewAlreadyExists; use databend_common_meta_app::app_error::VirtualColumnAlreadyExists; use databend_common_meta_app::data_mask::MaskPolicyTableIdListIdent; @@ -177,7 +175,6 @@ use databend_common_meta_app::schema::UpdateMultiTableMetaReq; use databend_common_meta_app::schema::UpdateMultiTableMetaResult; use databend_common_meta_app::schema::UpdateStreamMetaReq; use databend_common_meta_app::schema::UpdateTableMetaReply; -use databend_common_meta_app::schema::UpdateTableMetaReq; use databend_common_meta_app::schema::UpdateVirtualColumnReply; use databend_common_meta_app::schema::UpdateVirtualColumnReq; use databend_common_meta_app::schema::UpsertTableCopiedFileReq; @@ -2918,188 +2915,6 @@ impl + ?Sized> SchemaApi for KV { } } - #[logcall::logcall] - #[minitrace::trace] - async fn update_stream_metas( - &self, - update_stream_meta_reqs: &[UpdateStreamMetaReq], - ) -> Result<(), KVAppError> { - if update_stream_meta_reqs.is_empty() { - return Ok(()); - } - - let mut txn_req = TxnRequest { - condition: vec![], - if_then: vec![], - else_then: vec![], - }; - - append_update_stream_meta_requests( - self, - &mut txn_req, - update_stream_meta_reqs, - "update_stream_metas", - ) - .await?; - - let (success, _) = send_txn(self, txn_req).await?; - - if !success { - let msg = update_stream_meta_reqs - .iter() - .map(|req| format!("stream [id {}, seq {} ]", req.stream_id, req.seq)) - .collect::>() - .join(","); - return Err(KVAppError::AppError(AppError::from( - UpdateStreamMetasFailed::new(msg), - ))); - } else { - Ok(()) - } - } - - #[logcall::logcall] - #[minitrace::trace] - async fn update_table_meta( - &self, - req: UpdateTableMetaReq, - ) -> Result { - debug!(req :? 
=(&req); "SchemaApi: {}", func_name!()); - let tbid = TableId { - table_id: req.table_id, - }; - let req_seq = req.seq; - - let fail_if_duplicated = req - .copied_files - .as_ref() - .map(|v| v.fail_if_duplicated) - .unwrap_or(false); - - loop { - let (tb_meta_seq, table_meta): (_, Option) = - get_pb_value(self, &tbid).await?; - - debug!(ident :% =(&tbid); "update_table_meta"); - - if tb_meta_seq == 0 || table_meta.is_none() { - return Err(KVAppError::AppError(AppError::UnknownTableId( - UnknownTableId::new(req.table_id, "update_table_meta"), - ))); - } - if req_seq.match_seq(tb_meta_seq).is_err() { - return Err(KVAppError::AppError(AppError::from( - TableVersionMismatched::new( - req.table_id, - req.seq, - tb_meta_seq, - "update_table_meta", - ), - ))); - } - - let get_table_meta = TxnOp { - request: Some(Request::Get(TxnGetRequest { - key: tbid.to_string_key(), - })), - }; - - let mut txn_req = TxnRequest { - condition: vec![ - // table is not changed - txn_cond_seq(&tbid, Eq, tb_meta_seq), - ], - if_then: vec![ - txn_op_put(&tbid, serialize_struct(&req.new_table_meta)?), // tb_id -> tb_meta - ], - else_then: vec![get_table_meta], - }; - - if let Some(req) = &req.copied_files { - let (conditions, match_operations) = - build_upsert_table_copied_file_info_conditions( - &tbid, - req, - tb_meta_seq, - req.fail_if_duplicated, - )?; - txn_req.condition.extend(conditions); - txn_req.if_then.extend(match_operations) - } - - append_update_stream_meta_requests( - self, - &mut txn_req, - &req.update_stream_meta, - "update_table_meta", - ) - .await?; - - if let Some(deduplicated_label) = req.deduplicated_label.clone() { - txn_req - .if_then - .push(build_upsert_table_deduplicated_label(deduplicated_label)) - } - - let (succ, responses) = send_txn(self, txn_req).await?; - - debug!( - id :? 
=(&tbid), - succ = succ; - "update_table_meta" - ); - - if succ { - return Ok(UpdateTableMetaReply { - share_table_info: get_share_table_info_map(self, &table_meta.unwrap()).await?, - }); - } else { - let resp = responses - .first() - // fail fast if response is None (which should not happen) - .expect("internal error: expect one response if update_table_meta txn failed."); - - if let Some(Response::Get(get_resp)) = &resp.response { - // deserialize table version info - let (tb_meta_seq, _): (_, Option) = - if let Some(seq_v) = &get_resp.value { - (seq_v.seq, Some(deserialize_struct(&seq_v.data)?)) - } else { - (0, None) - }; - - // check table version - if req_seq.match_seq(tb_meta_seq).is_ok() { - // if table version does match, but tx failed, - if fail_if_duplicated { - // report file duplication error - return Err(KVAppError::AppError(AppError::from( - DuplicatedUpsertFiles::new(req.table_id, "update_table_meta"), - ))); - } else { - // continue and try update the "table copied files" - continue; - }; - } else { - return Err(KVAppError::AppError(AppError::from( - TableVersionMismatched::new( - req.table_id, - req.seq, - tb_meta_seq, - "update_table_meta", - ), - ))); - } - } else { - unreachable!( - "internal error: expect some TxnGetResponseGet, but got {:?}", - resp.response - ); - } - } - } - } - async fn update_multi_table_meta( &self, req: UpdateMultiTableMetaReq, @@ -3122,7 +2937,7 @@ impl + ?Sized> SchemaApi for KV { .iter() .map(|req| { TableId { - table_id: req.table_id, + table_id: req.0.table_id, } .to_string_key() }) @@ -3131,16 +2946,16 @@ impl + ?Sized> SchemaApi for KV { for (req, (tb_meta_seq, table_meta)) in update_table_metas.iter().zip(tb_meta_vec.iter_mut()) { - let req_seq = req.seq; + let req_seq = req.0.seq; if *tb_meta_seq == 0 || table_meta.is_none() { return Err(KVAppError::AppError(AppError::UnknownTableId( - UnknownTableId::new(req.table_id, "update_multi_table_meta"), + UnknownTableId::new(req.0.table_id, "update_multi_table_meta"), ))); } if req_seq.match_seq(*tb_meta_seq).is_err() { mismatched_tbs.push(( - req.table_id, + req.0.table_id, *tb_meta_seq, std::mem::take(table_meta).unwrap(), )); @@ -3153,15 +2968,15 @@ impl + ?Sized> SchemaApi for KV { for (req, (tb_meta_seq, _)) in update_table_metas.iter().zip(tb_meta_vec.iter()) { let tbid = TableId { - table_id: req.table_id, + table_id: req.0.table_id, }; - tbl_seqs.insert(req.table_id, *tb_meta_seq); + tbl_seqs.insert(req.0.table_id, *tb_meta_seq); txn_req .condition .push(txn_cond_seq(&tbid, Eq, *tb_meta_seq)); txn_req .if_then - .push(txn_op_put(&tbid, serialize_struct(&req.new_table_meta)?)); + .push(txn_op_put(&tbid, serialize_struct(&req.0.new_table_meta)?)); txn_req.else_then.push(TxnOp { request: Some(Request::Get(TxnGetRequest { key: tbid.to_string_key(), @@ -3233,7 +3048,17 @@ impl + ?Sized> SchemaApi for KV { } let (succ, responses) = send_txn(self, txn_req).await?; if succ { - return Ok(std::result::Result::Ok(())); + let mut share_table_info = vec![]; + for (_, tb_meta) in tb_meta_vec { + if let Some(info) = + get_share_table_info_map(self, tb_meta.as_ref().unwrap()).await? + { + share_table_info.extend(info); + } + } + return Ok(std::result::Result::Ok(UpdateTableMetaReply { + share_table_info: Some(share_table_info), + })); } let mut mismatched_tbs = vec![]; for (resp, req) in responses.iter().zip(update_table_metas.iter()) { @@ -3248,13 +3073,13 @@ impl + ?Sized> SchemaApi for KV { (seq_v.seq, deserialize_struct(&seq_v.data)?) 
} else { return Err(KVAppError::AppError(AppError::UnknownTableId( - UnknownTableId::new(req.table_id, "update_multi_table_meta"), + UnknownTableId::new(req.0.table_id, "update_multi_table_meta"), ))); }; // check table version - if req.seq.match_seq(tb_meta_seq).is_err() { - mismatched_tbs.push((req.table_id, tb_meta_seq, table_meta)); + if req.0.seq.match_seq(tb_meta_seq).is_err() { + mismatched_tbs.push((req.0.table_id, tb_meta_seq, table_meta)); } } diff --git a/src/meta/api/src/schema_api_test_suite.rs b/src/meta/api/src/schema_api_test_suite.rs index 2f245016c4313..f7f329d7fe4e0 100644 --- a/src/meta/api/src/schema_api_test_suite.rs +++ b/src/meta/api/src/schema_api_test_suite.rs @@ -19,6 +19,7 @@ use std::collections::BTreeSet; use std::collections::HashMap; use std::collections::HashSet; use std::sync::Arc; +use std::vec; use chrono::DateTime; use chrono::Duration; @@ -118,6 +119,7 @@ use databend_common_meta_app::schema::TableStatistics; use databend_common_meta_app::schema::TruncateTableReq; use databend_common_meta_app::schema::UndropDatabaseReq; use databend_common_meta_app::schema::UndropTableReq; +use databend_common_meta_app::schema::UpdateMultiTableMetaReq; use databend_common_meta_app::schema::UpdateTableMetaReq; use databend_common_meta_app::schema::UpdateVirtualColumnReq; use databend_common_meta_app::schema::UpsertTableCopiedFileReq; @@ -2578,15 +2580,17 @@ impl SchemaApiTestSuite { new_table_meta.statistics = table_statistics; let table_id = table.ident.table_id; let table_version = table.ident.seq; - mt.update_table_meta(UpdateTableMetaReq { + let req = UpdateTableMetaReq { table_id, seq: MatchSeq::Exact(table_version), new_table_meta: new_table_meta.clone(), - copied_files: None, - deduplicated_label: None, - update_stream_meta: vec![], + }; + mt.update_multi_table_meta(UpdateMultiTableMetaReq { + update_table_metas: vec![(req, table.as_ref().clone())], + ..Default::default() }) - .await?; + .await? + .unwrap(); let table = mt .get_table((tenant_name, "db1", "tb2").into()) @@ -2605,21 +2609,21 @@ impl SchemaApiTestSuite { let new_table_meta = table.meta.clone(); let table_id = table.ident.table_id; let table_version = table.ident.seq; + let req = UpdateTableMetaReq { + table_id, + seq: MatchSeq::Exact(table_version + 1), + new_table_meta: new_table_meta.clone(), + }; let res = mt - .update_table_meta(UpdateTableMetaReq { - table_id, - seq: MatchSeq::Exact(table_version + 1), - new_table_meta: new_table_meta.clone(), - copied_files: None, - deduplicated_label: None, - update_stream_meta: vec![], + .update_multi_table_meta(UpdateMultiTableMetaReq { + update_table_metas: vec![(req, table.as_ref().clone())], + ..Default::default() }) - .await; + .await?; let err = res.unwrap_err(); - let err = ErrorCode::from(err); - assert_eq!(ErrorCode::TABLE_VERSION_MISMATCHED, err.code()); + assert!(!err.is_empty()); } info!("--- update table meta, with upsert file req"); @@ -2651,15 +2655,19 @@ impl SchemaApiTestSuite { expire_at: None, fail_if_duplicated: true, }; - mt.update_table_meta(UpdateTableMetaReq { + + let req = UpdateTableMetaReq { table_id, seq: MatchSeq::Exact(table_version), new_table_meta: new_table_meta.clone(), - copied_files: Some(upsert_source_table), - deduplicated_label: None, - update_stream_meta: vec![], + }; + mt.update_multi_table_meta(UpdateMultiTableMetaReq { + update_table_metas: vec![(req, table.as_ref().clone())], + copied_files: vec![(table_id, upsert_source_table)], + ..Default::default() }) - .await?; + .await? 
+ .unwrap(); let table = mt .get_table((tenant_name, "db1", "tb2").into()) @@ -2697,15 +2705,18 @@ impl SchemaApiTestSuite { expire_at: None, fail_if_duplicated: true, }; - mt.update_table_meta(UpdateTableMetaReq { + let req = UpdateTableMetaReq { table_id, seq: MatchSeq::Exact(table_version), new_table_meta: new_table_meta.clone(), - copied_files: Some(upsert_source_table), - deduplicated_label: None, - update_stream_meta: vec![], + }; + mt.update_multi_table_meta(UpdateMultiTableMetaReq { + update_table_metas: vec![(req, table.as_ref().clone())], + copied_files: vec![(table_id, upsert_source_table)], + ..Default::default() }) - .await?; + .await? + .unwrap(); let table = mt .get_table((tenant_name, "db1", "tb2").into()) @@ -2743,19 +2754,21 @@ impl SchemaApiTestSuite { expire_at: None, fail_if_duplicated: true, }; + let req = UpdateTableMetaReq { + table_id, + seq: MatchSeq::Exact(table_version), + new_table_meta: new_table_meta.clone(), + }; let result = mt - .update_table_meta(UpdateTableMetaReq { - table_id, - seq: MatchSeq::Exact(table_version), - new_table_meta: new_table_meta.clone(), - copied_files: Some(upsert_source_table), - deduplicated_label: None, - update_stream_meta: vec![], + .update_multi_table_meta(UpdateMultiTableMetaReq { + update_table_metas: vec![(req, table.as_ref().clone())], + copied_files: vec![(table_id, upsert_source_table)], + ..Default::default() }) .await; let err = result.unwrap_err(); let err = ErrorCode::from(err); - assert_eq!(ErrorCode::DUPLICATED_UPSERT_FILES, err.code()); + assert_eq!(ErrorCode::UNRESOLVABLE_CONFLICT, err.code()); } } Ok(()) @@ -3588,7 +3601,7 @@ impl SchemaApiTestSuite { let mut file_info = BTreeMap::new(); file_info.insert("file".to_string(), stage_info.clone()); - let req = UpsertTableCopiedFileReq { + let copied_file_req = UpsertTableCopiedFileReq { file_info: file_info.clone(), expire_at: Some((Utc::now().timestamp() + 86400) as u64), fail_if_duplicated: true, @@ -3598,12 +3611,23 @@ impl SchemaApiTestSuite { table_id, seq: MatchSeq::Any, new_table_meta: table_meta.clone(), - copied_files: Some(req), - deduplicated_label: None, - update_stream_meta: vec![], }; - let _ = mt.update_table_meta(req).await?; + let table = mt + .get_table(GetTableReq { + inner: tbl_name_ident, + }) + .await? + .as_ref() + .clone(); + + let req = UpdateMultiTableMetaReq { + update_table_metas: vec![(req, table)], + copied_files: vec![(table_id, copied_file_req)], + ..Default::default() + }; + + let _ = mt.update_multi_table_meta(req).await?; let key = TableCopiedFileNameIdent { table_id, @@ -3724,7 +3748,7 @@ impl SchemaApiTestSuite { let req = CreateTableReq { create_option: CreateOption::Create, - name_ident: tbl_name_ident, + name_ident: tbl_name_ident.clone(), table_meta: create_table_meta.clone(), as_dropped: false, }; @@ -3741,7 +3765,7 @@ impl SchemaApiTestSuite { let mut file_info = BTreeMap::new(); file_info.insert("file".to_string(), stage_info.clone()); - let req = UpsertTableCopiedFileReq { + let copied_file_req = UpsertTableCopiedFileReq { file_info: file_info.clone(), expire_at: Some((Utc::now().timestamp() + 86400) as u64), fail_if_duplicated: true, @@ -3751,12 +3775,23 @@ impl SchemaApiTestSuite { table_id, seq: MatchSeq::Any, new_table_meta: create_table_meta.clone(), - copied_files: Some(req), - deduplicated_label: None, - update_stream_meta: vec![], }; - let _ = mt.update_table_meta(req).await?; + let table = mt + .get_table(GetTableReq { + inner: tbl_name_ident.clone(), + }) + .await? 
+ .as_ref() + .clone(); + + let req = UpdateMultiTableMetaReq { + update_table_metas: vec![(req, table)], + copied_files: vec![(table_id, copied_file_req)], + ..Default::default() + }; + + let _ = mt.update_multi_table_meta(req).await?; let key = TableCopiedFileNameIdent { table_id, @@ -5647,7 +5682,11 @@ impl SchemaApiTestSuite { ..TableMeta::default() }; let created_on = Utc::now(); - + let tbl_name_ident = TableNameIdent { + tenant: Tenant::new_or_err(tenant_name, func_name!())?, + db_name: db_name.to_string(), + table_name: tbl_name.to_string(), + }; info!("--- prepare db and table"); { let plan = CreateDatabaseReq { @@ -5663,11 +5702,7 @@ impl SchemaApiTestSuite { let req = CreateTableReq { create_option: CreateOption::Create, - name_ident: TableNameIdent { - tenant: Tenant::new_or_err(tenant_name, func_name!())?, - db_name: db_name.to_string(), - table_name: tbl_name.to_string(), - }, + name_ident: tbl_name_ident.clone(), table_meta: table_meta(created_on), as_dropped: false, }; @@ -5685,7 +5720,7 @@ impl SchemaApiTestSuite { let mut file_info = BTreeMap::new(); file_info.insert("file".to_string(), stage_info.clone()); - let req = UpsertTableCopiedFileReq { + let copied_file_req = UpsertTableCopiedFileReq { file_info: file_info.clone(), expire_at: Some((Utc::now().timestamp() + 86400) as u64), fail_if_duplicated: true, @@ -5695,12 +5730,23 @@ impl SchemaApiTestSuite { table_id, seq: MatchSeq::Any, new_table_meta: table_meta(created_on), - copied_files: Some(req), - deduplicated_label: None, - update_stream_meta: vec![], }; - let _ = mt.update_table_meta(req).await?; + let table = mt + .get_table(GetTableReq { + inner: tbl_name_ident.clone(), + }) + .await? + .as_ref() + .clone(); + + let req = UpdateMultiTableMetaReq { + update_table_metas: vec![(req, table)], + copied_files: vec![(table_id, copied_file_req)], + ..Default::default() + }; + + let _ = mt.update_multi_table_meta(req).await?; let req = GetTableCopiedFileReq { table_id, @@ -5723,7 +5769,7 @@ impl SchemaApiTestSuite { let mut file_info = BTreeMap::new(); file_info.insert("file2".to_string(), stage_info.clone()); - let req = UpsertTableCopiedFileReq { + let copied_file_req = UpsertTableCopiedFileReq { file_info: file_info.clone(), expire_at: Some((Utc::now().timestamp() - 86400) as u64), fail_if_duplicated: true, @@ -5733,12 +5779,23 @@ impl SchemaApiTestSuite { table_id, seq: MatchSeq::Any, new_table_meta: table_meta(created_on), - copied_files: Some(req), - deduplicated_label: None, - update_stream_meta: vec![], }; - let _ = mt.update_table_meta(req).await?; + let table = mt + .get_table(GetTableReq { + inner: tbl_name_ident.clone(), + }) + .await? 
+ .as_ref() + .clone(); + + let req = UpdateMultiTableMetaReq { + update_table_metas: vec![(req, table)], + copied_files: vec![(table_id, copied_file_req)], + ..Default::default() + }; + + let _ = mt.update_multi_table_meta(req).await?; let req = GetTableCopiedFileReq { table_id, @@ -7101,6 +7158,11 @@ impl SchemaApiTestSuite { let created_on = Utc::now(); info!("--- prepare db and table"); + let tbl_name_ident = TableNameIdent { + tenant: Tenant::new_or_err(tenant_name, func_name!())?, + db_name: db_name.to_string(), + table_name: tbl_name.to_string(), + }; { let plan = CreateDatabaseReq { create_option: CreateOption::Create, @@ -7115,17 +7177,13 @@ impl SchemaApiTestSuite { let req = CreateTableReq { create_option: CreateOption::Create, - name_ident: TableNameIdent { - tenant: Tenant::new_or_err(tenant_name, func_name!())?, - db_name: db_name.to_string(), - table_name: tbl_name.to_string(), - }, + name_ident: tbl_name_ident.clone(), table_meta: table_meta(created_on), as_dropped: false, }; let resp = mt.create_table(req.clone()).await?; table_id = resp.table_id; - } + }; info!("--- create and get stage file info"); { @@ -7137,7 +7195,7 @@ impl SchemaApiTestSuite { let mut file_info = BTreeMap::new(); file_info.insert("file".to_string(), stage_info.clone()); - let req = UpsertTableCopiedFileReq { + let copied_file_req = UpsertTableCopiedFileReq { file_info: file_info.clone(), expire_at: Some((Utc::now().timestamp() + 86400) as u64), fail_if_duplicated: true, @@ -7147,12 +7205,23 @@ impl SchemaApiTestSuite { table_id, seq: MatchSeq::Any, new_table_meta: table_meta(created_on), - copied_files: Some(req), - deduplicated_label: None, - update_stream_meta: vec![], }; - let _ = mt.update_table_meta(req).await?; + let table = mt + .get_table(GetTableReq { + inner: tbl_name_ident.clone(), + }) + .await? + .as_ref() + .clone(); + + let req = UpdateMultiTableMetaReq { + update_table_metas: vec![(req, table)], + copied_files: vec![(table_id, copied_file_req)], + ..Default::default() + }; + + let _ = mt.update_multi_table_meta(req).await?; let req = GetTableCopiedFileReq { table_id, @@ -7184,7 +7253,7 @@ impl SchemaApiTestSuite { file_info.insert("file".to_string(), stage_info.clone()); file_info.insert("file_not_exist".to_string(), stage_info.clone()); - let req = UpsertTableCopiedFileReq { + let copied_file_req = UpsertTableCopiedFileReq { file_info: file_info.clone(), expire_at: Some((Utc::now().timestamp() + 86400) as u64), fail_if_duplicated: true, @@ -7194,15 +7263,26 @@ impl SchemaApiTestSuite { table_id, seq: MatchSeq::Any, new_table_meta: table_meta(created_on), - copied_files: Some(req), - deduplicated_label: None, - update_stream_meta: vec![], }; - let result = mt.update_table_meta(req).await; + let table = mt + .get_table(GetTableReq { + inner: tbl_name_ident.clone(), + }) + .await? 
+ .as_ref() + .clone(); + + let req = UpdateMultiTableMetaReq { + update_table_metas: vec![(req, table)], + copied_files: vec![(table_id, copied_file_req)], + ..Default::default() + }; + + let result = mt.update_multi_table_meta(req).await; let err = result.unwrap_err(); let err = ErrorCode::from(err); - assert_eq!(ErrorCode::DUPLICATED_UPSERT_FILES, err.code()); + assert_eq!(ErrorCode::UNRESOLVABLE_CONFLICT, err.code()); let req = GetTableCopiedFileReq { table_id, @@ -7228,7 +7308,7 @@ impl SchemaApiTestSuite { file_info.insert("file".to_string(), stage_info.clone()); file_info.insert("file_not_exist".to_string(), stage_info.clone()); - let req = UpsertTableCopiedFileReq { + let copied_file_req = UpsertTableCopiedFileReq { file_info: file_info.clone(), expire_at: Some((Utc::now().timestamp() + 86400) as u64), fail_if_duplicated: false, @@ -7238,12 +7318,23 @@ impl SchemaApiTestSuite { table_id, seq: MatchSeq::Any, new_table_meta: table_meta(created_on), - copied_files: Some(req), - deduplicated_label: None, - update_stream_meta: vec![], }; - mt.update_table_meta(req).await?; + let table = mt + .get_table(GetTableReq { + inner: tbl_name_ident, + }) + .await? + .as_ref() + .clone(); + + let req = UpdateMultiTableMetaReq { + update_table_metas: vec![(req, table)], + copied_files: vec![(table_id, copied_file_req)], + ..Default::default() + }; + + mt.update_multi_table_meta(req).await?.unwrap(); let req = GetTableCopiedFileReq { table_id, @@ -7408,7 +7499,7 @@ where MT: SchemaApi + kvapi::AsKVApi file_infos.insert(format!("file{}", i), stage_info); } - let req = UpsertTableCopiedFileReq { + let copied_file_req = UpsertTableCopiedFileReq { file_info: file_infos.clone(), expire_at: Some((Utc::now().timestamp() + 86400) as u64), fail_if_duplicated: true, @@ -7418,12 +7509,15 @@ where MT: SchemaApi + kvapi::AsKVApi table_id: self.table_id, seq: MatchSeq::Any, new_table_meta: self.table_meta(), - copied_files: Some(req), - deduplicated_label: None, - update_stream_meta: vec![], }; - self.mt.update_table_meta(req).await?; + let req = UpdateMultiTableMetaReq { + update_table_metas: vec![(req, Default::default())], + copied_files: vec![(self.table_id, copied_file_req)], + ..Default::default() + }; + + self.mt.update_multi_table_meta(req).await?.unwrap(); Ok(file_infos) } diff --git a/src/meta/app/src/schema/table.rs b/src/meta/app/src/schema/table.rs index 85172a433639a..cb6bbc6f9309d 100644 --- a/src/meta/app/src/schema/table.rs +++ b/src/meta/app/src/schema/table.rs @@ -712,13 +712,11 @@ pub struct UpdateTableMetaReq { pub table_id: u64, pub seq: MatchSeq, pub new_table_meta: TableMeta, - pub copied_files: Option, - pub update_stream_meta: Vec, - pub deduplicated_label: Option, } +#[derive(Clone, Debug, PartialEq, Eq, Default)] pub struct UpdateMultiTableMetaReq { - pub update_table_metas: Vec, + pub update_table_metas: Vec<(UpdateTableMetaReq, TableInfo)>, pub copied_files: Vec<(u64, UpsertTableCopiedFileReq)>, pub update_stream_metas: Vec, pub deduplicated_labels: Vec, @@ -727,7 +725,8 @@ pub struct UpdateMultiTableMetaReq { /// The result of updating multiple table meta /// /// If update fails due to table version mismatch, the `Err` will contain the (table id, seq , table meta)s that fail to update. 
-pub type UpdateMultiTableMetaResult = std::result::Result<(), Vec<(u64, u64, TableMeta)>>; +pub type UpdateMultiTableMetaResult = + std::result::Result>; impl UpsertTableOptionReq { pub fn new( @@ -780,7 +779,7 @@ pub struct UpsertTableOptionReply { pub share_table_info: Option>, } -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq, Default)] pub struct UpdateTableMetaReply { pub share_table_info: Option>, } diff --git a/src/query/catalog/src/catalog/interface.rs b/src/query/catalog/src/catalog/interface.rs index b7a2ff107bf1b..68cfb3d8f1fb8 100644 --- a/src/query/catalog/src/catalog/interface.rs +++ b/src/query/catalog/src/catalog/interface.rs @@ -288,18 +288,7 @@ pub trait Catalog: DynClone + Send + Sync + Debug { req: UpsertTableOptionReq, ) -> Result; - async fn update_table_meta( - &self, - table_info: &TableInfo, - req: UpdateTableMetaReq, - ) -> Result; - - // update stream metas, currently used by "copy into location form stream" - async fn update_stream_metas(&self, _update_stream_meta: &[UpdateStreamMetaReq]) -> Result<()> { - Ok(()) - } - - async fn update_multi_table_meta( + async fn retryable_update_multi_table_meta( &self, _req: UpdateMultiTableMetaReq, ) -> Result { @@ -308,6 +297,47 @@ pub trait Catalog: DynClone + Send + Sync + Debug { )) } + async fn update_multi_table_meta( + &self, + req: UpdateMultiTableMetaReq, + ) -> Result { + self.retryable_update_multi_table_meta(req) + .await? + .map_err(|e| { + ErrorCode::TableVersionMismatched(format!( + "Fail to update table metas, conflict tables: {:?}", + e.iter() + .map(|(tid, seq, meta)| (tid, seq, &meta.engine)) + .collect::>() + )) + }) + } + + // update stream metas, currently used by "copy into location form stream" + async fn update_stream_metas( + &self, + update_stream_metas: Vec, + ) -> Result<()> { + self.update_multi_table_meta(UpdateMultiTableMetaReq { + update_stream_metas, + ..Default::default() + }) + .await + .map(|_| ()) + } + + async fn update_single_table_meta( + &self, + req: UpdateTableMetaReq, + table_info: &TableInfo, + ) -> Result { + self.update_multi_table_meta(UpdateMultiTableMetaReq { + update_table_metas: vec![(req, table_info.clone())], + ..Default::default() + }) + .await + } + async fn set_table_column_mask_policy( &self, req: SetTableColumnMaskPolicyReq, diff --git a/src/query/catalog/src/catalog/session_catalog.rs b/src/query/catalog/src/catalog/session_catalog.rs index 36e16339119b0..ac95dc9ac7767 100644 --- a/src/query/catalog/src/catalog/session_catalog.rs +++ b/src/query/catalog/src/catalog/session_catalog.rs @@ -87,9 +87,6 @@ use databend_common_meta_app::schema::UpdateIndexReply; use databend_common_meta_app::schema::UpdateIndexReq; use databend_common_meta_app::schema::UpdateMultiTableMetaReq; use databend_common_meta_app::schema::UpdateMultiTableMetaResult; -use databend_common_meta_app::schema::UpdateStreamMetaReq; -use databend_common_meta_app::schema::UpdateTableMetaReply; -use databend_common_meta_app::schema::UpdateTableMetaReq; use databend_common_meta_app::schema::UpdateVirtualColumnReply; use databend_common_meta_app::schema::UpdateVirtualColumnReq; use databend_common_meta_app::schema::UpsertTableOptionReply; @@ -357,43 +354,21 @@ impl Catalog for SessionCatalog { self.inner.upsert_table_option(tenant, db_name, req).await } - async fn update_table_meta( + async fn retryable_update_multi_table_meta( &self, - table_info: &TableInfo, - req: UpdateTableMetaReq, - ) -> Result { - let state = self.txn_mgr.lock().state(); - match state { - 
TxnState::AutoCommit => self.inner.update_table_meta(table_info, req).await, - TxnState::Active => { - self.txn_mgr.lock().update_table_meta(req, table_info); - Ok(UpdateTableMetaReply { - share_table_info: None, - }) - } - TxnState::Fail => unreachable!(), - } - } - - async fn update_stream_metas(&self, update_stream_metas: &[UpdateStreamMetaReq]) -> Result<()> { + req: UpdateMultiTableMetaReq, + ) -> Result { let state = self.txn_mgr.lock().state(); match state { - TxnState::AutoCommit => self.inner.update_stream_metas(update_stream_metas).await, + TxnState::AutoCommit => self.inner.retryable_update_multi_table_meta(req).await, TxnState::Active => { - self.txn_mgr.lock().update_stream_metas(update_stream_metas); - Ok(()) + self.txn_mgr.lock().update_multi_table_meta(req); + Ok(Ok(Default::default())) } TxnState::Fail => unreachable!(), } } - async fn update_multi_table_meta( - &self, - req: UpdateMultiTableMetaReq, - ) -> Result { - self.inner.update_multi_table_meta(req).await - } - async fn set_table_column_mask_policy( &self, req: SetTableColumnMaskPolicyReq, diff --git a/src/query/catalog/src/database.rs b/src/query/catalog/src/database.rs index 64a3a6b34c3fa..81c0bd66adc91 100644 --- a/src/query/catalog/src/database.rs +++ b/src/query/catalog/src/database.rs @@ -35,8 +35,8 @@ use databend_common_meta_app::schema::TruncateTableReply; use databend_common_meta_app::schema::TruncateTableReq; use databend_common_meta_app::schema::UndropTableReply; use databend_common_meta_app::schema::UndropTableReq; -use databend_common_meta_app::schema::UpdateTableMetaReply; -use databend_common_meta_app::schema::UpdateTableMetaReq; +use databend_common_meta_app::schema::UpdateMultiTableMetaReq; +use databend_common_meta_app::schema::UpdateMultiTableMetaResult; use databend_common_meta_app::schema::UpsertTableOptionReply; use databend_common_meta_app::schema::UpsertTableOptionReq; use databend_common_meta_app::tenant::Tenant; @@ -161,14 +161,6 @@ pub trait Database: DynClone + Sync + Send { ))) } - #[async_backtrace::framed] - async fn update_table_meta(&self, _req: UpdateTableMetaReq) -> Result { - Err(ErrorCode::Unimplemented(format!( - "UnImplement update_table_meta in {} Database", - self.name() - ))) - } - #[async_backtrace::framed] async fn set_table_column_mask_policy( &self, @@ -198,4 +190,15 @@ pub trait Database: DynClone + Sync + Send { self.name() ))) } + + #[async_backtrace::framed] + async fn retryable_update_multi_table_meta( + &self, + _req: UpdateMultiTableMetaReq, + ) -> Result { + Err(ErrorCode::Unimplemented(format!( + "UnImplement retryable_update_multi_table_meta in {} Database", + self.name() + ))) + } } diff --git a/src/query/service/src/catalogs/default/database_catalog.rs b/src/query/service/src/catalogs/default/database_catalog.rs index 4251944eaa074..564b9e66c3f5c 100644 --- a/src/query/service/src/catalogs/default/database_catalog.rs +++ b/src/query/service/src/catalogs/default/database_catalog.rs @@ -95,9 +95,6 @@ use databend_common_meta_app::schema::UpdateIndexReply; use databend_common_meta_app::schema::UpdateIndexReq; use databend_common_meta_app::schema::UpdateMultiTableMetaReq; use databend_common_meta_app::schema::UpdateMultiTableMetaResult; -use databend_common_meta_app::schema::UpdateStreamMetaReq; -use databend_common_meta_app::schema::UpdateTableMetaReply; -use databend_common_meta_app::schema::UpdateTableMetaReq; use databend_common_meta_app::schema::UpdateVirtualColumnReply; use databend_common_meta_app::schema::UpdateVirtualColumnReq; use 
databend_common_meta_app::schema::UpsertTableOptionReply; @@ -574,31 +571,13 @@ impl Catalog for DatabaseCatalog { } #[async_backtrace::framed] - async fn update_table_meta( - &self, - table_info: &TableInfo, - req: UpdateTableMetaReq, - ) -> Result { - self.mutable_catalog - .update_table_meta(table_info, req) - .await - } - - async fn update_stream_metas( - &self, - update_stream_meta_reqs: &[UpdateStreamMetaReq], - ) -> Result<()> { - self.mutable_catalog - .update_stream_metas(update_stream_meta_reqs) - .await - } - - #[async_backtrace::framed] - async fn update_multi_table_meta( + async fn retryable_update_multi_table_meta( &self, reqs: UpdateMultiTableMetaReq, ) -> Result { - self.mutable_catalog.update_multi_table_meta(reqs).await + self.mutable_catalog + .retryable_update_multi_table_meta(reqs) + .await } #[async_backtrace::framed] diff --git a/src/query/service/src/catalogs/default/immutable_catalog.rs b/src/query/service/src/catalogs/default/immutable_catalog.rs index cf2fd9129d88b..2baad78bf3145 100644 --- a/src/query/service/src/catalogs/default/immutable_catalog.rs +++ b/src/query/service/src/catalogs/default/immutable_catalog.rs @@ -85,8 +85,6 @@ use databend_common_meta_app::schema::UndropTableReply; use databend_common_meta_app::schema::UndropTableReq; use databend_common_meta_app::schema::UpdateIndexReply; use databend_common_meta_app::schema::UpdateIndexReq; -use databend_common_meta_app::schema::UpdateTableMetaReply; -use databend_common_meta_app::schema::UpdateTableMetaReq; use databend_common_meta_app::schema::UpdateVirtualColumnReply; use databend_common_meta_app::schema::UpdateVirtualColumnReq; use databend_common_meta_app::schema::UpsertTableOptionReply; @@ -375,18 +373,6 @@ impl Catalog for ImmutableCatalog { ))) } - #[async_backtrace::framed] - async fn update_table_meta( - &self, - _table_info: &TableInfo, - req: UpdateTableMetaReq, - ) -> Result { - Err(ErrorCode::Unimplemented(format!( - "update table meta not allowed for system database {:?}", - req - ))) - } - #[async_backtrace::framed] async fn set_table_column_mask_policy( &self, diff --git a/src/query/service/src/catalogs/default/mutable_catalog.rs b/src/query/service/src/catalogs/default/mutable_catalog.rs index 9883efddddc1a..1cb31555ba1e6 100644 --- a/src/query/service/src/catalogs/default/mutable_catalog.rs +++ b/src/query/service/src/catalogs/default/mutable_catalog.rs @@ -20,6 +20,7 @@ use std::time::Instant; use databend_common_catalog::catalog::Catalog; use databend_common_config::InnerConfig; +use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_meta_api::SchemaApi; use databend_common_meta_api::SequenceApi; @@ -101,9 +102,6 @@ use databend_common_meta_app::schema::UpdateIndexReply; use databend_common_meta_app::schema::UpdateIndexReq; use databend_common_meta_app::schema::UpdateMultiTableMetaReq; use databend_common_meta_app::schema::UpdateMultiTableMetaResult; -use databend_common_meta_app::schema::UpdateStreamMetaReq; -use databend_common_meta_app::schema::UpdateTableMetaReply; -use databend_common_meta_app::schema::UpdateTableMetaReq; use databend_common_meta_app::schema::UpdateVirtualColumnReply; use databend_common_meta_app::schema::UpdateVirtualColumnReq; use databend_common_meta_app::schema::UpsertTableOptionReply; @@ -514,52 +512,40 @@ impl Catalog for MutableCatalog { } #[async_backtrace::framed] - async fn update_table_meta( + async fn retryable_update_multi_table_meta( &self, - table_info: &TableInfo, - req: UpdateTableMetaReq, - ) -> 
Result { - match table_info.db_type.clone() { - DatabaseType::NormalDB => { - info!( - "updating table meta. table desc: [{}], has copied files: [{}]?", - table_info.desc, - req.copied_files.is_some() - ); - let begin = Instant::now(); - let res = self.ctx.meta.update_table_meta(req).await; - info!( - "update table meta done. table id: {:?}, time used {:?}", - table_info.ident, - begin.elapsed() - ); - Ok(res?) + req: UpdateMultiTableMetaReq, + ) -> Result { + // deal with share table + { + if req.update_table_metas.len() == 1 { + match req.update_table_metas[0].1.db_type.clone() { + DatabaseType::NormalDB => {} + DatabaseType::ShareDB(share_params) => { + let share_ident = share_params.share_ident; + let tenant = Tenant::new_or_err(share_ident.tenant_name(), func_name!())?; + let db = self.get_database(&tenant, share_ident.share_name()).await?; + return db.retryable_update_multi_table_meta(req).await; + } + } } - DatabaseType::ShareDB(share_params) => { - let share_ident = share_params.share_ident; - let tenant = Tenant::new_or_err(share_ident.tenant_name(), func_name!())?; - let db = self.get_database(&tenant, share_ident.share_name()).await?; - db.update_table_meta(req).await + if req + .update_table_metas + .iter() + .any(|(_, info)| matches!(info.db_type, DatabaseType::ShareDB(_))) + { + return Err(ErrorCode::StorageOther( + "update table meta from multi share db, or update table meta from share db and normal db in one request, is not supported", + )); } } - } - - async fn update_stream_metas(&self, reqs: &[UpdateStreamMetaReq]) -> Result<()> { - self.ctx.meta.update_stream_metas(reqs).await?; - Ok(()) - } - #[async_backtrace::framed] - async fn update_multi_table_meta( - &self, - reqs: UpdateMultiTableMetaReq, - ) -> Result { info!( "updating multi table meta. number of tables: {}", - reqs.update_table_metas.len() + req.update_table_metas.len() ); let begin = Instant::now(); - let res = self.ctx.meta.update_multi_table_meta(reqs).await; + let res = self.ctx.meta.update_multi_table_meta(req).await; info!( "update multi table meta done. 
time used {:?}", begin.elapsed() diff --git a/src/query/service/src/databases/default/default_database.rs b/src/query/service/src/databases/default/default_database.rs index 466da2b9eadb5..bc2562087e680 100644 --- a/src/query/service/src/databases/default/default_database.rs +++ b/src/query/service/src/databases/default/default_database.rs @@ -37,8 +37,8 @@ use databend_common_meta_app::schema::TruncateTableReply; use databend_common_meta_app::schema::TruncateTableReq; use databend_common_meta_app::schema::UndropTableReply; use databend_common_meta_app::schema::UndropTableReq; -use databend_common_meta_app::schema::UpdateTableMetaReply; -use databend_common_meta_app::schema::UpdateTableMetaReq; +use databend_common_meta_app::schema::UpdateMultiTableMetaReq; +use databend_common_meta_app::schema::UpdateMultiTableMetaResult; use databend_common_meta_app::schema::UpsertTableOptionReply; use databend_common_meta_app::schema::UpsertTableOptionReq; @@ -197,12 +197,6 @@ impl Database for DefaultDatabase { Ok(res) } - #[async_backtrace::framed] - async fn update_table_meta(&self, req: UpdateTableMetaReq) -> Result { - let res = self.ctx.meta.update_table_meta(req).await?; - Ok(res) - } - async fn set_table_column_mask_policy( &self, req: SetTableColumnMaskPolicyReq, @@ -225,4 +219,13 @@ impl Database for DefaultDatabase { let res = self.ctx.meta.truncate_table(req).await?; Ok(res) } + + #[async_backtrace::framed] + async fn retryable_update_multi_table_meta( + &self, + req: UpdateMultiTableMetaReq, + ) -> Result { + let res = self.ctx.meta.update_multi_table_meta(req).await?; + Ok(res) + } } diff --git a/src/query/service/src/databases/share/share_database.rs b/src/query/service/src/databases/share/share_database.rs index 8a8034cec5365..dcc55fa3485c9 100644 --- a/src/query/service/src/databases/share/share_database.rs +++ b/src/query/service/src/databases/share/share_database.rs @@ -39,8 +39,8 @@ use databend_common_meta_app::schema::TruncateTableReply; use databend_common_meta_app::schema::TruncateTableReq; use databend_common_meta_app::schema::UndropTableReply; use databend_common_meta_app::schema::UndropTableReq; -use databend_common_meta_app::schema::UpdateTableMetaReply; -use databend_common_meta_app::schema::UpdateTableMetaReq; +use databend_common_meta_app::schema::UpdateMultiTableMetaReq; +use databend_common_meta_app::schema::UpdateMultiTableMetaResult; use databend_common_meta_app::schema::UpsertTableOptionReply; use databend_common_meta_app::schema::UpsertTableOptionReq; use databend_common_meta_app::share::GetShareEndpointReq; @@ -249,13 +249,6 @@ impl Database for ShareDatabase { )) } - #[async_backtrace::framed] - async fn update_table_meta(&self, _req: UpdateTableMetaReq) -> Result { - Err(ErrorCode::PermissionDenied( - "Permission denied, cannot upsert table meta from a shared database".to_string(), - )) - } - #[async_backtrace::framed] async fn set_table_column_mask_policy( &self, @@ -282,4 +275,14 @@ impl Database for ShareDatabase { "Permission denied, cannot truncate table from a shared database".to_string(), )) } + + #[async_backtrace::framed] + async fn retryable_update_multi_table_meta( + &self, + _req: UpdateMultiTableMetaReq, + ) -> Result { + Err(ErrorCode::PermissionDenied( + "Permission denied, cannot upsert table meta from a shared database".to_string(), + )) + } } diff --git a/src/query/service/src/interpreters/common/mod.rs b/src/query/service/src/interpreters/common/mod.rs index 59a37a43ec9f4..5567449698f6f 100644 --- 
a/src/query/service/src/interpreters/common/mod.rs +++ b/src/query/service/src/interpreters/common/mod.rs @@ -29,7 +29,6 @@ pub use query_log::InterpreterQueryLog; pub use shared_table::save_share_table_info; pub use stream::dml_build_update_stream_req; pub use stream::query_build_update_stream_req; -pub use stream::StreamTableUpdates; pub use table::check_referenced_computed_columns; pub use task::get_task_client_config; pub use task::make_schedule_options; diff --git a/src/query/service/src/interpreters/common/stream.rs b/src/query/service/src/interpreters/common/stream.rs index ad16da59a8177..7227e422ed150 100644 --- a/src/query/service/src/interpreters/common/stream.rs +++ b/src/query/service/src/interpreters/common/stream.rs @@ -121,8 +121,7 @@ where F: Fn(&TableEntry) -> bool { } pub struct StreamTableUpdates { - pub update_table_metas: Vec, - pub table_infos: Vec, + pub update_table_metas: Vec<(UpdateTableMetaReq, TableInfo)>, } pub async fn query_build_update_stream_req( ctx: &Arc, @@ -142,11 +141,9 @@ pub async fn query_build_update_stream_req( let cap = streams.len(); let mut update_table_meta_reqs = Vec::with_capacity(cap); - let mut table_infos = Vec::with_capacity(cap); for table in streams.into_iter() { let stream = StreamTable::try_from_table(table.as_ref())?; let stream_info = stream.get_table_info(); - table_infos.push(stream_info.clone()); let source_table = stream.source_table(ctx.clone()).await?; let inner_fuse = FuseTable::try_from_table(source_table.as_ref())?; @@ -161,18 +158,17 @@ pub async fn query_build_update_stream_req( new_table_meta.options = options; new_table_meta.updated_on = Utc::now(); - update_table_meta_reqs.push(UpdateTableMetaReq { - table_id: stream_info.ident.table_id, - seq: MatchSeq::Exact(stream_info.ident.seq), - new_table_meta, - copied_files: None, - update_stream_meta: vec![], - deduplicated_label: None, - }); + update_table_meta_reqs.push(( + UpdateTableMetaReq { + table_id: stream_info.ident.table_id, + seq: MatchSeq::Exact(stream_info.ident.seq), + new_table_meta, + }, + stream_info.clone(), + )); } Ok(Some(StreamTableUpdates { update_table_metas: update_table_meta_reqs, - table_infos, })) } diff --git a/src/query/service/src/interpreters/interpreter_copy_into_location.rs b/src/query/service/src/interpreters/interpreter_copy_into_location.rs index 11a28768069d5..b37bb2218451e 100644 --- a/src/query/service/src/interpreters/interpreter_copy_into_location.rs +++ b/src/query/service/src/interpreters/interpreter_copy_into_location.rs @@ -157,7 +157,7 @@ impl Interpreter for CopyIntoLocationInterpreter { move |info: &ExecutionInfo| match &info.res { Ok(_) => GlobalIORuntime::instance().block_on(async move { info!("Updating the stream meta for COPY INTO LOCATION statement",); - catalog.update_stream_metas(&update_stream_reqs).await?; + catalog.update_stream_metas(update_stream_reqs).await?; Ok(()) }), Err(e) => Err(e.clone()), diff --git a/src/query/service/src/interpreters/interpreter_select.rs b/src/query/service/src/interpreters/interpreter_select.rs index e6a27c6e77dc9..61582714e37f7 100644 --- a/src/query/service/src/interpreters/interpreter_select.rs +++ b/src/query/service/src/interpreters/interpreter_select.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
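Every call site converted in the hunks below follows the same shape: build an UpdateTableMetaReq without the removed copied_files/update_stream_meta/deduplicated_label fields, pair it with the table's TableInfo, and hand the pair to the multi-table API, relying on the new Default impl of UpdateMultiTableMetaReq for everything else. A minimal sketch of that pattern, equivalent to the update_single_table_meta default method added in interface.rs above; the helper name and the way the catalog is obtained are assumptions, not part of the patch:

    use std::sync::Arc;

    use databend_common_catalog::catalog::Catalog;
    use databend_common_exception::Result;
    use databend_common_meta_app::schema::TableInfo;
    use databend_common_meta_app::schema::UpdateMultiTableMetaReq;
    use databend_common_meta_app::schema::UpdateTableMetaReply;
    use databend_common_meta_app::schema::UpdateTableMetaReq;

    // Hypothetical wrapper: commit one table's new meta through the unified API.
    async fn commit_single_table_meta(
        catalog: Arc<dyn Catalog>,
        req: UpdateTableMetaReq,
        table_info: &TableInfo,
    ) -> Result<UpdateTableMetaReply> {
        catalog
            .update_multi_table_meta(UpdateMultiTableMetaReq {
                update_table_metas: vec![(req, table_info.clone())],
                // no copied files, stream updates or deduplication labels
                ..Default::default()
            })
            .await
    }
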
-use std::collections::HashSet; -use std::hash::RandomState; use std::sync::Arc; use databend_common_base::runtime::GlobalIORuntime; @@ -47,7 +45,6 @@ use log::error; use log::info; use crate::interpreters::common::query_build_update_stream_req; -use crate::interpreters::common::StreamTableUpdates; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::schedulers::build_query_pipeline; @@ -141,67 +138,28 @@ impl SelectInterpreter { .await?; // consume stream - if let Some(StreamTableUpdates { - update_table_metas, - table_infos, - }) = query_build_update_stream_req(&self.ctx, &self.metadata).await? - { - assert!(!update_table_metas.is_empty()); - - // defensively checks that all catalog names are identical - // - // NOTE(from xuanwo): - // Maybe we can remove this check since all table stored in metasrv - // must be the same catalog. - { - let mut iter = table_infos.iter().map(|item| item.catalog()); - let first = iter.next().unwrap(); - let all_of_the_same_catalog = iter.all(|item| item == first); - if !all_of_the_same_catalog { - let cats: HashSet<&str, RandomState> = HashSet::from_iter(iter); - return Err(ErrorCode::BadArguments(format!( - "Consuming streams of different catalogs are not support. catalogs are {:?}", - cats - ))); - } - } + let update_stream_metas = query_build_update_stream_req(&self.ctx, &self.metadata).await?; - let catalog_name = table_infos[0].catalog(); - let catalog = self.ctx.get_catalog(catalog_name).await?; - let query_id = self.ctx.get_id(); - let auto_commit = !self.ctx.txn_mgr().lock().is_active(); - build_res - .main_pipeline - .set_on_finished(move |info: &ExecutionInfo| match &info.res { - Ok(_) => GlobalIORuntime::instance().block_on(async move { - info!( - "Updating the stream meta to consume data, query_id: {}", - query_id - ); - - if auto_commit { - info!("(auto) committing stream consumptions"); - // commit to meta server directly + let catalog = self.ctx.get_default_catalog()?; + build_res + .main_pipeline + .set_on_finished(move |info: &ExecutionInfo| match &info.res { + Ok(_) => GlobalIORuntime::instance().block_on(async move { + info!("Updating the stream meta to consume data"); + + match update_stream_metas { + Some(streams) => { let r = UpdateMultiTableMetaReq { - update_table_metas, - copied_files: vec![], - update_stream_metas: vec![], - deduplicated_labels: vec![], + update_table_metas: streams.update_table_metas, + ..Default::default() }; catalog.update_multi_table_meta(r).await.map(|_| ()) - } else { - info!("(non-auto) committing stream consumptions"); - for (req, info) in - update_table_metas.into_iter().zip(table_infos.into_iter()) - { - catalog.update_table_meta(&info, req).await?; - } - Ok(()) } - }), - Err(error_code) => Err(error_code.clone()), - }); - } + None => Ok(()), + } + }), + Err(error_code) => Err(error_code.clone()), + }); Ok(build_res) } diff --git a/src/query/service/src/interpreters/interpreter_table_add_column.rs b/src/query/service/src/interpreters/interpreter_table_add_column.rs index c3b6ac7a56999..ba9aed2f38857 100644 --- a/src/query/service/src/interpreters/interpreter_table_add_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_add_column.rs @@ -126,12 +126,9 @@ impl Interpreter for AddTableColumnInterpreter { table_id, seq: MatchSeq::Exact(table_version), new_table_meta, - copied_files: None, - deduplicated_label: None, - update_stream_meta: vec![], }; - let res = catalog.update_table_meta(table_info, req).await?; + let res = 
catalog.update_single_table_meta(req, table_info).await?; save_share_table_info(&self.ctx, &res.share_table_info).await?; }; diff --git a/src/query/service/src/interpreters/interpreter_table_drop_column.rs b/src/query/service/src/interpreters/interpreter_table_drop_column.rs index 02e62ec3628c2..e07c24cc19acd 100644 --- a/src/query/service/src/interpreters/interpreter_table_drop_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_drop_column.rs @@ -136,12 +136,9 @@ impl Interpreter for DropTableColumnInterpreter { table_id, seq: MatchSeq::Exact(table_version), new_table_meta, - copied_files: None, - deduplicated_label: None, - update_stream_meta: vec![], }; - let res = catalog.update_table_meta(table_info, req).await?; + let res = catalog.update_single_table_meta(req, table_info).await?; save_share_table_info(&self.ctx, &res.share_table_info).await?; diff --git a/src/query/service/src/interpreters/interpreter_table_modify_column.rs b/src/query/service/src/interpreters/interpreter_table_modify_column.rs index 612af7ae25710..65172d934e943 100644 --- a/src/query/service/src/interpreters/interpreter_table_modify_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_modify_column.rs @@ -244,13 +244,10 @@ impl ModifyTableColumnInterpreter { table_id, seq: MatchSeq::Exact(table_version), new_table_meta: table_info.meta, - copied_files: None, - deduplicated_label: None, - update_stream_meta: vec![], }; catalog - .update_table_meta(table.get_table_info(), req) + .update_single_table_meta(req, table.get_table_info()) .await?; return Ok(PipelineBuildResult::create()); @@ -327,13 +324,10 @@ impl ModifyTableColumnInterpreter { table_id, seq: MatchSeq::Exact(table_version), new_table_meta: table_info.meta, - copied_files: None, - deduplicated_label: None, - update_stream_meta: vec![], }; let res = catalog - .update_table_meta(table.get_table_info(), req) + .update_single_table_meta(req, table.get_table_info()) .await?; save_share_table_info(&self.ctx, &res.share_table_info).await?; @@ -501,12 +495,9 @@ impl ModifyTableColumnInterpreter { table_id, seq: MatchSeq::Exact(table_version), new_table_meta, - copied_files: None, - deduplicated_label: None, - update_stream_meta: vec![], }; - let res = catalog.update_table_meta(table_info, req).await?; + let res = catalog.update_single_table_meta(req, table_info).await?; save_share_table_info(&self.ctx, &res.share_table_info).await?; diff --git a/src/query/service/src/interpreters/interpreter_table_modify_comment.rs b/src/query/service/src/interpreters/interpreter_table_modify_comment.rs index ceefe682a14d4..6de2b3131addc 100644 --- a/src/query/service/src/interpreters/interpreter_table_modify_comment.rs +++ b/src/query/service/src/interpreters/interpreter_table_modify_comment.rs @@ -92,12 +92,9 @@ impl Interpreter for ModifyTableCommentInterpreter { table_id, seq: MatchSeq::Exact(table_version), new_table_meta, - copied_files: None, - deduplicated_label: None, - update_stream_meta: vec![], }; - catalog.update_table_meta(table_info, req).await?; + catalog.update_single_table_meta(req, table_info).await?; }; Ok(PipelineBuildResult::create()) diff --git a/src/query/service/src/interpreters/interpreter_table_rename_column.rs b/src/query/service/src/interpreters/interpreter_table_rename_column.rs index c1eacd854bbe4..f248a11a93388 100644 --- a/src/query/service/src/interpreters/interpreter_table_rename_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_rename_column.rs @@ -128,12 +128,9 @@ impl Interpreter for 
RenameTableColumnInterpreter { table_id, seq: MatchSeq::Exact(table_version), new_table_meta, - copied_files: None, - deduplicated_label: None, - update_stream_meta: vec![], }; - let res = catalog.update_table_meta(table_info, req).await?; + let res = catalog.update_single_table_meta(req, table_info).await?; save_share_table_info(&self.ctx, &res.share_table_info).await?; }; diff --git a/src/query/service/src/interpreters/interpreter_txn_commit.rs b/src/query/service/src/interpreters/interpreter_txn_commit.rs index 7c2545c4ee7a3..d943f3906d63c 100644 --- a/src/query/service/src/interpreters/interpreter_txn_commit.rs +++ b/src/query/service/src/interpreters/interpreter_txn_commit.rs @@ -64,7 +64,7 @@ impl Interpreter for CommitInterpreter { let table_descriptions = req .update_table_metas .iter() - .map(|req| (req.table_id, req.seq, req.new_table_meta.engine.clone())) + .map(|(req, _)| (req.table_id, req.seq, req.new_table_meta.engine.clone())) .collect::>(); let stream_descriptions = req .update_stream_metas @@ -75,7 +75,8 @@ impl Interpreter for CommitInterpreter { }; let mismatched_tids = { - let ret = catalog.update_multi_table_meta(req).await; + self.ctx.txn_mgr().lock().set_auto_commit(); + let ret = catalog.retryable_update_multi_table_meta(req).await; if let Err(ref e) = ret { // other errors may occur, especially the version mismatch of streams, // let's log it here for the convenience of diagnostics diff --git a/src/query/service/tests/it/sql/exec/get_table_bind_test.rs b/src/query/service/tests/it/sql/exec/get_table_bind_test.rs index 16d11f8c26ba7..169e3ba8dd6a2 100644 --- a/src/query/service/tests/it/sql/exec/get_table_bind_test.rs +++ b/src/query/service/tests/it/sql/exec/get_table_bind_test.rs @@ -116,8 +116,6 @@ use databend_common_meta_app::schema::UndropTableReply; use databend_common_meta_app::schema::UndropTableReq; use databend_common_meta_app::schema::UpdateIndexReply; use databend_common_meta_app::schema::UpdateIndexReq; -use databend_common_meta_app::schema::UpdateTableMetaReply; -use databend_common_meta_app::schema::UpdateTableMetaReq; use databend_common_meta_app::schema::UpdateVirtualColumnReply; use databend_common_meta_app::schema::UpdateVirtualColumnReq; use databend_common_meta_app::schema::UpsertTableOptionReply; @@ -152,7 +150,6 @@ type MetaType = (String, String, String); #[derive(Clone, Debug)] struct FakedCatalog { cat: Arc, - error_injection: Option, } #[async_trait::async_trait] @@ -267,18 +264,6 @@ impl Catalog for FakedCatalog { todo!() } - async fn update_table_meta( - &self, - table_info: &TableInfo, - req: UpdateTableMetaReq, - ) -> Result { - if let Some(e) = &self.error_injection { - Err(e.clone()) - } else { - self.cat.update_table_meta(table_info, req).await - } - } - async fn set_table_column_mask_policy( &self, _req: SetTableColumnMaskPolicyReq, @@ -939,10 +924,7 @@ async fn test_get_same_table_once() -> Result<()> { fixture.create_default_table().await?; let ctx = fixture.new_query_ctx().await?; let catalog = ctx.get_catalog("default").await?; - let faked_catalog = FakedCatalog { - cat: catalog, - error_injection: None, - }; + let faked_catalog = FakedCatalog { cat: catalog }; let ctx = Arc::new(CtxDelegation::new(ctx, faked_catalog)); diff --git a/src/query/service/tests/it/storages/fuse/operations/commit.rs b/src/query/service/tests/it/storages/fuse/operations/commit.rs index 757aab8d12dbd..30a1e030e48b2 100644 --- a/src/query/service/tests/it/storages/fuse/operations/commit.rs +++ 
b/src/query/service/tests/it/storages/fuse/operations/commit.rs @@ -115,8 +115,8 @@ use databend_common_meta_app::schema::UndropTableReply; use databend_common_meta_app::schema::UndropTableReq; use databend_common_meta_app::schema::UpdateIndexReply; use databend_common_meta_app::schema::UpdateIndexReq; -use databend_common_meta_app::schema::UpdateTableMetaReply; -use databend_common_meta_app::schema::UpdateTableMetaReq; +use databend_common_meta_app::schema::UpdateMultiTableMetaReq; +use databend_common_meta_app::schema::UpdateMultiTableMetaResult; use databend_common_meta_app::schema::UpdateVirtualColumnReply; use databend_common_meta_app::schema::UpdateVirtualColumnReq; use databend_common_meta_app::schema::UpsertTableOptionReply; @@ -955,18 +955,6 @@ impl Catalog for FakedCatalog { todo!() } - async fn update_table_meta( - &self, - table_info: &TableInfo, - req: UpdateTableMetaReq, - ) -> Result { - if let Some(e) = &self.error_injection { - Err(e.clone()) - } else { - self.cat.update_table_meta(table_info, req).await - } - } - async fn set_table_column_mask_policy( &self, _req: SetTableColumnMaskPolicyReq, @@ -1112,4 +1100,15 @@ impl Catalog for FakedCatalog { async fn drop_sequence(&self, _req: DropSequenceReq) -> Result { unimplemented!() } + + async fn retryable_update_multi_table_meta( + &self, + req: UpdateMultiTableMetaReq, + ) -> Result { + if let Some(e) = &self.error_injection { + Err(e.clone()) + } else { + self.cat.retryable_update_multi_table_meta(req).await + } + } } diff --git a/src/query/storages/common/txn/src/manager.rs b/src/query/storages/common/txn/src/manager.rs index 269fb6834e37c..962ebfcc37d33 100644 --- a/src/query/storages/common/txn/src/manager.rs +++ b/src/query/storages/common/txn/src/manager.rs @@ -76,24 +76,25 @@ impl TxnBuffer { self.stream_tables.clear(); } - fn update_table_meta(&mut self, req: UpdateTableMetaReq, table_info: &TableInfo) { - let table_id = req.table_id; - self.table_desc_to_id - .insert(table_info.desc.clone(), table_id); - - self.mutated_tables.insert(table_id, TableInfo { - meta: req.new_table_meta.clone(), - ..table_info.clone() - }); + fn update_multi_table_meta(&mut self, mut req: UpdateMultiTableMetaReq) { + for (req, table_info) in req.update_table_metas { + let table_id = req.table_id; + self.table_desc_to_id + .insert(table_info.desc.clone(), table_id); + + self.mutated_tables.insert(table_id, TableInfo { + meta: req.new_table_meta.clone(), + ..table_info.clone() + }); + } - self.copied_files - .entry(table_id) - .or_default() - .extend(req.copied_files); + for (table_id, file) in std::mem::take(&mut req.copied_files) { + self.copied_files.entry(table_id).or_default().push(file); + } - self.update_stream_metas(&req.update_stream_meta); + self.update_stream_metas(&req.update_stream_metas); - self.deduplicated_labels.extend(req.deduplicated_label); + self.deduplicated_labels.extend(req.deduplicated_labels); } fn update_stream_metas(&mut self, reqs: &[UpdateStreamMetaReq]) { @@ -137,6 +138,10 @@ impl TxnManager { } } + pub fn set_auto_commit(&mut self) { + self.state = TxnState::AutoCommit; + } + pub fn force_set_fail(&mut self) { self.state = TxnState::Fail; } @@ -153,8 +158,8 @@ impl TxnManager { self.state.clone() } - pub fn update_table_meta(&mut self, req: UpdateTableMetaReq, table_info: &TableInfo) { - self.txn_buffer.update_table_meta(req, table_info); + pub fn update_multi_table_meta(&mut self, req: UpdateMultiTableMetaReq) { + self.txn_buffer.update_multi_table_meta(req); } pub fn update_stream_metas(&mut self, reqs: 
&[UpdateStreamMetaReq]) { @@ -221,13 +226,15 @@ impl TxnManager { .txn_buffer .mutated_tables .iter() - .map(|(id, info)| UpdateTableMetaReq { - table_id: *id, - seq: MatchSeq::Exact(info.ident.seq), - new_table_meta: info.meta.clone(), - copied_files: None, - update_stream_meta: vec![], - deduplicated_label: None, + .map(|(id, info)| { + ( + UpdateTableMetaReq { + table_id: *id, + seq: MatchSeq::Exact(info.ident.seq), + new_table_meta: info.meta.clone(), + }, + info.clone(), + ) }) .collect(), copied_files, diff --git a/src/query/storages/fuse/src/operations/commit.rs b/src/query/storages/fuse/src/operations/commit.rs index a3eafe9fcfe59..2baf524b52b6a 100644 --- a/src/query/storages/fuse/src/operations/commit.rs +++ b/src/query/storages/fuse/src/operations/commit.rs @@ -28,6 +28,7 @@ use databend_common_expression::TableSchemaRef; use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::schema::TableMeta; use databend_common_meta_app::schema::TableStatistics; +use databend_common_meta_app::schema::UpdateMultiTableMetaReq; use databend_common_meta_app::schema::UpdateStreamMetaReq; use databend_common_meta_app::schema::UpdateTableMetaReq; use databend_common_meta_app::schema::UpsertTableCopiedFileReq; @@ -219,13 +220,17 @@ impl FuseTable { table_id, seq: MatchSeq::Exact(table_version), new_table_meta, - copied_files: copied_files.clone(), - deduplicated_label, - update_stream_meta: update_stream_meta.to_vec(), }; // 3. let's roll - catalog.update_table_meta(table_info, req).await?; + catalog + .update_multi_table_meta(UpdateMultiTableMetaReq { + update_table_metas: vec![(req, table_info.clone())], + update_stream_metas: update_stream_meta.to_vec(), + copied_files: copied_files.iter().map(|c| (table_id, c.clone())).collect(), + deduplicated_labels: deduplicated_label.into_iter().collect(), + }) + .await?; // update_table_meta succeed, populate the snapshot cache item and try keeping a hit file of last snapshot TableSnapshot::cache().put(snapshot_location.clone(), Arc::new(snapshot)); diff --git a/src/query/storages/fuse/src/operations/common/processors/multi_table_insert_commit.rs b/src/query/storages/fuse/src/operations/common/processors/multi_table_insert_commit.rs index eb2cd4ffe0e02..74c7cda756446 100644 --- a/src/query/storages/fuse/src/operations/common/processors/multi_table_insert_commit.rs +++ b/src/query/storages/fuse/src/operations/common/processors/multi_table_insert_commit.rs @@ -82,140 +82,118 @@ impl AsyncSink for CommitMultiTableInsert { #[async_backtrace::framed] async fn on_finish(&mut self) -> Result<()> { - let mut update_table_meta_reqs = Vec::with_capacity(self.commit_metas.len()); - let mut table_infos = Vec::with_capacity(self.commit_metas.len()); + let mut update_table_metas = Vec::with_capacity(self.commit_metas.len()); let mut snapshot_generators = HashMap::with_capacity(self.commit_metas.len()); for (table_id, commit_meta) in std::mem::take(&mut self.commit_metas).into_iter() { // generate snapshot let mut snapshot_generator = AppendGenerator::new(self.ctx.clone(), self.overwrite); snapshot_generator.set_conflict_resolve_context(commit_meta.conflict_resolve_context); let table = self.tables.get(&table_id).unwrap(); - update_table_meta_reqs - .push(build_update_table_meta_req(table.as_ref(), &snapshot_generator).await?); + update_table_metas.push(( + build_update_table_meta_req(table.as_ref(), &snapshot_generator).await?, + table.get_table_info().clone(), + )); snapshot_generators.insert(table_id, snapshot_generator); - 
table_infos.push(table.get_table_info()); } - let is_active = self.ctx.txn_mgr().lock().is_active(); - match is_active { - true => { - // inside explicit transaction - if update_table_meta_reqs.is_empty() { - return Err(ErrorCode::Internal( - "No table meta to update in multi table insert commit. It's a bug", - )); - } - // any one of the reqs may carry the update_stream_meta, we arbitrarily choose the first one... ". - // It is safe to index the first element because there is at least a into clause in the multi table insert, - // which will generate a req(by design, no matter whether the table is actually updated or not, it will generate a new snapshot). - update_table_meta_reqs[0].update_stream_meta = - std::mem::take(&mut self.update_stream_meta); - update_table_meta_reqs[0].deduplicated_label = self.deduplicated_label.clone(); - for (req, info) in update_table_meta_reqs.into_iter().zip(table_infos.iter()) { - self.catalog.update_table_meta(info, req).await?; - } - } - false => { - // auto commit - let mut backoff = set_backoff(None, None, None); - let mut retries = 0; - loop { - let update_multi_table_meta_req = UpdateMultiTableMetaReq { - update_table_metas: update_table_meta_reqs.clone(), - copied_files: vec![], - update_stream_metas: self.update_stream_meta.clone(), - deduplicated_labels: self.deduplicated_label.clone().into_iter().collect(), - }; + let mut backoff = set_backoff(None, None, None); + let mut retries = 0; - let update_meta_result = { - let ret = self - .catalog - .update_multi_table_meta(update_multi_table_meta_req) - .await; - if let Err(ref e) = ret { - // other errors may occur, especially the version mismatch of streams, - // let's log it here for the convenience of diagnostics - error!( - "Non-recoverable fault occurred during updating tables. {}", - e - ); - } - ret? - }; + loop { + let update_multi_table_meta_req = UpdateMultiTableMetaReq { + update_table_metas: update_table_metas.clone(), + copied_files: vec![], + update_stream_metas: self.update_stream_meta.clone(), + deduplicated_labels: self.deduplicated_label.clone().into_iter().collect(), + }; - let Err(update_failed_tbls) = update_meta_result else { - let table_descriptions = self - .tables - .values() - .map(|tbl| { - let table_info = tbl.get_table_info(); - (&table_info.desc, &table_info.ident, &table_info.meta.engine) - }) - .collect::>(); - let stream_descriptions = self - .update_stream_meta - .iter() - .map(|s| (s.stream_id, s.seq, "stream")) - .collect::>(); - info!( - "update tables success (auto commit), tables updated {:?}, streams updated {:?}", - table_descriptions, stream_descriptions - ); + let update_meta_result = match self + .catalog + .retryable_update_multi_table_meta(update_multi_table_meta_req) + .await + { + Ok(ret) => ret, + Err(e) => { + // other errors may occur, especially the version mismatch of streams, + // let's log it here for the convenience of diagnostics + error!( + "Non-recoverable fault occurred during updating tables. 
{}", + e + ); + return Err(e); + } + }; - return Ok(()); - }; - let update_failed_tbl_descriptions: Vec<_> = update_failed_tbls - .iter() - .map(|(tid, seq, meta)| { - let tbl_info = self.tables.get(tid).unwrap().get_table_info(); - (&tbl_info.desc, (tid, seq), &meta.engine) - }) - .collect(); - match backoff.next_backoff() { - Some(duration) => { - retries += 1; + let Err(update_failed_tbls) = update_meta_result else { + let table_descriptions = self + .tables + .values() + .map(|tbl| { + let table_info = tbl.get_table_info(); + (&table_info.desc, &table_info.ident, &table_info.meta.engine) + }) + .collect::>(); + let stream_descriptions = self + .update_stream_meta + .iter() + .map(|s| (s.stream_id, s.seq, "stream")) + .collect::>(); + info!( + "update tables success (auto commit), tables updated {:?}, streams updated {:?}", + table_descriptions, stream_descriptions + ); + + return Ok(()); + }; + let update_failed_tbl_descriptions: Vec<_> = update_failed_tbls + .iter() + .map(|(tid, seq, meta)| { + let tbl_info = self.tables.get(tid).unwrap().get_table_info(); + (&tbl_info.desc, (tid, seq), &meta.engine) + }) + .collect(); + match backoff.next_backoff() { + Some(duration) => { + retries += 1; - debug!( - "Failed(temporarily) to update tables: {:?}, the commit process of multi-table insert will be retried after {} ms, retrying {} times", - update_failed_tbl_descriptions, - duration.as_millis(), - retries, - ); - databend_common_base::base::tokio::time::sleep(duration).await; - for (tid, seq, meta) in update_failed_tbls { - let table = self.tables.get_mut(&tid).unwrap(); - *table = table - .refresh_with_seq_meta(self.ctx.as_ref(), seq, meta) - .await?; - for req in update_table_meta_reqs.iter_mut() { - if req.table_id == tid { - *req = build_update_table_meta_req( - table.as_ref(), - snapshot_generators.get(&tid).unwrap(), - ) - .await?; - break; - } - } + debug!( + "Failed(temporarily) to update tables: {:?}, the commit process of multi-table insert will be retried after {} ms, retrying {} times", + update_failed_tbl_descriptions, + duration.as_millis(), + retries, + ); + databend_common_base::base::tokio::time::sleep(duration).await; + for (tid, seq, meta) in update_failed_tbls { + let table = self.tables.get_mut(&tid).unwrap(); + *table = table + .refresh_with_seq_meta(self.ctx.as_ref(), seq, meta) + .await?; + for (req, _) in update_table_metas.iter_mut() { + if req.table_id == tid { + *req = build_update_table_meta_req( + table.as_ref(), + snapshot_generators.get(&tid).unwrap(), + ) + .await?; + break; } } - None => { - let err_msg = format!( - "Can not fulfill the tx after retries({} times, {} ms), aborted. updated tables {:?}", - retries, - Instant::now() - .duration_since(backoff.start_time) - .as_millis(), - update_failed_tbl_descriptions, - ); - error!("{}", err_msg); - return Err(ErrorCode::OCCRetryFailure(err_msg)); - } } } + None => { + let err_msg = format!( + "Can not fulfill the tx after retries({} times, {} ms), aborted. 
updated tables {:?}", + retries, + Instant::now() + .duration_since(backoff.start_time) + .as_millis(), + update_failed_tbl_descriptions, + ); + error!("{}", err_msg); + return Err(ErrorCode::OCCRetryFailure(err_msg)); + } } } - Ok(()) } #[unboxed_simple] @@ -276,9 +254,6 @@ async fn build_update_table_meta_req( table_id, seq: MatchSeq::Exact(table_version), new_table_meta, - copied_files: None, - deduplicated_label: None, - update_stream_meta: vec![], }; Ok(req) } diff --git a/src/query/storages/fuse/src/operations/revert.rs b/src/query/storages/fuse/src/operations/revert.rs index 6e35bdfdbfa42..ead9ac4e60fed 100644 --- a/src/query/storages/fuse/src/operations/revert.rs +++ b/src/query/storages/fuse/src/operations/revert.rs @@ -58,13 +58,10 @@ impl FuseTable { table_id, seq: MatchSeq::Exact(base_version), new_table_meta: table_meta_to_be_committed, - copied_files: None, - deduplicated_label: None, - update_stream_meta: vec![], }; // 4. let's roll - let reply = catalog.update_table_meta(&self.table_info, req).await; + let reply = catalog.update_single_table_meta(req, table_info).await; if reply.is_ok() { // try keep the snapshot hit let snapshot_location = table_reverting_to.snapshot_loc().await?.ok_or_else(|| { diff --git a/src/query/storages/hive/hive/src/hive_catalog.rs b/src/query/storages/hive/hive/src/hive_catalog.rs index 1a59cb63f19b1..260fc8b24bd05 100644 --- a/src/query/storages/hive/hive/src/hive_catalog.rs +++ b/src/query/storages/hive/hive/src/hive_catalog.rs @@ -91,8 +91,6 @@ use databend_common_meta_app::schema::UndropTableReply; use databend_common_meta_app::schema::UndropTableReq; use databend_common_meta_app::schema::UpdateIndexReply; use databend_common_meta_app::schema::UpdateIndexReq; -use databend_common_meta_app::schema::UpdateTableMetaReply; -use databend_common_meta_app::schema::UpdateTableMetaReq; use databend_common_meta_app::schema::UpdateVirtualColumnReply; use databend_common_meta_app::schema::UpdateVirtualColumnReq; use databend_common_meta_app::schema::UpsertTableOptionReply; @@ -524,17 +522,6 @@ impl Catalog for HiveCatalog { )) } - #[async_backtrace::framed] - async fn update_table_meta( - &self, - _table_info: &TableInfo, - _req: UpdateTableMetaReq, - ) -> Result { - Err(ErrorCode::Unimplemented( - "Cannot update table meta in HIVE catalog", - )) - } - #[async_backtrace::framed] async fn set_table_column_mask_policy( &self, diff --git a/src/query/storages/iceberg/src/catalog.rs b/src/query/storages/iceberg/src/catalog.rs index a0e837c069f9f..fb83d2945c04f 100644 --- a/src/query/storages/iceberg/src/catalog.rs +++ b/src/query/storages/iceberg/src/catalog.rs @@ -90,8 +90,6 @@ use databend_common_meta_app::schema::UndropTableReply; use databend_common_meta_app::schema::UndropTableReq; use databend_common_meta_app::schema::UpdateIndexReply; use databend_common_meta_app::schema::UpdateIndexReq; -use databend_common_meta_app::schema::UpdateTableMetaReply; -use databend_common_meta_app::schema::UpdateTableMetaReq; use databend_common_meta_app::schema::UpdateVirtualColumnReply; use databend_common_meta_app::schema::UpdateVirtualColumnReq; use databend_common_meta_app::schema::UpsertTableOptionReply; @@ -361,15 +359,6 @@ impl Catalog for IcebergCatalog { unimplemented!() } - #[async_backtrace::framed] - async fn update_table_meta( - &self, - _table_info: &TableInfo, - _req: UpdateTableMetaReq, - ) -> Result { - unimplemented!() - } - #[async_backtrace::framed] async fn set_table_column_mask_policy( &self, From 4d3c3947bee7f5200fec2b9f7de2f26a2a500c43 Mon Sep 17 
00:00:00 2001 From: Yang Xiufeng Date: Mon, 8 Jul 2024 15:53:40 +0800 Subject: [PATCH 04/21] feat: parquet add option `missing_field_as`. (#15993) parquet add option `missing_field_as`. --- .../src/file_format_from_to_protobuf_impl.rs | 10 ++-- src/meta/proto-conv/src/util.rs | 1 + src/meta/proto-conv/tests/it/main.rs | 1 + .../tests/it/v099_parquet_format_params.rs | 49 +++++++++++++++++++ src/meta/protos/proto/file_format.proto | 1 + 5 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 src/meta/proto-conv/tests/it/v099_parquet_format_params.rs diff --git a/src/meta/proto-conv/src/file_format_from_to_protobuf_impl.rs b/src/meta/proto-conv/src/file_format_from_to_protobuf_impl.rs index f73209540afe6..a62d9697735a6 100644 --- a/src/meta/proto-conv/src/file_format_from_to_protobuf_impl.rs +++ b/src/meta/proto-conv/src/file_format_from_to_protobuf_impl.rs @@ -21,7 +21,6 @@ use databend_common_io::GeometryDataType; use databend_common_meta_app as mt; use databend_common_meta_app::principal::BinaryFormat; use databend_common_meta_app::principal::EmptyFieldAs; -use databend_common_meta_app::principal::NullAs; use databend_common_protos::pb; use num::FromPrimitive; @@ -331,16 +330,17 @@ impl FromToProto for mt::principal::ParquetFileFormatParams { fn from_pb(p: pb::ParquetFileFormatParams) -> Result where Self: Sized { reader_check_msg(p.ver, p.min_reader_ver)?; - Ok(mt::principal::ParquetFileFormatParams { - missing_field_as: NullAs::Error, - null_if: p.null_if, - }) + mt::principal::ParquetFileFormatParams::try_create(p.missing_field_as.as_deref(), p.null_if) + .map_err(|e| Incompatible { + reason: format!("{e}"), + }) } fn to_pb(&self) -> Result { Ok(pb::ParquetFileFormatParams { ver: VER, min_reader_ver: MIN_READER_VER, + missing_field_as: Some(self.missing_field_as.to_string()), null_if: self.null_if.clone(), }) } diff --git a/src/meta/proto-conv/src/util.rs b/src/meta/proto-conv/src/util.rs index a85bafc99095a..c319ccba89a96 100644 --- a/src/meta/proto-conv/src/util.rs +++ b/src/meta/proto-conv/src/util.rs @@ -128,6 +128,7 @@ const META_CHANGE_LOG: &[(u64, &str)] = &[ (96, "2024-07-02: Add: add using_share_endpoint field into DatabaseMeta"), (97, "2024-07-04: Add: missing_field_as in user.proto/OrcFileFormatParams"), (98, "2024-07-04: Add: add iceberg catalog option in catalog option"), + (99, "2024-07-08: Add: missing_field_as in user.proto/ParquetFileFormatParams"), // Dear developer: // If you're gonna add a new metadata version, you'll have to add a test for it. // You could just copy an existing test file(e.g., `../tests/it/v024_table_meta.rs`) diff --git a/src/meta/proto-conv/tests/it/main.rs b/src/meta/proto-conv/tests/it/main.rs index 5ed6ccdd6e9db..0208cd4db940a 100644 --- a/src/meta/proto-conv/tests/it/main.rs +++ b/src/meta/proto-conv/tests/it/main.rs @@ -102,3 +102,4 @@ mod v095_share_endpoint_meta; mod v096_database_meta; mod v097_orc_format_params; mod v098_catalog_option; +mod v099_parquet_format_params; diff --git a/src/meta/proto-conv/tests/it/v099_parquet_format_params.rs b/src/meta/proto-conv/tests/it/v099_parquet_format_params.rs new file mode 100644 index 0000000000000..348ae556d5f20 --- /dev/null +++ b/src/meta/proto-conv/tests/it/v099_parquet_format_params.rs @@ -0,0 +1,49 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use databend_common_meta_app::principal::NullAs; +use databend_common_meta_app::principal::ParquetFileFormatParams; +use minitrace::func_name; + +use crate::common; + +// These bytes are built when a new version in introduced, + +// and are kept for backward compatibility test. +// +// ************************************************************* +// * These messages should never be updated, * +// * only be added when a new version is added, * +// * or be removed when an old version is no longer supported. * +// ************************************************************* +// +#[test] +fn test_decode_v99_parquet_file_format_params() -> anyhow::Result<()> { + let parquet_file_format_params_v99 = vec![ + 10, 13, 70, 73, 69, 76, 68, 95, 68, 69, 70, 65, 85, 76, 84, 34, 0, 34, 1, 97, 160, 6, 99, + 168, 6, 24, + ]; + let want = || ParquetFileFormatParams { + missing_field_as: NullAs::FieldDefault, + null_if: vec!["".to_string(), "a".to_string()], + }; + common::test_load_old( + func_name!(), + parquet_file_format_params_v99.as_slice(), + 99, + want(), + )?; + common::test_pb_from_to(func_name!(), want())?; + Ok(()) +} diff --git a/src/meta/protos/proto/file_format.proto b/src/meta/protos/proto/file_format.proto index 48c304fc5208b..acac85199d737 100644 --- a/src/meta/protos/proto/file_format.proto +++ b/src/meta/protos/proto/file_format.proto @@ -91,6 +91,7 @@ message FileFormatParams { message ParquetFileFormatParams { uint64 ver = 100; uint64 min_reader_ver = 101; + optional string missing_field_as = 1; repeated string null_if = 4; } From 9ce86e561b93b212902f2e2b5924dc4527f41c34 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 8 Jul 2024 23:16:44 +0800 Subject: [PATCH 05/21] refactor: rename metrics & tweak system.caches table (#15996) * refactor: rename metrics & tweak system.caches table * update test golden file * clean up --- src/common/metrics/src/metrics/cache.rs | 11 +- .../it/storages/testdata/caches_table.txt | 26 ++--- src/query/storages/common/cache/src/cache.rs | 32 ++++++ src/query/storages/common/cache/src/lib.rs | 3 + .../common/cache/src/providers/mod.rs | 1 + .../cache/src/providers/table_data_cache.rs | 33 ++++-- .../common/cache_manager/src/cache_manager.rs | 58 ++++++---- .../read/block/block_reader_merge_io_async.rs | 14 ++- src/query/storages/system/src/caches_table.rs | 103 +++++++++--------- 9 files changed, 176 insertions(+), 105 deletions(-) diff --git a/src/common/metrics/src/metrics/cache.rs b/src/common/metrics/src/metrics/cache.rs index 6f04dbe21344d..56012bae60117 100644 --- a/src/common/metrics/src/metrics/cache.rs +++ b/src/common/metrics/src/metrics/cache.rs @@ -29,6 +29,8 @@ static CACHE_ACCESS_COUNT: LazyLock> = LazyLock::new(|| register_counter_family("cache_access_count")); static CACHE_MISS_COUNT: LazyLock> = LazyLock::new(|| register_counter_family("cache_miss_count")); +static CACHE_MISS_BYTES: LazyLock> = + LazyLock::new(|| register_counter_family("cache_miss_bytes")); static CACHE_MISS_LOAD_MILLISECOND: LazyLock> = LazyLock::new(|| register_histogram_family_in_milliseconds("cache_miss_load_millisecond")); static 
CACHE_HIT_COUNT: LazyLock> = @@ -47,7 +49,6 @@ pub fn metrics_inc_cache_access_count(c: u64, cache_name: &str) { } pub fn metrics_inc_cache_miss_count(c: u64, cache_name: &str) { - // increment_gauge!(("fuse_memory_miss_count"), c as f64); CACHE_MISS_COUNT .get_or_create(&CacheLabels { cache_name: cache_name.to_string(), @@ -55,6 +56,14 @@ pub fn metrics_inc_cache_miss_count(c: u64, cache_name: &str) { .inc_by(c); } +pub fn metrics_inc_cache_miss_bytes(c: u64, cache_name: &str) { + CACHE_MISS_BYTES + .get_or_create(&CacheLabels { + cache_name: cache_name.to_string(), + }) + .inc_by(c); +} + // When cache miss, load time cost. pub fn metrics_inc_cache_miss_load_millisecond(c: u64, cache_name: &str) { CACHE_MISS_LOAD_MILLISECOND diff --git a/src/query/service/tests/it/storages/testdata/caches_table.txt b/src/query/service/tests/it/storages/testdata/caches_table.txt index 5b3404c083b01..f318a525f6f5e 100644 --- a/src/query/service/tests/it/storages/testdata/caches_table.txt +++ b/src/query/service/tests/it/storages/testdata/caches_table.txt @@ -1,18 +1,18 @@ ---------- TABLE INFO ------------ DB.Table: 'system'.'caches', Table: caches-table_id:1, ver:0, Engine: SystemCache -------- TABLE CONTENTS ---------- -+-------------+----------------------------------+----------+----------+ -| Column 0 | Column 1 | Column 2 | Column 3 | -+-------------+----------------------------------+----------+----------+ -| 'test-node' | 'bloom_index_filter_cache' | 0 | 0 | -| 'test-node' | 'bloom_index_meta_cache' | 0 | 0 | -| 'test-node' | 'file_meta_data_cache' | 0 | 0 | -| 'test-node' | 'inverted_index_file_cache' | 0 | 0 | -| 'test-node' | 'inverted_index_meta_cache' | 0 | 0 | -| 'test-node' | 'prune_partitions_cache' | 0 | 0 | -| 'test-node' | 'segment_info_cache' | 0 | 0 | -| 'test-node' | 'table_snapshot_cache' | 0 | 0 | -| 'test-node' | 'table_snapshot_statistic_cache' | 0 | 0 | -+-------------+----------------------------------+----------+----------+ ++-------------+----------------------------------------------+----------+----------+ +| Column 0 | Column 1 | Column 2 | Column 3 | ++-------------+----------------------------------------------+----------+----------+ +| 'test-node' | 'memory_cache_bloom_index_file_meta_data' | 0 | 0 | +| 'test-node' | 'memory_cache_bloom_index_filter' | 0 | 0 | +| 'test-node' | 'memory_cache_compact_segment_info' | 0 | 0 | +| 'test-node' | 'memory_cache_inverted_index_file' | 0 | 0 | +| 'test-node' | 'memory_cache_inverted_index_file_meta_data' | 0 | 0 | +| 'test-node' | 'memory_cache_parquet_file_meta' | 0 | 0 | +| 'test-node' | 'memory_cache_prune_partitions' | 0 | 0 | +| 'test-node' | 'memory_cache_table_snapshot' | 0 | 0 | +| 'test-node' | 'memory_cache_table_statistics' | 0 | 0 | ++-------------+----------------------------------------------+----------+----------+ diff --git a/src/query/storages/common/cache/src/cache.rs b/src/query/storages/common/cache/src/cache.rs index b3cd6620e8ebc..33c4c577d8b3c 100644 --- a/src/query/storages/common/cache/src/cache.rs +++ b/src/query/storages/common/cache/src/cache.rs @@ -67,6 +67,38 @@ impl NamedCache { } } +pub trait CacheAccessorExt { + fn get_with_len>(&self, k: Q, len: u64) -> Option>; +} + +impl CacheAccessorExt for NamedCache +where + C: CacheAccessor, + K: Eq + Hash, + S: BuildHasher, + M: CountableMeter>, +{ + fn get_with_len>(&self, k: Q, len: u64) -> Option> { + let r = self.get(k); + if r.is_none() { + metrics_inc_cache_miss_count(len, &self.name); + } + r + } +} + +impl CacheAccessorExt for Option> +where + C: 
CacheAccessor, + K: Eq + Hash, + S: BuildHasher, + M: CountableMeter>, +{ + fn get_with_len>(&self, k: Q, len: u64) -> Option> { + self.as_ref().and_then(|cache| cache.get_with_len(k, len)) + } +} + impl CacheAccessor for NamedCache where C: CacheAccessor, diff --git a/src/query/storages/common/cache/src/lib.rs b/src/query/storages/common/cache/src/lib.rs index fd9e7a4c2e92c..9fa5fc58b4971 100644 --- a/src/query/storages/common/cache/src/lib.rs +++ b/src/query/storages/common/cache/src/lib.rs @@ -19,8 +19,10 @@ mod providers; mod read; pub use cache::CacheAccessor; +pub use cache::CacheAccessorExt; pub use cache::Named; pub use cache::NamedCache; +pub use databend_common_cache::CountableMeter; pub use providers::DiskCacheError; pub use providers::DiskCacheKey; pub use providers::DiskCacheResult; @@ -33,6 +35,7 @@ pub use providers::LruDiskCacheHolder; pub use providers::TableDataCache; pub use providers::TableDataCacheBuilder; pub use providers::TableDataCacheKey; +pub use providers::DISK_TABLE_DATA_CACHE_NAME; pub use read::CacheKey; pub use read::CachedReader; pub use read::InMemoryBytesCacheReader; diff --git a/src/query/storages/common/cache/src/providers/mod.rs b/src/query/storages/common/cache/src/providers/mod.rs index dc90a058580ed..4994dff8fb1c3 100644 --- a/src/query/storages/common/cache/src/providers/mod.rs +++ b/src/query/storages/common/cache/src/providers/mod.rs @@ -29,3 +29,4 @@ pub use memory_cache::InMemoryItemCacheHolder; pub use table_data_cache::TableDataCache; pub use table_data_cache::TableDataCacheBuilder; pub use table_data_cache::TableDataCacheKey; +pub use table_data_cache::DISK_TABLE_DATA_CACHE_NAME; diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs index 17b9f05115a02..6cd42f9849495 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -31,6 +31,7 @@ use log::info; use crate::providers::LruDiskCacheHolder; use crate::CacheAccessor; +use crate::CacheAccessorExt; use crate::LruDiskCacheBuilder; struct CacheItem { @@ -70,7 +71,7 @@ pub struct TableDataCache { _cache_populator: DiskCachePopulator, } -const TABLE_DATA_CACHE_NAME: &str = "table_data"; +pub const DISK_TABLE_DATA_CACHE_NAME: &str = "disk_cache_table_data"; pub struct TableDataCacheBuilder; @@ -98,16 +99,32 @@ impl TableDataCacheBuilder { } } +impl CacheAccessorExt for TableDataCache { + fn get_with_len>(&self, k: Q, len: u64) -> Option> { + let r = self.get(k); + if r.is_none() { + metrics_inc_cache_miss_count(len, DISK_TABLE_DATA_CACHE_NAME); + } + r + } +} + +impl CacheAccessorExt for Option { + fn get_with_len>(&self, k: Q, len: u64) -> Option> { + self.as_ref().and_then(|cache| cache.get_with_len(k, len)) + } +} + impl CacheAccessor for TableDataCache { fn get>(&self, k: Q) -> Option> { - metrics_inc_cache_access_count(1, TABLE_DATA_CACHE_NAME); + metrics_inc_cache_access_count(1, DISK_TABLE_DATA_CACHE_NAME); let k = k.as_ref(); if let Some(item) = self.external_cache.get(k) { - metrics_inc_cache_hit_count(1, TABLE_DATA_CACHE_NAME); Profile::record_usize_profile(ProfileStatisticsName::ScanCacheBytes, item.len()); + metrics_inc_cache_hit_count(1, DISK_TABLE_DATA_CACHE_NAME); Some(item) } else { - metrics_inc_cache_miss_count(1, TABLE_DATA_CACHE_NAME); + metrics_inc_cache_miss_count(1, DISK_TABLE_DATA_CACHE_NAME); None } } @@ -119,11 +136,11 @@ impl CacheAccessor for TableDataCache let msg = CacheItem { 
key: k, value: v }; match self.population_queue.try_send(msg) { Ok(_) => { - metrics_inc_cache_population_pending_count(1, TABLE_DATA_CACHE_NAME); + metrics_inc_cache_population_pending_count(1, DISK_TABLE_DATA_CACHE_NAME); } Err(TrySendError::Full(_)) => { - metrics_inc_cache_population_pending_count(-1, TABLE_DATA_CACHE_NAME); - metrics_inc_cache_population_overflow_count(1, TABLE_DATA_CACHE_NAME); + metrics_inc_cache_population_pending_count(-1, DISK_TABLE_DATA_CACHE_NAME); + metrics_inc_cache_population_overflow_count(1, DISK_TABLE_DATA_CACHE_NAME); } Err(TrySendError::Disconnected(_)) => { error!("table data cache population thread is down"); @@ -167,7 +184,7 @@ where T: CacheAccessor + Send + Sync + } } self.cache.put(key, value); - metrics_inc_cache_population_pending_count(-1, TABLE_DATA_CACHE_NAME); + metrics_inc_cache_population_pending_count(-1, DISK_TABLE_DATA_CACHE_NAME); } Err(_) => { info!("table data cache worker shutdown"); diff --git a/src/query/storages/common/cache_manager/src/cache_manager.rs b/src/query/storages/common/cache_manager/src/cache_manager.rs index 6f29707ba8d1d..211ec87f00f2f 100644 --- a/src/query/storages/common/cache_manager/src/cache_manager.rs +++ b/src/query/storages/common/cache_manager/src/cache_manager.rs @@ -111,10 +111,12 @@ impl CacheManager { } else { max_server_memory_usage * config.table_data_deserialized_memory_ratio / 100 }; - let table_column_array_cache = Self::new_in_memory_cache( + + // Cache of deserialized table data + let in_memory_table_data_cache = Self::new_named_cache_with_meter( memory_cache_capacity, ColumnArrayMeter, - "table_data_column_array", + "memory_cache_table_data", ); // setup in-memory table meta cache @@ -130,30 +132,34 @@ impl CacheManager { file_meta_data_cache: None, table_statistic_cache: None, table_data_cache, - table_column_array_cache, + table_column_array_cache: in_memory_table_data_cache, })); } else { - let table_snapshot_cache = - Self::new_item_cache(config.table_meta_snapshot_count, "table_snapshot"); - let table_statistic_cache = - Self::new_item_cache(config.table_meta_statistic_count, "table_statistics"); - let segment_info_cache = Self::new_in_memory_cache( + let table_snapshot_cache = Self::new_named_cache( + config.table_meta_snapshot_count, + "memory_cache_table_snapshot", + ); + let table_statistic_cache = Self::new_named_cache( + config.table_meta_statistic_count, + "memory_cache_table_statistics", + ); + let segment_info_cache = Self::new_named_cache_with_meter( config.table_meta_segment_bytes, CompactSegmentInfoMeter {}, - "segment_info", + "memory_cache_compact_segment_info", ); - let bloom_index_filter_cache = Self::new_in_memory_cache( + let bloom_index_filter_cache = Self::new_named_cache_with_meter( config.table_bloom_index_filter_size, BloomIndexFilterMeter {}, - "bloom_index_filter", + "memory_cache_bloom_index_filter", ); - let bloom_index_meta_cache = Self::new_item_cache( + let bloom_index_meta_cache = Self::new_named_cache( config.table_bloom_index_meta_count, - "bloom_index_file_meta_data", + "memory_cache_bloom_index_file_meta_data", ); - let inverted_index_meta_cache = Self::new_item_cache( + let inverted_index_meta_cache = Self::new_named_cache( config.inverted_index_meta_count, - "inverted_index_file_meta_data", + "memory_cache_inverted_index_file_meta_data", ); // setup in-memory inverted index filter cache @@ -162,16 +168,20 @@ impl CacheManager { } else { config.inverted_index_filter_size }; - let inverted_index_file_cache = Self::new_in_memory_cache( + let 
inverted_index_file_cache = Self::new_named_cache_with_meter( inverted_index_file_size, InvertedIndexFileMeter {}, - "inverted_index_file", + "memory_cache_inverted_index_file", + ); + let prune_partitions_cache = Self::new_named_cache( + config.table_prune_partitions_count, + "memory_cache_prune_partitions", ); - let prune_partitions_cache = - Self::new_item_cache(config.table_prune_partitions_count, "prune_partitions"); - let file_meta_data_cache = - Self::new_item_cache(DEFAULT_FILE_META_DATA_CACHE_ITEMS, "parquet_file_meta"); + let file_meta_data_cache = Self::new_named_cache( + DEFAULT_FILE_META_DATA_CACHE_ITEMS, + "memory_cache_parquet_file_meta", + ); GlobalInstance::set(Arc::new(Self { table_snapshot_cache, segment_info_cache, @@ -183,7 +193,7 @@ impl CacheManager { file_meta_data_cache, table_statistic_cache, table_data_cache, - table_column_array_cache, + table_column_array_cache: in_memory_table_data_cache, })); } @@ -239,7 +249,7 @@ impl CacheManager { } // create cache that meters size by `Count` - fn new_item_cache( + fn new_named_cache( capacity: u64, name: impl Into, ) -> Option>> { @@ -251,7 +261,7 @@ impl CacheManager { } // create cache that meters size by `meter` - fn new_in_memory_cache( + fn new_named_cache_with_meter( capacity: u64, meter: M, name: &str, diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs index 94d001cfafb22..155358eeb46f5 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs @@ -23,7 +23,7 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::ColumnId; use databend_common_metrics::storage::*; -use databend_storages_common_cache::CacheAccessor; +use databend_storages_common_cache::CacheAccessorExt; use databend_storages_common_cache::TableDataCacheKey; use databend_storages_common_cache_manager::CacheManager; use databend_storages_common_table_meta::meta::ColumnMeta; @@ -160,19 +160,21 @@ impl BlockReader { let column_cache_key = TableDataCacheKey::new(location, *column_id, offset, len); - // first, check column array object cache - if let Some(cache_array) = column_array_cache.get(&column_cache_key) { + // first, check in memory table data cache + if let Some(cache_array) = column_array_cache.get_with_len(&column_cache_key, len) { cached_column_array.push((*column_id, cache_array)); continue; } - // and then, check column data cache - if let Some(cached_column_raw_data) = column_data_cache.get(&column_cache_key) { + // and then, check on disk table data cache + if let Some(cached_column_raw_data) = + column_data_cache.get_with_len(&column_cache_key, len) + { cached_column_data.push((*column_id, cached_column_raw_data)); continue; } - // if all cache missed, prepare the ranges to be read + // if all caches missed, prepare the ranges to be read ranges.push((*column_id, offset..(offset + len))); // Perf diff --git a/src/query/storages/system/src/caches_table.rs b/src/query/storages/system/src/caches_table.rs index 72eb9f69b08f4..919fd8891ba9b 100644 --- a/src/query/storages/system/src/caches_table.rs +++ b/src/query/storages/system/src/caches_table.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::hash::BuildHasher; +use std::hash::Hash; use std::sync::Arc; use databend_common_catalog::table::Table; @@ -29,6 +31,9 @@ use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::schema::TableMeta; use databend_common_storages_fuse::TableContext; use databend_storages_common_cache::CacheAccessor; +use databend_storages_common_cache::CountableMeter; +use databend_storages_common_cache::NamedCache; +use databend_storages_common_cache::DISK_TABLE_DATA_CACHE_NAME; use databend_storages_common_cache_manager::CacheManager; use crate::SyncOneBlockSystemTable; @@ -38,6 +43,14 @@ pub struct CachesTable { table_info: TableInfo, } +#[derive(Default)] +struct CachesTableColumns { + nodes: Vec, + names: Vec, + num_items: Vec, + size: Vec, +} + impl SyncSystemTable for CachesTable { const NAME: &'static str = "system.caches"; @@ -50,11 +63,6 @@ impl SyncSystemTable for CachesTable { fn get_full_data(&self, ctx: Arc) -> Result { let local_node = ctx.get_cluster().local_id.clone(); - let mut nodes = Vec::new(); - let mut names = Vec::new(); - let mut num_items = Vec::new(); - let mut size = Vec::new(); - let cache_manager = CacheManager::instance(); let table_snapshot_cache = cache_manager.get_table_snapshot_cache(); @@ -69,87 +77,60 @@ impl SyncSystemTable for CachesTable { let table_data_cache = cache_manager.get_table_data_cache(); let table_column_array_cache = cache_manager.get_table_data_array_cache(); + let mut columns = CachesTableColumns::default(); + if let Some(table_snapshot_cache) = table_snapshot_cache { - nodes.push(local_node.clone()); - names.push("table_snapshot_cache".to_string()); - num_items.push(table_snapshot_cache.len() as u64); - size.push(table_snapshot_cache.size()); + Self::append_row(&table_snapshot_cache, &local_node, &mut columns); } if let Some(table_snapshot_statistic_cache) = table_snapshot_statistic_cache { - nodes.push(local_node.clone()); - names.push("table_snapshot_statistic_cache".to_string()); - num_items.push(table_snapshot_statistic_cache.len() as u64); - size.push(table_snapshot_statistic_cache.size()); + Self::append_row(&table_snapshot_statistic_cache, &local_node, &mut columns); } if let Some(segment_info_cache) = segment_info_cache { - nodes.push(local_node.clone()); - names.push("segment_info_cache".to_string()); - num_items.push(segment_info_cache.len() as u64); - size.push(segment_info_cache.size()); + Self::append_row(&segment_info_cache, &local_node, &mut columns); } if let Some(bloom_index_filter_cache) = bloom_index_filter_cache { - nodes.push(local_node.clone()); - names.push("bloom_index_filter_cache".to_string()); - num_items.push(bloom_index_filter_cache.len() as u64); - size.push(bloom_index_filter_cache.size()); + Self::append_row(&bloom_index_filter_cache, &local_node, &mut columns); } if let Some(bloom_index_meta_cache) = bloom_index_meta_cache { - nodes.push(local_node.clone()); - names.push("bloom_index_meta_cache".to_string()); - num_items.push(bloom_index_meta_cache.len() as u64); - size.push(bloom_index_meta_cache.size()); + Self::append_row(&bloom_index_meta_cache, &local_node, &mut columns); } if let Some(inverted_index_meta_cache) = inverted_index_meta_cache { - nodes.push(local_node.clone()); - names.push("inverted_index_meta_cache".to_string()); - num_items.push(inverted_index_meta_cache.len() as u64); - size.push(inverted_index_meta_cache.size()); + Self::append_row(&inverted_index_meta_cache, &local_node, &mut columns); } if let Some(inverted_index_file_cache) = inverted_index_file_cache { - 
nodes.push(local_node.clone()); - names.push("inverted_index_file_cache".to_string()); - num_items.push(inverted_index_file_cache.len() as u64); - size.push(inverted_index_file_cache.size()); + Self::append_row(&inverted_index_file_cache, &local_node, &mut columns); } if let Some(prune_partitions_cache) = prune_partitions_cache { - nodes.push(local_node.clone()); - names.push("prune_partitions_cache".to_string()); - num_items.push(prune_partitions_cache.len() as u64); - size.push(prune_partitions_cache.size()); + Self::append_row(&prune_partitions_cache, &local_node, &mut columns); } if let Some(file_meta_data_cache) = file_meta_data_cache { - nodes.push(local_node.clone()); - names.push("file_meta_data_cache".to_string()); - num_items.push(file_meta_data_cache.len() as u64); - size.push(file_meta_data_cache.size()); + Self::append_row(&file_meta_data_cache, &local_node, &mut columns); } if let Some(table_data_cache) = table_data_cache { - nodes.push(local_node.clone()); - names.push("table_data_cache".to_string()); - num_items.push(table_data_cache.len() as u64); - size.push(table_data_cache.size()); + // table data cache is not a named cache yet + columns.nodes.push(local_node.clone()); + columns.names.push(DISK_TABLE_DATA_CACHE_NAME.to_string()); + columns.num_items.push(table_data_cache.len() as u64); + columns.size.push(table_data_cache.size()); } if let Some(table_column_array_cache) = table_column_array_cache { - nodes.push(local_node.clone()); - names.push("table_column_array_cache".to_string()); - num_items.push(table_column_array_cache.len() as u64); - size.push(table_column_array_cache.size()); + Self::append_row(&table_column_array_cache, &local_node, &mut columns); } Ok(DataBlock::new_from_columns(vec![ - StringType::from_data(nodes), - StringType::from_data(names), - UInt64Type::from_data(num_items), - UInt64Type::from_data(size), + StringType::from_data(columns.nodes), + StringType::from_data(columns.names), + UInt64Type::from_data(columns.num_items), + UInt64Type::from_data(columns.size), ])) } } @@ -177,4 +158,20 @@ impl CachesTable { }; SyncOneBlockSystemTable::create(Self { table_info }) } + + fn append_row( + cache: &NamedCache, + local_node: &str, + row: &mut CachesTableColumns, + ) where + C: CacheAccessor, + K: Eq + Hash, + S: BuildHasher, + M: CountableMeter>, + { + row.nodes.push(local_node.to_string()); + row.names.push(cache.name().to_string()); + row.num_items.push(cache.len() as u64); + row.size.push(cache.size()); + } } From 554c8d9da397a1d5132bd83a34b6c00384f8b7ce Mon Sep 17 00:00:00 2001 From: Bohu Date: Tue, 9 Jul 2024 08:48:40 +0800 Subject: [PATCH 06/21] Update audit.toml --- .cargo/audit.toml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.cargo/audit.toml b/.cargo/audit.toml index dbb3edd7496ce..c74717b72c9ce 100644 --- a/.cargo/audit.toml +++ b/.cargo/audit.toml @@ -36,5 +36,13 @@ ignore = [ # Degradation of service in h2 servers with CONTINUATION Flood(Wait for tonic to fix) "RUSTSEC-2024-0332", # `rustls::ConnectionCommon::complete_io` could fall into an infinite loop based on network input - "RUSTSEC-2024-0336" + "RUSTSEC-2024-0336", + # gix-fs: Traversal outside working tree enables arbitrary code execution + "RUSTSEC-2024-0350", + # gix-index: Traversal outside working tree enables arbitrary code execution + "RUSTSEC-2024-0348", + # gix-index: Refs and paths with reserved Windows device names access the devices + "RUSTSEC-2024-0352", + # gix-ref: Refs and paths with reserved Windows device names access the devices + 
"RUSTSEC-2024-0351" ] From 231693582867c450bd1c8bf4be2e8ce5d79a899e Mon Sep 17 00:00:00 2001 From: coldWater Date: Tue, 9 Jul 2024 09:46:28 +0800 Subject: [PATCH 07/21] refactor: upgrade QuotaMgr to using protobuf (#15858) * quota read Signed-off-by: coldWater * set_quota Signed-off-by: coldWater * refine Signed-off-by: coldWater * update Signed-off-by: coldWater * fix Signed-off-by: coldWater * fix Signed-off-by: coldWater * rename Signed-off-by: coldWater * check_and_upgrade_to_pb Signed-off-by: coldWater * WRITE_PB Signed-off-by: coldWater * generic const parameter Signed-off-by: coldWater * add it Signed-off-by: coldWater * fix Signed-off-by: coldWater * fix Signed-off-by: coldWater --------- Signed-off-by: coldWater Co-authored-by: Bohu --- src/meta/app/src/tenant/quota.rs | 16 ---- src/meta/proto-conv/src/lib.rs | 1 + .../src/tenant_quota_from_to_protobuf_impl.rs | 57 ++++++++++++ src/meta/proto-conv/src/util.rs | 1 + src/meta/proto-conv/tests/it/main.rs | 1 + .../proto-conv/tests/it/v100_tenant_quota.rs | 43 +++++++++ src/meta/protos/proto/tenant.proto | 20 ++++ src/query/management/src/quota/quota_mgr.rs | 63 +++++++++---- src/query/management/tests/it/main.rs | 1 + src/query/management/tests/it/quota.rs | 92 +++++++++++++++++++ src/query/users/src/user_api.rs | 3 +- 11 files changed, 265 insertions(+), 33 deletions(-) create mode 100644 src/meta/proto-conv/src/tenant_quota_from_to_protobuf_impl.rs create mode 100644 src/meta/proto-conv/tests/it/v100_tenant_quota.rs create mode 100644 src/query/management/tests/it/quota.rs diff --git a/src/meta/app/src/tenant/quota.rs b/src/meta/app/src/tenant/quota.rs index 45f186cf97460..f1dbeffde9d5e 100644 --- a/src/meta/app/src/tenant/quota.rs +++ b/src/meta/app/src/tenant/quota.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use databend_common_exception::ErrorCode; - #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Eq, PartialEq, Default)] #[serde(default)] pub struct TenantQuota { @@ -32,17 +30,3 @@ pub struct TenantQuota { // The max number of users can be created in the tenant. pub max_users: u32, } - -impl TryFrom> for TenantQuota { - type Error = ErrorCode; - - fn try_from(value: Vec) -> databend_common_exception::Result { - match serde_json::from_slice(&value) { - Ok(quota) => Ok(quota), - Err(err) => Err(ErrorCode::IllegalTenantQuotaFormat(format!( - "Cannot deserialize tenant quota from bytes. cause {}", - err - ))), - } - } -} diff --git a/src/meta/proto-conv/src/lib.rs b/src/meta/proto-conv/src/lib.rs index f89814953d45e..6ba70872e0589 100644 --- a/src/meta/proto-conv/src/lib.rs +++ b/src/meta/proto-conv/src/lib.rs @@ -81,6 +81,7 @@ mod sequence_from_to_protobuf_impl; mod share_from_to_protobuf_impl; mod stage_from_to_protobuf_impl; mod table_from_to_protobuf_impl; +mod tenant_quota_from_to_protobuf_impl; mod tident_from_to_protobuf_impl; mod udf_from_to_protobuf_impl; mod user_from_to_protobuf_impl; diff --git a/src/meta/proto-conv/src/tenant_quota_from_to_protobuf_impl.rs b/src/meta/proto-conv/src/tenant_quota_from_to_protobuf_impl.rs new file mode 100644 index 0000000000000..61b671993473d --- /dev/null +++ b/src/meta/proto-conv/src/tenant_quota_from_to_protobuf_impl.rs @@ -0,0 +1,57 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! This mod is the key point about compatibility. +//! Everytime update anything in this file, update the `VER` and let the tests pass. + +use databend_common_meta_app::tenant; +use databend_common_protos::pb; + +use crate::reader_check_msg; +use crate::FromToProto; +use crate::Incompatible; +use crate::MIN_READER_VER; +use crate::VER; + +impl FromToProto for tenant::TenantQuota { + type PB = pb::TenantQuota; + fn get_pb_ver(p: &Self::PB) -> u64 { + p.ver + } + fn from_pb(p: pb::TenantQuota) -> Result { + reader_check_msg(p.ver, p.min_reader_ver)?; + + let v = Self { + max_databases: p.max_databases, + max_tables_per_database: p.max_tables_per_database, + max_stages: p.max_stages, + max_files_per_stage: p.max_files_per_stage, + max_users: p.max_users, + }; + Ok(v) + } + + fn to_pb(&self) -> Result { + let p = pb::TenantQuota { + ver: VER, + min_reader_ver: MIN_READER_VER, + max_databases: self.max_databases, + max_tables_per_database: self.max_tables_per_database, + max_stages: self.max_stages, + max_files_per_stage: self.max_files_per_stage, + max_users: self.max_users, + }; + Ok(p) + } +} diff --git a/src/meta/proto-conv/src/util.rs b/src/meta/proto-conv/src/util.rs index c319ccba89a96..e7d30f178eb85 100644 --- a/src/meta/proto-conv/src/util.rs +++ b/src/meta/proto-conv/src/util.rs @@ -129,6 +129,7 @@ const META_CHANGE_LOG: &[(u64, &str)] = &[ (97, "2024-07-04: Add: missing_field_as in user.proto/OrcFileFormatParams"), (98, "2024-07-04: Add: add iceberg catalog option in catalog option"), (99, "2024-07-08: Add: missing_field_as in user.proto/ParquetFileFormatParams"), + (100, "2024-06-21: Add: tenant.proto/TenantQuota"), // Dear developer: // If you're gonna add a new metadata version, you'll have to add a test for it. // You could just copy an existing test file(e.g., `../tests/it/v024_table_meta.rs`) diff --git a/src/meta/proto-conv/tests/it/main.rs b/src/meta/proto-conv/tests/it/main.rs index 0208cd4db940a..0399c3fecb815 100644 --- a/src/meta/proto-conv/tests/it/main.rs +++ b/src/meta/proto-conv/tests/it/main.rs @@ -103,3 +103,4 @@ mod v096_database_meta; mod v097_orc_format_params; mod v098_catalog_option; mod v099_parquet_format_params; +mod v100_tenant_quota; diff --git a/src/meta/proto-conv/tests/it/v100_tenant_quota.rs b/src/meta/proto-conv/tests/it/v100_tenant_quota.rs new file mode 100644 index 0000000000000..2828c160e88f8 --- /dev/null +++ b/src/meta/proto-conv/tests/it/v100_tenant_quota.rs @@ -0,0 +1,43 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use databend_common_meta_app::tenant::TenantQuota; +use minitrace::func_name; + +use crate::common; + +// These bytes are built when a new version in introduced, + +// and are kept for backward compatibility test. +// +// ************************************************************* +// * These messages should never be updated, * +// * only be added when a new version is added, * +// * or be removed when an old version is no longer supported. * +// ************************************************************* +// +#[test] +fn test_decode_v100_tenant_quota() -> anyhow::Result<()> { + let tenant_quota_v100 = vec![8, 1, 16, 2, 24, 3, 32, 4, 40, 5, 160, 6, 100, 168, 6, 24]; + let want = || TenantQuota { + max_databases: 1, + max_tables_per_database: 2, + max_stages: 3, + max_files_per_stage: 4, + max_users: 5, + }; + common::test_load_old(func_name!(), tenant_quota_v100.as_slice(), 100, want())?; + common::test_pb_from_to(func_name!(), want())?; + Ok(()) +} diff --git a/src/meta/protos/proto/tenant.proto b/src/meta/protos/proto/tenant.proto index 517b5f6e87b79..35fc51262d34f 100644 --- a/src/meta/protos/proto/tenant.proto +++ b/src/meta/protos/proto/tenant.proto @@ -28,3 +28,23 @@ message TIdent { // Resource name string name = 2; } + +message TenantQuota { + uint64 ver = 100; + uint64 min_reader_ver = 101; + + // The max databases can be created in the tenant. + uint32 max_databases = 1; + + // The max tables per database can be created in the tenant. + uint32 max_tables_per_database = 2; + + // The max stages can be created in the tenant. + uint32 max_stages = 3; + + // The max files per stage can be created in the tenant. + uint32 max_files_per_stage = 4; + + // The max number of users can be created in the tenant. + uint32 max_users = 5; +} \ No newline at end of file diff --git a/src/query/management/src/quota/quota_mgr.rs b/src/query/management/src/quota/quota_mgr.rs index 65982e35d88c8..9f9c0e5caf67a 100644 --- a/src/query/management/src/quota/quota_mgr.rs +++ b/src/query/management/src/quota/quota_mgr.rs @@ -16,27 +16,31 @@ use std::sync::Arc; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_meta_api::kv_pb_api::KVPbApi; +use databend_common_meta_api::kv_pb_api::UpsertPB; use databend_common_meta_app::tenant::Tenant; use databend_common_meta_app::tenant::TenantQuota; use databend_common_meta_app::tenant::TenantQuotaIdent; use databend_common_meta_kvapi::kvapi; use databend_common_meta_kvapi::kvapi::Key; -use databend_common_meta_types::IntoSeqV; use databend_common_meta_types::MatchSeq; use databend_common_meta_types::MatchSeqExt; use databend_common_meta_types::MetaError; use databend_common_meta_types::SeqV; use databend_common_meta_types::UpsertKV; use databend_common_meta_types::With; +use minitrace::func_name; use super::quota_api::QuotaApi; +use crate::serde::check_and_upgrade_to_pb; +use crate::serde::Quota; -pub struct QuotaMgr { +pub struct QuotaMgr { kv_api: Arc>, ident: TenantQuotaIdent, } -impl QuotaMgr { +impl QuotaMgr { pub fn create(kv_api: Arc>, tenant: &Tenant) -> Self { QuotaMgr { kv_api, @@ -49,32 +53,59 @@ impl QuotaMgr { } } -// TODO: use pb to replace json #[async_trait::async_trait] -impl QuotaApi for QuotaMgr { +impl QuotaApi for QuotaMgr { #[async_backtrace::framed] async fn get_quota(&self, seq: MatchSeq) -> Result> { let res = self.kv_api.get_kv(&self.key()).await?; match res { + None => Ok(SeqV::new(0, TenantQuota::default())), Some(seq_value) => match seq.match_seq(&seq_value) { - Ok(_) => 
Ok(seq_value.into_seqv()?), Err(_) => Err(ErrorCode::TenantQuotaUnknown("Tenant does not exist.")), + Ok(_) => { + let mut quota = if WRITE_PB { + Quota::new(func_name!()) + } else { + // Do not serialize to protobuf format + Quota::new_limit(func_name!(), 0) + }; + + let u = check_and_upgrade_to_pb( + &mut quota, + &self.key(), + &seq_value, + self.kv_api.as_ref(), + ) + .await?; + + // Keep the original seq. + Ok(SeqV::with_meta(seq_value.seq, seq_value.meta, u.data)) + } }, - None => Ok(SeqV::new(0, TenantQuota::default())), } } #[async_backtrace::framed] async fn set_quota(&self, quota: &TenantQuota, seq: MatchSeq) -> Result { - let value = serde_json::to_vec(quota)?; - let res = self - .kv_api - .upsert_kv(UpsertKV::update(&self.key(), &value).with(seq)) - .await?; - - match res.result { - Some(SeqV { seq: s, .. }) => Ok(s), - None => Err(ErrorCode::TenantQuotaUnknown("Quota does not exist.")), + if WRITE_PB { + let res = self + .kv_api + .upsert_pb(&UpsertPB::update(self.ident.clone(), quota.clone()).with(seq)) + .await?; + match res.result { + Some(SeqV { seq: s, .. }) => Ok(s), + None => Err(ErrorCode::TenantQuotaUnknown("Quota does not exist.")), + } + } else { + let value = serde_json::to_vec(quota)?; + let res = self + .kv_api + .upsert_kv(UpsertKV::update(&self.key(), &value).with(seq)) + .await?; + match res.result { + Some(SeqV { seq: s, .. }) => Ok(s), + None => Err(ErrorCode::TenantQuotaUnknown("Quota does not exist.")), + } } } } diff --git a/src/query/management/tests/it/main.rs b/src/query/management/tests/it/main.rs index 9fa760984a638..f3f2482be8ade 100644 --- a/src/query/management/tests/it/main.rs +++ b/src/query/management/tests/it/main.rs @@ -15,6 +15,7 @@ #![allow(clippy::uninlined_format_args)] mod cluster; +mod quota; mod role; mod setting; mod stage; diff --git a/src/query/management/tests/it/quota.rs b/src/query/management/tests/it/quota.rs new file mode 100644 index 0000000000000..5e72b53ae4b41 --- /dev/null +++ b/src/query/management/tests/it/quota.rs @@ -0,0 +1,92 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
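A short usage sketch of the new const-generic switch, kept next to the test that exercises it. It assumes the parameter is written QuotaMgr::<false> / QuotaMgr::<true> and that the imports of this test file are in scope; the flag only changes how set_quota encodes the value, while get_quota calls check_and_upgrade_to_pb in both modes (with a write-back limit of 0 when WRITE_PB is false).

    async fn quota_mgr_flag_sketch(kv: Arc<MetaEmbedded>) -> Result<()> {
        let tenant = Tenant::new_literal("admin");
        // false: keep writing JSON (the current default in user_api.rs).
        let json_writer = QuotaMgr::<false>::create(kv.clone(), &tenant);
        // true: write protobuf via upsert_pb.
        let pb_writer = QuotaMgr::<true>::create(kv, &tenant);

        // Reads upgrade legacy JSON entries regardless of the flag.
        let quota = json_writer.get_quota(MatchSeq::GE(0)).await?.data;
        pb_writer.set_quota(&quota, MatchSeq::GE(0)).await?;
        Ok(())
    }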
+ +use std::sync::Arc; + +use databend_common_base::base::tokio; +use databend_common_exception::Result; +use databend_common_management::*; +use databend_common_meta_api::deserialize_struct; +use databend_common_meta_app::tenant::Tenant; +use databend_common_meta_app::tenant::TenantQuota; +use databend_common_meta_embedded::MetaEmbedded; +use databend_common_meta_kvapi::kvapi::KVApi; +use databend_common_meta_types::MatchSeq; + +#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +async fn test_update_quota_from_json_to_pb() -> Result<()> { + let (kv_api, quota_api_json, quota_api_pb) = new_quota_api().await?; + + // when disable write pb + + let quota0 = TenantQuota { + max_databases: 2, + max_tables_per_database: 3, + max_stages: 4, + max_files_per_stage: 5, + max_users: 6, + }; + quota_api_json.set_quota("a0, MatchSeq::GE(0)).await?; + + let value = kv_api.get_kv("__fd_quotas/admin").await?; + let s: String = String::from_utf8(value.unwrap().data)?; + assert_eq!( + s, + "{\"max_databases\":2,\"max_tables_per_database\":3,\"max_stages\":4,\"max_files_per_stage\":5,\"max_users\":6}" + ); + + let quota1 = quota_api_json.get_quota(MatchSeq::GE(0)).await?.data; + assert_eq!(quota1, quota0); + + let value = kv_api.get_kv("__fd_quotas/admin").await?; + let s: String = String::from_utf8(value.unwrap().data)?; + assert_eq!( + s, + "{\"max_databases\":2,\"max_tables_per_database\":3,\"max_stages\":4,\"max_files_per_stage\":5,\"max_users\":6}" + ); + + // when enable write pb + + let quota2 = quota_api_pb.get_quota(MatchSeq::GE(0)).await?.data; + assert_eq!(quota2, quota0); + + let value = kv_api.get_kv("__fd_quotas/admin").await?; + let res = deserialize_struct::(&value.unwrap().data); + assert_eq!(res.unwrap(), quota0); + + let quota3 = quota_api_json.get_quota(MatchSeq::GE(0)).await?.data; + assert_eq!(quota3, quota0); + + let quota4 = TenantQuota { + max_databases: 3, + ..quota0 + }; + quota_api_pb.set_quota("a4, MatchSeq::GE(0)).await?; + + let quota5 = quota_api_pb.get_quota(MatchSeq::GE(0)).await?.data; + assert_eq!(quota5, quota4); + + let value = kv_api.get_kv("__fd_quotas/admin").await?; + let res = deserialize_struct::(&value.unwrap().data); + assert_eq!(res.unwrap(), quota4); + + Ok(()) +} + +async fn new_quota_api() -> Result<(Arc, QuotaMgr, QuotaMgr)> { + let test_api = Arc::new(MetaEmbedded::new_temp().await?); + let mgr_json = QuotaMgr::::create(test_api.clone(), &Tenant::new_literal("admin")); + let mgr_pb = QuotaMgr::::create(test_api.clone(), &Tenant::new_literal("admin")); + Ok((test_api, mgr_json, mgr_pb)) +} diff --git a/src/query/users/src/user_api.rs b/src/query/users/src/user_api.rs index e372cda2ff9f0..b252376bc337f 100644 --- a/src/query/users/src/user_api.rs +++ b/src/query/users/src/user_api.rs @@ -141,7 +141,8 @@ impl UserApiProvider { } pub fn tenant_quota_api(&self, tenant: &Tenant) -> Arc { - Arc::new(QuotaMgr::create(self.client.clone(), tenant)) + const WRITE_PB: bool = false; + Arc::new(QuotaMgr::::create(self.client.clone(), tenant)) } pub fn setting_api(&self, tenant: &Tenant) -> Arc { From 44b3b38da7498a1efd026633b169968f1a770ef4 Mon Sep 17 00:00:00 2001 From: Sky Fan <3374614481@qq.com> Date: Tue, 9 Jul 2024 11:37:32 +0800 Subject: [PATCH 08/21] fix: wrong arrow schema when fuse engine read parquet (#15997) * fix: wrong arrow schema when fuse engine read parquet * add logic test * move test to standalone --- .../fuse/src/io/read/block/parquet/deserialize.rs | 2 +- tests/sqllogictests/suites/mode/standalone/pr15804.test | 8 ++++++++ 2 files changed, 
9 insertions(+), 1 deletion(-) create mode 100644 tests/sqllogictests/suites/mode/standalone/pr15804.test diff --git a/src/query/storages/fuse/src/io/read/block/parquet/deserialize.rs b/src/query/storages/fuse/src/io/read/block/parquet/deserialize.rs index 83a4e1452ac99..8f4495402f39f 100644 --- a/src/query/storages/fuse/src/io/read/block/parquet/deserialize.rs +++ b/src/query/storages/fuse/src/io/read/block/parquet/deserialize.rs @@ -63,7 +63,7 @@ pub fn column_chunks_to_record_batch( let field_levels = parquet_to_arrow_field_levels( &parquet_schema, ProjectionMask::leaves(&parquet_schema, projection_mask), - None, + Some(arrow_schema.fields()), )?; let mut record_reader = ParquetRecordBatchReader::try_new_with_row_groups( &field_levels, diff --git a/tests/sqllogictests/suites/mode/standalone/pr15804.test b/tests/sqllogictests/suites/mode/standalone/pr15804.test new file mode 100644 index 0000000000000..42c5c5cb56b31 --- /dev/null +++ b/tests/sqllogictests/suites/mode/standalone/pr15804.test @@ -0,0 +1,8 @@ +statement ok +create or replace table t2(c varchar); + +statement ok +insert into t2 select repeat('a', 1000000) from numbers(3000); + +statement ok +select * from t2 ignore_result; \ No newline at end of file From b5feb1208c52bdf8caabe6f050e250dcd7cea453 Mon Sep 17 00:00:00 2001 From: Winter Zhang Date: Tue, 9 Jul 2024 12:00:43 +0800 Subject: [PATCH 09/21] chore(query): fix cluster ci failure if set license (#15999) --- .../base/01_system/01_0012_system_temp_files.test | 10 ++++++++++ .../0_stateless/20+_others/20_0014_sort_spill.sql | 2 ++ 2 files changed, 12 insertions(+) diff --git a/tests/sqllogictests/suites/base/01_system/01_0012_system_temp_files.test b/tests/sqllogictests/suites/base/01_system/01_0012_system_temp_files.test index 3233b6cf668c6..ab2a04c7b797e 100644 --- a/tests/sqllogictests/suites/base/01_system/01_0012_system_temp_files.test +++ b/tests/sqllogictests/suites/base/01_system/01_0012_system_temp_files.test @@ -2,9 +2,15 @@ onlyif http statement ok set max_threads = 8; +onlyif http +statement ok +SET max_vacuum_temp_files_after_query=0; + +onlyif http statement ok SET aggregate_spilling_bytes_threshold_per_proc=1; + onlyif http query T SELECT COUNT() FROM (SELECT number::string, count() FROM numbers_mt(100000) group by number::string); @@ -21,6 +27,10 @@ onlyif http statement ok unset max_threads; +onlyif http +statement ok +unset max_vacuum_temp_files_after_query; + onlyif http statement ok unset aggregate_spilling_bytes_threshold_per_proc; diff --git a/tests/suites/0_stateless/20+_others/20_0014_sort_spill.sql b/tests/suites/0_stateless/20+_others/20_0014_sort_spill.sql index 3111d15371bd8..da0b0fedce548 100644 --- a/tests/suites/0_stateless/20+_others/20_0014_sort_spill.sql +++ b/tests/suites/0_stateless/20+_others/20_0014_sort_spill.sql @@ -1,4 +1,5 @@ SELECT '==TEST GLOBAL SORT=='; +SET max_vacuum_temp_files_after_query=0; set sort_spilling_bytes_threshold_per_proc = 8; DROP TABLE if EXISTS t; DROP TABLE IF EXISTS temp_files_count; @@ -92,5 +93,6 @@ SELECT x, y FROM xy ORDER BY x NULLS FIRST, y DESC NULLS LAST LIMIT 3; SELECT '==================='; INSERT INTO temp_files_count SELECT COUNT() as count, 8 as number FROM system.temp_files; +unset max_vacuum_temp_files_after_query; set sort_spilling_bytes_threshold_per_proc = 0; SELECT any_if(count, number = 8) - any_if(count, number = 7) FROM temp_files_count; \ No newline at end of file From 2005750bef5dcbbb1f4c7568afb294e25353b27d Mon Sep 17 00:00:00 2001 From: everpcpc Date: Tue, 9 Jul 2024 12:33:15 +0800 
Subject: [PATCH 10/21] chore(ci): add license for cluster test (#15995) --- .github/workflows/reuse.linux.yml | 112 +++++++++++---------------- .github/workflows/reuse.sqllogic.yml | 58 ++++++++++++++ Cargo.toml | 2 +- clippy.toml | 2 +- 4 files changed, 107 insertions(+), 67 deletions(-) diff --git a/.github/workflows/reuse.linux.yml b/.github/workflows/reuse.linux.yml index d560fc29383d2..416298fb08a1d 100644 --- a/.github/workflows/reuse.linux.yml +++ b/.github/workflows/reuse.linux.yml @@ -20,7 +20,7 @@ env: jobs: check: - runs-on: [ self-hosted, X64, Linux, 16c32g, "${{ inputs.runner_provider }}" ] + runs-on: [self-hosted, X64, Linux, 16c32g, "${{ inputs.runner_provider }}"] steps: - uses: actions/checkout@v4 with: @@ -32,7 +32,7 @@ jobs: github_token: ${{ github.token }} license: - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] outputs: license: ${{ steps.license.outputs.license }} steps: @@ -128,7 +128,7 @@ jobs: artifacts: query build_musl: - runs-on: [ self-hosted, X64, Linux, 16c32g, "${{ inputs.runner_provider }}" ] + runs-on: [self-hosted, X64, Linux, 16c32g, "${{ inputs.runner_provider }}"] strategy: fail-fast: false matrix: @@ -148,7 +148,7 @@ jobs: artifacts: query test_unit: - runs-on: [ self-hosted, X64, Linux, 16c32g, "${{ inputs.runner_provider }}" ] + runs-on: [self-hosted, X64, Linux, 16c32g, "${{ inputs.runner_provider }}"] steps: - uses: actions/checkout@v4 with: @@ -158,72 +158,78 @@ jobs: timeout-minutes: 60 test_metactl: - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] - needs: [ build, check ] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] + needs: [build, check] steps: - uses: actions/checkout@v4 - uses: ./.github/actions/test_metactl timeout-minutes: 10 test_compat_meta_query: - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] - needs: [ build, check ] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] + needs: [build, check] steps: - uses: actions/checkout@v4 - uses: ./.github/actions/test_compat_meta_query timeout-minutes: 10 test_compat_fuse: - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] - needs: [ build, check ] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] + needs: [build, check] steps: - uses: actions/checkout@v4 - uses: ./.github/actions/test_compat_fuse timeout-minutes: 20 test_compat_meta_meta: - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] - needs: [ build, check ] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] + needs: [build, check] steps: - uses: actions/checkout@v4 - uses: ./.github/actions/test_compat_meta_meta timeout-minutes: 20 test_logs: - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] - needs: [ build, check ] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] + needs: [build, check] steps: - uses: actions/checkout@v4 - uses: ./.github/actions/test_logs timeout-minutes: 20 test_meta_cluster: - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] - needs: [ build, check ] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] + needs: [build, check] steps: - uses: actions/checkout@v4 - uses: ./.github/actions/test_meta_cluster timeout-minutes: 10 test_stateless_standalone: - runs-on: [ self-hosted, X64, Linux, 4c8g, 
"${{ inputs.runner_provider }}" ] - needs: [ build, check ] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] + needs: [build, check] steps: - uses: actions/checkout@v4 - uses: ./.github/actions/test_stateless_standalone_linux timeout-minutes: 15 test_stateless_cluster: - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] - needs: [ build, check ] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] + needs: [build, check, license] steps: - uses: actions/checkout@v4 + - name: Decode license + id: license + run: | + echo "license=$(echo '${{ needs.license.outputs.license }}' | base64 -d)" >> $GITHUB_OUTPUT - uses: ./.github/actions/test_stateless_cluster_linux + env: + QUERY_DATABEND_ENTERPRISE_LICENSE: ${{ steps.license.outputs.license }} timeout-minutes: 15 test_stateful_standalone: - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] - needs: [ build, check ] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] + needs: [build, check] steps: - uses: actions/checkout@v4 - uses: ./.github/actions/test_stateful_standalone_linux @@ -235,11 +241,17 @@ jobs: name: test-stateful-standalone-linux test_stateful_cluster: - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] - needs: [ build, check ] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] + needs: [build, check, license] steps: - uses: actions/checkout@v4 + - name: Decode license + id: license + run: | + echo "license=$(echo '${{ needs.license.outputs.license }}' | base64 -d)" >> $GITHUB_OUTPUT - uses: ./.github/actions/test_stateful_cluster_linux + env: + QUERY_DATABEND_ENTERPRISE_LICENSE: ${{ steps.license.outputs.license }} timeout-minutes: 15 - name: Upload failure if: failure() @@ -249,16 +261,16 @@ jobs: test_stateful_large_data: if: contains(github.event.pull_request.labels.*.name, 'ci-largedata') - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] - needs: [ build, check ] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] + needs: [build, check] steps: - uses: actions/checkout@v4 - uses: ./.github/actions/test_stateful_large_data timeout-minutes: 60 test_stateful_iceberg_rest: - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] - needs: [ build, check ] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] + needs: [build, check] steps: - uses: actions/checkout@v4 - uses: ./.github/actions/test_stateful_iceberg_rest_standalone @@ -279,8 +291,8 @@ jobs: # continue-on-error: true test_ee_standalone: - needs: [ build, check, license ] - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] + needs: [build, check, license] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] if: needs.license.outputs.license steps: - uses: actions/checkout@v4 @@ -299,8 +311,8 @@ jobs: name: test-stateful-standalone-linux test_ee_standalone_background: - needs: [ build, check, license ] - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] + needs: [build, check, license] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] if: needs.license.outputs.license steps: - uses: actions/checkout@v4 @@ -320,8 +332,8 @@ jobs: name: test-stateful-standalone-linux test_ee_management_mode: - needs: [ build, check, license ] - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] + 
needs: [build, check, license] + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] if: needs.license.outputs.license steps: - uses: actions/checkout@v4 @@ -340,38 +352,8 @@ jobs: with: name: test-ee-management-mode-linux - sqllogic_ee: - runs-on: [ self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}" ] - if: needs.license.outputs.license - needs: [ build, check, license ] - strategy: - fail-fast: false - matrix: - format: - - "parquet" - - "native" - steps: - - uses: actions/checkout@v4 - - name: Decode license - id: license - run: | - echo "license=$(echo '${{ needs.license.outputs.license }}' | base64 -d)" >> $GITHUB_OUTPUT - - uses: ./.github/actions/test_ee_sqllogic_standalone_linux - timeout-minutes: 15 - env: - QUERY_DATABEND_ENTERPRISE_LICENSE: ${{ steps.license.outputs.license }} - with: - dirs: ee - handlers: mysql,http - storage-format: ${{ matrix.format }} - - name: Upload failure - if: failure() - uses: ./.github/actions/artifact_failure - with: - name: test-ee-sqllogic-standalone-ee-${{ matrix.format }} - sqllogic: - needs: [ build, check ] + needs: [build, check] uses: ./.github/workflows/reuse.sqllogic.yml secrets: inherit with: diff --git a/.github/workflows/reuse.sqllogic.yml b/.github/workflows/reuse.sqllogic.yml index edf94f9d5cfe5..c33b0fae46f32 100644 --- a/.github/workflows/reuse.sqllogic.yml +++ b/.github/workflows/reuse.sqllogic.yml @@ -19,6 +19,28 @@ env: RUNNER_PROVIDER: ${{ inputs.runner_provider }} jobs: + license: + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] + outputs: + license: ${{ steps.license.outputs.license }} + steps: + - uses: actions/checkout@v4 + - name: Get License from S3 + if: inputs.runner_provider == 'aws' + run: | + aws s3 cp s3://databend-ci/misc/license.key license.key + aws s3 cp s3://databend-ci/misc/license.json license.json + - name: Get License from GCS + if: inputs.runner_provider == 'gcp' + run: | + gcloud storage cp gs://databend-ci/misc/license.key license.key + gcloud storage cp gs://databend-ci/misc/license.json license.json + - name: Output License + id: license + run: | + echo "license=$(cat license.key | base64 -w 0)" >> $GITHUB_OUTPUT + cat license.json + management_mode: runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] steps: @@ -128,6 +150,7 @@ jobs: cluster: runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] + needs: license strategy: fail-fast: false matrix: @@ -145,7 +168,13 @@ jobs: - "http" steps: - uses: actions/checkout@v4 + - name: Decode license + id: license + run: | + echo "license=$(echo '${{ needs.license.outputs.license }}' | base64 -d)" >> $GITHUB_OUTPUT - uses: ./.github/actions/test_sqllogic_cluster_linux + env: + QUERY_DATABEND_ENTERPRISE_LICENSE: ${{ steps.license.outputs.license }} timeout-minutes: 15 with: dirs: ${{ matrix.dirs }} @@ -201,3 +230,32 @@ jobs: uses: ./.github/actions/artifact_failure with: name: test-sqllogic-standalone-no-table-meta-cache-${{ matrix.dirs }}-${{ matrix.handler }} + + ee: + runs-on: [self-hosted, X64, Linux, 4c8g, "${{ inputs.runner_provider }}"] + needs: license + strategy: + fail-fast: false + matrix: + format: + - "parquet" + - "native" + steps: + - uses: actions/checkout@v4 + - name: Decode license + id: license + run: | + echo "license=$(echo '${{ needs.license.outputs.license }}' | base64 -d)" >> $GITHUB_OUTPUT + - uses: ./.github/actions/test_ee_sqllogic_standalone_linux + timeout-minutes: 15 + env: + QUERY_DATABEND_ENTERPRISE_LICENSE: ${{ 
steps.license.outputs.license }} + with: + dirs: ee + handlers: mysql,http + storage-format: ${{ matrix.format }} + - name: Upload failure + if: failure() + uses: ./.github/actions/artifact_failure + with: + name: test-ee-sqllogic-standalone-ee-${{ matrix.format }} diff --git a/Cargo.toml b/Cargo.toml index ceb602b561f46..43be6996548bb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -104,8 +104,8 @@ members = [ "src/tests/sqlsmith", ] -[workspace.dependencies] # Workspace dependencies +[workspace.dependencies] databend-common-arrow = { path = "src/common/arrow" } databend-common-ast = { path = "src/query/ast" } databend-common-async-functions = { path = "src/query/async_functions" } diff --git a/clippy.toml b/clippy.toml index 21e89856ab760..21de708a92a5d 100644 --- a/clippy.toml +++ b/clippy.toml @@ -37,4 +37,4 @@ disallowed-macros = [ avoid-breaking-exported-api = true too-many-arguments-threshold = 10 upper-case-acronyms-aggressive = false -enum-variant-size-threshold = 200 \ No newline at end of file +enum-variant-size-threshold = 200 From 0ffe50a9384e84cab3987e6361368113c5e83d88 Mon Sep 17 00:00:00 2001 From: baishen Date: Tue, 9 Jul 2024 17:20:26 +0800 Subject: [PATCH 11/21] fix(query): fix drop table column with quotes (#16006) * fix(query): fix drop table column with quotes * add tests * fix * fix --- src/query/sql/src/planner/binder/ddl/index.rs | 4 ++-- src/query/sql/src/planner/binder/ddl/table.rs | 15 ++++++++------- src/query/sql/src/planner/semantic/type_check.rs | 2 +- .../05_0028_ddl_alter_table_add_drop_column.test | 16 ++++++++++++---- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/query/sql/src/planner/binder/ddl/index.rs b/src/query/sql/src/planner/binder/ddl/index.rs index 6fca4bef0d3e9..2d55447ea08de 100644 --- a/src/query/sql/src/planner/binder/ddl/index.rs +++ b/src/query/sql/src/planner/binder/ddl/index.rs @@ -213,8 +213,8 @@ impl Binder { if !agg_index_checker.is_supported() { return Err(ErrorCode::UnsupportedIndex(format!( "Currently create aggregating index just support simple query, like: {}, \ - and these aggregate funcs: {}, \ - and non-deterministic functions are not support like: NOW()", + and these aggregate funcs: {}, \ + and non-deterministic functions are not support like: NOW()", "SELECT ... FROM ... WHERE ... 
GROUP BY ...", SUPPORTED_AGGREGATING_INDEX_FUNCTIONS.join(",") ))); diff --git a/src/query/sql/src/planner/binder/ddl/table.rs b/src/query/sql/src/planner/binder/ddl/table.rs index 77920e702f5db..5b7ef2d41359f 100644 --- a/src/query/sql/src/planner/binder/ddl/table.rs +++ b/src/query/sql/src/planner/binder/ddl/table.rs @@ -887,16 +887,16 @@ impl Binder { let mut lock_guard = None; let action_in_plan = match action { ModifyColumnAction::SetMaskingPolicy(column, name) => { - ModifyColumnActionInPlan::SetMaskingPolicy( - column.to_string(), - name.to_string(), - ) + let column = self.normalize_object_identifier(column); + ModifyColumnActionInPlan::SetMaskingPolicy(column, name.to_string()) } ModifyColumnAction::UnsetMaskingPolicy(column) => { - ModifyColumnActionInPlan::UnsetMaskingPolicy(column.to_string()) + let column = self.normalize_object_identifier(column); + ModifyColumnActionInPlan::UnsetMaskingPolicy(column) } ModifyColumnAction::ConvertStoredComputedColumn(column) => { - ModifyColumnActionInPlan::ConvertStoredComputedColumn(column.to_string()) + let column = self.normalize_object_identifier(column); + ModifyColumnActionInPlan::ConvertStoredComputedColumn(column) } ModifyColumnAction::SetDataType(column_def_vec) => { let mut field_and_comment = Vec::with_capacity(column_def_vec.len()); @@ -933,11 +933,12 @@ impl Binder { }))) } AlterTableAction::DropColumn { column } => { + let column = self.normalize_object_identifier(column); Ok(Plan::DropTableColumn(Box::new(DropTableColumnPlan { catalog, database, table, - column: column.to_string(), + column, }))) } AlterTableAction::AlterTableClusterKey { cluster_by } => { diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 8b89fd42223eb..66e43cc9245ea 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -4356,7 +4356,7 @@ pub fn resolve_type_name(type_name: &TypeName, not_null: bool) -> Result names .iter() .map(|i| { - if i.quote.is_some() { + if i.is_quoted() { i.name.clone() } else { i.name.to_lowercase() diff --git a/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column.test b/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column.test index 00f47da55af96..4a4bc8e5b59db 100644 --- a/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column.test +++ b/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column.test @@ -1,6 +1,9 @@ statement ok USE default +statement ok +set sql_dialect = 'PostgreSQL' + statement ok DROP TABLE IF EXISTS `05_0028_at_t0` @@ -25,13 +28,21 @@ SELECT * FROM `05_0028_at_t0` 1 (1,100,12.34,'abc','2020-01-01','2020-01-01 00:00:00.000000') [101,2] 4.0 statement ok -create table `05_0028_at_t0_1`(a int not null, b Tuple(Bool, Int64, Float64, String, Date, Timestamp) not null, c Array(UInt8) not null, d double not null) AS SELECT * FROM `05_0028_at_t0`; +create table `05_0028_at_t0_1`(a int not null, b Tuple(Bool, Int64, Float64, String, Date, Timestamp) not null, c Array(UInt8) not null, "s-f_f" double not null) AS SELECT * FROM `05_0028_at_t0`; query ITIF SELECT * FROM `05_0028_at_t0_1` ---- 1 (1,100,12.34,'abc','2020-01-01','2020-01-01 00:00:00.000000') [101,2] 4.0 +statement ok +alter table `05_0028_at_t0_1` drop column "s-f_f" + +query ITI +SELECT * FROM `05_0028_at_t0_1` +---- +1 (1,100,12.34,'abc','2020-01-01','2020-01-01 00:00:00.000000') [101,2] + statement ok DROP TABLE IF EXISTS 
`05_0028_at_t0_1` @@ -184,9 +195,6 @@ insert into `05_0028_at_t0_3` values(1) statement ok ALTER TABLE `05_0028_at_t0_3` ADD COLUMN a float not null default 0.1 COMMENT 'first' FIRST -statement ok -set sql_dialect = 'PostgreSQL' - query TT SHOW CREATE TABLE `05_0028_at_t0_3` ---- From 6f07d868f109acbd3d26b1a34222f23351dba4ff Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Tue, 9 Jul 2024 21:51:55 +0800 Subject: [PATCH 12/21] fix(query): fix incorrect fast_memcmp function (#16008) --- src/common/hashtable/src/utils.rs | 11 ++++++++--- src/common/hashtable/tests/it/main.rs | 26 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/src/common/hashtable/src/utils.rs b/src/common/hashtable/src/utils.rs index 3dd98c85304e3..bb2d631d5a210 100644 --- a/src/common/hashtable/src/utils.rs +++ b/src/common/hashtable/src/utils.rs @@ -196,10 +196,15 @@ pub mod sse { } match size / 16 { - 3 if !compare_sse2(a.add(32), b.add(32)) => false, - 2 if !compare_sse2(a.add(16), b.add(16)) => false, + 3 if !compare_sse2(a.add(32), b.add(32)) + || !compare_sse2(a.add(16), b.add(16)) + || !compare_sse2(a, b) => + { + false + } + 2 if !compare_sse2(a.add(16), b.add(16)) || !compare_sse2(a, b) => false, 1 if !compare_sse2(a, b) => false, - _ => compare_sse2(a.add(size - 16), b.add(size - 16)), + _ => compare_sse2(a.add(size).sub(16), b.add(size).sub(16)), } } } diff --git a/src/common/hashtable/tests/it/main.rs b/src/common/hashtable/tests/it/main.rs index 897587bffdd82..ddd71033f5af4 100644 --- a/src/common/hashtable/tests/it/main.rs +++ b/src/common/hashtable/tests/it/main.rs @@ -20,6 +20,7 @@ use std::sync::atomic::Ordering; use std::sync::Arc; use bumpalo::Bump; +use databend_common_hashtable::fast_memcmp; use databend_common_hashtable::DictionaryKeys; use databend_common_hashtable::DictionaryStringHashMap; use databend_common_hashtable::HashMap; @@ -27,6 +28,7 @@ use databend_common_hashtable::HashtableEntryMutRefLike; use databend_common_hashtable::HashtableLike; use databend_common_hashtable::ShortStringHashMap; use databend_common_hashtable::StackHashMap; +use rand::distributions::Alphanumeric; use rand::Rng; macro_rules! 
simple_test { @@ -90,6 +92,30 @@ fn test_stack_hash_map() { simple_test!(StackHashMap); } +#[test] +fn test_fast_memcmp() { + let mut rng = rand::thread_rng(); + for size in 1..129 { + let a: Vec = rand::thread_rng() + .sample_iter(&Alphanumeric) + .take(size) + .collect(); + for _ in 0..1024 { + // change a random byte in b and cmpare with a + let mut b = a.clone(); + let idx = rng.gen_range(0..size); + if b[idx] == u8::MAX { + b[idx] = 1; + } else { + b[idx] += 1; + } + assert!(!fast_memcmp(a.as_slice(), b.as_slice())); + b[idx] = a[idx]; + assert!(fast_memcmp(a.as_slice(), b.as_slice())); + } + } +} + #[test] fn test_unsized_hash_map() { static COUNT: AtomicUsize = AtomicUsize::new(0); From cdc86c9842af37d02acd6dd1bb22be2632aa19d6 Mon Sep 17 00:00:00 2001 From: Andy Lok Date: Wed, 10 Jul 2024 06:53:30 +0800 Subject: [PATCH 13/21] chore: update nom-rule (#16009) --- Cargo.lock | 6 +++--- src/query/ast/Cargo.toml | 4 ++-- src/query/ast/src/ast/query.rs | 4 ++-- src/query/ast/src/lib.rs | 3 --- src/query/ast/src/parser/query.rs | 1 + 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8aa2b226b2fb2..352e39f8775fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3117,7 +3117,7 @@ dependencies = [ [[package]] name = "databend-common-ast" -version = "0.0.0" +version = "0.0.1" dependencies = [ "criterion", "derive-visitor", @@ -10672,9 +10672,9 @@ dependencies = [ [[package]] name = "nom-rule" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea8dd3c6d80d1e61031aecc5fdfaf4b7d2324dbd94d575ac12f94e5d6856c2d4" +checksum = "8f0f33cd048b8e0f5cd2fc6fd26f4feda8e33b4781673e59fe1406b137600e9d" dependencies = [ "nom", "pratt 0.3.0", diff --git a/src/query/ast/Cargo.toml b/src/query/ast/Cargo.toml index 4eb9e7fc52a6a..bd12016f14b8c 100644 --- a/src/query/ast/Cargo.toml +++ b/src/query/ast/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "databend-common-ast" -version = "0.0.0" +version = "0.0.1" publish = true description = "SQL parser for Databend" authors = { workspace = true } @@ -20,7 +20,7 @@ itertools = { workspace = true } logos = "0.12.1" minitrace = { workspace = true } nom = "7.1.1" -nom-rule = "0.3.0" +nom-rule = "0.3.1" ordered-float = { workspace = true } pratt = "0.4.0" pretty = "0.11.3" diff --git a/src/query/ast/src/ast/query.rs b/src/query/ast/src/ast/query.rs index 1b91ff7072fcc..ce0f192e033bb 100644 --- a/src/query/ast/src/ast/query.rs +++ b/src/query/ast/src/ast/query.rs @@ -384,7 +384,7 @@ impl SelectTarget { pub fn has_window(&self) -> bool { match self { - SelectTarget::AliasedExpr { box expr, .. } => match expr { + SelectTarget::AliasedExpr { expr, .. } => match &**expr { Expr::FunctionCall { func, .. } => func.window.is_some(), _ => false, }, @@ -394,7 +394,7 @@ impl SelectTarget { pub fn function_call_name(&self) -> Option { match self { - SelectTarget::AliasedExpr { box expr, .. } => match expr { + SelectTarget::AliasedExpr { expr, .. } => match &**expr { Expr::FunctionCall { func, .. } if func.window.is_none() => { Some(func.name.name.to_lowercase()) } diff --git a/src/query/ast/src/lib.rs b/src/query/ast/src/lib.rs index fb189ada198ce..b2f8dc4d68f67 100644 --- a/src/query/ast/src/lib.rs +++ b/src/query/ast/src/lib.rs @@ -12,10 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![feature(box_patterns)] // TODO(xuanwo): Add crate level documents here. 
-#![allow(clippy::uninlined_format_args)] -#![allow(clippy::type_complexity)] pub mod ast; mod error; diff --git a/src/query/ast/src/parser/query.rs b/src/query/ast/src/parser/query.rs index 0e2d82b766e44..6e809686df933 100644 --- a/src/query/ast/src/parser/query.rs +++ b/src/query/ast/src/parser/query.rs @@ -395,6 +395,7 @@ pub fn exclude_col(i: Input) -> IResult> { )(i) } +#[allow(clippy::type_complexity)] pub fn select_target(i: Input) -> IResult { fn qualified_wildcard_transform( res: Option<(Identifier, &Token<'_>, Option<(Identifier, &Token<'_>)>)>, From 737f02ef0d36a3d6a16aeeecbb0aec7154dbd3ce Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Wed, 10 Jul 2024 11:00:36 +0800 Subject: [PATCH 14/21] feat: http handler limit body size of each response to 10MB. (#15960) --- .../servers/http/v1/http_query_handlers.rs | 13 ++-- src/query/service/src/servers/http/v1/mod.rs | 4 +- .../src/servers/http/v1/query/page_manager.rs | 69 +++++++++++++------ .../v1/{json_block.rs => string_block.rs} | 30 ++++---- .../it/servers/http/http_query_handlers.rs | 50 +++++++------- .../tests/it/servers/http/json_block.rs | 27 +++----- 6 files changed, 107 insertions(+), 86 deletions(-) rename src/query/service/src/servers/http/v1/{json_block.rs => string_block.rs} (75%) diff --git a/src/query/service/src/servers/http/v1/http_query_handlers.rs b/src/query/service/src/servers/http/v1/http_query_handlers.rs index 490808aa29e4d..ba47de355e884 100644 --- a/src/query/service/src/servers/http/v1/http_query_handlers.rs +++ b/src/query/service/src/servers/http/v1/http_query_handlers.rs @@ -35,7 +35,6 @@ use poem::IntoResponse; use poem::Route; use serde::Deserialize; use serde::Serialize; -use serde_json::Value as JsonValue; use super::query::ExecuteStateKind; use super::query::HttpQueryRequest; @@ -46,7 +45,7 @@ use crate::servers::http::v1::query::Progresses; use crate::servers::http::v1::HttpQueryContext; use crate::servers::http::v1::HttpQueryManager; use crate::servers::http::v1::HttpSessionConf; -use crate::servers::http::v1::JsonBlock; +use crate::servers::http::v1::StringBlock; use crate::sessions::QueryAffect; const HEADER_QUERY_ID: &str = "X-DATABEND-QUERY-ID"; @@ -128,7 +127,7 @@ pub struct QueryResponse { #[serde(skip_serializing_if = "Option::is_none")] pub has_result_set: Option, pub schema: Vec, - pub data: Vec>, + pub data: Vec>, pub affect: Option, pub stats: QueryStats, @@ -148,11 +147,11 @@ impl QueryResponse { ) -> impl IntoResponse { let state = r.state.clone(); let (data, next_uri) = if is_final { - (JsonBlock::empty(), None) + (StringBlock::empty(), None) } else { match state.state { ExecuteStateKind::Running | ExecuteStateKind::Starting => match r.data { - None => (JsonBlock::empty(), Some(make_state_uri(&id))), + None => (StringBlock::empty(), Some(make_state_uri(&id))), Some(d) => { let uri = match d.next_page_no { Some(n) => Some(make_page_uri(&id, n)), @@ -161,9 +160,9 @@ impl QueryResponse { (d.page.data, uri) } }, - ExecuteStateKind::Failed => (JsonBlock::empty(), Some(make_final_uri(&id))), + ExecuteStateKind::Failed => (StringBlock::empty(), Some(make_final_uri(&id))), ExecuteStateKind::Succeeded => match r.data { - None => (JsonBlock::empty(), Some(make_final_uri(&id))), + None => (StringBlock::empty(), Some(make_final_uri(&id))), Some(d) => { let uri = match d.next_page_no { Some(n) => Some(make_page_uri(&id, n)), diff --git a/src/query/service/src/servers/http/v1/mod.rs b/src/query/service/src/servers/http/v1/mod.rs index 5c2f5cf7370ff..2988debf62e12 100644 --- 
a/src/query/service/src/servers/http/v1/mod.rs +++ b/src/query/service/src/servers/http/v1/mod.rs @@ -13,10 +13,10 @@ // limitations under the License. mod http_query_handlers; -pub mod json_block; mod login; mod query; mod stage; +pub mod string_block; mod suggestions; pub use http_query_handlers::make_final_uri; @@ -26,7 +26,6 @@ pub use http_query_handlers::query_route; pub use http_query_handlers::QueryError; pub use http_query_handlers::QueryResponse; pub use http_query_handlers::QueryStats; -pub(crate) use json_block::JsonBlock; pub(crate) use login::login_handler; pub use query::ExecuteStateKind; pub use query::ExpiringMap; @@ -36,6 +35,7 @@ pub use query::HttpQueryManager; pub use query::HttpSessionConf; pub use stage::upload_to_stage; pub use stage::UploadToStageResponse; +pub(crate) use string_block::StringBlock; pub use suggestions::list_suggestions; pub use suggestions::SuggestionsResponse; diff --git a/src/query/service/src/servers/http/v1/query/page_manager.rs b/src/query/service/src/servers/http/v1/query/page_manager.rs index 1fd3d1d8e2289..491dc1aad41f5 100644 --- a/src/query/service/src/servers/http/v1/query/page_manager.rs +++ b/src/query/service/src/servers/http/v1/query/page_manager.rs @@ -24,11 +24,10 @@ use databend_common_io::prelude::FormatSettings; use log::debug; use log::info; use parking_lot::RwLock; -use serde_json::Value as JsonValue; -use crate::servers::http::v1::json_block::block_to_json_value; use crate::servers::http::v1::query::sized_spsc::SizedChannelReceiver; -use crate::servers::http::v1::JsonBlock; +use crate::servers::http::v1::string_block::block_to_strings; +use crate::servers::http::v1::StringBlock; #[derive(Debug, PartialEq, Eq)] pub enum Wait { @@ -38,7 +37,7 @@ pub enum Wait { #[derive(Clone)] pub struct Page { - pub data: JsonBlock, + pub data: StringBlock, pub total_rows: usize, } @@ -54,7 +53,7 @@ pub struct PageManager { end: bool, block_end: bool, last_page: Option, - row_buffer: VecDeque>, + row_buffer: VecDeque>, block_receiver: SizedChannelReceiver, format_settings: Arc>>, } @@ -109,7 +108,7 @@ impl PageManager { // but the response may be lost and client will retry, // we simply return an empty page. let page = Page { - data: JsonBlock::default(), + data: StringBlock::default(), total_rows: self.total_rows, }; Ok(page) @@ -130,42 +129,61 @@ impl PageManager { fn append_block( &mut self, - rows: &mut Vec>, + rows: &mut Vec>, block: DataBlock, - remain: usize, + remain_rows: usize, + remain_size: &mut usize, ) -> Result<()> { let format_settings = { let guard = self.format_settings.read(); guard.as_ref().unwrap().clone() }; - let mut iter = block_to_json_value(&block, &format_settings)? + let mut iter = block_to_strings(&block, &format_settings)? 
.into_iter() .peekable(); - let chunk: Vec<_> = iter.by_ref().take(remain).collect(); + let chunk: Vec<_> = iter + .by_ref() + .take(remain_rows) + .take_while(|r| { + let size = row_size(r); + let ok = *remain_size > size; + if ok { + *remain_size -= size; + } + ok + }) + .collect(); rows.extend(chunk); self.row_buffer = iter.by_ref().collect(); Ok(()) } #[async_backtrace::framed] - async fn collect_new_page(&mut self, tp: &Wait) -> Result<(JsonBlock, bool)> { - let mut res: Vec> = Vec::with_capacity(self.max_rows_per_page); + async fn collect_new_page(&mut self, tp: &Wait) -> Result<(StringBlock, bool)> { + let mut res: Vec> = Vec::with_capacity(self.max_rows_per_page); + let mut max_size_per_page = 10 * 1024 * 1024; while res.len() < self.max_rows_per_page { if let Some(row) = self.row_buffer.pop_front() { - res.push(row) - } else { - break; + let size = row_size(&row); + if max_size_per_page > size { + res.push(row); + max_size_per_page -= size; + continue; + } } + break; } loop { assert!(self.max_rows_per_page >= res.len()); - let remain = self.max_rows_per_page - res.len(); - if remain == 0 { + let remain_rows = self.max_rows_per_page - res.len(); + if remain_rows == 0 { break; } match tp { Wait::Async => match self.block_receiver.try_recv() { - Some(block) => self.append_block(&mut res, block, remain)?, + Some(block) => { + self.append_block(&mut res, block, remain_rows, &mut max_size_per_page)? + } None => break, }, Wait::Deadline(t) => { @@ -174,7 +192,12 @@ impl PageManager { match tokio::time::timeout(d, self.block_receiver.recv()).await { Ok(Some(block)) => { debug!("http query got new block with {} rows", block.num_rows()); - self.append_block(&mut res, block, remain)?; + self.append_block( + &mut res, + block, + remain_rows, + &mut max_size_per_page, + )?; } Ok(None) => { info!("http query reach end of blocks"); @@ -189,7 +212,7 @@ impl PageManager { } } - let block = JsonBlock { data: res }; + let block = StringBlock { data: res }; // try to report 'no more data' earlier to client to avoid unnecessary http call if !self.block_end { @@ -206,3 +229,9 @@ impl PageManager { self.row_buffer.clear() } } + +fn row_size(row: &[String]) -> usize { + let n = row.len(); + // ["1","2"], + row.iter().map(|s| s.len()).sum::() + n * 3 + 2 +} diff --git a/src/query/service/src/servers/http/v1/json_block.rs b/src/query/service/src/servers/http/v1/string_block.rs similarity index 75% rename from src/query/service/src/servers/http/v1/json_block.rs rename to src/query/service/src/servers/http/v1/string_block.rs index 08b395edfc8d7..894403b6144d5 100644 --- a/src/query/service/src/servers/http/v1/json_block.rs +++ b/src/query/service/src/servers/http/v1/string_block.rs @@ -19,19 +19,15 @@ use databend_common_expression::Column; use databend_common_expression::DataBlock; use databend_common_formats::field_encoder::FieldEncoderValues; use databend_common_io::prelude::FormatSettings; -use serde_json::Value as JsonValue; #[derive(Debug, Clone, Default)] -pub struct JsonBlock { - pub(crate) data: Vec>, +pub struct StringBlock { + pub(crate) data: Vec>, } -pub type JsonBlockRef = Arc; +pub type StringBlockRef = Arc; -pub fn block_to_json_value( - block: &DataBlock, - format: &FormatSettings, -) -> Result>> { +pub fn block_to_strings(block: &DataBlock, format: &FormatSettings) -> Result>> { if block.is_empty() { return Ok(vec![]); } @@ -48,29 +44,29 @@ pub fn block_to_json_value( FieldEncoderValues::create_for_http_handler(format.timezone, format.geometry_format); let mut buf = vec![]; for row_index in 
0..rows_size { - let mut row: Vec = Vec::with_capacity(block.num_columns()); + let mut row: Vec = Vec::with_capacity(block.num_columns()); for column in &columns { buf.clear(); encoder.write_field(column, row_index, &mut buf, false); - row.push(serde_json::to_value(String::from_utf8_lossy(&buf))?); + row.push(String::from_utf8_lossy(&buf).into_owned()); } res.push(row) } Ok(res) } -impl JsonBlock { +impl StringBlock { pub fn empty() -> Self { Self { data: vec![] } } pub fn new(block: &DataBlock, format: &FormatSettings) -> Result { - Ok(JsonBlock { - data: block_to_json_value(block, format)?, + Ok(StringBlock { + data: block_to_strings(block, format)?, }) } - pub fn concat(blocks: Vec) -> Self { + pub fn concat(blocks: Vec) -> Self { if blocks.is_empty() { return Self::empty(); } @@ -87,13 +83,13 @@ impl JsonBlock { self.data.is_empty() } - pub fn data(&self) -> &Vec> { + pub fn data(&self) -> &Vec> { &self.data } } -impl From for Vec> { - fn from(block: JsonBlock) -> Self { +impl From for Vec> { + fn from(block: StringBlock) -> Self { block.data } } diff --git a/src/query/service/tests/it/servers/http/http_query_handlers.rs b/src/query/service/tests/it/servers/http/http_query_handlers.rs index ee30738784c8f..eaf37c3a90f15 100644 --- a/src/query/service/tests/it/servers/http/http_query_handlers.rs +++ b/src/query/service/tests/it/servers/http/http_query_handlers.rs @@ -64,7 +64,6 @@ use poem::Response; use poem::Route; use pretty_assertions::assert_eq; use serde_json::json; -use serde_json::Value; use tokio::time::sleep; use wiremock::matchers::method; use wiremock::matchers::path; @@ -200,7 +199,7 @@ impl TestHttpQueryFetchReply { self.resps.last().unwrap().clone() } - fn data(&self) -> Vec> { + fn data(&self) -> Vec> { let mut result = vec![]; for (_, resp) in &self.resps { result.extend(resp.data.clone()); @@ -702,7 +701,7 @@ async fn test_system_tables() -> Result<()> { .data .iter() .flatten() - .map(|j| j.as_str().unwrap().to_string()) + .map(|j| j.to_string()) .collect::>(); let skipped = [ @@ -796,7 +795,7 @@ async fn test_query_log() -> Result<()> { let (status, result) = post_sql_to_endpoint(&ep, sql, 3).await?; assert_eq!(status, StatusCode::OK, "{:?}", result); assert_eq!( - result.data[0][1].as_str().unwrap(), + result.data[0][1].to_string(), result_type_2.stats.running_time_ms.to_string(), ); @@ -806,30 +805,25 @@ async fn test_query_log() -> Result<()> { assert_eq!(result.data.len(), 1, "{:?}", result); assert!( result.data[0][0] - .as_str() - .unwrap() + .to_string() .to_lowercase() .contains("create table"), "{:?}", result ); assert!( - result.data[0][2] - .as_str() - .unwrap() - .to_lowercase() - .contains("exist"), + result.data[0][2].to_lowercase().contains("exist"), "{:?}", result ); assert_eq!( - result.data[0][1].as_str().unwrap(), + result.data[0][1], ErrorCode::TABLE_ALREADY_EXISTS.to_string(), "{:?}", result ); assert_eq!( - result.data[0][4].as_str().unwrap(), + result.data[0][4], result_type_3.stats.running_time_ms.to_string(), "{:?}", result @@ -864,22 +858,14 @@ async fn test_query_log_killed() -> Result<()> { let (status, result) = post_sql_to_endpoint(&ep, sql, 3).await?; assert_eq!(status, StatusCode::OK, "{:?}", result); assert_eq!(result.data.len(), 1, "{:?}", result); + assert!(result.data[0][0].contains("sleep"), "{:?}", result); assert!( - result.data[0][0].as_str().unwrap().contains("sleep"), - "{:?}", - result - ); - assert!( - result.data[0][2] - .as_str() - .unwrap() - .to_lowercase() - .contains("killed"), + 
result.data[0][2].to_lowercase().contains("killed"), "{:?}", result ); assert_eq!( - result.data[0][1].as_str().unwrap(), + result.data[0][1], ErrorCode::ABORTED_QUERY.to_string(), "{:?}", result @@ -1769,3 +1755,19 @@ async fn test_has_result_set() -> Result<()> { Ok(()) } + +#[tokio::test(flavor = "current_thread")] +async fn test_max_size_per_page() -> Result<()> { + let _fixture = TestFixture::setup().await?; + + let sql = "select repeat('1', 1000) as a, repeat('2', 1000) from numbers(10000)"; + let wait_time_secs = 5; + let json = serde_json::json!({"sql": sql.to_string(), "pagination": {"wait_time_secs": wait_time_secs}}); + let (_, reply, body) = TestHttpQueryRequest::new(json).fetch_begin().await?; + assert!(reply.error.is_none(), "{:?}", reply.error); + let len = body.len() as i32; + let target = 10485760; // 10M + assert!(len < target); + assert!(len > target - 2000); + Ok(()) +} diff --git a/src/query/service/tests/it/servers/http/json_block.rs b/src/query/service/tests/it/servers/http/json_block.rs index 134ee5d591aa9..11ea1c39f28ff 100644 --- a/src/query/service/tests/it/servers/http/json_block.rs +++ b/src/query/service/tests/it/servers/http/json_block.rs @@ -24,16 +24,8 @@ use databend_common_expression::Column; use databend_common_expression::DataBlock; use databend_common_expression::FromData; use databend_common_io::prelude::FormatSettings; -use databend_query::servers::http::v1::json_block::JsonBlock; +use databend_query::servers::http::v1::string_block::StringBlock; use pretty_assertions::assert_eq; -use serde::Serialize; -use serde_json::to_value; -use serde_json::Value as JsonValue; - -fn val(v: T) -> JsonValue -where T: Serialize { - to_value(v).unwrap() -} fn test_data_block(is_nullable: bool) -> Result<()> { let mut columns = vec![ @@ -59,12 +51,15 @@ fn test_data_block(is_nullable: bool) -> Result<()> { let block = DataBlock::new_from_columns(columns); let format = FormatSettings::default(); - let json_block = JsonBlock::new(&block, &format)?; - let expect = vec![ - vec![val("1"), val("a"), val("1"), val("1.1"), val("1970-01-02")], - vec![val("2"), val("b"), val("1"), val("2.2"), val("1970-01-03")], - vec![val("3"), val("c"), val("0"), val("3.3"), val("1970-01-04")], - ]; + let json_block = StringBlock::new(&block, &format)?; + let expect = [ + vec!["1", "a", "1", "1.1", "1970-01-02"], + vec!["2", "b", "1", "2.2", "1970-01-03"], + vec!["3", "c", "0", "3.3", "1970-01-04"], + ] + .iter() + .map(|r| r.iter().map(|v| v.to_string()).collect::>()) + .collect::>(); assert_eq!(json_block.data().clone(), expect); Ok(()) @@ -84,7 +79,7 @@ fn test_data_block_not_nullable() -> Result<()> { fn test_empty_block() -> Result<()> { let block = DataBlock::empty(); let format = FormatSettings::default(); - let json_block = JsonBlock::new(&block, &format)?; + let json_block = StringBlock::new(&block, &format)?; assert!(json_block.is_empty()); Ok(()) } From 26d24ac0d41552ede4e533608c7e85b05186f240 Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:24:18 +0800 Subject: [PATCH 15/21] fix(query): coalesce continue loop when arg is null (#16002) * chore(query): assume_not_null support NULL type arg * modify * optimize * optimize --- src/query/sql/src/planner/semantic/type_check.rs | 8 +++++++- tests/sqllogictests/suites/crdb/conditional.test | 5 +++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 
66e43cc9245ea..662e32428f1c0 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -2977,7 +2977,6 @@ impl<'a> TypeChecker<'a> { // if(is_not_null(arg0), assume_not_null(arg0), is_not_null(arg1), assume_not_null(arg1), ..., argN) // with constant Literal::Null arguments removed. let mut new_args = Vec::with_capacity(args.len() * 2 + 1); - for arg in args.iter() { if let Expr::Literal { span: _, @@ -2992,6 +2991,13 @@ impl<'a> TypeChecker<'a> { expr: Box::new((*arg).clone()), not: true, }; + if let Ok(res) = self.resolve(&is_not_null_expr) { + if let ScalarExpr::ConstantExpr(c) = res.0 { + if Scalar::Boolean(false) == c.value { + continue; + } + } + } let assume_not_null_expr = Expr::FunctionCall { span, diff --git a/tests/sqllogictests/suites/crdb/conditional.test b/tests/sqllogictests/suites/crdb/conditional.test index 07e6bc4cd95c2..2dfd4984b1774 100644 --- a/tests/sqllogictests/suites/crdb/conditional.test +++ b/tests/sqllogictests/suites/crdb/conditional.test @@ -13,6 +13,11 @@ SELECT IFNULL(1, 2), IFNULL(NULL, 2), COALESCE(1, 2), COALESCE(NULL, ---- 1 2 1 2 +query IIII +select COALESCE(try_to_date('ab'),'2022-02-02'); +---- +2022-02-02 + statement ok DROP TABLE IF EXISTS t From 706dab3dd0eb0551d8d8f19d6767db0e06e3fffb Mon Sep 17 00:00:00 2001 From: Freejww <103876282+Freejww@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:09:10 +0800 Subject: [PATCH 16/21] refactor(query): use shuffle on distributed merge into (#15946) * refactor(query): use shuffle on distributed merge into * fix sqllogical test * chore(query): remove MergeIntoOp * chore(query): add shuffle by block id * add sqllogical tests * fix sqllogical test * refactor(query): keep insert data on local node * revert setting * revert * source as build can use broadcast in inner join * inner join use optimizer's result --- .../pipelines/builders/builder_merge_into.rs | 251 +++++++---------- .../builder_merge_into_append_not_matched.rs | 215 -------------- .../builders/builder_merge_into_manipulate.rs | 50 +--- .../builders/builder_merge_into_organize.rs | 79 +----- .../builders/builder_merge_into_serialize.rs | 127 --------- .../pipelines/builders/builder_row_fetch.rs | 5 +- .../service/src/pipelines/builders/mod.rs | 2 - .../service/src/pipelines/pipeline_builder.rs | 20 +- .../service/src/pipelines/processors/mod.rs | 1 - .../pipelines/processors/transforms/mod.rs | 6 - .../processor_accumulate_row_number.rs | 103 ------- .../processor_deduplicate_row_number.rs | 140 ---------- ...cessor_extract_hash_table_by_row_number.rs | 183 ------------ .../transform_recursive_cte_source.rs | 3 - .../flight/v1/exchange/exchange_injector.rs | 19 +- .../flight/v1/scatter/flight_scatter_hash.rs | 63 ++++- src/query/sql/src/executor/format.rs | 46 +-- src/query/sql/src/executor/physical_plan.rs | 42 --- .../sql/src/executor/physical_plan_builder.rs | 1 - .../sql/src/executor/physical_plan_display.rs | 28 -- .../sql/src/executor/physical_plan_visitor.rs | 50 ---- .../sql/src/executor/physical_plans/mod.rs | 4 - .../physical_plans/physical_merge_into.rs | 262 ++++++------------ .../physical_merge_into_add_row_number.rs | 80 ------ .../physical_merge_into_manipulate.rs | 3 - .../physical_merge_into_organize.rs | 5 +- .../physical_merge_into_serialize.rs | 44 --- .../sql/src/planner/binder/merge_into.rs | 20 +- src/query/sql/src/planner/binder/util.rs | 1 - .../decorrelate/subquery_rewriter.rs | 1 - .../distributed/distributed_merge.rs | 90 +++--- 
.../src/planner/optimizer/distributed/mod.rs | 2 +- src/query/sql/src/planner/optimizer/format.rs | 1 - .../src/planner/optimizer/hyper_dp/dphyp.rs | 2 +- .../sql/src/planner/optimizer/optimizer.rs | 38 +-- .../rule/rewrite/rule_semi_to_inner_join.rs | 1 - src/query/sql/src/planner/optimizer/s_expr.rs | 2 - .../sql/src/planner/plans/add_row_number.rs | 66 ----- src/query/sql/src/planner/plans/merge_into.rs | 4 +- src/query/sql/src/planner/plans/mod.rs | 2 - src/query/sql/src/planner/plans/operator.rs | 14 - .../storages/fuse/src/operations/append.rs | 30 +- .../fuse/src/operations/merge_into/mod.rs | 3 - .../operations/merge_into/processors/mod.rs | 6 - ...istributed_merge_into_block_deserialize.rs | 90 ------ ..._distributed_merge_into_block_serialize.rs | 69 ----- .../transform_add_rownumber_column.rs | 94 ------- .../merge_into_non_equal_distributed.test | 194 ++++++------- 48 files changed, 448 insertions(+), 2114 deletions(-) delete mode 100644 src/query/service/src/pipelines/builders/builder_merge_into_append_not_matched.rs delete mode 100644 src/query/service/src/pipelines/builders/builder_merge_into_serialize.rs delete mode 100644 src/query/service/src/pipelines/processors/transforms/processor_accumulate_row_number.rs delete mode 100644 src/query/service/src/pipelines/processors/transforms/processor_deduplicate_row_number.rs delete mode 100644 src/query/service/src/pipelines/processors/transforms/processor_extract_hash_table_by_row_number.rs delete mode 100644 src/query/sql/src/executor/physical_plans/physical_merge_into_add_row_number.rs delete mode 100644 src/query/sql/src/executor/physical_plans/physical_merge_into_serialize.rs delete mode 100644 src/query/sql/src/planner/plans/add_row_number.rs delete mode 100644 src/query/storages/fuse/src/operations/merge_into/processors/processor_distributed_merge_into_block_deserialize.rs delete mode 100644 src/query/storages/fuse/src/operations/merge_into/processors/processor_distributed_merge_into_block_serialize.rs delete mode 100644 src/query/storages/fuse/src/operations/merge_into/processors/transform_add_rownumber_column.rs diff --git a/src/query/service/src/pipelines/builders/builder_merge_into.rs b/src/query/service/src/pipelines/builders/builder_merge_into.rs index 9c1590290f0a2..eb7c55616c815 100644 --- a/src/query/service/src/pipelines/builders/builder_merge_into.rs +++ b/src/query/service/src/pipelines/builders/builder_merge_into.rs @@ -12,65 +12,36 @@ // See the License for the specific language governing permissions and // limitations under the License. 
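A toy illustration of the "shuffle by block id" idea mentioned in the commit message above, under the assumption (not shown in this hunk) that the scatter key is derived from the block id carried in each row id, so all matched rows of one block are rewritten on a single node; the real routing lives in flight_scatter_hash.rs.

    // Hypothetical helper, names invented for the example: bucket row indexes by
    // the node that owns their block so each block is mutated on exactly one node.
    fn scatter_by_block_id(row_block_ids: &[u64], cluster_size: u64) -> Vec<Vec<usize>> {
        let mut buckets = vec![Vec::new(); cluster_size as usize];
        for (row, block_id) in row_block_ids.iter().enumerate() {
            buckets[(block_id % cluster_size) as usize].push(row);
        }
        buckets
    }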
-use std::sync::atomic::AtomicU64; use std::sync::Arc; +use databend_common_base::base::tokio::sync::Semaphore; use databend_common_catalog::table::Table; -use databend_common_catalog::table_context::TableContext; -use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::BlockThresholds; use databend_common_expression::DataSchema; use databend_common_expression::DataSchemaRef; use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::ProcessorPtr; use databend_common_pipeline_core::Pipe; -use databend_common_pipeline_core::TransformPipeBuilder; use databend_common_pipeline_transforms::processors::create_dummy_item; use databend_common_pipeline_transforms::processors::BlockCompactor; use databend_common_pipeline_transforms::processors::TransformCompact; use databend_common_pipeline_transforms::processors::TransformPipelineHelper; +use databend_common_sql::binder::MergeIntoType; use databend_common_sql::executor::physical_plans::MergeInto; -use databend_common_sql::executor::physical_plans::MergeIntoAddRowNumber; -use databend_common_storages_fuse::operations::TransformAddRowNumberColumnProcessor; -use databend_common_storages_fuse::operations::TransformDistributedMergeIntoBlockSerialize; +use databend_common_sql::executor::physical_plans::MutationKind; +use databend_common_storages_fuse::operations::TransformSerializeBlock; use databend_common_storages_fuse::operations::TransformSerializeSegment; use databend_common_storages_fuse::operations::UnMatchedExprs; use databend_common_storages_fuse::FuseTable; -use crate::pipelines::processors::transforms::AccumulateRowNumber; use crate::pipelines::processors::transforms::TransformAddComputedColumns; use crate::pipelines::processors::TransformResortAddOnWithoutSourceSchema; use crate::pipelines::PipelineBuilder; impl PipelineBuilder { - // build add row_number column for source table when enable_right_broadcast = true - // it must be distributed merge into execution - pub(crate) fn build_merge_into_add_row_number( - &mut self, - merge_into_add_row_number: &MergeIntoAddRowNumber, - ) -> Result<()> { - self.build_pipeline(&merge_into_add_row_number.input)?; - - let node_index = merge_into_add_row_number - .cluster_index - .get(&self.ctx.get_cluster().local_id); - - if node_index.is_none() { - return Err(ErrorCode::NotFoundClusterNode(format!( - "can't find out {} when build distributed merge into pipeline", - self.ctx.get_cluster().local_id - ))); - } - let node_index = *node_index.unwrap() as u16; - let row_number = Arc::new(AtomicU64::new(0)); - self.main_pipeline.add_transformer(|| { - TransformAddRowNumberColumnProcessor::new(node_index, row_number.clone()) - }); - Ok(()) - } - - // build merge into mutation pipeline + // build merge into serialize and mutation pipeline pub(crate) fn build_merge_into(&mut self, merge_into: &MergeInto) -> Result<()> { self.build_pipeline(&merge_into.input)?; @@ -81,6 +52,12 @@ impl PipelineBuilder { let table = FuseTable::try_from_table(tbl.as_ref())?; let block_thresholds = table.get_block_thresholds(); + let cluster_stats_gen = + table.get_cluster_stats_gen(self.ctx.clone(), 0, block_thresholds, None)?; + + let io_request_semaphore = + Arc::new(Semaphore::new(self.settings.get_max_threads()? 
as usize)); + let serialize_segment_transform = TransformSerializeSegment::new( InputPort::create(), OutputPort::create(), @@ -88,18 +65,71 @@ impl PipelineBuilder { block_thresholds, ); - // complete pipeline: - // aggregate_mutator port/dummy_item port aggregate_mutator port/dummy_item port (this depends on apply_row_id) + // For row_id port, create rowid_aggregate_mutator + // For matched data port and unmatched port, do serialize + let serialize_len = match merge_into.merge_type { + MergeIntoType::InsertOnly => self.main_pipeline.output_len(), + MergeIntoType::FullOperation | MergeIntoType::MatchedOnly => { + // remove row id port + self.main_pipeline.output_len() - 1 + } + }; + + // 1. Fill default and computed columns + self.build_fill_columns_in_merge_into( + tbl.clone(), + serialize_len, + merge_into.need_match, + merge_into.unmatched.clone(), + )?; + + // 2. Add cluster‘s blocksort if it's a cluster table + self.build_compact_and_cluster_sort_in_merge_into( + table, + merge_into.need_match, + serialize_len, + block_thresholds, + )?; + + let mut pipe_items = Vec::with_capacity(self.main_pipeline.output_len()); + + // 3.1 Add rowid_aggregate_mutator for row_id port + if merge_into.need_match { + pipe_items.push(table.rowid_aggregate_mutator( + self.ctx.clone(), + cluster_stats_gen.clone(), + io_request_semaphore, + merge_into.segments.clone(), + false, + )?); + } + + // 3.2 Add serialize_block_transform for data port + for _ in 0..serialize_len { + let serialize_block_transform = TransformSerializeBlock::try_create( + self.ctx.clone(), + InputPort::create(), + OutputPort::create(), + table, + cluster_stats_gen.clone(), + MutationKind::MergeInto, + )?; + pipe_items.push(serialize_block_transform.into_pipe_item()); + } + + let output_len = pipe_items.iter().map(|item| item.outputs_port.len()).sum(); + self.main_pipeline.add_pipe(Pipe::create( + self.main_pipeline.output_len(), + output_len, + pipe_items, + )); + + // The complete pipeline: + // aggregate_mutator port aggregate_mutator port // serialize_block port0 - // serialize_block port1 ======> serialize_block port + // serialize_block port1 ======> serialize_block port // ....... - // row_number_port (enable_right_broadcast = true) row_number_port - let output_len = self.main_pipeline.output_len(); - let mut ranges = Vec::with_capacity(output_len); - let (serialize_len, _) = merge_into - .merge_into_op - .get_serialize_and_row_number_len(output_len, merge_into.enable_right_broadcast); - + let mut ranges = Vec::with_capacity(self.main_pipeline.output_len()); // row id port let row_id_offset = if merge_into.need_match { ranges.push(vec![0]); @@ -108,24 +138,16 @@ impl PipelineBuilder { 0 }; - // resize data ports - // for distributed insert-only(right anti join), the serialize_len is zero. 
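The resize logic in this hunk hands `resize_partial_one` one range for the row_id port and a single range covering every serialize-block port, so the data ports collapse into one downstream port while the row_id port stays separate. A minimal, self-contained sketch of that layout, using a hypothetical helper rather than any Databend API:

    // Illustration only: mirrors how the ranges passed to resize_partial_one
    // are laid out after the serialize stage of the merge-into pipeline.
    fn merge_into_resize_ranges(need_match: bool, serialize_len: usize) -> Vec<Vec<usize>> {
        let mut ranges = Vec::new();
        let row_id_offset = if need_match {
            ranges.push(vec![0]); // the row_id port keeps its own output
            1
        } else {
            0
        };
        // every serialize-block port is merged into a single data port
        ranges.push((0..serialize_len).map(|idx| idx + row_id_offset).collect());
        ranges
    }

    fn main() {
        // a row_id port plus three data ports: [[0], [1, 2, 3]]
        assert_eq!(merge_into_resize_ranges(true, 3), vec![vec![0], vec![1, 2, 3]]);
        // insert-only with two data ports: [[0, 1]]
        assert_eq!(merge_into_resize_ranges(false, 2), vec![vec![0, 1]]);
    }

The merged data port then feeds TransformSerializeSegment, while a dummy item forwards the row_id port.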
- if serialize_len > 0 { - let mut vec = Vec::with_capacity(output_len); - for idx in 0..serialize_len { - vec.push(idx + row_id_offset); - } - ranges.push(vec); - } - - // with row_number - if merge_into.enable_right_broadcast { - ranges.push(vec![output_len - 1]); + // Resize data ports + debug_assert!(serialize_len > 0); + let mut vec = Vec::with_capacity(self.main_pipeline.output_len()); + for idx in 0..serialize_len { + vec.push(idx + row_id_offset); } - + ranges.push(vec); self.main_pipeline.resize_partial_one(ranges)?; - let pipe_items = if !merge_into.distributed { + let pipe_items = { let mut vec = Vec::with_capacity(2); if merge_into.need_match { // row_id port @@ -134,70 +156,19 @@ impl PipelineBuilder { // data port vec.push(serialize_segment_transform.into_pipe_item()); vec - } else { - let mut vec = Vec::with_capacity(3); - if merge_into.need_match { - // row_id port - vec.push(create_dummy_item()) - } - // for distributed insert-only(right anti join), the serialize_len is zero. - if serialize_len > 0 { - // data port - vec.push(serialize_segment_transform.into_pipe_item()); - } - if merge_into.enable_right_broadcast { - // row number port - vec.push(create_dummy_item()) - } - vec }; - // the complete pipeline: - // -----------Standalone----------- + // The complete pipeline: // output_port0: MutationLogs(row_id) // output_port1: MutationLogs(data) // 1. FullOperation and MatchedOnly: same as above // 2. InsertOnly: no output_port0 - - //-----------Distributed----------- - // output_port0: MutationLogs(row_id) - // output_port1: MutationLogs(data) - // output_port2: row_number (enable_right_broadcast = true) - // 1. MatchedOnly, no output_port2 - // 2. InsertOnly: no output_port0 - let output_len = pipe_items.iter().map(|item| item.outputs_port.len()).sum(); self.main_pipeline.add_pipe(Pipe::create( self.main_pipeline.output_len(), output_len, pipe_items, )); - - // accumulate row_number - if merge_into.enable_right_broadcast { - let pipe_items = if merge_into.need_match { - vec![ - create_dummy_item(), - create_dummy_item(), - AccumulateRowNumber::create()?.into_pipe_item(), - ] - } else { - vec![AccumulateRowNumber::create()?.into_pipe_item()] - }; - let output_len = pipe_items.iter().map(|item| item.outputs_port.len()).sum(); - self.main_pipeline.add_pipe(Pipe::create( - self.main_pipeline.output_len(), - output_len, - pipe_items, - )); - } - - // add distributed_merge_into_block_serialize - // we will wrap rowid and log as MixRowIdKindAndLog - if merge_into.distributed && merge_into.change_join_order { - self.main_pipeline - .add_transformer(|| TransformDistributedMergeIntoBlockSerialize {}); - } Ok(()) } @@ -205,34 +176,11 @@ impl PipelineBuilder { &mut self, tbl: Arc, transform_len: usize, - is_build_merge_into_append_not_matched: bool, - distributed: bool, need_match: bool, - enable_right_broadcast: bool, unmatched: UnMatchedExprs, ) -> Result<()> { let table = FuseTable::try_from_table(tbl.as_ref())?; - let add_builder_pipe = |mut builder: TransformPipeBuilder| -> Pipe { - if is_build_merge_into_append_not_matched { - builder.add_items(vec![create_dummy_item()]); - } else if !distributed { - if need_match { - builder.add_items_prepend(vec![create_dummy_item()]); - } - } else { - if need_match { - // receive row_id - builder.add_items_prepend(vec![create_dummy_item()]); - } - if enable_right_broadcast { - // receive row_number - builder.add_items(vec![create_dummy_item()]); - } - } - builder.finalize() - }; - // fill default columns let table_default_schema = 
&table.schema_with_stream().remove_computed_fields(); let mut builder = self @@ -249,7 +197,10 @@ impl PipelineBuilder { }, transform_len, )?; - self.main_pipeline.add_pipe(add_builder_pipe(builder)); + if need_match { + builder.add_items_prepend(vec![create_dummy_item()]); + } + self.main_pipeline.add_pipe(builder.finalize()); // fill computed columns let table_computed_schema = &table.schema_with_stream().remove_virtual_computed_fields(); @@ -268,7 +219,10 @@ impl PipelineBuilder { }, transform_len, )?; - self.main_pipeline.add_pipe(add_builder_pipe(builder)); + if need_match { + builder.add_items_prepend(vec![create_dummy_item()]); + } + self.main_pipeline.add_pipe(builder.finalize()); } Ok(()) } @@ -276,13 +230,10 @@ impl PipelineBuilder { pub fn build_compact_and_cluster_sort_in_merge_into( &mut self, table: &FuseTable, - is_build_merge_into_append_not_matched: bool, need_match: bool, - enable_right_broadcast: bool, - mid_len: usize, - last_len: usize, + transform_len: usize, + block_thresholds: BlockThresholds, ) -> Result<()> { - let block_thresholds = table.get_block_thresholds(); // we should avoid too much little block write, because for s3 write, there are too many // little blocks, it will cause high latency. let mut builder = self.main_pipeline.add_transform_with_specified_len( @@ -293,21 +244,11 @@ impl PipelineBuilder { BlockCompactor::new(block_thresholds), )?)) }, - mid_len, + transform_len, )?; - - if is_build_merge_into_append_not_matched { - builder.add_items(vec![create_dummy_item()]); - } - if need_match { builder.add_items_prepend(vec![create_dummy_item()]); } - - // need to receive row_number, we should give a dummy item here. - if enable_right_broadcast { - builder.add_items(vec![create_dummy_item()]); - } self.main_pipeline.add_pipe(builder.finalize()); // cluster sort @@ -315,8 +256,8 @@ impl PipelineBuilder { self.ctx.clone(), &mut self.main_pipeline, block_thresholds, - mid_len, - last_len, + transform_len, + need_match, )?; Ok(()) } diff --git a/src/query/service/src/pipelines/builders/builder_merge_into_append_not_matched.rs b/src/query/service/src/pipelines/builders/builder_merge_into_append_not_matched.rs deleted file mode 100644 index 0eafd54f6ec0b..0000000000000 --- a/src/query/service/src/pipelines/builders/builder_merge_into_append_not_matched.rs +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::sync::Arc; - -use databend_common_base::base::tokio::sync::Semaphore; -use databend_common_catalog::table::Table; -use databend_common_exception::Result; -use databend_common_pipeline_core::processors::InputPort; -use databend_common_pipeline_core::processors::OutputPort; -use databend_common_pipeline_core::Pipe; -use databend_common_pipeline_transforms::processors::create_dummy_item; -use databend_common_sql::binder::MergeIntoType; -use databend_common_sql::executor::physical_plans::MergeIntoAppendNotMatched; -use databend_common_sql::executor::physical_plans::MutationKind; -use databend_common_storages_fuse::operations::MergeIntoNotMatchedProcessor; -use databend_common_storages_fuse::operations::RowNumberAndLogSplitProcessor; -use databend_common_storages_fuse::operations::TransformDistributedMergeIntoBlockDeserialize; -use databend_common_storages_fuse::operations::TransformSerializeBlock; -use databend_common_storages_fuse::operations::TransformSerializeSegment; -use databend_common_storages_fuse::FuseTable; - -use crate::pipelines::processors::transforms::ExtractHashTableByRowNumber; -use crate::pipelines::processors::DeduplicateRowNumber; -use crate::pipelines::PipelineBuilder; - -impl PipelineBuilder { - // build merge into append not matched pipeline. - // it must be distributed merge into execution - pub(crate) fn build_merge_into_append_not_matched( - &mut self, - merge_into_append_not_macted: &MergeIntoAppendNotMatched, - ) -> Result<()> { - self.build_pipeline(&merge_into_append_not_macted.input)?; - // there are two cases: - // 1. if source is build side (change_join_order = false). - // receive row numbers and MutationLogs, exactly below: - // 1.1 full operation: MutationLogs and row numbers - // 1.2 matched only: MutationLogs - // 1.3 insert only: row numbers - // 2. if target table is build side (change_join_order = true). - // receive rowids and MutationLogs,exactly below: - // 2.1 full operation: rowids and MutationLogs - // 2.2 matched only: rowids and MutationLogs - // 2.3 insert only: MutationLogs - - // deserialize MixRowIdKindAndLog - if merge_into_append_not_macted.change_join_order { - self.main_pipeline - .add_transform(|transform_input_port, transform_output_port| { - Ok(TransformDistributedMergeIntoBlockDeserialize::create( - transform_input_port, - transform_output_port, - )) - })?; - } - - let tbl = self - .ctx - .build_table_by_table_info(&merge_into_append_not_macted.table_info, None)?; - let table = FuseTable::try_from_table(tbl.as_ref())?; - let block_thresholds = table.get_block_thresholds(); - let cluster_stats_gen = - table.get_cluster_stats_gen(self.ctx.clone(), 0, block_thresholds, None)?; - - // case 1 - if !merge_into_append_not_macted.change_join_order { - if matches!( - merge_into_append_not_macted.merge_type, - MergeIntoType::MatchedOnly - ) { - // we will receive MutationLogs only without row_number. 
- return Ok(()); - } - assert!(self.join_state.is_some()); - assert!(self.merge_into_probe_data_fields.is_some()); - self.main_pipeline.resize(1, false)?; - let join_state = self.join_state.clone().unwrap(); - // split row_number and log - // output_port_row_number - // output_port_log - self.main_pipeline - .add_pipe(RowNumberAndLogSplitProcessor::create()?.into_pipe()); - - // accumulate source data which is not matched from hashstate - let pipe_items = vec![ - DeduplicateRowNumber::create()?.into_pipe_item(), - create_dummy_item(), - ]; - self.main_pipeline.add_pipe(Pipe::create(2, 2, pipe_items)); - - let pipe_items = vec![ - ExtractHashTableByRowNumber::create( - join_state, - self.merge_into_probe_data_fields.clone().unwrap(), - merge_into_append_not_macted.merge_type.clone(), - )? - .into_pipe_item(), - create_dummy_item(), - ]; - self.main_pipeline.add_pipe(Pipe::create(2, 2, pipe_items)); - - // not matched operation - let merge_into_not_matched_processor = MergeIntoNotMatchedProcessor::create( - merge_into_append_not_macted.unmatched.clone(), - merge_into_append_not_macted.input_schema.clone(), - self.func_ctx.clone(), - self.ctx.clone(), - )?; - let pipe_items = vec![ - merge_into_not_matched_processor.into_pipe_item(), - create_dummy_item(), - ]; - self.main_pipeline.add_pipe(Pipe::create(2, 2, pipe_items)); - - // split row_number and log - // output_port_not_matched_data - // output_port_log - // start to append data - // 1. fill default and computed columns - self.build_fill_columns_in_merge_into( - tbl.clone(), - 1, - true, - false, - false, - false, - merge_into_append_not_macted.unmatched.clone(), - )?; - - // 2. compact blocks and cluster sort - self.build_compact_and_cluster_sort_in_merge_into(table, true, false, false, 1, 1)?; - - // 3. serialize block - let serialize_block_transform = TransformSerializeBlock::try_create( - self.ctx.clone(), - InputPort::create(), - OutputPort::create(), - table, - cluster_stats_gen.clone(), - MutationKind::MergeInto, - )?; - - let pipe_items = vec![ - serialize_block_transform.into_pipe_item(), - create_dummy_item(), - ]; - self.main_pipeline.add_pipe(Pipe::create(2, 2, pipe_items)); - - // 4. serialize segment - let serialize_segment_transform = TransformSerializeSegment::new( - InputPort::create(), - OutputPort::create(), - table, - block_thresholds, - ); - let pipe_items = vec![ - serialize_segment_transform.into_pipe_item(), - create_dummy_item(), - ]; - self.main_pipeline.add_pipe(Pipe::create(2, 2, pipe_items)); - - // resize to one, because they are all mutation logs now. - self.main_pipeline.try_resize(1)?; - } else { - // case 2 - if matches!( - merge_into_append_not_macted.merge_type, - MergeIntoType::InsertOnly - ) { - // we will receive MutationLogs only without rowids. - return Ok(()); - } - self.main_pipeline.resize(1, false)?; - // we will receive MutationLogs and rowids. So we should apply - // rowids firstly and then send all mutation logs to commit sink. 
- // we need to spilt rowid and mutationlogs, and we can get pipeitems: - // 1.row_id port - // 2.logs port - self.main_pipeline - .add_pipe(RowNumberAndLogSplitProcessor::create()?.into_pipe()); - - let max_threads = self.settings.get_max_threads()?; - let io_request_semaphore = Arc::new(Semaphore::new(max_threads as usize)); - // MutationsLogs port0 - // MutationsLogs port1 - assert_eq!(self.main_pipeline.output_len(), 2); - self.main_pipeline.add_pipe(Pipe::create(2, 2, vec![ - table.rowid_aggregate_mutator( - self.ctx.clone(), - cluster_stats_gen, - io_request_semaphore, - merge_into_append_not_macted.segments.clone(), - false, // we don't support for distributed mode. - )?, - create_dummy_item(), - ])); - assert_eq!(self.main_pipeline.output_len(), 2); - self.main_pipeline.try_resize(1)?; - } - - Ok(()) - } -} diff --git a/src/query/service/src/pipelines/builders/builder_merge_into_manipulate.rs b/src/query/service/src/pipelines/builders/builder_merge_into_manipulate.rs index 9ae56a6f757b4..c381fc503489a 100644 --- a/src/query/service/src/pipelines/builders/builder_merge_into_manipulate.rs +++ b/src/query/service/src/pipelines/builders/builder_merge_into_manipulate.rs @@ -16,14 +16,8 @@ use std::sync::Arc; use databend_common_exception::Result; use databend_common_expression::DataSchema; -use databend_common_pipeline_core::processors::InputPort; -use databend_common_pipeline_core::processors::OutputPort; -use databend_common_pipeline_core::processors::ProcessorPtr; use databend_common_pipeline_core::Pipe; -use databend_common_pipeline_core::PipeItem; use databend_common_sql::binder::MergeIntoType; -use databend_common_sql::evaluator::BlockOperator; -use databend_common_sql::evaluator::CompoundBlockOperator; use databend_common_sql::executor::physical_plans::MergeIntoManipulate; use databend_common_storages_fuse::operations::MatchedSplitProcessor; use databend_common_storages_fuse::operations::MergeIntoNotMatchedProcessor; @@ -85,47 +79,17 @@ impl PipelineBuilder { } if need_unmatch { - // If merge into doesn't contain right broadcast join, execute insert in local. 
- if !merge_into_manipulate.enable_right_broadcast { - let merge_into_not_matched_processor = MergeIntoNotMatchedProcessor::create( - merge_into_manipulate.unmatched.clone(), - merge_into_manipulate.unmatched_schema.clone(), - self.func_ctx.clone(), - self.ctx.clone(), - )?; - pipe_items.push(merge_into_not_matched_processor.into_pipe_item()); - } else { - let input_num_columns = input_schema.num_fields(); - debug_assert!( - merge_into_manipulate.source_row_id_idx.is_some() - || merge_into_manipulate.source_row_number_idx.is_some() - ); - let idx = merge_into_manipulate - .source_row_id_idx - .unwrap_or_else(|| merge_into_manipulate.source_row_number_idx.unwrap()); - let input_port = InputPort::create(); - let output_port = OutputPort::create(); - // project row number column - let proc = ProcessorPtr::create(CompoundBlockOperator::create( - input_port.clone(), - output_port.clone(), - input_num_columns, - self.func_ctx.clone(), - vec![BlockOperator::Project { - projection: vec![idx], - }], - )); - pipe_items.push(PipeItem { - processor: proc, - inputs_port: vec![input_port], - outputs_port: vec![output_port], - }) - }; + let merge_into_not_matched_processor = MergeIntoNotMatchedProcessor::create( + merge_into_manipulate.unmatched.clone(), + merge_into_manipulate.unmatched_schema.clone(), + self.func_ctx.clone(), + self.ctx.clone(), + )?; + pipe_items.push(merge_into_not_matched_processor.into_pipe_item()); } } let output_len = pipe_items.iter().map(|item| item.outputs_port.len()).sum(); - self.main_pipeline.add_pipe(Pipe::create( self.main_pipeline.output_len(), output_len, diff --git a/src/query/service/src/pipelines/builders/builder_merge_into_organize.rs b/src/query/service/src/pipelines/builders/builder_merge_into_organize.rs index 04a5cde6fa9d6..5754fa60d680e 100644 --- a/src/query/service/src/pipelines/builders/builder_merge_into_organize.rs +++ b/src/query/service/src/pipelines/builders/builder_merge_into_organize.rs @@ -13,7 +13,7 @@ // limitations under the License. use databend_common_exception::Result; -use databend_common_sql::executor::physical_plans::MergeIntoOp; +use databend_common_sql::binder::MergeIntoType; use databend_common_sql::executor::physical_plans::MergeIntoOrganize; use crate::pipelines::PipelineBuilder; @@ -26,7 +26,8 @@ impl PipelineBuilder { ) -> Result<()> { self.build_pipeline(&merge_into_organize.input)?; - // ------------------------------Standalone------------------------------------------------- + // The complete pipeline: + // ----------------------------------------------------------------------------------------- // row_id port0_1 row_id port0_1 row_id port0_1 // matched data port0_2 ..... row_id port1_1 row_id port // unmatched port0_3 data port0_2 ...... @@ -36,40 +37,12 @@ impl PipelineBuilder { // ...... ..... // ----------------------------------------------------------------------------------------- // 1. matched only or complete pipeline are same with above - // 2. for unmatched only/insert only, there are no row_id port - - // ---------------------Distributed(change_join_order = false)------------ - // row_id port0_1 row_id port0_1 row_id port - // matched data port0_2 row_id port1_1 matched data port0_2 - // row_number port0_3 matched data port0_2 matched data port1_2 - // row_id port1_1 matched data port1_2 ...... - // matched data port1_2 ===> ..... ====> ...... - // row_number port1_3 ..... ...... - // row_number port0_3 row_number port - // ...... row_number port1_3 - // ...... ..... 
- // ---------------------------------------------------------------------- - // 1.for matched only, there is no row_number port - // 2.for unmatched only/insert only, there is no row_id port and matched data port - - // ---------------------Distributed(change_join_order = true)------------ - // row_id port0_1 row_id port0_1 row_id port - // matched data port0_2 row_id port1_1 matched data port0_2 - // unmatched port0_3 matched data port0_2 matched data port1_2 - // row_id port1_1 matched data port1_2 ...... - // matched data port1_2 ===> ..... ====> ...... - // unmatched port1_3 ..... ...... - // unmatched port0_3 unmatched port - // ...... unmatched port1_3 - // ...... ..... - // ---------------------------------------------------------------------- - // 1.for matched only, there is no unmatched port - // 2.for unmatched only/insert only, there is no row_id port and matched data port + // 2. for unmatched only, there are no row_id port let mut ranges = Vec::with_capacity(self.main_pipeline.output_len()); let mut rules = Vec::with_capacity(self.main_pipeline.output_len()); - match merge_into_organize.merge_into_op { - MergeIntoOp::StandaloneFullOperation => { + match merge_into_organize.merge_type { + MergeIntoType::FullOperation => { assert_eq!(self.main_pipeline.output_len() % 3, 0); // merge matched update ports and not matched ports ===> data ports for idx in (0..self.main_pipeline.output_len()).step_by(3) { @@ -86,27 +59,8 @@ impl PipelineBuilder { self.main_pipeline.reorder_inputs(rules); self.resize_row_id(2)?; } - MergeIntoOp::StandaloneMatchedOnly => { - let row_id_len = self.main_pipeline.output_len() / 2; - for idx in 0..row_id_len { - rules.push(idx); - rules.push(idx + row_id_len); - } - self.main_pipeline.reorder_inputs(rules); - self.resize_row_id(2)?; - } - MergeIntoOp::StandaloneInsertOnly => {} - MergeIntoOp::DistributedFullOperation => { - let row_id_len = self.main_pipeline.output_len() / 3; - for idx in 0..row_id_len { - rules.push(idx); - rules.push(idx + row_id_len); - rules.push(idx + row_id_len * 2); - } - self.main_pipeline.reorder_inputs(rules); - self.resize_row_id(3)?; - } - MergeIntoOp::DistributedMatchedOnly => { + MergeIntoType::MatchedOnly => { + assert_eq!(self.main_pipeline.output_len() % 2, 0); let row_id_len = self.main_pipeline.output_len() / 2; for idx in 0..row_id_len { rules.push(idx); @@ -115,10 +69,7 @@ impl PipelineBuilder { self.main_pipeline.reorder_inputs(rules); self.resize_row_id(2)?; } - MergeIntoOp::DistributedInsertOnly => { - // insert-only, there are only row_number ports/unmatched ports - self.main_pipeline.try_resize(1)?; - } + MergeIntoType::InsertOnly => {} } Ok(()) } @@ -133,21 +84,11 @@ impl PipelineBuilder { } ranges.push(vec.clone()); - // Standalone: data port(matched update port and unmatched port) - // Distributed: matched update port + // data ports for idx in 0..row_id_len { ranges.push(vec![idx + row_id_len]); } - // Distributed: need to resize row_number port/unmatched data port. 
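With the distributed row-number branch gone, `resize_row_id(2)` only ever merges the row_id half of the ports and forwards each data port unchanged. A short, self-contained sketch of the resulting range layout (hypothetical helper, assuming the row_id ports occupy the first half after reordering):

    // Illustration only: the ranges built by resize_row_id(2) once the ports are
    // ordered as [row_id_0, .., row_id_{n-1}, data_0, .., data_{n-1}].
    fn row_id_resize_ranges(output_len: usize) -> Vec<Vec<usize>> {
        let row_id_len = output_len / 2;
        let mut ranges = Vec::new();
        // all row_id ports collapse into a single port
        ranges.push((0..row_id_len).collect());
        // each data port passes through untouched
        for idx in 0..row_id_len {
            ranges.push(vec![idx + row_id_len]);
        }
        ranges
    }

    fn main() {
        // four ports in, three out: one merged row_id port plus two data ports
        assert_eq!(row_id_resize_ranges(4), vec![vec![0, 1], vec![2], vec![3]]);
    }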
- if step == 3 { - vec.clear(); - for idx in 0..row_id_len { - vec.push(idx + row_id_len * 2); - } - ranges.push(vec); - } - self.main_pipeline.resize_partial_one(ranges.clone()) } } diff --git a/src/query/service/src/pipelines/builders/builder_merge_into_serialize.rs b/src/query/service/src/pipelines/builders/builder_merge_into_serialize.rs deleted file mode 100644 index 55ff086894c11..0000000000000 --- a/src/query/service/src/pipelines/builders/builder_merge_into_serialize.rs +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::Arc; - -use databend_common_base::base::tokio::sync::Semaphore; -use databend_common_catalog::table::Table; -use databend_common_exception::Result; -use databend_common_pipeline_core::processors::InputPort; -use databend_common_pipeline_core::processors::OutputPort; -use databend_common_pipeline_core::Pipe; -use databend_common_pipeline_transforms::processors::create_dummy_item; -use databend_common_sql::executor::physical_plans::MergeIntoSerialize; -use databend_common_sql::executor::physical_plans::MutationKind; -use databend_common_storages_fuse::operations::TransformSerializeBlock; -use databend_common_storages_fuse::FuseTable; - -use crate::pipelines::PipelineBuilder; - -impl PipelineBuilder { - pub(crate) fn build_merge_into_serialize( - &mut self, - merge_into_serialize: &MergeIntoSerialize, - ) -> Result<()> { - self.build_pipeline(&merge_into_serialize.input)?; - - // For row_id port, if (distributed && !enable_right_broadcast) create dummy; else create rowid_aggregate_mutator - // For matched data port and unmatched port, do serialize - // For row_number port, create dummy - - let tbl = self - .ctx - .build_table_by_table_info(&merge_into_serialize.table_info, None)?; - - let table = FuseTable::try_from_table(tbl.as_ref())?; - let block_thresholds = table.get_block_thresholds(); - - let cluster_stats_gen = - table.get_cluster_stats_gen(self.ctx.clone(), 0, block_thresholds, None)?; - - let (serialize_len, row_number_len) = merge_into_serialize - .merge_into_op - .get_serialize_and_row_number_len( - self.main_pipeline.output_len(), - merge_into_serialize.enable_right_broadcast, - ); - - // 1. Fill default and computed columns - self.build_fill_columns_in_merge_into( - tbl.clone(), - serialize_len, - false, - merge_into_serialize.distributed, - merge_into_serialize.need_match, - merge_into_serialize.enable_right_broadcast, - merge_into_serialize.unmatched.clone(), - )?; - - // 2. 
Add cluster‘s blocksort if it's a cluster table - self.build_compact_and_cluster_sort_in_merge_into( - table, - false, - merge_into_serialize.need_match, - merge_into_serialize.enable_right_broadcast, - serialize_len, - row_number_len, - )?; - - let max_threads = self.settings.get_max_threads()?; - let io_request_semaphore = Arc::new(Semaphore::new(max_threads as usize)); - - let mut pipe_items = Vec::with_capacity(self.main_pipeline.output_len()); - - // 3.1 Add rowid_aggregate_mutator for row_id port - if merge_into_serialize.need_match { - // rowid should be accumulated in main node. - if merge_into_serialize.distributed && merge_into_serialize.change_join_order { - pipe_items.push(create_dummy_item()) - } else { - pipe_items.push(table.rowid_aggregate_mutator( - self.ctx.clone(), - cluster_stats_gen.clone(), - io_request_semaphore, - merge_into_serialize.segments.clone(), - false, - )?); - } - } - - // 3.2 Add serialize_block_transform for data port - for _ in 0..serialize_len { - let serialize_block_transform = TransformSerializeBlock::try_create( - self.ctx.clone(), - InputPort::create(), - OutputPort::create(), - table, - cluster_stats_gen.clone(), - MutationKind::MergeInto, - )?; - pipe_items.push(serialize_block_transform.into_pipe_item()); - } - - // 3.3 Add dummy port for row_number - if merge_into_serialize.enable_right_broadcast { - pipe_items.push(create_dummy_item()); - } - - let output_len = pipe_items.iter().map(|item| item.outputs_port.len()).sum(); - self.main_pipeline.add_pipe(Pipe::create( - self.main_pipeline.output_len(), - output_len, - pipe_items, - )); - Ok(()) - } -} diff --git a/src/query/service/src/pipelines/builders/builder_row_fetch.rs b/src/query/service/src/pipelines/builders/builder_row_fetch.rs index 67245a521ae0a..946c59c6b633b 100644 --- a/src/query/service/src/pipelines/builders/builder_row_fetch.rs +++ b/src/query/service/src/pipelines/builders/builder_row_fetch.rs @@ -28,7 +28,10 @@ impl PipelineBuilder { pub(crate) fn build_row_fetch(&mut self, row_fetch: &RowFetch) -> Result<()> { debug_assert!(matches!( &*row_fetch.input, - PhysicalPlan::Limit(_) | PhysicalPlan::HashJoin(_) | PhysicalPlan::MergeIntoSplit(_) + PhysicalPlan::Limit(_) + | PhysicalPlan::HashJoin(_) + | PhysicalPlan::MergeIntoSplit(_) + | PhysicalPlan::ExchangeSource(_) )); self.build_pipeline(&row_fetch.input)?; let processor = row_fetch_processor( diff --git a/src/query/service/src/pipelines/builders/mod.rs b/src/query/service/src/pipelines/builders/mod.rs index e89c7d2d25bd1..19d2b88219c7f 100644 --- a/src/query/service/src/pipelines/builders/mod.rs +++ b/src/query/service/src/pipelines/builders/mod.rs @@ -28,10 +28,8 @@ mod builder_insert_multi_table; mod builder_join; mod builder_limit; mod builder_merge_into; -mod builder_merge_into_append_not_matched; mod builder_merge_into_manipulate; mod builder_merge_into_organize; -mod builder_merge_into_serialize; mod builder_merge_into_split; mod builder_on_finished; mod builder_project; diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index 743dada57775c..33c04b6fe8958 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -24,7 +24,7 @@ use databend_common_pipeline_core::processors::PlanScope; use databend_common_pipeline_core::processors::PlanScopeGuard; use databend_common_pipeline_core::Pipeline; use databend_common_settings::Settings; -use 
databend_common_sql::executor::physical_plans::MergeIntoOp; +use databend_common_sql::binder::MergeIntoType; use databend_common_sql::executor::PhysicalPlan; use databend_common_sql::IndexType; @@ -108,14 +108,7 @@ impl PipelineBuilder { pub(crate) fn add_plan_scope(&mut self, plan: &PhysicalPlan) -> Result> { match plan { PhysicalPlan::EvalScalar(v) if v.exprs.is_empty() => Ok(None), - PhysicalPlan::MergeInto(v) - if !matches!( - v.merge_into_op, - MergeIntoOp::DistributedFullOperation | MergeIntoOp::StandaloneFullOperation - ) => - { - Ok(None) - } + PhysicalPlan::MergeInto(v) if v.merge_type != MergeIntoType::FullOperation => Ok(None), // hided plans in profile PhysicalPlan::Shuffle(_) => Ok(None), @@ -195,12 +188,6 @@ impl PipelineBuilder { // Merge into. PhysicalPlan::MergeInto(merge_into) => self.build_merge_into(merge_into), - PhysicalPlan::MergeIntoAppendNotMatched(merge_into_append_not_matched) => { - self.build_merge_into_append_not_matched(merge_into_append_not_matched) - } - PhysicalPlan::MergeIntoAddRowNumber(merge_into_add_row_number) => { - self.build_merge_into_add_row_number(merge_into_add_row_number) - } PhysicalPlan::MergeIntoSplit(merge_into_split) => { self.build_merge_into_split(merge_into_split) } @@ -210,9 +197,6 @@ impl PipelineBuilder { PhysicalPlan::MergeIntoOrganize(merge_into_organize) => { self.build_merge_into_organize(merge_into_organize) } - PhysicalPlan::MergeIntoSerialize(merge_into_serialize) => { - self.build_merge_into_serialize(merge_into_serialize) - } // Commit. PhysicalPlan::CommitSink(plan) => self.build_commit_sink(plan), diff --git a/src/query/service/src/pipelines/processors/mod.rs b/src/query/service/src/pipelines/processors/mod.rs index 8e9fdc2411d38..1d924d948156e 100644 --- a/src/query/service/src/pipelines/processors/mod.rs +++ b/src/query/service/src/pipelines/processors/mod.rs @@ -15,7 +15,6 @@ pub use databend_common_pipeline_core::processors::*; pub(crate) mod transforms; -pub use transforms::DeduplicateRowNumber; pub use transforms::HashJoinBuildState; pub use transforms::HashJoinDesc; pub use transforms::HashJoinState; diff --git a/src/query/service/src/pipelines/processors/transforms/mod.rs b/src/query/service/src/pipelines/processors/transforms/mod.rs index 01ca9bc39271d..f8f19ac7b67ea 100644 --- a/src/query/service/src/pipelines/processors/transforms/mod.rs +++ b/src/query/service/src/pipelines/processors/transforms/mod.rs @@ -15,9 +15,6 @@ pub mod aggregator; pub mod group_by; mod hash_join; -mod processor_accumulate_row_number; -mod processor_deduplicate_row_number; -mod processor_extract_hash_table_by_row_number; pub(crate) mod range_join; mod transform_add_computed_columns; mod transform_add_const_columns; @@ -44,9 +41,6 @@ mod transform_udf_server; mod window; pub use hash_join::*; -pub use processor_accumulate_row_number::AccumulateRowNumber; -pub use processor_deduplicate_row_number::DeduplicateRowNumber; -pub use processor_extract_hash_table_by_row_number::ExtractHashTableByRowNumber; pub use transform_add_computed_columns::TransformAddComputedColumns; pub use transform_add_const_columns::TransformAddConstColumns; pub use transform_add_internal_columns::TransformAddInternalColumns; diff --git a/src/query/service/src/pipelines/processors/transforms/processor_accumulate_row_number.rs b/src/query/service/src/pipelines/processors/transforms/processor_accumulate_row_number.rs deleted file mode 100644 index 1f0dbcc35bca8..0000000000000 --- a/src/query/service/src/pipelines/processors/transforms/processor_accumulate_row_number.rs 
+++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use databend_common_exception::Result; -use databend_common_expression::types::DataType; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::DataBlock; -use databend_common_pipeline_core::processors::InputPort; -use databend_common_pipeline_core::processors::OutputPort; -use databend_common_pipeline_core::processors::ProcessorPtr; -use databend_common_pipeline_core::PipeItem; -use databend_common_pipeline_transforms::processors::AsyncAccumulatingTransform; -use databend_common_pipeline_transforms::processors::AsyncAccumulatingTransformer; -use databend_common_storages_fuse::operations::SourceFullMatched; -use log::info; - -pub struct AccumulateRowNumber { - data_blocks: Vec, -} - -#[async_trait::async_trait] -impl AsyncAccumulatingTransform for AccumulateRowNumber { - const NAME: &'static str = "AccumulateRowNumber"; - - #[async_backtrace::framed] - async fn transform(&mut self, data: DataBlock) -> Result> { - self.accumulate(data).await?; - // no partial output - Ok(None) - } - - #[async_backtrace::framed] - async fn on_finish(&mut self, _output: bool) -> Result> { - self.apply().await - } -} - -impl AccumulateRowNumber { - #[async_backtrace::framed] - pub async fn accumulate(&mut self, data_block: DataBlock) -> Result<()> { - info!( - "accept a block, num_rows:{:?},num_columns:{:?}", - data_block.num_rows(), - data_block.num_columns(), - ); - // if matched all source data, we will get an empty block, but which - // has source join schema,not only row_number,for compound_block project, - // it will do nothing for empty block. - if !data_block.is_empty() { - assert_eq!(data_block.num_columns(), 1); - assert_eq!( - data_block.get_by_offset(0).data_type, - DataType::Number(NumberDataType::UInt64) - ); - self.data_blocks.push(data_block); - } - Ok(()) - } - - #[async_backtrace::framed] - pub async fn apply(&mut self) -> Result> { - // for distributed execution, if it's insert-only - // merge into , we use right anti join.if all source - // data is matched, we can't get any block. - if self.data_blocks.is_empty() { - return Ok(Some(DataBlock::empty_with_meta(Box::new( - SourceFullMatched, - )))); - } - - // row_numbers is small, so concat is ok. 
- Ok(Some(DataBlock::concat(&self.data_blocks)?)) - } -} - -impl AccumulateRowNumber { - pub fn create() -> Result { - Ok(Self { - data_blocks: Vec::with_capacity(10), - }) - } - - pub fn into_pipe_item(self) -> PipeItem { - let input = InputPort::create(); - let output = OutputPort::create(); - let processor_ptr = - AsyncAccumulatingTransformer::create(input.clone(), output.clone(), self); - PipeItem::create(ProcessorPtr::create(processor_ptr), vec![input], vec![ - output, - ]) - } -} diff --git a/src/query/service/src/pipelines/processors/transforms/processor_deduplicate_row_number.rs b/src/query/service/src/pipelines/processors/transforms/processor_deduplicate_row_number.rs deleted file mode 100644 index 238d7e232b483..0000000000000 --- a/src/query/service/src/pipelines/processors/transforms/processor_deduplicate_row_number.rs +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::HashSet; - -use databend_common_arrow::arrow::buffer::Buffer; -use databend_common_exception::Result; -use databend_common_expression::types::DataType; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::UInt64Type; -use databend_common_expression::DataBlock; -use databend_common_expression::FromData; -use databend_common_expression::Value; -use databend_common_metrics::storage::*; -use databend_common_pipeline_core::processors::InputPort; -use databend_common_pipeline_core::processors::OutputPort; -use databend_common_pipeline_core::processors::ProcessorPtr; -use databend_common_pipeline_core::PipeItem; -use databend_common_pipeline_transforms::processors::AsyncAccumulatingTransform; -use databend_common_pipeline_transforms::processors::AsyncAccumulatingTransformer; -use itertools::Itertools; -use log::info; - -pub struct DeduplicateRowNumber { - unique_row_number: HashSet, - accepted_data: bool, -} - -#[async_trait::async_trait] -impl AsyncAccumulatingTransform for DeduplicateRowNumber { - const NAME: &'static str = "DeduplicateRowNumber"; - - #[async_backtrace::framed] - async fn transform(&mut self, data: DataBlock) -> Result> { - self.accumulate(data).await?; - // no partial output - Ok(None) - } - - #[async_backtrace::framed] - async fn on_finish(&mut self, _output: bool) -> Result> { - if self.unique_row_number.is_empty() { - Ok(Some(DataBlock::empty())) - } else { - self.apply().await - } - } -} - -impl DeduplicateRowNumber { - #[async_backtrace::framed] - pub async fn accumulate(&mut self, data_block: DataBlock) -> Result<()> { - // warning!!!: if all source data is matched, will - // we receive a empty block as expected? the answer is yes. - // but if there is still also some data unmatched, we won't receive - // an empty block. 
- if data_block.is_empty() { - merge_into_distributed_empty_row_number(1); - self.unique_row_number.clear(); - self.accepted_data = true; - return Ok(()); - } - - let row_number_vec = get_row_number(&data_block, 0); - merge_into_distributed_deduplicate_row_number(data_block.num_rows() as u32); - if !self.accepted_data { - self.unique_row_number = row_number_vec.into_iter().collect(); - merge_into_distributed_init_unique_number(self.unique_row_number.len() as u32); - info!( - "init unique_row_number_len:{}", - self.unique_row_number.len(), - ); - self.accepted_data = true; - return Ok(()); - } - - let mut new_set = HashSet::with_capacity(self.unique_row_number.len()); - for number in row_number_vec { - if self.unique_row_number.contains(&number) { - new_set.insert(number); - } - } - merge_into_distributed_new_set_len(new_set.len() as u32); - info!("init new_set_len:{}", new_set.len()); - self.unique_row_number = new_set; - Ok(()) - } - - #[async_backtrace::framed] - pub async fn apply(&mut self) -> Result> { - let row_number_vecs = self.unique_row_number.clone().into_iter().collect_vec(); - merge_into_distributed_apply_row_number(row_number_vecs.len() as u32); - Ok(Some(DataBlock::new_from_columns(vec![ - UInt64Type::from_data(row_number_vecs), - ]))) - } -} - -pub(crate) fn get_row_number(data_block: &DataBlock, row_number_idx: usize) -> Buffer { - let row_number_col = data_block.get_by_offset(row_number_idx); - assert_eq!( - row_number_col.data_type, - DataType::Number(NumberDataType::UInt64) - ); - let value = row_number_col.value.try_downcast::().unwrap(); - match value { - Value::Scalar(scalar) => Buffer::from(vec![scalar]), - Value::Column(column) => column, - } -} - -impl DeduplicateRowNumber { - pub fn create() -> Result { - Ok(Self { - unique_row_number: HashSet::new(), - accepted_data: false, - }) - } - - pub fn into_pipe_item(self) -> PipeItem { - let input = InputPort::create(); - let output = OutputPort::create(); - let processor_ptr = - AsyncAccumulatingTransformer::create(input.clone(), output.clone(), self); - PipeItem::create(ProcessorPtr::create(processor_ptr), vec![input], vec![ - output, - ]) - } -} diff --git a/src/query/service/src/pipelines/processors/transforms/processor_extract_hash_table_by_row_number.rs b/src/query/service/src/pipelines/processors/transforms/processor_extract_hash_table_by_row_number.rs deleted file mode 100644 index 2a98320aa8a0b..0000000000000 --- a/src/query/service/src/pipelines/processors/transforms/processor_extract_hash_table_by_row_number.rs +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::any::Any; -use std::collections::HashSet; -use std::sync::Arc; - -use databend_common_arrow::arrow::bitmap::MutableBitmap; -use databend_common_exception::Result; -use databend_common_expression::types::DataType; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::BlockEntry; -use databend_common_expression::DataBlock; -use databend_common_expression::DataField; -use databend_common_expression::Scalar; -use databend_common_expression::Value; -use databend_common_metrics::storage::*; -use databend_common_pipeline_core::processors::Event; -use databend_common_pipeline_core::processors::InputPort; -use databend_common_pipeline_core::processors::OutputPort; -use databend_common_pipeline_core::processors::Processor; -use databend_common_pipeline_core::processors::ProcessorPtr; -use databend_common_pipeline_core::PipeItem; -use databend_common_sql::binder::MergeIntoType; - -use super::hash_join::HashJoinBuildState; -use super::processor_deduplicate_row_number::get_row_number; -pub struct ExtractHashTableByRowNumber { - input_port: Arc, - output_port: Arc, - input_data: Option, - output_data: Vec, - merge_into_probe_data_fields: Vec, - hashstate: Arc, - // if insert only, we don't need to - // fill null BlockEntries - merge_type: MergeIntoType, -} - -impl ExtractHashTableByRowNumber { - pub fn create( - hashstate: Arc, - merge_into_probe_data_fields: Vec, - merge_type: MergeIntoType, - ) -> Result { - Ok(Self { - input_port: InputPort::create(), - output_port: OutputPort::create(), - hashstate, - merge_into_probe_data_fields, - input_data: None, - output_data: Vec::new(), - merge_type, - }) - } - - pub fn into_pipe_item(self) -> PipeItem { - let input = self.input_port.clone(); - let output_port = self.output_port.clone(); - let processor_ptr = ProcessorPtr::create(Box::new(self)); - PipeItem::create(processor_ptr, vec![input], vec![output_port]) - } -} - -impl Processor for ExtractHashTableByRowNumber { - fn name(&self) -> String { - "ExtractHashTableByRowNumber".to_owned() - } - - #[doc = " Reference used for downcast."] - fn as_any(&mut self) -> &mut dyn Any { - self - } - - fn event(&mut self) -> Result { - let finished = self.input_port.is_finished() && self.output_data.is_empty(); - if finished { - self.output_port.finish(); - return Ok(Event::Finished); - } - - let mut pushed_something = false; - - if self.output_port.can_push() && !self.output_data.is_empty() { - self.output_port - .push_data(Ok(self.output_data.pop().unwrap())); - pushed_something = true - } - - if pushed_something { - return Ok(Event::NeedConsume); - } - - if self.input_port.has_data() { - if self.output_data.is_empty() { - self.input_data = Some(self.input_port.pull_data().unwrap()?); - Ok(Event::Sync) - } else { - Ok(Event::NeedConsume) - } - } else { - self.input_port.set_need_data(); - Ok(Event::NeedData) - } - } - - fn process(&mut self) -> Result<()> { - if let Some(data_block) = self.input_data.take() { - if data_block.is_empty() { - merge_into_distributed_hashtable_empty_block(1); - return Ok(()); - } - - merge_into_distributed_hashtable_fetch_row_number(data_block.num_rows() as u32); - let row_number_vec = get_row_number(&data_block, 0); - let length = row_number_vec.len(); - let row_number_set: HashSet = row_number_vec.into_iter().collect(); - assert_eq!(row_number_set.len(), length); - - // get datablocks from hashstate. 
- unsafe { - let build_state = &*self.hashstate.hash_join_state.build_state.get(); - for block in build_state.generation_state.chunks.iter() { - assert_eq!( - block.columns()[block.num_columns() - 1].data_type, - DataType::Number(NumberDataType::UInt64) - ); - let row_numbers = get_row_number(block, block.num_columns() - 1); - let mut bitmap = MutableBitmap::with_capacity(row_numbers.len()); - for row_number in row_numbers.iter() { - if row_number_set.contains(row_number) { - bitmap.push(true); - } else { - bitmap.push(false); - } - } - let filtered_block = block.clone().filter_with_bitmap(&bitmap.into())?; - let res_block = if let MergeIntoType::InsertOnly = self.merge_type { - filtered_block - } else { - // Create null chunk for unmatched rows in probe side - let mut null_block = DataBlock::new( - self.merge_into_probe_data_fields - .iter() - .map(|df| { - BlockEntry::new( - df.data_type().clone(), - Value::Scalar(Scalar::Null), - ) - }) - .collect(), - filtered_block.num_rows(), - ); - null_block.merge_block(filtered_block); - null_block - }; - - if res_block.is_empty() { - merge_into_distributed_hashtable_push_empty_null_block(1); - } else { - merge_into_distributed_hashtable_push_null_block(1); - merge_into_distributed_hashtable_push_null_block_rows( - res_block.num_rows() as u32 - ); - } - self.output_data.push(res_block); - } - } - } - Ok(()) - } -} diff --git a/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs b/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs index 4d638b84981ca..5514635261a50 100644 --- a/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs @@ -331,12 +331,9 @@ async fn create_memory_table_for_cte_scan( | PhysicalPlan::ReplaceDeduplicate(_) | PhysicalPlan::ReplaceInto(_) | PhysicalPlan::MergeInto(_) - | PhysicalPlan::MergeIntoAppendNotMatched(_) - | PhysicalPlan::MergeIntoAddRowNumber(_) | PhysicalPlan::MergeIntoSplit(_) | PhysicalPlan::MergeIntoManipulate(_) | PhysicalPlan::MergeIntoOrganize(_) - | PhysicalPlan::MergeIntoSerialize(_) | PhysicalPlan::CompactSource(_) | PhysicalPlan::CommitSink(_) | PhysicalPlan::ReclusterSource(_) diff --git a/src/query/service/src/servers/flight/v1/exchange/exchange_injector.rs b/src/query/service/src/servers/flight/v1/exchange/exchange_injector.rs index a8e9a4394cf73..4aa65ba175a83 100644 --- a/src/query/service/src/servers/flight/v1/exchange/exchange_injector.rs +++ b/src/query/service/src/servers/flight/v1/exchange/exchange_injector.rs @@ -86,11 +86,20 @@ impl ExchangeInjector for DefaultExchangeInjector { DataExchange::Broadcast(exchange) => Box::new(BroadcastFlightScatter::try_create( exchange.destination_ids.len(), )?), - DataExchange::ShuffleDataExchange(exchange) => HashFlightScatter::try_create( - ctx.get_function_context()?, - exchange.shuffle_keys.clone(), - exchange.destination_ids.len(), - )?, + DataExchange::ShuffleDataExchange(exchange) => { + let local_id = &ctx.get_cluster().local_id; + let local_pos = exchange + .destination_ids + .iter() + .position(|x| x == local_id) + .unwrap(); + HashFlightScatter::try_create( + ctx.get_function_context()?, + exchange.shuffle_keys.clone(), + exchange.destination_ids.len(), + local_pos, + )? 
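The injector now passes the local node's position because, for distributed merge into, the shuffle key is a block-id expression over the target row_id (the `bit_and`/`bit_shift_right` shape detected in the scatter hunk below), and that key is presumably NULL for source rows with no match. Routing NULL keys to `local_pos` rather than node 0 is what keeps insert data on the local node, as the commit message states. A minimal sketch of that routing decision, with hypothetical names and the NULL-means-unmatched reading treated as an assumption:

    // Illustration only: choosing a per-row scatter index once the local node
    // position is known. `None` stands for a NULL shuffle key, assumed here to
    // mark an unmatched (insert-only) source row.
    fn scatter_index(hash_value: Option<u64>, scatter_size: u64, local_pos: u64) -> u64 {
        match hash_value {
            // matched rows: route by the block-id derived hash, modulo node count
            Some(v) => v % scatter_size,
            // NULL key: keep the row on the local node instead of defaulting to node 0
            None => local_pos,
        }
    }

    fn main() {
        let (nodes, local_pos) = (3u64, 2u64);
        assert_eq!(scatter_index(Some(7), nodes, local_pos), 1);
        assert_eq!(scatter_index(None, nodes, local_pos), 2); // insert stays local
    }

In the real scatter the modulo lives in the evaluated `indices_scalar` expression and only the NULL fallback changes, but the routing outcome is the same.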
+ } })) } diff --git a/src/query/service/src/servers/flight/v1/scatter/flight_scatter_hash.rs b/src/query/service/src/servers/flight/v1/scatter/flight_scatter_hash.rs index fd188b62e2c72..ff1d07a2ab9ee 100644 --- a/src/query/service/src/servers/flight/v1/scatter/flight_scatter_hash.rs +++ b/src/query/service/src/servers/flight/v1/scatter/flight_scatter_hash.rs @@ -30,6 +30,7 @@ use databend_common_expression::DataBlock; use databend_common_expression::Evaluator; use databend_common_expression::Expr; use databend_common_expression::FunctionContext; +use databend_common_expression::FunctionID; use databend_common_expression::RemoteExpr; use databend_common_expression::Scalar; use databend_common_expression::Value; @@ -49,9 +50,15 @@ impl HashFlightScatter { func_ctx: FunctionContext, hash_keys: Vec, scatter_size: usize, + local_pos: usize, ) -> Result> { if hash_keys.len() == 1 { - return OneHashKeyFlightScatter::try_create(func_ctx, &hash_keys[0], scatter_size); + return OneHashKeyFlightScatter::try_create( + func_ctx, + &hash_keys[0], + scatter_size, + local_pos, + ); } let hash_key = hash_keys .iter() @@ -79,6 +86,7 @@ struct OneHashKeyFlightScatter { scatter_size: usize, func_ctx: FunctionContext, indices_scalar: Expr, + default_scatter_index: u64, } impl OneHashKeyFlightScatter { @@ -86,7 +94,13 @@ impl OneHashKeyFlightScatter { func_ctx: FunctionContext, hash_key: &RemoteExpr, scatter_size: usize, + local_pos: usize, ) -> Result> { + let default_scatter_index = if shuffle_by_block_id_in_merge_into(hash_key) { + local_pos as u64 + } else { + 0 + }; let indices_scalar = check_function( None, "modulo", @@ -112,6 +126,7 @@ impl OneHashKeyFlightScatter { scatter_size, func_ctx, indices_scalar, + default_scatter_index, })) } } @@ -122,7 +137,7 @@ impl FlightScatter for OneHashKeyFlightScatter { let num = data_block.num_rows(); let indices = evaluator.run(&self.indices_scalar).unwrap(); - let indices = get_hash_values(indices, num)?; + let indices = get_hash_values(indices, num, self.default_scatter_index)?; let data_blocks = DataBlock::scatter(&data_block, &indices, self.scatter_size)?; let block_meta = data_block.get_meta(); @@ -143,7 +158,7 @@ impl FlightScatter for HashFlightScatter { let mut hash_keys = Vec::with_capacity(self.hash_key.len()); for expr in &self.hash_key { let indices = evaluator.run(expr).unwrap(); - let indices = get_hash_values(indices, num)?; + let indices = get_hash_values(indices, num, 0)?; hash_keys.push(indices) } self.combine_hash_keys(&hash_keys, num) @@ -186,10 +201,38 @@ impl HashFlightScatter { } } -fn get_hash_values(column: Value, rows: usize) -> Result> { +fn shuffle_by_block_id_in_merge_into(expr: &RemoteExpr) -> bool { + if let RemoteExpr::FunctionCall { + id: FunctionID::Builtin { name, .. }, + args, + .. + } = expr + { + if name == "bit_and" { + if let RemoteExpr::FunctionCall { + id: FunctionID::Builtin { name, .. }, + .. 
+ } = &args[0] + { + if name == "bit_shift_right" { + return true; + } + } + } + } + false +} + +fn get_hash_values( + column: Value, + rows: usize, + default_scatter_index: u64, +) -> Result> { match column { Value::Scalar(c) => match c { - databend_common_expression::Scalar::Null => Ok(vec![0; rows].into()), + databend_common_expression::Scalar::Null => { + Ok(vec![default_scatter_index; rows].into()) + } databend_common_expression::Scalar::Number(NumberScalar::UInt64(x)) => { Ok(vec![x; rows].into()) } @@ -205,7 +248,7 @@ fn get_hash_values(column: Value, rows: usize) -> Result> { if null_map.unset_bits() == 0 { Ok(column.column) } else if null_map.unset_bits() == null_map.len() { - Ok(vec![0; rows].into()) + Ok(vec![default_scatter_index; rows].into()) } else { let mut need_new_vec = true; if let Some(column) = unsafe { column.column.get_mut() } { @@ -213,7 +256,11 @@ fn get_hash_values(column: Value, rows: usize) -> Result> { .iter_mut() .zip(null_map.iter()) .for_each(|(x, valid)| { - *x *= valid as u64; + if valid { + *x *= valid as u64; + } else { + *x = default_scatter_index; + } }); need_new_vec = false; } @@ -225,7 +272,7 @@ fn get_hash_values(column: Value, rows: usize) -> Result> { .column .iter() .zip(null_map.iter()) - .map(|(x, b)| if b { *x } else { 0 }) + .map(|(x, b)| if b { *x } else { default_scatter_index }) .collect()) } } diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs index 3c07a0554287f..4764111204fd3 100644 --- a/src/query/sql/src/executor/format.rs +++ b/src/query/sql/src/executor/format.rs @@ -49,11 +49,8 @@ use crate::executor::physical_plans::HashJoin; use crate::executor::physical_plans::Limit; use crate::executor::physical_plans::MaterializedCte; use crate::executor::physical_plans::MergeInto; -use crate::executor::physical_plans::MergeIntoAddRowNumber; -use crate::executor::physical_plans::MergeIntoAppendNotMatched; use crate::executor::physical_plans::MergeIntoManipulate; use crate::executor::physical_plans::MergeIntoOrganize; -use crate::executor::physical_plans::MergeIntoSerialize; use crate::executor::physical_plans::MergeIntoSplit; use crate::executor::physical_plans::ProjectSet; use crate::executor::physical_plans::RangeJoin; @@ -251,20 +248,11 @@ fn to_format_tree( } PhysicalPlan::ReplaceInto(_) => Ok(FormatTreeNode::new("Replace".to_string())), PhysicalPlan::MergeInto(plan) => format_merge_into(plan, metadata, profs), - PhysicalPlan::MergeIntoAddRowNumber(plan) => { - format_merge_into_add_row_number(plan, metadata, profs) - } - PhysicalPlan::MergeIntoAppendNotMatched(plan) => { - format_merge_into_append_not_matched(plan, metadata, profs) - } PhysicalPlan::MergeIntoSplit(plan) => format_merge_into_split(plan, metadata, profs), PhysicalPlan::MergeIntoManipulate(plan) => { format_merge_into_manipulate(plan, metadata, profs) } PhysicalPlan::MergeIntoOrganize(plan) => format_merge_into_organize(plan, metadata, profs), - PhysicalPlan::MergeIntoSerialize(plan) => { - format_merge_into_serialize(plan, metadata, profs) - } PhysicalPlan::CteScan(plan) => cte_scan_to_format_tree(plan), PhysicalPlan::RecursiveCteScan(_) => { Ok(FormatTreeNode::new("RecursiveCTEScan".to_string())) @@ -397,15 +385,7 @@ fn format_merge_into( ))]; let target_schema = table_entry.table().schema_with_stream(); - let merge_into_serialize: &PhysicalPlan = &merge_into.input; - let merge_into_organize: &PhysicalPlan = - if let PhysicalPlan::MergeIntoSerialize(plan) = merge_into_serialize { - &plan.input - } else { - return 
Err(ErrorCode::Internal( - "Expect MergeIntoSerialize after MergeInto ".to_string(), - )); - }; + let merge_into_organize: &PhysicalPlan = &merge_into.input; let merge_into_manipulate: &PhysicalPlan = if let PhysicalPlan::MergeIntoOrganize(plan) = merge_into_organize { &plan.input @@ -501,22 +481,6 @@ fn format_merge_into( )) } -fn format_merge_into_add_row_number( - plan: &MergeIntoAddRowNumber, - metadata: &Metadata, - profs: &HashMap, -) -> Result> { - to_format_tree(&plan.input, metadata, profs) -} - -fn format_merge_into_append_not_matched( - plan: &MergeIntoAppendNotMatched, - metadata: &Metadata, - profs: &HashMap, -) -> Result> { - to_format_tree(&plan.input, metadata, profs) -} - fn format_merge_into_split( plan: &MergeIntoSplit, metadata: &Metadata, @@ -541,14 +505,6 @@ fn format_merge_into_organize( to_format_tree(&plan.input, metadata, profs) } -fn format_merge_into_serialize( - plan: &MergeIntoSerialize, - metadata: &Metadata, - profs: &HashMap, -) -> Result> { - to_format_tree(&plan.input, metadata, profs) -} - fn copy_into_table(plan: &CopyIntoTable) -> Result> { Ok(FormatTreeNode::new(format!( "CopyIntoTable: {}", diff --git a/src/query/sql/src/executor/physical_plan.rs b/src/query/sql/src/executor/physical_plan.rs index 53ac47ae1f5cc..07d88f82295e2 100644 --- a/src/query/sql/src/executor/physical_plan.rs +++ b/src/query/sql/src/executor/physical_plan.rs @@ -24,7 +24,6 @@ use itertools::Itertools; use super::physical_plans::MergeIntoManipulate; use super::physical_plans::MergeIntoOrganize; -use super::physical_plans::MergeIntoSerialize; use super::physical_plans::MergeIntoSplit; use crate::executor::physical_plans::AggregateExpand; use crate::executor::physical_plans::AggregateFinal; @@ -58,8 +57,6 @@ use crate::executor::physical_plans::HashJoin; use crate::executor::physical_plans::Limit; use crate::executor::physical_plans::MaterializedCte; use crate::executor::physical_plans::MergeInto; -use crate::executor::physical_plans::MergeIntoAddRowNumber; -use crate::executor::physical_plans::MergeIntoAppendNotMatched; use crate::executor::physical_plans::ProjectSet; use crate::executor::physical_plans::RangeJoin; use crate::executor::physical_plans::ReclusterSink; @@ -124,12 +121,9 @@ pub enum PhysicalPlan { /// MergeInto MergeInto(Box), - MergeIntoAppendNotMatched(Box), - MergeIntoAddRowNumber(Box), MergeIntoSplit(Box), MergeIntoManipulate(Box), MergeIntoOrganize(Box), - MergeIntoSerialize(Box), /// Compact CompactSource(Box), @@ -316,16 +310,6 @@ impl PhysicalPlan { *next_id += 1; plan.input.adjust_plan_id(next_id); } - PhysicalPlan::MergeIntoAddRowNumber(plan) => { - plan.plan_id = *next_id; - *next_id += 1; - plan.input.adjust_plan_id(next_id); - } - PhysicalPlan::MergeIntoAppendNotMatched(plan) => { - plan.plan_id = *next_id; - *next_id += 1; - plan.input.adjust_plan_id(next_id); - } PhysicalPlan::MergeIntoSplit(plan) => { plan.plan_id = *next_id; *next_id += 1; @@ -341,11 +325,6 @@ impl PhysicalPlan { *next_id += 1; plan.input.adjust_plan_id(next_id); } - PhysicalPlan::MergeIntoSerialize(plan) => { - plan.plan_id = *next_id; - *next_id += 1; - plan.input.adjust_plan_id(next_id); - } PhysicalPlan::CommitSink(plan) => { plan.plan_id = *next_id; *next_id += 1; @@ -455,12 +434,9 @@ impl PhysicalPlan { PhysicalPlan::Udf(v) => v.plan_id, PhysicalPlan::DeleteSource(v) => v.plan_id, PhysicalPlan::MergeInto(v) => v.plan_id, - PhysicalPlan::MergeIntoAddRowNumber(v) => v.plan_id, - PhysicalPlan::MergeIntoAppendNotMatched(v) => v.plan_id, PhysicalPlan::MergeIntoSplit(v) => v.plan_id, 
PhysicalPlan::MergeIntoManipulate(v) => v.plan_id, PhysicalPlan::MergeIntoOrganize(v) => v.plan_id, - PhysicalPlan::MergeIntoSerialize(v) => v.plan_id, PhysicalPlan::CommitSink(v) => v.plan_id, PhysicalPlan::CopyIntoTable(v) => v.plan_id, PhysicalPlan::CopyIntoLocation(v) => v.plan_id, @@ -514,15 +490,12 @@ impl PhysicalPlan { PhysicalPlan::RecursiveCteScan(plan) => plan.output_schema(), PhysicalPlan::Udf(plan) => plan.output_schema(), PhysicalPlan::MergeInto(plan) => Ok(plan.output_schema.clone()), - PhysicalPlan::MergeIntoAddRowNumber(plan) => plan.output_schema(), PhysicalPlan::MergeIntoSplit(plan) => plan.output_schema(), PhysicalPlan::MergeIntoManipulate(plan) => plan.output_schema(), PhysicalPlan::MergeIntoOrganize(plan) => plan.output_schema(), - PhysicalPlan::MergeIntoSerialize(plan) => plan.output_schema(), PhysicalPlan::ReplaceAsyncSourcer(_) | PhysicalPlan::ReplaceDeduplicate(_) | PhysicalPlan::ReplaceInto(_) - | PhysicalPlan::MergeIntoAppendNotMatched(_) | PhysicalPlan::CompactSource(_) | PhysicalPlan::CommitSink(_) | PhysicalPlan::DistributedInsertSelect(_) @@ -578,12 +551,9 @@ impl PhysicalPlan { PhysicalPlan::ReplaceDeduplicate(_) => "ReplaceDeduplicate".to_string(), PhysicalPlan::ReplaceInto(_) => "Replace".to_string(), PhysicalPlan::MergeInto(_) => "MergeInto".to_string(), - PhysicalPlan::MergeIntoAppendNotMatched(_) => "MergeIntoAppendNotMatched".to_string(), - PhysicalPlan::MergeIntoAddRowNumber(_) => "AddRowNumber".to_string(), PhysicalPlan::MergeIntoSplit(_) => "MergeIntoSplit".to_string(), PhysicalPlan::MergeIntoManipulate(_) => "MergeIntoManipulate".to_string(), PhysicalPlan::MergeIntoOrganize(_) => "MergeIntoOrganize".to_string(), - PhysicalPlan::MergeIntoSerialize(_) => "MergeIntoSerialize".to_string(), PhysicalPlan::CteScan(_) => "PhysicalCteScan".to_string(), PhysicalPlan::RecursiveCteScan(_) => "RecursiveCteScan".to_string(), PhysicalPlan::MaterializedCte(_) => "PhysicalMaterializedCte".to_string(), @@ -652,20 +622,11 @@ impl PhysicalPlan { } PhysicalPlan::ReplaceInto(plan) => Box::new(std::iter::once(plan.input.as_ref())), PhysicalPlan::MergeInto(plan) => Box::new(std::iter::once(plan.input.as_ref())), - PhysicalPlan::MergeIntoAddRowNumber(plan) => { - Box::new(std::iter::once(plan.input.as_ref())) - } - PhysicalPlan::MergeIntoAppendNotMatched(plan) => { - Box::new(std::iter::once(plan.input.as_ref())) - } PhysicalPlan::MergeIntoSplit(plan) => Box::new(std::iter::once(plan.input.as_ref())), PhysicalPlan::MergeIntoManipulate(plan) => { Box::new(std::iter::once(plan.input.as_ref())) } PhysicalPlan::MergeIntoOrganize(plan) => Box::new(std::iter::once(plan.input.as_ref())), - PhysicalPlan::MergeIntoSerialize(plan) => { - Box::new(std::iter::once(plan.input.as_ref())) - } PhysicalPlan::MaterializedCte(plan) => Box::new( std::iter::once(plan.left.as_ref()).chain(std::iter::once(plan.right.as_ref())), ), @@ -718,12 +679,9 @@ impl PhysicalPlan { | PhysicalPlan::ReplaceDeduplicate(_) | PhysicalPlan::ReplaceInto(_) | PhysicalPlan::MergeInto(_) - | PhysicalPlan::MergeIntoAddRowNumber(_) - | PhysicalPlan::MergeIntoAppendNotMatched(_) | PhysicalPlan::MergeIntoSplit(_) | PhysicalPlan::MergeIntoManipulate(_) | PhysicalPlan::MergeIntoOrganize(_) - | PhysicalPlan::MergeIntoSerialize(_) | PhysicalPlan::ConstantTableScan(_) | PhysicalPlan::ExpressionScan(_) | PhysicalPlan::CacheScan(_) diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs index ba51f2f855fdf..a2fc89aae6cb5 100644 --- 
a/src/query/sql/src/executor/physical_plan_builder.rs +++ b/src/query/sql/src/executor/physical_plan_builder.rs @@ -120,7 +120,6 @@ impl PhysicalPlanBuilder { self.build_expression_scan(s_expr, scan, required).await } RelOperator::CacheScan(scan) => self.build_cache_scan(scan, required).await, - RelOperator::AddRowNumber(_) => self.build_add_row_number(s_expr, required).await, RelOperator::Udf(udf) => self.build_udf(s_expr, udf, required, stat_info).await, RelOperator::RecursiveCteScan(scan) => { self.build_recursive_cte_scan(scan, stat_info).await diff --git a/src/query/sql/src/executor/physical_plan_display.rs b/src/query/sql/src/executor/physical_plan_display.rs index ee4b51321e86b..2e951401e8647 100644 --- a/src/query/sql/src/executor/physical_plan_display.rs +++ b/src/query/sql/src/executor/physical_plan_display.rs @@ -21,7 +21,6 @@ use itertools::Itertools; use super::physical_plans::AsyncFunction; use super::physical_plans::MergeIntoManipulate; use super::physical_plans::MergeIntoOrganize; -use super::physical_plans::MergeIntoSerialize; use super::physical_plans::MergeIntoSplit; use crate::executor::physical_plan::PhysicalPlan; use crate::executor::physical_plans::AggregateExpand; @@ -46,8 +45,6 @@ use crate::executor::physical_plans::HashJoin; use crate::executor::physical_plans::Limit; use crate::executor::physical_plans::MaterializedCte; use crate::executor::physical_plans::MergeInto; -use crate::executor::physical_plans::MergeIntoAddRowNumber; -use crate::executor::physical_plans::MergeIntoAppendNotMatched; use crate::executor::physical_plans::ProjectSet; use crate::executor::physical_plans::RangeJoin; use crate::executor::physical_plans::ReclusterSink; @@ -110,10 +107,6 @@ impl<'a> Display for PhysicalPlanIndentFormatDisplay<'a> { PhysicalPlan::ReplaceDeduplicate(deduplicate) => write!(f, "{}", deduplicate)?, PhysicalPlan::ReplaceInto(replace) => write!(f, "{}", replace)?, PhysicalPlan::MergeInto(merge_into) => write!(f, "{}", merge_into)?, - PhysicalPlan::MergeIntoAppendNotMatched(merge_into_row_id_apply) => { - write!(f, "{}", merge_into_row_id_apply)? - } - PhysicalPlan::MergeIntoAddRowNumber(add_row_number) => write!(f, "{}", add_row_number)?, PhysicalPlan::MergeIntoSplit(merge_into_split) => write!(f, "{}", merge_into_split)?, PhysicalPlan::MergeIntoManipulate(merge_into_manipulate) => { write!(f, "{}", merge_into_manipulate)? @@ -121,9 +114,6 @@ impl<'a> Display for PhysicalPlanIndentFormatDisplay<'a> { PhysicalPlan::MergeIntoOrganize(merge_into_organize) => { write!(f, "{}", merge_into_organize)? } - PhysicalPlan::MergeIntoSerialize(merge_into_serialize) => { - write!(f, "{}", merge_into_serialize)? - } PhysicalPlan::CteScan(cte_scan) => write!(f, "{}", cte_scan)?, PhysicalPlan::RecursiveCteScan(recursive_cte_scan) => { write!(f, "{}", recursive_cte_scan)? 
@@ -528,18 +518,6 @@ impl Display for MergeInto { } } -impl Display for MergeIntoAddRowNumber { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "MergeIntoAddRowNumber") - } -} - -impl Display for MergeIntoAppendNotMatched { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "MergeIntoAppendNotMatched") - } -} - impl Display for MergeIntoSplit { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { write!(f, "MergeIntoSplit") @@ -558,12 +536,6 @@ impl Display for MergeIntoOrganize { } } -impl Display for MergeIntoSerialize { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "MergeIntoSerialize") - } -} - impl Display for ReclusterSource { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { write!(f, "ReclusterSource") diff --git a/src/query/sql/src/executor/physical_plan_visitor.rs b/src/query/sql/src/executor/physical_plan_visitor.rs index 028735d65d572..378fc42671579 100644 --- a/src/query/sql/src/executor/physical_plan_visitor.rs +++ b/src/query/sql/src/executor/physical_plan_visitor.rs @@ -18,7 +18,6 @@ use super::physical_plans::CacheScan; use super::physical_plans::ExpressionScan; use super::physical_plans::MergeIntoManipulate; use super::physical_plans::MergeIntoOrganize; -use super::physical_plans::MergeIntoSerialize; use super::physical_plans::MergeIntoSplit; use super::physical_plans::RecursiveCteScan; use crate::executor::physical_plan::PhysicalPlan; @@ -51,8 +50,6 @@ use crate::executor::physical_plans::HashJoin; use crate::executor::physical_plans::Limit; use crate::executor::physical_plans::MaterializedCte; use crate::executor::physical_plans::MergeInto; -use crate::executor::physical_plans::MergeIntoAddRowNumber; -use crate::executor::physical_plans::MergeIntoAppendNotMatched; use crate::executor::physical_plans::ProjectSet; use crate::executor::physical_plans::RangeJoin; use crate::executor::physical_plans::ReclusterSink; @@ -101,14 +98,9 @@ pub trait PhysicalPlanReplacer { PhysicalPlan::ReplaceDeduplicate(plan) => self.replace_deduplicate(plan), PhysicalPlan::ReplaceInto(plan) => self.replace_replace_into(plan), PhysicalPlan::MergeInto(plan) => self.replace_merge_into(plan), - PhysicalPlan::MergeIntoAddRowNumber(plan) => self.replace_add_row_number(plan), - PhysicalPlan::MergeIntoAppendNotMatched(plan) => { - self.replace_merge_into_row_id_apply(plan) - } PhysicalPlan::MergeIntoSplit(plan) => self.replace_merge_into_split(plan), PhysicalPlan::MergeIntoManipulate(plan) => self.replace_merge_into_manipulate(plan), PhysicalPlan::MergeIntoOrganize(plan) => self.replace_merge_into_organize(plan), - PhysicalPlan::MergeIntoSerialize(plan) => self.replace_merge_into_serialize(plan), PhysicalPlan::MaterializedCte(plan) => self.replace_materialized_cte(plan), PhysicalPlan::ConstantTableScan(plan) => self.replace_constant_table_scan(plan), PhysicalPlan::ExpressionScan(plan) => self.replace_expression_scan(plan), @@ -491,29 +483,6 @@ pub trait PhysicalPlanReplacer { }))) } - fn replace_add_row_number(&mut self, plan: &MergeIntoAddRowNumber) -> Result { - let input = self.replace(&plan.input)?; - Ok(PhysicalPlan::MergeIntoAddRowNumber(Box::new( - MergeIntoAddRowNumber { - input: Box::new(input), - ..plan.clone() - }, - ))) - } - - fn replace_merge_into_row_id_apply( - &mut self, - plan: &MergeIntoAppendNotMatched, - ) -> Result { - let input = self.replace(&plan.input)?; - Ok(PhysicalPlan::MergeIntoAppendNotMatched(Box::new( - MergeIntoAppendNotMatched { - input: Box::new(input), - ..plan.clone() - }, - ))) - } - fn 
replace_merge_into_split(&mut self, plan: &MergeIntoSplit) -> Result { let input = self.replace(&plan.input)?; Ok(PhysicalPlan::MergeIntoSplit(Box::new(MergeIntoSplit { @@ -545,16 +514,6 @@ pub trait PhysicalPlanReplacer { ))) } - fn replace_merge_into_serialize(&mut self, plan: &MergeIntoSerialize) -> Result { - let input = self.replace(&plan.input)?; - Ok(PhysicalPlan::MergeIntoSerialize(Box::new( - MergeIntoSerialize { - input: Box::new(input), - ..plan.clone() - }, - ))) - } - fn replace_project_set(&mut self, plan: &ProjectSet) -> Result { let input = self.replace(&plan.input)?; Ok(PhysicalPlan::ProjectSet(ProjectSet { @@ -759,12 +718,6 @@ impl PhysicalPlan { PhysicalPlan::MergeInto(plan) => { Self::traverse(&plan.input, pre_visit, visit, post_visit); } - PhysicalPlan::MergeIntoAddRowNumber(plan) => { - Self::traverse(&plan.input, pre_visit, visit, post_visit); - } - PhysicalPlan::MergeIntoAppendNotMatched(plan) => { - Self::traverse(&plan.input, pre_visit, visit, post_visit); - } PhysicalPlan::MergeIntoSplit(plan) => { Self::traverse(&plan.input, pre_visit, visit, post_visit); } @@ -774,9 +727,6 @@ impl PhysicalPlan { PhysicalPlan::MergeIntoOrganize(plan) => { Self::traverse(&plan.input, pre_visit, visit, post_visit); } - PhysicalPlan::MergeIntoSerialize(plan) => { - Self::traverse(&plan.input, pre_visit, visit, post_visit); - } PhysicalPlan::MaterializedCte(plan) => { Self::traverse(&plan.left, pre_visit, visit, post_visit); Self::traverse(&plan.right, pre_visit, visit, post_visit); diff --git a/src/query/sql/src/executor/physical_plans/mod.rs b/src/query/sql/src/executor/physical_plans/mod.rs index 0fdbd2a5eaed7..a7aae036c3131 100644 --- a/src/query/sql/src/executor/physical_plans/mod.rs +++ b/src/query/sql/src/executor/physical_plans/mod.rs @@ -58,12 +58,8 @@ mod physical_materialized_cte; pub use physical_materialized_cte::MaterializedCte; mod physical_merge_into; pub use physical_merge_into::*; -mod physical_merge_into_add_row_number; -pub use physical_merge_into_add_row_number::MergeIntoAddRowNumber; mod physical_merge_into_organize; pub use physical_merge_into_organize::MergeIntoOrganize; -mod physical_merge_into_serialize; -pub use physical_merge_into_serialize::MergeIntoSerialize; mod physical_merge_into_manipulate; pub use physical_merge_into_manipulate::MergeIntoManipulate; mod physical_merge_into_split; diff --git a/src/query/sql/src/executor/physical_plans/physical_merge_into.rs b/src/query/sql/src/executor/physical_plans/physical_merge_into.rs index e019ad0d09355..87901c584e193 100644 --- a/src/query/sql/src/executor/physical_plans/physical_merge_into.rs +++ b/src/query/sql/src/executor/physical_plans/physical_merge_into.rs @@ -15,21 +15,24 @@ use std::collections::HashMap; use std::u64::MAX; +use databend_common_catalog::plan::NUM_ROW_ID_PREFIX_BITS; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::type_check::check_function; use databend_common_expression::types::DataType; use databend_common_expression::types::NumberDataType; use databend_common_expression::ConstantFolder; use databend_common_expression::DataField; -use databend_common_expression::DataSchema; use databend_common_expression::DataSchemaRef; +use databend_common_expression::Expr; use databend_common_expression::FieldIndex; use databend_common_expression::RemoteExpr; +use databend_common_expression::Scalar; use databend_common_expression::ROW_ID_COL_NAME; -use databend_common_expression::ROW_NUMBER_COL_NAME; use 
databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_meta_app::schema::TableInfo; use databend_storages_common_table_meta::meta::Location; +use databend_storages_common_table_meta::meta::NUM_BLOCK_ID_BITS; use itertools::Itertools; use crate::binder::MergeIntoType; @@ -39,7 +42,6 @@ use crate::executor::physical_plans::Exchange; use crate::executor::physical_plans::FragmentKind; use crate::executor::physical_plans::MergeIntoManipulate; use crate::executor::physical_plans::MergeIntoOrganize; -use crate::executor::physical_plans::MergeIntoSerialize; use crate::executor::physical_plans::MergeIntoSplit; use crate::executor::physical_plans::MutationKind; use crate::executor::physical_plans::RowFetch; @@ -47,6 +49,8 @@ use crate::executor::PhysicalPlanBuilder; use crate::optimizer::ColumnSet; use crate::optimizer::SExpr; use crate::plans; +use crate::plans::BoundColumnRef; +use crate::BindContext; use crate::ColumnEntry; use crate::IndexType; use crate::ScalarExpr; @@ -64,14 +68,13 @@ pub struct MergeInto { pub table_info: TableInfo, // (DataSchemaRef, Option, Vec,Vec) => (source_schema, condition, value_exprs) pub unmatched: Vec<(DataSchemaRef, Option, Vec)>, + pub segments: Vec<(usize, Location)>, pub output_schema: DataSchemaRef, - pub merge_into_op: MergeIntoOp, + pub merge_type: MergeIntoType, pub target_table_index: usize, pub need_match: bool, pub distributed: bool, - pub change_join_order: bool, pub target_build_optimization: bool, - pub enable_right_broadcast: bool, } impl PhysicalPlanBuilder { @@ -114,11 +117,9 @@ impl PhysicalPlanBuilder { field_index_map, merge_type, distributed, - change_join_order, row_id_index, - source_row_id_index, + change_join_order, can_try_update_column_only, - enable_right_broadcast, lazy_columns, .. } = merge_into; @@ -184,6 +185,70 @@ impl PhysicalPlanBuilder { } } + let source_is_broadcast = + matches!(merge_type, MergeIntoType::MatchedOnly) && !change_join_order; + if *distributed && !is_insert_only && !source_is_broadcast { + let mut row_id_column = None; + for column_binding in bind_context.columns.iter() { + if BindContext::match_column_binding( + Some(database.as_str()), + Some(table_name.as_str()), + ROW_ID_COL_NAME, + column_binding, + ) { + row_id_column = Some(ScalarExpr::BoundColumnRef(BoundColumnRef { + span: None, + column: column_binding.clone(), + })); + break; + } + } + let row_id_column = row_id_column.ok_or_else(|| ErrorCode::Internal("It's a bug"))?; + + let row_id_expr = row_id_column + .type_check(join_output_schema.as_ref())? + .project_column_ref(|index| { + join_output_schema.index_of(&index.to_string()).unwrap() + }); + + let expr = check_function( + None, + "bit_and", + &[], + &[ + check_function( + None, + "bit_shift_right", + &[], + &[row_id_expr, Expr::Constant { + span: None, + scalar: Scalar::Number(((64 - NUM_ROW_ID_PREFIX_BITS) as u64).into()), + data_type: DataType::Number(NumberDataType::UInt64), + }], + &BUILTIN_FUNCTIONS, + )?, + Expr::Constant { + span: None, + scalar: Scalar::Number((((1 << NUM_BLOCK_ID_BITS) - 1) as u64).into()), + data_type: DataType::Number(NumberDataType::UInt64), + }, + ], + &BUILTIN_FUNCTIONS, + )?; + // For distributed merge into, shuffle by block_id(computed by row_id) + // to avoid many nodes update the same physical block simultaneously, + // update data that belong to one physical block will shuffle to one node, + // insert data just keep in local node. 
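+            // Concretely, the shuffle key built above is
+            //   (row_id >> (64 - NUM_ROW_ID_PREFIX_BITS)) & ((1 << NUM_BLOCK_ID_BITS) - 1),
+            // i.e. the block id taken from the row_id prefix, so every update row that
+            // belongs to one physical block is routed to the same node. Rows whose row_id
+            // is NULL (the insert-only data) stay on the local node: see
+            // `shuffle_by_block_id_in_merge_into` / `get_hash_values` in
+            // flight_scatter_hash.rs, which map NULL keys to the local position.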
+ plan = PhysicalPlan::Exchange(Exchange { + plan_id: 0, + input: Box::new(plan), + kind: FragmentKind::Normal, + keys: vec![expr.as_remote_expr()], + allow_adjust_parallelism: true, + ignore_exchange: false, + }); + } + if let Some(merge_into_split_idx) = merge_into_split_idx { plan = PhysicalPlan::MergeIntoSplit(Box::new(MergeIntoSplit { plan_id: 0, @@ -248,28 +313,6 @@ impl PhysicalPlanBuilder { let output_schema = plan.output_schema()?; - let mut source_row_id_idx = None; - let mut source_row_number_idx = None; - if *enable_right_broadcast { - if let Some(source_row_id_index) = source_row_id_index { - for (idx, data_field) in join_output_schema.fields().iter().enumerate() { - if *data_field.name() == source_row_id_index.to_string() { - source_row_id_idx = Some(idx); - break; - } - } - } else { - source_row_number_idx = Some(join_output_schema.index_of(ROW_NUMBER_COL_NAME)?); - } - }; - - if *enable_right_broadcast && source_row_number_idx.is_none() && source_row_id_idx.is_none() - { - return Err(ErrorCode::InvalidRowIdIndex( - "can't get internal row_number_idx or row_id_idx when running merge into", - )); - } - let table = self.ctx.get_table(catalog, database, table_name).await?; let table_info = table.get_table_info(); let table_name = table_name.clone(); @@ -390,26 +433,14 @@ impl PhysicalPlanBuilder { field_index_of_input_schema: field_index_of_input_schema.clone(), merge_type: merge_type.clone(), row_id_idx: row_id_offset, - source_row_id_idx, - source_row_number_idx, - enable_right_broadcast: *enable_right_broadcast, can_try_update_column_only: *can_try_update_column_only, unmatched_schema: join_output_schema.clone(), })); - let merge_into_op = match (merge_type, distributed) { - (MergeIntoType::FullOperation, true) => MergeIntoOp::DistributedFullOperation, - (MergeIntoType::FullOperation, false) => MergeIntoOp::StandaloneFullOperation, - (MergeIntoType::MatchedOnly, true) => MergeIntoOp::DistributedMatchedOnly, - (MergeIntoType::MatchedOnly, false) => MergeIntoOp::StandaloneMatchedOnly, - (MergeIntoType::InsertOnly, true) => MergeIntoOp::DistributedInsertOnly, - (MergeIntoType::InsertOnly, false) => MergeIntoOp::StandaloneInsertOnly, - }; - plan = PhysicalPlan::MergeIntoOrganize(Box::new(MergeIntoOrganize { plan_id: 0, input: Box::new(plan.clone()), - merge_into_op: merge_into_op.clone(), + merge_type: merge_type.clone(), })); let segments: Vec<_> = base_snapshot @@ -419,81 +450,31 @@ impl PhysicalPlanBuilder { .enumerate() .collect(); - plan = PhysicalPlan::MergeIntoSerialize(Box::new(MergeIntoSerialize { - plan_id: 0, - input: Box::new(plan), + let merge_into = PhysicalPlan::MergeInto(Box::new(MergeInto { + input: Box::new(plan.clone()), table_info: table_info.clone(), - unmatched: unmatched.clone(), + unmatched, segments: segments.clone(), distributed: *distributed, - change_join_order: *change_join_order, - merge_into_op: merge_into_op.clone(), + output_schema: DataSchemaRef::default(), + merge_type: merge_type.clone(), + target_table_index: *target_table_index, need_match: !is_insert_only, - enable_right_broadcast: *enable_right_broadcast, + target_build_optimization: false, + plan_id: u32::MAX, })); let commit_input = if !distributed { - PhysicalPlan::MergeInto(Box::new(MergeInto { - input: Box::new(plan.clone()), - table_info: table_info.clone(), - unmatched, - distributed: false, - output_schema: DataSchemaRef::default(), - merge_into_op: merge_into_op.clone(), - target_table_index: *target_table_index, - need_match: !is_insert_only, - change_join_order: 
*change_join_order, - target_build_optimization: false, - plan_id: u32::MAX, - enable_right_broadcast: *enable_right_broadcast, - })) + merge_into } else { - let merge_append = PhysicalPlan::MergeInto(Box::new(MergeInto { - input: Box::new(plan.clone()), - table_info: table_info.clone(), - unmatched: unmatched.clone(), - distributed: true, - output_schema: if let Some(idx) = source_row_number_idx { - DataSchemaRef::new(DataSchema::new(vec![output_schema.fields[idx].clone()])) - } else { - DataSchemaRef::new(DataSchema::new(vec![DataField::new( - ROW_ID_COL_NAME, - DataType::Number(NumberDataType::UInt64), - )])) - }, - merge_into_op: merge_into_op.clone(), - target_table_index: *target_table_index, - need_match: !is_insert_only, - change_join_order: *change_join_order, - target_build_optimization: false, // we don't support for distributed mode for now. - plan_id: u32::MAX, - enable_right_broadcast: *enable_right_broadcast, - })); - // if change_join_order = true, it means the target is build side, - // in this way, we will do matched operation and not matched operation - // locally in every node, and the main node just receive row ids to apply. - let segments = if *change_join_order { - segments.clone() - } else { - vec![] - }; - PhysicalPlan::MergeIntoAppendNotMatched(Box::new(MergeIntoAppendNotMatched { - input: Box::new(PhysicalPlan::Exchange(Exchange { - plan_id: 0, - input: Box::new(merge_append), - kind: FragmentKind::Merge, - keys: vec![], - allow_adjust_parallelism: true, - ignore_exchange: false, - })), - table_info: table_info.clone(), - unmatched: unmatched.clone(), - input_schema: join_output_schema.clone(), - merge_type: merge_type.clone(), - change_join_order: *change_join_order, - segments, - plan_id: u32::MAX, - })) + PhysicalPlan::Exchange(Exchange { + plan_id: 0, + input: Box::new(merge_into), + kind: FragmentKind::Merge, + keys: vec![], + allow_adjust_parallelism: true, + ignore_exchange: false, + }) }; // build mutation_aggregate @@ -528,62 +509,3 @@ impl PhysicalPlanBuilder { Ok(filer.as_remote_expr()) } } - -#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] -pub struct MergeIntoAppendNotMatched { - pub plan_id: u32, - pub input: Box, - pub table_info: TableInfo, - // (DataSchemaRef, Option, Vec,Vec) => (source_schema, condition, value_exprs) - pub unmatched: Vec<(DataSchemaRef, Option, Vec)>, - pub input_schema: DataSchemaRef, - pub merge_type: MergeIntoType, - pub change_join_order: bool, - pub segments: Vec<(usize, Location)>, -} - -#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)] -pub enum MergeIntoOp { - StandaloneMatchedOnly, - StandaloneFullOperation, - StandaloneInsertOnly, - DistributedMatchedOnly, - DistributedFullOperation, - DistributedInsertOnly, -} - -impl MergeIntoOp { - pub fn get_serialize_and_row_number_len( - &self, - output_len: usize, - enable_right_broadcast: bool, - ) -> (usize, usize) { - match self { - MergeIntoOp::StandaloneFullOperation - | MergeIntoOp::StandaloneMatchedOnly - | MergeIntoOp::DistributedMatchedOnly => (output_len - 1, 0), /* remove first row_id port */ - MergeIntoOp::StandaloneInsertOnly => (output_len, 0), - MergeIntoOp::DistributedFullOperation => { - if enable_right_broadcast { - // remove first row_id port and last row_number port - (output_len - 2, 1) - } else { - // remove first row_id port - (output_len - 1, 0) - } - } - MergeIntoOp::DistributedInsertOnly => { - // only one row_number port/unmatched port, refer to `builder_merge_into_organize` - assert_eq!(output_len, 1); - if 
enable_right_broadcast { - // only one row_number port - // use (0, 0) instead of (0, 1) to avoid appending many dummy items - (0, 0) - } else { - // only one unmatched port - (1, 0) - } - } - } - } -} diff --git a/src/query/sql/src/executor/physical_plans/physical_merge_into_add_row_number.rs b/src/query/sql/src/executor/physical_plans/physical_merge_into_add_row_number.rs deleted file mode 100644 index e20816aef9e8b..0000000000000 --- a/src/query/sql/src/executor/physical_plans/physical_merge_into_add_row_number.rs +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::BTreeMap; -use std::sync::Arc; - -use databend_common_exception::ErrorCode; -use databend_common_exception::Result; -use databend_common_expression::types::DataType; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::DataField; -use databend_common_expression::DataSchema; -use databend_common_expression::DataSchemaRef; -use databend_common_expression::ROW_NUMBER_COL_NAME; - -use crate::executor::PhysicalPlan; -use crate::executor::PhysicalPlanBuilder; -use crate::optimizer::ColumnSet; -use crate::optimizer::SExpr; - -// add row_number for distributed merge into -#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] -pub struct MergeIntoAddRowNumber { - pub plan_id: u32, - pub cluster_index: BTreeMap, - pub input: Box, - pub output_schema: DataSchemaRef, -} - -impl MergeIntoAddRowNumber { - pub fn output_schema(&self) -> Result { - Ok(self.output_schema.clone()) - } -} - -impl PhysicalPlanBuilder { - pub(crate) async fn build_add_row_number( - &mut self, - s_expr: &SExpr, - required: ColumnSet, - ) -> Result { - let input_plan = self.build(s_expr.child(0)?, required).await?; - if self.ctx.get_cluster().is_empty() { - return Err(ErrorCode::CannotConnectNode( - "there is only one node when build distributed merge into", - )); - } - let mut cluster_index = BTreeMap::new(); - for (id, node) in self.ctx.get_cluster().nodes.iter().enumerate() { - cluster_index.insert(node.id.clone(), id); - } - let input_schema = input_plan.output_schema()?; - let mut fields = input_schema.fields.clone(); - fields.push(DataField::new( - ROW_NUMBER_COL_NAME, - DataType::Number(NumberDataType::UInt64), - )); - let meta = input_schema.meta().clone(); - - Ok(PhysicalPlan::MergeIntoAddRowNumber(Box::new( - MergeIntoAddRowNumber { - plan_id: u32::MAX, - cluster_index, - input: Box::new(input_plan), - output_schema: Arc::new(DataSchema::new_from(fields, meta)), - }, - ))) - } -} diff --git a/src/query/sql/src/executor/physical_plans/physical_merge_into_manipulate.rs b/src/query/sql/src/executor/physical_plans/physical_merge_into_manipulate.rs index d9bfda17bfdaa..6db5e131ae912 100644 --- a/src/query/sql/src/executor/physical_plans/physical_merge_into_manipulate.rs +++ b/src/query/sql/src/executor/physical_plans/physical_merge_into_manipulate.rs @@ -40,9 +40,6 @@ pub struct MergeIntoManipulate { // merge_type pub 
merge_type: MergeIntoType, pub row_id_idx: usize, - pub source_row_id_idx: Option, - pub source_row_number_idx: Option, - pub enable_right_broadcast: bool, pub can_try_update_column_only: bool, pub unmatched_schema: DataSchemaRef, } diff --git a/src/query/sql/src/executor/physical_plans/physical_merge_into_organize.rs b/src/query/sql/src/executor/physical_plans/physical_merge_into_organize.rs index 0d9ef50ee6a22..196328103b5a9 100644 --- a/src/query/sql/src/executor/physical_plans/physical_merge_into_organize.rs +++ b/src/query/sql/src/executor/physical_plans/physical_merge_into_organize.rs @@ -15,15 +15,14 @@ use databend_common_exception::Result; use databend_common_expression::DataSchemaRef; +use crate::binder::MergeIntoType; use crate::executor::physical_plan::PhysicalPlan; -use crate::executor::physical_plans::MergeIntoOp; #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] pub struct MergeIntoOrganize { pub plan_id: u32, pub input: Box, - // merge_into_operation - pub merge_into_op: MergeIntoOp, + pub merge_type: MergeIntoType, } impl MergeIntoOrganize { diff --git a/src/query/sql/src/executor/physical_plans/physical_merge_into_serialize.rs b/src/query/sql/src/executor/physical_plans/physical_merge_into_serialize.rs deleted file mode 100644 index 2003c55115034..0000000000000 --- a/src/query/sql/src/executor/physical_plans/physical_merge_into_serialize.rs +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use databend_common_exception::Result; -use databend_common_expression::DataSchemaRef; -use databend_common_expression::RemoteExpr; -use databend_common_meta_app::schema::TableInfo; -use databend_storages_common_table_meta::meta::Location; - -use crate::executor::physical_plan::PhysicalPlan; -use crate::executor::physical_plans::MergeIntoOp; - -#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] -pub struct MergeIntoSerialize { - pub plan_id: u32, - pub input: Box, - pub table_info: TableInfo, - // (DataSchemaRef, Option, Vec,Vec) => (source_schema, condition, value_exprs) - pub unmatched: Vec<(DataSchemaRef, Option, Vec)>, - // used to record the index of target table's field in merge_source_schema - pub segments: Vec<(usize, Location)>, - pub distributed: bool, - pub change_join_order: bool, - pub need_match: bool, - pub merge_into_op: MergeIntoOp, - pub enable_right_broadcast: bool, -} - -impl MergeIntoSerialize { - pub fn output_schema(&self) -> Result { - self.input.output_schema() - } -} diff --git a/src/query/sql/src/planner/binder/merge_into.rs b/src/query/sql/src/planner/binder/merge_into.rs index 84d30803274cb..9c69dacec6183 100644 --- a/src/query/sql/src/planner/binder/merge_into.rs +++ b/src/query/sql/src/planner/binder/merge_into.rs @@ -221,8 +221,6 @@ impl Binder { merge_options, .. 
} = stmt; - let settings = self.ctx.get_settings(); - if merge_options.is_empty() { return Err(ErrorCode::BadArguments( "at least one matched or unmatched clause for merge into", @@ -281,20 +279,6 @@ impl Binder { let (mut source_expr, mut source_context) = self.bind_table_reference(bind_context, &source_data)?; - // try add internal_column (_row_id) for source_table - let mut source_table_index = DUMMY_TABLE_INDEX; - let mut source_row_id_index = None; - - if settings.get_enable_distributed_merge_into()? { - self.try_add_internal_column_binding( - &source_data, - &mut source_context, - &mut source_expr, - &mut source_table_index, - &mut source_row_id_index, - )?; - } - // remove stream column. source_context .columns @@ -576,11 +560,9 @@ impl Binder { field_index_map, merge_type, distributed: false, - change_join_order: false, row_id_index, - source_row_id_index, + change_join_order: false, can_try_update_column_only: self.can_try_update_column_only(&matched_clauses), - enable_right_broadcast: false, lazy_columns, lock_guard, }; diff --git a/src/query/sql/src/planner/binder/util.rs b/src/query/sql/src/planner/binder/util.rs index d862efda959cf..552e8093d1e24 100644 --- a/src/query/sql/src/planner/binder/util.rs +++ b/src/query/sql/src/planner/binder/util.rs @@ -62,7 +62,6 @@ impl Binder { } RelOperator::Exchange(_) - | RelOperator::AddRowNumber(_) | RelOperator::Scan(_) | RelOperator::CteScan(_) | RelOperator::DummyTableScan(_) diff --git a/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs b/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs index 4bf785f7c84fd..231f715c7c76d 100644 --- a/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs +++ b/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs @@ -186,7 +186,6 @@ impl SubqueryRewriter { | RelOperator::ConstantTableScan(_) | RelOperator::ExpressionScan(_) | RelOperator::CacheScan(_) - | RelOperator::AddRowNumber(_) | RelOperator::Exchange(_) | RelOperator::RecursiveCteScan(_) | RelOperator::MergeInto(_) => Ok(s_expr.clone()), diff --git a/src/query/sql/src/planner/optimizer/distributed/distributed_merge.rs b/src/query/sql/src/planner/optimizer/distributed/distributed_merge.rs index 7c31af79c3ed8..01a52b085b034 100644 --- a/src/query/sql/src/planner/optimizer/distributed/distributed_merge.rs +++ b/src/query/sql/src/planner/optimizer/distributed/distributed_merge.rs @@ -18,91 +18,75 @@ use databend_common_exception::Result; use crate::optimizer::extract::Matcher; use crate::optimizer::SExpr; -use crate::plans::AddRowNumber; -use crate::plans::Exchange::Broadcast; +use crate::plans::Exchange::Hash; use crate::plans::Join; use crate::plans::RelOp; use crate::plans::RelOperator; -pub struct MergeSourceOptimizer { - pub merge_source_matcher: Matcher, +pub struct MergeOptimizer { + pub merge_matcher: Matcher, } -impl MergeSourceOptimizer { +impl MergeOptimizer { pub fn create() -> Self { Self { - merge_source_matcher: Self::merge_source_matcher(), + merge_matcher: Self::merge_matcher(), } } - pub fn optimize(&self, s_expr: &SExpr, source_has_row_id: bool) -> Result { - let left_exchange = s_expr.child(0)?; - assert_eq!(left_exchange.children.len(), 1); - let left_exchange_input = left_exchange.child(0)?; + pub fn optimize(&self, s_expr: &SExpr) -> Result { + let left_exchange_input = s_expr.child(0)?; let right_exchange = s_expr.child(1)?; assert_eq!(right_exchange.children.len(), 1); let right_exchange_input = right_exchange.child(0)?; - // source is build side - let 
new_join_children = if source_has_row_id { - vec![ - Arc::new(left_exchange_input.clone()), - Arc::new(SExpr::create_unary( - Arc::new(RelOperator::Exchange(Broadcast)), - Arc::new(right_exchange_input.clone()), - )), - ] - } else { - vec![ - Arc::new(left_exchange_input.clone()), - Arc::new(SExpr::create_unary( - Arc::new(RelOperator::Exchange(Broadcast)), - Arc::new(SExpr::create_unary( - Arc::new(RelOperator::AddRowNumber(AddRowNumber)), - Arc::new(right_exchange_input.clone()), - )), - )), - ] - }; - let mut join: Join = s_expr.plan().clone().try_into()?; join.need_hold_hash_table = true; + + let (left_conditions, right_conditions): (Vec<_>, Vec<_>) = join + .equi_conditions + .iter() + .map(|condition| (condition.left.clone(), condition.right.clone())) + .unzip(); + + let new_join_children = vec![ + Arc::new(SExpr::create_unary( + Arc::new(RelOperator::Exchange(Hash(left_conditions))), + Arc::new(left_exchange_input.clone()), + )), + Arc::new(SExpr::create_unary( + Arc::new(RelOperator::Exchange(Hash(right_conditions))), + Arc::new(right_exchange_input.clone()), + )), + ]; let mut join_s_expr = s_expr.replace_plan(Arc::new(RelOperator::Join(join))); join_s_expr = join_s_expr.replace_children(new_join_children); Ok(join_s_expr) } - // for right outer join (source as build) - fn merge_source_matcher() -> Matcher { + fn merge_matcher() -> Matcher { // Input: // Join // / \ // / \ - // Exchange Exchange(Shuffle) - // | | - // * * - // source is build we will get below: + // Exchange + // (Broadcast) + // | | + // * * // Output: // Join // / \ // / \ - // Exchange Exchange(Broadcast) - // (Random) | - // | AddRowNumber - // | | - // * * + // Exchange Exchange + // (Shuffle) (Shuffle) + // | | + // * * Matcher::MatchOp { op_type: RelOp::Join, - children: vec![ - Matcher::MatchOp { - op_type: RelOp::Exchange, - children: vec![Matcher::Leaf], - }, - Matcher::MatchOp { - op_type: RelOp::Exchange, - children: vec![Matcher::Leaf], - }, - ], + children: vec![Matcher::Leaf, Matcher::MatchOp { + op_type: RelOp::Exchange, + children: vec![Matcher::Leaf], + }], } } } diff --git a/src/query/sql/src/planner/optimizer/distributed/mod.rs b/src/query/sql/src/planner/optimizer/distributed/mod.rs index 3ecdb0f2fd5f3..d66256ee6d364 100644 --- a/src/query/sql/src/planner/optimizer/distributed/mod.rs +++ b/src/query/sql/src/planner/optimizer/distributed/mod.rs @@ -18,5 +18,5 @@ mod distributed_merge; mod sort_and_limit; pub use distributed::optimize_distributed_query; -pub use distributed_merge::MergeSourceOptimizer; +pub use distributed_merge::MergeOptimizer; pub use sort_and_limit::SortAndLimitPushDownOptimizer; diff --git a/src/query/sql/src/planner/optimizer/format.rs b/src/query/sql/src/planner/optimizer/format.rs index 686a9a1e87f38..75fb209a0ad9e 100644 --- a/src/query/sql/src/planner/optimizer/format.rs +++ b/src/query/sql/src/planner/optimizer/format.rs @@ -70,7 +70,6 @@ pub fn display_rel_op(rel_op: &RelOperator) -> String { RelOperator::ConstantTableScan(_) => "ConstantTableScan".to_string(), RelOperator::ExpressionScan(_) => "ExpressionScan".to_string(), RelOperator::CacheScan(_) => "CacheScan".to_string(), - RelOperator::AddRowNumber(_) => "AddRowNumber".to_string(), RelOperator::Udf(_) => "Udf".to_string(), RelOperator::RecursiveCteScan(_) => "RecursiveCteScan".to_string(), RelOperator::AsyncFunction(_) => "AsyncFunction".to_string(), diff --git a/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs b/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs index f312855b40210..c3e84fac214c0 100644 
--- a/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs +++ b/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs @@ -258,7 +258,7 @@ impl DPhpy { self.join_relations.push(JoinRelation::new(&new_s_expr)); Ok((Arc::new(new_s_expr), true)) } - RelOperator::Exchange(_) | RelOperator::AddRowNumber(_) => { + RelOperator::Exchange(_) => { unreachable!() } RelOperator::DummyTableScan(_) diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index 369da7be92cf1..42ea8b49e1837 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -24,7 +24,7 @@ use databend_common_exception::Result; use educe::Educe; use log::info; -use super::distributed::MergeSourceOptimizer; +use super::distributed::MergeOptimizer; use super::format::display_memo; use super::Memo; use crate::binder::target_table_position; @@ -459,7 +459,7 @@ async fn optimize_merge_into(mut opt_ctx: OptimizerContext, s_expr: SExpr) -> Re .table_ctx .get_settings() .get_enable_distributed_merge_into()?; - let mut new_columns_set = plan.columns_set.clone(); + let new_columns_set = plan.columns_set.clone(); if join_order_changed && matches!(plan.merge_type, MergeIntoType::FullOperation) && opt_ctx @@ -481,31 +481,17 @@ async fn optimize_merge_into(mut opt_ctx: OptimizerContext, s_expr: SExpr) -> Re } plan.change_join_order = join_order_changed; - if opt_ctx.enable_distributed_optimization { - let merge_source_optimizer = MergeSourceOptimizer::create(); - // Inner join shouldn't add `RowNumber` node. - let mut enable_right_broadcast = false; - - if matches!(join_op.join_type, JoinType::RightAnti | JoinType::Right) - && merge_source_optimizer - .merge_source_matcher - .matches(&join_s_expr) + let distributed = !join_s_expr.has_merge_exchange(); + if opt_ctx.enable_distributed_optimization && distributed { + let merge_optimizer = MergeOptimizer::create(); + // Left join changes to shuffle. + if matches!(join_op.join_type, JoinType::Left | JoinType::LeftAnti) + && merge_optimizer.merge_matcher.matches(&join_s_expr) { - // If source is physical table, use row_id - let source_has_row_id = if let Some(source_row_id_index) = plan.source_row_id_index { - new_columns_set.insert(source_row_id_index); - true - } else { - false - }; - // Todo(xudong): should consider the cost of shuffle and broadcast. 
- // Current behavior is to always use broadcast join.(source table is usually small) - join_s_expr = merge_source_optimizer.optimize(&join_s_expr, source_has_row_id)?; - enable_right_broadcast = true; - } - let distributed = !join_s_expr.has_merge_exchange(); - plan.distributed = distributed; - plan.enable_right_broadcast = enable_right_broadcast; + join_s_expr = merge_optimizer.optimize(&join_s_expr)?; + }; + + plan.distributed = true; plan.columns_set = new_columns_set; Ok(Plan::MergeInto { schema: plan.schema(), diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs index 5c027c3d715e9..be7bae6fea6f4 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs @@ -126,7 +126,6 @@ fn find_group_by_keys(child: &SExpr, group_by_keys: &mut HashSet) -> RelOperator::Sort(_) | RelOperator::Limit(_) | RelOperator::Exchange(_) - | RelOperator::AddRowNumber(_) | RelOperator::UnionAll(_) | RelOperator::DummyTableScan(_) | RelOperator::ProjectSet(_) diff --git a/src/query/sql/src/planner/optimizer/s_expr.rs b/src/query/sql/src/planner/optimizer/s_expr.rs index 3ba3debdecd0f..aeacc9a012e84 100644 --- a/src/query/sql/src/planner/optimizer/s_expr.rs +++ b/src/query/sql/src/planner/optimizer/s_expr.rs @@ -319,7 +319,6 @@ impl SExpr { | RelOperator::Sort(_) | RelOperator::DummyTableScan(_) | RelOperator::CteScan(_) - | RelOperator::AddRowNumber(_) | RelOperator::MaterializedCte(_) | RelOperator::ConstantTableScan(_) | RelOperator::ExpressionScan(_) @@ -419,7 +418,6 @@ fn find_subquery(rel_op: &RelOperator) -> bool { | RelOperator::Sort(_) | RelOperator::DummyTableScan(_) | RelOperator::CteScan(_) - | RelOperator::AddRowNumber(_) | RelOperator::MaterializedCte(_) | RelOperator::ConstantTableScan(_) | RelOperator::ExpressionScan(_) diff --git a/src/query/sql/src/planner/plans/add_row_number.rs b/src/query/sql/src/planner/plans/add_row_number.rs deleted file mode 100644 index f2d5d281ffb44..0000000000000 --- a/src/query/sql/src/planner/plans/add_row_number.rs +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::sync::Arc; - -use databend_common_catalog::table_context::TableContext; - -use super::Operator; -use super::RelOp; -use crate::optimizer::PhysicalProperty; -use crate::optimizer::RelExpr; -use crate::optimizer::RelationalProperty; -use crate::optimizer::RequiredProperty; -use crate::optimizer::StatInfo; - -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct AddRowNumber; - -impl Operator for AddRowNumber { - fn rel_op(&self) -> RelOp { - RelOp::AddRowNumber - } - - fn arity(&self) -> usize { - 1 - } - - fn derive_relational_prop( - &self, - rel_expr: &RelExpr, - ) -> databend_common_exception::Result> { - rel_expr.derive_relational_prop_child(0) - } - - fn derive_physical_prop( - &self, - rel_expr: &RelExpr, - ) -> databend_common_exception::Result { - rel_expr.derive_physical_prop_child(0) - } - - fn derive_stats(&self, rel_expr: &RelExpr) -> databend_common_exception::Result> { - rel_expr.derive_cardinality_child(0) - } - - fn compute_required_prop_child( - &self, - _ctx: Arc, - _rel_expr: &RelExpr, - _child_index: usize, - required: &RequiredProperty, - ) -> databend_common_exception::Result { - Ok(required.clone()) - } -} diff --git a/src/query/sql/src/planner/plans/merge_into.rs b/src/query/sql/src/planner/plans/merge_into.rs index 7b83bc054ea81..b61dc3f9d02c9 100644 --- a/src/query/sql/src/planner/plans/merge_into.rs +++ b/src/query/sql/src/planner/plans/merge_into.rs @@ -76,18 +76,16 @@ pub struct MergeInto { pub field_index_map: HashMap, pub merge_type: MergeIntoType, pub distributed: bool, - pub change_join_order: bool, // when we use target table as build side or insert only, we will remove rowid columns. // also use for split pub row_id_index: IndexType, - pub source_row_id_index: Option, + pub change_join_order: bool, // an optimization: // if it's full_operation/mactehd only and we have only one update without condition here, we shouldn't run // evaluator, we can just do projection to get the right columns.But the limitation is below: // `update *`` or `update set t1.a = t2.a ...`, the right expr on the `=` must be only a column, // we don't support complex expressions. pub can_try_update_column_only: bool, - pub enable_right_broadcast: bool, pub lazy_columns: HashSet, pub lock_guard: Option>, } diff --git a/src/query/sql/src/planner/plans/mod.rs b/src/query/sql/src/planner/plans/mod.rs index 2554cf93c7d87..67cd12221205f 100644 --- a/src/query/sql/src/planner/plans/mod.rs +++ b/src/query/sql/src/planner/plans/mod.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-mod add_row_number; mod aggregate; mod async_function; mod cache_scan; @@ -57,7 +56,6 @@ mod union_all; mod update; mod window; -pub use add_row_number::AddRowNumber; pub use aggregate::*; pub use async_function::AsyncFunction; pub use cache_scan::*; diff --git a/src/query/sql/src/planner/plans/operator.rs b/src/query/sql/src/planner/plans/operator.rs index 800ace522c08a..8a1e32c211666 100644 --- a/src/query/sql/src/planner/plans/operator.rs +++ b/src/query/sql/src/planner/plans/operator.rs @@ -18,7 +18,6 @@ use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use super::add_row_number::AddRowNumber; use super::aggregate::Aggregate; use super::dummy_table_scan::DummyTableScan; use super::eval_scalar::EvalScalar; @@ -102,7 +101,6 @@ pub enum RelOp { ConstantTableScan, ExpressionScan, CacheScan, - AddRowNumber, Udf, AsyncFunction, RecursiveCteScan, @@ -124,7 +122,6 @@ pub enum RelOperator { Sort(Sort), Limit(Limit), Exchange(Exchange), - AddRowNumber(AddRowNumber), UnionAll(UnionAll), DummyTableScan(DummyTableScan), Window(Window), @@ -159,7 +156,6 @@ impl Operator for RelOperator { RelOperator::ConstantTableScan(rel_op) => rel_op.rel_op(), RelOperator::ExpressionScan(rel_op) => rel_op.rel_op(), RelOperator::CacheScan(rel_op) => rel_op.rel_op(), - RelOperator::AddRowNumber(rel_op) => rel_op.rel_op(), RelOperator::Udf(rel_op) => rel_op.rel_op(), RelOperator::RecursiveCteScan(rel_op) => rel_op.rel_op(), RelOperator::AsyncFunction(rel_op) => rel_op.rel_op(), @@ -178,7 +174,6 @@ impl Operator for RelOperator { RelOperator::Sort(rel_op) => rel_op.arity(), RelOperator::Limit(rel_op) => rel_op.arity(), RelOperator::Exchange(rel_op) => rel_op.arity(), - RelOperator::AddRowNumber(rel_op) => rel_op.arity(), RelOperator::UnionAll(rel_op) => rel_op.arity(), RelOperator::DummyTableScan(rel_op) => rel_op.arity(), RelOperator::Window(rel_op) => rel_op.arity(), @@ -213,7 +208,6 @@ impl Operator for RelOperator { RelOperator::ConstantTableScan(rel_op) => rel_op.derive_relational_prop(rel_expr), RelOperator::ExpressionScan(rel_op) => rel_op.derive_relational_prop(rel_expr), RelOperator::CacheScan(rel_op) => rel_op.derive_relational_prop(rel_expr), - RelOperator::AddRowNumber(rel_op) => rel_op.derive_relational_prop(rel_expr), RelOperator::Udf(rel_op) => rel_op.derive_relational_prop(rel_expr), RelOperator::RecursiveCteScan(rel_op) => rel_op.derive_relational_prop(rel_expr), RelOperator::AsyncFunction(rel_op) => rel_op.derive_relational_prop(rel_expr), @@ -240,7 +234,6 @@ impl Operator for RelOperator { RelOperator::ConstantTableScan(rel_op) => rel_op.derive_physical_prop(rel_expr), RelOperator::ExpressionScan(rel_op) => rel_op.derive_physical_prop(rel_expr), RelOperator::CacheScan(rel_op) => rel_op.derive_physical_prop(rel_expr), - RelOperator::AddRowNumber(rel_op) => rel_op.derive_physical_prop(rel_expr), RelOperator::Udf(rel_op) => rel_op.derive_physical_prop(rel_expr), RelOperator::RecursiveCteScan(rel_op) => rel_op.derive_physical_prop(rel_expr), RelOperator::AsyncFunction(rel_op) => rel_op.derive_physical_prop(rel_expr), @@ -267,7 +260,6 @@ impl Operator for RelOperator { RelOperator::ConstantTableScan(rel_op) => rel_op.derive_stats(rel_expr), RelOperator::ExpressionScan(rel_op) => rel_op.derive_stats(rel_expr), RelOperator::CacheScan(rel_op) => rel_op.derive_stats(rel_expr), - RelOperator::AddRowNumber(rel_op) => rel_op.derive_stats(rel_expr), RelOperator::Udf(rel_op) => rel_op.derive_stats(rel_expr), 
RelOperator::RecursiveCteScan(rel_op) => rel_op.derive_stats(rel_expr), RelOperator::AsyncFunction(rel_op) => rel_op.derive_stats(rel_expr), @@ -334,9 +326,6 @@ impl Operator for RelOperator { RelOperator::CacheScan(rel_op) => { rel_op.compute_required_prop_child(ctx, rel_expr, child_index, required) } - RelOperator::AddRowNumber(rel_op) => { - rel_op.compute_required_prop_child(ctx, rel_expr, child_index, required) - } RelOperator::Udf(rel_op) => { rel_op.compute_required_prop_child(ctx, rel_expr, child_index, required) } @@ -386,9 +375,6 @@ impl Operator for RelOperator { RelOperator::Exchange(rel_op) => { rel_op.compute_required_prop_children(ctx, rel_expr, required) } - RelOperator::AddRowNumber(rel_op) => { - rel_op.compute_required_prop_children(ctx, rel_expr, required) - } RelOperator::UnionAll(rel_op) => { rel_op.compute_required_prop_children(ctx, rel_expr, required) } diff --git a/src/query/storages/fuse/src/operations/append.rs b/src/query/storages/fuse/src/operations/append.rs index ac177aadc9ce9..ad65ad0f92248 100644 --- a/src/query/storages/fuse/src/operations/append.rs +++ b/src/query/storages/fuse/src/operations/append.rs @@ -27,7 +27,7 @@ use databend_common_expression::SortColumnDescription; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_pipeline_core::processors::ProcessorPtr; use databend_common_pipeline_core::Pipeline; -use databend_common_pipeline_transforms::processors::create_dummy_items; +use databend_common_pipeline_transforms::processors::create_dummy_item; use databend_common_pipeline_transforms::processors::BlockCompactor; use databend_common_pipeline_transforms::processors::BlockCompactorForCopy; use databend_common_pipeline_transforms::processors::TransformCompact; @@ -96,20 +96,12 @@ impl FuseTable { ctx: Arc, pipeline: &mut Pipeline, block_thresholds: BlockThresholds, - specified_mid_len: usize, - specified_last_len: usize, + transform_len: usize, + need_match: bool, ) -> Result { let cluster_stats_gen = self.get_cluster_stats_gen(ctx.clone(), 0, block_thresholds, None)?; - let output_lens = pipeline.output_len(); - let items1 = create_dummy_items( - output_lens - specified_mid_len - specified_last_len, - output_lens, - ); - let items2 = create_dummy_items( - output_lens - specified_mid_len - specified_last_len, - output_lens, - ); + let operators = cluster_stats_gen.operators.clone(); if !operators.is_empty() { let num_input_columns = self.table_info.schema().fields().len(); @@ -122,10 +114,11 @@ impl FuseTable { num_input_columns, )) }, - specified_mid_len, + transform_len, )?; - builder.add_items_prepend(items1); - builder.add_items(create_dummy_items(specified_last_len, specified_last_len)); + if need_match { + builder.add_items_prepend(vec![create_dummy_item()]); + } pipeline.add_pipe(builder.finalize()); } @@ -144,10 +137,11 @@ impl FuseTable { let mut builder = pipeline.try_create_transform_pipeline_builder_with_len( || Ok(TransformSortPartial::new(None, sort_desc.clone())), - specified_mid_len, + transform_len, )?; - builder.add_items_prepend(items2); - builder.add_items(create_dummy_items(specified_last_len, specified_last_len)); + if need_match { + builder.add_items_prepend(vec![create_dummy_item()]); + } pipeline.add_pipe(builder.finalize()); } Ok(cluster_stats_gen) diff --git a/src/query/storages/fuse/src/operations/merge_into/mod.rs b/src/query/storages/fuse/src/operations/merge_into/mod.rs index 6a80257ed28ef..e53a7d1a3a7e6 100644 --- a/src/query/storages/fuse/src/operations/merge_into/mod.rs +++ 
b/src/query/storages/fuse/src/operations/merge_into/mod.rs @@ -22,7 +22,4 @@ pub use processors::MergeIntoSplitProcessor; pub use processors::MixRowIdKindAndLog; pub use processors::RowNumberAndLogSplitProcessor; pub use processors::SourceFullMatched; -pub use processors::TransformAddRowNumberColumnProcessor; -pub use processors::TransformDistributedMergeIntoBlockDeserialize; -pub use processors::TransformDistributedMergeIntoBlockSerialize; pub use processors::UnMatchedExprs; diff --git a/src/query/storages/fuse/src/operations/merge_into/processors/mod.rs b/src/query/storages/fuse/src/operations/merge_into/processors/mod.rs index c400e14eae31b..70cfd2b6000e5 100644 --- a/src/query/storages/fuse/src/operations/merge_into/processors/mod.rs +++ b/src/query/storages/fuse/src/operations/merge_into/processors/mod.rs @@ -12,17 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -mod processor_distributed_merge_into_block_deserialize; -mod processor_distributed_merge_into_block_serialize; mod processor_merge_into_matched_and_split; mod processor_merge_into_not_matched; mod processor_merge_into_split; mod processor_merge_into_split_row_number_and_log; -mod transform_add_rownumber_column; mod transform_matched_mutation_aggregator; -pub use processor_distributed_merge_into_block_deserialize::TransformDistributedMergeIntoBlockDeserialize; -pub use processor_distributed_merge_into_block_serialize::TransformDistributedMergeIntoBlockSerialize; pub use processor_merge_into_matched_and_split::MatchedSplitProcessor; pub use processor_merge_into_matched_and_split::MixRowIdKindAndLog; pub(crate) use processor_merge_into_matched_and_split::RowIdKind; @@ -31,4 +26,3 @@ pub use processor_merge_into_not_matched::MergeIntoNotMatchedProcessor; pub use processor_merge_into_not_matched::UnMatchedExprs; pub use processor_merge_into_split::MergeIntoSplitProcessor; pub use processor_merge_into_split_row_number_and_log::RowNumberAndLogSplitProcessor; -pub use transform_add_rownumber_column::TransformAddRowNumberColumnProcessor; diff --git a/src/query/storages/fuse/src/operations/merge_into/processors/processor_distributed_merge_into_block_deserialize.rs b/src/query/storages/fuse/src/operations/merge_into/processors/processor_distributed_merge_into_block_deserialize.rs deleted file mode 100644 index 184223ae7e1fc..0000000000000 --- a/src/query/storages/fuse/src/operations/merge_into/processors/processor_distributed_merge_into_block_deserialize.rs +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::sync::Arc; - -use databend_common_exception::ErrorCode; -use databend_common_exception::Result; -use databend_common_expression::BlockMetaInfoDowncast; -use databend_common_expression::DataBlock; -use databend_common_pipeline_core::processors::InputPort; -use databend_common_pipeline_core::processors::OutputPort; -use databend_common_pipeline_core::processors::ProcessorPtr; -use databend_common_pipeline_core::Pipe; -use databend_common_pipeline_core::PipeItem; -use databend_common_pipeline_transforms::processors::Transform; -use databend_common_pipeline_transforms::processors::Transformer; - -use super::processor_merge_into_matched_and_split::MixRowIdKindAndLog; -use super::RowIdKind; - -// It will receive MutationLogs Or RowIds. -// But for MutationLogs, it's a empty block -// we will add a fake BlockEntry to make it consistent with -// RowIds, because arrow-flight requires this. -pub struct TransformDistributedMergeIntoBlockDeserialize; - -/// this processor will be used in the future for merge into based on shuffle hash join. -impl TransformDistributedMergeIntoBlockDeserialize { - pub fn create(input: Arc, output: Arc) -> ProcessorPtr { - ProcessorPtr::create(Transformer::create( - input, - output, - TransformDistributedMergeIntoBlockDeserialize {}, - )) - } - - fn create_distributed_merge_into_transform_item() -> PipeItem { - let input = InputPort::create(); - let output = OutputPort::create(); - PipeItem::create( - TransformDistributedMergeIntoBlockDeserialize::create(input.clone(), output.clone()), - vec![input], - vec![output], - ) - } - - pub fn into_pipe() -> Pipe { - let pipe_item = Self::create_distributed_merge_into_transform_item(); - Pipe::create(1, 1, vec![pipe_item]) - } -} - -#[async_trait::async_trait] -impl Transform for TransformDistributedMergeIntoBlockDeserialize { - const NAME: &'static str = "TransformDistributedMergeIntoBlockDeserialize"; - - fn transform(&mut self, data: DataBlock) -> Result { - let mix_kind = MixRowIdKindAndLog::downcast_ref_from(data.get_meta().unwrap()).unwrap(); - match mix_kind.kind { - 0 => Ok(DataBlock::new_with_meta( - data.columns().to_vec(), - data.num_rows(), - Some(Box::new(mix_kind.log.clone().unwrap())), - )), - - 1 => Ok(DataBlock::new_with_meta( - data.columns().to_vec(), - data.num_rows(), - Some(Box::new(RowIdKind::Update)), - )), - 2 => Ok(DataBlock::new_with_meta( - data.columns().to_vec(), - data.num_rows(), - Some(Box::new(RowIdKind::Delete)), - )), - _ => Err(ErrorCode::BadBytes("get error MixRowIdKindAndLog kind")), - } - } -} diff --git a/src/query/storages/fuse/src/operations/merge_into/processors/processor_distributed_merge_into_block_serialize.rs b/src/query/storages/fuse/src/operations/merge_into/processors/processor_distributed_merge_into_block_serialize.rs deleted file mode 100644 index c11901a0b1f9f..0000000000000 --- a/src/query/storages/fuse/src/operations/merge_into/processors/processor_distributed_merge_into_block_serialize.rs +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -use databend_common_exception::Result; -use databend_common_expression::types::DataType; -use databend_common_expression::types::NumberDataType::UInt64; -use databend_common_expression::types::NumberType; -use databend_common_expression::BlockEntry; -use databend_common_expression::BlockMetaInfoDowncast; -use databend_common_expression::DataBlock; -use databend_common_expression::Value; -use databend_common_pipeline_transforms::processors::Transform; - -use super::processor_merge_into_matched_and_split::MixRowIdKindAndLog; -use super::RowIdKind; -use crate::operations::common::MutationLogs; - -// It will receive MutationLogs Or RowIds. -// But for MutationLogs, it's a empty block -// we will add a fake BlockEntry to make it consistent with -// RowIds, because arrow-flight requires this. -pub struct TransformDistributedMergeIntoBlockSerialize; - -#[async_trait::async_trait] -impl Transform for TransformDistributedMergeIntoBlockSerialize { - const NAME: &'static str = "TransformDistributedMergeIntoBlockSerialize"; - - fn transform(&mut self, data: DataBlock) -> Result { - // 1. MutationLogs - if data.is_empty() { - let scalar_value = Value::>::Scalar(0); - let entry = BlockEntry::new(DataType::Number(UInt64), scalar_value.upcast()); - let log = MutationLogs::try_from(data)?; - Ok(DataBlock::new_with_meta( - vec![entry], - 1, - Some(Box::new(MixRowIdKindAndLog { - log: Some(log), - kind: 0, - })), - )) - } else { - // RowIdKind - let row_id_kind = RowIdKind::downcast_ref_from(data.get_meta().unwrap()).unwrap(); - Ok(DataBlock::new_with_meta( - data.columns().to_vec(), - data.num_rows(), - Some(Box::new(MixRowIdKindAndLog { - log: None, - kind: match row_id_kind { - RowIdKind::Update => 1, - RowIdKind::Delete => 2, - }, - })), - )) - } - } -} diff --git a/src/query/storages/fuse/src/operations/merge_into/processors/transform_add_rownumber_column.rs b/src/query/storages/fuse/src/operations/merge_into/processors/transform_add_rownumber_column.rs deleted file mode 100644 index 2d39d4e617d4e..0000000000000 --- a/src/query/storages/fuse/src/operations/merge_into/processors/transform_add_rownumber_column.rs +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::sync::atomic::AtomicU64; -use std::sync::atomic::Ordering; -use std::sync::Arc; - -use databend_common_exception::Result; -use databend_common_expression::types::DataType; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::UInt64Type; -use databend_common_expression::BlockEntry; -use databend_common_expression::DataBlock; -use databend_common_expression::FromData; -use databend_common_expression::Value; -use databend_common_metrics::storage::*; -use databend_common_pipeline_core::processors::InputPort; -use databend_common_pipeline_core::processors::OutputPort; -use databend_common_pipeline_core::processors::ProcessorPtr; -use databend_common_pipeline_transforms::processors::Transform; -use databend_common_pipeline_transforms::processors::Transformer; - -const PREFIX_OFFSET: usize = 48; - -pub struct TransformAddRowNumberColumnProcessor { - // node_id - prefix: u64, - // current row number, row_number shouldn't be overflow 48bits - row_number: Arc, -} - -impl TransformAddRowNumberColumnProcessor { - pub fn new(node_id: u16, row_number: Arc) -> Self { - TransformAddRowNumberColumnProcessor { - prefix: (node_id as u64) << PREFIX_OFFSET, - row_number, - } - } - pub fn create( - input: Arc, - output: Arc, - node_id: u16, - row_number: Arc, - ) -> Result { - Ok(ProcessorPtr::create(Transformer::create( - input, - output, - TransformAddRowNumberColumnProcessor { - prefix: (node_id as u64) << PREFIX_OFFSET, - row_number, - }, - ))) - } -} - -impl TransformAddRowNumberColumnProcessor { - fn generate_row_number(&mut self, num_rows: u64) -> u64 { - let row_number = self.row_number.fetch_add(num_rows, Ordering::SeqCst); - self.prefix | row_number - } -} - -#[async_trait::async_trait] -impl Transform for TransformAddRowNumberColumnProcessor { - const NAME: &'static str = "TransformAddRowNumberColumnProcessor"; - fn transform(&mut self, data: DataBlock) -> Result { - let num_rows = data.num_rows() as u64; - let row_number = self.generate_row_number(num_rows); - let mut row_numbers = Vec::with_capacity(data.num_rows()); - for number in row_number..row_number + num_rows { - row_numbers.push(number); - } - merge_into_distributed_generate_row_numbers(row_numbers.len() as u32); - let mut data_block = data; - let row_number_entry = BlockEntry::new( - DataType::Number(NumberDataType::UInt64), - Value::Column(UInt64Type::from_data(row_numbers)), - ); - - data_block.add_column(row_number_entry); - Ok(data_block) - } -} diff --git a/tests/sqllogictests/suites/mode/cluster/merge_into_non_equal_distributed.test b/tests/sqllogictests/suites/mode/cluster/merge_into_non_equal_distributed.test index 21e23af9c6826..7f09d27ba0172 100644 --- a/tests/sqllogictests/suites/mode/cluster/merge_into_non_equal_distributed.test +++ b/tests/sqllogictests/suites/mode/cluster/merge_into_non_equal_distributed.test @@ -57,37 +57,37 @@ query T explain merge into t1 using (select * from t2) as t on t1.a > t.a when matched then update * when not matched then insert *; ---- CommitSink -└── Exchange - ├── output columns: [#_row_number] - ├── exchange type: Merge - └── MergeInto - ├── target table: [catalog: default] [database: default] [table: t1] - ├── matched update: [condition: None, update set a = if(CAST(_predicate (#18446744073709551615) AS Boolean NULL), a (#0), t1.a (#1))] - ├── unmatched insert: [condition: None, insert into (a) values(a (#0))] - └── RowFetch - ├── output columns: [t1.a (#1), t1._row_id (#2), t2.a (#0), #_row_number] - ├── columns to fetch: [] - └── 
HashJoin - ├── output columns: [t1.a (#1), t1._row_id (#2), t2.a (#0), #_row_number] - ├── join type: RIGHT OUTER - ├── build keys: [] - ├── probe keys: [] - ├── filters: [t1.a (#1) > t.a (#0)] - ├── estimated rows: 15.00 - ├── Exchange(Build) - │ ├── output columns: [t2.a (#0), #_row_number] - │ ├── exchange type: Broadcast - │ └── TableScan - │ ├── table: default.default.t2 - │ ├── output columns: [a (#0)] - │ ├── read rows: 1 - │ ├── read size: < 1 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 1.00 - └── TableScan(Probe) +└── MergeInto + ├── target table: [catalog: default] [database: default] [table: t1] + ├── matched update: [condition: None, update set a = if(CAST(_predicate (#18446744073709551615) AS Boolean NULL), a (#0), t1.a (#1))] + ├── unmatched insert: [condition: None, insert into (a) values(a (#0))] + └── RowFetch + ├── output columns: [t1.a (#1), t1._row_id (#2), t2.a (#0)] + ├── columns to fetch: [] + └── HashJoin + ├── output columns: [t1.a (#1), t1._row_id (#2), t2.a (#0)] + ├── join type: RIGHT OUTER + ├── build keys: [] + ├── probe keys: [] + ├── filters: [t1.a (#1) > t.a (#0)] + ├── estimated rows: 15.00 + ├── Exchange(Build) + │ ├── output columns: [t2.a (#0)] + │ ├── exchange type: Merge + │ └── TableScan + │ ├── table: default.default.t2 + │ ├── output columns: [a (#0)] + │ ├── read rows: 1 + │ ├── read size: < 1 KiB + │ ├── partitions total: 1 + │ ├── partitions scanned: 1 + │ ├── pruning stats: [segments: , blocks: ] + │ ├── push downs: [filters: [], limit: NONE] + │ └── estimated rows: 1.00 + └── Exchange(Probe) + ├── output columns: [t1.a (#1), t1._row_id (#2)] + ├── exchange type: Merge + └── TableScan ├── table: default.default.t1 ├── output columns: [a (#1), _row_id (#2)] ├── read rows: 15 @@ -103,39 +103,39 @@ query T explain merge into t1 using t2 on t1.a < t2.a when matched then update * when not matched then insert *; ---- CommitSink -└── Exchange - ├── output columns: [#_row_id] - ├── exchange type: Merge - └── MergeInto - ├── target table: [catalog: default] [database: default] [table: t1] - ├── matched update: [condition: None, update set a = if(CAST(_predicate (#18446744073709551615) AS Boolean NULL), a (#0), t1.a (#2))] - ├── unmatched insert: [condition: None, insert into (a) values(a (#0))] - └── RowFetch - ├── output columns: [t1.a (#2), t1._row_id (#3), t2.a (#0), t2._row_id (#1)] - ├── columns to fetch: [] - └── HashJoin - ├── output columns: [t1.a (#2), t1._row_id (#3), t2.a (#0), t2._row_id (#1)] - ├── join type: RIGHT OUTER - ├── build keys: [] - ├── probe keys: [] - ├── filters: [t1.a (#2) < t2.a (#0)] - ├── estimated rows: 15.00 - ├── Exchange(Build) - │ ├── output columns: [t2.a (#0), t2._row_id (#1)] - │ ├── exchange type: Broadcast - │ └── TableScan - │ ├── table: default.default.t2 - │ ├── output columns: [a (#0), _row_id (#1)] - │ ├── read rows: 1 - │ ├── read size: < 1 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 1.00 - └── TableScan(Probe) +└── MergeInto + ├── target table: [catalog: default] [database: default] [table: t1] + ├── matched update: [condition: None, update set a = if(CAST(_predicate (#18446744073709551615) AS Boolean NULL), a (#0), t1.a (#1))] + ├── unmatched insert: [condition: None, insert into (a) values(a (#0))] + └── RowFetch + ├── output columns: 
[t1.a (#1), t1._row_id (#2), t2.a (#0)] + ├── columns to fetch: [] + └── HashJoin + ├── output columns: [t1.a (#1), t1._row_id (#2), t2.a (#0)] + ├── join type: RIGHT OUTER + ├── build keys: [] + ├── probe keys: [] + ├── filters: [t1.a (#1) < t2.a (#0)] + ├── estimated rows: 15.00 + ├── Exchange(Build) + │ ├── output columns: [t2.a (#0)] + │ ├── exchange type: Merge + │ └── TableScan + │ ├── table: default.default.t2 + │ ├── output columns: [a (#0)] + │ ├── read rows: 1 + │ ├── read size: < 1 KiB + │ ├── partitions total: 1 + │ ├── partitions scanned: 1 + │ ├── pruning stats: [segments: , blocks: ] + │ ├── push downs: [filters: [], limit: NONE] + │ └── estimated rows: 1.00 + └── Exchange(Probe) + ├── output columns: [t1.a (#1), t1._row_id (#2)] + ├── exchange type: Merge + └── TableScan ├── table: default.default.t1 - ├── output columns: [a (#2), _row_id (#3)] + ├── output columns: [a (#1), _row_id (#2)] ├── read rows: 15 ├── read size: < 1 KiB ├── partitions total: 3 @@ -204,44 +204,50 @@ explain merge into t1 using (select $1 as a from @ss) as t2 on t1.a = t2.a when ---- CommitSink └── Exchange - ├── output columns: [#_row_number] + ├── output columns: [] ├── exchange type: Merge └── MergeInto ├── target table: [catalog: default] [database: default] [table: t1] ├── matched update: [condition: None, update set a = if(CAST(_predicate (#18446744073709551615) AS Boolean NULL), CAST(a (#0) AS Int32 NULL), t1.a (#1))] ├── unmatched insert: [condition: None, insert into (a) values(CAST(a (#0) AS Int32 NULL))] └── RowFetch - ├── output columns: [t1.a (#1), t1._row_id (#2), stage._$1 (#0), #_row_number] + ├── output columns: [t1.a (#1), t1._row_id (#2), stage._$1 (#0)] ├── columns to fetch: [] - └── HashJoin - ├── output columns: [t1.a (#1), t1._row_id (#2), stage._$1 (#0), #_row_number] - ├── join type: RIGHT OUTER - ├── build keys: [CAST(t2.a (#0) AS Int64 NULL)] - ├── probe keys: [CAST(t1.a (#1) AS Int64 NULL)] - ├── filters: [] - ├── estimated rows: 0.00 - ├── Exchange(Build) - │ ├── output columns: [stage._$1 (#0), #_row_number] - │ ├── exchange type: Broadcast - │ └── TableScan - │ ├── table: default.system.stage - │ ├── output columns: [_$1 (#0)] - │ ├── read rows: 6 - │ ├── read size: < 1 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 0.00 - └── TableScan(Probe) - ├── table: default.default.t1 - ├── output columns: [a (#1), _row_id (#2)] - ├── read rows: 2 - ├── read size: < 1 KiB - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 2.00 + └── Exchange + ├── output columns: [t1.a (#1), t1._row_id (#2), stage._$1 (#0)] + ├── exchange type: Hash(bit_and(bit_shift_right(t1._row_id (#2), CAST(31 AS UInt64 NULL)), CAST(2047 AS UInt64 NULL))) + └── HashJoin + ├── output columns: [t1.a (#1), t1._row_id (#2), stage._$1 (#0)] + ├── join type: RIGHT OUTER + ├── build keys: [CAST(t2.a (#0) AS Int64 NULL)] + ├── probe keys: [CAST(t1.a (#1) AS Int64 NULL)] + ├── filters: [] + ├── estimated rows: 0.00 + ├── Exchange(Build) + │ ├── output columns: [stage._$1 (#0)] + │ ├── exchange type: Hash(CAST(t2.a (#0) AS Int64 NULL)) + │ └── TableScan + │ ├── table: default.system.stage + │ ├── output columns: [_$1 (#0)] + │ ├── read rows: 6 + │ ├── read size: < 1 KiB + │ ├── partitions total: 1 + │ ├── partitions scanned: 1 + │ ├── push downs: [filters: [], limit: NONE] + │ └── estimated rows: 0.00 + └── Exchange(Probe) 
+ ├── output columns: [t1.a (#1), t1._row_id (#2)] + ├── exchange type: Hash(CAST(t1.a (#1) AS Int64 NULL)) + └── TableScan + ├── table: default.default.t1 + ├── output columns: [a (#1), _row_id (#2)] + ├── read rows: 2 + ├── read size: < 1 KiB + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 2.00 query TT merge into t1 using (select $1 as a from @ss) as t2 on t1.a = t2.a when matched then update * when not matched then insert *; From 0b7791f137aa00c92ac846a21a432c4ae417e0ac Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Wed, 10 Jul 2024 14:37:58 +0800 Subject: [PATCH 17/21] chore: orc and parquet use option missing_field_as. (#16007) * chore: orc use option missing_field_as. * chore: parquet use option missing_field_as. * fix header. * fix unused deps. * fix taplo. * fix. --- Cargo.lock | 20 ++- Cargo.toml | 2 + src/query/storages/common/stage/Cargo.toml | 21 +++ src/query/storages/common/stage/src/lib.rs | 18 +++ .../common/stage/src/read/columnar/mod.rs | 17 +++ .../stage/src/read/columnar/projection.rs | 120 ++++++++++++++++++ .../storages/common/stage/src/read/mod.rs | 19 +++ .../stage/src/read/single_file_partition.rs} | 16 +-- src/query/storages/orc/Cargo.toml | 2 +- .../src/copy_into_table/processors/source.rs | 4 +- .../orc/src/copy_into_table/projection.rs | 90 ++----------- .../storages/orc/src/copy_into_table/table.rs | 16 ++- src/query/storages/orc/src/lib.rs | 1 - .../storages/orc/src/processors/source.rs | 4 +- src/query/storages/orc/src/read_partition.rs | 5 +- src/query/storages/parquet/Cargo.toml | 2 +- .../src/parquet_rs/copy_into_table/reader.rs | 82 ++---------- .../src/parquet_rs/copy_into_table/table.rs | 7 + src/query/storages/stage/Cargo.toml | 2 + src/query/storages/stage/src/read/mod.rs | 1 - .../stage/src/read/one_file_partition.rs | 58 --------- .../src/read/row_based/processors/reader.rs | 6 +- src/query/storages/stage/src/stage_table.rs | 4 +- .../formats/orc/copy_orc_missing_field.test | 11 +- 24 files changed, 294 insertions(+), 234 deletions(-) create mode 100644 src/query/storages/common/stage/Cargo.toml create mode 100644 src/query/storages/common/stage/src/lib.rs create mode 100644 src/query/storages/common/stage/src/read/columnar/mod.rs create mode 100644 src/query/storages/common/stage/src/read/columnar/projection.rs create mode 100644 src/query/storages/common/stage/src/read/mod.rs rename src/query/storages/{orc/src/orc_file_partition.rs => common/stage/src/read/single_file_partition.rs} (80%) delete mode 100644 src/query/storages/stage/src/read/one_file_partition.rs diff --git a/Cargo.lock b/Cargo.lock index 352e39f8775fa..f0ed0edb3e031 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4439,7 +4439,6 @@ name = "databend-common-storages-orc" version = "0.1.0" dependencies = [ "arrow-array", - "arrow-cast", "arrow-schema", "async-backtrace", "async-trait-fn", @@ -4456,6 +4455,7 @@ dependencies = [ "databend-common-pipeline-sources", "databend-common-pipeline-transforms", "databend-common-storage", + "databend-storages-common-stage", "databend-storages-common-table-meta", "futures-util", "log", @@ -4472,7 +4472,6 @@ version = "0.1.0" dependencies = [ "arrow-array", "arrow-buffer", - "arrow-cast", "arrow-schema", "async-backtrace", "async-trait-fn", @@ -4491,6 +4490,7 @@ dependencies = [ "databend-common-sql", "databend-common-storage", "databend-storages-common-pruner", + "databend-storages-common-stage", 
"databend-storages-common-table-meta", "ethnum", "futures", @@ -4596,6 +4596,7 @@ dependencies = [ "databend-common-storage", "databend-common-storages-orc", "databend-common-storages-parquet", + "databend-storages-common-stage", "databend-storages-common-table-meta", "enum-as-inner 0.6.0", "futures", @@ -5403,6 +5404,21 @@ dependencies = [ "typetag", ] +[[package]] +name = "databend-storages-common-stage" +version = "0.1.0" +dependencies = [ + "arrow-cast", + "arrow-schema", + "databend-common-catalog", + "databend-common-exception", + "databend-common-expression", + "databend-common-functions", + "databend-common-meta-app", + "serde", + "typetag", +] + [[package]] name = "databend-storages-common-table-meta" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 43be6996548bb..e92613cc32c87 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,7 @@ members = [ "src/query/storages/common/index", "src/query/storages/common/io", "src/query/storages/common/pruner", + "src/query/storages/common/stage", "src/query/storages/common/txn", "src/query/storages/common/table_meta", "src/query/storages/delta", @@ -198,6 +199,7 @@ databend-storages-common-cache-manager = { path = "src/query/storages/common/cac databend-storages-common-index = { path = "src/query/storages/common/index" } databend-storages-common-io = { path = "src/query/storages/common/io" } databend-storages-common-pruner = { path = "src/query/storages/common/pruner" } +databend-storages-common-stage = { path = "src/query/storages/common/stage" } databend-storages-common-table-meta = { path = "src/query/storages/common/table_meta" } databend-storages-common-txn = { path = "src/query/storages/common/txn" } diff --git a/src/query/storages/common/stage/Cargo.toml b/src/query/storages/common/stage/Cargo.toml new file mode 100644 index 0000000000000..6e23da1fcb610 --- /dev/null +++ b/src/query/storages/common/stage/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "databend-storages-common-stage" +version = { workspace = true } +authors = { workspace = true } +license = { workspace = true } +publish = { workspace = true } +edition = { workspace = true } + +[dependencies] +arrow-cast = { workspace = true } +arrow-schema = { workspace = true, features = ["serde"] } +databend-common-catalog = { workspace = true } +databend-common-exception = { workspace = true } +databend-common-expression = { workspace = true } +databend-common-functions = { workspace = true } +databend-common-meta-app = { workspace = true } +serde = { workspace = true } +typetag = { workspace = true } + +[lints] +workspace = true diff --git a/src/query/storages/common/stage/src/lib.rs b/src/query/storages/common/stage/src/lib.rs new file mode 100644 index 0000000000000..ba1776e7c859e --- /dev/null +++ b/src/query/storages/common/stage/src/lib.rs @@ -0,0 +1,18 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +mod read; + +pub use read::SingleFilePartition; +pub use read::*; diff --git a/src/query/storages/common/stage/src/read/columnar/mod.rs b/src/query/storages/common/stage/src/read/columnar/mod.rs new file mode 100644 index 0000000000000..fe553923689a6 --- /dev/null +++ b/src/query/storages/common/stage/src/read/columnar/mod.rs @@ -0,0 +1,17 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod projection; + +pub use projection::project_columnar; diff --git a/src/query/storages/common/stage/src/read/columnar/projection.rs b/src/query/storages/common/stage/src/read/columnar/projection.rs new file mode 100644 index 0000000000000..385587ecfc5cc --- /dev/null +++ b/src/query/storages/common/stage/src/read/columnar/projection.rs @@ -0,0 +1,120 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use arrow_cast::can_cast_types; +use arrow_schema::Field; +use databend_common_exception::ErrorCode; +use databend_common_expression::type_check::check_cast; +use databend_common_expression::Expr; +use databend_common_expression::RemoteExpr; +use databend_common_expression::Scalar; +use databend_common_expression::TableSchemaRef; +use databend_common_functions::BUILTIN_FUNCTIONS; +use databend_common_meta_app::principal::NullAs; + +/// 1. try auto cast +/// 2. fill missing value according to NullAs +/// used for orc and parquet now +pub fn project_columnar( + input_schema: &TableSchemaRef, + output_schema: &TableSchemaRef, + null_as: &NullAs, + default_values: &Option>, + location: &str, +) -> databend_common_exception::Result<(Vec, Vec)> { + let mut pushdown_columns = vec![]; + let mut output_projection = vec![]; + + for (i, to_field) in output_schema.fields().iter().enumerate() { + let field_name = to_field.name(); + let expr = match input_schema + .fields() + .iter() + .position(|f| f.name() == field_name) + { + Some(pos) => { + pushdown_columns.push(pos); + let from_field = input_schema.field(pos); + let expr = Expr::ColumnRef { + span: None, + id: pos, + data_type: from_field.data_type().into(), + display_name: from_field.name().clone(), + }; + + // find a better way to do check cast + if from_field.data_type == to_field.data_type { + expr + } else if can_cast_types( + Field::from(from_field).data_type(), + Field::from(to_field).data_type(), + ) { + check_cast( + None, + false, + expr, + &to_field.data_type().into(), + &BUILTIN_FUNCTIONS, + )? 
+ } else { + return Err(ErrorCode::BadDataValueType(format!( + "fail to load file {}: Cannot cast column {} from {:?} to {:?}", + location, + field_name, + from_field.data_type(), + to_field.data_type() + ))); + } + } + None => { + match null_as { + // default + NullAs::Error => { + return Err(ErrorCode::BadDataValueType(format!( + "file {} missing column `{}`", + location, field_name, + ))); + } + NullAs::Null => { + if to_field.is_nullable() { + Expr::Constant { + span: None, + data_type: to_field.data_type().into(), + scalar: Scalar::Null, + } + } else { + return Err(ErrorCode::BadDataValueType(format!( + "{} missing column `{}`", + location, field_name, + ))); + } + } + NullAs::FieldDefault => { + let default_values = &default_values.as_deref().expect( + "default_values should not be none when miss_field_as=FIELD_DEFAULT", + ); + default_values[i].as_expr(&BUILTIN_FUNCTIONS) + } + } + } + }; + output_projection.push(expr); + } + if pushdown_columns.is_empty() { + return Err(ErrorCode::BadBytes(format!( + "not column name match in file {location}", + ))); + } + Ok((output_projection, pushdown_columns)) +} diff --git a/src/query/storages/common/stage/src/read/mod.rs b/src/query/storages/common/stage/src/read/mod.rs new file mode 100644 index 0000000000000..212e7303726e5 --- /dev/null +++ b/src/query/storages/common/stage/src/read/mod.rs @@ -0,0 +1,19 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
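Since project_columnar is now the single cast-or-fill path shared by the ORC and Parquet COPY readers, a short caller sketch may help. Everything below except project_columnar itself and the imported types is a hypothetical placeholder, not code from this patch:

use databend_common_exception::Result;
use databend_common_expression::Expr;
use databend_common_expression::RemoteExpr;
use databend_common_expression::TableSchemaRef;
use databend_common_meta_app::principal::NullAs;
use databend_storages_common_stage::project_columnar;

// Hypothetical wrapper: build the per-file projection for a COPY INTO <table>.
fn build_copy_projection(
    file_schema: &TableSchemaRef,  // schema inferred from the ORC/Parquet file
    table_schema: &TableSchemaRef, // schema of the COPY target table
    default_values: &Option<Vec<RemoteExpr>>,
    location: &str,                // only used to build error messages
) -> Result<(Vec<Expr>, Vec<usize>)> {
    project_columnar(
        file_schema,
        table_schema,
        &NullAs::FieldDefault, // behaviour of missing_field_as = FIELD_DEFAULT
        default_values,        // must be Some(..) when FIELD_DEFAULT is chosen
        location,
    )
}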
+ +mod columnar; +mod single_file_partition; + +pub use columnar::*; +pub use single_file_partition::SingleFilePartition; diff --git a/src/query/storages/orc/src/orc_file_partition.rs b/src/query/storages/common/stage/src/read/single_file_partition.rs similarity index 80% rename from src/query/storages/orc/src/orc_file_partition.rs rename to src/query/storages/common/stage/src/read/single_file_partition.rs index 94e26db2cd50d..4d71086d94ee5 100644 --- a/src/query/storages/orc/src/orc_file_partition.rs +++ b/src/query/storages/common/stage/src/read/single_file_partition.rs @@ -23,20 +23,20 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; #[derive(serde::Serialize, serde::Deserialize, Clone, Eq, PartialEq)] -pub struct OrcFilePartition { +pub struct SingleFilePartition { pub path: String, pub size: usize, } -#[typetag::serde(name = "orc_part")] -impl PartInfo for OrcFilePartition { +#[typetag::serde(name = "single_file_part")] +impl PartInfo for SingleFilePartition { fn as_any(&self) -> &dyn Any { self } fn equals(&self, info: &Box) -> bool { info.as_any() - .downcast_ref::() + .downcast_ref::() .is_some_and(|other| self == other) } @@ -47,12 +47,12 @@ impl PartInfo for OrcFilePartition { } } -impl OrcFilePartition { - pub fn from_part(info: &PartInfoPtr) -> Result<&OrcFilePartition> { +impl SingleFilePartition { + pub fn from_part(info: &PartInfoPtr) -> Result<&SingleFilePartition> { info.as_any() - .downcast_ref::() + .downcast_ref::() .ok_or_else(|| { - ErrorCode::Internal("Cannot downcast from PartInfo to OrcFilePartition.") + ErrorCode::Internal("Cannot downcast from PartInfo to SingleFilePartition.") }) } } diff --git a/src/query/storages/orc/Cargo.toml b/src/query/storages/orc/Cargo.toml index ca7d4f12f00aa..3f79855c9f3a5 100644 --- a/src/query/storages/orc/Cargo.toml +++ b/src/query/storages/orc/Cargo.toml @@ -21,10 +21,10 @@ databend-common-pipeline-core = { workspace = true } databend-common-pipeline-sources = { workspace = true } databend-common-pipeline-transforms = { workspace = true } databend-common-storage = { workspace = true } +databend-storages-common-stage = { workspace = true } databend-storages-common-table-meta = { workspace = true } arrow-array = { workspace = true } -arrow-cast = { workspace = true } arrow-schema = { workspace = true, features = ["serde"] } async-backtrace = { workspace = true } async-trait = { workspace = true } diff --git a/src/query/storages/orc/src/copy_into_table/processors/source.rs b/src/query/storages/orc/src/copy_into_table/processors/source.rs index 39b9875a343c5..761c098e58c11 100644 --- a/src/query/storages/orc/src/copy_into_table/processors/source.rs +++ b/src/query/storages/orc/src/copy_into_table/processors/source.rs @@ -28,13 +28,13 @@ use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::ProcessorPtr; use databend_common_pipeline_sources::AsyncSource; use databend_common_pipeline_sources::AsyncSourcer; +use databend_storages_common_stage::SingleFilePartition; use opendal::Operator; use orc_rust::async_arrow_reader::StripeFactory; use orc_rust::ArrowReaderBuilder; use crate::chunk_reader_impl::OrcChunkReader; use crate::hashable_schema::HashableSchema; -use crate::orc_file_partition::OrcFilePartition; use crate::strip::StripeInMemory; use crate::utils::map_orc_error; @@ -71,7 +71,7 @@ impl ORCSourceForCopy { Some(part) => part, None => return Ok(false), }; - let file = OrcFilePartition::from_part(&part)?.clone(); + let file = 
SingleFilePartition::from_part(&part)?.clone(); let path = file.path.clone(); let size = file.size; diff --git a/src/query/storages/orc/src/copy_into_table/projection.rs b/src/query/storages/orc/src/copy_into_table/projection.rs index 0b2e12f91eb6f..378bd4340d6d4 100644 --- a/src/query/storages/orc/src/copy_into_table/projection.rs +++ b/src/query/storages/orc/src/copy_into_table/projection.rs @@ -14,15 +14,12 @@ use std::sync::Arc; -use arrow_cast::can_cast_types; -use arrow_schema::Field; -use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::type_check::check_cast; use databend_common_expression::Expr; use databend_common_expression::RemoteExpr; use databend_common_expression::TableSchemaRef; -use databend_common_functions::BUILTIN_FUNCTIONS; +use databend_common_meta_app::principal::NullAs; +use databend_storages_common_stage::project_columnar; use crate::hashable_schema::HashableSchema; @@ -30,6 +27,7 @@ use crate::hashable_schema::HashableSchema; pub struct ProjectionFactory { pub output_schema: TableSchemaRef, default_values: Option>, + null_as: NullAs, projections: Arc>>, } @@ -38,10 +36,12 @@ impl ProjectionFactory { pub fn try_create( output_schema: TableSchemaRef, default_values: Option>, + null_as: NullAs, ) -> Result { Ok(Self { output_schema, default_values, + null_as, projections: Default::default(), }) } @@ -49,80 +49,16 @@ impl ProjectionFactory { if let Some(v) = self.projections.get(schema) { Ok(v.clone()) } else { - let v = self.try_create_projection(schema.clone(), location)?; + let v = project_columnar( + &schema.table_schema, + &self.output_schema, + &self.null_as, + &self.default_values, + location, + )? + .0; self.projections.insert(schema.clone(), v.clone()); Ok(v) } } - - fn try_create_projection(&self, schema: HashableSchema, location: &str) -> Result> { - let mut pushdown_columns = vec![]; - let mut output_projection = vec![]; - - let mut num_inputs = 0; - for (i, to_field) in self.output_schema.fields().iter().enumerate() { - let field_name = to_field.name(); - let expr = match schema - .table_schema - .fields() - .iter() - .position(|f| f.name() == field_name) - { - Some(pos) => { - pushdown_columns.push(pos); - let from_field = schema.table_schema.field(pos); - let expr = Expr::ColumnRef { - span: None, - id: pos, - data_type: from_field.data_type().into(), - display_name: from_field.name().clone(), - }; - - // find a better way to do check cast - if from_field.data_type == to_field.data_type { - expr - } else if can_cast_types( - Field::from(from_field).data_type(), - Field::from(to_field).data_type(), - ) { - check_cast( - None, - false, - expr, - &to_field.data_type().into(), - &BUILTIN_FUNCTIONS, - )? - } else { - return Err(ErrorCode::BadDataValueType(format!( - "fail to load file {}: Cannot cast column {} from {:?} to {:?}", - location, - field_name, - from_field.data_type(), - to_field.data_type() - ))); - } - } - None => { - if let Some(remote_exprs) = &self.default_values { - remote_exprs[i].as_expr(&BUILTIN_FUNCTIONS) - } else { - return Err(ErrorCode::BadDataValueType(format!( - "{} missing column {}", - location, field_name, - ))); - } - } - }; - if !matches!(expr, Expr::Constant { .. 
}) { - num_inputs += 1; - } - output_projection.push(expr); - } - if num_inputs == 0 { - return Err(ErrorCode::BadBytes(format!( - "not column name match in parquet file {location}", - ))); - } - Ok(output_projection) - } } diff --git a/src/query/storages/orc/src/copy_into_table/table.rs b/src/query/storages/orc/src/copy_into_table/table.rs index db9ecd54abfd5..c6cfe9c7bd963 100644 --- a/src/query/storages/orc/src/copy_into_table/table.rs +++ b/src/query/storages/orc/src/copy_into_table/table.rs @@ -24,16 +24,17 @@ use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::DataSchema; +use databend_common_meta_app::principal::FileFormatParams; use databend_common_pipeline_core::processors::ProcessorPtr; use databend_common_pipeline_core::Pipeline; use databend_common_pipeline_sources::EmptySource; use databend_common_storage::init_stage_operator; +use databend_storages_common_stage::SingleFilePartition; use crate::copy_into_table::meta::read_metas_in_parallel_for_copy; use crate::copy_into_table::processors::decoder::StripeDecoderForCopy; use crate::copy_into_table::processors::source::ORCSourceForCopy; use crate::copy_into_table::projection::ProjectionFactory; -use crate::orc_file_partition::OrcFilePartition; use crate::read_partition::read_partitions_simple; pub struct OrcTableForCopy {} @@ -46,15 +47,20 @@ impl OrcTableForCopy { _push_down: Option, ) -> Result<(PartStatistics, Partitions)> { let n = ctx.get_settings().get_max_threads()?; + let fmt = match &stage_table_info.stage_info.file_format_params { + FileFormatParams::Orc(fmt) => fmt, + _ => unreachable!("do_read_partitions expect orc"), + }; let parts = read_partitions_simple(ctx, stage_table_info).await?; let projections = Arc::new(ProjectionFactory::try_create( stage_table_info.schema.clone(), stage_table_info.default_values.clone(), + fmt.missing_field_as.clone(), )?); let op = init_stage_operator(&stage_table_info.stage_info)?; let mut files = vec![]; for part in &parts.1.partitions { - let file = OrcFilePartition::from_part(part)?.clone(); + let file = SingleFilePartition::from_part(part)?.clone(); files.push((file.path, file.size as u64)) } read_metas_in_parallel_for_copy(&op, &files, n as usize, &projections).await?; @@ -80,6 +86,11 @@ impl OrcTableForCopy { return Err(ErrorCode::Internal("")); }; + let fmt = match &stage_table_info.stage_info.file_format_params { + FileFormatParams::Orc(fmt) => fmt, + _ => unreachable!("do_read_partitions expect orc"), + }; + let settings = ctx.get_settings(); ctx.set_partitions(plan.parts.clone())?; @@ -94,6 +105,7 @@ impl OrcTableForCopy { let projections = Arc::new(ProjectionFactory::try_create( stage_table_info.schema.clone(), stage_table_info.default_values.clone(), + fmt.missing_field_as.clone(), )?); let output_data_schema = Arc::new(DataSchema::from(stage_table_info.schema())); pipeline.add_transform(|input, output| { diff --git a/src/query/storages/orc/src/lib.rs b/src/query/storages/orc/src/lib.rs index 5829ae99e7635..5c78c6620e2dc 100644 --- a/src/query/storages/orc/src/lib.rs +++ b/src/query/storages/orc/src/lib.rs @@ -26,7 +26,6 @@ mod chunk_reader_impl; mod copy_into_table; mod hashable_schema; -mod orc_file_partition; mod processors; mod read_partition; mod read_pipeline; diff --git a/src/query/storages/orc/src/processors/source.rs b/src/query/storages/orc/src/processors/source.rs index a5f74a2e62304..c330ed6bbc164 100644 --- 
a/src/query/storages/orc/src/processors/source.rs +++ b/src/query/storages/orc/src/processors/source.rs @@ -27,12 +27,12 @@ use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::ProcessorPtr; use databend_common_pipeline_sources::AsyncSource; use databend_common_pipeline_sources::AsyncSourcer; +use databend_storages_common_stage::SingleFilePartition; use opendal::Operator; use orc_rust::async_arrow_reader::StripeFactory; use orc_rust::ArrowReaderBuilder; use crate::chunk_reader_impl::OrcChunkReader; -use crate::orc_file_partition::OrcFilePartition; use crate::strip::StripeInMemory; use crate::utils::map_orc_error; @@ -81,7 +81,7 @@ impl ORCSource { Some(part) => part, None => return Ok(false), }; - let file = OrcFilePartition::from_part(&part)?.clone(); + let file = SingleFilePartition::from_part(&part)?.clone(); let path = file.path.clone(); let size = file.size; diff --git a/src/query/storages/orc/src/read_partition.rs b/src/query/storages/orc/src/read_partition.rs index 81c9e6ab6388f..38fe0cbe92d42 100644 --- a/src/query/storages/orc/src/read_partition.rs +++ b/src/query/storages/orc/src/read_partition.rs @@ -20,8 +20,7 @@ use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::StageTableInfo; use databend_common_catalog::table_context::TableContext; - -use crate::orc_file_partition::OrcFilePartition; +use databend_storages_common_stage::SingleFilePartition; pub async fn read_partitions_simple( ctx: Arc, @@ -50,7 +49,7 @@ pub async fn read_partitions_simple( let partitions = files .into_iter() .map(|v| { - let part = OrcFilePartition { + let part = SingleFilePartition { path: v.path.clone(), size: v.size as usize, }; diff --git a/src/query/storages/parquet/Cargo.toml b/src/query/storages/parquet/Cargo.toml index cc29a35aafbe3..2fa779fb9bf42 100644 --- a/src/query/storages/parquet/Cargo.toml +++ b/src/query/storages/parquet/Cargo.toml @@ -13,7 +13,6 @@ test = true [dependencies] arrow-array = { workspace = true } arrow-buffer = { workspace = true } -arrow-cast = { workspace = true } arrow-schema = { workspace = true } async-backtrace = { workspace = true } async-trait = { workspace = true } @@ -31,6 +30,7 @@ databend-common-pipeline-core = { workspace = true } databend-common-settings = { workspace = true } databend-common-storage = { workspace = true } databend-storages-common-pruner = { workspace = true } +databend-storages-common-stage = { workspace = true } databend-storages-common-table-meta = { workspace = true } ethnum = { workspace = true } futures = { workspace = true } diff --git a/src/query/storages/parquet/src/parquet_rs/copy_into_table/reader.rs b/src/query/storages/parquet/src/parquet_rs/copy_into_table/reader.rs index 3548edac3a079..511c4b941ad29 100644 --- a/src/query/storages/parquet/src/parquet_rs/copy_into_table/reader.rs +++ b/src/query/storages/parquet/src/parquet_rs/copy_into_table/reader.rs @@ -15,19 +15,16 @@ use std::collections::HashMap; use std::sync::Arc; -use arrow_cast::can_cast_types; -use arrow_schema::Field; use databend_common_catalog::plan::Projection; use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::table_context::TableContext; -use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::type_check::check_cast; use databend_common_expression::Expr; use databend_common_expression::RemoteExpr; use 
databend_common_expression::TableSchemaRef; -use databend_common_functions::BUILTIN_FUNCTIONS; +use databend_common_meta_app::principal::NullAs; use databend_common_storage::parquet_rs::infer_schema_with_extension; +use databend_storages_common_stage::project_columnar; use opendal::Operator; use parquet::file::metadata::FileMetaData; @@ -77,74 +74,19 @@ impl RowGroupReaderForCopy { file_metadata: &FileMetaData, output_schema: TableSchemaRef, default_values: Option>, + missing_as: &NullAs, ) -> Result { let arrow_schema = infer_schema_with_extension(file_metadata)?; let schema_descr = file_metadata.schema_descr_ptr(); - let parquet_table_schema = arrow_to_table_schema(&arrow_schema)?; - let mut pushdown_columns = vec![]; - let mut output_projection = vec![]; + let parquet_table_schema = Arc::new(arrow_to_table_schema(&arrow_schema)?); - let mut num_inputs = 0; - for (i, to_field) in output_schema.fields().iter().enumerate() { - let field_name = to_field.name(); - let expr = match parquet_table_schema - .fields() - .iter() - .position(|f| f.name() == field_name) - { - Some(pos) => { - num_inputs += 1; - pushdown_columns.push(pos); - let from_field = parquet_table_schema.field(pos); - let expr = Expr::ColumnRef { - span: None, - id: pos, - data_type: from_field.data_type().into(), - display_name: from_field.name().clone(), - }; - - // find a better way to do check cast - if from_field.data_type == to_field.data_type { - expr - } else if can_cast_types( - Field::from(from_field).data_type(), - Field::from(to_field).data_type(), - ) { - check_cast( - None, - false, - expr, - &to_field.data_type().into(), - &BUILTIN_FUNCTIONS, - )? - } else { - return Err(ErrorCode::BadDataValueType(format!( - "Cannot cast column {} from {:?} to {:?}", - field_name, - from_field.data_type(), - to_field.data_type() - ))); - } - } - None => { - if let Some(remote_expr) = &default_values.as_ref().and_then(|vals| vals.get(i)) - { - remote_expr.as_expr(&BUILTIN_FUNCTIONS) - } else { - return Err(ErrorCode::BadDataValueType(format!( - "{} missing column {}", - location, field_name, - ))); - } - } - }; - output_projection.push(expr); - } - if num_inputs == 0 { - return Err(ErrorCode::BadBytes(format!( - "not column name match in parquet file {location}", - ))); - } + let (mut output_projection, mut pushdown_columns) = project_columnar( + &parquet_table_schema, + &output_schema, + missing_as, + &default_values, + location, + )?; pushdown_columns.sort(); let mapping = pushdown_columns .clone() @@ -169,7 +111,7 @@ impl RowGroupReaderForCopy { let mut reader_builder = ParquetRSReaderBuilder::create_with_parquet_schema( ctx, op, - Arc::new(parquet_table_schema), + parquet_table_schema, schema_descr, ) .with_push_downs(Some(&pushdowns)); diff --git a/src/query/storages/parquet/src/parquet_rs/copy_into_table/table.rs b/src/query/storages/parquet/src/parquet_rs/copy_into_table/table.rs index 5ac735d6334c8..35eacab67d9be 100644 --- a/src/query/storages/parquet/src/parquet_rs/copy_into_table/table.rs +++ b/src/query/storages/parquet/src/parquet_rs/copy_into_table/table.rs @@ -27,6 +27,7 @@ use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::DataSchema; +use databend_common_meta_app::principal::FileFormatParams; use databend_common_pipeline_core::Pipeline; use databend_common_storage::init_stage_operator; use databend_common_storage::FileStatus; @@ -127,6 +128,11 @@ impl ParquetTableForCopy { )); }; + let fmt = 
match &stage_table_info.stage_info.file_format_params { + FileFormatParams::Parquet(fmt) => fmt, + _ => unreachable!("do_read_partitions expect parquet"), + }; + let operator = init_stage_operator(&stage_table_info.stage_info)?; let mut readers = HashMap::new(); @@ -148,6 +154,7 @@ impl ParquetTableForCopy { &file_meta_data, stage_table_info.schema.clone(), stage_table_info.default_values.clone(), + &fmt.missing_field_as, )?, ); } diff --git a/src/query/storages/stage/Cargo.toml b/src/query/storages/stage/Cargo.toml index 324f14f9799eb..42add0ed52ac2 100644 --- a/src/query/storages/stage/Cargo.toml +++ b/src/query/storages/stage/Cargo.toml @@ -34,6 +34,7 @@ databend-common-settings = { workspace = true } databend-common-storage = { workspace = true } databend-common-storages-orc = { workspace = true } databend-common-storages-parquet = { workspace = true } +databend-storages-common-stage = { workspace = true } databend-storages-common-table-meta = { workspace = true } enum-as-inner = "0.6.0" futures = { workspace = true } @@ -43,6 +44,7 @@ parquet = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } typetag = { workspace = true } + uuid = { workspace = true } [build-dependencies] diff --git a/src/query/storages/stage/src/read/mod.rs b/src/query/storages/stage/src/read/mod.rs index 6091fa51c00b8..b0aed6d8c799c 100644 --- a/src/query/storages/stage/src/read/mod.rs +++ b/src/query/storages/stage/src/read/mod.rs @@ -14,5 +14,4 @@ mod error_handler; mod load_context; -pub(crate) mod one_file_partition; pub mod row_based; diff --git a/src/query/storages/stage/src/read/one_file_partition.rs b/src/query/storages/stage/src/read/one_file_partition.rs deleted file mode 100644 index a9b6284a5ec24..0000000000000 --- a/src/query/storages/stage/src/read/one_file_partition.rs +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::any::Any; -use std::hash::DefaultHasher; -use std::hash::Hash; -use std::hash::Hasher; - -use databend_common_catalog::plan::PartInfo; -use databend_common_catalog::plan::PartInfoPtr; -use databend_common_exception::ErrorCode; -use databend_common_exception::Result; - -#[derive(serde::Serialize, serde::Deserialize, Clone, Eq, PartialEq)] -pub struct OneFilePartition { - pub path: String, - pub size: usize, -} - -#[typetag::serde(name = "text_part")] -impl PartInfo for OneFilePartition { - fn as_any(&self) -> &dyn Any { - self - } - - fn equals(&self, info: &Box) -> bool { - info.as_any() - .downcast_ref::() - .is_some_and(|other| self == other) - } - - fn hash(&self) -> u64 { - let mut s = DefaultHasher::new(); - self.path.hash(&mut s); - s.finish() - } -} - -impl OneFilePartition { - pub fn from_part(info: &PartInfoPtr) -> Result<&OneFilePartition> { - info.as_any() - .downcast_ref::() - .ok_or_else(|| { - ErrorCode::Internal("Cannot downcast from PartInfo to OneFilePartition.") - }) - } -} diff --git a/src/query/storages/stage/src/read/row_based/processors/reader.rs b/src/query/storages/stage/src/read/row_based/processors/reader.rs index 99e156dbddac1..5ce00277384a0 100644 --- a/src/query/storages/stage/src/read/row_based/processors/reader.rs +++ b/src/query/storages/stage/src/read/row_based/processors/reader.rs @@ -22,16 +22,16 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::DataBlock; use databend_common_pipeline_sources::PrefetchAsyncSource; +use databend_storages_common_stage::SingleFilePartition; use futures::AsyncRead; use futures::AsyncReadExt; use log::debug; use opendal::Operator; -use crate::read::one_file_partition::OneFilePartition; use crate::read::row_based::batch::BytesBatch; struct FileState { - file: OneFilePartition, + file: SingleFilePartition, reader: opendal::FuturesAsyncReader, offset: usize, } @@ -128,7 +128,7 @@ impl PrefetchAsyncSource for BytesReader { Some(part) => part, None => return Ok(None), }; - let file = OneFilePartition::from_part(&part)?.clone(); + let file = SingleFilePartition::from_part(&part)?.clone(); let reader = self .op diff --git a/src/query/storages/stage/src/stage_table.rs b/src/query/storages/stage/src/stage_table.rs index e26060b8ba83a..d4cc8782e190b 100644 --- a/src/query/storages/stage/src/stage_table.rs +++ b/src/query/storages/stage/src/stage_table.rs @@ -36,9 +36,9 @@ use databend_common_storage::init_stage_operator; use databend_common_storage::StageFileInfo; use databend_common_storages_orc::OrcTableForCopy; use databend_common_storages_parquet::ParquetTableForCopy; +use databend_storages_common_stage::SingleFilePartition; use opendal::Operator; -use crate::read::one_file_partition::OneFilePartition; use crate::read::row_based::RowBasedReadPipelineBuilder; /// TODO: we need to track the data metrics in stage table. 
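Both the ORC sources and the row-based BytesReader now unpack the shared part type in the same way; condensed from the readers in this patch (processor state and the actual opendal read omitted, with ctx being the usual Arc<dyn TableContext>), the per-file step is roughly:

// Take the next file-granularity partition assigned to this processor.
let part = match ctx.get_partition() {
    Some(part) => part,
    None => return Ok(None), // no more files to read
};
// Downcast the opaque PartInfoPtr back to the concrete file description.
let file = SingleFilePartition::from_part(&part)?.clone();
// file.path and file.size then drive the object-storage read for this file.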
@@ -107,7 +107,7 @@ impl StageTable { let partitions = files .into_iter() .map(|v| { - let part = OneFilePartition { + let part = SingleFilePartition { path: v.path.clone(), size: v.size as usize, }; diff --git a/tests/sqllogictests/suites/stage/formats/orc/copy_orc_missing_field.test b/tests/sqllogictests/suites/stage/formats/orc/copy_orc_missing_field.test index 6d2702439971e..08009f7f7fc90 100644 --- a/tests/sqllogictests/suites/stage/formats/orc/copy_orc_missing_field.test +++ b/tests/sqllogictests/suites/stage/formats/orc/copy_orc_missing_field.test @@ -5,8 +5,17 @@ drop table if exists nested statement ok create table nested (map map(string not null,int), nest tuple(float32, bool), value array(int32)); -query +query error 1010.*missing column copy into nested from @data/orc/nested file_format = (type = orc) RETURN_FAILED_ONLY=TRUE + +query ? +select count(*) from nested +---- +0 + + +query +copy into nested from @data/orc/nested file_format = (type = orc missing_field_as=field_default) RETURN_FAILED_ONLY=TRUE ---- From ece2a28b64d369975f0f02648b137c01a38af4bf Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Wed, 10 Jul 2024 15:13:49 +0800 Subject: [PATCH 18/21] fix(query): unify udf allow list validator (#16012) --- src/query/sql/src/planner/binder/udf.rs | 27 +---------------- .../sql/src/planner/semantic/type_check.rs | 22 ++------------ src/query/sql/src/planner/udf_validator.rs | 29 +++++++++++++++++++ 3 files changed, 33 insertions(+), 45 deletions(-) diff --git a/src/query/sql/src/planner/binder/udf.rs b/src/query/sql/src/planner/binder/udf.rs index 6a2db81bd584c..e150c681beeba 100644 --- a/src/query/sql/src/planner/binder/udf.rs +++ b/src/query/sql/src/planner/binder/udf.rs @@ -19,10 +19,8 @@ use databend_common_ast::ast::AlterUDFStmt; use databend_common_ast::ast::CreateUDFStmt; use databend_common_ast::ast::Identifier; use databend_common_ast::ast::UDFDefinition; -use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_exception::ToErrorCode; use databend_common_expression::types::DataType; use databend_common_expression::udf_client::UDFFlightClient; use databend_common_meta_app::principal::LambdaUDF; @@ -82,30 +80,7 @@ impl Binder { handler, language, } => { - if !GlobalConfig::instance().query.enable_udf_server { - return Err(ErrorCode::Unimplemented( - "UDF server is not allowed, you can enable it by setting 'enable_udf_server = true' in query node config", - )); - } - let udf_server_allow_list = &GlobalConfig::instance().query.udf_server_allow_list; - let url_addr = - url::Url::parse(address).map_err_to_code(ErrorCode::InvalidArgument, || { - format!( - "udf server address '{address}' is invalid, please check the address", - ) - })?; - - if udf_server_allow_list.iter().all(|allow_url| { - if let Ok(allow_url) = url::Url::parse(allow_url) { - allow_url.host_str() != url_addr.host_str() - } else { - true - } - }) { - return Err(ErrorCode::InvalidArgument(format!( - "Unallowed UDF server address, '{address}' is not in udf_server_allow_list" - ))); - } + UDFValidator::is_udf_server_allowed(address.as_str())?; let mut arg_datatypes = Vec::with_capacity(arg_types.len()); for arg_type in arg_types { diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 662e32428f1c0..2ff1bbc387798 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -54,7 +54,6 @@ 
use databend_common_catalog::plan::InvertedIndexInfo; use databend_common_catalog::table_context::TableContext; use databend_common_compress::CompressAlgorithm; use databend_common_compress::DecompressDecoder; -use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::display::display_tuple_field_name; @@ -119,6 +118,7 @@ use crate::optimizer::SExpr; use crate::parse_lambda_expr; use crate::planner::metadata::optimize_remove_count_args; use crate::planner::semantic::lowering::TypeCheck; +use crate::planner::udf_validator::UDFValidator; use crate::plans::Aggregate; use crate::plans::AggregateFunction; use crate::plans::AggregateMode; @@ -3357,23 +3357,7 @@ impl<'a> TypeChecker<'a> { arguments: &[Expr], udf_definition: UDFServer, ) -> Result> { - if !GlobalConfig::instance().query.enable_udf_server { - return Err(ErrorCode::Unimplemented( - "UDF server is not allowed, you can enable it by setting 'enable_udf_server = true' in query node config", - )); - } - - let udf_server_allow_list = &GlobalConfig::instance().query.udf_server_allow_list; - let address = &udf_definition.address; - if udf_server_allow_list - .iter() - .all(|addr| addr.trim_end_matches('/') != address.trim_end_matches('/')) - { - return Err(ErrorCode::InvalidArgument(format!( - "Unallowed UDF server address, '{address}' is not in udf_server_allow_list" - ))); - } - + UDFValidator::is_udf_server_allowed(&udf_definition.address)?; if arguments.len() != udf_definition.arg_types.len() { return Err(ErrorCode::InvalidArgument(format!( "Require {} parameters, but got: {}", @@ -3403,7 +3387,7 @@ impl<'a> TypeChecker<'a> { name, func_name: udf_definition.handler, display_name, - udf_type: UDFType::Server(address.clone()), + udf_type: UDFType::Server(udf_definition.address.clone()), arg_types: udf_definition.arg_types, return_type: Box::new(udf_definition.return_type.clone()), arguments: args, diff --git a/src/query/sql/src/planner/udf_validator.rs b/src/query/sql/src/planner/udf_validator.rs index d33fad58c8474..aa3a1212b9861 100644 --- a/src/query/sql/src/planner/udf_validator.rs +++ b/src/query/sql/src/planner/udf_validator.rs @@ -18,8 +18,10 @@ use databend_common_ast::ast::ColumnRef; use databend_common_ast::ast::Expr; use databend_common_ast::ast::FunctionCall; use databend_common_ast::ast::Lambda; +use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_exception::ToErrorCode; use databend_common_functions::is_builtin_function; use derive_visitor::Drive; use derive_visitor::Visitor; @@ -89,4 +91,31 @@ impl UDFValidator { }, ))) } + + pub fn is_udf_server_allowed(address: &str) -> Result<()> { + if !GlobalConfig::instance().query.enable_udf_server { + return Err(ErrorCode::Unimplemented( + "UDF server is not allowed, you can enable it by setting 'enable_udf_server = true' in query node config", + )); + } + + let udf_server_allow_list = &GlobalConfig::instance().query.udf_server_allow_list; + let url_addr = url::Url::parse(address) + .map_err_to_code(ErrorCode::InvalidArgument, || { + format!("udf server address '{address}' is invalid, please check the address",) + })?; + + if udf_server_allow_list.iter().all(|allow_url| { + if let Ok(allow_url) = url::Url::parse(allow_url) { + allow_url.host_str() != url_addr.host_str() + } else { + true + } + }) { + return Err(ErrorCode::InvalidArgument(format!( + "Unallowed UDF server address, '{address}' is not 
in udf_server_allow_list" + ))); + } + Ok(()) + } } From 2e10080408f94d87e202f0ace7bc3cc8994c4c7d Mon Sep 17 00:00:00 2001 From: coldWater Date: Wed, 10 Jul 2024 15:45:10 +0800 Subject: [PATCH 19/21] feat: add NameResolutionSuggest to enhancement table name case sensitive error (#15889) * NameResolutionSuggest Signed-off-by: coldWater * fix Signed-off-by: coldWater * fix Signed-off-by: coldWater * fix Signed-off-by: coldWater * fix Signed-off-by: coldWater * short Signed-off-by: coldWater * it Signed-off-by: coldWater * refine Signed-off-by: coldWater * fix Signed-off-by: coldWater * fix Signed-off-by: coldWater * fix Signed-off-by: coldWater * fully table identifier Signed-off-by: coldWater * refine Signed-off-by: coldWater * suggest database Signed-off-by: coldWater * refine Signed-off-by: coldWater * test Signed-off-by: coldWater * fix Signed-off-by: coldWater * update Signed-off-by: coldWater * refine Signed-off-by: coldWater --------- Signed-off-by: coldWater Co-authored-by: Bohu --- .../binder/bind_table_reference/bind_table.rs | 23 +-- src/query/sql/src/planner/binder/delete.rs | 12 +- src/query/sql/src/planner/binder/insert.rs | 13 +- src/query/sql/src/planner/binder/update.rs | 20 ++- src/query/sql/src/planner/binder/util.rs | 135 ++++++++++++++++++ src/query/sql/src/planner/semantic/mod.rs | 1 + .../src/planner/semantic/name_resolution.rs | 25 ++++ .../suites/base/issues/issue_13718.test | 2 +- .../query/case_sensitivity/name_hit.test | 41 ++++++ .../20+_others/20_0013_pretty_error.result | 4 +- .../09_0001_json_response.result | 2 +- 11 files changed, 241 insertions(+), 37 deletions(-) create mode 100644 tests/sqllogictests/suites/query/case_sensitivity/name_hit.test diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs index 60f81b1f63d0e..318a51ae0c3e2 100644 --- a/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs +++ b/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs @@ -44,8 +44,12 @@ impl Binder { temporal: &Option, consume: bool, ) -> Result<(SExpr, BindContext)> { - let (catalog, database, table_name) = - self.normalize_object_identifier_triple(catalog, database, table); + let fully_table = self.fully_table_identifier(catalog, database, table); + let (catalog, database, table_name) = ( + fully_table.catalog_name(), + fully_table.database_name(), + fully_table.table_name(), + ); let table_alias_name = alias .as_ref() .map(|table_alias| self.normalize_identifier(&table_alias.name).name); @@ -108,20 +112,7 @@ impl Binder { } parent = bind_context.parent.as_mut(); } - if e.code() == ErrorCode::UNKNOWN_DATABASE { - return Err(ErrorCode::UnknownDatabase(format!( - "Unknown database `{}` in catalog '{catalog}'", - database - )) - .set_span(*span)); - } - if e.code() == ErrorCode::UNKNOWN_TABLE { - return Err(ErrorCode::UnknownTable(format!( - "Unknown table `{database}`.`{table_name}` in catalog '{catalog}'" - )) - .set_span(*span)); - } - return Err(e); + return Err(fully_table.not_found_suggest_error(e)); } }; diff --git a/src/query/sql/src/planner/binder/delete.rs b/src/query/sql/src/planner/binder/delete.rs index 840ff25609fda..ee0a9941a9e9d 100644 --- a/src/query/sql/src/planner/binder/delete.rs +++ b/src/query/sql/src/planner/binder/delete.rs @@ -77,20 +77,25 @@ impl<'a> Binder { self.init_cte(bind_context, with)?; - let (catalog_name, database_name, table_name) = if let TableReference::Table { + let fully_table = if let 
TableReference::Table { catalog, database, table, .. } = table { - self.normalize_object_identifier_triple(catalog, database, table) + self.fully_table_identifier(catalog, database, table) } else { // we do not support USING clause yet return Err(ErrorCode::Internal( "should not happen, parser should have report error already", )); }; + let (catalog_name, database_name, table_name) = ( + fully_table.catalog_name(), + fully_table.database_name(), + fully_table.table_name(), + ); // Add table lock before execution. let lock_guard = self @@ -102,7 +107,8 @@ impl<'a> Binder { &table_name, &LockTableOption::LockWithRetry, ) - .await?; + .await + .map_err(|err| fully_table.not_found_suggest_error(err))?; let (table_expr, mut context) = self.bind_table_reference(bind_context, table)?; diff --git a/src/query/sql/src/planner/binder/insert.rs b/src/query/sql/src/planner/binder/insert.rs index 078271e000c2d..760f33dd2152a 100644 --- a/src/query/sql/src/planner/binder/insert.rs +++ b/src/query/sql/src/planner/binder/insert.rs @@ -85,12 +85,19 @@ impl Binder { self.init_cte(bind_context, with)?; - let (catalog_name, database_name, table_name) = - self.normalize_object_identifier_triple(catalog, database, table); + let fully_table = self.fully_table_identifier(catalog, database, table); + let (catalog_name, database_name, table_name) = ( + fully_table.catalog_name(), + fully_table.database_name(), + fully_table.table_name(), + ); + let table = self .ctx .get_table(&catalog_name, &database_name, &table_name) - .await?; + .await + .map_err(|err| fully_table.not_found_suggest_error(err))?; + let schema = self.schema_project(&table.schema(), columns)?; let input_source: Result = match source.clone() { diff --git a/src/query/sql/src/planner/binder/update.rs b/src/query/sql/src/planner/binder/update.rs index 1dc53f075f430..a6c4763e380a0 100644 --- a/src/query/sql/src/planner/binder/update.rs +++ b/src/query/sql/src/planner/binder/update.rs @@ -55,28 +55,25 @@ impl Binder { self.init_cte(bind_context, with)?; - let (catalog_name, database_name, table_name) = if let TableReference::Table { + let fully_table = if let TableReference::Table { catalog, database, table, .. } = table { - ( - catalog - .as_ref() - .map_or_else(|| self.ctx.get_current_catalog(), |i| i.name.clone()), - database - .as_ref() - .map_or_else(|| self.ctx.get_current_database(), |i| i.name.clone()), - table.name.clone(), - ) + self.fully_table_identifier(catalog, database, table) } else { // we do not support USING clause yet return Err(ErrorCode::Internal( "should not happen, parser should have report error already", )); }; + let (catalog_name, database_name, table_name) = ( + fully_table.catalog_name(), + fully_table.database_name(), + fully_table.table_name(), + ); // Add table lock. let lock_guard = self @@ -88,7 +85,8 @@ impl Binder { &table_name, &LockTableOption::LockWithRetry, ) - .await?; + .await + .map_err(|err| fully_table.not_found_suggest_error(err))?; let (table_expr, mut context) = self.bind_table_reference(bind_context, table)?; diff --git a/src/query/sql/src/planner/binder/util.rs b/src/query/sql/src/planner/binder/util.rs index 552e8093d1e24..dba07594f5f8a 100644 --- a/src/query/sql/src/planner/binder/util.rs +++ b/src/query/sql/src/planner/binder/util.rs @@ -12,14 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. 
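The binder changes below (util.rs together with name_resolution.rs) route unknown-database and unknown-table errors through a new NameResolutionSuggest so the message can hint whether quoting or unquoting the identifier would make it resolve. A minimal standalone sketch of that decision table follows, with plain bools and a &str standing in for the real NameResolutionContext and Identifier types (simplified purely for illustration; the variant spelling Unqoted follows the patch, and the ident_needs_quote guard is elided):

// Illustration only: mirrors the (unquoted_cs, quoted_cs, is_quoted) table
// introduced in name_resolution.rs, with simplified inputs.
enum NameResolutionSuggest {
    Quoted,
    Unqoted,
}

fn not_found_suggest(
    unquoted_ident_case_sensitive: bool,
    quoted_ident_case_sensitive: bool,
    is_quoted: bool,
    name: &str,
) -> Option<NameResolutionSuggest> {
    // Only identifiers containing ASCII uppercase can be lost to case folding.
    if !name.chars().any(|c| c.is_ascii_uppercase()) {
        return None;
    }
    match (
        unquoted_ident_case_sensitive,
        quoted_ident_case_sensitive,
        is_quoted,
    ) {
        // Unquoted names get folded while quoted names do not: suggest quoting.
        (false, true, false) => Some(NameResolutionSuggest::Quoted),
        // Quoted names are kept verbatim while unquoted names are folded:
        // suggest dropping the quotes (the real code additionally checks
        // ident_needs_quote before offering this).
        (true, false, true) => Some(NameResolutionSuggest::Unqoted),
        _ => None,
    }
}

With unquoted_ident_case_sensitive = 0 and quoted_ident_case_sensitive = 1, an unresolved unquoted Student therefore produces the "Did you mean `Student` (quoted)?" hint exercised by name_hit.test further down in this patch.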
+use databend_common_ast::ast::quote::QuotedIdent; +use databend_common_ast::ast::Identifier; +use databend_common_ast::parser::Dialect; +use databend_common_ast::span::merge_span; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::types::DataType; +use crate::normalize_identifier; use crate::optimizer::SExpr; use crate::plans::Operator; use crate::plans::RelOperator; use crate::Binder; +use crate::NameResolutionContext; +use crate::NameResolutionSuggest; /// Ident name can not contain ' or " /// Forbidden ' or " in UserName and RoleName, to prevent Meta injection problem @@ -85,4 +92,132 @@ impl Binder { } Ok(()) } + + pub fn fully_table_identifier( + &self, + catalog: &Option, + database: &Option, + table: &Identifier, + ) -> FullyTableIdentifier<'_> { + let Binder { + ctx, + name_resolution_ctx, + dialect, + .. + } = self; + let catalog = catalog.to_owned().unwrap_or(Identifier { + span: None, + name: ctx.get_current_catalog(), + quote: Some(dialect.default_ident_quote()), + is_hole: false, + }); + let database = database.to_owned().unwrap_or(Identifier { + span: None, + name: ctx.get_current_database(), + quote: Some(dialect.default_ident_quote()), + is_hole: false, + }); + let database = Identifier { + span: merge_span(catalog.span, database.span), + ..database + }; + let table = Identifier { + span: merge_span(database.span, table.span), + name: table.name.clone(), + ..*table + }; + FullyTableIdentifier { + name_resolution_ctx, + dialect: *dialect, + catalog, + database, + table, + } + } +} + +pub struct FullyTableIdentifier<'a> { + name_resolution_ctx: &'a NameResolutionContext, + dialect: Dialect, + pub catalog: Identifier, + pub database: Identifier, + pub table: Identifier, +} + +impl FullyTableIdentifier<'_> { + pub fn new( + name_resolution_ctx: &NameResolutionContext, + dialect: Dialect, + catalog: Identifier, + database: Identifier, + table: Identifier, + ) -> FullyTableIdentifier<'_> { + FullyTableIdentifier { + name_resolution_ctx, + dialect, + catalog, + database, + table, + } + } + + pub fn catalog_name(&self) -> String { + normalize_identifier(&self.catalog, self.name_resolution_ctx).name + } + + pub fn database_name(&self) -> String { + normalize_identifier(&self.database, self.name_resolution_ctx).name + } + + pub fn table_name(&self) -> String { + normalize_identifier(&self.table, self.name_resolution_ctx).name + } + + pub fn not_found_suggest_error(&self, err: ErrorCode) -> ErrorCode { + let Self { + catalog, + database, + table, + .. + } = self; + match err.code() { + ErrorCode::UNKNOWN_DATABASE => { + let error_message = match self.name_resolution_ctx.not_found_suggest(database) { + Some(NameResolutionSuggest::Quoted) => { + format!( + "Unknown database {catalog}.{database} (unquoted). Did you mean {} (quoted)?", + QuotedIdent(&database.name, self.dialect.default_ident_quote()) + ) + } + Some(NameResolutionSuggest::Unqoted) => { + format!( + "Unknown database {catalog}.{database} (quoted). Did you mean {} (unquoted)?", + &database.name + ) + } + None => format!("Unknown database {catalog}.{database} ."), + }; + ErrorCode::UnknownDatabase(error_message).set_span(database.span) + } + ErrorCode::UNKNOWN_TABLE => { + let error_message = match self.name_resolution_ctx.not_found_suggest(table) { + Some(NameResolutionSuggest::Quoted) => { + format!( + "Unknown table {catalog}.{database}.{table} (unquoted). 
Did you mean {} (quoted)?", + QuotedIdent(&table.name, self.dialect.default_ident_quote()) + ) + } + Some(NameResolutionSuggest::Unqoted) => { + format!( + "Unknown table {catalog}.{database}.{table} (quoted). Did you mean {} (unquoted)?", + &table.name + ) + } + None => format!("Unknown table {catalog}.{database}.{table} ."), + }; + ErrorCode::UnknownTable(error_message).set_span(table.span) + } + _ => err, + } + } } diff --git a/src/query/sql/src/planner/semantic/mod.rs b/src/query/sql/src/planner/semantic/mod.rs index f114681cf40b2..4a67430ca3d28 100644 --- a/src/query/sql/src/planner/semantic/mod.rs +++ b/src/query/sql/src/planner/semantic/mod.rs @@ -39,6 +39,7 @@ pub use name_resolution::compare_table_name; pub use name_resolution::normalize_identifier; pub use name_resolution::IdentifierNormalizer; pub use name_resolution::NameResolutionContext; +pub use name_resolution::NameResolutionSuggest; pub use type_check::resolve_type_name; pub use type_check::resolve_type_name_by_str; pub use type_check::validate_function_arg; diff --git a/src/query/sql/src/planner/semantic/name_resolution.rs b/src/query/sql/src/planner/semantic/name_resolution.rs index 7db239c2f66d3..2b5934a2bf804 100644 --- a/src/query/sql/src/planner/semantic/name_resolution.rs +++ b/src/query/sql/src/planner/semantic/name_resolution.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use databend_common_ast::ast::quote::ident_needs_quote; use databend_common_ast::ast::Identifier; use databend_common_settings::Settings; use derive_visitor::VisitorMut; @@ -23,6 +24,30 @@ pub struct NameResolutionContext { pub deny_column_reference: bool, } +pub enum NameResolutionSuggest { + Quoted, + Unqoted, +} + +impl NameResolutionContext { + pub fn not_found_suggest(&self, ident: &Identifier) -> Option { + if !ident.name.chars().any(|c| c.is_ascii_uppercase()) { + return None; + } + match ( + self.unquoted_ident_case_sensitive, + self.quoted_ident_case_sensitive, + ident.is_quoted(), + ) { + (false, true, false) => Some(NameResolutionSuggest::Quoted), + (true, false, true) if !ident_needs_quote(&ident.name) => { + Some(NameResolutionSuggest::Unqoted) + } + _ => None, + } + } +} + impl Default for NameResolutionContext { fn default() -> Self { Self { diff --git a/tests/sqllogictests/suites/base/issues/issue_13718.test b/tests/sqllogictests/suites/base/issues/issue_13718.test index 277b62442a1fd..7c02aae2b1bad 100644 --- a/tests/sqllogictests/suites/base/issues/issue_13718.test +++ b/tests/sqllogictests/suites/base/issues/issue_13718.test @@ -1,5 +1,5 @@ statement ok DROP DATABASE IF EXISTS xx -statement error (?s)1003.*Unknown database `xx` in catalog 'default' +statement error (?s)1003,.*Unknown database "default".xx \. select * from xx.yy; diff --git a/tests/sqllogictests/suites/query/case_sensitivity/name_hit.test b/tests/sqllogictests/suites/query/case_sensitivity/name_hit.test new file mode 100644 index 0000000000000..8a8426a8f4423 --- /dev/null +++ b/tests/sqllogictests/suites/query/case_sensitivity/name_hit.test @@ -0,0 +1,41 @@ +statement ok +use default + +statement ok +set unquoted_ident_case_sensitive = 0 + +statement ok +set quoted_ident_case_sensitive = 1 + +statement ok +set sql_dialect = 'mysql' + +statement ok +CREATE TABLE `Student`(id int) + +statement error (?s)1025,.*Unknown table `default`\.`default`\.Student \(unquoted\)\. Did you mean `Student` \(quoted\)\? 
+INSERT INTO Student VALUES(1) + +statement error (?s)1025,.*Unknown table `default`\.`default`\.Student \(unquoted\)\. Did you mean `Student` \(quoted\)\? +update Student set id = 1 + +statement error (?s)1025,.*Unknown table `default`\.`default`\.Student \(unquoted\)\. Did you mean `Student` \(quoted\)? +delete from Student + +statement error (?s)1025,.*Unknown table `default`\.`default`\.Student \(unquoted\)\. Did you mean `Student` \(quoted\)\? +select * from Student + +statement ok +set unquoted_ident_case_sensitive = 1 + +statement error (?s)1025,.*Unknown table `default`\.`default`\.student \. +INSERT INTO student VALUES(1) + +statement error (?s)1025,.*Unknown table `default`\.`default`\.student \. +update student set id = 1 + +statement error (?s)1025,.*Unknown table `default`\.`default`\.student \. +delete from student + +statement error (?s)1025,.*Unknown table `default`\.`default`\.student \. +select * from student diff --git a/tests/suites/0_stateless/20+_others/20_0013_pretty_error.result b/tests/suites/0_stateless/20+_others/20_0013_pretty_error.result index 995a684980801..c38ba7770e9a4 100644 --- a/tests/suites/0_stateless/20+_others/20_0013_pretty_error.result +++ b/tests/suites/0_stateless/20+_others/20_0013_pretty_error.result @@ -9,7 +9,7 @@ Error: APIError: ResponseError with 1025: error: --> SQL:1:15 | 1 | select * from t - | ^ Unknown table `default`.`t` in catalog 'default' + | ^ Unknown table "default"."default".t . Error: APIError: ResponseError with 1008: error: @@ -35,7 +35,7 @@ Error: APIError: ResponseError with 1025: error: --> SQL:1:20 | 1 | select t1.a:z from t - | ^ Unknown table `default`.`t` in catalog 'default' + | ^ Unknown table "default"."default".t . Error: APIError: ResponseError with 1065: error: diff --git a/tests/suites/1_stateful/09_http_handler/09_0001_json_response.result b/tests/suites/1_stateful/09_http_handler/09_0001_json_response.result index 5eebbf1e77123..47da31200faf4 100755 --- a/tests/suites/1_stateful/09_http_handler/09_0001_json_response.result +++ b/tests/suites/1_stateful/09_http_handler/09_0001_json_response.result @@ -1,3 +1,3 @@ -{"code":1025,"message":"error: \n --> SQL:1:15\n |\n1 | select * from t1\n | ^^ Unknown table `default`.`t1` in catalog 'default'\n\n","detail":""} +{"code":1025,"message":"error: \n --> SQL:1:15\n |\n1 | select * from t1\n | ^^ Unknown table \"default\".\"default\".t1 .\n\n","detail":""} {"error":{"code":"400","message":"parse error: key must be a string at line 1 column 2"}} {"error":{"code":"404","message":"not found"}} From 3812049369bf577ce386ad0300a2570ed8a8c3db Mon Sep 17 00:00:00 2001 From: codedump Date: Wed, 10 Jul 2024 16:31:00 +0800 Subject: [PATCH 20/21] feat: refactor share spec location and format (#15989) * feat: refactor share spec location and format * feat: refactor share spec location and format * feat: remove unused code * add db id into share path * refactor uri path * add proto conv test * add share test cases * refactor share db id * add rename\revoke database test cases * fix test fail --- Cargo.lock | 1 + src/meta/api/src/lib.rs | 3 +- src/meta/api/src/schema_api_impl.rs | 364 ++++++-- src/meta/api/src/schema_api_impl.rs:3213:21 | 0 src/meta/api/src/share_api_impl.rs | 207 +++-- src/meta/api/src/share_api_test_suite.rs | 832 +++++++++++++++++- src/meta/api/src/util.rs | 163 ++-- src/meta/app/src/schema/database.rs | 24 +- src/meta/app/src/schema/mod.rs | 1 + src/meta/app/src/schema/table.rs | 17 +- src/meta/app/src/share/mod.rs | 3 +- src/meta/app/src/share/share.rs | 28 +- 
.../src/database_from_to_protobuf_impl.rs | 47 + src/meta/proto-conv/src/util.rs | 1 + src/meta/proto-conv/tests/it/main.rs | 1 + src/meta/proto-conv/tests/it/proto_conv.rs | 3 + .../proto-conv/tests/it/v002_database_meta.rs | 1 + .../proto-conv/tests/it/v005_database_meta.rs | 1 + .../proto-conv/tests/it/v055_table_meta.rs | 1 + .../proto-conv/tests/it/v074_table_db_meta.rs | 1 + .../proto-conv/tests/it/v096_database_meta.rs | 1 + .../proto-conv/tests/it/v101_database_meta.rs | 59 ++ src/meta/protos/proto/database.proto | 24 + .../src/catalogs/default/mutable_catalog.rs | 2 +- .../src/databases/share/share_database.rs | 22 +- .../service/src/interpreters/common/mod.rs | 3 - .../src/interpreters/common/shared_table.rs | 34 - .../interpreter_database_create.rs | 42 +- .../interpreters/interpreter_database_drop.rs | 21 +- .../interpreter_database_rename.rs | 22 +- .../interpreter_share_alter_tenants.rs | 32 +- .../interpreters/interpreter_share_create.rs | 15 +- .../interpreters/interpreter_share_drop.rs | 18 +- .../interpreter_share_grant_object.rs | 28 +- .../interpreter_share_revoke_object.rs | 26 +- .../interpreter_table_add_column.rs | 19 +- .../interpreters/interpreter_table_create.rs | 39 +- .../interpreters/interpreter_table_drop.rs | 21 +- .../interpreter_table_drop_column.rs | 18 +- .../interpreter_table_modify_column.rs | 66 +- .../interpreters/interpreter_table_rename.rs | 22 +- .../interpreter_table_rename_column.rs | 18 +- .../interpreter_table_set_options.rs | 13 +- src/query/sharing/src/layer.rs | 6 +- .../sharing/src/share_endpoint_client.rs | 10 +- src/query/storages/fuse/src/fuse_table.rs | 3 +- src/query/storages/share/Cargo.toml | 1 + src/query/storages/share/src/lib.rs | 7 +- src/query/storages/share/src/share.rs | 148 +++- 49 files changed, 1929 insertions(+), 510 deletions(-) create mode 100644 src/meta/api/src/schema_api_impl.rs:3213:21 create mode 100644 src/meta/proto-conv/tests/it/v101_database_meta.rs delete mode 100644 src/query/service/src/interpreters/common/shared_table.rs diff --git a/Cargo.lock b/Cargo.lock index f0ed0edb3e031..cf11d104f0405 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4562,6 +4562,7 @@ dependencies = [ "databend-storages-common-table-meta", "enumflags2", "goldenfile", + "log", "opendal", "serde", "serde_json", diff --git a/src/meta/api/src/lib.rs b/src/meta/api/src/lib.rs index 1a4d58b720f86..495f732303976 100644 --- a/src/meta/api/src/lib.rs +++ b/src/meta/api/src/lib.rs @@ -48,6 +48,7 @@ pub(crate) use schema_api_impl::get_db_or_err; pub use schema_api_test_suite::SchemaApiTestSuite; pub use sequence_api::SequenceApi; pub use share_api::ShareApi; +pub(crate) use share_api_impl::revoke_object_privileges; pub use share_api_test_suite::ShareApiTestSuite; pub use util::assert_table_exist; pub use util::convert_share_meta_to_spec; @@ -62,7 +63,7 @@ pub use util::get_share_database_id_and_privilege; pub use util::get_share_id_to_name_or_err; pub use util::get_share_meta_by_id_or_err; pub use util::get_share_or_err; -pub use util::get_share_table_info; +pub use util::get_table_info_by_share; pub use util::get_u64_value; pub use util::is_all_db_data_removed; pub use util::is_db_need_to_be_remove; diff --git a/src/meta/api/src/schema_api_impl.rs b/src/meta/api/src/schema_api_impl.rs index a628a4018ed4b..e2cd9605b1600 100644 --- a/src/meta/api/src/schema_api_impl.rs +++ b/src/meta/api/src/schema_api_impl.rs @@ -182,8 +182,15 @@ use databend_common_meta_app::schema::UpsertTableOptionReply; use databend_common_meta_app::schema::UpsertTableOptionReq; 
use databend_common_meta_app::schema::VirtualColumnIdent; use databend_common_meta_app::schema::VirtualColumnMeta; +use databend_common_meta_app::share::share_name_ident::ShareNameIdent; +use databend_common_meta_app::share::ShareGrantObject; +use databend_common_meta_app::share::ShareGrantObjectPrivilege; +use databend_common_meta_app::share::ShareGrantObjectSeqAndId; +use databend_common_meta_app::share::ShareId; +use databend_common_meta_app::share::ShareIdToName; +use databend_common_meta_app::share::ShareObject; use databend_common_meta_app::share::ShareSpec; -use databend_common_meta_app::share::ShareTableInfoMap; +use databend_common_meta_app::share::ShareVecTableInfo; use databend_common_meta_app::tenant::Tenant; use databend_common_meta_app::KeyWithTenant; use databend_common_meta_kvapi::kvapi; @@ -222,7 +229,7 @@ use crate::fetch_id; use crate::get_pb_value; use crate::get_share_id_to_name_or_err; use crate::get_share_meta_by_id_or_err; -use crate::get_share_table_info; +use crate::get_table_info_by_share; use crate::get_u64_value; use crate::is_db_need_to_be_remove; use crate::kv_app_error::KVAppError; @@ -230,6 +237,7 @@ use crate::kv_pb_api::KVPbApi; use crate::list_keys; use crate::list_u64_value; use crate::remove_db_from_share; +use crate::revoke_object_privileges; use crate::send_txn; use crate::serialize_struct; use crate::serialize_u64; @@ -286,7 +294,7 @@ impl + ?Sized> SchemaApi for KV { let mut condition = vec![]; let mut if_then = vec![]; - let spec_vec = if db_id_seq > 0 { + let share_specs = if db_id_seq > 0 { match req.create_option { CreateOption::Create => { return Err(KVAppError::AppError(AppError::DatabaseAlreadyExists( @@ -299,11 +307,11 @@ impl + ?Sized> SchemaApi for KV { CreateOption::CreateIfNotExists => { return Ok(CreateDatabaseReply { db_id, - spec_vec: None, + share_specs: None, }); } CreateOption::CreateOrReplace => { - drop_database_meta( + let (_, share_specs) = drop_database_meta( self, name_key, false, @@ -311,7 +319,8 @@ impl + ?Sized> SchemaApi for KV { &mut condition, &mut if_then, ) - .await? + .await?; + share_specs } } } else { @@ -377,7 +386,7 @@ impl + ?Sized> SchemaApi for KV { ); if succ { - return Ok(CreateDatabaseReply { db_id, spec_vec }); + return Ok(CreateDatabaseReply { db_id, share_specs }); } } } @@ -397,7 +406,7 @@ impl + ?Sized> SchemaApi for KV { let mut condition = vec![]; let mut if_then = vec![]; - let spec_vec = drop_database_meta( + let (db_id, share_specs) = drop_database_meta( self, tenant_dbname, req.if_exists, @@ -421,7 +430,7 @@ impl + ?Sized> SchemaApi for KV { ); if succ { - return Ok(DropDatabaseReply { spec_vec }); + return Ok(DropDatabaseReply { db_id, share_specs }); } } } @@ -549,12 +558,18 @@ impl + ?Sized> SchemaApi for KV { let (old_db_id_seq, old_db_id) = get_u64_value(self, tenant_dbname).await?; if req.if_exists { if old_db_id_seq == 0 { - return Ok(RenameDatabaseReply {}); + return Ok(RenameDatabaseReply { share_spec: None }); } } else { db_has_to_exist(old_db_id_seq, tenant_dbname, "rename_database: src (db)")?; } + let id_key = DatabaseId { db_id: old_db_id }; + let (db_meta_seq, db_meta) = get_pb_value(self, &id_key).await?; + db_has_to_exist(db_meta_seq, tenant_dbname, "rename_database: src (db)")?; + // safe to unwrap + let mut db_meta = db_meta.unwrap(); + debug!( old_db_id = old_db_id, tenant_dbname :? 
=(tenant_dbname); @@ -627,47 +642,108 @@ impl + ?Sized> SchemaApi for KV { }; // rename database - { - // move db id from old db id list to new db id list - db_id_list.pop(); - new_db_id_list.append(old_db_id); + // move db id from old db id list to new db id list + db_id_list.pop(); + new_db_id_list.append(old_db_id); + + let mut condition = vec![ + // Prevent renaming or deleting in other threads. + txn_cond_seq(tenant_dbname, Eq, old_db_id_seq), + txn_cond_seq(&db_id_key, Eq, db_name_seq), + txn_cond_seq(&tenant_newdbname, Eq, 0), + txn_cond_seq(&dbid_idlist, Eq, db_id_list_seq), + txn_cond_seq(&new_dbid_idlist, Eq, new_db_id_list_seq), + ]; + let mut if_then = vec![ + txn_op_del(tenant_dbname), // del old_db_name + // Renaming db should not affect the seq of db_meta. Just modify db name. + txn_op_put(&tenant_newdbname, serialize_u64(old_db_id)?), /* (tenant, new_db_name) -> old_db_id */ + txn_op_put(&new_dbid_idlist, serialize_struct(&new_db_id_list)?), /* _fd_db_id_list/tenant/new_db_name -> new_db_id_list */ + txn_op_put(&dbid_idlist, serialize_struct(&db_id_list)?), /* _fd_db_id_list/tenant/db_name -> db_id_list */ + txn_op_put( + &db_id_key, + serialize_struct(&DatabaseNameIdentRaw::from(&tenant_newdbname))?, + ), /* __fd_database_id_to_name/ -> (tenant,db_name) */ + ]; - let txn_req = TxnRequest { - condition: vec![ - // Prevent renaming or deleting in other threads. - txn_cond_seq(tenant_dbname, Eq, old_db_id_seq), - txn_cond_seq(&db_id_key, Eq, db_name_seq), - txn_cond_seq(&tenant_newdbname, Eq, 0), - txn_cond_seq(&dbid_idlist, Eq, db_id_list_seq), - txn_cond_seq(&new_dbid_idlist, Eq, new_db_id_list_seq), - ], - if_then: vec![ - txn_op_del(tenant_dbname), // del old_db_name - // Renaming db should not affect the seq of db_meta. Just modify db name. - txn_op_put(&tenant_newdbname, serialize_u64(old_db_id)?), /* (tenant, new_db_name) -> old_db_id */ - txn_op_put(&new_dbid_idlist, serialize_struct(&new_db_id_list)?), /* _fd_db_id_list/tenant/new_db_name -> new_db_id_list */ - txn_op_put(&dbid_idlist, serialize_struct(&db_id_list)?), /* _fd_db_id_list/tenant/db_name -> db_id_list */ - txn_op_put( - &db_id_key, - serialize_struct(&DatabaseNameIdentRaw::from(&tenant_newdbname))?, - ), /* __fd_database_id_to_name/ -> (tenant,db_name) */ - ], - else_then: vec![], - }; + // check if database if shared + let share_spec = if !db_meta.shared_by.is_empty() { + let seq_and_id = + ShareGrantObjectSeqAndId::Database(db_meta_seq, old_db_id, db_meta.clone()); + let object = ShareGrantObject::new(&seq_and_id); + let update_on = Utc::now(); + let mut share_spec_vec = vec![]; + for share_id in &db_meta.shared_by { + let (share_meta_seq, mut share_meta) = get_share_meta_by_id_or_err( + self, + *share_id, + format!("rename database: {}", tenant_dbname.display()), + ) + .await?; + + let _ = revoke_object_privileges( + self, + &mut share_meta, + object.clone(), + *share_id, + ShareGrantObjectPrivilege::Usage, + update_on, + &mut condition, + &mut if_then, + ) + .await?; - let (succ, _responses) = send_txn(self, txn_req).await?; + // save share meta + let share_id_key = ShareId { + share_id: *share_id, + }; + condition.push(txn_cond_seq(&share_id_key, Eq, share_meta_seq)); + if_then.push(txn_op_put(&share_id_key, serialize_struct(&share_meta)?)); - debug!( - name :? =(tenant_dbname), - to :? =(&tenant_newdbname), - database_id :? 
=(&old_db_id), - succ = succ; - "rename_database" - ); + let id_key = ShareIdToName { + share_id: *share_id, + }; - if succ { - return Ok(RenameDatabaseReply {}); + let (_share_name_seq, share_name) = get_pb_value(self, &id_key).await?; + + share_spec_vec.push( + convert_share_meta_to_spec( + self, + share_name.unwrap().name(), + *share_id, + share_meta, + ) + .await?, + ); } + + // clean db meta shared_by + db_meta.shared_by.clear(); + let db_id_key = DatabaseId { db_id: old_db_id }; + if_then.push(txn_op_put(&db_id_key, serialize_struct(&db_meta)?)); + Some((share_spec_vec, ShareObject::Db(old_db_id))) + } else { + None + }; + + let txn_req = TxnRequest { + condition, + if_then, + else_then: vec![], + }; + + let (succ, _responses) = send_txn(self, txn_req).await?; + + debug!( + name :? =(tenant_dbname), + to :? =(&tenant_newdbname), + database_id :? =(&old_db_id), + succ = succ; + "rename_database" + ); + + if succ { + return Ok(RenameDatabaseReply { share_spec }); } } } @@ -1538,6 +1614,7 @@ impl + ?Sized> SchemaApi for KV { // fixed let key_dbid = DatabaseId { db_id: db_id.data }; + let save_db_id = db_id.data; // fixed let key_dbid_tbname = DBIdTableName { @@ -1666,7 +1743,7 @@ impl + ?Sized> SchemaApi for KV { // need to combine with drop_table_txn operations, just return // the sequence number associated with the value part of // the key-value pair (key_dbid_tbname, table_id). - (None, id.seq) + (None, id.seq, *id.data) } else { construct_drop_table_txn_operations( self, @@ -1684,7 +1761,7 @@ impl + ?Sized> SchemaApi for KV { } } } else { - (None, 0) + (None, 0, 0) } }; @@ -1807,8 +1884,8 @@ impl + ?Sized> SchemaApi for KV { table_id_seq, db_id: db_id.data, new_table: dbid_tbname_seq == 0, - spec_vec: if let Some((spec_vec, mut_share_table_info)) = opt.0 { - Some((spec_vec, mut_share_table_info)) + spec_vec: if let Some(spec_vec) = opt.0 { + Some((save_db_id, opt.2, spec_vec)) } else { None }, @@ -1880,7 +1957,10 @@ impl + ?Sized> SchemaApi for KV { if req.if_exists { if tb_id_seq == 0 { // TODO: table does not exist, can not return table id. - return Ok(RenameTableReply { table_id: 0 }); + return Ok(RenameTableReply { + table_id: 0, + share_table_info: None, + }); } } else { assert_table_exist( @@ -1979,7 +2059,7 @@ impl + ?Sized> SchemaApi for KV { tb_id_list.pop(); new_tb_id_list.append(table_id); - let condition = vec![ + let mut condition = vec![ // db has not to change, i.e., no new table is created. // Renaming db is OK and does not affect the seq of db_meta. 
txn_cond_seq(&DatabaseId { db_id }, Eq, db_meta_seq), @@ -2014,6 +2094,73 @@ impl + ?Sized> SchemaApi for KV { ); } + // if the table if shared, remove from share + let share_table_info = if !db_meta.shared_by.is_empty() { + let tbid = TableId { table_id }; + + let (tb_meta_seq, table_meta): (_, Option) = + get_pb_value(self, &tbid).await?; + if let Some(mut table_meta) = table_meta { + if !table_meta.shared_by.is_empty() { + let mut spec_vec = Vec::with_capacity(db_meta.shared_by.len()); + for share_id in &table_meta.shared_by { + let res = remove_table_from_share( + self, + *share_id, + table_id, + tenant_dbname_tbname.tenant(), + &mut condition, + &mut then_ops, + ) + .await; + + match res { + Ok((share_name, share_meta)) => { + spec_vec.push( + convert_share_meta_to_spec( + self, + &share_name, + *share_id, + share_meta, + ) + .await?, + ); + } + Err(e) => match e { + // ignore UnknownShareId error + KVAppError::AppError(AppError::UnknownShareId(_)) => { + error!( + "UnknownShareId {} when drop_table_by_id tenant:{} table_id:{} shared by", + share_id, + tenant_dbname_tbname.tenant().tenant_name(), + table_id + ); + } + _ => return Err(e), + }, + } + } + // clear table meta shared_by + table_meta.shared_by.clear(); + condition.push(txn_cond_seq(&tbid, Eq, tb_meta_seq)); + then_ops.push(txn_op_put(&tbid, serialize_struct(&table_meta)?)); + + let share_object = ShareObject::Table(( + db_id, + table_id, + tenant_dbname_tbname.table_name.clone(), + )); + Some((spec_vec, share_object)) + } else { + None + } + } else { + None + } + } else { + None + }; + let txn_req = TxnRequest { condition, if_then: then_ops, @@ -2031,7 +2178,10 @@ impl + ?Sized> SchemaApi for KV { ); if succ { - return Ok(RenameTableReply { table_id }); + return Ok(RenameTableReply { + table_id, + share_table_info, + }); } } } @@ -2493,8 +2643,8 @@ impl + ?Sized> SchemaApi for KV { ); if succ { return Ok(DropTableReply { - spec_vec: if let Some((spec_vec, mut_share_table_info)) = opt.0 { - Some((spec_vec, mut_share_table_info)) + spec_vec: if let Some(spec_vec) = opt.0 { + Some((req.db_id, spec_vec)) } else { None }, @@ -2909,7 +3059,8 @@ impl + ?Sized> SchemaApi for KV { if succ { return Ok(UpsertTableOptionReply { - share_table_info: get_share_table_info_map(self, &table_meta).await?, + share_vec_table_info: get_share_vec_table_info(self, req.table_id, &table_meta) + .await?, }); } } @@ -2966,22 +3117,30 @@ impl + ?Sized> SchemaApi for KV { return Ok(std::result::Result::Err(mismatched_tbs)); } - for (req, (tb_meta_seq, _)) in update_table_metas.iter().zip(tb_meta_vec.iter()) { + let mut new_table_meta_map: BTreeMap = BTreeMap::new(); + for (req, (tb_meta_seq, table_meta)) in update_table_metas.iter().zip(tb_meta_vec.iter()) { let tbid = TableId { table_id: req.0.table_id, }; + // `update_table_meta` MUST NOT modify `shared_by` field + let table_meta = table_meta.as_ref().unwrap(); + let mut new_table_meta = req.0.new_table_meta.clone(); + new_table_meta.shared_by = table_meta.shared_by.clone(); + tbl_seqs.insert(req.0.table_id, *tb_meta_seq); txn_req .condition .push(txn_cond_seq(&tbid, Eq, *tb_meta_seq)); txn_req .if_then - .push(txn_op_put(&tbid, serialize_struct(&req.0.new_table_meta)?)); + .push(txn_op_put(&tbid, serialize_struct(&new_table_meta)?)); txn_req.else_then.push(TxnOp { request: Some(Request::Get(TxnGetRequest { key: tbid.to_string_key(), })), }); + + new_table_meta_map.insert(req.0.table_id, new_table_meta); } for (tbid, req) in copied_files { let tbid = TableId { table_id: tbid }; @@ -3048,16 +3207,17 @@ 
impl + ?Sized> SchemaApi for KV { } let (succ, responses) = send_txn(self, txn_req).await?; if succ { - let mut share_table_info = vec![]; - for (_, tb_meta) in tb_meta_vec { - if let Some(info) = - get_share_table_info_map(self, tb_meta.as_ref().unwrap()).await? + let mut share_vec_table_infos = Vec::with_capacity(new_table_meta_map.len()); + for (table_id, new_table_meta) in new_table_meta_map.iter() { + if let Some(share_vec_table_info) = + get_share_vec_table_info(self, *table_id, new_table_meta).await? { - share_table_info.extend(info); + share_vec_table_infos.push(share_vec_table_info); } } + return Ok(std::result::Result::Ok(UpdateTableMetaReply { - share_table_info: Some(share_table_info), + share_vec_table_infos: Some(share_vec_table_infos), })); } let mut mismatched_tbs = vec![]; @@ -3181,7 +3341,12 @@ impl + ?Sized> SchemaApi for KV { if succ { return Ok(SetTableColumnMaskPolicyReply { - share_table_info: get_share_table_info_map(self, &new_table_meta).await?, + share_vec_table_info: get_share_vec_table_info( + self, + req.table_id, + &new_table_meta, + ) + .await?, }); } } @@ -4188,7 +4353,7 @@ async fn construct_drop_table_txn_operations( if_delete: bool, condition: &mut Vec, if_then: &mut Vec, -) -> Result<(Option<(Vec, Vec)>, u64), KVAppError> { +) -> Result<(Option>, u64, u64), KVAppError> { let tbid = TableId { table_id }; // Check if table exists. @@ -4224,7 +4389,7 @@ async fn construct_drop_table_txn_operations( let (tb_id_seq, _) = get_u64_value(kv_api, &dbid_tbname).await?; if tb_id_seq == 0 { return if if_exists { - Ok((None, 0)) + Ok((None, 0, 0)) } else { return Err(KVAppError::AppError(AppError::UnknownTable( UnknownTable::new(tbname, "drop_table_by_id"), @@ -4281,17 +4446,15 @@ async fn construct_drop_table_txn_operations( // remove table from share let mut spec_vec = Vec::with_capacity(db_meta.shared_by.len()); - let mut mut_share_table_info = Vec::with_capacity(db_meta.shared_by.len()); for share_id in &db_meta.shared_by { let res = remove_table_from_share(kv_api, *share_id, table_id, tenant, condition, if_then).await; match res { - Ok((share_name, share_meta, share_table_info)) => { + Ok((share_name, share_meta)) => { spec_vec.push( convert_share_meta_to_spec(kv_api, &share_name, *share_id, share_meta).await?, ); - mut_share_table_info.push((share_name.to_string(), share_table_info)); } Err(e) => match e { // ignore UnknownShareId error @@ -4334,9 +4497,9 @@ async fn construct_drop_table_txn_operations( } } if spec_vec.is_empty() { - Ok((None, tb_id_seq)) + Ok((None, tb_id_seq, table_id)) } else { - Ok((Some((spec_vec, mut_share_table_info)), tb_id_seq)) + Ok((Some(spec_vec), tb_id_seq, table_id)) } } @@ -4347,7 +4510,7 @@ async fn drop_database_meta( drop_name_key: bool, condition: &mut Vec, if_then: &mut Vec, -) -> Result>, KVAppError> { +) -> Result<(u64, Option>), KVAppError> { let res = get_db_or_err( kv_api, tenant_dbname, @@ -4360,7 +4523,7 @@ async fn drop_database_meta( Err(e) => { if let KVAppError::AppError(AppError::UnknownDatabase(_)) = e { if if_exists { - return Ok(None); + return Ok((0, None)); } } @@ -4375,14 +4538,14 @@ async fn drop_database_meta( } // remove db from share - let mut spec_vec = Vec::with_capacity(db_meta.shared_by.len()); + let mut share_specs = Vec::with_capacity(db_meta.shared_by.len()); for share_id in &db_meta.shared_by { let res = remove_db_from_share(kv_api, *share_id, db_id, tenant_dbname, condition, if_then).await; match res { Ok((share_name, share_meta)) => { - spec_vec.push( + share_specs.push( 
convert_share_meta_to_spec(kv_api, &share_name, *share_id, share_meta).await?, ); } @@ -4399,9 +4562,7 @@ async fn drop_database_meta( }, } } - if !spec_vec.is_empty() { - db_meta.shared_by.clear(); - } + db_meta.shared_by.clear(); let (removed, _from_share) = is_db_need_to_be_remove( kv_api, @@ -4488,7 +4649,11 @@ async fn drop_database_meta( }; } - Ok(Some(spec_vec)) + if share_specs.is_empty() { + Ok((db_id, None)) + } else { + Ok((db_id, Some(share_specs))) + } } /// remove copied files for a table. @@ -4660,19 +4825,22 @@ fn table_has_to_not_exist( } } -async fn get_share_table_info_map( +async fn get_share_vec_table_info( kv_api: &(impl kvapi::KVApi + ?Sized), + table_id: u64, table_meta: &TableMeta, -) -> Result>, KVAppError> { +) -> Result, KVAppError> { if table_meta.shared_by.is_empty() { return Ok(None); } - let mut share_table_info_map_vec = vec![]; + let mut share_vec = vec![]; + let mut share_table_info: Option = None; + let mut db_id: Option = None; for share_id in &table_meta.shared_by { let res = get_share_id_to_name_or_err( kv_api, *share_id, - format!("get_share_table_info_map: {}", share_id), + format!("get_share_vec_table_info: {}", share_id), ) .await; @@ -4681,7 +4849,7 @@ async fn get_share_table_info_map( Err(e) => match e { // ignore UnknownShareId error KVAppError::AppError(AppError::UnknownShareId(_)) => { - error!("UnknownShareId {} when get_share_table_info_map", share_id); + error!("UnknownShareId {} when get_share_vec_table_info", share_id); continue; } _ => return Err(e), @@ -4690,7 +4858,7 @@ async fn get_share_table_info_map( let res = get_share_meta_by_id_or_err( kv_api, *share_id, - format!("get_share_table_info_map: {}", share_id), + format!("get_share_vec_table_info: {}", share_id), ) .await; @@ -4699,17 +4867,33 @@ async fn get_share_table_info_map( Err(e) => match e { // ignore UnknownShareId error KVAppError::AppError(AppError::UnknownShareId(_)) => { - error!("UnknownShareId {} when get_share_table_info_map", share_id); + error!("UnknownShareId {} when get_share_vec_table_info", share_id); continue; } _ => return Err(e), }, }; - share_table_info_map_vec - .push(get_share_table_info(kv_api, &share_name.to_tident(()), &share_meta).await?); + if share_table_info.is_none() { + let share_name_key = ShareNameIdent::new( + Tenant { + tenant: share_name.tenant_name().to_string(), + }, + share_name.share_name(), + ); + let (share_db_id, share_table_info_vec) = + get_table_info_by_share(kv_api, Some(table_id), &share_name_key, &share_meta) + .await?; + share_table_info = Some(share_table_info_vec[0].clone()); + db_id = Some(share_db_id); + } + share_vec.push(share_name.name().clone()); } - Ok(Some(share_table_info_map_vec)) + if let Some(share_table_info) = share_table_info { + Ok(Some((share_vec, db_id.unwrap(), share_table_info))) + } else { + Ok(None) + } } fn build_upsert_table_copied_file_info_conditions( diff --git a/src/meta/api/src/schema_api_impl.rs:3213:21 b/src/meta/api/src/schema_api_impl.rs:3213:21 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/src/meta/api/src/share_api_impl.rs b/src/meta/api/src/share_api_impl.rs index 593db3cafbad3..2cd8ee48dbc2f 100644 --- a/src/meta/api/src/share_api_impl.rs +++ b/src/meta/api/src/share_api_impl.rs @@ -49,7 +49,6 @@ use databend_common_meta_types::TxnCondition; use databend_common_meta_types::TxnOp; use databend_common_meta_types::TxnRequest; use log::debug; -use log::error; use minitrace::func_name; use crate::assert_table_exist; @@ -63,7 +62,6 @@ use 
crate::get_share_account_meta_or_err; use crate::get_share_id_to_name_or_err; use crate::get_share_meta_by_id_or_err; use crate::get_share_or_err; -use crate::get_share_table_info; use crate::get_u64_value; use crate::kv_app_error::KVAppError; use crate::list_keys; @@ -76,6 +74,7 @@ use crate::txn_op_del; use crate::txn_op_put; use crate::util::get_share_endpoint_id_to_name_or_err; use crate::util::get_share_endpoint_or_err; +use crate::util::get_table_info_by_share; use crate::ShareApi; /// ShareApi is implemented upon kvapi::KVApi. @@ -112,7 +111,7 @@ impl> ShareApi for KV { return if req.if_not_exists { Ok(CreateShareReply { share_id, - spec_vec: None, + share_spec: None, }) } else { Err(KVAppError::AppError(AppError::ShareAlreadyExists( @@ -163,7 +162,15 @@ impl> ShareApi for KV { if succ { return Ok(CreateShareReply { share_id, - spec_vec: Some(get_tenant_share_spec_vec(self, name_key.tenant()).await?), + share_spec: Some( + convert_share_meta_to_spec( + self, + req.share_name.name(), + share_id, + share_meta, + ) + .await?, + ), }); } } @@ -203,7 +210,7 @@ impl> ShareApi for KV { if req.if_exists { return Ok(DropShareReply { share_id: None, - spec_vec: None, + share_spec: None, }); } } @@ -225,7 +232,7 @@ impl> ShareApi for KV { if req.if_exists { return Ok(DropShareReply { share_id: Some(share_id), - spec_vec: None, + share_spec: None, }); } } @@ -300,7 +307,15 @@ impl> ShareApi for KV { if succ { return Ok(DropShareReply { share_id: Some(share_id), - spec_vec: Some(get_tenant_share_spec_vec(self, name_key.tenant()).await?), + share_spec: Some( + convert_share_meta_to_spec( + self, + req.share_name.name(), + share_id, + share_meta, + ) + .await?, + ), }); } } @@ -334,7 +349,7 @@ impl> ShareApi for KV { if req.if_exists { return Ok(AddShareAccountsReply { share_id: None, - spec_vec: None, + share_spec: None, }); } } @@ -409,7 +424,15 @@ impl> ShareApi for KV { if succ { return Ok(AddShareAccountsReply { share_id: Some(share_id), - spec_vec: Some(get_tenant_share_spec_vec(self, name_key.tenant()).await?), + share_spec: Some( + convert_share_meta_to_spec( + self, + req.share_name.name(), + share_id, + share_meta, + ) + .await?, + ), }); } } @@ -443,7 +466,7 @@ impl> ShareApi for KV { if req.if_exists { return Ok(RemoveShareAccountsReply { share_id: None, - spec_vec: None, + share_spec: None, }); } } @@ -526,7 +549,15 @@ impl> ShareApi for KV { if succ { return Ok(RemoveShareAccountsReply { share_id: Some(share_id), - spec_vec: Some(get_tenant_share_spec_vec(self, name_key.tenant()).await?), + share_spec: Some( + convert_share_meta_to_spec( + self, + req.share_name.name(), + share_id, + share_meta, + ) + .await?, + ), }); } } @@ -573,8 +604,8 @@ impl> ShareApi for KV { if has_granted_privileges { return Ok(GrantShareObjectReply { share_id, - spec_vec: None, - share_table_info: (share_name_key.name().to_string(), None), + share_spec: None, + grant_share_table: None, }); } @@ -641,13 +672,36 @@ impl> ShareApi for KV { ); if succ { + let grant_share_table = match seq_and_id { + ShareGrantObjectSeqAndId::Database(..) 
=> None, + ShareGrantObjectSeqAndId::Table( + db_id, + _table_meta_seq, + table_id, + _table_meta, + ) => { + let (_, share_table_info) = get_table_info_by_share( + self, + Some(table_id), + share_name_key, + &share_meta, + ) + .await?; + Some((db_id, share_table_info[0].clone())) + } + }; + let share_spec = convert_share_meta_to_spec( + self, + share_name_key.name(), + share_id, + share_meta, + ) + .await?; + return Ok(GrantShareObjectReply { share_id, - spec_vec: Some( - get_tenant_share_spec_vec(self, share_name_key.tenant()).await?, - ), - share_table_info: get_share_table_info(self, share_name_key, &share_meta) - .await?, + share_spec: Some(share_spec), + grant_share_table, }); } } @@ -694,8 +748,8 @@ impl> ShareApi for KV { if !has_granted_privileges { return Ok(RevokeShareObjectReply { share_id, - spec_vec: None, - share_table_info: (share_name_key.name().to_string(), None), + share_spec: None, + revoke_object: None, }); } @@ -727,40 +781,62 @@ impl> ShareApi for KV { txn_op_put(&object, serialize_struct(&share_ids)?), // (object) -> share_ids ]; - let _ = revoke_object_privileges( - self, - &mut share_meta, - object.clone(), - share_id, - req.privilege, - req.update_on, - &mut condition, - &mut if_then, - ) - .await?; - - // update share meta - if_then.push(txn_op_put(&id_key, serialize_struct(&share_meta)?)); /* (share_id) -> share_meta */ - - match seq_and_id { + // construct the revoke_object before modify share meta + let revoke_object = match seq_and_id { ShareGrantObjectSeqAndId::Database(db_meta_seq, db_id, mut db_meta) => { db_meta.shared_by.remove(&share_id); let key = DatabaseId { db_id }; if_then.push(txn_op_put(&key, serialize_struct(&db_meta)?)); condition.push(txn_cond_seq(&key, Eq, db_meta_seq)); + Some(ShareObject::Db(db_id)) } ShareGrantObjectSeqAndId::Table( - _db_id, + db_id, table_meta_seq, table_id, mut table_meta, ) => { table_meta.shared_by.remove(&share_id); let key = TableId { table_id }; + let revoke_table_id = Some(table_id); if_then.push(txn_op_put(&key, serialize_struct(&table_meta)?)); condition.push(txn_cond_seq(&key, Eq, table_meta_seq)); + + let (_, share_table_info) = get_table_info_by_share( + self, + revoke_table_id, + share_name_key, + &share_meta, + ) + .await?; + if share_table_info.is_empty() { + return Err(KVAppError::AppError(AppError::WrongShareObject( + WrongShareObject::new("table_id".to_string()), + ))); + } + + Some(ShareObject::Table(( + db_id, + table_id, + share_table_info[0].name.clone(), + ))) } - } + }; + + let _ = revoke_object_privileges( + self, + &mut share_meta, + object.clone(), + share_id, + req.privilege, + req.update_on, + &mut condition, + &mut if_then, + ) + .await?; + + // update share meta + if_then.push(txn_op_put(&id_key, serialize_struct(&share_meta)?)); /* (share_id) -> share_meta */ let txn_req = TxnRequest { condition, @@ -777,14 +853,14 @@ impl> ShareApi for KV { "revoke_share_object" ); + let share_spec = + convert_share_meta_to_spec(self, share_name_key.name(), share_id, share_meta) + .await?; if succ { return Ok(RevokeShareObjectReply { share_id, - spec_vec: Some( - get_tenant_share_spec_vec(self, share_name_key.tenant()).await?, - ), - share_table_info: get_share_table_info(self, share_name_key, &share_meta) - .await?, + share_spec: Some(share_spec), + revoke_object, }); } } @@ -1757,51 +1833,8 @@ async fn remove_share_id_from_share_objects( Ok(()) } -async fn get_tenant_share_spec_vec( - kv_api: &(impl kvapi::KVApi + ?Sized), - tenant: &Tenant, -) -> Result, KVAppError> { - let mut share_metas = vec![]; - 
let share_name_list = ShareNameIdent::new(tenant, "dummy"); - - let dir_name = DirName::new(share_name_list); - - let share_name_list_keys = list_keys(kv_api, &dir_name).await?; - for share_name in share_name_list_keys { - let res = get_share_or_err( - kv_api, - &share_name, - format!("get_tenant_share_spec_vec: {}", share_name.display()), - ) - .await; - - let (_share_id_seq, share_id, _share_meta_seq, share_meta) = match res { - Ok(x) => x, - Err(e) => match e { - KVAppError::AppError(AppError::UnknownShare(e)) => { - error!("{:?} when get_tenant_share_spec_vec", e); - continue; - } - KVAppError::AppError(AppError::UnknownShareId(_)) => { - error!("{:?} when get_tenant_share_spec_vec", e); - continue; - } - _ => { - return Err(e); - } - }, - }; - - share_metas.push( - convert_share_meta_to_spec(kv_api, share_name.name(), share_id, share_meta).await?, - ); - } - - Ok(share_metas) -} - #[allow(clippy::too_many_arguments)] -async fn revoke_object_privileges( +pub async fn revoke_object_privileges( kv_api: &(impl kvapi::KVApi + ?Sized), share_meta: &mut ShareMeta, object: ShareGrantObject, diff --git a/src/meta/api/src/share_api_test_suite.rs b/src/meta/api/src/share_api_test_suite.rs index 34e06f0bf2b1e..60786329b4752 100644 --- a/src/meta/api/src/share_api_test_suite.rs +++ b/src/meta/api/src/share_api_test_suite.rs @@ -25,15 +25,20 @@ use databend_common_meta_app::schema::DatabaseId; use databend_common_meta_app::schema::DatabaseMeta; use databend_common_meta_app::schema::DropDatabaseReq; use databend_common_meta_app::schema::DropTableByIdReq; +use databend_common_meta_app::schema::RenameDatabaseReq; +use databend_common_meta_app::schema::RenameTableReq; use databend_common_meta_app::schema::TableId; use databend_common_meta_app::schema::TableMeta; use databend_common_meta_app::schema::TableNameIdent; +use databend_common_meta_app::schema::UpdateMultiTableMetaReq; +use databend_common_meta_app::schema::UpdateTableMetaReq; use databend_common_meta_app::share::share_end_point_ident::ShareEndpointIdentRaw; use databend_common_meta_app::share::share_name_ident::ShareNameIdent; use databend_common_meta_app::share::share_name_ident::ShareNameIdentRaw; use databend_common_meta_app::share::*; use databend_common_meta_app::tenant::Tenant; use databend_common_meta_kvapi::kvapi; +use databend_common_meta_types::MatchSeq; use databend_common_meta_types::MetaError; use enumflags2::BitFlags; use log::info; @@ -118,6 +123,8 @@ impl ShareApiTestSuite { { let suite = ShareApiTestSuite {}; + suite.test_share_table(&b.build().await).await?; + suite.test_share_database(&b.build().await).await?; suite.share_create_show_drop(&b.build().await).await?; suite .share_endpoint_create_show_drop(&b.build().await) @@ -135,6 +142,735 @@ impl ShareApiTestSuite { Ok(()) } + #[minitrace::trace] + async fn test_share_table + SchemaApi>( + &self, + mt: &MT, + ) -> anyhow::Result<()> { + let tenant_name = "tenant1"; + + let tenant = Tenant::new_literal(tenant_name); + + let share1 = "share1"; + let share2 = "share2"; + let db_name = "db1"; + let table_name = "table"; + + let share_name1 = ShareNameIdent::new(&tenant, share1); + let share_name2 = ShareNameIdent::new(&tenant, share2); + let share_id1: u64; + let share_id2: u64; + let db_id: u64; + let mut table_id: u64; + + let create_on = Utc::now(); + info!("test replace table"); + { + let req = CreateShareReq { + if_not_exists: false, + share_name: share_name1.clone(), + comment: None, + create_on, + }; + + let res = mt.create_share(req).await; + info!("create share res: 
{:?}", res); + let res = res.unwrap(); + assert_eq!(1, res.share_id, "first database id is 1"); + share_id1 = res.share_id; + + let (share_name_seq, share_name_ret) = + get_share_id_to_name_or_err(mt.as_kv_api(), share_id1, "").await?; + assert!(share_name_seq > 0); + assert_eq!(ShareNameIdentRaw::from(share_name1.clone()), share_name_ret); + + let req = CreateShareReq { + if_not_exists: false, + share_name: share_name2.clone(), + comment: None, + create_on, + }; + + let res = mt.create_share(req).await?; + share_id2 = res.share_id; + + let plan = CreateDatabaseReq { + create_option: CreateOption::Create, + name_ident: DatabaseNameIdent::new(&tenant, db_name), + meta: DatabaseMeta::default(), + }; + + let res = mt.create_database(plan).await?; + info!("create database res: {:?}", res); + assert!(res.share_specs.is_none()); + db_id = res.db_id; + + let req = GrantShareObjectReq { + share_name: share_name1.clone(), + object: ShareGrantObjectName::Database(db_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let res = mt.grant_share_object(req).await?; + info!("grant object res: {:?}", res); + let share_spec = res.share_spec.unwrap(); + assert_eq!(share_spec.name, *share_name1.name()); + assert_eq!(share_spec.database.unwrap().name, db_name.to_string()); + assert_eq!( + share_spec.db_privileges, + Some(BitFlags::from(ShareGrantObjectPrivilege::Usage)) + ); + assert!(res.grant_share_table.is_none()); + + let req = GrantShareObjectReq { + share_name: share_name2.clone(), + object: ShareGrantObjectName::Database(db_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let _res = mt.grant_share_object(req).await?; + + let req = CreateTableReq { + create_option: CreateOption::Create, + name_ident: TableNameIdent { + tenant: tenant.clone(), + db_name: db_name.to_string(), + table_name: table_name.to_string(), + }, + table_meta: TableMeta::default(), + as_dropped: false, + }; + + let res = mt.create_table(req.clone()).await?; + info!("create table res: {:?}", res); + assert!(res.spec_vec.is_none()); + table_id = res.table_id; + + let req = GrantShareObjectReq { + share_name: share_name1.clone(), + object: ShareGrantObjectName::Table(db_name.to_string(), table_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let res = mt.grant_share_object(req).await?; + let (share_db_id, grant_share_table) = res.grant_share_table.unwrap(); + assert_eq!(share_db_id, db_id); + assert_eq!(grant_share_table.name, table_name.to_string()); + + let req = GrantShareObjectReq { + share_name: share_name2.clone(), + object: ShareGrantObjectName::Table(db_name.to_string(), table_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let res = mt.grant_share_object(req).await?; + let (share_db_id, grant_share_table) = res.grant_share_table.unwrap(); + assert_eq!(share_db_id, db_id); + assert_eq!(grant_share_table.name, table_name.to_string()); + + // check TableMeta.shared_by contain share id1 and id2 + let tbid = TableId { table_id }; + let table_meta: TableMeta = get_kv_data(mt.as_kv_api(), &tbid).await?; + assert_eq!(table_meta.shared_by.len(), 2); + assert!(table_meta.shared_by.contains(&share_id1)); + assert!(table_meta.shared_by.contains(&share_id2)); + + // when replace table MUST return table spec + let req = CreateTableReq { + create_option: CreateOption::CreateOrReplace, + name_ident: TableNameIdent { + tenant: tenant.clone(), + db_name: 
db_name.to_string(), + table_name: table_name.to_string(), + }, + table_meta: TableMeta::default(), + as_dropped: false, + }; + + let res = mt.create_table(req.clone()).await?; + info!("create table res: {:?}", res); + let (share_db_id, share_table_id, share_specs) = res.spec_vec.unwrap(); + assert_eq!(share_db_id, db_id); + assert_eq!(share_table_id, table_id); + assert_eq!(share_specs.len(), 2); + let share_names: Vec = share_specs + .iter() + .map(|share_spec| share_spec.name.clone()) + .collect(); + assert!(share_names.contains(&share1.to_string())); + assert!(share_names.contains(&share2.to_string())); + assert_eq!(share_specs[0].database.as_ref().unwrap().id, db_id); + assert_eq!(share_specs[0].tables.len(), 0); + assert_eq!(share_specs[1].database.as_ref().unwrap().id, db_id); + assert_eq!(share_specs[1].tables.len(), 0); + table_id = res.table_id; + } + + info!("test drop table"); + { + // grant the table again + let req = GrantShareObjectReq { + share_name: share_name1.clone(), + object: ShareGrantObjectName::Table(db_name.to_string(), table_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let res = mt.grant_share_object(req).await?; + let (share_db_id, grant_share_table) = res.grant_share_table.unwrap(); + assert_eq!(share_db_id, db_id); + assert_eq!(grant_share_table.name, table_name.to_string()); + + let req = GrantShareObjectReq { + share_name: share_name2.clone(), + object: ShareGrantObjectName::Table(db_name.to_string(), table_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let res = mt.grant_share_object(req).await?; + let (share_db_id, grant_share_table) = res.grant_share_table.unwrap(); + assert_eq!(share_db_id, db_id); + assert_eq!(grant_share_table.name, table_name.to_string()); + + // drop table MUST return share table info + let plan = DropTableByIdReq { + if_exists: false, + tenant: tenant.clone(), + table_name: table_name.to_string(), + tb_id: table_id, + db_id, + }; + let res = mt.drop_table_by_id(plan).await?; + let (share_db_id, share_specs) = res.spec_vec.unwrap(); + assert_eq!(share_db_id, db_id); + assert_eq!(share_specs.len(), 2); + let share_names: Vec = share_specs + .iter() + .map(|share_spec| share_spec.name.clone()) + .collect(); + assert!(share_names.contains(&share1.to_string())); + assert!(share_names.contains(&share2.to_string())); + assert_eq!(share_specs[0].database.as_ref().unwrap().id, db_id); + assert_eq!(share_specs[0].tables.len(), 0); + assert_eq!(share_specs[1].database.as_ref().unwrap().id, db_id); + assert_eq!(share_specs[1].tables.len(), 0); + } + + info!("test update table meta"); + { + let req = CreateTableReq { + create_option: CreateOption::Create, + name_ident: TableNameIdent { + tenant: tenant.clone(), + db_name: db_name.to_string(), + table_name: table_name.to_string(), + }, + table_meta: TableMeta::default(), + as_dropped: false, + }; + + let res = mt.create_table(req.clone()).await?; + table_id = res.table_id; + + // grant the table again + let req = GrantShareObjectReq { + share_name: share_name1.clone(), + object: ShareGrantObjectName::Table(db_name.to_string(), table_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let _res = mt.grant_share_object(req).await?; + + let req = GrantShareObjectReq { + share_name: share_name2.clone(), + object: ShareGrantObjectName::Table(db_name.to_string(), table_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, 
+ }; + + let _res = mt.grant_share_object(req).await?; + + let tbid = TableId { table_id }; + let table_meta: TableMeta = get_kv_data(mt.as_kv_api(), &tbid).await?; + assert_eq!(table_meta.shared_by.len(), 2); + assert!(table_meta.shared_by.contains(&share_id1)); + assert!(table_meta.shared_by.contains(&share_id2)); + let old_shared_by = table_meta.shared_by.clone(); + + let mut new_table_meta = TableMeta::default(); + new_table_meta + .options + .insert("key".to_string(), "value".to_string()); + + let req = UpdateTableMetaReq { + table_id, + seq: MatchSeq::Any, + new_table_meta, + }; + let table = mt + .get_table((tenant_name, db_name, table_name).into()) + .await + .unwrap(); + let res = mt + .update_multi_table_meta(UpdateMultiTableMetaReq { + update_table_metas: vec![(req, table.as_ref().clone())], + ..Default::default() + }) + .await?; + let share_vec_table_infos = res.unwrap().share_vec_table_infos.unwrap(); + let (share_names, share_db_id, table_info) = &share_vec_table_infos[0]; + assert_eq!(*share_db_id, db_id); + assert_eq!(share_names.len(), 2); + assert!(share_names.contains(&share_name1.share_name().to_string())); + assert!(share_names.contains(&share_name2.share_name().to_string())); + assert_eq!( + table_info.options().get("key").unwrap(), + &"value".to_string() + ); + + // make sure `update_table_meta` does not change `shared_by` + let tbid = TableId { table_id }; + let table_meta: TableMeta = get_kv_data(mt.as_kv_api(), &tbid).await?; + assert_eq!(table_meta.shared_by, old_shared_by); + } + + info!("test rename table"); + // rename share table MUST remove share table from share + { + // first check TableMeta.shared_by is not empty + let tbid = TableId { table_id }; + let table_meta: TableMeta = get_kv_data(mt.as_kv_api(), &tbid).await?; + assert!(table_meta.shared_by.contains(&share_id1)); + assert!(table_meta.shared_by.contains(&share_id2)); + assert_eq!(table_meta.shared_by.len(), 2); + + let new_tbl_name = "new_tbl_name".to_string(); + let req = RenameTableReq { + if_exists: true, + name_ident: TableNameIdent { + tenant: tenant.clone(), + db_name: db_name.to_string(), + table_name: table_name.to_string(), + }, + new_db_name: db_name.to_string(), + new_table_name: new_tbl_name.to_string(), + }; + + let res = mt.rename_table(req).await?; + let (share_specs, share_object) = res.share_table_info.unwrap(); + assert_eq!(share_specs.len(), 2); + let share_names: Vec = share_specs + .iter() + .map(|share_spec| share_spec.name.clone()) + .collect(); + assert!(share_names.contains(&share1.to_string())); + assert!(share_names.contains(&share2.to_string())); + assert_eq!(share_specs[0].tables.len(), 0); + assert_eq!(share_specs[1].tables.len(), 0); + if let ShareObject::Table((share_db_id, share_table_id, share_table_name)) = + share_object + { + assert_eq!(table_id, share_table_id); + assert_eq!(db_id, share_db_id); + assert_eq!(table_name.to_string(), share_table_name); + } else { + unreachable!() + } + + // check TableMeta.shared_by is empty + let tbid = TableId { table_id }; + let table_meta: TableMeta = get_kv_data(mt.as_kv_api(), &tbid).await?; + assert!(table_meta.shared_by.is_empty()); + } + Ok(()) + } + + #[minitrace::trace] + async fn test_share_database + SchemaApi>( + &self, + mt: &MT, + ) -> anyhow::Result<()> { + let tenant_name = "tenant1"; + + let tenant = Tenant::new_literal(tenant_name); + + let share1 = "share1"; + let share2 = "share2"; + let db_name = "db1"; + let table_name = "table"; + + let share_name1 = ShareNameIdent::new(&tenant, share1); + let 
share_name2 = ShareNameIdent::new(&tenant, share2); + let share_id1: u64; + let share_id2: u64; + let table_id: u64; + let mut db_id: u64; + + let create_on = Utc::now(); + info!("test replace shared database"); + { + let req = CreateShareReq { + if_not_exists: false, + share_name: share_name1.clone(), + comment: None, + create_on, + }; + + let res = mt.create_share(req).await?; + info!("create share res: {:?}", res); + assert_eq!(1, res.share_id, "first database id is 1"); + share_id1 = res.share_id; + + let (share_name_seq, share_name_ret) = + get_share_id_to_name_or_err(mt.as_kv_api(), share_id1, "").await?; + assert!(share_name_seq > 0); + assert_eq!(ShareNameIdentRaw::from(share_name1.clone()), share_name_ret); + + let req = CreateShareReq { + if_not_exists: false, + share_name: share_name2.clone(), + comment: None, + create_on, + }; + + let res = mt.create_share(req).await?; + share_id2 = res.share_id; + + let plan = CreateDatabaseReq { + create_option: CreateOption::Create, + name_ident: DatabaseNameIdent::new(&tenant, db_name), + meta: DatabaseMeta::default(), + }; + + let res = mt.create_database(plan).await?; + info!("create database res: {:?}", res); + assert!(res.share_specs.is_none()); + + let req = GrantShareObjectReq { + share_name: share_name1.clone(), + object: ShareGrantObjectName::Database(db_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let res = mt.grant_share_object(req).await?; + info!("grant object res: {:?}", res); + let share_spec = res.share_spec.unwrap(); + assert_eq!(share_spec.name, *share_name1.name()); + assert_eq!(share_spec.database.unwrap().name, db_name.to_string()); + assert_eq!( + share_spec.db_privileges, + Some(BitFlags::from(ShareGrantObjectPrivilege::Usage)) + ); + assert!(res.grant_share_table.is_none()); + + let req = GrantShareObjectReq { + share_name: share_name2.clone(), + object: ShareGrantObjectName::Database(db_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let _res = mt.grant_share_object(req).await?; + + // when replace shared database MUST return share db spec + let plan = CreateDatabaseReq { + create_option: CreateOption::CreateOrReplace, + name_ident: DatabaseNameIdent::new(&tenant, db_name), + meta: DatabaseMeta::default(), + }; + + let res = mt.create_database(plan).await?; + db_id = res.db_id; + info!("create database res: {:?}", res); + + let share_specs = res.share_specs.unwrap(); + assert_eq!(share_specs.len(), 2); + let share_names: Vec = share_specs + .iter() + .map(|share_spec| share_spec.name.clone()) + .collect(); + assert!(share_names.contains(&share1.to_string())); + assert!(share_names.contains(&share2.to_string())); + assert!(share_specs[0].database.is_none()); + assert!(share_specs[1].database.is_none()); + } + + info!("test drop shared database"); + { + // grant the database again + let req = GrantShareObjectReq { + share_name: share_name1.clone(), + object: ShareGrantObjectName::Database(db_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let res = mt.grant_share_object(req).await?; + let share_spec = res.share_spec.unwrap(); + assert_eq!(share_spec.database.as_ref().unwrap().id, db_id); + + let req = GrantShareObjectReq { + share_name: share_name2.clone(), + object: ShareGrantObjectName::Database(db_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let res = mt.grant_share_object(req).await?; + let share_spec = 
res.share_spec.unwrap(); + assert_eq!(share_spec.database.as_ref().unwrap().id, db_id); + + // drop database show return share database spec + let plan = DropDatabaseReq { + name_ident: DatabaseNameIdent::new(&tenant, db_name), + if_exists: true, + }; + + let res = mt.drop_database(plan).await?; + info!("drop database res: {:?}", res); + let share_specs = res.share_specs.unwrap(); + assert_eq!(share_specs.len(), 2); + let share_names: Vec = share_specs + .iter() + .map(|share_spec| share_spec.name.clone()) + .collect(); + assert!(share_names.contains(&share1.to_string())); + assert!(share_names.contains(&share2.to_string())); + assert!(share_specs[0].database.is_none()); + assert!(share_specs[1].database.is_none()); + } + + info!("test revoke shared database"); + { + // first check share objects + let req = GetShareGrantObjectReq { + share_name: share_name1.clone(), + }; + + let res = mt.get_share_grant_objects(req).await?; + assert!(res.objects.is_empty()); + + let req = GetShareGrantObjectReq { + share_name: share_name2.clone(), + }; + + let res = mt.get_share_grant_objects(req).await?; + assert!(res.objects.is_empty()); + + let plan = CreateDatabaseReq { + create_option: CreateOption::Create, + name_ident: DatabaseNameIdent::new(&tenant, db_name), + meta: DatabaseMeta::default(), + }; + + let res = mt.create_database(plan).await?; + db_id = res.db_id; + info!("create database res: {:?}", res); + assert!(res.share_specs.is_none()); + + let req = GrantShareObjectReq { + share_name: share_name1.clone(), + object: ShareGrantObjectName::Database(db_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let res = mt.grant_share_object(req).await?; + info!("grant object res: {:?}", res); + let share_spec = res.share_spec.unwrap(); + assert_eq!(share_spec.name, *share_name1.name()); + assert_eq!(share_spec.database.unwrap().name, db_name.to_string()); + assert_eq!( + share_spec.db_privileges, + Some(BitFlags::from(ShareGrantObjectPrivilege::Usage)) + ); + assert!(res.grant_share_table.is_none()); + + let req = GrantShareObjectReq { + share_name: share_name2.clone(), + object: ShareGrantObjectName::Database(db_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let _res = mt.grant_share_object(req).await?; + + // check database meta + let dbid_key = DatabaseId { db_id }; + let database_meta: DatabaseMeta = get_kv_data(mt.as_kv_api(), &dbid_key).await?; + assert!(database_meta.shared_by.contains(&share_id1)); + assert!(database_meta.shared_by.contains(&share_id2)); + + // after grant database check share objects + let req = GetShareGrantObjectReq { + share_name: share_name1.clone(), + }; + + let res = mt.get_share_grant_objects(req).await?; + assert_eq!(res.objects.len(), 1); + + let req = GetShareGrantObjectReq { + share_name: share_name2.clone(), + }; + let res = mt.get_share_grant_objects(req).await?; + assert_eq!(res.objects.len(), 1); + + // create a table and grant share to it + let req = CreateTableReq { + create_option: CreateOption::Create, + name_ident: TableNameIdent { + tenant: tenant.clone(), + db_name: db_name.to_string(), + table_name: table_name.to_string(), + }, + table_meta: TableMeta::default(), + as_dropped: false, + }; + + let res = mt.create_table(req.clone()).await?; + info!("create table res: {:?}", res); + assert!(res.spec_vec.is_none()); + table_id = res.table_id; + + let req = GrantShareObjectReq { + share_name: share_name1.clone(), + object: 
ShareGrantObjectName::Table(db_name.to_string(), table_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let _res = mt.grant_share_object(req).await?; + + let req = GrantShareObjectReq { + share_name: share_name2.clone(), + object: ShareGrantObjectName::Table(db_name.to_string(), table_name.to_string()), + grant_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let _res = mt.grant_share_object(req).await?; + + // after grant table check share objects + let req = GetShareGrantObjectReq { + share_name: share_name1.clone(), + }; + + let res = mt.get_share_grant_objects(req).await?; + assert_eq!(res.objects.len(), 2); + + let req = GetShareGrantObjectReq { + share_name: share_name2.clone(), + }; + let res = mt.get_share_grant_objects(req).await?; + assert_eq!(res.objects.len(), 2); + + // check database meta + let dbid_key = DatabaseId { db_id }; + let database_meta: DatabaseMeta = get_kv_data(mt.as_kv_api(), &dbid_key).await?; + assert_eq!(database_meta.shared_by.len(), 2); + assert!(database_meta.shared_by.contains(&share_id1)); + assert!(database_meta.shared_by.contains(&share_id2)); + + // check table meta + let tbid = TableId { table_id }; + let table_meta: TableMeta = get_kv_data(mt.as_kv_api(), &tbid).await?; + assert_eq!(table_meta.shared_by.len(), 2); + assert!(table_meta.shared_by.contains(&share_id1)); + assert!(table_meta.shared_by.contains(&share_id2)); + + // when revoke database priviledge, table priviledge MUST be revoked too. + let req = RevokeShareObjectReq { + share_name: share_name1.clone(), + object: ShareGrantObjectName::Database(db_name.to_string()), + update_on: create_on, + privilege: ShareGrantObjectPrivilege::Usage, + }; + + let res = mt.revoke_share_object(req).await?; + let share_spec = res.clone().share_spec.unwrap(); + assert_eq!(res.share_id, share_id1); + assert!(share_spec.database.is_none()); + assert!(share_spec.tables.is_empty()); + + // after grant table check share objects + let req = GetShareGrantObjectReq { + share_name: share_name1.clone(), + }; + + // check share_grant_objects + let res = mt.get_share_grant_objects(req).await?; + assert!(res.objects.is_empty()); + + // check database meta + let dbid_key = DatabaseId { db_id }; + let database_meta: DatabaseMeta = get_kv_data(mt.as_kv_api(), &dbid_key).await?; + assert_eq!(database_meta.shared_by.len(), 1); + assert!(database_meta.shared_by.contains(&share_id2)); + + // check table meta + let tbid = TableId { table_id }; + let table_meta: TableMeta = get_kv_data(mt.as_kv_api(), &tbid).await?; + assert_eq!(table_meta.shared_by.len(), 1); + assert!(table_meta.shared_by.contains(&share_id2)); + } + + info!("test rename shared database"); + { + // check database meta + let dbid_key = DatabaseId { db_id }; + let database_meta: DatabaseMeta = get_kv_data(mt.as_kv_api(), &dbid_key).await?; + assert!(database_meta.shared_by.contains(&share_id2)); + + // check share objects + let req = GetShareGrantObjectReq { + share_name: share_name2.clone(), + }; + let res = mt.get_share_grant_objects(req).await?; + assert_eq!(res.objects.len(), 2); + + let db2_name = "db2_name"; + // rename database + let req = RenameDatabaseReq { + if_exists: false, + name_ident: DatabaseNameIdent::new(&tenant, db_name), + new_db_name: db2_name.to_string(), + }; + let res = mt.rename_database(req).await?; + info!("rename database res: {:?}", res); + let (share_specs, object) = res.share_spec.unwrap(); + if let ShareObject::Db(old_db_id) = object { + assert_eq!(old_db_id, 
db_id); + } else { + unreachable!() + } + assert_eq!(share_specs.len(), 1); + let share_spec = &share_specs[0]; + assert_eq!(share_spec.name, share2.to_string()); + assert!(share_spec.database.is_none()); + assert!(share_spec.tables.is_empty()); + + // after rename database check database meta + let dbid_key = DatabaseId { db_id }; + let database_meta: DatabaseMeta = get_kv_data(mt.as_kv_api(), &dbid_key).await?; + assert!(database_meta.shared_by.is_empty()); + + // after rename database check share objects + let req = GetShareGrantObjectReq { + share_name: share_name2.clone(), + }; + let res = mt.get_share_grant_objects(req).await?; + assert_eq!(res.objects.len(), 0); + } + Ok(()) + } + #[minitrace::trace] async fn share_create_show_drop>( &self, @@ -174,7 +910,7 @@ impl ShareApiTestSuite { info!("create share res: {:?}", res); let res = res.unwrap(); assert_eq!(1, res.share_id, "first database id is 1"); - assert_eq!(1, res.spec_vec.unwrap().len()); + assert!(res.share_spec.is_some()); share_id = res.share_id; let (share_name_seq, share_name_ret) = @@ -527,7 +1263,7 @@ impl ShareApiTestSuite { info!("add share account res: {:?}", res); assert!(res.is_ok()); let res = res.unwrap(); - assert_eq!(2, res.spec_vec.unwrap().len()); + assert!(res.share_spec.is_some()); let req = CreateShareReq { if_not_exists: false, @@ -540,7 +1276,7 @@ impl ShareApiTestSuite { info!("add share account res: {:?}", res); assert!(res.is_ok()); let res = res.unwrap(); - assert_eq!(1, res.spec_vec.unwrap().len()); + assert!(res.share_spec.is_some()); } info!("--- add account account1"); @@ -553,9 +1289,12 @@ impl ShareApiTestSuite { }; // get share meta and check account has been added - let res = mt.add_share_tenants(req).await; + let res = mt.add_share_tenants(req).await?; info!("add share account res: {:?}", res); - assert!(res.is_ok()); + let share_spec = res.share_spec.unwrap(); + assert_eq!(&share_spec.name, share_name.share_name()); + assert_eq!(share_spec.tenants.len(), 1); + assert_eq!(share_spec.tenants[0], account.to_string()); let (_share_meta_seq, share_meta) = get_share_meta_by_id_or_err(mt.as_kv_api(), share_id, "").await?; @@ -593,9 +1332,11 @@ impl ShareApiTestSuite { }; // get share meta and check account has been added - let res = mt.add_share_tenants(req).await; - info!("add share account res: {:?}", res); - assert!(res.is_ok()); + let res = mt.add_share_tenants(req).await?; + let share_spec = res.share_spec.unwrap(); + assert_eq!(&share_spec.name, share_name3.share_name()); + assert_eq!(share_spec.tenants.len(), 1); + assert_eq!(share_spec.tenants[0], tenant_name1.to_string()); } // test show share api @@ -661,9 +1402,11 @@ impl ShareApiTestSuite { accounts: vec![account2.to_string()], }; - let res = mt.add_share_tenants(req).await; - info!("add share account res: {:?}", res); - assert!(res.is_ok()); + let res = mt.add_share_tenants(req).await?; + let share_spec = res.share_spec.unwrap(); + assert_eq!(&share_spec.name, share_name.share_name()); + assert_eq!(share_spec.tenants.len(), 2); + assert!(share_spec.tenants.contains(&account2.to_string())); let (_share_meta_seq, share_meta) = get_share_meta_by_id_or_err(mt.as_kv_api(), share_id, "").await?; @@ -678,9 +1421,12 @@ impl ShareApiTestSuite { accounts: vec![account2.to_string()], }; - let res = mt.remove_share_tenants(req).await; + let res = mt.remove_share_tenants(req).await?; info!("remove share account res: {:?}", res); - assert!(res.is_ok()); + let share_spec = res.share_spec.unwrap(); + assert_eq!(&share_spec.name, 
share_name.share_name()); + assert_eq!(share_spec.tenants.len(), 1); + assert!(!share_spec.tenants.contains(&account2.to_string())); // check account2 has been removed from share_meta let (_share_meta_seq, share_meta) = @@ -707,8 +1453,9 @@ impl ShareApiTestSuite { share_name: share_name.clone(), }; - let res = mt.drop_share(req).await; - assert!(res.is_ok()); + let res = mt.drop_share(req).await?; + let share_spec = res.share_spec.unwrap(); + assert_eq!(&share_spec.name, share_name.share_name()); // check share account meta has been removed let share_account_name = ShareConsumerIdent::new( @@ -903,8 +1650,14 @@ impl ShareApiTestSuite { let res = mt.grant_share_object(req).await?; info!("grant object res: {:?}", res); - assert_eq!(res.share_table_info.0, *share_name.name()); - assert!(res.share_table_info.1.unwrap().is_empty()); + let share_spec = res.share_spec.unwrap(); + assert_eq!(share_spec.name, *share_name.name()); + assert_eq!(share_spec.database.unwrap().name, db_name.to_string()); + assert_eq!( + share_spec.db_privileges, + Some(BitFlags::from(ShareGrantObjectPrivilege::Usage)) + ); + assert!(res.grant_share_table.is_none()); let tbl_ob_name = ShareGrantObjectName::Table(db_name.to_string(), tbl_name.to_string()); @@ -917,16 +1670,13 @@ impl ShareApiTestSuite { let res = mt.grant_share_object(req).await?; info!("grant object res: {:?}", res); - - assert_eq!(res.share_table_info.0, *share_name.name()); - assert_eq!(res.share_table_info.1.as_ref().unwrap().len(), 1); - assert!( - res.share_table_info - .1 - .as_ref() - .unwrap() - .contains_key(tbl_name), - ); + let share_spec = res.share_spec.unwrap(); + assert_eq!(share_spec.name, *share_name.name()); + assert_eq!(share_spec.tables.len(), 1); + assert_eq!(share_spec.tables[0].name, tbl_name.to_string()); + assert_eq!(share_spec.tables[0].database_id, db_id); + assert_eq!(share_spec.tables[0].table_id, table_id); + assert_eq!(res.grant_share_table.unwrap().1.name, tbl_name.to_string()); let (_share_meta_seq, share_meta) = get_share_meta_by_id_or_err(mt.as_kv_api(), share_id, "").await?; @@ -1022,8 +1772,12 @@ impl ShareApiTestSuite { let res = mt.revoke_share_object(req).await?; info!("revoke object res: {:?}", res); - assert_eq!(res.share_table_info.0, *share_name.name()); - assert!(res.share_table_info.1.unwrap().is_empty()); + let share_spec = res.share_spec.unwrap(); + assert_eq!(share_spec.name, share_name.share_name().to_string()); + assert_eq!( + res.revoke_object, + Some(ShareObject::Table((db_id, table_id, tbl_name.to_string()))) + ); let (_share_meta_seq, share_meta) = get_share_meta_by_id_or_err(mt.as_kv_api(), share_id, "").await?; @@ -1084,6 +1838,13 @@ impl ShareApiTestSuite { let res = mt.grant_share_object(req).await?; info!("grant object res: {:?}", res); + let share_spec = res.share_spec.unwrap(); + assert_eq!(share_spec.name, share_name.share_name().to_string()); + assert_eq!(share_spec.tables.len(), 1); + assert_eq!(share_spec.tables[0].name, tbl_name.to_string()); + assert_eq!(share_spec.tables[0].database_id, db_id); + assert_eq!(share_spec.tables[0].table_id, table_id); + assert_eq!(res.grant_share_table.unwrap().1.name, tbl_name.to_string()); // assert table share exists let (_share_meta_seq, share_meta) = @@ -1101,8 +1862,8 @@ impl ShareApiTestSuite { let res = mt.revoke_share_object(req).await?; info!("revoke object res: {:?}", res); - assert_eq!(res.share_table_info.0, *share_name.name()); - assert!(res.share_table_info.1.is_none()); + assert_eq!(res.share_spec.unwrap().name, *share_name.name()); + 
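// the reply also reports the revoked object itself (here the shared database id) +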
assert_eq!(res.revoke_object, Some(ShareObject::Db(db_id))); // assert share_meta.database is none, and share_meta.entries is empty let (_share_meta_seq, share_meta) = @@ -1224,7 +1985,6 @@ impl ShareApiTestSuite { let res = mt.grant_share_object(req).await?; info!("grant object res: {:?}", res); - assert_eq!(1, res.spec_vec.unwrap().len()); let tbl_ob_name = ShareGrantObjectName::Table(db_name.to_string(), tbl_name.to_string()); @@ -1237,7 +1997,7 @@ impl ShareApiTestSuite { let res = mt.grant_share_object(req).await?; info!("grant object res: {:?}", res); - assert_eq!(1, res.spec_vec.unwrap().len()); + assert_eq!(res.share_spec.unwrap().name, share1.to_string()); } info!("--- get all share objects"); @@ -1606,7 +2366,6 @@ impl ShareApiTestSuite { let res = mt.grant_share_object(req).await?; info!("grant object res: {:?}", res); - assert_eq!(2, res.spec_vec.unwrap().len()); let tbl_ob_name = ShareGrantObjectName::Table(db_name.to_string(), tbl_name.to_string()); @@ -1619,7 +2378,6 @@ impl ShareApiTestSuite { let res = mt.grant_share_object(req).await?; info!("grant object res: {:?}", res); - assert_eq!(2, res.spec_vec.unwrap().len()); let req = GrantShareObjectReq { share_name: share_name2.clone(), @@ -1628,8 +2386,7 @@ impl ShareApiTestSuite { privilege: ShareGrantObjectPrivilege::Usage, }; - let res = mt.grant_share_object(req).await?; - assert_eq!(2, res.spec_vec.unwrap().len()); + let _res = mt.grant_share_object(req).await?; let tbl_ob_name = ShareGrantObjectName::Table(db_name.to_string(), tbl_name.to_string()); @@ -1642,7 +2399,6 @@ impl ShareApiTestSuite { let res = mt.grant_share_object(req).await?; info!("grant object res: {:?}", res); - assert_eq!(2, res.spec_vec.unwrap().len()); } info!("--- check db and table shared_by field"); diff --git a/src/meta/api/src/util.rs b/src/meta/api/src/util.rs index 6b6770fefa88d..36794be74a3fd 100644 --- a/src/meta/api/src/util.rs +++ b/src/meta/api/src/util.rs @@ -14,12 +14,12 @@ use std::any::type_name; use std::collections::BTreeMap; -use std::collections::HashSet; use std::fmt::Display; use std::sync::Arc; use databend_common_meta_app::app_error::AppError; use databend_common_meta_app::app_error::ShareHasNoGrantedDatabase; +use databend_common_meta_app::app_error::ShareHasNoGrantedPrivilege; use databend_common_meta_app::app_error::UnknownDatabase; use databend_common_meta_app::app_error::UnknownDatabaseId; use databend_common_meta_app::app_error::UnknownShare; @@ -993,7 +993,15 @@ pub async fn remove_db_from_share( } } share_meta.database = None; - share_meta.entries = BTreeMap::new(); + remove_entries_from_share( + kv_api, + share_id, + db_name.tenant(), + &mut share_meta, + condition, + if_then, + ) + .await?; let id_key = ShareId { share_id }; condition.push(txn_cond_seq(&id_key, Eq, share_meta_seq)); @@ -1002,7 +1010,26 @@ pub async fn remove_db_from_share( Ok((share_name.name().to_string(), share_meta)) } -// return (share name, new share meta, new share table info) +async fn remove_entries_from_share( + kv_api: &(impl kvapi::KVApi + ?Sized), + share_id: u64, + tenant: &Tenant, + share_meta: &mut ShareMeta, + condition: &mut Vec, + if_then: &mut Vec, +) -> Result<(), KVAppError> { + // remove table from entries + for entry in share_meta.entries.values() { + if let ShareGrantObject::Table(table_id) = entry.object { + remove_table_from_share(kv_api, share_id, table_id, tenant, condition, if_then).await?; + } + } + share_meta.entries = BTreeMap::new(); + + Ok(()) +} + +// return (share name, new share meta) pub async fn 
remove_table_from_share( kv_api: &(impl kvapi::KVApi + ?Sized), share_id: u64, @@ -1010,7 +1037,7 @@ pub async fn remove_table_from_share( tenant: &Tenant, condition: &mut Vec, if_then: &mut Vec, -) -> Result<(String, ShareMeta, Option), KVAppError> { +) -> Result<(String, ShareMeta), KVAppError> { let (_seq, share_name) = get_share_id_to_name_or_err( kv_api, share_id, @@ -1053,68 +1080,17 @@ pub async fn remove_table_from_share( condition.push(txn_cond_seq(&id_key, Eq, share_meta_seq)); if_then.push(txn_op_put(&id_key, serialize_struct(&share_meta)?)); - let mut db_ident_raw = None; - let mut shared_db_id = 0; - if let Some(ref entry) = share_meta.database { - if let ShareGrantObject::Database(db_id) = entry.object { - let db_id_key = DatabaseIdToName { db_id }; - let (_db_name_seq, db_name_ident_raw): (_, Option) = - get_pb_value(kv_api, &db_id_key).await?; - db_ident_raw = db_name_ident_raw; - shared_db_id = db_id; - } else { - return Err(KVAppError::AppError(AppError::ShareHasNoGrantedDatabase( - ShareHasNoGrantedDatabase::new(tenant.tenant_name(), share_name.name()), - ))); - } - } - - let share_table_info = match db_ident_raw { - Some(db_ident_raw) => { - let mut table_ids = HashSet::new(); - for entry in share_meta.entries.values() { - if let ShareGrantObject::Table(table_id) = entry.object { - table_ids.insert(table_id); - } else { - unreachable!(); - } - } - let db_name = db_ident_raw.to_tident(()); - let all_tables = list_tables_from_unshare_db(kv_api, shared_db_id, &db_name).await?; - let table_infos = BTreeMap::from_iter( - all_tables - .iter() - .filter(|table_info| table_ids.contains(&table_info.ident.table_id)) - .map(|table_info| { - let mut table_info = table_info.as_ref().clone(); - table_info.db_type = - DatabaseType::ShareDB(ShareDBParams::new(share_name.clone())); - (table_info.name.clone(), table_info) - }) - .collect::>(), - ); - - if table_infos.is_empty() { - None - } else { - Some(table_infos) - } - } - None => { - return Err(KVAppError::AppError(AppError::ShareHasNoGrantedDatabase( - ShareHasNoGrantedDatabase::new(tenant.tenant_name(), share_name.name()), - ))); - } - }; - - Ok((share_name.name().to_string(), share_meta, share_table_info)) + Ok((share_name.name().to_string(), share_meta)) } -pub async fn get_share_table_info( +// if `share_table_id` is Some(), get TableInfo by the table id; +// else if `share_table_id` is Some(), get all the TableInfo of the share +pub async fn get_table_info_by_share( kv_api: &(impl kvapi::KVApi + ?Sized), + share_table_id: Option, share_name: &ShareNameIdent, share_meta: &ShareMeta, -) -> Result { +) -> Result<(u64, Vec), KVAppError> { let mut db_ident_raw = None; let mut shared_db_id = 0; if let Some(ref entry) = share_meta.database { @@ -1131,32 +1107,63 @@ pub async fn get_share_table_info( match db_ident_raw { Some(db_name) => { - let mut table_ids = HashSet::new(); + let mut table_ids = vec![]; for entry in share_meta.entries.values() { if let ShareGrantObject::Table(table_id) = entry.object { - table_ids.insert(table_id); + if let Some(share_table_id) = share_table_id { + if share_table_id == table_id { + table_ids.push(table_id); + break; + } + } else { + table_ids.push(table_id); + } } else { unreachable!(); } } + if table_ids.is_empty() { + return Err(KVAppError::AppError(AppError::ShareHasNoGrantedPrivilege( + ShareHasNoGrantedPrivilege::new(share_name.tenant_name(), share_name.name()), + ))); + } let db_name = db_name.to_tident(()); - let all_tables = list_tables_from_unshare_db(kv_api, shared_db_id, 
&db_name).await?; - let table_infos = BTreeMap::from_iter( - all_tables - .iter() - .filter(|table_info| table_ids.contains(&table_info.ident.table_id)) - .map(|table_info| { - let mut table_info = table_info.as_ref().clone(); - table_info.db_type = - DatabaseType::ShareDB(ShareDBParams::new(share_name.clone().into())); - (table_info.name.clone(), table_info) - }) - .collect::>(), - ); - Ok((share_name.name().to_string(), Some(table_infos))) + // List tables by tenant, db_id, table_name. + + let dbid_tbname = DBIdTableName { + db_id: shared_db_id, + // Use empty name to scan all tables + table_name: "".to_string(), + }; + + let (dbid_tbnames, _ids) = list_u64_value(kv_api, &dbid_tbname).await?; + + let table_infos = get_tableinfos_by_ids( + kv_api, + &table_ids, + &db_name, + Some(dbid_tbnames), + DatabaseType::NormalDB, + ) + .await?; + + let table_infos = table_infos + .iter() + .map(|table_info| { + let mut table_info = table_info.as_ref().clone(); + // change table db_type as ShareDB + table_info.db_type = + DatabaseType::ShareDB(ShareDBParams::new(share_name.clone().into())); + table_info + }) + .collect(); + + Ok((shared_db_id, table_infos)) } - None => Ok((share_name.name().to_string(), None)), + None => Err(KVAppError::AppError(AppError::ShareHasNoGrantedDatabase( + ShareHasNoGrantedDatabase::new(share_name.tenant_name(), share_name.share_name()), + ))), } } diff --git a/src/meta/app/src/schema/database.rs b/src/meta/app/src/schema/database.rs index d89591399f9eb..5ee39b269109d 100644 --- a/src/meta/app/src/schema/database.rs +++ b/src/meta/app/src/schema/database.rs @@ -27,6 +27,7 @@ use crate::schema::database_name_ident::DatabaseNameIdent; use crate::share::share_name_ident::ShareNameIdentRaw; use crate::share::ShareCredential; use crate::share::ShareCredentialHmac; +use crate::share::ShareObject; use crate::share::ShareSpec; use crate::tenant::Tenant; use crate::tenant::ToTenant; @@ -85,6 +86,13 @@ impl DatabaseIdToName { } } +// see `ShareGrantObjectPrivilege` +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum ShareDbId { + Usage(u64), + Reference(u64), +} + #[derive(Clone, Debug, Eq, PartialEq)] pub struct DatabaseMeta { pub engine: String, @@ -102,6 +110,8 @@ pub struct DatabaseMeta { pub from_share: Option, // share endpoint name, create with `create share endpoint` ddl pub using_share_endpoint: Option, + // from share db id + pub from_share_db_id: Option, } impl Default for DatabaseMeta { @@ -117,6 +127,7 @@ impl Default for DatabaseMeta { shared_by: BTreeSet::new(), from_share: None, using_share_endpoint: None, + from_share_db_id: None, } } } @@ -218,7 +229,9 @@ impl Display for CreateDatabaseReq { #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Eq, PartialEq)] pub struct CreateDatabaseReply { pub db_id: u64, - pub spec_vec: Option>, + // if `share_specs` is not empty, it means that create database with replace option, + // and `share_specs` vector save the share spec of original database + pub share_specs: Option>, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -241,7 +254,9 @@ impl Display for RenameDatabaseReq { } #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] -pub struct RenameDatabaseReply {} +pub struct RenameDatabaseReply { + pub share_spec: Option<(Vec, ShareObject)>, +} #[derive(Clone, Debug, PartialEq, Eq)] pub struct DropDatabaseReq { @@ -263,7 +278,10 @@ impl Display for DropDatabaseReq { #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] pub struct DropDatabaseReply { - pub spec_vec: Option>, + 
pub db_id: u64, + // if `share_specs` is not empty, it means that create database with replace option, + // and `share_specs` vector save the share spec of original database + pub share_specs: Option>, } #[derive(Clone, Debug, PartialEq, Eq)] diff --git a/src/meta/app/src/schema/mod.rs b/src/meta/app/src/schema/mod.rs index ac26bcbc3ac5c..2ddd5b249d902 100644 --- a/src/meta/app/src/schema/mod.rs +++ b/src/meta/app/src/schema/mod.rs @@ -55,6 +55,7 @@ pub use database::ListDatabaseReq; pub use database::RenameDatabaseReply; pub use database::RenameDatabaseReq; pub use database::ShareDBParams; +pub use database::ShareDbId; pub use database::UndropDatabaseReply; pub use database::UndropDatabaseReq; pub use database_id_history_ident::DatabaseIdHistoryIdent; diff --git a/src/meta/app/src/schema/table.rs b/src/meta/app/src/schema/table.rs index cb6bbc6f9309d..1bdb7cbd62ee8 100644 --- a/src/meta/app/src/schema/table.rs +++ b/src/meta/app/src/schema/table.rs @@ -36,8 +36,9 @@ use super::CatalogInfo; use super::CreateOption; use super::ShareDBParams; use crate::schema::database_name_ident::DatabaseNameIdent; +use crate::share::ShareObject; use crate::share::ShareSpec; -use crate::share::ShareTableInfoMap; +use crate::share::ShareVecTableInfo; use crate::storage::StorageParams; use crate::tenant::Tenant; use crate::tenant::ToTenant; @@ -539,7 +540,8 @@ pub struct CreateTableReply { pub table_id_seq: Option, pub db_id: u64, pub new_table: bool, - pub spec_vec: Option<(Vec, Vec)>, + // (db id, removed table id, share spec vector) + pub spec_vec: Option<(u64, u64, Vec)>, pub prev_table_id: Option, pub orphan_table_name: Option, } @@ -580,7 +582,8 @@ impl Display for DropTableByIdReq { #[derive(Clone, Debug, PartialEq, Eq)] pub struct DropTableReply { - pub spec_vec: Option<(Vec, Vec)>, + // db id, share spec vector + pub spec_vec: Option<(u64, Vec)>, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -686,6 +689,8 @@ impl Display for RenameTableReq { #[derive(Clone, Debug, PartialEq, Eq)] pub struct RenameTableReply { pub table_id: u64, + // vec, table id + pub share_table_info: Option<(Vec, ShareObject)>, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -771,17 +776,17 @@ pub struct SetTableColumnMaskPolicyReq { #[derive(Clone, Debug, PartialEq, Eq)] pub struct SetTableColumnMaskPolicyReply { - pub share_table_info: Option>, + pub share_vec_table_info: Option, } #[derive(Clone, Debug, PartialEq, Eq)] pub struct UpsertTableOptionReply { - pub share_table_info: Option>, + pub share_vec_table_info: Option, } #[derive(Clone, Debug, PartialEq, Eq, Default)] pub struct UpdateTableMetaReply { - pub share_table_info: Option>, + pub share_vec_table_infos: Option>, } #[derive(Clone, Debug, PartialEq, Eq)] diff --git a/src/meta/app/src/share/mod.rs b/src/meta/app/src/share/mod.rs index 6458ea60a610c..e54d44bea6234 100644 --- a/src/meta/app/src/share/mod.rs +++ b/src/meta/app/src/share/mod.rs @@ -65,9 +65,10 @@ pub use share::ShareIdToName; pub use share::ShareIdent; pub use share::ShareInfo; pub use share::ShareMeta; +pub use share::ShareObject; pub use share::ShareSpec; -pub use share::ShareTableInfoMap; pub use share::ShareTableSpec; +pub use share::ShareVecTableInfo; pub use share::ShowSharesReply; pub use share::ShowSharesReq; pub use share::TableInfoMap; diff --git a/src/meta/app/src/share/share.rs b/src/meta/app/src/share/share.rs index 4af8052a0588f..5af3fd76febf1 100644 --- a/src/meta/app/src/share/share.rs +++ b/src/meta/app/src/share/share.rs @@ -71,7 +71,7 @@ pub struct CreateShareReq { pub struct CreateShareReply 
{ pub share_id: u64, - pub spec_vec: Option>, + pub share_spec: Option, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -83,7 +83,7 @@ pub struct DropShareReq { #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] pub struct DropShareReply { pub share_id: Option, - pub spec_vec: Option>, + pub share_spec: Option, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -97,7 +97,7 @@ pub struct AddShareAccountsReq { #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] pub struct AddShareAccountsReply { pub share_id: Option, - pub spec_vec: Option>, + pub share_spec: Option, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -110,7 +110,7 @@ pub struct RemoveShareAccountsReq { #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] pub struct RemoveShareAccountsReply { pub share_id: Option, - pub spec_vec: Option>, + pub share_spec: Option, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -165,7 +165,9 @@ pub enum ShareGrantObjectSeqAndId { // share name and shared (table name, table info) map pub type TableInfoMap = BTreeMap; -pub type ShareTableInfoMap = (String, Option); + +// Vec, db id, table info +pub type ShareVecTableInfo = (Vec, u64, TableInfo); #[derive(Clone, Debug, PartialEq, Eq)] pub struct GrantShareObjectReq { @@ -178,8 +180,8 @@ pub struct GrantShareObjectReq { #[derive(Clone, Debug, PartialEq, Eq)] pub struct GrantShareObjectReply { pub share_id: u64, - pub spec_vec: Option>, - pub share_table_info: ShareTableInfoMap, + pub share_spec: Option, + pub grant_share_table: Option<(u64, TableInfo)>, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -190,11 +192,19 @@ pub struct RevokeShareObjectReq { pub update_on: DateTime, } +#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] +pub enum ShareObject { + // db id + Db(u64), + // db id, table id, table name + Table((u64, u64, String)), +} + #[derive(Clone, Debug, PartialEq, Eq)] pub struct RevokeShareObjectReply { pub share_id: u64, - pub spec_vec: Option>, - pub share_table_info: ShareTableInfoMap, + pub share_spec: Option, + pub revoke_object: Option, } #[derive(Clone, Debug, PartialEq, Eq)] diff --git a/src/meta/proto-conv/src/database_from_to_protobuf_impl.rs b/src/meta/proto-conv/src/database_from_to_protobuf_impl.rs index cab1b053c7537..2a68116ad844c 100644 --- a/src/meta/proto-conv/src/database_from_to_protobuf_impl.rs +++ b/src/meta/proto-conv/src/database_from_to_protobuf_impl.rs @@ -54,6 +54,10 @@ impl FromToProto for mt::DatabaseMeta { None => None, }, using_share_endpoint: p.using_share_endpoint, + from_share_db_id: match p.from_share_db_id { + Some(from_share_db_id) => Some(mt::ShareDbId::from_pb(from_share_db_id)?), + None => None, + }, }; Ok(v) } @@ -78,11 +82,54 @@ impl FromToProto for mt::DatabaseMeta { None => None, }, using_share_endpoint: self.using_share_endpoint.clone(), + from_share_db_id: match &self.from_share_db_id { + Some(from_share_db_id) => Some(from_share_db_id.to_pb()?), + None => None, + }, }; Ok(p) } } +impl FromToProto for mt::ShareDbId { + type PB = pb::ShareDbId; + fn get_pb_ver(_p: &Self::PB) -> u64 { + 0 + } + + fn from_pb(p: pb::ShareDbId) -> Result + where Self: Sized { + match p.db_id { + Some(pb::share_db_id::DbId::Usage(usage)) => Ok(mt::ShareDbId::Usage(usage.id)), + Some(pb::share_db_id::DbId::Reference(reference)) => { + Ok(mt::ShareDbId::Reference(reference.id)) + } + None => Err(Incompatible { + reason: "ShareDbId cannot be None".to_string(), + }), + } + } + + fn to_pb(&self) -> Result { + match self { + Self::Usage(id) => 
Ok(Self::PB { + db_id: Some(pb::share_db_id::DbId::Usage(pb::ShareUsageDbId { + ver: VER, + min_reader_ver: MIN_READER_VER, + id: *id, + })), + }), + Self::Reference(id) => Ok(Self::PB { + db_id: Some(pb::share_db_id::DbId::Reference(pb::ShareReferenceDbId { + ver: VER, + min_reader_ver: MIN_READER_VER, + id: *id, + })), + }), + } + } +} + impl FromToProto for mt::DbIdList { type PB = pb::DbIdList; fn get_pb_ver(p: &Self::PB) -> u64 { diff --git a/src/meta/proto-conv/src/util.rs b/src/meta/proto-conv/src/util.rs index e7d30f178eb85..e3393d00279f2 100644 --- a/src/meta/proto-conv/src/util.rs +++ b/src/meta/proto-conv/src/util.rs @@ -130,6 +130,7 @@ const META_CHANGE_LOG: &[(u64, &str)] = &[ (98, "2024-07-04: Add: add iceberg catalog option in catalog option"), (99, "2024-07-08: Add: missing_field_as in user.proto/ParquetFileFormatParams"), (100, "2024-06-21: Add: tenant.proto/TenantQuota"), + (101, "2024-07-06: Add: add from_share_db_id field into DatabaseMeta"), // Dear developer: // If you're gonna add a new metadata version, you'll have to add a test for it. // You could just copy an existing test file(e.g., `../tests/it/v024_table_meta.rs`) diff --git a/src/meta/proto-conv/tests/it/main.rs b/src/meta/proto-conv/tests/it/main.rs index 0399c3fecb815..13b58fbae4c1a 100644 --- a/src/meta/proto-conv/tests/it/main.rs +++ b/src/meta/proto-conv/tests/it/main.rs @@ -104,3 +104,4 @@ mod v097_orc_format_params; mod v098_catalog_option; mod v099_parquet_format_params; mod v100_tenant_quota; +mod v101_database_meta; diff --git a/src/meta/proto-conv/tests/it/proto_conv.rs b/src/meta/proto-conv/tests/it/proto_conv.rs index 42f14d808c222..18556d23a7c5a 100644 --- a/src/meta/proto-conv/tests/it/proto_conv.rs +++ b/src/meta/proto-conv/tests/it/proto_conv.rs @@ -33,6 +33,7 @@ use databend_common_meta_app::schema::IcebergCatalogOption; use databend_common_meta_app::schema::IcebergRestCatalogOption; use databend_common_meta_app::schema::IndexType; use databend_common_meta_app::schema::LockType; +use databend_common_meta_app::schema::ShareDbId; use databend_common_meta_app::share; use databend_common_meta_app::share::share_name_ident::ShareNameIdentRaw; use databend_common_meta_app::share::ShareCredential; @@ -60,6 +61,7 @@ fn new_db_meta_share() -> mt::DatabaseMeta { shared_by: BTreeSet::new(), from_share: Some(ShareNameIdentRaw::new("tenant", "share")), using_share_endpoint: Some("endpoint".to_string()), + from_share_db_id: Some(ShareDbId::Usage(1024)), } } @@ -75,6 +77,7 @@ fn new_db_meta() -> mt::DatabaseMeta { shared_by: BTreeSet::from_iter(vec![1]), from_share: None, using_share_endpoint: None, + from_share_db_id: None, } } diff --git a/src/meta/proto-conv/tests/it/v002_database_meta.rs b/src/meta/proto-conv/tests/it/v002_database_meta.rs index 52550d73db363..3ed2308115ead 100644 --- a/src/meta/proto-conv/tests/it/v002_database_meta.rs +++ b/src/meta/proto-conv/tests/it/v002_database_meta.rs @@ -53,6 +53,7 @@ fn test_decode_v2_database_meta() -> anyhow::Result<()> { shared_by: BTreeSet::from_iter(vec![1]), from_share: None, using_share_endpoint: None, + from_share_db_id: None, }; common::test_pb_from_to(func_name!(), want())?; diff --git a/src/meta/proto-conv/tests/it/v005_database_meta.rs b/src/meta/proto-conv/tests/it/v005_database_meta.rs index ec9a6606f168d..870a9886b30c8 100644 --- a/src/meta/proto-conv/tests/it/v005_database_meta.rs +++ b/src/meta/proto-conv/tests/it/v005_database_meta.rs @@ -55,6 +55,7 @@ fn test_decode_v5_database_meta() -> anyhow::Result<()> { shared_by: BTreeSet::new(), 
from_share: Some(ShareNameIdentRaw::new("tenant", "share")), using_share_endpoint: None, + from_share_db_id: None, }; common::test_pb_from_to(func_name!(), want())?; diff --git a/src/meta/proto-conv/tests/it/v055_table_meta.rs b/src/meta/proto-conv/tests/it/v055_table_meta.rs index 1084a8678507f..46268f3188659 100644 --- a/src/meta/proto-conv/tests/it/v055_table_meta.rs +++ b/src/meta/proto-conv/tests/it/v055_table_meta.rs @@ -124,6 +124,7 @@ fn test_decode_v51_database_meta() -> anyhow::Result<()> { shared_by: BTreeSet::new(), from_share: Some(ShareNameIdentRaw::new("tenant", "share")), using_share_endpoint: None, + from_share_db_id: None, }; common::test_pb_from_to(func_name!(), want())?; diff --git a/src/meta/proto-conv/tests/it/v074_table_db_meta.rs b/src/meta/proto-conv/tests/it/v074_table_db_meta.rs index 920dd281513e9..8bb431ec7607b 100644 --- a/src/meta/proto-conv/tests/it/v074_table_db_meta.rs +++ b/src/meta/proto-conv/tests/it/v074_table_db_meta.rs @@ -121,6 +121,7 @@ fn test_decode_v74_database_meta() -> anyhow::Result<()> { shared_by: BTreeSet::new(), from_share: Some(ShareNameIdentRaw::new("tenant", "share")), using_share_endpoint: None, + from_share_db_id: None, }; common::test_pb_from_to(func_name!(), want())?; diff --git a/src/meta/proto-conv/tests/it/v096_database_meta.rs b/src/meta/proto-conv/tests/it/v096_database_meta.rs index 856b2967ccb12..ae48dd1040b3f 100644 --- a/src/meta/proto-conv/tests/it/v096_database_meta.rs +++ b/src/meta/proto-conv/tests/it/v096_database_meta.rs @@ -45,6 +45,7 @@ fn test_decode_v96_database_meta() -> anyhow::Result<()> { shared_by: BTreeSet::new(), from_share: Some(ShareNameIdentRaw::new("tenant", "share")), using_share_endpoint: Some("endpoint".to_string()), + from_share_db_id: None, }; common::test_pb_from_to(func_name!(), want())?; diff --git a/src/meta/proto-conv/tests/it/v101_database_meta.rs b/src/meta/proto-conv/tests/it/v101_database_meta.rs new file mode 100644 index 0000000000000..26346adfae8cc --- /dev/null +++ b/src/meta/proto-conv/tests/it/v101_database_meta.rs @@ -0,0 +1,59 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
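+// Checks that the new from_share_db_id field (added in v101) round-trips through protobuf and that the recorded v101 bytes still decode into the expected DatabaseMeta.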
+ +use std::collections::BTreeSet; + +use chrono::TimeZone; +use chrono::Utc; +use databend_common_meta_app::schema as mt; +use databend_common_meta_app::schema::ShareDbId; +use databend_common_meta_app::share::share_name_ident::ShareNameIdentRaw; +use maplit::btreemap; +use minitrace::func_name; + +use crate::common; + +#[test] +fn v101_database_meta() -> anyhow::Result<()> { + let bytes: Vec = vec![ + 34, 10, 10, 3, 120, 121, 122, 18, 3, 102, 111, 111, 42, 2, 52, 52, 50, 10, 10, 3, 97, 98, + 99, 18, 3, 100, 101, 102, 162, 1, 23, 50, 48, 49, 52, 45, 49, 49, 45, 50, 56, 32, 49, 50, + 58, 48, 48, 58, 48, 57, 32, 85, 84, 67, 170, 1, 23, 50, 48, 49, 52, 45, 49, 49, 45, 50, 57, + 32, 49, 50, 58, 48, 48, 58, 48, 57, 32, 85, 84, 67, 178, 1, 7, 102, 111, 111, 32, 98, 97, + 114, 202, 1, 21, 10, 6, 116, 101, 110, 97, 110, 116, 18, 5, 115, 104, 97, 114, 101, 160, 6, + 101, 168, 6, 24, 218, 1, 8, 101, 110, 100, 112, 111, 105, 110, 116, 226, 1, 11, 10, 9, 8, + 128, 8, 160, 6, 101, 168, 6, 24, 160, 6, 101, 168, 6, 24, + ]; + + let want = || mt::DatabaseMeta { + engine: "44".to_string(), + engine_options: btreemap! {s("abc") => s("def")}, + options: btreemap! {s("xyz") => s("foo")}, + created_on: Utc.with_ymd_and_hms(2014, 11, 28, 12, 0, 9).unwrap(), + updated_on: Utc.with_ymd_and_hms(2014, 11, 29, 12, 0, 9).unwrap(), + comment: "foo bar".to_string(), + drop_on: None, + shared_by: BTreeSet::new(), + from_share: Some(ShareNameIdentRaw::new("tenant", "share")), + using_share_endpoint: Some("endpoint".to_string()), + from_share_db_id: Some(ShareDbId::Usage(1024)), + }; + + common::test_pb_from_to(func_name!(), want())?; + common::test_load_old(func_name!(), bytes.as_slice(), 101, want()) +} + +fn s(ss: impl ToString) -> String { + ss.to_string() +} diff --git a/src/meta/protos/proto/database.proto b/src/meta/protos/proto/database.proto index 726fdcc0ec042..ff47219fb813b 100644 --- a/src/meta/protos/proto/database.proto +++ b/src/meta/protos/proto/database.proto @@ -23,6 +23,27 @@ package databend_proto; import "share.proto"; import "tenant.proto"; +message ShareDbId { + oneof db_id { + ShareUsageDbId usage = 1; + ShareReferenceDbId reference = 2; + } +} + +message ShareUsageDbId { + uint64 ver = 100; + uint64 min_reader_ver = 101; + + uint64 id = 1; +} + +message ShareReferenceDbId { + uint64 ver = 100; + uint64 min_reader_ver = 101; + + uint64 id = 1; +} + // DatabaseMeta is a container of all non-identity information. 
message DatabaseMeta { uint64 ver = 100; @@ -60,6 +81,9 @@ message DatabaseMeta { // share endpoint name optional string using_share_endpoint = 27; + // share db id + optional ShareDbId from_share_db_id = 28; + reserved 30; } diff --git a/src/query/service/src/catalogs/default/mutable_catalog.rs b/src/query/service/src/catalogs/default/mutable_catalog.rs index 1cb31555ba1e6..da014f5b3973a 100644 --- a/src/query/service/src/catalogs/default/mutable_catalog.rs +++ b/src/query/service/src/catalogs/default/mutable_catalog.rs @@ -267,7 +267,7 @@ impl Catalog for MutableCatalog { database.init_database(req.name_ident.tenant_name()).await?; Ok(CreateDatabaseReply { db_id: res.db_id, - spec_vec: None, + share_specs: None, }) } diff --git a/src/query/service/src/databases/share/share_database.rs b/src/query/service/src/databases/share/share_database.rs index dcc55fa3485c9..26e78f1685f3c 100644 --- a/src/query/service/src/databases/share/share_database.rs +++ b/src/query/service/src/databases/share/share_database.rs @@ -34,6 +34,7 @@ use databend_common_meta_app::schema::RenameTableReply; use databend_common_meta_app::schema::RenameTableReq; use databend_common_meta_app::schema::SetTableColumnMaskPolicyReply; use databend_common_meta_app::schema::SetTableColumnMaskPolicyReq; +use databend_common_meta_app::schema::ShareDbId; use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::schema::TruncateTableReply; use databend_common_meta_app::schema::TruncateTableReq; @@ -57,15 +58,28 @@ pub struct ShareDatabase { ctx: DatabaseContext, db_info: DatabaseInfo, + + from_share_db_id: u64, } impl ShareDatabase { pub const NAME: &'static str = "SHARE"; pub fn try_create(ctx: DatabaseContext, db_info: DatabaseInfo) -> Result> { debug_assert!( - db_info.meta.from_share.is_some() && db_info.meta.using_share_endpoint.is_some() + db_info.meta.from_share.is_some() + && db_info.meta.using_share_endpoint.is_some() + && db_info.meta.from_share_db_id.is_some() ); - Ok(Box::new(Self { ctx, db_info })) + let from_share_db_id = db_info.meta.from_share_db_id.as_ref().unwrap(); + let from_share_db_id = match from_share_db_id { + ShareDbId::Usage(id) => *id, + ShareDbId::Reference(id) => *id, + }; + Ok(Box::new(Self { + ctx, + db_info, + from_share_db_id, + })) } fn load_share_tables(&self, table_infos: Vec>) -> Result>> { @@ -107,8 +121,8 @@ impl ShareDatabase { async fn add_share_endpoint_into_table_info(&self, table_info: TableInfo) -> Result { let mut table_info = table_info; let db_type = table_info.db_type.clone(); + let share_endpoint_meta = self.get_share_endpoint_meta().await?; if let DatabaseType::ShareDB(params) = db_type { - let share_endpoint_meta = self.get_share_endpoint_meta().await?; let mut params = params; params.share_endpoint_url = share_endpoint_meta.url.clone(); params.share_endpoint_credential = share_endpoint_meta.credential.clone().unwrap(); @@ -131,6 +145,7 @@ impl ShareDatabase { self.get_tenant().tenant_name(), from_share.tenant_name(), from_share.share_name(), + self.from_share_db_id, table_name, ) .await?; @@ -152,6 +167,7 @@ impl ShareDatabase { &share_endpoint_meta, self.get_tenant().tenant_name(), from_share.tenant_name(), + self.from_share_db_id, from_share.share_name(), ) .await?; diff --git a/src/query/service/src/interpreters/common/mod.rs b/src/query/service/src/interpreters/common/mod.rs index 5567449698f6f..f92c1c765c7df 100644 --- a/src/query/service/src/interpreters/common/mod.rs +++ b/src/query/service/src/interpreters/common/mod.rs @@ -21,12 +21,9 @@ mod table; 
mod task; mod util; -mod shared_table; - pub use grant::validate_grant_object_exists; pub use notification::get_notification_client_config; pub use query_log::InterpreterQueryLog; -pub use shared_table::save_share_table_info; pub use stream::dml_build_update_stream_req; pub use stream::query_build_update_stream_req; pub use table::check_referenced_computed_columns; diff --git a/src/query/service/src/interpreters/common/shared_table.rs b/src/query/service/src/interpreters/common/shared_table.rs deleted file mode 100644 index cc025ae79dbcc..0000000000000 --- a/src/query/service/src/interpreters/common/shared_table.rs +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use databend_common_catalog::table_context::TableContext; -use databend_common_exception::Result; -use databend_common_meta_app::share::ShareTableInfoMap; - -use crate::sessions::QueryContext; - -pub async fn save_share_table_info( - ctx: &QueryContext, - share_table_info: &Option>, -) -> Result<()> { - if let Some(share_table_info) = share_table_info { - databend_common_storages_share::save_share_table_info( - ctx.get_tenant().tenant_name(), - ctx.get_application_level_data_operator()?.operator(), - share_table_info, - ) - .await?; - } - Ok(()) -} diff --git a/src/query/service/src/interpreters/interpreter_database_create.rs b/src/query/service/src/interpreters/interpreter_database_create.rs index e1c3932223f28..c80d400f47480 100644 --- a/src/query/service/src/interpreters/interpreter_database_create.rs +++ b/src/query/service/src/interpreters/interpreter_database_create.rs @@ -20,11 +20,13 @@ use databend_common_management::RoleApi; use databend_common_meta_api::ShareApi; use databend_common_meta_app::principal::OwnershipObject; use databend_common_meta_app::schema::CreateDatabaseReq; +use databend_common_meta_app::schema::ShareDbId; use databend_common_meta_app::share::GetShareEndpointReq; use databend_common_meta_app::share::ShareGrantObjectPrivilege; use databend_common_meta_types::MatchSeq; use databend_common_sharing::ShareEndpointClient; use databend_common_sql::plans::CreateDatabasePlan; +use databend_common_storages_share::remove_share_db_dir; use databend_common_storages_share::save_share_spec; use databend_common_users::RoleCacheManager; use databend_common_users::UserApiProvider; @@ -46,7 +48,7 @@ impl CreateDatabaseInterpreter { Ok(CreateDatabaseInterpreter { ctx, plan }) } - async fn check_create_database_from_share(&self) -> Result<()> { + async fn check_create_database_from_share(&self) -> Result> { // safe to unwrap let share_name = self.plan.meta.from_share.clone().unwrap(); let share_endpoint = self.plan.meta.using_share_endpoint.clone().unwrap(); @@ -86,8 +88,13 @@ impl CreateDatabaseInterpreter { share_name.display(), tenant.tenant_name() ))) + } else if let Some(database) = share_spec.database { + Ok(Some(database.id)) } else { - Ok(()) + Err(ErrorCode::ShareHasNoGrantedDatabase(format!( + "share {:?} has no grant database", + 
share_name + ))) } } else { Err(ErrorCode::ShareHasNoGrantedPrivilege(format!( @@ -128,11 +135,16 @@ impl Interpreter for CreateDatabaseInterpreter { }; // if create from other tenant, check from share endpoint - if self.plan.meta.from_share.is_some() { - self.check_create_database_from_share().await?; - } + let share_db_id = if self.plan.meta.from_share.is_some() { + self.check_create_database_from_share().await? + } else { + None + }; - let create_db_req: CreateDatabaseReq = self.plan.clone().into(); + let mut create_db_req: CreateDatabaseReq = self.plan.clone().into(); + if let Some(share_db_id) = share_db_id { + create_db_req.meta.from_share_db_id = Some(ShareDbId::Usage(share_db_id)); + } let reply = catalog.create_database(create_db_req).await?; // Grant ownership as the current role. The above create_db_req.meta.owner could be removed in @@ -152,17 +164,21 @@ impl Interpreter for CreateDatabaseInterpreter { } // handle share cleanups with the DropDatabaseReply - if let Some(spec_vec) = reply.spec_vec { - let mut share_table_into = Vec::with_capacity(spec_vec.len()); - for share_spec in &spec_vec { - share_table_into.push((share_spec.name.clone(), None)); - } + if let Some(share_specs) = reply.share_specs { + // since db is dropped, first we need to clean share db dir + remove_share_db_dir( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + reply.db_id, + &share_specs, + ) + .await?; + // then write the new share spec save_share_spec( self.ctx.get_tenant().tenant_name(), self.ctx.get_application_level_data_operator()?.operator(), - Some(spec_vec), - Some(share_table_into), + &share_specs, ) .await?; } diff --git a/src/query/service/src/interpreters/interpreter_database_drop.rs b/src/query/service/src/interpreters/interpreter_database_drop.rs index ee4b76b0985f7..19ccc3e350441 100644 --- a/src/query/service/src/interpreters/interpreter_database_drop.rs +++ b/src/query/service/src/interpreters/interpreter_database_drop.rs @@ -18,6 +18,7 @@ use databend_common_exception::Result; use databend_common_management::RoleApi; use databend_common_meta_app::principal::OwnershipObject; use databend_common_sql::plans::DropDatabasePlan; +use databend_common_storages_share::remove_share_db_dir; use databend_common_storages_share::save_share_spec; use databend_common_users::RoleCacheManager; use databend_common_users::UserApiProvider; @@ -67,20 +68,24 @@ impl Interpreter for DropDatabaseInterpreter { } // actual drop database - let resp = catalog.drop_database(self.plan.clone().into()).await?; + let reply = catalog.drop_database(self.plan.clone().into()).await?; // handle share cleanups with the DropDatabaseReply - if let Some(spec_vec) = resp.spec_vec { - let mut share_table_into = Vec::with_capacity(spec_vec.len()); - for share_spec in &spec_vec { - share_table_into.push((share_spec.name.clone(), None)); - } + if let Some(share_specs) = reply.share_specs { + // since db is dropped, first we need to clean share db dir + remove_share_db_dir( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + reply.db_id, + &share_specs, + ) + .await?; + // then write the new share spec save_share_spec( self.ctx.get_tenant().tenant_name(), self.ctx.get_application_level_data_operator()?.operator(), - Some(spec_vec), - Some(share_table_into), + &share_specs, ) .await?; } diff --git a/src/query/service/src/interpreters/interpreter_database_rename.rs b/src/query/service/src/interpreters/interpreter_database_rename.rs 
index 047b6c5e1f1e8..fefbf829fe1dd 100644 --- a/src/query/service/src/interpreters/interpreter_database_rename.rs +++ b/src/query/service/src/interpreters/interpreter_database_rename.rs @@ -18,6 +18,8 @@ use databend_common_exception::Result; use databend_common_meta_app::schema::database_name_ident::DatabaseNameIdent; use databend_common_meta_app::schema::RenameDatabaseReq; use databend_common_sql::plans::RenameDatabasePlan; +use databend_common_storages_share::remove_share_table_object; +use databend_common_storages_share::save_share_spec; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; @@ -50,13 +52,31 @@ impl Interpreter for RenameDatabaseInterpreter { for entity in &self.plan.entities { let catalog = self.ctx.get_catalog(&entity.catalog).await?; let tenant = self.plan.tenant.clone(); - catalog + let reply = catalog .rename_database(RenameDatabaseReq { if_exists: entity.if_exists, name_ident: DatabaseNameIdent::new(tenant, &entity.database), new_db_name: entity.new_database.clone(), }) .await?; + if let Some((share_specs, object)) = reply.share_spec { + save_share_spec( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &share_specs, + ) + .await?; + + for share_spec in &share_specs { + remove_share_table_object( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &share_spec.name, + &[object.clone()], + ) + .await?; + } + } } Ok(PipelineBuildResult::create()) diff --git a/src/query/service/src/interpreters/interpreter_share_alter_tenants.rs b/src/query/service/src/interpreters/interpreter_share_alter_tenants.rs index 4a79652e635e3..79d7528290f55 100644 --- a/src/query/service/src/interpreters/interpreter_share_alter_tenants.rs +++ b/src/query/service/src/interpreters/interpreter_share_alter_tenants.rs @@ -62,14 +62,14 @@ impl Interpreter for AlterShareTenantsInterpreter { share_on: Utc::now(), }; let resp = meta_api.add_share_tenants(req).await?; - - save_share_spec( - self.ctx.get_tenant().tenant_name(), - self.ctx.get_application_level_data_operator()?.operator(), - resp.spec_vec, - None, - ) - .await?; + if let Some(share_spec) = resp.share_spec { + save_share_spec( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &[share_spec], + ) + .await?; + } } else { let req = RemoveShareAccountsReq { share_name: ShareNameIdent::new(&tenant, &self.plan.share), @@ -77,14 +77,14 @@ impl Interpreter for AlterShareTenantsInterpreter { accounts: self.plan.accounts.clone(), }; let resp = meta_api.remove_share_tenants(req).await?; - - save_share_spec( - self.ctx.get_tenant().tenant_name(), - self.ctx.get_application_level_data_operator()?.operator(), - resp.spec_vec, - None, - ) - .await?; + if let Some(share_spec) = resp.share_spec { + save_share_spec( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &[share_spec], + ) + .await?; + } }; Ok(PipelineBuildResult::create()) diff --git a/src/query/service/src/interpreters/interpreter_share_create.rs b/src/query/service/src/interpreters/interpreter_share_create.rs index ba568196a84ce..e800f50f52eca 100644 --- a/src/query/service/src/interpreters/interpreter_share_create.rs +++ b/src/query/service/src/interpreters/interpreter_share_create.rs @@ -51,13 +51,14 @@ impl Interpreter for CreateShareInterpreter { let meta_api = UserApiProvider::instance().get_meta_store_client(); let resp = 
meta_api.create_share(self.plan.clone().into()).await?; - save_share_spec( - self.ctx.get_tenant().tenant_name(), - self.ctx.get_application_level_data_operator()?.operator(), - resp.spec_vec, - None, - ) - .await?; + if let Some(share_spec) = resp.share_spec { + save_share_spec( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &[share_spec], + ) + .await?; + } Ok(PipelineBuildResult::create()) } diff --git a/src/query/service/src/interpreters/interpreter_share_drop.rs b/src/query/service/src/interpreters/interpreter_share_drop.rs index 653c31023da74..9ab1a7b75642d 100644 --- a/src/query/service/src/interpreters/interpreter_share_drop.rs +++ b/src/query/service/src/interpreters/interpreter_share_drop.rs @@ -16,7 +16,7 @@ use std::sync::Arc; use databend_common_exception::Result; use databend_common_meta_api::ShareApi; -use databend_common_storages_share::save_share_spec; +use databend_common_storages_share::remove_share_dir; use databend_common_users::UserApiProvider; use crate::interpreters::Interpreter; @@ -51,13 +51,15 @@ impl Interpreter for DropShareInterpreter { let meta_api = UserApiProvider::instance().get_meta_store_client(); let resp = meta_api.drop_share(self.plan.clone().into()).await?; - save_share_spec( - self.ctx.get_tenant().tenant_name(), - self.ctx.get_application_level_data_operator()?.operator(), - resp.spec_vec, - Some(vec![(self.plan.share.clone(), None)]), - ) - .await?; + if let Some(share_spec) = resp.share_spec { + // since db is dropped, first we need to clean share dir + remove_share_dir( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &[share_spec], + ) + .await?; + } Ok(PipelineBuildResult::create()) } diff --git a/src/query/service/src/interpreters/interpreter_share_grant_object.rs b/src/query/service/src/interpreters/interpreter_share_grant_object.rs index e8858bd3a9747..ca19ca3b9fb3c 100644 --- a/src/query/service/src/interpreters/interpreter_share_grant_object.rs +++ b/src/query/service/src/interpreters/interpreter_share_grant_object.rs @@ -20,6 +20,7 @@ use databend_common_meta_api::ShareApi; use databend_common_meta_app::share::share_name_ident::ShareNameIdent; use databend_common_meta_app::share::GrantShareObjectReq; use databend_common_storages_share::save_share_spec; +use databend_common_storages_share::update_share_table_info; use databend_common_users::UserApiProvider; use crate::interpreters::Interpreter; @@ -61,13 +62,26 @@ impl Interpreter for GrantShareObjectInterpreter { }; let resp = meta_api.grant_share_object(req).await?; - save_share_spec( - self.ctx.get_tenant().tenant_name(), - self.ctx.get_application_level_data_operator()?.operator(), - resp.spec_vec, - Some(vec![resp.share_table_info]), - ) - .await?; + if let Some(share_spec) = &resp.share_spec { + save_share_spec( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &[share_spec.clone()], + ) + .await?; + + // if grant object is table, save table info + if let Some((db_id, share_table_info)) = &resp.grant_share_table { + update_share_table_info( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &[share_spec.name.clone()], + *db_id, + share_table_info, + ) + .await?; + } + } Ok(PipelineBuildResult::create()) } diff --git a/src/query/service/src/interpreters/interpreter_share_revoke_object.rs b/src/query/service/src/interpreters/interpreter_share_revoke_object.rs index 
3d624be98d58e..95a1446d2b1cd 100644 --- a/src/query/service/src/interpreters/interpreter_share_revoke_object.rs +++ b/src/query/service/src/interpreters/interpreter_share_revoke_object.rs @@ -19,6 +19,7 @@ use databend_common_exception::Result; use databend_common_meta_api::ShareApi; use databend_common_meta_app::share::share_name_ident::ShareNameIdent; use databend_common_meta_app::share::RevokeShareObjectReq; +use databend_common_storages_share::remove_share_table_object; use databend_common_storages_share::save_share_spec; use databend_common_users::UserApiProvider; @@ -61,13 +62,24 @@ impl Interpreter for RevokeShareObjectInterpreter { }; let resp = meta_api.revoke_share_object(req).await?; - save_share_spec( - self.ctx.get_tenant().tenant_name(), - self.ctx.get_application_level_data_operator()?.operator(), - resp.spec_vec, - Some(vec![resp.share_table_info]), - ) - .await?; + if let Some(share_spec) = &resp.share_spec { + save_share_spec( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &[share_spec.clone()], + ) + .await?; + + if let Some(revoke_object) = resp.revoke_object { + remove_share_table_object( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &share_spec.name, + &[revoke_object], + ) + .await?; + } + } Ok(PipelineBuildResult::create()) } diff --git a/src/query/service/src/interpreters/interpreter_table_add_column.rs b/src/query/service/src/interpreters/interpreter_table_add_column.rs index ba9aed2f38857..f6d1aa6e71476 100644 --- a/src/query/service/src/interpreters/interpreter_table_add_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_add_column.rs @@ -28,6 +28,7 @@ use databend_common_sql::field_default_value; use databend_common_sql::plans::AddColumnOption; use databend_common_sql::plans::AddTableColumnPlan; use databend_common_storages_fuse::FuseTable; +use databend_common_storages_share::update_share_table_info; use databend_common_storages_stream::stream_table::STREAM_ENGINE; use databend_common_storages_view::view_table::VIEW_ENGINE; use databend_storages_common_table_meta::meta::TableSnapshot; @@ -35,7 +36,6 @@ use databend_storages_common_table_meta::meta::Versioned; use databend_storages_common_table_meta::table::OPT_KEY_SNAPSHOT_LOCATION; use log::info; -use crate::interpreters::common::save_share_table_info; use crate::interpreters::interpreter_table_create::is_valid_column; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; @@ -128,9 +128,20 @@ impl Interpreter for AddTableColumnInterpreter { new_table_meta, }; - let res = catalog.update_single_table_meta(req, table_info).await?; - - save_share_table_info(&self.ctx, &res.share_table_info).await?; + let resp = catalog.update_single_table_meta(req, table_info).await?; + + if let Some(share_vec_table_infos) = &resp.share_vec_table_infos { + for (share_name_vec, db_id, share_table_info) in share_vec_table_infos { + update_share_table_info( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + share_name_vec, + *db_id, + share_table_info, + ) + .await?; + } + } }; Ok(PipelineBuildResult::create()) diff --git a/src/query/service/src/interpreters/interpreter_table_create.rs b/src/query/service/src/interpreters/interpreter_table_create.rs index 78bdd3478abef..3d530b1bfffa9 100644 --- a/src/query/service/src/interpreters/interpreter_table_create.rs +++ b/src/query/service/src/interpreters/interpreter_table_create.rs 
@@ -51,6 +51,7 @@ use databend_common_storages_fuse::FUSE_OPT_KEY_BLOCK_PER_SEGMENT; use databend_common_storages_fuse::FUSE_OPT_KEY_ROW_AVG_DEPTH_THRESHOLD; use databend_common_storages_fuse::FUSE_OPT_KEY_ROW_PER_BLOCK; use databend_common_storages_fuse::FUSE_OPT_KEY_ROW_PER_PAGE; +use databend_common_storages_share::remove_share_table_info; use databend_common_storages_share::save_share_spec; use databend_common_users::RoleCacheManager; use databend_common_users::UserApiProvider; @@ -191,7 +192,7 @@ impl CreateTableInterpreter { req.as_dropped = true; req.table_meta.drop_on = Some(Utc::now()); let table_meta = req.table_meta.clone(); - let reply = catalog.create_table(req).await?; + let reply = catalog.create_table(req.clone()).await?; if !reply.new_table && self.plan.create_option != CreateOption::CreateOrReplace { return Ok(PipelineBuildResult::create()); } @@ -249,14 +250,25 @@ impl CreateTableInterpreter { }; // update share spec if needed - if let Some((spec_vec, share_table_info)) = reply.spec_vec { + if let Some((db_id, revoke_table_id, spec_vec)) = reply.spec_vec { save_share_spec( - tenant.tenant_name(), + self.ctx.get_tenant().tenant_name(), self.ctx.get_application_level_data_operator()?.operator(), - Some(spec_vec), - Some(share_table_info), + &spec_vec, ) .await?; + + // remove table info file + for share_spec in spec_vec { + remove_share_table_info( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &share_spec.name, + db_id, + revoke_table_id, + ) + .await?; + } } let mut pipeline = InsertInterpreter::try_create(self.ctx.clone(), insert_plan)? @@ -368,14 +380,25 @@ impl CreateTableInterpreter { } // update share spec if needed - if let Some((spec_vec, share_table_info)) = reply.spec_vec { + if let Some((db_id, revoke_table_id, spec_vec)) = reply.spec_vec { save_share_spec( self.ctx.get_tenant().tenant_name(), self.ctx.get_application_level_data_operator()?.operator(), - Some(spec_vec), - Some(share_table_info), + &spec_vec, ) .await?; + + // remove table spec + for share_spec in spec_vec { + remove_share_table_info( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &share_spec.name, + db_id, + revoke_table_id, + ) + .await?; + } } Ok(PipelineBuildResult::create()) diff --git a/src/query/service/src/interpreters/interpreter_table_drop.rs b/src/query/service/src/interpreters/interpreter_table_drop.rs index 34a0c41484b46..a599dbb75ed61 100644 --- a/src/query/service/src/interpreters/interpreter_table_drop.rs +++ b/src/query/service/src/interpreters/interpreter_table_drop.rs @@ -23,6 +23,7 @@ use databend_common_meta_app::schema::DropTableByIdReq; use databend_common_sql::plans::DropTablePlan; use databend_common_storages_fuse::operations::TruncateMode; use databend_common_storages_fuse::FuseTable; +use databend_common_storages_share::remove_share_table_info; use databend_common_storages_share::save_share_spec; use databend_common_storages_stream::stream_table::STREAM_ENGINE; use databend_common_storages_view::view_table::VIEW_ENGINE; @@ -74,6 +75,7 @@ impl Interpreter for DropTableInterpreter { } } }; + let table_id = tbl.get_table_info().ident.table_id; let engine = tbl.get_table_info().engine(); if matches!(engine, VIEW_ENGINE | STREAM_ENGINE) { @@ -119,7 +121,7 @@ impl Interpreter for DropTableInterpreter { let owner_object = OwnershipObject::Table { catalog_name: self.plan.catalog.clone(), db_id: db.get_db_info().ident.db_id, - table_id: 
tbl.get_table_info().ident.table_id, + table_id, }; role_api.revoke_ownership(&owner_object).await?; @@ -150,14 +152,25 @@ impl Interpreter for DropTableInterpreter { } // update share spec if needed - if let Some((spec_vec, share_table_info)) = resp.spec_vec { + if let Some((db_id, spec_vec)) = resp.spec_vec { save_share_spec( self.ctx.get_tenant().tenant_name(), self.ctx.get_application_level_data_operator()?.operator(), - Some(spec_vec), - Some(share_table_info), + &spec_vec, ) .await?; + + // remove table spec + for share_spec in spec_vec { + remove_share_table_info( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &share_spec.name, + db_id, + table_id, + ) + .await?; + } } Ok(build_res) diff --git a/src/query/service/src/interpreters/interpreter_table_drop_column.rs b/src/query/service/src/interpreters/interpreter_table_drop_column.rs index e07c24cc19acd..215b13798371c 100644 --- a/src/query/service/src/interpreters/interpreter_table_drop_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_drop_column.rs @@ -23,12 +23,12 @@ use databend_common_meta_app::schema::UpdateTableMetaReq; use databend_common_meta_types::MatchSeq; use databend_common_sql::plans::DropTableColumnPlan; use databend_common_sql::BloomIndexColumns; +use databend_common_storages_share::update_share_table_info; use databend_common_storages_stream::stream_table::STREAM_ENGINE; use databend_common_storages_view::view_table::VIEW_ENGINE; use databend_storages_common_table_meta::table::OPT_KEY_BLOOM_INDEX_COLUMNS; use crate::interpreters::common::check_referenced_computed_columns; -use crate::interpreters::common::save_share_table_info; use crate::interpreters::interpreter_table_add_column::generate_new_snapshot; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; @@ -138,9 +138,19 @@ impl Interpreter for DropTableColumnInterpreter { new_table_meta, }; - let res = catalog.update_single_table_meta(req, table_info).await?; - - save_share_table_info(&self.ctx, &res.share_table_info).await?; + let resp = catalog.update_single_table_meta(req, table_info).await?; + if let Some(share_vec_table_infos) = &resp.share_vec_table_infos { + for (share_name_vec, db_id, share_table_info) in share_vec_table_infos { + update_share_table_info( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + share_name_vec, + *db_id, + share_table_info, + ) + .await?; + } + } Ok(PipelineBuildResult::create()) } diff --git a/src/query/service/src/interpreters/interpreter_table_modify_column.rs b/src/query/service/src/interpreters/interpreter_table_modify_column.rs index 65172d934e943..ab850ae1bce00 100644 --- a/src/query/service/src/interpreters/interpreter_table_modify_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_modify_column.rs @@ -43,6 +43,7 @@ use databend_common_sql::plans::Plan; use databend_common_sql::BloomIndexColumns; use databend_common_sql::Planner; use databend_common_storages_fuse::FuseTable; +use databend_common_storages_share::update_share_table_info; use databend_common_storages_stream::stream_table::STREAM_ENGINE; use databend_common_storages_view::view_table::VIEW_ENGINE; use databend_common_users::UserApiProvider; @@ -51,7 +52,6 @@ use databend_storages_common_index::BloomIndex; use databend_storages_common_table_meta::table::OPT_KEY_BLOOM_INDEX_COLUMNS; use crate::interpreters::common::check_referenced_computed_columns; -use 
crate::interpreters::common::save_share_table_info; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::schedulers::build_query_pipeline_without_render_result_set; @@ -123,9 +123,7 @@ impl ModifyTableColumnInterpreter { action: SetTableColumnMaskPolicyAction::Set(mask_name, prev_column_mask_name), }; - let res = catalog.set_table_column_mask_policy(req).await?; - - save_share_table_info(&self.ctx, &res.share_table_info).await?; + let _resp = catalog.set_table_column_mask_policy(req).await?; Ok(PipelineBuildResult::create()) } @@ -246,9 +244,21 @@ impl ModifyTableColumnInterpreter { new_table_meta: table_info.meta, }; - catalog + let resp = catalog .update_single_table_meta(req, table.get_table_info()) .await?; + if let Some(share_vec_table_infos) = &resp.share_vec_table_infos { + for (share_name_vec, db_id, share_table_info) in share_vec_table_infos { + update_share_table_info( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + share_name_vec, + *db_id, + share_table_info, + ) + .await?; + } + } return Ok(PipelineBuildResult::create()); } @@ -326,11 +336,21 @@ impl ModifyTableColumnInterpreter { new_table_meta: table_info.meta, }; - let res = catalog + let resp = catalog .update_single_table_meta(req, table.get_table_info()) .await?; - - save_share_table_info(&self.ctx, &res.share_table_info).await?; + if let Some(share_vec_table_infos) = &resp.share_vec_table_infos { + for (share_name_vec, db_id, share_table_info) in share_vec_table_infos { + update_share_table_info( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + share_name_vec, + *db_id, + share_table_info, + ) + .await?; + } + } return Ok(PipelineBuildResult::create()); } @@ -441,9 +461,17 @@ impl ModifyTableColumnInterpreter { action: SetTableColumnMaskPolicyAction::Unset(prev_column_mask_name), }; - let res = catalog.set_table_column_mask_policy(req).await?; - - save_share_table_info(&self.ctx, &res.share_table_info).await?; + let resp = catalog.set_table_column_mask_policy(req).await?; + if let Some((share_name_vec, db_id, share_table_info)) = resp.share_vec_table_info { + update_share_table_info( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &share_name_vec, + db_id, + &share_table_info, + ) + .await?; + } } Ok(PipelineBuildResult::create()) @@ -497,9 +525,19 @@ impl ModifyTableColumnInterpreter { new_table_meta, }; - let res = catalog.update_single_table_meta(req, table_info).await?; - - save_share_table_info(&self.ctx, &res.share_table_info).await?; + let resp = catalog.update_single_table_meta(req, table_info).await?; + if let Some(share_vec_table_infos) = &resp.share_vec_table_infos { + for (share_name_vec, db_id, share_table_info) in share_vec_table_infos { + update_share_table_info( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + share_name_vec, + *db_id, + share_table_info, + ) + .await?; + } + } Ok(PipelineBuildResult::create()) } diff --git a/src/query/service/src/interpreters/interpreter_table_rename.rs b/src/query/service/src/interpreters/interpreter_table_rename.rs index 538cc414cb428..7c3367dabcc53 100644 --- a/src/query/service/src/interpreters/interpreter_table_rename.rs +++ b/src/query/service/src/interpreters/interpreter_table_rename.rs @@ -18,6 +18,8 @@ use databend_common_exception::Result; use databend_common_meta_app::schema::RenameTableReq; use 
databend_common_meta_app::schema::TableNameIdent; use databend_common_sql::plans::RenameTablePlan; +use databend_common_storages_share::remove_share_table_object; +use databend_common_storages_share::save_share_spec; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; @@ -51,7 +53,7 @@ impl Interpreter for RenameTableInterpreter { // You must have ALTER and DROP privileges for the original table, // and CREATE and INSERT privileges for the new table. let catalog = self.ctx.get_catalog(&self.plan.catalog).await?; - catalog + let resp = catalog .rename_table(RenameTableReq { if_exists: self.plan.if_exists, name_ident: TableNameIdent { @@ -64,6 +66,24 @@ impl Interpreter for RenameTableInterpreter { }) .await?; + if let Some((spec_vec, share_object)) = resp.share_table_info { + save_share_spec( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &spec_vec, + ) + .await?; + + for share_spec in spec_vec { + remove_share_table_object( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &share_spec.name, + &[share_object.clone()], + ) + .await?; + } + } Ok(PipelineBuildResult::create()) } } diff --git a/src/query/service/src/interpreters/interpreter_table_rename_column.rs b/src/query/service/src/interpreters/interpreter_table_rename_column.rs index f248a11a93388..845a55a681e52 100644 --- a/src/query/service/src/interpreters/interpreter_table_rename_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_rename_column.rs @@ -23,12 +23,12 @@ use databend_common_meta_app::schema::UpdateTableMetaReq; use databend_common_meta_types::MatchSeq; use databend_common_sql::plans::RenameTableColumnPlan; use databend_common_sql::BloomIndexColumns; +use databend_common_storages_share::update_share_table_info; use databend_common_storages_stream::stream_table::STREAM_ENGINE; use databend_common_storages_view::view_table::VIEW_ENGINE; use databend_storages_common_table_meta::table::OPT_KEY_BLOOM_INDEX_COLUMNS; use crate::interpreters::common::check_referenced_computed_columns; -use crate::interpreters::common::save_share_table_info; use crate::interpreters::interpreter_table_create::is_valid_column; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; @@ -130,9 +130,19 @@ impl Interpreter for RenameTableColumnInterpreter { new_table_meta, }; - let res = catalog.update_single_table_meta(req, table_info).await?; - - save_share_table_info(&self.ctx, &res.share_table_info).await?; + let resp = catalog.update_single_table_meta(req, table_info).await?; + if let Some(share_vec_table_infos) = &resp.share_vec_table_infos { + for (share_name_vec, db_id, share_table_info) in share_vec_table_infos { + update_share_table_info( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + share_name_vec, + *db_id, + share_table_info, + ) + .await?; + } + } }; Ok(PipelineBuildResult::create()) diff --git a/src/query/service/src/interpreters/interpreter_table_set_options.rs b/src/query/service/src/interpreters/interpreter_table_set_options.rs index 714af030983b9..163e7caff13a5 100644 --- a/src/query/service/src/interpreters/interpreter_table_set_options.rs +++ b/src/query/service/src/interpreters/interpreter_table_set_options.rs @@ -22,6 +22,7 @@ use databend_common_meta_app::schema::UpsertTableOptionReq; use databend_common_meta_types::MatchSeq; use databend_common_sql::plans::SetOptionsPlan; use 
databend_common_storages_fuse::TableContext; +use databend_common_storages_share::update_share_table_info; use databend_storages_common_table_meta::table::OPT_KEY_CHANGE_TRACKING; use databend_storages_common_table_meta::table::OPT_KEY_CHANGE_TRACKING_BEGIN_VER; use databend_storages_common_table_meta::table::OPT_KEY_DATABASE_ID; @@ -123,9 +124,19 @@ impl Interpreter for SetOptionsInterpreter { options: options_map, }; - catalog + let resp = catalog .upsert_table_option(&self.ctx.get_tenant(), database, req) .await?; + if let Some((share_name_vec, db_id, share_table_info)) = resp.share_vec_table_info { + update_share_table_info( + self.ctx.get_tenant().tenant_name(), + self.ctx.get_application_level_data_operator()?.operator(), + &share_name_vec, + db_id, + &share_table_info, + ) + .await?; + } Ok(PipelineBuildResult::create()) } } diff --git a/src/query/sharing/src/layer.rs b/src/query/sharing/src/layer.rs index 61fcf2775ab40..5539275aebd41 100644 --- a/src/query/sharing/src/layer.rs +++ b/src/query/sharing/src/layer.rs @@ -52,16 +52,16 @@ use crate::SharedSigner; pub fn create_share_table_operator( share_params: &ShareDBParams, - table_name: &str, + table_id: u64, ) -> databend_common_exception::Result { let share_ident_raw = &share_params.share_ident; let signer = SharedSigner::new( &share_params.share_endpoint_url, &format!( - "/tenant/{}/{}/table/{}/presign_files", + "/{}/{}/{}/presign_files", share_ident_raw.tenant_name(), share_ident_raw.share_name(), - table_name + table_id ), share_params.share_endpoint_credential.clone(), ); diff --git a/src/query/sharing/src/share_endpoint_client.rs b/src/query/sharing/src/share_endpoint_client.rs index 3b4101c4fade0..7341049b66527 100644 --- a/src/query/sharing/src/share_endpoint_client.rs +++ b/src/query/sharing/src/share_endpoint_client.rs @@ -70,7 +70,7 @@ impl ShareEndpointClient { to_tenant: &str, share_name: &str, ) -> Result { - let path = format!("/tenant/{}/{}/share_spec", to_tenant, share_name); + let path = format!("/{}/{}/share_spec", to_tenant, share_name); // skip path first `/` char let uri = format!("{}{}", share_endpoint_meta.url, &path[1..]); let headers = if let Some(credential) = &share_endpoint_meta.credential { @@ -102,11 +102,12 @@ impl ShareEndpointClient { from_tenant: &str, to_tenant: &str, share_name: &str, + db_id: u64, table_name: &str, ) -> Result { let path = format!( - "/tenant/{}/{}/table/{}/share_table", - to_tenant, share_name, table_name + "/{}/{}/{}/{}/share_table", + to_tenant, share_name, db_id, table_name ); // skip path first `/` char let uri = format!("{}{}", share_endpoint_meta.url, &path[1..]); @@ -137,9 +138,10 @@ impl ShareEndpointClient { share_endpoint_meta: &ShareEndpointMeta, from_tenant: &str, to_tenant: &str, + db_id: u64, share_name: &str, ) -> Result> { - let path = format!("/tenant/{}/{}/share_tables", to_tenant, share_name); + let path = format!("/{}/{}/{}/share_tables", to_tenant, share_name, db_id); // skip path first `/` char let uri = format!("{}{}", share_endpoint_meta.url, &path[1..]); let headers = if let Some(credential) = &share_endpoint_meta.credential { diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index a53a1cfa33b39..db1ddd394564e 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -161,7 +161,8 @@ impl FuseTable { let (mut operator, table_type) = match table_info.db_type.clone() { DatabaseType::ShareDB(share_params) => { - let operator = 
create_share_table_operator(&share_params, &table_info.name)?; + let operator = + create_share_table_operator(&share_params, table_info.ident.table_id)?; (operator, FuseTableType::SharedReadOnly) } DatabaseType::NormalDB => { diff --git a/src/query/storages/share/Cargo.toml b/src/query/storages/share/Cargo.toml index cb6f4ccd6360e..4f0cfc07e18c4 100644 --- a/src/query/storages/share/Cargo.toml +++ b/src/query/storages/share/Cargo.toml @@ -17,6 +17,7 @@ databend-common-exception = { workspace = true } databend-common-meta-app = { workspace = true } databend-storages-common-table-meta = { workspace = true } enumflags2 = { workspace = true } +log = { workspace = true } opendal = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } diff --git a/src/query/storages/share/src/lib.rs b/src/query/storages/share/src/lib.rs index b5e68929cac4d..aa57afbba6ac9 100644 --- a/src/query/storages/share/src/lib.rs +++ b/src/query/storages/share/src/lib.rs @@ -17,6 +17,9 @@ mod share; pub use share::get_share_spec_location; +pub use share::remove_share_db_dir; +pub use share::remove_share_dir; +pub use share::remove_share_table_info; +pub use share::remove_share_table_object; pub use share::save_share_spec; -pub use share::save_share_table_info; -pub use share::share_table_info_location; +pub use share::update_share_table_info; diff --git a/src/query/storages/share/src/share.rs b/src/query/storages/share/src/share.rs index 407d7a0a98283..778e28c7c7f4d 100644 --- a/src/query/storages/share/src/share.rs +++ b/src/query/storages/share/src/share.rs @@ -12,52 +12,99 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::BTreeMap; - use chrono::DateTime; use chrono::Utc; use databend_common_exception::Result; +use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::share::ShareDatabaseSpec; +use databend_common_meta_app::share::ShareObject; use databend_common_meta_app::share::ShareSpec; -use databend_common_meta_app::share::ShareTableInfoMap; use databend_common_meta_app::share::ShareTableSpec; +use log::error; use opendal::Operator; const SHARE_CONFIG_PREFIX: &str = "_share_config"; -#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Default, Eq, PartialEq)] -pub struct ShareSpecVec { - share_specs: BTreeMap, +pub fn get_share_dir(tenant: &str, share_name: &str) -> String { + format!("{}/{}/{}", SHARE_CONFIG_PREFIX, tenant, share_name) } -pub fn get_share_spec_location(tenant: &str) -> String { - format!("{}/{}/share_specs.json", SHARE_CONFIG_PREFIX, tenant,) +pub fn get_share_database_dir(tenant: &str, share_name: &str, db_id: u64) -> String { + format!( + "{}/{}/{}/{}", + SHARE_CONFIG_PREFIX, tenant, share_name, db_id + ) } -pub fn share_table_info_location(tenant: &str, share_name: &str) -> String { +pub fn get_share_spec_location(tenant: &str, share_name: &str) -> String { format!( - "{}/{}/{}_table_info.json", + "{}/{}/{}/share_specs.json", SHARE_CONFIG_PREFIX, tenant, share_name ) } +pub fn share_table_info_location( + tenant: &str, + share_name: &str, + db_id: u64, + table_id: u64, +) -> String { + format!( + "{}/{}/{}/{}/{}_table_info.json", + SHARE_CONFIG_PREFIX, tenant, share_name, db_id, table_id + ) +} + #[async_backtrace::framed] -pub async fn save_share_table_info( +pub async fn save_share_spec( tenant: &str, operator: Operator, - share_table_info: &[ShareTableInfoMap], + share_specs: &[ShareSpec], ) -> Result<()> { - for (share_name, share_table_info) in 
share_table_info { - let location = share_table_info_location(tenant, share_name); - match share_table_info { - Some(table_info_map) => { - operator - .write(&location, serde_json::to_vec(table_info_map)?) - .await?; - } - None => { + for share_spec in share_specs { + let share_name = &share_spec.name; + let location = get_share_spec_location(tenant, share_name); + let share_spec_ext = ext::ShareSpecExt::from_share_spec(share_spec.clone(), &operator); + let data = serde_json::to_string(&share_spec_ext)?; + operator.write(&location, data).await?; + } + + Ok(()) +} + +#[async_backtrace::framed] +pub async fn remove_share_table_info( + tenant: &str, + operator: Operator, + share_name: &str, + db_id: u64, + share_table_id: u64, +) -> Result<()> { + let location = share_table_info_location(tenant, share_name, db_id, share_table_id); + + operator.delete(&location).await?; + + Ok(()) +} + +#[async_backtrace::framed] +pub async fn remove_share_table_object( + tenant: &str, + operator: Operator, + share_name: &str, + revoke_share_object: &[ShareObject], +) -> Result<()> { + for share_object in revoke_share_object { + match share_object { + ShareObject::Table((db_id, table_id, _share_table)) => { + let location = share_table_info_location(tenant, share_name, *db_id, *table_id); + operator.delete(&location).await?; } + ShareObject::Db(db_id) => { + let dir = get_share_database_dir(tenant, share_name, *db_id); + operator.remove_all(&dir).await?; + } } } @@ -65,30 +112,53 @@ pub async fn save_share_table_info( } #[async_backtrace::framed] -pub async fn save_share_spec( +pub async fn update_share_table_info( tenant: &str, operator: Operator, - spec_vec: Option>, - share_table_info: Option>, + share_name_vec: &[String], + db_id: u64, + share_table_info: &TableInfo, ) -> Result<()> { - if let Some(share_spec) = spec_vec { - let location = get_share_spec_location(tenant); - let mut share_spec_vec = ShareSpecVec::default(); - for spec in share_spec { - let share_name = spec.name.clone(); - let share_spec_ext = ext::ShareSpecExt::from_share_spec(spec, &operator); - share_spec_vec - .share_specs - .insert(share_name, share_spec_ext); + let data = serde_json::to_string(share_table_info)?; + for share_name in share_name_vec { + let location = + share_table_info_location(tenant, share_name, db_id, share_table_info.ident.table_id); + + if let Err(e) = operator.write(&location, data.clone()).await { + error!( + "update_share_table_info of share {} table {} error: {:?}", + share_name, share_table_info.name, e + ); } - operator - .write(&location, serde_json::to_vec(&share_spec_vec)?) - .await?; } - // save share table info - if let Some(share_table_info) = share_table_info { - save_share_table_info(tenant, operator, &share_table_info).await? 
+    Ok(())
+}
+
+#[async_backtrace::framed]
+pub async fn remove_share_dir(
+    tenant: &str,
+    operator: Operator,
+    share_specs: &[ShareSpec],
+) -> Result<()> {
+    for share_spec in share_specs {
+        let dir = get_share_dir(tenant, &share_spec.name);
+        operator.remove_all(&dir).await?;
+    }
+
+    Ok(())
+}
+
+#[async_backtrace::framed]
+pub async fn remove_share_db_dir(
+    tenant: &str,
+    operator: Operator,
+    db_id: u64,
+    share_specs: &[ShareSpec],
+) -> Result<()> {
+    for share_spec in share_specs {
+        let dir = get_share_database_dir(tenant, &share_spec.name, db_id);
+        operator.remove_all(&dir).await?;
     }
 
     Ok(())

From de689a3ef9b944defd5b44c0a168e81dea915a95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?=
Date: Wed, 10 Jul 2024 16:31:45 +0800
Subject: [PATCH 21/21] chore: add repo link in databend-common-ast lib (#16010)

Add repo link in databend-common-ast lib
---
 src/query/ast/Cargo.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/query/ast/Cargo.toml b/src/query/ast/Cargo.toml
index bd12016f14b8c..ba3652a93130d 100644
--- a/src/query/ast/Cargo.toml
+++ b/src/query/ast/Cargo.toml
@@ -6,6 +6,7 @@ description = "SQL parser for Databend"
 authors = { workspace = true }
 license = { workspace = true }
 edition = { workspace = true }
+repository = "https://github.com/datafuselabs/databend/tree/main/src/query/ast"
 
 [lib]
 doctest = false
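
For reference, the share-spec helpers introduced in the share.rs hunks above persist specs under _share_config/<tenant>/<share_name>/share_specs.json and per-table info under _share_config/<tenant>/<share_name>/<db_id>/<table_id>_table_info.json. Below is a minimal sketch of how a caller composes save_share_spec and remove_share_db_dir, mirroring the cleanup order used in interpreter_database_drop.rs in this series; the helper name cleanup_dropped_share_db is illustrative only and is not part of the patch.

use databend_common_exception::Result;
use databend_common_meta_app::share::ShareSpec;
use databend_common_storages_share::remove_share_db_dir;
use databend_common_storages_share::save_share_spec;
use opendal::Operator;

// Illustrative helper (not part of this patch): after a database holding shared
// objects is dropped, remove its per-database share directory first, then
// persist the updated share specs, matching interpreter_database_drop.rs.
async fn cleanup_dropped_share_db(
    tenant: &str,
    operator: Operator,
    db_id: u64,
    share_specs: &[ShareSpec],
) -> Result<()> {
    // Removes _share_config/<tenant>/<share_name>/<db_id>/ for every affected share.
    remove_share_db_dir(tenant, operator.clone(), db_id, share_specs).await?;
    // Rewrites _share_config/<tenant>/<share_name>/share_specs.json from the new specs.
    save_share_spec(tenant, operator, share_specs).await?;
    Ok(())
}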