From 278e18d22436eb76b74b3b0f6efc099070bdc8f4 Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 3 Nov 2022 22:54:06 +0800 Subject: [PATCH 1/2] refactor(query): migrate deserializations to expression --- .../src/deserializations/boolean.rs | 62 ++++++++++++++ .../expression/src/deserializations/date.rs | 63 ++++++++++++++ .../src/deserializations/empty_array.rs | 62 ++++++++++++++ .../expression/src/deserializations/mod.rs | 51 ++++++++++++ .../expression/src/deserializations/null.rs | 62 ++++++++++++++ .../src/deserializations/nullable.rs | 82 +++++++++++++++++++ .../expression/src/deserializations/number.rs | 69 ++++++++++++++++ .../expression/src/deserializations/string.rs | 71 ++++++++++++++++ .../src/deserializations/timestamp.rs | 63 ++++++++++++++ .../src/deserializations/variant.rs | 72 ++++++++++++++++ src/query/expression/src/lib.rs | 2 + .../expression/src/serializations/array.rs | 1 - .../expression/src/serializations/date.rs | 1 - src/query/expression/src/types.rs | 34 +++++++- src/query/expression/src/types/number.rs | 11 +++ src/query/expression/src/types/string.rs | 12 +++ 16 files changed, 713 insertions(+), 5 deletions(-) create mode 100644 src/query/expression/src/deserializations/boolean.rs create mode 100644 src/query/expression/src/deserializations/date.rs create mode 100644 src/query/expression/src/deserializations/empty_array.rs create mode 100644 src/query/expression/src/deserializations/mod.rs create mode 100644 src/query/expression/src/deserializations/null.rs create mode 100644 src/query/expression/src/deserializations/nullable.rs create mode 100644 src/query/expression/src/deserializations/number.rs create mode 100644 src/query/expression/src/deserializations/string.rs create mode 100644 src/query/expression/src/deserializations/timestamp.rs create mode 100644 src/query/expression/src/deserializations/variant.rs diff --git a/src/query/expression/src/deserializations/boolean.rs b/src/query/expression/src/deserializations/boolean.rs new file mode 100644 index 0000000000000..fb4cec008aa36 --- /dev/null +++ b/src/query/expression/src/deserializations/boolean.rs @@ -0,0 +1,62 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_arrow::arrow::bitmap::MutableBitmap; +use common_io::prelude::*; + +use crate::Column; +use crate::Scalar; +use crate::TypeDeserializer; + +pub struct BooleanDeserializer { + pub builder: MutableBitmap, +} + +impl BooleanDeserializer { + pub fn create() -> Self { + Self { + builder: MutableBitmap::new(), + } + } +} + +impl TypeDeserializer for BooleanDeserializer { + fn memory_size(&self) -> usize { + self.builder.as_slice().len() + } + + fn de_default(&mut self, _format: &FormatSettings) { + self.builder.push(false); + } + + fn append_data_value(&mut self, value: Scalar, _format: &FormatSettings) -> Result<(), String> { + let v = value + .as_boolean() + .ok_or_else(|| "Unable to get boolean value".to_string())?; + self.builder.push(*v); + Ok(()) + } + + fn pop_data_value(&mut self) -> Result { + match self.builder.pop() { + Some(v) => Ok(Scalar::Boolean(v)), + None => Err("Boolean column is empty when pop data value".to_string()), + } + } + + fn finish_to_column(&mut self) -> Column { + self.builder.shrink_to_fit(); + Column::Boolean(std::mem::take(&mut self.builder).into()) + } +} diff --git a/src/query/expression/src/deserializations/date.rs b/src/query/expression/src/deserializations/date.rs new file mode 100644 index 0000000000000..07ed294ce7bd2 --- /dev/null +++ b/src/query/expression/src/deserializations/date.rs @@ -0,0 +1,63 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_io::prelude::*; + +use crate::types::date::check_date; +use crate::Column; +use crate::Scalar; +use crate::TypeDeserializer; + +pub struct DateDeserializer { + pub builder: Vec, +} + +impl DateDeserializer { + pub fn create() -> Self { + Self { + builder: Vec::new(), + } + } +} + +impl TypeDeserializer for DateDeserializer { + fn memory_size(&self) -> usize { + self.builder.len() * std::mem::size_of::() + } + + fn de_default(&mut self, _format: &FormatSettings) { + self.builder.push(i32::default()); + } + + fn append_data_value(&mut self, value: Scalar, _format: &FormatSettings) -> Result<(), String> { + let v = value + .as_date() + .ok_or_else(|| "Unable to get date value".to_string())?; + check_date(*v as i64)?; + self.builder.push(*v); + Ok(()) + } + + fn pop_data_value(&mut self) -> Result { + match self.builder.pop() { + Some(v) => Ok(Scalar::Date(v)), + None => Err("Date column is empty when pop data value".to_string()), + } + } + + fn finish_to_column(&mut self) -> Column { + self.builder.shrink_to_fit(); + Column::Date(std::mem::take(&mut self.builder).into()) + } +} diff --git a/src/query/expression/src/deserializations/empty_array.rs b/src/query/expression/src/deserializations/empty_array.rs new file mode 100644 index 0000000000000..3aea0b9ae9b02 --- /dev/null +++ b/src/query/expression/src/deserializations/empty_array.rs @@ -0,0 +1,62 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_io::prelude::*; + +use crate::Column; +use crate::Scalar; +use crate::TypeDeserializer; + +#[derive(Debug, Default)] +pub struct EmptyArrayDeserializer { + pub len: usize, +} + +impl EmptyArrayDeserializer { + pub fn create() -> Self { + Self { len: 0 } + } +} + +impl TypeDeserializer for EmptyArrayDeserializer { + fn memory_size(&self) -> usize { + self.len + } + + fn de_default(&mut self, _format: &FormatSettings) { + self.len += 1; + } + + fn append_data_value( + &mut self, + _value: Scalar, + _format: &FormatSettings, + ) -> Result<(), String> { + self.len += 1; + Ok(()) + } + + fn pop_data_value(&mut self) -> Result { + if self.len > 0 { + self.len -= 1; + Ok(Scalar::EmptyArray) + } else { + Err("EmptyArray column is empty when pop data value".to_string()) + } + } + + fn finish_to_column(&mut self) -> Column { + Column::EmptyArray { len: self.len } + } +} diff --git a/src/query/expression/src/deserializations/mod.rs b/src/query/expression/src/deserializations/mod.rs new file mode 100644 index 0000000000000..07f0585393bae --- /dev/null +++ b/src/query/expression/src/deserializations/mod.rs @@ -0,0 +1,51 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_io::prelude::*; + +mod boolean; +mod date; +mod empty_array; +mod null; +mod nullable; +mod number; +mod string; +mod timestamp; +mod variant; + +pub use boolean::*; +pub use date::*; +pub use empty_array::*; +pub use null::*; +pub use nullable::*; +pub use number::*; +pub use string::*; +pub use timestamp::*; +pub use variant::*; + +use crate::Column; +use crate::Scalar; + +pub trait TypeDeserializer: Send + Sync { + fn memory_size(&self) -> usize; + + fn de_default(&mut self, format: &FormatSettings); + + fn append_data_value(&mut self, value: Scalar, format: &FormatSettings) -> Result<(), String>; + + /// Note this method will return err only when inner builder is empty. + fn pop_data_value(&mut self) -> Result; + + fn finish_to_column(&mut self) -> Column; +} diff --git a/src/query/expression/src/deserializations/null.rs b/src/query/expression/src/deserializations/null.rs new file mode 100644 index 0000000000000..5bfd6fc8722f6 --- /dev/null +++ b/src/query/expression/src/deserializations/null.rs @@ -0,0 +1,62 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_io::prelude::*; + +use crate::Column; +use crate::Scalar; +use crate::TypeDeserializer; + +#[derive(Debug, Default)] +pub struct NullDeserializer { + pub len: usize, +} + +impl NullDeserializer { + pub fn create() -> Self { + Self { len: 0 } + } +} + +impl TypeDeserializer for NullDeserializer { + fn memory_size(&self) -> usize { + self.len + } + + fn de_default(&mut self, _format: &FormatSettings) { + self.len += 1; + } + + fn append_data_value( + &mut self, + _value: Scalar, + _format: &FormatSettings, + ) -> Result<(), String> { + self.len += 1; + Ok(()) + } + + fn pop_data_value(&mut self) -> Result { + if self.len > 0 { + self.len -= 1; + Ok(Scalar::Null) + } else { + Err("Null column is empty when pop data value".to_string()) + } + } + + fn finish_to_column(&mut self) -> Column { + Column::Null { len: self.len } + } +} diff --git a/src/query/expression/src/deserializations/nullable.rs b/src/query/expression/src/deserializations/nullable.rs new file mode 100644 index 0000000000000..efdc5741576ba --- /dev/null +++ b/src/query/expression/src/deserializations/nullable.rs @@ -0,0 +1,82 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_arrow::arrow::bitmap::MutableBitmap; +use common_io::prelude::*; + +use crate::types::nullable::NullableColumn; +use crate::types::DataType; +use crate::Column; +use crate::Scalar; +use crate::TypeDeserializer; + +pub struct NullableDeserializer { + pub validity: MutableBitmap, + pub inner: Box, +} + +impl NullableDeserializer { + pub fn create(inner_ty: &DataType) -> Self { + Self { + validity: MutableBitmap::new(), + inner: inner_ty.create_deserializer(), + } + } +} + +impl TypeDeserializer for NullableDeserializer { + fn memory_size(&self) -> usize { + self.inner.memory_size() + self.validity.as_slice().len() + } + + fn de_default(&mut self, format: &FormatSettings) { + self.inner.de_default(format); + self.validity.push(false); + } + + fn append_data_value(&mut self, value: Scalar, format: &FormatSettings) -> Result<(), String> { + match value { + Scalar::Null => { + self.validity.push(false); + self.inner.de_default(format); + } + _ => { + self.validity.push(true); + self.inner.append_data_value(value, format)?; + } + } + Ok(()) + } + + fn pop_data_value(&mut self) -> Result { + match self.validity.pop() { + Some(v) => { + if v { + self.inner.pop_data_value() + } else { + Ok(Scalar::Null) + } + } + None => Err("Nullable column is empty when pop data value".to_string()), + } + } + + fn finish_to_column(&mut self) -> Column { + let col = NullableColumn { + column: self.inner.finish_to_column(), + validity: std::mem::take(&mut self.validity).into(), + }; + Column::Nullable(Box::new(col)) + } +} diff --git a/src/query/expression/src/deserializations/number.rs b/src/query/expression/src/deserializations/number.rs new file mode 100644 index 0000000000000..641aec1ecd122 --- /dev/null +++ b/src/query/expression/src/deserializations/number.rs @@ -0,0 +1,69 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_io::prelude::*; + +use crate::types::number::Number; +use crate::Column; +use crate::Scalar; +use crate::TypeDeserializer; + +pub struct NumberDeserializer { + pub builder: Vec, +} + +impl NumberDeserializer { + pub fn create() -> Self { + Self { + builder: Vec::new(), + } + } +} + +impl TypeDeserializer for NumberDeserializer +where T: Number +{ + fn memory_size(&self) -> usize { + self.builder.len() * std::mem::size_of::() + } + + fn de_default(&mut self, _format: &FormatSettings) { + self.builder.push(T::default()); + } + + fn append_data_value(&mut self, value: Scalar, _format: &FormatSettings) -> Result<(), String> { + let v = value + .as_number() + .ok_or_else(|| "Unable to get number value".to_string())?; + let num = T::try_downcast_scalar(v).unwrap(); + self.builder.push(num); + Ok(()) + } + + fn pop_data_value(&mut self) -> Result { + match self.builder.pop() { + Some(v) => { + let num = T::upcast_scalar(v); + Ok(Scalar::Number(num)) + } + None => Err("Number column is empty when pop data value".to_string()), + } + } + + fn finish_to_column(&mut self) -> Column { + self.builder.shrink_to_fit(); + let col = T::upcast_column(std::mem::take(&mut self.builder).into()); + Column::Number(col) + } +} diff --git a/src/query/expression/src/deserializations/string.rs b/src/query/expression/src/deserializations/string.rs new file mode 100644 index 0000000000000..14835a4749cfc --- /dev/null +++ b/src/query/expression/src/deserializations/string.rs @@ -0,0 +1,71 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_io::prelude::*; + +use crate::types::string::StringColumn; +use crate::types::string::StringColumnBuilder; +use crate::Column; +use crate::Scalar; +use crate::TypeDeserializer; + +pub struct StringDeserializer { + pub buffer: Vec, + pub builder: StringColumnBuilder, +} + +impl StringDeserializer { + pub fn create() -> Self { + Self { + buffer: Vec::new(), + builder: StringColumnBuilder::with_capacity(0, 0), + } + } +} + +impl TypeDeserializer for StringDeserializer { + fn memory_size(&self) -> usize { + self.builder.data.len() * std::mem::size_of::() + + self.builder.offsets.len() * std::mem::size_of::() + } + + fn de_default(&mut self, _format: &FormatSettings) { + self.builder.put_str(""); + self.builder.commit_row(); + } + + fn append_data_value(&mut self, value: Scalar, _format: &FormatSettings) -> Result<(), String> { + let v = value + .as_string() + .ok_or_else(|| "Unable to get string value".to_string())?; + self.builder.put(v.as_slice()); + self.builder.commit_row(); + Ok(()) + } + + fn pop_data_value(&mut self) -> Result { + match self.builder.pop() { + Some(v) => Ok(Scalar::String(v)), + None => Err("String column is empty when pop data value".to_string()), + } + } + + fn finish_to_column(&mut self) -> Column { + let col = StringColumn { + data: std::mem::take(&mut self.builder.data).into(), + offsets: std::mem::take(&mut self.builder.offsets).into(), + }; + Column::String(col) + } +} diff --git a/src/query/expression/src/deserializations/timestamp.rs b/src/query/expression/src/deserializations/timestamp.rs new file mode 100644 index 0000000000000..fbace1cb0ad7a --- /dev/null +++ b/src/query/expression/src/deserializations/timestamp.rs @@ -0,0 +1,63 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_io::prelude::*; + +use crate::types::timestamp::check_timestamp; +use crate::Column; +use crate::Scalar; +use crate::TypeDeserializer; + +pub struct TimestampDeserializer { + pub builder: Vec, +} + +impl TimestampDeserializer { + pub fn create() -> Self { + Self { + builder: Vec::new(), + } + } +} + +impl TypeDeserializer for TimestampDeserializer { + fn memory_size(&self) -> usize { + self.builder.len() * std::mem::size_of::() + } + + fn de_default(&mut self, _format: &FormatSettings) { + self.builder.push(i64::default()); + } + + fn append_data_value(&mut self, value: Scalar, _format: &FormatSettings) -> Result<(), String> { + let v = value + .as_timestamp() + .ok_or_else(|| "Unable to get timestamp value".to_string())?; + check_timestamp(*v)?; + self.builder.push(*v); + Ok(()) + } + + fn pop_data_value(&mut self) -> Result { + match self.builder.pop() { + Some(v) => Ok(Scalar::Timestamp(v)), + None => Err("Timestamp column is empty when pop data value".to_string()), + } + } + + fn finish_to_column(&mut self) -> Column { + self.builder.shrink_to_fit(); + Column::Timestamp(std::mem::take(&mut self.builder).into()) + } +} diff --git a/src/query/expression/src/deserializations/variant.rs b/src/query/expression/src/deserializations/variant.rs new file mode 100644 index 0000000000000..3b6ba7bf5d7d7 --- /dev/null +++ b/src/query/expression/src/deserializations/variant.rs @@ -0,0 +1,72 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_io::prelude::*; + +use crate::types::string::StringColumn; +use crate::types::string::StringColumnBuilder; +use crate::types::variant::JSONB_NULL; +use crate::Column; +use crate::Scalar; +use crate::TypeDeserializer; + +pub struct VariantDeserializer { + pub buffer: Vec, + pub builder: StringColumnBuilder, +} + +impl VariantDeserializer { + pub fn create() -> Self { + Self { + buffer: Vec::new(), + builder: StringColumnBuilder::with_capacity(0, 0), + } + } +} + +impl TypeDeserializer for VariantDeserializer { + fn memory_size(&self) -> usize { + self.builder.data.len() * std::mem::size_of::() + + self.builder.offsets.len() * std::mem::size_of::() + } + + fn de_default(&mut self, _format: &FormatSettings) { + self.builder.put(JSONB_NULL); + self.builder.commit_row(); + } + + fn append_data_value(&mut self, value: Scalar, _format: &FormatSettings) -> Result<(), String> { + let v = value + .as_variant() + .ok_or_else(|| "Unable to get variant value".to_string())?; + self.builder.put(v.as_slice()); + self.builder.commit_row(); + Ok(()) + } + + fn pop_data_value(&mut self) -> Result { + match self.builder.pop() { + Some(v) => Ok(Scalar::Variant(v.to_vec())), + None => Err("Variant column is empty when pop data value".to_string()), + } + } + + fn finish_to_column(&mut self) -> Column { + let col = StringColumn { + data: std::mem::take(&mut self.builder.data).into(), + offsets: std::mem::take(&mut self.builder.offsets).into(), + }; + Column::Variant(col) + } +} diff --git a/src/query/expression/src/lib.rs b/src/query/expression/src/lib.rs index 8e6db7389de95..ee58d5a4a7545 100755 --- a/src/query/expression/src/lib.rs +++ b/src/query/expression/src/lib.rs @@ -33,6 +33,7 @@ pub type Result = std::result::Result; mod chunk; pub mod converts; +mod deserializations; mod evaluator; mod expression; mod function; @@ -46,6 +47,7 @@ pub mod utils; pub mod values; pub use crate::chunk::*; +pub use crate::deserializations::TypeDeserializer; pub use crate::evaluator::*; pub use crate::expression::*; pub use crate::function::*; diff --git a/src/query/expression/src/serializations/array.rs b/src/query/expression/src/serializations/array.rs index 436ce64f7d371..4fef6a4fb27d6 100644 --- a/src/query/expression/src/serializations/array.rs +++ b/src/query/expression/src/serializations/array.rs @@ -13,7 +13,6 @@ // limitations under the License. use common_arrow::arrow::buffer::Buffer; -use common_exception::Result; use common_io::prelude::FormatSettings; use crate::types::DataType; diff --git a/src/query/expression/src/serializations/date.rs b/src/query/expression/src/serializations/date.rs index 4026e536ccedc..909d720d7bee6 100644 --- a/src/query/expression/src/serializations/date.rs +++ b/src/query/expression/src/serializations/date.rs @@ -17,7 +17,6 @@ use std::ops::AddAssign; use chrono::Duration; use chrono::NaiveDate; use common_arrow::arrow::buffer::Buffer; -use common_exception::Result; use common_io::prelude::FormatSettings; use crate::Column; diff --git a/src/query/expression/src/types.rs b/src/query/expression/src/types.rs index d8a621fc61122..37dc533f62e24 100755 --- a/src/query/expression/src/types.rs +++ b/src/query/expression/src/types.rs @@ -48,6 +48,16 @@ pub use self::number::NumberType; pub use self::string::StringType; pub use self::timestamp::TimestampType; pub use self::variant::VariantType; +use crate::deserializations::BooleanDeserializer; +use crate::deserializations::DateDeserializer; +use crate::deserializations::EmptyArrayDeserializer; +use crate::deserializations::NullDeserializer; +use crate::deserializations::NullableDeserializer; +use crate::deserializations::NumberDeserializer; +use crate::deserializations::StringDeserializer; +use crate::deserializations::TimestampDeserializer; +use crate::deserializations::TypeDeserializer; +use crate::deserializations::VariantDeserializer; use crate::property::Domain; use crate::serializations::ArraySerializer; use crate::serializations::BooleanSerializer; @@ -125,9 +135,8 @@ impl DataType { DataType::String => Ok(Box::new(StringSerializer::try_create(column)?)), DataType::Number(num_ty) => { with_number_mapped_type!(|NUM_TYPE| match num_ty { - NumberDataType::NUM_TYPE => Ok(Box::new( - NumberSerializer::::try_create(column).unwrap() - )), + NumberDataType::NUM_TYPE => + Ok(Box::new(NumberSerializer::::try_create(column)?)), }) } DataType::Date => Ok(Box::new(DateSerializer::try_create(column)?)), @@ -146,6 +155,25 @@ impl DataType { _ => unreachable!(), } } + + pub fn create_deserializer(&self) -> Box { + match self { + DataType::Null => Box::new(NullDeserializer::create()), + DataType::Boolean => Box::new(BooleanDeserializer::create()), + DataType::String => Box::new(StringDeserializer::create()), + DataType::Number(num_ty) => { + with_number_mapped_type!(|NUM_TYPE| match num_ty { + NumberDataType::NUM_TYPE => Box::new(NumberDeserializer::::create()), + }) + } + DataType::Date => Box::new(DateDeserializer::create()), + DataType::Timestamp => Box::new(TimestampDeserializer::create()), + DataType::Nullable(inner_ty) => Box::new(NullableDeserializer::create(inner_ty)), + DataType::EmptyArray => Box::new(EmptyArrayDeserializer::create()), + DataType::Variant => Box::new(VariantDeserializer::create()), + _ => todo!(), + } + } } pub trait ValueType: Debug + Clone + PartialEq + Sized + 'static { diff --git a/src/query/expression/src/types/number.rs b/src/query/expression/src/types/number.rs index 2b6caac9e0467..2e17a11dd8cba 100644 --- a/src/query/expression/src/types/number.rs +++ b/src/query/expression/src/types/number.rs @@ -549,6 +549,17 @@ impl NumberColumnBuilder { NumberColumnBuilder::NUM_TYPE(builder) => NumberScalar::NUM_TYPE(builder[0]), }) } + + pub fn pop(&mut self) -> Option { + crate::with_number_type!(|NUM_TYPE| match self { + NumberColumnBuilder::NUM_TYPE(builder) => { + match builder.pop() { + Some(num) => Some(NumberScalar::NUM_TYPE(num)), + None => None, + } + } + }) + } } impl SimpleDomain { diff --git a/src/query/expression/src/types/string.rs b/src/query/expression/src/types/string.rs index 75f54f08e24d2..1b59e77797798 100644 --- a/src/query/expression/src/types/string.rs +++ b/src/query/expression/src/types/string.rs @@ -334,6 +334,18 @@ impl StringColumnBuilder { // soundness: the invariant of the struct self.data.get_unchecked(start..end) } + + pub fn pop(&mut self) -> Option> { + if self.len() > 0 { + let index = self.len() - 1; + let start = unsafe { *self.offsets.get_unchecked(index) as usize }; + self.offsets.pop(); + let val = self.data.split_off(start); + Some(val) + } else { + None + } + } } impl<'a> FromIterator<&'a [u8]> for StringColumnBuilder { From fd89b56ed209da8eff7a7dadb5c18290077d9c0a Mon Sep 17 00:00:00 2001 From: baishen Date: Fri, 4 Nov 2022 19:32:09 +0800 Subject: [PATCH 2/2] fix arrow2 --- Cargo.lock | 2 +- src/common/arrow/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6925eea37da62..59d708f7d8ba3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -164,7 +164,7 @@ dependencies = [ [[package]] name = "arrow2" version = "0.14.2" -source = "git+https://github.com/RinChanNOWWW/arrow2?rev=8bd6417#8bd6417392a44496a2f9ca649f9cd2aa65dcc277" +source = "git+https://github.com/jorgecarleitao/arrow2?rev=562de6a#562de6a6ed961eff46e7db752b384583886a5e5d" dependencies = [ "ahash 0.7.6", "arrow-format", diff --git a/src/common/arrow/Cargo.toml b/src/common/arrow/Cargo.toml index cda84464a9365..1cf6cd679bfd8 100644 --- a/src/common/arrow/Cargo.toml +++ b/src/common/arrow/Cargo.toml @@ -34,7 +34,7 @@ simd = ["arrow/simd"] # Workspace dependencies # Crates.io dependencies -arrow = { package = "arrow2", git = "https://github.com/RinChanNOWWW/arrow2", rev = "8bd6417", default-features = false, features = [ +arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "562de6a", default-features = false, features = [ "io_parquet", "io_parquet_compression", ] }