diff --git a/src/query/codegen/src/writes/register.rs b/src/query/codegen/src/writes/register.rs index a971a4a507f4..c400ef24e26d 100644 --- a/src/query/codegen/src/writes/register.rs +++ b/src/query/codegen/src/writes/register.rs @@ -230,7 +230,6 @@ pub fn codegen_register() { .map(|n| n + 1) .map(|n| format!("value{n}")) .join(","); - let n_widecards = "_,".repeat(n_args); let any_arg_has_null = (0..n_args) .map(|n| n + 1) .map(|n| format!("arg{n}.has_null")) @@ -262,7 +261,7 @@ pub fn codegen_register() { self.register_{n_args}_arg_core::<{arg_generics} NullableType, _, _>( name, property.clone(), - |{n_widecards}| None, + calc_domain, func ); diff --git a/src/query/expression/src/evaluator.rs b/src/query/expression/src/evaluator.rs index 13d7c89dfcfe..166ca46a9350 100644 --- a/src/query/expression/src/evaluator.rs +++ b/src/query/expression/src/evaluator.rs @@ -17,9 +17,7 @@ use std::sync::Mutex; use chrono_tz::Tz; use common_arrow::arrow::bitmap; -use common_arrow::arrow::bitmap::MutableBitmap; use itertools::Itertools; -use num_traits::AsPrimitive; use crate::chunk::Chunk; use crate::expression::Expr; @@ -28,36 +26,33 @@ use crate::function::FunctionContext; use crate::property::Domain; use crate::types::any::AnyType; use crate::types::array::ArrayColumn; -use crate::types::date::date_to_string; use crate::types::nullable::NullableColumn; use crate::types::nullable::NullableDomain; -use crate::types::number::NumberColumn; use crate::types::number::NumberDataType; -use crate::types::number::NumberDomain; -use crate::types::number::NumberScalar; -use crate::types::number::SimpleDomain; -use crate::types::string::StringColumnBuilder; -use crate::types::timestamp::timestamp_to_string; -use crate::types::variant::cast_scalar_to_variant; -use crate::types::variant::cast_scalars_to_variants; use crate::types::DataType; use crate::utils::arrow::constant_bitmap; +use crate::utils::calculate_function_domain; +use crate::utils::eval_function; use crate::values::Column; use crate::values::ColumnBuilder; use crate::values::Scalar; use crate::values::Value; -use crate::with_number_type; +use crate::FunctionRegistry; use crate::Result; -use crate::ScalarRef; pub struct Evaluator<'a> { input_columns: &'a Chunk, tz: Tz, + fn_registry: &'a FunctionRegistry, } impl<'a> Evaluator<'a> { - pub fn new(input_columns: &'a Chunk, tz: Tz) -> Self { - Evaluator { input_columns, tz } + pub fn new(input_columns: &'a Chunk, tz: Tz, fn_registry: &'a FunctionRegistry) -> Self { + Evaluator { + input_columns, + tz, + fn_registry, + } } pub fn run(&self, expr: &Expr) -> Result> { @@ -93,40 +88,15 @@ impl<'a> Evaluator<'a> { } Expr::Cast { span, + is_try, expr, dest_type, } => { let value = self.run(expr)?; - match value { - Value::Scalar(scalar) => Ok(Value::Scalar(self.run_cast_scalar( - span.clone(), - scalar, - dest_type, - )?)), - Value::Column(col) => Ok(Value::Column(self.run_cast_column( - span.clone(), - col, - dest_type, - )?)), - } - } - Expr::TryCast { - span, - expr, - dest_type, - } => { - let value = self.run(expr)?; - match value { - Value::Scalar(scalar) => Ok(Value::Scalar(self.run_try_cast_scalar( - span.clone(), - scalar, - dest_type, - ))), - Value::Column(col) => Ok(Value::Column(self.run_try_cast_column( - span.clone(), - col, - dest_type, - ))), + if *is_try { + Ok(self.run_try_cast(span.clone(), expr.data_type(), dest_type, value)) + } else { + self.run_cast(span.clone(), expr.data_type(), dest_type, value) } } }; @@ -137,7 +107,7 @@ impl<'a> Evaluator<'a> { if !*RECURSING.lock().unwrap() { *RECURSING.lock().unwrap() = true; assert_eq!( - ConstantFolder::new(&self.input_columns.domains(), self.tz) + ConstantFolder::new(&self.input_columns.domains(), self.tz, self.fn_registry) .fold(expr) .1, None, @@ -150,638 +120,466 @@ impl<'a> Evaluator<'a> { result } - pub fn run_cast_scalar( + fn run_cast( &self, span: Span, - scalar: Scalar, + src_type: &DataType, dest_type: &DataType, - ) -> Result { - match (scalar, dest_type) { - (Scalar::Null, DataType::Nullable(_)) => Ok(Scalar::Null), - (Scalar::EmptyArray, DataType::Array(dest_ty)) => { - let new_column = ColumnBuilder::with_capacity(dest_ty, 0).build(); - Ok(Scalar::Array(new_column)) - } - (scalar, DataType::Nullable(dest_ty)) => self.run_cast_scalar(span, scalar, dest_ty), - (Scalar::Array(array), DataType::Array(dest_ty)) => { - let new_array = self.run_cast_column(span, array, dest_ty)?; - Ok(Scalar::Array(new_array)) - } - (Scalar::Tuple(fields), DataType::Tuple(fields_ty)) => { - let new_fields = fields - .into_iter() - .zip(fields_ty.iter()) - .map(|(field, dest_ty)| self.run_cast_scalar(span.clone(), field, dest_ty)) - .collect::>>()?; - Ok(Scalar::Tuple(new_fields)) - } - (scalar, DataType::Variant) => { - let mut buf = Vec::new(); - cast_scalar_to_variant(scalar.as_ref(), self.tz, &mut buf); - Ok(Scalar::Variant(buf)) - } + value: Value, + ) -> Result> { + if src_type == dest_type { + return Ok(value); + } - (Scalar::Number(num), DataType::Number(dest_ty)) => { - let new_number = with_number_type!(|SRC_TYPE| match num { - NumberScalar::SRC_TYPE(value) => { - with_number_type!(|DEST_TYPE| match dest_ty { - NumberDataType::DEST_TYPE => { - if NumberDataType::SRC_TYPE.can_lossless_cast_to(*dest_ty) { - NumberScalar::DEST_TYPE(value.as_()) - } else { - let value = num_traits::cast::cast(value).ok_or_else(|| { - ( - span.clone(), - format!( - "unable to cast {} to {}", - ScalarRef::Number(num), - stringify!(DEST_TYPE) - ), - ) - })?; - NumberScalar::DEST_TYPE(value) - } - } - }) + match (src_type, dest_type) { + (DataType::Null, DataType::Nullable(_)) => match value { + Value::Scalar(Scalar::Null) => Ok(Value::Scalar(Scalar::Null)), + Value::Column(Column::Null { len }) => { + let mut builder = ColumnBuilder::with_capacity(dest_type, len); + for _ in 0..len { + builder.push_default(); } - }); - Ok(Scalar::Number(new_number)) - } + Ok(Value::Column(builder.build())) + } + _ => unreachable!(), + }, + (DataType::Nullable(inner_src_ty), DataType::Nullable(inner_dest_ty)) => match value { + Value::Scalar(Scalar::Null) => Ok(Value::Scalar(Scalar::Null)), + Value::Scalar(_) => self.run_cast(span, inner_src_ty, inner_dest_ty, value), + Value::Column(Column::Nullable(col)) => { + let column = self + .run_cast(span, inner_src_ty, inner_dest_ty, Value::Column(col.column))? + .into_column() + .unwrap(); + Ok(Value::Column(Column::Nullable(Box::new(NullableColumn { + column, + validity: col.validity, + })))) + } + _ => unreachable!(), + }, + (_, DataType::Nullable(inner_dest_ty)) => match value { + Value::Scalar(scalar) => { + self.run_cast(span, src_type, inner_dest_ty, Value::Scalar(scalar)) + } + Value::Column(col) => { + let column = self + .run_cast(span, src_type, inner_dest_ty, Value::Column(col))? + .into_column() + .unwrap(); + Ok(Value::Column(Column::Nullable(Box::new(NullableColumn { + validity: constant_bitmap(true, column.len()).into(), + column, + })))) + } + }, - (Scalar::Timestamp(value), DataType::Number(dest_ty)) => { - let new_number = with_number_type!(|DEST_TYPE| match dest_ty { - NumberDataType::DEST_TYPE => { - if NumberDataType::Int64.can_lossless_cast_to(*dest_ty) { - NumberScalar::DEST_TYPE(value.as_()) - } else { - let value = num_traits::cast::cast(value).ok_or_else(|| { - ( - span.clone(), - format!( - "unable to cast TimestampType to {}", - stringify!(DEST_TYPE) - ), - ) - })?; - NumberScalar::DEST_TYPE(value) - } + (DataType::EmptyArray, DataType::Array(inner_dest_ty)) => match value { + Value::Scalar(Scalar::EmptyArray) => { + let new_column = ColumnBuilder::with_capacity(inner_dest_ty, 0).build(); + Ok(Value::Scalar(Scalar::Array(new_column))) + } + Value::Column(Column::EmptyArray { len }) => { + let mut builder = ColumnBuilder::with_capacity(dest_type, len); + for _ in 0..len { + builder.push_default(); } - }); - Ok(Scalar::Number(new_number)) - } + Ok(Value::Column(builder.build())) + } + _ => unreachable!(), + }, + (DataType::Array(inner_src_ty), DataType::Array(inner_dest_ty)) => match value { + Value::Scalar(Scalar::Array(array)) => { + let new_array = self + .run_cast(span, inner_src_ty, inner_dest_ty, Value::Column(array))? + .into_column() + .unwrap(); + Ok(Value::Scalar(Scalar::Array(new_array))) + } + Value::Column(Column::Array(col)) => { + let new_col = self + .run_cast(span, inner_src_ty, inner_dest_ty, Value::Column(col.values))? + .into_column() + .unwrap(); + Ok(Value::Column(Column::Array(Box::new(ArrayColumn { + values: new_col, + offsets: col.offsets, + })))) + } + _ => unreachable!(), + }, + + (DataType::Tuple(fields_src_ty), DataType::Tuple(fields_dest_ty)) => match value { + Value::Scalar(Scalar::Tuple(fields)) => { + let new_fields = fields + .into_iter() + .zip(fields_src_ty.iter()) + .zip(fields_dest_ty.iter()) + .map(|((field, src_ty), dest_ty)| { + self.run_cast(span.clone(), src_ty, dest_ty, Value::Scalar(field)) + .map(|val| val.into_scalar().unwrap()) + }) + .collect::>>()?; + Ok(Value::Scalar(Scalar::Tuple(new_fields))) + } + Value::Column(Column::Tuple { fields, len }) => { + let new_fields = fields + .into_iter() + .zip(fields_src_ty.iter()) + .zip(fields_dest_ty.iter()) + .map(|((field, src_ty), dest_ty)| { + self.run_cast(span.clone(), src_ty, dest_ty, Value::Column(field)) + .map(|val| val.into_column().unwrap()) + }) + .collect::>()?; + Ok(Value::Column(Column::Tuple { + fields: new_fields, + len, + })) + } + _ => unreachable!(), + }, - (Scalar::Date(value), DataType::Number(dest_ty)) => { - let new_number = with_number_type!(|DEST_TYPE| match dest_ty { - NumberDataType::DEST_TYPE => { - if NumberDataType::Int32.can_lossless_cast_to(*dest_ty) { - NumberScalar::DEST_TYPE(value.as_()) - } else { - let value = num_traits::cast::cast(value).ok_or_else(|| { - ( - span.clone(), - format!("unable to cast DateType to {}", stringify!(DEST_TYPE)), - ) - })?; - NumberScalar::DEST_TYPE(value) - } - } - }); - Ok(Scalar::Number(new_number)) + (_, DataType::String) => { + self.run_simple_cast(span, src_type, dest_type, value, "to_string") } - - (Scalar::Timestamp(ts), DataType::String) => Ok(Scalar::String( - timestamp_to_string(ts, self.tz) - .to_string() - .as_bytes() - .to_vec(), - )), - - (Scalar::Date(d), DataType::String) => Ok(Scalar::String( - date_to_string(d, self.tz).to_string().as_bytes().to_vec(), - )), - - // identical types - (scalar @ Scalar::Null, DataType::Null) - | (scalar @ Scalar::EmptyArray, DataType::EmptyArray) - | (scalar @ Scalar::Boolean(_), DataType::Boolean) - | (scalar @ Scalar::String(_), DataType::String) - | (scalar @ Scalar::Timestamp(_), DataType::Timestamp) - | (scalar @ Scalar::Date(_), DataType::Date) => Ok(scalar), - - (scalar, dest_ty) => Err(( - span, - (format!("unable to cast {} to {dest_ty}", scalar.as_ref())), - )), - } - } - - #[allow(clippy::only_used_in_recursion)] - pub fn run_cast_column( - &self, - span: Span, - column: Column, - dest_type: &DataType, - ) -> Result { - match (column, dest_type) { - (Column::Null { len }, DataType::Nullable(_)) => { - let mut builder = ColumnBuilder::with_capacity(dest_type, len); - for _ in 0..len { - builder.push_default(); - } - Ok(builder.build()) + (_, DataType::Number(NumberDataType::UInt8)) => { + self.run_simple_cast(span, src_type, dest_type, value, "to_uint8") } - (Column::EmptyArray { len }, DataType::Array(_)) => { - let mut builder = ColumnBuilder::with_capacity(dest_type, len); - for _ in 0..len { - builder.push_default(); - } - Ok(builder.build()) + (_, DataType::Number(NumberDataType::UInt16)) => { + self.run_simple_cast(span, src_type, dest_type, value, "to_uint16") } - (Column::Nullable(box col), DataType::Nullable(dest_ty)) => { - let column = self.run_cast_column(span, col.column, dest_ty)?; - Ok(Column::Nullable(Box::new(NullableColumn { - column, - validity: col.validity, - }))) + (_, DataType::Number(NumberDataType::UInt32)) => { + self.run_simple_cast(span, src_type, dest_type, value, "to_uint32") } - (col, DataType::Nullable(dest_ty)) => { - let column = self.run_cast_column(span, col, dest_ty)?; - Ok(Column::Nullable(Box::new(NullableColumn { - validity: constant_bitmap(true, column.len()).into(), - column, - }))) + (_, DataType::Number(NumberDataType::UInt64)) => { + self.run_simple_cast(span, src_type, dest_type, value, "to_uint64") } - (Column::Array(col), DataType::Array(dest_ty)) => { - let values = self.run_cast_column(span, col.values, dest_ty)?; - Ok(Column::Array(Box::new(ArrayColumn { - values, - offsets: col.offsets, - }))) + (_, DataType::Number(NumberDataType::Int8)) => { + self.run_simple_cast(span, src_type, dest_type, value, "to_int8") } - (Column::Tuple { fields, len }, DataType::Tuple(fields_ty)) => { - let new_fields = fields - .into_iter() - .zip(fields_ty) - .map(|(field, field_ty)| self.run_cast_column(span.clone(), field, field_ty)) - .collect::>()?; - Ok(Column::Tuple { - fields: new_fields, - len, - }) + (_, DataType::Number(NumberDataType::Int16)) => { + self.run_simple_cast(span, src_type, dest_type, value, "to_int16") } - (col, DataType::Variant) => { - let new_col = Column::Variant(cast_scalars_to_variants(col.iter(), self.tz)); - Ok(new_col) + (_, DataType::Number(NumberDataType::Int32)) => { + self.run_simple_cast(span, src_type, dest_type, value, "to_int32") } - - (Column::Number(col), DataType::Number(dest_ty)) => { - let new_column = with_number_type!(|SRC_TYPE| match col { - NumberColumn::SRC_TYPE(col) => { - with_number_type!(|DEST_TYPE| match dest_ty { - NumberDataType::DEST_TYPE => { - if NumberDataType::SRC_TYPE.can_lossless_cast_to(*dest_ty) { - let new_col = col.iter().map(|x| x.as_()).collect::>(); - NumberColumn::DEST_TYPE(new_col.into()) - } else { - let mut new_col = Vec::with_capacity(col.len()); - for &val in col.iter() { - let new_val = - num_traits::cast::cast(val).ok_or_else(|| { - ( - span.clone(), - format!( - "unable to cast {} to {}", - val, - stringify!(DEST_TYPE) - ), - ) - })?; - new_col.push(new_val); - } - NumberColumn::DEST_TYPE(new_col.into()) - } - } - }) - } - }); - Ok(Column::Number(new_column)) + (_, DataType::Number(NumberDataType::Int64)) => { + self.run_simple_cast(span, src_type, dest_type, value, "to_int64") } - - (Column::Timestamp(col), DataType::Number(dest_ty)) => { - let new_column = with_number_type!(|DEST_TYPE| match dest_ty { - NumberDataType::DEST_TYPE => { - if NumberDataType::Int64.can_lossless_cast_to(*dest_ty) { - let new_col = col.iter().map(|x| x.as_()).collect::>(); - NumberColumn::DEST_TYPE(new_col.into()) - } else { - let mut new_col = Vec::with_capacity(col.len()); - for &val in col.iter() { - let new_val = num_traits::cast::cast(val).ok_or_else(|| { - ( - span.clone(), - format!("unable to cast TimestampType to {}", val), - ) - })?; - new_col.push(new_val); - } - NumberColumn::DEST_TYPE(new_col.into()) - } - } - }); - Ok(Column::Number(new_column)) + (_, DataType::Number(NumberDataType::Float32)) => { + self.run_simple_cast(span, src_type, dest_type, value, "to_float32") } - - (Column::Date(col), DataType::Number(dest_ty)) => { - let new_column = with_number_type!(|DEST_TYPE| match dest_ty { - NumberDataType::DEST_TYPE => { - if NumberDataType::Int32.can_lossless_cast_to(*dest_ty) { - let new_col = col.iter().map(|x| x.as_()).collect::>(); - NumberColumn::DEST_TYPE(new_col.into()) - } else { - let mut new_col = Vec::with_capacity(col.len()); - for &val in col.iter() { - let new_val = num_traits::cast::cast(val).ok_or_else(|| { - (span.clone(), format!("unable to cast DateType to {}", val)) - })?; - new_col.push(new_val); - } - NumberColumn::DEST_TYPE(new_col.into()) - } - } - }); - Ok(Column::Number(new_column)) + (_, DataType::Number(NumberDataType::Float64)) => { + self.run_simple_cast(span, src_type, dest_type, value, "to_float64") } - - (Column::Timestamp(col), DataType::String) => { - // We can get the data_capacity, so no need to use `from_iter`. - // "YYYY-mm-DD HH:MM:SS.ssssss" - let mut builder = StringColumnBuilder::with_capacity(col.len(), col.len() * 26); - for val in col.iter() { - let s = timestamp_to_string(*val, self.tz).to_string(); - builder.put_str(s.as_str()); - builder.commit_row(); - } - Ok(Column::String(builder.build())) + (_, DataType::Timestamp) => { + self.run_simple_cast(span, src_type, dest_type, value, "to_timestamp") } - - (Column::Date(col), DataType::String) => { - // We can get the data_capacity, so no need to use `from_iter`. - // "YYYY-mm-DD" - let mut builder = StringColumnBuilder::with_capacity(col.len(), col.len() * 10); - for &val in col.iter() { - let s = date_to_string(val, self.tz).to_string(); - builder.put_str(s.as_str()); - builder.commit_row(); - } - Ok(Column::String(builder.build())) + (_, DataType::Date) => { + self.run_simple_cast(span, src_type, dest_type, value, "to_date") + } + (_, DataType::Variant) => { + self.run_simple_cast(span, src_type, dest_type, value, "to_variant") } - // identical types - (col @ Column::Null { .. }, DataType::Null) - | (col @ Column::EmptyArray { .. }, DataType::EmptyArray) - | (col @ Column::Boolean(_), DataType::Boolean) - | (col @ Column::String { .. }, DataType::String) - | (col @ Column::Timestamp { .. }, DataType::Timestamp) - | (col @ Column::Date(_), DataType::Date) => Ok(col), - - (col, dest_ty) => Err((span, (format!("unable to cast {col:?} to {dest_ty}")))), + _ => Err((span, (format!("unable to cast {src_type} to {dest_type}")))), } } - pub fn run_try_cast_scalar(&self, span: Span, scalar: Scalar, dest_type: &DataType) -> Scalar { - let inner_type: &DataType = dest_type.as_nullable().unwrap(); - self.run_cast_scalar(span, scalar, inner_type) - .unwrap_or(Scalar::Null) - } + fn run_try_cast( + &self, + span: Span, + src_type: &DataType, + dest_type: &DataType, + value: Value, + ) -> Value { + if src_type == dest_type { + return value; + } - #[allow(clippy::only_used_in_recursion)] - pub fn run_try_cast_column(&self, span: Span, column: Column, dest_type: &DataType) -> Column { - let inner_type: &DataType = dest_type.as_nullable().unwrap(); - match (column, inner_type) { - (_, DataType::Null | DataType::Nullable(_)) => { - unreachable!("inner type can not be nullable") - } - (Column::Null { len }, _) => { - let mut builder = ColumnBuilder::with_capacity(dest_type, len); - for _ in 0..len { - builder.push_default(); - } - builder.build() - } - (Column::EmptyArray { len }, DataType::Array(_)) => { - let mut builder = ColumnBuilder::with_capacity(dest_type, len); - for _ in 0..len { - builder.push_default(); - } - builder.build() - } - (Column::Nullable(box col), _) => { - let new_col = *self - .run_try_cast_column(span, col.column, dest_type) - .into_nullable() - .unwrap(); - Column::Nullable(Box::new(NullableColumn { - column: new_col.column, - validity: bitmap::or(&col.validity, &new_col.validity), - })) - } - (Column::Array(col), DataType::Array(dest_ty)) => { - let new_values = self.run_try_cast_column(span, col.values, dest_ty); - let new_col = Column::Array(Box::new(ArrayColumn { - values: new_values, - offsets: col.offsets, - })); - Column::Nullable(Box::new(NullableColumn { - validity: constant_bitmap(true, new_col.len()).into(), - column: new_col, - })) - } - (Column::Tuple { fields, len }, DataType::Tuple(fields_ty)) => { - let new_fields = fields - .into_iter() - .zip(fields_ty) - .map(|(field, field_ty)| { - self.run_try_cast_column(span.clone(), field, field_ty) - }) - .collect(); - let new_col = Column::Tuple { - fields: new_fields, - len, - }; - Column::Nullable(Box::new(NullableColumn { - validity: constant_bitmap(true, len).into(), - column: new_col, - })) - } - (col, DataType::Variant) => { - let new_col = Column::Variant(cast_scalars_to_variants(col.iter(), self.tz)); - Column::Nullable(Box::new(NullableColumn { - validity: constant_bitmap(true, new_col.len()).into(), - column: new_col, - })) - } + // The dest_type of `TRY_CAST` must be `Nullable`, which is guaranteed by the type checker. + let inner_dest_type = &**dest_type.as_nullable().unwrap(); - (Column::Number(col), DataType::Number(dest_ty)) => { - with_number_type!(|SRC_TYPE| match &col { - NumberColumn::SRC_TYPE(col) => { - with_number_type!(|DEST_TYPE| match dest_ty { - NumberDataType::DEST_TYPE => { - if NumberDataType::SRC_TYPE.can_lossless_cast_to(*dest_ty) { - let new_col = col.iter().map(|x| x.as_()).collect::>(); - Column::Nullable(Box::new(NullableColumn { - validity: constant_bitmap(true, new_col.len()).into(), - column: Column::Number(NumberColumn::DEST_TYPE( - new_col.into(), - )), - })) - } else { - let mut new_col = Vec::with_capacity(col.len()); - let mut validity = MutableBitmap::with_capacity(col.len()); - for &val in col.iter() { - if let Some(new_val) = num_traits::cast::cast(val) { - new_col.push(new_val); - validity.push(true); - } else { - new_col.push(Default::default()); - validity.push(false); - } - } - Column::Nullable(Box::new(NullableColumn { - validity: validity.into(), - column: Column::Number(NumberColumn::DEST_TYPE( - new_col.into(), - )), - })) - } - } - }) + match (src_type, inner_dest_type) { + (DataType::Null, _) => match value { + Value::Scalar(Scalar::Null) => Value::Scalar(Scalar::Null), + Value::Column(Column::Null { len }) => { + let mut builder = ColumnBuilder::with_capacity(dest_type, len); + for _ in 0..len { + builder.push_default(); } - }) - } - - (Column::Timestamp(col), DataType::Number(dest_ty)) => { - with_number_type!(|DEST_TYPE| match dest_ty { - NumberDataType::DEST_TYPE => { - if NumberDataType::Int64.can_lossless_cast_to(*dest_ty) { - let new_col = col.iter().map(|x| x.as_()).collect::>(); - Column::Nullable(Box::new(NullableColumn { - validity: constant_bitmap(true, new_col.len()).into(), - column: Column::Number(NumberColumn::DEST_TYPE(new_col.into())), - })) - } else { - let mut new_col = Vec::with_capacity(col.len()); - let mut validity = MutableBitmap::with_capacity(col.len()); - for &val in col.iter() { - if let Some(new_val) = num_traits::cast::cast(val) { - new_col.push(new_val); - validity.push(true); - } else { - new_col.push(Default::default()); - validity.push(false); - } - } - Column::Nullable(Box::new(NullableColumn { - validity: validity.into(), - column: Column::Number(NumberColumn::DEST_TYPE(new_col.into())), - })) - } - } - }) - } + Value::Column(builder.build()) + } + _ => unreachable!(), + }, + (DataType::Nullable(inner_src_ty), _) => match value { + Value::Scalar(Scalar::Null) => Value::Scalar(Scalar::Null), + Value::Scalar(_) => self.run_try_cast(span, inner_src_ty, inner_dest_type, value), + Value::Column(Column::Nullable(col)) => { + let new_col = *self + .run_try_cast(span, inner_src_ty, dest_type, Value::Column(col.column)) + .into_column() + .unwrap() + .into_nullable() + .unwrap(); + Value::Column(Column::Nullable(Box::new(NullableColumn { + column: new_col.column, + validity: bitmap::or(&col.validity, &new_col.validity), + }))) + } + _ => unreachable!(), + }, - (Column::Date(col), DataType::Number(dest_ty)) => { - with_number_type!(|DEST_TYPE| match dest_ty { - NumberDataType::DEST_TYPE => { - if NumberDataType::Int32.can_lossless_cast_to(*dest_ty) { - let new_col = col.iter().map(|x| x.as_()).collect::>(); - Column::Nullable(Box::new(NullableColumn { - validity: constant_bitmap(true, new_col.len()).into(), - column: Column::Number(NumberColumn::DEST_TYPE(new_col.into())), - })) - } else { - let mut new_col = Vec::with_capacity(col.len()); - let mut validity = MutableBitmap::with_capacity(col.len()); - for &val in col.iter() { - if let Some(new_val) = num_traits::cast::cast(val) { - new_col.push(new_val); - validity.push(true); - } else { - new_col.push(Default::default()); - validity.push(false); - } - } - Column::Nullable(Box::new(NullableColumn { - validity: validity.into(), - column: Column::Number(NumberColumn::DEST_TYPE(new_col.into())), - })) - } + (DataType::EmptyArray, DataType::Array(inner_dest_ty)) => match value { + Value::Scalar(Scalar::EmptyArray) => { + let new_column = ColumnBuilder::with_capacity(inner_dest_ty, 0).build(); + Value::Scalar(Scalar::Array(new_column)) + } + Value::Column(Column::EmptyArray { len }) => { + let mut builder = ColumnBuilder::with_capacity(dest_type, len); + for _ in 0..len { + builder.push_default(); } - }) - } - - (Column::Timestamp(col), DataType::String) => { - // We can get the data_capacity, so no need to use `from_iter`. - // "YYYY-mm-DD HH:MM:SS.ssssss" - let mut builder = StringColumnBuilder::with_capacity(col.len(), col.len() * 26); - for val in col.iter() { - let s = timestamp_to_string(*val, self.tz).to_string(); - builder.put_str(s.as_str()); - builder.commit_row(); + Value::Column(builder.build()) } - let new_col = builder.build(); - Column::Nullable(Box::new(NullableColumn { - validity: constant_bitmap(true, col.len()).into(), - column: Column::String(new_col), - })) - } - - (Column::Date(col), DataType::String) => { - // We can get the data_capacity, so no need to use `from_iter`. - // "YYYY-mm-DD" - let mut builder = StringColumnBuilder::with_capacity(col.len(), col.len() * 10); - for &val in col.iter() { - let s = date_to_string(val, self.tz).to_string(); - builder.put_str(s.as_str()); - builder.commit_row(); + _ => unreachable!(), + }, + (DataType::Array(inner_src_ty), DataType::Array(inner_dest_ty)) => match value { + Value::Scalar(Scalar::Array(array)) => { + let new_array = self + .run_try_cast(span, inner_src_ty, inner_dest_ty, Value::Column(array)) + .into_column() + .unwrap(); + Value::Scalar(Scalar::Array(new_array)) } - let new_col = builder.build(); - Column::Nullable(Box::new(NullableColumn { - validity: constant_bitmap(true, col.len()).into(), - column: Column::String(new_col), - })) - } - - // identical types - (column @ Column::Boolean(_), DataType::Boolean) - | (column @ Column::String { .. }, DataType::String) - | (column @ Column::EmptyArray { .. }, DataType::EmptyArray) - | (column @ Column::Timestamp { .. }, DataType::Timestamp) - | (column @ Column::Date(_), DataType::Date) => { - Column::Nullable(Box::new(NullableColumn { - validity: constant_bitmap(true, column.len()).into(), - column, - })) - } - - // failure cases - (col, _) => { - let len = col.len(); - let mut builder = ColumnBuilder::with_capacity(dest_type, len); - for _ in 0..len { - builder.push_default(); + Value::Column(Column::Array(col)) => { + let new_values = self + .run_try_cast(span, inner_src_ty, inner_dest_ty, Value::Column(col.values)) + .into_column() + .unwrap(); + let new_col = Column::Array(Box::new(ArrayColumn { + values: new_values, + offsets: col.offsets, + })); + Value::Column(Column::Nullable(Box::new(NullableColumn { + validity: constant_bitmap(true, new_col.len()).into(), + column: new_col, + }))) } - builder.build() - } + _ => unreachable!(), + }, + + (DataType::Tuple(fields_src_ty), DataType::Tuple(fields_dest_ty)) => match value { + Value::Scalar(Scalar::Tuple(fields)) => { + let new_fields = fields + .into_iter() + .zip(fields_src_ty.iter()) + .zip(fields_dest_ty.iter()) + .map(|((field, src_ty), dest_ty)| { + self.run_try_cast(span.clone(), src_ty, dest_ty, Value::Scalar(field)) + .into_scalar() + .unwrap() + }) + .collect::>(); + Value::Scalar(Scalar::Tuple(new_fields)) + } + Value::Column(Column::Tuple { fields, len }) => { + let new_fields = fields + .into_iter() + .zip(fields_src_ty.iter()) + .zip(fields_dest_ty.iter()) + .map(|((field, src_ty), dest_ty)| { + self.run_try_cast(span.clone(), src_ty, dest_ty, Value::Column(field)) + .into_column() + .unwrap() + }) + .collect(); + let new_col = Column::Tuple { + fields: new_fields, + len, + }; + Value::Column(new_col) + } + _ => unreachable!(), + }, + + (_, DataType::String) => self + .run_simple_cast(span, src_type, dest_type, value, "try_to_string") + .unwrap(), + (_, DataType::Number(NumberDataType::UInt8)) => self + .run_simple_cast(span, src_type, dest_type, value, "try_to_uint8") + .unwrap(), + (_, DataType::Number(NumberDataType::UInt16)) => self + .run_simple_cast(span, src_type, dest_type, value, "try_to_uint16") + .unwrap(), + (_, DataType::Number(NumberDataType::UInt32)) => self + .run_simple_cast(span, src_type, dest_type, value, "try_to_uint32") + .unwrap(), + (_, DataType::Number(NumberDataType::UInt64)) => self + .run_simple_cast(span, src_type, dest_type, value, "try_to_uint64") + .unwrap(), + (_, DataType::Number(NumberDataType::Int8)) => self + .run_simple_cast(span, src_type, dest_type, value, "try_to_int8") + .unwrap(), + (_, DataType::Number(NumberDataType::Int16)) => self + .run_simple_cast(span, src_type, dest_type, value, "try_to_int16") + .unwrap(), + (_, DataType::Number(NumberDataType::Int32)) => self + .run_simple_cast(span, src_type, dest_type, value, "try_to_int32") + .unwrap(), + (_, DataType::Number(NumberDataType::Int64)) => self + .run_simple_cast(span, src_type, dest_type, value, "try_to_int64") + .unwrap(), + (_, DataType::Number(NumberDataType::Float32)) => self + .run_simple_cast(span, src_type, dest_type, value, "try_to_float32") + .unwrap(), + (_, DataType::Number(NumberDataType::Float64)) => self + .run_simple_cast(span, src_type, dest_type, value, "try_to_float64") + .unwrap(), + (_, DataType::Timestamp) => self + .run_simple_cast(span, src_type, dest_type, value, "try_to_timestamp") + .unwrap(), + (_, DataType::Date) => self + .run_simple_cast(span, src_type, dest_type, value, "try_to_date") + .unwrap(), + (_, DataType::Variant) => self + .run_simple_cast(span, src_type, dest_type, value, "try_to_variant") + .unwrap(), + + _ => match value { + Value::Scalar(_) => Value::Scalar(Scalar::Null), + Value::Column(col) => { + let mut builder = ColumnBuilder::with_capacity(dest_type, col.len()); + for _ in 0..col.len() { + builder.push_default(); + } + Value::Column(builder.build()) + } + }, } } + + fn run_simple_cast( + &self, + span: Span, + src_type: &DataType, + dest_type: &DataType, + value: Value, + cast_fn: &str, + ) -> Result> { + let num_rows = match &value { + Value::Scalar(_) => 1, + Value::Column(col) => col.len(), + }; + let (val, ty) = eval_function( + span, + cast_fn, + [(value, src_type.clone())], + self.tz, + num_rows, + self.fn_registry, + )?; + assert_eq!(&ty, dest_type); + Ok(val) + } } pub struct ConstantFolder<'a> { input_domains: &'a [Domain], tz: Tz, + fn_registry: &'a FunctionRegistry, } impl<'a> ConstantFolder<'a> { - pub fn new(input_domains: &'a [Domain], tz: Tz) -> Self { - ConstantFolder { input_domains, tz } + pub fn new(input_domains: &'a [Domain], tz: Tz, fn_registry: &'a FunctionRegistry) -> Self { + ConstantFolder { + input_domains, + tz, + fn_registry, + } } pub fn fold(&self, expr: &Expr) -> (Expr, Option) { - match expr { + let (new_expr, domain) = match expr { Expr::Constant { scalar, .. } => (expr.clone(), Some(scalar.as_ref().domain())), - Expr::ColumnRef { span, id } => { + Expr::ColumnRef { + span, + id, + data_type, + } => { let domain = &self.input_domains[*id]; let expr = domain .as_singleton() .map(|scalar| Expr::Constant { span: span.clone(), scalar, + data_type: data_type.clone(), }) .unwrap_or_else(|| expr.clone()); (expr, Some(domain.clone())) } Expr::Cast { span, + is_try, expr, dest_type, } => { let (inner_expr, inner_domain) = self.fold(expr); - let cast_domain = inner_domain.and_then(|inner_domain| { - self.calculate_cast(span.clone(), &inner_domain, dest_type) - }); + + let new_domain = if *is_try { + inner_domain.and_then(|inner_domain| { + self.calculate_try_cast( + span.clone(), + expr.data_type(), + dest_type, + &inner_domain, + ) + }) + } else { + inner_domain.and_then(|inner_domain| { + self.calculate_cast( + span.clone(), + expr.data_type(), + dest_type, + &inner_domain, + ) + }) + }; let cast_expr = Expr::Cast { span: span.clone(), + is_try: *is_try, expr: Box::new(inner_expr.clone()), dest_type: dest_type.clone(), }; if inner_expr.as_constant().is_some() { let chunk = Chunk::empty(); - let evaluator = Evaluator::new(&chunk, self.tz); + let evaluator = Evaluator::new(&chunk, self.tz, self.fn_registry); if let Ok(Value::Scalar(scalar)) = evaluator.run(&cast_expr) { return ( Expr::Constant { span: span.clone(), scalar, + data_type: dest_type.clone(), }, - cast_domain, + new_domain, ); } } ( - cast_domain + new_domain .as_ref() .and_then(Domain::as_singleton) .map(|scalar| Expr::Constant { span: span.clone(), scalar, + data_type: dest_type.clone(), }) .unwrap_or(cast_expr), - cast_domain, - ) - } - Expr::TryCast { - span, - expr, - dest_type, - } => { - let (inner_expr, inner_domain) = self.fold(expr); - let try_cast_domain = inner_domain.map(|inner_domain| { - self.calculate_try_cast(span.clone(), &inner_domain, dest_type) - }); - - let try_cast_expr = Expr::TryCast { - span: span.clone(), - expr: Box::new(inner_expr.clone()), - dest_type: dest_type.clone(), - }; - - if inner_expr.as_constant().is_some() { - let chunk = Chunk::empty(); - let evaluator = Evaluator::new(&chunk, self.tz); - if let Ok(Value::Scalar(scalar)) = evaluator.run(&try_cast_expr) { - return ( - Expr::Constant { - span: span.clone(), - scalar, - }, - try_cast_domain, - ); - } - } - - ( - try_cast_domain - .as_ref() - .and_then(Domain::as_singleton) - .map(|scalar| Expr::Constant { - span: span.clone(), - scalar, - }) - .unwrap_or(try_cast_expr), - try_cast_domain, + new_domain, ) } Expr::FunctionCall { @@ -790,6 +588,7 @@ impl<'a> ConstantFolder<'a> { function, generics, args, + return_type, } => { let (mut args_expr, mut args_domain) = (Vec::new(), Some(Vec::new())); for arg in args { @@ -809,6 +608,7 @@ impl<'a> ConstantFolder<'a> { Expr::Constant { span: span.clone(), scalar, + data_type: return_type.clone(), }, func_domain, ); @@ -820,16 +620,18 @@ impl<'a> ConstantFolder<'a> { function: function.clone(), generics: generics.clone(), args: args_expr, + return_type: return_type.clone(), }; if all_args_is_scalar { let chunk = Chunk::empty(); - let evaluator = Evaluator::new(&chunk, self.tz); + let evaluator = Evaluator::new(&chunk, self.tz, self.fn_registry); if let Ok(Value::Scalar(scalar)) = evaluator.run(&func_expr) { return ( Expr::Constant { span: span.clone(), scalar, + data_type: return_type.clone(), }, func_domain, ); @@ -838,236 +640,260 @@ impl<'a> ConstantFolder<'a> { (func_expr, func_domain) } - } + }; + + debug_assert_eq!(expr.data_type(), new_expr.data_type()); + + (new_expr, domain) } - #[allow(clippy::only_used_in_recursion)] - pub fn calculate_cast( + fn calculate_cast( &self, span: Span, - domain: &Domain, + src_type: &DataType, dest_type: &DataType, + domain: &Domain, ) -> Option { - match (domain, dest_type) { - ( - Domain::Nullable(NullableDomain { value: None, .. }), - DataType::Null | DataType::Nullable(_), - ) => Some(domain.clone()), - (Domain::Array(None), DataType::EmptyArray | DataType::Array(_)) => { - Some(Domain::Array(None)) + if src_type == dest_type { + return Some(domain.clone()); + } + + match (src_type, dest_type) { + (DataType::Null, DataType::Nullable(_)) => Some(domain.clone()), + (DataType::Nullable(inner_src_ty), DataType::Nullable(inner_dest_ty)) => { + let domain = domain.as_nullable().unwrap(); + let value = match &domain.value { + Some(value) => Some(Box::new(self.calculate_cast( + span, + inner_src_ty, + inner_dest_ty, + value, + )?)), + None => None, + }; + Some(Domain::Nullable(NullableDomain { + has_null: domain.has_null, + value, + })) } - ( - Domain::Nullable(NullableDomain { - has_null, - value: Some(value), - }), - DataType::Nullable(ty), - ) => Some(Domain::Nullable(NullableDomain { - has_null: *has_null, - value: Some(Box::new(self.calculate_cast(span, value, ty)?)), - })), - (domain, DataType::Nullable(ty)) => Some(Domain::Nullable(NullableDomain { + (_, DataType::Nullable(inner_dest_ty)) => Some(Domain::Nullable(NullableDomain { has_null: false, - value: Some(Box::new(self.calculate_cast(span, domain, ty)?)), + value: Some(Box::new(self.calculate_cast( + span, + src_type, + inner_dest_ty, + domain, + )?)), })), - (Domain::Array(Some(domain)), DataType::Array(ty)) => Some(Domain::Array(Some( - Box::new(self.calculate_cast(span, domain, ty)?), - ))), - (Domain::Tuple(fields), DataType::Tuple(fields_ty)) => Some(Domain::Tuple( - fields - .iter() - .zip(fields_ty) - .map(|(field, ty)| self.calculate_cast(span.clone(), field, ty)) - .collect::>>()?, - )), - (_, DataType::Variant) => Some(Domain::Undefined), - (Domain::Number(domain), DataType::Number(dest_ty)) => { - with_number_type!(|SRC_TYPE| match domain { - NumberDomain::SRC_TYPE(domain) => { - with_number_type!(|DEST_TYPE| match dest_ty { - NumberDataType::DEST_TYPE => { - let (domain, overflowing) = domain.overflow_cast(); - if overflowing { - None - } else { - Some(Domain::Number(NumberDomain::DEST_TYPE(domain))) - } - } + (DataType::EmptyArray, DataType::Array(_)) => Some(domain.clone()), + (DataType::Array(inner_src_ty), DataType::Array(inner_dest_ty)) => { + let inner_domain = match domain.as_array().unwrap() { + Some(inner_domain) => Some(Box::new(self.calculate_cast( + span, + inner_src_ty, + inner_dest_ty, + inner_domain, + )?)), + None => None, + }; + Some(Domain::Array(inner_domain)) + } + + (DataType::Tuple(fields_src_ty), DataType::Tuple(fields_dest_ty)) => { + Some(Domain::Tuple( + domain + .as_tuple() + .unwrap() + .iter() + .zip(fields_src_ty) + .zip(fields_dest_ty) + .map(|((field_domain, src_ty), dest_ty)| { + self.calculate_cast(span.clone(), src_ty, dest_ty, field_domain) }) - } - }) - } - - (Domain::Timestamp(domain), DataType::Number(dest_ty)) => { - with_number_type!(|DEST_TYPE| match dest_ty { - NumberDataType::DEST_TYPE => { - let simple_domain = SimpleDomain { - min: domain.min, - max: domain.max, - }; - let (domain, overflowing) = simple_domain.overflow_cast(); - if overflowing { - None - } else { - Some(Domain::Number(NumberDomain::DEST_TYPE(domain))) - } - } - }) - } - - (Domain::Date(domain), DataType::Number(dest_ty)) => { - with_number_type!(|DEST_TYPE| match dest_ty { - NumberDataType::DEST_TYPE => { - let (domain, overflowing) = domain.overflow_cast(); - if overflowing { - None - } else { - Some(Domain::Number(NumberDomain::DEST_TYPE(domain))) - } - } - }) - } + .collect::>>()?, + )) + } + + (_, DataType::String) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "to_string") + .unwrap(), + (_, DataType::Number(NumberDataType::UInt8)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "to_uint8") + .unwrap(), + (_, DataType::Number(NumberDataType::UInt16)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "to_uint16") + .unwrap(), + (_, DataType::Number(NumberDataType::UInt32)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "to_uint32") + .unwrap(), + (_, DataType::Number(NumberDataType::UInt64)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "to_uint64") + .unwrap(), + (_, DataType::Number(NumberDataType::Int8)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "to_int8") + .unwrap(), + (_, DataType::Number(NumberDataType::Int16)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "to_int16") + .unwrap(), + (_, DataType::Number(NumberDataType::Int32)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "to_int32") + .unwrap(), + (_, DataType::Number(NumberDataType::Int64)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "to_int64") + .unwrap(), + (_, DataType::Number(NumberDataType::Float32)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "to_float32") + .unwrap(), + (_, DataType::Number(NumberDataType::Float64)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "to_float64") + .unwrap(), + (_, DataType::Timestamp) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "to_timestamp") + .unwrap(), + (_, DataType::Date) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "to_date") + .unwrap(), + (_, DataType::Variant) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "to_variant") + .unwrap(), - // identical types - (Domain::Boolean(_), DataType::Boolean) - | (Domain::String(_), DataType::String) - | (Domain::Timestamp(_), DataType::Timestamp) - | (Domain::Date(_), DataType::Date) => Some(domain.clone()), - - // failure cases _ => None, } } - #[allow(clippy::only_used_in_recursion)] - pub fn calculate_try_cast(&self, span: Span, domain: &Domain, dest_type: &DataType) -> Domain { - let inner_type: &DataType = dest_type.as_nullable().unwrap(); - match (domain, inner_type) { - (_, DataType::Null | DataType::Nullable(_)) => { - unreachable!("inner type cannot be nullable") - } - (Domain::Array(None), DataType::EmptyArray | DataType::Array(_)) => { - Domain::Nullable(NullableDomain { - has_null: false, - value: Some(Box::new(Domain::Array(None))), - }) - } - ( - Domain::Nullable(NullableDomain { - has_null, - value: Some(value), - }), - _, - ) => { - let inner_domain = self - .calculate_try_cast(span, value, dest_type) - .into_nullable() - .unwrap(); - Domain::Nullable(NullableDomain { - has_null: *has_null || inner_domain.has_null, - value: inner_domain.value, - }) - } - (Domain::Array(Some(domain)), DataType::Array(ty)) => { - let inner_domain = self.calculate_try_cast(span, domain, ty); - Domain::Nullable(NullableDomain { - has_null: false, - value: Some(Box::new(Domain::Array(Some(Box::new(inner_domain))))), - }) - } - (Domain::Tuple(fields), DataType::Tuple(fields_ty)) => { - let new_fields = fields - .iter() - .zip(fields_ty) - .map(|(field, ty)| self.calculate_try_cast(span.clone(), field, ty)) - .collect(); - Domain::Nullable(NullableDomain { - has_null: false, - value: Some(Box::new(Domain::Tuple(new_fields))), - }) - } - (_, DataType::Variant) => Domain::Nullable(NullableDomain { - has_null: false, - value: Some(Box::new(Domain::Undefined)), - }), - - (Domain::Number(domain), DataType::Number(dest_ty)) => { - with_number_type!(|SRC_TYPE| match domain { - NumberDomain::SRC_TYPE(domain) => { - with_number_type!(|DEST_TYPE| match dest_ty { - NumberDataType::DEST_TYPE => { - let (domain, overflowing) = domain.overflow_cast(); - Domain::Nullable(NullableDomain { - has_null: overflowing, - value: Some(Box::new(Domain::Number(NumberDomain::DEST_TYPE( - domain, - )))), - }) - } - }) - } - }) - } - - (Domain::Timestamp(domain), DataType::Number(dest_ty)) => { - with_number_type!(|DEST_TYPE| match dest_ty { - NumberDataType::DEST_TYPE => { - let simple_domain = SimpleDomain { - min: domain.min, - max: domain.max, - }; - let (domain, overflowing) = simple_domain.overflow_cast(); - Domain::Nullable(NullableDomain { - has_null: overflowing, - value: Some(Box::new(Domain::Number(NumberDomain::DEST_TYPE(domain)))), - }) - } - }) - } + fn calculate_try_cast( + &self, + span: Span, + src_type: &DataType, + dest_type: &DataType, + domain: &Domain, + ) -> Option { + if src_type == dest_type { + return Some(domain.clone()); + } - (Domain::Date(domain), DataType::Number(dest_ty)) => { - with_number_type!(|DEST_TYPE| match dest_ty { - NumberDataType::DEST_TYPE => { - let (domain, overflowing) = domain.overflow_cast(); - Domain::Nullable(NullableDomain { - has_null: overflowing, - value: Some(Box::new(Domain::Number(NumberDomain::DEST_TYPE(domain)))), - }) + // The dest_type of `TRY_CAST` must be `Nullable`, which is guaranteed by the type checker. + let inner_dest_type = &**dest_type.as_nullable().unwrap(); + + match (src_type, inner_dest_type) { + (DataType::Null, _) => Some(domain.clone()), + (DataType::Nullable(inner_src_ty), _) => { + let nullable_domain = domain.as_nullable().unwrap(); + match &nullable_domain.value { + Some(value) => { + let new_domain = self + .calculate_try_cast(span, inner_src_ty, dest_type, value)? + .into_nullable() + .unwrap(); + Some(Domain::Nullable(NullableDomain { + has_null: nullable_domain.has_null || new_domain.has_null, + value: new_domain.value, + })) } - }) + None => Some(domain.clone()), + } } - (Domain::Timestamp(domain), DataType::Date) => Domain::Nullable(NullableDomain { - has_null: false, - value: Some(Box::new(Domain::Date(SimpleDomain { - min: (domain.min / 1000000 / 24 / 3600) as i32, - max: (domain.max / 1000000 / 24 / 3600) as i32, - }))), - }), - - (Domain::Date(domain), DataType::Timestamp) => Domain::Nullable(NullableDomain { - has_null: false, - value: Some(Box::new(Domain::Timestamp(SimpleDomain { - min: domain.min as i64 * 24 * 3600 * 1000000, - max: domain.max as i64 * 24 * 3600 * 1000000, - }))), - }), - - // identical types - (Domain::Boolean(_), DataType::Boolean) - | (Domain::String(_), DataType::String) - | (Domain::Timestamp(_), DataType::Timestamp) - | (Domain::Date(_), DataType::Date) => Domain::Nullable(NullableDomain { + (DataType::EmptyArray, DataType::Array(_)) => Some(Domain::Nullable(NullableDomain { has_null: false, value: Some(Box::new(domain.clone())), - }), + })), + (DataType::Array(inner_src_ty), DataType::Array(inner_dest_ty)) => { + let inner_domain = match domain.as_array().unwrap() { + Some(inner_domain) => Some(Box::new(self.calculate_try_cast( + span, + inner_src_ty, + inner_dest_ty, + inner_domain, + )?)), + None => None, + }; + Some(Domain::Nullable(NullableDomain { + has_null: false, + value: Some(Box::new(Domain::Array(inner_domain))), + })) + } - // failure cases - _ => Domain::Nullable(NullableDomain { + (DataType::Tuple(fields_src_ty), DataType::Tuple(fields_dest_ty)) => { + let fields_domain = domain.as_tuple().unwrap(); + let new_fields_domain = fields_domain + .iter() + .zip(fields_src_ty) + .zip(fields_dest_ty) + .map(|((domain, src_ty), dest_ty)| { + self.calculate_try_cast(span.clone(), src_ty, dest_ty, domain) + }) + .collect::>()?; + Some(Domain::Tuple(new_fields_domain)) + } + + (_, DataType::String) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "try_to_string") + .unwrap(), + (_, DataType::Number(NumberDataType::UInt8)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "try_to_uint8") + .unwrap(), + (_, DataType::Number(NumberDataType::UInt16)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "try_to_uint16") + .unwrap(), + (_, DataType::Number(NumberDataType::UInt32)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "try_to_uint32") + .unwrap(), + (_, DataType::Number(NumberDataType::UInt64)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "try_to_uint64") + .unwrap(), + (_, DataType::Number(NumberDataType::Int8)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "try_to_int8") + .unwrap(), + (_, DataType::Number(NumberDataType::Int16)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "try_to_int16") + .unwrap(), + (_, DataType::Number(NumberDataType::Int32)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "try_to_int32") + .unwrap(), + (_, DataType::Number(NumberDataType::Int64)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "try_to_int64") + .unwrap(), + (_, DataType::Number(NumberDataType::Float32)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "try_to_float32") + .unwrap(), + (_, DataType::Number(NumberDataType::Float64)) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "try_to_float64") + .unwrap(), + (_, DataType::Timestamp) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "try_to_timestamp") + .unwrap(), + (_, DataType::Date) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "try_to_date") + .unwrap(), + (_, DataType::Variant) => self + .calculate_simple_cast(span, src_type, dest_type, domain, "try_to_variant") + .unwrap(), + + _ => Some(Domain::Nullable(NullableDomain { has_null: true, value: None, - }), + })), } } + + fn calculate_simple_cast( + &self, + span: Span, + src_type: &DataType, + dest_type: &DataType, + domain: &Domain, + cast_fn: &str, + ) -> Result> { + let (domain, ty) = calculate_function_domain( + span, + cast_fn, + [(domain.clone(), src_type.clone())], + self.tz, + self.fn_registry, + )?; + assert_eq!(&ty, dest_type); + Ok(domain) + } } diff --git a/src/query/expression/src/expression.rs b/src/query/expression/src/expression.rs index 39385acc5fb5..9323de214b38 100644 --- a/src/query/expression/src/expression.rs +++ b/src/query/expression/src/expression.rs @@ -41,11 +41,7 @@ pub enum RawExpr { }, Cast { span: Span, - expr: Box, - dest_type: DataType, - }, - TryCast { - span: Span, + is_try: bool, expr: Box, dest_type: DataType, }, @@ -63,18 +59,16 @@ pub enum Expr { Constant { span: Span, scalar: Scalar, + data_type: DataType, }, ColumnRef { span: Span, id: usize, + data_type: DataType, }, Cast { span: Span, - expr: Box, - dest_type: DataType, - }, - TryCast { - span: Span, + is_try: bool, expr: Box, dest_type: DataType, }, @@ -85,6 +79,7 @@ pub enum Expr { function: Arc, generics: Vec, args: Vec, + return_type: DataType, }, } @@ -97,18 +92,16 @@ pub enum RemoteExpr { Constant { span: Span, scalar: Scalar, + data_type: DataType, }, ColumnRef { span: Span, id: usize, + data_type: DataType, }, Cast { span: Span, - expr: Box, - dest_type: DataType, - }, - TryCast { - span: Span, + is_try: bool, expr: Box, dest_type: DataType, }, @@ -117,6 +110,7 @@ pub enum RemoteExpr { id: FunctionID, generics: Vec, args: Vec, + return_type: DataType, }, } @@ -145,7 +139,6 @@ impl RawExpr { buf.insert(*id); } RawExpr::Cast { expr, .. } => walk(expr, buf), - RawExpr::TryCast { expr, .. } => walk(expr, buf), RawExpr::FunctionCall { args, .. } => args.iter().for_each(|expr| walk(expr, buf)), RawExpr::Literal { .. } => (), } @@ -157,26 +150,46 @@ impl RawExpr { } } +impl Expr { + pub fn data_type(&self) -> &DataType { + match self { + Expr::Constant { data_type, .. } => data_type, + Expr::ColumnRef { data_type, .. } => data_type, + Expr::Cast { dest_type, .. } => dest_type, + Expr::FunctionCall { return_type, .. } => return_type, + } + } +} + impl RemoteExpr { pub fn from_expr(expr: Expr) -> Self { match expr { - Expr::Constant { span, scalar } => RemoteExpr::Constant { span, scalar }, - Expr::ColumnRef { span, id } => RemoteExpr::ColumnRef { span, id }, - Expr::Cast { + Expr::Constant { span, - expr, - dest_type, - } => RemoteExpr::Cast { + scalar, + data_type, + } => RemoteExpr::Constant { span, - expr: Box::new(RemoteExpr::from_expr(*expr)), - dest_type, + scalar, + data_type, + }, + Expr::ColumnRef { + span, + id, + data_type, + } => RemoteExpr::ColumnRef { + span, + id, + data_type, }, - Expr::TryCast { + Expr::Cast { span, + is_try, expr, dest_type, - } => RemoteExpr::TryCast { + } => RemoteExpr::Cast { span, + is_try, expr: Box::new(RemoteExpr::from_expr(*expr)), dest_type, }, @@ -186,34 +199,45 @@ impl RemoteExpr { function: _, generics, args, + return_type, } => RemoteExpr::FunctionCall { span, id, generics, args: args.into_iter().map(RemoteExpr::from_expr).collect(), + return_type, }, } } pub fn into_expr(self, fn_registry: &FunctionRegistry) -> Option { Some(match self { - RemoteExpr::Constant { span, scalar } => Expr::Constant { span, scalar }, - RemoteExpr::ColumnRef { span, id } => Expr::ColumnRef { span, id }, - RemoteExpr::Cast { + RemoteExpr::Constant { span, - expr, - dest_type, - } => Expr::Cast { + scalar, + data_type, + } => Expr::Constant { span, - expr: Box::new(expr.into_expr(fn_registry)?), - dest_type, + scalar, + data_type, }, - RemoteExpr::TryCast { + RemoteExpr::ColumnRef { + span, + id, + data_type, + } => Expr::ColumnRef { span, + id, + data_type, + }, + RemoteExpr::Cast { + span, + is_try, expr, dest_type, - } => Expr::TryCast { + } => Expr::Cast { span, + is_try, expr: Box::new(expr.into_expr(fn_registry)?), dest_type, }, @@ -222,6 +246,7 @@ impl RemoteExpr { id, generics, args, + return_type, } => { let function = fn_registry.get(&id)?; Expr::FunctionCall { @@ -233,6 +258,7 @@ impl RemoteExpr { .into_iter() .map(|arg| arg.into_expr(fn_registry)) .collect::>()?, + return_type, } } }) diff --git a/src/query/expression/src/function.rs b/src/query/expression/src/function.rs index 8addee7eea37..d2a31b9d6e8c 100755 --- a/src/query/expression/src/function.rs +++ b/src/query/expression/src/function.rs @@ -29,6 +29,7 @@ use crate::utils::arrow::constant_bitmap; use crate::values::Value; use crate::values::ValueRef; use crate::Column; +use crate::Expr; use crate::Scalar; #[derive(Debug, Clone)] @@ -117,7 +118,7 @@ impl FunctionRegistry { &self, name: &str, params: &[usize], - args_type: &[DataType], + args: &[Expr], ) -> Vec<(FunctionID, Arc)> { let name = name.to_lowercase(); let name = self @@ -135,7 +136,7 @@ impl FunctionRegistry { .enumerate() .filter_map(|(id, func)| { if func.signature.name == name - && func.signature.args_type.len() == args_type.len() + && func.signature.args_type.len() == args.len() { Some(( FunctionID::Builtin { @@ -157,6 +158,11 @@ impl FunctionRegistry { } } + let args_type = args + .iter() + .map(Expr::data_type) + .cloned() + .collect::>(); self.factories .get(name) .map(|factories| { @@ -164,13 +170,13 @@ impl FunctionRegistry { .iter() .enumerate() .filter_map(|(id, factory)| { - factory(params, args_type).map(|func| { + factory(params, &args_type).map(|func| { ( FunctionID::Factory { name: name.to_string(), id, params: params.to_vec(), - args_type: args_type.to_vec(), + args_type: args_type.clone(), }, func, ) diff --git a/src/query/expression/src/property.rs b/src/query/expression/src/property.rs index 2fdb2444cc81..06cc2ce0ec0d 100644 --- a/src/query/expression/src/property.rs +++ b/src/query/expression/src/property.rs @@ -44,6 +44,7 @@ pub enum Domain { Timestamp(SimpleDomain), Date(SimpleDomain), Nullable(NullableDomain), + /// `Array(None)` means that the array is empty, thus there is no inner domain information. Array(Option>), Tuple(Vec), Undefined, diff --git a/src/query/expression/src/register.rs b/src/query/expression/src/register.rs index 2841fe20ceee..6c5d34855cb7 100755 --- a/src/query/expression/src/register.rs +++ b/src/query/expression/src/register.rs @@ -484,7 +484,7 @@ impl FunctionRegistry { self.register_1_arg_core::, _, _>( name, property.clone(), - |_| None, + calc_domain, func, ); @@ -538,7 +538,7 @@ impl FunctionRegistry { self.register_2_arg_core::, _, _>( name, property.clone(), - |_, _| None, + calc_domain, func, ); @@ -608,7 +608,7 @@ impl FunctionRegistry { self.register_3_arg_core::, _, _>( name, property.clone(), - |_, _, _| None, + calc_domain, func, ); @@ -685,7 +685,7 @@ impl FunctionRegistry { self.register_4_arg_core::, _, _>( name, property.clone(), - |_, _, _, _| None, + calc_domain, func, ); @@ -771,7 +771,7 @@ impl FunctionRegistry { self.register_5_arg_core::, _, _>( name, property.clone(), - |_, _, _, _, _| None, + calc_domain, func, ); diff --git a/src/query/expression/src/type_check.rs b/src/query/expression/src/type_check.rs index 9c119878dada..a829c810e06b 100755 --- a/src/query/expression/src/type_check.rs +++ b/src/query/expression/src/type_check.rs @@ -29,61 +29,47 @@ use crate::types::DataType; use crate::Result; use crate::Scalar; -pub fn check(ast: &RawExpr, fn_registry: &FunctionRegistry) -> Result<(Expr, DataType)> { +pub fn check(ast: &RawExpr, fn_registry: &FunctionRegistry) -> Result { match ast { RawExpr::Literal { span, lit } => { - let (scalar, ty) = check_literal(lit); - Ok(( - Expr::Constant { - span: span.clone(), - scalar, - }, - ty, - )) + let (scalar, data_type) = check_literal(lit); + Ok(Expr::Constant { + span: span.clone(), + scalar, + data_type, + }) } RawExpr::ColumnRef { span, id, data_type, - } => Ok(( - Expr::ColumnRef { - span: span.clone(), - id: *id, - }, - data_type.clone(), - )), + } => Ok(Expr::ColumnRef { + span: span.clone(), + id: *id, + data_type: data_type.clone(), + }), RawExpr::Cast { span, + is_try, expr, dest_type, } => { - let (expr, _) = check(expr, fn_registry)?; - Ok(( - Expr::Cast { - span: span.clone(), - expr: Box::new(expr), - dest_type: dest_type.clone(), - }, - dest_type.clone(), - )) - } - RawExpr::TryCast { - span, - expr, - dest_type, - } => { - let (expr, _) = check(expr, fn_registry)?; - - let dest_type = wrap_nullable_for_try_cast(span.clone(), dest_type)?; - - Ok(( - Expr::TryCast { + let dest_type = if *is_try { + wrap_nullable_for_try_cast(span.clone(), dest_type)? + } else { + dest_type.clone() + }; + let expr = check(expr, fn_registry)?; + if expr.data_type() == &dest_type { + Ok(expr) + } else { + Ok(Expr::Cast { span: span.clone(), + is_try: *is_try, expr: Box::new(expr), - dest_type: dest_type.clone(), - }, - dest_type, - )) + dest_type, + }) + } } RawExpr::FunctionCall { span, @@ -91,25 +77,11 @@ pub fn check(ast: &RawExpr, fn_registry: &FunctionRegistry) -> Result<(Expr, Dat args, params, } => { - let (mut args_expr, mut args_type) = ( - Vec::with_capacity(args.len()), - Vec::with_capacity(args.len()), - ); - - for arg in args { - let (arg, ty) = check(arg, fn_registry)?; - args_expr.push(arg); - args_type.push(ty); - } - - check_function( - span.clone(), - name, - params, - &args_expr, - &args_type, - fn_registry, - ) + let args_expr: Vec<_> = args + .iter() + .map(|arg| check(arg, fn_registry)) + .try_collect()?; + check_function(span.clone(), name, params, &args_expr, fn_registry) } } } @@ -184,25 +156,22 @@ pub fn check_function( name: &str, params: &[usize], args: &[Expr], - args_type: &[DataType], fn_registry: &FunctionRegistry, -) -> Result<(Expr, DataType)> { - let candidates = fn_registry.search_candidates(name, params, args_type); +) -> Result { + let candidates = fn_registry.search_candidates(name, params, args); let mut fail_resaons = Vec::with_capacity(candidates.len()); for (id, func) in &candidates { - match try_check_function(span.clone(), args, args_type, &func.signature) { - Ok((checked_args, return_ty, generics)) => { - return Ok(( - Expr::FunctionCall { - span, - id: id.clone(), - function: func.clone(), - generics, - args: checked_args, - }, - return_ty, - )); + match try_check_function(span.clone(), args, &func.signature) { + Ok((checked_args, return_type, generics)) => { + return Ok(Expr::FunctionCall { + span, + id: id.clone(), + function: func.clone(), + generics, + args: checked_args, + return_type, + }); } Err(err) => fail_resaons.push(err), } @@ -211,13 +180,17 @@ pub fn check_function( let mut msg = if params.is_empty() { format!( "no overload satisfies `{name}({})`", - args_type.iter().map(ToString::to_string).join(", ") + args.iter() + .map(|arg| arg.data_type().to_string()) + .join(", ") ) } else { format!( "no overload satisfies `{name}({})({})`", params.iter().join(", "), - args_type.iter().map(ToString::to_string).join(", ") + args.iter() + .map(|arg| arg.data_type().to_string()) + .join(", ") ) }; if !candidates.is_empty() { @@ -295,13 +268,13 @@ impl Subsitution { pub fn try_check_function( span: Span, args: &[Expr], - args_type: &[DataType], sig: &FunctionSignature, ) -> Result<(Vec, DataType, Vec)> { assert_eq!(args.len(), sig.args_type.len()); - let substs = args_type + let substs = args .iter() + .map(Expr::data_type) .zip(&sig.args_type) .map(|(src_ty, dest_ty)| unify(src_ty, dest_ty).map_err(|(_, err)| (span.clone(), err))) .collect::>>()?; @@ -312,15 +285,15 @@ pub fn try_check_function( let checked_args = args .iter() - .zip(args_type) .zip(&sig.args_type) - .map(|((arg, arg_type), sig_type)| { + .map(|(arg, sig_type)| { let sig_type = subst.apply(sig_type.clone())?; - Ok(if *arg_type == sig_type { + Ok(if arg.data_type() == &sig_type { arg.clone() } else { Expr::Cast { span: span.clone(), + is_try: false, expr: Box::new(arg.clone()), dest_type: sig_type, } diff --git a/src/query/expression/src/utils/display.rs b/src/query/expression/src/utils/display.rs index e644b363c4bf..af2330fbc71b 100755 --- a/src/query/expression/src/utils/display.rs +++ b/src/query/expression/src/utils/display.rs @@ -303,14 +303,16 @@ impl Display for RawExpr { RawExpr::Literal { lit, .. } => write!(f, "{lit}"), RawExpr::ColumnRef { id, data_type, .. } => write!(f, "ColumnRef({id})::{data_type}"), RawExpr::Cast { - expr, dest_type, .. - } => { - write!(f, "CAST({expr} AS {dest_type})") - } - RawExpr::TryCast { - expr, dest_type, .. + is_try, + expr, + dest_type, + .. } => { - write!(f, "TRY_CAST({expr} AS {dest_type})") + if *is_try { + write!(f, "TRY_CAST({expr} AS {dest_type})") + } else { + write!(f, "CAST({expr} AS {dest_type})") + } } RawExpr::FunctionCall { name, args, params, .. @@ -415,14 +417,16 @@ impl Display for Expr { Expr::Constant { scalar, .. } => write!(f, "{:?}", scalar.as_ref()), Expr::ColumnRef { id, .. } => write!(f, "ColumnRef({id})"), Expr::Cast { - expr, dest_type, .. - } => { - write!(f, "CAST({expr} AS {dest_type})") - } - Expr::TryCast { - expr, dest_type, .. + is_try, + expr, + dest_type, + .. } => { - write!(f, "TRY_CAST({expr} AS {dest_type})") + if *is_try { + write!(f, "TRY_CAST({expr} AS {dest_type})") + } else { + write!(f, "CAST({expr} AS {dest_type})") + } } Expr::FunctionCall { function, diff --git a/src/query/expression/src/utils/mod.rs b/src/query/expression/src/utils/mod.rs index 3c413bd10289..57f806ddc3ed 100644 --- a/src/query/expression/src/utils/mod.rs +++ b/src/query/expression/src/utils/mod.rs @@ -26,6 +26,8 @@ use crate::types::AnyType; use crate::types::DataType; use crate::Chunk; use crate::Column; +use crate::ConstantFolder; +use crate::Domain; use crate::Evaluator; use crate::FunctionRegistry; use crate::RawExpr; @@ -37,12 +39,13 @@ use crate::Value; pub fn eval_function( span: Span, fn_name: &str, - args: impl Iterator, DataType)>, + args: impl IntoIterator, DataType)>, tz: Tz, num_rows: usize, fn_registry: &FunctionRegistry, ) -> Result<(Value, DataType)> { let (args, cols) = args + .into_iter() .enumerate() .map(|(id, (val, ty))| { ( @@ -61,10 +64,44 @@ pub fn eval_function( params: vec![], args, }; - let (expr, ty) = crate::type_check::check(&raw_expr, fn_registry)?; + let expr = crate::type_check::check(&raw_expr, fn_registry)?; let chunk = Chunk::new(cols, num_rows); - let evaluator = Evaluator::new(&chunk, tz); - Ok((evaluator.run(&expr)?, ty)) + let evaluator = Evaluator::new(&chunk, tz, fn_registry); + Ok((evaluator.run(&expr)?, expr.data_type().clone())) +} + +/// A convenient shortcut to calculate the domain of a scalar function. +pub fn calculate_function_domain( + span: Span, + fn_name: &str, + args: impl IntoIterator, + tz: Tz, + fn_registry: &FunctionRegistry, +) -> Result<(Option, DataType)> { + let (args, args_domain): (Vec<_>, Vec<_>) = args + .into_iter() + .enumerate() + .map(|(id, (domain, ty))| { + ( + RawExpr::ColumnRef { + span: span.clone(), + id, + data_type: ty, + }, + domain, + ) + }) + .unzip(); + let raw_expr = RawExpr::FunctionCall { + span, + name: fn_name.to_string(), + params: vec![], + args, + }; + let expr = crate::type_check::check(&raw_expr, fn_registry)?; + let constant_folder = ConstantFolder::new(&args_domain, tz, fn_registry); + let (_, output_domain) = constant_folder.fold(&expr); + Ok((output_domain, expr.data_type().clone())) } pub fn column_merge_validity(column: &Column, bitmap: Option) -> Option { diff --git a/src/query/functions-v2/src/scalars/datetime.rs b/src/query/functions-v2/src/scalars/datetime.rs index 8d6e8580a532..ca35ba8b0112 100644 --- a/src/query/functions-v2/src/scalars/datetime.rs +++ b/src/query/functions-v2/src/scalars/datetime.rs @@ -30,6 +30,7 @@ use common_expression::types::number::UInt16Type; use common_expression::types::number::UInt32Type; use common_expression::types::number::UInt64Type; use common_expression::types::number::UInt8Type; +use common_expression::types::string::StringDomain; use common_expression::types::timestamp::check_timestamp; use common_expression::types::timestamp::microseconds_to_days; use common_expression::types::timestamp::string_to_timestamp; @@ -312,7 +313,15 @@ fn register_to_string(registry: &mut FunctionRegistry) { registry.register_combine_nullable_1_arg::( "try_to_string", FunctionProperty::default(), - |_| None, + |_| { + Some(NullableDomain { + has_null: false, + value: Some(Box::new(StringDomain { + min: vec![], + max: None, + })), + }) + }, vectorize_with_builder_1_arg::>( |val, output, ctx| { write!(output.builder.data, "{}", timestamp_to_string(val, ctx.tz)).unwrap(); @@ -326,7 +335,15 @@ fn register_to_string(registry: &mut FunctionRegistry) { registry.register_combine_nullable_1_arg::( "try_to_string", FunctionProperty::default(), - |_| None, + |_| { + Some(NullableDomain { + has_null: false, + value: Some(Box::new(StringDomain { + min: vec![], + max: None, + })), + }) + }, vectorize_with_builder_1_arg::>(|val, output, ctx| { write!(output.builder.data, "{}", date_to_string(val, ctx.tz)).unwrap(); output.builder.commit_row(); diff --git a/src/query/functions-v2/src/scalars/variant.rs b/src/query/functions-v2/src/scalars/variant.rs index 22aec9f82209..585f2358f65c 100644 --- a/src/query/functions-v2/src/scalars/variant.rs +++ b/src/query/functions-v2/src/scalars/variant.rs @@ -15,6 +15,8 @@ use std::borrow::Cow; use bstr::ByteSlice; +use common_expression::types::nullable::NullableColumn; +use common_expression::types::nullable::NullableDomain; use common_expression::types::number::Float64Type; use common_expression::types::number::Int64Type; use common_expression::types::number::UInt32Type; @@ -27,6 +29,7 @@ use common_expression::types::GenericType; use common_expression::types::NullableType; use common_expression::types::StringType; use common_expression::types::VariantType; +use common_expression::utils::arrow::constant_bitmap; use common_expression::vectorize_with_builder_1_arg; use common_expression::vectorize_with_builder_2_arg; use common_expression::FunctionProperty; @@ -375,7 +378,7 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_1_arg::, VariantType, _, _>( "to_variant", FunctionProperty::default(), - |_| None, + |_| Some(()), |val, ctx| match val { ValueRef::Scalar(scalar) => { let mut buf = Vec::new(); @@ -388,4 +391,29 @@ pub fn register(registry: &mut FunctionRegistry) { } }, ); + + registry.register_combine_nullable_1_arg::, VariantType, _, _>( + "try_to_variant", + FunctionProperty::default(), + |_| { + Some(NullableDomain { + has_null: false, + value: Some(Box::new(())), + }) + }, + |val, ctx| match val { + ValueRef::Scalar(scalar) => { + let mut buf = Vec::new(); + cast_scalar_to_variant(scalar, ctx.tz, &mut buf); + Ok(Value::Scalar(Some(buf))) + } + ValueRef::Column(col) => { + let new_col = cast_scalars_to_variants(col.iter(), ctx.tz); + Ok(Value::Column(NullableColumn { + validity: constant_bitmap(true, new_col.len()).into(), + column: new_col, + })) + } + }, + ); } diff --git a/src/query/functions-v2/tests/it/aggregates/mod.rs b/src/query/functions-v2/tests/it/aggregates/mod.rs index 384b15ebb53b..f8a9ed966cc3 100644 --- a/src/query/functions-v2/tests/it/aggregates/mod.rs +++ b/src/query/functions-v2/tests/it/aggregates/mod.rs @@ -154,10 +154,10 @@ pub fn run_scalar_expr( chunk: &Chunk, ) -> common_expression::Result<(Value, DataType)> { let fn_registry = builtin_functions(); - let (expr, output_ty) = type_check::check(raw_expr, &fn_registry)?; - let evaluator = Evaluator::new(chunk, chrono_tz::UTC); + let expr = type_check::check(raw_expr, &fn_registry)?; + let evaluator = Evaluator::new(chunk, chrono_tz::UTC, &fn_registry); let result = evaluator.run(&expr)?; - Ok((result, output_ty)) + Ok((result, expr.data_type().clone())) } /// Simulate group-by aggregation. diff --git a/src/query/functions-v2/tests/it/scalars/cast.rs b/src/query/functions-v2/tests/it/scalars/cast.rs index 6659fc2de8f8..e5b9195bd42a 100644 --- a/src/query/functions-v2/tests/it/scalars/cast.rs +++ b/src/query/functions-v2/tests/it/scalars/cast.rs @@ -276,13 +276,13 @@ fn test_cast_number_to_date(file: &mut impl Write) { Column::from_data(vec![-354286, -100, 0, 100, 2932897]), )]); - run_ast(file, "CAST(TO_DATE(-354285) AS INT32)", &[]); - run_ast(file, "CAST(TO_DATE(-100) AS INT32)", &[]); - run_ast(file, "CAST(TO_DATE(-0) AS INT32)", &[]); - run_ast(file, "CAST(TO_DATE(0) AS INT32)", &[]); - run_ast(file, "CAST(TO_DATE(100) AS INT32)", &[]); - run_ast(file, "CAST(TO_DATE(2932896) AS INT32)", &[]); - run_ast(file, "CAST(a AS INT32)", &[( + run_ast(file, "CAST(TO_DATE(-354285) AS INT64)", &[]); + run_ast(file, "CAST(TO_DATE(-100) AS INT64)", &[]); + run_ast(file, "CAST(TO_DATE(-0) AS INT64)", &[]); + run_ast(file, "CAST(TO_DATE(0) AS INT64)", &[]); + run_ast(file, "CAST(TO_DATE(100) AS INT64)", &[]); + run_ast(file, "CAST(TO_DATE(2932896) AS INT64)", &[]); + run_ast(file, "CAST(a AS INT64)", &[( "a", DataType::Date, from_date_data(vec![-354285, -100, 0, 100, 2932896]), diff --git a/src/query/functions-v2/tests/it/scalars/control.rs b/src/query/functions-v2/tests/it/scalars/control.rs index 7cbbfee8b00c..660e84101627 100644 --- a/src/query/functions-v2/tests/it/scalars/control.rs +++ b/src/query/functions-v2/tests/it/scalars/control.rs @@ -89,7 +89,7 @@ fn test_multi_if(file: &mut impl Write) { ), ( "cond_b", - DataType::Boolean, + DataType::Nullable(Box::new(DataType::Boolean)), Column::from_data_with_validity(vec![true, true, true, true], vec![ false, true, false, true, ]), diff --git a/src/query/functions-v2/tests/it/scalars/mod.rs b/src/query/functions-v2/tests/it/scalars/mod.rs index 792c52d081d8..7124e1cacb1c 100644 --- a/src/query/functions-v2/tests/it/scalars/mod.rs +++ b/src/query/functions-v2/tests/it/scalars/mod.rs @@ -52,14 +52,14 @@ pub fn run_ast(file: &mut impl Write, text: &str, columns: &[(&str, DataType, Co ); let fn_registry = builtin_functions(); - let (expr, output_ty) = type_check::check(&raw_expr, &fn_registry)?; + let expr = type_check::check(&raw_expr, &fn_registry)?; let input_domains = columns .iter() .map(|(_, _, col)| col.domain()) .collect::>(); - let constant_folder = ConstantFolder::new(&input_domains, chrono_tz::UTC); + let constant_folder = ConstantFolder::new(&input_domains, chrono_tz::UTC, &fn_registry); let (optimized_expr, output_domain) = constant_folder.fold(&expr); let remote_expr = RemoteExpr::from_expr(optimized_expr); @@ -78,7 +78,7 @@ pub fn run_ast(file: &mut impl Write, text: &str, columns: &[(&str, DataType, Co test_arrow_conversion(col); }); - let evaluator = Evaluator::new(&chunk, chrono_tz::UTC); + let evaluator = Evaluator::new(&chunk, chrono_tz::UTC, &fn_registry); let result = evaluator.run(&expr); let optimized_result = evaluator.run(&optimized_expr); match &result { @@ -94,7 +94,6 @@ pub fn run_ast(file: &mut impl Write, text: &str, columns: &[(&str, DataType, Co raw_expr, expr, input_domains, - output_ty, optimized_expr, output_domain .as_ref() @@ -105,7 +104,7 @@ pub fn run_ast(file: &mut impl Write, text: &str, columns: &[(&str, DataType, Co }; match result { - Ok((raw_expr, expr, input_domains, output_ty, optimized_expr, output_domain, result)) => { + Ok((raw_expr, expr, input_domains, optimized_expr, output_domain, result)) => { writeln!(file, "ast : {text}").unwrap(); writeln!(file, "raw expr : {raw_expr}").unwrap(); writeln!(file, "checked expr : {expr}").unwrap(); @@ -115,7 +114,7 @@ pub fn run_ast(file: &mut impl Write, text: &str, columns: &[(&str, DataType, Co match result { Value::Scalar(output_scalar) => { - writeln!(file, "output type : {output_ty}").unwrap(); + writeln!(file, "output type : {}", expr.data_type()).unwrap(); writeln!(file, "output domain : {output_domain}").unwrap(); writeln!(file, "output : {}", output_scalar.as_ref()).unwrap(); } @@ -149,7 +148,7 @@ pub fn run_ast(file: &mut impl Write, text: &str, columns: &[(&str, DataType, Co let mut type_row = vec!["Type".to_string()]; type_row.extend(columns.iter().map(|(_, ty, _)| ty.to_string())); - type_row.push(output_ty.to_string()); + type_row.push(expr.data_type().to_string()); table.add_row(type_row); let mut domain_row = vec!["Domain".to_string()]; diff --git a/src/query/functions-v2/tests/it/scalars/parser.rs b/src/query/functions-v2/tests/it/scalars/parser.rs index e9df769862ef..4e6cd1b04a97 100644 --- a/src/query/functions-v2/tests/it/scalars/parser.rs +++ b/src/query/functions-v2/tests/it/scalars/parser.rs @@ -16,7 +16,6 @@ use common_ast::ast::BinaryOperator; use common_ast::ast::IntervalKind; use common_ast::ast::Literal as ASTLiteral; use common_ast::ast::MapAccessor; -use common_ast::ast::TypeName; use common_ast::ast::UnaryOperator; use common_ast::parser::parse_expr; use common_ast::parser::token::Token; @@ -113,48 +112,22 @@ pub fn transform_expr(ast: common_ast::ast::Expr, columns: &[(&str, DataType)]) expr, target_type, .. - } => match target_type { - TypeName::Timestamp { .. } => RawExpr::FunctionCall { - span: transform_span(span), - name: "to_timestamp".to_string(), - args: vec![transform_expr(*expr, columns)], - params: vec![], - }, - TypeName::Date => RawExpr::FunctionCall { - span: transform_span(span), - name: "to_date".to_string(), - args: vec![transform_expr(*expr, columns)], - params: vec![], - }, - _ => RawExpr::Cast { - span: transform_span(span), - expr: Box::new(transform_expr(*expr, columns)), - dest_type: transform_data_type(target_type), - }, + } => RawExpr::Cast { + span: transform_span(span), + is_try: false, + expr: Box::new(transform_expr(*expr, columns)), + dest_type: transform_data_type(target_type), }, common_ast::ast::Expr::TryCast { span, expr, target_type, .. - } => match target_type { - TypeName::Timestamp { .. } => RawExpr::FunctionCall { - span: transform_span(span), - name: "try_to_timestamp".to_string(), - args: vec![transform_expr(*expr, columns)], - params: vec![], - }, - TypeName::Date => RawExpr::FunctionCall { - span: transform_span(span), - name: "try_to_date".to_string(), - args: vec![transform_expr(*expr, columns)], - params: vec![], - }, - _ => RawExpr::TryCast { - span: transform_span(span), - expr: Box::new(transform_expr(*expr, columns)), - dest_type: transform_data_type(target_type), - }, + } => RawExpr::Cast { + span: transform_span(span), + is_try: true, + expr: Box::new(transform_expr(*expr, columns)), + dest_type: transform_data_type(target_type), }, common_ast::ast::Expr::FunctionCall { span, diff --git a/src/query/functions-v2/tests/it/scalars/string.rs b/src/query/functions-v2/tests/it/scalars/string.rs index 15f514a2fd10..5be3c1534d14 100644 --- a/src/query/functions-v2/tests/it/scalars/string.rs +++ b/src/query/functions-v2/tests/it/scalars/string.rs @@ -561,7 +561,7 @@ fn test_pad(file: &mut impl Write) { ( "b", DataType::Number(NumberDataType::UInt8), - Column::from_data(vec![0, 3, 5]), + Column::from_data(vec![0u8, 3, 5]), ), ("c", DataType::String, Column::from_data(&["?", "x", "bb"])), ]; @@ -641,7 +641,7 @@ fn test_locate(file: &mut impl Write) { ( "c", DataType::Number(NumberDataType::UInt8), - Column::from_data(vec![1, 2, 0, 1]), + Column::from_data(vec![1u8, 2, 0, 1]), ), ]; run_ast(file, "locate(a, b, c)", &table); @@ -732,12 +732,12 @@ fn test_insert(file: &mut impl Write) { ( "b", DataType::Number(NumberDataType::UInt8), - Column::from_data(vec![1, 4, 1, 1]), + Column::from_data(vec![1u8, 4, 1, 1]), ), ( "c", DataType::Number(NumberDataType::UInt8), - Column::from_data(vec![3, 5, 1, 1]), + Column::from_data(vec![3u8, 5, 1, 1]), ), ( "d", @@ -757,12 +757,12 @@ fn test_insert(file: &mut impl Write) { ( "y", DataType::Nullable(Box::new(DataType::Number(NumberDataType::UInt8))), - Column::from_data_with_validity(vec![1, 4, 1, 1], vec![true, true, false, true]), + Column::from_data_with_validity(vec![1u8, 4, 1, 1], vec![true, true, false, true]), ), ( "z", DataType::Nullable(Box::new(DataType::Number(NumberDataType::UInt8))), - Column::from_data_with_validity(vec![3, 5, 1, 1], vec![true, false, true, true]), + Column::from_data_with_validity(vec![3u8, 5, 1, 1], vec![true, false, true, true]), ), ( "u", diff --git a/src/query/functions-v2/tests/it/scalars/testdata/cast.txt b/src/query/functions-v2/tests/it/scalars/testdata/cast.txt index 4a5716f163f8..6f65e55d0046 100644 --- a/src/query/functions-v2/tests/it/scalars/testdata/cast.txt +++ b/src/query/functions-v2/tests/it/scalars/testdata/cast.txt @@ -138,13 +138,13 @@ evaluation: | Row 4 | 256 | -129 | (NULL, NULL, NULL) | +--------+-----------+------------+------------------------------------------------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| a | Int16([0, 1, 2, 127, 256]) | -| b | Int16([0, 1, -127, -128, -129]) | -| Output | NullableColumn { column: Tuple { fields: [NullableColumn { column: Int8([0, 1, 2, 127, 0]), validity: [0b___01111] }, NullableColumn { column: UInt8([0, 1, 0, 0, 0]), validity: [0b___00011] }, NullableColumn { column: Boolean([0b___00000]), validity: [0b___00000] }], len: 5 }, validity: [0b___11111] } | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | Int16([0, 1, 2, 127, 256]) | +| b | Int16([0, 1, -127, -128, -129]) | +| Output | Tuple { fields: [NullableColumn { column: Int8([0, 1, 2, 127, 0]), validity: [0b___01111] }, NullableColumn { column: UInt8([0, 1, 0, 0, 0]), validity: [0b___00011] }, NullableColumn { column: Boolean([0b___00000]), validity: [0b___00000] }], len: 5 } | ++--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : CAST(a AS INT16) @@ -303,7 +303,7 @@ output : true ast : CAST(CAST('đŸĻ がįžŽå‘ŗしい' AS VARIANT) AS VARIANT) raw expr : CAST(CAST("đŸĻ がįžŽå‘ŗしい" AS Variant) AS Variant) -checked expr : CAST(CAST("đŸĻ がįžŽå‘ŗしい" AS Variant) AS Variant) +checked expr : CAST("đŸĻ がįžŽå‘ŗしい" AS Variant) optimized expr : 0x2000000010000014f09f8da620e3818ce7be8ee591b3e38197e38184 output type : Variant output domain : Undefined @@ -380,10 +380,10 @@ evaluation (internal): ast : TRY_CAST(NULL AS VARIANT) raw expr : TRY_CAST(NULL AS Variant) checked expr : TRY_CAST(NULL AS Variant NULL) -optimized expr : 0x2000000000000000 +optimized expr : NULL output type : Variant NULL -output domain : Undefined -output : null +output domain : {NULL} +output : NULL ast : TRY_CAST(0 AS VARIANT) @@ -469,7 +469,7 @@ output : true ast : TRY_CAST(TRY_CAST('đŸĻ がįžŽå‘ŗしい' AS VARIANT) AS VARIANT) raw expr : TRY_CAST(TRY_CAST("đŸĻ がįžŽå‘ŗしい" AS Variant) AS Variant) -checked expr : TRY_CAST(TRY_CAST("đŸĻ がįžŽå‘ŗしい" AS Variant NULL) AS Variant NULL) +checked expr : TRY_CAST("đŸĻ がįžŽå‘ŗしい" AS Variant NULL) optimized expr : 0x2000000010000014f09f8da620e3818ce7be8ee591b3e38197e38184 output type : Variant NULL output domain : Undefined @@ -507,8 +507,8 @@ error: ast : CAST(-315360000000000 AS TIMESTAMP) -raw expr : to_timestamp(minus(315360000000000_u64)) -checked expr : to_timestamp(minus(315360000000000_u64)) +raw expr : CAST(minus(315360000000000_u64) AS Timestamp) +checked expr : CAST(minus(315360000000000_u64) AS Timestamp) optimized expr : -315360000000000 output type : Timestamp output domain : {-315360000000000..=-315360000000000} @@ -516,8 +516,8 @@ output : 1960-01-04 00:00:00.000000 ast : CAST(-315360000000 AS TIMESTAMP) -raw expr : to_timestamp(minus(315360000000_u64)) -checked expr : to_timestamp(minus(315360000000_u64)) +raw expr : CAST(minus(315360000000_u64) AS Timestamp) +checked expr : CAST(minus(315360000000_u64) AS Timestamp) optimized expr : -315360000000000 output type : Timestamp output domain : {-315360000000000..=-315360000000000} @@ -525,8 +525,8 @@ output : 1960-01-04 00:00:00.000000 ast : CAST(-100 AS TIMESTAMP) -raw expr : to_timestamp(minus(100_u8)) -checked expr : to_timestamp(CAST(minus(100_u8) AS Int64)) +raw expr : CAST(minus(100_u8) AS Timestamp) +checked expr : CAST(minus(100_u8) AS Timestamp) optimized expr : -100000000 output type : Timestamp output domain : {-100000000..=-100000000} @@ -534,8 +534,8 @@ output : 1969-12-31 23:58:20.000000 ast : CAST(-0 AS TIMESTAMP) -raw expr : to_timestamp(minus(0_u8)) -checked expr : to_timestamp(CAST(minus(0_u8) AS Int64)) +raw expr : CAST(minus(0_u8) AS Timestamp) +checked expr : CAST(minus(0_u8) AS Timestamp) optimized expr : 0 output type : Timestamp output domain : {0..=0} @@ -543,8 +543,8 @@ output : 1970-01-01 00:00:00.000000 ast : CAST(0 AS TIMESTAMP) -raw expr : to_timestamp(0_u8) -checked expr : to_timestamp(CAST(0_u8 AS Int64)) +raw expr : CAST(0_u8 AS Timestamp) +checked expr : CAST(0_u8 AS Timestamp) optimized expr : 0 output type : Timestamp output domain : {0..=0} @@ -552,8 +552,8 @@ output : 1970-01-01 00:00:00.000000 ast : CAST(100 AS TIMESTAMP) -raw expr : to_timestamp(100_u8) -checked expr : to_timestamp(CAST(100_u8 AS Int64)) +raw expr : CAST(100_u8 AS Timestamp) +checked expr : CAST(100_u8 AS Timestamp) optimized expr : 100000000 output type : Timestamp output domain : {100000000..=100000000} @@ -561,8 +561,8 @@ output : 1970-01-01 00:01:40.000000 ast : CAST(315360000000 AS TIMESTAMP) -raw expr : to_timestamp(315360000000_u64) -checked expr : to_timestamp(CAST(315360000000_u64 AS Int64)) +raw expr : CAST(315360000000_u64 AS Timestamp) +checked expr : CAST(315360000000_u64 AS Timestamp) optimized expr : 315360000000000 output type : Timestamp output domain : {315360000000000..=315360000000000} @@ -570,8 +570,8 @@ output : 1979-12-30 00:00:00.000000 ast : CAST(315360000000000 AS TIMESTAMP) -raw expr : to_timestamp(315360000000000_u64) -checked expr : to_timestamp(CAST(315360000000000_u64 AS Int64)) +raw expr : CAST(315360000000000_u64 AS Timestamp) +checked expr : CAST(315360000000000_u64 AS Timestamp) optimized expr : 315360000000000 output type : Timestamp output domain : {315360000000000..=315360000000000} @@ -587,8 +587,8 @@ error: ast : CAST(a AS TIMESTAMP) -raw expr : to_timestamp(ColumnRef(0)::Int64) -checked expr : to_timestamp(ColumnRef(0)) +raw expr : CAST(ColumnRef(0)::Int64 AS Timestamp) +checked expr : CAST(ColumnRef(0) AS Timestamp) evaluation: +--------+--------------------------------------+--------------------------------------+ | | a | Output | @@ -613,8 +613,8 @@ evaluation (internal): ast : TRY_CAST(-30610224000000001 AS TIMESTAMP) -raw expr : try_to_timestamp(minus(30610224000000001_u64)) -checked expr : try_to_timestamp(CAST(minus(30610224000000001_u64) AS Int64 NULL)) +raw expr : TRY_CAST(minus(30610224000000001_u64) AS Timestamp) +checked expr : TRY_CAST(minus(30610224000000001_u64) AS Timestamp NULL) optimized expr : NULL output type : Timestamp NULL output domain : Unknown @@ -622,8 +622,8 @@ output : NULL ast : TRY_CAST(253402300800000000 AS TIMESTAMP) -raw expr : try_to_timestamp(253402300800000000_u64) -checked expr : try_to_timestamp(CAST(253402300800000000_u64 AS Int64 NULL)) +raw expr : TRY_CAST(253402300800000000_u64 AS Timestamp) +checked expr : TRY_CAST(253402300800000000_u64 AS Timestamp NULL) optimized expr : NULL output type : Timestamp NULL output domain : Unknown @@ -631,8 +631,8 @@ output : NULL ast : TRY_CAST(a AS TIMESTAMP) -raw expr : try_to_timestamp(ColumnRef(0)::Int64) -checked expr : try_to_timestamp(CAST(ColumnRef(0) AS Int64 NULL)) +raw expr : TRY_CAST(ColumnRef(0)::Int64 AS Timestamp) +checked expr : TRY_CAST(ColumnRef(0) AS Timestamp NULL) evaluation: +--------+-------------------------------------------+----------------------------+ | | a | Output | @@ -761,8 +761,8 @@ error: ast : CAST(-354285 AS DATE) -raw expr : to_date(minus(354285_u32)) -checked expr : to_date(minus(354285_u32)) +raw expr : CAST(minus(354285_u32) AS Date) +checked expr : CAST(minus(354285_u32) AS Date) optimized expr : -354285 output type : Date output domain : {-354285..=-354285} @@ -770,8 +770,8 @@ output : 1000-01-01 ast : CAST(-100 AS DATE) -raw expr : to_date(minus(100_u8)) -checked expr : to_date(CAST(minus(100_u8) AS Int64)) +raw expr : CAST(minus(100_u8) AS Date) +checked expr : CAST(minus(100_u8) AS Date) optimized expr : -100 output type : Date output domain : {-100..=-100} @@ -779,8 +779,8 @@ output : 1969-09-23 ast : CAST(-0 AS DATE) -raw expr : to_date(minus(0_u8)) -checked expr : to_date(CAST(minus(0_u8) AS Int64)) +raw expr : CAST(minus(0_u8) AS Date) +checked expr : CAST(minus(0_u8) AS Date) optimized expr : 0 output type : Date output domain : {0..=0} @@ -788,8 +788,8 @@ output : 1970-01-01 ast : CAST(0 AS DATE) -raw expr : to_date(0_u8) -checked expr : to_date(CAST(0_u8 AS Int64)) +raw expr : CAST(0_u8 AS Date) +checked expr : CAST(0_u8 AS Date) optimized expr : 0 output type : Date output domain : {0..=0} @@ -797,8 +797,8 @@ output : 1970-01-01 ast : CAST(100 AS DATE) -raw expr : to_date(100_u8) -checked expr : to_date(CAST(100_u8 AS Int64)) +raw expr : CAST(100_u8 AS Date) +checked expr : CAST(100_u8 AS Date) optimized expr : 100 output type : Date output domain : {100..=100} @@ -806,8 +806,8 @@ output : 1970-04-11 ast : CAST(2932896 AS DATE) -raw expr : to_date(2932896_u32) -checked expr : to_date(CAST(2932896_u32 AS Int64)) +raw expr : CAST(2932896_u32 AS Date) +checked expr : CAST(2932896_u32 AS Date) optimized expr : 2932896 output type : Date output domain : {2932896..=2932896} @@ -823,8 +823,8 @@ error: ast : CAST(a AS DATE) -raw expr : to_date(ColumnRef(0)::Int32) -checked expr : to_date(CAST(ColumnRef(0) AS Int64)) +raw expr : CAST(ColumnRef(0)::Int32 AS Date) +checked expr : CAST(ColumnRef(0) AS Date) evaluation: +--------+---------------------+---------------------+ | | a | Output | @@ -847,8 +847,8 @@ evaluation (internal): ast : TRY_CAST(-354286 AS DATE) -raw expr : try_to_date(minus(354286_u32)) -checked expr : try_to_date(CAST(minus(354286_u32) AS Int64 NULL)) +raw expr : TRY_CAST(minus(354286_u32) AS Date) +checked expr : TRY_CAST(minus(354286_u32) AS Date NULL) optimized expr : NULL output type : Date NULL output domain : Unknown @@ -856,8 +856,8 @@ output : NULL ast : TRY_CAST(2932897 AS DATE) -raw expr : try_to_date(2932897_u32) -checked expr : try_to_date(CAST(2932897_u32 AS Int64 NULL)) +raw expr : TRY_CAST(2932897_u32 AS Date) +checked expr : TRY_CAST(2932897_u32 AS Date NULL) optimized expr : NULL output type : Date NULL output domain : Unknown @@ -865,8 +865,8 @@ output : NULL ast : TRY_CAST(a AS DATE) -raw expr : try_to_date(ColumnRef(0)::Int32) -checked expr : try_to_date(CAST(ColumnRef(0) AS Int64 NULL)) +raw expr : TRY_CAST(ColumnRef(0)::Int32 AS Date) +checked expr : TRY_CAST(ColumnRef(0) AS Date NULL) evaluation: +--------+---------------------+------------+ | | a | Output | @@ -888,68 +888,68 @@ evaluation (internal): +--------+-------------------------------------------------------------------------+ -ast : CAST(TO_DATE(-354285) AS INT32) -raw expr : CAST(TO_DATE(minus(354285_u32)) AS Int32) -checked expr : CAST(to_date(minus(354285_u32)) AS Int32) -optimized expr : -354285_i32 -output type : Int32 +ast : CAST(TO_DATE(-354285) AS INT64) +raw expr : CAST(TO_DATE(minus(354285_u32)) AS Int64) +checked expr : CAST(to_date(minus(354285_u32)) AS Int64) +optimized expr : -354285_i64 +output type : Int64 output domain : {-354285..=-354285} output : -354285 -ast : CAST(TO_DATE(-100) AS INT32) -raw expr : CAST(TO_DATE(minus(100_u8)) AS Int32) -checked expr : CAST(to_date(CAST(minus(100_u8) AS Int64)) AS Int32) -optimized expr : -100_i32 -output type : Int32 +ast : CAST(TO_DATE(-100) AS INT64) +raw expr : CAST(TO_DATE(minus(100_u8)) AS Int64) +checked expr : CAST(to_date(CAST(minus(100_u8) AS Int64)) AS Int64) +optimized expr : -100_i64 +output type : Int64 output domain : {-100..=-100} output : -100 -ast : CAST(TO_DATE(-0) AS INT32) -raw expr : CAST(TO_DATE(minus(0_u8)) AS Int32) -checked expr : CAST(to_date(CAST(minus(0_u8) AS Int64)) AS Int32) -optimized expr : 0_i32 -output type : Int32 +ast : CAST(TO_DATE(-0) AS INT64) +raw expr : CAST(TO_DATE(minus(0_u8)) AS Int64) +checked expr : CAST(to_date(CAST(minus(0_u8) AS Int64)) AS Int64) +optimized expr : 0_i64 +output type : Int64 output domain : {0..=0} output : 0 -ast : CAST(TO_DATE(0) AS INT32) -raw expr : CAST(TO_DATE(0_u8) AS Int32) -checked expr : CAST(to_date(CAST(0_u8 AS Int64)) AS Int32) -optimized expr : 0_i32 -output type : Int32 +ast : CAST(TO_DATE(0) AS INT64) +raw expr : CAST(TO_DATE(0_u8) AS Int64) +checked expr : CAST(to_date(CAST(0_u8 AS Int64)) AS Int64) +optimized expr : 0_i64 +output type : Int64 output domain : {0..=0} output : 0 -ast : CAST(TO_DATE(100) AS INT32) -raw expr : CAST(TO_DATE(100_u8) AS Int32) -checked expr : CAST(to_date(CAST(100_u8 AS Int64)) AS Int32) -optimized expr : 100_i32 -output type : Int32 +ast : CAST(TO_DATE(100) AS INT64) +raw expr : CAST(TO_DATE(100_u8) AS Int64) +checked expr : CAST(to_date(CAST(100_u8 AS Int64)) AS Int64) +optimized expr : 100_i64 +output type : Int64 output domain : {100..=100} output : 100 -ast : CAST(TO_DATE(2932896) AS INT32) -raw expr : CAST(TO_DATE(2932896_u32) AS Int32) -checked expr : CAST(to_date(CAST(2932896_u32 AS Int64)) AS Int32) -optimized expr : 2932896_i32 -output type : Int32 +ast : CAST(TO_DATE(2932896) AS INT64) +raw expr : CAST(TO_DATE(2932896_u32) AS Int64) +checked expr : CAST(to_date(CAST(2932896_u32 AS Int64)) AS Int64) +optimized expr : 2932896_i64 +output type : Int64 output domain : {2932896..=2932896} output : 2932896 -ast : CAST(a AS INT32) -raw expr : CAST(ColumnRef(0)::Date AS Int32) -checked expr : CAST(ColumnRef(0) AS Int32) +ast : CAST(a AS INT64) +raw expr : CAST(ColumnRef(0)::Date AS Int64) +checked expr : CAST(ColumnRef(0) AS Int64) evaluation: +--------+---------------------+---------------------+ | | a | Output | +--------+---------------------+---------------------+ -| Type | Date | Int32 | +| Type | Date | Int64 | | Domain | {-354285..=2932896} | {-354285..=2932896} | | Row 0 | 1000-01-01 | -354285 | | Row 1 | 1969-09-23 | -100 | @@ -962,13 +962,13 @@ evaluation (internal): | Column | Data | +--------+-----------------------------------------+ | a | [-354285, -100, 0, 100, 2932896] | -| Output | Int32([-354285, -100, 0, 100, 2932896]) | +| Output | Int64([-354285, -100, 0, 100, 2932896]) | +--------+-----------------------------------------+ ast : CAST(TO_DATE(1) AS TIMESTAMP) -raw expr : to_timestamp(TO_DATE(1_u8)) -checked expr : to_timestamp(to_date(CAST(1_u8 AS Int64))) +raw expr : CAST(TO_DATE(1_u8) AS Timestamp) +checked expr : CAST(to_date(CAST(1_u8 AS Int64)) AS Timestamp) optimized expr : 86400000000 output type : Timestamp output domain : {86400000000..=86400000000} @@ -976,8 +976,8 @@ output : 1970-01-02 00:00:00.000000 ast : CAST(TO_TIMESTAMP(1) AS DATE) -raw expr : to_date(TO_TIMESTAMP(1_u8)) -checked expr : to_date(to_timestamp(CAST(1_u8 AS Int64))) +raw expr : CAST(TO_TIMESTAMP(1_u8) AS Date) +checked expr : CAST(to_timestamp(CAST(1_u8 AS Int64)) AS Date) optimized expr : 0 output type : Date output domain : {0..=0} @@ -985,8 +985,8 @@ output : 1970-01-01 ast : CAST(a AS DATE) -raw expr : to_date(ColumnRef(0)::Timestamp) -checked expr : to_date(ColumnRef(0)) +raw expr : CAST(ColumnRef(0)::Timestamp AS Date) +checked expr : CAST(ColumnRef(0) AS Date) evaluation: +--------+--------------------------------------+----------------+ | | a | Output | @@ -1011,8 +1011,8 @@ evaluation (internal): ast : CAST(a AS TIMESTAMP) -raw expr : to_timestamp(ColumnRef(0)::Date) -checked expr : to_timestamp(ColumnRef(0)) +raw expr : CAST(ColumnRef(0)::Date AS Timestamp) +checked expr : CAST(ColumnRef(0) AS Timestamp) evaluation: +--------+---------------------+-------------------------------------------+ | | a | Output | @@ -1035,8 +1035,8 @@ evaluation (internal): ast : CAST(TO_DATE(a) AS TIMESTAMP) -raw expr : to_timestamp(TO_DATE(ColumnRef(0)::Int32)) -checked expr : to_timestamp(to_date(CAST(ColumnRef(0) AS Int64))) +raw expr : CAST(TO_DATE(ColumnRef(0)::Int32) AS Timestamp) +checked expr : CAST(to_date(CAST(ColumnRef(0) AS Int64)) AS Timestamp) evaluation: +--------+---------------------+-------------------------------------------+ | | a | Output | @@ -1161,8 +1161,8 @@ evaluation (internal): ast : TRY_CAST(a as TIMESTAMP) -raw expr : try_to_timestamp(ColumnRef(0)::String) -checked expr : try_to_timestamp(CAST(ColumnRef(0) AS String NULL)) +raw expr : TRY_CAST(ColumnRef(0)::String AS Timestamp) +checked expr : TRY_CAST(ColumnRef(0) AS Timestamp NULL) evaluation: +--------+------------------------------------+----------------------------+ | | a | Output | @@ -1388,8 +1388,8 @@ evaluation (internal): ast : TRY_CAST(a as DATE) -raw expr : try_to_date(ColumnRef(0)::String) -checked expr : try_to_date(CAST(ColumnRef(0) AS String NULL)) +raw expr : TRY_CAST(ColumnRef(0)::String AS Date) +checked expr : TRY_CAST(ColumnRef(0) AS Date NULL) evaluation: +--------+------------------------------------+------------+ | | a | Output | diff --git a/src/query/functions-v2/tests/it/scalars/testdata/control.txt b/src/query/functions-v2/tests/it/scalars/testdata/control.txt index 6f3a9471a9ae..e548be764fe5 100644 --- a/src/query/functions-v2/tests/it/scalars/testdata/control.txt +++ b/src/query/functions-v2/tests/it/scalars/testdata/control.txt @@ -112,13 +112,13 @@ evaluation (internal): ast : multi_if(cond_a, expr_a, cond_b, expr_b, expr_else) -raw expr : multi_if(ColumnRef(0)::Boolean, ColumnRef(1)::Int64, ColumnRef(2)::Boolean, ColumnRef(3)::Int64, ColumnRef(4)::Int64 NULL) -checked expr : multi_if(CAST(ColumnRef(0) AS Boolean NULL), CAST(ColumnRef(1) AS Int64 NULL), CAST(ColumnRef(2) AS Boolean NULL), CAST(ColumnRef(3) AS Int64 NULL), ColumnRef(4)) +raw expr : multi_if(ColumnRef(0)::Boolean, ColumnRef(1)::Int64, ColumnRef(2)::Boolean NULL, ColumnRef(3)::Int64, ColumnRef(4)::Int64 NULL) +checked expr : multi_if(CAST(ColumnRef(0) AS Boolean NULL), CAST(ColumnRef(1) AS Int64 NULL), ColumnRef(2), CAST(ColumnRef(3) AS Int64 NULL), ColumnRef(4)) evaluation: +--------+---------------+---------+-----------------+---------+-------------------+-------------------+ | | cond_a | expr_a | cond_b | expr_b | expr_else | Output | +--------+---------------+---------+-----------------+---------+-------------------+-------------------+ -| Type | Boolean | Int64 | Boolean | Int64 | Int64 NULL | Int64 NULL | +| Type | Boolean | Int64 | Boolean NULL | Int64 | Int64 NULL | Int64 NULL | | Domain | {FALSE, TRUE} | {1..=4} | {TRUE} âˆĒ {NULL} | {5..=8} | {9..=12} âˆĒ {NULL} | {1..=12} âˆĒ {NULL} | | Row 0 | true | 1 | NULL | 5 | 9 | 1 | | Row 1 | true | 2 | true | 6 | 10 | 2 | diff --git a/src/query/functions-v2/tests/it/scalars/testdata/function_list.txt b/src/query/functions-v2/tests/it/scalars/testdata/function_list.txt index bc8e49cc6607..fb3468eb7eb1 100644 --- a/src/query/functions-v2/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions-v2/tests/it/scalars/testdata/function_list.txt @@ -2394,6 +2394,8 @@ try_to_uint8(Float32) :: UInt8 NULL try_to_uint8(Float32 NULL) :: UInt8 NULL try_to_uint8(Float64) :: UInt8 NULL try_to_uint8(Float64 NULL) :: UInt8 NULL +try_to_variant(T0) :: Variant NULL +try_to_variant(T0 NULL) :: Variant NULL unhex(String) :: String unhex(String NULL) :: String NULL upper(String) :: String diff --git a/src/query/functions-v2/tests/it/scalars/testdata/string.txt b/src/query/functions-v2/tests/it/scalars/testdata/string.txt index 914ee338090b..5dcca653bcd7 100644 --- a/src/query/functions-v2/tests/it/scalars/testdata/string.txt +++ b/src/query/functions-v2/tests/it/scalars/testdata/string.txt @@ -2227,7 +2227,7 @@ evaluation (internal): | Column | Data | +--------+------------------------------------------------------------------+ | a | StringColumn { data: 0x6869746573746363, offsets: [0, 2, 6, 8] } | -| b | Int32([0, 3, 5]) | +| b | UInt8([0, 3, 5]) | | c | StringColumn { data: 0x3f786262, offsets: [0, 1, 2, 4] } | | Output | StringColumn { data: 0x7465736262626363, offsets: [0, 0, 3, 8] } | +--------+------------------------------------------------------------------+ @@ -2299,7 +2299,7 @@ evaluation (internal): | Column | Data | +--------+------------------------------------------------------------------+ | a | StringColumn { data: 0x6869746573746363, offsets: [0, 2, 6, 8] } | -| b | Int32([0, 3, 5]) | +| b | UInt8([0, 3, 5]) | | c | StringColumn { data: 0x3f786262, offsets: [0, 1, 2, 4] } | | Output | StringColumn { data: 0x7465736363626262, offsets: [0, 0, 3, 8] } | +--------+------------------------------------------------------------------+ @@ -2481,7 +2481,7 @@ evaluation (internal): +--------+------------------------------------------------------------------------------------------------+ | a | StringColumn { data: 0x6261726363636371, offsets: [0, 3, 5, 7, 8] } | | b | StringColumn { data: 0x666f6f6261726261726264636361636378783536, offsets: [0, 9, 16, 18, 20] } | -| c | Int32([1, 2, 0, 1]) | +| c | UInt8([1, 2, 0, 1]) | | Output | UInt64([4, 3, 0, 0]) | +--------+------------------------------------------------------------------------------------------------+ @@ -2814,8 +2814,8 @@ evaluation (internal): | Column | Data | +--------+-------------------------------------------------------------------------------+ | a | StringColumn { data: 0x686974657374636371, offsets: [0, 2, 6, 8, 9] } | -| b | Int32([1, 4, 1, 1]) | -| c | Int32([3, 5, 1, 1]) | +| b | UInt8([1, 4, 1, 1]) | +| c | UInt8([3, 5, 1, 1]) | | d | StringColumn { data: 0x78787a6331323536, offsets: [0, 2, 4, 6, 8] } | | Output | StringColumn { data: 0x78787465737a633132633536, offsets: [0, 2, 7, 10, 12] } | +--------+-------------------------------------------------------------------------------+ @@ -2840,8 +2840,8 @@ evaluation (internal): | Column | Data | +--------+----------------------------------------------------------------------------------------------------------------------------------+ | x | NullableColumn { column: StringColumn { data: 0x686974657374636371, offsets: [0, 2, 6, 8, 9] }, validity: [0b____1110] } | -| y | NullableColumn { column: Int32([1, 4, 1, 1]), validity: [0b____1011] } | -| z | NullableColumn { column: Int32([3, 5, 1, 1]), validity: [0b____1101] } | +| y | NullableColumn { column: UInt8([1, 4, 1, 1]), validity: [0b____1011] } | +| z | NullableColumn { column: UInt8([3, 5, 1, 1]), validity: [0b____1101] } | | u | NullableColumn { column: StringColumn { data: 0x78787a6331323536, offsets: [0, 2, 4, 6, 8] }, validity: [0b____1110] } | | Output | NullableColumn { column: StringColumn { data: 0x78787465737a633132633536, offsets: [0, 2, 7, 10, 12] }, validity: [0b____1000] } | +--------+----------------------------------------------------------------------------------------------------------------------------------+