From 56e4ddf93f4b7d128af1e1c917e8f4a8cf74607a Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Fri, 2 Feb 2024 16:08:54 +0800 Subject: [PATCH 01/21] support FixedSizeList Type Coercion --- datafusion/expr/src/built_in_function.rs | 16 +-- datafusion/expr/src/signature.rs | 126 +++++++++++++++++- .../expr/src/type_coercion/functions.rs | 122 +---------------- datafusion/sqllogictest/test_files/array.slt | 4 +- 4 files changed, 142 insertions(+), 126 deletions(-) diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index f92ae87d6e6c..6fcc338f0800 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -929,18 +929,18 @@ impl BuiltinScalarFunction { // 0 or more arguments of arbitrary type Signature::one_of(vec![VariadicEqual, Any(0)], self.volatility()) } - BuiltinScalarFunction::ArrayPopFront => Signature::any(1, self.volatility()), - BuiltinScalarFunction::ArrayPopBack => Signature::any(1, self.volatility()), + BuiltinScalarFunction::ArrayPopFront => Signature::array(self.volatility()), + BuiltinScalarFunction::ArrayPopBack => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayConcat => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayDims => Signature::any(1, self.volatility()), - BuiltinScalarFunction::ArrayEmpty => Signature::any(1, self.volatility()), + BuiltinScalarFunction::ArrayDims => Signature::array(self.volatility()), + BuiltinScalarFunction::ArrayEmpty => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayElement => { Signature::array_and_index(self.volatility()) } BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()), - BuiltinScalarFunction::Flatten => Signature::any(1, self.volatility()), + BuiltinScalarFunction::Flatten => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayHasAll | BuiltinScalarFunction::ArrayHasAny => { Signature::any(2, self.volatility()) } @@ -950,8 
+950,8 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayLength => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayNdims => Signature::any(1, self.volatility()), - BuiltinScalarFunction::ArrayDistinct => Signature::any(1, self.volatility()), + BuiltinScalarFunction::ArrayNdims => Signature::array(self.volatility()), + BuiltinScalarFunction::ArrayDistinct => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayPosition => { Signature::array_and_element_and_optional_index(self.volatility()) } @@ -981,7 +981,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayIntersect => Signature::any(2, self.volatility()), BuiltinScalarFunction::ArrayUnion => Signature::any(2, self.volatility()), - BuiltinScalarFunction::Cardinality => Signature::any(1, self.volatility()), + BuiltinScalarFunction::Cardinality => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayResize => { Signature::variadic_any(self.volatility()) } diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index e8d9d8fb3966..28f6bf0c1a44 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -18,7 +18,12 @@ //! Signature module contains foundational types that are used to represent signatures, types, //! and return types of functions in DataFusion. -use arrow::datatypes::DataType; +use std::sync::Arc; + +use crate::type_coercion::binary::comparison_coercion; +use arrow::datatypes::{DataType, Field}; +use datafusion_common::utils::coerced_fixed_size_list_to_list; +use datafusion_common::{internal_datafusion_err, DataFusionError, Result}; /// Constant that is used as a placeholder for any valid timezone. 
/// This is used where a function can accept a timestamp type with any @@ -136,6 +141,115 @@ pub enum ArrayFunctionSignature { ArrayAndIndex, /// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index) ArrayAndElementAndOptionalIndex, + /// Specialized Signature for ArrayEmpty and similar functions + Array, +} + +impl ArrayFunctionSignature { + pub fn get_type_signature( + &self, + current_types: &[DataType], + ) -> Result>> { + fn array_append_or_prepend_valid_types( + current_types: &[DataType], + is_append: bool, + ) -> Result>> { + if current_types.len() != 2 { + return Ok(vec![vec![]]); + } + + let (array_type, elem_type) = if is_append { + (¤t_types[0], ¤t_types[1]) + } else { + (¤t_types[1], ¤t_types[0]) + }; + + // We follow Postgres on `array_append(Null, T)`, which is not valid. + if array_type.eq(&DataType::Null) { + return Ok(vec![vec![]]); + } + + // We need to find the coerced base type, mainly for cases like: + // `array_append(List(null), i64)` -> `List(i64)` + let array_base_type = datafusion_common::utils::base_type(array_type); + let elem_base_type = datafusion_common::utils::base_type(elem_type); + let new_base_type = comparison_coercion(&array_base_type, &elem_base_type); + + let new_base_type = new_base_type.ok_or_else(|| { + internal_datafusion_err!( + "Coercion from {array_base_type:?} to {elem_base_type:?} not supported." 
+ ) + })?; + + let array_type = datafusion_common::utils::coerced_type_with_base_type_only( + array_type, + &new_base_type, + ); + + match array_type { + DataType::List(ref field) + | DataType::LargeList(ref field) + | DataType::FixedSizeList(ref field, _) => { + let elem_type = field.data_type(); + if is_append { + Ok(vec![vec![array_type.clone(), elem_type.clone()]]) + } else { + Ok(vec![vec![elem_type.to_owned(), array_type.clone()]]) + } + } + _ => Ok(vec![vec![]]), + } + } + fn array_and_index(current_types: &[DataType]) -> Result>> { + if current_types.len() != 2 { + return Ok(vec![vec![]]); + } + + let array_type = ¤t_types[0]; + + match array_type { + DataType::List(_) + | DataType::LargeList(_) + | DataType::FixedSizeList(_, _) => { + let array_type = coerced_fixed_size_list_to_list(array_type); + Ok(vec![vec![array_type, DataType::Int64]]) + } + _ => Ok(vec![vec![]]), + } + } + fn array(current_types: &[DataType]) -> Result>> { + if current_types.len() != 1 { + return Ok(vec![vec![]]); + } + + let array_type = ¤t_types[0]; + + match array_type { + DataType::List(_) + | DataType::LargeList(_) + | DataType::FixedSizeList(_, _) => { + let array_type = coerced_fixed_size_list_to_list(array_type); + Ok(vec![vec![array_type]]) + } + DataType::Null => Ok(vec![vec![array_type.to_owned()]]), + _ => Ok(vec![vec![DataType::List(Arc::new(Field::new( + "item", + array_type.to_owned(), + true, + )))]]), + } + } + match self { + ArrayFunctionSignature::ArrayAndElement => { + array_append_or_prepend_valid_types(current_types, true) + } + ArrayFunctionSignature::ElementAndArray => { + array_append_or_prepend_valid_types(current_types, false) + } + ArrayFunctionSignature::ArrayAndIndex => array_and_index(current_types), + ArrayFunctionSignature::Array => array(current_types), + } + } } impl std::fmt::Display for ArrayFunctionSignature { @@ -153,6 +267,9 @@ impl std::fmt::Display for ArrayFunctionSignature { ArrayFunctionSignature::ArrayAndIndex => { write!(f, "array, 
index") } + ArrayFunctionSignature::Array => { + write!(f, "array") + } } } } @@ -325,6 +442,13 @@ impl Signature { volatility, } } + /// Specialized Signature for ArrayEmpty and similar functions + pub fn array(volatility: Volatility) -> Self { + Signature { + type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array), + volatility, + } + } } /// Monotonicity of the `ScalarFunctionExpr` with respect to its arguments. diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 9cab04bc7605..4e5a2f1b6955 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -15,16 +15,14 @@ // specific language governing permissions and limitations // under the License. -use crate::signature::{ArrayFunctionSignature, TIMEZONE_WILDCARD}; +use crate::signature::TIMEZONE_WILDCARD; use crate::{Signature, TypeSignature}; use arrow::{ compute::can_cast_types, datatypes::{DataType, TimeUnit}, }; -use datafusion_common::utils::{coerced_fixed_size_list_to_list, list_ndims}; -use datafusion_common::{ - internal_datafusion_err, internal_err, plan_err, DataFusionError, Result, -}; +use datafusion_common::utils::list_ndims; +use datafusion_common::{internal_err, plan_err, DataFusionError, Result}; use super::binary::comparison_coercion; @@ -80,102 +78,6 @@ fn get_valid_types( signature: &TypeSignature, current_types: &[DataType], ) -> Result>> { - fn array_append_or_prepend_valid_types( - current_types: &[DataType], - is_append: bool, - ) -> Result>> { - if current_types.len() != 2 { - return Ok(vec![vec![]]); - } - - let (array_type, elem_type) = if is_append { - (¤t_types[0], ¤t_types[1]) - } else { - (¤t_types[1], ¤t_types[0]) - }; - - // We follow Postgres on `array_append(Null, T)`, which is not valid. 
- if array_type.eq(&DataType::Null) { - return Ok(vec![vec![]]); - } - - // We need to find the coerced base type, mainly for cases like: - // `array_append(List(null), i64)` -> `List(i64)` - let array_base_type = datafusion_common::utils::base_type(array_type); - let elem_base_type = datafusion_common::utils::base_type(elem_type); - let new_base_type = comparison_coercion(&array_base_type, &elem_base_type); - - let new_base_type = new_base_type.ok_or_else(|| { - internal_datafusion_err!( - "Coercion from {array_base_type:?} to {elem_base_type:?} not supported." - ) - })?; - - let array_type = datafusion_common::utils::coerced_type_with_base_type_only( - array_type, - &new_base_type, - ); - - match array_type { - DataType::List(ref field) - | DataType::LargeList(ref field) - | DataType::FixedSizeList(ref field, _) => { - let elem_type = field.data_type(); - if is_append { - Ok(vec![vec![array_type.clone(), elem_type.clone()]]) - } else { - Ok(vec![vec![elem_type.to_owned(), array_type.clone()]]) - } - } - _ => Ok(vec![vec![]]), - } - } - fn array_element_and_optional_index( - current_types: &[DataType], - ) -> Result>> { - // make sure there's 2 or 3 arguments - if !(current_types.len() == 2 || current_types.len() == 3) { - return Ok(vec![vec![]]); - } - - let first_two_types = ¤t_types[0..2]; - let mut valid_types = array_append_or_prepend_valid_types(first_two_types, true)?; - - // Early return if there are only 2 arguments - if current_types.len() == 2 { - return Ok(valid_types); - } - - let valid_types_with_index = valid_types - .iter() - .map(|t| { - let mut t = t.clone(); - t.push(DataType::Int64); - t - }) - .collect::>(); - - valid_types.extend(valid_types_with_index); - - Ok(valid_types) - } - fn array_and_index(current_types: &[DataType]) -> Result>> { - if current_types.len() != 2 { - return Ok(vec![vec![]]); - } - - let array_type = ¤t_types[0]; - - match array_type { - DataType::List(_) - | DataType::LargeList(_) - | DataType::FixedSizeList(_, _) => { 
- let array_type = coerced_fixed_size_list_to_list(array_type); - Ok(vec![vec![array_type, DataType::Int64]]) - } - _ => Ok(vec![vec![]]), - } - } let valid_types = match signature { TypeSignature::Variadic(valid_types) => valid_types .iter() @@ -208,21 +110,9 @@ fn get_valid_types( } TypeSignature::Exact(valid_types) => vec![valid_types.clone()], - TypeSignature::ArraySignature(ref function_signature) => match function_signature - { - ArrayFunctionSignature::ArrayAndElement => { - return array_append_or_prepend_valid_types(current_types, true) - } - ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { - return array_element_and_optional_index(current_types) - } - ArrayFunctionSignature::ArrayAndIndex => { - return array_and_index(current_types) - } - ArrayFunctionSignature::ElementAndArray => { - return array_append_or_prepend_valid_types(current_types, false) - } - }, + TypeSignature::ArraySignature(ref function_signature) => { + function_signature.get_type_signature(current_types)? + } TypeSignature::Any(number) => { if current_types.len() != *number { diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 7f263d904819..e96970ed89ef 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -4217,7 +4217,7 @@ NULL 10 ## array_dims (aliases: `list_dims`) # array dims error -query error Execution error: array_dims does not support type 'Int64' +query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_dims\(Int64\)'. 
You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_dims\(array\) select array_dims(1); # array_dims scalar function @@ -4328,6 +4328,7 @@ from array_ndims_table; ---- 1 1 2 5 1 1 2 5 +NULL 1 2 5 1 1 2 5 query IIII @@ -4340,6 +4341,7 @@ from large_array_ndims_table; ---- 1 1 2 5 1 1 2 5 +NULL 1 2 5 1 1 2 5 statement ok From c10b8b2eb1da4806226ead97ac0874d88f00c82c Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Fri, 2 Feb 2024 17:08:50 +0800 Subject: [PATCH 02/21] add allow null type coercion parameter --- datafusion/expr/src/built_in_function.rs | 44 +++++++++------ datafusion/expr/src/signature.rs | 54 ++++++++++++++----- datafusion/expr/src/type_coercion/binary.rs | 6 +-- .../expr/src/type_coercion/functions.rs | 4 +- datafusion/sqllogictest/test_files/array.slt | 3 -- 5 files changed, 74 insertions(+), 37 deletions(-) diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 6fcc338f0800..3bb883b190ff 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -923,51 +923,63 @@ impl BuiltinScalarFunction { Signature::variadic_any(self.volatility()) } BuiltinScalarFunction::ArrayAppend => { - Signature::array_and_element(self.volatility()) + Signature::array_and_element(false, self.volatility()) } BuiltinScalarFunction::MakeArray => { // 0 or more arguments of arbitrary type Signature::one_of(vec![VariadicEqual, Any(0)], self.volatility()) } - BuiltinScalarFunction::ArrayPopFront => Signature::array(self.volatility()), - BuiltinScalarFunction::ArrayPopBack => Signature::array(self.volatility()), + BuiltinScalarFunction::ArrayPopFront => { + Signature::array(false, self.volatility()) + } + BuiltinScalarFunction::ArrayPopBack => { + Signature::array(false, self.volatility()) + } BuiltinScalarFunction::ArrayConcat => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayDims => Signature::array(self.volatility()), - 
BuiltinScalarFunction::ArrayEmpty => Signature::array(self.volatility()), + BuiltinScalarFunction::ArrayDims => { + Signature::array(false, self.volatility()) + } + BuiltinScalarFunction::ArrayEmpty => { + Signature::array(true, self.volatility()) + } BuiltinScalarFunction::ArrayElement => { - Signature::array_and_index(self.volatility()) + Signature::array_and_index(false, self.volatility()) } BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()), - BuiltinScalarFunction::Flatten => Signature::array(self.volatility()), + BuiltinScalarFunction::Flatten => Signature::array(false, self.volatility()), BuiltinScalarFunction::ArrayHasAll | BuiltinScalarFunction::ArrayHasAny => { Signature::any(2, self.volatility()) } BuiltinScalarFunction::ArrayHas => { - Signature::array_and_element(self.volatility()) + Signature::array_and_element(false, self.volatility()) } BuiltinScalarFunction::ArrayLength => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayNdims => Signature::array(self.volatility()), - BuiltinScalarFunction::ArrayDistinct => Signature::array(self.volatility()), + BuiltinScalarFunction::ArrayNdims => { + Signature::array(false, self.volatility()) + } + BuiltinScalarFunction::ArrayDistinct => { + Signature::array(true, self.volatility()) + } BuiltinScalarFunction::ArrayPosition => { Signature::array_and_element_and_optional_index(self.volatility()) } BuiltinScalarFunction::ArrayPositions => { - Signature::array_and_element(self.volatility()) + Signature::array_and_element(false, self.volatility()) } BuiltinScalarFunction::ArrayPrepend => { - Signature::element_and_array(self.volatility()) + Signature::element_and_array(false, self.volatility()) } BuiltinScalarFunction::ArrayRepeat => Signature::any(2, self.volatility()), BuiltinScalarFunction::ArrayRemove => { - Signature::array_and_element(self.volatility()) + Signature::array_and_element(false, self.volatility()) } BuiltinScalarFunction::ArrayRemoveN => 
Signature::any(3, self.volatility()), BuiltinScalarFunction::ArrayRemoveAll => { - Signature::array_and_element(self.volatility()) + Signature::array_and_element(false, self.volatility()) } BuiltinScalarFunction::ArrayReplace => Signature::any(3, self.volatility()), BuiltinScalarFunction::ArrayReplaceN => Signature::any(4, self.volatility()), @@ -981,7 +993,9 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayIntersect => Signature::any(2, self.volatility()), BuiltinScalarFunction::ArrayUnion => Signature::any(2, self.volatility()), - BuiltinScalarFunction::Cardinality => Signature::array(self.volatility()), + BuiltinScalarFunction::Cardinality => { + Signature::array(false, self.volatility()) + } BuiltinScalarFunction::ArrayResize => { Signature::variadic_any(self.volatility()) } diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index 28f6bf0c1a44..6e6fe5f2a963 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -122,7 +122,8 @@ pub enum TypeSignature { /// is `OneOf(vec![Any(0), VariadicAny])`. 
OneOf(Vec), /// Specifies Signatures for array functions - ArraySignature(ArrayFunctionSignature), + /// Boolean value specifies whether null type coercion is allowed + ArraySignature(ArrayFunctionSignature, bool), } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -146,13 +147,19 @@ pub enum ArrayFunctionSignature { } impl ArrayFunctionSignature { + /// Arguments to ArrayFunctionSignature + /// `current_types` - The data types of the arguments + /// `coercion` - Whether null type coercion is allowed + /// Returns the valid types for the function signature pub fn get_type_signature( &self, current_types: &[DataType], + allow_null_coercion: bool, ) -> Result>> { fn array_append_or_prepend_valid_types( current_types: &[DataType], is_append: bool, + allow_null_coercion: bool, ) -> Result>> { if current_types.len() != 2 { return Ok(vec![vec![]]); @@ -165,7 +172,7 @@ impl ArrayFunctionSignature { }; // We follow Postgres on `array_append(Null, T)`, which is not valid. - if array_type.eq(&DataType::Null) { + if array_type.eq(&DataType::Null) && !allow_null_coercion { return Ok(vec![vec![]]); } @@ -217,8 +224,13 @@ impl ArrayFunctionSignature { _ => Ok(vec![vec![]]), } } - fn array(current_types: &[DataType]) -> Result>> { - if current_types.len() != 1 { + fn array( + current_types: &[DataType], + allow_null_coercion: bool, + ) -> Result>> { + if current_types.len() != 1 + || (current_types[0].is_null() && !allow_null_coercion) + { return Ok(vec![vec![]]); } @@ -231,7 +243,7 @@ impl ArrayFunctionSignature { let array_type = coerced_fixed_size_list_to_list(array_type); Ok(vec![vec![array_type]]) } - DataType::Null => Ok(vec![vec![array_type.to_owned()]]), + DataType::Null => Ok(vec![vec![array_type.clone()]]), _ => Ok(vec![vec![DataType::List(Arc::new(Field::new( "item", array_type.to_owned(), @@ -241,13 +253,21 @@ impl ArrayFunctionSignature { } match self { ArrayFunctionSignature::ArrayAndElement => { - array_append_or_prepend_valid_types(current_types, true) + 
array_append_or_prepend_valid_types( + current_types, + true, + allow_null_coercion, + ) } ArrayFunctionSignature::ElementAndArray => { - array_append_or_prepend_valid_types(current_types, false) + array_append_or_prepend_valid_types( + current_types, + false, + allow_null_coercion, + ) } ArrayFunctionSignature::ArrayAndIndex => array_and_index(current_types), - ArrayFunctionSignature::Array => array(current_types), + ArrayFunctionSignature::Array => array(current_types, allow_null_coercion), } } } @@ -302,7 +322,7 @@ impl TypeSignature { TypeSignature::OneOf(sigs) => { sigs.iter().flat_map(|s| s.to_string_repr()).collect() } - TypeSignature::ArraySignature(array_signature) => { + TypeSignature::ArraySignature(array_signature, _) => { vec![array_signature.to_string()] } } @@ -407,10 +427,11 @@ impl Signature { } } /// Specialized Signature for ArrayAppend and similar functions - pub fn array_and_element(volatility: Volatility) -> Self { + pub fn array_and_element(allow_null_coercion: bool, volatility: Volatility) -> Self { Signature { type_signature: TypeSignature::ArraySignature( ArrayFunctionSignature::ArrayAndElement, + allow_null_coercion, ), volatility, } @@ -425,27 +446,32 @@ impl Signature { } } /// Specialized Signature for ArrayPrepend and similar functions - pub fn element_and_array(volatility: Volatility) -> Self { + pub fn element_and_array(allow_null_coercion: bool, volatility: Volatility) -> Self { Signature { type_signature: TypeSignature::ArraySignature( ArrayFunctionSignature::ElementAndArray, + allow_null_coercion, ), volatility, } } /// Specialized Signature for ArrayElement and similar functions - pub fn array_and_index(volatility: Volatility) -> Self { + pub fn array_and_index(allow_null_coercion: bool, volatility: Volatility) -> Self { Signature { type_signature: TypeSignature::ArraySignature( ArrayFunctionSignature::ArrayAndIndex, + allow_null_coercion, ), volatility, } } /// Specialized Signature for ArrayEmpty and similar functions - pub fn 
array(volatility: Volatility) -> Self { + pub fn array(allow_null_coercion: bool, volatility: Volatility) -> Self { Signature { - type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array), + type_signature: TypeSignature::ArraySignature( + ArrayFunctionSignature::Array, + allow_null_coercion, + ), volatility, } } diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 70015c699296..a54e88dd879f 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -301,7 +301,7 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option Option Option { +fn allow_null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { match (lhs_type, rhs_type) { (DataType::Null, other_type) | (other_type, DataType::Null) => { if can_cast_types(&DataType::Null, other_type) { diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 4e5a2f1b6955..b0054aa28e16 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -110,8 +110,8 @@ fn get_valid_types( } TypeSignature::Exact(valid_types) => vec![valid_types.clone()], - TypeSignature::ArraySignature(ref function_signature) => { - function_signature.get_type_signature(current_types)? + TypeSignature::ArraySignature(ref function_signature, allow_null_coercion) => { + function_signature.get_type_signature(current_types, *allow_null_coercion)? 
} TypeSignature::Any(number) => { diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index e96970ed89ef..16e2401b40b5 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -4287,9 +4287,6 @@ NULL [3] [4] # array_ndims scalar function #1 -query error -select array_ndims(1); - #follow PostgreSQL query error select From 92d5e9ec80d8672b36a24c5620a43ec4d9993a79 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Fri, 2 Feb 2024 18:14:29 +0800 Subject: [PATCH 03/21] support null column in FixedSizeList --- datafusion/expr/src/built_in_function.rs | 13 ++++---- .../physical-expr/src/array_expressions.rs | 2 ++ datafusion/sqllogictest/test_files/array.slt | 32 +++++++++++++++++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 3bb883b190ff..0df772c0ee68 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -31,7 +31,7 @@ use crate::{ }; use arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit}; -use datafusion_common::{internal_err, plan_err, DataFusionError, Result}; +use datafusion_common::{exec_err, plan_err, DataFusionError, Result}; use strum::IntoEnumIterator; use strum_macros::EnumIter; @@ -543,10 +543,11 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Flatten => { fn get_base_type(data_type: &DataType) -> Result { match data_type { - DataType::List(field) if matches!(field.data_type(), DataType::List(_)) => get_base_type(field.data_type()), + DataType::List(field) | DataType::FixedSizeList(field, _) if matches!(field.data_type(), DataType::List(_)|DataType::FixedSizeList(_,_ )) => get_base_type(field.data_type()), DataType::LargeList(field) if matches!(field.data_type(), DataType::LargeList(_)) => get_base_type(field.data_type()), DataType::Null | DataType::List(_) | 
DataType::LargeList(_) => Ok(data_type.to_owned()), - _ => internal_err!("Not reachable, data_type should be List or LargeList"), + DataType::FixedSizeList(field,_ ) => Ok(DataType::List(field.clone())), + _ => exec_err!("Not reachable, data_type should be List, LargeList or FixedSizeList"), } } @@ -930,10 +931,10 @@ impl BuiltinScalarFunction { Signature::one_of(vec![VariadicEqual, Any(0)], self.volatility()) } BuiltinScalarFunction::ArrayPopFront => { - Signature::array(false, self.volatility()) + Signature::array(true, self.volatility()) } BuiltinScalarFunction::ArrayPopBack => { - Signature::array(false, self.volatility()) + Signature::array(true, self.volatility()) } BuiltinScalarFunction::ArrayConcat => { Signature::variadic_any(self.volatility()) @@ -948,7 +949,7 @@ impl BuiltinScalarFunction { Signature::array_and_index(false, self.volatility()) } BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()), - BuiltinScalarFunction::Flatten => Signature::array(false, self.volatility()), + BuiltinScalarFunction::Flatten => Signature::array(true, self.volatility()), BuiltinScalarFunction::ArrayHasAll | BuiltinScalarFunction::ArrayHasAny => { Signature::any(2, self.volatility()) } diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 38a4359b4f4b..bd929e1306da 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -791,6 +791,7 @@ pub fn array_pop_front(args: &[ArrayRef]) -> Result { let array = as_large_list_array(&args[0])?; general_pop_front_list::(array) } + DataType::Null => Ok(args[0].clone()), _ => exec_err!( "array_pop_front does not support type: {:?}", array_data_type @@ -814,6 +815,7 @@ pub fn array_pop_back(args: &[ArrayRef]) -> Result { let array = as_large_list_array(&args[0])?; general_pop_back_list::(array) } + DataType::Null => Ok(args[0].clone()), _ => exec_err!( "array_pop_back does not support 
type: {:?}", array_data_type diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 16e2401b40b5..f774ebaee9bc 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1303,6 +1303,13 @@ NULL 43 ## array_pop_back (aliases: `list_pop_back`) +# array_pop_back scalar function with null +# follow clickhouse and duckdb +query ? +select array_pop_back(null); +---- +NULL + # array_pop_back scalar function #1 query ?? select array_pop_back(make_array(1, 2, 3, 4, 5)), array_pop_back(make_array('h', 'e', 'l', 'l', 'o')); @@ -1409,6 +1416,13 @@ select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from arrayspop; ## array_pop_front (aliases: `list_pop_front`) +# array_pop_front scalar function with null +# follow clickhouse and duckdb +query ? +select array_pop_front(null); +---- +NULL + # array_pop_front scalar function #1 query ?? select array_pop_front(make_array(1, 2, 3, 4, 5)), array_pop_front(make_array('h', 'e', 'l', 'l', 'o')); @@ -4315,6 +4329,15 @@ AS SELECT arrow_cast(column4, 'LargeList(List(List(List(List(Int64)))))') as column4 FROM array_ndims_table; +statement ok +CREATE TABLE fixed_array_ndims_table +AS SELECT + column1, + arrow_cast(column2, 'FixedSizeList(3, Int64)') as column2, + arrow_cast(column3, 'FixedSizeList(3, List(Int64))') as column3, + arrow_cast(column4, 'FixedSizeList(3, List(List(List(List(Int64)))))') as column4 +FROM array_ndims_table; + query IIII select array_ndims(column1), @@ -4341,6 +4364,8 @@ from large_array_ndims_table; NULL 1 2 5 1 1 2 5 + + statement ok drop table array_ndims_table; @@ -5402,6 +5427,13 @@ select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'LargeList(Int64)')), ---- [1, 2, 1, 3, 2] [1, 2, 3, , 4, , 5] [1.1, 2.2, 3.3, 4.4] +query ??? 
+select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'FixedSizeList(5, Int64)')), + flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, List(Int64))')), + flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'FixedSizeList(2, List(List(Float64)))')); +---- +[1, 2, 1, 3, 2] [1, 2, 3, , 4, , 5] [1.1, 2.2, 3.3, 4.4] + # flatten with column values query ???? select flatten(column1), From b61cbefc42db5be9e69fd480155716ecd03ca983 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 7 Feb 2024 23:00:52 +0800 Subject: [PATCH 04/21] Add test --- datafusion/sqllogictest/test_files/array.slt | 88 ++++++++++++++++++-- 1 file changed, 79 insertions(+), 9 deletions(-) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index f774ebaee9bc..c9f1c2790a05 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -346,10 +346,31 @@ AS VALUES statement ok CREATE TABLE array_distinct_table_1D_large +AS SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1 +FROM array_distinct_table_1D +; + +statement ok +CREATE TABLE array_distinct_table_1D_fixed +AS SELECT + arrow_cast(column1, 'FixedSizeList(5, Int64)') AS column1 +FROM array_distinct_table_1D +; + +statement ok +CREATE TABLE array_distinct_table_1D_UTF8_fixed +AS SELECT + arrow_cast(column1, 'FixedSizeList(5, Utf8)') AS column1 +FROM array_distinct_table_1D_UTF8 +; + +statement ok +CREATE TABLE array_distinct_table_2D_fixed AS VALUES - (arrow_cast(make_array(1, 1, 2, 2, 3), 'LargeList(Int64)')), - (arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), - (arrow_cast(make_array(3, 5, 3, 3, 3), 'LargeList(Int64)')) + (arrow_cast(make_array([1,2], [1,2], [3,4], [3,4], [5,6]), 'FixedSizeList(5, List(Int64))')), + (arrow_cast(make_array([1,2], [3,4], [5,6], [7,8], [9,10]), 'FixedSizeList(5, List(Int64))')), + (arrow_cast(make_array([5,6], [5,6], NULL, NULL, NULL), 
'FixedSizeList(5, List(Int64))')) ; statement ok @@ -2107,6 +2128,11 @@ select array_prepend(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), ar ---- [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] +query ??? +select array_prepend(1, arrow_cast([2, 3, 4], 'FixedSizeList(3, Int64)')), array_prepend(1.0, arrow_cast([2.0, 3.0, 4.0], 'FixedSizeList(3, Float64)')), array_prepend('h', arrow_cast(['e', 'l', 'l', 'o'], 'FixedSizeList(4, Utf8)')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + # array_prepend scalar function #4 (element is list) query ??? select array_prepend(make_array(1), make_array(make_array(2), make_array(3), make_array(4))), array_prepend(make_array(1.0), make_array([2.0], [3.0], [4.0])), array_prepend(make_array('h'), make_array(['e'], ['l'], ['l'], ['o'])); @@ -2120,6 +2146,13 @@ select array_prepend(arrow_cast(make_array(1), 'LargeList(Int64)'), arrow_cast(m ---- [[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] +query ??? +select array_prepend(arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([[1], [2], [3]], 'FixedSizeList(3, List(Int64))')), + array_prepend(arrow_cast([1.0], 'FixedSizeList(1, Float64)'), arrow_cast([[2.0], [3.0], [4.0]], 'FixedSizeList(3, List(Float64))')), + array_prepend(arrow_cast(['h'], 'FixedSizeList(1, Utf8)'), arrow_cast([['e'], ['l'], ['l'], ['o']], 'FixedSizeList(4, List(Utf8))')); +---- +[[1], [1], [2], [3]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + # list_prepend scalar function #5 (function alias `array_prepend`) query ??? 
select list_prepend(1, make_array(2, 3, 4)), list_prepend(1.0, make_array(2.0, 3.0, 4.0)), list_prepend('h', make_array('e', 'l', 'l', 'o')); @@ -4331,12 +4364,12 @@ FROM array_ndims_table; statement ok CREATE TABLE fixed_array_ndims_table -AS SELECT - column1, - arrow_cast(column2, 'FixedSizeList(3, Int64)') as column2, - arrow_cast(column3, 'FixedSizeList(3, List(Int64))') as column3, - arrow_cast(column4, 'FixedSizeList(3, List(List(List(List(Int64)))))') as column4 -FROM array_ndims_table; +AS VALUES + (arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)'), arrow_cast([[7]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), + (arrow_cast([2], 'FixedSizeList(1, Int64)'), arrow_cast([4, 5, 6], 'FixedSizeList(3, Int64)'), arrow_cast([[8]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), + (null, arrow_cast([6, 7, 8], 'FixedSizeList(3, Int64)'), arrow_cast([[9]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), + (arrow_cast([3], 'FixedSizeList(1, Int64)'), arrow_cast([6, 7, 8], 'FixedSizeList(3, Int64)'), arrow_cast([[9]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')) +; query IIII select @@ -4364,6 +4397,19 @@ from large_array_ndims_table; NULL 1 2 5 1 1 2 5 +query IIII +select + array_ndims(column1), + array_ndims(column2), + array_ndims(column3), + array_ndims(column4) +from fixed_array_ndims_table; +---- +1 1 2 5 +1 1 2 5 +NULL 1 2 5 +1 1 2 5 + statement ok @@ -4865,6 +4911,30 @@ from array_distinct_table_1D_large; [1, 2, 3, 4, 5] [3, 5] +query ? +select array_distinct(column1) +from array_distinct_table_1D_fixed; +---- +[1, 2, 3] +[1, 2, 3, 4, 5] +[3, 5] + +query ? 
+select array_distinct(column1) +from array_distinct_table_1D_UTF8_fixed; +---- +[a, bc, def] +[a, bc, def, defg] +[defg] + +query ? +select array_distinct(column1) +from array_distinct_table_2D_fixed; +---- +[[1, 2], [3, 4], [5, 6]] +[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] +[, [5, 6]] + query ??? select array_intersect(column1, column2), array_intersect(column3, column4), From 3220da51b5acb5e09f3859bda395234c3d754ca3 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Thu, 8 Feb 2024 10:52:17 +0800 Subject: [PATCH 05/21] Add tests for cardinality with fixed size lists --- datafusion/expr/src/signature.rs | 2 +- datafusion/sqllogictest/test_files/array.slt | 28 ++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index 6e6fe5f2a963..ae7f0ce1607b 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -149,7 +149,7 @@ pub enum ArrayFunctionSignature { impl ArrayFunctionSignature { /// Arguments to ArrayFunctionSignature /// `current_types` - The data types of the arguments - /// `coercion` - Whether null type coercion is allowed + /// `allow_null_coercion` - Whether null type coercion is allowed /// Returns the valid types for the function signature pub fn get_type_signature( &self, diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index c9f1c2790a05..17b64d7814a5 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -3795,6 +3795,11 @@ select cardinality(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), c ---- 5 3 5 +query III +select cardinality(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)')), cardinality(arrow_cast([1, 3, 5], 'FixedSizeList(3, Int64)')), cardinality(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); +---- +5 3 5 + # cardinality scalar function #2 query II select 
cardinality(make_array([1, 2], [3, 4], [5, 6])), cardinality(array_repeat(array_repeat(array_repeat(3, 3), 2), 3)); @@ -3806,6 +3811,11 @@ select cardinality(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(Lis ---- 6 +query I +select cardinality(arrow_cast([[1, 2], [3, 4], [5, 6]], 'FixedSizeList(3, List(Int64))')); +---- +6 + # cardinality scalar function #3 query II select cardinality(make_array()), cardinality(make_array(make_array())) @@ -3817,6 +3827,13 @@ select cardinality(arrow_cast(make_array(), 'LargeList(Null)')), cardinality(arr ---- NULL 0 +#TODO +#https://github.com/apache/arrow-datafusion/issues/9158 +#query II +#select cardinality(arrow_cast(make_array(), 'FixedSizeList(1, Null)')), cardinality(arrow_cast(make_array(make_array()), 'FixedSizeList(1, List(Null))')) +#---- +#NULL 0 + # cardinality with columns query III select cardinality(column1), cardinality(column2), cardinality(column3) from arrays; @@ -3840,6 +3857,17 @@ NULL 3 4 4 NULL 1 4 3 NULL +query III +select cardinality(column1), cardinality(column2), cardinality(column3) from fixed_size_arrays; +---- +4 3 5 +4 3 5 +4 3 5 +4 3 3 +NULL 3 4 +4 NULL 1 +4 3 NULL + ## array_remove (aliases: `list_remove`) # array_remove scalar function #1 From b5652902fbcb72b14a6e5201843e3de96d32058c Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Thu, 8 Feb 2024 11:09:27 +0800 Subject: [PATCH 06/21] chore --- datafusion/expr/src/type_coercion/binary.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index a54e88dd879f..70015c699296 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -301,7 +301,7 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option Option Option { +fn null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { match (lhs_type, rhs_type) { (DataType::Null, other_type) | 
(other_type, DataType::Null) => { if can_cast_types(&DataType::Null, other_type) { From 58b7d095d9720343073c94075e2e62b899c8a648 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Thu, 8 Feb 2024 11:40:53 +0800 Subject: [PATCH 07/21] fix ci --- datafusion/sqllogictest/test_files/array.slt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 17b64d7814a5..a16e96b6f9cd 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -3863,9 +3863,9 @@ select cardinality(column1), cardinality(column2), cardinality(column3) from fix 4 3 5 4 3 5 4 3 5 -4 3 3 -NULL 3 4 -4 NULL 1 +4 3 5 +NULL 3 5 +4 NULL 5 4 3 NULL ## array_remove (aliases: `list_remove`) From 3d5c89986f3af73cf02d8f99d73c8a79f8d729d7 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Tue, 13 Feb 2024 18:24:33 +0800 Subject: [PATCH 08/21] add comment --- datafusion/expr/src/signature.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index ae7f0ce1607b..f2a8459fb23f 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -129,7 +129,7 @@ pub enum TypeSignature { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum ArrayFunctionSignature { /// Specialized Signature for ArrayAppend and similar functions - /// The first argument should be List/LargeList, and the second argument should be non-list or list. + /// The first argument should be List/LargeList/FixedSizedList, and the second argument should be non-list or list. /// The second argument's list dimension should be one dimension less than the first argument's list dimension. /// List dimension of the List/LargeList is equivalent to the number of List. /// List dimension of the non-list is 0. 
@@ -139,10 +139,13 @@ pub enum ArrayFunctionSignature { /// The first argument's list dimension should be one dimension less than the second argument's list dimension. ElementAndArray, /// Specialized Signature for Array functions of the form (List/LargeList, Index) + /// The first argument should be List/LargeList/FixedSizedList, and the second argument should be Int64. ArrayAndIndex, /// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index) ArrayAndElementAndOptionalIndex, /// Specialized Signature for ArrayEmpty and similar functions + /// The function takes a single argument that must be a List/LargeList/FixedSizeList + /// or something that can be coerced to one of those types. Array, } From eeed955494ec672a03b84bf80d386a044fe8cb0b Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Tue, 13 Feb 2024 18:56:21 +0800 Subject: [PATCH 09/21] Fix array_element function signature --- datafusion/expr/src/built_in_function.rs | 2 +- datafusion/expr/src/signature.rs | 92 ++++++++++--------- .../expr/src/type_coercion/functions.rs | 4 +- .../physical-expr/src/array_expressions.rs | 1 + datafusion/sqllogictest/test_files/array.slt | 4 +- 5 files changed, 53 insertions(+), 50 deletions(-) diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 0df772c0ee68..48ad52b95f46 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -946,7 +946,7 @@ impl BuiltinScalarFunction { Signature::array(true, self.volatility()) } BuiltinScalarFunction::ArrayElement => { - Signature::array_and_index(false, self.volatility()) + Signature::array_and_index(true, self.volatility()) } BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()), BuiltinScalarFunction::Flatten => Signature::array(true, self.volatility()), diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index f2a8459fb23f..d081b14e0bb1 100644 --- 
a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -122,31 +122,34 @@ pub enum TypeSignature { /// is `OneOf(vec![Any(0), VariadicAny])`. OneOf(Vec), /// Specifies Signatures for array functions - /// Boolean value specifies whether null type coercion is allowed - ArraySignature(ArrayFunctionSignature, bool), + ArraySignature(ArrayFunctionSignature), } #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum ArrayFunctionSignature { /// Specialized Signature for ArrayAppend and similar functions + /// If `allow_null` is true, the function also accepts a single argument of type Null. /// The first argument should be List/LargeList/FixedSizedList, and the second argument should be non-list or list. /// The second argument's list dimension should be one dimension less than the first argument's list dimension. /// List dimension of the List/LargeList is equivalent to the number of List. /// List dimension of the non-list is 0. - ArrayAndElement, + ArrayAndElement(bool), /// Specialized Signature for ArrayPrepend and similar functions + /// If `allow_null` is true, the function also accepts a single argument of type Null. /// The first argument should be non-list or list, and the second argument should be List/LargeList. /// The first argument's list dimension should be one dimension less than the second argument's list dimension. - ElementAndArray, + ElementAndArray(bool), /// Specialized Signature for Array functions of the form (List/LargeList, Index) + /// If `allow_null` is true, the function also accepts a single argument of type Null. /// The first argument should be List/LargeList/FixedSizedList, and the second argument should be Int64. 
- ArrayAndIndex, + ArrayAndIndex(bool), /// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index) ArrayAndElementAndOptionalIndex, /// Specialized Signature for ArrayEmpty and similar functions /// The function takes a single argument that must be a List/LargeList/FixedSizeList /// or something that can be coerced to one of those types. - Array, + /// If `allow_null` is true, the function also accepts a single argument of type Null. + Array(bool), } impl ArrayFunctionSignature { @@ -157,7 +160,6 @@ impl ArrayFunctionSignature { pub fn get_type_signature( &self, current_types: &[DataType], - allow_null_coercion: bool, ) -> Result>> { fn array_append_or_prepend_valid_types( current_types: &[DataType], @@ -210,13 +212,20 @@ impl ArrayFunctionSignature { _ => Ok(vec![vec![]]), } } - fn array_and_index(current_types: &[DataType]) -> Result>> { + fn array_and_index( + current_types: &[DataType], + allow_null_coercion: bool, + ) -> Result>> { if current_types.len() != 2 { return Ok(vec![vec![]]); } let array_type = ¤t_types[0]; + if array_type.eq(&DataType::Null) && !allow_null_coercion { + return Ok(vec![vec![]]); + } + match array_type { DataType::List(_) | DataType::LargeList(_) @@ -224,6 +233,7 @@ impl ArrayFunctionSignature { let array_type = coerced_fixed_size_list_to_list(array_type); Ok(vec![vec![array_type, DataType::Int64]]) } + DataType::Null => Ok(vec![vec![array_type.clone(), DataType::Int64]]), _ => Ok(vec![vec![]]), } } @@ -255,22 +265,18 @@ impl ArrayFunctionSignature { } } match self { - ArrayFunctionSignature::ArrayAndElement => { - array_append_or_prepend_valid_types( - current_types, - true, - allow_null_coercion, - ) + ArrayFunctionSignature::ArrayAndElement(allow_null) => { + array_append_or_prepend_valid_types(current_types, true, *allow_null) } - ArrayFunctionSignature::ElementAndArray => { - array_append_or_prepend_valid_types( - current_types, - false, - allow_null_coercion, - ) + 
ArrayFunctionSignature::ElementAndArray(allow_null) => { + array_append_or_prepend_valid_types(current_types, false, *allow_null) + } + ArrayFunctionSignature::ArrayAndIndex(allow_null) => { + array_and_index(current_types, *allow_null) + } + ArrayFunctionSignature::Array(allow_null) => { + array(current_types, *allow_null) } - ArrayFunctionSignature::ArrayAndIndex => array_and_index(current_types), - ArrayFunctionSignature::Array => array(current_types, allow_null_coercion), } } } @@ -278,20 +284,20 @@ impl ArrayFunctionSignature { impl std::fmt::Display for ArrayFunctionSignature { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - ArrayFunctionSignature::ArrayAndElement => { - write!(f, "array, element") + ArrayFunctionSignature::ArrayAndElement(allow_null) => { + write!(f, "ArrayAndElement({})", *allow_null) } ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { write!(f, "array, element, [index]") } - ArrayFunctionSignature::ElementAndArray => { - write!(f, "element, array") + ArrayFunctionSignature::ElementAndArray(allow_null) => { + write!(f, "ElementAndArray({})", *allow_null) } - ArrayFunctionSignature::ArrayAndIndex => { - write!(f, "array, index") + ArrayFunctionSignature::ArrayAndIndex(allow_null) => { + write!(f, "ArrayAndIndex({})", *allow_null) } - ArrayFunctionSignature::Array => { - write!(f, "array") + ArrayFunctionSignature::Array(allow_null) => { + write!(f, "Array({})", *allow_null) } } } @@ -325,7 +331,7 @@ impl TypeSignature { TypeSignature::OneOf(sigs) => { sigs.iter().flat_map(|s| s.to_string_repr()).collect() } - TypeSignature::ArraySignature(array_signature, _) => { + TypeSignature::ArraySignature(array_signature) => { vec![array_signature.to_string()] } } @@ -430,11 +436,10 @@ impl Signature { } } /// Specialized Signature for ArrayAppend and similar functions - pub fn array_and_element(allow_null_coercion: bool, volatility: Volatility) -> Self { + pub fn array_and_element(allow_null: bool, 
volatility: Volatility) -> Self { Signature { type_signature: TypeSignature::ArraySignature( - ArrayFunctionSignature::ArrayAndElement, - allow_null_coercion, + ArrayFunctionSignature::ArrayAndElement(allow_null), ), volatility, } @@ -449,32 +454,29 @@ impl Signature { } } /// Specialized Signature for ArrayPrepend and similar functions - pub fn element_and_array(allow_null_coercion: bool, volatility: Volatility) -> Self { + pub fn element_and_array(allow_null: bool, volatility: Volatility) -> Self { Signature { type_signature: TypeSignature::ArraySignature( - ArrayFunctionSignature::ElementAndArray, - allow_null_coercion, + ArrayFunctionSignature::ElementAndArray(allow_null), ), volatility, } } /// Specialized Signature for ArrayElement and similar functions - pub fn array_and_index(allow_null_coercion: bool, volatility: Volatility) -> Self { + pub fn array_and_index(allow_null: bool, volatility: Volatility) -> Self { Signature { type_signature: TypeSignature::ArraySignature( - ArrayFunctionSignature::ArrayAndIndex, - allow_null_coercion, + ArrayFunctionSignature::ArrayAndIndex(allow_null), ), volatility, } } /// Specialized Signature for ArrayEmpty and similar functions - pub fn array(allow_null_coercion: bool, volatility: Volatility) -> Self { + pub fn array(allow_null: bool, volatility: Volatility) -> Self { Signature { - type_signature: TypeSignature::ArraySignature( - ArrayFunctionSignature::Array, - allow_null_coercion, - ), + type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array( + allow_null, + )), volatility, } } diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index b0054aa28e16..4e5a2f1b6955 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -110,8 +110,8 @@ fn get_valid_types( } TypeSignature::Exact(valid_types) => vec![valid_types.clone()], - TypeSignature::ArraySignature(ref function_signature, 
allow_null_coercion) => { - function_signature.get_type_signature(current_types, *allow_null_coercion)? + TypeSignature::ArraySignature(ref function_signature) => { + function_signature.get_type_signature(current_types)? } TypeSignature::Any(number) => { diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index bd929e1306da..03cd7f0f2867 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -433,6 +433,7 @@ pub fn array_element(args: &[ArrayRef]) -> Result { let indexes = as_int64_array(&args[1])?; general_array_element::(array, indexes) } + DataType::Null => Ok(args[0].clone()), _ => exec_err!( "array_element does not support type: {:?}", args[0].data_type() diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index a16e96b6f9cd..ce8eb522e9be 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1084,7 +1084,7 @@ from arrays_values_without_nulls; ## array_element (aliases: array_extract, list_extract, list_element) # array_element error -query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_element\(Int64, Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_element\(array, index\) +query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_element\(Int64, Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_element\(ArrayAndIndex\(true\)\) select array_element(1, 2); # array_element with null @@ -4292,7 +4292,7 @@ NULL 10 ## array_dims (aliases: `list_dims`) # array dims error -query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_dims\(Int64\)'. 
You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_dims\(array\) +query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_dims\(Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_dims\(Array\(false\)\) select array_dims(1); # array_dims scalar function From 342d3790b41f88f2407214b1e400d4c522ff5381 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sun, 18 Feb 2024 09:34:06 +0800 Subject: [PATCH 10/21] Remove unused imports and simplify code --- datafusion/expr/src/signature.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index d081b14e0bb1..c9fe9623f313 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -18,10 +18,8 @@ //! Signature module contains foundational types that are used to represent signatures, types, //! and return types of functions in DataFusion. 
-use std::sync::Arc; - use crate::type_coercion::binary::comparison_coercion; -use arrow::datatypes::{DataType, Field}; +use arrow::datatypes::DataType; use datafusion_common::utils::coerced_fixed_size_list_to_list; use datafusion_common::{internal_datafusion_err, DataFusionError, Result}; @@ -257,11 +255,7 @@ impl ArrayFunctionSignature { Ok(vec![vec![array_type]]) } DataType::Null => Ok(vec![vec![array_type.clone()]]), - _ => Ok(vec![vec![DataType::List(Arc::new(Field::new( - "item", - array_type.to_owned(), - true, - )))]]), + _ => Ok(vec![vec![]]), } } match self { From d1f8744367f00ece8812ca00383fecbe46041b26 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Mon, 19 Feb 2024 16:23:04 +0800 Subject: [PATCH 11/21] Fix array function signatures and behavior --- datafusion/expr/src/built_in_function.rs | 8 +++--- datafusion/expr/src/signature.rs | 25 +++++++++++-------- .../physical-expr/src/array_expressions.rs | 13 +++++++++- datafusion/sqllogictest/test_files/array.slt | 19 +++++++++++++- 4 files changed, 49 insertions(+), 16 deletions(-) diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 48ad52b95f46..e82ed9458f49 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -954,7 +954,7 @@ impl BuiltinScalarFunction { Signature::any(2, self.volatility()) } BuiltinScalarFunction::ArrayHas => { - Signature::array_and_element(false, self.volatility()) + Signature::array_and_element(true, self.volatility()) } BuiltinScalarFunction::ArrayLength => { Signature::variadic_any(self.volatility()) @@ -969,18 +969,18 @@ impl BuiltinScalarFunction { Signature::array_and_element_and_optional_index(self.volatility()) } BuiltinScalarFunction::ArrayPositions => { - Signature::array_and_element(false, self.volatility()) + Signature::array_and_element(true, self.volatility()) } BuiltinScalarFunction::ArrayPrepend => { Signature::element_and_array(false, self.volatility()) } 
BuiltinScalarFunction::ArrayRepeat => Signature::any(2, self.volatility()), BuiltinScalarFunction::ArrayRemove => { - Signature::array_and_element(false, self.volatility()) + Signature::array_and_element(true, self.volatility()) } BuiltinScalarFunction::ArrayRemoveN => Signature::any(3, self.volatility()), BuiltinScalarFunction::ArrayRemoveAll => { - Signature::array_and_element(false, self.volatility()) + Signature::array_and_element(true, self.volatility()) } BuiltinScalarFunction::ArrayReplace => Signature::any(3, self.volatility()), BuiltinScalarFunction::ArrayReplaceN => Signature::any(4, self.volatility()), diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index c9fe9623f313..4e2e24f4d878 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -175,8 +175,12 @@ impl ArrayFunctionSignature { }; // We follow Postgres on `array_append(Null, T)`, which is not valid. - if array_type.eq(&DataType::Null) && !allow_null_coercion { - return Ok(vec![vec![]]); + if array_type.eq(&DataType::Null) { + if allow_null_coercion { + return Ok(vec![vec![array_type.clone(), elem_type.clone()]]); + } else { + return Ok(vec![vec![]]); + } } // We need to find the coerced base type, mainly for cases like: @@ -191,20 +195,21 @@ impl ArrayFunctionSignature { ) })?; - let array_type = datafusion_common::utils::coerced_type_with_base_type_only( - array_type, - &new_base_type, - ); + let new_array_type = + datafusion_common::utils::coerced_type_with_base_type_only( + array_type, + &new_base_type, + ); - match array_type { + match new_array_type { DataType::List(ref field) | DataType::LargeList(ref field) | DataType::FixedSizeList(ref field, _) => { - let elem_type = field.data_type(); + let new_elem_type = field.data_type(); if is_append { - Ok(vec![vec![array_type.clone(), elem_type.clone()]]) + Ok(vec![vec![new_array_type.clone(), new_elem_type.clone()]]) } else { - Ok(vec![vec![elem_type.to_owned(), array_type.clone()]]) 
+ Ok(vec![vec![new_elem_type.to_owned(), new_array_type.clone()]]) } } _ => Ok(vec![vec![]]), diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 03cd7f0f2867..32702cdb0440 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -86,7 +86,9 @@ fn compare_element_to_list( row_index: usize, eq: bool, ) -> Result { - if list_array_row.data_type() != element_array.data_type() { + if list_array_row.data_type() != element_array.data_type() + && !element_array.data_type().is_null() + { return exec_err!( "compare_element_to_list received incompatible types: '{:?}' and '{:?}'.", list_array_row.data_type(), @@ -1481,6 +1483,10 @@ pub fn array_positions(args: &[ArrayRef]) -> Result { check_datatypes("array_positions", &[arr.values(), element])?; general_positions::(arr, element) } + DataType::Null => Ok(new_null_array( + &DataType::List(Arc::new(Field::new("item", DataType::UInt64, true))), + 1, + )), array_type => { exec_err!("array_positions does not support type '{array_type:?}'.") } @@ -1613,6 +1619,10 @@ fn array_remove_internal( element_array: &ArrayRef, arr_n: Vec, ) -> Result { + if array.data_type().is_null() { + return Ok(array.clone()); + } + match array.data_type() { DataType::List(_) => { let list_array = array.as_list::(); @@ -2287,6 +2297,7 @@ pub fn array_has(args: &[ArrayRef]) -> Result { DataType::LargeList(_) => { general_array_has_dispatch::(&args[0], &args[1], ComparisonType::Single) } + DataType::Null => Ok(new_null_array(&DataType::Boolean, 1)), _ => exec_err!("array_has does not support type '{array_type:?}'."), } } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index ce8eb522e9be..5cf22963067f 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -2770,12 +2770,17 @@ NULL 1 NULL ## array_positions 
(aliases: `list_positions`) -# array_position with NULL (follow PostgreSQL) query ? select array_positions([1, 2, 3, 4, 5], null); ---- [] +# array_positions with NULL (follow PostgreSQL) +query ? +select array_positions(null, 1); +---- +NULL + # array_positions scalar function #1 query ??? select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3, 4, 5], 5), array_positions([1, 1, 1], 1); @@ -3899,6 +3904,13 @@ select ---- [1, , 3] [, 2.2, 3.3] [, bc] +# follow PostgreSQL behavior +query ? +select + array_remove(NULL, 1) +---- +NULL + query ?? select array_remove(make_array(1, null, 2), null), @@ -4059,6 +4071,11 @@ select array_remove_n(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], ## array_remove_all (aliases: `list_removes`) # array_remove_all with NULL elements +query ? +select array_remove_all(NULL, 1); +---- +NULL + query ? select array_remove_all(make_array(1, 2, 2, 1, 1), NULL); ---- From 4de5ee2a2621e85567b6b4fef0a55e0c22a23aa0 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Mon, 19 Feb 2024 16:37:31 +0800 Subject: [PATCH 12/21] fix conflict --- datafusion/expr/src/signature.rs | 34 ++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index 4e2e24f4d878..cfd9d29b9f55 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -159,6 +159,37 @@ impl ArrayFunctionSignature { &self, current_types: &[DataType], ) -> Result>> { + fn array_element_and_optional_index( + current_types: &[DataType], + ) -> Result>> { + // make sure there's 2 or 3 arguments + if !(current_types.len() == 2 || current_types.len() == 3) { + return Ok(vec![vec![]]); + } + + let first_two_types = ¤t_types[0..2]; + let mut valid_types = + array_append_or_prepend_valid_types(first_two_types, true, true)?; + + // Early return if there are only 2 arguments + if current_types.len() == 2 { + return Ok(valid_types); + } + + let 
valid_types_with_index = valid_types + .iter() + .map(|t| { + let mut t = t.clone(); + t.push(DataType::Int64); + t + }) + .collect::>(); + + valid_types.extend(valid_types_with_index); + + Ok(valid_types) + } + fn array_append_or_prepend_valid_types( current_types: &[DataType], is_append: bool, @@ -273,6 +304,9 @@ impl ArrayFunctionSignature { ArrayFunctionSignature::ArrayAndIndex(allow_null) => { array_and_index(current_types, *allow_null) } + ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { + array_element_and_optional_index(current_types) + } ArrayFunctionSignature::Array(allow_null) => { array(current_types, *allow_null) } From 13801bee21110a41948f6fde08fe6daabde6b9e8 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 21 Feb 2024 17:53:41 +0800 Subject: [PATCH 13/21] fix conflict --- datafusion/sqllogictest/test_files/array.slt | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 5cf22963067f..83ef09592df0 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -4395,6 +4395,7 @@ CREATE TABLE array_ndims_table AS VALUES ([1], [1, 2, 3], [[7]], [[[[[10]]]]]), ([2], [4, 5], [[8]], [[[[[10]]]]]), + (NUll, [6, 7], [[9]], [[[[[10]]]]]), ([3], [6], [[9]], [[[[[10]]]]]) ; From 4b21267c9072e5bde7044e73728d28b0323e102c Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 21 Feb 2024 19:14:13 +0800 Subject: [PATCH 14/21] add tests for FixedSizeList --- datafusion/sqllogictest/test_files/array.slt | 340 ++++++++++++++++++- 1 file changed, 338 insertions(+), 2 deletions(-) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 83ef09592df0..a45028021154 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -123,6 +123,13 @@ AS VALUES (make_array(NULL, 10, 11, 12)) ; +statement ok +CREATE TABLE 
large_arrayspop +AS SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1 +FROM arrayspop +; + statement ok CREATE TABLE nested_arrays AS VALUES @@ -172,6 +179,15 @@ AS SELECT FROM arrays_values ; +statement ok +CREATE TABLE fixed_arrays_values +AS SELECT + arrow_cast(column1, 'FixedSizeList(10, Int64)') AS column1, + column2, + column3, + column4 +FROM arrays_values +; statement ok CREATE TABLE arrays_values_v2 @@ -212,6 +228,22 @@ AS FROM flatten_table ; +statement ok +CREATE TABLE fixed_size_flatten_table +AS VALUES + (arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3, List(Int64))'), + arrow_cast(make_array([[1, 2, 3]], [[4, 5]], [[6]]), 'FixedSizeList(3, List(List(Int64)))'), + arrow_cast(make_array([[[1]]], [[[2, 3]]]), 'FixedSizeList(2, List(List(List(Int64))))'), + arrow_cast(make_array([1.0], [2.1, 2.2], [3.2, 3.3, 3.4]), 'FixedSizeList(3, List(Float64))') + ), + ( + arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3, List(Int64))'), + arrow_cast(make_array([[8]], [[9, 10]], [[11, 12, 13]]), 'FixedSizeList(3, List(List(Int64)))'), + arrow_cast(make_array([[[1,2]]], [[[3]]]), 'FixedSizeList(2, List(List(List(Int64))))'), + arrow_cast(make_array([1.0, 2.0], [3.0, 4.0], [5.0, 6.0]), 'FixedSizeList(3, List(Float64))') + ) +; + statement ok CREATE TABLE array_has_table_1D AS VALUES @@ -1124,7 +1156,7 @@ select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL NULL query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 11); +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 11); ---- NULL NULL @@ -1342,6 +1374,11 @@ select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')) ---- [1, 2, 3, 4] [h, e, l, l] +query ?? 
+select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); +---- +[1, 2, 3, 4] [h, e, l, l] + # array_pop_back scalar function #2 (after array_pop_back, array is empty) query ? select array_pop_back(make_array(1)); @@ -1353,6 +1390,11 @@ select array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64)')); ---- [] +query ? +select array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +[] + # array_pop_back scalar function #3 (array_pop_back the empty array) query ? select array_pop_back(array_pop_back(make_array(1))); @@ -1364,12 +1406,27 @@ select array_pop_back(array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64) ---- [] +query ? +select array_pop_back(array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'))); +---- +[] + # array_pop_back scalar function #4 (array_pop_back the arrays which have NULL) query ?? select array_pop_back(make_array(1, 2, 3, 4, NULL)), array_pop_back(make_array(NULL, 'e', 'l', NULL, 'o')); ---- [1, 2, 3, 4] [, e, l, ] +query ?? +select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'LargeList(Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'LargeList(Utf8)')); +---- +[1, 2, 3, 4] [, e, l, ] + +query ?? +select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'FixedSizeList(5, Utf8)')); +---- +[1, 2, 3, 4] [, e, l, ] + # array_pop_back scalar function #5 (array_pop_back the nested arrays) query ? select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6))); @@ -1381,6 +1438,11 @@ select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9 ---- [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] +query ? 
+select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + # array_pop_back scalar function #6 (array_pop_back the nested arrays with NULL) query ? select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL)); @@ -1392,6 +1454,11 @@ select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9 ---- [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL), 'FixedSizeList(6, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + # array_pop_back scalar function #7 (array_pop_back the nested arrays with NULL) query ? select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4))); @@ -1403,6 +1470,11 @@ select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9 ---- [[1, 2, 3], [2, 9, 1], [7, 8, 9], ] +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4)), 'FixedSizeList(5, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], ] + # array_pop_back scalar function #8 (after array_pop_back, nested array is empty) query ? select array_pop_back(make_array(make_array(1, 2, 3))); @@ -1414,6 +1486,11 @@ select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(Lis ---- [] +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)), 'FixedSizeList(1, List(Int64))')); +---- +[] + # array_pop_back with columns query ? 
select array_pop_back(column1) from arrayspop; @@ -1435,6 +1512,26 @@ select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from arrayspop; [] [, 10, 11] +query ? +select array_pop_back(column1) from large_arrayspop; +---- +[1, 2] +[3, 4, 5] +[6, 7, 8, ] +[, ] +[] +[, 10, 11] + +query ? +select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from large_arrayspop; +---- +[1, 2] +[3, 4, 5] +[6, 7, 8, ] +[, ] +[] +[, 10, 11] + ## array_pop_front (aliases: `list_pop_front`) # array_pop_front scalar function with null @@ -1455,6 +1552,11 @@ select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)') ---- [2, 3, 4, 5] [e, l, l, o] +query ?? +select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_pop_front(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); +---- +[2, 3, 4, 5] [e, l, l, o] + # array_pop_front scalar function #2 (after array_pop_front, array is empty) query ? select array_pop_front(make_array(1)); @@ -1466,6 +1568,11 @@ select array_pop_front(arrow_cast(make_array(1), 'LargeList(Int64)')); ---- [] +query ? +select array_pop_front(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +[] + # array_pop_front scalar function #3 (array_pop_front the empty array) query ? select array_pop_front(array_pop_front(make_array(1))); @@ -1477,6 +1584,11 @@ select array_pop_front(array_pop_front(arrow_cast(make_array(1), 'LargeList(Int6 ---- [] +query ? +select array_pop_front(array_pop_front(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'))); +---- +[] + # array_pop_front scalar function #5 (array_pop_front the nested arrays) query ? 
select array_pop_front(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6))); @@ -1488,6 +1600,11 @@ select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, ---- [[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] +query ? +select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))')); +---- +[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] + # array_pop_front scalar function #6 (array_pop_front the nested arrays with NULL) query ? select array_pop_front(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4))); @@ -1499,6 +1616,11 @@ select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3), make_arr ---- [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] +query ? +select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4)), 'FixedSizeList(6, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + # array_pop_front scalar function #8 (after array_pop_front, nested array is empty) query ? select array_pop_front(make_array(make_array(1, 2, 3))); @@ -1510,6 +1632,11 @@ select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(Li ---- [] +query ? +select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)), 'FixedSizeList(1, List(Int64))')); +---- +[] + ## array_slice (aliases: list_slice) # array_slice scalar function #1 (with positive indexes) @@ -1909,6 +2036,17 @@ select ---- [4] [] [1, , 3, 4] [, , 1] +#TODO: https://github.com/apache/arrow-datafusion/issues/9158 +#query ???? 
+#select +# array_append(arrow_cast(make_array(), 'FixedSizeList(1, Null)'), 4), +# array_append(arrow_cast(make_array(), 'FixedSizeList(1, Null)'), null), +# array_append(arrow_cast(make_array(1, null, 3), 'FixedSizeList(3, Int64)'), 4), +# array_append(arrow_cast(make_array(null, null), 'FixedSizeList(2, Null)'), 1) +#; +#---- +#[4] [] [1, , 3, 4] [, , 1] + # test invalid (non-null) query error select array_append(1, 2); @@ -1933,6 +2071,13 @@ select ---- [[1, , 3], []] [[1, , 3], ] +query ?? +select + array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), [null]), + array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), null); +---- +[[1, , 3], []] [[1, , 3], ] + # array_append scalar function #3 query ??? select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o'); @@ -1940,7 +2085,12 @@ select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3 [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] query ??? -select array_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o'); +select array_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), array_append(arrow_cast(make_array('h', 'e', 'l', 'l'), 'LargeList(Utf8)'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? 
+select array_append(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'FixedSizeList(3, Float64)'), 4.0), array_append(arrow_cast(make_array('h', 'e', 'l', 'l'), 'FixedSizeList(4, Utf8)'), 'o'); ---- [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] @@ -1955,6 +2105,11 @@ select array_append(arrow_cast(make_array([1], [2], [3]), 'LargeList(LargeList(I ---- [[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] +query ??? +select array_append(arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3, List(Int64))'), [4]), array_append(arrow_cast(make_array([1.0], [2.0], [3.0]), 'FixedSizeList(3, List(Float64))'), [4.0]), array_append(arrow_cast(make_array(['h'], ['e'], ['l'], ['l']), 'FixedSizeList(4, List(Utf8))'), ['o']); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + # list_append scalar function #5 (function alias `array_append`) query ??? select list_append(make_array(1, 2, 3), 4), list_append(make_array(1.0, 2.0, 3.0), 4.0), list_append(make_array('h', 'e', 'l', 'l'), 'o'); @@ -2013,6 +2168,18 @@ select array_append(column1, column2) from large_arrays_values; [51, 52, , 54, 55, 56, 57, 58, 59, 60, 55] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] +query ? +select array_append(column1, column2) from fixed_arrays_values; +---- +[, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] +[11, 12, 13, 14, 15, 16, 17, 18, , 20, 12] +[21, 22, 23, , 25, 26, 27, 28, 29, 30, 23] +[31, 32, 33, 34, 35, , 37, 38, 39, 40, 34] +[, , , , , , , , , , 44] +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, ] +[51, 52, , 54, 55, 56, 57, 58, 59, 60, 55] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] + # array_append with columns #2 (element is list) query ? 
select array_append(column1, column2) from nested_arrays; @@ -2026,6 +2193,12 @@ select array_append(column1, column2) from large_nested_arrays; [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] [[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] +query ? +select array_append(column1, column2) from fixed_size_nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] + # array_append with columns and scalars #1 query ?? select array_append(column2, 100.1), array_append(column3, '.') from arrays; @@ -2049,6 +2222,17 @@ select array_append(column2, 100.1), array_append(column3, '.') from large_array [100.1] [,, .] [16.6, 17.7, 18.8, 100.1] [.] +query ?? +select array_append(column2, 100.1), array_append(column3, '.') from fixed_size_arrays; +---- +[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] +[, 5.5, 6.6, 100.1] [i, p, , u, m, .] +[7.7, 8.8, 9.9, 100.1] [d, , l, o, r, .] +[10.1, , 12.2, 100.1] [s, i, t, a, b, .] +[13.3, 14.4, 15.5, 100.1] [a, m, e, t, x, .] +[, , , 100.1] [,, a, b, c, d, .] +[16.6, 17.7, 18.8, 100.1] [, , , , , .] + # array_append with columns and scalars #2 query ?? select array_append(column1, make_array(1, 11, 111)), array_append(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), column2) from nested_arrays; @@ -2062,6 +2246,12 @@ select array_append(column1, arrow_cast(make_array(1, 11, 111), 'LargeList(Int64 [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] [[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] +query ?? 
+select array_append(column1, arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3, Int64)')), array_append(arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))'), column2) from fixed_size_nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] + ## array_prepend (aliases: `list_prepend`, `array_push_front`, `list_push_front`) # array_prepend with NULLs @@ -2211,6 +2401,18 @@ select array_prepend(column2, column1) from large_arrays_values; [55, 51, 52, , 54, 55, 56, 57, 58, 59, 60] [66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] +query ? +select array_prepend(column2, column1) from fixed_arrays_values; +---- +[1, , 2, 3, 4, 5, 6, 7, 8, 9, 10] +[12, 11, 12, 13, 14, 15, 16, 17, 18, , 20] +[23, 21, 22, 23, , 25, 26, 27, 28, 29, 30] +[34, 31, 32, 33, 34, 35, , 37, 38, 39, 40] +[44, , , , , , , , , , ] +[, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[55, 51, 52, , 54, 55, 56, 57, 58, 59, 60] +[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + # array_prepend with columns #2 (element is list) query ? select array_prepend(column2, column1) from nested_arrays; @@ -2224,6 +2426,12 @@ select array_prepend(column2, column1) from large_nested_arrays; [[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] +query ? +select array_prepend(column2, column1) from fixed_size_nested_arrays; +---- +[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] +[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] + # array_prepend with columns and scalars #1 query ?? 
select array_prepend(100.1, column2), array_prepend('.', column3) from arrays; @@ -2247,6 +2455,17 @@ select array_prepend(100.1, column2), array_prepend('.', column3) from large_arr [100.1] [., ,] [100.1, 16.6, 17.7, 18.8] [.] +query ?? +select array_prepend(100.1, column2), array_prepend('.', column3) from fixed_size_arrays; +---- +[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] +[100.1, , 5.5, 6.6] [., i, p, , u, m] +[100.1, 7.7, 8.8, 9.9] [., d, , l, o, r] +[100.1, 10.1, , 12.2] [., s, i, t, a, b] +[100.1, 13.3, 14.4, 15.5] [., a, m, e, t, x] +[100.1, , , ] [., ,, a, b, c, d] +[100.1, 16.6, 17.7, 18.8] [., , , , , ] + # array_prepend with columns and scalars #2 (element is list) query ?? select array_prepend(make_array(1, 11, 111), column1), array_prepend(column2, make_array(make_array(1, 2, 3), make_array(11, 12, 13))) from nested_arrays; @@ -2260,6 +2479,12 @@ select array_prepend(arrow_cast(make_array(1, 11, 111), 'LargeList(Int64)'), col [[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] [[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] +query ?? +select array_prepend(arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3, Int64)'), column1), array_prepend(column2, arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))')) from fixed_size_nested_arrays; +---- +[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] +[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] + ## array_repeat (aliases: `list_repeat`) # array_repeat scalar function #1 @@ -3881,6 +4106,13 @@ select array_remove(make_array(1, 2, 2, 1, 1), 2), array_remove(make_array(1.0, ---- [1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] +query ??? 
+select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2), + array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'LargeList(Float64)'), 1.0), + array_remove(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 'l'); +---- +[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] + query ??? select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int64)'), 2), array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float64)'), 1.0), @@ -3896,6 +4128,14 @@ select ---- [1, , 3] [, 2.2, 3.3] [, bc] +query ??? +select + array_remove(arrow_cast(make_array(1, null, 2, 3), 'LargeList(Int64)'), 2), + array_remove(arrow_cast(make_array(1.1, null, 2.2, 3.3), 'LargeList(Float64)'), 1.1), + array_remove(arrow_cast(make_array('a', null, 'bc'), 'LargeList(Utf8)'), 'a'); +---- +[1, , 3] [, 2.2, 3.3] [, bc] + query ??? select array_remove(arrow_cast(make_array(1, null, 2, 3), 'FixedSizeList(4, Int64)'), 2), @@ -3918,12 +4158,32 @@ select ---- [1, 2] [1, 2, ] +query ?? +select + array_remove(arrow_cast(make_array(1, null, 2), 'LargeList(Int64)'), null), + array_remove(arrow_cast(make_array(1, null, 2, null), 'LargeList(Int64)'), null); +---- +[1, 2] [1, 2, ] + +query ?? +select + array_remove(arrow_cast(make_array(1, null, 2), 'FixedSizeList(3, Int64)'), null), + array_remove(arrow_cast(make_array(1, null, 2, null), 'FixedSizeList(4, Int64)'), null); +---- +[1, 2] [1, 2, ] + # array_remove scalar function #2 (element is list) query ?? select array_remove(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_remove(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); ---- [[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] +query ?? 
+select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6]), + array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + query ?? select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); @@ -3951,6 +4211,14 @@ select array_remove(column1, column2) from arrays_with_repeating_elements; [7, 7, 8, 7, 9, 7, 8, 7, 7] [11, 12, 10, 11, 12, 10, 11, 12, 10] +query ? +select array_remove(column1, column2) from large_arrays_with_repeating_elements; +---- +[1, 1, 3, 2, 2, 1, 3, 2, 3] +[4, 5, 5, 6, 5, 5, 5, 4, 4] +[7, 7, 8, 7, 9, 7, 8, 7, 7] +[11, 12, 10, 11, 12, 10, 11, 12, 10] + query ? select array_remove(column1, column2) from fixed_arrays_with_repeating_elements; ---- @@ -3968,6 +4236,14 @@ select array_remove(column1, column2) from nested_arrays_with_repeating_elements [[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] +query ? 
+select array_remove(column1, column2) from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + query ? select array_remove(column1, column2) from fixed_size_nested_arrays_with_repeating_elements; ---- @@ -3985,6 +4261,14 @@ select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), a [1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] +query ?? +select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from large_arrays_with_repeating_elements; +---- +[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] +[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] + query ?? 
select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from fixed_arrays_with_repeating_elements; ---- @@ -4003,6 +4287,15 @@ select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [1 [[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] +query ?? +select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), + array_remove(column1, make_array(1, 2, 3)) from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 
6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + query ?? select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), array_remove(column1, make_array(1, 2, 3)) from fixed_size_nested_arrays_with_repeating_elements; @@ -5571,6 +5864,16 @@ from large_flatten_table; [1, 2, 3] [1, 2, 3, 4, 5, 6] [1, 2, 3] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] [1, 2, 3, 4, 5, 6] [8] [1, 2, 3] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] +query ???? +select flatten(column1), + flatten(column2), + flatten(column3), + flatten(column4) +from fixed_size_flatten_table; +---- +[1, 2, 3] [1, 2, 3, 4, 5, 6] [1, 2, 3] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] +[1, 2, 3, 4, 5, 6] [8, 9, 10, 11, 12, 13] [1, 2, 3] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + ## empty # empty scalar function #1 query B @@ -5583,6 +5886,11 @@ select empty(arrow_cast(make_array(1), 'LargeList(Int64)')); ---- false +query B +select empty(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +false + # empty scalar function #2 query B select empty(make_array()); @@ -5594,6 +5902,12 @@ select empty(arrow_cast(make_array(), 'LargeList(Null)')); ---- true +#TODO: https://github.com/apache/arrow-datafusion/issues/9158 +#query B +#select empty(arrow_cast(make_array(), 'FixedSizeList(0, Null)')); +#---- +#true + # empty scalar function #3 query B select empty(make_array(NULL)); @@ -5605,6 +5919,11 @@ select empty(arrow_cast(make_array(NULL), 'LargeList(Null)')); ---- false +query B +select empty(arrow_cast(make_array(NULL), 'FixedSizeList(1, Null)')); +---- +false + # empty scalar function #4 query B select empty(NULL); @@ -5634,6 +5953,17 @@ NULL false false +query B +select empty(column1) from fixed_size_arrays; 
+---- +false +false +false +false +NULL +false +false + query ? SELECT string_to_array('abcxxxdef', 'xxx') ---- @@ -5833,6 +6163,9 @@ drop table fixed_slices; statement ok drop table arrayspop; +statement ok +drop table large_arrayspop; + statement ok drop table arrays_values; @@ -5953,6 +6286,9 @@ drop table flatten_table; statement ok drop table large_flatten_table; +statement ok +drop table fixed_size_flatten_table; + statement ok drop table arrays_values_without_nulls; From 1d9f35e9538cc9d865649ace7339072f86074950 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Thu, 22 Feb 2024 22:23:45 +0800 Subject: [PATCH 15/21] remove unreachable null check --- datafusion/physical-expr/src/array_expressions.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 32702cdb0440..fb34f351a9bd 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -86,9 +86,7 @@ fn compare_element_to_list( row_index: usize, eq: bool, ) -> Result { - if list_array_row.data_type() != element_array.data_type() - && !element_array.data_type().is_null() - { + if list_array_row.data_type() != element_array.data_type() { return exec_err!( "compare_element_to_list received incompatible types: '{:?}' and '{:?}'.", list_array_row.data_type(), @@ -435,7 +433,6 @@ pub fn array_element(args: &[ArrayRef]) -> Result { let indexes = as_int64_array(&args[1])?; general_array_element::(array, indexes) } - DataType::Null => Ok(args[0].clone()), _ => exec_err!( "array_element does not support type: {:?}", args[0].data_type() From cb375284681a241abe116f5df258fe19342431bf Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Thu, 22 Feb 2024 23:01:21 +0800 Subject: [PATCH 16/21] simplify the code --- datafusion/expr/src/signature.rs | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git
a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index cfd9d29b9f55..e66a6e53b683 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -209,9 +209,8 @@ impl ArrayFunctionSignature { if array_type.eq(&DataType::Null) { if allow_null_coercion { return Ok(vec![vec![array_type.clone(), elem_type.clone()]]); - } else { - return Ok(vec![vec![]]); } + return Ok(vec![vec![]]); } // We need to find the coerced base type, mainly for cases like: @@ -253,22 +252,12 @@ impl ArrayFunctionSignature { if current_types.len() != 2 { return Ok(vec![vec![]]); } - let array_type = ¤t_types[0]; - - if array_type.eq(&DataType::Null) && !allow_null_coercion { - return Ok(vec![vec![]]); - } - - match array_type { - DataType::List(_) - | DataType::LargeList(_) - | DataType::FixedSizeList(_, _) => { - let array_type = coerced_fixed_size_list_to_list(array_type); - Ok(vec![vec![array_type, DataType::Int64]]) - } - DataType::Null => Ok(vec![vec![array_type.clone(), DataType::Int64]]), - _ => Ok(vec![vec![]]), + let array_type = array(&[array_type.clone()], allow_null_coercion)?; + if array_type[0].is_empty() { + Ok(vec![vec![]]) + } else { + Ok(vec![vec![array_type[0][0].clone(), DataType::Int64]]) } } fn array( From a7f121e4d714cb0cfdb11da056bc6f3057bd6666 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sun, 25 Feb 2024 22:37:15 +0800 Subject: [PATCH 17/21] remove null checking --- datafusion/expr/src/built_in_function.rs | 44 ++++------ datafusion/expr/src/signature.rs | 86 +++++++------------ .../physical-expr/src/array_expressions.rs | 11 --- datafusion/sqllogictest/test_files/array.slt | 79 +++++++++-------- 4 files changed, 90 insertions(+), 130 deletions(-) diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index e82ed9458f49..8b4e65121c79 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -924,63 +924,51 @@ impl BuiltinScalarFunction { 
Signature::variadic_any(self.volatility()) } BuiltinScalarFunction::ArrayAppend => { - Signature::array_and_element(false, self.volatility()) + Signature::array_and_element(self.volatility()) } BuiltinScalarFunction::MakeArray => { // 0 or more arguments of arbitrary type Signature::one_of(vec![VariadicEqual, Any(0)], self.volatility()) } - BuiltinScalarFunction::ArrayPopFront => { - Signature::array(true, self.volatility()) - } - BuiltinScalarFunction::ArrayPopBack => { - Signature::array(true, self.volatility()) - } + BuiltinScalarFunction::ArrayPopFront => Signature::array(self.volatility()), + BuiltinScalarFunction::ArrayPopBack => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayConcat => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayDims => { - Signature::array(false, self.volatility()) - } - BuiltinScalarFunction::ArrayEmpty => { - Signature::array(true, self.volatility()) - } + BuiltinScalarFunction::ArrayDims => Signature::array(self.volatility()), + BuiltinScalarFunction::ArrayEmpty => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayElement => { - Signature::array_and_index(true, self.volatility()) + Signature::array_and_index(self.volatility()) } BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()), - BuiltinScalarFunction::Flatten => Signature::array(true, self.volatility()), + BuiltinScalarFunction::Flatten => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayHasAll | BuiltinScalarFunction::ArrayHasAny => { Signature::any(2, self.volatility()) } BuiltinScalarFunction::ArrayHas => { - Signature::array_and_element(true, self.volatility()) + Signature::array_and_element(self.volatility()) } BuiltinScalarFunction::ArrayLength => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayNdims => { - Signature::array(false, self.volatility()) - } - BuiltinScalarFunction::ArrayDistinct => { - Signature::array(true, self.volatility()) - } + 
BuiltinScalarFunction::ArrayNdims => Signature::array(self.volatility()), + BuiltinScalarFunction::ArrayDistinct => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayPosition => { Signature::array_and_element_and_optional_index(self.volatility()) } BuiltinScalarFunction::ArrayPositions => { - Signature::array_and_element(true, self.volatility()) + Signature::array_and_element(self.volatility()) } BuiltinScalarFunction::ArrayPrepend => { - Signature::element_and_array(false, self.volatility()) + Signature::element_and_array(self.volatility()) } BuiltinScalarFunction::ArrayRepeat => Signature::any(2, self.volatility()), BuiltinScalarFunction::ArrayRemove => { - Signature::array_and_element(true, self.volatility()) + Signature::array_and_element(self.volatility()) } BuiltinScalarFunction::ArrayRemoveN => Signature::any(3, self.volatility()), BuiltinScalarFunction::ArrayRemoveAll => { - Signature::array_and_element(true, self.volatility()) + Signature::array_and_element(self.volatility()) } BuiltinScalarFunction::ArrayReplace => Signature::any(3, self.volatility()), BuiltinScalarFunction::ArrayReplaceN => Signature::any(4, self.volatility()), @@ -994,9 +982,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayIntersect => Signature::any(2, self.volatility()), BuiltinScalarFunction::ArrayUnion => Signature::any(2, self.volatility()), - BuiltinScalarFunction::Cardinality => { - Signature::array(false, self.volatility()) - } + BuiltinScalarFunction::Cardinality => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayResize => { Signature::variadic_any(self.volatility()) } diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index e66a6e53b683..011f9ccae134 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -126,34 +126,29 @@ pub enum TypeSignature { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum ArrayFunctionSignature { /// Specialized Signature for ArrayAppend and 
similar functions - /// If `allow_null` is true, the function also accepts a single argument of type Null. /// The first argument should be List/LargeList/FixedSizedList, and the second argument should be non-list or list. /// The second argument's list dimension should be one dimension less than the first argument's list dimension. /// List dimension of the List/LargeList is equivalent to the number of List. /// List dimension of the non-list is 0. - ArrayAndElement(bool), + ArrayAndElement, /// Specialized Signature for ArrayPrepend and similar functions - /// If `allow_null` is true, the function also accepts a single argument of type Null. /// The first argument should be non-list or list, and the second argument should be List/LargeList. /// The first argument's list dimension should be one dimension less than the second argument's list dimension. - ElementAndArray(bool), + ElementAndArray, /// Specialized Signature for Array functions of the form (List/LargeList, Index) - /// If `allow_null` is true, the function also accepts a single argument of type Null. /// The first argument should be List/LargeList/FixedSizedList, and the second argument should be Int64. - ArrayAndIndex(bool), + ArrayAndIndex, /// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index) ArrayAndElementAndOptionalIndex, /// Specialized Signature for ArrayEmpty and similar functions /// The function takes a single argument that must be a List/LargeList/FixedSizeList /// or something that can be coerced to one of those types. - /// If `allow_null` is true, the function also accepts a single argument of type Null. 
- Array(bool), + Array, } impl ArrayFunctionSignature { /// Arguments to ArrayFunctionSignature /// `current_types` - The data types of the arguments - /// `allow_null_coercion` - Whether null type coercion is allowed /// Returns the valid types for the function signature pub fn get_type_signature( &self, @@ -169,7 +164,7 @@ impl ArrayFunctionSignature { let first_two_types = ¤t_types[0..2]; let mut valid_types = - array_append_or_prepend_valid_types(first_two_types, true, true)?; + array_append_or_prepend_valid_types(first_two_types, true)?; // Early return if there are only 2 arguments if current_types.len() == 2 { @@ -193,7 +188,6 @@ impl ArrayFunctionSignature { fn array_append_or_prepend_valid_types( current_types: &[DataType], is_append: bool, - allow_null_coercion: bool, ) -> Result>> { if current_types.len() != 2 { return Ok(vec![vec![]]); @@ -207,9 +201,6 @@ impl ArrayFunctionSignature { // We follow Postgres on `array_append(Null, T)`, which is not valid. if array_type.eq(&DataType::Null) { - if allow_null_coercion { - return Ok(vec![vec![array_type.clone(), elem_type.clone()]]); - } return Ok(vec![vec![]]); } @@ -245,28 +236,20 @@ impl ArrayFunctionSignature { _ => Ok(vec![vec![]]), } } - fn array_and_index( - current_types: &[DataType], - allow_null_coercion: bool, - ) -> Result>> { + fn array_and_index(current_types: &[DataType]) -> Result>> { if current_types.len() != 2 { return Ok(vec![vec![]]); } let array_type = ¤t_types[0]; - let array_type = array(&[array_type.clone()], allow_null_coercion)?; + let array_type = array(&[array_type.clone()])?; if array_type[0].is_empty() { Ok(vec![vec![]]) } else { Ok(vec![vec![array_type[0][0].clone(), DataType::Int64]]) } } - fn array( - current_types: &[DataType], - allow_null_coercion: bool, - ) -> Result>> { - if current_types.len() != 1 - || (current_types[0].is_null() && !allow_null_coercion) - { + fn array(current_types: &[DataType]) -> Result>> { + if current_types.len() != 1 { return Ok(vec![vec![]]); } 
@@ -279,26 +262,21 @@ impl ArrayFunctionSignature { let array_type = coerced_fixed_size_list_to_list(array_type); Ok(vec![vec![array_type]]) } - DataType::Null => Ok(vec![vec![array_type.clone()]]), _ => Ok(vec![vec![]]), } } match self { - ArrayFunctionSignature::ArrayAndElement(allow_null) => { - array_append_or_prepend_valid_types(current_types, true, *allow_null) - } - ArrayFunctionSignature::ElementAndArray(allow_null) => { - array_append_or_prepend_valid_types(current_types, false, *allow_null) + ArrayFunctionSignature::ArrayAndElement => { + array_append_or_prepend_valid_types(current_types, true) } - ArrayFunctionSignature::ArrayAndIndex(allow_null) => { - array_and_index(current_types, *allow_null) + ArrayFunctionSignature::ElementAndArray => { + array_append_or_prepend_valid_types(current_types, false) } + ArrayFunctionSignature::ArrayAndIndex => array_and_index(current_types), ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { array_element_and_optional_index(current_types) } - ArrayFunctionSignature::Array(allow_null) => { - array(current_types, *allow_null) - } + ArrayFunctionSignature::Array => array(current_types), } } } @@ -306,20 +284,20 @@ impl ArrayFunctionSignature { impl std::fmt::Display for ArrayFunctionSignature { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - ArrayFunctionSignature::ArrayAndElement(allow_null) => { - write!(f, "ArrayAndElement({})", *allow_null) + ArrayFunctionSignature::ArrayAndElement => { + write!(f, "ArrayAndElement") } ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { write!(f, "array, element, [index]") } - ArrayFunctionSignature::ElementAndArray(allow_null) => { - write!(f, "ElementAndArray({})", *allow_null) + ArrayFunctionSignature::ElementAndArray => { + write!(f, "ElementAndArray") } - ArrayFunctionSignature::ArrayAndIndex(allow_null) => { - write!(f, "ArrayAndIndex({})", *allow_null) + ArrayFunctionSignature::ArrayAndIndex => { + write!(f, 
"ArrayAndIndex") } - ArrayFunctionSignature::Array(allow_null) => { - write!(f, "Array({})", *allow_null) + ArrayFunctionSignature::Array => { + write!(f, "Array") } } } @@ -458,10 +436,10 @@ impl Signature { } } /// Specialized Signature for ArrayAppend and similar functions - pub fn array_and_element(allow_null: bool, volatility: Volatility) -> Self { + pub fn array_and_element(volatility: Volatility) -> Self { Signature { type_signature: TypeSignature::ArraySignature( - ArrayFunctionSignature::ArrayAndElement(allow_null), + ArrayFunctionSignature::ArrayAndElement, ), volatility, } @@ -476,29 +454,27 @@ impl Signature { } } /// Specialized Signature for ArrayPrepend and similar functions - pub fn element_and_array(allow_null: bool, volatility: Volatility) -> Self { + pub fn element_and_array(volatility: Volatility) -> Self { Signature { type_signature: TypeSignature::ArraySignature( - ArrayFunctionSignature::ElementAndArray(allow_null), + ArrayFunctionSignature::ElementAndArray, ), volatility, } } /// Specialized Signature for ArrayElement and similar functions - pub fn array_and_index(allow_null: bool, volatility: Volatility) -> Self { + pub fn array_and_index(volatility: Volatility) -> Self { Signature { type_signature: TypeSignature::ArraySignature( - ArrayFunctionSignature::ArrayAndIndex(allow_null), + ArrayFunctionSignature::ArrayAndIndex, ), volatility, } } /// Specialized Signature for ArrayEmpty and similar functions - pub fn array(allow_null: bool, volatility: Volatility) -> Self { + pub fn array(volatility: Volatility) -> Self { Signature { - type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array( - allow_null, - )), + type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array), volatility, } } diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index fb34f351a9bd..38a4359b4f4b 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ 
b/datafusion/physical-expr/src/array_expressions.rs @@ -791,7 +791,6 @@ pub fn array_pop_front(args: &[ArrayRef]) -> Result { let array = as_large_list_array(&args[0])?; general_pop_front_list::(array) } - DataType::Null => Ok(args[0].clone()), _ => exec_err!( "array_pop_front does not support type: {:?}", array_data_type @@ -815,7 +814,6 @@ pub fn array_pop_back(args: &[ArrayRef]) -> Result { let array = as_large_list_array(&args[0])?; general_pop_back_list::(array) } - DataType::Null => Ok(args[0].clone()), _ => exec_err!( "array_pop_back does not support type: {:?}", array_data_type @@ -1480,10 +1478,6 @@ pub fn array_positions(args: &[ArrayRef]) -> Result { check_datatypes("array_positions", &[arr.values(), element])?; general_positions::(arr, element) } - DataType::Null => Ok(new_null_array( - &DataType::List(Arc::new(Field::new("item", DataType::UInt64, true))), - 1, - )), array_type => { exec_err!("array_positions does not support type '{array_type:?}'.") } @@ -1616,10 +1610,6 @@ fn array_remove_internal( element_array: &ArrayRef, arr_n: Vec, ) -> Result { - if array.data_type().is_null() { - return Ok(array.clone()); - } - match array.data_type() { DataType::List(_) => { let list_array = array.as_list::(); @@ -2294,7 +2284,6 @@ pub fn array_has(args: &[ArrayRef]) -> Result { DataType::LargeList(_) => { general_array_has_dispatch::(&args[0], &args[1], ComparisonType::Single) } - DataType::Null => Ok(new_null_array(&DataType::Boolean, 1)), _ => exec_err!("array_has does not support type '{array_type:?}'."), } } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index a45028021154..c534360501d0 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1116,7 +1116,7 @@ from arrays_values_without_nulls; ## array_element (aliases: array_extract, list_extract, list_element) # array_element error -query error DataFusion error: Error during planning: No 
function matches the given name and argument types 'array_element\(Int64, Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_element\(ArrayAndIndex\(true\)\) +query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_element\(Int64, Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_element\(ArrayAndIndex\) select array_element(1, 2); # array_element with null @@ -1357,11 +1357,12 @@ NULL 43 ## array_pop_back (aliases: `list_pop_back`) # array_pop_back scalar function with null +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 # follow clickhouse and duckdb -query ? -select array_pop_back(null); ----- -NULL +#query ? +#select array_pop_back(null); +#---- +#NULL # array_pop_back scalar function #1 query ?? @@ -1534,12 +1535,13 @@ select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from large_arrays ## array_pop_front (aliases: `list_pop_front`) +#TODO:https://github.com/apache/arrow-datafusion/issues/7142 # array_pop_front scalar function with null # follow clickhouse and duckdb -query ? -select array_pop_front(null); ----- -NULL +#query ? +#select array_pop_front(null); +#---- +#NULL # array_pop_front scalar function #1 query ?? @@ -3000,11 +3002,12 @@ select array_positions([1, 2, 3, 4, 5], null); ---- [] +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 # array_positions with NULL (follow PostgreSQL) -query ? -select array_positions(null, 1); ----- -NULL +#query ? +#select array_positions(null, 1); +#---- +#NULL # array_positions scalar function #1 query ??? @@ -4144,12 +4147,13 @@ select ---- [1, , 3] [, 2.2, 3.3] [, bc] +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 # follow PostgreSQL behavior -query ? -select - array_remove(NULL, 1) ----- -NULL +#query ? +#select +# array_remove(NULL, 1) +#---- +#NULL query ?? 
select @@ -4363,11 +4367,12 @@ select array_remove_n(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], ## array_remove_all (aliases: `list_removes`) +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 # array_remove_all with NULL elements -query ? -select array_remove_all(NULL, 1); ----- -NULL +#query ? +#select array_remove_all(NULL, 1); +#---- +#NULL query ? select array_remove_all(make_array(1, 2, 2, 1, 1), NULL); @@ -4602,7 +4607,7 @@ NULL 10 ## array_dims (aliases: `list_dims`) # array dims error -query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_dims\(Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_dims\(Array\(false\)\) +query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_dims\(Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_dims\(Array\) select array_dims(1); # array_dims scalar function @@ -5203,10 +5208,11 @@ true false true false false false true true false false true false true ## array_distinct -query ? -select array_distinct(null); ----- -NULL +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +#query ? +#select array_distinct(null); +#---- +#NULL query ? select array_distinct([]); @@ -5815,11 +5821,13 @@ select array_concat(column1, [7]) from arrays_values_v2; [7] # flatten + +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 # follow DuckDB -query ? -select flatten(NULL); ----- -NULL +#query ? +#select flatten(NULL); +#---- +#NULL # flatten with scalar values #1 query ??? 
@@ -5924,11 +5932,12 @@ select empty(arrow_cast(make_array(NULL), 'FixedSizeList(1, Null)')); ---- false +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 # empty scalar function #4 -query B -select empty(NULL); ----- -NULL +#query B +#select empty(NULL); +#---- +#NULL # empty scalar function #5 query B From 0a338b67a2ffc359f9000a806de7d914a261abac Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Mon, 26 Feb 2024 10:17:06 +0800 Subject: [PATCH 18/21] reformat output --- datafusion/expr/src/signature.rs | 8 ++++---- datafusion/sqllogictest/test_files/array.slt | 21 +++++++++----------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index 011f9ccae134..6095ff3fec19 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -285,19 +285,19 @@ impl std::fmt::Display for ArrayFunctionSignature { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { ArrayFunctionSignature::ArrayAndElement => { - write!(f, "ArrayAndElement") + write!(f, "array, element") } ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { write!(f, "array, element, [index]") } ArrayFunctionSignature::ElementAndArray => { - write!(f, "ElementAndArray") + write!(f, "element, array") } ArrayFunctionSignature::ArrayAndIndex => { - write!(f, "ArrayAndIndex") + write!(f, "array, index") } ArrayFunctionSignature::Array => { - write!(f, "Array") + write!(f, "array") } } } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index c534360501d0..bdf8654dfefd 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1116,7 +1116,7 @@ from arrays_values_without_nulls; ## array_element (aliases: array_extract, list_extract, list_element) # array_element error -query error DataFusion error: Error during planning: No function matches the given name and 
argument types 'array_element\(Int64, Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_element\(ArrayAndIndex\) +query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_element\(Int64, Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_element\(array, index\) select array_element(1, 2); # array_element with null @@ -2038,16 +2038,13 @@ select ---- [4] [] [1, , 3, 4] [, , 1] -#TODO: https://github.com/apache/arrow-datafusion/issues/9158 -#query ???? -#select -# array_append(arrow_cast(make_array(), 'FixedSizeList(1, Null)'), 4), -# array_append(arrow_cast(make_array(), 'FixedSizeList(1, Null)'), null), -# array_append(arrow_cast(make_array(1, null, 3), 'FixedSizeList(3, Int64)'), 4), -# array_append(arrow_cast(make_array(null, null), 'FixedSizeList(2, Null)'), 1) -#; -#---- -#[4] [] [1, , 3, 4] [, , 1] +query ?? +select + array_append(arrow_cast(make_array(1, null, 3), 'FixedSizeList(3, Int64)'), 4), + array_append(arrow_cast(make_array(null, null), 'FixedSizeList(2, Int64)'), 1) +; +---- +[1, , 3, 4] [, , 1] # test invalid (non-null) query error @@ -4607,7 +4604,7 @@ NULL 10 ## array_dims (aliases: `list_dims`) # array dims error -query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_dims\(Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_dims\(Array\) +query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_dims\(Int64\)'. 
You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_dims\(array\) select array_dims(1); # array_dims scalar function From 9d9527eebcf416369c9dcea8e6db7f2c5f331e25 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Mon, 26 Feb 2024 10:33:23 +0800 Subject: [PATCH 19/21] simplify code --- datafusion/expr/src/signature.rs | 45 ++++++++++++++++---------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index 6095ff3fec19..7080d8b7c8fc 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -236,33 +236,15 @@ impl ArrayFunctionSignature { _ => Ok(vec![vec![]]), } } - fn array_and_index(current_types: &[DataType]) -> Result>> { - if current_types.len() != 2 { - return Ok(vec![vec![]]); - } - let array_type = ¤t_types[0]; - let array_type = array(&[array_type.clone()])?; - if array_type[0].is_empty() { - Ok(vec![vec![]]) - } else { - Ok(vec![vec![array_type[0][0].clone(), DataType::Int64]]) - } - } - fn array(current_types: &[DataType]) -> Result>> { - if current_types.len() != 1 { - return Ok(vec![vec![]]); - } - - let array_type = ¤t_types[0]; - + fn array(array_type: &DataType) -> Option { match array_type { DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _) => { let array_type = coerced_fixed_size_list_to_list(array_type); - Ok(vec![vec![array_type]]) + Some(array_type) } - _ => Ok(vec![vec![]]), + _ => None, } } match self { @@ -272,11 +254,28 @@ impl ArrayFunctionSignature { ArrayFunctionSignature::ElementAndArray => { array_append_or_prepend_valid_types(current_types, false) } - ArrayFunctionSignature::ArrayAndIndex => array_and_index(current_types), + ArrayFunctionSignature::ArrayAndIndex => { + if current_types.len() != 2 { + return Ok(vec![vec![]]); + } + array(¤t_types[0]).map_or_else( + || Ok(vec![vec![]]), + |array_type| Ok(vec![vec![array_type, DataType::Int64]]), + ) + } 
ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { array_element_and_optional_index(current_types) } - ArrayFunctionSignature::Array => array(current_types), + ArrayFunctionSignature::Array => { + if current_types.len() != 1 { + return Ok(vec![vec![]]); + } + + array(¤t_types[0]).map_or_else( + || Ok(vec![vec![]]), + |array_type| Ok(vec![vec![array_type]]), + ) + } } } } From 9201a36ef817e343df6e7ee27bcdba2a2333f546 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Mon, 26 Feb 2024 10:45:49 +0800 Subject: [PATCH 20/21] add tests for array_dims --- datafusion/sqllogictest/test_files/array.slt | 31 ++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index bdf8654dfefd..da02a80a104f 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -4618,12 +4618,27 @@ select array_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_di ---- [3] [2, 2] [1, 1, 1, 2, 1] +query ??? +select array_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2, List(Int64))')), array_dims(arrow_cast(make_array([[[[1], [2]]]]), 'FixedSizeList(1, List(List(List(List(Int64)))))')); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + # array_dims scalar function #2 query ?? select array_dims(array_repeat(array_repeat(array_repeat(2, 3), 2), 1)), array_dims(array_repeat(array_repeat(array_repeat(3, 4), 5), 2)); ---- [1, 2, 3] [2, 5, 4] +query ?? +select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2), 1), 'LargeList(List(List(Int64)))')), array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2), 'LargeList(List(List(Int64)))')); +---- +[1, 2, 3] [2, 5, 4] + +query ?? 
+select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2), 1), 'FixedSizeList(1, List(List(Int64)))')), array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2), 'FixedSizeList(2, List(List(Int64)))')); +---- +[1, 2, 3] [2, 5, 4] + # array_dims scalar function #3 query ?? select array_dims(make_array()), array_dims(make_array(make_array())) @@ -4646,6 +4661,11 @@ select list_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_dims ---- [3] [2, 2] [1, 1, 1, 2, 1] +query ??? +select list_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), list_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2, List(Int64))')), list_dims(arrow_cast(make_array([[[[1], [2]]]]), 'FixedSizeList(1, List(List(List(List(Int64)))))')); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + # array_dims with columns query ??? select array_dims(column1), array_dims(column2), array_dims(column3) from arrays; @@ -4669,6 +4689,17 @@ NULL [3] [4] [2, 2] NULL [1] [2, 2] [3] NULL +query ??? +select array_dims(column1), array_dims(column2), array_dims(column3) from fixed_size_arrays; +---- +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [5] +NULL [3] [5] +[2, 2] NULL [5] +[2, 2] [3] NULL + ## array_ndims (aliases: `list_ndims`) From ad73b17362d9341682c0a6a8091eb167c4df15c7 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Mon, 26 Feb 2024 19:28:56 +0800 Subject: [PATCH 21/21] Refactor type coercion functions in datafusion/expr module --- datafusion/expr/src/signature.rs | 137 ------------------ .../expr/src/type_coercion/functions.rs | 131 ++++++++++++++++- 2 files changed, 125 insertions(+), 143 deletions(-) diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index 7080d8b7c8fc..663ecf7b1b8e 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -18,10 +18,7 @@ //! Signature module contains foundational types that are used to represent signatures, types, //! 
and return types of functions in DataFusion. -use crate::type_coercion::binary::comparison_coercion; use arrow::datatypes::DataType; -use datafusion_common::utils::coerced_fixed_size_list_to_list; -use datafusion_common::{internal_datafusion_err, DataFusionError, Result}; /// Constant that is used as a placeholder for any valid timezone. /// This is used where a function can accept a timestamp type with any @@ -146,140 +143,6 @@ pub enum ArrayFunctionSignature { Array, } -impl ArrayFunctionSignature { - /// Arguments to ArrayFunctionSignature - /// `current_types` - The data types of the arguments - /// Returns the valid types for the function signature - pub fn get_type_signature( - &self, - current_types: &[DataType], - ) -> Result>> { - fn array_element_and_optional_index( - current_types: &[DataType], - ) -> Result>> { - // make sure there's 2 or 3 arguments - if !(current_types.len() == 2 || current_types.len() == 3) { - return Ok(vec![vec![]]); - } - - let first_two_types = ¤t_types[0..2]; - let mut valid_types = - array_append_or_prepend_valid_types(first_two_types, true)?; - - // Early return if there are only 2 arguments - if current_types.len() == 2 { - return Ok(valid_types); - } - - let valid_types_with_index = valid_types - .iter() - .map(|t| { - let mut t = t.clone(); - t.push(DataType::Int64); - t - }) - .collect::>(); - - valid_types.extend(valid_types_with_index); - - Ok(valid_types) - } - - fn array_append_or_prepend_valid_types( - current_types: &[DataType], - is_append: bool, - ) -> Result>> { - if current_types.len() != 2 { - return Ok(vec![vec![]]); - } - - let (array_type, elem_type) = if is_append { - (¤t_types[0], ¤t_types[1]) - } else { - (¤t_types[1], ¤t_types[0]) - }; - - // We follow Postgres on `array_append(Null, T)`, which is not valid. 
- if array_type.eq(&DataType::Null) { - return Ok(vec![vec![]]); - } - - // We need to find the coerced base type, mainly for cases like: - // `array_append(List(null), i64)` -> `List(i64)` - let array_base_type = datafusion_common::utils::base_type(array_type); - let elem_base_type = datafusion_common::utils::base_type(elem_type); - let new_base_type = comparison_coercion(&array_base_type, &elem_base_type); - - let new_base_type = new_base_type.ok_or_else(|| { - internal_datafusion_err!( - "Coercion from {array_base_type:?} to {elem_base_type:?} not supported." - ) - })?; - - let new_array_type = - datafusion_common::utils::coerced_type_with_base_type_only( - array_type, - &new_base_type, - ); - - match new_array_type { - DataType::List(ref field) - | DataType::LargeList(ref field) - | DataType::FixedSizeList(ref field, _) => { - let new_elem_type = field.data_type(); - if is_append { - Ok(vec![vec![new_array_type.clone(), new_elem_type.clone()]]) - } else { - Ok(vec![vec![new_elem_type.to_owned(), new_array_type.clone()]]) - } - } - _ => Ok(vec![vec![]]), - } - } - fn array(array_type: &DataType) -> Option { - match array_type { - DataType::List(_) - | DataType::LargeList(_) - | DataType::FixedSizeList(_, _) => { - let array_type = coerced_fixed_size_list_to_list(array_type); - Some(array_type) - } - _ => None, - } - } - match self { - ArrayFunctionSignature::ArrayAndElement => { - array_append_or_prepend_valid_types(current_types, true) - } - ArrayFunctionSignature::ElementAndArray => { - array_append_or_prepend_valid_types(current_types, false) - } - ArrayFunctionSignature::ArrayAndIndex => { - if current_types.len() != 2 { - return Ok(vec![vec![]]); - } - array(¤t_types[0]).map_or_else( - || Ok(vec![vec![]]), - |array_type| Ok(vec![vec![array_type, DataType::Int64]]), - ) - } - ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { - array_element_and_optional_index(current_types) - } - ArrayFunctionSignature::Array => { - if current_types.len() != 1 { - 
return Ok(vec![vec![]]); - } - - array(¤t_types[0]).map_or_else( - || Ok(vec![vec![]]), - |array_type| Ok(vec![vec![array_type]]), - ) - } - } - } -} - impl std::fmt::Display for ArrayFunctionSignature { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 4e5a2f1b6955..2022d67879f8 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -15,14 +15,16 @@ // specific language governing permissions and limitations // under the License. -use crate::signature::TIMEZONE_WILDCARD; +use crate::signature::{ArrayFunctionSignature, TIMEZONE_WILDCARD}; use crate::{Signature, TypeSignature}; use arrow::{ compute::can_cast_types, datatypes::{DataType, TimeUnit}, }; -use datafusion_common::utils::list_ndims; -use datafusion_common::{internal_err, plan_err, DataFusionError, Result}; +use datafusion_common::utils::{coerced_fixed_size_list_to_list, list_ndims}; +use datafusion_common::{ + internal_datafusion_err, internal_err, plan_err, DataFusionError, Result, +}; use super::binary::comparison_coercion; @@ -78,6 +80,98 @@ fn get_valid_types( signature: &TypeSignature, current_types: &[DataType], ) -> Result>> { + fn array_element_and_optional_index( + current_types: &[DataType], + ) -> Result>> { + // make sure there's 2 or 3 arguments + if !(current_types.len() == 2 || current_types.len() == 3) { + return Ok(vec![vec![]]); + } + + let first_two_types = ¤t_types[0..2]; + let mut valid_types = array_append_or_prepend_valid_types(first_two_types, true)?; + + // Early return if there are only 2 arguments + if current_types.len() == 2 { + return Ok(valid_types); + } + + let valid_types_with_index = valid_types + .iter() + .map(|t| { + let mut t = t.clone(); + t.push(DataType::Int64); + t + }) + .collect::>(); + + valid_types.extend(valid_types_with_index); + + Ok(valid_types) + } + + 
fn array_append_or_prepend_valid_types( + current_types: &[DataType], + is_append: bool, + ) -> Result>> { + if current_types.len() != 2 { + return Ok(vec![vec![]]); + } + + let (array_type, elem_type) = if is_append { + (¤t_types[0], ¤t_types[1]) + } else { + (¤t_types[1], ¤t_types[0]) + }; + + // We follow Postgres on `array_append(Null, T)`, which is not valid. + if array_type.eq(&DataType::Null) { + return Ok(vec![vec![]]); + } + + // We need to find the coerced base type, mainly for cases like: + // `array_append(List(null), i64)` -> `List(i64)` + let array_base_type = datafusion_common::utils::base_type(array_type); + let elem_base_type = datafusion_common::utils::base_type(elem_type); + let new_base_type = comparison_coercion(&array_base_type, &elem_base_type); + + let new_base_type = new_base_type.ok_or_else(|| { + internal_datafusion_err!( + "Coercion from {array_base_type:?} to {elem_base_type:?} not supported." + ) + })?; + + let new_array_type = datafusion_common::utils::coerced_type_with_base_type_only( + array_type, + &new_base_type, + ); + + match new_array_type { + DataType::List(ref field) + | DataType::LargeList(ref field) + | DataType::FixedSizeList(ref field, _) => { + let new_elem_type = field.data_type(); + if is_append { + Ok(vec![vec![new_array_type.clone(), new_elem_type.clone()]]) + } else { + Ok(vec![vec![new_elem_type.to_owned(), new_array_type.clone()]]) + } + } + _ => Ok(vec![vec![]]), + } + } + fn array(array_type: &DataType) -> Option { + match array_type { + DataType::List(_) + | DataType::LargeList(_) + | DataType::FixedSizeList(_, _) => { + let array_type = coerced_fixed_size_list_to_list(array_type); + Some(array_type) + } + _ => None, + } + } + let valid_types = match signature { TypeSignature::Variadic(valid_types) => valid_types .iter() @@ -110,10 +204,35 @@ fn get_valid_types( } TypeSignature::Exact(valid_types) => vec![valid_types.clone()], - TypeSignature::ArraySignature(ref function_signature) => { - 
function_signature.get_type_signature(current_types)? - } + TypeSignature::ArraySignature(ref function_signature) => match function_signature + { + ArrayFunctionSignature::ArrayAndElement => { + array_append_or_prepend_valid_types(current_types, true)? + } + ArrayFunctionSignature::ElementAndArray => { + array_append_or_prepend_valid_types(current_types, false)? + } + ArrayFunctionSignature::ArrayAndIndex => { + if current_types.len() != 2 { + return Ok(vec![vec![]]); + } + array(¤t_types[0]).map_or_else( + || vec![vec![]], + |array_type| vec![vec![array_type, DataType::Int64]], + ) + } + ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { + array_element_and_optional_index(current_types)? + } + ArrayFunctionSignature::Array => { + if current_types.len() != 1 { + return Ok(vec![vec![]]); + } + array(¤t_types[0]) + .map_or_else(|| vec![vec![]], |array_type| vec![vec![array_type]]) + } + }, TypeSignature::Any(number) => { if current_types.len() != *number { return plan_err!(