From b728232b91862826061dfa878cefe90d25576f78 Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Mon, 26 Feb 2024 21:10:13 +0800 Subject: [PATCH] feat: support `FixedSizeList` Type Coercion (#9108) * support FixedSizeList Type Coercion * add allow null type coercion parameter * support null column in FixedSizeList * Add test * Add tests for cardinality with fixed size lists * chore * fix ci * add comment * Fix array_element function signature * Remove unused imports and simplify code * Fix array function signatures and behavior * fix conflict * fix conflict * add tests for FixedSizeList * remove unreacheable null check * simplify the code * remove null checking * reformat output * simplify code * add tests for array_dims * Refactor type coercion functions in datafusion/expr module --- datafusion/expr/src/built_in_function.rs | 23 +- datafusion/expr/src/signature.rs | 17 +- .../expr/src/type_coercion/functions.rs | 109 ++-- datafusion/sqllogictest/test_files/array.slt | 562 +++++++++++++++++- 4 files changed, 628 insertions(+), 83 deletions(-) diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index f92ae87d6e6c..8b4e65121c79 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -31,7 +31,7 @@ use crate::{ }; use arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit}; -use datafusion_common::{internal_err, plan_err, DataFusionError, Result}; +use datafusion_common::{exec_err, plan_err, DataFusionError, Result}; use strum::IntoEnumIterator; use strum_macros::EnumIter; @@ -543,10 +543,11 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Flatten => { fn get_base_type(data_type: &DataType) -> Result { match data_type { - DataType::List(field) if matches!(field.data_type(), DataType::List(_)) => get_base_type(field.data_type()), + DataType::List(field) | DataType::FixedSizeList(field, _) if matches!(field.data_type(), DataType::List(_)|DataType::FixedSizeList(_,_ )) => get_base_type(field.data_type()), DataType::LargeList(field) if matches!(field.data_type(), DataType::LargeList(_)) => get_base_type(field.data_type()), DataType::Null | DataType::List(_) | DataType::LargeList(_) => Ok(data_type.to_owned()), - _ => internal_err!("Not reachable, data_type should be List or LargeList"), + DataType::FixedSizeList(field,_ ) => Ok(DataType::List(field.clone())), + _ => exec_err!("Not reachable, data_type should be List, LargeList or FixedSizeList"), } } @@ -929,18 +930,18 @@ impl BuiltinScalarFunction { // 0 or more arguments of arbitrary type Signature::one_of(vec![VariadicEqual, Any(0)], self.volatility()) } - BuiltinScalarFunction::ArrayPopFront => Signature::any(1, self.volatility()), - BuiltinScalarFunction::ArrayPopBack => Signature::any(1, self.volatility()), + BuiltinScalarFunction::ArrayPopFront => Signature::array(self.volatility()), + BuiltinScalarFunction::ArrayPopBack => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayConcat => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayDims => Signature::any(1, self.volatility()), - BuiltinScalarFunction::ArrayEmpty => Signature::any(1, self.volatility()), + BuiltinScalarFunction::ArrayDims => Signature::array(self.volatility()), + BuiltinScalarFunction::ArrayEmpty => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayElement => { Signature::array_and_index(self.volatility()) } BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()), - BuiltinScalarFunction::Flatten => Signature::any(1, self.volatility()), + BuiltinScalarFunction::Flatten => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayHasAll | BuiltinScalarFunction::ArrayHasAny => { Signature::any(2, self.volatility()) } @@ -950,8 +951,8 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayLength => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayNdims => Signature::any(1, self.volatility()), - BuiltinScalarFunction::ArrayDistinct => Signature::any(1, self.volatility()), + BuiltinScalarFunction::ArrayNdims => Signature::array(self.volatility()), + BuiltinScalarFunction::ArrayDistinct => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayPosition => { Signature::array_and_element_and_optional_index(self.volatility()) } @@ -981,7 +982,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayIntersect => Signature::any(2, self.volatility()), BuiltinScalarFunction::ArrayUnion => Signature::any(2, self.volatility()), - BuiltinScalarFunction::Cardinality => Signature::any(1, self.volatility()), + BuiltinScalarFunction::Cardinality => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayResize => { Signature::variadic_any(self.volatility()) } diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index e8d9d8fb3966..663ecf7b1b8e 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -123,7 +123,7 @@ pub enum TypeSignature { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum ArrayFunctionSignature { /// Specialized Signature for ArrayAppend and similar functions - /// The first argument should be List/LargeList, and the second argument should be non-list or list. + /// The first argument should be List/LargeList/FixedSizedList, and the second argument should be non-list or list. /// The second argument's list dimension should be one dimension less than the first argument's list dimension. /// List dimension of the List/LargeList is equivalent to the number of List. /// List dimension of the non-list is 0. @@ -133,9 +133,14 @@ pub enum ArrayFunctionSignature { /// The first argument's list dimension should be one dimension less than the second argument's list dimension. ElementAndArray, /// Specialized Signature for Array functions of the form (List/LargeList, Index) + /// The first argument should be List/LargeList/FixedSizedList, and the second argument should be Int64. ArrayAndIndex, /// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index) ArrayAndElementAndOptionalIndex, + /// Specialized Signature for ArrayEmpty and similar functions + /// The function takes a single argument that must be a List/LargeList/FixedSizeList + /// or something that can be coerced to one of those types. + Array, } impl std::fmt::Display for ArrayFunctionSignature { @@ -153,6 +158,9 @@ impl std::fmt::Display for ArrayFunctionSignature { ArrayFunctionSignature::ArrayAndIndex => { write!(f, "array, index") } + ArrayFunctionSignature::Array => { + write!(f, "array") + } } } } @@ -325,6 +333,13 @@ impl Signature { volatility, } } + /// Specialized Signature for ArrayEmpty and similar functions + pub fn array(volatility: Volatility) -> Self { + Signature { + type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array), + volatility, + } + } } /// Monotonicity of the `ScalarFunctionExpr` with respect to its arguments. diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 9cab04bc7605..2022d67879f8 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -80,6 +80,36 @@ fn get_valid_types( signature: &TypeSignature, current_types: &[DataType], ) -> Result>> { + fn array_element_and_optional_index( + current_types: &[DataType], + ) -> Result>> { + // make sure there's 2 or 3 arguments + if !(current_types.len() == 2 || current_types.len() == 3) { + return Ok(vec![vec![]]); + } + + let first_two_types = ¤t_types[0..2]; + let mut valid_types = array_append_or_prepend_valid_types(first_two_types, true)?; + + // Early return if there are only 2 arguments + if current_types.len() == 2 { + return Ok(valid_types); + } + + let valid_types_with_index = valid_types + .iter() + .map(|t| { + let mut t = t.clone(); + t.push(DataType::Int64); + t + }) + .collect::>(); + + valid_types.extend(valid_types_with_index); + + Ok(valid_types) + } + fn array_append_or_prepend_valid_types( current_types: &[DataType], is_append: bool, @@ -111,71 +141,37 @@ fn get_valid_types( ) })?; - let array_type = datafusion_common::utils::coerced_type_with_base_type_only( + let new_array_type = datafusion_common::utils::coerced_type_with_base_type_only( array_type, &new_base_type, ); - match array_type { + match new_array_type { DataType::List(ref field) | DataType::LargeList(ref field) | DataType::FixedSizeList(ref field, _) => { - let elem_type = field.data_type(); + let new_elem_type = field.data_type(); if is_append { - Ok(vec![vec![array_type.clone(), elem_type.clone()]]) + Ok(vec![vec![new_array_type.clone(), new_elem_type.clone()]]) } else { - Ok(vec![vec![elem_type.to_owned(), array_type.clone()]]) + Ok(vec![vec![new_elem_type.to_owned(), new_array_type.clone()]]) } } _ => Ok(vec![vec![]]), } } - fn array_element_and_optional_index( - current_types: &[DataType], - ) -> Result>> { - // make sure there's 2 or 3 arguments - if !(current_types.len() == 2 || current_types.len() == 3) { - return Ok(vec![vec![]]); - } - - let first_two_types = ¤t_types[0..2]; - let mut valid_types = array_append_or_prepend_valid_types(first_two_types, true)?; - - // Early return if there are only 2 arguments - if current_types.len() == 2 { - return Ok(valid_types); - } - - let valid_types_with_index = valid_types - .iter() - .map(|t| { - let mut t = t.clone(); - t.push(DataType::Int64); - t - }) - .collect::>(); - - valid_types.extend(valid_types_with_index); - - Ok(valid_types) - } - fn array_and_index(current_types: &[DataType]) -> Result>> { - if current_types.len() != 2 { - return Ok(vec![vec![]]); - } - - let array_type = ¤t_types[0]; - + fn array(array_type: &DataType) -> Option { match array_type { DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _) => { let array_type = coerced_fixed_size_list_to_list(array_type); - Ok(vec![vec![array_type, DataType::Int64]]) + Some(array_type) } - _ => Ok(vec![vec![]]), + _ => None, } } + let valid_types = match signature { TypeSignature::Variadic(valid_types) => valid_types .iter() @@ -211,19 +207,32 @@ fn get_valid_types( TypeSignature::ArraySignature(ref function_signature) => match function_signature { ArrayFunctionSignature::ArrayAndElement => { - return array_append_or_prepend_valid_types(current_types, true) + array_append_or_prepend_valid_types(current_types, true)? } - ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { - return array_element_and_optional_index(current_types) + ArrayFunctionSignature::ElementAndArray => { + array_append_or_prepend_valid_types(current_types, false)? } ArrayFunctionSignature::ArrayAndIndex => { - return array_and_index(current_types) + if current_types.len() != 2 { + return Ok(vec![vec![]]); + } + array(¤t_types[0]).map_or_else( + || vec![vec![]], + |array_type| vec![vec![array_type, DataType::Int64]], + ) } - ArrayFunctionSignature::ElementAndArray => { - return array_append_or_prepend_valid_types(current_types, false) + ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { + array_element_and_optional_index(current_types)? } - }, + ArrayFunctionSignature::Array => { + if current_types.len() != 1 { + return Ok(vec![vec![]]); + } + array(¤t_types[0]) + .map_or_else(|| vec![vec![]], |array_type| vec![vec![array_type]]) + } + }, TypeSignature::Any(number) => { if current_types.len() != *number { return plan_err!( diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 7f263d904819..da02a80a104f 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -123,6 +123,13 @@ AS VALUES (make_array(NULL, 10, 11, 12)) ; +statement ok +CREATE TABLE large_arrayspop +AS SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1 +FROM arrayspop +; + statement ok CREATE TABLE nested_arrays AS VALUES @@ -172,6 +179,15 @@ AS SELECT FROM arrays_values ; +statement ok +CREATE TABLE fixed_arrays_values +AS SELECT + arrow_cast(column1, 'FixedSizeList(10, Int64)') AS column1, + column2, + column3, + column4 +FROM arrays_values +; statement ok CREATE TABLE arrays_values_v2 @@ -212,6 +228,22 @@ AS FROM flatten_table ; +statement ok +CREATE TABLE fixed_size_flatten_table +AS VALUES + (arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3, List(Int64))'), + arrow_cast(make_array([[1, 2, 3]], [[4, 5]], [[6]]), 'FixedSizeList(3, List(List(Int64)))'), + arrow_cast(make_array([[[1]]], [[[2, 3]]]), 'FixedSizeList(2, List(List(List(Int64))))'), + arrow_cast(make_array([1.0], [2.1, 2.2], [3.2, 3.3, 3.4]), 'FixedSizeList(3, List(Float64))') + ), + ( + arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3, List(Int64))'), + arrow_cast(make_array([[8]], [[9, 10]], [[11, 12, 13]]), 'FixedSizeList(3, List(List(Int64)))'), + arrow_cast(make_array([[[1,2]]], [[[3]]]), 'FixedSizeList(2, List(List(List(Int64))))'), + arrow_cast(make_array([1.0, 2.0], [3.0, 4.0], [5.0, 6.0]), 'FixedSizeList(3, List(Float64))') + ) +; + statement ok CREATE TABLE array_has_table_1D AS VALUES @@ -346,10 +378,31 @@ AS VALUES statement ok CREATE TABLE array_distinct_table_1D_large +AS SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1 +FROM array_distinct_table_1D +; + +statement ok +CREATE TABLE array_distinct_table_1D_fixed +AS SELECT + arrow_cast(column1, 'FixedSizeList(5, Int64)') AS column1 +FROM array_distinct_table_1D +; + +statement ok +CREATE TABLE array_distinct_table_1D_UTF8_fixed +AS SELECT + arrow_cast(column1, 'FixedSizeList(5, Utf8)') AS column1 +FROM array_distinct_table_1D_UTF8 +; + +statement ok +CREATE TABLE array_distinct_table_2D_fixed AS VALUES - (arrow_cast(make_array(1, 1, 2, 2, 3), 'LargeList(Int64)')), - (arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), - (arrow_cast(make_array(3, 5, 3, 3, 3), 'LargeList(Int64)')) + (arrow_cast(make_array([1,2], [1,2], [3,4], [3,4], [5,6]), 'FixedSizeList(5, List(Int64))')), + (arrow_cast(make_array([1,2], [3,4], [5,6], [7,8], [9,10]), 'FixedSizeList(5, List(Int64))')), + (arrow_cast(make_array([5,6], [5,6], NULL, NULL, NULL), 'FixedSizeList(5, List(Int64))')) ; statement ok @@ -1103,7 +1156,7 @@ select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL NULL query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 11); +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 11); ---- NULL NULL @@ -1303,6 +1356,14 @@ NULL 43 ## array_pop_back (aliases: `list_pop_back`) +# array_pop_back scalar function with null +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +# follow clickhouse and duckdb +#query ? +#select array_pop_back(null); +#---- +#NULL + # array_pop_back scalar function #1 query ?? select array_pop_back(make_array(1, 2, 3, 4, 5)), array_pop_back(make_array('h', 'e', 'l', 'l', 'o')); @@ -1314,6 +1375,11 @@ select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')) ---- [1, 2, 3, 4] [h, e, l, l] +query ?? +select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); +---- +[1, 2, 3, 4] [h, e, l, l] + # array_pop_back scalar function #2 (after array_pop_back, array is empty) query ? select array_pop_back(make_array(1)); @@ -1325,6 +1391,11 @@ select array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64)')); ---- [] +query ? +select array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +[] + # array_pop_back scalar function #3 (array_pop_back the empty array) query ? select array_pop_back(array_pop_back(make_array(1))); @@ -1336,12 +1407,27 @@ select array_pop_back(array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64) ---- [] +query ? +select array_pop_back(array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'))); +---- +[] + # array_pop_back scalar function #4 (array_pop_back the arrays which have NULL) query ?? select array_pop_back(make_array(1, 2, 3, 4, NULL)), array_pop_back(make_array(NULL, 'e', 'l', NULL, 'o')); ---- [1, 2, 3, 4] [, e, l, ] +query ?? +select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'LargeList(Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'LargeList(Utf8)')); +---- +[1, 2, 3, 4] [, e, l, ] + +query ?? +select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'FixedSizeList(5, Utf8)')); +---- +[1, 2, 3, 4] [, e, l, ] + # array_pop_back scalar function #5 (array_pop_back the nested arrays) query ? select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6))); @@ -1353,6 +1439,11 @@ select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9 ---- [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + # array_pop_back scalar function #6 (array_pop_back the nested arrays with NULL) query ? select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL)); @@ -1364,6 +1455,11 @@ select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9 ---- [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL), 'FixedSizeList(6, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + # array_pop_back scalar function #7 (array_pop_back the nested arrays with NULL) query ? select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4))); @@ -1375,6 +1471,11 @@ select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9 ---- [[1, 2, 3], [2, 9, 1], [7, 8, 9], ] +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4)), 'FixedSizeList(5, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], ] + # array_pop_back scalar function #8 (after array_pop_back, nested array is empty) query ? select array_pop_back(make_array(make_array(1, 2, 3))); @@ -1386,6 +1487,11 @@ select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(Lis ---- [] +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)), 'FixedSizeList(1, List(Int64))')); +---- +[] + # array_pop_back with columns query ? select array_pop_back(column1) from arrayspop; @@ -1407,8 +1513,36 @@ select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from arrayspop; [] [, 10, 11] +query ? +select array_pop_back(column1) from large_arrayspop; +---- +[1, 2] +[3, 4, 5] +[6, 7, 8, ] +[, ] +[] +[, 10, 11] + +query ? +select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from large_arrayspop; +---- +[1, 2] +[3, 4, 5] +[6, 7, 8, ] +[, ] +[] +[, 10, 11] + ## array_pop_front (aliases: `list_pop_front`) +#TODO:https://github.com/apache/arrow-datafusion/issues/7142 +# array_pop_front scalar function with null +# follow clickhouse and duckdb +#query ? +#select array_pop_front(null); +#---- +#NULL + # array_pop_front scalar function #1 query ?? select array_pop_front(make_array(1, 2, 3, 4, 5)), array_pop_front(make_array('h', 'e', 'l', 'l', 'o')); @@ -1420,6 +1554,11 @@ select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)') ---- [2, 3, 4, 5] [e, l, l, o] +query ?? +select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_pop_front(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); +---- +[2, 3, 4, 5] [e, l, l, o] + # array_pop_front scalar function #2 (after array_pop_front, array is empty) query ? select array_pop_front(make_array(1)); @@ -1431,6 +1570,11 @@ select array_pop_front(arrow_cast(make_array(1), 'LargeList(Int64)')); ---- [] +query ? +select array_pop_front(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +[] + # array_pop_front scalar function #3 (array_pop_front the empty array) query ? select array_pop_front(array_pop_front(make_array(1))); @@ -1442,6 +1586,11 @@ select array_pop_front(array_pop_front(arrow_cast(make_array(1), 'LargeList(Int6 ---- [] +query ? +select array_pop_front(array_pop_front(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'))); +---- +[] + # array_pop_front scalar function #5 (array_pop_front the nested arrays) query ? select array_pop_front(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6))); @@ -1453,6 +1602,11 @@ select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, ---- [[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] +query ? +select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))')); +---- +[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] + # array_pop_front scalar function #6 (array_pop_front the nested arrays with NULL) query ? select array_pop_front(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4))); @@ -1464,6 +1618,11 @@ select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3), make_arr ---- [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] +query ? +select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4)), 'FixedSizeList(6, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + # array_pop_front scalar function #8 (after array_pop_front, nested array is empty) query ? select array_pop_front(make_array(make_array(1, 2, 3))); @@ -1475,6 +1634,11 @@ select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(Li ---- [] +query ? +select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)), 'FixedSizeList(1, List(Int64))')); +---- +[] + ## array_slice (aliases: list_slice) # array_slice scalar function #1 (with positive indexes) @@ -1874,6 +2038,14 @@ select ---- [4] [] [1, , 3, 4] [, , 1] +query ?? +select + array_append(arrow_cast(make_array(1, null, 3), 'FixedSizeList(3, Int64)'), 4), + array_append(arrow_cast(make_array(null, null), 'FixedSizeList(2, Int64)'), 1) +; +---- +[1, , 3, 4] [, , 1] + # test invalid (non-null) query error select array_append(1, 2); @@ -1898,6 +2070,13 @@ select ---- [[1, , 3], []] [[1, , 3], ] +query ?? +select + array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), [null]), + array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), null); +---- +[[1, , 3], []] [[1, , 3], ] + # array_append scalar function #3 query ??? select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o'); @@ -1905,7 +2084,12 @@ select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3 [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] query ??? -select array_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o'); +select array_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), array_append(arrow_cast(make_array('h', 'e', 'l', 'l'), 'LargeList(Utf8)'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select array_append(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'FixedSizeList(3, Float64)'), 4.0), array_append(arrow_cast(make_array('h', 'e', 'l', 'l'), 'FixedSizeList(4, Utf8)'), 'o'); ---- [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] @@ -1920,6 +2104,11 @@ select array_append(arrow_cast(make_array([1], [2], [3]), 'LargeList(LargeList(I ---- [[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] +query ??? +select array_append(arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3, List(Int64))'), [4]), array_append(arrow_cast(make_array([1.0], [2.0], [3.0]), 'FixedSizeList(3, List(Float64))'), [4.0]), array_append(arrow_cast(make_array(['h'], ['e'], ['l'], ['l']), 'FixedSizeList(4, List(Utf8))'), ['o']); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + # list_append scalar function #5 (function alias `array_append`) query ??? select list_append(make_array(1, 2, 3), 4), list_append(make_array(1.0, 2.0, 3.0), 4.0), list_append(make_array('h', 'e', 'l', 'l'), 'o'); @@ -1978,6 +2167,18 @@ select array_append(column1, column2) from large_arrays_values; [51, 52, , 54, 55, 56, 57, 58, 59, 60, 55] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] +query ? +select array_append(column1, column2) from fixed_arrays_values; +---- +[, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] +[11, 12, 13, 14, 15, 16, 17, 18, , 20, 12] +[21, 22, 23, , 25, 26, 27, 28, 29, 30, 23] +[31, 32, 33, 34, 35, , 37, 38, 39, 40, 34] +[, , , , , , , , , , 44] +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, ] +[51, 52, , 54, 55, 56, 57, 58, 59, 60, 55] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] + # array_append with columns #2 (element is list) query ? select array_append(column1, column2) from nested_arrays; @@ -1991,6 +2192,12 @@ select array_append(column1, column2) from large_nested_arrays; [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] [[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] +query ? +select array_append(column1, column2) from fixed_size_nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] + # array_append with columns and scalars #1 query ?? select array_append(column2, 100.1), array_append(column3, '.') from arrays; @@ -2014,6 +2221,17 @@ select array_append(column2, 100.1), array_append(column3, '.') from large_array [100.1] [,, .] [16.6, 17.7, 18.8, 100.1] [.] +query ?? +select array_append(column2, 100.1), array_append(column3, '.') from fixed_size_arrays; +---- +[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] +[, 5.5, 6.6, 100.1] [i, p, , u, m, .] +[7.7, 8.8, 9.9, 100.1] [d, , l, o, r, .] +[10.1, , 12.2, 100.1] [s, i, t, a, b, .] +[13.3, 14.4, 15.5, 100.1] [a, m, e, t, x, .] +[, , , 100.1] [,, a, b, c, d, .] +[16.6, 17.7, 18.8, 100.1] [, , , , , .] + # array_append with columns and scalars #2 query ?? select array_append(column1, make_array(1, 11, 111)), array_append(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), column2) from nested_arrays; @@ -2027,6 +2245,12 @@ select array_append(column1, arrow_cast(make_array(1, 11, 111), 'LargeList(Int64 [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] [[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] +query ?? +select array_append(column1, arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3, Int64)')), array_append(arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))'), column2) from fixed_size_nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] + ## array_prepend (aliases: `list_prepend`, `array_push_front`, `list_push_front`) # array_prepend with NULLs @@ -2093,6 +2317,11 @@ select array_prepend(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), ar ---- [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] +query ??? +select array_prepend(1, arrow_cast([2, 3, 4], 'FixedSizeList(3, Int64)')), array_prepend(1.0, arrow_cast([2.0, 3.0, 4.0], 'FixedSizeList(3, Float64)')), array_prepend('h', arrow_cast(['e', 'l', 'l', 'o'], 'FixedSizeList(4, Utf8)')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + # array_prepend scalar function #4 (element is list) query ??? select array_prepend(make_array(1), make_array(make_array(2), make_array(3), make_array(4))), array_prepend(make_array(1.0), make_array([2.0], [3.0], [4.0])), array_prepend(make_array('h'), make_array(['e'], ['l'], ['l'], ['o'])); @@ -2106,6 +2335,13 @@ select array_prepend(arrow_cast(make_array(1), 'LargeList(Int64)'), arrow_cast(m ---- [[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] +query ??? +select array_prepend(arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([[1], [2], [3]], 'FixedSizeList(3, List(Int64))')), + array_prepend(arrow_cast([1.0], 'FixedSizeList(1, Float64)'), arrow_cast([[2.0], [3.0], [4.0]], 'FixedSizeList(3, List(Float64))')), + array_prepend(arrow_cast(['h'], 'FixedSizeList(1, Utf8)'), arrow_cast([['e'], ['l'], ['l'], ['o']], 'FixedSizeList(4, List(Utf8))')); +---- +[[1], [1], [2], [3]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + # list_prepend scalar function #5 (function alias `array_prepend`) query ??? select list_prepend(1, make_array(2, 3, 4)), list_prepend(1.0, make_array(2.0, 3.0, 4.0)), list_prepend('h', make_array('e', 'l', 'l', 'o')); @@ -2164,6 +2400,18 @@ select array_prepend(column2, column1) from large_arrays_values; [55, 51, 52, , 54, 55, 56, 57, 58, 59, 60] [66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] +query ? +select array_prepend(column2, column1) from fixed_arrays_values; +---- +[1, , 2, 3, 4, 5, 6, 7, 8, 9, 10] +[12, 11, 12, 13, 14, 15, 16, 17, 18, , 20] +[23, 21, 22, 23, , 25, 26, 27, 28, 29, 30] +[34, 31, 32, 33, 34, 35, , 37, 38, 39, 40] +[44, , , , , , , , , , ] +[, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[55, 51, 52, , 54, 55, 56, 57, 58, 59, 60] +[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + # array_prepend with columns #2 (element is list) query ? select array_prepend(column2, column1) from nested_arrays; @@ -2177,6 +2425,12 @@ select array_prepend(column2, column1) from large_nested_arrays; [[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] +query ? +select array_prepend(column2, column1) from fixed_size_nested_arrays; +---- +[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] +[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] + # array_prepend with columns and scalars #1 query ?? select array_prepend(100.1, column2), array_prepend('.', column3) from arrays; @@ -2200,6 +2454,17 @@ select array_prepend(100.1, column2), array_prepend('.', column3) from large_arr [100.1] [., ,] [100.1, 16.6, 17.7, 18.8] [.] +query ?? +select array_prepend(100.1, column2), array_prepend('.', column3) from fixed_size_arrays; +---- +[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] +[100.1, , 5.5, 6.6] [., i, p, , u, m] +[100.1, 7.7, 8.8, 9.9] [., d, , l, o, r] +[100.1, 10.1, , 12.2] [., s, i, t, a, b] +[100.1, 13.3, 14.4, 15.5] [., a, m, e, t, x] +[100.1, , , ] [., ,, a, b, c, d] +[100.1, 16.6, 17.7, 18.8] [., , , , , ] + # array_prepend with columns and scalars #2 (element is list) query ?? select array_prepend(make_array(1, 11, 111), column1), array_prepend(column2, make_array(make_array(1, 2, 3), make_array(11, 12, 13))) from nested_arrays; @@ -2213,6 +2478,12 @@ select array_prepend(arrow_cast(make_array(1, 11, 111), 'LargeList(Int64)'), col [[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] [[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] +query ?? +select array_prepend(arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3, Int64)'), column1), array_prepend(column2, arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))')) from fixed_size_nested_arrays; +---- +[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] +[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] + ## array_repeat (aliases: `list_repeat`) # array_repeat scalar function #1 @@ -2723,12 +2994,18 @@ NULL 1 NULL ## array_positions (aliases: `list_positions`) -# array_position with NULL (follow PostgreSQL) query ? select array_positions([1, 2, 3, 4, 5], null); ---- [] +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +# array_positions with NULL (follow PostgreSQL) +#query ? +#select array_positions(null, 1); +#---- +#NULL + # array_positions scalar function #1 query ??? select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3, 4, 5], 5), array_positions([1, 1, 1], 1); @@ -3748,6 +4025,11 @@ select cardinality(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), c ---- 5 3 5 +query III +select cardinality(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)')), cardinality(arrow_cast([1, 3, 5], 'FixedSizeList(3, Int64)')), cardinality(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); +---- +5 3 5 + # cardinality scalar function #2 query II select cardinality(make_array([1, 2], [3, 4], [5, 6])), cardinality(array_repeat(array_repeat(array_repeat(3, 3), 2), 3)); @@ -3759,6 +4041,11 @@ select cardinality(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(Lis ---- 6 +query I +select cardinality(arrow_cast([[1, 2], [3, 4], [5, 6]], 'FixedSizeList(3, List(Int64))')); +---- +6 + # cardinality scalar function #3 query II select cardinality(make_array()), cardinality(make_array(make_array())) @@ -3770,6 +4057,13 @@ select cardinality(arrow_cast(make_array(), 'LargeList(Null)')), cardinality(arr ---- NULL 0 +#TODO +#https://github.com/apache/arrow-datafusion/issues/9158 +#query II +#select cardinality(arrow_cast(make_array(), 'FixedSizeList(1, Null)')), cardinality(arrow_cast(make_array(make_array()), 'FixedSizeList(1, List(Null))')) +#---- +#NULL 0 + # cardinality with columns query III select cardinality(column1), cardinality(column2), cardinality(column3) from arrays; @@ -3793,6 +4087,17 @@ NULL 3 4 4 NULL 1 4 3 NULL +query III +select cardinality(column1), cardinality(column2), cardinality(column3) from fixed_size_arrays; +---- +4 3 5 +4 3 5 +4 3 5 +4 3 5 +NULL 3 5 +4 NULL 5 +4 3 NULL + ## array_remove (aliases: `list_remove`) # array_remove scalar function #1 @@ -3801,6 +4106,13 @@ select array_remove(make_array(1, 2, 2, 1, 1), 2), array_remove(make_array(1.0, ---- [1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] +query ??? +select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2), + array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'LargeList(Float64)'), 1.0), + array_remove(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 'l'); +---- +[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] + query ??? select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int64)'), 2), array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float64)'), 1.0), @@ -3816,6 +4128,14 @@ select ---- [1, , 3] [, 2.2, 3.3] [, bc] +query ??? +select + array_remove(arrow_cast(make_array(1, null, 2, 3), 'LargeList(Int64)'), 2), + array_remove(arrow_cast(make_array(1.1, null, 2.2, 3.3), 'LargeList(Float64)'), 1.1), + array_remove(arrow_cast(make_array('a', null, 'bc'), 'LargeList(Utf8)'), 'a'); +---- +[1, , 3] [, 2.2, 3.3] [, bc] + query ??? select array_remove(arrow_cast(make_array(1, null, 2, 3), 'FixedSizeList(4, Int64)'), 2), @@ -3824,6 +4144,14 @@ select ---- [1, , 3] [, 2.2, 3.3] [, bc] +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +# follow PostgreSQL behavior +#query ? +#select +# array_remove(NULL, 1) +#---- +#NULL + query ?? select array_remove(make_array(1, null, 2), null), @@ -3831,12 +4159,32 @@ select ---- [1, 2] [1, 2, ] +query ?? +select + array_remove(arrow_cast(make_array(1, null, 2), 'LargeList(Int64)'), null), + array_remove(arrow_cast(make_array(1, null, 2, null), 'LargeList(Int64)'), null); +---- +[1, 2] [1, 2, ] + +query ?? +select + array_remove(arrow_cast(make_array(1, null, 2), 'FixedSizeList(3, Int64)'), null), + array_remove(arrow_cast(make_array(1, null, 2, null), 'FixedSizeList(4, Int64)'), null); +---- +[1, 2] [1, 2, ] + # array_remove scalar function #2 (element is list) query ?? select array_remove(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_remove(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); ---- [[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] +query ?? +select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6]), + array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + query ?? select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); @@ -3864,6 +4212,14 @@ select array_remove(column1, column2) from arrays_with_repeating_elements; [7, 7, 8, 7, 9, 7, 8, 7, 7] [11, 12, 10, 11, 12, 10, 11, 12, 10] +query ? +select array_remove(column1, column2) from large_arrays_with_repeating_elements; +---- +[1, 1, 3, 2, 2, 1, 3, 2, 3] +[4, 5, 5, 6, 5, 5, 5, 4, 4] +[7, 7, 8, 7, 9, 7, 8, 7, 7] +[11, 12, 10, 11, 12, 10, 11, 12, 10] + query ? select array_remove(column1, column2) from fixed_arrays_with_repeating_elements; ---- @@ -3881,6 +4237,14 @@ select array_remove(column1, column2) from nested_arrays_with_repeating_elements [[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] +query ? +select array_remove(column1, column2) from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + query ? select array_remove(column1, column2) from fixed_size_nested_arrays_with_repeating_elements; ---- @@ -3898,6 +4262,14 @@ select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), a [1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] +query ?? +select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from large_arrays_with_repeating_elements; +---- +[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] +[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] + query ?? select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from fixed_arrays_with_repeating_elements; ---- @@ -3916,6 +4288,15 @@ select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [1 [[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] +query ?? +select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), + array_remove(column1, make_array(1, 2, 3)) from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + query ?? select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), array_remove(column1, make_array(1, 2, 3)) from fixed_size_nested_arrays_with_repeating_elements; @@ -3983,7 +4364,13 @@ select array_remove_n(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], ## array_remove_all (aliases: `list_removes`) +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 # array_remove_all with NULL elements +#query ? +#select array_remove_all(NULL, 1); +#---- +#NULL + query ? select array_remove_all(make_array(1, 2, 2, 1, 1), NULL); ---- @@ -4217,7 +4604,7 @@ NULL 10 ## array_dims (aliases: `list_dims`) # array dims error -query error Execution error: array_dims does not support type 'Int64' +query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_dims\(Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_dims\(array\) select array_dims(1); # array_dims scalar function @@ -4231,12 +4618,27 @@ select array_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_di ---- [3] [2, 2] [1, 1, 1, 2, 1] +query ??? +select array_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2, List(Int64))')), array_dims(arrow_cast(make_array([[[[1], [2]]]]), 'FixedSizeList(1, List(List(List(List(Int64)))))')); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + # array_dims scalar function #2 query ?? select array_dims(array_repeat(array_repeat(array_repeat(2, 3), 2), 1)), array_dims(array_repeat(array_repeat(array_repeat(3, 4), 5), 2)); ---- [1, 2, 3] [2, 5, 4] +query ?? +select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2), 1), 'LargeList(List(List(Int64)))')), array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2), 'LargeList(List(List(Int64)))')); +---- +[1, 2, 3] [2, 5, 4] + +query ?? +select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2), 1), 'FixedSizeList(1, List(List(Int64)))')), array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2), 'FixedSizeList(2, List(List(Int64)))')); +---- +[1, 2, 3] [2, 5, 4] + # array_dims scalar function #3 query ?? select array_dims(make_array()), array_dims(make_array(make_array())) @@ -4259,6 +4661,11 @@ select list_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_dims ---- [3] [2, 2] [1, 1, 1, 2, 1] +query ??? +select list_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), list_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2, List(Int64))')), list_dims(arrow_cast(make_array([[[[1], [2]]]]), 'FixedSizeList(1, List(List(List(List(Int64)))))')); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + # array_dims with columns query ??? select array_dims(column1), array_dims(column2), array_dims(column3) from arrays; @@ -4282,14 +4689,22 @@ NULL [3] [4] [2, 2] NULL [1] [2, 2] [3] NULL +query ??? +select array_dims(column1), array_dims(column2), array_dims(column3) from fixed_size_arrays; +---- +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [5] +NULL [3] [5] +[2, 2] NULL [5] +[2, 2] [3] NULL + ## array_ndims (aliases: `list_ndims`) # array_ndims scalar function #1 -query error -select array_ndims(1); - #follow PostgreSQL query error select @@ -4306,6 +4721,7 @@ CREATE TABLE array_ndims_table AS VALUES ([1], [1, 2, 3], [[7]], [[[[[10]]]]]), ([2], [4, 5], [[8]], [[[[[10]]]]]), + (NUll, [6, 7], [[9]], [[[[[10]]]]]), ([3], [6], [[9]], [[[[[10]]]]]) ; @@ -4318,6 +4734,15 @@ AS SELECT arrow_cast(column4, 'LargeList(List(List(List(List(Int64)))))') as column4 FROM array_ndims_table; +statement ok +CREATE TABLE fixed_array_ndims_table +AS VALUES + (arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)'), arrow_cast([[7]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), + (arrow_cast([2], 'FixedSizeList(1, Int64)'), arrow_cast([4, 5, 6], 'FixedSizeList(3, Int64)'), arrow_cast([[8]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), + (null, arrow_cast([6, 7, 8], 'FixedSizeList(3, Int64)'), arrow_cast([[9]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), + (arrow_cast([3], 'FixedSizeList(1, Int64)'), arrow_cast([6, 7, 8], 'FixedSizeList(3, Int64)'), arrow_cast([[9]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')) +; + query IIII select array_ndims(column1), @@ -4328,6 +4753,7 @@ from array_ndims_table; ---- 1 1 2 5 1 1 2 5 +NULL 1 2 5 1 1 2 5 query IIII @@ -4340,8 +4766,24 @@ from large_array_ndims_table; ---- 1 1 2 5 1 1 2 5 +NULL 1 2 5 +1 1 2 5 + +query IIII +select + array_ndims(column1), + array_ndims(column2), + array_ndims(column3), + array_ndims(column4) +from fixed_array_ndims_table; +---- +1 1 2 5 +1 1 2 5 +NULL 1 2 5 1 1 2 5 + + statement ok drop table array_ndims_table; @@ -4794,10 +5236,11 @@ true false true false false false true true false false true false true ## array_distinct -query ? -select array_distinct(null); ----- -NULL +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +#query ? +#select array_distinct(null); +#---- +#NULL query ? select array_distinct([]); @@ -4841,6 +5284,30 @@ from array_distinct_table_1D_large; [1, 2, 3, 4, 5] [3, 5] +query ? +select array_distinct(column1) +from array_distinct_table_1D_fixed; +---- +[1, 2, 3] +[1, 2, 3, 4, 5] +[3, 5] + +query ? +select array_distinct(column1) +from array_distinct_table_1D_UTF8_fixed; +---- +[a, bc, def] +[a, bc, def, defg] +[defg] + +query ? +select array_distinct(column1) +from array_distinct_table_2D_fixed; +---- +[[1, 2], [3, 4], [5, 6]] +[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] +[, [5, 6]] + query ??? select array_intersect(column1, column2), array_intersect(column3, column4), @@ -5382,11 +5849,13 @@ select array_concat(column1, [7]) from arrays_values_v2; [7] # flatten + +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 # follow DuckDB -query ? -select flatten(NULL); ----- -NULL +#query ? +#select flatten(NULL); +#---- +#NULL # flatten with scalar values #1 query ??? @@ -5403,6 +5872,13 @@ select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'LargeList(Int64)')), ---- [1, 2, 1, 3, 2] [1, 2, 3, , 4, , 5] [1.1, 2.2, 3.3, 4.4] +query ??? +select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'FixedSizeList(5, Int64)')), + flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, List(Int64))')), + flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'FixedSizeList(2, List(List(Float64)))')); +---- +[1, 2, 1, 3, 2] [1, 2, 3, , 4, , 5] [1.1, 2.2, 3.3, 4.4] + # flatten with column values query ???? select flatten(column1), @@ -5424,6 +5900,16 @@ from large_flatten_table; [1, 2, 3] [1, 2, 3, 4, 5, 6] [1, 2, 3] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] [1, 2, 3, 4, 5, 6] [8] [1, 2, 3] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] +query ???? +select flatten(column1), + flatten(column2), + flatten(column3), + flatten(column4) +from fixed_size_flatten_table; +---- +[1, 2, 3] [1, 2, 3, 4, 5, 6] [1, 2, 3] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] +[1, 2, 3, 4, 5, 6] [8, 9, 10, 11, 12, 13] [1, 2, 3] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + ## empty # empty scalar function #1 query B @@ -5436,6 +5922,11 @@ select empty(arrow_cast(make_array(1), 'LargeList(Int64)')); ---- false +query B +select empty(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +false + # empty scalar function #2 query B select empty(make_array()); @@ -5447,6 +5938,12 @@ select empty(arrow_cast(make_array(), 'LargeList(Null)')); ---- true +#TODO: https://github.com/apache/arrow-datafusion/issues/9158 +#query B +#select empty(arrow_cast(make_array(), 'FixedSizeList(0, Null)')); +#---- +#true + # empty scalar function #3 query B select empty(make_array(NULL)); @@ -5458,11 +5955,17 @@ select empty(arrow_cast(make_array(NULL), 'LargeList(Null)')); ---- false -# empty scalar function #4 query B -select empty(NULL); +select empty(arrow_cast(make_array(NULL), 'FixedSizeList(1, Null)')); ---- -NULL +false + +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +# empty scalar function #4 +#query B +#select empty(NULL); +#---- +#NULL # empty scalar function #5 query B @@ -5487,6 +5990,17 @@ NULL false false +query B +select empty(column1) from fixed_size_arrays; +---- +false +false +false +false +NULL +false +false + query ? SELECT string_to_array('abcxxxdef', 'xxx') ---- @@ -5686,6 +6200,9 @@ drop table fixed_slices; statement ok drop table arrayspop; +statement ok +drop table large_arrayspop; + statement ok drop table arrays_values; @@ -5806,6 +6323,9 @@ drop table flatten_table; statement ok drop table large_flatten_table; +statement ok +drop table fixed_size_flatten_table; + statement ok drop table arrays_values_without_nulls;