diff --git a/Cargo.toml b/Cargo.toml index b1f07aa531df..b9bc3c3fc07c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -157,15 +157,15 @@ unused_imports = "deny" ## Temporary arrow-rs patch until 52.2.0 is released [patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 10bea5f4ce54..958941743e33 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -64,15 +64,15 @@ predicates = "3.0" rstest = "0.17" [patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "66390ff8ec15bb6ed585f353f67a19574da4375a" } +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "8a5be1330e30e6dd7760dba910737550d760e612" } diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 70139aaa4a0c..4e5ed42f981e 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -527,7 +527,7 @@ fn string_numeric_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option { match (l, r) { - // Coerce Utf8/LargeUtf8 to Date32/Date64/Time32/Time64/Timestamp - (Utf8, temporal) | (LargeUtf8, temporal) => match temporal { - Date32 | Date64 => Some(temporal.clone()), - Time32(_) | Time64(_) => { - if is_time_with_valid_unit(temporal.to_owned()) { - Some(temporal.to_owned()) - } else { - None + // Coerce Utf8View/Utf8/LargeUtf8 to Date32/Date64/Time32/Time64/Timestamp + (Utf8, temporal) | (LargeUtf8, temporal) | (Utf8View, temporal) => { + match temporal { + Date32 | Date64 => Some(temporal.clone()), + Time32(_) | Time64(_) => { + if is_time_with_valid_unit(temporal.to_owned()) { + Some(temporal.to_owned()) + } else { + None + } } + Timestamp(_, tz) => Some(Timestamp(TimeUnit::Nanosecond, tz.clone())), + _ => None, } - Timestamp(_, tz) => Some(Timestamp(TimeUnit::Nanosecond, tz.clone())), - _ => None, - }, + } _ => None, } } diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index e1efb4811ec0..e24b11aeb71f 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use arrow::array::{Array, ArrayRef, Float64Array}; use arrow::compute::{binary, cast, date_part, DatePart}; use arrow::datatypes::DataType::{ - Date32, Date64, Float64, Time32, Time64, Timestamp, Utf8, + Date32, Date64, Float64, Time32, Time64, Timestamp, Utf8, Utf8View, }; use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; use arrow::datatypes::{DataType, TimeUnit}; @@ -56,31 +56,57 @@ impl DatePartFunc { signature: Signature::one_of( vec![ Exact(vec![Utf8, Timestamp(Nanosecond, None)]), + Exact(vec![Utf8View, Timestamp(Nanosecond, None)]), Exact(vec![ Utf8, Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())), + ]), Exact(vec![Utf8, Timestamp(Millisecond, None)]), + Exact(vec![Utf8View, Timestamp(Millisecond, None)]), Exact(vec![ Utf8, Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())), + ]), Exact(vec![Utf8, Timestamp(Microsecond, None)]), + Exact(vec![Utf8View, Timestamp(Microsecond, None)]), Exact(vec![ Utf8, Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())), + ]), Exact(vec![Utf8, Timestamp(Second, None)]), + Exact(vec![Utf8View, Timestamp(Second, None)]), Exact(vec![ Utf8, Timestamp(Second, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Second, Some(TIMEZONE_WILDCARD.into())), + ]), Exact(vec![Utf8, Date64]), + Exact(vec![Utf8View, Date64]), Exact(vec![Utf8, Date32]), + Exact(vec![Utf8View, Date32]), Exact(vec![Utf8, Time32(Second)]), + Exact(vec![Utf8View, Time32(Second)]), Exact(vec![Utf8, Time32(Millisecond)]), + Exact(vec![Utf8View, Time32(Millisecond)]), Exact(vec![Utf8, Time64(Microsecond)]), + Exact(vec![Utf8View, Time64(Microsecond)]), Exact(vec![Utf8, Time64(Nanosecond)]), + Exact(vec![Utf8View, Time64(Nanosecond)]), ], Volatility::Immutable, ), @@ -114,6 +140,8 @@ impl ScalarUDFImpl for DatePartFunc { let part = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) = part { v + } else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = part { + v } else { return exec_err!( "First argument of `DATE_PART` must be non-null scalar Utf8" diff --git a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs index 6b52507a9c6f..8be774e70181 100644 --- a/datafusion/functions/src/datetime/date_trunc.rs +++ b/datafusion/functions/src/datetime/date_trunc.rs @@ -29,7 +29,7 @@ use arrow::array::types::{ TimestampNanosecondType, TimestampSecondType, }; use arrow::array::{Array, PrimitiveArray}; -use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8}; +use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8, Utf8View}; use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second}; use datafusion_common::cast::as_primitive_array; use datafusion_common::{exec_err, plan_err, DataFusionError, Result, ScalarValue}; @@ -61,25 +61,45 @@ impl DateTruncFunc { signature: Signature::one_of( vec![ Exact(vec![Utf8, Timestamp(Nanosecond, None)]), + Exact(vec![Utf8View, Timestamp(Nanosecond, None)]), Exact(vec![ Utf8, Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())), + ]), Exact(vec![Utf8, Timestamp(Microsecond, None)]), + Exact(vec![Utf8View, Timestamp(Microsecond, None)]), Exact(vec![ Utf8, Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())), + ]), Exact(vec![Utf8, Timestamp(Millisecond, None)]), + Exact(vec![Utf8View, Timestamp(Millisecond, None)]), Exact(vec![ Utf8, Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())), + ]), Exact(vec![Utf8, Timestamp(Second, None)]), + Exact(vec![Utf8View, Timestamp(Second, None)]), Exact(vec![ Utf8, Timestamp(Second, Some(TIMEZONE_WILDCARD.into())), ]), + Exact(vec![ + Utf8View, + Timestamp(Second, Some(TIMEZONE_WILDCARD.into())), + ]), ], Volatility::Immutable, ), @@ -121,6 +141,9 @@ impl ScalarUDFImpl for DateTruncFunc { granularity { v.to_lowercase() + } else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = + granularity { + v.to_lowercase() } else { return exec_err!("Granularity of `date_trunc` must be non-null scalar Utf8"); }; diff --git a/datafusion/functions/src/datetime/make_date.rs b/datafusion/functions/src/datetime/make_date.rs index 6aa72572bc4d..ded7b454f9eb 100644 --- a/datafusion/functions/src/datetime/make_date.rs +++ b/datafusion/functions/src/datetime/make_date.rs @@ -23,7 +23,7 @@ use arrow::array::cast::AsArray; use arrow::array::types::{Date32Type, Int32Type}; use arrow::array::PrimitiveArray; use arrow::datatypes::DataType; -use arrow::datatypes::DataType::{Date32, Int32, Int64, UInt32, UInt64, Utf8}; +use arrow::datatypes::DataType::{Date32, Int32, Int64, UInt32, UInt64, Utf8, Utf8View}; use chrono::prelude::*; use datafusion_common::{exec_err, Result, ScalarValue}; @@ -45,7 +45,7 @@ impl MakeDateFunc { Self { signature: Signature::uniform( 3, - vec![Int32, Int64, UInt32, UInt64, Utf8], + vec![Int32, Int64, UInt32, UInt64, Utf8, Utf8View], Volatility::Immutable, ), } diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 393dcc456a88..6fcb9c6f0840 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -15,12 +15,14 @@ // specific language governing permissions and limitations // under the License. +use std::sync::Arc; + use arrow::array::ArrayRef; use arrow::datatypes::DataType; + use datafusion_common::{Result, ScalarValue}; use datafusion_expr::function::Hint; use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation}; -use std::sync::Arc; /// Creates a function to identify the optimal return type of a string function given /// the type of its first argument. @@ -29,6 +31,8 @@ use std::sync::Arc; /// `$largeUtf8Type`, /// /// If the input type is `Utf8` or `Binary` the return type is `$utf8Type`, +/// +/// If the input type is `Utf8View` the return type is `Utf8View`, macro_rules! get_optimal_return_type { ($FUNC:ident, $largeUtf8Type:expr, $utf8Type:expr) => { pub(crate) fn $FUNC(arg_type: &DataType, name: &str) -> Result { @@ -37,6 +41,8 @@ macro_rules! get_optimal_return_type { DataType::LargeUtf8 | DataType::LargeBinary => $largeUtf8Type, // Binary inputs are automatically coerced to Utf8 DataType::Utf8 | DataType::Binary => $utf8Type, + // Utf8View inputs will yield Utf8View outputs + DataType::Utf8View => DataType::Utf8View, DataType::Null => DataType::Null, DataType::Dictionary(_, value_type) => match **value_type { DataType::LargeUtf8 | DataType::LargeBinary => $largeUtf8Type, diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs index 520b6b53b32d..9ec9fd5f11fd 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs @@ -233,6 +233,9 @@ pub fn cell_to_string(col: &ArrayRef, row: usize) -> Result { DataType::Utf8 => { Ok(varchar_to_str(get_row_value!(array::StringArray, col, row))) } + DataType::Utf8View => { + Ok(varchar_to_str(get_row_value!(array::StringViewArray, col, row))) + } _ => { let f = ArrayFormatter::try_new(col.as_ref(), &DEFAULT_FORMAT_OPTIONS); Ok(f.unwrap().value(row).to_string()) diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 3ba4e271c2f6..3f9a4793f655 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -324,3 +324,24 @@ logical_plan statement ok drop table test; + +# coercion from stringview to integer, as input to make_date +query D +select make_date(arrow_cast('2024', 'Utf8View'), arrow_cast('01', 'Utf8View'), arrow_cast('23', 'Utf8View')) +---- +2024-01-23 + +# coercions between stringview and date types +statement ok +create table dates (dt date) as values + (date '2024-01-23'), + (date '2023-11-30'); + +query D +select t.dt from dates t where arrow_cast('2024-01-01', 'Utf8View') < t.dt; +---- +2024-01-23 + + +statement ok +drop table dates;