From 3e02689e3464bc8cf929a0d116888fb6f59999fa Mon Sep 17 00:00:00 2001
From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com>
Date: Thu, 8 Aug 2024 14:59:48 -0500
Subject: [PATCH] Add time dictionary coercions (#6208)

* Add time dictionary coercions

* format

* Pass through primitive values
---
 arrow-cast/src/cast/dictionary.rs | 59 +++++++++++++++++++++++++++++++
 arrow-cast/src/cast/mod.rs        | 30 ++++++++++++++++
 2 files changed, 89 insertions(+)

diff --git a/arrow-cast/src/cast/dictionary.rs b/arrow-cast/src/cast/dictionary.rs
index ee2021d15b60..daaddc4915ef 100644
--- a/arrow-cast/src/cast/dictionary.rs
+++ b/arrow-cast/src/cast/dictionary.rs
@@ -162,6 +162,26 @@ where
     take(cast_dict_values.as_ref(), dict_array.keys(), None)
 }
 
+/// Pack a data type into a dictionary array passing the values through a primitive array
+pub(crate) fn pack_array_to_dictionary_via_primitive<K: ArrowDictionaryKeyType>(
+    array: &dyn Array,
+    primitive_type: DataType,
+    dict_value_type: &DataType,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    let primitive = cast_with_options(array, &primitive_type, cast_options)?;
+    let dict = cast_with_options(
+        primitive.as_ref(),
+        &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(primitive_type)),
+        cast_options,
+    )?;
+    cast_with_options(
+        dict.as_ref(),
+        &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(dict_value_type.clone())),
+        cast_options,
+    )
+}
+
 /// Attempts to encode an array into an `ArrayDictionary` with index
 /// type K and value (dictionary) type value_type
 ///
@@ -188,6 +208,45 @@ pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
         Decimal256(_, _) => {
             pack_numeric_to_dictionary::<K, Decimal256Type>(array, dict_value_type, cast_options)
         }
+        Float16 => {
+            pack_numeric_to_dictionary::<K, Float16Type>(array, dict_value_type, cast_options)
+        }
+        Float32 => {
+            pack_numeric_to_dictionary::<K, Float32Type>(array, dict_value_type, cast_options)
+        }
+        Float64 => {
+            pack_numeric_to_dictionary::<K, Float64Type>(array, dict_value_type, cast_options)
+        }
+        Date32 => pack_array_to_dictionary_via_primitive::<K>(
+            array,
+            DataType::Int32,
+            dict_value_type,
+            cast_options,
+        ),
+        Date64 => pack_array_to_dictionary_via_primitive::<K>(
+            array,
+            DataType::Int64,
+            dict_value_type,
+            cast_options,
+        ),
+        Time32(_) => pack_array_to_dictionary_via_primitive::<K>(
+            array,
+            DataType::Int32,
+            dict_value_type,
+            cast_options,
+        ),
+        Time64(_) => pack_array_to_dictionary_via_primitive::<K>(
+            array,
+            DataType::Int64,
+            dict_value_type,
+            cast_options,
+        ),
+        Timestamp(_, _) => pack_array_to_dictionary_via_primitive::<K>(
+            array,
+            DataType::Int64,
+            dict_value_type,
+            cast_options,
+        ),
         Utf8 => {
             // If the input is a view type, we can avoid casting (thus copying) the data
             if array.data_type() == &DataType::Utf8View {
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 9f552ec72502..93f8a06ea02a 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -6768,6 +6768,36 @@ mod tests {
         assert_eq!(array_to_strings(&cast_array), expected);
     }
 
+    #[test]
+    fn test_cast_time_array_to_dict() {
+        use DataType::*;
+
+        let array = Arc::new(Date32Array::from(vec![Some(1000), None, Some(2000)])) as ArrayRef;
+
+        let expected = vec!["1972-09-27", "null", "1975-06-24"];
+
+        let cast_type = Dictionary(Box::new(UInt8), Box::new(Date32));
+        let cast_array = cast(&array, &cast_type).expect("cast failed");
+        assert_eq!(cast_array.data_type(), &cast_type);
+        assert_eq!(array_to_strings(&cast_array), expected);
+    }
+
+    #[test]
+    fn test_cast_timestamp_array_to_dict() {
+        use DataType::*;
+
+        let array = Arc::new(
+            TimestampSecondArray::from(vec![Some(1000), None, Some(2000)]).with_timezone_utc(),
+        ) as ArrayRef;
+
+        let expected = vec!["1970-01-01T00:16:40", "null", "1970-01-01T00:33:20"];
+
+        let cast_type = Dictionary(Box::new(UInt8), Box::new(Timestamp(TimeUnit::Second, None)));
+        let cast_array = cast(&array, &cast_type).expect("cast failed");
+        assert_eq!(cast_array.data_type(), &cast_type);
+        assert_eq!(array_to_strings(&cast_array), expected);
+    }
+
     #[test]
     fn test_cast_string_array_to_dict() {
         use DataType::*;