From f2d0264b8a31bcd4995433a44ac990ba310effb1 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 17 Jul 2024 11:51:24 +0100 Subject: [PATCH 1/5] Enable casting from Utf8View -> string or temporal types --- arrow-cast/src/cast/mod.rs | 62 +++++++++++++++++++++++++++++++++-- arrow-cast/src/cast/string.rs | 25 +++++++++++--- 2 files changed, 81 insertions(+), 6 deletions(-) diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index c9de714e7d55..564c2b512d38 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -210,7 +210,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { (LargeBinary, Binary | Utf8 | LargeUtf8 | FixedSizeBinary(_) | BinaryView) => true, (FixedSizeBinary(_), Binary | LargeBinary) => true, ( - Utf8 | LargeUtf8, + Utf8 | LargeUtf8 | Utf8View, Binary | LargeBinary | Utf8 @@ -228,7 +228,6 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { | Interval(_), ) => true, (Utf8 | LargeUtf8, Utf8View) => true, - (Utf8View, Utf8 | LargeUtf8) => true, (BinaryView, Binary | LargeBinary) => true, (Utf8 | LargeUtf8, _) => to_type.is_numeric() && to_type != &Float16, (_, Utf8 | LargeUtf8) => from_type.is_primitive(), @@ -1269,6 +1268,65 @@ pub fn cast_with_options( "Casting from {from_type:?} to {to_type:?} not supported", ))), }, + (Utf8View, _) => match to_type { + UInt8 => parse_string_view::(array, cast_options), + UInt16 => parse_string_view::(array, cast_options), + UInt32 => parse_string_view::(array, cast_options), + UInt64 => parse_string_view::(array, cast_options), + Int8 => parse_string_view::(array, cast_options), + Int16 => parse_string_view::(array, cast_options), + Int32 => parse_string_view::(array, cast_options), + Int64 => parse_string_view::(array, cast_options), + Float32 => parse_string_view::(array, cast_options), + Float64 => parse_string_view::(array, cast_options), + Date32 => parse_string_view::(array, cast_options), + Date64 => parse_string_view::(array, cast_options), + // Binary => Ok(Arc::new(BinaryArray::from( + // array.as_string::().clone(), + // ))), + // LargeBinary => { + // let binary = BinaryArray::from(array.as_string::().clone()); + // cast_byte_container::(&binary) + // } + Utf8View => Ok(Arc::new(StringViewArray::from(array.as_string::()))), + LargeUtf8 => cast_byte_container::(array), + Time32(TimeUnit::Second) => parse_string_view::(array, cast_options), + Time32(TimeUnit::Millisecond) => { + parse_string_view::(array, cast_options) + } + Time64(TimeUnit::Microsecond) => { + parse_string_view::(array, cast_options) + } + Time64(TimeUnit::Nanosecond) => { + parse_string_view::(array, cast_options) + } + // Timestamp(TimeUnit::Second, to_tz) => { + // cast_string_to_timestamp::(array, to_tz, cast_options) + // } + // Timestamp(TimeUnit::Millisecond, to_tz) => cast_string_to_timestamp::< + // i32, + // TimestampMillisecondType, + // >(array, to_tz, cast_options), + // Timestamp(TimeUnit::Microsecond, to_tz) => cast_string_to_timestamp::< + // i32, + // TimestampMicrosecondType, + // >(array, to_tz, cast_options), + // Timestamp(TimeUnit::Nanosecond, to_tz) => { + // cast_string_to_timestamp::(array, to_tz, cast_options) + // } + // Interval(IntervalUnit::YearMonth) => { + // cast_string_to_year_month_interval::(array, cast_options) + // } + // Interval(IntervalUnit::DayTime) => { + // cast_string_to_day_time_interval::(array, cast_options) + // } + // Interval(IntervalUnit::MonthDayNano) => { + // cast_string_to_month_day_nano_interval::(array, cast_options) + // } + _ => Err(ArrowError::CastError(format!( + "Casting from {from_type:?} to {to_type:?} not supported", + ))), + }, (LargeUtf8, _) => match to_type { UInt8 => parse_string::(array, cast_options), UInt16 => parse_string::(array, cast_options), diff --git a/arrow-cast/src/cast/string.rs b/arrow-cast/src/cast/string.rs index 4b83a2a5e7da..8f23f61b49cf 100644 --- a/arrow-cast/src/cast/string.rs +++ b/arrow-cast/src/cast/string.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use arrow_buffer::NullBuffer; use crate::cast::*; pub(crate) fn value_to_string( @@ -43,8 +44,25 @@ pub(crate) fn parse_string( cast_options: &CastOptions, ) -> Result { let string_array = array.as_string::(); + parse_string_iter::(string_array.iter(), cast_options, || string_array.nulls().cloned()) +} + +/// Parse UTF-8 View +pub(crate) fn parse_string_view( + array: &dyn Array, + cast_options: &CastOptions, +) -> Result { + let string_view_array = array.as_string_view(); + parse_string_iter::(string_view_array.iter(), cast_options, || string_view_array.nulls().cloned()) +} + +fn parse_string_iter<'a, P: Parser, I: Iterator>, F: FnOnce() -> Option>( + iter: I, + cast_options: &CastOptions, + nulls: F, +) -> Result { let array = if cast_options.safe { - let iter = string_array.iter().map(|x| x.and_then(P::parse)); + let iter = iter.map(|x| x.and_then(P::parse)); // Benefit: // 20% performance improvement @@ -52,8 +70,7 @@ pub(crate) fn parse_string( // The iterator is trustedLen because it comes from an `StringArray`. unsafe { PrimitiveArray::

::from_trusted_len_iter(iter) } } else { - let v = string_array - .iter() + let v = iter .map(|x| match x { Some(v) => P::parse(v).ok_or_else(|| { ArrowError::CastError(format!( @@ -65,7 +82,7 @@ pub(crate) fn parse_string( None => Ok(P::Native::default()), }) .collect::, ArrowError>>()?; - PrimitiveArray::new(v.into(), string_array.nulls().cloned()) + PrimitiveArray::new(v.into(), nulls()) }; Ok(Arc::new(array) as ArrayRef) From 1d5b6136414882984da0748468b580e6af1db212 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Thu, 18 Jul 2024 15:33:01 +0100 Subject: [PATCH 2/5] save --- arrow-cast/src/cast/mod.rs | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 564c2b512d38..272c7d08e9fc 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -67,6 +67,7 @@ use arrow_schema::*; use arrow_select::take::take; use num::cast::AsPrimitive; use num::{NumCast, ToPrimitive}; +use arrow_schema::DataType::{Utf8, Utf8View}; /// CastOptions provides a way to override the default cast behaviors #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -1281,15 +1282,10 @@ pub fn cast_with_options( Float64 => parse_string_view::(array, cast_options), Date32 => parse_string_view::(array, cast_options), Date64 => parse_string_view::(array, cast_options), - // Binary => Ok(Arc::new(BinaryArray::from( - // array.as_string::().clone(), - // ))), - // LargeBinary => { - // let binary = BinaryArray::from(array.as_string::().clone()); - // cast_byte_container::(&binary) - // } - Utf8View => Ok(Arc::new(StringViewArray::from(array.as_string::()))), - LargeUtf8 => cast_byte_container::(array), + Binary => cast_view_to_byte::>(array), + LargeBinary => cast_view_to_byte::>(array), + Utf8 => cast_view_to_byte::>(array), + LargeUtf8 => cast_view_to_byte::>(array), Time32(TimeUnit::Second) => parse_string_view::(array, cast_options), Time32(TimeUnit::Millisecond) => { parse_string_view::(array, cast_options) @@ -1423,8 +1419,6 @@ pub fn cast_with_options( "Casting from {from_type:?} to {to_type:?} not supported", ))), }, - (Utf8View, Utf8) => cast_view_to_byte::>(array), - (Utf8View, LargeUtf8) => cast_view_to_byte::>(array), (BinaryView, Binary) => cast_view_to_byte::>(array), (BinaryView, LargeBinary) => { cast_view_to_byte::>(array) From b0b057738332a54e5e56497a6797fc7001e784f2 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Thu, 18 Jul 2024 16:41:24 +0100 Subject: [PATCH 3/5] implement casting utf8view -> timestamp/interval types, with tests --- arrow-cast/src/cast/mod.rs | 139 ++++++++++++++++++++++----------- arrow-cast/src/cast/string.rs | 140 ++++++++++++++++++++++++++-------- 2 files changed, 201 insertions(+), 78 deletions(-) diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 272c7d08e9fc..f44140a0d2d7 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -1296,29 +1296,27 @@ pub fn cast_with_options( Time64(TimeUnit::Nanosecond) => { parse_string_view::(array, cast_options) } - // Timestamp(TimeUnit::Second, to_tz) => { - // cast_string_to_timestamp::(array, to_tz, cast_options) - // } - // Timestamp(TimeUnit::Millisecond, to_tz) => cast_string_to_timestamp::< - // i32, - // TimestampMillisecondType, - // >(array, to_tz, cast_options), - // Timestamp(TimeUnit::Microsecond, to_tz) => cast_string_to_timestamp::< - // i32, - // TimestampMicrosecondType, - // >(array, to_tz, cast_options), - // Timestamp(TimeUnit::Nanosecond, to_tz) => { - // cast_string_to_timestamp::(array, to_tz, cast_options) - // } - // Interval(IntervalUnit::YearMonth) => { - // cast_string_to_year_month_interval::(array, cast_options) - // } - // Interval(IntervalUnit::DayTime) => { - // cast_string_to_day_time_interval::(array, cast_options) - // } - // Interval(IntervalUnit::MonthDayNano) => { - // cast_string_to_month_day_nano_interval::(array, cast_options) - // } + Timestamp(TimeUnit::Second, to_tz) => { + cast_view_to_timestamp::(array, to_tz, cast_options) + } + Timestamp(TimeUnit::Millisecond, to_tz) => { + cast_view_to_timestamp::(array, to_tz, cast_options) + } + Timestamp(TimeUnit::Microsecond, to_tz) => { + cast_view_to_timestamp::(array, to_tz, cast_options) + } + Timestamp(TimeUnit::Nanosecond, to_tz) => { + cast_view_to_timestamp::(array, to_tz, cast_options) + } + Interval(IntervalUnit::YearMonth) => { + cast_view_to_year_month_interval(array, cast_options) + } + Interval(IntervalUnit::DayTime) => { + cast_view_to_day_time_interval(array, cast_options) + } + Interval(IntervalUnit::MonthDayNano) => { + cast_view_to_month_day_nano_interval(array, cast_options) + } _ => Err(ArrowError::CastError(format!( "Casting from {from_type:?} to {to_type:?} not supported", ))), @@ -4012,6 +4010,11 @@ mod tests { #[test] fn test_cast_string_to_timestamp() { + let a0 = Arc::new(StringViewArray::from(vec![ + Some("2020-09-08T12:00:00.123456789+00:00"), + Some("Not a valid date"), + None, + ])) as ArrayRef; let a1 = Arc::new(StringArray::from(vec![ Some("2020-09-08T12:00:00.123456789+00:00"), Some("Not a valid date"), @@ -4022,7 +4025,7 @@ mod tests { Some("Not a valid date"), None, ])) as ArrayRef; - for array in &[a1, a2] { + for array in &[a0, a1, a2] { for time_unit in &[ TimeUnit::Second, TimeUnit::Millisecond, @@ -4091,6 +4094,11 @@ mod tests { #[test] fn test_cast_string_to_date32() { + let a0 = Arc::new(StringViewArray::from(vec![ + Some("2018-12-25"), + Some("Not a valid date"), + None, + ])) as ArrayRef; let a1 = Arc::new(StringArray::from(vec![ Some("2018-12-25"), Some("Not a valid date"), @@ -4101,7 +4109,7 @@ mod tests { Some("Not a valid date"), None, ])) as ArrayRef; - for array in &[a1, a2] { + for array in &[a0, a1, a2] { let to_type = DataType::Date32; let b = cast(array, &to_type).unwrap(); let c = b.as_primitive::(); @@ -4123,30 +4131,47 @@ mod tests { #[test] fn test_cast_string_format_yyyymmdd_to_date32() { - let a = Arc::new(StringArray::from(vec![ + let a0 = Arc::new(StringViewArray::from(vec![ + Some("2020-12-25"), + Some("20201117"), + ])) as ArrayRef; + let a1 = Arc::new(StringArray::from(vec![ + Some("2020-12-25"), + Some("20201117"), + ])) as ArrayRef; + let a2 = Arc::new(LargeStringArray::from(vec![ Some("2020-12-25"), Some("20201117"), ])) as ArrayRef; - let to_type = DataType::Date32; - let options = CastOptions { - safe: false, - format_options: FormatOptions::default(), - }; - let result = cast_with_options(&a, &to_type, &options).unwrap(); - let c = result.as_primitive::(); - assert_eq!( - chrono::NaiveDate::from_ymd_opt(2020, 12, 25), - c.value_as_date(0) - ); - assert_eq!( - chrono::NaiveDate::from_ymd_opt(2020, 11, 17), - c.value_as_date(1) - ); + for array in &[a0, a1, a2] { + let to_type = DataType::Date32; + let options = CastOptions { + safe: false, + format_options: FormatOptions::default(), + }; + let result = cast_with_options(&array, &to_type, &options).unwrap(); + let c = result.as_primitive::(); + assert_eq!( + chrono::NaiveDate::from_ymd_opt(2020, 12, 25), + c.value_as_date(0) + ); + assert_eq!( + chrono::NaiveDate::from_ymd_opt(2020, 11, 17), + c.value_as_date(1) + ); + } } #[test] fn test_cast_string_to_time32second() { + let a0 = Arc::new(StringViewArray::from(vec![ + Some("08:08:35.091323414"), + Some("08:08:60.091323414"), // leap second + Some("08:08:61.091323414"), // not valid + Some("Not a valid time"), + None, + ])) as ArrayRef; let a1 = Arc::new(StringArray::from(vec![ Some("08:08:35.091323414"), Some("08:08:60.091323414"), // leap second @@ -4161,7 +4186,7 @@ mod tests { Some("Not a valid time"), None, ])) as ArrayRef; - for array in &[a1, a2] { + for array in &[a0, a1, a2] { let to_type = DataType::Time32(TimeUnit::Second); let b = cast(array, &to_type).unwrap(); let c = b.as_primitive::(); @@ -4182,6 +4207,13 @@ mod tests { #[test] fn test_cast_string_to_time32millisecond() { + let a0 = Arc::new(StringViewArray::from(vec![ + Some("08:08:35.091323414"), + Some("08:08:60.091323414"), // leap second + Some("08:08:61.091323414"), // not valid + Some("Not a valid time"), + None, + ])) as ArrayRef; let a1 = Arc::new(StringArray::from(vec![ Some("08:08:35.091323414"), Some("08:08:60.091323414"), // leap second @@ -4196,7 +4228,7 @@ mod tests { Some("Not a valid time"), None, ])) as ArrayRef; - for array in &[a1, a2] { + for array in &[a0, a1, a2] { let to_type = DataType::Time32(TimeUnit::Millisecond); let b = cast(array, &to_type).unwrap(); let c = b.as_primitive::(); @@ -4217,6 +4249,11 @@ mod tests { #[test] fn test_cast_string_to_time64microsecond() { + let a0 = Arc::new(StringViewArray::from(vec![ + Some("08:08:35.091323414"), + Some("Not a valid time"), + None, + ])) as ArrayRef; let a1 = Arc::new(StringArray::from(vec![ Some("08:08:35.091323414"), Some("Not a valid time"), @@ -4227,7 +4264,7 @@ mod tests { Some("Not a valid time"), None, ])) as ArrayRef; - for array in &[a1, a2] { + for array in &[a0, a1, a2] { let to_type = DataType::Time64(TimeUnit::Microsecond); let b = cast(array, &to_type).unwrap(); let c = b.as_primitive::(); @@ -4246,6 +4283,11 @@ mod tests { #[test] fn test_cast_string_to_time64nanosecond() { + let a0 = Arc::new(StringViewArray::from(vec![ + Some("08:08:35.091323414"), + Some("Not a valid time"), + None, + ])) as ArrayRef; let a1 = Arc::new(StringArray::from(vec![ Some("08:08:35.091323414"), Some("Not a valid time"), @@ -4256,7 +4298,7 @@ mod tests { Some("Not a valid time"), None, ])) as ArrayRef; - for array in &[a1, a2] { + for array in &[a0, a1, a2] { let to_type = DataType::Time64(TimeUnit::Nanosecond); let b = cast(array, &to_type).unwrap(); let c = b.as_primitive::(); @@ -4275,6 +4317,11 @@ mod tests { #[test] fn test_cast_string_to_date64() { + let a0 = Arc::new(StringViewArray::from(vec![ + Some("2020-09-08T12:00:00"), + Some("Not a valid date"), + None, + ])) as ArrayRef; let a1 = Arc::new(StringArray::from(vec![ Some("2020-09-08T12:00:00"), Some("Not a valid date"), @@ -4285,7 +4332,7 @@ mod tests { Some("Not a valid date"), None, ])) as ArrayRef; - for array in &[a1, a2] { + for array in &[a0, a1, a2] { let to_type = DataType::Date64; let b = cast(array, &to_type).unwrap(); let c = b.as_primitive::(); diff --git a/arrow-cast/src/cast/string.rs b/arrow-cast/src/cast/string.rs index 8f23f61b49cf..293d2aecb86b 100644 --- a/arrow-cast/src/cast/string.rs +++ b/arrow-cast/src/cast/string.rs @@ -56,7 +56,7 @@ pub(crate) fn parse_string_view( parse_string_iter::(string_view_array.iter(), cast_options, || string_view_array.nulls().cloned()) } -fn parse_string_iter<'a, P: Parser, I: Iterator>, F: FnOnce() -> Option>( +fn parse_string_iter<'a, P: Parser, I: Iterator>, F: FnOnce() -> Option>( iter: I, cast_options: &CastOptions, nulls: F, @@ -98,20 +98,37 @@ pub(crate) fn cast_string_to_timestamp = match to_tz { Some(tz) => { let tz: Tz = tz.as_ref().parse()?; - cast_string_to_timestamp_impl(array, &tz, cast_options)? + cast_string_to_timestamp_impl(array.iter(), &tz, cast_options)? } - None => cast_string_to_timestamp_impl(array, &Utc, cast_options)?, + None => cast_string_to_timestamp_impl(array.iter(), &Utc, cast_options)?, }; Ok(Arc::new(out.with_timezone_opt(to_tz.clone()))) } -fn cast_string_to_timestamp_impl( - array: &GenericStringArray, +/// Casts string view arrays to an ArrowTimestampType (TimeStampNanosecondArray, etc.) +pub(crate) fn cast_view_to_timestamp( + array: &dyn Array, + to_tz: &Option>, + cast_options: &CastOptions, +) -> Result { + let array = array.as_string_view(); + let out: PrimitiveArray = match to_tz { + Some(tz) => { + let tz: Tz = tz.as_ref().parse()?; + cast_string_to_timestamp_impl(array.iter(), &tz, cast_options)? + } + None => cast_string_to_timestamp_impl(array.iter(), &Utc, cast_options)?, + }; + Ok(Arc::new(out.with_timezone_opt(to_tz.clone()))) +} + +fn cast_string_to_timestamp_impl<'a, I: Iterator>, T: ArrowTimestampType, Tz: TimeZone>( + iter: I, tz: &Tz, cast_options: &CastOptions, ) -> Result, ArrowError> { if cast_options.safe { - let iter = array.iter().map(|v| { + let iter = iter.map(|v| { v.and_then(|v| { let naive = string_to_datetime(tz, v).ok()?.naive_utc(); T::make_value(naive) @@ -124,8 +141,7 @@ fn cast_string_to_timestamp_impl>, _>>()?; @@ -165,29 +181,7 @@ where .as_any() .downcast_ref::>() .unwrap(); - let interval_array = if cast_options.safe { - let iter = string_array - .iter() - .map(|v| v.and_then(|v| parse_function(v).ok())); - - // Benefit: - // 20% performance improvement - // Soundness: - // The iterator is trustedLen because it comes from an `StringArray`. - unsafe { PrimitiveArray::::from_trusted_len_iter(iter) } - } else { - let vec = string_array - .iter() - .map(|v| v.map(parse_function).transpose()) - .collect::, ArrowError>>()?; - - // Benefit: - // 20% performance improvement - // Soundness: - // The iterator is trustedLen because it comes from an `StringArray`. - unsafe { PrimitiveArray::::from_trusted_len_iter(vec) } - }; - Ok(Arc::new(interval_array) as ArrayRef) + cast_string_to_interval_impl::<_, ArrowType, F>(string_array.iter(), cast_options, parse_function) } pub(crate) fn cast_string_to_year_month_interval( @@ -223,6 +217,88 @@ pub(crate) fn cast_string_to_month_day_nano_interval( ) } +pub(crate) fn cast_view_to_interval( + array: &dyn Array, + cast_options: &CastOptions, + parse_function: F, +) -> Result +where + ArrowType: ArrowPrimitiveType, + F: Fn(&str) -> Result + Copy, +{ + let string_view_array = array + .as_any() + .downcast_ref::() + .unwrap(); + cast_string_to_interval_impl::<_, ArrowType, F>(string_view_array.iter(), cast_options, parse_function) +} + +pub(crate) fn cast_view_to_year_month_interval( + array: &dyn Array, + cast_options: &CastOptions, +) -> Result { + cast_view_to_interval::<_, IntervalYearMonthType>( + array, + cast_options, + parse_interval_year_month, + ) +} + +pub(crate) fn cast_view_to_day_time_interval( + array: &dyn Array, + cast_options: &CastOptions, +) -> Result { + cast_view_to_interval::<_, IntervalDayTimeType>( + array, + cast_options, + parse_interval_day_time, + ) +} + +pub(crate) fn cast_view_to_month_day_nano_interval( + array: &dyn Array, + cast_options: &CastOptions, +) -> Result { + cast_view_to_interval::<_, IntervalMonthDayNanoType>( + array, + cast_options, + parse_interval_month_day_nano, + ) +} + +fn cast_string_to_interval_impl<'a, I, ArrowType, F>( + iter: I, + cast_options: &CastOptions, + parse_function: F, +) -> Result +where + I: Iterator>, + ArrowType: ArrowPrimitiveType, + F: Fn(&str) -> Result + Copy, +{ + let interval_array = if cast_options.safe { + let iter = iter + .map(|v| v.and_then(|v| parse_function(v).ok())); + + // Benefit: + // 20% performance improvement + // Soundness: + // The iterator is trustedLen because it comes from an `StringArray`. + unsafe { PrimitiveArray::::from_trusted_len_iter(iter) } + } else { + let vec = iter + .map(|v| v.map(parse_function).transpose()) + .collect::, ArrowError>>()?; + + // Benefit: + // 20% performance improvement + // Soundness: + // The iterator is trustedLen because it comes from an `StringArray`. + unsafe { PrimitiveArray::::from_trusted_len_iter(vec) } + }; + Ok(Arc::new(interval_array) as ArrayRef) +} + /// A specified helper to cast from `GenericBinaryArray` to `GenericStringArray` when they have same /// offset size so re-encoding offset is unnecessary. pub(crate) fn cast_binary_to_string( From 180ea493e8ee165f7b67945806be1397c5472e4f Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Thu, 18 Jul 2024 16:51:54 +0100 Subject: [PATCH 4/5] fix clippy --- arrow-cast/src/cast/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index f44140a0d2d7..a5fb380b6ac6 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -67,7 +67,6 @@ use arrow_schema::*; use arrow_select::take::take; use num::cast::AsPrimitive; use num::{NumCast, ToPrimitive}; -use arrow_schema::DataType::{Utf8, Utf8View}; /// CastOptions provides a way to override the default cast behaviors #[derive(Debug, Clone, PartialEq, Eq, Hash)] From 73478cda56a31548667d934dac276c01bbc8a673 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 19 Jul 2024 15:00:26 -0400 Subject: [PATCH 5/5] fmt --- arrow-cast/src/cast/mod.rs | 4 +-- arrow-cast/src/cast/string.rs | 50 ++++++++++++++++++++++------------- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index a5fb380b6ac6..1770157bcfd9 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -1310,9 +1310,7 @@ pub fn cast_with_options( Interval(IntervalUnit::YearMonth) => { cast_view_to_year_month_interval(array, cast_options) } - Interval(IntervalUnit::DayTime) => { - cast_view_to_day_time_interval(array, cast_options) - } + Interval(IntervalUnit::DayTime) => cast_view_to_day_time_interval(array, cast_options), Interval(IntervalUnit::MonthDayNano) => { cast_view_to_month_day_nano_interval(array, cast_options) } diff --git a/arrow-cast/src/cast/string.rs b/arrow-cast/src/cast/string.rs index 293d2aecb86b..7d0e7e21c859 100644 --- a/arrow-cast/src/cast/string.rs +++ b/arrow-cast/src/cast/string.rs @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -use arrow_buffer::NullBuffer; use crate::cast::*; +use arrow_buffer::NullBuffer; pub(crate) fn value_to_string( array: &dyn Array, @@ -44,7 +44,9 @@ pub(crate) fn parse_string( cast_options: &CastOptions, ) -> Result { let string_array = array.as_string::(); - parse_string_iter::(string_array.iter(), cast_options, || string_array.nulls().cloned()) + parse_string_iter::(string_array.iter(), cast_options, || { + string_array.nulls().cloned() + }) } /// Parse UTF-8 View @@ -53,10 +55,17 @@ pub(crate) fn parse_string_view( cast_options: &CastOptions, ) -> Result { let string_view_array = array.as_string_view(); - parse_string_iter::(string_view_array.iter(), cast_options, || string_view_array.nulls().cloned()) + parse_string_iter::(string_view_array.iter(), cast_options, || { + string_view_array.nulls().cloned() + }) } -fn parse_string_iter<'a, P: Parser, I: Iterator>, F: FnOnce() -> Option>( +fn parse_string_iter< + 'a, + P: Parser, + I: Iterator>, + F: FnOnce() -> Option, +>( iter: I, cast_options: &CastOptions, nulls: F, @@ -122,7 +131,12 @@ pub(crate) fn cast_view_to_timestamp( Ok(Arc::new(out.with_timezone_opt(to_tz.clone()))) } -fn cast_string_to_timestamp_impl<'a, I: Iterator>, T: ArrowTimestampType, Tz: TimeZone>( +fn cast_string_to_timestamp_impl< + 'a, + I: Iterator>, + T: ArrowTimestampType, + Tz: TimeZone, +>( iter: I, tz: &Tz, cast_options: &CastOptions, @@ -181,7 +195,11 @@ where .as_any() .downcast_ref::>() .unwrap(); - cast_string_to_interval_impl::<_, ArrowType, F>(string_array.iter(), cast_options, parse_function) + cast_string_to_interval_impl::<_, ArrowType, F>( + string_array.iter(), + cast_options, + parse_function, + ) } pub(crate) fn cast_string_to_year_month_interval( @@ -226,11 +244,12 @@ where ArrowType: ArrowPrimitiveType, F: Fn(&str) -> Result + Copy, { - let string_view_array = array - .as_any() - .downcast_ref::() - .unwrap(); - cast_string_to_interval_impl::<_, ArrowType, F>(string_view_array.iter(), cast_options, parse_function) + let string_view_array = array.as_any().downcast_ref::().unwrap(); + cast_string_to_interval_impl::<_, ArrowType, F>( + string_view_array.iter(), + cast_options, + parse_function, + ) } pub(crate) fn cast_view_to_year_month_interval( @@ -248,11 +267,7 @@ pub(crate) fn cast_view_to_day_time_interval( array: &dyn Array, cast_options: &CastOptions, ) -> Result { - cast_view_to_interval::<_, IntervalDayTimeType>( - array, - cast_options, - parse_interval_day_time, - ) + cast_view_to_interval::<_, IntervalDayTimeType>(array, cast_options, parse_interval_day_time) } pub(crate) fn cast_view_to_month_day_nano_interval( @@ -277,8 +292,7 @@ where F: Fn(&str) -> Result + Copy, { let interval_array = if cast_options.safe { - let iter = iter - .map(|v| v.and_then(|v| parse_function(v).ok())); + let iter = iter.map(|v| v.and_then(|v| parse_function(v).ok())); // Benefit: // 20% performance improvement