diff --git a/crates/polars-arrow/src/array/primitive/mod.rs b/crates/polars-arrow/src/array/primitive/mod.rs index 5f2d351af535..831a10c372cc 100644 --- a/crates/polars-arrow/src/array/primitive/mod.rs +++ b/crates/polars-arrow/src/array/primitive/mod.rs @@ -100,6 +100,10 @@ impl PrimitiveArray { values: Buffer, validity: Option, ) -> Self { + if cfg!(debug_assertions) { + check(&dtype, &values, validity.as_ref().map(|v| v.len())).unwrap(); + } + Self { dtype, values, diff --git a/crates/polars-arrow/src/compute/cast/mod.rs b/crates/polars-arrow/src/compute/cast/mod.rs index 9193abe8d476..4dd8857b95c3 100644 --- a/crates/polars-arrow/src/compute/cast/mod.rs +++ b/crates/polars-arrow/src/compute/cast/mod.rs @@ -689,22 +689,16 @@ pub fn cast( // temporal casts (Int32, Date32) => primitive_to_same_primitive_dyn::(array, to_type), - (Int32, Time32(TimeUnit::Second)) => primitive_to_same_primitive_dyn::(array, to_type), - (Int32, Time32(TimeUnit::Millisecond)) => { - primitive_to_same_primitive_dyn::(array, to_type) - }, + (Int32, Time32(TimeUnit::Second)) => primitive_dyn!(array, int32_to_time32s), + (Int32, Time32(TimeUnit::Millisecond)) => primitive_dyn!(array, int32_to_time32ms), // No support for microsecond/nanosecond with i32 (Date32, Int32) => primitive_to_same_primitive_dyn::(array, to_type), (Date32, Int64) => primitive_to_primitive_dyn::(array, to_type, options), (Time32(_), Int32) => primitive_to_same_primitive_dyn::(array, to_type), (Int64, Date64) => primitive_to_same_primitive_dyn::(array, to_type), // No support for second/milliseconds with i64 - (Int64, Time64(TimeUnit::Microsecond)) => { - primitive_to_same_primitive_dyn::(array, to_type) - }, - (Int64, Time64(TimeUnit::Nanosecond)) => { - primitive_to_same_primitive_dyn::(array, to_type) - }, + (Int64, Time64(TimeUnit::Microsecond)) => primitive_dyn!(array, int64_to_time64us), + (Int64, Time64(TimeUnit::Nanosecond)) => primitive_dyn!(array, int64_to_time64ns), (Date64, Int32) => primitive_to_primitive_dyn::(array, to_type, options), (Date64, Int64) => primitive_to_same_primitive_dyn::(array, to_type), diff --git a/crates/polars-arrow/src/compute/cast/primitive_to.rs b/crates/polars-arrow/src/compute/cast/primitive_to.rs index 6fa9b9fb01fd..03a9c427cf4b 100644 --- a/crates/polars-arrow/src/compute/cast/primitive_to.rs +++ b/crates/polars-arrow/src/compute/cast/primitive_to.rs @@ -325,6 +325,83 @@ pub fn primitive_to_dictionary( Ok(array.into()) } +/// # Safety +/// +/// `dtype` should be valid for primitive. +pub unsafe fn primitive_map_is_valid( + from: &PrimitiveArray, + f: impl Fn(T) -> bool, + dtype: ArrowDataType, +) -> PrimitiveArray { + let values = from.values().clone(); + + let validity: Bitmap = values.iter().map(|&v| f(v)).collect(); + + let validity = if validity.unset_bits() > 0 { + let new_validity = match from.validity() { + None => validity, + Some(v) => v & &validity, + }; + + Some(new_validity) + } else { + from.validity().cloned() + }; + + // SAFETY: + // - Validity did not change length + // - dtype should be valid + unsafe { PrimitiveArray::new_unchecked(dtype, values, validity) } +} + +/// Conversion of `Int32` to `Time32(TimeUnit::Second)` +pub fn int32_to_time32s(from: &PrimitiveArray) -> PrimitiveArray { + // SAFETY: Time32(TimeUnit::Second) is valid for Int32 + unsafe { + primitive_map_is_valid( + from, + |v| (0..SECONDS_IN_DAY as i32).contains(&v), + ArrowDataType::Time32(TimeUnit::Second), + ) + } +} + +/// Conversion of `Int32` to `Time32(TimeUnit::Millisecond)` +pub fn int32_to_time32ms(from: &PrimitiveArray) -> PrimitiveArray { + // SAFETY: Time32(TimeUnit::Millisecond) is valid for Int32 + unsafe { + primitive_map_is_valid( + from, + |v| (0..MILLISECONDS_IN_DAY as i32).contains(&v), + ArrowDataType::Time32(TimeUnit::Millisecond), + ) + } +} + +/// Conversion of `Int64` to `Time32(TimeUnit::Microsecond)` +pub fn int64_to_time64us(from: &PrimitiveArray) -> PrimitiveArray { + // SAFETY: Time64(TimeUnit::Microsecond) is valid for Int64 + unsafe { + primitive_map_is_valid( + from, + |v| (0..MICROSECONDS_IN_DAY).contains(&v), + ArrowDataType::Time32(TimeUnit::Microsecond), + ) + } +} + +/// Conversion of `Int64` to `Time32(TimeUnit::Nanosecond)` +pub fn int64_to_time64ns(from: &PrimitiveArray) -> PrimitiveArray { + // SAFETY: Time64(TimeUnit::Nanosecond) is valid for Int64 + unsafe { + primitive_map_is_valid( + from, + |v| (0..NANOSECONDS_IN_DAY).contains(&v), + ArrowDataType::Time64(TimeUnit::Nanosecond), + ) + } +} + /// Conversion of dates pub fn date32_to_date64(from: &PrimitiveArray) -> PrimitiveArray { unary( diff --git a/crates/polars-core/src/chunked_array/logical/time.rs b/crates/polars-core/src/chunked_array/logical/time.rs index 7e404ad98b61..d82a090817b0 100644 --- a/crates/polars-core/src/chunked_array/logical/time.rs +++ b/crates/polars-core/src/chunked_array/logical/time.rs @@ -1,3 +1,5 @@ +use arrow::compute::cast::CastOptionsImpl; + use super::*; use crate::prelude::*; @@ -10,8 +12,45 @@ impl From for TimeChunked { } impl Int64Chunked { - pub fn into_time(self) -> TimeChunked { - TimeChunked::new_logical(self) + pub fn into_time(mut self) -> TimeChunked { + let mut null_count = 0; + + // Invalid time values are replaced with `null` during the arrow cast. We utilize the + // validity coming from there to create the new TimeChunked. + let chunks = std::mem::take(&mut self.chunks) + .into_iter() + .map(|chunk| { + // We need to retain the PhysicalType underneath, but we should properly update the + // validity as that might change because Time is not valid for all values of Int64. + let casted = arrow::compute::cast::cast( + chunk.as_ref(), + &ArrowDataType::Time64(ArrowTimeUnit::Nanosecond), + CastOptionsImpl::default(), + ) + .unwrap(); + let validity = casted.validity(); + + match validity { + None => chunk, + Some(validity) => { + null_count += validity.unset_bits(); + chunk.with_validity(Some(validity.clone())) + }, + } + }) + .collect::>>(); + + let null_count = null_count as IdxSize; + + debug_assert!(null_count >= self.null_count); + + // @TODO: We throw away metadata here. That is mostly not needed. + // SAFETY: We calculated the null_count again. And we are taking the rest from the previous + // Int64Chunked. + let int64chunked = + unsafe { Self::new_with_dims(self.field.clone(), chunks, self.length, null_count) }; + + TimeChunked::new_logical(int64chunked) } } diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index 6e8e92019f81..a41c5822283c 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -713,10 +713,6 @@ impl Series { #[cfg(feature = "dtype-time")] pub(crate) fn into_time(self) -> Series { - #[cfg(not(feature = "dtype-time"))] - { - panic!("activate feature dtype-time") - } match self.dtype() { DataType::Int64 => self.i64().unwrap().clone().into_time().into_series(), DataType::Time => self diff --git a/crates/polars-expr/src/expressions/literal.rs b/crates/polars-expr/src/expressions/literal.rs index 88303ff31697..2089e4cf5bb4 100644 --- a/crates/polars-expr/src/expressions/literal.rs +++ b/crates/polars-expr/src/expressions/literal.rs @@ -1,6 +1,7 @@ use std::borrow::Cow; use std::ops::Deref; +use arrow::temporal_conversions::NANOSECONDS_IN_DAY; use polars_core::prelude::*; use polars_core::utils::NoNull; use polars_plan::constants::get_literal_name; @@ -91,9 +92,18 @@ impl PhysicalExpr for LiteralExpr { .into_date() .into_series(), #[cfg(feature = "dtype-time")] - Time(v) => Int64Chunked::full(get_literal_name().clone(), *v, 1) - .into_time() - .into_series(), + Time(v) => { + if !(0..NANOSECONDS_IN_DAY).contains(v) { + polars_bail!( + InvalidOperation: "value `{v}` is out-of-range for `time` which can be 0 - {}", + NANOSECONDS_IN_DAY - 1 + ); + } + + Int64Chunked::full(get_literal_name().clone(), *v, 1) + .into_time() + .into_series() + }, Series(series) => series.deref().clone(), OtherScalar(s) => s.clone().into_series(get_literal_name().clone()), lv @ (Int(_) | Float(_) | StrCat(_)) => polars_core::prelude::Series::from_any_values( diff --git a/py-polars/tests/unit/datatypes/test_time.py b/py-polars/tests/unit/datatypes/test_time.py index c7915a342db6..a7eb5c095964 100644 --- a/py-polars/tests/unit/datatypes/test_time.py +++ b/py-polars/tests/unit/datatypes/test_time.py @@ -1,5 +1,7 @@ from datetime import time +import pytest + import polars as pl @@ -15,3 +17,17 @@ def test_time_microseconds_3843() -> None: in_val = [time(0, 9, 11, 558332)] s = pl.Series(in_val) assert s.to_list() == in_val + + +def test_invalid_casts() -> None: + with pytest.raises(pl.exceptions.InvalidOperationError): + pl.DataFrame({"a": []}).with_columns(a=pl.lit(-1).cast(pl.Time)) + + with pytest.raises(pl.exceptions.InvalidOperationError): + pl.Series([-1]).cast(pl.Time) + + with pytest.raises(pl.exceptions.InvalidOperationError): + pl.Series([24 * 60 * 60 * 1_000_000_000]).cast(pl.Time) + + largest_value = pl.Series([24 * 60 * 60 * 1_000_000_000 - 1]).cast(pl.Time) + assert "23:59:59.999999999" in str(largest_value)