From be2b3c42f0ee0d9d0530a73d79bd1395328455ef Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Mon, 5 Feb 2024 23:32:58 +0100 Subject: [PATCH 1/3] Move Time to Rust --- py-polars/polars/series/series.py | 4 ++-- py-polars/src/conversion/chunked_array.rs | 17 +++++++++++------ py-polars/src/conversion/mod.rs | 2 +- py-polars/src/series/export.rs | 7 +++++++ 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index a8f82d9af321..7719c7a5a37d 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -4361,7 +4361,7 @@ def temporal_dtype_to_numpy(dtype: PolarsDataType) -> Any: zero_copy_only=zero_copy_only, writable=writable ) - if dtype in (Time, Decimal): + if dtype == Decimal: # There are no native NumPy "time" or "decimal" dtypes raise_no_zero_copy() return np.array(self.to_list(), dtype="object", copy=False) @@ -4386,7 +4386,7 @@ def temporal_dtype_to_numpy(dtype: PolarsDataType) -> Any: else: raise_no_zero_copy() np_array = self._s.to_numpy() - if dtype.is_temporal(): + if dtype in (Datetime, Duration, Date): np_dtype = temporal_dtype_to_numpy(dtype) np_array = np_array.view(np_dtype) diff --git a/py-polars/src/conversion/chunked_array.rs b/py-polars/src/conversion/chunked_array.rs index 8246312e45a3..e3d59dfc3b12 100644 --- a/py-polars/src/conversion/chunked_array.rs +++ b/py-polars/src/conversion/chunked_array.rs @@ -141,16 +141,21 @@ impl ToPyObject for Wrap<&DatetimeChunked> { impl ToPyObject for Wrap<&TimeChunked> { fn to_object(&self, py: Python) -> PyObject { - let utils = UTILS.as_ref(py); - let convert = utils.getattr(intern!(py, "_to_python_time")).unwrap(); - let iter = self - .0 - .into_iter() - .map(|opt_v| opt_v.map(|v| convert.call1((v,)).unwrap())); + let iter = time_to_pyobject_iter(py, self.0); PyList::new(py, iter).into_py(py) } } +pub(crate) fn time_to_pyobject_iter<'a>( + py: Python<'a>, + ca: &'a TimeChunked, +) -> impl 
ExactSizeIterator<Item = Option<&'a PyAny>> { + let utils = UTILS.as_ref(py); + let convert = utils.getattr(intern!(py, "_to_python_time")).unwrap(); + ca.0.into_iter() + .map(|opt_v| opt_v.map(|v| convert.call1((v,)).unwrap())) +} + impl ToPyObject for Wrap<&DateChunked> { fn to_object(&self, py: Python) -> PyObject { let utils = UTILS.as_ref(py); diff --git a/py-polars/src/conversion/mod.rs b/py-polars/src/conversion/mod.rs index 4b789bbcdff4..5471bdbd3e73 100644 --- a/py-polars/src/conversion/mod.rs +++ b/py-polars/src/conversion/mod.rs @@ -1,5 +1,5 @@ pub(crate) mod any_value; -mod chunked_array; +pub(crate) mod chunked_array; use std::fmt::{Display, Formatter}; use std::hash::{Hash, Hasher}; diff --git a/py-polars/src/series/export.rs b/py-polars/src/series/export.rs index bffdcef0772b..428a673a4995 100644 --- a/py-polars/src/series/export.rs +++ b/py-polars/src/series/export.rs @@ -4,6 +4,7 @@ use polars_core::prelude::*; use pyo3::prelude::*; use pyo3::types::PyList; +use crate::conversion::chunked_array::time_to_pyobject_iter; use crate::error::PyPolarsErr; use crate::prelude::{ObjectValue, *}; use crate::{arrow_interop, raise_err, PySeries}; @@ -185,6 +186,12 @@ impl PySeries { }, Date => date_series_to_numpy(py, s), Datetime(_, _) | Duration(_) => temporal_series_to_numpy(py, s), + Time => { + let ca = s.time().unwrap(); + let iter = time_to_pyobject_iter(py, ca); + let np_arr = PyArray1::from_iter(py, iter.map(|v| v.into_py(py))); + np_arr.into_py(py) + }, String => { let ca = s.str().unwrap(); let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py))); From a978d359af4bedeea0d8cc2aa65bbf39fde15e4b Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Mon, 5 Feb 2024 23:44:51 +0100 Subject: [PATCH 2/3] Move Decimal to Rust --- py-polars/polars/series/series.py | 7 +-- py-polars/src/conversion/chunked_array.rs | 53 +++++++++++++---------- py-polars/src/series/export.rs | 8 +++- 3 files changed, 38 insertions(+), 30 deletions(-) diff --git
a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 7719c7a5a37d..e59e65210352 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -4361,13 +4361,8 @@ def temporal_dtype_to_numpy(dtype: PolarsDataType) -> Any: zero_copy_only=zero_copy_only, writable=writable ) - if dtype == Decimal: - # There are no native NumPy "time" or "decimal" dtypes - raise_no_zero_copy() - return np.array(self.to_list(), dtype="object", copy=False) - if self.null_count() == 0: - if dtype.is_numeric(): + if dtype.is_numeric() and dtype != Decimal: np_array = self._view(ignore_nulls=True) elif dtype == Boolean: raise_no_zero_copy() diff --git a/py-polars/src/conversion/chunked_array.rs b/py-polars/src/conversion/chunked_array.rs index e3d59dfc3b12..483189093125 100644 --- a/py-polars/src/conversion/chunked_array.rs +++ b/py-polars/src/conversion/chunked_array.rs @@ -170,29 +170,36 @@ impl ToPyObject for Wrap<&DateChunked> { impl ToPyObject for Wrap<&DecimalChunked> { fn to_object(&self, py: Python) -> PyObject { - let utils = UTILS.as_ref(py); - let convert = utils.getattr(intern!(py, "_to_python_decimal")).unwrap(); - let py_scale = (-(self.0.scale() as i32)).to_object(py); - // if we don't know precision, the only safe bet is to set it to 39 - let py_precision = self.0.precision().unwrap_or(39).to_object(py); - let iter = self.0.into_iter().map(|opt_v| { - opt_v.map(|v| { - // TODO! use AnyValue so that we have a single impl. 
- const N: usize = 3; - let mut buf = [0_u128; N]; - let n_digits = decimal_to_digits(v.abs(), &mut buf); - let buf = unsafe { - std::slice::from_raw_parts( - buf.as_slice().as_ptr() as *const u8, - N * std::mem::size_of::<u128>(), - ) - }; - let digits = PyTuple::new(py, buf.iter().take(n_digits)); - convert - .call1((v.is_negative() as u8, digits, &py_precision, &py_scale)) - .unwrap() - }) - }); + let iter = decimal_to_pyobject_iter(py, self.0); PyList::new(py, iter).into_py(py) } } + +pub(crate) fn decimal_to_pyobject_iter<'a>( + py: Python<'a>, + ca: &'a DecimalChunked, +) -> impl ExactSizeIterator<Item = Option<&'a PyAny>> { + let utils = UTILS.as_ref(py); + let convert = utils.getattr(intern!(py, "_to_python_decimal")).unwrap(); + let py_scale = (-(ca.scale() as i32)).to_object(py); + // if we don't know precision, the only safe bet is to set it to 39 + let py_precision = ca.precision().unwrap_or(39).to_object(py); + ca.into_iter().map(move |opt_v| { + opt_v.map(|v| { + // TODO! use AnyValue so that we have a single impl.
+ const N: usize = 3; + let mut buf = [0_u128; N]; + let n_digits = decimal_to_digits(v.abs(), &mut buf); + let buf = unsafe { + std::slice::from_raw_parts( + buf.as_slice().as_ptr() as *const u8, + N * std::mem::size_of::<u128>(), + ) + }; + let digits = PyTuple::new(py, buf.iter().take(n_digits)); + convert + .call1((v.is_negative() as u8, digits, &py_precision, &py_scale)) + .unwrap() + }) + }) +} diff --git a/py-polars/src/series/export.rs b/py-polars/src/series/export.rs index 428a673a4995..71d84af1104f 100644 --- a/py-polars/src/series/export.rs +++ b/py-polars/src/series/export.rs @@ -4,7 +4,7 @@ use polars_core::prelude::*; use pyo3::prelude::*; use pyo3::types::PyList; -use crate::conversion::chunked_array::time_to_pyobject_iter; +use crate::conversion::chunked_array::{decimal_to_pyobject_iter, time_to_pyobject_iter}; use crate::error::PyPolarsErr; use crate::prelude::{ObjectValue, *}; use crate::{arrow_interop, raise_err, PySeries}; @@ -207,6 +207,12 @@ impl PySeries { let np_arr = PyArray1::from_iter(py, ca.iter_str().map(|s| s.into_py(py))); np_arr.into_py(py) }, + Decimal(_, _) => { + let ca = s.decimal().unwrap(); + let iter = decimal_to_pyobject_iter(py, ca); + let np_arr = PyArray1::from_iter(py, iter.map(|v| v.into_py(py))); + np_arr.into_py(py) + }, #[cfg(feature = "object")] Object(_, _) => { let ca = s From 2dfd845ef53333d815a982604264a115336bc4eb Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Wed, 7 Feb 2024 15:10:09 +0100 Subject: [PATCH 3/3] Syntax --- py-polars/polars/series/series.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index e59e65210352..abf3d220232a 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -4354,15 +4354,14 @@ def temporal_dtype_to_numpy(dtype: PolarsDataType) -> Any: if ( use_pyarrow and _PYARROW_AVAILABLE - and dtype != Object - and (dtype == Time or not dtype.is_temporal()) + and
dtype not in (Object, Datetime, Duration, Date) ): return self.to_arrow().to_numpy( zero_copy_only=zero_copy_only, writable=writable ) if self.null_count() == 0: - if dtype.is_numeric() and dtype != Decimal: + if dtype.is_integer() or dtype.is_float(): np_array = self._view(ignore_nulls=True) elif dtype == Boolean: raise_no_zero_copy()