pola-rs · ritchie46 · Feb 4, 2024 · Feb 3, 2024 · Feb 4, 2024 · Feb 4, 2024
@@ -22,6 +22,7 @@ either = { workspace = true }
 itoa = { workspace = true }
 libc = "0.2"
 ndarray = { workspace = true }
+num-traits = { workspace = true }
 numpy = { version = "0.20", default-features = false }
 once_cell = { workspace = true }
 pyo3 = { workspace = true, features = ["abi3-py38", "extension-module", "multiple-pymethods"] }

@@ -4145,12 +4145,18 @@ def to_physical(self) -> Series:
 
     def to_list(self, *, use_pyarrow: bool | None = None) -> list[Any]:
         """
-        Convert this Series to a Python List. This operation clones data.
+        Convert this Series to a Python list.
+
+        This operation copies data.
 
         Parameters
         ----------
         use_pyarrow
-            Use pyarrow for the conversion.
+            Use PyArrow to perform the conversion.
+
+            .. deprecated:: 0.19.9
+                This parameter will be removed. The function can safely be called
+                without the parameter - it should give the exact same result.
 
         Examples
         --------
@@ -4283,32 +4289,31 @@ def to_numpy(
         use_pyarrow: bool = True,
     ) -> np.ndarray[Any, Any]:
         """
-        Convert this Series to numpy.
+        Convert this Series to a NumPy ndarray.
 
-        This operation may clone data but is completely safe. Note that:
+        This operation may copy data, but is completely safe. Note that:
 
-        - data which is purely numeric AND without null values is not cloned;
-        - floating point `nan` values can be zero-copied;
-        - booleans can't be zero-copied.
+        - Data which is purely numeric AND without null values is not copied
+        - Floating point `nan` values can be zero-copied
+        - Booleans cannot be zero-copied
 
-        To ensure that no data is cloned, set `zero_copy_only=True`.
+        To ensure that no data is copied, set `zero_copy_only=True`.
 
         Parameters
         ----------
         zero_copy_only
-            If True, an exception will be raised if the conversion to a numpy
-            array would require copying the underlying data (e.g. in presence
-            of nulls, or for non-primitive types).
+            Raise an exception if the conversion to a NumPy array would require copying
+            the underlying data. Data copy occurs, for example, when the Series contains
+            nulls or non-numeric types.
         writable
-            For numpy arrays created with zero copy (view on the Arrow data),
+            For NumPy arrays created with zero copy (view on the Arrow data),
             the resulting array is not writable (Arrow data is immutable).
             By setting this to True, a copy of the array is made to ensure
             it is writable.
         use_pyarrow
             Use `pyarrow.Array.to_numpy
             <https://arrow.apache.org/docs/python/generated/pyarrow.Array.html#pyarrow.Array.to_numpy>`_
-
-            for the conversion to numpy.
+            for the conversion to NumPy.
 
         Examples
         --------
@@ -4417,7 +4422,7 @@ def _view(self, *, ignore_nulls: bool = False) -> SeriesView:
 
     def to_arrow(self) -> pa.Array:
         """
-        Get the underlying Arrow Array.
+        Return the underlying Arrow array.
 
         If the Series contains only a single chunk this operation is zero copy.
 

@@ -1,3 +1,4 @@
+use num_traits::{Float, NumCast};
 use numpy::PyArray1;
 use polars_core::prelude::*;
 use pyo3::prelude::*;
@@ -9,76 +10,8 @@ use crate::{arrow_interop, raise_err, PySeries};
 
 #[pymethods]
 impl PySeries {
-    #[allow(clippy::wrong_self_convention)]
-    fn to_arrow(&mut self) -> PyResult<PyObject> {
-        self.rechunk(true);
-        Python::with_gil(|py| {
-            let pyarrow = py.import("pyarrow")?;
-
-            arrow_interop::to_py::to_py_array(self.series.to_arrow(0, false), py, pyarrow)
-        })
-    }
-
-    /// For numeric types, this should only be called for Series with null types.
-    /// Non-nullable types are handled with `view()`.
-    /// This will cast to floats so that `None = np.nan`.
-    fn to_numpy(&self, py: Python) -> PyResult<PyObject> {
-        let s = &self.series;
-        match s.dtype() {
-            dt if dt.is_numeric() => {
-                if s.bit_repr_is_large() {
-                    let s = s.cast(&DataType::Float64).unwrap();
-                    let ca = s.f64().unwrap();
-                    let np_arr =
-                        PyArray1::from_iter(py, ca.iter().map(|opt_v| opt_v.unwrap_or(f64::NAN)));
-                    Ok(np_arr.into_py(py))
-                } else {
-                    let s = s.cast(&DataType::Float32).unwrap();
-                    let ca = s.f32().unwrap();
-                    let np_arr =
-                        PyArray1::from_iter(py, ca.iter().map(|opt_v| opt_v.unwrap_or(f32::NAN)));
-                    Ok(np_arr.into_py(py))
-                }
-            },
-            DataType::String => {
-                let ca = s.str().unwrap();
-                let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
-                Ok(np_arr.into_py(py))
-            },
-            DataType::Binary => {
-                let ca = s.binary().unwrap();
-                let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
-                Ok(np_arr.into_py(py))
-            },
-            DataType::Boolean => {
-                let ca = s.bool().unwrap();
-                let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
-                Ok(np_arr.into_py(py))
-            },
-            #[cfg(feature = "object")]
-            DataType::Object(_, _) => {
-                let ca = s
-                    .as_any()
-                    .downcast_ref::<ObjectChunked<ObjectValue>>()
-                    .unwrap();
-                let np_arr =
-                    PyArray1::from_iter(py, ca.into_iter().map(|opt_v| opt_v.to_object(py)));
-                Ok(np_arr.into_py(py))
-            },
-            DataType::Null => {
-                let n = s.len();
-                let np_arr = PyArray1::from_iter(py, std::iter::repeat(f32::NAN).take(n));
-                Ok(np_arr.into_py(py))
-            },
-            dt => {
-                raise_err!(
-                    format!("'to_numpy' not supported for dtype: {dt:?}"),
-                    ComputeError
-                );
-            },
-        }
-    }
-
+    /// Convert this Series to a Python list.
+    /// This operation copies data.
     pub fn to_list(&self) -> PyObject {
         Python::with_gil(|py| {
             let series = &self.series;
@@ -213,4 +146,89 @@ impl PySeries {
             pylist.to_object(py)
         })
     }
+
+    /// Return the underlying Arrow array.
+    #[allow(clippy::wrong_self_convention)]
+    fn to_arrow(&mut self) -> PyResult<PyObject> {
+        self.rechunk(true);
+        Python::with_gil(|py| {
+            let pyarrow = py.import("pyarrow")?;
+
+            arrow_interop::to_py::to_py_array(self.series.to_arrow(0, false), py, pyarrow)
+        })
+    }
+
+    /// Convert this Series to a NumPy ndarray.
+    ///
+    /// This method will copy data - numeric types without null values should
+    /// be handled on the Python side in a zero-copy manner.
+    ///
+    /// This method will cast integers to floats so that `null = np.nan`.
+    fn to_numpy(&self, py: Python) -> PyResult<PyObject> {
+        use DataType::*;
+        let s = &self.series;
+        let out = match s.dtype() {
+            Int8 => numeric_series_to_numpy::<Int8Type, f32>(py, s),
+            Int16 => numeric_series_to_numpy::<Int16Type, f32>(py, s),
+            Int32 => numeric_series_to_numpy::<Int32Type, f64>(py, s),
+            Int64 => numeric_series_to_numpy::<Int64Type, f64>(py, s),
+            UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(py, s),
+            UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(py, s),
+            UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(py, s),
+            UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(py, s),
+            Float32 => numeric_series_to_numpy::<Float32Type, f32>(py, s),
+            Float64 => numeric_series_to_numpy::<Float64Type, f64>(py, s),
+            Boolean => {
+                let ca = s.bool().unwrap();
+                let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
+                np_arr.into_py(py)
+            },
+            String => {
+                let ca = s.str().unwrap();
+                let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
+                np_arr.into_py(py)
+            },
+            Binary => {
+                let ca = s.binary().unwrap();
+                let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
+                np_arr.into_py(py)
+            },
+            #[cfg(feature = "object")]
+            Object(_, _) => {
+                let ca = s
+                    .as_any()
+                    .downcast_ref::<ObjectChunked<ObjectValue>>()
+                    .unwrap();
+                let np_arr =
+                    PyArray1::from_iter(py, ca.into_iter().map(|opt_v| opt_v.to_object(py)));
+                np_arr.into_py(py)
+            },
+            Null => {
+                let n = s.len();
+                let np_arr = PyArray1::from_iter(py, std::iter::repeat(f32::NAN).take(n));
+                np_arr.into_py(py)
+            },
+            dt => {
+                raise_err!(
+                    format!("`to_numpy` not supported for dtype {dt:?}"),
+                    ComputeError
+                );
+            },
+        };
+        Ok(out)
+    }
+}
+
+fn numeric_series_to_numpy<T, U>(py: Python, s: &Series) -> PyObject
+where
+    T: PolarsNumericType,
+    U: Float + numpy::Element,
+{
+    let ca: &ChunkedArray<T> = s.as_ref().as_ref();
+    let mapper = |opt_v: Option<T::Native>| match opt_v {
+        Some(v) => NumCast::from(v).unwrap(),
+        None => U::nan(),
+    };
+    let np_arr = PyArray1::from_iter(py, ca.iter().map(mapper));
+    np_arr.into_py(py)
 }
@@ -1,4 +1,5 @@
 import numpy as np
+import numpy.typing as npt
 import pytest
 
 import polars as pl
@@ -63,3 +64,26 @@ def test_series_to_numpy_bool_with_nulls() -> None:
     result = s.to_numpy(use_pyarrow=False)
     assert s.to_list() == result.tolist()
     assert result.dtype == np.object_
+
+
+@pytest.mark.parametrize(
+    ("dtype", "expected_dtype"),
+    [
+        (pl.Int8, np.float32),
+        (pl.Int16, np.float32),
+        (pl.Int32, np.float64),
+        (pl.Int64, np.float64),
+        (pl.UInt8, np.float32),
+        (pl.UInt16, np.float32),
+        (pl.UInt32, np.float64),
+        (pl.UInt64, np.float64),
+        (pl.Float32, np.float32),
+        (pl.Float64, np.float64),
+    ],
+)
+def test_series_to_numpy_numeric_with_nulls(
+    dtype: pl.PolarsDataType, expected_dtype: npt.DTypeLike
+) -> None:
+    s = pl.Series([1, 2, None], dtype=dtype, strict=False)
+    result = s.to_numpy(use_pyarrow=False)
+    assert result.dtype == expected_dtype