Avoid additional copy for integers

pola-rs · Feb 4, 2024 · 6d57b33 · 6d57b33
1 parent 30aabf2
commit 6d57b33
Show file tree

Hide file tree

Showing 3 changed files with 36 additions and 22 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/py-polars/Cargo.toml b/py-polars/Cargo.toml
@@ -22,6 +22,7 @@ either = { workspace = true }
 itoa = { workspace = true }
 libc = "0.2"
 ndarray = { workspace = true }
+num-traits = { workspace = true }
 numpy = { version = "0.20", default-features = false }
 once_cell = { workspace = true }
 pyo3 = { workspace = true, features = ["abi3-py38", "extension-module", "multiple-pymethods"] }

diff --git a/py-polars/src/series/export.rs b/py-polars/src/series/export.rs
@@ -1,3 +1,4 @@
+use num_traits::{Float, NumCast};
 use numpy::PyArray1;
 use polars_core::prelude::*;
 use pyo3::prelude::*;
@@ -19,41 +20,37 @@ impl PySeries {
         })
     }
 
-    /// For numeric types, this should only be called for Series with null types.
+    /// For numeric types, this should only be called for Series with null values.
     /// Non-nullable types are handled with `view()`.
     /// This will cast to floats so that `None = np.nan`.
     fn to_numpy(&self, py: Python) -> PyResult<PyObject> {
         use DataType::*;
         let s = &self.series;
-        match s.dtype() {
-            Int32 | UInt32 | Int64 | UInt64 | Float64 => {
-                let s = s.cast(&DataType::Float64).unwrap();
-                let ca = s.f64().unwrap();
-                let np_arr =
-                    PyArray1::from_iter(py, ca.iter().map(|opt_v| opt_v.unwrap_or(f64::NAN)));
-                Ok(np_arr.into_py(py))
-            },
-            Int8 | UInt8 | Int16 | UInt16 | Float32 => {
-                let s = s.cast(&DataType::Float32).unwrap();
-                let ca = s.f32().unwrap();
-                let np_arr =
-                    PyArray1::from_iter(py, ca.iter().map(|opt_v| opt_v.unwrap_or(f32::NAN)));
-                Ok(np_arr.into_py(py))
-            },
+        let out = match s.dtype() {
+            Int8 => numeric_series_to_numpy::<Int8Type, f32>(py, s),
+            Int16 => numeric_series_to_numpy::<Int16Type, f32>(py, s),
+            Int32 => numeric_series_to_numpy::<Int32Type, f64>(py, s),
+            Int64 => numeric_series_to_numpy::<Int64Type, f64>(py, s),
+            UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(py, s),
+            UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(py, s),
+            UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(py, s),
+            UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(py, s),
+            Float32 => numeric_series_to_numpy::<Float32Type, f32>(py, s),
+            Float64 => numeric_series_to_numpy::<Float64Type, f64>(py, s),
             Boolean => {
                 let ca = s.bool().unwrap();
                 let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
-                Ok(np_arr.into_py(py))
+                np_arr.into_py(py)
             },
             String => {
                 let ca = s.str().unwrap();
                 let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
-                Ok(np_arr.into_py(py))
+                np_arr.into_py(py)
             },
             Binary => {
                 let ca = s.binary().unwrap();
                 let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
-                Ok(np_arr.into_py(py))
+                np_arr.into_py(py)
             },
             #[cfg(feature = "object")]
             Object(_, _) => {
@@ -63,20 +60,21 @@ impl PySeries {
                     .unwrap();
                 let np_arr =
                     PyArray1::from_iter(py, ca.into_iter().map(|opt_v| opt_v.to_object(py)));
-                Ok(np_arr.into_py(py))
+                np_arr.into_py(py)
             },
             Null => {
                 let n = s.len();
                 let np_arr = PyArray1::from_iter(py, std::iter::repeat(f32::NAN).take(n));
-                Ok(np_arr.into_py(py))
+                np_arr.into_py(py)
             },
             dt => {
                 raise_err!(
                     format!("'to_numpy' not supported for dtype: {dt:?}"),
                     ComputeError
                 );
             },
-        }
+        };
+        Ok(out)
     }
 
     pub fn to_list(&self) -> PyObject {
@@ -214,3 +212,17 @@ impl PySeries {
         })
     }
 }
+
+/// Exports a numeric series as a 1-D NumPy float array of type `U`, mapping nulls to NaN.
+fn numeric_series_to_numpy<T, U>(py: Python, s: &Series) -> PyObject
+where
+    T: PolarsNumericType,
+    U: Float + numpy::Element,
+{
+    // View the series as its typed chunked array; callers choose `T` from `s.dtype()`.
+    let chunked: &ChunkedArray<T> = s.as_ref().as_ref();
+    // Present values are cast to `U`; the unwrap panics if `NumCast` cannot represent one.
+    let to_float = |v: T::Native| <U as NumCast>::from(v).unwrap();
+    let values = chunked.iter().map(|opt_v| opt_v.map_or_else(U::nan, to_float));
+    PyArray1::from_iter(py, values).into_py(py)
+}