From e51660ed53978aea74d45a32fca20159a956ed59 Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijndegooijer@gmail.com>
Date: Sat, 3 Feb 2024 20:16:50 +0100
Subject: [PATCH 1/3] feat(python!): Change `Series.to_numpy` to return `f64`
 for `Int32/UInt32` Series instead of `f32`

---
 py-polars/src/series/export.rs             | 44 +++++++++++-----------
 py-polars/tests/unit/interop/test_numpy.py | 24 ++++++++++++
 2 files changed, 46 insertions(+), 22 deletions(-)
diff --git a/py-polars/src/series/export.rs b/py-polars/src/series/export.rs
index 63f72460881a..0cc19054c5db 100644
--- a/py-polars/src/series/export.rs
+++ b/py-polars/src/series/export.rs
@@ -23,40 +23,40 @@ impl PySeries {
     /// Non-nullable types are handled with `view()`.
     /// This will cast to floats so that `None = np.nan`.
     fn to_numpy(&self, py: Python) -> PyResult<PyObject> {
+        use DataType::*;
         let s = &self.series;
         match s.dtype() {
-            dt if dt.is_numeric() => {
-                if s.bit_repr_is_large() {
-                    let s = s.cast(&DataType::Float64).unwrap();
-                    let ca = s.f64().unwrap();
-                    let np_arr =
-                        PyArray1::from_iter(py, ca.iter().map(|opt_v| opt_v.unwrap_or(f64::NAN)));
-                    Ok(np_arr.into_py(py))
-                } else {
-                    let s = s.cast(&DataType::Float32).unwrap();
-                    let ca = s.f32().unwrap();
-                    let np_arr =
-                        PyArray1::from_iter(py, ca.iter().map(|opt_v| opt_v.unwrap_or(f32::NAN)));
-                    Ok(np_arr.into_py(py))
-                }
+            Int32 | UInt32 | Int64 | UInt64 | Float64 => {
+                let s = s.cast(&DataType::Float64).unwrap();
+                let ca = s.f64().unwrap();
+                let np_arr =
+                    PyArray1::from_iter(py, ca.iter().map(|opt_v| opt_v.unwrap_or(f64::NAN)));
+                Ok(np_arr.into_py(py))
             },
-            DataType::String => {
-                let ca = s.str().unwrap();
+            Int8 | UInt8 | Int16 | UInt16 | Float32 => {
+                let s = s.cast(&DataType::Float32).unwrap();
+                let ca = s.f32().unwrap();
+                let np_arr =
+                    PyArray1::from_iter(py, ca.iter().map(|opt_v| opt_v.unwrap_or(f32::NAN)));
+                Ok(np_arr.into_py(py))
+            },
+            Boolean => {
+                let ca = s.bool().unwrap();
                 let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
                 Ok(np_arr.into_py(py))
             },
-            DataType::Binary => {
-                let ca = s.binary().unwrap();
+            String => {
+                let ca = s.str().unwrap();
                 let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
                 Ok(np_arr.into_py(py))
             },
-            DataType::Boolean => {
-                let ca = s.bool().unwrap();
+            Binary => {
+                let ca = s.binary().unwrap();
                 let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
                 Ok(np_arr.into_py(py))
             },
             #[cfg(feature = "object")]
-            DataType::Object(_, _) => {
+            Object(_, _) => {
                 let ca = s
                     .as_any()
                     .downcast_ref::<ObjectChunked<ObjectValue>>()
@@ -65,7 +65,7 @@ impl PySeries {
                     PyArray1::from_iter(py, ca.into_iter().map(|opt_v| opt_v.to_object(py)));
                 Ok(np_arr.into_py(py))
             },
-            DataType::Null => {
+            Null => {
                 let n = s.len();
                 let np_arr = PyArray1::from_iter(py, std::iter::repeat(f32::NAN).take(n));
                 Ok(np_arr.into_py(py))
diff --git a/py-polars/tests/unit/interop/test_numpy.py b/py-polars/tests/unit/interop/test_numpy.py
index b97635c20c82..70a47361c20c 100644
--- a/py-polars/tests/unit/interop/test_numpy.py
+++ b/py-polars/tests/unit/interop/test_numpy.py
@@ -1,4 +1,5 @@
 import numpy as np
+import numpy.typing as npt
 import pytest
 
 import polars as pl
@@ -63,3 +64,26 @@ def test_series_to_numpy_bool_with_nulls() -> None:
     result = s.to_numpy(use_pyarrow=False)
     assert s.to_list() == result.tolist()
     assert result.dtype == np.object_
+
+
+@pytest.mark.parametrize(
+    ("dtype", "expected_dtype"),
+    [
+        (pl.Int8, np.float32),
+        (pl.Int16, np.float32),
+        (pl.Int32, np.float64),
+        (pl.Int64, np.float64),
+        (pl.UInt8, np.float32),
+        (pl.UInt16, np.float32),
+        (pl.UInt32, np.float64),
+        (pl.UInt64, np.float64),
+        (pl.Float32, np.float32),
+        (pl.Float64, np.float64),
+    ],
+)
+def test_series_to_numpy_numeric_with_nulls(
+    dtype: pl.PolarsDataType, expected_dtype: npt.DTypeLike
+) -> None:
+    s = pl.Series([1, 2, None], dtype=dtype, strict=False)
+    result = s.to_numpy(use_pyarrow=False)
+    assert result.dtype == expected_dtype

From 269b4bb0ae49b9c59dc1ca79d4f2306652ee7fe5 Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijndegooijer@gmail.com>
Date: Sun, 4 Feb 2024 01:34:22 +0100
Subject: [PATCH 2/3] Avoid additional copy for integers

---
 Cargo.lock                     |  1 +
 py-polars/Cargo.toml           |  1 +
 py-polars/src/series/export.rs | 56 +++++++++++++++++++++-------------
 3 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 3277337e26f6..02e0a3742e44 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3065,6 +3065,7 @@ dependencies = [
  "libc",
  "mimalloc",
  "ndarray",
+ "num-traits",
  "numpy",
  "once_cell",
  "polars",
diff --git a/py-polars/Cargo.toml b/py-polars/Cargo.toml
index 8a5f05b79499..01ad293dfe62 100644
--- a/py-polars/Cargo.toml
+++ b/py-polars/Cargo.toml
@@ -22,6 +22,7 @@ either = { workspace = true }
 itoa = { workspace = true }
 libc = "0.2"
 ndarray = { workspace = true }
+num-traits = { workspace = true }
 numpy = { version = "0.20", default-features = false }
 once_cell = { workspace = true }
 pyo3 = { workspace = true, features = ["abi3-py38", "extension-module", "multiple-pymethods"] }
diff --git a/py-polars/src/series/export.rs b/py-polars/src/series/export.rs
index 0cc19054c5db..de366285deab 100644
--- a/py-polars/src/series/export.rs
+++ b/py-polars/src/series/export.rs
@@ -1,3 +1,4 @@
+use num_traits::{Float, NumCast};
 use numpy::PyArray1;
 use polars_core::prelude::*;
 use pyo3::prelude::*;
@@ -19,41 +20,37 @@ impl PySeries {
         })
     }
 
-    /// For numeric types, this should only be called for Series with null types.
+    /// For numeric types, this should only be called for Series with null values.
     /// Non-nullable types are handled with `view()`.
     /// This will cast to floats so that `None = np.nan`.
     fn to_numpy(&self, py: Python) -> PyResult<PyObject> {
         use DataType::*;
         let s = &self.series;
-        match s.dtype() {
-            Int32 | UInt32 | Int64 | UInt64 | Float64 => {
-                let s = s.cast(&DataType::Float64).unwrap();
-                let ca = s.f64().unwrap();
-                let np_arr =
-                    PyArray1::from_iter(py, ca.iter().map(|opt_v| opt_v.unwrap_or(f64::NAN)));
-                Ok(np_arr.into_py(py))
-            },
-            Int8 | UInt8 | Int16 | UInt16 | Float32 => {
-                let s = s.cast(&DataType::Float32).unwrap();
-                let ca = s.f32().unwrap();
-                let np_arr =
-                    PyArray1::from_iter(py, ca.iter().map(|opt_v| opt_v.unwrap_or(f32::NAN)));
-                Ok(np_arr.into_py(py))
-            },
+        let out = match s.dtype() {
+            Int8 => numeric_series_to_numpy::<Int8Type, f32>(py, s),
+            Int16 => numeric_series_to_numpy::<Int16Type, f32>(py, s),
+            Int32 => numeric_series_to_numpy::<Int32Type, f64>(py, s),
+            Int64 => numeric_series_to_numpy::<Int64Type, f64>(py, s),
+            UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(py, s),
+            UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(py, s),
+            UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(py, s),
+            UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(py, s),
+            Float32 => numeric_series_to_numpy::<Float32Type, f32>(py, s),
+            Float64 => numeric_series_to_numpy::<Float64Type, f64>(py, s),
             Boolean => {
                 let ca = s.bool().unwrap();
                 let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
-                Ok(np_arr.into_py(py))
+                np_arr.into_py(py)
             },
             String => {
                 let ca = s.str().unwrap();
                 let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
-                Ok(np_arr.into_py(py))
+                np_arr.into_py(py)
             },
             Binary => {
                 let ca = s.binary().unwrap();
                 let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
-                Ok(np_arr.into_py(py))
+                np_arr.into_py(py)
             },
             #[cfg(feature = "object")]
             Object(_, _) => {
@@ -63,12 +60,12 @@ impl PySeries {
                     .unwrap();
                 let np_arr =
                     PyArray1::from_iter(py, ca.into_iter().map(|opt_v| opt_v.to_object(py)));
-                Ok(np_arr.into_py(py))
+                np_arr.into_py(py)
             },
             Null => {
                 let n = s.len();
                 let np_arr = PyArray1::from_iter(py, std::iter::repeat(f32::NAN).take(n));
-                Ok(np_arr.into_py(py))
+                np_arr.into_py(py)
             },
             dt => {
                 raise_err!(
@@ -76,7 +73,8 @@ impl PySeries {
                     ComputeError
                 );
             },
-        }
+        };
+        Ok(out)
     }
 
     pub fn to_list(&self) -> PyObject {
@@ -214,3 +212,17 @@ impl PySeries {
         })
     }
 }
+
+fn numeric_series_to_numpy<T, U>(py: Python, s: &Series) -> PyObject
+where
+    T: PolarsNumericType,
+    U: Float + numpy::Element,
+{
+    let ca: &ChunkedArray<T> = s.as_ref().as_ref();
+    let mapper = |opt_v: Option<T::Native>| match opt_v {
+        Some(v) => NumCast::from(v).unwrap(),
+        None => U::nan(),
+    };
+    let np_arr = PyArray1::from_iter(py, ca.iter().map(mapper));
+    np_arr.into_py(py)
+}

From 772bfddf08bad82967207d8474bacc1467319fba Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijndegooijer@gmail.com>
Date: Sun, 4 Feb 2024 02:11:48 +0100
Subject: [PATCH 3/3] Drive-by cleanup docs

---
 py-polars/polars/series/series.py |  35 ++++----
 py-polars/src/series/export.rs    | 140 ++++++++++++++++--------------
 2 files changed, 93 insertions(+), 82 deletions(-)

diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py
index cb75302e826b..7f71fa9a52cb 100644
--- a/py-polars/polars/series/series.py
+++ b/py-polars/polars/series/series.py
@@ -4145,12 +4145,18 @@ def to_physical(self) -> Series:
 
     def to_list(self, *, use_pyarrow: bool | None = None) -> list[Any]:
         """
-        Convert this Series to a Python List. This operation clones data.
+        Convert this Series to a Python list.
+
+        This operation copies data.
 
         Parameters
         ----------
         use_pyarrow
-            Use pyarrow for the conversion.
+            Use PyArrow to perform the conversion.
+
+            .. deprecated:: 0.19.9
+                This parameter will be removed. The function can safely be called
+                without the parameter - it should give the exact same result.
 
         Examples
         --------
@@ -4283,32 +4289,31 @@ def to_numpy(
         use_pyarrow: bool = True,
     ) -> np.ndarray[Any, Any]:
         """
-        Convert this Series to numpy.
+        Convert this Series to a NumPy ndarray.
 
-        This operation may clone data but is completely safe. Note that:
+        This operation may copy data, but is completely safe. Note that:
 
-        - data which is purely numeric AND without null values is not cloned;
-        - floating point `nan` values can be zero-copied;
-        - booleans can't be zero-copied.
+        - Data which is purely numeric AND without null values is not copied
+        - Floating point `nan` values can be zero-copied
+        - Booleans cannot be zero-copied
 
-        To ensure that no data is cloned, set `zero_copy_only=True`.
+        To ensure that no data is copied, set `zero_copy_only=True`.
 
         Parameters
         ----------
         zero_copy_only
-            If True, an exception will be raised if the conversion to a numpy
-            array would require copying the underlying data (e.g. in presence
-            of nulls, or for non-primitive types).
+            Raise an exception if the conversion to a NumPy array would require copying
+            the underlying data. Data copy occurs, for example, when the Series contains
+            nulls or non-numeric types.
         writable
-            For numpy arrays created with zero copy (view on the Arrow data),
+            For NumPy arrays created with zero copy (view on the Arrow data),
             the resulting array is not writable (Arrow data is immutable).
             By setting this to True, a copy of the array is made to ensure
             it is writable.
         use_pyarrow
             Use `pyarrow.Array.to_numpy
             <https://arrow.apache.org/docs/python/generated/pyarrow.Array.html#pyarrow.Array.to_numpy>`_
-
-            for the conversion to numpy.
+            for the conversion to NumPy.
 
         Examples
         --------
@@ -4417,7 +4422,7 @@ def _view(self, *, ignore_nulls: bool = False) -> SeriesView:
 
     def to_arrow(self) -> pa.Array:
         """
-        Get the underlying Arrow Array.
+        Return the underlying Arrow array.
 
         If the Series contains only a single chunk this operation is zero copy.
 
diff --git a/py-polars/src/series/export.rs b/py-polars/src/series/export.rs
index de366285deab..b0e08ee99140 100644
--- a/py-polars/src/series/export.rs
+++ b/py-polars/src/series/export.rs
@@ -10,73 +10,8 @@ use crate::{arrow_interop, raise_err, PySeries};
 
 #[pymethods]
 impl PySeries {
-    #[allow(clippy::wrong_self_convention)]
-    fn to_arrow(&mut self) -> PyResult<PyObject> {
-        self.rechunk(true);
-        Python::with_gil(|py| {
-            let pyarrow = py.import("pyarrow")?;
-
-            arrow_interop::to_py::to_py_array(self.series.to_arrow(0, false), py, pyarrow)
-        })
-    }
-
-    /// For numeric types, this should only be called for Series with null values.
-    /// Non-nullable types are handled with `view()`.
-    /// This will cast to floats so that `None = np.nan`.
-    fn to_numpy(&self, py: Python) -> PyResult<PyObject> {
-        use DataType::*;
-        let s = &self.series;
-        let out = match s.dtype() {
-            Int8 => numeric_series_to_numpy::<Int8Type, f32>(py, s),
-            Int16 => numeric_series_to_numpy::<Int16Type, f32>(py, s),
-            Int32 => numeric_series_to_numpy::<Int32Type, f64>(py, s),
-            Int64 => numeric_series_to_numpy::<Int64Type, f64>(py, s),
-            UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(py, s),
-            UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(py, s),
-            UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(py, s),
-            UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(py, s),
-            Float32 => numeric_series_to_numpy::<Float32Type, f32>(py, s),
-            Float64 => numeric_series_to_numpy::<Float64Type, f64>(py, s),
-            Boolean => {
-                let ca = s.bool().unwrap();
-                let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
-                np_arr.into_py(py)
-            },
-            String => {
-                let ca = s.str().unwrap();
-                let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
-                np_arr.into_py(py)
-            },
-            Binary => {
-                let ca = s.binary().unwrap();
-                let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
-                np_arr.into_py(py)
-            },
-            #[cfg(feature = "object")]
-            Object(_, _) => {
-                let ca = s
-                    .as_any()
-                    .downcast_ref::<ObjectChunked<ObjectValue>>()
-                    .unwrap();
-                let np_arr =
-                    PyArray1::from_iter(py, ca.into_iter().map(|opt_v| opt_v.to_object(py)));
-                np_arr.into_py(py)
-            },
-            Null => {
-                let n = s.len();
-                let np_arr = PyArray1::from_iter(py, std::iter::repeat(f32::NAN).take(n));
-                np_arr.into_py(py)
-            },
-            dt => {
-                raise_err!(
-                    format!("'to_numpy' not supported for dtype: {dt:?}"),
-                    ComputeError
-                );
-            },
-        };
-        Ok(out)
-    }
-
+    /// Convert this Series to a Python list.
+    /// This operation copies data.
     pub fn to_list(&self) -> PyObject {
         Python::with_gil(|py| {
             let series = &self.series;
@@ -211,6 +146,77 @@ impl PySeries {
             pylist.to_object(py)
         })
     }
+
+    /// Return the underlying Arrow array.
+    #[allow(clippy::wrong_self_convention)]
+    fn to_arrow(&mut self) -> PyResult<PyObject> {
+        self.rechunk(true);
+        Python::with_gil(|py| {
+            let pyarrow = py.import("pyarrow")?;
+
+            arrow_interop::to_py::to_py_array(self.series.to_arrow(0, false), py, pyarrow)
+        })
+    }
+
+    /// Convert this Series to a NumPy ndarray.
+    ///
+    /// This method will copy data - numeric types without null values should
+    /// be handled on the Python side in a zero-copy manner.
+    ///
+    /// This method will cast integers to floats so that `null = np.nan`.
+    fn to_numpy(&self, py: Python) -> PyResult<PyObject> {
+        use DataType::*;
+        let s = &self.series;
+        let out = match s.dtype() {
+            Int8 => numeric_series_to_numpy::<Int8Type, f32>(py, s),
+            Int16 => numeric_series_to_numpy::<Int16Type, f32>(py, s),
+            Int32 => numeric_series_to_numpy::<Int32Type, f64>(py, s),
+            Int64 => numeric_series_to_numpy::<Int64Type, f64>(py, s),
+            UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(py, s),
+            UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(py, s),
+            UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(py, s),
+            UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(py, s),
+            Float32 => numeric_series_to_numpy::<Float32Type, f32>(py, s),
+            Float64 => numeric_series_to_numpy::<Float64Type, f64>(py, s),
+            Boolean => {
+                let ca = s.bool().unwrap();
+                let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
+                np_arr.into_py(py)
+            },
+            String => {
+                let ca = s.str().unwrap();
+                let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
+                np_arr.into_py(py)
+            },
+            Binary => {
+                let ca = s.binary().unwrap();
+                let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
+                np_arr.into_py(py)
+            },
+            #[cfg(feature = "object")]
+            Object(_, _) => {
+                let ca = s
+                    .as_any()
+                    .downcast_ref::<ObjectChunked<ObjectValue>>()
+                    .unwrap();
+                let np_arr =
+                    PyArray1::from_iter(py, ca.into_iter().map(|opt_v| opt_v.to_object(py)));
+                np_arr.into_py(py)
+            },
+            Null => {
+                let n = s.len();
+                let np_arr = PyArray1::from_iter(py, std::iter::repeat(f32::NAN).take(n));
+                np_arr.into_py(py)
+            },
+            dt => {
+                raise_err!(
+                    format!("`to_numpy` not supported for dtype {dt:?}"),
+                    ComputeError
+                );
+            },
+        };
+        Ok(out)
+    }
 }
 
 fn numeric_series_to_numpy<T, U>(py: Python, s: &Series) -> PyObject