From 85b39a74fae737a6eb236f6ecdf8f50c6cd5bbb9 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sun, 4 Feb 2024 02:11:48 +0100 Subject: [PATCH] Drive-by cleanup docs --- py-polars/polars/series/series.py | 29 ++++--- py-polars/src/series/export.rs | 140 ++++++++++++++++-------------- 2 files changed, 90 insertions(+), 79 deletions(-) diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index cb75302e826b7..3ba5f70cbe5cb 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -4145,12 +4145,18 @@ def to_physical(self) -> Series: def to_list(self, *, use_pyarrow: bool | None = None) -> list[Any]: """ - Convert this Series to a Python List. This operation clones data. + Convert this Series to a Python list. + + This operation copies data. Parameters ---------- use_pyarrow - Use pyarrow for the conversion. + Use PyArrow to perform the conversion. + + .. deprecated:: 0.19.9 + This parameter will be removed. The function can safely be called + without the parameter - it should give the exact same result. Examples -------- @@ -4283,21 +4289,21 @@ def to_numpy( use_pyarrow: bool = True, ) -> np.ndarray[Any, Any]: """ - Convert this Series to numpy. + Convert this Series to a NumPy ndarray. - This operation may clone data but is completely safe. Note that: + This operation may copy data, but is completely safe. Note that: - - data which is purely numeric AND without null values is not cloned; - - floating point `nan` values can be zero-copied; - - booleans can't be zero-copied. + - Data which is purely numeric AND without null values is not cloned + - Floating point `nan` values can be zero-copied + - Booleans cannot be zero-copied - To ensure that no data is cloned, set `zero_copy_only=True`. + To ensure that no data is copied, set `zero_copy_only=True`. 
Parameters ---------- zero_copy_only - If True, an exception will be raised if the conversion to a numpy - array would require copying the underlying data (e.g. in presence + Raise an exception if the conversion to a NumPy array would require copying + the underlying data (e.g. in presence of nulls, or for non-primitive types). writable For numpy arrays created with zero copy (view on the Arrow data), @@ -4307,7 +4313,6 @@ def to_numpy( use_pyarrow Use `pyarrow.Array.to_numpy `_ - for the conversion to numpy. Examples @@ -4417,7 +4422,7 @@ def _view(self, *, ignore_nulls: bool = False) -> SeriesView: def to_arrow(self) -> pa.Array: """ - Get the underlying Arrow Array. + Return the underlying Arrow array. If the Series contains only a single chunk this operation is zero copy. diff --git a/py-polars/src/series/export.rs b/py-polars/src/series/export.rs index de366285deabf..b0e08ee99140e 100644 --- a/py-polars/src/series/export.rs +++ b/py-polars/src/series/export.rs @@ -10,73 +10,8 @@ use crate::{arrow_interop, raise_err, PySeries}; #[pymethods] impl PySeries { - #[allow(clippy::wrong_self_convention)] - fn to_arrow(&mut self) -> PyResult { - self.rechunk(true); - Python::with_gil(|py| { - let pyarrow = py.import("pyarrow")?; - - arrow_interop::to_py::to_py_array(self.series.to_arrow(0, false), py, pyarrow) - }) - } - - /// For numeric types, this should only be called for Series with null values. - /// Non-nullable types are handled with `view()`. - /// This will cast to floats so that `None = np.nan`. 
- fn to_numpy(&self, py: Python) -> PyResult { - use DataType::*; - let s = &self.series; - let out = match s.dtype() { - Int8 => numeric_series_to_numpy::(py, s), - Int16 => numeric_series_to_numpy::(py, s), - Int32 => numeric_series_to_numpy::(py, s), - Int64 => numeric_series_to_numpy::(py, s), - UInt8 => numeric_series_to_numpy::(py, s), - UInt16 => numeric_series_to_numpy::(py, s), - UInt32 => numeric_series_to_numpy::(py, s), - UInt64 => numeric_series_to_numpy::(py, s), - Float32 => numeric_series_to_numpy::(py, s), - Float64 => numeric_series_to_numpy::(py, s), - Boolean => { - let ca = s.bool().unwrap(); - let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py))); - np_arr.into_py(py) - }, - String => { - let ca = s.str().unwrap(); - let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py))); - np_arr.into_py(py) - }, - Binary => { - let ca = s.binary().unwrap(); - let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py))); - np_arr.into_py(py) - }, - #[cfg(feature = "object")] - Object(_, _) => { - let ca = s - .as_any() - .downcast_ref::>() - .unwrap(); - let np_arr = - PyArray1::from_iter(py, ca.into_iter().map(|opt_v| opt_v.to_object(py))); - np_arr.into_py(py) - }, - Null => { - let n = s.len(); - let np_arr = PyArray1::from_iter(py, std::iter::repeat(f32::NAN).take(n)); - np_arr.into_py(py) - }, - dt => { - raise_err!( - format!("'to_numpy' not supported for dtype: {dt:?}"), - ComputeError - ); - }, - }; - Ok(out) - } - + /// Convert this Series to a Python list. + /// This operation copies data. pub fn to_list(&self) -> PyObject { Python::with_gil(|py| { let series = &self.series; @@ -211,6 +146,77 @@ impl PySeries { pylist.to_object(py) }) } + + /// Return the underlying Arrow array. 
+ #[allow(clippy::wrong_self_convention)] + fn to_arrow(&mut self) -> PyResult { + self.rechunk(true); + Python::with_gil(|py| { + let pyarrow = py.import("pyarrow")?; + + arrow_interop::to_py::to_py_array(self.series.to_arrow(0, false), py, pyarrow) + }) + } + + /// Convert this Series to a NumPy ndarray. + /// + /// This method will copy data - numeric types without null values should + /// be handled on the Python side in a zero-copy manner. + /// + /// This method will cast integers to floats so that `null = np.nan`. + fn to_numpy(&self, py: Python) -> PyResult { + use DataType::*; + let s = &self.series; + let out = match s.dtype() { + Int8 => numeric_series_to_numpy::(py, s), + Int16 => numeric_series_to_numpy::(py, s), + Int32 => numeric_series_to_numpy::(py, s), + Int64 => numeric_series_to_numpy::(py, s), + UInt8 => numeric_series_to_numpy::(py, s), + UInt16 => numeric_series_to_numpy::(py, s), + UInt32 => numeric_series_to_numpy::(py, s), + UInt64 => numeric_series_to_numpy::(py, s), + Float32 => numeric_series_to_numpy::(py, s), + Float64 => numeric_series_to_numpy::(py, s), + Boolean => { + let ca = s.bool().unwrap(); + let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py))); + np_arr.into_py(py) + }, + String => { + let ca = s.str().unwrap(); + let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py))); + np_arr.into_py(py) + }, + Binary => { + let ca = s.binary().unwrap(); + let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py))); + np_arr.into_py(py) + }, + #[cfg(feature = "object")] + Object(_, _) => { + let ca = s + .as_any() + .downcast_ref::>() + .unwrap(); + let np_arr = + PyArray1::from_iter(py, ca.into_iter().map(|opt_v| opt_v.to_object(py))); + np_arr.into_py(py) + }, + Null => { + let n = s.len(); + let np_arr = PyArray1::from_iter(py, std::iter::repeat(f32::NAN).take(n)); + np_arr.into_py(py) + }, + dt => { + raise_err!( + format!("`to_numpy` not supported for dtype {dt:?}"), + 
ComputeError + ); + }, + }; + Ok(out) + } } fn numeric_series_to_numpy(py: Python, s: &Series) -> PyObject