Skip to content

Commit

Permalink
Drive-by cleanup docs
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Feb 4, 2024
1 parent 269b4bb commit 772bfdd
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 82 deletions.
35 changes: 20 additions & 15 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4145,12 +4145,18 @@ def to_physical(self) -> Series:

def to_list(self, *, use_pyarrow: bool | None = None) -> list[Any]:
"""
Convert this Series to a Python List. This operation clones data.
Convert this Series to a Python list.
This operation copies data.
Parameters
----------
use_pyarrow
Use pyarrow for the conversion.
Use PyArrow to perform the conversion.
.. deprecated:: 0.19.9
This parameter will be removed. The function can safely be called
without the parameter - it should give the exact same result.
Examples
--------
Expand Down Expand Up @@ -4283,32 +4289,31 @@ def to_numpy(
use_pyarrow: bool = True,
) -> np.ndarray[Any, Any]:
"""
Convert this Series to numpy.
Convert this Series to a NumPy ndarray.
This operation may clone data but is completely safe. Note that:
This operation may copy data, but is completely safe. Note that:
- data which is purely numeric AND without null values is not cloned;
- floating point `nan` values can be zero-copied;
- booleans can't be zero-copied.
- Data which is purely numeric AND without null values is not copied
- Floating point `nan` values can be zero-copied
- Booleans cannot be zero-copied
To ensure that no data is cloned, set `zero_copy_only=True`.
To ensure that no data is copied, set `zero_copy_only=True`.
Parameters
----------
zero_copy_only
If True, an exception will be raised if the conversion to a numpy
array would require copying the underlying data (e.g. in presence
of nulls, or for non-primitive types).
Raise an exception if the conversion to a NumPy array would require copying
the underlying data. Data copy occurs, for example, when the Series contains
nulls or non-numeric types.
writable
For numpy arrays created with zero copy (view on the Arrow data),
For NumPy arrays created with zero copy (view on the Arrow data),
the resulting array is not writable (Arrow data is immutable).
By setting this to True, a copy of the array is made to ensure
it is writable.
use_pyarrow
Use `pyarrow.Array.to_numpy
<https://arrow.apache.org/docs/python/generated/pyarrow.Array.html#pyarrow.Array.to_numpy>`_
for the conversion to numpy.
for the conversion to NumPy.
Examples
--------
Expand Down Expand Up @@ -4417,7 +4422,7 @@ def _view(self, *, ignore_nulls: bool = False) -> SeriesView:

def to_arrow(self) -> pa.Array:
"""
Get the underlying Arrow Array.
Return the underlying Arrow array.
If the Series contains only a single chunk this operation is zero copy.
Expand Down
140 changes: 73 additions & 67 deletions py-polars/src/series/export.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,73 +10,8 @@ use crate::{arrow_interop, raise_err, PySeries};

#[pymethods]
impl PySeries {
/// Export this Series to Python as a pyarrow array.
///
/// The Series is rechunked before the export. Returns an error if the
/// `pyarrow` module cannot be imported or if the interop conversion fails.
#[allow(clippy::wrong_self_convention)]
fn to_arrow(&mut self) -> PyResult<PyObject> {
    self.rechunk(true);
    Python::with_gil(|py| {
        let pyarrow_module = py.import("pyarrow")?;
        // NOTE(review): the `0` presumably selects the first (only) chunk after
        // the rechunk above - confirm against `Series::to_arrow`.
        let chunk = self.series.to_arrow(0, false);
        arrow_interop::to_py::to_py_array(chunk, py, pyarrow_module)
    })
}

/// Build a NumPy array from this Series.
///
/// For numeric types, this should only be called for Series that contain
/// null values; non-nullable numeric Series are handled with `view()`.
/// Numeric data is converted to a float array so that `None = np.nan`:
/// 8- and 16-bit integers and `Float32` target `f32`, while 32- and 64-bit
/// integers and `Float64` target `f64`.
///
/// Boolean, String, Binary and (with the `object` feature) Object Series are
/// turned into an array of Python objects. A Null Series becomes an `f32`
/// array filled with NaN. Every other dtype raises a `ComputeError`.
fn to_numpy(&self, py: Python) -> PyResult<PyObject> {
    use DataType::*;
    let series = &self.series;
    let array = match series.dtype() {
        Int8 => numeric_series_to_numpy::<Int8Type, f32>(py, series),
        Int16 => numeric_series_to_numpy::<Int16Type, f32>(py, series),
        Int32 => numeric_series_to_numpy::<Int32Type, f64>(py, series),
        Int64 => numeric_series_to_numpy::<Int64Type, f64>(py, series),
        UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(py, series),
        UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(py, series),
        UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(py, series),
        UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(py, series),
        Float32 => numeric_series_to_numpy::<Float32Type, f32>(py, series),
        Float64 => numeric_series_to_numpy::<Float64Type, f64>(py, series),
        Boolean => {
            let values = series.bool().unwrap();
            PyArray1::from_iter(py, values.into_iter().map(|v| v.into_py(py))).into_py(py)
        },
        String => {
            let values = series.str().unwrap();
            PyArray1::from_iter(py, values.into_iter().map(|v| v.into_py(py))).into_py(py)
        },
        Binary => {
            let values = series.binary().unwrap();
            PyArray1::from_iter(py, values.into_iter().map(|v| v.into_py(py))).into_py(py)
        },
        #[cfg(feature = "object")]
        Object(_, _) => {
            let values = series
                .as_any()
                .downcast_ref::<ObjectChunked<ObjectValue>>()
                .unwrap();
            PyArray1::from_iter(py, values.into_iter().map(|v| v.to_object(py))).into_py(py)
        },
        Null => {
            // One NaN per row of the Series.
            let nans = std::iter::repeat(f32::NAN).take(series.len());
            PyArray1::from_iter(py, nans).into_py(py)
        },
        dt => {
            raise_err!(
                format!("'to_numpy' not supported for dtype: {dt:?}"),
                ComputeError
            );
        },
    };
    Ok(array)
}

/// Convert this Series to a Python list.
/// This operation copies data.
pub fn to_list(&self) -> PyObject {
Python::with_gil(|py| {
let series = &self.series;
Expand Down Expand Up @@ -211,6 +146,77 @@ impl PySeries {
pylist.to_object(py)
})
}

/// Hand the Arrow array backing this Series over to Python via pyarrow.
///
/// The Series is rechunked before the export. Returns an error if the
/// `pyarrow` module cannot be imported or if the interop conversion fails.
#[allow(clippy::wrong_self_convention)]
fn to_arrow(&mut self) -> PyResult<PyObject> {
    self.rechunk(true);
    Python::with_gil(|py| {
        let pyarrow_module = py.import("pyarrow")?;
        // NOTE(review): the `0` presumably selects the first (only) chunk after
        // the rechunk above - confirm against `Series::to_arrow`.
        let chunk = self.series.to_arrow(0, false);
        arrow_interop::to_py::to_py_array(chunk, py, pyarrow_module)
    })
}

/// Build a NumPy ndarray from this Series.
///
/// This method copies data. Numeric Series without null values should be
/// handled on the Python side in a zero-copy manner instead.
///
/// Integers are cast to floats so that `null = np.nan`: 8- and 16-bit
/// integers and `Float32` target `f32`, while 32- and 64-bit integers and
/// `Float64` target `f64`.
///
/// Boolean, String, Binary and (with the `object` feature) Object Series are
/// turned into an array of Python objects. A Null Series becomes an `f32`
/// array filled with NaN. Every other dtype raises a `ComputeError`.
fn to_numpy(&self, py: Python) -> PyResult<PyObject> {
    use DataType::*;
    let series = &self.series;
    let array = match series.dtype() {
        Int8 => numeric_series_to_numpy::<Int8Type, f32>(py, series),
        Int16 => numeric_series_to_numpy::<Int16Type, f32>(py, series),
        Int32 => numeric_series_to_numpy::<Int32Type, f64>(py, series),
        Int64 => numeric_series_to_numpy::<Int64Type, f64>(py, series),
        UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(py, series),
        UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(py, series),
        UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(py, series),
        UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(py, series),
        Float32 => numeric_series_to_numpy::<Float32Type, f32>(py, series),
        Float64 => numeric_series_to_numpy::<Float64Type, f64>(py, series),
        Boolean => {
            let values = series.bool().unwrap();
            PyArray1::from_iter(py, values.into_iter().map(|v| v.into_py(py))).into_py(py)
        },
        String => {
            let values = series.str().unwrap();
            PyArray1::from_iter(py, values.into_iter().map(|v| v.into_py(py))).into_py(py)
        },
        Binary => {
            let values = series.binary().unwrap();
            PyArray1::from_iter(py, values.into_iter().map(|v| v.into_py(py))).into_py(py)
        },
        #[cfg(feature = "object")]
        Object(_, _) => {
            let values = series
                .as_any()
                .downcast_ref::<ObjectChunked<ObjectValue>>()
                .unwrap();
            PyArray1::from_iter(py, values.into_iter().map(|v| v.to_object(py))).into_py(py)
        },
        Null => {
            // One NaN per row of the Series.
            let nans = std::iter::repeat(f32::NAN).take(series.len());
            PyArray1::from_iter(py, nans).into_py(py)
        },
        dt => {
            raise_err!(
                format!("`to_numpy` not supported for dtype {dt:?}"),
                ComputeError
            );
        },
    };
    Ok(array)
}
}

fn numeric_series_to_numpy<T, U>(py: Python, s: &Series) -> PyObject
Expand Down

0 comments on commit 772bfdd

Please sign in to comment.