Skip to content

Commit

Permalink
Avoid additional copy for integers
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Feb 4, 2024
1 parent d66edaf commit c1463d7
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 22 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ either = { workspace = true }
itoa = { workspace = true }
libc = "0.2"
ndarray = { workspace = true }
num-traits = { workspace = true }
numpy = { version = "0.20", default-features = false }
once_cell = { workspace = true }
pyo3 = { workspace = true, features = ["abi3-py38", "extension-module", "multiple-pymethods"] }
Expand Down
56 changes: 34 additions & 22 deletions py-polars/src/series/export.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use num_traits::{Float, NumCast};
use numpy::PyArray1;
use polars_core::prelude::*;
use pyo3::prelude::*;
Expand All @@ -19,41 +20,37 @@ impl PySeries {
})
}

/// For numeric types, this should only be called for Series with null types.
/// For numeric types, this should only be called for Series with null values.
/// Non-nullable types are handled with `view()`.
/// This will cast to floats so that `None = np.nan`.
fn to_numpy(&self, py: Python) -> PyResult<PyObject> {
use DataType::*;
let s = &self.series;
match s.dtype() {
Int32 | UInt32 | Int64 | UInt64 | Float64 => {
let s = s.cast(&DataType::Float64).unwrap();
let ca = s.f64().unwrap();
let np_arr =
PyArray1::from_iter(py, ca.iter().map(|opt_v| opt_v.unwrap_or(f64::NAN)));
Ok(np_arr.into_py(py))
},
Int8 | UInt8 | Int16 | UInt16 | Float32 => {
let s = s.cast(&DataType::Float32).unwrap();
let ca = s.f32().unwrap();
let np_arr =
PyArray1::from_iter(py, ca.iter().map(|opt_v| opt_v.unwrap_or(f32::NAN)));
Ok(np_arr.into_py(py))
},
let out = match s.dtype() {
Int8 => numeric_series_to_numpy::<Int8Type, f32>(py, s),
Int16 => numeric_series_to_numpy::<Int16Type, f32>(py, s),
Int32 => numeric_series_to_numpy::<Int32Type, f64>(py, s),
Int64 => numeric_series_to_numpy::<Int64Type, f64>(py, s),
UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(py, s),
UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(py, s),
UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(py, s),
UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(py, s),
Float32 => numeric_series_to_numpy::<Float32Type, f32>(py, s),
Float64 => numeric_series_to_numpy::<Float64Type, f64>(py, s),
Boolean => {
let ca = s.bool().unwrap();
let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
Ok(np_arr.into_py(py))
np_arr.into_py(py)
},
String => {
let ca = s.str().unwrap();
let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
Ok(np_arr.into_py(py))
np_arr.into_py(py)
},
Binary => {
let ca = s.binary().unwrap();
let np_arr = PyArray1::from_iter(py, ca.into_iter().map(|s| s.into_py(py)));
Ok(np_arr.into_py(py))
np_arr.into_py(py)
},
#[cfg(feature = "object")]
Object(_, _) => {
Expand All @@ -63,20 +60,21 @@ impl PySeries {
.unwrap();
let np_arr =
PyArray1::from_iter(py, ca.into_iter().map(|opt_v| opt_v.to_object(py)));
Ok(np_arr.into_py(py))
np_arr.into_py(py)
},
Null => {
let n = s.len();
let np_arr = PyArray1::from_iter(py, std::iter::repeat(f32::NAN).take(n));
Ok(np_arr.into_py(py))
np_arr.into_py(py)
},
dt => {
raise_err!(
format!("'to_numpy' not supported for dtype: {dt:?}"),
ComputeError
);
},
}
};
Ok(out)
}

pub fn to_list(&self) -> PyObject {
Expand Down Expand Up @@ -214,3 +212,17 @@ impl PySeries {
})
}
}

fn numeric_series_to_numpy<T, U>(py: Python, s: &Series) -> PyObject
where
T: PolarsNumericType,
U: Float + numpy::Element,
{
let ca: &ChunkedArray<T> = s.as_ref().as_ref();
let mapper = |opt_v: Option<T::Native>| match opt_v {
Some(v) => NumCast::from(v).unwrap(),
None => U::nan(),
};
let np_arr = PyArray1::from_iter(py, ca.iter().map(mapper));
np_arr.into_py(py)
}

0 comments on commit c1463d7

Please sign in to comment.