Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved numpy ffi for boolean arrays #540

Merged
merged 2 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions python/core/python/geoarrow/rust/core/_rust.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -459,11 +459,13 @@ class RectArray:
def total_bounds(self) -> Tuple[float, float, float, float]: ...

# Type stub for BooleanArray: signatures only, every body is `...`.
class BooleanArray:
# Conversion to a NumPy array; annotated as returning an array of np.bool_.
def __array__(self) -> NDArray[np.bool_]: ...
# Equality is typed against another instance of the same class (Self).
def __eq__(self, other: Self) -> bool: ...
def __len__(self) -> int: ...
# NOTE(review): presumably the Arrow C data interface export hook; the two
# returned objects are typed only as `object` here -- confirm against the
# Rust implementation.
def __arrow_c_array__(
self, requested_schema: object | None = None
) -> Tuple[object, object]: ...
# Explicit conversion method with the same return annotation as __array__.
def to_numpy(self) -> NDArray[np.bool_]: ...

# class Float16Array:
# def __array__(self) -> NDArray[np.float16]: ...
Expand Down Expand Up @@ -899,6 +901,7 @@ class ChunkedRectArray:
def total_bounds(self) -> Tuple[float, float, float, float]: ...

class ChunkedBooleanArray:
def __array__(self) -> NDArray[np.bool_]: ...
def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: ...
def __eq__(self, other: Self) -> bool: ...
def __len__(self) -> int: ...
Expand All @@ -907,6 +910,7 @@ class ChunkedBooleanArray:
@classmethod
def from_arrow_arrays(cls, input: Sequence[ArrowArrayExportable]) -> Self: ...
def num_chunks(self) -> int: ...
def to_numpy(self) -> NDArray[np.bool_]: ...

# class ChunkedFloat16Array:
# def __array__(self) -> NDArray[np.float16]: ...
Expand Down
45 changes: 45 additions & 0 deletions python/core/src/interop/numpy/to_numpy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,25 @@ impl_array!(Int16Array);
impl_array!(Int32Array);
impl_array!(Int64Array);

#[pymethods]
impl BooleanArray {
    // numpy `__array__` hook: builds a numpy array from this array's boolean values.
    //
    // Arrays containing one or more nulls are rejected with a Python `ValueError`
    // instead of being converted.
    //
    // (Plain `//` comments are used here so the Python-visible docstring of this
    // method is left untouched.)
    pub fn __array__<'py>(&'py self, py: Python<'py>) -> PyResult<&'py PyAny> {
        if self.0.null_count() == 0 {
            // Materialise the values as one element per entry before handing them
            // to numpy.
            let values: Vec<_> = self.0.values().iter().collect();
            Ok(values.to_pyarray(py))
        } else {
            Err(PyValueError::new_err(
                "Cannot create numpy array from pyarrow array with nulls.",
            ))
        }
    }

    /// Copy this array to a `numpy` NDArray
    pub fn to_numpy<'py>(&'py self, py: Python<'py>) -> PyResult<&'py PyAny> {
        // Same conversion as `__array__`, exposed under an explicit method name.
        self.__array__(py)
    }
}

macro_rules! impl_chunked {
($struct_name:ty) => {
#[pymethods]
Expand All @@ -67,6 +86,7 @@ macro_rules! impl_chunked {
let numpy_mod = py.import(intern!(py, "numpy"))?;
numpy_mod.call_method1(intern!(py, "concatenate"), (py_arrays,))
}

/// Copy this array to a `numpy` NDArray
pub fn to_numpy<'py>(&'py self, py: Python<'py>) -> PyResult<&'py PyAny> {
self.__array__(py)
Expand All @@ -88,6 +108,31 @@ impl_chunked!(ChunkedInt16Array);
impl_chunked!(ChunkedInt32Array);
impl_chunked!(ChunkedInt64Array);

#[pymethods]
impl ChunkedBooleanArray {
    // numpy `__array__` hook for the chunked array: every chunk is converted to its
    // own numpy array and the pieces are joined with `numpy.concatenate`.
    //
    // If any chunk contains a null, a Python `ValueError` is raised before any
    // conversion takes place.
    pub fn __array__<'py>(&'py self, py: Python<'py>) -> PyResult<&'py PyAny> {
        let total_nulls = self.0.null_count();
        if total_nulls > 0 {
            return Err(PyValueError::new_err(
                "Cannot create numpy array from pyarrow array with nulls.",
            ));
        }

        // Convert chunk by chunk, stopping at the first conversion error.
        let mut np_chunks = Vec::new();
        for chunk in self.0.chunks() {
            // The wrapper must outlive the borrowed numpy reference, so bind it first.
            let wrapped = BooleanArray(chunk.clone());
            np_chunks.push(wrapped.__array__(py)?.to_object(py));
        }

        // `concatenate` takes the sequence of arrays as one positional argument.
        py.import(intern!(py, "numpy"))?
            .call_method1(intern!(py, "concatenate"), (np_chunks,))
    }

    /// Copy this array to a `numpy` NDArray
    pub fn to_numpy<'py>(&'py self, py: Python<'py>) -> PyResult<&'py PyAny> {
        // Same conversion as `__array__`, exposed under an explicit method name.
        self.__array__(py)
    }
}
#[pymethods]
impl WKBArray {
/// An implementation of the Array interface, for interoperability with numpy and other
Expand Down
6 changes: 6 additions & 0 deletions src/chunked_array/chunked_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ impl<A: Array> ChunkedArray<A> {
self.chunks.first().unwrap().data_type()
}

/// Total number of nulls in the array, obtained by adding up the null count of
/// every chunk. An array without chunks reports `0`.
pub fn null_count(&self) -> usize {
    self.chunks.iter().map(|chunk| chunk.null_count()).sum()
}

/// Borrow the chunks that make up this array as a slice.
pub fn chunks(&self) -> &[A] {
    &self.chunks
}
Expand Down
Loading