Skip to content

Commit

Permalink
Function to access list offsets (#87)
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebarron authored Jul 31, 2024
1 parent 562fa9d commit bd71cf8
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 0 deletions.
22 changes: 22 additions & 0 deletions arro3-compute/python/arro3/compute/_compute.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,28 @@ def list_flatten(
_description_
"""

@overload
def list_offsets(input: ArrowArrayExportable, *, physical: bool = True) -> Array: ...
@overload
def list_offsets(
input: ArrowStreamExportable, *, physical: bool = True
) -> ArrayReader: ...
def list_offsets(
input: ArrowArrayExportable | ArrowStreamExportable, *, physical: bool = True
) -> Array | ArrayReader:
    """Access the offsets of this ListArray or LargeListArray

    Args:
        input: A list-typed (List or LargeList) Arrow array, or a stream of such arrays.
        physical: If True, return the physical (unsliced) offsets of the provided list array. Slicing offsets (False) is not yet implemented.
    Raises:
        Exception if not a list-typed array.
        Exception if physical is False, since logical offsets are not yet implemented.
    Returns:
        The offsets as an Int32 array for List input or an Int64 array for LargeList input. An Array is returned for array input and an ArrayReader for stream input.
    """

def struct_field(
values: ArrowArrayExportable,
/,
Expand Down
2 changes: 2 additions & 0 deletions arro3-compute/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use pyo3::prelude::*;
mod cast;
mod concat;
mod list_flatten;
mod list_offsets;
mod struct_field;
mod take;

Expand All @@ -22,6 +23,7 @@ fn _compute(_py: Python, m: &Bound<PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(take::take))?;

m.add_wrapped(wrap_pyfunction!(list_flatten::list_flatten))?;
m.add_wrapped(wrap_pyfunction!(list_offsets::list_offsets))?;
m.add_wrapped(wrap_pyfunction!(struct_field::struct_field))?;

Ok(())
Expand Down
67 changes: 67 additions & 0 deletions arro3-compute/src/list_offsets.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
use std::sync::Arc;

use arrow::array::AsArray;
use arrow_array::{ArrayRef, Int32Array, Int64Array};
use arrow_schema::{ArrowError, DataType, Field};
use pyo3::prelude::*;
use pyo3_arrow::error::PyArrowResult;
use pyo3_arrow::ffi::ArrayIterator;
use pyo3_arrow::input::AnyArray;
use pyo3_arrow::{PyArray, PyArrayReader};

// Python entry point: return the offsets of a list-typed array, or of every
// chunk of a list-typed stream.
//
// * `input`    - either a single Arrow array or a stream of Arrow arrays.
// * `physical` - forwarded unchanged to `_list_offsets`.
//
// An array input is processed immediately; a stream input is wrapped in a
// lazy reader whose chunks are converted one at a time as they are consumed.
// A stream whose field is neither `List` nor `LargeList` is rejected up front
// with a schema error.
//
// NOTE: plain `//` comments are used on purpose so that the docstring exposed
// to Python by `#[pyfunction]` stays exactly as it was.
#[pyfunction]
#[pyo3(signature = (input, *, physical=true))]
pub fn list_offsets(py: Python, input: AnyArray, physical: bool) -> PyArrowResult<PyObject> {
    match input {
        AnyArray::Array(py_array) => {
            // The accompanying field is not needed to read the offsets.
            let (values, _) = py_array.into_inner();
            let result = PyArray::from_array_ref(_list_offsets(values, physical)?);
            let exported = result.to_arro3(py)?;
            Ok(exported)
        }
        AnyArray::Stream(py_stream) => {
            let reader = py_stream.into_reader()?;

            // The output field must be known before any chunk is read, so the
            // offset width is derived from the stream's declared type.
            let offsets_type = match reader.field().data_type() {
                DataType::List(_) => DataType::Int32,
                DataType::LargeList(_) => DataType::Int64,
                _ => {
                    let err = ArrowError::SchemaError("Expected list-typed Array".to_string());
                    return Err(err.into());
                }
            };
            let out_field = Field::new("", offsets_type, false);

            // Errors from the upstream reader and from the conversion are both
            // surfaced through the resulting iterator.
            let chunks = reader
                .into_iter()
                .map(move |chunk| _list_offsets(chunk?, physical));

            let out_reader =
                PyArrayReader::new(Box::new(ArrayIterator::new(chunks, out_field.into())));
            let exported = out_reader.to_arro3(py)?;
            Ok(exported)
        }
    }
}

/// Return the offsets of a `List` or `LargeList` array as an integer array.
///
/// `List` input yields an `Int32Array`; `LargeList` input yields an
/// `Int64Array`. The result shares the list array's existing offsets buffer
/// instead of copying it.
///
/// # Errors
///
/// * `ArrowError::ComputeError` if `physical` is `false`; logical offsets are
///   not yet implemented.
/// * `ArrowError::SchemaError` if `array` is neither `List` nor `LargeList`.
fn _list_offsets(array: ArrayRef, physical: bool) -> Result<ArrayRef, ArrowError> {
    if !physical {
        return Err(ArrowError::ComputeError(
            "Logical list offset slicing not yet implemented".to_string(),
        ));
    }

    match array.data_type() {
        DataType::List(_) => {
            let offsets = array.as_list::<i32>().offsets();
            // Cloning the inner `ScalarBuffer` only bumps a reference count,
            // so no per-element copy of the offsets is made.
            Ok(Arc::new(Int32Array::new(offsets.inner().clone(), None)))
        }
        DataType::LargeList(_) => {
            let offsets = array.as_list::<i64>().offsets();
            // Same zero-copy sharing as the 32-bit case.
            Ok(Arc::new(Int64Array::new(offsets.inner().clone(), None)))
        }
        _ => Err(ArrowError::SchemaError(
            "Expected list-typed Array".to_string(),
        )),
    }
}

0 comments on commit bd71cf8

Please sign in to comment.