Skip to content

Commit

Permalink
Fixed UTF-8 and fixed-size binary conversion from Arrow to NumPy
Browse files Browse the repository at this point in the history
  • Loading branch information
«ratal» committed Jan 19, 2024
1 parent 9fc6447 commit f645c8c
Showing 1 changed file with 19 additions and 6 deletions.
25 changes: 19 additions & 6 deletions src/export/numpy.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! This module provides methods to get channelData directly into Python
use arrow2::array::{Array, BinaryArray, PrimitiveArray, Utf8Array};
use arrow2::array::{Array, BinaryArray, FixedSizeBinaryArray, PrimitiveArray, Utf8Array};
use arrow2::bitmap::Bitmap;
use arrow2::datatypes::{DataType, PhysicalType, PrimitiveType};

Expand Down Expand Up @@ -155,12 +155,17 @@ pub fn arrow_to_numpy(py: Python, array: Box<dyn Array>) -> PyObject {
.expect("could not downcast binary array to bytes vect");
array.values().to_pyarray(py).into_py(py)
}
DataType::FixedSizeBinary(_) => {
DataType::FixedSizeBinary(size) => {
let array = array
.as_any()
.downcast_ref::<BinaryArray<i64>>()
.downcast_ref::<FixedSizeBinaryArray>()
.expect("could not downcast large binary to bytes vect");
array.values().to_pyarray(py).into_py(py)
array
.values()
.to_pyarray(py)
.reshape([array.len() / size, *size])
.expect("failed reshaping the fixedsizebinaryarray")
.into_py(py)
}
DataType::LargeBinary => {
let array = array
Expand All @@ -174,14 +179,22 @@ pub fn arrow_to_numpy(py: Python, array: Box<dyn Array>) -> PyObject {
.as_any()
.downcast_ref::<Utf8Array<i32>>()
.expect("could not downcast to utf8 array");
array.values().to_pyarray(py).into_py(py)
let mut vect_str = Vec::<PyObject>::with_capacity(array.len());
array
.values_iter()
.for_each(|x| vect_str.push(x.to_object(py)));
vect_str.to_pyarray(py).into_py(py)
}
DataType::LargeUtf8 => {
let array = array
.as_any()
.downcast_ref::<Utf8Array<i64>>()
.expect("could not downcast to long utf8 array");
array.values().to_pyarray(py).into_py(py)
let mut vect_str = Vec::<PyObject>::with_capacity(array.len());
array
.values_iter()
.for_each(|x| vect_str.push(x.to_object(py)));
vect_str.to_pyarray(py).into_py(py)
}
DataType::FixedSizeList(field, _size) => match field.data_type.to_physical_type() {
// Complex types
Expand Down

0 comments on commit f645c8c

Please sign in to comment.