From numpy (#68)
* From numpy

* fix lint

* remove unwrap

* cleanup
kylebarron authored Jul 26, 2024
1 parent 6bcd421 commit 264f20f
Showing 12 changed files with 204 additions and 3 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock


3 changes: 3 additions & 0 deletions arro3-core/Cargo.toml
@@ -17,5 +17,8 @@ name = "_rust"
crate-type = ["cdylib"]

[dependencies]
arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
arrow-schema = { workspace = true }
pyo3-arrow = { path = "../pyo3-arrow" }
pyo3 = { workspace = true, features = ["abi3-py38"] }
5 changes: 5 additions & 0 deletions arro3-core/python/arro3/core/_rust.pyi
@@ -1,4 +1,5 @@
from typing import Sequence
import numpy as np
from numpy.typing import NDArray

from .types import (
@@ -34,6 +35,10 @@ class Array:
def from_arrow_pycapsule(cls, schema_capsule, array_capsule) -> Array:
"""Construct this object from bare Arrow PyCapsules"""

@classmethod
def from_numpy(cls, array: np.ndarray, type: ArrowSchemaExportable) -> Array:
"""Construct an Array from a numpy ndarray"""

def to_numpy(self) -> NDArray:
"""Return a numpy copy of this array."""

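To make the new stub entries concrete, here is a minimal round-trip sketch. It assumes Array is re-exported from arro3.core and uses a recent pyarrow type object as the ArrowSchemaExportable argument; neither assumption comes from this diff.

```python
import numpy as np
import pyarrow as pa  # assumption: any object implementing __arrow_c_schema__ works as the type
from arro3.core import Array  # assumption: Array is re-exported from arro3.core

np_arr = np.array([1, 2, 3], dtype=np.int64)
arr = Array.from_numpy(np_arr, pa.int64())  # copy numpy values into an Arrow Int64 array
back = arr.to_numpy()                       # copy back out to numpy
assert np.array_equal(np_arr, back)
```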
102 changes: 102 additions & 0 deletions arro3-core/src/constructors.rs
@@ -0,0 +1,102 @@
use std::sync::Arc;

use arrow_array::cast::AsArray;
use arrow_array::types::{Int32Type, Int64Type};
use arrow_array::{Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray, StructArray};
use arrow_buffer::OffsetBuffer;
use arrow_schema::{DataType, Field};
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;

use pyo3_arrow::error::PyArrowResult;
use pyo3_arrow::{PyArray, PyField};

#[pyfunction]
#[pyo3(signature=(values, list_size, *, r#type=None))]
#[allow(dead_code)]
fn fixed_size_list_array(
py: Python,
values: PyArray,
list_size: i32,
r#type: Option<PyField>,
) -> PyArrowResult<PyObject> {
let (values_array, values_field) = values.into_inner();
let field = r#type.map(|f| f.into_inner()).unwrap_or_else(|| {
Arc::new(Field::new_fixed_size_list(
"",
values_field,
list_size,
true,
))
});

let array = FixedSizeListArray::try_new(field.clone(), list_size, values_array, None)?;
Ok(PyArray::new(Arc::new(array), field).to_arro3(py)?)
}

#[pyfunction]
#[pyo3(signature=(offsets, values, *, r#type=None))]
#[allow(dead_code)]
fn list_array(
py: Python,
offsets: PyArray,
values: PyArray,
r#type: Option<PyField>,
) -> PyArrowResult<PyObject> {
let (values_array, values_field) = values.into_inner();
let (offsets_array, _) = offsets.into_inner();
let large_offsets = match offsets_array.data_type() {
DataType::Int32 => false,
DataType::Int64 => true,
_ => {
return Err(
PyValueError::new_err("Expected offsets to have int32 or int64 type").into(),
)
}
};
let field = r#type.map(|f| f.into_inner()).unwrap_or_else(|| {
if large_offsets {
Arc::new(Field::new_large_list("item", values_field, true))
} else {
Arc::new(Field::new_list("item", values_field, true))
}
});

let list_array: ArrayRef = if large_offsets {
Arc::new(LargeListArray::try_new(
field.clone(),
OffsetBuffer::new(offsets_array.as_primitive::<Int64Type>().values().clone()),
values_array,
None,
)?)
} else {
Arc::new(ListArray::try_new(
field.clone(),
OffsetBuffer::new(offsets_array.as_primitive::<Int32Type>().values().clone()),
values_array,
None,
)?)
};
Ok(PyArray::new(Arc::new(list_array), field).to_arro3(py)?)
}

#[pyfunction]
#[pyo3(signature=(arrays, *, fields))]
#[allow(dead_code)]
fn struct_array(py: Python, arrays: Vec<PyArray>, fields: Vec<PyField>) -> PyArrowResult<PyObject> {
let arrays = arrays
.into_iter()
.map(|arr| {
let (arr, _field) = arr.into_inner();
arr
})
.collect::<Vec<_>>();
let fields = fields
.into_iter()
.map(|field| field.into_inner())
.collect::<Vec<_>>();

let array = StructArray::try_new(fields.clone().into(), arrays, None)?;
let field = Field::new_struct("", fields, true);
Ok(PyArray::new(Arc::new(array), field.into()).to_arro3(py)?)
}
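These three constructors are added behind #[allow(dead_code)] and are not yet registered on the Python module in this commit, so the following is only a sketch of the intended call shape; the arro3.core function names are assumptions, not part of this diff.

```python
import numpy as np
import pyarrow as pa
from arro3.core import Array
from arro3.core import list_array, struct_array  # hypothetical exports; not registered in this commit

values = Array.from_numpy(np.array([1, 2, 3, 4], dtype=np.int64), pa.int64())
offsets = Array.from_numpy(np.array([0, 2, 4], dtype=np.int32), pa.int32())

# int32 offsets take the ListArray branch; int64 offsets would take the LargeListArray branch
lists = list_array(offsets, values)

# struct_array zips parallel child arrays with keyword-only fields
structs = struct_array([values], fields=[pa.field("a", pa.int64())])
```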
2 changes: 2 additions & 0 deletions arro3-core/src/lib.rs
@@ -1,5 +1,7 @@
use pyo3::prelude::*;

mod constructors;

const VERSION: &str = env!("CARGO_PKG_VERSION");

#[pyfunction]
18 changes: 17 additions & 1 deletion pyo3-arrow/src/array.rs
@@ -3,6 +3,7 @@ use std::sync::Arc;

use arrow_array::{make_array, Array, ArrayRef};
use arrow_schema::{ArrowError, Field, FieldRef};
use numpy::PyUntypedArray;
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::{PyCapsule, PyTuple, PyType};
@@ -11,6 +12,7 @@ use crate::error::PyArrowResult;
use crate::ffi::from_python::utils::import_array_pycapsules;
use crate::ffi::to_array_pycapsules;
use crate::ffi::to_python::nanoarrow::to_nanoarrow_array;
use crate::interop::numpy::from_numpy::from_numpy;
use crate::interop::numpy::to_numpy::to_numpy;
use crate::PyDataType;

@@ -157,7 +159,6 @@ impl PyArray {
input.extract()
}

/// Construct this object from a bare Arrow PyCapsule
#[classmethod]
pub fn from_arrow_pycapsule(
_cls: &Bound<PyType>,
@@ -168,6 +169,21 @@
Ok(Self::new(array, Arc::new(field)))
}

#[classmethod]
pub fn from_numpy(
_cls: &Bound<PyType>,
array: Bound<'_, PyAny>,
r#type: PyDataType,
) -> PyArrowResult<Self> {
let mut numpy_array = array;
if numpy_array.hasattr("__array__")? {
numpy_array = numpy_array.call_method0("__array__")?;
};
let numpy_array: &PyUntypedArray = FromPyObject::extract_bound(&numpy_array)?;
let arrow_array = from_numpy(numpy_array, r#type.into_inner())?;
Ok(Self::from_array_ref(arrow_array))
}

#[pyo3(signature = (offset=0, length=None))]
pub fn slice(&self, py: Python, offset: usize, length: Option<usize>) -> PyResult<PyObject> {
let length = length.unwrap_or_else(|| self.array.len() - offset);
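A small sketch of what the __array__ fallback enables on the Python side: any array-like that exposes __array__ (a pandas Series is assumed here purely for illustration) is coerced to an ndarray before conversion.

```python
import pandas as pd  # assumption: pandas is available; any object with __array__ works
import pyarrow as pa
from arro3.core import Array

s = pd.Series([1.5, 2.5, 3.5], dtype="float64")
arr = Array.from_numpy(s, pa.float64())  # from_numpy calls s.__array__() first, then converts
```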
2 changes: 1 addition & 1 deletion pyo3-arrow/src/datatypes.rs
@@ -28,7 +28,7 @@ impl<'a> FromPyObject<'a> for PyTimeUnit {
}
}

#[derive(Clone, PartialEq, Eq, Debug)]
#[derive(PartialEq, Eq, Debug)]
#[pyclass(module = "arro3.core._rust", name = "DataType", subclass)]
pub struct PyDataType(DataType);

10 changes: 10 additions & 0 deletions pyo3-arrow/src/error.rs
@@ -1,5 +1,6 @@
use pyo3::exceptions::{PyException, PyTypeError, PyValueError};
use pyo3::prelude::*;
use pyo3::PyDowncastError;

pub enum PyArrowError {
ArrowError(arrow::error::ArrowError),
@@ -27,6 +28,15 @@ impl From<PyTypeError> for PyArrowError {
}
}

impl<'a> From<PyDowncastError<'a>> for PyArrowError {
fn from(other: PyDowncastError<'a>) -> Self {
Self::PyErr(PyValueError::new_err(format!(
"Could not downcast: {}",
other
)))
}
}

impl From<PyValueError> for PyArrowError {
fn from(other: PyValueError) -> Self {
Self::PyErr((&other).into())
12 changes: 12 additions & 0 deletions pyo3-arrow/src/ffi/from_python/datatypes.rs
@@ -0,0 +1,12 @@
use crate::ffi::from_python::utils::call_arrow_c_schema;
use crate::field::PyField;
use crate::PyDataType;
use pyo3::prelude::*;
use pyo3::{PyAny, PyResult};

impl<'a> FromPyObject<'a> for PyDataType {
fn extract_bound(ob: &Bound<'a, PyAny>) -> PyResult<Self> {
let capsule = call_arrow_c_schema(ob)?;
Python::with_gil(|py| Self::from_arrow_pycapsule(&py.get_type_bound::<PyField>(), &capsule))
}
}
1 change: 1 addition & 0 deletions pyo3-arrow/src/ffi/from_python/mod.rs
@@ -1,6 +1,7 @@
mod array;
mod array_reader;
mod chunked;
mod datatypes;
pub(crate) mod ffi_stream;
mod field;
mod input;
46 changes: 46 additions & 0 deletions pyo3-arrow/src/interop/numpy/from_numpy.rs
@@ -0,0 +1,46 @@
use std::sync::Arc;

use arrow::datatypes::{
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
UInt64Type, UInt8Type,
};
use arrow_array::{ArrayRef, BooleanArray, PrimitiveArray};
use arrow_buffer::BooleanBuffer;
use arrow_schema::DataType;
use numpy::{PyArray1, PyUntypedArray};
use pyo3::exceptions::PyValueError;

use crate::error::PyArrowResult;

pub fn from_numpy(array: &PyUntypedArray, arrow_data_type: DataType) -> PyArrowResult<ArrayRef> {
macro_rules! numpy_to_arrow {
($dtype:ty, $arrow_type:ty) => {{
let arr = array.downcast::<PyArray1<$dtype>>()?;
Ok(Arc::new(PrimitiveArray::<$arrow_type>::from(
arr.to_owned_array().to_vec(),
)))
}};
}

match arrow_data_type {
// DataType::Float16 => numpy_to_arrow!(f16, Float16Type),
DataType::Float32 => numpy_to_arrow!(f32, Float32Type),
DataType::Float64 => numpy_to_arrow!(f64, Float64Type),
DataType::UInt8 => numpy_to_arrow!(u8, UInt8Type),
DataType::UInt16 => numpy_to_arrow!(u16, UInt16Type),
DataType::UInt32 => numpy_to_arrow!(u32, UInt32Type),
DataType::UInt64 => numpy_to_arrow!(u64, UInt64Type),
DataType::Int8 => numpy_to_arrow!(i8, Int8Type),
DataType::Int16 => numpy_to_arrow!(i16, Int16Type),
DataType::Int32 => numpy_to_arrow!(i32, Int32Type),
DataType::Int64 => numpy_to_arrow!(i64, Int64Type),
DataType::Boolean => {
let arr = array.downcast::<PyArray1<bool>>()?;
let buffer = BooleanBuffer::from(arr.to_owned_array().to_vec());
Ok(Arc::new(BooleanArray::new(buffer, None)))
}
_ => {
Err(PyValueError::new_err(format!("Unsupported data type {}", arrow_data_type)).into())
}
}
}
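A quick sketch of what this dispatch accepts from Python, assuming Array.from_numpy is exposed as in the stub above and recent pyarrow type objects are used for the Arrow types:

```python
import numpy as np
import pyarrow as pa
from arro3.core import Array

Array.from_numpy(np.arange(4, dtype=np.uint16), pa.uint16())  # primitive path via the macro
Array.from_numpy(np.array([True, False, True]), pa.bool_())   # boolean path via BooleanBuffer

try:
    Array.from_numpy(np.array(["a", "b"]), pa.string())       # Utf8 is not handled by the match
except Exception as exc:
    print(exc)  # "Unsupported data type Utf8"
```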
3 changes: 2 additions & 1 deletion pyo3-arrow/src/interop/numpy/mod.rs
@@ -1 +1,2 @@
pub mod to_numpy;
pub(crate) mod from_numpy;
pub(crate) mod to_numpy;
