From e22e38f45e2ab5fc7e806a446f74da4e51a359dd Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 29 Jul 2024 14:57:39 -0400 Subject: [PATCH 1/2] Array construction from arbitrary python values --- pyo3-arrow/src/array.rs | 78 +++++++++++++++++++++- pyo3-arrow/src/interop/numpy/from_numpy.rs | 8 +-- 2 files changed, 79 insertions(+), 7 deletions(-) diff --git a/pyo3-arrow/src/array.rs b/pyo3-arrow/src/array.rs index 3807269..23a6e5d 100644 --- a/pyo3-arrow/src/array.rs +++ b/pyo3-arrow/src/array.rs @@ -1,9 +1,17 @@ use std::fmt::Display; use std::sync::Arc; -use arrow_array::{make_array, Array, ArrayRef}; -use arrow_schema::{ArrowError, Field, FieldRef}; +use arrow::datatypes::{ + Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, + UInt64Type, UInt8Type, +}; +use arrow_array::{ + make_array, Array, ArrayRef, BinaryArray, BinaryViewArray, BooleanArray, LargeBinaryArray, + LargeStringArray, PrimitiveArray, StringArray, StringViewArray, +}; +use arrow_schema::{ArrowError, DataType, Field, FieldRef}; use numpy::PyUntypedArray; +use pyo3::exceptions::PyNotImplementedError; use pyo3::intern; use pyo3::prelude::*; use pyo3::types::{PyCapsule, PyTuple, PyType}; @@ -120,6 +128,72 @@ impl Display for PyArray { #[pymethods] impl PyArray { + #[new] + #[pyo3(signature = (obj, /, r#type, *))] + pub fn init(py: Python, obj: PyObject, r#type: PyDataType) -> PyResult { + if obj.bind(py).hasattr("__arrow_c_array__")? { + return Self::from_arrow(&py.get_type_bound::(), obj.bind(py)); + } + + macro_rules! impl_primitive { + ($rust_type:ty, $arrow_type:ty) => {{ + let values: Vec<$rust_type> = obj.extract(py)?; + Arc::new(PrimitiveArray::<$arrow_type>::from(values)) + }}; + } + + let data_type = r#type.into_inner(); + let array: ArrayRef = match data_type { + DataType::Float32 => impl_primitive!(f32, Float32Type), + DataType::Float64 => impl_primitive!(f64, Float64Type), + DataType::UInt8 => impl_primitive!(u8, UInt8Type), + DataType::UInt16 => impl_primitive!(u16, UInt16Type), + DataType::UInt32 => impl_primitive!(u32, UInt32Type), + DataType::UInt64 => impl_primitive!(u64, UInt64Type), + DataType::Int8 => impl_primitive!(i8, Int8Type), + DataType::Int16 => impl_primitive!(i16, Int16Type), + DataType::Int32 => impl_primitive!(i32, Int32Type), + DataType::Int64 => impl_primitive!(i64, Int64Type), + DataType::Boolean => { + let values: Vec = obj.extract(py)?; + Arc::new(BooleanArray::from(values)) + } + DataType::Binary => { + let values: Vec> = obj.extract(py)?; + let slices = values.iter().map(|x| x.as_slice()).collect::>(); + Arc::new(BinaryArray::from(slices)) + } + DataType::LargeBinary => { + let values: Vec> = obj.extract(py)?; + let slices = values.iter().map(|x| x.as_slice()).collect::>(); + Arc::new(LargeBinaryArray::from(slices)) + } + DataType::BinaryView => { + let values: Vec> = obj.extract(py)?; + let slices = values.iter().map(|x| x.as_slice()).collect::>(); + Arc::new(BinaryViewArray::from(slices)) + } + DataType::Utf8 => { + let values: Vec = obj.extract(py)?; + Arc::new(StringArray::from(values)) + } + DataType::LargeUtf8 => { + let values: Vec = obj.extract(py)?; + Arc::new(LargeStringArray::from(values)) + } + DataType::Utf8View => { + let values: Vec = obj.extract(py)?; + Arc::new(StringViewArray::from(values)) + } + dt => { + return Err(PyNotImplementedError::new_err(format!( + "Array constructor for {dt} not yet implemented." + ))) + } + }; + Ok(Self::new(array, Field::new("", data_type, true).into())) + } + /// An implementation of the Array interface, for interoperability with numpy and other /// array libraries. pub fn __array__(&self, py: Python) -> PyResult { diff --git a/pyo3-arrow/src/interop/numpy/from_numpy.rs b/pyo3-arrow/src/interop/numpy/from_numpy.rs index 54222c8..b4ae525 100644 --- a/pyo3-arrow/src/interop/numpy/from_numpy.rs +++ b/pyo3-arrow/src/interop/numpy/from_numpy.rs @@ -5,7 +5,6 @@ use arrow::datatypes::{ UInt64Type, UInt8Type, }; use arrow_array::{ArrayRef, BooleanArray, PrimitiveArray}; -use arrow_buffer::BooleanBuffer; use arrow_schema::DataType; use numpy::{PyArray1, PyUntypedArray}; use pyo3::exceptions::PyValueError; @@ -14,8 +13,8 @@ use crate::error::PyArrowResult; pub fn from_numpy(array: &PyUntypedArray, arrow_data_type: DataType) -> PyArrowResult { macro_rules! numpy_to_arrow { - ($dtype:ty, $arrow_type:ty) => {{ - let arr = array.downcast::>()?; + ($rust_type:ty, $arrow_type:ty) => {{ + let arr = array.downcast::>()?; Ok(Arc::new(PrimitiveArray::<$arrow_type>::from( arr.to_owned_array().to_vec(), ))) @@ -36,8 +35,7 @@ pub fn from_numpy(array: &PyUntypedArray, arrow_data_type: DataType) -> PyArrowR DataType::Int64 => numpy_to_arrow!(i64, Int64Type), DataType::Boolean => { let arr = array.downcast::>()?; - let buffer = BooleanBuffer::from(arr.to_owned_array().to_vec()); - Ok(Arc::new(BooleanArray::new(buffer, None))) + Ok(Arc::new(BooleanArray::from(arr.to_owned_array().to_vec()))) } _ => { Err(PyValueError::new_err(format!("Unsupported data type {}", arrow_data_type)).into()) From 54a2c58748cc332747bf62fec2ed05d2afdabe41 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 29 Jul 2024 15:09:30 -0400 Subject: [PATCH 2/2] construction of arbitrary array --- arro3-core/python/arro3/core/_rust.pyi | 9 ++++++++- pyo3-arrow/src/array.rs | 4 ---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arro3-core/python/arro3/core/_rust.pyi b/arro3-core/python/arro3/core/_rust.pyi index 8dacd0b..6eb78b2 100644 --- a/arro3-core/python/arro3/core/_rust.pyi +++ b/arro3-core/python/arro3/core/_rust.pyi @@ -1,4 +1,4 @@ -from typing import Sequence +from typing import Any, Sequence import numpy as np from numpy.typing import NDArray @@ -9,6 +9,13 @@ from .types import ( ) class Array: + def __init__(self, obj: Sequence[Any], /, type: ArrowSchemaExportable) -> None: + """Create arro3.core.Array instance from a sequence of Python objects. + + Args: + obj: A sequence of input objects. + type: Explicit type to attempt to coerce to. + """ def __array__(self) -> NDArray: ... def __arrow_c_array__( self, requested_schema: object | None = None diff --git a/pyo3-arrow/src/array.rs b/pyo3-arrow/src/array.rs index 23a6e5d..bd8c278 100644 --- a/pyo3-arrow/src/array.rs +++ b/pyo3-arrow/src/array.rs @@ -131,10 +131,6 @@ impl PyArray { #[new] #[pyo3(signature = (obj, /, r#type, *))] pub fn init(py: Python, obj: PyObject, r#type: PyDataType) -> PyResult { - if obj.bind(py).hasattr("__arrow_c_array__")? { - return Self::from_arrow(&py.get_type_bound::(), obj.bind(py)); - } - macro_rules! impl_primitive { ($rust_type:ty, $arrow_type:ty) => {{ let values: Vec<$rust_type> = obj.extract(py)?;