Skip to content

Commit

Permalink
Array construction from arbitrary python values (#75)
Browse files Browse the repository at this point in the history
* Array construction from arbitrary python values

* construction of arbitrary array
  • Loading branch information
kylebarron authored Jul 29, 2024
1 parent c0f68c4 commit 9578330
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 8 deletions.
9 changes: 8 additions & 1 deletion arro3-core/python/arro3/core/_core.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Sequence
from typing import Any, Sequence
import numpy as np
from numpy.typing import NDArray

Expand All @@ -9,6 +9,13 @@ from .types import (
)

class Array:
def __init__(self, obj: Sequence[Any], /, type: ArrowSchemaExportable) -> None:
    """Create an arro3.core.Array instance from a sequence of Python objects.

    Args:
        obj: A sequence of input objects. Positional-only.
        type: Explicit type to attempt to coerce to.
    """
def __array__(self) -> NDArray: ...
def __arrow_c_array__(
self, requested_schema: object | None = None
Expand Down
74 changes: 72 additions & 2 deletions pyo3-arrow/src/array.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
use std::fmt::Display;
use std::sync::Arc;

use arrow_array::{make_array, Array, ArrayRef};
use arrow_schema::{ArrowError, Field, FieldRef};
use arrow::datatypes::{
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
UInt64Type, UInt8Type,
};
use arrow_array::{
make_array, Array, ArrayRef, BinaryArray, BinaryViewArray, BooleanArray, LargeBinaryArray,
LargeStringArray, PrimitiveArray, StringArray, StringViewArray,
};
use arrow_schema::{ArrowError, DataType, Field, FieldRef};
use numpy::PyUntypedArray;
use pyo3::exceptions::PyNotImplementedError;
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::{PyCapsule, PyTuple, PyType};
Expand Down Expand Up @@ -123,6 +131,68 @@ impl Display for PyArray {

#[pymethods]
impl PyArray {
#[new]
#[pyo3(signature = (obj, /, r#type, *))]
pub fn init(py: Python, obj: PyObject, r#type: PyDataType) -> PyResult<Self> {
macro_rules! impl_primitive {
($rust_type:ty, $arrow_type:ty) => {{
let values: Vec<$rust_type> = obj.extract(py)?;
Arc::new(PrimitiveArray::<$arrow_type>::from(values))
}};
}

let data_type = r#type.into_inner();
let array: ArrayRef = match data_type {
DataType::Float32 => impl_primitive!(f32, Float32Type),
DataType::Float64 => impl_primitive!(f64, Float64Type),
DataType::UInt8 => impl_primitive!(u8, UInt8Type),
DataType::UInt16 => impl_primitive!(u16, UInt16Type),
DataType::UInt32 => impl_primitive!(u32, UInt32Type),
DataType::UInt64 => impl_primitive!(u64, UInt64Type),
DataType::Int8 => impl_primitive!(i8, Int8Type),
DataType::Int16 => impl_primitive!(i16, Int16Type),
DataType::Int32 => impl_primitive!(i32, Int32Type),
DataType::Int64 => impl_primitive!(i64, Int64Type),
DataType::Boolean => {
let values: Vec<bool> = obj.extract(py)?;
Arc::new(BooleanArray::from(values))
}
DataType::Binary => {
let values: Vec<Vec<u8>> = obj.extract(py)?;
let slices = values.iter().map(|x| x.as_slice()).collect::<Vec<_>>();
Arc::new(BinaryArray::from(slices))
}
DataType::LargeBinary => {
let values: Vec<Vec<u8>> = obj.extract(py)?;
let slices = values.iter().map(|x| x.as_slice()).collect::<Vec<_>>();
Arc::new(LargeBinaryArray::from(slices))
}
DataType::BinaryView => {
let values: Vec<Vec<u8>> = obj.extract(py)?;
let slices = values.iter().map(|x| x.as_slice()).collect::<Vec<_>>();
Arc::new(BinaryViewArray::from(slices))
}
DataType::Utf8 => {
let values: Vec<String> = obj.extract(py)?;
Arc::new(StringArray::from(values))
}
DataType::LargeUtf8 => {
let values: Vec<String> = obj.extract(py)?;
Arc::new(LargeStringArray::from(values))
}
DataType::Utf8View => {
let values: Vec<String> = obj.extract(py)?;
Arc::new(StringViewArray::from(values))
}
dt => {
return Err(PyNotImplementedError::new_err(format!(
"Array constructor for {dt} not yet implemented."
)))
}
};
Ok(Self::new(array, Field::new("", data_type, true).into()))
}

/// An implementation of the Array interface, for interoperability with numpy and other
/// array libraries.
pub fn __array__(&self, py: Python) -> PyResult<PyObject> {
Expand Down
8 changes: 3 additions & 5 deletions pyo3-arrow/src/interop/numpy/from_numpy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ use arrow::datatypes::{
UInt64Type, UInt8Type,
};
use arrow_array::{ArrayRef, BooleanArray, PrimitiveArray};
use arrow_buffer::BooleanBuffer;
use arrow_schema::DataType;
use numpy::{PyArray1, PyUntypedArray};
use pyo3::exceptions::PyValueError;
Expand All @@ -14,8 +13,8 @@ use crate::error::PyArrowResult;

pub fn from_numpy(array: &PyUntypedArray, arrow_data_type: DataType) -> PyArrowResult<ArrayRef> {
macro_rules! numpy_to_arrow {
($dtype:ty, $arrow_type:ty) => {{
let arr = array.downcast::<PyArray1<$dtype>>()?;
($rust_type:ty, $arrow_type:ty) => {{
let arr = array.downcast::<PyArray1<$rust_type>>()?;
Ok(Arc::new(PrimitiveArray::<$arrow_type>::from(
arr.to_owned_array().to_vec(),
)))
Expand All @@ -36,8 +35,7 @@ pub fn from_numpy(array: &PyUntypedArray, arrow_data_type: DataType) -> PyArrowR
DataType::Int64 => numpy_to_arrow!(i64, Int64Type),
DataType::Boolean => {
let arr = array.downcast::<PyArray1<bool>>()?;
let buffer = BooleanBuffer::from(arr.to_owned_array().to_vec());
Ok(Arc::new(BooleanArray::new(buffer, None)))
Ok(Arc::new(BooleanArray::from(arr.to_owned_array().to_vec())))
}
_ => {
Err(PyValueError::new_err(format!("Unsupported data type {}", arrow_data_type)).into())
Expand Down

0 comments on commit 9578330

Please sign in to comment.