Skip to content

Commit

Permalink
[Images] [4/N] Add Image series --> Python (NumPy) series casting. (#…
Browse files Browse the repository at this point in the history
…990)

This PR adds an `Image` series --> Python egress path via the casting
kernel: elements of an `ImageArray` series are converted to NumPy
ndarrays when the series is cast with `.cast(DataType::Python)`. This is
implemented for both fixed-shape and variable-shape image arrays.

Automatic conversion from PIL images in `Series.from_pylist()` and to
NumPy ndarrays in `Series.to_pylist()` is left for a future PR.
  • Loading branch information
clarkzinzow authored Jun 5, 2023
1 parent 74e3ca5 commit 8cc6e69
Show file tree
Hide file tree
Showing 5 changed files with 160 additions and 72 deletions.
9 changes: 7 additions & 2 deletions src/array/ops/as_arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ use crate::{
},
};

#[cfg(feature = "python")]
use crate::array::pseudo_arrow::PseudoArrowArray;
#[cfg(feature = "python")]
use crate::datatypes::PythonArray;

pub trait AsArrow {
type Output;

Expand Down Expand Up @@ -92,8 +97,8 @@ impl AsArrow for StructArray {
}

#[cfg(feature = "python")]
impl AsArrow for crate::datatypes::PythonArray {
type Output = crate::array::pseudo_arrow::PseudoArrowArray<pyo3::PyObject>;
impl AsArrow for PythonArray {
type Output = PseudoArrowArray<pyo3::PyObject>;

// downcasts a DataArray<T> to a PseudoArrowArray of PyObject.
fn as_arrow(&self) -> &Self::Output {
Expand Down
12 changes: 8 additions & 4 deletions src/array/ops/broadcast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ use crate::{

use super::as_arrow::AsArrow;

#[cfg(feature = "python")]
use crate::array::pseudo_arrow::PseudoArrowArray;
#[cfg(feature = "python")]
use crate::datatypes::PythonArray;

pub trait Broadcastable {
fn broadcast(&self, num: usize) -> DaftResult<Self>
where
Expand Down Expand Up @@ -292,9 +297,8 @@ impl Broadcastable for crate::datatypes::PythonArray {
}
};

let repeated_values_array: Box<dyn arrow2::array::Array> = Box::new(
crate::array::pseudo_arrow::PseudoArrowArray::new(repeated_values.into(), validity),
);
crate::datatypes::PythonArray::new(self.field.clone(), repeated_values_array)
let repeated_values_array: Box<dyn arrow2::array::Array> =
Box::new(PseudoArrowArray::new(repeated_values.into(), validity));
PythonArray::new(self.field.clone(), repeated_values_array)
}
}
103 changes: 93 additions & 10 deletions src/array/ops/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,40 @@ use arrow2::compute::{

use crate::series::IntoSeries;
use crate::{
array::{ops::image::ImageArrayVecs, DataArray},
array::DataArray,
datatypes::logical::{
DateArray, EmbeddingArray, FixedShapeImageArray, ImageArray, LogicalArray,
},
datatypes::{DaftArrowBackedType, DataType, Field, Utf8Array},
error::{DaftError, DaftResult},
series::Series,
with_match_arrow_daft_types, with_match_daft_logical_types, with_match_numeric_daft_types,
with_match_arrow_daft_types, with_match_daft_logical_types,
};
use arrow2::array::Array;
use num_traits::NumCast;

#[cfg(feature = "python")]
use crate::datatypes::{FixedSizeListArray, ListArray};
use crate::array::{ops::image::ImageArrayVecs, pseudo_arrow::PseudoArrowArray};
#[cfg(feature = "python")]
use crate::datatypes::{FixedSizeListArray, ImageMode, ListArray, PythonArray};
#[cfg(feature = "python")]
use crate::ffi;
#[cfg(feature = "python")]
use crate::datatypes::{ImageMode, PythonArray};
use crate::with_match_numeric_daft_types;
#[cfg(feature = "python")]
use arrow2::array::Array;
#[cfg(feature = "python")]
use log;
#[cfg(feature = "python")]
use num_traits::ToPrimitive;
use ndarray::IntoDimension;
#[cfg(feature = "python")]
use num_traits::{NumCast, ToPrimitive};
#[cfg(feature = "python")]
use numpy::PyReadonlyArrayDyn;
use numpy::{PyArray3, PyReadonlyArrayDyn};
#[cfg(feature = "python")]
use pyo3::prelude::*;
#[cfg(feature = "python")]
use std::iter;
#[cfg(feature = "python")]
use std::ops::Deref;

use super::as_arrow::AsArrow;
use std::sync::Arc;
Expand Down Expand Up @@ -701,12 +709,87 @@ impl EmbeddingArray {

impl ImageArray {
/// Casts this image array to `dtype`, returning the result as a `Series`.
///
/// When the `python` feature is enabled and `dtype` is `DataType::Python`,
/// one Python object is produced per row and the objects are wrapped in a
/// `PythonArray`. Every other target dtype is delegated to the physical
/// array's `cast`.
///
/// Errors raised while importing `pyarrow`, crossing the FFI layer, calling
/// the Python methods, or constructing the `PythonArray` are propagated
/// with `?`.
pub fn cast(&self, dtype: &DataType) -> DaftResult<Series> {
// NOTE(review): this text is a rendered diff without +/- markers; the next
// line appears to be the removed pre-change body (it is the same call that
// survives as the fallback arm of the `match` below) — confirm against the
// actual file.
self.physical.cast(dtype)
match dtype {
#[cfg(feature = "python")]
DataType::Python => Python::with_gil(|py| {
// One output object per row of this array.
let mut ndarrays = Vec::with_capacity(self.len());
let da = self.data_array();
let ca = self.channel_array();
let ha = self.height_array();
let wa = self.width_array();
let pyarrow = py.import("pyarrow")?;
for (i, arrow_array) in da.iter().enumerate() {
// Per-row (height, width, channel) shape read from the dimension arrays.
// NOTE(review): the shape is read for null rows as well — confirm those
// slots hold sane values, since they size the placeholder built below.
let shape = (
ha.value(i) as usize,
wa.value(i) as usize,
ca.value(i) as usize,
);
let py_array = match arrow_array {
// Valid row: hand the row's data to Python through the FFI helper, then
// call `to_numpy(False)` followed by `reshape(shape)` on the result.
// Presumably the `false` is pyarrow's `zero_copy_only` flag — confirm.
Some(arrow_array) => ffi::to_py_array(arrow_array, py, pyarrow)?
.call_method1(py, pyo3::intern!(py, "to_numpy"), (false,))?
.call_method1(py, pyo3::intern!(py, "reshape"), (shape,))?,
// Null row: a zero-filled `u8` array of the same shape is stored as a
// placeholder object.
None => PyArray3::<u8>::zeros(py, shape.into_dimension(), false)
.deref()
.to_object(py),
};
ndarrays.push(py_array);
}
// The validity bitmap of this array is cloned and attached to the new
// object array, alongside the per-row Python objects.
let values_array =
PseudoArrowArray::new(ndarrays.into(), self.as_arrow().validity().cloned());
Ok(PythonArray::new(
Field::new(self.name(), dtype.clone()).into(),
values_array.to_boxed(),
)?
.into_series())
}),
// Any other target dtype: defer to the physical array's cast.
_ => self.physical.cast(dtype),
}
}
}

impl FixedShapeImageArray {
/// Casts this fixed-shape image array to `dtype`, returning the result as a
/// `Series`.
///
/// When the `python` feature is enabled, `dtype` is `DataType::Python`, and
/// the logical type is `DataType::FixedShapeImage(_, mode, height, width)`,
/// the whole values buffer is turned into one Python array, reshaped to
/// `(len, height, width, num_channels)`, and split into one Python object per
/// image. Every other combination is delegated to the physical array's `cast`.
///
/// Errors from importing `pyarrow`, the FFI conversion, the Python method
/// calls, and `PythonArray::new` are propagated with `?`.
pub fn cast(&self, dtype: &DataType) -> DaftResult<Series> {
// NOTE(review): this text is a rendered diff without +/- markers; the next
// line appears to be the removed pre-change body (it is the same call that
// survives as the fallback arm of the `match` below) — confirm against the
// actual file.
self.physical.cast(dtype)
match (dtype, self.logical_type()) {
#[cfg(feature = "python")]
(DataType::Python, DataType::FixedShapeImage(_, mode, height, width)) => {
pyo3::Python::with_gil(|py| {
// Shape of the combined array: (number of images, height, width, channels).
let shape = (
self.len(),
*height as usize,
*width as usize,
mode.num_channels() as usize,
);
let pyarrow = py.import("pyarrow")?;
// Only go through FFI layer once instead of for every image.
// We create an (N, H, W, C) ndarray view on the entire image array
// buffer sans the validity mask, and then create a subndarray view
// for each image ndarray in the PythonArray.
let py_array = ffi::to_py_array(
// The validity bitmap is dropped from the values before the FFI hand-off.
self.as_arrow().values().with_validity(None),
py,
pyarrow,
)?
// Presumably the `false` is pyarrow's `zero_copy_only` flag — confirm.
.call_method1(py, pyo3::intern!(py, "to_numpy"), (false,))?
.call_method1(
py,
pyo3::intern!(py, "reshape"),
(shape,),
)?;
// Iterate the reshaped Python object and keep each yielded item as its own
// object (one per image, per the comment above).
// NOTE(review): `a.unwrap()` panics if the Python iterator yields an error;
// consider collecting into a `PyResult<Vec<PyObject>>` and propagating with
// `?`, as the rest of this closure already does.
let ndarrays = py_array
.as_ref(py)
.iter()?
.map(|a| a.unwrap().to_object(py))
.collect::<Vec<PyObject>>();
// The validity bitmap of this array is cloned and attached to the new
// object array, alongside the per-image Python objects.
let values_array =
PseudoArrowArray::new(ndarrays.into(), self.as_arrow().validity().cloned());
Ok(PythonArray::new(
Field::new(self.name(), dtype.clone()).into(),
values_array.to_boxed(),
)?
.into_series())
})
}
// Any other (target dtype, logical type) pair: defer to the physical cast.
(_, _) => self.physical.cast(dtype),
}
}
}
14 changes: 7 additions & 7 deletions src/array/ops/image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,42 +184,42 @@ pub struct ImageArrayVecs<T> {
}

impl ImageArray {
/// Returns the optional image mode stored in this array's logical type.
///
/// # Panics
///
/// Panics with "Expected dtype to be Image" if the logical type is not
/// `DataType::Image`.
fn image_mode(&self) -> &Option<ImageMode> {
// NOTE(review): rendered diff — the line above appears to be the pre-change
// private signature and the line below its `pub` replacement.
pub fn image_mode(&self) -> &Option<ImageMode> {
match self.logical_type() {
DataType::Image(_, mode) => mode,
_ => panic!("Expected dtype to be Image"),
}
}

/// Returns the child array at index 0 of the physical array's values,
/// downcast to an `i64`-offset Arrow `ListArray`.
///
/// # Panics
///
/// Panics if there is no child at that index or if the child is not a
/// `ListArray<i64>` (both lookups are unwrapped).
fn data_array(&self) -> &arrow2::array::ListArray<i64> {
// NOTE(review): rendered diff — the line above appears to be the pre-change
// private signature and the line below its `pub` replacement.
pub fn data_array(&self) -> &arrow2::array::ListArray<i64> {
let p = self.physical.as_arrow();
// Position of the image data child among the physical array's values.
const IMAGE_DATA_IDX: usize = 0;
let array = p.values().get(IMAGE_DATA_IDX).unwrap();
// The target type of the downcast is inferred from the return type.
array.as_ref().as_any().downcast_ref().unwrap()
}

/// Returns the child array at index 1 of the physical array's values,
/// downcast to an Arrow `UInt16Array` (the per-image channel values).
///
/// # Panics
///
/// Panics if there is no child at that index or if the child is not a
/// `UInt16Array` (both lookups are unwrapped).
fn channel_array(&self) -> &arrow2::array::UInt16Array {
// NOTE(review): rendered diff — the line above appears to be the pre-change
// private signature and the line below its `pub` replacement.
pub fn channel_array(&self) -> &arrow2::array::UInt16Array {
let p = self.physical.as_arrow();
// Position of the channel child among the physical array's values.
const IMAGE_CHANNEL_IDX: usize = 1;
let array = p.values().get(IMAGE_CHANNEL_IDX).unwrap();
// The target type of the downcast is inferred from the return type.
array.as_ref().as_any().downcast_ref().unwrap()
}

/// Returns the child array at index 2 of the physical array's values,
/// downcast to an Arrow `UInt32Array` (the per-image height values).
///
/// # Panics
///
/// Panics if there is no child at that index or if the child is not a
/// `UInt32Array` (both lookups are unwrapped).
fn height_array(&self) -> &arrow2::array::UInt32Array {
// NOTE(review): rendered diff — the line above appears to be the pre-change
// private signature and the line below its `pub` replacement.
pub fn height_array(&self) -> &arrow2::array::UInt32Array {
let p = self.physical.as_arrow();
// Position of the height child among the physical array's values.
const IMAGE_HEIGHT_IDX: usize = 2;
let array = p.values().get(IMAGE_HEIGHT_IDX).unwrap();
// The target type of the downcast is inferred from the return type.
array.as_ref().as_any().downcast_ref().unwrap()
}

/// Returns the child array at index 3 of the physical array's values,
/// downcast to an Arrow `UInt32Array` (the per-image width values).
///
/// # Panics
///
/// Panics if there is no child at that index or if the child is not a
/// `UInt32Array` (both lookups are unwrapped).
fn width_array(&self) -> &arrow2::array::UInt32Array {
// NOTE(review): rendered diff — the line above appears to be the pre-change
// private signature and the line below its `pub` replacement.
pub fn width_array(&self) -> &arrow2::array::UInt32Array {
let p = self.physical.as_arrow();
// Position of the width child among the physical array's values.
const IMAGE_WIDTH_IDX: usize = 3;
let array = p.values().get(IMAGE_WIDTH_IDX).unwrap();
// The target type of the downcast is inferred from the return type.
array.as_ref().as_any().downcast_ref().unwrap()
}

fn mode_array(&self) -> &arrow2::array::UInt8Array {
pub fn mode_array(&self) -> &arrow2::array::UInt8Array {
let p = self.physical.as_arrow();
const IMAGE_MODE_IDX: usize = 4;
let array = p.values().get(IMAGE_MODE_IDX).unwrap();
Expand Down Expand Up @@ -297,7 +297,7 @@ impl ImageArray {
))
}

fn as_image_obj<'a>(&'a self, idx: usize) -> Option<DaftImageBuffer<'a>> {
pub fn as_image_obj<'a>(&'a self, idx: usize) -> Option<DaftImageBuffer<'a>> {
assert!(idx < self.len());
if !self.physical.is_valid(idx) {
return None;
Expand Down
Loading

0 comments on commit 8cc6e69

Please sign in to comment.