Skip to content

Commit

Permalink
Consolidate vecs -> ImageArray construction between kernels and casti…
Browse files Browse the repository at this point in the history
…ng; always return an ImageArray
  • Loading branch information
clarkzinzow committed Jun 2, 2023
1 parent 3ed5648 commit cd47dac
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 115 deletions.
77 changes: 19 additions & 58 deletions src/array/ops/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use arrow2::compute::{

use crate::series::IntoSeries;
use crate::{
array::DataArray,
array::{ops::image::ImageArrayVecs, DataArray},
datatypes::logical::{
DateArray, EmbeddingArray, FixedShapeImageArray, ImageArray, LogicalArray,
},
Expand All @@ -18,7 +18,7 @@ use arrow2::array::Array;
use num_traits::NumCast;

#[cfg(feature = "python")]
use crate::datatypes::{FixedSizeListArray, ListArray, StructArray};
use crate::datatypes::{FixedSizeListArray, ListArray};
#[cfg(feature = "python")]
use crate::datatypes::{ImageMode, PythonArray};
#[cfg(feature = "python")]
Expand Down Expand Up @@ -496,14 +496,15 @@ fn extract_python_like_to_list<
}

#[cfg(feature = "python")]
fn extract_python_like_to_image_struct<
fn extract_python_like_to_image_array<
Tgt: numpy::Element + NumCast + ToPrimitive + arrow2::types::NativeType,
>(
py: Python<'_>,
python_objects: &PythonArray,
dtype: &DataType,
child_dtype: &DataType,
mode_from_dtype: Option<ImageMode>,
) -> DaftResult<StructArray> {
) -> DaftResult<ImageArray> {
// 3 dimensions - height x width x channel.

let shape_size = 3;
Expand All @@ -519,24 +520,8 @@ fn extract_python_like_to_image_struct<
let offsets = offsets.expect("Offsets should but non-None for image struct array");
let shapes = shapes.expect("Shapes should be non-None for image struct array");

let values_array: Box<dyn arrow2::array::Array> =
Box::new(arrow2::array::PrimitiveArray::from_vec(values_vec));

let inner_dtype = child_dtype.to_arrow()?;

let data_dtype = arrow2::datatypes::DataType::LargeList(Box::new(
arrow2::datatypes::Field::new("data", inner_dtype, true),
));

let validity = python_objects.as_arrow().validity();

let data_array = Box::new(arrow2::array::ListArray::new(
data_dtype.clone(),
arrow2::offset::OffsetsBuffer::try_from(offsets)?,
values_array,
validity.cloned(),
));

let num_rows = shapes.len();

let mut channels = Vec::<u16>::with_capacity(num_rows);
Expand Down Expand Up @@ -591,37 +576,18 @@ fn extract_python_like_to_image_struct<
child_dtype,
)?) as u8);
}

let channel_array = Box::new(arrow2::array::PrimitiveArray::from_vec(channels));
let height_array = Box::new(arrow2::array::PrimitiveArray::from_vec(heights));
let width_array = Box::new(arrow2::array::PrimitiveArray::from_vec(widths));
let mode_array = Box::new(arrow2::array::PrimitiveArray::from_vec(modes));

let struct_dtype = arrow2::datatypes::DataType::Struct(vec![
arrow2::datatypes::Field::new("data", data_dtype, true),
arrow2::datatypes::Field::new("channel", channel_array.data_type().clone(), true),
arrow2::datatypes::Field::new("height", height_array.data_type().clone(), true),
arrow2::datatypes::Field::new("width", width_array.data_type().clone(), true),
arrow2::datatypes::Field::new("mode", mode_array.data_type().clone(), true),
]);

let daft_type = (&struct_dtype).into();

let struct_array = arrow2::array::StructArray::new(
struct_dtype,
vec![
data_array,
channel_array,
height_array,
width_array,
mode_array,
],
validity.cloned(),
);

StructArray::new(
Field::new(python_objects.name(), daft_type).into(),
Box::new(struct_array),
ImageArray::from_vecs(
python_objects.name(),
dtype.clone(),
ImageArrayVecs {
data: values_vec,
channels,
heights,
widths,
modes,
offsets,
validity: validity.cloned(),
},
)
}

Expand Down Expand Up @@ -707,13 +673,8 @@ impl PythonArray {
with_match_numeric_daft_types!(**inner_dtype, |$T| {
type Tgt = <$T as DaftNumericType>::Native;
pyo3::Python::with_gil(|py| {
let result = extract_python_like_to_image_struct::<Tgt>(py, self, inner_dtype, *mode)?;
Ok(
ImageArray::new(
Field::new(self.name(), dtype.clone()),
result,
).into_series()
)
let result = extract_python_like_to_image_array::<Tgt>(py, self, dtype, inner_dtype, *mode)?;
Ok(result.into_series())
})
})
}
Expand Down
143 changes: 98 additions & 45 deletions src/array/ops/image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ use std::vec;

use image::{ColorType, DynamicImage, ImageBuffer};

use crate::datatypes::logical::ImageArray;
use crate::datatypes::{BinaryArray, DataType, Field, ImageMode};
use crate::datatypes::{logical::ImageArray, BinaryArray, DataType, Field, ImageMode, StructArray};
use crate::error::{DaftError, DaftResult};
use image::{Luma, LumaA, Rgb, Rgba};

Expand Down Expand Up @@ -174,6 +173,16 @@ impl<'a> From<DynamicImage> for DaftImageBuffer<'a> {
}
}

/// Flat, column-wise buffers from which [`ImageArray::from_vecs`] assembles an image array.
///
/// `from_vecs` turns `data` + `offsets` into a large-list child array and wraps each of
/// `channels`, `heights`, `widths` and `modes` in a primitive child array; `validity` is
/// applied to every child as well as to the enclosing struct array.
#[derive(Debug, Clone)]
pub struct ImageArrayVecs<T> {
    /// Values of all images, concatenated; sliced into rows by `offsets`.
    pub data: Vec<T>,
    /// Channel count per row. Its length is used as the row count when `data` is empty.
    pub channels: Vec<u16>,
    /// Image height per row.
    pub heights: Vec<u32>,
    /// Image width per row.
    pub widths: Vec<u32>,
    /// Image mode per row, stored as a `u8`.
    pub modes: Vec<u8>,
    /// List offsets into `data`; converted into an arrow2 `OffsetsBuffer` by `from_vecs`.
    pub offsets: Vec<i64>,
    /// Optional validity bitmap shared by all child arrays; `None` leaves validity unset.
    pub validity: Option<arrow2::bitmap::Bitmap>,
}

impl ImageArray {
fn image_mode(&self) -> &Option<ImageMode> {
match self.logical_type() {
Expand Down Expand Up @@ -217,6 +226,77 @@ impl ImageArray {
array.as_ref().as_any().downcast_ref().unwrap()
}

pub fn from_vecs<T: arrow2::types::NativeType>(
name: &str,
data_type: DataType,
vecs: ImageArrayVecs<T>,
) -> DaftResult<Self> {
if vecs.data.is_empty() {
// Create an all-null array if the data array is empty.
let physical_type = data_type.to_physical();
let null_struct_array =
arrow2::array::new_null_array(physical_type.to_arrow()?, vecs.channels.len());
let daft_struct_array =
StructArray::new(Field::new(name, physical_type).into(), null_struct_array)?;
return Ok(ImageArray::new(
Field::new(name, data_type),
daft_struct_array,
));
}
let offsets = arrow2::offset::OffsetsBuffer::try_from(vecs.offsets)?;
let arrow_dtype: arrow2::datatypes::DataType = T::PRIMITIVE.into();
if let DataType::Image(inner_dtype, _) = &data_type {
if inner_dtype.to_arrow()? != arrow_dtype {
panic!("Inner value dtype of provided dtype {data_type:?} is inconsistent with inferred value dtype {arrow_dtype:?}");
}
}

let list_datatype = arrow2::datatypes::DataType::LargeList(Box::new(
arrow2::datatypes::Field::new("data", arrow_dtype, true),
));
let data_array = Box::new(arrow2::array::ListArray::<i64>::new(
list_datatype,
offsets,
Box::new(arrow2::array::PrimitiveArray::from_vec(vecs.data)),
vecs.validity.clone(),
));

let values: Vec<Box<dyn arrow2::array::Array>> = vec![
data_array,
Box::new(
arrow2::array::UInt16Array::from_vec(vecs.channels)
.with_validity(vecs.validity.clone()),
),
Box::new(
arrow2::array::UInt32Array::from_vec(vecs.heights)
.with_validity(vecs.validity.clone()),
),
Box::new(
arrow2::array::UInt32Array::from_vec(vecs.widths)
.with_validity(vecs.validity.clone()),
),
Box::new(
arrow2::array::UInt8Array::from_vec(vecs.modes)
.with_validity(vecs.validity.clone()),
),
];
let physical_type = data_type.to_physical();
let struct_array = Box::new(arrow2::array::StructArray::new(
physical_type.to_arrow()?,
values,
vecs.validity,
));

let daft_struct_array = crate::datatypes::StructArray::new(
Field::new(name, physical_type).into(),
struct_array,
)?;
Ok(ImageArray::new(
Field::new(name, data_type),
daft_struct_array,
))
}

fn as_image_obj<'a>(&'a self, idx: usize) -> Option<DaftImageBuffer<'a>> {
assert!(idx < self.len());
if !self.physical.is_valid(idx) {
Expand Down Expand Up @@ -311,51 +391,24 @@ impl ImageArray {
offsets.push(offsets.last().unwrap() + buffer.len() as i64);
}

let collected_data = data_ref.concat();
let offsets = arrow2::offset::OffsetsBuffer::try_from(offsets)?;
let value_dtype = DataType::UInt8;
let data_type = DataType::Image(Box::new(value_dtype.clone()), *image_mode);

let data = data_ref.concat();
let validity: Option<arrow2::bitmap::Bitmap> = match validity.unset_bits() {
0 => None,
_ => Some(validity.into()),
};
let arrow_dtype = value_dtype.to_arrow()?;

let list_datatype = arrow2::datatypes::DataType::LargeList(Box::new(
arrow2::datatypes::Field::new("data", arrow_dtype, true),
));
let data_array = Box::new(arrow2::array::ListArray::<i64>::new(
list_datatype,
offsets,
Box::new(arrow2::array::PrimitiveArray::from_vec(collected_data)),
validity.clone(),
));

let values: Vec<Box<dyn arrow2::array::Array>> = vec![
data_array,
Box::new(
arrow2::array::UInt16Array::from_vec(channels).with_validity(validity.clone()),
),
Box::new(arrow2::array::UInt32Array::from_vec(heights).with_validity(validity.clone())),
Box::new(arrow2::array::UInt32Array::from_vec(widths).with_validity(validity.clone())),
Box::new(arrow2::array::UInt8Array::from_vec(modes).with_validity(validity.clone())),
];
let physical_type = data_type.to_physical();
let struct_array = Box::new(arrow2::array::StructArray::new(
physical_type.to_arrow()?,
values,
validity,
));

let daft_struct_array = crate::datatypes::StructArray::new(
Field::new(name, physical_type).into(),
struct_array,
)?;
Ok(ImageArray::new(
Field::new(name, data_type),
daft_struct_array,
))
ImageArray::from_vecs(
name,
DataType::Image(Box::new(DataType::UInt8), *image_mode),
ImageArrayVecs {
data,
channels,
heights,
widths,
modes,
offsets,
validity,
},
)
}
}

Expand Down Expand Up @@ -386,8 +439,8 @@ impl BinaryArray {
}
img_bufs.push(img_buf);
}
// Series::image_decode() guarantees that we have at least one non-None element in this array.
let cached_dtype = cached_dtype.unwrap();
// Fall back to UInt8 dtype if series is all nulls.
let cached_dtype = cached_dtype.unwrap_or(DataType::UInt8);
match cached_dtype {
DataType::UInt8 => Ok(ImageArray::from_daft_image_buffers(self.name(), img_bufs.as_slice(), &None)?),
_ => unimplemented!("Decoding images of dtype {cached_dtype:?} is not supported, only uint8 images are supported."),
Expand Down
14 changes: 2 additions & 12 deletions src/series/ops/image.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
use arrow2;

use crate::datatypes::{DataType, ImageType, NullArray};
use crate::datatypes::{DataType, ImageType};

use crate::{
error::{DaftError, DaftResult},
Expand All @@ -10,15 +8,7 @@ use crate::{
impl Series {
pub fn image_decode(&self) -> DaftResult<Series> {
match self.data_type() {
DataType::Binary => {
let binary_array = self.binary()?;
if binary_array.data().null_count() == self.len() {
// All images are None, so return a NullArray.
Ok(NullArray::from(("item", Box::new(arrow2::array::NullArray::new(arrow2::datatypes::DataType::Null, self.len())))).into_series())
} else {
Ok(self.binary()?.image_decode()?.into_series())
}
},
DataType::Binary => Ok(self.binary()?.image_decode()?.into_series()),
dtype => Err(DaftError::ValueError(format!(
"Decoding in-memory data into images is only supported for binary arrays, but got {}", dtype
))),
Expand Down

0 comments on commit cd47dac

Please sign in to comment.