Skip to content

Commit

Permalink
[Images] [3/N] Add image decoding for uint8 images. (#981)
Browse files Browse the repository at this point in the history
This PR adds basic support for decoding uint8 images.
  • Loading branch information
clarkzinzow authored Jun 2, 2023
1 parent ea3d2f8 commit 0f5b4b7
Show file tree
Hide file tree
Showing 9 changed files with 410 additions and 171 deletions.
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ pandas==1.3.5; python_version < '3.8'
pandas==2.0.2; python_version >= '3.8'
xxhash>=3.0.0
Pillow==9.5.0
opencv-python==4.7.0.72

# Ray
ray[data, default]==2.4.0
Expand Down
11 changes: 9 additions & 2 deletions src/array/from.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::sync::Arc;

use crate::datatypes::{
BinaryArray, BooleanArray, DaftNumericType, DaftPhysicalType, DataType, Field, Utf8Array,
Utf8Type,
BinaryArray, BooleanArray, DaftNumericType, DaftPhysicalType, DataType, Field, NullArray,
Utf8Array, Utf8Type,
};

use crate::array::DataArray;
Expand All @@ -17,6 +17,13 @@ impl<T: DaftNumericType> From<(&str, Box<arrow2::array::PrimitiveArray<T::Native
}
}

/// Builds a Daft [`NullArray`] from a `(name, array)` pair, where `array` is a
/// boxed `arrow2` null array and `name` becomes the name of the resulting field.
impl From<(&str, Box<arrow2::array::NullArray>)> for NullArray {
    /// Wraps the arrow2 array in a `DataArray` whose field has the given name
    /// and the `DataType::Null` dtype.
    ///
    /// # Panics
    ///
    /// Panics if `DataArray::new` does not succeed for this field/array pair.
    fn from((name, array): (&str, Box<arrow2::array::NullArray>)) -> Self {
        let null_field = Field::new(name, DataType::Null);
        DataArray::new(null_field.into(), array).unwrap()
    }
}

impl From<(&str, Box<arrow2::array::Utf8Array<i64>>)> for Utf8Array {
fn from(item: (&str, Box<arrow2::array::Utf8Array<i64>>)) -> Self {
let (name, array) = item;
Expand Down
77 changes: 19 additions & 58 deletions src/array/ops/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use arrow2::compute::{

use crate::series::IntoSeries;
use crate::{
array::DataArray,
array::{ops::image::ImageArrayVecs, DataArray},
datatypes::logical::{
DateArray, EmbeddingArray, FixedShapeImageArray, ImageArray, LogicalArray,
},
Expand All @@ -18,7 +18,7 @@ use arrow2::array::Array;
use num_traits::NumCast;

#[cfg(feature = "python")]
use crate::datatypes::{FixedSizeListArray, ListArray, StructArray};
use crate::datatypes::{FixedSizeListArray, ListArray};
#[cfg(feature = "python")]
use crate::datatypes::{ImageMode, PythonArray};
#[cfg(feature = "python")]
Expand Down Expand Up @@ -496,14 +496,15 @@ fn extract_python_like_to_list<
}

#[cfg(feature = "python")]
fn extract_python_like_to_image_struct<
fn extract_python_like_to_image_array<
Tgt: numpy::Element + NumCast + ToPrimitive + arrow2::types::NativeType,
>(
py: Python<'_>,
python_objects: &PythonArray,
dtype: &DataType,
child_dtype: &DataType,
mode_from_dtype: Option<ImageMode>,
) -> DaftResult<StructArray> {
) -> DaftResult<ImageArray> {
// 3 dimensions - height x width x channel.

let shape_size = 3;
Expand All @@ -519,24 +520,8 @@ fn extract_python_like_to_image_struct<
let offsets = offsets.expect("Offsets should but non-None for image struct array");
let shapes = shapes.expect("Shapes should be non-None for image struct array");

let values_array: Box<dyn arrow2::array::Array> =
Box::new(arrow2::array::PrimitiveArray::from_vec(values_vec));

let inner_dtype = child_dtype.to_arrow()?;

let data_dtype = arrow2::datatypes::DataType::LargeList(Box::new(
arrow2::datatypes::Field::new("data", inner_dtype, true),
));

let validity = python_objects.as_arrow().validity();

let data_array = Box::new(arrow2::array::ListArray::new(
data_dtype.clone(),
arrow2::offset::OffsetsBuffer::try_from(offsets)?,
values_array,
validity.cloned(),
));

let num_rows = shapes.len();

let mut channels = Vec::<u16>::with_capacity(num_rows);
Expand Down Expand Up @@ -591,37 +576,18 @@ fn extract_python_like_to_image_struct<
child_dtype,
)?) as u8);
}

let channel_array = Box::new(arrow2::array::PrimitiveArray::from_vec(channels));
let height_array = Box::new(arrow2::array::PrimitiveArray::from_vec(heights));
let width_array = Box::new(arrow2::array::PrimitiveArray::from_vec(widths));
let mode_array = Box::new(arrow2::array::PrimitiveArray::from_vec(modes));

let struct_dtype = arrow2::datatypes::DataType::Struct(vec![
arrow2::datatypes::Field::new("data", data_dtype, true),
arrow2::datatypes::Field::new("channel", channel_array.data_type().clone(), true),
arrow2::datatypes::Field::new("height", height_array.data_type().clone(), true),
arrow2::datatypes::Field::new("width", width_array.data_type().clone(), true),
arrow2::datatypes::Field::new("mode", mode_array.data_type().clone(), true),
]);

let daft_type = (&struct_dtype).into();

let struct_array = arrow2::array::StructArray::new(
struct_dtype,
vec![
data_array,
channel_array,
height_array,
width_array,
mode_array,
],
validity.cloned(),
);

StructArray::new(
Field::new(python_objects.name(), daft_type).into(),
Box::new(struct_array),
ImageArray::from_vecs(
python_objects.name(),
dtype.clone(),
ImageArrayVecs {
data: values_vec,
channels,
heights,
widths,
modes,
offsets,
validity: validity.cloned(),
},
)
}

Expand Down Expand Up @@ -707,13 +673,8 @@ impl PythonArray {
with_match_numeric_daft_types!(**inner_dtype, |$T| {
type Tgt = <$T as DaftNumericType>::Native;
pyo3::Python::with_gil(|py| {
let result = extract_python_like_to_image_struct::<Tgt>(py, self, inner_dtype, *mode)?;
Ok(
ImageArray::new(
Field::new(self.name(), dtype.clone()),
result,
).into_series()
)
let result = extract_python_like_to_image_array::<Tgt>(py, self, dtype, inner_dtype, *mode)?;
Ok(result.into_series())
})
})
}
Expand Down
Loading

0 comments on commit 0f5b4b7

Please sign in to comment.