Skip to content

Commit

Permalink
Image Expressions fix (#1001)
Browse files Browse the repository at this point in the history
* Implements the Image Expressions instead of the todo macros
  • Loading branch information
samster25 authored Jun 6, 2023
1 parent 8cc6e69 commit 21f8136
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 7 deletions.
40 changes: 35 additions & 5 deletions src/dsl/functions/image/decode.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
use crate::{datatypes::Field, dsl::Expr, error::DaftResult, schema::Schema, series::Series};
use crate::{
datatypes::{DataType, Field},
dsl::Expr,
error::{DaftError, DaftResult},
schema::Schema,
series::Series,
};

use super::super::FunctionEvaluator;

Expand All @@ -9,11 +15,35 @@ impl FunctionEvaluator for DecodeEvaluator {
"decode"
}

fn to_field(&self, _: &[Expr], _: &Schema) -> DaftResult<Field> {
todo!("not implemented");
/// Resolves the output field produced by decoding an image column.
///
/// Exactly one input expression is accepted, and its resolved dtype must be
/// `DataType::Binary`. The returned field keeps the input's name and has dtype
/// `DataType::Image(Box::new(DataType::UInt8), None)`.
///
/// # Errors
///
/// * `DaftError::SchemaMismatch` when the number of inputs is not exactly one.
/// * `DaftError::TypeError` when the input does not resolve to a binary field.
/// * Any error returned while resolving the input expression against `schema`.
fn to_field(&self, inputs: &[Expr], schema: &Schema) -> DaftResult<Field> {
    // Reject the wrong arity up front so the rest of the body can assume one input.
    if inputs.len() != 1 {
        return Err(DaftError::SchemaMismatch(format!(
            "Expected 1 input arg, got {}",
            inputs.len()
        )));
    }

    let source = inputs[0].to_field(schema)?;
    match source.dtype {
        DataType::Binary => Ok(Field::new(
            source.name,
            DataType::Image(Box::new(DataType::UInt8), None),
        )),
        _ => Err(DaftError::TypeError(format!(
            "ImageDecode can only decode BinaryArrays, got {}",
            source
        ))),
    }
}

fn evaluate(&self, _: &[Series], _: &Expr) -> DaftResult<Series> {
todo!("not implemented");
/// Decodes the single input series by delegating to `Series::image_decode`.
///
/// The expression argument is not consulted.
///
/// # Errors
///
/// Returns `DaftError::ValueError` when the number of input series is not
/// exactly one; otherwise returns whatever `image_decode` returns.
fn evaluate(&self, inputs: &[Series], _: &Expr) -> DaftResult<Series> {
    if let [series] = inputs {
        return series.image_decode();
    }

    Err(DaftError::ValueError(format!(
        "Expected 1 input arg, got {}",
        inputs.len()
    )))
}
}
21 changes: 19 additions & 2 deletions src/dsl/functions/image/resize.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::datatypes::DataType;
use crate::dsl::functions::image::ImageExpr;
use crate::error::DaftError;
use crate::{datatypes::Field, dsl::Expr, error::DaftResult, schema::Schema, series::Series};
Expand All @@ -13,8 +14,24 @@ impl FunctionEvaluator for ResizeEvaluator {
"resize"
}

fn to_field(&self, _: &[Expr], _: &Schema) -> DaftResult<Field> {
todo!("not implemented");
/// Resolves the output field produced by resizing an image column.
///
/// Exactly one input expression is accepted, and it must resolve to a field
/// whose dtype is a `DataType::Image`. The resolved field is returned as-is
/// (same name, same dtype).
///
/// # Errors
///
/// * `DaftError::SchemaMismatch` when the number of inputs is not exactly one.
/// * `DaftError::TypeError` when the input does not resolve to an image field.
/// * Any error returned while resolving the input expression against `schema`.
fn to_field(&self, inputs: &[Expr], schema: &Schema) -> DaftResult<Field> {
    match inputs {
        [input] => {
            let field = input.to_field(schema)?;

            if matches!(field.dtype, DataType::Image(..)) {
                // `field` is owned and not used again, so return it directly
                // rather than cloning it.
                Ok(field)
            } else {
                Err(DaftError::TypeError(format!(
                    "ImageResize can only resize ImageArrays, got {}",
                    field
                )))
            }
        }
        _ => Err(DaftError::SchemaMismatch(format!(
            "Expected 1 input arg, got {}",
            inputs.len()
        ))),
    }
}

fn evaluate(&self, inputs: &[Series], expr: &Expr) -> DaftResult<Series> {
Expand Down
Binary file added tests/cookbook/assets/images/0000.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/cookbook/assets/images/0007.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/cookbook/assets/images/0018.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/cookbook/assets/images/0025.tiff
Binary file not shown.
73 changes: 73 additions & 0 deletions tests/cookbook/test_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from __future__ import annotations

import numpy as np
from PIL import Image

import daft
from daft import col
from daft.datatype import DataType
from daft.series import Series
from tests.cookbook.assets import ASSET_FOLDER


def test_image_resize_mixed_modes():
    """Resize a column of images with differing channel counts to 5x5 and check the pixels.

    The column mixes RGB, RGBA, L and LA arrays plus a null entry. After casting to
    the image dtype and resizing, the schema dtype must be unchanged, constant-valued
    channels must stay constant, the non-constant images must match PIL's bilinear
    resize, and the null entry must stay null.
    """
    rgba = np.ones((2, 2, 4), dtype=np.uint8)
    rgba[..., 1] = 2
    rgba[..., 2] = 3
    rgba[..., 3] = 4

    data = [
        rgba[..., :3],  # RGB
        rgba,  # RGBA
        np.arange(12, dtype=np.uint8).reshape((1, 4, 3)),  # RGB
        np.arange(12, dtype=np.uint8).reshape((3, 4)) * 10,  # L
        np.ones(24, dtype=np.uint8).reshape((3, 4, 2)) * 10,  # LA
        None,
    ]

    s = Series.from_pylist(data, pyobj="force")
    df = daft.from_pydict({"img": s})

    target_dtype = DataType.image()
    df = df.select(df["img"].cast(target_dtype))

    assert df.schema()["img"].dtype == target_dtype

    df = df.with_column("resized", df["img"].image.resize(5, 5))

    assert df.schema()["resized"].dtype == target_dtype

    as_py = df.to_pydict()["resized"]

    # Each channel of the first two images is constant, so it must stay constant after resizing.
    first_resized = np.array(as_py[0]["data"]).reshape(5, 5, 3)
    assert np.all(first_resized[..., 0] == 1)
    assert np.all(first_resized[..., 1] == 2)
    assert np.all(first_resized[..., 2] == 3)

    second_resized = np.array(as_py[1]["data"]).reshape(5, 5, 4)
    assert np.all(second_resized[..., 0] == 1)
    assert np.all(second_resized[..., 1] == 2)
    assert np.all(second_resized[..., 2] == 3)
    assert np.all(second_resized[..., 3] == 4)

    # The non-constant RGB and L images are compared against PIL's bilinear resize.
    for i in range(2, 4):
        resized_i = np.array(as_py[i]["data"]).reshape(5, 5, -1)
        resized_i_gt = np.asarray(Image.fromarray(data[i]).resize((5, 5), resample=Image.BILINEAR)).reshape(5, 5, -1)
        assert np.all(resized_i == resized_i_gt), f"{i} does not match"

    # LA sampling doesn't work for some reason in PIL
    resized_i = np.array(as_py[4]["data"]).reshape(5, 5, -1)
    assert np.all(resized_i == 10)

    # Identity check: the null input must come back as the None singleton.
    assert as_py[-1] is None


def test_image_decode() -> None:
    """Download, decode and resize everything under the asset image folder.

    Checks that the resulting ``image`` column has the image dtype in the schema,
    then collects the dataframe.
    """
    source = daft.from_glob_path(f"{ASSET_FOLDER}/images/**").into_partitions(2)
    thumbnail_expr = col("path").url.download().image.decode().image.resize(10, 10)
    df = source.with_column("image", thumbnail_expr)

    expected_dtype = DataType.image()
    assert df.schema()["image"].dtype == expected_dtype

    df.collect()

0 comments on commit 21f8136

Please sign in to comment.