Skip to content

Commit

Permalink
[FEAT]: sql image_encode and image_resize (#2764)
Browse files Browse the repository at this point in the history
depends on #2757
  • Loading branch information
universalmind303 authored Sep 4, 2024
1 parent 734c13f commit 00528ea
Show file tree
Hide file tree
Showing 25 changed files with 900 additions and 441 deletions.
12 changes: 8 additions & 4 deletions daft/daft.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1146,10 +1146,6 @@ class PyExpr:
def utf8_to_date(self, format: str) -> PyExpr: ...
def utf8_to_datetime(self, format: str, timezone: str | None = None) -> PyExpr: ...
def utf8_normalize(self, remove_punct: bool, lowercase: bool, nfd_unicode: bool, white_space: bool) -> PyExpr: ...
def image_encode(self, image_format: ImageFormat) -> PyExpr: ...
def image_resize(self, w: int, h: int) -> PyExpr: ...
def image_crop(self, bbox: PyExpr) -> PyExpr: ...
def image_to_mode(self, mode: ImageMode) -> PyExpr: ...
def list_join(self, delimiter: PyExpr) -> PyExpr: ...
def list_count(self, mode: CountMode) -> PyExpr: ...
def list_get(self, idx: PyExpr, default: PyExpr) -> PyExpr: ...
Expand Down Expand Up @@ -1245,7 +1241,15 @@ def utf8_count_matches(expr: PyExpr, patterns: PyExpr, whole_words: bool, case_s
def list_sort(expr: PyExpr, desc: PyExpr) -> PyExpr: ...
def cbrt(expr: PyExpr) -> PyExpr: ...
def to_struct(inputs: list[PyExpr]) -> PyExpr: ...

# ---
# expr.image namespace
# ---
# Module-level image functions. Each one takes the target expression as its
# first argument (`expr`) and returns a new PyExpr.
# The Python image namespace in daft/expressions/expressions.py calls
# image_encode, image_resize, image_crop and image_to_mode as
# `native.<name>(self._expr, ...)`.

# `bbox` is itself an expression; the Python `crop` wrapper casts it to a
# fixed-size list of 4 uint64 values before calling this.
def image_crop(expr: PyExpr, bbox: PyExpr) -> PyExpr: ...
# NOTE(review): `raise_on_error` presumably selects between raising and a
# non-raising fallback on undecodable input; `mode` is optional — confirm
# both against the Rust implementation.
def image_decode(expr: PyExpr, raise_on_error: bool, mode: ImageMode | None = None) -> PyExpr: ...
# The Python `encode` wrapper converts string formats to an ImageFormat
# before calling this; only ImageFormat is accepted here.
def image_encode(expr: PyExpr, image_format: ImageFormat) -> PyExpr: ...
# `w` and `h` are plain ints (the Python `resize` wrapper type-checks them).
def image_resize(expr: PyExpr, w: int, h: int) -> PyExpr: ...
# The Python `to_mode` wrapper converts string modes to an ImageMode
# before calling this.
def image_to_mode(expr: PyExpr, mode: ImageMode) -> PyExpr: ...

class PyCatalog:
@staticmethod
Expand Down
8 changes: 4 additions & 4 deletions daft/expressions/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3158,7 +3158,7 @@ def encode(self, image_format: str | ImageFormat) -> Expression:
image_format = ImageFormat.from_format_string(image_format.upper())
if not isinstance(image_format, ImageFormat):
raise ValueError(f"image_format must be a string or ImageFormat variant, but got: {image_format}")
return Expression._from_pyexpr(self._expr.image_encode(image_format))
return Expression._from_pyexpr(native.image_encode(self._expr, image_format))

def resize(self, w: int, h: int) -> Expression:
"""
Expand All @@ -3175,7 +3175,7 @@ def resize(self, w: int, h: int) -> Expression:
raise TypeError(f"expected int for w but got {type(w)}")
if not isinstance(h, int):
raise TypeError(f"expected int for h but got {type(h)}")
return Expression._from_pyexpr(self._expr.image_resize(w, h))
return Expression._from_pyexpr(native.image_resize(self._expr, w, h))

def crop(self, bbox: tuple[int, int, int, int] | Expression) -> Expression:
"""
Expand All @@ -3196,14 +3196,14 @@ def crop(self, bbox: tuple[int, int, int, int] | Expression) -> Expression:
)
bbox = Expression._to_expression(bbox).cast(DataType.fixed_size_list(DataType.uint64(), 4))
assert isinstance(bbox, Expression)
return Expression._from_pyexpr(self._expr.image_crop(bbox._expr))
return Expression._from_pyexpr(native.image_crop(self._expr, bbox._expr))

def to_mode(self, mode: str | ImageMode) -> Expression:
if isinstance(mode, str):
mode = ImageMode.from_mode_string(mode.upper())
if not isinstance(mode, ImageMode):
raise ValueError(f"mode must be a string or ImageMode variant, but got: {mode}")
return Expression._from_pyexpr(self._expr.image_to_mode(mode))
return Expression._from_pyexpr(native.image_to_mode(self._expr, mode))


class ExpressionPartitioningNamespace(ExpressionNamespace):
Expand Down
57 changes: 57 additions & 0 deletions src/daft-core/src/array/from_iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,60 @@ impl BooleanArray {
.unwrap()
}
}

impl<T: DaftNumericType> DataArray<T> {
    /// Builds a numeric array named `name` from an iterator of native values.
    ///
    /// The iterator must implement arrow2's `TrustedLen`. The values are
    /// collected into an arrow2 `PrimitiveArray<T::Native>` and wrapped in a
    /// `DataArray` whose field carries `T::get_dtype()`.
    ///
    /// NOTE(review): the values-only arrow2 constructor is used, so the
    /// result presumably has no null entries — confirm against arrow2.
    ///
    /// # Panics
    ///
    /// Panics if `DataArray::new` returns an error for the constructed
    /// field/array pair.
    pub fn from_values(
        name: &str,
        iter: impl arrow2::trusted_len::TrustedLen<Item = T::Native>,
    ) -> Self {
        // Consume the iterator first, exactly as before, then describe the column.
        let primitive =
            arrow2::array::PrimitiveArray::<T::Native>::from_trusted_len_values_iter(iter);
        let field = Field::new(name, T::get_dtype());
        Self::new(field.into(), Box::new(primitive)).unwrap()
    }
}

impl Utf8Array {
    /// Builds a UTF-8 string array named `name` from an iterator of
    /// string-like values.
    ///
    /// The iterator must implement arrow2's `TrustedLen`. The items are
    /// collected into an arrow2 `Utf8Array` with `i64` offsets and wrapped
    /// with a `DataType::Utf8` field.
    ///
    /// NOTE(review): the values-only arrow2 constructor is used, so the
    /// result presumably has no null entries — confirm against arrow2.
    ///
    /// # Panics
    ///
    /// Panics if `DataArray::new` returns an error for the constructed
    /// field/array pair.
    pub fn from_values<S: AsRef<str>>(
        name: &str,
        iter: impl arrow2::trusted_len::TrustedLen<Item = S>,
    ) -> Self {
        // Consume the iterator first, exactly as before, then describe the column.
        let strings = arrow2::array::Utf8Array::<i64>::from_trusted_len_values_iter(iter);
        let field = Field::new(name, crate::DataType::Utf8);
        Self::new(field.into(), Box::new(strings)).unwrap()
    }
}

impl BinaryArray {
    /// Builds a binary array named `name` from an iterator of byte-slice-like
    /// values.
    ///
    /// The iterator must implement arrow2's `TrustedLen`. The items are
    /// collected into an arrow2 `BinaryArray` with `i64` offsets and wrapped
    /// with a `DataType::Binary` field.
    ///
    /// NOTE(review): the values-only arrow2 constructor is used, so the
    /// result presumably has no null entries — confirm against arrow2.
    ///
    /// # Panics
    ///
    /// Panics if `DataArray::new` returns an error for the constructed
    /// field/array pair.
    pub fn from_values<S: AsRef<[u8]>>(
        name: &str,
        iter: impl arrow2::trusted_len::TrustedLen<Item = S>,
    ) -> Self {
        // Consume the iterator first, exactly as before, then describe the column.
        let blobs = arrow2::array::BinaryArray::<i64>::from_trusted_len_values_iter(iter);
        let field = Field::new(name, crate::DataType::Binary);
        Self::new(field.into(), Box::new(blobs)).unwrap()
    }
}

impl BooleanArray {
    /// Builds a boolean array named `name` from an iterator of `bool` values.
    ///
    /// The iterator must implement arrow2's `TrustedLen`. The items are
    /// collected into an arrow2 `BooleanArray` and wrapped with a
    /// `DataType::Boolean` field.
    ///
    /// NOTE(review): the values-only arrow2 constructor is used, so the
    /// result presumably has no null entries — confirm against arrow2.
    ///
    /// # Panics
    ///
    /// Panics if `DataArray::new` returns an error for the constructed
    /// field/array pair.
    pub fn from_values(
        name: &str,
        iter: impl arrow2::trusted_len::TrustedLen<Item = bool>,
    ) -> Self {
        // Consume the iterator first, exactly as before, then describe the column.
        let flags = arrow2::array::BooleanArray::from_trusted_len_values_iter(iter);
        let field = Field::new(name, crate::DataType::Boolean);
        Self::new(field.into(), Box::new(flags)).unwrap()
    }
}
53 changes: 0 additions & 53 deletions src/daft-dsl/src/functions/image/encode.rs

This file was deleted.

70 changes: 0 additions & 70 deletions src/daft-dsl/src/functions/image/mod.rs

This file was deleted.

72 changes: 0 additions & 72 deletions src/daft-dsl/src/functions/image/resize.rs

This file was deleted.

64 changes: 0 additions & 64 deletions src/daft-dsl/src/functions/image/to_mode.rs

This file was deleted.

Loading

0 comments on commit 00528ea

Please sign in to comment.