Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into bcherry/oai-detail
Browse files Browse the repository at this point in the history
  • Loading branch information
bcherry committed Dec 13, 2024
2 parents 039e55e + d589a1e commit 483eae1
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 6 deletions.
9 changes: 9 additions & 0 deletions .changeset/tiny-papayas-film.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
"livekit-agents": patch
"livekit-plugins-anthropic": patch
"livekit-plugins-openai": patch
---

Fix center_aspect_fit bug, add scale_aspect_fit and scale_aspect_fill resizing options.

Make scale_aspect_fit the new default resizing option for video frames.
40 changes: 37 additions & 3 deletions livekit-agents/livekit/agents/utils/images/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,18 @@ class EncodeOptions:
class ResizeOptions:
width: int
height: int
strategy: Literal["center_aspect_fit", "center_aspect_cover", "skew"]
strategy: Literal[
# Fit the image into the provided dimensions, with letterboxing
"center_aspect_fit",
# Fill the provided dimensions, with cropping
"center_aspect_cover",
# Scale the image to fit within the provided dimensions, preserving its original aspect ratio (no letterboxing, so one dimension of the result may be smaller than requested)
"scale_aspect_fit",
# Scale the image to cover the provided dimensions, preserving its original aspect ratio (no cropping, so one dimension of the result may be larger than requested)
"scale_aspect_cover",
# Precisely resize the image to the provided dimensions
"skew",
]


def import_pil():
Expand Down Expand Up @@ -83,10 +94,11 @@ def _resize_image(image: Any, options: EncodeOptions):

# If the requested aspect ratio is wider than the original image's aspect ratio
if resize_opts.width / resize_opts.height > image.width / image.height:
new_width = resize_opts.width
new_height = int(image.height * (resize_opts.width / image.width))
new_height = resize_opts.height
new_width = int(image.width * (resize_opts.height / image.height))

resized = image.resize((new_width, new_height))

Image.Image.paste(
result,
resized,
Expand Down Expand Up @@ -118,5 +130,27 @@ def _resize_image(image: Any, options: EncodeOptions):
),
)
return result
elif resize_opts.strategy == "scale_aspect_fill":
# Start with assuming width is the limiting dimension
new_width = resize_opts.width
new_height = int(image.height * (resize_opts.width / image.width))

# If height is under the limit, scale based on height instead
if new_height < resize_opts.height:
new_height = resize_opts.height
new_width = int(image.width * (resize_opts.height / image.height))

return image.resize((new_width, new_height))
elif resize_opts.strategy == "scale_aspect_fit":
# Start with assuming width is the limiting dimension
new_width = resize_opts.width
new_height = int(image.height * (resize_opts.width / image.width))

# If height would exceed the limit, scale based on height instead
if new_height > resize_opts.height:
new_height = resize_opts.height
new_width = int(image.width * (resize_opts.height / image.height))

return image.resize((new_width, new_height))

raise ValueError(f"Unknown resize strategy: {resize_opts.strategy}")
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ def _build_anthropic_image_content(
opts.resize_options = utils.images.ResizeOptions(
width=image.inference_width,
height=image.inference_height,
strategy="center_aspect_fit",
strategy="scale_aspect_fit",
)

encoded_data = utils.images.encode(image.image, opts)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,7 @@ async def _upload_frame(
opts.resize_options = utils.images.ResizeOptions(
width=inference_width,
height=inference_height,
strategy="center_aspect_fit",
strategy="scale_aspect_fit",
)

encoded_data = utils.images.encode(frame, opts)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def _build_oai_image_content(image: llm.ChatImage, cache_key: Any):
opts.resize_options = utils.images.ResizeOptions(
width=image.inference_width,
height=image.inference_height,
strategy="center_aspect_fit",
strategy="scale_aspect_fit",
)

encoded_data = utils.images.encode(image.image, opts)
Expand Down

0 comments on commit 483eae1

Please sign in to comment.