Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Setup ORT for Text and Images #139

Merged
merged 16 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion ahnlich/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions ahnlich/ai/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ tracing-opentelemetry.workspace = true
futures.workspace = true
tiktoken-rs = "0.5.9"
itertools.workspace = true
tokenizers = { version = "0.20.1", features = ["hf-hub"] }
[dev-dependencies]
db = { path = "../db", version = "*" }
pretty_assertions.workspace = true
20 changes: 13 additions & 7 deletions ahnlich/ai/src/cli/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ pub enum SupportedModels {
BGELargeEnV15,
#[clap(name = "resnet-50")]
Resnet50,
#[clap(name = "clip-vit-b32")]
ClipVitB32,
#[clap(name = "clip-vit-b32-image")]
ClipVitB32Image,
#[clap(name = "clip-vit-b32-text")]
ClipVitB32Text,
}

#[derive(Parser)]
Expand Down Expand Up @@ -132,9 +134,10 @@ impl Default for AIProxyConfig {
supported_models: vec![
SupportedModels::AllMiniLML6V2,
SupportedModels::AllMiniLML12V2,
SupportedModels::Resnet50,
SupportedModels::ClipVitB32,
SupportedModels::BGEBaseEnV15,
SupportedModels::ClipVitB32Text,
SupportedModels::Resnet50,
SupportedModels::ClipVitB32Image,
],
model_cache_location: home_dir()
.map(|mut path| {
Expand Down Expand Up @@ -192,7 +195,8 @@ impl fmt::Display for SupportedModels {
SupportedModels::BGEBaseEnV15 => write!(f, "BGEBase-En-v1.5"),
SupportedModels::BGELargeEnV15 => write!(f, "BGELarge-En-v1.5"),
SupportedModels::Resnet50 => write!(f, "Resnet-50"),
SupportedModels::ClipVitB32 => write!(f, "ClipVit-B32"),
SupportedModels::ClipVitB32Image => write!(f, "ClipVit-B32-Image"),
SupportedModels::ClipVitB32Text => write!(f, "ClipVit-B32-Text"),
}
}
}
Expand All @@ -205,7 +209,8 @@ impl From<&AIModel> for SupportedModels {
AIModel::BGEBaseEnV15 => SupportedModels::BGEBaseEnV15,
AIModel::BGELargeEnV15 => SupportedModels::BGELargeEnV15,
AIModel::Resnet50 => SupportedModels::Resnet50,
AIModel::ClipVitB32 => SupportedModels::ClipVitB32,
AIModel::ClipVitB32Image => SupportedModels::ClipVitB32Image,
AIModel::ClipVitB32Text => SupportedModels::ClipVitB32Text,
}
}
}
Expand All @@ -218,7 +223,8 @@ impl From<&SupportedModels> for AIModel {
SupportedModels::BGEBaseEnV15 => AIModel::BGEBaseEnV15,
SupportedModels::BGELargeEnV15 => AIModel::BGELargeEnV15,
SupportedModels::Resnet50 => AIModel::Resnet50,
SupportedModels::ClipVitB32 => AIModel::ClipVitB32,
SupportedModels::ClipVitB32Image => AIModel::ClipVitB32Image,
SupportedModels::ClipVitB32Text => AIModel::ClipVitB32Text,
}
}
}
Expand Down
105 changes: 65 additions & 40 deletions ahnlich/ai/src/engine/ai/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use ahnlich_types::{
ai::{AIModel, AIStoreInputType},
keyval::{StoreInput, StoreKey},
};
use image::{GenericImageView, ImageReader};
use image::{DynamicImage, GenericImageView, ImageFormat, ImageReader};
use ndarray::ArrayView;
use ndarray::{Array, Ix3};
use nonzero_ext::nonzero;
Expand All @@ -18,6 +18,8 @@ use std::io::Cursor;
use std::num::NonZeroUsize;
use std::path::Path;
use strum::Display;
use serde::ser::Error as SerError;
use serde::de::Error as DeError;

#[derive(Display)]
pub enum ModelType {
Expand Down Expand Up @@ -92,17 +94,30 @@ impl From<&AIModel> for Model {
description: String::from("Residual Networks model, with 50 layers."),
embedding_size: nonzero!(2048usize),
},
AIModel::ClipVitB32 => Self {
AIModel::ClipVitB32Image => Self {
model_type: ModelType::Image {
expected_image_dimensions: (nonzero!(224usize), nonzero!(224usize)),
},
provider: ModelProviders::ORT(ORTProvider::new()),
supported_model: SupportedModels::ClipVitB32,
supported_model: SupportedModels::ClipVitB32Image,
description: String::from(
"Contrastive Language-Image Pre-Training Vision transformer model, base scale.",
),
embedding_size: nonzero!(512usize),
},
AIModel::ClipVitB32Text => Self {
model_type: ModelType::Text {
// Token size source: https://github.com/UKPLab/sentence-transformers/issues/1269
max_input_tokens: nonzero!(77usize),
},
provider: ModelProviders::ORT(ORTProvider::new()),
supported_model: SupportedModels::ClipVitB32Text,
description: String::from(
"Contrastive Language-Image Pre-Training Text transformer model, base scale. \
Ideal for embedding very short text and using in combination with ClipVitB32Image",
),
embedding_size: nonzero!(512usize),
},
}
}
}
Expand Down Expand Up @@ -250,7 +265,8 @@ pub enum ModelInput {
#[derive(Debug, Clone)]
pub struct ImageArray {
array: Array<f32, Ix3>,
bytes: Vec<u8>,
image: DynamicImage,
image_format: ImageFormat
}

impl ImageArray {
Expand All @@ -259,10 +275,18 @@ impl ImageArray {
.with_guessed_format()
.map_err(|_| AIProxyError::ImageBytesDecodeError)?;

let img = img_reader
let image_format = &img_reader
.format()
.ok_or(AIProxyError::ImageBytesDecodeError)?;

let image = img_reader
.decode()
.map_err(|_| AIProxyError::ImageBytesDecodeError)?;
let (width, height) = img.dimensions();

// Always convert to RGB8 format
// https://github.com/Anush008/fastembed-rs/blob/cea92b6c8b877efda762393848d1c449a4eea126/src/image_embedding/utils.rs#L198
let image: DynamicImage = image.to_owned().into_rgb8().into();
let (width, height) = image.dimensions();

if width == 0 || height == 0 {
return Err(AIProxyError::ImageNonzeroDimensionError {
Expand All @@ -271,12 +295,13 @@ impl ImageArray {
});
}

let channels = img.color().channel_count();
let shape = (height as usize, width as usize, channels as usize);
let array = Array::from_shape_vec(shape, img.into_bytes())
let channels = &image.color().channel_count();
let shape = (height as usize, width as usize, *channels as usize);
let array = Array::from_shape_vec(shape, image.clone().into_bytes())
.map_err(|_| AIProxyError::ImageBytesDecodeError)?
.mapv(f32::from);
Ok(ImageArray { array, bytes })

Ok(ImageArray { array, image, image_format: image_format.to_owned() })
}

// Swapping axes from [rows, columns, channels] to [channels, rows, columns] for ONNX
Expand All @@ -289,42 +314,42 @@ impl ImageArray {
self.array.view()
}

pub fn get_bytes(&self) -> &Vec<u8> {
&self.bytes
pub fn get_bytes(&self) -> Result<Vec<u8>, AIProxyError> {
let mut buffer = Cursor::new(Vec::new());
let _ = &self.image
.write_to(&mut buffer, self.image_format)
.map_err(|_| AIProxyError::ImageBytesEncodeError)?;
let bytes = buffer.into_inner();
Ok(bytes)
}

pub fn resize(&self, width: NonZeroUsize, height: NonZeroUsize) -> Result<Self, AIProxyError> {
let width = usize::from(width);
let height = usize::from(height);
let img_reader = ImageReader::new(Cursor::new(&self.bytes))
.with_guessed_format()
.map_err(|_| AIProxyError::ImageBytesDecodeError)?;
let img_format = img_reader
.format()
.ok_or(AIProxyError::ImageBytesDecodeError)?;
let original_img = img_reader
.decode()
.map_err(|_| AIProxyError::ImageBytesDecodeError)?;

let resized_img = original_img.resize_exact(
width as u32,
height as u32,
image::imageops::FilterType::Triangle,
pub fn resize(&self, width: u32, height: u32, filter: Option<image::imageops::FilterType>) -> Result<Self, AIProxyError> {
let filter_type = filter.unwrap_or(image::imageops::FilterType::CatmullRom);
let resized_img = self.image.resize_exact(
width,
height,
filter_type,
);
let channels = resized_img.color().channel_count();
let shape = (height, width, channels as usize);

let mut buffer = Cursor::new(Vec::new());
resized_img
.write_to(&mut buffer, img_format)
.map_err(|_| AIProxyError::ImageResizeError)?;
let shape = (height as usize, width as usize, channels as usize);

let flattened_pixels = resized_img.into_bytes();
let flattened_pixels = resized_img.clone().into_bytes();
let array = Array::from_shape_vec(shape, flattened_pixels)
.map_err(|_| AIProxyError::ImageResizeError)?
.mapv(f32::from);
let bytes = buffer.into_inner();
Ok(ImageArray { array, bytes })
Ok(ImageArray { array, image: resized_img, image_format: self.image_format })
}

/// Cuts a `width` x `height` region out of the stored image, with its
/// top-left corner at (`x`, `y`), and returns it as a new `ImageArray`.
///
/// The stored image is left untouched (`crop_imm` borrows it), and the
/// original `image_format` is carried over to the result.
///
/// # Errors
///
/// Returns `AIProxyError::ImageCropError` when the cropped pixel buffer
/// cannot be laid out as `(height, width, channels)`, i.e. when its length
/// does not match the requested dimensions.
pub fn crop(&self, x: u32, y: u32, width: u32, height: u32) -> Result<Self, AIProxyError> {
    let region = self.image.crop_imm(x, y, width, height);
    let channel_count = region.color().channel_count() as usize;

    // The shape is built from the *requested* size, so a buffer of any other
    // length is rejected by `from_shape_vec` below.
    let dims = (height as usize, width as usize, channel_count);
    let raw_pixels = region.clone().into_bytes();

    let array = Array::from_shape_vec(dims, raw_pixels)
        .map_err(|_| AIProxyError::ImageCropError)?
        .mapv(f32::from);

    Ok(ImageArray {
        array,
        image: region,
        image_format: self.image_format,
    })
}

pub fn image_dim(&self) -> (NonZeroUsize, NonZeroUsize) {
Expand All @@ -341,7 +366,7 @@ impl Serialize for ImageArray {
where
S: Serializer,
{
serializer.serialize_bytes(self.get_bytes())
serializer.serialize_bytes(&self.get_bytes().map_err(S::Error::custom)?)
}
}

Expand All @@ -351,7 +376,7 @@ impl<'de> Deserialize<'de> for ImageArray {
D: Deserializer<'de>,
{
let bytes: Vec<u8> = Deserialize::deserialize(deserializer)?;
ImageArray::try_new(bytes).map_err(serde::de::Error::custom)
ImageArray::try_new(bytes).map_err(D::Error::custom)
}
}

Expand Down
Loading
Loading