From 4ed8654b40b1a0930dc1ff7f9aa78bb4f5e0847f Mon Sep 17 00:00:00 2001 From: Diretnan Domnan Date: Mon, 16 Dec 2024 13:37:50 +0100 Subject: [PATCH] Adding various GPU execution providers (#173) * Adding various execution providers * Removing windows DirectML provider * Setting gpu execution providers as features * Adding aarch64-linux binaries to fix #170 --- .github/workflows/release.yml | 57 ++++++++++++++++++----- ahnlich/.cargo/config.toml | 5 ++ ahnlich/Cargo.lock | 1 + ahnlich/ai/Cargo.toml | 16 ++++++- ahnlich/ai/src/engine/ai/providers/ort.rs | 16 +++++++ 5 files changed, 82 insertions(+), 13 deletions(-) create mode 100644 ahnlich/.cargo/config.toml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 61869854..35165a62 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -30,9 +30,9 @@ jobs: echo "BIN_SUFFIX=${value}" >> $GITHUB_OUTPUT echo "BIN_NAME=ahnlich-${value}" >> $GITHUB_OUTPUT - build_linux_binaries_and_publish: + build_linux_x86-64_binaries_and_publish: needs: prebuild_preparation - name: Build Linux Binaries + name: Build Linux x86_64 Binaries runs-on: ubuntu-latest steps: - name: "Checkout" @@ -46,17 +46,42 @@ jobs: - name: Build Linux Release for ${{ needs.prebuild_preparation.outputs.bin_name }} working-directory: ./ahnlich run: | - cargo build --release --bin ${{ needs.prebuild_preparation.outputs.bin_name }} - tar -cvzf linux-${{ needs.prebuild_preparation.outputs.bin_name }}.tar.gz -C target/release ${{ needs.prebuild_preparation.outputs.bin_name }} - gh release upload ${{github.event.release.tag_name}} linux-${{ needs.prebuild_preparation.outputs.bin_name }}.tar.gz + cargo build --release --target x86_64-unknown-linux-gnu --bin ${{ needs.prebuild_preparation.outputs.bin_name }} + tar -cvzf x86_64-linux-${{ needs.prebuild_preparation.outputs.bin_name }}.tar.gz -C target/x86_64-unknown-linux-gnu/release ${{ needs.prebuild_preparation.outputs.bin_name }} + gh release upload 
${{github.event.release.tag_name}} x86_64-linux-${{ needs.prebuild_preparation.outputs.bin_name }}.tar.gz env: GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} shell: bash - build_macos_darwin_binaries_and_publish: + build_linux_aarch64_binaries_and_publish: needs: prebuild_preparation - name: Build MacOs Darwin Binaries + name: Build Linux Aarch64 Binaries + runs-on: ubuntu-latest + steps: + - name: "Checkout" + uses: actions/checkout@v4 + + - name: Get Cargo toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: 1.81.0 + + - name: Build Linux Release for ${{ needs.prebuild_preparation.outputs.bin_name }} + working-directory: ./ahnlich + run: | + rustup target add aarch64-unknown-linux-gnu + cargo build --release --target aarch64-unknown-linux-gnu --bin ${{ needs.prebuild_preparation.outputs.bin_name }} + tar -cvzf aarch64-linux-${{ needs.prebuild_preparation.outputs.bin_name }}.tar.gz -C target/aarch64-unknown-linux-gnu/release ${{ needs.prebuild_preparation.outputs.bin_name }} + gh release upload ${{github.event.release.tag_name}} aarch64-linux-${{ needs.prebuild_preparation.outputs.bin_name }}.tar.gz + + env: + GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} + shell: bash + + build_macos_aarch64_binaries_and_publish: + needs: prebuild_preparation + name: Build MacOs Aarch64 Binaries runs-on: macos-latest outputs: bin_name: ${{ needs.prebuild_preparation.outputs.bin_name }} @@ -68,9 +93,13 @@ jobs: - name: Build Aarch64 Darwin Release for ${{ needs.prebuild_preparation.outputs.bin_name }} working-directory: ./ahnlich run: | - cargo build --release --target aarch64-apple-darwin --bin ${{ needs.prebuild_preparation.outputs.bin_name }} - tar -cvzf aarch64-darwin-${{ needs.prebuild_preparation.outputs.bin_name }}.tar.gz -C target/aarch64-apple-darwin/release ${{ needs.prebuild_preparation.outputs.bin_name }} - gh release upload ${{github.event.release.tag_name}} aarch64-darwin-${{ needs.prebuild_preparation.outputs.bin_name }}.tar.gz + if [ "${{ needs.prebuild_preparation.outputs.bin_name }}" = "ahnlich-ai" ]; then +
cargo build --features coreml --release --target aarch64-apple-darwin --bin ${{ needs.prebuild_preparation.outputs.bin_name }} + else + cargo build --release --target aarch64-apple-darwin --bin ${{ needs.prebuild_preparation.outputs.bin_name }} + fi + tar -cvzf aarch64-apple-darwin-${{ needs.prebuild_preparation.outputs.bin_name }}.tar.gz -C target/aarch64-apple-darwin/release ${{ needs.prebuild_preparation.outputs.bin_name }} + gh release upload ${{github.event.release.tag_name}} aarch64-apple-darwin-${{ needs.prebuild_preparation.outputs.bin_name }}.tar.gz env: GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} @@ -90,10 +119,14 @@ jobs: with: toolchain: 1.81.0 - - name: Build x86_64 Apple Darwin Release for ${{ needs.prebuild_preparation.outputs.bin_name }} + - name: Build x86_64 Darwin Release for ${{ needs.prebuild_preparation.outputs.bin_name }} working-directory: ./ahnlich run: | - cargo build --release --target x86_64-apple-darwin --bin ${{ needs.prebuild_preparation.outputs.bin_name }} + if [ "${{ needs.prebuild_preparation.outputs.bin_name }}" = "ahnlich-ai" ]; then + cargo build --features coreml --release --target x86_64-apple-darwin --bin ${{ needs.prebuild_preparation.outputs.bin_name }} + else + cargo build --release --target x86_64-apple-darwin --bin ${{ needs.prebuild_preparation.outputs.bin_name }} + fi tar -cvzf x86_64-apple-darwin-${{ needs.prebuild_preparation.outputs.bin_name }}.tar.gz -C target/x86_64-apple-darwin/release ${{ needs.prebuild_preparation.outputs.bin_name }} gh release upload ${{github.event.release.tag_name}} x86_64-apple-darwin-${{ needs.prebuild_preparation.outputs.bin_name }}.tar.gz diff --git a/ahnlich/.cargo/config.toml b/ahnlich/.cargo/config.toml new file mode 100644 index 00000000..2a61cb0e --- /dev/null +++ b/ahnlich/.cargo/config.toml @@ -0,0 +1,5 @@ +[target.aarch64-apple-darwin] +rustflags = ["-Clink-arg=-fapple-link-rtlib"] + +[target.x86_64-apple-darwin] +rustflags = ["-Clink-arg=-fapple-link-rtlib"] diff --git a/ahnlich/Cargo.lock b/ahnlich/Cargo.lock index
0065fa72..57b9c755 100644 --- a/ahnlich/Cargo.lock +++ b/ahnlich/Cargo.lock @@ -108,6 +108,7 @@ dependencies = [ "nonzero_ext", "once_cell", "ort", + "ort-sys", "pretty_assertions", "rayon", "serde", diff --git a/ahnlich/ai/Cargo.toml b/ahnlich/ai/Cargo.toml index 6ec6fadb..e822d9ed 100644 --- a/ahnlich/ai/Cargo.toml +++ b/ahnlich/ai/Cargo.toml @@ -40,13 +40,27 @@ fallible_collections.workspace = true rayon.workspace = true hf-hub = { version = "0.3", default-features = false } dirs = "5.0.1" -ort = { version = "=2.0.0-rc.5", features = ["ndarray"] } +ort = { version = "=2.0.0-rc.5", features = [ + "ndarray", +] } +ort-sys = "=2.0.0-rc.8" moka = { version = "0.12.8", features = ["future"] } tracing-opentelemetry.workspace = true futures.workspace = true tiktoken-rs = "0.5.9" itertools.workspace = true tokenizers = { version = "0.20.1", features = ["hf-hub"] } + +[features] +# ORT Execution providers +default = ["tensorrt", "cuda"] +tensorrt = ["ort/tensorrt"] +cuda = ["ort/cuda"] +# activate only on apple devices +coreml = ["ort/coreml"] +# activate only on windows devices +directml = ["ort/directml"] + [dev-dependencies] db = { path = "../db", version = "*" } pretty_assertions.workspace = true diff --git a/ahnlich/ai/src/engine/ai/providers/ort.rs b/ahnlich/ai/src/engine/ai/providers/ort.rs index e75300b9..f2d3cd7b 100644 --- a/ahnlich/ai/src/engine/ai/providers/ort.rs +++ b/ahnlich/ai/src/engine/ai/providers/ort.rs @@ -5,6 +5,10 @@ use crate::error::AIProxyError; use fallible_collections::FallibleVec; use hf_hub::{api::sync::ApiBuilder, Cache}; use itertools::Itertools; +use ort::{ + CUDAExecutionProvider, CoreMLExecutionProvider, DirectMLExecutionProvider, + TensorRTExecutionProvider, +}; use ort::{Session, SessionOutputs, Value}; use rayon::prelude::*; @@ -350,6 +354,18 @@ impl ProviderTrait for ORTProvider { } fn load_model(&mut self) -> Result<(), AIProxyError> { + ort::init() + .with_execution_providers([ + // Prefer TensorRT over CUDA. 
+ TensorRTExecutionProvider::default().build(), + CUDAExecutionProvider::default().build(), + // Use DirectML on Windows if NVIDIA EPs are not available + DirectMLExecutionProvider::default().build(), + // Or use ANE on Apple platforms + CoreMLExecutionProvider::default().build(), + ]) + .commit()?; + let Some(cache_location) = self.cache_location.clone() else { return Err(AIProxyError::CacheLocationNotInitiailized); };