Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
evilsocket committed Dec 4, 2024
2 parents 1d38e4b + ffa12f3 commit 515d04b
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Nerve features integrations for any model accessible via the following providers
| **Nvidia NIM** | `NIM_API_KEY` | `nim://nvidia/nemotron-4-340b-instruct` |
| **DeepSeek** | `DEEPSEEK_API_KEY` | `deepseek://deepseek-chat` |
| **xAI** | `XAI_API_KEY` | `xai://grok-beta` |
| **Mistral.ai** | `MISTRAL_API_KEY` | `mistral://mistral-large-latest` |
| **Novita** | `NOVITA_API_KEY` | `novita://meta-llama/llama-3.1-70b-instruct` |

¹ Refer to [this document](https://huggingface.co/blog/tgi-messages-api#using-inference-endpoints-with-openai-client-libraries) for how to configure a custom Huggingface endpoint.
Expand Down
66 changes: 66 additions & 0 deletions src/agent/generator/mistral.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
use anyhow::Result;
use async_trait::async_trait;

use crate::agent::state::SharedState;

use super::{openai::OpenAIClient, ChatOptions, ChatResponse, Client};

/// Chat client for the Mistral.ai platform.
///
/// Mistral exposes an OpenAI-compatible REST API, so this type is a thin
/// wrapper that delegates all work to an [`OpenAIClient`] configured for
/// the Mistral endpoint (see `Client::new` below).
pub struct MistralClient {
    // underlying OpenAI-compatible client pointed at https://api.mistral.ai/v1/
    client: OpenAIClient,
}

#[async_trait]
impl Client for MistralClient {
    /// Builds a Mistral client by pointing the OpenAI-compatible client at
    /// the Mistral REST endpoint. The URL, port and context-window arguments
    /// are ignored; authentication comes from the `MISTRAL_API_KEY`
    /// environment variable (resolved by `OpenAIClient::custom`).
    fn new(_: &str, _: u16, model_name: &str, _: u32) -> anyhow::Result<Self>
    where
        Self: Sized,
    {
        Ok(Self {
            client: OpenAIClient::custom(
                model_name,
                "MISTRAL_API_KEY",
                "https://api.mistral.ai/v1/",
            )?,
        })
    }

    /// Delegates the native tool-support probe to the wrapped client.
    async fn check_native_tools_support(&self) -> Result<bool> {
        self.client.check_native_tools_support().await
    }

    /// Sends a chat request via the wrapped client, retrying for as long as
    /// the provider keeps answering with a rate-limit error.
    ///
    /// NOTE(review): retries are unbounded — a persistently rate-limited
    /// model will loop here (sleeping 5s per attempt in `check_rate_limit`).
    async fn chat(
        &self,
        state: SharedState,
        options: &ChatOptions,
    ) -> anyhow::Result<ChatResponse> {
        loop {
            let result = self.client.chat(state.clone(), options).await;
            match &result {
                // rate-limited: check_rate_limit has already slept, try again
                Err(err) if self.check_rate_limit(&err.to_string()).await => continue,
                // success or any other error: hand it back to the caller
                _ => return result,
            }
        }
    }

    /// Returns true when `error` is Mistral's rate-limit message, after
    /// sleeping for a fixed back-off; returns false for any other error.
    async fn check_rate_limit(&self, error: &str) -> bool {
        if !error.contains("Requests rate limit exceeded") {
            return false;
        }

        // fixed 5-second back-off before signalling the caller to retry
        let retry_time = std::time::Duration::from_secs(5);
        log::warn!(
            "rate limit reached for this model, retrying in {:?} ...",
            &retry_time,
        );
        tokio::time::sleep(retry_time).await;

        true
    }
}

#[async_trait]
impl mini_rag::Embedder for MistralClient {
    /// Computes embeddings for `text` by delegating to the wrapped
    /// OpenAI-compatible client.
    async fn embed(&self, text: &str) -> Result<mini_rag::Embeddings> {
        self.client.embed(text).await
    }
}
7 changes: 7 additions & 0 deletions src/agent/generator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ mod ollama;
mod openai;
mod openai_compatible;
mod xai;
mod mistral;

pub(crate) mod history;
mod options;
Expand Down Expand Up @@ -216,6 +217,12 @@ macro_rules! factory_body {
$model_name,
$context_window,
)?)),
"mistral" => Ok(Box::new(mistral::MistralClient::new(
$url,
$port,
$model_name,
$context_window,
)?)),
"http" => Ok(Box::new(openai_compatible::OpenAiCompatibleClient::new(
$url,
$port,
Expand Down

0 comments on commit 515d04b

Please sign in to comment.