feat: add llama 3.1
umbertogriffo committed Jul 26, 2024
1 parent 1ee770a commit cbd9a33
Showing 4 changed files with 25 additions and 16 deletions.
25 changes: 13 additions & 12 deletions README.md
@@ -135,18 +135,19 @@ format.

### Supported Models

| 🤖 Model                                       | Supported | Model Size | Notes and link to the model |
|------------------------------------------------|-----------|------------|------------------------------|
| `llama-3` Meta Llama 3 Instruct                | ✅        | 8B         | Less accurate than OpenChat - [link](https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF) |
| `openchat-3.6` **Recommended** - OpenChat 3.6  | ✅        | 8B         | [link](https://huggingface.co/bartowski/openchat-3.6-8b-20240522-GGUF) |
| `openchat-3.5` - OpenChat 3.5                  | ✅        | 7B         | [link](https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF) |
| `starling` Starling Beta                       | ✅        | 7B         | Is trained from `Openchat-3.5-0106`. It's recommended if you prefer more verbosity over OpenChat - [link](https://huggingface.co/bartowski/Starling-LM-7B-beta-GGUF) |
| `neural-beagle` NeuralBeagle14                 | ✅        | 7B         | [link](https://huggingface.co/TheBloke/NeuralBeagle14-7B-GGUF) |
| `dolphin` Dolphin 2.6 Mistral DPO Laser        | ✅        | 7B         | [link](https://huggingface.co/TheBloke/dolphin-2.6-mistral-7B-dpo-laser-GGUF) |
| `zephyr` Zephyr Beta                           | ✅        | 7B         | [link](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF) |
| `mistral` Mistral OpenOrca                     | ✅        | 7B         | [link](https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF) |
| `phi-3` Phi-3 Mini 4K Instruct                 | ✅        | 3.8B       | [link](https://huggingface.co/bartowski/Phi-3.1-mini-4k-instruct-GGUF) |
| `stablelm-zephyr` StableLM Zephyr OpenOrca     | ✅        | 3B         | [link](https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF) |
| 🤖 Model                                       | Supported | Model Size | Notes and link to the model |
|------------------------------------------------|-----------|------------|------------------------------|
| `llama-3.1` Meta Llama 3.1 Instruct            | ✅        | 8B         | [link](https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF) |
| `llama-3` Meta Llama 3 Instruct                | ✅        | 8B         | [link](https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF) |
| `openchat-3.6` - OpenChat 3.6                  | ✅        | 8B         | **Recommended model** - [link](https://huggingface.co/bartowski/openchat-3.6-8b-20240522-GGUF) |
| `openchat-3.5` - OpenChat 3.5                  | ✅        | 7B         | [link](https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF) |
| `starling` Starling Beta                       | ✅        | 7B         | Trained from `Openchat-3.5-0106`. Recommended if you prefer more verbosity than OpenChat - [link](https://huggingface.co/bartowski/Starling-LM-7B-beta-GGUF) |
| `neural-beagle` NeuralBeagle14                 | ✅        | 7B         | [link](https://huggingface.co/TheBloke/NeuralBeagle14-7B-GGUF) |
| `dolphin` Dolphin 2.6 Mistral DPO Laser        | ✅        | 7B         | [link](https://huggingface.co/TheBloke/dolphin-2.6-mistral-7B-dpo-laser-GGUF) |
| `zephyr` Zephyr Beta                           | ✅        | 7B         | [link](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF) |
| `mistral` Mistral OpenOrca                     | ✅        | 7B         | [link](https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF) |
| `phi-3` Phi-3 Mini 4K Instruct                 | ✅        | 3.8B       | [link](https://huggingface.co/bartowski/Phi-3.1-mini-4k-instruct-GGUF) |
| `stablelm-zephyr` StableLM Zephyr OpenOrca     | ✅        | 3B         | [link](https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF) |

## Supported Response Synthesis strategies

6 changes: 4 additions & 2 deletions chatbot/bot/model/model_settings.py
@@ -1,7 +1,7 @@
from enum import Enum

from bot.model.settings.dolphin import DolphinSettings
from bot.model.settings.llama_3 import LlamaThreeSettings
from bot.model.settings.llama_3 import Llama3Settings, Llama31Settings
from bot.model.settings.mistral import MistralSettings
from bot.model.settings.neural_beagle import NeuralBeagleSettings
from bot.model.settings.openchat import OpenChat35Settings, OpenChat36Settings
@@ -22,6 +22,7 @@ class ModelType(Enum):
NEURAL_BEAGLE = "neural-beagle"
PHI_3 = "phi-3"
LLAMA_3 = "llama-3"
LLAMA_3_1 = "llama-3.1"


SUPPORTED_MODELS = {
@@ -34,7 +35,8 @@ class ModelType(Enum):
ModelType.STARLING.value: StarlingSettings,
ModelType.NEURAL_BEAGLE.value: NeuralBeagleSettings,
ModelType.PHI_3.value: PhiThreeSettings,
ModelType.LLAMA_3.value: LlamaThreeSettings,
ModelType.LLAMA_3.value: Llama3Settings,
ModelType.LLAMA_3_1.value: Llama31Settings,
}
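
For context, here is a minimal sketch of how the new registry entry can be resolved at runtime. The `get_model_settings` helper is illustrative and not part of this commit; the import path is assumed from the file location `chatbot/bot/model/model_settings.py`, and each registry value is assumed to be a `Model` subclass exposing `url`/`file_name` as class attributes.

```python
# Illustrative sketch: look up a settings class from the SUPPORTED_MODELS registry.
# Assumption: the module is importable as bot.model.model_settings.
from bot.model.model_settings import SUPPORTED_MODELS, ModelType


def get_model_settings(model_name: str):
    """Return the settings class registered for `model_name`, e.g. "llama-3.1"."""
    settings = SUPPORTED_MODELS.get(model_name)
    if settings is None:
        raise ValueError(f"Unknown model '{model_name}'. Supported: {list(SUPPORTED_MODELS)}")
    return settings


llama_31_settings = get_model_settings(ModelType.LLAMA_3_1.value)
print(llama_31_settings.file_name)  # Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
```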


8 changes: 7 additions & 1 deletion chatbot/bot/model/settings/llama_3.py
@@ -2,7 +2,7 @@
from bot.model.model import Model


class LlamaThreeSettings(Model):
class Llama3Settings(Model):
url = "https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
file_name = "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
clients = [LlmClientType.LAMA_CPP]
@@ -66,3 +66,9 @@ class LlamaThreeSettings(Model):
Please also don't reformulate the follow up question, and write just a concise answer.
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""


class Llama31Settings(Llama3Settings):
url = "https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
file_name = "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
clients = [LlmClientType.LAMA_CPP]
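
Because `Llama31Settings` subclasses `Llama3Settings`, it inherits the Llama 3 prompt templates unchanged and only points at the new GGUF weights. Below is a hedged sketch of fetching those weights locally; only `url` and `file_name` come from the class above, while the `models/` target directory and the use of `urllib` are assumptions, not part of this commit.

```python
# Hypothetical download sketch: fetch the GGUF file referenced by Llama31Settings.
from pathlib import Path
from urllib.request import urlretrieve

from bot.model.settings.llama_3 import Llama31Settings

target = Path("models") / Llama31Settings.file_name
target.parent.mkdir(parents=True, exist_ok=True)
if not target.exists():
    # Multi-GB download for the Q4_K_M quantization of the 8B model.
    urlretrieve(Llama31Settings.url, target)
print(f"Weights available at {target}")
```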
2 changes: 1 addition & 1 deletion todo.md
@@ -2,7 +2,7 @@
- Test Flash attention:
- https://github.com/ggerganov/llama.cpp/pull/5021
- use LLAMA_CUDA since LLAMA_CUBLAS is deprecated
- Google Search with LLM
- Google Search with LLM:
- https://huggingface.co/blog/nand-tmp/google-search-with-llm
- https://blog.nextideatech.com/how-to-use-google-search-with-langchain-openai/
- https://medium.com/@reynxzz/rag-with-gemini-google-search-and-bq-vector-search-for-content-personalization-08fe7dab6b33
