Add codellama examples
mudler committed Jan 25, 2024
1 parent ee79fce commit e02f2f0
Showing 4 changed files with 35 additions and 3 deletions.
6 changes: 4 additions & 2 deletions docs/content/docs/getting-started/quickstart.md
@@ -40,7 +40,7 @@ There are different categories of models: [LLMs]({{%relref "docs/features/text-g

{{% alert icon="💡" %}}

To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI/tree/master/examples/configurations).
To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI/tree/master/examples/configurations); the configurations for the models below are available [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
{{% /alert %}}

{{< tabs tabTotal="3" >}}
@@ -67,6 +67,7 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start
| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | GPU-only |
| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) (with transformers) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) (with llama.cpp) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core codellama-7b-gguf``` |
{{% /tab %}}
{{% tab tabName="GPU (CUDA 11)" %}}

@@ -92,6 +93,7 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start
| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda11 animagine-xl``` |
| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 transformers-tinyllama``` |
| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 codellama-7b``` |
| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core codellama-7b-gguf``` |
{{% /tab %}}


@@ -118,7 +120,7 @@ To customize the models, see [Model customization]({{%relref "docs/getting-start
| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | GPU-only | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda12 animagine-xl``` |
| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` |
| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` |
{{% /tab %}}
| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` |
{{% /tab %}}

{{< /tabs >}}
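
Once one of the containers above is running, the model can be exercised with a standard OpenAI-compatible completion request. A minimal check (this mirrors the `usage` snippet shipped with the model configs added in this commit, and assumes the default `-p 8080:8080` port mapping):

```bash
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
  "model": "codellama-7b-gguf",
  "prompt": "import socket\n\ndef ping_exponential_backoff(host: str):"
}'
```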
16 changes: 16 additions & 0 deletions embedded/models/codellama-7b-gguf.yaml
@@ -0,0 +1,16 @@
name: codellama-7b-gguf
backend: llama
parameters:
  model: huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_K_M.gguf
  temperature: 0.2
  top_k: 40
  seed: -1
  top_p: 0.95
context_size: 4096
f16: true
gpu_layers: 90
usage: |
  curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
    "model": "codellama-7b-gguf",
    "prompt": "import socket\n\ndef ping_exponential_backoff(host: str):"
  }'
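
The `huggingface://<owner>/<repo>/<file>` URI in `parameters.model` pins a single GGUF file, so a different quantization can be selected by pointing at another file in the same repository. A hypothetical variant as a sketch (the `Q5_K_M` file name is an assumption; check TheBloke/CodeLlama-7B-GGUF for the files actually published):

```yaml
# Hypothetical variant of codellama-7b-gguf.yaml selecting a different quantization.
# codellama-7b.Q5_K_M.gguf is assumed to exist in TheBloke/CodeLlama-7B-GGUF;
# adjust the file name to one actually published in the repository.
name: codellama-7b-gguf-q5
parameters:
  model: huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q5_K_M.gguf
  temperature: 0.2
  top_k: 40
  seed: -1
  top_p: 0.95
context_size: 4096
f16: true
gpu_layers: 90
```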
14 changes: 14 additions & 0 deletions embedded/models/codellama-7b.yaml
@@ -0,0 +1,14 @@
name: codellama-7b
backend: transformers
parameters:
  model: codellama/CodeLlama-7b-hf
  temperature: 0.2
  top_k: 40
  seed: -1
  top_p: 0.95

usage: |
  curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
    "model": "codellama-7b",
    "prompt": "import socket\n\ndef ping_exponential_backoff(host: str):"
  }'
2 changes: 1 addition & 1 deletion embedded/models/dolphin-2.5-mixtral-8x7b.yaml
@@ -1,7 +1,7 @@
name: dolphin-mixtral-8x7b
mmap: true
parameters:
  model: huggingface://TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/blob/main/dolphin-2.5-mixtral-8x7b.Q2_K.gguf
  model: huggingface://TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/dolphin-2.5-mixtral-8x7b.Q2_K.gguf
  temperature: 0.2
  top_k: 40
  top_p: 0.95
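
The fix above removes the `blob/main/` segment that comes from copying the file's web URL on huggingface.co: LocalAI's `huggingface://` URIs follow the same `owner/repo/file` layout used by the codellama-7b-gguf config added in this commit.

```yaml
# huggingface://<repo-owner>/<repo-name>/<file-name>
model: huggingface://TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/dolphin-2.5-mixtral-8x7b.Q2_K.gguf
```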