diff --git a/examples/configurations/README.md b/examples/configurations/README.md index 30731e7f4159..ef731d822ccc 100644 --- a/examples/configurations/README.md +++ b/examples/configurations/README.md @@ -64,4 +64,21 @@ wget https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gg curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ "model": "llava", "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' + +``` + +### Mixtral + +``` +cp -r examples/configurations/mixtral/* models/ +wget https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q2_K.gguf -O models/mixtral-8x7b-instruct-v0.1.Q2_K.gguf +``` + +#### Test it out + +``` +curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ + "model": "mixtral", + "prompt": "How fast is light?", + "temperature": 0.1 }' ``` diff --git a/examples/configurations/mixtral/mixtral b/examples/configurations/mixtral/mixtral new file mode 100644 index 000000000000..88ce5c01337f --- /dev/null +++ b/examples/configurations/mixtral/mixtral @@ -0,0 +1 @@ +[INST] {{.Input}} [/INST] diff --git a/examples/configurations/mixtral/mixtral-chat b/examples/configurations/mixtral/mixtral-chat new file mode 100644 index 000000000000..88ce5c01337f --- /dev/null +++ b/examples/configurations/mixtral/mixtral-chat @@ -0,0 +1 @@ +[INST] {{.Input}} [/INST] diff --git a/examples/configurations/mixtral/mixtral.yaml b/examples/configurations/mixtral/mixtral.yaml new file mode 100755 index 000000000000..9a2d7eed087a --- /dev/null +++ b/examples/configurations/mixtral/mixtral.yaml @@ -0,0 +1,16 @@ +context_size: 512 +f16: true +threads: 11 +gpu_layers: 90 
+name: mixtral +mmap: true +parameters: + model: mixtral-8x7b-instruct-v0.1.Q2_K.gguf + temperature: 0.2 + top_k: 40 + top_p: 0.95 + batch: 512 + tfz: 1.0 +template: + chat: mixtral-chat + completion: mixtral