fix: fix the prompt templates

umbertogriffo committed Jul 29, 2024
commit 3c46741 · 1 parent cbd9a33
Showing 10 changed files with 82 additions and 158 deletions.
8 changes: 3 additions & 5 deletions README.md
@@ -138,15 +138,13 @@ format.
| 🤖 Model | Supported | Model Size | Notes and link to the model |
|--------------------------------------------|-----------|------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `llama-3.1` Meta Llama 3.1 Instruct | ✅ | 8B | [link](https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF) |
| `llama-3` Meta Llama 3 Instruct | ✅ | 8B | [link](https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF) |
| `openchat-3.6` - OpenChat 3.6 | ✅ | 8B | **Recommended model** [link](https://huggingface.co/bartowski/openchat-3.6-8b-20240522-GGUF) |
| `openchat-3.5` - OpenChat 3.5 | ✅ | 7B | [link](https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF) |
| `openchat-3.6` - OpenChat 3.6 | ✅ | 8B | [link](https://huggingface.co/bartowski/openchat-3.6-8b-20240522-GGUF) |
| `openchat-3.5` - OpenChat 3.5 | ✅ | 7B | **Recommended model** [link](https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF) |
| `starling` Starling Beta | ✅ | 7B | Is trained from `Openchat-3.5-0106`. It's recommended if you prefer more verbosity over OpenChat - [link](https://huggingface.co/bartowski/Starling-LM-7B-beta-GGUF) |
| `neural-beagle` NeuralBeagle14 | ✅ | 7B | [link](https://huggingface.co/TheBloke/NeuralBeagle14-7B-GGUF) |
| `dolphin` Dolphin 2.6 Mistral DPO Laser | ✅ | 7B | [link](https://huggingface.co/TheBloke/dolphin-2.6-mistral-7B-dpo-laser-GGUF) |
| `zephyr` Zephyr Beta | ✅ | 7B | [link](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF) |
| `mistral` Mistral OpenOrca | ✅ | 7B | [link](https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF) |
| `phi-3` Phi-3 Mini 4K Instruct | ✅ | 3.8B | [link](https://huggingface.co/bartowski/Phi-3.1-mini-4k-instruct-GGUF) |
| `phi-3` Phi-3.1 Mini 4K Instruct | ✅ | 3.8B | Set `max-new-tokens` up to `1024`. Not recommended for RAG. [link](https://huggingface.co/bartowski/Phi-3.1-mini-4k-instruct-GGUF) |
| `stablelm-zephyr` StableLM Zephyr OpenOrca | ✅ | 3B | [link](https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF) |
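
Every model in the table ships as a GGUF file, so any of them can be loaded with llama-cpp-python. A minimal sketch, assuming a locally downloaded file and reusing the `n_ctx`/`n_threads`/`n_gpu_layers` values from the settings classes later in this diff:

```python
from llama_cpp import Llama

# Illustrative values mirroring the config dict in Llama31Settings below;
# the model path is an assumption about where the file was downloaded.
llm = Llama(
    model_path="models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
    n_ctx=4096,       # max sequence length
    n_threads=8,      # CPU threads to use
    n_gpu_layers=50,  # layers offloaded to GPU, if acceleration is available
)
```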

## Supported Response Synthesis strategies
7 changes: 7 additions & 0 deletions chatbot/bot/conversation/conversation_retrieval.py
@@ -95,6 +95,9 @@ def refine_question(self, question: str, max_new_tokens: int = 128) -> str:
conversation_awareness_prompt = self.llm.generate_refined_question_conversation_awareness_prompt(
question, chat_history
)

logger.info(f"--- Prompt:\n {conversation_awareness_prompt} \n---")

refined_question = self.llm.generate_answer(conversation_awareness_prompt, max_new_tokens=max_new_tokens)

logger.info(f"--- Refined Question: {refined_question} ---")
@@ -139,13 +142,17 @@ def answer(self, question: str, max_new_tokens: int = 512) -> Any:
conversation_awareness_prompt = self.llm.generate_refined_answer_conversation_awareness_prompt(
question, chat_history
)

logger.debug(f"--- Prompt:\n {conversation_awareness_prompt} \n---")

streamer = self.llm.start_answer_iterator_streamer(
conversation_awareness_prompt, max_new_tokens=max_new_tokens
)

return streamer
else:
prompt = self.llm.generate_qa_prompt(question=question)
logger.debug(f"--- Prompt:\n {prompt} \n---")
streamer = self.llm.start_answer_iterator_streamer(prompt, max_new_tokens=max_new_tokens)
return streamer

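For context, a hypothetical driver for the two methods patched above; the class name, constructor shape, and the streamer's iteration protocol are assumptions, only `refine_question`/`answer` and their signatures come from this diff:

```python
from bot.conversation.conversation_retrieval import ConversationRetrieval  # assumed module path


def chat_once(llm, question: str) -> None:
    # Constructor shape is an assumption; the diff only shows instance methods.
    session = ConversationRetrieval(llm=llm)
    # refine_question now logs the conversation-awareness prompt it builds.
    standalone = session.refine_question(question)
    # answer returns a token streamer, assumed here to be iterable.
    for token in session.answer(standalone, max_new_tokens=512):
        print(token, end="", flush=True)
```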
7 changes: 1 addition & 6 deletions chatbot/bot/model/model_settings.py
@@ -1,9 +1,8 @@
from enum import Enum

from bot.model.settings.dolphin import DolphinSettings
from bot.model.settings.llama_3 import Llama3Settings, Llama31Settings
from bot.model.settings.llama_3 import Llama31Settings
from bot.model.settings.mistral import MistralSettings
from bot.model.settings.neural_beagle import NeuralBeagleSettings
from bot.model.settings.openchat import OpenChat35Settings, OpenChat36Settings
from bot.model.settings.phi_3 import PhiThreeSettings
from bot.model.settings.stablelm_zephyr import StableLMZephyrSettings
@@ -19,9 +18,7 @@ class ModelType(Enum):
OPENCHAT_3_5 = "openchat-3.5"
OPENCHAT_3_6 = "openchat-3.6"
STARLING = "starling"
NEURAL_BEAGLE = "neural-beagle"
PHI_3 = "phi-3"
LLAMA_3 = "llama-3"
LLAMA_3_1 = "llama-3.1"


@@ -33,9 +30,7 @@ class ModelType(Enum):
ModelType.OPENCHAT_3_5.value: OpenChat35Settings,
ModelType.OPENCHAT_3_6.value: OpenChat36Settings,
ModelType.STARLING.value: StarlingSettings,
ModelType.NEURAL_BEAGLE.value: NeuralBeagleSettings,
ModelType.PHI_3.value: PhiThreeSettings,
ModelType.LLAMA_3.value: Llama3Settings,
ModelType.LLAMA_3_1.value: Llama31Settings,
}

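A short sketch of how the trimmed registry might be consumed, assuming the mapping shown above is exposed as `SUPPORTED_MODELS` (its name is not visible in this hunk); the attribute reads rely on the class-level fields defined in the settings classes below:

```python
from bot.model.model_settings import SUPPORTED_MODELS, ModelType  # mapping name is an assumption

# Resolve a model identifier (e.g. from a CLI flag) to its settings class.
settings_cls = SUPPORTED_MODELS[ModelType.LLAMA_3_1.value]
print(settings_cls.file_name)  # Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
print(settings_cls.url)        # direct GGUF download URL
```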
35 changes: 15 additions & 20 deletions chatbot/bot/model/settings/llama_3.py
@@ -2,21 +2,19 @@
from bot.model.model import Model


class Llama3Settings(Model):
url = "https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
file_name = "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
class Llama31Settings(Model):
url = "https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
file_name = "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
clients = [LlmClientType.LAMA_CPP]
config = {
"n_ctx": 4096, # The max sequence length to use - note that longer sequence lengths require much more resources
"n_threads": 8, # The number of CPU threads to use, tailor to your system and the resulting performance
"n_gpu_layers": 50, # The number of layers to offload to GPU, if you have GPU acceleration available
}
config_answer = {"temperature": 0.7, "stop": []}
system_template = (
"<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are a helpful, respectful and "
"honest assistant. <|eot_id|><|start_header_id|>user<|end_header_id|>"
)
qa_prompt_template = """{system}\n
system_template = "You are a helpful, respectful and honest assistant."
qa_prompt_template = """<|start_header_id|>System<|end_header_id|>\n
{system}<|eot_id|><|start_header_id|>user<|end_header_id|>\n
Answer the question below:
{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
@@ -28,7 +26,8 @@ class Llama3Settings(Model):
Given the context information and not prior knowledge, answer the question below:
{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
refined_ctx_prompt_template = """{system}\n
refined_ctx_prompt_template = """<|start_header_id|>System<|end_header_id|>\n
{system}<|eot_id|><|start_header_id|>user<|end_header_id|>\n
{question}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer
@@ -40,7 +39,8 @@ class Llama3Settings(Model):
If the context isn't useful, return the original answer.
Refined Answer:<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
refined_question_conversation_awareness_prompt_template = """{system}\n
refined_question_conversation_awareness_prompt_template = """<|start_header_id|>System<|end_header_id|>\n
{system}<|eot_id|><|start_header_id|>user<|end_header_id|>\n
Chat History:
---------------------
{chat_history}
@@ -50,11 +50,12 @@ class Llama3Settings(Model):
Standalone question:<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

refined_answer_conversation_awareness_prompt_template = """
refined_answer_conversation_awareness_prompt_template = """<|start_header_id|>System<|end_header_id|>
{system}<|eot_id|><|start_header_id|>user<|end_header_id|>
You are engaging in a conversation with a human participant who is unaware that they might be
interacting with a machine. \n
Your goal is to respond in a way that convincingly simulates human-like intelligence and behavior. \n
The conversation should be natural, coherent, and contextually relevant. \n
interacting with a machine.
Your goal is to respond in a way that convincingly simulates human-like intelligence and behavior.
The conversation should be natural, coherent, and contextually relevant.
Chat History:
---------------------
{chat_history}
@@ -66,9 +67,3 @@ class Llama3Settings(Model):
Please also don't reformulate the follow up question, and write just a concise answer.
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""


class Llama31Settings(Llama3Settings):
url = "https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
file_name = "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
clients = [LlmClientType.LAMA_CPP]
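
To see what the reworked Llama 3.1 template expands to, a quick sketch; the question text is illustrative, and the real client may add further special tokens around the result:

```python
from bot.model.settings.llama_3 import Llama31Settings

# Fill the new QA template: {system} and {question} are its only placeholders.
prompt = Llama31Settings.qa_prompt_template.format(
    system=Llama31Settings.system_template,
    question="What does Llama 3.1 change over Llama 3?",
)
print(prompt)
```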
71 changes: 0 additions & 71 deletions chatbot/bot/model/settings/neural_beagle.py

This file was deleted.

37 changes: 17 additions & 20 deletions chatbot/bot/model/settings/openchat.py
@@ -12,20 +12,20 @@ class OpenChat35Settings(Model):
"n_gpu_layers": 50, # The number of layers to offload to GPU, if you have GPU acceleration available
}
config_answer = {"temperature": 0.7, "stop": []}
system_template = "You are a helpful, respectful and honest assistant. "
qa_prompt_template = """{system}\n
system_template = ""
qa_prompt_template = """
GPT4 Correct User: Answer the question below:
{question}<|end_of_turn|>GPT4 Correct Assistant:
"""
ctx_prompt_template = """{system}\n
ctx_prompt_template = """
GPT4 Correct User: Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question below:
{question}<|end_of_turn|>GPT4 Correct Assistant:
"""
refined_ctx_prompt_template = """{system}\n
refined_ctx_prompt_template = """
GPT4 Correct User: The original query is as follows: {question}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer
@@ -37,7 +37,7 @@ class OpenChat35Settings(Model):
If the context isn't useful, return the original answer.
Refined Answer:<|end_of_turn|>GPT4 Correct Assistant:
"""
refined_question_conversation_awareness_prompt_template = """{system}\n
refined_question_conversation_awareness_prompt_template = """
GPT4 Correct User: Chat History:
---------------------
{chat_history}
@@ -49,9 +49,9 @@

refined_answer_conversation_awareness_prompt_template = """
GPT4 Correct User: You are engaging in a conversation with a human participant who is unaware that they might be
interacting with a machine. \n
Your goal is to respond in a way that convincingly simulates human-like intelligence and behavior. \n
The conversation should be natural, coherent, and contextually relevant. \n
interacting with a machine.
Your goal is to respond in a way that convincingly simulates human-like intelligence and behavior.
The conversation should be natural, coherent, and contextually relevant.
Chat History:
---------------------
{chat_history}
@@ -76,23 +76,20 @@ class OpenChat36Settings(Model):
"flash_attn": False, # Use flash attention.
}
config_answer = {"temperature": 0.7, "stop": []}
system_template = (
"<|start_header_id|>system<|end_header_id|>You are a helpful, respectful and "
"honest assistant. <|eot_id|><|start_header_id|>GPT4 Correct User<|end_header_id|>"
)
qa_prompt_template = """{system}\n
system_template = ""
qa_prompt_template = """<|start_header_id|>GPT4 Correct User<|end_header_id|>\n
Answer the question below:
{question}<|eot_id|><|start_header_id|>GPT4 Correct Assistant<|end_header_id|>\n\n
"""
ctx_prompt_template = """{system}\n
ctx_prompt_template = """<|begin_of_text|><|start_header_id|>GPT4 Correct User<|end_header_id|>\n
Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question below:
{question}<|eot_id|><|start_header_id|>GPT4 Correct Assistant<|end_header_id|>\n\n
"""
refined_ctx_prompt_template = """{system}\n
refined_ctx_prompt_template = """<|start_header_id|>GPT4 Correct User<|end_header_id|>\n
The original query is as follows: {question}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer
@@ -104,7 +101,7 @@ class OpenChat36Settings(Model):
If the context isn't useful, return the original answer.
Refined Answer:<|eot_id|><|start_header_id|>GPT4 Correct Assistant<|end_header_id|>\n\n
"""
refined_question_conversation_awareness_prompt_template = """{system}\n
refined_question_conversation_awareness_prompt_template = """<|start_header_id|>GPT4 Correct User<|end_header_id|>\n
Chat History:
---------------------
{chat_history}
@@ -114,11 +111,11 @@
Standalone question:<|eot_id|><|start_header_id|>GPT4 Correct Assistant<|end_header_id|>\n\n
"""

refined_answer_conversation_awareness_prompt_template = """
refined_answer_conversation_awareness_prompt_template = """<|start_header_id|>GPT4 Correct User<|end_header_id|>\n
You are engaging in a conversation with a human participant who is unaware that they might be
interacting with a machine. \n
Your goal is to respond in a way that convincingly simulates human-like intelligence and behavior. \n
The conversation should be natural, coherent, and contextually relevant. \n
interacting with a machine.
Your goal is to respond in a way that convincingly simulates human-like intelligence and behavior.
The conversation should be natural, coherent, and contextually relevant.
Chat History:
---------------------
{chat_history}
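The OpenChat 3.5 QA template, by contrast, no longer carries a `{system}` slot after this commit, so only the question is substituted; a minimal sketch with an illustrative question:

```python
from bot.model.settings.openchat import OpenChat35Settings

# Only {question} remains in the template; system_template is now empty.
prompt = OpenChat35Settings.qa_prompt_template.format(
    question="Which response synthesis strategies are supported?",
)
print(prompt)  # GPT4 Correct User: ... <|end_of_turn|>GPT4 Correct Assistant:
```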
