-
-
Notifications
You must be signed in to change notification settings - Fork 877
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added Orca Mini prompt strategy (#263)
* added Orca Mini prompt strategy * maybe this fixed precommit errors? * pre-commits passing --------- Co-authored-by: Jan Philipp Harries <jpdus@users.noreply.github.com>
- Loading branch information
Showing
1 changed file
with
46 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
""" | ||
Prompt Strategy for finetuning Orca Mini (v2) models | ||
see also https://huggingface.co/psmathur/orca_mini_v2_7b for more information | ||
Use dataset type: orcamini in config.yml to use this prompt style. | ||
Compared to the alpaca_w_system.open_orca dataset type, | ||
this one specifies the system prompt with "### System:". | ||
Not suited to (or tested for) multi-turn conversations without further adjustments. | ||
""" | ||
from typing import Generator, Union | ||
|
||
from axolotl.prompt_strategies.alpaca_w_system import OpenOrcaPromptTokenizingStrategy | ||
from axolotl.prompters import AlpacaPrompter | ||
|
||
|
||
class OrcaMiniPrompter(AlpacaPrompter):
    """Prompter that lays out samples in the Orca Mini (v2) format.

    Each prompt consists of a "### System:" section, a "### User:" section
    and a trailing "### Response:" header, in that order.
    """

    def match_prompt_style(self):
        """Install the Orca Mini template on ``self.turn_no_input_format``.

        NOTE(review): presumably invoked by ``AlpacaPrompter`` during
        construction -- confirm against the base class.
        """
        # Adjacent literals concatenate into one template string with
        # ``{system}`` and ``{instruction}`` placeholders.
        self.turn_no_input_format = (
            "### System:\n{system}\n\n"
            "### User:\n{instruction}\n\n"
            "### Response:\n"
        )

    def build_prompt_w_system(
        self,
        system: str,
        instruction: str,
        output: Union[None, str] = None,
    ) -> Generator[str, None, None]:
        """Yield one prompt string built from ``system`` and ``instruction``.

        Args:
            system: Text substituted for ``{system}`` in the template.
            instruction: Text substituted for ``{instruction}``.
            output: Optional response/label. When truthy it is appended
                directly after the "### Response:" header; ``None`` or an
                empty string leaves the prompt open-ended.

        Yields:
            Exactly one string: the formatted prompt, with ``output``
            appended when provided.
        """
        prompt = self.turn_no_input_format.format(
            system=system, instruction=instruction
        )
        yield f"{prompt}{output}" if output else prompt
|
||
|
||
def load(tokenizer, cfg):
    """Build the tokenizing strategy for the Orca Mini prompt style.

    Args:
        tokenizer: Tokenizer object handed through unchanged to the strategy.
        cfg: Configuration object; its ``train_on_inputs`` and
            ``sequence_len`` attributes are read here.

    Returns:
        An ``OpenOrcaPromptTokenizingStrategy`` wrapping a fresh
        ``OrcaMiniPrompter``.
    """
    prompter = OrcaMiniPrompter()
    strategy = OpenOrcaPromptTokenizingStrategy(
        prompter,
        tokenizer,
        cfg.train_on_inputs,
        cfg.sequence_len,
    )
    return strategy