Skip to content

Commit

Permalink
Added Orca Mini prompt strategy (#263)
Browse files Browse the repository at this point in the history
* added Orca Mini prompt strategy

* maybe this fixed precommit errors?

* pre-commits passing

---------

Co-authored-by: Jan Philipp Harries <jpdus@users.noreply.github.com>
  • Loading branch information
jphme and Jan Philipp Harries authored Aug 5, 2023
1 parent fe28543 commit c93655c
Showing 1 changed file with 46 additions and 0 deletions.
46 changes: 46 additions & 0 deletions src/axolotl/prompt_strategies/orcamini.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
Prompt Strategy for finetuning Orca Mini (v2) models
see also https://huggingface.co/psmathur/orca_mini_v2_7b for more information
Use dataset type: orcamini in config.yml to use this prompt style.
Compared to the alpaca_w_system.open_orca dataset type,
this one specifies the system prompt with "### System:".
Not suited/tested for multiple-turn conversations without further adjustments.
"""
from typing import Generator, Union

from axolotl.prompt_strategies.alpaca_w_system import OpenOrcaPromptTokenizingStrategy
from axolotl.prompters import AlpacaPrompter


class OrcaMiniPrompter(AlpacaPrompter):
    """Prompter that renders Orca Mini (v2) prompts with a "### System:" header."""

    def match_prompt_style(self):
        """Set the single-turn template consumed by build_prompt_w_system."""
        # The template is assembled from its three sections; the resulting
        # string is identical to writing it as one literal.
        system_section = "### System:\n{system}\n\n"
        user_section = "### User:\n{instruction}\n\n"
        response_header = "### Response:\n"
        self.turn_no_input_format = system_section + user_section + response_header

    def build_prompt_w_system(
        self,
        system: str,
        instruction: str,
        output: Union[None, str] = None,
    ) -> Generator[str, None, None]:
        """Yield one prompt string built from the system text and instruction.

        When ``output`` is truthy it is appended directly after the
        "### Response:" header; otherwise the prompt ends at that header.
        """
        prompt = self.turn_no_input_format.format(
            system=system,
            instruction=instruction,
        )
        # A missing or empty output leaves the prompt open for generation.
        yield f"{prompt}{output}" if output else prompt


def load(tokenizer, cfg):
    """Return an OpenOrcaPromptTokenizingStrategy wired to an OrcaMiniPrompter.

    The strategy receives, in order: a new OrcaMiniPrompter, the given
    tokenizer, ``cfg.train_on_inputs`` and ``cfg.sequence_len``.
    """
    prompter = OrcaMiniPrompter()
    strategy = OpenOrcaPromptTokenizingStrategy(
        prompter,
        tokenizer,
        cfg.train_on_inputs,
        cfg.sequence_len,
    )
    return strategy

0 comments on commit c93655c

Please sign in to comment.