This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Refined NeuralChat finetuning config #222

Merged · 3 commits · Sep 1, 2023 · Changes from all commits
4 changes: 2 additions & 2 deletions intel_extension_for_transformers/llm/finetuning/finetuning.py
@@ -54,15 +54,15 @@
import importlib.util
from transformers.utils.import_utils import is_optimum_available
from .data_utils import preprocess_dataset, ALPACA_PROMPT_DICT
-from intel_extension_for_transformers.neural_chat.config import FinetuningConfig
+from intel_extension_for_transformers.neural_chat.config import BaseFinetuningConfig


def is_optimum_habana_available():
return is_optimum_available() and importlib.util.find_spec("optimum.habana") != None


class Finetuning:
-    def __init__(self, finetuning_config: FinetuningConfig):
+    def __init__(self, finetuning_config: BaseFinetuningConfig):
        self.model_args, self.data_args, self.training_args, self.finetune_args = (
            finetuning_config.model_args,
            finetuning_config.data_args,
77 changes: 75 additions & 2 deletions intel_extension_for_transformers/neural_chat/README.md
@@ -122,6 +122,26 @@ We provide multiple plugins to augment the chatbot on top of LLM inference.

Finetuning a pretrained large language model (LLM) on an instruction-following dataset to create a customized chatbot is very easy with NeuralChat.

### Finetuning for Text Generation Task

**command line experience**

```shell
neuralchat finetune --base_model "meta-llama/Llama-2-7b-chat-hf" --config pipeline/finetuning/config/finetuning.yaml
```


**Python API experience**

```python
>>> from intel_extension_for_transformers.neural_chat import TextGenerationFinetuningConfig
>>> from intel_extension_for_transformers.neural_chat import finetune_model
>>> finetune_cfg = TextGenerationFinetuningConfig()
>>> finetuned_model = finetune_model(finetune_cfg)
```
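
Note: `BaseFinetuningConfig` (which `TextGenerationFinetuningConfig` aliases, per the config.py hunk below) declares its four fields without defaults, so the bare `TextGenerationFinetuningConfig()` call above leans on defaults the merged dataclass does not define. A minimal runnable sketch, mirroring the test in this PR; the model path, the `train_file` field name, and the output directory are placeholder assumptions:

```python
from transformers import TrainingArguments
from intel_extension_for_transformers.neural_chat.config import (
    ModelArguments,
    DataArguments,
    FinetuningArguments,
)
from intel_extension_for_transformers.neural_chat import (
    TextGenerationFinetuningConfig,
    finetune_model,
)

finetune_cfg = TextGenerationFinetuningConfig(
    # model_name_or_path is assumed to be the accepted field name.
    model_args=ModelArguments(model_name_or_path="meta-llama/Llama-2-7b-chat-hf"),
    data_args=DataArguments(train_file="alpaca_data.json"),  # assumed field name
    training_args=TrainingArguments(output_dir="./llama2_chat_finetuned"),
    finetune_args=FinetuningArguments(),
)
finetune_model(finetune_cfg)
```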

### Finetuning for Summarization Task

**command line experience**

@@ -132,12 +152,65 @@
```shell
neuralchat finetune --base_model "meta-llama/Llama-2-7b-chat-hf" --config pipeline/finetuning/config/finetuning.yaml
```
**Python API experience**

```python
->>> from intel_extension_for_transformers.neural_chat import FinetuningConfig
+>>> from intel_extension_for_transformers.neural_chat import SummarizationFinetuningConfig
>>> from intel_extension_for_transformers.neural_chat import finetune_model
->>> finetune_cfg = FinetuningConfig()
+>>> finetune_cfg = SummarizationFinetuningConfig()
>>> finetuned_model = finetune_model(finetune_cfg)
```

### Finetuning for Code Generation Task

**command line experience**

```shell
neuralchat finetune --base_model "meta-llama/Llama-2-7b-chat-hf" --config pipeline/finetuning/config/finetuning.yaml
```


**Python API experience**

```python
>>> from intel_extension_for_transformers.neural_chat import CodeGenerationFinetuningConfig
>>> from intel_extension_for_transformers.neural_chat import finetune_model
>>> finetune_cfg = CodeGenerationFinetuningConfig()
>>> finetuned_model = finetune_model(finetune_cfg)
```

### Finetuning for Text-to-Speech (TTS) Task

**command line experience**

```shell
neuralchat finetune --base_model "meta-llama/Llama-2-7b-chat-hf" --config pipeline/finetuning/config/finetuning.yaml
```


**Python API experience**

```python
>>> from intel_extension_for_transformers.neural_chat import TTSFinetuningConfig
>>> from intel_extension_for_transformers.neural_chat import finetune_model
>>> finetune_cfg = TTSFinetuningConfig()
>>> finetuned_model = finetune_model(finetune_cfg)
```
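
Unlike the text tasks, the TTS config introduced in this PR carries its own dataset and model argument dataclasses (see the config.py hunk below). A minimal sketch of constructing them; the audio path, gender, and step values are placeholders, not defaults:

```python
from intel_extension_for_transformers.neural_chat.config import (
    TTSDatasetArguments,
    TTSModelArguments,
)

# Placeholder values; audio_paths should point at the finetuning audio data.
dataset_args = TTSDatasetArguments(
    audio_paths="/path/to/audios",
    gender="female",
    language="English",
)
model_args = TTSModelArguments(step=1000, warmup_step=100, learning_rate=5e-5)

# These two objects, plus a transformers.TrainingArguments, populate
# TTSFinetuningConfig (see the config.py hunk below).
```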

### Inference with Finetuned Model

By default, Parameter-Efficient Fine-Tuning (PEFT) methods are used to accelerate finetuning and reduce its cost. The example below shows how to load such a finetuned model and run inference with it.

**Python API experience**

```python
>>> from intel_extension_for_transformers.neural_chat import build_chatbot
>>> from intel_extension_for_transformers.neural_chat.config import PipelineConfig, LoadingModelConfig
>>> chatbot = build_chatbot(
PipelineConfig(
loading_config=LoadingModelConfig(peft_path="/path/to/peft_model")
)
)
>>> response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
```
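
Here `peft_path` is expected to point at the adapter weights produced by the finetuning step (typically the training output directory); that expectation reflects the usual PEFT workflow rather than something this diff spells out.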

## Quantization

NeuralChat provides three quantization approaches (PostTrainingDynamic, PostTrainingStatic, and QuantAwareTraining) based on [Intel® Neural Compressor](https://github.com/intel/neural-compressor).
7 changes: 6 additions & 1 deletion intel_extension_for_transformers/neural_chat/__init__.py
@@ -17,7 +17,12 @@

from .config import PipelineConfig
from .config import GenerationConfig
-from .config import FinetuningConfig
+from .config import (
+    TextGenerationFinetuningConfig,
+    SummarizationFinetuningConfig,
+    CodeGenerationFinetuningConfig,
+    TTSFinetuningConfig
+)
from .config import OptimizationConfig
from .chatbot import build_chatbot
from .chatbot import finetune_model
8 changes: 4 additions & 4 deletions intel_extension_for_transformers/neural_chat/chatbot.py
@@ -20,7 +20,7 @@
from intel_extension_for_transformers.llm.quantization.optimization import Optimization
from .config import PipelineConfig
from .config import OptimizationConfig
-from .config import FinetuningConfig
+from .config import BaseFinetuningConfig
from .plugins import is_plugin_enabled, get_plugin_instance, get_registered_plugins
from .config import DeviceOptions
from .models.base_model import get_model_adapter
@@ -89,14 +89,14 @@ def build_chatbot(config: PipelineConfig=None):

return adapter

-def finetune_model(config: FinetuningConfig):
+def finetune_model(config: BaseFinetuningConfig):
    """Finetune the model based on the provided configuration.

    Args:
-        config (FinetuningConfig): Configuration for finetuning the model.
+        config (BaseFinetuningConfig): Configuration for finetuning the model.
    """

-    assert config is not None, "FinetuningConfig is needed for finetuning."
+    assert config is not None, "BaseFinetuningConfig is needed for finetuning."
    finetuning = Finetuning(config)
    finetuning.finetune()

26 changes: 25 additions & 1 deletion intel_extension_for_transformers/neural_chat/config.py
@@ -312,12 +312,36 @@ class FinetuningArguments:
)

@dataclass
-class FinetuningConfig:
+class TTSDatasetArguments:
+    audio_paths: Optional[str] = field(default=None, metadata={"help": "The path of audios."})
+    gender: Optional[str] = field(default=None, metadata={"help": "Gender."})
+    language: Optional[str] = field(default="English", metadata={"help": "Language."})
+
+@dataclass
+class TTSModelArguments:
+    step: int = field(default=0, metadata={"help": "TTS model step."})
+    warmup_step: int = field(default=0, metadata={"help": "TTS model warmup step."})
+    learning_rate: float = field(default=5e-5, metadata={"help": "Learning rate."})
+
+@dataclass
+class BaseFinetuningConfig:
    model_args: ModelArguments
    data_args: DataArguments
    training_args: TrainingArguments
    finetune_args: FinetuningArguments
+
+TextGenerationFinetuningConfig = BaseFinetuningConfig
+
+SummarizationFinetuningConfig = BaseFinetuningConfig
+
+CodeGenerationFinetuningConfig = BaseFinetuningConfig
+
+@dataclass
+class TTSFinetuningConfig(BaseFinetuningConfig):
+    training_args: TrainingArguments
+    dataset_args: TTSDatasetArguments
+    model_args: TTSModelArguments

@dataclass
class GenerationConfig:
device: str = "cpu"
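Since the three text-task names above are plain aliases, any of them can be passed wherever a BaseFinetuningConfig is expected (e.g. by finetune_model); only TTSFinetuningConfig adds fields. A quick sanity check using only names from this hunk:

```python
from intel_extension_for_transformers.neural_chat.config import (
    BaseFinetuningConfig,
    TextGenerationFinetuningConfig,
    SummarizationFinetuningConfig,
    CodeGenerationFinetuningConfig,
)

# All three task-specific names refer to the same dataclass object.
assert TextGenerationFinetuningConfig is BaseFinetuningConfig
assert SummarizationFinetuningConfig is BaseFinetuningConfig
assert CodeGenerationFinetuningConfig is BaseFinetuningConfig
```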
@@ -22,10 +22,10 @@
    ModelArguments,
    DataArguments,
    FinetuningArguments,
-    FinetuningConfig,
+    TextGenerationFinetuningConfig,
)
from intel_extension_for_transformers.neural_chat.chatbot import finetune_model
-from intel_extension_for_transformers.neural_chat.pipeline.finetuning.finetuning import is_optimum_habana_available
+from intel_extension_for_transformers.llm.finetuning.finetuning import is_optimum_habana_available

def main():
# See all possible arguments in src/transformers/training_args.py
@@ -56,7 +56,7 @@ def main():
finetune_args,
) = parser.parse_args_into_dataclasses()

-    finetune_cfg = FinetuningConfig(
+    finetune_cfg = TextGenerationFinetuningConfig(
        model_args=model_args,
        data_args=data_args,
        training_args=training_args,
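Both finetuning example scripts follow the same shape; the parser is presumably a transformers.HfArgumentParser, since parse_args_into_dataclasses matches that API. A condensed sketch of the pattern, using only names that appear in this diff:

```python
from transformers import HfArgumentParser, TrainingArguments
from intel_extension_for_transformers.neural_chat.config import (
    ModelArguments,
    DataArguments,
    FinetuningArguments,
    TextGenerationFinetuningConfig,
)
from intel_extension_for_transformers.neural_chat.chatbot import finetune_model

# Parse CLI flags into the four argument dataclasses, then bundle them.
parser = HfArgumentParser(
    (ModelArguments, DataArguments, TrainingArguments, FinetuningArguments)
)
model_args, data_args, training_args, finetune_args = parser.parse_args_into_dataclasses()

finetune_cfg = TextGenerationFinetuningConfig(
    model_args=model_args,
    data_args=data_args,
    training_args=training_args,
    finetune_args=finetune_args,
)
finetune_model(finetune_cfg)
```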
@@ -22,7 +22,7 @@
    ModelArguments,
    DataArguments,
    FinetuningArguments,
-    FinetuningConfig,
+    TextGenerationFinetuningConfig,
)
from intel_extension_for_transformers.neural_chat.chatbot import finetune_model

@@ -48,7 +48,7 @@ def main():
finetune_args,
) = parser.parse_args_into_dataclasses()

-    finetune_cfg = FinetuningConfig(
+    finetune_cfg = TextGenerationFinetuningConfig(
        model_args=model_args,
        data_args=data_args,
        training_args=training_args,
@@ -23,7 +23,7 @@
    ModelArguments,
    DataArguments,
    FinetuningArguments,
-    FinetuningConfig,
+    TextGenerationFinetuningConfig,
)
from intel_extension_for_transformers.neural_chat.chatbot import finetune_model

@@ -57,7 +57,7 @@ def test_finetune_clm(self):
overwrite_output_dir=True
)
        finetune_args = FinetuningArguments()
-        finetune_cfg = FinetuningConfig(
+        finetune_cfg = TextGenerationFinetuningConfig(
            model_args=model_args,
            data_args=data_args,
            training_args=training_args,
@@ -75,7 +75,7 @@ def test_finetune_seq2seq(self):
overwrite_output_dir=True
)
        finetune_args = FinetuningArguments()
-        finetune_cfg = FinetuningConfig(
+        finetune_cfg = TextGenerationFinetuningConfig(
            model_args=model_args,
            data_args=data_args,
            training_args=training_args,