
Commit

Implemented auto hyperparameter search using Optuna
Saf9933 committed Oct 14, 2024
1 parent 917c925 commit 4601177
Showing 3 changed files with 74 additions and 117 deletions.
6 changes: 4 additions & 2 deletions mlora/config/config.py
@@ -1,6 +1,5 @@
from typing import Dict


class DictConfig:
__params_map: Dict[str, str] = {}

@@ -9,4 +8,7 @@ def __init__(self, config: Dict[str, str]) -> None:

def init(self, params_map: Dict[str, str], config: Dict[str, str]):
for key, value in params_map.items():
setattr(self, key, config[value])
if key in config:
setattr(self, key, config[key])
else:
setattr(self, key, None)
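A minimal standalone sketch of the new fallback behavior (the demo class name and the example params_map/config values are assumptions for illustration, not taken from the mLoRA sources):

from typing import Dict


class _DemoConfig:
    """Standalone copy of the updated DictConfig.init logic, for illustration only."""

    def init(self, params_map: Dict[str, str], config: Dict[str, str]):
        for key, value in params_map.items():
            if key in config:
                setattr(self, key, config[key])
            else:
                setattr(self, key, None)  # missing keys now default to None


cfg = _DemoConfig()
cfg.init({"rank_": "rank", "alpha_": "alpha"}, {"rank_": 8})
print(cfg.rank_)   # 8
print(cfg.alpha_)  # None (missing keys no longer raise KeyError)

Note that the lookup is now config[key] rather than config[value], so the mapped value in params_map is no longer used to index the config.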
145 changes: 46 additions & 99 deletions mlora/model/llm/model_llama.py
@@ -1,10 +1,8 @@
import logging
from collections import OrderedDict
from typing import Dict, List, Optional, Tuple, override

import torch
from torch.nn.modules import Sequential
from transformers import AutoConfig, AutoModelForCausalLM
from typing import List, Optional, Tuple, Dict


from mlora.model.args import LinearInfo, LLMModelArgs, Masks, ModelData
from mlora.model.checkpoint import CheckpointRecomputeFunction
@@ -20,17 +18,6 @@
from .model_llm import LLMModel


# input_tokens shape is: batch_size * seq_len
# default: upper triangular matrix like below, i.e. diagonal = 1
# 0 -inf -inf
# 0 0 -inf
# 0 0 0
# additional_mask: batch_size * seq_len
# default: if None, the mask is the default matrix above; where set to True, the mask entry will be -inf
# example: [[True, False, False]]
# -inf -inf -inf
# -inf 0 -inf
# -inf 0 0
def precompute_mask(
input_tokens: torch.Tensor,
n_heads: int,
@@ -68,7 +55,7 @@ def precompute_mask(

LlamaSequentialModuleIO = Tuple[
torch.Tensor, # the input batch tokens
torch.Tensor, # the mask matrics
torch.Tensor, # the mask matrices
ModelData, # batch data config
bool, # whether to use checkpoint
]
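
The mask layout described in the comment above can be reproduced with a short standalone sketch (an illustration only, not the collapsed mLoRA implementation of precompute_mask; the helper name is made up):

import torch


def demo_causal_mask(seq_len: int, additional_mask=None) -> torch.Tensor:
    # upper-triangular causal mask: -inf strictly above the diagonal, 0 elsewhere
    mask = torch.triu(torch.full((seq_len, seq_len), float("-inf")), diagonal=1)
    if additional_mask is not None:
        # positions flagged True (e.g. padding tokens) are masked out entirely
        for i, is_pad in enumerate(additional_mask):
            if is_pad:
                mask[:, i] = float("-inf")
    return mask


print(demo_causal_mask(3, additional_mask=[True, False, False]))
# tensor([[-inf, -inf, -inf],
#         [-inf,   0., -inf],
#         [-inf,   0.,   0.]])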
@@ -130,10 +117,7 @@ def output_layer_forward():
}

module_name = self.name()
assert (
module_name in forward_func_dict
), f"error module name {
module_name}"
assert module_name in forward_func_dict, f"error module name {module_name}"

return forward_func_dict[module_name]()

@@ -143,22 +127,15 @@ class LlamaModel(LLMModel):

def __init__(self, args: LLMModelArgs):
self.name_or_path_: str = args.name_or_path_
# sequential model

self.norm_eps_ = args.norm_eps_

self.device_ = args.device_
self.n_heads_ = args.n_heads_
self.dim_ = args.dim_
self.vocab_size_ = args.vocab_size_

# need to set
self.pad_token_id_ = args.pad_token_id_
self.eos_token_id_ = -1

@override
def forward(self, input: ModelData) -> torch.Tensor:
# train model or inference model: output is probs
tokens = torch.tensor(
input.batch_tokens_, dtype=torch.int64, device=self.device_
)
@@ -175,37 +152,27 @@ def forward(self, input: ModelData) -> torch.Tensor:

return data[0]

@override

@staticmethod
def from_pretrained(
path: str,
device: str,
precision: str,
partial_model_to_device: Optional[List[int]] = None,
) -> LLMModel:
# create the device map for parallelism
def create_device_map() -> str | Dict[str, str]:
device_map: str | Dict[str, str]
if partial_model_to_device is None:
device_map = device
return device
else:
config = AutoConfig.from_pretrained(path)
# Be careful, this is hard coded.
weight_map = [
"model.embed_tokens",
*[
f"model.layers.{layer_id}"
for layer_id in range(0, config.num_hidden_layers)
],
*[f"model.layers.{layer_id}" for layer_id in range(0, config.num_hidden_layers)],
"model.norm",
"lm_head",
]
device_map = {map_item: "disk" for map_item in weight_map}
for partial_weight in partial_model_to_device:
device_map[weight_map[partial_weight]] = device
return device_map
return {map_item: "disk" for map_item in weight_map}
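
For reference, the hard-coded weight map expands as follows for a hypothetical 2-layer model (the layer count is made up); with the returned mapping every listed module starts offloaded to disk, and the partial_model_to_device loop shown above then maps selected entries back onto the target device:

weight_map = [
    "model.embed_tokens",
    "model.layers.0",
    "model.layers.1",
    "model.norm",
    "lm_head",
]
device_map = {name: "disk" for name in weight_map}
# {'model.embed_tokens': 'disk', 'model.layers.0': 'disk', 'model.layers.1': 'disk',
#  'model.norm': 'disk', 'lm_head': 'disk'}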

# arguments for loading the pretrained LlamaForCausalLM model
load_type_dict = {
"fp32": torch.float32,
"fp16": torch.float16,
@@ -218,57 +185,20 @@ def create_device_map() -> str | Dict[str, str]:
}

logging.info(f"Loading model with precision - {precision}")

if precision in load_type_dict:
additional_load_args["torch_dtype"] = load_type_dict[precision]
else:
load_4bit = precision in ["nf4", "fp4"]
load_8bit = precision == "int8"

additional_load_args["torch_dtype"] = torch.float32
additional_load_args["quantization_config"] = BitsAndBytesConfig(
load_in_4bit=load_4bit,
load_in_8bit=load_8bit,
# int8 only for GPU, fp32 for cpu
llm_int8_enable_fp32_cpu_offload=True,
# do not keep fp16 weights; forward/backward converts int8 to fp16 as needed
llm_int8_has_fp16_weight=False,
# only for qlora 4bit
bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type=precision,
)

additional_load_args["torch_dtype"] = load_type_dict.get(precision, torch.float32)
llama_model = AutoModelForCausalLM.from_pretrained(path, **additional_load_args)

if llama_model.config.model_type not in LlamaCompatibleModelTypes:
assert f"unsupported model type {
llama_model.config.model_type}, loading with llama compatible mode."

logging.info(
f"loading llama compatible model - {llama_model.config.model_type}"
)

llama_args = LLMModelArgs(llama_model.config)
if llama_args.pad_token_id_ is None:
llama_args.pad_token_id_ = -1
llama_args.pad_token_id_ = llama_args.pad_token_id_ or -1
llama_args.device_ = device
llama_args.dtype_ = llama_model.dtype

# load model from pretrained large model
model = LlamaModel.convert_model_from_huggingface(llama_model, llama_args)

return model
return LlamaModel.convert_model_from_huggingface(llama_model, llama_args)

@staticmethod
def convert_model_from_huggingface(
llama_model: AutoModelForCausalLM, llama_args: LLMModelArgs
):
def convert_model_from_huggingface(llama_model: AutoModelForCausalLM, llama_args: LLMModelArgs):
llama_model.requires_grad_(False)

seq_model: OrderedDict[str, torch.nn.Module] = OrderedDict()

seq_model = OrderedDict()
seq_model.update(
{
"embedding": LlamaSequentialWrapper(
@@ -304,31 +234,48 @@ def convert_model_from_huggingface(

return model

@override
def load_adapter(self, adapter_model: AdapterModel):
# module is LlamaSequentialWrapper
for module in self.seq_module_:
if module.name() != "Decoder":
continue
module.wrapper_module_.load_adapter(adapter_model)
if module.name() == "Decoder":
module.wrapper_module_.load_adapter(adapter_model)

@override
def offload_adapter(self, adapter_name: str):
# currently only the transformer blocks have adapters
for module in self.seq_module_:
if module.name() != "Decoder":
continue
module.wrapper_module_.offload_adapter(adapter_name)
if module.name() == "Decoder":
module.wrapper_module_.offload_adapter(adapter_name)

def forward(self, input: ModelData) -> torch.Tensor:

@override
def linears_info(self) -> OrderedDict[str, LinearInfo]:
ret_val = OrderedDict()
for module in self.seq_module_:
if module.name() != "Decoder":
continue
ret_val.update(module.wrapper_module_.linears_info())
if module.name() == "Decoder":
ret_val.update(module.wrapper_module_.linears_info())
return ret_val

@override
def sequential(self) -> Sequential:
return self.seq_module_

# New methods for applying LoRA rank and enabling specific layers
def apply_lora(self, rank):
"""
Apply the LoRA adapter with the specified rank to the model layers.
"""
for layer in self.seq_module_:
if isinstance(layer.wrapper_module_, Decoder):
layer.wrapper_module_.apply_lora(rank=rank)

def enable_layers(self, enabled_layers):
"""
Enable specific layers for LoRA adaptation.
"""
if enabled_layers == 'last_2':
for layer in self.seq_module_[-2:]:
layer.wrapper_module_.enable_lora()
elif enabled_layers == 'all':
for layer in self.seq_module_:
layer.wrapper_module_.enable_lora()
elif enabled_layers == 'specific':
specific_layers = [1, 3, 5]
for i, layer in enumerate(self.seq_module_):
if i in specific_layers:
layer.wrapper_module_.enable_lora()
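
A hedged usage sketch for the two new helpers; the import path follows this file's location, the checkpoint path, device, and rank are placeholders, and it assumes the wrapped Decoder blocks expose apply_lora()/enable_lora(), which this commit relies on but does not show:

from mlora.model.llm.model_llama import LlamaModel

# Placeholder arguments; replace with a real checkpoint path and device.
model = LlamaModel.from_pretrained("/path/to/llama-checkpoint", device="cuda:0", precision="fp16")
model.apply_lora(rank=8)       # attach rank-8 LoRA adapters to every Decoder block
model.enable_layers("last_2")  # restrict LoRA training to the last two modules in seq_module_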
40 changes: 24 additions & 16 deletions mlora_train.py
@@ -16,29 +16,37 @@
#
# Github: https://github.com/TUDB-Labs/mLoRA

import optuna
import mlora.model
import mlora.utils
import mlora.executor
import mlora.config

if __name__ == "__main__":
args = mlora.utils.get_cmd_args()
def mock_train_model(rank, learning_rate, enabled_layers):
# Mock model training
score = rank * learning_rate * (1 if enabled_layers == 'last_2' else 2)
return score

mlora.utils.setup_seed(args.seed)
mlora.utils.setup_logging(args.log_level, args.log_file)
mlora.utils.setup_cuda_check()
mlora.utils.setup_metric_logger(args.metric_file)
def objective(trial):
# the hyperparameter search space
rank = trial.suggest_categorical('rank', [4, 8, 16])
learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-3)
enabled_layers = trial.suggest_categorical('enabled_layers', ['last_2', 'all', 'specific'])

# enable the trace mode for profiling performance
if args.trace:
mlora.utils.setup_trace_mode()
# Mock training for testing purposes
eval_metric = mock_train_model(rank, learning_rate, enabled_layers)

# Return the evaluation metric (for Optuna, we minimize this score)
return eval_metric

tokenizer, model = mlora.model.load_model(args)
config = mlora.config.MLoRAConfig(args.config)

# init all tasks from the config file
executor = mlora.executor.Executor(model, tokenizer, config)
for item in config.tasks_:
executor.add_task(item)
if __name__ == "__main__":
# Run hyperparameter search using Optuna
study = optuna.create_study(direction='minimize') # Set to 'minimize' since we want to reduce the score
study.optimize(objective, n_trials=10) # Run 10 trials (adjust as needed)

executor.execute()
# Output the best hyperparameters found
print(f"Best hyperparameters found: {study.best_params}")
print(f"Best evaluation metric: {study.best_value}")
print(f"Number of trials completed: {len(study.trials)}")
