Skip to content

Commit

Permalink
Fix typo: rename `initization_method` to `init_method`
Browse files Browse the repository at this point in the history
  • Loading branch information
x54-729 committed Jul 20, 2023
1 parent 5ea680a commit 139d142
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 40 deletions.
4 changes: 2 additions & 2 deletions collie/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class CollieConfig:
:param use_flash: 是否使用 `FlashAttention <https://github.com/HazyResearch/flash-attention>`_ 。
仅对部分模型有效。
:param dropout: :class:`Dropout` 的概率。仅对部分模型有效。
:param initization_method: 初始化方法。必须是一个接收一个 ``torch.Tensor``
:param init_method: 初始化方法。必须是一个接收一个 ``torch.Tensor``
并返回一个 ``torch.Tensor`` 的可调用对象。
:param low_cpu_mem_usage: 是否在初始化模型时尝试减少 CPU 占用
:param ds_config: **DeepSpeed** 的配置文件。可以是一个路径或字典。
Expand Down Expand Up @@ -139,7 +139,7 @@ class CollieConfig:
"help": "Dropout probability."
}
)
initization_method: Callable = field(
init_method: Callable = field(
default_factory=lambda: torch.nn.init.uniform_,
metadata={
"help": "Initialization method. Possible values are 'none', 'normal', 'xavier_normal', "
Expand Down
50 changes: 14 additions & 36 deletions collie/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,11 +184,11 @@ def from_config(cls, config: Union[CollieConfig, str], **kwargs):
if param.device == torch.device("meta"):
set_module_tensor_to_device(
module=model, tensor_name=name, device="cpu" if param.device == torch.device("meta") else param.device,
value=config.initization_method(torch.empty((*param.data.size(),),dtype=config.model_config.torch_dtype)),
value=config.init_method(torch.empty((*param.data.size(),),dtype=config.model_config.torch_dtype)),
dtype=config.model_config.torch_dtype
)
else:
param.data = config.initization_method(torch.zeros_like(param.data)).to(config.model_config.torch_dtype).to(param.device)
param.data = config.init_method(torch.zeros_like(param.data)).to(config.model_config.torch_dtype).to(param.device)
if kwargs.get("get_peft", True) and config.peft_config.peft_type is not None:
model = get_peft_model(model, config.peft_config)
model.print_trainable_parameters()
Expand Down Expand Up @@ -530,12 +530,8 @@ def resize_token_embeddings(self, new_num_tokens: Optional[int] = None) -> None:
new_embedding.weight.data[start_pos_new:end_pos_new, :] \
= embedding.weight.data[start_pos_old:end_pos_old, :]
if end_pos_new < (new_num_tokens // env.tp_size):
initization_method = self.collie_config.initization_method
if self.collie_config.initization_method_params is not None:
initization_method = initization_method(new_embedding.weight[end_pos_new:new_num_tokens // env.tp_size, :],
**self.collie_config.initization_method_params)
else:
initization_method(new_embedding.weight[end_pos_new:new_num_tokens // env.tp_size, :])
init_method = self.collie_config.init_method
init_method(new_embedding.weight[end_pos_new:new_num_tokens // env.tp_size, :])
else:
if env.tp_size > 1 and isinstance(new_embedding, tensor_parallel.VocabParallelEmbedding):
weights_list = [embedding.weight.clone() for _ in range(env.tp_size)]
Expand All @@ -544,12 +540,8 @@ def resize_token_embeddings(self, new_num_tokens: Optional[int] = None) -> None:
new_embedding.weight.data[start_pos_new:end_pos_new, :] \
= embedding.weight.data[start_pos_old:end_pos_old, :]
if end_pos_new < (new_num_tokens // env.tp_size):
initization_method = self.collie_config.initization_method
if self.collie_config.initization_method_params is not None:
initization_method = initization_method(new_embedding.weight[end_pos_new:new_num_tokens // env.tp_size, :],
**self.collie_config.initization_method_params)
else:
initization_method(new_embedding.weight[end_pos_new:new_num_tokens // env.tp_size, :])
init_method = self.collie_config.init_method
init_method(new_embedding.weight[end_pos_new:new_num_tokens // env.tp_size, :])
self.set_input_embedding(embedding_name, new_embedding)
if lm_head is not None:
if embedding is not None and id(lm_head.weight) == id(embedding.weight):
Expand Down Expand Up @@ -604,17 +596,10 @@ def resize_token_embeddings(self, new_num_tokens: Optional[int] = None) -> None:
new_lm_head.bias.data[start_pos_new:end_pos_new] \
= lm_head.bias.data[start_pos_old:end_pos_old]
if end_pos_new < (new_num_tokens // env.tp_size):
initization_method = self.collie_config.initization_method
if self.collie_config.initization_method_params is not None:
initization_method = initization_method(new_lm_head.weight[end_pos_new:new_num_tokens // env.tp_size, :],
**self.collie_config.initization_method_params)
if lm_head.bias is not None:
initization_method(new_lm_head.bias[end_pos_new:new_num_tokens // env.tp_size],
**self.collie_config.initization_method_params)
else:
initization_method(new_lm_head.weight[end_pos_new:new_num_tokens // env.tp_size, :])
if lm_head.bias is not None:
initization_method(new_lm_head.bias[end_pos_new:new_num_tokens // env.tp_size])
init_method = self.collie_config.init_method
init_method(new_lm_head.weight[end_pos_new:new_num_tokens // env.tp_size, :])
if lm_head.bias is not None:
init_method(new_lm_head.bias[end_pos_new:new_num_tokens // env.tp_size])
else:
if env.tp_size > 1 and isinstance(new_lm_head, tensor_parallel.ColumnParallelLinear):
weights_list = [lm_head.weight.clone() for _ in range(env.tp_size)]
Expand All @@ -630,17 +615,10 @@ def resize_token_embeddings(self, new_num_tokens: Optional[int] = None) -> None:
new_lm_head.bias.data[start_pos_new:end_pos_new] \
= lm_head.bias.data[start_pos_old:end_pos_old]
if end_pos_new < (new_num_tokens // env.tp_size):
initization_method = self.collie_config.initization_method
if self.collie_config.initization_method_params is not None:
initization_method = initization_method(new_lm_head.weight[end_pos_new:new_num_tokens // env.tp_size, :],
**self.collie_config.initization_method_params)
if lm_head.bias is not None:
initization_method(new_lm_head.bias[end_pos_new:new_num_tokens // env.tp_size],
**self.collie_config.initization_method_params)
else:
initization_method(new_lm_head.weight[end_pos_new:new_num_tokens // env.tp_size, :])
if lm_head.bias is not None:
initization_method(new_lm_head.bias[end_pos_new:new_num_tokens // env.tp_size])
init_method = self.collie_config.init_method
init_method(new_lm_head.weight[end_pos_new:new_num_tokens // env.tp_size, :])
if lm_head.bias is not None:
init_method(new_lm_head.bias[end_pos_new:new_num_tokens // env.tp_size])
self.set_lm_head(lm_head_name, new_lm_head)


Expand Down
3 changes: 1 addition & 2 deletions docs/source/tutorials/collie-tutorial-2-trainer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,7 @@
"| checkpointing | 是否使用 activation checkpointing,默认 True |\n",
"| use_flash | 是否使用 flash attention 进行自注意力加速,默认 True | \n",
"| dropout | dropout 大小,浮点数 默认 0.0 | \n",
"| initization_method | 参数初始化方法,可选值 'none'(默认)、'normal'、'kaiming_normal'、'kaiming_uniform'等 | \n",
"| initization_method_params | 指定参数对应初始化方法,字典型 默认 None |\n",
"| init_method | 参数初始化方法,可选值 'none'(默认)、'normal'、'kaiming_normal'、'kaiming_uniform'等 | \n",
"| low_cpu_mem_usage | 是否在模型初始化阶段尝试减少 CPU 占用,默认 True | \n",
"| ds_config | 指定 deepspeed 参数,字典型 或 json文件名,涉及零冗余优化器,详见 [tutorial-3](https://openlmlab-collie.readthedocs.io/zh_CN/latest/tutorials/collie-tutorial-3-parallel.html#3.5-%E2%80%82-零冗余优化器(ZeRO)) | \n",
"| model_config | 指定模型架构相关的配置项,默认 [transformers.PretrainedConfig()](https://huggingface.co/docs/transformers/v4.30.0/en/main_classes/configuration#transformers.PretrainedConfig) | \n",
Expand Down

0 comments on commit 139d142

Please sign in to comment.