Add lazy init for export (#10613)
* Add lazy init for export

Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>

* Apply isort and black reformatting

Signed-off-by: akoumpa <akoumpa@users.noreply.github.com>

---------

Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>
Signed-off-by: akoumpa <akoumpa@users.noreply.github.com>
Co-authored-by: akoumpa <akoumpa@users.noreply.github.com>
akoumpa committed Sep 27, 2024
1 parent a725511 commit fdaf607
Showing 10 changed files with 31 additions and 10 deletions.
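
All ten exporters receive the same two-line change: import no_init_weights from transformers.modeling_utils and wrap the from_config / _from_config call in it, so the Hugging Face model skeleton is constructed without running the usual random weight initialization (the exporter fills the tensors with converted weights afterwards in apply()). Below is a minimal standalone sketch of the pattern; the tiny LlamaConfig is illustrative only, and no_init_weights(True) mirrors the call used in this commit (newer transformers releases may drop the boolean argument).

# Minimal standalone sketch of the lazy-init pattern introduced by this commit.
# The small LlamaConfig here is illustrative; the exporters pass self.config.
from transformers import AutoModelForCausalLM, LlamaConfig
from transformers.modeling_utils import no_init_weights

config = LlamaConfig(
    hidden_size=256,
    intermediate_size=512,
    num_hidden_layers=2,
    num_attention_heads=4,
)

# no_init_weights suppresses the usual random weight initialization, so the
# model object is built quickly with unfilled tensors; the caller is expected
# to copy real weights into it afterwards (as the exporters do in apply()).
with no_init_weights(True):
    model = AutoModelForCausalLM.from_config(config)

print(sum(p.numel() for p in model.parameters()))  # parameters exist but are uninitialized
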
4 changes: 3 additions & 1 deletion nemo/collections/llm/gpt/model/baichuan.py
@@ -144,8 +144,10 @@ def make_vocab_size_divisible_by(vocab_size):
 class HFBaichuan2Exporter(io.ModelConnector[Baichuan2Model, "AutoModelForCausalLM"]):
     def init(self) -> "AutoModelForCausalLM":
         from transformers import AutoModelForCausalLM
+        from transformers.modeling_utils import no_init_weights
 
-        return AutoModelForCausalLM.from_config(self.config, trust_remote_code=True)
+        with no_init_weights(True):
+            return AutoModelForCausalLM.from_config(self.config, trust_remote_code=True)
 
     def apply(self, output_path: Path) -> Path:
         target = self.init()
4 changes: 3 additions & 1 deletion nemo/collections/llm/gpt/model/chatglm.py
@@ -141,8 +141,10 @@ def config(self) -> ChatGLMConfig:
 class HFChatGLMExporter(io.ModelConnector[ChatGLMModel, "AutoModelForCausalLM"]):
     def init(self) -> "AutoModelForCausalLM":
         from transformers import AutoModelForCausalLM
+        from transformers.modeling_utils import no_init_weights
 
-        return AutoModelForCausalLM.from_config(self.config, trust_remote_code=True)
+        with no_init_weights(True):
+            return AutoModelForCausalLM.from_config(self.config, trust_remote_code=True)
 
     def apply(self, output_path: Path) -> Path:
         target = self.init()
4 changes: 3 additions & 1 deletion nemo/collections/llm/gpt/model/gemma.py
@@ -173,8 +173,10 @@ def make_vocab_size_divisible_by(vocab_size):
 class HFGemmaExporter(io.ModelConnector[GemmaModel, "GemmaForCausalLM"]):
     def init(self) -> "GemmaForCausalLM":
         from transformers import AutoModelForCausalLM
+        from transformers.modeling_utils import no_init_weights
 
-        return AutoModelForCausalLM.from_config(self.config)
+        with no_init_weights(True):
+            return AutoModelForCausalLM.from_config(self.config)
 
     def apply(self, output_path: Path) -> Path:
         target = self.init()
4 changes: 3 additions & 1 deletion nemo/collections/llm/gpt/model/llama.py
@@ -291,8 +291,10 @@ def make_vocab_size_divisible_by(vocab_size):
 class HFLlamaExporter(io.ModelConnector[LlamaModel, "LlamaForCausalLM"]):
     def init(self) -> "LlamaForCausalLM":
         from transformers import AutoModelForCausalLM
+        from transformers.modeling_utils import no_init_weights
 
-        return AutoModelForCausalLM.from_config(self.config)
+        with no_init_weights(True):
+            return AutoModelForCausalLM.from_config(self.config)
 
     def apply(self, output_path: Path) -> Path:
         target = self.init()
4 changes: 3 additions & 1 deletion nemo/collections/llm/gpt/model/mistral.py
@@ -188,8 +188,10 @@ def make_vocab_size_divisible_by(mistral_vocab_size):
 class HFMistralExporter(io.ModelConnector[MistralModel, "MistralForCausalLM"]):
     def init(self) -> "MistralForCausalLM":
         from transformers import AutoModelForCausalLM
+        from transformers.modeling_utils import no_init_weights
 
-        return AutoModelForCausalLM.from_config(self.config)
+        with no_init_weights(True):
+            return AutoModelForCausalLM.from_config(self.config)
 
     def apply(self, output_path: Path) -> Path:
         # TODO: Make it work with lazy init
4 changes: 3 additions & 1 deletion nemo/collections/llm/gpt/model/mixtral.py
@@ -268,8 +268,10 @@ def _import_moe_w1_w3(gate_proj, up_proj):
 class HFMixtralExporter(io.ModelConnector[MixtralModel, "MixtralForCausalLM"]):
     def init(self) -> "MixtralForCausalLM":
         from transformers import AutoModelForCausalLM
+        from transformers.modeling_utils import no_init_weights
 
-        return AutoModelForCausalLM.from_config(self.config)
+        with no_init_weights(True):
+            return AutoModelForCausalLM.from_config(self.config)
 
     def apply(self, output_path: Path) -> Path:
         # TODO: Make it work with lazy init
5 changes: 4 additions & 1 deletion nemo/collections/llm/gpt/model/nemotron.py
@@ -212,7 +212,10 @@ def make_vocab_size_divisible_by(vocab_size):
 @io.model_exporter(NemotronModel, "hf")
 class HFNemotronExporter(io.ModelConnector[NemotronModel, "NemotronForCausalLM"]):
     def init(self) -> "NemotronForCausalLM":
-        return NemotronForCausalLM.from_config(self.config)
+        from transformers.modeling_utils import no_init_weights
+
+        with no_init_weights(True):
+            return NemotronForCausalLM.from_config(self.config)
 
     def apply(self, output_path: Path) -> Path:
         target = self.init()
4 changes: 3 additions & 1 deletion nemo/collections/llm/gpt/model/qwen2.py
@@ -174,8 +174,10 @@ def config(self) -> Qwen2Config:
 class HFQwen2Exporter(io.ModelConnector[Qwen2Model, "AutoModelForCausalLM"]):
     def init(self) -> "AutoModelForCausalLM":
         from transformers import AutoModelForCausalLM
+        from transformers.modeling_utils import no_init_weights
 
-        return AutoModelForCausalLM.from_config(self.config, trust_remote_code=True)
+        with no_init_weights(True):
+            return AutoModelForCausalLM.from_config(self.config, trust_remote_code=True)
 
     def apply(self, output_path: Path) -> Path:
         target = self.init()
4 changes: 3 additions & 1 deletion nemo/collections/llm/gpt/model/starcoder.py
@@ -159,8 +159,10 @@ def make_vocab_size_divisible_by(vocab_size):
 class HFStarcoderExporter(io.ModelConnector[StarcoderModel, "GPTBigCodeForCausalLM"]):
     def init(self) -> "GPTBigCodeForCausalLM":
         from transformers import GPTBigCodeForCausalLM
+        from transformers.modeling_utils import no_init_weights
 
-        return GPTBigCodeForCausalLM._from_config(self.config)
+        with no_init_weights(True):
+            return GPTBigCodeForCausalLM._from_config(self.config)
 
     def apply(self, output_path: Path) -> Path:
         target = self.init()
4 changes: 3 additions & 1 deletion nemo/collections/llm/gpt/model/starcoder2.py
@@ -184,8 +184,10 @@ def make_vocab_size_divisible_by(vocab_size):
 class HFStarcoder2Exporter(io.ModelConnector[Starcoder2Model, "Starcoder2ForCausalLM"]):
     def init(self) -> "Starcoder2ForCausalLM":
         from transformers import Starcoder2ForCausalLM
+        from transformers.modeling_utils import no_init_weights
 
-        return Starcoder2ForCausalLM._from_config(self.config)
+        with no_init_weights(True):
+            return Starcoder2ForCausalLM._from_config(self.config)
 
     def apply(self, output_path: Path) -> Path:
         target = self.init()
