Skip to content

Commit

Permalink
[Draft] Nemotron in Nemo-UX (NVIDIA#10138)
Browse files Browse the repository at this point in the history
* add nemotron

* add nemotron exporter. make converted model identical

* Apply isort and black reformatting

Signed-off-by: suiyoubi <suiyoubi@users.noreply.github.com>

* add more config

* Apply isort and black reformatting

Signed-off-by: suiyoubi <suiyoubi@users.noreply.github.com>

* add config

* Apply isort and black reformatting

Signed-off-by: suiyoubi <suiyoubi@users.noreply.github.com>

* import refactor

* Apply isort and black reformatting

Signed-off-by: suiyoubi <suiyoubi@users.noreply.github.com>

* refactor config

* add 22B config

---------

Signed-off-by: suiyoubi <suiyoubi@users.noreply.github.com>
Co-authored-by: suiyoubi <suiyoubi@users.noreply.github.com>
Signed-off-by: adityavavre <aditya.vavre@gmail.com>
  • Loading branch information
2 people authored and adityavavre committed Sep 15, 2024
1 parent 05c8375 commit 2736785
Show file tree
Hide file tree
Showing 4 changed files with 372 additions and 0 deletions.
14 changes: 14 additions & 0 deletions nemo/collections/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@
MixtralConfig8x7B,
MixtralConfig8x22B,
MixtralModel,
Nemotron3Config4B,
Nemotron3Config8B,
Nemotron4Config15B,
Nemotron4Config22B,
Nemotron4Config340B,
NemotronConfig,
NemotronModel,
gpt_data_step,
gpt_forward_step,
)
Expand All @@ -73,6 +80,13 @@
"MixtralConfig8x7B",
"MixtralConfig8x22B",
"MixtralModel",
"NemotronModel",
"Nemotron3Config4B",
"Nemotron3Config8B",
"Nemotron4Config15B",
"Nemotron4Config22B",
"Nemotron4Config340B",
"NemotronConfig",
"LlamaConfig",
"Llama2Config7B",
"Llama2Config13B",
Expand Down
6 changes: 6 additions & 0 deletions nemo/collections/llm/fn/activation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,9 @@ def gelu_impl(x):

def openai_gelu(x):
    """OpenAI-style GELU activation; delegates to the tanh-approximation `gelu_impl`."""
    return gelu_impl(x)


@torch.jit.script
def squared_relu(x):
    """Element-wise squared ReLU: max(x, 0) ** 2."""
    rectified = torch.nn.functional.relu(x)
    return torch.pow(rectified, 2)
16 changes: 16 additions & 0 deletions nemo/collections/llm/gpt/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,15 @@
MixtralConfig8x22B,
MixtralModel,
)
from nemo.collections.llm.gpt.model.nemotron import (
Nemotron3Config4B,
Nemotron3Config8B,
Nemotron4Config15B,
Nemotron4Config22B,
Nemotron4Config340B,
NemotronConfig,
NemotronModel,
)

__all__ = [
"GPTConfig",
Expand All @@ -53,6 +62,13 @@
"Llama2Config70B",
"Llama3Config8B",
"Llama3Config70B",
"NemotronConfig",
"Nemotron3Config4B",
"Nemotron3Config8B",
"Nemotron4Config15B",
"Nemotron4Config22B",
"Nemotron4Config340B",
"NemotronModel",
"CodeLlamaConfig7B",
"CodeLlamaConfig13B",
"CodeLlamaConfig34B",
Expand Down
Loading

0 comments on commit 2736785

Please sign in to comment.