Adapt num_moe_experts check slightly
Signed-off-by: Jan Lasek <janek.lasek@gmail.com>
janekl committed Dec 13, 2024
1 parent fe5dac5 commit ce002a8
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions nemo/export/tensorrt_llm.py
@@ -370,9 +370,13 @@ def export(
                 model_configs, fp8_quantized, fp8_kvcache
             )
 
-            # TODO: Temporary fix
-            if model_configs.get("num_moe_experts", None) in {None, 0}:
-                model_configs["num_moe_experts"] = 1
+            # TODO: Temporary fix to handle the `<= 0` check for num_moe_experts in Megatron-LM, see
+            # https://github.com/NVIDIA/Megatron-LM/blob/99f23d2f111d12b73b1fbf386c60517101ff8abe/megatron/core/transformer/transformer_config.py#L409
+            # Check first whether num_moe_experts is part of the model config at all, to avoid inserting it unnecessarily.
+            if model_configs.get("num_moe_experts", None) is not None:
+                if model_configs["num_moe_experts"] <= 0:
+                    LOGGER.warning(f"Overriding num_moe_experts={model_configs['num_moe_experts']} to 1")
+                    model_configs["num_moe_experts"] = 1
 
             # We build the transformer config using the nemo model config.
             transformer_config = self.get_transformer_config(model_configs)
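For context, here is a minimal standalone sketch (not part of the commit; the function name normalize_num_moe_experts and the example configs are hypothetical) of the same guard in isolation. It collapses the commit's nested ifs into a single condition: the key is only rewritten when it is already present in the config and holds a non-positive value, so configs that omit it are left untouched rather than having num_moe_experts inserted.

import logging

logging.basicConfig(level=logging.WARNING)
LOGGER = logging.getLogger(__name__)


def normalize_num_moe_experts(model_configs: dict) -> dict:
    """Clamp a non-positive num_moe_experts to 1; leave the config alone if the key is absent."""
    num_experts = model_configs.get("num_moe_experts", None)
    if num_experts is not None and num_experts <= 0:
        LOGGER.warning(f"Overriding num_moe_experts={num_experts} to 1")
        model_configs["num_moe_experts"] = 1
    return model_configs


# A dense (non-MoE) config without the key is not modified ...
print(normalize_num_moe_experts({"num_layers": 32}))
# ... while a config carrying a spurious 0 is clamped to 1 and a warning is logged.
print(normalize_num_moe_experts({"num_layers": 32, "num_moe_experts": 0}))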
