Adapt num_moe_experts check slightly
Signed-off-by: Jan Lasek <janek.lasek@gmail.com>
janekl committed Dec 13, 2024
1 parent fe5dac5 commit ce002a8
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions nemo/export/tensorrt_llm.py
@@ -370,9 +370,13 @@ def export(
                 model_configs, fp8_quantized, fp8_kvcache
             )
 
-            # TODO: Temporary fix
-            if model_configs.get("num_moe_experts", None) in {None, 0}:
-                model_configs["num_moe_experts"] = 1
+            # TODO: Temporary fix to handle the `<= 0` check for num_moe_experts in Megatron-LM, see
+            # https://github.com/NVIDIA/Megatron-LM/blob/99f23d2f111d12b73b1fbf386c60517101ff8abe/megatron/core/transformer/transformer_config.py#L409
+            # Check first whether num_moe_experts is part of the model config at all, to avoid inserting it unnecessarily.
+            if model_configs.get("num_moe_experts", None) is not None:
+                if model_configs["num_moe_experts"] <= 0:
+                    LOGGER.warning(f"Overriding num_moe_experts={model_configs['num_moe_experts']} to 1")
+                    model_configs["num_moe_experts"] = 1
 
             # We build the transformer config using the nemo model config.
             transformer_config = self.get_transformer_config(model_configs)
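For context, here is a minimal standalone sketch (not part of the commit; the function name normalize_num_moe_experts and the example configs are hypothetical) of the same guard in isolation. It collapses the commit's nested ifs into a single condition: the key is only rewritten when it is already present in the config and holds a non-positive value, so configs that omit it are left untouched rather than having num_moe_experts inserted.

import logging

logging.basicConfig(level=logging.WARNING)
LOGGER = logging.getLogger(__name__)


def normalize_num_moe_experts(model_configs: dict) -> dict:
    """Clamp a non-positive num_moe_experts to 1; leave the config alone if the key is absent."""
    num_experts = model_configs.get("num_moe_experts", None)
    if num_experts is not None and num_experts <= 0:
        LOGGER.warning(f"Overriding num_moe_experts={num_experts} to 1")
        model_configs["num_moe_experts"] = 1
    return model_configs


# A dense (non-MoE) config without the key is not modified ...
print(normalize_num_moe_experts({"num_layers": 32}))
# ... while a config carrying a spurious 0 is clamped to 1 and a warning is logged.
print(normalize_num_moe_experts({"num_layers": 32, "num_moe_experts": 0}))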
