Commit 21d2337
add test case; fix imports for tests
dsikka committed Aug 30, 2024
1 parent 4da163b commit 21d2337
Showing 3 changed files with 7 additions and 7 deletions.
1 change: 1 addition & 0 deletions tests/weight_loading/models.txt
@@ -15,6 +15,7 @@ compressed-tensors, nm-testing/Phi-3-mini-128k-instruct-FP8, main
 compressed-tensors, neuralmagic/Phi-3-medium-128k-instruct-quantized.w4a16, main
 compressed-tensors, nm-testing/Mixtral-8x7B-Instruct-v0.1-W4A16-quantized, main
 compressed-tensors, nm-testing/Mixtral-8x7B-Instruct-v0.1-W4A16-channel-quantized, main
+compressed-tensors, nm-testing/Mixtral-8x7B-Instruct-v0.1-W8A16-quantized, main
 awq, casperhansen/mixtral-instruct-awq, main
 awq_marlin, casperhansen/mixtral-instruct-awq, main
 fp8, neuralmagic/Meta-Llama-3-8B-Instruct-FP8-KV, main
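
Each models.txt entry appears to follow a "quantization method, model repository, revision" layout. The short sketch below is hypothetical (it is not vLLM's weight-loading test harness); it only illustrates how the new entry could be split into those three fields:

# Hypothetical sketch only (not the actual test harness).
def parse_model_entry(line: str):
    # Split a "quantization, model, revision" line into its three fields.
    quantization, model, revision = [part.strip() for part in line.split(",")]
    return quantization, model, revision

entry = "compressed-tensors, nm-testing/Mixtral-8x7B-Instruct-v0.1-W8A16-quantized, main"
print(parse_model_entry(entry))
# ('compressed-tensors', 'nm-testing/Mixtral-8x7B-Instruct-v0.1-W8A16-quantized', 'main')
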
8 changes: 4 additions & 4 deletions vllm/model_executor/layers/fused_moe/__init__.py
@@ -1,5 +1,3 @@
-from vllm.model_executor.layers.fused_moe.fused_moe_marlin import (
-    fused_moe_marlin, single_moe_marlin)
 from vllm.model_executor.layers.fused_moe.layer import (
     FusedMoE, FusedMoEMethodBase, FusedMoeWeightScaleSupported)
 from vllm.triton_utils import HAS_TRITON
@@ -8,16 +6,18 @@
     "FusedMoE",
     "FusedMoEMethodBase",
     "FusedMoeWeightScaleSupported",
-    "fused_moe_marlin",
-    "single_moe_marlin",
 ]
 
 if HAS_TRITON:
     from vllm.model_executor.layers.fused_moe.fused_moe import (
         fused_experts, fused_moe, fused_topk, get_config_file_name,
         grouped_topk)
+    from vllm.model_executor.layers.fused_moe.fused_moe_marlin import (
+        fused_moe_marlin, single_moe_marlin)
 
     __all__ += [
+        "fused_moe_marlin",
+        "single_moe_marlin",
         "fused_moe",
         "fused_topk",
         "fused_experts",
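
With this change, the Marlin MoE helpers are imported and re-exported only when Triton is available. The sketch below is illustrative (not code from the vLLM repository) and assumes only the names visible in the diff above:

# Minimal sketch: use the Marlin MoE exports only when Triton is present.
from vllm.triton_utils import HAS_TRITON

if HAS_TRITON:
    # Re-exported by the fused_moe package only in the Triton case.
    from vllm.model_executor.layers.fused_moe import (fused_moe_marlin,
                                                      single_moe_marlin)
else:
    # Fall back: skip Marlin MoE code paths when Triton is unavailable.
    fused_moe_marlin = None
    single_moe_marlin = None
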
5 changes: 2 additions & 3 deletions vllm/model_executor/layers/fused_moe/fused_moe_marlin.py
@@ -5,11 +5,10 @@
 import torch
 
 from vllm import _custom_ops as ops
+from vllm.model_executor.layers.fused_moe.fused_moe import (
+    fused_topk, moe_align_block_size, try_get_optimal_moe_config)
 from vllm.scalar_type import scalar_types
-
-from .fused_moe import (fused_topk, moe_align_block_size,
-                        try_get_optimal_moe_config)
 
 
 def single_moe_marlin(
     hidden_states: torch.Tensor,
