diff --git a/tests/integration/test_lists/test-db/l0_dgx_b200.yml b/tests/integration/test_lists/test-db/l0_dgx_b200.yml
index b48e1b4ee8d..4dc08bf942f 100644
--- a/tests/integration/test_lists/test-db/l0_dgx_b200.yml
+++ b/tests/integration/test_lists/test-db/l0_dgx_b200.yml
@@ -15,6 +15,7 @@ l0_dgx_b200:
       backend: pytorch
   tests:
   - unittest/_torch/multi_gpu_modeling -k "deepseek"
+  - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall_fp4[MNNVL]
   - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall_fp4[DeepEPLowLatency]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[pp4-attn_backend=TRTLLM-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=True-attn_backend=TRTLLM-torch_compile=False]
diff --git a/tests/integration/test_lists/test-db/l0_dgx_h100.yml b/tests/integration/test_lists/test-db/l0_dgx_h100.yml
index 38ea6fe8f69..92f4052f897 100644
--- a/tests/integration/test_lists/test-db/l0_dgx_h100.yml
+++ b/tests/integration/test_lists/test-db/l0_dgx_h100.yml
@@ -78,6 +78,7 @@ l0_dgx_h100:
       auto_trigger: deepseek
   tests:
   - unittest/_torch/multi_gpu_modeling -k "deepseek"
+  - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall[MNNVL]
   - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall[DeepEP]
   - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall[DeepEPLowLatency]
   - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_w4afp8[MoEWeightLoadingMode.VANILLA-dtype0]
diff --git a/tests/unittest/_torch/modules/test_fused_moe.py b/tests/unittest/_torch/modules/test_fused_moe.py
index 397314bcab0..9a6720eca73 100644
--- a/tests/unittest/_torch/modules/test_fused_moe.py
+++ b/tests/unittest/_torch/modules/test_fused_moe.py
@@ -212,11 +212,14 @@ def per_rank_test_fused_moe_alltoall(job_id):
     weights = {}
     for expert_id in range(NUM_EXPERTS):
         w1_weight = torch.empty((INTERMEDIATE_SIZE, HIDDEN_SIZE),
-                                dtype=dtype)
+                                dtype=dtype,
+                                device="cuda")
         w2_weight = torch.empty((HIDDEN_SIZE, INTERMEDIATE_SIZE),
-                                dtype=dtype)
+                                dtype=dtype,
+                                device="cuda")
         w3_weight = torch.empty((INTERMEDIATE_SIZE, HIDDEN_SIZE),
-                                dtype=dtype)
+                                dtype=dtype,
+                                device="cuda")
         torch.nn.init.xavier_uniform_(w1_weight)
         torch.nn.init.xavier_uniform_(w2_weight)
         torch.nn.init.xavier_uniform_(w3_weight)
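
For reference, a minimal standalone sketch of the pattern the test-file hunk adopts (PyTorch; the sizes and dtype below are illustrative stand-ins, not the test's actual module-level constants): allocating an expert weight directly on the GPU before the in-place Xavier initialization, so the initialized values already live in device memory.

    import torch

    HIDDEN_SIZE, INTERMEDIATE_SIZE = 256, 512  # illustrative sizes only
    dtype = torch.bfloat16                     # assumed dtype for this sketch

    # Allocate directly on the GPU (as the diff now does) so the in-place
    # Xavier init fills device memory; no separate host-to-device copy of
    # the weight is needed afterwards.
    w1_weight = torch.empty((INTERMEDIATE_SIZE, HIDDEN_SIZE),
                            dtype=dtype,
                            device="cuda")
    torch.nn.init.xavier_uniform_(w1_weight)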