@@ -71,49 +71,7 @@ def test_models_distributed_Qwen3_MOE_TP2_WITH_FULL_DECODE_ONLY():
7171 name_1 = "vllm_fullgraph_outputs" ,
7272 )
7373
74- def test_models_distributed_Qwen3_MOE_TP2_WITH_FULL_AND_PIECEWISE ():
75- if 'HCCL_OP_EXPANSION_MODE' in os .environ :
76- del os .environ ['HCCL_OP_EXPANSION_MODE' ]
77- prompts = [
78- "Hello, my name is" , "The president of the United States is" ,
79- "The capital of France is" , "The future of AI is"
80- ]
81- model = "Qwen/Qwen3-30B-A3B"
82- sampling_params = SamplingParams (max_tokens = 32 , temperature = 0.0 )
83- with VllmRunner (model ,
84- max_model_len = 1024 ,
85- tensor_parallel_size = 2 ,
86- enforce_eager = False ,
87- compilation_config = {"cudagraph_mode" :
88- "FULL_AND_PIECEWISE" }) as runner :
89- vllm_fullgraph_outputs = runner .model .generate (prompts ,
90- sampling_params )
91-
92- with VllmRunner (
93- model ,
94- max_model_len = 1024 ,
95- enforce_eager = True ,
96- ) as runner :
97- vllm_eager_outputs = runner .model .generate (prompts , sampling_params )
98-
99- vllm_fullgraph_outputs_list = []
100- for output in vllm_fullgraph_outputs :
101- vllm_fullgraph_outputs_list .append (
102- (output .outputs [0 ].index , output .outputs [0 ].text ))
103-
104- vllm_eager_outputs_list = []
105- for output in vllm_eager_outputs :
106- vllm_eager_outputs_list .append (
107- (output .outputs [0 ].index , output .outputs [0 ].text ))
108-
109- check_outputs_equal (
110- outputs_0_lst = vllm_eager_outputs_list ,
111- outputs_1_lst = vllm_fullgraph_outputs_list ,
112- name_0 = "vllm_eager_outputs" ,
113- name_1 = "vllm_fullgraph_outputs" ,
114- )
115-
116- def test_models_distributed_Qwen3_MOE_TP2_WITH_FULL_AND_PIECEWISE ():
74+ def test_models_distributed_Qwen3_MOE_TP2_WITH_FULL ():
11775 if 'HCCL_OP_EXPANSION_MODE' in os .environ :
11876 del os .environ ['HCCL_OP_EXPANSION_MODE' ]
11977 prompts = [
0 commit comments