29 | 29 | from tests.e2e.model_utils import check_outputs_equal |
30 | 30 |
31 | 31 |
32 | | -def test_models_distributed_Qwen3_MOE_TP2_WITH_FULLGRAPH(): |
| 32 | +def test_models_distributed_Qwen3_MOE_TP2_WITH_FULL_DECODE_ONLY(): |
33 | 33 | if 'HCCL_OP_EXPANSION_MODE' in os.environ: |
34 | 34 | del os.environ['HCCL_OP_EXPANSION_MODE'] |
35 | 35 | prompts = [ |
@@ -70,3 +70,45 @@ def test_models_distributed_Qwen3_MOE_TP2_WITH_FULLGRAPH(): |
70 | 70 | name_0="vllm_eager_outputs", |
71 | 71 | name_1="vllm_fullgraph_outputs", |
72 | 72 | ) |
| 73 | + |
| 74 | +def test_models_distributed_Qwen3_MOE_TP2_WITH_FULL(): |
| 75 | + if 'HCCL_OP_EXPANSION_MODE' in os.environ: |
| 76 | + del os.environ['HCCL_OP_EXPANSION_MODE'] |
| 77 | + prompts = [ |
| 78 | + "Hello, my name is", "The president of the United States is", |
| 79 | + "The capital of France is", "The future of AI is" |
| 80 | + ] |
| 81 | + model = "Qwen/Qwen3-30B-A3B" |
| 82 | + sampling_params = SamplingParams(max_tokens=32, temperature=0.0) |
| 83 | + with VllmRunner(model, |
| 84 | + max_model_len=1024, |
| 85 | + tensor_parallel_size=2, |
| 86 | + enforce_eager=False, |
| 87 | + compilation_config={"cudagraph_mode": |
| 88 | + "FULL"}) as runner: |
| 89 | + vllm_fullgraph_outputs = runner.model.generate(prompts, |
| 90 | + sampling_params) |
| 91 | + |
| 92 | + with VllmRunner( |
| 93 | + model, |
| 94 | + max_model_len=1024, |
| 95 | + enforce_eager=True, |
| 96 | + ) as runner: |
| 97 | + vllm_eager_outputs = runner.model.generate(prompts, sampling_params) |
| 98 | + |
| 99 | + vllm_fullgraph_outputs_list = [] |
| 100 | + for output in vllm_fullgraph_outputs: |
| 101 | + vllm_fullgraph_outputs_list.append( |
| 102 | + (output.outputs[0].index, output.outputs[0].text)) |
| 103 | + |
| 104 | + vllm_eager_outputs_list = [] |
| 105 | + for output in vllm_eager_outputs: |
| 106 | + vllm_eager_outputs_list.append( |
| 107 | + (output.outputs[0].index, output.outputs[0].text)) |
| 108 | + |
| 109 | + check_outputs_equal( |
| 110 | + outputs_0_lst=vllm_eager_outputs_list, |
| 111 | + outputs_1_lst=vllm_fullgraph_outputs_list, |
| 112 | + name_0="vllm_eager_outputs", |
| 113 | + name_1="vllm_fullgraph_outputs", |
| 114 | + ) |
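For context on what the new test asserts: both runs use greedy sampling (temperature=0.0), and the generated (index, text) pairs from the FULL-cudagraph run are compared against the eager baseline. As a rough, hypothetical sketch of what check_outputs_equal from tests.e2e.model_utils is expected to verify (the actual helper in the repo may differ in signature and detail):

from typing import Sequence, Tuple

def _check_outputs_equal_sketch(*, outputs_0_lst: Sequence[Tuple[int, str]],
                                outputs_1_lst: Sequence[Tuple[int, str]],
                                name_0: str, name_1: str) -> None:
    # Hypothetical stand-in for tests.e2e.model_utils.check_outputs_equal:
    # with greedy decoding, the two runs should produce identical text.
    assert len(outputs_0_lst) == len(outputs_1_lst), (
        f"{name_0} and {name_1} returned a different number of outputs")
    for (index_0, text_0), (index_1, text_1) in zip(outputs_0_lst,
                                                    outputs_1_lst):
        assert index_0 == index_1, (
            f"prompt order mismatch: {index_0} vs {index_1}")
        assert text_0 == text_1, (
            f"{name_0}[{index_0}] != {name_1}[{index_1}]: "
            f"{text_0!r} vs {text_1!r}")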