3939 # if this test fails, it means the nightly torch version is not compatible with some
4040 # of the dependencies. Please check the error message and add the package to whitelist
4141 # in /vllm/tools/pre_commit/generate_nightly_torch_test.py
42- mirror_hardwares : [amdexperimental]
42+ mirror_hardwares : [amdexperimental, amdproduction, amdtentative ]
4343 agent_pool : mi325_1
44- # grade: Blocking
44+ grade : Blocking
4545 soft_fail : true
4646 source_file_dependencies :
4747 - requirements/nightly_torch_test.txt
5050
5151- label : Async Engine, Inputs, Utils, Worker Test # 10min
5252 timeout_in_minutes : 15
53- mirror_hardwares : [amdexperimental, amdproduction]
53+ mirror_hardwares : [amdexperimental, amdproduction, amdtentative ]
5454 agent_pool : mi325_1
55- # grade: Blocking
55+ grade : Blocking
5656 source_file_dependencies :
5757 - vllm/
5858 - tests/multimodal
6363
6464- label : Async Engine, Inputs, Utils, Worker, Config Test (CPU) # 15min
6565 timeout_in_minutes : 20
66- mirror_hardwares : [amdexperimental, amdproduction]
66+ mirror_hardwares : [amdexperimental, amdproduction, amdtentative ]
6767 agent_pool : mi325_1
68- # grade: Blocking
68+ grade : Blocking
6969 source_file_dependencies :
7070 - vllm/
7171 - tests/test_inputs.py
@@ -115,9 +115,9 @@ steps:
115115 - pytest -v -s basic_correctness/test_cpu_offload.py
116116
117117- label : Entrypoints Unit Tests # 5min
118- mirror_hardwares : [amdexperimental, amdproduction]
118+ mirror_hardwares : [amdexperimental, amdproduction, amdtentative ]
119119 agent_pool : mi325_1
120- # grade: Blocking
120+ grade : Blocking
121121 timeout_in_minutes : 10
122122 working_dir : " /vllm-workspace/tests"
123123 fast_check : true
@@ -214,6 +214,7 @@ steps:
214214 # test with internal dp
215215 - python3 ../examples/offline_inference/data_parallel.py --enforce-eager
216216 - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
217+ - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
217218 - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
218219 - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/distributed/test_internal_lb_dp.py
219220 - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/distributed/test_hybrid_lb_dp.py
@@ -252,9 +253,9 @@ steps:
252253 - torchrun --nproc-per-node=8 ../examples/offline_inference/torchrun_dp_example.py --tp-size=2 --pp-size=1 --dp-size=4 --enable-ep
253254
254255- label : EPLB Algorithm Test # 5min
255- mirror_hardwares : [amdexperimental, amdproduction]
256+ mirror_hardwares : [amdexperimental, amdproduction, amdtentative ]
256257 agent_pool : mi325_1
257- # grade: Blocking
258+ grade : Blocking
258259 timeout_in_minutes : 15
259260 working_dir : " /vllm-workspace/tests"
260261 source_file_dependencies :
@@ -341,9 +342,9 @@ steps:
341342
342343- label : V1 Test entrypoints # 35min
343344 timeout_in_minutes : 50
344- mirror_hardwares : [amdexperimental, amdproduction]
345+ mirror_hardwares : [amdexperimental, amdproduction, amdtentative ]
345346 agent_pool : mi325_1
346- # grade: Blocking
347+ grade : Blocking
347348 source_file_dependencies :
348349 - vllm/
349350 - tests/v1
@@ -391,6 +392,20 @@ steps:
391392 commands :
392393 - pytest -v -s v1/attention
393394
395+ - label : Batch Invariance Tests (H100) # 10min
396+ mirror_hardwares : [amdexperimental]
397+ agent_pool : mi325_1
398+ timeout_in_minutes : 25
399+ gpu : h100
400+ source_file_dependencies :
401+ - vllm/
402+ - tests/v1/determinism/
403+ commands :
404+ - export VLLM_WORKER_MULTIPROC_METHOD=spawn
405+ - pip install pytest-timeout pytest-forked
406+ - pytest -v -s v1/determinism/test_batch_invariance.py
407+ - pytest -v -s v1/determinism/test_rms_norm_batch_invariant.py
408+
394409- label : V1 Test attention (B200) # 10min
395410 timeout_in_minutes : 30
396411 gpu : b200
@@ -401,9 +416,9 @@ steps:
401416 - VLLM_DISABLE_FLASHINFER_PREFILL=1 pytest -v -s v1/attention # TODO: FI prefill is bugged and causes incorrectness, fix this
402417
403418- label : V1 Test others (CPU) # 5 mins
404- mirror_hardwares : [amdexperimental, amdproduction]
419+ mirror_hardwares : [amdexperimental, amdproduction, amdtentative ]
405420 agent_pool : mi325_1
406- # grade: Blocking
421+ grade : Blocking
407422 source_file_dependencies :
408423 - vllm/
409424 - tests/v1
@@ -495,7 +510,7 @@ steps:
495510
496511- label : PyTorch Compilation Unit Tests # 15min
497512 timeout_in_minutes : 30
498- mirror_hardwares : [amdexperimental]
513+ mirror_hardwares : [amdexperimental, amdproduction ]
499514 agent_pool : mi325_1
500515 # grade: Blocking
501516 torch_nightly : true
@@ -512,7 +527,7 @@ steps:
512527
513528- label : PyTorch Fullgraph Smoke Test # 15min
514529 timeout_in_minutes : 30
515- mirror_hardwares : [amdexperimental]
530+ mirror_hardwares : [amdexperimental, amdproduction ]
516531 agent_pool : mi325_1
517532 # grade: Blocking
518533 torch_nightly : true
@@ -568,7 +583,7 @@ steps:
568583
569584- label : Kernels Attention Test %N # 23min
570585 timeout_in_minutes : 35
571- mirror_hardwares : [amdexperimental]
586+ mirror_hardwares : [amdexperimental, amdproduction ]
572587 agent_pool : mi325_8
573588 # grade: Blocking
574589 source_file_dependencies :
@@ -595,7 +610,7 @@ steps:
595610
596611- label : Kernels MoE Test %N # 40min
597612 timeout_in_minutes : 60
598- mirror_hardwares : [amdexperimental]
613+ mirror_hardwares : [amdexperimental, amdproduction ]
599614 agent_pool : mi325_8
600615 # grade: Blocking
601616 source_file_dependencies :
@@ -622,6 +637,26 @@ steps:
622637 commands :
623638 - pytest -v -s kernels/mamba
624639
640+ - label : Kernels DeepGEMM Test (H100) # Nvidia-centric
641+ # Not replicating for CUTLASS & CuTe
642+ timeout_in_minutes : 45
643+ gpu : h100
644+ num_gpus : 1
645+ source_file_dependencies :
646+ - tools/install_deepgemm.sh
647+ - vllm/utils/deep_gemm.py
648+ - vllm/model_executor/layers/fused_moe
649+ - vllm/model_executor/layers/quantization
650+ - tests/kernels/quantization/test_block_fp8.py
651+ - tests/kernels/moe/test_deepgemm.py
652+ - tests/kernels/moe/test_batched_deepgemm.py
653+ - tests/kernels/attention/test_deepgemm_attention.py
654+ commands :
655+ - pytest -v -s kernels/quantization/test_block_fp8.py -k deep_gemm
656+ - pytest -v -s kernels/moe/test_deepgemm.py
657+ - pytest -v -s kernels/moe/test_batched_deepgemm.py
658+ - pytest -v -s kernels/attention/test_deepgemm_attention.py
659+
625660- label : Model Executor Test # 23min
626661 timeout_in_minutes : 35
627662 torch_nightly : true
@@ -1055,6 +1090,7 @@ steps:
10551090 - pytest -v -s tests/kernels/moe/test_nvfp4_moe.py
10561091 - pytest -v -s tests/kernels/moe/test_ocp_mx_moe.py
10571092 - pytest -v -s tests/kernels/moe/test_flashinfer.py
1093+ - pytest -v -s tests/kernels/moe/test_cutedsl_moe.py
10581094
10591095- label : Blackwell Fusion and Compile Tests # 30 min
10601096 timeout_in_minutes : 40
@@ -1064,11 +1100,19 @@ steps:
10641100 - csrc/quantization/fp4/
10651101 - vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
10661102 - vllm/v1/attention/backends/flashinfer.py
1103+ - vllm/v1/worker/
1104+ - vllm/v1/cudagraph_dispatcher.py
10671105 - vllm/compilation/
10681106 # can affect pattern matching
10691107 - vllm/model_executor/layers/layernorm.py
10701108 - vllm/model_executor/layers/activation.py
10711109 - vllm/model_executor/layers/quantization/input_quant_fp8.py
1110+ - vllm/model_executor/layers/fused_moe/layer.py
1111+ - tests/compile/test_fusion_attn.py
1112+ - tests/compile/test_silu_mul_quant_fusion.py
1113+ - tests/compile/distributed/test_fusion_all_reduce.py
1114+ - tests/compile/distributed/test_fusions_e2e.py
1115+ - tests/compile/fullgraph/test_full_graph.py
10721116 commands :
10731117 - nvidia-smi
10741118 - pytest -v -s tests/compile/test_fusion_attn.py
@@ -1079,7 +1123,7 @@ steps:
10791123 # Wrap with quotes to escape yaml
10801124 - " pytest -v -s tests/compile/distributed/test_fusions_e2e.py::test_tp2_attn_quant_allreduce_rmsnorm -k 'True and not +quant_fp8 and not +rms_norm'"
10811125 # test_fp8_kv_scale_compile requires FlashAttention (not supported on default L4/L40)
1082- - pytest -v -s tests/compile/distributed/test_full_graph.py::test_fp8_kv_scale_compile
1126+ - pytest -v -s tests/compile/fullgraph/test_full_graph.py::test_fp8_kv_scale_compile
10831127
10841128- label : Blackwell Fusion E2E Tests # 30 min
10851129 timeout_in_minutes : 40
@@ -1101,7 +1145,7 @@ steps:
11011145 commands :
11021146 - nvidia-smi
11031147 # Run all e2e fusion tests
1104- - pytest -v -s tests/compile/test_fusions_e2e.py
1148+ - pytest -v -s tests/compile/distributed/test_fusions_e2e.py
11051149
11061150- label : ROCm GPT-OSS Eval
11071151 timeout_in_minutes : 60
@@ -1216,6 +1260,7 @@ steps:
12161260 - tests/v1/worker/test_worker_memory_snapshot.py
12171261 commands :
12181262 - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
1263+ - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
12191264 - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
12201265 - DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
12211266 - pytest -v -s entrypoints/llm/test_collective_rpc.py
@@ -1251,7 +1296,7 @@ steps:
12511296
12521297- label : Plugin Tests (2 GPUs) # 40min
12531298 timeout_in_minutes : 60
1254- mirror_hardwares : [amdexperimental]
1299+ mirror_hardwares : [amdexperimental, amdproduction ]
12551300 agent_pool : mi325_2
12561301 # grade: Blocking
12571302 working_dir : " /vllm-workspace/tests"
@@ -1327,7 +1372,7 @@ steps:
13271372
13281373- label : Weight Loading Multiple GPU Test # 33min
13291374 timeout_in_minutes : 45
1330- mirror_hardwares : [amdexperimental]
1375+ mirror_hardwares : [amdexperimental, amdproduction ]
13311376 agent_pool : mi325_2
13321377 # grade: Blocking
13331378 working_dir : " /vllm-workspace/tests"
@@ -1432,7 +1477,7 @@ steps:
14321477 - pytest -v -s tests/compile/distributed/test_fusion_all_reduce.py
14331478 # - pytest -v -s tests/compile/distributed/test_fusions_e2e.py::test_tp2_attn_quant_allreduce_rmsnorm
14341479 - " pytest -v -s tests/compile/distributed/test_fusions_e2e.py -k 'not Llama-4'"
1435- - pytest -v -s tests/compile/distributed/test_sequence_parallel.py
1480+ - pytest -v -s tests/distributed/test_sequence_parallel.py
14361481 - pytest -v -s tests/distributed/test_context_parallel.py
14371482 - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
14381483 - pytest -v -s tests/v1/distributed/test_dbo.py
@@ -1464,7 +1509,7 @@ steps:
14641509 - bash .buildkite/scripts/run-prime-rl-test.sh
14651510
14661511- label : DeepSeek V2-Lite Accuracy
1467- mirror_hardwares : [amdexperimental]
1512+ mirror_hardwares : [amdexperimental, amdproduction ]
14681513 agent_pool : mi325_4
14691514 # grade: Blocking
14701515 timeout_in_minutes : 60
@@ -1475,8 +1520,8 @@ steps:
14751520 commands :
14761521 - bash .buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh 0.25 200 8010
14771522
1478- - label : Qwen3-30B-A3B-FP8-block Accuracy
1479- mirror_hardwares : [amdexperimental]
1523+ - label : Qwen3-30B-A3B-FP8-block Accuracy (H100)
1524+ mirror_hardwares : [amdexperimental, amdproduction ]
14801525 agent_pool : mi325_4
14811526 # grade: Blocking
14821527 timeout_in_minutes : 60
@@ -1486,3 +1531,12 @@ steps:
14861531 working_dir : " /vllm-workspace"
14871532 commands :
14881533 - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020
1534+
1535+ - label : Qwen3-30B-A3B-FP8-block Accuracy (B200)
1536+ timeout_in_minutes : 60
1537+ gpu : b200
1538+ optional : true
1539+ num_gpus : 2
1540+ working_dir : " /vllm-workspace"
1541+ commands :
1542+ - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020 2 1
0 commit comments