@@ -1068,7 +1068,7 @@ steps:
10681068 # this runner has 2 GPUs available even though num_gpus=2 is not set
10691069 - pytest -v -s tests/compile/test_fusion_all_reduce.py
10701070 # Limit to Inductor partition, no custom ops, and allreduce & attn fusion to reduce running time
1071- # Wrap with quotes to escape yaml
1071+ # Wrap with quotes to escape yaml
10721072 - " pytest -v -s tests/compile/test_fusions_e2e.py::test_tp2_attn_quant_allreduce_rmsnorm -k 'True and Llama-3.1 and -quant_fp8 and -rms_norm'"
10731073
10741074- label : Blackwell Fusion E2E Tests # 30 min
@@ -1095,10 +1095,11 @@ steps:
10951095 # test_fp8_kv_scale_compile requires FlashAttention (not supported on default L4/L40)
10961096 - pytest -v -s tests/compile/test_full_graph.py::test_fp8_kv_scale_compile
10971097
1098- - label : Blackwell GPT-OSS Eval
1098+ - label : ROCm GPT-OSS Eval
10991099 timeout_in_minutes : 60
11001100 working_dir : " /vllm-workspace/"
1101- gpu : b200
1101+ agent_pool : mi325_1
1102+ mirror_hardwares : [amdproduction]
11021103 optional : true # run on nightlies
11031104 source_file_dependencies :
11041105 - tests/evals/gpt_oss
@@ -1107,7 +1108,7 @@ steps:
11071108 - vllm/v1/attention/backends/flashinfer.py
11081109 commands :
11091110 - uv pip install --system 'gpt-oss[eval]==0.0.5'
1110- - pytest -s -v tests/evals/gpt_oss/test_gpqa_correctness.py --model openai/gpt-oss-20b --metric 0.58
1111+ - VLLM_ROCM_USE_AITER_MHA=0 VLLM_ROCM_USE_AITER=1 VLLM_USE_AITER_UNIFIED_ATTENTION=1 pytest -s -v tests/evals/gpt_oss/test_gpqa_correctness.py --model openai/gpt-oss-20b --metric 0.58
11111112
11121113- label : Blackwell Quantized MoE Test
11131114 timeout_in_minutes : 60
0 commit comments