@@ -577,47 +577,21 @@ steps:
   commands:
   - bash scripts/run-benchmarks.sh
 
-- label: AMD LM Eval Large Models
+- label: ROCM LM Eval Large Models
   mirror_hardwares: [amdproduction]
   agent_pool: mi325_8
   num_gpus: 8
   working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
   commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-mi300.txt --tp-size=8
-
-- label: AMD LM Eval Large Models tp4
-  mirror_hardwares: [amdproduction]
-  agent_pool: mi325_4
-  num_gpus: 4
-  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
-  commands:
-  # GPU diagnostics
+  # GPU diagnostics
   - echo '=== GPU Diagnostics ==='
   - rocm-smi --showid
   - printenv HIP_VISIBLE_DEVICES
   - python3 -c 'import torch; print(torch.cuda.device_count())'
   - ls -la /dev/dri/
   - echo '=== End GPU Diagnostics ==='
-  # Actual test
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-mi300.txt --tp-size=4
-
-- label: AMD LM Eval Large Models tp4(2)
-  mirror_hardwares: [amdproduction]
-  agent_pool: mi325_4
-  num_gpus: 4
-  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
-  commands:
-  # GPU diagnostics
-  - echo '=== GPU Diagnostics ==='
-  - rocm-smi --showid
-  - printenv HIP_VISIBLE_DEVICES
-  - ls -la /dev/dri/
-  - echo '=== End GPU Diagnostics ==='
-  # Actual test
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-mi300.txt --tp-size=4
+  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-mi300.txt --tp-size=8
 
 - label: Benchmarks CLI Test # 7min
   timeout_in_minutes: 20
@@ -1231,7 +1205,7 @@ steps:
   - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models.txt
 
 - label: Weight Loading Multiple GPU Test - Large Models # optional
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   agent_pool: mi325_2
   # grade: Blocking
   working_dir: "/vllm-workspace/tests"
@@ -1242,7 +1216,6 @@ steps:
   - vllm/
   - tests/weight_loading
   commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
 
 
@@ -1274,6 +1247,7 @@ steps:
   commands:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
+
 ##### H200 test #####
 - label: Distrubted Tests (H200) # optional
   gpu: h200
@@ -1296,7 +1270,7 @@ steps:
 
 ##### RL Integration Tests #####
 - label: Prime-RL Integration Test # 15min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   agent_pool: mi325_2
   # grade: Blocking
   timeout_in_minutes: 30
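
For reference, the consolidated "ROCM LM Eval Large Models" step introduced in the first hunk amounts to the shell sequence below. This is a minimal sketch for reproducing the step outside Buildkite, assuming a ROCm vLLM container that provides `rocm-smi`, a ROCm build of PyTorch, and the lm-eval-harness checkout at the pipeline's working directory; the commands themselves are taken directly from the step.

```bash
#!/usr/bin/env bash
# Sketch of the consolidated ROCm LM Eval Large Models step, run locally.
# Assumes a ROCm vLLM container with rocm-smi, PyTorch (ROCm build), and the
# lm-eval-harness directory at the pipeline's working_dir.
set -euo pipefail
cd /vllm-workspace/.buildkite/lm-eval-harness

# GPU diagnostics (mirrors the pipeline's preamble)
echo '=== GPU Diagnostics ==='
rocm-smi --showid
printenv HIP_VISIBLE_DEVICES || true   # may be unset outside Buildkite
python3 -c 'import torch; print(torch.cuda.device_count())'
ls -la /dev/dri/
echo '=== End GPU Diagnostics ==='

# Actual test: tp-size=8 matches the mi325_8 agent pool / num_gpus: 8
export VLLM_WORKER_MULTIPROC_METHOD=spawn
pytest -s -v test_lm_eval_correctness.py \
  --config-list-file=configs/models-large-mi300.txt \
  --tp-size=8
```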