Skip to content

Commit 5d56857

Browse files
committed
update
Signed-off-by: zhewenli <[email protected]>
1 parent 4c2cd7a commit 5d56857

File tree

1 file changed

+6
-32
lines changed

1 file changed

+6
-32
lines changed

.buildkite/test-amd.yaml

Lines changed: 6 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -577,47 +577,21 @@ steps:
577577
commands:
578578
- bash scripts/run-benchmarks.sh
579579

580-
- label: AMD LM Eval Large Models
580+
- label: ROCM LM Eval Large Models
581581
mirror_hardwares: [amdproduction]
582582
agent_pool: mi325_8
583583
num_gpus: 8
584584
working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
585585
commands:
586-
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
587-
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-mi300.txt --tp-size=8
588-
589-
- label: AMD LM Eval Large Models tp4
590-
mirror_hardwares: [amdproduction]
591-
agent_pool: mi325_4
592-
num_gpus: 4
593-
working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
594-
commands:
595-
# GPU diagnostics
586+
# GPU diagnostics
596587
- echo '=== GPU Diagnostics ==='
597588
- rocm-smi --showid
598589
- printenv HIP_VISIBLE_DEVICES
599590
- python3 -c 'import torch; print(torch.cuda.device_count())'
600591
- ls -la /dev/dri/
601592
- echo '=== End GPU Diagnostics ==='
602-
# Actual test
603593
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
604-
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-mi300.txt --tp-size=4
605-
606-
- label: AMD LM Eval Large Models tp4(2)
607-
mirror_hardwares: [amdproduction]
608-
agent_pool: mi325_4
609-
num_gpus: 4
610-
working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
611-
commands:
612-
# GPU diagnostics
613-
- echo '=== GPU Diagnostics ==='
614-
- rocm-smi --showid
615-
- printenv HIP_VISIBLE_DEVICES
616-
- ls -la /dev/dri/
617-
- echo '=== End GPU Diagnostics ==='
618-
# Actual test
619-
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
620-
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-mi300.txt --tp-size=4
594+
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-mi300.txt --tp-size=8
621595

622596
- label: Benchmarks CLI Test # 7min
623597
timeout_in_minutes: 20
@@ -1231,7 +1205,7 @@ steps:
12311205
- bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models.txt
12321206

12331207
- label: Weight Loading Multiple GPU Test - Large Models # optional
1234-
mirror_hardwares: [amdexperimental, amdproduction]
1208+
mirror_hardwares: [amdexperimental]
12351209
agent_pool: mi325_2
12361210
# grade: Blocking
12371211
working_dir: "/vllm-workspace/tests"
@@ -1242,7 +1216,6 @@ steps:
12421216
- vllm/
12431217
- tests/weight_loading
12441218
commands:
1245-
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
12461219
- bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
12471220

12481221

@@ -1274,6 +1247,7 @@ steps:
12741247
commands:
12751248
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
12761249
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
1250+
12771251
##### H200 test #####
12781252
- label: Distrubted Tests (H200) # optional
12791253
gpu: h200
@@ -1296,7 +1270,7 @@ steps:
12961270

12971271
##### RL Integration Tests #####
12981272
- label: Prime-RL Integration Test # 15min
1299-
mirror_hardwares: [amdexperimental, amdproduction]
1273+
mirror_hardwares: [amdexperimental]
13001274
agent_pool: mi325_2
13011275
# grade: Blocking
13021276
timeout_in_minutes: 30

0 commit comments

Comments
 (0)