@@ -577,47 +577,21 @@ steps:
   commands:
   - bash scripts/run-benchmarks.sh
 
-- label: AMD LM Eval Large Models
+- label: ROCM LM Eval Large Models
   mirror_hardwares: [amdproduction]
   agent_pool: mi325_8
   num_gpus: 8
   working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
   commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-mi300.txt --tp-size=8
-
-- label: AMD LM Eval Large Models tp4
-  mirror_hardwares: [amdproduction]
-  agent_pool: mi325_4
-  num_gpus: 4
-  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
-  commands:
-  # GPU diagnostics
+  # GPU diagnostics
   - echo '=== GPU Diagnostics ==='
   - rocm-smi --showid
   - printenv HIP_VISIBLE_DEVICES
   - python3 -c 'import torch; print(torch.cuda.device_count())'
   - ls -la /dev/dri/
   - echo '=== End GPU Diagnostics ==='
-  # Actual test
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-mi300.txt --tp-size=4
-
-- label: AMD LM Eval Large Models tp4(2)
-  mirror_hardwares: [amdproduction]
-  agent_pool: mi325_4
-  num_gpus: 4
-  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
-  commands:
-  # GPU diagnostics
-  - echo '=== GPU Diagnostics ==='
-  - rocm-smi --showid
-  - printenv HIP_VISIBLE_DEVICES
-  - ls -la /dev/dri/
-  - echo '=== End GPU Diagnostics ==='
-  # Actual test
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-mi300.txt --tp-size=4
+  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-mi300.txt --tp-size=8
 
 - label: Benchmarks CLI Test # 7min
   timeout_in_minutes: 20
@@ -1231,7 +1205,7 @@ steps:
   - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models.txt
 
 - label: Weight Loading Multiple GPU Test - Large Models # optional
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   agent_pool: mi325_2
   # grade: Blocking
   working_dir: "/vllm-workspace/tests"
@@ -1242,7 +1216,6 @@ steps:
   - vllm/
   - tests/weight_loading
   commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
 
 
@@ -1274,6 +1247,7 @@ steps:
   commands:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
+
 ##### H200 test #####
 - label: Distrubted Tests (H200) # optional
   gpu: h200
@@ -1296,7 +1270,7 @@ steps:
 
 ##### RL Integration Tests #####
 - label: Prime-RL Integration Test # 15min
-  mirror_hardwares: [amdexperimental, amdproduction]
+  mirror_hardwares: [amdexperimental]
   agent_pool: mi325_2
   # grade: Blocking
   timeout_in_minutes: 30
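
For reference, the consolidated "ROCM LM Eval Large Models" step introduced in the first hunk amounts to the shell sequence below. This is a minimal sketch for reproducing the step outside Buildkite, assuming a ROCm vLLM container that provides `rocm-smi`, a ROCm build of PyTorch, and the lm-eval-harness checkout at the pipeline's working directory; the commands themselves are taken directly from the step.

```bash
#!/usr/bin/env bash
# Sketch of the consolidated ROCm LM Eval Large Models step, run locally.
# Assumes a ROCm vLLM container with rocm-smi, PyTorch (ROCm build), and the
# lm-eval-harness directory at the pipeline's working_dir.
set -euo pipefail
cd /vllm-workspace/.buildkite/lm-eval-harness

# GPU diagnostics (mirrors the pipeline's preamble)
echo '=== GPU Diagnostics ==='
rocm-smi --showid
printenv HIP_VISIBLE_DEVICES || true   # may be unset outside Buildkite
python3 -c 'import torch; print(torch.cuda.device_count())'
ls -la /dev/dri/
echo '=== End GPU Diagnostics ==='

# Actual test: tp-size=8 matches the mi325_8 agent pool / num_gpus: 8
export VLLM_WORKER_MULTIPROC_METHOD=spawn
pytest -s -v test_lm_eval_correctness.py \
  --config-list-file=configs/models-large-mi300.txt \
  --tp-size=8
```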