
Commit 9fbcfa3

fluctlux and wangxiyuan authored
[CI] Fix ngram & suffix test oom (#4755)
### What this PR does / why we need it?

Avoid OOM during CI by using `with VllmRunner` instead of `LLM()`, and re-enable `test_ngram_correctness`.

### How was this patch tested?

CI passed.

- vLLM version: v0.12.0
- vLLM main: vllm-project/vllm@ad32e3e

Signed-off-by: fluctlux <[email protected]>
Co-authored-by: wangxiyuan <[email protected]>
1 parent 916a9a1 commit 9fbcfa3
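As context for the fix, here is a minimal, self-contained sketch of why a context manager releases memory more reliably than `del`. `EngineSketch` and `RunnerSketch` are hypothetical stand-ins, not vllm-ascend's actual `VllmRunner`; the only assumptions carried over from the diff are that `VllmRunner` exposes the underlying model as `.model` and tears the engine down on `__exit__`.

```python
# Hypothetical sketch of the cleanup pattern this PR adopts; not vLLM code.
class EngineSketch:
    """Stand-in for an inference engine holding accelerator memory."""

    def __init__(self, name: str):
        self.name = name
        self.alive = True

    def shutdown(self):
        # A real engine would release NPU/GPU memory here.
        self.alive = False


class RunnerSketch:
    """Mirrors the `with VllmRunner(...) as ref_llm:` shape from the diff."""

    def __init__(self, name: str, **kwargs):
        self.model = EngineSketch(name)

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        # Teardown runs deterministically, even if the test body raises.
        self.model.shutdown()
        return False


with RunnerSketch("ref-model") as ref_llm:
    assert ref_llm.model.alive  # mirrors ref_llm.model.chat(...) in the diff
# Engine teardown is guaranteed by this point. After a bare `del ref_llm`,
# collection (and thus memory release) timing is up to the garbage collector.
```

The practical effect in CI: with `LLM()` plus `del`, the reference model's device memory may still be held while the speculative model loads, which is the kind of OOM this PR addresses; the `with` block forces teardown before the second model is constructed.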

File tree

2 files changed: +13 −9 lines changed


.github/workflows/_e2e_test.yaml

Lines changed: 1 addition & 2 deletions

```diff
@@ -108,8 +108,7 @@ jobs:
           # ------------------------------------ v1 spec decode test ------------------------------------ #
           pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
           pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
-          # Fix me: test_eagle_correctness OOM error
-          #pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
+          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
 
   e2e-2-cards:
     name: multicard-2
```

tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py

Lines changed: 12 additions & 7 deletions

```diff
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 
+import os
 import random
 from typing import Any
 
@@ -9,6 +10,8 @@
 
 from tests.e2e.conftest import VllmRunner
 
+os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
+
 
 @pytest.fixture
 def test_prompts():
@@ -61,7 +64,6 @@ def eagle3_model_name():
     return "vllm-ascend/EAGLE3-LLaMA3.1-Instruct-8B"
 
 
-@pytest.mark.skip("TODO: Revert me after ngram oom issue on ci is fixed")
 def test_ngram_correctness(
     test_prompts: list[list[dict[str, Any]]],
     sampling_config: SamplingParams,
@@ -71,9 +73,11 @@ def test_ngram_correctness(
     Compare the outputs of a original LLM and a speculative LLM
     should be the same when using ngram speculative decoding.
     '''
-    ref_llm = LLM(model=model_name, max_model_len=1024, enforce_eager=False)
-    ref_outputs = ref_llm.chat(test_prompts, sampling_config)
-    del ref_llm
+
+    with VllmRunner(model_name, max_model_len=1024,
+                    enforce_eager=False) as ref_llm:
+        ref_outputs = ref_llm.model.chat(test_prompts, sampling_config)
+
     with VllmRunner(model_name,
                     speculative_config={
                         "method": "ngram",
@@ -156,9 +160,10 @@ def test_suffix_correctness(
     Compare the outputs of a original LLM and a speculative LLM
     should be the same when using ngram speculative decoding.
     '''
-    ref_llm = LLM(model=model_name, max_model_len=1024, enforce_eager=False)
-    ref_outputs = ref_llm.chat(test_prompts, sampling_config)
-    del ref_llm
+    with VllmRunner(model_name, max_model_len=1024,
+                    enforce_eager=False) as ref_llm:
+        ref_outputs = ref_llm.model.chat(test_prompts, sampling_config)
+
     with VllmRunner(model_name,
                     speculative_config={
                         "method": "suffix",
```
