Skip to content

Commit 0b65ac6

Browse files
remove useless patch (#4699)
patch_config is useless now, so let's remove it. - vLLM version: v0.12.0 - vLLM main: vllm-project/vllm@ad32e3e Signed-off-by: wangxiyuan <[email protected]> Co-authored-by: Mengqing Cao <[email protected]>
1 parent 866347a commit 0b65ac6

21 files changed

+30
-277
lines changed

examples/external_online_dp/run_dp_template.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,4 @@ vllm serve model_path \
2929
--trust-remote-code \
3030
--gpu-memory-utilization 0.9 \
3131
--quantization ascend \
32-
--speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' \
32+
--speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'

tests/e2e/nightly/features/test_mtpx_deepseek_r1_0528_w8a8.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,7 @@ async def test_models(model: str, mode: str) -> None:
7474
"VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS": "3600000"
7575
}
7676
additional_config: dict[str, Any] = {}
77-
speculative_config = {
78-
"num_speculative_tokens": 2,
79-
"method": "deepseek_mtp"
80-
}
77+
speculative_config = {"num_speculative_tokens": 2, "method": "mtp"}
8178
compilation_config = {
8279
"cudagraph_capture_sizes": [56],
8380
"cudagraph_mode": "FULL_DECODE_ONLY"

tests/e2e/nightly/features/test_prefix_cache_deepseek_r1_0528_w8a8.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,7 @@ async def test_models(model: str) -> None:
8484
"chunked_prefill_for_mla": True,
8585
"enable_weight_nz_layout": True
8686
}
87-
speculative_config = {
88-
"num_speculative_tokens": 1,
89-
"method": "deepseek_mtp"
90-
}
87+
speculative_config = {"num_speculative_tokens": 1, "method": "mtp"}
9188
server_args = [
9289
"--quantization", "ascend", "--data-parallel-size", "2",
9390
"--tensor-parallel-size", "8", "--enable-expert-parallel", "--port",

tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,7 @@ async def test_models(model: str, mode: str) -> None:
7676
"HCCL_BUFFSIZE": "1024",
7777
"PYTORCH_NPU_ALLOC_CONF": "expandable_segments:True"
7878
}
79-
speculative_config = {
80-
"num_speculative_tokens": 1,
81-
"method": "deepseek_mtp"
82-
}
79+
speculative_config = {"num_speculative_tokens": 1, "method": "mtp"}
8380
additional_config = {
8481
"torchair_graph_config": {
8582
"enabled": True,

tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,7 @@ async def test_models(model: str) -> None:
6262
"DISABLE_L2_CACHE": "1",
6363
"DYNAMIC_EPLB": "true",
6464
}
65-
speculative_config = {
66-
"num_speculative_tokens": 1,
67-
"method": "deepseek_mtp"
68-
}
65+
speculative_config = {"num_speculative_tokens": 1, "method": "mtp"}
6966
compilation_config = {
7067
"cudagraph_capture_sizes": [24],
7168
"cudagraph_mode": "FULL_DECODE_ONLY"

tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2-torchair.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ deployment:
2929
--trust-remote-code
3030
--quantization ascend
3131
--gpu-memory-utilization 0.9
32-
--speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
32+
--speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
3333
--additional-config '{"torchair_graph_config":{"enabled":true,"enable_multistream_moe":true},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'
3434
3535
-
@@ -50,7 +50,7 @@ deployment:
5050
--trust-remote-code
5151
--quantization ascend
5252
--gpu-memory-utilization 0.9
53-
--speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
53+
--speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
5454
--additional-config '{"torchair_graph_config":{"enabled":true,"enable_multistream_moe":true},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'
5555
benchmarks:
5656
acc:

tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ deployment:
3030
--quantization ascend
3131
--gpu-memory-utilization 0.9
3232
--enforce-eager
33-
--speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
33+
--speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
3434
--additional-config '{"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'
3535
3636
-
@@ -52,6 +52,6 @@ deployment:
5252
--quantization ascend
5353
--gpu-memory-utilization 0.9
5454
--enforce-eager
55-
--speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
55+
--speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
5656
--additional-config '{"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'
5757
benchmarks:

tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-EPLB.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ deployment:
3939
--max-num-batched-tokens 16384
4040
--trust-remote-code
4141
--gpu-memory-utilization 0.9
42-
--speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
42+
--speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
4343
--kv-transfer-config
4444
'{"kv_connector": "LLMDataDistCMgrConnector",
4545
"kv_buffer_device": "npu",
@@ -69,7 +69,7 @@ deployment:
6969
--max-num-batched-tokens 16384
7070
--trust-remote-code
7171
--gpu-memory-utilization 0.9
72-
--speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
72+
--speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
7373
--kv-transfer-config
7474
'{"kv_connector": "LLMDataDistCMgrConnector",
7575
"kv_buffer_device": "npu",
@@ -100,7 +100,7 @@ deployment:
100100
--max-num-batched-tokens 256
101101
--trust-remote-code
102102
--gpu-memory-utilization 0.9
103-
--speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
103+
--speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
104104
--kv-transfer-config
105105
'{"kv_connector": "LLMDataDistCMgrConnector",
106106
"kv_buffer_device": "npu",
@@ -130,7 +130,7 @@ deployment:
130130
--max-num-batched-tokens 256
131131
--trust-remote-code
132132
--gpu-memory-utilization 0.9
133-
--speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
133+
--speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
134134
--kv-transfer-config
135135
'{"kv_connector": "LLMDataDistCMgrConnector",
136136
"kv_buffer_device": "npu",

tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ deployment:
3838
--max-num-batched-tokens 16384
3939
--trust-remote-code
4040
--gpu-memory-utilization 0.9
41-
--speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
41+
--speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
4242
--kv-transfer-config
4343
'{"kv_connector": "LLMDataDistCMgrConnector",
4444
"kv_buffer_device": "npu",
@@ -68,7 +68,7 @@ deployment:
6868
--max-num-batched-tokens 16384
6969
--trust-remote-code
7070
--gpu-memory-utilization 0.9
71-
--speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
71+
--speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
7272
--kv-transfer-config
7373
'{"kv_connector": "LLMDataDistCMgrConnector",
7474
"kv_buffer_device": "npu",
@@ -99,7 +99,7 @@ deployment:
9999
--max-num-batched-tokens 256
100100
--trust-remote-code
101101
--gpu-memory-utilization 0.9
102-
--speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
102+
--speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
103103
--kv-transfer-config
104104
'{"kv_connector": "LLMDataDistCMgrConnector",
105105
"kv_buffer_device": "npu",
@@ -129,7 +129,7 @@ deployment:
129129
--max-num-batched-tokens 256
130130
--trust-remote-code
131131
--gpu-memory-utilization 0.9
132-
--speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
132+
--speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
133133
--kv-transfer-config
134134
'{"kv_connector": "LLMDataDistCMgrConnector",
135135
"kv_buffer_device": "npu",

tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def mtp_correctness(sampling_config: SamplingParams,
5656
enable_expert_parallel=True,
5757
speculative_config={
5858
"method":
59-
"deepseek_mtp",
59+
"mtp",
6060
"num_speculative_tokens":
6161
num_speculative_tokens,
6262
"disable_padded_drafter_batch":

0 commit comments

Comments
 (0)