Skip to content

Commit 595fe99

Browse files
committed
fix init_with_cudagraph_sizes
Signed-off-by: Icey <[email protected]>
1 parent 98c3882 commit 595fe99

File tree

2 files changed: 15 additions and 4 deletions

2 files changed

+15
-4
lines changed

vllm_ascend/platform.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -260,8 +260,11 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
260260
f"{vllm_config.parallel_config.tensor_parallel_size}")
261261
if len(sp_aclgraph_sizes) != len(original_sizes):
262262
compilation_config.cudagraph_capture_sizes = sp_aclgraph_sizes
263-
vllm_config.compilation_config.init_with_cudagraph_sizes(
264-
sp_aclgraph_sizes)
263+
if vllm_version_is("0.11.0"):
264+
compilation_config.init_with_cudagraph_sizes(
265+
sp_aclgraph_sizes)
266+
else:
267+
vllm_config.compilation_config.post_init_cudagraph_sizes()
265268

266269
# TODO: Full graph is fully supported later, and the default value will be set to full graph.
267270
if compilation_config.cudagraph_mode == CUDAGraphMode.FULL_AND_PIECEWISE:

vllm_ascend/utils.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -402,7 +402,11 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
402402
indices[0], indices[-1] = 0, len(original_sizes) - 1
403403

404404
sampled_sizes = [original_sizes[i] for i in indices]
405-
compilation_config.init_with_cudagraph_sizes(sampled_sizes)
405+
if vllm_version_is("0.11.0"):
406+
compilation_config.init_with_cudagraph_sizes(sampled_sizes)
407+
else:
408+
compilation_config.cudagraph_capture_sizes = sampled_sizes
409+
compilation_config.post_init_cudagraph_sizes()
406410

407411
logger.info(
408412
"Adjusted ACL graph batch sizes for %s model (layers: %d): %d → %d sizes",
@@ -433,7 +437,11 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
433437
if original_sizes[0] < (num_speculative_tokens + 1) * max_num_seqs:
434438
enlarged_sizes = [(num_speculative_tokens + 1) * size
435439
for size in original_sizes]
436-
compilation_config.init_with_cudagraph_sizes(enlarged_sizes)
440+
if vllm_version_is("0.11.0"):
441+
compilation_config.init_with_cudagraph_sizes(enlarged_sizes)
442+
else:
443+
compilation_config.cudagraph_capture_sizes = enlarged_sizes
444+
compilation_config.post_init_cudagraph_sizes()
437445
logger.info(
438446
"Adjusted ACL graphs: %s → %s for speculative decoding",
439447
original_sizes, enlarged_sizes)

0 commit comments

Comments (0)