2 files changed: +15 -4 lines changed
Original file line number | Diff line number | Diff line change
@@ -260,8 +260,11 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
260260 f"{ vllm_config .parallel_config .tensor_parallel_size } " )
261261 if len (sp_aclgraph_sizes ) != len (original_sizes ):
262262 compilation_config .cudagraph_capture_sizes = sp_aclgraph_sizes
263- vllm_config .compilation_config .init_with_cudagraph_sizes (
264- sp_aclgraph_sizes )
263+ if vllm_version_is ("0.11.0" ):
264+ compilation_config .init_with_cudagraph_sizes (
265+ sp_aclgraph_sizes )
266+ else :
267+ vllm_config .compilation_config .post_init_cudagraph_sizes ()
265268
266269 # TODO: Full graph is fully supported later, and the default value will be set to full graph.
267270 if compilation_config .cudagraph_mode == CUDAGraphMode .FULL_AND_PIECEWISE :
Original file line number | Diff line number | Diff line change
@@ -402,7 +402,11 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
402402 indices [0 ], indices [- 1 ] = 0 , len (original_sizes ) - 1
403403
404404 sampled_sizes = [original_sizes [i ] for i in indices ]
405- compilation_config .init_with_cudagraph_sizes (sampled_sizes )
405+ if vllm_version_is ("0.11.0" ):
406+ compilation_config .init_with_cudagraph_sizes (sampled_sizes )
407+ else :
408+ compilation_config .cudagraph_capture_sizes = sampled_sizes
409+ compilation_config .post_init_cudagraph_sizes ()
406410
407411 logger .info (
408412 "Adjusted ACL graph batch sizes for %s model (layers: %d): %d → %d sizes" ,
@@ -433,7 +437,11 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
433437 if original_sizes [0 ] < (num_speculative_tokens + 1 ) * max_num_seqs :
434438 enlarged_sizes = [(num_speculative_tokens + 1 ) * size
435439 for size in original_sizes ]
436- compilation_config .init_with_cudagraph_sizes (enlarged_sizes )
440+ if vllm_version_is ("0.11.0" ):
441+ compilation_config .init_with_cudagraph_sizes (enlarged_sizes )
442+ else :
443+ compilation_config .cudagraph_capture_sizes = enlarged_sizes
444+ compilation_config .post_init_cudagraph_sizes ()
437445 logger .info (
438446 "Adjusted ACL graphs: %s → %s for speculative decoding" ,
439447 original_sizes , enlarged_sizes )
You can’t perform that action at this time.
0 commit comments