File tree Expand file tree Collapse file tree 2 files changed +3
-3
lines changed
Expand file tree Collapse file tree 2 files changed +3
-3
lines changed Original file line number Diff line number Diff line change 88 is_v1_kv_transfer_group )
99from vllm .forward_context import ForwardContext , get_forward_context
1010
11- # We find that _npu_paged_attention still performes better than
11+ # We find that _npu_paged_attention still performs better than
1212# npu_fused_infer_attention_score in some cases. We allow executing
1313# _npu_paged_attention in these cases. This should be removed once
14- # npu_fused_infer_attention_score performes better on all scenarios.
14+ # npu_fused_infer_attention_score performs better on all scenarios.
1515PAGED_ATTENTION_LIST = [1 , 2 , 3 , 4 ]
1616
1717
Original file line number Diff line number Diff line change @@ -509,7 +509,7 @@ def set_graph_params(aclgraph_capture_sizes: set[int]):
509509 )
510510
511511
512- def update_graph_params_workspaces (num_tokens : int , workspace : int ):
512+ def update_graph_params_workspaces (num_tokens : int , workspace : torch . Tensor ):
513513 global _graph_params
514514 if _graph_params is not None :
515515 _graph_params .workspaces [num_tokens ] = weak_ref_tensors (workspace )
You can’t perform that action at this time.
0 commit comments