Skip to content

Commit 2839f2a

Browse files
author
wangxiaoxin-sherie
committed
xx
1 parent bfbd8cf commit 2839f2a

File tree

1 file changed

+1
-4
lines changed

1 file changed

+1
-4
lines changed

vllm_ascend/attention/attention_splitfusedpa.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -147,9 +147,6 @@ class AscendMetadata:
147 147   seq_lens_list: List[int] = None # type: ignore
148 148   actual_seq_lengths_q: List[int] = None # type: ignore
149 149
150     - query_start_loc: torch.Tensor = None
151     - seq_lens_list: List[int] = None
152     -
153 150   query_start_loc_list: List[int] = None
154 151   query_lens: torch.Tensor = None
155 152   # Maximum query length in the batch (None for decoding).
@@ -336,7 +333,7 @@ def full_graph_attention(self,
336 333   forward_context: ForwardContext = get_forward_context()
337 334   if forward_context.capturing:
338 335   graph_params = get_graph_params()
339     - query_start_loc = attn_metadata.query_start_loc_list
    336 + query_start_loc = attn_metadata.actual_seq_lengths_q
340 337   seq_lens = attn_metadata.seq_lens_list
341 338   num_tokens = query_start_loc[-1]
342 339   query = query[:num_tokens]

0 commit comments

Comments
 (0)