We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent f506fb9 commit 98841f5 — Copy full SHA for 98841f5
vllm_ascend/attention/attention_v1.py
@@ -327,7 +327,7 @@ def build(
327
num_computed_tokens_cpu = (seq_lens - query_lens)
328
329
if attn_state == AscendAttentionState.DecodeOnly and \
330
- common_attn_metadata.num_input_tokens > num_actual_tokens:
+ common_attn_metadata.num_input_tokens > num_actual_tokens:
331
padded_num_tokens = common_attn_metadata.num_input_tokens - num_actual_tokens
332
seq_lens = torch.cat([
333
seq_lens,
0 commit comments