Skip to content

Commit 6fdff86

Browse files
author
wangxiaoxin-sherie
committed
xx
1 parent 6b5ca13 commit 6fdff86

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

vllm_ascend/worker/model_runner_v1.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2271,8 +2271,10 @@ def _build_dummy_attn_metadata(
22712271
self.seq_lens_np[:num_reqs] = seq_lens
22722272
self.seq_lens_np[num_reqs:] = 0
22732273

2274-
self.query_start_loc[:num_reqs + 1] = torch.arange(num_reqs + 1)
2275-
self.query_start_loc_cpu[:num_reqs + 1] = torch.arange(num_reqs + 1)
2274+
cu_num_tokens, arange = self._get_cumsum_and_arange(num_scheduled_tokens)
2275+
2276+
self.query_start_loc[1:num_reqs + 1] = torch.Tensor(cu_num_tokens)
2277+
self.query_start_loc_cpu[1:num_reqs + 1] = torch.Tensor(cu_num_tokens)
22762278

22772279
num_computed_tokens_cpu = (
22782280
self.input_batch.num_computed_tokens_cpu_tensor[:num_reqs])
@@ -2427,6 +2429,7 @@ def _dummy_run(
24272429
max_query_len=max_query_len,
24282430
aclgraph_runtime_mode=aclgraph_runtime_mode,
24292431
force_attention=force_attention,
2432+
num_scheduled_tokens=num_scheduled_tokens,
24302433
)
24312434

24322435
if not self.in_profile_run and self.dynamic_eplb:

0 commit comments

Comments
 (0)