Skip to content

Commit 59b8c6a

Browse files
committed
1
Signed-off-by: MengqingCao <[email protected]>
1 parent 0e7ff1b commit 59b8c6a

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

vllm_ascend/worker/model_runner_v1.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2774,6 +2774,7 @@ def _allocate_kv_cache_tensors(
27742774
# TODO: REFACTOR ME to sharing hybrid cache
27752775
for idx in range(len(kv_cache_tensor.shared_by)):
27762776
layer_name = kv_cache_tensor.shared_by[idx]
2777+
print(30*"-", f"layer_name: {layer_name}")
27772778
if "linear_attn" in layer_name:
27782779
# for mamba linear attention
27792780
if self.vllm_config.kv_transfer_config is None:
@@ -2788,7 +2789,8 @@ def _allocate_kv_cache_tensors(
27882789
tensor = self._align_memory(
27892790
tensor, alignment)[:kv_cache_tensor.size]
27902791
kv_cache_raw_tensors[layer_name] = tensor
2791-
elif "attn" in layer_name:
2792+
elif "attn" in layer_name and layer_name not in kv_cache_raw_tensors.keys():
2793+
print(30*"/", f"layer_name: {layer_name}")
27922794
# NOTE: We need to init k cache tensor (nope cache tensor in mla) and
27932795
# v cache tensor (rope cache tensor in mla) separately to support llmdatadist,
27942796
# as it only support the 0-dim of kv_cache is `num_blocks`.
@@ -2862,14 +2864,14 @@ def _allocate_kv_cache_tensors(
28622864
and "linear_attn" not in layer_name_inner):
28632865
kv_cache_raw_tensors[layer_name_inner] = (k_tensor, v_tensor) if \
28642866
not self.use_sparse else (k_tensor, v_tensor, k_cache_tensor)
2865-
break
28662867

28672868
layer_names = set()
28682869
for group in kv_cache_config.kv_cache_groups:
28692870
for layer_name in group.layer_names:
28702871
if layer_name in self.runner_only_attn_layers:
28712872
continue
28722873
layer_names.add(layer_name)
2874+
print(30*"=", f"layer_name: {layer_name}: kv_cache_raw_tensors[layer_name]: {id(kv_cache_raw_tensors[layer_name])}")
28732875
assert layer_names == set(kv_cache_raw_tensors.keys(
28742876
)), "Some layers are not correctly initialized"
28752877

0 commit comments

Comments (0)