@@ -2774,6 +2774,7 @@ def _allocate_kv_cache_tensors(
             # TODO: REFACTOR ME to sharing hybrid cache
             for idx in range(len(kv_cache_tensor.shared_by)):
                 layer_name = kv_cache_tensor.shared_by[idx]
+                print(30 * "-", f"layer_name: {layer_name}")
                 if "linear_attn" in layer_name:
                     # for mamba linear attention
                     if self.vllm_config.kv_transfer_config is None:
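For context on the `self._align_memory(tensor, alignment)[:kv_cache_tensor.size]` call that closes this branch in the next hunk: below is a minimal sketch of the over-allocate-then-slice pattern it appears to implement. The helper body and the 2 MB `ALIGNMENT` constant are assumptions for illustration, not the runner's actual implementation or value.

```python
import torch

ALIGNMENT = 2 * 1024 * 1024  # hypothetical alignment; the runner's real value may differ


def align_memory(tensor: torch.Tensor, alignment: int) -> torch.Tensor:
    """Return a view of `tensor` starting at the first address that is a multiple of `alignment`."""
    data_ptr = tensor.data_ptr()
    aligned_addr = (data_ptr + alignment - 1) // alignment * alignment
    offset = aligned_addr - data_ptr  # byte offset == element offset for int8 tensors
    return tensor[offset:]


# Over-allocate by `alignment` bytes, align the start address, then trim to the cache size.
cache_size = 10 * 1024
raw = torch.zeros(cache_size + ALIGNMENT, dtype=torch.int8)
aligned = align_memory(raw, ALIGNMENT)[:cache_size]
assert aligned.data_ptr() % ALIGNMENT == 0 and aligned.numel() == cache_size
```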
@@ -2788,7 +2789,8 @@ def _allocate_kv_cache_tensors(
                         tensor = self._align_memory(
                             tensor, alignment)[:kv_cache_tensor.size]
                     kv_cache_raw_tensors[layer_name] = tensor
-                elif "attn" in layer_name:
+                elif "attn" in layer_name and layer_name not in kv_cache_raw_tensors.keys():
+                    print(30 * "/", f"layer_name: {layer_name}")
                     # NOTE: We need to init k cache tensor (nope cache tensor in mla) and
                     # v cache tensor (rope cache tensor in mla) separately to support llmdatadist,
                     # as it only supports `num_blocks` as the 0-dim of kv_cache.
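The substantive change in this hunk is the extra `layer_name not in kv_cache_raw_tensors.keys()` guard: together with the `break` removed in the hunk below, it lets the inner `shared_by` loop register every attention sibling that shares one backing allocation, while the outer loop skips layers that already have an entry. A self-contained sketch of that dedup pattern, with illustrative layer names, shapes, and helper (not the runner's real allocation code):

```python
from typing import Dict, List, Tuple

import torch


def allocate_shared_kv(shared_by: List[str],
                       num_blocks: int = 4,
                       block_bytes: int = 1024) -> Dict[str, Tuple[torch.Tensor, torch.Tensor]]:
    """Toy version of the dedup logic: all names in `shared_by` share one k/v allocation."""
    kv_cache_raw_tensors: Dict[str, Tuple[torch.Tensor, torch.Tensor]] = {}
    for layer_name in shared_by:
        if "attn" in layer_name and layer_name not in kv_cache_raw_tensors:
            k_tensor = torch.zeros(num_blocks, block_bytes, dtype=torch.int8)
            v_tensor = torch.zeros(num_blocks, block_bytes, dtype=torch.int8)
            # Register every layer that shares this backing allocation,
            # mirroring the inner `shared_by` loop with its `break` removed.
            for layer_name_inner in shared_by:
                if "attn" in layer_name_inner:
                    kv_cache_raw_tensors[layer_name_inner] = (k_tensor, v_tensor)
    return kv_cache_raw_tensors


tensors = allocate_shared_kv(["model.layers.0.attn", "model.layers.1.attn"])
# Both layers point at the same backing storage; allocation happened only once.
assert tensors["model.layers.0.attn"][0] is tensors["model.layers.1.attn"][0]
```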
@@ -2862,14 +2864,14 @@ def _allocate_kv_cache_tensors(
                             and "linear_attn" not in layer_name_inner):
                         kv_cache_raw_tensors[layer_name_inner] = (k_tensor, v_tensor) if \
                             not self.use_sparse else (k_tensor, v_tensor, k_cache_tensor)
-                        break
 
         layer_names = set()
         for group in kv_cache_config.kv_cache_groups:
             for layer_name in group.layer_names:
                 if layer_name in self.runner_only_attn_layers:
                     continue
                 layer_names.add(layer_name)
+                print(30 * "=", f"layer_name: {layer_name}: kv_cache_raw_tensors[layer_name]: {id(kv_cache_raw_tensors[layer_name])}")
         assert layer_names == set(kv_cache_raw_tensors.keys(
         )), "Some layers are not correctly initialized"
 
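The final loop rebuilds the expected layer set from `kv_cache_config.kv_cache_groups` (skipping `runner_only_attn_layers`) and asserts that it matches the keys of `kv_cache_raw_tensors`. Below is a standalone sketch of that consistency check with stand-in types; the explicit missing/extra report is an illustrative addition for debugging, not part of the runner's code.

```python
from dataclasses import dataclass, field
from typing import Dict, List, Set


@dataclass
class KVCacheGroup:
    """Stand-in for the real kv_cache_group objects; only `layer_names` is modeled."""
    layer_names: List[str] = field(default_factory=list)


def check_layers_initialized(groups: List[KVCacheGroup],
                             kv_cache_raw_tensors: Dict[str, object],
                             runner_only_attn_layers: Set[str]) -> None:
    expected: Set[str] = set()
    for group in groups:
        for layer_name in group.layer_names:
            if layer_name in runner_only_attn_layers:
                continue
            expected.add(layer_name)
    missing = expected - set(kv_cache_raw_tensors)
    extra = set(kv_cache_raw_tensors) - expected
    assert not missing and not extra, (
        f"Some layers are not correctly initialized: missing={missing}, extra={extra}")


# Passes: every expected layer has a raw tensor entry and nothing else is registered.
check_layers_initialized(
    [KVCacheGroup(["model.layers.0.attn", "model.layers.1.attn"])],
    {"model.layers.0.attn": object(), "model.layers.1.attn": object()},
    runner_only_attn_layers=set(),
)
```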