@@ -45,7 +45,7 @@ def __init__(self, vllm_config: VllmConfig, role: KVConnectorRole):
4545 )
4646
4747 assert self .connector_worker is not None
48- if vllm_config .parallel_config .rank == 0 and self .kv_role == "kv_producer " :
48+ if vllm_config .parallel_config .rank == 0 and self .kv_role != "kv_consumer " :
4949 self .lookup_server = MooncakeLookupServer (
5050 self .connector_worker , vllm_config , self .use_layerwise )
5151
@@ -163,7 +163,7 @@ def __init__(self, vllm_config: "VllmConfig", use_layerwise):
163163 self .use_layerwise = use_layerwise
164164 self .kv_role = vllm_config .kv_transfer_config .kv_role
165165 self .client = MooncakeLookupClient (
166- vllm_config ) if self .kv_role == "kv_producer " else None
166+ vllm_config ) if self .kv_role != "kv_consumer " else None
167167 self .consumer_is_to_load = vllm_config .kv_transfer_config .kv_connector_extra_config .get (
168168 "consumer_is_to_load" , False )
169169 self .load_async = vllm_config .kv_transfer_config .kv_connector_extra_config .get (
@@ -208,7 +208,7 @@ def get_num_new_matched_tokens(
208208 else :
209209 token_ids = torch .tensor (request .prompt_token_ids )
210210
211- num_external_hit_tokens = self .client .lookup (token_ids )
211+ num_external_hit_tokens = self .client .lookup (token_ids ) # type: ignore[union-attr]
212212
213213 if num_external_hit_tokens == request .num_tokens :
214214 num_external_hit_tokens -= 1
0 commit comments