diff --git a/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py b/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py index f55dd03bbe5..6c7299a6905 100644 --- a/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py +++ b/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py @@ -45,7 +45,7 @@ def __init__(self, vllm_config: VllmConfig, role: KVConnectorRole): ) assert self.connector_worker is not None - if vllm_config.parallel_config.rank == 0: + if vllm_config.parallel_config.rank == 0 and self.kv_role != "kv_consumer": self.lookup_server = MooncakeLookupServer( self.connector_worker, vllm_config, self.use_layerwise) @@ -160,9 +160,10 @@ def get_zmq_rpc_path_mooncake( class MooncakeStoreConnectorV1Scheduler: def __init__(self, vllm_config: "VllmConfig", use_layerwise): - self.client = MooncakeLookupClient(vllm_config) self.use_layerwise = use_layerwise self.kv_role = vllm_config.kv_transfer_config.kv_role + self.client = MooncakeLookupClient( + vllm_config) if self.kv_role != "kv_consumer" else None self.consumer_is_to_load = vllm_config.kv_transfer_config.kv_connector_extra_config.get( "consumer_is_to_load", False) self.load_async = vllm_config.kv_transfer_config.kv_connector_extra_config.get( @@ -207,7 +208,8 @@ def get_num_new_matched_tokens( else: token_ids = torch.tensor(request.prompt_token_ids) - num_external_hit_tokens = self.client.lookup(token_ids) + num_external_hit_tokens = self.client.lookup( # type: ignore[union-attr] + token_ids) if num_external_hit_tokens == request.num_tokens: num_external_hit_tokens -= 1