Skip to content

Commit b8a317c

Browse files
[main][Bugfix] Remove the ZMQ communication setup on the D node (#4926)
In the PD (prefill/decode) separation scenario, the D node (the one whose kv_role is "kv_consumer") does not perform get operations, so it does not need to set up ZeroMQ (ZMQ) communication; this change skips creating the LookupKeyServer and the LookupKeyClient on that node. - vLLM version: v0.12.0 - vLLM main: vllm-project/vllm@ad32e3e Signed-off-by: SlightwindSec <[email protected]>
1 parent d54db76 commit b8a317c

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

vllm_ascend/distributed/kvpool/ascend_store_connector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def __init__(self,
5555
)
5656

5757
assert self.connector_worker is not None
58-
if vllm_config.parallel_config.rank == 0:
58+
if vllm_config.parallel_config.rank == 0 and self.kv_role != "kv_consumer":
5959
self.lookup_server = LookupKeyServer(self.connector_worker,
6060
vllm_config,
6161
self.use_layerwise)

vllm_ascend/distributed/kvpool/pool_scheduler.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,14 @@
2020
class KVPoolScheduler:
2121

2222
def __init__(self, vllm_config: "VllmConfig", use_layerwise):
23-
self.client = LookupKeyClient(vllm_config)
2423
self.use_layerwise = use_layerwise
2524
self.kv_role = vllm_config.kv_transfer_config.kv_role
2625
self.consumer_is_to_load = vllm_config.kv_transfer_config.kv_connector_extra_config.get(
2726
"consumer_is_to_load", False)
2827
self.load_async = vllm_config.kv_transfer_config.kv_connector_extra_config.get(
2928
"load_async", False)
29+
self.client = LookupKeyClient(
30+
vllm_config) if self.kv_role != "kv_consumer" else None
3031
# request_id -> (vllm cached tokens, kvpool cached tokens)
3132
self.load_specs: dict[str, LoadSpec] = {}
3233
self.pcp_size = getattr(vllm_config.parallel_config,
@@ -74,8 +75,8 @@ def get_num_new_matched_tokens(
7475
else:
7576
token_len = len(request.prompt_token_ids)
7677

77-
num_external_hit_tokens = self.client.lookup(token_len,
78-
request.block_hashes)
78+
num_external_hit_tokens = self.client.lookup( # type: ignore[union-attr]
79+
token_len, request.block_hashes)
7980

8081
if num_external_hit_tokens == request.num_tokens:
8182
num_external_hit_tokens -= 1

0 commit comments

Comments
 (0)