
Commit 688b133

[P/D] check kv extra config and del hccl backend (#4547)

### What this PR does / why we need it?

Check the kv_connector_extra_config and delete the hccl backend.

- vLLM version: v0.12.0
- vLLM main: vllm-project/vllm@ad32e3e

Signed-off-by: liziyu <[email protected]>
Co-authored-by: wangxiyuan <[email protected]>

1 parent b91a5f0 · commit 688b133
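The hunks below drop `use_ascend_direct` from the documented `kv_connector_extra_config` examples and from the connector unit tests. The commit page does not show the new validation code itself, so the following is only a hypothetical sketch of what "checking the kv extra config" could look like; the helper name, error messages, and the exact set of checked keys are assumptions, not taken from this commit.

```python
# Hypothetical sketch only -- not the code added by this commit.
# It illustrates one way kv_connector_extra_config could be validated
# now that "use_ascend_direct" is no longer a supported key.


def check_kv_connector_extra_config(extra_config: dict) -> None:
    """Reject the removed key and sanity-check the prefill/decode sub-configs."""
    if "use_ascend_direct" in extra_config:
        raise ValueError(
            "'use_ascend_direct' is no longer a supported key in "
            "kv_connector_extra_config; remove it from your config.")
    for role in ("prefill", "decode"):
        sub = extra_config.get(role)
        if sub is None:
            continue  # a deployment may only configure one side
        for field in ("tp_size", "dp_size"):
            value = sub.get(field)
            if not isinstance(value, int) or value < 1:
                raise ValueError(
                    f"kv_connector_extra_config['{role}']['{field}'] "
                    "must be a positive integer.")


if __name__ == "__main__":
    # Accepted: matches the shape used throughout the updated docs.
    check_kv_connector_extra_config({"prefill": {"dp_size": 2, "tp_size": 8}})
    # Rejected: the key this PR removes from every example.
    try:
        check_kv_connector_extra_config({"use_ascend_direct": True})
    except ValueError as err:
        print(err)
```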

8 files changed (+133, -211 lines)


docs/source/tutorials/DeepSeek-V3.1.md

Lines changed: 0 additions & 4 deletions
@@ -430,7 +430,6 @@ vllm serve /weights/DeepSeek-V3.1_w8a8mix_mtp \
     "engine_id": "0",
     "kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
     "kv_connector_extra_config": {
-      "use_ascend_direct": true,
       "prefill": {
         "dp_size": 2,
         "tp_size": 8
@@ -510,7 +509,6 @@ vllm serve /weights/DeepSeek-V3.1_w8a8mix_mtp \
     "engine_id": "1",
     "kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
     "kv_connector_extra_config": {
-      "use_ascend_direct": true,
       "prefill": {
         "dp_size": 2,
         "tp_size": 8
@@ -590,7 +588,6 @@ vllm serve /weights/DeepSeek-V3.1_w8a8mix_mtp \
     "engine_id": "2",
     "kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
     "kv_connector_extra_config": {
-      "use_ascend_direct": true,
       "prefill": {
         "dp_size": 2,
         "tp_size": 8
@@ -670,7 +667,6 @@ vllm serve /weights/DeepSeek-V3.1_w8a8mix_mtp \
     "engine_id": "3",
     "kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
     "kv_connector_extra_config": {
-      "use_ascend_direct": true,
       "prefill": {
         "dp_size": 2,
         "tp_size": 8

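With `use_ascend_direct` gone, the tutorial's per-instance `--kv-transfer-config` shrinks accordingly. As a minimal sketch, the fragment shown in the hunks above can be rebuilt like this in Python; only the fields visible in the diff are included, and the rest of the tutorial's config (kv_connector, kv_role, the decode section, and so on) still has to be supplied for a real deployment.

```python
import json

# Minimal sketch: the portion of the tutorial's --kv-transfer-config shown in
# the hunks above, rebuilt without "use_ascend_direct". Fields not visible in
# the diff are deliberately omitted.
kv_transfer_config = {
    "engine_id": "0",
    "kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
    "kv_connector_extra_config": {
        "prefill": {
            "dp_size": 2,
            "tp_size": 8,
        },
    },
}

# The resulting JSON string is what the tutorial passes as
# `vllm serve ... --kv-transfer-config '<json>'`.
print(json.dumps(kv_transfer_config, indent=2))
```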
docs/source/user_guide/feature_guide/kv_pool.md

Lines changed: 2 additions & 5 deletions
@@ -41,7 +41,6 @@ The environment variable **MOONCAKE_CONFIG_PATH** is configured to the full path
     "metadata_server": "P2PHANDSHAKE",
     "protocol": "ascend",
     "device_name": "",
-    "use_ascend_direct": true,
     "alloc_in_same_node": true,
     "master_server_address": "xx.xx.xx.xx:50088",
     "global_segment_size": "1GB" (1024MB/1048576KB/1073741824B/1073741824)
@@ -52,7 +51,6 @@ The environment variable **MOONCAKE_CONFIG_PATH** is configured to the full path
 **metadata_server**: Configured as **P2PHANDSHAKE**.
 **protocol:** Configured for Ascend to use Mooncake's HCCL communication.
 **device_name**: ""
-**use_ascend_direct**: Indicator for using ADXL engine.
 **alloc_in_same_node**: Indicator for preferring local buffer allocation strategy.
 **master_server_address**: Configured with the IP and port of the master service.
 **global_segment_size**: Expands the kvcache size registered by the PD node to the master.
@@ -133,7 +131,7 @@ python3 -m vllm.entrypoints.openai.api_server \
         }
     ]
 }
-}' > p.log 2>&1
+}'
 ```
 
 `decode` Node:
@@ -177,7 +175,6 @@ python3 -m vllm.entrypoints.openai.api_server \
     "kv_role": "kv_consumer",
     "kv_port": "20002",
     "kv_connector_extra_config": {
-      "use_ascend_direct": true,
       "prefill": {
         "dp_size": 1,
         "tp_size": 1
@@ -196,7 +193,7 @@ python3 -m vllm.entrypoints.openai.api_server \
         }
     ]
 }
-}' > d.log 2>&1
+}'
 ```
 
 #### 2、Start proxy_server.
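In the KV pool guide the same key disappears from the Mooncake configuration file referenced by **MOONCAKE_CONFIG_PATH**, and the server commands no longer redirect their output to `p.log`/`d.log`. A small sketch of writing the updated config file, using the placeholder address and segment size from the guide (the file path below is an arbitrary choice, not from the guide):

```python
import json
import os

# Sketch of the Mooncake config described in kv_pool.md after this change:
# "use_ascend_direct" is gone; the remaining fields mirror the hunk above.
mooncake_config = {
    "metadata_server": "P2PHANDSHAKE",
    "protocol": "ascend",
    "device_name": "",
    "alloc_in_same_node": True,
    "master_server_address": "xx.xx.xx.xx:50088",  # placeholder from the guide
    "global_segment_size": "1GB",
}

config_path = "/tmp/mooncake.json"  # any writable path works
with open(config_path, "w") as f:
    json.dump(mooncake_config, f, indent=2)

# The guide sets MOONCAKE_CONFIG_PATH to the full path of this file.
os.environ["MOONCAKE_CONFIG_PATH"] = config_path
```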

tests/ut/kv_connector/test_mooncake_connector.py

Lines changed: 40 additions & 56 deletions
@@ -639,10 +639,15 @@ class TestMooncakeConnectorSchedulerMatchedTokens(unittest.TestCase):
     def setUp(self):
         config = MockVllmConfig()
         self.p1 = patch(
-            'vllm_ascend.distributed.mooncake_layerwise_connector.get_ascend_config',
-            new=MagicMock(return_value=None))
+            'vllm_ascend.distributed.mooncake_connector.init_ascend_config',
+            new=MagicMock())
+        self.p2 = patch(
+            'vllm_ascend.distributed.mooncake_connector.get_ascend_config',
+            new=MagicMock(return_value=MagicMock()))
         self.p1.start()
+        self.p2.start()
         self.addCleanup(self.p1.stop)
+        self.addCleanup(self.p2.stop)
         self.scheduler = MooncakeConnectorScheduler(config, "test_engine")
 
     def test_get_num_new_matched_tokens(self):
@@ -716,7 +721,9 @@ def test_scheduler_role(self):
         config = MockVllmConfig()
         with patch(
                 'vllm_ascend.distributed.mooncake_connector.init_ascend_config'
-        ):
+        ), patch(
+                'vllm_ascend.distributed.mooncake_connector.get_ascend_config',
+                return_value=MagicMock()):
             connector = MooncakeConnector(config, KVConnectorRole.SCHEDULER)
             self.assertIsNotNone(connector.connector_scheduler)
             self.assertIsNone(connector.connector_worker)
@@ -726,7 +733,9 @@ def test_scheduler_methods(self, mock_method):
         config = MockVllmConfig()
         with patch(
                 'vllm_ascend.distributed.mooncake_connector.init_ascend_config'
-        ):
+        ), patch(
+                'vllm_ascend.distributed.mooncake_connector.get_ascend_config',
+                return_value=MagicMock()):
             connector = MooncakeConnector(config, KVConnectorRole.SCHEDULER)
             request = MockRequest("req1")
             connector.get_num_new_matched_tokens(request, 0)
@@ -756,7 +765,9 @@ def setUp(self):
     def test_scheduler_initialization(self):
         with patch(
                 'vllm_ascend.distributed.mooncake_connector.init_ascend_config'
-        ):
+        ), patch(
+                'vllm_ascend.distributed.mooncake_connector.get_ascend_config',
+                return_value=MagicMock()):
             connector = MooncakeConnector(self.config,
                                           KVConnectorRole.SCHEDULER)
             self.assertIsNotNone(connector.connector_scheduler)
@@ -766,7 +777,9 @@ def test_scheduler_initialization(self):
     def test_get_num_new_matched_tokens(self, mock_method):
         with patch(
                 'vllm_ascend.distributed.mooncake_connector.init_ascend_config'
-        ):
+        ), patch(
+                'vllm_ascend.distributed.mooncake_connector.get_ascend_config',
+                return_value=MagicMock()):
             connector = MooncakeConnector(self.config,
                                           KVConnectorRole.SCHEDULER)
             request = MockRequest("req1")
@@ -777,7 +790,9 @@ def test_get_num_new_matched_tokens(self, mock_method):
     def test_update_state_after_alloc(self, mock_method):
         with patch(
                 'vllm_ascend.distributed.mooncake_connector.init_ascend_config'
-        ):
+        ), patch(
+                'vllm_ascend.distributed.mooncake_connector.get_ascend_config',
+                return_value=MagicMock()):
             connector = MooncakeConnector(self.config,
                                           KVConnectorRole.SCHEDULER)
             request = MockRequest("req1")
@@ -789,7 +804,9 @@ def test_update_state_after_alloc(self, mock_method):
     def test_build_connector_meta(self, mock_method):
         with patch(
                 'vllm_ascend.distributed.mooncake_connector.init_ascend_config'
-        ):
+        ), patch(
+                'vllm_ascend.distributed.mooncake_connector.get_ascend_config',
+                return_value=MagicMock()):
             connector = MooncakeConnector(self.config,
                                           KVConnectorRole.SCHEDULER)
             scheduler_output = MockSchedulerOutput()
@@ -800,7 +817,9 @@ def test_build_connector_meta(self, mock_method):
     def test_request_finished(self, mock_method):
         with patch(
                 'vllm_ascend.distributed.mooncake_connector.init_ascend_config'
-        ):
+        ), patch(
+                'vllm_ascend.distributed.mooncake_connector.get_ascend_config',
+                return_value=MagicMock()):
             connector = MooncakeConnector(self.config,
                                           KVConnectorRole.SCHEDULER)
             request = MockRequest("req1")
@@ -814,7 +833,9 @@ def setUp(self):
         self.config = MockVllmConfig()
         with patch(
                 'vllm_ascend.distributed.mooncake_connector.init_ascend_config'
-        ):
+        ), patch(
+                'vllm_ascend.distributed.mooncake_connector.get_ascend_config',
+                return_value=MagicMock()):
             self.scheduler = MooncakeConnectorScheduler(
                 self.config, "test_engine")
 
@@ -1037,9 +1058,6 @@ def setUp(self):
         self.mock_pcp_group.device_group = MagicMock()
 
         self.patches = [
-            patch(
-                'vllm_ascend.distributed.mooncake_layerwise_connector.envs_ascend.PHYSICAL_DEVICES',
-                '10,11'),
             patch('torch.Tensor.size', return_value=(10, 16, 8, 16)),
             patch('torch.Tensor.element_size', return_value=4),
             patch('torch.Tensor.data_ptr', return_value=0x1000),
@@ -1056,8 +1074,11 @@ def setUp(self):
                 'vllm_ascend.distributed.mooncake_connector.string_to_int64_hash',
                 mock_string_to_int64_hash),
             patch(
-                'vllm_ascend.distributed.mooncake_transfer_engine.TransferEngine',
+                'vllm_ascend.distributed.mooncake_connector.global_te.get_transfer_engine',
                 return_value=self.mock_transfer_engine),
+            patch(
+                'vllm_ascend.distributed.mooncake_connector.global_te.register_buffer',
+                return_value=None),
             patch(
                 'vllm_ascend.distributed.mooncake_connector.KVCacheSendingThread',
                 MagicMock()),
@@ -1073,10 +1094,13 @@ def setUp(self):
             patch('vllm.distributed.parallel_state._DCP',
                   return_value=self.mock_dcp),
             patch(
-                'vllm.distributed.get_decode_context_model_parallel_world_size',
+                'vllm_ascend.distributed.mooncake_connector.get_decode_context_model_parallel_world_size',
                 return_value=1),
             patch('vllm_ascend.distributed.mooncake_connector.get_pcp_group',
                   return_value=self.mock_pcp_group),
+            patch(
+                'vllm_ascend.distributed.mooncake_connector.get_ascend_config',
+                return_value=MagicMock()),
         ]
 
         for p in self.patches:
@@ -1090,46 +1114,6 @@ def tearDown(self):
         for p in self.patches:
             p.stop()  # type: ignore
 
-    def test_worker_use_ascend_direct(self):
-        test_case = [True, False]
-
-        for use_ascend_direct in test_case:
-            with self.subTest(use_ascend_direct=use_ascend_direct):
-                config = MagicMock()
-                config.kv_transfer_config = MagicMock()
-                config.kv_transfer_config.get_from_extra_config.side_effect = (
-                    lambda k, d: {
-                        "prefill": {
-                            "tp_size": 2,
-                            "dp_size": 1
-                        },
-                        "decode": {
-                            "tp_size": 2,
-                            "dp_size": 1
-                        },
-                        "use_ascend_direct": use_ascend_direct,
-                    }.get(k, d))
-
-                config.parallel_config = MagicMock()
-                config.parallel_config.tensor_parallel_size = 2
-                config.parallel_config.data_parallel_rank = 0
-                config.parallel_config.data_parallel_size_local = 1
-                config.kv_transfer_config.kv_port = 8000
-                config.kv_transfer_config.kv_role = 'worker'
-
-                with patch(
-                        "vllm_ascend.distributed.mooncake_connector.get_tensor_model_parallel_rank",
-                        return_value=0):
-                    with patch(
-                            "vllm_ascend.distributed.mooncake_connector.get_tp_group",
-                            return_value=None):
-                        with patch(
-                                "vllm_ascend.distributed.mooncake_connector.get_ip",
-                                return_value="127.0.0.1"):
-                            worker = MooncakeConnectorWorker(
-                                config, self.engine_id)
-                            self.assertIsNotNone(worker)
-
     def test_register_kv_caches_producer(self):
         worker = MooncakeConnectorWorker(self.vllm_config, self.engine_id)
         worker.register_kv_caches(self.kv_caches)
@@ -1160,7 +1144,7 @@ def test_device_id_selection_with_physical_devices(self):
         # Test with physical devices set
         worker = MooncakeConnectorWorker(self.vllm_config, self.engine_id)
         # Default tp_rank is 0, so device_id should be 10
-        self.assertEqual(worker.device_id, 10)
+        self.assertIsNotNone(worker.engine)
 
 
 if __name__ == '__main__':
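The recurring change in these test hunks is that building `MooncakeConnector` or `MooncakeConnectorScheduler` now stubs both `init_ascend_config` and `get_ascend_config` on `vllm_ascend.distributed.mooncake_connector`. The sketch below demonstrates that double-patch pattern in isolation; it substitutes a tiny stand-in module for the real connector module so it runs without vllm_ascend installed, and every name other than the two patch targets is illustrative.

```python
# Self-contained sketch of the patching pattern the updated tests rely on.
# A stand-in module plays the role of vllm_ascend.distributed.mooncake_connector
# so the example runs without Ascend hardware or vllm_ascend installed.
import sys
import types
import unittest
from unittest.mock import MagicMock, patch


def _needs_real_ascend(*args, **kwargs):
    raise RuntimeError("needs a real Ascend environment")


fake_connector = types.ModuleType("fake_mooncake_connector")
fake_connector.init_ascend_config = _needs_real_ascend
fake_connector.get_ascend_config = _needs_real_ascend
sys.modules["fake_mooncake_connector"] = fake_connector


class TestPatchBothConfigHooks(unittest.TestCase):

    def test_both_hooks_are_stubbed(self):
        # Mirrors the updated tests: patch init_ascend_config AND
        # get_ascend_config on the connector module before construction.
        with patch("fake_mooncake_connector.init_ascend_config"), patch(
                "fake_mooncake_connector.get_ascend_config",
                return_value=MagicMock()):
            fake_connector.init_ascend_config(MagicMock())  # now a no-op mock
            ascend_config = fake_connector.get_ascend_config()
            self.assertIsNotNone(ascend_config)


if __name__ == "__main__":
    unittest.main()
```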

tests/ut/kv_connector/test_mooncake_layerwise_connector.py

Lines changed: 8 additions & 26 deletions
@@ -58,6 +58,7 @@ def setUp(self):
                 6000],  # 2 * total_layers
             use_mla=True,
             block_len=[1024, 2048],
+            decode_tp_size=1,
             first_kv_cache=self.first_kv_cache,
             callback_func=MagicMock())
 
@@ -97,6 +98,7 @@ def test_transfer_pd_gt1_uses_buffers_and_calls_engine(
             kv_cache_base_addr=[1111, 2222, 3333, 4444],
             use_mla=False,
             block_len=[64],
+            decode_tp_size=1,
             first_kv_cache=self.first_kv_cache,
             callback_func=MagicMock())
 
@@ -155,6 +157,7 @@ def test_transfer_skips_when_tp_not_sender(self):
             kv_cache_base_addr=[1000, 2000],
             use_mla=False,
             block_len=[1024],
+            decode_tp_size=1,
             first_kv_cache=self.first_kv_cache,
             callback_func=MagicMock())
         req_meta = self.req_meta_base
@@ -397,7 +400,6 @@ def __init__(self):
                 "tp_size": 2,
                 "dp_size": 1
             },
-            "use_ascend_direct": True,
         }.get(k, d)
 
 
@@ -806,9 +808,6 @@ def setUp(self):
         self.mock_transfer_engine.register_memory.return_value = 0
 
         self.patches = [
-            patch(
-                'vllm_ascend.distributed.mooncake_layerwise_connector.envs_ascend.PHYSICAL_DEVICES',
-                '10,11'),
             patch('torch.Tensor.size', return_value=(10, 16, 8, 16)),
             patch('torch.Tensor.element_size', return_value=4),
             patch('torch.Tensor.data_ptr', return_value=0x1000),
@@ -827,8 +826,11 @@ def setUp(self):
                 'vllm_ascend.distributed.mooncake_layerwise_connector.string_to_int64_hash',
                 side_effect=lambda s: hash(s)),
             patch(
-                'vllm_ascend.distributed.mooncake_layerwise_connector.TransferEngine',
+                'vllm_ascend.distributed.mooncake_layerwise_connector.global_te.get_transfer_engine',
                 return_value=self.mock_transfer_engine),
+            patch(
+                'vllm_ascend.distributed.mooncake_layerwise_connector.global_te.register_buffer',
+                return_value=None),
             patch(
                 'vllm_ascend.distributed.mooncake_layerwise_connector.KVCacheSendingLayerThread',
                 MagicMock()),
@@ -859,26 +861,6 @@ def tearDown(self):
         for p in self.patches:
             p.stop()  # type: ignore
 
-    def test_worker_use_ascend_direct(self):
-        for use_ascend_direct in (True, False):
-            with self.subTest(use_ascend_direct=use_ascend_direct):
-                config = MockVllmConfig()
-                config.kv_transfer_config.get_from_extra_config.side_effect = (
-                    lambda k, d: {
-                        "prefill": {
-                            "tp_size": 2,
-                            "dp_size": 1
-                        },
-                        "decode": {
-                            "tp_size": 2,
-                            "dp_size": 1
-                        },
-                        "use_ascend_direct": use_ascend_direct,
-                    }.get(k, d))
-                worker = MooncakeLayerwiseConnectorWorker(
-                    config, self.engine_id)
-                self.assertIsNotNone(worker)
-
     def test_register_kv_caches_producer(self):
 
         self.vllm_config.kv_transfer_config.is_kv_producer = True
@@ -915,7 +897,7 @@ def test_register_kv_caches_mla_case(self):
     def test_device_id_selection_with_physical_devices(self):
         worker = MooncakeLayerwiseConnectorWorker(self.vllm_config,
                                                   self.engine_id)
-        self.assertEqual(worker.device_id, 10)
+        self.assertIsNotNone(worker.engine)
 
 
 if __name__ == '__main__':
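Both test files also stop patching a `TransferEngine` class directly and instead patch `global_te.get_transfer_engine` and `global_te.register_buffer` on the connector modules, i.e. a shared transfer-engine accessor. The sketch below shows that setUp-style patch list against a stand-in module, since the real `vllm_ascend.distributed.mooncake_layerwise_connector` needs Ascend hardware; everything except the two patched attribute names is illustrative.

```python
# Self-contained sketch of the patch list style used in the updated setUp():
# the tests now stub global_te.get_transfer_engine / global_te.register_buffer
# rather than constructing a TransferEngine. A stand-in module replaces the
# real connector module purely for illustration.
import sys
import types
import unittest
from unittest.mock import MagicMock, patch


class _GlobalTransferEngine:
    def get_transfer_engine(self):
        raise RuntimeError("needs a real Mooncake transfer engine")

    def register_buffer(self, addr, length):
        raise RuntimeError("needs a real Mooncake transfer engine")


fake_layerwise = types.ModuleType("fake_layerwise_connector")
fake_layerwise.global_te = _GlobalTransferEngine()
sys.modules["fake_layerwise_connector"] = fake_layerwise


class TestWorkerSetupPatches(unittest.TestCase):

    def setUp(self):
        self.mock_transfer_engine = MagicMock()
        self.patches = [
            patch("fake_layerwise_connector.global_te.get_transfer_engine",
                  return_value=self.mock_transfer_engine),
            patch("fake_layerwise_connector.global_te.register_buffer",
                  return_value=None),
        ]
        for p in self.patches:
            p.start()

    def tearDown(self):
        for p in self.patches:
            p.stop()

    def test_engine_is_stubbed(self):
        engine = fake_layerwise.global_te.get_transfer_engine()
        self.assertIs(engine, self.mock_transfer_engine)
        self.assertIsNone(fake_layerwise.global_te.register_buffer(0x1000, 64))


if __name__ == "__main__":
    unittest.main()
```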
