@@ -57,7 +57,7 @@ for i in {0..15}; do hccn_tool -i $i -ping -g address x.x.x.x;done
5757Mooncake is the serving platform for Kimi, a leading LLM service provided by Moonshot AI. First, we need to obtain the Mooncake project. Refer to the following command:
5858
5959``` shell
60- git clone -b pooling_async_memecpy_v1 https://github.com/AscendTransport/Mooncake
60+ git clone https://github.com/kvcache-ai/Mooncake.git
6161```
6262
6363Update and install Python.
@@ -67,22 +67,25 @@ apt-get update
6767apt-get install python3
6868```
6969
70- Install the relevant dependencies. The installation of Go is not required.
70+ Modify the Mooncake compilation options.
7171
7272``` shell
7373cd Mooncake
74- bash dependencies.sh -y
74+ vi mooncake-common/common.cmake
75+ # Find this line and set USE_ASCEND_DIRECT to ON.
76+ option(USE_ASCEND_DIRECT " option for using ascend npu with adxl engine" ON)
7577```
7678
7779Install MPI.
7880
7981``` shell
80- apt purge mpich libmpich-dev -y
81- apt purge openmpi-bin -y
82- apt purge openmpi-bin libopenmpi-dev -y
83- apt install mpich libmpich-dev -y
84- export CPATH=/usr/lib/aarch64-linux-gnu/mpich/include/:$CPATH
85- export CPATH=/usr/lib/aarch64-linux-gnu/openmpi/lib:$CPATH
82+ apt-get install mpich libmpich-dev -y
83+ ```
84+
85+ Install the relevant dependencies. The installation of Go is not required.
86+
87+ ``` shell
88+ bash dependencies.sh -y
8689```
8790
8891Compile and install
@@ -93,8 +96,6 @@ cd build
9396cmake ..
9497make -j
9598make install
96- cp mooncake-transfer-engine/src/transport/ascend_transport/hccl_transport/ascend_transport_c/libascend_transport_mem.so /usr/local/Ascend/ascend-toolkit/latest/python/site-packages/
97- cp mooncake-transfer-engine/src/libtransfer_engine.so /usr/local/Ascend/ascend-toolkit/latest/python/site-packages/
9899```
99100
100101## Prefiller/Decoder Deployment
@@ -119,10 +120,6 @@ export VLLM_USE_V1=1
119120export HCCL_BUFFSIZE=1024
120121export OMP_PROC_BIND=false
121122export OMP_NUM_THREADS=10
122- export ASCEND_AGGREGATE_ENABLE=1 # enable aggregated transmission
123- export ASCEND_TRANSPORT_PRINT=0 # print ascend transport logs
124- export ACL_OP_INIT_MODE=1 # acl op initialization mode to prevent device id acquisition failure
125- export ASCEND_A3_ENABLE=1 # enable hccs transmission for A3; set to 0 for A2
126123export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
127124
128125vllm serve /model/Qwen3-235B-A22B-W8A8 \
@@ -178,10 +175,6 @@ export VLLM_USE_V1=1
178175export HCCL_BUFFSIZE=1024
179176export OMP_PROC_BIND=false
180177export OMP_NUM_THREADS=10
181- export ASCEND_AGGREGATE_ENABLE=1
182- export ASCEND_TRANSPORT_PRINT=0
183- export ACL_OP_INIT_MODE=1
184- export ASCEND_A3_ENABLE=1
185178export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
186179
187180vllm serve /model/Qwen3-235B-A22B-W8A8 \
@@ -237,10 +230,6 @@ export VLLM_USE_V1=1
237230export HCCL_BUFFSIZE=2048
238231export OMP_PROC_BIND=false
239232export OMP_NUM_THREADS=10
240- export ASCEND_AGGREGATE_ENABLE=1
241- export ASCEND_TRANSPORT_PRINT=0
242- export ACL_OP_INIT_MODE=1
243- export ASCEND_A3_ENABLE=1
244233export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
245234
246235vllm serve /model/Qwen3-235B-A22B-W8A8 \
@@ -298,10 +287,6 @@ export VLLM_USE_V1=1
298287export HCCL_BUFFSIZE=2048
299288export OMP_PROC_BIND=false
300289export OMP_NUM_THREADS=10
301- export ASCEND_AGGREGATE_ENABLE=1
302- export ASCEND_TRANSPORT_PRINT=0
303- export ACL_OP_INIT_MODE=1
304- export ASCEND_A3_ENABLE=1
305290export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
306291
307292vllm serve /model/Qwen3-235B-A22B-W8A8 \
@@ -366,10 +351,6 @@ export VLLM_USE_V1=1
366351export HCCL_BUFFSIZE=1024
367352export OMP_PROC_BIND=false
368353export OMP_NUM_THREADS=10
369- export ASCEND_AGGREGATE_ENABLE=1
370- export ASCEND_TRANSPORT_PRINT=0
371- export ACL_OP_INIT_MODE=1
372- export ASCEND_A3_ENABLE=1
373354export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
374355
375356vllm serve /model/Qwen3-235B-A22B-W8A8 \
@@ -425,10 +406,6 @@ export VLLM_USE_V1=1
425406export HCCL_BUFFSIZE=1024
426407export OMP_PROC_BIND=false
427408export OMP_NUM_THREADS=10
428- export ASCEND_AGGREGATE_ENABLE=1
429- export ASCEND_TRANSPORT_PRINT=0
430- export ACL_OP_INIT_MODE=1
431- export ASCEND_A3_ENABLE=1
432409export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
433410
434411vllm serve /model/Qwen3-235B-A22B-W8A8 \
@@ -484,10 +461,6 @@ export VLLM_USE_V1=1
484461export HCCL_BUFFSIZE=2048
485462export OMP_PROC_BIND=false
486463export OMP_NUM_THREADS=10
487- export ASCEND_AGGREGATE_ENABLE=1
488- export ASCEND_TRANSPORT_PRINT=0
489- export ACL_OP_INIT_MODE=1
490- export ASCEND_A3_ENABLE=1
491464export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
492465
493466vllm serve /model/Qwen3-235B-A22B-W8A8 \
@@ -545,10 +518,6 @@ export VLLM_USE_V1=1
545518export HCCL_BUFFSIZE=2048
546519export OMP_PROC_BIND=false
547520export OMP_NUM_THREADS=10
548- export ASCEND_AGGREGATE_ENABLE=1
549- export ASCEND_TRANSPORT_PRINT=0
550- export ACL_OP_INIT_MODE=1
551- export ASCEND_A3_ENABLE=1
552521export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH
553522
554523vllm serve /model/Qwen3-235B-A22B-W8A8 \
0 commit comments