@@ -84,7 +84,7 @@ export OMP_PROC_BIND=false
8484export OMP_NUM_THREADS=100
8585export VLLM_USE_V1=1
8686export HCCL_BUFFSIZE=200
87- export VLLM_ASCEND_ENALBE_MLAPO =1
87+ export VLLM_ASCEND_ENABLE_MLAPO=1
8888export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
8989export VLLM_ASCEND_ENABLE_FLASHCOMM1=0
9090export DISABLE_L2_CACHE=1
@@ -98,9 +98,9 @@ vllm serve vllm-ascend/DeepSeek-V3.1_w8a8mix_mtp \
9898--seed 1024 \
9999--served-model-name deepseek_v3 \
100100--enable-expert-parallel \
101- --max-num-seqs 8 \
102- --max-model-len 40000 \
103- --max-num-batched-tokens 2048 \
101+ --max-num-seqs 16 \
102+ --max-model-len 8192 \
103+ --max-num-batched-tokens 4096 \
104104--trust-remote-code \
105105--no-enable-prefix-caching \
106106--gpu-memory-utilization 0.92 \
@@ -144,9 +144,9 @@ export OMP_NUM_THREADS=100
144144export VLLM_USE_V1=1
145145export HCCL_BUFFSIZE=200
146146export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
147- export VLLM_ASCEND_ENALBE_MLAPO =1
148- export HCCL_INTRA_PCIE_ENALBE =1
149- export HCCL_INTRA_ROCE_ENALBE =0
147+ export VLLM_ASCEND_ENABLE_MLAPO=1
148+ export HCCL_INTRA_PCIE_ENABLE=1
149+ export HCCL_INTRA_ROCE_ENABLE=0
150150
151151vllm serve vllm-ascend/DeepSeek-V3.1_w8a8mix_mtp \
152152--host 0.0.0.0 \
@@ -199,13 +199,14 @@ export OMP_PROC_BIND=false
199199export OMP_NUM_THREADS=100
200200export HCCL_BUFFSIZE=200
201201export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
202- export VLLM_ASCEND_ENALBE_MLAPO =1
203- export HCCL_INTRA_PCIE_ENALBE =1
204- export HCCL_INTRA_ROCE_ENALBE =0
202+ export VLLM_ASCEND_ENABLE_MLAPO=1
203+ export HCCL_INTRA_PCIE_ENABLE=1
204+ export HCCL_INTRA_ROCE_ENABLE=0
205205
206206vllm serve vllm-ascend/DeepSeek-V3.1_w8a8mix_mtp \
207207--host 0.0.0.0 \
208208--port 8004 \
209+ --headless \
209210--data-parallel-size 4 \
210211--data-parallel-size-local 2 \
211212--data-parallel-start-rank 2 \
@@ -368,7 +369,7 @@ export HCCL_CONNECT_TIMEOUT=120
368369export OMP_PROC_BIND=false
369370export OMP_NUM_THREADS=10
370371export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
371- export VLLM_ASCEND_ENALBE_MLAPO =1
372+ export VLLM_ASCEND_ENABLE_MLAPO=1
372373export HCCL_BUFFSIZE=256
373374export TASK_QUEUE_ENABLE=1
374375export HCCL_OP_EXPANSION_MODE=" AIV"
@@ -449,7 +450,7 @@ export HCCL_CONNECT_TIMEOUT=120
449450export OMP_PROC_BIND=false
450451export OMP_NUM_THREADS=10
451452export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
452- export VLLM_ASCEND_ENALBE_MLAPO =1
453+ export VLLM_ASCEND_ENABLE_MLAPO=1
453454export HCCL_BUFFSIZE=256
454455export TASK_QUEUE_ENABLE=1
455456export HCCL_OP_EXPANSION_MODE=" AIV"
@@ -530,7 +531,7 @@ export HCCL_CONNECT_TIMEOUT=120
530531export OMP_PROC_BIND=false
531532export OMP_NUM_THREADS=10
532533export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
533- export VLLM_ASCEND_ENALBE_MLAPO =1
534+ export VLLM_ASCEND_ENABLE_MLAPO=1
534535export HCCL_BUFFSIZE=600
535536export TASK_QUEUE_ENABLE=1
536537export HCCL_OP_EXPANSION_MODE=" AIV"
@@ -611,7 +612,7 @@ export HCCL_CONNECT_TIMEOUT=120
611612export OMP_PROC_BIND=false
612613export OMP_NUM_THREADS=10
613614export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
614- export VLLM_ASCEND_ENALBE_MLAPO =1
615+ export VLLM_ASCEND_ENABLE_MLAPO=1
615616export HCCL_BUFFSIZE=600
616617export TASK_QUEUE_ENABLE=1
617618export HCCL_OP_EXPANSION_MODE=" AIV"
0 commit comments