Skip to content

Commit 74cdeb3

Browse files
committed
update
Signed-off-by: 1092626063 <[email protected]>
1 parent be588ec commit 74cdeb3

File tree

1 file changed

+15
-14
lines changed

1 file changed

+15
-14
lines changed

docs/source/tutorials/DeepSeek-V3.1.md

Lines changed: 15 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ export OMP_PROC_BIND=false
84 | 84 |   export OMP_NUM_THREADS=100
85 | 85 |   export VLLM_USE_V1=1
86 | 86 |   export HCCL_BUFFSIZE=200
87 |    | - export VLLM_ASCEND_ENALBE_MLAPO=1
   | 87 | + export VLLM_ASCEND_ENABLE_MLAPO=1
88 | 88 |   export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
89 | 89 |   export VLLM_ASCEND_ENABLE_FLASHCOMM1=0
90 | 90 |   export DISABLE_L2_CACHE=1
@@ -98,9 +98,9 @@ vllm serve vllm-ascend/DeepSeek-V3.1_w8a8mix_mtp \
98  | 98  |   --seed 1024 \
99  | 99  |   --served-model-name deepseek_v3 \
100 | 100 |   --enable-expert-parallel \
101 |     | - --max-num-seqs 8 \
102 |     | - --max-model-len 40000 \
103 |     | - --max-num-batched-tokens 2048 \
    | 101 | + --max-num-seqs 16 \
    | 102 | + --max-model-len 8192 \
    | 103 | + --max-num-batched-tokens 4096 \
104 | 104 |   --trust-remote-code \
105 | 105 |   --no-enable-prefix-caching \
106 | 106 |   --gpu-memory-utilization 0.92 \
@@ -144,9 +144,9 @@ export OMP_NUM_THREADS=100
144 | 144 |   export VLLM_USE_V1=1
145 | 145 |   export HCCL_BUFFSIZE=200
146 | 146 |   export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
147 |     | - export VLLM_ASCEND_ENALBE_MLAPO=1
148 |     | - export HCCL_INTRA_PCIE_ENALBE=1
149 |     | - export HCCL_INTRA_ROCE_ENALBE=0
    | 147 | + export VLLM_ASCEND_ENABLE_MLAPO=1
    | 148 | + export HCCL_INTRA_PCIE_ENABLE=1
    | 149 | + export HCCL_INTRA_ROCE_ENABLE=0
150 | 150 |
151 | 151 |   vllm serve vllm-ascend/DeepSeek-V3.1_w8a8mix_mtp \
152 | 152 |   --host 0.0.0.0 \
@@ -199,13 +199,14 @@ export OMP_PROC_BIND=false
199 | 199 |   export OMP_NUM_THREADS=100
200 | 200 |   export HCCL_BUFFSIZE=200
201 | 201 |   export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
202 |     | - export VLLM_ASCEND_ENALBE_MLAPO=1
203 |     | - export HCCL_INTRA_PCIE_ENALBE=1
204 |     | - export HCCL_INTRA_ROCE_ENALBE=0
    | 202 | + export VLLM_ASCEND_ENABLE_MLAPO=1
    | 203 | + export HCCL_INTRA_PCIE_ENABLE=1
    | 204 | + export HCCL_INTRA_ROCE_ENABLE=0
205 | 205 |
206 | 206 |   vllm serve vllm-ascend/DeepSeek-V3.1_w8a8mix_mtp \
207 | 207 |   --host 0.0.0.0 \
208 | 208 |   --port 8004 \
    | 209 | + --headless \
209 | 210 |   --data-parallel-size 4 \
210 | 211 |   --data-parallel-size-local 2 \
211 | 212 |   --data-parallel-start-rank 2 \
@@ -368,7 +369,7 @@ export HCCL_CONNECT_TIMEOUT=120
368 | 369 |   export OMP_PROC_BIND=false
369 | 370 |   export OMP_NUM_THREADS=10
370 | 371 |   export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
371 |     | - export VLLM_ASCEND_ENALBE_MLAPO=1
    | 372 | + export VLLM_ASCEND_ENABLE_MLAPO=1
372 | 373 |   export HCCL_BUFFSIZE=256
373 | 374 |   export TASK_QUEUE_ENABLE=1
374 | 375 |   export HCCL_OP_EXPANSION_MODE="AIV"
@@ -449,7 +450,7 @@ export HCCL_CONNECT_TIMEOUT=120
449 | 450 |   export OMP_PROC_BIND=false
450 | 451 |   export OMP_NUM_THREADS=10
451 | 452 |   export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
452 |     | - export VLLM_ASCEND_ENALBE_MLAPO=1
    | 453 | + export VLLM_ASCEND_ENABLE_MLAPO=1
453 | 454 |   export HCCL_BUFFSIZE=256
454 | 455 |   export TASK_QUEUE_ENABLE=1
455 | 456 |   export HCCL_OP_EXPANSION_MODE="AIV"
@@ -530,7 +531,7 @@ export HCCL_CONNECT_TIMEOUT=120
530 | 531 |   export OMP_PROC_BIND=false
531 | 532 |   export OMP_NUM_THREADS=10
532 | 533 |   export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
533 |     | - export VLLM_ASCEND_ENALBE_MLAPO=1
    | 534 | + export VLLM_ASCEND_ENABLE_MLAPO=1
534 | 535 |   export HCCL_BUFFSIZE=600
535 | 536 |   export TASK_QUEUE_ENABLE=1
536 | 537 |   export HCCL_OP_EXPANSION_MODE="AIV"
@@ -611,7 +612,7 @@ export HCCL_CONNECT_TIMEOUT=120
611 | 612 |   export OMP_PROC_BIND=false
612 | 613 |   export OMP_NUM_THREADS=10
613 | 614 |   export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
614 |     | - export VLLM_ASCEND_ENALBE_MLAPO=1
    | 615 | + export VLLM_ASCEND_ENABLE_MLAPO=1
615 | 616 |   export HCCL_BUFFSIZE=600
616 | 617 |   export TASK_QUEUE_ENABLE=1
617 | 618 |   export HCCL_OP_EXPANSION_MODE="AIV"

0 commit comments

Comments
 (0)