Commit 98c3882

change commit and fix send_delta_data
Signed-off-by: Icey <[email protected]>
1 parent aab23e7 commit 98c3882

4 files changed, +46 -23 lines changed

.github/workflows/format_pr_body.yaml
Lines changed: 1 addition & 1 deletion

@@ -36,7 +36,7 @@ jobs:
 
       - name: Get vLLM version
         run: |
-          VLLM_COMMIT=c9461e05a4ed3557cfbf4b15ded1e26761cc39ca
+          VLLM_COMMIT=83f478bb19489b41e9d208b47b4bb5a95ac171ac
           echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
 
       - name: Checkout repository

.github/workflows/vllm_ascend_test.yaml
Lines changed: 3 additions & 3 deletions

@@ -42,7 +42,7 @@ jobs:
   lint:
     uses: ./.github/workflows/pre-commit.yml
     with:
-      vllm: c9461e05a4ed3557cfbf4b15ded1e26761cc39ca
+      vllm: 83f478bb19489b41e9d208b47b4bb5a95ac171ac
 
   changes:
     runs-on: ubuntu-latest
@@ -83,7 +83,7 @@ jobs:
       VLLM_USE_MODELSCOPE: True
     strategy:
       matrix:
-        vllm_version: [c9461e05a4ed3557cfbf4b15ded1e26761cc39ca, v0.11.0]
+        vllm_version: [83f478bb19489b41e9d208b47b4bb5a95ac171ac, v0.11.0]
     steps:
       - name: Install packages
         run: |
@@ -140,7 +140,7 @@ jobs:
     name: e2e-light
     strategy:
       matrix:
-        vllm_version: [c9461e05a4ed3557cfbf4b15ded1e26761cc39ca, v0.11.0]
+        vllm_version: [83f478bb19489b41e9d208b47b4bb5a95ac171ac, v0.11.0]
     # Note (yikun): If CI resource are limited we can split job into two chain jobs
     needs: [lint, changes]
     # only trigger e2e test after lint passed and the change is e2e related with pull request.

.github/workflows/vllm_ascend_test_full.yaml
Lines changed: 1 addition & 1 deletion

@@ -69,7 +69,7 @@ jobs:
     name: e2e-full
     strategy:
      matrix:
-        vllm_version: [c9461e05a4ed3557cfbf4b15ded1e26761cc39ca, v0.11.0]
+        vllm_version: [83f478bb19489b41e9d208b47b4bb5a95ac171ac, v0.11.0]
     needs: [changes]
     if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
     uses: ./.github/workflows/_e2e_test.yaml
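The three workflow changes above all bump the same pinned vLLM commit (c9461e05... to 83f478bb...). Below is a minimal sketch of a repo-local consistency check that could keep such pins in sync; the script itself, its name, and running it at all are assumptions for illustration, not part of this commit.

# Hypothetical helper (not part of this commit): verify that every workflow
# pins the same vLLM commit, so a bump like the one above cannot go out of sync.
# Run from the repository root.
import re
import sys
from pathlib import Path

WORKFLOWS = [
    ".github/workflows/format_pr_body.yaml",
    ".github/workflows/vllm_ascend_test.yaml",
    ".github/workflows/vllm_ascend_test_full.yaml",
]
# Matches the pinned 40-character commit SHA in lines such as
# "VLLM_COMMIT=<sha>", "vllm: <sha>" or "vllm_version: [<sha>, v0.11.0]".
SHA_RE = re.compile(r"\b[0-9a-f]{40}\b")

def pinned_shas(repo_root: str = ".") -> dict[str, set[str]]:
    """Collect every 40-character SHA found in each workflow file."""
    return {
        wf: set(SHA_RE.findall(Path(repo_root, wf).read_text()))
        for wf in WORKFLOWS
    }

if __name__ == "__main__":
    shas = pinned_shas()
    unique = set().union(*shas.values())
    if len(unique) != 1:
        print(f"Expected one pinned vLLM commit, found: {shas}", file=sys.stderr)
        sys.exit(1)
    print(f"All workflows pin vLLM commit {unique.pop()}")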

vllm_ascend/platform.py
Lines changed: 41 additions & 18 deletions

@@ -142,24 +142,47 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                 "Non-MLA LLMs forcibly disable the chunked prefill feature,"
                 "as the performance of operators supporting this feature "
                 "functionality is currently suboptimal.")
-        if not model_config.is_multimodal_model and \
-            structured_outputs_config.backend == "auto" and \
-            not getattr(scheduler_config, "scheduler_delay_factor", 0) > 0 and \
-            scheduler_config.policy == "fcfs":
-            ascend_scheduler_config.enabled = True
-            chunked_prefill_enabled_in_ascend_scheduler = getattr(
-                ascend_scheduler_config, "enable_chunked_prefill", False)
-            if chunked_prefill_enabled_in_ascend_scheduler:
-                logger.warning(
-                    "Chunked prefill feature is enabled in ascend_scheduler,"
-                    "but note that the operator supporting this feature "
-                    "would lead to performance degradation.")
-            # In this situation, max_num_batched_tokens would have been rewritten.
-            # So we must make sure max_num_batched_tokens is not smaller than max_model_len.
-            if (scheduler_config.max_num_batched_tokens
-                    < scheduler_config.max_model_len
-                    and not chunked_prefill_enabled_in_ascend_scheduler):
-                scheduler_config.max_num_batched_tokens = scheduler_config.max_model_len
+        if vllm_version_is("0.11.0"):
+            if not model_config.is_multimodal_model and \
+                structured_outputs_config.backend == "auto" and \
+                not scheduler_config.send_delta_data and \
+                not getattr(scheduler_config, "scheduler_delay_factor", 0) > 0 and \
+                scheduler_config.policy == "fcfs":
+                ascend_scheduler_config.enabled = True
+                chunked_prefill_enabled_in_ascend_scheduler = getattr(
+                    ascend_scheduler_config, "enable_chunked_prefill",
+                    False)
+                if chunked_prefill_enabled_in_ascend_scheduler:
+                    logger.warning(
+                        "Chunked prefill feature is enabled in ascend_scheduler,"
+                        "but note that the operator supporting this feature "
+                        "would lead to performance degradation.")
+                # In this situation, max_num_batched_tokens would have been rewritten.
+                # So we must make sure max_num_batched_tokens is not smaller than max_model_len.
+                if (scheduler_config.max_num_batched_tokens
+                        < scheduler_config.max_model_len and
+                        not chunked_prefill_enabled_in_ascend_scheduler):
+                    scheduler_config.max_num_batched_tokens = scheduler_config.max_model_len
+        else:
+            if not model_config.is_multimodal_model and \
+                structured_outputs_config.backend == "auto" and \
+                not getattr(scheduler_config, "scheduler_delay_factor", 0) > 0 and \
+                scheduler_config.policy == "fcfs":
+                ascend_scheduler_config.enabled = True
+                chunked_prefill_enabled_in_ascend_scheduler = getattr(
+                    ascend_scheduler_config, "enable_chunked_prefill",
+                    False)
+                if chunked_prefill_enabled_in_ascend_scheduler:
+                    logger.warning(
+                        "Chunked prefill feature is enabled in ascend_scheduler,"
+                        "but note that the operator supporting this feature "
+                        "would lead to performance degradation.")
+                # In this situation, max_num_batched_tokens would have been rewritten.
+                # So we must make sure max_num_batched_tokens is not smaller than max_model_len.
+                if (scheduler_config.max_num_batched_tokens
+                        < scheduler_config.max_model_len and
+                        not chunked_prefill_enabled_in_ascend_scheduler):
+                    scheduler_config.max_num_batched_tokens = scheduler_config.max_model_len
 
         kv_cache_dtype = vllm_config.additional_config.get(
             "kv_cache_dtype", None)
