We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 2223ea3 · commit 0a43813 (Copy full SHA for 0a43813)
vllm_ascend/worker/worker_v1.py
@@ -470,14 +470,4 @@ def take_draft_token_ids(self) -> Optional[DraftTokenIds]:
         return self.model_runner.take_draft_token_ids()
 
     def reload_weights(self) -> None:
-        if self.vllm_config.model_config.enable_sleep_mode:
-            allocator = CaMemAllocator.get_instance()
-            assert allocator.get_current_usage() == 0, (
-                "Sleep mode can only be "
-                "used for one instance per process.")
-            context = allocator.use_memory_pool(tag="weights")
-        else:
-            from contextlib import nullcontext
-            context = nullcontext()  # type: ignore
-        with context:
-            self.model_runner.reload_weights()
+        self.model_runner.reload_weights()
0 commit comments