Skip to content

Commit 6e0e0d4

Browse files
stash
1 parent dcfd3b8 commit 6e0e0d4

File tree

2 files changed

+15
-11
lines changed

2 files changed

+15
-11
lines changed

vllm/v1/executor/multiproc_executor.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import signal
44
import sys
55
import time
6+
import weakref
67
from dataclasses import dataclass
78
from enum import Enum, auto
89
from multiprocessing.process import BaseProcess
@@ -19,9 +20,8 @@
1920
from vllm.executor.multiproc_worker_utils import (
2021
_add_prefix, set_multiprocessing_worker_envs)
2122
from vllm.logger import init_logger
22-
from vllm.utils import (get_distributed_init_method, get_exception_traceback,
23-
get_mp_context, get_open_port, get_open_zmq_ipc_path,
24-
kill_process_tree, zmq_socket_ctx)
23+
from vllm.utils import (get_distributed_init_method, get_mp_context,
24+
get_open_port, get_open_zmq_ipc_path, zmq_socket_ctx)
2525
from vllm.v1.executor.abstract import Executor
2626
from vllm.v1.outputs import ModelRunnerOutput
2727
from vllm.worker.worker_base import WorkerWrapperBase
@@ -35,6 +35,9 @@
3535
class MultiprocExecutor(Executor):
3636

3737
def __init__(self, vllm_config: VllmConfig) -> None:
38+
# Call self.shutdown at exit to clean up
39+
# and ensure workers will be terminated.
40+
self._finalizer = weakref.finalize(self, self.shutdown)
3841

3942
# The child processes will send SIGQUIT when unrecoverable
4043
# errors happen.
@@ -344,15 +347,12 @@ def signal_handler(signum, frame):
344347
worker.worker_busy_loop()
345348

346349
except SystemExit:
347-
# worker_busy_loop sends exceptions to Executor and raises
348-
# SystemExit.
349-
shutdown_requested = True
350350
logger.debug("Worker interrupted.")
351351

352352
except Exception:
353353
# worker_busy_loop sends exceptions to Executor
354354
# for shutdown, but if there is an error in startup or an
355-
# error with IPC
355+
# error with IPC itself, we need to alert the parent so we can
356356
# shut down.
357357
psutil.Process().parent().send_signal(signal.SIGQUIT)
358358
raise
@@ -390,18 +390,16 @@ class ResponseStatus(Enum):
390390

391391
def worker_busy_loop(self):
392392
"""Main busy loop for Multiprocessing Workers"""
393+
393394
while True:
394395
method, args, kwargs = self.rpc_broadcast_mq.dequeue()
395396

396397
try:
397-
if self.rank == 0:
398-
raise ValueError("SIMULATE CUDA ERROR")
399398
output = getattr(self.worker, method)(*args, **kwargs)
400399
except Exception as e:
401400
self.worker_response_mq.enqueue(
402401
(WorkerProc.ResponseStatus.FAILURE, e))
403-
traceback = get_exception_traceback()
404-
logger.error("WorkerProc hit an exception: %s", traceback)
402+
logger.exception("WorkerProc hit an exception: %s", exc_info=e)
405403
continue
406404

407405
self.worker_response_mq.enqueue(

vllm/v1/worker/gpu_worker.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ def __init__(
3535
distributed_init_method: str,
3636
):
3737

38+
self.i = 0
39+
3840
# TODO: use WorkerBase.__init__(self, vllm_config=vllm_config)
3941
self.vllm_config = vllm_config
4042
self.model_config = vllm_config.model_config
@@ -201,6 +203,10 @@ def execute_model(
201203
self,
202204
scheduler_output: "SchedulerOutput",
203205
) -> ModelRunnerOutput:
206+
if self.rank == 0 and self.i == 10:
207+
raise ValueError("ERROR FROM HERE :)")
208+
self.i += 1
209+
204210
output = self.model_runner.execute_model(scheduler_output)
205211
return output if self.rank == 0 else None
206212

0 commit comments

Comments
 (0)