2 changes: 1 addition & 1 deletion open_source/deps/requirements_lock_torch_gpu_cuda12.txt
@@ -568,7 +568,7 @@ filelock==3.20.0 \
# huggingface-hub
# torch
# transformers
-flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp310-cp310-linux_x86_64.whl \
+flash-attn @ https://rtp-opensource.oss-cn-hangzhou.aliyuncs.com/rtp_llm/flash_attn-2.7.4.post1%2Bcu12torch2.6cxx11abiTRUE-cp310-cp310-linux_x86_64.whl \
--hash=sha256:bfdb0f290cc3d21d0810ba49a360ef91090f62cdc1345ec6900447e0d12d99af
# via -r open_source/deps/requirements_torch_gpu_cuda12.txt
flashinfer-python==0.2.5 \
2 changes: 1 addition & 1 deletion open_source/deps/requirements_lock_torch_gpu_cuda12_9.txt
@@ -638,7 +638,7 @@ filelock==3.13.1 \
# huggingface-hub
# torch
# transformers
-flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp310-cp310-linux_x86_64.whl \
+flash-attn @ https://rtp-opensource.oss-cn-hangzhou.aliyuncs.com/rtp_llm/flash_attn-2.7.4.post1%2Bcu12torch2.6cxx11abiTRUE-cp310-cp310-linux_x86_64.whl \
--hash=sha256:bfdb0f290cc3d21d0810ba49a360ef91090f62cdc1345ec6900447e0d12d99af
# via -r open_source/deps/requirements_torch_gpu_cuda12_9.txt
flashinfer-python==0.2.5 \
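
Editorial aside, not part of the diff: in both lock files the sha256 pin on the flash-attn wheel is unchanged, so only the download host moves from GitHub to the Aliyun OSS mirror and hash-checking still confirms the same artifact is installed. Below is a minimal Python sketch of that verification, roughly what pip does for requirements pinned with --hash; the local wheel path is hypothetical, the expected digest is the one pinned above.

import hashlib

def wheel_matches_lock(wheel_path: str, expected_sha256: str) -> bool:
    # Stream the wheel in 1 MiB chunks and compare the digest to the lock entry.
    digest = hashlib.sha256()
    with open(wheel_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256

# Hypothetical local path; the expected hash is the one pinned in the lock files above.
# wheel_matches_lock(
#     "flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp310-cp310-linux_x86_64.whl",
#     "bfdb0f290cc3d21d0810ba49a360ef91090f62cdc1345ec6900447e0d12d99af",
# )
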
2 changes: 1 addition & 1 deletion open_source/deps/requirements_torch_gpu_cuda12.txt
@@ -4,7 +4,7 @@ autoawq>=0.2.9
datasets
https://mirrors.aliyun.com/pytorch-wheels/cu126/torch-2.6.0%2Bcu126-cp310-cp310-manylinux_2_28_x86_64.whl#sha256=c55280b4da58e565d8a25e0e844dc27d0c96aaada7b90b4de70a45397faf604e
https://mirrors.aliyun.com/pytorch-wheels/cu126/torchvision-0.21.0%2Bcu126-cp310-cp310-linux_x86_64.whl#sha256=db4369a89b866b319c8dd73931c3e5f314aa535f7035ae2336ce9a26d7ace15a
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
+https://rtp-opensource.oss-cn-hangzhou.aliyuncs.com/rtp_llm/flash_attn-2.7.4.post1%2Bcu12torch2.6cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
tensorrt==10.3.0
tensorrt-cu12-bindings==10.3.0
tensorrt-cu12-libs==10.3.0
2 changes: 1 addition & 1 deletion open_source/deps/requirements_torch_gpu_cuda12_9.txt
@@ -4,7 +4,7 @@ autoawq
datasets
https://download.pytorch.org/whl/cu129/torch-2.8.0%2Bcu129-cp310-cp310-manylinux_2_28_x86_64.whl
https://download.pytorch.org/whl/cu129/torchvision-0.23.0%2Bcu129-cp310-cp310-manylinux_2_28_x86_64.whl
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
+https://rtp-opensource.oss-cn-hangzhou.aliyuncs.com/rtp_llm/flash_attn-2.7.4.post1%2Bcu12torch2.6cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
tensorrt==10.3.0
tensorrt-cu12-bindings==10.3.0
tensorrt-cu12-libs==10.3.0
1 change: 0 additions & 1 deletion rtp_llm/cpp/models/PyWrappedModel.cc
@@ -212,7 +212,6 @@ GptModelOutputs PyWrappedModel::forwardMicroBatched(const GptModelInputs& inputs
GptModelOutputs PyWrappedModel::forward(const GptModelInputs& inputs) {

py::gil_scoped_acquire gil;
-    printBufferDataDebug(*inputs.combo_position_ids, "forward inputs.combo_position_ids");
try {
RTP_LLM_LOG_DEBUG("Calling forward method on Python object instance.");

7 changes: 4 additions & 3 deletions rtp_llm/models_py/BUILD
@@ -14,7 +14,6 @@ py_library(
]),
deps = [
"//rtp_llm/models_py/distributed:deepep_wrapper",
"//rtp_llm/models_py/distributed:process_group_state",
],
visibility = ["//visibility:public"],
)
@@ -44,14 +43,16 @@ py_library(
name = "modules_cuda",
srcs = glob([
"modules/cuda/*.py",
-    ]),
+        "modules/cuda/**/*.py",
+    ])
)

py_library(
name = "modules_rocm",
srcs = glob([
"modules/rocm/*.py",
-    ]),
+        "modules/rocm/**/*.py",
+    ])
)

py_library(
11 changes: 1 addition & 10 deletions rtp_llm/models_py/distributed/BUILD
@@ -2,18 +2,9 @@ load("//bazel:arch_select.bzl", "deep_ep_py_deps")

deep_ep_py_deps()

-py_library(
-    name = "process_group_state",
-    srcs = ["process_group_state.py"],
-    deps = [
-        "//rtp_llm:torch",
-    ],
-    visibility = ["//visibility:public"],
-)
-
py_library(
name = "deepep_wrapper",
srcs = ["deepep_wrapper.py"],
srcs = ["deepep_wrapper.py", "deepep_initializer.py"],
deps = [
"//rtp_llm:torch",
] + select({
91 changes: 91 additions & 0 deletions rtp_llm/models_py/distributed/deepep_initializer.py
@@ -0,0 +1,91 @@
"""DeepEP initialization manager

Manages singleton initialization of DeepEP environment, ensuring thread safety.
"""

import logging
import threading
from typing import Optional

import torch
import torch.distributed

from rtp_llm.config.gpt_init_model_parameters import GptInitModelParameters

try:
import rtp_llm.models_py.distributed.deepep_wrapper as deepep_wrapper_module
except Exception as e:
logging.error(f"DeepEP is not supported on this device: {e}")
deepep_wrapper_module = None


class DeepEpInitializer:
"""Singleton class for managing DeepEP initialization state"""

_initialized: bool = False
_lock: threading.Lock = threading.Lock()

@classmethod
def supported(cls) -> bool:
return deepep_wrapper_module is not None

@classmethod
def ensure_initialized(
cls, config: GptInitModelParameters, timeout: Optional[int] = None
) -> None:
"""Ensure DeepEP environment is initialized (thread-safe)

Args:
config: Model initialization parameters
"""
if cls._initialized:
return

if not cls.supported():
raise RuntimeError("DeepEP is not supported on this device")

with cls._lock:
if cls._initialized:
return
cls._do_initialization(config, timeout)
cls._initialized = True

@classmethod
def get_deepep_wrapper(cls, config: GptInitModelParameters):
cls.ensure_initialized(config)
assert deepep_wrapper_module is not None
return deepep_wrapper_module.get_deepep_wrapper()

@classmethod
def _do_initialization(
cls, config: GptInitModelParameters, timeout: Optional[int]
) -> None:
"""Perform actual initialization logic

Args:
config: Model initialization parameters
"""
assert (
torch.distributed.is_initialized()
), "Distributed environment is not initialized"
assert deepep_wrapper_module is not None, "deepep_wrapper is not imported"
default_group = torch.distributed.group.WORLD
assert default_group is not None, "Default process group is not initialized"
deepep_wrapper_module.init_deepep_wrapper(group=default_group, params=config)

@classmethod
def is_initialized(cls) -> bool:
"""Check if initialized

Returns:
Whether initialized
"""
return cls._initialized

@classmethod
def reset(cls) -> None:
"""Reset initialization state (for testing only)"""
with cls._lock:
if cls._initialized and deepep_wrapper_module is not None:
deepep_wrapper_module.destroy_deepep_wrapper()
cls._initialized = False
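
Editorial aside, not part of the diff: the new DeepEpInitializer above is a lazily initialized, thread-safe singleton, so callers only need its class methods. Below is a minimal usage sketch assuming torch.distributed has already been initialized by the runtime and that config is a populated GptInitModelParameters; the helper name get_wrapper_for_model is hypothetical and only illustrates the intended call pattern.

import torch.distributed as dist

from rtp_llm.config.gpt_init_model_parameters import GptInitModelParameters
from rtp_llm.models_py.distributed.deepep_initializer import DeepEpInitializer


def get_wrapper_for_model(config: GptInitModelParameters):
    # Hypothetical helper: returns the shared DeepEP wrapper, or None if DeepEP
    # could not be imported on this device.
    if not DeepEpInitializer.supported():
        return None
    assert dist.is_initialized(), "call torch.distributed.init_process_group first"
    # get_deepep_wrapper() calls ensure_initialized() internally; repeated calls are
    # cheap because the singleton flag is checked before the lock is taken.
    return DeepEpInitializer.get_deepep_wrapper(config)
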
205 changes: 0 additions & 205 deletions rtp_llm/models_py/distributed/process_group_state.py

This file was deleted.
