48 changes: 22 additions & 26 deletions CMakeLists.txt
@@ -19,23 +19,23 @@ set(MSCCLPP_VERSION "${MSCCLPP_MAJOR}.${MSCCLPP_MINOR}.${MSCCLPP_PATCH}")
find_package(Git)
set(GIT_HASH "UNKNOWN")
if(Git_FOUND)
execute_process(
COMMAND "${GIT_EXECUTABLE}" rev-parse --short=12 HEAD
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
OUTPUT_VARIABLE _git_out
OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(NOT _git_out STREQUAL "")
set(GIT_HASH "${_git_out}")
endif()
execute_process(
COMMAND "${GIT_EXECUTABLE}" rev-parse --short=12 HEAD
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
OUTPUT_VARIABLE _git_out
OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(NOT _git_out STREQUAL "")
set(GIT_HASH "${_git_out}")
endif()
else()
message(WARNING "Git not found, setting GIT_HASH to 'UNKNOWN'")
endif()

configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/include/mscclpp/version.hpp.in"
"${CMAKE_CURRENT_BINARY_DIR}/include/mscclpp/version.hpp"
@ONLY
"${CMAKE_CURRENT_SOURCE_DIR}/include/mscclpp/version.hpp.in"
"${CMAKE_CURRENT_BINARY_DIR}/include/mscclpp/version.hpp"
@ONLY
)

list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
@@ -47,6 +47,7 @@ option(MSCCLPP_BUILD_PYTHON_BINDINGS "Build Python bindings" ON)
option(MSCCLPP_BUILD_APPS_NCCL "Build NCCL interfaces" ON)
option(MSCCLPP_USE_CUDA "Use NVIDIA/CUDA." OFF)
option(MSCCLPP_USE_ROCM "Use AMD/ROCm." OFF)
option(MSCCLPP_USE_IB "Use InfiniBand." ON)
option(MSCCLPP_BYPASS_GPU_CHECK "Bypass GPU check." OFF)
option(MSCCLPP_NPKIT_FLAGS "Set NPKIT flags" OFF)
set(MSCCLPP_GPU_ARCHS "" CACHE STRING "Specify GPU architectures with delimiters (comma, space, or semicolon).")
@@ -141,23 +142,18 @@ else()
endif()

if(CMAKE_BUILD_TYPE STREQUAL "Debug")
add_compile_definitions(DEBUG_BUILD)
add_compile_definitions(DEBUG_BUILD)
endif()

find_package(IBVerbs)
if(MSCCLPP_USE_IB)
find_package(IBVerbs)
if(NOT IBVERBS_FOUND)
message(FATAL_ERROR "IBVerbs not found. Install libibverbs-dev or rdma-core-devel. If you want to disable InfiniBand, add `-DMSCCLPP_USE_IB=OFF` in your cmake command.")
endif()
endif()
find_package(NUMA REQUIRED)
find_package(Threads REQUIRED)

set(CMAKE_COLOR_DIAGNOSTICS ON)
function(msg_red text)
string(ASCII 27 ESC)
message("${ESC}[31m${text}${ESC}[0m")
endfunction()

if(NOT IBVERBS_FOUND)
msg_red("libibverbs not found. Install libibverbs-dev or rdma-core-devel.")
endif()

include(FetchContent)
FetchContent_Declare(json URL https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz)
FetchContent_MakeAvailable(json)
@@ -168,7 +164,7 @@ target_include_directories(mscclpp_obj
${GPU_INCLUDE_DIRS}
${NUMA_INCLUDE_DIRS})
target_link_libraries(mscclpp_obj PRIVATE ${GPU_LIBRARIES} ${NUMA_LIBRARIES} nlohmann_json::nlohmann_json Threads::Threads dl)
if(IBVERBS_FOUND)
if(MSCCLPP_USE_IB)
target_include_directories(mscclpp_obj SYSTEM PRIVATE ${IBVERBS_INCLUDE_DIRS})
target_link_libraries(mscclpp_obj PRIVATE ${IBVERBS_LIBRARIES})
target_compile_definitions(mscclpp_obj PUBLIC USE_IBVERBS)
@@ -179,7 +175,7 @@ if(MSCCLPP_USE_CUDA)
elseif(MSCCLPP_USE_ROCM)
target_compile_definitions(mscclpp_obj PRIVATE MSCCLPP_USE_ROCM)
foreach(arch ${MSCCLPP_GPU_ARCHS})
target_compile_options(mscclpp_obj PRIVATE --offload-arch=${arch})
target_compile_options(mscclpp_obj PRIVATE --offload-arch=${arch})
endforeach()
endif()
if(MSCCLPP_ENABLE_TRACE)
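With `MSCCLPP_USE_IB` left at its default `ON`, the check added above hard-fails when the libibverbs development headers are missing. A rough sketch of installing them, using the package names the error message itself mentions (the package-manager invocations are assumptions, not part of this diff):

```bash
# Debian/Ubuntu: package name taken from the FATAL_ERROR message above
sudo apt-get install -y libibverbs-dev
# RHEL/Fedora equivalent (assumed)
sudo dnf install -y rdma-core-devel
```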
8 changes: 3 additions & 5 deletions docs/quickstart.md
@@ -19,7 +19,7 @@
* AMD MI250X GPUs + ROCm >= 5.7
* AMD MI300X GPUs + ROCm >= 6.0
* OS
* Tested on Ubuntu 18.04 and later
* Tested on Ubuntu 20.04 and later
* Libraries
* [libnuma](https://github.com/numactl/numactl)
```bash
@@ -32,10 +32,7 @@
If you don't want to build the Python module, you need to set `-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF` in your `cmake` command (see details in [Install from Source](#install-from-source)).
* (Optional, for benchmarks) MPI
* Others
* For NVIDIA platforms, `nvidia_peermem` driver should be loaded on all nodes. Check it via:
```bash
lsmod | grep nvidia_peermem
```
* For RDMA (InfiniBand or RoCE) support on NVIDIA platforms, [GPUDirect RDMA](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/gpu-operator-rdma.html#gpudirect-rdma-and-gpudirect-storage) should be supported by the system. See the detailed prerequisites from [this NVIDIA documentation](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/gpu-operator-rdma.html#common-prerequisites).
* For NVLink SHARP (NVLS) support on NVIDIA platforms, the Linux kernel version should be 5.6 or above.

(docker-images)=
@@ -91,6 +88,7 @@ There are a few optional CMake options you can set:
- `-DMSCCLPP_GPU_ARCHS=<arch-list>`: Specify the GPU architectures to build for. For example, `-DMSCCLPP_GPU_ARCHS="80,90"` for NVIDIA A100 and H100 GPUs, `-DMSCCLPP_GPU_ARCHS=gfx942` for AMD MI300x GPU.
- `-DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON`: If the build environment doesn't have GPUs and only has CUDA installed, you can set these options to bypass GPU checks and use CUDA APIs. This is useful for building on CI systems or environments without GPUs.
- `-DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON`: If the build environment doesn't have GPUs and only has ROCm installed, you can set these options to bypass GPU checks and use ROCm APIs.
- `-DMSCCLPP_USE_IB=OFF`: Don't build InfiniBand support.
- `-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF`: Don't build the Python module.
- `-DMSCCLPP_BUILD_TESTS=OFF`: Don't build the tests.
- `-DMSCCLPP_BUILD_APPS_NCCL=OFF`: Don't build the NCCL API.
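Combining the options above, including the new `-DMSCCLPP_USE_IB=OFF`, a configure-and-build sketch might look like the following; the `-S`/`-B` source and build layout and the example architecture value are illustrative assumptions, not part of this change:

```bash
# Illustrative: CUDA build without InfiniBand, Python bindings, or tests
cmake -S . -B build \
      -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_GPU_ARCHS=90 \
      -DMSCCLPP_USE_IB=OFF -DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF -DMSCCLPP_BUILD_TESTS=OFF
cmake --build build -j
```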
13 changes: 7 additions & 6 deletions src/ib.cc
@@ -25,7 +25,7 @@
#if !defined(__HIP_PLATFORM_AMD__)

// Check if nvidia_peermem kernel module is loaded
static bool checkNvPeerMemLoaded() {
[[maybe_unused]] static bool checkNvPeerMemLoaded() {
std::ifstream file("/proc/modules");
std::string line;
while (std::getline(file, line)) {
@@ -77,6 +77,12 @@ IbMr::IbMr(ibv_pd* pd, void* buff, std::size_t size) : buff(buff) {
throw Error("Registeration of dma-buf based memory region failed on HIP platform", ErrorCode::InvalidUsage);
#endif // !defined(__HIP_PLATFORM_AMD__)
} else {
#if !defined(__HIP_PLATFORM_AMD__)
// nvidia-peermem is needed only when DMABUF is not supported
if (!checkNvPeerMemLoaded()) {
throw Error("nvidia_peermem kernel module is not loaded", ErrorCode::InternalError);
}
#endif // !defined(__HIP_PLATFORM_AMD__)
this->mr = IBVerbs::ibv_reg_mr2(pd, reinterpret_cast<void*>(addr), pages * pageSize,
IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ |
IBV_ACCESS_RELAXED_ORDERING | IBV_ACCESS_REMOTE_ATOMIC);
@@ -329,11 +335,6 @@ int IbQp::getWcStatus(int idx) const { return (*this->wcs)[idx].status; }
int IbQp::getNumCqItems() const { return this->numSignaledPostedItems; }

IbCtx::IbCtx(const std::string& devName) : devName(devName) {
#if !defined(__HIP_PLATFORM_AMD__)
if (!checkNvPeerMemLoaded()) {
throw Error("nvidia_peermem kernel module is not loaded", ErrorCode::InternalError);
}
#endif // !defined(__HIP_PLATFORM_AMD__)
int num;
struct ibv_device** devices = IBVerbs::ibv_get_device_list(&num);
for (int i = 0; i < num; ++i) {
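The relocated `checkNvPeerMemLoaded()` scans `/proc/modules` and is now consulted only when falling back to non-DMA-BUF registration. A roughly equivalent manual check from the shell, for illustration (the exact match the C++ code performs is not shown in this hunk):

```bash
# Roughly equivalent manual check for the nvidia_peermem kernel module
grep -q '^nvidia_peermem ' /proc/modules && echo "nvidia_peermem is loaded" || echo "nvidia_peermem is NOT loaded"
```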
4 changes: 2 additions & 2 deletions test/CMakeLists.txt
@@ -4,7 +4,7 @@
find_package(MPI)

set(TEST_LIBS_COMMON mscclpp ${GPU_LIBRARIES} ${NUMA_LIBRARIES} Threads::Threads)
if(IBVERBS_FOUND)
if(MSCCLPP_USE_IB)
list(APPEND TEST_LIBS_COMMON ${IBVERBS_LIBRARIES})
endif()
set(TEST_LIBS_GTEST GTest::gtest_main GTest::gmock_main)
@@ -19,7 +19,7 @@ endif()
function(add_test_executable name sources)
add_executable(${name} ${sources})
target_link_libraries(${name} ${TEST_LIBS_COMMON} MPI::MPI_CXX)
if(IBVERBS_FOUND)
if(MSCCLPP_USE_IB)
target_compile_definitions(${name} PRIVATE USE_IBVERBS)
endif()
target_include_directories(${name} ${TEST_INC_COMMON} ${TEST_INC_INTERNAL})
14 changes: 12 additions & 2 deletions test/mp_unit/communicator_tests.cu
@@ -104,7 +104,11 @@ void CommunicatorTest::SetUp() {

ASSERT_EQ((deviceBufferSize / sizeof(int)) % gEnv->worldSize, 0);

#if defined(USE_IBVERBS)
connectMesh(true, true, false);
#else
connectMesh(true, false, false);
#endif

devicePtr.resize(numBuffers);
localMemory.resize(numBuffers);
@@ -117,10 +121,16 @@ void CommunicatorTest::SetUp() {
}
}

#if defined(USE_IBVERBS)
auto transport = mscclpp::Transport::CudaIpc | ibTransport;
#else
auto transport = mscclpp::Transport::CudaIpc;
#endif

for (size_t n = 0; n < numBuffers; n++) {
devicePtr[n] = mscclpp::detail::gpuCallocShared<int>(deviceBufferSize / sizeof(int));
registerMemoryPairs(devicePtr[n].get(), deviceBufferSize, mscclpp::Transport::CudaIpc | ibTransport, 0, remoteRanks,
localMemory[n], remoteMemory[n]);
registerMemoryPairs(devicePtr[n].get(), deviceBufferSize, transport, 0, remoteRanks, localMemory[n],
remoteMemory[n]);
}
}

4 changes: 4 additions & 0 deletions test/mp_unit/ib_tests.cu
@@ -18,6 +18,10 @@ void IbTestBase::SetUp() {
}

void IbPeerToPeerTest::SetUp() {
#if !defined(USE_IBVERBS)
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
#endif // !defined(USE_IBVERBS)

IbTestBase::SetUp();

mscclpp::UniqueId id;
9 changes: 3 additions & 6 deletions test/mp_unit/memory_channel_tests.cu
@@ -23,7 +23,7 @@ void MemoryChannelOneToOneTest::setupMeshConnections(std::vector<mscclpp::Memory
const int rank = communicator->bootstrap()->getRank();
const int worldSize = communicator->bootstrap()->getNranks();
const bool isInPlace = (outputBuff == nullptr);
mscclpp::TransportFlags transport = mscclpp::Transport::CudaIpc | ibTransport;
mscclpp::TransportFlags transport = mscclpp::Transport::CudaIpc;

std::vector<std::shared_future<std::shared_ptr<mscclpp::Connection>>> connectionFutures(worldSize);
std::vector<std::shared_future<mscclpp::RegisteredMemory>> remoteMemFutures(worldSize);
@@ -38,11 +38,8 @@
if (r == rank) {
continue;
}
if (rankToNode(r) == rankToNode(gEnv->rank)) {
connectionFutures[r] = communicator->connect(mscclpp::Transport::CudaIpc, r);
} else {
connectionFutures[r] = communicator->connect(ibTransport, r);
}
// No IB for MemoryChannel tests
connectionFutures[r] = communicator->connect(mscclpp::Transport::CudaIpc, r);

if (isInPlace) {
communicator->sendMemory(inputBufRegMem, r);
46 changes: 35 additions & 11 deletions test/mp_unit/port_channel_tests.cu
@@ -242,31 +242,43 @@ void PortChannelOneToOneTest::testPingPongPerf(PingPongTestParams params) {
}

TEST_F(PortChannelOneToOneTest, PingPong) {
testPingPong(PingPongTestParams{.useIPC = true, .useIB = true, .useEthernet = false, .waitWithPoll = false});
testPingPong(PingPongTestParams{.useIPC = true, .useIB = false, .useEthernet = false, .waitWithPoll = false});
}

TEST_F(PortChannelOneToOneTest, PingPongIb) {
#if defined(USE_IBVERBS)
testPingPong(PingPongTestParams{.useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = false});
#else // !defined(USE_IBVERBS)
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
#endif // !defined(USE_IBVERBS)
}

TEST_F(PortChannelOneToOneTest, PingPongEthernet) {
testPingPong(PingPongTestParams{.useIPC = false, .useIB = false, .useEthernet = true, .waitWithPoll = false});
}

TEST_F(PortChannelOneToOneTest, PingPongWithPoll) {
testPingPong(PingPongTestParams{.useIPC = true, .useIB = true, .useEthernet = false, .waitWithPoll = true});
testPingPong(PingPongTestParams{.useIPC = true, .useIB = false, .useEthernet = false, .waitWithPoll = true});
}

TEST_F(PortChannelOneToOneTest, PingPongIbWithPoll) {
#if defined(USE_IBVERBS)
testPingPong(PingPongTestParams{.useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = true});
#else // !defined(USE_IBVERBS)
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
#endif // !defined(USE_IBVERBS)
}

TEST_F(PortChannelOneToOneTest, PingPongPerf) {
testPingPongPerf(PingPongTestParams{.useIPC = true, .useIB = true, .useEthernet = false, .waitWithPoll = false});
testPingPongPerf(PingPongTestParams{.useIPC = true, .useIB = false, .useEthernet = false, .waitWithPoll = false});
}

TEST_F(PortChannelOneToOneTest, PingPongPerfIb) {
#if defined(USE_IBVERBS)
testPingPongPerf(PingPongTestParams{.useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = false});
#else // !defined(USE_IBVERBS)
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
#endif // !defined(USE_IBVERBS)
}

TEST_F(PortChannelOneToOneTest, PingPongPerfEthernet) {
@@ -339,7 +351,7 @@ __global__ void kernelProxyLLPingPong(int* buff, mscclpp::LLPacket* putPktBuf, m
}
}

void PortChannelOneToOneTest::testPacketPingPong(bool useIbOnly) {
void PortChannelOneToOneTest::testPacketPingPong(bool useIb) {
if (gEnv->rank >= numRanksToUse) return;

const int nElem = 4 * 1024 * 1024;
Expand All @@ -351,8 +363,8 @@ void PortChannelOneToOneTest::testPacketPingPong(bool useIbOnly) {
auto putPacketBuffer = mscclpp::GpuBuffer<mscclpp::LLPacket>(nPacket).memory();
auto getPacketBuffer = mscclpp::GpuBuffer<mscclpp::LLPacket>(nPacket).memory();

setupMeshConnections(portChannels, !useIbOnly, true, false, putPacketBuffer.get(),
nPacket * sizeof(mscclpp::LLPacket), getPacketBuffer.get(), nPacket * sizeof(mscclpp::LLPacket));
setupMeshConnections(portChannels, !useIb, useIb, false, putPacketBuffer.get(), nPacket * sizeof(mscclpp::LLPacket),
getPacketBuffer.get(), nPacket * sizeof(mscclpp::LLPacket));

ASSERT_EQ(portChannels.size(), 1);

@@ -406,7 +418,7 @@ void PortChannelOneToOneTest::testPacketPingPong(bool useIbOnly) {
proxyService->stopProxy();
}

void PortChannelOneToOneTest::testPacketPingPongPerf(bool useIbOnly) {
void PortChannelOneToOneTest::testPacketPingPongPerf(bool useIb) {
if (gEnv->rank >= numRanksToUse) return;

const int nElem = 4 * 1024 * 1024;
Expand All @@ -418,8 +430,8 @@ void PortChannelOneToOneTest::testPacketPingPongPerf(bool useIbOnly) {
auto putPacketBuffer = mscclpp::GpuBuffer<mscclpp::LLPacket>(nPacket).memory();
auto getPacketBuffer = mscclpp::GpuBuffer<mscclpp::LLPacket>(nPacket).memory();

setupMeshConnections(portChannels, !useIbOnly, true, false, putPacketBuffer.get(),
nPacket * sizeof(mscclpp::LLPacket), getPacketBuffer.get(), nPacket * sizeof(mscclpp::LLPacket));
setupMeshConnections(portChannels, !useIb, useIb, false, putPacketBuffer.get(), nPacket * sizeof(mscclpp::LLPacket),
getPacketBuffer.get(), nPacket * sizeof(mscclpp::LLPacket));

ASSERT_EQ(portChannels.size(), 1);

@@ -464,8 +476,20 @@

TEST_F(PortChannelOneToOneTest, PacketPingPong) { testPacketPingPong(false); }

TEST_F(PortChannelOneToOneTest, PacketPingPongIb) { testPacketPingPong(true); }
TEST_F(PortChannelOneToOneTest, PacketPingPongIb) {
#if defined(USE_IBVERBS)
testPacketPingPong(true);
#else // !defined(USE_IBVERBS)
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
#endif // !defined(USE_IBVERBS)
}

TEST_F(PortChannelOneToOneTest, PacketPingPongPerf) { testPacketPingPongPerf(false); }

TEST_F(PortChannelOneToOneTest, PacketPingPongPerfIb) { testPacketPingPongPerf(true); }
TEST_F(PortChannelOneToOneTest, PacketPingPongPerfIb) {
#if defined(USE_IBVERBS)
testPacketPingPongPerf(true);
#else // !defined(USE_IBVERBS)
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
#endif // !defined(USE_IBVERBS)
}
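In a build configured with `-DMSCCLPP_USE_IB=OFF`, the IB variants above should now report as skipped rather than fail. One hypothetical way to confirm this (the test binary path, MPI launcher, and rank count are assumptions, not taken from this PR):

```bash
# Hypothetical invocation; adjust the binary path and launcher to your build layout
mpirun -np 2 ./build/test/mp_unit/mp_unit_tests --gtest_filter='PortChannelOneToOneTest.*Ib*'
# Expected without USE_IBVERBS: each matched test prints [  SKIPPED  ]
```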
4 changes: 2 additions & 2 deletions test/perf/CMakeLists.txt
@@ -8,7 +8,7 @@ find_package(MPI REQUIRED)

# Set up common libraries and includes for tests
set(PERF_TEST_LIBS_COMMON mscclpp ${GPU_LIBRARIES} ${NUMA_LIBRARIES} Threads::Threads MPI::MPI_CXX)
if(IBVERBS_FOUND)
if(MSCCLPP_USE_IB)
list(APPEND PERF_TEST_LIBS_COMMON ${IBVERBS_LIBRARIES})
endif()

@@ -27,7 +27,7 @@ function(add_perf_test_executable name sources)
# Link nlohmann_json - use the target from main project
target_link_libraries(${name} nlohmann_json::nlohmann_json)

if(IBVERBS_FOUND)
if(MSCCLPP_USE_IB)
target_compile_definitions(${name} PRIVATE USE_IBVERBS)
endif()
