Skip to content

Commit 9994f53

Browse files
authored
Fixes for no-IB systems (#667)
* Add a compile flag `MSCCLPP_USE_IB` that explicitly specifies IB on/off * Fix `nvidia-peermem` check; no need for DMABUF supported systems * Fix `mp_unit_tests` to skip all IB tests when built with `-DMSCCLPP_USE_IB=OFF`
1 parent 2b987cf commit 9994f53

File tree

9 files changed

+90
-60
lines changed

9 files changed

+90
-60
lines changed

CMakeLists.txt

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -19,23 +19,23 @@ set(MSCCLPP_VERSION "${MSCCLPP_MAJOR}.${MSCCLPP_MINOR}.${MSCCLPP_PATCH}")
1919
find_package(Git)
2020
set(GIT_HASH "UNKNOWN")
2121
if(Git_FOUND)
22-
execute_process(
23-
COMMAND "${GIT_EXECUTABLE}" rev-parse --short=12 HEAD
24-
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
25-
OUTPUT_VARIABLE _git_out
26-
OUTPUT_STRIP_TRAILING_WHITESPACE
27-
)
28-
if(NOT _git_out STREQUAL "")
29-
set(GIT_HASH "${_git_out}")
30-
endif()
22+
execute_process(
23+
COMMAND "${GIT_EXECUTABLE}" rev-parse --short=12 HEAD
24+
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
25+
OUTPUT_VARIABLE _git_out
26+
OUTPUT_STRIP_TRAILING_WHITESPACE
27+
)
28+
if(NOT _git_out STREQUAL "")
29+
set(GIT_HASH "${_git_out}")
30+
endif()
3131
else()
3232
message(WARNING "Git not found, setting GIT_HASH to 'UNKNOWN'")
3333
endif()
3434

3535
configure_file(
36-
"${CMAKE_CURRENT_SOURCE_DIR}/include/mscclpp/version.hpp.in"
37-
"${CMAKE_CURRENT_BINARY_DIR}/include/mscclpp/version.hpp"
38-
@ONLY
36+
"${CMAKE_CURRENT_SOURCE_DIR}/include/mscclpp/version.hpp.in"
37+
"${CMAKE_CURRENT_BINARY_DIR}/include/mscclpp/version.hpp"
38+
@ONLY
3939
)
4040

4141
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
@@ -47,6 +47,7 @@ option(MSCCLPP_BUILD_PYTHON_BINDINGS "Build Python bindings" ON)
4747
option(MSCCLPP_BUILD_APPS_NCCL "Build NCCL interfaces" ON)
4848
option(MSCCLPP_USE_CUDA "Use NVIDIA/CUDA." OFF)
4949
option(MSCCLPP_USE_ROCM "Use AMD/ROCm." OFF)
50+
option(MSCCLPP_USE_IB "Use InfiniBand." ON)
5051
option(MSCCLPP_BYPASS_GPU_CHECK "Bypass GPU check." OFF)
5152
option(MSCCLPP_NPKIT_FLAGS "Set NPKIT flags" OFF)
5253
set(MSCCLPP_GPU_ARCHS "" CACHE STRING "Specify GPU architectures with delimiters (comma, space, or semicolon).")
@@ -141,23 +142,18 @@ else()
141142
endif()
142143

143144
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
144-
add_compile_definitions(DEBUG_BUILD)
145+
add_compile_definitions(DEBUG_BUILD)
145146
endif()
146147

147-
find_package(IBVerbs)
148+
if(MSCCLPP_USE_IB)
149+
find_package(IBVerbs)
150+
if(NOT IBVERBS_FOUND)
151+
message(FATAL_ERROR "IBVerbs not found. Install libibverbs-dev or rdma-core-devel. If you want to disable InfiniBand, add `-DMSCCLPP_USE_IB=OFF` in your cmake command.")
152+
endif()
153+
endif()
148154
find_package(NUMA REQUIRED)
149155
find_package(Threads REQUIRED)
150156

151-
set(CMAKE_COLOR_DIAGNOSTICS ON)
152-
function(msg_red text)
153-
string(ASCII 27 ESC)
154-
message("${ESC}[31m${text}${ESC}[0m")
155-
endfunction()
156-
157-
if(NOT IBVERBS_FOUND)
158-
msg_red("libibverbs not found. Install libibverbs-dev or rdma-core-devel.")
159-
endif()
160-
161157
include(FetchContent)
162158
FetchContent_Declare(json URL https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz)
163159
FetchContent_MakeAvailable(json)
@@ -168,7 +164,7 @@ target_include_directories(mscclpp_obj
168164
${GPU_INCLUDE_DIRS}
169165
${NUMA_INCLUDE_DIRS})
170166
target_link_libraries(mscclpp_obj PRIVATE ${GPU_LIBRARIES} ${NUMA_LIBRARIES} nlohmann_json::nlohmann_json Threads::Threads dl)
171-
if(IBVERBS_FOUND)
167+
if(MSCCLPP_USE_IB)
172168
target_include_directories(mscclpp_obj SYSTEM PRIVATE ${IBVERBS_INCLUDE_DIRS})
173169
target_link_libraries(mscclpp_obj PRIVATE ${IBVERBS_LIBRARIES})
174170
target_compile_definitions(mscclpp_obj PUBLIC USE_IBVERBS)
@@ -179,7 +175,7 @@ if(MSCCLPP_USE_CUDA)
179175
elseif(MSCCLPP_USE_ROCM)
180176
target_compile_definitions(mscclpp_obj PRIVATE MSCCLPP_USE_ROCM)
181177
foreach(arch ${MSCCLPP_GPU_ARCHS})
182-
target_compile_options(mscclpp_obj PRIVATE --offload-arch=${arch})
178+
target_compile_options(mscclpp_obj PRIVATE --offload-arch=${arch})
183179
endforeach()
184180
endif()
185181
if(MSCCLPP_ENABLE_TRACE)

docs/quickstart.md

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
* AMD MI250X GPUs + ROCm >= 5.7
2020
* AMD MI300X GPUs + ROCm >= 6.0
2121
* OS
22-
* Tested on Ubuntu 18.04 and later
22+
* Tested on Ubuntu 20.04 and later
2323
* Libraries
2424
* [libnuma](https://github.com/numactl/numactl)
2525
```bash
@@ -32,10 +32,7 @@
3232
If you don't want to build Python module, you need to set `-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF` in your `cmake` command (see details in [Install from Source](#install-from-source)).
3333
* (Optional, for benchmarks) MPI
3434
* Others
35-
* For NVIDIA platforms, `nvidia_peermem` driver should be loaded on all nodes. Check it via:
36-
```bash
37-
lsmod | grep nvidia_peermem
38-
```
35+
* For RDMA (InfiniBand or RoCE) support on NVIDIA platforms, [GPUDirect RDMA](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/gpu-operator-rdma.html#gpudirect-rdma-and-gpudirect-storage) should be supported by the system. See the detailed prerequisites from [this NVIDIA documentation](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/gpu-operator-rdma.html#common-prerequisites).
3936
* For NVLink SHARP (NVLS) support on NVIDIA platforms, the Linux kernel version should be 5.6 or above.
4037
4138
(docker-images)=
@@ -91,6 +88,7 @@ There are a few optional CMake options you can set:
9188
- `-DMSCCLPP_GPU_ARCHS=<arch-list>`: Specify the GPU architectures to build for. For example, `-DMSCCLPP_GPU_ARCHS="80,90"` for NVIDIA A100 and H100 GPUs, `-DMSCCLPP_GPU_ARCHS=gfx942` for AMD MI300x GPU.
9289
- `-DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON`: If the build environment doesn't have GPUs and only has CUDA installed, you can set these options to bypass GPU checks and use CUDA APIs. This is useful for building on CI systems or environments without GPUs.
9390
- `-DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON`: If the build environment doesn't have GPUs and only has ROCm installed, you can set these options to bypass GPU checks and use ROCm APIs.
91+
- `-DMSCCLPP_USE_IB=OFF`: Don't build InfiniBand support.
9492
- `-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF`: Don't build the Python module.
9593
- `-DMSCCLPP_BUILD_TESTS=OFF`: Don't build the tests.
9694
- `-DMSCCLPP_BUILD_APPS_NCCL=OFF`: Don't build the NCCL API.

src/ib.cc

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
#if !defined(__HIP_PLATFORM_AMD__)
2626

2727
// Check if nvidia_peermem kernel module is loaded
28-
static bool checkNvPeerMemLoaded() {
28+
[[maybe_unused]] static bool checkNvPeerMemLoaded() {
2929
std::ifstream file("/proc/modules");
3030
std::string line;
3131
while (std::getline(file, line)) {
@@ -77,6 +77,12 @@ IbMr::IbMr(ibv_pd* pd, void* buff, std::size_t size) : buff(buff) {
7777
throw Error("Registeration of dma-buf based memory region failed on HIP platform", ErrorCode::InvalidUsage);
7878
#endif // !defined(__HIP_PLATFORM_AMD__)
7979
} else {
80+
#if !defined(__HIP_PLATFORM_AMD__)
81+
// nvidia-peermem is needed only when DMABUF is not supported
82+
if (!checkNvPeerMemLoaded()) {
83+
throw Error("nvidia_peermem kernel module is not loaded", ErrorCode::InternalError);
84+
}
85+
#endif // !defined(__HIP_PLATFORM_AMD__)
8086
this->mr = IBVerbs::ibv_reg_mr2(pd, reinterpret_cast<void*>(addr), pages * pageSize,
8187
IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ |
8288
IBV_ACCESS_RELAXED_ORDERING | IBV_ACCESS_REMOTE_ATOMIC);
@@ -329,11 +335,6 @@ int IbQp::getWcStatus(int idx) const { return (*this->wcs)[idx].status; }
329335
int IbQp::getNumCqItems() const { return this->numSignaledPostedItems; }
330336

331337
IbCtx::IbCtx(const std::string& devName) : devName(devName) {
332-
#if !defined(__HIP_PLATFORM_AMD__)
333-
if (!checkNvPeerMemLoaded()) {
334-
throw Error("nvidia_peermem kernel module is not loaded", ErrorCode::InternalError);
335-
}
336-
#endif // !defined(__HIP_PLATFORM_AMD__)
337338
int num;
338339
struct ibv_device** devices = IBVerbs::ibv_get_device_list(&num);
339340
for (int i = 0; i < num; ++i) {

test/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
find_package(MPI)
55

66
set(TEST_LIBS_COMMON mscclpp ${GPU_LIBRARIES} ${NUMA_LIBRARIES} Threads::Threads)
7-
if(IBVERBS_FOUND)
7+
if(MSCCLPP_USE_IB)
88
list(APPEND TEST_LIBS_COMMON ${IBVERBS_LIBRARIES})
99
endif()
1010
set(TEST_LIBS_GTEST GTest::gtest_main GTest::gmock_main)
@@ -19,7 +19,7 @@ endif()
1919
function(add_test_executable name sources)
2020
add_executable(${name} ${sources})
2121
target_link_libraries(${name} ${TEST_LIBS_COMMON} MPI::MPI_CXX)
22-
if(IBVERBS_FOUND)
22+
if(MSCCLPP_USE_IB)
2323
target_compile_definitions(${name} PRIVATE USE_IBVERBS)
2424
endif()
2525
target_include_directories(${name} ${TEST_INC_COMMON} ${TEST_INC_INTERNAL})

test/mp_unit/communicator_tests.cu

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,11 @@ void CommunicatorTest::SetUp() {
104104

105105
ASSERT_EQ((deviceBufferSize / sizeof(int)) % gEnv->worldSize, 0);
106106

107+
#if defined(USE_IBVERBS)
107108
connectMesh(true, true, false);
109+
#else
110+
connectMesh(true, false, false);
111+
#endif
108112

109113
devicePtr.resize(numBuffers);
110114
localMemory.resize(numBuffers);
@@ -117,10 +121,16 @@ void CommunicatorTest::SetUp() {
117121
}
118122
}
119123

124+
#if defined(USE_IBVERBS)
125+
auto transport = mscclpp::Transport::CudaIpc | ibTransport;
126+
#else
127+
auto transport = mscclpp::Transport::CudaIpc;
128+
#endif
129+
120130
for (size_t n = 0; n < numBuffers; n++) {
121131
devicePtr[n] = mscclpp::detail::gpuCallocShared<int>(deviceBufferSize / sizeof(int));
122-
registerMemoryPairs(devicePtr[n].get(), deviceBufferSize, mscclpp::Transport::CudaIpc | ibTransport, 0, remoteRanks,
123-
localMemory[n], remoteMemory[n]);
132+
registerMemoryPairs(devicePtr[n].get(), deviceBufferSize, transport, 0, remoteRanks, localMemory[n],
133+
remoteMemory[n]);
124134
}
125135
}
126136

test/mp_unit/ib_tests.cu

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ void IbTestBase::SetUp() {
1818
}
1919

2020
void IbPeerToPeerTest::SetUp() {
21+
#if !defined(USE_IBVERBS)
22+
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
23+
#endif // !defined(USE_IBVERBS)
24+
2125
IbTestBase::SetUp();
2226

2327
mscclpp::UniqueId id;

test/mp_unit/memory_channel_tests.cu

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ void MemoryChannelOneToOneTest::setupMeshConnections(std::vector<mscclpp::Memory
2323
const int rank = communicator->bootstrap()->getRank();
2424
const int worldSize = communicator->bootstrap()->getNranks();
2525
const bool isInPlace = (outputBuff == nullptr);
26-
mscclpp::TransportFlags transport = mscclpp::Transport::CudaIpc | ibTransport;
26+
mscclpp::TransportFlags transport = mscclpp::Transport::CudaIpc;
2727

2828
std::vector<std::shared_future<std::shared_ptr<mscclpp::Connection>>> connectionFutures(worldSize);
2929
std::vector<std::shared_future<mscclpp::RegisteredMemory>> remoteMemFutures(worldSize);
@@ -38,11 +38,8 @@ void MemoryChannelOneToOneTest::setupMeshConnections(std::vector<mscclpp::Memory
3838
if (r == rank) {
3939
continue;
4040
}
41-
if (rankToNode(r) == rankToNode(gEnv->rank)) {
42-
connectionFutures[r] = communicator->connect(mscclpp::Transport::CudaIpc, r);
43-
} else {
44-
connectionFutures[r] = communicator->connect(ibTransport, r);
45-
}
41+
// No IB for MemoryChannel tests
42+
connectionFutures[r] = communicator->connect(mscclpp::Transport::CudaIpc, r);
4643

4744
if (isInPlace) {
4845
communicator->sendMemory(inputBufRegMem, r);

test/mp_unit/port_channel_tests.cu

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -242,31 +242,43 @@ void PortChannelOneToOneTest::testPingPongPerf(PingPongTestParams params) {
242242
}
243243

244244
TEST_F(PortChannelOneToOneTest, PingPong) {
245-
testPingPong(PingPongTestParams{.useIPC = true, .useIB = true, .useEthernet = false, .waitWithPoll = false});
245+
testPingPong(PingPongTestParams{.useIPC = true, .useIB = false, .useEthernet = false, .waitWithPoll = false});
246246
}
247247

248248
TEST_F(PortChannelOneToOneTest, PingPongIb) {
249+
#if defined(USE_IBVERBS)
249250
testPingPong(PingPongTestParams{.useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = false});
251+
#else // !defined(USE_IBVERBS)
252+
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
253+
#endif // !defined(USE_IBVERBS)
250254
}
251255

252256
TEST_F(PortChannelOneToOneTest, PingPongEthernet) {
253257
testPingPong(PingPongTestParams{.useIPC = false, .useIB = false, .useEthernet = true, .waitWithPoll = false});
254258
}
255259

256260
TEST_F(PortChannelOneToOneTest, PingPongWithPoll) {
257-
testPingPong(PingPongTestParams{.useIPC = true, .useIB = true, .useEthernet = false, .waitWithPoll = true});
261+
testPingPong(PingPongTestParams{.useIPC = true, .useIB = false, .useEthernet = false, .waitWithPoll = true});
258262
}
259263

260264
TEST_F(PortChannelOneToOneTest, PingPongIbWithPoll) {
265+
#if defined(USE_IBVERBS)
261266
testPingPong(PingPongTestParams{.useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = true});
267+
#else // !defined(USE_IBVERBS)
268+
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
269+
#endif // !defined(USE_IBVERBS)
262270
}
263271

264272
TEST_F(PortChannelOneToOneTest, PingPongPerf) {
265-
testPingPongPerf(PingPongTestParams{.useIPC = true, .useIB = true, .useEthernet = false, .waitWithPoll = false});
273+
testPingPongPerf(PingPongTestParams{.useIPC = true, .useIB = false, .useEthernet = false, .waitWithPoll = false});
266274
}
267275

268276
TEST_F(PortChannelOneToOneTest, PingPongPerfIb) {
277+
#if defined(USE_IBVERBS)
269278
testPingPongPerf(PingPongTestParams{.useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = false});
279+
#else // !defined(USE_IBVERBS)
280+
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
281+
#endif // !defined(USE_IBVERBS)
270282
}
271283

272284
TEST_F(PortChannelOneToOneTest, PingPongPerfEthernet) {
@@ -339,7 +351,7 @@ __global__ void kernelProxyLLPingPong(int* buff, mscclpp::LLPacket* putPktBuf, m
339351
}
340352
}
341353

342-
void PortChannelOneToOneTest::testPacketPingPong(bool useIbOnly) {
354+
void PortChannelOneToOneTest::testPacketPingPong(bool useIb) {
343355
if (gEnv->rank >= numRanksToUse) return;
344356

345357
const int nElem = 4 * 1024 * 1024;
@@ -351,8 +363,8 @@ void PortChannelOneToOneTest::testPacketPingPong(bool useIbOnly) {
351363
auto putPacketBuffer = mscclpp::GpuBuffer<mscclpp::LLPacket>(nPacket).memory();
352364
auto getPacketBuffer = mscclpp::GpuBuffer<mscclpp::LLPacket>(nPacket).memory();
353365

354-
setupMeshConnections(portChannels, !useIbOnly, true, false, putPacketBuffer.get(),
355-
nPacket * sizeof(mscclpp::LLPacket), getPacketBuffer.get(), nPacket * sizeof(mscclpp::LLPacket));
366+
setupMeshConnections(portChannels, !useIb, useIb, false, putPacketBuffer.get(), nPacket * sizeof(mscclpp::LLPacket),
367+
getPacketBuffer.get(), nPacket * sizeof(mscclpp::LLPacket));
356368

357369
ASSERT_EQ(portChannels.size(), 1);
358370

@@ -406,7 +418,7 @@ void PortChannelOneToOneTest::testPacketPingPong(bool useIbOnly) {
406418
proxyService->stopProxy();
407419
}
408420

409-
void PortChannelOneToOneTest::testPacketPingPongPerf(bool useIbOnly) {
421+
void PortChannelOneToOneTest::testPacketPingPongPerf(bool useIb) {
410422
if (gEnv->rank >= numRanksToUse) return;
411423

412424
const int nElem = 4 * 1024 * 1024;
@@ -418,8 +430,8 @@ void PortChannelOneToOneTest::testPacketPingPongPerf(bool useIbOnly) {
418430
auto putPacketBuffer = mscclpp::GpuBuffer<mscclpp::LLPacket>(nPacket).memory();
419431
auto getPacketBuffer = mscclpp::GpuBuffer<mscclpp::LLPacket>(nPacket).memory();
420432

421-
setupMeshConnections(portChannels, !useIbOnly, true, false, putPacketBuffer.get(),
422-
nPacket * sizeof(mscclpp::LLPacket), getPacketBuffer.get(), nPacket * sizeof(mscclpp::LLPacket));
433+
setupMeshConnections(portChannels, !useIb, useIb, false, putPacketBuffer.get(), nPacket * sizeof(mscclpp::LLPacket),
434+
getPacketBuffer.get(), nPacket * sizeof(mscclpp::LLPacket));
423435

424436
ASSERT_EQ(portChannels.size(), 1);
425437

@@ -464,8 +476,20 @@ void PortChannelOneToOneTest::testPacketPingPongPerf(bool useIbOnly) {
464476

465477
TEST_F(PortChannelOneToOneTest, PacketPingPong) { testPacketPingPong(false); }
466478

467-
TEST_F(PortChannelOneToOneTest, PacketPingPongIb) { testPacketPingPong(true); }
479+
TEST_F(PortChannelOneToOneTest, PacketPingPongIb) {
480+
#if defined(USE_IBVERBS)
481+
testPacketPingPong(true);
482+
#else // !defined(USE_IBVERBS)
483+
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
484+
#endif // !defined(USE_IBVERBS)
485+
}
468486

469487
TEST_F(PortChannelOneToOneTest, PacketPingPongPerf) { testPacketPingPongPerf(false); }
470488

471-
TEST_F(PortChannelOneToOneTest, PacketPingPongPerfIb) { testPacketPingPongPerf(true); }
489+
TEST_F(PortChannelOneToOneTest, PacketPingPongPerfIb) {
490+
#if defined(USE_IBVERBS)
491+
testPacketPingPongPerf(true);
492+
#else // !defined(USE_IBVERBS)
493+
GTEST_SKIP() << "This test requires IBVerbs that the current build does not support.";
494+
#endif // !defined(USE_IBVERBS)
495+
}

test/perf/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ find_package(MPI REQUIRED)
88

99
# Set up common libraries and includes for tests
1010
set(PERF_TEST_LIBS_COMMON mscclpp ${GPU_LIBRARIES} ${NUMA_LIBRARIES} Threads::Threads MPI::MPI_CXX)
11-
if(IBVERBS_FOUND)
11+
if(MSCCLPP_USE_IB)
1212
list(APPEND PERF_TEST_LIBS_COMMON ${IBVERBS_LIBRARIES})
1313
endif()
1414

@@ -27,7 +27,7 @@ function(add_perf_test_executable name sources)
2727
# Link nlohmann_json - use the target from main project
2828
target_link_libraries(${name} nlohmann_json::nlohmann_json)
2929

30-
if(IBVERBS_FOUND)
30+
if(MSCCLPP_USE_IB)
3131
target_compile_definitions(${name} PRIVATE USE_IBVERBS)
3232
endif()
3333

0 commit comments

Comments
 (0)