Skip to content

Commit f4a77c5

Browse files
Binyang2014 authored and seagater committed
Fix rocm build issue
1 parent 08a8903 commit f4a77c5

File tree

2 files changed

+2
-3
lines changed

2 files changed

+2
-3
lines changed

apps/nccl/src/allreduce.hpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1237,7 +1237,6 @@ class AllreduceNvlsPacket : public mscclpp::AlgorithmBuilder {
12371237

12381238
size_t scratchBufferSize_;
12391239
std::shared_ptr<char> scratchBuffer_;
1240-
const int nSegmentsForScratchBuffer_ = 2;
12411240
const size_t nvlsBufferSize_ = (1 << 30);
12421241

12431242
std::shared_ptr<uint32_t> deviceFlag_;

src/include/execution_kernel.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1177,7 +1177,7 @@ class ExecutionKernel {
11771177
case DataType::FP8_E4M3:
11781178
executionKernel<__fp8_e4m3, PacketType, ReuseScratch><<<nthreadblocks, nthreads, sharedMemSize, stream>>>(
11791179
rank, (__fp8_e4m3*)src, (__fp8_e4m3*)dst, (__fp8_e4m3*)scratch, scratchOffset, scratchChunkSize, plan,
1180-
semaphores, flag
1180+
semaphores, localMemoryIdBegin, flag
11811181
#if defined(ENABLE_NPKIT)
11821182
,
11831183
NpKit::GetGpuEventCollectContexts(), NpKit::GetCpuTimestamp());
@@ -1188,7 +1188,7 @@ class ExecutionKernel {
11881188
case DataType::FP8_E5M2:
11891189
executionKernel<__fp8_e5m2, PacketType, ReuseScratch><<<nthreadblocks, nthreads, sharedMemSize, stream>>>(
11901190
rank, (__fp8_e5m2*)src, (__fp8_e5m2*)dst, (__fp8_e5m2*)scratch, scratchOffset, scratchChunkSize, plan,
1191-
semaphores, flag
1191+
semaphores, localMemoryIdBegin, flag
11921192
#if defined(ENABLE_NPKIT)
11931193
,
11941194
NpKit::GetGpuEventCollectContexts(), NpKit::GetCpuTimestamp());

0 commit comments

Comments
 (0)