@@ -36,36 +36,36 @@ AllToAllInterNode::AllToAllInterNode(
3636 numTokensPerDP = mallocZeroBuffer<uint32_t >(numLocalExperts * numDPGroups);
3737
3838 numTokensBuffer = (uint64_t *)nvshmem_malloc (sizeof (uint64_t ) * numLocalExperts * numDPGroups);
39- ROSE_ASSERT (numTokensBuffer != nullptr , " failed to allocate numTokensBuffer" );
39+ PPLX_ASSERT (numTokensBuffer != nullptr , " failed to allocate numTokensBuffer" );
4040 cudaMemset (numTokensBuffer, 0 , sizeof (uint64_t ) * numLocalExperts * numDPGroups);
4141
4242 numDispatchRecvBuffer =
4343 (uint64_t *)nvshmem_malloc (sizeof (uint64_t ) * numLocalExperts * numDPGroups);
44- ROSE_ASSERT (numDispatchRecvBuffer != nullptr , " failed to allocate numDispatchRecvBuffer" );
44+ PPLX_ASSERT (numDispatchRecvBuffer != nullptr , " failed to allocate numDispatchRecvBuffer" );
4545 cudaMemset (numDispatchRecvBuffer, 0 , sizeof (uint64_t ) * numLocalExperts * numDPGroups);
4646
4747 combineSignalBuffer = (uint64_t *)nvshmem_malloc (sizeof (uint64_t ) * maxNumTokens);
48- ROSE_ASSERT (combineSignalBuffer != nullptr , " failed to allocate combineSignalBuffer" );
48+ PPLX_ASSERT (combineSignalBuffer != nullptr , " failed to allocate combineSignalBuffer" );
4949 cudaMemset (combineSignalBuffer, 0 , sizeof (uint64_t ) * maxNumTokens);
5050
5151 combineSyncBuffer = (uint64_t *)nvshmem_malloc (sizeof (uint64_t ) * worldSize);
52- ROSE_ASSERT (combineSyncBuffer != nullptr , " failed to allocate combineSyncBuffer" );
52+ PPLX_ASSERT (combineSyncBuffer != nullptr , " failed to allocate combineSyncBuffer" );
5353 cudaMemset (combineSyncBuffer, 0 , sizeof (uint64_t ) * worldSize);
5454
5555 // Buffers for dispatch.
5656 const size_t perTokenBytes =
5757 round_up<size_t >(hiddenDimBytes + hiddenDimScaleBytes + sizeof (uint32_t ), 16 );
5858 xDispatchIn = (std::byte *)nvshmem_malloc (maxNumTokens * perTokenBytes);
59- ROSE_ASSERT (xDispatchIn != nullptr , " failed to allocate xDispatchIn" );
59+ PPLX_ASSERT (xDispatchIn != nullptr , " failed to allocate xDispatchIn" );
6060 xDispatchOut = (std::byte *)nvshmem_malloc (maxBatchTokens * perTokenBytes);
61- ROSE_ASSERT (xDispatchOut != nullptr , " failed to allocate xDispatchOut" );
61+ PPLX_ASSERT (xDispatchOut != nullptr , " failed to allocate xDispatchOut" );
6262
6363 // Buffers for combine. The allocations are a bit wider to accommodate all
6464 // possible data types (primarily float for testing and bfloat16 for prod).
6565 xCombineIn = (std::byte *)nvshmem_malloc (maxBatchTokens * hiddenDim * sizeof (float ));
66- ROSE_ASSERT (xCombineIn != nullptr , " failed to allocate xCombineIn" );
66+ PPLX_ASSERT (xCombineIn != nullptr , " failed to allocate xCombineIn" );
6767 xCombineOut = (std::byte *)nvshmem_malloc (maxNumTokens * numExperts * hiddenDim * sizeof (float ));
68- ROSE_ASSERT (xCombineOut != nullptr , " failed to allocate xCombineOut" );
68+ PPLX_ASSERT (xCombineOut != nullptr , " failed to allocate xCombineOut" );
6969
7070 // Buffers for token tracking.
7171 sourceIndex = mallocZeroBuffer<uint32_t >(maxBatchTokens);
0 commit comments