Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -994,6 +994,11 @@ class GemmUniversal<

// Get next work tile
auto [next_work_tile_info, increment_pipe] = scheduler.fetch_next_work(work_tile_info, tile_scheduler_pipeline, tile_scheduler_pipe_consumer_state);

if (!next_work_tile_info.is_valid()) {
cutlass::arch::launch_dependent_grids();
}

work_tile_info = next_work_tile_info;
if (increment_pipe) {
++tile_scheduler_pipe_consumer_state;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1038,6 +1038,11 @@ class GemmUniversal<

// Get next work tile
auto [next_work_tile_info, increment_pipe] = scheduler.fetch_next_work(work_tile_info, tile_scheduler_pipeline, tile_scheduler_pipe_consumer_state);

if (!next_work_tile_info.is_valid()) {
cutlass::arch::launch_dependent_grids();
}

work_tile_info = next_work_tile_info;
if (increment_pipe) {
++tile_scheduler_pipe_consumer_state;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -804,15 +804,14 @@ class GemmUniversal<
// Update starting mainloop pipeline state for the next tile
mainloop_pipe_consumer_state.advance(work_k_tile_count);
}
#ifdef CUTLASS_ENABLE_GDC_FOR_SM90

if (scheduler.is_last_tile(work_tile_info)) {
// Hint on an early release of global memory resources.
// The timing of calling this function only influences performance,
// not functional correctness.
cutlass::arch::launch_dependent_grids();

}
#endif

// Index of warp group within consumer warp groups
int consumer_warp_group_idx = canonical_warp_group_idx() - NumLoadWarpGroups;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -800,7 +800,6 @@ class GemmUniversal<
else if (warp_group_role == WarpGroupRole::Consumer0 || warp_group_role == WarpGroupRole::Consumer1) {
cutlass::arch::warpgroup_reg_alloc<MmaRegisterRequirement>();

#ifdef CUTLASS_ENABLE_GDC_FOR_SM90
// It is possible to have work tiles start off invalid,
// so we have to check that first.
if (not work_tile_info.is_valid()) {
Expand All @@ -811,7 +810,6 @@ class GemmUniversal<

return;
}
#endif

if constexpr (IsSchedDynamicPersistent) {
// Consumer0's initial tile is static. It starts consuming the 2nd tile.
Expand Down Expand Up @@ -868,15 +866,13 @@ class GemmUniversal<
// Update starting mainloop pipeline state for the next tile
mainloop_pipe_consumer_state.advance(k_tile_count * NumMmaWarpGroups);

#ifdef CUTLASS_ENABLE_GDC_FOR_SM90
if (scheduler.is_last_tile(work_tile_info, NumMmaWarpGroups)) {
// Hint on an early release of global memory resources.
// The timing of calling this function only influences performance,
// not functional correctness.
cutlass::arch::launch_dependent_grids();

}
#endif

// Order two Math WG's Epilogue one after the other
math_wg_order_barrier.wait();
Expand Down