Skip to content

Commit 7510c31

Browse files
committed
Apply suggestions to more places
1 parent bca9c1b commit 7510c31

File tree

1 file changed

+10
-16
lines changed

1 file changed

+10
-16
lines changed

Src/Base/AMReX_GpuLaunchFunctsG.H

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -753,21 +753,18 @@ void launch (T const& n, L const& f) noexcept
753753
static_assert(sizeof(T) >= 2);
754754
if (amrex::isEmpty(n)) { return; }
755755
const auto& nec = Gpu::makeNExecutionConfigs<MT>(n);
756-
T ndone = 0;
757756
for (auto const& ec : nec) {
758-
T nleft = n - ndone;
757+
const T start_idx = T(ec.start_idx);
758+
const T nleft = n - start_idx;
759759
AMREX_LAUNCH_KERNEL(MT, ec.nblocks, MT, 0, Gpu::gpuStream(),
760760
[=] AMREX_GPU_DEVICE () noexcept {
761761
// This will not overflow, even though nblocks*MT might.
762762
auto tid = T(MT)*T(blockIdx.x)+T(threadIdx.x);
763763
if (tid < nleft) {
764-
f(tid+ndone);
764+
f(tid+start_idx);
765765
}
766766
});
767-
if (Long(nleft) > ec.ntotalthreads) {
768-
ndone += T(ec.ntotalthreads);
769-
}
770-
}
767+
771768
AMREX_GPU_ERROR_CHECK();
772769
}
773770

@@ -778,17 +775,16 @@ void launch (BoxND<dim> const& box, L const& f) noexcept
778775
const auto& nec = Gpu::makeNExecutionConfigs<MT>(box);
779776
const BoxIndexerND<dim> indexer(box);
780777
const auto type = box.ixType();
781-
std::uint64_t ndone = 0;
782778
for (auto const& ec : nec) {
779+
const T start_idx = T(ec.start_idx);
783780
AMREX_LAUNCH_KERNEL(MT, ec.nblocks, MT, 0, Gpu::gpuStream(),
784781
[=] AMREX_GPU_DEVICE () noexcept {
785-
auto icell = std::uint64_t(MT)*blockIdx.x+threadIdx.x + ndone;
782+
auto icell = std::uint64_t(MT)*blockIdx.x+threadIdx.x + start_idx;
786783
if (icell < indexer.numPts()) {
787784
auto iv = indexer.intVect(icell);
788785
f(BoxND<dim>(iv,iv,type));
789786
}
790787
});
791-
ndone += ec.ntotalthreads;
792788
}
793789
AMREX_GPU_ERROR_CHECK();
794790
}
@@ -824,19 +820,18 @@ ParallelFor (Gpu::KernelInfo const&, BoxND<dim> const& box, L const& f) noexcept
824820
if (amrex::isEmpty(box)) { return; }
825821
const BoxIndexerND<dim> indexer(box);
826822
const auto& nec = Gpu::makeNExecutionConfigs<MT>(box);
827-
std::uint64_t ndone = 0;
828823
for (auto const& ec : nec) {
824+
const T start_idx = T(ec.start_idx);
829825
AMREX_LAUNCH_KERNEL(MT, ec.nblocks, MT, 0, Gpu::gpuStream(),
830826
[=] AMREX_GPU_DEVICE () noexcept {
831-
auto icell = std::uint64_t(MT)*blockIdx.x+threadIdx.x + ndone;
827+
auto icell = std::uint64_t(MT)*blockIdx.x+threadIdx.x + start_idx;
832828
if (icell < indexer.numPts()) {
833829
auto iv = indexer.intVect(icell);
834830
detail::call_f_intvect_handler(f, iv,
835831
Gpu::Handler(amrex::min((indexer.numPts()-icell+(std::uint64_t)threadIdx.x),
836832
(std::uint64_t)blockDim.x)));
837833
}
838834
});
839-
ndone += ec.ntotalthreads;
840835
}
841836
AMREX_GPU_ERROR_CHECK();
842837
}
@@ -848,19 +843,18 @@ ParallelFor (Gpu::KernelInfo const&, BoxND<dim> const& box, T ncomp, L const& f)
848843
if (amrex::isEmpty(box)) { return; }
849844
const BoxIndexerND<dim> indexer(box);
850845
const auto& nec = Gpu::makeNExecutionConfigs<MT>(box);
851-
std::uint64_t ndone = 0;
852846
for (auto const& ec : nec) {
847+
const T start_idx = T(ec.start_idx);
853848
AMREX_LAUNCH_KERNEL(MT, ec.nblocks, MT, 0, Gpu::gpuStream(),
854849
[=] AMREX_GPU_DEVICE () noexcept {
855-
auto icell = std::uint64_t(MT)*blockIdx.x+threadIdx.x + ndone;
850+
auto icell = std::uint64_t(MT)*blockIdx.x+threadIdx.x + start_idx;
856851
if (icell < indexer.numPts()) {
857852
auto iv = indexer.intVect(icell);
858853
detail::call_f_intvect_ncomp_handler(f, iv, ncomp,
859854
Gpu::Handler(amrex::min((indexer.numPts()-icell+(std::uint64_t)threadIdx.x),
860855
(std::uint64_t)blockDim.x)));
861856
}
862857
});
863-
ndone += ec.ntotalthreads;
864858
}
865859
AMREX_GPU_ERROR_CHECK();
866860
}

0 commit comments

Comments
 (0)