@@ -378,11 +378,14 @@ T PrefixSum_mp (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum)
378378 T totalsum = 0 ;
379379 if (a_ret_sum) {
380380 Gpu::dtoh_memcpy_async (&totalsum, totalsum_p, sizeof (T));
381- }
382- Gpu::streamSynchronize ();
383- The_Arena ()->free (dp);
384381
385- AMREX_GPU_ERROR_CHECK ();
382+ Gpu::streamSynchronize ();
383+ The_Arena ()->free (dp);
384+
385+ AMREX_GPU_ERROR_CHECK ();
386+ } else {
387+ Gpu::freeAsync (The_Arena (), dp);
388+ }
386389
387390 return totalsum;
388391}
@@ -617,11 +620,14 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum
617620 // xxxxx SYCL todo: Should test if using pinned memory and thus
618621 // avoiding memcpy is faster.
619622 Gpu::dtoh_memcpy_async (&totalsum, totalsum_p, sizeof (T));
620- }
621- Gpu::streamSynchronize ();
622- The_Arena ()->free (dp);
623623
624- AMREX_GPU_ERROR_CHECK ();
624+ Gpu::streamSynchronize ();
625+ The_Arena ()->free (dp);
626+
627+ AMREX_GPU_ERROR_CHECK ();
628+ } else {
629+ Gpu::freeAsync (The_Arena (), dp);
630+ }
625631
626632 return totalsum;
627633}
@@ -780,10 +786,14 @@ T PrefixSum (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum = ret
780786 }
781787 });
782788
783- Gpu::streamSynchronize ();
784- AMREX_GPU_ERROR_CHECK ();
789+ if (totalsum_p) {
790+ Gpu::streamSynchronize ();
791+ AMREX_GPU_ERROR_CHECK ();
785792
786- The_Arena ()->free (dp);
793+ The_Arena ()->free (dp);
794+ } else {
795+ Gpu::freeAsync (The_Arena (), dp);
796+ }
787797
788798 T ret = (a_ret_sum) ? *totalsum_p : T (0 );
789799 if (totalsum_p) { The_Pinned_Arena ()->free (totalsum_p); }
@@ -1177,11 +1187,14 @@ T PrefixSum (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum = ret
11771187 // xxxxx CUDA < 11 todo: Should test if using pinned memory and thus
11781188 // avoiding memcpy is faster.
11791189 Gpu::dtoh_memcpy_async (&totalsum, totalsum_p, sizeof (T));
1180- }
1181- Gpu::streamSynchronize ();
1182- The_Arena ()->free (dp);
11831190
1184- AMREX_GPU_ERROR_CHECK ();
1191+ Gpu::streamSynchronize ();
1192+ The_Arena ()->free (dp);
1193+
1194+ AMREX_GPU_ERROR_CHECK ();
1195+ } else {
1196+ Gpu::freeAsync (The_Arena (), dp);
1197+ }
11851198
11861199 return totalsum;
11871200}
0 commit comments