Skip to content

Commit e3771c7

Browse files
committed
Use freeAsync for all PrefixSum backends
1 parent 369dfd1 commit e3771c7

File tree

1 file changed

+28
-15
lines changed

1 file changed

+28
-15
lines changed

Src/Base/AMReX_Scan.H

Lines changed: 28 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -378,11 +378,14 @@ T PrefixSum_mp (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum)
378378
T totalsum = 0;
379379
if (a_ret_sum) {
380380
Gpu::dtoh_memcpy_async(&totalsum, totalsum_p, sizeof(T));
381-
}
382-
Gpu::streamSynchronize();
383-
The_Arena()->free(dp);
384381

385-
AMREX_GPU_ERROR_CHECK();
382+
Gpu::streamSynchronize();
383+
The_Arena()->free(dp);
384+
385+
AMREX_GPU_ERROR_CHECK();
386+
} else {
387+
Gpu::freeAsync(The_Arena(), dp);
388+
}
386389

387390
return totalsum;
388391
}
@@ -617,11 +620,14 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum
617620
// xxxxx SYCL todo: Should test if using pinned memory and thus
618621
// avoiding memcpy is faster.
619622
Gpu::dtoh_memcpy_async(&totalsum, totalsum_p, sizeof(T));
620-
}
621-
Gpu::streamSynchronize();
622-
The_Arena()->free(dp);
623623

624-
AMREX_GPU_ERROR_CHECK();
624+
Gpu::streamSynchronize();
625+
The_Arena()->free(dp);
626+
627+
AMREX_GPU_ERROR_CHECK();
628+
} else {
629+
Gpu::freeAsync(The_Arena(), dp);
630+
}
625631

626632
return totalsum;
627633
}
@@ -780,10 +786,14 @@ T PrefixSum (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum = ret
780786
}
781787
});
782788

783-
Gpu::streamSynchronize();
784-
AMREX_GPU_ERROR_CHECK();
789+
if (totalsum_p) {
790+
Gpu::streamSynchronize();
791+
AMREX_GPU_ERROR_CHECK();
785792

786-
The_Arena()->free(dp);
793+
The_Arena()->free(dp);
794+
} else {
795+
Gpu::freeAsync(The_Arena(), dp);
796+
}
787797

788798
T ret = (a_ret_sum) ? *totalsum_p : T(0);
789799
if (totalsum_p) { The_Pinned_Arena()->free(totalsum_p); }
@@ -1177,11 +1187,14 @@ T PrefixSum (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum = ret
11771187
// xxxxx CUDA < 11 todo: Should test if using pinned memory and thus
11781188
// avoiding memcpy is faster.
11791189
Gpu::dtoh_memcpy_async(&totalsum, totalsum_p, sizeof(T));
1180-
}
1181-
Gpu::streamSynchronize();
1182-
The_Arena()->free(dp);
11831190

1184-
AMREX_GPU_ERROR_CHECK();
1191+
Gpu::streamSynchronize();
1192+
The_Arena()->free(dp);
1193+
1194+
AMREX_GPU_ERROR_CHECK();
1195+
} else {
1196+
Gpu::freeAsync(The_Arena(), dp);
1197+
}
11851198

11861199
return totalsum;
11871200
}

0 commit comments

Comments
 (0)