Skip to content

Commit fc42a39

Browse files
authored
FFT OpenBC Solver: Communication optimization (#4230)
1 parent 9e53c0c commit fc42a39

File tree

3 files changed

+42
-3
lines changed

3 files changed

+42
-3
lines changed

Src/Base/AMReX_NonLocalBC.H

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,7 @@ unpack_recv_buffer_gpu (FabArray<FAB>& mf, int scomp, int ncomp,
528528
struct PackComponents {
529529
int dest_component{0};
530530
int src_component{0};
531-
int n_components{0};
531+
int n_components{1};
532532
};
533533

534534
//! \brief Dispatch local copies to the default behaviour that knows no DTOS nor projection.

Src/FFT/AMReX_FFT_OpenBCSolver.H

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,9 @@ void OpenBCSolver<T>::solve (MF& phi, MF const& rho)
160160
inmf.setVal(T(0));
161161
inmf.ParallelCopy(rho, 0, 0, 1);
162162

163+
m_r2c.m_openbc_half = true;
163164
m_r2c.forward(inmf);
165+
m_r2c.m_openbc_half = false;
164166

165167
auto scaling_factor = m_r2c.scalingFactor();
166168

@@ -196,7 +198,9 @@ void OpenBCSolver<T>::solve (MF& phi, MF const& rho)
196198
}
197199
}
198200

201+
m_r2c.m_openbc_half = true;
199202
m_r2c.backward_doit(phi, phi.nGrowVect());
203+
m_r2c.m_openbc_half = false;
200204
}
201205

202206
}

Src/FFT/AMReX_FFT_R2C.H

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,8 @@ private:
186186
std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2y; // (z,x,y) -> (y,x,z)
187187
std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2z; // (x,y,z) -> (z,x,y)
188188
std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2x; // (z,x,y) -> (x,y,z)
189+
std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2z_half; // for openbc
190+
std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2x_half; // for openbc
189191
Swap01 m_dtos_x2y{};
190192
Swap01 m_dtos_y2x{};
191193
Swap02 m_dtos_y2z{};
@@ -209,6 +211,7 @@ private:
209211
Info m_info;
210212

211213
bool m_slab_decomp = false;
214+
bool m_openbc_half = false;
212215
};
213216

214217
template <typename T, Direction D, DomainStrategy S>
@@ -415,7 +418,24 @@ void R2C<T,D,S>::forward (MF const& inmf)
415418
}
416419
#if (AMREX_SPACEDIM == 3)
417420
else if ( m_cmd_x2z) {
418-
ParallelCopy(m_cz, m_cx, *m_cmd_x2z, 0, 0, 1, m_dtos_x2z);
421+
if (m_openbc_half) {
422+
Box upper_half = m_spectral_domain_z;
423+
// Note that z-direction's index is 0 because z is the unit-stride direction here.
424+
upper_half.growLo (0,-m_spectral_domain_z.length(0)/2);
425+
if (! m_cmd_x2z_half) {
426+
Box bottom_half = m_spectral_domain_z;
427+
bottom_half.growHi(0,-m_spectral_domain_z.length(0)/2);
428+
m_cmd_x2z_half = std::make_unique<MultiBlockCommMetaData>
429+
(m_cz, bottom_half, m_cx, IntVect(0), m_dtos_x2z);
430+
}
431+
NonLocalBC::ApplyDtosAndProjectionOnReciever packing
432+
{NonLocalBC::PackComponents{}, m_dtos_x2z};
433+
auto handler = ParallelCopy_nowait(m_cz, m_cx, *m_cmd_x2z_half, packing);
434+
m_cz.setVal(0, upper_half, 0, 1);
435+
ParallelCopy_finish(m_cz, std::move(handler), *m_cmd_x2z_half, packing);
436+
} else {
437+
ParallelCopy(m_cz, m_cx, *m_cmd_x2z, 0, 0, 1, m_dtos_x2z);
438+
}
419439
}
420440
#endif
421441
m_fft_fwd_z.template compute_c2c<Direction::forward>();
@@ -439,7 +459,22 @@ void R2C<T,D,S>::backward_doit (MF& outmf, IntVect const& ngout)
439459
}
440460
#if (AMREX_SPACEDIM == 3)
441461
else if ( m_cmd_z2x) {
442-
ParallelCopy(m_cx, m_cz, *m_cmd_z2x, 0, 0, 1, m_dtos_z2x);
462+
if (m_openbc_half) {
463+
Box upper_half = m_spectral_domain_x;
464+
upper_half.growLo (2,-m_spectral_domain_x.length(2)/2);
465+
if (! m_cmd_z2x_half) {
466+
Box bottom_half = m_spectral_domain_x;
467+
bottom_half.growHi(2,-m_spectral_domain_x.length(2)/2);
468+
m_cmd_z2x_half = std::make_unique<MultiBlockCommMetaData>
469+
(m_cx, bottom_half, m_cz, IntVect(0), m_dtos_z2x);
470+
}
471+
NonLocalBC::ApplyDtosAndProjectionOnReciever packing
472+
{NonLocalBC::PackComponents{}, m_dtos_z2x};
473+
auto handler = ParallelCopy_nowait(m_cx, m_cz, *m_cmd_z2x_half, packing);
474+
ParallelCopy_finish(m_cx, std::move(handler), *m_cmd_z2x_half, packing);
475+
} else {
476+
ParallelCopy(m_cx, m_cz, *m_cmd_z2x, 0, 0, 1, m_dtos_z2x);
477+
}
443478
}
444479
#endif
445480

0 commit comments

Comments
 (0)