Skip to content

Commit 6e7ca38

Browse files
committed
Merge 5.1 release branch into amd-fftw
2 parents cfcb67c + 0c35a58 commit 6e7ca38

File tree

7 files changed

+408
-477
lines changed

7 files changed

+408
-477
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,7 @@ if (MSVC)
254254
endif(MSVC)
255255

256256
string(TIMESTAMP TODAY "%Y%m%d")
257-
add_compile_definitions(AOCL_FFTW_VERSION="AOCL-FFTW 5.0.0 Build ${TODAY}")
257+
add_compile_definitions(AOCL_FFTW_VERSION="AOCL-FFTW 5.1.0 Build ${TODAY}")
258258

259259
find_library (LIBM_LIBRARY NAMES m)
260260
if (LIBM_LIBRARY)

COPYRIGHT

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
/*
22
* Copyright (c) 2003, 2007-14 Matteo Frigo
33
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4-
* Copyright (C) 2019-2024, Advanced Micro Devices, Inc. All Rights Reserved.
4+
* Copyright (C) 2019-2025, Advanced Micro Devices, Inc. All Rights Reserved.
55
*
66
* This program is free software; you can redistribute it and/or modify
77
* it under the terms of the GNU General Public License as published by

LICENSE

Lines changed: 265 additions & 468 deletions
Large diffs are not rendered by default.

NOTICES

Lines changed: 134 additions & 0 deletions
Large diffs are not rendered by default.

configure

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18385,7 +18385,7 @@ fi
1838518385

1838618386
dateVar=`date +"%Y%m%d"`
1838718387

18388-
printf "%s\n" "#define AOCL_FFTW_VERSION \"AOCL-FFTW 5.0.0 Build $dateVar\"" >>confdefs.h
18388+
printf "%s\n" "#define AOCL_FFTW_VERSION \"AOCL-FFTW 5.1.0 Build $dateVar\"" >>confdefs.h
1838918389

1839018390
# Check whether --enable-amd-fast-planner was given.
1839118391
if test ${enable_amd_fast_planner+y}

configure.ac

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -790,7 +790,7 @@ AC_ARG_ENABLE(openmp, [AC_HELP_STRING([--enable-openmp],[use OpenMP directives f
790790
AC_ARG_ENABLE(threads, [AC_HELP_STRING([--enable-threads],[compile FFTW SMP threads library])], enable_threads=$enableval, enable_threads=no)
791791
dnl aocl version number of amd-fftw
792792
dateVar=`date +"%Y%m%d"`
793-
AC_DEFINE_UNQUOTED(AOCL_FFTW_VERSION,"AOCL-FFTW 5.0.0 Build $dateVar",[AOCL Version of AMD-FFTW])
793+
AC_DEFINE_UNQUOTED(AOCL_FFTW_VERSION,"AOCL-FFTW 5.1.0 Build $dateVar",[AOCL Version of AMD-FFTW])
794794
dnl amd optimization switch to enable AMD Fast Planner for AMD cpus --enable-amd-fast-planner
795795
AC_ARG_ENABLE(amd-fast-planner, [AC_HELP_STRING([--enable-amd-fast-planner],[enable AMD Fast Planner for a faster planning time on AMD cpus])], have_amd_fast_planner=$enableval, have_amd_fast_planner=no)
796796
dnl amd optimization switch to enable AMD Top N Planner for AMD cpus --enable-amd-top-n-planner

mpi/transpose-pairwise-omc.c

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ static void transpose_chunks(int *sched, int n_pes, int my_pe,
7171
buf[1] = bufs[1];
7272
#endif
7373
int pe = sched[0], pe2, j=0;
74-
MPI_Status send_status, recv_status;
74+
MPI_Request send_request, recv_request;
7575

7676
#ifdef AMD_MPI_TRANSPOSE_LOGS
7777
printf("TRANSPOSE-PAIRWISE: n_pes[%d], my_pe[%d], first_pe[%d]\n", n_pes, my_pe, pe);
@@ -105,13 +105,13 @@ static void transpose_chunks(int *sched, int n_pes, int my_pe,
105105
#ifdef AMD_MPI_TRANSPOSE_LOGS
106106
printf("OVERLAP REGION:(my_pe != pe)=> my_pe[%d], i[%d]:pe[%d], src_offset[%d], dst_offset[%d], size[%d]\n", my_pe, i, pe2, sbo[pe2], rbo[pe2], sbs[pe2]);
107107
#endif
108-
MPI_Isend(buf[j&0x1], (int) (sbs[pe]), FFTW_MPI_TYPE, pe, (my_pe * n_pes + pe) & 0xffff, comm, &send_status);
109-
MPI_Irecv(O + rbo[pe], (int) (rbs[pe]), FFTW_MPI_TYPE, pe, (pe * n_pes + my_pe) & 0xffff, comm, &recv_status);
108+
MPI_Isend(buf[j&0x1], (int) (sbs[pe]), FFTW_MPI_TYPE, pe, (my_pe * n_pes + pe) & 0xffff, comm, &send_request);
109+
MPI_Irecv(O + rbo[pe], (int) (rbs[pe]), FFTW_MPI_TYPE, pe, (pe * n_pes + my_pe) & 0xffff, comm, &recv_request);
110110
memcpy(buf[(j+1)&0x1], O + sbo[pe2], sbs[pe2] * sizeof(R));
111111
pe = pe2;
112112
//barrier
113-
MPI_Wait(&send_status, MPI_STATUS_IGNORE);
114-
MPI_Wait(&recv_status, MPI_STATUS_IGNORE);
113+
MPI_Wait(&send_request, MPI_STATUS_IGNORE);
114+
MPI_Wait(&recv_request, MPI_STATUS_IGNORE);
115115
}
116116
j++;
117117
}

0 commit comments

Comments
 (0)