@@ -130,18 +130,16 @@ static void fft_destroy_setup (FFT_Setup* s)
130130}
131131
132132// ====================================================================
133- static inline auto interleave2 (__m128 in1, __m128 in2)
133+ static inline void interleave2 (__m128 in1, __m128 in2, __m128& out1, __m128& out2 )
134134{
135- auto out1 = _mm_unpacklo_ps (in1, in2);
136- auto out2 = _mm_unpackhi_ps (in1, in2);
137- return std::make_tuple (out1, out2);
135+ out1 = _mm_unpacklo_ps (in1, in2);
136+ out2 = _mm_unpackhi_ps (in1, in2);
138137}
139138
140- static inline auto uninterleave2 (__m128 in1, __m128 in2)
139+ static inline void uninterleave2 (__m128 in1, __m128 in2, __m128& out1, __m128& out2 )
141140{
142- auto out1 = _mm_shuffle_ps (in1, in2, _MM_SHUFFLE (2 , 0 , 2 , 0 ));
143- auto out2 = _mm_shuffle_ps (in1, in2, _MM_SHUFFLE (3 , 1 , 3 , 1 ));
144- return std::make_tuple (out1, out2);
141+ out1 = _mm_shuffle_ps (in1, in2, _MM_SHUFFLE (2 , 0 , 2 , 0 ));
142+ out2 = _mm_shuffle_ps (in1, in2, _MM_SHUFFLE (3 , 1 , 3 , 1 ));
145143}
146144
147145static inline auto mul_scalar (__m128 a, float b)
@@ -630,7 +628,7 @@ static void radf2_ps (int ido, int l1, const __m128* __restrict cc, __m128* __re
630628 }
631629 for (k = 0 ; k < l1ido; k += ido)
632630 {
633- ch[2 * k + ido] = _mm_xor_ps (cc[ido - 1 + k + l1ido], _mm_set1_ps (-0 .f )); // negate
631+ ch[2 * k + ido] = _mm_xor_ps (cc[ido - 1 + k + l1ido], _mm_set1_ps (-0 .f )); // negate
634632 ch[2 * k + ido - 1 ] = cc[k + ido - 1 ];
635633 }
636634}
@@ -961,8 +959,9 @@ static void pffft_real_finalize (int Ncvec, const __m128* in, __m128* out, const
961959 int k, dk = Ncvec / (int ) SIMD_SZ; // number of 4x4 matrix blocks
962960 /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
963961
964- union v4sf_union {
965- __m128 v;
962+ union v4sf_union
963+ {
964+ __m128 v;
966965 float f[SIMD_SZ];
967966 };
968967
@@ -1073,8 +1072,9 @@ static void pffft_real_preprocess (int Ncvec, const __m128* in, __m128* out, con
10731072 int k, dk = Ncvec / (int ) SIMD_SZ; // number of 4x4 matrix blocks
10741073 /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
10751074
1076- union v4sf_union {
1077- __m128 v;
1075+ union v4sf_union
1076+ {
1077+ __m128 v;
10781078 float f[SIMD_SZ];
10791079 };
10801080
@@ -1470,14 +1470,16 @@ static void reversed_copy (int N, const __m128* in, int in_stride, __m128* out)
14701470 auto * in_start = in;
14711471 auto * out_start = out;
14721472
1473- auto [g0, g1] = interleave2 (in[0 ], in[1 ]);
1473+ __m128 g0, g1;
1474+ interleave2 (in[0 ], in[1 ], g0, g1);
14741475 in += in_stride;
14751476
14761477 *--out = _mm_shuffle_ps (g1, g0, _MM_SHUFFLE (3 , 2 , 1 , 0 )); // [g0l, g0h], [g1l g1h] -> [g1l, g0h]
14771478 int k;
14781479 for (k = 1 ; k < N; ++k)
14791480 {
1480- auto [h0, h1] = interleave2 (in[0 ], in[1 ]);
1481+ __m128 h0, h1;
1482+ interleave2 (in[0 ], in[1 ], h0, h1);
14811483 in += in_stride;
14821484 *--out = _mm_shuffle_ps (h0, g1, _MM_SHUFFLE (3 , 2 , 1 , 0 ));
14831485 *--out = _mm_shuffle_ps (h1, h0, _MM_SHUFFLE (3 , 2 , 1 , 0 ));
@@ -1498,15 +1500,15 @@ static void unreversed_copy (int N, const __m128* in, __m128* out, int out_strid
14981500 h1 = *in++;
14991501 g1 = _mm_shuffle_ps (h0, g1, _MM_SHUFFLE (3 , 2 , 1 , 0 ));
15001502 h0 = _mm_shuffle_ps (h1, h0, _MM_SHUFFLE (3 , 2 , 1 , 0 ));
1501- std::tie ( out[0 ], out[1 ]) = uninterleave2 (h0, g1 );
1503+ uninterleave2 (h0, g1, out[0 ], out[1 ]);
15021504 out += out_stride;
15031505 g1 = h1;
15041506 }
15051507 h0 = *in++;
15061508 h1 = g0;
15071509 g1 = _mm_shuffle_ps (h0, g1, _MM_SHUFFLE (3 , 2 , 1 , 0 ));
15081510 h0 = _mm_shuffle_ps (h1, h0, _MM_SHUFFLE (3 , 2 , 1 , 0 ));
1509- std::tie ( out[0 ], out[1 ]) = uninterleave2 (h0, g1 );
1511+ uninterleave2 (h0, g1, out[0 ], out[1 ]);
15101512}
15111513
15121514static void pffft_zreorder (FFT_Setup* setup, const float * in, float * out, fft_direction_t direction)
@@ -1522,8 +1524,8 @@ static void pffft_zreorder (FFT_Setup* setup, const float* in, float* out, fft_d
15221524 {
15231525 for (k = 0 ; k < dk; ++k)
15241526 {
1525- std::tie (vout[ 2 * ( 0 * dk + k) + 0 ], vout[2 * (0 * dk + k) + 1 ]) = interleave2 (vin[k * 8 + 0 ], vin[k * 8 + 1 ]);
1526- std::tie (vout[ 2 * ( 2 * dk + k) + 0 ], vout[2 * (2 * dk + k) + 1 ]) = interleave2 (vin[k * 8 + 4 ], vin[k * 8 + 5 ]);
1527+ interleave2 (vin[k * 8 + 0 ], vin[k * 8 + 1 ], vout[2 * (0 * dk + k) + 0 ], vout[ 2 * ( 0 * dk + k) + 1 ]);
1528+ interleave2 (vin[k * 8 + 4 ], vin[k * 8 + 5 ], vout[2 * (2 * dk + k) + 0 ], vout[ 2 * ( 2 * dk + k) + 1 ]);
15271529 }
15281530 reversed_copy (dk, vin + 2 , 8 , (__m128*) (out + N / 2 ));
15291531 reversed_copy (dk, vin + 6 , 8 , (__m128*) (out + N));
@@ -1532,8 +1534,8 @@ static void pffft_zreorder (FFT_Setup* setup, const float* in, float* out, fft_d
15321534 {
15331535 for (k = 0 ; k < dk; ++k)
15341536 {
1535- std::tie (vout[k * 8 + 0 ], vout[k * 8 + 1 ]) = uninterleave2 ( vin[2 * (0 * dk + k) + 0 ], vin[ 2 * ( 0 * dk + k) + 1 ]);
1536- std::tie (vout[k * 8 + 4 ], vout[k * 8 + 5 ]) = uninterleave2 ( vin[2 * (2 * dk + k) + 0 ], vin[ 2 * ( 2 * dk + k) + 1 ]);
1537+ uninterleave2 (vin[ 2 * ( 0 * dk + k) + 0 ], vin[2 * (0 * dk + k) + 1 ], vout[k * 8 + 0 ], vout[k * 8 + 1 ]);
1538+ uninterleave2 (vin[ 2 * ( 2 * dk + k) + 0 ], vin[2 * (2 * dk + k) + 1 ], vout[k * 8 + 4 ], vout[k * 8 + 5 ]);
15371539 }
15381540 unreversed_copy (dk, (__m128*) (in + N / 4 ), (__m128*) (out + N - 6 * SIMD_SZ), -8 );
15391541 unreversed_copy (dk, (__m128*) (in + 3 * N / 4 ), (__m128*) (out + N - 2 * SIMD_SZ), -8 );
@@ -1546,15 +1548,15 @@ static void pffft_zreorder (FFT_Setup* setup, const float* in, float* out, fft_d
15461548 for (k = 0 ; k < Ncvec; ++k)
15471549 {
15481550 int kk = (k / 4 ) + (k % 4 ) * (Ncvec / 4 );
1549- std::tie (vout[kk * 2 ], vout[kk * 2 + 1 ]) = interleave2 (vin[k * 2 ], vin[k * 2 + 1 ]);
1551+ interleave2 (vin[k * 2 ], vin[k * 2 + 1 ], vout[kk * 2 ], vout[kk * 2 + 1 ]);
15501552 }
15511553 }
15521554 else
15531555 {
15541556 for (k = 0 ; k < Ncvec; ++k)
15551557 {
15561558 int kk = (k / 4 ) + (k % 4 ) * (Ncvec / 4 );
1557- std::tie (vout[k * 2 ], vout[k * 2 + 1 ]) = uninterleave2 (vin[kk * 2 ], vin[kk * 2 + 1 ]);
1559+ uninterleave2 (vin[kk * 2 ], vin[kk * 2 + 1 ], vout[k * 2 ], vout[k * 2 + 1 ]);
15581560 }
15591561 }
15601562 }
@@ -1591,7 +1593,7 @@ void pffft_transform_internal (FFT_Setup* setup, const float* finput, float* fou
15911593 __m128* tmp = buff[ib];
15921594 for (k = 0 ; k < Ncvec; ++k)
15931595 {
1594- std::tie (tmp [k * 2 ], tmp [k * 2 + 1 ]) = uninterleave2 (vinput [k * 2 ], vinput [k * 2 + 1 ]);
1596+ uninterleave2 (vinput [k * 2 ], vinput [k * 2 + 1 ], tmp [k * 2 ], tmp [k * 2 + 1 ]);
15951597 }
15961598 ib = (cfftf1_ps (Ncvec, buff[ib], buff[! ib], buff[ib], setup->twiddle , &setup->ifac [0 ], -1 ) == buff[0 ] ? 0 : 1 );
15971599 pffft_cplx_finalize (Ncvec, buff[ib], buff[! ib], (__m128*) setup->e );
@@ -1626,7 +1628,7 @@ void pffft_transform_internal (FFT_Setup* setup, const float* finput, float* fou
16261628 ib = (cfftf1_ps (Ncvec, buff[ib], buff[0 ], buff[1 ], setup->twiddle , &setup->ifac [0 ], +1 ) == buff[0 ] ? 0 : 1 );
16271629 for (k = 0 ; k < Ncvec; ++k)
16281630 {
1629- std::tie (buff[ib][k * 2 ], buff[ib][k * 2 + 1 ]) = interleave2 ( buff[ib][k * 2 ], buff[ib][k * 2 + 1 ]);
1631+ interleave2 (buff[ib][k * 2 ], buff[ib][k * 2 + 1 ], buff[ib][k * 2 ], buff[ib][k * 2 + 1 ]);
16301632 }
16311633 }
16321634 }
@@ -1672,8 +1674,8 @@ void pffft_convolve_internal (FFT_Setup* setup, const float* a, const float* b,
16721674 br = vb[2 * i + 0 ];
16731675 bi = vb[2 * i + 1 ];
16741676 std::tie (ar, ai) = cplx_mul_v (ar, ai, br, bi);
1675- vab[2 * i + 0 ] = _mm_add_ps (vab[2 * i + 0 ], _mm_mul_ps (ar, vscal));
1676- vab[2 * i + 1 ] = _mm_add_ps (vab[2 * i + 1 ], _mm_mul_ps (ai, vscal));
1677+ vab[2 * i + 0 ] = _mm_add_ps (vab[2 * i + 0 ], _mm_mul_ps (ar, vscal));
1678+ vab[2 * i + 1 ] = _mm_add_ps (vab[2 * i + 1 ], _mm_mul_ps (ai, vscal));
16771679 ar = va[2 * i + 2 ];
16781680 ai = va[2 * i + 3 ];
16791681 br = vb[2 * i + 2 ];
0 commit comments