Skip to content

Commit a5072de

Browse files
Implementing accumulate functionality and updating comments + README (#5)
* Implementing accumulate functionality and updating comments + README * Fixes * More cleanup * Fixing test
1 parent d7fe226 commit a5072de

File tree

8 files changed

+112
-2
lines changed

8 files changed

+112
-2
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
build*/
22

3+
.focus-config
4+
.raddbg_project
35
.vscode/
46
.idea/
7+
8+

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
`chowdsp_fft` is a fork of the [`pffft`](https://bitbucket.org/jpommier/pffft/src/master/)
88
library for computing Fast Fourier Transforms. This fork adds a couple
99
of optimizations, most importantly the ability to use AVX intrinsics
10-
for single-precision FFTs on compatible platforms.
10+
for single-precision FFTs on compatible platforms. The library also
11+
contains some methods which may be useful for computing convolutions
12+
using frequency-domain multiplication.
1113

1214
## Disclaimer
1315

chowdsp_fft.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ FFT_Setup* fft_new_setup (int N, fft_transform_t transform);
9090
void fft_destroy_setup (FFT_Setup* s);
9191
void pffft_transform_internal (FFT_Setup* setup, const float* finput, float* foutput, void* scratch, fft_direction_t direction, int ordered);
9292
void pffft_convolve_internal (FFT_Setup* setup, const float* a, const float* b, float* ab, float scaling);
93+
void fft_accumulate_internal (const float* a, const float* b, float* ab, int N);
9394
} // namespace chowdsp::fft::avx
9495
static constexpr uintptr_t address_mask = ~static_cast<uintptr_t> (3);
9596
static constexpr uintptr_t typeid_mask = static_cast<uintptr_t> (3);
@@ -267,6 +268,26 @@ void fft_destroy_setup (void* ptr)
267268
#endif
268269
}
269270

271+
int fft_simd_width_bytes (void* setup)
272+
{
273+
#if defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)
274+
#if CHOWDSP_FFT_COMPILER_SUPPORTS_AVX
275+
if (check_is_pointer_sse_setup (setup))
276+
{
277+
return 16;
278+
}
279+
else
280+
{
281+
return 32;
282+
}
283+
#else
284+
return 16;
285+
#endif
286+
#elif defined(__ARM_NEON__) || defined(_M_ARM64)
287+
return 16;
288+
#endif
289+
}
290+
270291
void fft_transform (void* setup, const float* input, float* output, float* work, fft_direction_t direction)
271292
{
272293
#if defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)
@@ -382,4 +403,24 @@ void fft_convolve_unordered (void* setup, const float* a, const float* b, float*
382403
scaling);
383404
#endif
384405
}
406+
407+
void fft_accumulate (void* setup, const float* a, const float* b, float* ab, int N)
408+
{
409+
#if defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)
410+
#if CHOWDSP_FFT_COMPILER_SUPPORTS_AVX
411+
if (check_is_pointer_sse_setup (setup))
412+
{
413+
sse::fft_accumulate_internal (a, b, ab, N);
414+
}
415+
else
416+
{
417+
avx::fft_accumulate_internal (a, b, ab, N);
418+
}
419+
#else
420+
sse::fft_accumulate_internal (a, b, ab, N);
421+
#endif
422+
#elif defined(__ARM_NEON__) || defined(_M_ARM64)
423+
neon::fft_accumulate_internal (a, b, ab, N);
424+
#endif
425+
}
385426
} // namespace chowdsp::fft

chowdsp_fft.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ void* fft_new_setup (int N, fft_transform_t transform, bool use_avx_if_available
8686
);
8787
void fft_destroy_setup (void*);
8888

89+
/** Returns the width (in bytes) of the SIMD registers used by the FFT setup. */
90+
int fft_simd_width_bytes (void* setup);
91+
8992
/*
9093
Perform a Fourier transform , The z-domain data is stored as
9194
interleaved complex numbers.
@@ -111,9 +114,19 @@ void fft_transform_unordered (void* setup, const float* input, float* output, fl
111114

112115
/**
113116
* Convolve two (unordered) frequency-domain signals, with some scale factor.
117+
*
118+
* The operation performed is: dft_ab += (dft_a * fdt_b)*scaling
119+
*
120+
* The dft_a, dft_b and dft_ab pointers may alias.
114121
*/
115122
void fft_convolve_unordered (void* setup, const float* a, const float* b, float* ab, float scaling);
116123

124+
/**
125+
* Computes the sum of two signals of length N.
126+
* N must be a multiple of the FFT setup's SIMD width.
127+
*/
128+
void fft_accumulate (void* setup, const float* a, const float* b, float* ab, int N);
129+
117130
void* aligned_malloc (size_t nb_bytes);
118131
void aligned_free (void*);
119132

simd/chowdsp_fft_impl_avx.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2028,5 +2028,20 @@ void pffft_convolve_internal (FFT_Setup* setup, const float* a, const float* b,
20282028
reinterpret_cast<float*> (&vab[1])[0] = abi0 + ai0 * bi0 * scaling;
20292029
}
20302030
}
2031+
2032+
void fft_accumulate_internal (const float* a, const float* b, float* ab, int N)
2033+
{
2034+
assert (N % (SIMD_SZ * 2) == 0);
2035+
const auto Ncvec = N / (int) SIMD_SZ;
2036+
auto* va = (const __m256*) a;
2037+
auto* vb = (const __m256*) b;
2038+
auto* vab = (__m256*) ab;
2039+
2040+
for (int i = 0; i < Ncvec; i += 2)
2041+
{
2042+
vab[i] = _mm256_add_ps (va[i], vb[i]);
2043+
vab[i + 1] = _mm256_add_ps (va[i + 1], vb[i + 1]);
2044+
}
2045+
}
20312046
} // namespace chowdsp::fft::avx
20322047
#endif

simd/chowdsp_fft_impl_neon.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1674,4 +1674,19 @@ void pffft_convolve_internal (FFT_Setup* setup, const float* a, const float* b,
16741674
reinterpret_cast<float*> (&vab[1])[0] = abi0 + ai0 * bi0 * scaling;
16751675
}
16761676
}
1677+
1678+
void fft_accumulate_internal (const float* a, const float* b, float* ab, int N)
1679+
{
1680+
assert (N % (SIMD_SZ * 2) == 0);
1681+
const auto Ncvec = N / (int) SIMD_SZ;
1682+
auto* va = (const float32x4_t*) a;
1683+
auto* vb = (const float32x4_t*) b;
1684+
auto* vab = (float32x4_t*) ab;
1685+
1686+
for (int i = 0; i < Ncvec; i += 2)
1687+
{
1688+
vab[i] = vaddq_f32 (va[i], vb[i]);
1689+
vab[i + 1] = vaddq_f32 (va[i + 1], vb[i + 1]);
1690+
}
1691+
}
16771692
} // namespace chowdsp::fft::neon

simd/chowdsp_fft_impl_sse.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1689,4 +1689,19 @@ void pffft_convolve_internal (FFT_Setup* setup, const float* a, const float* b,
16891689
reinterpret_cast<float*> (&vab[1])[0] = abi0 + ai0 * bi0 * scaling;
16901690
}
16911691
}
1692+
1693+
void fft_accumulate_internal (const float* a, const float* b, float* ab, int N)
1694+
{
1695+
assert (N % (SIMD_SZ * 2) == 0);
1696+
const auto Ncvec = N / (int) SIMD_SZ;
1697+
auto* va = (const __m128*) a;
1698+
auto* vb = (const __m128*) b;
1699+
auto* vab = (__m128*) ab;
1700+
1701+
for (int i = 0; i < Ncvec; i += 2)
1702+
{
1703+
vab[i] = _mm_add_ps (va[i], vb[i]);
1704+
vab[i + 1] = _mm_add_ps (va[i + 1], vb[i + 1]);
1705+
}
1706+
}
16921707
} // namespace chowdsp::fft::sse

test/test.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ void test_fft_complex (int N, bool use_avx = false)
3030
auto* fft_setup = chowdsp::fft::fft_new_setup (N, chowdsp::fft::FFT_COMPLEX, use_avx);
3131
REQUIRE (fft_setup != nullptr);
3232
auto* pffft_setup = pffft_new_setup (N, PFFFT_COMPLEX);
33+
if (! use_avx)
34+
REQUIRE (chowdsp::fft::fft_simd_width_bytes (fft_setup) == 16);
3335

3436
chowdsp::fft::fft_transform (fft_setup, data, data, work_data, chowdsp::fft::FFT_FORWARD);
3537
pffft_transform_ordered (pffft_setup, data_ref, data_ref, work_data_ref, PFFFT_FORWARD);
@@ -75,7 +77,7 @@ void test_fft_real (int N, bool use_avx = false)
7577

7678
chowdsp::fft::fft_transform (fft_setup, data, data, work_data, chowdsp::fft::FFT_FORWARD);
7779
pffft_transform_ordered (pffft_setup, data_ref, data_ref, work_data_ref, PFFFT_FORWARD);
78-
80+
7981
compare (data_ref, data, N);
8082

8183
chowdsp::fft::fft_transform (fft_setup, data, data, work_data, chowdsp::fft::FFT_BACKWARD);
@@ -176,13 +178,16 @@ void test_convolution_real (int N, bool use_avx = false)
176178
pffft_transform (pffft_setup, sine2_ref, sine2_ref, work_data_ref, PFFFT_FORWARD);
177179
pffft_zconvolve_accumulate (pffft_setup, sine1_ref, sine2_ref, out_ref, norm_gain);
178180
pffft_transform (pffft_setup, out_ref, out_ref, work_data_ref, PFFFT_BACKWARD);
181+
for (int i = 0; i < N; ++i)
182+
out_ref[i] += sine1_ref[i];
179183

180184
auto* fft_setup = chowdsp::fft::fft_new_setup (N, chowdsp::fft::FFT_REAL, use_avx);
181185
REQUIRE (fft_setup != nullptr);
182186
chowdsp::fft::fft_transform_unordered (fft_setup, sine1, sine1, work_data, chowdsp::fft::FFT_FORWARD);
183187
chowdsp::fft::fft_transform_unordered (fft_setup, sine2, sine2, work_data, chowdsp::fft::FFT_FORWARD);
184188
chowdsp::fft::fft_convolve_unordered (fft_setup, sine1, sine2, out, norm_gain);
185189
chowdsp::fft::fft_transform_unordered (fft_setup, out, out, work_data, chowdsp::fft::FFT_BACKWARD);
190+
chowdsp::fft::fft_accumulate (fft_setup, out, sine1_ref, out, N);
186191

187192
compare (out_ref, out, N);
188193

0 commit comments

Comments
 (0)