From 276d34baa779fc0ae670f2afebb839d177b696a2 Mon Sep 17 00:00:00 2001 From: jatin Date: Mon, 17 Nov 2025 23:34:57 -0500 Subject: [PATCH 1/3] Small fixes --- chowdsp_polyphase_fir.cpp | 6 +++--- simd/chowdsp_polyphase_fir_impl_neon.cpp | 1 - simd/chowdsp_polyphase_fir_impl_sse.cpp | 8 -------- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/chowdsp_polyphase_fir.cpp b/chowdsp_polyphase_fir.cpp index 9ba02de..d9054b9 100644 --- a/chowdsp_polyphase_fir.cpp +++ b/chowdsp_polyphase_fir.cpp @@ -251,11 +251,11 @@ void process_decimate (struct Polyphase_FIR_State* state, for (filter_idx = 1; filter_idx < state->factor; ++filter_idx) { filter_state = ch_state + filter_idx * state->state_per_filter_padded; - samples_to_save = state->taps_per_filter_padded; + samples_to_save = state->taps_per_filter_padded - 1; std::memcpy (scratch, - filter_state + n_samples_out, + filter_state + n_samples_out + 1, samples_to_save * sizeof (float)); - std::memcpy (filter_state, + std::memcpy (filter_state + 1, scratch, samples_to_save * sizeof (float)); } diff --git a/simd/chowdsp_polyphase_fir_impl_neon.cpp b/simd/chowdsp_polyphase_fir_impl_neon.cpp index d01a865..c85435a 100644 --- a/simd/chowdsp_polyphase_fir_impl_neon.cpp +++ b/simd/chowdsp_polyphase_fir_impl_neon.cpp @@ -101,7 +101,6 @@ static void process_fir_decim (const Polyphase_FIR_State* state, } scratch_v[n] = vaddq_f32 (scratch_v[n], vaddq_f32 (accum_0, accum_1)); - // scratch_v[n] += vaddq_f32 (accum_0, accum_1); } } diff --git a/simd/chowdsp_polyphase_fir_impl_sse.cpp b/simd/chowdsp_polyphase_fir_impl_sse.cpp index 812accd..627fdee 100644 --- a/simd/chowdsp_polyphase_fir_impl_sse.cpp +++ b/simd/chowdsp_polyphase_fir_impl_sse.cpp @@ -57,10 +57,6 @@ static void process_fir_decim (const Polyphase_FIR_State* state, accum = _mm_add_ps (accum, _mm_mul_ps (z, filter_coeffs[k])); } scratch_v[n] = accum; - - // auto rr = _mm_add_ps (_mm_shuffle_ps (accum, accum, 0x4e), accum); - // rr = _mm_add_ps (rr, _mm_shuffle_ps (rr, rr, 0xb1)); - // y_data[n] = _mm_cvtss_f32 (rr); } for (filter_idx = 1; filter_idx < state->factor; ++filter_idx) @@ -76,10 +72,6 @@ static void process_fir_decim (const Polyphase_FIR_State* state, accum = _mm_add_ps (accum, _mm_mul_ps (z, filter_coeffs[k])); } scratch_v[n] = _mm_add_ps (scratch_v[n], accum); - - // auto rr = _mm_add_ps (_mm_shuffle_ps (accum, accum, 0x4e), accum); - // rr = _mm_add_ps (rr, _mm_shuffle_ps (rr, rr, 0xb1)); - // y_data[n] += _mm_cvtss_f32 (rr); } } From 3617797405233e1c5393703149dbafa3f6cdeedd Mon Sep 17 00:00:00 2001 From: jatin Date: Tue, 18 Nov 2025 10:04:44 -0500 Subject: [PATCH 2/3] Fixes for AVX and SSE --- simd/chowdsp_polyphase_fir_impl_avx.cpp | 2 +- simd/chowdsp_polyphase_fir_impl_sse.cpp | 2 +- test/test.cpp | 128 ++++++++++++------------ 3 files changed, 66 insertions(+), 66 deletions(-) diff --git a/simd/chowdsp_polyphase_fir_impl_avx.cpp b/simd/chowdsp_polyphase_fir_impl_avx.cpp index f30291f..ae11836 100644 --- a/simd/chowdsp_polyphase_fir_impl_avx.cpp +++ b/simd/chowdsp_polyphase_fir_impl_avx.cpp @@ -85,7 +85,7 @@ void process_fir_decim (const Polyphase_FIR_State* state, __m256 rr = _mm256_dp_ps (scratch_v[n], one_avx, 0xff); __m256 tmp = _mm256_permute2f128_ps (rr, rr, 1); rr = _mm256_add_ps (rr, tmp); - y_data[n] += _mm256_cvtss_f32 (rr); + y_data[n] = _mm256_cvtss_f32 (rr); } } } // namespace chowdsp::polyphase_fir::avx diff --git a/simd/chowdsp_polyphase_fir_impl_sse.cpp b/simd/chowdsp_polyphase_fir_impl_sse.cpp index 627fdee..5961127 100644 --- a/simd/chowdsp_polyphase_fir_impl_sse.cpp +++ b/simd/chowdsp_polyphase_fir_impl_sse.cpp @@ -80,7 +80,7 @@ static void process_fir_decim (const Polyphase_FIR_State* state, const auto accum = scratch_v[n]; auto rr = _mm_add_ps (_mm_shuffle_ps (accum, accum, 0x4e), accum); rr = _mm_add_ps (rr, _mm_shuffle_ps (rr, rr, 0xb1)); - y_data[n] += _mm_cvtss_f32 (rr); + y_data[n] = _mm_cvtss_f32 (rr); } } } // namespace chowdsp::polyphase_fir::sse diff --git a/test/test.cpp b/test/test.cpp index f2a24e2..b866684 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -39,37 +39,37 @@ static void test_interp (int n_channels, int n_samples, bool use_avx) chowdsp::Buffer buffer_in { n_channels, n_samples }; for (auto [ch, data] : chowdsp::buffer_iters::channels (buffer_in)) for (auto [n, x] : chowdsp::enumerate (data)) - x = static_cast (n + (size_t) ch); + x = static_cast (n + (size_t) ch + 1); + + chowdsp::ArenaAllocator<> ref_arena { 1 << 14 }; + chowdsp::FIRPolyphaseInterpolator ref_filter; + ref_filter.prepare (n_channels, n_samples, coeffs, ref_arena); + + namespace pfir = chowdsp::polyphase_fir; + const auto alignment = use_avx ? 32 : 16; + const auto block_size_1 = n_samples / 2; + const auto block_size_2 = n_samples - block_size_1; + const auto max_block_size = std::max (block_size_1, block_size_2); + + const auto persistent_bytes = pfir::persistent_bytes_required (n_channels, n_taps, factor, max_block_size, alignment); + const auto scratch_bytes = pfir::scratch_bytes_required (n_taps, factor, max_block_size, alignment); + chowdsp::ArenaAllocator<> arena { persistent_bytes + scratch_bytes + alignment }; + + auto state = pfir::init (n_channels, + n_taps, + factor, + max_block_size, + arena.allocate_bytes (persistent_bytes, alignment), + alignment); + pfir::load_coeffs (state, coeffs, n_taps); + auto* scratch_data = arena.allocate_bytes (scratch_bytes, alignment); chowdsp::Buffer ref_buffer_out { n_channels, n_samples * factor }; + chowdsp::Buffer test_buffer_out { n_channels, n_samples * factor }; + for (int i = 0; i < 4; ++i) { - chowdsp::ArenaAllocator<> arena { 1 << 14 }; - chowdsp::FIRPolyphaseInterpolator ref_filter; - ref_filter.prepare (n_channels, n_samples, coeffs, arena); ref_filter.processBlock (buffer_in, ref_buffer_out); - } - chowdsp::Buffer test_buffer_out { n_channels, n_samples * factor }; - { - namespace pfir = chowdsp::polyphase_fir; - const auto alignment = use_avx ? 32 : 16; - const auto block_size_1 = n_samples / 2; - const auto block_size_2 = n_samples - block_size_1; - const auto max_block_size = std::max (block_size_1, block_size_2); - - const auto persistent_bytes = pfir::persistent_bytes_required (n_channels, n_taps, factor, max_block_size, alignment); - const auto scratch_bytes = pfir::scratch_bytes_required (n_taps, factor, max_block_size, alignment); - chowdsp::ArenaAllocator<> arena { persistent_bytes + scratch_bytes + alignment }; - - auto state = pfir::init (n_channels, - n_taps, - factor, - max_block_size, - arena.allocate_bytes (persistent_bytes, alignment), - alignment); - pfir::load_coeffs (state, coeffs, n_taps); - - auto* scratch_data = arena.allocate_bytes (scratch_bytes, alignment); auto half_buffer_in = chowdsp::BufferView { buffer_in, 0, block_size_1 }; auto half_buffer_out = chowdsp::BufferView { test_buffer_out, 0, block_size_1 * factor }; pfir::process_interpolate (state, @@ -89,13 +89,13 @@ static void test_interp (int n_channels, int n_samples, bool use_avx) block_size_2, scratch_data, use_avx); - } - for (const auto [ch, ref_data, test_data] : chowdsp::buffer_iters::zip_channels (std::as_const (ref_buffer_out), - std::as_const (test_buffer_out))) - { - for (const auto [ref, test] : chowdsp::zip (ref_data, test_data)) - REQUIRE (test == Catch::Approx { ref }.margin (1.0e-6)); + for (const auto [ch, ref_data, test_data] : chowdsp::buffer_iters::zip_channels (std::as_const (ref_buffer_out), + std::as_const (test_buffer_out))) + { + for (const auto [ref, test] : chowdsp::zip (ref_data, test_data)) + REQUIRE (test == Catch::Approx { ref }.margin (1.0e-6)); + } } } @@ -105,37 +105,37 @@ static void test_decim (int n_channels, int n_samples, bool use_avx) chowdsp::Buffer buffer_in { n_channels, n_samples * factor }; for (auto [ch, data] : chowdsp::buffer_iters::channels (buffer_in)) for (auto [n, x] : chowdsp::enumerate (data)) - x = static_cast (n + (size_t) ch); + x = static_cast (n + (size_t) ch + 1); + + chowdsp::ArenaAllocator<> ref_arena { 1 << 14 }; + chowdsp::FIRPolyphaseDecimator ref_filter; + ref_filter.prepare (n_channels, n_samples * factor, coeffs, ref_arena); + + namespace pfir = chowdsp::polyphase_fir; + const auto alignment = use_avx ? 32 : 16; + const auto block_size_1 = n_samples / 2; + const auto block_size_2 = n_samples - block_size_1; + const auto max_block_size = std::max (block_size_1, block_size_2); + + const auto persistent_bytes = pfir::persistent_bytes_required (n_channels, n_taps, factor, max_block_size, alignment); + const auto scratch_bytes = pfir::scratch_bytes_required (n_taps, factor, max_block_size, alignment); + chowdsp::ArenaAllocator<> arena { persistent_bytes + scratch_bytes + alignment }; + + auto state = pfir::init (n_channels, + n_taps, + factor, + max_block_size, + arena.allocate_bytes (persistent_bytes, alignment), + alignment); + pfir::load_coeffs (state, coeffs, n_taps); + auto* scratch_data = arena.allocate_bytes (scratch_bytes, alignment); chowdsp::Buffer ref_buffer_out { n_channels, n_samples }; + chowdsp::Buffer test_buffer_out { n_channels, n_samples }; + for (int i = 0; i < 4; ++i) { - chowdsp::ArenaAllocator<> arena { 1 << 14 }; - chowdsp::FIRPolyphaseDecimator ref_filter; - ref_filter.prepare (n_channels, n_samples * factor, coeffs, arena); ref_filter.processBlock (buffer_in, ref_buffer_out); - } - chowdsp::Buffer test_buffer_out { n_channels, n_samples }; - { - namespace pfir = chowdsp::polyphase_fir; - const auto alignment = use_avx ? 32 : 16; - const auto block_size_1 = n_samples / 2; - const auto block_size_2 = n_samples - block_size_1; - const auto max_block_size = std::max (block_size_1, block_size_2); - - const auto persistent_bytes = pfir::persistent_bytes_required (n_channels, n_taps, factor, max_block_size, alignment); - const auto scratch_bytes = pfir::scratch_bytes_required (n_taps, factor, max_block_size, alignment); - chowdsp::ArenaAllocator<> arena { persistent_bytes + scratch_bytes + alignment }; - - auto state = pfir::init (n_channels, - n_taps, - factor, - max_block_size, - arena.allocate_bytes (persistent_bytes, alignment), - alignment); - pfir::load_coeffs (state, coeffs, n_taps); - - auto* scratch_data = arena.allocate_bytes (scratch_bytes, alignment); auto half_buffer_in = chowdsp::BufferView { buffer_in, 0, block_size_1 * factor }; auto half_buffer_out = chowdsp::BufferView { test_buffer_out, 0, block_size_1 }; pfir::process_decimate (state, @@ -155,13 +155,13 @@ static void test_decim (int n_channels, int n_samples, bool use_avx) block_size_2 * factor, scratch_data, use_avx); - } - for (const auto [ch, ref_data, test_data] : chowdsp::buffer_iters::zip_channels (std::as_const (ref_buffer_out), - std::as_const (test_buffer_out))) - { - for (const auto [ref, test] : chowdsp::zip (ref_data, test_data)) - REQUIRE (test == Catch::Approx { ref }.margin (1.0e-6)); + for (const auto [ch, ref_data, test_data] : chowdsp::buffer_iters::zip_channels (std::as_const (ref_buffer_out), + std::as_const (test_buffer_out))) + { + for (const auto [ref, test] : chowdsp::zip (ref_data, test_data)) + REQUIRE (test == Catch::Approx { ref }.margin (1.0e-6)); + } } } From 8cc3e405ff383b8bff9d709fe9bce50be6523af6 Mon Sep 17 00:00:00 2001 From: jatin Date: Tue, 18 Nov 2025 10:25:56 -0500 Subject: [PATCH 3/3] Add round trip test --- chowdsp_polyphase_fir.cpp | 2 +- test/test.cpp | 111 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 1 deletion(-) diff --git a/chowdsp_polyphase_fir.cpp b/chowdsp_polyphase_fir.cpp index d9054b9..ba0c8c8 100644 --- a/chowdsp_polyphase_fir.cpp +++ b/chowdsp_polyphase_fir.cpp @@ -210,7 +210,7 @@ void process_decimate (struct Polyphase_FIR_State* state, for (int ch = 0; ch < n_channels; ++ch) { - auto* ch_state = state->interp_state + ch * (state->state_per_filter_padded * state->factor); + auto* ch_state = state->decim_state + ch * (state->state_per_filter_padded * state->factor); { // copy x_data into ch_state auto* x_data = in[ch]; diff --git a/test/test.cpp b/test/test.cpp index b866684..60fa035 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -165,6 +165,93 @@ static void test_decim (int n_channels, int n_samples, bool use_avx) } } +template +static void test_round_trip (int n_channels, int n_samples, bool use_avx) +{ + chowdsp::Buffer buffer_in { n_channels, n_samples }; + for (auto [ch, data] : chowdsp::buffer_iters::channels (buffer_in)) + for (auto [n, x] : chowdsp::enumerate (data)) + x = static_cast (n + (size_t) ch + 1); + + chowdsp::ArenaAllocator<> ref_arena { 1 << 15 }; + chowdsp::FIRPolyphaseInterpolator ref_filter_interp; + ref_filter_interp.prepare (n_channels, n_samples, coeffs, ref_arena); + chowdsp::FIRPolyphaseDecimator ref_filter_decim; + ref_filter_decim.prepare (n_channels, n_samples * factor, coeffs, ref_arena); + + namespace pfir = chowdsp::polyphase_fir; + const auto alignment = use_avx ? 32 : 16; + const auto block_size_1 = n_samples / 2; + const auto block_size_2 = n_samples - block_size_1; + const auto max_block_size = std::max (block_size_1, block_size_2); + + const auto persistent_bytes = pfir::persistent_bytes_required (n_channels, n_taps, factor, max_block_size, alignment); + const auto scratch_bytes = pfir::scratch_bytes_required (n_taps, factor, max_block_size, alignment); + chowdsp::ArenaAllocator<> arena { persistent_bytes + scratch_bytes + alignment }; + + auto state = pfir::init (n_channels, + n_taps, + factor, + max_block_size, + arena.allocate_bytes (persistent_bytes, alignment), + alignment); + pfir::load_coeffs (state, coeffs, n_taps); + auto* scratch_data = arena.allocate_bytes (scratch_bytes, alignment); + + chowdsp::Buffer ref_buffer_interp { n_channels, n_samples * factor }; + chowdsp::Buffer ref_buffer_out { n_channels, n_samples }; + chowdsp::Buffer test_buffer_interp { n_channels, n_samples * factor }; + chowdsp::Buffer test_buffer_out { n_channels, n_samples }; + for (int i = 0; i < 4; ++i) + { + ref_filter_interp.processBlock (buffer_in, ref_buffer_interp); + ref_filter_decim.processBlock (ref_buffer_interp, ref_buffer_out); + + auto half_buffer_in = chowdsp::BufferView { buffer_in, 0, block_size_1 }; + auto half_buffer_interp = chowdsp::BufferView { test_buffer_interp, 0, block_size_1 * factor }; + auto half_buffer_out = chowdsp::BufferView { test_buffer_out, 0, block_size_1 }; + pfir::process_interpolate (state, + half_buffer_in.getArrayOfReadPointers(), + half_buffer_interp.getArrayOfWritePointers(), + n_channels, + block_size_1, + scratch_data, + use_avx); + pfir::process_decimate (state, + half_buffer_interp.getArrayOfReadPointers(), + half_buffer_out.getArrayOfWritePointers(), + n_channels, + block_size_1 * factor, + scratch_data, + use_avx); + + half_buffer_in = chowdsp::BufferView { buffer_in, block_size_1, block_size_2 }; + half_buffer_interp = chowdsp::BufferView { test_buffer_interp, block_size_1 * factor, block_size_2 * factor }; + half_buffer_out = chowdsp::BufferView { test_buffer_out, block_size_1, block_size_2 }; + pfir::process_interpolate (state, + half_buffer_in.getArrayOfReadPointers(), + half_buffer_interp.getArrayOfWritePointers(), + n_channels, + block_size_2, + scratch_data, + use_avx); + pfir::process_decimate (state, + half_buffer_interp.getArrayOfReadPointers(), + half_buffer_out.getArrayOfWritePointers(), + n_channels, + block_size_2 * factor, + scratch_data, + use_avx); + + for (const auto [ch, ref_data, test_data] : chowdsp::buffer_iters::zip_channels (std::as_const (ref_buffer_out), + std::as_const (test_buffer_out))) + { + for (const auto [ref, test] : chowdsp::zip (ref_data, test_data)) + REQUIRE (test == Catch::Approx { ref }.margin (1.0e-6)); + } + } +} + TEST_CASE ("Polyphase Interpolation") { #if defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) @@ -212,3 +299,27 @@ TEST_CASE ("Polyphase Decimation") } } } + +TEST_CASE ("Round-Trip Polyphase Interpolation/Decimation") +{ +#if defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) + const bool use_avx[] = { false, true }; +#else + const bool use_avx[] = { false }; +#endif + const int channels[] = { 1, 2 }; + const int samples[] = { 16, 127 }; + + for (auto avx : use_avx) + { + for (auto n_channels : channels) + { + for (auto n_samples : samples) + { + test_round_trip<1> (n_channels, n_samples, avx); + test_round_trip<2> (n_channels, n_samples, avx); + test_round_trip<3> (n_channels, n_samples, avx); + } + } + } +}