Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions chowdsp_polyphase_fir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ void process_decimate (struct Polyphase_FIR_State* state,

for (int ch = 0; ch < n_channels; ++ch)
{
auto* ch_state = state->interp_state + ch * (state->state_per_filter_padded * state->factor);
auto* ch_state = state->decim_state + ch * (state->state_per_filter_padded * state->factor);

{ // copy x_data into ch_state
auto* x_data = in[ch];
Expand Down Expand Up @@ -251,11 +251,11 @@ void process_decimate (struct Polyphase_FIR_State* state,
for (filter_idx = 1; filter_idx < state->factor; ++filter_idx)
{
filter_state = ch_state + filter_idx * state->state_per_filter_padded;
samples_to_save = state->taps_per_filter_padded;
samples_to_save = state->taps_per_filter_padded - 1;
std::memcpy (scratch,
filter_state + n_samples_out,
filter_state + n_samples_out + 1,
samples_to_save * sizeof (float));
std::memcpy (filter_state,
std::memcpy (filter_state + 1,
scratch,
samples_to_save * sizeof (float));
}
Expand Down
2 changes: 1 addition & 1 deletion simd/chowdsp_polyphase_fir_impl_avx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ void process_fir_decim (const Polyphase_FIR_State* state,
__m256 rr = _mm256_dp_ps (scratch_v[n], one_avx, 0xff);
__m256 tmp = _mm256_permute2f128_ps (rr, rr, 1);
rr = _mm256_add_ps (rr, tmp);
y_data[n] += _mm256_cvtss_f32 (rr);
y_data[n] = _mm256_cvtss_f32 (rr);
}
}
} // namespace chowdsp::polyphase_fir::avx
Expand Down
1 change: 0 additions & 1 deletion simd/chowdsp_polyphase_fir_impl_neon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ static void process_fir_decim (const Polyphase_FIR_State* state,
}

scratch_v[n] = vaddq_f32 (scratch_v[n], vaddq_f32 (accum_0, accum_1));
// scratch_v[n] += vaddq_f32 (accum_0, accum_1);
}
}

Expand Down
10 changes: 1 addition & 9 deletions simd/chowdsp_polyphase_fir_impl_sse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,6 @@ static void process_fir_decim (const Polyphase_FIR_State* state,
accum = _mm_add_ps (accum, _mm_mul_ps (z, filter_coeffs[k]));
}
scratch_v[n] = accum;

// auto rr = _mm_add_ps (_mm_shuffle_ps (accum, accum, 0x4e), accum);
// rr = _mm_add_ps (rr, _mm_shuffle_ps (rr, rr, 0xb1));
// y_data[n] = _mm_cvtss_f32 (rr);
}

for (filter_idx = 1; filter_idx < state->factor; ++filter_idx)
Expand All @@ -76,10 +72,6 @@ static void process_fir_decim (const Polyphase_FIR_State* state,
accum = _mm_add_ps (accum, _mm_mul_ps (z, filter_coeffs[k]));
}
scratch_v[n] = _mm_add_ps (scratch_v[n], accum);

// auto rr = _mm_add_ps (_mm_shuffle_ps (accum, accum, 0x4e), accum);
// rr = _mm_add_ps (rr, _mm_shuffle_ps (rr, rr, 0xb1));
// y_data[n] += _mm_cvtss_f32 (rr);
}
}

Expand All @@ -88,7 +80,7 @@ static void process_fir_decim (const Polyphase_FIR_State* state,
const auto accum = scratch_v[n];
auto rr = _mm_add_ps (_mm_shuffle_ps (accum, accum, 0x4e), accum);
rr = _mm_add_ps (rr, _mm_shuffle_ps (rr, rr, 0xb1));
y_data[n] += _mm_cvtss_f32 (rr);
y_data[n] = _mm_cvtss_f32 (rr);
}
}
} // namespace chowdsp::polyphase_fir::sse
235 changes: 173 additions & 62 deletions test/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,37 +39,37 @@ static void test_interp (int n_channels, int n_samples, bool use_avx)
chowdsp::Buffer<float> buffer_in { n_channels, n_samples };
for (auto [ch, data] : chowdsp::buffer_iters::channels (buffer_in))
for (auto [n, x] : chowdsp::enumerate (data))
x = static_cast<float> (n + (size_t) ch);
x = static_cast<float> (n + (size_t) ch + 1);

chowdsp::ArenaAllocator<> ref_arena { 1 << 14 };
chowdsp::FIRPolyphaseInterpolator<float, factor, n_taps> ref_filter;
ref_filter.prepare (n_channels, n_samples, coeffs, ref_arena);

namespace pfir = chowdsp::polyphase_fir;
const auto alignment = use_avx ? 32 : 16;
const auto block_size_1 = n_samples / 2;
const auto block_size_2 = n_samples - block_size_1;
const auto max_block_size = std::max (block_size_1, block_size_2);

const auto persistent_bytes = pfir::persistent_bytes_required (n_channels, n_taps, factor, max_block_size, alignment);
const auto scratch_bytes = pfir::scratch_bytes_required (n_taps, factor, max_block_size, alignment);
chowdsp::ArenaAllocator<> arena { persistent_bytes + scratch_bytes + alignment };

auto state = pfir::init (n_channels,
n_taps,
factor,
max_block_size,
arena.allocate_bytes (persistent_bytes, alignment),
alignment);
pfir::load_coeffs (state, coeffs, n_taps);
auto* scratch_data = arena.allocate_bytes (scratch_bytes, alignment);

chowdsp::Buffer<float> ref_buffer_out { n_channels, n_samples * factor };
chowdsp::Buffer<float> test_buffer_out { n_channels, n_samples * factor };
for (int i = 0; i < 4; ++i)
{
chowdsp::ArenaAllocator<> arena { 1 << 14 };
chowdsp::FIRPolyphaseInterpolator<float, factor, n_taps> ref_filter;
ref_filter.prepare (n_channels, n_samples, coeffs, arena);
ref_filter.processBlock (buffer_in, ref_buffer_out);
}

chowdsp::Buffer<float> test_buffer_out { n_channels, n_samples * factor };
{
namespace pfir = chowdsp::polyphase_fir;
const auto alignment = use_avx ? 32 : 16;
const auto block_size_1 = n_samples / 2;
const auto block_size_2 = n_samples - block_size_1;
const auto max_block_size = std::max (block_size_1, block_size_2);

const auto persistent_bytes = pfir::persistent_bytes_required (n_channels, n_taps, factor, max_block_size, alignment);
const auto scratch_bytes = pfir::scratch_bytes_required (n_taps, factor, max_block_size, alignment);
chowdsp::ArenaAllocator<> arena { persistent_bytes + scratch_bytes + alignment };

auto state = pfir::init (n_channels,
n_taps,
factor,
max_block_size,
arena.allocate_bytes (persistent_bytes, alignment),
alignment);
pfir::load_coeffs (state, coeffs, n_taps);

auto* scratch_data = arena.allocate_bytes (scratch_bytes, alignment);
auto half_buffer_in = chowdsp::BufferView { buffer_in, 0, block_size_1 };
auto half_buffer_out = chowdsp::BufferView { test_buffer_out, 0, block_size_1 * factor };
pfir::process_interpolate (state,
Expand All @@ -89,13 +89,13 @@ static void test_interp (int n_channels, int n_samples, bool use_avx)
block_size_2,
scratch_data,
use_avx);
}

for (const auto [ch, ref_data, test_data] : chowdsp::buffer_iters::zip_channels (std::as_const (ref_buffer_out),
std::as_const (test_buffer_out)))
{
for (const auto [ref, test] : chowdsp::zip (ref_data, test_data))
REQUIRE (test == Catch::Approx { ref }.margin (1.0e-6));
for (const auto [ch, ref_data, test_data] : chowdsp::buffer_iters::zip_channels (std::as_const (ref_buffer_out),
std::as_const (test_buffer_out)))
{
for (const auto [ref, test] : chowdsp::zip (ref_data, test_data))
REQUIRE (test == Catch::Approx { ref }.margin (1.0e-6));
}
}
}

Expand All @@ -105,37 +105,37 @@ static void test_decim (int n_channels, int n_samples, bool use_avx)
chowdsp::Buffer<float> buffer_in { n_channels, n_samples * factor };
for (auto [ch, data] : chowdsp::buffer_iters::channels (buffer_in))
for (auto [n, x] : chowdsp::enumerate (data))
x = static_cast<float> (n + (size_t) ch);
x = static_cast<float> (n + (size_t) ch + 1);

chowdsp::ArenaAllocator<> ref_arena { 1 << 14 };
chowdsp::FIRPolyphaseDecimator<float, factor, n_taps> ref_filter;
ref_filter.prepare (n_channels, n_samples * factor, coeffs, ref_arena);

namespace pfir = chowdsp::polyphase_fir;
const auto alignment = use_avx ? 32 : 16;
const auto block_size_1 = n_samples / 2;
const auto block_size_2 = n_samples - block_size_1;
const auto max_block_size = std::max (block_size_1, block_size_2);

const auto persistent_bytes = pfir::persistent_bytes_required (n_channels, n_taps, factor, max_block_size, alignment);
const auto scratch_bytes = pfir::scratch_bytes_required (n_taps, factor, max_block_size, alignment);
chowdsp::ArenaAllocator<> arena { persistent_bytes + scratch_bytes + alignment };

auto state = pfir::init (n_channels,
n_taps,
factor,
max_block_size,
arena.allocate_bytes (persistent_bytes, alignment),
alignment);
pfir::load_coeffs (state, coeffs, n_taps);
auto* scratch_data = arena.allocate_bytes (scratch_bytes, alignment);

chowdsp::Buffer<float> ref_buffer_out { n_channels, n_samples };
chowdsp::Buffer<float> test_buffer_out { n_channels, n_samples };
for (int i = 0; i < 4; ++i)
{
chowdsp::ArenaAllocator<> arena { 1 << 14 };
chowdsp::FIRPolyphaseDecimator<float, factor, n_taps> ref_filter;
ref_filter.prepare (n_channels, n_samples * factor, coeffs, arena);
ref_filter.processBlock (buffer_in, ref_buffer_out);
}

chowdsp::Buffer<float> test_buffer_out { n_channels, n_samples };
{
namespace pfir = chowdsp::polyphase_fir;
const auto alignment = use_avx ? 32 : 16;
const auto block_size_1 = n_samples / 2;
const auto block_size_2 = n_samples - block_size_1;
const auto max_block_size = std::max (block_size_1, block_size_2);

const auto persistent_bytes = pfir::persistent_bytes_required (n_channels, n_taps, factor, max_block_size, alignment);
const auto scratch_bytes = pfir::scratch_bytes_required (n_taps, factor, max_block_size, alignment);
chowdsp::ArenaAllocator<> arena { persistent_bytes + scratch_bytes + alignment };

auto state = pfir::init (n_channels,
n_taps,
factor,
max_block_size,
arena.allocate_bytes (persistent_bytes, alignment),
alignment);
pfir::load_coeffs (state, coeffs, n_taps);

auto* scratch_data = arena.allocate_bytes (scratch_bytes, alignment);
auto half_buffer_in = chowdsp::BufferView { buffer_in, 0, block_size_1 * factor };
auto half_buffer_out = chowdsp::BufferView { test_buffer_out, 0, block_size_1 };
pfir::process_decimate (state,
Expand All @@ -155,13 +155,100 @@ static void test_decim (int n_channels, int n_samples, bool use_avx)
block_size_2 * factor,
scratch_data,
use_avx);

for (const auto [ch, ref_data, test_data] : chowdsp::buffer_iters::zip_channels (std::as_const (ref_buffer_out),
std::as_const (test_buffer_out)))
{
for (const auto [ref, test] : chowdsp::zip (ref_data, test_data))
REQUIRE (test == Catch::Approx { ref }.margin (1.0e-6));
}
}
}

/**
 * Round-trip test: interpolates the input by `factor` and then decimates the
 * result by `factor`, comparing the chowdsp::polyphase_fir implementation
 * against the reference FIRPolyphaseInterpolator / FIRPolyphaseDecimator pair.
 *
 * The reference filters process the whole block at once, while the
 * implementation under test processes it as two consecutive sub-blocks
 * (n_samples / 2 and the remainder). The whole sequence is repeated four
 * times on the same filter objects / state, and the decimated outputs are
 * compared after every pass.
 *
 * @tparam factor     interpolation / decimation ratio
 * @param n_channels  number of channels to process
 * @param n_samples   number of samples per channel at the base rate
 * @param use_avx     forwarded to the pfir process calls; also selects the
 *                    alignment (32 when true, 16 otherwise)
 */
template <int factor>
static void test_round_trip (int n_channels, int n_samples, bool use_avx)
{
    // Input is a per-channel ramp: sample n of channel ch holds n + ch + 1.
    chowdsp::Buffer<float> buffer_in { n_channels, n_samples };
    for (auto [ch, data] : chowdsp::buffer_iters::channels (buffer_in))
        for (auto [n, x] : chowdsp::enumerate (data))
            x = static_cast<float> (n + (size_t) ch + 1);

    // Reference filters, prepared once and reused across all passes.
    chowdsp::ArenaAllocator<> ref_arena { 1 << 15 };
    chowdsp::FIRPolyphaseInterpolator<float, factor, n_taps> ref_filter_interp;
    ref_filter_interp.prepare (n_channels, n_samples, coeffs, ref_arena);
    chowdsp::FIRPolyphaseDecimator<float, factor, n_taps> ref_filter_decim;
    ref_filter_decim.prepare (n_channels, n_samples * factor, coeffs, ref_arena);

    namespace pfir = chowdsp::polyphase_fir;
    const auto alignment = use_avx ? 32 : 16;
    const auto block_size_1 = n_samples / 2;
    const auto block_size_2 = n_samples - block_size_1;
    const auto max_block_size = std::max (block_size_1, block_size_2);

    // One arena backs both the persistent state and the scratch space.
    // NOTE(review): the extra `alignment` bytes are presumably slack for the
    // aligned allocations below -- confirm against ArenaAllocator.
    const auto persistent_bytes = pfir::persistent_bytes_required (n_channels, n_taps, factor, max_block_size, alignment);
    const auto scratch_bytes = pfir::scratch_bytes_required (n_taps, factor, max_block_size, alignment);
    chowdsp::ArenaAllocator<> arena { persistent_bytes + scratch_bytes + alignment };

    // State under test, initialised once and reused across all passes.
    auto state = pfir::init (n_channels,
                             n_taps,
                             factor,
                             max_block_size,
                             arena.allocate_bytes (persistent_bytes, alignment),
                             alignment);
    pfir::load_coeffs (state, coeffs, n_taps);
    auto* scratch_data = arena.allocate_bytes (scratch_bytes, alignment);

    chowdsp::Buffer<float> ref_buffer_interp { n_channels, n_samples * factor };
    chowdsp::Buffer<float> ref_buffer_out { n_channels, n_samples };
    chowdsp::Buffer<float> test_buffer_interp { n_channels, n_samples * factor };
    chowdsp::Buffer<float> test_buffer_out { n_channels, n_samples };
    for (int i = 0; i < 4; ++i)
    {
        // Reference path: full block up, then full block down.
        ref_filter_interp.processBlock (buffer_in, ref_buffer_interp);
        ref_filter_decim.processBlock (ref_buffer_interp, ref_buffer_out);

        // Test path, first sub-block: [0, block_size_1).
        auto half_buffer_in = chowdsp::BufferView { buffer_in, 0, block_size_1 };
        auto half_buffer_interp = chowdsp::BufferView { test_buffer_interp, 0, block_size_1 * factor };
        auto half_buffer_out = chowdsp::BufferView { test_buffer_out, 0, block_size_1 };
        pfir::process_interpolate (state,
                                   half_buffer_in.getArrayOfReadPointers(),
                                   half_buffer_interp.getArrayOfWritePointers(),
                                   n_channels,
                                   block_size_1,
                                   scratch_data,
                                   use_avx);
        pfir::process_decimate (state,
                                half_buffer_interp.getArrayOfReadPointers(),
                                half_buffer_out.getArrayOfWritePointers(),
                                n_channels,
                                block_size_1 * factor,
                                scratch_data,
                                use_avx);

        // Test path, second sub-block: [block_size_1, n_samples).
        half_buffer_in = chowdsp::BufferView { buffer_in, block_size_1, block_size_2 };
        half_buffer_interp = chowdsp::BufferView { test_buffer_interp, block_size_1 * factor, block_size_2 * factor };
        half_buffer_out = chowdsp::BufferView { test_buffer_out, block_size_1, block_size_2 };
        pfir::process_interpolate (state,
                                   half_buffer_in.getArrayOfReadPointers(),
                                   half_buffer_interp.getArrayOfWritePointers(),
                                   n_channels,
                                   block_size_2,
                                   scratch_data,
                                   use_avx);
        pfir::process_decimate (state,
                                half_buffer_interp.getArrayOfReadPointers(),
                                half_buffer_out.getArrayOfWritePointers(),
                                n_channels,
                                block_size_2 * factor,
                                scratch_data,
                                use_avx);

        // Both paths must agree sample-for-sample after every pass.
        for (const auto [ch, ref_data, test_data] : chowdsp::buffer_iters::zip_channels (std::as_const (ref_buffer_out),
                                                                                         std::as_const (test_buffer_out)))
        {
            for (const auto [ref, test] : chowdsp::zip (ref_data, test_data))
                REQUIRE (test == Catch::Approx { ref }.margin (1.0e-6));
        }
    }
}

Expand Down Expand Up @@ -212,3 +299,27 @@ TEST_CASE ("Polyphase Decimation")
}
}
}

// Runs test_round_trip for factors 1, 2 and 3 over every combination of
// SIMD mode, channel count and block length listed below.
TEST_CASE ("Round-Trip Polyphase Interpolation/Decimation")
{
    // The `true` (AVX) entry is only included when one of these x86 macros is defined.
    const bool avx_modes[] = {
        false,
#if defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)
        true,
#endif
    };
    const int channel_counts[] = { 1, 2 };
    const int block_lengths[] = { 16, 127 };

    for (const bool with_avx : avx_modes)
        for (const int num_channels : channel_counts)
            for (const int num_samples : block_lengths)
            {
                test_round_trip<1> (num_channels, num_samples, with_avx);
                test_round_trip<2> (num_channels, num_samples, with_avx);
                test_round_trip<3> (num_channels, num_samples, with_avx);
            }
}