Skip to content

Commit 21be874

Browse files
Implement processing with latency
1 parent 09c6105 commit 21be874

File tree

4 files changed

+164
-21
lines changed

4 files changed

+164
-21
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ convolution using [`chowdsp_fft`](https://github.com/Chowdhury-DSP/chowdsp_fft).
88

99
## TODO
1010
- Rename state to specify uniform partitioning
11-
- Implement process w/ latency
1211

1312
## License
1413
`chowdsp_convolution` is licensed under the BSD 3-clause license. Enjoy!

chowdsp_convolution.cpp

Lines changed: 107 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ void process_samples (const Config* config,
136136
const auto segment_num_samples = config->fft_size;
137137
int num_samples_processed = 0;
138138
auto index_step = state->num_segments / ir->num_segments;
139+
const auto fft_inv_scale = 1.0f / static_cast<float> (config->fft_size);
139140

140141
while (num_samples_processed < num_samples)
141142
{
@@ -157,7 +158,6 @@ void process_samples (const Config* config,
157158
fft::FFT_FORWARD);
158159

159160
// Complex multiplication
160-
const auto fft_inv_scale = 1.0f / static_cast<float> (config->fft_size);
161161
if (input_data_was_empty)
162162
{
163163
memset (state->output_temp_data, 0, config->fft_size * sizeof (float));
@@ -243,4 +243,110 @@ void process_samples (const Config* config,
243243
num_samples_processed += samples_to_process;
244244
}
245245
}
246+
247+
/**
 * Block-buffered variant of process_samples().
 *
 * Incoming samples are collected in state->input_data while the caller is
 * handed samples from state->output_data at the same offset. Because
 * state->output_data is only refreshed after a full block has been
 * collected, the caller always receives the result of the previously
 * completed block (the header documents this as config->block_size
 * samples of added latency).
 *
 * The FFT / multiply / overlap-add work runs only when
 * state->input_data_pos reaches config->block_size.
 *
 * config      - supplies fft, fft_size and block_size.
 * ir          - supplies the IR segments and their count.
 * state       - buffers and positions that persist between calls; mutated here.
 * input       - num_samples floats are read.
 * output      - num_samples floats are written.
 * num_samples - number of samples to consume and produce in this call.
 * fft_scratch - passed through to fft::fft_transform_unordered.
 */
void process_samples_with_latency (const Config* config,
248+
const IR_State* ir,
249+
Process_State* state,
250+
const float* input,
251+
float* output,
252+
int num_samples,
253+
float* fft_scratch)
254+
{
255+
// Each segment occupies fft_size floats in the segment arrays.
const auto segment_num_samples = config->fft_size;
256+
int num_samples_processed = 0;
257+
// Stride used below to walk state->segments once per IR segment.
auto index_step = state->num_segments / ir->num_segments;
258+
// Scale factor handed to fft_convolve_unordered.
// NOTE(review): presumably normalises the unscaled inverse FFT - confirm against chowdsp_fft.
const auto fft_inv_scale = 1.0f / static_cast<float> (config->fft_size);
259+
260+
while (num_samples_processed < num_samples)
261+
{
262+
// Never run past the end of the current block: take whatever is left of
// the caller's buffer or of the block, whichever is smaller.
const auto samples_to_process = std::min (num_samples - num_samples_processed,
263+
config->block_size - state->input_data_pos);
264+
265+
// Append the new input to the pending block.
memcpy (state->input_data + state->input_data_pos,
266+
input + num_samples_processed,
267+
samples_to_process * sizeof (float));
268+
269+
// Emit output from the same offset of the last computed block.
memcpy (output + num_samples_processed,
270+
state->output_data + state->input_data_pos,
271+
samples_to_process * sizeof (float));
272+
273+
num_samples_processed += samples_to_process;
274+
state->input_data_pos += samples_to_process;
275+
276+
// A full block has been collected: compute the next output block.
if (state->input_data_pos == config->block_size)
277+
{
278+
// Copy input data in input segment
auto* input_segment_data = state->segments + segment_num_samples * state->current_segment;
279+
memcpy (input_segment_data, state->input_data, config->fft_size * sizeof (float));
280+
281+
// In-place forward transform of the current input segment.
fft::fft_transform_unordered (config->fft,
282+
input_segment_data,
283+
input_segment_data,
284+
fft_scratch,
285+
fft::FFT_FORWARD);
286+
287+
// Complex multiplication
// Start from a zeroed buffer; IR segments 1..num_segments-1 are handled
// in the loop, segment 0 separately afterwards.
// NOTE(review): fft_convolve_unordered presumably accumulates into its output - confirm.
memset (state->output_temp_data, 0, config->fft_size * sizeof (float));
288+
289+
auto index = state->current_segment;
290+
for (int seg_idx = 1; seg_idx < ir->num_segments; ++seg_idx)
291+
{
292+
// Step through state->segments, wrapping around at the end.
index += index_step;
293+
if (index >= state->num_segments)
294+
index -= state->num_segments;
295+
296+
const auto* input_segment = state->segments + segment_num_samples * index;
297+
const auto* ir_segment = ir->segments + segment_num_samples * seg_idx;
298+
fft::fft_convolve_unordered (config->fft,
299+
input_segment,
300+
ir_segment,
301+
state->output_temp_data,
302+
fft_inv_scale);
303+
}
304+
305+
// Move the partial result into the output buffer, then apply the
// current input segment against the first IR segment.
memcpy (state->output_data, state->output_temp_data, config->fft_size * sizeof (float));
306+
307+
fft::fft_convolve_unordered (config->fft,
308+
input_segment_data,
309+
ir->segments,
310+
state->output_data,
311+
fft_inv_scale);
312+
// In-place backward transform of the output buffer.
fft::fft_transform_unordered (config->fft,
313+
state->output_data,
314+
state->output_data,
315+
fft_scratch,
316+
fft::FFT_BACKWARD);
317+
318+
// Add overlap
// Covers the first block_size samples of the output.
fft::fft_accumulate (config->fft,
319+
state->overlap_data,
320+
state->output_data,
321+
state->output_data,
322+
config->block_size);
323+
324+
// Input buffer is empty again now
memset (state->input_data, 0, config->fft_size * sizeof (float));
325+
326+
// Extra step for segSize > blockSize
// Only runs when fft_size exceeds 2 * block_size.
const auto extra_block_samples = config->fft_size - 2 * config->block_size;
327+
if (extra_block_samples > 0)
328+
{
329+
fft::fft_accumulate (config->fft,
330+
state->overlap_data + config->block_size,
331+
state->output_data + config->block_size,
332+
state->output_data + config->block_size,
333+
extra_block_samples);
334+
}
335+
336+
// Save the overlap
// Everything past the first block_size output samples is kept for the next block.
memcpy (state->overlap_data,
337+
state->output_data + config->block_size,
338+
(config->fft_size - config->block_size) * sizeof (float));
339+
340+
// Step the current segment index backwards, wrapping to the last segment.
state->current_segment = (state->current_segment > 0) ? (state->current_segment - 1) : (state->num_segments - 1);
341+
342+
// Start collecting the next block from the beginning.
state->input_data_pos = 0;
343+
}
344+
}
345+
}
246352
} // namespace chowdsp::convolution

chowdsp_convolution.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,5 +64,20 @@ void process_samples (const Config*,
6464
int N,
6565
float* fft_scratch);
6666

67+
/**
68+
* Similar to process_samples(), but with an added
69+
* config->block_size samples of latency. In exchange,
70+
* the convolution processing will be a little bit
71+
* faster, especially when processing with odd block
72+
* sizes.
73+
*/
74+
void process_samples_with_latency (const Config*,
75+
const IR_State*,
76+
Process_State*,
77+
const float* in,
78+
float* out,
79+
int N,
80+
float* fft_scratch);
81+
6782
// NOTE: process_samples_with_latency is declared above; the former @TODO for it is resolved.
6883
} // namespace chowdsp::convolution

test/chowdsp_convolution_test.cpp

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -323,10 +323,11 @@ std::vector<float> generate (size_t N, std::mt19937& rng)
323323
return data;
324324
}
325325

326-
static bool test_convolution (int ir_length_samples, int block_size, int num_blocks)
326+
static bool test_convolution (int ir_length_samples, int block_size, int num_blocks, bool latency)
327327
{
328328
std::cout << "Running test with IR length: " << ir_length_samples
329-
<< " and block size: " << block_size << '\n';
329+
<< ", block size: " << block_size
330+
<< ", latency: " << (latency ? "ON" : "OFF") << '\n';
330331

331332
std::mt19937 rng { 0x12345 };
332333
auto ir = generate (ir_length_samples, rng);
@@ -339,7 +340,10 @@ static bool test_convolution (int ir_length_samples, int block_size, int num_blo
339340
{
340341
const auto* block_in = input.data() + (i * block_size);
341342
auto* block_out_ref = ref_output.data() + (i * block_size);
342-
reference_engine.processSamples (block_in, block_out_ref, block_size);
343+
if (latency)
344+
reference_engine.processSamplesWithAddedLatency (block_in, block_out_ref, block_size);
345+
else
346+
reference_engine.processSamples (block_in, block_out_ref, block_size);
343347
}
344348
auto duration = std::chrono::high_resolution_clock::now() - start;
345349
auto ref_duration_seconds = std::chrono::duration<float> (duration).count();
@@ -365,13 +369,27 @@ static bool test_convolution (int ir_length_samples, int block_size, int num_blo
365369
{
366370
const auto* block_in = input.data() + (i * block_size);
367371
auto* block_out_test = test_output.data() + (i * block_size);
368-
chowdsp::convolution::process_samples (&conv_config,
369-
&ir_state,
370-
&conv_state,
371-
block_in,
372-
block_out_test,
373-
block_size,
374-
fft_scratch);
372+
if (latency)
373+
{
374+
chowdsp::convolution::process_samples_with_latency (
375+
&conv_config,
376+
&ir_state,
377+
&conv_state,
378+
block_in,
379+
block_out_test,
380+
block_size,
381+
fft_scratch);
382+
}
383+
else
384+
{
385+
chowdsp::convolution::process_samples (&conv_config,
386+
&ir_state,
387+
&conv_state,
388+
block_in,
389+
block_out_test,
390+
block_size,
391+
fft_scratch);
392+
}
375393
}
376394
duration = std::chrono::high_resolution_clock::now() - start;
377395
auto test_duration_seconds = std::chrono::duration<float> (duration).count();
@@ -403,17 +421,22 @@ static bool test_convolution (int ir_length_samples, int block_size, int num_blo
403421
int main()
404422
{
405423
auto success = true;
406-
success &= test_convolution (6000, 2048, 4);
407-
success &= test_convolution (6000, 512, 20);
408-
success &= test_convolution (6000, 511, 20);
409-
success &= test_convolution (6000, 32, 400);
410-
success &= test_convolution (100, 2048, 2);
411-
success &= test_convolution (100, 512, 4);
412-
success &= test_convolution (100, 511, 4);
413-
success &= test_convolution (100, 32, 10);
424+
for (bool latency : { false, true })
425+
{
426+
success &= test_convolution (6000, 2048, 4, latency);
427+
success &= test_convolution (6000, 512, 20, latency);
428+
success &= test_convolution (6000, 511, 20, latency);
429+
success &= test_convolution (6000, 32, 400, latency);
430+
success &= test_convolution (100, 2048, 2, latency);
431+
success &= test_convolution (100, 512, 4, latency);
432+
success &= test_convolution (100, 511, 4, latency);
433+
success &= test_convolution (100, 32, 10, latency);
434+
}
414435

415436
#if BUILD_RELEASE
416-
success &= test_convolution (48'000, 512, 10'000);
437+
std::cout << "Speed comparisons:\n";
438+
success &= test_convolution (48'000, 512, 10'000, false);
439+
success &= test_convolution (48'000, 512, 10'000, true);
417440
#endif
418441

419442
return success ? 0 : 1;

0 commit comments

Comments
 (0)