2024-05-28 10:12 PM
I am making a function that convolves over a given audio file with an impulse filter to make stereo 3d sound (binaural audio).
The best state I've reached is a clear left channel but constant static in the right channel, although the buffer indexing for both channels was the same.
I went over to my computer, adapted the program, and ran it with GCC on Windows: it works flawlessly and the sound is clean. But when I adapt that working solution back onto the MCU, the output is now just static noise. I guess it has something to do with memory and the stack, but I'm not sure where to start. A faulty audio file sample has been attached, VOLUME WARNING!!!
So the main question is, why would a convolving solution work on Windows, but not on the MCU?
The convolution has been split into small blocks to limit memory usage.
Thanks in advance!
The code is below, disregard the file open, close operations, they are there.
Here is the most recent code that produces only static in both channels:
// Impulse-response filters, one per output channel, 256 taps each.
// They are read back to back from the already-open filter file:
// left channel first, then right.
static float filter_l[256];
static float filter_r[256];

// NOTE(review): neither the f_read() return codes nor byteCheck are inspected,
// so a short or failed read would go unnoticed here.
f_read(&f_file, filter_l, sizeof filter_l, &byteCheck);
f_read(&f_file, filter_r, sizeof filter_r, &byteCheck);
f_close(&f_file);
// setup for audio file comprehension and format
static TinyWav tw; // address to store read audio file
FIL tw_fil;
tw.fp = &tw_fil;
uint32_t sample_rate;
// load audio file
f_res = tinywav_open_read(&tw, audio_file, TW_INLINE);
// get # of elements of data block per channel
uint32_t data_size = tw.h.Subchunk2Size / (sizeof(float));
// 8 = 4 * 2 (4 bytes/float * 2 channels
sample_rate = tw.h.SampleRate; // get audio's sample rate
uint32_t data_left = data_size;
uint32_t iteration = (uint32_t)ceil((float)data_size/CONVOLVE_BLOCK_SIZE);
// Set up the writer for the processed audio file.
static TinyWav tw_out;
FIL tw_out_fil;
tw_out.fp = &tw_out_fil;

// Output samples are 32-bit floats (TW_INT16 is also supported) and are
// handed to the writer in split layout, i.e. as an array of pointers to
// per-channel sub-arrays: [[L,L,L,L], [R,R,R,R]].
tinywav_open_write(&tw_out, NUM_CHANNELS, sample_rate, TW_FLOAT32, TW_SPLIT, output_path);
// Working buffers. All have static storage duration, so they start out
// zero-filled without an explicit initializer.

// Tail (FILTER_SIZE - 1 samples) of the previous block, per channel.
static float cache_l[FILTER_SIZE - 1];
static float cache_r[FILTER_SIZE - 1];

// Per-channel convolution input: optional prepended tail + one block.
static float sample_l[CONVOLVE_BLOCK_SIZE + FILTER_SIZE - 1];
static float sample_r[CONVOLVE_BLOCK_SIZE + FILTER_SIZE - 1];

// Raw block as delivered by the reader (both channels, inline layout).
static float samples[2 * CONVOLVE_BLOCK_SIZE];

// Per-channel convolution output.
static float out_l[CONVOLVE_BLOCK_SIZE + FILTER_SIZE - 1];
static float out_r[CONVOLVE_BLOCK_SIZE + FILTER_SIZE - 1];
// Process the file block by block. Every block after the first is prefixed
// with the last FILTER_SIZE - 1 input samples of the preceding data so the
// convolution is continuous across block boundaries.
// NOTE(review): copy_array_f is used as (dst, src, dst_offset, src_offset,
// count) and conv_32 as (filter, input, output, first_index, count); both are
// defined elsewhere - confirm.
for (uint32_t i = 0; i < iteration; ++i) {
    const uint32_t input_seq_length =
        data_left < CONVOLVE_BLOCK_SIZE ? data_left : CONVOLVE_BLOCK_SIZE;

    // Number of prepended samples: none for the first block (edge case, there
    // is no earlier data), FILTER_SIZE - 1 afterwards.
    const uint32_t history = (i == 0) ? 0 : (FILTER_SIZE - 1);
    const uint32_t sample_length = history + input_seq_length;

    // Load cache: prepend the tail of the previous block.
    if (history > 0) {
        copy_array_f(sample_l, cache_l, 0, 0, history);
        copy_array_f(sample_r, cache_r, 0, 0, history);
    }

    // NOTE(review): the number of frames actually read is not checked.
    tinywav_read_f(&tw, samples, input_seq_length);

    // Split the inline sample array [L..., R...] behind the history.
    copy_array_f(sample_l, samples, history, 0, input_seq_length);
    copy_array_f(sample_r, samples, history, input_seq_length, input_seq_length);

    // Cache the last FILTER_SIZE - 1 samples for the next block.
    if (sample_length >= FILTER_SIZE - 1) {
        const uint32_t tail_start = sample_length - (FILTER_SIZE - 1);
        copy_array_f(cache_l, sample_l, 0, tail_start, FILTER_SIZE - 1);
        copy_array_f(cache_r, sample_r, 0, tail_start, FILTER_SIZE - 1);
    } else {
        // Fewer samples than the filter history exist so far: left-pad the
        // cache with silence instead of computing a wrapped-around (unsigned
        // underflow) source offset.
        const uint32_t pad = (FILTER_SIZE - 1) - sample_length;
        for (uint32_t k = 0; k < pad; ++k) {
            cache_l[k] = 0.0f;
            cache_r[k] = 0.0f;
        }
        copy_array_f(cache_l, sample_l, pad, 0, sample_length);
        copy_array_f(cache_r, sample_r, pad, 0, sample_length);
    }

    // Convolution: A := filter, B := input sequence. Only the
    // input_seq_length output points starting at `history` are requested.
    conv_32(filter_l, sample_l, out_l, history, input_seq_length); // left
    conv_32(filter_r, sample_r, out_r, history, input_seq_length); // right

    // Write the freshly computed output points (split layout).
    float *out_split[2] = {&out_l[history], &out_r[history]};
    tinywav_write_f(&tw_out, out_split, input_seq_length);

    data_left -= input_seq_length;

    // Print to console every 10 rounds or at the end of the loop. The format
    // string now has exactly one conversion per argument.
    if (i % 10 == 0 || i == iteration - 1) {
        print_f("done convolution block: %d / %d\r\n", (int)i, (int)(iteration - 1));
    }
}
Here is another version of convolution code, more optimised, and only has static on the R channel:
// Buffers for the split-layout version. Each buffer is one flat array divided
// into NUM_CHANNELS consecutive regions, with a pointer table giving the
// start of every channel's region: [[L,L,L,L], [R,R,R,R]].

// Tail of the current input (FILTER_SIZE - 1 samples per channel) to prepend
// to the next convolving block.
static float cache_last_samples[NUM_CHANNELS * (FILTER_SIZE - 1)] = {0};
float *cache_ptrs[NUM_CHANNELS];

// For audio read: per channel, FILTER_SIZE - 1 prepended samples followed by
// one block.
static float samples[NUM_CHANNELS * (CONVOLVE_BLOCK_SIZE + FILTER_SIZE - 1)] = {0};
float *sample_ptrs[NUM_CHANNELS];

// For audio write: converted binaural samples, same per-channel layout.
static float sample_out[NUM_CHANNELS * (CONVOLVE_BLOCK_SIZE + FILTER_SIZE - 1)] = {0};
float *sample_out_ptrs[NUM_CHANNELS];

// Pointers that skip the prepended cache region (used when i > 0).
static float *sample_ptrs_offset[NUM_CHANNELS];
float *sample_out_ptrs_offset[NUM_CHANNELS];

// Distance between channel regions. It must equal the per-channel capacity
// the arrays were sized with; a stride of CONVOLVE_BLOCK_SIZE + FILTER_SIZE
// would let the last channel's region end one float past the array.
const uint32_t channel_stride = CONVOLVE_BLOCK_SIZE + FILTER_SIZE - 1;

// Generate pointers to the different channel sections for read and write.
for (int j = 0; j < NUM_CHANNELS; ++j) {
    cache_ptrs[j] = cache_last_samples + j * (FILTER_SIZE - 1);
    sample_ptrs[j] = samples + j * channel_stride;
    sample_out_ptrs[j] = sample_out + j * channel_stride;
    sample_ptrs_offset[j] = sample_ptrs[j] + (FILTER_SIZE - 1);
    sample_out_ptrs_offset[j] = sample_out_ptrs[j] + (FILTER_SIZE - 1);
}
// Process the file block by block using split-layout buffers. Every block
// after the first is prefixed with the last FILTER_SIZE - 1 input samples of
// the preceding data so the convolution is continuous across blocks.
// NOTE(review): copy_array_f is used as (dst, src, dst_offset, src_offset,
// count); it is defined elsewhere - confirm.
for (uint32_t i = 0; i < iteration; ++i) {
    const uint32_t input_seq_length =
        data_left < CONVOLVE_BLOCK_SIZE ? data_left : CONVOLVE_BLOCK_SIZE;

    // Number of prepended samples: none for the first block (edge case, there
    // is no earlier data), FILTER_SIZE - 1 afterwards.
    const uint32_t history = (i == 0) ? 0 : (FILTER_SIZE - 1);
    const uint32_t sample_length = history + input_seq_length;

    // New samples are read in, and results written out, behind the history.
    float **read_ptrs = (i == 0) ? sample_ptrs : sample_ptrs_offset;
    float **write_ptrs = (i == 0) ? sample_out_ptrs : sample_out_ptrs_offset;

    // Load cache: prepend the tail of the previous block.
    if (history > 0) {
        copy_array_f(sample_ptrs[0], cache_ptrs[0], 0, 0, history);
        copy_array_f(sample_ptrs[1], cache_ptrs[1], 0, 0, history);
    }

    // NOTE(review): the number of frames actually read is not checked.
    tinywav_read_f(&tw, read_ptrs, input_seq_length);

    // Cache the last FILTER_SIZE - 1 samples for the next block. Offsets are
    // taken from the start of the channel region so they cannot wrap around
    // when a block is shorter than FILTER_SIZE - 1.
    if (sample_length >= FILTER_SIZE - 1) {
        const uint32_t tail_start = sample_length - (FILTER_SIZE - 1);
        copy_array_f(cache_ptrs[0], sample_ptrs[0], 0, tail_start, FILTER_SIZE - 1);
        copy_array_f(cache_ptrs[1], sample_ptrs[1], 0, tail_start, FILTER_SIZE - 1);
    } else {
        // Fewer samples than the filter history exist so far: left-pad the
        // cache with silence.
        const uint32_t pad = (FILTER_SIZE - 1) - sample_length;
        for (uint32_t k = 0; k < pad; ++k) {
            cache_ptrs[0][k] = 0.0f;
            cache_ptrs[1][k] = 0.0f;
        }
        copy_array_f(cache_ptrs[0], sample_ptrs[0], pad, 0, sample_length);
        copy_array_f(cache_ptrs[1], sample_ptrs[1], pad, 0, sample_length);
    }

    // Convolution: A := filter, B := input sequence. Only the
    // input_seq_length output points starting at index `history` are
    // requested; the write pointers above are offset by the same amount.
    arm_status as1 = arm_conv_partial_f32(filter_l, FILTER_SIZE, sample_ptrs[0], sample_length,
                                          sample_out_ptrs[0], history, input_seq_length); // left
    arm_status as2 = arm_conv_partial_f32(filter_r, FILTER_SIZE, sample_ptrs[1], sample_length,
                                          sample_out_ptrs[1], history, input_seq_length); // right
    if (as1 != ARM_MATH_SUCCESS || as2 != ARM_MATH_SUCCESS) {
        // Report both status codes (one conversion per argument).
        printf("arm_conv error (%i, %i)\r\n", (int)as1, (int)as2);
    }

    tinywav_write_f(&tw_out, write_ptrs, input_seq_length);

    data_left -= input_seq_length;

    // Print to console every 10 rounds or at the end of the loop. The format
    // string now has exactly one conversion per argument.
    if (i % 10 == 0 || i == iteration - 1) {
        printf("done convolution block: %d / %d\r\n", (int)i, (int)(iteration - 1));
    }
}
tinywav_close_write(&tw_out);
tinywav_close_read(&tw);