Audio computing on F411RE produces static noises, but not on Windows, same code

cloudyrowly · ‎2024-05-28

I am making a function that convolves over a given audio file with an impulse filter to make stereo 3d sound (binaural audio).

The best state I've reached is a clear left channel but some constant static in the right channel, although the buffer indexing for both channels was the same.

I went over to my computer, adapted the program and ran it with GCC on Windows; it works flawlessly and the sound is clean. But when I adapted that working solution back onto the MCU, the output is now just static noise. I guess it has something to do with memory and the stack, but I'm not sure where to start. A faulty audio file sample has been attached, VOLUME WARNING!!!

So the main question is, why would a convolving solution work on Windows, but not on the MCU?

The convolution has been split into small blocks to limit memory usage.

Thanks in advance!

The code is below, disregard the file open, close operations, they are there.

Here is the most recent code that produces only static in both channels:


static float filter_l[256];  // given filters have 256 samples each channel
	static float filter_r[256];
	f_read(&f_file, filter_l, sizeof(float)*256, &byteCheck);
	f_read(&f_file, filter_r, sizeof(float)*256, &byteCheck);
	f_close(&f_file);

	// setup for audio file comprehension and format
	static TinyWav tw; // address to store read audio file
	FIL tw_fil;
	tw.fp = &tw_fil;
	uint32_t sample_rate;

	// load audio file
	f_res = tinywav_open_read(&tw, audio_file, TW_INLINE);

	// get # of elements of data block per channel
	uint32_t data_size = tw.h.Subchunk2Size / (sizeof(float));
																				  // 8 = 4 * 2 (4 bytes/float * 2 channels
	sample_rate = tw.h.SampleRate;          // get audio's sample rate
	uint32_t data_left = data_size;
	uint32_t iteration = (uint32_t)ceil((float)data_size/CONVOLVE_BLOCK_SIZE);

	// prepare output file
	static TinyWav tw_out;
	FIL tw_out_fil;
	tw_out.fp = &tw_out_fil;
	tinywav_open_write(&tw_out,
	    NUM_CHANNELS,
	    sample_rate,
	    TW_FLOAT32, // the output samples will be 32-bit floats. TW_INT16 is also supported
	    TW_SPLIT,   // the samples to be written will be provided by an array of pointer
								  // that points to different sub-arrays: [[L,L,L,L], [R,R,R,R]]
	    output_path // the output path
	);

	static float cache_l[FILTER_SIZE - 1] = {0};
	static float cache_r[FILTER_SIZE - 1] = {0};

	static float sample_l[CONVOLVE_BLOCK_SIZE + FILTER_SIZE - 1] = {0};
	static float sample_r[CONVOLVE_BLOCK_SIZE + FILTER_SIZE - 1] = {0};

	//static float sample_out_inline[CONVOLVE_BLOCK_SIZE * NUM_CHANNELS] = {0};

	static float samples[2 * CONVOLVE_BLOCK_SIZE] = {0};
	static float out_l[CONVOLVE_BLOCK_SIZE + FILTER_SIZE - 1] = {0};
	static float out_r[CONVOLVE_BLOCK_SIZE + FILTER_SIZE - 1] = {0};

	for (int i = 0; i < iteration; ++i) {
		uint32_t input_seq_length = data_left < CONVOLVE_BLOCK_SIZE ? data_left : CONVOLVE_BLOCK_SIZE;
		uint32_t sample_length;
		if(i == 0) {
			sample_length = input_seq_length;
		} else {
			sample_length = input_seq_length + FILTER_SIZE - 1;
		}

		if(i == 0) {  // 1st iteration (edge case) as there are no data to prepend
			tinywav_read_f(&tw, samples, input_seq_length);

			// split inline sample array
			copy_array_f(sample_l, samples, 0, 0, input_seq_length);
			copy_array_f(sample_r, samples, 0, input_seq_length, input_seq_length);

			// Cache the last n samples of the current block to pad to the next block
			// so that convolution is continuous (where n = FILTER_SIZE - 1)
			copy_array_f(cache_l, sample_l, 0, input_seq_length - FILTER_SIZE + 1, FILTER_SIZE - 1);
			copy_array_f(cache_r, sample_r, 0, input_seq_length - FILTER_SIZE + 1, FILTER_SIZE - 1);
			// Convolution: A:= filter, B:= input seq
			// Convolution for Left channel
			conv_32(filter_l, sample_l, out_l, 0, input_seq_length);
			// Convolution for Right channel
			conv_32(filter_r, sample_r, out_r, 0, input_seq_length);

			float* out_split[2] = {out_l, out_r};
			
			tinywav_write_f(&tw_out, out_split, input_seq_length);

		} else {  // i > 0 => prepend data first and then convolve so convolution is continuous
			//sample_length = input_seq_length + FILTER_SIZE - 1;
			// load cache, prepend tail of last block into current block
			copy_array_f(sample_l, cache_l, 0, 0, FILTER_SIZE - 1);
			copy_array_f(sample_r, cache_r, 0, 0, FILTER_SIZE - 1);

			tinywav_read_f(&tw, samples, input_seq_length);

			// split inline sample array
			copy_array_f(sample_l, samples, FILTER_SIZE - 1, 0, input_seq_length);
			copy_array_f(sample_r, samples, FILTER_SIZE - 1, input_seq_length, input_seq_length);

			// Cache
			copy_array_f(cache_l, sample_l, 0, sample_length - FILTER_SIZE + 1, FILTER_SIZE - 1);
			copy_array_f(cache_r, sample_r, 0, sample_length - FILTER_SIZE + 1, FILTER_SIZE - 1);

			// Convolution: A:= filter, B:= input seq
			// Convolution for Left channel
			conv_32(filter_l, sample_l, out_l, FILTER_SIZE - 1, input_seq_length);
			// Convolution for Right channel
                conv_32(filter_r, sample_r, out_r, FILTER_SIZE - 1, input_seq_length);
			
			float* out_split[2] = {&out_l[FILTER_SIZE - 1], &out_r[FILTER_SIZE - 1]};
			tinywav_write_f(&tw_out, out_split, input_seq_length);
		}

		data_left -= input_seq_length;
		// print to console every 10 rounds or end of loop
		if(i % 10 == 0 || i == iteration - 1) {
			print_f("done convolution block: %d / %d (%i)\r\n", i, iteration - 1);
		}
	}

Here is another version of convolution code, more optimised, and only has static on the R channel:


// Prepare array to cache the tail of current samples to prepend to next convolving block
	static float cache_last_samples[(NUM_CHANNELS * (FILTER_SIZE - 1))] = {0};
	float* cache_ptrs[NUM_CHANNELS];
  
	// For audio read
	// samples are cached in TW_SPLIT format: [[L,L,L,L], [R,R,R,R]]
	static float samples[(NUM_CHANNELS * CONVOLVE_BLOCK_SIZE) + ((FILTER_SIZE - 1) * NUM_CHANNELS)] = {0};
	// create pointers for left and right channel in samples array
	float* sample_ptrs[NUM_CHANNELS];
  
	// For audio write
	// array to store converted binaural sample
	static float sample_out[(NUM_CHANNELS * CONVOLVE_BLOCK_SIZE) + ((FILTER_SIZE - 1) * NUM_CHANNELS)] = {0};
	float* sample_out_ptrs[NUM_CHANNELS];
  
	// sample pointers to offset for prepended cache when i > 0
	static float* sample_ptrs_offset[NUM_CHANNELS];
	float* sample_out_ptrs_offset[NUM_CHANNELS];
  
	// generate pointers to different channel section for both read and write
	for (int j = 0; j < NUM_CHANNELS; ++j) {
		cache_ptrs[j] = cache_last_samples + j * (FILTER_SIZE - 1);
		sample_ptrs[j] = samples + j * (CONVOLVE_BLOCK_SIZE + (FILTER_SIZE));
		sample_out_ptrs[j] = sample_out + j * (CONVOLVE_BLOCK_SIZE + (FILTER_SIZE));
		sample_ptrs_offset[j] = sample_ptrs[j] + (FILTER_SIZE - 1);
		sample_out_ptrs_offset[j] = sample_out_ptrs[j] + (FILTER_SIZE - 1);
	}
  
	for (int i = 0; i < iteration; ++i) {
		uint32_t input_seq_length = data_left < CONVOLVE_BLOCK_SIZE ? data_left : CONVOLVE_BLOCK_SIZE;
	  uint32_t sample_length = input_seq_length + FILTER_SIZE - 1;
  
	  if(i == 0) {  // 1st iteration (edge case) as there are no data to prepend
			tinywav_read_f(&tw, sample_ptrs, input_seq_length);
  
			// Cache the last n samples of the current block to pad to the next block
			// so that convolution is continuous (where n = FILTER_SIZE - 1)
			copy_array_f(cache_ptrs[0], sample_ptrs[0], 0, input_seq_length - FILTER_SIZE + 1, FILTER_SIZE - 1);
			copy_array_f(cache_ptrs[1], sample_ptrs[1], 0, input_seq_length - FILTER_SIZE + 1, FILTER_SIZE - 1);
			// Convolution: A:= filter, B:= input seq
			// Convolution for Left channel
			arm_conv_partial_f32(filter_l, FILTER_SIZE, sample_ptrs[0], input_seq_length,
													 sample_out_ptrs[0], 0, input_seq_length);
			// Convolution for Right channel
			arm_conv_partial_f32(filter_r, FILTER_SIZE, sample_ptrs[1], input_seq_length,
													 sample_out_ptrs[1], 0, input_seq_length);
  
			//copy_array_f(sample_out_ptrs[1], sample_ptrs[1], 0, 0, input_seq_length);  // TEST
			tinywav_write_f(&tw_out, sample_out_ptrs, input_seq_length);
  
		} else {  // i > 0 => prepend data first and then convolve so convolution is continuous
			//sample_length = input_seq_length + FILTER_SIZE - 1;
			// load cache, prepend tail of last block into current block
			copy_array_f(sample_ptrs[0], cache_ptrs[0], 0, 0, FILTER_SIZE - 1);
			copy_array_f(sample_ptrs[1], cache_ptrs[1], 0, 0, FILTER_SIZE - 1);
  
			tinywav_read_f(&tw, sample_ptrs_offset, input_seq_length);
  
			// Cache
			copy_array_f(cache_ptrs[0], sample_ptrs_offset[0], 0, input_seq_length - FILTER_SIZE + 1, FILTER_SIZE - 1);
			copy_array_f(cache_ptrs[1], sample_ptrs_offset[1], 0, input_seq_length - FILTER_SIZE + 1, FILTER_SIZE - 1);
  
			// Convolution: A:= filter, B:= input seq
			// Convolution for Left channel
			arm_status as1 = arm_conv_partial_f32(filter_l, FILTER_SIZE, sample_ptrs[0], sample_length,
													 sample_out_ptrs[0], FILTER_SIZE - 1, input_seq_length);
			// Convolution for Right channel
		  arm_status as2 = arm_conv_partial_f32(filter_r, FILTER_SIZE, sample_ptrs[1], sample_length,
													 sample_out_ptrs[1], FILTER_SIZE - 1, input_seq_length);
  
		  if (as1 != ARM_MATH_SUCCESS || as2 != ARM_MATH_SUCCESS) {
		  	printf("arm_conv error (%i)\r\n", as1, as2);
		  }
			//copy_array_f(sample_out_ptrs[1], sample_ptrs_offset[1], 256, 0, input_seq_length);  // TEST
			tinywav_write_f(&tw_out, sample_out_ptrs_offset, input_seq_length);
			}
  
		data_left -= input_seq_length;
		// print to console every 10 rounds or end of loop
		if(i % 10 == 0 || i == iteration - 1) {
			printf("done convolution block: %d / %d (%i)\r\n", i, iteration - 1);
		}
	}

	tinywav_close_write(&tw_out);
	tinywav_close_read(&tw);