HAL SAI cannot run quickly enough for basic audio sampling?

I am trying to take a microphone sample every 62.5 microseconds (16,000 Hz), which is a standard sample rate for audio files.

The problem I have found is that HAL_SAI_Receive_DMA() returns HAL_BUSY for some samples, so we don't receive the required 16,000 samples in one second; it only manages 14,750.

The SAI peripheral can reach rates of 192 kHz, so it makes no sense that it cannot handle 16 kHz. How do I fix the problem of receiving HAL_BUSY?


Here is an example sketch running on an STM32L452RE:

#include <SoftwareSerial.h>
#include <Arduino.h>
#include <HardwareTimer.h>
#include "stm32l4xx.h"
#include "stm32l4xx_hal.h"
#include "stm32l4xx_hal_def.h"
#include "stm32l4xx_hal_gpio.h"
#include "stm32l4xx_hal_sai.h"
#include "stm32l4xx_hal_exti.h"
#include "stm32l4xx_hal_cortex.h"
#include "Sysclock_Config.h"
// GPIO configuration structure; setup() fills it in for the SAI pins on GPIOB.
GPIO_InitTypeDef gpioInit;
// SAI handle; setup() binds it to SAI1 block A and samplerCallback() receives through it.
SAI_HandleTypeDef saiHandle;
// Not referenced anywhere else in the visible sketch.
RCC_PeriphCLKInitTypeDef periphInit;
// DMA handle; setup() binds it to DMA2 channel 1 and links it to saiHandle.
DMA_HandleTypeDef hdmaInit;
// Receive buffer passed to HAL_SAI_Receive_DMA(); GetSample() decodes bytes 0..2 of it.
uint8_t	SAIDataBuffer[8] = { 0 };
// Timer on TIM3; setup() sets its overflow rate to 16000 Hz.
HardwareTimer hardwareTimer(TIM3);
/**
 * HAL assertion hook: prints a banner, then the file name and the line
 * number of the failed assertion, each on its own line, to Serial2.
 *
 * @param inFileName  source file name, printed as a NUL-terminated string.
 * @param line        line number of the failed assertion.
 */
void assert_failed(uint8_t* inFileName, uint32_t line)
{
	Serial2.println(F("*** [HAL ASSERT FAILED] ***"));
	Serial2.printf("%s\r\n", (char*)inFileName);
	// `line` is a uint32_t, which "%i" (int) does not match; print it as an
	// unsigned long with an explicit cast so format and argument agree.
	Serial2.printf("%lu\r\n", (unsigned long)line);
}
// Reports an error location on Serial2: a banner line, then the file name,
// then the line number, each terminated with "\r\n".
//   file: source file name to print.
//   line: source line number to print.
void _Error_Handler(const char* file, int line)
	Serial2.println(F("*** [ERROR HANDLED] ***"));
	Serial2.printf("%s\r\n", file);
	Serial2.printf("%i\r\n", line);
// C-linkage definitions of the HAL SAI callbacks and of the SAI1 / DMA2
// channel 1 interrupt handlers. No statements are visible in any of these
// bodies in this listing.
// NOTE(review): the resolution posted with this sketch says received samples
// need to be handled in the "Transfer Complete" callback, i.e. presumably
// HAL_SAI_RxCpltCallback below — confirm.
// NOTE(review): the two IRQ handlers presumably have to forward to the HAL
// interrupt handlers for saiHandle / hdmaInit; nothing of the kind is visible
// here — confirm.
extern "C"
	void HAL_SAI_TxHalfCpltCallback(SAI_HandleTypeDef* hsai)
	void HAL_SAI_TxCpltCallback(SAI_HandleTypeDef* hsai)
	void HAL_SAI_RxHalfCpltCallback(SAI_HandleTypeDef* hsai)
	void HAL_SAI_RxCpltCallback(SAI_HandleTypeDef* hsai)
	void HAL_SAI_ErrorCallback(SAI_HandleTypeDef* hsai)
	__weak void SAI1_IRQHandler(void)
	__weak void DMA2_Channel1_IRQHandler(void)
		/* USER CODE BEGIN DMA2_Channel1_IRQn 0 */
		/* USER CODE END DMA2_Channel1_IRQn 0 */
		/* USER CODE BEGIN DMA2_Channel1_IRQn 1 */
		/* USER CODE END DMA2_Channel1_IRQn 1 */
// Forward declaration; the definition appears further down in the sketch.
void samplerCallback();
// the setup function runs once when you press reset or power the board
//
// In order, the visible code: configures PB10/PB12/PB15 as SAI1 alternate-function
// pins, drives PB1 low, fills in the SAI handle for block A as master receiver,
// configures DMA2 channel 1 for peripheral-to-memory word transfers and links it
// to the SAI handle, sets the DMA/SAI interrupt priorities, initialises the SAI
// via HAL_SAI_InitProtocol(), sets the TIM3 overflow rate to 16000 Hz and records
// the start time.
void setup()
	// SAI1 pins on port B, push-pull alternate function 13, no pull resistors.
	gpioInit.Pin = GPIO_PIN_15 | GPIO_PIN_10 | GPIO_PIN_12;
	gpioInit.Mode = GPIO_MODE_AF_PP;
	gpioInit.Pull = GPIO_NOPULL;
	gpioInit.Alternate = GPIO_AF13_SAI1;
	HAL_GPIO_Init(GPIOB, &gpioInit);
	pinMode(PB1, OUTPUT);
	digitalWrite(PB1, LOW);
	saiHandle.Instance = SAI1_Block_A; // audio block A.
	saiHandle.Init.Protocol = SAI_FREE_PROTOCOL;
	saiHandle.Init.FirstBit = SAI_FIRSTBIT_MSB;
	saiHandle.Init.AudioFrequency = SAI_AUDIO_FREQUENCY_16K;
	saiHandle.Init.AudioMode = SAI_MODEMASTER_RX; // block a must provide clock signals and receive from the data line.
	saiHandle.Init.Synchro = SAI_ASYNCHRONOUS;	// we only want to use this one audio block.
	saiHandle.Init.SynchroExt = SAI_SYNCEXT_DISABLE; // disable sychronizing the 2 audio blocks.
	saiHandle.Init.OutputDrive = SAI_OUTPUTDRIVE_DISABLE; // assume to power the data?
	// NOTE(review): the resolution posted with this sketch reports that the master
	// clock divider was not configured correctly; this may be the setting involved — confirm.
	saiHandle.Init.NoDivider = SAI_MASTERDIVIDER_ENABLE; // any frame length allowed.
	saiHandle.Init.FIFOThreshold = SAI_FIFOTHRESHOLD_EMPTY;	// used for interrupts.
	saiHandle.Init.MonoStereoMode = SAI_STEREOMODE; // mono mode only available in transmission mode.
	saiHandle.Init.CompandingMode = SAI_NOCOMPANDING; // telecommunications specification (not needed)
	saiHandle.Init.TriState = SAI_OUTPUT_NOTRELEASED; // assume the SAI is ma
	// NOTE(review): the frame and slot fields below are set before
	// HAL_SAI_InitProtocol() is called with SAI_I2S_STANDARD further down;
	// presumably that call derives its own frame/slot settings — confirm
	// whether these assignments have any effect.
	saiHandle.FrameInit.FrameLength = 64; // 64 bit frame. (2 slots)
	saiHandle.FrameInit.ActiveFrameLength = 32; // Frame synchronization active level length. (half the frame length)
	saiHandle.FrameInit.FSDefinition = SAI_FS_CHANNEL_IDENTIFICATION;
	saiHandle.FrameInit.FSPolarity = SAI_FS_ACTIVE_LOW;
	saiHandle.FrameInit.FSOffset = SAI_FS_BEFOREFIRSTBIT;
	saiHandle.SlotInit.FirstBitOffset = 0; // no offset in receive mode -> FBOFF <= (SLOTSZ - DS)
	saiHandle.SlotInit.SlotSize = SAI_SLOTSIZE_32B; // 32 bits per slot to contain the 32 data bits (24 data, 8 zeroed)
	saiHandle.SlotInit.SlotNumber = 2;
	saiHandle.SlotInit.SlotActive = SAI_SLOTACTIVE_ALL;
	// DMA2 channel 1: peripheral-to-memory, word-sized on both sides, memory
	// address incrementing, normal (non-circular) mode, very high priority.
	hdmaInit.Instance = DMA2_Channel1;
	hdmaInit.Init.Request = DMA_REQUEST_1;
	hdmaInit.Init.Direction = DMA_PERIPH_TO_MEMORY;
	hdmaInit.Init.PeriphInc = DMA_PINC_DISABLE;
	hdmaInit.Init.MemInc = DMA_MINC_ENABLE;
	hdmaInit.Init.PeriphDataAlignment = DMA_PDATAALIGN_WORD;
	hdmaInit.Init.MemDataAlignment = DMA_MDATAALIGN_WORD;
	hdmaInit.Init.Mode = DMA_NORMAL;
	hdmaInit.Init.Priority = DMA_PRIORITY_VERY_HIGH;
	if (HAL_DMA_Init(&hdmaInit) != HAL_OK)
	// NOTE(review): no statement for this failure branch is visible in this listing.
	/* Several peripheral DMA handle pointers point to the same DMA handle.
	 Be aware that there is only one channel to perform all the requested DMAs. */
	__HAL_LINKDMA(&saiHandle, hdmarx, hdmaInit);
	__HAL_LINKDMA(&saiHandle, hdmatx, hdmaInit);
	HAL_NVIC_SetPriority(DMA2_Channel1_IRQn, 0, 0);
	HAL_NVIC_SetPriority(SAI1_IRQn, 0, 0);
	// NOTE(review): only priorities are set here; no call enabling these two
	// interrupts is visible in this listing.
	HAL_StatusTypeDef saiStatus = HAL_SAI_InitProtocol(&saiHandle,
													   SAI_I2S_STANDARD, // runs the SAI_InitI2S() function.
													   SAI_PROTOCOL_DATASIZE_24BIT,// 24 bits (24 bit is standard for I2S).
													   // NOTE(review): SlotNumber above is also 2, so "- 1" in the next comment looks inconsistent — confirm against the HAL documentation.
													   2);			 // number of slots per frame - 1
	if (saiStatus != HAL_OK)
		Serial2.println("SAI ERROR");
		while (1) {}
		// NOTE(review): as listed, this line follows an infinite loop; presumably
		// it belonged to an else branch that is not visible here — confirm.
		Serial2.println("Sai init ok.");
	hardwareTimer.setMode(1, TIMER_OUTPUT_COMPARE, NC); // remove this for new version of STM32Dino, required version 1.8.0.
	hardwareTimer.setOverflow(16000, HERTZ_FORMAT);
	// NOTE(review): no call attaching samplerCallback to the timer or starting the
	// timer is visible in this listing, and `start` is not declared in it.
	start = millis();
/**
 * Decodes the first slot held in SAIDataBuffer into a signed 24-bit sample.
 *
 * Bytes 0..2 of the buffer are combined least-significant byte first
 * (byte 0 -> bits 7:0, byte 1 -> bits 15:8, byte 2 -> bits 23:16) and the
 * result is sign-extended from bit 23. Byte 3 and the second slot
 * (bytes 4..7) are not used.
 *
 * @return the sample, in the range -8388608..8388607.
 */
int32_t GetSample()
{
	// Assemble in unsigned arithmetic: shifting a promoted uint8_t into the
	// sign bit of an int (the former `c1 << 24`) is undefined for bytes >= 0x80.
	const uint32_t raw24 = (uint32_t)SAIDataBuffer[0]
	                     | ((uint32_t)SAIDataBuffer[1] << 8)
	                     | ((uint32_t)SAIDataBuffer[2] << 16);

	// Sign-extend from bit 23 without relying on an arithmetic right shift of
	// a negative value: flipping the sign bit and subtracting its weight maps
	// 0x000000..0x7FFFFF to 0..8388607 and 0x800000..0xFFFFFF to -8388608..-1.
	const uint32_t signBit = 0x800000u;
	return (int32_t)(raw24 ^ signBit) - (int32_t)signBit;
}
// Sample counter. No statement updating it is visible in this listing.
int num_samples = 0;
// Starts a DMA receive of 2 data items into SAIDataBuffer and then decodes a
// sample from that buffer with GetSample().
// NOTE(review): GetSample() is called immediately after the receive is started,
// not after it has completed. The resolution posted with this sketch says samples
// need to be handled in the "Transfer Complete" callback instead, and the question
// reports that HAL_SAI_Receive_DMA() returns HAL_BUSY for some of these calls.
void samplerCallback(void)
	HAL_StatusTypeDef rxResponse;
	rxResponse = HAL_SAI_Receive_DMA(&saiHandle, SAIDataBuffer, 2U);
	// The decoded value is not used by any statement visible here.
	int sample = GetSample();
	// Only a commented-out error print is visible for the failure case.
	if (rxResponse != HAL_OK)
		//Serial2.println("Error in SAI");
// the loop function runs over and over again until power down or reset
//
// As visible here: once more than 1000 ms have elapsed since `start`, execution
// is parked in an empty infinite loop.
// NOTE(review): `start` is assigned in setup() but its declaration is not
// visible in this listing; any reporting of num_samples is likewise not shown.
void loop()
	if (millis() - start > 1000)
		while (1) {}

The issue was twofold: the Master Clock Divider was not configured correctly, and samples need to be handled in the "Transfer Complete" callback. Thank you, @Community member and @Community member.