2025-10-21 6:38 AM
I'm experiencing a configuration incompatibility between SAI1 and GPDMA1 on STM32N657 when using FreeRTOS with D-Cache enabled. The SAI/GPDMA configuration works individually but fails when used together, specifically related to D-Cache coherency issues.
Hardware Configuration
These are the SAI and GPDMA configurations:
/**
 * @brief Fill in hsai_BlockB1 for SAI1 block B and hand it to HAL_SAI_Init().
 *
 * The handle is configured as an asynchronous master receiver with two
 * active slots. Error_Handler() is called when HAL_SAI_Init() does not
 * return HAL_OK.
 */
static void MX_SAI1_Init(void)
{
    hsai_BlockB1.Instance = SAI1_Block_B;

    /* Role and synchronisation */
    hsai_BlockB1.Init.AudioMode  = SAI_MODEMASTER_RX;
    hsai_BlockB1.Init.Synchro    = SAI_ASYNCHRONOUS;
    hsai_BlockB1.Init.SynchroExt = SAI_SYNCEXT_DISABLE;

    /* Clocking */
    hsai_BlockB1.Init.AudioFrequency = SAI_AUDIO_FREQUENCY_16K;
    hsai_BlockB1.Init.NoDivider      = SAI_MASTERDIVIDER_DISABLE;
    hsai_BlockB1.Init.Mckdiv         = 2;
    hsai_BlockB1.Init.MckOutput      = SAI_MCK_OUTPUT_DISABLE;

    /* Data path */
    hsai_BlockB1.Init.DataSize       = SAI_DATASIZE_24;
    hsai_BlockB1.Init.MonoStereoMode = SAI_STEREOMODE;
    hsai_BlockB1.Init.CompandingMode = SAI_NOCOMPANDING;
    hsai_BlockB1.Init.FIFOThreshold  = SAI_FIFOTHRESHOLD_EMPTY;
    hsai_BlockB1.Init.OutputDrive    = SAI_OUTPUTDRIVE_DISABLE;

    /* Frame synchronisation */
    hsai_BlockB1.FrameInit.FrameLength       = 64;
    hsai_BlockB1.FrameInit.ActiveFrameLength = 32;
    hsai_BlockB1.FrameInit.FSDefinition      = SAI_FS_CHANNEL_IDENTIFICATION;
    hsai_BlockB1.FrameInit.FSPolarity        = SAI_FS_ACTIVE_LOW;
    hsai_BlockB1.FrameInit.FSOffset          = SAI_FS_FIRSTBIT;

    /* Slots: two 32-bit slots, both active */
    hsai_BlockB1.SlotInit.FirstBitOffset = 0;
    hsai_BlockB1.SlotInit.SlotSize       = SAI_SLOTSIZE_32B;
    hsai_BlockB1.SlotInit.SlotNumber     = 2;
    hsai_BlockB1.SlotInit.SlotActive     = SAI_SLOTACTIVE_0 | SAI_SLOTACTIVE_1;

    if (HAL_SAI_Init(&hsai_BlockB1) == HAL_OK)
    {
        return;
    }
    Error_Handler();
}
/**
 * @brief Enable the GPDMA1 clock and the GPDMA1 channel 0 interrupt.
 */
static void MX_GPDMA1_Init(void)
{
    /* NVIC priority pair applied to the GPDMA1 channel 0 interrupt. */
    const uint32_t preempt_priority = 15;
    const uint32_t sub_priority = 0;

    __HAL_RCC_GPDMA1_CLK_ENABLE();

    /* Priority is set before the interrupt line is enabled. */
    HAL_NVIC_SetPriority(GPDMA1_Channel0_IRQn, preempt_priority, sub_priority);
    HAL_NVIC_EnableIRQ(GPDMA1_Channel0_IRQn);
}
static void MX_GPIO_Init(void)
{
GPIO_InitTypeDef GPIO_InitStruct = {0};
__HAL_RCC_GPIOC_CLK_ENABLE();
__HAL_RCC_GPIOE_CLK_ENABLE();
__HAL_RCC_GPIOH_CLK_ENABLE();
__HAL_RCC_GPIOB_CLK_ENABLE();
__HAL_RCC_GPIOA_CLK_ENABLE();
__HAL_RCC_GPIOG_CLK_ENABLE();
// LED GPIOs
/* Configure default pin levels (LEDs OFF)*/
HAL_GPIO_WritePin(GPIOG, LED1_PIN | LED2_PIN | LED3_PIN, GPIO_PIN_SET);
/* Configure the LED pins */
GPIO_InitStruct.Pin = LED1_PIN | LED2_PIN | LED3_PIN;
GPIO_InitStruct.Mode = GPIO_MODE_OUTPUT_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH;
HAL_GPIO_Init(GPIOG, &GPIO_InitStruct);
// SPI1 CS GPIO
/*Configure GPIO pin Output Level */
HAL_GPIO_WritePin(CS_GPIO_Port, CS_Pin, GPIO_PIN_SET);
/*Configure GPIO pin : CS_Pin */
GPIO_InitStruct.Pin = CS_Pin;
GPIO_InitStruct.Mode = GPIO_MODE_OUTPUT_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_HIGH;
HAL_GPIO_Init(CS_GPIO_Port, &GPIO_InitStruct);
}
/**
 * @brief SAI MSP initialisation: kernel clock, pins and GPDMA1 channel 0 for SAI1 block B.
 *
 * Does nothing unless @p hsai refers to SAI1_Block_B. For that instance it:
 *   - selects IC7 (sourced from PLL4, divider 250) as the SAI1 clock and enables it,
 *   - configures PG1 (SCK), PG2 (FS) and PA3 (SD) as SAI1 alternate-function pins,
 *   - builds one linear DMA node (peripheral to memory, word accesses), inserts it
 *     into List_GPDMA1_Channel0 and makes that list circular,
 *   - initialises GPDMA1 channel 0 in linked-list mode, links the list to it and
 *     attaches the channel to the SAI handle as its RX DMA.
 *
 * Error_Handler() is called whenever a HAL call does not return HAL_OK.
 *
 * @param hsai SAI handle being initialised by the HAL.
 */
void HAL_SAI_MspInit(SAI_HandleTypeDef* hsai)
{
    GPIO_InitTypeDef GPIO_InitStruct = {0};
    /* Zero-initialised on purpose: the members that are not assigned below
       (source/destination address, data size, repeat-block settings) are read by
       HAL_DMAEx_List_BuildNode() and must not contain indeterminate stack values. */
    DMA_NodeConfTypeDef NodeConfig = {0};
    RCC_PeriphCLKInitTypeDef PeriphClkInitStruct = {0};

    /* SAI1 */
    if(hsai->Instance==SAI1_Block_B)
    {
        /* Initialise the peripheral clock for SAI1: IC7 <- PLL4 / 250 */
        PeriphClkInitStruct.PeriphClockSelection = RCC_PERIPHCLK_SAI1;
        PeriphClkInitStruct.Sai1ClockSelection = RCC_SAI1CLKSOURCE_IC7;
        PeriphClkInitStruct.ICSelection[RCC_IC7].ClockSelection = RCC_ICCLKSOURCE_PLL4;
        PeriphClkInitStruct.ICSelection[RCC_IC7].ClockDivider = 250;
        if (HAL_RCCEx_PeriphCLKConfig(&PeriphClkInitStruct) != HAL_OK)
        {
            Error_Handler();
        }

        __HAL_RCC_SAI1_CLK_ENABLE();
        __HAL_RCC_GPIOG_CLK_ENABLE();
        __HAL_RCC_GPIOA_CLK_ENABLE();

        /**SAI1_B_Block_B GPIO Configuration
        PG1 ------> SAI1_SCK_B
        PA3 ------> SAI1_SD_B
        PG2 ------> SAI1_FS_B
        */
        GPIO_InitStruct.Pin = GPIO_PIN_1|GPIO_PIN_2;
        GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
        GPIO_InitStruct.Pull = GPIO_NOPULL;
        GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_HIGH;
        GPIO_InitStruct.Alternate = GPIO_AF6_SAI1;
        HAL_GPIO_Init(GPIOG, &GPIO_InitStruct);

        GPIO_InitStruct.Pin = GPIO_PIN_3;
        GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
        GPIO_InitStruct.Pull = GPIO_NOPULL;
        GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_HIGH;
        GPIO_InitStruct.Alternate = GPIO_AF6_SAI1;
        HAL_GPIO_Init(GPIOA, &GPIO_InitStruct);

        /* Peripheral DMA init: one linear node, SAI1_B request, fixed source,
           incrementing destination, word-wide accesses on both sides. */
        NodeConfig.NodeType = DMA_GPDMA_LINEAR_NODE;
        NodeConfig.Init.Request = GPDMA1_REQUEST_SAI1_B;
        NodeConfig.Init.BlkHWRequest = DMA_BREQ_SINGLE_BURST;
        NodeConfig.Init.Direction = DMA_PERIPH_TO_MEMORY;
        NodeConfig.Init.Priority = DMA_HIGH_PRIORITY;
        NodeConfig.Init.SrcInc = DMA_SINC_FIXED;
        NodeConfig.Init.DestInc = DMA_DINC_INCREMENTED;
        NodeConfig.Init.SrcDataWidth = DMA_SRC_DATAWIDTH_WORD;
        NodeConfig.Init.DestDataWidth = DMA_DEST_DATAWIDTH_WORD;
        NodeConfig.Init.SrcBurstLength = 1;
        NodeConfig.Init.DestBurstLength = 1;
        NodeConfig.Init.TransferAllocatedPort = DMA_SRC_ALLOCATED_PORT0|DMA_DEST_ALLOCATED_PORT0;
        NodeConfig.Init.TransferEventMode = DMA_TCEM_BLOCK_TRANSFER;
        NodeConfig.Init.Mode = DMA_NORMAL;
        NodeConfig.TriggerConfig.TriggerPolarity = DMA_TRIG_POLARITY_MASKED;
        NodeConfig.TriggerConfig.TriggerSelection = GPDMA1_TRIGGER_GPDMA1_CH0_TCF;
        NodeConfig.DataHandlingConfig.DataExchange = DMA_EXCHANGE_NONE;
        NodeConfig.DataHandlingConfig.DataAlignment = DMA_DATA_RIGHTALIGN_ZEROPADDED;
        NodeConfig.SrcSecure = DMA_CHANNEL_SRC_SEC;
        NodeConfig.DestSecure = DMA_CHANNEL_DEST_SEC;

        /* Build the node, queue it, and close the queue into a ring. */
        if (HAL_DMAEx_List_BuildNode(&NodeConfig, &Node_GPDMA1_Channel0) != HAL_OK)
        {
            Error_Handler();
        }
        if (HAL_DMAEx_List_InsertNode(&List_GPDMA1_Channel0, NULL, &Node_GPDMA1_Channel0) != HAL_OK)
        {
            Error_Handler();
        }
        if (HAL_DMAEx_List_SetCircularMode(&List_GPDMA1_Channel0) != HAL_OK)
        {
            Error_Handler();
        }

        /* Channel-level linked-list settings.
           NOTE(review): the channel priority here is LOW while the node above asks
           for HIGH - confirm which one is intended. */
        handle_GPDMA1_Channel0.Instance = GPDMA1_Channel0;
        handle_GPDMA1_Channel0.InitLinkedList.Priority = DMA_LOW_PRIORITY_LOW_WEIGHT;
        handle_GPDMA1_Channel0.InitLinkedList.LinkStepMode = DMA_LSM_FULL_EXECUTION;
        handle_GPDMA1_Channel0.InitLinkedList.LinkAllocatedPort = DMA_LINK_ALLOCATED_PORT0;
        handle_GPDMA1_Channel0.InitLinkedList.TransferEventMode = DMA_TCEM_BLOCK_TRANSFER;
        handle_GPDMA1_Channel0.InitLinkedList.LinkedListMode = DMA_LINKEDLIST_CIRCULAR;
        if (HAL_DMAEx_List_Init(&handle_GPDMA1_Channel0) != HAL_OK)
        {
            Error_Handler();
        }
        if (HAL_DMAEx_List_LinkQ(&handle_GPDMA1_Channel0, &List_GPDMA1_Channel0) != HAL_OK)
        {
            Error_Handler();
        }

        /* Attach the channel to the SAI handle as its receive DMA. */
        __HAL_LINKDMA(hsai, hdmarx, handle_GPDMA1_Channel0);
    }
}
The SystemIsolation_Config is as follow :
/**
 * @brief Enable the RIFSC clock and apply channel attributes to GPDMA1 channel 0.
 *
 * Error_Handler() is called when HAL_DMA_ConfigChannelAttributes() does not
 * return HAL_OK.
 */
static void SystemIsolation_Config(void)
{
    /* Attributes for GPDMA1 channel 0 (the channel used by SAI1):
       secure, privileged, secure source and secure destination. */
    const uint32_t channel_attributes = DMA_CHANNEL_SEC
                                      | DMA_CHANNEL_PRIV
                                      | DMA_CHANNEL_SRC_SEC
                                      | DMA_CHANNEL_DEST_SEC;

    /* set all required IPs as secure privileged */
    __HAL_RCC_RIFSC_CLK_ENABLE();

    if (HAL_DMA_ConfigChannelAttributes(&handle_GPDMA1_Channel0, channel_attributes) == HAL_OK)
    {
        return;
    }
    Error_Handler();
}
I tried to configure a section only for acquisition buffer corresponding to the DMA's buffer. The linker description is attached to the post while the half completed callback function and the DMA buffer are declared as follow:
/* Number of 32-bit elements in acquisition_buffer (2 * 400). */
#define DMA_SZIZE (2*400)
/* Acquisition buffer placed in the ".SRAM_DMA" linker section, 32-byte aligned.
   NOTE(review): placing the buffer in a dedicated linker section does not by
   itself say anything about cacheability; if this region is cacheable, the
   D-Cache must be maintained manually around DMA transfers - confirm against
   the MPU / linker configuration. */
__attribute__((section(".SRAM_DMA"), aligned(32)))
uint32_t acquisition_buffer[DMA_SZIZE]; // Located at 0x34000400 (AXISRAM1_DMA)
/**
 * @brief SAI receive half-complete callback: publish the first half of the DMA buffer.
 *
 * For the SAI instance held by hsai_BlockB1, the first half of acquisition_buffer
 * is invalidated in the D-Cache so that the consumer reads what the DMA wrote to
 * memory rather than stale cached lines. Then buffer_ready is set and
 * sid_acquisition is released to wake the consumer. Other instances are ignored.
 *
 * NOTE(review): assumes the reception was started on acquisition_buffer with a
 * length of DMA_SZIZE words, so that "half complete" means the first
 * DMA_SZIZE / 2 words - confirm against the HAL_SAI_Receive_DMA() call.
 *
 * @param hsai SAI handle that raised the half-complete event.
 */
void HAL_SAI_RxHalfCpltCallback(SAI_HandleTypeDef *hsai)
{
    if (hsai->Instance != hsai_BlockB1.Instance)
    {
        return;
    }

    /* The buffer is 32-byte aligned and half of it (1600 bytes) is a multiple of
       32, so this invalidation covers whole cache lines belonging to the buffer only. */
    SCB_InvalidateDCache_by_Addr(acquisition_buffer,
                                 (int32_t)((DMA_SZIZE / 2) * sizeof(uint32_t)));

    buffer_ready = 1;
    osSemaphoreRelease(sid_acquisition);
}
This is the observed behavior :
With D-Cache enabled:
I tried disabling the D-Cache, and in that case the SAI and GPDMA initialization does not show any error like the one above. However, when I start the FreeRTOS kernel (osKernel), the MCU crashes.
I admit I'm a bit lost with the configuration of D-Cache memory regarding the DMA usage right now.
Do you know how to configure the SAI and GPDMA correctly (knowing the buffer can be in Dcache) ?
What is the recommended approach for using SAI+GPDMA with FreeRTOS when D-Cache must remain enabled?
I tried to follow this example but it doesn't use the Dcache :
STM32H573I-DK/Examples/SAI/SAI_AudioPlay
Any insights into the root cause or recommended solutions would be greatly appreciated.
Best Regards.
Solved! Go to Solution.
2025-10-21 9:33 AM
Hello @jweber
When the DMA buffer resides in a cacheable memory region, it is essential to manually manage the data cache to ensure data coherency between the CPU and the DMA controller. Before starting a DMA transfer, the buffer should be cleaned and invalidated using the SCB_CleanDCache_by_Addr() and SCB_InvalidateDCache_by_Addr() functions to ensure that any modified data in the cache is written back to memory and that the cache does not contain stale data. After the DMA transfer completes, the buffer must be invalidated again so that the CPU reads the latest data written by the DMA.
2025-10-21 9:33 AM
Hello @jweber
When the DMA buffer resides in a cacheable memory region, it is essential to manually manage the data cache to ensure data coherency between the CPU and the DMA controller. Before starting a DMA transfer, the buffer should be cleaned and invalidated using the SCB_CleanDCache_by_Addr() and SCB_InvalidateDCache_by_Addr() functions to ensure that any modified data in the cache is written back to memory and that the cache does not contain stale data. After the DMA transfer completes, the buffer must be invalidated again so that the CPU reads the latest data written by the DMA.
2025-10-27 12:45 AM
Thank you for this helpful remark. I thought the dedicated section I created was sufficient, and that manual cache management was no longer necessary once I had created a section for DMA in the linker script.
With the help of examples, I've managed to start the transfer and to fix my SAI-I2S synchronisation with the microphone I'm using (INMP441), using the following code:
/* Invalidate the D-Cache lines covering the DMA buffer before reception starts. */
SCB_InvalidateDCache_by_Addr((uint32_t *)input_buffer_dma,
                             N_SAMPLES_DMA_BUFFER * sizeof(uint32_t));

/* Start DMA reception into input_buffer_dma; any status other than HAL_OK is fatal. */
if (HAL_SAI_Receive_DMA(&h_sai_B1, (uint8_t *)input_buffer_dma, N_SAMPLES_DMA_BUFFER) != HAL_OK)
{
    Error_Handler();
}
and just before extract and convert data from DMA to my memory.
Best Regards.