cancel
Showing results for 
Search instead for 
Did you mean: 

STM32H743 USART DMA and DCache

Manu Abraham
Senior

Hi,

I am trying to enable DCache on a USART Tx DMA snippet, which causes USART Tx to fail.

My impression is that the MCU has already written the data to physical memory, so the DMA should have no trouble accessing it?

Can someone help me to fix the issue ?

Thanks,

Manu

Attached complete code snippet.

#include "stm32h7xx_ll_bus.h"
#include "stm32h7xx_ll_cortex.h"
#include "stm32h7xx_ll_dma.h"
#include "stm32h7xx_ll_gpio.h"
#include "stm32h7xx_ll_pwr.h"
#include "stm32h7xx_ll_rcc.h"
#include "stm32h7xx_ll_system.h"
#include "stm32h7xx_ll_usart.h"
#include "stm32h7xx_ll_utils.h"
 
#include "string.h"
#include <stdarg.h>
#include <stdio.h>
 
/* Divider applied to SystemCoreClock to obtain the clock passed to
 * LL_USART_SetBaudRate() in config_usart(). SystemClock_Config() sets the
 * AHB prescaler to /2 and the APB1 prescaler to /2, i.e. /4 overall. */
#define APB_Div			4
/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_LEN(x)            (sizeof(x) / sizeof((x)[0]))
 
/* Completion flags written from the DMA stream interrupt handlers. */
__IO uint8_t tx_done = 0, rx_done = 0;
const uint8_t exstr[] ="test";
/* Destination buffer of the circular Rx DMA stream. */
uint8_t rxbuf[80];
/* Rx DMA transfer length: the element count of exstr, which includes the
 * terminating NUL (5 bytes for "test"). */
uint8_t rxlen = ARRAY_LEN(exstr);
 
/*
 * Bring up the system clock tree: flash latency, power supply / voltage
 * scaling, HSE (bypass mode), PLL1, bus prescalers, SysTick and the USART
 * kernel clock source.
 *
 * The statement order matters (latency and voltage before the clock switch,
 * PLL ready before it is selected); do not reorder.
 */
void SystemClock_Config(void)
{
	/* Program the flash wait states and trap here if the setting did not take. */
	LL_FLASH_SetLatency(LL_FLASH_LATENCY_4);
	if (LL_FLASH_GetLatency() != LL_FLASH_LATENCY_4)
		while (1) {}
 
	/* LDO supply, voltage scale 0. */
	LL_PWR_ConfigSupply(LL_PWR_LDO_SUPPLY);
	LL_PWR_SetRegulVoltageScaling(LL_PWR_REGU_VOLTAGE_SCALE0);
 
	/* External clock in bypass mode; spin until it reports ready. */
	LL_RCC_HSE_EnableBypass();
	LL_RCC_HSE_Enable();
	while (!LL_RCC_HSE_IsReady()) {}
 
	/* PLL1 fed from HSE with M=1, N=120, P=2, Q=20, R=2; P and Q outputs enabled.
	 * NOTE(review): the 480 MHz figure used below presumably assumes an
	 * 8 MHz HSE input -- confirm against the board. */
	LL_RCC_PLL_SetSource(LL_RCC_PLLSOURCE_HSE);
	LL_RCC_PLL1P_Enable();
	LL_RCC_PLL1Q_Enable();
	LL_RCC_PLL1_SetVCOInputRange(LL_RCC_PLLINPUTRANGE_8_16);
	LL_RCC_PLL1_SetVCOOutputRange(LL_RCC_PLLVCORANGE_WIDE);
	LL_RCC_PLL1_SetM(1);
	LL_RCC_PLL1_SetN(120);
	LL_RCC_PLL1_SetP(2);
	LL_RCC_PLL1_SetQ(20);
	LL_RCC_PLL1_SetR(2);
	LL_RCC_PLL1_Enable();
	while (!LL_RCC_PLL1_IsReady()) {}
 
	/* AHB is set to /2 before the switch to PLL1; the same prescaler is
	 * written a second time after the switch (redundant repeat). */
	LL_RCC_SetAHBPrescaler(LL_RCC_AHB_DIV_2);
	LL_RCC_SetSysClkSource(LL_RCC_SYS_CLKSOURCE_PLL1);
	LL_RCC_SetSysPrescaler(LL_RCC_SYSCLK_DIV_1);
	LL_RCC_SetAHBPrescaler(LL_RCC_AHB_DIV_2);
	/* All four APB buses run at AHB / 2. */
	LL_RCC_SetAPB1Prescaler(LL_RCC_APB1_DIV_2);
	LL_RCC_SetAPB2Prescaler(LL_RCC_APB2_DIV_2);
	LL_RCC_SetAPB3Prescaler(LL_RCC_APB3_DIV_2);
	LL_RCC_SetAPB4Prescaler(LL_RCC_APB4_DIV_2);
 
	/* 1 ms tick and SystemCoreClock are both set up for 480 MHz. */
	LL_Init1msTick(480000000);
	LL_SYSTICK_SetClkSource(LL_SYSTICK_CLKSOURCE_HCLK);
	LL_SetSystemCoreClock(480000000);			/* 480Mhz */
	/* The USART2/3/4/5/7/8 group (which includes USART3) is clocked from PCLK1. */
	LL_RCC_SetUSARTClockSource(LL_RCC_USART234578_CLKSOURCE_PCLK1);
}
 
/*
 * Configure PD8 and PD9 for the USART: alternate function 7, high speed,
 * push-pull output type, pull-up.
 *
 * NOTE(review): other functions in this file drive GPIOB pins 0/14 and GPIOE
 * pin 1, but only GPIOD is clocked and configured here -- confirm that the
 * LED pins are set up elsewhere.
 */
void gpio_init(void)
{
	static const uint32_t usart_pins[] = { LL_GPIO_PIN_8, LL_GPIO_PIN_9 };
	uint32_t idx;

	/* The port clock must be running before its registers are written. */
	LL_AHB4_GRP1_EnableClock(LL_AHB4_GRP1_PERIPH_GPIOD);

	for (idx = 0; idx < sizeof(usart_pins) / sizeof(usart_pins[0]); idx++) {
		const uint32_t pin = usart_pins[idx];

		LL_GPIO_SetPinMode(GPIOD, pin, LL_GPIO_MODE_ALTERNATE);
		LL_GPIO_SetAFPin_8_15(GPIOD, pin, LL_GPIO_AF_7);
		LL_GPIO_SetPinSpeed(GPIOD, pin, LL_GPIO_SPEED_FREQ_HIGH);
		LL_GPIO_SetPinOutputType(GPIOD, pin, LL_GPIO_OUTPUT_PUSHPULL);
		LL_GPIO_SetPinPull(GPIOD, pin, LL_GPIO_PULL_UP);
	}
}
 
/*
 * Set the TRBUFF bit in the CR register of one DMA stream.
 *
 * DMAx:   DMA controller instance.
 * Stream: stream index, used to look up the register block offset in
 *         LL_DMA_STR_OFFSET_TAB.
 */
__STATIC_INLINE void LL_DMA_EnablePeriphDMA(DMA_TypeDef *DMAx, uint32_t Stream)
{
	DMA_Stream_TypeDef *stream_regs =
		(DMA_Stream_TypeDef *)((uint32_t)DMAx + LL_DMA_STR_OFFSET_TAB[Stream]);

	SET_BIT(stream_regs->CR, DMA_SxCR_TRBUFF);
}
 
/*
 * Clear the TRBUFF bit in the CR register of one DMA stream.
 *
 * DMAx:   DMA controller instance.
 * Stream: stream index, used to look up the register block offset in
 *         LL_DMA_STR_OFFSET_TAB.
 */
__STATIC_INLINE void LL_DMA_DisablePeriphDMA(DMA_TypeDef *DMAx, uint32_t Stream)
{
	DMA_Stream_TypeDef *stream_regs =
		(DMA_Stream_TypeDef *)((uint32_t)DMAx + LL_DMA_STR_OFFSET_TAB[Stream]);

	CLEAR_BIT(stream_regs->CR, DMA_SxCR_TRBUFF);
}
 
/*
 * Set up USART3 for 115200 baud, 8 data bits, no parity, 1 stop bit, with
 * both transmitter and receiver enabled, then block until the peripheral
 * acknowledges both directions.
 */
void config_usart(void)
{
	/* Clock value handed to the baud-rate calculation. */
	const uint32_t usart_clk = SystemCoreClock / APB_Div;

	LL_APB1_GRP1_EnableClock(LL_APB1_GRP1_PERIPH_USART3);

	LL_USART_SetTransferDirection(USART3, LL_USART_DIRECTION_TX_RX);
	LL_USART_ConfigCharacter(USART3, LL_USART_DATAWIDTH_8B,
				 LL_USART_PARITY_NONE, LL_USART_STOPBITS_1);
	LL_USART_SetBaudRate(USART3, usart_clk, LL_USART_PRESCALER_DIV1,
			     LL_USART_OVERSAMPLING_16, 115200);

	LL_USART_Enable(USART3);

	/* Spin until both the TEACK and REACK flags are set. */
	while (!(LL_USART_IsActiveFlag_TEACK(USART3) &&
		 LL_USART_IsActiveFlag_REACK(USART3))) {
	}
}
 
void config_dma(void)
{
	LL_AHB1_GRP1_EnableClock(LL_AHB1_GRP1_PERIPH_DMA1);
	LL_DMA_SetPeriphRequest(DMA1,
				LL_DMA_STREAM_0,		/* Request DMA Stream 0 */
				LL_DMAMUX1_REQ_USART3_TX);	/* for DMAMUX1 USART3 Tx */
 
	LL_DMA_SetPeriphRequest(DMA1,
				LL_DMA_STREAM_1,		/* Request DMA Stream 1 */
				LL_DMAMUX1_REQ_USART3_RX);	/* for DMAMUX1 USART3 Rx */
 
	/* Tx DMA */
	LL_DMA_SetDataTransferDirection(DMA1,
					LL_DMA_STREAM_0,
					LL_DMA_DIRECTION_MEMORY_TO_PERIPH);
 
	LL_DMA_SetStreamPriorityLevel(DMA1, LL_DMA_STREAM_0, LL_DMA_PRIORITY_HIGH);
	LL_DMA_SetMode(DMA1, LL_DMA_STREAM_0, LL_DMA_MODE_NORMAL);
 
	LL_DMA_SetPeriphIncMode(DMA1, LL_DMA_STREAM_0, LL_DMA_PERIPH_NOINCREMENT);
	LL_DMA_SetMemoryIncMode(DMA1, LL_DMA_STREAM_0, LL_DMA_MEMORY_INCREMENT);
	LL_DMA_SetPeriphSize(DMA1, LL_DMA_STREAM_0, LL_DMA_PDATAALIGN_BYTE);
	LL_DMA_SetMemorySize(DMA1, LL_DMA_STREAM_0, LL_DMA_PDATAALIGN_BYTE);
 
	/* Rx DMA */
	LL_DMA_SetDataTransferDirection(DMA1,
					LL_DMA_STREAM_1,
					LL_DMA_DIRECTION_PERIPH_TO_MEMORY);
 
	LL_DMA_SetStreamPriorityLevel(DMA1, LL_DMA_STREAM_1, LL_DMA_PRIORITY_HIGH);
	LL_DMA_SetMode(DMA1, LL_DMA_STREAM_1, LL_DMA_MODE_CIRCULAR);
 
	LL_DMA_SetPeriphIncMode(DMA1, LL_DMA_STREAM_1, LL_DMA_PERIPH_NOINCREMENT);
	LL_DMA_SetMemoryIncMode(DMA1, LL_DMA_STREAM_1, LL_DMA_MEMORY_INCREMENT);
	LL_DMA_SetPeriphSize(DMA1, LL_DMA_STREAM_1, LL_DMA_PDATAALIGN_BYTE);
	LL_DMA_SetMemorySize(DMA1, LL_DMA_STREAM_1, LL_DMA_PDATAALIGN_BYTE);
	LL_DMA_DisableFifoMode(DMA1, LL_DMA_STREAM_1);
 
	LL_DMA_EnableIT_TC(DMA1, LL_DMA_STREAM_0);		/* Tx Transfer Complete */
	LL_DMA_EnableIT_TE(DMA1, LL_DMA_STREAM_0);		/* Tx Transfer Error */
 
	LL_DMA_EnableIT_HT(DMA1, LL_DMA_STREAM_1);		/* Rx Half Transfer Done */
	LL_DMA_EnableIT_TC(DMA1, LL_DMA_STREAM_1);		/* Rx Transfer Complete */
	LL_DMA_EnableIT_TE(DMA1, LL_DMA_STREAM_1);		/* Tx Transfer Error */
 
	NVIC_SetPriority(DMA1_Stream0_IRQn, 0);			/* Stream Priority */
	NVIC_SetPriority(DMA1_Stream1_IRQn, 0);
	NVIC_EnableIRQ(DMA1_Stream0_IRQn);			/* Global IRQ */
	NVIC_EnableIRQ(DMA1_Stream1_IRQn);
 
	LL_USART_EnableDMAReq_TX(USART3);			/* Enable Tx DMARQ */
	LL_USART_EnableDMAReq_RX(USART3);			/* Enable Rx DMARQ */
 
	LL_DMA_ClearFlag_TC1(DMA1);				/* Transfer complete */
	LL_DMA_ClearFlag_HT1(DMA1);				/* Half Transfer done */
	LL_DMA_ClearFlag_TE1(DMA1);				/* Transfer Error */
	LL_DMA_ClearFlag_DME1(DMA1);				/* Direct MODE Error */
	LL_DMA_ClearFlag_FE1(DMA1);				/* FIFO Error */
 
	LL_DMA_SetDataLength(DMA1, LL_DMA_STREAM_1, rxlen);
	LL_DMA_SetPeriphAddress(DMA1, LL_DMA_STREAM_1, LL_USART_DMA_GetRegAddr(USART3, LL_USART_DMA_REG_DATA_RECEIVE));
	LL_DMA_SetMemoryAddress(DMA1, LL_DMA_STREAM_1, (uint32_t)rxbuf);
 
	LL_DMA_EnablePeriphDMA(DMA1, LL_DMA_STREAM_1);		/* Enable USART3 Stream DMA */
	LL_DMA_EnableStream(DMA1, LL_DMA_STREAM_1);		/* Enable DMA Channel Rx */
}
 
void start_dma(uint8_t *buf, uint8_t txlen)
{
	tx_done = 0;
	LL_GPIO_ResetOutputPin(GPIOB, LL_GPIO_PIN_0);		/* Done: LED OFF */
 
	LL_DMA_ClearFlag_TC0(DMA1);				/* Transfer complete */
	LL_DMA_ClearFlag_HT0(DMA1);				/* Half Transfer done */
	LL_DMA_ClearFlag_TE0(DMA1);				/* Transfer Error */
	LL_DMA_ClearFlag_DME0(DMA1);				/* Direct MODE Error */
	LL_DMA_ClearFlag_FE0(DMA1);				/* FIFO Error */
 
	LL_DMA_SetDataLength(DMA1, LL_DMA_STREAM_0, txlen);	/* DMA Stream length */
	LL_DMA_ConfigAddresses(DMA1,
			       LL_DMA_STREAM_0,			/* USART3 DMA Stream 0 */
			       (uint32_t)buf,			/* Transmit string */
			       LL_USART_DMA_GetRegAddr(USART3, LL_USART_DMA_REG_DATA_TRANSMIT),
			       LL_DMA_GetDataTransferDirection(DMA1, LL_DMA_STREAM_0));
 
	LL_DMA_EnablePeriphDMA(DMA1, LL_DMA_STREAM_0);		/* Enable USART3 Stream DMA */
	LL_DMA_EnableStream(DMA1, LL_DMA_STREAM_0);		/* Enable DMA Channel Tx */
}
 
/*
 * Block until the Tx interrupt handler reports completion through tx_done,
 * then switch DMA1 stream 0 off again and drive PB0 low.
 */
void stop_dma(void)
{
	/* Busy-wait; tx_done is set from DMA1_Stream0_IRQHandler(). */
	for (;;) {
		if (tx_done)
			break;
	}

	LL_DMA_DisableStream(DMA1, LL_DMA_STREAM_0);
	LL_DMA_DisablePeriphDMA(DMA1, LL_DMA_STREAM_0);		/* Clear TRBUFF on the Tx stream */
	LL_GPIO_ResetOutputPin(GPIOB, LL_GPIO_PIN_0);		/* PB0 (LED pin) low */
}
 
void DMA1_Stream0_IRQHandler(void)
{
	if (LL_DMA_IsEnabledIT_TC(DMA1, LL_DMA_STREAM_0) &&
	    LL_DMA_IsActiveFlag_TC0(DMA1)) {
 
		LL_DMA_ClearFlag_TC0(DMA1);
		LL_GPIO_SetOutputPin(GPIOE, LL_GPIO_PIN_1);	/* Done: LED2 ON */
		tx_done = 1;
	}
 
	if (LL_DMA_IsEnabledIT_TE(DMA1, LL_DMA_STREAM_0) &&
	    LL_DMA_IsActiveFlag_TE0(DMA1)) {
	}
}
 
void DMA1_Stream1_IRQHandler(void)
{
	if (LL_DMA_IsEnabledIT_HT(DMA1, LL_DMA_STREAM_1) &&
	    LL_DMA_IsActiveFlag_HT1(DMA1)) {
		LL_DMA_ClearFlag_HT1(DMA1);
	}
	if (LL_DMA_IsEnabledIT_TC(DMA1, LL_DMA_STREAM_1) &&
	    LL_DMA_IsActiveFlag_TC1(DMA1)) {
 
		LL_DMA_ClearFlag_TC1(DMA1);             	/* Clear transfer complete flag */
		LL_GPIO_TogglePin(GPIOB, LL_GPIO_PIN_14);	/* LED3 Toggle */
		rx_done = 1;
	}
	if (LL_DMA_IsEnabledIT_TE(DMA1, LL_DMA_STREAM_1) &&
	    LL_DMA_IsActiveFlag_TE1(DMA1)) {
 
	}
}
 
/*
 * printf-style output over USART3 using DMA. Blocks until the DMA transfer
 * has completed, so the stack buffer stays valid for the whole transfer.
 *
 * fmt, ...: printf format string and its arguments.
 *
 * The formatted text is limited to 79 characters; longer output is
 * truncated rather than overflowing the buffer. Nothing is sent when
 * formatting fails or produces an empty string, because a zero-length
 * transfer would never raise the completion interrupt stop_dma() waits for.
 */
void printh(const char *fmt, ...)
{
	uint8_t pb[80];
	va_list argp;
	int n;

	va_start(argp, fmt);
	n = vsnprintf((char *)pb, sizeof pb, fmt, argp);
	va_end(argp);

	if (n <= 0)
		return;

	/* vsnprintf reports the untruncated length; clamp to what is stored. */
	if ((size_t)n >= sizeof pb)
		n = (int)(sizeof pb - 1);

	start_dma(pb, (uint8_t)n);
	stop_dma();
}
 
/*
 * Demo entry point: bring up clocks, I-Cache, GPIO, USART3 and DMA, then
 * send two strings through printh() with 500-tick LL_mDelay() pauses and
 * PB0 toggled in between. Never returns.
 */
int main(void)
{
	SystemClock_Config();
 
	SCB_EnableICache();					/* Enable I-Cache */
	/* D-Cache is left disabled here; this is the line the post is about. */
//	SCB_EnableDCache();					/* Enable D-Cache */
	gpio_init();
	config_usart();
	config_dma();
	LL_GPIO_ResetOutputPin(GPIOB, LL_GPIO_PIN_0);		/* PB0 (LED pin) low */
	LL_mDelay(500);
	printh("STM32H743 USART DMA Test\r\n");
	LL_GPIO_SetOutputPin(GPIOB, LL_GPIO_PIN_0);		/* PB0 (LED pin) high */
	LL_mDelay(500);
	LL_GPIO_ResetOutputPin(GPIOB, LL_GPIO_PIN_0);		/* PB0 (LED pin) low */
	LL_mDelay(500);
	printh("Hello Again\r\n");
	LL_GPIO_SetOutputPin(GPIOB, LL_GPIO_PIN_0);		/* PB0 (LED pin) high */
 
	/* Idle forever; the DMA interrupt handlers keep running. */
	while (1) { }
}

1 ACCEPTED SOLUTION

Accepted Solutions
TDK
Guru

AXI SRAM only operates in WBWA (write-back write-allocate) and does not have a write-through address mapping.

> But why would the Cache need cleaning ?

When cache is used in WBWA mode, the CPU only writes to the memory when necessary, such as when it needs to make space for a new line of cache. This is to increase performance. If you need the memory to be valid for some reason (such as if DMA wants to read from it), you can force the CPU to write it by doing a clean operation.

> Or, does it imply that the MCU has written to the Cache, not to the actual memory ?

Yes.

If you feel a post has answered your question, please click "Accept as Solution".

View solution in original post

6 REPLIES 6
TDK
Guru

SRAM is not write-through by default (as generated by STM32CubeMX code), but can be if you change your linker file. You could either clean cache before starting DMA, or use the write-through RAM address range.

See "Table 6. Memory map and default device memory area attributes" in the reference manual.

If you feel a post has answered your question, please click "Accept as Solution".

It's manually written code, not generated.

Sorry that I did not mention it earlier: the DMA buffer is located in AXI SRAM, not DTCM.

This is what .sct looks like

; Single load region: code and constants execute in place from 0x08000000,
; all read-write and zero-initialised data is placed at 0x24000000.
LR_IROM1 0x08000000 0x00200000  {    ; load region size_region
  ER_IROM1 0x08000000 0x00200000  {  ; load address = execution address
   *.o (RESET, +First)
   *(InRoot$$Sections)
   .ANY (+RO)
   .ANY (+XO)
  }
  ; Every RW/ZI object lands in this one region, including the DMA buffers.
  RW_IRAM2 0x24000000 0x00080000  {  ; RW data
   .ANY (+RW +ZI)
  }
}
 

That said, the following change to clean the cache does fix the problem:

/*
 * Start a one-shot DMA transmission of txlen bytes from buf on USART3
 * (DMA1 stream 0). This is the variant with the cache clean added, which
 * the poster reports fixes Tx when the D-Cache is enabled.
 *
 * buf:   data to send.
 * txlen: number of bytes to send.
 */
void start_dma(uint8_t *buf, uint8_t txlen)
{
	tx_done = 0;
	LL_GPIO_ResetOutputPin(GPIOB, LL_GPIO_PIN_0);		/* Done: LED OFF */
	/* Clean the whole D-Cache so the data the CPU wrote to buf reaches
	 * memory before the DMA reads it. */
	SCB_CleanDCache();
 
	LL_DMA_ClearFlag_TC0(DMA1);				/* Transfer complete */
	LL_DMA_ClearFlag_HT0(DMA1);				/* Half Transfer done */
	LL_DMA_ClearFlag_TE0(DMA1);				/* Transfer Error */
	LL_DMA_ClearFlag_DME0(DMA1);				/* Direct MODE Error */
	LL_DMA_ClearFlag_FE0(DMA1);				/* FIFO Error */
 
	LL_DMA_SetDataLength(DMA1, LL_DMA_STREAM_0, txlen);	/* DMA Stream length */
	LL_DMA_ConfigAddresses(DMA1,
			       LL_DMA_STREAM_0,			/* USART3 DMA Stream 0 */
			       (uint32_t)buf,			/* Transmit string */
			       LL_USART_DMA_GetRegAddr(USART3, LL_USART_DMA_REG_DATA_TRANSMIT),
			       LL_DMA_GetDataTransferDirection(DMA1, LL_DMA_STREAM_0));
 
	LL_DMA_EnablePeriphDMA(DMA1, LL_DMA_STREAM_0);		/* Enable USART3 Stream DMA */
	LL_DMA_EnableStream(DMA1, LL_DMA_STREAM_0);		/* Enable DMA Channel Tx */
}

OK. But why would the cache need cleaning? For reading the result of a DMA transfer I can understand it, but for a DMA write...

Or, does it imply that the MCU has written to the Cache, not to the actual memory ?

TDK
Guru

AXI SRAM only operates in WBWA (write-back write-allocate) and does not have a write-through address mapping.

> But why would the Cache need cleaning ?

When cache is used in WBWA mode, the CPU only writes to the memory when necessary, such as when it needs to make space for a new line of cache. This is to increase performance. If you need the memory to be valid for some reason (such as if DMA wants to read from it), you can force the CPU to write it by doing a clean operation.

> Or, does it imply that the MCU has written to the Cache, not to the actual memory ?

Yes.

If you feel a post has answered your question, please click "Accept as Solution".

Thanks for the details and the clarification.

RMcCa
Senior II

Use DMA with D2 SRAM and then use the MDMA to copy the data into TCM RAM. This avoids any cache issues.

It's a bit of an overkill to use 2 DMA transfers to do a simple print. :\

In addition, the DMA setup itself causes a lot of CPU overhead; at that point it is better to avoid DMA altogether for a simple printf.