cancel
Showing results for 
Search instead for 
Did you mean: 

STM32H7A3 LPUART and BDMA

magene
Senior II

I'm trying to get the LPUART working with BDMA on a STM32H7A3 MCU.  Eventually I'd like to set it up so the MCU goes into a low power state and wakes up when it receives a LF character match in the incoming message, but for now I'm just trying to get a small test program working starting with just transmitting some bytes using BDMA.  I have DMA with character match working on a regular UART with DMA and I'm using that code as an example along with the reference manual sections on the LPUART and BDMA. I think I'm doing everything the reference manual says to do but maybe I missed or misunderstood something. I can send characters out the LPUART without DMA and see them show up on my logic analyzer but when I try to do a BDMA transmit, nothing shows up on the LPUART_TX line to trigger the logic analyzer.

I've read this article ( https://community.st.com/t5/stm32-mcus/dma-is-not-working-on-stm32h7-devices/ta-p/49498 ) many times and it seems pretty clear I should be making sure my tx buffer is in SRAM4 but I haven't figured out how to do that yet. 

I use CubeMX to generate the startup code and the VisualGDB development environment to develop the application.  I attached the .ioc file for reference.

Thanks for the help

Here's the test code

 

 

TEST(CPFTestGroup, LPUARTTest)
{
	// BDMA2 lives in the SRD (D3) domain and can only access SRAM4 at
	// 0x38000000 (RM0455).  A function-local buffer lives on the stack
	// (DTCM/AXI SRAM), which BDMA2 cannot read, so the transfer silently
	// moves nothing.  Place the TX buffer in .RAM_SRD_section, which the
	// linker script must map into the RAM_SRD region; keep it static so it
	// has a fixed address, and 32-byte aligned for D-cache line compatibility.
	static __attribute__((section(".RAM_SRD_section"))) ALIGN_32BYTES(std::uint8_t getFPCmd[]) = "abcdefghijklmnop";
	const uint32_t getFPCmdSize = sizeof(getFPCmd) - 1;	// payload only, excludes the NUL terminator (16 bytes)

	SystemClock::configSystemClock();
	MX_LPUART1_UART_Init();
	MX_BDMA2_Init();

	// Sanity check: CPU-driven, byte-by-byte transmission works.
	for (int i = 0; i < 128; i++)
	{
		LL_LPUART_TransmitData8(LPUART1, i);
		LL_mDelay(1);
	}

	// BDMA transmit: program source (SRD buffer), destination (LPUART1 TDR)
	// and length, then enable the channel BEFORE raising the peripheral's
	// TX DMA request so the first request is not missed.
	LL_BDMA_ConfigAddresses(BDMA2, LL_BDMA_CHANNEL_1, reinterpret_cast<uint32_t>(getFPCmd),
		LL_LPUART_DMA_GetRegAddr(LPUART1, LL_LPUART_DMA_REG_DATA_TRANSMIT),	LL_BDMA_DIRECTION_MEMORY_TO_PERIPH);
	LL_BDMA_SetDataLength(BDMA2, LL_BDMA_CHANNEL_1, getFPCmdSize);

	LL_BDMA_EnableChannel(BDMA2, LL_BDMA_CHANNEL_1);
	LL_LPUART_EnableDMAReq_TX(LPUART1);

	// Block until the channel-1 transfer-complete flag is set so the test
	// does not return while the DMA is still draining the buffer.
	while (!LL_BDMA_IsActiveFlag_TC1(BDMA2))
	{
	}
	LL_BDMA_ClearFlag_TC1(BDMA2);
}

 

 

Here's the MX_LPUART1_UART_Init  and MX_BDMA2_Init() code generated by CubeMX

 

 

/**
 * @brief CubeMX-generated configuration of LPUART1 and its two BDMA2 channels
 *        (LL drivers).
 *
 * The LPUART1 kernel clock is the LSE (32.768 kHz), which is what later
 * allows the peripheral to operate/wake in low-power modes but also bounds
 * the reachable baud rate (9600 baud is used here -- NOTE(review): confirm
 * the LSE baud-rate limit in RM0455 for this part).
 *
 * BDMA2 channel 0 is routed to the LPUART1_RX request and channel 1 to
 * LPUART1_TX through DMAMUX2.  Only static channel settings (request,
 * direction, priority, mode, increment, data size) are made here; addresses
 * and transfer lengths are programmed by the caller before each transfer.
 *
 * Blocks at the end polling TEACK/REACK, which requires the LSE to be
 * running -- SystemClock_Config() must have been called first.
 */
void MX_LPUART1_UART_Init(void)
{

  /* USER CODE BEGIN LPUART1_Init 0 */

  /* USER CODE END LPUART1_Init 0 */

  LL_LPUART_InitTypeDef LPUART_InitStruct = {0};

  LL_GPIO_InitTypeDef GPIO_InitStruct = {0};

  /* Kernel clock = LSE (32.768 kHz); selected before enabling the peripheral clock. */
  LL_RCC_SetLPUARTClockSource(LL_RCC_LPUART1_CLKSOURCE_LSE);

  /* Peripheral clock enable */
  LL_APB4_GRP1_EnableClock(LL_APB4_GRP1_PERIPH_LPUART1);

  LL_AHB4_GRP1_EnableClock(LL_AHB4_GRP1_PERIPH_GPIOB);
  /**LPUART1 GPIO Configuration
  PB6   ------> LPUART1_TX
  PB7   ------> LPUART1_RX
  */
  /* Both pins in alternate-function mode, AF8 = LPUART1 on PB6/PB7. */
  GPIO_InitStruct.Pin = LL_GPIO_PIN_6|LL_GPIO_PIN_7;
  GPIO_InitStruct.Mode = LL_GPIO_MODE_ALTERNATE;
  GPIO_InitStruct.Speed = LL_GPIO_SPEED_FREQ_LOW;
  GPIO_InitStruct.OutputType = LL_GPIO_OUTPUT_PUSHPULL;
  GPIO_InitStruct.Pull = LL_GPIO_PULL_NO;
  GPIO_InitStruct.Alternate = LL_GPIO_AF_8;
  LL_GPIO_Init(GPIOB, &GPIO_InitStruct);

  /* LPUART1 DMA Init */

  /* LPUART1_RX Init: BDMA2 channel 0, peripheral-to-memory, byte-wide,
     memory pointer increments, peripheral (RDR) address fixed. */
  LL_BDMA_SetPeriphRequest(BDMA2, LL_BDMA_CHANNEL_0, LL_DMAMUX2_REQ_LPUART1_RX);

  LL_BDMA_SetDataTransferDirection(BDMA2, LL_BDMA_CHANNEL_0, LL_BDMA_DIRECTION_PERIPH_TO_MEMORY);

  LL_BDMA_SetChannelPriorityLevel(BDMA2, LL_BDMA_CHANNEL_0, LL_BDMA_PRIORITY_LOW);

  LL_BDMA_SetMode(BDMA2, LL_BDMA_CHANNEL_0, LL_BDMA_MODE_NORMAL);

  LL_BDMA_SetPeriphIncMode(BDMA2, LL_BDMA_CHANNEL_0, LL_BDMA_PERIPH_NOINCREMENT);

  LL_BDMA_SetMemoryIncMode(BDMA2, LL_BDMA_CHANNEL_0, LL_BDMA_MEMORY_INCREMENT);

  LL_BDMA_SetPeriphSize(BDMA2, LL_BDMA_CHANNEL_0, LL_BDMA_PDATAALIGN_BYTE);

  LL_BDMA_SetMemorySize(BDMA2, LL_BDMA_CHANNEL_0, LL_BDMA_MDATAALIGN_BYTE);

  /* LPUART1_TX Init: BDMA2 channel 1, memory-to-peripheral, mirror of the
     RX channel with the direction reversed. */
  LL_BDMA_SetPeriphRequest(BDMA2, LL_BDMA_CHANNEL_1, LL_DMAMUX2_REQ_LPUART1_TX);

  LL_BDMA_SetDataTransferDirection(BDMA2, LL_BDMA_CHANNEL_1, LL_BDMA_DIRECTION_MEMORY_TO_PERIPH);

  LL_BDMA_SetChannelPriorityLevel(BDMA2, LL_BDMA_CHANNEL_1, LL_BDMA_PRIORITY_LOW);

  LL_BDMA_SetMode(BDMA2, LL_BDMA_CHANNEL_1, LL_BDMA_MODE_NORMAL);

  LL_BDMA_SetPeriphIncMode(BDMA2, LL_BDMA_CHANNEL_1, LL_BDMA_PERIPH_NOINCREMENT);

  LL_BDMA_SetMemoryIncMode(BDMA2, LL_BDMA_CHANNEL_1, LL_BDMA_MEMORY_INCREMENT);

  LL_BDMA_SetPeriphSize(BDMA2, LL_BDMA_CHANNEL_1, LL_BDMA_PDATAALIGN_BYTE);

  LL_BDMA_SetMemorySize(BDMA2, LL_BDMA_CHANNEL_1, LL_BDMA_MDATAALIGN_BYTE);

  /* LPUART1 interrupt Init */
  NVIC_SetPriority(LPUART1_IRQn, NVIC_EncodePriority(NVIC_GetPriorityGrouping(),0, 0));
  NVIC_EnableIRQ(LPUART1_IRQn);

  /* USER CODE BEGIN LPUART1_Init 1 */

  /* USER CODE END LPUART1_Init 1 */
  /* 9600-8-N-1, no flow control, prescaler /1. */
  LPUART_InitStruct.PrescalerValue = LL_LPUART_PRESCALER_DIV1;
  LPUART_InitStruct.BaudRate = 9600;
  LPUART_InitStruct.DataWidth = LL_LPUART_DATAWIDTH_8B;
  LPUART_InitStruct.StopBits = LL_LPUART_STOPBITS_1;
  LPUART_InitStruct.Parity = LL_LPUART_PARITY_NONE;
  LPUART_InitStruct.TransferDirection = LL_LPUART_DIRECTION_TX_RX;
  LPUART_InitStruct.HardwareFlowControl = LL_LPUART_HWCONTROL_NONE;
  LL_LPUART_Init(LPUART1, &LPUART_InitStruct);
  /* NOTE(review): FIFO thresholds are programmed and then FIFO mode is
     disabled, so the thresholds are inert while the FIFO is off -- CubeMX
     emits both regardless of the FIFO setting. */
  LL_LPUART_SetTXFIFOThreshold(LPUART1, LL_LPUART_FIFOTHRESHOLD_1_8);
  LL_LPUART_SetRXFIFOThreshold(LPUART1, LL_LPUART_FIFOTHRESHOLD_1_8);
  LL_LPUART_DisableFIFO(LPUART1);

  /* USER CODE BEGIN WKUPType LPUART1 */

  /* USER CODE END WKUPType LPUART1 */

  LL_LPUART_Enable(LPUART1);

  /* Polling LPUART1 initialisation: wait for the transmit/receive enable
     acknowledge flags; this spins forever if the LSE kernel clock is not
     running. */
  while((!(LL_LPUART_IsActiveFlag_TEACK(LPUART1))) || (!(LL_LPUART_IsActiveFlag_REACK(LPUART1))))
  {
  }
  /* USER CODE BEGIN LPUART1_Init 2 */

  /* USER CODE END LPUART1_Init 2 */

}
/**
 * @brief CubeMX-generated BDMA2 bring-up: controller clock plus NVIC setup
 *        for channels 0 and 1.
 *
 * Only the clock enable and interrupt priorities live here; the per-channel
 * request/direction configuration is done in MX_LPUART1_UART_Init(), and
 * addresses/lengths are programmed per transfer by the caller.
 * NOTE(review): the channel IRQs are enabled in the NVIC, but no code shown
 * here enables a channel interrupt source (e.g. transfer-complete), so the
 * ISRs will not fire until the caller does so.
 */
void MX_BDMA2_Init(void)
{

  /* Init with LL driver */
  /* DMA controller clock enable */
  __HAL_RCC_BDMA2_CLK_ENABLE();

  /* DMA interrupt init */
  /* BDMA2_Channel0_IRQn interrupt configuration */
  NVIC_SetPriority(BDMA2_Channel0_IRQn, NVIC_EncodePriority(NVIC_GetPriorityGrouping(),0, 0));
  NVIC_EnableIRQ(BDMA2_Channel0_IRQn);
  /* BDMA2_Channel1_IRQn interrupt configuration */
  NVIC_SetPriority(BDMA2_Channel1_IRQn, NVIC_EncodePriority(NVIC_GetPriorityGrouping(),0, 0));
  NVIC_EnableIRQ(BDMA2_Channel1_IRQn);

}

 

 

Here's configSystemClock and the SystemClock_Config generated by CubeMX

 

 

// Thin delegation to the CubeMX-generated SystemClock_Config(); presumably
// kept as a C++ seam so application code does not depend on the generated
// C API directly.
void SystemClock::configSystemClock()
{
	SystemClock_Config();
}

/**
 * @brief Clock-tree bring-up for the STM32H7A3 (CubeMX-generated, LL drivers).
 *
 * Sequence: flash latency -> SMPS supply + VOS0 -> HSE -> backup-domain
 * access + LSE (needed as the LPUART1 kernel clock) -> PLL1 -> switch SYSCLK
 * to PLL1 -> bus prescalers -> HAL tick.  The final core clock is declared
 * as 280 MHz via LL_SetSystemCoreClock below.
 */
void SystemClock_Config(void)
{
	/*AXI clock gating */
	RCC->CKGAENR = 0xFFFFFFFF;

	/* Raise flash wait states before raising the clock frequency. */
	LL_FLASH_SetLatency(LL_FLASH_LATENCY_3);
	while (LL_FLASH_GetLatency() != LL_FLASH_LATENCY_3)
	{
	}
	/* SMPS direct supply, voltage scale 0 (highest performance). */
	LL_PWR_ConfigSupply(LL_PWR_DIRECT_SMPS_SUPPLY);
	LL_PWR_SetRegulVoltageScaling(LL_PWR_REGU_VOLTAGE_SCALE0);
	while (LL_PWR_IsActiveFlag_VOS() == 0)
	{
	}
	LL_RCC_HSE_Enable();

	/* Wait till HSE is ready */
	while (LL_RCC_HSE_IsReady() != 1)
	{

	}
	/* LSE lives in the backup domain, so unlock it before touching LSEDRIVE/LSEON. */
	LL_PWR_EnableBkUpAccess();
	LL_RCC_LSE_SetDriveCapability(LL_RCC_LSEDRIVE_LOW);
	LL_RCC_LSE_Enable();

	/* Wait till LSE is ready */
	while (LL_RCC_LSE_IsReady() != 1)
	{

	}
	LL_RCC_HSE_EnableCSS();
	LL_RCC_LSE_EnableCSS();
	/* PLL1: M=3, N=70, P=2.  Yields the declared 280 MHz SYSCLK if HSE is
	   24 MHz (24/3 * 70 / 2 = 280; VCO input 8 MHz matches the 8-16 MHz
	   range selected) -- NOTE(review): confirm the crystal frequency. */
	LL_RCC_PLL_SetSource(LL_RCC_PLLSOURCE_HSE);
	LL_RCC_PLL1P_Enable();
	LL_RCC_PLL1Q_Enable();
	LL_RCC_PLL1_SetVCOInputRange(LL_RCC_PLLINPUTRANGE_8_16);
	LL_RCC_PLL1_SetVCOOutputRange(LL_RCC_PLLVCORANGE_WIDE);
	LL_RCC_PLL1_SetM(3);
	LL_RCC_PLL1_SetN(70);
	LL_RCC_PLL1_SetP(2);
	LL_RCC_PLL1_SetQ(35);
	LL_RCC_PLL1_SetR(2);
	LL_RCC_PLL1_Enable();

	/* Wait till PLL is ready */
	while (LL_RCC_PLL1_IsReady() != 1)
	{
	}

	/* Intermediate AHB prescaler 2 when target frequency clock is higher than 80 MHz */
	LL_RCC_SetAHBPrescaler(LL_RCC_AHB_DIV_2);

	LL_RCC_SetSysClkSource(LL_RCC_SYS_CLKSOURCE_PLL1);

	/* Wait till System clock is ready */
	while (LL_RCC_GetSysClkSource() != LL_RCC_SYS_CLKSOURCE_STATUS_PLL1)
	{

	}
	/* Final prescalers (AHB value repeats the intermediate setting above). */
	LL_RCC_SetAHBPrescaler(LL_RCC_AHB_DIV_2);
	LL_RCC_SetAPB1Prescaler(LL_RCC_APB1_DIV_1);
	LL_RCC_SetAPB2Prescaler(LL_RCC_APB2_DIV_1);
	LL_RCC_SetAPB3Prescaler(LL_RCC_APB3_DIV_1);
	LL_RCC_SetAPB4Prescaler(LL_RCC_APB4_DIV_1);
	LL_SetSystemCoreClock(280000000);

	/* Update the time base */
	if (HAL_InitTick(TICK_INT_PRIORITY) != HAL_OK)
	{
		SystemClock_Error_Handler();
	}
	/* NOTE(review): CSS for HSE and LSE was already enabled above; these
	   repeated calls look redundant -- confirm they are intentional. */
	LL_RCC_HSE_EnableCSS();
	LL_RCC_LSE_EnableCSS();
}

 

 

 

1 ACCEPTED SOLUTION

Accepted Solutions

D-cache should not make things more difficult , so i recommend to not use it.

Just in Cube do not enable it, later can enable and do the cache management - was my idea, to make it more easy.

->

AScha3_0-1703841692534.png

I forgot: you have to add this to the xxx_flash.ld file   (just before the comment line : /*Remove... )

  .RAM_SRD_sec :					/* (NOLOAD) <- no initialize */ 
  {
   	. = ALIGN(32);
    *(.RAM_SRD_section) 
    . = ALIGN(32);
  } >RAM_SRD

  /* Remove information from the compiler libraries */

And after mod the xxx_flash.ld file you see it is used ? ->

AScha3_1-1703843164263.png

Now it should work.   😎

 

If you feel a post has answered your question, please click "Accept as Solution".

View solution in original post

6 REPLIES 6
AScha.3
Principal III

H7A3 has D-cache - how you manage this /with dma ? (see nothing in your text)

Simple solution: switch off (or not enable) D-cache . (NOW ! if bdma working...can work on this, cache management.)

If you have your data in the main RAM (AXI SRAM at 0x24000000, or any region outside the SRD domain), bdma cannot work:

AScha3_0-1703788868192.png

Then, to put your data array into a certain RAM region, write (example from my H7):

 

 

__attribute__((section(".RAM_SRD_section"))) int32_t  playbuf[4096];

 

If the ram section to use is not in the linker script, you have to modify it, to have RAM_SRD , + give it a name.

For bdma only SRD area is useful:

AScha3_1-1703788969581.png

Now look at your xxx_FLASH.ld file and edit...add new section, if not there (i have no H7A3 with this, so i cannot just give you one);

My H7A3 xx.ld file just has:

 

/* Memories definition */
MEMORY
{
  DTCMRAM    (xrw)    : ORIGIN = 0x20000000,   LENGTH = 128K
  ITCMRAM    (xrw)    : ORIGIN = 0x00000000,   LENGTH = 64K
  RAM    (xrw)    : ORIGIN = 0x24000000,   LENGTH = 1024K
  FLASH    (rx)    : ORIGIN = 0x8000000,   LENGTH = 2048K
}

 

 

-> modify -> add srd ram

 

/* Memories definition */
MEMORY
{
  DTCMRAM    (xrw)    : ORIGIN = 0x20000000,   LENGTH = 128K
  ITCMRAM    (xrw)    : ORIGIN = 0x00000000,   LENGTH = 64K
  RAM    (xrw)    : ORIGIN = 0x24000000,   LENGTH = 1024K
  RAM_SRD    (xrw)    : ORIGIN = 0x38000000,   LENGTH = 32K
  FLASH    (rx)    : ORIGIN = 0x8000000,   LENGTH = 2048K
}

 

 

ed.: I forgot:

 you have to add this to the xxx_flash.ld file   (just before the comment line : /*Remove... )

  .RAM_SRD_sec :					/* (NOLOAD) <- no initialize */ 
  {
   	. = ALIGN(32);
    *(.RAM_SRD_section) 
    . = ALIGN(32);
  } >RAM_SRD

  /* Remove information from the compiler libraries */

 

 ...maybe like this ... then try.

If you feel a post has answered your question, please click "Accept as Solution".
magene
Senior II

@AScha.3 That helps a lot but I'm not there yet.  I modified my test code to look like this:

//Per the reference manual (RM0455), BDMA only has access to SRD ram section
// The buffer is placed in .RAM_SRD_section -- which the linker script maps
// into RAM_SRD (SRAM4 at 0x38000000) -- and is 32-byte aligned so it sits on
// D-cache line boundaries.
__attribute__((section(".RAM_SRD_section"))) ALIGN_32BYTES(std::uint8_t  getFPCmd[]) = "abcdefghijklmnopqrstuvwxyz";

TEST(CPFTestGroup, LPUARTTest)
{
	// Unused in this version; sizeof includes the trailing NUL (27 bytes).
	const uint32_t getFPCmdSize = sizeof(getFPCmd);

	SystemClock::configSystemClock();
	MX_LPUART1_UART_Init();
	MX_BDMA2_Init();

	// CPU-driven transmit as a sanity check that the LPUART itself works.
	for (int i = 0; i < 128; i++)
	{
		LL_LPUART_TransmitData8(LPUART1, i);
		LL_mDelay(1);
	}

    //Setup TX
	// Program source (SRD buffer) and destination (LPUART1 TDR), set a
	// 16-byte length, then enable the channel before raising the TX DMA
	// request.
	LL_BDMA_ConfigAddresses(BDMA2, LL_BDMA_CHANNEL_1, reinterpret_cast<uint32_t>(getFPCmd),
		LL_LPUART_DMA_GetRegAddr(LPUART1, LL_LPUART_DMA_REG_DATA_TRANSMIT),	LL_BDMA_DIRECTION_MEMORY_TO_PERIPH);
	LL_BDMA_SetDataLength(BDMA2, LL_BDMA_CHANNEL_1, 16);

	LL_BDMA_EnableChannel(BDMA2, LL_BDMA_CHANNEL_1);
	LL_LPUART_EnableDMAReq_TX(LPUART1);

	// Fixed delay instead of polling the TC flag; at 9600 baud the 16 bytes
	// take roughly 17 ms, so 2 s is ample.
	LL_mDelay(2000);
	std::cout << "LPUARTTest Done" << std::endl;
}

and modified the STM32H7A3LI_flash.lds file using VisualGDB to look like this

/* Generated by LinkerScriptGenerator [http://visualgdb.com/tools/LinkerScriptGenerator]
 * Target: STM32H7A3LI
 * The file is provided under the BSD license.
 */

ENTRY(Reset_Handler)

MEMORY
{
	FLASH (RX)    : ORIGIN = 0x08000000, LENGTH = 2M
	SRAM (RWX)    : ORIGIN = 0x24000000, LENGTH = 1M
	DTCMRAM (RWX) : ORIGIN = 0x20000000, LENGTH = 128K
	ITCMRAM (RWX) : ORIGIN = 0x00000000, LENGTH = 64K
	/* --- begin generated external memories -- */
	RAM_SRD (RWX) : ORIGIN = 0x38000000, LENGTH = 32K
	/* --- end generated external memories -- */
}

but when I try to disable cache like this

__attribute__((section(".RAM_SRD_section"))) ALIGN_32BYTES(std::uint8_t  getFPCmd[]) = "abcdefghijklmnopqrstuvwxyz";

TEST(CPFTestGroup, LPUARTTest)
{
	SCB_DisableDCache();
	const uint32_t getFPCmdSize = sizeof(getFPCmd);

	SystemClock::configSystemClock();
	MX_LPUART1_UART_Init();
	MX_BDMA2_Init();

the program immediately throws a "Received a SIGTRAP: Trace/breakpoint trap" exception, seemingly before any code runs, and jumps into the hard fault handler here:

// Default hard-fault trap.  "naked" suppresses the compiler prologue/epilogue
// so the faulting register context is left untouched for the debugger;
// "weak" lets a project-defined HardFault_Handler override this stub.
void __attribute__ ((weak, naked)) HardFault_Handler() 
{
	//If you hit the breakpoint below, one of the interrupts was unhandled in your code. 
	//Define the following function in your code to handle it:
	//	extern "C" void HardFault_Handler();
	__asm("bkpt 255");	// halt under a debugger (reported as SIGTRAP)
	__asm("bx lr");		// return to the faulting context; will typically re-fault
}

Thanks again for the help.  Seems like ST might want to consider an example for this.

I got past the hard fault exception by replacing SCB_DisableDCache(); with SCB_InvalidateDCache(); like this but I'm still not seeing any characters on the logic analyzer when I try to transmit with the BDMA module.  Any help will be greatly appreciated.

Thanks

//Per the reference manual (RM0455), BDMA only has access to SRD ram section
__attribute__((section("RAM_SRD_section"))) ALIGN_32BYTES(std::uint8_t  getFPCmd[]) = "abcdefghijklmnopqrstuvwxyz";

TEST(CPFTestGroup, LPUARTTest)
{
	SCB_InvalidateDCache();
	const uint32_t getFPCmdSize = sizeof(getFPCmd);

	SystemClock::configSystemClock();
	MX_LPUART1_UART_Init();
	MX_BDMA2_Init();

	for (int i = 0; i < 128; i++)
	{
		LL_LPUART_TransmitData8(LPUART1, i);
		LL_mDelay(1);
	}

    //Setup TX
	LL_BDMA_ConfigAddresses(BDMA2, LL_BDMA_CHANNEL_1, reinterpret_cast<uint32_t>(getFPCmd),
		LL_LPUART_DMA_GetRegAddr(LPUART1, LL_LPUART_DMA_REG_DATA_TRANSMIT),	LL_BDMA_DIRECTION_MEMORY_TO_PERIPH);
	LL_BDMA_SetDataLength(BDMA2, LL_BDMA_CHANNEL_1, 16);

	LL_BDMA_EnableChannel(BDMA2, LL_BDMA_CHANNEL_1);
	LL_LPUART_EnableDMAReq_TX(LPUART1);

	LL_mDelay(2000);
	std::cout << "LPUARTTest Done" << std::endl;
}

 

D-cache should not make things more difficult , so i recommend to not use it.

Just in Cube do not enable it, later can enable and do the cache management - was my idea, to make it more easy.

->

AScha3_0-1703841692534.png

I forgot: you have to add this to the xxx_flash.ld file   (just before the comment line : /*Remove... )

  .RAM_SRD_sec :					/* (NOLOAD) <- no initialize */ 
  {
   	. = ALIGN(32);
    *(.RAM_SRD_section) 
    . = ALIGN(32);
  } >RAM_SRD

  /* Remove information from the compiler libraries */

And after mod the xxx_flash.ld file you see it is used ? ->

AScha3_1-1703843164263.png

Now it should work.   😎

 

If you feel a post has answered your question, please click "Accept as Solution".

@AScha.3 First of all, thanks for the detailed help.  I'm probably in over my head here and really appreciate your help. I still can't transmit using BDMA but I think I was able to get the SRD memory section enabled and put the data I want to TX there. VisualGDB provides a window to add external memory and it does a lot of the work for me but it does a lot more than you suggested which maybe a good thing or a bad thing.  Here's how it modified the .lds file.  VisualGDB set all the ALIGN(X) lines to ALIGN(4) so I changed them to ALIGN(32).  I'm wondering if the .lds file works in bytes and not bits?

	/* --- begin generated external memory sections -- */
	.ram_srd_text :
	{
		. = ALIGN(32);
		_sram_srd_text = .;

		PROVIDE(__ram_srd_text_start = _sram_srd_text);
		*(.ram_srd_text)
		*(.ram_srd_text*)
		. = ALIGN(32);
		_eram_srd_text = .;

		PROVIDE(__ram_srd_text_end = _eram_srd_text);
	} > RAM_SRD AT >FLASH
	_siram_srd_text = LOADADDR(.ram_srd_text);

	.ram_srd_data :
	{
		. = ALIGN(32);
		_sram_srd_data = .;

		PROVIDE(__ram_srd_data_start = _sram_srd_data);
		*(.ram_srd_data)
		*(.ram_srd_data*)
		. = ALIGN(32);
		_eram_srd_data = .;

		PROVIDE(__ram_srd_data_end = _eram_srd_data);
	} > RAM_SRD AT >FLASH
	_siram_srd_data = LOADADDR(.ram_srd_data);

	.ram_srd_bss (NOLOAD) :
	{
		. = ALIGN(32);
		_sram_srd_bss = .;

		PROVIDE(__ram_srd_bss_start = _sram_srd_bss);
		*(.ram_srd_bss)
		*(.ram_srd_bss*)
		. = ALIGN(32);
		_eram_srd_bss = .;

		PROVIDE(__ram_srd_bss_end = _eram_srd_bss);
	} > RAM_SRD

	/* --- end generated external memory sections -- */

VisualGDB has a nice embedded memory explorer and here's what it shows.

magene_0-1703873444992.png

Both DCache and ICache were already disabled in CubeMX so that probably wasn't my issue.

magene_1-1703873675865.png

At this point I think the problem may be in my code that sets up the LPUART and BDMA and tries to transmit my message.  So I'm going to triple check what's going on there.

Thanks again for all the help

 

👍 😎

+

ALIGN(32) is just good - if you (later) want use d-cache , the address is matching the cache access (is 32 byte line).

( ALIGN(xx) is in bytes.)

+

I-cache you should enable, makes code faster and "produces" no difficult side effects.

+

Your dma + uart was working, so bdma2 + lpuart should work also. (but i never tried-so i cannot help much here.)

If you feel a post has answered your question, please click "Accept as Solution".