cancel
Showing results for 
Search instead for 
Did you mean: 

STM32H7 DMA receive only works if program started with debugger

angrypotato
Associate II

Hello,

I am running into an issue where I can only receive on UART5 of an STM32H7A3LI if I have been debugging since a power cycle. The code runs normally otherwise after a power cycle, but UART5 cannot receive unless it is debugging.

  • Using a JLink Pro, VSCode, arm-none-eabi-gdb-py, STM32H7A3LI on a custom PCB
  • I have confirmed data is sent and received regardless of debug state using a logic analyzer; the problem is definitely on the MCU
  • I have tried attaching the debugger to an existing session using SEGGER's ozone, the DMA counter gets incremented on the first byte received but the data is just a 0, it is not the actual byte received
  • This does not happen on other UARTs or USARTs, I am able to receive normally from other devices connected to other UARTs regardless of whether or not I am debugging
  • The Instruction and Data caching has been disabled for the send buffer. It was not needed for any other RX buffer, only the TX buffers had issues with caching. I did try receiving on an RX buffer with caching disabled but this did not fix the problem.
  • DMA is initialized before any peripherals. I also changed the order of UART inits; this had no effect as long as DMA was initialized first.
  • When not using DMA, receive stops working regardless of whether or not I am debugging
  • The same problem happens on other DMA streams, but not other UARTs
  • I am using FreeRTOS

Init code:

 

/**
  * @brief Fills in the huart5 handle for the UART5 instance and initializes
  *        it through the HAL.
  *
  * Settings written below: 38400 baud, 8-bit word length, 1 stop bit, no
  * parity, TX and RX enabled, no hardware flow control, 16x oversampling,
  * one-bit sampling disabled, clock prescaler DIV1, no advanced features.
  * Every HAL call is checked; any result other than HAL_OK leads to a call
  * to Error_Handler().
  */
static void MX_UART5_Init(void)
{
  /* USER CODE BEGIN UART5_Init 0 */

  /* USER CODE END UART5_Init 0 */

  /* USER CODE BEGIN UART5_Init 1 */

  /* USER CODE END UART5_Init 1 */
  huart5.Instance = UART5;
  huart5.Init.BaudRate = 38400;
  huart5.Init.WordLength = UART_WORDLENGTH_8B;
  huart5.Init.StopBits = UART_STOPBITS_1;
  huart5.Init.Parity = UART_PARITY_NONE;
  huart5.Init.Mode = UART_MODE_TX_RX;
  huart5.Init.HwFlowCtl = UART_HWCONTROL_NONE;
  huart5.Init.OverSampling = UART_OVERSAMPLING_16;
  huart5.Init.OneBitSampling = UART_ONE_BIT_SAMPLE_DISABLE;
  huart5.Init.ClockPrescaler = UART_PRESCALER_DIV1;
  huart5.AdvancedInit.AdvFeatureInit = UART_ADVFEATURE_NO_INIT;
  if (HAL_UART_Init(&huart5) != HAL_OK)
  {
    Error_Handler();
  }
  /* TX and RX FIFO thresholds are both set to the *_THRESHOLD_1_8 value ... */
  if (HAL_UARTEx_SetTxFifoThreshold(&huart5, UART_TXFIFO_THRESHOLD_1_8) != HAL_OK)
  {
    Error_Handler();
  }
  if (HAL_UARTEx_SetRxFifoThreshold(&huart5, UART_RXFIFO_THRESHOLD_1_8) != HAL_OK)
  {
    Error_Handler();
  }
  /* ... and FIFO mode is then disabled as the last configuration step. */
  if (HAL_UARTEx_DisableFifoMode(&huart5) != HAL_OK)
  {
    Error_Handler();
  }
  /* USER CODE BEGIN UART5_Init 2 */

  /* USER CODE END UART5_Init 2 */
}

 

stm32h7xx_hal_msp.c

 

else if(huart->Instance==UART5)
  {
  /* UART5 branch: selects the UART5 peripheral clock source, enables the
     UART5 and GPIO clocks, routes PD2/PC12 to the UART5 alternate function,
     and initializes the RX and TX DMA streams linked to the handle. */
  /* USER CODE BEGIN UART5_MspInit 0 */

  /* USER CODE END UART5_MspInit 0 */

  /** Initializes the peripherals clock
  */
    PeriphClkInitStruct.PeriphClockSelection = RCC_PERIPHCLK_UART5;
    PeriphClkInitStruct.Usart234578ClockSelection = RCC_USART234578CLKSOURCE_D2PCLK1;
    if (HAL_RCCEx_PeriphCLKConfig(&PeriphClkInitStruct) != HAL_OK)
    {
      Error_Handler();
    }

    /* Peripheral clock enable */
    __HAL_RCC_UART5_CLK_ENABLE();

    __HAL_RCC_GPIOD_CLK_ENABLE();
    __HAL_RCC_GPIOC_CLK_ENABLE();
    /**UART5 GPIO Configuration
    PD2     ------> UART5_RX
    PC12     ------> UART5_TX
    */
    /* PD2 (RX): alternate-function push-pull, no pull resistor, AF8 */
    GPIO_InitStruct.Pin = GPIO_PIN_2;
    GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
    GPIO_InitStruct.Pull = GPIO_NOPULL;
    GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH;
    GPIO_InitStruct.Alternate = GPIO_AF8_UART5;
    HAL_GPIO_Init(GPIOD, &GPIO_InitStruct);

    /* PC12 (TX): same pin settings as PD2, applied on port C */
    GPIO_InitStruct.Pin = GPIO_PIN_12;
    GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
    GPIO_InitStruct.Pull = GPIO_NOPULL;
    GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH;
    GPIO_InitStruct.Alternate = GPIO_AF8_UART5;
    HAL_GPIO_Init(GPIOC, &GPIO_InitStruct);

    /* UART5 DMA Init */
    /* UART5_RX Init */
    /* RX: DMA1 Stream2, peripheral-to-memory, byte-wide on both sides,
       memory address incremented, circular mode, DMA FIFO disabled */
    hdma_uart5_rx.Instance = DMA1_Stream2;
    hdma_uart5_rx.Init.Request = DMA_REQUEST_UART5_RX;
    hdma_uart5_rx.Init.Direction = DMA_PERIPH_TO_MEMORY;
    hdma_uart5_rx.Init.PeriphInc = DMA_PINC_DISABLE;
    hdma_uart5_rx.Init.MemInc = DMA_MINC_ENABLE;
    hdma_uart5_rx.Init.PeriphDataAlignment = DMA_PDATAALIGN_BYTE;
    hdma_uart5_rx.Init.MemDataAlignment = DMA_MDATAALIGN_BYTE;
    hdma_uart5_rx.Init.Mode = DMA_CIRCULAR;
    hdma_uart5_rx.Init.Priority = DMA_PRIORITY_LOW;
    hdma_uart5_rx.Init.FIFOMode = DMA_FIFOMODE_DISABLE;
    if (HAL_DMA_Init(&hdma_uart5_rx) != HAL_OK)
    {
      Error_Handler();
    }

    /* Attach the RX DMA handle to the UART handle's hdmarx member */
    __HAL_LINKDMA(huart,hdmarx,hdma_uart5_rx);

    /* UART5_TX Init */
    /* TX: DMA1 Stream3, memory-to-peripheral, byte-wide on both sides,
       memory address incremented, normal mode, DMA FIFO disabled */
    hdma_uart5_tx.Instance = DMA1_Stream3;
    hdma_uart5_tx.Init.Request = DMA_REQUEST_UART5_TX;
    hdma_uart5_tx.Init.Direction = DMA_MEMORY_TO_PERIPH;
    hdma_uart5_tx.Init.PeriphInc = DMA_PINC_DISABLE;
    hdma_uart5_tx.Init.MemInc = DMA_MINC_ENABLE;
    hdma_uart5_tx.Init.PeriphDataAlignment = DMA_PDATAALIGN_BYTE;
    hdma_uart5_tx.Init.MemDataAlignment = DMA_MDATAALIGN_BYTE;
    hdma_uart5_tx.Init.Mode = DMA_NORMAL;
    hdma_uart5_tx.Init.Priority = DMA_PRIORITY_LOW;
    hdma_uart5_tx.Init.FIFOMode = DMA_FIFOMODE_DISABLE;
    if (HAL_DMA_Init(&hdma_uart5_tx) != HAL_OK)
    {
      Error_Handler();
    }

    /* Attach the TX DMA handle to the UART handle's hdmatx member */
    __HAL_LINKDMA(huart,hdmatx,hdma_uart5_tx);

  /* USER CODE BEGIN UART5_MspInit 1 */
  /* NOTE(review): this branch contains no HAL_NVIC_SetPriority() /
     HAL_NVIC_EnableIRQ() call for UART5_IRQn, so the UART5 global interrupt
     is not enabled here. Confirm whether it is enabled elsewhere or whether
     that omission is intended. */

  /* USER CODE END UART5_MspInit 1 */
  }

 

 

I use the __HAL_DMA_GET_COUNTER(huart5->hdmarx) macro to check the position of the DMA data pointer, if the counter does not match the previous position, I copy the data into the receive buffer. The code works on several other UARTs for this project and other projects using different MCUs, regardless of whether or not it is being debugged.

 

Any help would be greatly appreciated, I have been struggling with this for weeks. Please let me know if I am missing any useful information. Thank you!

1 ACCEPTED SOLUTION

Accepted Solutions
angrypotato
Associate II

Fixed!

I added the UART5_IRQHandler.

stm32h7xx_hal_msp.c
/* UART5 interrupt Init */
HAL_NVIC_SetPriority(UART5_IRQn, 5, 0);
HAL_NVIC_EnableIRQ(UART5_IRQn);

stm32h7xx_it.c
/**
  * @brief This function handles UART5 global interrupt.
  *        It takes no arguments and returns nothing; all of the work is
  *        delegated to HAL_UART_IRQHandler(), called with the huart5 handle.
  */
void UART5_IRQHandler(void)
{
  /* USER CODE BEGIN UART5_IRQn 0 */

  /* USER CODE END UART5_IRQn 0 */
  HAL_UART_IRQHandler(&huart5);
  /* USER CODE BEGIN UART5_IRQn 1 */

  /* USER CODE END UART5_IRQn 1 */
}

 I'm still not sure why I need this for this UART, since it wasn't needed for any other UART on this project. But it's working now! Thank you everyone for your help!

View solution in original post

7 REPLIES 7

>>Please let me know if I am missing any useful information.

Perhaps dump the UART and DMA registers in the failing states. Avoid the use of the debugger by outputting useful telemetry to another serial port so you can observe without interference. Watch Views over UART and other peripherals with FIFOs, etc., can be very disruptive, because accessing the registers has secondary effects.

You don't need to disable caching, but you do need to manage coherency and make sure that data is flushed to memory. Have buffers on 32-byte boundaries so you can use SCB_CleanDCache_by_Addr() etc. without issues with abutting data/structures.

Reception can fail if there are unaddressed sticky errors, ie parity, noise, framing, etc.

Make sure to explicitly clear all local/auto variables.

Tips, Buy me a coffee, or three.. PayPal Venmo
Up vote any posts that you find helpful, it shows what's working..
TDK
Guru

> The same problem happens on other DMA streams, but not other UARTs

So probably not a UART specific thing here, perhaps memory management.

Is data cache enabled? If so, disable it for debugging.

 

If you feel a post has answered your question, please click "Accept as Solution".

Thank you for your reply!

My buffers are already 32 bit aligned using gcc attributes, I also added a SCB_CleanDCache_by_Addr() to my ring buffer update loop before every __HAL_DMA_GET_COUNTER(huart5->hdmarx) call with no effect.

I also commented out the line that reads the SVD file in vscode, so I am not reading any of those registers in debug mode.

What sort of telemetry should I output from a spare UART? I read the peripheral registers using Ozone in the failed state, but nothing seemed odd to me.

How can I disable the data cache for debugging? Here is the code I am currently using for the cache setup.

boardInit(): (first function call from main)

extern uint32_t _nocache_dma_buffer_start, _nocache_dma_buffer_end; // defined in LD file

void MPU_Config(void)
{
  MPU_Region_InitTypeDef MPU_InitStruct = {0};

  /* Disables the MPU */
  HAL_MPU_Disable();

  /** Initializes and configures the Region and the memory to be protected */
  /* NOTE: 8-bit bitmap disable segments. See rm0433 2.3.1, pm0253 Table 89. MPU_RASR SRD and Subregions help.
   *    I think what this means is this...  0=>4G no-cache, no-access except in subregions 0,1,2,7 where
   *    the default configuration exists and what we're doing right here is "disabled" (SRD) */
  MPU_InitStruct.Enable = MPU_REGION_ENABLE;
  MPU_InitStruct.Number = MPU_REGION_NUMBER0;
  MPU_InitStruct.BaseAddress = 0x0;             /* 4GB starting from 0 (this is the complete address space) */
  MPU_InitStruct.Size = MPU_REGION_SIZE_4GB;
  MPU_InitStruct.SubRegionDisable = 0x87;
  MPU_InitStruct.TypeExtField = MPU_TEX_LEVEL0;
  MPU_InitStruct.AccessPermission = MPU_REGION_NO_ACCESS;
  MPU_InitStruct.DisableExec = MPU_INSTRUCTION_ACCESS_DISABLE;
  MPU_InitStruct.IsShareable = MPU_ACCESS_SHAREABLE;
  MPU_InitStruct.IsCacheable = MPU_ACCESS_NOT_CACHEABLE;
  MPU_InitStruct.IsBufferable = MPU_ACCESS_NOT_BUFFERABLE;
  HAL_MPU_ConfigRegion(&MPU_InitStruct);

  /** Setup DMA buffers for serial I/O */
  uint32_t dma_buf_len = ((uint32_t)&_nocache_dma_buffer_end - (uint32_t)&_nocache_dma_buffer_start);
  if (dma_buf_len > 8192) {
    Error_Handler();
  }
  MPU_InitStruct.Enable = MPU_REGION_ENABLE;
  MPU_InitStruct.Number = MPU_REGION_NUMBER1;
  MPU_InitStruct.BaseAddress = (uint32_t)&_nocache_dma_buffer_start;
  MPU_InitStruct.Size = MPU_REGION_SIZE_8KB;
  MPU_InitStruct.SubRegionDisable = 0x0;
  MPU_InitStruct.TypeExtField = MPU_TEX_LEVEL1;
  MPU_InitStruct.AccessPermission = MPU_REGION_FULL_ACCESS;
  MPU_InitStruct.DisableExec = MPU_INSTRUCTION_ACCESS_DISABLE;
  MPU_InitStruct.IsShareable = MPU_ACCESS_SHAREABLE;
  MPU_InitStruct.IsCacheable = MPU_ACCESS_NOT_CACHEABLE;
  MPU_InitStruct.IsBufferable = MPU_ACCESS_NOT_BUFFERABLE;
  HAL_MPU_ConfigRegion(&MPU_InitStruct);

  /* Enables the MPU */
  HAL_MPU_Enable(MPU_PRIVILEGED_DEFAULT);
}

/**
  * @brief Board bring-up sequence.
  *
  * Runs a short busy-wait, configures the MPU, enables the instruction and
  * data caches, initializes the HAL and the clocks, and then initializes
  * GPIO, DMA and the remaining peripherals in the order listed below.
  *
  * @return Always 0.
  */
int boardInit(void)
{
  /* Busy-wait for 0xFFFF NOP iterations before touching any hardware.
     __asm__ volatile is used so the loop also builds in strict ISO C modes
     and is never removed by the optimizer. */
  for (int len = 0xFFFF; len--; ) {
    __asm__ volatile ("nop");
  }

  /* The MPU regions are programmed before the caches are switched on. */
  MPU_Config();
  SCB_EnableICache();
  SCB_EnableDCache();

  HAL_Init();
  SystemClock_Config();

  /* Configure the peripherals common clocks */
  PeriphCommonClock_Config();
  MX_GPIO_Init();
  MX_DMA_Init(); // Must be called in between MX_GPIO_Init() and the UART_Inits()!
  MX_UART4_Init();
  MX_USART1_UART_Init();
  MX_USART2_UART_Init();
  MX_USART6_UART_Init();
  MX_SPI4_Init();
  MX_UART7_Init();
  MX_I2C1_Init();
  MX_I2C2_SMBUS_Init();
  MX_ADC1_Init();
  MX_ADC2_Init();
  MX_SPI5_Init();
  MX_UART5_Init();
  MX_USART3_UART_Init();
  MX_RTC_Init();
  return 0;
}

LD file (relevant section on line 116):

/* Entry Point */
ENTRY(Reset_Handler)

/* Highest address of the user mode stack */
_estack = ORIGIN(RAM) + LENGTH(RAM);    /* end of RAM */
/* Generate a link error if heap and stack don't fit into RAM */
_Min_Heap_Size = 0x200;      /* required amount of heap  */
_Min_Stack_Size = 0x400; /* required amount of stack */

/* Specify the memory areas */
MEMORY
{
DTCMRAM (xrw)      : ORIGIN = 0x20000000, LENGTH = 128K
RAM (xrw)      : ORIGIN = 0x24000000, LENGTH = 1024K
ITCMRAM (xrw)      : ORIGIN = 0x00000000, LENGTH = 64K
FLASH (rx)      : ORIGIN = 0x8000000, LENGTH = 2048K
}

/* Define output sections */
SECTIONS
{
  /* The startup code goes first into FLASH */
  .isr_vector :
  {
    . = ALIGN(4);
    KEEP(*(.isr_vector)) /* Startup code */
    . = ALIGN(4);
  } >FLASH

  /* The program code and other data goes into FLASH */
  .text :
  {
    . = ALIGN(4);
    *(.text)           /* .text sections (code) */
    *(.text*)          /* .text* sections (code) */
    *(.glue_7)         /* glue arm to thumb code */
    *(.glue_7t)        /* glue thumb to arm code */
    *(.eh_frame)

    KEEP (*(.init))
    KEEP (*(.fini))

    . = ALIGN(4);
    _etext = .;        /* define a global symbols at end of code */
  } >FLASH

  /* Constant data goes into FLASH */
  .rodata :
  {
    . = ALIGN(4);
    *(.rodata)         /* .rodata sections (constants, strings, etc.) */
    *(.rodata*)        /* .rodata* sections (constants, strings, etc.) */
    . = ALIGN(4);
  } >FLASH

  .ARM.extab   : { *(.ARM.extab* .gnu.linkonce.armextab.*) } >FLASH
  .ARM : {
    __exidx_start = .;
    *(.ARM.exidx*)
    __exidx_end = .;
  } >FLASH

  .preinit_array     :
  {
    PROVIDE_HIDDEN (__preinit_array_start = .);
    KEEP (*(.preinit_array*))
    PROVIDE_HIDDEN (__preinit_array_end = .);
  } >FLASH
  .init_array :
  {
    PROVIDE_HIDDEN (__init_array_start = .);
    KEEP (*(SORT(.init_array.*)))
    KEEP (*(.init_array*))
    PROVIDE_HIDDEN (__init_array_end = .);
  } >FLASH
  .fini_array :
  {
    PROVIDE_HIDDEN (__fini_array_start = .);
    KEEP (*(SORT(.fini_array.*)))
    KEEP (*(.fini_array*))
    PROVIDE_HIDDEN (__fini_array_end = .);
  } >FLASH

  /* used by the startup to initialize data */
  _sidata = LOADADDR(.data);

  /* Initialized data sections goes into RAM, load LMA copy after code */
  .data :
  {
    . = ALIGN(4);
    _sdata = .;        /* create a global symbol at data start */
    *(.data)           /* .data sections */
    *(.data*)          /* .data* sections */

    . = ALIGN(4);
    _edata = .;        /* define a global symbol at data end */
  } >RAM AT> FLASH


  /* Uninitialized data section */
  . = ALIGN(4);
  .bss :
  {
    /* This is used by the startup in order to initialize the .bss secion */
    _sbss = .;         /* define a global symbol at bss start */
    __bss_start__ = _sbss;
    *(.bss)
    *(.bss*)
    *(COMMON)

    . = ALIGN(4);
    _ebss = .;         /* define a global symbol at bss end */
    __bss_end__ = _ebss;
  } >RAM

  .dma_buffers (NOLOAD) : {
    /* This block is covered by an 8 KB MPU region whose base address is
       _nocache_dma_buffer_start. An ARMv7-M MPU region base must be a
       multiple of the region size, so the start is aligned to 8K (4K is
       not sufficient for an 8 KB region). */
    . = ALIGN(8K);
    _nocache_dma_buffer_start = .;

    /* uartioRecv Tx/Rx buffer for serial c2srv */
    _c2srv_buffer_start = .;
    *(.dma_buffers.c2srv)
    _c2srv_buffer_end = .;

    /* uartioRecv Ring 0 */
    . = ALIGN(32);
    _uartio_ring_start = .;
    /* UART DMA buffers are created with this attribute section */
    *(.dma_buffers.uartio)
    _uartio_ring_end = .;

    /* Testing buffer for rx/tx
    . = ALIGN(32);
    *(.dma_buffers_uartio_dbuf)
    . = ALIGN(32);
    */
    _nocache_dma_buffer_end = .;

    /* Pad to the next 8K boundary so that no later section is placed inside
       the non-cacheable MPU window. */
    . = ALIGN(8K);

  } >RAM

  /* User_heap_stack section, used to check that there is enough RAM left */
  ._user_heap_stack :
  {
    . = ALIGN(8);
    PROVIDE ( end = . );
    PROVIDE ( _end = . );
    . = . + _Min_Heap_Size;
    . = . + _Min_Stack_Size;
    . = ALIGN(8);
  } >RAM



  /* Remove information from the standard libraries */
  /DISCARD/ :
  {
    libc.a ( * )
    libm.a ( * )
    libgcc.a ( * )
  }

  .ARM.attributes 0 : { *(.ARM.attributes) }
}

 

Comment out the SCB_EnableDCache line. Consider also commenting out MPU_Config to remove that from the equation.

 

If you feel a post has answered your question, please click "Accept as Solution".

>>My buffers are already 32 bit aligned

Cache lines are 32-BYTE wide / aligned

SCB_InvalidateDCache_by_Addr() and SCB_InvalidateDCache() need special care as writes and surrounding writes may never make it to memory.

Tips, Buy me a coffee, or three.. PayPal Venmo
Up vote any posts that you find helpful, it shows what's working..
angrypotato
Associate II

Fixed!

I added the UART5_IRQHandler.

stm32h7xx_hal_msp.c
/* UART5 interrupt Init */
HAL_NVIC_SetPriority(UART5_IRQn, 5, 0);
HAL_NVIC_EnableIRQ(UART5_IRQn);

stm32h7xx_it.c
/**
  * @brief This function handles UART5 global interrupt.
  *        It takes no arguments and returns nothing; all of the work is
  *        delegated to HAL_UART_IRQHandler(), called with the huart5 handle.
  */
void UART5_IRQHandler(void)
{
  /* USER CODE BEGIN UART5_IRQn 0 */

  /* USER CODE END UART5_IRQn 0 */
  HAL_UART_IRQHandler(&huart5);
  /* USER CODE BEGIN UART5_IRQn 1 */

  /* USER CODE END UART5_IRQn 1 */
}

 I'm still not sure why I need this for this UART, since it wasn't needed for any other UART on this project. But it's working now! Thank you everyone for your help!