Skip to main content
AGime.1
Associate II
April 21, 2022
Question

Unable to Rx and Tx SPI with DMA

  • April 21, 2022
  • 2 replies
  • 1755 views

Hi.

I'm trying to implement an SPI slave device on an STM32F401 using DMA. Rx works without problems, but once I do a Tx everything goes wrong: first, the next receive operation reads the last transmitted byte, and after that no transmit operation completes (the TxCplt handler is never called). It gets stranger: if I set a breakpoint in the Tx interrupt handler, I am able to send more data without blocking (although the corrupted Rx problem persists), and if I inspect the SPI instance in the debugger, everything works: no corrupted Rx and no blocking.

If I change the Tx to use interrupts instead of DMA, everything works as expected, but I need DMA because the target speed will be at least 15 MHz (I'm currently testing at 100 kHz).

Also, I noticed that if I configure the DMA to use a circular buffer, nothing gets transferred.

I have tried enabling/disabling the SPI with the SPE bit before each transfer, and tried reading SR and DR, but nothing works.

Any help would be much appreciated.

This is the SPI configuration:

/**
 * Fills in the hspi1 handle for SPI1 and initializes the peripheral through
 * HAL_SPI_Init(); Error_Handler() is called if the HAL reports a failure.
 *
 * Resulting configuration: slave mode, two-line direction, 8-bit frames,
 * clock idle low with sampling on the second edge, hardware NSS input,
 * MSB first, TI mode off, CRC calculation off.
 *
 * The USER CODE BEGIN/END comment pairs are code-generator markers and are
 * left exactly as generated.
 */
static void MX_SPI1_Init(void)
{
 
 /* USER CODE BEGIN SPI1_Init 0 */
 
 /* USER CODE END SPI1_Init 0 */
 
 /* USER CODE BEGIN SPI1_Init 1 */
 
 /* USER CODE END SPI1_Init 1 */
 /* SPI1 parameter configuration*/
 hspi1.Instance = SPI1;
 hspi1.Init.Mode = SPI_MODE_SLAVE;
 hspi1.Init.Direction = SPI_DIRECTION_2LINES;
 hspi1.Init.DataSize = SPI_DATASIZE_8BIT;
 /* Clock polarity low + second-edge phase: the master must use the same pair. */
 hspi1.Init.CLKPolarity = SPI_POLARITY_LOW;
 hspi1.Init.CLKPhase = SPI_PHASE_2EDGE;
 /* Slave select is taken from the NSS pin rather than managed in software. */
 hspi1.Init.NSS = SPI_NSS_HARD_INPUT;
 hspi1.Init.FirstBit = SPI_FIRSTBIT_MSB;
 hspi1.Init.TIMode = SPI_TIMODE_DISABLE;
 hspi1.Init.CRCCalculation = SPI_CRCCALCULATION_DISABLE;
 /* CRC calculation is disabled above, so this polynomial is presumably unused. */
 hspi1.Init.CRCPolynomial = 10;
 if (HAL_SPI_Init(&hspi1) != HAL_OK)
 {
 Error_Handler();
 }
 /* USER CODE BEGIN SPI1_Init 2 */
 
 /* USER CODE END SPI1_Init 2 */
 
}
 
//------------------------------------------------------
 
/**
 * Low-level (MSP) setup for an SPI handle: clocks, pins and DMA streams.
 * Only acts when the handle refers to SPI1; any other instance is ignored.
 *
 * For SPI1 it:
 *  - enables the SPI1 and GPIOA clocks,
 *  - puts PA4..PA7 into alternate function AF5 (NSS, SCK, MISO, MOSI),
 *  - initializes hdma_spi1_rx (DMA2 Stream0, channel 3) and
 *    hdma_spi1_tx (DMA2 Stream3, channel 3) and links both to the handle.
 *
 * Error_Handler() is called if either HAL_DMA_Init() call fails.
 *
 * @param hspi SPI handle being initialized.
 */
void HAL_SPI_MspInit(SPI_HandleTypeDef* hspi)
{
 GPIO_InitTypeDef GPIO_InitStruct = {0};
 if(hspi->Instance==SPI1)
 {
 /* USER CODE BEGIN SPI1_MspInit 0 */
 
 /* USER CODE END SPI1_MspInit 0 */
 /* Peripheral clock enable */
 __HAL_RCC_SPI1_CLK_ENABLE();
 
 __HAL_RCC_GPIOA_CLK_ENABLE();
 /**SPI1 GPIO Configuration
 PA4 ------> SPI1_NSS
 PA5 ------> SPI1_SCK
 PA6 ------> SPI1_MISO
 PA7 ------> SPI1_MOSI
 */
 /* All four SPI pins share one configuration: AF push-pull, no pull resistor. */
 GPIO_InitStruct.Pin = GPIO_PIN_4|GPIO_PIN_5|GPIO_PIN_6|GPIO_PIN_7;
 GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
 GPIO_InitStruct.Pull = GPIO_NOPULL;
 GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH;
 GPIO_InitStruct.Alternate = GPIO_AF5_SPI1;
 HAL_GPIO_Init(GPIOA, &GPIO_InitStruct);
 
 /* SPI1 DMA Init */
 /* SPI1_RX Init */
 /* RX: peripheral -> memory, byte-wide, memory address incremented,
    normal (non-circular) mode, low priority, FIFO disabled. */
 hdma_spi1_rx.Instance = DMA2_Stream0;
 hdma_spi1_rx.Init.Channel = DMA_CHANNEL_3;
 hdma_spi1_rx.Init.Direction = DMA_PERIPH_TO_MEMORY;
 hdma_spi1_rx.Init.PeriphInc = DMA_PINC_DISABLE;
 hdma_spi1_rx.Init.MemInc = DMA_MINC_ENABLE;
 hdma_spi1_rx.Init.PeriphDataAlignment = DMA_PDATAALIGN_BYTE;
 hdma_spi1_rx.Init.MemDataAlignment = DMA_MDATAALIGN_BYTE;
 hdma_spi1_rx.Init.Mode = DMA_NORMAL;
 hdma_spi1_rx.Init.Priority = DMA_PRIORITY_LOW;
 hdma_spi1_rx.Init.FIFOMode = DMA_FIFOMODE_DISABLE;
 if (HAL_DMA_Init(&hdma_spi1_rx) != HAL_OK)
 {
 Error_Handler();
 }
 
 /* Attach the RX DMA handle to the SPI handle's hdmarx member. */
 __HAL_LINKDMA(hspi,hdmarx,hdma_spi1_rx);
 
 /* SPI1_TX Init */
 /* TX: memory -> peripheral, byte-wide, memory address incremented,
    normal (non-circular) mode, high priority, FIFO disabled. */
 hdma_spi1_tx.Instance = DMA2_Stream3;
 hdma_spi1_tx.Init.Channel = DMA_CHANNEL_3;
 hdma_spi1_tx.Init.Direction = DMA_MEMORY_TO_PERIPH;
 hdma_spi1_tx.Init.PeriphInc = DMA_PINC_DISABLE;
 hdma_spi1_tx.Init.MemInc = DMA_MINC_ENABLE;
 hdma_spi1_tx.Init.PeriphDataAlignment = DMA_PDATAALIGN_BYTE;
 hdma_spi1_tx.Init.MemDataAlignment = DMA_MDATAALIGN_BYTE;
 hdma_spi1_tx.Init.Mode = DMA_NORMAL;
 hdma_spi1_tx.Init.Priority = DMA_PRIORITY_HIGH;
 hdma_spi1_tx.Init.FIFOMode = DMA_FIFOMODE_DISABLE;
 if (HAL_DMA_Init(&hdma_spi1_tx) != HAL_OK)
 {
 Error_Handler();
 }
 
 /* Attach the TX DMA handle to the SPI handle's hdmatx member. */
 __HAL_LINKDMA(hspi,hdmatx,hdma_spi1_tx);
 
 /* USER CODE BEGIN SPI1_MspInit 1 */
 
 /* USER CODE END SPI1_MspInit 1 */
 }
 
}

And the DMA channels:

/**
 * Enables the DMA2 controller clock and the interrupt lines of the two
 * DMA2 streams configured for SPI1 in HAL_SPI_MspInit() (Stream0 for RX,
 * Stream3 for TX). Both interrupts get the same priority arguments (0, 0).
 */
static void MX_DMA_Init(void)
{
 
 /* DMA controller clock enable */
 __HAL_RCC_DMA2_CLK_ENABLE();
 
 /* DMA interrupt init */
 /* DMA2_Stream0_IRQn interrupt configuration */
 HAL_NVIC_SetPriority(DMA2_Stream0_IRQn, 0, 0);
 HAL_NVIC_EnableIRQ(DMA2_Stream0_IRQn);
 /* DMA2_Stream3_IRQn interrupt configuration */
 HAL_NVIC_SetPriority(DMA2_Stream3_IRQn, 0, 0);
 HAL_NVIC_EnableIRQ(DMA2_Stream3_IRQn);
 
}
 

To begin receiving data I call this function:

/*
 * Starts a 4-byte DMA reception on the SPI handle `spi`, with the received
 * bytes written into device->currentTransfer.
 *
 * NOTE(review): the status returned by HAL_SPI_Receive_DMA() is discarded,
 * so a rejected request goes unnoticed here.
 */
void PrepareTransferReceive()
{
	uint8_t *rxTarget = (uint8_t*)&device->currentTransfer;

	HAL_SPI_Receive_DMA(spi, rxTarget, 4);
}

The transmissions are started from outside any interrupt by calling this function (always in response to a completed Rx):

bool SendDeviceDescriptor()
{
	if(device->transferState != PendingAcknowledgement)
		return false;
 
	device->transferState = SendingTransfer;
 
	HAL_SPI_Transmit_DMA(spi, (uint8_t*)device, 4);
 
	RAISE_RDY();
 
	WaitForPendingAcknowledgement();
 
	if(device->capability == 0xFF)
	{
		device->transferState = SendingTransfer;
		//EnsureIdle();
		HAL_SPI_Transmit_DMA(spi, device->capabilityList, 4);
 
		RAISE_RDY();
 
		WaitForPendingAcknowledgement();
	}
 
	return true;
}
 
/*
 * Spins until device->transferState reads PendingAcknowledgement.
 * There is no timeout: the loop ends only when some other context (the SPI
 * completion callbacks write this value) updates the state.
 */
void WaitForPendingAcknowledgement()
{
	for(;;)
	{
		if(device->transferState == PendingAcknowledgement)
			return;
	}
}

And finally, these are the callbacks invoked from the ISRs:

/*
 * Receive half-complete callback. For the handle `spi` it invokes
 * DROP_INT() and DROP_RDY(), then moves the transfer state from Idle to
 * ReceivingTransfer; any other state is left untouched. Other handles are
 * ignored.
 */
void HAL_SPI_RxHalfCpltCallback (SPI_HandleTypeDef * hspi)
{
	if(hspi != spi)
		return;

	DROP_INT();
	DROP_RDY();

	if(device->transferState == Idle)
	{
		device->transferState = ReceivingTransfer;
	}
}
 
/*
 * Receive complete callback. For the handle `spi` it marks the transfer
 * state as PendingAcknowledgement; other handles are ignored.
 */
void HAL_SPI_RxCpltCallback (SPI_HandleTypeDef * hspi)
{
	if(hspi != spi)
		return;

	device->transferState = PendingAcknowledgement;
}
 
/*
 * Transmit half-complete callback. For the handle `spi` it invokes
 * DROP_INT() followed by DROP_RDY(); other handles are ignored.
 */
void HAL_SPI_TxHalfCpltCallback (SPI_HandleTypeDef * hspi)
{
	if(hspi != spi)
		return;

	DROP_INT();
	DROP_RDY();
}
 
/*
 * Transmit complete callback. For the handle `spi` it marks the transfer
 * state as PendingAcknowledgement, which is the value
 * WaitForPendingAcknowledgement() polls for; other handles are ignored.
 */
void HAL_SPI_TxCpltCallback (SPI_HandleTypeDef * hspi)
{
	if(hspi != spi)
		return;

	device->transferState = PendingAcknowledgement;
}

Cheers.

This topic has been closed for replies.

2 replies

TDK
Super User
April 21, 2022

> DROP_RDY();

At 15 MHz and a 4 byte transfer size, it's possible this isn't executed until after the entire transfer is complete.

Can you look on a scope and verify what's going on?

> first the next receive operation reads the last transmitted byte and after that any transmit operation does not complete

Certainly suggests the master and slave are out of sync.

If you feel a post has answered your question, please click "Accept as Solution".
AGime.1
AGime.1Author
Associate II
April 21, 2022

Hi.

I want to reach 15 MHz, but as I stated, during development I'm using only 100 kHz to avoid any speed-related problems.

And yes, I checked the signals with a scope, I can see how the data is correctly transferred and then the DMA populates the first byte of the buffer with the last value sent (and if I check with the debugger the DR register from the SPI port I can see that value there before starting the DMA read).

I also tried adding a cleanup routine before receiving that tests TXE and RXNE, and the result is very strange: if I place a breakpoint inside the RXNE check, it is never hit (that code does not execute), but if I place the breakpoint before the check, use the debugger to read the SPI registers and then continue execution, the code finds the RXNE flag set, clears the DR register, and the next read works correctly.

I am new to the STM platform so I'm a bit lost, but I would think this is some kind of problem with caching and when the debugger reads the registers the cache is invalidated, but I have no idea if there is such kind of caching mechanism in the F401, I know there is ART but as far as I know it only affects flash reads so I have no idea on what to do...

Tesla DeLorean
Guru
April 21, 2022

Not caching, but understand peripheral registers look at combinatorial and synchronous logic, not "memory"

Don't be staring at a Peripheral Register View in the debugger. It doesn't have magic access to the registers, reading the DR to show it to you will clear/change SR states. Similarly FIFO structures. So USB, SDMMC, QSPI, etc

Assume always that the debugger is invasive, if you want to know internal states/flow, instrument your code so that it can report these to you.

Tips, Buy me a coffee, or three.. PayPal, Venmo. Up vote any posts that you find helpful, it shows what's working..
Piranha
Principal III
April 21, 2022

Is device->transferState defined as volatile? (it must be) And you are modifying it from both interrupt and non-interrupt code - are you sure there are no race conditions?

AGime.1
AGime.1Author
Associate II
April 21, 2022

This is the definition of the structure (yes, it's volatile).

/*
 * Per-device state shared between the main code and the SPI callbacks.
 *
 * NOTE(review): SendDeviceDescriptor() transmits the first 4 bytes of this
 * struct as raw bytes, so member order, padding and the size of
 * eZXDeviceSpeed determine what is sent - confirm the layout is as intended.
 */
typedef struct eZXDevice
{
	unsigned short vendorId;
	eZXDeviceSpeed speed;
	/* The value 0xFF makes SendDeviceDescriptor() also send capabilityList. */
	unsigned char capability;
	/* Sent as a separate 4-byte transfer by SendDeviceDescriptor(). */
	unsigned char capabilityList[4];
	/* Written by the SPI completion callbacks and polled by
	 * WaitForPendingAcknowledgement(), hence volatile. */
	volatile eZXDeviceTransferState transferState;
	/* Destination of the DMA reception started in PrepareTransferReceive(). */
	volatile eZXDeviceTransfer currentTransfer;
 
} eZXDevice;

And I'm sure this is not a race condition: code that modifies the state outside the interrupts first expects a specific state which ensures that no transfer is in progress, so no interrupt that modifies the state can fire at the same time.