I wrote some code for an SPI-slave that communicates with a transmission size of 4 bytes, plus 1 byte of CRC. Since the SPI-module natively only supports 1 and 2-byte transfers, the DMA controller had to be used. This was not an easy task and there are bugs in the HAL-source code. Hopefully someone will benefit from this post and won't have to go through what I had to go through. 

The code below takes errors into account and successfully recovers from them. The following errors can occur:
1. Too few bits are received. In this case the DMA receive stream and the SPI module never generate any interrupts and that's why the sanity counter is used as a timeout.
2. Too many bits are received. This is not treated as an error as long as the CRC matches for the first 40 bits. If the CRC doesn't match for the first 40 bits, then it's treated like a CRC error.
3. CRC error: In this case the DMA transmit stream first generates a transfer complete interrupt, then the SPI module generates a CRC error interrupt, and then the DMA receive stream generates a transfer complete interrupt.

In the normal case, without any errors, you don't get any SPI interrupt. So the only interrupt you receive under all circumstances is the transfer complete interrupt from the DMA transmit stream, and that's why it is the only interrupt that's enabled.

// Status codes handed to the transfer-completed callback as its first argument.

// Transfer finished and no error flag was seen.
#define SPI2_OK                                        0x00
// Error conditions detected from the SPI2 status register in SPI2_IRQHandler_().
#define SPI2_CRC_ERROR                                 0x01
#define SPI2_MODE_FAULT                                0x02
#define SPI2_OVERRUN_ERROR                             0x03
#define SPI2_TI_FRAME_FORMAT_ERROR                     0x04
// Errors reported by the receive DMA stream handler (DMA1 stream 3).
#define RECEIVE_DMA_TRANSFER_ERROR                     0x05
#define RECEIVE_DMA_TRANSFER_FIFO_ERROR                0x06
#define RECEIVE_DMA_TRANSFER_DIRECT_MODE_ERROR         0x07
// Errors reported by the transmit DMA stream handler (DMA1 stream 4).
#define TRANSMIT_DMA_TRANSFER_ERROR                    0x08
#define TRANSMIT_DMA_TRANSFER_FIFO_ERROR               0x09
#define TRANSMIT_DMA_TRANSFER_DIRECT_MODE_ERROR        0x0A
// The receive stream never signalled transfer complete before the timeout.
#define TOO_FEW_BITS_RECEIVED_ERROR                    0x0B
 
// Configures the SPI2 pins and computes the busy-wait timeout. Call once before
// the first prepareTransferSpi2().
void initSpi2(void);
 
// Arms SPI2 (slave) and both DMA streams for one transfer.
// wordToTransmit:            32-bit word to shift out, most significant byte first.
// transferCompletedCallback: invoked from interrupt context when the transfer ends;
//                            may be NULL, in which case no notification is made.
// 1st parameter in callback: Status (see defines above).
// 2nd parameter in callback: The received data, if status = SPI2_OK (0 otherwise).
void prepareTransferSpi2(uint32_t wordToTransmit, EventCallback_t transferCompletedCallback);

// Wraps diagnostic statements for error paths. Expands to its argument, so the
// wrapped statement is always compiled in.
#define CRITICAL_ERROR(arg)   arg
 
// Wraps verbose trace statements. The active definition expands to nothing, so
// the wrapped code is removed; swap the two definitions below to enable tracing.
//#define DEBUG_CODE(arg)   arg
#define DEBUG_CODE(arg)
  
// GPIO port/pin assignment of the four SPI2 signals, consumed by initSpi2().
#define SPI2_MISO_PORT                        GPIOC
#define SPI2_MISO_PIN                         GPIO_PIN_2
#define SPI2_MOSI_PORT                        GPIOC
#define SPI2_MOSI_PIN                         GPIO_PIN_3
#define SPI2_CLK_PORT                         GPIOI
#define SPI2_CLK_PIN                          GPIO_PIN_1
#define SPI2_NSS_PORT                         GPIOB
#define SPI2_NSS_PIN                          GPIO_PIN_9
 
 
#define RECEIVE_SIZE 5
#define TRANSMIT_SIZE 4
static uint8_t receiveDMAbuffer[5];
static uint8_t transmitDMAbuffer[TRANSMIT_SIZE + 1];  // Allocate an extra byte to be safe (who knows
                                                      // where the DMA controller puts the CRC-byte)
static uint32_t sanityCheckLimit;
static Bool_t Stream3TxRxCpltReceived;
static volatile EventCallback_t transferCompletedCallback_ = (EventCallback_t)NULL;
 
 
// Yields the direct-mode-error flag mask for the given DMA stream pointer.
// Streams 0/4, 1/5 and 2/6 (on either controller) share a mask; any other
// stream falls through to the stream 3/7 mask.
#define __DMA_GET_DME_FLAG_INDEX(__DMA_STREAM__)\
(((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream0))? DMA_FLAG_DMEIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream0))? DMA_FLAG_DMEIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream4))? DMA_FLAG_DMEIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream4))? DMA_FLAG_DMEIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream1))? DMA_FLAG_DMEIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream1))? DMA_FLAG_DMEIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream5))? DMA_FLAG_DMEIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream5))? DMA_FLAG_DMEIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream2))? DMA_FLAG_DMEIF2_6 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream2))? DMA_FLAG_DMEIF2_6 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream6))? DMA_FLAG_DMEIF2_6 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream6))? DMA_FLAG_DMEIF2_6 :\
   DMA_FLAG_DMEIF3_7)
 
// Yields the transfer-complete flag mask for the given DMA stream pointer.
// Streams 0/4, 1/5 and 2/6 (on either controller) share a mask; any other
// stream falls through to the stream 3/7 mask.
#define __DMA_GET_TC_FLAG_INDEX(__DMA_STREAM__) \
(((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream0))? DMA_FLAG_TCIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream0))? DMA_FLAG_TCIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream4))? DMA_FLAG_TCIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream4))? DMA_FLAG_TCIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream1))? DMA_FLAG_TCIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream1))? DMA_FLAG_TCIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream5))? DMA_FLAG_TCIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream5))? DMA_FLAG_TCIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream2))? DMA_FLAG_TCIF2_6 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream2))? DMA_FLAG_TCIF2_6 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream6))? DMA_FLAG_TCIF2_6 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream6))? DMA_FLAG_TCIF2_6 :\
   DMA_FLAG_TCIF3_7)
 
// Yields the transfer-error flag mask for the given DMA stream pointer.
// Streams 0/4, 1/5 and 2/6 (on either controller) share a mask; any other
// stream falls through to the stream 3/7 mask.
#define __DMA_GET_TE_FLAG_INDEX(__DMA_STREAM__)\
(((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream0))? DMA_FLAG_TEIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream0))? DMA_FLAG_TEIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream4))? DMA_FLAG_TEIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream4))? DMA_FLAG_TEIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream1))? DMA_FLAG_TEIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream1))? DMA_FLAG_TEIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream5))? DMA_FLAG_TEIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream5))? DMA_FLAG_TEIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream2))? DMA_FLAG_TEIF2_6 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream2))? DMA_FLAG_TEIF2_6 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream6))? DMA_FLAG_TEIF2_6 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream6))? DMA_FLAG_TEIF2_6 :\
   DMA_FLAG_TEIF3_7)
 
// Yields the FIFO-error flag mask for the given DMA stream pointer.
// Streams 0/4, 1/5 and 2/6 (on either controller) share a mask; any other
// stream falls through to the stream 3/7 mask.
#define __DMA_GET_FE_FLAG_INDEX(__DMA_STREAM__)\
(((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream0))? DMA_FLAG_FEIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream0))? DMA_FLAG_FEIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream4))? DMA_FLAG_FEIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream4))? DMA_FLAG_FEIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream1))? DMA_FLAG_FEIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream1))? DMA_FLAG_FEIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream5))? DMA_FLAG_FEIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream5))? DMA_FLAG_FEIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream2))? DMA_FLAG_FEIF2_6 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream2))? DMA_FLAG_FEIF2_6 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream6))? DMA_FLAG_FEIF2_6 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream6))? DMA_FLAG_FEIF2_6 :\
   DMA_FLAG_FEIF3_7)
 
// Yields the half-transfer flag mask for the given DMA stream pointer.
// Streams 0/4, 1/5 and 2/6 (on either controller) share a mask; any other
// stream falls through to the stream 3/7 mask.
// Not referenced by the handlers in this listing.
#define __DMA_GET_HT_FLAG_INDEX(__DMA_STREAM__)\
(((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream0))? DMA_FLAG_HTIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream0))? DMA_FLAG_HTIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream4))? DMA_FLAG_HTIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream4))? DMA_FLAG_HTIF0_4 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream1))? DMA_FLAG_HTIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream1))? DMA_FLAG_HTIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream5))? DMA_FLAG_HTIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream5))? DMA_FLAG_HTIF1_5 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream2))? DMA_FLAG_HTIF2_6 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream2))? DMA_FLAG_HTIF2_6 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA1_Stream6))? DMA_FLAG_HTIF2_6 :\
 ((uint32_t)(__DMA_STREAM__) == ((uint32_t)DMA2_Stream6))? DMA_FLAG_HTIF2_6 :\
   DMA_FLAG_HTIF3_7)
 
// Clears __FLAG__ for the given stream by writing it to the matching interrupt
// flag clear register. The register (DMA1/DMA2, low/high) is chosen by comparing
// the stream's address against DMA2_Stream3, DMA1_Stream7 and DMA1_Stream3.
// NOTE(review): relies on the stream register blocks being laid out in ascending
// address order (DMA1 streams 0..7, then DMA2 streams 0..7) - confirm for the target.
#define __DMA_CLEAR_FLAG(__DMA_STREAM__, __FLAG__) \
(((uint32_t)(__DMA_STREAM__) > (uint32_t)DMA2_Stream3)? (DMA2->HIFCR = (__FLAG__)) :\
 ((uint32_t)(__DMA_STREAM__) > (uint32_t)DMA1_Stream7)? (DMA2->LIFCR = (__FLAG__)) :\
 ((uint32_t)(__DMA_STREAM__) > (uint32_t)DMA1_Stream3)? (DMA1->HIFCR = (__FLAG__)) : (DMA1->LIFCR = (__FLAG__)))
 
// Sets an interrupt-enable bit of the stream. DMA_IT_FE is set in the stream's
// FCR register; every other interrupt bit is set in its CR register.
#define __DMA_ENABLE_IT(__DMA_STREAM__, __INTERRUPT__)   (((__INTERRUPT__) != DMA_IT_FE)? \
((__DMA_STREAM__)->CR |= (__INTERRUPT__)) : ((__DMA_STREAM__)->FCR |= (__INTERRUPT__)))
 
// Reads the interrupt status register that belongs to the given stream and masks
// it with __FLAG__; the result is non-zero when the flag is set. The register
// (DMA1/DMA2, low/high) is chosen by comparing the stream's address against
// DMA2_Stream3, DMA1_Stream7 and DMA1_Stream3.
#define __DMA_GET_FLAG(__DMA_STREAM__, __FLAG__)\
(((uint32_t)(__DMA_STREAM__) > (uint32_t)DMA2_Stream3)? (DMA2->HISR & (__FLAG__)) :\
 ((uint32_t)(__DMA_STREAM__) > (uint32_t)DMA1_Stream7)? (DMA2->LISR & (__FLAG__)) :\
 ((uint32_t)(__DMA_STREAM__) > (uint32_t)DMA1_Stream3)? (DMA1->HISR & (__FLAG__)) : (DMA1->LISR & (__FLAG__)))
 
// Clears an interrupt-enable bit of the stream. DMA_IT_FE is cleared in the
// stream's FCR register; every other interrupt bit is cleared in its CR register.
#define __DMA_DISABLE_IT(__DMA_STREAM__, __INTERRUPT__)  (((__INTERRUPT__) != DMA_IT_FE)? \
((__DMA_STREAM__)->CR &= ~(__INTERRUPT__)) : ((__DMA_STREAM__)->FCR &= ~(__INTERRUPT__)))
 
// Yields a non-zero value when the given interrupt is enabled for the stream.
// DMA_IT_FE is looked up in FCR, every other interrupt bit in CR.
// Not referenced by the handlers in this listing.
#define __DMA_GET_IT_SOURCE(__DMA_STREAM__, __INTERRUPT__)  (((__INTERRUPT__) != DMA_IT_FE)? \
                                                        ((__DMA_STREAM__)->CR & (__INTERRUPT__)) : \
                                                        ((__DMA_STREAM__)->FCR & (__INTERRUPT__)))
 
// Evaluates to 1 when every bit of __FLAG__ is set in the SPI status register.
#define __SPI_GET_FLAG(__SPI_MODULE__, __FLAG__) ((((__SPI_MODULE__)->SR) & (__FLAG__)) == (__FLAG__))
// Evaluates to 1 when every bit of __INTERRUPT__ is set in CR2, otherwise 0.
#define __SPI_GET_IT_SOURCE(__SPI_MODULE__, __INTERRUPT__) ((((__SPI_MODULE__)->CR2 & (__INTERRUPT__)) == (__INTERRUPT__)) ? 1 : 0)
// Clears the CRC error flag by writing SR with only the CRCERR bit at zero.
#define __SPI_CLEAR_CRCERRFLAG(__SPI_MODULE__) ((__SPI_MODULE__)->SR = ~(SPI_FLAG_CRCERR))
 
// Mode-fault clear sequence: read SR, then write CR1 with the SPE bit cleared.
// Note that this leaves the SPI module disabled.
// NOTE(review): presumably this read-SR/write-CR1 order is what the hardware
// requires to clear MODF - confirm against the reference manual.
#define __SPI_CLEAR_MODFFLAG(__SPI_MODULE__)            \
  do{                                                   \
    __IO uint32_t tmpreg;                               \
    tmpreg = (__SPI_MODULE__)->SR;                      \
    (__SPI_MODULE__)->CR1 &= (~SPI_CR1_SPE);            \
    UNUSED(tmpreg);                                     \
  } while(0)
 
// Overrun clear sequence: read DR, then read SR. The values read are discarded;
// tmpreg is declared __IO so that the reads are actually performed.
#define __SPI_CLEAR_OVRFLAG(__SPI_MODULE__)       \
  do{                                             \
    __IO uint32_t tmpreg;                         \
    tmpreg = (__SPI_MODULE__)->DR;                \
    tmpreg = (__SPI_MODULE__)->SR;                \
    UNUSED(tmpreg);                               \
  } while(0)
 
 
// Frame-format-error clear sequence: a single read of SR whose value is discarded.
#define __SPI_CLEAR_FREFLAG(__SPI_MODULE__)     \
  do{                                           \
  __IO uint32_t tmpreg;                         \
  tmpreg = (__SPI_MODULE__)->SR;                \
  UNUSED(tmpreg);                               \
  } while(0)
 
 
void initSpi2(void)
{
    GPIO_InitTypeDef GPIO_InitStructure;
 
    /***************************************************
    *****************  SPI MISO  ***********************
    ***************************************************/
    GPIO_InitStructure.Pin       = SPI2_MISO_PIN;
    GPIO_InitStructure.Mode      = GPIO_MODE_AF_PP;
    GPIO_InitStructure.Speed     = GPIO_SPEED_FAST;
    GPIO_InitStructure.Pull      = GPIO_NOPULL;
    GPIO_InitStructure.Alternate = GPIO_AF5_SPI2;
    HAL_GPIO_Init(SPI2_MISO_PORT, &GPIO_InitStructure);
 
    /***************************************************
    *****************  SPI MOSI  ***********************
    ***************************************************/
    GPIO_InitStructure.Pin       = SPI2_MOSI_PIN;
    GPIO_InitStructure.Mode      = GPIO_MODE_AF_OD;
    GPIO_InitStructure.Speed     = GPIO_SPEED_FAST;
    GPIO_InitStructure.Pull      = GPIO_NOPULL;
    GPIO_InitStructure.Alternate = GPIO_AF5_SPI2;
    HAL_GPIO_Init(SPI2_MOSI_PORT, &GPIO_InitStructure);
 
    /***************************************************
    *****************  SPI CLK  ************************
    ***************************************************/
    GPIO_InitStructure.Pin       = SPI2_CLK_PIN;
    GPIO_InitStructure.Mode      = GPIO_MODE_AF_OD;
    GPIO_InitStructure.Speed     = GPIO_SPEED_FAST;
    GPIO_InitStructure.Pull      = GPIO_NOPULL;
    GPIO_InitStructure.Alternate = GPIO_AF5_SPI2;
    HAL_GPIO_Init(SPI2_CLK_PORT, &GPIO_InitStructure);
 
    /***************************************************
    *****************  SPI NSS  ************************
    ***************************************************/
    GPIO_InitStructure.Pin       = SPI2_NSS_PIN;
    GPIO_InitStructure.Mode      = GPIO_MODE_AF_OD;
    GPIO_InitStructure.Speed     = GPIO_SPEED_FAST;
    GPIO_InitStructure.Pull      = GPIO_NOPULL;
    GPIO_InitStructure.Alternate = GPIO_AF5_SPI2;
    HAL_GPIO_Init(SPI2_NSS_PORT, &GPIO_InitStructure);
 
    sanityCheckLimit = (22 * SystemCoreClock) / (100000);
}
 
 
// Arms SPI2 as a slave for exactly one transfer and returns immediately; the
// transfer itself is carried out by hardware once the master starts clocking.
//
// wordToTransmit:            word to send, most significant byte first.
// transferCompletedCallback: stored and later invoked by the interrupt handlers
//                            with (status, receivedWord); may be NULL.
//
// Sequence: reset DMA1 and SPI2, configure stream 3 (SPI2 -> receiveDMAbuffer) and
// stream 4 (transmitDMAbuffer -> SPI2), configure SPI2 with hardware CRC, enable
// only the stream 4 interrupts, then enable the streams and the SPI.
//
// NOTE(review): __DMA1_FORCE_RESET() acts on the whole DMA1 controller, so
// presumably no other DMA1 stream may be in use by the application - confirm.
void prepareTransferSpi2(uint32_t wordToTransmit, EventCallback_t transferCompletedCallback)
{
    uint32_t tmp;
    transferCompletedCallback_ = transferCompletedCallback;
    Stream3TxRxCpltReceived = FALSE;
    uint16_t i;
    // Serialize the word big-endian: buffer[0] receives the most significant byte.
    for (i = 0; i < TRANSMIT_SIZE; i++)
    {
        transmitDMAbuffer[TRANSMIT_SIZE - i - 1] = (uint8_t)(wordToTransmit >> (i << 3));
    }
 
    // Clock both peripherals, silence their interrupts, then reset them so that
    // configuration starts from the reset state.
    __DMA1_CLK_ENABLE();
    __SPI2_CLK_ENABLE();
    HAL_NVIC_DisableIRQ(DMA1_Stream3_IRQn);
    HAL_NVIC_DisableIRQ(DMA1_Stream4_IRQn);
    HAL_NVIC_DisableIRQ(SPI2_IRQn);
    __DMA1_FORCE_RESET();
    __DMA1_RELEASE_RESET();
    __SPI2_FORCE_RESET();
    __SPI2_RELEASE_RESET();
 
    // ---- Receive stream (DMA1 stream 3): peripheral -> memory, byte wide, circular ----
    tmp = DMA1_Stream3->CR;                                // Get the CR register value
 
    // Clear CHSEL, MBURST, PBURST, PL, MSIZE, PSIZE, MINC, PINC, CIRC, DIR, CT and DBM bits
    tmp &= ((uint32_t)~(DMA_SxCR_CHSEL | DMA_SxCR_MBURST | DMA_SxCR_PBURST | DMA_SxCR_PL    |
                        DMA_SxCR_MSIZE | DMA_SxCR_PSIZE  | DMA_SxCR_MINC   | DMA_SxCR_PINC  |
                        DMA_SxCR_CIRC  | DMA_SxCR_DIR    | DMA_SxCR_CT     | DMA_SxCR_DBM));
 
    // Prepare the DMA Stream configuration
    tmp |=  DMA_CHANNEL_0       | DMA_PERIPH_TO_MEMORY | DMA_PINC_DISABLE | DMA_MINC_ENABLE |
            DMA_PDATAALIGN_BYTE | DMA_MDATAALIGN_BYTE  | DMA_CIRCULAR     | DMA_PRIORITY_LOW;
 
    DMA1_Stream3->CR = tmp;                                // Write to DMA Stream CR register
    tmp = DMA1_Stream3->FCR;                               // Get the FCR register value
    tmp &= (uint32_t)~(DMA_SxFCR_DMDIS | DMA_SxFCR_FTH);   // Clear Direct mode and FIFO threshold bits
    tmp |= DMA_FIFOMODE_DISABLE;                           // Prepare the DMA Stream FIFO configuration
    DMA1_Stream3->FCR = tmp;                               // Write to DMA Stream FCR
 
 
    // ---- Transmit stream (DMA1 stream 4): memory -> peripheral, byte wide, normal mode ----
    tmp = DMA1_Stream4->CR;                                // Get the CR register value
 
    // Clear CHSEL, MBURST, PBURST, PL, MSIZE, PSIZE, MINC, PINC, CIRC, DIR, CT and DBM bits
    tmp &= ((uint32_t)~(DMA_SxCR_CHSEL | DMA_SxCR_MBURST | DMA_SxCR_PBURST | DMA_SxCR_PL    |
                        DMA_SxCR_MSIZE | DMA_SxCR_PSIZE  | DMA_SxCR_MINC   | DMA_SxCR_PINC  |
                        DMA_SxCR_CIRC  |  DMA_SxCR_DIR   | DMA_SxCR_CT     | DMA_SxCR_DBM));
 
    // Prepare the DMA Stream configuration
    tmp |=  DMA_CHANNEL_0       | DMA_MEMORY_TO_PERIPH | DMA_PINC_DISABLE | DMA_MINC_ENABLE  |
            DMA_PDATAALIGN_BYTE | DMA_MDATAALIGN_BYTE  | DMA_NORMAL       | DMA_PRIORITY_LOW;
 
    DMA1_Stream4->CR = tmp;                                // Write to DMA Stream CR register
    tmp = DMA1_Stream4->FCR;                               // Get the FCR register value
    tmp &= (uint32_t)~(DMA_SxFCR_DMDIS | DMA_SxFCR_FTH);   // Clear Direct mode and FIFO threshold bits
    tmp |= DMA_FIFOMODE_DISABLE;                           // Prepare the DMA Stream FIFO configuration
    DMA1_Stream4->FCR = tmp;                               // Write to DMA Stream FCR
 
 
    // Receive RECEIVE_SIZE bytes but transmit only TRANSMIT_SIZE from memory.
    // NOTE(review): the extra received byte is presumably the CRC that the SPI
    // hardware appends on the transmit side by itself - confirm.
    DMA1_Stream3->NDTR = RECEIVE_SIZE;                     // Configure DMA Stream data length
    DMA1_Stream3->PAR  = (uint32_t)&SPI2->DR;              // Configure DMA Stream source address
    DMA1_Stream3->M0AR = (uint32_t)receiveDMAbuffer;       // Configure DMA Stream destination address
    DMA1_Stream4->NDTR = TRANSMIT_SIZE;                    // Configure DMA Stream data length
    DMA1_Stream4->PAR  = (uint32_t)&SPI2->DR;              // Configure DMA Stream destination address
    DMA1_Stream4->M0AR = (uint32_t)transmitDMAbuffer;      // Configure DMA Stream source address
 
    //----------------------- SPIx CR1 & CR2 Configuration ---------------------
    // Configure : SPI Mode, Communication Mode, Data size, Clock polarity and phase, NSS management,
    // Communication speed, First bit and CRC calculation state
    SPI2->CR1 = (SPI_MODE_SLAVE  | SPI_DIRECTION_2LINES               | SPI_DATASIZE_8BIT |  SPI_POLARITY_HIGH  |
                 SPI_PHASE_1EDGE | (SPI_NSS_HARD_INPUT & SPI_CR1_SSM) | SPI_FIRSTBIT_MSB  |  SPI_CRCCALCULATION_ENABLED);
 
    // Configure : NSS management
    SPI2->CR2 = (((SPI_NSS_HARD_INPUT >> 16) & SPI_CR2_SSOE) | SPI_TIMODE_DISABLED);
    SPI2->CRCPR = 7;                                       // Configure : CRC Polynomial
 
    // Activate the SPI mode (Make sure that I2SMOD bit in I2SCFGR register is reset)
    SPI2->I2SCFGR &= (uint32_t)(~SPI_I2SCFGR_I2SMOD);
 
    // Only the transmit stream raises interrupts; the stream 3 and SPI2 interrupt
    // lines stay disabled and their state is polled from DMA1_Stream4_IRQHandler().
    HAL_NVIC_EnableIRQ(DMA1_Stream4_IRQn);
    __DMA_ENABLE_IT(DMA1_Stream4, DMA_IT_TC);              // Enable the transfer complete interrupt
    __DMA_ENABLE_IT(DMA1_Stream4, DMA_IT_TE);              // Enable the transfer Error interrupt
    __DMA_ENABLE_IT(DMA1_Stream4, DMA_IT_FE);              // Enable the FIFO Error interrupt
    __DMA_ENABLE_IT(DMA1_Stream4, DMA_IT_DME);             // Enable the direct mode Error interrupt
 
    DMA1_Stream3->CR |= DMA_SxCR_EN;                       // Enable the receive DMA stream
    DMA1_Stream4->CR |= DMA_SxCR_EN;                       // Enable the transmit DMA stream
    SPI2->CR1 |= SPI_CR1_SPE;                              // Enable SPI peripheral
    SPI2->CR2 |= SPI_CR2_RXDMAEN;                          // Enable Rx DMA Request
    SPI2->CR2 |= SPI_CR2_TXDMAEN;                          // Enable Tx DMA Request
}
 
 
// Tears down everything prepareTransferSpi2() set up: masks the three interrupt
// lines in the NVIC, resets SPI2 and DMA1, and finally gates their clocks off.
// Called by the handlers right before the user callback is invoked.
static void shutDownSpi2(void)
{
    DEBUG_CODE( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nshutDownSpi2 beginning");     )
    HAL_NVIC_DisableIRQ(DMA1_Stream3_IRQn);
    HAL_NVIC_DisableIRQ(DMA1_Stream4_IRQn);
    HAL_NVIC_DisableIRQ(SPI2_IRQn);
    // Reset while the clocks are still running, then switch the clocks off.
    __SPI2_FORCE_RESET();
    __SPI2_RELEASE_RESET();
    __DMA1_FORCE_RESET();
    __DMA1_RELEASE_RESET();
    __DMA1_CLK_DISABLE();
    __SPI2_CLK_DISABLE();
    DEBUG_CODE( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nshutDownSpi2 end");     )
}
 
 
// Evaluates the status flags of the receive stream (DMA1 stream 3).
//
// Despite its name this is not an interrupt vector: the stream 3 interrupt is never
// enabled, and the function is called from DMA1_Stream4_IRQHandler() instead.
//
// For each error flag that is set (transfer error, FIFO error, direct mode error)
// the flag is cleared, the transfer is shut down and the user callback is invoked
// with the matching RECEIVE_DMA_* status. If the transfer-complete flag is set,
// Stream3TxRxCpltReceived is raised so that SPI2_IRQHandler_() knows all bytes arrived.
//
// NOTE(review): an error branch does not return early, and SPI2_IRQHandler_() runs
// after this function, so the callback may presumably be invoked more than once for
// one failed transfer - confirm whether callers tolerate that.
void DMA1_Stream3_IRQHandler_(void)
{
    DEBUG_CODE( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nDMA1_Stream3_IRQHandler_ beginning");     )
 
    // Transfer Error Interrupt management
    if(__DMA_GET_FLAG(DMA1_Stream3, __DMA_GET_TE_FLAG_INDEX(DMA1_Stream3)) != RESET)
    {
        __DMA_DISABLE_IT(DMA1_Stream3, DMA_IT_TE);                             // Disable the transfer error interrupt
        __DMA_CLEAR_FLAG(DMA1_Stream3, __DMA_GET_TE_FLAG_INDEX(DMA1_Stream3)); // Clear the transfer error flag
 
        // Transfer error callback
        CRITICAL_ERROR( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nDMA1_Stream3_IRQHandler Transfer Error");  )
        CRITICAL_ERROR( UART1_WaitForTransmitToFinish();                                                             )
        shutDownSpi2();
        if (transferCompletedCallback_ != ((EventCallback_t)NULL))
        {
            (void)transferCompletedCallback_(RECEIVE_DMA_TRANSFER_ERROR, 0);
        }
    }
 
 
    // FIFO Error Interrupt management
    if(__DMA_GET_FLAG(DMA1_Stream3, __DMA_GET_FE_FLAG_INDEX(DMA1_Stream3)) != RESET)
    {
        __DMA_DISABLE_IT(DMA1_Stream3, DMA_IT_FE);                              // Disable the FIFO Error interrupt
        __DMA_CLEAR_FLAG(DMA1_Stream3, __DMA_GET_FE_FLAG_INDEX(DMA1_Stream3));  // Clear the FIFO error flag
 
        // Transfer error callback
        CRITICAL_ERROR( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nDMA1_Stream3_IRQHandler FIFO Error");      )
        CRITICAL_ERROR( UART1_WaitForTransmitToFinish();                                                             )
        shutDownSpi2();
        if (transferCompletedCallback_ != ((EventCallback_t)NULL))
        {
            (void)transferCompletedCallback_(RECEIVE_DMA_TRANSFER_FIFO_ERROR, 0);
        }
    }
 
 
    // Direct Mode Error Interrupt management
    if(__DMA_GET_FLAG(DMA1_Stream3, __DMA_GET_DME_FLAG_INDEX(DMA1_Stream3)) != RESET)
    {
        __DMA_DISABLE_IT(DMA1_Stream3, DMA_IT_DME);                              // Disable the direct mode Error interrupt
        __DMA_CLEAR_FLAG(DMA1_Stream3, __DMA_GET_DME_FLAG_INDEX(DMA1_Stream3));  // Clear the direct mode error flag
 
        // Transfer error callback
        CRITICAL_ERROR( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nDMA1_Stream3_IRQHandler Direct Mode Error"); )
        CRITICAL_ERROR( UART1_WaitForTransmitToFinish();                                                               )
        shutDownSpi2();
        if (transferCompletedCallback_ != ((EventCallback_t)NULL))
        {
            (void)transferCompletedCallback_(RECEIVE_DMA_TRANSFER_DIRECT_MODE_ERROR, 0);
        }
    }
 
 
    // Transfer Complete Interrupt management
    if(__DMA_GET_FLAG(DMA1_Stream3, __DMA_GET_TC_FLAG_INDEX(DMA1_Stream3)) != RESET)
    {
        // Remember that the full frame was received; checked later by SPI2_IRQHandler_().
        Stream3TxRxCpltReceived = TRUE;
        // Disable the transfer complete interrupt if the DMA mode is not CIRCULAR
        if((DMA1_Stream3->CR & DMA_SxCR_CIRC) == 0)
        {
            __DMA_DISABLE_IT(DMA1_Stream3, DMA_IT_TC);   // Disable the transfer complete interrupt
        }
        __DMA_CLEAR_FLAG(DMA1_Stream3, __DMA_GET_TC_FLAG_INDEX(DMA1_Stream3));  // Clear the transfer complete flag
 
        DEBUG_CODE( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nSPI_DMATransmitReceiveCplt");   )
        DEBUG_CODE( UART1_WaitForTransmitToFinish();                                                  )
    }
    DEBUG_CODE( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nDMA1_Stream3_IRQHandler_ end");          )
}
 
 
// Evaluates the status flags of the transmit stream (DMA1 stream 4). Called as the
// first step of the real interrupt vector DMA1_Stream4_IRQHandler().
//
// For each error flag that is set (transfer error, FIFO error, direct mode error)
// the flag is cleared, the transfer is shut down and the user callback is invoked
// with the matching TRANSMIT_DMA_* status. A set transfer-complete flag is only
// acknowledged here; the result of the transfer is reported by SPI2_IRQHandler_().
//
// NOTE(review): an error branch does not return early and the caller goes on to run
// the receive-stream and SPI checks, so the callback may presumably fire more than
// once for one failed transfer - confirm whether callers tolerate that.
void DMA1_Stream4_IRQHandler_(void)
{
    DEBUG_CODE( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nDMA1_Stream4_IRQHandler_ beginning");    )
    // Transfer Error Interrupt management
    if(__DMA_GET_FLAG(DMA1_Stream4, __DMA_GET_TE_FLAG_INDEX(DMA1_Stream4)) != RESET)
    {
        __DMA_DISABLE_IT(DMA1_Stream4, DMA_IT_TE);                             // Disable the transfer error interrupt
        __DMA_CLEAR_FLAG(DMA1_Stream4, __DMA_GET_TE_FLAG_INDEX(DMA1_Stream4)); // Clear the transfer error flag
 
        // Transfer error callback
        CRITICAL_ERROR( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nDMA1_Stream4_IRQHandler Transfer Error");  )
        CRITICAL_ERROR( UART1_WaitForTransmitToFinish();                                                             )
        shutDownSpi2();
        if (transferCompletedCallback_ != ((EventCallback_t)NULL))
        {
            (void)transferCompletedCallback_(TRANSMIT_DMA_TRANSFER_ERROR, 0);
        }
    }
 
 
    // FIFO Error Interrupt management
    if(__DMA_GET_FLAG(DMA1_Stream4, __DMA_GET_FE_FLAG_INDEX(DMA1_Stream4)) != RESET)
    {
        __DMA_DISABLE_IT(DMA1_Stream4, DMA_IT_FE);                              // Disable the FIFO Error interrupt
        __DMA_CLEAR_FLAG(DMA1_Stream4, __DMA_GET_FE_FLAG_INDEX(DMA1_Stream4));  // Clear the FIFO error flag
 
        // Transfer error callback
        CRITICAL_ERROR( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nDMA1_Stream4_IRQHandler FIFO Error");   )
        CRITICAL_ERROR( UART1_WaitForTransmitToFinish();                                                          )
        shutDownSpi2();
        if (transferCompletedCallback_ != ((EventCallback_t)NULL))
        {
            (void)transferCompletedCallback_(TRANSMIT_DMA_TRANSFER_FIFO_ERROR, 0);
        }
    }
 
 
    // Direct Mode Error Interrupt management
    if(__DMA_GET_FLAG(DMA1_Stream4, __DMA_GET_DME_FLAG_INDEX(DMA1_Stream4)) != RESET)
    {
        __DMA_DISABLE_IT(DMA1_Stream4, DMA_IT_DME);                              // Disable the direct mode Error interrupt
        __DMA_CLEAR_FLAG(DMA1_Stream4, __DMA_GET_DME_FLAG_INDEX(DMA1_Stream4));  // Clear the direct mode error flag
 
        // Transfer error callback
        CRITICAL_ERROR( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nDMA1_Stream4_IRQHandler Direct Mode Error");  )
        CRITICAL_ERROR( UART1_WaitForTransmitToFinish();                                                                )
        shutDownSpi2();
        if (transferCompletedCallback_ != ((EventCallback_t)NULL))
        {
            (void)transferCompletedCallback_(TRANSMIT_DMA_TRANSFER_DIRECT_MODE_ERROR, 0);
        }
    }
 
 
    // Transfer Complete Interrupt management
    if(__DMA_GET_FLAG(DMA1_Stream4, __DMA_GET_TC_FLAG_INDEX(DMA1_Stream4)) != RESET)
    {
        // Disable the transfer complete interrupt if the DMA mode is not CIRCULAR
        if((DMA1_Stream4->CR & DMA_SxCR_CIRC) == 0)
        {
 
          __DMA_DISABLE_IT(DMA1_Stream4, DMA_IT_TC);   // Disable the transfer complete interrupt
        }
        __DMA_CLEAR_FLAG(DMA1_Stream4, __DMA_GET_TC_FLAG_INDEX(DMA1_Stream4));  // Clear the transfer complete flag
 
        // Transfer complete callback
        DEBUG_CODE( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nSPI_DMATransmitReceiveCplt"); )
        DEBUG_CODE( UART1_WaitForTransmitToFinish();                                                )
    }
 
    DEBUG_CODE( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nDMA1_Stream4_IRQHandler_ end");    )
}
 
 
// Inspects the SPI2 status register after a transfer and reports the outcome to
// the user callback.
//
// Despite its name this is not an interrupt vector: the SPI2 interrupt is never
// enabled, and the function is called from DMA1_Stream4_IRQHandler() instead.
//
// The status starts as SPI2_OK and is overwritten by every condition that applies,
// in this order: too few bits (receive stream never completed), CRC error, mode
// fault, overrun, TI frame format error. The last matching condition is therefore
// the one that gets reported.
//
// On success the four payload bytes of receiveDMAbuffer are combined into a 32-bit
// word with byte 0 as the most significant byte, and handed to the callback.
//
// NOTE(review): when no error is found and TXE is not set, neither shutDownSpi2()
// nor the callback is reached - confirm that this cannot happen in practice.
void SPI2_IRQHandler_(void)
{
    uint16_t status;
    status = SPI2_OK;
    DEBUG_CODE( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nSPI2_IRQHandler_ beginning");  )
    DEBUG_CODE( UART1_WaitForTransmitToFinish();                                                 )
    // The receive stream did not report transfer complete: the frame was too short.
    if (!Stream3TxRxCpltReceived)
    {
        status = TOO_FEW_BITS_RECEIVED_ERROR;
        CRITICAL_ERROR( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nNo SPI_DMATransmitReceiveCplt ");   )
        CRITICAL_ERROR( UART1_TransmitNullTerminatedString((uint8_t*)"on stream 3, which means too few ");    )
        CRITICAL_ERROR( UART1_TransmitNullTerminatedString((uint8_t*)"bits received!!! ");                    )
    }
    if (SPI2->SR & SPI_SR_RXNE)
    {   // No need to clear interrupt flag
        DEBUG_CODE( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nReceive buffer not empty flag is set");  )
    }
    if (SPI2->SR & SPI_SR_CRCERR)
    {
        status = SPI2_CRC_ERROR;
        CRITICAL_ERROR( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nCRCERR flag is set");    )
        __SPI_CLEAR_CRCERRFLAG(SPI2);
    }
    if (SPI2->SR & SPI_SR_MODF)
    {
        status = SPI2_MODE_FAULT;
        CRITICAL_ERROR( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nMode fault flag is set"); )
        __SPI_CLEAR_MODFFLAG(SPI2);
    }
    if (SPI2->SR & SPI_SR_OVR)
    {
        status = SPI2_OVERRUN_ERROR;
        CRITICAL_ERROR( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nOverrun flag is set");    )
        __SPI_CLEAR_OVRFLAG(SPI2);
    }
    if (SPI2->SR & SPI_SR_FRE)
    {
        status = SPI2_TI_FRAME_FORMAT_ERROR;
        CRITICAL_ERROR( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nTI frame format error flag is set"); )
        __SPI_CLEAR_FREFLAG(SPI2);
    }
    // Success path: only taken when the transmit buffer is empty and no error was recorded.
    if (SPI2->SR & SPI_SR_TXE)
    {   // No need to clear interrupt flag
        DEBUG_CODE( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nTransmit Buffer Empty flag is set"); )
        if (status == SPI2_OK)
        {
            DEBUG_CODE( uint16_t i;                                                                                                         )
            DEBUG_CODE( UART1_TransmitNullTerminatedString((uint8_t*) "\r\nSlave received the following in the receive DMA buffer: ");  )
            DEBUG_CODE( for (i = 0; i < (RECEIVE_SIZE - 1); i++)                                                                            )
            DEBUG_CODE( {                                                                                                                   )
            DEBUG_CODE(     UART1_TransmitNullTerminatedString((uint8_t*) "\r\n0x");                                                    )
            DEBUG_CODE(     UART1_TransmitHexNumberAsASCII((uint32_t)receiveDMAbuffer[i]);                                              )
            DEBUG_CODE( }                                                                                                                   )
            shutDownSpi2();
            if (transferCompletedCallback_ != ((EventCallback_t)NULL))
            {
                // Reassemble the payload big-endian; the fifth buffer byte is not passed on.
                (void)transferCompletedCallback_(SPI2_OK, (uint32_t) ((((uint32_t)receiveDMAbuffer[0]) << 24) |
                                                                      (((uint32_t)receiveDMAbuffer[1]) << 16) |
                                                                      (((uint32_t)receiveDMAbuffer[2]) << 8) |
                                                                      (((uint32_t)receiveDMAbuffer[3]) << 0)));
            }
        }
    }
    // Failure path: report whichever error was recorded last, with no data.
    if (status != SPI2_OK)
    {
        shutDownSpi2();
        if (transferCompletedCallback_ != ((EventCallback_t)NULL))
        {
            (void)transferCompletedCallback_(status, 0);
        }
    }
    DEBUG_CODE( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nSPI2_IRQHandler_ end");        )
    DEBUG_CODE( UART1_WaitForTransmitToFinish();                                                 )
}
 
 
void DMA1_Stream4_IRQHandler(void)
{
    uint32_t sanityCheck;
    DEBUG_CODE( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nDMA1_Stream4_IRQHandler beginning");  )
    DMA1_Stream4_IRQHandler_();
    while (SPI2->CR2 & SPI_SR_BSY) {}                                          // Wait for SPI module to finish
    sanityCheck = sanityCheckLimit;
    while (((DMA1->LISR & DMA_FLAG_TCIF3_7) == 0) && (0 < sanityCheck--)) {}   // Wait for transfer complete on receive stream
    DMA1_Stream3_IRQHandler_();
    SPI2_IRQHandler_();
    DEBUG_CODE( UART1_TransmitNullTerminatedString((uint8_t*)"\r\nDMA1_Stream4_IRQHandler end");        )
}

If your master needs to calculate the CRC manually (if it's not supported in hardware), here's how to do that:

// Folds one data byte into a running 8-bit CRC (polynomial 0x07, MSB first,
// no reflection, no final XOR).
//
// Initial_Crc: CRC accumulated over the preceding bytes (0 for the first byte).
// Input_Data:  next byte of the message.
// Returns the updated CRC.
static uint8_t CrcSoftwareFunc(uint8_t Initial_Crc, uint8_t Input_Data)
{
    enum { CRC8_POLY = 0x07, CRC8_TOP_BIT = 0x80, CRC8_WIDTH = 8 };

    // Merge the new byte into the running remainder.
    uint8_t remainder = (uint8_t)(Initial_Crc ^ Input_Data);
    uint8_t bitsLeft  = CRC8_WIDTH;

    // Shift out one bit per round; whenever a 1 drops off the top, subtract
    // (XOR) the polynomial.
    while (bitsLeft-- > 0)
    {
        const uint8_t feedback = (remainder & CRC8_TOP_BIT) ? CRC8_POLY : 0;
        remainder = (uint8_t)((remainder << 1) ^ feedback);
    }

    return remainder;
}
 
 
// Computes the CRC byte a master has to append to a 4-byte frame.
// The bytes are processed in transmission order (firstByte first), starting
// from a CRC of 0, by chaining CrcSoftwareFunc().
// Returns the resulting 8-bit CRC.
uint8_t calculateCRC(uint8_t firstByte, uint8_t secondByte, uint8_t thirdByte, uint8_t forthByte)
{
    const uint8_t frame[4] = { firstByte, secondByte, thirdByte, forthByte };
    uint8_t runningCrc = 0;
    uint8_t idx;

    for (idx = 0; idx < 4; idx++)
    {
        runningCrc = CrcSoftwareFunc(runningCrc, frame[idx]);
    }
    return runningCrc;
}