Hello Everyone,
I am running a communication between two STM32F4 Discovery boards using SPI. I am using NSS, whereby a fourth pin is used to control the communication. The NSS line is always low during the communication. The master sends 11 bytes and the slave sends 11 bytes at the same time, because the link is full-duplex. However, the master receives all the bytes from the slave except the first byte.

I have checked on a scope: the NSS line is low during the communication. I have tried different CPOL and CPHA settings. However, the master never receives the first byte under any of them; it is always 0.

Master uses SPI3 and slave SPI2.

The following is the code for Master:

#include "stm32f4xx.h"
#include "stm32f4xx_spi.h"
#include "stm32f4xx_rcc.h"
#include "stm32f4xx_gpio.h"

/**********************************************************************************/
/* defines*/
/**********************************************************************************/

/* Number of bytes exchanged per transfer in each direction. */
#define RX_BUFFER_LENGTH    11
#define TX_BUFFER_LENGTH    11
/* Size of the utd_* buffers below. */
#define UTD_SPI_LEN         128

/**********************************************************************************/
/* variables*/
/**********************************************************************************/

/* Bytes read from SPI3->DR by SPI3_IRQHandler. */
__IO uint8_t aRxBuffer [RX_BUFFER_LENGTH];
/* Bytes written to SPI3->DR by SPI3_IRQHandler; filled in main(). */
__IO uint8_t aTxBuffer [TX_BUFFER_LENGTH];

/* Next free slot in aRxBuffer / next byte of aTxBuffer to send.
 * Both are advanced by SPI3_IRQHandler; main() resets ubTxIndex only. */
__IO uint8_t ubRxIndex = 0;
__IO uint8_t ubTxIndex = 0;

/* NOTE(review): not referenced anywhere in the code shown here. */
uint8_t utd_readbuf [UTD_SPI_LEN];
uint8_t utd_writebuf[UTD_SPI_LEN];

/* Filled by RCC_GetClocksFreq() in main(). */
RCC_ClocksTypeDef RCC_Clocks;
/* Result of RCC_GetSYSCLKSource(), stored by main(). */
uint8_t sysclk_src;

/**********************************************************************************/
/* prototypes*/
/**********************************************************************************/

void mySPI_Init(void);
/* NOTE(review): declared but no definition is visible in the code shown here. */
uint8_t mySPI_SendData(uint8_t);

/**********************************************************************************/
/* functions*/
/**********************************************************************************/

int main(void)
{
    RCC_HSEConfig(RCC_HSE_ON);
    while(!RCC_WaitForHSEStartUp())
    {}

    RCC_GetClocksFreq(&RCC_Clocks);
    sysclk_src=RCC_GetSYSCLKSource();
    RCC->AHB1ENR |= 0x00000001; // Enable clock for GPIOA for reading the User switch
    GPIOA->MODER |= 0x00000000; // Input pin for reading User Button PA0

    mySPI_Init();

    aTxBuffer[0] = 0xA0;
    aTxBuffer[1] = 0xA1;
    aTxBuffer[2] = 0xA2;
    aTxBuffer[3] = 0xA3;
    aTxBuffer[4] = 0xA4;
    aTxBuffer[5] = 0xA5;
    aTxBuffer[6] = 0xA6;
    aTxBuffer[7] = 0xA7;
    aTxBuffer[8] = 0xA8;
    aTxBuffer[9] = 0xA9;
    aTxBuffer[10] = 0xAA;

    while(1){

        if ((GPIOA->IDR & 0x01)) {

            ubTxIndex = 0;                                  // reset to zero before send data again

            SPI_I2S_ITConfig(SPI3, SPI_I2S_IT_TXE, ENABLE);
            SPI_I2S_ITConfig(SPI3, SPI_I2S_IT_RXNE, ENABLE);
            SPI_Cmd(SPI3, ENABLE);

            int i;
            for ( i =0; i < 50000; i++) {
                asm("NOP");
             }
        }
    }
}

void mySPI_Init(void){

    RCC_AHB1PeriphClockCmd(RCC_AHB1Periph_GPIOA |    
 RCC_AHB1Periph_GPIOB , ENABLE);
    GPIO_InitTypeDef GPIO_InitTypeDefStruct;

    /* configure pins used by SPI3 * PA15 = NSS * PB3 = SCK * PB4 = MISO * PB5 = MOSI */
    GPIO_InitTypeDefStruct.GPIO_Pin = GPIO_Pin_3 |GPIO_Pin_4 | GPIO_Pin_5;
    GPIO_InitTypeDefStruct.GPIO_Mode = GPIO_Mode_AF;
    GPIO_InitTypeDefStruct.GPIO_Speed = GPIO_Speed_50MHz;
    GPIO_InitTypeDefStruct.GPIO_OType = GPIO_OType_PP;
    GPIO_InitTypeDefStruct.GPIO_PuPd =  GPIO_PuPd_UP;
    GPIO_Init(GPIOB, &GPIO_InitTypeDefStruct);

    GPIO_InitTypeDefStruct.GPIO_Pin = GPIO_Pin_15;
    GPIO_InitTypeDefStruct.GPIO_Mode = GPIO_Mode_AF;
    GPIO_InitTypeDefStruct.GPIO_Speed = GPIO_Speed_50MHz;
    GPIO_InitTypeDefStruct.GPIO_OType = GPIO_OType_PP;
    GPIO_InitTypeDefStruct.GPIO_PuPd =  GPIO_PuPd_UP;
    GPIO_Init(GPIOA, &GPIO_InitTypeDefStruct);

    GPIO_PinAFConfig(GPIOB, GPIO_PinSource3, GPIO_AF_SPI3);
    GPIO_PinAFConfig(GPIOB, GPIO_PinSource4, GPIO_AF_SPI3);
    GPIO_PinAFConfig(GPIOB, GPIO_PinSource5, GPIO_AF_SPI3);
    GPIO_PinAFConfig(GPIOA, GPIO_PinSource15, GPIO_AF_SPI3);

    RCC_APB1PeriphClockCmd(RCC_APB1Periph_SPI3, ENABLE);
    SPI_InitTypeDef SPI_InitTypeDefStruct;
    SPI_InitTypeDefStruct.SPI_BaudRatePrescaler = SPI_BaudRatePrescaler_256;
    SPI_InitTypeDefStruct.SPI_Direction = SPI_Direction_2Lines_FullDuplex; //SPI_Direction_2Lines_FullDuplex;
    SPI_InitTypeDefStruct.SPI_Mode = SPI_Mode_Master;
    SPI_InitTypeDefStruct.SPI_DataSize = SPI_DataSize_8b;
    SPI_InitTypeDefStruct.SPI_NSS = SPI_NSS_Hard;                  // Chip Select Software
    SPI_InitTypeDefStruct.SPI_FirstBit = SPI_FirstBit_MSB;
    SPI_InitTypeDefStruct.SPI_CPOL = SPI_CPOL_High;                // Clock Pol
    SPI_InitTypeDefStruct.SPI_CPHA = SPI_CPHA_1Edge;
    SPI_Init(SPI3, &SPI_InitTypeDefStruct);

    SPI_SSOutputCmd(SPI3, ENABLE);

    NVIC_EnableIRQ (SPI3_IRQn);                                    // enable interrupt on SPI1

}

void SPI3_IRQHandler(void)
{

    if (SPI_I2S_GetITStatus(SPI3, SPI_I2S_IT_TXE) == SET)
    {
       if(ubTxIndex < TX_BUFFER_LENGTH)
       {
                     SPI3->DR = aTxBuffer[ubTxIndex];
                     ubTxIndex++;
      }
      else
       {
                SPI_I2S_ITConfig(SPI3, SPI_I2S_IT_TXE, DISABLE);

      }
    }

    if ((SPI_I2S_GetITStatus(SPI3, SPI_I2S_IT_RXNE) == SET))
        {
            if(ubRxIndex < RX_BUFFER_LENGTH)
            {
                       aRxBuffer[ubRxIndex] = SPI3->DR ;
                       ubRxIndex++;
            }
            else
            {
                SPI_I2S_ITConfig(SPI3, SPI_I2S_IT_RXNE, DISABLE);

            }
        }

}

Here is the code for the slave:

#include "stm32f4xx.h"
#include "stm32f4xx_spi.h"
#include "stm32f4xx_rcc.h"
#include "stm32f4xx_gpio.h"

/**********************************************************************************/
/* defines */
/**********************************************************************************/
/* Number of bytes exchanged per transfer in each direction. */
#define RX_BUFFER_LENGTH    11
#define TX_BUFFER_LENGTH    11

/**********************************************************************************/
/* variables */
/**********************************************************************************/
/* Bytes read from SPI2->DR by SPI2_IRQHandler. */
__IO uint8_t aRxBuffer [RX_BUFFER_LENGTH];
/* Bytes written to SPI2->DR by SPI2_IRQHandler; filled in main(). */
__IO uint8_t aTxBuffer [TX_BUFFER_LENGTH];

/* Next free slot in aRxBuffer / next byte of aTxBuffer to send.
 * Both are only ever advanced (by SPI2_IRQHandler); nothing resets them. */
__IO uint8_t ubRxIndex=0;
__IO uint8_t ubTxIndex=0;

/* NOTE(review): not referenced anywhere in the code shown here;
 * presumably a "buffer full" flag - confirm. */
uint8_t Buffer_voll=0;

/* Filled by RCC_GetClocksFreq() in main(). */
RCC_ClocksTypeDef RCC_Clocks;
/* Result of RCC_GetSYSCLKSource(), stored by main(). */
uint8_t sysclk_src;

void mySPI_Init(void);

/*
 * Slave entry point.
 *
 * Turns the HSE oscillator on, records the clock configuration, fills the
 * transmit buffer and only then initialises SPI2. After that everything is
 * handled in SPI2_IRQHandler and main() just idles.
 */
int main(void)
{
    RCC_HSEConfig(RCC_HSE_ON);
    while(!RCC_WaitForHSEStartUp())
    {}

    RCC_GetClocksFreq(&RCC_Clocks);
    sysclk_src=RCC_GetSYSCLKSource();

    /* The transmit buffer MUST be populated before mySPI_Init().
     * mySPI_Init() enables the TXE interrupt, and TXE is already set after
     * reset, so SPI2_IRQHandler runs immediately and queues aTxBuffer[0].
     * When the buffer was filled afterwards, that first queued byte was
     * still the zero-initialised value, so the master received 0x00
     * instead of 0xBB. */
    aTxBuffer[0] = 0xBB;
    aTxBuffer[1] = 0xB0;
    aTxBuffer[2] = 0xB1;
    aTxBuffer[3] = 0xB2;
    aTxBuffer[4] = 0xB3;
    aTxBuffer[5] = 0xB4;
    aTxBuffer[6] = 0xB5;
    aTxBuffer[7] = 0xB6;
    aTxBuffer[8] = 0xB7;
    aTxBuffer[9] = 0xB8;
    aTxBuffer[10] = 0xB9;

    mySPI_Init();

    /* All further work happens in the SPI2 interrupt handler. */
    while(1)
    {  }

}

void mySPI_Init(void){

    RCC_AHB1PeriphClockCmd(RCC_AHB1Periph_GPIOB , ENABLE);
    GPIO_InitTypeDef GPIO_InitTypeDefStruct;

    /* configure pins used by SPI2 * PB12 = NSS * PB13 = SCK * PB14 = MISO * PB15 = MOSI */
    GPIO_InitTypeDefStruct.GPIO_Pin = GPIO_Pin_12 | GPIO_Pin_13 | GPIO_Pin_14 | GPIO_Pin_15;
    GPIO_InitTypeDefStruct.GPIO_Mode = GPIO_Mode_AF;
    GPIO_InitTypeDefStruct.GPIO_Speed = GPIO_Speed_50MHz;
    GPIO_InitTypeDefStruct.GPIO_OType = GPIO_OType_PP;
    GPIO_InitTypeDefStruct.GPIO_PuPd = GPIO_PuPd_UP;
    GPIO_Init(GPIOB, &GPIO_InitTypeDefStruct);

    GPIO_PinAFConfig(GPIOB, GPIO_PinSource12, GPIO_AF_SPI2);
    GPIO_PinAFConfig(GPIOB, GPIO_PinSource13, GPIO_AF_SPI2);
    GPIO_PinAFConfig(GPIOB, GPIO_PinSource14, GPIO_AF_SPI2);
    GPIO_PinAFConfig(GPIOB, GPIO_PinSource15, GPIO_AF_SPI2);

    RCC_APB1PeriphClockCmd(RCC_APB1Periph_SPI2, ENABLE);
    SPI_InitTypeDef SPI_InitTypeDefStruct;

    SPI_InitTypeDefStruct.SPI_BaudRatePrescaler = SPI_BaudRatePrescaler_256;
    SPI_InitTypeDefStruct.SPI_Direction = SPI_Direction_2Lines_FullDuplex;
    SPI_InitTypeDefStruct.SPI_Mode = SPI_Mode_Slave;
    SPI_InitTypeDefStruct.SPI_DataSize = SPI_DataSize_8b;
    SPI_InitTypeDefStruct.SPI_NSS = SPI_NSS_Hard ;
    SPI_InitTypeDefStruct.SPI_FirstBit = SPI_FirstBit_MSB;
    SPI_InitTypeDefStruct.SPI_CPOL = SPI_CPOL_High;
    SPI_InitTypeDefStruct.SPI_CPHA = SPI_CPHA_1Edge;
    SPI_Init(SPI2, &SPI_InitTypeDefStruct);

    NVIC_EnableIRQ (SPI2_IRQn);                                // enable interrupt on SPI2
    SPI_I2S_ITConfig(SPI2, SPI_I2S_IT_TXE, ENABLE);
    SPI_I2S_ITConfig(SPI2, SPI_I2S_IT_RXNE, ENABLE);
    SPI_Cmd(SPI2, ENABLE);

}

void SPI2_IRQHandler(void)
{

   if (SPI_I2S_GetITStatus(SPI2, SPI_I2S_IT_TXE) == SET  )
    {
           if(ubTxIndex < TX_BUFFER_LENGTH)
           {
                        SPI2->DR = aTxBuffer[ubTxIndex];
                        ubTxIndex++;
          }
          else
           {

                    SPI_I2S_ITConfig(SPI2, SPI_I2S_IT_TXE, DISABLE);
          }
    }
    if ((SPI_I2S_GetITStatus(SPI2, SPI_I2S_IT_RXNE) == SET ))
    {
            if(ubRxIndex < RX_BUFFER_LENGTH)
            {
                       aRxBuffer[ubRxIndex] = SPI2->DR ;
                       ubRxIndex++;
            }
            else
            {
                SPI_I2S_ITConfig(SPI2, SPI_I2S_IT_RXNE, DISABLE);
            }
    }
}

So, the master misses the first byte, 0xBB. I do not understand where the problem could be. As can be seen, both master and slave are initialized with the same clock speed. If anyone can shed light on the problem, it would be a great help. Thank you.