2020-08-07 03:00 AM
Hello Everyone,
I am trying to extract MFCC columns (MfccColumn) from the microphones connected to my STM32H7. But when I pass my audio output buffer to my MFCC extraction function, I always get nan or -nan in pOutColBuffer[NUM_MELS].
Does anyone have an idea what is wrong? Thanks,
Here is the code:
#include "feature_extraction.h"
#include "AWEPlatform.h"
/* ---- MFCC pipeline configuration ---------------------------------------- */
#define SAMPLE_RATE 16000U /* Input signal sampling rate (Hz) */
#define FRAME_LEN 192U /* Window length in samples; the frame is zero-padded up to FFT_LEN */
#define FFT_LEN 256U /* Number of FFT points. Must be >= FRAME_LEN and a length accepted by arm_rfft_fast_init_f32() */
#define NUM_FRAMES 16U /* Number of columns in spectrogram */
#define HOP_LEN 92U /* Frame hop/overlap parameter in samples; not referenced by the code visible here */
#define NUM_MELS 16U /* Number of mel bands */

/* Catch invalid configurations at build time instead of producing NaN at run time. */
#if (FFT_LEN < FRAME_LEN)
#error "FFT_LEN must be greater than or equal to FRAME_LEN"
#endif
/* arm_rfft_fast_init_f32() only accepts power-of-two lengths from 32 to 4096. */
#if ((FFT_LEN & (FFT_LEN - 1U)) != 0U) || (FFT_LEN < 32U) || (FFT_LEN > 4096U)
#error "FFT_LEN must be a power of two between 32 and 4096"
#endif

/*
 * Working buffers. Sizes in the comments assume a 4-byte float32_t.
 * pInBuffer is sized to FFT_LEN (not FRAME_LEN) so that zero-padding the
 * frame up to the FFT length can never run past the end of the buffer.
 */
float32_t pInBuffer[FFT_LEN]; /* FFT_LEN floats: 1 KB */
float32_t pOutColBuffer[NUM_MELS]; /* NUM_MELS floats: 64 B */
float32_t pOutMelSpectrogram[NUM_MELS * NUM_FRAMES]; /* NUM_MELS * NUM_FRAMES floats: 1 KB */
float32_t pSpectrScratchBuffer[FFT_LEN]; /* FFT_LEN floats: 1 KB */
float32_t pWindowFuncBuffer[FFT_LEN]; /* FFT_LEN floats: 1 KB (only FRAME_LEN entries are initialised by Window_Init) */
uint32_t pMelFilterStartIndices[NUM_MELS]; /* NUM_MELS words: 64 B */
uint32_t pMelFilterStopIndices[NUM_MELS]; /* NUM_MELS words: 64 B */
float32_t pMelFilterCoefs[2020]; /* 7.9 KB */ /* Size given by S_MelFilter.CoefficientsLength */
/* Allocate buffers and structures */
arm_rfft_fast_instance_f32 S_Rfft; /* 24 B */
MelFilterTypeDef S_MelFilter; /* 48 B */
SpectrogramTypeDef S_Spectr; /* 28 B */
MelSpectrogramTypeDef S_MelSpectr; /* 8 B */
LogMelSpectrogramTypeDef S_LogMelSpectr; /* 16 B */
DCT_InstanceTypeDef S_Dctinstance;
MfccTypeDef S_Mfcc;
/*
* Python equivalent:
* librosa.feature.melspectrogram(y=y, sr=16000, n_mels=128, hop_length=1024, center=False)
*/
void Preprocessing_Init(void)
{
/* Init window function */
if (Window_Init(pWindowFuncBuffer, FRAME_LEN, WINDOW_HANN) != 0)
{
printf("Init error\n");
exit(1);
}
/* Init RFFT */
arm_rfft_fast_init_f32(&S_Rfft, FFT_LEN);
/* Init Mel filter */
S_MelFilter.pStartIndices = (uint32_t *)pMelFilterStartIndices;
S_MelFilter.pStopIndices = (uint32_t *)pMelFilterStopIndices;
S_MelFilter.pCoefficients = (uint32_t *)pMelFilterCoefs;
S_MelFilter.NumMels = NUM_MELS;
S_MelFilter.FFTLen = FFT_LEN;
S_MelFilter.SampRate = SAMPLE_RATE;
S_MelFilter.FMin = 0.0;
S_MelFilter.FMax = S_MelFilter.SampRate / 2.0;
S_MelFilter.Formula = MEL_SLANEY;
S_MelFilter.Normalize = 1;
S_MelFilter.Mel2F = 1;
MelFilterbank_Init(&S_MelFilter);
/* Init Spectrogram */
S_Spectr.pRfft = &S_Rfft;
S_Spectr.Type = SPECTRUM_TYPE_POWER;
S_Spectr.pWindow = (float32_t *)pWindowFuncBuffer;
S_Spectr.SampRate = SAMPLE_RATE;
S_Spectr.FrameLen = FRAME_LEN;
S_Spectr.FFTLen = FFT_LEN;
S_Spectr.pScratch = pSpectrScratchBuffer;
/* Init MelSpectrogram */
S_MelSpectr.SpectrogramConf = &S_Spectr;
S_MelSpectr.MelFilter = &S_MelFilter;
/* LogMelSpectr */
S_LogMelSpectr.LogFormula = LOGMELSPECTROGRAM_SCALE_DB;
S_LogMelSpectr.MelSpectrogramConf = &S_MelSpectr;
S_LogMelSpectr.Ref = 1.0;
S_LogMelSpectr.TopdB = -80;
/* DCT instacne */
float32_t pDCTCoefsBuffer[16 * 16];
//float32_t pOutBuffer[13];
S_Dctinstance.NumFilters = 16;
S_Dctinstance.NumInputs = 16;
S_Dctinstance.Type = DCT_TYPE_III;
S_Dctinstance.RemoveDCTZero = 1;
S_Dctinstance.pDCTCoefs = pDCTCoefsBuffer;
DCT_Init(&S_Dctinstance);
/* Init Mfcc */
S_Mfcc.LogMelConf = &S_LogMelSpectr;
S_Mfcc.pDCT = &S_Dctinstance;
S_Mfcc.NumMfccCoefs = NUM_FRAMES;
S_Mfcc.pScratch = pSpectrScratchBuffer;
}
/*
 * Compute one MFCC column from a block of 16-bit samples.
 *
 * pInSignal: input samples; FRAME_LEN of them are handed to buf_to_float().
 *
 * The samples are first copied into pInBuffer by buf_to_float() (presumably an
 * int16 -> float conversion), then MfccColumn() processes pInBuffer and writes
 * its result to pOutColBuffer. Columns are not accumulated into
 * pOutMelSpectrogram here.
 */
void AudioPreprocessing_Run(int16_t *pInSignal)
{
  /* Stage 1: fill the float working buffer from the raw samples. */
  buf_to_float(pInSignal, pInBuffer, FRAME_LEN);

  /* Stage 2: run the configured MFCC chain on that buffer. */
  MfccColumn(&S_Mfcc, pInBuffer, pOutColBuffer);
}
My code to capture audio from the microphones. It works well:
/*
 * Zero both audio buffers, then initialise the DFSDM microphone input and the
 * line-in / headphone audio path, both with BSP_AUDIO_FREQUENCY_48K.
 *
 * NOTE(review): the MFCC code in this file is configured with SAMPLE_RATE
 * 16000 while the audio path here uses the 48K constant - confirm that the
 * samples are resampled before feature extraction.
 */
void SetupAudio(void)
{
  /* Start from silence in both directions. */
  memset(AudioBufferOut, 0, sizeof AudioBufferOut);
  memset(AudioBufferIn, 0, sizeof AudioBufferIn);

  /* Digital microphone capture. */
  BSP_DFSDM_IN_Init(INPUT_DEVICE_DIGITAL_MIC1,
                    BSP_AUDIO_FREQUENCY_48K,
                    MIC_SAMPLE_SIZE_IN_BITS,
                    NUMBER_MICS);

  /* Line input and headphone output. */
  BSP_AUDIO_IN_OUT_Init(INPUT_DEVICE_INPUT_LINE_1,
                        OUTPUT_DEVICE_HEADPHONE,
                        BSP_AUDIO_FREQUENCY_48K);
} // End SetupAudio
// Audio Init
// Initialises the MFCC pipeline, then enables the audio-in DMA interrupt and
// starts microphone recording, line-in recording and playback.
void AudioInit(void)
{
  // some code ...

  //--- MFCC Init -----
  // Must complete before the DMA interrupt is enabled and the streams are
  // started: AUDIO_IN_DMA_Callback runs the MFCC chain, and (presumably being
  // driven by this IRQ) it could otherwise fire while the pipeline structures
  // are still unconfigured.
  Preprocessing_Init();

  HAL_NVIC_SetPriority(AUDIO_IN_SAIx_DMAx_IRQ, 3, 0);
  HAL_NVIC_EnableIRQ(AUDIO_IN_SAIx_DMAx_IRQ);

  BSP_DFSDM_IN_Record(DFSDM_scratchBufferList, MIC_SINGLE_CHANNEL_BUFFER_SIZE);
  BSP_AUDIO_IN_Record( (UINT16 *)AudioBufferIn, INPUT_AUDIO_BUFFER_SIZE);
  BSP_AUDIO_OUT_Play( (UINT16 *)AudioBufferOut, OUTPUT_AUDIO_BUFFER_SIZE);
} // End AudioInit
//-----------------------------------------------------------------------------
// METHOD: AUDIO_IN_DMA_Callback
// PURPOSE: Interrupt handler - called each time a frame of audio is received.
//          Runs the MFCC computation on the start of AudioBufferOut; the
//          result is left in pOutColBuffer by AudioPreprocessing_Run.
// NOTE(review): the MFCC chain is fed from AudioBufferOut, not AudioBufferIn.
//          If the elided code does not fill AudioBufferOut with the microphone
//          samples first, the input may be all zeros (the buffer is cleared in
//          SetupAudio) - confirm this is the intended source.
// NOTE(review): the whole MFCC computation executes in interrupt context -
//          confirm it fits in the time budget of one audio frame.
//-----------------------------------------------------------------------------
void AUDIO_IN_DMA_Callback(void)
{
// some codes ...
//--- MFCC Compute -----
// Passes a pointer to the first sample of the output buffer.
AudioPreprocessing_Run(&AudioBufferOut[0]);
// some code ...
} // End AUDIO_IN_DMA_Callback