2025-01-18 04:58 PM
I'm trying to build a snoring detector embedded on the STM32WB55 DK board, but every time I run it, inference fails :\ To be precise, the returned error looks like the input size or dimension is wrong.
This is the code I am running:
#include "microphone.h"
#include "dbg_trace.h"
#include "arm_math.h"
#include "arm_const_structs.h"
#include "ai_platform.h"
#include "snoring_dataset.h"
#include "snoring_dataset_data.h"
//#define AUDIO_OUT_MS (20)
//#define AUDIO_OUT_SAMPLING_FREQUENCY 16000
#define FFT_SIZE 512
#define FFT_BINS 256
#define TARGET_BINS 40 // Final bins after averaging
#define FRAME_SIZE 960 // 60 ms at 16 kHz
#define STRIDE_SIZE 480 // 30 ms overlap
#define SEQUENCE_FRAMES 16 // Number of frames for the new model
#define CLASSIFIER_INPUT_SIZE (SEQUENCE_FRAMES * TARGET_BINS) // 640 floats
#define BUFFER_SIZE 32 // Circular buffer size for frames
#define REDUCE_FACTOR 6 // Average 6 bins together
#define CIRCULAR_BUFFER_SIZE SEQUENCE_FRAMES // Circular buffer holds 16 frames
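/* Compile-time sanity checks on the derived sizes (a sketch; assumes C11
 * _Static_assert is available and the 16 kHz mono capture configured below): */
_Static_assert(FRAME_SIZE == 60 * (16000 / 1000), "FRAME_SIZE should be 60 ms at 16 kHz");
_Static_assert(FRAME_SIZE - STRIDE_SIZE == STRIDE_SIZE, "expected 50% (30 ms) overlap");
_Static_assert(CLASSIFIER_INPUT_SIZE == 640, "model input must be 640 floats");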
uint16_t PDM_Buffer[((((2 * AUDIO_IN_CHANNELS * AUDIO_IN_SAMPLING_FREQUENCY) / 1000) * MAX_DECIMATION_FACTOR) / 16) * N_MS_PER_INTERRUPT];
uint16_t PCM_Buffer[((AUDIO_IN_SAMPLING_FREQUENCY) / 1000) * N_MS_PER_INTERRUPT];
BSP_AUDIO_Init_t MicParams;
static float32_t pcm_buffer_prev[FRAME_SIZE - STRIDE_SIZE] = { 0 }; // Previous 30 ms of overlap data
static float32_t frame[FRAME_SIZE] = { 0 }; // 60 ms frame
static float32_t fft_output[FFT_BINS]; // FFT output
// Circular buffer to store spectrogram frames
static volatile float32_t circularBuffer[CIRCULAR_BUFFER_SIZE][TARGET_BINS] = { 0 };
static uint16_t bufferIndex = 0; // Index in the circular buffer
static uint16_t framesFilled = 0; // Number of frames currently filled in the buffer
static ai_handle snoring_model = AI_HANDLE_NULL;
AI_ALIGNED(4) static ai_u8 activations[AI_SNORING_DATASET_DATA_ACTIVATIONS_SIZE];
AI_ALIGNED(4) static ai_i8 in_data[AI_SNORING_DATASET_IN_1_SIZE_BYTES];
AI_ALIGNED(4) static ai_i8 out_data[AI_SNORING_DATASET_OUT_1_SIZE_BYTES];
AI_ALIGNED(4) static ai_buffer ai_input[AI_SNORING_DATASET_IN_NUM] = { 0 };
AI_ALIGNED(4) static ai_buffer ai_output[AI_SNORING_DATASET_OUT_NUM] = { 0 };
void classifyAudio();
static void Init_Acquisition_Peripherals(uint32_t AudioFreq, uint32_t ChnlNbrIn,
uint32_t ChnlNbrOut);
static void Start_Acquisition(void);
static void AudioProcess(void);
void InitAudio(void) {
Init_Acquisition_Peripherals(AUDIO_IN_SAMPLING_FREQUENCY, AUDIO_IN_CHANNELS,
0);
Start_Acquisition();
ai_network_params ai_params =
AI_NETWORK_PARAMS_INIT(
AI_SNORING_DATASET_DATA_WEIGHTS(ai_snoring_dataset_data_weights_get()),
AI_SNORING_DATASET_DATA_ACTIVATIONS(activations)
);
ai_error ai_err = ai_snoring_dataset_create(&snoring_model,
AI_SNORING_DATASET_DATA_CONFIG);
if (ai_err.type != AI_ERROR_NONE) {
APP_DBG_MSG("Error: Model creation failed. Type=%d, Code=%d\r\n",
ai_err.type, ai_err.code);
return;
}
if (!ai_snoring_dataset_init(snoring_model, &ai_params)) {
APP_DBG_MSG("Error: Model initialization failed.\r\n");
return;
}
	ai_input[0] = (ai_buffer) {
		.format = AI_BUFFER_FORMAT_FLOAT,
		.data = AI_HANDLE_PTR(in_data),
		.meta_info = NULL,
		.flags = AI_FLAG_NONE,
		.size = CLASSIFIER_INPUT_SIZE,
		.shape = AI_BUFFER_SHAPE_INIT( // Shape order per AI_SHAPE_BCWH: batch, channels, width, height
			AI_SHAPE_BCWH,
			4,   // 4 dimensions in total
			1,   // Batch size
			640, // Flattened tensor size (16 frames x 40 bins)
			1,   // Width placeholder
			1    // Height placeholder
		),
	};
	ai_output[0] = (ai_buffer) {
		.format = AI_BUFFER_FORMAT_FLOAT,
		.data = AI_HANDLE_PTR(out_data),
		.meta_info = NULL,
		.flags = AI_FLAG_NONE,
		.size = AI_SNORING_DATASET_OUT_1_SIZE_BYTES / sizeof(float),
		.shape = AI_BUFFER_SHAPE_INIT( // Shape order per AI_SHAPE_BCWH: batch, channels, width, height
			AI_SHAPE_BCWH,
			4, // 4 dimensions in total
			1, // Batch size
			2, // Output classes
			1, // Width placeholder
			1  // Height placeholder
		),
	};
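	/* Alternative sketch: instead of hand-building the ai_buffer descriptors
	 * above, the X-CUBE-AI generated code usually exposes getters that return
	 * descriptors already carrying the expected shape and format (assuming the
	 * generated snoring_dataset.h declares them):
	 *
	 *   ai_buffer *inputs  = ai_snoring_dataset_inputs_get(snoring_model, NULL);
	 *   ai_buffer *outputs = ai_snoring_dataset_outputs_get(snoring_model, NULL);
	 *   inputs[0].data  = AI_HANDLE_PTR(in_data);
	 *   outputs[0].data = AI_HANDLE_PTR(out_data);
	 *
	 * Passing those to ai_snoring_dataset_run() would rule out a mismatch
	 * between a hand-written descriptor and what the runtime expects. */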
APP_DBG_MSG("==>> INFO: Neural Network model initialized successfully\r\n");
APP_DBG_MSG("Input Tensor Configuration:\r\n");
APP_DBG_MSG(" Format: 0x%X\r\n", ai_input[0].format);
APP_DBG_MSG(" Shape: %u x %u x %u x %u\r\n", ai_input[0].shape.data[0],
ai_input[0].shape.data[1], ai_input[0].shape.data[2],
ai_input[0].shape.data[3]);
APP_DBG_MSG(" Size: %u elements\r\n", ai_input[0].size);
APP_DBG_MSG("Output Tensor Configuration:\r\n");
APP_DBG_MSG(" Format: 0x%X\r\n", ai_output[0].format);
APP_DBG_MSG(" Shape: %u x %u x %u x %u\r\n", ai_output[0].shape.data[0],
ai_output[0].shape.data[1], ai_output[0].shape.data[2],
ai_output[0].shape.data[3]);
APP_DBG_MSG(" Size: %u elements\r\n", ai_output[0].size);
ai_network_report report;
if (ai_snoring_dataset_get_info(snoring_model, &report)) {
// General model information
APP_DBG_MSG("Model Name: %s\r\n", report.model_name);
APP_DBG_MSG("Model Signature: %s\r\n", report.model_signature);
APP_DBG_MSG("Model Date/Time: %s\r\n", report.model_datetime);
APP_DBG_MSG("Compile Date/Time: %s\r\n", report.compile_datetime);
// Runtime and tool versions
APP_DBG_MSG("Runtime Version: %u.%u.%u\r\n",
report.runtime_version.major, report.runtime_version.minor,
report.runtime_version.micro);
APP_DBG_MSG("Tool Version: %u.%u.%u\r\n", report.tool_version.major,
report.tool_version.minor, report.tool_version.micro);
APP_DBG_MSG("Tool API Version: %u.%u.%u\r\n",
report.tool_api_version.major, report.tool_api_version.minor,
report.tool_api_version.micro);
// MACC (Multiply-Accumulate Operations) information
APP_DBG_MSG("Number of MACCs: %" PRIu64 "\r\n", report.n_macc);
// Inputs
APP_DBG_MSG("Number of Inputs: %u\r\n", report.n_inputs);
for (ai_u16 i = 0; i < report.n_inputs; i++) {
APP_DBG_MSG(" Input %u Shape: ", i);
for (ai_u32 dim = 0; dim < report.inputs[i].shape.size; dim++) {
APP_DBG_MSG("%u ", report.inputs[i].shape.data[dim]);
}
APP_DBG_MSG("\r\n");
APP_DBG_MSG(" Input Format: 0x%X\r\n", report.inputs[i].format);
APP_DBG_MSG(" Input Size: %u elements\r\n", report.inputs[i].size);
}
// Outputs
APP_DBG_MSG("Number of Outputs: %u\r\n", report.n_outputs);
for (ai_u16 i = 0; i < report.n_outputs; i++) {
APP_DBG_MSG(" Output %u Shape: ", i);
for (ai_u32 dim = 0; dim < report.outputs[i].shape.size; dim++) {
APP_DBG_MSG("%u ", report.outputs[i].shape.data[dim]);
}
APP_DBG_MSG("\r\n");
APP_DBG_MSG(" Output Format: 0x%X\r\n", report.outputs[i].format);
APP_DBG_MSG(" Output Size: %u elements\r\n",
report.outputs[i].size);
}
// Nodes and signature
APP_DBG_MSG("Number of Nodes: %u\r\n", report.n_nodes);
APP_DBG_MSG("Network Signature: 0x%08X\r\n", report.signature);
} else {
APP_DBG_MSG("Error querying model info.\r\n");
}
}
/**
 * @brief Process PCM data, update the circular buffer, and trigger classification.
 * @param None
 * @retval None
 */
static void AudioProcess(void) {
// Step 1: Convert PDM to PCM
BSP_AUDIO_IN_PDMToPCM(BSP_AUDIO_INSTANCE, PDM_Buffer, PCM_Buffer);
// Step 2: Prepare 60 ms frame with overlap
memcpy(frame, pcm_buffer_prev,
sizeof(float32_t) * (FRAME_SIZE - STRIDE_SIZE)); // Copy previous overlap
for (uint32_t i = 0; i < STRIDE_SIZE; i++) {
frame[FRAME_SIZE - STRIDE_SIZE + i] = (float32_t) PCM_Buffer[i]
/ 32768.0f; // Normalize PCM
}
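	/* Note: this loop consumes STRIDE_SIZE (480) samples, i.e. 30 ms, per
	 * callback, while PCM_Buffer holds 16 * N_MS_PER_INTERRUPT samples; if
	 * N_MS_PER_INTERRUPT is less than 30 this indexes past PCM_Buffer. */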
memcpy(pcm_buffer_prev, &frame[STRIDE_SIZE],
sizeof(float32_t) * (FRAME_SIZE - STRIDE_SIZE)); // Update overlap
// Step 3: Apply FFT
arm_rfft_fast_instance_f32 fft_instance;
arm_rfft_fast_init_f32(&fft_instance, FFT_BINS);
arm_rfft_fast_f32(&fft_instance, frame, fft_output, 0);
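	/* Note: arm_rfft_fast_init_f32() takes the FFT length as its second
	 * argument; FFT_BINS is 256, so this transforms only the first 256 of the
	 * 960 samples in frame[] and writes 256 packed floats to fft_output[].
	 * For a 512-point FFT this should presumably be FFT_SIZE instead. */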
// Step 4: Compute FFT magnitudes
for (uint32_t i = 0; i < FFT_BINS / 2; i++) {
fft_output[i] = sqrtf(
fft_output[i * 2] * fft_output[i * 2]
+ fft_output[i * 2 + 1] * fft_output[i * 2 + 1]);
}
// Step 5: Reduce frequency bins by averaging
float32_t spectrogram[TARGET_BINS];
for (uint32_t i = 0; i < TARGET_BINS; i++) {
float32_t bin_sum = 0.0f;
for (uint32_t j = 0; j < FFT_SIZE / (2 * TARGET_BINS); j++) {
bin_sum += fft_output[i * REDUCE_FACTOR + j];
}
spectrogram[i] = bin_sum / REDUCE_FACTOR;
}
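	/* Note: the magnitude loop above fills FFT_BINS / 2 = 128 entries, but with
	 * REDUCE_FACTOR = 6 this averaging reads indices up to 39 * 6 + 5 = 239,
	 * past the computed magnitudes. 128 bins / 40 targets is 3.2, so
	 * REDUCE_FACTOR and the inner loop bound (FFT_SIZE / (2 * TARGET_BINS) = 6)
	 * do not line up with the data actually produced. */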
// Step 6: Add spectrogram to circular buffer
memcpy(circularBuffer[bufferIndex], spectrogram,
sizeof(float32_t) * TARGET_BINS);
bufferIndex = (bufferIndex + 1) % CIRCULAR_BUFFER_SIZE;
framesFilled =
(framesFilled < CIRCULAR_BUFFER_SIZE) ?
framesFilled + 1 : CIRCULAR_BUFFER_SIZE;
// Run classification once we have at least 16 frames
if (framesFilled >= CIRCULAR_BUFFER_SIZE) {
classifyAudio();
}
}
/**
* @brief Run AI inference.
*/
void classifyAudio() {
ai_i32 nbatch;
// Flatten circular buffer in correct order
uint16_t readIndex = bufferIndex; // Start from the oldest frame
	for (uint32_t f = 0; f < SEQUENCE_FRAMES; f++) { // renamed from 'frame' to avoid shadowing the global frame[]
		for (uint32_t bin = 0; bin < TARGET_BINS; bin++) {
			((float*) in_data)[f * TARGET_BINS + bin] =
					circularBuffer[readIndex][bin];
		}
		readIndex = (readIndex + 1) % CIRCULAR_BUFFER_SIZE; // Move to the next frame
}
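	/* Note: in_data is declared as ai_i8[AI_SNORING_DATASET_IN_1_SIZE_BYTES] but
	 * is written here as 640 floats; for a float32 model that constant should
	 * equal 640 * sizeof(float) = 2560 bytes. A quick guard (assuming C11):
	 * _Static_assert(AI_SNORING_DATASET_IN_1_SIZE_BYTES == CLASSIFIER_INPUT_SIZE * sizeof(float), "input buffer size mismatch");
	 */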
// Debug flattened input data
APP_DBG_MSG("-> ");
for (uint32_t i = 0; i < 40; i++) {
APP_DBG_MSG("%.1f ", ((float*) in_data)[i]);
}
APP_DBG_MSG("\r\n");
// Assign input buffer to AI
ai_input[0].data = AI_HANDLE_PTR(in_data);
// Run inference
nbatch = ai_snoring_dataset_run(snoring_model, ai_input, ai_output);
if (nbatch != 1) {
ai_error err = ai_snoring_dataset_get_error(snoring_model);
APP_DBG_MSG("Inference failed: type=%d, code=%d\r\n", err.type,
err.code);
return;
}
// Process results
float class_prob_0 = ((float*) out_data)[0];
float class_prob_1 = ((float*) out_data)[1];
APP_DBG_MSG("==> Classification Results:\r\n");
APP_DBG_MSG("Class 0 Probability: %.3f\r\n", class_prob_0);
APP_DBG_MSG("Class 1 Probability: %.3f\r\n", class_prob_1);
if (class_prob_0 > class_prob_1) {
APP_DBG_MSG("Predicted Class: 0 (Non-snoring)\r\n");
} else {
APP_DBG_MSG("Predicted Class: 1 (Snoring)\r\n");
}
}
/**
 * @brief Initialize the audio acquisition peripherals (digital microphone).
 * @param AudioFreq  Sampling frequency in Hz
 * @param ChnlNbrIn  Number of input channels
 * @param ChnlNbrOut Number of output channels (unused here)
 * @retval None
 */
static void Init_Acquisition_Peripherals(uint32_t AudioFreq, uint32_t ChnlNbrIn,
uint32_t ChnlNbrOut) {
MicParams.BitsPerSample = 16;
MicParams.ChannelsNbr = ChnlNbrIn;
MicParams.Device = AUDIO_IN_DIGITAL_MIC;
MicParams.SampleRate = AudioFreq;
MicParams.Volume = AUDIO_VOLUME_INPUT;
if (BSP_AUDIO_IN_Init(BSP_AUDIO_INSTANCE, &MicParams) != BSP_ERROR_NONE) {
Error_Handler();
}
}
/**
 * @brief Start audio acquisition into PDM_Buffer.
 * @param None
 * @retval None
 */
static void Start_Acquisition(void) {
if (BSP_AUDIO_IN_Record(BSP_AUDIO_INSTANCE, (uint8_t*) PDM_Buffer,
AUDIO_IN_BUFFER_SIZE) != BSP_ERROR_NONE) {
Error_Handler();
}
}
/**
 * @brief Half Transfer user callback, called by BSP functions.
 * @param Instance Audio IN instance
 * @retval None
 */
void BSP_AUDIO_IN_HalfTransfer_CallBack(uint32_t Instance) {
AudioProcess();
}
/**
 * @brief Transfer Complete user callback, called by BSP functions.
 * @param Instance Audio IN instance
 * @retval None
 */
void BSP_AUDIO_IN_TransferComplete_CallBack(uint32_t Instance) {
AudioProcess();
}
Running the above code, I get the following output:
==>> INFO: Neural Network model initialized successfully
Input Tensor Configuration:
Format: 0x9821040
Shape: 1 x 640 x 1 x 1
Size: 640 elements
Output Tensor Configuration:
Format: 0x1821040
Shape: 1 x 2 x 1 x 1
Size: 2 elements
Model Name: snoring_dataset
Model Signature: 0x473bce4868635c036103868a9df2ef26
Model Date/Time: 2025-01-19T00:53:28+0100
Compile Date/Time: Jan 19 2025 00:56:02
Runtime Version: 9.1.0
Tool Version: 1.0.0
Tool API Version: 1.5.0
Number of MACCs: lu
Number of Inputs: 1
Input 0 Shape: 1 640 1 1
Input Format: 0x9821040
Input Size: 640 elements
Number of Outputs: 1
Output 0 Shape: 1 2 1 1
Output Format: 0x9821040
Output Size: 2 elements
Number of Nodes: 4
Network Signature: 0x3E65B9D4
-> 103.7 9.8 9.7 10.5 2.8 1.9 7.4 9.9 2.1 3.0 2.2 3.3 3.0 1.7 1.8 4.7 3.9 4.1 4.9 2.0 2.9 0.1 -2.8 0.2 -0.7 1.9 0.4 -0.1 0.2 1.6 0.9 -2.4 2.0 -0.3 -2.2 -2.5 2.2 0.9 -1.5 0.3
Inference failed: type=18, code=24
-> 92.6 12.5 9.1 8.6 7.2 6.7 12.4 5.8 7.3 5.1 5.6 3.6 5.1 4.0 7.2 9.5 10.4 7.8 9.3 3.7 4.9 -0.2 0.0 -3.0 0.7 2.8 0.7 -0.9 1.7 -1.5 -1.2 3.7 -2.6 -6.4 2.9 -1.2 6.8 1.9 -0.7 -1.4
Inference failed: type=18, code=24
-> 99.0 11.4 20.5 7.6 8.9 7.6 8.0 7.0 3.6 8.4 4.9 7.0 4.8 6.7 5.3 6.0 10.8 5.6 7.3 4.8 6.4 2.4 -0.4 -1.0 1.4 0.8 2.4 -0.3 -1.2 2.8 2.6 0.2 -2.0 3.1 1.6 0.7 -1.4 2.0 0.7 1.4
Inference failed: type=18, code=24
-> 33.2 15.9 13.4 11.5 12.9 21.6 16.9 15.5 8.0 13.4 10.9 8.3 9.0 10.6 7.7 6.3 8.2 8.6 11.3 8.1 8.3 5.8 -2.9 -5.5 2.7 -0.7 5.7 -0.7 -1.7 3.1 -1.4 -0.1 -4.4 -0.1 1.0 2.0 0.1 4.2 -3.4 2.9
Inference failed: type=18, code=24
-> 33.8 8.7 15.8 9.6 12.7 19.3 25.8 6.7 11.9 9.1 9.7 6.8 12.2 7.6 5.6 8.5 6.1 7.9 4.7 7.3 6.6 -0.5 -0.1 0.7 2.1 -6.8 -1.3 1.6 -3.5 -2.1 -3.7 -0.1 -2.5 1.2 -0.7 -2.0 -0.4 -0.1 -0.6 -4.1
Inference failed: type=18, code=24
-> 22.1 15.1 10.3 14.3 10.1 11.1 16.1 8.7 8.2 10.7 9.8 6.8 8.8 7.1 4.5 5.8 5.8 5.8 7.4 5.4 5.7 2.1 -1.7 1.5 -1.1 -1.3 1.7 0.9 -1.4 1.2 -0.1 -1.4 0.5 0.9 -1.8 2.7 -0.1 -2.3 1.9 -0.2
Inference failed: type=18, code=24
-> 11.6 6.2 14.8 4.0 11.3 4.4 5.4 5.8 2.0 4.1 3.2 1.5 4.1 3.4 3.4 3.7 3.9 3.0 4.6 1.6 3.7 0.9 0.1 0.0 1.3 -2.2 0.2 1.8 -0.5 -1.0 0.1 0.1 1.1 -0.5 -1.1 -0.2 2.5 -1.7 -0.4 0.3
Inference failed: type=18, code=24
This is the model code:
import tensorflow as tf
from tensorflow.keras import layers, models

def create_model():
    model = models.Sequential([
        # Input layer with reshape
        layers.Reshape((16, 40, 1), input_shape=(16 * 40,)),
        # DepthwiseConv2D layer
        # was layers.DepthwiseConv2D(kernel_size=(10, 8), depth_multiplier=8, activation='relu', padding='same'),
        layers.DepthwiseConv2D(kernel_size=(8, 6), depth_multiplier=2, activation='relu', padding='same'),
        # Flatten the output for the fully connected layer
        layers.Flatten(),
        # Fully connected layer with softmax activation for classification
        layers.Dense(2, activation='softmax')
    ])
    return model
# Save the trained model as TensorFlow Lite with legacy support
tflite_model_path = MODEL_NAME + '_legacy.tflite'
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
converter.target_spec.experimental_select_user_ops = False
tflite_model = converter.convert()
# Save the TFLite model
with open(tflite_model_path, 'wb') as f:
    f.write(tflite_model)
print(f"TensorFlow Lite model saved at {tflite_model_path}")
I was following the approach suggested in https://www.adrianarotaru.com/files/snoring.pdf, with some changes to adapt it to a very small microcontroller.
Any help is greatly appreciated!