Inference error running neural network on STM32WB55

SerFabio89
Visitor

I'm trying to build a snoring detector on an STM32WB55 DK board, but every time I run it, inference fails. To be precise, the returned error suggests that the input size or dimension is wrong.

This is the code I am running:

 

#include "microphone.h"
#include "dbg_trace.h"
#include "arm_math.h"
#include "arm_const_structs.h"

#include "ai_platform.h"
#include "snoring_dataset.h"
#include "snoring_dataset_data.h"

//#define AUDIO_OUT_MS                                     (20)
//#define AUDIO_OUT_SAMPLING_FREQUENCY                    16000

#define FFT_SIZE 512
#define FFT_BINS 256
#define TARGET_BINS 40       // Final bins after averaging
#define FRAME_SIZE 960       // 60 ms at 16 kHz
#define STRIDE_SIZE 480      // 30 ms stride (=> 30 ms overlap)
#define SEQUENCE_FRAMES 16   // Number of frames for the new model
#define CLASSIFIER_INPUT_SIZE (SEQUENCE_FRAMES * TARGET_BINS) // 640 floats
#define BUFFER_SIZE 32       // Circular buffer size for frames
#define REDUCE_FACTOR 6      // Average 6 bins together
#define CIRCULAR_BUFFER_SIZE SEQUENCE_FRAMES // Circular buffer holds 16 frames
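
/* Sanity check (my own arithmetic, for reference):
 * CLASSIFIER_INPUT_SIZE = 16 frames * 40 bins = 640 floats = 2560 bytes,
 * which should equal AI_SNORING_DATASET_IN_1_SIZE_BYTES if the generated
 * model really expects a flat float32 input of 640 elements. */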

uint16_t PDM_Buffer[((((2 * AUDIO_IN_CHANNELS * AUDIO_IN_SAMPLING_FREQUENCY)
		/ 1000) * MAX_DECIMATION_FACTOR) / 16) * N_MS_PER_INTERRUPT];
uint16_t PCM_Buffer[((AUDIO_IN_SAMPLING_FREQUENCY) / 1000) * N_MS_PER_INTERRUPT];
BSP_AUDIO_Init_t MicParams;

static float32_t pcm_buffer_prev[FRAME_SIZE - STRIDE_SIZE] = { 0 }; // Previous overlap data
static float32_t frame[FRAME_SIZE] = { 0 };                       // 60 ms frame
static float32_t fft_output[FFT_BINS];                           // FFT output

// Circular buffer to store spectrogram frames
static volatile float32_t circularBuffer[CIRCULAR_BUFFER_SIZE][TARGET_BINS] = {
		0 };
static uint16_t bufferIndex = 0; // Index in the circular buffer
static uint16_t framesFilled = 0; // Number of frames currently filled in the buffer

static ai_handle snoring_model = AI_HANDLE_NULL;
AI_ALIGNED(4) static ai_u8 activations[AI_SNORING_DATASET_DATA_ACTIVATIONS_SIZE];
AI_ALIGNED(4) static ai_i8 in_data[AI_SNORING_DATASET_IN_1_SIZE_BYTES];
AI_ALIGNED(4) static ai_i8 out_data[AI_SNORING_DATASET_OUT_1_SIZE_BYTES];
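/* Note: in_data/out_data are declared as byte (ai_i8) arrays but are written
 * and read as float32 below, so the *_SIZE_BYTES macros must already be in
 * bytes (4 per float element). */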
AI_ALIGNED(4) static ai_buffer ai_input[AI_SNORING_DATASET_IN_NUM] = { 0 };
AI_ALIGNED(4) static ai_buffer ai_output[AI_SNORING_DATASET_OUT_NUM] = { 0 };

void classifyAudio();

static void Init_Acquisition_Peripherals(uint32_t AudioFreq, uint32_t ChnlNbrIn,
		uint32_t ChnlNbrOut);
static void Start_Acquisition(void);
static void AudioProcess(void);

void InitAudio(void) {
	Init_Acquisition_Peripherals(AUDIO_IN_SAMPLING_FREQUENCY, AUDIO_IN_CHANNELS,
			0);
	Start_Acquisition();

	ai_network_params ai_params =
					AI_NETWORK_PARAMS_INIT(
							AI_SNORING_DATASET_DATA_WEIGHTS(ai_snoring_dataset_data_weights_get()),
							AI_SNORING_DATASET_DATA_ACTIVATIONS(activations)
					);

	ai_error ai_err = ai_snoring_dataset_create(&snoring_model,
	AI_SNORING_DATASET_DATA_CONFIG);
	if (ai_err.type != AI_ERROR_NONE) {
		APP_DBG_MSG("Error: Model creation failed. Type=%d, Code=%d\r\n",
				ai_err.type, ai_err.code);
		return;
	}

	if (!ai_snoring_dataset_init(snoring_model, &ai_params)) {
		APP_DBG_MSG("Error: Model initialization failed.\r\n");
		return;
	}

	ai_input[0] = (ai_buffer) {
		.format = AI_BUFFER_FORMAT_FLOAT,
		.data = AI_HANDLE_PTR(in_data),
		.meta_info = NULL,
		.flags = AI_FLAG_NONE,
		.size = CLASSIFIER_INPUT_SIZE,
		.shape = AI_BUFFER_SHAPE_INIT( // AI_SHAPE_BCWH order: Batch, Channels, Width, Height
				AI_SHAPE_BCWH,
				4,   // 4 dimensions in total
				1,   // Batch size
				640, // Flattened tensor size (16 frames x 40 bins), in the channel slot
				1,   // Width placeholder
				1    // Height placeholder
		),
	};

	ai_output[0] = (ai_buffer) {
		.format = AI_BUFFER_FORMAT_FLOAT,
		.data = AI_HANDLE_PTR(out_data),
		.meta_info = NULL,
		.flags = AI_FLAG_NONE,
		.size = AI_SNORING_DATASET_OUT_1_SIZE_BYTES / sizeof(float),
		.shape = AI_BUFFER_SHAPE_INIT( // AI_SHAPE_BCWH order: Batch, Channels, Width, Height
				AI_SHAPE_BCWH,
				4, // 4 dimensions in total
				1, // Batch size
				2, // Output classes
				1, // Width placeholder
				1  // Height placeholder
		),
	};

	APP_DBG_MSG("==>> INFO: Neural Network model initialized successfully\r\n");

	APP_DBG_MSG("Input Tensor Configuration:\r\n");
	APP_DBG_MSG("  Format: 0x%X\r\n", ai_input[0].format);
	APP_DBG_MSG("  Shape: %u x %u x %u x %u\r\n", ai_input[0].shape.data[0],
			ai_input[0].shape.data[1], ai_input[0].shape.data[2],
			ai_input[0].shape.data[3]);
	APP_DBG_MSG("  Size: %u elements\r\n", ai_input[0].size);

	APP_DBG_MSG("Output Tensor Configuration:\r\n");
	APP_DBG_MSG("  Format: 0x%X\r\n", ai_output[0].format);
	APP_DBG_MSG("  Shape: %u x %u x %u x %u\r\n", ai_output[0].shape.data[0],
			ai_output[0].shape.data[1], ai_output[0].shape.data[2],
			ai_output[0].shape.data[3]);
	APP_DBG_MSG("  Size: %u elements\r\n", ai_output[0].size);

	ai_network_report report;
	if (ai_snoring_dataset_get_info(snoring_model, &report)) {
		// General model information
		APP_DBG_MSG("Model Name: %s\r\n", report.model_name);
		APP_DBG_MSG("Model Signature: %s\r\n", report.model_signature);
		APP_DBG_MSG("Model Date/Time: %s\r\n", report.model_datetime);
		APP_DBG_MSG("Compile Date/Time: %s\r\n", report.compile_datetime);

		// Runtime and tool versions
		APP_DBG_MSG("Runtime Version: %u.%u.%u\r\n",
				report.runtime_version.major, report.runtime_version.minor,
				report.runtime_version.micro);
		APP_DBG_MSG("Tool Version: %u.%u.%u\r\n", report.tool_version.major,
				report.tool_version.minor, report.tool_version.micro);
		APP_DBG_MSG("Tool API Version: %u.%u.%u\r\n",
				report.tool_api_version.major, report.tool_api_version.minor,
				report.tool_api_version.micro);

		// MACC (Multiply-Accumulate Operations) information
		APP_DBG_MSG("Number of MACCs: %" PRIu64 "\r\n", report.n_macc);

		// Inputs
		APP_DBG_MSG("Number of Inputs: %u\r\n", report.n_inputs);
		for (ai_u16 i = 0; i < report.n_inputs; i++) {
			APP_DBG_MSG("  Input %u Shape: ", i);
			for (ai_u32 dim = 0; dim < report.inputs[i].shape.size; dim++) {
				APP_DBG_MSG("%u ", report.inputs[i].shape.data[dim]);
			}
			APP_DBG_MSG("\r\n");
			APP_DBG_MSG("  Input Format: 0x%X\r\n", report.inputs[i].format);
			APP_DBG_MSG("  Input Size: %u elements\r\n", report.inputs[i].size);
		}

		// Outputs
		APP_DBG_MSG("Number of Outputs: %u\r\n", report.n_outputs);
		for (ai_u16 i = 0; i < report.n_outputs; i++) {
			APP_DBG_MSG("  Output %u Shape: ", i);
			for (ai_u32 dim = 0; dim < report.outputs[i].shape.size; dim++) {
				APP_DBG_MSG("%u ", report.outputs[i].shape.data[dim]);
			}
			APP_DBG_MSG("\r\n");
			APP_DBG_MSG("  Output Format: 0x%X\r\n", report.outputs[i].format);
			APP_DBG_MSG("  Output Size: %u elements\r\n",
					report.outputs[i].size);
		}

		// Nodes and signature
		APP_DBG_MSG("Number of Nodes: %u\r\n", report.n_nodes);
		APP_DBG_MSG("Network Signature: 0x%08X\r\n", report.signature);
	} else {
		APP_DBG_MSG("Error querying model info.\r\n");
	}

}

/**
 * @brief  Process PCM data, update circular buffer, and trigger classification.
 * @param  None
 * @retval None
 */
static void AudioProcess(void) {
	// Step 1: Convert PDM to PCM
	BSP_AUDIO_IN_PDMToPCM(BSP_AUDIO_INSTANCE, PDM_Buffer, PCM_Buffer);

	// Step 2: Prepare 60 ms frame with overlap
	memcpy(frame, pcm_buffer_prev,
			sizeof(float32_t) * (FRAME_SIZE - STRIDE_SIZE)); // Copy previous overlap
	for (uint32_t i = 0; i < STRIDE_SIZE; i++) {
		frame[FRAME_SIZE - STRIDE_SIZE + i] = (float32_t) PCM_Buffer[i]
				/ 32768.0f; // Normalize PCM
	}
	memcpy(pcm_buffer_prev, &frame[STRIDE_SIZE],
			sizeof(float32_t) * (FRAME_SIZE - STRIDE_SIZE)); // Update overlap
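
	/* Note: the loop above assumes each callback delivers at least STRIDE_SIZE
	 * (480) samples in PCM_Buffer, i.e. N_MS_PER_INTERRUPT >= 30 at 16 kHz;
	 * PCM_Buffer only holds (16 * N_MS_PER_INTERRUPT) samples. */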

	// Step 3: Apply FFT
	arm_rfft_fast_instance_f32 fft_instance;
	arm_rfft_fast_init_f32(&fft_instance, FFT_BINS);
	arm_rfft_fast_f32(&fft_instance, frame, fft_output, 0);
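	/* Note: arm_rfft_fast_init_f32() takes the FFT length, so this is a
	 * 256-point (FFT_BINS) transform: only the first 256 of the 960 frame
	 * samples are used, and the init status is not checked. */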

	// Step 4: Compute FFT magnitudes
	for (uint32_t i = 0; i < FFT_BINS / 2; i++) {
		fft_output[i] = sqrtf(
				fft_output[i * 2] * fft_output[i * 2]
						+ fft_output[i * 2 + 1] * fft_output[i * 2 + 1]);
	}

	// Step 5: Reduce frequency bins by averaging
	float32_t spectrogram[TARGET_BINS];
	for (uint32_t i = 0; i < TARGET_BINS; i++) {
		float32_t bin_sum = 0.0f;
		for (uint32_t j = 0; j < REDUCE_FACTOR; j++) { // == FFT_SIZE / (2 * TARGET_BINS)
			bin_sum += fft_output[i * REDUCE_FACTOR + j];
		}
		spectrogram[i] = bin_sum / REDUCE_FACTOR;
	}
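	/* Note: with REDUCE_FACTOR = 6 this reads fft_output[] up to index
	 * TARGET_BINS * REDUCE_FACTOR - 1 = 239, while Step 4 only computed
	 * magnitudes for FFT_BINS / 2 = 128 bins. */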

	// Step 6: Add spectrogram to circular buffer
	memcpy(circularBuffer[bufferIndex], spectrogram,
			sizeof(float32_t) * TARGET_BINS);
	bufferIndex = (bufferIndex + 1) % CIRCULAR_BUFFER_SIZE;

	framesFilled =
			(framesFilled < CIRCULAR_BUFFER_SIZE) ?
					framesFilled + 1 : CIRCULAR_BUFFER_SIZE;
	// Run classification once we have at least 16 frames
	if (framesFilled >= CIRCULAR_BUFFER_SIZE) {
		classifyAudio();
	}

}

/**
 * @brief Run AI inference.
 */
void classifyAudio() {
	ai_i32 nbatch;

	// Flatten circular buffer in correct order
	uint16_t readIndex = bufferIndex; // Start from the oldest frame
	for (uint32_t f = 0; f < SEQUENCE_FRAMES; f++) { // 'f' avoids shadowing the global 'frame'
		for (uint32_t bin = 0; bin < TARGET_BINS; bin++) {
			((float*) in_data)[f * TARGET_BINS + bin] =
					circularBuffer[readIndex][bin];
		}
		readIndex = (readIndex + 1) % CIRCULAR_BUFFER_SIZE; // Move to the next frame
	}

	// Debug flattened input data
	APP_DBG_MSG("-> ");
	for (uint32_t i = 0; i < 40; i++) {
		APP_DBG_MSG("%.1f ", ((float*) in_data)[i]);
	}
	APP_DBG_MSG("\r\n");

	// Assign input buffer to AI
	ai_input[0].data = AI_HANDLE_PTR(in_data);

	// Run inference
	nbatch = ai_snoring_dataset_run(snoring_model, ai_input, ai_output);

	if (nbatch != 1) {
		ai_error err = ai_snoring_dataset_get_error(snoring_model);
		APP_DBG_MSG("Inference failed: type=%d, code=%d\r\n", err.type,
				err.code);
		return;
	}

	// Process results
	float class_prob_0 = ((float*) out_data)[0];
	float class_prob_1 = ((float*) out_data)[1];

	APP_DBG_MSG("==> Classification Results:\r\n");
	APP_DBG_MSG("Class 0 Probability: %.3f\r\n", class_prob_0);
	APP_DBG_MSG("Class 1 Probability: %.3f\r\n", class_prob_1);

	if (class_prob_0 > class_prob_1) {
		APP_DBG_MSG("Predicted Class: 0 (Non-snoring)\r\n");
	} else {
		APP_DBG_MSG("Predicted Class: 1 (Snoring)\r\n");
	}

}

/**
 * @brief  Initialize the audio acquisition peripherals (digital microphone).
 * @param  AudioFreq   Sampling frequency in Hz
 * @param  ChnlNbrIn   Number of input channels
 * @param  ChnlNbrOut  Number of output channels (unused here)
 * @retval None
 */
static void Init_Acquisition_Peripherals(uint32_t AudioFreq, uint32_t ChnlNbrIn,
		uint32_t ChnlNbrOut) {
	MicParams.BitsPerSample = 16;
	MicParams.ChannelsNbr = ChnlNbrIn;
	MicParams.Device = AUDIO_IN_DIGITAL_MIC;
	MicParams.SampleRate = AudioFreq;
	MicParams.Volume = AUDIO_VOLUME_INPUT;

	if (BSP_AUDIO_IN_Init(BSP_AUDIO_INSTANCE, &MicParams) != BSP_ERROR_NONE) {
		Error_Handler();
	}
}

/**
 * @brief  Start audio acquisition: begin recording PDM data into PDM_Buffer.
 * @param  None
 * @retval None
 */
static void Start_Acquisition(void) {
	if (BSP_AUDIO_IN_Record(BSP_AUDIO_INSTANCE, (uint8_t*) PDM_Buffer,
	AUDIO_IN_BUFFER_SIZE) != BSP_ERROR_NONE) {
		Error_Handler();
	}
}

/**
 * @brief  Half Transfer user callback, called by BSP functions.
 * @param  None
 * @retval None
 */
void BSP_AUDIO_IN_HalfTransfer_CallBack(uint32_t Instance) {
	AudioProcess();
}

/**
 * @brief  Transfer Complete user callback, called by BSP functions.
 * @param  None
 * @retval None
 */
void BSP_AUDIO_IN_TransferComplete_CallBack(uint32_t Instance) {
	AudioProcess();
}
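
In case it is relevant: this is how I understand the input/output buffers are supposed to be obtained from the generated code instead of hand-building the ai_buffer structs (just a sketch based on my reading of the snoring_dataset.h template; I'm assuming the getters below exist in this X-CUBE-AI version):

ai_buffer *inputs  = ai_snoring_dataset_inputs_get(snoring_model, NULL);  // assumed getter
ai_buffer *outputs = ai_snoring_dataset_outputs_get(snoring_model, NULL); // assumed getter

inputs[0].data  = AI_HANDLE_PTR(in_data);   // point the model at my flattened frames
outputs[0].data = AI_HANDLE_PTR(out_data);

ai_i32 n = ai_snoring_dataset_run(snoring_model, inputs, outputs);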

 

Running the above code, I get the following output:


==>> INFO: Neural Network model initialized successfully
Input Tensor Configuration:
Format: 0x9821040
Shape: 1 x 640 x 1 x 1
Size: 640 elements
Output Tensor Configuration:
Format: 0x1821040
Shape: 1 x 2 x 1 x 1
Size: 2 elements
Model Name: snoring_dataset
Model Signature: 0x473bce4868635c036103868a9df2ef26
Model Date/Time: 2025-01-19T00:53:28+0100
Compile Date/Time: Jan 19 2025 00:56:02
Runtime Version: 9.1.0
Tool Version: 1.0.0
Tool API Version: 1.5.0
Number of MACCs: lu
Number of Inputs: 1
Input 0 Shape: 1 640 1 1
Input Format: 0x9821040
Input Size: 640 elements
Number of Outputs: 1
Output 0 Shape: 1 2 1 1
Output Format: 0x9821040
Output Size: 2 elements
Number of Nodes: 4
Network Signature: 0x3E65B9D4
-> 103.7 9.8 9.7 10.5 2.8 1.9 7.4 9.9 2.1 3.0 2.2 3.3 3.0 1.7 1.8 4.7 3.9 4.1 4.9 2.0 2.9 0.1 -2.8 0.2 -0.7 1.9 0.4 -0.1 0.2 1.6 0.9 -2.4 2.0 -0.3 -2.2 -2.5 2.2 0.9 -1.5 0.3
Inference failed: type=18, code=24
-> 92.6 12.5 9.1 8.6 7.2 6.7 12.4 5.8 7.3 5.1 5.6 3.6 5.1 4.0 7.2 9.5 10.4 7.8 9.3 3.7 4.9 -0.2 0.0 -3.0 0.7 2.8 0.7 -0.9 1.7 -1.5 -1.2 3.7 -2.6 -6.4 2.9 -1.2 6.8 1.9 -0.7 -1.4
Inference failed: type=18, code=24
-> 99.0 11.4 20.5 7.6 8.9 7.6 8.0 7.0 3.6 8.4 4.9 7.0 4.8 6.7 5.3 6.0 10.8 5.6 7.3 4.8 6.4 2.4 -0.4 -1.0 1.4 0.8 2.4 -0.3 -1.2 2.8 2.6 0.2 -2.0 3.1 1.6 0.7 -1.4 2.0 0.7 1.4
Inference failed: type=18, code=24
-> 33.2 15.9 13.4 11.5 12.9 21.6 16.9 15.5 8.0 13.4 10.9 8.3 9.0 10.6 7.7 6.3 8.2 8.6 11.3 8.1 8.3 5.8 -2.9 -5.5 2.7 -0.7 5.7 -0.7 -1.7 3.1 -1.4 -0.1 -4.4 -0.1 1.0 2.0 0.1 4.2 -3.4 2.9
Inference failed: type=18, code=24
-> 33.8 8.7 15.8 9.6 12.7 19.3 25.8 6.7 11.9 9.1 9.7 6.8 12.2 7.6 5.6 8.5 6.1 7.9 4.7 7.3 6.6 -0.5 -0.1 0.7 2.1 -6.8 -1.3 1.6 -3.5 -2.1 -3.7 -0.1 -2.5 1.2 -0.7 -2.0 -0.4 -0.1 -0.6 -4.1
Inference failed: type=18, code=24
-> 22.1 15.1 10.3 14.3 10.1 11.1 16.1 8.7 8.2 10.7 9.8 6.8 8.8 7.1 4.5 5.8 5.8 5.8 7.4 5.4 5.7 2.1 -1.7 1.5 -1.1 -1.3 1.7 0.9 -1.4 1.2 -0.1 -1.4 0.5 0.9 -1.8 2.7 -0.1 -2.3 1.9 -0.2
Inference failed: type=18, code=24
-> 11.6 6.2 14.8 4.0 11.3 4.4 5.4 5.8 2.0 4.1 3.2 1.5 4.1 3.4 3.4 3.7 3.9 3.0 4.6 1.6 3.7 0.9 0.1 0.0 1.3 -2.2 0.2 1.8 -0.5 -1.0 0.1 0.1 1.1 -0.5 -1.1 -0.2 2.5 -1.7 -0.4 0.3
Inference failed: type=18, code=24
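
If I decode that error against my copy of ai_platform.h (so take this with a grain of salt, the enum values may differ between versions), it seems to map as follows:

/* My reading of the ai_error enums (unverified):
 *   err.type == 18 (0x12) -> AI_ERROR_INVALID_INPUT
 *   err.code == 24 (0x18) -> AI_ERROR_CODE_INVALID_SIZE
 * i.e. the runtime apparently rejects the input buffer size/shape. */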

 

This is the code of the model:

 

import tensorflow as tf
from tensorflow.keras import layers, models

def create_model():
    model = models.Sequential([
        # Input layer with reshape
        layers.Reshape((16, 40, 1), input_shape=(16 * 40,)),

        # DepthwiseConv2D layer
        # was layers.DepthwiseConv2D(kernel_size=(10, 8), depth_multiplier=8, activation='relu', padding='same'),
        layers.DepthwiseConv2D(kernel_size=(8, 6), depth_multiplier=2, activation='relu', padding='same'),

        # Flattening the output for the fully connected layer
        layers.Flatten(),

        # Fully connected layer
        layers.Dense(2, activation='softmax')  # Softmax activation for classification
    ])
    return model

# (training of `model` is omitted here)

# Save the trained model as TensorFlow Lite with legacy support
tflite_model_path = MODEL_NAME + '_legacy.tflite'

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
converter.target_spec.experimental_select_user_ops = False
tflite_model = converter.convert()

# Save the TFLite model
with open(tflite_model_path, 'wb') as f:
    f.write(tflite_model)

print(f"TensorFlow Lite model saved at {tflite_model_path}")

 


I was following the approach suggested in https://www.adrianarotaru.com/files/snoring.pdf, with some changes to adapt it to a very small microcontroller.

 

 

Any help is greatly appreciated!
