Inference error running neural network on STM32WB55

SerFabio89
Visitor

I'm trying to build a snoring detector on an STM32WB55 DK board, but every time I run it, inference fails. To be precise, the returned error suggests that the input size or dimension is wrong.

This is the code I am running:

 

#include "microphone.h"
#include "dbg_trace.h"
#include "arm_math.h"
#include "arm_const_structs.h"

#include "ai_platform.h"
#include "snoring_dataset.h"
#include "snoring_dataset_data.h"

//#define AUDIO_OUT_MS                                     (20)
//#define AUDIO_OUT_SAMPLING_FREQUENCY                    16000

#define FFT_SIZE 512
#define FFT_BINS 256
#define TARGET_BINS 40       // Final bins after averaging
#define FRAME_SIZE 960       // 60 ms at 16 kHz
#define STRIDE_SIZE 480      // 30 ms stride (=> 30 ms overlap)
#define SEQUENCE_FRAMES 16   // Number of frames for the new model
#define CLASSIFIER_INPUT_SIZE (SEQUENCE_FRAMES * TARGET_BINS) // 640 floats
#define BUFFER_SIZE 32       // Circular buffer size for frames
#define REDUCE_FACTOR 6      // Average 6 bins together
#define CIRCULAR_BUFFER_SIZE SEQUENCE_FRAMES // Circular buffer holds 16 frames
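
/* Sanity check (my own arithmetic, for reference):
 * CLASSIFIER_INPUT_SIZE = 16 frames * 40 bins = 640 floats = 2560 bytes,
 * which should equal AI_SNORING_DATASET_IN_1_SIZE_BYTES if the generated
 * model really expects a flat float32 input of 640 elements. */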

uint16_t PDM_Buffer[((((2 * AUDIO_IN_CHANNELS * AUDIO_IN_SAMPLING_FREQUENCY)
		/ 1000) * MAX_DECIMATION_FACTOR) / 16) * N_MS_PER_INTERRUPT];
uint16_t PCM_Buffer[((AUDIO_IN_SAMPLING_FREQUENCY) / 1000) * N_MS_PER_INTERRUPT];
BSP_AUDIO_Init_t MicParams;

static float32_t pcm_buffer_prev[FRAME_SIZE - STRIDE_SIZE] = { 0 }; // Previous overlap data
static float32_t frame[FRAME_SIZE] = { 0 };                       // 60 ms frame
static float32_t fft_output[FFT_BINS];                           // FFT output

// Circular buffer to store spectrogram frames
static volatile float32_t circularBuffer[CIRCULAR_BUFFER_SIZE][TARGET_BINS] = {
		0 };
static uint16_t bufferIndex = 0; // Index in the circular buffer
static uint16_t framesFilled = 0; // Number of frames currently filled in the buffer

static ai_handle snoring_model = AI_HANDLE_NULL;
AI_ALIGNED(4) static ai_u8 activations[AI_SNORING_DATASET_DATA_ACTIVATIONS_SIZE];
AI_ALIGNED(4) static ai_i8 in_data[AI_SNORING_DATASET_IN_1_SIZE_BYTES];
AI_ALIGNED(4) static ai_i8 out_data[AI_SNORING_DATASET_OUT_1_SIZE_BYTES];
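/* Note: in_data/out_data are declared as byte (ai_i8) arrays but are written
 * and read as float32 below, so the *_SIZE_BYTES macros must already be in
 * bytes (4 per float element). */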
AI_ALIGNED(4) static ai_buffer ai_input[AI_SNORING_DATASET_IN_NUM] = { 0 };
AI_ALIGNED(4) static ai_buffer ai_output[AI_SNORING_DATASET_OUT_NUM] = { 0 };

void classifyAudio();

static void Init_Acquisition_Peripherals(uint32_t AudioFreq, uint32_t ChnlNbrIn,
		uint32_t ChnlNbrOut);
static void Start_Acquisition(void);
static void AudioProcess(void);

void InitAudio(void) {
	Init_Acquisition_Peripherals(AUDIO_IN_SAMPLING_FREQUENCY, AUDIO_IN_CHANNELS,
			0);
	Start_Acquisition();

	ai_network_params ai_params =
					AI_NETWORK_PARAMS_INIT(
							AI_SNORING_DATASET_DATA_WEIGHTS(ai_snoring_dataset_data_weights_get()),
							AI_SNORING_DATASET_DATA_ACTIVATIONS(activations)
					);

	ai_error ai_err = ai_snoring_dataset_create(&snoring_model,
	AI_SNORING_DATASET_DATA_CONFIG);
	if (ai_err.type != AI_ERROR_NONE) {
		APP_DBG_MSG("Error: Model creation failed. Type=%d, Code=%d\r\n",
				ai_err.type, ai_err.code);
		return;
	}

	if (!ai_snoring_dataset_init(snoring_model, &ai_params)) {
		APP_DBG_MSG("Error: Model initialization failed.\r\n");
		return;
	}

	ai_input[0] = (ai_buffer) {
		.format = AI_BUFFER_FORMAT_FLOAT,
		.data = AI_HANDLE_PTR(in_data),
		.meta_info = NULL,
		.flags = AI_FLAG_NONE,
		.size = CLASSIFIER_INPUT_SIZE,
		.shape = AI_BUFFER_SHAPE_INIT( // AI_SHAPE_BCWH order: Batch, Channels, Width, Height
				AI_SHAPE_BCWH,
				4,   // 4 dimensions in total
				1,   // Batch size
				640, // Flattened tensor size (16 frames x 40 bins), in the channel slot
				1,   // Width placeholder
				1    // Height placeholder
		),
	};

	ai_output[0] = (ai_buffer) {
		.format = AI_BUFFER_FORMAT_FLOAT,
		.data = AI_HANDLE_PTR(out_data),
		.meta_info = NULL,
		.flags = AI_FLAG_NONE,
		.size = AI_SNORING_DATASET_OUT_1_SIZE_BYTES / sizeof(float),
		.shape = AI_BUFFER_SHAPE_INIT( // AI_SHAPE_BCWH order: Batch, Channels, Width, Height
				AI_SHAPE_BCWH,
				4, // 4 dimensions in total
				1, // Batch size
				2, // Output classes
				1, // Width placeholder
				1  // Height placeholder
		),
	};

	APP_DBG_MSG("==>> INFO: Neural Network model initialized successfully\r\n");

	APP_DBG_MSG("Input Tensor Configuration:\r\n");
	APP_DBG_MSG("  Format: 0x%X\r\n", ai_input[0].format);
	APP_DBG_MSG("  Shape: %u x %u x %u x %u\r\n", ai_input[0].shape.data[0],
			ai_input[0].shape.data[1], ai_input[0].shape.data[2],
			ai_input[0].shape.data[3]);
	APP_DBG_MSG("  Size: %u elements\r\n", ai_input[0].size);

	APP_DBG_MSG("Output Tensor Configuration:\r\n");
	APP_DBG_MSG("  Format: 0x%X\r\n", ai_output[0].format);
	APP_DBG_MSG("  Shape: %u x %u x %u x %u\r\n", ai_output[0].shape.data[0],
			ai_output[0].shape.data[1], ai_output[0].shape.data[2],
			ai_output[0].shape.data[3]);
	APP_DBG_MSG("  Size: %u elements\r\n", ai_output[0].size);

	ai_network_report report;
	if (ai_snoring_dataset_get_info(snoring_model, &report)) {
		// General model information
		APP_DBG_MSG("Model Name: %s\r\n", report.model_name);
		APP_DBG_MSG("Model Signature: %s\r\n", report.model_signature);
		APP_DBG_MSG("Model Date/Time: %s\r\n", report.model_datetime);
		APP_DBG_MSG("Compile Date/Time: %s\r\n", report.compile_datetime);

		// Runtime and tool versions
		APP_DBG_MSG("Runtime Version: %u.%u.%u\r\n",
				report.runtime_version.major, report.runtime_version.minor,
				report.runtime_version.micro);
		APP_DBG_MSG("Tool Version: %u.%u.%u\r\n", report.tool_version.major,
				report.tool_version.minor, report.tool_version.micro);
		APP_DBG_MSG("Tool API Version: %u.%u.%u\r\n",
				report.tool_api_version.major, report.tool_api_version.minor,
				report.tool_api_version.micro);

		// MACC (Multiply-Accumulate Operations) information
		APP_DBG_MSG("Number of MACCs: %" PRIu64 "\r\n", report.n_macc);

		// Inputs
		APP_DBG_MSG("Number of Inputs: %u\r\n", report.n_inputs);
		for (ai_u16 i = 0; i < report.n_inputs; i++) {
			APP_DBG_MSG("  Input %u Shape: ", i);
			for (ai_u32 dim = 0; dim < report.inputs[i].shape.size; dim++) {
				APP_DBG_MSG("%u ", report.inputs[i].shape.data[dim]);
			}
			APP_DBG_MSG("\r\n");
			APP_DBG_MSG("  Input Format: 0x%X\r\n", report.inputs[i].format);
			APP_DBG_MSG("  Input Size: %u elements\r\n", report.inputs[i].size);
		}

		// Outputs
		APP_DBG_MSG("Number of Outputs: %u\r\n", report.n_outputs);
		for (ai_u16 i = 0; i < report.n_outputs; i++) {
			APP_DBG_MSG("  Output %u Shape: ", i);
			for (ai_u32 dim = 0; dim < report.outputs[i].shape.size; dim++) {
				APP_DBG_MSG("%u ", report.outputs[i].shape.data[dim]);
			}
			APP_DBG_MSG("\r\n");
			APP_DBG_MSG("  Output Format: 0x%X\r\n", report.outputs[i].format);
			APP_DBG_MSG("  Output Size: %u elements\r\n",
					report.outputs[i].size);
		}

		// Nodes and signature
		APP_DBG_MSG("Number of Nodes: %u\r\n", report.n_nodes);
		APP_DBG_MSG("Network Signature: 0x%08X\r\n", report.signature);
	} else {
		APP_DBG_MSG("Error querying model info.\r\n");
	}

}

/**
 * @brief  Process PCM data, update circular buffer, and trigger classification.
 * @param  None
 * @retval None
 */
static void AudioProcess(void) {
	// Step 1: Convert PDM to PCM
	BSP_AUDIO_IN_PDMToPCM(BSP_AUDIO_INSTANCE, PDM_Buffer, PCM_Buffer);

	// Step 2: Prepare 60 ms frame with overlap
	memcpy(frame, pcm_buffer_prev,
			sizeof(float32_t) * (FRAME_SIZE - STRIDE_SIZE)); // Copy previous overlap
	for (uint32_t i = 0; i < STRIDE_SIZE; i++) {
		frame[FRAME_SIZE - STRIDE_SIZE + i] = (float32_t) PCM_Buffer[i]
				/ 32768.0f; // Normalize PCM
	}
	memcpy(pcm_buffer_prev, &frame[STRIDE_SIZE],
			sizeof(float32_t) * (FRAME_SIZE - STRIDE_SIZE)); // Update overlap
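
	/* Note: the loop above assumes each callback delivers at least STRIDE_SIZE
	 * (480) samples in PCM_Buffer, i.e. N_MS_PER_INTERRUPT >= 30 at 16 kHz;
	 * PCM_Buffer only holds (16 * N_MS_PER_INTERRUPT) samples. */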

	// Step 3: Apply FFT
	arm_rfft_fast_instance_f32 fft_instance;
	arm_rfft_fast_init_f32(&fft_instance, FFT_BINS);
	arm_rfft_fast_f32(&fft_instance, frame, fft_output, 0);
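	/* Note: arm_rfft_fast_init_f32() takes the FFT length, so this is a
	 * 256-point (FFT_BINS) transform: only the first 256 of the 960 frame
	 * samples are used, and the init status is not checked. */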

	// Step 4: Compute FFT magnitudes
	for (uint32_t i = 0; i < FFT_BINS / 2; i++) {
		fft_output[i] = sqrtf(
				fft_output[i * 2] * fft_output[i * 2]
						+ fft_output[i * 2 + 1] * fft_output[i * 2 + 1]);
	}

	// Step 5: Reduce frequency bins by averaging
	float32_t spectrogram[TARGET_BINS];
	for (uint32_t i = 0; i < TARGET_BINS; i++) {
		float32_t bin_sum = 0.0f;
		for (uint32_t j = 0; j < REDUCE_FACTOR; j++) { // == FFT_SIZE / (2 * TARGET_BINS)
			bin_sum += fft_output[i * REDUCE_FACTOR + j];
		}
		spectrogram[i] = bin_sum / REDUCE_FACTOR;
	}
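	/* Note: with REDUCE_FACTOR = 6 this reads fft_output[] up to index
	 * TARGET_BINS * REDUCE_FACTOR - 1 = 239, while Step 4 only computed
	 * magnitudes for FFT_BINS / 2 = 128 bins. */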

	// Step 6: Add spectrogram to circular buffer
	memcpy(circularBuffer[bufferIndex], spectrogram,
			sizeof(float32_t) * TARGET_BINS);
	bufferIndex = (bufferIndex + 1) % CIRCULAR_BUFFER_SIZE;

	framesFilled =
			(framesFilled < CIRCULAR_BUFFER_SIZE) ?
					framesFilled + 1 : CIRCULAR_BUFFER_SIZE;
	// Run classification once we have at least 16 frames
	if (framesFilled >= CIRCULAR_BUFFER_SIZE) {
		classifyAudio();
	}

}

/**
 * @brief Run AI inference.
 */
void classifyAudio() {
	ai_i32 nbatch;

	// Flatten circular buffer in correct order
	uint16_t readIndex = bufferIndex; // Start from the oldest frame
	for (uint32_t f = 0; f < SEQUENCE_FRAMES; f++) { // 'f' avoids shadowing the global 'frame'
		for (uint32_t bin = 0; bin < TARGET_BINS; bin++) {
			((float*) in_data)[f * TARGET_BINS + bin] =
					circularBuffer[readIndex][bin];
		}
		readIndex = (readIndex + 1) % CIRCULAR_BUFFER_SIZE; // Move to the next frame
	}

	// Debug flattened input data
	APP_DBG_MSG("-> ");
	for (uint32_t i = 0; i < 40; i++) {
		APP_DBG_MSG("%.1f ", ((float*) in_data)[i]);
	}
	APP_DBG_MSG("\r\n");

	// Assign input buffer to AI
	ai_input[0].data = AI_HANDLE_PTR(in_data);

	// Run inference
	nbatch = ai_snoring_dataset_run(snoring_model, ai_input, ai_output);

	if (nbatch != 1) {
		ai_error err = ai_snoring_dataset_get_error(snoring_model);
		APP_DBG_MSG("Inference failed: type=%d, code=%d\r\n", err.type,
				err.code);
		return;
	}

	// Process results
	float class_prob_0 = ((float*) out_data)[0];
	float class_prob_1 = ((float*) out_data)[1];

	APP_DBG_MSG("==> Classification Results:\r\n");
	APP_DBG_MSG("Class 0 Probability: %.3f\r\n", class_prob_0);
	APP_DBG_MSG("Class 1 Probability: %.3f\r\n", class_prob_1);

	if (class_prob_0 > class_prob_1) {
		APP_DBG_MSG("Predicted Class: 0 (Non-snoring)\r\n");
	} else {
		APP_DBG_MSG("Predicted Class: 1 (Snoring)\r\n");
	}

}

/**
 * @brief  Initialize the audio acquisition peripherals (digital microphone).
 * @param  AudioFreq   Sampling frequency in Hz
 * @param  ChnlNbrIn   Number of input channels
 * @param  ChnlNbrOut  Number of output channels (unused here)
 * @retval None
 */
static void Init_Acquisition_Peripherals(uint32_t AudioFreq, uint32_t ChnlNbrIn,
		uint32_t ChnlNbrOut) {
	MicParams.BitsPerSample = 16;
	MicParams.ChannelsNbr = ChnlNbrIn;
	MicParams.Device = AUDIO_IN_DIGITAL_MIC;
	MicParams.SampleRate = AudioFreq;
	MicParams.Volume = AUDIO_VOLUME_INPUT;

	if (BSP_AUDIO_IN_Init(BSP_AUDIO_INSTANCE, &MicParams) != BSP_ERROR_NONE) {
		Error_Handler();
	}
}

/**
 * @brief  Start audio acquisition: begin recording PDM data into PDM_Buffer.
 * @param  None
 * @retval None
 */
static void Start_Acquisition(void) {
	if (BSP_AUDIO_IN_Record(BSP_AUDIO_INSTANCE, (uint8_t*) PDM_Buffer,
	AUDIO_IN_BUFFER_SIZE) != BSP_ERROR_NONE) {
		Error_Handler();
	}
}

/**
 * @brief  Half Transfer user callback, called by BSP functions.
 * @param  None
 * @retval None
 */
void BSP_AUDIO_IN_HalfTransfer_CallBack(uint32_t Instance) {
	AudioProcess();
}

/**
 * @brief  Transfer Complete user callback, called by BSP functions.
 * @param  None
 * @retval None
 */
void BSP_AUDIO_IN_TransferComplete_CallBack(uint32_t Instance) {
	AudioProcess();
}
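
In case it is relevant: this is how I understand the input/output buffers are supposed to be obtained from the generated code instead of hand-building the ai_buffer structs (just a sketch based on my reading of the snoring_dataset.h template; I'm assuming the getters below exist in this X-CUBE-AI version):

ai_buffer *inputs  = ai_snoring_dataset_inputs_get(snoring_model, NULL);  // assumed getter
ai_buffer *outputs = ai_snoring_dataset_outputs_get(snoring_model, NULL); // assumed getter

inputs[0].data  = AI_HANDLE_PTR(in_data);   // point the model at my flattened frames
outputs[0].data = AI_HANDLE_PTR(out_data);

ai_i32 n = ai_snoring_dataset_run(snoring_model, inputs, outputs);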

 

Running the above code, I get the following output:


==>> INFO: Neural Network model initialized successfully
Input Tensor Configuration:
Format: 0x9821040
Shape: 1 x 640 x 1 x 1
Size: 640 elements
Output Tensor Configuration:
Format: 0x1821040
Shape: 1 x 2 x 1 x 1
Size: 2 elements
Model Name: snoring_dataset
Model Signature: 0x473bce4868635c036103868a9df2ef26
Model Date/Time: 2025-01-19T00:53:28+0100
Compile Date/Time: Jan 19 2025 00:56:02
Runtime Version: 9.1.0
Tool Version: 1.0.0
Tool API Version: 1.5.0
Number of MACCs: lu
Number of Inputs: 1
Input 0 Shape: 1 640 1 1
Input Format: 0x9821040
Input Size: 640 elements
Number of Outputs: 1
Output 0 Shape: 1 2 1 1
Output Format: 0x9821040
Output Size: 2 elements
Number of Nodes: 4
Network Signature: 0x3E65B9D4
-> 103.7 9.8 9.7 10.5 2.8 1.9 7.4 9.9 2.1 3.0 2.2 3.3 3.0 1.7 1.8 4.7 3.9 4.1 4.9 2.0 2.9 0.1 -2.8 0.2 -0.7 1.9 0.4 -0.1 0.2 1.6 0.9 -2.4 2.0 -0.3 -2.2 -2.5 2.2 0.9 -1.5 0.3
Inference failed: type=18, code=24
-> 92.6 12.5 9.1 8.6 7.2 6.7 12.4 5.8 7.3 5.1 5.6 3.6 5.1 4.0 7.2 9.5 10.4 7.8 9.3 3.7 4.9 -0.2 0.0 -3.0 0.7 2.8 0.7 -0.9 1.7 -1.5 -1.2 3.7 -2.6 -6.4 2.9 -1.2 6.8 1.9 -0.7 -1.4
Inference failed: type=18, code=24
-> 99.0 11.4 20.5 7.6 8.9 7.6 8.0 7.0 3.6 8.4 4.9 7.0 4.8 6.7 5.3 6.0 10.8 5.6 7.3 4.8 6.4 2.4 -0.4 -1.0 1.4 0.8 2.4 -0.3 -1.2 2.8 2.6 0.2 -2.0 3.1 1.6 0.7 -1.4 2.0 0.7 1.4
Inference failed: type=18, code=24
-> 33.2 15.9 13.4 11.5 12.9 21.6 16.9 15.5 8.0 13.4 10.9 8.3 9.0 10.6 7.7 6.3 8.2 8.6 11.3 8.1 8.3 5.8 -2.9 -5.5 2.7 -0.7 5.7 -0.7 -1.7 3.1 -1.4 -0.1 -4.4 -0.1 1.0 2.0 0.1 4.2 -3.4 2.9
Inference failed: type=18, code=24
-> 33.8 8.7 15.8 9.6 12.7 19.3 25.8 6.7 11.9 9.1 9.7 6.8 12.2 7.6 5.6 8.5 6.1 7.9 4.7 7.3 6.6 -0.5 -0.1 0.7 2.1 -6.8 -1.3 1.6 -3.5 -2.1 -3.7 -0.1 -2.5 1.2 -0.7 -2.0 -0.4 -0.1 -0.6 -4.1
Inference failed: type=18, code=24
-> 22.1 15.1 10.3 14.3 10.1 11.1 16.1 8.7 8.2 10.7 9.8 6.8 8.8 7.1 4.5 5.8 5.8 5.8 7.4 5.4 5.7 2.1 -1.7 1.5 -1.1 -1.3 1.7 0.9 -1.4 1.2 -0.1 -1.4 0.5 0.9 -1.8 2.7 -0.1 -2.3 1.9 -0.2
Inference failed: type=18, code=24
-> 11.6 6.2 14.8 4.0 11.3 4.4 5.4 5.8 2.0 4.1 3.2 1.5 4.1 3.4 3.4 3.7 3.9 3.0 4.6 1.6 3.7 0.9 0.1 0.0 1.3 -2.2 0.2 1.8 -0.5 -1.0 0.1 0.1 1.1 -0.5 -1.1 -0.2 2.5 -1.7 -0.4 0.3
Inference failed: type=18, code=24
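
If I decode that error against my copy of ai_platform.h (so take this with a grain of salt, the enum values may differ between versions), it seems to map as follows:

/* My reading of the ai_error enums (unverified):
 *   err.type == 18 (0x12) -> AI_ERROR_INVALID_INPUT
 *   err.code == 24 (0x18) -> AI_ERROR_CODE_INVALID_SIZE
 * i.e. the runtime apparently rejects the input buffer size/shape. */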

 

This is the code of the model:

 

import tensorflow as tf
from tensorflow.keras import layers, models

def create_model():
    model = models.Sequential([
        # Input layer with reshape
        layers.Reshape((16, 40, 1), input_shape=(16 * 40,)),

        # DepthwiseConv2D layer
        # was layers.DepthwiseConv2D(kernel_size=(10, 8), depth_multiplier=8, activation='relu', padding='same'),
        layers.DepthwiseConv2D(kernel_size=(8, 6), depth_multiplier=2, activation='relu', padding='same'),

        # Flattening the output for the fully connected layer
        layers.Flatten(),

        # Fully connected layer
        layers.Dense(2, activation='softmax')  # Softmax activation for classification
    ])
    return model

# (training of `model` is omitted here)

# Save the trained model as TensorFlow Lite with legacy support
tflite_model_path = MODEL_NAME + '_legacy.tflite'

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
converter.target_spec.experimental_select_user_ops = False
tflite_model = converter.convert()

# Save the TFLite model
with open(tflite_model_path, 'wb') as f:
    f.write(tflite_model)

print(f"TensorFlow Lite model saved at {tflite_model_path}")

 


I was following the approach suggested in https://www.adrianarotaru.com/files/snoring.pdf, with some changes to adapt it to a very small microcontroller.

 

 

Any help is greatly appreciated!
