2025-10-10 11:19 PM
Solved! Go to Solution.
2025-10-30 3:53 AM
using the saved_model.pb and this quantize script:
import tensorflow as tf
import numpy as np
def representative_dataset():
    """Yield ten calibration batches of uniform random float32 data.

    Every item is a one-element list holding an array of shape
    (1, 640, 640, 3), which is the form assigned to
    ``converter.representative_dataset`` further down.

    NOTE(review): random noise is only a placeholder here; calibrating with
    real, preprocessed images usually gives more meaningful quantization
    ranges -- confirm before relying on the resulting accuracy.
    """
    num_batches = 10
    batch_shape = (1, 640, 640, 3)
    produced = 0
    while produced < num_batches:
        sample = np.random.rand(*batch_shape).astype(np.float32)
        yield [sample]
        produced += 1
# Convert the SavedModel in ./saved_model to a quantized TFLite flatbuffer.
converter = tf.lite.TFLiteConverter.from_saved_model("./saved_model")
# Enable the converter's default optimization set (post-training quantization).
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Only allow the INT8 builtin op set.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# Model boundary types: uint8 input, float32 output.
converter.inference_input_type = tf.uint8  # or tf.int8
converter.inference_output_type = tf.float32  # or tf.int8
# Calibration generator defined above (currently random data).
converter.representative_dataset = representative_dataset
# NOTE(review): private/experimental attribute; presumably switches weight
# quantization from per-channel to per-tensor -- confirm it is honoured by the
# installed TensorFlow version.
converter._experimental_disable_per_channel = True
tflite_model = converter.convert()
# Save the model
with open("model.tflite", 'wb') as f:
    f.write(tflite_model)
then passing the result through ST Edge AI to generate model.nb
and testing on the board with this script:
from stai_mpu import stai_mpu_network
from numpy.typing import NDArray
from typing import Any, List
from pathlib import Path
from PIL import Image
from argparse import ArgumentParser
from timeit import default_timer as timer
import cv2 as cv
import numpy as np
import time
def intersection(rect1, rect2):
    """
    Return the overlap area of two axis-aligned rectangles.

    Each rectangle is a sequence whose first four items are
    (x1, y1, x2, y2); any further items (score, class id, ...) are ignored.
    Rectangles that do not overlap, or that only touch along an edge, have
    an intersection area of 0.
    """
    left = max(rect1[0], rect2[0])
    top = max(rect1[1], rect2[1])
    right = min(rect1[2], rect2[2])
    bottom = min(rect1[3], rect2[3])
    # Clamp each extent at zero. Without the clamp, rectangles that are
    # disjoint on BOTH axes produce negative * negative = a positive "area",
    # and rectangles disjoint on one axis produce a negative area.
    overlap_width = max(0, right - left)
    overlap_height = max(0, bottom - top)
    return overlap_width * overlap_height
def union(rect1,rect2):
    """
    Return the area covered by either rectangle.

    Computed as area(rect1) + area(rect2) - intersection(rect1, rect2),
    where each rectangle's first four items are (x1, y1, x2, y2).
    """
    areas = []
    for rect in (rect1, rect2):
        x1, y1, x2, y2 = rect[:4]
        areas.append((x2 - x1) * (y2 - y1))
    return areas[0] + areas[1] - intersection(rect1, rect2)
def iou(rect1,rect2):
    """
    Return the intersection-over-union ratio of two rectangles.

    The ratio is intersection(rect1, rect2) divided by union(rect1, rect2);
    no guard is applied for a zero union.
    """
    overlap_area = intersection(rect1, rect2)
    combined_area = union(rect1, rect2)
    return overlap_area / combined_area
def get_results(stai_mpu_model, threshold, iou_threshold):
    """
    Decode output 0 of the model into detections and apply NMS.

    The raw output is squeezed and transposed so that every row describes
    one candidate box: columns 0-3 are (x_center, y_center, width, height)
    and columns 4 onward hold one confidence value per class.

    Args:
        stai_mpu_model: object exposing ``get_output(index=...)``.
        threshold: a candidate is kept when its best class confidence is
            strictly greater than this value.
        iou_threshold: during NMS, a candidate is dropped when its IoU with
            an already accepted box is not below this value.

    Returns:
        A list of ``[left, top, right, bottom, score, class_id]`` entries,
        ordered by descending score. NMS is class-agnostic, as before.
    """
    final_dets = []
    output = stai_mpu_model.get_output(index=0)
    output = np.transpose(np.squeeze(output))
    print("output shape: ", output.shape)
    # Everything after the four box coordinates is a per-class confidence.
    confidence_level = output[:, 4:]
    print("confidence shape: ", confidence_level.shape)
    print(np.max(confidence_level, axis=0))
    best_scores = np.max(confidence_level, axis=1)
    print(best_scores)
    # Threshold on the per-row maximum so each candidate is selected once;
    # np.where on the 2-D matrix would repeat a row for every class above
    # the threshold.
    indices = np.where(best_scores > threshold)[0]
    print(indices)
    filtered_output = output[indices]
    print(filtered_output.shape)
    base_objects_list = []
    for row in filtered_output:
        x_center, y_center, width, height = row[:4]
        left = (x_center - width/2)
        top = (y_center - height/2)
        right = (x_center + width/2)
        bottom = (y_center + height/2)
        class_scores = row[4:]
        # Report the winning class instead of a hard-coded 0.
        class_id = int(np.argmax(class_scores))
        score = class_scores[class_id]
        base_objects_list.append([left, top, right, bottom, score, class_id])
    # Greedy NMS: repeatedly accept the highest-scoring remaining box.
    base_objects_list.sort(key=lambda det: det[4], reverse=True)
    while base_objects_list:
        # Pop the accepted box explicitly so the loop always makes progress,
        # rather than relying on its IoU with itself reaching the threshold.
        best = base_objects_list.pop(0)
        final_dets.append(best)
        base_objects_list = [det for det in base_objects_list if iou(det, best) < iou_threshold]
    return final_dets
def load_labels(filename):
    """Read a label file and return its lines with surrounding whitespace removed.

    Blank lines are kept as empty strings so that list indices keep matching
    line numbers in the file.
    """
    labels = []
    with open(filename, 'r') as label_file:
        for raw_line in label_file:
            labels.append(raw_line.strip())
    return labels
if __name__ == '__main__':
    # Command-line demo: load a model with stai_mpu, run it once on a single
    # image, print the tensor metadata and timing, then decode the detections.
    parser = ArgumentParser()
    # The image and the model are used unconditionally below, so require them
    # up front instead of failing later with a less helpful error.
    parser.add_argument('-i','--image', required=True, help='image to be classified.')
    parser.add_argument('-m','--model_file', required=True, help='model to be executed.')
    parser.add_argument('-l','--label_file', help='name of labels file.')
    # type=float: without it a value given on the command line stays a str and
    # the normalization arithmetic below raises a TypeError.
    parser.add_argument('--input_mean', default=127.5, type=float, help='input_mean')
    parser.add_argument('--input_std', default=127.5, type=float, help='input stddev')
    args = parser.parse_args()
    stai_model = stai_mpu_network(model_path=args.model_file, use_hw_acceleration=True)
    # Read input tensor information
    num_inputs = stai_model.get_num_inputs()
    input_tensor_infos = stai_model.get_input_infos()
    for i in range(0, num_inputs):
        input_tensor_shape = input_tensor_infos[i].get_shape()
        input_tensor_name = input_tensor_infos[i].get_name()
        input_tensor_rank = input_tensor_infos[i].get_rank()
        input_tensor_dtype = input_tensor_infos[i].get_dtype()
        print("**Input node: {} -Input_name:{} -Input_dims:{} - input_type:{} -Input_shape:{}".format(
            i, input_tensor_name, input_tensor_rank, input_tensor_dtype, input_tensor_shape))
        if input_tensor_infos[i].get_qtype() == "staticAffine":
            # Reading the input scale and zero point variables
            input_tensor_scale = input_tensor_infos[i].get_scale()
            input_tensor_zp = input_tensor_infos[i].get_zero_point()
        if input_tensor_infos[i].get_qtype() == "dynamicFixedPoint":
            # Reading the dynamic fixed point position
            input_tensor_dfp_pos = input_tensor_infos[i].get_fixed_point_pos()
    # Read output tensor information
    num_outputs = stai_model.get_num_outputs()
    output_tensor_infos = stai_model.get_output_infos()
    for i in range(0, num_outputs):
        output_tensor_shape = output_tensor_infos[i].get_shape()
        output_tensor_name = output_tensor_infos[i].get_name()
        output_tensor_rank = output_tensor_infos[i].get_rank()
        output_tensor_dtype = output_tensor_infos[i].get_dtype()
        print("**Output node: {} -Output_name:{} -Output_dims:{} -  Output_type:{} -Output_shape:{}".format(
            i, output_tensor_name, output_tensor_rank, output_tensor_dtype, output_tensor_shape))
        if output_tensor_infos[i].get_qtype() == "staticAffine":
            # Reading the output scale and zero point variables
            output_tensor_scale = output_tensor_infos[i].get_scale()
            output_tensor_zp = output_tensor_infos[i].get_zero_point()
        if output_tensor_infos[i].get_qtype() == "dynamicFixedPoint":
            # Reading the dynamic fixed point position
            output_tensor_dfp_pos = output_tensor_infos[i].get_fixed_point_pos()
    # Reading input image. The shape/dtype used from here on are those of the
    # last input tensor visited by the loop above.
    # NOTE(review): PIL's resize() takes (width, height); if get_shape() is
    # laid out as (1, H, W, C), index 1 is the height. Harmless for square
    # inputs -- confirm the axis order before using a non-square model.
    input_width = input_tensor_shape[1]
    print(input_width)
    input_height = input_tensor_shape[2]
    print(input_height)
    input_image = Image.open(args.image).resize((input_width,input_height))
    # Add the leading batch dimension.
    input_data = np.expand_dims(input_image, axis=0)
    # Only float32 inputs are normalized; other dtypes receive the raw pixels.
    if input_tensor_dtype == np.float32:
        input_data = (np.float32(input_data) - args.input_mean) /args.input_std
    print("----1")
    img_array_after = np.array(input_data)
    print("Dtype after resize: ", img_array_after.dtype)
    print("Shape after resize: ", img_array_after.shape)
    print("----1test")
    if input_tensor_dtype == np.float32:
         print("float32")
    if input_tensor_dtype == np.float16:
         print("float16")
         # float16 inputs are fed as un-normalized float32 values.
         input_data = np.float32(input_data)
    print("----2test")
    stai_model.set_input(0, input_data)
    print("----2")
    # Time only the inference call itself.
    start = timer()
    stai_model.run()
    end = timer()
    print("Inference time: ", (end - start) *1000, "ms")
    # Confidence threshold 0.5, NMS IoU threshold 0.5.
    final_dets = get_results(stai_model, 0.5, 0.5)
    print(final_dets)
 
we get this:
Loading dynamically: /usr/lib/libstai_mpu_ovx.so.6
[OVX]: Loading nbg model
**Input node: 0 -Input_name: -Input_dims:4 - input_type:uint8 -Input_shape:(1, 640, 640, 3)
**Output node: 0 -Output_name: -Output_dims:3 -  Output_type:float16 -Output_shape:(1, 11, 8400)
640
640
----1
Dtype after resize:  uint8
Shape after resize:  (1, 640, 640, 3)
----1test
----2test
----2
Inference time:  118.66035591810942 ms
output shape:  (8400, 11)
confidence shape:  (8400, 7)
[0.75878906 0.01785278 0.54003906 0.         0.         0.0044632
0.        ]
[0. 0. 0. ... 0. 0. 0.]
[8250 8251 8252 8270 8272 8291]
(6, 11)
[[0.3995361328125, 0.265625, 0.7967529296875, 1.001953125, 0.75878906, 0]]
 
Have a good day,
Julian
2025-10-13 12:44 AM - edited 2025-10-13 12:44 AM
Hello @fanronghua0123456 ,
There are a few common reasons for this symptom:
Could you look at these possible causes and let me know if any of these help?
Have a good day,
Julian
2025-10-13 1:06 AM
hello .@Julian E.
You're not wrong. I will refer to the following two links:
1.
https://github.com/stm32-hotspot/ultralytics/tree/main/examples/YOLOv8-STEdgeAI/stedgeai_models
2.
But from what I see, both of these articles talk about quantizing .tflite to .nb format, and neither mentions quantizing .pt to .nb format. Do I have to first convert the .pt file to .tflite format, and then convert it to .nb format?
2025-10-13 4:46 AM - edited 2025-10-13 4:46 AM
Hello @fanronghua0123456,
Your issue seems similar to this one:
Issues converting YOLOv8 ONNX model to uint8 .nb f... - STMicroelectronics Community
The issue comes from the fact that the model was never quantized before conversion to the STM32 Edge AI format (.nb).
STEdgeAI does not perform quantization, it only converts and optimizes an already quantized model into a format that can run efficiently on the STM32 hardware.
Here’s the correct process to follow:
./stedgeai generate -m path/to/quantized_model --target stm32mp25
This step converts the already quantized model to the .nb format optimized for your hardware.
If you skip the quantization step and directly run stedgeai generate on a non-quantized ONNX model, the resulting .nb model will not behave correctly which explains why your output probabilities are all 1.0.
Important clarification:
It’s essential to understand that quantization and conversion/optimization for hardware are two separate steps.
STEdgeAI does not perform quantization — it only takes a quantized model (in ONNX or TFLite format) and converts it into the .nb format optimized for STM32 hardware.
So you cannot quantize a .pt model directly to .nb.
The correct workflow is:
model.pt → export to .onnx or .tflite 
→ quantization (INT8) using the appropriate tool 
→ conversion/optimization with STEdgeAI (→ .nb)
→ run on STM32 hardware
Have a good day,
Julian
2025-10-13 8:25 PM
hello.@Julian E.
Thanks for your reply!
1. output best.tflite.
model = YOLO('/home/alientek/best.pt') 
model.export(format='tflite', imgsz=640, int8=True)
#######
Export complete (150.0s)
Results saved to /home/alientek
Predict: yolo predict task=detect model=/home/alientek/best_saved_model/best_int8.tflite imgsz=640 int8
Validate: yolo val task=detect model=/home/alientek/best_saved_model/best_int8.tflite imgsz=640 data=ultralytics/cfg/datasets/handler.yaml int8
Visualize: https://netron.app
2. i get this error output!!
(yolov11) alientek@ubuntu:/opt/ST/STEdgeAI/2.2/Utilities/linux$ sudo ./stedgeai generate --model /home/alientek/best_saved_model/best_int8.tflite --target stm32mp25
[sudo] password for alientek:
ST Edge AI Core v2.2.0-20266 2adc00962
PASS: 0%| | 0/2 [00:00<?, ?it/s]E [ops/vsi_nn_op_conv2d.c:op_check:189]Inputs/Outputs data type not support: FLOAT16, SYMM PC INT8
E [vsi_nn_graph.c:setup_node:551]Check node[3] CONV2D fail
E [vnn_.c:vnn_Create:9463]CHECK STATUS(-1:A generic error code, used when no other describes the error.)
E [main.c:vnn_CreateNeuralNetwork:198]CHECK PTR 198
E [main.c:main:232]CHECK PTR 232
E 10:53:20 Fatal model generation error: 65280
E 10:53:20 ('Fatal model generation error: 65280', 'nbg_generate')
Error during first compilation.
Retrying with other settings...
PASS: 0%| | 0/2 [01:37<?, ?it/s]E [ops/vsi_nn_op_conv2d.c:op_check:189]Inputs/Outputs data type not support: FLOAT16, SYMM PC INT8
E [vsi_nn_graph.c:setup_node:551]Check node[3] CONV2D fail
E [vnn_.c:vnn_Create:9463]CHECK STATUS(-1:A generic error code, used when no other describes the error.)
E [main.c:vnn_CreateNeuralNetwork:198]CHECK PTR 198
E [main.c:main:232]CHECK PTR 232
E 10:53:56 Fatal model generation error: 65280
E 10:53:56 ('Fatal model generation error: 65280', 'nbg_generate')
2025-10-13 8:27 PM
Alternatively, I specified that the input and output are int8, but it still reports an error:
(yolov11) alientek@ubuntu:/opt/ST/STEdgeAI/2.2/Utilities/linux$ sudo ./stedgeai generate --model /home/alientek/best_saved_model/best_int8.tflite --target stm32mp25 --input-data-type int8 --output-data-type int8
[sudo] password for alientek:
ST Edge AI Core v2.2.0-20266 2adc00962
PASS: 0%| | 0/2 [00:00<?, ?it/s]make: *** [/opt/ST/STEdgeAI/2.2/Utilities/linux/export_ovxlib/makefile.linux:53: vnn_pre_process.o] Error 1
E 11:24:53 Fatal model compilation error: 512
E 11:24:53 ('Fatal model compilation error: 512', 'nbg_compile')
Error during first compilation.
Retrying with other settings...
PASS: 0%| | 0/2 [00:51<?, ?it/s]make: *** [/opt/ST/STEdgeAI/2.2/Utilities/linux/export_ovxlib/makefile.linux:53: vnn_pre_process.o] Error 1
E 11:25:18 Fatal model compilation error: 512
E 11:25:18 ('Fatal model compilation error: 512', 'nbg_compile')
E010(InvalidModelError): Error during NBG compilation, model is not supported
2025-10-14 6:48 AM - edited 2025-10-14 6:51 AM
Hello @fanronghua0123456,
When you export the model, you get multiple ones.
It seems that this one: best_int8.tflite is not fully quantized.
In particular, the conv3 is not quantized:
explaining the error you get:
PASS: 0%| | 0/2 [00:00<?, ?it/s]E [ops/vsi_nn_op_conv2d.c:op_check:189]Inputs/Outputs data type not support: FLOAT16, SYMM PC INT8
You need to use this one: yolo11n_integer_quant.tflite
This should work, but by default Ultralytics exports the model quantized per-channel, which is not optimal for the MP2. You will get an inference time of around 400 ms.
╔════════════════════════════════════════════════╗
║     X-LINUX-AI unified NN model benchmark      ║
╠═════════════════════════════╦══════════════════╣
║           Machine           ║  STM32MP257F-DK  ║
║          CPU cores          ║        2         ║
║     CPU Clock frequency     ║      1.5GHz      ║
║   GPU/NPU Driver Version    ║      6.4.21      ║
║   GPU/NPU Clock frequency   ║     800 MHZ      ║
║     X-LINUX-AI Version      ║      v6.1.1      ║
║                             ║                  ║
║                             ║                  ║
╚═════════════════════════════╩══════════════════╝
For hardware accelerated models, computation engine used for benchmark is NPU running at 800 MHZ
For other models, computation engine uses for benchmark is CPU with 2 cores at :  1.5GHz
╔═════════════════════════════════════════════════════════════════════════════════════╗
║                                 NBG models benchmark                                ║
╠═══════════════════════╦═════════════════════╦═══════╦═══════╦═══════╦═══════════════╣
║       Model Name      ║ Inference Time (ms) ║ CPU % ║ GPU % ║ NPU % ║ Peak RAM (MB) ║
╠═══════════════════════╬═════════════════════╬═══════╬═══════╬═══════╬═══════════════╣
║ yolo11n_integer_quant ║        464.31       ║  0.0  ║ 89.95 ║ 10.05 ║     27.78     ║
╚═══════════════════════╩═════════════════════╩═══════╩═══════╩═══════╩═══════════════╝
╔══════════════════════════════════════════════════════════════════════════╗
║                            Non-Optimal models                            ║
╠═══════════════════════╦══════════════════════════════════════════════════╣
║       model name      ║                     comments                     ║
╠═══════════════════════╬══════════════════════════════════════════════════╣
║ yolo11n_integer_quant ║ GPU usage is 89.95% compared to NPU usage 10.05% ║
║                       ║ please verify if the model is quantized or that  ║
║                       ║ the quantization scheme used is the 8-bits per-  ║
║                       ║                      tensor                      ║
╚═══════════════════════╩══════════════════════════════════════════════════╝
Note: Peak RAM information is only APPROXIMATE to the actual memory footprint of the model at runtime.
Take the information at your discretion.
 
You can edit the code in ultralytics/ultralytics/engine/exporter.py to quantize the model in per tensor:
In def export_saved_model(self, prefix=colorstr("TensorFlow SavedModel:")):
keras_model = onnx2tf.convert(
1065             input_onnx_file_path=f_onnx,
1066             output_folder_path=str(f),
1067             not_use_onnxsim=True,
1068             verbosity="error",  # note INT8-FP16 activation bug https://github.com/ultralytics/ultralytics/issues/15873
1069             output_integer_quantized_tflite=self.args.int8,
1070             quant_type="per-tensor",
1071             custom_input_op_name_np_data_path=np_data,
1072             enable_batchmatmul_unfold=True and not self.args.int8,  # fix lower no. of detected objects on GPU delegate
1073             output_signaturedefs=True,  # fix error with Attention block group convolution
1074             disable_group_convolution=self.args.format in {"tfjs", "edgetpu"},  # fix error with group convolution
1075         )
The option 'quant_type="per-tensor"'
Which should give you this:
root@stm32mp2-e3-c3-c9:~# x-linux-ai-benchmark -m yolo11n_integer_quant_pt.nb 
╔════════════════════════════════════════════════╗
║     X-LINUX-AI unified NN model benchmark      ║
╠═════════════════════════════╦══════════════════╣
║           Machine           ║  STM32MP257F-DK  ║
║          CPU cores          ║        2         ║
║     CPU Clock frequency     ║      1.5GHz      ║
║   GPU/NPU Driver Version    ║      6.4.21      ║
║   GPU/NPU Clock frequency   ║     800 MHZ      ║
║     X-LINUX-AI Version      ║      v6.1.1      ║
║                             ║                  ║
║                             ║                  ║
╚═════════════════════════════╩══════════════════╝
For hardware accelerated models, computation engine used for benchmark is NPU running at 800 MHZ
For other models, computation engine uses for benchmark is CPU with 2 cores at :  1.5GHz
╔════════════════════════════════════════════════════════════════════════════════════════╗
║                                  NBG models benchmark                                  ║
╠══════════════════════════╦═════════════════════╦═══════╦═══════╦═══════╦═══════════════╣
║        Model Name        ║ Inference Time (ms) ║ CPU % ║ GPU % ║ NPU % ║ Peak RAM (MB) ║
╠══════════════════════════╬═════════════════════╬═══════╬═══════╬═══════╬═══════════════╣
║ yolo11n_integer_quant_pt ║        109.95       ║  0.0  ║ 14.19 ║ 85.81 ║     26.48     ║
╚══════════════════════════╩═════════════════════╩═══════╩═══════╩═══════╩═══════════════╝
Note: Peak RAM information is only APPROXIMATE to the actual memory footprint of the model at runtime.
Take the information at your discretion.
So going from 400ms to 109ms because the NPU usage goes from 10% to 80% (due to per tensor quantization).
Have a good day,
Julian
2025-10-14 8:07 PM - edited 2025-10-14 10:31 PM
Hello. @Julian E.
Thank you very much for your reply. I followed your plan.
1.Increase parameters
quant_type="per-tensor",
2.used _integer_quant.tflite file.
******_integer_quant.tflite , and it really works. I have attached the picture for your reference.
but when I call it using C stai_mpu_wrapper library , and i want to output my shape.
std::vector<ObjDetect_Results> parseModelOutput(float* output, int num_classes = 7, float confidence_threshold = 0.55f)
{
    std::vector<ObjDetect_Results> detections;
    float *data = output;
    const int num_boxes = 8400;
    const int attributes_per_box = 4 + num_classes; // 4 postion  + num_classes 
    for (int w = 0; w < 8400; w++)
    {
        for (int h = 0; h < attributes_per_box; h++)
        {
                printf(" %.2f ",data[h * 8400 + w]);
        }
        printf("\r\n");
    }
    return detections;
and my output looks like this:
0.02  0.02  0.03  0.03  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.04  0.02  0.06  0.04  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.04  0.02  0.06  0.03  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.04  0.02  0.05  0.03  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.04  0.01  0.05  0.02  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.06  0.01  0.05  0.02  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.09  0.01  0.10  0.02  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.09  0.01  0.14  0.02  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.09  0.02  0.13  0.03  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.10  0.02  0.13  0.03  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.11  0.01  0.15  0.02  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.12  0.01  0.16  0.02  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.12  0.01  0.17  0.01  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.14  0.01  0.16  0.02  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.19  0.01  0.09  0.02  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.20  0.02  0.07  0.04  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.20  0.02  0.07  0.04  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.20  0.02  0.10  0.04  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.21  0.01  0.14  0.02  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.22  0.01  0.15  0.01  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.22  0.01  0.18  0.01  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.23  0.01  0.18  0.01  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.25  0.01  0.13  0.02  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.27  0.01  0.09  0.01  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.28  0.01  0.09  0.01  0.00  0.00  0.00  0.00  0.00  0.00  0.00
  0.27  0.01  0.13  0.01  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.32  0.01  0.20  0.01  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.35  0.01  0.13  0.02  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.35  0.02  0.09  0.03  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.37  0.02  0.09  0.04  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.37  0.02  0.13  0.03  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.37  0.01  0.15  0.02  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.37  0.01  0.18  0.01  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.38  0.01  0.17  0.01  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.43  0.01  0.09  0.02  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.45  0.02  0.06  0.04  0.00  0.00  0.00  0.00  0.00  0.00  0.00
 0.45  0.03  0.06  0.05  0.00  0.00  0.00  0.00  0.00  0.00  0.00
“ 0.27 0.01 0.13 0.01” Do we need to make some changes?
Thanks
2025-10-15 1:07 AM
Ultralytics YOLO detection models (v8 and later, including your YOLO11 export) flatten their detection output into a tensor of shape: (1, 4 + num_classes, N)
Where:
Each element represents one anchor (one “proposal box”)
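So, your tensor shape is: (1, 11, 8400), i.e. 4 box values + 7 class scores for each of the 8400 candidate boxes.
It is stored attribute-major (a row-major (11, 8400) array: all x_center values first, then all y_center values, and so on), meaning your code’s access pattern data[h * 8400 + w] is correct.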
Meaning of each number:
Let’s decode a line from your printout:
0.02 0.02 0.03 0.03 0.00 0.00 0.00 0.00 0.00 0.00 0.00
=> [x_center, y_center, width, height, class0_score, class1_score, … class6_score]
These values are normalized between 0 and 1 (relative to input image size).
So for an input 640×640:
float cx = 0.02 * 640; // ≈ 12.8 pixels
float cy = 0.02 * 640; // ≈ 12.8 pixels
float w = 0.03 * 640; // ≈ 19.2 pixels
float h = 0.03 * 640; // ≈ 19.2 pixels
This means the box covers roughly a 19×19 area at (12,12).
Then the following 7 numbers are class probabilities (after sigmoid).
They’re all 0.00 here, meaning confidence is too low to consider it a detection.
Why all class scores look 0.00
That’s totally normal, most of the 8400 proposals have near-zero confidence.
Only a few (maybe 10–30) will have high class scores (e.g., 0.6, 0.8, etc.).
Your print loop shows all boxes, so you mostly see background noise.
To focus on useful detections, apply your confidence threshold (0.55):
for (int w = 0; w < 8400; w++)
{
    float cx = data[0 * 8400 + w];
    float cy = data[1 * 8400 + w];
    float w_box = data[2 * 8400 + w];
    float h_box = data[3 * 8400 + w];
    // Find class with max probability
    float max_score = 0;
    int max_class = -1;
    for (int c = 0; c < num_classes; c++)
    {
        float score = data[(4 + c) * 8400 + w];
        if (score > max_score)
        {
            max_score = score;
            max_class = c;
        }
    }
    if (max_score > confidence_threshold)
    {
        ObjDetect_Results det;
        det.x = cx;
        det.y = cy;
        det.w = w_box;
        det.h = h_box;
        det.class_id = max_class;
        det.confidence = max_score;
        detections.push_back(det);
    }
}
Then perform non-max suppression (NMS) to remove overlapping boxes.
Understanding “8400” detections
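Why 8400? For YOLOv8 at 640×640: the three detection heads run at strides 8, 16 and 32, giving 80×80 + 40×40 + 20×20 = 6400 + 1600 + 400 = 8400 candidate boxes.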
Ultralytics TFLite export uses CHW flattened layout.
That means your array ordering data[attr * num_boxes + box_index] is correct.
If you print only a few detections (where class score > 0.5), you’ll see:
cx=0.51 cy=0.42 w=0.21 h=0.31 class=2 conf=0.84
Which is what you want to visualize or draw.
Have a good day,
Julian
2025-10-15 4:32 AM - edited 2025-10-23 11:34 PM
hello .@Julian E.
Currently, all output seems to be 0.00, as if the target cannot be detected.
However, I used “best_saved_model/best_integer_quant.tflite” to perform inference on the PC,
and it looks successful:
tflite_model = YOLO("/home/alientek/best_saved_model/best_integer_quant.tflite")
# Run inference
results = tflite_model("/home/alientek/hander1_1.jpg")
The output is as follows; “handler1” is the detected result:
(yolov11) alientek@ubuntu:~/yolov11$ python test1.py 
/home/alientek/.conda/envs/yolov11/lib/python3.10/site-packages/requests/__init__.py:86: RequestsDependencyWarning: Unable to find acceptable character detection dependency (chardet or charset_normalizer).
  warnings.warn(
WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify','pose' or 'obb'.
Loading /home/alientek/best_saved_model/best_integer_quant.tflite for TensorFlow Lite inference...
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
image 1/1 /home/alientek/hander1_1.jpg: 640x640 1 handler1, 1760.9ms
Speed: 38.9ms preprocess, 1760.9ms inference, 26.5ms postprocess per image at shape (1, 3, 640, 640)
Could there be a problem with the model conversion?
Is there a simple Python demo to test inference of a .nb model on a single image?
Thanks
