After having success in Optimiziation warnings meaning - #9 by ade I took a break for about 2 months. Now that I've come back to converting YOLO11 models in exactly the same way as before, performance is very poor, basically broken compared to the same models converted earlier. A yolo11m example:
$ hailortcli run detector.hef --batch-size 8
Running streaming inference (1.hef):
Transform data: true
Type: auto
Quantized: true
Network drone_detector/drone_detector: 100% | 80 | FPS: 15.97 | ETA: 00:00:00
> Inference result:
Network group: drone_detector
Frames count: 80
FPS: 15.97
Send Rate: 156.97 Mbit/s
Recv Rate: 155.99 Mbit/s
When doing real inference it's even worse than that, and the whole thing basically grinds to a halt fast. I've made sure my PCIe link is Gen 3, and in general I changed nothing except running everything on a fresh Debian installation on the RPi5 and compiling with a slightly newer version of the Dataflow Compiler (3.31.0 instead of 3.30.0 previously; I did try 3.30.0 again just in case, and the result is the same).
I've tried this on several RPi5 devices and on several AI HATs (same result on both the 13 and 26 TOPS versions).
For reference, here are the conversion steps I use to make a HEF file:
import torch

# Load our model into our environment
checkpoint = torch.load('best.pt', weights_only=False)
model = checkpoint['model']
model = model.float()
model.eval()

# Dummy input in FP32
dummy_input = torch.randn(16, 3, 640, 640, dtype=torch.float)

# Export to ONNX
torch.onnx.export(
    model,
    dummy_input,
    "best.onnx",
    export_params=True,
    opset_version=11,  # Adjust opset version if needed
    do_constant_folding=True,
    input_names=['input'],
    output_names=['output'])
print("ONNX model exported successfully!")
# ===============================================
import onnx
import onnxruntime as ort
import torch

# Load and validate the ONNX model
onnx_model = onnx.load("best.onnx")
onnx.checker.check_model(onnx_model)
print("ONNX model is valid!")

# Test the ONNX model with ONNX Runtime
dummy_input = torch.randn(16, 3, 640, 640).numpy()
ort_session = ort.InferenceSession("best.onnx")
outputs = ort_session.run(None, {"input": dummy_input})
print(outputs[0])
# ===============================================
from hailo_sdk_client import ClientRunner

onnx_path = "best.onnx"
onnx_model_name = "detector"
chosen_hw_arch = "hailo8"

# Initialize the ClientRunner
runner = ClientRunner(hw_arch=chosen_hw_arch)

# End nodes for the YOLO11 detection heads
end_node_names = [
    "/model.23/cv3.0/cv3.0.2/Conv",
    "/model.23/cv2.0/cv2.0.2/Conv",
    "/model.23/cv3.1/cv3.1.2/Conv",
    "/model.23/cv2.1/cv2.1.2/Conv",
    "/model.23/cv2.2/cv2.2.2/Conv",
    "/model.23/cv3.2/cv3.2.2/Conv",
]
net_input_shapes = {"input": [16, 3, 640, 640]}

# Translate the ONNX model into a Hailo network
try:
    hn, npz = runner.translate_onnx_model(
        onnx_path,
        onnx_model_name,
        end_node_names=end_node_names,
        net_input_shapes=net_input_shapes,
    )
    print("Model translation successful.")
except Exception as e:
    print(f"Error during model translation: {e}")
    raise

# Save the translated model as a HAR file
hailo_model_har_name = f"{onnx_model_name}.har"
try:
    runner.save_har(hailo_model_har_name)
    print(f"HAR file saved as: {hailo_model_har_name}")
except Exception as e:
    print(f"Error saving HAR file: {e}")
# ===============================================
from pprint import pprint
from hailo_sdk_client import ClientRunner

har_path = "detector.har"
runner = ClientRunner(har=har_path)

# Inspect the translated network layers
try:
    hn_dict = runner.get_hn()  # Or use runner._hn if get_hn() is unavailable
    print("Inspecting layers from HailoNet (OrderedDict):")
    for key, value in hn_dict.items():
        print(f"Key: {key}")
        pprint(value)
        print("\n" + "=" * 80 + "\n")
except Exception as e:
    print(f"Error while inspecting hn_dict: {e}")
# ===============================================
import os
import numpy as np
from PIL import Image

# Paths to the calibration image directory and output files
image_dir = 'calibration'
calibration_data_path = os.path.join("calibration_data.npy")
processed_data_path = os.path.join("processed_calibration_data.npy")

# Process all image files in the directory
calib_data = []
for img_name in os.listdir(image_dir):
    img_path = os.path.join(image_dir, img_name)
    if img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
        img = Image.open(img_path).convert("RGB").resize((640, 640))
        img_array = np.array(img) / 255.0  # Normalize to [0, 1]
        calib_data.append(img_array)

# Convert the calibration data to a NumPy array and save it
calib_data = np.array(calib_data)
np.save(calibration_data_path, calib_data)
print(f"Normalized calibration dataset saved with shape: {calib_data.shape} to {calibration_data_path}")

# Scale the normalized data back to [0, 255] and save the processed copy
processed_calibration_data = calib_data * 255.0
np.save(processed_data_path, processed_calibration_data)
print(f"Processed calibration dataset saved with shape: {processed_calibration_data.shape} to {processed_data_path}")
# ===============================================
import json
import os

# NMS post-processing configuration for the three detection heads (strides 8/16/32)
nms_layer_config = {
    "nms_scores_th": 0.2,
    "nms_iou_th": 0.7,
    "image_dims": [640, 640],
    "max_proposals_per_class": 100,
    "classes": 80,
    "regression_length": 16,
    "background_removal": False,
    "background_removal_index": 0,
    "bbox_decoders": [
        {
            "name": "bbox_decoder71",
            "stride": 8,
            "reg_layer": "conv71",
            "cls_layer": "conv74"
        },
        {
            "name": "bbox_decoder87",
            "stride": 16,
            "reg_layer": "conv87",
            "cls_layer": "conv90"
        },
        {
            "name": "bbox_decoder102",
            "stride": 32,
            "reg_layer": "conv102",
            "cls_layer": "conv105"
        }
    ]
}

# Save the configuration as a JSON file
output_path = os.path.join("nms_layer_config.json")
with open(output_path, "w") as json_file:
    json.dump(nms_layer_config, json_file, indent=4)
print(f"NMS layer configuration saved to {output_path}")
# ===============================================
import os
from hailo_sdk_client import ClientRunner

# Load the HAR file produced by the translation step
model_name = "detector"
hailo_model_har_name = f"{model_name}.har"
assert os.path.isfile(hailo_model_har_name), "Please provide a valid path for the HAR file"
runner = ClientRunner(har=hailo_model_har_name)

# Model script: add a normalization layer (divide by 255), set sigmoid activations
# on the class outputs, and configure optimization, NMS post-processing,
# and compiler parameters
alls = """
normalization1 = normalization([0.0, 0.0, 0.0], [255.0, 255.0, 255.0])
change_output_activation(conv74, sigmoid)
change_output_activation(conv90, sigmoid)
change_output_activation(conv105, sigmoid)
model_optimization_config(calibration, batch_size=8)
model_optimization_flavor(optimization_level=1, compression_level=0, batch_size=8)
nms_postprocess("nms_layer_config.json", meta_arch=yolov8, engine=cpu)
performance_param(compiler_optimization_level=0)
"""
runner.load_model_script(alls)

# Calibration dataset prepared in the previous step
calib_dataset = "processed_calibration_data.npy"

# Perform optimization (quantization) with the calibration dataset
runner.optimize(calib_dataset)

# Save the optimized model to a new quantized HAR file
quantized_model_har_path = f"{model_name}_quantized_model.har"
runner.save_har(quantized_model_har_path)
print(f"Quantized HAR file saved to: {quantized_model_har_path}")
# ===============================================
from hailo_sdk_client import ClientRunner

# Load the quantized model HAR file
model_name = "detector"
quantized_model_har_path = f"{model_name}_quantized_model.har"
runner = ClientRunner(har=quantized_model_har_path)
print("[info] ClientRunner initialized successfully.")

# Compile the model
try:
    hef = runner.compile()
    print("[info] Compilation completed successfully.")
except Exception as e:
    print(f"[error] Failed to compile the model: {e}")
    raise

# Write the compiled HEF to disk
file_name = f"{model_name}.hef"
with open(file_name, "wb") as f:
    f.write(hef)