Hello all,
For a few weeks I have been trying to convert a pretrained model into a usable HEF for a university project so it can run on the Hailo-8L. I followed the DFC tutorial to parse the ONNX, then optimized it with a dataset of relevant pictures and compiled it.
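In case it matters, this is roughly the flow I followed with the DFC (a simplified sketch from memory; the file names and the calibration loading are placeholders, not my exact script):

import numpy as np
from hailo_sdk_client import ClientRunner

runner = ClientRunner(hw_arch="hailo8l")

# Parse: translate the ONNX into Hailo's internal representation
runner.translate_onnx_model("bird-resnet34.onnx", "bird_resnet34")

# Optimize: quantize using a calibration set of relevant bird pictures
# (placeholder: an array of N preprocessed images, HxWxC)
calib_data = np.load("calib_set.npy")
runner.optimize(calib_data)

# Compile to a HEF for the Hailo-8L
hef_bytes = runner.compile()
with open("bird-resnet34_v3.hef", "wb") as f:
    f.write(hef_bytes)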
Now I am trying to run it on the Pi. When I run the ONNX directly, the results are accurate:
import onnxruntime as ort
from PIL import Image
import numpy as np
def preprocess_image(image_path, input_size):
    image = Image.open(image_path).convert("RGB")
    image = image.resize(input_size)
    image_array = np.asarray(image).astype(np.float32)
    image_array /= 255.0
    image_array = np.transpose(image_array, (2, 0, 1))  # HWC -> CHW
    image_array = np.expand_dims(image_array, axis=0)   # add batch dimension
    return image_array

# Load the model
onnx_model_path = "bird-resnet34.onnx"
session = ort.InferenceSession(onnx_model_path)

# Prepare the image
image_path = "snowly-owl-migration.webp"
input_size = (250, 250)  # depends on the model
input_data = preprocess_image(image_path, input_size)
print(f"Input shape: {input_data.shape}")

# Predict
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name
predictions = session.run([output_name], {input_name: input_data})[0]

# Highest probability and its class
predicted_class = np.argmax(predictions)
print(f"Predicted class: {predicted_class} ({bird_name_map[predicted_class]})")
Now I want to use the converted ONNX as a HEF file on the Pi.
The problem is that the model's output is supposed to be an array of 450 floats, one probability per bird class. I think the issue is that the Hailo-8L only outputs uint8, and now my output looks like this, for example:
[ 45 124 132 60 124 52 150 168 96 37 50 119 73 4 11 91 88 53
95 59 24 123 77 57 89 103 26 74 26 131 140 102 106 93 89 78
105 138 171 110 194 148 106 100 140 70 78 127 69 63 116 100 95 97
169 221 68 16 11 0 139 151 6 108 112 123 37 104 190 98 155 66
22 45 98 86 80 50 80 16 32 83 91 116 116 126 124 141 134 180
128 8 97 90 146 77 84 0 68 81 112 120 63 130 112 95 52 63
116 139 69 130 145 79 63 134 70 65 49 79 206 90 57 139 136 43
133 72 114 140 92 67 30 144 77 99 179 116 10 0 173 62 17 22
134 37 218 120 112 38 131 161 66 56 151 136 115 147 81 149 85 137
51 91 160 105 4 109 117 166 100 79 132 49 122 170 115 44 143 143
164 106 34 126 118 162 68 110 152 70 96 163 174 145 119 186 147 92
168 144 106 137 160 179 146 150 145 16 52 70 111 123 127 0 135 122
174 149 50 72 145 116 131 51 154 48 64 54 132 80 164 107 145 140
105 140 189 78 142 188 76 83 138 153 128 113 144 106 98 127 72 174
86 112 126 161 108 93 76 74 94 92 21 29 235 116 123 122 79 64
81 173 142 0 119 119 127 29 108 32 135 161 110 72 145 62 46 110
127 173 47 103 22 98 102 54 100 127 103 116 152 101 176 116 73 106
169 0 103 173 55 103 78 86 70 201 58 72 47 70 50 127 59 115
68 45 113 138 143 128 78 114 53 100 148 30 111 98 110 57 101 142
111 77 0 38 112 28 83 26 104 126 112 98 141 112 59 120 70 128
103 65 42 146 170 162 146 85 64 180 82 82 65 86 38 86 163 22
127 86 79 56 4 127 186 135 79 109 77 90 126 107 113 99 94 123
136 138 69 92 71 86 124 138 31 0 121 55 26 97 101 84 115 70
158 128 56 120 145 60 147 15 54 18 70 59 74 14 132 2 149 0
151 128 117 116 179 75 152 118 98 57 87 58 81 112 157 71 146 46]
But even when I just take the index of the maximum value, it isn't a remotely correct answer. I used this code:
import numpy as np
import cv2
from hailo_platform import (VDevice, HEF, ConfigureParams, InferVStreams, InputVStreamParams,
                            OutputVStreamParams, FormatType, HailoStreamInterface)

def initialize_hailo(hef_path):
    # Initialize the Hailo device
    target = VDevice()
    hef = HEF(hef_path)
    # Configure the network groups
    configure_params = ConfigureParams.create_from_hef(hef=hef, interface=HailoStreamInterface.PCIe)
    network_groups = target.configure(hef, configure_params)
    return target, hef, network_groups[0]

def create_vstreams(network_group):
    # Create input and output stream parameters
    input_vstreams_params = InputVStreamParams.make(network_group, format_type=FormatType.UINT8)
    output_vstreams_params = OutputVStreamParams.make(network_group, format_type=FormatType.UINT8)
    return input_vstreams_params, output_vstreams_params

def preprocess_image(image_path, input_shape, mean, std):
    # Load and transform the image
    image = cv2.imread(image_path)
    height, width, _ = input_shape  # unpack height, width and channels
    resized = cv2.resize(image, (width, height))  # OpenCV expects (width, height)
    normalized = (resized / 255.0 - mean) / std
    return normalized.astype(np.uint8)

def preprocess_frame(frame, input_shape):
    height, width, _ = input_shape  # unpack height, width and channels
    resized = cv2.resize(frame, (width, height))  # OpenCV expects (width, height)
    normalized = resized / 255.0  # normalize the pixel values
    return normalized.astype(np.uint8)

def run_inference(network_group, input_vstreams_params, output_vstreams_params, input_data):
    with InferVStreams(network_group, input_vstreams_params, output_vstreams_params) as infer_pipeline:
        input_vstream_info = network_group.get_input_vstream_infos()[0]
        output_vstream_info = network_group.get_output_vstream_infos()[0]
        # Pack the input data
        input_data_dict = {input_vstream_info.name: np.expand_dims(input_data, axis=0)}
        # Activate the network group
        with network_group.activate():
            infer_results = infer_pipeline.infer(input_data_dict)
        # Extract the results
        return infer_results[output_vstream_info.name]

def interpret_results(results, labels):
    # Find the highest probability
    predicted_class = np.argmax(results)
    confidence = results[0][predicted_class]
    return labels[predicted_class], confidence

bird_name_map = {0: 'ABBOTTS BABBLER', ...}

hef_path = "/home/vogelscheuche/Vogelscheuche/bird-resnet34_v3.hef"
image_path = "snowy-owl-migration.webp"

# Normalization values
mean = np.array([0.4758, 0.4685, 0.3870])  # means for R, G, B
std = np.array([0.2376, 0.2282, 0.2475])   # standard deviations for R, G, B

# Initialize Hailo
target, hef, network_group = initialize_hailo(hef_path)
input_vstreams_params, output_vstreams_params = create_vstreams(network_group)

input_vstream_info = hef.get_input_vstream_infos()[0]
input_shape = input_vstream_info.shape
print('input shape', input_shape)

input_data = preprocess_image(image_path, input_shape, mean, std)
print('input data shape', input_data.shape)

results = run_inference(network_group, input_vstreams_params, output_vstreams_params, input_data)
print(results)

predicted_bird, confidence = interpret_results(results, bird_name_map)
print(f"Detected bird: {predicted_bird} with {confidence * 100:.2f}% confidence")
Is my error in how I use the results, or could it be that I butchered the conversion from ONNX to HEF?
Any help would be appreciated.
Thank you very much,
Till