After many frustrating hours, I was finally able to get everything installed in order to be able to execute an HEF file, feed video from the picam input into it, and display it using OpenCV as well as use OpenCV to draw boxes over detected objects.
The problem is that the results are all over the place and are nowhere near as good as the demo program. Additionally, even when I tried to find the source code for the demo program, it didn’t seem to run due to it calling a function in some internal hailo module that didn’t seem to have any documentation for it.
This is not so much an issue with the hardware, but more of a question of how am I supposed to use these pre-compiled YOLO (including YOLOv8n and YOLOv8s) HEFs? What am I missing?
I would download the hailo model zoo but the only device that I have with a hailo chip is my pi 5 which doesn’t appear to support the model zoo.
For context, this is the code that I am currently using. In theory, this should be sufficient, no?
import os
#os.environ["DISPLAY"] = ":0"
os.environ.setdefault("DISPLAY", ":0")
print("The display appears to be: ", os.getenv("DISPLAY"))
import cv2
print("init win call")
cv2.startWindowThread()
cv2.namedWindow("picam", cv2.WINDOW_AUTOSIZE)
print("win call succeeded")
import numpy as np
import hailo_platform as hpf
from picamera2 import Picamera2, Preview
import time
from ultralytics.utils.ops import non_max_suppression
import torch
import signal, sys
COCO_NAMES = [
"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat",
"traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat",
"dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack",
"umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball",
"kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket",
"bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple",
"sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair",
"couch","potted plant","bed","dining table","toilet","tv","laptop","mouse",
"remote","keyboard","cell phone","microwave","oven","toaster","sink",
"refrigerator","book","clock","vase","scissors","teddy bear","hair drier",
"toothbrush"
]
def cleanup(*_):
if 'picam2' in globals():
picam2.stop(); picam2.close()
cv2.destroyAllWindows()
sys.exit(0)
signal.signal(signal.SIGINT, cleanup)
signal.signal(signal.SIGTERM, cleanup)
hef = hpf.HEF("/home/user/Desktop/hailo/8l/yolov8x.hef")
def decode_yolo(raw, conf_thres=0.6, iou_thres=0.5):
import torch
from ultralytics.utils.ops import non_max_suppression
t = torch.as_tensor(raw, dtype=torch.float32)
C is 84 or 85
t = t.reshape(-1, t.shape[-1]) # (?, 84)
# If channels are on the right, swap to (C, N)
if t.shape[1] in (84, 85):
t = t.T # (84, N)
if t.shape[0] == 84: # 4 box + 80 cls
obj = torch.ones(1, t.shape[1], dtype=t.dtype)
t = torch.cat((t[:4], obj, t[4:]), dim=0) # → 85 × N
preds = t.T.unsqueeze(0) # (1, N, 85)
det = non_max_suppression(preds, conf_thres, iou_thres, max_det=100)[0]
return det.cpu().numpy().tolist()
CONF_THRES = 0.15
def parse_hailo_boxes(raw, img_w, img_h, conf=CONF_THRES):
boxes = []
for cls_id, arr in enumerate(raw): # 80 class slots
if len(arr) == 0:
continue
for det in arr:
if len(det) == 5:
xc, yc, w, h, score = det
elif len(det) == 4:
xc, yc, w, h = det
score = 1.0
else:
continue
if score < conf:
continue
# device gives normalised xywh in range 0-1
x0 = (xc - w / 2) * img_w
y0 = (yc - h / 2) * img_h
x1 = (xc + w / 2) * img_w
y1 = (yc + h / 2) * img_h
boxes.append([x0, y0, x1, y1, score, cls_id])
return boxes
try:
with hpf.VDevice() as dev:
cfg = hpf.ConfigureParams.create_from_hef(
hef, interface=hpf.HailoStreamInterface.PCIe)
ng = dev.configure(hef, cfg)[0]
ng_p = ng.create_params()
in_info = hef.get_input_vstream_infos()[0]
out_info = hef.get_output_vstream_infos()[0]
in_vs = hpf.InputVStreamParams.make_from_network_group(
ng, quantized=False, format_type=hpf.FormatType.UINT8)
out_vs = hpf.OutputVStreamParams.make_from_network_group(
ng, quantized=False, format_type=hpf.FormatType.FLOAT32)
H, W, _ = in_info.shape # e.g. 640×640×3 for YOLO-n
global picam2
picam2 = Picamera2()
if not picam2:
print("Error capturing picam")
picam2.configure(
picam2.create_video_configuration(
main={"size": (W, H), "format": "RGB888"})) # RGB → cheap to convert
picam2.start()
with ng.activate(ng_p), \
hpf.InferVStreams(ng, in_vs, out_vs) as pipe:
while True:
#for OpenCV webcam usage
#ok, frame = cam.read()
#if not ok:
# print("Cam couldn't read frame")
# break
frame = picam2.capture_array()
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
h, w, _ = in_info.shape
inp = cv2.resize(frame, (w, h))
inp = np.expand_dims(inp, 0).astype(np.uint8)
res = pipe.infer({in_info.name: inp})
raw = res[out_info.name][0]
print("type(raw):", type(raw))
print("len(raw):", len(raw))
print("type(raw[0]):", type(raw[0]))
if hasattr(raw[0], "shape"):
print("raw[0].shape:", raw[0].shape)
#raw = np.asarray(raw, dtype=np.float32)
#boxes = decode_yolo(raw) # user-defined post-proc
#raw = res[out_info.name]
boxes = parse_hailo_boxes(raw, W, H)
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
num_displayed_detections = 0
for x0, y0, x1, y1, score, cls in boxes:
if (num_displayed_detections > 2):
break
num_displayed_detections = num_displayed_detections + 1
cx, cy = (x0 + x1) / 2, (y0 + y1) / 2
print(f"class {cls} @ ({cx:.1f},{cy:.1f}) {score:.2f}")
label = f'{COCO_NAMES[cls] if cls < len(COCO_NAMES) else cls}: {score:.2f}'
(tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
cv2.rectangle(frame, (int(x0), int(y0) - th - 4), (int(x0) + tw, int(y0)), (0, 255, 0), cv2.FILLED)
cv2.putText(frame, label, (int(x0), int(y0)-2), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
cv2.rectangle(frame, (int(x0), int(y0)), (int(x1), int(y1)), (0, 0, 255), 3)
cv2.imshow("picam", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
finally:
#cam.release()
picam2.stop()
picam2.close()
cv2.destroyAllWindows()