Issues with the pre-compiled YOLO models for the Hailo 8L -- unsure how to get started

After many frustrating hours, I finally got everything installed so that I can execute an HEF file, feed it video from the Pi camera, display the frames with OpenCV, and use OpenCV to draw boxes over detected objects.

The problem is that the results are all over the place and nowhere near as good as the demo program's. Additionally, when I tracked down the source code for the demo program, it wouldn't run because it calls a function from some internal hailo module that I couldn't find any documentation for.

This is not so much a hardware issue as a question of how I am supposed to use these pre-compiled YOLO HEFs (including YOLOv8n and YOLOv8s). What am I missing?

I would download the Hailo Model Zoo, but the only device I have with a Hailo chip is my Pi 5, which doesn't appear to support it.

For context, this is the code that I am currently using. In theory, this should be sufficient, no?

import os
#os.environ["DISPLAY"] = ":0"
os.environ.setdefault("DISPLAY", ":0") 
print("The display appears to be: ", os.getenv("DISPLAY"))

import cv2
print("init win call")
cv2.startWindowThread()
cv2.namedWindow("picam", cv2.WINDOW_AUTOSIZE)
print("win call succeeded")

import numpy as np
import hailo_platform as hpf
from picamera2 import Picamera2, Preview
import time
from ultralytics.utils.ops import non_max_suppression
import torch
import signal, sys

COCO_NAMES = [
    "person","bicycle","car","motorcycle","airplane","bus","train","truck","boat",
    "traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat",
    "dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack",
    "umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball",
    "kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket",
    "bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple",
    "sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair",
    "couch","potted plant","bed","dining table","toilet","tv","laptop","mouse",
    "remote","keyboard","cell phone","microwave","oven","toaster","sink",
    "refrigerator","book","clock","vase","scissors","teddy bear","hair drier",
    "toothbrush"
]



def cleanup(*_):
    if 'picam2' in globals():
        picam2.stop(); picam2.close()
    cv2.destroyAllWindows()
    sys.exit(0)

signal.signal(signal.SIGINT, cleanup)
signal.signal(signal.SIGTERM, cleanup)

hef = hpf.HEF("/home/user/Desktop/hailo/8l/yolov8x.hef")


def decode_yolo(raw, conf_thres=0.6, iou_thres=0.5):
    t = torch.as_tensor(raw, dtype=torch.float32)

    # C is 84 (4 box + 80 cls) or 85 (4 box + obj + 80 cls)
    t = t.reshape(-1, t.shape[-1])          # (?, C)

    # If channels are on the right, swap to (C, N)
    if t.shape[1] in (84, 85):
        t = t.T                             # (C, N)

    if t.shape[0] == 84:                    # no objectness column: synthesise one
        obj = torch.ones(1, t.shape[1], dtype=t.dtype)
        t = torch.cat((t[:4], obj, t[4:]), dim=0)   # → 85 × N

    preds = t.T.unsqueeze(0)                # (1, N, 85)
    det   = non_max_suppression(preds, conf_thres, iou_thres, max_det=100)[0]
    return det.cpu().numpy().tolist()


CONF_THRES = 0.15
def parse_hailo_boxes(raw, img_w, img_h, conf=CONF_THRES):
    boxes = []
    for cls_id, arr in enumerate(raw):          # 80 class slots
        if len(arr) == 0:
            continue

        for det in arr:
            if len(det) == 5:
                xc, yc, w, h, score = det
            elif len(det) == 4:
                xc, yc, w, h = det
                score = 1.0
            else:
                continue

            if score < conf:
                continue
            # assuming the device gives normalised xywh in range 0-1 (unverified)
            x0 = (xc - w / 2) * img_w
            y0 = (yc - h / 2) * img_h
            x1 = (xc + w / 2) * img_w
            y1 = (yc + h / 2) * img_h
            boxes.append([x0, y0, x1, y1, score, cls_id])

    return boxes


try:
    with hpf.VDevice() as dev:
        cfg   = hpf.ConfigureParams.create_from_hef(
                    hef, interface=hpf.HailoStreamInterface.PCIe)

        ng    = dev.configure(hef, cfg)[0]
        ng_p  = ng.create_params()

        in_info  = hef.get_input_vstream_infos()[0]
        out_info = hef.get_output_vstream_infos()[0]

        in_vs  = hpf.InputVStreamParams.make_from_network_group(
                    ng, quantized=False, format_type=hpf.FormatType.UINT8)
        out_vs = hpf.OutputVStreamParams.make_from_network_group(
                    ng, quantized=False, format_type=hpf.FormatType.FLOAT32)
        
        H, W, _ = in_info.shape                       # e.g. 640×640×3 for YOLO-n
        picam2 = Picamera2()   # module level, so cleanup() can reach it via globals()

        picam2.configure(
            picam2.create_video_configuration(
                main={"size": (W, H), "format": "RGB888"}))  # RGB → cheap to convert
        picam2.start() 

        with ng.activate(ng_p), \
             hpf.InferVStreams(ng, in_vs, out_vs) as pipe:

            while True:
                #for OpenCV webcam usage
                #ok, frame = cam.read()
                #if not ok:
                #    print("Cam couldn't read frame")
                #   break

                frame = picam2.capture_array()
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

                inp = cv2.resize(frame, (W, H))     # W, H from in_info.shape above
                inp = np.expand_dims(inp, 0).astype(np.uint8)

                res = pipe.infer({in_info.name: inp})
                raw = res[out_info.name][0]
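                # Assumption: for HEFs compiled with the NMS postprocess, raw is
                # a per-class list of (num_dets, 5) arrays; the exact field order
                # is what I'm unsure about (hence the prints below).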

                print("type(raw):", type(raw))
                print("len(raw):", len(raw))
                print("type(raw[0]):", type(raw[0]))
                if hasattr(raw[0], "shape"):
                    print("raw[0].shape:", raw[0].shape)

                #raw = np.asarray(raw, dtype=np.float32)
                #boxes = decode_yolo(raw)      # user-defined post-proc
                #raw = res[out_info.name]
                boxes = parse_hailo_boxes(raw, W, H)
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                num_displayed_detections = 0
                for x0, y0, x1, y1, score, cls in boxes:
                    if num_displayed_detections >= 3:   # draw at most three boxes
                        break
                    num_displayed_detections += 1
                    cx, cy = (x0 + x1) / 2, (y0 + y1) / 2
                    print(f"class {cls} @ ({cx:.1f},{cy:.1f}) {score:.2f}")
                    label = f'{COCO_NAMES[cls] if cls < len(COCO_NAMES) else cls}: {score:.2f}'
                    (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                    cv2.rectangle(frame, (int(x0), int(y0) - th - 4), (int(x0) + tw, int(y0)), (0, 255, 0), cv2.FILLED)
                    cv2.putText(frame, label, (int(x0), int(y0)-2), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                    cv2.rectangle(frame, (int(x0), int(y0)), (int(x1), int(y1)), (0, 0, 255), 3)
                cv2.imshow("picam", frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
finally:
    #cam.release()
    if 'picam2' in globals():
        picam2.stop()
        picam2.close()
    cv2.destroyAllWindows()

Hi @Alex_Larsen
Welcome to the Hailo community. At DeGirum, we developed PySDK, a Python package that simplifies working with Hailo devices. You can see usage examples at DeGirum/hailo_examples: DeGirum PySDK with Hailo AI Accelerators. Specifically, if you want to use YOLO models, see this notebook: hailo_examples/examples/002_yolov8.ipynb at main · DeGirum/hailo_examples. Let me know if you need any further help.
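
For reference, the basic PySDK flow in that notebook looks roughly like this (a minimal sketch; the model name below is illustrative, so check the notebook for the exact identifiers available for the Hailo-8L):

import degirum as dg

# Load a YOLOv8 model compiled for Hailo from the DeGirum model zoo.
# "@local" runs inference on the Hailo device in this machine; the model
# name is an example, so list the zoo's models for the real names.
model = dg.load_model(
    model_name="yolov8n_coco--640x640_quant_hailort_hailo8l_1",
    inference_host_address="@local",
    zoo_url="degirum/models_hailort",
    token="",
)

result = model("test_image.jpg")   # accepts file paths or numpy arrays
print(result.results)              # list of dicts with bbox, score, label
# result.image_overlay returns the frame with boxes already drawn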