import argparse
import cv2
from picamera2 import MappedArray, Picamera2, Preview
from picamera2.devices import Hailo
import numpy as np
from control_settings_in_yaml import generate_controls_from_yaml
def extract_detections(hailo_output, w, h, class_names, threshold=0.5):
    """Extract detections from the HailoRT-postprocess output.

    Args:
        hailo_output: Per-class sequence of detections; each detection is
            [y0, x0, y1, x1, score] with coordinates normalized to [0, 1].
        w: Width in pixels of the frame the boxes will be drawn on.
        h: Height in pixels of that frame.
        class_names: Label for each class index in ``hailo_output``.
        threshold: Minimum score for a detection to be kept.

    Returns:
        List of ``[class_name, (x0, y0, x1, y1), score]`` entries with
        pixel coordinates clamped to the frame bounds.
    """
    results = []
    for class_id, detections in enumerate(hailo_output):
        for detection in detections:
            detection_array = np.array(detection)
            score = detection_array[4]
            if score >= threshold:
                y0, x0, y1, x1 = detection_array[:4]
                # Clamp normalized coords to [0, 1] first: the model can emit
                # slightly out-of-range boxes near frame edges, which would
                # otherwise produce negative or oversized pixel coordinates.
                bbox = (
                    int(max(0.0, min(1.0, x0)) * w),
                    int(max(0.0, min(1.0, y0)) * h),
                    int(max(0.0, min(1.0, x1)) * w),
                    int(max(0.0, min(1.0, y1)) * h),
                )
                results.append([class_names[class_id], bbox, score])
                print(
                    f"Detection(s) found for class '{class_names[class_id]}', Score: {score:.2f}"
                )
    return results
def draw_objects(request):
    """Pre-callback: overlay the current global detections on the 'main' frame."""
    # Take a local snapshot of the module-level list so a concurrent
    # reassignment from the inference loop cannot swap it mid-draw.
    current_detections = detections
    if not current_detections:
        return
    with MappedArray(request, "main") as m:
        frame = m.array
        for class_name, bbox, score in current_detections:
            x0, y0, x1, y1 = bbox
            label = f"{class_name} %{int(score * 100)}"
            # Green box plus a small label just inside its top-left corner.
            cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 255, 0), 2)
            cv2.putText(
                frame,
                label,
                (x0 + 5, y0 + 15),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                (0, 255, 0),
                1,
                cv2.LINE_AA,
            )
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Record a video with Picamera2 and perform object detection."
    )
    parser.add_argument("--width", type=int, default=1080,
                        help="Width of the video")
    parser.add_argument("--height", type=int, default=720,
                        help="Height of the video")
    parser.add_argument(
        "--config_file_path",
        type=str,
        default="config.yaml",
        help="Configuration file path",
    )
    parser.add_argument(
        "-m", "--model", help="Path for the HEF model.", default="yolov8n.hef"
    )
    parser.add_argument(
        "-l",
        "--labels",
        default="coco_1.txt",
        help="Path to a text file containing labels.",
    )
    parser.add_argument(
        "-s",
        "--score_thresh",
        type=float,
        default=0.5,
        help="Score threshold, must be a float between 0 and 1.",
    )
    args = parser.parse_args()

    video_w = args.width
    video_h = args.height
    score_thresh = args.score_thresh
    labels = args.labels
    model = args.model

    # Get the Hailo model, the input size it wants, and the size of our preview stream.
    with Hailo(model) as hailo:
        model_h, model_w, _ = hailo.get_input_shape()

        # Load class names from the labels file
        with open(labels, "r", encoding="utf-8") as f:
            class_names = f.read().splitlines()

        # The list of detected objects to draw (read by draw_objects).
        detections = None

        with Picamera2() as picam2:
            # Configure Picamera2: the 'main' stream is for preview/recording,
            # the 'lores' stream is sized to the model input so its frames can
            # be fed to inference with no resize.
            # NOTE(review): the previous assignment to
            # picam2.video_configuration.main.size was dead code — the camera
            # is configured from create_preview_configuration() below.
            main = {'size': (video_w, video_h), 'format': 'XBGR8888'}
            lores = {'size': (model_w, model_h), 'format': 'YUV420'}
            config = picam2.create_preview_configuration(main, lores=lores)
            picam2.configure(config)

            # Generate control dictionary from yaml file
            camera_control_dict = generate_controls_from_yaml(
                args.config_file_path)
            picam2.set_controls(camera_control_dict)

            picam2.start_preview(Preview.QTGL, x=0, y=0,
                                 width=video_w, height=video_h)
            # Install the drawing callback BEFORE starting the camera so that
            # every delivered frame passes through draw_objects. Setting it
            # after start() lets some frames reach the preview undrawn, which
            # shows up as bounding-box flicker.
            picam2.pre_callback = draw_objects
            picam2.start()

            while True:
                frame = picam2.capture_array('lores')
                # hailo.run() expects packed RGB, so the planar YUV420 lores
                # buffer must be converted first (this is why passing the
                # raw Picamera2 formats directly fails).
                rgb = cv2.cvtColor(frame, cv2.COLOR_YUV420p2RGB)
                results = hailo.run(rgb)
                detections = extract_detections(
                    results, video_w, video_h, class_names, score_thresh
                )
I am running inference with a YOLOv8n model on a Raspberry Pi CM4 with a Hailo accelerator, using the script above and the 'lores' camera stream as the inference input.
I have tried passing the frame directly to hailo.run in every image format Picamera2 offers (XBGR8888, XRGB8888, RGB888, BGR888, and YUV420), but all of them raise errors. I therefore have to convert the frames with OpenCV first, and I am concerned that this conversion might be adding latency.
In the visualization, there is flickering in the bounding boxes, and the model is not detecting properly. The model was compiled using Hailo Dataflow Compiler with optimization level 2, and I tested it with a dataset where it performed very well, so the issue might be in the inference script.
Could you suggest what might be wrong in this script?
Thanks!