I'm using a custom YOLOv8n model. Do the results returned by `infer_pipeline.infer()` require any additional postprocessing? What am I missing? Any help would be appreciated.
import cv2
import os, random, time
import numpy as np
from hailo_platform import (HEF, Device, VDevice, HailoStreamInterface, InferVStreams, ConfigureParams,
InputVStreamParams, OutputVStreamParams, InputVStreams, OutputVStreams, FormatType)
# yolov8n input resolution (the HEF was compiled for 640x640 input).
INPUT_RES_H = 640
INPUT_RES_W = 640

# Compiled HEF and the video file to run inference on.
hef_path = 'yolov8n.hef'
video_file = "test.mp4"

hef = HEF(hef_path)
devices = Device.scan()

with VDevice(device_ids=devices) as target:
    # Configure the device for this HEF over PCIe.
    configure_params = ConfigureParams.create_from_hef(hef, interface=HailoStreamInterface.PCIe)
    network_group = target.configure(hef, configure_params)[0]
    network_group_params = network_group.create_params()

    input_vstream_info = hef.get_input_vstream_infos()[0]
    output_vstream_info = hef.get_output_vstream_infos()[0]

    # Request dequantized FLOAT32 buffers on both ends of the pipeline.
    input_vstreams_params = InputVStreamParams.make_from_network_group(
        network_group, quantized=False, format_type=FormatType.FLOAT32)
    output_vstreams_params = OutputVStreamParams.make_from_network_group(
        network_group, quantized=False, format_type=FormatType.FLOAT32)

    # Model input shape (H, W, C) as reported by the HEF.
    height, width, channels = input_vstream_info.shape

    # BUG FIX: the original set source='camera' while actually opening a video
    # file, so the "loop the video" branch below could never trigger.
    source = 'video'
    cap = cv2.VideoCapture(video_file)
    if not cap.isOpened():
        print("Could not open camera")
        exit()

    start_time = time.time()
    frame_count = 0

    # PERF FIX: create the inference pipeline and activate the network group
    # ONCE, outside the frame loop. The original re-entered both context
    # managers for every single frame, which is very expensive.
    with InferVStreams(network_group, input_vstreams_params, output_vstreams_params) as infer_pipeline:
        with network_group.activate(network_group_params):
            while True:
                # Read a frame from the video source.
                ret, frame = cap.read()
                if not ret:
                    print("Could not read frame")
                    break
                frame_count += 1

                # Original capture dimensions, used to scale normalized boxes back.
                orig_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
                orig_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

                # Rewind to the first frame when the video ends, so playback loops.
                if source == 'video' and not cap.get(cv2.CAP_PROP_POS_FRAMES) % cap.get(cv2.CAP_PROP_FRAME_COUNT):
                    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

                # BUG FIX: cv2.resize expects (width, height), not (height, width).
                # Harmless here only because the model input is square.
                resized_img = cv2.resize(frame, (INPUT_RES_W, INPUT_RES_H), interpolation=cv2.INTER_AREA)

                # Batch dimension + FLOAT32, matching the vstream params above.
                input_data = {input_vstream_info.name: np.expand_dims(resized_img, axis=0).astype(np.float32)}
                infer_results = infer_pipeline.infer(input_data)

                # BUG FIX (the likely answer to the question above): the HEF's
                # built-in NMS postprocess emits, for each class, an array of
                # detections in (y_min, x_min, y_max, x_max, score) order with
                # coordinates normalized to [0, 1] -- NOT (x, y, w, h, conf) as
                # the original unpacking assumed. Confirm against the HailoRT
                # docs for your HailoRT version.
                for key in infer_results.keys():
                    for cls, detections in enumerate(infer_results[key][0]):
                        for y_min, x_min, y_max, x_max, score in detections:
                            if score > 0.5:
                                top_left = (int(x_min * orig_w), int(y_min * orig_h))
                                bottom_right = (int(x_max * orig_w), int(y_max * orig_h))
                                cv2.rectangle(frame, top_left, bottom_right, (0, 0, 255), 2)

                cv2.imshow('frame', frame)
                # Quit on 'q'; window/cap cleanup happens once, after the loop.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

    # RESOURCE FIX: always release the capture and close windows, regardless
    # of how the loop exited (end of stream, read failure, or 'q').
    cap.release()
    cv2.destroyAllWindows()

    avg_fps = frame_count / (time.time() - start_time)
    print(f"FPS = {avg_fps}")