Face detector inference in hailo

I’m new to hailo (but not to ML). I have it on my Raspberry Pi 5 and am trying to figure out how to use it. I’ve downloaded a face detection HEF model, and I’m not sure how I can get the (parsed) results from it.

I got the retinaface_mobilenet_v1 model from the Hailo Model Zoo for Hailo8L (downloaded the .hef file),
but the output of the model is not the same as the one I see on the model’s GitHub page.
On their GitHub page, I can see in their detect.py file that the model returns 3 tensors — loc, conf, landms — which makes sense. But when I run inference on this model, I see weird output(s). I’m currently running the following code, which I took from HRT_2_Infer_Pipeline_Inference_Tutorial.ipynb in the Hailo tutorials. I validated my results with the profile HTML and it looks correct, but I’m not sure how I’m supposed to use it for face detection.

from glob import glob
from multiprocessing import Process

import cv2
import numpy as np
from hailo_platform import (HEF, VDevice, HailoStreamInterface, InferVStreams, ConfigureParams,
    InputVStreamParams, OutputVStreamParams, InputVStreams, OutputVStreams, FormatType)

# The target can be used as a context manager ("with" statement) to ensure it's released on time.
# Here it's avoided for the sake of simplicity
target = VDevice()

# Loading compiled HEFs to device:
model_name = 'retinaface_mobilenet_v1'
hef_path = '../hefs/{}.hef'.format(model_name)  # assumes the HEF lives in ../hefs — TODO confirm path
hef = HEF(hef_path)
    
# Configure network groups on the device over PCIe; configure() returns a list,
# and a single-network HEF yields exactly one group.
configure_params = ConfigureParams.create_from_hef(hef=hef, interface=HailoStreamInterface.PCIe)
network_groups = target.configure(hef, configure_params)
network_group = network_groups[0]
network_group_params = network_group.create_params()

# Create input and output virtual streams params.
# NOTE(review): input is requested as FLOAT32 while the images loaded below are
# uint8 — presumably HailoRT converts/quantizes on the way in; verify against
# the HailoRT docs for this model.
input_vstreams_params = InputVStreamParams.make(network_group, format_type=FormatType.FLOAT32)
output_vstreams_params = OutputVStreamParams.make(network_group, format_type=FormatType.UINT8)

# Define dataset params.
# NOTE(review): only output index [0] is taken here, but RetinaFace exposes
# several output tensors (the loc/conf/landms branches) — iterate over
# hef.get_output_vstream_infos() to see all of them.
input_vstream_info = hef.get_input_vstream_infos()[0]
output_vstream_info = hef.get_output_vstream_infos()[0]
# Input layout is HWC: (height, width, channels).
image_height, image_width, channels = input_vstream_info.shape
num_of_images = 10  # NOTE(review): unused below — leftover from the tutorial
low, high = 2, 20   # NOTE(review): unused below — leftover from the tutorial

def get_imgs_for_model(imgs_dir, target_width, target_height):
    """Load every .jpg in *imgs_dir*, convert BGR->RGB, and resize for the model.

    Args:
        imgs_dir: directory containing .jpg images (non-recursive).
        target_width: model input width in pixels.
        target_height: model input height in pixels.

    Returns:
        np.ndarray of shape (N, target_height, target_width, 3), dtype uint8.

    Raises:
        ValueError: if no readable .jpg images are found.
    """
    # cv2.resize takes dsize as (width, height) — the transpose of the numpy shape.
    target_size = (target_width, target_height)
    imgs = []

    for img_path in glob(f'{imgs_dir}/*.jpg'):
        image = cv2.imread(img_path)
        if image is None:
            # Unreadable/corrupt file: imread returns None, which would crash
            # cvtColor — skip it instead.
            continue
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # model expects RGB, OpenCV loads BGR
        image_resized = cv2.resize(image_rgb, target_size, interpolation=cv2.INTER_AREA)
        imgs.append(image_resized[None, ...])  # add a leading batch axis

    if not imgs:
        # np.concatenate([]) raises a cryptic error; fail with a clear message.
        raise ValueError(f'no readable .jpg images found in {imgs_dir!r}')
    return np.concatenate(imgs, axis=0)

# Bug fix: `imgs_dir` was never defined before this call (NameError).
imgs_dir = 'images'  # TODO: point this at your image directory
imgs = get_imgs_for_model(imgs_dir, image_width, image_height)

# Infer: feed the whole batch under the input stream's name.
with InferVStreams(network_group, input_vstreams_params, output_vstreams_params) as infer_pipeline:
    # Bug fix: the original passed an undefined `dataset`; use the `imgs` batch loaded above.
    input_data = {input_vstream_info.name: imgs}
    with network_group.activate(network_group_params):
        infer_results = infer_pipeline.infer(input_data)
        # RetinaFace emits several output tensors (the loc/conf/landms branches),
        # so print every output instead of only the first one.
        for out_info in hef.get_output_vstream_infos():
            print('Output {} shape is {}'.format(out_info.name, infer_results[out_info.name].shape))

The output I’m getting is
`Stream output shape is (5, 92, 160, 8)`