Extracting Keypoints from a Custom Dataset Model for Pose Estimation

I trained an Ultralytics YOLOv8n-pose model on a custom dataset with 5 keypoints. I then converted the model to ONNX format and compiled it using hailo_sdk_client.

When running inference on an RPi, the following code is used:

from multiprocessing import Process
import matplotlib.pyplot as plt
import hailo
import numpy as np
from hailo_platform import (
    HEF,
    ConfigureParams,
    FormatType,
    HailoSchedulingAlgorithm,
    HailoStreamInterface,
    InferVStreams,
    InputVStreamParams,
    InputVStreams,
    OutputVStreamParams,
    OutputVStreams,
    VDevice,
)

# Inference script: configures a Hailo virtual device with a compiled HEF,
# feeds it a batch of images loaded from a .npy file, and prints the shape of
# the first output tensor.

# Create the virtual device with the scheduling algorithm explicitly set to NONE.
params = VDevice.create_params()
params.scheduling_algorithm = HailoSchedulingAlgorithm.NONE

target = VDevice(params=params)

# Load the compiled HEF; the file name is derived from model_name ("aruco.hef").
model_name = "aruco"
hef_path = f"{model_name}.hef"
hef = HEF(hef_path)

# Configure the device from the HEF over the PCIe interface and keep only the
# first network group that configure() returns.
configure_params = ConfigureParams.create_from_hef(hef=hef, interface=HailoStreamInterface.PCIe)
network_groups = target.configure(hef, configure_params)
network_group = network_groups[0]
network_group_params = network_group.create_params()


# Virtual-stream parameters: input is declared quantized=False / UINT8,
# output is declared quantized=True / UINT8.
# NOTE(review): with quantized=True the output is presumably returned as raw
# quantized uint8 values; confirm whether it must be dequantized (or requested
# in a float format) before decoding boxes/keypoints from it.
input_vstreams_params = InputVStreamParams.make(network_group, quantized=False, format_type=FormatType.UINT8)
output_vstreams_params = OutputVStreamParams.make(network_group, quantized=True, format_type=FormatType.UINT8)

# Only the first input and first output vstream of the HEF are inspected here.
input_vstream_info = hef.get_input_vstream_infos()[0]
output_vstream_info = hef.get_output_vstream_infos()[0]
# The input shape is unpacked as (height, width, channels); none of these three
# values, nor num_of_images / low / high below, are used in the visible code.
image_height, image_width, channels = input_vstream_info.shape
num_of_images = 7
low, high = 2, 20

# Input batch is read from disk.
# NOTE(review): assumes the array is laid out as (N, height, width, channels)
# matching the input vstream shape -- confirm against how the file was written.
dataset = np.load('calibration_data.npy')


# infer() takes a dict keyed by input vstream name.
input_data = {input_vstream_info.name: dataset}

# Build the inference pipeline, activate the network group, and run one
# blocking inference over the whole batch.
with InferVStreams(network_group, input_vstreams_params, output_vstreams_params) as infer_pipeline:
    with network_group.activate(network_group_params):
        infer_results = infer_pipeline.infer(input_data)
        # infer_results is indexed by output vstream name; only the first
        # output's tensor is looked at here.
        print(f"Stream output shape is {infer_results[output_vstream_info.name].shape}")

This program runs successfully, with the infer_results output shape being (7, 1, 8400, 10).

How can I extract the pose keypoints, bounding boxes, and other parameters from the output?

Thank you.

Hi @sujith.christopher.b,
We have an example in our Hailo Application git repo for yolov5 and yolov8 pose:

It should give you an idea of how to extract the data you want.

Regards,