Hi
I am trying to run inference with a quantized YOLOv5 on a Hailo-8 device and I get the following error message:
[HailoRT] [error] CHECK failed - The given output format type UINT8 is not supported, should be HAILO_FORMAT_TYPE_FLOAT32
[HailoRT] [error] CHECK_SUCCESS failed with status=HAILO_INVALID_ARGUMENT(2)
[HailoRT] [error] CHECK_SUCCESS failed with status=HAILO_INVALID_ARGUMENT(2)
[HailoRT] [error] CHECK_SUCCESS failed with status=HAILO_INVALID_ARGUMENT(2)
Traceback (most recent call last):
  File "/local/workspace/FA/Detection/HWinfer.py", line 341, in <module>
    t0 = time()
  File "/local/workspace/FA/Detection/HWinfer.py", line 247, in HWinference
    print('Inference on HW')
  File "/local/workspace/hailo_virtualenv/lib/python3.10/site-packages/hailo_platform/pyhailort/pyhailort.py", line 930, in __enter__
    self._infer_pipeline = _pyhailort.InferVStreams(self._configured_net_group._configured_network,
hailo_platform.pyhailort._pyhailort.HailoRTStatusException: 2
I first created a .hef file by running:
from hailo_sdk_client import ClientRunner, InferenceContext

# Initialize a new client runner
runner = ClientRunner(hw_arch='hailo8')
# Translate YOLO model from ONNX
runner.translate_onnx_model(onnx_path, end_node_names=end_node_names)
# Add model script with NMS layer at the network's output.
nms_scores_th = 0.35
nms_iou_th = 0.95
ratio4bits = 0.2
opti = 2
filepath = f'{model_name}_opti{opti}_ratio{ratio4bits:.1f}'
hef_path = filepath + '.hef'
"""
model_script_lines = [
    'normalization_rule1 = normalization([0.0, 0.0, 0.0], [255.0, 255.0, 255.0])\n',
    f'resize_input1 = resize(resize_shapes=[{HEIGHT},{WIDTH}])\n',
    f'model_optimization_flavor(optimization_level={opti}, batch_size=8)\n',
    f'model_optimization_config(compression_params, auto_4bit_weights_ratio={ratio4bits})\n',
    f'nms_postprocess(meta_arch={model_type}, engine=cpu, nms_scores_th={nms_scores_th}, nms_iou_th={nms_iou_th})\n',
]
runner.load_model_script(''.join(model_script_lines))
runner.optimize(calib_dataset)
# Run quantized inference
with runner.infer_context(InferenceContext.SDK_QUANTIZED) as ctx:
    nms_quantized_output = runner.infer(ctx, calib_dataset[:nCalib, ...])
# Compile and save to hef file
hef = runner.compile()
print(f'Saving compiled model: {hef_path}')
with open(hef_path, 'wb') as f:
    f.write(hef)
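For reference, this is how I would double-check what the compiled .hef reports for its streams (just a sketch; the .shape/.format attributes on the vstream info objects are my assumption, the HEF calls are the same ones I use further down):

# Sketch: dump the input/output vstream infos of the compiled HEF.
# The .shape / .format attributes are assumed here, not taken from my working script.
from hailo_platform import HEF

compiled_hef = HEF(hef_path)
for info in compiled_hef.get_input_vstream_infos():
    print('input :', info.name, info.shape, info.format)
for info in compiled_hef.get_output_vstream_infos():
    print('output:', info.name, info.shape, info.format)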
Then I just load the model and run inference on the board:
import numpy as np

from hailo_platform import (
    HEF,
    ConfigureParams,
    FormatType,
    HailoSchedulingAlgorithm,
    HailoStreamInterface,
    InferVStreams,
    InputVStreamParams,
    # InputVStreams,
    OutputVStreamParams,
    # OutputVStreams,
    VDevice,
)
# Setting VDevice params to disable the HailoRT service feature
params = VDevice.create_params()
params.scheduling_algorithm = HailoSchedulingAlgorithm.NONE
# Create virtual device on host
target = VDevice(params=params)
hef = HEF(hef_path)
# Get the 'network groups' (connectivity groups, aka. 'different networks') information from the .hef
configure_params = ConfigureParams.create_from_hef(hef=hef, interface=HailoStreamInterface.PCIe)
model_name = hef.get_network_group_names()[0]
configure_params[model_name].batch_size = 4
network_groups = target.configure(hef, configure_params)
network_group = network_groups[0]
network_group_params = network_group.create_params()
# Create input and output virtual streams params
input_vstreams_params = InputVStreamParams.make(
    network_group, quantized=True, format_type=FormatType.UINT8)
output_vstreams_params = OutputVStreamParams.make(
    network_group, quantized=True, format_type=FormatType.UINT8)
input_vstream_info = hef.get_input_vstream_infos()[0]
output_vstream_info = hef.get_output_vstream_infos()[0]
dataset = calib_dataset.astype(np.uint8)
input_data = {input_vstream_info.name: dataset}
with InferVStreams(network_group, input_vstreams_params, output_vstreams_params) as infer_pipeline:
    with network_group.activate(network_group_params):
        infer_results = infer_pipeline.infer(input_data)
    nms_output = infer_results[output_vstream_info.name]
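If I read the error message literally, it wants the post-NMS output requested as FLOAT32 instead, i.e. something like this (just a sketch, I have not verified that pairing quantized=False with FLOAT32 is the intended usage):

# Sketch of what the error seems to ask for: request the NMS output as float32.
# quantized=False together with FLOAT32 is my assumption, not a verified fix.
output_vstreams_params = OutputVStreamParams.make(
    network_group, quantized=False, format_type=FormatType.FLOAT32)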
But I quantized the network before compiling and saving it to the .hef file, so why does it ask for the FLOAT32 data type on the output?