The given output format type UINT8 is not supported, should be HAILO_FORMAT_TYPE_FLOAT32

Hi,
I am trying to run inference with a quantized YOLOv5 model on a Hailo8 device. I get the following error message:

[HailoRT] [error] CHECK failed - The given output format type UINT8 is not supported, should be HAILO_FORMAT_TYPE_FLOAT32
[HailoRT] [error] CHECK_SUCCESS failed with status=HAILO_INVALID_ARGUMENT(2)
[HailoRT] [error] CHECK_SUCCESS failed with status=HAILO_INVALID_ARGUMENT(2)
[HailoRT] [error] CHECK_SUCCESS failed with status=HAILO_INVALID_ARGUMENT(2)
Traceback (most recent call last):
File "/local/workspace/FA/Detection/HWinfer.py", line 341, in <module>
t0 = time()
File "/local/workspace/FA/Detection/HWinfer.py", line 247, in HWinference
print('Inference on HW')
File "/local/workspace/hailo_virtualenv/lib/python3.10/site-packages/hailo_platform/pyhailort/pyhailort.py", line 930, in __enter__
self._infer_pipeline = _pyhailort.InferVStreams(self._configured_net_group._configured_network,
hailo_platform.pyhailort._pyhailort.HailoRTStatusException: 2

I first created a HEF file by running:

# Initialize a new client runner
runner = ClientRunner(hw_arch='hailo8')
# Translate YOLO model from ONNX
runner.translate_onnx_model(onnx_path, end_node_names=end_node_names)

# Add model script with NMS layer at the network's output.
nms_scores_th = 0.35
nms_iou_th    = 0.95
ratio4bits    = 0.2
opti          = 2
filepath = f'{model_name}_opti{opti}_ratio{ratio4bits:.1f}'
hef_path = filepath + '.hef'
"""
model_script_lines = ['normalization_rule1 = normalization([0.0, 0.0, 0.0], [255.0, 255.0, 255.0])\n',
      f'resize_input1 = resize(resize_shapes=[{HEIGHT},{WIDTH}])\n',
      f'model_optimization_flavor(optimization_level={opti}, batch_size=8)\n',
      f'model_optimization_config(compression_params, auto_4bit_weights_ratio={ratio4bits})\n',
      f'nms_postprocess(meta_arch={model_type}, engine=cpu, nms_scores_th={nms_scores_th}, nms_iou_th={nms_iou_th})\n',]
runner.load_model_script(''.join(model_script_lines))
runner.optimize(calib_dataset)
# Run quantized inference
with runner.infer_context(InferenceContext.SDK_QUANTIZED) as ctx:
    nms_quantized_output = runner.infer(ctx, calib_dataset[:nCalib, ...])
# Compile and save to hef file
hef = runner.compile()
print(f'Saving compiled model: {hef_path}')
with open(hef_path, 'wb') as f:
    f.write(hef)
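
For reference, the compiled HEF's output metadata can be inspected right after compilation, to see which format type the runtime-side pipeline reports for each output. A minimal sketch (assuming pyhailort's VStreamInfo exposes format.type, as in recent HailoRT releases):

from hailo_platform import HEF

# Each output vstream info carries the format type (e.g. FLOAT32)
# that the runtime will expect for that output by default.
compiled_hef = HEF(hef_path)
for info in compiled_hef.get_output_vstream_infos():
    print(info.name, info.format.type)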

Then I load the model and run inference on the board:

import numpy as np

from hailo_platform import (
    HEF,
    ConfigureParams,
    FormatType,
    HailoSchedulingAlgorithm,
    HailoStreamInterface,
    InferVStreams,
    InputVStreamParams,
    # InputVStreams,
    OutputVStreamParams,
    # OutputVStreams,
    VDevice,
)
# Setting VDevice params to disable the HailoRT service feature
params = VDevice.create_params()
params.scheduling_algorithm = HailoSchedulingAlgorithm.NONE
# Create virtual device on host
target = VDevice(params=params)
hef = HEF(hef_path)
# Get the 'network groups' (connectivity groups, aka. 'different networks') information from the .hef
configure_params = ConfigureParams.create_from_hef(hef=hef, interface=HailoStreamInterface.PCIe)
model_name = hef.get_network_group_names()[0]
configure_params[model_name].batch_size = 4
network_groups = target.configure(hef, configure_params)
network_group = network_groups[0]
network_group_params = network_group.create_params()
# Create input and output virtual streams params
input_vstreams_params  = InputVStreamParams.make(network_group, quantized=True, format_type=FormatType.UINT8)
output_vstreams_params = OutputVStreamParams.make(network_group, quantized=True, format_type=FormatType.UINT8)
input_vstream_info     = hef.get_input_vstream_infos()[0]
output_vstream_info    = hef.get_output_vstream_infos()[0]
dataset = calib_dataset.astype(np.uint8)
input_data = {input_vstream_info.name: dataset}
with InferVStreams(network_group, input_vstreams_params, output_vstreams_params) as infer_pipeline:
    with network_group.activate(network_group_params):
        infer_results = infer_pipeline.infer(input_data)
        nms_output = infer_results[output_vstream_info.name]

I quantized the network before saving it in the hef file, so why does it ask for FLOAT32 data type?

Hey @fabrice.auzanneau,

  1. Check HEF Output Format:

    print(hef.get_output_vstream_infos())
    
    • If the expected format is FLOAT32, change your inference setup.
  2. Update Output Stream Format:
    Modify your inference pipeline (see the consolidated sketch after this list):

    output_vstreams_params = OutputVStreamParams.make(network_group, quantized=True, format_type=FormatType.FLOAT32)
    
  3. Force NMS Output to UINT8:
    In your model script, update nms_postprocess:

    f'nms_postprocess(..., format_type=UINT8)\n'
    

Thanks for your answer.
Here is the output of
print(hef.get_output_vstream_infos())

[VStreamInfo("model/yolov8_nms_postprocess")]

When I add format_type=UINT8 to nms_postprocess:

model_script_lines = ['normalization_rule1 = normalization([0.0, 0.0, 0.0], [255.0, 255.0, 255.0])\n',
      f'resize_input1 = resize(resize_shapes=[{HEIGHT},{WIDTH}])\n',
      f'model_optimization_flavor(optimization_level={opti}, batch_size=8)\n',
      f'model_optimization_config(compression_params, auto_4bit_weights_ratio={ratio4bits})\n',
      'post_quantization_optimization(adaround, shuffle=False)\n',
      f'nms_postprocess(meta_arch={model_type}, engine=cpu, nms_scores_th={nms_scores_th}, nms_iou_th={nms_iou_th}, format_type=UINT8)\n',]
runner.load_model_script(''.join(model_script_lines))

I get an error message:

Model script parsing failed: No argument named format_type. Please make sure to use the argument name as it appears in the command description…

Hi @fabrice.auzanneau
In your first post you mentioned the model is yolov5, but later the output_vstream_info shows yolov8_nms_postprocess. From the logs you provided, I could not deduce the value you used for model_type or for other variables such as end_node_names.

Hi, thanks for your answer.
I am using model_type = 'yolov8' because using yolov5 crashes my script.
Here are the first lines of the script:

# Yolo v5n
model_name = 'yolov5nu'
model_type = 'yolov8'
end_node_names = ['/model.24/cv2.0/cv2.0.2/Conv', '/model.24/cv3.0/cv3.0.2/Conv', '/model.24/cv2.1/cv2.1.2/Conv', '/model.24/cv3.1/cv3.1.2/Conv', '/model.24/cv2.2/cv2.2.2/Conv', '/model.24/cv3.2/cv3.2.2/Conv']
onnx_path  = f'./{model_name}.onnx'
# Initialize a new client runner
runner = ClientRunner(hw_arch='hailo8')
# Translate YOLO model from ONNX
runner.translate_onnx_model(onnx_path, end_node_names=end_node_names)

Then:

model_script_lines = ['normalization_rule1 = normalization([0.0, 0.0, 0.0], [255.0, 255.0, 255.0])\n',
      f'resize_input1 = resize(resize_shapes=[{HEIGHT},{WIDTH}])\n',
      f'model_optimization_flavor(optimization_level={opti}, batch_size=8)\n',
      f'model_optimization_config(compression_params, auto_4bit_weights_ratio={ratio4bits})\n',
      'post_quantization_optimization(adaround, shuffle=False)\n',
      f'nms_postprocess(meta_arch={model_type}, engine=cpu, nms_scores_th={nms_scores_th}, nms_iou_th={nms_iou_th})\n',]
runner.load_model_script(''.join(model_script_lines))
runner.optimize(calib_dataset)

If I use model_type = 'yolov5', runner.optimize() crashes:

Traceback (most recent call last):
File "/local/workspace/FA/Detection/HWinfer.py", line 306, in <module>
runner.optimize(calib_dataset)
File "/local/workspace/hailo_virtualenv/lib/python3.10/site-packages/hailo_sdk_common/states/states.py", line 16, in wrapped_func
return func(self, *args, **kwargs)
File "/local/workspace/hailo_virtualenv/lib/python3.10/site-packages/hailo_sdk_client/runner/client_runner.py", line 2128, in optimize
self._optimize(calib_data, data_type=data_type, work_dir=work_dir)
File "/local/workspace/hailo_virtualenv/lib/python3.10/site-packages/hailo_sdk_common/states/states.py", line 16, in wrapped_func
return func(self, *args, **kwargs)
File "/local/workspace/hailo_virtualenv/lib/python3.10/site-packages/hailo_sdk_client/runner/client_runner.py", line 1969, in _optimize
self._optimize_full_precision(calib_data=calib_data, data_type=data_type)
File "/local/workspace/hailo_virtualenv/lib/python3.10/site-packages/hailo_sdk_client/runner/client_runner.py", line 2034, in _optimize_full_precision
self._sdk_backend.optimize_full_precision(calib_data=calib_data, data_type=data_type)
File "/local/workspace/hailo_virtualenv/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/sdk_backend.py", line 1591, in optimize_full_precision
model, params = self._apply_model_modification_commands(model, params, update_model_and_params)
File "/local/workspace/hailo_virtualenv/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/sdk_backend.py", line 1482, in _apply_model_modification_commands
model, params = command.apply(model, params, hw_consts=self.hw_arch.consts)
File "/local/workspace/hailo_virtualenv/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/script_parser/nms_postprocess_command.py", line 397, in apply
self._update_config_file(hailo_nn)
File "/local/workspace/hailo_virtualenv/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/script_parser/nms_postprocess_command.py", line 558, in _update_config_file
self._update_config_layers(hailo_nn)
File "/local/workspace/hailo_virtualenv/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/script_parser/nms_postprocess_command.py", line 608, in _update_config_layers
self._set_yolo_config_layers(hailo_nn)
File "/local/workspace/hailo_virtualenv/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/script_parser/nms_postprocess_command.py", line 667, in _set_yolo_config_layers
raise AllocatorScriptParserException(msg)
hailo_sdk_client.sdk_backend.sdk_backend_exceptions.AllocatorScriptParserException: Cannot infer bbox conv layers automatically. Please specify the bbox layer in the json configuration file
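
Two notes that may explain this last error. First, yolov5nu is one of Ultralytics' "u" variants, which use the anchor-free YOLOv8-style detection head; that would be why meta_arch=yolov8 parses while meta_arch=yolov5 (anchor-based) cannot map the six end nodes to bbox conv layers automatically. Second, the error message itself points at the workaround: nms_postprocess also accepts a path to a JSON config file as its first argument, in which the bbox decoding layers are named explicitly. A hypothetical sketch (the config path below is a placeholder; the expected JSON schema, including the bbox decoder layer names, can be copied from the reference NMS configs shipped with the Hailo Model Zoo):

# Hypothetical: pass an explicit NMS config instead of relying on
# automatic bbox-layer inference.
nms_config_path = './yolov5nu_nms_config.json'  # placeholder path
model_script_lines = [
    'normalization_rule1 = normalization([0.0, 0.0, 0.0], [255.0, 255.0, 255.0])\n',
    f'resize_input1 = resize(resize_shapes=[{HEIGHT},{WIDTH}])\n',
    f'nms_postprocess("{nms_config_path}", meta_arch={model_type}, engine=cpu)\n',
]
runner.load_model_script(''.join(model_script_lines))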