Currently I am optimizing the model on the Linux OS that came with the Hailo chip and camera. How do I use a GPU? I have a 4090 in my system, but I don't know whether I can quantize the model locally without the Hailo architecture. Beyond that, how would I use other GPUs (A100, H100, etc.)? Kindly share any documentation or code that provides some guidance on using GPUs.
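For context, the only check I currently know how to run is the plain TensorFlow device query below (nothing Hailo-specific, so I'm only assuming it reflects what the optimizer would actually use):

import tensorflow as tf

# List the CUDA devices TensorFlow can see; an empty list means the
# RTX 4090 is not exposed to this environment at all.
print(tf.config.list_physical_devices("GPU"))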
I've attached my code for reference.
import json
import os  # needed if the commented-out NMS-config export below is re-enabled
import onnx
import numpy as np
from pprint import pprint
import tensorflow as tf
from IPython.display import SVG
from matplotlib import patches
from matplotlib import pyplot as plt
from PIL import Image
from tensorflow.python.eager.context import eager_mode
from hailo_sdk_client import ClientRunner, InferenceContext
onnx_model_name = "yolo11m_EXP3"
onnx_path = "/local/shared_with_docker/models/yolo11m_EXP3.onnx"
chosen_hw_arch = "hailo8"  # Specify the target hardware architecture
runner = ClientRunner(hw_arch=chosen_hw_arch)
# yolo11m end nodes
end_node_names = [
    "/model.23/cv2.0/cv2.0.2/Conv",  # 41 reg_layer
    "/model.23/cv3.0/cv3.0.2/Conv",  # 42 cls_layer
    "/model.23/cv2.1/cv2.1.2/Conv",  # 52 reg_layer
    "/model.23/cv3.1/cv3.1.2/Conv",  # 53 cls_layer
    "/model.23/cv2.2/cv2.2.2/Conv",  # 62 reg_layer
    "/model.23/cv3.2/cv3.2.2/Conv",  # 63 cls_layer
]
model = onnx.load(onnx_path)
hn, npz = runner.translate_onnx_model(
    onnx_path,
    onnx_model_name,
    end_node_names=end_node_names,
    net_input_shapes={"images": [1, 3, 640, 640]},
)
# hailo_model_har_name = f"/local/shared_with_docker/models/{onnx_model_name}_hailo_model.har"
# hailo_model_har_name = f"/local/exploration/{onnx_model_name}_hailo_model.har"
# runner.save_har(hailo_model_har_name)
hailo_model_har_name = f"/local/shared_with_docker/{onnx_model_name}_hailo_model.har"
runner.save_har(hailo_model_har_name)
runner = ClientRunner(har=hailo_model_har_name)
try:
    # Access the HailoNet as an OrderedDict
    hn_dict = runner.get_hn()  # Or use runner._hn if get_hn() is unavailable
    print("Inspecting layers from HailoNet (OrderedDict):")
    for key, value in hn_dict.items():
        print(f"Key: {key}")
        pprint(value)
        print("\n" + "=" * 80 + "\n")  # Separator between layers for clarity
except Exception as e:
    print(f"Error while inspecting hn_dict: {e}")
# nms_layer_config = {
#     "nms_scores_th": 0.2,
#     "nms_iou_th": 0.7,
#     "image_dims": [640, 640],
#     "max_proposals_per_class": 100,
#     "classes": 1,
#     "regression_length": 16,
#     "background_removal": False,
#     "background_removal_index": 0,
#     "bbox_decoders": [
#         {"name": "bbox_decoder71", "stride": 8, "reg_layer": "conv71", "cls_layer": "conv74"},
#         {"name": "bbox_decoder87", "stride": 16, "reg_layer": "conv87", "cls_layer": "conv90"},
#         {"name": "bbox_decoder102", "stride": 32, "reg_layer": "conv102", "cls_layer": "conv105"},
#     ],
# }
# output_dir = "/local/exploration/nms_config"
# os.makedirs(output_dir, exist_ok=True)
# output_path = os.path.join(output_dir, "nms_layer_config_yolo11m.json")
# with open(output_path, "w") as json_file:
#     json.dump(nms_layer_config, json_file, indent=4)
# print(f"NMS layer configuration saved to {output_path}")
# original dataset
path = "/local/shared_with_docker/calliberation_data_v3_npy/processed_calibration_data.npy"
N, H, W, C = 1233, 640, 640, 3
calib_data = np.memmap(path, dtype=np.float32, mode="r", shape=(N, H, W, C))
# calib_data = np.load(path)  # alternative: load the entire array into RAM
print(calib_data.shape, calib_data.dtype, type(calib_data))
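# Quick sanity check: the model script below applies
# normalization([0.0, 0.0, 0.0], [255.0, 255.0, 255.0]) on-chip, so the
# calibration tensors are assumed to hold raw 0-255 pixel values rather
# than pre-normalized ones; peek at a small slice to avoid paging the
# whole memmap into RAM.
print("calib range:", float(calib_data[:8].min()), "to", float(calib_data[:8].max()))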
## SDK_FP_OPTIMIZED
# # conf for yolo8n:
# alls = """
# normalization1 = normalization([0.0, 0.0, 0.0], [255.0, 255.0, 255.0])
# change_output_activation(conv74, sigmoid)
# change_output_activation(conv90, sigmoid)
# change_output_activation(conv105, sigmoid)
# nms_postprocess("/local/shared_with_docker/nms_config/nms_layer_config_yolo11m.json", meta_arch=yolov8, engine=cpu)
# allocator_param(width_splitter_defuse=disabled)
# """
# include in alls:
# post_quantization_optimization(finetune, policy=enabled, loss_factors=[0.125, 2, 0.25, 0.125, 2, 0.25, 0.125, 2, 0.25, 1, 1, 1], dataset_size=4000, epochs=8, learning_rate=1e-5, loss_layer_names=[conv36, conv37, conv38, conv47, conv48, conv49, conv57, conv58, conv59, conv33, conv43, conv54], loss_types=[l2, l2, l2, l2, l2, l2, l2, l2, l2, l2rel, l2rel, l2rel])
# performance_param(compiler_optimization_level=max)
# allocator_param(enable_post_split_average_buffers=disabled)
# conf for yolo11m:
alls = """
normalization1 = normalization([0.0, 0.0, 0.0], [255.0, 255.0, 255.0])
change_output_activation(conv74, sigmoid)
change_output_activation(conv90, sigmoid)
change_output_activation(conv105, sigmoid)
nms_postprocess("/local/shared_with_docker/nms_config/nms_layer_config_yolo11m.json", meta_arch=yolov8, engine=cpu)
model_optimization_config(calibration, batch_size=1)
performance_param(compiler_optimization_level=max)
"""
# performance_param(compiler_optimization_level=max) - to achieve optimal performance, set compiler_optimization_level to "max"
# post_quantization_optimization(finetune, policy=enabled, learning_rate=0.00001)
# resources_param(max_apu_utilization=0.8, max_compute_16bit_utilization=0.8, max_compute_utilization=0.8, max_control_utilization=0.8, max_input_aligner_utilization=0.8, max_memory_utilization=0.8, max_utilization=0.0)
# model_optimization_flavor(optimization_level=0)
runner.load_model_script(alls)
runner.optimize_full_precision()
## SDK_QUANTIZED
runner.optimize(calib_data)
# Save the optimized model to a new Quantized HAR file
quantized_model_har_path = f"/local/shared_with_docker/{onnx_model_name}_quantized_model.har"
runner.save_har(quantized_model_har_path)
print(f"Quantized HAR file saved to: {quantized_model_har_path}")
## COMPILATION
runner = ClientRunner(har=quantized_model_har_path)
print("[info] ClientRunner initialized successfully.")
hef = runner.compile()
file_name = f"/local/shared_with_docker/{onnx_model_name}.hef"
with open(file_name, "wb") as f:
    f.write(hef)