Hello. I’m having a difficult problem. I’m getting an error when converting my ONNX model to a HEF file. The conversion code is:
import hailo_sdk_client
from hailo_sdk_client import ClientRunner
import numpy as np
import os
import logging
import traceback
# Set the log level to DEBUG to get more information; records are sent both
# to a log file and to the console stream.
log_handlers = [
    logging.FileHandler('hailo_quantization.log'),
    logging.StreamHandler(),
]
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=log_handlers,
)
# --- 1. Hailo hardware architecture ---
chosen_hw_arch = "hailo8"

# --- 2. Model paths and file names ---
onnx_model_name = "head_singleframe"
onnx_path = "/home/ubuntu/ycy/head_singleframe.onnx"
output_dir = "/home/ubuntu/ycy/onnx_models/hailo_outputs"
# Every artifact of the pipeline is written under output_dir and is named
# after the model.
hailo_model_har_path = os.path.join(
    output_dir, onnx_model_name + "_hailo_model.har"
)
hailo_quantized_har_path = os.path.join(
    output_dir, onnx_model_name + "_hailo_quantized_model.har"
)
hailo_model_hef_path = os.path.join(output_dir, onnx_model_name + ".hef")

# --- 3. Make sure the output directory exists ---
os.makedirs(output_dir, exist_ok=True)
# --- 4. Initialize the Hailo ClientRunner ---
try:
    runner = ClientRunner(hw_arch=chosen_hw_arch)
except Exception as e:
    print(f"❌ Failed to initialize ClientRunner: {e}")
    # `exit()` is a helper injected by the `site` module for interactive
    # sessions; raising SystemExit works in every interpreter configuration.
    raise SystemExit(1)
else:
    print(f"✅ Initialized ClientRunner with hardware architecture: {chosen_hw_arch}")

# --- 5. Verify that the ONNX model file exists ---
if not os.path.exists(onnx_path):
    print(f"❌ ONNX model file not found: {onnx_path}")
    raise SystemExit(1)
print(f"✅ ONNX model file found: {onnx_path}")
# --- 6. Declare the multiple input nodes of the ONNX model ---
net_input_shapes = {
    "inst_dfg0": (1, 900, 512),
    "anchor": (1, 900, 11),
    "anchor_embed": (1, 900, 256),
    "time_interval": (1, 1),  # matches the ONNX model input shape [1, 1]
}

# --- 7. Translate the ONNX model to HAR ---
try:
    # The input names double as the start nodes of the translation.
    hn, npz = runner.translate_onnx_model(
        model=onnx_path,
        net_name=onnx_model_name,
        net_input_shapes=net_input_shapes,
        start_node_names=list(net_input_shapes),
    )
except Exception as e:
    print(f"❌ Failed to translate ONNX model to HAR: {e}")
    traceback.print_exc()
    # `exit()` is only guaranteed in interactive sessions; SystemExit is not.
    raise SystemExit(1)
else:
    print("✅ Successfully translated ONNX model to HAR")
# --- 8. Save the HAR model ---
try:
    runner.save_har(hailo_model_har_path)
except Exception as e:
    print(f"❌ Failed to save HAR model: {e}")
    # Print the traceback as the translate and quantize steps do, so a failure
    # here is just as easy to diagnose.
    traceback.print_exc()
    # `exit()` is only guaranteed in interactive sessions; SystemExit is not.
    raise SystemExit(1)
else:
    print(f"✅ HAR model saved to: {hailo_model_har_path}")
# --- 9. Calibration dataset preparation ---
def _sanitize(values):
    """Return *values* with NaN and +/-inf entries replaced by 0.0."""
    return np.nan_to_num(values, nan=0.0, posinf=0.0, neginf=0.0)


def _print_dataset_summary(title, dataset):
    """Print *title*, then the shape of every array in *dataset*.

    Also warns about arrays whose values are all identical.
    """
    print(title)
    for name, array in dataset.items():
        print(f" - {name}: {array.shape}")
        # NOTE(review): a constant input has a zero-width value range.
        # Presumably this can break range/activation fitting during
        # quantization (e.g. NaN slopes) -- confirm against the Hailo docs.
        if array.size and float(array.min()) == float(array.max()):
            print(
                f"⚠️ Calibration input {name} is constant "
                f"({float(array.flat[0])}) across all samples"
            )


def generate_calibration_dataset(
    num_samples: int = 100,
    calib_data_path: str = "/home/ubuntu/ycy/calib_data/calib_data.npz",
):
    """Build the calibration dataset passed to ``runner.optimize``.

    If ``calib_data_path`` exists, up to ``num_samples`` samples are read from
    its ``calib_data`` entry; otherwise random data is generated.

    Args:
        num_samples: Maximum number of samples to load, or the exact number
            of random samples to generate.
        calib_data_path: ``.npz`` file whose ``calib_data`` entry is a
            sequence of dicts with the keys ``inst_dfg0``, ``anchor``,
            ``anchor_embed`` and ``time_interval``.

    Returns:
        A dict mapping Hailo input-layer names to ``float32`` arrays whose
        first axis indexes the calibration samples.

    Raises:
        ValueError: If the file exists but yields no samples.
    """
    # Hailo-side names of the ONNX inputs.
    # NOTE(review): these must match the start-node mapping reported by
    # translate_onnx_model -- confirm whenever the model changes.
    hailo_input_names_mapping = {
        'inst_dfg0': 'head_singleframe/input_layer1',
        'anchor': 'head_singleframe/input_layer2',
        'anchor_embed': 'head_singleframe/input_layer3',
        'time_interval': 'head_singleframe/input_layer4',
    }

    if not os.path.exists(calib_data_path):
        print(f"⚠️ Calibration data not found at {calib_data_path}, generating random data")
        random_data_dict = {
            hailo_input_names_mapping['inst_dfg0']: np.random.randn(num_samples, 1, 900, 512).astype(np.float32) / 10.0,
            hailo_input_names_mapping['anchor']: np.random.randn(num_samples, 1, 900, 11).astype(np.float32) / 100.0,
            hailo_input_names_mapping['anchor_embed']: np.random.randn(num_samples, 1, 900, 256).astype(np.float32) / 10.0,
            hailo_input_names_mapping['time_interval']: np.zeros((num_samples, 1, 1, 1), dtype=np.float32),
        }
        _print_dataset_summary("✅ Generated random dataset shapes:", random_data_dict)
        return random_data_dict

    print(f"✅ Loading real calibration data from {calib_data_path}")
    # NOTE(security): allow_pickle=True unpickles arbitrary objects; only load
    # calibration files from a trusted source.
    with np.load(calib_data_path, allow_pickle=True) as archive:
        samples = archive["calib_data"][:num_samples]
    if len(samples) == 0:
        raise ValueError(f"No calibration samples available in {calib_data_path}")

    def stack_scaled(key, scale):
        """Sanitize, scale and stack one input across all samples."""
        return np.stack([_sanitize(s[key]) / scale for s in samples]).astype(np.float32)

    inst_dfg0_array = stack_scaled("inst_dfg0", 10.0)
    anchor_array = stack_scaled("anchor", 100.0)
    anchor_embed_array = stack_scaled("anchor_embed", 10.0)
    # time_interval gets one extra trailing axis per sample. It is sanitized
    # like the other inputs so that NaN/inf never reaches the quantizer.
    time_interval_array = np.stack(
        [np.expand_dims(_sanitize(s["time_interval"]), axis=-1) for s in samples]
    ).astype(np.float32)
    # Report the number of samples actually loaded; the file may hold fewer
    # than num_samples.
    print(f"✅ Loaded {len(samples)} real calibration samples.")

    calib_dataset = {
        hailo_input_names_mapping['inst_dfg0']: inst_dfg0_array,
        hailo_input_names_mapping['anchor']: anchor_array,
        hailo_input_names_mapping['anchor_embed']: anchor_embed_array,
        hailo_input_names_mapping['time_interval']: time_interval_array,
    }
    _print_dataset_summary("✅ Generated calibration dataset shapes:", calib_dataset)
    return calib_dataset
calib_dataset = generate_calibration_dataset(num_samples=100)
print(f"✅ Generated calibration dataset: {type(calib_dataset)}")

# --- 10. Quantize the HAR model ---
runner = ClientRunner(har=hailo_model_har_path)
alls_lines = [
    # Use the most minimal optimization configuration.
    # NOTE(review): this was meant to avoid layer-normalization decomposition
    # problems, but the optimizer log still shows a "Layer Norm Decomposition"
    # stage at optimization_level=0 -- confirm the flavor has that effect.
    'model_optimization_flavor(optimization_level=0, compression_level=0)',
]
print("--- Begin quantization ---")
runner.load_model_script('\n'.join(alls_lines))
try:
    runner.optimize(calib_dataset)
    runner.save_har(hailo_quantized_har_path)
except Exception as e:
    print(f"❌ Failed to quantize HAR model: {e}")
    traceback.print_exc()
    # `exit()` is only guaranteed in interactive sessions; SystemExit is not.
    raise SystemExit(1)
else:
    print(f"✅ Quantized HAR model saved to: {hailo_quantized_har_path}")
# --- 11. Export the HEF model ---
runner = ClientRunner(har=hailo_quantized_har_path)
try:
    compiled_hef = runner.compile()
    with open(hailo_model_hef_path, "wb") as f:
        f.write(compiled_hef)
except Exception as e:
    print(f"❌ Failed to compile HEF model: {e}")
    # Print the traceback as the translate and quantize steps do, so a
    # compiler failure is just as easy to diagnose.
    traceback.print_exc()
    # `exit()` is only guaranteed in interactive sessions; SystemExit is not.
    raise SystemExit(1)
else:
    print(f"✅ HEF model exported to: {hailo_model_hef_path}")
The error message is as follows:
[warning] Cannot use graphviz, so no visualizations will be created
✅ Initialized ClientRunner with hardware architecture: hailo8
✅ ONNX model file found: /home/ubuntu/ycy/head_singleframe.onnx
[info] Translation started on ONNX model head_singleframe
[info] Restored ONNX model head_singleframe (completion time: 00:00:00.02)
[info] Extracted ONNXRuntime meta-data for Hailo model (completion time: 00:00:00.09)
[info] Start nodes mapped from original model: 'inst_dfg0': 'head_singleframe/input_layer1', 'anchor': 'head_singleframe/input_layer2', 'anchor_embed': 'head_singleframe/input_layer3', 'time_interval': 'head_singleframe/input_layer4'.
[info] End nodes mapped from original model: '/layers.3/cls_layers/cls_layers.6/Add', '/layers.3/Concat', '/layers.3/quality_layers/quality_layers.6/Add'.
[info] Translation completed on ONNX model head_singleframe (completion time: 00:00:00.24)
✅ Successfully translated ONNX model to HAR
[info] Saved HAR to: /home/ubuntu/ycy/onnx_models/hailo_outputs/head_singleframe_hailo_model.har
✅ HAR model saved to: /home/ubuntu/ycy/onnx_models/hailo_outputs/head_singleframe_hailo_model.har
✅ Loading real calibration data from /home/ubuntu/ycy/calib_data/calib_data.npz
✅ Loaded 100 real calibration samples.
✅ Generated calibration dataset shapes:
- head_singleframe/input_layer1: (64, 1, 900, 512)
- head_singleframe/input_layer2: (64, 1, 900, 11)
- head_singleframe/input_layer3: (64, 1, 900, 256)
- head_singleframe/input_layer4: (64, 1, 1, 1)
✅ Generated calibration dataset: <class 'dict'>
--- Begin quantization ---
[info] Loading model script commands to head_singleframe from string
[info] Starting Model Optimization
[warning] Running model optimization with zero level of optimization is not recommended for production use and might lead to suboptimal accuracy results
[info] Model received quantization params from the hn
[info] Starting Mixed Precision
[info] Mixed Precision is done (completion time is 00:00:00.08)
[info] Starting Layer Norm Decomposition
[info] Using dataset with 64 entries for calibration
Calibration: 100%|███████████████████████████████████████████████████████████████████████████████████████████████| 64/64 [00:36<00:00, 1.75entries/s]
[info] Layer Norm Decomposition is done (completion time is 00:00:39.98)
[info] Starting Stats Collector
[info] Using dataset with 64 entries for calibration
Calibration: 100%|███████████████████████████████████████████████████████████████████████████████████████████████| 64/64 [00:25<00:00, 2.53entries/s]
[info] Stats Collector is done (completion time is 00:00:26.45)
[info] Starting Fix zp_comp Encoding
[info] Fix zp_comp Encoding is done (completion time is 00:00:00.00)
[info] matmul_equalization skipped
[info] activation fitting started for head_singleframe/activation1/act_op
/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/pwlf/pwlf.py:1248: RuntimeWarning: invalid value encountered in divide
self.slopes = np.divide(
[info] activation fitting started for head_singleframe/conv_var_inv_layer_normalization1/act_op
[info] No shifts available for layer head_singleframe/conv3/conv_op, using max shift instead. delta=0.9112
[info] No shifts available for layer head_singleframe/conv3/conv_op, using max shift instead. delta=0.4556
[info] No shifts available for layer head_singleframe/conv3/conv_op, using max shift instead. delta=0.4556
[info] activation fitting started for head_singleframe/conv_var_inv_layer_normalization2/act_op
[info] activation fitting started for head_singleframe/conv_var_inv_layer_normalization3/act_op
[info] activation fitting started for head_singleframe/conv_var_inv_layer_normalization4/act_op
[info] activation fitting started for head_singleframe/conv_var_inv_layer_normalization5/act_op
[info] activation fitting started for head_singleframe/conv_var_inv_layer_normalization6/act_op
[info] activation fitting started for head_singleframe/conv_var_inv_layer_normalization7/act_op
[info] activation fitting started for head_singleframe/conv_var_inv_layer_normalization8/act_op
[info] No shifts available for layer head_singleframe/conv14/conv_op, using max shift instead. delta=0.3263
[info] No shifts available for layer head_singleframe/conv14/conv_op, using max shift instead. delta=0.1632
❌ Failed to quantize HAR model: the exponent [[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]] is not in range [ 7 22]
Traceback (most recent call last):
File "/home/ubuntu/ycy/projects/mmdet3d_plugin/tools/test/hailoe.py", line 1025, in <module>
runner.optimize(calib_dataset)
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_sdk_common/states/states.py", line 16, in wrapped_func
return func(self, *args, **kwargs)
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_sdk_client/runner/client_runner.py", line 2093, in optimize
self._optimize(calib_data, data_type=data_type, work_dir=work_dir)
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_sdk_common/states/states.py", line 16, in wrapped_func
return func(self, *args, **kwargs)
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_sdk_client/runner/client_runner.py", line 1935, in _optimize
self._sdk_backend.full_quantization(calib_data, data_type=data_type, work_dir=work_dir)
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/sdk_backend.py", line 1045, in full_quantization
self._full_acceleras_run(self.calibration_data, data_type)
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/sdk_backend.py", line 1229, in _full_acceleras_run
optimization_flow.run()
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_model_optimization/tools/orchestator.py", line 306, in wrapper
return func(self, *args, **kwargs)
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_model_optimization/flows/optimization_flow.py", line 316, in run
step_func()
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_model_optimization/tools/orchestator.py", line 250, in wrapped
result = method(*args, **kwargs)
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_model_optimization/tools/subprocess_wrapper.py", line 122, in parent_wrapper
func(self, *args, **kwargs)
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_model_optimization/flows/optimization_flow.py", line 336, in step1
self._update_quantize_data()
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_model_optimization/tools/orchestator.py", line 250, in wrapped
result = method(*args, **kwargs)
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_model_optimization/flows/optimization_flow.py", line 324, in _update_quantize_data
self.set_quant_params(self._model.export_hw_params())
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_model_optimization/acceleras/model/hailo_model/hailo_model.py", line 467, in export_hw_params
return self._export_npz(mode=NpzExportMode.QNPZ)
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_model_optimization/acceleras/model/hailo_model/hailo_model.py", line 521, in _export_npz
params_exported = acceleras_layer.export_hw_params()
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_model_optimization/acceleras/hailo_layers/base_hailo_layer.py", line 440, in export_hw_params
return self.export_qnpz(convert=True)
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_model_optimization/acceleras/hailo_layers/base_hailo_layer.py", line 797, in export_qnpz
export_params = self._export_quant_internal()
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_model_optimization/acceleras/hailo_layers/base_hailo_layer.py", line 710, in _export_quant_internal
export_params.update(op.export_quant())
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_model_optimization/acceleras/atomic_ops/base_atomic_op.py", line 493, in export_quant
export_params.update(self.export_quant_weights())
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_model_optimization/acceleras/atomic_ops/activation_op.py", line 1565, in export_quant_weights
self.check_exp_range()
File "/home/ubuntu/miniconda3/envs/hailo/lib/python3.10/site-packages/hailo_model_optimization/acceleras/atomic_ops/activation_op.py", line 1059, in check_exp_range
raise AccelerasNegativeSlopesError(exponents.numpy(), valid_range)
hailo_model_optimization.acceleras.utils.acceleras_exceptions.AccelerasNegativeSlopesError: the exponent [[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]] is not in range [ 7 22]
Can you help me identify the problem?