Hi,
I’m getting an exception during the “optimization” phase of converting an ONNX model to HEF.
Specifically, the error is this one:
Call arguments received by layer 'lat_model' (type LATModel):
• inputs=tf.Tensor(shape=(1, 768, 768, 3), dtype=float32)
Full traceback here:
Traceback
[info] Fine Tune is done (completion time is 03:53:06.54)
[info] Starting Layer Noise Analysis
Full Quant Analysis: 0%| | 0/16 [00:00<?, ?iterations/s]Traceback (most recent call last):
File "/usr/local/bin/hailo", line 8, in <module>
sys.exit(main())
File "/usr/local/lib/python3.8/dist-packages/hailo_sdk_client/tools/cmd_utils/main.py", line 111, in main
ret_val = client_command_runner.run()
File "/usr/local/lib/python3.8/dist-packages/hailo_sdk_client/tools/cmd_utils/base_utils.py", line 68, in run
return self._run(argv)
File "/usr/local/lib/python3.8/dist-packages/hailo_sdk_client/tools/cmd_utils/base_utils.py", line 89, in _run
return args.func(args)
File "/usr/local/lib/python3.8/dist-packages/hailo_sdk_client/tools/optimize_cli.py", line 120, in run
self._runner.optimize(dataset, work_dir=args.work_dir)
File "/usr/local/lib/python3.8/dist-packages/hailo_sdk_common/states/states.py", line 16, in wrapped_func
return func(self, *args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/hailo_sdk_client/runner/client_runner.py", line 2093, in optimize
self._optimize(calib_data, data_type=data_type, work_dir=work_dir)
File "/usr/local/lib/python3.8/dist-packages/hailo_sdk_common/states/states.py", line 16, in wrapped_func
return func(self, *args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/hailo_sdk_client/runner/client_runner.py", line 1935, in _optimize
self._sdk_backend.full_quantization(calib_data, data_type=data_type, work_dir=work_dir)
File "/usr/local/lib/python3.8/dist-packages/hailo_sdk_client/sdk_backend/sdk_backend.py", line 1045, in full_quantization
self._full_acceleras_run(self.calibration_data, data_type)
File "/usr/local/lib/python3.8/dist-packages/hailo_sdk_client/sdk_backend/sdk_backend.py", line 1229, in _full_acceleras_run
optimization_flow.run()
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/tools/orchestator.py", line 306, in wrapper
return func(self, *args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/flows/optimization_flow.py", line 316, in run
step_func()
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/tools/orchestator.py", line 250, in wrapped
result = method(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/tools/subprocess_wrapper.py", line 111, in parent_wrapper
raise SubprocessTracebackFailure(*child_messages)
hailo_model_optimization.acceleras.utils.acceleras_exceptions.SubprocessTracebackFailure: Subprocess failed with traceback
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/tools/subprocess_wrapper.py", line 73, in child_wrapper
func(self, *args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/flows/optimization_flow.py", line 347, in step3
self.finalize_optimization()
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/tools/orchestator.py", line 250, in wrapped
result = method(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/flows/optimization_flow.py", line 405, in finalize_optimization
self._noise_analysis()
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/tools/orchestator.py", line 250, in wrapped
result = method(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/flows/optimization_flow.py", line 585, in _noise_analysis
algo.run()
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/algorithms/optimization_algorithm.py", line 50, in run
return super().run()
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/algorithms/algorithm_base.py", line 151, in run
self._run_int()
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/algorithms/hailo_layer_noise_analysis.py", line 83, in _run_int
self.analyze_full_quant_net()
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/algorithms/hailo_layer_noise_analysis.py", line 197, in analyze_full_quant_net
lat_model.predict_on_batch(inputs)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 2603, in predict_on_batch
outputs = self.predict_function(iterator)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/tmp/__autograph_generated_file2kyb_0w6.py", line 15, in tf__predict_function
retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 2155, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 2143, in run_step
outputs = model.predict_step(data)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 2111, in predict_step
return self(x, training=False)
File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/tmp/__autograph_generated_file5np1nct9.py", line 188, in tf__call
ag__.for_stmt(ag__.converted_call(ag__.ld(self)._model.flow.toposort, (), None, fscope), None, loop_body_5, get_state_9, set_state_9, (), {'iterate_names': 'lname'})
File "/tmp/__autograph_generated_file5np1nct9.py", line 167, in loop_body_5
ag__.if_stmt(ag__.not_(continue__1), if_body_3, else_body_3, get_state_8, set_state_8, (), 0)
File "/tmp/__autograph_generated_file5np1nct9.py", line 94, in if_body_3
n_ancestors = ag__.converted_call(ag__.ld(self)._native_model.flow.ancestors, (ag__.ld(lname),), None, fscope)
File "/tmp/__autograph_generated_filevy4hyjj8.py", line 12, in tf__ancestors
retval_ = ag__.converted_call(ag__.ld(nx).ancestors, (ag__.ld(self), ag__.ld(source)), None, fscope)
File "/tmp/__autograph_generated_file_8xa5mvx.py", line 24, in tf__ancestors
anc = {ag__.ld(n) for (n, d) in ag__.converted_call(ag__.converted_call(ag__.ld(nx).shortest_path_length, (ag__.ld(G),), dict(target=ag__.ld(source)), fscope).items, (), None, fscope)}
File "/tmp/__autograph_generated_filendi24xs0.py", line 233, in tf__shortest_path_length
ag__.if_stmt((ag__.ld(source) is None), if_body_12, else_body_12, get_state_12, set_state_12, ('paths', 'G'), 1)
File "/tmp/__autograph_generated_filendi24xs0.py", line 137, in if_body_12
ag__.if_stmt((ag__.ld(target) is None), if_body_6, else_body_6, get_state_6, set_state_6, ('paths', 'G'), 1)
File "/tmp/__autograph_generated_filendi24xs0.py", line 96, in else_body_6
ag__.if_stmt(ag__.converted_call(ag__.ld(G).is_directed, (), None, fscope), if_body_3, else_body_3, get_state_3, set_state_3, ('G',), 1)
File "/tmp/__autograph_generated_filendi24xs0.py", line 91, in if_body_3
G = ag__.converted_call(ag__.ld(G).reverse, (), dict(copy=False), fscope)
File "/tmp/__autograph_generated_file07hvlrb6.py", line 41, in tf__reverse
ag__.if_stmt(ag__.ld(copy), if_body, else_body, get_state, set_state, ('do_return', 'retval_'), 2)
File "/tmp/__autograph_generated_file07hvlrb6.py", line 36, in else_body
retval_ = ag__.converted_call(ag__.ld(nx).graphviews.reverse_view, (ag__.ld(self),), None, fscope)
TypeError: in user code:
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 2169, in predict_function *
return step_function(self, iterator)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 2155, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 2143, in run_step **
outputs = model.predict_step(data)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 2111, in predict_step
return self(x, training=False)
File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/tmp/__autograph_generated_file5np1nct9.py", line 188, in tf__call
ag__.for_stmt(ag__.converted_call(ag__.ld(self)._model.flow.toposort, (), None, fscope), None, loop_body_5, get_state_9, set_state_9, (), {'iterate_names': 'lname'})
File "/tmp/__autograph_generated_file5np1nct9.py", line 167, in loop_body_5
ag__.if_stmt(ag__.not_(continue__1), if_body_3, else_body_3, get_state_8, set_state_8, (), 0)
File "/tmp/__autograph_generated_file5np1nct9.py", line 94, in if_body_3
n_ancestors = ag__.converted_call(ag__.ld(self)._native_model.flow.ancestors, (ag__.ld(lname),), None, fscope)
File "/tmp/__autograph_generated_filevy4hyjj8.py", line 12, in tf__ancestors
retval_ = ag__.converted_call(ag__.ld(nx).ancestors, (ag__.ld(self), ag__.ld(source)), None, fscope)
File "/tmp/__autograph_generated_file_8xa5mvx.py", line 24, in tf__ancestors
anc = {ag__.ld(n) for (n, d) in ag__.converted_call(ag__.converted_call(ag__.ld(nx).shortest_path_length, (ag__.ld(G),), dict(target=ag__.ld(source)), fscope).items, (), None, fscope)}
File "/tmp/__autograph_generated_filendi24xs0.py", line 233, in tf__shortest_path_length
ag__.if_stmt((ag__.ld(source) is None), if_body_12, else_body_12, get_state_12, set_state_12, ('paths', 'G'), 1)
File "/tmp/__autograph_generated_filendi24xs0.py", line 137, in if_body_12
ag__.if_stmt((ag__.ld(target) is None), if_body_6, else_body_6, get_state_6, set_state_6, ('paths', 'G'), 1)
File "/tmp/__autograph_generated_filendi24xs0.py", line 96, in else_body_6
ag__.if_stmt(ag__.converted_call(ag__.ld(G).is_directed, (), None, fscope), if_body_3, else_body_3, get_state_3, set_state_3, ('G',), 1)
File "/tmp/__autograph_generated_filendi24xs0.py", line 91, in if_body_3
G = ag__.converted_call(ag__.ld(G).reverse, (), dict(copy=False), fscope)
File "/tmp/__autograph_generated_file07hvlrb6.py", line 41, in tf__reverse
ag__.if_stmt(ag__.ld(copy), if_body, else_body, get_state, set_state, ('do_return', 'retval_'), 2)
File "/tmp/__autograph_generated_file07hvlrb6.py", line 36, in else_body
retval_ = ag__.converted_call(ag__.ld(nx).graphviews.reverse_view, (ag__.ld(self),), None, fscope)
TypeError: Exception encountered when calling layer 'lat_model' (type LATModel).
in user code:
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/algorithms/lat_utils/lat_model.py", line 340, in call *
n_ancestors = self._native_model.flow.ancestors(lname)
File "/usr/local/lib/python3.8/dist-packages/hailo_model_optimization/acceleras/model/hailo_model/model_flow.py", line 31, in ancestors *
return nx.ancestors(self, source)
File "/usr/local/lib/python3.8/dist-packages/networkx/algorithms/dag.py", line 74, in ancestors *
anc = {n for n, d in nx.shortest_path_length(G, target=source).items()}
File "/usr/local/lib/python3.8/dist-packages/networkx/algorithms/shortest_paths/generic.py", line 273, in shortest_path_length *
G = G.reverse(copy=False)
File "/usr/local/lib/python3.8/dist-packages/networkx/classes/digraph.py", line 1221, in reverse *
return nx.graphviews.reverse_view(self)
TypeError: tf__func() missing 1 required keyword-only argument: '__wrapper'
Call arguments received by layer 'lat_model' (type LATModel):
• inputs=tf.Tensor(shape=(1, 768, 768, 3), dtype=float32)
The model I’m trying to convert is a UNET model that was trained with PyTorch and then converted to ONNX. I also use this model for inference, either directly or after converting it to TensorRT, with no problem.
This is the ‘unet.dot’ created by hailo visualizer after the conversion:
strict digraph "" {
input_layer1 -> "conv1 (3x3/1) (3->64) +Relu" [label="[-1, 768, 768, 3]"];
"conv1 (3x3/1) (3->64) +Relu" -> "conv2 (3x3/1) (64->64) +Relu" [label="[-1, 768, 768, 64]"];
"conv2 (3x3/1) (64->64) +Relu" -> "maxpool1 (2x2/2)" [label="[-1, 768, 768, 64]"];
"conv2 (3x3/1) (64->64) +Relu" -> concat4 [label="[-1, 768, 768, 64]"];
"maxpool1 (2x2/2)" -> "conv3 (3x3/1) (64->128) +Relu" [label="[-1, 384, 384, 64]"];
"conv3 (3x3/1) (64->128) +Relu" -> "conv4 (3x3/1) (128->128) +Relu" [label="[-1, 384, 384, 128]"];
"conv4 (3x3/1) (128->128) +Relu" -> "maxpool2 (2x2/2)" [label="[-1, 384, 384, 128]"];
"conv4 (3x3/1) (128->128) +Relu" -> concat3 [label="[-1, 384, 384, 128]"];
"maxpool2 (2x2/2)" -> "conv5 (3x3/1) (128->256) +Relu" [label="[-1, 192, 192, 128]"];
"conv5 (3x3/1) (128->256) +Relu" -> "conv6 (3x3/1) (256->256) +Relu" [label="[-1, 192, 192, 256]"];
"conv6 (3x3/1) (256->256) +Relu" -> "maxpool3 (2x2/2)" [label="[-1, 192, 192, 256]"];
"conv6 (3x3/1) (256->256) +Relu" -> concat2 [label="[-1, 192, 192, 256]"];
"maxpool3 (2x2/2)" -> "conv7 (3x3/1) (256->512) +Relu" [label="[-1, 96, 96, 256]"];
"conv7 (3x3/1) (256->512) +Relu" -> "conv8 (3x3/1) (512->512) +Relu" [label="[-1, 96, 96, 512]"];
"conv8 (3x3/1) (512->512) +Relu" -> "maxpool4 (2x2/2)" [label="[-1, 96, 96, 512]"];
"conv8 (3x3/1) (512->512) +Relu" -> concat1 [label="[-1, 96, 96, 512]"];
"maxpool4 (2x2/2)" -> "conv9 (3x3/1) (512->1024) +Relu" [label="[-1, 48, 48, 512]"];
"conv9 (3x3/1) (512->1024) +Relu" -> "conv10 (3x3/1) (1024->1024) +Relu" [label="[-1, 48, 48, 1024]"];
"conv10 (3x3/1) (1024->1024) +Relu" -> "deconv1 (2x2/2) (1024->512)" [label="[-1, 48, 48, 1024]"];
"deconv1 (2x2/2) (1024->512)" -> concat1 [label="[-1, 96, 96, 512]"];
concat1 -> "conv11 (3x3/1) (1024->512) +Relu" [label="[-1, 96, 96, 1024]"];
"conv11 (3x3/1) (1024->512) +Relu" -> "conv12 (3x3/1) (512->512) +Relu" [label="[-1, 96, 96, 512]"];
"conv12 (3x3/1) (512->512) +Relu" -> "deconv2 (2x2/2) (512->256)" [label="[-1, 96, 96, 512]"];
"deconv2 (2x2/2) (512->256)" -> concat2 [label="[-1, 192, 192, 256]"];
concat2 -> "conv13 (3x3/1) (512->256) +Relu" [label="[-1, 192, 192, 512]"];
"conv13 (3x3/1) (512->256) +Relu" -> "conv14 (3x3/1) (256->256) +Relu" [label="[-1, 192, 192, 256]"];
"conv14 (3x3/1) (256->256) +Relu" -> "deconv3 (2x2/2) (256->128)" [label="[-1, 192, 192, 256]"];
"deconv3 (2x2/2) (256->128)" -> concat3 [label="[-1, 384, 384, 128]"];
concat3 -> "conv15 (3x3/1) (256->128) +Relu" [label="[-1, 384, 384, 256]"];
"conv15 (3x3/1) (256->128) +Relu" -> "conv16 (3x3/1) (128->128) +Relu" [label="[-1, 384, 384, 128]"];
"conv16 (3x3/1) (128->128) +Relu" -> "deconv4 (2x2/2) (128->64)" [label="[-1, 384, 384, 128]"];
"deconv4 (2x2/2) (128->64)" -> concat4 [label="[-1, 768, 768, 64]"];
concat4 -> "conv17 (3x3/1) (128->64) +Relu" [label="[-1, 768, 768, 128]"];
"conv17 (3x3/1) (128->64) +Relu" -> "conv18 (3x3/1) (64->64) +Relu" [label="[-1, 768, 768, 64]"];
"conv18 (3x3/1) (64->64) +Relu" -> "conv19 (1x1/1) (64->2)" [label="[-1, 768, 768, 64]"];
"conv19 (1x1/1) (64->2)" -> output_layer1 [label="[-1, 768, 768, 2]"];
}
The code executed for the ONNX Conversion was:
# Translate the ONNX model into a Hailo HAR for the hailo8 architecture.
runner = ClientRunner(hw_arch="hailo8")
hn, npz = runner.translate_onnx_model(
    'unet_best.onnx',
    # BUG FIX: the original line read `'unet,` — the string literal was never
    # closed, which is a SyntaxError as pasted. The network name is 'unet'.
    'unet',
    start_node_names=["input_image"],
    end_node_names=["mask"],
    # ONNX input is NCHW: [batch, channels=3, height=768, width=768].
    net_input_shapes={"input_image": [1, 3, 768, 768]},
)
# Save HAR to disk.
# NOTE(review): `hailo_model_har_name` is not defined in this snippet —
# presumably assigned earlier in the full script; confirm before running.
runner.save_har(hailo_model_har_name)
I then create a ‘calib_dataset’ using 1024 images with shape 768x768x3 :
# Build the calibration dataset: stack every PNG in ./dataset into one
# (N, H, W, C) array and save it for `hailo optimize --calib-set-path`.
images_path = './dataset'
# Improvements over the original:
#  * sorted() — os.listdir order is filesystem-dependent, so the calibration
#    set was non-deterministic across runs; sorting makes it reproducible.
#  * .lower() — the extension test was case-sensitive and silently skipped
#    files named e.g. "IMG.PNG".
images_list = [
    img_name
    for img_name in sorted(os.listdir(images_path))
    if os.path.splitext(img_name)[1].lower() == ".png"
]
# np.stack requires every image to decode to the same shape (here 768x768x3);
# a grayscale or RGBA PNG in the folder would raise — TODO confirm inputs.
calib_dataset = np.stack(
    [np.array(Image.open(os.path.join(images_path, img_name))) for img_name in images_list]
)
np.save("calib_set.npy", calib_dataset)
And I prepare the ‘Optimization’ phase with:
# Model script for the optimization phase. Adjacent string literals are
# concatenated by the parser, producing exactly the same script text the
# original list + "".join() produced.
alls_model_script = (
    "normalization1 = normalization([0, 0, 0], [255, 255, 255])\n"
    "performance_param(compiler_optimization_level=max)\n"
    "model_optimization_config(calibration, batch_size=1)\n"
    "post_quantization_optimization(finetune, policy=enabled, learning_rate=1e-5, epochs=1, batch_size=2)\n"
)
runner.load_model_script(alls_model_script)
# Run quantization/optimization over the calibration set, then persist the HAR.
runner.optimize(calib_dataset)
runner.save_har(quantized_model_har_path)
The ‘post_quantization_optimization’ line with epochs=1 was an attempt to debug the problem faster.
Every time I run this code or try to execute the optimization using the command line, I have the same error.
hailo optimize --hw-arch hailo8 --calib-set-path ./calib_set.npy --work-dir ./workdir --model-script unet_optimizer_script.alls --output-har-path unet_quantized.har unet_converted_model.har
These are the specs the Hailo command reports for my machine, which has a 4060 Ti GPU installed:
[info] CPU: Architecture: x86_64, Model: Intel(R) Core(TM) i5-9400 CPU @ 2.90GHz, Number Of Cores: 6, Utilization: 18.2%
[info] Memory: Total: 15GB, Available: 9GB
[info] System info: OS: Linux, Kernel: 6.1.0-21-amd64
[info] Hailo DFC Version: 3.28.0
[info] HailoRT Version: Not Installed
Can someone help me figure out how to solve this error?
Thank you,