I’m deploying a custom-trained DINOv2 model on a Hailo-8L with a Raspberry Pi 5 using DFC’s notebooks as base tutorials. The DINOv2 model was fine-tuned as a classifier with an additional top layer.
To convert the ONNX DINOv2 model into a HEF format, I’ve broken the process into three steps:
- ONNX to Hailo-Compatible Representation (HAR)
- Quantization to 8-bit
- Compiling the network into a HEF model
1. ONNX to HAR Conversion
I successfully transformed the ONNX DINOv2 model into a HAR. Here’s the key configuration I used:
# Create a runner for the target hardware architecture
# (chosen_hw_arch is defined outside this snippet).
runner = ClientRunner(hw_arch=chosen_hw_arch)
# Translate the ONNX model, restricted to the sub-graph between the given
# start and end nodes. The node names and the input shape were read from the
# ONNX file with Netron.
# NOTE(review): the input shape is given as [batch, channels, height, width],
# presumably the ONNX (NCHW) layout — confirm against the exported model.
hn, npz = runner.translate_onnx_model(
onnx_path,
onnx_model_name,
start_node_names=["x.1"],
end_node_names=["1144"],
net_input_shapes={"x.1": [1, 3, 224, 224]},
)
Node names and shapes were extracted using Netron. Here’s the output from Hailo Visualizer.
2. Quantization Step
I encountered errors in the quantization process, but first let me walk through the steps I took before the issue arose.
Preprocessing & Postprocessing Functions:
# -----------------------------------------
# Pre processing (prepare the input images)
# -----------------------------------------
def preproc(image, output_height=224, output_width=224, resize_side=256, normalize=False):
    """Resize and centrally crop an image the ImageNet-standard way.

    The image is resized with its aspect ratio preserved so that the smaller
    side becomes ``resize_side`` pixels, then cropped or padded around the
    centre to ``output_height`` x ``output_width``. When ``normalize`` is true,
    the default ImageNet per-channel mean/std normalization is applied too.
    The result is returned with singleton dimensions squeezed out.
    """
    with eager_mode():
        height = image.shape[0]
        width = image.shape[1]
        # Scale factor that maps the shorter side onto resize_side.
        ratio = tf.cond(
            tf.less(height, width),
            lambda: resize_side / height,
            lambda: resize_side / width,
        )
        target_size = [int(height * ratio), int(width * ratio)]
        # resize_bilinear works on batches, so add a leading batch axis.
        batched = tf.expand_dims(image, 0)
        resized = tf.compat.v1.image.resize_bilinear(batched, target_size)
        result = tf.compat.v1.image.resize_with_crop_or_pad(resized, output_height, output_width)
        if normalize:
            # Default normalization parameters for ImageNet
            channel_mean = [123.675, 116.28, 103.53]
            channel_std = [58.395, 57.12, 57.375]
            result = (result - channel_mean) / channel_std
        return tf.squeeze(result)
# -----------------------------------------------------
# Post processing (what to do with the model's outputs)
# -----------------------------------------------------
def _get_imagenet_labels(json_path="../data/imagenet_names.json"):
    """Load the ImageNet class names from a JSON file.

    Args:
        json_path: Path to a JSON object that maps the string indices
            ``"0"`` .. ``"1000"`` to class names.

    Returns:
        A list of 1000 names: the entries for indices 1..1000, in order.
        The entry for index 0 is read but dropped.

    Raises:
        KeyError: If any index in ``"0"`` .. ``"1000"`` is missing.
    """
    # A context manager closes the file handle deterministically; the original
    # json.load(open(...)) left that to the garbage collector.
    with open(json_path, encoding="utf-8") as names_file:
        names_by_index = json.load(names_file)
    imagenet_names = [names_by_index[str(i)] for i in range(1001)]
    return imagenet_names[1:]
# The ImageNet label loader is disabled here; a custom 12-class label list is
# used instead.
#imagenet_labels = _get_imagenet_labels()
# NOTE(review): postproc() indexes this list with the arg-max of the model
# output, so the order presumably has to match the class indices of the
# fine-tuned classifier head — confirm against the training label mapping.
imagenet_labels = ['apple', 'banana', 'drink_carton', 'metal_can', 'mixed', 'organic', 'paper', 'paper_coffee_cup', 'plastic_bag', 'plastic_bottle', 'pmd', 'rest']
print(f'Labels of dataset: {imagenet_labels}')
def postproc(results, class_names=None):
    """Return the top-1 score (in percent) and label for each model output.

    Args:
        results: Iterable of per-image score arrays. An entry may carry extra
            dimensions (e.g. shape ``(1, 1, num_classes)``); it is flattened
            before the arg-max is taken.
        class_names: Optional sequence mapping a class index to its name.
            Defaults to the module-level ``imagenet_labels``.

    Returns:
        A ``(scores, labels)`` tuple of two lists with one entry per result.
        Each score is the winning value multiplied by 100.
    """
    if class_names is None:
        class_names = imagenet_labels
    scores = []
    labels = []
    for result in results:
        # np.argmax returns an index into the flattened array, so flatten
        # explicitly to keep that index valid for multi-dimensional outputs.
        flat = np.ravel(result)
        top_ind = int(np.argmax(flat))
        labels.append(class_names[top_ind])
        scores.append(100 * flat[top_ind])
    return scores, labels
# -------------
# Visualization
# -------------
def mynorm(data):
    """Min-max scale ``data`` to the range [0, 1] for display.

    Constant input (max equal to min) would divide by zero and produce NaNs,
    so it is mapped to an all-zero float array of the same shape instead.
    """
    lowest = np.min(data)
    span = np.max(data) - lowest
    if span == 0:
        return np.zeros_like(data, dtype=float)
    return (data - lowest) / span
def visualize_results(
    images,
    first_scores,
    first_labels,
    second_scores=None,
    second_labels=None,
    first_title="Full Precision",
    second_title="Other",
):
    """Show every image in its own figure, titled with its top-1 prediction.

    ``first_scores`` / ``first_labels`` hold one prediction per image. A second
    set of predictions (``second_scores`` / ``second_labels``) may be supplied
    to show two results in the same title; both must be given or both omitted.
    Each image is passed through ``mynorm`` before it is displayed.
    """
    # Validate the input arguments
    both_missing = second_scores is None and second_labels is None
    both_given = second_scores is not None and second_labels is not None
    assert both_missing or both_given, "second_scores and second_labels must both be supplied, or both not be supplied"
    assert len(images) == len(first_scores) == len(first_labels), "lengths of inputs must be equal"
    show_only_first = second_scores is None
    if not show_only_first:
        assert len(images) == len(second_scores) == len(second_labels), "lengths of inputs must be equal"

    # Display
    for img_idx, image in enumerate(images):
        plt.figure()
        plt.imshow(mynorm(image))
        if show_only_first:
            plt.title(
                f"{first_title}: top-1 class is {first_labels[img_idx]}. Confidence is {first_scores[img_idx]:.2f}%",
            )
        else:
            plt.title(
                f"{first_title}: top-1 class is {first_labels[img_idx]}. Confidence is {first_scores[img_idx]:.2f}%,\n"
                f"{second_title}: top-1 class is {second_labels[img_idx]}. Confidence is {second_scores[img_idx]:.2f}%",
            )
Loading the HAR model:
# Model name and the HAR file name derived from it.
model_name = "dinov2"
hailo_model_har_name = f"{model_name}_hailo_model.har"
# Stop early if the HAR path does not point to an existing file.
assert os.path.isfile(hailo_model_har_name), "Please provide valid path for HAR file"
# Create a runner from the saved HAR.
# By default it uses the hw_arch that is saved on the HAR. For overriding, use the hw_arch flag.
runner = ClientRunner(har=hailo_model_har_name)
Loading the Dataset:
images_path = '/home/jetshu/Documents/PLAEX/code/PLAEXDatabaseManager/GranularDataset/val' #"../data"
# Sort once, so that images_list itself is in the same (deterministic) order
# as the rows of the datasets built below.
images_list = sorted(img_name for img_name in os.listdir(images_path) if os.path.splitext(img_name)[1] == ".jpg")

# Create an un-normalized dataset for visualization
image_dataset = np.zeros((len(images_list), 224, 224, 3))
# Create a normalized dataset to feed into the Native emulator
image_dataset_normalized = np.zeros((len(images_list), 224, 224, 3))

for idx, img_name in enumerate(images_list):
    # Force 3-channel RGB so grayscale / RGBA / CMYK files still fit the
    # (224, 224, 3) buffers. The context manager closes the file handle as
    # soon as the pixels have been copied into the array.
    with Image.open(os.path.join(images_path, img_name)) as pil_image:
        img = np.array(pil_image.convert("RGB"))
    img_preproc = preproc(img)
    image_dataset[idx, :, :, :] = img_preproc.numpy()
    img_preproc_norm = preproc(img, normalize=True)
    image_dataset_normalized[idx, :, :, :] = img_preproc_norm.numpy()
The model is then evaluated with the native emulator (SDK_NATIVE), and everything works fine at this stage. I got decent accuracy metrics:
# Notice that we use the normalized images, because normalization is not in the model
# Run the first IMAGES_TO_VISUALIZE normalized images through the native emulator.
with runner.infer_context(InferenceContext.SDK_NATIVE) as ctx:
    native_res = runner.infer(ctx, image_dataset_normalized[:IMAGES_TO_VISUALIZE])
#print(native_res)

# Turn the raw outputs into top-1 scores/labels and plot them over the
# un-normalized images.
native_scores, native_labels = postproc(native_res)
visualize_results(
    images=image_dataset[:IMAGES_TO_VISUALIZE],
    first_scores=native_scores,
    first_labels=native_labels,
)
Applying model modifications, i.e. adding a normalization layer on the model input via a model script:
model_script_lines = [
# Add a normalization layer with per-channel mean [123.675, 116.28, 103.53] and std [58.395, 57.12, 57.375]
"test_normalization1 = normalization([123.675, 116.28, 103.53], [58.395, 57.12, 57.375])\n",
# For multiple input nodes, add one line per input layer, naming the layer explicitly:
# "{normalization_layer_name_1} = normalization([list of means per channel], [list of stds per channel], {input_layer_name_1_from_hn})\n",
# "{normalization_layer_name_2} = normalization([list of means per channel], [list of stds per channel], {input_layer_name_2_from_hn})\n",
# ...
]
# Load the model script to ClientRunner so it will be considered on optimization
runner.load_model_script("".join(model_script_lines))
# NOTE(review): presumably this applies the loaded model script to the
# full-precision model, which is why the SDK_FP_OPTIMIZED emulator can be fed
# un-normalized images afterwards — confirm against the DFC documentation.
runner.optimize_full_precision()
Running the SDK_FP_OPTIMIZED emulator produced reasonable accuracy metrics:
# Notice that we use the original images, because normalization is IN the model
# Run the first IMAGES_TO_VISUALIZE un-normalized images through the
# FP-optimized emulator.
with runner.infer_context(InferenceContext.SDK_FP_OPTIMIZED) as ctx:
    modified_res = runner.infer(ctx, image_dataset[:IMAGES_TO_VISUALIZE])

# Show the native results next to the results of the modified model.
modified_scores, modified_labels = postproc(modified_res)
visualize_results(
    images=image_dataset[:IMAGES_TO_VISUALIZE],
    first_scores=native_scores,
    first_labels=native_labels,
    second_scores=modified_scores,
    second_labels=modified_labels,
    second_title="FP Modified",
)
However, I encountered an error during the optimization process with the following code:
# Calibrate with the original (un-normalized) images, i.e. the same input that
# is fed to the SDK_FP_OPTIMIZED emulator.
calib_dataset = image_dataset

# The short form is runner.optimize(calib_dataset). The dictionary form used
# here is more general and also works with multiple input nodes:
# {input_layer_name_1_from_hn: layer_1_calib_dataset, input_layer_name_2_from_hn: layer_2_calib_dataset}
hn_layers = runner.get_hn_dict()["layers"]
print("Input layers are: ")
# See available input layer names
print([name for name, spec in hn_layers.items() if spec["type"] == "input_layer"])

# In our case there is only one input layer
calib_dataset_dict = {"dinov2/input_layer1": calib_dataset}
runner.optimize(calib_dataset_dict)
Here’s the log output:
Input layers are:
['dinov2/input_layer1']
[info] Starting Model Optimization
[warning] Reducing optimization level to 1 (the accuracy won't be optimized and compression won't be used) because there's less data than the recommended amount (1024)
[info] Model received quantization params from the hn
2024-10-20 17:32:09.390079: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:1279] could not retrieve CUDA device count: CUDA_ERROR_NOT_INITIALIZED: initialization error
2024-10-20 17:32:09.554167: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:1279] could not retrieve CUDA device count: CUDA_ERROR_NOT_INITIALIZED: initialization error
[info] Starting Mixed Precision
[info] Mixed Precision is done (completion time is 00:00:02.23)
[info] Starting LayerNorm Decomposition
[info] Using dataset with 64 entries for calibration
Calibration: 0%| | 0/64 [00:00<?, ?entries/s]2024-10-20 17:32:52.778889: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
[[{{node Placeholder/_0}}]]
2024-10-20 17:32:52.805111: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 12%|█▎ | 8/64 [02:24<16:48, 18.00s/entries]2024-10-20 17:35:10.773695: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 25%|██▌ | 16/64 [02:24<05:56, 7.43s/entries]2024-10-20 17:35:11.065021: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 38%|███▊ | 24/64 [02:24<02:42, 4.06s/entries]2024-10-20 17:35:11.353900: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 50%|█████ | 32/64 [02:24<01:19, 2.47s/entries]2024-10-20 17:35:11.652946: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 62%|██████▎ | 40/64 [02:25<00:38, 1.59s/entries]2024-10-20 17:35:11.947633: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 75%|███████▌ | 48/64 [02:25<00:17, 1.06s/entries]2024-10-20 17:35:12.239267: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 88%|████████▊ | 56/64 [02:25<00:05, 1.37entries/s]2024-10-20 17:35:12.529566: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 100%|██████████| 64/64 [02:26<00:00, 2.28s/entries]
[info] LayerNorm Decomposition is done (completion time is 00:02:54.00)
[info] Starting Statistics Collector
[info] Using dataset with 64 entries for calibration
Calibration: 0%| | 0/64 [00:00<?, ?entries/s]2024-10-20 17:36:06.437562: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
[[{{node Placeholder/_0}}]]
2024-10-20 17:36:06.458755: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 12%|█▎ | 8/64 [04:16<29:50, 31.97s/entries]2024-10-20 17:39:47.898910: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 25%|██▌ | 16/64 [04:17<10:39, 13.32s/entries]2024-10-20 17:39:48.773738: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 38%|███▊ | 24/64 [04:18<04:51, 7.29s/entries]2024-10-20 17:39:49.582326: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 50%|█████ | 32/64 [04:19<02:22, 4.45s/entries]2024-10-20 17:39:50.377881: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 62%|██████▎ | 40/64 [04:20<01:09, 2.88s/entries]2024-10-20 17:39:51.189714: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 75%|███████▌ | 48/64 [04:21<00:30, 1.94s/entries]2024-10-20 17:39:51.984864: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 88%|████████▊ | 56/64 [04:21<00:10, 1.34s/entries]2024-10-20 17:39:52.779747: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [8,224,224,3]
[[{{node Placeholder/_0}}]]
Calibration: 100%|██████████| 64/64 [04:22<00:00, 4.11s/entries]
[info] Statistics Collector is done (completion time is 00:04:41.08)
[info] Starting Fix zp_comp Encoding
[info] Fix zp_comp Encoding is done (completion time is 00:00:00.00)
[info] Starting Matmul Equalization
The error traceback is as follows:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[9], line 11
9 print([layer for layer in hn_layers if hn_layers[layer]["type"] == "input_layer"]) # See available input layer names
10 calib_dataset_dict = {"dinov2/input_layer1": calib_dataset} # In our case there is only one input layer
---> 11 runner.optimize(calib_dataset_dict)
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_sdk_common/states/states.py:16, in allowed_states.<locals>.wrap.<locals>.wrapped_func(self, *args, **kwargs)
12 if self._state not in states:
13 raise InvalidStateException(
14 f"The execution of {func.__name__} is not available under the state: {self._state.value}",
15 )
---> 16 return func(self, *args, **kwargs)
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_sdk_client/runner/client_runner.py:2093, in ClientRunner.optimize(self, calib_data, data_type, work_dir)
2061 @allowed_states(States.HAILO_MODEL, States.FP_OPTIMIZED_MODEL)
2062 def optimize(self, calib_data, data_type=CalibrationDataType.auto, work_dir=None):
2063 """
2064 Apply optimizations to the model:
2065
(...)
2091
2092 """
-> 2093 self._optimize(calib_data, data_type=data_type, work_dir=work_dir)
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_sdk_common/states/states.py:16, in allowed_states.<locals>.wrap.<locals>.wrapped_func(self, *args, **kwargs)
12 if self._state not in states:
13 raise InvalidStateException(
14 f"The execution of {func.__name__} is not available under the state: {self._state.value}",
15 )
---> 16 return func(self, *args, **kwargs)
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_sdk_client/runner/client_runner.py:1935, in ClientRunner._optimize(self, calib_data, data_type, work_dir)
1933 if self._state == States.HAILO_MODEL:
1934 self._optimize_full_precision(calib_data=calib_data, data_type=data_type)
-> 1935 self._sdk_backend.full_quantization(calib_data, data_type=data_type, work_dir=work_dir)
1936 self._state = States.QUANTIZED_MODEL
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/sdk_backend.py:1045, in SDKBackendQuantization.full_quantization(self, data, data_type, work_dir, force_results_by_layer)
1043 self.pre_quantization_structural()
1044 params_pre_quantization = copy.deepcopy(self.get_params_pre_quantization())
-> 1045 self._full_acceleras_run(self.calibration_data, data_type)
1046 self._logger.verbose("Core and post Quantization is done with Acceleras")
1047 self._finalize_quantization()
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/sdk_backend.py:1229, in SDKBackendQuantization._full_acceleras_run(self, data, data_type)
1224 else:
1225 # the compression level and the optimization level are not set to the recommended values
1226 self._logger.info(
1227 f"To obtain best performance for models with number of parameters larger than {self.hw_arch.HAILO15_LARGE_MODEL_PARAMS_TH}, it is recommended to use optimization level and compression level {RECOMMENDED_OPTM_LEVEL}."
1228 )
-> 1229 optimization_flow.run()
1230 fp_after_optimization = optimization_flow.get_acceleras_params()
1231 self.save_and_load_params_hailo_optimized(fp_after_optimization)
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/tools/orchestator.py:306, in flow_control_method.<locals>.wrapper(self, *args, **kwargs)
302 setattr(self, attr_name, method_wrapper(method, sup))
304 try:
305 # Execute the original run function
--> 306 return func(self, *args, **kwargs)
308 finally:
309 # Restore original methods after run execution
310 self._call_history = sup.export() if sup.current_checkpoint else None
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/flows/optimization_flow.py:326, in OptimizationFlow.run(self, memento, run_until)
324 for ind, step_func in enumerate(step_funcs):
325 self._step_index = ind
--> 326 step_func()
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/tools/orchestator.py:250, in flow_control_method.<locals>.wrapper.<locals>.method_wrapper.<locals>.wrapped(*args, **kwargs)
248 if sup.state == SupervisorState.RUNNING:
249 with sup.visit_node(method_name):
--> 250 result = method(*args, **kwargs)
251 if method_name == run_until:
252 sup.state = SupervisorState.DISABLED
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/tools/subprocess_wrapper.py:124, in subprocess_wrapper.<locals>.decorator.<locals>.parent_wrapper(self, *args, **kwargs)
122 self.dist_info = dist_info
123 self.build_model()
--> 124 func(self, *args, **kwargs)
126 # This flag should be present in Develop
127 if force_rebuild:
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/flows/optimization_flow.py:345, in OptimizationFlow.step1(self)
343 self._update_fp_data()
344 self.pre_quantization_optimization()
--> 345 self.core_quantization()
346 self._update_quantize_data()
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/tools/orchestator.py:250, in flow_control_method.<locals>.wrapper.<locals>.method_wrapper.<locals>.wrapped(*args, **kwargs)
248 if sup.state == SupervisorState.RUNNING:
249 with sup.visit_node(method_name):
--> 250 result = method(*args, **kwargs)
251 if method_name == run_until:
252 sup.state = SupervisorState.DISABLED
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/flows/optimization_flow.py:402, in OptimizationFlow.core_quantization(self)
400 self._force_preact_stats()
401 self._fix_zp_comp_encoding()
--> 402 self._matmul_equalization()
403 self._create_hw_params()
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/tools/orchestator.py:250, in flow_control_method.<locals>.wrapper.<locals>.method_wrapper.<locals>.wrapped(*args, **kwargs)
248 if sup.state == SupervisorState.RUNNING:
249 with sup.visit_node(method_name):
--> 250 result = method(*args, **kwargs)
251 if method_name == run_until:
252 sup.state = SupervisorState.DISABLED
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/flows/optimization_flow.py:582, in OptimizationFlow._matmul_equalization(self)
580 def _matmul_equalization(self):
581 algo = MatmulEqualization(self.model, self._parsed_config, logging.INFO, self._logger)
--> 582 algo.run()
583 self._finalize_algorithm(algo)
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/algorithms/optimization_algorithm.py:50, in OptimizationAlgorithm.run(self)
48 def run(self):
49 self.finalize_config()
---> 50 return super().run()
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/algorithms/algorithm_base.py:150, in AlgorithmBase.run(self, memento)
148 self.restore(memento)
149 start_time = time.time()
--> 150 self._run_int()
151 self._results.status = AlgorithmStatus.SUCCESSFULLY_DONE
152 except KeyboardInterrupt:
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/algorithms/matmul_equalization/matmul_equalization.py:123, in MatmulEqualization._run_int(self)
117 self._logger.info(
118 f"{self._name}: Could not be apply because it "
119 f"has an unsuported layer {e.layer_name} on its inputs flow"
120 )
121 return
--> 123 self.equalize_input_paths(normal_subflow, transpose_subflow, layer)
124 self.matmul_propagation(block)
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/algorithms/matmul_equalization/matmul_equalization.py:154, in MatmulEqualization.equalize_input_paths(self, normal_flow, transpose_flow, layer)
151 factors = np.ones(layer.groups, dtype=np.float32)
153 for _ in range(2):
--> 154 set_scales_forward(self._model, normal_flow, layer.groups, factors)
155 set_scales_forward(self._model, transpose_flow, layer.groups, factors, transpose=True)
157 fix_scales_reduce_sum(self._model, transpose_flow, layer.groups, layer, self._logger)
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/algorithms/matmul_equalization/matmul_equalization.py:332, in set_scales_forward(model, flow, groups, factors, transpose)
330 in_node = flow.get_sources()[0]
331 in_layer = model.layers[in_node]
--> 332 scales, zp_points = get_scales_output(in_layer, groups, factors, transpose)
333 in_layer.set_output_scale(scales, 0)
334 in_layer.set_output_zero_point(zp_points, 0)
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/algorithms/matmul_equalization/matmul_equalization.py:359, in get_scales_output(layer, groups, factors, transpose)
357 else:
358 candidate_zp, candidate_scale, _ = limvals_to_zp_scale(layer.get_output_limvals()[0], lossy_element)
--> 359 scales, zp = find_the_best_zp(
360 xmin=min_vals,
361 xmax=max_vals,
362 bins=bins,
363 start_scales=np.repeat(candidate_scale, max_vals.size),
364 start_zp=candidate_zp,
365 )
367 else:
368 scales = np.array(
369 [
370 scale
(...)
374 ]
375 )
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/hailo_model_optimization/algorithms/matmul_equalization/optimal_zp_finder.py:237, in find_the_best_zp(xmin, xmax, bins, start_scales, start_zp)
229 cons = [
230 {"type": "ineq", "fun": constraint1},
231 {"type": "ineq", "fun": constraint2},
232 {"type": "ineq", "fun": constraint3},
233 {"type": "ineq", "fun": constraint4},
234 ]
236 # Call minimize
--> 237 result = minimize(objective, x0, method="SLSQP", constraints=cons)
238 if result.success:
239 zp = result.x[-1] # Last element is zp
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/scipy/optimize/_minimize.py:722, in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
719 res = _minimize_cobyla(fun, x0, args, constraints, callback=callback,
720 bounds=bounds, **options)
721 elif meth == 'slsqp':
--> 722 res = _minimize_slsqp(fun, x0, args, jac, bounds,
723 constraints, callback=callback, **options)
724 elif meth == 'trust-constr':
725 res = _minimize_trustregion_constr(fun, x0, args, jac, hess, hessp,
726 bounds, constraints,
727 callback=callback, **options)
File ~/Documents/PLAEX/code/Hailo/hailo_env/lib/python3.10/site-packages/scipy/optimize/_slsqp_py.py:431, in _minimize_slsqp(func, x0, args, jac, bounds, constraints, maxiter, ftol, iprint, disp, eps, callback, finite_diff_rel_step, **unknown_options)
427 a = _eval_con_normals(x, cons, la, n, m, meq, mieq)
429 while 1:
430 # Call SLSQP
--> 431 slsqp(m, meq, x, xl, xu, fx, c, g, a, acc, majiter, mode, w, jw,
432 alpha, f0, gs, h1, h2, h3, h4, t, t0, tol,
433 iexact, incons, ireset, itermx, line,
434 n1, n2, n3)
436 if mode == 1: # objective and constraint evaluation required
437 fx = wrapped_fun(x)
ValueError: failed to initialize intent(inout) array -- expected elsize=8 but got 4
Specifically, this is the error line:
ValueError: failed to initialize intent(inout) array -- expected elsize=8 but got 4
What I have tried so far:
- Check my dataset if its uploaded and visualized correctly (Pass)
- Change the batch of the dataset (different batches 1, 4 or 8 got the same error)
- Change the data type of the dataset (float32 to float64 and vice versa; got the same error)
I know from this previous post that DINOv2 is not currently supported, but could you give me some advice on how to proceed? Or should I wait until DINOv2 is officially supported?