The following is my workflow.
I have a SegFormer checkpoint (segformer.pth), which I export to ONNX and then simplify:
# Export a SegFormer checkpoint to ONNX, validate it, and save a simplified copy.

# Initialize model
# NOTE(review): confirm that SegformerConfig actually consumes `image_size`
# and `label_ignore_index`; the Hugging Face docs list the ignore-index
# option as `semantic_loss_ignore_index` — TODO verify.
segformer_config = SegformerConfig(
    num_channels=3,
    num_labels=num_classes,
    image_size=768,
    label_ignore_index=255,
)
model = SegformerForSemanticSegmentation(config=segformer_config)

# Map tensors to CPU so a checkpoint saved from a GPU can still be loaded on
# a machine without one; the export below runs on CPU anyway.
checkpoint = torch.load(PTH_PATH, map_location="cpu")

# strict=False tolerates key mismatches. Report them, otherwise a checkpoint
# that does not match this architecture would be exported with untrained
# weights without any hint.
load_result = model.load_state_dict(checkpoint["model"], strict=False)
if load_result.missing_keys:
    print(f"⚠️ Keys missing from checkpoint: {load_result.missing_keys}")
if load_result.unexpected_keys:
    print(f"⚠️ Unexpected keys in checkpoint: {load_result.unexpected_keys}")
model.eval()

# Dummy input for export (fixes the exported input shape to 1x3x768x768,
# since no dynamic axes are declared below)
dummy_input = torch.randn(1, 3, 768, 768)
onnx_export_path = "segformer_b0_best.onnx"

# Export to ONNX
torch.onnx.export(
    model,
    dummy_input,
    onnx_export_path,
    export_params=True,
    opset_version=13,
    do_constant_folding=True,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes=None,
)
print("✅ Exported original ONNX")

# Validate
onnx_model = onnx.load(onnx_export_path)
onnx.checker.check_model(onnx_model)
print("✅ ONNX model is valid")

# Simplify
simplified_path = "segformer_b0_simplified.onnx"
simplified_model, check = simplify(onnx_model)
if check:
    onnx.save(simplified_model, simplified_path)
    print(f"✅ Simplified model saved as {simplified_path}")
else:
    # Make the failure visible: the next step reads simplified_path and would
    # otherwise pick up a missing or stale file.
    print(f"❌ Simplified model failed validation; {simplified_path} was not written")
I then convert the simplified ONNX model to a HAR file:
from hailo_sdk_client import ClientRunner

# Translate the simplified ONNX model with the Hailo SDK client and save the
# result as a HAR file.
TARGET_ARCH = 'hailo8l'
NET_NAME = 'segformer_b0'
ONNX_PATH = "segformer_b0_simplified.onnx"
HAR_PATH = f"{NET_NAME}.har"

runner = ClientRunner(hw_arch=TARGET_ARCH)

# The call returns a pair; neither element is used further in this script.
translation = runner.translate_onnx_model(ONNX_PATH, NET_NAME)
hn, npz = translation

runner.save_har(HAR_PATH)
print(f"✅ HAR file saved as {HAR_PATH}")
I then optimize the HAR:
# optimize_har.py
# Loads the translated HAR, applies the model script, runs optimization with
# the calibration data, and saves the resulting HAR.
from hailo_sdk_client import ClientRunner

# user params
MODEL_NAME = "segformer_b0"
ALLS_FILE = "model_script.alls"
CALIB_PATH = "resized_val/calib_npy"
HAR_IN = MODEL_NAME + ".har"
HAR_OUT = MODEL_NAME + "_quantized_model.har"

runner = ClientRunner(har=HAR_IN)

# The model script has to be loaded before optimize() is called.
runner.load_model_script(model_script=ALLS_FILE)
runner.optimize(CALIB_PATH)

runner.save_har(HAR_OUT)
print(f"\n✅ Optimized HAR file saved as {HAR_OUT}")
with the following model_script.alls:
normalize1 = normalization([123.675, 116.28, 103.53], [58.395, 57.12, 57.375])
bgr_to_rgb = input_conversion(bgr_to_rgb)
quantization_param([segformer_b0/matmul2], force_range_in=[0, 2.5])
quantization_param([segformer_b0/matmul4], force_range_in=[0, 3])
Finally, I compile the quantized HAR:
# Compile step: turns the quantized HAR into a HEF file.
from hailo_sdk_client import ClientRunner

# user params
MODEL_NAME = "segformer_b0"
HAR_IN = MODEL_NAME + "_quantized_model.har"
HEF_OUT = MODEL_NAME + ".hef"

runner = ClientRunner(har=HAR_IN)

# compile() returns the HEF contents, which are written out in binary mode.
hef_bytes = runner.compile()
with open(HEF_OUT, 'wb') as hef_file:
    hef_file.write(hef_bytes)

print(f"\n✅ HEF file saved as {HEF_OUT}")
However, the compilation fails with the following output:
[info] To achieve optimal performance, set the compiler_optimization_level to "max" by adding performance_param(compiler_optimization_level=max) to the model script. Note that this may increase compilation time.
[info] Loading network parameters
[info] Starting Hailo allocation and compilation flow
[info] Building optimization options for network layers...
[info] Successfully built optimization options - 17s 631ms
[info] Trying to compile the network in a single context
[info] Single context flow failed: Recoverable single context error
[info] Building optimization options for network layers...
[info] Successfully built optimization options - 38s 87ms
[info] Using Multi-context flow
[info] Resources optimization params: max_control_utilization=60%, max_compute_utilization=60%, max_compute_16bit_utilization=60%, max_memory_utilization (weights)=60%, max_input_aligner_utilization=60%, max_apu_utilization=60%
[info] Finding the best partition to contexts...
[.<==>...................................] Duration: 00:01:16
Found valid partition to 13 contexts
[info] Searching for a better partition...
[...........<==>.........................] Duration: 00:00:44
Found valid partition to 13 contexts, Performance improved by 7.3%
[info] Searching for a better partition...
[...................................<==>.] Elapsed: 00:00:42
[info] Partition to contexts finished successfully
[info] Partitioner finished after 241 iterations, Time it took: 2m 45s 149ms
[info] Applying selected partition to 13 contexts...
[error] Mapping Failed (allocation time: 3m 45s)
Value doesn't fit in field (1474)
[error] Failed to produce compiled graph
[error] BackendAllocatorException: Compilation failed: Value doesn't fit in field (1474)