Error compiling custom quantized SegFormer HAR

The following is my workflow.

I have a segformer.pth which I convert to simplified onnx.

# Export script: build the SegFormer model, load fine-tuned weights, export to
# ONNX, then validate and simplify the exported graph.

# Initialize model
segformer_config = SegformerConfig(
    num_channels=3,
    num_labels=num_classes,
    # NOTE(review): `image_size` is not a declared SegformerConfig field —
    # PretrainedConfig stores unknown kwargs as plain attributes; confirm it
    # is actually consumed anywhere.
    image_size=768,
    # BUG FIX: the original passed `label_ignore_index`, which SegformerConfig
    # does not define — the unknown kwarg is silently stored and the loss keeps
    # its default ignore index. The documented keyword is
    # `semantic_loss_ignore_index`.
    semantic_loss_ignore_index=255
)
model = SegformerForSemanticSegmentation(config=segformer_config)
checkpoint = torch.load(PTH_PATH)
# strict=False silently ignores mismatched keys — report them so a partially
# loaded checkpoint cannot masquerade as a clean load.
load_result = model.load_state_dict(checkpoint["model"], strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(f"⚠️ Partial checkpoint load: missing={load_result.missing_keys}, "
          f"unexpected={load_result.unexpected_keys}")
model.eval()

# Dummy input for export (static 1x3x768x768 shape — dynamic_axes is None below)
dummy_input = torch.randn(1, 3, 768, 768)
onnx_export_path = "segformer_b0_best.onnx"

# Export to ONNX
torch.onnx.export(
    model,
    dummy_input,
    onnx_export_path,
    export_params=True,
    opset_version=13,
    do_constant_folding=True,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes=None
)
print("✅ Exported original ONNX")

# Validate
onnx_model = onnx.load(onnx_export_path)
onnx.checker.check_model(onnx_model)
print("✅ ONNX model is valid")

# Simplify
simplified_model, check = simplify(onnx_model)
if check:
    simplified_path = "segformer_b0_simplified.onnx"
    onnx.save(simplified_model, simplified_path)
    print(f"✅ Simplified model saved as {simplified_path}")
else:
    # Previously a failed simplification fell through silently, leaving the
    # downstream HAR conversion to pick up a stale or missing file.
    raise RuntimeError("onnx-simplifier failed to validate the simplified model")

I then convert the simplified ONNX to a HAR

# Translate the simplified ONNX graph into a Hailo HAR for the target arch.
from hailo_sdk_client import ClientRunner

MODEL_NAME = 'segformer_b0'
ONNX_PATH = "segformer_b0_simplified.onnx"
HW_ARCH = 'hailo8l'

client = ClientRunner(hw_arch=HW_ARCH)
# translate_onnx_model returns the HN (network description) and the weights npz.
network_desc, weights_npz = client.translate_onnx_model(ONNX_PATH, MODEL_NAME)

har_path = "segformer_b0.har"
client.save_har(har_path)
print(f"✅ HAR file saved as {har_path}")

I then optimize the HAR

# optimize_har.py — quantize the translated HAR using the calibration set and
# the quantization directives in the model script.
from hailo_sdk_client import ClientRunner

# user params
MODEL_NAME       = "segformer_b0"
CALIB_PATH       = "resized_val/calib_npy"  # directory of calibration samples
HAR_IN           = f"{MODEL_NAME}.har"
HAR_OUT          = f"{MODEL_NAME}_quantized_model.har"
ALLS_FILE        = "model_script.alls"

client = ClientRunner(har=HAR_IN)
client.load_model_script(model_script=ALLS_FILE)

# Run post-training quantization against the calibration data.
client.optimize(CALIB_PATH)

client.save_har(HAR_OUT)
print(f"\n✅ Optimized HAR file saved as {HAR_OUT}")

with the following model_script.alls (the filename must match ALLS_FILE above)

normalize1 = normalization([123.675, 116.28, 103.53], [58.395, 57.12, 57.375])
bgr_to_rgb = input_conversion(bgr_to_rgb)
quantization_param([segformer_b0/matmul2], force_range_in=[0, 2.5])
quantization_param([segformer_b0/matmul4], force_range_in=[0, 3])

Finally, I compile the quantized HAR

# compile_har.py — NOTE(review): the original header said "optimize_har.py",
# but this script compiles the quantized HAR into a deployable HEF.
from hailo_sdk_client import ClientRunner

# user params
MODEL_NAME       = "segformer_b0"
HAR_IN          = f"{MODEL_NAME}_quantized_model.har"  # output of the optimize step

# Load the quantized HAR and run the allocator/compiler for the target device.
runner = ClientRunner(har=HAR_IN)
hef = runner.compile()  # returns the compiled HEF as bytes
file_name = f'{MODEL_NAME}.hef'
with open(file_name, 'wb') as f:
    f.write(hef)
print(f"\n✅ HEF file saved as {MODEL_NAME}.hef")

However, the compilation results in the following

[info] To achieve optimal performance, set the compiler_optimization_level to "max" by adding performance_param(compiler_optimization_level=max) to the model script. Note that this may increase compilation time.
[info] Loading network parameters
[info] Starting Hailo allocation and compilation flow
[info] Building optimization options for network layers...
[info] Successfully built optimization options - 17s 631ms
[info] Trying to compile the network in a single context
[info] Single context flow failed: Recoverable single context error
[info] Building optimization options for network layers...
[info] Successfully built optimization options - 38s 87ms
[info] Using Multi-context flow
[info] Resources optimization params: max_control_utilization=60%, max_compute_utilization=60%, max_compute_16bit_utilization=60%, max_memory_utilization (weights)=60%, max_input_aligner_utilization=60%, max_apu_utilization=60%
[info] Finding the best partition to contexts...
[.<==>...................................] Duration: 00:01:16                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
Found valid partition to 13 contexts
[info] Searching for a better partition...
[...........<==>.........................] Duration: 00:00:44                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
Found valid partition to 13 contexts, Performance improved by 7.3%
[info] Searching for a better partition...
[...................................<==>.] Elapsed: 00:00:42                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
[info] Partition to contexts finished successfully
[info] Partitioner finished after 241 iterations, Time it took: 2m 45s 149ms
[info] Applying selected partition to 13 contexts...
[error] Mapping Failed (allocation time: 3m 45s)
Value doesn't fit in field (1474)

[error] Failed to produce compiled graph
[error] BackendAllocatorException: Compilation failed: Value doesn't fit in field (1474)

Fixed it by using a smaller input size: 768×768×3 → 512×512×3.