Hailo DFC 3.32.0 compile error - Agent infeasible

I’ve seen a few posts about “Agent infeasible” errors, and they all seem to have been fixed by reducing the memory footprint on the target Hailo device, usually by using smaller models, smaller input resolutions, or smaller batch sizes.

I’ve put a lot of time into trying to produce a usable .hef file over the past few weeks, and today I decided to try to produce a model that probably won’t be capable enough for my use case, just to see if I can get something small over the line.

I’ve successfully parsed and optimised two models with the DFC tutorial notebooks in Jupyter: one is YOLOv8s with a 640x640 input, and the other is YOLOv8s with a 960x960 input.

When I try to compile the 640x640 optimised HAR:

[info] To achieve optimal performance, set the compiler_optimization_level to "max" by adding performance_param(compiler_optimization_level=max) to the model script. Note that this may increase compilation time.
[info] Loading network parameters
[info] Starting Hailo allocation and compilation flow
[info] Building optimization options for network layers...
[info] Successfully built optimization options - 2s 720ms
[info] Trying to compile the network in a single context
[info] Single context flow failed: Recoverable single context error
[info] Building optimization options for network layers...
[info] Successfully built optimization options - 4s 883ms
[error] Mapping Failed (allocation time: 4s)

[error] Failed to produce compiled graph

No successful assignments: concat17 errors:
	Agent infeasible


---------------------------------------------------------------------------
BackendAllocatorException                 Traceback (most recent call last)
Cell In[4], line 1
----> 1 hef = runner.compile()
      2 file_name = f"{model_name}.hef"
      3 with open(file_name, "wb") as f:

File ~/projects/vision/venv/lib/python3.10/site-packages/hailo_sdk_client/runner/client_runner.py:911, in ClientRunner.compile(self)
    899 def compile(self):
    900     """
    901     DFC API for compiling current model to Hailo hardware.
    902 
   (...)
    909 
    910     """
--> 911     return self._compile()

File ~/projects/vision/venv/lib/python3.10/site-packages/hailo_sdk_common/states/states.py:16, in allowed_states.<locals>.wrap.<locals>.wrapped_func(self, *args, **kwargs)
     12 if self._state not in states:
     13     raise InvalidStateException(
     14         f"The execution of {func.__name__} is not available under the state: {self._state.value}",
     15     )
---> 16 return func(self, *args, **kwargs)

File ~/projects/vision/venv/lib/python3.10/site-packages/hailo_sdk_client/runner/client_runner.py:1128, in ClientRunner._compile(self, fps, mapping_timeout, allocator_script_filename)
   1122         self._logger.warning(
   1123             f"Taking model script commands from {allocator_script_filename} and ignoring "
   1124             f"previous allocation script commands",
   1125         )
   1126     self.load_model_script(allocator_script_filename)
-> 1128 serialized_hef = self._sdk_backend.compile(fps, self.model_script, mapping_timeout)
   1130 self._auto_model_script = self._sdk_backend.get_auto_alls()
   1131 self._state = States.COMPILED_SLIM_MODEL if orig_state in SLIM_STATES else States.COMPILED_MODEL

File ~/projects/vision/venv/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/sdk_backend.py:1910, in SdkBackendCompilation.compile(self, fps, allocator_script, mapping_timeout)
   1908 def compile(self, fps, allocator_script=None, mapping_timeout=None):
   1909     self._model.fill_default_quantization_params(logger=self._logger)
-> 1910     hef, mapped_graph_file = self._compile(fps, allocator_script, mapping_timeout)
   1911     # TODO: https://hailotech.atlassian.net/browse/SDK-31038
   1912     if not SDKPaths().is_internal:

File ~/projects/vision/venv/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/sdk_backend.py:1904, in SdkBackendCompilation._compile(self, fps, allocator_script, mapping_timeout)
   1898 if not model_params and self.requires_quantized_weights:
   1899     raise BackendRuntimeException(
   1900         "Model requires quantized weights in order to run on HW, but none were given. "
   1901         "Did you forget to quantize?",
   1902     )
-> 1904 hef, mapped_graph_file, auto_alls = self.hef_full_build(fps, mapping_timeout, model_params, allocator_script)
   1905 self._auto_alls = auto_alls
   1906 return hef, mapped_graph_file

File ~/projects/vision/venv/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/sdk_backend.py:1880, in SdkBackendCompilation.hef_full_build(self, fps, mapping_timeout, params, allocator_script)
   1878 config_paths = ConfigPaths(self._hw_arch, self._model.name)
   1879 config_paths.set_stage("inference")
-> 1880 auto_alls, self._hef_data, self._integrated_graph = allocator.create_mapping_and_full_build_hef(
   1881     config_paths.get_path("network_graph"),
   1882     config_paths.get_path("mapped_graph"),
   1883     config_paths.get_path("compilation_output_proto"),
   1884     params=params,
   1885     allocator_script=allocator_script,
   1886     compiler_statistics_path=config_paths.get_path("compiler_statistics"),
   1887     nms_metadata=self._nms_metadata,
   1888     har=self.har,
   1889     alls_ignore_invalid_cmds=self._alls_ignore_invalid_cmds,
   1890 )
   1892 return self._hef_data, config_paths.get_path("mapped_graph"), auto_alls

File ~/projects/vision/venv/lib/python3.10/site-packages/hailo_sdk_client/allocator/hailo_tools_runner.py:764, in HailoToolsRunner.create_mapping_and_full_build_hef(self, network_graph_path, output_path, compilation_output_proto, agent, strategy, auto_mapping, params, expected_output_tensor, expected_pre_acts, network_inputs, network_outputs, allocator_script, allocator_script_mode, compiler_statistics_path, nms_metadata, har, alls_ignore_invalid_cmds)
    759 if self.hn.net_params.clusters_placement != [[]]:
    760     assert (
    761         len(self.hn.net_params.clusters_placement) <= self._number_of_clusters
    762     ), "Number of clusters in layer placements is larger than allowed number of clusters"
--> 764 self.call_builder(
    765     network_graph_path,
    766     output_path,
    767     compilation_output_proto=compilation_output_proto,
    768     agent=agent,
    769     strategy=strategy,
    770     exit_point=BuilderExitPoint.POST_CAT,
    771     params=params,
    772     expected_output_tensor=expected_output_tensor,
    773     expected_pre_acts=expected_pre_acts,
    774     network_inputs=network_inputs,
    775     network_outputs=network_outputs,
    776     allocator_script=allocator_script,
    777     allocator_script_mode=allocator_script_mode,
    778     compiler_statistics_path=compiler_statistics_path,
    779     nms_metadata=nms_metadata,
    780     har=har,
    781     alls_ignore_invalid_cmds=alls_ignore_invalid_cmds,
    782 )
    784 return self._auto_alls, self._output_hef_data, self._output_integrated_pb_graph

File ~/projects/vision/venv/lib/python3.10/site-packages/hailo_sdk_client/allocator/hailo_tools_runner.py:696, in HailoToolsRunner.call_builder(self, network_graph_path, output_path, blind_deserialize, **kwargs)
    694 sys.excepthook = _hailo_tools_exception_hook
    695 try:
--> 696     self.run_builder(network_graph_path, output_path, **kwargs)
    697 except BackendInternalException:
    698     try:

File ~/projects/vision/venv/lib/python3.10/site-packages/hailo_sdk_client/allocator/hailo_tools_runner.py:570, in HailoToolsRunner.run_builder(self, network_graph_filename, output_filename, compilation_output_proto, agent, strategy, exit_point, params, expected_output_tensor, expected_pre_acts, network_inputs, network_outputs, allocator_script, allocator_script_mode, compiler_statistics_path, is_debug, nms_metadata, har, alls_ignore_invalid_cmds)
    568 compiler_msg = e.hailo_tools_error
    569 if compiler_msg:
--> 570     raise e.internal_exception("Compilation failed:", hailo_tools_error=compiler_msg) from None
    571 else:
    572     raise e.internal_exception("Compilation failed with unexpected crash") from None

BackendAllocatorException: Compilation failed: No successful assignments: concat17 errors:
	Agent infeasible

When I try the 960x960 optimised HAR:

[info] To achieve optimal performance, set the compiler_optimization_level to "max" by adding performance_param(compiler_optimization_level=max) to the model script. Note that this may increase compilation time.
[info] Loading network parameters
[info] Starting Hailo allocation and compilation flow
[info] Building optimization options for network layers...
[info] Successfully built optimization options - 9s 276ms
[info] Trying to compile the network in a single context
[info] Single context flow failed: Recoverable single context error
[info] Building optimization options for network layers...
[info] Successfully built optimization options - 15s 384ms
[error] Mapping Failed (allocation time: 15s)

[error] Failed to produce compiled graph

No successful assignments: concat17 errors:
	Agent infeasible by resources sanity.: Memory units capacity exceeded (available: 128, required: 161).



(The traceback is identical to the one above, ending in:)
BackendAllocatorException: Compilation failed: No successful assignments: concat17 errors:
	Agent infeasible by resources sanity.: Memory units capacity exceeded (available: 128, required: 161).

What can I do to overcome this? And how can I make the solution repeatable so I can use this device as intended?

As I said, I don’t think these model input sizes will allow detection of some of the small objects in my use case, so I’d ideally like to understand how to use the compiler for any potential input size.

Thanks in advance for any help!

Hey @Liam_D_IMK,

You’re spot on about that error. I’ve seen this before - it’s usually the allocation around the concat17 layer needing more than the 128 available memory units that causes the headache.

The typical culprits are:

  • Really large tensors getting concatenated together
  • Too many layers crammed into one cluster context
  • Input resolution or intermediate feature maps just blowing past the cluster memory limits

Quick question before we dive into troubleshooting your setup - are you compiling straight from the ONNX, or did you go through the full pipeline (ONNX → parsed HAR → optimized/quantized HAR) and it’s now choking at the compilation stage?

Here’s what I’d try first:

Step 1 - Crank up the compiler optimization to max:

performance_param(compiler_optimization_level=max)

This tells the DFC to really go aggressive with layout, quantization, and scheduling optimizations.
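In the notebook flow that looks roughly like this (a minimal sketch - it assumes runner already holds your optimized HAR, and that load_model_script accepts the commands as a plain string, as in the DFC tutorials):

runner.load_model_script("performance_param(compiler_optimization_level=max)\n")
hef = runner.compile()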

Step 2 - Check the auto-generated debug file:
When compilation fails, the SDK should drop an .alls file for you automatically. Load it up and take a look:

runner.load_model_script("auto_model_script.alls")

You can actually edit this file to manually tweak cluster placements or mess with layer settings around that problematic concat17.

Step 3 - Manual cluster placement:
If the automatic placement isn’t working out, you can force specific layers onto different clusters:

layer("concat17").cluster_placement(0)

If concat17 is merging several large inputs, you might want to spread the layers feeding it across different clusters too, as in the sketch below.
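Putting Steps 2 and 3 together, the retry loop is roughly (a sketch only - auto_model_script.alls and the cluster_placement command are just the ones quoted above, so double-check both against what your SDK version actually writes out):

# Edit auto_model_script.alls around concat17 (e.g. add the cluster_placement
# hint from Step 3), then reload it and recompile:
runner.load_model_script("auto_model_script.alls")
hef = runner.compile()
with open(f"{model_name}.hef", "wb") as f:  # same output pattern as your notebook cell
    f.write(hef)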

Let me know how it goes!

Hello @omria, thanks for your response.

Regarding your question, I am going through each step of the pipeline as guided by the Hailo tutorial Jupyter notebook (using the ‘quick optimisation’ branch). Other than file paths, the only things I changed are below:

Start and end nodes

from hailo_sdk_client import ClientRunner

runner = ClientRunner(hw_arch=chosen_hw_arch)
hn, npz = runner.translate_onnx_model(
    onnx_path,
    onnx_model_name,
    start_node_names=["images"],
    end_node_names=["/model.22/Concat_3"],
)

Image sizes (1280 x 1280 input size example)

import os

import numpy as np
import tensorflow as tf
from PIL import Image
# eager_mode import assumed from the DFC tutorial preprocessing helper
from tensorflow.python.eager.context import eager_mode

def preproc(image, output_height=1280, output_width=1280, resize_side=1280):
    """Aspect-preserving resize so the smaller side is 1280px, then central crop to 1280x1280."""
    with eager_mode():
        h, w = image.shape[0], image.shape[1]
        scale = tf.cond(tf.less(h, w), lambda: resize_side / h, lambda: resize_side / w)
        resized_image = tf.image.resize(tf.expand_dims(image, 0), [int(h * scale), int(w * scale)])
        cropped_image = tf.image.resize_with_crop_or_pad(resized_image, output_height, output_width)

        return tf.squeeze(cropped_image)

images_path = "/path_to_cal_images/"
images_list = [img_name for img_name in os.listdir(images_path) if os.path.splitext(img_name)[1] == ".jpg"]

calib_dataset = np.zeros((len(images_list), 1280, 1280, 3))
for idx, img_name in enumerate(sorted(images_list)):
    img = np.array(Image.open(os.path.join(images_path, img_name)))
    img_preproc = preproc(img)
    calib_dataset[idx, :, :, :] = img_preproc.numpy()

np.save("calib_set.npy", calib_dataset)
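For reference, the saved calibration set then goes into the optimisation step as in the tutorial flow, roughly like this (a sketch - the quantised HAR file name is just illustrative):

calib_dataset = np.load("calib_set.npy")
runner.optimize(calib_dataset)
runner.save_har(f"{onnx_model_name}_quantized.har")  # illustrative output name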

I’ll try out your suggestions next week; please let me know if this extra detail changes any of them.

Thanks!
