Concat17 error during custom yolov8m.pt compilation using DFC

I have a custom yolov8m model that I’d like to run inference with on the Hailo-8L Entry-Level AI Accelerator. I’m using the Dataflow Compiler to convert my .pt file into a .hef file, following the tutorial notebooks (accessed by running hailo tutorial in Ubuntu) and swapping the ResNet arguments for yolov8m ones. I first converted my .pt model into a .onnx and then followed the tutorial. The first two steps (parsing and optimization) completed without issues, but I get this error when compiling with runner.compile():

No successful assignments: concat17 errors:
Agent infeasible


BackendAllocatorException Traceback (most recent call last)
Cell In[14], line 1
----> 1 hef = runner.compile()
3 file_name = f"{model_name}.hef"
4 with open(file_name, "wb") as f:

File ~/yolo/lib/python3.10/site-packages/hailo_sdk_client/runner/client_runner.py:896, in ClientRunner.compile(self)
884 def compile(self):
885 """
886 DFC API for compiling current model to Hailo hardware.
887
(...)
894
895 """
--> 896 return self._compile()

File ~/yolo/lib/python3.10/site-packages/hailo_sdk_common/states/states.py:16, in allowed_states.<locals>.wrap.<locals>.wrapped_func(self, *args, **kwargs)
12 if self._state not in states:
13 raise InvalidStateException(
14 f"The execution of {func.__name__} is not available under the state: {self._state.value}",
15 )
--> 16 return func(self, *args, **kwargs)

File ~/yolo/lib/python3.10/site-packages/hailo_sdk_client/runner/client_runner.py:1113, in ClientRunner._compile(self, fps, mapping_timeout, allocator_script_filename)
1107 self._logger.warning(
1108 f"Taking model script commands from {allocator_script_filename} and ignoring "
1109 f"previous allocation script commands",
1110 )
1111 self.load_model_script(allocator_script_filename)
--> 1113 serialized_hef = self._sdk_backend.compile(fps, self.model_script, mapping_timeout)
1115 self._auto_model_script = self._sdk_backend.get_auto_alls()
1116 self._state = States.COMPILED_SLIM_MODEL if orig_state in SLIM_STATES else States.COMPILED_MODEL

File ~/yolo/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/sdk_backend.py:1763, in SdkBackendCompilation.compile(self, fps, allocator_script, mapping_timeout)
1761 def compile(self, fps, allocator_script=None, mapping_timeout=None):
1762 self._model.fill_default_quantization_params(logger=self._logger)
--> 1763 hef, mapped_graph_file = self._compile(fps, allocator_script, mapping_timeout)
1764 # TODO: Jira
1765 if not SDKPaths().is_internal:

File ~/yolo/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/sdk_backend.py:1757, in SdkBackendCompilation._compile(self, fps, allocator_script, mapping_timeout)
1751 if not model_params and self.requires_quantized_weights:
1752 raise BackendRuntimeException(
1753 "Model requires quantized weights in order to run on HW, but none were given. "
1754 "Did you forget to quantize?",
1755 )
--> 1757 hef, mapped_graph_file, auto_alls = self.hef_full_build(fps, mapping_timeout, model_params, allocator_script)
1758 self._auto_alls = auto_alls
1759 return hef, mapped_graph_file

File ~/yolo/lib/python3.10/site-packages/hailo_sdk_client/sdk_backend/sdk_backend.py:1733, in SdkBackendCompilation.hef_full_build(self, fps, mapping_timeout, params, allocator_script)
1731 config_paths = ConfigPaths(self._hw_arch, self._model.name)
1732 config_paths.set_stage("inference")
--> 1733 auto_alls, self._hef_data, self._integrated_graph = allocator.create_mapping_and_full_build_hef(
1734 config_paths.get_path("network_graph"),
1735 config_paths.get_path("mapped_graph"),
1736 config_paths.get_path("compilation_output_proto"),
1737 params=params,
1738 allocator_script=allocator_script,
1739 compiler_statistics_path=config_paths.get_path("compiler_statistics"),
1740 nms_metadata=self._nms_metadata,
1741 har=self.har,
1742 alls_ignore_invalid_cmds=self._alls_ignore_invalid_cmds,
1743 )
1745 return self._hef_data, config_paths.get_path("mapped_graph"), auto_alls

File ~/yolo/lib/python3.10/site-packages/hailo_sdk_client/allocator/hailo_tools_runner.py:764, in HailoToolsRunner.create_mapping_and_full_build_hef(self, network_graph_path, output_path, compilation_output_proto, agent, strategy, auto_mapping, params, expected_output_tensor, expected_pre_acts, network_inputs, network_outputs, allocator_script, allocator_script_mode, compiler_statistics_path, nms_metadata, har, alls_ignore_invalid_cmds)
759 if self.hn.net_params.clusters_placement != []:
760 assert (
761 len(self.hn.net_params.clusters_placement) <= self._number_of_clusters
762 ), "Number of clusters in layer placements is larger than allowed number of clusters"
--> 764 self.call_builder(
765 network_graph_path,
766 output_path,
767 compilation_output_proto=compilation_output_proto,
768 agent=agent,
769 strategy=strategy,
770 exit_point=BuilderExitPoint.POST_CAT,
771 params=params,
772 expected_output_tensor=expected_output_tensor,
773 expected_pre_acts=expected_pre_acts,
774 network_inputs=network_inputs,
775 network_outputs=network_outputs,
776 allocator_script=allocator_script,
777 allocator_script_mode=allocator_script_mode,
778 compiler_statistics_path=compiler_statistics_path,
779 nms_metadata=nms_metadata,
780 har=har,
781 alls_ignore_invalid_cmds=alls_ignore_invalid_cmds,
782 )
784 return self._auto_alls, self._output_hef_data, self._output_integrated_pb_graph

File ~/yolo/lib/python3.10/site-packages/hailo_sdk_client/allocator/hailo_tools_runner.py:696, in HailoToolsRunner.call_builder(self, network_graph_path, output_path, blind_deserialize, **kwargs)
694 sys.excepthook = _hailo_tools_exception_hook
695 try:
--> 696 self.run_builder(network_graph_path, output_path, **kwargs)
697 except BackendInternalException:
698 try:

File ~/yolo/lib/python3.10/site-packages/hailo_sdk_client/allocator/hailo_tools_runner.py:570, in HailoToolsRunner.run_builder(self, network_graph_filename, output_filename, compilation_output_proto, agent, strategy, exit_point, params, expected_output_tensor, expected_pre_acts, network_inputs, network_outputs, allocator_script, allocator_script_mode, compiler_statistics_path, is_debug, nms_metadata, har, alls_ignore_invalid_cmds)
568 compiler_msg = e.hailo_tools_error
569 if compiler_msg:
--> 570 raise e.internal_exception("Compilation failed:", hailo_tools_error=compiler_msg) from None
571 else:
572 raise e.internal_exception("Compilation failed with unexpected crash") from None

BackendAllocatorException: Compilation failed: No successful assignments: concat17 errors:
Agent infeasible

What is the issue here, and what can I do to resolve it? Any help is appreciated, thank you!

Hey @Ajitesh_Dasaratha,

This issue can arise when the concat isn’t structured in the specific way we support it:

Concat layers are supported in Hailo but with specific limitations:

  • Only supports concatenation in the features dimension
  • Limited to 4 inputs maximum
  • Concat command handles only two input layers at a time in model scripts
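
To check whether your exported ONNX actually violates these limits, here is a quick diagnostic sketch (assuming the onnx package; substitute your own exported filename) that prints the input count and axis of every Concat node:

import onnx

# "yolov8m.onnx" is a placeholder for your exported model
model = onnx.load("yolov8m.onnx")
for node in model.graph.node:
    if node.op_type == "Concat":
        axis = next((a.i for a in node.attribute if a.name == "axis"), None)
        flag = "  <-- more than two inputs" if len(node.input) > 2 else ""
        print(f"{node.name}: {len(node.input)} inputs, axis={axis}{flag}")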

Your “Agent Infeasible” error likely means:

  1. Your concat operation has too many inputs
  2. The concat operation is too large for direct implementation

Solution:

  1. Split large concat operations into sequential smaller ones:
concat1 = concat([layer1, layer2], intermediate_concat)
concat2 = concat([intermediate_concat, layer3], final_concat)
  2. Alternative approaches:
  • Use Output Multiplexing (output_mux) for tensors with different scales/dimensions
  • Consider feature splitting (feature_splitter) or shape splitting (shape_splitter)

Hello, could you elaborate on your splitting solution please?

Where exactly would I use the code below?

concat1 = concat([layer1, layer2], intermediate_concat)
concat2 = concat([intermediate_concat, layer3], final_concat)

Thanks!

Hey @Liam_D_IMK ,

Approach 1: Fix it in PyTorch before export

Create a simple helper function that chains multiple 2-input concats:

import torch

def chain_cat(tensors, dim=1):
    # Fold a multi-input concat into a chain of 2-input torch.cat calls
    result = tensors[0]
    for tensor in tensors[1:]:
        result = torch.cat([result, tensor], dim=dim)
    return result
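
For example (hypothetical shapes), four feature maps concatenated along the channel dimension trace as three chained binary concats:

# Hypothetical usage: four (1, 64, 20, 20) maps -> one (1, 256, 20, 20) map
feats = [torch.randn(1, 64, 20, 20) for _ in range(4)]
y = chain_cat(feats, dim=1)  # exported ONNX contains three 2-input Concats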

Then replace your multi-input torch.cat() calls with this function. For YOLOv8, you’ll mainly need to patch the SPPF module since that’s where the 4-way concat typically happens.

Example for SPPF (pattern is similar in custom blocks):
import torch
import torch.nn as nn
from ultralytics import YOLO

# Example SPPF using the chain_cat helper defined above. Note: cv1/cv2 here
# are bare Conv2d placeholders; replace_sppf below swaps in the pretrained
# ultralytics Conv blocks (conv + BN + SiLU) from the original module.
class SPPFChain(nn.Module):
    def __init__(self, c1, c2, k=5):
        super().__init__()
        c_ = c1 // 2
        self.cv1 = nn.Conv2d(c1, c_, 1, 1, 0)
        self.cv2 = nn.Conv2d(c_ * 4, c2, 1, 1, 0)
        self.m   = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        y1 = self.m(x)
        y2 = self.m(y1)
        y3 = self.m(y2)
        # Replace the 4-way cat with chained 2-input cats (axis = channels)
        y  = chain_cat([x, y1, y2, y3], dim=1)
        return self.cv2(y)

# Load model and surgically replace the SPPF (or any module that uses multi-cat)
m = YOLO('yolov8m.pt').model

def replace_sppf(module):
    for name, child in list(module.named_children()):
        # Heuristic: match by class name, or test against your known class
        if child.__class__.__name__ == 'SPPF':
            # ultralytics Conv wraps the raw nn.Conv2d as .conv, so read
            # the channel sizes from there
            c1 = child.cv1.conv.in_channels
            c2 = child.cv2.conv.out_channels
            k  = child.m.kernel_size
            new = SPPFChain(c1, c2, k)
            # Reuse the pretrained submodules so no weights are lost
            new.cv1, new.cv2, new.m = child.cv1, child.cv2, child.m
            setattr(module, name, new)
        else:
            replace_sppf(child)

replace_sppf(m)

# (Optional) If you know other blocks use multi-cat, patch them similarly.

# Export to ONNX (eval mode so BN/Dropout behave deterministically)
m.eval()
dummy = torch.randn(1, 3, 640, 640)
torch.onnx.export(
    m, dummy, "yolov8m_fixed.onnx",
    opset_version=12, do_constant_folding=True,
    input_names=["images"], output_names=["output"]
)
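
Before handing the result to the DFC, a quick sanity check (a sketch, reusing the output filename from the export above) confirms only binary Concat nodes remain:

import onnx

model = onnx.load("yolov8m_fixed.onnx")
offenders = [n.name for n in model.graph.node
             if n.op_type == "Concat" and len(n.input) > 2]
assert not offenders, f"Multi-input Concats remain: {offenders}"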

Approach 2: Fix the ONNX after export

If you don’t want to modify the PyTorch code, you can post-process the ONNX file to split multi-input Concat nodes into chains of binary operations. This involves walking through the graph, finding problematic Concat nodes, and replacing them with a sequence of 2-input concatenations.
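
Here is a minimal sketch of that rewrite, assuming the onnx package and an input file named yolov8m.onnx (adjust both paths to your model):

import onnx
from onnx import helper

model = onnx.load("yolov8m.onnx")
graph = model.graph

new_nodes = []
for node in graph.node:
    if node.op_type == "Concat" and len(node.input) > 2:
        axis = next(a.i for a in node.attribute if a.name == "axis")
        # Chain binary Concats: ((a + b) + c) + d ...
        prev = node.input[0]
        for i, inp in enumerate(node.input[1:], start=1):
            last = i == len(node.input) - 1
            out = node.output[0] if last else f"{node.output[0]}_chain_{i}"
            new_nodes.append(helper.make_node(
                "Concat", inputs=[prev, inp], outputs=[out],
                name=f"{node.output[0]}_concat_{i}", axis=axis,
            ))
            prev = out
    else:
        new_nodes.append(node)

# Rebuild the node list in place; insertion order keeps the graph
# topologically sorted
del graph.node[:]
graph.node.extend(new_nodes)
onnx.checker.check_model(model)
onnx.save(model, "yolov8m_split.onnx")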

My recommendation: Go with Approach 1 if possible. It’s cleaner, more maintainable, and you’ll catch any issues during the PyTorch-to-ONNX conversion rather than debugging a post-processed graph.

Key things to remember:

  • Always concat along the channel dimension (dim=1 for NCHW)
  • Make sure tensor dimensions match except for the concat axis
  • Test the modified model before sending it to Hailo compilation

Let me know if you need help adapting it to your specific model architecture.


Thanks for the feedback!

I’ll try this next week and come back to you if I have any more questions.
