Hello, I am trying to parse an area-attention module with the Hailo Dataflow Compiler, but parsing fails on certain layers. I would appreciate any advice on the errors shown below.
Environment:
dataflow: 3.30.0
torch: 2.4.1
python: 3.10.12
hailo chip: Hailo8
Error:
hailo_sdk_client.model_translator.exceptions.ParsingWithRecommendationException: Parsing failed. The errors found in the graph are:
UnsupportedShuffleLayerError in op /Transpose_5: Failed to determine type of layer to create in node /Transpose_5
UnsupportedShuffleLayerError in op /Transpose_4: Failed to determine type of layer to create in node /Transpose_4
UnsupportedShuffleLayerError in op /Reshape_4: Failed to determine type of layer to create in node /Reshape_4
UnsupportedReduceMaxLayerError in op /ReduceMax: Failed to create reduce max layer at vertex /ReduceMax. Reduce max is only supported on the features axis, and with keepdim=True
UnsupportedShuffleLayerError in op /Reshape_3: Failed to determine type of layer to create in node /Reshape_3
UnsupportedShuffleLayerError in op /Reshape_6: Failed to determine type of layer to create in node /Reshape_6
UnsupportedShuffleLayerError in op /Reshape_5: Failed to determine type of layer to create in node /Reshape_5
UnsupportedShuffleLayerError in op /Transpose_7: Failed to determine type of layer to create in node /Transpose_7
UnsupportedShuffleLayerError in op /Transpose_6: Failed to determine type of layer to create in node /Transpose_6
Please try to parse the model again, using these end node names: /Mul_3, /Slice_3
Code:
import onnx
import torch
from torch import nn
from hailo_sdk_client import ClientRunner
def softmax(logits, axis=-1):
    """Compute softmax of `logits` along `axis` from elementary tensor ops.

    The per-slice maximum is subtracted before exponentiating, so the
    largest exponent along `axis` is always zero.

    Args:
        logits: Input tensor.
        axis: Dimension to normalise over; defaults to the last one.

    Returns:
        Tensor of the same shape as `logits` whose entries along `axis`
        sum to one.
    """
    peak = logits.max(dim=axis, keepdim=True).values
    numerators = (logits - peak).exp()
    denominators = numerators.sum(dim=axis, keepdim=True)
    return numerators / denominators
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Return the padding that gives 'same'-shaped outputs.

    Args:
        k: Kernel size, either an int or a sequence of ints.
        p: Explicit padding. When not None it is returned unchanged.
        d: Dilation. Values above 1 enlarge the effective kernel size
            before the padding is derived.

    Returns:
        `p` if it was supplied; otherwise half the (effective) kernel size,
        as an int for an int `k` or as a list for a sequence `k`.
    """
    scalar_kernel = isinstance(k, int)
    if d > 1:
        # Effective kernel size once the dilation gaps are counted.
        if scalar_kernel:
            k = d * (k - 1) + 1
        else:
            k = [d * (x - 1) + 1 for x in k]
    if p is not None:
        return p
    # Auto-pad: half of the kernel size, rounded down.
    if scalar_kernel:
        return k // 2
    return [x // 2 for x in k]
class Conv(nn.Module):
    """Conv2d + BatchNorm2d + activation block.

    Constructor args: (ch_in, ch_out, kernel, stride, padding, groups,
    dilation, activation).
    """

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Build the convolution, batch-norm and activation sub-modules.

        Args:
            c1: Input channels.
            c2: Output channels.
            k: Kernel size.
            s: Stride.
            p: Padding; None lets `autopad` derive it from `k` and `d`.
            g: Number of groups.
            d: Dilation.
            act: True selects `default_act`, an `nn.Module` is used as given,
                anything else disables the activation (`nn.Identity`).
        """
        super().__init__()
        padding = autopad(k, p, d)
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Run convolution, then batch normalization, then the activation."""
        normalized = self.bn(self.conv(x))
        return self.act(normalized)

    def forward_fuse(self, x):
        """Run convolution and the activation, skipping batch normalization."""
        convolved = self.conv(x)
        return self.act(convolved)
class AAttn(nn.Module):
    """
    Area-attention module for YOLO models.

    The flattened feature map is split into `area` groups of positions and
    multi-head attention is computed inside each group. A depthwise 7x7
    convolution of the value tensor is added as a positional term before the
    final 1x1 projection.

    Attributes:
        area (int): Number of areas the feature map is divided.
        num_heads (int): Number of heads into which the attention mechanism is divided.
        head_dim (int): Dimension of each attention head.
        qkv (Conv): Convolution layer for computing query, key and value tensors.
        proj (Conv): Projection convolution layer.
        pe (Conv): Position encoding convolution layer.
        act (torch.nn.Softmax): Softmax over the last dimension; created in
            `__init__` but not called by `forward`.

    Methods:
        forward: Applies area-attention to input tensor.

    Examples:
        >>> attn = AAttn(dim=256, num_heads=8, area=4)
        >>> x = torch.randn(1, 256, 32, 32)
        >>> output = attn(x)
        >>> print(output.shape)
        torch.Size([1, 256, 32, 32])
    """

    def __init__(self, dim, num_heads, area=1):
        """
        Set up the qkv, projection and position-encoding convolutions.

        Args:
            dim (int): Number of hidden channels.
            num_heads (int): Number of heads into which the attention mechanism is divided.
            area (int): Number of areas the feature map is divided, default is 1.
        """
        super().__init__()
        self.area = area
        self.num_heads = num_heads
        self.head_dim = dim // num_heads
        inner_dim = self.head_dim * self.num_heads

        # One 1x1 convolution produces q, k and v together (3 * inner_dim channels).
        self.qkv = Conv(dim, inner_dim * 3, 1, act=False)
        self.proj = Conv(inner_dim, dim, 1, act=False)
        # Depthwise (groups=dim) 7x7 convolution with padding 3.
        self.pe = Conv(inner_dim, dim, 7, 1, 3, g=dim, act=False)
        self.act = torch.nn.Softmax(dim=-1)

    def forward(self, x):
        """Apply area-attention to `x` of shape (B, C, H, W); the result has the same shape."""
        batch, channels, height, width = x.shape
        tokens = height * width
        use_areas = self.area > 1

        # (B, 3C, H, W) -> (B, H*W, 3C)
        qkv = self.qkv(x).flatten(2).transpose(1, 2)
        if use_areas:
            # Fold the areas into the batch axis so attention stays inside each area.
            qkv = qkv.reshape(batch * self.area, tokens // self.area, channels * 3)
            batch, tokens, _ = qkv.shape

        # (B, N, heads, 3*head_dim) -> (B, heads, 3*head_dim, N)
        qkv = qkv.view(batch, tokens, self.num_heads, self.head_dim * 3)
        qkv = qkv.permute(0, 2, 3, 1)
        q, k, v = torch.chunk(qkv, 3, dim=2)

        scale = self.head_dim**-0.5
        scores = (q.transpose(-2, -1) @ k) * scale
        # Uses the module-level softmax() helper rather than Tensor.softmax.
        weights = softmax(scores)

        out = v @ weights.transpose(-2, -1)
        out = out.permute(0, 3, 1, 2)
        v = v.permute(0, 3, 1, 2)

        if use_areas:
            # Undo the area folding: back to one sequence per original batch item.
            out = out.reshape(batch // self.area, tokens * self.area, channels)
            v = v.reshape(batch // self.area, tokens * self.area, channels)
            batch, tokens, _ = out.shape

        # (B, H*W, C) -> (B, C, H, W)
        out = out.reshape(batch, height, width, channels)
        out = out.permute(0, 3, 1, 2)
        v = v.reshape(batch, height, width, channels)
        v = v.permute(0, 3, 1, 2)

        out = out + self.pe(v)
        return self.proj(out)
if __name__ == "__main__":
    # Export AAttn to ONNX, validate the file, then parse it for Hailo-8.
    onnx_path = "test.onnx"
    data_shape = [1, 256, 32, 32]

    net = AAttn(dim=256, num_heads=8, area=4)
    net.eval()

    # Dummy input in FP32
    sample = torch.zeros(data_shape, dtype=torch.float)

    # Export to ONNX
    export_options = dict(
        export_params=True,
        opset_version=15,  # Adjust opset version if needed
        do_constant_folding=True,
        input_names=['input'],
        output_names=['output'],
    )
    torch.onnx.export(net, sample, onnx_path, **export_options)

    # Reload the exported file and run the ONNX checker on it.
    exported = onnx.load(onnx_path)
    onnx.checker.check_model(exported)

    # The first graph input/output names are used as the parser's start/end nodes.
    graph = exported.graph
    start_node_name = graph.input[0].name
    end_node_name = graph.output[0].name

    runner = ClientRunner(hw_arch="hailo8")
    runner.translate_onnx_model(
        onnx_path,
        "parse.har",
        start_node_names=[start_node_name],
        end_node_names=[end_node_name],
        net_input_shapes={start_node_name: data_shape},
    )
Other things I have tried, all of which produced the same error:
- According to the Hailo Dataflow Compiler User Guide, permute is not in the list of supported layers, so I replaced it with two consecutive transposes:
qkv.permute(0,2,3,1) -> transpose(1,2).transpose(2,3)
- Following p. 138 of the User Guide ("Features to Columns Reshape": reshaping a tensor from (batch, height, 1, F) to (batch, height, W′, F′), where F = W′ · F′), I added an unsqueeze before the view:
qkv.view(B, N, self.num_heads, self.head_dim * 3) -> qkv.unsqueeze(2).view(B, N, self.num_heads, self.head_dim * 3)
- Replaced the built-in softmax call shown on the next line with the hand-written function that follows it:
attn = attn.softmax(dim=-1)
def softmax(logits, axis=-1):
    """Compute softmax of `logits` along `axis` from elementary tensor ops.

    The per-slice maximum is subtracted before exponentiating, so the
    largest exponent along `axis` is always zero.

    Args:
        logits: Input tensor.
        axis: Dimension to normalise over; defaults to the last one.

    Returns:
        Tensor of the same shape as `logits` whose entries along `axis`
        sum to one.
    """
    peak = logits.max(dim=axis, keepdim=True).values
    numerators = (logits - peak).exp()
    denominators = numerators.sum(dim=axis, keepdim=True)
    return numerators / denominators