Hello, I am currently parsing a module with the ClientRunner and got an error. I really don't understand why the error occurs, since the ONNX model itself seems to be fine.

Code:
import onnx
import torch
from torch import nn
from hailo_sdk_client import ClientRunner
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Pad to 'same' shape outputs."""
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel size
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p

class Conv(nn.Module):
    """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Initialize Conv layer with given arguments including activation."""
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Apply convolution, batch normalization and activation to input tensor."""
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Apply convolution and activation without batch normalization."""
        return self.act(self.conv(x))

class AAttn(nn.Module):
    """
    Area-attention module for YOLO models, providing efficient attention mechanisms.

    This module implements an area-based attention mechanism that processes input features in a
    spatially-aware manner, making it particularly effective for object detection tasks.

    Attributes:
        area (int): Number of areas the feature map is divided into.
        num_heads (int): Number of heads into which the attention mechanism is divided.
        head_dim (int): Dimension of each attention head.
        qkv (Conv): Convolution layer for computing query, key and value tensors.
        proj (Conv): Projection convolution layer.
        pe (Conv): Position encoding convolution layer.

    Methods:
        forward: Applies area-attention to the input tensor.

    Examples:
        >>> attn = AAttn(dim=256, num_heads=8, area=4)
        >>> x = torch.randn(1, 256, 32, 32)
        >>> output = attn(x)
        >>> print(output.shape)
        torch.Size([1, 256, 32, 32])
    """

    def __init__(self, dim, num_heads, area=1):
        """
        Initializes an Area-attention module for YOLO models.

        Args:
            dim (int): Number of hidden channels.
            num_heads (int): Number of heads into which the attention mechanism is divided.
            area (int): Number of areas the feature map is divided into; default is 1.
        """
        super().__init__()
        self.area = area
        self.num_heads = num_heads
        self.head_dim = head_dim = dim // num_heads
        all_head_dim = head_dim * self.num_heads

        self.qkv = Conv(dim, all_head_dim * 3, 1, act=False)
        self.proj = Conv(all_head_dim, dim, 1, act=False)
        self.pe = Conv(all_head_dim, dim, 7, 1, 3, g=dim, act=False)

    def forward(self, x):
        """Processes the input tensor 'x' through the area-attention."""
        B, C, H, W = x.shape
        N = H * W

        qkv = self.qkv(x).flatten(2).transpose(1, 2)
        if self.area > 1:
            # split the flattened tokens into `area` chunks along the batch dimension
            qkv = qkv.transpose(-2, -1).unsqueeze(2)
            qkv = qkv.view(B, C * 3, B * self.area, N // self.area)
            qkv = qkv.permute(2, 3, 0, 1)
            B, N, _, _ = qkv.shape
            # qkv = qkv.reshape(B * self.area, N // self.area, C * 3)
            # B, N, _ = qkv.shape
        qkv = qkv.view(B, N, self.num_heads, self.head_dim * 3)
        qkv = qkv.permute(0, 2, 3, 1)
        q, k, v = qkv.split([self.head_dim, self.head_dim, self.head_dim], dim=2)
        # qkv = qkv.unsqueeze(2).view(B, N, self.num_heads, self.head_dim * 3)
        # q, k, v = qkv.split([self.head_dim, self.head_dim, self.head_dim], dim=3)
        # q = q.permute(0, 2, 3, 1)
        # k = k.permute(0, 2, 3, 1)
        # v = v.permute(0, 2, 3, 1)

        attn = (q.transpose(-2, -1) @ k) * (self.head_dim**-0.5)
        attn = attn.softmax(dim=-1)
        x = v @ attn.transpose(-2, -1)
        x = x.permute(0, 3, 1, 2)
        v = v.permute(0, 3, 1, 2)
        # if self.area > 1:
        #     x = x.reshape(B // self.area, N * self.area, C)
        #     v = v.reshape(B // self.area, N * self.area, C)
        #     B, N, _ = x.shape
        if self.area > 1:
            # fold the area chunks back together and restore the (B, C, H, W) layout
            B = B // self.area
            x = x.flatten(2).permute(2, 0, 1).flatten(1).unsqueeze(0).unsqueeze(2).reshape(B, C, H, W)
            v = v.flatten(2).permute(2, 0, 1).flatten(1).unsqueeze(0).unsqueeze(2).reshape(B, C, H, W)
        # x = x.reshape(B, H, W, C)
        # x = x.permute(0, 3, 1, 2)
        # v = v.reshape(B, H, W, C)
        # v = v.permute(0, 3, 1, 2)

        x = x + self.pe(v)
        x = self.proj(x)
        return x

if __name__ == "__main__":
    onnx_path = "test.onnx"

    model = AAttn(dim=256, num_heads=4, area=4)
    model.eval()

    # Dummy input in FP32
    data_shape = [1, 256, 40, 40]
    dummy_input = torch.randn(data_shape, dtype=torch.float)

    # Export to ONNX
    torch.onnx.export(
        model,
        dummy_input,
        onnx_path,
        export_params=True,
        opset_version=11,  # adjust opset version if needed
        do_constant_folding=True,
        input_names=['input'],
        output_names=['output'],
    )

    onnx_model = onnx.load(onnx_path)
    onnx.checker.check_model(onnx_model)
    onnx_inputs = onnx_model.graph.input
    onnx_outputs = onnx_model.graph.output

    runner = ClientRunner(hw_arch="hailo8")
    _ = runner.translate_onnx_model(
        onnx_path,
        "test_renamed",
        end_node_names=["/proj/conv/Conv"],
        net_input_shapes={"input": data_shape},
    )
    runner.save_har("test.har")
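
To rule out the export itself, this is the kind of sanity check I would append to the end of the __main__ block above (a minimal sketch, assuming onnxruntime is installed; it reuses model, onnx_path and dummy_input from the script): it runs the exported graph and compares it against the eager PyTorch module.

import numpy as np
import onnxruntime as ort

# Run the exported ONNX graph once and compare it with the eager PyTorch module.
sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
x_np = dummy_input.numpy()
onnx_out = sess.run(None, {"input": x_np})[0]
with torch.no_grad():
    torch_out = model(dummy_input).numpy()
print(onnx_out.shape)                              # expected: (1, 256, 40, 40)
print(float(np.abs(onnx_out - torch_out).max()))   # should be on the order of 1e-5 or smaller

If both the checker and this comparison look good, the ONNX side would seem fine and the failure would be in the translate_onnx_model step.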