[Error] TypeError: object of type 'NoneType' has no len()

Hello, I am currently parsing a module and getting an error. I really don't understand why it occurs, since the ONNX model itself seems fine.

code:

import onnx
import torch
from torch import nn
from hailo_sdk_client import ClientRunner

def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Pad to 'same' shape outputs."""
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p

class Conv(nn.Module):
    """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Initialize Conv layer with given arguments including activation."""
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Apply convolution, batch normalization and activation to input tensor."""
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Apply convolution and activation without batch normalization."""
        return self.act(self.conv(x))

class AAttn(nn.Module):
    """
    Area-attention module for YOLO models, providing efficient attention mechanisms.

    This module implements an area-based attention mechanism that processes input features in a spatially-aware manner,
    making it particularly effective for object detection tasks.

    Attributes:
        area (int): Number of areas the feature map is divided.
        num_heads (int): Number of heads into which the attention mechanism is divided.
        head_dim (int): Dimension of each attention head.
        qkv (Conv): Convolution layer for computing query, key and value tensors.
        proj (Conv): Projection convolution layer.
        pe (Conv): Position encoding convolution layer.

    Methods:
        forward: Applies area-attention to input tensor.

    Examples:
        >>> attn = AAttn(dim=256, num_heads=8, area=4)
        >>> x = torch.randn(1, 256, 32, 32)
        >>> output = attn(x)
        >>> print(output.shape)
        torch.Size([1, 256, 32, 32])
    """

    def __init__(self, dim, num_heads, area=1):
        """
        Initializes an Area-attention module for YOLO models.

        Args:
            dim (int): Number of hidden channels.
            num_heads (int): Number of heads into which the attention mechanism is divided.
            area (int): Number of areas the feature map is divided, default is 1.
        """
        super().__init__()
        self.area = area

        self.num_heads = num_heads
        self.head_dim = head_dim = dim // num_heads
        all_head_dim = head_dim * self.num_heads

        self.qkv = Conv(dim, all_head_dim * 3, 1, act=False)
        self.proj = Conv(all_head_dim, dim, 1, act=False)
        self.pe = Conv(all_head_dim, dim, 7, 1, 3, g=dim, act=False)

    def forward(self, x):
        """Processes the input tensor 'x' through the area-attention."""
        B, C, H, W = x.shape
        N = H * W

        qkv = self.qkv(x).flatten(2).transpose(1, 2)
        if self.area > 1:
            qkv = qkv.transpose(-2, -1).unsqueeze(2)
            qkv = qkv.view(B, C * 3, B * self.area, N // self.area)
            qkv = qkv.permute(2, 3, 0, 1)
            B, N, _, _ = qkv.shape
            #qkv = qkv.reshape(B * self.area, N // self.area, C * 3)
            #B, N, _ = qkv.shape
        qkv = qkv.view(B, N, self.num_heads, self.head_dim * 3)
        qkv = qkv.permute(0, 2, 3, 1)
        q, k, v = qkv.split([self.head_dim, self.head_dim, self.head_dim], dim=2)
        #qkv = qkv.unsqueeze(2).view(B, N, self.num_heads, self.head_dim * 3)
        #q, k, v = qkv.split([self.head_dim, self.head_dim, self.head_dim], dim=3)
        #q = q.permute(0, 2, 3, 1)
        #k = k.permute(0, 2, 3, 1)
        #v = v.permute(0, 2, 3, 1)
    
        attn = (q.transpose(-2, -1) @ k) * (self.head_dim**-0.5)
        attn = attn.softmax(dim=-1)
        x = v @ attn.transpose(-2, -1)
        x = x.permute(0, 3, 1, 2)
        v = v.permute(0, 3, 1, 2)

        #if self.area > 1:
            #x = x.reshape(B // self.area, N * self.area, C)
            #v = v.reshape(B // self.area, N * self.area, C)
            #B, N, _ = x.shape
        if self.area > 1:
            B = B // self.area
        x = x.flatten(2).permute(2, 0, 1).flatten(1).unsqueeze(0).unsqueeze(2).reshape(B, C, H, H)
        v = v.flatten(2).permute(2, 0, 1).flatten(1).unsqueeze(0).unsqueeze(2).reshape(B, C, H, H)
        #x = x.reshape(B, H, W, C)
        #x = x.permute(0, 3, 1, 2)
        #v = v.reshape(B, H, W, C)
        #v = v.permute(0, 3, 1, 2)

        x = x + self.pe(v)
        x = self.proj(x)
        return x

if __name__ == "__main__":
    onnx_path = "test.onnx"

    model = AAttn(dim=256, num_heads=4, area=4)
    model.eval()

    # Dummy input in FP32
    data_shape = [1, 256, 40, 40]
    dummy_input = torch.randn(data_shape, dtype=torch.float)

    # Export to ONNX
    torch.onnx.export(
        model,
        dummy_input,
        onnx_path,
        export_params=True,
        opset_version=11,  # Adjust opset version if needed
        do_constant_folding=True,
        input_names=['input'],
        output_names=['output']
    )
    
    onnx_model = onnx.load(onnx_path)
    onnx.checker.check_model(onnx_model)

    onnx_inputs = onnx_model.graph.input
    onnx_outputs = onnx_model.graph.output

    runner = ClientRunner(hw_arch="hailo8")
    _ = runner.translate_onnx_model(
            onnx_path,
            "test_renamed",
            end_node_names=["/proj/conv/Conv"],
            net_input_shapes={"input": data_shape}
        )

    runner.save_har("test.har")

Hey @HappySniper95 ,

It seems one of the functions returns zero or None. Looking at the code, I suspect one of the following:

1. Issue in qkv Computation

qkv = self.qkv(x).flatten(2).transpose(1, 2)
  • The self.qkv(x) call may be returning None
  • Check if self.qkv is properly initialized or if x is None

2. split() Function Called on None

q, k, v = qkv.split([self.head_dim, self.head_dim, self.head_dim], dim=2)
  • If qkv is None, this will trigger the error

3. Try updating opset_version from 11 to 13 in the torch.onnx.export call.

4. Make sure "/proj/conv/Conv" actually exists as a node name in the exported model (see the snippet below).

We recommend checking these areas and updating your ONNX export to use a more recent opset version for better compatibility.
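
For point 4, you can verify the end node name by printing the node names from the exported graph before calling translate_onnx_model. A minimal sketch using the standard onnx API ("test.onnx" is the path from your script):

import onnx

onnx_model = onnx.load("test.onnx")
for node in onnx_model.graph.node:
    print(node.op_type, node.name)   # e.g. Conv  /proj/conv/Conv

If "/proj/conv/Conv" does not appear in that list, use the name that does as the end node.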

If this doesn't work, can you provide a full or more detailed log so I can help you better?

Thank you for your reply.
I have gone through your suggestions but still couldn't resolve the issue. However, one thing I've noticed is that converting the Attention module is possible. Therefore, I guess the issue may have occurred in the reshape, split, or permute operations.
I would like to know if there have been any updates to those layers beyond what is covered in the Hailo Dataflow Compiler User Guide (v3.30.0).

Attention module:

class Attention(nn.Module):
    """
    Attention module that performs self-attention on the input tensor.

    Args:
        dim (int): The input tensor dimension.
        num_heads (int): The number of attention heads.
        attn_ratio (float): The ratio of the attention key dimension to the head dimension.

    Attributes:
        num_heads (int): The number of attention heads.
        head_dim (int): The dimension of each attention head.
        key_dim (int): The dimension of the attention key.
        scale (float): The scaling factor for the attention scores.
        qkv (Conv): Convolutional layer for computing the query, key, and value.
        proj (Conv): Convolutional layer for projecting the attended values.
        pe (Conv): Convolutional layer for positional encoding.
    """

    def __init__(self, dim, num_heads=8, attn_ratio=0.5):
        """
        Initialize multi-head attention module.

        Args:
            dim (int): Input dimension.
            num_heads (int): Number of attention heads.
            attn_ratio (float): Attention ratio for key dimension.
        """
        super().__init__()
        self.num_heads = num_heads
        self.head_dim = dim // num_heads
        self.key_dim = int(self.head_dim * attn_ratio)
        self.scale = self.key_dim**-0.5
        nh_kd = self.key_dim * num_heads
        h = dim + nh_kd * 2
        self.qkv = Conv(dim, h, 1, act=False)
        self.proj = Conv(dim, dim, 1, act=False)
        self.pe = Conv(dim, dim, 3, 1, g=dim, act=False)

    def forward(self, x):
        """
        Forward pass of the Attention module.

        Args:
            x (torch.Tensor): The input tensor.

        Returns:
            (torch.Tensor): The output tensor after self-attention.
        """
        B, C, H, W = x.shape
        N = H * W
        qkv = self.qkv(x)
        q, k, v = qkv.view(B, self.num_heads, self.key_dim * 2 + self.head_dim, N).split(
            [self.key_dim, self.key_dim, self.head_dim], dim=2
        )

        attn = (q.transpose(-2, -1) @ k) * self.scale
        attn = attn.softmax(dim=-1)
        x = (v @ attn.transpose(-2, -1)).view(B, C, H, W) + self.pe(v.reshape(B, C, H, W))
        x = self.proj(x)
        return x
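
For reference, this is roughly how I exported and translated the Attention module to confirm it parses (a sketch of what I ran, using the same flow as my original script; the end node name and shapes are assumed to match that setup):

model = Attention(dim=256, num_heads=4)
model.eval()

dummy_input = torch.randn([1, 256, 40, 40], dtype=torch.float)
torch.onnx.export(
    model,
    dummy_input,
    "attention.onnx",
    export_params=True,
    opset_version=13,
    do_constant_folding=True,
    input_names=['input'],
    output_names=['output']
)

runner = ClientRunner(hw_arch="hailo8")
_ = runner.translate_onnx_model(
        "attention.onnx",
        "attention_test",
        end_node_names=["/proj/conv/Conv"],
        net_input_shapes={"input": [1, 256, 40, 40]}
    )

This goes through without the TypeError, while the AAttn module below still fails.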

I changed two things:

  • The calculation of query, key, and value
  • The use of 4D permutes, which I replaced with flattening, 3D permutes, and unsqueeze.

However, the same error still occurs. Can anyone help with the issue?

class AAttn(nn.Module):
    """
    Area-attention module for YOLO models, providing efficient attention mechanisms.

    This module implements an area-based attention mechanism that processes input features in a spatially-aware manner,
    making it particularly effective for object detection tasks.

    Attributes:
        area (int): Number of areas the feature map is divided.
        num_heads (int): Number of heads into which the attention mechanism is divided.
        head_dim (int): Dimension of each attention head.
        qkv (Conv): Convolution layer for computing query, key and value tensors.
        proj (Conv): Projection convolution layer.
        pe (Conv): Position encoding convolution layer.

    Methods:
        forward: Applies area-attention to input tensor.

    Examples:
        >>> attn = AAttn(dim=256, num_heads=8, area=4)
        >>> x = torch.randn(1, 256, 32, 32)
        >>> output = attn(x)
        >>> print(output.shape)
        torch.Size([1, 256, 32, 32])
    """

    def __init__(self, dim, num_heads, area=1):
        """
        Initializes an Area-attention module for YOLO models.

        Args:
            dim (int): Number of hidden channels.
            num_heads (int): Number of heads into which the attention mechanism is divided.
            area (int): Number of areas the feature map is divided, default is 1.
        """
        super().__init__()
        self.area = area

        self.num_heads = num_heads
        self.head_dim = head_dim = dim // num_heads
        all_head_dim = head_dim * self.num_heads

        self.qk = Conv(dim, all_head_dim * 2, 1, act=False)
        self.v = Conv(dim, all_head_dim, 1, act=False)
        self.proj = Conv(all_head_dim, dim, 1, act=False)
        self.pe = Conv(all_head_dim, dim, 5, 1, 2, g=dim, act=False)

    def forward(self, x):
        """Processes the input tensor 'x' through the area-attention."""
        B, C, H, W = x.shape
        N = H * W

        qk = self.qk(x).flatten(2)
        v = self.v(x)
        pp = self.pe(v)
        v = v.flatten(2)

        if self.area > 1:
            qk = qk.unsqueeze(2)
            qk = qk.reshape(1, C * 2, B * self.area, N // self.area)
            qk = qk.transpose(0, 2)
            v = v.unsqueeze(2)
            v = v.reshape(1, C, B * self.area, N // self.area)
            v = v.transpose(0, 2)
            B, _, _, N = qk.shape

        q, k = qk.split([C, C], dim=1)
        q = q.flatten(2)
        q = q.unsqueeze(1)
        q = torch.cat(self.split(q, 2, self.head_dim, self.num_heads), dim=1)
        k = k.flatten(2)
        k = k.unsqueeze(1)
        k = torch.cat(self.split(k, 2, self.head_dim, self.num_heads), dim=1)
        v = v.flatten(2)
        v = v.unsqueeze(1)
        v = torch.cat(self.split(v, 2, self.head_dim, self.num_heads), dim=1)

        attn = (q.transpose(-2, -1) @ k) * (self.head_dim ** -0.5)
        max_attn = attn.max(dim=-1, keepdim=True).values
        exp_attn = torch.exp(attn - max_attn)
        attn = exp_attn / exp_attn.sum(dim=-1, keepdim=True)
        x = (v @ attn.transpose(-2, -1))

        if self.area > 1:
            x = x.transpose(0, 2)
            x = x.flatten(2)
            x = x.permute(2, 0, 1)
            x = x.unsqueeze(0)
            x = x.flatten(2)
            x = x.permute(0, 2, 1)
            B, _, N = x.shape
        x = x.unsqueeze(2)
        x = x.reshape(B, C, H, W)
        x = self.proj(x + pp)
        return x
    
    def split(self, tensor, dim, split_value, split_num):
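        """Manually slice `tensor` into `split_num` chunks of size `split_value` along dimension `dim`."""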
        res = []
        for num in range(split_num):
            start = split_value * num
            end = split_value * (num + 1)
            if dim == 0:
                res.append(tensor[start:end, :, :, :])
            elif dim == 1:
                res.append(tensor[:, start:end, :, :])
            elif dim == 2:
                res.append(tensor[:, :, start:end, :])
            else:
                res.append(tensor[:, :, :, start:end])
        return res
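
For what it's worth, here is a quick sanity check that the manual split above matches torch.split (shapes are arbitrary, just for the check):

attn = AAttn(dim=256, num_heads=4, area=4)
t = torch.randn(2, 4, 256, 100)
manual = attn.split(t, 2, 64, 4)      # 4 chunks of size 64 along dim 2
builtin = torch.split(t, 64, dim=2)
assert all(torch.equal(a, b) for a, b in zip(manual, builtin))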