Hello Community,
I am using a Hailo8L with the latest Frigate beta, which contains the Hailo module developed by the Hailo AI team. It runs on a QNAP TS262 storage device with Debian (x86_64) and the backported kernel 6.12.22+bpo-amd64. The hailo_pci kernel module version is 4.20.1. The kernel stack trace points to an issue with VDMA buffer mapping in the hailo_pci driver: the WARNING at include/linux/rwsem.h:80 is raised in find_vma(), which is called from hailo_vdma_buffer_map(). Please find the log below.
Best regards,
Patryk
[442539.846650] WARNING: CPU: 0 PID: 4164123 at include/linux/rwsem.h:80 find_vma+0x59/0x70
[442539.846659] Modules linked in: bluetooth msr mptcp_diag xsk_diag raw_diag unix_diag af_packet_diag netlink_diag vhost_net vhost vhost_iotlb tap tun tls tcp_diag udp_diag inet_diag veth nf_conntrack_netlink xt_nat xt_tcpudp xt_conntrack xt_MASQUERADE xt_set ip_set nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xt_addrtype nft_compat xfrm_user xfrm_algo squashfs overlay bridge cfg80211 rfkill softdog drivetemp sunrpc binfmt_misc nvidia_drm(POE) nvidia_modeset(POE) nvidia_uvm(POE) nls_ascii nls_cp437 vfat fat snd_ctl_led snd_soc_skl_hda_dsp snd_soc_intel_sof_board_helpers snd_soc_intel_hda_dsp_common snd_sof_probes snd_soc_dmic snd_sof_pci_intel_icl snd_sof_pci_intel_cnl snd_sof_intel_hda_generic soundwire_intel soundwire_generic_allocation soundwire_cadence snd_sof_intel_hda_common snd_soc_hdac_hda snd_sof_intel_hda_mlink snd_sof_intel_hda snd_sof_pci snd_sof_xtensa_dsp snd_sof snd_sof_utils snd_soc_acpi_intel_match snd_soc_acpi soundwire_bus snd_soc_avs x86_pkg_temp_thermal snd_soc_hda_codec
[442539.846713] intel_powerclamp snd_hda_ext_core kvm_intel snd_soc_core snd_compress i915 snd_pcm_dmaengine nvidia(POE) snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg snd_intel_sdw_acpi kvm snd_hda_codec zfs(POE) snd_hda_core drm_buddy crct10dif_pclmul mei_pxp ghash_clmulni_intel snd_hwdep mei_hdcp drm_display_helper snd_pcm sha512_ssse3 intel_rapl_msr evdev sha256_ssse3 sha1_ssse3 aesni_intel snd_timer ucsi_acpi cec typec_ucsi snd gf128mul mei_me crypto_simd wdat_wdt ttm cryptd intel_cstate processor_thermal_device_pci_legacy typec ee1004 mei watchdog wmi_bmof processor_thermal_device soundcore pcspkr processor_thermal_wt_hint roles drm_kms_helper processor_thermal_rfim ir_rc6_decoder processor_thermal_rapl intel_rapl_common intel_pmc_core intel_ipu6 rc_rc6_mce ene_ir processor_thermal_wt_req processor_thermal_power_floor rc_core intel_vsec processor_thermal_mbox pmt_telemetry int340x_thermal_zone ipu_bridge pmt_class button i2c_algo_bit intel_soc_dts_iosf acpi_tad acpi_pad spl(OE) sg hailo_pci(OE) i2c_dev coretemp
[442539.846769] 8021q garp stp mrp llc macvlan drm drbd lru_cache dm_crypt nf_tables dm_mod loop nfnetlink efi_pstore configfs ip_tables x_tables autofs4 ext4 crc16 mbcache jbd2 btrfs blake2b_generic efivarfs raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid0 raid1 md_mod sd_mod mmc_block ahci sdhci_pci libahci cqhci libata sdhci xhci_pci scsi_mod mmc_core xhci_hcd nvme usbcore nvme_core crc32_pclmul igc i2c_i801 i2c_smbus crc32c_intel nvme_auth fan usb_common video scsi_common wmi pinctrl_jasperlake
[442539.846817] CPU: 0 UID: 0 PID: 4164123 Comm: frigate.detecto Tainted: P W OE 6.12.22+bpo-amd64 #1 Debian 6.12.22-1~bpo12+1
[442539.846821] Tainted: [P]=PROPRIETARY_MODULE, [W]=WARN, [O]=OOT_MODULE, [E]=UNSIGNED_MODULE
[442539.846822] Hardware name: Default string Default string/Default string, BIOS Q07RAR08 05/31/2023
[442539.846824] RIP: 0010:find_vma+0x59/0x70
[442539.846828] Code: c7 40 48 c7 c2 ff ff ff ff 48 89 e6 e8 a0 1e 91 00 48 8b 54 24 08 65 48 2b 14 25 28 00 00 00 75 0d 48 83 c4 10 c3 cc cc cc cc <0f> 0b eb d0 e8 ce 34 93 00 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f
[442539.846831] RSP: 0018:ffffa5f207af7c48 EFLAGS: 00010246
[442539.846833] RAX: 0000000000000000 RBX: ffff97bd2d7b8b80 RCX: 0000000000000000
[442539.846835] RDX: 0000000000000000 RSI: 00007f0f7f609000 RDI: ffff97bf3a10a680
[442539.846836] RBP: ffffa5f207af7cd8 R08: 0000000000000080 R09: 0000000000000000
[442539.846837] R10: ffffa5f207af7c60 R11: 0000000000000000 R12: 00007f0f7f609000
[442539.846839] R13: ffff97bd018990c8 R14: 0000000000000000 R15: 0000000000000000
[442539.846840] FS: 00007f0f837fe6c0(0000) GS:ffff97c070000000(0000) knlGS:0000000000000000
[442539.846842] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[442539.846844] CR2: 000055ab9b8e4398 CR3: 000000013ce16000 CR4: 0000000000352ef0
[442539.846846] Call Trace:
[442539.846848] <TASK>
[442539.846852] ? __warn+0x89/0x130
[442539.846857] ? find_vma+0x59/0x70
[442539.846860] ? report_bug+0x164/0x190
[442539.846865] ? handle_bug+0x58/0x90
[442539.846868] ? exc_invalid_op+0x17/0x70
[442539.846870] ? asm_exc_invalid_op+0x1a/0x20
[442539.846876] ? find_vma+0x59/0x70
[442539.846880] hailo_vdma_buffer_map+0x167/0x7f0 [hailo_pci]
[442539.846888] hailo_vdma_buffer_map_ioctl+0xbd/0x270 [hailo_pci]
[442539.846894] hailo_pcie_fops_unlockedioctl+0x1d6/0x6f0 [hailo_pci]
[442539.846900] __x64_sys_ioctl+0x91/0xd0
[442539.846905] do_syscall_64+0x82/0x190
[442539.846907] ? sched_clock+0x10/0x30
[442539.846910] ? sched_clock_cpu+0xf/0x190
[442539.846914] ? psi_group_change+0x129/0x360
[442539.846916] ? finish_task_switch.isra.0+0x88/0x2d0
[442539.846919] ? __rseq_handle_notify_resume+0xa4/0x4f0
[442539.846923] ? __schedule+0x503/0xbf0
[442539.846926] ? restore_fpregs_from_fpstate+0x3c/0xa0
[442539.846929] ? switch_fpu_return+0x4f/0xd0
[442539.846932] ? clear_bhb_loop+0x25/0x80
[442539.846934] ? clear_bhb_loop+0x25/0x80
[442539.846936] ? clear_bhb_loop+0x25/0x80
[442539.846938] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[442539.846941] RIP: 0033:0x7f0fb8b30d1b
[442539.846943] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1c 48 8b 44 24 18 64 48 2b 04 25 28 00 00
[442539.846945] RSP: 002b:00007f0f837fc550 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[442539.846948] RAX: ffffffffffffffda RBX: 00007f0f837fc850 RCX: 00007f0fb8b30d1b
[442539.846949] RDX: 00007f0f837fc600 RSI: 00000000c0287604 RDI: 000000000000003f
[442539.846950] RBP: 00000000159d6e90 R08: 0000000000000001 R09: 0000000000000000
[442539.846951] R10: 00000000159d6f70 R11: 0000000000000246 R12: 00000000159d6f20
[442539.846953] R13: 0000000000000001 R14: 00000000159d6e90 R15: 00000000159d6f48
[442539.846955] </TASK>
[442539.846956] ---[ end trace 0000000000000000 ]---