Why doesn't the official Qwen2-VL-2B-Instruct model work?

Hello there!

I recently got a new Raspberry Pi AI HAT+ 2 (Hailo-10H).

Yesterday I tested it a lot and found something that surprised me (and not in a good way):

Here is the code:

#!/usr/bin/env python3
from pathlib import Path
import sys
import time
import signal

import cv2
import numpy as np
from picamera2 import Picamera2

from hailo_platform import VDevice
from hailo_platform.genai import VLM

# Compiled model file; main() checks that it exists and passes it to VLM().
HEF_PATH = "/usr/local/hailo/resources/models/hailo10h/Qwen2-VL-2B-Instruct.hef"

# Text of the user and system messages assembled by build_prompt().
PROMPT_TEXT = "Describe what you see in this image in one short paragraph."
SYSTEM_TEXT = "You are a helpful vision assistant."

# Size every frame is resized to in preprocess_for_vlm() before inference.
MODEL_WIDTH = 336
MODEL_HEIGHT = 336

# Generation settings forwarded unchanged to vlm.generate_all() in main().
TEMPERATURE = 0.1
MAX_GENERATED_TOKENS = 200
SEED = 42

# Size requested for the camera's main stream in init_camera().
CAMERA_WIDTH = 1280
CAMERA_HEIGHT = 720

# Delay between completed responses
INFERENCE_INTERVAL_SEC = 2.0

# Loop flag for main(); handle_signal() sets it to False on SIGINT/SIGTERM.
RUNNING = True


def handle_signal(signum, frame):
    """Signal handler: ask the inference loop to stop.

    Both arguments are supplied by the ``signal`` module and are ignored;
    the handler only clears the module-level ``RUNNING`` flag.
    """
    global RUNNING
    # The loop in main() re-checks this flag, so it exits on its own.
    RUNNING = False


def build_prompt(user_text: str):
    """Assemble the two-message chat prompt for one image query.

    Args:
        user_text: Instruction placed after the image slot in the user turn.

    Returns:
        A list with a system message (text from ``SYSTEM_TEXT``) followed by
        a user message whose content is an image placeholder and then
        ``user_text``.
    """
    system_message = {
        "role": "system",
        "content": [{"type": "text", "text": SYSTEM_TEXT}],
    }
    user_parts = [
        {"type": "image"},
        {"type": "text", "text": user_text},
    ]
    user_message = {"role": "user", "content": user_parts}
    return [system_message, user_message]


def init_camera() -> Picamera2:
    """Open, configure and start the Raspberry Pi camera.

    Returns:
        A started ``Picamera2`` instance configured for a
        ``CAMERA_WIDTH`` x ``CAMERA_HEIGHT`` main stream.

    Raises:
        Exception: Whatever Picamera2 raises while configuring or starting.
            The camera is closed before the exception propagates, because
            the caller never receives the handle in that case.
    """
    picam2 = Picamera2()
    try:
        # Picamera2 format names describe the packed pixel word, so "BGR888"
        # is the one whose arrays come out in [R, G, B] channel order
        # ("RGB888" yields [B, G, R]). The rest of this script treats the
        # captured frames as RGB.
        config = picam2.create_preview_configuration(
            main={"size": (CAMERA_WIDTH, CAMERA_HEIGHT), "format": "BGR888"}
        )
        picam2.configure(config)
        picam2.start()
    except Exception:
        # Do not leak the open camera when setup fails half-way.
        picam2.close()
        raise
    # Short pause after start() before the first capture
    # (presumably to let exposure / white balance settle).
    time.sleep(1.0)
    return picam2


def capture_frame_from_rpi_camera(picam2: Picamera2) -> np.ndarray:
    """Grab a single frame from the camera.

    Args:
        picam2: Camera object whose ``capture_array()`` supplies the frame.

    Returns:
        The array returned by ``picam2.capture_array()``.

    Raises:
        RuntimeError: If ``capture_array()`` returns ``None``.
    """
    captured = picam2.capture_array()
    if captured is not None:
        return captured
    raise RuntimeError("Failed to capture frame from Raspberry Pi camera")


def preprocess_for_vlm(frame_rgb: np.ndarray) -> np.ndarray:
    """Convert a frame to uint8 and resize it to the model input size.

    Args:
        frame_rgb: Image array to prepare.

    Returns:
        The frame resized with ``cv2.resize`` to
        ``(MODEL_WIDTH, MODEL_HEIGHT)``; non-uint8 input is cast to
        ``np.uint8`` first.
    """
    target_size = (MODEL_WIDTH, MODEL_HEIGHT)
    pixels = frame_rgb if frame_rgb.dtype == np.uint8 else frame_rgb.astype(np.uint8)
    return cv2.resize(pixels, target_size)


def clean_response(text):
    """Return the printable part of a model response.

    For a string, everything from the first ``<|im_end|>`` marker onward is
    dropped and surrounding whitespace is stripped. Any other value is
    returned as ``str(text)``.
    """
    if isinstance(text, str):
        before_marker, _, _ = text.partition("<|im_end|>")
        return before_marker.strip()
    return str(text)


def _attempt(description, action):
    """Run ``action()`` and report a failure on stderr instead of raising."""
    try:
        action()
    except Exception as exc:
        print(f"WARNING: {description} failed: {exc}", file=sys.stderr)


def main():
    """Run the live camera -> VLM description loop until a stop signal arrives.

    Each iteration captures one frame, resizes it, asks the VLM to describe
    it, prints the cleaned response and then waits
    ``INFERENCE_INTERVAL_SEC`` seconds.

    Returns:
        0 after a clean stop (SIGINT/SIGTERM), 1 if the HEF file is missing
        or an error outside the per-frame inference call occurs.
    """
    hef_path = Path(HEF_PATH)
    if not hef_path.is_file():
        print(f"ERROR: HEF file not found: {hef_path}", file=sys.stderr)
        return 1

    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    # Kept as None until created so the finally block knows what to release.
    picam2 = None
    vdevice = None
    vlm = None

    try:
        print("Initializing camera...")
        picam2 = init_camera()

        print("Initializing Hailo VDevice...")
        params = VDevice.create_params()
        vdevice = VDevice(params)

        print("Loading VLM...")
        vlm = VLM(vdevice, str(hef_path))

        print("Running live inference. Press Ctrl+C to stop.\n")

        while RUNNING:
            # Capture a fresh frame for this iteration only
            frame_rgb = capture_frame_from_rpi_camera(picam2)
            input_image = preprocess_for_vlm(frame_rgb)

            try:
                response = vlm.generate_all(
                    prompt=build_prompt(PROMPT_TEXT),
                    frames=[input_image],
                    temperature=TEMPERATURE,
                    seed=SEED,
                    max_generated_tokens=MAX_GENERATED_TOKENS,
                )
                print(f"[{time.strftime('%H:%M:%S')}] {clean_response(response)}\n")

            except Exception as exc:
                # A single failed frame should not end the session.
                print(f"WARNING: inference failed: {exc}", file=sys.stderr)

            finally:
                # Every iteration sends a complete prompt (system + image +
                # question), so the previous turn must not stay in the model
                # context; otherwise it keeps growing from frame to frame.
                _attempt("VLM context reset", vlm.clear_context)

            # Wait only after response is done, then next loop gets a new frame
            if RUNNING and INFERENCE_INTERVAL_SEC > 0:
                time.sleep(INFERENCE_INTERVAL_SEC)

        print("Stopping...")
        return 0

    except Exception as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1

    finally:
        # Best-effort teardown: each step is attempted even if an earlier
        # one fails, and failures are reported rather than hidden.
        if picam2 is not None:
            _attempt("camera stop", picam2.stop)
            _attempt("camera close", picam2.close)

        if vlm is not None:
            _attempt("VLM context reset", vlm.clear_context)
            _attempt("VLM release", vlm.release)

        if vdevice is not None:
            _attempt("VDevice release", vdevice.release)


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())

When I set HEF_PATH to the Qwen2-VL-2B-Instruct HEF that I downloaded from Hailo, it doesn't work.

What could be the reasons for this?

Has anyone had the same issue?

Here are the logs:
[HailoRT] [error] CHECK_SUCCESS failed with status=HAILO_INVALID_OPERATION(6) - Failed to create VLM
[HailoRT] [error] CHECK_SUCCESS failed with status=HAILO_INVALID_OPERATION(6) - Failed to create VLM
[HailoRT] [error] CHECK_SUCCESS failed with status=HAILO_INVALID_OPERATION(6)