Hello there!
I recently got a new Raspberry Pi AI HAT+ 2 (Hailo-10H).
Yesterday I tested it extensively and found something that surprised me (and not in a good way):
- I downloaded the compiled Qwen2-VL-2B-Instruct .hef model from here - Qwen2‑VL 2B Image‑Text (Vision‑Language) Model | Hailo AI -
and it gave me errors (Error code 6). When I changed the model to the one from hailo_apps, it worked.
Here is the code:
#!/usr/bin/env python3
from pathlib import Path
import sys
import time
import signal
import cv2
import numpy as np
from picamera2 import Picamera2
from hailo_platform import VDevice
from hailo_platform.genai import VLM
# Path to the compiled model file; main() exits with status 1 if it is missing.
HEF_PATH = "/usr/local/hailo/resources/models/hailo10h/Qwen2-VL-2B-Instruct.hef"
# User and system messages sent with every frame (see build_prompt).
PROMPT_TEXT = "Describe what you see in this image in one short paragraph."
SYSTEM_TEXT = "You are a helpful vision assistant."
# Size in pixels that each frame is resized to before it is handed to the VLM.
# NOTE(review): presumably must match the input resolution the HEF was
# compiled for - confirm against the model card.
MODEL_WIDTH = 336
MODEL_HEIGHT = 336
# Generation settings forwarded unchanged to vlm.generate_all().
TEMPERATURE = 0.1
MAX_GENERATED_TOKENS = 200
SEED = 42
# Resolution requested for the camera's main stream.
CAMERA_WIDTH = 1280
CAMERA_HEIGHT = 720
# Delay between completed responses, in seconds; the main loop skips the
# sleep entirely when this is not greater than zero.
INFERENCE_INTERVAL_SEC = 2.0
# Main-loop flag; handle_signal() sets it to False on SIGINT/SIGTERM.
RUNNING = True
def handle_signal(signum, frame):
    """Signal handler that requests shutdown by clearing the RUNNING flag.

    Both arguments are supplied by the ``signal`` module and are not used.
    """
    global RUNNING
    RUNNING = False
def build_prompt(user_text: str):
    """Build the two-message chat prompt passed to the VLM.

    Args:
        user_text: Text that follows the image placeholder in the user turn.

    Returns:
        A list with a system message (carrying SYSTEM_TEXT) followed by a
        user message whose content is an image placeholder and ``user_text``.
    """
    system_message = {
        "role": "system",
        "content": [{"type": "text", "text": SYSTEM_TEXT}],
    }
    user_message = {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": user_text},
        ],
    }
    return [system_message, user_message]
def init_camera() -> Picamera2:
    """Open, configure and start the Raspberry Pi camera.

    Returns:
        A started Picamera2 instance. The caller owns it and must call
        ``stop()`` and ``close()`` when done.

    Raises:
        Any exception raised by Picamera2 while configuring or starting.
        The camera is closed before the exception propagates, so a failed
        start-up does not leave the device open.
    """
    picam2 = Picamera2()
    try:
        # Picamera2 format names are the reverse of the in-memory channel
        # order: "RGB888" yields arrays ordered B, G, R, while "BGR888"
        # yields R, G, B. The rest of this script treats the frame as RGB,
        # so request "BGR888".
        config = picam2.create_preview_configuration(
            main={"size": (CAMERA_WIDTH, CAMERA_HEIGHT), "format": "BGR888"}
        )
        picam2.configure(config)
        picam2.start()
    except BaseException:
        # The caller never receives the handle on failure, so release it here.
        picam2.close()
        raise
    # Short pause after start-up before the first capture.
    # NOTE(review): presumably lets exposure/white balance settle - confirm.
    time.sleep(1.0)
    return picam2
def capture_frame_from_rpi_camera(picam2: Picamera2) -> np.ndarray:
    """Grab a single frame from an already started camera.

    Args:
        picam2: Camera object whose ``capture_array()`` is called once.

    Returns:
        The array returned by ``capture_array()``, unmodified.

    Raises:
        RuntimeError: If ``capture_array()`` returns ``None``.
    """
    captured = picam2.capture_array()
    if captured is not None:
        return captured
    raise RuntimeError("Failed to capture frame from Raspberry Pi camera")
def preprocess_for_vlm(frame_rgb: np.ndarray) -> np.ndarray:
    """Convert a frame to uint8 and resize it to the model input size.

    Args:
        frame_rgb: Captured frame; cast to ``np.uint8`` if it has another dtype.

    Returns:
        The frame resized with ``cv2.resize`` to MODEL_WIDTH x MODEL_HEIGHT.
        The aspect ratio is not preserved.
    """
    already_uint8 = frame_rgb.dtype == np.uint8
    pixels = frame_rgb if already_uint8 else frame_rgb.astype(np.uint8)
    target_size = (MODEL_WIDTH, MODEL_HEIGHT)
    return cv2.resize(pixels, target_size)
def clean_response(text):
    """Return the model output up to the first end-of-turn marker.

    Args:
        text: Raw value returned by the model.

    Returns:
        For a string, everything before the first ``<|im_end|>`` with
        surrounding whitespace stripped. Any other value is returned as
        ``str(text)``.
    """
    if isinstance(text, str):
        before_marker, _, _ = text.partition("<|im_end|>")
        return before_marker.strip()
    return str(text)
def _best_effort(label, action):
    """Run one cleanup step, reporting a failure on stderr instead of raising."""
    try:
        action()
    except Exception as exc:
        print(f"WARNING: {label} failed: {exc}", file=sys.stderr)


def main():
    """Describe live camera frames with the VLM until SIGINT/SIGTERM.

    Each loop iteration captures a frame, resizes it, asks the VLM to
    describe it and prints the timestamped answer. A failed inference is
    reported as a warning and the loop continues with the next frame.

    Returns:
        0 after a clean stop; 1 if the HEF file is missing or any step other
        than the per-frame inference raises.
    """
    hef_path = Path(HEF_PATH)
    if not hef_path.is_file():
        print(f"ERROR: HEF file not found: {hef_path}", file=sys.stderr)
        return 1

    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    # Kept as None until created, so the cleanup below releases only what
    # was actually opened.
    picam2 = None
    vdevice = None
    vlm = None
    try:
        print("Initializing camera...")
        picam2 = init_camera()

        print("Initializing Hailo VDevice...")
        params = VDevice.create_params()
        vdevice = VDevice(params)

        print("Loading VLM...")
        vlm = VLM(vdevice, str(hef_path))

        print("Running live inference. Press Ctrl+C to stop.\n")
        while RUNNING:
            # Capture a fresh frame for this iteration only
            frame_rgb = capture_frame_from_rpi_camera(picam2)
            input_image = preprocess_for_vlm(frame_rgb)
            try:
                response = vlm.generate_all(
                    prompt=build_prompt(PROMPT_TEXT),
                    frames=[input_image],
                    temperature=TEMPERATURE,
                    seed=SEED,
                    max_generated_tokens=MAX_GENERATED_TOKENS,
                )
                print(f"[{time.strftime('%H:%M:%S')}] {clean_response(response)}\n")
            except Exception as exc:
                print(f"WARNING: inference failed: {exc}", file=sys.stderr)
            finally:
                # Every frame is an independent single-turn query. Reset the
                # conversation so earlier prompts, images and answers do not
                # pile up in the model context from one iteration to the next.
                _best_effort("vlm.clear_context", vlm.clear_context)

            # Wait only after response is done, then next loop gets a new frame
            if RUNNING and INFERENCE_INTERVAL_SEC > 0:
                time.sleep(INFERENCE_INTERVAL_SEC)

        print("Stopping...")
        return 0
    except Exception as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1
    finally:
        # Each step is attempted independently so that one failure does not
        # prevent the remaining resources from being released. The VLM is
        # released before the device it was created on.
        if picam2 is not None:
            _best_effort("picam2.stop", picam2.stop)
            _best_effort("picam2.close", picam2.close)
        if vlm is not None:
            _best_effort("vlm.clear_context", vlm.clear_context)
            _best_effort("vlm.release", vlm.release)
        if vdevice is not None:
            _best_effort("vdevice.release", vdevice.release)
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
When I set HEF_PATH = "downloaded Qwen from hailo", it doesn’t work.
What could be the reasons for this?
Has anyone had the same issue?