PI 5 + Whisper Small HEF

Hi,

I’m trying to run a Whisper server with the whisper-small.hef file.

I downloaded the HEF File from this link:

My Setup is a raspberry pi 5 with the AI HAT 2+ (should be the Hailo 10H chip).
The code works with the whisper-base.hef, but fails with the whisper-small.hef

I get the following error:
[HailoRT] [error] CHECK_SUCCESS failed with status=HAILO_NOT_FOUND(61) - Failed to create Speech2Text
[HailoRT] [error] CHECK_SUCCESS failed with status=HAILO_NOT_FOUND(61)
Traceback (most recent call last):
File “/home/admin/projects/desk-measure-assistant/server/app.py”, line 19, in
whisper = HailoWhisperService(MODEL_PATH)
File “/home/admin/projects/desk-measure-assistant/server/whisper_service.py”, line 16, in init
self.speech2text = Speech2Text(self.vdevice, self.hef_path)
~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/usr/lib/python3/dist-packages/hailo_platform/pyhailort/pyhailort.py”, line 5167, in init
self._speech2text = _pyhailort.Speech2Text.create(vdevice._vdevice, model_path)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
hailo_platform.pyhailort._pyhailort.HailoRTStatusException: 61

My app.py looks like this:

import os
import uvicorn
import paho.mqtt.client as mqtt
from fastapi import FastAPI, UploadFile, File
from whisper_service import HailoWhisperService

# --- Configuration ---

MODEL_PATH = "Whisper-Small.hef"
MQTT_BROKER = ""  # hostname/IP of the MQTT broker — must be filled in before connect()
CLIENT_ID = ""
MQTT_USER = ""
MQTT_PASS = ""

# Topic the recognized START/STOP commands are published to
TOPIC_SUB = "start_stop_measuring"

# --- Setup ---

app = FastAPI()
# Loads the HEF onto the Hailo device at import time; failure here aborts startup.
whisper = HailoWhisperService(MODEL_PATH)

# MQTT setup (connection itself happens in the startup event)
mqtt_client = mqtt.Client(client_id=CLIENT_ID, protocol=mqtt.MQTTv311)
mqtt_client.username_pw_set(MQTT_USER, MQTT_PASS)

@app.on_event("startup")
def startup_event():
    """Connect to the MQTT broker and start its network loop on app startup.

    Connection errors are logged but do not abort the server — transcription
    still works without MQTT, only command publishing is unavailable.
    """
    print("Verbinde mit MQTT Broker…")
    try:
        mqtt_client.connect(MQTT_BROKER, 1883, 60)
        mqtt_client.loop_start()
        print(f"✓ MQTT Verbindung zu {MQTT_BROKER} aktiv.")
    except Exception as e:
        print(f"⚠ MQTT Verbindung fehlgeschlagen: {e}")

@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Accept an uploaded audio file, transcribe it, and publish a command.

    Returns a JSON dict with the transcription and whether a START/STOP
    command was recognized and sent over MQTT.
    """
    # NOTE(review): file.filename is client-controlled — a name containing
    # path separators could escape the working directory. Consider
    # os.path.basename() or a tempfile.NamedTemporaryFile here.
    temp_path = f"tmp_{file.filename}"
    try:
        with open(temp_path, "wb") as buffer:
            buffer.write(await file.read())

        # Whisper transcription (uppercased for keyword matching below)
        text = whisper.transcribe(temp_path).upper()
        print(f"Erkannter Text: {text}")

        # Keyword logic matching the Pico firmware.
        # NOTE(review): substring matching — "AN"/"AUS" also match inside
        # longer words; verify this is intended.
        command = None
        if "START" in text or "MESSUNG" in text or "AN" in text:
            command = "START"
        elif "STOP" in text or "ENDE" in text or "AUS" in text:
            command = "STOP"

        # If a command was recognized, send it to the Pico
        if command:
            mqtt_client.publish(TOPIC_SUB, command)
            print(f"-> Befehl '{command}' an Topic '{TOPIC_SUB}' gesendet.")
            # Response includes 'transcription' for the client
            return {"transcription": text, "status": "command_sent", "command": command}

        # Fallback when no command was recognized
        return {"transcription": text, "status": "no_command_recognized"}

    except Exception as e:
        print(f"Fehler: {e}")
        return {"error": str(e)}
    finally:
        # Always remove the temporary upload, even on error
        if os.path.exists(temp_path):
            os.remove(temp_path)

if __name__ == "__main__":
    # Start the server (blocking)
    uvicorn.run(app, host="0.0.0.0", port=8000)

And this is the whisper_service.py

import wave
import numpy as np
from hailo_platform import VDevice
from hailo_platform.genai import Speech2Text, Speech2TextTask

class HailoWhisperService:
    """Wraps HailoRT's Speech2Text (Whisper) pipeline for one HEF model."""

    def __init__(self, hef_path):
        """Open a VDevice and load the Whisper HEF onto it.

        Raises HailoRTStatusException if the HEF cannot be loaded
        (e.g. model/device mismatch).
        """
        self.hef_path = str(hef_path)

        # Default VDevice parameters — no manual device-group assignment;
        # the Hailo accelerator on the Raspberry Pi 5 is detected automatically.
        self.vdevice = VDevice()

        # Load the model
        self.speech2text = Speech2Text(self.vdevice, self.hef_path)

    def transcribe(self, audio_path):
        """Read a 16-bit PCM WAV file and return the transcribed text.

        Assumes the WAV is mono 16 kHz 16-bit PCM as expected by the
        Whisper pipeline — TODO confirm against the recording side.
        """
        # Read the raw PCM frames
        with wave.open(str(audio_path), 'rb') as wav_file:
            frames = wav_file.getnframes()
            raw_audio = wav_file.readframes(frames)

        # Convert int16 PCM to normalized little-endian float32 for the NPU
        audio_data = np.frombuffer(raw_audio, dtype=np.int16).astype(np.float32) / 32768.0
        audio_data = audio_data.astype('<f4')

        # Generate the transcription segments
        segments = self.speech2text.generate_all_segments(
            audio_data=audio_data,
            task=Speech2TextTask.TRANSCRIBE,
            language="en",
            timeout_ms=15000
        )

        # Join the text segments into one string
        if segments:
            return ''.join([seg.text for seg in segments]).strip()
        return ""

    def __del__(self):
        # Release NPU resources when the object is garbage-collected.
        # hasattr guards cover the case where __init__ failed part-way.
        if hasattr(self, 'speech2text') and self.speech2text:
            self.speech2text.release()
        if hasattr(self, 'vdevice') and self.vdevice:
            self.vdevice.release()