Hi,
I'm trying to run a Whisper server with the Whisper-Small.hef file.
I downloaded the HEF File from this link:
My setup is a Raspberry Pi 5 with the AI HAT 2+ (which should have the Hailo-10H chip).
The code works with whisper-base.hef, but fails with whisper-small.hef.
I get the following error:
[HailoRT] [error] CHECK_SUCCESS failed with status=HAILO_NOT_FOUND(61) - Failed to create Speech2Text
[HailoRT] [error] CHECK_SUCCESS failed with status=HAILO_NOT_FOUND(61)
Traceback (most recent call last):
  File "/home/admin/projects/desk-measure-assistant/server/app.py", line 19, in <module>
    whisper = HailoWhisperService(MODEL_PATH)
  File "/home/admin/projects/desk-measure-assistant/server/whisper_service.py", line 16, in __init__
    self.speech2text = Speech2Text(self.vdevice, self.hef_path)
                       ~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3/dist-packages/hailo_platform/pyhailort/pyhailort.py", line 5167, in __init__
    self._speech2text = _pyhailort.Speech2Text.create(vdevice._vdevice, model_path)
                        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
hailo_platform.pyhailort._pyhailort.HailoRTStatusException: 61
My app.py looks like this:
import os
import re
import tempfile

import paho.mqtt.client as mqtt
import uvicorn
from fastapi import FastAPI, UploadFile, File

from whisper_service import HailoWhisperService
# --- Configuration ---
# Relative path: resolved against the working directory the server is started from.
MODEL_PATH = "Whisper-Small.hef"
MQTT_BROKER = ""
CLIENT_ID = ""
MQTT_USER = ""
MQTT_PASS = ""
TOPIC_SUB = "start_stop_measuring"

# --- Setup ---
app = FastAPI()
# The model is loaded at import time, so a load failure aborts the whole server start.
whisper = HailoWhisperService(MODEL_PATH)

# MQTT setup
mqtt_client = mqtt.Client(client_id=CLIENT_ID, protocol=mqtt.MQTTv311)
mqtt_client.username_pw_set(MQTT_USER, MQTT_PASS)
@app.on_event("startup")
def startup_event():
    """Connect to the MQTT broker and start its network loop on app startup.

    A connection failure is only reported on stdout; it is not re-raised, so
    the HTTP API still comes up without a working MQTT connection.
    """
    print("Verbinde mit MQTT Broker…")
    try:
        mqtt_client.connect(MQTT_BROKER, 1883, 60)
        mqtt_client.loop_start()
        print(f"✓ MQTT Verbindung zu {MQTT_BROKER} aktiv.")
    except Exception as e:
        print(f"⚠ MQTT Verbindung fehlgeschlagen: {e}")
@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and forward a START/STOP command via MQTT.

    The upload is written to a temporary file, transcribed, upper-cased and
    scanned for command keywords. A recognised command is published to
    ``TOPIC_SUB``.

    Args:
        file: The uploaded audio file (multipart form field ``file``).

    Returns:
        ``{"transcription", "status": "command_sent", "command"}`` when a
        command was recognised, ``{"transcription", "status":
        "no_command_recognized"}`` otherwise, or ``{"error": <message>}`` if
        anything raised while handling the request.
    """
    temp_path = None
    try:
        # Use a server-generated name instead of the client-supplied filename:
        # that one is untrusted (path traversal), may be missing, and would
        # collide when two requests upload files with the same name.
        fd, temp_path = tempfile.mkstemp(prefix="tmp_", suffix=".wav")
        with os.fdopen(fd, "wb") as buffer:
            buffer.write(await file.read())

        # Whisper transcription
        text = whisper.transcribe(temp_path).upper()
        print(f"Erkannter Text: {text}")

        # The long keywords keep substring matching so inflected forms still
        # hit. The short ones ("AN", "AUS") must match as whole words,
        # otherwise e.g. "STOP AND ..." or "PAUSE" would trigger a command.
        words = set(re.findall(r"\w+", text))
        command = None
        if "START" in text or "MESSUNG" in text or "AN" in words:
            command = "START"
        elif "STOP" in text or "ENDE" in text or "AUS" in words:
            command = "STOP"

        # If a command was recognised, send it to the Pico
        if command:
            mqtt_client.publish(TOPIC_SUB, command)
            print(f"-> Befehl '{command}' an Topic '{TOPIC_SUB}' gesendet.")
            # The client expects the text under the 'transcription' key
            return {"transcription": text, "status": "command_sent", "command": command}

        # Fallback when no command was recognised
        return {"transcription": text, "status": "no_command_recognized"}
    except Exception as e:
        print(f"Fehler: {e}")
        return {"error": str(e)}
    finally:
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)
if __name__ == "__main__":
    # Start the server, listening on all interfaces
    uvicorn.run(app, host="0.0.0.0", port=8000)
And this is whisper_service.py:
import wave
import numpy as np
from hailo_platform import VDevice
from hailo_platform.genai import Speech2Text, Speech2TextTask


class HailoWhisperService:
    """Wrapper around the Hailo GenAI ``Speech2Text`` pipeline for WAV files."""

    def __init__(self, hef_path):
        """Open a virtual device and load the speech-to-text model.

        Args:
            hef_path: Path to the Whisper HEF file; stored as ``str``.

        Raises:
            Any exception raised by ``VDevice()`` or ``Speech2Text(...)``. If
            loading the model fails, the device is released before re-raising.
        """
        self.hef_path = str(hef_path)
        self.speech2text = None

        # The VDevice is created with default parameters (no manual group
        # assignment); per the original author's note the defaults are
        # detected correctly on the Raspberry Pi 5 (Hailo-10H).
        self.vdevice = VDevice()

        # Load the model
        try:
            self.speech2text = Speech2Text(self.vdevice, self.hef_path)
        except Exception:
            # Do not keep the device claimed when the model cannot be loaded.
            self.close()
            raise

    def transcribe(self, audio_path, language="en", timeout_ms=15000):
        """Transcribe a 16-bit PCM WAV file and return the recognised text.

        Args:
            audio_path: Path to the WAV file.
            language: Language code passed to the model (default ``"en"``).
            timeout_ms: Generation timeout in milliseconds (default 15000).

        Returns:
            The concatenated, stripped text of all segments, or ``""`` when
            the model returns no segments.

        Raises:
            ValueError: If the WAV file is not 16-bit PCM.
        """
        # Read the audio file
        with wave.open(str(audio_path), "rb") as wav_file:
            sample_width = wav_file.getsampwidth()
            channels = wav_file.getnchannels()
            raw_audio = wav_file.readframes(wav_file.getnframes())

        # The int16 conversion below is only valid for 16-bit samples; any
        # other width would be silently misread as noise.
        if sample_width != 2:
            raise ValueError(
                f"Unsupported WAV sample width: {sample_width * 8} bit "
                "(expected 16-bit PCM)"
            )

        # Convert to normalised float32 in [-1, 1); WAV data is little-endian.
        audio_data = np.frombuffer(raw_audio, dtype="<i2").astype(np.float32) / 32768.0
        if channels > 1:
            # Frames are interleaved per channel: average them down to mono.
            audio_data = audio_data.reshape(-1, channels).mean(axis=1)
        audio_data = audio_data.astype("<f4")
        # NOTE(review): the sample rate is not checked or resampled here;
        # confirm which rate the model expects (Whisper is usually 16 kHz).

        # Generate the transcription
        segments = self.speech2text.generate_all_segments(
            audio_data=audio_data,
            task=Speech2TextTask.TRANSCRIBE,
            language=language,
            timeout_ms=timeout_ms,
        )

        # Join the text segments
        if segments:
            return "".join(seg.text for seg in segments).strip()
        return ""

    def close(self):
        """Release the model and the device; safe to call more than once."""
        speech2text = getattr(self, "speech2text", None)
        if speech2text:
            speech2text.release()
        self.speech2text = None

        vdevice = getattr(self, "vdevice", None)
        if vdevice:
            vdevice.release()
        self.vdevice = None

    def __del__(self):
        # Clean up the NPU resources when the object is destroyed
        self.close()