Hi folks,
I created a CNN with LSTM layers and a single output y ∈ {True, False} for a computer vision project and trained it with PyTorch Lightning. The accuracy is ~88%.
I converted the PyTorch checkpoint to ONNX, then to a .har, and then quantized it.
Running inference with the quantized .har gives me 87% accuracy and an F1 Score of 75%.
However, when I convert it to .hef and run inference on the Hailo-8L on the RPi 5, I only get 76% accuracy. That is the worst possible result: ~76% of the data is False, and the model outputs only False on the same data. Fiddling around with the sigmoid threshold, I get a maximum of 76%, so the model is essentially giving me garbage results. I have been stuck for days; is there anything I am missing?
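To make explicit why I call 76% worthless: a model that always answers False already reaches that number. A minimal sanity check, with the class balance assumed from the ~76% figure above:

from sklearn.metrics import accuracy_score, f1_score

# Assumed class balance from the numbers above: ~76% False, ~24% True
labels = [False] * 76 + [True] * 24
preds = [False] * len(labels)  # always predict the majority class

print(accuracy_score(labels, preds))  # 0.76 -- the same accuracy the HEF reaches
print(f1_score(labels, preds))        # 0.0  -- F1 collapses when no positives are predicted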
This is the conversion from quantized .har to .hef:
quantized_model_har_path = f"{models_root}/{model_name}_quantized.har"
runner = ClientRunner(har=quantized_model_har_path, hw_arch='hailo8l')
model_script = """
performance_param(compiler_optimization_level=max)
"""
runner.load_model_script(model_script)
hef = runner.compile()
file_name = quantized_model_har_path.replace(".har", ".hef")
with open(file_name, "wb") as f:
    f.write(hef)
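One variant I could still try, to rule out the aggressive optimization setting, is compiling without the model script, i.e. with default compiler settings (a minimal sketch, same paths as above; I don't know whether the optimization level affects numerics at all):

runner = ClientRunner(har=quantized_model_har_path, hw_arch="hailo8l")
hef_default = runner.compile()  # no performance_param, default optimization level
with open(quantized_model_har_path.replace(".har", "_default.hef"), "wb") as f:
    f.write(hef_default)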
And here is the inference script:
#!/usr/bin/env python3
import torch
import os
import numpy as np
from hailo_platform import VDevice, HailoSchedulingAlgorithm
from pathlib import Path
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader
from pinno_cv_utils.datasets.labelstudio import FrameTagDataset
from sklearn.metrics import accuracy_score, f1_score
import time
# Paths and HEF setup
data_root = Path(__file__).parents[2] / "hibeas" / "data"
models_dir = Path(__file__).parents[2] / "models"
model_name = "best_checkpoint_model_quantized"
hef_model_path = models_dir / f"{model_name}.hef"
# Check if HEF file exists
if not hef_model_path.exists():
    raise FileNotFoundError("HEF file not found!")
# DataLoader setup
test_set = FrameTagDataset(
    dataset_path=data_root / "dataset_15_05_2024",
    labels_path=data_root / "dataset_15_05_2024/annotations_hibeam_dataset_15_05_2024.json",
    train_val_test="test",
)
# Optionally select a subset for testing
test_indices = np.random.choice(len(test_set), 2000, replace=False)
test_dataloader = DataLoader(
    torch.utils.data.Subset(test_set, test_indices), batch_size=1
)
# Main inference function
if __name__ == "__main__":
    start_time = time.time()
    timeout_ms = 1000
    labels_list, predictions_list = [], []

    # Create VDevice with parameters
    params = VDevice.create_params()
    params.scheduling_algorithm = HailoSchedulingAlgorithm.ROUND_ROBIN

    with VDevice(params) as vdevice:
        # Create an infer model from the HEF
        infer_model = vdevice.create_infer_model(str(hef_model_path))

        # Retrieve quantization information
        output_stream = infer_model.output()
        quant_infos = output_stream.quant_infos
        if len(quant_infos) == 1:
            quant_info = quant_infos[0]
            scale = quant_info.qp_scale
            zero_point = quant_info.qp_zp
        else:
            raise ValueError("Multiple quant_infos detected. Ensure the model uses a single quantization scheme.")

        # Configure the infer model
        with infer_model.configure() as configured_infer_model:
            # Create bindings
            bindings = configured_infer_model.create_bindings()

            # Prepare output buffer
            output_buffer = np.empty(infer_model.output().shape, dtype=np.uint8)

            # Inference loop
            for images, labels, _, _ in test_dataloader:
                image = images[0].type(torch.uint8).numpy()

                # Set input and output buffers
                bindings.input().set_buffer(image)
                bindings.output().set_buffer(output_buffer)

                # Run synchronous inference
                configured_infer_model.run([bindings], timeout_ms)

                # Get raw output
                raw_output = bindings.output().get_buffer()

                # Dequantize the output
                dequantized_output = (raw_output.astype(np.float32) - zero_point) * scale

                # Apply sigmoid
                output_tensor = torch.tensor(dequantized_output)
                sigmoid_output = torch.sigmoid(output_tensor)

                # Get prediction
                output_bool = sigmoid_output > 0.5
                prediction = output_bool.item()

                labels_list.extend(labels[:, 1].numpy())
                predictions_list.extend(output_bool.cpu().numpy())

                # Optionally print prediction
                # print(f"Raw: {raw_output.item()} - Dequant: {dequantized_output.item()} "
                #       f"- Sigmoid: {np.round(sigmoid_output.item(), 5)} - Prediction: {prediction}")

    # Compute and display inference time and accuracy
    end_time = time.time()
    acc = accuracy_score(labels_list, predictions_list)
    f1 = f1_score(labels_list, predictions_list)

    print("-------------------------------------")
    print(f" Infer Time: {end_time - start_time:.3f} sec")
    print(f" Average FPS: {len(test_dataloader) / (end_time - start_time):.3f}")
    print(f" Accuracy: {acc * 100:.2f}%")
    print(f"F1 Score: {f1:.4f}")
    print(f"Labels: True: {labels_list.count(True)} - False: {labels_list.count(False)}")
    print(f"Predictions: True: {predictions_list.count(True)} - False: {predictions_list.count(False)}")
    print("-------------------------------------")
This gives me 76% accuracy, but as said, that result is worthless: the F1 score is 0. So the problem must be somewhere in the conversion from HAR to HEF? Or is it in the inference?
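To rule out my manual dequantization on the inference side, one variation I could try (a sketch, assuming the set_format_type API of HailoRT's InferModel is available in my version) is asking the runtime for float32 outputs directly, so qp_scale/qp_zp never enter my code:

from hailo_platform import FormatType

infer_model = vdevice.create_infer_model(str(hef_model_path))
# Let HailoRT dequantize internally instead of applying qp_scale/qp_zp by hand
infer_model.output().set_format_type(FormatType.FLOAT32)

with infer_model.configure() as configured_infer_model:
    bindings = configured_infer_model.create_bindings()
    output_buffer = np.empty(infer_model.output().shape, dtype=np.float32)
    bindings.output().set_buffer(output_buffer)
    # ... set the input buffer and call run() as above; the output is already float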
The HAR inference script is here (it works with the quantized model and reaches 87% accuracy):
import os
import numpy as np
import torch
from torchvision.transforms import Normalize, Resize, ToTensor, Compose
from torch.utils.data import DataLoader
from pinno_cv_utils.datasets.labelstudio import FrameTagDataset
from hailo_sdk_client import ClientRunner, InferenceContext
import pathlib
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
# Step 1: Set up paths and load the HAR file
models_dir = pathlib.Path(__file__).parents[2] / "models"
#model_name = "best_checkpoint_model"
model_name = "best_checkpoint_model_quantized"
har_model_path = f'{models_dir}/{model_name}.har'
torch.cuda.empty_cache()
# Step 2: Initialize the Hailo ClientRunner and load the HAR model
runner = ClientRunner(har=har_model_path)
# Step 3: Define dataset paths
data_root = pathlib.Path(__file__).parents[2] / "hibeas" / "data"
assert data_root.exists()
# Dataset for Test
test_set = FrameTagDataset(
    dataset_path=data_root / "dataset_15_05_2024",
    labels_path=data_root / "dataset_15_05_2024/annotations_hibeam_dataset_15_05_2024.json",
    train_val_test="test",
    transform=Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    fps=25,
)
# only use 300 random samples
test_set = torch.utils.data.Subset(test_set, np.random.choice(len(test_set), 300, replace=False))
# Test dataloader
test_dataloader = DataLoader(
    test_set, batch_size=1, shuffle=False, num_workers=2, pin_memory=False
)
# Preprocessing transforms
transform = Compose(
    [
        Resize((224, 224)),  # Resize to the required input size for the model
        ToTensor(),          # Convert to tensor
        Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # Normalize
    ]
)
# Step 4: Create inference context (SDK_NATIVE or SDK_QUANTIZED based on model type)
with runner.infer_context(InferenceContext.SDK_NATIVE) as context:

    def run_inference_on_test_set(runner, context, dataloader):
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for batch in tqdm(dataloader, desc="Running Hailo Inference"):
                images, labels, _, _ = batch
                # Preprocess images if not already a tensor
                if not isinstance(images, torch.Tensor):
                    images = torch.stack([transform(image) for image in images])
                # Transpose images from NCHW to NHWC format for Hailo inference
                images = images.permute(0, 2, 3, 1).numpy()
                # Run Hailo inference on the batch
                outputs = runner.infer(context, images)
                # Process outputs
                output_tensor = torch.tensor(outputs).sigmoid()
                output_bool = output_tensor > 0.5
                # Append batch predictions and labels
                all_preds.extend(output_bool.cpu().numpy())
                all_labels.extend(labels[:, 1].numpy())  # second label in the multi-label format
                print(outputs, output_tensor, output_bool)
        return all_labels, all_preds

    # Execute inference
    print("Running Hailo inference on test set...")
    true_labels, predictions = run_inference_on_test_set(runner, context, test_dataloader)

# Calculate accuracy and F1 score
accuracy = accuracy_score(true_labels, predictions)
f1 = f1_score(true_labels, predictions)
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")