ConvNext model completely broken after quantization - heavy performance loss
Hey everyone, I’m losing my mind here and could really use some help.
I’m trying to convert a ConvNext-based fall detection model to HEF. The ONNX model works fine (98% accuracy), but after quantization I’m getting 58% accuracy, which is basically random guessing for a 2-class problem. I’ve tried everything I can think of and nothing changes this number.
The model
It’s a ConvNext backbone (DINOv3 from HuggingFace) with a simple CNN classifier head. Input is [1, 45, 224, 224] - I had to reshape from the original [1, 15, 3, 224, 224] because Hailo gave me errors with that format, so I stacked the 15 frames into 45 channels instead. Output is just [1, 2] for binary classification (fall vs no_fall).
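For clarity, this is the kind of frame stacking I mean (a minimal numpy sketch, not my actual export code). The subtle part is that the 45-channel order depends on how you flatten:

import numpy as np

# One clip in the layout the model was trained with: [1, 15, 3, 224, 224]
clip = np.random.rand(1, 15, 3, 224, 224).astype(np.float32)

# Plain reshape -> frame-major channels: [f0_R, f0_G, f0_B, f1_R, ...]
frame_major = clip.reshape(1, 45, 224, 224)

# Transpose first -> color-major channels: [R_f0..R_f14, G_f0..G_f14, B_f0..B_f14]
color_major = clip.transpose(0, 2, 1, 3, 4).reshape(1, 45, 224, 224)

Whichever order the exported ONNX actually uses, the calibration samples need to be stacked the same way.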
What I’ve tried so far
Attempt 1 - Simple mode (baseline test):
model_optimization_flavor(optimization_level=0, compression_level=0)
Didn’t work
Attempt 2 - Standard advanced settings:
set_seed(seed=42)
model_optimization_config(calibration, calibset_size=512, batch_size=8)
model_optimization_flavor(optimization_level=4, compression_level=0)
pre_quantization_optimization(equalization, policy=enabled)
pre_quantization_optimization(activation_clipping, layers={*}, mode=percentile, clipping_values=[0.5, 99.5])
pre_quantization_optimization(weights_clipping, layers={*}, mode=mmse)
quantization_param(output_layer1, precision_mode=a16_w16)
post_quantization_optimization(bias_correction, policy=enabled)
post_quantization_optimization(finetune, policy=enabled, dataset_size=1024, batch_size=8, epochs=8, learning_rate=0.0001)
Didn’t work
Attempt 3 - Maximum accuracy mode:
model_optimization_config(calibration, calibset_size=1024, batch_size=4)
model_optimization_flavor(optimization_level=4, compression_level=0)
pre_quantization_optimization(equalization, policy=enabled)
pre_quantization_optimization(activation_clipping, layers={*}, mode=percentile, clipping_values=[0.1, 99.9])
pre_quantization_optimization(weights_clipping, layers={*}, mode=mmse)
quantization_param(output_layer1, precision_mode=a16_w16)
post_quantization_optimization(bias_correction, policy=enabled)
post_quantization_optimization(finetune, policy=enabled, dataset_size=1024, batch_size=4, epochs=16, learning_rate=0.00005)
Didn’t work
I’ve tried 16 epochs of fine-tuning, gentler clipping (0.1–99.9 percentile), smaller batches, and a lower learning rate; nothing changes the accuracy.
What’s definitely working
- ONNX model is valid and runs fine in ONNX Runtime (98% accuracy)
- Calibration data looks good: 1024 samples, balanced between fall/no_fall classes, with the same ImageNet normalization as my original training pipeline (quick sanity check sketched after this list)
- The Hailo pipeline completes without errors - parsing, optimization, compilation all work
- Inference on Hailo runs fine, just with this accuracy drop
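By "looks good" I mean checks along these lines on the saved calibration array (a minimal sketch; filename assumes MODEL_NAME = "fall_detection" as in the script below):

import numpy as np

calib = np.load("fall_detection_calibration.npy")  # written by the script below

# Layout the loader builds: NHWC with the 15 frames merged into channels
print(calib.shape, calib.dtype)   # expecting (1024, 224, 224, 45), float32

# ImageNet-normalized floats should sit roughly in [-2.2, 2.7]
print(calib.min(), calib.max(), calib.mean())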
What I really need to know
- Is ConvNext actually supported on Hailo-8, or is the architecture simply too complex for it?
- Could the input reshaping be breaking something? I had to go from [1, 15, 3, 224, 224] to [1, 45, 224, 224] because of Hailo errors. (I don’t think so, but I want to double-check.)
- Why isn’t fine-tuning helping at all? I tried up to 16 epochs and the accuracy doesn’t budge from 58%. (Don’t take that number as representative of real-world performance; the quantized model randomly flips between predicting all fall or all no_fall.)
- Should I give up on PTQ and try QAT instead? The problem is that the model is trained in PyTorch (DINOv3 ConvNext from HuggingFace), and I’ve read that PyTorch models aren’t supported for QAT on Hailo. Any guidance there would help too.
- What diagnostic tools can I run to figure out which layers are losing accuracy? Layer noise analysis or something? (The only check I have so far is the output-level comparison sketched right below.)
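The only diagnostic I have so far is an output-level comparison between the FP-optimized and quantized SDK contexts (same idea as validate_accuracy in the script below). A minimal sketch, assuming the optimized HAR and calibration .npy from that script already exist:

import numpy as np
from hailo_sdk_client import ClientRunner, InferenceContext

runner = ClientRunner(hw_arch="hailo8")
runner.load_har("fall_detection_optimized.har")
data = np.load("fall_detection_calibration.npy")[:100]

# Run the same inputs through the FP-optimized and the quantized emulation
with runner.infer_context(InferenceContext.SDK_FP_OPTIMIZED) as ctx:
    fp_out = runner.infer(ctx, data)
with runner.infer_context(InferenceContext.SDK_QUANTIZED) as ctx:
    q_out = runner.infer(ctx, data)

fp_preds = np.argmax(fp_out, axis=-1).flatten()
q_preds = np.argmax(q_out, axis=-1).flatten()
print("FP vs quantized agreement:", np.mean(fp_preds == q_preds))
# Is the quantized model collapsing to a single class?
print("quantized class counts:", np.bincount(q_preds, minlength=2))

That only tells me the outputs diverge, not where; something per-layer would be much more useful.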
I’ve pasted the conversion script I used below, in case it helps, along with a Netron view of my ONNX model.
At this point I’m starting to think there’s either a fundamental incompatibility with the ConvNext architecture or I’m doing something really wrong that I can’t see. Any ideas would be appreciated, because I’m about ready to give up on this.
Thanks and happy new year.
Script used for the ONNX to HEF conversion (a little revised and shortened), plus the Netron view of the model:
import os
import random
from pathlib import Path

import cv2
import numpy as np
from tqdm import tqdm

from hailo_sdk_client import ClientRunner, InferenceContext

# NOTE: placeholder paths/names - the real ones were trimmed when shortening the script
MODEL_NAME = "fall_detection"
ONNX_PATH = f"{MODEL_NAME}.onnx"
DATASET_PATH = "dataset"              # expects dataset/fall and dataset/no_fall
MODEL_SCRIPT = f"{MODEL_NAME}.alls"   # the optimization commands from the attempts above
HW_ARCH = "hailo8"

CALIB_SAMPLES = 1024
BALANCE_CLASSES = True
OPTIMIZATION_MODE = "max_accuracy"
SKIP_PARSING = False
SKIP_OPTIMIZATION = False
class FallDetectionDatasetLoader:
    def __init__(self, dataset_path, target_samples=1024, balance=True):
        self.dataset_path = Path(dataset_path)
        self.target_samples = target_samples
        self.balance = balance

    def load_video_sequence(self, video_folder):
        """Load a 15-frame clip and merge it into a single (224, 224, 45) array."""
        frames = []
        frame_files = sorted(video_folder.glob("*.jpg"))
        if len(frame_files) != 15:
            return None
        # Same ImageNet normalization as training
        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
        std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
        for frame_path in frame_files:
            frame = cv2.imread(str(frame_path))
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, (224, 224))
            frame = frame.astype(np.float32) / 255.0
            frame = (frame - mean) / std
            frames.append(frame)
        frames_stacked = np.stack(frames, axis=0)              # (15, 224, 224, 3)
        frames_merged = frames_stacked.transpose(1, 2, 3, 0)   # (224, 224, 3, 15)
        # NOTE: this flattens color-major ([R of all 15 frames, then G, then B]);
        # a plain reshape of [15, 3, H, W] in the ONNX model would be frame-major instead
        frames_merged = frames_merged.reshape(224, 224, 45)
        return frames_merged
    def collect_samples(self):
        fall_samples = []
        no_fall_samples = []
        fall_dir = self.dataset_path / "fall"
        if fall_dir.exists():
            for video_dir in tqdm(list(fall_dir.iterdir()), desc="Loading fall samples"):
                if video_dir.is_dir():
                    sample = self.load_video_sequence(video_dir)
                    if sample is not None:
                        fall_samples.append(sample)
        no_fall_dir = self.dataset_path / "no_fall"
        if no_fall_dir.exists():
            for video_dir in tqdm(list(no_fall_dir.iterdir()), desc="Loading no_fall samples"):
                if video_dir.is_dir():
                    sample = self.load_video_sequence(video_dir)
                    if sample is not None:
                        no_fall_samples.append(sample)
        return fall_samples, no_fall_samples

    def balance_and_sample(self, fall_samples, no_fall_samples):
        if self.balance:
            min_samples = min(len(fall_samples), len(no_fall_samples))
            fall_samples = random.sample(fall_samples, min_samples)
            no_fall_samples = random.sample(no_fall_samples, min_samples)
        all_samples = fall_samples + no_fall_samples
        random.shuffle(all_samples)
        if len(all_samples) > self.target_samples:
            all_samples = random.sample(all_samples, self.target_samples)
        calibration_data = np.stack(all_samples, axis=0)
        return calibration_data

    def prepare_calibration_data(self):
        fall_samples, no_fall_samples = self.collect_samples()
        if len(fall_samples) == 0 or len(no_fall_samples) == 0:
            raise ValueError("Could not find samples in fall/no_fall directories!")
        calibration_data = self.balance_and_sample(fall_samples, no_fall_samples)
        return calibration_data
class ONNXtoHEFConverter:
    def __init__(self, onnx_path, model_name="fall_detection", hw_arch="hailo8"):
        self.onnx_path = onnx_path
        self.model_name = model_name
        self.hw_arch = hw_arch
        self.runner = None

    def parse_onnx(self):
        parsed_har = f"{self.model_name}_parsed.har"
        self.runner = ClientRunner(hw_arch=self.hw_arch)
        hn, npz = self.runner.translate_onnx_model(
            self.onnx_path,
            self.model_name
        )
        self.runner.save_har(parsed_har)
        return parsed_har

    def optimize_model(self, calibration_data, model_script_path=None):
        if model_script_path and os.path.exists(model_script_path):
            self.runner.load_model_script(model_script_path)
        self.runner.optimize(calibration_data)
        optimized_har = f"{self.model_name}_optimized.har"
        self.runner.save_har(optimized_har)
        return optimized_har

    def validate_accuracy(self, calibration_data):
        # Compare FP-optimized vs quantized predictions on part of the calibration set
        test_data = calibration_data[:min(100, len(calibration_data))]
        with self.runner.infer_context(InferenceContext.SDK_FP_OPTIMIZED) as fp_ctx:
            fp_outputs = self.runner.infer(fp_ctx, test_data)
        with self.runner.infer_context(InferenceContext.SDK_QUANTIZED) as q_ctx:
            q_outputs = self.runner.infer(q_ctx, test_data)
        fp_preds = np.argmax(fp_outputs, axis=-1).flatten()
        q_preds = np.argmax(q_outputs, axis=-1).flatten()
        agreement = np.mean(fp_preds == q_preds) * 100
        return agreement

    def compile_to_hef(self):
        hef = self.runner.compile()
        hef_path = f"{self.model_name}.hef"
        with open(hef_path, "wb") as f:
            f.write(hef)
        compiled_har = f"{self.model_name}_compiled.har"
        self.runner.save_har(compiled_har)
        return hef_path

    def run_profiler(self):
        compiled_har = f"{self.model_name}_compiled.har"
        if os.path.exists(compiled_har):
            os.system(f"hailo profiler {compiled_har}")
def main():
    CALIB_PATH = f"{MODEL_NAME}_calibration.npy"

    if not os.path.exists(ONNX_PATH):
        print(f"ONNX model not found: {ONNX_PATH}")
        return
    if not os.path.exists(DATASET_PATH):
        print(f"Dataset not found: {DATASET_PATH}")
        return

    # Reuse a cached calibration set if it exists, otherwise build and save one
    if os.path.exists(CALIB_PATH):
        calibration_data = np.load(CALIB_PATH)
    else:
        loader = FallDetectionDatasetLoader(
            dataset_path=DATASET_PATH,
            target_samples=CALIB_SAMPLES,
            balance=BALANCE_CLASSES
        )
        calibration_data = loader.prepare_calibration_data()
        np.save(CALIB_PATH, calibration_data)

    converter = ONNXtoHEFConverter(
        onnx_path=ONNX_PATH,
        model_name=MODEL_NAME,
        hw_arch=HW_ARCH
    )

    optimized_har = f"{MODEL_NAME}_optimized.har"
    if SKIP_PARSING and SKIP_OPTIMIZATION and os.path.exists(optimized_har):
        # Resume from an already-optimized HAR instead of redoing parse/optimize
        converter.runner = ClientRunner(hw_arch=HW_ARCH)
        converter.runner.load_har(optimized_har)
    else:
        converter.parse_onnx()
        converter.optimize_model(
            calibration_data=calibration_data,
            model_script_path=MODEL_SCRIPT if os.path.exists(MODEL_SCRIPT) else None
        )

    agreement = converter.validate_accuracy(calibration_data)
    print(f"FP vs quantized prediction agreement: {agreement:.1f}%")

    hef_path = converter.compile_to_hef()
    print(f"HEF saved to {hef_path}")
    converter.run_profiler()


if __name__ == "__main__":
    main()