Latency Benchmark for various batch sizes (1, 8, 16, 32, 63)

Hi, I have written the Python script below (measure_latency_kpi_sync) to benchmark latency for various batch sizes (1, 8, 16, 32, 63) using the synchronous run command, but the results do not match the hailortcli benchmark tool.

Similarly, I benchmark FPS (measure_fps_kpi_async) using run_async, but the results don't match the numbers published in the Hailo8L Model Zoo.

Could you please check and let us know what we are doing wrong, and how we can improve the code so it matches the published Hailo8L results? Thanks.

HailoRT version 4.22.0
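
For reference, the numbers I compare against come from the CLI benchmark tool, run roughly as below (the exact flags may differ per version; hailortcli benchmark --help lists them):

hailortcli benchmark model.hef --batch-size 8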

import os
import time
from functools import partial

import numpy as np
from hailo_platform import VDevice, HailoSchedulingAlgorithm

def measure_latency_kpi_sync(hef_file, batch_size):
	timeout_ms = 10000
	number_of_frames = 100
	inference_times = []

	params = VDevice.create_params()
	params.scheduling_algorithm = HailoSchedulingAlgorithm.ROUND_ROBIN

	# The vdevice is used as a context manager ("with" statement) to ensure it's released on time.
	with VDevice(params) as vdevice:

		# Create an infer model from an HEF:
		infer_model = vdevice.create_infer_model(hef_file)
		infer_model.set_batch_size(batch_size)
		# Configure the infer model and create bindings for it
		with infer_model.configure() as configured_infer_model:
			bindings = configured_infer_model.create_bindings()
			#print(infer_model.input().shape)
			for i in range(number_of_frames):
				# Set input and output buffers
				buffer = np.empty(infer_model.input().shape, dtype=np.uint8)
				bindings.input().set_buffer(buffer)
				if len(bindings._output_names) == 1:
					bindings.output().set_buffer(np.empty(infer_model.output().shape, dtype=np.uint8))
				else:
					for name in bindings._output_names:
						bindings.output(name).set_buffer(np.empty(infer_model.output(name).shape, dtype=np.uint8))
				# The same bindings object is queued batch_size times per run
				bindings_list = [bindings] * batch_size
				# Run synchronous inference and access the output buffers
				start_time = time.time()
				configured_infer_model.run(bindings_list, timeout_ms)
				inference_time = time.time() - start_time
				inference_times.append(inference_time)
			latency_mean = np.mean(inference_times) * 1000
			latency_std = np.std(inference_times) * 1000
			print(f"Average latency of model {os.path.basename(hef_file)} is {latency_mean} ms and std: {latency_std} ms for batch_size {batch_size}\n")
			return latency_mean, latency_std
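
The async function below passes example_callback to run_async; it is not shown in the snippet, so here is a minimal placeholder for it (assuming the callback receives the completion-info object, as in the HailoRT async inference examples):

def example_callback(completion_info, bindings):
	# Placeholder callback: just surface errors; the output buffers are
	# already attached to the bindings passed in via functools.partial.
	if completion_info.exception:
		print(f"Inference error: {completion_info.exception}")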

def measure_fps_kpi_async(hef_file):
	number_of_frames = 5000
	timeout_ms = 10000
	fps_times = []
	params = VDevice.create_params()
	params.scheduling_algorithm = HailoSchedulingAlgorithm.ROUND_ROBIN
	params.group_id = "SHARED"
	with VDevice(params) as vdevice:
		# Create an infer model from an HEF:
		infer_model = vdevice.create_infer_model(hef_file)
		
		# Once the infer model is set, configure the infer model
		with infer_model.configure() as configured_infer_model:
			bindings = configured_infer_model.create_bindings()
			for i in range(3):
				inference_times = []
				for _ in range(number_of_frames):
					# Create bindings for it and set buffers
					bindings.input().set_buffer(np.empty(infer_model.input().shape, dtype=np.uint8))
					if len(bindings._output_names) == 1:
						bindings.output().set_buffer(np.empty(infer_model.output().shape, dtype=np.uint8))
					else:
						for name in bindings._output_names:
							bindings.output(name).set_buffer(np.empty(infer_model.output(name).shape, dtype=np.uint8))
					start_time = time.time()
					configured_infer_model.wait_for_async_ready(timeout_ms=10000)
					job = configured_infer_model.run_async([bindings], partial(example_callback, bindings=bindings))
					configured_infer_model.wait_for_async_ready(timeout_ms=10000)
					inference_time = time.time() - start_time
					inference_times.append(inference_time)
				job.wait(timeout_ms)
				fps_times.append(1/np.mean(inference_times))
			fps_mean = np.mean(fps_times)
			fps_std = np.std(fps_times)
			print(f" FPS of model {os.path.basename(hef_file)} is {fps_mean} and std: {fps_std} \n")
			return fps_mean, fps_std
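
For completeness, the two functions are driven roughly like this (the HEF path is a placeholder):

if __name__ == "__main__":
	hef_path = "model.hef"  # placeholder path to the compiled HEF
	for bs in (1, 8, 16, 32, 63):
		measure_latency_kpi_sync(hef_path, bs)
	measure_fps_kpi_async(hef_path)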