Hi, I have written the Python script below (`measure_latency_kpi_sync`) to benchmark latency for various batch sizes (1, 8, 16, 32, 63) using the synchronous `run` API, but the results do not match the `hailortcli benchmark` tool.
Similarly, I wrote `measure_fps_kpi_async` to benchmark FPS using `run_async`, but the results do not match the numbers published for the Hailo-8L in the Model Zoo.
Could you please check the code, let us know what we are doing wrong, and suggest how to improve it so that it matches the published Hailo-8L results? Thanks.
HailoRT version 4.22.0
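So the snippets below are self-contained, these are the imports the script uses (the `hailo_platform` package ships with HailoRT):

```python
import os
import time
from functools import partial

import numpy as np
from hailo_platform import VDevice, HailoSchedulingAlgorithm
```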
```python
def measure_latency_kpi_sync(hef_file, batch_size):
    timeout_ms = 10000
    number_of_frames = 100
    inference_times = []

    params = VDevice.create_params()
    params.scheduling_algorithm = HailoSchedulingAlgorithm.ROUND_ROBIN

    # The vdevice is used as a context manager ("with" statement)
    # to ensure it's released on time.
    with VDevice(params) as vdevice:
        # Create an infer model from an HEF:
        infer_model = vdevice.create_infer_model(hef_file)
        infer_model.set_batch_size(batch_size)

        # Configure the infer model and create bindings for it
        with infer_model.configure() as configured_infer_model:
            bindings = configured_infer_model.create_bindings()
            # print(infer_model.input().shape)
            for i in range(number_of_frames):
                # Set input and output buffers
                buffer = np.empty(infer_model.input().shape, dtype=np.uint8)
                bindings.input().set_buffer(buffer)
                if len(bindings._output_names) == 1:
                    bindings.output().set_buffer(
                        np.empty(infer_model.output().shape, dtype=np.uint8))
                else:
                    for name in bindings._output_names:
                        bindings.output(name).set_buffer(
                            np.empty(infer_model.output(name).shape, dtype=np.uint8))

                # The same bindings object is appended batch_size times
                bindings_list = []
                for j in range(batch_size):
                    bindings_list.append(bindings)

                # Run synchronous inference and time the whole batch
                start_time = time.time()
                configured_infer_model.run(bindings_list, timeout_ms)
                inference_time = time.time() - start_time
                inference_times.append(inference_time)

    latency_mean = np.mean(inference_times) * 1000
    latency_std = np.std(inference_times) * 1000
    print(f"Average latency of model {os.path.basename(hef_file)} is "
          f"{latency_mean} ms (std: {latency_std} ms) for batch_size {batch_size}\n")
    return latency_mean, latency_std
```
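For context, this is roughly how I drive the latency function over the batch sizes listed above (a minimal sketch; `model.hef` is a placeholder path for the compiled Hailo-8L HEF):

```python
# Hypothetical driver: sweep the batch sizes from the post.
if __name__ == "__main__":
    hef_file = "model.hef"  # placeholder path
    for batch_size in (1, 8, 16, 32, 63):
        measure_latency_kpi_sync(hef_file, batch_size)
```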
```python
def measure_fps_kpi_async(hef_file):
    number_of_frames = 5000
    timeout_ms = 10000
    fps_times = []

    params = VDevice.create_params()
    params.scheduling_algorithm = HailoSchedulingAlgorithm.ROUND_ROBIN
    params.group_id = "SHARED"

    with VDevice(params) as vdevice:
        # Create an infer model from an HEF:
        infer_model = vdevice.create_infer_model(hef_file)

        # Once the infer model is set, configure the infer model
        with infer_model.configure() as configured_infer_model:
            bindings = configured_infer_model.create_bindings()
            # Run three measurement passes and average the resulting FPS
            for i in range(3):
                inference_times = []
                for _ in range(number_of_frames):
                    # Set input and output buffers on the bindings
                    bindings.input().set_buffer(
                        np.empty(infer_model.input().shape, dtype=np.uint8))
                    if len(bindings._output_names) == 1:
                        bindings.output().set_buffer(
                            np.empty(infer_model.output().shape, dtype=np.uint8))
                    else:
                        for name in bindings._output_names:
                            bindings.output(name).set_buffer(
                                np.empty(infer_model.output(name).shape, dtype=np.uint8))

                    # Time a single async job from submission to readiness
                    start_time = time.time()
                    configured_infer_model.wait_for_async_ready(timeout_ms=10000)
                    job = configured_infer_model.run_async(
                        [bindings], partial(example_callback, bindings=bindings))
                    configured_infer_model.wait_for_async_ready(timeout_ms=10000)
                    inference_time = time.time() - start_time
                    inference_times.append(inference_time)
                    job.wait(timeout_ms)
                fps_times.append(1 / np.mean(inference_times))

    fps_mean = np.mean(fps_times)
    fps_std = np.std(fps_times)
    print(f"FPS of model {os.path.basename(hef_file)} is {fps_mean} (std: {fps_std})\n")
    return fps_mean, fps_std
```
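The `example_callback` passed to `run_async` is not shown above; it is a minimal no-op along these lines (a sketch assuming the standard `completion_info` argument that the HailoRT async API passes to the completion callback):

```python
# Minimal async-completion callback (sketch). run_async invokes it with a
# completion_info object; the bindings kwarg is bound via functools.partial.
def example_callback(completion_info, bindings):
    if completion_info.exception:
        # An error occurred during inference; surface it for debugging.
        print(f"Inference failed: {completion_info.exception}")
```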