Hi @omria and Hailo Team,
Thank you again for your previous response and the detailed clarification.
Unfortunately, I’ve followed all the steps as recommended, but I’m still seeing the COCO labels (“person”, “bicycle”, “car”) during evaluation and visualization, instead of my custom labels (“Gun”, “Person_with_Mask”, “Person”).
Steps I’ve Re-Applied (as per your suggestions)
1. Created the custom_postprocess.json file:
{
"labels": ["Gun", "Person_with_Mask", "Person"]
}
2. Updated my ids3cls.yaml:
# ids3cls.yaml - Hailo Model Zoo network config for the 3-class custom detector.
base:
- base/yolov8.yaml
postprocessing:
  # NOTE(review): step 1 of this post names the labels file custom_postprocess.json,
  # but this path ends in custom_labels.json - confirm the file exists at this path.
  postprocess_config_file: /home/graphin/Hailoexecutablefile/hailo_model_zoo/hailo_model_zoo/cfg/networks/postprocess/custom_labels.json
  device_pre_post_layers:
    nms: true
  hpp: true
  classes: 3
  # A YAML list item needs a space after the dash; "-Gun" is read as a plain string.
  # Order and casing follow the "labels" array of the JSON in step 1.
  # NOTE(review): the order must match the class indices used in training - confirm.
  class_names:
  - Gun
  - Person_with_Mask
  - Person
network:
  network_name: ids3cls
paths:
  network_path:
  - /home/graphin/Hailoexecutablefile/idsv3.onnx
  alls_script: ids3cls.alls
parser:
  nodes:
  - null
  - - /model.23/cv2.0/cv2.0.2/Conv
    - /model.23/cv3.0/cv3.0.2/Conv
    - /model.23/cv2.1/cv2.1.2/Conv
    - /model.23/cv3.1/cv3.1.2/Conv
    - /model.23/cv2.2/cv2.2.2/Conv
    - /model.23/cv3.2/cv3.2.2/Conv
evaluation:
  dataset_name: ids3cls_custom
  labels_offset: 0
  classes: 3
  # NOTE(review): the evaluation set is the "calib" record while the calibration
  # set below is the "val" record, and both carry COCO file names. Confirm these
  # tfrecords were generated from the custom dataset and are not swapped.
  data_set: /home/graphin/.hailomz/data/models_files/ids3cls/2021-06-18/coco_calib2017.tfrecord
quantization:
  calib_set:
  - /home/graphin/.hailomz/data/models_files/ids3cls/2021-06-18/coco_val2017.tfrecord
info:
  task: object detection
  input_shape: 640x640x3
  # NOTE(review): 80 looks inherited from a COCO config; with classes: 3 this is
  # presumably 3x5x100 - confirm.
  output_shape: 80x5x100
  operations: 6.55G
  parameters: 2.6M
  framework: pytorch
  training_data: /home/graphin/Documents/Dataset/train/
  validation_data: /home/graphin/Documents/Dataset/valid/
3. Registered Dataset in datasets_info.py:
from enum import Enum
# Class names of the COCO detection label set, in class-index order
# (four names per row; position in the tuple is the class index).
CLASS_NAMES_COCO = (
    "person", "bicycle", "car", "motorcycle",
    "airplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign",
    "parking meter", "bench", "bird", "cat",
    "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe",
    "backpack", "umbrella", "handbag", "tie",
    "suitcase", "frisbee", "skis", "snowboard",
    "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife",
    "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot",
    "hot dog", "pizza", "donut", "cake",
    "chair", "couch", "potted plant", "bed",
    "dining table", "toilet", "tv", "laptop",
    "mouse", "remote", "keyboard", "cell phone",
    "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase",
    "scissors", "teddy bear", "hair drier", "toothbrush",
)
# Maps each key id to its 1-based rank among the keys. The keys run from 1 to
# 90 with gaps (12, 26, 29, 30, 45, 66, 68, 69, 71 and 83 are absent), while
# the values are the contiguous range 1..80.
COCO_LABEL_MAP = {
    source_id: rank
    for rank, source_id in enumerate(
        [
            *range(1, 12),
            *range(13, 26),
            27,
            28,
            *range(31, 45),
            *range(46, 66),
            67,
            70,
            *range(72, 83),
            *range(84, 91),
        ],
        start=1,
    )
}
# Class names of the D2S detection label set, in class-index order.
CLASS_NAMES_D2S = (
    "adelholzener_alpenquelle_classic_075",
    "adelholzener_alpenquelle_naturell_075",
    "adelholzener_classic_bio_apfelschorle_02",
    "adelholzener_classic_naturell_02",
    "adelholzener_gourmet_mineralwasser_02",
    "augustiner_lagerbraeu_hell_05",
    "augustiner_weissbier_05",
    "coca_cola_05",
    "coca_cola_light_05",
    "suntory_gokuri_lemonade",
    "tegernseer_hell_03",
    "corny_nussvoll",
    "corny_nussvoll_single",
    "corny_schoko_banane",
    "corny_schoko_banane_single",
    "dr_oetker_vitalis_knuspermuesli_klassisch",
    "koelln_muesli_fruechte",
    "koelln_muesli_schoko",
    "caona_cocoa",
    "cocoba_cocoa",
    "cafe_wunderbar_espresso",
    "douwe_egberts_professional_ground_coffee",
    "gepa_bio_caffe_crema",
    "gepa_italienischer_bio_espresso",
    "apple_braeburn_bundle",
    "apple_golden_delicious",
    "apple_granny_smith",
    "apple_red_boskoop",
    "avocado",
    "banana_bundle",
    "banana_single",
    "grapes_green_sugraone_seedless",
    "grapes_sweet_celebration_seedless",
    "kiwi",
    "orange_single",
    "oranges",
    "pear",
    "clementine",
    "clementine_single",
    "pasta_reggia_elicoidali",
    "pasta_reggia_fusilli",
    "pasta_reggia_spaghetti",
    "franken_tafelreiniger",
    "pelikan_tintenpatrone_canon",
    "ethiquable_gruener_tee_ceylon",
    "gepa_bio_und_fair_fencheltee",
    "gepa_bio_und_fair_kamillentee",
    "gepa_bio_und_fair_kraeuterteemischung",
    "gepa_bio_und_fair_pfefferminztee",
    "gepa_bio_und_fair_rooibostee",
    "kilimanjaro_tea_earl_grey",
    "cucumber",
    "carrot",
    "corn_salad",
    "lettuce",
    "vine_tomatoes",
    "roma_vine_tomatoes",
    "rocket",
    "salad_iceberg",
    "zucchini",
)
# Identity map over the 1-based labels 1..len(CLASS_NAMES_D2S).
D2S_LABEL_MAP = {label: label for label in range(1, len(CLASS_NAMES_D2S) + 1)}
# Class names of the D2S fruits subset, in class-index order.
CLASS_NAMES_D2S_FRUITS = (
    "apple", "avocado", "banana_single",
    "clementine_single", "kiwi", "orange_single",
    "pear", "cucumber", "carrot",
)
# Identity map over the 1-based labels 1..len(CLASS_NAMES_D2S_FRUITS).
D2S_FRUITS_LABEL_MAP = {
    label: label for label in range(1, len(CLASS_NAMES_D2S_FRUITS) + 1)
}
# Class names of the nuScenes label set, in class-index order.
CLASS_NAMES_NUSCENES = (
    "car", "truck", "construction_vehicle", "bus", "trailer",
    "barrier", "motorcycle", "bicycle", "pedestrian", "traffic_cone",
)
# Identity map over the 0-based labels 0..len(CLASS_NAMES_NUSCENES) - 1.
NUSCENES_LABEL_MAP = {
    index: index for index, _ in enumerate(CLASS_NAMES_NUSCENES)
}
# Class names of the custom 3-class detector, in class-index order.
# Spelling and casing match the custom labels this post says it expects
# ("Gun", "Person_with_Mask", "Person"); the earlier "GUN" / "person"
# spellings did not.
# NOTE(review): the order must match the class indices used in training - confirm.
CLASS_NAMES_IDS3CLS = (
    "Gun",
    "Person_with_Mask",
    "Person",
)
# Identity map over the 0-based labels 0..len(CLASS_NAMES_IDS3CLS) - 1.
IDS3CLS_LABEL_MAP = {i: i for i in range(len(CLASS_NAMES_IDS3CLS))}
class DatasetInfo(object):
    """Read-only pairing of a dataset's class names with its label map."""

    def __init__(self, class_names, label_map):
        """Store the given class names and label map without copying them.

        Args:
            class_names: sequence of class-name strings for the dataset.
            label_map: mapping between label ids for the dataset.
        """
        self._class_names = class_names
        self._label_map = label_map

    @property
    def class_names(self):
        """Return the class names passed to the constructor."""
        return self._class_names

    @property
    def label_map(self):
        """Return the label map passed to the constructor."""
        return self._label_map
class BasicDatasetsEnum(Enum):
    """Dataset names that have an entry in DATASETS_INFO.

    Each member's value is the dataset-name string used as the lookup key.
    """

    COCO = "coco_detection"
    D2S = "d2s_detection"
    D2S_FRUITS = "d2s_fruits_detection"
    NUSCENES = "nuscenes"
    IDS3CLS = "ids3cls_custom"
# Registry keyed by dataset-name string (the enum member's value); each entry
# bundles that dataset's class names with its label map.
DATASETS_INFO = {
    dataset.value: DatasetInfo(class_names=names, label_map=mapping)
    for dataset, names, mapping in (
        (BasicDatasetsEnum.COCO, CLASS_NAMES_COCO, COCO_LABEL_MAP),
        (BasicDatasetsEnum.D2S, CLASS_NAMES_D2S, D2S_LABEL_MAP),
        (BasicDatasetsEnum.D2S_FRUITS, CLASS_NAMES_D2S_FRUITS, D2S_FRUITS_LABEL_MAP),
        (BasicDatasetsEnum.NUSCENES, CLASS_NAMES_NUSCENES, NUSCENES_LABEL_MAP),
        (BasicDatasetsEnum.IDS3CLS, CLASS_NAMES_IDS3CLS, IDS3CLS_LABEL_MAP),
    )
}
def get_dataset_info(dataset_name):
    """Return the DATASETS_INFO entry registered under ``dataset_name``.

    Args:
        dataset_name: dataset-name string used as the key into DATASETS_INFO.

    Returns:
        The value stored in DATASETS_INFO for ``dataset_name``.

    Raises:
        ValueError: if ``dataset_name`` is not a key of DATASETS_INFO.
    """
    # Diagnostic trace showing which dataset name reached the lookup.
    print(f"🧪 Using dataset_name: {dataset_name}")
    if dataset_name not in DATASETS_INFO:
        # The argument is a dataset name, so say so and list the valid keys.
        raise ValueError(
            "ERROR unknown dataset_name {!r}; registered datasets: {}".format(
                dataset_name, ", ".join(str(name) for name in DATASETS_INFO)
            )
        )
    return DATASETS_INFO[dataset_name]
4. Registered ids3cls_custom in parse_coco.py:
import tensorflow as tf
from hailo_model_zoo.core.factory import DATASET_FACTORY
@DATASET_FACTORY.register(name="cocopose_single_person")
def parse_single_person_pose_estimation_record(serialized_example):
    """Parse one serialized TfRecord example for single-person pose estimation.

    Args:
        serialized_example: serialized example passed to
            ``tf.io.parse_single_example``.

    Returns:
        A two-element list ``[image, image_info]``. ``image`` is the decoded
        3-channel JPEG reshaped to (height, width, 3) as uint8. ``image_info``
        is a dict with keys ``image_id``, ``image_name`` and ``bbox``.
    """
    features = tf.io.parse_single_example(
        serialized_example,
        features={
            "height": tf.io.FixedLenFeature([], tf.int64),
            "width": tf.io.FixedLenFeature([], tf.int64),
            "xmin": tf.io.VarLenFeature(tf.float32),
            "xmax": tf.io.VarLenFeature(tf.float32),
            "ymin": tf.io.VarLenFeature(tf.float32),
            "ymax": tf.io.VarLenFeature(tf.float32),
            "image_id": tf.io.FixedLenFeature([], tf.int64),
            "image_name": tf.io.FixedLenFeature([], tf.string),
            "image_jpeg": tf.io.FixedLenFeature([], tf.string),
        },
    )
    height = tf.cast(features["height"], tf.int32)
    width = tf.cast(features["width"], tf.int32)

    # Variable-length box coordinates arrive sparse; densify with 0 as filler.
    xmin = tf.sparse.to_dense(features["xmin"], default_value=0)
    xmax = tf.sparse.to_dense(features["xmax"], default_value=0)
    ymin = tf.sparse.to_dense(features["ymin"], default_value=0)
    ymax = tf.sparse.to_dense(features["ymax"], default_value=0)
    # Stack then transpose so the coordinate order along the last axis is
    # [xmin, xmax, ymin, ymax].
    bbox = tf.transpose(tf.stack([xmin, xmax, ymin, ymax]))

    image_id = tf.cast(features["image_id"], tf.int32)
    image_name = tf.cast(features["image_name"], tf.string)

    image = tf.image.decode_jpeg(features["image_jpeg"], channels=3)
    image_shape = tf.stack([height, width, 3])
    image = tf.cast(tf.reshape(image, image_shape), tf.uint8)

    image_info = {"image_id": image_id, "image_name": image_name, "bbox": bbox}
    return [image, image_info]
@DATASET_FACTORY.register(name="cocopose")
def parse_pose_estimation_record(serialized_example):
    """Parse one serialized TfRecord example for pose estimation.

    Args:
        serialized_example: serialized example passed to
            ``tf.io.parse_single_example``.

    Returns:
        A two-element list ``[image, image_info]``. ``image`` is the decoded
        3-channel JPEG reshaped to (height, width, 3) as uint8. ``image_info``
        is a dict with keys ``image_id`` and ``image_name``.
    """
    features = tf.io.parse_single_example(
        serialized_example,
        features={
            "height": tf.io.FixedLenFeature([], tf.int64),
            "width": tf.io.FixedLenFeature([], tf.int64),
            "image_id": tf.io.FixedLenFeature([], tf.int64),
            "image_name": tf.io.FixedLenFeature([], tf.string),
            "image_jpeg": tf.io.FixedLenFeature([], tf.string),
        },
    )
    height = tf.cast(features["height"], tf.int32)
    width = tf.cast(features["width"], tf.int32)
    image_id = tf.cast(features["image_id"], tf.int32)
    image_name = tf.cast(features["image_name"], tf.string)

    image = tf.image.decode_jpeg(features["image_jpeg"], channels=3)
    image_shape = tf.stack([height, width, 3])
    image = tf.cast(tf.reshape(image, image_shape), tf.uint8)

    image_info = {"image_id": image_id, "image_name": image_name}
    return [image, image_info]
@DATASET_FACTORY.register(name="coco_segmentation")
@DATASET_FACTORY.register(name="cityscapes")
@DATASET_FACTORY.register(name="oxford_pet")
def parse_segmentation_record(serialized_example):
    """Parse one serialized TfRecord example for segmentation datasets.

    Args:
        serialized_example: serialized example passed to
            ``tf.io.parse_single_example``.

    Returns:
        A two-element list ``[image, image_info]``. ``image`` is the decoded
        3-channel JPEG reshaped to (height, width, 3) as uint8. ``image_info``
        is a dict with keys ``image_name`` and ``mask``, where ``mask`` is the
        raw-decoded uint8 buffer reshaped to (height, width, 1).
    """
    # The box and category features are declared in the spec but are not
    # read anywhere in this function.
    features = tf.io.parse_single_example(
        serialized_example,
        features={
            "height": tf.io.FixedLenFeature([], tf.int64),
            "width": tf.io.FixedLenFeature([], tf.int64),
            "xmin": tf.io.VarLenFeature(tf.float32),
            "xmax": tf.io.VarLenFeature(tf.float32),
            "ymin": tf.io.VarLenFeature(tf.float32),
            "ymax": tf.io.VarLenFeature(tf.float32),
            "category_id": tf.io.VarLenFeature(tf.int64),
            "image_name": tf.io.FixedLenFeature([], tf.string),
            "mask": tf.io.FixedLenFeature([], tf.string),
            "image_jpeg": tf.io.FixedLenFeature([], tf.string),
        },
    )
    height = tf.cast(features["height"], tf.int32)
    width = tf.cast(features["width"], tf.int32)
    image_name = tf.cast(features["image_name"], tf.string)

    image = tf.image.decode_jpeg(features["image_jpeg"], channels=3)
    mask = tf.io.decode_raw(features["mask"], tf.uint8)

    image_shape = tf.stack([height, width, 3])
    mask_shape = tf.stack([height, width, 1])
    image = tf.cast(tf.reshape(image, image_shape), tf.uint8)
    mask = tf.cast(tf.reshape(mask, mask_shape), tf.uint8)

    image_info = {"image_name": image_name, "mask": mask}
    return [image, image_info]
@DATASET_FACTORY.register(name="coco_detection")
@DATASET_FACTORY.register(name="ids3cls_custom")
@DATASET_FACTORY.register(name="open_images")
@DATASET_FACTORY.register(name="visdrone_detection")
@DATASET_FACTORY.register(name="d2s_detection")
@DATASET_FACTORY.register(name="d2s_fruits_detection")
@DATASET_FACTORY.register(name="coco_2017_detection")
@DATASET_FACTORY.register(name="vehicle_detection")
@DATASET_FACTORY.register(name="license_plates")
@DATASET_FACTORY.register(name="personface_detection")
def parse_detection_record(serialized_example):
    """Parse one serialized TfRecord example for object-detection datasets.

    The same parser is registered under every dataset name listed in the
    decorators above, including the custom ``ids3cls_custom`` dataset.

    Args:
        serialized_example: serialized example passed to
            ``tf.io.parse_single_example``.

    Returns:
        A two-element list ``[image, image_info]``. ``image`` is the decoded
        3-channel JPEG reshaped to (height, width, 3) as uint8. ``image_info``
        is a dict with keys ``image_name``, ``height``, ``width``,
        ``image_id``, ``num_boxes``, ``is_crowd``, ``xmin``, ``xmax``,
        ``ymin``, ``ymax``, ``area`` and ``category_id``.
    """
    features = tf.io.parse_single_example(
        serialized_example,
        features={
            "height": tf.io.FixedLenFeature([], tf.int64),
            "width": tf.io.FixedLenFeature([], tf.int64),
            "image_id": tf.io.FixedLenFeature([], tf.int64),
            "xmin": tf.io.VarLenFeature(tf.float32),
            "xmax": tf.io.VarLenFeature(tf.float32),
            "ymin": tf.io.VarLenFeature(tf.float32),
            "ymax": tf.io.VarLenFeature(tf.float32),
            "area": tf.io.VarLenFeature(tf.float32),
            "category_id": tf.io.VarLenFeature(tf.int64),
            "is_crowd": tf.io.VarLenFeature(tf.int64),
            "num_boxes": tf.io.FixedLenFeature([], tf.int64),
            "image_name": tf.io.FixedLenFeature([], tf.string),
            "image_jpeg": tf.io.FixedLenFeature([], tf.string),
        },
    )
    height = tf.cast(features["height"], tf.int32)
    width = tf.cast(features["width"], tf.int32)
    image_id = tf.cast(features["image_id"], tf.int32)
    image_name = tf.cast(features["image_name"], tf.string)

    image = tf.image.decode_jpeg(features["image_jpeg"], channels=3)
    image_shape = tf.stack([height, width, 3])
    image = tf.cast(tf.reshape(image, image_shape), tf.uint8)

    image_info = {
        "image_name": image_name,
        "height": height,
        "width": width,
        "image_id": image_id,
        "num_boxes": tf.cast(features["num_boxes"], tf.int32),
    }
    # Variable-length per-box features arrive sparse; densify each with 0 as
    # filler. The tuple order fixes the key order of image_info.
    for key in ("is_crowd", "xmin", "xmax", "ymin", "ymax", "area", "category_id"):
        image_info[key] = tf.sparse.to_dense(features[key], default_value=0)
    return [image, image_info]
5. Reran Full Pipeline (from scratch):
hailomz parse --hw-arch hailo8l --ckpt ids3cls.onnx ids3cls
hailomz optimize --hw-arch hailo8l --har ./ids3cls.har ids3cls
hailomz compile ids3cls --hw-arch hailo8l --har ./ids3cls.har
Still Not Working
Even after all these steps, the label mapping during inference and evaluation still defaults to COCO. I confirmed that the detections (bounding boxes) are correct, but the label names are wrong.
My Question Now:
Is there another hardcoded fallback (e.g., inside compiled .hef, or within HailoRT API defaults) that could still override postprocess_config_file?
Also, do I need to include class_names: under postprocessing in YAML in addition to using the postprocess config?
Kindly help me through this process. Could you verify the .yaml and .json files I used and let me know exactly what I have done wrong?
Let me know if you’d like me to share folder structure, full YAML config, or video evidence.
Thanks again for the ongoing support!
Best regards,
Ragul M