HAILOPYTHON -- How to get the image detected in the ROI

I’m trying to get the image from the ROI for Plate Number OCR

import hailo
# Importing VideoFrame before importing Gst is a must
from gsthailo import VideoFrame
from gi.repository import Gst
import cv2
import numpy as np
#from paddleocr import PaddleOCR

# Initialize PaddleOCR
#ocr = PaddleOCR(use_angle_cls=True, lang='en')  # Initialize OCR with English language

def process_detection_and_visualize_with_map_buffer(video_frame, bbox, label="object", confidence=0.0):
    """
    Process detection, convert normalized bbox to pixel coordinates, and draw on the frame using map_buffer.

    Parameters:
    - video_frame: The video frame containing the buffer.
    - bbox: Bounding box in normalized format (xmin, ymin, width, height).
    - label: Detection label (e.g., "license_plate").
    - confidence: Detection confidence score.

    Returns:
    - frame: NumPy array with bounding box and label drawn on it.
    """
    # Map the buffer to retrieve the frame data
    success, data, video_width, video_height = video_frame.map_buffer()
    if not success:
        raise RuntimeError("Failed to map the video frame buffer.")

    try:
        # Convert the mapped buffer to a NumPy array
        frame = np.frombuffer(data, dtype=np.uint8).reshape((video_height, video_width, 3))

        # Extract normalized bbox values
        xmin, ymin, width, height = bbox

        # Convert normalized bbox to pixel values
        x1 = int(xmin * video_width)
        y1 = int(ymin * video_height)
        x2 = int((xmin + width) * video_width)
        y2 = int((ymin + height) * video_height)

        # Draw the bounding box
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Prepare the label text
        display_label = f"{label} {confidence:.2f}"
        
        # Ensure label text doesn't overflow the frame boundaries
        text_x = max(0, x1)
        text_y = max(0, y1 - 10)
        
        # Draw the label text
        cv2.putText(frame, display_label, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        return frame
    finally:
        # Unmap the buffer to release resources
        video_frame.unmap_buffer()


def run(video_frame: VideoFrame):
       
    try:
        video_info = video_frame.video_info
        #print(f"Video Info - Width: {video_info.width}, Height: {video_info.height}, Frame Size: {video_info.size}")
        #print(f"ROI Objects: {video_frame.roi.get_objects()}")
        
        
        video_info_from_caps = video_frame._video_info_from_caps
        
        #print(dir(video_info.from_caps))
        #print(f"Video Info - Width: {video_info_from_caps.width}, Height: {video_info_from_caps.height}, Frame Size: {video_info.size}")
        #print(f"ROI Objects: {video_frame.roi.get_objects()}")
        
        
        # caps = Gst.Caps.from_string("video/x-raw, format=RGB, width=640, height=480")
        # video_frame.video_info = video_info.from_caps(caps)

        objects = video_frame.roi.get_objects()
        #print(f"Number of objects in ROI: {len(objects)}")
        for obj in objects:
            
            #print(dir(obj))
            
            #print(f"TYPE : {obj.get_type()}")
            what = obj.get_type()
            
            #print(f"TYPE What : {what}")
            if "hailo_object_t.HAILO_DETECTION" == f"{what}":
                #print("Object is a detection")  
                bbox = obj.get_bbox()        
                
                detections = obj.get_objects_typed(hailo.HAILO_DETECTION)
                
                print(dir(detections))
                print(video_info.finfo)   
                
                #print(dir(video_info.from_caps()))
                #print(dir(video_info.new_from_caps()))
                
                
                # Call the methods to retrieve values
                xmin = bbox.xmin()
                ymin = bbox.ymin()
                width = bbox.width()
                height = bbox.height()

                label = obj.get_label()  # Assuming get_label() exists
                confidence = obj.get_confidence()  # Assuming get_confidence() exists

                print(f"Object detected: Label={label}, Confidence={confidence}")
                print(f"Bounding Box: xmin={xmin}, ymin={ymin}, width={width}, height={height}")                        
                
                # Extract the frame as a NumPy array
                #image = video_frame.buffer
                
                #print(dir(image))
                
                #width, height = 640, 480  # Replace with actual frame dimensions
                #frame = extract_frame_from_buffer(video_frame, width, height)
                video_width = 1536  # Frame width
                video_height = 864  # Frame height
                #frame_with_detections = process_detection_and_visualize_with_map_buffer(video_frame, (xmin, ymin, width, height), label, confidence)


                # Define offsets
                x_offset = 0.05  # Example offset for x (positive to move right)
                y_offset = 0.05  # Example offset for y (positive to move down)

                # Apply offsets
                offset_xmin = xmin + x_offset
                offset_ymin = ymin + y_offset

                # Ensure the offsets remain within valid bounds (0.0 to 1.0 for normalized values)
                offset_xmin = max(0.0, min(1.0, offset_xmin))
                offset_ymin = max(0.0, min(1.0, offset_ymin))

                #ocr_results = ocr.ocr(preprocessed_plate, cls=True)
                
                # Create a new bounding box with the offsets
                #car_bbox = hailo.HailoBBox(xmin=offset_xmin, ymin=offset_ymin, width=width, height=height)

                #car = hailo.HailoDetection(bbox=car_bbox, label='Car', confidence=confidence)
                #video_frame.roi.add_object(car)

                # Draw on the frame
                # cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
                # cv2.putText(frame, f"{label} ({confidence:.2f})", (xmin, ymin - 10),
                #             cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
            
                # Ensure the frame is mapped and accessible
                # with video_frame.map_buffer() as map_info:
                #     frame = video_frame.numpy_array_from_buffer(map_info)
                #     print("Frame received and processed.")

        return Gst.FlowReturn.OK
    except Exception as e:
        print(f"Error in hailopython module: {e}")
        return Gst.FlowReturn.ERROR

Hey @joms

  1. Use video_frame.map_buffer() to map the video frame buffer; the mapped data can then be wrapped in a NumPy array for processing.
  2. Ensure that the bounding boxes (bbox) for detected objects are in a normalized format (xmin, ymin, width, height). Normalized bounding boxes represent relative coordinates and dimensions, ranging from 0 to 1.
  3. Convert the normalized bounding boxes to pixel coordinates by multiplying the normalized values by the frame’s width and height. This step is essential to locate and extract the ROI.
  4. Extract the ROI using the converted pixel coordinates. You can achieve this using NumPy array slicing.
  5. Perform additional processing or analysis on the extracted ROI as required.
  6. Finally, release the mapped memory by calling video_frame.unmap_buffer().
import hailo
from gsthailo import VideoFrame
from gi.repository import Gst
import cv2
import numpy as np

def process_bbox(video_frame, bbox):
    """
    Extracts and processes the Region of Interest (ROI) from a video frame based on the bounding box.

    Parameters:
    - video_frame: The video frame containing the buffer.
    - bbox: Bounding box in normalized format (xmin, ymin, width, height).

    Returns:
    - roi_image: NumPy array containing the ROI image.
    """
    success, data, video_width, video_height = video_frame.map_buffer()
    if not success:
        raise RuntimeError("Failed to map the video frame buffer.")
    
    try:
        # Convert buffer to NumPy array
        frame = np.frombuffer(data, dtype=np.uint8).reshape((video_height, video_width, 3))
        
        # Convert normalized bbox to pixel coordinates
        xmin, ymin, width, height = bbox
        x1 = int(xmin * video_width)
        y1 = int(ymin * video_height)
        x2 = int((xmin + width) * video_width)
        y2 = int((ymin + height) * video_height)
        
        # Ensure the bbox is within the frame boundaries
        x1, y1, x2, y2 = max(0, x1), max(0, y1), min(video_width, x2), min(video_height, y2)
        
        # Extract ROI
        roi_image = frame[y1:y2, x1:x2]
        
        return roi_image
    
    finally:
        # Unmap buffer
        video_frame.unmap_buffer()

def run(video_frame: VideoFrame):
    """
    Processes a video frame to extract and process ROIs for detected objects.

    Parameters:
    - video_frame: The video frame containing detected objects.

    Returns:
    - Gst.FlowReturn.OK on success, Gst.FlowReturn.ERROR on failure.
    """
    try:
        objects = video_frame.roi.get_objects()
        num_objects = len(objects)
        
        print(f"Number of objects in ROI: {num_objects}")
        
        for obj in objects:
            if obj.get_type() == hailo.HAILO_DETECTION:
                bbox = obj.get_bbox()
                
                # Retrieve label and confidence
                label = obj.get_label()
                confidence = obj.get_confidence()
                
                print(f"Detected: {label} with confidence: {confidence:.2f}")
                print(f"BBox: {bbox.xmin():.2f}, {bbox.ymin():.2f}, {bbox.width():.2f}, {bbox.height():.2f}")
                
                # Extract and process the ROI image
                roi_image = process_bbox(video_frame, (bbox.xmin(), bbox.ymin(), bbox.width(), bbox.height()))
                
                # Optionally save or visualize the ROI image
                cv2.imshow(f"{label} - {confidence:.2f}", roi_image)
                cv2.waitKey(1)  # Allow the display window to refresh and handle events
        
        return Gst.FlowReturn.OK
    
    except Exception as e:
        print(f"Error processing frame: {e}")
        return Gst.FlowReturn.ERROR
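
Once you have roi_image, you could feed it straight to an OCR engine. A minimal sketch based on the PaddleOCR calls already commented out in your code (untested; assumes paddleocr is installed):

from paddleocr import PaddleOCR

# Initialize OCR once (not per frame) with English language support
ocr = PaddleOCR(use_angle_cls=True, lang='en')

def read_plate(roi_image):
    # PaddleOCR accepts a NumPy image array directly
    results = ocr.ocr(roi_image, cls=True)
    texts = []
    if results and results[0]:
        for line in results[0]:
            text, confidence = line[1]  # each entry is [box, (text, score)]
            texts.append((text, confidence))
    return texts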

Note that the code is untested and might require adjustments for your specific setup.

Let me know if you have further questions or need additional assistance!

This part of the code always returns video_width = 0 and video_height = 0:

success, data, video_width, video_height = video_frame.map_buffer()

This is my solution:

import numpy as np
from gi.repository import Gst

buffer = video_frame.buffer
success, map_info = buffer.map(Gst.MapFlags.READ)
data = np.frombuffer(map_info.data, dtype=np.uint8)
buffer_size = len(data)
# Assume 3 channels (RGB)
channels = 3
# Dynamically infer width and height
# (note: this square-root heuristic only recovers the true dimensions when the
# frame is square; see the caps-based alternative below)
inferred_height = int((buffer_size / channels) ** 0.5)
inferred_width = int(buffer_size / (inferred_height * channels))
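
A more robust alternative is to take the dimensions from the negotiated caps instead of inferring them from the buffer size. A minimal, untested sketch, assuming video_frame.video_info is populated the same way as in the run() example above:

import numpy as np
from gi.repository import Gst

def frame_from_video_frame(video_frame):
    # The negotiated caps carry the real dimensions (see video_frame.video_info above)
    width = video_frame.video_info.width
    height = video_frame.video_info.height

    buffer = video_frame.buffer
    success, map_info = buffer.map(Gst.MapFlags.READ)
    if not success:
        raise RuntimeError("Failed to map the GStreamer buffer.")
    try:
        # Assume RGB (3 channels); adjust if your pipeline negotiates another format
        frame = np.frombuffer(map_info.data, dtype=np.uint8).reshape((height, width, 3))
        return np.copy(frame)  # copy so the array stays valid after unmapping
    finally:
        buffer.unmap(map_info)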

I am trying to get the image from the ROI for object detection using the yolov8s HEF model and the post-processing library. I need the image data so that I can draw a line and count the objects that cross it.

pipeline="$gst_top_command gst-launch-1.0 \
     hailoroundrobin mode=0 name=fun ! \
     queue name=hailo_pre_infer_q_0 leaky=downstream max-size-buffers=5 max-size-bytes=0 max-size-time=0 ! \
     hailonet hef-path=$HEF_PATH nms-score-threshold=0.3 nms-iou-threshold=0.45 output-format-type=HAILO_FORMAT_TYPE_FLOAT32 ! \
     queue name=hailo_postprocess0 leaky=no max-size-buffers=30 max-size-bytes=0 max-size-time=0 ! \
     hailofilter so-path=$POSTPROCESS_SO qos=false ! \
     hailotracker name=tracker ! \
     queue name=hailo_draw0 leaky=no max-size-buffers=30 max-size-bytes=0 max-size-time=0 ! \
     hailooverlay ! hailostreamrouter name=sid $streamrouter_input_streams \
     compositor name=comp start-time-selection=0 $compositor_locations ! videoscale n-threads=8 name=disp_scale ! video/x-raw,width=$screen_width,height=$screen_height ! \
     fpsdisplaysink video-sink='$video_sink_element' name=hailo_display sync=false text-overlay=false \
     $rtsp_sources ${additional_parameters}"
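
As an aside, these frames could also be handed to Python directly by inserting a hailopython element after the tracker, mirroring the hailopython usage later in this thread. An untested sketch (the placement and the $PYTHON_MODULE variable are assumptions):

     hailofilter so-path=$POSTPROCESS_SO qos=false ! \
     hailotracker name=tracker ! \
     hailopython module=$PYTHON_MODULE qos=false ! \
     queue name=hailo_draw0 leaky=no max-size-buffers=30 max-size-bytes=0 max-size-time=0 ! ...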

Please help me.

#include <iostream>
#include <iomanip>
#include <map>
#include <regex>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
// ...plus the TAPPAS headers that define HailoROIPtr, HailoBBox,
// HailoNMSDecode, YoloParamsNMS and hailo_common.

void filter(HailoROIPtr roi, void *params_void_ptr)
{
    if (!roi->has_tensors())
    {
        return;
    }

    // Retrieve parameters for NMS (Non-Maximum Suppression) from the void pointer
    YoloParamsNMS *params = reinterpret_cast<YoloParamsNMS *>(params_void_ptr);

    // Retrieve tensors from the ROI
    std::vector<HailoTensorPtr> tensors = roi->get_tensors();
    int num_detections = 0;
    static int count_crossing_line = 0; // Persistent counter for objects crossing the line

    // Define a normalized (horizontal) line for counting objects
    const float LINE_Y_NORM = 0.5; // Normalized y-coordinate (between 0 and 1) for the line
    const cv::Scalar LINE_COLOR = cv::Scalar(0, 0, 255); // Red line color (BGR)
    const int LINE_THICKNESS = 2;

    // Get the dimensions of the ROI's bounding box.
    // NOTE: HailoBBox coordinates are normalized (0..1), so these ints are not
    // pixel dimensions; real pixel sizes have to come from the caps.
    HailoBBox roi_bbox = hailo_common::create_flattened_bbox(roi->get_bbox(), roi->get_scaling_bbox());
    int img_width = roi_bbox.width();
    int img_height = roi_bbox.height();
    float LINE_Y = LINE_Y_NORM * img_height; // Convert normalized y to pixel coordinates

    // Access the image data from the ROI (assumed to be stored in
    // roi->get_image_data() or similar); modify this depending on how the
    // image data is actually accessed.
    /*
    auto image_data = roi->get_tensor(DEFAULT_YOLOV8S_OUTPUT_LAYER);  // Change this to the correct method to get image data
    if (image_data == nullptr)
    {
        std::cerr << "Failed to access image data" << std::endl;
        return;
    }

    // Create an OpenCV image from the raw data
    cv::Mat image(cv::Size(img_width, img_height), CV_8UC3, image_data);

    // Draw the line on the image (for object crossing detection)
    cv::line(image, cv::Point(0, LINE_Y), cv::Point(img_width, LINE_Y), LINE_COLOR, LINE_THICKNESS);
    */

    // Loop through tensors to find detections (NMS post-processing)
    for (auto tensor : tensors)
    {
        if (std::regex_search(tensor->name(), std::regex("nms_postprocess")))
        {
            // Decode the detections using NMS post-processing
            auto post = HailoNMSDecode(tensor, params->labels, params->detection_threshold, params->max_boxes, params->filter_by_score);
            auto detections = post.decode<float32_t, common::hailo_bbox_float32_t>();

            // Loop through each detection and process it
            for (auto &detection : detections)
            {
                if (detection.get_confidence() == 0)
                {
                    continue; // Skip detections with no confidence
                }

                // Extract the label and bounding box of the detection
                std::string label = detection.get_label();
                HailoBBox bbox = detection.get_bbox();

                // Calculate the center of the bounding box (for line crossing detection)
                float center_y = bbox.ymin() + (bbox.height() / 2);

                // Track objects that cross the line.
                // NOTE: keying by label means all objects sharing a label are
                // treated as one; see the track-ID-based sketch below.
                static std::map<std::string, bool> already_counted; // Prevent double-counting
                if (center_y > LINE_Y && !already_counted[label])
                {
                    count_crossing_line++; // Increment the counter when an object crosses the line
                    already_counted[label] = true; // Mark the object as counted
                    std::cout << "Object crossed the line: " << label << ", Count: " << count_crossing_line << std::endl;
                }
                else if (center_y < LINE_Y && already_counted[label])
                {
                    already_counted[label] = false; // Reset when the object moves back above the line
                }

                // Print detection details
                std::cout << "Detection: " << label << ", Confidence: "
                          << std::fixed << std::setprecision(2)
                          << detection.get_confidence() * 100.0 << "%" << std::endl;

                if (label == "person") // Count persons separately
                {
                    num_detections++;
                }
            }

            // Output the total number of persons detected
            std::cout << "Persons detected: " << num_detections << std::endl;

            // Add the detections back to the ROI (if needed for further processing)
            hailo_common::add_detections(roi, detections);
        }
    }
}
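
Since the pipeline above already includes hailotracker, the per-label counting in this filter could instead key on each detection's unique track ID, so two objects with the same label are counted separately. A rough Python sketch (untested; it assumes the hailo bindings expose HAILO_UNIQUE_ID objects on each detection, as in Hailo's example code) that could run inside the pad-probe callback shown below:

import hailo

LINE_Y_NORM = 0.5   # normalized y-coordinate of the counting line
last_below = {}     # track_id -> was the object below the line on the previous frame?
count_crossing_line = 0

def count_crossings(roi):
    """Count downward line crossings for the detections in a HailoROI."""
    global count_crossing_line
    for detection in roi.get_objects_typed(hailo.HAILO_DETECTION):
        unique_ids = detection.get_objects_typed(hailo.HAILO_UNIQUE_ID)
        if not unique_ids:
            continue  # hailotracker has not assigned an ID to this detection yet
        track_id = unique_ids[0].get_id()

        bbox = detection.get_bbox()
        center_y = bbox.ymin() + bbox.height() / 2  # normalized bbox center

        below = center_y > LINE_Y_NORM
        # Count only the transition from above the line to below it
        if track_id in last_below and below and not last_below[track_id]:
            count_crossing_line += 1
            print(f"Track {track_id} crossed the line, count={count_crossing_line}")
        last_below[track_id] = below
    return count_crossing_line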

Here is working Python code, using GStreamer's pad-probe callback feature, to get all the ROI images from the detections:

import os
import uuid

import cv2
import numpy as np

import hailo
from gi.repository import Gst

def app_callback(pad, info):
    # Read the negotiated caps from the pad to get the frame format and size
    inferred_width = inferred_height = None
    caps = pad.get_current_caps()
    if caps:
        structure = caps.get_structure(0)
        if structure:
            test_format = structure.get_value('format')
            inferred_width = structure.get_value('width')
            inferred_height = structure.get_value('height')
            #print(f"Format: {test_format}, Width: {inferred_width}, Height: {inferred_height}")

    # Get the GstBuffer from the probe info; bail out if there is no buffer
    # or the frame size could not be determined from the caps
    buffer = info.get_buffer()
    if buffer is None or inferred_width is None:
        return Gst.PadProbeReturn.OK

    success, map_info = buffer.map(Gst.MapFlags.READ)
    if not success:
        raise RuntimeError("Failed to map the GStreamer buffer.")

    try:
        # Get the detections from the buffer
        roi = hailo.get_roi_from_buffer(buffer)
        stream_id = roi.get_stream_id()
        detections = roi.get_objects_typed(hailo.HAILO_DETECTION)

        # Convert the buffer data to a NumPy array, assuming 3 channels (RGB),
        # and reshape it into a frame once per buffer
        channels = 3
        data = np.frombuffer(map_info.data, dtype=np.uint8)
        frame = data.reshape((inferred_height, inferred_width, channels))
        frame = np.copy(frame)  # Create a writable copy of the frame

        for detection in detections:
            # Extract the normalized bbox values
            bbox = detection.get_bbox()
            xmin = bbox.xmin()
            ymin = bbox.ymin()
            bbox_width = bbox.width()
            bbox_height = bbox.height()
            print(f"Detected object {stream_id} : {xmin}, {ymin}, {bbox_width}, {bbox_height}")

            # Convert normalized bbox to pixel values
            x1 = int(xmin * inferred_width)
            y1 = int(ymin * inferred_height)
            x2 = int((xmin + bbox_width) * inferred_width)
            y2 = int((ymin + bbox_height) * inferred_height)

            # Ensure coordinates are within image boundaries
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(inferred_width, x2), min(inferred_height, y2)

            # Define folder names and ensure they exist
            full_frame_folder = "license_plate_captured"
            cropped_folder = "license_plate_captured"
            os.makedirs(full_frame_folder, exist_ok=True)
            os.makedirs(cropped_folder, exist_ok=True)

            # Crop the license plate from the frame
            cropped_plate = frame[y1:y2, x1:x2]

            # Save the cropped license plate
            cropped_plate_filename = os.path.join(cropped_folder, f"license_plate_{uuid.uuid4().hex}.jpg")
            #cv2.imwrite(cropped_plate_filename, cropped_plate)
            #print(f"Cropped license plate saved to {cropped_plate_filename}")

            # Draw the bounding box on the frame
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Add label and confidence
            display_label = "License Plate Detected"
            cv2.putText(frame, display_label, (x1, max(0, y1 - 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # Save the full frame with the bounding box
            full_frame_filename = os.path.join(full_frame_folder, f"car_{uuid.uuid4().hex}.jpg")
            #cv2.imwrite(full_frame_filename, frame)
            #print(f"Full frame saved to {full_frame_filename}")
    finally:
        # Unmap the buffer
        buffer.unmap(map_info)

    return Gst.PadProbeReturn.OK
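
To wire this callback into a pipeline, attach it as a buffer probe on a pad downstream of the detection elements. A minimal sketch (the element name "tracker" matches the pipeline above; pipeline_string stands in for your own launch line and is an assumption):

import gi
gi.require_version('Gst', '1.0')
from gi.repository import Gst

Gst.init(None)
pipeline = Gst.parse_launch(pipeline_string)

# Probe the src pad of the tracker so detections (and their IDs) are present
tracker = pipeline.get_by_name("tracker")
pad = tracker.get_static_pad("src")
pad.add_probe(Gst.PadProbeType.BUFFER, app_callback)

pipeline.set_state(Gst.State.PLAYING)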

Hi @joms,
I am also trying to use hailopython with my own Python module, but I'm getting the issue below.
jkrpi5@jk:~ $ gst-launch-1.0 videotestsrc ! hailopython module=/home/jkrpi5/my_module.py qos=false ! autovideosink
Setting pipeline to PAUSED …
Pipeline is PREROLLING …
Fatal Python error: init_threadstate: thread state already initialized
Python runtime state: initialized

Thread 0x00007ffecc02cc00 (most recent call first):

Aborted

I followed the link below:

Hardware details below:
hailortcli fw-control identify
Executing on device: 0000:01:00.0
Identifying board
Control Protocol Version: 2
Firmware Version: 4.18.0 (release,app,extended context switch buffer)
Logger Version: 0
Board Name: Hailo-8
Device Architecture: HAILO8L
Serial Number: HLDDLBB242601548
Part Number: HM21LB1C2LAE
Product Name: HAILO-8L AI ACC M.2 B+M KEY MODULE EXT TMP

It seems hailopython worked for you. Am I missing anything?
Appreciate your help.