How to get the original frame from a GStreamer pipeline

def get_pipeline_string(self):
    if self.source_type == "rpi":
        source_element = (
            "libcamerasrc name=src_0 ! "
            f"video/x-raw, format={self.network_format}, width=1536, height=864 ! "
            + QUEUE("queue_src_scale")
            + "videoscale ! "
            + f"video/x-raw, format={self.network_format}, width={self.network_width}, height={self.network_height}, framerate=30/1 ! "
        )
    elif self.source_type == "usb":
        source_element = (
            f"v4l2src device={self.video_source} name=src_0 ! "
            "video/x-raw, width=640, height=480, framerate=30/1 ! "
        )
    else:
        source_element = (
            f"filesrc location=\"{self.video_source}\" name=src_0 ! "
            + QUEUE("queue_dec264")
            + "qtdemux ! h264parse ! avdec_h264 max-threads=2 ! "
            "video/x-raw, format=I420 ! "
        )
    source_element += QUEUE("queue_scale")
    source_element += "videoscale n-threads=2 ! "
    source_element += QUEUE("queue_src_convert")
    source_element += "videoconvert n-threads=3 name=src_convert qos=false ! "
    source_element += f"video/x-raw, format={self.network_format}, width={self.network_width}, height={self.network_height}, pixel-aspect-ratio=1/1 ! "

    pipeline_string = (
        "hailomuxer name=hmux "
        + source_element
        + "tee name=t ! "
        + QUEUE("bypass_queue", max_size_buffers=20)
        + "hmux.sink_0 "
        + "t. ! "
        + QUEUE("queue_hailonet")
        + "videoconvert n-threads=3 ! "
        + f"hailonet hef-path={self.hef_path} batch-size={self.batch_size} {self.thresholds_str} force-writable=true ! "
        + QUEUE("queue_hailofilter")
        + f"hailofilter so-path={self.default_postprocess_so} {self.labels_config} qos=false ! "
        + QUEUE("queue_hmuc")
        + "hmux.sink_1 "
        + "hmux. ! "
        + QUEUE("queue_hailo_python")
        + QUEUE("queue_user_callback")
        + "identity name=identity_callback ! "
        + QUEUE("queue_hailooverlay")
        + "hailooverlay ! "
        + QUEUE("queue_videoconvert")
        + "videoconvert n-threads=3 qos=false ! "
        + QUEUE("queue_hailo_display")
        + f"fpsdisplaysink video-sink={self.video_sink} name=hailo_display sync={self.sync} text-overlay={self.options_menu.show_fps} signal-fps-measurements=true "
    )
    print(pipeline_string)
    return pipeline_string

This is the original pipeline from hailo-rpi5-examples. I want to extract the original frame that Hailo is processing. I tried using tee elements, but it is not giving me the original frame. I want to get the original frame inside this app_callback function.

def app_callback(pad, info, user_data):
    # Get the GstBuffer from the probe info
    buffer = info.get_buffer()
    # Check if the buffer is valid
    if buffer is None:
        return Gst.PadProbeReturn.OK

    # Using the user_data to count the number of frames
    user_data.increment()
    string_to_print = f"Frame count: {user_data.get_count()}\n"

    # Get the caps from the pad
    format, width, height = get_caps_from_pad(pad)

    # If the user_data.use_frame is set to True, we can get the video frame from the buffer
    frame = None
    if user_data.use_frame and format is not None and width is not None and height is not None:
        # Get video frame
        frame = get_numpy_from_buffer(buffer, format, width, height)

    # Get the detections from the buffer
    roi = hailo.get_roi_from_buffer(buffer)
    detections = roi.get_objects_typed(hailo.HAILO_DETECTION)

    # Parse the detections
    detection_count = 0
    for detection in detections:
        label = detection.get_label()
        bbox = detection.get_bbox()
        confidence = detection.get_confidence()
        if label == "person":
            string_to_print += f"Detection: {label} {confidence:.2f}\n"
            detection_count += 1
    if user_data.use_frame:
        # Note: using imshow will not work here, as the callback function is not running in the main thread
        # Let's print the detection count to the frame
        cv2.putText(frame, f"Detections: {detection_count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        # Example of how to use the new_variable and new_function from the user_data
        # Let's print the new_variable and the result of the new_function to the frame
        cv2.putText(frame, f"{user_data.new_function()} {user_data.new_variable}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        # Convert the frame to BGR
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        user_data.set_frame(frame)

    print(string_to_print)
    return Gst.PadProbeReturn.OK

How can I do that?

Hi @malikharis3984,

To extract the original frame:

  1. Modify the pipeline:
    • Branch the frame off with a tee before any transformations
    • Add a queue after the tee
    • Attach a pad probe there to extract the frame data
pipeline_string = (
    "hailomuxer name=hmux "
    + source_element  # your original video source element
    + "tee name=frame_tee ! "  # tee so the full-resolution frame can be branched off
    + QUEUE("original_frame_queue")  # queue for original-frame extraction
    + "identity name=original_frame_identity ! "  # identity element as an easy probe point
    + QUEUE("queue_scale")  # continue with the rest of the pipeline for Hailo processing
    + "videoscale n-threads=2 ! "
    + QUEUE("queue_src_convert")
    + "videoconvert n-threads=3 name=src_convert qos=false ! "
    + f"video/x-raw, format={self.network_format}, width={self.network_width}, height={self.network_height}, pixel-aspect-ratio=1/1 ! "
    # Continue with the rest of the pipeline...
    # (hailonet, hailofilter, etc.)
)

  2. Implement the extraction:
def add_original_frame_probe(self, pipeline):
    # Probe the src pad of the identity element added to the pipeline above
    identity = pipeline.get_by_name("original_frame_identity")
    pad = identity.get_static_pad("src")
    # user_data must be reachable here, e.g. stored as an attribute
    pad.add_probe(Gst.PadProbeType.BUFFER, app_callback, self.user_data)

def app_callback(pad, info, user_data):
    buffer = info.get_buffer()
    if buffer is None:
        return Gst.PadProbeReturn.OK

    format, width, height = get_caps_from_pad(pad)
    frame = None
    if None not in (format, width, height):
        frame = get_numpy_from_buffer(buffer, format, width, height)

    if frame is not None:
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        cv2.imwrite('original_frame.jpg', frame)  # overwritten each frame; demo only

    return Gst.PadProbeReturn.OK
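Hook the probe up once, after the pipeline has been built. A minimal sketch (assuming self.pipeline is created from your pipeline string with Gst.parse_launch and self.user_data holds the callback state):

self.pipeline = Gst.parse_launch(self.get_pipeline_string())
self.add_original_frame_probe(self.pipeline)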

Note: to display frames with cv2.imshow from app_callback, run the display in a separate thread and pass frames to it through a queue, since the callback does not run in the main thread.
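A minimal sketch of that pattern, assuming app_callback pushes BGR frames into frame_queue (the queue size, window name, and None sentinel are illustrative choices):

import queue
import threading

import cv2

frame_queue = queue.Queue(maxsize=2)

def display_loop():
    # Runs in its own thread, so cv2.imshow/cv2.waitKey stay out of the GStreamer callback
    while True:
        frame = frame_queue.get()
        if frame is None:  # sentinel to stop the loop
            break
        cv2.imshow("Original frame", frame)
        cv2.waitKey(1)

threading.Thread(target=display_loop, daemon=True).start()

# Inside app_callback, instead of calling cv2.imshow directly:
#     if not frame_queue.full():
#         frame_queue.put_nowait(frame)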

Regards

I made the changes to the pipeline as you suggested, implemented the functions with the same app_callback logic, and it is working fine: it gives me the original 1536x864 frame. The issue is that my detections no longer work as they did before. I want to take the bounding boxes from the 640x640 inference and map them onto the original frame, so that I can crop the detected region from the original image rather than from the 640x640 frame. Is there a solution for this? If so, please guide me.

Here’s an updated approach:

  1. Know the frame dimensions:
original_width, original_height = 1536, 864
processed_width, processed_height = 640, 640

Note: the HailoBBox coordinates attached to a detection are normalized to [0, 1], so mapping a box onto the original frame only needs the original frame's dimensions. (If your coordinates were 640x640 pixel values instead, you would scale by scale_x = original_width / processed_width and scale_y = original_height / processed_height.)

  2. Modify the app_callback function to map the bounding boxes onto the original frame:
def app_callback(pad, info, user_data):
    buffer = info.get_buffer()
    if buffer is None:
        return Gst.PadProbeReturn.OK

    user_data.increment()
    original_frame = user_data.get_original_frame()
    
    roi = hailo.get_roi_from_buffer(buffer)
    detections = roi.get_objects_typed(hailo.HAILO_DETECTION)
    scaled_detections = []

    for detection in detections:
        label = detection.get_label()
        bbox = detection.get_bbox()
        confidence = detection.get_confidence()

        # HailoBBox coordinates are normalized to [0, 1]; map them
        # directly onto the original frame's dimensions
        x1 = int(bbox.xmin() * original_width)
        y1 = int(bbox.ymin() * original_height)
        x2 = int(bbox.xmax() * original_width)
        y2 = int(bbox.ymax() * original_height)

        scaled_detections.append({
            'label': label,
            'confidence': confidence,
            'bbox': (x1, y1, x2, y2)
        })

    # Process detections
    if original_frame is not None:
        for det in scaled_detections:
            if det['label'] == "person":
                x1, y1, x2, y2 = det['bbox']
                cropped = original_frame[y1:y2, x1:x2]

                # Optionally save or display the cropped image
                # cv2.imwrite(f"person_{user_data.get_count()}.jpg", cropped)
                # cv2.imshow(f"Detection {user_data.get_count()}", cropped)
                # cv2.waitKey(1)

                print(f"Detection: {det['label']} {det['confidence']:.2f}")

    return Gst.PadProbeReturn.OK

Explanation:

  • The normalized bounding-box coordinates are mapped onto the original frame's dimensions (1536x864) rather than the 640x640 processed frame.
  • For each detection (focusing on "person"), the detected region is cropped from the original frame.
  • You can save, display, or further process the cropped images.
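
For example, with the dimensions above, a normalized detection corner at xmin = 0.25, ymin = 0.5 lands at pixel (384, 432) on the original frame:

x1 = int(0.25 * 1536)  # 384
y1 = int(0.5 * 864)    # 432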

Notes:

  • Ensure boundary checks so the mapped coordinates cannot exceed the original frame's bounds (see the clamping sketch after this list).
  • This assumes the 640x640 frame is a plain stretch of the original; if letterboxing or borders are added during scaling, account for them in the mapping.
  • Processing high-resolution frames in real time can be resource-intensive, so consider performance optimizations depending on your hardware.
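
A minimal clamping sketch for the first note (assuming x1, y1, x2, y2 are ints in original-frame pixel coordinates and original_frame is a numpy array):

h, w = original_frame.shape[:2]
x1, x2 = max(0, min(x1, w)), max(0, min(x2, w))
y1, y2 = max(0, min(y1, h)), max(0, min(y2, h))
if x2 > x1 and y2 > y1:  # skip empty or inverted boxes
    cropped = original_frame[y1:y2, x1:x2]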

How do I get the original frame at this point? That's the main issue. I want something like your implementation of get_original_frame(). I understand the scaling well, but I am having trouble getting the original frame here.

Where is “here”? Let’s clarify a few points so I can better assist you:

  1. Is the code I provided for extracting the original frame working on its own (extraction only, without the boxes)? The boxes are a post-processing step and not part of the main pipeline.

  2. If you want to map detections onto the original frame, then I believe the coordinate mapping described above is the way to go.

  3. However, what do you mean when you say that you are having issues getting the original frame out? Could you please provide more details about the specific problem you’re encountering?

By answering these questions and providing more context, I’ll be able to offer more targeted advice and help you resolve the issues you’re facing.
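
In the meantime, here is a minimal sketch of one way set_original_frame() / get_original_frame() could be wired up. Everything here is illustrative rather than a confirmed API: the class, the method names, and the helper-import path (which may differ between hailo-rpi5-examples versions). In practice you could add the two methods and the lock to your existing user_data class instead of a separate store.

import threading

import cv2
import gi
gi.require_version('Gst', '1.0')
from gi.repository import Gst

# Assumed helper imports, as used in the snippets above; adjust the module
# name to match your version of the examples.
from hailo_rpi_common import get_caps_from_pad, get_numpy_from_buffer

class OriginalFrameStore:
    # Illustrative thread-safe frame hand-off; not part of the Hailo API
    def __init__(self):
        self._lock = threading.Lock()
        self._frame = None

    def set_original_frame(self, frame):
        with self._lock:
            self._frame = frame

    def get_original_frame(self):
        # Return a copy so the caller can draw on or crop it safely
        with self._lock:
            return None if self._frame is None else self._frame.copy()

def original_frame_probe(pad, info, user_data):
    # Attach to the src pad of original_frame_identity; stores the latest
    # full-resolution frame for the detection callback to fetch.
    buffer = info.get_buffer()
    if buffer is None:
        return Gst.PadProbeReturn.OK
    format, width, height = get_caps_from_pad(pad)
    if None not in (format, width, height):
        frame = get_numpy_from_buffer(buffer, format, width, height)
        user_data.set_original_frame(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    return Gst.PadProbeReturn.OK

One caveat: this hand-off is not frame-accurate. The stored frame may be a frame or two ahead of the buffer seen in the detection callback; if exact pairing matters, match buffers by their presentation timestamp (buffer.pts) instead of simply taking the latest frame.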