Michael Abrams
Michael Abrams

Reputation: 31

trying to track an object, bounding box jitters

I am trying to write a python script that can convert a 16:9 video into a 9:16 while keeping the region of interest in frame. I am having trouble on how to do this in a well manner. My current approach is to use YOLO object reconition every 120 frames and crop around it. This works for the most part, but the end video is very choppy as there is no transition between the frames. How can I go about fixing this, or is there a better way to accomplish this task?

from moviepy.editor import VideoFileClip
from ultralytics import YOLO
import numpy as np
import cv2

model = YOLO("yolov8n.pt")
clip = VideoFileClip("Mack Falls Off Cliff.mp4")


def apply_mask(frame, bbox):
    height, width, _ = frame.shape
    x1, y1, x2, y2 = [int(val) for val in bbox]

    # Calculate the aspect ratio of the bounding box
    bbox_width = x2 - x1
    bbox_height = y2 - y1
    bbox_aspect_ratio = bbox_width / bbox_height

    # Determine the crop region based on the desired 9:16 aspect ratio
    if bbox_aspect_ratio > 9 / 16:
        # Crop horizontally
        new_width = int(bbox_height * (9 / 16))
        x1 = x1 + int((bbox_width - new_width) / 2)
        x2 = x1 + new_width
    else:
        # Crop vertically
        new_height = int(bbox_width * (16 / 9))
        y1 = y1 + int((bbox_height - new_height) / 2)
        y2 = y1 + new_height

    # Extract the cropped region and resize to the desired 9:16 resolution
    cropped_frame = frame[y1:y2, x1:x2]
    masked_frame = cv2.resize(cropped_frame, (720, 1280))
    return masked_frame


prev_bbox = None
frame_count = 0
crop_interval = 120


def process_frame(frame):
    global prev_bbox, frame_count

    if frame_count % crop_interval == 0:
        results = model(frame)
        bboxes = results[0].boxes.xyxy.cpu().numpy()

        if len(bboxes) > 0:
            bbox = max(bboxes, key=lambda x: (x[2] - x[0]) * (x[3] - x[1]))
            if prev_bbox is not None:
                bbox = smooth_bbox(prev_bbox, bbox)
            prev_bbox = bbox
            masked_frame = apply_mask(frame, bbox)
        else:
            if prev_bbox is not None:
                masked_frame = apply_mask(frame, prev_bbox)
            else:
                masked_frame = frame
    else:
        if prev_bbox is not None:
            masked_frame = apply_mask(frame, prev_bbox)
        else:
            masked_frame = frame

    frame_count += 1
    return masked_frame


def smooth_bbox(prev_bbox, curr_bbox, smoothing_factor=0.95):
    smooth_bbox = [
        int(prev_val * smoothing_factor + curr_val * (1 - smoothing_factor))
        for prev_val, curr_val in zip(prev_bbox, curr_bbox)
    ]
    return smooth_bbox


processed_clip = clip.fl_image(process_frame)
processed_clip.write_videofile("output_video1.mp4")

Upvotes: 0

Views: 185

Answers (0)

Related Questions