Reputation: 31
I am trying to write a python script that can convert a 16:9 video into a 9:16 while keeping the region of interest in frame. I am having trouble on how to do this in a well manner. My current approach is to use YOLO object reconition every 120 frames and crop around it. This works for the most part, but the end video is very choppy as there is no transition between the frames. How can I go about fixing this, or is there a better way to accomplish this task?
from moviepy.editor import VideoFileClip
from ultralytics import YOLO
import numpy as np
import cv2
model = YOLO("yolov8n.pt")
clip = VideoFileClip("Mack Falls Off Cliff.mp4")
def apply_mask(frame, bbox):
height, width, _ = frame.shape
x1, y1, x2, y2 = [int(val) for val in bbox]
# Calculate the aspect ratio of the bounding box
bbox_width = x2 - x1
bbox_height = y2 - y1
bbox_aspect_ratio = bbox_width / bbox_height
# Determine the crop region based on the desired 9:16 aspect ratio
if bbox_aspect_ratio > 9 / 16:
# Crop horizontally
new_width = int(bbox_height * (9 / 16))
x1 = x1 + int((bbox_width - new_width) / 2)
x2 = x1 + new_width
else:
# Crop vertically
new_height = int(bbox_width * (16 / 9))
y1 = y1 + int((bbox_height - new_height) / 2)
y2 = y1 + new_height
# Extract the cropped region and resize to the desired 9:16 resolution
cropped_frame = frame[y1:y2, x1:x2]
masked_frame = cv2.resize(cropped_frame, (720, 1280))
return masked_frame
prev_bbox = None
frame_count = 0
crop_interval = 120
def process_frame(frame):
global prev_bbox, frame_count
if frame_count % crop_interval == 0:
results = model(frame)
bboxes = results[0].boxes.xyxy.cpu().numpy()
if len(bboxes) > 0:
bbox = max(bboxes, key=lambda x: (x[2] - x[0]) * (x[3] - x[1]))
if prev_bbox is not None:
bbox = smooth_bbox(prev_bbox, bbox)
prev_bbox = bbox
masked_frame = apply_mask(frame, bbox)
else:
if prev_bbox is not None:
masked_frame = apply_mask(frame, prev_bbox)
else:
masked_frame = frame
else:
if prev_bbox is not None:
masked_frame = apply_mask(frame, prev_bbox)
else:
masked_frame = frame
frame_count += 1
return masked_frame
def smooth_bbox(prev_bbox, curr_bbox, smoothing_factor=0.95):
smooth_bbox = [
int(prev_val * smoothing_factor + curr_val * (1 - smoothing_factor))
for prev_val, curr_val in zip(prev_bbox, curr_bbox)
]
return smooth_bbox
processed_clip = clip.fl_image(process_frame)
processed_clip.write_videofile("output_video1.mp4")
Upvotes: 0
Views: 185