Reputation: 27
I'm trying to allow the user to export the scene as an mp4 (video format). The items of the scene consist of a QGraphicsVideoItem and multiple QGraphicsTextItems; I need to export the scene so the user can save the video with the text items included. I've found one way to do this, but the problem is that it takes hours even for a simple 5-second video, because it saves a snapshot of the scene to bytes for every millisecond of video. If I change the step from milliseconds to seconds it speeds up, but the video no longer looks smooth. Is there a more efficient way of doing this, without it taking so long?
from PySide6.QtCore import *
from PySide6.QtGui import *
from PySide6.QtWidgets import *
from PySide6.QtSvgWidgets import *
from PySide6.QtMultimediaWidgets import QGraphicsVideoItem
from PySide6.QtMultimedia import QMediaPlayer, QAudioOutput, QMediaMetaData
import subprocess
import sys
class ExportVideo(QThread):
    """Export the QGraphicsScene (video frame + text overlays) to output.mp4
    by piping JPEG snapshots of the scene into an ffmpeg subprocess.

    NOTE(review): QGraphicsScene.render() from a worker thread is not
    officially supported by Qt (painting belongs to the GUI thread) --
    confirm this is safe for the item set in use.
    """

    def __init__(self, video_item, video_player, graphics_scene, graphics_view, fps=30):
        """
        :param video_item: QGraphicsVideoItem shown in the scene.
        :param video_player: QMediaPlayer that owns the source video.
        :param graphics_scene: scene containing the video item and text items.
        :param graphics_view: view displaying the scene (kept for API parity).
        :param fps: frames per second to export.  The original code stepped
            one millisecond at a time (i.e. 1000 fps), which is why the
            export took hours; 30 fps looks smooth with ~33x fewer frames.
        """
        super().__init__()
        self.video_item = video_item
        self.video_player = video_player
        self.graphics_scene = graphics_scene
        self.graphics_view = graphics_view
        self.fps = fps

    def run(self):
        self.video_player.pause()
        duration = self.video_player.duration()  # total length in ms
        meta = self.video_player.metaData()
        step_ms = max(1, round(1000 / self.fps))  # ms between exported frames
        # ffmpeg consumes JPEG images from stdin at self.fps frames/second.
        ffmpeg_process = subprocess.Popen(
            ['ffmpeg', '-y', '-f', 'image2pipe', '-r', str(self.fps), '-i', '-',
             '-c:v', 'libx265', '-pix_fmt', 'yuv420p', 'output.mp4'],
            stdin=subprocess.PIPE)
        # Use a distinct loop variable: the original reused `duration`,
        # shadowing the total length.
        for position in range(0, duration, step_ms):
            # NOTE(review): setPosition() seeks asynchronously; the video
            # sink may not have presented the new frame yet when the scene
            # is rendered below -- confirm frame accuracy.
            self.video_player.setPosition(position)
            print("Exporting frame:", position)
            image = QImage(self.graphics_scene.sceneRect().size().toSize(),
                           QImage.Format_ARGB32)
            # A fresh QImage has uninitialized pixel data; clear it before
            # rendering so uncovered areas are deterministic.
            image.fill(Qt.black)
            painter = QPainter(image)
            self.graphics_scene.render(painter)
            painter.end()
            # Serialize the frame as JPEG into an in-memory buffer.
            byte_array = QByteArray()
            buffer = QBuffer(byte_array)
            buffer.open(QIODevice.WriteOnly)
            image.save(buffer, 'JPEG')
            # Stream the encoded frame bytes to ffmpeg.
            ffmpeg_process.stdin.write(byte_array.data())
        # Close the pipe to signal ffmpeg that all frames have been delivered,
        # then wait for encoding to finish.
        ffmpeg_process.stdin.close()
        ffmpeg_process.wait()
class PyVideoPlayer(QWidget):
    """Video player widget: a QGraphicsVideoItem inside a QGraphicsScene so
    rich-text items can be layered on top, plus a button that starts the
    scene export."""

    def __init__(self):
        super().__init__()
        # Plain-text copies of every overlay added via setMedia().
        self.text_data = []
        self.mediaPlayer = QMediaPlayer()
        self.audioOutput = QAudioOutput()
        self.graphics_view = QGraphicsView()
        self.graphic_scene = QGraphicsScene()
        self.graphics_view.setScene(self.graphic_scene)
        self.graphic_scene.setBackgroundBrush(Qt.black)
        # Scrollbars stay hidden because fitInView() (below) always keeps
        # the whole scene visible.
        self.graphics_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.graphics_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.video_item = QGraphicsVideoItem()
        self.graphic_scene.addItem(self.video_item)
        self.save_video = QPushButton()
        layout = QVBoxLayout()
        layout.addWidget(self.graphics_view, stretch=1)
        layout.addWidget(self.save_video)
        self.setLayout(layout)
        # Slots Section
        self.mediaPlayer.setVideoOutput(self.video_item)
        self.mediaPlayer.positionChanged.connect(self.changeVideoPosition)
        self.save_video.clicked.connect(self.saveVideo)

    def setMedia(self, fileName):
        """Load *fileName*, start playback, and add a demo text overlay.

        NOTE(review): videoSink().videoSize() is queried immediately after
        play(); the size may still be empty until the first frame arrives --
        confirm the video item is sized correctly for slow-loading media.
        """
        self.mediaPlayer.setSource(QUrl.fromLocalFile(fileName))
        self.mediaPlayer.setAudioOutput(self.audioOutput)
        self.play()
        self.video_item.setSize(self.mediaPlayer.videoSink().videoSize())
        self.text_item = QGraphicsTextItem()
        self.text_item.setPlainText("Test Dummy")
        self.text_item.setDefaultTextColor(Qt.white)
        font = QFont()
        font.setPointSize(90)
        self.text_item.setFont(font)
        # Position relative to the scene rect, offset by the text's own
        # bounding box.
        self.text_item.setPos(self.graphic_scene.sceneRect().x() + self.text_item.boundingRect().width(), self.graphic_scene.sceneRect().center().y() - self.text_item.boundingRect().height())
        self.graphic_scene.addItem(self.text_item)
        self.text_data.append("Test Dummy")

    def play(self):
        """Toggle between playing and paused states."""
        if self.mediaPlayer.playbackState() == QMediaPlayer.PlaybackState.PlayingState:
            self.mediaPlayer.pause()
        else:
            self.mediaPlayer.play()

    def changeVideoPosition(self, duration):
        # Hide the overlay once playback passes the 1-second mark.
        # NOTE(review): relies on self.text_item existing, i.e. setMedia()
        # must have run before the first positionChanged signal fires.
        if duration > 1000 and self.text_item.isVisible():
            print("Hide Text")
            self.text_item.hide()

    def resize_graphic_scene(self):
        """Scale the view so the whole scene is visible, preserving aspect."""
        self.graphics_view.fitInView(self.graphic_scene.sceneRect(), Qt.KeepAspectRatio)

    def showEvent(self, event):
        # Refit on first show so the initial geometry matches the scene.
        self.resize_graphic_scene()

    def resizeEvent(self, event):
        # Refit whenever the window is resized.
        self.resize_graphic_scene()

    def saveVideo(self):
        # Kept on self so the QThread is not garbage-collected while running.
        self.videoExport = ExportVideo(self.video_item, self.mediaPlayer, self.graphic_scene, self.graphics_view)
        self.videoExport.start()
if __name__ == "__main__":
    # Bootstrap the Qt application, load the demo clip, and show the player.
    application = QApplication(sys.argv)
    player = PyVideoPlayer()
    player.setMedia("example.mp4")
    player.setGeometry(100, 100, 400, 300)  # initial window geometry
    player.setWindowTitle("QGraphicsView Example")
    player.show()
    sys.exit(application.exec())
Assertion fctx->async_lock failed at C:/ffmpeg-n6.0/libavcodec/pthread_frame.c:155.
I think it must have something to do with setting the video player position. Upvotes: 0
Views: 217
Reputation: 27
The fix I found doesn't use PyQt for the main exporting of the video but rather its all FFMPEG. I knew FFMPEG didn't support Rich Text Format (RTF) so I started to look at other possible ways to overcome this issue, I found out you could use a file format called Advanced SubStation Alpha (.ASS) which lets you control anything to do with the text, such as font, size, color, start / end times and more.
When using .ASS you need to make sure you are formatting the .ASS file correctly, otherwise FFMPEG might not write the subtitles the way you want. You also need the text you want to display in the video as HTML, which can be produced with Qt's text.toHtml()
.
When you've got your text html version location path, you will also need the stroke size / color and start / end of your text, you can also edit it to include the text position. You will have to call the function with your original video.
from bs4 import BeautifulSoup
import subprocess, sys
from PySide6.QtCore import *
from PySide6.QtWidgets import *
class videoExport(QThread):
    """Burn Qt rich-text overlays into a video via an ASS subtitle file.

    Each overlay's HTML (produced by Qt's text.toHtml()) is parsed into ASS
    override tags, a .ass subtitle file is written, and ffmpeg renders it
    into the source video -- far faster than capturing the scene frame by
    frame.
    """

    def __init__(self, text_and_stroke, video_location):
        """
        :param text_and_stroke: iterable of tuples
            (html_file_path, pos_x, pos_y, stroke_size, stroke_color, start, end);
            start/end are ASS timestamps such as "0:00:00.00".
        :param video_location: path to the source video file.
        """
        super().__init__()
        self.text_and_stroke = text_and_stroke
        self.video_location = video_location

    def run(self):
        dialogues = []
        # Default so the [V4+ Styles] section can still be built even when no
        # HTML file contains a styled <body> tag (the original crashed with
        # AttributeError in that case).
        self.styles_attributes = ""
        for html_file_path, pos_x, pos_y, stroke_size, stroke_color, start, end in self.text_and_stroke:
            # 'with' closes the file automatically; the explicit close()
            # the original called afterwards was redundant.
            with open(html_file_path, 'r') as file:
                html_content = file.read()
            soup = BeautifulSoup(html_content, 'html.parser')
            body_tag = soup.find('body')
            if body_tag:
                self.styles_attributes = self.retrieve_text_style(body_tag.get('style'))
                text_style = self.text_styles(soup, stroke_size, stroke_color)
                self.styles_attributes = ",".join(self.styles_attributes)
                text_style = "".join(text_style)
                # \pos() pins the dialogue at an absolute scene position.
                new_dialogue = f"""Dialogue: {start},{end},Default,{{\\pos({pos_x},{pos_y})}}{text_style}"""
                dialogues.append(new_dialogue)
        create_ass_file = f"""[Script Info]
Title: Video Subtitles
ScriptType: v4.00+
Collisions: Normal
PlayDepth: 0
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BorderStyle, Encoding
Style: Default, {self.styles_attributes},&HFFB0B0,&HFFFF00,&H998877,0,0
Style: Background, {self.styles_attributes},&H00FFFFFF,&H000000FF,&H00000000,3,0
[Events]
Format: Start, End, Style, Text
"""
        for dialogue in dialogues:
            create_ass_file += f"{dialogue}\n"
        with open("new_ass.ass", 'w') as file:
            file.write(create_ass_file)
        self.add_subtitle_to_video(self.video_location, "new_ass.ass")

    def add_subtitle_to_video(self, video_file, ass_file):
        """Run ffmpeg to burn *ass_file* into *video_file* as output.mp4."""
        video_text = [
            "ffmpeg",
            "-y",
            "-i", video_file,
            "-vf", f"subtitles={ass_file}",
            "-c:a", "copy",
            "output.mp4"
        ]
        process = subprocess.Popen(video_text, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
        # Drain ffmpeg's output and wait for it to exit.  The original never
        # waited, so the QThread's finished signal (and the "Export Finished"
        # dialog) could fire while ffmpeg was still encoding -- and a full
        # stdout pipe could deadlock the encode.
        process.communicate()

    # ADDS ANY TEXT STYLE TO THE TEXT
    # ///////////////////////////////////////////////////////////////
    def text_styles(self, soup, stroke_size, stroke_color):
        """Convert the HTML's <p>/<span> styling into ASS override tags.

        :return: list of strings -- plain paragraph text plus, for each span,
            '{<override tags>}text{\\r}' chunks ready to be joined into one
            Dialogue line.
        """
        text_styles = []
        paragraph = soup.find_all('p')
        span_elements = soup.find_all('span')
        if paragraph:
            for paragraph_element in paragraph:
                for content in paragraph_element.contents:
                    # Direct strings are unstyled text between spans.
                    if isinstance(content, str):
                        written_text = content.strip()
                        text_styles.append(written_text)
        if span_elements:
            for span in span_elements:
                style_attr = span.get('style')
                # A span without a style attribute crashed the original
                # (None.split); treat it as having no styles instead.
                style_and_name = style_attr.split(';') if style_attr else []
                # Initialize variables to hold style modifications
                italic_str = underline_str = ""
                color_str = background_color_str = text_stroke = ""
                font_weight_str = font_size_str = ""
                font_family_str = ""
                for text in style_and_name:
                    if ':' in text:
                        style = text.split(':')[0].strip()
                        style_name = text.split(':')[1].strip().replace("'", '')
                        if stroke_size > 0:
                            # ASS colours are &HBBGGRR& -- blue byte first.
                            stroke_color = stroke_color.replace('#', '')
                            red = int(stroke_color[0:2], 16)
                            green = int(stroke_color[2:4], 16)
                            blue = int(stroke_color[4:6], 16)
                            text_stroke = f"\\bord{stroke_size}\\3c&H{blue:02X}{green:02X}{red:02X}&"
                        # Modify text based on style
                        if style_name == 'italic':
                            italic_str = "{\\i1}"
                        elif style_name == 'underline':
                            underline_str = "{\\u1}"
                        elif style == 'color':
                            hex_color = style_name.replace('#', '')
                            red = int(hex_color[0:2], 16)
                            green = int(hex_color[2:4], 16)
                            blue = int(hex_color[4:6], 16)
                            color_str = f"\\c&H{blue:02X}{green:02X}{red:02X}&"
                        elif style == 'background-color' and stroke_size <= 0:
                            hex_color = style_name.replace('#', '')
                            red = int(hex_color[0:2], 16)
                            green = int(hex_color[2:4], 16)
                            blue = int(hex_color[4:6], 16)
                            background_color_str = f"\\rBackground\\bord1\\3c&H{blue:02X}{green:02X}{red:02X}&"
                        elif style == 'font-weight':
                            font_weight_str = "{\\b1}"
                        elif style == 'font-size':
                            font_size = style_name.replace('pt', '')
                            font_size_str = f"\\fs{font_size}"
                        elif style == 'font-family':
                            font_family_str = f"\\fn{style_name}"
                # Combine all style modifications
                text_style = "{" + text_stroke + background_color_str + color_str + italic_str + underline_str + font_size_str + font_family_str + font_weight_str + "}"
                text_with_styles = f"{text_style}{span.text.strip()}{{\\r}}"
                # Append modified text to the list
                text_styles.append(text_with_styles)
        return text_styles

    # Retrieve the basic text style attributes
    # ///////////////////////////////////////////////////////////////
    def retrieve_text_style(self, style_attr):
        """Extract style values from the <body> style attribute for the ASS
        Style line, stopping after the first point-size entry.

        :param style_attr: CSS style string from the body tag, or None.
        :return: list of raw style values (font name, size, ...).
        """
        # A <body> without a style attribute yields None; return no
        # attributes instead of crashing on None.split.
        if not style_attr:
            return []
        style_and_name = style_attr.split(';')
        style_attributes = []
        for text in style_and_name:
            if ':' in text:
                style_name = text.split(':')[1].strip().replace("'", '')
                if 'pt' in style_name:
                    # Strip the unit -- ASS Fontsize is a bare number -- and
                    # stop: size is the last attribute kept.
                    style_name = style_name.replace('pt', '')
                    style_attributes.append(style_name)
                    break
                style_attributes.append(style_name)
        return style_attributes
class MainWindow(QMainWindow):
    """Minimal window exposing one button that kicks off the video export."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Video Exporter")
        self.export_button = QPushButton("Export Video")
        self.export_button.clicked.connect(self.export_video)
        container = QWidget()
        button_column = QVBoxLayout()
        button_column.addWidget(self.export_button)
        container.setLayout(button_column)
        self.setCentralWidget(container)

    def export_video(self):
        """Start the export worker thread for a hard-coded demo overlay."""
        text_and_stroke = [("new_subtitle.html", "0", "0", 10, "#FFFFFF", "0:00:00.00", "0:00:05.00")]
        self.export_thread = videoExport(text_and_stroke, "other.mp4")
        self.export_thread.finished.connect(self.on_export_finished)
        self.export_thread.start()

    def on_export_finished(self):
        """Tell the user the worker thread has reported completion."""
        QMessageBox.information(self, "Export Finished", "Video export completed!")
if __name__ == "__main__":
    # Standard Qt bootstrap: build the app, show the window, enter the loop.
    qt_app = QApplication(sys.argv)
    main_window = MainWindow()
    main_window.show()
    sys.exit(qt_app.exec())
The negative of doing it this way is that if you have a text animation within your scene you will have to learn how to convert it to an .ASS animation, which should be possible. Also, if you have non-text animations on your QGraphicsScene and want to export them to an MP4, you will probably have to use multiprocessing to capture the scene (there could be other ways). The text stroke is only applicable if you are able to retrieve it; I'm unsure whether there is a way with the basic QGraphicsTextItem, but since I'm not using Qt's outline system and am instead drawing my own, I can grab it.
Some more information about the .ASS I found useful and the way I create my outline text: https://stackoverflow.com/a/78362730/22802649 https://hhsprings.bitbucket.io/docs/programming/examples/ffmpeg/subtitle/ass.html
Upvotes: 0
Reputation: 5543
I think the problem you'll face with the current approach (UI screen capture to produce an annotated video) is that you'll likely lose the original video resolution (say, if it's 1080p but displayed in a 300x400 UI frame, then you'll end up with a 300x400 video, not 1080p).
A better approach IMO is to generate a text image (PNG) with a transparent background in FFmpeg (or by any other means) at the matching frame resolution (so 1080p in my example), and load each such image onto the QGraphicsScene as a QGraphicsItem (which I presume can be dragged around to reposition).
This will get you the background video and a bunch of text images with their display times and position offsets. Then, FFmpeg can handle merging these files together.
To generate a text PNG, you can run
import subprocess as sp

# Parameters for the transparent text card.
video_size = [1920, 1080]
text = "hello world"
color = "black"
fontsize = 30
fontfile = "Freeserif.ttf"
textfile = "temp_01.png"  # place it in a temp folder & increment

# Build a fully transparent canvas with lavfi's color source, draw the text
# centered on it, and grab exactly one frame as a PNG.
cmd = [
    "ffmpeg",
    "-f", "lavfi",
    "-i", f"color=c={color}@0:size={video_size[0]}x{video_size[1]},format=rgba",
    "-vf", f"drawtext=fontsize={fontsize}:fontfile={fontfile}:text='{text}':x=(w-text_w)/2:y=(h-text_h)/2",
    "-update", "1", "-vframes", "1",
    "-y",  # if needed to overwrite old
    textfile,
]
sp.run(cmd)
This script creates a PNG with "hello world" in the middle of the screen. Let's say we want to put this text between timestamps 1 and 2s of the video with user-defined offset of [-200px,100px] (towards the bottom-left corner).
# Overlay timing (seconds) and user-chosen offset from the default position.
text_start = 1
text_end = 2
text_xoff = -200
text_yoff = 100
videofile = "example.mp4"
outfile = "annotated.mp4"

# Composite the text PNG ([1:v]) over the source video ([0:v]); the enable=
# expression limits the overlay to the [text_start, text_end] window.
overlay_expr = (
    f"[0:v][1:v]overlay=x={text_xoff}:y={text_yoff}"
    f":enable='between(t,{text_start},{text_end})'[out]"
)
sp.run(
    [
        "ffmpeg",
        "-i", videofile,  # [0:v]
        "-i", textfile,   # [1:v]
        "-filter_complex", overlay_expr,
        "-map", "[out]",
        "-y",  # again, if need to overwrite previous output
        outfile,
    ]
)
If you have multiple text images to overlay, you need to list all the text files as additional inputs (-i ...
) and run overlay filters in cascade:
[0:v][1:v]overlay=...[out1];
[out1][2:v]overlay=...[out2];
...
[outN][N:v]overlay=...[out]
Obviously, you want to generate the filtergraph expression programmatically.
Here are the links to the filters used color
, format
, drawtext
, overlay
. Familiarize yourself with these filters and the filtergraph construction in general (see the top of the linked doc page), especially be aware of the character escaping. (hint: place the text in single quotes and escape single quotes in your overlay text)
Notes
overlay
filter supports moving the text image over time, so it is animatable, but displaying that animation in Qt would be a pain. I'm not familiar with the Qt end of the business, so I'll leave that to you.
Feel free to ask questions in the comment section.
Upvotes: 0