Alex
Alex

Reputation: 27

PyQt QGraphicsScene Render Scene As Video Format

I'm trying to allow the user to export the scene as an mp4 (video format). The items of the scene consist of a QGraphicsVideoItem and multiple QGraphicsTextItems; I need to export the scene so that the user can save the video together with the text items. I've found one way to do this, but the issue is that it takes hours for a simple 5-second video, because it saves an image to bytes for every single millisecond of the video. If I sample every second instead of every millisecond it would be faster, but the video would not look as smooth. Is there a more efficient way of doing this, without it taking so long?

from PySide6.QtCore import *
from PySide6.QtGui import *
from PySide6.QtWidgets import *
from PySide6.QtSvgWidgets import *
from PySide6.QtMultimediaWidgets import QGraphicsVideoItem
from PySide6.QtMultimedia import QMediaPlayer, QAudioOutput, QMediaMetaData 

import subprocess

import sys

class ExportVideo(QThread):
    """Worker thread that renders the QGraphicsScene frame-by-frame and
    pipes the JPEG images into ffmpeg to encode ``output.mp4``.

    NOTE(review): QMediaPlayer.setPosition() is asynchronous, so the scene
    may not yet display the requested frame when it is rendered — confirm
    by waiting on the video sink's frame-changed signal if frames look
    stale or duplicated.
    """

    def __init__(self, video_item, video_player, graphics_scene, graphics_view, fps=1000):
        """
        :param video_item: QGraphicsVideoItem shown in the scene.
        :param video_player: QMediaPlayer driving the video item.
        :param graphics_scene: QGraphicsScene holding the video and text items.
        :param graphics_view: QGraphicsView displaying the scene (kept for
            interface compatibility; not used during export).
        :param fps: output frame rate. The default of 1000 samples one frame
            per millisecond (the original behaviour); a realistic value such
            as 30 makes the export dramatically faster.
        """
        super().__init__()
        self.video_item = video_item
        self.video_player = video_player
        self.graphics_scene = graphics_scene
        self.graphics_view = graphics_view
        self.fps = fps

    def run(self):
        self.video_player.pause()
        duration_ms = self.video_player.duration()
        meta = self.video_player.metaData()

        # Milliseconds between sampled frames (at least 1).
        step_ms = max(1, round(1000 / self.fps))

        # ffmpeg reads JPEG images from stdin ('-i -') and encodes them at
        # the requested frame rate.
        ffmpeg_process = subprocess.Popen(
            ['ffmpeg', '-y', '-f', 'image2pipe', '-r', str(self.fps), '-i', '-',
             '-c:v', 'libx265', '-pix_fmt', 'yuv420p', 'output.mp4'],
            stdin=subprocess.PIPE)

        # Original code reused the name 'duration' as the loop variable,
        # shadowing the total; use a distinct name for the current position.
        for position_ms in range(0, duration_ms, step_ms):
            self.video_player.setPosition(position_ms)

            print("Exporting frame:", position_ms)

            image = QImage(self.graphics_scene.sceneRect().size().toSize(), QImage.Format_ARGB32)
            # A fresh QImage has uninitialized pixel data; clear it so any
            # region the scene does not paint is black rather than garbage.
            image.fill(Qt.black)
            painter = QPainter(image)
            self.graphics_scene.render(painter)
            painter.end()

            # Serialize the frame as JPEG into an in-memory buffer.
            byte_array = QByteArray()
            buffer = QBuffer(byte_array)
            buffer.open(QIODevice.WriteOnly)
            image.save(buffer, 'JPEG')
            buffer.close()

            # Write image bytes to the ffmpeg process.
            ffmpeg_process.stdin.write(byte_array.data())

        # Closing stdin signals EOF so ffmpeg finalizes the output file.
        ffmpeg_process.stdin.close()
        ffmpeg_process.wait()


class PyVideoPlayer(QWidget):
    """Video-player widget: a QGraphicsScene containing a video item plus
    an overlay text item, with a button that starts an ``ExportVideo``
    worker thread."""

    def __init__(self):
        super().__init__()

        # Plain-text history of every overlay string added via setMedia().
        self.text_data = []

        self.mediaPlayer = QMediaPlayer()
        self.audioOutput = QAudioOutput()

        self.graphics_view = QGraphicsView()
        self.graphic_scene = QGraphicsScene()

        # Scene/view wiring: black background, scrollbars off so the video
        # fills the view without chrome.
        self.graphics_view.setScene(self.graphic_scene)
        self.graphic_scene.setBackgroundBrush(Qt.black)
        self.graphics_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.graphics_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)

        self.video_item = QGraphicsVideoItem()
        self.graphic_scene.addItem(self.video_item)
        self.save_video = QPushButton()
        
        layout = QVBoxLayout()
        layout.addWidget(self.graphics_view, stretch=1)
        layout.addWidget(self.save_video)
        self.setLayout(layout)

        # Slots Section
        self.mediaPlayer.setVideoOutput(self.video_item)
        self.mediaPlayer.positionChanged.connect(self.changeVideoPosition)
        self.save_video.clicked.connect(self.saveVideo)

    def setMedia(self, fileName):
        """Load *fileName* into the player, start playback, and add a demo
        "Test Dummy" text overlay to the scene.

        NOTE(review): setSize() is called immediately after play(); the
        video sink's size may still be empty at that point — confirm the
        video item is sized correctly once the first frame arrives.
        """
        self.mediaPlayer.setSource(QUrl.fromLocalFile(fileName))
        self.mediaPlayer.setAudioOutput(self.audioOutput)
        self.play()
        self.video_item.setSize(self.mediaPlayer.videoSink().videoSize())

        self.text_item = QGraphicsTextItem()
        self.text_item.setPlainText("Test Dummy")
        self.text_item.setDefaultTextColor(Qt.white)
        font = QFont()
        font.setPointSize(90)  
        self.text_item.setFont(font)
        # Offset the text from the scene origin by its own bounding box so
        # it sits near the left edge, roughly vertically centred.
        self.text_item.setPos(self.graphic_scene.sceneRect().x() + self.text_item.boundingRect().width(), self.graphic_scene.sceneRect().center().y() - self.text_item.boundingRect().height())
        self.graphic_scene.addItem(self.text_item)
        self.text_data.append("Test Dummy")

    def play(self):
        """Toggle playback: pause when playing, play otherwise."""
        if self.mediaPlayer.playbackState() == QMediaPlayer.PlaybackState.PlayingState:
            self.mediaPlayer.pause()
        else:
            self.mediaPlayer.play()

    def changeVideoPosition(self, duration):
        """positionChanged slot; *duration* is the current playback position
        in milliseconds. Hides the overlay text after the first second."""
        if duration > 1000 and self.text_item.isVisible():
            print("Hide Text")
            self.text_item.hide()

    def resize_graphic_scene(self):
        """Scale the view so the whole scene fits, keeping aspect ratio."""
        self.graphics_view.fitInView(self.graphic_scene.sceneRect(), Qt.KeepAspectRatio)

    def showEvent(self, event):
        # Fit the scene when the widget first becomes visible.
        self.resize_graphic_scene()

    def resizeEvent(self, event):
        # Keep the scene fitted while the window is resized.
        self.resize_graphic_scene()

    def saveVideo(self):
        """Start the export worker thread. Stored on self so the QThread is
        not garbage-collected while running."""
        self.videoExport = ExportVideo(self.video_item, self.mediaPlayer, self.graphic_scene, self.graphics_view)
        self.videoExport.start()

    


if __name__ == "__main__":
    # Launch the demo player with a sample video file.
    app = QApplication(sys.argv)
    player = PyVideoPlayer()
    player.setMedia("example.mp4")
    player.setGeometry(100, 100, 400, 300)  # Set the window size
    player.setWindowTitle("QGraphicsView Example")
    player.show()
    sys.exit(app.exec())

Upvotes: 0

Views: 217

Answers (2)

Alex
Alex

Reputation: 27

The fix I found doesn't use PyQt for the main exporting of the video — rather, it's all FFmpeg. I knew FFmpeg didn't support Rich Text Format (RTF), so I started to look at other possible ways to overcome this issue. I found out you could use a file format called Advanced SubStation Alpha (.ASS), which lets you control anything to do with the text, such as font, size, color, start/end times and more.

When using .ASS you need to make sure you are formatting the .ASS file correctly, otherwise FFmpeg might not write the subtitles the way you want them. You also need the text you want to display in the video as HTML, which can be obtained with Qt's text.toHtml() method.

When you've got your text html version location path, you will also need the stroke size / color and start / end of your text, you can also edit it to include the text position. You will have to call the function with your original video.

from bs4 import BeautifulSoup
import subprocess, sys
from PySide6.QtCore import *
from PySide6.QtWidgets import *

class videoExport(QThread):
    """Worker thread that builds an Advanced SubStation Alpha (.ass)
    subtitle script from Qt rich-text HTML files and burns it into a video
    with ffmpeg."""

    def __init__(self, text_and_stroke, video_location):
        """
        :param text_and_stroke: iterable of tuples
            ``(html_file_path, pos_x, pos_y, stroke_size, stroke_color, start, end)``.
        :param video_location: path of the source video file.
        """
        super().__init__()
        self.text_and_stroke = text_and_stroke
        self.video_location = video_location

    def run(self):
        """Assemble the .ass script from every HTML entry, write it to
        ``new_ass.ass`` and invoke ffmpeg."""
        dialogues = []

        for html_file_path, pos_x, pos_y, stroke_size, stroke_color, start, end in self.text_and_stroke:
            # 'with' closes the file automatically; the original's extra
            # file.close() calls after the block were redundant.
            with open(html_file_path, 'r') as file:
                html_content = file.read()

            soup = BeautifulSoup(html_content, 'html.parser')
            body_tag = soup.find('body')

            if body_tag:
                self.styles_attributes = self.retrieve_text_style(body_tag.get('style'))
                text_style = self.text_styles(soup, stroke_size, stroke_color)

                self.styles_attributes = ",".join(self.styles_attributes)
                text_style = "".join(text_style)

                new_dialogue = f"""Dialogue: {start},{end},Default,{{\pos({pos_x},{pos_y})}}{text_style}"""
                dialogues.append(new_dialogue)

        # NOTE(review): if no input file contained a <body> tag,
        # self.styles_attributes is never set and the f-string below raises
        # AttributeError — assumed fine for HTML produced by Qt's toHtml().
        create_ass_file = f"""[Script Info]
Title: Video Subtitles
ScriptType: v4.00+
Collisions: Normal
PlayDepth: 0

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BorderStyle, Encoding
Style: Default, {self.styles_attributes},&HFFB0B0,&HFFFF00,&H998877,0,0
Style: Background, {self.styles_attributes},&H00FFFFFF,&H000000FF,&H00000000,3,0

[Events]
Format: Start, End, Style, Text
"""

        for dialogue in dialogues:
            create_ass_file += f"{dialogue}\n"

        with open("new_ass.ass", 'w') as file:
            file.write(create_ass_file)

        self.add_subtitle_to_video(self.video_location, "new_ass.ass")

    def add_subtitle_to_video(self, video_file, ass_file):
        """Burn *ass_file* into *video_file* with ffmpeg, writing output.mp4.

        Blocks until ffmpeg exits so this thread's ``finished`` signal is not
        emitted before the output file actually exists (the original returned
        straight after Popen, firing "export finished" prematurely).
        """
        video_text = [
            "ffmpeg",
            "-y",
            "-i", video_file,
            "-vf", f"subtitles={ass_file}",
            "-c:a", "copy",
            "output.mp4"
        ]
        process = subprocess.Popen(video_text, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
        # Drain the combined stdout/stderr pipe and wait for completion;
        # without reading, a chatty ffmpeg can fill the pipe and deadlock.
        process.communicate()


    # ADDS ANY TEXT STYLE TO THE TEXT
    # ///////////////////////////////////////////////////////////////

    def text_styles(self, soup, stroke_size, stroke_color):
        """Translate the inline CSS of each <p>/<span> into .ass override
        tags and return the list of styled text fragments."""
        text_styles = []
        paragraph = soup.find_all('p')
        span_elements = soup.find_all('span')

        if paragraph:
            for paragraph_element in paragraph:
                for content in paragraph_element.contents:
                    # Only bare text nodes; styled runs live in <span>s below.
                    if isinstance(content, str):
                        text_styles.append(content.strip())

        # The outline ("stroke") is identical for every span, so compute it
        # once instead of per style entry (hoisted loop invariant).
        text_stroke = ""
        if stroke_size > 0:
            hex_stroke = stroke_color.replace('#', '')
            red = int(hex_stroke[0:2], 16)
            green = int(hex_stroke[2:4], 16)
            blue = int(hex_stroke[4:6], 16)
            # .ass colours are &HBBGGRR& (blue-green-red order).
            text_stroke = f"\\bord{stroke_size}\\3c&H{blue:02X}{green:02X}{red:02X}&"

        for span in span_elements:
            style_attr = span.get('style')
            style_and_name = style_attr.split(';')

            # Per-span style modifications, reset for each span.
            italic_str = underline_str = ""
            color_str = background_color_str = ""
            font_weight_str = font_size_str = ""
            font_family_str = ""

            for entry in style_and_name:
                if ':' not in entry:
                    continue
                style = entry.split(':')[0].strip()
                style_name = entry.split(':')[1].strip().replace("'", '')

                if style_name == 'italic':
                    italic_str = "{\\i1}"

                elif style_name == 'underline':
                    underline_str = "{\\u1}"

                elif style == 'color':
                    hex_color = style_name.replace('#', '')
                    red = int(hex_color[0:2], 16)
                    green = int(hex_color[2:4], 16)
                    blue = int(hex_color[4:6], 16)
                    color_str = f"\\c&H{blue:02X}{green:02X}{red:02X}&"

                elif style == 'background-color' and stroke_size <= 0:
                    hex_color = style_name.replace('#', '')
                    red = int(hex_color[0:2], 16)
                    green = int(hex_color[2:4], 16)
                    blue = int(hex_color[4:6], 16)
                    background_color_str = f"\\rBackground\\bord1\\3c&H{blue:02X}{green:02X}{red:02X}&"

                elif style == 'font-weight':
                    font_weight_str = "{\\b1}"

                elif style == 'font-size':
                    font_size = style_name.replace('pt', '')
                    font_size_str = f"\\fs{font_size}"

                elif style == 'font-family':
                    font_family_str = f"\\fn{style_name}"

            # Assemble the override block once per span. The original built
            # it inside the entry loop and appended an unbound (or stale,
            # from a previous span) variable when a span had no
            # "key: value" entries.
            text_style = ("{" + text_stroke + background_color_str + color_str
                          + italic_str + underline_str + font_size_str
                          + font_family_str + font_weight_str + "}")
            text_styles.append(f"{text_style}{span.text.strip()}{{\\r}}")

        return text_styles


    # Retrieve the basic text style attributes
    # ///////////////////////////////////////////////////////////////

    def retrieve_text_style(self, style_attr):
        """Parse a ``key: value; key: value`` CSS string and return the
        values (e.g. font family, point size). Stops after the first value
        carrying a 'pt' unit, which is taken to be the font size."""
        style_and_name = style_attr.split(';')
        style_attributes = []

        for text in style_and_name:
            if ':' in text:
                style_name = text.split(':')[1].strip().replace("'", '')
                if 'pt' in style_name:
                    style_name = style_name.replace('pt', '')
                    style_attributes.append(style_name)
                    break

                style_attributes.append(style_name)

        return style_attributes
    

class MainWindow(QMainWindow):
    """Minimal window: a single button that kicks off the export thread."""

    def __init__(self):
        super().__init__()

        self.setWindowTitle("Video Exporter")

        self.export_button = QPushButton("Export Video")
        self.export_button.clicked.connect(self.export_video)

        container = QWidget()
        vbox = QVBoxLayout()
        vbox.addWidget(self.export_button)
        container.setLayout(vbox)
        self.setCentralWidget(container)

    def export_video(self):
        """Start the subtitle-burn export on a background thread. The thread
        is stored on self so it is not garbage-collected while running."""
        # (html path, x, y, stroke size, stroke colour, start, end)
        jobs = [("new_subtitle.html", "0", "0", 10, "#FFFFFF", "0:00:00.00", "0:00:05.00")]
        self.export_thread = videoExport(jobs, "other.mp4")
        self.export_thread.finished.connect(self.on_export_finished)
        self.export_thread.start()

    def on_export_finished(self):
        """Show a message box once the worker signals completion."""
        QMessageBox.information(self, "Export Finished", "Video export completed!")


if __name__ == "__main__":
    # Bootstrap the Qt application and show the exporter window.
    qt_app = QApplication(sys.argv)
    main_win = MainWindow()
    main_win.show()
    sys.exit(qt_app.exec())

The negatives of doing it this way are that if you have a text animation within your scene you will have to learn how to convert it to an .ASS animation (which should be possible), and if you have non-text animations on your QGraphicsScene and want to export them to an MP4 you will probably have to use multiprocessing to capture the scene (there could be other ways). The text stroke is only applicable if you are able to retrieve it; I'm unsure whether there is a way with the basic QGraphicsTextItem, but since I'm not using Qt's outline system and am instead drawing my own, I can grab it.

Some more information about the .ASS I found useful and the way I create my outline text: https://stackoverflow.com/a/78362730/22802649 https://hhsprings.bitbucket.io/docs/programming/examples/ffmpeg/subtitle/ass.html

Upvotes: 0

kesh
kesh

Reputation: 5543

I think the problem that you'll face with the current approach (UI screen capture to produce annotated video) is that you'll likely lose the original video resolution (say if its 1080p but displayed in 300x400 UI frame, then you'll end up with 300x400 video not 1080p).

A better approach IMO is to generate a text image (PNG) with a transparent background in FFmpeg (or by any other means) with the matching frame resolution (so 1080p in my example), and load each of these images onto the QGraphicsScene as a QGraphicsItem (which I presume can be dragged around to reposition).

This will get you the background video and a bunch of text images with their display times and position offsets. Then, FFmpeg can handle merging these files together.

To generate a text PNG, you can run

import subprocess as sp

# Target video resolution — the PNG must match it so the overlay maps 1:1.
video_size = [1920, 1080]
text = "hello world"
color = "black"
fontsize = 30
fontfile = "Freeserif.ttf"

textfile = "temp_01.png" # place it in a temp folder & increment

# lavfi 'color' source creates the canvas ('@0' sets alpha to 0, i.e. fully
# transparent, preserved by format=rgba); drawtext centres the text on it;
# '-vframes 1' emits a single PNG frame.
sp.run(
    [
        "ffmpeg",
        "-f", "lavfi",
        "-i", f"color=c={color}@0:size={video_size[0]}x{video_size[1]},format=rgba",
        "-vf", f'drawtext=fontsize={fontsize}:fontfile={fontfile}:text=\'{text}\':x=(w-text_w)/2:y=(h-text_h)/2',
        "-update", "1", "-vframes", "1",
        "-y", # if needed to overwrite old
        textfile,
    ]
)

This script creates a PNG with "hello world" in the middle of the screen. Let's say we want to put this text between timestamps 1 and 2s of the video with user-defined offset of [-200px,100px] (towards the bottom-left corner).


# Overlay timing and placement: show the text between 1 s and 2 s, offset
# by [-200 px, 100 px] from the frame origin.
text_start = 1
text_end = 2
text_xoff = -200
text_yoff = 100

videofile = "example.mp4"
outfile = "annotated.mp4"

# 'overlay' composites the text PNG ([1:v], produced by the previous
# snippet) onto the video ([0:v]); 'enable' restricts it to the time window.
sp.run(
    [
        "ffmpeg",
        "-i", videofile,  # [0:v]
        "-i", textfile,  # [1:v]
        "-filter_complex", f"[0:v][1:v]overlay=x={text_xoff}:y={text_yoff}:enable='between(t,{text_start},{text_end})'[out]",
        "-map", "[out]",
        '-y', # again, if need to overwrite previous output
        outfile,
    ]
)

If you have multiple text images to overlay, you need to list all the text files as additional inputs (-i ...) and run overlay filters in cascade:

[0:v][1:v]overlay=...[out1];
[out1][2:v]overlay=...[out2];
...
[outN][N:v]overlay=...[out]

Obviously, you want to generate the filtergraph expression programmatically.

Here are the links to the filters used color, format, drawtext, overlay. Familiarize yourself with these filters and the filtergraph construction in general (see the top of the linked doc page), especially be aware of the character escaping. (hint: place the text in single quotes and escape single quotes in your overlay text)

Notes

  • if text is too long, it gets truncated off video frame
  • once you get this mechanism down, you can look into if the piped PNG data can be used by Qt. FFmpeg supports base64 encoding using the data protocol.
  • overlay filter supports the text image to move in time thus animatable, but displaying the animation on Qt would be a pain
  • I think there is a limitation on how long a subprocess command can be. So you may hit the limit with this approach, especially if you encode the png data.

I'm not familiar with Qt end of the business. So, I'll leave that to you.

Feel free to ask questions in the comment section.

Upvotes: 0

Related Questions