Alex
Alex

Reputation: 27

PyQt QGraphicsScene Render Scene As Video Format

I'm trying to allow the user to export the scene as an mp4 (video format). The items of the scene consist of a QGraphicsVideoItem and multiple QGraphicsTextItems; I need to export the scene so that the user can save the video together with the text items. I've found one way to do this, but the issue is that it takes hours for a simple 5-second video, because it saves an image to bytes for every single millisecond of the video. If I sample every second instead of every millisecond it would be faster, but the video would not look as smooth. Is there a more efficient way of doing this, without it taking so long?

from PySide6.QtCore import *
from PySide6.QtGui import *
from PySide6.QtWidgets import *
from PySide6.QtSvgWidgets import *
from PySide6.QtMultimediaWidgets import QGraphicsVideoItem
from PySide6.QtMultimedia import QMediaPlayer, QAudioOutput, QMediaMetaData 

import subprocess

import sys

class ExportVideo(QThread):
    """Worker thread that renders the QGraphicsScene frame-by-frame and
    pipes the JPEG images into ffmpeg to encode ``output.mp4``.

    NOTE(review): QMediaPlayer.setPosition() is asynchronous, so the scene
    may not yet display the requested frame when it is rendered — confirm
    by waiting on the video sink's frame-changed signal if frames look
    stale or duplicated.
    """

    def __init__(self, video_item, video_player, graphics_scene, graphics_view, fps=1000):
        """
        :param video_item: QGraphicsVideoItem shown in the scene.
        :param video_player: QMediaPlayer driving the video item.
        :param graphics_scene: QGraphicsScene holding the video and text items.
        :param graphics_view: QGraphicsView displaying the scene (kept for
            interface compatibility; not used during export).
        :param fps: output frame rate. The default of 1000 samples one frame
            per millisecond (the original behaviour); a realistic value such
            as 30 makes the export dramatically faster.
        """
        super().__init__()
        self.video_item = video_item
        self.video_player = video_player
        self.graphics_scene = graphics_scene
        self.graphics_view = graphics_view
        self.fps = fps

    def run(self):
        self.video_player.pause()
        duration_ms = self.video_player.duration()
        meta = self.video_player.metaData()

        # Milliseconds between sampled frames (at least 1).
        step_ms = max(1, round(1000 / self.fps))

        # ffmpeg reads JPEG images from stdin ('-i -') and encodes them at
        # the requested frame rate.
        ffmpeg_process = subprocess.Popen(
            ['ffmpeg', '-y', '-f', 'image2pipe', '-r', str(self.fps), '-i', '-',
             '-c:v', 'libx265', '-pix_fmt', 'yuv420p', 'output.mp4'],
            stdin=subprocess.PIPE)

        # Original code reused the name 'duration' as the loop variable,
        # shadowing the total; use a distinct name for the current position.
        for position_ms in range(0, duration_ms, step_ms):
            self.video_player.setPosition(position_ms)

            print("Exporting frame:", position_ms)

            image = QImage(self.graphics_scene.sceneRect().size().toSize(), QImage.Format_ARGB32)
            # A fresh QImage has uninitialized pixel data; clear it so any
            # region the scene does not paint is black rather than garbage.
            image.fill(Qt.black)
            painter = QPainter(image)
            self.graphics_scene.render(painter)
            painter.end()

            # Serialize the frame as JPEG into an in-memory buffer.
            byte_array = QByteArray()
            buffer = QBuffer(byte_array)
            buffer.open(QIODevice.WriteOnly)
            image.save(buffer, 'JPEG')
            buffer.close()

            # Write image bytes to the ffmpeg process.
            ffmpeg_process.stdin.write(byte_array.data())

        # Closing stdin signals EOF so ffmpeg finalizes the output file.
        ffmpeg_process.stdin.close()
        ffmpeg_process.wait()


class PyVideoPlayer(QWidget):
    """Video-player widget: a QGraphicsScene containing a video item plus
    an overlay text item, with a button that starts an ``ExportVideo``
    worker thread."""

    def __init__(self):
        super().__init__()

        # Plain-text history of every overlay string added via setMedia().
        self.text_data = []

        self.mediaPlayer = QMediaPlayer()
        self.audioOutput = QAudioOutput()

        self.graphics_view = QGraphicsView()
        self.graphic_scene = QGraphicsScene()

        # Scene/view wiring: black background, scrollbars off so the video
        # fills the view without chrome.
        self.graphics_view.setScene(self.graphic_scene)
        self.graphic_scene.setBackgroundBrush(Qt.black)
        self.graphics_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.graphics_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)

        self.video_item = QGraphicsVideoItem()
        self.graphic_scene.addItem(self.video_item)
        self.save_video = QPushButton()
        
        layout = QVBoxLayout()
        layout.addWidget(self.graphics_view, stretch=1)
        layout.addWidget(self.save_video)
        self.setLayout(layout)

        # Slots Section
        self.mediaPlayer.setVideoOutput(self.video_item)
        self.mediaPlayer.positionChanged.connect(self.changeVideoPosition)
        self.save_video.clicked.connect(self.saveVideo)

    def setMedia(self, fileName):
        """Load *fileName* into the player, start playback, and add a demo
        "Test Dummy" text overlay to the scene.

        NOTE(review): setSize() is called immediately after play(); the
        video sink's size may still be empty at that point — confirm the
        video item is sized correctly once the first frame arrives.
        """
        self.mediaPlayer.setSource(QUrl.fromLocalFile(fileName))
        self.mediaPlayer.setAudioOutput(self.audioOutput)
        self.play()
        self.video_item.setSize(self.mediaPlayer.videoSink().videoSize())

        self.text_item = QGraphicsTextItem()
        self.text_item.setPlainText("Test Dummy")
        self.text_item.setDefaultTextColor(Qt.white)
        font = QFont()
        font.setPointSize(90)  
        self.text_item.setFont(font)
        # Offset the text from the scene origin by its own bounding box so
        # it sits near the left edge, roughly vertically centred.
        self.text_item.setPos(self.graphic_scene.sceneRect().x() + self.text_item.boundingRect().width(), self.graphic_scene.sceneRect().center().y() - self.text_item.boundingRect().height())
        self.graphic_scene.addItem(self.text_item)
        self.text_data.append("Test Dummy")

    def play(self):
        """Toggle playback: pause when playing, play otherwise."""
        if self.mediaPlayer.playbackState() == QMediaPlayer.PlaybackState.PlayingState:
            self.mediaPlayer.pause()
        else:
            self.mediaPlayer.play()

    def changeVideoPosition(self, duration):
        """positionChanged slot; *duration* is the current playback position
        in milliseconds. Hides the overlay text after the first second."""
        if duration > 1000 and self.text_item.isVisible():
            print("Hide Text")
            self.text_item.hide()

    def resize_graphic_scene(self):
        """Scale the view so the whole scene fits, keeping aspect ratio."""
        self.graphics_view.fitInView(self.graphic_scene.sceneRect(), Qt.KeepAspectRatio)

    def showEvent(self, event):
        # Fit the scene when the widget first becomes visible.
        self.resize_graphic_scene()

    def resizeEvent(self, event):
        # Keep the scene fitted while the window is resized.
        self.resize_graphic_scene()

    def saveVideo(self):
        """Start the export worker thread. Stored on self so the QThread is
        not garbage-collected while running."""
        self.videoExport = ExportVideo(self.video_item, self.mediaPlayer, self.graphic_scene, self.graphics_view)
        self.videoExport.start()

    


if __name__ == "__main__":
    # Launch the demo player with a sample video file.
    app = QApplication(sys.argv)
    player = PyVideoPlayer()
    player.setMedia("example.mp4")
    player.setGeometry(100, 100, 400, 300)  # Set the window size
    player.setWindowTitle("QGraphicsView Example")
    player.show()
    sys.exit(app.exec())

Upvotes: 0

Views: 217

Answers (2)

Alex
Alex

Reputation: 27

The fix I found doesn't use PyQt for the main exporting of the video — rather, it's all FFmpeg. I knew FFmpeg didn't support Rich Text Format (RTF), so I started to look at other possible ways to overcome this issue. I found out you could use a file format called Advanced SubStation Alpha (.ASS), which lets you control anything to do with the text, such as font, size, color, start/end times and more.

When using .ASS you need to make sure you are formatting the .ASS file correctly, otherwise FFmpeg might not write the subtitles the way you want them. You also need the text you want to display in the video as HTML, which can be obtained with Qt's text.toHtml() method.

When you've got your text html version location path, you will also need the stroke size / color and start / end of your text, you can also edit it to include the text position. You will have to call the function with your original video.

from bs4 import BeautifulSoup
import subprocess, sys
from PySide6.QtCore import *
from PySide6.QtWidgets import *

class videoExport(QThread):
    """Worker thread that builds an Advanced SubStation Alpha (.ass)
    subtitle script from Qt rich-text HTML files and burns it into a video
    with ffmpeg."""

    def __init__(self, text_and_stroke, video_location):
        """
        :param text_and_stroke: iterable of tuples
            ``(html_file_path, pos_x, pos_y, stroke_size, stroke_color, start, end)``.
        :param video_location: path of the source video file.
        """
        super().__init__()
        self.text_and_stroke = text_and_stroke
        self.video_location = video_location

    def run(self):
        """Assemble the .ass script from every HTML entry, write it to
        ``new_ass.ass`` and invoke ffmpeg."""
        dialogues = []

        for html_file_path, pos_x, pos_y, stroke_size, stroke_color, start, end in self.text_and_stroke:
            # 'with' closes the file automatically; the original's extra
            # file.close() calls after the block were redundant.
            with open(html_file_path, 'r') as file:
                html_content = file.read()

            soup = BeautifulSoup(html_content, 'html.parser')
            body_tag = soup.find('body')

            if body_tag:
                self.styles_attributes = self.retrieve_text_style(body_tag.get('style'))
                text_style = self.text_styles(soup, stroke_size, stroke_color)

                self.styles_attributes = ",".join(self.styles_attributes)
                text_style = "".join(text_style)

                new_dialogue = f"""Dialogue: {start},{end},Default,{{\pos({pos_x},{pos_y})}}{text_style}"""
                dialogues.append(new_dialogue)

        # NOTE(review): if no input file contained a <body> tag,
        # self.styles_attributes is never set and the f-string below raises
        # AttributeError — assumed fine for HTML produced by Qt's toHtml().
        create_ass_file = f"""[Script Info]
Title: Video Subtitles
ScriptType: v4.00+
Collisions: Normal
PlayDepth: 0

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BorderStyle, Encoding
Style: Default, {self.styles_attributes},&HFFB0B0,&HFFFF00,&H998877,0,0
Style: Background, {self.styles_attributes},&H00FFFFFF,&H000000FF,&H00000000,3,0

[Events]
Format: Start, End, Style, Text
"""

        for dialogue in dialogues:
            create_ass_file += f"{dialogue}\n"

        with open("new_ass.ass", 'w') as file:
            file.write(create_ass_file)

        self.add_subtitle_to_video(self.video_location, "new_ass.ass")

    def add_subtitle_to_video(self, video_file, ass_file):
        """Burn *ass_file* into *video_file* with ffmpeg, writing output.mp4.

        Blocks until ffmpeg exits so this thread's ``finished`` signal is not
        emitted before the output file actually exists (the original returned
        straight after Popen, firing "export finished" prematurely).
        """
        video_text = [
            "ffmpeg",
            "-y",
            "-i", video_file,
            "-vf", f"subtitles={ass_file}",
            "-c:a", "copy",
            "output.mp4"
        ]
        process = subprocess.Popen(video_text, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
        # Drain the combined stdout/stderr pipe and wait for completion;
        # without reading, a chatty ffmpeg can fill the pipe and deadlock.
        process.communicate()


    # ADDS ANY TEXT STYLE TO THE TEXT
    # ///////////////////////////////////////////////////////////////

    def text_styles(self, soup, stroke_size, stroke_color):
        """Translate the inline CSS of each <p>/<span> into .ass override
        tags and return the list of styled text fragments."""
        text_styles = []
        paragraph = soup.find_all('p')
        span_elements = soup.find_all('span')

        if paragraph:
            for paragraph_element in paragraph:
                for content in paragraph_element.contents:
                    # Only bare text nodes; styled runs live in <span>s below.
                    if isinstance(content, str):
                        text_styles.append(content.strip())

        # The outline ("stroke") is identical for every span, so compute it
        # once instead of per style entry (hoisted loop invariant).
        text_stroke = ""
        if stroke_size > 0:
            hex_stroke = stroke_color.replace('#', '')
            red = int(hex_stroke[0:2], 16)
            green = int(hex_stroke[2:4], 16)
            blue = int(hex_stroke[4:6], 16)
            # .ass colours are &HBBGGRR& (blue-green-red order).
            text_stroke = f"\\bord{stroke_size}\\3c&H{blue:02X}{green:02X}{red:02X}&"

        for span in span_elements:
            style_attr = span.get('style')
            style_and_name = style_attr.split(';')

            # Per-span style modifications, reset for each span.
            italic_str = underline_str = ""
            color_str = background_color_str = ""
            font_weight_str = font_size_str = ""
            font_family_str = ""

            for entry in style_and_name:
                if ':' not in entry:
                    continue
                style = entry.split(':')[0].strip()
                style_name = entry.split(':')[1].strip().replace("'", '')

                if style_name == 'italic':
                    italic_str = "{\\i1}"

                elif style_name == 'underline':
                    underline_str = "{\\u1}"

                elif style == 'color':
                    hex_color = style_name.replace('#', '')
                    red = int(hex_color[0:2], 16)
                    green = int(hex_color[2:4], 16)
                    blue = int(hex_color[4:6], 16)
                    color_str = f"\\c&H{blue:02X}{green:02X}{red:02X}&"

                elif style == 'background-color' and stroke_size <= 0:
                    hex_color = style_name.replace('#', '')
                    red = int(hex_color[0:2], 16)
                    green = int(hex_color[2:4], 16)
                    blue = int(hex_color[4:6], 16)
                    background_color_str = f"\\rBackground\\bord1\\3c&H{blue:02X}{green:02X}{red:02X}&"

                elif style == 'font-weight':
                    font_weight_str = "{\\b1}"

                elif style == 'font-size':
                    font_size = style_name.replace('pt', '')
                    font_size_str = f"\\fs{font_size}"

                elif style == 'font-family':
                    font_family_str = f"\\fn{style_name}"

            # Assemble the override block once per span. The original built
            # it inside the entry loop and appended an unbound (or stale,
            # from a previous span) variable when a span had no
            # "key: value" entries.
            text_style = ("{" + text_stroke + background_color_str + color_str
                          + italic_str + underline_str + font_size_str
                          + font_family_str + font_weight_str + "}")
            text_styles.append(f"{text_style}{span.text.strip()}{{\\r}}")

        return text_styles


    # Retrieve the basic text style attributes
    # ///////////////////////////////////////////////////////////////

    def retrieve_text_style(self, style_attr):
        """Parse a ``key: value; key: value`` CSS string and return the
        values (e.g. font family, point size). Stops after the first value
        carrying a 'pt' unit, which is taken to be the font size."""
        style_and_name = style_attr.split(';')
        style_attributes = []

        for text in style_and_name:
            if ':' in text:
                style_name = text.split(':')[1].strip().replace("'", '')
                if 'pt' in style_name:
                    style_name = style_name.replace('pt', '')
                    style_attributes.append(style_name)
                    break

                style_attributes.append(style_name)

        return style_attributes
    

class MainWindow(QMainWindow):
    """Minimal window: a single button that kicks off the export thread."""

    def __init__(self):
        super().__init__()

        self.setWindowTitle("Video Exporter")

        self.export_button = QPushButton("Export Video")
        self.export_button.clicked.connect(self.export_video)

        container = QWidget()
        vbox = QVBoxLayout()
        vbox.addWidget(self.export_button)
        container.setLayout(vbox)
        self.setCentralWidget(container)

    def export_video(self):
        """Start the subtitle-burn export on a background thread. The thread
        is stored on self so it is not garbage-collected while running."""
        # (html path, x, y, stroke size, stroke colour, start, end)
        jobs = [("new_subtitle.html", "0", "0", 10, "#FFFFFF", "0:00:00.00", "0:00:05.00")]
        self.export_thread = videoExport(jobs, "other.mp4")
        self.export_thread.finished.connect(self.on_export_finished)
        self.export_thread.start()

    def on_export_finished(self):
        """Show a message box once the worker signals completion."""
        QMessageBox.information(self, "Export Finished", "Video export completed!")


if __name__ == "__main__":
    # Bootstrap the Qt application and show the exporter window.
    qt_app = QApplication(sys.argv)
    main_win = MainWindow()
    main_win.show()
    sys.exit(qt_app.exec())

The negatives of doing it this way are that if you have a text animation within your scene you will have to learn how to convert it to an .ASS animation (which should be possible), and if you have non-text animations on your QGraphicsScene and want to export them to an MP4 you will probably have to use multiprocessing to capture the scene (there could be other ways). The text stroke is only applicable if you are able to retrieve it; I'm unsure whether there is a way with the basic QGraphicsTextItem, but since I'm not using Qt's outline system and am instead drawing my own, I can grab it.

Some more information about the .ASS I found useful and the way I create my outline text: https://stackoverflow.com/a/78362730/22802649 https://hhsprings.bitbucket.io/docs/programming/examples/ffmpeg/subtitle/ass.html

Upvotes: 0

kesh
kesh

Reputation: 5543

I think the problem that you'll face with the current approach (UI screen capture to produce annotated video) is that you'll likely lose the original video resolution (say if its 1080p but displayed in 300x400 UI frame, then you'll end up with 300x400 video not 1080p).

A better approach IMO is to generate a text image (PNG) with a transparent background in FFmpeg (or by any other means) with the matching frame resolution (so 1080p in my example), and load each of these images onto the QGraphicsScene as a QGraphicsItem (which I presume can be dragged around to reposition).

This will get you the background video and a bunch of text images with their display times and position offsets. Then, FFmpeg can handle merging these files together.

To generate a text PNG, you can run

import subprocess as sp

# Target video resolution — the PNG must match it so the overlay maps 1:1.
video_size = [1920, 1080]
text = "hello world"
color = "black"
fontsize = 30
fontfile = "Freeserif.ttf"

textfile = "temp_01.png" # place it in a temp folder & increment

# lavfi 'color' source creates the canvas ('@0' sets alpha to 0, i.e. fully
# transparent, preserved by format=rgba); drawtext centres the text on it;
# '-vframes 1' emits a single PNG frame.
sp.run(
    [
        "ffmpeg",
        "-f", "lavfi",
        "-i", f"color=c={color}@0:size={video_size[0]}x{video_size[1]},format=rgba",
        "-vf", f'drawtext=fontsize={fontsize}:fontfile={fontfile}:text=\'{text}\':x=(w-text_w)/2:y=(h-text_h)/2',
        "-update", "1", "-vframes", "1",
        "-y", # if needed to overwrite old
        textfile,
    ]
)

This script creates a PNG with "hello world" in the middle of the screen. Let's say we want to put this text between timestamps 1 and 2s of the video with user-defined offset of [-200px,100px] (towards the bottom-left corner).


# Overlay timing and placement: show the text between 1 s and 2 s, offset
# by [-200 px, 100 px] from the frame origin.
text_start = 1
text_end = 2
text_xoff = -200
text_yoff = 100

videofile = "example.mp4"
outfile = "annotated.mp4"

# 'overlay' composites the text PNG ([1:v], produced by the previous
# snippet) onto the video ([0:v]); 'enable' restricts it to the time window.
sp.run(
    [
        "ffmpeg",
        "-i", videofile,  # [0:v]
        "-i", textfile,  # [1:v]
        "-filter_complex", f"[0:v][1:v]overlay=x={text_xoff}:y={text_yoff}:enable='between(t,{text_start},{text_end})'[out]",
        "-map", "[out]",
        '-y', # again, if need to overwrite previous output
        outfile,
    ]
)

If you have multiple text images to overlay, you need to list all the text files as additional inputs (-i ...) and run overlay filters in cascade:

[0:v][1:v]overlay=...[out1];
[out1][2:v]overlay=...[out2];
...
[outN][N:v]overlay=...[out]

Obviously, you want to generate the filtergraph expression programmatically.

Here are the links to the filters used color, format, drawtext, overlay. Familiarize yourself with these filters and the filtergraph construction in general (see the top of the linked doc page), especially be aware of the character escaping. (hint: place the text in single quotes and escape single quotes in your overlay text)

Notes

  • if text is too long, it gets truncated off video frame
  • once you get this mechanism down, you can look into if the piped PNG data can be used by Qt. FFmpeg supports base64 encoding using the data protocol.
  • overlay filter supports the text image to move in time thus animatable, but displaying the animation on Qt would be a pain
  • I think there is a limitation on how long a subprocess command can be. So you may hit the limit with this approach, especially if you encode the png data.

I'm not familiar with Qt end of the business. So, I'll leave that to you.

Feel free to ask questions in the comment section.

Upvotes: 0

Related Questions