Ray234
Ray234

Reputation: 173

Converting multiple HTML files to PDF using PyQt5

I tried following this answer: How to use PyQT5 to convert multiple HTML docs to PDF in one loop

I modified it to convert all HTML files found in a local folder. For example, htmls is the list of HTML files to be converted: [Q:\Ray\test1.html, Q:\Ray\prac2.html]

This is the code. However, when I try to run it, Python just freezes and I have to stop the run.

import os
import glob
from PyQt5 import QtWidgets, QtWebEngineWidgets

class PdfPage(QtWebEngineWidgets.QWebEnginePage):
    """QWebEnginePage subclass meant to print a sequence of HTML files to PDF.

    The two signal handlers are chained: ``loadFinished`` triggers
    ``printToPdf`` and ``pdfPrintingFinished`` asks for the next file.

    NOTE(review): no method of this class calls ``load()``, so this code
    itself never starts a page load; ``printToPdf`` is only reached from the
    ``loadFinished`` handler.
    """

    def __init__(self):
        super().__init__()
        # Pending inputs (replaced by an iterator in convert()) and the item
        # currently being processed.
        self._htmls = []
        self._current_path = ""

        self.setZoomFactor(1)
        # Chain the steps through the page's signals.
        self.loadFinished.connect(self._handleLoadFinished)
        self.pdfPrintingFinished.connect(self._handlePrintingFinished)

    def convert(self, htmls):
        """Store *htmls* as the work queue and fetch the first entry."""
        # zip() over a single iterable yields 1-tuples, so every item taken
        # from this iterator is ``(path,)`` rather than the path string.
        self._htmls = iter(zip(htmls))
        self._fetchNext()

    def _fetchNext(self):
        """Advance to the next queued item.

        Returns False once the queue is exhausted. On success there is no
        explicit return, so the result is None (also falsy).
        """
        try:
            self._current_path = next(self._htmls)
        except StopIteration:
            return False

    def _handleLoadFinished(self, ok):
        """Print the loaded page to PDF when the load succeeded."""
        # The output path passed here is the stored input item itself.
        if ok:
            self.printToPdf(self._current_path)

    def _handlePrintingFinished(self, filePath, success):
        """Report the finished PDF, then continue or quit the application."""
        print("finished:", filePath, success)
        # _fetchNext() returns False or None, so this condition is always true.
        if not self._fetchNext():
            QtWidgets.QApplication.quit()


if __name__ == "__main__":

    # Collect every file matching *.HTML in the directory that contains this
    # script; the pattern is built with a literal backslash separator.
    current_dir = os.path.dirname(os.path.realpath(__file__))
    folder= current_dir+ '\\*.HTML'
    htmls= glob.glob(folder)

    # Create the application and the page, queue the files, then enter the
    # Qt event loop; the code below it runs only after exec_() returns.
    app = QtWidgets.QApplication([])
    page = PdfPage()
    page.convert(htmls)
    app.exec_()

    print("finished")

Upvotes: 0

Views: 884

Answers (1)

eyllanesc
eyllanesc

Reputation: 243975

It seems that the OP has not understood the logic of my previous solution, which is:

  1. Get the resource, in this case files,
  2. Load it on the page,
  3. When the load is finished then print the content of the page,
  4. When the printing is finished then execute step 1 with the next resource.

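The code in the question never performs step 2: the file is never loaded into the page, so loadFinished is never emitted, nothing is printed, and the application never quits. In addition, it is recommended that the path of the PDF be different from the path of the HTML file: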

import os
import glob
from PyQt5.QtCore import QUrl
from PyQt5 import QtWidgets, QtWebEngineWidgets


class PdfPage(QtWebEngineWidgets.QWebEnginePage):
    """Web page that converts a queue of local HTML files to PDF, one by one.

    The work is driven by the page's own signals:

    1. take the next path from the queue,
    2. load it into the page,
    3. when the load finishes, print the page to ``<path>.pdf``,
    4. when printing finishes, go back to step 1.

    When the queue is exhausted the running QApplication is asked to quit.
    A file that fails to load is reported and skipped so that the chain never
    stalls with the event loop still running.
    """

    def __init__(self):
        super().__init__()
        # An empty iterator (not a list) so _fetchNext() is safe to call even
        # before convert() has been given any work.
        self._htmls = iter(())
        self._current_path = ""

        self.setZoomFactor(1)
        self.loadFinished.connect(self._handleLoadFinished)
        self.pdfPrintingFinished.connect(self._handlePrintingFinished)

    def convert(self, htmls):
        """Queue *htmls* (an iterable of file paths) and start the first load.

        Returns True if a load was started, False if *htmls* was empty. In
        the latter case no signal will ever fire, so the caller should not
        enter the event loop waiting for this page to quit the application.
        """
        self._htmls = iter(htmls)
        return self._fetchNext()

    def _fetchNext(self):
        """Load the next queued file; return False when the queue is empty."""
        try:
            self._current_path = next(self._htmls)
        except StopIteration:
            return False
        self.load(QUrl.fromLocalFile(self._current_path))
        return True

    def _fetchNextOrQuit(self):
        """Continue with the next file, or quit the application when done."""
        if not self._fetchNext():
            QtWidgets.QApplication.quit()

    def _handleLoadFinished(self, ok):
        """Print the loaded page, or skip the file if loading failed."""
        if ok:
            # Append ".pdf" so the output never overwrites the HTML source.
            self.printToPdf(self._current_path + ".pdf")
        else:
            # Without this branch a failed load would end the signal chain
            # and leave the event loop running forever.
            print("failed to load:", self._current_path)
            self._fetchNextOrQuit()

    def _handlePrintingFinished(self, filePath, success):
        """Report the result for *filePath* and move on to the next file."""
        print("finished:", filePath, success)
        self._fetchNextOrQuit()


if __name__ == "__main__":

    # Convert every HTML file located next to this script.
    current_dir = os.path.dirname(os.path.realpath(__file__))
    # os.path.join keeps the pattern valid on every platform (a hard-coded
    # "\\" only works on Windows); the bracket classes make the extension
    # match case-insensitively on case-sensitive file systems as well.
    pattern = os.path.join(current_dir, "*.[hH][tT][mM][lL]")
    htmls = glob.glob(pattern)
    print(htmls)
    # Only start the event loop when there is work to do: with an empty list
    # nothing would ever ask the application to quit.
    if htmls:
        app = QtWidgets.QApplication([])
        page = PdfPage()
        page.convert(htmls)
        app.exec_()
    print("finished")

Upvotes: 3

Related Questions