Atheuz
Atheuz

Reputation: 331

Screenshot of multiple webpages in a headless QtWebKit browser in Python

I am attempting to render multiple webpages and take screenshots of them, but I can only get it to work when rendering a single webpage. When I try it on multiple pages, the program either stops dead in its tracks and hangs forever, or it ignores the images and CSS and just extracts the text of the site into one long block of text. Usually it hangs.

Code I'm using to render the webpage in memory is this:

class Render(QWebPage):
    """QWebPage that loads one URL and blocks inside its constructor until the load finishes."""
    def __init__(self, url):
        """Create the application object, start loading `url`, and run the event loop.

        url: address handed to QUrl() and loaded into the main frame.
        """
        # NOTE(review): a new QApplication is constructed for every Render
        # instance, so rendering several URLs constructs several of them.
        self.app = QApplication(sys.argv)
        QWebPage.__init__(self)

        # Settings
        s = self.settings()
        #s.setAttribute(QWebSettings.AutoLoadImages, False)
        s.setAttribute(QWebSettings.JavascriptCanOpenWindows, False)
        s.setAttribute(QWebSettings.PluginsEnabled, True)

        # Hook up the completion callback before the load is started.
        self.loadFinished.connect(self._loadFinished)
        self.mainFrame().load(QUrl(url))
        #self.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
        self.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
        # Enter the event loop here; _loadFinished() calls quit() on the same
        # application object to leave it again.
        self.app.exec_()

    def _loadFinished(self, result):
        """Callback for the loadFinished signal: size the viewport and quit the event loop.

        result: value delivered by the signal; it is not inspected here.
        """
        # Keep the frame on the instance so the caller can render it later.
        self.frame = self.mainFrame()
        # Use the page's own content height, but force the width to 1366.
        size = self.frame.contentsSize()
        size.setWidth(1366)
        self.setViewportSize(size)
        self.app.quit()

And here's how I'm saving the image:

def run(url):
    """Render `url` and save a PNG screenshot of it inside the "output" directory."""
    # Switch into "output" so the relative file name used below lands there.
    # NOTE(review): presumably the directory must already exist - confirm.
    os.chdir("output")

    # The Render constructor runs the event loop itself, so r.frame and the
    # viewport size are already set when it returns.
    r = Render(url)
    # Paint the frame into an in-memory image the size of the viewport.
    image = QImage(r.viewportSize(), QImage.Format_ARGB32)
    painter = QPainter(image)
    r.frame.render(painter)
    painter.end()
    # File name is derived from the URL by os_safe_name().
    fp = "%s.png" % os_safe_name(url)
    image.save(fp)
    # Step back out of "output"; this line is not reached if anything above raises.
    os.chdir("..")

Anyone know why this is happening?

Upvotes: 4

Views: 2387

Answers (2)

dbr
dbr

Reputation: 169593

Pretty much as described in Luke's answer, I shuffled things around to avoid creating a QApplication instance for each Render.

Not the tidiest, but works for me:

import re
import sys
import time

# Tested with PySide 1.0.9, changing imports to PyQt should work identically
from PySide.QtCore import Qt, QUrl
from PySide.QtGui import QApplication, QImage, QPainter
from PySide.QtWebKit import QWebPage, QWebSettings


def os_safe_name(url):
    """Turn *url* into a string that is safe to use as a file name.

    Each run of characters other than ASCII letters, digits, ``_`` and ``-``
    is replaced by a single underscore; any run of two or more underscores
    left after that is then collapsed to one.
    """
    unsafe_runs = re.compile("[^a-zA-Z0-9_-]+")
    underscore_runs = re.compile("_{2,}")
    return underscore_runs.sub("_", unsafe_runs.sub("_", url))


class Render(QWebPage):
    """Off-screen web page that loads a URL and saves a PNG screenshot of it.

    The load is started in the constructor and completes asynchronously, so
    the caller has to keep Qt events being processed until ``finished``
    becomes True. No QApplication is created here; one must already exist.

    Attributes:
        url: the address passed to the constructor.
        finished: False until the loadFinished callback has run to the end
            (successfully or not).
        load_ok: None while loading, afterwards the truth value of the status
            delivered by the loadFinished signal.
        filepath: None until a screenshot was written successfully,
            afterwards the path of the PNG file.
    """

    def __init__(self, url):
        """Configure the page and start loading `url`."""
        QWebPage.__init__(self)

        self.url = url
        self.finished = False
        self.load_ok = None
        # Defined up front so it can be read even when nothing gets saved.
        self.filepath = None

        # Settings
        s = self.settings()
        #s.setAttribute(QWebSettings.AutoLoadImages, False)
        s.setAttribute(QWebSettings.JavascriptCanOpenWindows, False)
        s.setAttribute(QWebSettings.PluginsEnabled, True)

        #self.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
        self.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)

        # When page is loaded, callback saves image to file.
        # Connected before load() so the signal cannot be missed.
        self.loadFinished.connect(self._loadFinished)

        self.mainFrame().load(QUrl(url))

    def _loadFinished(self, result):
        """Callback for loadFinished: render the page to ``output/<name>.png``.

        result: load status delivered by the signal. It is recorded in
            ``load_ok``; the page is rendered either way, as before.
        """
        try:
            self.load_ok = bool(result)

            # Full content height, fixed 1366 px width.
            frame = self.mainFrame()
            size = frame.contentsSize()
            size.setWidth(1366)
            self.setViewportSize(size)

            image = QImage(self.viewportSize(), QImage.Format_ARGB32)
            # QImage(size, format) leaves the pixel data uninitialised; start
            # from opaque white (0xAARRGGBB) so unpainted areas are not garbage.
            image.fill(0xFFFFFFFF)

            painter = QPainter(image)
            frame.render(painter)
            painter.end()

            path = "output/%s.png" % os_safe_name(self.url)
            # save() returns False on failure (e.g. the directory is missing);
            # only publish the path when the file was really written.
            if image.save(path):
                self.filepath = path
        finally:
            # Always signal completion, otherwise a caller polling `finished`
            # would wait forever after an error in this callback.
            self.finished = True


def run(url, app=None):
    """Render `url` to a PNG and return the value of the page's ``filepath``.

    url: address of the page to capture.
    app: the QApplication to pump events on. When omitted, the application
        object that already exists in this process is reused, and a new one
        is created only if there is none. This avoids constructing a second
        QApplication when run() is called repeatedly without `app`.

    Blocks until the Render instance reports ``finished``.
    """
    if app is None:
        app = QApplication.instance()
        if app is None:
            app = QApplication(sys.argv)

    r = Render(url)

    # Pump events by hand instead of app.exec_(), so the application never
    # has to be quit and can serve the next page as well.
    while not r.finished:
        app.processEvents()
        # Pause 10 ms between passes.
        time.sleep(0.01)

    return r.filepath


if __name__ == '__main__':
    # One QApplication, shared by every run() call below.
    app = QApplication(sys.argv)

    # Parenthesised single-argument print behaves the same as a Python 2
    # statement and as a Python 3 function call.
    print(run("http://stackoverflow.com", app=app))
    print(run("http://google.com", app=app))

Upvotes: 4

Luke
Luke

Reputation: 11644

I presume you are creating multiple instances of your Render class. If this is the case, then you are most likely having problems because you create multiple QApplication instances. Instead, create a single QApplication and share it between all of your Render instances.

You'll also probably need to stop using app.quit() since you want the QApplication to continue functioning. Furthermore, since app.exec_() won't exit until you call quit(), you'll need to make your own event loop instead. Something like this:

# Hand-rolled event loop: keep servicing pending Qt events until the
# finished flag is set.
while not self.finished:
    self.app.processEvents()
    # Pause 10 ms between passes.
    time.sleep(0.01)

Upvotes: 1

Related Questions