blunty6363
blunty6363

Reputation: 29

Getting the link to PDF files from QWebEngineView

I am working with QWebEngineView and have found that clicking a link to a PDF file does not open the file. As far as I can tell, QWebEngineView has no way to display PDF files on its own. After some research I can now download PDF files and display them separately; however, I need to get the PDF's link from QWebEngineView to know which file to download. The problem is that the .url() function only returns the URL of the current web page and is not affected by my clicking the PDF link, and I can't find any other way to get the link. Any ideas on how to get the link to the PDF file? Any help is appreciated.

Upvotes: 0

Views: 749

Answers (1)

eyllanesc
eyllanesc

Reputation: 243897

You can use JavaScript to collect the links on the page (the snippet below keeps only links on the same host as the page) and then filter them by extension:

import sys

from PyQt5.QtCore import QCoreApplication, QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEngineView


def main():
    """Load the sample page, print its same-host links ending in ".pdf", then quit.

    A QWebEngineView loads the page; once loading succeeds, a JavaScript
    snippet collects the ``href`` of every link whose hostname matches the
    page's hostname and hands the list to ``callback``. The process exits with
    the status returned by the Qt event loop.
    """
    app = QApplication(sys.argv)

    url = QUrl("https://www.princexml.com/samples/")
    view = QWebEngineView()

    def callback(links):
        """Print each link that ends in ".pdf" and stop the event loop."""
        # NOTE(review): the result may be None when the script yields no
        # value -- confirm; `or []` keeps the loop safe in that case.
        for link in links or []:
            if link.endswith(".pdf"):
                print(link)
        # Quit once, after the whole list has been handled. Inside the loop
        # it was requested per link and never for an empty list, which left
        # the application running when the page had no matching links.
        QCoreApplication.quit()

    def handle_load_finished(ok):
        """Run the link-collecting script once the page has loaded successfully."""
        if ok:
            view.page().runJavaScript(
                """
  (function() {
    // https://stackoverflow.com/a/3824292/6622587
    var urls = [];
    for(var i = document.links.length; i --> 0;)
        if(document.links[i].hostname === location.hostname)
            urls.push(document.links[i].href);
    return urls;
  })();""",
                callback,
            )

    view.loadFinished.connect(handle_load_finished)
    view.load(url)
    view.resize(640, 480)
    view.show()

    sys.exit(app.exec_())


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Output:

http://www.princexml.com/howcome/2016/samples/magic6/magic.pdf
http://www.princexml.com/howcome/2016/samples/magic6/magic.pdf
https://www.princexml.com/samples/flyer/flyer.pdf
https://www.princexml.com/samples/flyer/flyer.pdf
https://www.princexml.com/samples/catalog/PrinceCatalogue.pdf
https://www.princexml.com/samples/catalog/PrinceCatalogue.pdf
http://www.princexml.com/howcome/2016/samples//malthus/essay.pdf
http://www.princexml.com/howcome/2016/samples//malthus/essay.pdf
http://www.princexml.com/howcome/2016/samples/magic8/index.pdf
http://www.princexml.com/howcome/2016/samples/magic8/index.pdf
http://www.princexml.com/howcome/2016/samples/invoice/index.pdf
https://www.princexml.com/samples/invoice/invoicesample.pdf
http://www.princexml.com/howcome/2016/samples/invoice/index.pdf
https://www.princexml.com/samples/invoice/invoicesample.pdf

Update:

If you only want to download the PDF, the above is not necessary: QWebEngineView supports downloads. Connect to the profile's downloadRequested signal, choose a path, and accept the download:

import sys

from PyQt5.QtCore import QCoreApplication, QFileInfo, QUrl
from PyQt5.QtWidgets import QApplication, QFileDialog
from PyQt5.QtWebEngineWidgets import QWebEngineView


def handle_download_erequested(download):
    """Ask the user where to save a requested download and accept it.

    Progress and state changes of ``download`` are printed as they arrive, and
    a message is printed when it finishes. The save dialog is pre-filled with
    the path component of the download's URL and filtered by that path's
    suffix. If the dialog returns an empty path, the download is not accepted.
    """

    def report_finished():
        print("download finished")

    # Hook up the logging before the download is accepted.
    download.downloadProgress.connect(print)
    download.stateChanged.connect(print)
    download.finished.connect(report_finished)

    # The URL's path is used as the suggested name (download.path() is the
    # alternative the original author noted).
    suggested = download.url().path()
    name_filter = "*." + QFileInfo(suggested).suffix()
    chosen, _selected_filter = QFileDialog.getSaveFileName(
        None, "Save File", suggested, name_filter
    )
    if not chosen:
        return

    download.setPath(chosen)
    download.accept()


def main():
    """Show the sample page and route its download requests to the save handler.

    The view's profile emits ``downloadRequested`` for each requested
    download; it is connected to ``handle_download_erequested``. The process
    exits with the status returned by the Qt event loop.
    """
    application = QApplication(sys.argv)

    start_url = QUrl("https://www.princexml.com/samples/")
    browser = QWebEngineView()

    # Downloads are announced on the profile, not on the view itself.
    profile = browser.page().profile()
    profile.downloadRequested.connect(handle_download_erequested)

    browser.load(start_url)
    browser.resize(640, 480)
    browser.show()

    exit_code = application.exec_()
    sys.exit(exit_code)


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

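QWebEngineView also has a built-in PDF viewer (available in Qt 5.13 and later); enable the PluginsEnabled setting and load the PDF URL directly: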

import sys

from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets


def main():
    """Print the PyQt5/Qt versions, then open a PDF URL in a QWebEngineView.

    The view's ``PluginsEnabled`` setting is switched on before the PDF URL is
    loaded. The process exits with the status returned by the Qt event loop.
    """
    version_line = f"PyQt5 version: {QtCore.PYQT_VERSION_STR}, Qt version: {QtCore.QT_VERSION_STR}"
    print(version_line)

    application = QtWidgets.QApplication(sys.argv)
    browser = QtWebEngineWidgets.QWebEngineView()

    # Turn plugins on for this view before loading the document.
    plugins_flag = QtWebEngineWidgets.QWebEngineSettings.PluginsEnabled
    browser.settings().setAttribute(plugins_flag, True)

    pdf_url = QtCore.QUrl("https://www.princexml.com/samples/invoice/invoicesample.pdf")
    browser.load(pdf_url)
    browser.resize(640, 480)
    browser.show()

    exit_code = application.exec_()
    sys.exit(exit_code)


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

enter image description here

Upvotes: 1

Related Questions