andyabel
andyabel

Reputation: 344

PyQt4 & BeautifulSoup setting the Browser Window Size

I'm attempting to use PyQt4 to do some web scraping, but the site I'm attempting to scrape keeps thinking I'm a mobile device, and is not presenting the dataset available to a desktop or laptop (even though I'm using a Mozilla/5.0 user agent).

To try to find out why, I'm setting my URL to "whatsmyuseragent.com". I notice that it's telling me that although my Screen Resolution is 1920px x 1080px, my Browser Window Size is 0px x 0px. Could this be the problem?

Here is my code below. Any suggestions on what I need to change to convince the site I'm scraping to believe I'm a desktop or laptop (rather than a mobile) would be appreciated. Thanks.

import sys
from PyQt4 import QtGui
from PyQt4.QtGui import QApplication
from PyQt4.QtCore import QUrl
from PyQt4.QtWebKit import QWebPage
from PyQt4.QtNetwork import QNetworkRequest

import bs4 as bs
import urllib.request

class Client(QWebPage):
    """Web page object that loads a single URL and blocks until it is done.

    Constructing an instance creates the QApplication, issues the request
    with a custom User-Agent header and runs the event loop until the
    page's loadFinished signal fires.
    """

    def __init__(self, url):
        """Load *url* and return once loading has finished."""
        # The application object is created first, before the QWebPage base
        # class is initialised.
        self.app = QApplication(sys.argv)
        super(Client, self).__init__()

        # Build the request, including the User-Agent header sent to the site.
        self.request = QNetworkRequest(QUrl(url))
        self.request.setRawHeader("User-Agent", 'Mozilla/5.0')

        # Hook up the completion handler before the load is started.
        self.loadFinished.connect(self.on_page_load)
        self.mainFrame().load(self.request)

        # Runs the event loop; on_page_load() quits it.
        self.app.exec_()

    def on_page_load(self):
        """Quit the event loop so that __init__ can return."""
        self.app.quit()

# Load the diagnostic page; Client() only returns once loading has finished.
url = 'http://www.whatsmyuseragent.com'
client_response = Client(url)

# Parse the HTML of the page's main frame and dump it in readable form.
soup = bs.BeautifulSoup(client_response.mainFrame().toHtml(), 'lxml')
print(soup.prettify())

Upvotes: 0

Views: 661

Answers (1)

robyschek
robyschek

Reputation: 2035

Try setting the viewport size:

import sys
import re
from PyQt4 import QtGui
from PyQt4.QtGui import QApplication
from PyQt4.QtCore import QUrl, QSize
from PyQt4.QtWebKit import QWebPage
from PyQt4.QtNetwork import QNetworkRequest

import bs4 as bs

class Client(QWebPage):
    """Web page object that loads a single URL and blocks until it is done.

    The page is given an explicit viewport size before loading, so that a
    site inspecting the browser window dimensions is not shown an unset
    (0 x 0) window.
    """

    def __init__(self, url, viewport_size=(640, 480)):
        """Load *url* and return once the loadFinished signal has fired.

        url           -- address of the page to load (string).
        viewport_size -- (width, height) of the page viewport in pixels.
                         Defaults to the good ol' 640 x 480; pass something
                         like (1920, 1080) to look like a desktop browser.
        """
        # Reuse the application object if one already exists: Qt supports
        # only one QApplication per process, so unconditionally creating a
        # new one would break as soon as a second Client is constructed.
        app = QApplication.instance()
        if app is None:
            app = QApplication(sys.argv)
        self.app = app
        QWebPage.__init__(self)

        # Set the viewport before the load starts so the page is rendered
        # (and reports its window size) with these dimensions.
        self.setViewportSize(QSize(*viewport_size))

        self.loadFinished.connect(self.on_page_load)
        self.request = QNetworkRequest()
        self.request.setUrl(QUrl(url))
        self.request.setRawHeader("User-Agent", 'Mozilla/5.0')
        self.mainFrame().load(self.request)

        # Runs the event loop; on_page_load() quits it.
        self.app.exec_()

    def on_page_load(self):
        """Quit the event loop so that __init__ can return."""
        self.app.quit()

# Load the diagnostic page; Client() only returns once loading has finished.
url = 'http://www.whatsmyuseragent.com'
client_response = Client(url)
source = client_response.mainFrame().toHtml()
soup = bs.BeautifulSoup(source, 'lxml')

# some meat from the soup: the element reporting the browser window size,
# with runs of whitespace collapsed to single spaces.
# The pattern is a raw string so that \s reaches the regex engine without
# Python treating it as an (invalid) string escape sequence.
print(re.sub(r'\s+', ' ', soup.find(class_='browser-window').text))

This produces the following for me:

625 px x 465 px

Upvotes: 1

Related Questions