Nitheesh Chandrika
Nitheesh Chandrika

Reputation: 141

Pygtk WebKit get source html

Here is my sample code. How do I get the HTML source code of the current page? It only prints 'GString at 0x8875130'. How can I convert it to real text that contains the HTML?

from gi.repository import WebKit
from gi.repository import Gtk, Gdk

def get_source(webobj, frame):
    # Callback connected to the WebView's "load-finished" signal further down.
    # It does not use the webobj/frame arguments; it reads the module-level
    # `web` object instead.
    print "loading..."
    # get_data() hands back an object rather than plain text, so printing it
    # directly only shows a description such as 'GString at 0x...'.
    x = web.get_main_frame().get_data_source().get_data()
    print x

# Top-level window that will host the browser widget.
win = Gtk.Window()

# Create the WebKit view, start loading the page, and register get_source
# as the handler for the view's "load-finished" signal.
web = WebKit.WebView()
web.open("http://google.com")
web.connect("load-finished", get_source)

# Put the view inside the window.
win.add(web)

# Show the window together with the widget it contains.
win.show_all()

# Enter the GTK main loop.
# NOTE(review): nothing here stops the main loop when the window is closed;
# presumably the process keeps running afterwards - confirm.
Gtk.main()

Upvotes: 2

Views: 2428

Answers (3)

Cedar Hamilton
Cedar Hamilton

Reputation: 21

#Before you can use the require_version() method from gi, you need to import the gi module.
import gi

#Specify versions to import from the repository.
gi.require_version('Gtk','3.0')
gi.require_version('WebKit','3.0')

#Import the modules that will give us a Graphical User Interface (GUI) and a WebKit Browser.
from gi.repository import Gtk,WebKit

#Define your function to handle the WebView's "load-finished" event.  The webobj is a reference to the WebView that triggered the event.  The frame is the frame that triggered the event (useful if the loaded page has multiple frames, like a frameset).
def ShowSource(webobj,frame):
    #get_data() returns a reference to a GLib.String object, not a Python string.  Printing that object directly only shows a description of it, because that is all Python knows to print.
    SourceCodeStringObject=frame.get_data_source().get_data()

    #According to the WebKitGTK documentation, get_data() returns nothing (None in Python) when the frame has not loaded any data, for example after a failed load.  Report that instead of raising AttributeError inside the signal handler.
    if SourceCodeStringObject is None:
        import sys
        sys.stderr.write("No page source is available for this frame.\n")
        return

    #The text the object is carrying is available from its "str" member property.
    SourceCodeStringText=SourceCodeStringObject.str

    #Send the source code string text to the output stream.
    print(SourceCodeStringText)

#Create Window object.
Window=Gtk.Window()

#Set the text to display in the window's caption.
Window.set_title("Test of Python GTK and WebKit")

#Set the starting window size in pixels.
Window.set_default_size(480,320)

#Create the WebView object.
WebBrowser=WebKit.WebView()

#Set the event handler for the WebView's "load-finished" event to the ShowSource function.  This is done before the load is started so the handler is already in place when the signal fires.
WebBrowser.connect("load-finished",ShowSource)

#Tell the WebView object to load a website.  load_uri() is used because WebView.open() is deprecated in WebKitGTK.
WebBrowser.load_uri("https://stackoverflow.com/questions/24119290/pygtk-webkit-get-source-html")

#Add the WebView to the window.
Window.add(WebBrowser)

#Set the handler of the window closing to cause GTK to exit.  Without this, the program will hang when the window is closed, because its main loop that we start later will still be running.  Gtk.main_quit will stop the main loop for GTK.
Window.connect("delete-event",Gtk.main_quit)

#Display the window.
Window.show_all()

#Start GTK's main loop.
Gtk.main()

Upvotes: 2

José Brito
José Brito

Reputation: 1

This way works for me.

#!/usr/bin/env python
import webkit, gtk

def get_source(webobj, frame):
    """Print whatever get_data() returns for the main frame of ``web``.

    Connected to the WebView's "load-finished" signal.  The signal
    arguments are not used; the data source is looked up on the
    module-level ``web`` view instead.
    """
    print("loading...")
    data_source = web.get_main_frame().get_data_source()
    page_source = data_source.get_data()
    print(page_source)

# Main window: centered, 1024x768.
win = gtk.Window()
win.set_position(gtk.WIN_POS_CENTER_ALWAYS)
win.resize(1024,768)
# Stop the GTK main loop when the window is destroyed so the script exits.
win.connect('destroy', lambda w: gtk.main_quit())
win.set_title('Titulo')

# Container that gives the browser a 5 px border inside the window.
vbox = gtk.VBox(spacing=5)
vbox.set_border_width(5)

# Create a single WebView and pack it into the container.  get_source reads
# this module-level `web` object, so there must be exactly one view.
web = webkit.WebView()
vbox.pack_start(web, fill=True, expand=True)

# Configure the view before any request is issued so the settings
# (notably the user agent) apply to the first page load.
browser_settings = web.get_settings()
browser_settings.set_property('user-agent', 'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0')
browser_settings.set_property('enable-default-context-menu', True)
browser_settings.set_property('enable-accelerated-compositing', True)
browser_settings.set_property('enable-file-access-from-file-uris', True)
web.set_settings(browser_settings)

# Register the handler first, then start loading the page.
web.connect("load-finished", get_source)
web.open("http://www.google.co.ve")

# Add the container (which holds the view) to the window, not the view
# itself, so the border configured on the VBox takes effect.
win.add(vbox)
win.show_all()
gtk.main()

Upvotes: 0

user871199
user871199

Reputation: 1500

# x is the object returned by get_data(); its text lives in the .str member.
print(x.str)

The data is available as the .str member of the GLib.String object. For further details, try help(GLib.String) at the Python prompt after importing the libraries.

Upvotes: 3

Related Questions