Bocui
Bocui

Reputation: 469

Using tkinter to input into a variable, to be called

I'm currently working on a scraper-sort of program, which will enter a Wikipedia page, and in its current form, will scrape the references from the page.

I'd like to have a GUI that allows the user to input a Wikipedia page. I want that input to be assigned to the selectWikiPage variable, but I have had no luck so far.

Below is my current code.

import requests
from bs4 import BeautifulSoup
import re
from tkinter import *

#begin tkinter gui
def show_entry_fields():
    """Echo the entry's current text to the console, then empty the entry."""
    typed_url = e1.get()
    print("Wikipedia URL: %s" % typed_url)
    e1.delete(0, END)
# Build the window: a "Wikipedia URL" label, an entry box and a "Scrape" button.
master = Tk()
Label(master, text="Wikipedia URL").grid(row=0)
e1 = Entry(master)
# Pre-fill the entry with an example URL.
e1.insert(10,"http://en.wikipedia.org/wiki/randomness")
e1.grid(row=0, column=1)
# NOTE: the button is wired to master.quit, not to show_entry_fields, so
# pressing it only stops the event loop; nothing in this section reads e1.get().
Button(master, text='Scrape', command=master.quit).grid(row=3, column=0, sticky=W, pady=4)
# Run the Tk event loop; the statements that follow execute only once it returns.
mainloop( )

# Scraping section: takes the page URL from the console (not from the Tk entry)
# and prints every link found in the page's reference list.
session = requests.Session()
# print() runs first and returns None, so input() is called with None as its
# prompt; whatever is typed at the console ends up in selectWikiPage.
selectWikiPage = input(print("Please enter the Wikipedia page you wish to scrape from"))
if "wikipedia" in selectWikiPage:
    # NOTE(review): the page is requested with POST; a plain page fetch would
    # normally be a GET - confirm.
    html = session.post(selectWikiPage)
    bsObj = BeautifulSoup(html.text, "html.parser")

    findReferences = bsObj.find('ol', {'class': 'references'}) # isolate the references section of the page
    # Re-parse that section's markup on its own, then collect the href of
    # every <a> element that has one.
    href = BeautifulSoup(str(findReferences), "html.parser")
    links = [a["href"] for a in href.find_all("a", href=True)]

    for link in links:
        print("Link: " + link)

else:
    # The input did not contain the substring "wikipedia".
    print("Error: Please enter a valid Wikipedia URL")

Many thanks in advance.

Upvotes: 13

Views: 1479

Answers (2)

Deepayan Ghosh
Deepayan Ghosh

Reputation: 185

The code was almost correct. A few modifications were enough. Hope this helps. Please comment if you need any further clarifications.

    import requests
    from bs4 import BeautifulSoup
    import re
    from tkinter import *


    # you can declare selectWikiPage and master to be global and then avoid passing them around

    # begin tkinter gui
    def show_entry_fields():
        """Print the URL currently held in the entry box, then clear the box."""
        current_url = e1.get()
        print("Wikipedia URL: %s" % current_url)
        e1.delete(0, END)

    #utility which selects all the text from start to end in widget
    def select_all(event=None):
        """Select the whole contents of the widget that received *event*.

        Intended as a key-binding callback.  ``event`` is the Tk event
        object; its ``widget`` attribute is the entry whose text gets
        selected.  When called without an event there is no widget to act
        on, so nothing is selected.

        Returns the string ``'break'``, which tells Tk not to run any
        further handlers for the key press.
        """
        # The signature advertises ``event`` as optional, so honour that
        # instead of failing with AttributeError on ``None.widget``.
        if event is not None:
            event.widget.select_range(0, 'end')
        return 'break'


    #the code is same, just obtained the URL from the widget using get()

    def custom_scrape(e1, master):
        """Print the reference links of the Wikipedia page whose URL is in *e1*.

        Parameters:
            e1: the entry widget; ``e1.get()`` supplies the URL to scrape.
            master: the root window; ``master.quit()`` is called once the
                scrape (or the error message) has been printed.

        Every ``href`` found inside the page's ``<ol class="references">``
        list is printed as ``Link: <href>``.  If the text does not contain
        "wikipedia", an error message is printed instead.
        """
        # Surrounding whitespace from a copy/paste is not part of the URL.
        selectWikiPage = e1.get().strip()
        if "wikipedia" in selectWikiPage:
            # Retrieving a page is a GET; POST is meant for submitting data.
            # The ``with`` block closes the session's connections afterwards.
            with requests.Session() as session:
                html = session.get(selectWikiPage)
            bsObj = BeautifulSoup(html.text, "html.parser")
            # Isolate the references section of the page.
            findReferences = bsObj.find('ol', {'class': 'references'})
            # A page without a reference list yields None: print no links.
            if findReferences is not None:
                for a in findReferences.find_all("a", href=True):
                    print("Link: " + a["href"])
        else:
            print("Error: Please enter a valid Wikipedia URL")

        master.quit()


    # Build the window: one label, one entry box and the "Scrape" button.
    master = Tk()

    url_label = Label(master, text="Wikipedia URL")
    url_label.grid(row=0)

    e1 = Entry(master)
    # Bind Ctrl+A so that it selects the whole contents of the entry widget.
    e1.bind('<Control-a>', select_all)
    e1.insert(10, "Enter a wikipedia URL")
    e1.grid(row=0, column=1)

    # The command attribute takes a callable that itself accepts no arguments,
    # so a lambda is used to forward the entry and the root window to
    # custom_scrape.
    # Refer to: https://stackoverflow.com/questions/22723039/python-tkinter-passing-values-with-a-button for details
    scrape_button = Button(master, text='Scrape',
                           command=lambda: custom_scrape(e1, master))
    scrape_button.grid(row=3, column=0, sticky=W, pady=4)

    master.mainloop()

Upvotes: 2

Reblochon Masque
Reblochon Masque

Reputation: 36732

Here is a minimal example based on your code; it lets you use the entry field to capture the wiki page to visit, and prints the resulting URL on the console.
You can then use this URL to proceed with your scraping.

from tkinter import *

def m_quit():
    """Append the typed page title to the global ``wiki_url`` and close the window.

    Reads the page title from the entry ``e1``, extends the module-level
    ``wiki_url`` base with it, prints ``quitting`` and destroys the root
    window ``master``.
    """
    global wiki_url
    # No trailing '/' is appended: "Title/" names a different page than
    # "Title". Surrounding whitespace from the entry is dropped as well.
    wiki_url += e1.get().strip()
    print('quitting')
    master.destroy()

# Base address; m_quit extends it with whatever was typed in the entry.
wiki_url = 'http://en.wikipedia.org/wiki/'

# Build the window: a label, the entry box and the "Scrape" button.
master = Tk()

url_label = Label(master, text="Wikipedia URL")
url_label.grid(row=0)

e1 = Entry(master)
e1.grid(row=0, column=1)

scrape_button = Button(master, text='Scrape', command=m_quit)
scrape_button.grid(row=3, column=0, sticky=W, pady=4)

# Run the event loop, then show the URL that was assembled.
master.mainloop()

print(wiki_url)

Upvotes: 8

Related Questions