Reputation: 105
I'm trying to build a GUI over some existing code and I'm running into a MissingSchema
error. I am aware of the general problem but not the best solution.
Basically, before the tkinter mainloop()
I'm trying to make a requests
module request in order to create a BeautifulSoup object which is needed for a number of functions. However, to make that request I need the url
variable filled with a URL of the user's choosing, and that variable cannot be filled until after mainloop()
executes. Consequently the requests
call fails as the url is empty, giving me the MissingSchema
error. You can run the below code to see what I mean:
from tkinter import *
from tkinter import scrolledtext as st
import requests
import re
from bs4 import BeautifulSoup
root = Tk()
url_entry = Entry(root)
# BUG (the subject of this question): this line runs before mainloop(),
# so the Entry widget is still empty and url is always "".
url = url_entry.get()
# NOTE(review): a tkinter Text widget with state='disabled' ignores
# programmatic insert() calls, so the error text below may never appear — confirm.
log_text = st.ScrolledText(root, state='disabled')
start_button = Button(root, text='Run program', command=lambda: [seo_find_stopwords(urlSoup)])
url_entry.grid(column=0, row=1)
log_text.grid(column=2, row=0, rowspan=3)
start_button.grid(column=1, row=5)
agent = "Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:10.0) Gecko/20100101 Firefox/10.0"
# attempts to access provided URL, returns errors if unable
try:
    # 'agent' added as part of effort to avoid HTTP Error 403: Forbidden
    # url is "" here (see BUG above), so requests raises MissingSchema.
    url_request = requests.get(url, headers={'User-Agent': agent})
    url_request.raise_for_status()
    urlSoup = BeautifulSoup(url_request.text, 'lxml')
except requests.exceptions.MissingSchema as exc:
    log_text.insert(INSERT, "ERROR: Invalid URL provided. Please try again with a valid URL.")
    raise exc
# searches HTML page title for SEO stop words from stopwords.txt, then provides number and list of present stop words
def seo_find_stopwords(urlSoup):
    """Scan the page <title> for stop words listed in stopwords.txt and log any hits."""
    found = []
    if urlSoup.title:
        title_text = urlSoup.title.text.casefold()
        with open('stopwords.txt', 'r', encoding='utf-8') as file:
            for line in file:
                word = line.rstrip('\n')
                if re.search(r'\b' + word + r'\b', title_text):
                    found.append(word)
    if len(found) > 0:
        log_text.insert(INSERT, "{0} stop words were found in your page title. If possible, it would be good to "
                                "reduce them. The stop words found are: {1}".format(len(found), found))
# Start the Tk event loop; nothing after this runs until the window closes.
root.mainloop()
Sorry if this is a bit large; I tried to condense it as much as possible. I'd like to know the best way to fix this error. My impression is that the solution may be to put the requests.get()
call into a function and use it to return the urlSoup
object to the functions that need it.
Upvotes: 0
Views: 251
Reputation: 3275
You are attempting to read the URL before the user has had a chance to enter one. So place the request in a function and call it once the Entry
widget has text, or bind the function to a button as an event handler.
Here is a demo. (After typing a URL into the Entry
widget, you can either press the Enter key or click the run button.)
from tkinter import *
import requests
from tkinter import scrolledtext as st
import re
from bs4 import BeautifulSoup
# searches HTML page title for SEO stop words from stopwords.txt, then provides number and list of present stop words
def seo_find_stopwords(urlSoup):
    """Search the page <title> for SEO stop words listed in stopwords.txt.

    Writes the count and the list of matched words to the GUI log widget.
    Does nothing when the page has no <title> or no stop words match.
    """
    # Fix: removed stray debug statement `print('No')` that polluted stdout
    # on every call.
    stopwords_count = 0
    stopwords_list = []
    if urlSoup.title:
        title_text = urlSoup.title.text.casefold()
        with open('stopwords.txt', 'r', encoding='utf-8') as file:
            for line in file:
                word = line.rstrip('\n')
                # NOTE(review): word is interpolated into the pattern unescaped;
                # a stop word containing regex metacharacters would misbehave
                # (re.escape would harden this) — confirm stopwords.txt is plain words.
                if re.search(r'\b' + word + r'\b', title_text):
                    stopwords_count += 1
                    stopwords_list.append(word)
    if stopwords_count > 0:
        log_text.insert(INSERT, "{0} stop words were found in your page title. If possible, it would be good to "
                                "reduce them. The stop words found are: {1}".format(stopwords_count, stopwords_list))
def request_url(event=None):
    """Fetch the URL currently typed in the entry box and rebuild the global soup.

    Bound both to <Return> on the entry widget and to the Run button, hence
    the optional `event` parameter (supplied by tkinter for key bindings).
    """
    global urlSoup
    try:
        # 'agent' header is sent to avoid HTTP Error 403: Forbidden
        response = requests.get(url_entry.get(), headers={'User-Agent': agent})
        response.raise_for_status()
    except requests.exceptions.MissingSchema as exc:
        log_text.insert(INSERT, "ERROR: Invalid URL provided. Please try again with a valid URL.")
        raise exc
    else:
        urlSoup = BeautifulSoup(response.text, 'lxml')
root = Tk()
# Placeholder until request_url() replaces it with a BeautifulSoup object.
urlSoup =''
url_entry = Entry(root)
# Pressing Enter inside the entry box also triggers the fetch.
url_entry.bind('<Return>', request_url)
#url = url_entry.get()
# NOTE(review): a tkinter Text widget with state='disabled' ignores
# programmatic insert() calls, so logged messages may never appear — confirm.
log_text = st.ScrolledText(root, state='disabled')
# request_url() returns None, so the `or` always proceeds to run the scan
# with the freshly-set global urlSoup.
start_button = Button(root, text='Run program', command=lambda: request_url() or [seo_find_stopwords(urlSoup)])
url_entry.grid(column=0, row=1)
log_text.grid(column=2, row=0, rowspan=3)
start_button.grid(column=1, row=5)
agent = "Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:10.0) Gecko/20100101 Firefox/10.0"
# attempts to access provided URL, returns errors if unable
root.mainloop()
Upvotes: 2