Reputation: 2607
I've made a little program that scrapes links from yahoo with any keyword or phrase you want and as far as you want. I added an input at the end to ask the user if they want to input more terms and scrape more links (basically restart the program without them exiting and clicking again if they need to do more than one keyword). how would I make my code restart itself?
import bs4
import requests
import time
import lxml
from colorama import Fore, init
init()
print(Fore.RED + "▓██ ██▓ ▄▄▄ ██░ ██ ▒█████ ▒█████ ██████ ██▓ ▄▄▄ ░▒████ ██▓▓█████ ██▀███ ")
print(" ▒██ ██▒▒████▄ ▓██░ ██▒▒██▒ ██▒▒██▒ ██▒ ▒██ ▒ ▓██▒ ▒████▄ ▒██ ██░░█ ▀ ▓██ ▒ ██▒")
print(" ▒██ ██░▒██ ▀█▄ ▒██▀▀██░▒██░ ██▒▒██░ ██▒ ░ ▓██▄ ▒██░ ▒██ ▀█▄ ▒██ ██░▒███ ▓██ ░▄█ ▒")
print(" ░ ▐██▓░░██▄▄▄▄██ ░▓█ ░██ ▒██ ██░▒██ ██░ ▒ ██▒▒██░ ░██▄▄▄▄██ ░ ▐██▓░▒██ ▄ ▒██▀▀█▄ ")
print(" ░ ██▒▓░ ▓█ ▓██▒░▓█▒░██▓░ ████▓▒░░ ████▓▒░ ▒██████▒▒░██████▒▓█ ▓██▒ ░ ██▒▓░░█████▒░██▓ ▒██▒")
print(" ██▒▒▒ ▒▒ ▓▒█░ ▒ ░░▒░▒░ ▒░▒░▒░ ░ ▒░▒░▒░ ▒ ▒▓▒ ▒ ░░ ▒░▓ ░▒▒ ▓▒█░ ██▒▒▒ ░░ ▒░ ░░ ▒▓ ░▒▓░")
print(" ▓██ ░▒░ ▒ ▒▒ ░ ▒ ░▒░ ░ ░ ▒ ▒░ ░ ▒ ▒░ ░ ░▒ ░ ░░ ░ ▒ ░ ▒ ▒▒ ░▓██ ░▒░ ░ ░ ░ ░▒ ░ ▒░")
print(" ▒ ▒ ░░ ░ ▒ ░ ░░ ░░ ░ ░ ▒ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ▒ ▒ ▒ ░░ ░ ░░ ░ ")
print(" ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░░ ░ ░ ░ ░ ")
print(" ░ ░ ░ ░ ")
print("")
print("The Link Scraper For Yahoo")
print("")
print("Created By IronKey | V1.0 beta | python 3.7.4")
print("")
your_keyword = input("enter your dork or search term : ")
print("")
linkNumber = int( input("link depth? ") ) # <--- Raises Error if input is not a number
lst = []
# https://search.yahoo.com/search?p=bruhmoment&pz=10&ei=UTF-8&fr=yfp-t&bct=0&fp=1&b=31&pz=10&bct=0&xargs=0
link = "https://search.yahoo.com/search?p={}&pz=10&ei=UTF-8&fr=yfp-t&bct=0&fp=1&b={}&bct=0&xargs=0"
for i in range( linkNumber ):
lst.append( link.format( your_keyword, 1 + (i+1)*10 ) )
print("")
print( lst )
print("")
print("beginning search... ")
print("")
for x in tqdm(range(i)):
time.sleep(0.0001)
for i in lst:
source = requests.get(i)
bs4call = bs4.BeautifulSoup(source.text, "lxml")
links = bs4call.findAll('a', {'class':'ac-algo fz-l ac-21th lh-24'})
for link in links:
with open('links.txt', 'a') as out:
out.write(str(link['href']) + "\n")
print("links scraped and saved to file : links.txt")
print("")
deepParseInput = input("would you like to run more dorks? : ")
print("")
if deepParseInput == "y" or deepParseInput == "Y":
reload()
else:
print("I really hope you liked my program! Bye Bye - IronKey")
print("")
input("Press ENTER to leave the program --> ")
#for link in links:
#with open('baselinks.txt', 'a') as out:
#out.write(str(link['href']) + "\n")
print("")
deepParseInput = input("would you like to run more dorks? : ")
print("")
if deepParseInput == "y" or deepParseInput == "Y":
#restart the program
else:
print("I really hope you liked my program! Bye Bye - IronKey")
print("")
input("Press ENTER to leave the program --> ")
Upvotes: 1
Views: 63
Reputation: 352
A good idea would be transforming your code into a function and call the function in case the user wants to restart the program.
Upvotes: 3