Reputation: 49
I'm trying to open a URL using two different approaches (both of which have already worked for me on other sites), but neither of them is working.
from urllib.request import urlopen as ureq
import requests
from bs4 import BeautifulSoup
def main():
    """Fetch an Amazon search-results page with urllib and print the parsed HTML."""
    url = "https://www.amazon.com/s?k=black+watch&s=review-rank&qid=1568506943&ref=sr_st_review-rank"
    # The context manager closes the connection even if read() raises.
    with ureq(url) as client:
        page = client.read()
    amazon_soup = BeautifulSoup(page, "html.parser")
    # amazon_soup = get_page(url)
    print(amazon_soup)
def get_page(url):
    """Download ``url`` with requests and return it parsed as BeautifulSoup.

    Returns ``None`` (after printing a short diagnostic) when the request
    itself fails or when the server answers with a non-success status code.
    """
    try:
        # A timeout keeps the call from hanging forever on a stalled server.
        response = requests.get(url, timeout=10)
    except requests.exceptions.RequestException:
        # Covers connection errors, timeouts, invalid URLs, and similar.
        print("oops")
        return None
    if not response.ok:
        print(('server responded', response.status_code))
        # Return explicitly: there is no parsed page to hand back here.
        return None
    return BeautifulSoup(response.text, 'html.parser')
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
It is supposed to print the HTML for this search, but I keep getting "urllib.error.HTTPError: HTTP Error 503: Service Unavailable". Even if I try the other way (the call that is commented out for now), I get a 503 error.
Upvotes: 0
Views: 958
Reputation: 1204
You need to attach a proper User-Agent header to your request; have a look at this snippet. Websites like Amazon and Reddit expect a proper user agent, or else they'll respond with 503.
from urllib.request import urlopen, Request
import requests
from bs4 import BeautifulSoup
def main():
    """Fetch an Amazon search-results page with a browser User-Agent and print it."""
    url = "https://www.amazon.com/s?k=black+watch&s=review-rank&qid=1568506943&ref=sr_st_review-rank"
    # Amazon answers 503 to urllib's default User-Agent, so send a browser one.
    req = Request(
        url,
        headers={
            'user-agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20',
        },
    )
    # The context manager closes the connection even if read() raises.
    with urlopen(req) as client:
        page = client.read()
    amazon_soup = BeautifulSoup(page, "html.parser")
    # amazon_soup = get_page(url)
    print(amazon_soup)
def get_page(url):
    """Download ``url`` with requests and return it parsed as BeautifulSoup.

    The request carries a browser User-Agent header. Returns ``None`` (after
    printing a short diagnostic) when the request itself fails or when the
    server answers with a non-success status code.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20',
    }
    try:
        # A timeout keeps the call from hanging forever on a stalled server.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.exceptions.RequestException:
        # Covers connection errors, timeouts, invalid URLs, and similar.
        print("oops")
        return None
    if not response.ok:
        print(('server responded', response.status_code))
        # Return explicitly: there is no parsed page to hand back here.
        return None
    return BeautifulSoup(response.text, 'html.parser')
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Upvotes: 1