Achikam Mor
Achikam Mor

Reputation: 49

can't open an URL neither with BeautifulSoup or request

i'm trying to open an URL with 2 kinds of options (that already worked for me in with different sites) but both of them aren't working.

from urllib.request import urlopen as ureq
import requests
from bs4 import BeautifulSoup


def main():
    url = "https://www.amazon.com/s?k=black+watch&s=review-rank&qid=1568506943&ref=sr_st_review-rank"
    client = ureq(url)
    page = client.read()
    client.close()
    amazon_soup = BeautifulSoup(page, "html.parser")
    # amazon_soup = get_page(url)
    print(amazon_soup)


def get_page(url):
    try:
        response = requests.get(url)
        if not response.ok:
            print(('server responded', response.status_code))
        else:
            soup = BeautifulSoup(response.text, 'html.parser')
    except:
        print("oops")
        requests.exceptions.ConnectionError
        return

    return soup


if __name__ == "__main__":
    main()

it suppose to print the html code for this searching but i keep getting "urllib.error.HTTPError: HTTP Error 503: Service Unavailable " even if i try the other way (that marked as a comment for now) i get 503 Error

Upvotes: 0

Views: 958

Answers (1)

AviKKi
AviKKi

Reputation: 1204

You need to attach proper user-agent header to your request, have a look at this snippet Websites like amazon and reddit expect you to have proper useragent or else they'll respond with 503.

from urllib.request import urlopen, Request
import requests
from bs4 import BeautifulSoup


def main():
    url = "https://www.amazon.com/s?k=black+watch&s=review-rank&qid=1568506943&ref=sr_st_review-rank"
    req = Request(url)
    req.add_header('user-agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20')
    client = urlopen(req)
    page = client.read()
    client.close()
    amazon_soup = BeautifulSoup(page, "html.parser")
    # amazon_soup = get_page(url)
    print(amazon_soup)


def get_page(url):
    try:
        response = requests.get(url)
        if not response.ok:
            print(('server responded', response.status_code))
        else:
            soup = BeautifulSoup(response.text, 'html.parser')
    except:
        print("oops")
        requests.exceptions.ConnectionError
        return

    return soup


if __name__ == "__main__":
    main()

Upvotes: 1

Related Questions