Reputation: 11
I am trying to scrape some basic information from this website using Selenium. I am using the piece of code below, but all I am getting is "Access Denied" in a pretty little HTML format. Am I doing something wrong?
import time
from selenium import webdriver
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import requests # Simpler HTTP requests
from bs4 import BeautifulSoup # Python package for pulling data out of HTML
##### Web scraper for infinite scrolling page #####
# Open the listing page in a Selenium-driven Chrome window, scroll down one
# screen height at a time until the bottom of the page is reached, then fetch
# the same URL with `requests` and print the parsed HTML.
driver = webdriver.Chrome(executable_path=r"E:\Chromedriver\chromedriver_win32_chrome83\chromedriver.exe")
driver.get("https://www.zomato.com/pune/delivery?rating_range=4.0-5.0")
time.sleep(2)  # Allow 2 seconds for the web page to open

scroll_pause_time = 1  # Pause between scrolls, in seconds; raise it on a slow machine
screen_height = driver.execute_script("return window.screen.height;")  # Height of the screen, as reported by the browser
i = 1  # Number of screen heights to scroll to on the next iteration

while True:
    # Scroll one screen height further each time.
    driver.execute_script("window.scrollTo(0, {screen_height}*{i});".format(screen_height=screen_height, i=i))
    i += 1
    time.sleep(scroll_pause_time)
    # Re-read the scroll height after every scroll, because it can change
    # once the page has been scrolled.
    scroll_height = driver.execute_script("return document.body.scrollHeight;")
    # Stop once the next scroll target lies beyond the total scroll height.
    if screen_height * i > scroll_height:
        break

# NOTE: this is a separate HTTP request made by `requests`; it does not use
# the page that the Selenium-driven browser loaded and scrolled above.
url = 'https://www.zomato.com/pune/delivery?rating_range=4.0-5.0'
url_text = requests.get(url).text  # Body of the HTTP response, as text
url_soup = BeautifulSoup(url_text, 'html.parser')  # Parse the returned HTML
print(url_soup)
Upvotes: 0
Views: 123
Reputation: 15619
You're getting "Access Denied" when querying the website with Python requests. If you add a user-agent to Python Requests it will work.
import requests
from bs4 import BeautifulSoup
# Request the listing page with an explicit user-agent header, then parse
# the response body with BeautifulSoup and print the result.
target_url = "https://www.zomato.com/pune/delivery?rating_range=4.0-5.0"
request_headers = {
    'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/13.2b11866 Mobile/16A366 Safari/605.1.15',
}
response = requests.get(
    target_url,
    headers=request_headers,
    timeout=30,  # seconds
    verify=True,
    allow_redirects=True,
)
parsed_html = BeautifulSoup(response.content, 'html.parser')
print(parsed_html)
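P.S. You don't need to use Python Requests or BeautifulSoup together with Selenium: once the page has loaded, the rendered HTML is available directly from the browser via driver.page_source.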
Upvotes: 1