Reputation: 171
I am going to extract reviews on companies on Glassdoor. I am using Selenium and the below code! I have a problem in extracting reviews from all pages. The code returns only the reviews in the 1st page! It moves to other pages!" Here, I put a loop to check for 4 pages, but I have reviews from 1st page JUST! I would appreciate if you could help!
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.options import Options
import time
import pandas as pd
from selenium.webdriver.common.action_chains import ActionChains
driver_path= r"C:\Users\TMaghsoudi\Desktop\chromedriver_win32.exe"
# chrome options
options = webdriver.ChromeOptions()
# options.add_argument("--start-maximized")
options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors')
options.add_experimental_option('excludeSwitches', ['enable-logging'])
Pros =[]
Cons=[]
driver = webdriver.Chrome(driver_path, chrome_options=options)
# set driver
driver = webdriver.Chrome(driver_path, chrome_options=options)
# get url
url = "https://www.glassdoor.co.in/Job/index.htm"
driver.get(url)
time.sleep(3)
driver.find_element(By.CLASS_NAME, "HeaderStyles__signInButton").click()
time.sleep(5)
Enter_email= driver.find_element(By.ID, "modalUserEmail")
Enter_email.send_keys("XXXXXX")
Enter_email.send_keys(Keys.ENTER)
Enter_pass= driver.find_element(By.ID,"modalUserPassword")
Enter_pass.send_keys("XXXXXX")
SingIn= WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//div[@class='d-flex align-items-center flex-column']/button[@class='gd-ui-button mt-std minWidthBtn css-1dqhu4c evpplnh0']")))
SingIn.click()
time.sleep(5)
driver.set_window_size(1120, 1000)
driver.find_element(By.CLASS_NAME,"siteHeader__HeaderStyles__navigationItem:nth-child(2)").click()
Company=driver.find_element(By.ID,"sc\.keyword")
Company.send_keys("Amazon")
Company.send_keys(Keys.ENTER)
time.sleep(5)
driver.find_element(By.CLASS_NAME, "px+ .module .reviews .eiHeaderLink").click()
#####Extract review####################
time.sleep(2)
Page_count=0
htmlelement=driver.find_element(By.TAG_NAME,"html")
################################
for i in range (1,4):
ADVs = driver.find_elements(By.CLASS_NAME,"v2__EIReviewDetailsV2__fullWidth:nth-child(1) span")
for Ads in ADVs:
Pros.append(Ads.text)
DisADVS = driver.find_elements(By.CLASS_NAME,"v2__EIReviewDetailsV2__fullWidth+ .v2__EIReviewDetailsV2__fullWidth span")
for DisAd in DisADVS:
Cons.append(DisAd.text)
element= WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CLASS_NAME, "mb-std h2")))
actions = ActionChains(driver)
actions.move_to_element(element).perform()
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CLASS_NAME, "nextButton"))).click()
htmlelement.send_keys(Keys.HOME)
##########################################################
Upvotes: 0
Views: 463
Reputation: 171
I have just put a time.sleep(20) after last line in the loop.
Upvotes: 1