Tahereh Maghsoudi
Tahereh Maghsoudi

Reputation: 171

When I scrap the Glassdoor reviews, It returns just the 1st page

I am going to extract reviews on companies on Glassdoor. I am using Selenium and the below code! I have a problem in extracting reviews from all pages. The code returns only the reviews in the 1st page! It moves to other pages!" Here, I put a loop to check for 4 pages, but I have reviews from 1st page JUST! I would appreciate if you could help!

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.options import Options
import time
import pandas as pd  
from selenium.webdriver.common.action_chains import ActionChains


driver_path= r"C:\Users\TMaghsoudi\Desktop\chromedriver_win32.exe"

# chrome options
options = webdriver.ChromeOptions()
# options.add_argument("--start-maximized")
options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors')
options.add_experimental_option('excludeSwitches', ['enable-logging'])

Pros =[]
Cons=[]

driver = webdriver.Chrome(driver_path, chrome_options=options)

# set driver
driver = webdriver.Chrome(driver_path, chrome_options=options)

# get url
url = "https://www.glassdoor.co.in/Job/index.htm"
driver.get(url)

time.sleep(3)
driver.find_element(By.CLASS_NAME, "HeaderStyles__signInButton").click()

time.sleep(5)

Enter_email= driver.find_element(By.ID, "modalUserEmail")
Enter_email.send_keys("XXXXXX")
Enter_email.send_keys(Keys.ENTER)
Enter_pass= driver.find_element(By.ID,"modalUserPassword")
Enter_pass.send_keys("XXXXXX")


SingIn= WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//div[@class='d-flex align-items-center flex-column']/button[@class='gd-ui-button mt-std minWidthBtn css-1dqhu4c evpplnh0']")))
SingIn.click()



time.sleep(5)
driver.set_window_size(1120, 1000)
driver.find_element(By.CLASS_NAME,"siteHeader__HeaderStyles__navigationItem:nth-child(2)").click()

Company=driver.find_element(By.ID,"sc\.keyword")
Company.send_keys("Amazon")
Company.send_keys(Keys.ENTER)

time.sleep(5)
driver.find_element(By.CLASS_NAME, "px+ .module .reviews .eiHeaderLink").click()

#####Extract review####################


time.sleep(2)

Page_count=0
htmlelement=driver.find_element(By.TAG_NAME,"html")
################################
for i in range (1,4):
        ADVs = driver.find_elements(By.CLASS_NAME,"v2__EIReviewDetailsV2__fullWidth:nth-child(1) span") 
        for Ads in ADVs:
                Pros.append(Ads.text)
        DisADVS = driver.find_elements(By.CLASS_NAME,"v2__EIReviewDetailsV2__fullWidth+ .v2__EIReviewDetailsV2__fullWidth span") 
        for DisAd in DisADVS:
                Cons.append(DisAd.text)
                
        element= WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CLASS_NAME, "mb-std h2")))      
        actions = ActionChains(driver)
        actions.move_to_element(element).perform()
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CLASS_NAME, "nextButton"))).click()
        htmlelement.send_keys(Keys.HOME)
##########################################################  

  

Upvotes: 0

Views: 463

Answers (1)

Tahereh Maghsoudi
Tahereh Maghsoudi

Reputation: 171

I have just put a time.sleep(20) after last line in the loop.

Upvotes: 1

Related Questions