Data Mining IMDB Reviews - Only extracting the first 25 reviews

Question

I am currently trying to extract all the reviews on Spiderman Homecoming movie but I am only able to get the first 25 reviews. I was able to load more in IMDB to get all the reviews as originally it only shows the first 25 but for some reason I am unable to mine all the reviews after every review has been loaded. Does anyone know what I am doing wrong?

Below is the code I am running:

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By


#Set the web browser
driver = webdriver.Chrome(executable_path=r"C:\Users\Kent_\Desktop\WorkStudy\chromedriver.exe")

#Go to Google
driver.get("https://www.imdb.com/title/tt6320628/reviews?ref_=tt_urv")

#Loop load more button
wait = WebDriverWait(driver,10)
while True:
    try:
        driver.find_element_by_css_selector("button#load-more-trigger").click()
        wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR,".ipl-load-more__load-indicator")))
        soup = BeautifulSoup(driver.page_source, 'lxml')
    except Exception:break


#Scrape IMBD review
ans = driver.current_url
page = requests.get(ans)
soup = BeautifulSoup(page.content, "html.parser")
all = soup.find(id="main")

#Get the title of the movie
all = soup.find(id="main")
parent = all.find(class_ ="parent")
name = parent.find(itemprop = "name")
url = name.find(itemprop = 'url')
film_title = url.get_text()
print('Pass finding phase.....')

#Get the title of the review
title_rev = all.select(".title")
title = [t.get_text().replace("
", "") for t in title_rev]
print('getting title of reviews and saving into a list')

#Get the review
review_rev = all.select(".content .text")
review = [r.get_text() for r in review_rev]
print('getting content of reviews and saving into a list')

#Make it into dataframe
table_review = pd.DataFrame({
    "Title" : title,
    "Review" : review
})
table_review.to_csv('Spiderman_Reviews.csv')

print(title)
print(review)

Data Mining IMDB Reviews - Only extracting the first 25 reviews

Answers (1)

Related Questions