Reputation: 23
I'm working on scraping Mobile Legends comment data from https://play.google.com/. I want my bot to scroll down by itself and load as many comments as possible. Once the bot has finished loading them, I want it to scrape all the comments.
The problem is that when the bot does the infinite scroll and clicks the "Show more" button, the second click of the button somehow gives me an error ([7200:8128:0903/172837.024:ERROR:gpu_init.cc(441)] Passthrough is not supported, GL is disabled) and the loop hits `break`.
from selenium import webdriver
from time import sleep
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.options import Options
import pandas as pd
# Scroll the Google Play review page to the bottom repeatedly and click the
# "Show more" button whenever the page stops growing, up to MAX_CLICKS times.
#
# NOTE: "Passthrough is not supported, GL is disabled" is a line written by
# Chrome's own logger, not a Python exception, so it is not what ends the loop.

# Start Chrome with the WebGL draft-extensions switch.
options = Options()
#options.add_argument("--kiosk")#fullscreen
options.add_argument('--enable-webgl-draft-extensions')
# Raw string so the backslash in the Windows path is never read as an escape.
driver = webdriver.Chrome(r'D:\chromedriver', options=options)
driver.maximize_window()
print("WebGL Activated")

# Open the review page and give it time to render.
driver.get("https://play.google.com/store/apps/details?id=com.mobile.legends&showAllReviews=true")
sleep(10)

SCROLL_PAUSE_TIME = 10  # seconds to wait after each scroll for content to load
MAX_CLICKS = 100        # upper bound on "Show more" clicks so the loop ends
# CSS form of the compound class name that identifies the "Show more" button.
SHOW_MORE_SELECTOR = ".U26fgb.O0WRkf.oG5Srb.C0oVfc.n9lfJ.M9Bg4d"

last_height = driver.execute_script("return document.body.scrollHeight")
click = 0
while click < MAX_CLICKS:
    # Scroll down to the bottom and wait for the page to load more reviews.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    sleep(SCROLL_PAUSE_TIME)

    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        # The page stopped growing: either a "Show more" button is waiting to
        # be clicked or every review has been loaded.
        sleep(7)
        # find_elements returns an empty list instead of raising, so "button
        # absent" cannot be confused with a failed click.
        buttons = driver.find_elements(By.CSS_SELECTOR, SHOW_MORE_SELECTOR)
        if not buttons:
            print("------Scroll Finish-------")
            print("Click ShowMore Counts = " + str(click))
            break
        showMore = buttons[0]
        try:
            # Build a fresh chain for every click. A reused ActionChains keeps
            # its earlier queued actions, so a second perform() would replay
            # the first click against a button that may no longer exist.
            action = ActionChains(driver)
            action.move_to_element(showMore).click(showMore).perform()
        except Exception as exc:
            # Stop loading but report the real reason instead of hiding it.
            print("Click Showmore failed: " + str(exc))
            print("Click ShowMore Counts = " + str(click))
            break
        sleep(10)
        print("Click Showmore " + str(click))
        click += 1
    last_height = new_height
This is the terminal output:
WebGL Activated
[5296:3760:0903/174720.883:ERROR:device_event_log_impl.cc(214)] [17:47:20.883] USB: usb_device_handle_win.cc:1048 Failed to read descriptor from node connection: A device attached to the system is not functioning. (0x1F)
[5296:3760:0903/174720.889:ERROR:device_event_log_impl.cc(214)] [17:47:20.889] Bluetooth: bluetooth_adapter_winrt.cc:713 GetBluetoothAdapterStaticsActivationFactory failed: Class not registered (0x80040154)
Click Showmore 0
[9052:7864:0903/174913.082:ERROR:gpu_init.cc(441)] Passthrough is not supported, GL is disabled
------Scroll Finish-------
Click ShowMore Counts = 1
Upvotes: 2
Views: 528
Reputation: 3433
This code is rather lengthy, but it keeps scrolling down if the `showmore_count` check is removed.
I have added relevant comments too.
from selenium import webdriver
import time
# Print the reviewer name of every review on a Google Play review page,
# clicking "Show More" a limited number of times to load further batches.

REVIEW_XPATH = "//div[@jsname='fk8dgd']/div"
REVIEWER_NAME_XPATH = ".//span[@class='X43Kjb']"
SHOW_MORE_XPATH = "//span[text()='Show More']"
MAX_SHOW_MORE_CLICKS = 2  # Raise this limit to keep loading more reviews.


def print_loaded_reviews(driver, start):
    """Scroll to and print each loaded review from index ``start`` onwards.

    The review list is re-read on every step because scrolling a review into
    view can cause the page to load more of them.

    Args:
        driver: The WebDriver showing the review page.
        start: Zero-based index of the first review not yet printed.

    Returns:
        The zero-based index of the next review that has not been printed.
    """
    index = start
    while True:
        reviews = driver.find_elements_by_xpath(REVIEW_XPATH)
        if index >= len(reviews):
            return index
        review = reviews[index]
        driver.execute_script("arguments[0].scrollIntoView(true);", review)
        # Nudge the page up a little after aligning the review to the top.
        driver.execute_script("window.scrollBy(0,-50)")
        # Always number from 1 so counts stay continuous across batches.
        name = review.find_element_by_xpath(REVIEWER_NAME_XPATH).text
        print("{}: {}".format(index + 1, name))
        index += 1


driver = webdriver.Chrome(executable_path="path")
try:
    driver.maximize_window()
    driver.implicitly_wait(10)
    driver.get("https://play.google.com/store/apps/details?id=com.ucool.heroesarena&showAllReviews=true") # The URL provided in the question was not working for me, so choose this one.
    time.sleep(5) # So that the page loads completely

    j = print_loaded_reviews(driver, 0)
    showmore_count = 1 # Using this to stop scrolling down.
    while showmore_count <= MAX_SHOW_MORE_CLICKS:
        # find_elements returns an empty list when the button is absent, so a
        # missing button ends the loop instead of raising.
        buttons = driver.find_elements_by_xpath(SHOW_MORE_XPATH)
        if not buttons or not buttons[0].is_displayed():
            break
        buttons[0].click()
        print("Clicked Show more {} time".format(showmore_count))
        showmore_count += 1
        time.sleep(5)
        j = print_loaded_reviews(driver, j)
except Exception as e:
    # Top-level boundary of the script: report the failure and still clean up.
    print(e)
finally:
    # Close the browser even when scraping fails part-way through.
    driver.quit()
Output is somewhat like this:
1: Reviewer name1
2: Reviewer name2
3: Reviewer name3
...
520: Reviewer name520
Upvotes: 1
Reputation: 415
Here is my approach: press the END key 8 times, then click "Show more", and repeat. There are a lot of comments, though... Let me know if you need any help scraping the comments.
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException
import time
# Load the review page, then alternate between pressing END eight times and
# clicking the element with class "CwaK9" (presumably the "Show more" button,
# per the accompanying description) until that element can no longer be found.
url = "https://play.google.com/store/apps/details?id=com.mobile.legends&showAllReviews=true"
d = webdriver.Chrome(ChromeDriverManager().install())
d.get(url)

keep_loading = True
while keep_loading:
    # Eight END key presses, one second apart, to reach the bottom of the page.
    presses_left = 8
    while presses_left:
        ActionChains(d).send_keys(Keys.END).perform()
        time.sleep(1)
        presses_left -= 1
    try:
        d.find_element_by_class_name("CwaK9").click()
    except NoSuchElementException:
        # No such element any more: stop loading.
        keep_loading = False
Upvotes: 1