SMTH
SMTH

Reputation: 95

Selenium script captures first few items out of 100

I've created a script to scrape collection names from a webpage, traversing multiple pages. The script parses only the first 13 names on each page, out of 100. One such collection name looks like Pudgy Penguins. How can I capture all 100 names, instead of only 13, from each page of that site using Selenium?

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

link = "https://opensea.io/rankings"

def scroll_to_the_bottom():
    """Keep jumping to the end of the document until its height stops growing.

    Relies on the module-level ``driver``. After each jump it waits up to
    five seconds for ``document.body.scrollHeight`` to exceed the last
    recorded height; a timeout is taken to mean nothing more was appended.
    """
    height_script = "return document.body.scrollHeight;"
    last_height = driver.execute_script(height_script)

    def page_grew(browser):
        # Compared against the most recently recorded height.
        return browser.execute_script(height_script) > last_height

    growing = True
    while growing:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        try:
            WebDriverWait(driver, 5).until(page_grew)
            last_height = driver.execute_script(height_script)
        except TimeoutException:
            # Height did not increase within the wait window: stop scrolling.
            growing = False


def get_collection_names(link):
    """Open *link* and yield the collection name of each visible ranking row.

    For every results page: scroll down, wait for the ranking rows to be
    visible, yield the text of each row's name element, then click the
    "arrow_forward" button and wait for the last processed row to go stale.
    Any exception raised while paging ends the generator.
    """
    row_locator = (By.CSS_SELECTOR, "[role='listitem'] [class$='Ranking--row']")
    name_locator = (By.CSS_SELECTOR, "[class$='Ranking--collection-name']")
    next_locator = (By.XPATH, "//button[contains(@class,'Buttonreact__StyledButton')][./i[contains(.,'arrow_forward')]]")

    driver.get(link)
    while True:
        scroll_to_the_bottom()

        rows = WebDriverWait(driver, 10).until(
            EC.visibility_of_all_elements_located(row_locator)
        )
        for item in rows:
            name_element = WebDriverWait(item, 10).until(
                EC.visibility_of_element_located(name_locator)
            )
            yield name_element.text

        try:
            next_button = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(next_locator)
            )
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script('arguments[0].click();', next_button)
            # The last row of this page doubles as the "page changed" sentinel.
            WebDriverWait(driver, 10).until(EC.staleness_of(item))
        except Exception:
            return

if __name__ == '__main__':
    # `driver` must stay a module-level name: the functions above read it
    # as a global rather than taking it as a parameter.
    driver = webdriver.Chrome()
    try:
        for collection_name in get_collection_names(link):
            print(collection_name)
    finally:
        # Close the browser and end the WebDriver session even if scraping
        # raises part-way through.
        driver.quit()

Scrolling to the bottom of every page seems not to have any effect on the number of results the script produces.

Upvotes: 2

Views: 805

Answers (1)

lionking-123
lionking-123

Reputation: 311

I have checked your description and source code. I think the page contains many elements, so they are not all loaded at once. To solve this, scroll down to the bottom step by step. I have changed the function scroll_to_the_bottom as below:

def scroll_to_the_bottom(step=300, pause=1):
    """Scroll the page down in fixed increments, ending at the bottom.

    Relies on the module-level ``driver``. The document height is sampled
    once before scrolling starts.

    Args:
        step: Pixels to advance per scroll; must be positive.
        pause: Seconds to sleep after each scroll.

    Raises:
        ValueError: If *step* is not positive (the loop could never finish).
    """
    if step <= 0:
        raise ValueError("step must be a positive number of pixels")

    page_height = driver.execute_script('return document.body.scrollHeight;')

    offset = step
    while offset < page_height:
        driver.execute_script("window.scrollTo({}, {});".format(0, offset))
        time.sleep(pause)
        offset += step

    # The stepped offsets stop short of the full height, so finish with one
    # scroll to the bottom itself.
    driver.execute_script("window.scrollTo({}, {});".format(0, page_height))
    time.sleep(pause)

Embedding the above logic into your code, we can change it as below:

import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


link = "https://opensea.io/rankings"


def get_collection_names(link):
    """Open *link* and yield each distinct collection name across all pages.

    Each results page is scrolled in 300-pixel steps with a one-second pause,
    and the name elements present in the DOM are read at every step. Names
    already yielded (on this or an earlier page) are skipped. After a page is
    exhausted the "arrow_forward" button is clicked; the generator ends as
    soon as paging raises any exception.

    Relies on the module-level ``driver``.
    """
    name_locator = (By.CSS_SELECTOR, "[class$='Ranking--collection-name']")
    next_locator = (By.XPATH, "//button[contains(@class,'Buttonreact__StyledButton')][./i[contains(.,'arrow_forward')]]")

    driver.get(link)
    unique_items = set()

    def new_names():
        """Yield names currently in the DOM that have not been yielded yet."""
        elements = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located(name_locator)
        )
        for element in elements:
            # Read the text once: every `.text` access is a WebDriver call.
            name = element.text
            # Elements that are present but have no text yet carry no name.
            if not name or name in unique_items:
                continue
            unique_items.add(name)
            yield name

    while True:
        # First name element of the page; used below to detect the page change.
        item = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(name_locator)
        )
        page_height = driver.execute_script('return document.body.scrollHeight;')

        offset = 300
        while offset < page_height:
            yield from new_names()
            driver.execute_script("window.scrollTo(0, {});".format(offset))
            time.sleep(1)
            offset += 300

        # The loop reads the DOM *before* each scroll, so read once more here;
        # otherwise whatever appears after the final scroll (or on a page no
        # taller than one step) would never be collected.
        yield from new_names()

        try:
            button = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(next_locator)
            )
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script('arguments[0].click();', button)
            WebDriverWait(driver, 10).until(EC.staleness_of(item))
        except Exception:
            # Deliberately best-effort: any failure to advance ends the scrape.
            return


if __name__ == '__main__':
    # `driver` must stay a module-level name: get_collection_names reads it
    # as a global rather than taking it as a parameter.
    driver = webdriver.Chrome()
    try:
        for item in get_collection_names(link):
            print(item)
    finally:
        # Close the browser even if scraping raises part-way through.
        driver.quit()

I hope this helps. Thanks.

Upvotes: 1

Related Questions