Reputation: 13
I am trying to build a web scraper that goes through a website's pages and downloads the Excel file offered in a dropdown menu at the bottom of each page.
The site only lets me download the 50 locations displayed on the current page, so I cannot download all of them at once.
I am able to download the first page's Excel file, but the following pages yield nothing.
Running the code provided below gives me this output:
Skipped a page
No more pages.
If I remove the lines that trigger the download, the script pages through every results page to the end without a problem.
The code below shows what I am trying to accomplish.
I would appreciate any help and advice! Thank you!
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
state = 'oklahoma'
rent_to_own = 'rent to own'
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.maximize_window()
driver.get('https://www.careeronestop.org/toolkit/jobs/find-businesses.aspx')
industry = driver.find_element(By.ID, "txtKeyword")
industry.send_keys(rent_to_own)
location = driver.find_element(By.ID, "txtLocation")
location.send_keys(state)
driver.find_element(By.ID, "btnSubmit").click()
driver.implicitly_wait(3)
def web_scrape():
    more_drawer = driver.find_element(By.XPATH, "//div[@class='more-drawer']//a[@href='/toolkit/jobs/find-businesses.aspx?keyword="+rent_to_own+"&ajax=0&location="+state+"&lang=en&Desfillall=y#Des']")
    more_drawer.click()
    driver.implicitly_wait(5)
    get_50 = Select(driver.find_element(By.ID, 'ViewPerPage'))
    get_50.select_by_value('50')
    driver.implicitly_wait(5)
    filter_description = driver.find_element(By.XPATH, "//ul[@class='filters-list']//a[@href='/toolkit/jobs/find-businesses.aspx?keyword="+rent_to_own+"&ajax=0&location="+state+"&lang=en&Desfillall=y&pagesize=50&currentpage=1&descfilter=Furniture~B~Renting ~F~ Leasing']")
    filter_description.click()
    while True:
        try:
            download_excel = Select(driver.find_element(By.ID, 'ResultsDownload'))
            download_excel.select_by_value('Excel')
            driver.implicitly_wait(20)
            first_50 = driver.find_element(By.XPATH, "//div[@id='relatedOccupations']//a[@onclick='hideMoreRelatedOccupations()']")
            first_50.click()
            driver.implicitly_wait(20)
            next_page = driver.find_element(By.XPATH, "//div[@class='pagination-wrap']//div//a[@class='next-page']")
            next_page.click()
            driver.implicitly_wait(20)
            print("Skipped a page.")
        except:
            print("No more pages.")
            return

web_scrape()
Upvotes: 0
Views: 372
Reputation: 457
Below is something that works, although the approach could probably be improved. I stuck with Selenium, but you don't actually need to open the webpage at all: you can scrape the results directly by passing the right URL parameters and parsing with Beautiful Soup (a rough sketch of that idea is at the end of this answer). Writing every item into Excel one cell at a time is also probably not the fastest way; a better approach would be to collect everything with pandas and create the workbook once at the end. But it works, and if you have any questions let me know.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import openpyxl as xl
import os
import math

cwd = os.getcwd()  # Or whatever dir you want
filename = '\\test123.xlsx'  # escape the backslash so '\t' is not read as a tab
location = 'oklahoma'
keyword = 'rent to own'

driver = webdriver.Chrome(ChromeDriverManager().install())
driver.maximize_window()
driver.get('https://www.careeronestop.org/toolkit/jobs/find-businesses.aspx?keyword=' + keyword + '&ajax=0&location=' + location + '&radius=50&pagesize=50&currentpage=1&lang=en')
driver.implicitly_wait(3)

wb = xl.Workbook()
ws = wb.worksheets[0]

# Get the number of pages from the total record count (50 results per page).
ret = driver.find_element(By.ID, 'recordNumber')
lp = math.ceil(float(ret.text) / 50)

r = 1
for i in range(1, lp + 1):  # +1 so the last page is included
    print(i)
    driver.get('https://www.careeronestop.org/toolkit/jobs/find-businesses.aspx?keyword=' + keyword + '&ajax=0&location=' + location + '&radius=50&pagesize=50&currentpage=' + str(i) + '&lang=en')
    table_id = driver.find_elements(By.CLASS_NAME, 'res-table')[0]
    rows = table_id.find_elements(By.TAG_NAME, "tr")
    for row in rows:
        cols = row.find_elements(By.TAG_NAME, "td")
        refs = row.find_elements(By.TAG_NAME, "a")
        # First column: write the link as an Excel HYPERLINK formula.
        for c, ref in enumerate(refs, start=1):
            ws.cell(row=r, column=c).value = '=HYPERLINK("{}", "{}")'.format(ref.get_attribute("href"), ref.text)
        # Remaining columns: plain text values.
        for c, col in enumerate(cols, start=1):
            if c > 1:
                ws.cell(row=r, column=c).value = col.text
        r += 1

wb.save(cwd + filename)
print('done')
This returns an Excel file with 750+ rows of data, links included.
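For reference, here is a rough sketch of the no-Selenium idea mentioned above: request each results page directly with the same URL parameters, parse the res-table with Beautiful Soup, collect the rows, and write the workbook once at the end with pandas. I haven't verified the markup assumptions (the recordNumber element and res-table class are taken from the Selenium code above, and parse_page is just an illustrative helper), so treat it as a starting point rather than a drop-in replacement:

import math
import requests
import pandas as pd
from bs4 import BeautifulSoup

BASE = 'https://www.careeronestop.org/toolkit/jobs/find-businesses.aspx'
params = {'keyword': 'rent to own', 'location': 'oklahoma', 'radius': '50',
          'pagesize': '50', 'currentpage': 1, 'ajax': '0', 'lang': 'en'}

def parse_page(html):
    # Pull the link plus the text of each cell out of every row of the results table.
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find(class_='res-table')  # assumes the same class the Selenium code targets
    rows = []
    if table is None:
        return rows
    for tr in table.find_all('tr'):
        cells = [td.get_text(strip=True) for td in tr.find_all('td')]
        link = tr.find('a')
        if cells:
            rows.append([link['href'] if link else ''] + cells)
    return rows

first = requests.get(BASE, params=params)
soup = BeautifulSoup(first.text, 'html.parser')
record_count = soup.find(id='recordNumber')  # same element the Selenium version reads
last_page = math.ceil(float(record_count.get_text(strip=True)) / 50)

all_rows = parse_page(first.text)
for page in range(2, last_page + 1):
    params['currentpage'] = page
    all_rows.extend(parse_page(requests.get(BASE, params=params).text))

# One write at the end instead of cell-by-cell.
pd.DataFrame(all_rows).to_excel('test123.xlsx', index=False, header=False)

The caveat is that if the site builds the results table with JavaScript, the plain-requests version won't see it and you'd need to stay with Selenium.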
Upvotes: 1