Reputation: 306
I have a webpage that displays some products. This webpage has around 50 products, and when I click on "load more", more products are displayed. I want to extract information for all of them, and I have written code for this. The problem, however, is that the program proceeds with retrieving information without waiting for the button to be clicked. I have tried changing the time.sleep values to very high values, but to no avail. Is there some other expression I could include to make the rest of the code wait until the button has been clicked?
from selenium import webdriver
import time
from bs4 import BeautifulSoup
import requests
from selenium.webdriver.support.wait import WebDriverWait
import selenium.webdriver.support.expected_conditions as ec
from selenium.webdriver.common.by import By
import xlsxwriter
driver = webdriver.Chrome(executable_path=r"C:\Users\Home\Desktop\chromedriver.exe")
driver.get("https://justnebulizers.com/collections/nebulizer-accessories")
soup = BeautifulSoup(driver.page_source, 'html.parser')
time.sleep(5)
#wait = WebDriverWait(driver, 10)
#wait.until(ec.element_to_be_clickable((By.XPATH,"//a[@class='load-more__btn action_button continue-button']")))
button= driver.find_element_by_xpath(("//a[@class='load-more__btn action_button continue-button']"))
button.click()
#wait.until(ec.invisibility_of_element_located((By.XPATH,"//a[@class='load-more__btn action_button continue-button']")))
time.sleep(10)
#WebDriverWait(driver, 10).until(ec.invisibility_of_element_located((By.XPATH, "//a[@class='load-more__btn action_button continue-button']")))
def cpap_spider(url):
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    for link in soup.findAll("a", {"class": "product-info__caption"}):
        href = "https://www.justnebulizers.com" + link.get("href")
        #title = link.string
        each_item(href)
        print(href)
        #print(title)

def each_item(item_url):
    global cols_names, row_i
    source_code = requests.get(item_url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, 'html.parser')
    table = soup.find("table", {"class": "tab_table"})
    if table:
        table_rows = table.find_all('tr')
    else:
        row_i += 1
        return
    for row in table_rows:
        cols = row.find_all('td')
        for ele in range(0, len(cols)):
            temp = cols[ele].text.strip()
            if temp:
                # Here if you want then you can remove unwanted characters like : from temp
                # For example "Actual Weight:" -> "Actual Weight"
                if temp[-1:] == ":":
                    temp = temp[:-1]
                # Name of column
                if ele == 0:
                    try:
                        cols_names_i = cols_names.index(temp)
                    except:
                        cols_names.append(temp)
                        cols_names_i = len(cols_names) - 1
                        worksheet.write(0, cols_names_i + 1, temp)
                    continue
                worksheet.write(row_i, cols_names_i + 1, temp)
    row_i += 1
cols_names=[]
cols_names_i = 0
row_i = 1
workbook = xlsxwriter.Workbook('respiratory_care.xlsx')
worksheet = workbook.add_worksheet()
worksheet.write(0, 0, "href")
cpap_spider("https://justnebulizers.com/collections/nebulizer-accessories")
#each_item("https://www.1800cpap.com/viva-nasal-cpap-mask-by-3b-medical")
workbook.close()
Upvotes: 0
Views: 44
Reputation: 394
The code is working just fine but you need to soup the source again with...
soup = BeautifulSoup(driver.page_source, 'html.parser')
...after you click the button to get the new items. I think that is why it looks like it is running without waiting.
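For example, a minimal sketch that reuses the XPath and class names from your question:

# Click "load more", wait, then RE-CREATE the soup so it contains the newly loaded products.
button = driver.find_element_by_xpath("//a[@class='load-more__btn action_button continue-button']")
button.click()
time.sleep(10)
soup = BeautifulSoup(driver.page_source, 'html.parser')
for link in soup.findAll("a", {"class": "product-info__caption"}):
    print("https://www.justnebulizers.com" + link.get("href"))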
Selenium also provides explicit wait methods you can use to ensure a condition is met before proceeding: https://selenium-python.readthedocs.io/waits.html#explicit-waits
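For instance, an explicit wait on the same button looks roughly like this (a sketch built from the imports already present in your code):

from selenium.webdriver.support.wait import WebDriverWait
import selenium.webdriver.support.expected_conditions as ec
from selenium.webdriver.common.by import By

wait = WebDriverWait(driver, 10)
# Wait until the "load more" button is present and clickable, then click it.
button = wait.until(ec.element_to_be_clickable(
    (By.XPATH, "//a[@class='load-more__btn action_button continue-button']")))
button.click()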
Also you may want to try Scrapy for crawling: https://pypi.org/project/Scrapy/
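If you try Scrapy, a bare-bones spider for the same collection page could look like this (a sketch only; note that Scrapy by itself does not execute the JavaScript behind the "load more" button, so it will only see the products in the initial HTML):

import scrapy

class NebulizerSpider(scrapy.Spider):
    name = "nebulizers"
    start_urls = ["https://justnebulizers.com/collections/nebulizer-accessories"]

    def parse(self, response):
        # Same product links your code extracts with BeautifulSoup.
        for href in response.css("a.product-info__caption::attr(href)").getall():
            yield {"href": response.urljoin(href)}

You can run it with something like scrapy runspider nebulizer_spider.py -o products.json.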
Update: Try this:
Replace
soup = BeautifulSoup(driver.page_source, 'html.parser')
with
soup = BeautifulSoup(driver.find_element_by_tag_name('html').get_attribute('innerHTML'), 'html.parser')
Credits: https://stackoverflow.com/a/43565160/4289062
Upvotes: 1