Web Scraping shopee.sg with selenium and BeautifulSoup in python

Question

Whenever I am trying to scrape shopee.sg using selenium and BeautifulSoup I am not being able to extract all the data from a single page.

Example - For a search result consisting of 50 products information on the first 15 are getting extracted while the remaining are giving null values.

Now, I know this has got something to do with the scroller but I have no idea how to make it work. Any idea how to fix this?

Code as of now

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from time import sleep
import csv

# create object for chrome options
chrome_options = Options()
#base_url = 'https://shopee.sg/search?keyword=disinfectant'

# set chrome driver options to disable any popup's from the website
# to find local path for chrome profile, open chrome browser
# and in the address bar type, "chrome://version"
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument('--disable-infobars')
chrome_options.add_argument('start-maximized')
#chrome_options.add_argument('user-data-dir=C:\Users\username\AppData\Local\Google\Chrome\User Data\Default')
# To disable the message, "Chrome is being controlled by automated test software"
chrome_options.add_argument("disable-infobars")
# Pass the argument 1 to allow and 2 to block
chrome_options.add_experimental_option("prefs", { 
    "profile.default_content_setting_values.notifications": 2
    })


def get_url(search_term):
    """Generate an url from the search term"""
    template = "https://www.shopee.sg/search?keyword={}"
    search_term = search_term.replace(' ','+')
    
    #add term query to url
    url = template.format(search_term)
    
    #add page query placeholder
    url+= '&page={}'
    
    return url

def main(search_term):
# invoke the webdriver
    driver = webdriver.Chrome(options = chrome_options)


    item_cost = []
    item_name = []
    url=get_url(search_term)

    for page in range(0,3):
        driver.get(url.format(page))
        delay = 5 #seconds


        try:
            WebDriverWait(driver, delay)
            print ("Page is ready")
            sleep(5)
            html = driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
            #print(html)
            soup = BeautifulSoup(html, "html.parser")
            #find the product description
            for item_n in soup.find_all('div',{'class':'col-xs-2-4 shopee-search-item-result__item'}):
                try:
                    description_soup = item_n.find('div',{'class':'yQmmFK _1POlWt _36CEnF'})
                    name = description_soup.text.strip()
                except AttributeError:
                    name = ''
                print(name)    
                item_name.append(name)

            # find the price of items
            for item_c in soup.find_all('div',{'class':'col-xs-2-4 shopee-search-item-result__item'}):
                try:
                    price_soup = item_c.find('div',{'class':'WTFwws _1lK1eK _5W0f35'})
                    price_final = price_soup.find('span',{'class':'_29R_un'})
                    price = price_final.text.strip()
                except AttributeError:
                    price = ''
                print(price)
                item_cost.append(price)
  
        except TimeoutException:
            print ("Loading took too much time!-Try again")
        sleep(5)
    rows = zip(item_name, item_cost)
    
    
    with open('shopee_item_list.csv','w',newline='',encoding='utf-8') as f:
        writer=csv.writer(f)
        writer.writerow(['Product Description', 'Price'])
        writer.writerows(rows)```

Web Scraping shopee.sg with selenium and BeautifulSoup in python

Answers (1)

Scroller

Full (updated) code

Related Questions