user3476463
user3476463

Reputation: 4575

scroll to scrape additional details using selenium and beautifulsoup

I have the Python code below, which I'm using to scrape property sale data from Zillow. It uses the Selenium Chrome driver to open a Chrome browser and navigate to the for-sale listings URL for a given zip code. I then use BeautifulSoup to scrape the property details from the page source. The problem is that, in the example below, it only scrapes the data for the first 9 properties. I think the page needs to be scrolled down so that more properties are loaded before scraping. Can anyone suggest how to scrape the rest of the properties' info?

code:

import pandas as pd
import numpy as np

import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options


# Bare reference to the imported Options class; as an expression statement it has no effect.
Options

# A Chrome options object; the name `options` is not referenced again in the code shown here.
options = Options()

# The options object that is passed to webdriver.Chrome(...) through chrome_options=.
chrome_options = Options() 



#re allows for matching text with regular expressions (including through BeautifulSoup)
#dateutil.parser provides .parse() to convert plain-text dates in a variety of formats into datetime objects
import re, dateutil.parser
#BeautifulSoup provide a model for the source HTML
from bs4 import BeautifulSoup

#Webdriver is the interface to the selected browser (Chrome in this script)
from selenium import webdriver
#Ability to select values in HTML <select> tags
from selenium.webdriver.support import select
import time


from selenium.webdriver.common import action_chains, keys

from re import sub

import datetime




def _parse_price(price_text):
    """Convert a listing price string such as ``"$898,000"`` to an int.

    Returns -1 (the sentinel already used here for missing fields) when the
    cleaned text is not an integer, e.g. an empty string or ``"$1.2M"``.
    """
    try:
        return int(sub(r'[^\d.]', '', price_text))
    except ValueError:
        return -1


def _parse_card_details(details_tag):
    """Map the ``<li>`` facts of one listing card to ``{label: value}``."""
    facts = {}
    for item in details_tag.find_all("li", class_=""):
        parts = item.text.split(" ")
        # A one-word entry has no "<value> <label>" pair to record; skip it
        # instead of failing with IndexError.
        if len(parts) >= 2:
            facts[parts[1]] = parts[0]
    return facts


def _load_all_cards(driver, max_scrolls, pause=1.0):
    """Scroll the result list until no additional listing cards are rendered.

    NOTE(review): assumes the site renders further cards once the last
    rendered one is scrolled into view, and that ``pause`` seconds is long
    enough for them to appear -- confirm against the live page.
    """
    rendered = -1
    for _ in range(max_scrolls):
        # Selenium elements (not BeautifulSoup tags) are required here so the
        # browser can be asked to scroll to them.
        cards = driver.find_elements_by_css_selector('ul.list-card-details')
        if not cards or len(cards) == rendered:
            break
        rendered = len(cards)
        driver.execute_script('arguments[0].scrollIntoView();', cards[-1])
        time.sleep(pause)


def property_details(zip_code, max_scrolls=30):
    """Scrape the for-sale listing cards Zillow shows for ``zip_code``.

    Opens Chrome through Selenium, loads the zip code's for-sale page,
    scrolls so that lazily rendered cards are present in the page source,
    and parses price, address and home facts with BeautifulSoup.

    Args:
        zip_code: Zip code to query; it is interpolated into the URL with
            ``str()`` and stored unchanged in the ``zip_code`` column.
        max_scrolls: Upper bound on scroll steps used to load more cards.
            ``0`` disables scrolling (only the initially rendered cards are
            scraped).

    Returns:
        pandas.DataFrame with one row per listing and the columns
        ``acres, address, ba, bds, last_update, price, sqft, zip_code``.
        ``bds``/``ba``/``sqft``/``acres`` hold the text scraped from the card,
        or -1 when the card does not list them; ``price`` is an int, or -1
        when it cannot be parsed. The columns are present even when no
        listing was found.

    Raises:
        selenium.common.exceptions.WebDriverException: if the browser cannot
            be started or the page cannot be loaded. The browser is closed in
            either case.
    """
    driver = webdriver.Chrome(executable_path=os.path.abspath("chromedriver"), chrome_options=chrome_options)

    try:
        zip_url = 'https://www.zillow.com/homes/for_sale/' + str(zip_code) + '_rb/'

        # waits a random 3-6 seconds between pulling data
        time.sleep(np.random.randint(low=3, high=7, size=1)[0])

        # opens website
        driver.get(zip_url)

        # make the lazily rendered cards appear before taking the page source
        _load_all_cards(driver, max_scrolls)

        # explicit parser so the result does not depend on which optional
        # parser libraries happen to be installed
        tstsoup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # always release the browser, even when loading or scrolling fails
        driver.quit()

    price_list = [
        _parse_price(tag.get_text())
        for tag in tstsoup.find_all('div', {'class': 'list-card-price'})
    ]
    address_list = [
        tag.get_text()
        for tag in tstsoup.find_all('address', {'class': 'list-card-addr'})
    ]
    detail_list = [
        _parse_card_details(tag)
        for tag in tstsoup.find_all('ul', {'class': 'list-card-details'})
    ]

    ts = datetime.datetime.now().isoformat()

    col_list = ['acres', 'address', 'ba', 'bds', 'last_update', 'price', 'sqft',
                'zip_code']

    records = []
    # zip() stops at the shortest list, so a card missing its price or address
    # element can no longer raise IndexError.
    # NOTE(review): the three lists are still matched by position, as before;
    # a card lacking one element would shift later rows -- confirm whether
    # per-card parsing is needed.
    for facts, price, address in zip(detail_list, price_list, address_list):
        records.append({
            'acres': facts.get('acres', -1),
            'address': address,
            'ba': facts.get('ba', -1),
            'bds': facts.get('bds', -1),
            'last_update': ts,
            'price': price,
            'sqft': facts.get('sqft', -1),
            'zip_code': zip_code,
        })

    # turn into dataframe
    return pd.DataFrame(records, columns=col_list)



# only returns first 9 entries

# Example invocation for a single zip code; the returned DataFrame is not assigned here.
property_details(zip_code=94014)

I tried implementing the suggested approach:

UPDATE:

from selenium.webdriver.common.action_chains import ActionChains

# Script version of the scraper: opens the Zillow for-sale page for one zip
# code and parses prices, addresses and home facts from a single snapshot of
# the page source.
zip_code=94016



driver = webdriver.Chrome(executable_path=os.path.abspath("chromedriver"), chrome_options=chrome_options)


zip_url = 'https://www.zillow.com/homes/for_sale/'+str(zip_code)+'_rb/'

# waits between pulling data (random whole number of seconds from 3 to 6)
time.sleep(np.random.randint(low=3, high=7, size=1)[0])

# opens website
driver.get(zip_url)

# getting page source data
# This snapshot is taken once, before any scrolling; nothing below re-reads
# driver.page_source.
tstsoup = BeautifulSoup(driver.page_source)


# getting list of prices
# (price_parse holds BeautifulSoup tags, not elements obtained from `driver`)

price_parse=tstsoup.find_all('div',{'class':'list-card-price'})

price_list=[]

for x in range(len(price_parse)):

    # strip everything except digits and '.' before converting to int
    price_list.append(int(sub(r'[^\d.]', '',price_parse[x].get_text())))

# get address

address_parse=tstsoup.find_all('address',{'class':'list-card-addr'})

# address_parse[0].get_text()

address_list=[]

for i in range(len(address_parse)):

    address_list.append(address_parse[i].get_text())

# get home details

main_data=tstsoup.find_all('ul',{'class':'list-card-details'})
detail_list=[]
for data in main_data:
    # each <li> text is split on a space: second word becomes the key, first word the value
    d={  i.text.split(" ")[1] : i.text.split(" ")[0] for i in data.find_all("li",class_="")   }
    detail_list.append(d)

# creating dict of property details    

# one timestamp shared by every record of this run
ts=str(datetime.datetime.now().isoformat())

# home_dict is not used again in the code shown here
home_dict={}
# fields that get a -1 placeholder when a listing does not provide them
key_list=['bds','ba','sqft','acres']

# keys kept in each final record
col_list=['acres', 'address', 'ba', 'bds', 'last_update', 'price', 'sqft',
       'zip_code']

# Post-process each scraped detail dict in place. The scroll check further
# down is indented inside this loop as well, so it runs once per record
# rather than once per page.
for x in range(len(detail_list)):

    # fill in -1 for every expected field the listing did not provide
    for k in key_list:

        if k not in list(detail_list[x].keys()):

            detail_list[x][k]=-1

    # adding price data

    detail_list[x]['price']=price_list[x]

    # adding address

    detail_list[x]['address']=address_list[x]

    # add zip code

    detail_list[x]['zip_code']=zip_code

    # add state

#     detail_list[x]['state']=state


    # adding last update date

    detail_list[x]['last_update']=ts

    # removing commas from numeric fields
#         detail_list[x]['sqft']=int(str(detail_list[x]['sqft']).replace(",", ""))

#         detail_list[x]['acres']=int(str(detail_list[x]['acres']).replace(",", ""))

    # self-assignments: the values stay as scraped (the conversions above are commented out)
    detail_list[x]['sqft']=detail_list[x]['sqft']

    detail_list[x]['acres']=detail_list[x]['acres']

    # cast strings to int

#         detail_list[x]['ba']=int(str(detail_list[x]['ba']).replace(",", ""))

#         detail_list[x]['bds']=int(str(detail_list[x]['bds']).replace(",", ""))

    # self-assignments: the values stay as scraped (the casts above are commented out)
    detail_list[x]['ba']=detail_list[x]['ba']

    detail_list[x]['bds']=detail_list[x]['bds']

    # filtering for list of columns
    detail_list[x] = {k: v for k, v in detail_list[x].items() if k in col_list}


    # sorting by key

    detail_list[x]=dict(sorted(detail_list[x].items()))

    print('made it')
    print('')

    # add scrolling
    # NOTE(review): tstsoup, price_parse and detail_list were built before this
    # loop and are not rebuilt after scrolling, so a successful scroll would
    # still not add records here.

    actions = ActionChains(driver)

    footer = driver.find_elements_by_css_selector('div#region-info-footer')
    # if the element exists on the page, the list is non-empty and therefore truthy
    if(footer):
        if not footer[0].is_displayed():
            #scroll the last currently available price_parse element into view. 
            #this will load a bulk of new elements
            # NOTE(review): price_parse[-1] is a BeautifulSoup tag, not an element
            # obtained from `driver`; move_to_element presumably needs the
            # latter -- confirm.
            actions.move_to_element(price_parse[-1]).perform()
            time.sleep(0.5)

            print('footer')
        else:
            #you can also use this case to exit the loop since you already scraped all the data on this page

            print('break')
            # leaves the per-record `for x` loop, so the remaining records are
            # never post-processed
            break
    else:        
        # if the footer element does not exist at all, the bottom has not been reached yet
        actions.move_to_element(price_parse[-1]).perform()
        time.sleep(0.5)
        print('scroll')

    # turn into dataframe
    # (no DataFrame is built in this version; only the record count is printed)


    print(len(detail_list))

This exits at the `break` statement and doesn't scroll.

UPDATE:

code:

from selenium.webdriver.common.action_chains import ActionChains

# Second script version of the scraper: same setup and parsing as before, for
# zip code 94014. Everything is parsed from a single snapshot of the page
# source.
zip_code=94014



driver = webdriver.Chrome(executable_path=os.path.abspath("chromedriver"), chrome_options=chrome_options)


zip_url = 'https://www.zillow.com/homes/for_sale/'+str(zip_code)+'_rb/'

# waits between pulling data (random whole number of seconds from 3 to 6)
time.sleep(np.random.randint(low=3, high=7, size=1)[0])

# opens website
driver.get(zip_url)

# getting page source data
# This snapshot is taken once, before any scrolling; nothing below re-reads
# driver.page_source.
tstsoup = BeautifulSoup(driver.page_source)


# getting list of prices
# (price_parse holds BeautifulSoup tags, not elements obtained from `driver`)

price_parse=tstsoup.find_all('div',{'class':'list-card-price'})

price_list=[]

for x in range(len(price_parse)):

    # strip everything except digits and '.' before converting to int
    price_list.append(int(sub(r'[^\d.]', '',price_parse[x].get_text())))

# get address

address_parse=tstsoup.find_all('address',{'class':'list-card-addr'})

# address_parse[0].get_text()

address_list=[]

for i in range(len(address_parse)):

    address_list.append(address_parse[i].get_text())

# get home details

main_data=tstsoup.find_all('ul',{'class':'list-card-details'})
detail_list=[]
for data in main_data:
    # each <li> text is split on a space: second word becomes the key, first word the value
    d={  i.text.split(" ")[1] : i.text.split(" ")[0] for i in data.find_all("li",class_="")   }
    detail_list.append(d)

# creating dict of property details    

# one timestamp shared by every record of this run
ts=str(datetime.datetime.now().isoformat())

# home_dict is not used again in the code shown here
home_dict={}
# fields that get a -1 placeholder when a listing does not provide them
key_list=['bds','ba','sqft','acres']

# keys kept in each final record
col_list=['acres', 'address', 'ba', 'bds', 'last_update', 'price', 'sqft',
       'zip_code']

# Post-process each scraped detail dict in place. The scroll check and the
# debug prints further down are indented inside this loop as well, so they run
# once per record; each pass prints the whole list with one more record
# completed.
for x in range(len(detail_list)):

    # fill in -1 for every expected field the listing did not provide
    for k in key_list:

        if k not in list(detail_list[x].keys()):

            detail_list[x][k]=-1

    # adding price data

    detail_list[x]['price']=price_list[x]

    # adding address

    detail_list[x]['address']=address_list[x]

    # add zip code

    detail_list[x]['zip_code']=zip_code

    # add state

#     detail_list[x]['state']=state


    # adding last update date

    detail_list[x]['last_update']=ts

    # removing commas from numeric fields
#         detail_list[x]['sqft']=int(str(detail_list[x]['sqft']).replace(",", ""))

#         detail_list[x]['acres']=int(str(detail_list[x]['acres']).replace(",", ""))

    # self-assignments: the values stay as scraped (the conversions above are commented out)
    detail_list[x]['sqft']=detail_list[x]['sqft']

    detail_list[x]['acres']=detail_list[x]['acres']

    # cast strings to int

#         detail_list[x]['ba']=int(str(detail_list[x]['ba']).replace(",", ""))

#         detail_list[x]['bds']=int(str(detail_list[x]['bds']).replace(",", ""))

    # self-assignments: the values stay as scraped (the casts above are commented out)
    detail_list[x]['ba']=detail_list[x]['ba']

    detail_list[x]['bds']=detail_list[x]['bds']

    # filtering for list of columns
    detail_list[x] = {k: v for k, v in detail_list[x].items() if k in col_list}


    # sorting by key

    detail_list[x]=dict(sorted(detail_list[x].items()))

    print('made it')
    print('')

    # add scrolling
    # NOTE(review): tstsoup, price_parse and detail_list were built before this
    # loop and are not rebuilt after scrolling, so a successful scroll would
    # still not add records here.

    actions = ActionChains(driver)

    footer = driver.find_elements_by_css_selector('div#region-info-footer')
    # if the element exists on the page, the list is non-empty and therefore truthy
    if(footer):
        if not footer[0].is_displayed():
            #scroll the last currently available price_parse element into view. 
            #this will load a bulk of new elements
            # NOTE(review): price_parse[-1] is a BeautifulSoup tag, not an element
            # obtained from `driver`; move_to_element presumably needs the
            # latter -- confirm.
            actions.move_to_element(price_parse[-1]).perform()

            # waits between pulling data
            time.sleep(np.random.randint(low=3, high=7, size=1)[0])

            print('footer')
        else:
            #you can also use this case to exit the loop since you already scraped all the data on this page

            # with the break commented out, this branch only prints and the
            # loop continues with the next record
            print('break')
#             break
    else:        
        # if the footer element does not exist at all, the bottom has not been reached yet
        actions.move_to_element(price_parse[-1]).perform()

        # waits between pulling data
        time.sleep(np.random.randint(low=3, high=7, size=1)[0])

        print('scroll')

    # turn into dataframe
    # (no DataFrame is built in this version; the list is only printed)


    print(len(detail_list))

    print(detail_list)
    print('')

output:

made it

break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'bds': '2', 'ba': '2', 'sqft': '940'}, {'bds': '2', 'ba': '2', 'sqft': '1,394'}, {'bds': '2', 'ba': '2', 'sqft': '1,158'}, {'bds': '3', 'ba': '2', 'sqft': '1,640'}, {'bds': '3', 'ba': '3', 'sqft': '1,472'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}, {'bds': '2', 'ba': '1', 'sqft': '560'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]

made it

break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'bds': '2', 'ba': '2', 'sqft': '1,394'}, {'bds': '2', 'ba': '2', 'sqft': '1,158'}, {'bds': '3', 'ba': '2', 'sqft': '1,640'}, {'bds': '3', 'ba': '3', 'sqft': '1,472'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}, {'bds': '2', 'ba': '1', 'sqft': '560'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]

made it

break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'acres': -1, 'address': '691 Pointe Pacific Dr #4701, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 854000, 'sqft': '1,394', 'zip_code': 94014}, {'bds': '2', 'ba': '2', 'sqft': '1,158'}, {'bds': '3', 'ba': '2', 'sqft': '1,640'}, {'bds': '3', 'ba': '3', 'sqft': '1,472'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}, {'bds': '2', 'ba': '1', 'sqft': '560'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]

made it

break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'acres': -1, 'address': '691 Pointe Pacific Dr #4701, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 854000, 'sqft': '1,394', 'zip_code': 94014}, {'acres': -1, 'address': '535 Mountain View Dr APT 8, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 749000, 'sqft': '1,158', 'zip_code': 94014}, {'bds': '3', 'ba': '2', 'sqft': '1,640'}, {'bds': '3', 'ba': '3', 'sqft': '1,472'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}, {'bds': '2', 'ba': '1', 'sqft': '560'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]

made it

break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'acres': -1, 'address': '691 Pointe Pacific Dr #4701, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 854000, 'sqft': '1,394', 'zip_code': 94014}, {'acres': -1, 'address': '535 Mountain View Dr APT 8, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 749000, 'sqft': '1,158', 'zip_code': 94014}, {'acres': -1, 'address': '157 Westlake Ave, Daly City, CA 94014', 'ba': '2', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 998888, 'sqft': '1,640', 'zip_code': 94014}, {'bds': '3', 'ba': '3', 'sqft': '1,472'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}, {'bds': '2', 'ba': '1', 'sqft': '560'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]

made it

break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'acres': -1, 'address': '691 Pointe Pacific Dr #4701, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 854000, 'sqft': '1,394', 'zip_code': 94014}, {'acres': -1, 'address': '535 Mountain View Dr APT 8, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 749000, 'sqft': '1,158', 'zip_code': 94014}, {'acres': -1, 'address': '157 Westlake Ave, Daly City, CA 94014', 'ba': '2', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 998888, 'sqft': '1,640', 'zip_code': 94014}, {'acres': -1, 'address': '17 Honeysuckle Ct, Daly City, CA 94014', 'ba': '3', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1049888, 'sqft': '1,472', 'zip_code': 94014}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}, {'bds': '2', 'ba': '1', 'sqft': '560'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]

made it

break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'acres': -1, 'address': '691 Pointe Pacific Dr #4701, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 854000, 'sqft': '1,394', 'zip_code': 94014}, {'acres': -1, 'address': '535 Mountain View Dr APT 8, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 749000, 'sqft': '1,158', 'zip_code': 94014}, {'acres': -1, 'address': '157 Westlake Ave, Daly City, CA 94014', 'ba': '2', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 998888, 'sqft': '1,640', 'zip_code': 94014}, {'acres': -1, 'address': '17 Honeysuckle Ct, Daly City, CA 94014', 'ba': '3', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1049888, 'sqft': '1,472', 'zip_code': 94014}, {'acres': -1, 'address': '314 E Moltke St, Daly City, CA 94014', 'ba': '2', 'bds': '4', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1250000, 'sqft': '1,770', 'zip_code': 94014}, {'bds': '2', 'ba': '1', 'sqft': '560'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]

made it

break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'acres': -1, 'address': '691 Pointe Pacific Dr #4701, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 854000, 'sqft': '1,394', 'zip_code': 94014}, {'acres': -1, 'address': '535 Mountain View Dr APT 8, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 749000, 'sqft': '1,158', 'zip_code': 94014}, {'acres': -1, 'address': '157 Westlake Ave, Daly City, CA 94014', 'ba': '2', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 998888, 'sqft': '1,640', 'zip_code': 94014}, {'acres': -1, 'address': '17 Honeysuckle Ct, Daly City, CA 94014', 'ba': '3', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1049888, 'sqft': '1,472', 'zip_code': 94014}, {'acres': -1, 'address': '314 E Moltke St, Daly City, CA 94014', 'ba': '2', 'bds': '4', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1250000, 'sqft': '1,770', 'zip_code': 94014}, {'acres': -1, 'address': '172 Hillcrest Dr, Daly City, CA 94014', 'ba': '1', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 755000, 'sqft': '560', 'zip_code': 94014}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]

made it

break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'acres': -1, 'address': '691 Pointe Pacific Dr #4701, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 854000, 'sqft': '1,394', 'zip_code': 94014}, {'acres': -1, 'address': '535 Mountain View Dr APT 8, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 749000, 'sqft': '1,158', 'zip_code': 94014}, {'acres': -1, 'address': '157 Westlake Ave, Daly City, CA 94014', 'ba': '2', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 998888, 'sqft': '1,640', 'zip_code': 94014}, {'acres': -1, 'address': '17 Honeysuckle Ct, Daly City, CA 94014', 'ba': '3', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1049888, 'sqft': '1,472', 'zip_code': 94014}, {'acres': -1, 'address': '314 E Moltke St, Daly City, CA 94014', 'ba': '2', 'bds': '4', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1250000, 'sqft': '1,770', 'zip_code': 94014}, {'acres': -1, 'address': '172 Hillcrest Dr, Daly City, CA 94014', 'ba': '1', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 755000, 'sqft': '560', 'zip_code': 94014}, {'acres': -1, 'address': '309 E Moltke St, Daly City, CA 94014', 'ba': '2', 'bds': '4', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1199000, 'sqft': '1,770', 'zip_code': 94014}]

Upvotes: 0

Views: 873

Answers (1)

Prophet
Prophet

Reputation: 33361

I would suggest a Selenium-based approach: scroll until the footer element is visible.
Something like this:

from selenium.webdriver.common.action_chains import ActionChains

actions = ActionChains(driver)

def property_details(zip_code):

#here comes all your code
------------------
-----------------
#after it just add this:
# Fragment meant to be pasted into a loop: the `break` below needs an
# enclosing loop to be valid.
footer = driver.find_elements_by_css_selector('div#region-info-footer')
# if the element exists on the page, the list is non-empty and therefore truthy
if(footer):
    if not footer[0].is_displayed():
        #scroll the last currently available price_parse element into view. 
        #this will load a bulk of new elements
        # NOTE(review): in the question's code price_parse holds BeautifulSoup
        # tags; move_to_element presumably needs an element obtained from
        # `driver` -- confirm.
        actions.move_to_element(price_parse[-1]).perform()
        time.sleep(0.5)
    else:
        #you can also use this case to exit the loop since you already scraped all the data on this page
        break
else:        
    # if the footer element does not exist at all, the bottom has not been reached yet
    actions.move_to_element(price_parse[-1]).perform()
    time.sleep(0.5)

Upvotes: 1

Related Questions