Reputation: 4575
I have the python code below. I'm using it to scrape property sale data from zillow. It uses selenium chrome driver to open a chrome browser. It then navigates to the property sale url for the zip code. I then use beautifulsoup to scrape the property details. The problem I'm having is that in the example below it's only scraping the first 9 properties data. I think it needs to scroll down and scrape more properties. Can anyone suggest how to scrape the rest of the properties info?
code:
import pandas as pd
import numpy as np
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
Options
options = Options()
chrome_options = Options()
#re allows for matching text with regular expressions (including through BeautifulSoup)
#dateutil.parser provies .parse() to convert plain text dates in a variety of formats into datetime objects
import re, dateutil.parser
#BeautifulSoup provide a model for the source HTML
from bs4 import BeautifulSoup
#Webdriver is interface to the selected browser (PhantomJS)
from selenium import webdriver
#Ability to select values in HTML <select> tags
from selenium.webdriver.support import select
import time
from selenium.webdriver.common import action_chains, keys
from re import sub
import datetime
def property_details(zip_code):
driver = webdriver.Chrome(executable_path=os.path.abspath("chromedriver"), chrome_options=chrome_options)
zip_url = 'https://www.zillow.com/homes/for_sale/'+str(zip_code)+'_rb/'
# waits between pulling data
time.sleep(np.random.randint(low=3, high=7, size=1)[0])
# opens website
driver.get(zip_url)
# getting page source data
tstsoup = BeautifulSoup(driver.page_source)
# getting list of prices
price_parse=tstsoup.find_all('div',{'class':'list-card-price'})
price_list=[]
for x in range(len(price_parse)):
price_list.append(int(sub(r'[^\d.]', '',price_parse[x].get_text())))
# get address
address_parse=tstsoup.find_all('address',{'class':'list-card-addr'})
# address_parse[0].get_text()
address_list=[]
for i in range(len(address_parse)):
address_list.append(address_parse[i].get_text())
# get home details
main_data=tstsoup.find_all('ul',{'class':'list-card-details'})
detail_list=[]
for data in main_data:
d={ i.text.split(" ")[1] : i.text.split(" ")[0] for i in data.find_all("li",class_="") }
detail_list.append(d)
# creating dict of property details
ts=str(datetime.datetime.now().isoformat())
home_dict={}
key_list=['bds','ba','sqft','acres']
col_list=['acres', 'address', 'ba', 'bds', 'last_update', 'price', 'sqft',
'zip_code']
for x in range(len(detail_list)):
for k in key_list:
if k not in list(detail_list[x].keys()):
detail_list[x][k]=-1
# adding price data
detail_list[x]['price']=price_list[x]
# adding address
detail_list[x]['address']=address_list[x]
# add zip code
detail_list[x]['zip_code']=zip_code
# add state
# detail_list[x]['state']=state
# adding last update date
detail_list[x]['last_update']=ts
# removing commas from numeric fields
# detail_list[x]['sqft']=int(str(detail_list[x]['sqft']).replace(",", ""))
# detail_list[x]['acres']=int(str(detail_list[x]['acres']).replace(",", ""))
detail_list[x]['sqft']=detail_list[x]['sqft']
detail_list[x]['acres']=detail_list[x]['acres']
# cast strings to int
# detail_list[x]['ba']=int(str(detail_list[x]['ba']).replace(",", ""))
# detail_list[x]['bds']=int(str(detail_list[x]['bds']).replace(",", ""))
detail_list[x]['ba']=detail_list[x]['ba']
detail_list[x]['bds']=detail_list[x]['bds']
# filtering for list of columns
detail_list[x] = {k: v for k, v in detail_list[x].items() if k in col_list}
# sorting by key
detail_list[x]=dict(sorted(detail_list[x].items()))
# turn into dataframe
property_df=pd.DataFrame(detail_list)
driver.quit()
return property_df
# only returns first 9 entries
property_details(zip_code=94014)
tried implementing suggestion
from selenium.webdriver.common.action_chains import ActionChains
zip_code=94016
driver = webdriver.Chrome(executable_path=os.path.abspath("chromedriver"), chrome_options=chrome_options)
zip_url = 'https://www.zillow.com/homes/for_sale/'+str(zip_code)+'_rb/'
# waits between pulling data
time.sleep(np.random.randint(low=3, high=7, size=1)[0])
# opens website
driver.get(zip_url)
# getting page source data
tstsoup = BeautifulSoup(driver.page_source)
# getting list of prices
price_parse=tstsoup.find_all('div',{'class':'list-card-price'})
price_list=[]
for x in range(len(price_parse)):
price_list.append(int(sub(r'[^\d.]', '',price_parse[x].get_text())))
# get address
address_parse=tstsoup.find_all('address',{'class':'list-card-addr'})
# address_parse[0].get_text()
address_list=[]
for i in range(len(address_parse)):
address_list.append(address_parse[i].get_text())
# get home details
main_data=tstsoup.find_all('ul',{'class':'list-card-details'})
detail_list=[]
for data in main_data:
d={ i.text.split(" ")[1] : i.text.split(" ")[0] for i in data.find_all("li",class_="") }
detail_list.append(d)
# creating dict of property details
ts=str(datetime.datetime.now().isoformat())
home_dict={}
key_list=['bds','ba','sqft','acres']
col_list=['acres', 'address', 'ba', 'bds', 'last_update', 'price', 'sqft',
'zip_code']
for x in range(len(detail_list)):
for k in key_list:
if k not in list(detail_list[x].keys()):
detail_list[x][k]=-1
# adding price data
detail_list[x]['price']=price_list[x]
# adding address
detail_list[x]['address']=address_list[x]
# add zip code
detail_list[x]['zip_code']=zip_code
# add state
# detail_list[x]['state']=state
# adding last update date
detail_list[x]['last_update']=ts
# removing commas from numeric fields
# detail_list[x]['sqft']=int(str(detail_list[x]['sqft']).replace(",", ""))
# detail_list[x]['acres']=int(str(detail_list[x]['acres']).replace(",", ""))
detail_list[x]['sqft']=detail_list[x]['sqft']
detail_list[x]['acres']=detail_list[x]['acres']
# cast strings to int
# detail_list[x]['ba']=int(str(detail_list[x]['ba']).replace(",", ""))
# detail_list[x]['bds']=int(str(detail_list[x]['bds']).replace(",", ""))
detail_list[x]['ba']=detail_list[x]['ba']
detail_list[x]['bds']=detail_list[x]['bds']
# filtering for list of columns
detail_list[x] = {k: v for k, v in detail_list[x].items() if k in col_list}
# sorting by key
detail_list[x]=dict(sorted(detail_list[x].items()))
print('made it')
print('')
# add scrolling
actions = ActionChains(driver)
footer = driver.find_elements_by_css_selector('div#region-info-footer')
#if element is existing on the page the list will be non-empty and interpreted as True by python`
if(footer):
if not footer[0].is_displayed():
#scroll the last currently available price_parse element into view.
#this will load a bulk of new elements
actions.move_to_element(price_parse[-1]).perform()
time.sleep(0.5)
print('footer')
else:
#you can also use this case to exit the loop since you already scraped all the data on this page
print('break')
break
else:
#if footer element is even not existing we 100% still not reached the bottom
actions.move_to_element(price_parse[-1]).perform()
time.sleep(0.5)
print('scroll')
# turn into dataframe
print(len(detail_list))
exits at the break statement, doesn't scroll
UPDATE:
code:
from selenium.webdriver.common.action_chains import ActionChains
zip_code=94014
driver = webdriver.Chrome(executable_path=os.path.abspath("chromedriver"), chrome_options=chrome_options)
zip_url = 'https://www.zillow.com/homes/for_sale/'+str(zip_code)+'_rb/'
# waits between pulling data
time.sleep(np.random.randint(low=3, high=7, size=1)[0])
# opens website
driver.get(zip_url)
# getting page source data
tstsoup = BeautifulSoup(driver.page_source)
# getting list of prices
price_parse=tstsoup.find_all('div',{'class':'list-card-price'})
price_list=[]
for x in range(len(price_parse)):
price_list.append(int(sub(r'[^\d.]', '',price_parse[x].get_text())))
# get address
address_parse=tstsoup.find_all('address',{'class':'list-card-addr'})
# address_parse[0].get_text()
address_list=[]
for i in range(len(address_parse)):
address_list.append(address_parse[i].get_text())
# get home details
main_data=tstsoup.find_all('ul',{'class':'list-card-details'})
detail_list=[]
for data in main_data:
d={ i.text.split(" ")[1] : i.text.split(" ")[0] for i in data.find_all("li",class_="") }
detail_list.append(d)
# creating dict of property details
ts=str(datetime.datetime.now().isoformat())
home_dict={}
key_list=['bds','ba','sqft','acres']
col_list=['acres', 'address', 'ba', 'bds', 'last_update', 'price', 'sqft',
'zip_code']
for x in range(len(detail_list)):
for k in key_list:
if k not in list(detail_list[x].keys()):
detail_list[x][k]=-1
# adding price data
detail_list[x]['price']=price_list[x]
# adding address
detail_list[x]['address']=address_list[x]
# add zip code
detail_list[x]['zip_code']=zip_code
# add state
# detail_list[x]['state']=state
# adding last update date
detail_list[x]['last_update']=ts
# removing commas from numeric fields
# detail_list[x]['sqft']=int(str(detail_list[x]['sqft']).replace(",", ""))
# detail_list[x]['acres']=int(str(detail_list[x]['acres']).replace(",", ""))
detail_list[x]['sqft']=detail_list[x]['sqft']
detail_list[x]['acres']=detail_list[x]['acres']
# cast strings to int
# detail_list[x]['ba']=int(str(detail_list[x]['ba']).replace(",", ""))
# detail_list[x]['bds']=int(str(detail_list[x]['bds']).replace(",", ""))
detail_list[x]['ba']=detail_list[x]['ba']
detail_list[x]['bds']=detail_list[x]['bds']
# filtering for list of columns
detail_list[x] = {k: v for k, v in detail_list[x].items() if k in col_list}
# sorting by key
detail_list[x]=dict(sorted(detail_list[x].items()))
print('made it')
print('')
# add scrolling
actions = ActionChains(driver)
footer = driver.find_elements_by_css_selector('div#region-info-footer')
#if element is existing on the page the list will be non-empty and interpreted as True by python`
if(footer):
if not footer[0].is_displayed():
#scroll the last currently available price_parse element into view.
#this will load a bulk of new elements
actions.move_to_element(price_parse[-1]).perform()
# waits between pulling data
time.sleep(np.random.randint(low=3, high=7, size=1)[0])
print('footer')
else:
#you can also use this case to exit the loop since you already scraped all the data on this page
print('break')
# break
else:
#if footer element is even not existing we 100% still not reached the bottom
actions.move_to_element(price_parse[-1]).perform()
# waits between pulling data
time.sleep(np.random.randint(low=3, high=7, size=1)[0])
print('scroll')
# turn into dataframe
print(len(detail_list))
print(detail_list)
print('')
output:
made it
break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'bds': '2', 'ba': '2', 'sqft': '940'}, {'bds': '2', 'ba': '2', 'sqft': '1,394'}, {'bds': '2', 'ba': '2', 'sqft': '1,158'}, {'bds': '3', 'ba': '2', 'sqft': '1,640'}, {'bds': '3', 'ba': '3', 'sqft': '1,472'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}, {'bds': '2', 'ba': '1', 'sqft': '560'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]
made it
break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'bds': '2', 'ba': '2', 'sqft': '1,394'}, {'bds': '2', 'ba': '2', 'sqft': '1,158'}, {'bds': '3', 'ba': '2', 'sqft': '1,640'}, {'bds': '3', 'ba': '3', 'sqft': '1,472'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}, {'bds': '2', 'ba': '1', 'sqft': '560'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]
made it
break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'acres': -1, 'address': '691 Pointe Pacific Dr #4701, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 854000, 'sqft': '1,394', 'zip_code': 94014}, {'bds': '2', 'ba': '2', 'sqft': '1,158'}, {'bds': '3', 'ba': '2', 'sqft': '1,640'}, {'bds': '3', 'ba': '3', 'sqft': '1,472'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}, {'bds': '2', 'ba': '1', 'sqft': '560'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]
made it
break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'acres': -1, 'address': '691 Pointe Pacific Dr #4701, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 854000, 'sqft': '1,394', 'zip_code': 94014}, {'acres': -1, 'address': '535 Mountain View Dr APT 8, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 749000, 'sqft': '1,158', 'zip_code': 94014}, {'bds': '3', 'ba': '2', 'sqft': '1,640'}, {'bds': '3', 'ba': '3', 'sqft': '1,472'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}, {'bds': '2', 'ba': '1', 'sqft': '560'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]
made it
break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'acres': -1, 'address': '691 Pointe Pacific Dr #4701, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 854000, 'sqft': '1,394', 'zip_code': 94014}, {'acres': -1, 'address': '535 Mountain View Dr APT 8, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 749000, 'sqft': '1,158', 'zip_code': 94014}, {'acres': -1, 'address': '157 Westlake Ave, Daly City, CA 94014', 'ba': '2', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 998888, 'sqft': '1,640', 'zip_code': 94014}, {'bds': '3', 'ba': '3', 'sqft': '1,472'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}, {'bds': '2', 'ba': '1', 'sqft': '560'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]
made it
break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'acres': -1, 'address': '691 Pointe Pacific Dr #4701, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 854000, 'sqft': '1,394', 'zip_code': 94014}, {'acres': -1, 'address': '535 Mountain View Dr APT 8, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 749000, 'sqft': '1,158', 'zip_code': 94014}, {'acres': -1, 'address': '157 Westlake Ave, Daly City, CA 94014', 'ba': '2', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 998888, 'sqft': '1,640', 'zip_code': 94014}, {'acres': -1, 'address': '17 Honeysuckle Ct, Daly City, CA 94014', 'ba': '3', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1049888, 'sqft': '1,472', 'zip_code': 94014}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}, {'bds': '2', 'ba': '1', 'sqft': '560'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]
made it
break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'acres': -1, 'address': '691 Pointe Pacific Dr #4701, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 854000, 'sqft': '1,394', 'zip_code': 94014}, {'acres': -1, 'address': '535 Mountain View Dr APT 8, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 749000, 'sqft': '1,158', 'zip_code': 94014}, {'acres': -1, 'address': '157 Westlake Ave, Daly City, CA 94014', 'ba': '2', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 998888, 'sqft': '1,640', 'zip_code': 94014}, {'acres': -1, 'address': '17 Honeysuckle Ct, Daly City, CA 94014', 'ba': '3', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1049888, 'sqft': '1,472', 'zip_code': 94014}, {'acres': -1, 'address': '314 E Moltke St, Daly City, CA 94014', 'ba': '2', 'bds': '4', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1250000, 'sqft': '1,770', 'zip_code': 94014}, {'bds': '2', 'ba': '1', 'sqft': '560'}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]
made it
break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'acres': -1, 'address': '691 Pointe Pacific Dr #4701, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 854000, 'sqft': '1,394', 'zip_code': 94014}, {'acres': -1, 'address': '535 Mountain View Dr APT 8, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 749000, 'sqft': '1,158', 'zip_code': 94014}, {'acres': -1, 'address': '157 Westlake Ave, Daly City, CA 94014', 'ba': '2', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 998888, 'sqft': '1,640', 'zip_code': 94014}, {'acres': -1, 'address': '17 Honeysuckle Ct, Daly City, CA 94014', 'ba': '3', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1049888, 'sqft': '1,472', 'zip_code': 94014}, {'acres': -1, 'address': '314 E Moltke St, Daly City, CA 94014', 'ba': '2', 'bds': '4', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1250000, 'sqft': '1,770', 'zip_code': 94014}, {'acres': -1, 'address': '172 Hillcrest Dr, Daly City, CA 94014', 'ba': '1', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 755000, 'sqft': '560', 'zip_code': 94014}, {'bds': '4', 'ba': '2', 'sqft': '1,770'}]
made it
break
9
[{'acres': -1, 'address': '543 Green Ridge Dr APT 4, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 898000, 'sqft': '1,443', 'zip_code': 94014}, {'acres': -1, 'address': '340 San Diego Ave, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 870000, 'sqft': '940', 'zip_code': 94014}, {'acres': -1, 'address': '691 Pointe Pacific Dr #4701, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 854000, 'sqft': '1,394', 'zip_code': 94014}, {'acres': -1, 'address': '535 Mountain View Dr APT 8, Daly City, CA 94014', 'ba': '2', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 749000, 'sqft': '1,158', 'zip_code': 94014}, {'acres': -1, 'address': '157 Westlake Ave, Daly City, CA 94014', 'ba': '2', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 998888, 'sqft': '1,640', 'zip_code': 94014}, {'acres': -1, 'address': '17 Honeysuckle Ct, Daly City, CA 94014', 'ba': '3', 'bds': '3', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1049888, 'sqft': '1,472', 'zip_code': 94014}, {'acres': -1, 'address': '314 E Moltke St, Daly City, CA 94014', 'ba': '2', 'bds': '4', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1250000, 'sqft': '1,770', 'zip_code': 94014}, {'acres': -1, 'address': '172 Hillcrest Dr, Daly City, CA 94014', 'ba': '1', 'bds': '2', 'last_update': '2021-07-26T21:32:21.259987', 'price': 755000, 'sqft': '560', 'zip_code': 94014}, {'acres': -1, 'address': '309 E Moltke St, Daly City, CA 94014', 'ba': '2', 'bds': '4', 'last_update': '2021-07-26T21:32:21.259987', 'price': 1199000, 'sqft': '1,770', 'zip_code': 94014}]
Upvotes: 0
Views: 873
Reputation: 33361
I would suggest you Selenium approach. Scroll until footer element is visible.
Something like this:
from selenium.webdriver.common.action_chains import ActionChains
actions = ActionChains(driver)
def property_details(zip_code):
#here comes all your code
------------------
-----------------
#after it just add this:
footer = driver.find_elements_by_css_selector('div#region-info-footer')
#if element is existing on the page the list will be non-empty and interpreted as True by python`
if(footer):
if not footer[0].is_displayed():
#scroll the last currently available price_parse element into view.
#this will load a bulk of new elements
actions.move_to_element(price_parse[-1]).perform()
time.sleep(0.5)
else:
#you can also use this case to exit the loop since you already scraped all the data on this page
break
else:
#if footer element is even not existing we 100% still not reached the bottom
actions.move_to_element(price_parse[-1]).perform()
time.sleep(0.5)
Upvotes: 1