Reputation: 183
Can you help me figure out why my code doesn't go to the next page after scraping the first one? I've tried a lot, but I can't figure it out.
# Question snippet (indentation was lost in this paste): loads a listing page,
# collects the ad links on it, opens each link and prints the text of the
# contact element (stored as `mobile`).
from selenium import webdriver
number = 1
# NOTE(review): the URL is formatted once here, while number == 1, and `page`
# is never reassigned below, so every driver.get(page) requests the same URL.
page = 'https://www.olx.com.eg/properties/alexandria/?page={}'.format(number)
driver = webdriver.Chrome()
while number <= 500:
driver.get(page)
# Collect the href of every element matching the selector before navigating away.
test = [li.get_attribute('href') for li in driver.find_elements_by_css_selector('#offers_table .ads__item__ad--title')]
for link in test:
driver.get(link)
# if driver.find_element_by_css_selector('h1.brkword.lheight28'):
# name = driver.find_element_by_css_selector('h1.brkword.lheight28').text
# else:
# name = ''
# date = driver.find_element_by_css_selector('span.brlefte5').text[26:41]
# bed_rooms = driver.find_element_by_css_selector('tr:nth-child(1) .col:nth-child(1) a').text
# bath_rooms = driver.find_element_by_css_selector('tr:nth-child(1) .col+ .col a').text
# area = driver.find_element_by_css_selector('tr:nth-child(2) .col:nth-child(1) strong').text
# furnished = driver.find_element_by_css_selector('tr+ tr .col+ .col a').text
# floor = driver.find_element_by_css_selector('#offerdescription > div.clr.descriptioncontent.marginbott20 > table > tbody > tr:nth-child(3) > td.col > table > tbody > tr > td > strong > a').text
# desc = driver.find_element_by_css_selector('#textContent .large').text
# address = driver.find_element_by_css_selector('.c2b').text
# price = driver.find_element_by_css_selector('.not-arranged').text
try:
# Click the contact element, then read the text of the same selector again
# (presumably the click reveals the full number - TODO confirm).
mobile_btn = driver.find_element_by_css_selector('#contact_methods > li > div > strong')
mobile_btn.click()
mobile = driver.find_element_by_css_selector('#contact_methods > li > div > strong').text
except:
# Any failure in the lookup or click falls back to a placeholder string.
mobile = 'no mobile found'
# owner = driver.find_element_by_css_selector('.user-box__info__name').text
print(mobile)
# NOTE(review): the nesting of this increment cannot be determined from the
# flattened paste - confirm that it sits inside the while loop.
number += 1
Upvotes: 0
Views: 292
Reputation: 183
Thank you all, this is the solution:
# Walk the listing pages one by one, open the first ad found on each page and
# print that ad's owner name together with the listing-page URL.
from selenium import webdriver

number = 1
driver = webdriver.Chrome()
try:
    while number <= 500:
        # Rebuild the URL on every iteration so each pass requests the next page.
        page = 'https://www.olx.com.eg/properties/alexandria/?page={}'.format(number)
        driver.get(page)
        # Read all hrefs before navigating: the located elements are no longer
        # usable once the browser has left the listing page.
        test = [li.get_attribute('href') for li in driver.find_elements_by_css_selector('#offers_table .ads__item__ad--title')]
        for link in test:
            driver.get(link)
            # Further fields, currently disabled:
            # if driver.find_element_by_css_selector('h1.brkword.lheight28'):
            #     name = driver.find_element_by_css_selector('h1.brkword.lheight28').text
            # else:
            #     name = ''
            # date = driver.find_element_by_css_selector('span.brlefte5').text[26:41]
            # bed_rooms = driver.find_element_by_css_selector('tr:nth-child(1) .col:nth-child(1) a').text
            # bath_rooms = driver.find_element_by_css_selector('tr:nth-child(1) .col+ .col a').text
            # area = driver.find_element_by_css_selector('tr:nth-child(2) .col:nth-child(1) strong').text
            # furnished = driver.find_element_by_css_selector('tr+ tr .col+ .col a').text
            # floor = driver.find_element_by_css_selector('#offerdescription > div.clr.descriptioncontent.marginbott20 > table > tbody > tr:nth-child(3) > td.col > table > tbody > tr > td > strong > a').text
            # desc = driver.find_element_by_css_selector('#textContent .large').text
            # address = driver.find_element_by_css_selector('.c2b').text
            # price = driver.find_element_by_css_selector('.not-arranged').text
            # try:
            #     mobile_btn = driver.find_element_by_css_selector('#contact_methods > li > div > strong')
            #     mobile_btn.click()
            #     mobile = driver.find_element_by_css_selector('#contact_methods > li > div > strong').text
            # except:
            #     mobile = 'no mobile found'
            owner = driver.find_element_by_css_selector('.user-box__info__name').text
            print(owner)
            print(page)
            # Stop after the first ad of this page and move on to the next page.
            break
        number += 1
finally:
    # Always close the browser session, even if a lookup above raises.
    driver.quit()
Upvotes: 0
Reputation: 33384
You just missed a step: you need to define the `page` variable inside the `while` loop, before `driver.get(page)` is called. You defined it outside the `while` loop, so it is never updated. Try this:
from selenium import webdriver

number = 1
driver = webdriver.Chrome()
while number <= 500:
    # Build the URL inside the loop so it reflects the current page number.
    page = 'https://www.olx.com.eg/properties/alexandria/?page={}'.format(number)
    # Safe to increment before the request: `page` already holds this pass's URL.
    number += 1
    driver.get(page)
Upvotes: 1
Reputation: 256
Your code is almost fine; I tested it and it goes through each link from the main page. The problem is that you have `number += 1` outside the scope of the `while` loop and you don't generate the new URL on each iteration.
# Visit every listing page, open each ad linked from it and print the text of
# the ad's contact element (or a placeholder when it cannot be read).
from selenium import webdriver
from selenium.common.exceptions import WebDriverException

number = 1
# Have a variable just for the page URL
page = 'https://www.olx.com.eg/properties/alexandria/?page='
driver = webdriver.Chrome()
while number <= 500:
    # Inside the loop add the page number
    driver.get('{}{}'.format(page, number))
    # Read all hrefs before navigating: the located elements are no longer
    # usable once the browser has left the listing page.
    test = [li.get_attribute('href') for li in driver.find_elements_by_css_selector('#offers_table .ads__item__ad--title')]
    for link in test:
        driver.get(link)
        try:
            mobile_btn = driver.find_element_by_css_selector('#contact_methods > li > div > strong')
            mobile_btn.click()
            mobile = driver.find_element_by_css_selector('#contact_methods > li > div > strong').text
        except WebDriverException:
            # Missing element, failed click, etc.: keep going with a placeholder
            # instead of aborting the whole crawl.
            mobile = 'no mobile found'
        # owner = driver.find_element_by_css_selector('.user-box__info__name').text
        print(mobile)
    # Must run once per listing page (inside the while loop), otherwise the
    # loop never advances past the first page.
    number += 1
The last line (`number += 1`) needs to be indented inside the `while` loop. If you want a more Pythonic version, iterate over a range instead:
# range() excludes its stop value, so 501 is needed to include page 500
# (matching `while number <= 500`).
for number in range(1, 501):
    ...  # loop body from the script above goes here, without the manual `number += 1`
Upvotes: 1
Reputation: 785
First, wait until the driver can find your button by setting an implicit wait with `driver.implicitly_wait(10)`. After clicking, also wait for some time so that the element containing your text appears.
Upvotes: 1