Reputation: 48
I want to scrape prices of every hotel from a tourist site , i'm extracting names and arrangements butt he problem that the prices shows of after clic arrangments and i didn't know how to deal with it.
the out put i want to get :
{' Julius ': [('Petit Déjeuner', '216'),('Demi pension','264')]}
I put at your disposal my code if any of you can help me and thank you in advance.
#!/usr/bin/env python
# coding: utf-8
import json
from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
# create path and start webdriver
PATH = "C:\chromedriver.exe"
driver = webdriver.Chrome(PATH)
# first get website
driver.get('https://tn.tunisiebooking.com/')
wait = WebDriverWait(driver, 20)
# params to select
params = {
'destination': 'El Jem',
'date_from': '08/08/2021',
'date_to': '09/08/2021',
'bedroom': '1'
}
# select destination
destination_select = Select(driver.find_element_by_id('ville_des'))
destination_select.select_by_value(params['destination'])
# select bedroom
bedroom_select = Select(driver.find_element_by_id('select_ch'))
bedroom_select.select_by_value(params['bedroom'])
# select dates
script = f"document.getElementById('depart').value ='{params['date_from']}';"
script += f"document.getElementById('checkin').value ='{params['date_to']}';"
driver.execute_script(script)
# click bouton search
btn_rechercher = driver.find_element_by_id('boutonr')
btn_rechercher.click()
sleep(10)
# click bouton details
#btn_plus = driver.find_element_by_id('plus_res')
#btn_plus.click()
#sleep(10)
# ----------------------------------------------------------------------------
# get list of all hotels
hotels_list = []
hotels_objects = driver.find_elements_by_xpath(
'//div[contains(@class, "enveloppe_produit")]'
)
for hotel_obj in hotels_objects:
# get price object
price_object = hotel_obj.find_element_by_xpath(
'.//div[@class="monaieprix"]'
)
price_value = price_object.find_element_by_xpath(
'.//div[1]'
).text.replace('\n', '')
# get title data
title_data = hotel_obj.find_element_by_xpath(
'.//span[contains(@class, "tittre_hotel")]'
)
# get arrangements
arrangements_obj = hotel_obj.find_elements_by_xpath(
'.//div[contains(@class, "angle")]//u'
)
arrangements = [ao.text for ao in arrangements_obj]
# get arrangements
prixM_obj = hotel_obj.find_elements_by_xpath(
'.//div[contains(@id, "prixtotal")]'
)
prixM = [ao.text for ao in prixM_obj]
# create new object
hotels_list.append({
'name': title_data.find_element_by_xpath('.//a//h3').text,
'arrangements': arrangements,
'prixM':prixM,
'price': f'{price_value}'
})
# ----------------------------------------------------------------
#for hotel in hotels_list:
# print(json.dumps(hotel, indent=4))
import pandas as pd
df = pd.DataFrame(hotels_list, columns=['name','arrangements','price'])
df.head()
Upvotes: 2
Views: 196
Reputation: 3433
It seems that the DOM keeps changing. So based on the answers from this question and StaleElementReferenceException, below code might be useful for you.
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
import time
driver = webdriver.Chrome(executable_path="path")
driver.maximize_window()
driver.implicitly_wait(10)
driver.get("https://tn.tunisiebooking.com/")
#Code to choose options.
hoteldata = {}
hotels = driver.find_elements_by_xpath("//div[starts-with(@id,'produit_affair')]")
for hotel in hotels:
name = hotel.find_element_by_tag_name("h3").text
details = []
argmts = hotel.find_element_by_class_name("angle_active").text
prize = hotel.find_element_by_xpath(".//div[contains(@id,'prixtotal_')]").get_attribute("innerText")
details.append((argmts,prize))
inactive = hotel.find_elements_by_xpath(".//div[@class='angle_desactive']")
for item in inactive:
try:
n = item.get_attribute("innerText")
item.click()
time.sleep(2)
pri = hotel.find_element_by_xpath(".//div[contains(@id,'prixtotal_')]").get_attribute("innerText")
details.append((n,pri))
except StaleElementReferenceException:
pass
hoteldata[name]=details
print(hoteldata)
driver.quit()
Upvotes: 1