Reputation: 48
I want to scrape data from a tourist site that shows a list of hotels. I'm extracting the hotel names and arrangements, but I'm stuck on extracting the price of every arrangement because the page is interactive: the price only shows up once I choose the arrangement. My code is below; if any of you can help me, thank you in advance.
#!/usr/bin/env python
# coding: utf-8
import json
from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
# Start a Chrome session, fill in the search form on the booking site and
# submit it, leaving the results page loaded in `driver`.

# create path and start webdriver
# Raw string: "\c" is not a recognised escape sequence and newer Python
# versions warn about it. The resulting path value is unchanged.
PATH = r"C:\chromedriver.exe"
driver = webdriver.Chrome(PATH)
# first get website
driver.get('https://tn.tunisiebooking.com/')
# explicit-wait helper with a 20 second timeout (not used in this section)
wait = WebDriverWait(driver, 20)
# params to select
params = {
    'destination': 'Tunis',
    'date_from': '08/08/2021',
    'date_to': '09/08/2021',
    'bedroom': '1',
}
# select destination
destination_select = Select(driver.find_element_by_id('ville_des'))
destination_select.select_by_value(params['destination'])
# select bedroom
bedroom_select = Select(driver.find_element_by_id('select_ch'))
bedroom_select.select_by_value(params['bedroom'])
# select dates
# The date inputs are filled through JavaScript. The values are handed over
# as script arguments instead of being spliced into the JS source, so a quote
# character inside a value cannot break the script.
# NOTE(review): 'depart' receives date_from and 'checkin' receives date_to;
# confirm these element ids map to the intended start/end date fields.
script = (
    "document.getElementById('depart').value = arguments[0];"
    "document.getElementById('checkin').value = arguments[1];"
)
driver.execute_script(script, params['date_from'], params['date_to'])
# click the search button
btn_rechercher = driver.find_element_by_id('boutonr')
btn_rechercher.click()
# fixed 10 second pause after submitting the search, before the page is read
sleep(10)
# click bouton details
#btn_plus = driver.find_element_by_id('plus_res')
#btn_plus.click()
#sleep(10)
# ----------------------------------------------------------------------------
# Build `hotels_list`: one dict per hotel card found on the results page.
def _scrape_hotel(card):
    """Return the record dict for a single hotel result element.

    The record holds the hotel name, the texts of its arrangement options,
    the texts of its "prixtotal" elements and the displayed price text.
    """
    # displayed price: first <div> inside the "monaieprix" container,
    # with line breaks removed
    price_box = card.find_element_by_xpath('.//div[@class="monaieprix"]')
    displayed_price = price_box.find_element_by_xpath('.//div[1]').text
    displayed_price = displayed_price.replace('\n', '')

    # element wrapping the hotel title
    title_span = card.find_element_by_xpath(
        './/span[contains(@class, "tittre_hotel")]'
    )

    # arrangement labels (text of every <u> under an "angle" div)
    arrangement_names = [
        node.text
        for node in card.find_elements_by_xpath(
            './/div[contains(@class, "angle")]//u'
        )
    ]

    # text of every element whose id contains "prixtotal"
    total_prices = [
        node.text
        for node in card.find_elements_by_xpath(
            './/div[contains(@id, "prixtotal")]'
        )
    ]

    return {
        'name': title_span.find_element_by_xpath('.//a//h3').text,
        'arrangements': arrangement_names,
        'prixM': total_prices,
        'price': displayed_price,
    }


hotels_list = []
hotels_objects = driver.find_elements_by_xpath(
    '//div[contains(@class, "enveloppe_produit")]'
)
# Append record by record so that already-scraped hotels stay in the list
# even if a later card raises.
for hotel_obj in hotels_objects:
    hotels_list.append(_scrape_hotel(hotel_obj))
# ----------------------------------------------------------------
#for hotel in hotels_list:
# print(json.dumps(hotel, indent=4))
import pandas as pd

# Tabulate the scraped hotel records, keeping only the listed columns.
# NOTE(review): each record also carries a 'prixM' key that is not among
# these columns, so it is left out of the frame. Confirm this is intended.
df = pd.DataFrame(
    data=hotels_list,
    columns=['name', 'arrangements', 'price'],
)
# Preview of the first rows; the return value is not stored or printed here.
df.head()
Upvotes: 2
Views: 124
Reputation: 3433
To get the prices for all the arrangement options, it is necessary to perform click operations.
The code below retrieves the first arrangement option (e.g. Breakfast) and its prices. The same process needs to be repeated for all the other available options.
# For every hotel card, click each room label and record the total price
# shown after the click. The result maps hotel name to a list of
# (room label text, price text) tuples.
hotels = driver.find_elements_by_xpath("//div[starts-with(@id,'produit_affair_')]")
hoteldata = {}
for hotel in hotels:
    name = hotel.find_element_by_tag_name("h3").text
    # arrangement option elements of this hotel (not used further in this snippet)
    arr = hotel.find_elements_by_tag_name("u")
    rooms = hotel.find_elements_by_tag_name("label")
    roomdata = []
    for room in rooms:
        # selecting the room is what makes the page show its price
        room.click()
        # The leading "." keeps the lookup inside this hotel's element. A bare
        # "//..." XPath searches the whole document even when called on an
        # element, so it would return the first hotel's price every time.
        rprize = hotel.find_element_by_xpath(
            ".//div[starts-with(@id,'prixtotal_')]"
        ).text
        roomdata.append((room.text, rprize))
    hoteldata[name] = roomdata
print(hoteldata)
And the output:
{'KANTA': [('Chambre Double ', '43'), ('Chambre Double Vue Piscine ', '50')], 'El Mouradi Palace': [('Chambre Double ', '50'), ('Chambre Double superieure ', '50')], 'Occidental Sousse Marhaba': [('Double Standard ', '50'), ('Chambre Double Vue Mer. ', '50')], 'Tui Blue Scheherazade': [('Double Standard Vue Mer ', '50'), ('Double -Swim Up ', '50')], 'Golf Residence GAS': [('Double--Standard ', '50')], 'Sindbad Center GAS': [('Chambre Double ', '50')], 'Iberostar Diar el Andalous': [('Double Standard ', '50'), ('Double Standard Vue Mer ', '50'), ('Double Prestige ', '50'), ('Suite-Junior Double ', '50')], 'Seabel AlHambra Beach Golf & Spa': [('Bungalow Double ', '50'), ('Chambre Double superieure ', '50')], 'Marhaba Palace': [('Chambre Double ', '50')], 'Cosmos Tergui Club': [('Chambre Double ', '50'), ('Double_vue Mer ', '50')], 'Riadh Palms': [('Chambre Double-superieure ', '50'), ('Chambre Double Superieure Vue Mer ', '50')], 'Royal Jinene': [('Chambre Double ', '50'), ('Double Standard Vue Mer ', '50')], 'Houria Palace': [('Chambre-double-vue piscine ', '50'), ('Chambre Double ', '50')], 'Marhaba Beach': [('Chambre Double ', '50')], 'Marhaba Club': [('Chambre Double ', '50'), ('Chambre Double Vue Mer ', '50')], 'Palmyra Aqua Park ex soviva': [('Chambre Double ', '50')], 'Sousse City & Beach Hotel': [('Double Standard ', '50'), ('Double Standard Vue Mer ', '50')], 'Sousse Pearl Marriott Resort & Spa': [('Chambre Double Standard ', '50'), ('Double Standard Vue Mer ', '50')], 'Riviera': [('Double Standard ', '50')], 'Concorde Green Park Palace': [('Double Standard ', '50'), ('Double Standard Vue Mer ', '50'), ('Suite Prestige Vue mer ', '50')]}
Upvotes: 2