HiFAR
HiFAR

Reputation: 48

scraping data after click on interactive code

I want to scrape the prices of every hotel from a tourism site. I'm already extracting the names and arrangements, but the problem is that the prices only show up after clicking on an arrangement, and I don't know how to deal with that.

The output I want to get:

{' Julius ': [('Petit Déjeuner', '216'),('Demi pension','264')]}

Here is my code, in case any of you can help me. Thank you in advance.

#!/usr/bin/env python
# coding: utf-8
import json
from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select


# Location of the ChromeDriver executable.  A raw string keeps the backslash
# literal instead of relying on "\c" being an unrecognised escape sequence
# (which triggers a DeprecationWarning/SyntaxWarning on current Pythons).
PATH = r"C:\chromedriver.exe"
driver = webdriver.Chrome(PATH)

# Open the booking site's start page; the search form is filled in below.
driver.get('https://tn.tunisiebooking.com/')
# Explicit-wait helper with a 20 second timeout (not used further down in the
# visible part of the script, which relies on a fixed sleep instead).
wait = WebDriverWait(driver, 20)

# Search criteria applied to the form.
params = {
    'destination': 'El Jem',
    'date_from': '08/08/2021',
    'date_to': '09/08/2021',
    'bedroom': '1'
}

# Choose the destination in the <select id="ville_des"> drop-down.
destination_select = Select(driver.find_element_by_id('ville_des'))
destination_select.select_by_value(params['destination'])

# Choose the number of bedrooms in the <select id="select_ch"> drop-down.
bedroom_select = Select(driver.find_element_by_id('select_ch'))
bedroom_select.select_by_value(params['bedroom'])

# Write the two dates straight into the "depart" and "checkin" inputs via
# JavaScript rather than typing into them.
script = f"document.getElementById('depart').value ='{params['date_from']}';"
script += f"document.getElementById('checkin').value ='{params['date_to']}';"
driver.execute_script(script)

# Submit the search, then pause for a fixed 10 seconds before the results
# are read.
btn_rechercher = driver.find_element_by_id('boutonr')
btn_rechercher.click()
sleep(10)

# click bouton details
#btn_plus = driver.find_element_by_id('plus_res')
#btn_plus.click()
#sleep(10)

# ----------------------------------------------------------------------------
# get list of all hotels
def _hotel_record(card):
    """Return the result dict for one hotel card element.

    Every lookup is an XPath query relative to *card*; the queries run in
    the order price, title, arrangements, price totals, name.
    """
    # Displayed price: text of the first <div> inside the "monaieprix" box,
    # with newlines stripped.
    price_box = card.find_element_by_xpath('.//div[@class="monaieprix"]')
    price_value = price_box.find_element_by_xpath('.//div[1]').text.replace('\n', '')

    # Element wrapping the hotel title; the name itself is read from it below.
    title_span = card.find_element_by_xpath(
        './/span[contains(@class, "tittre_hotel")]'
    )

    # Arrangement labels: the <u> elements under any "angle" <div>.
    arrangement_nodes = card.find_elements_by_xpath(
        './/div[contains(@class, "angle")]//u'
    )
    arrangement_labels = [node.text for node in arrangement_nodes]

    # Price totals: text of every <div> whose id contains "prixtotal".
    total_nodes = card.find_elements_by_xpath(
        './/div[contains(@id, "prixtotal")]'
    )
    totals = [node.text for node in total_nodes]

    return {
        'name': title_span.find_element_by_xpath('.//a//h3').text,
        'arrangements': arrangement_labels,
        'prixM': totals,
        'price': f'{price_value}'
    }


# One dict per hotel card ("enveloppe_produit" <div>) found on the page.
hotels_list = []
for hotel_card in driver.find_elements_by_xpath(
    '//div[contains(@class, "enveloppe_produit")]'
):
    hotels_list.append(_hotel_record(hotel_card))

# ----------------------------------------------------------------
#for hotel in hotels_list:
#    print(json.dumps(hotel, indent=4))

# Tabulate the scraped hotels.  Only the columns named here are selected,
# so the 'prixM' entries are not part of the resulting frame.
import pandas as pd

selected_columns = ['name', 'arrangements', 'price']
df = pd.DataFrame(data=hotels_list, columns=selected_columns)
# Preview of the first rows (rendered when run interactively).
df.head()

Upvotes: 2

Views: 196

Answers (1)

pmadhu
pmadhu

Reputation: 3433

It seems that the DOM keeps changing. So, based on the answers to this question and on StaleElementReferenceException, the code below might be useful for you.

from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
import time

# Start Chrome; "path" stands in for the location of the chromedriver binary.
driver = webdriver.Chrome(executable_path="path")
try:
    driver.maximize_window()
    # Implicit wait: element lookups keep retrying for up to 10 seconds.
    driver.implicitly_wait(10)
    driver.get("https://tn.tunisiebooking.com/")
    #Code to choose options.

    # XPath (relative to a hotel element) of the <div> holding the price;
    # it is re-queried after every click.
    price_xpath = ".//div[contains(@id,'prixtotal_')]"

    # Maps hotel name -> list of (arrangement text, price text) tuples.
    hoteldata = {}
    hotels = driver.find_elements_by_xpath("//div[starts-with(@id,'produit_affair')]")
    for hotel in hotels:
        name = hotel.find_element_by_tag_name("h3").text
        details = []

        # First record the "angle_active" arrangement together with the
        # price that is shown before anything is clicked.
        argmts = hotel.find_element_by_class_name("angle_active").text
        price = hotel.find_element_by_xpath(price_xpath).get_attribute("innerText")
        details.append((argmts, price))

        # Then click each "angle_desactive" arrangement in turn and read the
        # price again after a short pause.
        inactive = hotel.find_elements_by_xpath(".//div[@class='angle_desactive']")
        for item in inactive:
            try:
                label = item.get_attribute("innerText")
                item.click()
                time.sleep(2)
                new_price = hotel.find_element_by_xpath(price_xpath).get_attribute("innerText")
                details.append((label, new_price))
            except StaleElementReferenceException:
                # Deliberate best-effort: an element reference that went
                # stale is skipped and the remaining arrangements are
                # still tried.
                continue
        hoteldata[name] = details
    print(hoteldata)
finally:
    # Always shut the browser down, even when scraping raised, so no
    # Chrome/chromedriver process is left running.
    driver.quit()

Upvotes: 1

Related Questions