Prathamesh
Prathamesh

Reputation: 1

How do I write an XPath for this?

I am writing a script to scrape data from the website "https://pfchangsmexico.com.mx/ubicaciones/", from which I want the latitude and longitude values of each restaurant. [enter image description here] In the image I have highlighted the latitude and longitude values, but I don't know how to write an XPath or a CSS selector for them. Please tell me how to write the XPath or CSS selector.

import scrapy
import re
from scrapy_selenium import SeleniumRequest
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver

class BistroSpider(scrapy.Spider):
    """Spider that prints restaurant details from pfchangsmexico.com.mx.

    ``parse`` follows the first ``div a`` link found on the start page, and
    ``parse_info`` prints the names, addresses, postcodes and phone numbers
    selected from the Selenium-rendered page behind that link.
    """

    name = "bistro"
    allowed_domains = ["pfchangsmexico.com.mx"]
    start_urls = ["https://pfchangsmexico.com.mx/index.html"]

    # Disabled experiment: drive a separate Chrome instance from the spider.
    # def __init__(self):
    #     super(BistroSpider, self).__init__()
    #     self.selenium = webdriver.Chrome()

    @staticmethod
    def _report(values):
        """Print *values* followed by how many entries it holds."""
        print(values)
        print(len(values))

    def parse(self, response):
        """Request the page linked by the first ``div a`` element.

        Args:
            response: Response for one of ``start_urls``.

        Yields:
            A ``SeleniumRequest`` for the linked page, handled by
            ``parse_info``. Nothing is yielded when no link is found.
        """
        location_page = response.css('div a::attr(href)').get()
        if not location_page:
            # .get() returns None when the selector matches nothing; a request
            # cannot be built from that, so report it and stop.
            self.logger.warning("No 'div a' link found on %s", response.url)
            return
        # The href may be relative; resolve it against the current page so the
        # request always receives an absolute URL.
        yield SeleniumRequest(
            url=response.urljoin(location_page),
            callback=self.parse_info,
        )

    def parse_info(self, response):
        """Print each group of restaurant fields and the size of that group.

        Args:
            response: Response for the page requested by ``parse``.
        """
        # Disabled experiment: wait for the map iframe and switch into it.
        # iframe_locator = (By.XPATH, '//div/iframe')
        # WebDriverWait(self.selenium, 10).until(
        #     EC.frame_to_be_available_and_switch_to_it(iframe_locator))

        # The first match is skipped, as in the original script; presumably it
        # is not a restaurant name -- verify against the live page.
        res_names = response.xpath('//div/p[1]/span[1]/text()').getall()[1:]
        self._report(res_names)

        res_address = response.xpath('//div/p[1]/span[2]/text()').getall()
        self._report(res_address)

        # Serialized second <span> of every paragraph: drop the delivery label
        # span, then strip the markup so only the text remains.
        addresses = [
            re.sub(r'<.*?>', '', span.replace('</span>', ''))
            for span in response.xpath('//div/p/span[2]').getall()
            if span != '<span style="font-family: Avenir;">Servicio a domicilio:</span>'
        ]
        self._report(addresses)

        postcode = response.xpath('//div/p[1]/span[3]/text()').getall()
        self._report(postcode)

        phoneno = response.xpath('//div/p[4]/a[1]/span[1]/text()').getall()
        self._report(phoneno)

        # Author's note: Guadalajara (3) keeps its phone number under p[3]
        # instead of p[4], which is why the output has 27 phone numbers:
        # //*[@id="guadalajara"]/div[2]/div[5]/div[1]/div/div/div[2]/div/p[3]/a[1]/span

        # Disabled experiment: dump the iframe markup through Selenium.
        # iframe_element = self.selenium.find_element(By.XPATH, '//div/iframe')
        # iframe_content = iframe_element.get_attribute("innerHTML")
        # print("Iframe Content:")
        # print(iframe_content)
This is my code; in it I also want the latitude and longitude values of each restaurant.

Upvotes: 0

Views: 41

Answers (1)

Adon Bilivit
Adon Bilivit

Reputation: 27105

Each iframe has a src attribute that is effectively a hyperlink to Google Maps. That attribute's value contains the longitude and latitude.

These can be extracted as follows:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

IW = 10  # implicit wait, in seconds, applied to element lookups
URL = 'https://pfchangsmexico.com.mx/ubicaciones/'
# Location of the chromedriver binary on the author's machine; adjust locally.
CHROMEDRIVER_PATH = '/Volumes/G-Drive/chromedriver'


def extract_coordinates(src):
    """Return ``(longitude, latitude)`` parsed from a map iframe ``src``.

    The ``src`` is split on ``!``; the sixth and seventh fields carry the
    longitude and latitude, each behind a two-character prefix.

    Args:
        src: Value of the iframe's ``src`` attribute, or ``None``.

    Returns:
        A ``(longitude, latitude)`` tuple of strings exactly as they appear
        in the URL, or ``None`` when ``src`` is missing or does not hold two
        numeric values in those positions.
    """
    if not src:
        return None
    tokens = src.split('!')
    if len(tokens) < 7:
        return None
    longitude, latitude = tokens[5][2:], tokens[6][2:]
    try:
        # Validation only: the original strings are returned so no precision
        # is lost to float formatting.
        float(longitude)
        float(latitude)
    except ValueError:
        return None
    return longitude, latitude


def main():
    """Open the locations page headlessly and print each map's coordinates."""
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--headless")

    service = Service(CHROMEDRIVER_PATH)

    with webdriver.Chrome(service=service, options=chrome_options) as wbe:
        wbe.implicitly_wait(IW)
        wbe.get(URL)
        for iframe in wbe.find_elements(By.TAG_NAME, 'iframe'):
            coordinates = extract_coordinates(iframe.get_attribute('src'))
            if coordinates is None:
                # Not a map embed (or no src at all): nothing to report.
                continue
            longitude, latitude = coordinates
            print('Longitude', longitude)
            print('Latitude', latitude)
            print()


if __name__ == '__main__':
    main()

Output:

Longitude -99.20608918499921
Latitude 19.441642086879245

Longitude -99.16426708499941
Latitude 19.428874086886445

Longitude -99.13233868499857
Latitude 19.48624618685391

...and so on

Upvotes: 0

Related Questions