Reputation: 1
I am writing a script to scrape data from the website "https://pfchangsmexico.com.mx/ubicaciones/", and I want the latitude and longitude values of each restaurant. In the image ([enter image description here]) I have highlighted the latitude and longitude values, but I don't know how to write an XPath or a CSS selector for them. How do I write the XPath or CSS selector?
import scrapy
import re
from scrapy_selenium import SeleniumRequest
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
class BistroSpider(scrapy.Spider):
    """Spider that follows one link from the start page and prints what it finds.

    ``parse`` takes the first ``href`` of an ``<a>`` nested inside a ``<div>``
    on the start page and requests it through Selenium; ``parse_info`` then
    prints the names, addresses, postcodes and phone numbers selected from
    the rendered page, each followed by the number of matches.
    """

    name = "bistro"
    allowed_domains = ["pfchangsmexico.com.mx"]
    start_urls = ["https://pfchangsmexico.com.mx/index.html"]

    # Serialized markup of the home-delivery label; it matches the address
    # XPath but is not an address, so it is filtered out before cleaning.
    _DELIVERY_LABEL = '<span style="font-family: Avenir;">Servicio a domicilio:</span>'
    # Non-greedy match of a single HTML tag, used to strip markup from spans.
    _TAG_RE = re.compile(r'<.*?>')

    # Earlier attempt at driving a browser directly, kept for reference:
    # def __init__(self):
    #     super(BistroSpider, self).__init__()
    #     self.selenium = webdriver.Chrome()

    @staticmethod
    def _report(values):
        """Print the list of extracted values followed by how many there are."""
        print(values)
        print(len(values))

    def parse(self, response):
        """Request the page behind the first ``div a`` link via Selenium.

        The ``href`` is resolved against the response URL so that a relative
        link still yields a valid absolute request URL. If the page contains
        no matching link, a warning is logged and nothing is requested.
        """
        location_page = response.css('div a::attr(href)').get()
        if location_page is None:
            self.logger.warning("No link found on %s; nothing to follow", response.url)
            return
        yield SeleniumRequest(
            url=response.urljoin(location_page),
            callback=self.parse_info,
        )

    def parse_info(self, response):
        """Print restaurant names, addresses, postcodes and phone numbers.

        Every group is printed as the raw list of matches followed by its
        length, which makes mismatched counts between groups easy to spot.
        """
        # Earlier attempt at switching into the map iframe, kept for reference:
        # iframe_locator = (By.XPATH, '//div/iframe')
        # WebDriverWait(self.selenium, 10).until(
        #     EC.frame_to_be_available_and_switch_to_it(iframe_locator))

        # The first match is discarded; presumably it is not a restaurant
        # name -- TODO confirm against the page markup.
        res_names = response.xpath('//div/p[1]/span[1]/text()').getall()[1:]
        self._report(res_names)

        res_address = response.xpath('//div/p[1]/span[2]/text()').getall()
        self._report(res_address)

        # Work on the serialized spans (not text()) so nested markup can be
        # stripped by hand: drop the delivery label, then remove every tag.
        addresses = [
            self._TAG_RE.sub('', fragment.replace('</span>', ''))
            for fragment in response.xpath('//div/p/span[2]').getall()
            if fragment != self._DELIVERY_LABEL
        ]
        self._report(addresses)

        postcode = response.xpath('//div/p[1]/span[3]/text()').getall()
        self._report(postcode)

        # NOTE: the third Guadalajara location keeps its phone number under
        # p[3] rather than p[4], which is why the output shows 27 phone numbers:
        # //*[@id="guadalajara"]/div[2]/div[5]/div[1]/div/div/div[2]/div/p[3]/a[1]/span
        phoneno = response.xpath('//div/p[4]/a[1]/span[1]/text()').getall()
        self._report(phoneno)

        # Earlier attempt at dumping the iframe content, kept for reference:
        # iframe_element = self.selenium.find_element(By.XPATH, '//div/iframe')
        # iframe_content = iframe_element.get_attribute("innerHTML")
        # print("Iframe Content:")
        # print(iframe_content)
``` This is my code. In addition to the data above, I also want the latitude and longitude values of each restaurant.
Upvotes: 0
Views: 41
Reputation: 27105
Each iframe has a src attribute that is effectively a hyperlink to Google maps. That attribute contains the longitude and latitude.
These can be extracted as follows:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
IW = 10  # implicit wait passed to the driver before the page is queried
URL = 'https://pfchangsmexico.com.mx/ubicaciones/'


def parse_embed_coordinates(src):
    """Return ``(longitude, latitude)`` from a map-embed URL, or ``None``.

    The URL is split on ``!``; the first token prefixed with ``2d`` supplies
    the longitude and the first token prefixed with ``3d`` supplies the
    latitude. Both values are returned as strings, exactly as they appear in
    the URL. ``None`` is returned when ``src`` is empty or either token is
    missing, so iframes that are not map embeds can simply be skipped.
    """
    if not src:
        return None
    longitude = latitude = None
    for token in src.split('!'):
        if longitude is None and token.startswith('2d'):
            longitude = token[2:]
        elif latitude is None and token.startswith('3d'):
            latitude = token[2:]
    if longitude is None or latitude is None:
        return None
    return longitude, latitude


if __name__ == '__main__':
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--headless")
    # Machine-specific chromedriver location; adjust for your environment.
    service = Service('/Volumes/G-Drive/chromedriver')
    with webdriver.Chrome(service=service, options=chrome_options) as wbe:
        wbe.implicitly_wait(IW)
        wbe.get(URL)
        for iframe in wbe.find_elements(By.TAG_NAME, 'iframe'):
            coordinates = parse_embed_coordinates(iframe.get_attribute('src'))
            if coordinates is None:
                # Not a map embed (or no src at all): nothing to print.
                continue
            longitude, latitude = coordinates
            print('Longitude', longitude)
            print('Latitude', latitude)
            print()
Output:
Longitude -99.20608918499921
Latitude 19.441642086879245
Longitude -99.16426708499941
Latitude 19.428874086886445
Longitude -99.13233868499857
Latitude 19.48624618685391
...and so on
Upvotes: 0