Reputation: 671
I am trying to create a weather forecast by scraping web pages. (My previous question)
My code:
import time
import requests
from selenium import webdriver
from bs4 import BeautifulSoup
from keyboard import press_and_release
def weather_forecast2():
    print('Hello, I can search up the weather for you.')
    while True:
        inp = input('Where shall I search? Enter a place :').capitalize()
        print('Alright, checking the weather in ' + inp + '...')
        URL = 'https://www.yr.no/nb'
        "Search for a place"
        driver = webdriver.Edge()  # Open Microsoft Edge
        driver.get(URL)  # Goes to the HTML-page of the given URL
        element = driver.find_element_by_id("søk")  # Find the search input box
        element.send_keys(inp)  # Enter input
        press_and_release('enter')  # Press enter
        cURL = driver.current_url  # Current URL
        "Find data"
        driver.get(cURL)  # Goes to the HTML-page that appeared after pressing enter
        r = requests.get(cURL)  # GET request for the contents of the page
        print(r.content)  # Outputs the HTML code of the page
        soup = BeautifulSoup(r.content, 'html5lib')  # Parse the data with BeautifulSoup(HTML-string, HTML-parser)
I want to collect the temperatures from the page. I know that the XPaths to the elements I'm looking for are:
//*[@id="dailyWeatherListItem0"]/div[2]/div[1]/span[2]/span[1]/text()
//*[@id="dailyWeatherListItem0"]/div[2]/div[1]/span[2]/span[3]/text()
//*[@id="dailyWeatherListItem1"]/div[2]/div[1]/span[2]/span[1]/text()
//*[@id="dailyWeatherListItem1"]/div[2]/div[1]/span[2]/span[3]/text()
//*[@id="dailyWeatherListItem2"]/div[2]/div[1]/span[2]/span[1]/text()
//*[@id="dailyWeatherListItem2"]/div[2]/div[1]/span[2]/span[3]/text()
//*[@id="dailyWeatherListItem3"]/div[2]/div[1]/span[2]/span[1]/text()
//*[@id="dailyWeatherListItem3"]/div[2]/div[1]/span[2]/span[3]/text()
// etc...
Basically I want to collect the following two elements nine times:
//*[@id="dailyWeatherListItem{NUMBER 0-8}"]/div[2]/div[1]/span[2]/span[1]/text()
//*[@id="dailyWeatherListItem{NUMBER 0-8}"]/div[2]/div[1]/span[2]/span[3]/text()
How can I use driver.find_element_by_xpath to do this? Or is there a more efficient function?
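To make it concrete, this is roughly the loop I imagine (untested; as far as I know find_element_by_xpath cannot return text() nodes, so I target the span elements and read .text, and I am only guessing that span[1] and span[3] hold the two temperatures):
temperatures = []
for i in range(9):  # dailyWeatherListItem0 ... dailyWeatherListItem8
    base = f'//*[@id="dailyWeatherListItem{i}"]/div[2]/div[1]/span[2]'
    first = driver.find_element_by_xpath(base + '/span[1]').text   # first temperature span (my guess)
    second = driver.find_element_by_xpath(base + '/span[3]').text  # second temperature span (my guess)
    temperatures.append((first, second))
print(temperatures)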
Upvotes: 0
Views: 187
Reputation: 84455
Assuming you can correctly retrieve the URL, you can use it as the referer header, and take the location id from within that URL, to call the API which actually returns the forecasts. I don't have your definition of press_and_release, so the code is tested without that part.
import requests, re
from selenium import webdriver
from keyboard import press_and_release  # from the question; I could not test this part

# url = 'https://www.yr.no/nb/v%C3%A6rvarsel/daglig-tabell/2-6058560/Canada/Ontario/London'

def get_forecast(url: str) -> object:
    # The numeric location id sits between "daglig-tabell/" and the next "/" in the URL
    location_id = re.search(r'daglig-tabell/(.*?)/', url).group(1)
    headers = {'user-agent': 'Mozilla/5.0', 'referer': url}
    forecasts = requests.get(f'https://www.yr.no/api/v0/locations/{location_id}/forecast', headers=headers).json()
    return forecasts

def get_forecast_url():
    print('Hello, I can search up the weather for you.')
    driver = webdriver.Chrome()  # Open the browser (I changed from Edge to Chrome)
    while True:
        inp = input('Where shall I search? Enter a place :').capitalize()
        print('Alright, checking the weather in ' + inp + '...')
        URL = 'https://www.yr.no/nb'
        "Search for a place"
        driver.get(URL)  # Goes to the HTML-page of the given URL
        driver.find_element_by_id("page-header__search-button").click()  # Open search
        # Find the search input box
        element = driver.find_element_by_id("page-header__search-input")
        element.send_keys(inp)  # Enter input
        press_and_release('enter')  # Press enter
        cURL = driver.current_url  # Current URL
        print(get_forecast(cURL))
        driver.quit()
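If you are not sure which fields in the response hold the temperatures, dump the JSON first and pick them out from there; this is just a generic way to inspect the payload, not a description of yr.no's schema:
import json

# Example call using the URL from the comment above; pretty-print the start of the
# response to see its structure before parsing anything.
forecast = get_forecast('https://www.yr.no/nb/v%C3%A6rvarsel/daglig-tabell/2-6058560/Canada/Ontario/London')
print(json.dumps(forecast, indent=2)[:2000])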
Upvotes: 2