Reputation: 546
I am repeating the same code several times so I can scrape some news from a website. On weekends and holidays the website is not refreshed; news is only published on workdays.
This is the solution I created to get the previous workday's news, but I know it's not the right way to program it.
import smtplib, ssl
import datetime, time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException,StaleElementReferenceException, ElementClickInterceptedException
# Chrome options; headless mode stays disabled by the commented-out line below.
options = webdriver.ChromeOptions()
#options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
# Explicit-wait helper with a 10-second timeout, used by the wait.until(...) calls.
wait = WebDriverWait(driver, 10)
driver.get('https://fiis.com.br/atualizacoes/')
# Implicit wait of 10 seconds for element lookups, followed by a fixed 2-second pause.
# NOTE(review): the Selenium docs advise against combining implicit and explicit waits.
driver.implicitly_wait(10)
time.sleep(2)
# Try to close the HTML popup that has been on the page since last week.
# NOTE(review): the nesting below was reconstructed from a paste that lost its
# indentation; the div[2] button is treated as the fallback for div[1].
try:
    driver.find_element_by_xpath('/html/body/div[1]/div/button').click()
except (NoSuchElementException, ElementClickInterceptedException) as popup_error:
    # Only a missing first button is reported; an intercepted click is not.
    if isinstance(popup_error, NoSuchElementException):
        print("NoSuchElementException")
    # Either way, fall back to the button inside the second top-level <div>.
    try:
        driver.find_element_by_xpath('/html/body/div[2]/div/button').click()
    except NoSuchElementException:
        print("NoSuchElementException")
# Click the date control (presumably this reveals the list of selectable days).
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '[data-type="date"]'))).click()
# Here I try different dates: first today minus 1, then today minus 2 ... until today minus 4.
# Each attempt waits for the <li data-item="YYYY-MM-DD"> entry of that day and clicks it.
# The handlers use a tuple of exception types: `except A or B` would only catch A,
# because the expression `A or B` evaluates to A.
# NOTE(review): find_element_by_css_selector was removed in Selenium 4.3; on newer
# versions use driver.find_element(By.CSS_SELECTOR, ...) instead.
try:
    today = datetime.date.today()
    five_day = datetime.timedelta(days=-1)
    d_N1 = today + five_day
    d_N1_2 = d_N1.strftime('%Y-%m-%d')
    d_N1_3 = d_N1.strftime('%d.%m.%Y')
    wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, f'li[data-item="{d_N1_2}"]')))
    driver.find_element_by_css_selector(f'li[data-item="{d_N1_2}"]').click()
except (TimeoutException, NoSuchElementException):
    try:
        today = datetime.date.today()
        five_day = datetime.timedelta(days=-2)
        d_N1 = today + five_day
        d_N1_2 = d_N1.strftime('%Y-%m-%d')
        d_N1_3 = d_N1.strftime('%d.%m.%Y')
        wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, f'li[data-item="{d_N1_2}"]')))
        driver.find_element_by_css_selector(f'li[data-item="{d_N1_2}"]').click()
    except (TimeoutException, NoSuchElementException):
        try:
            today = datetime.date.today()
            five_day = datetime.timedelta(days=-3)
            d_N1 = today + five_day
            d_N1_2 = d_N1.strftime('%Y-%m-%d')
            d_N1_3 = d_N1.strftime('%d.%m.%Y')
            wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, f'li[data-item="{d_N1_2}"]')))
            driver.find_element_by_css_selector(f'li[data-item="{d_N1_2}"]').click()
        except (TimeoutException, NoSuchElementException):
            try:
                today = datetime.date.today()
                # Fourth attempt: today minus 4 (not a second try of minus 3).
                five_day = datetime.timedelta(days=-4)
                d_N1 = today + five_day
                d_N1_2 = d_N1.strftime('%Y-%m-%d')
                d_N1_3 = d_N1.strftime('%d.%m.%Y')
                wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, f'li[data-item="{d_N1_2}"]')))
                driver.find_element_by_css_selector(f'li[data-item="{d_N1_2}"]').click()
            except (TimeoutException, NoSuchElementException):
                # No entry found for any of the four days: shut the browser down.
                driver.close()
                driver.quit()
If yesterday's news is available, I don't need to go back to an earlier day.
Four days won't be enough in the future, so a better solution is needed.
Upvotes: 0
Views: 58
Reputation: 7998
you could replace the try-blocks with this:
# Furthest day offset to try; negative values are days in the past.
days_ago = -4
today = datetime.date.today()
# Walk backwards one day at a time: n = -1, -2, ..., days_ago.
for n in range(-1, days_ago - 1, -1):
    five_day = datetime.timedelta(days=n)
    d_N1 = today + five_day
    d_N1_2 = d_N1.strftime('%Y-%m-%d')
    d_N1_3 = d_N1.strftime('%d.%m.%Y')
    try:
        wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, f'li[data-item="{d_N1_2}"]')))
        # NOTE(review): find_element_by_css_selector was removed in Selenium 4.3;
        # on newer versions use driver.find_element(By.CSS_SELECTOR, ...) instead.
        driver.find_element_by_css_selector(f'li[data-item="{d_N1_2}"]').click()
        # The entry for this day was found and clicked: stop searching.
        break
    # Parenthesised tuple so the clause parses on every Python 3 version and
    # catches both exception types.
    except (TimeoutException, NoSuchElementException):
        # Only report a failure once the last candidate day has been tried.
        if n <= days_ago:
            print("failed to fetch data for any date")
This works by using a loop that runs for n = -1, -2, -3 and -4.
It continues until either all of the n values are exhausted,
or until `driver.find_element_by_css_selector`
doesn't raise an exception (that is the only way the `break`
statement can be reached).
Upvotes: 2
Reputation: 542
How about
# Generic retry skeleton: run the risky code up to `retrys` times and stop
# at the first attempt that completes without raising.
retrys = 5
for i in range(retrys):
    try:
        # code that might cause an exception
        ...
        # leave the loop
        break
    except Exception as exc:
        # Report the failed attempt instead of swallowing it silently; the
        # loop then moves on to the next try.
        print(f"attempt {i + 1} of {retrys} failed: {exc!r}")
Then you don't need to add more complexity with object orientation at this point.
Upvotes: 0