Reputation:
I'm using selenium to iterate through the different combinations of a variable's parameters and download the data from a website. However, the for loop stops working when there's no data; I also noticed that when selenium stops, the webpage contains the text "Cannot produce results.". Thus, I figured I would use an if
statement with selenium to search for "Cannot produce results." and skip to the next iteration of the loop if that text is found. An example would be something like this:
import os
from selenium import webdriver
import zipfile
import pandas as pd
import time
# Download one dataset per parameter combination in `to_loop`, then convert
# the downloaded archive's data.asc into a CSV named after the selections.
#
# Relies on names defined outside this snippet: to_loop, username, password,
# geographic_area and country.
#
# NOTE(review): several string literals were re-joined from line-wrapped
# source text; confirm the selectors and the URL against the live page.

# Values that are the same for every iteration.
link = ('http://www.gaez.iiasa.ac.at/w/ctrl?'
        '_flow=Vwr&_view=Welcome&idAS=0&idFS=0&fieldmain=main_&idPS=0')
path = 'C:/.../Download'
destination_folder = 'C:/.../CSV_Files'

for i in to_loop:
    # The four selections that make up this combination.
    AES_and_P = i[0]
    Crop = i[1]
    Water_Supply = i[2]
    Input_Level = i[3]

    ## Access Chrome Driver to use selenium
    # Define Download Directory
    chrome_options = webdriver.ChromeOptions()
    prefs = {'download.default_directory': path}
    chrome_options.add_experimental_option('prefs', prefs)
    driver = webdriver.Chrome(
        executable_path='C:/.../chromedriver.exe',
        chrome_options=chrome_options)

    # try/finally guarantees the browser window is closed on every exit from
    # this iteration, including the early `continue` below and any exception.
    try:
        driver.get(link)

        # Enter username and password
        driver.find_element_by_name('_username').send_keys(username)
        driver.find_element_by_name('_password').send_keys(password)
        driver.find_element_by_id('buttonSubmit__login').click()

        # Click on Suitability and Potential Yield link
        driver.find_element_by_name('_targetfieldmain=main_py&_...').click()
        # Click on Agro-ecological suitability and productivity link
        driver.find_element_by_name('&fieldmain=main_py&idPS=0&...').click()
        # Click on Agro-ecological suitability and productivity list
        driver.find_element_by_css_selector(
            'input[value="{}"]'.format(AES_and_P)).click()

        # Click on crop link
        driver.find_element_by_css_selector(
            "input.linksubmit[value=\"▸ Crop\"]").click()
        driver.find_element_by_css_selector(
            'input[value="{}"]'.format(Crop)).click()

        # Click on Water Supply Link
        driver.find_element_by_css_selector(
            "input.linksubmit[value=\"▸ Water Supply\"]").click()
        driver.find_element_by_css_selector(
            'input[value="{}"]'.format(Water_Supply)).click()

        # Click on Input Level Link
        driver.find_element_by_css_selector(
            "input.linksubmit[value=\"▸ Input Level\"]").click()
        driver.find_element_by_css_selector(
            'input[value="{}"]'.format(Input_Level)).click()

        # Skip to the next combination if the "no data" text is shown.
        # find_elements_* returns an empty list when nothing matches, so the
        # list must be checked before indexing into it.
        # NOTE(review): a partial-link-text locator only matches <a>
        # elements; if the message is not a link this never matches --
        # confirm against the page markup.
        data_check = driver.find_elements_by_partial_link_text(
            'Cannot produce results.')
        if data_check and data_check[0].is_displayed():
            continue

        # Click on Time Period and Select Baseline
        driver.find_element_by_css_selector(
            "input.linksubmit[value=\"▸ Time Period\"]").click()
        driver.find_element_by_css_selector(
            "input.linksubmit[value=\"1961-1990\"]").click()

        # Click on Geographic Areas Link
        driver.find_element_by_css_selector(
            "input.linksubmit[value=\"▸ Geographic Areas\"]").click()
        # Unselect all countries
        driver.find_element_by_xpath(
            '//*[@id="fieldareaList__pln-1"]').click()
        # Close tab for Northern Africa
        driver.find_element_by_xpath(
            '//*[@id="rg1-66-Northern Africa"]/span').click()
        # Wait 1 second
        time.sleep(1)
        # Click geographic area then country
        driver.find_element_by_xpath(
            '//label[text()="{}"]/following-sibling::span'.format(
                geographic_area)).click()
        driver.find_element_by_xpath(
            '//label[text()="{}"]'.format(country)).click()

        # Click on Map Link
        driver.find_element_by_css_selector(
            "input.linksubmit[value=\"▸ Map\"]").click()
        # Download Data
        driver.find_element_by_xpath(
            '//*[@id="buttons"]/a[4]/img').click()
        # Wait 2 seconds
        time.sleep(2)

        # Pick the most recently modified file in the download directory.
        file_list = [os.path.join(path, f) for f in os.listdir(path)]
        time_sorted_list = sorted(file_list, key=os.path.getmtime)
        file_name = time_sorted_list[-1]

        # Decompress the zipped file; the context manager closes the archive.
        with zipfile.ZipFile(file_name) as myzip:
            # Wait 1 second
            time.sleep(1)
            myzip.extract('data.asc', destination_folder)

        # Save data.asc file as .csv; the new name reflects the selections.
        newfilename = (country + Crop + Water_Supply
                       + Input_Level + AES_and_P)
        # Raw string so that "\s" is a regex token, not a string escape.
        df = pd.read_table(os.path.join(destination_folder, 'data.asc'),
                           sep=r"\s+", skiprows=6, header=None)
        df.to_csv(os.path.join(destination_folder,
                               '{}.csv'.format(newfilename)))

        # Delete downloaded data.asc file
        delete_data_file = "C:/.../CSV_Files/data.asc"
        # if file exists, delete it
        if os.path.isfile(delete_data_file):
            os.remove(delete_data_file)
        else:  # Show error
            print("Error: %s file not found" % delete_data_file)
    finally:
        driver.close()
However, this code simply stops the function at continue, doesn't complete the download portion of the code, and iterates through the rest of the loop. Any idea how to solve this? Also, please let me know if the question is confusing.
Upvotes: 1
Views: 8357
Reputation: 4882
After our discussion in the comments, I believe I see at least part of your issue. The Python continue
keyword does not mean "continue with the rest of the control flow"; it means "continue to the next iteration of the loop, skipping everything after it".
For example, in the following piece of python code:
# Demonstrates that `continue` abandons only the current iteration:
# every value except 5 is appended.
a = []
for i in range(10):
    if i == 5:
        continue  # skip the append below and move on to i == 6
    a.append(i)
print(a)
The result would be:
[0, 1, 2, 3, 4, 6, 7, 8, 9]
And not, as your previous logic would suggest:
[5]
Thus to fix your code on the continue, you would need to flip the logic so that you skip if the condition is not true, for example:
if not data_check[0].is_displayed():
continue
While I've personally never encountered someone who had made the same error as you, I certainly empathize, as semantically it does appear to make more sense that "continue" would refer to continuing on with the next part of the program. Python's choice of continue
comes in large part from the historical usage of continue
as a keyword, particularly in C. In this case, we can see continue
as more of an extension of the break
statement into a "skip".
Upvotes: 2