Reputation: 13
For one of my personal projects, I'm trying to web-scrape some financial data, and I would like to schedule the script with Windows Task Scheduler so that it runs daily.
This is my current code:
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
import selenium.webdriver.support.ui as ui
from selenium.webdriver.support.ui import WebDriverWait
import selenium.webdriver.support.expected_conditions as EC
from bs4 import BeautifulSoup
# Chrome options that relax certificate checking. They are built here but are
# not used by the plain `requests` call below in this snippet.
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors')

# Page to fetch and the browser-like User-Agent header sent with the request.
mainurl = "https://apa.nexregreporting.com/home/portfoliocompression"
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.x Safari/53x'}

# Always pass a timeout: without one, requests can block forever, which is
# especially harmful for an unattended scheduled job.
page = requests.get(mainurl, headers=headers, timeout=30)
# Fail loudly on 4xx/5xx instead of silently parsing an error page.
page.raise_for_status()

# Parse the raw response body with the built-in HTML parser.
soup = BeautifulSoup(page.content, 'html.parser')
When I use this code, it gives me a ConnectionError:
HTTPSConnectionPool error, Max retries exceeded with url:
How do I get Python to click the blue search button and save the Excel file into a designated folder? I noticed that the HTML object for the blue search button isn't normal either.
The website is https://apa.nexregreporting.com/home/portfoliocompression
Upvotes: 0
Views: 2386
Reputation: 1150
This code opens Chrome using Selenium and downloads the file by clicking the button.
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Options for Chrome WebDriver: suppress notification pop-ups and configure
# download preferences so no "Save as" prompt interrupts the run.
op = Options()
op.add_argument('--disable-notifications')
op.add_experimental_option("prefs", {
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True,
})

# Download Path
download_dir = 'D:\\'

# Initializing the Chrome webdriver with the options
driver = webdriver.Chrome(options=op)
try:
    # Tell Chrome to allow downloads into download_dir. This uses the public
    # execute_cdp_cmd API rather than patching the private
    # command_executor._commands table.
    command_result = driver.execute_cdp_cmd(
        'Page.setDownloadBehavior',
        {'behavior': 'allow', 'downloadPath': download_dir},
    )
    driver.implicitly_wait(5)

    # Opening the page
    driver.get("https://apa.nexregreporting.com/home/portfoliocompression")

    # Click on the button. find_element(By.XPATH, ...) replaces
    # find_element_by_xpath, which no longer exists in Selenium 4.
    driver.find_element(By.XPATH, '//*[@class="btn btn-default"]').click()

    # Wait for 10 seconds after the click before shutting the browser down.
    time.sleep(10)
finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window. Running it in `finally`
    # ensures cleanup even when a step above raises.
    driver.quit()
Upvotes: 2