Reputation: 517
I am trying to download csv files that end with the specific characters "VX.csv" from this link:
https://www.cboe.com/products/futures/market-data/historical-data-archive
Here is the code I adapted from another similar question:
# Import Key Modules
from bs4 import BeautifulSoup
import requests
import urllib.request
url = 'https://www.cboe.com/products/futures/market-data/historical-data-archive'
def scraper(url):
    """Fetch the first CSV linked from ``url`` whose href ends in "VX.csv".

    The page at ``url`` is downloaded and parsed, its anchor tags are scanned
    in document order, and the first matching link is downloaded. The size of
    the downloaded payload is printed; nothing is returned.

    Args:
        url: Address of the HTML page that lists the CSV links.
    """
    # Local import: the surrounding file only imports ``urllib.request``.
    from urllib.parse import urljoin

    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(url) as response:
        html = response.read()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(html, 'html.parser')

    # Retrieve all of the anchor tags
    for tag in soup('a'):
        href = tag.get('href')
        # Anchors without an href attribute yield None here; calling
        # .endswith on that is what raised the AttributeError.
        if not href or not href.endswith("VX.csv"):
            continue

        # Resolve relative links against the page address.
        csv_url = urljoin(url, href)
        # ... do something with the csv file....
        with urllib.request.urlopen(csv_url) as response:
            contents = response.read()
        print("csv file size=", len(contents))
        break  # we only needed this one file, so we end the loop.
scraper(url)
It gives me the following error:
AttributeError: 'NoneType' object has no attribute 'endswith'
I am not sure where I am going wrong. Does anyone have any clues?
Upvotes: 0
Views: 1428
Reputation: 11505
import requests
from bs4 import BeautifulSoup
def main(url):
    """Download every CSV linked from ``url`` whose href ends in "VX.csv".

    Each file is saved in the current working directory under the last path
    component of its URL. Files whose download returns an HTTP error status
    are reported and skipped rather than written to disk.

    Args:
        url: Address of the HTML page that lists the CSV links.

    Raises:
        requests.HTTPError: If the index page itself returns an error status.
    """
    # Local import so this block does not rely on module-level imports.
    from urllib.parse import urljoin

    with requests.Session() as req:
        r = req.get(url)
        # Without the index page there is nothing to scan, so fail loudly.
        r.raise_for_status()
        soup = BeautifulSoup(r.content, 'html.parser')

        # The CSS selector only matches anchors that have an href ending in
        # "VX.csv". urljoin resolves each one against the page address
        # instead of slicing a fixed-length prefix off ``url``.
        target = [
            urljoin(url, item['href'])
            for item in soup.select("a[href$='VX.csv']")
        ]

        for x in target:
            print(f"Downloading {x}")
            r = req.get(x)
            if not r.ok:
                # Do not save an error page under a .csv name.
                print(f"Skipping {x}: HTTP {r.status_code}")
                continue
            name = x.rsplit("/", 1)[-1]
            with open(name, 'wb') as f:
                f.write(r.content)
main("https://www.cboe.com/products/futures/market-data/historical-data-archive")
Output:
CFE_F05_VX.csv CFE_G09_VX.csv CFE_H13_VX.csv CFE_J18_VX.csv CFE_M06_VX.csv CFE_N10_VX.csv CFE_Q13_VX.csv CFE_U17_VX.csv CFE_X05_VX.csv CFE_Z09_VX.csv
CFE_F06_VX.csv CFE_G10_VX.csv CFE_H14_VX.csv CFE_K04_VX.csv CFE_M07_VX.csv CFE_N11_VX.csv CFE_Q14_VX.csv CFE_U18_VX.csv CFE_X06_VX.csv CFE_Z10_VX.csv
CFE_F07_VX.csv CFE_G11_VX.csv CFE_H15_VX.csv CFE_K05_VX.csv CFE_M08_VX.csv CFE_N12_VX.csv CFE_Q15_VX.csv CFE_V04_VX.csv CFE_X07_VX.csv CFE_Z11_VX.csv
CFE_F08_VX.csv CFE_G12_VX.csv CFE_H16_VX.csv CFE_K06_VX.csv CFE_M09_VX.csv CFE_N13_VX.csv CFE_Q16_VX.csv CFE_V05_VX.csv CFE_X08_VX.csv CFE_Z12_VX.csv
CFE_F09_VX.csv CFE_G13_VX.csv CFE_H17_VX.csv CFE_K07_VX.csv CFE_M10_VX.csv CFE_N14_VX.csv CFE_Q17_VX.csv CFE_V06_VX.csv CFE_X09_VX.csv CFE_Z13_VX.csv
CFE_F10_VX.csv CFE_G14_VX.csv CFE_H18_VX.csv CFE_K08_VX.csv CFE_M11_VX.csv CFE_N15_VX.csv CFE_Q18_VX.csv CFE_V07_VX.csv CFE_X10_VX.csv CFE_Z14_VX.csv
CFE_F11_VX.csv CFE_G15_VX.csv CFE_J06_VX.csv CFE_K09_VX.csv CFE_M12_VX.csv CFE_N16_VX.csv CFE_U04_VX.csv CFE_V08_VX.csv CFE_X11_VX.csv CFE_Z15_VX.csv
CFE_F12_VX.csv CFE_G16_VX.csv CFE_J07_VX.csv CFE_K10_VX.csv CFE_M13_VX.csv CFE_N17_VX.csv CFE_U06_VX.csv CFE_V09_VX.csv CFE_X12_VX.csv CFE_Z16_VX.csv
CFE_F13_VX.csv CFE_G17_VX.csv CFE_J08_VX.csv CFE_K11_VX.csv CFE_M14_VX.csv CFE_N18_VX.csv CFE_U07_VX.csv CFE_V10_VX.csv CFE_X13_VX.csv CFE_Z17_VX.csv
CFE_F14_VX.csv CFE_G18_VX.csv CFE_J09_VX.csv CFE_K12_VX.csv CFE_M15_VX.csv CFE_Q04_VX.csv CFE_U08_VX.csv CFE_V11_VX.csv CFE_X14_VX.csv
CFE_F15_VX.csv CFE_H05_VX.csv CFE_J10_VX.csv CFE_K13_VX.csv CFE_M16_VX.csv CFE_Q05_VX.csv CFE_U09_VX.csv CFE_V12_VX.csv CFE_X15_VX.csv
CFE_F16_VX.csv CFE_H06_VX.csv CFE_J11_VX.csv CFE_K14_VX.csv CFE_M17_VX.csv CFE_Q06_VX.csv CFE_U10_VX.csv CFE_V13_VX.csv CFE_X16_VX.csv
CFE_F17_VX.csv CFE_H07_VX.csv CFE_J12_VX.csv CFE_K15_VX.csv CFE_M18_VX.csv CFE_Q07_VX.csv CFE_U11_VX.csv CFE_V14_VX.csv CFE_X17_VX.csv
CFE_F18_VX.csv CFE_H08_VX.csv CFE_J13_VX.csv CFE_K16_VX.csv CFE_N04_VX.csv CFE_Q08_VX.csv CFE_U12_VX.csv CFE_V15_VX.csv CFE_X18_VX.csv
CFE_G05_VX.csv CFE_H09_VX.csv CFE_J14_VX.csv CFE_K17_VX.csv CFE_N06_VX.csv CFE_Q09_VX.csv CFE_U13_VX.csv CFE_V16_VX.csv CFE_Z05_VX.csv
CFE_G06_VX.csv CFE_H10_VX.csv CFE_J15_VX.csv CFE_K18_VX.csv CFE_N07_VX.csv CFE_Q10_VX.csv CFE_U14_VX.csv CFE_V17_VX.csv CFE_Z06_VX.csv
CFE_G07_VX.csv CFE_H11_VX.csv CFE_J16_VX.csv CFE_M04_VX.csv CFE_N08_VX.csv CFE_Q11_VX.csv CFE_U15_VX.csv CFE_V18_VX.csv CFE_Z07_VX.csv
CFE_G08_VX.csv CFE_H12_VX.csv CFE_J17_VX.csv CFE_M05_VX.csv CFE_N09_VX.csv CFE_Q12_VX.csv CFE_U16_VX.csv CFE_X04_VX.csv CFE_Z08_VX.csv
Upvotes: 1