Reputation: 25
I'm trying to loop through a script that parses tables with Beautiful Soup in Python 2.7.
The first pass through the loop parses the table and produces the expected results, but the second pass produces exactly the same results as the first.
Additional details:
Here is the script:
import urllib2
import csv
from bs4 import BeautifulSoup # latest version bs4
# Python 2 script: fetch two pages of the weekly projections table, collect the
# cell text of every row into `data`, and write it to NFLtable.csv.
# NOTE(review): the indentation of this paste was lost, so the nesting of the
# statements below has to be inferred -- confirm against the original script.

# Ask which week's projections to fetch. raw_input() already returns a string,
# so the str() call below does not change the value.
week = raw_input("Which week?")
week = str(week)
# Accumulates one list of non-empty cell texts per table row, across all pages.
data = []
# The request URL is assembled as first + <offset> + middle + <week> + last.
# NOTE(review): everything after '#' in `first` is a URL fragment, and HTTP
# clients do not send the fragment to the server, so the offset and week
# presumably never reach the site -- the likely reason every iteration
# returns the same table. Confirm by comparing the two downloaded pages.
first = "http://fantasy.nfl.com/research/projections#researchProjections=researchProjections%2C%2Fresearch%2Fprojections%253Foffset%253D"
middle = "%2526position%253DO%2526sort%253DprojectedPts%2526statCategory%253DprojectedStats%2526statSeason%253D2015%2526statType%253DweekProjectedStats%2526statWeek%253D"
last = "%2Creplace"
# This initial value is overwritten by the for loop below.
page_num = 1
for page_num in range(1,3):
# Value placed after the URL's offset marker: 1 for page_num 1, 26 for page_num 2.
page_mult = (page_num-1) * 25 +1
# NOTE(review): `next` shadows the Python builtin of the same name.
next = str(page_mult)
url = first + next + middle + week + last
print url #I added this in order to check my output
# Download the page and parse it with the lxml parser.
html = urllib2.urlopen(url).read()
soup = BeautifulSoup(html,"lxml")
# Locate the table by its class attribute and collect the rows of its <tbody>.
table = soup.find('table', attrs={'class':'tableType-player hasGroups'})
table_body = table.find('tbody')
rows = table_body.find_all('tr')
for row in rows:
# Take the stripped text of every <td> cell in the row.
cols = row.find_all('td')
cols = [ele.text.strip() for ele in cols]
data.append([ele for ele in cols if ele]) # Get rid of empty values
# Mode 'w' truncates NFLtable.csv on open; all rows collected in `data` so far
# are then written out.
b = open('NFLtable.csv', 'w')
a = csv.writer(b)
a.writerows(data)
b.close()
# NOTE(review): if this line sits inside the for loop it has no lasting effect,
# because the loop rebinds page_num at the start of the next iteration.
page_num =page_num+1
print data
Upvotes: 1
Views: 1835
Reputation: 571
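The part of your URL after the `#` is a fragment, which is never sent to the server, so both of your requests fetch the same page. On the actual page they use AJAX to request additional results; the server replies with JSON in which one of the values (`content`) is the table HTML.
I modified your code a bit; give it a try: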
import urllib2
import urllib
import csv
from bs4 import BeautifulSoup # latest version bs4
import json
# Python 2 script: collect the weekly projections table, one page of results
# per request, into `data` (one list of non-empty cell texts per table row).

# raw_input() already returns a string; strip stray whitespace so it cannot
# end up inside the query string.
week = raw_input("Which week?").strip()
data = []

# The offset and week travel as ordinary query parameters here, so the server
# actually receives them (unlike values placed after a '#' fragment).
url_format = "http://fantasy.nfl.com/research/projections?offset={offset}&position=O&sort=projectedPts&statCategory=projectedStats&statSeason=2015&statType=weekProjectedStats&statWeek={week}"

for page_num in range(1, 3):
    # Offset requested for this page: 1 for the first page, 26 for the second.
    page_mult = (page_num - 1) * 25 + 1
    url = url_format.format(week=week, offset=page_mult)
    print(url)  # I added this in order to check my output

    # With the Ajax-Request header the reply is parsed as JSON below; the
    # table markup is read from its 'content' value.
    request = urllib2.Request(url, headers={'Ajax-Request': 'researchProjections'})
    response = urllib2.urlopen(request)
    try:
        raw_json = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()
    parsed_json = json.loads(raw_json)
    html = parsed_json['content']

    soup = BeautifulSoup(html, "html.parser")
    table = soup.find('table', attrs={'class': 'tableType-player hasGroups'})
    table_body = table.find('tbody') if table is not None else None
    if table_body is None:
        # Fail with a clear message rather than an AttributeError on None.
        raise ValueError("no projections table in the response for offset %d" % page_mult)

    for row in table_body.find_all('tr'):
        cells = [cell.text.strip() for cell in row.find_all('td')]
        data.append([text for text in cells if text])  # Get rid of empty values

# Print the rows gathered from every page once all requests are done.
print(data)
I tested with week=4.
Upvotes: 1