Reputation: 103
So im trying to get all the statistics in the statistics box page on the url page for each team. An example of what the page looks like is on the hyperlink I put below. Im trying to have if so it prints out;
month : win % month : win % All time: win%
But I am not to sure how to write that code, since the last piece of code I wrote in the main was giving me an error.
http://www.gosugamers.net/counterstrike/teams/16448-nasty-gravy-runners
import time
import requests
from bs4 import BeautifulSoup
def get_all(url, base): # Well called it will print all the team links
r = requests.get(url)
page = r.text
soup = BeautifulSoup(page, 'html.parser')
for team_links in soup.select('div.details h3 a'):
members = int(team_links.find_next('th', text='Members:').find_next_sibling('td').text.strip().split()[0])
if members < 5:
continue
yield base + team_links['href']
next_page = soup.find('div', {'class': 'pages'}).find('span', text='Next')
while next_page:
# Gives the server a break
time.sleep(0.2)
r = requests.get(BASE_URL + next_page.find_previous('a')['href'])
page = r.text
soup = BeautifulSoup(page)
for team_links in soup.select('div.details h3 a'):
yield BASE_URL + team_links['href']
next_page = soup.find('div', {'class': 'pages'}).find('span', text='Next')
if __name__ == '__main__':
BASE_URL = 'http://www.gosugamers.net'
URL = 'http://www.gosugamers.net/counterstrike/teams'
for links in get_all(URL, BASE_URL): # When run it will generate all the links for all the teams
r = requests.get(links)
page = r.content
soup = BeautifulSoup(page)
for statistics in soup.select('div.statistics tr'):
win_rate = int(statistics.find('th', text='Winrate:').find_next_sibling('td'))
print(win_rate)
Upvotes: 0
Views: 75
Reputation: 180532
Not sure exactly what you want but this will get all the team stats:
from bs4 import BeautifulSoup, Tag
import requests
soup = BeautifulSoup(requests.get("http://www.gosugamers.net/counterstrike/teams/16448-nasty-gravy-runners").content)
table = soup.select_one("table.stats-table")
head1 = [th.text.strip() for th in table.select("tr.header th") if th.text]
head2 = [th.text.strip() for th in table.select_one("tr + tr") if isinstance(th, Tag)]
scores = [th.text.strip() for th in table.select_one("tr + tr + tr") if isinstance(th, Tag)]
print(head1, head2, scores)
Output:
([u'Jun', u'May', u'All time'], [u'Winrate:', u'0%', u'0%', u'0%'], [u'Matches played:', u'0 / 0 / 0', u'0 / 0 / 0', u'0 / 0 / 0'])
Upvotes: 1