In [1]:
import urllib2
from bs4 import BeautifulSoup
import pandas as pd
In [2]:
# Empty accumulator frame; a later cell concatenates one summary row onto it.
salary_columns = ['year', 'team', 'salary']
salary_df = pd.DataFrame([], columns=salary_columns)
In [3]:
# Season and team to scrape. Both values are substituted into the URL below
# and are reused by later cells (summary row and output filename).
year = 2016
team = 'GSW'

# Download the raw HTML of the team's season page. The response is closed in
# a `finally` block so the underlying connection is released even when
# read() raises.
response = urllib2.urlopen('http://www.basketball-reference.com/teams/{}/{}.html'.format(team,year))
try:
    html = response.read()
finally:
    response.close()
In [4]:
# Display the accumulator frame; no rows have been added to it at this point.
salary_df
Out[4]:
In [6]:
# Parse the downloaded page with Python's built-in HTML parser.
soup = BeautifulSoup(html, features='html.parser')
In [7]:
# Locate the salaries table by its element id. find() returns None when no
# element matches, so fail here with a clear message instead of letting a
# later cell hit an opaque AttributeError on team_stats.find_all().
team_stats = soup.find("table", id="salaries")
if team_stats is None:
    raise ValueError(
        'No <table id="salaries"> found on the {} {} page'.format(team, year)
    )
In [8]:
# Display the element returned by the "salaries" table lookup.
team_stats
Out[8]:
In [9]:
# Gather every <tr> of the salaries table and keep only each row's text.
data = team_stats.find_all("tr")
new_data = [row.text for row in data]
new_data
Out[9]:
In [10]:
# Turn each row's text into fields: newlines become commas, then the string
# is split on at most the first three commas so any commas after that point
# stay inside the last field.
new_data2 = [row_text.replace('\n', ',').split(',', 3) for row_text in new_data]

# Discard the first split element of every row.
new_data3 = [fields[1:] for fields in new_data2]
In [11]:
# Display the per-row field lists after the leading split element was removed.
new_data3
Out[11]:
In [12]:
# Keep the first two fields unchanged and strip the thousands separators and
# the dollar sign from the third. The result is a real list because the
# DataFrame cell indexes new_data4[0] for its header row.
new_data4 = [
    [fields[0], fields[1], fields[2].replace(',', '').replace('$', '')]
    for fields in new_data3
]
new_data4
Out[12]:
In [13]:
# The first parsed row supplies the column labels. It is also loaded as data
# row 0, so that row is dropped again, followed by the 'Rk' column.
header_row = new_data4[0]
df = (
    pd.DataFrame(new_data4, columns=header_row)
    .drop(0, axis=0)
    .drop('Rk', axis=1)
)
df
Out[13]:
In [14]:
# Sum the cleaned 'Salary' strings as float64. float32 has a 24-bit mantissa
# and represents integers exactly only up to 16,777,216, so larger dollar
# totals would be silently rounded.
tot_salary = df['Salary'].astype('float64').sum()

# One-row summary frame using the same columns as the salary_df accumulator.
current_entry = pd.DataFrame(
    [[year, team, tot_salary]],
    columns=['year', 'team', 'salary'],
)
current_entry
Out[14]:
In [15]:
# Append the summary row to the accumulator. ignore_index=True renumbers the
# result 0..n-1; without it every appended row keeps index label 0.
# NOTE: this cell is not idempotent; re-running it appends the same row again.
salary_df = pd.concat([salary_df, current_entry], ignore_index=True)
salary_df
Out[15]:
In [ ]:
# Write the accumulated totals to "<team>_salary.csv" in the working directory.
output_path = '{}_salary.csv'.format(team)
salary_df.to_csv(output_path)