Reputation: 65
As an exercise, I was trying to analyse and visualize the following data: http://www.calvin.edu/~stob/data/Berkeley.csv. I managed to achieve what I had in mind, but my code seems very repetitive. Is there any way to achieve the following in fewer lines of code?
import pandas as pd
import matplotlib.pyplot as plt
# Load the admissions table. The columns used below are Gender, Dept, Admit
# and Freq.
df = pd.read_csv("ucb.csv")

# Split the rows by gender so each group can be pivoted on its own.
df_male = df[df['Gender'] == 'Male']
df_female = df[df['Gender'] == 'Female']

# Dept x Admit tables (one row per department); used for the overall totals.
# NOTE(review): pivot_table aggregates with the mean by default, so this
# presumably relies on every Gender/Dept/Admit combination appearing exactly
# once in the file -- confirm against the data.
dmd = pd.pivot_table(df_male, values="Freq", index='Dept',
                     columns='Admit')
dfd = pd.pivot_table(df_female, values="Freq", index='Dept',
                     columns='Admit')

# Admit x Dept tables (one column per department); used for the
# per-department pies.
ddm = pd.pivot_table(df_male, values="Freq", index='Admit',
                     columns='Dept')
ddf = pd.pivot_table(df_female, values="Freq", index='Admit',
                     columns='Dept')

# Per-department value lists for men and women. The parentheses keep each
# six-way tuple in a single statement across the line break.
# NOTE(review): the values are presumably ordered Admitted, Rejected (the
# pivot index order), which has to match `labels` below -- confirm.
ma, mb, mc, md, me, mf = (
    list(ddm['A']), list(ddm['B']), list(ddm['C']),
    list(ddm['D']), list(ddm['E']), list(ddm['F']),
)
fa, fb, fc, fd, fe, ff = (
    list(ddf['A']), list(ddf['B']), list(ddf['C']),
    list(ddf['D']), list(ddf['E']), list(ddf['F']),
)

# Overall admitted/rejected totals: a, b for men and c, d for women.
a, b, c, d = (
    dmd['Admitted'].sum(), dmd['Rejected'].sum(),
    dfd['Admitted'].sum(), dfd['Rejected'].sum(),
)

# Titles and data for the two summary pies.
males = "Accepted Vs Rejected ratio for " + str(a+b) + " Men"
females = "Accepted Vs Rejected ratio for " + str(c+d) + " Women"
male_ratio = [a, b]
female_ratio = [c, d]

# Wedge labels shared by every pie chart.
labels = "Accepted", "Rejected"
# Keyword arguments shared by every pie: the same two wedge labels and a
# one-decimal percentage printed on each wedge.
pie_style = {"labels": labels, "autopct": '%1.1f%%'}

# One 18x18 figure laid out as a 3-row by 6-column grid of subplots.
fig = plt.figure(figsize=(18, 18))

# Row 1 (grid positions 1 and 4): overall ratio per gender.
axm = fig.add_subplot(3, 6, 1)
axm.title.set_text(males)
axm.pie(male_ratio, **pie_style)

axf = fig.add_subplot(3, 6, 4)
axf.title.set_text(females)
axf.pie(female_ratio, **pie_style)

# Row 2 (grid positions 7-12): one pie per department for men.
axma = fig.add_subplot(3, 6, 7)
axma.title.set_text("Dept A Men")
axma.pie(ma, **pie_style)

axmb = fig.add_subplot(3, 6, 8)
axmb.title.set_text("Dept B Men")
axmb.pie(mb, **pie_style)

axmc = fig.add_subplot(3, 6, 9)
axmc.title.set_text("Dept C Men")
axmc.pie(mc, **pie_style)

axmd = fig.add_subplot(3, 6, 10)
axmd.title.set_text("Dept D Men")
axmd.pie(md, **pie_style)

axme = fig.add_subplot(3, 6, 11)
axme.title.set_text("Dept E Men")
axme.pie(me, **pie_style)

axmf = fig.add_subplot(3, 6, 12)
axmf.title.set_text("Dept F Men")
axmf.pie(mf, **pie_style)

# Row 3 (grid positions 13-18): one pie per department for women.
axfa = fig.add_subplot(3, 6, 13)
axfa.title.set_text("Dept A Women")
axfa.pie(fa, **pie_style)

axfb = fig.add_subplot(3, 6, 14)
axfb.title.set_text("Dept B Women")
axfb.pie(fb, **pie_style)

axfc = fig.add_subplot(3, 6, 15)
axfc.title.set_text("Dept C Women")
axfc.pie(fc, **pie_style)

axfd = fig.add_subplot(3, 6, 16)
axfd.title.set_text("Dept D Women")
axfd.pie(fd, **pie_style)

axfe = fig.add_subplot(3, 6, 17)
axfe.title.set_text("Dept E Women")
axfe.pie(fe, **pie_style)

axff = fig.add_subplot(3, 6, 18)
axff.title.set_text("Dept F Women")
axff.pie(ff, **pie_style)
Here is the output that I got: https://i.sstatic.net/4G0mI.jpg
Upvotes: 0
Views: 245
Reputation: 24440
Instead of creating a separate data frame for each gender, you can keep everything in a single pivot table by giving it a multi-level index (Gender and Admit). This lets you use a nested for-loop to create a pie chart for each department/gender combination, for example:
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv("ucb.csv")

# One table for both genders: rows are (Gender, Admit) pairs, columns are the
# departments.
pivot = df.pivot_table(values="Freq", index=['Gender', 'Admit'], columns=['Dept'])

# add a column with the total counts over all departments
pivot['total'] = pivot.sum(axis=1)

# Selecting a gender on the first index level leaves the per-Admit totals.
male_ratio = pivot['total']['Male']
female_ratio = pivot['total']['Female']

males = f"Accepted Vs Rejected ratio for {int(male_ratio.sum())} Men"
females = f"Accepted Vs Rejected ratio for {int(female_ratio.sum())} Women"

# Wedge labels shared by every pie chart.
labels = "Accepted", "Rejected"

# 3-row by 6-column grid: the two summary pies sit in the first row.
fig = plt.figure(figsize=(18, 18))
axm = fig.add_subplot(3, 6, 1)
axm.title.set_text(males)
axf = fig.add_subplot(3, 6, 4)
axf.title.set_text(females)
axm.pie(male_ratio, labels=labels, autopct='%1.1f%%')
axf.pie(female_ratio, labels=labels, autopct='%1.1f%%')

# The department pies start at grid position 7 (first cell of the second row)
# and fill the remaining cells in order: six for men, then six for women.
offset = 7
for gender, label in zip(("Male", "Female"), ('Men', 'Women')):
    for dept in 'ABCDEF':
        ax = fig.add_subplot(3, 6, offset)
        ax.title.set_text(f'Dept {dept} {label}')
        ax.pie(pivot[dept][gender], labels=labels, autopct='%1.1f%%')
        # Advance once per pie so each chart lands in the next grid cell.
        offset += 1
Upvotes: 1