Reputation: 308
I have the following Pandas code.
import pandas as pd
# Sample survey data: one row per respondent, with their sex and whether
# they support the proposal.
DATA = pd.DataFrame(
    {
        'Sex': ["M", "M", "F", "F", "F", "F", "M"],
        'Support': ['N', 'Y', 'Y', 'N', "Y", "Y", "N"],
    }
)

# Count the rows for every (Support, Sex) pair and draw the counts as a
# single bar series; ``data`` holds whatever ``.plot`` returns.
data = (
    DATA
    .groupby(['Support', 'Sex'])
    .size()
    .plot(kind='bar')
)
I want it to output a bar plot like the following:
Notice that there are two groups, "No" and "Yes", and that the bars within each group have no space between them. Finally, the bars are colored. My code produces spaces between the bars and nasty x-labels. Any help would be appreciated. Thank you.
Upvotes: 2
Views: 900
Reputation: 18306
Something like:
from operator import methodcaller
import matplotlib.pyplot as plt
# Start from the question's frame (``DATA``) and replace the letters with words.
df = DATA.replace({"M": "Male", "F": "Female", "N": "No", "Y": "Yes"})

# Turn df into the desired format for groups: one row per Support value, one
# column per Sex, each cell holding the number of matching rows.
# fill_value=0 keeps a real count (instead of NaN) for combinations that never
# occur, so every bar drawn below has a matching label.
table = df.pivot_table(
    index="Support", columns="Sex", aggfunc="size", fill_value=0
)

# real plotting
fig_size = (16, 9)
ax = table.plot(kind="bar", figsize=fig_size)

# sort the rectangles (bars) from left to right
rectangles_sorted = sorted(ax.patches, key=methodcaller("get_x"))

# Flatten the table row by row into ((support, gender), count) pairs; the loop
# below pairs them with the sorted bars by position.
flattened_table = [
    ((support, gender), table.loc[support, gender])
    for support in table.index
    for gender in table.columns
]

# alternating colors of bars
colors = ["red", "blue"]

# for each rectangle...
for j, rectangle in enumerate(rectangles_sorted):
    # choose a color & set it (cycle through the palette)
    color = colors[j % len(colors)]
    rectangle.set_color(color)

    # generate a label: the gender on one line, the count below it
    (_support, gender), val = flattened_table[j]
    label = f"{gender}\n{val}"

    # put the label on the plot, centered just above the top of the bar
    r_width = rectangle.get_width()
    r_height = rectangle.get_height()
    r_xpos = rectangle.get_x()
    ax.text(
        r_xpos + r_width / 2,
        r_height + 0.01,
        label,
        ha="center",
        va="bottom",
        size=20,
    )

# some aesthetics
ax.set_xticklabels(ax.get_xticklabels(), size=20, rotation=0)
ax.set_xlabel("")
ax.get_yaxis().set_visible(False)
# rebuild the legend after the bars were recolored
ax.legend(fontsize=20)
plt.box(False)
I get:
Upvotes: 1
Reputation: 117
You can simplify the plotting with plotly.express.
import pandas as pd
import plotly.express as px #pip install plotly
DATA = pd.DataFrame(
    {
        'Sex': ["M", "M", "F", "F", "F", "F", "M"],
        'Support': ['N', 'Y', 'Y', 'N', "Y", "Y", "N"],
    }
)

# Create a DataFrame with the count info for the plot: one row per
# (Sex, Support) combination and the number of times it appears in DATA,
# i.e. the height of each bar.
# Values are taken in order of first appearance, and the full product of both
# columns is used so that a combination that never occurs still gets a row
# with a count of 0.
sexes = DATA['Sex'].unique()
supports = DATA['Support'].unique()
all_pairs = pd.MultiIndex.from_product(
    [sexes, supports], names=['Sex', 'Support']
)
df_plot = (
    DATA.groupby(['Sex', 'Support'])
    .size()
    .reindex(all_pairs, fill_value=0)
    .reset_index(name='Count')
)

# Just set the text shown for each color
df_plot = df_plot.replace({'M': 'Male', 'F': 'Female'})

# Plotting with plotly.express is pretty easy: ask for bars colored by Sex and
# arranged by Support. barmode='group' makes the different colors appear side
# by side.
fig = px.bar(df_plot, x='Support', y='Count', color='Sex', barmode='group')

# Update the x-axis so the tick text shows Yes and No
fig.update_layout(xaxis=dict(tickvals=['Y', 'N'], ticktext=['Yes', 'No']))

# Save as an interactive html, from which you can save a png.
fig.write_html('answer.html')
Upvotes: 1