Reputation: 11
I had facet grids and managed to make the bars horizontal. After that, I searched for appropriate code to display the percentages on the bars as before, but what I found isn't working as expected.
import seaborn as sns
import pandas as pd
import numpy as np
# Sample data: 20 respondents, each surveyed at baseline and at endline.
data = {
    'id': [1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20],
    'survey': ['baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline'],
    'level': ['low', 'high', 'medium', 'low', 'high', 'medium', 'medium', 'high', 'low', 'low', 'medium', 'high', 'low', 'medium', 'low', 'high', 'low', 'low', 'medium', 'high', 'high', 'high', 'high', 'medium', 'low', 'low', 'medium', 'high', 'low', 'medium', 'high', 'medium', 'low', 'high', 'high', 'medium', 'medium', 'low', 'high', 'low']
}
df = pd.DataFrame(data)
df.survey.value_counts()

# Prep data: one row per (survey, level) holding the number of respondents.
df = (
    df.groupby(["survey", "level"])
    .count()
    .sort_index(ascending=False)
    .rename(columns={"id": "count"})
)

# Add percentages: each count divided by the total of its own survey.
# transform("sum") returns the per-survey total aligned to every row, so the
# result does not depend on the survey names or on the order of the index.
survey_totals = df.groupby(level="survey")["count"].transform("sum")
df["pct"] = df["count"] * 100 / survey_totals

df = df.reset_index()
df
# One facet (column) per survey.
g = sns.FacetGrid(df, col="survey")

# Horizontal bars: percentage on x, level on y, one colour per level.
# Only column names are passed positionally. The original also passed
# plt.margins(x=0.5) here, which ran immediately and handed its return value
# to map_dataframe as an extra plotting variable.
# dodge=False keeps each bar centred on its own level even though hue repeats
# the y variable.
g.map_dataframe(
    sns.barplot, "pct", "level",
    hue="level", palette="Set1", orient="h", dodge=False,
)

# Leave room to the right of the bars for the percentage labels; margins are
# set on every facet axis rather than only on whichever axis is current.
for facet_ax in g.axes.flat:
    facet_ax.margins(x=0.5)

g.set_axis_labels("Percentage", "level")
def annotate_bars(ax=None, fmt='.2f', **kwargs):
    """Write each horizontal bar's width next to it as a percentage label.

    The bar widths are taken to be percentages already (0-100), so they are
    printed as-is; no global DataFrame or total is consulted.

    Parameters
    ----------
    ax : Axes-like, optional
        Object exposing ``patches`` and ``annotate``. When omitted, the
        current matplotlib axes is used.
    fmt : str
        Format spec applied to every width, e.g. ``'.1f'`` gives ``'30.0%'``.
    **kwargs
        Forwarded unchanged to ``ax.annotate`` (``fontsize``, ``color``, ...).
    """
    import math

    if ax is None:
        # Imported here so the function works even when the caller's module
        # never imported pyplot under the name ``plt``.
        import matplotlib.pyplot as plt
        ax = plt.gca()

    for p in ax.patches:
        width = p.get_width()
        # Placeholder bars without a value have a NaN width; skip them
        # instead of printing "nan%".
        if not math.isfinite(width):
            continue
        # Anchor the text just past the end of the bar, vertically centred.
        x = p.get_x() + width + 0.02
        y = p.get_y() + p.get_height() / 2
        ax.annotate(format(width, fmt) + '%', (x, y), **kwargs)
# Keyword options passed through g.map to annotate_bars.
label_opts = {'fmt': '.2g', 'fontsize': 8, 'color': 'k'}
g.map(annotate_bars, **label_opts)

# Title each facet after its survey, e.g. "baseline survey".
g.set_titles(col_template='{col_name} survey')
Upvotes: 1
Views: 875
Reputation: 62373
- As per the `seaborn.FacetGrid` documentation, use figure-level functions like `sns.catplot`.
- Add the annotations with `matplotlib.pyplot.bar_label`, which is thoroughly explained in this answer.
- Because the bars are horizontal, `.get_width` is used instead of `.get_height` when creating custom labels.
- Use `pandas.crosstab` to calculate percentages.
  - Use `normalize='index'` to normalize within each group in the 'survey' column.
  - Use `normalize='all'` to normalize over all values.
- Beginning with the initial `df`, this reduces your code from 27 lines to 6 lines.
- Tested in `python 3.11`, `pandas 1.5.2`, `matplotlib 3.6.2`, `seaborn 0.12.1`
# beginning with the initial dataframe
df = pd.DataFrame(data)

# share of each level within a survey, as a percentage, reshaped to long form
shares = pd.crosstab(df.survey, df.level, normalize='index') * 100
percent = shares.melt(ignore_index=False).reset_index()

# one facet of horizontal bars per survey
g = sns.catplot(data=percent, kind='bar', x='value', y='level', col='survey', aspect=1.3)

# label every bar in every facet; bars whose width is not positive get no text
for ax in g.axes.flat:
    for c in ax.containers:
        labels = []
        for v in c:
            w = v.get_width()
            labels.append(f'{w:0.1f}%' if w > 0 else '')
        ax.bar_label(c, labels=labels)
If the labels do not need to be customized, there is no need to build `labels` for `.bar_label`. Instead use the `fmt=` parameter.
# iterate through each Facet (axes)
# walk over every facet axis of the grid
for ax in g.axes.flat:
    # each container holds one group of bars drawn on that axis
    for container in ax.containers:
        # print the bar value with one decimal and a percent sign
        ax.bar_label(container, fmt='%0.1f%%')
percent
survey level value
0 baseline high 30.0
1 endline high 40.0
2 baseline low 40.0
3 endline low 30.0
4 baseline medium 30.0
5 endline medium 30.0
Alternatively, plot `df` directly
df = pd.DataFrame(data)
# percentage of each level within a survey, kept in wide form
percent = pd.crosstab(df.survey, df.level, normalize='index') * 100

# horizontal bars drawn straight from the DataFrame
ax = percent.plot.barh(figsize=(8, 6))

# write the value at the end of every bar, e.g. "30.0%"
for container in ax.containers:
    ax.bar_label(container, fmt='%0.1f%%')

# extra horizontal margin on the x-axis
ax.margins(x=0.1)
Upvotes: 1
Reputation: 37737
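You're close; you just need to use `sum/2` of the percentages in `df['pct']` rather than its length. `df['pct']` holds two surveys that each add up to 100, so `df['pct'].sum()/2` is 100 and every label then equals the width of its bar.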
def annotate_bars(ax=None, fmt='.2f', **kwargs):
    """Write each horizontal bar's width next to it as a percentage label.

    The bar widths are taken to be percentages already (0-100), so they are
    printed as-is. This avoids rescaling by ``df['pct'].sum()/2``, which is
    only correct for exactly two facets that each sum to 100.

    Parameters
    ----------
    ax : Axes-like, optional
        Object exposing ``patches`` and ``annotate``. When omitted, the
        current matplotlib axes is used.
    fmt : str
        Format spec applied to every width, e.g. ``'.1f'`` gives ``'30.0%'``.
    **kwargs
        Forwarded unchanged to ``ax.annotate`` (``fontsize``, ``color``, ...).
    """
    import math

    if ax is None:
        # Imported here so the function works even when the caller's module
        # never imported pyplot under the name ``plt``.
        import matplotlib.pyplot as plt
        ax = plt.gca()

    for p in ax.patches:
        width = p.get_width()
        # Placeholder bars without a value have a NaN width; skip them
        # instead of printing "nan%".
        if not math.isfinite(width):
            continue
        # Anchor the text just past the end of the bar, vertically centred.
        x = p.get_x() + width + 0.02
        y = p.get_y() + p.get_height() / 2
        ax.annotate(format(width, fmt) + '%', (x, y), **kwargs)
# Keyword options passed through g.map to annotate_bars.
label_opts = {'fmt': '.2g', 'fontsize': 8, 'color': 'k'}
g.map(annotate_bars, **label_opts)

# Title each facet after its survey, e.g. "baseline survey".
g.set_titles(col_template='{col_name} survey')
Output :
Upvotes: 1