Reputation: 5559
Perhaps some of you can help me with the following graph...
I have a dataframe containing survey data on whether people traveled during the last year (`Yes`, `No`) and, if `Yes`, which transportation they used (`Airplane`, `Car`, `Train`).
import pandas as pd
import numpy as np
# Survey answers: whether each respondent traveled and, if so, by which means.
# Respondents who did not travel have a real missing value (np.nan), not the
# text 'np.nan', so pandas treats both 'No' rows as NaN.
data = {
    'Travel': ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No'],
    'Transporation': ['Airplaine', np.nan, 'Car', 'Train', 'Train', 'Car', np.nan],
}
df = pd.DataFrame(data, columns=['Travel', 'Transporation'])
Travel Transporation
0 Yes Airplaine
1 No NaN
2 Yes Car
3 Yes Train
4 Yes Train
5 Yes Car
6 No NaN
I draw a countplot of the first question and annotate each bar with the relative percentage of respondents who answered `Yes` and `No`.
import matplotlib.pyplot as plt  # needed for plt.show() below
import seaborn as sns

# Horizontal count plot of the Travel answers.
ax = sns.countplot(y='Travel', data=df, palette=['green', "red"])
# Rotate the tick labels without re-setting them (avoids the fixed-locator
# warning that set_yticklabels() raises on its own).
ax.tick_params(axis='y', labelrotation=45)
ax.set_title('Travel last year')
ax.set_ylabel('')

# Label the right-hand end of every bar with its share of all respondents.
total = len(df)
for p in ax.patches:
    percentage = f'{100 * p.get_width() / total:.1f}%'
    x = p.get_x() + p.get_width()
    # Nudge the text slightly so it sits near the bar's vertical centre.
    y = p.get_y() + p.get_height() / 2 - 0.05
    ax.annotate(percentage, (x, y), size=12)

plt.show()
In the same graph, I would like to turn the `Yes` bar into a stacked bar showing which transportation the people who answered `Yes` used.
The final graph should be something like this:
Upvotes: 2
Views: 1097
Reputation: 12496
The simplest way I know is to group and pivot the pandas dataframe like this:
# Missing transport answers (real NaN or the literal text 'np.nan') are put
# into a '_Hidden' bucket so that the 'No' row still has something to count.
labelled = df.fillna('_Hidden').replace('np.nan', '_Hidden')

# Number of respondents for every (Travel, Transporation) combination;
# reset_index() exposes the counts as a column named 0.
pair_counts = labelled.groupby(['Travel', 'Transporation']).size().reset_index()

# One row per Travel answer, one column per transport, counts as values.
df_plot = pair_counts.pivot(index='Travel', columns='Transporation', values=0)
Then you can plot with:
# Horizontal bars, one per Travel answer, with the transport counts stacked.
ax = df_plot.plot.barh(stacked=True)
Finally you can add the percentages:
total = len(df)
# Respondents per Travel answer (NaN cells of the pivot are skipped by sum()).
row_totals = df_plot.sum(axis=1)

# Share of all respondents, written at the right-hand end of each stacked bar.
# The position is taken from the row itself instead of comparing float bar
# edges with ==, which would also label an inner segment that happens to end
# at the other answer's count and labels zero-width segments twice.
for row, count in enumerate(row_totals):
    # Bars are centred on their integer row position; nudge the text a little.
    ax.annotate(f'{100 * count / total:.1f}%', (count, row - 0.05), size=12)

# Share of each transport among the 'Yes' respondents, centred in its segment.
if 'Yes' in df_plot.index:
    yes_row = df_plot.index.get_loc('Yes')
    yes_count = row_totals['Yes']
    # ax.containers holds one bar container per column, bars in row order.
    for bars in ax.containers:
        segment = bars[yes_row]
        width = segment.get_width()
        if width == 0:
            continue  # nothing drawn for this transport
        x, y = segment.get_xy()
        ax.text(x + width / 2,
                y + segment.get_height() / 2,
                f'{100 * width / yes_count:.0f} %',
                horizontalalignment='center',
                verticalalignment='center')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Survey answers; respondents who did not travel have no transport (NaN).
data = {
    'Travel': ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No'],
    'Transporation': ['Airplaine', np.nan, 'Car', 'Train', 'Train', 'Car', np.nan],
}
df = pd.DataFrame(data, columns=['Travel', 'Transporation'])

# Count respondents per (Travel, Transporation) pair: one row per answer, one
# column per transport.  Missing transport (real NaN or the literal text
# 'np.nan') goes into a '_Hidden' bucket so the 'No' row still gets a bar.
df_plot = (
    df.fillna('_Hidden')
    .replace('np.nan', '_Hidden')
    .groupby(['Travel', 'Transporation'])
    .size()
    .unstack(fill_value=0)
)

ax = df_plot.plot(kind='barh', stacked=True)

# Legend for the real transports only.  Handles and names are paired
# explicitly so the '_Hidden' bucket is skipped wherever it sorts.
shown = [(bars, name)
         for bars, name in zip(ax.containers, df_plot.columns)
         if name != '_Hidden']
ax.legend([bars for bars, _ in shown], [name for _, name in shown])

ax.tick_params(axis='y', labelrotation=45)
ax.set_title('Travel last year')
ax.set_ylabel('')

total = len(df)
row_totals = df_plot.sum(axis=1)  # respondents per Travel answer

# Share of all respondents, written at the right-hand end of each stacked bar.
# The position is taken from the row itself instead of comparing float bar
# edges with ==, which would also label an inner segment that happens to end
# at the other answer's count and labels zero-width segments twice.
for row, count in enumerate(row_totals):
    # Bars are centred on their integer row position; nudge the text a little.
    ax.annotate(f'{100 * count / total:.1f}%', (count, row - 0.05), size=12)

# Share of each transport among the 'Yes' respondents, centred in its segment.
if 'Yes' in df_plot.index:
    yes_row = df_plot.index.get_loc('Yes')
    yes_count = row_totals['Yes']
    # ax.containers holds one bar container per column, bars in row order.
    for bars in ax.containers:
        segment = bars[yes_row]
        width = segment.get_width()
        if width == 0:
            continue  # nothing drawn for this transport
        x, y = segment.get_xy()
        ax.text(x + width / 2,
                y + segment.get_height() / 2,
                f'{100 * width / yes_count:.0f} %',
                horizontalalignment='center',
                verticalalignment='center')

plt.show()
Upvotes: 3