yanachen
yanachen

Reputation: 3753

How to prevent an overlap in barplot using seaborn?

month   cate_cnt1_members   cate_cnt2_members   cate_cnt3_members   cate_cnt4_members   cate_cnt5_members   cate_cnt6_members   cate_cnt7_members   cate_cnt8_members   cate_cnt9_members   cate_cnt10_members  cate_cnt11_members  cate_cnt12_members  cate_cnt13_members  cate_cnt14_members
201501  93.525692   5.989799    0.455098    0.027863    0.001548    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201502  90.515995   8.396707    0.971026    0.107892    0.008380    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201503  82.525162   14.066414   2.836065    0.505229    0.061750    0.005380    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201504  75.546295   18.279664   4.884050    1.102780    0.172282    0.013621    0.001199    0.000109    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201505  71.142107   20.954861   6.278794    1.401423    0.206386    0.015837    0.000593    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201506  63.783161   23.386509   9.241094    2.914457    0.601408    0.067921    0.005178    0.000273    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201507  62.361179   23.364693   9.888232    3.445630    0.812055    0.116408    0.010563    0.001240    0.000000    0.000000    0.000000    0.000000    0.000000    0.0

The pandas DataFrame shown above gives each category's percentage, which varies from month to month. I want to use seaborn to draw a bar plot in which each month's bar is made up of the 14 category percentages (the second column through the last). Here is my code:

# Attempt at a stacked bar chart: one sns.barplot call per category column,
# all drawn on the same axes with the month on the x-axis.
# Every call draws its bars from zero, so the calls paint over one another
# instead of stacking -- this is the overlap the question asks about.

# Extra hex colours; only the first entry is used (for the 14th category).
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
f, ax = plt.subplots(figsize=(6, 15))
# Categories 1-10 take their colour from a 10-entry "Set2" palette.
sns.barplot(x = df['month'], y = df['cate_cnt1_members'], label='cate_cnt1_members',   color=sns.color_palette("Set2", 10)[0])
sns.barplot(x = df['month'], y = df['cate_cnt2_members'], label='cate_cnt2_members',   color=sns.color_palette("Set2", 10)[1])
sns.barplot(x = df['month'], y = df['cate_cnt3_members'], label='cate_cnt3_members',   color=sns.color_palette("Set2", 10)[2])
sns.barplot(x = df['month'], y = df['cate_cnt4_members'], label='cate_cnt4_members',   color=sns.color_palette("Set2", 10)[3])
sns.barplot(x = df['month'], y = df['cate_cnt5_members'], label='cate_cnt5_members',   color=sns.color_palette("Set2", 10)[4])
sns.barplot(x = df['month'], y = df['cate_cnt6_members'], label='cate_cnt6_members',   color=sns.color_palette("Set2", 10)[5])
sns.barplot(x = df['month'], y = df['cate_cnt7_members'], label='cate_cnt7_members',   color=sns.color_palette("Set2", 10)[6])
sns.barplot(x = df['month'], y = df['cate_cnt8_members'], label='cate_cnt8_members',   color=sns.color_palette("Set2", 10)[7])
sns.barplot(x = df['month'], y = df['cate_cnt9_members'], label='cate_cnt9_members',   color=sns.color_palette("Set2", 10)[8])
sns.barplot(x = df['month'], y = df['cate_cnt10_members'], label='cate_cnt10_members', color=sns.color_palette("Set2", 10)[9])
# Categories 11-13 use entries of the "Paired" palette, category 14 the first
# flatui colour.
sns.barplot(x = df['month'], y = df['cate_cnt11_members'], label='cate_cnt11_members', color=sns.color_palette("Paired")[0])
sns.barplot(x = df['month'], y = df['cate_cnt12_members'], label='cate_cnt12_members', color=sns.color_palette("Paired")[1])
sns.barplot(x = df['month'], y = df['cate_cnt13_members'], label='cate_cnt13_members', color=sns.color_palette("Paired")[4])
sns.barplot(x = df['month'], y = df['cate_cnt14_members'], label='cate_cnt14_members', color=sns.color_palette(flatui)[0])
# Each barplot call labels the y-axis with its own column name; set the final
# axis labels explicitly afterwards.
plt.ylabel("percentage of category scope count")
plt.xlabel(" Month")
# NOTE(review): "topper middle" does not look like a matplotlib legend
# location string -- probably "upper center" was intended; confirm.
ax.legend(ncol=7, loc="topper middle", frameon=True)
sns.despine(left=True, bottom=True)

The result is below, but I don't want the bars to overlap each other. I want the 14 components to sum to 100 and fill each bar completely. How can I achieve this? enter image description here

Upvotes: 2

Views: 5077

Answers (2)

Ted Petrou
Ted Petrou

Reputation: 61967

There is a trivially easy way to do this in pandas. First set the index to month, and then just create a stacked bar plot:

# Use the month as the row index so that it becomes the x-axis category.
df = df.set_index('month')
# Ask pandas for a bar chart in which the columns of each row are stacked
# into a single bar.
df.plot(kind='bar', stacked=True)

Doing this in seaborn is a little trickier: you must take the cumulative sum across each row and then plot that.

# Move 'month' into the index unless that has already been done: calling
# set_index('month') a second time would raise KeyError because the column
# no longer exists.
if 'month' in df.columns:
    df = df.set_index('month')
# Running total across the columns of each row: every column now holds the
# sum of itself and all columns to its left.
df = df.cumsum(axis=1)

And then make some small adjustments to your original code. Plot them in reverse order so the 100% bars are drawn first.

# Draw the cumulative frame as overlaid bars. Because every barplot call
# starts its bars at zero, the tallest series has to be drawn first; each
# shorter series is then painted on top of it, leaving only a slice of the
# taller bar visible, which reads as a stacked bar.
# Expects `df` to be indexed by month and to hold only the row-wise
# cumulative category columns, in their original left-to-right order.
f, ax = plt.subplots(figsize=(10, 15))

# Reverse the column order: after cumsum(axis=1) the last column carries the
# largest values, so it must go first or it would hide the others.
plot_order = list(df.columns[::-1])

# One distinct colour per category. (The "Set2" palette has only 8 colours,
# so requesting 10 entries from it cycles and makes categories share a
# colour; an evenly spaced "husl" palette has no such limit.)
colors = sns.color_palette("husl", len(plot_order))

for column, color in zip(plot_order, colors):
    sns.barplot(x=df.index, y=df[column], label=column, color=color)

# Each barplot call labels the y-axis with its own column name; set the final
# axis labels explicitly afterwards.
plt.ylabel("percentage of category scope count")
plt.xlabel(" Month")
ax.legend(ncol=7, loc="upper center", frameon=True)
sns.despine(left=True, bottom=True)

enter image description here

Upvotes: 3

Parfait
Parfait

Reputation: 107587

Consider melting your data from wide to long, then run a pivot table as source of a stacked bar graph:

from io import StringIO
import pandas as pd
from matplotlib import rc, pyplot as plt
import seaborn

# Sample input as inline CSV text: a header row followed by one row per month
# (201501-201506), with one percentage column per category.
data = """month,cate_cnt1_members,cate_cnt2_members,cate_cnt3_members,cate_cnt4_members,cate_cnt5_members,cate_cnt6_members,cate_cnt7_members,cate_cnt8_members,cate_cnt9_members,cate_cnt10_members,cate_cnt11_members,cate_cnt12_members,cate_cnt13_members,cate_cnt14_members
201501,93.525692,5.989799,0.455098,0.027863,0.001548,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201502,90.515995,8.396707,0.971026,0.107892,0.008380,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201503,82.525162,14.066414,2.836065,0.505229,0.061750,0.005380,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201504,75.546295,18.279664,4.884050,1.102780,0.172282,0.013621,0.001199,0.000109,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201505,71.142107,20.954861,6.278794,1.401423,0.206386,0.015837,0.000593,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201506,63.783161,23.386509,9.241094,2.914457,0.601408,0.067921,0.005178,0.000273,0.000000,0.000000,0.000000,0.000000,0.000000,0.0"""

# Parse the CSV text into a wide frame: one row per month, one column per
# category.
df = pd.read_csv(StringIO(data))

# Remember the category columns in their original left-to-right order.
# pivot_table sorts column labels as strings, which would otherwise place
# cate_cnt10..cate_cnt14 ahead of cate_cnt1 in both the stack and the legend.
category_cols = [col for col in df.columns if col != "month"]

# Wide -> long: one row per (month, category) pair, with the category name
# in "variable" and its percentage in "value".
dfm = pd.melt(df, id_vars="month")

# Apply seaborn's default styling to the matplotlib figure that follows.
seaborn.set()

# Long -> wide again (months as rows, categories as columns), restore the
# original category order, and let pandas draw the stacked bars.
stacked = dfm.pivot_table(values="value", columns="variable", index="month", aggfunc='sum')
stacked = stacked[category_cols]
stacked.plot.bar(stacked=True)

# Fetch the x tick labels so they can be turned horizontal below.
locs, labels = plt.xticks()
plt.legend(loc='upper center', ncol=7, frameon=True, shadow=False, prop={'size':8})
plt.setp(labels, rotation=0, rotation_mode="anchor", ha="center")
plt.show()

Stacked Bar Graph

Upvotes: 1

Related Questions