patronlargibi
patronlargibi

Reputation: 115

count and percentages of hue for each group seaborn countplot

I created a countplot with the pseudo-data below. I already have the percentage of each bar relative to the whole dataset. However, I want to show, for each answer group, the percentage that each hue level makes up within that group.

Pseudo-data:

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import random

# Reproducible pseudo-data: `size` rows, each with an answer code in
# [lo, hi] and a binary sex code.
lo = 0
hi = 10
size = 256
random.seed(1)  # fixed seed so every run draws the same sample

answer = [random.randint(lo, hi) for _ in range(size)]
sex = [random.randint(0, 1) for _ in range(size)]

data = {'sex': sex, 'answer': answer}
df = pd.DataFrame(data)

# Replace the numeric answer codes with letter labels.
di = {0:'A',1:'B',2:'C',3:'D',4:'E',5:'F',
      6:'G',7:'H',8:'I',9:'J',10:'K'}
df = df.replace({'answer': di})

# Replace the numeric sex codes with readable labels.
di = {0:'Male',1:'Female'}
df = df.replace({'sex': di})
df = df.sort_values(by=['answer','sex'])

#See count of groups:
# The table is built from `df`, the frame created above.
pd.pivot_table(df,
              index='answer',
              columns='sex',
              aggfunc='size')

Here is the plotting code I tried:

# Horizontal countplot of answers split by sex; every bar is annotated
# with its share of all rows in `df`.
sns.set(rc={'figure.figsize':(22,10)})
total = float(len(df))

ax = sns.countplot(y="answer", hue="sex", data=df)

# Annotate each bar just past its right edge.
for bar in ax.patches:
    count = bar.get_width()
    share_of_total = round((count/total)*100, 1)
    # x is offset from the bar's end, y from the bar's lower edge
    ax.text(
        count+.12,
        bar.get_y()+.3,
        '%' + str(share_of_total),
        fontsize=15,
        color='dimgrey',
    )

ax.set_xlabel('Count',fontsize=20)
ax.set_ylabel('Answers',fontsize=20)
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, which='major', labelsize=20)

# Legend placed outside the axes, anchored at the upper-left corner.
plt.legend(
    bbox_to_anchor=(1.05, 1),
    loc='upper left',
    borderaxespad=0.,
    prop={'size': 14},
)

ax.set_title("""
XXXX
""", fontsize=20,loc='left')

# Optional export:
#plt.savefig('test.png', dpi=400,bbox_inches='tight')

Here is the final plot. Note: I currently get the values circled in black, but I want the values circled in red. enter image description here

Upvotes: 1

Views: 1797

Answers (2)

patronlargibi
patronlargibi

Reputation: 115

I adapted r-beginners's solution to work with more than two hue groups.

# Pseudo-data with three hue groups. `lo`, `hi` and `size` are the values
# defined in the question's setup code.
answer = [random.randint(lo, hi) for _ in range(size)]
group = [random.randint(0, 2) for _ in range(size)]
data = {'group': group, 'answer': answer}

df = pd.DataFrame(data)

# Replace the numeric answer codes with letter labels.
di = {0:'A',1:'B',2:'C',3:'D',4:'E',5:'F',
      6:'G',7:'H',8:'I',9:'J',10:'K'}
df = df.replace({'answer': di})

# Replace the numeric group codes with group labels.
di = {0:'G1',1:'G2',2:'G3'}
df = df.replace({'group': di})
df = df.sort_values(by=['answer','group'])

#See count of groups:
# fill_value=0 records an answer/group combination with no rows as a zero
# count instead of NaN; a NaN count would turn every ratio of that answer
# row into NaN.
df1 = pd.pivot_table(df,
              index='answer',
              columns='group',
              aggfunc='size',
              fill_value=0)

# Share of each group within its answer row.
row_total = df1['G1'] + df1['G2'] + df1['G3']
df1['ratio1'] = df1['G1'] / row_total
df1['ratio2'] = df1['G2'] / row_total
df1['ratio3'] = df1['G3'] / row_total

## Graph code:

# Horizontal countplot of answers split by group; every bar is annotated
# with its group's share within that answer, read from the ratio columns
# of `df1`.
sns.set(rc={'figure.figsize':(22,10)})

ax = sns.countplot(y="answer", hue="group", data=df)

idx = df1.index.to_list()
n_answers = len(idx)
# One (ratio column, text colour) pair per hue group.
# NOTE(review): assumes ax.patches lists every bar of the first group, then
# every bar of the second, then the third, each in `idx` order -- the same
# assumption the original n-based branches made. Verify with your seaborn
# version.
ratio_styles = [('ratio1', 'r'), ('ratio2', 'g'), ('ratio3', 'b')]

# percentage of bars
for n, bar in enumerate(ax.patches):
    # Patch n maps to hue group n // n_answers and answer row n % n_answers.
    # The original tested the constant `df1.shape[0]-1 <= 10` instead of n,
    # so only the first branch ever ran and idx[n] overflowed.
    group_pos, answer_pos = divmod(n, n_answers)
    if group_pos >= len(ratio_styles):
        # Patches beyond the expected answer x group bars have no ratio.
        break
    column, colour = ratio_styles[group_pos]
    share = df1.loc[idx[answer_pos], column]
    # get_width pulls left or right; get_y pushes up or down
    ax.text(bar.get_width()+.82, bar.get_y()+.3, '%' + str(round(share*100, 1)),
            fontsize=15, color=colour)

ax.set_ylabel('Answers',fontsize=20)
ax.set_xlabel('Count',fontsize=20)
ax.tick_params(axis='x', which='major', labelsize=20)
ax.tick_params(axis='y', which='major', labelsize=20)

plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.,
          prop={'size': 14})

If you have a more suitable solution, I'd be happy to hear it. enter image description here

Upvotes: 1

r-beginners
r-beginners

Reputation: 35265

Add a column of composition ratios to the pivot table of counts, then write each bar's label from that column inside the loop. A conditional branch is needed because the loop runs over 22 patches (11 answers × 2 sexes), while the ratio column has only 11 rows.

# Count rows per answer/sex pair, then add the Female share of each answer.
df1 = df.pivot_table(index='answer', columns='sex', aggfunc='size')
df1['ratio'] = df1['Female'].div(df1['Female'].add(df1['Male']))
df1
sex Female  Male    ratio
answer          
A   9   20  0.310345
B   8   10  0.444444
C   10  9   0.526316
D   13  11  0.541667
E   11  12  0.478261
F   7   10  0.411765
G   16  10  0.615385
H   15  9   0.625000
I   18  14  0.562500
J   9   13  0.409091
K   8   14  0.363636

import matplotlib.pyplot as plt
import seaborn as sns

# Horizontal countplot of answers split by sex. Every bar gets two labels:
# its share of all rows (grey) and its sex's share within that answer (red).
total = float(df.shape[0])

sns.set(rc={'figure.figsize':(22,10)})

ax = sns.countplot(y="answer", hue="sex", data=df)

idx = df1.index.to_list()
# Number of answer rows, taken from the table instead of hard-coding 11.
n_answers = len(idx)
# percentage of bars
# NOTE(review): assumes the first n_answers patches are the Female bars and
# the next n_answers the Male bars, each in `idx` order -- the same
# assumption the original n <= 10 / n >= 11 branches made. Verify with your
# seaborn version.
for n, bar in enumerate(ax.patches):
    # get_width pulls left or right; get_y pushes up or down
    ax.text(bar.get_width()+.12, bar.get_y()+.3, '%' + str(round((bar.get_width()/total)*100, 1)), fontsize=15, color='dimgrey')
    if n < n_answers:
        # Female bar: the ratio column holds the Female share directly.
        ax.text(bar.get_width()+.82, bar.get_y()+.3, '%' + str(round(df1.loc[idx[n],'ratio']*100, 1)), fontsize=15, color='r')
    elif n < 2*n_answers:
        # Male bar: the complement of the Female share for the same answer.
        ax.text(bar.get_width()+.82, bar.get_y()+.3, '%' + str(round(100-df1.loc[idx[n - n_answers],'ratio']*100, 1)), fontsize=15, color='r')
    # Patches beyond the two expected groups have no ratio row, so they get
    # only the grey label instead of indexing past the table.

ax.set_ylabel('Answers',fontsize=20)
ax.set_xlabel('Count',fontsize=20)
ax.tick_params(axis='x', which='major', labelsize=20)
ax.tick_params(axis='y', which='major', labelsize=20)

plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.,
          prop={'size': 14})

enter image description here

Upvotes: 1

Related Questions