maggie
maggie

Reputation: 4395

Seaborn pointplot doesn't plot mean

I like this seaborn example and wanted to apply it on a pandas dataframe using a FacetGrid to compare different scenarios:

from io import StringIO

# The JSON text must stay on one physical line: a line break inside the quoted
# literal is a syntax error. It is wrapped in StringIO because newer pandas
# versions no longer accept raw JSON text passed directly to read_json.
df_new = pd.read_json(StringIO('{"TA":{"229":-30.0,"230":-30.0,"192":23.0,"193":23.0,"248":60.0,"249":60.0,"126":-30.0,"127":-30.0,"88":23.0,"89":23.0,"150":60.0,"151":60.0,"239":-30.0,"240":-30.0,"197":23.0,"198":23.0,"256":60.0,"257":60.0,"135":-30.0,"136":-30.0,"94":23.0,"95":23.0,"164":60.0,"165":60.0,"438":-30.0,"439":-30.0,"291":23.0,"405":23.0,"453":60.0,"454":60.0,"341":-30.0,"342":-30.0,"292":23.0,"293":23.0,"365":60.0,"366":60.0,"445":-30.0,"446":-30.0,"410":23.0,"411":23.0,"462":60.0,"463":60.0,"357":-30.0,"358":-30.0,"297":23.0,"298":23.0,"371":60.0,"372":60.0},"Type":{"229":"A","230":"A","192":"A","193":"A","248":"A","249":"A","126":"P","127":"P","88":"P","89":"P","150":"P","151":"P","239":"A","240":"A","197":"A","198":"A","256":"A","257":"A","135":"P","136":"P","94":"P","95":"P","164":"P","165":"P","438":"A","439":"A","291":"A","405":"A","453":"A","454":"A","341":"P","342":"P","292":"P","293":"P","365":"P","366":"P","445":"A","446":"A","410":"A","411":"A","462":"A","463":"A","357":"P","358":"P","297":"P","298":"P","371":"P","372":"P"},"Value":{"229":57.36232,"230":52.97104,"192":59.82472,"193":56.70568,"248":72.30088,"249":68.56624,"126":71.68528,"127":79.15456,"88":84.1204,"89":82.2736,"150":77.26672,"151":81.00136,"239":70.41304,"240":82.2736,"197":76.03552,"198":83.5048,"256":82.8892,"257":88.51168,"135":89.74288,"136":97.21216,"94":99.1,"95":95.98096,"164":95.98096,"165":96.59656,"438":64.8316,"439":73.53208,"291":107.18488,"405":82.2736,"453":77.26672,"454":86.00824,"341":105.29704,"342":97.21216,"292":108.41608,"293":100.3312,"365":84.77704,"366":88.51168,"445":46.11736,"446":52.35544,"410":62.32816,"411":65.4472,"462":71.06968,"463":74.80432,"357":77.92336,"358":79.15456,"297":94.09312,"298":87.23944,"371":82.2736,"372":98.4844},"Group":{"229":"FA","230":"FA","192":"FA","193":"FA","248":"FA","249":"FA","126":"FA","127":"FA","88":"FA","89":"FA","150":"FA","151":"FA","239":"FB","240":"FB","197":"FB","198":"FB","256":"FB","257":"FB","135":"FB","136":"FB","94":"FB","95":"FB","164":"FB","165":"FB","438":"RB","439":"RB","291":"RB","405":"RB","453":"RB","454":"RB","341":"RB","342":"RB","292":"RB","293":"RB","365":"RB","366":"RB","445":"RC","446":"RC","410":"RC","411":"RC","462":"RC","463":"RC","357":"RC","358":"RC","297":"RC","298":"RC","371":"RC","372":"RC"}}'))
# Draw the individual observations as a jittered, dodged strip plot:
# one facet per "Group" (wrapping after two columns), coloured by "TA".
g = sns.factorplot(x="Value", y="Type", hue="TA",
                   col="Group", data=df_new, col_wrap=2,
                   kind="strip", dodge=True, jitter=True, alpha=.5)
# Overlay sns.pointplot on every facet as diamond markers, without
# connecting lines (join=False) or confidence intervals (ci=None).
g = g.map_dataframe(sns.pointplot, x="Value", y="Type", hue="TA",
                    dodge=.532, join=False, palette="dark", markers="d", scale=.75, ci=None)

def myplot(x, y, **kwargs):
    """Print the mean of column ``x`` for every (``y``, ``'TA'``) group.

    Debugging helper intended to be passed to ``FacetGrid.map_dataframe``,
    which hands over one facet's rows through the ``data`` keyword. Nothing
    is drawn; the means are only printed so they can be compared with what
    ``sns.pointplot`` shows.

    Parameters
    ----------
    x : str
        Name of the numeric column to average.
    y : str
        Name of the column to group by, together with ``'TA'``.
    **kwargs
        Must contain ``data``, a DataFrame with the columns named by ``x``
        and ``y`` plus ``'TA'`` and ``'Group'``. Other keywords are ignored.

    Raises
    ------
    KeyError
        If no ``data`` keyword is supplied.
    """
    data = kwargs.pop("data")
    # The first row's 'Group' labels the facet (all rows share it when the
    # grid is faceted on 'Group').
    print(data.shape, "in plotting group", data['Group'].iloc[0])
    for label, group_df in data.groupby([y, 'TA']):
        print("Group label:", label, "Group mean: {:.2f}".format(group_df[x].mean()))

# Run the debugging helper once per facet; each facet's rows arrive as `data`.
g = g.map_dataframe(myplot, x="Value", y="Type")
# Title every facet with the value of its faceting variable.
g.set_titles(row_template="{row_name}", col_template="{col_name}")

resulting plot

The problem is that the mean value which is provided by seaborn's pointplot is wrong for plotting group RB.

For debugging purposes I added a custom function myplot which just outputs the data of each plotting group and its mean value:

((12, 4), 'in plotting group', u'FA')
('Group label:', (u'A', -30), 'Group mean: 55.17')
('Group label:', (u'A', 23), 'Group mean: 58.27')
('Group label:', (u'A', 60), 'Group mean: 70.43')
('Group label:', (u'P', -30), 'Group mean: 75.42')
('Group label:', (u'P', 23), 'Group mean: 83.20')
('Group label:', (u'P', 60), 'Group mean: 79.13')
((12, 4), 'in plotting group', u'FB')
('Group label:', (u'A', -30), 'Group mean: 76.34')
('Group label:', (u'A', 23), 'Group mean: 79.77')
('Group label:', (u'A', 60), 'Group mean: 85.70')
('Group label:', (u'P', -30), 'Group mean: 93.48')
('Group label:', (u'P', 23), 'Group mean: 97.54')
('Group label:', (u'P', 60), 'Group mean: 96.29')
((12, 4), 'in plotting group', u'RB')
('Group label:', (u'A', -30), 'Group mean: 69.18')
('Group label:', (u'A', 23), 'Group mean: 94.73')
('Group label:', (u'A', 60), 'Group mean: 81.64')
('Group label:', (u'P', -30), 'Group mean: 101.25')
('Group label:', (u'P', 23), 'Group mean: 104.37')
('Group label:', (u'P', 60), 'Group mean: 86.64')
((12, 4), 'in plotting group', u'RC')
('Group label:', (u'A', -30), 'Group mean: 49.24')
('Group label:', (u'A', 23), 'Group mean: 63.89')
('Group label:', (u'A', 60), 'Group mean: 72.94')
('Group label:', (u'P', -30), 'Group mean: 78.54')
('Group label:', (u'P', 23), 'Group mean: 90.67')
('Group label:', (u'P', 60), 'Group mean: 90.38')

So what I see here is that the mean value I calculated does not correspond to the one shown by the pointplot. Is my calculation wrong? Did I pass the wrong parameters to the plotting function?

Upvotes: 1

Views: 2092

Answers (1)

ImportanceOfBeingErnest
ImportanceOfBeingErnest

Reputation: 339120

As can be seen, the means of the "P" and "A" Type are interchanged in the lower left subplot.

While the factorplot itself makes sure to have the same ordering across its subplots, the mapped pointplot does not know about this order.

To make sure the same order is used everywhere you need to supply this order to the plotting functions.

# Pass the same explicit category order to both calls so every facet agrees.
g = sns.factorplot(..., order=["A","P"])
g.map_dataframe(sns.pointplot, ..., order=["A","P"])

To be on the safe side, hue_order could be specified as well, hue_order=[-30,23,60].

Complete example:

from io import StringIO

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# The JSON text must stay on one physical line: a line break inside the quoted
# literal is a syntax error. It is wrapped in StringIO because newer pandas
# versions no longer accept raw JSON text passed directly to read_json.
df_new = pd.read_json(StringIO('{"TA":{"229":-30.0,"230":-30.0,"192":23.0,"193":23.0,"248":60.0,"249":60.0,"126":-30.0,"127":-30.0,"88":23.0,"89":23.0,"150":60.0,"151":60.0,"239":-30.0,"240":-30.0,"197":23.0,"198":23.0,"256":60.0,"257":60.0,"135":-30.0,"136":-30.0,"94":23.0,"95":23.0,"164":60.0,"165":60.0,"438":-30.0,"439":-30.0,"291":23.0,"405":23.0,"453":60.0,"454":60.0,"341":-30.0,"342":-30.0,"292":23.0,"293":23.0,"365":60.0,"366":60.0,"445":-30.0,"446":-30.0,"410":23.0,"411":23.0,"462":60.0,"463":60.0,"357":-30.0,"358":-30.0,"297":23.0,"298":23.0,"371":60.0,"372":60.0},"Type":{"229":"A","230":"A","192":"A","193":"A","248":"A","249":"A","126":"P","127":"P","88":"P","89":"P","150":"P","151":"P","239":"A","240":"A","197":"A","198":"A","256":"A","257":"A","135":"P","136":"P","94":"P","95":"P","164":"P","165":"P","438":"A","439":"A","291":"A","405":"A","453":"A","454":"A","341":"P","342":"P","292":"P","293":"P","365":"P","366":"P","445":"A","446":"A","410":"A","411":"A","462":"A","463":"A","357":"P","358":"P","297":"P","298":"P","371":"P","372":"P"},"Value":{"229":57.36232,"230":52.97104,"192":59.82472,"193":56.70568,"248":72.30088,"249":68.56624,"126":71.68528,"127":79.15456,"88":84.1204,"89":82.2736,"150":77.26672,"151":81.00136,"239":70.41304,"240":82.2736,"197":76.03552,"198":83.5048,"256":82.8892,"257":88.51168,"135":89.74288,"136":97.21216,"94":99.1,"95":95.98096,"164":95.98096,"165":96.59656,"438":64.8316,"439":73.53208,"291":107.18488,"405":82.2736,"453":77.26672,"454":86.00824,"341":105.29704,"342":97.21216,"292":108.41608,"293":100.3312,"365":84.77704,"366":88.51168,"445":46.11736,"446":52.35544,"410":62.32816,"411":65.4472,"462":71.06968,"463":74.80432,"357":77.92336,"358":79.15456,"297":94.09312,"298":87.23944,"371":82.2736,"372":98.4844},"Group":{"229":"FA","230":"FA","192":"FA","193":"FA","248":"FA","249":"FA","126":"FA","127":"FA","88":"FA","89":"FA","150":"FA","151":"FA","239":"FB","240":"FB","197":"FB","198":"FB","256":"FB","257":"FB","135":"FB","136":"FB","94":"FB","95":"FB","164":"FB","165":"FB","438":"RB","439":"RB","291":"RB","405":"RB","453":"RB","454":"RB","341":"RB","342":"RB","292":"RB","293":"RB","365":"RB","366":"RB","445":"RC","446":"RC","410":"RC","411":"RC","462":"RC","463":"RC","357":"RC","358":"RC","297":"RC","298":"RC","371":"RC","372":"RC"}}'))

# Strip plot of the raw observations with an explicit category order and
# hue order, so every facet arranges "Type" and "TA" the same way.
g = sns.factorplot(x="Value", y="Type", hue="TA",
                   col="Group", data=df_new, col_wrap=2,
                   kind="strip", dodge=True, palette="dark",jitter=True, alpha=.5, 
                   order=["A","P"], hue_order=[-30,23,60])
# Give the overlaid pointplot the same order/hue_order: the mapped function
# does not know the ordering chosen by factorplot.
g = g.map_dataframe(sns.pointplot, x="Value", y="Type", hue="TA", 
                    order=["A","P"], hue_order=[-30,23,60],
                    dodge=.532, join=False, palette="dark", markers="d", scale=.75, ci=None)

# Title every facet with the value of its faceting variable.
g.set_titles(row_template="{row_name}", col_template="{col_name}")

plt.show()

enter image description here

Upvotes: 1

Related Questions