Reputation: 4395
I like this seaborn example and wanted to apply it to a pandas DataFrame, using a FacetGrid to compare different scenarios:
df_new = pd.read_json('{"TA":{"229":-30.0,"230":-30.0,"192":23.0,"193":23.0,"248":60.0,"249":60.0,"126":-30.0,"127":-30.0,"88":23.0,"89":23.0,"150":60.0,"151":60.0,"239":-30.0,"240":-30.0,"197":23.0,"198":23.0,"256":60.0,"257":60.0,"135":-30.0,"136":-30.0,"94":23.0,"95":23.0,"164":60.0,"165":60.0,"438":-30.0,"439":-30.0,"291":23.0,"405":23.0,"453":60.0,"454":60.0,"341":-30.0,"342":-30.0,"292":23.0,"293":23.0,"365":60.0,"366":60.0,"445":-30.0,"446":-30.0,"410":23.0,"411":23.0,"462":60.0,"463":60.0,"357":-30.0,"358":-30.0,"297":23.0,"298":23.0,"371":60.0,"372":60.0},"Type":{"229":"A","230":"A","192":"A","193":"A","248":"A","249":"A","126":"P","127":"P","88":"P","89":"P","150":"P","151":"P","239":"A","240":"A","197":"A","198":"A","256":"A","257":"A","135":"P","136":"P","94":"P","95":"P","164":"P","165":"P","438":"A","439":"A","291":"A","405":"A","453":"A","454":"A","341":"P","342":"P","292":"P","293":"P","365":"P","366":"P","445":"A","446":"A","410":"A","411":"A","462":"A","463":"A","357":"P","358":"P","297":"P","298":"P","371":"P","372":"P"},"Value":{"229":57.36232,"230":52.97104,"192":59.82472,"193":56.70568,"248":72.30088,"249":68.56624,"126":71.68528,"127":79.15456,"88":84.1204,"89":82.2736,"150":77.26672,"151":81.00136,"239":70.41304,"240":82.2736,"197":76.03552,"198":83.5048,"256":82.8892,"257":88.51168,"135":89.74288,"136":97.21216,"94":99.1,"95":95.98096,"164":95.98096,"165":96.59656,"438":64.8316,"439":73.53208,"291":107.18488,"405":82.2736,"453":77.26672,"454":86.00824,"341":105.29704,"342":97.21216,"292":108.41608,"293":100.3312,"365":84.77704,"366":88.51168,"445":46.11736,"446":52.35544,"410":62.32816,"411":65.4472,"462":71.06968,"463":74.80432,"357":77.92336,"358":79.15456,"297":94.09312,"298":87.23944,"371":82.2736,"372":98.4844},"Group":{"229":"FA","230":"FA","192":"FA","193":"FA","248":"FA","249":"FA","126":"FA","127":"FA","88":"FA","89":"FA","150":"FA","151":"FA","239":"FB","240":"FB","197":"FB","198":"FB","256":"FB","257":"FB","135":"FB","136":"FB","9
4":"FB","95":"FB","164":"FB","165":"FB","438":"RB","439":"RB","291":"RB","405":"RB","453":"RB","454":"RB","341":"RB","342":"RB","292":"RB","293":"RB","365":"RB","366":"RB","445":"RC","446":"RC","410":"RC","411":"RC","462":"RC","463":"RC","357":"RC","358":"RC","297":"RC","298":"RC","371":"RC","372":"RC"}}')
# Faceted strip plot of the raw observations: one facet per "Group" value,
# wrapped after two columns, with "TA" as the hue variable.
g = sns.factorplot(x="Value", y="Type", hue="TA",
col="Group", data=df_new, col_wrap=2,
kind="strip", dodge=True, jitter=True, alpha=.5)
# Overlay a pointplot on every facet (join=False, ci=None).
# NOTE(review): neither call is given order/hue_order; presumably each mapped
# pointplot then derives the category order from its own facet subset, which
# need not match the order used by the strip plot -- confirm.
g = g.map_dataframe(sns.pointplot, x="Value", y="Type", hue="TA",
dodge=.532, join=False, palette="dark", markers="d", scale=.75, ci=None)
def myplot(x, y, **kwargs):
    """Print the mean of column *x* for every (*y*, 'TA') subgroup of a facet.

    Debugging helper meant to be passed to ``FacetGrid.map_dataframe``; it
    draws nothing and only writes to stdout.

    Parameters
    ----------
    x : str
        Name of the numeric column whose mean is reported.
    y : str
        Name of the categorical column used, together with ``'TA'``, as the
        grouping key.
    **kwargs
        Must contain ``data``: the DataFrame subset for the current facet.
        It is expected to provide the columns ``'Group'`` and ``'TA'`` in
        addition to *x* and *y*.

    Raises
    ------
    KeyError
        If ``data`` is missing from ``kwargs``.
    """
    data = kwargs.pop("data")
    # An empty subset has no first row to read the group name from.
    if data.empty:
        return
    print(data.shape, "in plotting group", data.iloc[0]['Group'])
    for label, group_df in data.groupby([y, 'TA']):
        print("Group label:", label, "Group mean: {:.2f}".format(group_df[x].mean()))
# Run the debug helper once per facet; it reads that facet's subset from the
# `data` keyword argument.
g = g.map_dataframe(myplot, x="Value", y="Type")
# Title each facet with the bare row/column value.
g.set_titles(row_template="{row_name}", col_template="{col_name}")
The problem is that the mean value which is provided by seaborn's pointplot is wrong for plotting group RB.
For debugging purposes I added a custom function myplot,
which just prints the shape of each plotting group's data and its mean values:
((12, 4), 'in plotting group', u'FA')
('Group label:', (u'A', -30), 'Group mean: 55.17')
('Group label:', (u'A', 23), 'Group mean: 58.27')
('Group label:', (u'A', 60), 'Group mean: 70.43')
('Group label:', (u'P', -30), 'Group mean: 75.42')
('Group label:', (u'P', 23), 'Group mean: 83.20')
('Group label:', (u'P', 60), 'Group mean: 79.13')
((12, 4), 'in plotting group', u'FB')
('Group label:', (u'A', -30), 'Group mean: 76.34')
('Group label:', (u'A', 23), 'Group mean: 79.77')
('Group label:', (u'A', 60), 'Group mean: 85.70')
('Group label:', (u'P', -30), 'Group mean: 93.48')
('Group label:', (u'P', 23), 'Group mean: 97.54')
('Group label:', (u'P', 60), 'Group mean: 96.29')
((12, 4), 'in plotting group', u'RB')
('Group label:', (u'A', -30), 'Group mean: 69.18')
('Group label:', (u'A', 23), 'Group mean: 94.73')
('Group label:', (u'A', 60), 'Group mean: 81.64')
('Group label:', (u'P', -30), 'Group mean: 101.25')
('Group label:', (u'P', 23), 'Group mean: 104.37')
('Group label:', (u'P', 60), 'Group mean: 86.64')
((12, 4), 'in plotting group', u'RC')
('Group label:', (u'A', -30), 'Group mean: 49.24')
('Group label:', (u'A', 23), 'Group mean: 63.89')
('Group label:', (u'A', 60), 'Group mean: 72.94')
('Group label:', (u'P', -30), 'Group mean: 78.54')
('Group label:', (u'P', 23), 'Group mean: 90.67')
('Group label:', (u'P', 60), 'Group mean: 90.38')
So what I see here is that the calculated mean value does not correspond to the one shown by the pointplot. Is my calculation wrong? Did I pass the wrong parameters to the plotting function?
Upvotes: 1
Views: 2092
Reputation: 339120
As can be seen, the means of the "P" and "A" Type are interchanged in the lower left subplot.
While the factorplot itself makes sure to have the same ordering across its subplots, the mapped pointplot does not know about this order.
To make sure the same order is used everywhere you need to supply this order to the plotting functions.
# Pass the same explicit category order to both plotting calls
# (`...` stands for the remaining arguments).
g = sns.factorplot(..., order=["A","P"])
g.map_dataframe(sns.pointplot, ..., order=["A","P"])
To be on the safe side, hue_order could be specified as well, e.g. hue_order=[-30,23,60].
Complete example:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df_new = pd.read_json('{"TA":{"229":-30.0,"230":-30.0,"192":23.0,"193":23.0,"248":60.0,"249":60.0,"126":-30.0,"127":-30.0,"88":23.0,"89":23.0,"150":60.0,"151":60.0,"239":-30.0,"240":-30.0,"197":23.0,"198":23.0,"256":60.0,"257":60.0,"135":-30.0,"136":-30.0,"94":23.0,"95":23.0,"164":60.0,"165":60.0,"438":-30.0,"439":-30.0,"291":23.0,"405":23.0,"453":60.0,"454":60.0,"341":-30.0,"342":-30.0,"292":23.0,"293":23.0,"365":60.0,"366":60.0,"445":-30.0,"446":-30.0,"410":23.0,"411":23.0,"462":60.0,"463":60.0,"357":-30.0,"358":-30.0,"297":23.0,"298":23.0,"371":60.0,"372":60.0},"Type":{"229":"A","230":"A","192":"A","193":"A","248":"A","249":"A","126":"P","127":"P","88":"P","89":"P","150":"P","151":"P","239":"A","240":"A","197":"A","198":"A","256":"A","257":"A","135":"P","136":"P","94":"P","95":"P","164":"P","165":"P","438":"A","439":"A","291":"A","405":"A","453":"A","454":"A","341":"P","342":"P","292":"P","293":"P","365":"P","366":"P","445":"A","446":"A","410":"A","411":"A","462":"A","463":"A","357":"P","358":"P","297":"P","298":"P","371":"P","372":"P"},"Value":{"229":57.36232,"230":52.97104,"192":59.82472,"193":56.70568,"248":72.30088,"249":68.56624,"126":71.68528,"127":79.15456,"88":84.1204,"89":82.2736,"150":77.26672,"151":81.00136,"239":70.41304,"240":82.2736,"197":76.03552,"198":83.5048,"256":82.8892,"257":88.51168,"135":89.74288,"136":97.21216,"94":99.1,"95":95.98096,"164":95.98096,"165":96.59656,"438":64.8316,"439":73.53208,"291":107.18488,"405":82.2736,"453":77.26672,"454":86.00824,"341":105.29704,"342":97.21216,"292":108.41608,"293":100.3312,"365":84.77704,"366":88.51168,"445":46.11736,"446":52.35544,"410":62.32816,"411":65.4472,"462":71.06968,"463":74.80432,"357":77.92336,"358":79.15456,"297":94.09312,"298":87.23944,"371":82.2736,"372":98.4844},"Group":{"229":"FA","230":"FA","192":"FA","193":"FA","248":"FA","249":"FA","126":"FA","127":"FA","88":"FA","89":"FA","150":"FA","151":"FA","239":"FB","240":"FB","197":"FB","198":"FB","256":"FB","257":"FB","135":"FB","136":"FB","9
4":"FB","95":"FB","164":"FB","165":"FB","438":"RB","439":"RB","291":"RB","405":"RB","453":"RB","454":"RB","341":"RB","342":"RB","292":"RB","293":"RB","365":"RB","366":"RB","445":"RC","446":"RC","410":"RC","411":"RC","462":"RC","463":"RC","357":"RC","358":"RC","297":"RC","298":"RC","371":"RC","372":"RC"}}')
# Fix the category order and the hue-level order once and hand the same
# settings to every plotting call, so the strip plot and the mapped pointplot
# agree in every facet.
shared_order = dict(order=["A", "P"], hue_order=[-30, 23, 60])

# Jittered raw observations: one facet per "Group", two facets per row.
g = sns.factorplot(
    data=df_new, x="Value", y="Type", hue="TA",
    col="Group", col_wrap=2, kind="strip",
    dodge=True, jitter=True, alpha=.5, palette="dark",
    **shared_order
)

# Overlay the pointplot estimate for every Type/TA combination on each facet
# (join=False, ci=None).
g = g.map_dataframe(
    sns.pointplot, x="Value", y="Type", hue="TA",
    dodge=.532, join=False, palette="dark", markers="d", scale=.75, ci=None,
    **shared_order
)

g.set_titles(row_template="{row_name}", col_template="{col_name}")
plt.show()
Upvotes: 1