Plotting means with facet_grid in ggplot

Question

Suppose I have the following data.frame:

# Example data: 72 random values, one for every combination of
# datasets (2) x case (2) x factorA (2) x criterion (3) x samp (3).
# Each column is generated at the full length of 72, so data.frame() never
# has to silently recycle a shorter vector to make the columns line up.
values <- runif(72)
# Blocks of nine "a" followed by nine "b", repeated to cover all 72 rows.
factorA <- rep(c("a", "b"), each = 9, times = 4)
# Each criterion spans three consecutive rows (one per samp level).
criterion <- rep(c("1/4", "1/2", "2/3"), each = 3, times = 8)
# Blocks of eighteen rows per case, repeated once per dataset.
case <- rep(c("1", "2"), each = 18, times = 2)
# samp cycles fastest: 10, 20, 30, 10, 20, 30, ...
samp <- rep(c("10", "20", "30"), times = 24)
# First half of the rows is dataset "1", second half is dataset "2".
datasets <- rep(c("1", "2"), each = 36)
df <- data.frame(values, factorA, criterion, case, samp, datasets)

I want to use ggplot to plot the means of values across the two datasets. Considering only the subset datasets='1', I want to build the following plot:

library(ggplot2)
library(directlabels)
# Line plot restricted to the rows of dataset "1": one line per criterion
# along samp, with panels laid out as case (rows) by factorA (columns).
# directlabels then annotates each line with its criterion using the
# combined "first.qp" and "last.qp" placement methods.
ggplot(
  data = df[df$datasets %in% "1", ],
  mapping = aes(x = samp, y = values, group = criterion)
) +
  geom_line(mapping = aes(linetype = criterion), size = 1) +
  facet_grid(case ~ factorA, scales = "free", space = "free") +
  geom_dl(
    mapping = aes(label = criterion),
    method = list(dl.combine("first.qp", "last.qp"))
  )

which produces

as values were obtained randomly.

I want to obtain exactly this plot, but where each line represents the trajectories of the means between the two datasets, for the corresponding values. For example, in the panel ('a','1'), the point on the line criterion='1/4' and abscissa samp='10' must be the mean of the corresponding values of datasets 1 and 2; the point on the line criterion='1/4' and abscissa samp='20' must be the mean of the corresponding values of datasets 1 and 2, and so on and for each panel. How can I achieve that? I tried with stat_summary() but with no success...

bs93 · Accepted Answer

I think you can use dplyr's group_by and summarize to accomplish what you describe: 1) group the df by all variables except the datasets column (and values itself), and 2) calculate the mean of the values column for each group:

library(tidyverse)
library(directlabels)
# Fix the RNG seed so the random values (and the checks further down)
# are reproducible.
set.seed(1)
# Example data: 72 random values, one for every combination of
# datasets (2) x case (2) x factorA (2) x criterion (3) x samp (3).
# Each column is generated at the full length of 72, so data.frame() never
# has to silently recycle a shorter vector to make the columns line up.
values <- runif(72)
# Blocks of nine "a" followed by nine "b", repeated to cover all 72 rows.
factorA <- rep(c("a", "b"), each = 9, times = 4)
# Each criterion spans three consecutive rows (one per samp level).
criterion <- rep(c("1/4", "1/2", "2/3"), each = 3, times = 8)
# Blocks of eighteen rows per case, repeated once per dataset.
case <- rep(c("1", "2"), each = 18, times = 2)
# samp cycles fastest: 10, 20, 30, 10, 20, 30, ...
samp <- rep(c("10", "20", "30"), times = 24)
# First half of the rows is dataset "1", second half is dataset "2".
datasets <- rep(c("1", "2"), each = 36)
df <- data.frame(values, factorA, criterion, case, samp, datasets)

# Average `values` within every (case, factorA, samp, criterion) group.
# `datasets` is deliberately left out of the grouping, so the rows coming
# from the different datasets fall into the same group and are averaged.
df_means <- summarize(
  group_by(df, case, factorA, samp, criterion),
  mean_value = mean(values)
)

# Same layout as the single-dataset plot, but drawn from the per-group
# means: one line per criterion along samp, panels laid out as case (rows)
# by factorA (columns), and each line annotated with its criterion using
# the combined "first.qp" and "last.qp" directlabels methods.
ggplot(
  data = df_means,
  mapping = aes(x = samp, y = mean_value, group = criterion)
) +
  geom_line(mapping = aes(linetype = criterion), size = 1) +
  facet_grid(case ~ factorA, scales = "free", space = "free") +
  geom_dl(
    mapping = aes(label = criterion),
    method = list(dl.combine("first.qp", "last.qp"))
  )

Check if desired output is correct:

> df %>% 
+ filter(criterion == '1/4', factorA == 'a', samp == '10', case == '1')
     values factorA criterion case samp datasets
1 0.2655087       a       1/4    1   10        1
2 0.7942399       a       1/4    1   10        2
> mean(c(0.2655087, 0.7942399))
[1] 0.5298743
> df_means %>% 
+   filter(criterion == '1/4', factorA == 'a', samp == '10', case == '1')
# A tibble: 1 x 5
# Groups:   case, factorA, samp [1]
  case  factorA samp  criterion mean_value
                 
1 1     a       10    1/4            0.530

Plotting means with facet_grid in ggplot

Answers (1)

Related Questions