Reputation: 3288
I have vectors of p-values, x values, and y values which were provided or generated using the command cmdscale
on a distance matrix. Simply plotting the coordinates with plot(x,y)
works fine, but I want the points to be sized inversely to their p-value (smaller p-value means larger point). I can't quite think of a way to do this and am looking for suggestions. I thought of normalizing the p-values and scaling by some factor -- plot(..., cex=2*normalized)
-- but that won't work. Below I've dumped some example values I'm working with for reproducibility.
> dput(pValues)
c(4.48e-14, 1.66e-12, 2.53e-08, 8.57e-08, 3.4e-07, 5.68e-07,
9.92e-07, 1.08e-06, 2.82e-06, 1.81e-05, 0.000133, 0.00053, 0.000616,
0.000846, 0.000947, 0.001110537, 0.001110537, 0.001505779, 0.001573054,
0.001573054, 0.002112306, 0.002308863, 0.003121497, 0.003121497,
0.003121497, 0.003121497, 0.003121497, 0.003121497, 0.003121497,
0.003121497, 0.003177736, 0.004723347, 0.005004768, 0.005301549,
1.86e-17, 9.18e-17, 2.16e-16, 8.23e-16, 9.2e-16, 1.28e-15, 1.38e-15,
2.59e-15, 6.43e-15, 6.43e-15, 8.42e-15, 1.21e-14, 1.02e-13, 7.58e-13,
1.53e-12, 1.96e-11)
> dput(x)
c(-0.546606289027691, -0.513646680083475, 0.157100976250898,
0.109447441578375, 0.109447441578375, 0.104451507558839, 0.104451507558839,
0.109447441578375, 0.175507893375115, -0.14664445744836, 0.0543475836486623,
0.0557408040609083, 0.0893466913878634, 0.0893466913878634, 0.142438485025367,
0.0470980043880961, -0.0221917747418056, 0.109447441578375, 0.0362416205348296,
0.0470980043880961, 0.0362416205348296, 0.0347865097394601, 0.0391497309324339,
0.0413674642703439, 0.0667384023198892, 0.0461182424640277, 0.0413674642703439,
0.0667384023198892, 0.0461182424640277, 0.0475891023261346, 0.0893466913878634,
0.0764742527259463, 0.0422421029990655, -0.0221917747418056,
-0.510082195428624, -0.510082195428624, -0.510082195428624, -0.510082195428624,
0.53984552027647, 0.457352428403424, -0.510082195428624, -0.510082195428624,
0.476216399097293, 0.476216399097293, -0.510082195428624, 0.297997535161347,
-0.510082195428624, 0.397117197655551, 0.440730282360781, 0.0312250127868402)
> dput(y)
c(0.107461316099316, 0.156755909792581, -0.166842986685387,
-0.141978234324384, -0.141978234324384, -0.0687959347159215,
-0.0687959347159215, -0.141978234324384, -0.142554658469002,
-0.0395153544691704, -0.0576565915449701, -0.0936541502757846,
-0.0438034590304964, -0.0438034590304964, -0.190330058396921,
-0.0329359077881266, -0.0116066646384657, -0.141978234324384,
-0.0714188307783769, -0.0329359077881266, -0.0714188307783769,
-0.054867626805721, -0.0112558858117774, -0.0166800568953671,
-0.0274480805166001, -0.0331407851151761, -0.0166800568953671,
-0.0274480805166001, -0.0331407851151761, -0.00455654056913195,
-0.0438034590304963, -0.0148236474766705, -0.130181815402346,
-0.0116066646384657, 0.0838569446695995, 0.0838569446695995,
0.0838569446695995, 0.0838569446695995, 0.0372937912551249, 0.555328846358372,
0.0838569446695995, 0.0838569446695995, 0.521415820920117, 0.521415820920117,
0.0838569446695994, -0.506985517718071, 0.0838569446695995, -0.324019743520653,
0.421305271998988, -0.0312119222707089)
Upvotes: 6
Views: 8400
Reputation: 8377
You can just bind them into a data.frame and ggplot them:
# Load ggplot2, then gather the coordinates and p-values into one data frame.
library(ggplot2)
df <- data.frame(x, y, pValues)
# Size each point by -log(p), so smaller p-values are drawn as larger points.
ggplot(df, aes(x = x, y = y, size = -log(pValues))) +
  geom_point(alpha = 0.5, colour = "blue")
I suggest plotting the negative logarithm of the p-value, so that point size reads intuitively: the bigger the point, the more significant the result.
This was the quick way. If you want to customize your plot and improve your legend, you can specify the log transform directly in the trans
argument of scale_size. You can also adjust the range (the minimum and maximum circle sizes), the breaks shown in the legend (be careful: these are given in the original p-value units, not log units), and even the legend title.
# Map the raw p-values to point size through a log10-transformed size scale.
# The size range is given largest-first, c(15, 1), so that the smallest
# p-values get the largest circles. Breaks are in original p-value units.
ggplot(df, aes(x = x, y = y, size = pValues)) +
  geom_point(alpha = 0.5, colour = "blue") +
  scale_size(
    name = "p-values",
    trans = "log10",
    range = c(15, 1),
    breaks = c(1e-17, 1e-15, 1e-10, 1e-5, 1e-3)
  )
Note that I had to reverse the order of the range limits (largest size first), since the log10 transform does not negate the values: without the reversal, smaller p-values would get smaller points.
Upvotes: 7
Reputation: 579
I think that log10 is best too ;)
# Installing packages (run once) ----
# install.packages("ggplot2", dependencies = TRUE)
# install.packages("gridExtra", dependencies = TRUE)

# Loading libraries ----
library("ggplot2")
library("gridExtra")

# Data ----
# NOTE(review): dOut_x, dOut_y and d_pvalue are not defined in this snippet;
# presumably they are the question's x, y and pValues vectors -- confirm.
df <- data.frame(dOut_x, dOut_y, d_pvalue)

# Title alignment is a theme setting, not a label, so it must go through
# theme() rather than labs(). Shared by all three panels.
title_left <- theme(plot.title = element_text(hjust = 0))

# Graph 1: TomNash proposal -- point size from -log(p), fixed blue colour.
grO <- ggplot(df, aes(x = dOut_x, y = dOut_y, size = -log(d_pvalue))) +
  geom_point(alpha = 0.5, col = "blue") +
  labs(title = "*-log: basic*") +
  title_left

# Graph 2: -log(p) sizing, points coloured by their y coordinate with a
# four-colour rainbow gradient.
grTw <- ggplot(
  df,
  aes(x = dOut_x, y = dOut_y, size = -log(d_pvalue), color = dOut_y)
) +
  geom_point(alpha = 0.25) +
  scale_colour_gradientn(colours = rainbow(4)) +
  labs(title = "*-log*") +
  title_left

# Graph 3: same as graph 2 but on a base-10 log scale. The sign is negated so
# that smaller p-values give larger points (plain log10(p) is negative and
# would make the most significant points the smallest).
grTh <- ggplot(
  df,
  aes(x = dOut_x, y = dOut_y, size = -log10(d_pvalue), color = dOut_y)
) +
  geom_point(alpha = 0.25) +
  scale_colour_gradientn(colours = rainbow(4)) +
  labs(title = "*-log10*") +
  title_left

# Draw it all ----
# Open the PNG device only once the plots are built, so an error above does
# not leave a dangling graphics device.
png("ggplot2sizing.png", height = 400, width = 850)
grid.arrange(grO, grTw, grTh, ncol = 3, top = "Stack*R: ggplot2-Sizing")
dev.off()
Hope that can help. Good luck.
Upvotes: 3