variable
variable

Reputation: 1023

geom_smooth, stat_smooth confidence interval not working?

I have these data:

# Example data in what looks like dput() form: a 51-row data frame with the
# columns Run (character run id), Step (factor with 7 levels) and Weight
# (numeric).
# NOTE(review): the plotting code further down reads this data as `m1`, but no
# assignment is shown here; presumably it was stored as m1 <- structure(...).
structure(list(Run = c("A013", "A015", "A023", "A024", "A031", 
"A032", "A035", "A040", "A045", "A046", "A049", "A013", "A015", 
"A023", "A024", "A031", "A032", "A035", "A040", "A045", "A046", 
"A013", "A015", "A023", "A024", "A031", "A032", "A035", "A040", 
"A013", "A015", "A023", "A024", "A031", "A032", "A035", "A040", 
"A013", "A015", "A023", "A024", "A031", "A032", "A013", "A015", 
"A023", "A024", "A013", "A015", "A023", "A024"), Step = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 
7L, 7L), .Label = c("1", "e", "k", "2", "q", "b", "m"), class = "factor"), 
    Weight = c(87.4064, 79.5822, 117.0674, 102.6384, 134.0752, 
    111.2398, 107.8464, 111.2576, 104.2428, 110.2848, 28.7292, 
    41.65656, 73.9356, 84.18504, 89.4845, 71.55106, 86.04072, 
    76.27296, 92.8749, 85.203, 91.92112, 39.5009258, 58.6035081, 
    75.13589946, 83.43157667, 88.8993795, 68.85183559, 64.77081269, 
    77.56733054, 32.5025, 51.45329, 66.29101, 73.79125, 79.95483, 
    60.9573, 58.34856, 68.83193, 29.65289, 40.74267, 56.97243, 
    61.48708, 70.24226, 54.79253, 22.8231064, 38.9966088, 55.2736576, 
    62.6077916, 20.7458048, 38.306526, 54.7937568, 61.1417148
    )), .Names = c("Run", "Step", "Weight"), row.names = c(NA, 
-51L), class = "data.frame")

I'm trying to get a nice looking geom_smooth() with 0.99 confidence using

# Load with library() so that a missing package stops the script right away;
# require() would only warn and return FALSE, and the failure would surface
# later as a confusing "could not find function" error.
library(ggplot2)
library(directlabels)

# One coloured line with points per Run, plotted across the Step levels.
# `m1` is expected to hold the example data frame (Run, Step, Weight).
g1 <- ggplot(m1,
             aes(x = Step, y = Weight, label = Run, group = Run, color = Run)) +
  geom_point() + geom_line()

# Label each run's line directly on the plot.
g2 <- g1 + geom_dl(method = "first.bumpup")

# aes(group = 1) overrides the per-run grouping so that a single smoother is
# fitted to all runs together; `level` sets the confidence level of the ribbon
# drawn around that fitted curve.
g2 + geom_smooth(aes(group = 1), level = 0.99)

Here are my problems:

  1. The error ribbon doesn't look like 99% confidence: lots of points in the chart are outside it.

  2. When I expand the data set, the error ribbon collapses to a very narrow band, and the majority of the points lie outside it.

Am I doing something wrong here? Thanks,

Edit: here's what I see when I run this: [image: narrow ribbon]. When I look at a bigger data set, the ribbon gets even narrower, almost lying on top of the smooth line.

Upvotes: 1

Views: 4678

Answers (1)

Roman Luštrik
Roman Luštrik

Reputation: 70623

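Confidence intervals and prediction intervals are two different beasts. The former is about the mean of your data (fitted values), while the latter is where future observations will lie. The ribbon drawn by geom_smooth() is a confidence interval for the fitted curve, so it is normal for many individual points to fall outside it, and it gets narrower as you add data because the mean is then estimated more precisely.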

This is my code from my RPubs repository.

set.seed(357) # fix the RNG seed so the simulated data below are reproducible
library(ggplot2) # for ggplot()
library(gridExtra) # for grid.arrange()

# Simulate 20 observations: each y is its x multiplied by a factor drawn from
# N(mean = 3, sd = 1), so the points scatter noticeably around the fitted line.
x <- rnorm(20)
y <- x * rnorm(20, mean = 3, sd = 1)
xy <- data.frame(x, y)

# Simple linear regression of y on x.
mdl <- lm(y ~ x, data = xy)

# Grid of x values at which both kinds of interval are evaluated.
predx <- data.frame(x = seq(from = -2, to = 3, by = 0.1))

# Coverage level shared by every interval computed below.
ci.level <- 0.95

# Confidence interval: uncertainty about the fitted mean at each x.
conf.int <- cbind(
  predx,
  predict(mdl, newdata = predx, interval = "confidence", level = ci.level)
)

# Prediction interval: range expected to contain a new observation at each x.
pred.int <- cbind(
  predx,
  predict(mdl, newdata = predx, interval = "prediction", level = ci.level)
)

# Fitted values plus their standard errors, used for the manual calculation.
# The argument is spelled out as `se.fit` rather than relying on partial
# matching of `se`.
man <- predict(mdl, newdata = predx, se.fit = TRUE)

# Manual calculation of the confidence interval: fit +/- t quantile * se.fit.
# The multiplier is the two-sided t quantile on the residual degrees of
# freedom, which is somewhat larger than the normal value 1.96 for a sample
# this small.
# Thank you, @Roland (http://chat.stackoverflow.com/transcript/message/10581408#10581408)
lvl <- qt(1 - (1 - ci.level) / 2, mdl$df.residual)
conf.int.man <- cbind(
  predx,
  fit = man$fit,
  lwr = man$fit - lvl * man$se.fit,
  upr = man$fit + lvl * man$se.fit
)

# Build one panel: the observed points overlaid with the fitted line and the
# ribbon spanned by the `lwr` and `upr` columns of `band`.
#   band   - data frame with columns x, fit, lwr, upr
#   points - data frame with the raw observations (columns x, y)
#   title  - panel title
plot_band <- function(band, points, title) {
  ggplot(band, aes(x = x, y = fit)) +
    theme_bw() +
    ggtitle(title) +
    geom_point(data = points, aes(x = x, y = y)) +
    # stat = "identity" draws the precomputed fit and bounds as they are
    # instead of fitting a new smoother.
    geom_smooth(data = band, aes(ymin = lwr, ymax = upr), stat = "identity")
}

g.conf <- plot_band(
  conf.int, xy,
  "Confidence interval of estimated parameters from predict()"
)

g.pred <- plot_band(
  pred.int, xy,
  "Prediction interval for future observations from predict()"
)

# Show both panels side by side for comparison.
grid.arrange(g.conf, g.pred, ncol = 2)

enter image description here

Upvotes: 3

Related Questions