Reputation: 3980
I am trying to fit a segmented glm to some data:
x <- c(0.25,0.5,0.75,1,1.25,1.5,1.75,2,2.25,2.5,2.75,3,3.25)
y <- c(5.516,5.725,5.9781,6,6.453,6.88,7.3,11,11.89,15.6,21.3,27,32.8)
d <- data.frame(x = x,
y = y)
if(!require("segmented")) {
install.packages("segmented")
require("segmented")
}
g1 <- glm(y ~ x,data = d)
g2 <- segmented(g1, seg.Z = ~ x,
psi = list(x = c(1.5)))
pdat <- data.frame(x = d$x,
y = broken.line(g2, link = FALSE)[,1])
pdat <- pdat[with(pdat, order(x)), ]
plot(y ~ x, data = d, pch = 21, bg = "white")
lines(y ~ x, data = pdat, type = "l", col = "red")
I would now like to draw confidence intervals around the segmented line, but have no idea on how to do this. I can draw confidence intervals for a non segmented plot:
## use quadratic function
g3 <- lm(y ~ poly(x, 2), data = d)
pdat <- with(d, data.frame(x = exp(seq(min(x),
max(x), length = 100))))
tmp2 <- predict(g3, newdata = pdat, se.fit = TRUE)
critVal <- qt(0.975, df = g3$df.residual)
pdat <- transform(pdat, pred = tmp2$fit, se = tmp2$se.fit)
pdat <- transform(pdat, yhat = pred,
upr = pred + (critVal * se),
lwr = pred - (critVal * se))
plot(y ~ x, data = d)
lines(yhat ~ x, data = pdat, type = "l", col = "red") # gam model
lines(upr ~ x, data = pdat, type = "l", lty = "dashed", col = "red") # upper limit
lines(lwr ~ x, data = pdat, type = "l", lty = "dashed", col = "red") # lower limit
But when I repeat this for the segmented version it doesn't seem quite right:
# repeat same method for segmented
g1 <- glm(y ~ x,data = d)
g2 <- segmented(g1, seg.Z = ~ x,
psi = list(x = c(1.5)))
pdat <- with(d, data.frame(x = exp(seq(min(x),
max(x), length = 100))))
tmp2 <- predict(g2, newdata = pdat, se.fit = TRUE)
critVal <- qt(0.975, df = g2$df.residual)
pdat <- transform(pdat, pred = tmp2$fit, se = tmp2$se.fit)
pdat <- transform(pdat, yhat = pred,
upr = pred + (critVal * se),
lwr = pred - (critVal * se))
plot(y ~ x, data = d)
lines(yhat ~ x, data = pdat, type = "l", col = "red") # gam model
lines(upr ~ x, data = pdat, type = "l", lty = "dashed", col = "red") # upper limit
lines(lwr ~ x, data = pdat, type = "l", lty = "dashed", col = "red") # lower limit
So, my first question is why does the quadratic function not extend to the entire x axis i.e. why does it stop at 1.25? Secondly, is the method that I've followed for confidence intervals for the segmented line correct, or is there a better method for this?
Upvotes: 1
Views: 1123
Reputation: 15441
Buildin on @Roman's answer, here is a similar apporach , that may perhaps closer to what your looking for:
x <- c(0.25,0.5,0.75,1,1.25,1.5,1.75,2,2.25,2.5,2.75,3,3.25)
y <- c(5.516,5.725,5.9781,6,6.453,6.88,7.3,11,11.89,15.6,21.3,27,32.8)
d <- data.frame(x = x,
y = y)
d$thing <- c(rep("a",8), rep("b",5))
library(ggplot2)
ggplot(d, aes(x = x, y = y, group = thing)) +
geom_point() +
theme_bw() +
stat_smooth(method = "lm", formula = y ~ I(x^2) + I(x^3),
fill = NA, linetype = 3, geom = "ribbon", colour = "red") +
stat_smooth(method = "lm", formula = y ~ I(x^2) + I(x^3),
fill = "transparent", colour = "black")
Upvotes: 2
Reputation: 70643
How about this? Band represents 95% CI.
x <- c(0.25,0.5,0.75,1,1.25,1.5,1.75,2,2.25,2.5,2.75,3,3.25)
y <- c(5.516,5.725,5.9781,6,6.453,6.88,7.3,11,11.89,15.6,21.3,27,32.8)
d <- data.frame(x = x,
y = y)
mdl <- glm(y ~ x + I(x^2) + I(x^3), data = d)
prd <- predict(mdl, newdata = d[, "x", drop = FALSE], se = TRUE)
d$fit <- prd$fit
d$lci <- d$fit - 1.96 * prd$se.fit
d$uci <- d$fit + 1.96 * prd$se.fit
library(ggplot2)
ggplot(d, aes(x = x, y = y, ymin = lci, ymax = uci)) +
theme_bw() +
geom_point(size = 3) +
geom_smooth(aes(x = x, y = fit), stat = "identity")
Upvotes: 3