Reputation: 55
I have the below code to plot a probit model comparing the chance of success based on a maximum temperature value. Seems to work well, I'm happy with the plot. But I'm hoping to highlight the point along the curve where the probability is 50%, and then draw a line down to the x-axis to determine (and show) this value as well. Also hoping to include confidence intervals for this estimate. Any help would be greatly appreciated!
data <- data.frame(MaxTemp = c(53.2402, 59.01004,51.42602,41.53883,44.70763,53.90285,51.130318,54.5929,43.697559,49.772446,54.902222,52.720528,58.782608,47.680374,48.30313,56.10921,57.660324,46.387924,60.503147,53.803177,52.27771,58.58555,55.74136,49.04505,46.816269,52.58295,52.751373,56.209747,51.733894,51.424305,50.74564,47.046513,53.030407,56.68752,56.639351,53.526585,51.562313),
Success=c(1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1))
TempProbitModel <- glm(Success ~ MaxTemp, data=data, family=binomial(link="logit"))
temp.data <- data.frame(MaxTemp = seq(40, 62, 0.5))
predicted.data <- as.data.frame(predict(TempProbitModel, newdata = temp.data, type="link", se=TRUE))
new.data <- cbind(temp.data, predicted.data)
std <- qnorm(0.95 / 2 + 0.5)
new.data$ymin <- TempProbitModel$family$linkinv(new.data$fit - std * new.data$se)
new.data$ymax <- TempProbitModel$family$linkinv(new.data$fit + std * new.data$se)
new.data$fit <- TempProbitModel$family$linkinv(new.data$fit)
(TempProb <- ggplot(data, aes(x=MaxTemp, y=Success)) +
geom_point() +
geom_ribbon(data=new.data, aes(y=fit, ymin=ymin, ymax=ymax), alpha=0.5) +
geom_line(data=new.data, aes(y=fit)) +
labs(x="Peak Temperature", y="Probability of Success") )
Upvotes: 2
Views: 486
Reputation: 16178
An another way of getting this point is to use approxfun
function.
f <- approxfun(new.data$fit,new.data$MaxTemp, rule = 2)
f(0.5)
[1] 49.39391
So now, if you are plotting it:
library(ggplot2)
ggplot(data, aes(x = MaxTemp, y = Success))+
geom_point()+
geom_ribbon(data=new.data, aes(y=fit, ymin=ymin, ymax=ymax), alpha=0.5) +
geom_line(data=new.data, aes(y=fit)) +
labs(x="Peak Temperature", y="Probability of Success") +
geom_point(x = f(0.5), y = 0.5, size = 3, color = "red")+
geom_vline(xintercept = f(0.5), linetype = "dashed", color = "red")+
geom_hline(yintercept = 0.5, linetype = "dashed", color = "red")
Upvotes: 3
Reputation: 6485
Find the closest value to y = 0.5
:
closest_value <- which(abs(new.data$fit - 0.5) == min(abs(new.data$fit - 0.5)))
Calculate slope at this point:
slope_at_closest_value <- (new.data[closest_value, "MaxTemp"] - new.data[closest_value - 1, "MaxTemp"]) /( new.data[closest_value, "fit"] - new.data[closest_value - 1, "fit"])
x_value <- new.data[closest_value - 1, "MaxTemp"] + slope_at_closest_value * (0.5 - new.data[closest_value - 1, "fit"])
Use this x_value to draw a vertical line:
ggplot(data, aes(x=MaxTemp, y=Success)) +
geom_point() +
geom_ribbon(data=new.data, aes(y=fit, ymin=ymin, ymax=ymax), alpha=0.5) +
geom_line(data=new.data, aes(y=fit)) +
labs(x="Peak Temperature", y="Probability of Success") +
geom_vline(xintercept = x_value, color="red")
This draws the following plot:
The confidence interval can be drawn accordingly.
Upvotes: 3