Benjamin
Benjamin

Reputation: 97

ggplot2: Logistic regression with points on the regression line instead of at 0 and 1

I'm trying to generate a logistic regression plot with the points on the regression line as in this example:

enter image description here

What I get is the following:

enter image description here

I searched all over the internet but couldn't find anything helpful, and I've tried different combinations in ggplot myself, but nothing good came out.

Here is the code I'm using:

# Raw 0/1 responses against ball speed, with one logistic curve per paddle size.
g <- ggplot(data = myData, mapping = aes(x = speed, y = GetResp.RESP))
g +
  geom_point(mapping = aes(color = PadLen, shape = PadLen), size = 2.5) +
  geom_smooth(
    mapping = aes(color = PadLen),
    method = "glm",
    method.args = list(family = "quasibinomial"),
    se = FALSE,
    size = 1.1
  ) +
  scale_color_manual(
    values = c("black", "red"),
    labels = c("SmallPaddle", "BigPaddle")
  ) +
  scale_shape_manual(
    values = c(1, 2),
    labels = c("SmallPaddle", "BigPaddle")
  ) +
  theme_classic() +
  theme(
    legend.title = element_blank(),
    legend.position = c(0.8, 0.20)
  ) +
  labs(x = "Ball Speed (cm/s)", y = 'Proportion of "Fast" Responses') +
  scale_y_continuous(
    breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1),
    labels = c(".0", ".2", ".4", ".6", ".8", "1.0")
  )

Here is a reduced sample of the database, enough to get something to work with:

(If the dput code doesn't work, you can download the dput.R from here, and use dget(): https://file.io/PsBLeJ)

structure(list(Subject = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), GetResp.RESP = c(1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L), PadLen = structure(c(1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L), .Label = c("0", "1"), class = "factor"), speed = c(36.7686063997867, 26.5851542237119, 31.4488143078996, 26.5851542237119, 48.1629567594045, 36.7686063997867, 42.3730800240861, 22.4757186261048, 31.4488143078996, 31.4488143078996, 48.1629567594045, 48.1629567594045, 26.5851542237119, 42.3730800240861, 36.7686063997867, 48.1629567594045, 22.4757186261048, 36.7686063997867, 22.4757186261048, 42.3730800240861, 22.4757186261048, 22.4757186261048, 48.1629567594045, 31.4488143078996, 31.4488143078996, 31.4488143078996, 48.1629567594045, 26.5851542237119, 48.1629567594045, 36.7686063997867, 42.3730800240861, 26.5851542237119, 22.4757186261048, 42.3730800240861, 26.5851542237119, 42.3730800240861, 36.7686063997867, 42.3730800240861, 31.4488143078996, 36.7686063997867, 22.4757186261048, 22.4757186261048, 42.3730800240861, 31.4488143078996, 22.4757186261048, 31.4488143078996, 36.7686063997867, 48.1629567594045, 31.4488143078996, 
26.5851542237119, 22.4757186261048, 26.5851542237119, 36.7686063997867, 36.7686063997867, 48.1629567594045, 36.7686063997867, 26.5851542237119, 42.3730800240861, 31.4488143078996, 42.3730800240861, 26.5851542237119, 42.3730800240861, 42.3730800240861, 48.1629567594045, 31.4488143078996, 36.7686063997867, 31.4488143078996, 48.1629567594045, 26.5851542237119, 36.7686063997867, 22.4757186261048, 48.1629567594045, 22.4757186261048, 42.3730800240861, 26.5851542237119, 42.3730800240861, 26.5851542237119, 48.1629567594045, 42.3730800240861, 31.4488143078996, 26.5851542237119, 36.7686063997867, 22.4757186261048), backCol = c(1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L), sample = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L), backColor = structure(c(2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L), .Label = c("blue", "red"), class = "factor"), WasHit = c(0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 
1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L), RedBlue.Cycle = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), RedBlue.Sample = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L)), class = "data.frame", row.names = c(NA, -83L))

Upvotes: 1

Views: 1444

Answers (1)

StupidWolf
StupidWolf

Reputation: 46908

I am posting the plots below as an answer because this is a bit too long for a comment. I am not sure exactly what the points on the regression line in your first plot are. If they are predictions from your regression, they should fall exactly on the line; I suspect they were generated from another fit that is not the same as the one that produced the line. Anyhow, here is how to show the predicted values at each unique speed in the data:

# Base plot: one fitted logistic curve per paddle size (no points yet).
# Colour and shape are mapped globally so that every layer added later,
# including layers drawn from a different data frame, inherits them.
g <- ggplot(
  myData,
  aes(speed, GetResp.RESP, color = PadLen, shape = PadLen)
) +
  geom_smooth(
    method = "glm",
    method.args = list(family = "quasibinomial"),
    se = FALSE,
    # ggplot2 >= 3.4.0 prefers `linewidth` for line thickness; `size` is kept
    # so the code also runs on older versions.
    size = 1.1
  ) +
  scale_color_manual(
    values = c("black", "red"),
    labels = c("SmallPaddle", "BigPaddle")
  ) +
  scale_shape_manual(
    values = c(1, 2),
    labels = c("SmallPaddle", "BigPaddle")
  ) +
  theme_classic() +
  xlab("Ball Speed (cm/s)") +
  ylab('Proportion of "Fast" Responses')

# Base plot plus the raw 0/1 observations.
g1 <- g + geom_point(size = 2.5)

# Base plot plus the model's predicted probability at each observed speed.
# The fully interacted model (speed * PadLen) gives each paddle size its own
# intercept and slope, so its fitted values coincide with the per-group curves
# drawn by geom_smooth() and the points land exactly on the lines.
fit <- glm(
  GetResp.RESP ~ speed * PadLen,
  family = quasibinomial,
  data = myData
)
datapts <- sort(unique(myData$speed))

# Take the paddle levels from the data instead of hard-coding 0:1, so the
# prediction grid stays valid if the factor is relabelled or gains levels.
pad_levels <- levels(factor(myData$PadLen))
plotdf <- data.frame(
  speed = rep(datapts, times = length(pad_levels)),
  PadLen = factor(
    rep(pad_levels, each = length(datapts)),
    levels = pad_levels
  )
)
# type = "response" returns probabilities rather than values on the logit scale.
plotdf$GetResp.RESP <- predict(fit, newdata = plotdf, type = "response")

g2 <- g + geom_point(data = plotdf)

enter image description here

Upvotes: 2

Related Questions