Reputation: 766
The ggmisc
package includes the excellent stat_poly_eq
function that allows you to annotate a plot with regression equations used in stat/geom_smooth
.
In the plot and code below I've tried to use stat_poly_eq
to manually specify the LHS of an equation with a custom annotation. I'm aware that an answer to this question goes into some detail about the customisation options available, however, in my attempts — see below — I've been unable to prevent the annotation from printing multiple times.
Does anyone have an idea how to spot the multiple printing? Thanks in advance.
library(ggplot2)
library(ggpmisc)
library(dplyr)
library(tidyr)
## RAW DATA
raw_data <- data.frame(X = c(-49:50))
raw_data$X <- raw_data$X * 0.1
raw_data$Y1 <- 2 * raw_data$X + rnorm(100, sd = 0.5)
raw_data$Y2 <- 3 * raw_data$X + rnorm(100, sd = 0.4)
## PREP DATA
plot_data <- raw_data %>%
pivot_longer(
cols = Y1:Y2,
names_to = "IV",
values_to = "Ratio")
## PLOT DATA
plot_data %>%
ggplot() +
aes(
x = X,
y = Ratio,
linetype = IV,
shape = IV) +
stat_smooth(
method = 'lm',
formula = y~x,
size = .5,
colour = "black",
se = FALSE) +
geom_point(
size = 3
) +
stat_poly_eq(
formula = y ~ x,
eq.with.lhs = c(
"italic(log(R[1]/R[2]))~`=`~",
"italic(log(T[1]/T[2]))~`=`~"),
eq.x.rhs = "~italic(X)",
aes(
label = paste(..eq.label.., sep = "~~~")),
parse = TRUE)
Upvotes: 2
Views: 239
Reputation: 29085
This can be achieved with a slightly modified version of the underlying StatPolyEq
:
The code used is the same code as what's in the question, swopping stat_poly_eq
for stat_poly_eq2
as defined below:
stat_poly_eq2 <- function (mapping = NULL, data = NULL, geom = "text_npc",
position = "identity", ..., formula = NULL, eq.with.lhs = TRUE,
eq.x.rhs = NULL, coef.digits = 3, rr.digits = 2, f.digits = 3,
p.digits = 3, label.x = "left", label.y = "top",
label.x.npc = NULL, label.y.npc = NULL, hstep = 0, vstep = NULL,
output.type = "expression", na.rm = FALSE, show.legend = FALSE,
inherit.aes = TRUE)
{
if (!is.null(label.x.npc)) {
stopifnot(grepl("_npc", geom))
label.x <- label.x.npc
}
if (!is.null(label.y.npc)) {
stopifnot(grepl("_npc", geom))
label.y <- label.y.npc
}
ggplot2::layer(data = data, mapping = mapping, stat = StatPolyEq2,
geom = geom, position = position, show.legend = show.legend,
inherit.aes = inherit.aes, params = list(formula = formula,
eq.with.lhs = eq.with.lhs, eq.x.rhs = eq.x.rhs, coef.digits = coef.digits,
rr.digits = rr.digits, f.digits = f.digits, p.digits = p.digits,
label.x = label.x, label.y = label.y, hstep = hstep,
vstep = ifelse(is.null(vstep), ifelse(grepl("label",
geom), 0.125, 0.075), vstep), npc.used = grepl("_npc",
geom), output.type = output.type, na.rm = na.rm,
...))
}
StatPolyEq2 <- ggproto(
"StatPolyEq2", StatPolyEq,
compute_group = function (data, scales, formula, weight, eq.with.lhs, eq.x.rhs,
coef.digits, rr.digits, f.digits, p.digits, label.x, label.y,
hstep, vstep, npc.used, output.type, na.rm)
{
force(data)
if (length(unique(data$x)) < 2) {
return(tibble::new_tibble())
}
output.type <- if (!length(output.type)) {
"expression"
}
else {
tolower(output.type)
}
stopifnot(output.type %in% c("expression", "text", "markdown",
"numeric", "latex", "tex", "tikz"))
if (is.null(data$weight)) {
data$weight <- 1
}
if (exists("grp.label", data)) {
if (length(unique(data[["grp.label"]])) > 1L) {
warning("Non-unique value in 'data$grp.label' for group.")
grp.label <- ""
}
else {
grp.label <- data[["grp.label"]][1]
}
}
else {
grp.label <- ""
}
if (is.null(eq.x.rhs)) {
if (output.type == "expression") {
eq.x.rhs <- "~italic(x)"
}
else if (output.type == "markdown") {
eq.x.rhs <- "_x_"
}
else {
eq.x.rhs <- " x"
}
}
group.idx <- abs(data$group[1])
if (length(label.x) >= group.idx) {
label.x <- label.x[group.idx]
}
else if (length(label.x) > 0) {
label.x <- label.x[1]
}
if (length(label.y) >= group.idx) {
label.y <- label.y[group.idx]
}
else if (length(label.y) > 0) {
label.y <- label.y[1]
}
if (length(eq.with.lhs) >= group.idx) {
eq.with.lhs <- eq.with.lhs[group.idx]
}
lm.args <- list(quote(formula), data = quote(data), weights = quote(weight))
mf <- do.call(stats::lm, lm.args)
mf.summary <- summary(mf)
rr <- mf.summary$r.squared
adj.rr <- mf.summary$adj.r.squared
AIC <- AIC(mf)
BIC <- BIC(mf)
if ("fstatistic" %in% names(mf.summary)) {
f.value <- mf.summary$fstatistic["value"]
f.df1 <- mf.summary$fstatistic["numdf"]
f.df2 <- mf.summary$fstatistic["dendf"]
p.value <- 1 - stats::pf(q = f.value, f.df1, f.df2)
}
else {
f.value <- f.df1 <- f.df2 <- p.value <- NA_real_
}
if (output.type == "numeric") {
z <- tibble::tibble(coef.ls = list(summary(mf)[["coefficients"]]),
coefs = list(stats::coefficients(mf)), r.squared = rr,
adj.r.squared = adj.rr, f.value = f.value, f.df1 = f.df1,
f.df2 = f.df2, p.value = p.value, AIC = AIC, BIC = BIC,
rr.label = "")
}
else {
coefs <- stats::coef(mf)
formula.rhs.chr <- as.character(formula)[3]
if (grepl("-1", formula.rhs.chr) || grepl("- 1", formula.rhs.chr)) {
coefs <- c(0, coefs)
}
stopifnot(coef.digits > 0)
if (coef.digits < 3) {
warning("'coef.digits < 3' Likely information loss!")
}
eq.char <- as.character(signif(polynom::as.polynomial(coefs),
coef.digits))
eq.char <- gsub("+ x", paste("+ 1.",
stringr::str_dup("0", coef.digits - 1L),
"*x", sep = ""),
eq.char, fixed = TRUE)
eq.char <- gsub("e([+-]?[0-9]*)", "%*%10^{\\1}", eq.char)
if (output.type %in% c("latex", "tex", "tikz", "markdown")) {
eq.char <- gsub("*", " ", eq.char, fixed = TRUE)
}
if (is.character(eq.with.lhs)) {
lhs <- eq.with.lhs
eq.with.lhs <- TRUE
}
else if (eq.with.lhs) {
if (output.type == "expression") {
lhs <- "italic(y)~`=`~"
}
else if (output.type %in% c("latex", "tex", "tikz", "text")) {
lhs <- "y = "
}
else if (output.type == "markdown") {
lhs <- "_y_ = "
}
}
if (eq.with.lhs) {
eq.char <- paste(lhs, eq.char, sep = "")
}
stopifnot(rr.digits > 0)
if (rr.digits < 2) {
warning("'rr.digits < 2' Likely information loss!")
}
stopifnot(p.digits > 0)
if (f.digits < 2) {
warning("'f.digits < 2' Likely information loss!")
}
stopifnot(p.digits > 0)
if (p.digits < 2) {
warning("'p.digits < 2' Likely information loss!")
}
rr.char <- format(rr, digits = rr.digits)
adj.rr.char <- format(adj.rr, digits = rr.digits)
AIC.char <- sprintf("%.4g", AIC)
BIC.char <- sprintf("%.4g", BIC)
f.value.char <- as.character(signif(f.value, digits = f.digits))
f.df1.char <- as.character(f.df1)
f.df2.char <- as.character(f.df2)
p.value.char <- as.character(signif(p.value, digits = p.digits))
if (output.type == "expression") {
z <- tibble::tibble(eq.label = gsub("x", eq.x.rhs, eq.char, fixed = TRUE),
rr.label = paste("italic(R)^2", rr.char, sep = "~`=`~"),
adj.rr.label = paste("italic(R)[adj]^2",
adj.rr.char, sep = "~`=`~"),
AIC.label = paste("AIC", AIC.char, sep = "~`=`~"),
BIC.label = paste("BIC", BIC.char, sep = "~`=`~"),
f.value.label = ifelse(is.na(f.value), character(0L),
paste("italic(F)[", f.df1.char,
"*\",\"*", f.df2.char, "]~`=`~", f.value.char,
sep = "")),
p.value.label = ifelse(is.na(p.value), character(0L),
paste("italic(P)", ifelse(p.value < 0.001, "0.001", p.value.char),
sep = ifelse(p.value < 0.001, "~`<=`~", "~`=`~"))),
grp.label = grp.label)
}
else if (output.type %in% c("latex", "tex", "text", "tikz")) {
z <- tibble::tibble(eq.label = gsub("x", eq.x.rhs,
eq.char, fixed = TRUE),
rr.label = paste("R^2", rr.char, sep = " = "),
adj.rr.label = paste("R_{adj}^2", adj.rr.char, sep = " = "),
AIC.label = paste("AIC", AIC.char, sep = " = "),
BIC.label = paste("BIC", BIC.char, sep = " = "),
f.value.label = ifelse(is.na(f.value), character(0L),
paste("F_{", f.df1.char, ",", f.df2.char, "} = ", f.value.char, sep = "")),
p.value.label = ifelse(is.na(p.value), character(0L),
paste("P", p.value.char,
sep = ifelse(p.value < 0.001, " \\leq ", " = "))),
grp.label = grp.label)
}
else if (output.type == "markdown") {
z <- tibble::tibble(eq.label = gsub("x", eq.x.rhs, eq.char, fixed = TRUE),
rr.label = paste("_R_<sup>2</sup>", rr.char, sep = " = "),
adj.rr.label = paste("_R_<sup>2</sup><sub>adj</sub>",
adj.rr.char, sep = " = "),
AIC.label = paste("AIC", AIC.char, sep = " = "),
BIC.label = paste("BIC", BIC.char, sep = " = "),
f.value.label = ifelse(is.na(f.value), character(0L),
paste("_F_<sub>", f.df1.char, ",", f.df2.char, "</sub> = ", f.value.char,
sep = "")),
p.value.label = ifelse(is.na(p.value), character(0L),
paste("_P_", p.value.char,
sep = ifelse(p.value < 0.001, " ≤ ", " = "))),
grp.label = grp.label)
}
else {
warning("Unknown 'output.type' argument: ", output.type)
}
}
if (npc.used) {
margin.npc <- 0.05
}
else {
margin.npc <- 0
}
if (is.character(label.x)) {
label.x <- compute_npcx(x = label.x, group = group.idx,
h.step = hstep, margin.npc = margin.npc)
if (!npc.used) {
x.expanse <- abs(diff(range(data$x)))
x.min <- min(data$x)
label.x <- label.x * x.expanse + x.min
}
}
if (is.character(label.y)) {
label.y <- compute_npcy(y = label.y, group = group.idx,
v.step = vstep, margin.npc = margin.npc)
if (!npc.used) {
y.expanse <- abs(diff(range(data$y)))
y.min <- min(data$y)
label.y <- label.y * y.expanse + y.min
}
}
if (npc.used) {
z$npcx <- label.x
z$x <- NA_real_
z$npcy <- label.y
z$y <- NA_real_
}
else {
z$x <- label.x
z$npcx <- NA_real_
z$y <- label.y
z$npcy <- NA_real_
}
z
})
Modifications from the original:
stat_poly_eq2
: stat = StatPolyEq2
instead of stat = StatPolyEq
;
StatPolyEq2
: added if (length(eq.with.lhs) >= group.idx) {eq.with.lhs <- eq.with.lhs[group.idx]}
clause in lines 58-60, so that if multiple equation expressions are given, only the one whose order corresponds to the specific group is used.
Upvotes: 2