vengefulsealion
vengefulsealion

Reputation: 766

How can I manually alter the LHS of two equation annotations in ggplot?

The ggmisc package includes the excellent stat_poly_eq function that allows you to annotate a plot with regression equations used in stat/geom_smooth.

In the plot and code below I've tried to use stat_poly_eq to manually specify the LHS of an equation with a custom annotation. I'm aware that an answer to this question goes into some detail about the customisation options available, however, in my attempts — see below — I've been unable to prevent the annotation from printing multiple times.

Does anyone have an idea how to spot the multiple printing? Thanks in advance.

library(ggplot2)
library(ggpmisc)
library(dplyr)
library(tidyr)

## RAW DATA

raw_data <- data.frame(X = c(-49:50))
raw_data$X <- raw_data$X * 0.1
raw_data$Y1 <- 2 * raw_data$X + rnorm(100, sd = 0.5)
raw_data$Y2 <- 3 * raw_data$X + rnorm(100, sd = 0.4)

## PREP DATA

plot_data <- raw_data %>%
  pivot_longer(
    cols = Y1:Y2,
    names_to = "IV",
    values_to = "Ratio") 

## PLOT DATA

plot_data %>%
  ggplot() +
  aes(
    x = X, 
    y = Ratio,
    linetype = IV,
    shape = IV) +
  stat_smooth(
    method = 'lm', 
    formula = y~x,
    size = .5,
    colour = "black",
    se = FALSE) +
  geom_point(
    size = 3
  ) +
  stat_poly_eq(
    formula =  y ~ x,
    eq.with.lhs = c(
      "italic(log(R[1]/R[2]))~`=`~", 
      "italic(log(T[1]/T[2]))~`=`~"),
    eq.x.rhs = "~italic(X)",
    aes(
      label = paste(..eq.label.., sep = "~~~")),
    parse = TRUE)

enter image description here

Upvotes: 2

Views: 239

Answers (1)

Z.Lin
Z.Lin

Reputation: 29085

This can be achieved with a slightly modified version of the underlying StatPolyEq:

result

The code used is the same code as what's in the question, swopping stat_poly_eq for stat_poly_eq2 as defined below:

stat_poly_eq2 <- function (mapping = NULL, data = NULL, geom = "text_npc", 
    position = "identity", ..., formula = NULL, eq.with.lhs = TRUE, 
    eq.x.rhs = NULL, coef.digits = 3, rr.digits = 2, f.digits = 3, 
    p.digits = 3, label.x = "left", label.y = "top", 
    label.x.npc = NULL, label.y.npc = NULL, hstep = 0, vstep = NULL, 
    output.type = "expression", na.rm = FALSE, show.legend = FALSE, 
    inherit.aes = TRUE) 
{
    if (!is.null(label.x.npc)) {
        stopifnot(grepl("_npc", geom))
        label.x <- label.x.npc
    }
    if (!is.null(label.y.npc)) {
        stopifnot(grepl("_npc", geom))
        label.y <- label.y.npc
    }
    ggplot2::layer(data = data, mapping = mapping, stat = StatPolyEq2, 
        geom = geom, position = position, show.legend = show.legend, 
        inherit.aes = inherit.aes, params = list(formula = formula, 
            eq.with.lhs = eq.with.lhs, eq.x.rhs = eq.x.rhs, coef.digits = coef.digits, 
            rr.digits = rr.digits, f.digits = f.digits, p.digits = p.digits, 
            label.x = label.x, label.y = label.y, hstep = hstep, 
            vstep = ifelse(is.null(vstep), ifelse(grepl("label", 
                geom), 0.125, 0.075), vstep), npc.used = grepl("_npc", 
                geom), output.type = output.type, na.rm = na.rm, 
            ...))
}
StatPolyEq2 <- ggproto(
  "StatPolyEq2", StatPolyEq,
  compute_group = function (data, scales, formula, weight, eq.with.lhs, eq.x.rhs, 
                            coef.digits, rr.digits, f.digits, p.digits, label.x, label.y, 
                            hstep, vstep, npc.used, output.type, na.rm) 
  {
    force(data)
    if (length(unique(data$x)) < 2) {
      return(tibble::new_tibble())
    }
    output.type <- if (!length(output.type)) {
      "expression"
    }
    else {
      tolower(output.type)
    }
    stopifnot(output.type %in% c("expression", "text", "markdown", 
                                 "numeric", "latex", "tex", "tikz"))
    if (is.null(data$weight)) {
      data$weight <- 1
    }
    if (exists("grp.label", data)) {
      if (length(unique(data[["grp.label"]])) > 1L) {
        warning("Non-unique value in 'data$grp.label' for group.")
        grp.label <- ""
      }
      else {
        grp.label <- data[["grp.label"]][1]
      }
    }
    else {
      grp.label <- ""
    }
    if (is.null(eq.x.rhs)) {
      if (output.type == "expression") {
        eq.x.rhs <- "~italic(x)"
      }
      else if (output.type == "markdown") {
        eq.x.rhs <- "_x_"
      }
      else {
        eq.x.rhs <- " x"
      }
    }
    group.idx <- abs(data$group[1])
    if (length(label.x) >= group.idx) {
      label.x <- label.x[group.idx]
    }
    else if (length(label.x) > 0) {
      label.x <- label.x[1]
    }
    if (length(label.y) >= group.idx) {
      label.y <- label.y[group.idx]
    }
    else if (length(label.y) > 0) {
      label.y <- label.y[1]
    }
    if (length(eq.with.lhs) >= group.idx) {
      eq.with.lhs <- eq.with.lhs[group.idx]
    }
    lm.args <- list(quote(formula), data = quote(data), weights = quote(weight))
    mf <- do.call(stats::lm, lm.args)
    mf.summary <- summary(mf)
    rr <- mf.summary$r.squared
    adj.rr <- mf.summary$adj.r.squared
    AIC <- AIC(mf)
    BIC <- BIC(mf)
    if ("fstatistic" %in% names(mf.summary)) {
      f.value <- mf.summary$fstatistic["value"]
      f.df1 <- mf.summary$fstatistic["numdf"]
      f.df2 <- mf.summary$fstatistic["dendf"]
      p.value <- 1 - stats::pf(q = f.value, f.df1, f.df2)
    }
    else {
      f.value <- f.df1 <- f.df2 <- p.value <- NA_real_
    }
    if (output.type == "numeric") {
      z <- tibble::tibble(coef.ls = list(summary(mf)[["coefficients"]]), 
                          coefs = list(stats::coefficients(mf)), r.squared = rr, 
                          adj.r.squared = adj.rr, f.value = f.value, f.df1 = f.df1, 
                          f.df2 = f.df2, p.value = p.value, AIC = AIC, BIC = BIC, 
                          rr.label = "")
    }
    else {
      coefs <- stats::coef(mf)
      formula.rhs.chr <- as.character(formula)[3]
      if (grepl("-1", formula.rhs.chr) || grepl("- 1", formula.rhs.chr)) {
        coefs <- c(0, coefs)
      }
      stopifnot(coef.digits > 0)
      if (coef.digits < 3) {
        warning("'coef.digits < 3' Likely information loss!")
      }
      eq.char <- as.character(signif(polynom::as.polynomial(coefs), 
                                     coef.digits))
      eq.char <- gsub("+ x", paste("+ 1.", 
                                   stringr::str_dup("0", coef.digits - 1L), 
                                   "*x", sep = ""),
                      eq.char, fixed = TRUE)
      eq.char <- gsub("e([+-]?[0-9]*)", "%*%10^{\\1}", eq.char)
      if (output.type %in% c("latex", "tex", "tikz", "markdown")) {
        eq.char <- gsub("*", " ", eq.char, fixed = TRUE)
      }
      if (is.character(eq.with.lhs)) {
        lhs <- eq.with.lhs
        eq.with.lhs <- TRUE
      }
      else if (eq.with.lhs) {
        if (output.type == "expression") {
          lhs <- "italic(y)~`=`~"
        }
        else if (output.type %in% c("latex", "tex", "tikz", "text")) {
          lhs <- "y = "
        }
        else if (output.type == "markdown") {
          lhs <- "_y_ = "
        }
      }
      if (eq.with.lhs) {
        eq.char <- paste(lhs, eq.char, sep = "")
      }
      stopifnot(rr.digits > 0)
      if (rr.digits < 2) {
        warning("'rr.digits < 2' Likely information loss!")
      }
      stopifnot(p.digits > 0)
      if (f.digits < 2) {
        warning("'f.digits < 2' Likely information loss!")
      }
      stopifnot(p.digits > 0)
      if (p.digits < 2) {
        warning("'p.digits < 2' Likely information loss!")
      }
      rr.char <- format(rr, digits = rr.digits)
      adj.rr.char <- format(adj.rr, digits = rr.digits)
      AIC.char <- sprintf("%.4g", AIC)
      BIC.char <- sprintf("%.4g", BIC)
      f.value.char <- as.character(signif(f.value, digits = f.digits))
      f.df1.char <- as.character(f.df1)
      f.df2.char <- as.character(f.df2)
      p.value.char <- as.character(signif(p.value, digits = p.digits))
      if (output.type == "expression") {
        z <- tibble::tibble(eq.label = gsub("x", eq.x.rhs, eq.char, fixed = TRUE), 
                            rr.label = paste("italic(R)^2", rr.char, sep = "~`=`~"), 
                            adj.rr.label = paste("italic(R)[adj]^2", 
                                                 adj.rr.char, sep = "~`=`~"),
                            AIC.label = paste("AIC", AIC.char, sep = "~`=`~"), 
                            BIC.label = paste("BIC", BIC.char, sep = "~`=`~"),
                            f.value.label = ifelse(is.na(f.value), character(0L), 
                                                   paste("italic(F)[", f.df1.char, 
                                                         "*\",\"*", f.df2.char, "]~`=`~", f.value.char, 
                                                         sep = "")), 
                            p.value.label = ifelse(is.na(p.value), character(0L), 
                                                   paste("italic(P)", ifelse(p.value < 0.001, "0.001", p.value.char), 
                                                         sep = ifelse(p.value <  0.001, "~`<=`~", "~`=`~"))), 
                            grp.label = grp.label)
      }
      else if (output.type %in% c("latex", "tex", "text", "tikz")) {
        z <- tibble::tibble(eq.label = gsub("x", eq.x.rhs, 
                                            eq.char, fixed = TRUE),
                            rr.label = paste("R^2", rr.char, sep = " = "), 
                            adj.rr.label = paste("R_{adj}^2", adj.rr.char, sep = " = "), 
                            AIC.label = paste("AIC", AIC.char, sep = " = "), 
                            BIC.label = paste("BIC", BIC.char, sep = " = "),
                            f.value.label = ifelse(is.na(f.value), character(0L), 
                                                   paste("F_{", f.df1.char, ",", f.df2.char, "} = ", f.value.char, sep = "")), 
                            p.value.label = ifelse(is.na(p.value), character(0L), 
                                                   paste("P", p.value.char, 
                                                         sep = ifelse(p.value < 0.001, " \\leq ", " = "))),
                            grp.label = grp.label)
      }
      else if (output.type == "markdown") {
        z <- tibble::tibble(eq.label = gsub("x", eq.x.rhs, eq.char, fixed = TRUE),
                            rr.label = paste("_R_<sup>2</sup>", rr.char, sep = " = "), 
                            adj.rr.label = paste("_R_<sup>2</sup><sub>adj</sub>", 
                                                 adj.rr.char, sep = " = "),
                            AIC.label = paste("AIC", AIC.char, sep = " = "), 
                            BIC.label = paste("BIC", BIC.char, sep = " = "), 
                            f.value.label = ifelse(is.na(f.value), character(0L), 
                                                   paste("_F_<sub>", f.df1.char, ",", f.df2.char, "</sub> = ", f.value.char, 
                                                         sep = "")), 
                            p.value.label = ifelse(is.na(p.value), character(0L), 
                                                   paste("_P_", p.value.char, 
                                                         sep = ifelse(p.value < 0.001, " &le; ", " = "))), 
                            grp.label = grp.label)
      }
      else {
        warning("Unknown 'output.type' argument: ", output.type)
      }
    }
    if (npc.used) {
      margin.npc <- 0.05
    }
    else {
      margin.npc <- 0
    }
    if (is.character(label.x)) {
      label.x <- compute_npcx(x = label.x, group = group.idx, 
                              h.step = hstep, margin.npc = margin.npc)
      if (!npc.used) {
        x.expanse <- abs(diff(range(data$x)))
        x.min <- min(data$x)
        label.x <- label.x * x.expanse + x.min
      }
    }
    if (is.character(label.y)) {
      label.y <- compute_npcy(y = label.y, group = group.idx, 
                              v.step = vstep, margin.npc = margin.npc)
      if (!npc.used) {
        y.expanse <- abs(diff(range(data$y)))
        y.min <- min(data$y)
        label.y <- label.y * y.expanse + y.min
      }
    }
    if (npc.used) {
      z$npcx <- label.x
      z$x <- NA_real_
      z$npcy <- label.y
      z$y <- NA_real_
    }
    else {
      z$x <- label.x
      z$npcx <- NA_real_
      z$y <- label.y
      z$npcy <- NA_real_
    }
    z
  })

Modifications from the original:

  • stat_poly_eq2: stat = StatPolyEq2 instead of stat = StatPolyEq;

  • StatPolyEq2: added if (length(eq.with.lhs) >= group.idx) {eq.with.lhs <- eq.with.lhs[group.idx]} clause in lines 58-60, so that if multiple equation expressions are given, only the one whose order corresponds to the specific group is used.

Upvotes: 2

Related Questions