Align IRR points on a forest plot with their respective table rows in R?

Question

I am struggling to get the rows in my table to align correctly with their respective IRR points on the plot (e.g., values for A align with the IRR point for A on the plot).

Specifically, I am creating a forest plot that includes a graph and corresponding tables of data. Each point on the forest plot represents an incidence rate ratio (IRR) for exposure to a given recreational substance. There are two groups for each substance: the groups for cannabis exposure are A and B, the groups for nicotine are C and D, and the groups for alcohol are E and F. The two columns of data contain the adjusted and unadjusted IRR values, each with its confidence interval.

Here is my current plot below:

And here is the code:


# Load necessary libraries
library(tibble)
library(ggplot2)
library(dplyr)
library(forcats)
library(stringr)
library(patchwork) # For plot layout

# Define data, one row per line of the figure. The substance header rows
# ("Cannabis", "Nicotine", "Alcohol") carry NA in every numeric column; the two
# rows under each header are that substance's exposure groups.
# estimate/conf.low/conf.high and estimate2/conf.low2/conf.high2 are the two
# sets of IRR values with their interval bounds.
res <- tribble(
  ~model,     ~estimate, ~conf.low, ~conf.high, ~estimate2, ~conf.low2, ~conf.high2,
  "Cannabis", NA,        NA,        NA,         NA,         NA,         NA,
  "A",        1.08,      1.02,      1.14,       1.09,       1.03,       1.15,
  "B",        1.01,      0.99,      1.05,       1.02,       0.99,       1.05,
  "Nicotine", NA,        NA,        NA,         NA,         NA,         NA,
  "C",        1.06,      1.04,      1.08,       1.03,       1.01,       1.05,
  "D",        1.07,      1.03,      1.11,       1.06,       1.04,       1.08,
  "Alcohol",  NA,        NA,        NA,         NA,         NA,         NA,
  "E",        1.09,      1.07,      1.11,       1.07,       1.05,       1.09,
  "F",        1.22,      1.11,      1.33,       1.20,       1.10,       1.30
) %>%
  # Turn 'model' into a factor whose levels are the rows in reverse order, so
  # "Cannabis" is the last level and "F" the first (the names are unique, so
  # they can serve directly as the level set)
  mutate(model = factor(model, levels = rev(model)))

# Forest plot panel (added last to the final figure): one point per group with
# its confidence interval. The x axis is left untransformed (linear).
p_right <- ggplot(res, aes(y = model)) + # 'model' carries the reversed levels
  theme_classic() +
  # Interval lines, drawn only for rows that have an estimate
  geom_linerange(
    data = filter(res, !is.na(estimate)),
    aes(xmin = conf.low, xmax = conf.high)
  ) +
  # Point estimates for the same rows; fill colours are assigned by row
  # position in the filtered data, two consecutive rows per colour
  geom_point(
    data = filter(res, !is.na(estimate)),
    aes(x = estimate),
    shape = 21, size = 3, stroke = 0.5,
    color = "black",
    fill = rep(c("cyan2", "orange", "purple"), each = 2),
    position = position_dodge(width = 0.5)
  ) +
  # Reference line at IRR = 1 and direction labels at the top of the panel
  geom_vline(xintercept = 1, linetype = "dashed") +
  annotate("text", x = 1.3, y = 9, label = "Higher Risk") +
  annotate("text", x = 0.7, y = 9, label = "Lower Risk") +
  coord_cartesian(xlim = c(0.5, 1.5), ylim = c(1, 9)) +
  labs(x = "Incidence Rate Ratio") +
  # Keep the y-axis text for readability but remove the axis line, ticks and
  # title
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 10),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank()
  )

# Wrangle results into pre-plotting table form for middle-side labels:
# format estimate, conf.low and conf.high with two decimals and combine them
# into an "estimate (low-high)" label.
# sprintf("%.2f") is used instead of str_pad(round(x, 2), width = 4, pad = "0"),
# because right-padding the printed number with zeros turns a value of exactly
# 1 into "1000" (and 2 into "2000").
# Missing values are kept as NA so that the is.na(estimate) check applied when
# the labels are drawn still recognises the header rows.
res_plot <- res %>%
  mutate(
    across(
      c(estimate, conf.low, conf.high),
      \(x) if_else(is.na(x), NA_character_, sprintf("%.2f", x))
    ),
    estimate_lab = paste0(estimate, " (", conf.low, "-", conf.high, ")")
  )


# Wrangle results into pre-plotting table form for left-side labels:
# format estimate2, conf.low2 and conf.high2 with two decimals and combine them
# into an "estimate (low-high)" label.
# sprintf("%.2f") is used instead of str_pad(round(x, 2), width = 4, pad = "0"),
# because right-padding the printed number with zeros turns a value of exactly
# 1 into "1000" (and 2 into "2000").
# Missing values are kept as NA so that the is.na(estimate2) check applied when
# the labels are drawn still recognises the header rows.
res_plot2 <- res %>%
  mutate(
    across(
      c(estimate2, conf.low2, conf.high2),
      \(x) if_else(is.na(x), NA_character_, sprintf("%.2f", x))
    ),
    estimate_lab2 = paste0(estimate2, " (", conf.low2, "-", conf.high2, ")")
  )

# Middle panel: a text-only column showing estimate_lab for every row that has
# an estimate; header rows get an empty string
p_mid <- ggplot(res_plot, aes(y = model)) +
  geom_text(
    aes(x = 1, label = replace(estimate_lab, is.na(estimate), "")),
    hjust = 0
  ) +
  coord_cartesian(xlim = c(0, 4)) +
  theme_void()

# Left panel: row names in bold at x = 0, followed at x = 1 by the
# estimate_lab2 text column
p_left <- ggplot(res_plot2, aes(y = model)) +
  geom_text(aes(x = 0, label = model), fontface = "bold", hjust = 0) +
  # Header rows have no estimate2, so their label is left empty
  geom_text(
    aes(x = 1, label = replace(estimate_lab2, is.na(estimate2), "")),
    hjust = 0
  ) +
  coord_cartesian(xlim = c(0, 4)) +
  theme_void()

# Patchwork design: one area() per panel, listed in the order the panels are
# added below (p_left, p_mid, p_right). t/b are the top and bottom bounds and
# l/r the left and right bounds of each area.
layout <- c(
  area(t = 0, b = 10, l = 2, r = 3),
  area(t = 0, b = 10, l = 2, r = 9),
  area(t = 0, b = 10, l = 6, r = 11)
)

# Combine the three panels using the design above, then display the figure
final_plot <- (p_left + p_mid + p_right) + plot_layout(design = layout)
final_plot

Any advice on how to get the points on the plot to align with their rows would be greatly appreciated. Thank you so much!

stefan · Accepted Answer

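The issue is that your main plot does not show the header rows of your data: every layer in `p_right` only uses the rows with a non-missing estimate, so the discrete y scale drops the unused header levels and the plot ends up with six rows while the tables have nine. To fix that, add `+ scale_y_discrete(drop = FALSE)` and use the `labels =` argument so that no label is shown for the header rows. Also note that I merged the left and middle plots into one plot.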

library(tidyverse)
library(patchwork)

# Named character vector of y-axis labels: names are the 'model' values, and
# each value is the label to display -- the model name for rows that have an
# estimate, an empty string for the header rows (NA estimate).
# 'model' is a factor, so it is converted to character explicitly rather than
# relying on if_else() to reconcile a character "" with a factor.
labels_y <- res |>
  distinct(model, estimate) |>
  mutate(
    model = as.character(model),
    estimate = if_else(is.na(estimate), "", model)
  ) |>
  # deframe() uses the first column as names and the second as values
  tibble::deframe()

# Forest plot panel: one point per group with its confidence interval, with
# the header rows kept on the y axis as blank-labelled rows
p_right <- ggplot(res, aes(y = model)) + # 'model' carries the reversed levels
  theme_classic() +
  # Interval lines, drawn only for rows that have an estimate
  geom_linerange(
    data = filter(res, !is.na(estimate)),
    aes(xmin = conf.low, xmax = conf.high)
  ) +
  # Point estimates for the same rows; fill colours are assigned by row
  # position in the filtered data, two consecutive rows per colour
  geom_point(
    data = filter(res, !is.na(estimate)),
    aes(x = estimate),
    shape = 21, size = 3, stroke = 0.5,
    color = "black",
    fill = rep(c("cyan2", "orange", "purple"), each = 2),
    position = position_dodge(width = 0.5)
  ) +
  # Reference line at IRR = 1 and direction labels at the top of the panel
  geom_vline(xintercept = 1, linetype = "dashed") +
  annotate("text", x = 1.3, y = 9, label = "Higher Risk") +
  annotate("text", x = 0.7, y = 9, label = "Lower Risk") +
  # drop = FALSE keeps the factor levels that no layer draws (the header rows)
  # on the axis; labels_y supplies the text shown for each level
  scale_y_discrete(labels = labels_y, drop = FALSE) +
  coord_cartesian(xlim = c(0.5, 1.5), ylim = c(1, 9)) +
  labs(x = "Incidence Rate Ratio") +
  # Keep the y-axis text but remove the axis line, ticks and title
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 10),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank()
  )

# Left side of plot: the row names are shown as bold, left-aligned y-axis text
# and the estimate_lab strings as a text column at x = 1.
p_left <- res_plot %>%
  ggplot(aes(y = model)) +
  # The guard tests 'estimate', the column estimate_lab is built from, instead
  # of 'estimate2'; header rows (NA estimate) get an empty label
  geom_text(aes(x = 1, label = ifelse(!is.na(estimate), estimate_lab, "")),
    hjust = 0
  ) +
  # No padding around the x range
  scale_x_continuous(expand = c(0, 0)) +
  theme_void() +
  # theme_void() removes the axis text, so re-enable it for the left y axis
  theme(axis.text.y.left = element_text(face = "bold", hjust = 0)) +
  coord_cartesian(xlim = c(0, 4))

# Patchwork design: one area() per panel, listed in the order the panels are
# added below (p_left, p_right). t/b are the top and bottom bounds and l/r the
# left and right bounds of each area.
layout <- c(
  area(t = 0, b = 10, l = 0, r = 4),
  area(t = 0, b = 10, l = 5, r = 10)
)

# Combine the two panels using the design above, then display the figure
final_plot <- (p_left + p_right) + plot_layout(design = layout)
final_plot

Align IRR points on a forest plot with their respective table rows in R?

Answers (1)

Related Questions