Kevin Lenzi
Kevin Lenzi

Reputation: 51

Add P-value Bracket to Tumor Growth ggplot

I would like to add p-value brackets to a tumor growth curve plot I have made in ggplot. This is a common situation that GraphPad Prism has built in functionality for, but I have not seen any examples of this using ggplot when I searched.

From what I can tell, existing R functions like ggpubr::stat_compare_means, ggpubr::stat_pvalue_manual, or ggprism::add_pvalue seem to rely on the groups being compared to be plotted on the x-axis. However, for tumor growth curves, time is the x-axis, and the groups being compared are specified in the legend.

I have included example figure from a publication that was made using GraphPad Prism as well as what I have generated in ggplot.

I would like to add the p-value brackets in the same manner. Preferably this could be an automatic process, so I am not specifying coordinates for every plot going forward.

Tumor growth curve made in GraphPad Prism with desired p-value brackets

library(tidyverse)
library(scales)

tumorDataExample <- data.frame(
  Mouse = rep(1:15, times = 5),
  Condition = rep(c("A", "B", "C"), each = 5, times = 5),
  Day = rep(c(0, 8, 10, 11, 14), each = 15),
  Volume = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
             55.419392, 63.819999, 9.622816, 43.9999665, 34.792254, 5.470524, 23.516325, 
             49.9453705, 9.7540625, 16.9136, 13.1383, 36.872528, 18.108342, 48.700748, 
             13.197388, 86.979584, 76.9613405, 36.452668, 20.9408, 26.978522, 24.628864, 
             6.397264, 132.420195, 18.3314305, 11.1797745, 5.0688, 29.738592, 17.87125, 
             32.8653, 19.9408, 135.7811505, 99.900178, 60.76815, 25.6632, 33.519087, 
             25.1935245, 12.581856, 294.5889, 38.69775, 15.056676, 7.7533775, 21.625065, 
             19.62, 55.118336, 16.1063615, 200.485611, 162.4776305, 104.56355, 40.106664, 
             66.2195385, 16.143108, 4.2439, 886.51492, 17.37468, 15.867072, 7.791552, 
             11.3016465, 37.626644, 74.365425, 4.080501)
)

tumorDataExampleSummary <- tumorDataExample %>%
  group_by(Day, Condition) %>%
  summarize(meanVolume = mean(Volume, na.rm = TRUE),
            se = sd(Volume) / sqrt(n()))

tumorSizeExample_Summary <- ggplot() +
  geom_line(data = tumorDataExampleSummary, aes(x = Day, y = meanVolume, color = Condition)) +
  geom_errorbar(data = tumorDataExampleSummary, aes(x = Day, ymin = meanVolume - se, ymax = meanVolume + se, color = Condition), width = 0.1) +
  geom_point(data = tumorDataExampleSummary, aes(x = Day, y = meanVolume, color = Condition)) +
  scale_y_continuous(breaks = pretty_breaks(n=8)) +
  scale_x_continuous(breaks = pretty_breaks(n=3)) +
  labs(title = "Tumor Volumes Over Time",
       x = "Days After Tumor Injection",
       y = expression("Tumor Volume (mm"^3*")")) +
  theme_minimal()

tumorSizeExample_Summary

Tumor Growth Curve made with ggplot

Upvotes: 3

Views: 78

Answers (2)

Kevin Lenzi
Kevin Lenzi

Reputation: 51

This code works as expected. It runs a Student's T test and plots it with brackets on the last day of measurements.

library(tidyverse)
library(scales)
library(gtools)


tumorDataExample <- data.frame(
  Mouse = rep(1:15, times = 5),
  Condition = rep(c("A", "B", "C"), each = 5, times = 5),
  Day = rep(c(0, 8, 10, 11, 14), each = 15),
  Volume = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
             55.419392, 63.819999, 9.622816, 43.9999665, 34.792254, 5.470524, 23.516325, 
             49.9453705, 9.7540625, 16.9136, 13.1383, 36.872528, 18.108342, 48.700748, 
             13.197388, 86.979584, 76.9613405, 36.452668, 20.9408, 26.978522, 24.628864, 
             6.397264, 132.420195, 18.3314305, 11.1797745, 5.0688, 29.738592, 17.87125, 
             32.8653, 19.9408, 135.7811505, 99.900178, 60.76815, 25.6632, 33.519087, 
             25.1935245, 12.581856, 294.5889, 38.69775, 15.056676, 7.7533775, 21.625065, 
             19.62, 55.118336, 16.1063615, 200.485611, 162.4776305, 104.56355, 40.106664, 
             66.2195385, 16.143108, 4.2439, 886.51492, 17.37468, 15.867072, 7.791552, 
             11.3016465, 37.626644, 74.365425, 4.080501)
)

#Create a summary table of the mean volume for each condition
tumorDataExampleSummary <- tumorDataExample %>%
  group_by(Day, Condition) %>%
  summarize(meanVolume = mean(Volume, na.rm = TRUE),
            se = sd(Volume) / sqrt(n()))

#Run a Student's T test on all the conditions
stat.test <- compare_means(
  Volume ~ Condition, data = tumorDataExample %>% filter(Day == max(tumorDataExample$Day)),
  method = "t.test",
  var.equal = TRUE #Student's T test
)
stat.test


#Create a list of all possible pairwise comparisons of Conditions
paired_columns <- gtools::combinations(n = 3, r = 2, v = unique(tumorDataExample$Condition) , 
                     repeats.allowed = FALSE) 

#Create a list of the p_values for each comparison in the order of the paired_columns
p_value_labs <- lapply(seq_along(unique(tumorDataExample$Condition)), function(i){
  paste0(round(stat.test$p[i], 3))
}) %>% unlist()

#Create a table of x positions for the brackets
#These will really be y values, but the coordinates are flipped in the plot
#This uses the meanVolume of each Condition for the last day of measurements
pvalue_bracket_xpos <- tumorDataExampleSummary  %>%
  filter(Day == max(tumorDataExampleSummary$Day)) %>%
  pivot_wider(id_cols = Day, names_from = Condition, 
              values_from = meanVolume)

#Create the plot
#x and y have to be flipped in aes() for the brackets to work
ggplot() +
  geom_path(data = tumorDataExampleSummary %>% arrange(Day), 
            aes(y = Day, x = meanVolume, color = Condition)) + #Need to use geom_path instead of geom_line
                                                              #Otherwise the lines will be connected in tumor volume order, not day
  geom_errorbarh(data = tumorDataExampleSummary, 
                 aes(y = Day, xmin = meanVolume - se, 
                     xmax = meanVolume + se, color = Condition), 
                 height = 0.1) +
  geom_point(data = tumorDataExampleSummary, 
             aes(y = Day, x = meanVolume, color = Condition)) +
  labs(title = "Tumor Volumes Over Time",
       y = "Days After Tumor Injection",
       x = expression("Tumor Volume (mm"^3*")")) +
  theme_minimal() +
  coord_flip() + ## x and y are flipped in aes(), so we flip them again
  ggpubr::geom_bracket(inherit.aes = FALSE, label = p_value_labs, 
                       coord.flip = TRUE, 
                       xmin = unlist(pvalue_bracket_xpos[,paired_columns[,1]]), 
                       xmax = unlist(pvalue_bracket_xpos[,paired_columns[,2]]), 
                       y.position = pvalue_bracket_xpos$Day + 0.3,
                       step.increase = 0.03,
                       tip.length = 0.02,
                       vjust = 0.25) +
  scale_x_continuous(breaks = pretty_breaks(n=8)) +
  scale_y_continuous(breaks = pretty_breaks(n=3))

Upvotes: 0

M--
M--

Reputation: 29153

library(tidyverse)
library(scales)

tumorDataExampleSummary <- tumorDataExample %>%
  summarize(meanVolume = mean(Volume, na.rm = TRUE),
            se = sd(Volume) / sqrt(n()), 
            .by = c(Day, Condition)) ## notice the use of .by

gtools::combinations(n = 3, r = 2, v = unique(tumorDataExample$Condition) , 
                     repeats.allowed = FALSE) -> paired_columns

tumorDataExampleSummary |> 
  pivot_wider(id_cols = -c(Day, se), names_from = Condition, 
              values_from = meanVolume, values_fn = list) |>
  (\(.) apply(paired_columns, 1, \(r){ 
    t.test(unlist(.[r[1]]), 
           unlist(.[r[2]]), paired = TRUE)}))() |>
  (\(.) lapply(seq_along(unique(tumorDataExample$Condition)), \(i){
    paste0(paired_columns[i, 1], "/", paired_columns[i, 2], 
                          "=", round(.[[i]]$p.value, 2))}))() |> 
  unlist() -> p_value_labs

tumorDataExampleSummary |> 
  filter(Day == max(Day)) |>
  pivot_wider(id_cols = Day, names_from = Condition, 
              values_from = meanVolume) -> x_pos
ggplot() +
  geom_line(data = tumorDataExampleSummary, 
            aes(y = Day, x = meanVolume, color = Condition)) +
  geom_errorbarh(data = tumorDataExampleSummary, 
                 aes(y = Day, xmin = meanVolume - se, 
                     xmax = meanVolume + se, color = Condition), 
                 height = 0.1) +
  geom_point(data = tumorDataExampleSummary, 
             aes(y = Day, x = meanVolume, color = Condition)) +
  labs(title = "Tumor Volumes Over Time",
       y = "Days After Tumor Injection",
       x = expression("Tumor Volume (mm"^3*")")) +
  theme_minimal() +
  coord_flip() + ## x and y are flipped in aes(), so we flip them again
  ggpubr::geom_bracket(inherit.aes = FALSE, label = p_value_labs, 
                       coord.flip = TRUE, 
                       xmin = unlist(x_pos[,paired_columns[,1]]), 
                       xmax = unlist(x_pos[,paired_columns[,2]]), 
                       y.position = x_pos$Day + 0.1,
                       step.increase = 0.03,
                       tip.length = 0.02,
                       vjust = 0.25) +
  scale_x_continuous(breaks = pretty_breaks(n=8)) +
  scale_y_continuous(breaks = pretty_breaks(n=3))

Created on 2025-02-26 with reprex v2.1.1

Upvotes: 1

Related Questions