Adding p-values to ggplot; ggsignif says it can only handle data with groups that are plotted on the x-axis

Question

I have data as follows, to which I am trying to add p-values:

library(ggplot2)
library(ggsignif)
library(dplyr)
# Example data for the plot: ten rows, two per `Type` level (treatment 0 and 1).
# `x` holds the numeric bar position used on the x-axis; the B groups sit at
# 4.5 and 5.5, which leaves a gap after the A groups at 1, 2 and 3.
data <- structure(
  list(
    treatment = c(0, 1, 0, 1, 0, 1, 0, 1, 0, 1),
    New_Compare_Truth = c(57, 61, 12, 14, 141, 87, 104, 90, 12, 14),
    total_Hy = c(135, 168, 9, 15, 103, 83, 238, 251, 9, 15),
    total = c(285, 305, 60, 70, 705, 435, 520, 450, 60, 70),
    ratio = c(
      47.3684210526316, 55.0819672131148,
      15, 21.4285714285714,
      14.6099290780142, 19.0804597701149,
      45.7692307692308, 55.7777777777778,
      15, 21.4285714285714
    ),
    # Factor labels contain line breaks so the axis text wraps.
    Type = structure(
      c(2L, 2L, 1L, 1L, 3L, 3L, 5L, 5L, 4L, 4L),
      .Label = c(
        "A1. Others \nMore \nH",
        "A2. Similar \nNorm",
        "A3. Others \nLess \nH",
        "B1. Others \nMore \nH",
        "B2. Similar \nNorm or \nHigher"
      ),
      class = "factor"
    ),
    `Sample Selection` = c(
      "Answers pr", "Answers pu",
      "Answers pr", "Answers pu",
      "Answers pr", "Answers pu",
      "Answers pr", "Answers pu",
      "Answers pr", "Answers pu"
    ),
    p_value = c(
      0.0610371842601616, 0.0610371842601616,
      0.346302201593934, 0.346302201593934,
      0.0472159407450147, 0.0472159407450147,
      0.0018764377521242, 0.0018764377521242,
      0.346302201593934, 0.346302201593934
    ),
    x = c(2, 2, 1, 1, 3, 3, 5.5, 5.5, 4.5, 4.5)
  ),
  row.names = c(NA, -10L),
  class = c("data.table", "data.frame")
)

# Lookup table pairing each `Type` label with the numeric x position at which
# its axis tick is drawn.
breaks_labels <- structure(
  list(
    Type = structure(
      c(2L, 1L, 3L, 5L, 4L),
      .Label = c(
        "A1. Others \nMore \nH",
        "A2. Similar \nNorm",
        "A3. Others \nLess \nH",
        "B1. Others \nMore \nH",
        "B2. Similar \nNorm or \nHigher"
      ),
      class = "factor"
    ),
    x = c(2, 1, 3, 5.5, 4.5)
  ),
  row.names = c(NA, -5L),
  class = c("data.table", "data.frame")
)

# Dodged bar chart of `ratio` against the numeric position `x`, one bar per
# `Sample Selection`, with a percentage label above each bar and manually
# positioned significance brackets added as the final layer.
data %>% 
  ggplot(aes(x = x, y = ratio)) + 
  geom_col(aes(fill = `Sample Selection`), position = position_dodge(preserve = "single"), na.rm = TRUE) + 
  geom_text(position = position_dodge(width = .9),    # move to center of bars
            aes(label=sprintf("%.02f %%", round(ratio, digits = 1)), group = `Sample Selection`),
            vjust = -1.5,    # nudge above top of bar
            size = 4, 
            na.rm = TRUE) +        
  # geom_text(position = position_dodge(width = .9),    # move to center of bars
  #           aes(label= paste0("(", ifelse(variable == "Crime = 0", `Observation for Crime = 0`, `Observation for Crime = 1`), ")"), group = `Sample Selection`),
  #           vjust = -0.6,    # nudge above top of bar
  #           size = 4, 
  #           na.rm = TRUE) +     
    scale_fill_grey(start = 0.8, end = 0.5) +
    scale_y_continuous(expand = expansion(mult = c(0, .1))) +
    # `x` is numeric, so tick positions and their text both come from breaks_labels.
    scale_x_continuous(breaks = breaks_labels$x, labels = breaks_labels$Type) +
    theme_bw(base_size = 15) + 
    xlab("Norm group for corporate Hy") +
    ylab("Percentage Compliant Decisions") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    # This is the layer that raises "Can only handle data with groups that are
    # plotted on the x-axis".
    # NOTE(review): the last two brackets (3.75-4.25 and 4.75-5.25) do not line
    # up with the bars, which sit at x = 4.5 and x = 5.5.
    # NOTE(review): "p=0.34" labels the same p_value (0.3463) that the first
    # bracket labels "p=0.35" - one of the two roundings is inconsistent.
    geom_signif(annotation=c("p=0.35", "p=0.06", "p=0.05", "p=0.34", "p=0.00"), y_position = c(30, 40, 55 ,75, 90), xmin=c(0.75,1.75,2.75,3.75,4.75),
              xmax=c(1.25,2.25,3.25,4.25,5.25))

For some reason, the last line causes the following error:

Error in f(...) : 
  Can only handle data with groups that are plotted on the x-axis

Since I am just putting in text and not referring to any variable, I don't really understand why this happens. Can anyone help me out? Without the last line it looks like this:

EDIT: Please note that I would like to keep the space between the third and the fourth column (which is apparently also what caused the problem, see Jared's answer).

jared_mamrot · Accepted Answer

Edit

Thanks for clarifying your expected outcome. Here is one way to include geom_signif() annotations without altering the original plot:

library(tidyverse)
library(ggsignif)
# Example data for the plot: ten rows, two per `Type` level (treatment 0 and 1).
# `x` holds the numeric bar position used on the x-axis; the B groups sit at
# 4.5 and 5.5, which leaves a gap after the A groups at 1, 2 and 3.
data <- structure(
  list(
    treatment = c(0, 1, 0, 1, 0, 1, 0, 1, 0, 1),
    New_Compare_Truth = c(57, 61, 12, 14, 141, 87, 104, 90, 12, 14),
    total_Hy = c(135, 168, 9, 15, 103, 83, 238, 251, 9, 15),
    total = c(285, 305, 60, 70, 705, 435, 520, 450, 60, 70),
    ratio = c(
      47.3684210526316, 55.0819672131148,
      15, 21.4285714285714,
      14.6099290780142, 19.0804597701149,
      45.7692307692308, 55.7777777777778,
      15, 21.4285714285714
    ),
    # Factor labels contain line breaks so the axis text wraps.
    Type = structure(
      c(2L, 2L, 1L, 1L, 3L, 3L, 5L, 5L, 4L, 4L),
      .Label = c(
        "A1. Others \nMore \nH",
        "A2. Similar \nNorm",
        "A3. Others \nLess \nH",
        "B1. Others \nMore \nH",
        "B2. Similar \nNorm or \nHigher"
      ),
      class = "factor"
    ),
    `Sample Selection` = c(
      "Answers pr", "Answers pu",
      "Answers pr", "Answers pu",
      "Answers pr", "Answers pu",
      "Answers pr", "Answers pu",
      "Answers pr", "Answers pu"
    ),
    p_value = c(
      0.0610371842601616, 0.0610371842601616,
      0.346302201593934, 0.346302201593934,
      0.0472159407450147, 0.0472159407450147,
      0.0018764377521242, 0.0018764377521242,
      0.346302201593934, 0.346302201593934
    ),
    x = c(2, 2, 1, 1, 3, 3, 5.5, 5.5, 4.5, 4.5)
  ),
  row.names = c(NA, -10L),
  class = c("data.table", "data.frame")
)

# Lookup table pairing each `Type` label with the numeric x position at which
# its axis tick is drawn.
breaks_labels <- structure(
  list(
    Type = structure(
      c(2L, 1L, 3L, 5L, 4L),
      .Label = c(
        "A1. Others \nMore \nH",
        "A2. Similar \nNorm",
        "A3. Others \nLess \nH",
        "B1. Others \nMore \nH",
        "B2. Similar \nNorm or \nHigher"
      ),
      class = "factor"
    ),
    x = c(2, 1, 3, 5.5, 4.5)
  ),
  row.names = c(NA, -5L),
  class = c("data.table", "data.frame")
)
# One row per significance bracket, ordered left to right along the x-axis.
#   signif     - text drawn above the bracket
#   y_position - height of the bracket on the y-axis
#   xmin/xmax  - horizontal extent; each bracket spans 0.25 either side of its
#                bar group (groups sit at x = 1, 2, 3, 4.5 and 5.5)
#   group      - unique id per bracket so each row is drawn separately
# The fourth label is "p=0.35", not "p=0.34": that group shares its p-value
# (0.3463) with the first group, so both must round to the same label.
annotation_df <- data.frame(
  signif = c("p=0.35", "p=0.06", "p=0.05", "p=0.35", "p=0.00"),
  y_position = c(30, 40, 55, 75, 90),
  xmin = c(0.75, 1.75, 2.75, 4.25, 5.25),
  xmax = c(1.25, 2.25, 3.25, 4.75, 5.75),
  group = c(1, 2, 3, 4, 5)
)

# Dodged bar chart on the numeric `x` axis (keeping the gap between the A and B
# groups) with significance brackets read from `annotation_df`.
# `group` is set in the top-level aes(), so the bar and label layers are both
# grouped by `Sample Selection`; the bracket layer supplies its own `group`.
ggplot(data, aes(x = x, y = ratio, group = `Sample Selection`)) +
  geom_col(
    mapping = aes(fill = `Sample Selection`),
    position = position_dodge(preserve = "single"),
    na.rm = TRUE
  ) +
  # Percentage label above each bar, dodged to sit over the bar centre.
  geom_text(
    mapping = aes(label = sprintf("%.02f %%", round(ratio, digits = 1))),
    position = position_dodge(width = 0.9),
    vjust = -1.5,
    size = 4,
    na.rm = TRUE
  ) +
  scale_fill_grey(start = 0.8, end = 0.5) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  # Numeric axis: tick positions and tick text both come from breaks_labels.
  scale_x_continuous(breaks = breaks_labels$x, labels = breaks_labels$Type) +
  theme_bw(base_size = 15) +
  labs(
    x = "Norm group for corporate Hy",
    y = "Percentage Compliant Decisions"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # manual = TRUE: bracket positions and labels are taken row by row from the
  # layer's own data instead of being computed from the plotted groups.
  geom_signif(
    data = annotation_df,
    mapping = aes(
      xmin = xmin,
      xmax = xmax,
      y_position = y_position,
      annotations = signif,
      group = group
    ),
    manual = TRUE
  )
#> Warning: Ignoring unknown aesthetics: xmin, xmax, y_position, annotations

^{Created on 2021-07-20 by the reprex package (v2.0.0)}

Previous answer

One potential solution to your problem is to plot "Type" on the x axis instead of "x", e.g.

# Alternative: put the discrete `Type` factor on the x-axis instead of the
# numeric `x`. The five factor levels then occupy positions 1..5 (evenly
# spaced, with no gap between the A and B groups), so the brackets can be
# given as plain vectors centred on those positions.
data %>%
  ggplot(aes(x = Type, y = ratio)) +
  geom_col(aes(fill = `Sample Selection`),
           position = position_dodge(preserve = "single"), na.rm = TRUE) +
  geom_text(position = position_dodge(width = .9),    # move to center of bars
            aes(label = sprintf("%.02f %%", round(ratio, digits = 1)),
                group = `Sample Selection`),
            vjust = -1.5,    # nudge above top of bar
            size = 4,
            na.rm = TRUE) +
  scale_fill_grey(start = 0.8, end = 0.5) +
  scale_y_continuous(expand = expansion(mult = c(0, .1))) +
  theme_bw(base_size = 15) +
  xlab("Norm group for corporate Hy") +
  ylab("Percentage Compliant Decisions") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # One bracket per Type level, in level order (A1, A2, A3, B1, B2).
  # `annotations` is spelled out in full rather than relying on partial
  # argument matching. A1 and B1 share the same p-value (0.3463), so both
  # are labelled "p=0.35".
  geom_signif(annotations = c("p=0.35", "p=0.06", "p=0.05", "p=0.35", "p=0.00"),
              y_position = c(30, 40, 55, 75, 90),
              xmin = c(0.75, 1.75, 2.75, 3.75, 4.75),
              xmax = c(1.25, 2.25, 3.25, 4.25, 5.25))

Adding p-values to ggplot; ggsignif says it can only handle data with groups that are plotted on the x-axis

Answers (1)

Edit

Previous answer

Related Questions