fixed fill for different sections of a density plot with ggplot

Question

Given draws from a rnorm, and cutoff c I want my plot to use the following colors:

Red for the section that is to the left of -c
Blue for the section in between -c and c
and Green for the section that is to the right of c

For example, if my data is:

set.seed(9782)
mydata <- rnorm(1000, 0, 2)
c <- 1

I want to plot something like this:

But if my data is all to the right of c the whole plot should be green. Similarly, if all is between -c and c or to the left of -c the plot should be all red or blue.

This is the code I wrote:

MinD <- min(mydata)
MaxD <- max(mydata)

df.plot <- data.frame(density = mydata)

if(c==0){
  case <- dplyr::case_when((MinD < 0 & MaxD >0) ~ "L_and_R",
                           (MinD > 0) ~ "R",
                           (MaxD < 0) ~ "L")
}else{
  case <- dplyr::case_when((MinD < -c & MaxD >c) ~ "ALL",
                           (MinD > -c & MaxD > c) ~ "Center_and_R",
                           (MinD > -c & MaxD  c ~ "R")
}

# Draw the Center

if(case %in% c("ALL", "Center_and_R", "Center", "Center_and_L")){
  ds <- density(df.plot$density, from = -c, to = c)
  ds_data_Center <- data.frame(x = ds$x, y = ds$y, section="Center")
} else{
  ds_data_Center <- data.frame(x = NA, y = NA, section="Center")
}

# Draw L

if(case %in% c("ALL", "Center_and_L", "L", "L_and_R")){
  ds <- density(df.plot$density, from = MinD, to = -c)
  ds_data_L <- data.frame(x = ds$x, y = ds$y, section="L")
} else{
  ds_data_L <- data.frame(x = NA, y = NA, section="L")
}

# Draw R

if(case %in% c("ALL", "Center_and_R", "R", "L_and_R")){
  ds <- density(df.plot$density, from = c, to = MaxD)
  ds_data_R <- data.frame(x = ds$x, y = ds$y, section="R")
} else{
  ds_data_R <- data.frame(x = NA, y = NA, section="R")
}

L_Pr <- round(mean(mydata < -c),2)
Center_Pr <- round(mean((mydata>-c & mydata c),2)

filldf <- data.frame(section = c("L", "Center", "R"), 
                     Pr = c(L_Pr, Center_Pr, R_Pr), 
                     fill = c("red", "blue", "green")) %>% 
  dplyr::mutate(section = as.character(section))


if(c==0){
  ds_data <- suppressWarnings(dplyr::bind_rows(ds_data_L, ds_data_R)) %>% 
    dplyr::full_join(filldf, by = "section") %>% filter(Pr!=0) %>% 
    dplyr::full_join(filldf, by = "section") %>% mutate(section = ordered(section, levels=c("L","R"))) 
  ds_data <- ds_data[order(ds_data$section), ] %>%  
    filter(Pr!=0) %>% 
    mutate(Pr=scales::percent(Pr))
}else{
  ds_data <- suppressWarnings(dplyr::bind_rows(ds_data_Center, ds_data_L, ds_data_R)) %>% 
    dplyr::full_join(filldf, by = "section") %>% mutate(section = ordered(section, levels=c("L","Center","R"))) 
  ds_data <- ds_data[order(ds_data$section), ] %>%  
    filter(Pr!=0) %>% 
    mutate(Pr=scales::percent(Pr))
}

fillScale <- scale_fill_manual(name = paste0("c = ", c, ":"),
                               values = as.character(unique(ds_data$fill)))

p <- ggplot(data = ds_data, aes(x=x, y=y, fill=Pr)) + 
  geom_area() + fillScale

Alas, I cannot figure out how to assign the colors to the different sections while keeping the percentages as labels for the colors.

eipi10 · Accepted Answer

We use the density function to create the data frame we'll actually plot. Then, We use the cut function to create groups using ranges of the data values. Finally, we calculate the probability mass for each group and use those as the actual legend labels.

We also create a labeled vector of colors to ensure that the same color always goes with a given range of x-values, regardless of whether the data contains any values within a given range of x-values.

The code below packages all this into a function.

library(tidyverse)
library(gridExtra)

fill_density = function(x, cc=1, adj=1, drop_levs=FALSE) {

  # Calculate density values for input data
  dens = data.frame(density(x, n=2^10, adjust=adj)[c("x","y")]) %>% 
    mutate(section = cut(x, breaks=c(-Inf, -1, cc, Inf))) %>% 
    group_by(section) %>% 
    mutate(prob = paste0(round(sum(y)*mean(diff(x))*100),"%"))

  # Get probability mass for each level of section
  # We'll use these as the label values in scale_fill_manual
  sp = dens %>% 
    group_by(section, prob) %>% 
    summarise %>% 
    ungroup

  if(!drop_levs) {
   sp = sp %>% complete(section, fill=list(prob="0%"))
  }

  # Assign colors to each level of section
  col = setNames(c("red","blue","green"), levels(dens$section))

  ggplot(dens, aes(x, y, fill=section)) +
    geom_area() +
    scale_fill_manual(labels=sp$prob, values=col, drop=drop_levs) +
    labs(fill="")
}

Now let's run the function on several different data distributions:

set.seed(3)
dat2 = rnorm(1000)
grid.arrange(fill_density(mydata), fill_density(mydata[mydata>0]),
             fill_density(mydata[mydata>2], drop_levs=TRUE), 
              fill_density(mydata[mydata>2], drop_levs=FALSE),
             fill_density(mydata[mydata < -5 | mydata > 5], adj=0.3), fill_density(dat2),
             ncol=2)

fixed fill for different sections of a density plot with ggplot

Answers (1)

Related Questions