Björn
Björn

Reputation: 1832

ggplot2 Heatmap with 2 Different Color Schemes - Confusion Matrix: Matches in a Different Color Scheme than Misclassifications

I adapted a heatmap plot for a confusion matrix from this answer.
However, I would like to tweak it. The diagonal (from top left to bottom right) holds the matches (correct classifications). My aim is to plot this diagonal in a yellow color palette, and the mismatches (all tiles except those on the diagonal) in a red color palette.

In my plot.cm function I can get the diagonal with

  # Flag each Prediction/Reference cell: diag is TRUE where the two labels
  # are equal, ndiag is TRUE where they differ.
  cm_d$diag <- cm_d$Prediction == cm_d$Reference # Get the Diagonal
  cm_d$ndiag <- cm_d$Prediction != cm_d$Reference # Not the Diagonal

And with the correct geom_tile aesthetics I can get only the diagonal, in the desired yellowish color scheme:

# Excerpt from a ggplot chain: the tile layer only receives the rows whose
# diag value is not NA. aes(color = ...) maps Freq to the tile outline colour,
# while scale_fill_gradient() below configures the fill aesthetic.
geom_tile( data = cm_d[!is.na(cm_d$diag), ],aes(color = Freq)) +
scale_fill_gradient(guide = FALSE,low=alpha("lightyellow",0.75), high="yellow",na.value = 'white') 

enter image description here

However, I am not able to apply the second color scheme to the elements of cm_d$ndiag. I found the package ggnewscale, which offers new_scale() as well as new_scale_fill().
I tried to implement it with the help of this blog. However, the result is only dark gray filled tiles for the rest of the heatmap. enter image description here

# adapted from https://stackoverflow.com/a/60150826/7318488
library(ggplot2)     # to plot
library(gridExtra)   # to put more
library(grid)        # plot together
library(likert)      # for reversing the factor order
library(ggnewscale)

# Plot a confusion matrix as a ggplot2 heatmap with the cell counts printed
# on the tiles.
#
# Args:
#   cm: object with a `table` element that as.data.frame() turns into
#       Prediction / Reference / Freq columns -- presumably the result of
#       caret::confusionMatrix() (TODO confirm against callers).
#
# Returns:
#   The ggplot object; it is returned, not printed.
plot.cm <- function(cm){
  # extract the confusion matrix values as data.frame
  cm_d <- as.data.frame(cm$table)
  cm_d$diag <- cm_d$Prediction == cm_d$Reference # Get the Diagonal
  cm_d$ndiag <- cm_d$Prediction != cm_d$Reference # Not the Diagonal     
  # NOTE: the comparison below runs over every column, not just Freq.
  # FALSE == 0 is TRUE, so the FALSE entries of diag / ndiag become NA as
  # well; the !is.na(cm_d$diag) filter in the plot relies on that.
  cm_d[cm_d == 0] <- NA # Replace 0 with NA for white tiles
  cm_d$Reference <-  reverse.levels(cm_d$Reference) # diagonal starts at top left

  # plotting the matrix
  cm_d_p <-  ggplot(data = cm_d, aes(x = Prediction , y =  Reference, fill = Freq))+
    scale_x_discrete(position = "top") +
    # Only the diagonal rows are drawn here. aes(color = Freq) maps the tile
    # outline; the fill mapping is inherited from the ggplot() call above.
    geom_tile( data = cm_d[!is.na(cm_d$diag), ],aes(color = Freq)) +
    scale_fill_gradient(guide = FALSE,low=alpha("lightyellow",0.75), high="yellow",na.value = 'white') +
    # THIS DOESNT WORK
    # new_scale("fill") +
    # geom_tile( data = cm_d[!is.na(cm_d$ndiag), ],aes(color = Freq)) +
    # scale_fill_gradient(guide = FALSE,low=alpha("red",0.75), high="darkred",na.value = 'white') +

    # Labels use the full data frame, so every cell gets its Freq value
    # (NA where the count was 0).
    geom_text(aes(label = Freq), color = 'black', size = 6) +
    theme_light() +
    # Strip grid, border, background, axis lines and the legend.
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
      legend.position = "none",
      panel.border = element_blank(),
      plot.background = element_blank(),
      axis.line = element_blank())

  return(cm_d_p)
}

Sample Data:
Simulated Caret Confusion Matrix

library(caret)
# simulated data: 100 random predictions and references over classes 1..7
set.seed(23) # fixed seed so the example is reproducible
# Spell out TRUE: the shorthand T is an ordinary variable and can be reassigned.
pred <- factor(sample(1:7, 100, replace = TRUE))
ref <- factor(sample(1:7, 100, replace = TRUE))
cm <- caret::confusionMatrix(pred, ref)
g <- plot.cm(cm)
g

Upvotes: 2

Views: 643

Answers (1)

DanO
DanO

Reputation: 660

I believe the issue is simply that you're specifying aes(color = Freq) instead of aes(fill = Freq). Is this plot what you were aiming for? You could also simplify all of this by using a divergent color scale and creating a new variable that marks Freq as negative if it's off the diagonal. See the second example below.

# adapted from https://stackoverflow.com/a/60150826/7318488
library(ggplot2)     # to plot
library(gridExtra)   # to put more
library(grid)        # plot together
library(likert)      # for reversing the factor order
#> Loading required package: xtable
library(ggnewscale)

#' Plot a confusion matrix as a two-palette heatmap
#'
#' Correct classifications (the diagonal) are filled on a yellow gradient and
#' misclassifications (all other cells) on a red gradient. Cells with a count
#' of zero are drawn white. Every tile is labelled with its count.
#'
#' @param cm An object whose `table` element converts via `as.data.frame()`
#'   into `Prediction`, `Reference` and `Freq` columns, e.g. the result of
#'   `caret::confusionMatrix()`.
#' @return A ggplot object; print it to draw the plot.
plot.cm <- function(cm) {
  # NOTE(review): the dotted name reads like an S3 `plot` method for a class
  # "cm"; it is kept unchanged so existing callers keep working.
  cm_d <- as.data.frame(cm$table)

  # Compare the labels as text so the test does not depend on both factors
  # carrying identical level sets.
  on_diag <- as.character(cm_d$Prediction) == as.character(cm_d$Reference)

  # Blank out zero counts in Freq only. Comparing the whole data frame with 0
  # would also hit a class labelled "0" in Prediction / Reference.
  is_zero <- !is.na(cm_d$Freq) & cm_d$Freq == 0
  cm_d$Freq[is_zero] <- NA

  # Reverse the y axis so the diagonal runs from top left to bottom right.
  cm_d$Reference <- reverse.levels(cm_d$Reference)

  hits <- cm_d[on_diag, ]
  misses <- cm_d[!on_diag, ]

  ggplot(data = cm_d, aes(x = Prediction, y = Reference, fill = Freq)) +
    scale_x_discrete(position = "top") +
    # First fill scale: yellow gradient for the diagonal tiles.
    geom_tile(data = hits, aes(fill = Freq)) +
    scale_fill_gradient(
      guide = "none",
      low = alpha("lightyellow", 0.75),
      high = "yellow",
      na.value = "white"
    ) +
    # Start a second, independent fill scale for the off-diagonal tiles.
    new_scale("fill") +
    geom_tile(data = misses, aes(fill = Freq)) +
    scale_fill_gradient(
      guide = "none",
      low = alpha("red", 0.75),
      high = "red",
      na.value = "white"
    ) +
    # Labels come from the full data frame; NA counts produce no label.
    geom_text(aes(label = Freq), color = "black", size = 6) +
    theme_light() +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      legend.position = "none",
      panel.border = element_blank(),
      plot.background = element_blank(),
      axis.line = element_blank()
    )
}

library(caret)
#> Loading required package: lattice
# simulated data: 100 random predictions and references over classes 1..7
set.seed(23) # fixed seed so the example is reproducible
# Spell out TRUE: the shorthand T is an ordinary variable and can be reassigned.
pred <- factor(sample(1:7, 100, replace = TRUE))
ref <- factor(sample(1:7, 100, replace = TRUE))
cm <- caret::confusionMatrix(pred, ref)
g <- plot.cm(cm)
g
#> Warning: Removed 8 rows containing missing values (geom_text).

Created on 2020-04-29 by the reprex package (v0.3.0)

# adapted from https://stackoverflow.com/a/60150826/7318488
library(ggplot2)     # to plot
library(gridExtra)   # to put more
library(grid)        # plot together
library(likert)      # for reversing the factor order
#> Loading required package: xtable
library(ggnewscale)

#' Plot a confusion matrix as a heatmap on one diverging colour scale
#'
#' Counts on the diagonal (correct classifications) keep their sign and are
#' shaded towards yellow; off-diagonal counts (misclassifications) are negated
#' and shaded towards red. Cells with a count of zero are drawn white. Every
#' tile is labelled with its original, unsigned count.
#'
#' @param cm An object whose `table` element converts via `as.data.frame()`
#'   into `Prediction`, `Reference` and `Freq` columns, e.g. the result of
#'   `caret::confusionMatrix()`.
#' @return A ggplot object; print it to draw the plot.
plot.cm <- function(cm) {
  # NOTE(review): the dotted name reads like an S3 `plot` method for a class
  # "cm"; it is kept unchanged so existing callers keep working.
  cm_d <- as.data.frame(cm$table)

  # Compare the labels as text so the test does not depend on both factors
  # carrying identical level sets.
  on_diag <- as.character(cm_d$Prediction) == as.character(cm_d$Reference)

  # Blank out zero counts in Freq only. Comparing the whole data frame with 0
  # would also hit a class labelled "0" in Prediction / Reference.
  is_zero <- !is.na(cm_d$Freq) & cm_d$Freq == 0
  cm_d$Freq[is_zero] <- NA

  # Reverse the y axis so the diagonal runs from top left to bottom right.
  cm_d$Reference <- reverse.levels(cm_d$Reference)

  # Signed count that drives the fill: positive on the diagonal, negative off.
  cm_d$ref_freq <- cm_d$Freq * ifelse(on_diag, 1, -1)

  ggplot(data = cm_d, aes(x = Prediction, y = Reference, fill = Freq)) +
    scale_x_discrete(position = "top") +
    geom_tile(data = cm_d, aes(fill = ref_freq)) +
    # Diverging scale centred on 0: red below, yellow above.
    scale_fill_gradient2(
      guide = "none",
      low = "red",
      high = "yellow",
      midpoint = 0,
      na.value = "white"
    ) +
    # Label with the unsigned count; NA counts produce no label.
    geom_text(aes(label = Freq), color = "black", size = 6) +
    theme_light() +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      legend.position = "none",
      panel.border = element_blank(),
      plot.background = element_blank(),
      axis.line = element_blank()
    )
}

library(caret)
#> Loading required package: lattice
# simulated data: 100 random predictions and references over classes 1..7
set.seed(23) # fixed seed so the example is reproducible
# Spell out TRUE: the shorthand T is an ordinary variable and can be reassigned.
pred <- factor(sample(1:7, 100, replace = TRUE))
ref <- factor(sample(1:7, 100, replace = TRUE))
cm <- caret::confusionMatrix(pred, ref)
g <- plot.cm(cm)
g
#> Warning: Removed 8 rows containing missing values (geom_text).

Created on 2020-04-29 by the reprex package (v0.3.0)

Upvotes: 4

Related Questions