Komal Rathi
Komal Rathi

Reputation: 4274

How to use color, shape and legend label from different columns

Here is my data frame:

> dput(x)
structure(list(cell_type = c("Macrophage_M2", "Monocyte", "Mast_cell", 
"T_cell_CD4+_effector_memory", "T_cell_CD4+_memory", "T_cell_CD8+_naive", 
"B_cell_plasma", "T_cell_CD4+_(non-regulatory)", "T_cell_CD4+_Th2", 
"Endothelial_cell", "T_cell_NK", "T_cell_regulatory_(Tregs)", 
"Cancer_associated_fibroblast", "T_cell_CD4+_central_memory", 
"Hematopoietic_stem_cell", "T_cell_regulatory_(Tregs)", "Hematopoietic_stem_cell", 
"Common_myeloid_progenitor", "Endothelial_cell", "Macrophage_M2"
), cluster = c("1", "1", "1", "1", "1", "2", "2", "2", "2", "2", 
"3", "3", "3", "3", "3", "4", "4", "4", "4", "4"), vt_score = c(0.620725205988837, 
0.565049819839983, 0.555015252291001, 0.529856563905404, 0.467454576797864, 
0.434671831438467, 0.367378221155799, 0.357576774722313, 0.305798026454146, 
0.222533326094146, 0.457134190027398, 0.447470659870985, 0.447033729409117, 
0.374612139728217, 0.347672664499527, 0.319135092044291, 0.172082683316721, 
0.116551044539405, 0.0763883445051204, 0.0268403599993589), id_col = c(1L, 
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 
3L, 4L, 5L), colors = c("#0D0887FF", "#7E03A8FF", "#CC4678FF", 
"#F89441FF", "#F0F921FF", "#0D0887FF", "#7E03A8FF", "#CC4678FF", 
"#F89441FF", "#F0F921FF", "#0D0887FF", "#7E03A8FF", "#CC4678FF", 
"#F89441FF", "#F0F921FF", "#0D0887FF", "#7E03A8FF", "#CC4678FF", 
"#F89441FF", "#F0F921FF"), shapes = c(16, 16, 16, 16, 16, 17, 
17, 17, 17, 17, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19)), class = "data.frame", row.names = c(NA, 
-20L))

First 10 lines for clarity:

> head(x, n =10)
                      cell_type cluster  vt_score id_col    colors shapes
1                 Macrophage_M2       1 0.6207252      1 #0D0887FF     16
2                      Monocyte       1 0.5650498      2 #7E03A8FF     16
3                     Mast_cell       1 0.5550153      3 #CC4678FF     16
4   T_cell_CD4+_effector_memory       1 0.5298566      4 #F89441FF     16
5            T_cell_CD4+_memory       1 0.4674546      5 #F0F921FF     16
6             T_cell_CD8+_naive       2 0.4346718      1 #0D0887FF     17
7                 B_cell_plasma       2 0.3673782      2 #7E03A8FF     17
8  T_cell_CD4+_(non-regulatory)       2 0.3575768      3 #CC4678FF     17
9               T_cell_CD4+_Th2       2 0.3057980      4 #F89441FF     17
10             Endothelial_cell       2 0.2225333      5 #F0F921FF     17

So here, there are 4 clusters, and each cluster has an associated shape (i.e. the shapes column). Each cluster has 5 cell_type values, and each of those has an associated color (i.e. the colors column). I want to set the point color and shape from the colors and shapes columns, but show the actual cell_type values as the labels in the legend. I tried the code below, but it doesn't seem to work:

# Attempt: map both color and shape to cell_type, then pass the per-row
# columns of x (one entry per row) as the manual scales' labels and values.
# NOTE(review): no `limits` are given, so these vectors are presumably matched
# to the scales' default level order rather than to the row order of x, and
# cell_type contains repeated values across clusters -- confirm against the
# ggplot2 scale_manual documentation.
ggplot(data = x, aes(x = cluster, y = vt_score, 
                     color = cell_type, shape = cell_type)) +
  geom_point(size = 5, alpha =  0.8) +
  scale_color_manual(labels = x$cell_type, 
                     values = x$colors) +
  scale_shape_manual(labels = x$cell_type, 
                     values = x$shapes)

This is what I get, which is incorrect (the colors, shapes and cell_type labels don't seem to match):

enter image description here

Update:

I used limits based on @aosmith's suggestion below and it worked:


# The same cell type can occur in more than one cluster, so build a key that
# is unique per row by joining cell type and cluster.
x$cell_type_lab <- paste(x$cell_type, x$cluster, sep = "_")

# For both manual scales: `limits` lists the keys in row order, `labels` is the
# text shown in the legend, and `values` is the color/shape for each key.
ggplot(x, aes(cluster, vt_score,
              color = cell_type_lab, shape = cell_type_lab)) +
  geom_point(
    size = 10,
    alpha = 0.7,
    position = position_dodge(width = 0.05)
  ) +
  scale_color_manual(
    limits = x$cell_type_lab,
    labels = x$cell_type,
    values = x$colors
  ) +
  scale_shape_manual(
    limits = x$cell_type_lab,
    labels = x$cell_type,
    values = x$shapes
  ) +
  theme_bw() +
  theme_Publication2()

enter image description here

Upvotes: 0

Views: 67

Answers (1)

aosmith
aosmith

Reputation: 36076

To get the colors and legend keys in the correct order, pass the cell_type vector from the dataset as limits and the colors from the dataset as values. Then, if you need to change the labels as well as the legend order, you can pass a vector of labels to the labels argument (in the same order as the other two vectors).

# Use the dataset's own row order for the legend: `limits` takes the cell_type
# column and `values` takes the colors column, in the same order.
ggplot(x, aes(cluster, vt_score, color = cell_type)) +
  geom_point(size = 5, alpha = 0.8) +
  scale_color_manual(
    limits = x$cell_type,
    values = x$colors
  )

enter image description here

Upvotes: 1

Related Questions