Reputation: 4274
Here is my data frame:
> dput(x)
structure(list(cell_type = c("Macrophage_M2", "Monocyte", "Mast_cell",
"T_cell_CD4+_effector_memory", "T_cell_CD4+_memory", "T_cell_CD8+_naive",
"B_cell_plasma", "T_cell_CD4+_(non-regulatory)", "T_cell_CD4+_Th2",
"Endothelial_cell", "T_cell_NK", "T_cell_regulatory_(Tregs)",
"Cancer_associated_fibroblast", "T_cell_CD4+_central_memory",
"Hematopoietic_stem_cell", "T_cell_regulatory_(Tregs)", "Hematopoietic_stem_cell",
"Common_myeloid_progenitor", "Endothelial_cell", "Macrophage_M2"
), cluster = c("1", "1", "1", "1", "1", "2", "2", "2", "2", "2",
"3", "3", "3", "3", "3", "4", "4", "4", "4", "4"), vt_score = c(0.620725205988837,
0.565049819839983, 0.555015252291001, 0.529856563905404, 0.467454576797864,
0.434671831438467, 0.367378221155799, 0.357576774722313, 0.305798026454146,
0.222533326094146, 0.457134190027398, 0.447470659870985, 0.447033729409117,
0.374612139728217, 0.347672664499527, 0.319135092044291, 0.172082683316721,
0.116551044539405, 0.0763883445051204, 0.0268403599993589), id_col = c(1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L), colors = c("#0D0887FF", "#7E03A8FF", "#CC4678FF",
"#F89441FF", "#F0F921FF", "#0D0887FF", "#7E03A8FF", "#CC4678FF",
"#F89441FF", "#F0F921FF", "#0D0887FF", "#7E03A8FF", "#CC4678FF",
"#F89441FF", "#F0F921FF", "#0D0887FF", "#7E03A8FF", "#CC4678FF",
"#F89441FF", "#F0F921FF"), shapes = c(16, 16, 16, 16, 16, 17,
17, 17, 17, 17, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19)), class = "data.frame", row.names = c(NA,
-20L))
First 10 lines for clarity:
> head(x, n =10)
cell_type cluster vt_score id_col colors shapes
1 Macrophage_M2 1 0.6207252 1 #0D0887FF 16
2 Monocyte 1 0.5650498 2 #7E03A8FF 16
3 Mast_cell 1 0.5550153 3 #CC4678FF 16
4 T_cell_CD4+_effector_memory 1 0.5298566 4 #F89441FF 16
5 T_cell_CD4+_memory 1 0.4674546 5 #F0F921FF 16
6 T_cell_CD8+_naive 2 0.4346718 1 #0D0887FF 17
7 B_cell_plasma 2 0.3673782 2 #7E03A8FF 17
8 T_cell_CD4+_(non-regulatory) 2 0.3575768 3 #CC4678FF 17
9 T_cell_CD4+_Th2 2 0.3057980 4 #F89441FF 17
10 Endothelial_cell 2 0.2225333 5 #F0F921FF 17
So here, there are 4 clusters, and each cluster has its own shape (the `shapes` column). Each cluster has 5 `cell_type` values, and each of those values has a color associated with it (the `colors` column). I want to color and shape the points using the `colors` and `shapes` columns, but show the actual `cell_type` values as the labels in the legend. I have tried the code below, but it doesn't seem to work:
# First attempt (does not give the intended result).
# Colour and shape are both mapped to `cell_type`, and the manual scales are
# given `labels`/`values` straight from the columns of `x`, i.e. in row order.
# The vectors are unnamed, so ggplot2 pairs them with the scale's limits by
# position rather than with the rows of `x`; for a character variable the
# default limits are normally in alphabetical order, which is not the row
# order of `x`.
# `x` has 20 rows but `cell_type` repeats across clusters (e.g.
# "Macrophage_M2" appears in clusters 1 and 4), so there are also more
# values/labels supplied than distinct `cell_type` levels.
ggplot(data = x, aes(x = cluster, y = vt_score,
color = cell_type, shape = cell_type)) +
geom_point(size = 5, alpha = 0.8) +
# labels and colours in row order of `x`, with no explicit `limits`
scale_color_manual(labels = x$cell_type,
values = x$colors) +
# same pattern for the point shapes
scale_shape_manual(labels = x$cell_type,
values = x$shapes)
What I get is a plot that is incorrect: the colors, shapes and `cell_type` labels don't seem to match.
Update:
I used limits based on @aosmith's suggestion below and it worked:
# Cell types repeat across clusters (e.g. "Macrophage_M2" occurs in clusters
# 1 and 4), so build a key that is unique per row: "<cell_type>_<cluster>".
x$cell_type_lab <- paste(x$cell_type, x$cluster, sep = "_")

# `limits`, `labels` and `values` are all taken from `x` in the same row
# order: the i-th key is drawn with the i-th colour/shape and is shown in the
# legend under the i-th (non-unique) cell_type label.
ggplot(
  data = x,
  aes(
    x = cluster, y = vt_score,
    color = cell_type_lab, shape = cell_type_lab
  )
) +
  theme_bw() +
  # theme_Publication2() is not defined in this snippet; presumably a
  # user-defined theme that must be loaded beforehand.
  theme_Publication2() +
  geom_point(
    size = 10, alpha = 0.7,
    position = position_dodge(width = 0.05)
  ) +
  scale_color_manual(
    limits = x$cell_type_lab,
    labels = x$cell_type,
    values = x$colors
  ) +
  scale_shape_manual(
    limits = x$cell_type_lab,
    labels = x$cell_type,
    values = x$shapes
  )
Upvotes: 0
Views: 67
Reputation: 36076
To get the colors and legend keys in the correct order, pass the `cell_type` vector from the dataset as the `limits`, along with passing the colors from the dataset to `values`. Then, if you need to change the labels as well as the legend order, you can pass a vector of labels to `labels` (in the same order as the other two vectors).
# Supply `limits` and `values` from the same rows of `x`, so that the i-th
# cell_type is paired with the i-th colour and the legend keys follow the row
# order of `x` instead of the default ordering.
# NOTE(review): `x$cell_type` contains repeated values (e.g. "Macrophage_M2"
# in clusters 1 and 4, with different colours). Confirm how the scale treats
# duplicated limits; a key that is unique per row (such as cell_type pasted
# with cluster) avoids the ambiguity.
ggplot(
  data = x,
  aes(x = cluster, y = vt_score, color = cell_type)
) +
  geom_point(size = 5, alpha = 0.8) +
  scale_color_manual(
    limits = x$cell_type,
    values = x$colors
  )
Upvotes: 1