Reputation: 356
I am working with a set of biological data, an example of which is as follows...
library(tidyverse)
data<-data.frame(
order=c("Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Perissodactyla","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Perissodactyla","Perissodactyla","Artiodactlya","Artiodactlya","Artiodactlya","Proboscidea","Perissodactyla","Perissodactyla","Perissodactyla","Perissodactyla","Perissodactyla","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Proboscidea","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya
","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Perissodactyla","Perissodactyla","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Perissodactyla","Perissodactyla","Perissodactyla","Perissodactyla","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya","Artiodactlya"),
family=c("Bovidae","Bovidae","Bovidae","Cervidae","Bovidae","Bovidae","Bovidae","Antilocapridae","Bovidae","Cervidae","Cervidae","Suidae","Bovidae","Cervidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Camelidae","Camelidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Cervidae","Cervidae","Bovidae","Bovidae","Bovidae","Bovidae","Tayassuidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Rhinocerotidae","Cervidae","Cervidae","Hippopotamidae","Bovidae","Bovidae","Cervidae","Bovidae","Bovidae","Rhinocerotidae","Rhinocerotidae","Bovidae","Cervidae","Cervidae","Elephantidae","Equidae","Equidae","Equidae","Equidae","Equidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Giraffidae","Bovidae","Cervidae","Hippopotamidae","Bovidae","Bovidae","Tragulidae","Suidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Camelidae","Camelidae","Bovidae","Elephantidae","Bovidae","Bovidae","Bovidae","Cervidae","Cervidae","Cervidae","Cervidae","Cervidae","Cervidae","Cervidae","Tragulidae","Moschidae","Moschidae","Cervidae","Cervidae","Cervidae","Cervidae","Cervidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Cervidae","Cervidae","Giraffidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Cervidae","Bovidae","Tayassuidae","Bovidae","Suidae","Suidae","Bovidae","Bovidae","Suidae","Suidae","Suidae","Bovidae","Bovidae","Bovidae","Bovidae","Cervidae","Cervidae","Cervidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Rhinocerotidae","Rhinocerotidae","Cervidae","Bovidae","Suidae","Suidae","Suidae","Suidae","Bovidae","Bovidae","Tapiridae","Tapiridae","Tapiridae","Tapiridae","Tayassuidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Bovidae","Tragulidae","Tragulidae","Tragulidae","Tragulidae","Camelidae","Camelidae"),
y=c(-0.001863125,-0.102166005,-0.082681787,-0.034819115,-0.203971372,-0.14608609,-0.124751937,-0.060228156,-0.26087436,-0.058897042,-0.021611439,0.055752278,0.122263006,-0.123637398,0.170119323,0.108469085,0.013105529,-0.182166486,0.157200336,0.215896371,-0.069659876,-0.035434215,0.019311512,-0.442534178,-0.135233526,-0.011888411,-0.029017369,-0.050831484,0.050087067,-0.201617248,-0.216700049,0.010216729,-0.098640743,0.021208928,-0.040445327,0.040405856,-0.17503882,0.035631826,-0.028174861,-0.073752856,-0.056991923,0.060354632,-0.086089782,0.383185174,-0.029138017,-0.185213812,0.314313318,0.033053219,0.056801385,-0.113646176,-0.03677128,-0.056918928,0.25985937,0.28245838,-0.236766645,-0.207875916,0.059422923,0.497636371,0.091523907,-0.148471043,-0.018225364,0.023796819,-0.110043566,-0.240739321,-0.12247444,-0.42435168,0.048939773,-0.240873039,-0.235269232,-0.186239865,-0.039528773,-0.101909681,-0.050572131,0.257243087,-0.160718975,0.036663509,-0.050196678,0.094949553,0.015759057,-0.07056069,-0.086559596,0.024333424,0.007231104,-0.274224886,-0.22015192,-0.211627074,0.426924042,-0.082525667,-0.131491001,-0.178848667,-0.003254554,-0.116941389,-0.09253145,-0.170562248,-0.0715427,-0.172290183,-0.071607145,-0.075225703,-0.234899977,-0.219243408,-0.125744609,-0.071939465,-0.143435232,-0.057530919,0.146632885,0.036838395,0.139369968,-0.03057342,-0.060313888,-0.143063257,-0.120948779,-0.106405272,-0.043558975,-0.143716061,-0.028329571,0.127768192,-0.043361589,-0.198177174,-0.07460359,-0.04461378,-0.050870971,-0.216857513,0.059916575,0.017476752,0.000504442,-0.154878275,-0.064869262,-0.173919684,-0.220541878,0.077789088,-0.289841358,0.107983011,0.136913671,-0.024349708,-0.143390158,0.055258952,0.172375097,0.162804834,-0.215683398,-0.225801926,0.043443631,-0.032301548,-0.117351289,-0.131936913,-0.046815394,-0.116160051,-0.069521163,0.054158881,-0.161774587,-0.14186065,0.323341205,0.295046188,0.034689389,-0.375590361,-0.032423553,0.107506313,0.088711276,0.12349364,-0.117709213,
0.26788402,-0.053017818,-0.007870963,0.073981956,-0.077337294,0.077832946,-0.231063286,-0.01175521,-0.100368044,0.089356041,-0.068279906,0.122043925,-0.116726111,-0.063823308,-5.16E-05,-0.174039832,-0.160276752,-0.090239835,-0.022758897,-0.41815848,-0.192715601),
x=c(3.208441356,3.124588445,3.281248903,3.412877815,3.067591548,3.192802934,3.087885763,3.115415346,3.12499302,3.14482989,3.027751994,3.006893708,3.397121498,3.207634367,3.430877175,3.394930445,3.434355962,3.438225808,3.247873551,3.438257472,3.127752516,3.334252642,3.458311439,3.479717375,3.133538908,3.06679081,2.99343623,3.224377652,3.198973072,3.058220344,3.068204421,3.035769836,3.22549666,3.197225406,2.938519725,3.007831012,2.99343623,2.925827575,2.974636761,2.931966115,2.880813592,3.105510185,2.929418926,3.532822299,3.331312058,3.136088637,3.212498338,3.241795431,3.268845404,3.159717546,3.232585053,3.133538908,3.394858641,3.446340342,2.95658859,3.007534418,3.236903523,3.53140227,3.124504225,3.413048169,3.332332398,3.316079605,3.355930187,3.092204872,2.988413942,3.086074954,2.888224438,3.023663918,2.981750673,3.049835132,3.468697676,3.190877725,3.191730393,3.492377901,3.384484836,3.27277331,2.885587356,3.211947744,3.296575356,3.151965256,3.191868021,3.155336037,3.125887718,3.216211914,3.252002216,3.124226996,3.517468145,2.737272177,2.792509648,2.729124275,3.019393264,3.02596091,2.942775895,2.971275849,3.004751156,3.01911629,2.949390007,2.725225362,2.894133222,2.963787827,3.020293816,2.981356442,2.924623828,2.858837851,2.982271233,2.988049072,3.031024499,3.105368235,3.12742737,3.145196406,2.669316881,2.645606462,2.759037976,3.140388312,3.068585203,3.26020285,3.108498601,2.92800816,3.292497894,3.220003426,3.295699017,2.983153657,3.344496133,3.195470679,3.125604258,3.132049171,3.095595491,3.034828916,3.106417565,2.932465493,3.068185862,3.098369548,3.079586399,2.584331224,2.806204493,2.709108767,3.082323374,3.036807434,3.093561758,3.022881813,3.075106513,3.176091259,2.812913357,2.882026208,3.217396199,2.896007841,2.890234666,3.088042782,3.061461418,3.116590166,3.536558443,3.501671858,3.181266849,3.171897335,3.151632475,3.070037867,3.148385521,3.161368002,2.938007311,3.377682241,3.301073423,3.362618171,3.231405869,3.332299129,3.006551609,3.028083007,3.148669341,3.3729
12003,3.260071388,3.22816929,3.379686983,3.109281304,3.173037857,3.29666519,2.652971172,2.664936787,2.724531025,2.602059991,3.23192811,3.110408903))
I am trying to make a ggplot of my data and separate the points by shape and color for easier reading. In this case because the data are biological all of the categorical variables are nested, i.e. "Bovidae", "Camelidae", "Cervidae" are always "Artiodactyla", "Equidae" and "Rhinocerotidae" are always "Perissodactyla", etc.
However, this creates an issue when I try to plot the data in that I cannot easily get the different shapes and colors/fills to form a single legend. Normally if the shape and fills have the same level it is possible to make them form a single legend, but here even though shape is a nested subset of the values used to scale color I cannot easily combine them into a single legend, see below.
# Colour the points by family and pick their shape by order. Both legends are
# given the title "family", but they are still drawn as two separate legends.
ggplot(data = data, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(colour = family, shape = order)) +
  labs(colour = "family", shape = "family")
I know I can put a manual override for the legend using a command like the following:
# Sketch of the manual workaround (not runnable as written): the `...` below
# stands for the remaining hand-typed shape codes, one per legend entry.
# NOTE(review): the answer further down passes guide = "none" rather than
# guide = FALSE to hide a legend - confirm which form your ggplot2 expects.
scale_shape_manual(values=c(1,2,3),guide=FALSE)+
guides(shape=guide_legend(override.aes=list(shape=c(1,1,1,3,2...)))) ## i.e., list the shape codes in whatever order the legend entries appear
However, this requires me to manually type out the order of the various categories every time. That is very inefficient when I have a large number of categories, as can be seen in the above graph, and in some of my graphs the order of the categories is not always the same (because data is missing for some groups or only a certain subset is under consideration). It is also very easy to introduce an error if I do not enter the shapes in precisely the right order or make a slip.
Is there any way to automatically change the shapes on the legend when one variable is a nested subset of the other like this? That is, every level of "family" is technically already present in "order", merely duplicated, such that it is possible to write case_when or if_else statements for them like so...
# Derive each row's order from its family: three families are perissodactyls,
# one is a proboscidean, and everything else falls through to the default.
# NOTE(review): the sample data spells the default order "Artiodactlya",
# whereas this fallback writes "Artiodactyla" - keep the spelling consistent
# with whatever lookup tables are keyed on it.
mutate(
  order = case_when(
    family %in% c("Rhinocerotidae", "Tapiridae", "Equidae") ~ "Perissodactyla",
    family == "Elephantidae" ~ "Proboscidea",
    TRUE ~ "Artiodactyla"
  )
)
Can that be used to automatically combine shape and color into a single legend?
Upvotes: 1
Views: 259
Reputation: 37943
You can create some small lookup tables to match family to orders and orders to shapes. Then, you can use this to override the default shapes in the colour legend. I recommend you explicitly set the breaks of the colour scale and values of the shape scale, because it is easy for mistakes to slip in.
library(tidyverse)
# data <- data.frame(...) # omitted for brevity

# Lookup tables ----

# One row per distinct family/order pair, in order of first appearance.
fam_order_pairs <- unique(data[, c("family", "order")])

# Named character vector: names are families, values are their orders.
# as.character() matters: if the columns were factors, using them as an index
# into another vector would select by integer code instead of by name.
fam2order <- setNames(
  as.character(fam_order_pairs$order),
  as.character(fam_order_pairs$family)
)

# Point shape for each order. The names must match the values of data$order
# exactly (the sample data spells the first one "Artiodactlya").
order2shape <- c("Artiodactlya" = 16, "Perissodactyla" = 17, "Proboscidea" = 15)

# Fail early instead of silently drawing a wrong legend:
# - every family must be nested in exactly one order, and
# - every order present in the data must have a shape assigned.
stopifnot(
  !anyDuplicated(names(fam2order)),
  all(fam2order %in% names(order2shape))
)
# Legend for the colour scale whose keys are redrawn with the shape of the
# order each family belongs to (same ordering as the breaks used below).
family_legend <- guide_legend(
  override.aes = list(shape = order2shape[fam2order])
)

# Colour encodes family, shape encodes order. The shape legend is hidden and
# the colour legend carries the shapes instead.
ggplot(data, aes(x = x, y = y)) +
  geom_point(aes(colour = family, shape = order)) +
  scale_shape_manual(values = order2shape, guide = "none") +
  scale_colour_discrete(breaks = names(fam2order), guide = family_legend)
In addition, because you've effectively made a shape palette already, you can use that lookup table directly in combination with scale_shape_identity().
# Same plot, but the shape codes are looked up inside aes() and passed through
# unchanged by the identity scale, so no manual shape scale is needed.
ggplot(data, aes(x = x, y = y)) +
  geom_point(aes(colour = family, shape = order2shape[order])) +
  scale_shape_identity() +
  scale_colour_discrete(
    breaks = names(fam2order),
    guide = guide_legend(override.aes = list(shape = order2shape[fam2order]))
  )
Yet another option is to use the same breaks for a colour and shape scale, but simply have repeated values for manual shapes. This dispenses with the whole override approach.
# Map both colour and shape to family. Each family is given the shape of its
# order, so shapes repeat across families of the same order.
family_breaks <- names(fam2order)
family_shapes <- setNames(order2shape[fam2order], family_breaks)

# Both scales get the *same* breaks in the same order; the two legends can
# only be merged into one when their entries line up.
ggplot(data, aes(x, y)) +
  geom_point(aes(colour = family, shape = family)) +
  scale_colour_discrete(
    breaks = family_breaks
  ) +
  scale_shape_manual(
    breaks = family_breaks,
    values = family_shapes
  )
Upvotes: 4