Yellow_truffle
Yellow_truffle

Reputation: 923

How to reorder words based on their assigned probability in ggplot

I have the data frame shown below (note that these are only the first 6 rows; the full data frame has 161 rows):

> head(a)
# A tibble: 6 x 3
  word     name   value
  <chr>    <chr>  <dbl>
1 amen     t_1   0.0168
2 pool     t_1   0.0153
3 gym      t_1   0.0150
4 yaletown t_1   0.0139
5 fals     t_1   0.0112
6 creek    t_1   0.0112
---  --- ----- --------

I want to plot this data for each name group and here is my code:

# Draw one panel per `name`, with a horizontal bar for every `word`.
# Nothing here orders the words by `value`.
ggplot(
  data = a,
  mapping = aes(x = word, y = value, fill = factor(name))
) +
  facet_wrap(facets = ~ name, scales = "free") +  # free axes for each panel
  geom_col(show.legend = FALSE) +
  coord_flip()  # put the words on the vertical axis

The result is shown below:

enter image description here

This graph is good, except that I want each facet to be sorted from high to low. For example, for t_1, I want the word amen to be on top. How can I do this?

DATA

structure(list(word = c("amen", "pool", "gym", "yaletown", "fals", 
"creek", "concierg", "fals_creek", "seawal", "tub", "hot", "sauna", 
"hot_tub", "amen_includ", "citi", "indoor", "downtown", "offer", 
"centr", "indoor_pool", "pet", "rental", "updat", "laundri", 
"insuit", "quiet", "renov", "bright", "storag_locker", "face", 
"corner", "fireplac", "patio", "hous", "strata", "roof", "south", 
"paint", "pet_rental", "insuit_laundri", "mountain", "harbour", 
"stanlei", "stanlei_park", "english", "bai", "english_bai", "coal", 
"citi", "north", "coal_harbour", "concierg", "water", "vancouv", 
"shore", "north_shore", "luxuri", "offer", "enjoi", "shore_mountain", 
"master", "privat", "level", "bath", "patio", "ensuit", "main", 
"closet", "dine", "master_bedroom", "fireplac", "offer", "townhom", 
"garag", "size", "deck", "garden", "entertain", "famili", "hous", 
"school", "ubc", "secondari", "elementari", "centr", "commun", 
"close", "conveni", "line", "villag", "catchment", "polygon", 
"distanc", "canada", "pacif", "walk_distanc", "face", "canada_line", 
"oakridg", "vancouv", "bright", "centr", "river", "fit", "amen", 
"wall", "district", "central", "river_district", "vancouv", "meet", 
"commun", "amen_includ", "daili", "garden", "gold", "leed", "rang", 
"ga_rang", "heat", "pool", "custom", "design", "counter", "countertop", 
"quartz", "modern", "light", "tile", "cabinet", "top", "hardwood", 
"concret", "finish", "heat", "loft", "stone", "hardwood_floor", 
"stainless", "cabinetri", "wall", "stainless", "steel", "stainless_steel", 
"steel_applianc", "granit", "downtown", "lamin", "insuit", "skytrain", 
"countertop", "counter", "laundri", "lamin_floor", "close", "face", 
"bright", "central", "buyer", "transit", "storag_locker"), name = c("t_1", 
"t_1", "t_1", "t_1", "t_1", "t_1", "t_1", "t_1", "t_1", "t_1", 
"t_1", "t_1", "t_1", "t_1", "t_1", "t_1", "t_1", "t_1", "t_1", 
"t_1", "t_2", "t_2", "t_2", "t_2", "t_2", "t_2", "t_2", "t_2", 
"t_2", "t_2", "t_2", "t_2", "t_2", "t_2", "t_2", "t_2", "t_2", 
"t_2", "t_2", "t_2", "t_3", "t_3", "t_3", "t_3", "t_3", "t_3", 
"t_3", "t_3", "t_3", "t_3", "t_3", "t_3", "t_3", "t_3", "t_3", 
"t_3", "t_3", "t_3", "t_3", "t_3", "t_4", "t_4", "t_4", "t_4", 
"t_4", "t_4", "t_4", "t_4", "t_4", "t_4", "t_4", "t_4", "t_4", 
"t_4", "t_4", "t_4", "t_4", "t_4", "t_4", "t_4", "t_5", "t_5", 
"t_5", "t_5", "t_5", "t_5", "t_5", "t_5", "t_5", "t_5", "t_5", 
"t_5", "t_5", "t_5", "t_5", "t_5", "t_5", "t_5", "t_5", "t_5", 
"t_5", "t_6", "t_6", "t_6", "t_6", "t_6", "t_6", "t_6", "t_6", 
"t_6", "t_6", "t_6", "t_6", "t_6", "t_6", "t_6", "t_6", "t_6", 
"t_6", "t_6", "t_6", "t_7", "t_7", "t_7", "t_7", "t_7", "t_7", 
"t_7", "t_7", "t_7", "t_7", "t_7", "t_7", "t_7", "t_7", "t_7", 
"t_7", "t_7", "t_7", "t_7", "t_7", "t_8", "t_8", "t_8", "t_8", 
"t_8", "t_8", "t_8", "t_8", "t_8", "t_8", "t_8", "t_8", "t_8", 
"t_8", "t_8", "t_8", "t_8", "t_8", "t_8", "t_8"), value = c(0.0168220200995057, 
0.0153161615392215, 0.0149904123428906, 0.0138688207986376, 0.0112433212880903, 
0.0112374695061203, 0.01112823624268, 0.0110833725809099, 0.0101256309318173, 
0.00999689172847693, 0.00971405559992615, 0.00946633016319547, 
0.00937855343364523, 0.00916203750075464, 0.00908986552312444, 
0.00811651912211177, 0.00672769620122796, 0.00660870996783763, 
0.0057953122740054, 0.00572899207834522, 0.0138658083101884, 
0.0137837795637179, 0.010485360495114, 0.0091757787532163, 0.00819431199720066, 
0.00798132507654038, 0.00756830314255726, 0.00734955981863588, 
0.0068415572308448, 0.00680557971046299, 0.00665591322567468, 
0.00659547099143325, 0.0065925927898027, 0.00653215055556127, 
0.00651632044659328, 0.00646019551479766, 0.00623857398924574, 
0.00599680505228001, 0.00596082753189821, 0.00569171567944231, 
0.0186241974972874, 0.0147573323720616, 0.0135304193706392, 0.0131429731596637, 
0.0128808771934156, 0.0128618847320933, 0.0125200204282914, 0.0124706400288533, 
0.0124288566139442, 0.0122617229543077, 0.0119084631737125, 0.0107879079556951, 
0.0103320888839593, 0.0098914637812813, 0.00987247131995897, 
0.00924951858858663, 0.00903680302177657, 0.00830369401473473, 
0.007889658357908, 0.00771492771374258, 0.0189024481694971, 0.0180269566047608, 
0.0160150330924837, 0.0141518699853569, 0.013476351424265, 0.0128983806698291, 
0.0127203559226821, 0.0107181871910707, 0.0100816877526412, 0.00948176874170768, 
0.00909645490541704, 0.00737717481310755, 0.00708697008830637, 
0.00698454489131772, 0.0067918879731724, 0.00671628842301411, 
0.00664312756802222, 0.00659435366469429, 0.00656265062753114, 
0.00626269112206438, 0.0244598442695631, 0.0219542590218986, 
0.014506306535134, 0.0140648189743327, 0.0114732296563822, 0.0111922830267814, 
0.00977034906472007, 0.00792412835591466, 0.00774065300597126, 
0.00768331695911395, 0.00750557521385629, 0.0069723499780833, 
0.0067888746281399, 0.00666273532505382, 0.00659966567351077, 
0.00654232962665346, 0.00650219439385335, 0.00632445264859568, 
0.00621551415956679, 0.0061811125314524, 0.0061811125314524, 
0.0318859345137425, 0.0240809552302102, 0.0145728663053292, 0.0145503085617352, 
0.0133209115358609, 0.0118433793304523, 0.0113809455867748, 0.0106590977917661, 
0.010591424560984, 0.0102756161506677, 0.0101289908173065, 0.0100049232275394, 
0.00893343040682327, 0.00888831491963522, 0.00884319943244717, 
0.00866273748369498, 0.00809879389384439, 0.00778298548352805, 
0.00778298548352805, 0.00778298548352805, 0.0105073596303114, 
0.0104255592614531, 0.01040715417846, 0.00863004116501312, 0.00785907268852352, 
0.00765252675715628, 0.00763616668338462, 0.0074480258350105, 
0.00686928822533793, 0.00684270310545898, 0.00677521780115087, 
0.00662797713720591, 0.00607991466585521, 0.00596743915867503, 
0.00596334914023211, 0.00567704784922801, 0.00560342751725553, 
0.00560138250803408, 0.00558706744348387, 0.00558706744348387, 
0.0211985535498137, 0.0193575351827745, 0.0189303064930188, 0.0173450631968202, 
0.0141520908839095, 0.00979829324949158, 0.00938792885012101, 
0.00923896095171936, 0.00923052880652682, 0.00842666429817077, 
0.00802754275905693, 0.00795165345232402, 0.00788981772091202, 
0.00777457840328056, 0.00768182480616255, 0.00744291402570708, 
0.00740356401480853, 0.00717870680967398, 0.00694822817441105, 
0.00671774953914813)), row.names = c(NA, -161L), class = c("tbl_df", 
"tbl", "data.frame"))

Upvotes: 2

Views: 99

Answers (3)

LMc
LMc

Reputation: 18712

The package tidytext has a function, reorder_within, specifically for this type of problem. The default aggregation is to take the mean of value for each word within name, but since there is only one value per (name, word) pair, this returns the same result as the max function would.

library(tidytext)

# reorder_within() orders `word` by `value` separately within each `name`;
# scale_x_reordered() is the companion axis scale from the same package.
ggplot(
  data = a,
  mapping = aes(
    x = reorder_within(x = word, by = value, within = name),
    y = value,
    fill = factor(name)
  )
) +
  facet_wrap(facets = ~ name, scales = "free") +
  geom_col(show.legend = FALSE) +
  scale_x_reordered() +
  coord_flip()

Note the use of scale_x_reordered() from the same package.

Upvotes: 4

Waldi
Waldi

Reputation: 41260

You could use fct_reorder from the forcats package.
To handle the order within each facet, the factor is built from paste(name, word):

library(forcats)  # provides fct_reorder()

# Build one factor level per (name, word) pair so that a word occurring under
# several names gets its own position in each facet, then order those levels
# by `value`.
ggplot(
  a,
  aes(
    fct_reorder(.f = paste(name, word), .x = value),
    value,
    fill = factor(name)
  )
) +
  xlab(label = "word") +  # replace the long default axis title
  geom_col(show.legend = FALSE) +
  facet_wrap(~ name, scales = "free") +
  coord_flip()

enter image description here

Upvotes: 2

jdobres
jdobres

Reputation: 11957

This trick using ordered factors will get you most of the way there:

# Sort rows by name, then by value, and turn `word` into an ordered factor
# whose levels follow the first appearance of each word in that sorted order.
# A word that occurs under several names has only one level, so its position
# is fixed by its first appearance.
df <- a %>%
  arrange(name, value) %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  mutate(word = factor(word, unique(word), ordered = TRUE))

ggplot(df, aes(word, value, fill = factor(name))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ name, scales = "free") +
  coord_flip()

enter image description here

However, because some words appear under multiple names, for some of them the ordering is off. Another solution would be to give each row a unique name, then edit the labeling in scale_x_discrete with gsub.

# Give every (name, word) pair its own label so each facet can be ordered
# independently, then fix the factor levels to the sorted row order.
df <- a %>%
  mutate(combo_name = paste(name, word)) %>%
  arrange(name, value) %>%
  mutate(
    combo_name = factor(
      combo_name,
      # unique() keeps factor() from failing on duplicated levels if a
      # name/word pair ever repeats; with unique pairs it changes nothing.
      levels = unique(combo_name),
      ordered = TRUE
    )
  )

ggplot(df, aes(combo_name, value, fill = factor(name))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ name, scales = "free") +
  # Show only the word: drop everything through the last space of the label.
  scale_x_discrete(labels = function(x) gsub(".* ", "", x)) +
  coord_flip()

enter image description here

Upvotes: 3

Related Questions