Reputation: 923

How to reorder words based on their assigned probability in ggplot

I have a data frame, shown below (note that this is only the first 6 rows; the full data frame has 161 rows):

> head(a)
# A tibble: 6 x 3
  word     name   value
  <chr>    <chr>  <dbl>
1 amen     t_1   0.0168
2 pool     t_1   0.0153
3 gym      t_1   0.0150
4 yaletown t_1   0.0139
5 fals     t_1   0.0112
6 creek    t_1   0.0112
---  --- ----- --------

I want to plot this data for each name group and here is my code:

# One bar panel per `name`; `word` is mapped to x and then flipped onto the
# vertical axis, with bar length given by `value`.
ggplot(data = a, mapping = aes(x = word, y = value, fill = factor(name))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(facets = ~ name, scales = "free") +
  coord_flip()

The result is shown below:

This plot is fine, except that I want the bars in each facet to be sorted from the highest to the lowest value. For example, in the t_1 facet I want the word amen to be on top. How can I do this?

DATA

structure(list(word = c("amen", "pool", "gym", "yaletown", "fals", 
"creek", "concierg", "fals_creek", "seawal", "tub", "hot", "sauna", 
"hot_tub", "amen_includ", "citi", "indoor", "downtown", "offer", 
"centr", "indoor_pool", "pet", "rental", "updat", "laundri", 
"insuit", "quiet", "renov", "bright", "storag_locker", "face", 
"corner", "fireplac", "patio", "hous", "strata", "roof", "south", 
"paint", "pet_rental", "insuit_laundri", "mountain", "harbour", 
"stanlei", "stanlei_park", "english", "bai", "english_bai", "coal", 
"citi", "north", "coal_harbour", "concierg", "water", "vancouv", 
"shore", "north_shore", "luxuri", "offer", "enjoi", "shore_mountain", 
"master", "privat", "level", "bath", "patio", "ensuit", "main", 
"closet", "dine", "master_bedroom", "fireplac", "offer", "townhom", 
"garag", "size", "deck", "garden", "entertain", "famili", "hous", 
"school", "ubc", "secondari", "elementari", "centr", "commun", 
"close", "conveni", "line", "villag", "catchment", "polygon", 
"distanc", "canada", "pacif", "walk_distanc", "face", "canada_line", 
"oakridg", "vancouv", "bright", "centr", "river", "fit", "amen", 
"wall", "district", "central", "river_district", "vancouv", "meet", 
"commun", "amen_includ", "daili", "garden", "gold", "leed", "rang", 
"ga_rang", "heat", "pool", "custom", "design", "counter", "countertop", 
"quartz", "modern", "light", "tile", "cabinet", "top", "hardwood", 
"concret", "finish", "heat", "loft", "stone", "hardwood_floor", 
"stainless", "cabinetri", "wall", "stainless", "steel", "stainless_steel", 
"steel_applianc", "granit", "downtown", "lamin", "insuit", "skytrain", 
"countertop", "counter", "laundri", "lamin_floor", "close", "face", 
"bright", "central", "buyer", "transit", "storag_locker"), name = c("t_1", 
"t_1", "t_1", "t_1", "t_1", "t_1", "t_1", "t_1", "t_1", "t_1", 
"t_1", "t_1", "t_1", "t_1", "t_1", "t_1", "t_1", "t_1", "t_1", 
"t_1", "t_2", "t_2", "t_2", "t_2", "t_2", "t_2", "t_2", "t_2", 
"t_2", "t_2", "t_2", "t_2", "t_2", "t_2", "t_2", "t_2", "t_2", 
"t_2", "t_2", "t_2", "t_3", "t_3", "t_3", "t_3", "t_3", "t_3", 
"t_3", "t_3", "t_3", "t_3", "t_3", "t_3", "t_3", "t_3", "t_3", 
"t_3", "t_3", "t_3", "t_3", "t_3", "t_4", "t_4", "t_4", "t_4", 
"t_4", "t_4", "t_4", "t_4", "t_4", "t_4", "t_4", "t_4", "t_4", 
"t_4", "t_4", "t_4", "t_4", "t_4", "t_4", "t_4", "t_5", "t_5", 
"t_5", "t_5", "t_5", "t_5", "t_5", "t_5", "t_5", "t_5", "t_5", 
"t_5", "t_5", "t_5", "t_5", "t_5", "t_5", "t_5", "t_5", "t_5", 
"t_5", "t_6", "t_6", "t_6", "t_6", "t_6", "t_6", "t_6", "t_6", 
"t_6", "t_6", "t_6", "t_6", "t_6", "t_6", "t_6", "t_6", "t_6", 
"t_6", "t_6", "t_6", "t_7", "t_7", "t_7", "t_7", "t_7", "t_7", 
"t_7", "t_7", "t_7", "t_7", "t_7", "t_7", "t_7", "t_7", "t_7", 
"t_7", "t_7", "t_7", "t_7", "t_7", "t_8", "t_8", "t_8", "t_8", 
"t_8", "t_8", "t_8", "t_8", "t_8", "t_8", "t_8", "t_8", "t_8", 
"t_8", "t_8", "t_8", "t_8", "t_8", "t_8", "t_8"), value = c(0.0168220200995057, 
0.0153161615392215, 0.0149904123428906, 0.0138688207986376, 0.0112433212880903, 
0.0112374695061203, 0.01112823624268, 0.0110833725809099, 0.0101256309318173, 
0.00999689172847693, 0.00971405559992615, 0.00946633016319547, 
0.00937855343364523, 0.00916203750075464, 0.00908986552312444, 
0.00811651912211177, 0.00672769620122796, 0.00660870996783763, 
0.0057953122740054, 0.00572899207834522, 0.0138658083101884, 
0.0137837795637179, 0.010485360495114, 0.0091757787532163, 0.00819431199720066, 
0.00798132507654038, 0.00756830314255726, 0.00734955981863588, 
0.0068415572308448, 0.00680557971046299, 0.00665591322567468, 
0.00659547099143325, 0.0065925927898027, 0.00653215055556127, 
0.00651632044659328, 0.00646019551479766, 0.00623857398924574, 
0.00599680505228001, 0.00596082753189821, 0.00569171567944231, 
0.0186241974972874, 0.0147573323720616, 0.0135304193706392, 0.0131429731596637, 
0.0128808771934156, 0.0128618847320933, 0.0125200204282914, 0.0124706400288533, 
0.0124288566139442, 0.0122617229543077, 0.0119084631737125, 0.0107879079556951, 
0.0103320888839593, 0.0098914637812813, 0.00987247131995897, 
0.00924951858858663, 0.00903680302177657, 0.00830369401473473, 
0.007889658357908, 0.00771492771374258, 0.0189024481694971, 0.0180269566047608, 
0.0160150330924837, 0.0141518699853569, 0.013476351424265, 0.0128983806698291, 
0.0127203559226821, 0.0107181871910707, 0.0100816877526412, 0.00948176874170768, 
0.00909645490541704, 0.00737717481310755, 0.00708697008830637, 
0.00698454489131772, 0.0067918879731724, 0.00671628842301411, 
0.00664312756802222, 0.00659435366469429, 0.00656265062753114, 
0.00626269112206438, 0.0244598442695631, 0.0219542590218986, 
0.014506306535134, 0.0140648189743327, 0.0114732296563822, 0.0111922830267814, 
0.00977034906472007, 0.00792412835591466, 0.00774065300597126, 
0.00768331695911395, 0.00750557521385629, 0.0069723499780833, 
0.0067888746281399, 0.00666273532505382, 0.00659966567351077, 
0.00654232962665346, 0.00650219439385335, 0.00632445264859568, 
0.00621551415956679, 0.0061811125314524, 0.0061811125314524, 
0.0318859345137425, 0.0240809552302102, 0.0145728663053292, 0.0145503085617352, 
0.0133209115358609, 0.0118433793304523, 0.0113809455867748, 0.0106590977917661, 
0.010591424560984, 0.0102756161506677, 0.0101289908173065, 0.0100049232275394, 
0.00893343040682327, 0.00888831491963522, 0.00884319943244717, 
0.00866273748369498, 0.00809879389384439, 0.00778298548352805, 
0.00778298548352805, 0.00778298548352805, 0.0105073596303114, 
0.0104255592614531, 0.01040715417846, 0.00863004116501312, 0.00785907268852352, 
0.00765252675715628, 0.00763616668338462, 0.0074480258350105, 
0.00686928822533793, 0.00684270310545898, 0.00677521780115087, 
0.00662797713720591, 0.00607991466585521, 0.00596743915867503, 
0.00596334914023211, 0.00567704784922801, 0.00560342751725553, 
0.00560138250803408, 0.00558706744348387, 0.00558706744348387, 
0.0211985535498137, 0.0193575351827745, 0.0189303064930188, 0.0173450631968202, 
0.0141520908839095, 0.00979829324949158, 0.00938792885012101, 
0.00923896095171936, 0.00923052880652682, 0.00842666429817077, 
0.00802754275905693, 0.00795165345232402, 0.00788981772091202, 
0.00777457840328056, 0.00768182480616255, 0.00744291402570708, 
0.00740356401480853, 0.00717870680967398, 0.00694822817441105, 
0.00671774953914813)), row.names = c(NA, -161L), class = c("tbl_df", 
"tbl", "data.frame"))

Upvotes: 2

Answers (3)

LMc

Reputation: 18712

The tidytext package has a function, reorder_within, designed specifically for this type of problem. By default it aggregates with the mean of value for each word within name; since there is exactly one value per (name, word) pair here, the mean is simply that value (and is the same as what max would return).

library(tidytext)

# reorder_within() orders `word` by `value` separately inside each `name`;
# scale_x_reordered() then cleans up the axis labels it produced.
ggplot(
  data = a,
  mapping = aes(
    x = reorder_within(x = word, by = value, within = name),
    y = value,
    fill = factor(name)
  )
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(facets = ~ name, scales = "free") +
  coord_flip() +
  scale_x_reordered()

Note the use of scale_x_reordered() from the same package.

Upvotes: 4

Waldi

Reputation: 41260

You could use fct_reorder() from the forcats package.
To get the right order within each facet, the factor is built from paste(name, word), so that every level is unique to its facet:

# Order the bars within each facet by `value`. Pasting `name` onto `word`
# makes every factor level unique to its facet, so a word that occurs under
# several names gets an independent position in each panel.
ggplot(
  a,
  aes(
    # Namespace-qualified because this snippet does not attach forcats.
    forcats::fct_reorder(.f = paste(name, word), .x = value),
    value,
    fill = factor(name)
  )
) +
  xlab(label = "word") +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ name, scales = "free") +
  # Strip the leading "<name> " prefix so tick labels show only the word
  # (assumes `name` itself contains no spaces, as in the sample data).
  scale_x_discrete(labels = function(x) sub("^[^ ]* ", "", x)) +
  coord_flip()

Upvotes: 2

jdobres

Reputation: 11957

This trick using ordered factors will get you most of the way there:

# Sort the rows by value within each name, then freeze that row order into
# the factor levels of `word`. A word that appears under several names keeps
# only the position of its first occurrence, so some facets will not be
# perfectly sorted.
df <- a %>%
  arrange(name, value) %>%
  mutate(word = factor(word, levels = unique(word), ordered = TRUE))

ggplot(df, aes(word, value, fill = factor(name))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ name, scales = "free") +
  coord_flip()

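However, because some words appear under more than one name, and a factor level can occupy only one position, the ordering is off in some facets. Another solution is to give each row a unique label (name plus word), order by that label, and then strip the name prefix from the axis labels using gsub inside scale_x_discrete.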

# Build a label that is unique per (name, word) row, sort by value within
# each name, and freeze that row order into the factor levels.
df <- a %>%
  mutate(combo_name = paste(name, word)) %>%
  arrange(name, value) %>%
  # unique() keeps factor() from erroring on duplicated levels should the
  # same (name, word) pair ever occur in more than one row.
  mutate(
    combo_name = factor(combo_name, levels = unique(combo_name), ordered = TRUE)
  )

ggplot(df, aes(combo_name, value, fill = factor(name))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ name, scales = "free") +
  # Show only the word on the axis: drop everything up to the last space.
  scale_x_discrete(labels = function(x) gsub(".* ", "", x)) +
  coord_flip()

Upvotes: 3

How to reorder words based on their assigned probability in ggplot

Answers (3)

Related Questions