Atakan
Atakan

Reputation: 446

Avoid reordering and grouping duplicate x-values ggplot (geom_point)

I'm trying to plot the following data frame with ggplot. The data frame is an ordered summary of the top 10 scores from each of 13 separate clusters (10 x 13 = 130 observations in total).

                    Cell.Type            Score
1                  GN_Thio_PC 2677.80617009519
2                  GN_UrAc_PC 2637.41032364779
3                Mo_6C+II-_Bl 2556.92913594902
4                GN_Arth_SynF 2391.45433001888
5                Mo_6C+II+_Bl 2315.52547519278
6                       GN_Bl 2304.98201202492
7       MF_Thio5_II-480int_PC 2285.71825571867
8       MF_Thio5_II+480int_PC 2248.11270401521
9                    MF_RP_Sp 2224.65822294734
10                      GN_BM 2069.57828218951
11                  T_8Mem_Sp 3650.92933141558
12                 NK_b2m-_Sp 3623.07526981183
13               NK_DAP10-_Sp 3568.82957776803
14              T_8Nve_Sp_OT1 3535.57114525684
15  T_8Mem_Sp_OT1_d100_LisOva 3532.02217747173
16                 NK_49H+_Sp 3519.49779859704
17               NK_DAP12-_Sp  3500.5532642101
18   T_8Eff_Sp_OT1_d10_LisOva 3448.56816636704
19                  NKT_4+_Lv 3445.33162798595
20                  T_8Nve_LN 3442.41228249856
21                   DC_8+_Th 1384.56532273906
22                  DC_8+_SLN 1333.48389922898
23                   DC_8-_Th 1329.54597466529
24             DC_103+11b-_Lu  1293.2048614532
25                    B_GC_Sp 1291.13567921318
26                  DC_8+_MLN 1266.18747131352
27                DC_8+_Sp_ST 1251.44702553637
28                  SC_MEP_BM  1229.9373063931
29             DC_8-4-11b-_Sp 1204.49733058435
30             DC_103+11b-_Lv 1196.79647629317
31                  T_8Mem_Sp 4888.53608836612
32              T_8Nve_Sp_OT1 4844.62387372193
33                  T_8Nve_LN 4803.08591833927
34                 T_8Nve_MLN 4801.60498804064
35                  T_4Nve_LN 4704.34332374882
36                  T_8Nve_PP 4680.54098917638
37                  T_8Nve_Sp 4668.51868809073
38                  T_4Nve_PP  4654.6363768553
39                 T_4Nve_MLN 4644.53632493487
40                 T_4FP3-_Sp 4643.81716614074
41      MF_Thio5_II-480int_PC 2104.44279848771
42       MF_Thio5_II-480hi_PC 2051.91548567208
43                MF_PPAR-_Lu 1971.01987723135
44      MF_Thio5_II+480int_PC  1968.1818651747
45                      MF_Lu 1941.36173402858
46             MF_II-480hi_PC 1821.13874693704
47       MF_Thio5_II+480lo_PC 1787.05712946341
48                 GN_Thio_PC  1728.7523034795
49           MF_Microglia_CNS  1711.6599582643
50             MF_II+480lo_PC 1711.26332938833
51               Mo_6C+II-_Bl 2962.12603781126
52               Mo_6C-II-_Bl 2758.18375042302
53             Mo_6C-IIint_Bl 2638.16680094079
54                      GN_Bl 2620.07541962536
55                 GN_UrAc_PC  2537.2896087047
56               Mo_6C+II+_Bl 2435.36956544536
57                      GN_BM 2387.13935841906
58                 GN_Thio_PC 2337.29884997719
59                 GN_Arth_BM  2311.2451915426
60               GN_Arth_SynF   2128.725307006
61  T_8Eff_Sp_OT1_48hr_LisOva 3285.38489328741
62  T_8Eff_Sp_OT1_24hr_LisOva 3069.01874851731
63                  T_8Mem_Sp 3022.56013472619
64                   T_ISP_Th 2983.45678085374
65  T_8Eff_Sp_OT1_12hr_LisOva 2964.79056150505
66    T_8Eff_Sp_OT1_d5_VSVOva 2950.29516615634
67              T_8Nve_Sp_OT1  2893.7887214778
68    T_8Eff_Sp_OT1_d6_LisOva 2891.41381948125
69       Tgd_vg3+24alo_e17_Th 2875.40895460188
70                T_8SP24-_Th 2858.05344865649
71      MF_Thio5_II-480int_PC 2783.70950776927
72       MF_Thio5_II-480hi_PC 2737.88858084566
73      MF_Thio5_II+480int_PC 2708.73493567958
74             MF_II-480hi_PC 2377.99196863673
75                   MF_RP_Sp 2281.78751440853
76           MF_Microglia_CNS    2145.29897799
77                MF_PPAR-_Lu 2089.46703313723
78                      Fi_Sk 2077.00426240616
79             MF_II+480lo_PC 2070.33177217184
80       MF_Thio5_II+480lo_PC 2049.00134439134
81                 GN_Thio_PC 3158.65427762739
82                 GN_UrAc_PC 2993.45396316058
83               Mo_6C+II-_Bl 2807.36027869234
84               GN_Arth_SynF 2783.56931762011
85                      GN_Bl 2666.31559591767
86               Mo_6C+II+_Bl  2472.0977029947
87                      GN_BM 2422.62741443588
88               Mo_6C-II-_Bl 2309.43925461481
89             Mo_6C-IIint_Bl  2238.0777055497
90                 GN_Arth_BM 2215.70702972594
91               GN_Arth_SynF  3027.3451404511
92                 GN_Thio_PC 2939.74912223882
93                 GN_UrAc_PC 2694.04395500259
94       MF_Thio5_II-480hi_PC 2507.39045396954
95                      GN_Bl 2407.18406139123
96      MF_Thio5_II-480int_PC 2380.65584862485
97                      Fi_Sk 2211.83581518875
98               Fi_MTS15+_Th 2209.41411415371
99           MF_Microglia_CNS 2149.92996548155
100                  MF_RP_Sp 2111.76895702472
101 T_8Eff_Sp_OT1_48hr_LisOva  4316.4227070348
102                  T_ISP_Th 4280.25335061696
103                 T_DPbl_Th 4102.01504953757
104              preT_DN3B_Th 3910.32665898991
105             preT_DN3-4_Th  3907.5798288054
106                  T_DN4_Th 3840.18882533614
107                 SC_MEP_BM 3780.57141700037
108   T_8Eff_Sp_OT1_d5_VSVOva 3757.01659494412
109      Tgd_vg3+24alo_e17_Th 3685.84648926922
110          Tgd_vg5+24ahi_Th 3616.99224871103
111 DC_IIhilang+103+11blo_SLN 4519.50952669406
112  DC_IIhilang+103-11b+_SLN 4415.97080261725
113 DC_IIhilang-103-11blo_SLN 4170.40873917108
114  DC_IIhilang-103-11b+_SLN 3963.46358118485
115           DC_8-4-11b-_MLN  3631.2974118135
116           DC_8-4-11b+_MLN 3386.67029828899
117           DC_8-4-11b+_SLN 3026.47679955977
118               Ep_MEChi_Th 2844.36034968535
119           DC_8-4-11b-_SLN 2835.94178377956
120                 DC_8+_MLN 2183.62863550565
121             DC_pDC_8+_SLN 3785.05815189249
122              DC_pDC_8+_Sp  3767.7193092587
123             DC_pDC_8+_MLN 3758.58340817543
124              DC_pDC_8-_Sp 3747.60526193027
125                   B_T1_Sp 2325.20093650829
126                   B_T2_Sp 2316.21996448563
127                   B_Fo_Sp 2253.78988549461
128                   B_T3_Sp  2225.2075463753
129                  B_Fo_MLN 2159.05742315915
130                   B_Fo_PC 2142.51258891406

I would like to obtain a graph like this (which was generated with the base graphics functions, using rather ugly code):

enter image description here

The problem I'm running into is that ggplot groups and reorders the data instead of keeping the original order. How can I stop this behavior?

On another note, is there a better way of structuring this data frame so that it works better with ggplot? I'd like to visualize the top 10 scores of all clusters visually separated but side by side (like in the image I attached above). My ultimate goal is to make my code scalable, so that it works just as well with a different number of starting clusters (e.g. 18 clusters as opposed to 13 here) and a different number of top scores (e.g. top 5 as opposed to top 10 here), with minimal code rewriting.

Upvotes: 1

Views: 559

Answers (1)

Richard Telford
Richard Telford

Reputation: 9923

It is a little tricky because you have duplicate labels in the different groups, so the standard advice to convert the x-axis labels to a factor is not quite enough. Here I made a copy of Cell.Type before merging Cell.Type and the group id; the graph is plotted with this merged variable and then the labels are switched back to the original ones. I used facets to show the labels.

The graph needs to be quite large to work

# Plot the top scores of every cluster side by side, keeping the row order of
# `df` even though the same Cell.Type label occurs in several clusters.
library("tidyverse")

# Number of top scores kept per cluster. `df` is expected to hold the clusters
# as consecutive runs of `n_top` rows (10 in the example data); change this one
# value to handle e.g. top-5 tables. The number of clusters is derived from the
# number of rows, so it does not need to be specified.
n_top <- 10L

df2 <- df %>%
  mutate(
    # Cluster index from the row position: rows 1..n_top -> 1, the next
    # n_top rows -> 2, and so on.
    id = (row_number() - 1L) %/% n_top + 1L,
    # Keep the original (possibly duplicated) label for the axis text.
    Cell.Type.label = Cell.Type,
    # Append the cluster id so every x value is unique across clusters ...
    Cell.Type = paste(Cell.Type, id, sep = "_"),
    # ... and fix the factor levels to the row order so ggplot does not
    # re-sort the x axis alphabetically.
    Cell.Type = factor(Cell.Type, levels = Cell.Type)
  )

df2 %>%
  ggplot(aes(x = Cell.Type, y = Score, colour = as.factor(id))) +
  geom_point(show.legend = FALSE) +
  # One panel per cluster; free_x so each panel only shows its own cell types.
  facet_wrap(~id, nrow = 1, scales = "free_x") +
  # Swap the unique x values back to the original labels. The vector is named
  # by the unique x value so labels are matched to breaks by name; an unnamed
  # vector is matched by position, which is fragile when every facet has its
  # own free x scale with only a subset of the breaks.
  scale_x_discrete(
    labels = setNames(df2$Cell.Type.label, as.character(df2$Cell.Type))
  ) +
  theme(panel.spacing = unit(x = 0, units = "pt"),
        axis.text.x = element_text(angle = 90, hjust = 1, size = 4))

data

# Example input for the plotting code: a data.frame with 130 rows (row names
# "1".."130") and two columns, Cell.Type (character labels, with repeats) and
# Score (numeric). Per the question, the rows are 13 clusters of 10 top scores
# each, stored consecutively.
# NOTE(review): this looks like dput() output of the asker's data frame; it
# must be evaluated before any code that uses `df`.
df <- structure(list(Cell.Type = c("GN_Thio_PC", "GN_UrAc_PC", "Mo_6C+II-_Bl", 
"GN_Arth_SynF", "Mo_6C+II+_Bl", "GN_Bl", "MF_Thio5_II-480int_PC", 
"MF_Thio5_II+480int_PC", "MF_RP_Sp", "GN_BM", "T_8Mem_Sp", "NK_b2m-_Sp", 
"NK_DAP10-_Sp", "T_8Nve_Sp_OT1", "T_8Mem_Sp_OT1_d100_LisOva", 
"NK_49H+_Sp", "NK_DAP12-_Sp", "T_8Eff_Sp_OT1_d10_LisOva", "NKT_4+_Lv", 
"T_8Nve_LN", "DC_8+_Th", "DC_8+_SLN", "DC_8-_Th", "DC_103+11b-_Lu", 
"B_GC_Sp", "DC_8+_MLN", "DC_8+_Sp_ST", "SC_MEP_BM", "DC_8-4-11b-_Sp", 
"DC_103+11b-_Lv", "T_8Mem_Sp", "T_8Nve_Sp_OT1", "T_8Nve_LN", 
"T_8Nve_MLN", "T_4Nve_LN", "T_8Nve_PP", "T_8Nve_Sp", "T_4Nve_PP", 
"T_4Nve_MLN", "T_4FP3-_Sp", "MF_Thio5_II-480int_PC", "MF_Thio5_II-480hi_PC", 
"MF_PPAR-_Lu", "MF_Thio5_II+480int_PC", "MF_Lu", "MF_II-480hi_PC", 
"MF_Thio5_II+480lo_PC", "GN_Thio_PC", "MF_Microglia_CNS", "MF_II+480lo_PC", 
"Mo_6C+II-_Bl", "Mo_6C-II-_Bl", "Mo_6C-IIint_Bl", "GN_Bl", "GN_UrAc_PC", 
"Mo_6C+II+_Bl", "GN_BM", "GN_Thio_PC", "GN_Arth_BM", "GN_Arth_SynF", 
"T_8Eff_Sp_OT1_48hr_LisOva", "T_8Eff_Sp_OT1_24hr_LisOva", "T_8Mem_Sp", 
"T_ISP_Th", "T_8Eff_Sp_OT1_12hr_LisOva", "T_8Eff_Sp_OT1_d5_VSVOva", 
"T_8Nve_Sp_OT1", "T_8Eff_Sp_OT1_d6_LisOva", "Tgd_vg3+24alo_e17_Th", 
"T_8SP24-_Th", "MF_Thio5_II-480int_PC", "MF_Thio5_II-480hi_PC", 
"MF_Thio5_II+480int_PC", "MF_II-480hi_PC", "MF_RP_Sp", "MF_Microglia_CNS", 
"MF_PPAR-_Lu", "Fi_Sk", "MF_II+480lo_PC", "MF_Thio5_II+480lo_PC", 
"GN_Thio_PC", "GN_UrAc_PC", "Mo_6C+II-_Bl", "GN_Arth_SynF", "GN_Bl", 
"Mo_6C+II+_Bl", "GN_BM", "Mo_6C-II-_Bl", "Mo_6C-IIint_Bl", "GN_Arth_BM", 
"GN_Arth_SynF", "GN_Thio_PC", "GN_UrAc_PC", "MF_Thio5_II-480hi_PC", 
"GN_Bl", "MF_Thio5_II-480int_PC", "Fi_Sk", "Fi_MTS15+_Th", "MF_Microglia_CNS", 
"MF_RP_Sp", "T_8Eff_Sp_OT1_48hr_LisOva", "T_ISP_Th", "T_DPbl_Th", 
"preT_DN3B_Th", "preT_DN3-4_Th", "T_DN4_Th", "SC_MEP_BM", "T_8Eff_Sp_OT1_d5_VSVOva", 
"Tgd_vg3+24alo_e17_Th", "Tgd_vg5+24ahi_Th", "DC_IIhilang+103+11blo_SLN", 
"DC_IIhilang+103-11b+_SLN", "DC_IIhilang-103-11blo_SLN", "DC_IIhilang-103-11b+_SLN", 
"DC_8-4-11b-_MLN", "DC_8-4-11b+_MLN", "DC_8-4-11b+_SLN", "Ep_MEChi_Th", 
"DC_8-4-11b-_SLN", "DC_8+_MLN", "DC_pDC_8+_SLN", "DC_pDC_8+_Sp", 
"DC_pDC_8+_MLN", "DC_pDC_8-_Sp", "B_T1_Sp", "B_T2_Sp", "B_Fo_Sp", 
"B_T3_Sp", "B_Fo_MLN", "B_Fo_PC"), Score = c(2677.80617009519, 
2637.41032364779, 2556.92913594902, 2391.45433001888, 2315.52547519278, 
2304.98201202492, 2285.71825571867, 2248.11270401521, 2224.65822294734, 
2069.57828218951, 3650.92933141558, 3623.07526981183, 3568.82957776803, 
3535.57114525684, 3532.02217747173, 3519.49779859704, 3500.5532642101, 
3448.56816636704, 3445.33162798595, 3442.41228249856, 1384.56532273906, 
1333.48389922898, 1329.54597466529, 1293.2048614532, 1291.13567921318, 
1266.18747131352, 1251.44702553637, 1229.9373063931, 1204.49733058435, 
1196.79647629317, 4888.53608836612, 4844.62387372193, 4803.08591833927, 
4801.60498804064, 4704.34332374882, 4680.54098917638, 4668.51868809073, 
4654.6363768553, 4644.53632493487, 4643.81716614074, 2104.44279848771, 
2051.91548567208, 1971.01987723135, 1968.1818651747, 1941.36173402858, 
1821.13874693704, 1787.05712946341, 1728.7523034795, 1711.6599582643, 
1711.26332938833, 2962.12603781126, 2758.18375042302, 2638.16680094079, 
2620.07541962536, 2537.2896087047, 2435.36956544536, 2387.13935841906, 
2337.29884997719, 2311.2451915426, 2128.725307006, 3285.38489328741, 
3069.01874851731, 3022.56013472619, 2983.45678085374, 2964.79056150505, 
2950.29516615634, 2893.7887214778, 2891.41381948125, 2875.40895460188, 
2858.05344865649, 2783.70950776927, 2737.88858084566, 2708.73493567958, 
2377.99196863673, 2281.78751440853, 2145.29897799, 2089.46703313723, 
2077.00426240616, 2070.33177217184, 2049.00134439134, 3158.65427762739, 
2993.45396316058, 2807.36027869234, 2783.56931762011, 2666.31559591767, 
2472.0977029947, 2422.62741443588, 2309.43925461481, 2238.0777055497, 
2215.70702972594, 3027.3451404511, 2939.74912223882, 2694.04395500259, 
2507.39045396954, 2407.18406139123, 2380.65584862485, 2211.83581518875, 
2209.41411415371, 2149.92996548155, 2111.76895702472, 4316.4227070348, 
4280.25335061696, 4102.01504953757, 3910.32665898991, 3907.5798288054, 
3840.18882533614, 3780.57141700037, 3757.01659494412, 3685.84648926922, 
3616.99224871103, 4519.50952669406, 4415.97080261725, 4170.40873917108, 
3963.46358118485, 3631.2974118135, 3386.67029828899, 3026.47679955977, 
2844.36034968535, 2835.94178377956, 2183.62863550565, 3785.05815189249, 
3767.7193092587, 3758.58340817543, 3747.60526193027, 2325.20093650829, 
2316.21996448563, 2253.78988549461, 2225.2075463753, 2159.05742315915, 
2142.51258891406)), .Names = c("Cell.Type", "Score"), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", 
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", 
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", 
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68", 
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", 
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90", 
"91", "92", "93", "94", "95", "96", "97", "98", "99", "100", 
"101", "102", "103", "104", "105", "106", "107", "108", "109", 
"110", "111", "112", "113", "114", "115", "116", "117", "118", 
"119", "120", "121", "122", "123", "124", "125", "126", "127", 
"128", "129", "130"))

Upvotes: 1

Related Questions