Shaxi Liver
Shaxi Liver

Reputation: 1120

Subset rows with similar strings in one of the columns and plot them together

I would like to group/subset the rows which have the same "base" string in one of the columns and plot each group on its own graph. It would be great to have everything in one PDF file, with each graph on a separate page.

Data:

structure(list(`10` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `34` = c(0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 370500, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1091361.9, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1512409.6, 
0, 0, 0, 0, 0, 0), `59` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 4231358.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 5995680.4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 2266775, 0, 0, 0, 0, 0, 0, 6864490.1, 0, 0, 
0, 0, 0, 0), `84` = c(0, 0, 0, 0, 1783350, 0, 0, 0, 1177650, 
0, 0, 0, 0, 0, 0, 0, 0, 4316664.7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 9262556.7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 2831286.1, 0, 0, 0, 0, 0, 0, 10643218.2, 
0, 0, 0, 0, 0, 0), `110` = c(0, 0, 0, 0, 1778743.3, 0, 0, 0, 
1465966.7, 0, 0, 0, 0, 0, 0, 0, 0, 3111700, 0, 0, 1955337.5, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5584784.4, 5584784.4, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3092525, 0, 
0, 0, 0, 0, 0, 7847143.8, 0, 0, 0, 0, 0, 0), `134` = c(0, 0, 
0, 0, 1121869.4, 0, 0, 0, 1439430.6, 0, 0, 0, 0, 0, 0, 0, 0, 
2854250, 0, 0, 0, 0, 0, 0, 914890, 0, 0, 847880, 0, 0, 0, 0, 
0, 0, 0, 8191800, 0, 0, 0, 0, 0, 0, 1830904.5, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 1650150, 0, 0, 837130, 0, 0, 0, 4925095.1, 0, 
0, 0, 0, 0, 0), `165` = c(0, 0, 0, 0, 1432775, 0, 0, 0, 1394186.1, 
0, 1120183.3, 0, 0, 0, 0, 0, 0, 2262421.7, 0, 0, 0, 615660, 0, 
0, 1292795.8, 0, 0, 712622.5, 0, 0, 0, 0, 0, 0, 0, 2683469.4, 
0, 0, 0, 0, 0, 0, 2318485.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1561800, 
0, 0, 0, 0, 0, 0, 4382993.7, 0, 0, 763460, 0, 0, 0), `199` = c(0, 
0, 0, 0, 1314220, 0, 0, 0, 1439718.8, 0, 1929266.7, 0, 0, 0, 
1101800, 0, 0, 2759366.7, 0, 0, 0, 1291728.6, 0, 0, 2489775.6, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2858345.8, 0, 0, 0, 1819542.1, 
0, 0, 1497640.3, 0, 0, 0, 1300250, 0, 0, 0, 0, 0, 0, 1566875, 
0, 0, 0, 0, 0, 0, 4625895.6, 0, 0, 1308158.3, 0, 0, 0), `234` = c(1257250, 
0, 0, 0, 0, 0, 0, 0, 1276080, 0, 1848500, 0, 0, 0, 1529350, 0, 
0, 2155275, 0, 0, 0, 2023041.9, 0, 0, 1966447.7, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 1184200, 1184200, 0, 0, 1652350, 0, 0, 2018581.7, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1835225, 0, 0, 0, 0, 0, 0, 4639414.7, 
0, 0, 720715, 0, 0, 0), `257` = c(0, 0, 0, 0, 0, 669442.5, 0, 
0, 1253026.7, 0, 960410, 960410, 0, 0, 1258267.5, 0, 0, 1707392.5, 
0, 0, 0, 563280, 0, 0, 2403237.9, 0, 0, 0, 1044100, 0, 2075700, 
0, 0, 0, 0, 0, 5718450, 0, 0, 1704550, 0, 0, 1350286.9, 0, 0, 
0, 0, 2011700, 0, 0, 0, 0, 0, 1739500, 0, 0, 0, 0, 0, 0, 4612520.8, 
4612520.8, 0, 0, 0, 0, 0), `362` = c(0, 1593500, 0, 0, 0, 1610625.3, 
0, 0, 1234902.5, 0, 0, 1481036.8, 0, 0, 1583647.5, 0, 0, 1752089.2, 
0, 0, 0, 0, 0, 0, 2410809.2, 0, 0, 0, 654940, 0, 0, 0, 0, 0, 
0, 0, 7014905.6, 0, 0, 0, 0, 0, 1165672.1, 0, 0, 0, 0, 0, 0, 
0, 1029910, 0, 0, 2153087.5, 0, 0, 0, 422920, 0, 0, 0, 7495855.9, 
0, 0, 0, 0, 0), `433` = c(0, 0, 0, 0, 0, 1340283.9, 0, 0, 1268996.9, 
0, 0, 1416683.3, 0, 0, 1047862.5, 0, 0, 1819653.8, 0, 0, 0, 0, 
0, 0, 2227565.7, 0, 0, 0, 763765, 0, 0, 1595430, 0, 0, 0, 0, 
4894549, 0, 0, 0, 0, 0, 1061375.4, 0, 0, 0, 0, 0, 2251950, 0, 
1042130, 0, 0, 2055300, 0, 0, 0, 696278.3, 0, 0, 0, 5353797.8, 
0, 0, 0, 0, 0), `506` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2020300, 
2020300, 0, 0, 0, 0, 0, 0, 7681526, 0, 0, 0, 0, 0), `581` = c(0, 
0, 1749237.5, 0, 0, 0, 2421665.8, 0, 0, 1773262.5, 0, 0, 2251004.3, 
0, 0, 2570175, 0, 0, 3379756.9, 0, 0, 0, 2054455.6, 0, 0, 2518270.8, 
0, 0, 0, 0, 0, 0, 2917968.2, 0, 0, 0, 0, 7004350, 0, 0, 1451600, 
0, 0, 1394411, 0, 0, 0, 0, 0, 2507858.3, 0, 2377012.5, 0, 0, 
3719165.4, 0, 0, 0, 1472870.3, 0, 0, 9666916.1, 0, 0, 1730300, 
0, 0), `652` = c(0, 0, 476910, 476910, 0, 0, 1149078.8, 1149078.8, 
0, 1082468.7, 0, 0, 882769.7, 0, 0, 1370449.4, 1370449.4, 0, 
1529049, 1529049, 0, 0, 943632.2, 0, 0, 916587.8, 0, 0, 0, 988261.1, 
0, 0, 1778007.1, 1778007.1, 0, 0, 0, 3087304.8, 3087304.8, 0, 
782860, 782860, 0, 510158.5, 510158.5, 0, 0, 0, 0, 1503750, 0, 
1100677.5, 1100677.5, 0, 1669260, 1669260, 0, 0, 770733.2, 0, 
0, 4939242.8, 4939242.8, 0, 643564.4, 643564.4, 0), `733` = c(0, 
0, 0, 1095060, 0, 0, 0, 1674089.3, 0, 1252101.3, 0, 0, 1259111, 
0, 0, 0, 2429293.3, 0, 0, 2326928.3, 0, 0, 1259216.5, 0, 0, 1238837.5, 
0, 0, 0, 1224858.3, 0, 0, 0, 2952529.9, 0, 0, 0, 0, 4626414.7, 
0, 0, 1121440, 0, 0, 1025386.2, 0, 0, 0, 0, 1917900, 0, 0, 2197533.3, 
0, 0, 2840155.5, 0, 0, 1054285.7, 0, 0, 0, 7516814.2, 0, 0, 1329434.4, 
0), `818` = c(0, 0, 0, 720551.1, 0, 0, 0, 714662.7, 0, 617012.9, 
0, 0, 549850.8, 0, 0, 0, 1197460, 0, 0, 771979.2, 0, 0, 585847.5, 
585847.5, 0, 875475.4, 0, 0, 0, 576774, 0, 0, 0, 1147389.8, 0, 
0, 0, 0, 2292421.7, 0, 0, 755258.3, 0, 0, 0, 0, 0, 0, 0, 858930, 
0, 0, 1242668.3, 0, 0, 1580088.3, 0, 0, 641938.6, 641938.6, 0, 
0, 3838660.4, 0, 0, 733140.8, 733140.8), `896` = c(0, 0, 0, 590480, 
0, 0, 0, 817087.6, 0, 569869.5, 0, 0, 650822.5, 650822.5, 0, 
0, 1624052.5, 0, 0, 682570.8, 0, 0, 0, 1538800, 0, 690488.6, 
690488.6, 0, 0, 797923.9, 0, 0, 0, 1204889.3, 0, 0, 0, 0, 2184432.2, 
0, 0, 676654.7, 0, 0, 0, 210680, 0, 0, 0, 791152.5, 0, 0, 1599855.8, 
0, 0, 1358543.8, 0, 0, 0, 931288, 0, 0, 4683895.2, 0, 0, 0, 1202806
), `972` = c(0, 0, 0, 799116.4, 0, 0, 0, 759169.9, 0, 408845, 
0, 0, 0, 948980, 0, 0, 968766.7, 0, 0, 675349.7, 0, 0, 0, 0, 
0, 0, 1811117.6, 0, 0, 609098.5, 0, 0, 0, 1073749.1, 0, 0, 0, 
0, 2392258.9, 0, 0, 743580, 0, 0, 0, 1020485, 0, 0, 0, 446596.7, 
0, 0, 1178583, 0, 0, 1438261.7, 0, 0, 0, 1133057.9, 0, 0, 4445814.7, 
0, 0, 0, 1057776.9), `1039` = c(0, 0, 0, 447255.3, 0, 0, 0, 609409.1, 
0, 304340, 0, 0, 0, 0, 0, 0, 694232.8, 0, 0, 473015.3, 0, 0, 
0, 0, 0, 0, 419524.9, 0, 0, 447760.6, 0, 0, 0, 932513.5, 0, 0, 
0, 0, 1251960.5, 0, 0, 276560, 0, 0, 0, 259640, 0, 0, 0, 354995, 
0, 0, 1570222.5, 0, 0, 1021822, 0, 0, 0, 811614, 0, 0, 2941698.2, 
0, 0, 0, 1199942.5), Gene = c("AT1G04170_1", "AT1G04170_2", "AT1G04170_3", 
"AT1G04170_4", "AT1G08520_1", "AT1G08520_2", "AT1G08520_3", "AT1G08520_4", 
"AT1G10670_1", "AT1G10670_2", "AT1G53500_1", "AT1G53500_2", "AT1G53500_3", 
"AT1G53500_4", "AT1G54270_1", "AT1G54270_2", "AT1G54270_3", "AT1G80480_1", 
"AT1G80480_2", "AT1G80480_3", "AT2G16950_1", "AT2G16950_2", "AT2G16950_3", 
"AT2G16950_4", "AT3G03960_1", "AT3G03960_2", "AT3G03960_3", "AT3G57290_1", 
"AT3G57290_2", "AT3G57290_3", "AT3G63460_1", "AT3G63460_2", "AT3G63460_3", 
"AT3G63460_4", "AT4G20890_1", "AT4G20890_2", "AT4G20890_3", "AT4G20890_4", 
"AT4G20890_5", "AT4G20980_1", "AT4G20980_2", "AT4G20980_3", "AT4G24190_1", 
"AT4G24190_2", "AT4G24190_3", "AT4G24190_4", "AT4G29670_1", "AT4G29670_2", 
"AT4G29670_3", "AT4G29670_4", "AT5G23740_1", "AT5G23740_2", "AT5G23740_3", 
"AT5G23860_1", "AT5G23860_2", "AT5G23860_3", "AT5G40450_1", "AT5G40450_2", 
"AT5G40450_3", "AT5G40450_4", "AT5G62700_1", "AT5G62700_2", "AT5G62700_3", 
"ATCG00780_1", "ATCG00780_2", "ATCG00780_3", "ATCG00780_4")), .Names = c("10", 
"34", "59", "84", "110", "134", "165", "199", "234", "257", "362", 
"433", "506", "581", "652", "733", "818", "896", "972", "1039", 
"Gene"), row.names = c("AT1G04170_1", "AT1G04170_2", "AT1G04170_3", 
"AT1G04170_4", "AT1G08520_1", "AT1G08520_2", "AT1G08520_3", "AT1G08520_4", 
"AT1G10670_1", "AT1G10670_2", "AT1G53500_1", "AT1G53500_2", "AT1G53500_3", 
"AT1G53500_4", "AT1G54270_1", "AT1G54270_2", "AT1G54270_3", "AT1G80480_1", 
"AT1G80480_2", "AT1G80480_3", "AT2G16950_1", "AT2G16950_2", "AT2G16950_3", 
"AT2G16950_4", "AT3G03960_1", "AT3G03960_2", "AT3G03960_3", "AT3G57290_1", 
"AT3G57290_2", "AT3G57290_3", "AT3G63460_1", "AT3G63460_2", "AT3G63460_3", 
"AT3G63460_4", "AT4G20890_1", "AT4G20890_2", "AT4G20890_3", "AT4G20890_4", 
"AT4G20890_5", "AT4G20980_1", "AT4G20980_2", "AT4G20980_3", "AT4G24190_1", 
"AT4G24190_2", "AT4G24190_3", "AT4G24190_4", "AT4G29670_1", "AT4G29670_2", 
"AT4G29670_3", "AT4G29670_4", "AT5G23740_1", "AT5G23740_2", "AT5G23740_3", 
"AT5G23860_1", "AT5G23860_2", "AT5G23860_3", "AT5G40450_1", "AT5G40450_2", 
"AT5G40450_3", "AT5G40450_4", "AT5G62700_1", "AT5G62700_2", "AT5G62700_3", 
"ATCG00780_1", "ATCG00780_2", "ATCG00780_3", "ATCG00780_4"), class = "data.frame")

I would like to subset rows with the same "basis", meaning the part of the Gene string before the underscore (e.g. AT1G04170 for AT1G04170_1).

I was trying to combine the subset and grep functions, but it works for me only if I give the string manually. It would be quite a lot of work to do it string by string.

Upvotes: 0

Views: 60

Answers (1)

Steven Beaupré
Steven Beaupré

Reputation: 21621

You could gather() your data into long format and separate() the Gene column into label and number. Then, for better plot aesthetics, replace the 0 values with NAs in the y column using na_if() and specify the order of the x axis using factor():

library(dplyr)
library(tidyr)
# Reshape to long format: every column except Gene becomes an (x, y) pair,
# where x holds the former column name and y the measured value.
long_df <- gather(df, key = x, value = y, -Gene)

# Split Gene (e.g. "AT1G04170_1") into its base label and its numeric suffix.
long_df <- separate(long_df, col = Gene, into = c("label", "number"))

# Turn zeros into NA so they are not drawn, and freeze the x-axis order to the
# order in which the column names first appear (instead of alphabetical).
long_df <- mutate(
  long_df,
  y = na_if(y, 0),
  x = factor(x, levels = unique(x))
)

Then, based on this answer, you could create the plot p and iterate through the groups using dplyr's do() or purrr's by_slice()

library(ggplot2)
# Template plot over the full long table: one point per (x, y) value, coloured
# by the `number` column. Its data is swapped out per group further below.
p <- ggplot(long_df, mapping = aes(x, y, colour = number)) +
  geom_point()

# Using dplyr's do(): for each label, re-point the template plot `p` at that
# group's rows (`.`) and store the resulting ggplot in the `plots` list-column.
res <- do(
  group_by(long_df, label),
  plots = (p %+% .) + facet_wrap(~label)
)

# Using purrr
# Note: slice_rows() / by_slice() are no longer part of purrr (they were moved
# to the separate purrrlyr package), so the per-label iteration is done here
# with split() + map(), which works with current purrr.
library(purrr)

# One data frame per label, each turned into its own plot by swapping the
# data of the template plot `p`.
plots <- map(
  split(long_df, long_df$label),
  ~ (p %+% .x) + facet_wrap(~label)
)

# Same layout as the do() result: one row per label, plots in a list-column.
res <- tibble(label = names(plots), plots = unname(plots))

Which gives:

#Source: local data frame [19 x 2]
#Groups: <by row>
#
## A tibble: 19 × 2
#       label    plots
#*      <chr>   <list>
#1  AT1G04170 <S3: gg>
#2  AT1G08520 <S3: gg>
#3  AT1G10670 <S3: gg>
#4  AT1G53500 <S3: gg>
#5  AT1G54270 <S3: gg>
#6  AT1G80480 <S3: gg>
#7  AT2G16950 <S3: gg>
#8  AT3G03960 <S3: gg>
#9  AT3G57290 <S3: gg>
#10 AT3G63460 <S3: gg>
#11 AT4G20890 <S3: gg>
#12 AT4G20980 <S3: gg>
#13 AT4G24190 <S3: gg>
#14 AT4G29670 <S3: gg>
#15 AT5G23740 <S3: gg>
#16 AT5G23860 <S3: gg>
#17 AT5G40450 <S3: gg>
#18 AT5G62700 <S3: gg>
#19 ATCG00780 <S3: gg>

You can then access each plot, for example:

# `[[` extracts the ggplot object itself; `[` would return a one-element list.
res$plots[[1]]

Which gives:

enter image description here


To save all plots into a pdf, simply do:

# Open a multi-page PDF device; "Rplots.pdf" is the name pdf() uses by default,
# spelled out here so it is clear where the output goes.
pdf("Rplots.pdf")
# Print each plot explicitly (one page per plot). Relying on auto-printing of
# `res$plots` only works when typed at the console; under source() or inside a
# function nothing would be drawn.
invisible(lapply(res$plots, print))
dev.off()

Upvotes: 3

Related Questions