Reputation: 563
I would like to subset the columns of a data frame in R into another data frame, based on the values in one specific row (identified by its value in the first column).
# Example input: 10 rows, an Ensembl_ID column and nine numeric columns
# logFC.1 ... logFC.9.
df1 <- data.frame(
  Ensembl_ID = c(
    "ENSG00000000003", "ENSG00000000005", "ENSG00000000419",
    "ENSG00000000457", "ENSG00000000460", "ENSG00000000938",
    "ENSG00000000971", "ENSG00000001036", "ENSG00000001084",
    "ENSG00000001167"
  ),
  logFC.1 = c(
    0.147447019707984, -0.278643924528991, 0.00638502079233481,
    0.00248371473862579, 0.0591639590814736, -0.0892578080659792,
    -0.0139042150604349, 0.15210410748665, -0.0273174541997048,
    0.0373813166759115
  ),
  logFC.2 = c(
    0.14237211045168, -0.153847067952652, 0.00806519294435945,
    -0.0243298183425441, 0.0639184480028851, -0.0791126460573967,
    -0.0517704622015086, 0.100033161692714, 0.105136768894399,
    0.0509474174745926
  ),
  logFC.3 = c(
    0.0692402101693023, -0.212626837128185, 0.0665466667502187,
    0.0189664498456434, 0.073631371224761, -0.0642014520794086,
    0.0115060035255512, 0.104767159584613, 0.140378485980222,
    0.0814931176279395
  ),
  logFC.4 = c(
    0.175916688982428, -0.0606440302201137, 0.0862627141013101,
    0.105179938123113, 0.128866411791584, -0.0988927171791539,
    0.128758540724723, 0.0997656895899759, 0.345468063926355,
    0.130898388184307
  ),
  logFC.5 = c(
    0.144743421921328, 0.247159332221974, 0.0232237466183996,
    0.0800788300610377, 0.178887735169961, -0.0592727391427514,
    -0.0723099661837084, 0.0387715967173523, -0.0607793368610136,
    0.110464511693512
  ),
  logFC.6 = c(
    0.0848187321362019, -0.299283590551811, 0.0366788808661408,
    -0.00763280370062748, 0.0145148270035513, -0.0384916970002755,
    -0.0000335640771631606, 0.0851895375297912, -0.00364050261322463,
    0.0602143760128463
  ),
  logFC.7 = c(
    0.305256444042024, -0.274308408751318, 0.0977066795857243,
    -0.0265659018074027, 0.136348613124811, -0.0938364533000299,
    -0.143634179166262, 0.139913812601005, 0.268708965044232,
    0.133427360632365
  ),
  logFC.8 = c(
    0.12744808339884, -0.285015311267508, 0.0459140048745496,
    -0.00976012971218515, 0.13292412700208, -0.184687147498946,
    -0.0411558715447517, 0.165717944056239, 0.323358546432839,
    0.0502386767987279
  ),
  logFC.9 = c(
    0.286824598926274, 0.095530985319937, 0.101370835445593,
    0.0352336819150421, 0.0573659992830985, -0.0739779010955875,
    0.00466993628480923, 0.0486643748696862, 0.0322601740536419,
    0.0873158516027886
  )
)
I would like to select the columns whose value in the "ENSG00000000005" row is less than -0.1 (< -0.1), so the logFC.5 and logFC.9 columns will be excluded from the output.
# Expected result as given by the asker: the same 10 rows with the
# logFC.5 and logFC.9 columns removed.
df1 <- data.frame(
  Ensembl_ID = c(
    "ENSG00000000003", "ENSG00000000005", "ENSG00000000419",
    "ENSG00000000457", "ENSG00000000460", "ENSG00000000938",
    "ENSG00000000971", "ENSG00000001036", "ENSG00000001084",
    "ENSG00000001167"
  ),
  logFC.1 = c(
    0.147447019707984, -0.278643924528991, 0.00638502079233481,
    0.00248371473862579, 0.0591639590814736, -0.0892578080659792,
    -0.0139042150604349, 0.15210410748665, -0.0273174541997048,
    0.0373813166759115
  ),
  logFC.2 = c(
    0.14237211045168, -0.153847067952652, 0.00806519294435945,
    -0.0243298183425441, 0.0639184480028851, -0.0791126460573967,
    -0.0517704622015086, 0.100033161692714, 0.105136768894399,
    0.0509474174745926
  ),
  logFC.3 = c(
    0.0692402101693023, -0.212626837128185, 0.0665466667502187,
    0.0189664498456434, 0.073631371224761, -0.0642014520794086,
    0.0115060035255512, 0.104767159584613, 0.140378485980222,
    0.0814931176279395
  ),
  logFC.4 = c(
    0.175916688982428, -0.0606440302201137, 0.0862627141013101,
    0.105179938123113, 0.128866411791584, -0.0988927171791539,
    0.128758540724723, 0.0997656895899759, 0.345468063926355,
    0.130898388184307
  ),
  logFC.6 = c(
    0.0848187321362019, -0.299283590551811, 0.0366788808661408,
    -0.00763280370062748, 0.0145148270035513, -0.0384916970002755,
    -0.0000335640771631606, 0.0851895375297912, -0.00364050261322463,
    0.0602143760128463
  ),
  logFC.7 = c(
    0.305256444042024, -0.274308408751318, 0.0977066795857243,
    -0.0265659018074027, 0.136348613124811, -0.0938364533000299,
    -0.143634179166262, 0.139913812601005, 0.268708965044232,
    0.133427360632365
  ),
  logFC.8 = c(
    0.12744808339884, -0.285015311267508, 0.0459140048745496,
    -0.00976012971218515, 0.13292412700208, -0.184687147498946,
    -0.0411558715447517, 0.165717944056239, 0.323358546432839,
    0.0502386767987279
  )
)
The expected output would be:
Some ideas?
Cheers!
Upvotes: 0
Views: 111
Reputation: 3755
With base R:
# Keep Ensembl_ID plus every other column whose value in the
# "ENSG00000000005" row is below -0.1.
# match() picks the first row with that ID; if the ID is absent the lookup
# yields NA values, so only Ensembl_ID is kept instead of every column.
row_idx <- match("ENSG00000000005", df1$Ensembl_ID)
row_vals <- unlist(df1[row_idx, -1], use.names = FALSE)
# NA values are treated as "does not pass" so the column index never holds NA.
keep <- c(TRUE, !is.na(row_vals) & row_vals < -0.1)
# drop = FALSE keeps the result a data frame even if only Ensembl_ID survives.
df1[, keep, drop = FALSE]
gives,
Ensembl_ID logFC.1 logFC.2 logFC.3 logFC.6 logFC.7 logFC.8
1 ENSG00000000003 0.147447020 0.142372110 0.06924021 8.481873e-02 0.30525644 0.12744808
2 ENSG00000000005 -0.278643925 -0.153847068 -0.21262684 -2.992836e-01 -0.27430841 -0.28501531
3 ENSG00000000419 0.006385021 0.008065193 0.06654667 3.667888e-02 0.09770668 0.04591400
4 ENSG00000000457 0.002483715 -0.024329818 0.01896645 -7.632804e-03 -0.02656590 -0.00976013
5 ENSG00000000460 0.059163959 0.063918448 0.07363137 1.451483e-02 0.13634861 0.13292413
6 ENSG00000000938 -0.089257808 -0.079112646 -0.06420145 -3.849170e-02 -0.09383645 -0.18468715
7 ENSG00000000971 -0.013904215 -0.051770462 0.01150600 -3.356408e-05 -0.14363418 -0.04115587
8 ENSG00000001036 0.152104107 0.100033162 0.10476716 8.518954e-02 0.13991381 0.16571794
9 ENSG00000001084 -0.027317454 0.105136769 0.14037849 -3.640503e-03 0.26870897 0.32335855
10 ENSG00000001167 0.037381317 0.050947417 0.08149312 6.021438e-02 0.13342736 0.05023868
Note that logFC.4 should also be excluded, since its value in that row (-0.06064403) is greater than -0.1.
Upvotes: 2