Reputation: 573
I have a dataframe like this:
# Example data: one row per Ensembl gene ID plus nine log fold-change columns.
# data.frame() repairs non-syntactic names by default, so `logFC 1` is stored
# as logFC.1, `logFC 2` as logFC.2, and so on.
df5 <- data.frame(
  Ensembl_ID = c(
    "ENSG00000000003", "ENSG00000000005", "ENSG00000000419",
    "ENSG00000000457", "ENSG00000000460", "ENSG00000000938",
    "ENSG00000000971", "ENSG00000001036", "ENSG00000001084",
    "ENSG00000001167"
  ),
  `logFC 1` = c(
    -0.834165161710272, 1.02199443531549,
    -0.558658947885705, -0.390114219973209, -1.23551839713296,
    3.11429434221998, 0.283932163407262, -1.16908518620064,
    -0.597054772455507, -0.593624543273255
  ),
  `logFC 2` = c(
    -1.18531035488942, 0.423719727339646, -1.23261719368372,
    0.0855281133529292, -1.52366830232278, 3.36692586561211,
    1.00323690950956, -0.000211248816114964, -4.74738483548391,
    -0.318176231083024
  ),
  `logFC 3` = c(
    -0.262659255267546, 1.3962481061442, -0.548673555705647,
    -0.0149651083306594, -1.45458689193089, 2.54126941463459,
    1.17711308509307, -1.19425284921181, 1.17788731755683,
    -0.367897054652365
  ),
  `logFC 4` = c(
    -0.840752912305256, 0.536548846040064, -0.277409459604357,
    -0.241073614962264, -0.875313153342293, 1.61789645804321,
    0.412287101096504, -1.11846661523232, -2.6274528854429,
    -0.760452698231182
  ),
  `logFC 5` = c(
    -0.968784779247286, -0.502809694119192, -0.231526399163731,
    -0.530038395734114, -0.706006018337411, 3.58264357077653,
    -0.127521010699219, 0.270523387217103, 1.68335644352003,
    -0.314902131571829
  ),
  `logFC 6` = c(
    -0.481754175843152, -0.440784040523259, -0.532975340622715,
    -0.182089795101371, -0.564807490336052, 1.74119896504534,
    -0.96169805631325, -0.721782763145306, -0.433459827401695,
    -0.727495835245995
  ),
  `logFC 7` = c(
    -0.889343429110847, 1.07937149728343, -0.215144871523998,
    -0.92234350748557, -0.832108253417702, 2.02456082994848,
    -0.0434322861759954, -0.523126561938426, -0.556984056084809,
    -0.740331742513503
  ),
  `logFC 8` = c(
    -0.858141567384178, 1.87728717064375, -0.381047638414538,
    -0.613568289061259, -1.92838339196505, 2.23393705735665,
    0.635389543483408, -0.466053620529111, -1.50483745357134,
    -1.33400859143521
  ),
  `logFC 9` = c(
    -0.486388736112514, 0.789390852922639, -0.869434195504952,
    -0.70405854858187, -1.16488184095428, 2.91497178849082,
    -2.10331904053714, -0.571130459068143, -0.219526004620518,
    -0.301435496557957
  )
)
For every row, I want to count how many values fall into each of three ranges (below -0.2, from -0.2 to 0.2, and from 0.2 to 1) and append these counts as new columns at the end of the dataframe.
When I try to do this with the code below, it returns the wrong numbers:
# NOTE(review): each assignment below appends a column to df5, and the lines
# after it select df5[1:10,-1] again, so the counts that were just added are
# themselves included in the next comparison.
df5$sumFP <- rowSums(df5[1:10,-1] < -0.2) # values less than -0.2
df5$sumTP <- rowSums(df5[1:10,-1] > 0.2) # values greater than 0.2 (no upper bound of 1 is applied)
df5$sumFN <- rowSums(df5[1:10,-1] < 0.2 & df5[1:10,-1] > -0.2) # values strictly between -0.2 and 0.2
Any idea why, or is there another way to do it?
Please help me. Thanks in advance
Upvotes: 1
Views: 74
Reputation: 9878
We can use dplyr with mutate and a rowSums call for every condition:
# Per-row counts of logFC values in each range. The ID column is left out of
# every comparison via -Ensembl_ID; rowSums() tallies the TRUEs across columns.
df5 %>%
  mutate(
    lower = rowSums(across(-Ensembl_ID, \(v) v < -0.2)),
    intermediate = rowSums(across(-Ensembl_ID, \(v) -0.2 <= v & v <= 0.2)),
    greater = rowSums(across(-Ensembl_ID, \(v) 0.2 < v & v < 1))
  )
Ensembl_ID logFC.1 logFC.2 logFC.3 logFC.4 logFC.5 logFC.6
1 ENSG00000000003 -0.8341652 -1.1853103549 -0.26265926 -0.8407529 -0.9687848 -0.4817542
2 ENSG00000000005 1.0219944 0.4237197273 1.39624811 0.5365488 -0.5028097 -0.4407840
3 ENSG00000000419 -0.5586589 -1.2326171937 -0.54867356 -0.2774095 -0.2315264 -0.5329753
4 ENSG00000000457 -0.3901142 0.0855281134 -0.01496511 -0.2410736 -0.5300384 -0.1820898
5 ENSG00000000460 -1.2355184 -1.5236683023 -1.45458689 -0.8753132 -0.7060060 -0.5648075
6 ENSG00000000938 3.1142943 3.3669258656 2.54126941 1.6178965 3.5826436 1.7411990
7 ENSG00000000971 0.2839322 1.0032369095 1.17711309 0.4122871 -0.1275210 -0.9616981
8 ENSG00000001036 -1.1690852 -0.0002112488 -1.19425285 -1.1184666 0.2705234 -0.7217828
9 ENSG00000001084 -0.5970548 -4.7473848355 1.17788732 -2.6274529 1.6833564 -0.4334598
10 ENSG00000001167 -0.5936245 -0.3181762311 -0.36789705 -0.7604527 -0.3149021 -0.7274958
logFC.7 logFC.8 logFC.9 lower intermediate greater
1 -0.88934343 -0.8581416 -0.4863887 9 0 0
2 1.07937150 1.8772872 0.7893909 2 0 3
3 -0.21514487 -0.3810476 -0.8694342 9 0 0
4 -0.92234351 -0.6135683 -0.7040585 6 3 0
5 -0.83210825 -1.9283834 -1.1648818 9 0 0
6 2.02456083 2.2339371 2.9149718 0 0 0
7 -0.04343229 0.6353895 -2.1033190 2 2 3
8 -0.52312656 -0.4660536 -0.5711305 7 1 1
9 -0.55698406 -1.5048375 -0.2195260 7 0 0
10 -0.74033174 -1.3340086 -0.3014355 9 0 0
We can also do it with purrr::map_dfc and dplyr::bind_cols:
# Same counts with purrr: each predicate is applied to the logFC columns only
# (df5[-1] drops Ensembl_ID) and rowSums() tallies the TRUEs per row.
# Passing the whole df5 would also compare the character ID column against the
# numeric cut-offs, which is done as a string comparison.
map_dfc(
  list(
    lower = \(x) x < -0.2,
    intermediate = \(x) x >= -0.2 & x <= 0.2,
    greater = \(x) x > 0.2 & x < 1
  ),
  \(in_range) rowSums(in_range(df5[-1]))
) %>%
  # Put the original columns first and the three count columns last.
  bind_cols(df5, .)
Upvotes: 3
Reputation: 79276
Reshape with pivot_longer, then summarise by condition, and finally bind the original logFC columns back onto the counts.
library(dplyr)
library(tidyr)

# Reshape to one row per (gene, logFC column) value, count the values per gene
# that fall in each range, then attach the original logFC columns again.
df5 %>%
  pivot_longer(
    cols = -Ensembl_ID,
    names_to = "names",
    values_to = "values"
  ) %>%
  group_by(Ensembl_ID) %>%
  summarise(
    lesser = sum(values < -0.2),
    between = sum(values >= -0.2 & values < 0.2),
    greater = sum(values >= 0.2 & values < 1)
  ) %>%
  # summarise() returns one row per Ensembl_ID ordered by the grouping key, so
  # match rows by ID instead of by position to stay aligned with df5.
  left_join(df5, by = "Ensembl_ID") %>%
  # Return a plain data.frame, as the positional cbind() did.
  as.data.frame()
Ensembl_ID lesser between greater logFC.1 logFC.2 logFC.3 logFC.4 logFC.5 logFC.6 logFC.7 logFC.8 logFC.9
1 ENSG00000000003 9 0 0 -0.8341652 -1.1853103549 -0.26265926 -0.8407529 -0.9687848 -0.4817542 -0.88934343 -0.8581416 -0.4863887
2 ENSG00000000005 2 0 3 1.0219944 0.4237197273 1.39624811 0.5365488 -0.5028097 -0.4407840 1.07937150 1.8772872 0.7893909
3 ENSG00000000419 9 0 0 -0.5586589 -1.2326171937 -0.54867356 -0.2774095 -0.2315264 -0.5329753 -0.21514487 -0.3810476 -0.8694342
4 ENSG00000000457 6 3 0 -0.3901142 0.0855281134 -0.01496511 -0.2410736 -0.5300384 -0.1820898 -0.92234351 -0.6135683 -0.7040585
5 ENSG00000000460 9 0 0 -1.2355184 -1.5236683023 -1.45458689 -0.8753132 -0.7060060 -0.5648075 -0.83210825 -1.9283834 -1.1648818
6 ENSG00000000938 0 0 0 3.1142943 3.3669258656 2.54126941 1.6178965 3.5826436 1.7411990 2.02456083 2.2339371 2.9149718
7 ENSG00000000971 2 2 3 0.2839322 1.0032369095 1.17711309 0.4122871 -0.1275210 -0.9616981 -0.04343229 0.6353895 -2.1033190
8 ENSG00000001036 7 1 1 -1.1690852 -0.0002112488 -1.19425285 -1.1184666 0.2705234 -0.7217828 -0.52312656 -0.4660536 -0.5711305
9 ENSG00000001084 7 0 0 -0.5970548 -4.7473848355 1.17788732 -2.6274529 1.6833564 -0.4334598 -0.55698406 -1.5048375 -0.2195260
10 ENSG00000001167 9 0 0 -0.5936245 -0.3181762311 -0.36789705 -0.7604527 -0.3149021 -0.7274958 -0.74033174 -1.3340086 -0.3014355
Upvotes: 2
Reputation: 73782
How about this.
# logFC columns only: drop the ID column and keep every row. No row range is
# hard-coded, so this works for any number of genes.
x <- df5[-1]

# values below -0.2
rowSums(x < -0.2)
# 1 2 3 4 5 6 7 8 9 10
# 9 2 9 6 9 0 2 7 7 9

# values in [-0.2, 0.2)
rowSums(-0.2 <= x & x < 0.2)
# 1 2 3 4 5 6 7 8 9 10
# 0 0 0 3 0 0 2 1 0 0

# values in [0.2, 1]
rowSums(0.2 <= x & x <= 1)
# 1 2 3 4 5 6 7 8 9 10
# 0 3 0 0 0 0 3 1 0 0
Upvotes: 2