PesKchan
PesKchan

Reputation: 968

Filter NA values from different columns and create a new data frame

I have a sample dataframe like this

dput(test)
structure(list(ENSEMBL = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 
6L, 7L, 8L, 8L), .Label = c("ENSG00000000457", "ENSG00000000971", 
"ENSG00000001084", "ENSG00000001497", "ENSG00000001561", "ENSG00000001626", 
"ENSG00000002016", "ENSG00000002745"), class = "factor"), log2FoldChange_Expression_Expression = c(NA, 
NA, NA, -2.241, NA, NA, -2.14533419363106, NA, NA, -1.9), log2FoldChange_Region_Region = c(NA, 
-2.12428956535999, -1.90310925611108, -2.1197276635241, -1.56131421185801, 
-2.01286954861636, NA, -1.9307944731461, -1.67126804236843, -1.6944353089338
), Peak_Region_Region = structure(c(1L, 2L, 4L, 9L, 3L, 6L, 5L, 
10L, 7L, 8L), .Label = c("Peak13482", "Peak15476", "Peak177970", 
"Peak178509", "Peak197012", "Peak197013", "Peak197235", "Peak197237", 
"Peak224709", "Peak42732"), class = "factor")), class = "data.frame", row.names = c(NA, 
-10L))

So I have three conditions to use

test
           ENSEMBL log2FoldChange_Expression_Expression log2FoldChange_Region_Region Peak_Region_Region
1  ENSG00000000457                                   NA                           NA          Peak13482
2  ENSG00000000971                                   NA                    -2.124290          Peak15476
3  ENSG00000001084                                   NA                    -1.903109         Peak178509
4  ENSG00000001497                            -2.241000                    -2.119728         Peak224709
5  ENSG00000001561                                   NA                    -1.561314         Peak177970
6  ENSG00000001626                                   NA                    -2.012870         Peak197013
7  ENSG00000001626                            -2.145334                           NA         Peak197012
8  ENSG00000002016                                   NA                    -1.930794          Peak42732
9  ENSG00000002745                                   NA                    -1.671268         Peak197235
10 ENSG00000002745                            -1.900000                    -1.694435         Peak197237

Any suggestions or help would be really appreciated.

Upvotes: 0

Views: 91

Answers (2)

Ronak Shah
Ronak Shah

Reputation: 388982

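You can use `if_all()` and `if_any()`: `if_all()` keeps the rows where every selected column is non-NA, and `if_any()` keeps the rows where at least one selected column is non-NA.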

library(dplyr)

# test1: rows in which every log2FoldChange column has a value (no NA).
test1 <- test %>%
  filter(if_all(contains("log2FoldChange"), ~ !is.na(.x)))
# test2: rows in which at least one log2FoldChange column has a value.
test2 <- test %>%
  filter(if_any(contains("log2FoldChange"), ~ !is.na(.x)))

test1
#          ENSEMBL log2FoldChange_Expression_Expression log2FoldChange_Region_Region Peak_Region_Region
#1 ENSG00000001497                               -2.241                    -2.119728         Peak224709
#2 ENSG00000002745                               -1.900                    -1.694435         Peak197237

test2
#          ENSEMBL log2FoldChange_Expression_Expression log2FoldChange_Region_Region Peak_Region_Region
#1 ENSG00000000971                                   NA                    -2.124290          Peak15476
#2 ENSG00000001084                                   NA                    -1.903109         Peak178509
#3 ENSG00000001497                            -2.241000                    -2.119728         Peak224709
#4 ENSG00000001561                                   NA                    -1.561314         Peak177970
#5 ENSG00000001626                                   NA                    -2.012870         Peak197013
#6 ENSG00000001626                            -2.145334                           NA         Peak197012
#7 ENSG00000002016                                   NA                    -1.930794          Peak42732
#8 ENSG00000002745                                   NA                    -1.671268         Peak197235
#9 ENSG00000002745                            -1.900000                    -1.694435         Peak197237

Upvotes: 1

Samet Sökel
Samet Sökel

Reputation: 2670

library(dplyr)

# Both filters check the log2FoldChange columns for missing values.
# if_all() / if_any() apply the predicate to whole columns at once, so no
# rowwise() pass (and no matching ungroup()) is required.
# as_tibble() keeps the result a tibble, which is what the rowwise() version
# returned.

# condition1: keep rows where every log2FoldChange column is non-NA.
condition1 <- test %>%
  as_tibble() %>%
  filter(if_all(starts_with("log2FoldChange"), ~ !is.na(.x)))

# condition2: keep rows where at least one log2FoldChange column is non-NA.
condition2 <- test %>%
  as_tibble() %>%
  filter(if_any(starts_with("log2FoldChange"), ~ !is.na(.x)))

Output of condition1:

  ENSEMBL         log2FoldChange_Expressi… log2FoldChange_Regi… Peak_Region_Reg…
  <fct>                              <dbl>                <dbl> <fct>           
1 ENSG00000001497                    -2.24                -2.12 Peak224709      
2 ENSG00000002745                    -1.9                 -1.69 Peak197237   

Output of condition2:

  ENSEMBL         log2FoldChange_Expressi… log2FoldChange_Regi… Peak_Region_Reg…
  <fct>                              <dbl>                <dbl> <fct>           
1 ENSG00000000971                    NA                   -2.12 Peak15476       
2 ENSG00000001084                    NA                   -1.90 Peak178509      
3 ENSG00000001497                    -2.24                -2.12 Peak224709      
4 ENSG00000001561                    NA                   -1.56 Peak177970      
5 ENSG00000001626                    NA                   -2.01 Peak197013      
6 ENSG00000001626                    -2.15                NA    Peak197012      
7 ENSG00000002016                    NA                   -1.93 Peak42732       
8 ENSG00000002745                    NA                   -1.67 Peak197235      
9 ENSG00000002745                    -1.9                 -1.69 Peak197237      

Upvotes: 2

Related Questions