Reputation: 757
I have data like this:
# Example data (dput output): a 23-row data frame with a factor column `Data`
# and seven numeric columns Case1..Case7, many of whose entries are NA.
df <- structure(list(Data = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
9L, 10L, 11L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
23L, 8L, 12L), .Label = c("A", "B", "C", "D", "E", "F", "G",
"GH", "H", "I", "J", "JJ", "K", "L", "M", "N", "O", "P", "Q",
"S", "T", "U", "V"), class = "factor"), Case1 = c(0.775230796,
0.752114939, 0.738305175, 0.579739531, 0.573781392, 0.572924713,
0.563521221, 0.558172423, 0.557918102, 0.552505171, 0.551921725,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Case2 = c(NA,
NA, 0.729029032, NA, NA, NA, NA, NA, 0.736282677, 0.702296369,
NA, 0.736060259, 0.735161607, 0.735100052, 0.734870114, 0.732743364,
0.703591649, NA, NA, NA, NA, NA, NA), Case3 = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.735568109,
NA, NA, NA, NA, NA), Case4 = c(0.713963088, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.781378904,
0.769328289, NA, NA, NA), Case5 = c(NA, NA, NA, NA, 0.693759347,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.688396329, NA, NA, NA,
NA, NA, 0.6781535, NA, NA), Case6 = c(0.795781477, 0.793446723,
0.814514206, NA, 0.773564937, NA, NA, 0.783075476, NA, NA, NA,
0.742827684, 0.796443568, NA, 0.75610514, 0.751162004, NA, NA,
NA, NA, NA, 0.738104459, NA), Case7 = c(0.732114731, 0.720226731,
0.782045984, NA, 0.717304483, NA, NA, 0.674992626, NA, NA, NA,
NA, 0.71098987, NA, 0.676952218, NA, NA, NA, NA, NA, NA, NA,
0.676754903)), class = "data.frame", row.names = c(NA, -23L))
I want to identify which rows have only one value, and then remove the rows that have only one value or are completely NA.
I can remove those rows that are completely NA using the following
# Keep only the rows that have at least one non-NA value in the Case columns.
# The subscript must be a logical mask: the raw per-row NA counts from
# rowSums(is.na(df)) would be interpreted by `[` as row indices instead.
# df[-1] leaves out the first column (Data), which holds labels, not values.
mydf <- df[rowSums(!is.na(df[-1])) > 0, ]
However, I don't know how to remove the rows that have no values or only one value.
Upvotes: 0
Views: 70
Reputation: 1999
To find the rows where all entries are NA, use the condition
# TRUE for rows whose columns other than the first (Data) are all NA
rowSums(is.na(df[,-1])) == ncol(df[,-1])
To find the rows where there is only one value and the rest are NA, use
# TRUE for rows with exactly one non-NA value outside the first (Data) column
rowSums(!is.na(df[,-1])) == 1
Combine them to get rows where either condition is NOT true (i.e. remove rows where condition is true and keep rows where condition is not true). We can do this with
> # Keep the rows for which neither condition holds (not all-NA, not single-valued)
> df[ !(rowSums(is.na(df[,-1])) == ncol(df[,-1]) | rowSums(!is.na(df[,-1])) == 1), ]
Data Case1 Case2 Case3 Case4 Case5 Case6 Case7
1 A 0.7752308 NA NA 0.7139631 NA 0.7957815 0.7321147
2 B 0.7521149 NA NA NA NA 0.7934467 0.7202267
3 C 0.7383052 0.7290290 NA NA NA 0.8145142 0.7820460
5 E 0.5737814 NA NA NA 0.6937593 0.7735649 0.7173045
8 H 0.5581724 NA NA NA NA 0.7830755 0.6749926
9 I 0.5579181 0.7362827 NA NA NA NA NA
10 J 0.5525052 0.7022964 NA NA NA NA NA
12 L NA 0.7360603 NA NA NA 0.7428277 NA
13 M NA 0.7351616 NA NA NA 0.7964436 0.7109899
15 O NA 0.7348701 NA NA 0.6883963 0.7561051 0.6769522
16 P NA 0.7327434 NA NA NA 0.7511620 NA
Upvotes: 1
Reputation: 5138
Here is one way to do it in base R.
> # Count the non-NA values in each row and store the count as a new column.
> # df[-1] drops the first column (Data), so only Case1..Case7 are counted.
> df[["value_count"]] <- rowSums(!is.na(df[-1]))
>
> # Keep only the rows that hold at least two values
> df <- df[df[["value_count"]] >= 2, ]
> df
Data Case1 Case2 Case3 Case4 Case5 Case6 Case7 value_count
1 A 0.7752308 NA NA 0.7139631 NA 0.7957815 0.7321147 4
2 B 0.7521149 NA NA NA NA 0.7934467 0.7202267 3
3 C 0.7383052 0.7290290 NA NA NA 0.8145142 0.7820460 4
5 E 0.5737814 NA NA NA 0.6937593 0.7735649 0.7173045 4
8 H 0.5581724 NA NA NA NA 0.7830755 0.6749926 3
9 I 0.5579181 0.7362827 NA NA NA NA NA 2
10 J 0.5525052 0.7022964 NA NA NA NA NA 2
12 L NA 0.7360603 NA NA NA 0.7428277 NA 2
13 M NA 0.7351616 NA NA NA 0.7964436 0.7109899 3
15 O NA 0.7348701 NA NA 0.6883963 0.7561051 0.6769522 4
16 P NA 0.7327434 NA NA NA 0.7511620 NA 2
Or, concisely:
# One-liner: keep rows with two or more non-NA values outside the first column
df[rowSums(!is.na(df[-1])) >= 2, ]
Upvotes: 2