Reputation: 2067
My data looks as follows:
var1 var2 var3
1 9V .6V 77V
2 6V .3V 15V
3 9V .7V 114V
4 12V 1.0V 199V
5 14V 1.2V 245V
6 13V .8V 158V
7 11V .6V 136V
8 11V .7V 132V
9 12V .8V 171V
10 11V .7V 155V
11 13V .8V 166V
12 11V .7V 138V
13 11V .9V 173V
14 9V .8V 143V
15 8V .7V 105V
16 4V .4V 21V
17 8V .4V 26V
18 10V .8V 154V
19 9V .8V 130V
20 10V .7V 113V
21 10V .6V 102V
22 11V .8V 135V
23 9V .7V 120V
24 10V .7V 126V
25 7N .6N 124N
26 14N 1.1N 210N
The last 2 rows contain `N`. I am trying to set the observations which contain `N` to `NA`.
I am trying some combination of `str_detect` and `str_replace`, but I cannot seem to get it working.
Additionally, I have other letters that occur very rarely, e.g. `M` and `P`. I would like to find a way so that, if an observation contains one of these letters, that observation is set to `NA`, i.e. something like `c(var1:var3) %in% str_detect(c("N", "M", "P"))` ... `str_replace_all`.
Data:
# Example data: a 50-row data.frame with three character columns
# (var1, var2, var3). Every value carries a trailing letter; rows 25 and 26
# are the ones ending in "N" instead of "V".
structure(list(var1 = c("9V", "6V", "9V", "12V", "14V", "13V",
"11V", "11V", "12V", "11V", "13V", "11V", "11V", "9V", "8V",
"4V", "8V", "10V", "9V", "10V", "10V", "11V", "9V", "10V", "7N",
"14N", "7V", "5V", "7V", "9V", "8V", "8V", "5V", "4V", "4V",
"5V", "7V", "5V", "6V", "8V", "9V", "6V", "6V", "7V", "8V", "7V",
"8V", "8V", "7V", "8V"), var2 = c(".6V", ".3V", ".7V", "1.0V",
"1.2V", ".8V", ".6V", ".7V", ".8V", ".7V", ".8V", ".7V", ".9V",
".8V", ".7V", ".4V", ".4V", ".8V", ".8V", ".7V", ".6V", ".8V",
".7V", ".7V", ".6N", "1.1N", ".4V", ".3V", ".4V", ".6V", ".5V",
".6V", ".4V", ".3V", ".2V", ".3V", ".4V", ".3V", ".3V", ".5V",
".6V", ".4V", ".4V", ".4V", ".5V", ".4V", ".4V", ".5V", ".4V",
".4V"), var3 = c("77V", "15V", "114V", "199V", "245V", "158V",
"136V", "132V", "171V", "155V", "166V", "138V", "173V", "143V",
"105V", "21V", "26V", "154V", "130V", "113V", "102V", "135V",
"120V", "126V", "124N", "210N", "35V", "9V", "48V", "91V", "81V",
"80V", "14V", "11V", "7V", "13V", "34V", "18V", "15V", "58V",
"76V", "29V", "30V", "31V", "32V", "34V", "57V", "58V", "52V",
"49V")), row.names = c(NA, 50L), class = "data.frame")
Upvotes: 2
Views: 402
Reputation: 28825
The `dplyr`-`stringr` solution that you were trying to figure out would look like this:
library(stringr)
library(dplyr)
# Set to NA every value in var1..var3 that ends in N, M or P.
# str_replace_all() with replacement = NA_character_ turns the whole matching
# string into NA instead of editing only the matched part; values that do not
# match are returned unchanged.
# across() is used in place of the superseded mutate_at().
df1 %>%
  mutate(across(var1:var3,
                ~ str_replace_all(.x, "[NMP]$", replacement = NA_character_)))
#> var1 var2 var3
#> 1 9V .6V 77V
#> 2 6V .3V 15V
#> 3 9V .7V 114V
#> 4 12V 1.0V 199V
#> 5 14V 1.2V 245V
## ...
#> 20 10V .7V 113V
#> 21 10V .6V 102V
#> 22 11V .8V 135V
#> 23 9V .7V 120V
#> 24 10V .7V 126V
#> 25 <NA> <NA> <NA>
#> 26 <NA> <NA> <NA>
#> 27 7V .4V 35V
#> 28 5V .3V 9V
#> 29 7V .4V 48V
#> 30 9V .6V 91V
## ...
#> 45 8V .5V 32V
#> 46 7V .4V 34V
#> 47 8V .4V 57V
#> 48 8V .5V 58V
#> 49 7V .4V 52V
#> 50 8V .4V 49V
Upvotes: 1
Reputation: 1210
You just need to change your pattern to "N|M|P":
# Example data: a 50-row data.frame with three character columns
# (var1, var2, var3). Rows 25 and 26 hold the values ending in "N".
dat <- structure(list(var1 = c("9V", "6V", "9V", "12V", "14V", "13V",
"11V", "11V", "12V", "11V", "13V", "11V", "11V", "9V", "8V",
"4V", "8V", "10V", "9V", "10V", "10V", "11V", "9V", "10V", "7N",
"14N", "7V", "5V", "7V", "9V", "8V", "8V", "5V", "4V", "4V",
"5V", "7V", "5V", "6V", "8V", "9V", "6V", "6V", "7V", "8V", "7V",
"8V", "8V", "7V", "8V"), var2 = c(".6V", ".3V", ".7V", "1.0V",
"1.2V", ".8V", ".6V", ".7V", ".8V", ".7V", ".8V", ".7V", ".9V",
".8V", ".7V", ".4V", ".4V", ".8V", ".8V", ".7V", ".6V", ".8V",
".7V", ".7V", ".6N", "1.1N", ".4V", ".3V", ".4V", ".6V", ".5V",
".6V", ".4V", ".3V", ".2V", ".3V", ".4V", ".3V", ".3V", ".5V",
".6V", ".4V", ".4V", ".4V", ".5V", ".4V", ".4V", ".5V", ".4V",
".4V"), var3 = c("77V", "15V", "114V", "199V", "245V", "158V",
"136V", "132V", "171V", "155V", "166V", "138V", "173V", "143V",
"105V", "21V", "26V", "154V", "130V", "113V", "102V", "135V",
"120V", "126V", "124N", "210N", "35V", "9V", "48V", "91V", "81V",
"80V", "14V", "11V", "7V", "13V", "34V", "18V", "15V", "58V",
"76V", "29V", "30V", "31V", "32V", "34V", "57V", "58V", "52V",
"49V")), row.names = c(NA, 50L), class = "data.frame")
library(stringr)
library(dplyr)
# Set a value to NA when it contains N, M or P. A replacement of
# NA_character_ makes str_replace_all() return NA for the whole string.
# The replacement is applied to all three columns (not only var3), because
# the flagged letters occur in var1 and var2 as well.
dat %>%
  mutate(across(var1:var3,
                ~ str_replace_all(.x, "N|M|P", replacement = NA_character_)))
Upvotes: 1
Reputation: 160437
Here's one solution:
# Replace, in every column of x, each value that ends in N, M or P with NA.
# Assigning into x[] keeps x a data.frame instead of overwriting it with the
# plain list that lapply() returns. NA_character_ keeps the columns character.
x[] <- lapply(x, function(s) ifelse(grepl("[NMP]$", s), NA_character_, s))
x
# var1 var2 var3
# 1 9V .6V 77V
# 2 6V .3V 15V
# 3 9V .7V 114V
# 4 12V 1.0V 199V
# 5 14V 1.2V 245V
# 6 13V .8V 158V
# 7 11V .6V 136V
# 8 11V .7V 132V
# 9 12V .8V 171V
# 10 11V .7V 155V
# 11 13V .8V 166V
# 12 11V .7V 138V
# 13 11V .9V 173V
# 14 9V .8V 143V
# 15 8V .7V 105V
# 16 4V .4V 21V
# 17 8V .4V 26V
# 18 10V .8V 154V
# 19 9V .8V 130V
# 20 10V .7V 113V
# 21 10V .6V 102V
# 22 11V .8V 135V
# 23 9V .7V 120V
# 24 10V .7V 126V
# 25 <NA> <NA> <NA>
# 26 <NA> <NA> <NA>
# 27 7V .4V 35V
# 28 5V .3V 9V
# 29 7V .4V 48V
# 30 9V .6V 91V
# 31 8V .5V 81V
# 32 8V .6V 80V
# 33 5V .4V 14V
# 34 4V .3V 11V
# 35 4V .2V 7V
# 36 5V .3V 13V
# 37 7V .4V 34V
# 38 5V .3V 18V
# 39 6V .3V 15V
# 40 8V .5V 58V
# 41 9V .6V 76V
# 42 6V .4V 29V
# 43 6V .4V 30V
# 44 7V .4V 31V
# 45 8V .5V 32V
# 46 7V .4V 34V
# 47 8V .4V 57V
# 48 8V .5V 58V
# 49 7V .4V 52V
# 50 8V .4V 49V
If your data has columns where you do not want to do this replacement, just use a subset:
# `...` is a placeholder for the replacement function; only columns 2 and 3
# of x are passed to lapply() and assigned back, the other columns are untouched.
x[2:3] <- lapply(x[2:3], ...)
Variant:
library(dplyr)
# dplyr variant: set values ending in N, M or P to NA in the named columns.
# if_else() is strict about types, so the "true" value must be NA_character_
# to match the character columns. across() replaces the superseded
# mutate_at() / mutate_all().
x %>%
  mutate(across(c(var1, var2, var3),
                ~ if_else(grepl("[NMP]$", .x), NA_character_, .x)))
# or, if all columns
x %>%
  mutate(across(everything(),
                ~ if_else(grepl("[NMP]$", .x), NA_character_, .x)))
The use of `NA_character_` is two-fold:
1. the columns are `character`, so the replacement value stays the same type; and
2. in the `dplyr` version, its `if_else` function requires that the class of both the "yes" and "no" arguments be the same, and `class(NA)` is not `class("A")`.
Upvotes: 2