Mateusz1981
Mateusz1981

Reputation: 1867

Why is `|` (or) not working in dplyr?

I've encountered the following problem using dplyr:

# Reproducible example data (dput() output): a tbl_df with 45 rows and 14 columns.
# D15 and LD15 are the inputs used to derive LEV_31; LD15 is 1L in three rows
# and NA everywhere else.
df <- structure(list(Yta = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L), RAD = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
    3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 
    6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 
    9L, 9L, 9L), PL = c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 
    2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 
    3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 
    4L, 5L), IDNR = c(1171L, 1615L, 1961L, 2095L, 2194L, 2324L, 1636L, 
    1343L, 1499L, 1949L, 1761L, 1950L, 2145L, 2567L, -4L, 2082L, 
    1463L, 1118L, 1704L, -5L, 1067L, 1485L, 2052L, 2076L, 1139L, 
    1975L, 2215L, 1835L, 2546L, 1660L, 1542L, 1281L, 1841L, 1511L, 
    2091L, 1665L, 1400L, 2021L, 1440L, -6L, 1837L, 1955L, 1321L, 
    1859L, 1757L), H91 = c(18L, 27L, 30L, 25L, 40L, 28L, 23L, 18L, 
    23L, 38L, 20L, 21L, 21L, 19L, NA, 23L, 32L, 18L, 29L, NA, 18L, 
    21L, 12L, 23L, 25L, 12L, 22L, 14L, 12L, 3L, 8L, 18L, NA, 24L, 
    26L, 19L, 22L, 22L, 26L, NA, 25L, 17L, 25L, 29L, 21L), D15 = c(NA, 
    182L, 96L, NA, NA, 142L, NA, NA, NA, 191L, NA, 134L, 111L, 99L, 
    NA, NA, NA, 37L, 217L, NA, 91L, 160L, 48L, NA, 132L, NA, 93L, 
    57L, NA, NA, NA, 82L, NA, 141L, NA, 120L, 109L, 105L, 118L, NA, 
    NA, NA, NA, NA, NA), LD15 = c(NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, 1L, NA, NA, NA, 
    NA, NA, NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA), H15 = c(NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, 196L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 126L, 
    NA, NA, NA, NA, NA, NA, NA), block = c(1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L), block1 = c(2, 2, 2, 2, 2, 2, 2, 
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), x = c(10, 
    10, 10, 10, 10, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 13, 13, 
    13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 16, 16, 16, 
    16, 16, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18), y = c(1, 2, 
    3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 
    4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 
    5), LEV_6 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 
    1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 
    1, 1, 0, 1, 1, 1, 1, 1), LEV_31 = c(0, NA, NA, 0, 0, NA, 0, 0, 
    0, NA, 0, NA, NA, NA, 0, 0, 0, 0, NA, 0, 0, NA, NA, 0, NA, 0, 
    NA, 0, 0, 0, 0, NA, 0, NA, 0, NA, NA, NA, NA, 0, 0, 0, 0, 0, 
    0)), .Names = c("Yta", "RAD", "PL", "IDNR", "H91", "D15", "LD15", 
    "H15", "block", "block1", "x", "y", "LEV_6", "LEV_31"), class = c("tbl_df", 
    "data.frame"), row.names = c(NA, -45L))

I want to create a variable LEV_31 that is either 0 or 1: 0 if is.na(D15) | LD15 == 1, otherwise 1.

I tried this

# Nested ifelse(): when D15 is present but LD15 is NA, the inner test
# `LD15 == 1` evaluates to NA, so ifelse() returns NA for LEV_31 instead of 1.
df <- df %>% mutate(LEV_6 = ifelse(is.na(H91), 0, 1), LEV_31 = ifelse(is.na(D15), 0, ifelse(LD15 == 1, 0, 1)))

and this

# Single `|` test: when D15 is present and LD15 is NA the test is FALSE | NA,
# which is NA, so ifelse() returns NA for LEV_31 instead of 1.
df <- df %>% mutate(LEV_6 = ifelse(is.na(H91), 0, 1), LEV_31 = ifelse(is.na(D15) | LD15 == 1, 0, 1))

but I got NAs instead of 1s

Upvotes: 0

Views: 76

Answers (1)

Cath
Cath

Reputation: 24074

The problem is how R handles NA in an | expression: TRUE | NA returns TRUE, because the result is TRUE whatever the missing value would have been, whereas NA | FALSE depends entirely on the missing value, so R cannot know the answer and returns NA. ifelse() then returns NA wherever its test is NA.

What you need is 0 if is.na(D15) | LD15 == 1, and 1 otherwise. In other words, since LD15 is either 1 or NA, you want 1 when D15 is not NA and LD15 is NA, which can be written like this:

# LEV_31 is 1L only when D15 is observed and LD15 is missing, 0L otherwise.
# Both operands come from is.na(), so the result can never be NA.
df$LEV_31 <- as.integer(!is.na(df$D15) & is.na(df$LD15))

or, using dplyr:

# mutate() returns a new data frame, so assign it back to keep the new column.
# as.integer() keeps the result identical to the base R version; is.na() never
# returns NA, so LEV_31 is always 0L or 1L.
df <- df %>% mutate(LEV_31 = as.integer(!is.na(D15) & is.na(LD15)))

df$LEV_31
# Rows 18, 21 and 28 have D15 present but LD15 == 1, so they are 0.
#[1] 0 1 1 0 0 1 0 0 0 1 0 1 1 1 0 0 0 0 1 0 0 1 1 0 1 0 1 0 0 0 0 1 0 1 0 1 1 1 1 0 0 0 0 0 0

As mentioned in a comment by @DavidArenburg, if LD15 can take values other than NA or 1, you can do:

# General case (LD15 may hold values other than 1 or NA):
# 1L when D15 is observed and LD15 is not equal to 1 (including when LD15 is NA).
# `%in%` never returns NA, so no separate is.na(LD15) guard is needed.
with(df, as.integer(!is.na(D15) & !(LD15 %in% 1)))

Upvotes: 3

Related Questions