lofus77
lofus77

Reputation: 191

create new column based on the value in various other column values - Using R

DATA <- data.frame(
  LONDON= c(2,0,1,3,0,1), 
  STOKE = c(45, 40, 6, 22, 16, 10), 
  DERBY = c(5, 9, 8, 9, 2, 3), 
  NEWCASTLE = c(7, 6, 4, 10, 12, 8),
  MANCHESTER = c(2, 12, 1, 7, 8, 3),
  WATFORD = c(9, 2, 4, 6, 13, 6),
  KENT = c(1, 3, 23, 7, 12, 2),
  SURREY = c(2, 8, 4, 5, 1, 4),
  ESSEX = c(4, 12, 7, 9, 5, 1),
  HULL = c(1,3, 2, 4, 3, 1))

I wanted to create a "NEW_COL" column, which says "YES" or "NO" based on four different value columns. I want the new column to have a "YES" (otherwise have a ‘NO’) when DERBY or NEWCASTLE or SURREY is > 7 or when KENT or HULL = 2 I also want the "NEW_COL" column to be the second column in my dataset.

I thought something like this (below) would have done it, but it didn't. Any help on what I’m doing wrong here/how to navigate through.

DATA2 <- DATA %>% mutate(NEW_COL = ifelse(rowSums(select(., DERBY:NEWCASTLE) > 7 | select(., SURREY >7)  | select(., KENT == 2) | select(., HULL == 2), na.rm = TRUE), 'Yes', 'No'), .before = 2)

Upvotes: 6

Views: 2362

Answers (4)

TarJae
TarJae

Reputation: 78907

We could use an ifelse statement with your conditions

DATA %>% 
    mutate(NEW_COL = 
               ifelse(DERBY > 7 | NEWCASTLE > 7 | SURREY > 7 |
                             KENT == 2 | HULL == 2, "YES","NO"), .before=2)

Output:

  LONDON NEW_COL STOKE DERBY NEWCASTLE MANCHESTER WATFORD KENT SURREY ESSEX HULL
1      2      NO    45     5         7          2       9    1      2     4    1
2      0     YES    40     9         6         12       2    3      8    12    3
3      1     YES     6     8         4          1       4   23      4     7    2
4      3     YES    22     9        10          7       6    7      5     9    4
5      0     YES    16     2        12          8      13   12      1     5    3
6      1     YES    10     3         8          3       6    2      4     1    1

Upvotes: 4

Ronak Shah
Ronak Shah

Reputation: 388797

Your rowSums attempt was close. You can do -

library(dplyr)

DATA %>% 
  mutate(NEW_COL = ifelse(rowSums(select(., DERBY, NEWCASTLE, SURREY) > 7) > 0 | 
                          rowSums(select(., KENT, HULL) == 2) > 0, 'Yes', 'No'), .before = 2)

#  LONDON NEW_COL STOKE DERBY NEWCASTLE MANCHESTER WATFORD KENT SURREY ESSEX HULL
#1      2      No    45     5         7          2       9    1      2     4    1
#2      0     Yes    40     9         6         12       2    3      8    12    3
#3      1     Yes     6     8         4          1       4   23      4     7    2
#4      3     Yes    22     9        10          7       6    7      5     9    4
#5      0     Yes    16     2        12          8      13   12      1     5    3
#6      1     Yes    10     3         8          3       6    2      4     1    1

Upvotes: 1

icj
icj

Reputation: 749

I'm a fan of the case_when function as it's very readable and easily modifiable imo.

require(tidyverse)

greater_threshold <- 7
equality_value <- 2

df_new <- DATA %>%
  mutate(NEW_COL = case_when(DERBY > greater_threshold ~ "YES",
                             NEWCASTLE  > greater_threshold ~ "YES",
                             SURREY  > greater_threshold ~ "YES",
                             KENT == equality_value ~ "YES",
                             HULL == equality_value ~ "YES",
                             T ~ "NO"),
         .before = 2)

Upvotes: 1

akrun
akrun

Reputation: 886928

We may use if_any and .before to change the position

library(dplyr)
DATA <- DATA %>% 
    mutate(NEW_COL = c("NO", "YES")[1 + 
        (if_any(c(DERBY, NEWCASTLE, SURREY), `>`, 7)|
       if_any(c(KENT, HULL), `==`, 2))], .before = 2)

-output

DATA
   LONDON NEW_COL STOKE DERBY NEWCASTLE MANCHESTER WATFORD KENT SURREY ESSEX HULL
1      2      NO    45     5         7          2       9    1      2     4    1
2      0     YES    40     9         6         12       2    3      8    12    3
3      1     YES     6     8         4          1       4   23      4     7    2
4      3     YES    22     9        10          7       6    7      5     9    4
5      0     YES    16     2        12          8      13   12      1     5    3
6      1     YES    10     3         8          3       6    2      4     1    1

Upvotes: 4

Related Questions