Reputation: 303

Change multiple cell values based on single cell value

I have a dataframe:

a = c("yes", "yes", "no", "yes", "no")
b = c("brown", "grey", "white", "grey", NA)
c = c(7, 6, NA, 10, 8)
d = c("male", "female", "female", "male", "female")
Zoo = cbind.data.frame(a, b, c, d)
colnames(Zoo) = c("animal", "colour", "age", "gender")    

   animal colour  age  gender
    yes    brown   7   male
    yes    grey    6 female
    no     white  NA female
    yes    grey   10   male
    no     NA      8 female

If the value for 'animal' is no, I would like to change any non-NA values in the corresponding columns to "NL" (for non-logical). I can do this one column at a time as follows:

Zoo$colour = as.character(Zoo$colour)

Zoo$colour = 
  ifelse(Zoo$animal == "no" & !is.na(Zoo$colour), "NL", Zoo$colour)

and eventually arrive at this:

   animal colour  age  gender
    yes    brown   7   male
    yes    grey    6 female
    no     NL     NA     NL
    yes    grey   10   male
    no     NA     NL     NL

I'm sure there is a way of doing this more efficiently. Is there? Thank you!

Upvotes: 0

Answers (2)

Roman Luštrik

Reputation: 70623

Here is another way. Notice that I create a data.frame with stringsAsFactors = FALSE because working with factor levels in this setting is tedious. You can freely convert character columns to factors once you're done with this.

Basically, this code goes through each row, finds columns which have non-NAs and inserts "NL" in their place.

a = c("yes", "yes", "no", "yes", "no")
b = c("brown", "grey", "white", "grey", NA)
c = c(7, 6, NA, 10, 8)
d = c("male", "female", "female", "male", "female")
zoo <- data.frame(animal = a, color = b, age = c, gender = d, stringsAsFactors = FALSE)

for (i in 1:nrow(zoo)) {
  if (zoo[i, "animal"] == "no") {
    find.el <- !is.na(zoo[i, which(colnames(zoo) != "animal")])
    zoo[, 2:ncol(zoo)][i, find.el] <- "NL"
  }
}

  animal color  age gender
1    yes brown    7   male
2    yes  grey    6 female
3     no    NL <NA>     NL
4    yes  grey   10   male
5     no  <NA>   NL     NL

Upvotes: 3

akrun

Reputation: 886968

For multiple columns, we can use the efficient approach with set from data.table

library(data.table)
setDT(Zoo)
for(nm in names(Zoo)[-1]) {
  set(Zoo, i = NULL, j = nm, as.character(Zoo[[nm]]))
  set(Zoo, i = which(Zoo[['animal']]=='no' & !is.na(Zoo[[nm]])),
      j = nm, value = "NL")
}

Zoo
#   animal colour age gender
#1:    yes  brown   7   male
#2:    yes   grey   6 female
#3:     no     NL  NA     NL
#4:    yes   grey  10   male
#5:     no     NA  NL     NL

NOTE: This should be very efficient as we are using set

Or otherwise, we can use the elegant tidyverse syntax

library(dplyr)
Zoo %>% 
   mutate_at(2:4, funs(replace(., Zoo[['animal']]== 'no' & !is.na(.), 'NL')))
#   animal colour  age gender
#1    yes  brown    7   male
#2    yes   grey    6 female
#3     no     NL <NA>     NL
#4    yes   grey   10   male
#5     no   <NA>   NL     NL

Benchmarks

Zoo1 <- Zoo[rep(1:nrow(Zoo), 1e5),]
Zoo2 <- copy(Zoo1)
Zoo3 <- copy(Zoo2)

system.time({
setDT(Zoo2)
for(nm in names(Zoo2)[-1]) {
  set(Zoo2, i = NULL, j = nm, as.character(Zoo2[[nm]]))
  set(Zoo2, i = which(Zoo[['animal']]=='no' & !is.na(Zoo2[[nm]])),
      j = nm, value = "NL")
}
})
# user  system elapsed 
#   0.40    0.01    0.42 

system.time({
  Zoo3 %>% 
   mutate_at(2:4, funs(replace(., Zoo3[['animal']]== 'no' & !is.na(.), 'NL')))
 })
 #user  system elapsed 
 #  0.42    0.03    0.46 


system.time({
 for (i in 1:nrow(Zoo1)) {
  if (Zoo1[i, "animal"] == "no") {
    find.el <- !is.na(Zoo1[i, which(colnames(Zoo1) != "animal")])
    Zoo1[, 2:ncol(Zoo1)][i, find.el] <- "NL"
  }
}
})
#     user  system elapsed 
#  2086.49  577.51 2686.83

data

Zoo <- data.frame(animal = a, colour = b, age = c, gender = d, stringsAsFactors=FALSE)

Upvotes: 0

Change multiple cell values based on single cell value

Answers (2)

Benchmarks

data

Related Questions