Reputation: 2063
After some data wrangling and merging of datasets, I ended up with three variables holding the same information as in the dummy example:
cond.x <- c("1","2", "3","4",NA, "4", "1")
cond.y <- c("1", NA, "3", NA, "1", "4", NA)
dx <- c("scz", "cont", "siscz", "sicon", "scz", NA,NA)
mydata <-data.frame(cond.x, cond.y, dx)
> mydata
cond.x cond.y dx
1 1 1 scz
2 2 <NA> cont
3 3 3 siscz
4 4 <NA> sicon
5 <NA> 1 scz
6 4 4 <NA>
7 1 <NA> <NA>
So that 1 means scz, 2 means cont, 3 means siscz, 4 means siscon.
Upvotes: 2
Views: 66
Reputation: 887711
We can do this using coalesce
from tidyr
to create non-NA entries based on both 'cond.x' and 'cond.y', then use the index to update the values in 'dx'
library(tidyverse)
mydata %>%
mutate(dx = dx[coalesce(cond.x, cond.y)])
# cond.x cond.y dx
#1 1 1 scz
#2 2 <NA> cont
#3 3 3 siscz
#4 4 <NA> sicon
#5 <NA> 1 scz
#6 4 4 sicon
#7 1 <NA> scz
Upvotes: 1
Reputation: 2362
A bit shorter, mainly thanks to the data.table
package:
x <- c("1","2", "3","4",NA, "4", "1")
y <- c("1", NA, "3", NA, "1", "4", NA)
dx <- c("scz", "cont", "siscz", "sicon", "scz", NA,NA)
mydata <- data.frame(x, y, dx, stringsAsFactors = FALSE)
library(data.table)
# Convert to data.table by reference
setDT(mydata)
# Merge x and y into xy
mydata[, xy := unique(na.omit(x), na.omit(y)), by = dx][]
# Create lookup table
lookup <- mydata[, .(xy = first(xy)), by = dx] %>% na.omit() %>% setnames(c('dx_l', 'xy'))
# Join mydata with lookup using xy
mydata[lookup, dy := dx_l, on = c(xy = 'xy')][]
mydata[, .(dy)]
# dy
# 1: scz
# 2: cont
# 3: siscz
# 4: sicon
# 5: scz
# 6: sicon
# 7: scz
Upvotes: 1
Reputation: 12723
Convert dx
as factor and get their levels in to level_dx
. Then convert all 3 columns of mydata
into integer type.
mydata$dx <- factor(mydata$dx, levels = c("scz", "cont", "siscz", "sicon"))
level_dx <- levels(mydata$dx)
mydata[, 1:2] <- lapply(mydata[, 1:2], function(x) as.integer(as.character(x)) )
mydata$dx <- as.integer(mydata$dx)
Using fill
function from tidyr
package, fill the columns with their previous values either up or down direction and convert the dx
column back to factor variable.
library('tidyr')
mydata <- fill( data.frame(t(mydata)), 1:7, .direction = 'up')
mydata <- data.frame( t( fill( mydata, 1:7, .direction = 'down') ) )
mydata$dx <- factor( mydata$dx, levels = sort(unique( mydata$dx )), labels = level_dx)
# cond.x cond.y dx
# X1 1 1 scz
# X2 2 2 cont
# X3 3 3 siscz
# X4 4 4 sicon
# X5 1 1 scz
# X6 4 4 sicon
# X7 1 1 scz
Data:
cond.x <- c("1","2", "3","4",NA, "4", "1")
cond.y <- c("1", NA, "3", NA, "1", "4", NA)
dx <- c("scz", "cont", "siscz", "sicon", "scz", NA,NA)
mydata <-data.frame(cond.x, cond.y, dx)
mydata
# cond.x cond.y dx
# 1 1 1 scz
# 2 2 <NA> cont
# 3 3 3 siscz
# 4 4 <NA> sicon
# 5 <NA> 1 scz
# 6 4 4 <NA>
# 7 1 <NA> <NA>
Upvotes: 1