Armags
Armags

Reputation: 53

How to merge variables looping through by variable number in R

I have a dataframe with a lot of variables seen in multiple conditions. I'd like to merge each variable by condition.

The example data frame is a simplified version of what I have (3 variables over 2 conditions).

VAR.B_1 <- c(1, 2, 3, 4, 5, 'NA', 'NA', 'NA', 'NA', 'NA')
VAR.B_2 <- c(2, 2, 3, 4, 5,'NA', 'NA', 'NA', 'NA', 'NA')
VAR.B_3 <- c(1, 1, 1, 1, 1,'NA', 'NA', 'NA', 'NA', 'NA')
VAR.E_1 <- c(NA, NA, NA, NA, NA, 1, 1, 1, 1, 1)
VAR.E_2 <- c(NA, NA, NA, NA, NA, 1, 2, 3, 4, 5)
VAR.E_3 <- c(NA, NA, NA, NA, NA, 1, 1, 1, 1, 1)
Condition <- c("B", "B","B","B","B","E","E","E","E","E")

#Example dataset
data<-as.data.frame(cbind(VAR.B_1,VAR.B_2,VAR.B_3, VAR.E_1,VAR.E_2, VAR.E_3, Condition))

I want to end up with this, appended to the original data frame:

VAR_1  VAR_2   VAR_3 
1       2       1    
2       2       1    
3       3       1    
4       4       1    
5       5       1   
1       1       1      
1       2       1             
1       3       1             
1       4       1              
1       5       1              

I understand that R won't work with i inside the variable name, but I have an example of the kind of for loop I was trying to do. I would rather not call variables by column location, since there will be a lot of variables.

##Example of how I want to merge - this code does not work
for(i in 1:3) {
     data$VAR_[,i] <-ifelse(data$Condition == "B", VAR.B_[,i], 
          ifelse(data$Condition == "E", VAR.E_[,i], NA))
}

Upvotes: 0

Views: 133

Answers (3)

Tim Biegeleisen
Tim Biegeleisen

Reputation: 522161

Your data appears to have two kinds of NA values in it. It has NA, or R's NA value, and it also has the string 'NA'. In my solution below, I replace both with zero, cast each column in the data frame to numeric, and then just sum together like-numbered VAR columns. Then, drop the original columns which you don't want anymore.

data <- as.data.frame(cbind(VAR.B_1,VAR.B_2,VAR.B_3, VAR.E_1,VAR.E_2, VAR.E_3),
                      stringsAsFactors=FALSE)
data[is.na(data)] <- 0
data[data == 'NA'] <- 0
data <- as.data.frame(lapply(data, as.numeric))

data$VAR_1 <- data$VAR.B_1 + data$VAR.E_1
data$VAR_2 <- data$VAR.B_2 + data$VAR.E_2
data$VAR_3 <- data$VAR.B_3 + data$VAR.E_3

data <- data[c("VAR_1", "VAR_2", "VAR_3")]

Demo

Upvotes: 0

markdly
markdly

Reputation: 4534

This might work for your situation:

library(tidyverse)
library(stringr)
data %>% 
  mutate_all(as.character) %>% 
  gather(key, value, -Condition) %>%
  filter(!is.na(value), value != "NA") %>%
  mutate(key = str_replace(key, paste0("\\.", Condition), "")) %>%
  group_by(Condition, key) %>%
  mutate(rowid = 1:n()) %>%
  spread(key, value) %>%
  bind_cols(data)
#> # A tibble: 10 x 12
#> # Groups:   Condition [2]
#>    Condition rowid VAR_1 VAR_2 VAR_3 VAR.B_1 VAR.B_2 VAR.B_3 VAR.E_1
#>        <chr> <int> <chr> <chr> <chr>  <fctr>  <fctr>  <fctr>  <fctr>
#>  1         B     1     1     2     1       1       2       1      NA
#>  2         B     2     2     2     1       2       2       1      NA
#>  3         B     3     3     3     1       3       3       1      NA
#>  4         B     4     4     4     1       4       4       1      NA
#>  5         B     5     5     5     1       5       5       1      NA
#>  6         E     1     1     1     1      NA      NA      NA       1
#>  7         E     2     1     2     1      NA      NA      NA       1
#>  8         E     3     1     3     1      NA      NA      NA       1
#>  9         E     4     1     4     1      NA      NA      NA       1
#> 10         E     5     1     5     1      NA      NA      NA       1
#> # ... with 3 more variables: VAR.E_2 <fctr>, VAR.E_3 <fctr>,
#> #   Condition1 <fctr>

Upvotes: 1

d.b
d.b

Reputation: 32548

data.frame(lapply(split.default(data[-NCOL(data)], gsub("\\D+", "", head(names(data), -1))),
       function(a){
           a = sapply(a, function(x) as.numeric(as.character(x)))
           rowSums(a, na.rm = TRUE)
       }))
#   X1 X2 X3
#1   1  2  1
#2   2  2  1
#3   3  3  1
#4   4  4  1
#5   5  5  1
#6   1  1  1
#7   1  2  1
#8   1  3  1
#9   1  4  1
#10  1  5  1
#Warning messages:
#1: In FUN(X[[i]], ...) : NAs introduced by coercion
#2: In FUN(X[[i]], ...) : NAs introduced by coercion
#3: In FUN(X[[i]], ...) : NAs introduced by coercion

Upvotes: 0

Related Questions