MNU
MNU

Reputation: 764

Subtract multiple column in the same data frame in R

For the following data set

mydata=data.frame(x1_c1=c(1:5),
                  x2_c1=c(2:6),
                  x3_c1=c(3:7),
                  x4_c1=c(4:8),
                  x1_c2=0,
                  x2_c2=0,
                  x3_c2=0,
                  x4_c2=0,
                  x1_c3=c(1:5),
                  x2_c3=c(2:6),
                  x3_c3=c(3:7),
                  x4_c3=c(4:8))

> mydata
  x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1     1     2     3     4     0     0     0     0     1     2     3     4
2     2     3     4     5     0     0     0     0     2     3     4     5
3     3     4     5     6     0     0     0     0     3     4     5     6
4     4     5     6     7     0     0     0     0     4     5     6     7
5     5     6     7     8     0     0     0     0     5     6     7     8

I would like to subtract all variables ended with _c3 from the variable ended with _c1, _c2, and _c3 and then combine all columns. Here is an option to do it

mydata_update=cbind(mydata[,grep("_c1", colnames(mydata)) ]-mydata[,grep("_c3", colnames(mydata)) ],
                    mydata[,grep("_c2", colnames(mydata)) ]-mydata[,grep("_c3", colnames(mydata)) ],
                    mydata[,grep("_c3", colnames(mydata)) ]-mydata[,grep("_c3", colnames(mydata)) ])

Expected outcome is

> mydata_update
  x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1     0     0     0     0    -1    -2    -3    -4     0     0     0     0
2     0     0     0     0    -2    -3    -4    -5     0     0     0     0
3     0     0     0     0    -3    -4    -5    -6     0     0     0     0
4     0     0     0     0    -4    -5    -6    -7     0     0     0     0
5     0     0     0     0    -5    -6    -7    -8     0     0     0     0

Any other approach is appeciated.

Upvotes: 2

Views: 196

Answers (3)

akrun
akrun

Reputation: 886948

We can use split.default to split the data into groups based on the substring of the column names, then find the 'c3' column in each of the list elements with grep, subtract and cbind the list elements within do.call

out <- do.call(cbind, unname(lapply(split.default(mydata, 
           sub("_.*", "", names(mydata))), 
          function(x) x - x[,grep("_c3", names(x))])))[names(mydata)]

-output

 out
  x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1     0     0     0     0    -1    -2    -3    -4     0     0     0     0
2     0     0     0     0    -2    -3    -4    -5     0     0     0     0
3     0     0     0     0    -3    -4    -5    -6     0     0     0     0
4     0     0     0     0    -4    -5    -6    -7     0     0     0     0
5     0     0     0     0    -5    -6    -7    -8     0     0     0     0

Or we can use tidyverse

library(dplyr)
library(tidyr)
mydata %>% 
     mutate(rn = row_number()) %>% 
     pivot_longer(cols = -rn, names_to = c(".value", "grp"), 
            names_sep = "_") %>% 
     group_by(rn) %>%
     mutate(across(where(is.numeric), ~ . - .[grp == 'c3'])) %>% 
     ungroup %>%
     pivot_wider(names_from = grp, values_from = x1:x4) %>% 
     select(-rn) %>%
     select(names(mydata))

-output

# A tibble: 5 x 12
  x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     0     0     0     0    -1    -2    -3    -4     0     0     0     0
2     0     0     0     0    -2    -3    -4    -5     0     0     0     0
3     0     0     0     0    -3    -4    -5    -6     0     0     0     0
4     0     0     0     0    -4    -5    -6    -7     0     0     0     0
5     0     0     0     0    -5    -6    -7    -8     0     0     0     0

Upvotes: 1

thelatemail
thelatemail

Reputation: 93813

Match the prefixes for the data and the subtraction part, and then subtract:

subsel <- endsWith(names(mydata), "_c3")
prefix <- sub("_.+", "", names(mydata))
mydata - mydata[subsel][match(prefix, prefix[subsel])]

#  x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
#1     0     0     0     0    -1    -2    -3    -4     0     0     0     0
#2     0     0     0     0    -2    -3    -4    -5     0     0     0     0
#3     0     0     0     0    -3    -4    -5    -6     0     0     0     0
#4     0     0     0     0    -4    -5    -6    -7     0     0     0     0
#5     0     0     0     0    -5    -6    -7    -8     0     0     0     0

Or if you want to live on the edge and you are sure your data is complete and sorted as expected:

mydata - as.matrix(mydata[,endsWith(names(mydata), "_c3")])

Upvotes: 3

user16051136
user16051136

Reputation:

Here's another way using a loop:

sm <- mydata[,grep("_c3",colnames(mydata))]
mydata_update <- mydata
for (i in seq(1,ncol(mydata),ncol(sm))) {
    mydata_update[,i:(i+ncol(sm)-1)] <- mydata_update[,i:(i+ncol(sm)-1)]-sm
}

mydata_update
  x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1     0     0     0     0    -1    -2    -3    -4     0     0     0     0
2     0     0     0     0    -2    -3    -4    -5     0     0     0     0
3     0     0     0     0    -3    -4    -5    -6     0     0     0     0
4     0     0     0     0    -4    -5    -6    -7     0     0     0     0
5     0     0     0     0    -5    -6    -7    -8     0     0     0     0

Upvotes: 1

Related Questions