Reputation: 764
For the following data set
mydata=data.frame(x1_c1=c(1:5),
x2_c1=c(2:6),
x3_c1=c(3:7),
x4_c1=c(4:8),
x1_c2=0,
x2_c2=0,
x3_c2=0,
x4_c2=0,
x1_c3=c(1:5),
x2_c3=c(2:6),
x3_c3=c(3:7),
x4_c3=c(4:8))
> mydata
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 1 2 3 4 0 0 0 0 1 2 3 4
2 2 3 4 5 0 0 0 0 2 3 4 5
3 3 4 5 6 0 0 0 0 3 4 5 6
4 4 5 6 7 0 0 0 0 4 5 6 7
5 5 6 7 8 0 0 0 0 5 6 7 8
I would like to subtract all variables ended with _c3
from the variable ended with _c1
, _c2
, and _c3
and then combine all columns. Here is an option to do it
mydata_update=cbind(mydata[,grep("_c1", colnames(mydata)) ]-mydata[,grep("_c3", colnames(mydata)) ],
mydata[,grep("_c2", colnames(mydata)) ]-mydata[,grep("_c3", colnames(mydata)) ],
mydata[,grep("_c3", colnames(mydata)) ]-mydata[,grep("_c3", colnames(mydata)) ])
Expected outcome is
> mydata_update
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
Any other approach is appeciated.
Upvotes: 2
Views: 196
Reputation: 886948
We can use split.default
to split the data into groups based on the substring of the column names, then find the 'c3' column in each of the list
elements with grep
, subtract and cbind
the list
elements within do.call
out <- do.call(cbind, unname(lapply(split.default(mydata,
sub("_.*", "", names(mydata))),
function(x) x - x[,grep("_c3", names(x))])))[names(mydata)]
-output
out
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
Or we can use tidyverse
library(dplyr)
library(tidyr)
mydata %>%
mutate(rn = row_number()) %>%
pivot_longer(cols = -rn, names_to = c(".value", "grp"),
names_sep = "_") %>%
group_by(rn) %>%
mutate(across(where(is.numeric), ~ . - .[grp == 'c3'])) %>%
ungroup %>%
pivot_wider(names_from = grp, values_from = x1:x4) %>%
select(-rn) %>%
select(names(mydata))
-output
# A tibble: 5 x 12
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
Upvotes: 1
Reputation: 93813
Match the prefixes for the data and the subtraction part, and then subtract:
subsel <- endsWith(names(mydata), "_c3")
prefix <- sub("_.+", "", names(mydata))
mydata - mydata[subsel][match(prefix, prefix[subsel])]
# x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
#1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
#2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
#3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
#4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
#5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
Or if you want to live on the edge and you are sure your data is complete and sorted as expected:
mydata - as.matrix(mydata[,endsWith(names(mydata), "_c3")])
Upvotes: 3
Reputation:
Here's another way using a loop:
sm <- mydata[,grep("_c3",colnames(mydata))]
mydata_update <- mydata
for (i in seq(1,ncol(mydata),ncol(sm))) {
mydata_update[,i:(i+ncol(sm)-1)] <- mydata_update[,i:(i+ncol(sm)-1)]-sm
}
mydata_update
x1_c1 x2_c1 x3_c1 x4_c1 x1_c2 x2_c2 x3_c2 x4_c2 x1_c3 x2_c3 x3_c3 x4_c3
1 0 0 0 0 -1 -2 -3 -4 0 0 0 0
2 0 0 0 0 -2 -3 -4 -5 0 0 0 0
3 0 0 0 0 -3 -4 -5 -6 0 0 0 0
4 0 0 0 0 -4 -5 -6 -7 0 0 0 0
5 0 0 0 0 -5 -6 -7 -8 0 0 0 0
Upvotes: 1