Reputation: 303
Here's an illustrative dataset:
set.seed(1)
sam_dat <- data.frame(
Operator = seq(1:3),
t1 = sample(runif(10)*10, 3),
t2 = sample(runif(3)*10, 3),
t3 = sample(runif(12)*10, 3),
t4 = sample(runif(34)*10, 3)
)
What would is the easiest way calculating the following (tidyverse, base R, or other)?
t1_t2 = t1 + t2
t1_t3 = t1 + t2 + t3
t1_t4 = t1 + t2 + t3 + t4
edit Is there a way to do this without having to explicitly write out each calculation? While that is feasible for the four columns in my example dataset, my real data has many more.
By manual calculation, the result for Operator 1 would be:
10.71, 19.40, 20.48
Thanks in advance!
Upvotes: 3
Views: 129
Reputation: 887691
In base R
, we can use transform
transform(sam_dat, t1_t2 = t1 + t2,
t1_t3 = t1 + t2 + t3, t1_t4 = t1 + t2 + t3 + t4)
# Operator t1 t2 t3 t4 t1_t2 t1_t3 t1_t4
#1 1 5.728534 4.976992 8.696908 1.079436 10.705526 19.40243 20.48187
#2 2 2.655087 7.176185 9.347052 7.829328 9.831272 19.17832 27.00765
#3 3 2.016819 7.698414 1.255551 4.068302 9.715234 10.97078 15.03909
Or another option is to subset the dataset into a list
and then use rowSums
sam_dat[c("t1_t2", "t1_t3", "t1_t4")] <- sapply(list(sam_dat[c('t1', 't2')],
sam_dat[c('t1', 't2', 't3')],
sam_dat[c('t1', 't2', 't3', 't4')]), rowSums)
Or make it more compact with indexing
nm1 <- paste(names(sam_dat)[2], names(sam_dat)[3:5], sep="_")
sam_dat[nm1] <- sapply(3:5, function(i) rowSums(sam_dat[2:i]))
Or another option is rowCumsums
from matrixStats
library(matrixStats)
sam_dat[nm1] <- rowCumsums(as.matrix(sam_dat[-1]))[,-1]
Or a base R
operation similar to accumulate
from purrr
is Reduce
sam_dat[nm1] <- do.call(cbind, Reduce(`+`, sam_dat[-1], accumulate = TRUE)[-1])
Or with mutate
from dplyr
library(dplyr)
sam_dat %>%
mutate(t1_t2 = t1 + t2,
t1_t3 = t1 + t2 + t3,
t1_t4 = t1 + t2 + t3 + t4)
Or another option in tidyverse
would be to pivot to 'long' format, do the computation and then transform back to 'wide'
library(tidyr)
library(stringr)
sam_dat %>%
pivot_longer(cols = -Operator) %>%
group_by(Operator) %>%
mutate(value = cumsum(value)) %>%
slice(-1) %>%
ungroup %>%
mutate(name = str_c('t1_', name)) %>%
pivot_wider(names_from = name, values_from = value) %>%
select(-Operator) %>%
bind_cols(sam_dat, .)
# A tibble: 3 x 8
# Operator t1 t2 t3 t4 t1_t2 t1_t3 t1_t4
# <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 1 5.73 4.98 8.70 1.08 10.7 19.4 20.5
#2 2 2.66 7.18 9.35 7.83 9.83 19.2 27.0
#3 3 2.02 7.70 1.26 4.07 9.72 11.0 15.0
Upvotes: 1
Reputation: 79328
Another option is to use the rowwise
function in dplyr
sam_dat[-1]%>%
rowwise()%>%
do(setNames(data.frame(t(cumsum(unlist(.)))),sprintf('t1_%s',names(.)))[-1])%>%
cbind(sam_dat,.)
Operator t1 t2 t3 t4 t1_t2 t1_t3 t1_t4
1 1 5.728534 4.976992 8.696908 1.079436 10.705526 19.40243 20.48187
2 2 2.655087 7.176185 9.347052 7.829328 9.831272 19.17832 27.00765
3 3 2.016819 7.698414 1.255551 4.068302 9.715234 10.97078 15.03909
The basic operation being:
sam_dat[-1]%>%
rowwise()%>%
do(data.frame(t(cumsum(unlist(.)))))
Upvotes: 2
Reputation: 40171
One dplyr
and purrr
option could be:
map(.x = accumulate(names(sam_dat[-1]), c)[-1],
~ sam_dat %>%
mutate(!!paste(.x, collapse = "_") := rowSums(select(., one_of(.x))))) %>%
reduce(full_join)
Operator t1 t2 t3 t4 t1_t2 t1_t2_t3 t1_t2_t3_t4
1 1 5.728534 4.976992 0.1339033 7.942399 10.70553 10.83943 18.78183
2 2 3.721239 7.698414 2.6722067 4.590657 11.41965 14.09186 18.68252
3 3 8.983897 3.841037 9.3470523 5.297196 12.82493 22.17199 27.46918
Or to further match your desired output:
map(.x = accumulate(names(sam_dat[-1]), c)[-1],
~ sam_dat %>%
mutate(!!paste(head(.x, 1), tail(.x, 1), sep = "_") := rowSums(select(., one_of(.x))))) %>%
reduce(full_join)
Operator t1 t2 t3 t4 t1_t2 t1_t3 t1_t4
1 1 5.728534 4.976992 0.1339033 7.942399 10.70553 10.83943 18.78183
2 2 3.721239 7.698414 2.6722067 4.590657 11.41965 14.09186 18.68252
3 3 8.983897 3.841037 9.3470523 5.297196 12.82493 22.17199 27.46918
Upvotes: 0
Reputation: 102519
Here is a base R solution using apply
+ cumsum
, i.e.,
sam_dat <- cbind(sam_dat,
`colnames<-`(t(apply(sam_dat[-1], 1,cumsum))[,-1],
paste0("t1_",names(sam_dat)[-c(1,2)])))
such that
> sam_dat
Operator t1 t2 t3 t4 t1_t2 t1_t3 t1_t4
1 1 5.728534 4.976992 8.696908 1.079436 10.705526 19.40243 20.48187
2 2 2.655087 7.176185 9.347052 7.829328 9.831272 19.17832 27.00765
3 3 2.016819 7.698414 1.255551 4.068302 9.715234 10.97078 15.03909
Upvotes: 1