Reputation: 3812
I have a dataframe df
that looks something like this:
X1 X2
1 0 0.06174568
2 0 0.06174568
3 0 0.05978832
4 0 0.05978832
5 0 0.06007480
...
22 0 0.06082051
23 0 0.06051641
24 128 0.06329613
25 0 0.06099445
26 0 0.06195348
27 0 0.06022723
28 0 0.06041903
29 0 0.06195348
...
36 0 0.06195348
37 0 0.06176168
38 0 0.06233710
39 103 0.06195348
40 0 0.06195348
41 0 0.06387155
42 0 0.06291252
I'd like to sum up the values of X2
until X1 != 0
, i.e. the rows 1-24 and 25-39. The sums should be added in a different column X3
:
X1 X2 X3
1 0 0.06174568 0.0000000
2 0 0.06174568 0.0000000
3 0 0.05978832 0.0000000
4 0 0.05978832 0.0000000
5 0 0.06007480 0.0000000
...
22 0 0.06082051 0.0000000
23 0 0.06051641 0.0000000
24 128 0.06329613 1.4660679
25 0 0.06099445 0.0000000
26 0 0.06195348 0.0000000
27 0 0.06022723 0.0000000
28 0 0.06041903 0.0000000
29 0 0.06195348 0.0000000
...
36 0 0.06195348 0.0000000
37 0 0.06176168 0.0000000
38 0 0.06233710 0.0000000
39 103 0.06195348 0.9876114
40 0 0.06195348 0.0000000
41 0 0.06387155 0.0000000
It's easy to solve using a for-loop:
j <- 1
df$X3 <- numeric(NROW(df))
for (i in 1:NROW(df)) {
if (df$X1[i] != 0) {
df$X3[i] <- sum(df$X2[j:i])
j <- i
}
}
But I was wondering if there was a way to calculate these sums without using a loop, possibly by using apply()
?
Heres the sample dataframe:
structure(list(X1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), X2 = c(0.0617456756756757,
0.0617456756756757, 0.0597883157894737, 0.0597883157894737, 0.0600748,
0.0611470588235294, 0.0611470588235294, 0.0599241176470588, 0.0606791111111111,
0.0628226666666667, 0.0599241176470588, 0.0619248571428571, 0.0613493684210526,
0.0613493684210526, 0.0620369230769231, 0.0594520512820513, 0.0620369230769231,
0.0608205128205128, 0.0615807692307692, 0.0612766666666667, 0.0608205128205128,
0.0608205128205128, 0.0605164102564103, 0.0632961290322581, 0.0609944516129032,
0.0619534838709678, 0.0602272258064516, 0.0604190322580645, 0.0619534838709678,
0.0623370967741936, 0.0617616774193548, 0.061378064516129, 0.0619534838709678,
0.0619534838709678, 0.061378064516129, 0.0619534838709678, 0.0617616774193548,
0.0623370967741936, 0.0619534838709678, 0.0619534838709678, 0.0638715483870968,
0.0629125161290323, 0.0629125161290323, 0.0629125161290323, 0.0636797419354839,
0.0638715483870968, 0.0632961290322581, 0.0623370967741936, 0.0652141935483871,
0.0650223870967742, 0.0632961290322581, 0.0608026451612903, 0.0615698709677419,
0.0632961290322581, 0.0608604848484849, 0.0648218181818182, 0.0614006666666667,
0.0596390196078431, 0.0604162926829268, 0.0608205128205128, 0.0608205128205128,
0.0608205128205128, 0.0595836164383562, 0.0589412054794521, 0.0599826829268293,
0.0598381463414634, 0.0599826829268293, 0.0598381463414634, 0.0599826829268293,
0.0599826829268293, 0.0598381463414634, 0.0606138, 0.0598728,
0.0597246, 0.0597246, 0.0594282, 0.0595764, 0.0601692, 0.0598728,
0.060021, 0.04617, 0.04617, 0.046398, 0.0603174, 0.0601692, 0.0597246,
0.0603174, 0.0594282, 0.060021, 0.0601692, 0.0602123076923077,
0.0606684615384615, 0.0597561538461538, 0.0601005263157895, 0.0607249473684211,
0.0607249473684211, 0.0600988235294118, 0.0602735294117647, 0.0606229411764706,
0.0602735294117647, 0.0599241176470588, 0.0614964705882353, 0.0606229411764706,
0.0606229411764706, 0.0602282857142857, 0.0614158857142857, 0.0602282857142857,
0.0605676, 0.0598889714285714, 0.0602282857142857, 0.0604713333333333,
0.0608678666666667, 0.0604713333333333, 0.0600748, 0.0602730666666667,
0.0602730666666667, 0.0601692, 0.0601692, 0.0598728, 0.0601692,
0.0601692, 0.0598728, 0.064768, 0.064592, 0.064416, 0.06424,
0.06424, 0.0642292, 0.0642292, 0.0642292, 0.0647038, 0.0643874,
0.065026, 0.06403475, 0.06443125, 0.064233, 0.06522425, 0.06443125,
0.06562075, 0.065026, 0.06443125, 0.06443125, 0.0603493333333333,
0.0603493333333333, 0.0600195555555556, 0.0603493333333333, 0.0601844444444445,
0.0601844444444445, 0.0646295, 0.06522425, 0.065026, 0.06522425,
0.06443125, 0.06443125, 0.0699253333333334, 0.0701013333333333,
0.0701013333333333, 0.0693973333333333, 0.0697493333333333, 0.0704533333333334,
0.0699253333333334, 0.0699253333333334, 0.0695733333333333, 0.0697493333333333,
0.0695997142857143, 0.0701429714285714, 0.0694186285714286, 0.0699618857142857,
0.0603493333333333, 0.0605142222222222, 0.0610088888888889, 0.0605142222222222,
0.0606791111111111, 0.0605142222222222)), .Names = c("X1", "X2"
), class = "data.frame", row.names = c(NA, 174L))
Upvotes: 3
Views: 207
Reputation: 38500
With base R, you could combine ave
and pmin
to get the result.
dat$x3 <- pmin(dat$X1, ave(dat$X2, cumsum(c(0, diff(dat$X1) < 0)), FUN=function(x) sum(x)))
Groups are constructed by calculating the difference in X1 and incrementing when it is negative. Note that this assumes, as in the example that the deviations from zero in X1 are always positive. If this is not the case, then you could replace dat$X1
with abs(dat$X1)
.
The zero values in the new variable are set to zero by using a pmin
comparison. I may be faster to use math operations like
dat$x3 <- ave(dat$X2, cumsum(c(0, diff(dat$X1) < 0)), FUN=function(x) sum(x)) * sign(dat$X1)
Upvotes: 2
Reputation: 886978
We create a group using cumsum
and then do the sum
library(dplyr)
df1 %>%
group_by(grp = lag(cumsum(X1 !=0), default = 0)) %>%
mutate(X3 = replace(rep(sum(X2),n()), X1==0, 0))
Upvotes: 5