Reputation: 103
I am trying to work out how sum rows based on multiple conditions, but retain all the rows in my data frame, so only those meeting a condition have the summed value, all others have the original value (from a column called iresult_posPPP). I have data for multiple countries, in different years, divisions etc. Where my ISO column is 'ALL' I need this value to be the sum of all the rows in iresult_posPPP which have the same values for DataYear, Division, FurtherDetails1 and FurtherDetails2.
I've attached a dput of my dataframe below:
Data <- structure(list(ID.x = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Indicator = c("Gym and Leisure Centre",
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre",
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre",
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre",
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre",
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre",
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre",
"Gym and Leisure Centre", "Gym and Leisure Centre"), IndicatorID = c(98L,
98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L,
98L, 98L, 98L, 98L, 98L, 98L, 98L), InputA = c(3762.134507, 4447.309316,
4293.110921, 3862.76676, 4071.175351, 4323.016371, 4036.130293,
4450.854575, 4224.807119, 4563, 4888.173223, 4473.037451, 4136.032594,
4429.615323, 4972.768468, 4910.636192, 4947.585664, 4119.657378,
4809.477176, 4135.606089, 4935.381334), InputAName = c("Total number of people using the gym",
"Total number of people using the gym", "Total number of people using the gym",
"Total number of people using the gym", "Total number of people using the gym",
"Total number of people using the gym", "Total number of people using the gym",
"Total number of people using the gym", "Total number of people using the gym",
"Total number of people using the gym", "Total number of people using the gym",
"Total number of people using the gym", "Total number of people using the gym",
"Total number of people using the gym", "Total number of people using the gym",
"Total number of people using the gym", "Total number of people using the gym",
"Total number of people using the gym", "Total number of people using the gym",
"Total number of people using the gym", "Total number of people using the gym"
), InputAUnit = c("#", "#", "#", "#", "#", "#", "#", "#", "#",
"#", "#", "#", "#", "#", "#", "#", "#", "#", "#", "#", "#"),
OutputCode = c("GBP", "GBP", "GBP", "GBP", "GBP", "GBP",
"GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP",
"GBP", "GBP", "GBP", "GBP", "GBP", "GBP"), DataYear = c(2016L,
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L), Country.x = c("Uzbekistan", "Vanuatu", "Venezuela, RB",
"Vietnam", "Virgin Islands (U.S.)", "West Bank and Gaza",
"Yemen, Rep.", "Zambia", "Zimbabwe", "ALL", "Afghanistan",
"Albania", "Algeria", "American Samoa", "Andorra", "Angola",
"Antigua and Barbuda", "Argentina", "Armenia", "Aruba", "Australia"
), ISO = c("UZB", "VUT", "VEN", "VNM", "VIR", "PSE", "YEM",
"ZMB", "ZWE", "ALL", "AFG", "ALB", "DZA", "ASM", "AND", "AGO",
"ATG", "ARG", "ARM", "ABW", "AUS"), Division = c("Two", "Two",
"Two", "Two", "Two", "Two", "Two", "Two", "Two", "One", "One",
"One", "One", "One", "One", "One", "One", "One", "One", "One",
"One"), FurtherDetails1 = c("fd1b", "fd1b", "fd1b", "fd1b",
"fd1b", "fd1b", "fd1b", "fd1b", "fd1b", "fd1a", "fd1a", "fd1a",
"fd1a", "fd1a", "fd1a", "fd1a", "fd1a", "fd1a", "fd1a", "fd1a",
"fd1a"), FurtherDetails2 = c("fd2b", "fd2b", "fd2b", "fd2b",
"fd2b", "fd2b", "fd2b", "fd2b", "fd2b", "fd2a", "fd2a", "fd2a",
"fd2a", "fd2a", "fd2a", "fd2a", "fd2a", "fd2a", "fd2a", "fd2a",
"fd2a"), ID.y = c(168L, 168L, 168L, 168L, 168L, 168L, 168L,
168L, 168L, 189L, 189L, 189L, 189L, 189L, 189L, 189L, 189L,
189L, 189L, 189L, 189L), Code.x = c("GBP", "GBP", "GBP",
"GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP",
"GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP"
), CoefficientYear = c(2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L), Percent = c(-2.69,
-2.69, -2.69, -2.69, -2.69, -2.69, -2.69, -2.69, -2.69, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), CoeffqInflation = c(-0.0269,
-0.0269, -0.0269, -0.0269, -0.0269, -0.0269, -0.0269, -0.0269,
-0.0269, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Method = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", ""), ID.x.x = c(248L, 248L, 248L, 248L, 248L,
248L, 248L, 248L, 248L, 248L, 248L, 248L, 248L, 248L, 248L,
248L, 248L, 248L, 248L, 248L, 248L), InputCode = c("GBP",
"GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP",
"GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP",
"GBP", "GBP"), CoeffqFactor = c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), Year = c(2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L), Type = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ID.y.y = c(212L,
213L, 214L, 215L, 216L, 217L, 218L, 219L, 220L, 221L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L), Country.y = c("Uzbekistan",
"Vanuatu", "Venezuela, RB", "Vietnam", "Virgin Islands (U.S.)",
"West Bank and Gaza", "Yemen, Rep.", "Zambia", "Zimbabwe",
"ALL", "Afghanistan", "Albania", "Algeria", "American Samoa",
"Andorra", "Angola", "Antigua and Barbuda", "Argentina",
"Armenia", "Aruba", "Australia"), Currency = c("GBP", "GBP",
"GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP",
"GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP",
"GBP"), CoeffqPPP = c(0.184436278, 0.184436278, 0.367527677,
0.184436278, 0.994525717, 0.184436278, 0.184436278, 0.184436278,
0.112411792, 0, 0.112411792, 0.347344672, 0.367527677, 0.367527677,
0.994525717, 0.264750217, 0.994525717, 0.389806873, 0.188019865,
0.994525717, 0.994525717), Step = c(3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), Coeffq = c(2756.10980649717,
2756.10980649717, 2756.10980649717, 2756.10980649717, 2756.10980649717,
2756.10980649717, 2756.10980649717, 2756.10980649717, 2756.10980649717,
2756.10980649717, 2756.10980649717, 2756.10980649717, 2756.10980649717,
2756.10980649717, 2756.10980649717, 2756.10980649717, 2756.10980649717,
2756.10980649717, 2756.10980649717, 2756.10980649717, 2756.10980649717
), CoeffqYear = c("2017", "2017", "2017", "2017", "2017",
"2017", "2017", "2017", "2017", "2017", "2017", "2017", "2017",
"2017", "2017", "2017", "2017", "2017", "2017", "2017", "2017"
), CoeffqUnit = c("GBP", "GBP", "GBP", "GBP", "GBP", "GBP",
"GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP",
"GBP", "GBP", "GBP", "GBP", "GBP", "GBP"), Step2iresult = c(1.32904727554592,
1.57109861939866, 1.51662503362256, 1.36459757855418, 1.43822197171544,
1.52718971619107, 1.42584162253612, 1.57235105117809, 1.49249538546973,
1.61196860639412, 1.72684237959508, 1.58018758409756, 1.46113405578585,
1.56484787181204, 1.75672729723484, 1.73477786082112, 1.74783098133106,
1.45534919185534, 1.69904146852532, 1.46098338458917, 1.74351956410071
), iresult_pos = c(3564.46552325877, 4213.64007550875, 4067.54352800449,
3659.81038551155, 3857.2688325682, 4095.8776944954, 3824.06510187511,
4216.9990605352, 4002.82852462449, 4442.76248384841, 4759.36721667692,
4355.17049663636, 4027.04589975837, 4312.8925651774, 4841.73333125022,
4781.23827430327, 4817.21410774609, 4011.10217955023, 4682.744853048,
4026.63063339563, 4805.33136843763), iresult_posPPP = c(675.590128629298,
798.631273618818, 1536.26022396863, 693.661294511865, 3942.19818248036,
776.311208628047, 724.793273270513, 799.267918050159, 462.403789458181,
0, 535.008997612704, 1512.74526765823, 1480.05082471057,
1585.10738563022, 4815.22831278442, 1265.8338706505, 4790.8433144487,
1563.55519789396, 880.449055099529, 4004.58771777195, 4779.02562461803
), iresult_pos_m = c(0.00356446552325877, 0.00421364007550875,
0.00406754352800449, 0.00365981038551155, 0.0038572688325682,
0.0040958776944954, 0.00382406510187511, 0.0042169990605352,
0.00400282852462449, 0.00444276248384841, 0.00475936721667692,
0.00435517049663636, 0.00402704589975837, 0.0043128925651774,
0.00484173333125022, 0.00478123827430327, 0.00481721410774609,
0.00401110217955023, 0.004682744853048, 0.00402663063339563,
0.00480533136843763), ID = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
StepName = c("Avoided cost of Dementia", "Avoided cost of Dementia",
"Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia",
"Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia",
"Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia",
"Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia",
"Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia",
"Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia",
"Avoided cost of Dementia"), ImpactID = c(10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L), Allocation = c(1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), Polarity = c("Positive",
"Positive", "Positive", "Positive", "Positive", "Positive",
"Positive", "Positive", "Positive", "Positive", "Positive",
"Positive", "Positive", "Positive", "Positive", "Positive",
"Positive", "Positive", "Positive", "Positive", "Positive"
), Waterfall = c("Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), Int_Ext = c("External",
"External", "External", "External", "External", "External",
"External", "External", "External", "External", "External",
"External", "External", "External", "External", "External",
"External", "External", "External", "External", "External"
), ImpactCode = c("GDF", "GDF", "GDF", "GDF", "GDF", "GDF",
"GDF", "GDF", "GDF", "GDF", "GDF", "GDF", "GDF", "GDF", "GDF",
"GDF", "GDF", "GDF", "GDF", "GDF", "GDF"), ImpactName = c("Government // Direct Financial",
"Government // Direct Financial", "Government // Direct Financial",
"Government // Direct Financial", "Government // Direct Financial",
"Government // Direct Financial", "Government // Direct Financial",
"Government // Direct Financial", "Government // Direct Financial",
"Government // Direct Financial", "Government // Direct Financial",
"Government // Direct Financial", "Government // Direct Financial",
"Government // Direct Financial", "Government // Direct Financial",
"Government // Direct Financial", "Government // Direct Financial",
"Government // Direct Financial", "Government // Direct Financial",
"Government // Direct Financial", "Government // Direct Financial"
), Name = c("Gym and Leisure Centres", "Gym and Leisure Centres",
"Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres",
"Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres",
"Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres",
"Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres",
"Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres",
"Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres",
"Gym and Leisure Centres"), Capital = c("Manufactured", "Manufactured",
"Manufactured", "Manufactured", "Manufactured", "Manufactured",
"Manufactured", "Manufactured", "Manufactured", "Manufactured",
"Manufactured", "Manufactured", "Manufactured", "Manufactured",
"Manufactured", "Manufactured", "Manufactured", "Manufactured",
"Manufactured", "Manufactured", "Manufactured"), Flow = c("TBC",
"TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC",
"TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC",
"TBC", "TBC"), iresult = c(3564.46552325877, 4213.64007550875,
4067.54352800449, 3659.81038551155, 3857.2688325682, 4095.8776944954,
3824.06510187511, 4216.9990605352, 4002.82852462449, 4442.76248384841,
4759.36721667692, 4355.17049663636, 4027.04589975837, 4312.8925651774,
4841.73333125022, 4781.23827430327, 4817.21410774609, 4011.10217955023,
4682.744853048, 4026.63063339563, 4805.33136843763), iresult_m = c(0.00356446552325877,
0.00421364007550875, 0.00406754352800449, 0.00365981038551155,
0.0038572688325682, 0.0040958776944954, 0.00382406510187511,
0.0042169990605352, 0.00400282852462449, 0.00444276248384841,
0.00475936721667692, 0.00435517049663636, 0.00402704589975837,
0.0043128925651774, 0.00484173333125022, 0.00478123827430327,
0.00481721410774609, 0.00401110217955023, 0.004682744853048,
0.00402663063339563, 0.00480533136843763)), row.names = 1760:1780, class = "data.frame")
This is the gist of code I am currently trying to get to run, currently thinking an if statement approach:
Data %>%
mutate(iresult_pos = ifelse(ISO= 'ALL', (sum of the rows in iresult_posPPP where DataYear, Division, FurtherDetails1 and FutherDetails2 are the same), iresult_posPPP)
Is there a tidy way to do this, or potentially a step im missing out, as I've been wracking my brain/ going around stack overflow threads and cant think of a good way to do it. Any help would be greatly appreciated!
Thanks!
Upvotes: 1
Views: 84
Reputation: 420
One other option, using a dummy data set to show how it works:
# create dummy data
data <- data.frame(replicate(10,sample(0:1,1000,rep=TRUE)))
# sum X2, X3, X4, X9 where X1 == 1, store as X11
data <- data %>% mutate(
X11 = rowSums(select_(.,'X2','X3','X4','X9')))
Upvotes: 1
Reputation: 886948
May be we need a group by sum
library(dplyr)
Data %>%
group_by(DataYear, Division, FurtherDetails1, FurtherDetails2) %>%
mutate(iresult_pos =sum(iresult_posPPP))
Upvotes: 3