Reputation: 90
The code below works on my sample data for the single value of 50 but I am having trouble creating a way to parameterize the results by the click.amount variable, specifically the range 20:200. The variable definitions are:
date= timestamp of event
ID=uniquely determines the user
Group= a grouping variable
click= clicks per event(always =1)
click.amount=amount spent during event
Correct.Outcome=what my code should return
Reason =The reason why an outcome would be "N"
The criteria for an event to have outcome="Y"
For the sample code N=50, ultimately I would like to return a data frame where the rows represent proportion of Outcome="Y" and total click.amount for Outcome="Y" for each click.amount in the range 20:200. My actual data is around 500K to a 1M rows. If a cleaner solution exist without using dplyr that is fine, the code below just represents how I was able to get a partial solution. TIA
data:
structure(list(date = c(1550245200, 1550245211, 1550245411, 1552566300,
1552566350, 1552567300, 1552567400, 1552567500, 1552568600, 1552568600,
1552568601, 1552568691, 1552568691, 1552568891, 1552568899, 1552568601,
1552568691, 1552568691, 1552568891, 1552568899), ID = c(1, 1,
1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 6, 7, 8), Group = c("A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "B", "B", "B", "B", "B"), clicks = c(1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), click.amount = c(60,
4.89, 12.84, 70, 52, 10, 33, 80, 54, 11.89, 15.24, 25, 2, 1,
1, 15.24, 25, 2, 1, 1), Correct.Outcome = c("N", "Y", "Y", "N",
"N", "Y", "Y", "N", "N", "Y", "Y", "N", "N", "N", "N", "N", "N",
"N", "N", "N"), Reason = c("over 50", NA, NA, "over 50", "over 50",
NA, NA, "cumulative amount over 50", "over 50", NA, NA, "cumulative amount over 50",
"cumulative amount over 50", "cumulative amount over 50", "cumulative clicks over 5 and cumulative amount over 50",
"Group B", "Group B", "Group B", "Group B", "Group B")), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
code:
results=testdat%>% group_by(ID)%>%arrange(date)%>%
mutate(cum.clicks=cumsum(ifelse(click.amount<=50 & Group=="A",clicks,0)),
cum.clickamount=cumsum(ifelse(click.amount<=50 & Group=="A",click.amount,0)))%>%
mutate(Outcome=ifelse(click.amount<=50 & Group=="A" & cum.clicks<=5 & cum.clickamount<=50,"Y","N" ))%>%
select(Outcome,Correct.Outcome,Reason,everything())%>%
arrange(ID,date)%>%
mutate(check=ifelse(Outcome !=Correct.Outcome,1,0))%>% ungroup()%>%
summarize(prop50=mean(Outcome=="Y"),amount50=sum(click.amount[Outcome=="Y"]))
Upvotes: 1
Views: 213
Reputation: 1666
I'm not sure I understand what you're doing, but turning dplyr into function can be done using sapply
.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
testdata <- data.frame(stringsAsFactors=FALSE,
date = c(1550245200, 1550245211, 1550245411, 1552566300,
1552566350, 1552567300, 1552567400,
1552567500, 1552568600, 1552568600, 1552568601,
1552568691, 1552568691, 1552568891, 1552568899,
1552568601, 1552568691, 1552568691, 1552568891,
1552568899),
ID = c(1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 6, 7,
8),
Group = c("A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "B", "B", "B", "B",
"B"),
clicks = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1),
click.amount = c(60, 4.89, 12.84, 70, 52, 10, 33, 80, 54, 11.89, 15.24,
25, 2, 1, 1, 15.24, 25, 2, 1, 1),
Correct.Outcome = c("N", "Y", "Y", "N", "N", "Y", "Y", "N", "N", "Y", "Y",
"N", "N", "N", "N", "N", "N", "N", "N",
"N"),
Reason = c("over 50", NA, NA, "over 50", "over 50", NA, NA,
"cumulative amount over 50", "over 50", NA,
NA, "cumulative amount over 50",
"cumulative amount over 50", "cumulative amount over 50",
"cumulative clicks over 5 and cumulative amount over 50", "Group B", "Group B", "Group B",
"Group B", "Group B"))
results <- sapply(X = 20:200,
FUN = function(t) {
testdata %>%
group_by(ID) %>%
arrange(date) %>%
mutate(cum.clicks = cumsum(x = ifelse(test = ((click.amount <= t) & (Group == "A")),
yes = clicks,
no = 0)),
cum.clickamount = cumsum(x = ifelse(test = ((click.amount <= t) & (Group=="A")),
yes = click.amount,
no = 0))) %>%
mutate(Outcome = ifelse(test = ((click.amount <= t) & (Group == "A") & (cum.clicks <= 5) & (cum.clickamount <= t)),
yes = "Y",
no = "N" )) %>%
select(Outcome, Correct.Outcome, Reason, everything()) %>%
arrange(ID, date) %>%
mutate(check = ifelse(test = (Outcome !=Correct.Outcome),
yes = 1,
no = 0)) %>%
ungroup()%>%
summarize(proportion = mean(Outcome == "Y"),
amount = sum(click.amount[Outcome=="Y"]))
})
results
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#> proportion 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.25 0.25
#> amount 39.62 39.62 39.62 39.62 39.62 39.62 39.62 39.62 54.86 54.86
#> [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20]
#> proportion 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25
#> amount 54.86 54.86 54.86 54.86 54.86 54.86 54.86 54.86 54.86 54.86
#> [,21] [,22] [,23] [,24] [,25] [,26] [,27] [,28] [,29] [,30]
#> proportion 0.25 0.25 0.25 0.3 0.3 0.3 0.3 0.3 0.3 0.3
#> amount 54.86 54.86 54.86 87.86 87.86 87.86 87.86 87.86 87.86 87.86
#> [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38] [,39]
#> proportion 0.3 0.3 0.35 0.4 0.4 0.45 0.5 0.5 0.5
#> amount 87.86 87.86 139.86 164.86 164.86 166.86 167.86 167.86 167.86
#> [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
#> proportion 0.5 0.45 0.45 0.45 0.45 0.45 0.5 0.5 0.5
#> amount 167.86 210.13 210.13 210.13 210.13 210.13 215.02 215.02 215.02
#> [,49] [,50] [,51] [,52] [,53] [,54] [,55] [,56] [,57]
#> proportion 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5
#> amount 215.02 215.02 233.02 233.02 233.02 233.02 233.02 233.02 233.02
#> [,58] [,59] [,60] [,61] [,62] [,63] [,64] [,65] [,66]
#> proportion 0.5 0.55 0.55 0.55 0.55 0.55 0.55 0.55 0.55
#> amount 233.02 245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86
#> [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74] [,75]
#> proportion 0.55 0.55 0.55 0.55 0.55 0.55 0.55 0.55 0.55
#> amount 245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86
#> [,76] [,77] [,78] [,79] [,80] [,81] [,82] [,83] [,84]
#> proportion 0.55 0.55 0.55 0.55 0.55 0.55 0.55 0.55 0.55
#> amount 245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86
#> [,85] [,86] [,87] [,88] [,89] [,90] [,91] [,92] [,93]
#> proportion 0.55 0.55 0.55 0.55 0.55 0.55 0.55 0.55 0.55
#> amount 245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86
#> [,94] [,95] [,96] [,97] [,98] [,99] [,100] [,101] [,102]
#> proportion 0.55 0.55 0.55 0.55 0.55 0.55 0.55 0.55 0.55
#> amount 245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86
#> [,103] [,104] [,105] [,106] [,107] [,108] [,109] [,110] [,111]
#> proportion 0.6 0.65 0.65 0.65 0.65 0.65 0.65 0.65 0.65
#> amount 297.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86
#> [,112] [,113] [,114] [,115] [,116] [,117] [,118] [,119] [,120]
#> proportion 0.65 0.65 0.65 0.65 0.65 0.65 0.65 0.65 0.65
#> amount 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86
#> [,121] [,122] [,123] [,124] [,125] [,126] [,127] [,128] [,129]
#> proportion 0.65 0.65 0.65 0.65 0.65 0.65 0.65 0.65 0.65
#> amount 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86
#> [,130] [,131] [,132] [,133] [,134] [,135] [,136] [,137] [,138]
#> proportion 0.65 0.65 0.65 0.65 0.65 0.65 0.65 0.65 0.65
#> amount 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86
#> [,139] [,140] [,141] [,142] [,143] [,144] [,145] [,146] [,147]
#> proportion 0.65 0.65 0.65 0.65 0.65 0.65 0.65 0.65 0.65
#> amount 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86
#> [,148] [,149] [,150] [,151] [,152] [,153] [,154] [,155] [,156]
#> proportion 0.65 0.65 0.65 0.65 0.65 0.65 0.65 0.65 0.65
#> amount 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86
#> [,157] [,158] [,159] [,160] [,161] [,162] [,163] [,164] [,165]
#> proportion 0.65 0.7 0.7 0.7 0.7 0.7 0.7 0.7 0.7
#> amount 377.86 431.86 431.86 431.86 431.86 431.86 431.86 431.86 431.86
#> [,166] [,167] [,168] [,169] [,170] [,171] [,172] [,173] [,174]
#> proportion 0.7 0.7 0.7 0.7 0.7 0.7 0.7 0.7 0.7
#> amount 431.86 431.86 431.86 431.86 431.86 431.86 431.86 431.86 431.86
#> [,175] [,176] [,177] [,178] [,179] [,180] [,181]
#> proportion 0.7 0.7 0.7 0.7 0.7 0.7 0.7
#> amount 431.86 431.86 431.86 431.86 431.86 431.86 431.86
Created on 2019-03-17 by the reprex package (v0.2.1)
Upvotes: 2