alleyway
alleyway

Reputation: 90

Turn dplyr pipeline into function

The code below works on my sample data for the single value of 50 but I am having trouble creating a way to parameterize the results by the click.amount variable, specifically the range 20:200. The variable definitions are:

The criteria for an event to have outcome="Y"

  1. click amount <=N
  2. cumulative clicks By ID <=5
  3. cumulative amount By ID<=N
  4. Group=A

For the sample code N=50, ultimately I would like to return a data frame where the rows represent proportion of Outcome="Y" and total click.amount for Outcome="Y" for each click.amount in the range 20:200. My actual data is around 500K to a 1M rows. If a cleaner solution exist without using dplyr that is fine, the code below just represents how I was able to get a partial solution. TIA

data:

    structure(list(date = c(1550245200, 1550245211, 1550245411, 1552566300, 
1552566350, 1552567300, 1552567400, 1552567500, 1552568600, 1552568600, 
1552568601, 1552568691, 1552568691, 1552568891, 1552568899, 1552568601, 
1552568691, 1552568691, 1552568891, 1552568899), ID = c(1, 1, 
1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 6, 7, 8), Group = c("A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "B", "B", "B", "B", "B"), clicks = c(1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), click.amount = c(60, 
4.89, 12.84, 70, 52, 10, 33, 80, 54, 11.89, 15.24, 25, 2, 1, 
1, 15.24, 25, 2, 1, 1), Correct.Outcome = c("N", "Y", "Y", "N", 
"N", "Y", "Y", "N", "N", "Y", "Y", "N", "N", "N", "N", "N", "N", 
"N", "N", "N"), Reason = c("over 50", NA, NA, "over 50", "over 50", 
NA, NA, "cumulative amount over 50", "over 50", NA, NA, "cumulative amount over 50", 
"cumulative amount over 50", "cumulative amount over 50", "cumulative clicks over 5 and cumulative amount over 50", 
"Group B", "Group B", "Group B", "Group B", "Group B")), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame"))

code:

 results=testdat%>% group_by(ID)%>%arrange(date)%>%
  mutate(cum.clicks=cumsum(ifelse(click.amount<=50 & Group=="A",clicks,0)),
         cum.clickamount=cumsum(ifelse(click.amount<=50 & Group=="A",click.amount,0)))%>%
  mutate(Outcome=ifelse(click.amount<=50 & Group=="A" & cum.clicks<=5 & cum.clickamount<=50,"Y","N" ))%>%
  select(Outcome,Correct.Outcome,Reason,everything())%>%
  arrange(ID,date)%>%
  mutate(check=ifelse(Outcome !=Correct.Outcome,1,0))%>% ungroup()%>%
  summarize(prop50=mean(Outcome=="Y"),amount50=sum(click.amount[Outcome=="Y"]))

Upvotes: 1

Views: 213

Answers (1)

yarnabrina
yarnabrina

Reputation: 1666

I'm not sure I understand what you're doing, but turning dplyr into function can be done using sapply.

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
testdata <- data.frame(stringsAsFactors=FALSE,
                       date = c(1550245200, 1550245211, 1550245411, 1552566300,
                                1552566350, 1552567300, 1552567400,
                                1552567500, 1552568600, 1552568600, 1552568601,
                                1552568691, 1552568691, 1552568891, 1552568899,
                                1552568601, 1552568691, 1552568691, 1552568891,
                                1552568899),
                       ID = c(1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 6, 7,
                              8),
                       Group = c("A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
                                 "A", "A", "A", "A", "B", "B", "B", "B",
                                 "B"),
                       clicks = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                  1),
                       click.amount = c(60, 4.89, 12.84, 70, 52, 10, 33, 80, 54, 11.89, 15.24,
                                        25, 2, 1, 1, 15.24, 25, 2, 1, 1),
                       Correct.Outcome = c("N", "Y", "Y", "N", "N", "Y", "Y", "N", "N", "Y", "Y",
                                           "N", "N", "N", "N", "N", "N", "N", "N",
                                           "N"),
                       Reason = c("over 50", NA, NA, "over 50", "over 50", NA, NA,
                                  "cumulative amount over 50", "over 50", NA,
                                  NA, "cumulative amount over 50",
                                  "cumulative amount over 50", "cumulative amount over 50",
                                  "cumulative clicks over 5 and cumulative amount over 50", "Group B", "Group B", "Group B",
                                  "Group B", "Group B"))
results <- sapply(X = 20:200,
                  FUN = function(t) {
                    testdata %>%
                      group_by(ID) %>%
                      arrange(date) %>%
                      mutate(cum.clicks = cumsum(x = ifelse(test = ((click.amount <= t) & (Group == "A")),
                                                            yes = clicks,
                                                            no = 0)),
                             cum.clickamount = cumsum(x = ifelse(test = ((click.amount <= t) & (Group=="A")),
                                                                 yes = click.amount,
                                                                 no = 0))) %>%
                      mutate(Outcome = ifelse(test = ((click.amount <= t) & (Group == "A") & (cum.clicks <= 5) & (cum.clickamount <= t)),
                                              yes = "Y",
                                              no = "N" )) %>%
                      select(Outcome, Correct.Outcome, Reason, everything()) %>%
                      arrange(ID, date) %>%
                      mutate(check = ifelse(test = (Outcome !=Correct.Outcome),
                                            yes = 1,
                                            no = 0)) %>%
                      ungroup()%>%
                      summarize(proportion = mean(Outcome == "Y"),
                                amount = sum(click.amount[Outcome=="Y"]))
                  })
results
#>            [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9]  [,10]
#> proportion 0.2   0.2   0.2   0.2   0.2   0.2   0.2   0.2   0.25  0.25 
#> amount     39.62 39.62 39.62 39.62 39.62 39.62 39.62 39.62 54.86 54.86
#>            [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20]
#> proportion 0.25  0.25  0.25  0.25  0.25  0.25  0.25  0.25  0.25  0.25 
#> amount     54.86 54.86 54.86 54.86 54.86 54.86 54.86 54.86 54.86 54.86
#>            [,21] [,22] [,23] [,24] [,25] [,26] [,27] [,28] [,29] [,30]
#> proportion 0.25  0.25  0.25  0.3   0.3   0.3   0.3   0.3   0.3   0.3  
#> amount     54.86 54.86 54.86 87.86 87.86 87.86 87.86 87.86 87.86 87.86
#>            [,31] [,32] [,33]  [,34]  [,35]  [,36]  [,37]  [,38]  [,39] 
#> proportion 0.3   0.3   0.35   0.4    0.4    0.45   0.5    0.5    0.5   
#> amount     87.86 87.86 139.86 164.86 164.86 166.86 167.86 167.86 167.86
#>            [,40]  [,41]  [,42]  [,43]  [,44]  [,45]  [,46]  [,47]  [,48] 
#> proportion 0.5    0.45   0.45   0.45   0.45   0.45   0.5    0.5    0.5   
#> amount     167.86 210.13 210.13 210.13 210.13 210.13 215.02 215.02 215.02
#>            [,49]  [,50]  [,51]  [,52]  [,53]  [,54]  [,55]  [,56]  [,57] 
#> proportion 0.5    0.5    0.5    0.5    0.5    0.5    0.5    0.5    0.5   
#> amount     215.02 215.02 233.02 233.02 233.02 233.02 233.02 233.02 233.02
#>            [,58]  [,59]  [,60]  [,61]  [,62]  [,63]  [,64]  [,65]  [,66] 
#> proportion 0.5    0.55   0.55   0.55   0.55   0.55   0.55   0.55   0.55  
#> amount     233.02 245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86
#>            [,67]  [,68]  [,69]  [,70]  [,71]  [,72]  [,73]  [,74]  [,75] 
#> proportion 0.55   0.55   0.55   0.55   0.55   0.55   0.55   0.55   0.55  
#> amount     245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86
#>            [,76]  [,77]  [,78]  [,79]  [,80]  [,81]  [,82]  [,83]  [,84] 
#> proportion 0.55   0.55   0.55   0.55   0.55   0.55   0.55   0.55   0.55  
#> amount     245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86
#>            [,85]  [,86]  [,87]  [,88]  [,89]  [,90]  [,91]  [,92]  [,93] 
#> proportion 0.55   0.55   0.55   0.55   0.55   0.55   0.55   0.55   0.55  
#> amount     245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86
#>            [,94]  [,95]  [,96]  [,97]  [,98]  [,99]  [,100] [,101] [,102]
#> proportion 0.55   0.55   0.55   0.55   0.55   0.55   0.55   0.55   0.55  
#> amount     245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86 245.86
#>            [,103] [,104] [,105] [,106] [,107] [,108] [,109] [,110] [,111]
#> proportion 0.6    0.65   0.65   0.65   0.65   0.65   0.65   0.65   0.65  
#> amount     297.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86
#>            [,112] [,113] [,114] [,115] [,116] [,117] [,118] [,119] [,120]
#> proportion 0.65   0.65   0.65   0.65   0.65   0.65   0.65   0.65   0.65  
#> amount     377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86
#>            [,121] [,122] [,123] [,124] [,125] [,126] [,127] [,128] [,129]
#> proportion 0.65   0.65   0.65   0.65   0.65   0.65   0.65   0.65   0.65  
#> amount     377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86
#>            [,130] [,131] [,132] [,133] [,134] [,135] [,136] [,137] [,138]
#> proportion 0.65   0.65   0.65   0.65   0.65   0.65   0.65   0.65   0.65  
#> amount     377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86
#>            [,139] [,140] [,141] [,142] [,143] [,144] [,145] [,146] [,147]
#> proportion 0.65   0.65   0.65   0.65   0.65   0.65   0.65   0.65   0.65  
#> amount     377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86
#>            [,148] [,149] [,150] [,151] [,152] [,153] [,154] [,155] [,156]
#> proportion 0.65   0.65   0.65   0.65   0.65   0.65   0.65   0.65   0.65  
#> amount     377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86 377.86
#>            [,157] [,158] [,159] [,160] [,161] [,162] [,163] [,164] [,165]
#> proportion 0.65   0.7    0.7    0.7    0.7    0.7    0.7    0.7    0.7   
#> amount     377.86 431.86 431.86 431.86 431.86 431.86 431.86 431.86 431.86
#>            [,166] [,167] [,168] [,169] [,170] [,171] [,172] [,173] [,174]
#> proportion 0.7    0.7    0.7    0.7    0.7    0.7    0.7    0.7    0.7   
#> amount     431.86 431.86 431.86 431.86 431.86 431.86 431.86 431.86 431.86
#>            [,175] [,176] [,177] [,178] [,179] [,180] [,181]
#> proportion 0.7    0.7    0.7    0.7    0.7    0.7    0.7   
#> amount     431.86 431.86 431.86 431.86 431.86 431.86 431.86

Created on 2019-03-17 by the reprex package (v0.2.1)

Upvotes: 2

Related Questions