Reputation: 11584
I am trying to use tidy dots to build a function that summarises the data according to changing requirements: both the grouping variables and the metrics to aggregate vary from case to case.
Sample Data:
> dput(d)
structure(list(Branch = c("MDU", "KPA", "OMR", "TVM", "KPA",
"MDU", "MDU", "MDB", "MDU", "OMR", "MDU", "AMB", "OMR", "OMR",
"GDY", "TVM", "KLM", "EKM", "MDU", "TVM", "KPA", "TVM", "GDY",
"EKM", "TVM", "KLM", "TVM", "TVM", "AMB", "TVM"), InvoiceDate = structure(c(18475,
18475, 18475, 18475, 18475, 18475, 18475, 18475, 18475, 18475,
18475, 18475, 18475, 18475, 18475, 18475, 18475, 18475, 18475,
18475, 18475, 18475, 18475, 18475, 18475, 18475, 18475, 18475,
18475, 18475), class = "Date"), LabourRev = c(1093, 300, 1, 2637.86,
255, 624, 634.75, 3645, 570, 5250, 900, 1500, 423, 1614, 999,
1140, 2914, 1640, 0, 2574, 255, 1046, 1999, 2637, 1355, 1144,
0, 1355, 1999, 3240.5), PartsRev = c(1111.98, 990, 0, 16555.64,
178, 2857.5, 31.74, 3265.03, 28.5, 1942.18, 45, 75, 21.15, 441.71,
49.95, 3919.5, 10652.72, 4877.58, 0, 16616.53, 1108.11, 1000.49,
99.95, 2889.68, 0, 3127.34, 0, 0, 99.95, 3618.73), TotalAmt = c(2204.98,
1290, 1, 19193.5, 433, 3481.5, 666.49, 6910.03, 598.5, 7192.18,
945, 1575, 444.15, 2055.71, 1048.95, 5059.5, 13566.72, 6517.58,
0, 19190.53, 1363.11, 2046.49, 2098.95, 5526.68, 1355, 4271.34,
0, 1355, 2098.95, 6859.23)), row.names = c(NA, -30L), class = c("tbl_df",
"tbl", "data.frame"))
>
structure:
> str(d)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 30 obs. of 5 variables:
$ Branch : chr "MDU" "KPA" "OMR" "TVM" ...
$ InvoiceDate: Date, format: "2020-08-01" "2020-08-01" "2020-08-01" "2020-08-01" ...
$ LabourRev : num 1093 300 1 2638 255 ...
$ PartsRev : num 1112 990 0 16556 178 ...
$ TotalAmt : num 2205 1290 1 19194 433 ...
For example, I may group by just Branch or month of Invoice Date or by both.
I tried writing the function below, but it fails with the error shown after it:
# Attempted helper: group `df` by one set of columns and sum another set.
# NOTE: kept exactly as posted because it reproduces the error quoted below --
# `...` appears twice in the formals list, so this definition never parses.
mult_grouping <- function(df, ..., ...){
# Intended to capture the grouping columns as (auto-named) quosures.
grping_vars <- enquos(..., .named = 1)
# Intended to capture the columns to sum; this reads the same `...` as the
# line above, so the two sets of columns cannot be told apart.
summary_vars <- enquos(..., .named = 1)
# Build one `sum(<column>, na.rm = 1)` expression per captured column.
smry_sum <- map(summary_vars, function(var){
expr(sum(!!var, na.rm = 1))
})
# Prefix every output column name with "sum_".
names(smry_sum) <- paste0('sum_',names(smry_sum))
df %>%
group_by(!!!grping_vars) %>%
# Splice in the sum expressions and add a per-group row count.
summarise(!!!smry_sum, Counts = n())
}
Error: repeated formal argument '...' on line 1
Are we supposed to use tidy dots just once in a function? Could someone let me know how to correct the code.
Upvotes: 1
Views: 47
Reputation: 389012
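A function can declare `...` only once, so two separate sets of columns cannot both be passed through dots. You don't need complicated non-standard evaluation, though, if you can pass the column names as strings:
across() handles string column names when they are wrapped in all_of().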
library(dplyr)
#' Sum columns by group, with all column names supplied as strings
#'
#' Replaces `InvoiceDate` with its month number, groups by `group_vars`,
#' then sums every column in `summary_vars` and counts the rows per group.
#'
#' @param df A data frame with a date-like `InvoiceDate` column plus every
#'   column named in `group_vars` and `summary_vars`.
#' @param group_vars Character vector of column names to group by. Include
#'   `"InvoiceDate"` to group by invoice month.
#' @param summary_vars Character vector of column names to sum.
#' @return An ungrouped data frame with one row per group: the grouping
#'   columns, one total per entry of `summary_vars` (missing values are
#'   ignored), and `Counts`, the number of rows in the group.
mult_grouping <- function(df, group_vars, summary_vars) {
  df %>%
    # Collapse each date to its month number so that grouping on
    # "InvoiceDate" means grouping by month.
    mutate(InvoiceDate = lubridate::month(InvoiceDate)) %>%
    group_by(across(all_of(group_vars))) %>%
    summarise(
      # na.rm = TRUE: one missing value must not turn a group's total into NA.
      across(all_of(summary_vars), ~ sum(.x, na.rm = TRUE)),
      Counts = n(),
      # Drop the grouping so callers get a plain data frame (and no
      # "regrouping output" message) however many group_vars were given.
      .groups = "drop"
    )
}
# Total labour revenue per branch.
d %>%
  mult_grouping(group_vars = "Branch", summary_vars = "LabourRev")
# Branch LabourRev Counts
# <chr> <dbl> <int>
#1 AMB 3499 2
#2 EKM 4277 2
#3 GDY 2998 2
#4 KLM 4058 2
#5 KPA 810 3
#6 MDB 3645 1
#7 MDU 3822. 6
#8 OMR 7288 4
#9 TVM 13348. 8
# Total labour revenue per branch and invoice month.
d %>%
  mult_grouping(
    group_vars = c("Branch", "InvoiceDate"),
    summary_vars = "LabourRev"
  )
# Branch InvoiceDate LabourRev Counts
# <chr> <dbl> <dbl> <int>
#1 AMB 8 3499 2
#2 EKM 8 4277 2
#3 GDY 8 2998 2
#4 KLM 8 4058 2
#5 KPA 8 810 3
#6 MDB 8 3645 1
#7 MDU 8 3822. 6
#8 OMR 8 7288 4
#9 TVM 8 13348. 8
# Total labour and parts revenue per branch and invoice month.
d %>%
  mult_grouping(
    group_vars = c("Branch", "InvoiceDate"),
    summary_vars = c("LabourRev", "PartsRev")
  )
# Branch InvoiceDate LabourRev PartsRev Counts
# <chr> <dbl> <dbl> <dbl> <int>
#1 AMB 8 3499 175. 2
#2 EKM 8 4277 7767. 2
#3 GDY 8 2998 150. 2
#4 KLM 8 4058 13780. 2
#5 KPA 8 810 2276. 3
#6 MDB 8 3645 3265. 1
#7 MDU 8 3822. 4075. 6
#8 OMR 8 7288 2405. 4
#9 TVM 8 13348. 41711. 8
To have a flexible date column we can pass it as a separate argument in the function.
#' Sum columns by group, optionally also grouping by the month of a date column
#'
#' Groups `df` by `group_vars`, sums every column in `summary_vars` and counts
#' the rows per group. When `date_cols` is given, the month number of that
#' column is added as `date_col` and used as an extra grouping column.
#'
#' @param df A data frame containing every column named in the other
#'   arguments.
#' @param group_vars Character vector of column names to group by.
#' @param summary_vars Character vector of column names to sum.
#' @param date_cols `NULL` (the default) for no date grouping, or a single
#'   string naming a date-like column whose month becomes a grouping column.
#' @return An ungrouped data frame with one row per group: the grouping
#'   columns (plus `date_col` when `date_cols` is supplied), one total per
#'   entry of `summary_vars` (missing values are ignored), and `Counts`, the
#'   number of rows in the group.
mult_grouping <- function(df, group_vars, summary_vars, date_cols = NULL) {
  if (!is.null(date_cols)) {
    # `.data[[ ]]` looks up exactly one column, so fail early and clearly if
    # the caller passes anything other than a single name.
    stopifnot(is.character(date_cols), length(date_cols) == 1L)
    df <- df %>%
      mutate(date_col = lubridate::month(.data[[date_cols]]))
    # The derived month column becomes one more grouping column.
    group_vars <- c(group_vars, "date_col")
  }

  # A single pipeline serves both cases; only `group_vars` differs.
  df %>%
    group_by(across(all_of(group_vars))) %>%
    summarise(
      # na.rm = TRUE: one missing value must not turn a group's total into NA.
      across(all_of(summary_vars), ~ sum(.x, na.rm = TRUE)),
      Counts = n(),
      # Drop the grouping so callers get a plain data frame (and no
      # "regrouping output" message) however many grouping columns exist.
      .groups = "drop"
    )
}
and call it as:
# Per branch: total labour revenue.
d %>%
  mult_grouping(group_vars = "Branch", summary_vars = "LabourRev")

# Per branch: total labour and parts revenue.
d %>%
  mult_grouping(
    group_vars = "Branch",
    summary_vars = c("LabourRev", "PartsRev")
  )

# Per branch and invoice month: total labour and parts revenue.
d %>%
  mult_grouping(
    group_vars = "Branch",
    summary_vars = c("LabourRev", "PartsRev"),
    date_cols = "InvoiceDate"
  )
Upvotes: 1