Reputation: 273
I am trying to create a function with one of column of my dataframe outplacement, because similar calculations I would need for other columns also.
numeric_fun<-function(dataset,grp_var,var){
require("dplyr")
dataset%>%select(grp_var,var)%>% group_by_(grp_var)%>%
summarize(
'q25' = quantile(var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[2],
'median' =round(quantile(var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],0),
'avg' = round( mean(var, na.rm=TRUE), 0),
'q75' = quantile(var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[4] ,
'n' = sum(!is.na(var))
)%>%
mutate(
q25 = ifelse( n < 5, "--", paste(q25,"%" )),
median = ifelse( n < 5, "--", paste(median,"%")),
avg = ifelse(n < 5, "--", paste(avg,"%")),
q75 = ifelse( n < 5, "--", paste(q75,"%")),
n = n
) %>%
rename( "Industry"=grp_var,
"25%" = q25,
"75%" = q75
)
}
numeric_fun(outplacement,"Q7_1","Q8")
But this ones is not working, any other efficient ways?
output of dput
structure(list(Q7_1 = structure(c(NA, NA, NA, 5L, 5L, 14L, NA,
1L, 9L, 13L, 1L, NA, 3L, 13L, 13L, 13L, 12L, 2L, 11L, 13L, 10L,
11L, NA, 1L, 4L, NA, 5L, NA, 5L, 4L, 5L, 9L, 2L, 4L, 13L, 10L,
13L, 13L, NA, 11L, NA, 1L, 11L, NA, 5L, NA, 1L, NA, 9L, 3L, 1L,
1L, 10L, 1L, NA, 5L, NA, NA, 2L, NA, 6L, 6L, NA, 13L, 14L, NA,
NA, 14L, 8L, 11L, 11L, 8L, 12L, 13L, NA, 3L, 11L, 3L, 11L, 1L,
NA, 9L, NA, 10L, 6L, 1L, 5L, 3L, 1L, 13L, 4L, 14L, NA, 13L), .Label = c("Banking/Financial Services",
"Chemicals", "Consumer Goods", "Energy", "High Tech", "Insurance/Reinsurance",
"Life Sciences", "Logistics", "Mining & Metals", "Other Manufacturing",
"Other Non-Manufacturing", "Retail & Wholesale", "Services (Non-Financial)", "Transportation Equipment"), class = "factor"), Q8 = c(NA, NA,
NA, NA, NA, NA, NA, 2L, 3L, 3L, 6L, NA, 5L, 4L, 2L, 5L, 6L, 2L,
2L, 3L, 2L, 5L, NA, 3L, 1L, NA, 3L, NA, 1L, 3L, 4L, 4L, 2L, 4L,
1L, 3L, 2L, 3L, NA, 2L, NA, 4L, 4L, NA, 1L, NA, 3L, NA, 1L, 3L,
5L, 2L, 3L, 1L, NA, 6L, NA, NA, 4L, NA, 1L, 5L, NA, 2L, 1L, NA,
NA, 2L, 6L, 6L, 2L, 6L, 3L, 5L, NA, 5L, 2L, 1L, 3L, 3L, NA, 3L,
NA, 3L, 3L, 6L, 4L, 1L, 4L, 6L, 3L, 5L, NA, 5L), Q9 = c(3L, 1L,
NA, 1L, 3L, 3L, NA, 3L, 3L, 1L, 1L, NA, 3L, 2L, 2L, 3L, 2L, 3L,
2L, 2L, 2L, 1L, NA, 3L, 1L, NA, 1L, NA, 1L, 2L, 1L, 2L, 3L, 1L,
1L, 1L, 3L, 3L, NA, 3L, NA, 2L, 2L, NA, 1L, NA, 1L, NA, 1L, 2L,
2L, 1L, 2L, 3L, NA, 1L, NA, NA, 2L, NA, 2L, 2L, NA, 2L, 2L, NA,
NA, 1L, 3L, 1L, 3L, 3L, 1L, 3L, NA, 1L, 3L, 1L, 1L, 3L, NA, 1L,
NA, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 1L, NA, 2L), Q10 = c(NA,
1L, NA, 1L, NA, NA, NA, NA, NA, 1L, 1L, NA, NA, 1L, 2L, NA, 1L,
NA, 1L, 1L, 2L, 2L, NA, NA, 2L, NA, 2L, NA, 2L, 1L, NA, 1L, NA,
1L, 1L, 1L, NA, NA, NA, NA, NA, 2L, 1L, NA, 1L, NA, 2L, NA, 2L,
2L, 2L, 1L, 2L, 2L, NA, 1L, NA, NA, 2L, NA, 2L, 1L, NA, 1L, 2L,
NA, NA, 1L, 1L, NA, 1L, NA, NA, 2L, NA, NA, 1L, 1L, 1L, 2L, NA,
1L, NA, 1L, 2L, 2L, 1L, 1L, NA, 1L, NA, 2L, NA, 1L)), row.names = c(NA,
-94L), class = c("data.table", "data.frame"), .internal.selfref = <pointer:
0x0000000000090788>, .Names = c("Q7_1",
"Q8", "Q9", "Q10"))
Upvotes: 1
Views: 249
Reputation: 270195
The problem is that the code was using character strings in inappropriate contexts. We can use sym
and !! from the rlang package to translate them. Add the statements marked with ##
and then use !!grp_var
and !!var
in place of grp_var
and var
everywhere. Also changed group_by_
to group_by
, reformatted, changed the require
to library
(see next paragraph) and added library
statements for data.table and rlang.
Note that library
is preferable to require
unless within an if
. That way if the package is missing it will fail at the library
statement making the cause obvious. On the other hand with require
it will fail further down making it harder to debug.
library(data.table) ##
library(dplyr)
numeric_fun <- function(dataset, grp_var, var) {
grp_var <- sym(grp_var)
var <- sym(var)
dataset %>%
select(!!grp_var,!!var) %>%
group_by(!!grp_var) %>%
summarize(
'q25' = quantile(!!var, type=6, probs = seq(0, 1, 0.25), na.rm = TRUE)[2],
'median' = round(quantile(!!var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],0),
'avg' = round( mean(!!var, na.rm = TRUE), 0),
'q75' = quantile(!!var, type=6, probs = seq(0, 1, 0.25), na.rm = TRUE)[4] ,
'n' = sum(!is.na(!!var))
) %>%
mutate(
q25 = ifelse( n < 5, "--", paste(q25, "%" )),
median = ifelse( n < 5, "--", paste(median, "%")),
avg = ifelse(n < 5, "--", paste(avg, "%")),
q75 = ifelse( n < 5, "--", paste(q75, "%")),
n = n
) %>%
rename( "Industry" = !!grp_var,
"25%" = q25,
"75%" = q75
)
}
numeric_fun(outplacement,"Q7_1","Q8")
giving:
# A tibble: 14 x 6
Industry `25%` median avg `75%` n
<fctr> <chr> <chr> <chr> <chr> <int>
1 Banking/Financial Services 2 % 3 % 4 % 5 % 11
2 Chemicals -- -- -- -- 3
3 Consumer Goods 1 % 3 % 3 % 5 % 5
4 Energy -- -- -- -- 4
5 High Tech 1 % 4 % 3 % 4.5 % 6
6 Insurance/Reinsurance -- -- -- -- 3
7 Logistics -- -- -- -- 2
8 Mining & Metals -- -- -- -- 4
9 Other Manufacturing -- -- -- -- 4
10 Other Non-Manufacturing 2 % 2 % 3 % 4.75 % 8
11 Retail & Wholesale -- -- -- -- 2
12 Services (Non-Financial) 2 % 3 % 3 % 5 % 12
13 Transportation Equipment -- -- -- -- 3
14 <NA> -- -- -- -- 0
dput
does not work with objects having external pointers such as data.table objects so we used this:
outplacement <-
structure(list(Q7_1 = structure(c(NA, NA, NA, 5L, 5L, 14L, NA,
1L, 9L, 13L, 1L, NA, 3L, 13L, 13L, 13L, 12L, 2L, 11L, 13L, 10L,
11L, NA, 1L, 4L, NA, 5L, NA, 5L, 4L, 5L, 9L, 2L, 4L, 13L, 10L,
13L, 13L, NA, 11L, NA, 1L, 11L, NA, 5L, NA, 1L, NA, 9L, 3L, 1L,
1L, 10L, 1L, NA, 5L, NA, NA, 2L, NA, 6L, 6L, NA, 13L, 14L, NA,
NA, 14L, 8L, 11L, 11L, 8L, 12L, 13L, NA, 3L, 11L, 3L, 11L, 1L,
NA, 9L, NA, 10L, 6L, 1L, 5L, 3L, 1L, 13L, 4L, 14L, NA, 13L), .Label = c("Banking/Financial Services",
"Chemicals", "Consumer Goods", "Energy", "High Tech", "Insurance/Reinsurance",
"Life Sciences", "Logistics", "Mining & Metals", "Other Manufacturing",
"Other Non-Manufacturing", "Retail & Wholesale", "Services (Non-Financial)", "Transportation Equipment"), class = "factor"), Q8 = c(NA, NA,
NA, NA, NA, NA, NA, 2L, 3L, 3L, 6L, NA, 5L, 4L, 2L, 5L, 6L, 2L,
2L, 3L, 2L, 5L, NA, 3L, 1L, NA, 3L, NA, 1L, 3L, 4L, 4L, 2L, 4L,
1L, 3L, 2L, 3L, NA, 2L, NA, 4L, 4L, NA, 1L, NA, 3L, NA, 1L, 3L,
5L, 2L, 3L, 1L, NA, 6L, NA, NA, 4L, NA, 1L, 5L, NA, 2L, 1L, NA,
NA, 2L, 6L, 6L, 2L, 6L, 3L, 5L, NA, 5L, 2L, 1L, 3L, 3L, NA, 3L,
NA, 3L, 3L, 6L, 4L, 1L, 4L, 6L, 3L, 5L, NA, 5L), Q9 = c(3L, 1L,
NA, 1L, 3L, 3L, NA, 3L, 3L, 1L, 1L, NA, 3L, 2L, 2L, 3L, 2L, 3L,
2L, 2L, 2L, 1L, NA, 3L, 1L, NA, 1L, NA, 1L, 2L, 1L, 2L, 3L, 1L,
1L, 1L, 3L, 3L, NA, 3L, NA, 2L, 2L, NA, 1L, NA, 1L, NA, 1L, 2L,
2L, 1L, 2L, 3L, NA, 1L, NA, NA, 2L, NA, 2L, 2L, NA, 2L, 2L, NA,
NA, 1L, 3L, 1L, 3L, 3L, 1L, 3L, NA, 1L, 3L, 1L, 1L, 3L, NA, 1L,
NA, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 1L, NA, 2L), Q10 = c(NA,
1L, NA, 1L, NA, NA, NA, NA, NA, 1L, 1L, NA, NA, 1L, 2L, NA, 1L,
NA, 1L, 1L, 2L, 2L, NA, NA, 2L, NA, 2L, NA, 2L, 1L, NA, 1L, NA,
1L, 1L, 1L, NA, NA, NA, NA, NA, 2L, 1L, NA, 1L, NA, 2L, NA, 2L,
2L, 2L, 1L, 2L, 2L, NA, 1L, NA, NA, 2L, NA, 2L, 1L, NA, 1L, 2L,
NA, NA, 1L, 1L, NA, 1L, NA, NA, 2L, NA, NA, 1L, 1L, 1L, 2L, NA,
1L, NA, 1L, 2L, 2L, 1L, 1L, NA, 1L, NA, 2L, NA, 1L)), row.names = c(NA,
-94L), class = "data.frame", .Names = c("Q7_1", "Q8", "Q9", "Q10"))
library(data.table)
outplacement <- as.data.table(outplacement)
Upvotes: 2
Reputation: 887851
Using the enquo/!!
from dplyr
we can remodel the function as
library(dplyr)
numeric_fun<-function(dataset,grp_var,var){
grp_var <- enquo(grp_var)
var <- enquo(var)
dataset %>%
select(!! grp_var, !!var) %>%
group_by(!! grp_var) %>%
summarise(
q25 = quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[2],
median =round(quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],0),
avg = round( mean(!! var, na.rm=TRUE), 0),
q75 = quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[4] ,
n = sum(!is.na(!!var))) %>%
mutate(
q25 = ifelse( n < 5, "--", paste0(q25,"%" )),
median = ifelse( n < 5, "--", paste0(median,"%")),
avg = ifelse(n < 5, "--", paste0(avg,"%")),
q75 = ifelse( n < 5, "--", paste0(q75,"%"))
) %>%
rename(Industry= !!grp_var,
`25%` = q25,
`75%` = q75
)
}
-run the function
numeric_fun(df1, Q7_1, Q8)
# A tibble: 14 x 6
# Industry `25%` median avg `75%` n
# <fctr> <chr> <chr> <chr> <chr> <int>
# 1 Banking/Financial Services 2% 3% 4% 5% 11
# 2 Chemicals -- -- -- -- 3
# 3 Consumer Goods 1% 3% 3% 5% 5
# 4 Energy -- -- -- -- 4
# 5 High Tech 1% 4% 3% 4.5% 6
# 6 Insurance/Reinsurance -- -- -- -- 3
# 7 Logistics -- -- -- -- 2
# 8 Mining & Metals -- -- -- -- 4
# 9 Other Manufacturing -- -- -- -- 4
#10 Other Non-Manufacturing 2% 2% 3% 4.75% 8
#11 Retail & Wholesale -- -- -- -- 2
#12 Services (Non-Financial) 2% 3% 3% 5% 12
#13 Transportation Equipment -- -- -- -- 3
#14 <NA> -- -- -- -- 0
Upvotes: 2