Reputation: 1430
I'm trying to summarize this example data set using multiple functions, n() and mean(). How can I combine both in the same workflow?
Here is a toy dataset that mirrors my larger data:
library(tidyverse)
# Toy data: one grouping column plus three score columns, written out by hand
# so the example is reproducible without reading a file.
df <- structure(
  list(
    group_var = c(70, 72, 73, 70, 70, 71, 70, 71, 71, 70),
    var1_scr = c(
      50.5, 25.75, 50.5, 50.5, 50.5,
      50.5, 75.25, 75.25, 50.5, 75.25
    ),
    var2_scr = c(
      50.5, 50.5, NA, 75.25, 50.5,
      50.5, 75.25, 75.25, 100, 75.25
    ),
    # var3_scr is missing everywhere except row 3
    var3_scr = c(NA, NA, 75.25, NA, NA, NA, NA, NA, NA, NA)
  ),
  # compact encoding of automatic row names for 10 rows
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)
df
#> # A tibble: 10 x 4
#> group_var var1_scr var2_scr var3_scr
#> <dbl> <dbl> <dbl> <dbl>
#> 1 70 50.5 50.5 NA
#> 2 72 25.8 50.5 NA
#> 3 73 50.5 NA 75.2
#> 4 70 50.5 75.2 NA
#> 5 70 50.5 50.5 NA
#> 6 71 50.5 50.5 NA
#> 7 70 75.2 75.2 NA
#> 8 71 75.2 75.2 NA
#> 9 71 50.5 100 NA
#> 10 70 75.2 75.2 NA
# summarize the scores
df %>%
  group_by(group_var) %>%
  # list(~ ...) replaces funs(), which is deprecated as of dplyr 0.8.0.
  # na.rm = TRUE drops missing scores before averaging, so a group whose
  # scores are all missing averages to NaN rather than NA.
  summarise_at(vars(ends_with("_scr")), list(~ mean(., na.rm = TRUE)))
#> # A tibble: 4 x 4
#> group_var var1_scr var2_scr var3_scr
#> <dbl> <dbl> <dbl> <dbl>
#> 1 70 60.4 65.4 NaN
#> 2 71 58.8 75.2 NaN
#> 3 72 25.8 50.5 NaN
#> 4 73 50.5 NaN 75.2
# count all the observations
df %>%
  group_by(group_var) %>%
  # n() is the number of rows in the current group
  summarise(obs = n())
#> # A tibble: 4 x 2
#> group_var obs
#> <dbl> <int>
#> 1 70 5
#> 2 71 3
#> 3 72 1
#> 4 73 1
# my goal is to produce this dataset but using the mutate_at function
df %>%
  group_by(group_var) %>%
  summarise(
    # no na.rm here, so any group containing a missing score averages to NA
    var1_scr = mean(var1_scr),
    var2_scr = mean(var2_scr),
    var3_scr = mean(var3_scr),
    # row count per group, alongside the means
    obs = n()
  )
#> # A tibble: 4 x 5
#> group_var var1_scr var2_scr var3_scr obs
#> <dbl> <dbl> <dbl> <dbl> <int>
#> 1 70 60.4 65.4 NA 5
#> 2 71 58.8 75.2 NA 3
#> 3 72 25.8 50.5 NA 1
#> 4 73 50.5 NA 75.2 1
Created on 2019-08-15 by the reprex package (v0.3.0)
Upvotes: 3
Views: 2148
Reputation: 886938
An option is to add 'n' as an additional grouping variable after grouping by 'group_var', and then do the summarise_at
library(dplyr)
df %>%
  group_by(group_var) %>%
  # Compute the per-group row count and append it as a second grouping level;
  # add = TRUE keeps group_var instead of replacing it. (dplyr >= 1.0.0 names
  # this argument `.add`.)
  group_by(obs = n(), add = TRUE) %>%
  # Grouping columns are carried through untouched, so obs survives next to the
  # means; the result is still grouped by group_var.
  summarise_at(vars(ends_with("_scr")), list(~ mean(., na.rm = TRUE)))
# A tibble: 4 x 5
# Groups: group_var [4]
# group_var obs var1_scr var2_scr var3_scr
# <dbl> <int> <dbl> <dbl> <dbl>
#1 70 5 60.4 65.4 NaN
#2 71 3 58.8 75.2 NaN
#3 72 1 25.8 50.5 NaN
#4 73 1 50.5 NaN 75.2
Another option is to create the frequency column with mutate, and get its mean by also including it in the summarise_at call (e.g. mean(rep(3, 5)) -> 3, so averaging a constant count returns the count itself)
df %>%
  group_by(group_var) %>%
  # obs holds the same row count on every row of a group
  mutate(obs = n()) %>%
  # Averaging the constant obs column gives back the count itself, now as a
  # double rather than an integer.
  summarise_at(
    vars(ends_with("_scr"), obs),
    list(~ mean(., na.rm = TRUE))
  )
# A tibble: 4 x 5
# group_var var1_scr var2_scr var3_scr obs
# <dbl> <dbl> <dbl> <dbl> <dbl>
#1 70 60.4 65.4 NaN 5
#2 71 58.8 75.2 NaN 3
#3 72 25.8 50.5 NaN 1
#4 73 50.5 NaN 75.2 1
NOTE: Both of these provide one column for the 'obs'
Here, the OP's expected output is a summarised output, for which summarise/summarise_at/summarise_all/summarise_if are efficient. However, if we need to use mutate_at (only for demonstration):
df %>%
  group_by(group_var) %>%
  mutate(obs = n()) %>%
  # Overwrite every score column (and obs) with its group mean, so all rows of
  # a group become identical ...
  mutate_at(
    vars(ends_with("_scr"), obs),
    list(~ mean(., na.rm = TRUE))
  ) %>%
  # ... then collapse those duplicates to one row per group. Rows come out in
  # order of first appearance and the result stays grouped by group_var.
  distinct_at(vars(group_var, ends_with("_scr"), obs))
# A tibble: 4 x 5
# Groups: group_var [4]
# group_var var1_scr var2_scr var3_scr obs
# <dbl> <dbl> <dbl> <dbl> <dbl>
#1 70 60.4 65.4 NaN 5
#2 72 25.8 50.5 NaN 1
#3 73 50.5 NaN 75.2 1
#4 71 58.8 75.2 NaN 3
Upvotes: 6
Reputation: 13125
If you need the two functions in the same call, we can do
library(dplyr)
df %>%
  group_by(group_var) %>%
  # Both functions are applied to every *_scr column; the list names become the
  # column suffixes (_m, _n). n() ignores the column, so the three *_n columns
  # all carry the same group size.
  summarise_at(
    vars(ends_with("_scr")),
    list(
      m = ~ mean(., na.rm = TRUE),
      n = ~ n()
    )
  )
# A tibble: 4 x 7
group_var var1_scr_m var2_scr_m var3_scr_m var1_scr_n var2_scr_n var3_scr_n
<dbl> <dbl> <dbl> <dbl> <int> <int> <int>
1 70 60.4 65.4 NaN 5 5 5
2 71 58.8 75.2 NaN 3 3 3
3 72 25.8 50.5 NaN 1 1 1
4 73 50.5 NaN 75.2 1 1 1
Considering the OP's note, "my goal is to produce this dataset but using the mutate_at function":
df %>%
  group_by(group_var) %>%
  # mutate_at() keeps every row and adds the suffixed *_m / *_n columns
  mutate_at(
    vars(ends_with("_scr")),
    list(
      m = ~ mean(., na.rm = TRUE),
      n = ~ n()
    )
  ) %>%
  # keep only the first row of each group
  slice(1)
Upvotes: 3