Reputation: 368
I want to loop over a grouped dataframe of factor variables to count the
occurrences of each value within a variable using the `count` function from
dplyr, and I think that the `purrr::map` function would be the most suitable.
However, I cannot get this to work. I tried to use this post for my needs, but this did not work either. I also tried to hack together a function based on this post, but could not get this to work with the grouping variable.
Is it possible to loop over a grouped dataframe in the way that I want? If so, how?
Thanks in advance for your consideration.
library(tidyverse)
# Reproducible example data: a 50-row tibble of six factor columns.
# `c` has levels "1", "2", "3" and is the grouping variable used below;
# the remaining columns have levels "0" and "1" and contain some NAs.
vars_df <-
structure(list(c = structure(c(2L, 3L, 3L, 2L, 3L, 3L, 2L, 2L,
1L, 2L, 2L, 2L, 3L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
2L, 3L, 1L, 2L, 1L, 3L, 3L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 2L, 1L,
1L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L), .Label = c("1", "2",
"3"), class = "factor"), pastpsyc = structure(c(2L, 1L, NA, 2L,
1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
1L, 2L, 2L, 1L, 2L, NA, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), pastmed = structure(c(2L, 1L, NA, 2L,
1L, 1L, 1L, 1L, NA, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, NA, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), hxsuicide = structure(c(2L, 1L, NA,
2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, NA, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), hxdsh = structure(c(2L, 1L, NA, 1L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, 2L, NA, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), hxtrauma = structure(c(2L, 1L, NA, 2L,
1L, 1L, 1L, 1L, NA, 1L, 1L, NA, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 1L, 1L, NA, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("0",
"1"), class = "factor")), row.names = c(NA, -50L), class = c("tbl_df",
"tbl", "data.frame"))
# For each candidate value ("1", "2", "3"), count within every level of `c`
# how many entries of each other column equal that value (NAs ignored), then
# row-bind the per-value summaries with the value recorded in `var`.
# Note: the non-grouping columns only have levels "0" and "1", so the "2"
# and "3" passes find no matches.
c('1', '2', '3') %>%
  setNames(c('1', '2', '3')) %>%
  map_dfr(
    function(level) {
      vars_df %>%
        group_by(c) %>%
        summarise(
          across(everything(), function(col) sum(col == level, na.rm = TRUE)),
          .groups = 'drop'
        )
    },
    .id = 'var'
  )
#> # A tibble: 9 x 7
#> var c pastpsyc pastmed hxsuicide hxdsh hxtrauma
#> <chr> <fct> <int> <int> <int> <int> <int>
#> 1 1 1 3 2 2 5 1
#> 2 1 2 16 9 18 16 10
#> 3 1 3 12 3 8 11 9
#> 4 2 1 0 0 0 0 0
#> 5 2 2 0 0 0 0 0
#> 6 2 3 0 0 0 0 0
#> 7 3 1 0 0 0 0 0
#> 8 3 2 0 0 0 0 0
#> 9 3 3 0 0 0 0 0
# Target result for a single variable: frequency of each `pastpsyc` value
# within every level of `c` (the result stays grouped by `c`).
count(group_by(vars_df, c), pastpsyc)
#> # A tibble: 7 x 3
#> # Groups: c [3]
#> c pastpsyc n
#> <fct> <fct> <int>
#> 1 1 0 4
#> 2 1 1 3
#> 3 2 0 8
#> 4 2 1 16
#> 5 3 0 5
#> 6 3 1 12
#> 7 3 <NA> 2
# Failing attempt: map() iterates over the columns of the grouped tibble, so
# count() is handed a bare factor instead of a data frame and errors.
vars_df %>%
  group_by(c) %>%
  map(function(column) count(column))
#> Error in UseMethod("count"): no applicable method for 'count' applied to an object of class "factor"
#' Count the rows of `vars_df` for each value of one column.
#'
#' Reads `vars_df` from the calling environment rather than taking it as an
#' argument.
#'
#' @param mygroup Unquoted column of `vars_df` to tally; a symbol may be
#'   injected by the caller with `!!`.
#' @return An ungrouped tibble with one row per distinct value of `mygroup`
#'   and its frequency in column `n`.
.get_count <- function(mygroup) {
  # `{{ }}` captures the caller's column expression and injects it in one step.
  by_value <- group_by(vars_df, {{ mygroup }})
  ungroup(count(by_value))
}
# Names of every column in the example data, including the grouping column `c`.
vars <- colnames(vars_df)

# Turn each name into a symbol and tally that column on its own (no grouping
# by `c` yet).
map(syms(vars), function(col_sym) .get_count(!!col_sym))
#> [[1]]
#> # A tibble: 3 x 2
#> c n
#> <fct> <int>
#> 1 1 7
#> 2 2 24
#> 3 3 19
#>
#> [[2]]
#> # A tibble: 3 x 2
#> pastpsyc n
#> <fct> <int>
#> 1 0 17
#> 2 1 31
#> 3 <NA> 2
#>
#> [[3]]
#> # A tibble: 3 x 2
#> pastmed n
#> <fct> <int>
#> 1 0 33
#> 2 1 14
#> 3 <NA> 3
#>
#> [[4]]
#> # A tibble: 3 x 2
#> hxsuicide n
#> <fct> <int>
#> 1 0 20
#> 2 1 28
#> 3 <NA> 2
#>
#> [[5]]
#> # A tibble: 3 x 2
#> hxdsh n
#> <fct> <int>
#> 1 0 16
#> 2 1 32
#> 3 <NA> 2
#>
#> [[6]]
#> # A tibble: 3 x 2
#> hxtrauma n
#> <fct> <int>
#> 1 0 26
#> 2 1 20
#> 3 <NA> 4
# Failing attempt: syms() returns a plain list of symbols and group_by() has
# no method for lists, so the pipeline stops before map() is reached.
syms(vars) %>%
  group_by(c) %>%
  map(function(col_sym) .get_count(!!col_sym))
#> Error in UseMethod("group_by"): no applicable method for 'group_by' applied to an object of class "list"
# Created on 2021-05-26 by the reprex package (v2.0.0)
Upvotes: 0
Views: 390
Reputation: 388982
You can use `map` as follows:
library(tidyverse)
# For each column name, tally that column's values within every level of `c`;
# `.data[[...]]` looks the column up by its string name.
map(vars, function(col_name) count(vars_df, c, .data[[col_name]]))
#[[1]]
# A tibble: 3 x 2
# c n
# <fct> <int>
#1 1 7
#2 2 24
#3 3 19
#[[2]]
# A tibble: 7 x 3
# c pastpsyc n
# <fct> <fct> <int>
#1 1 0 4
#2 1 1 3
#3 2 0 8
#4 2 1 16
#5 3 0 5
#6 3 1 12
#7 3 NA 2
#...
#...
A different way to show the output in a long format -
# Stack every column except `c` into name/value pairs, then tally each
# (c, variable, value) combination in a single long table.
vars_df %>%
  pivot_longer(cols = -c, names_to = "name", values_to = "value") %>%
  count(c, name, value)
# c name value n
# <fct> <chr> <fct> <int>
# 1 1 hxdsh 0 2
# 2 1 hxdsh 1 5
# 3 1 hxsuicide 0 5
# 4 1 hxsuicide 1 2
# 5 1 hxtrauma 0 5
# 6 1 hxtrauma 1 1
# 7 1 hxtrauma NA 1
# 8 1 pastmed 0 4
# 9 1 pastmed 1 2
#10 1 pastmed NA 1
# … with 28 more rows
Upvotes: 1