How to get frequencies of data using dplyr

Question

I have a data.frame like this:

# A tibble: 6 x 10
  freqtools freqtrees freqrt freqroamfriends freqroamalone freqparts freqmessy freqride freqall freqrain
                                                      
1         5         5      5               5             5         5         5        5       1        5
2         5         2      2               2             5         4         5        4       0        5
3         5         4      4               3             4         3         4        2       1        1
4         5         4      4               3             2         1         2        1       1        2
5         5         5      4               1             1         4         5        5       1        3
6         5         5      5               5             5         5         5        5       1        2

I would like some code, preferably using dplyr, that could answer the question:

In what proportion of the rows does 4 or 5 appear at least once?

Then I would like to answer the same question for "at least twice", "at least three times", and so on,

and output the results in a table with the headings "atleast1", "atleast2", etc. and the corresponding proportions.

EDIT: example output of dput(), as requested:

structure(list(freqtools = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), freqtrees = c(5L, 2L, 4L, 
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 5L, 4L, 1L, 5L, 5L, 4L, 
4L, 4L, 5L, 4L, 5L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 5L), freqrt = c(5L, 
2L, 4L, 4L, 4L, 5L, 5L, NA, 3L, 5L, 5L, 5L, 4L, 5L, 3L, 2L, 5L, 
5L, 4L, 2L, 5L, 3L, 3L, 5L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 5L
), freqroamfriends = c(5L, 2L, 3L, 3L, 1L, 5L, 1L, 2L, 1L, 5L, 
5L, 5L, 1L, 3L, 3L, 1L, 4L, 5L, 4L, 1L, 3L, 3L, 2L, 3L, 5L, 5L, 
5L, 1L, 4L, 1L, 5L, 4L, 2L), freqroamalone = c(5L, 5L, 4L, 2L, 
1L, 5L, 1L, 2L, 1L, 5L, 5L, 5L, 1L, 1L, 2L, 1L, 2L, 5L, 3L, 1L, 
4L, 1L, 4L, 3L, 5L, 5L, 5L, 1L, 3L, 1L, 5L, 1L, 1L), freqparts = c(5L, 
4L, 3L, 1L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 5L, 3L, 2L, 5L, 
5L, 5L, 4L, 4L, 4L, 3L, 4L, 5L, 5L, 5L, 1L, 4L, 5L, 5L, 5L, 5L
), freqmessy = c(5L, 5L, 4L, 2L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 3L, 2L, 5L, 5L, 5L, 4L, 4L, 4L, 2L, 4L, NA, 5L, 5L, 
3L, 4L, 5L, 5L, 5L, 5L), freqride = c(5L, 4L, 2L, 1L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 4L, 5L, 5L, 4L, 4L, 4L, 4L, 
5L, 4L, 5L, 5L, 5L, 3L, 3L, 5L, 5L, 5L, 5L), freqall = c(1L, 
0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L
), freqrain = c(5L, 5L, 1L, 2L, 3L, 2L, 3L, 4L, 5L, 5L, 5L, 3L, 
4L, 4L, 3L, 3L, 2L, 5L, 4L, 5L, 4L, 4L, 2L, 4L, 5L, 5L, 4L, 3L, 
2L, 3L, 5L, 4L, 5L)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -33L), .Names = c("freqtools", "freqtrees", 
"freqrt", "freqroamfriends", "freqroamalone", "freqparts", "freqmessy", 
"freqride", "freqall", "freqrain"))

tigerloveslobsters · Accepted Answer

1. Create a wrapper function for counting frequencies

library(dplyr)
# Tally how often each distinct value of the expression(s) passed via `...`
# occurs in `sample_data`, returning the tallies with the most frequent first.
# NOTE: `sample_data` is read from the calling environment (it is not an
# argument), so it must exist before `freq()` is called.
freq <- function(...) {
  arrange(count(sample_data, ...), desc(n))
}

2. Use apply() to pass each column to the function freq

# Run freq() once per column (margin 2) of `sample_data`; `a` ends up holding
# one frequency table per column.
# NOTE(review): apply() converts a data frame to a matrix before iterating,
# so this presumes all columns share one type - confirm for other data sets.
a <- apply(sample_data, 2, freq)

3. Use a for loop to modify each data frame within a (a list object)

# Tag every frequency table in `a` with the name of the list element it is
# stored under, then give all tables the same three headers so they can be
# stacked row-wise afterwards.
# seq_along() yields an empty sequence for an empty list, whereas
# 1:length(a) would iterate over c(1, 0) and fail.
for (i in seq_along(a)) {
  a[[i]]$Column <- names(a[i])
  print(i) # progress indicator: index of the table just tagged
  names(a[[i]]) <- c("Variable", "n", "Column_name")
}

4. Use do.call() to bind all rows

# Stack every table in `a` into one data frame (rbind over all list
# elements), convert it to a plain data.frame, and put the columns in the
# order Column_name, Variable, n.
final <- select(
  data.frame(do.call(rbind, a)),
  Column_name, Variable, n
)

5. Create the percentage with dplyr

# Within each Column_name group, express every row's count `n` as a share of
# the group's total count, rounded to 4 decimals. `Percent` is therefore a
# proportion between 0 and 1, not a value out of 100.
# ungroup() at the end keeps the grouping from leaking into any later
# operation on the result.
final %>%
  group_by(Column_name) %>%
  mutate(Percent = round(n / sum(n), 4)) %>%
  ungroup()

How to get frequencies of data using dplyr

Answers (2)

Related Questions