Reputation: 141
I have the following data
pt_id <- c(1,1,1,1,1,2,2,2,3,3,3,3,3,4,4,4,4)
Tob_pk <- c(2, 5, 7, 1, 8, 12, 14, 3, 6, 8, 10, 20, 13, 5, 4, 12, 10)
Tobacco <- c("Once","Twice","Never", NA, NA, NA, NA, NA,"Once","Twice","Quit","Once",NA,NA,"Never", NA, "Never")
Alcohol <- c("Twice", "Once",NA, NA, "Never", NA, NA, "Once", NA, "Quit", "Twice", NA, "Once", NA, NA, "Never", "Never")
PA <- c("Once",NA,"Never", NA, NA, NA, NA, NA,"Once",NA,"Quit","Once",NA,NA,"Never", NA, NA)
mydata <- data.frame(pt_id, Tob_pk, Tobacco, Alcohol, PA)
mydata
I want the summary/ proportions for each variable in my dataset, I tried using the following code to get the summary/ proportions for each variable
data_summ <- mydata %>%
summarize_at(.vars=3:5, funs(prop.table(.)))
However, I am getting the following error
Error: Problem with `summarise()` input `Tobacco`.
x invalid 'type' (character) of argument
ℹ Input `Tobacco` is `prop.table(Tobacco)`.
Run `rlang::last_error()` to see where the error occurred.
I am not sure where I am going wrong. It would be helpful if I could get any suggestions to get the following output but with the percentage of NA's too.
Tobacco Alcohol PA
Never 0.3333333 Never 0.3333333 Never 0.3333333
Once 0.3333333 Once 0.3333333 Once 0.5000000
Quit 0.1111111 Quit 0.1111111 Quit. 0.1666667
Twice 0.2222222 Twice 0.2222222
Thanks in advance!
Upvotes: 1
Views: 1295
Reputation: 8880
using base
pt_id <- c(1,1,1,1,1,2,2,2,3,3,3,3,3,4,4,4,4)
Tob_pk <- c(2, 5, 7, 1, 8, 12, 14, 3, 6, 8, 10, 20, 13, 5, 4, 12, 10)
Tobacco <- c("Once","Twice","Never", NA, NA, NA, NA, NA,"Once","Twice","Quit","Once",NA,NA,"Never", NA, "Never")
Alcohol <- c("Twice", "Once",NA, NA, "Never", NA, NA, "Once", NA, "Quit", "Twice", NA, "Once", NA, NA, "Never", "Never")
PA <- c("Once",NA,"Never", NA, NA, NA, NA, NA,"Once",NA,"Quit","Once",NA,NA,"Never", NA, NA)
mydata <- data.frame(pt_id, Tob_pk, Tobacco, Alcohol, PA)
apply(mydata[3:5], 2, function(x) prop.table(table(x, useNA="ifany")))
$Tobacco
x
Never Once Quit Twice <NA>
0.17647059 0.17647059 0.05882353 0.11764706 0.47058824
$Alcohol
x
Never Once Quit Twice <NA>
0.17647059 0.17647059 0.05882353 0.11764706 0.47058824
$PA
x
Never Once Quit <NA>
0.11764706 0.17647059 0.05882353 0.64705882
Created on 2021-01-18 by the reprex package (v0.3.0)
using tidyverse
library(tidyverse)
map_dfr(mydata[3:5], ~prop.table(table(.x)))
#> # A tibble: 3 x 4
#> Never Once Quit Twice
#> <table> <table> <table> <table>
#> 1 0.3333333 0.3333333 0.1111111 0.2222222
#> 2 0.3333333 0.3333333 0.1111111 0.2222222
#> 3 0.3333333 0.5000000 0.1666667 NA
Created on 2021-01-18 by the reprex package (v0.3.0)
Upvotes: 3
Reputation: 101024
Here is a base R option using prop.table
+ table
+ na.omit
+ factor
do.call(
cbind,
lapply(
mydata[3:5],
function(x) {
prop.table(
table(
na.omit(
factor(x, levels = unique(na.omit(unlist(mydata[3:5]))))
)
)
)
}
)
)
which gives
Tobacco Alcohol PA
Once 0.3333333 0.3333333 0.5000000
Twice 0.2222222 0.2222222 0.0000000
Never 0.3333333 0.3333333 0.3333333
Quit 0.1111111 0.1111111 0.1666667
Upvotes: 0
Reputation: 72593
You could transform the variables to factors with corresponding levels. Then table
with option useNA="ifany"
displays the NA
.
mydata[3:5] <- lapply(mydata[3:5], factor, levels=c("Never", "Once", "Quit", "Twice"))
res1 <- sapply(mydata[3:5], function(x) prop.table(table(x)))
res1
# Tobacco Alcohol PA
# Never 0.3333333 0.3333333 0.3333333
# Once 0.3333333 0.3333333 0.5000000
# Quit 0.1111111 0.1111111 0.1666667
# Twice 0.2222222 0.2222222 0.0000000
res2 <- sapply(mydata[3:5], function(x) prop.table(table(x, useNA="ifany")))
res2
# Tobacco Alcohol PA
# Never 0.17647059 0.17647059 0.11764706
# Once 0.17647059 0.17647059 0.17647059
# Quit 0.05882353 0.05882353 0.05882353
# Twice 0.11764706 0.11764706 0.00000000
# <NA> 0.47058824 0.47058824 0.64705882
Upvotes: 1
Reputation: 1136
Using dplyr:
library(tidyverse)
df <- mydata %>%
select(3:5) %>%
gather('Your_vice', 'freq', 1:3) %>%
group_by(Your_vice, freq) %>%
summarize(n = n()) %>%
mutate(perc = n/sum(n))
df
# A tibble: 14 x 4
# Groups: Your_vice [3]
Your_vice freq n perc
<chr> <chr> <int> <dbl>
1 Alcohol Never 3 0.176
2 Alcohol Once 3 0.176
3 Alcohol Quit 1 0.0588
4 Alcohol Twice 2 0.118
5 Alcohol NA 8 0.471
6 PA Never 2 0.118
7 PA Once 3 0.176
8 PA Quit 1 0.0588
9 PA NA 11 0.647
10 Tobacco Never 3 0.176
11 Tobacco Once 3 0.176
12 Tobacco Quit 1 0.0588
13 Tobacco Twice 2 0.118
14 Tobacco NA 8 0.471
Upvotes: 1