Calculate percentages based on grouping variable

Question

I would like to calculate the percentage of people who reported doing some work on each day. For example, I would like to know what percentage of the entire sample reported doing some work on Monday.

I used the following code to calculate this, but I am not sure about my result.

# Example data: one row per person (`id`); columns tD_S hold a 0/1 flag for
# day D (1-7) and time step S (1-3).
df1 <- structure(
  list(
    id = c(12L, 123L, 10L),
    t1_1 = c(0L, 0L, 1L), t1_2 = c(1L, 0L, 1L), t1_3 = c(1L, 0L, 1L),
    t2_1 = c(0L, 1L, 1L), t2_2 = c(1L, 1L, 1L), t2_3 = c(0L, 1L, 1L),
    t3_1 = c(1L, 0L, 1L), t3_2 = c(0L, 0L, 1L), t3_3 = c(1L, 0L, 1L),
    t4_1 = c(0L, 1L, 1L), t4_2 = c(1L, 1L, 1L), t4_3 = c(0L, 1L, 1L),
    t5_1 = c(0L, 1L, 1L), t5_2 = c(1L, 1L, 1L), t5_3 = c(0L, 1L, 1L),
    t6_1 = c(1L, 0L, 1L), t6_2 = c(1L, 0L, 1L), t6_3 = c(1L, 0L, 1L),
    t7_1 = c(0L, 1L, 1L), t7_2 = c(0L, 1L, 1L), t7_3 = c(1L, 1L, 1L)
  ),
  class = "data.frame",
  row.names = c(NA, -3L)
)

Variable description: t1 is Monday (t1_1, t1_2 and t1_3 are time steps recording whether work was done on Monday); t2 is Tuesday; t3 is Wednesday; t4 is Thursday; t5 is Friday; t6 is Saturday; and t7 is Sunday. id is an identification number.

# Reshape to long format: one row per (id, time-step column) pair, with the
# column name in `variable` and the 0/1 flag in `value`.
df2 <- reshape2::melt(df1, id.vars = "id")
df2$variable <- as.character(df2$variable)
# The day code is the part of the column name before "_" ("t1_2" -> "t1").
df2$day <- sapply(strsplit(df2$variable, "_"), `[`, 1)
# Factor levels are the distinct day codes in order of first appearance
# (t1, ..., t7). A bare `variable` is not an object in this script, so the
# levels have to be derived from the day column itself.
df2$day <- factor(df2$day, levels = unique(df2$day))

# Needs dplyr attached for %>%, group_by() and mutate().
# Within each day, every 0/1 observation is expressed as a percentage of the
# sum of `value` over that day's rows.
df3 <- df2 %>%
  group_by(day) %>%
  mutate(percent = value / sum(value) * 100)

# Bar chart over `day`, with rows grouped by `value`. The bar height is the
# `prop` variable computed by the count stat (a per-group proportion), and
# bars are coloured by their x position, i.e. by day.
# NOTE(review): the ..name.. notation is deprecated in recent ggplot2
# releases in favour of after_stat(name) - confirm against the installed
# version before switching.
ggplot(df3, aes(x = day, group = value)) +
  geom_bar(
    aes(y = ..prop.., fill = factor(..x..)),
    stat = "count"
  ) +
  scale_fill_discrete(
    name = "Days",
    labels = c(
      "Monday", "Tuesday", "Wednesday", "Thursday",
      "Friday", "Saturday", "Sunday"
    )
  ) +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  ylab("relative frequencies") +
  theme_bw()

Result:

DzimitryM · Accepted Answer

library(dplyr)
# Example data: one row per person (`id`); columns tD_S hold a 0/1 flag for
# day D (1-7) and time step S (1-3).
df1 <- data.frame(
  id = c(12L, 123L, 10L),
  # day 1
  t1_1 = c(0L, 0L, 1L), t1_2 = c(1L, 0L, 1L), t1_3 = c(1L, 0L, 1L),
  # day 2
  t2_1 = c(0L, 1L, 1L), t2_2 = c(1L, 1L, 1L), t2_3 = c(0L, 1L, 1L),
  # day 3
  t3_1 = c(1L, 0L, 1L), t3_2 = c(0L, 0L, 1L), t3_3 = c(1L, 0L, 1L),
  # day 4
  t4_1 = c(0L, 1L, 1L), t4_2 = c(1L, 1L, 1L), t4_3 = c(0L, 1L, 1L),
  # day 5
  t5_1 = c(0L, 1L, 1L), t5_2 = c(1L, 1L, 1L), t5_3 = c(0L, 1L, 1L),
  # day 6
  t6_1 = c(1L, 0L, 1L), t6_2 = c(1L, 0L, 1L), t6_3 = c(1L, 0L, 1L),
  # day 7
  t7_1 = c(0L, 1L, 1L), t7_2 = c(0L, 1L, 1L), t7_3 = c(1L, 1L, 1L)
)

# Long format: one row per (id, time-step column) pair; the column name ends
# up in `variable`, the 0/1 flag in `value`.
df2 <- reshape2::melt(df1, id.vars = "id")
# Work with the column names as plain text so they can be split.
df2$variable <- as.character(df2$variable)
# Keep only the day prefix of each column name, e.g. "t3_2" -> "t3".
df2$day <- sapply(
  strsplit(df2$variable, "_"),
  function(parts) parts[1]
)

df3 <- df2 %>%
  # Number of time steps with work reported, per person and day.
  group_by(id, day) %>%
  summarise(count = sum(value)) %>%
  # Each day's share of that person's total count over all days.
  group_by(id) %>%
  mutate(percent = count / sum(count)) %>%
  # Sort by day first so the same day of different people sits together.
  arrange(day, id)

> df3
# A tibble: 21 x 4
# Groups:   id [3]
      id day   count percent
   <int> <chr> <int>   <dbl>
 1    10 t1        3  0.143 
 2    12 t1        2  0.182 
 3   123 t1        0  0     
 4    10 t2        3  0.143 
 5    12 t2        1  0.0909
 6   123 t2        3  0.25
 ...

Is this what you are looking for?

Calculate percentages based on grouping variable

Answers (1)

Related Questions