Nadine M.
Nadine M.

Reputation: 107

Count percentage of value "1" for each column AND for each group on specific variable

       ID   AT07 DT01_1 DT01_2 DT01_3 DT01_4 DT01_5 DT01_6 DT01_7 DT01_8 DT01_9 DT01_10 DT01_11 DT01_12 DT01_12.1
1 vD0FPVt AT07_1      1      1      1      1      1      1      1      1      1       1       1       0         0
2 UwwHYWL AT07_1      0      0      0      0      1      0      0      0      0       0       0       0         0
3 Lw7HM6J AT07_1      0      0      0      0      0      0      1      1      0       1       0       0         0
4 Llpww1k AT07_1      0      0      0      0      0      0      0      0      0       1       0       0         0
5 OwXwA9j AT07_1      0      0      0      0      0      0      1      0      0       1       1       0         0

Hi everyone, can you please help me in counting the percentage of the values "1" (separately for each col DT01) and aggregated on group level, where groups are AT07_1 to AT07_11. Variable DT01 range from DT01_1 to DT01_19. Thank you. The following pictures show the current dataset again as well as the result I want to yield. Percentage values in the images are invented.

enter image description here enter image description here

Here what I've tried so far (but its ineconomic):

library(dplyr)
resLong <- resLong %>%
   group_by(AT07) %>%
   mutate(DT01_1_percent1 = label_percent()(sum(DT01_1 == 1)/n())) %>%
   mutate(DT01_2_percent1 = label_percent()(sum(DT01_2 == 1)/n())) %>%
   mutate(DT01_3_percent1 = label_percent()(sum(DT01_3 == 1)/n())) %>%
   mutate(DT01_4_percent1 = label_percent()(sum(DT01_4 == 1)/n())) %>%
   mutate(DT01_5_percent1 = label_percent()(sum(DT01_5 == 1)/n())) %>%
   mutate(DT01_6_percent1 = label_percent()(sum(DT01_6 == 1)/n())) %>%
   mutate(DT01_7_percent1 = label_percent()(sum(DT01_7 == 1)/n())) %>%
   mutate(DT01_8_percent1 = label_percent()(sum(DT01_8 == 1)/n())) %>%
   mutate(DT01_9_percent1 = label_percent()(sum(DT01_9 == 1)/n())) %>%
   mutate(DT01_10_percent1 = label_percent()(sum(DT01_10 == 1)/n())) %>%
   mutate(DT01_11_percent1 = label_percent()(sum(DT01_11 == 1)/n())) %>%
   mutate(DT01_12_percent1 = label_percent()(sum(DT01_12 == 1)/n())) %>%
   mutate(DT01_13_percent1 = label_percent()(sum(DT01_13 == 1)/n())) %>%
   mutate(DT01_14_percent1 = label_percent()(sum(DT01_14 == 1)/n())) %>%
   mutate(DT01_15_percent1 = label_percent()(sum(DT01_15 == 1)/n())) %>%
   mutate(DT01_16_percent1 = label_percent()(sum(DT01_16 == 1)/n())) %>% 
   mutate(DT01_17_percent1 = label_percent()(sum(DT01_17 == 1)/n())) %>%
   mutate(DT01_18_percent1 = label_percent()(sum(DT01_18 == 1)/n())) %>%
   mutate(DT01_19_percent1 = label_percent()(sum(DT01_19 == 1)/n()))
 
  
resLong <- resLong[,-c(1:21)]

library(dplyr)
resLong <- distinct(resLong)

Upvotes: 0

Views: 42

Answers (1)

Adriano Mello
Adriano Mello

Reputation: 2132

Here It is:

# library(tidyverse)
library(dplyr)

# You're looking for `summarise` + `across` -------------------------------
aux_out <- summarise(
  aux_in, 
  .by = AT07,           # group column (one or more)
  across(               # applies over multiple columns
    starts_with("DT"),  # all columns starting with "DT"
    \(x) sum(x)/n()     # `sum()` works if there's only `1` and `0`
  ))                    # Multiply by `100` if you want in `%`    

Output:

> aux_out
# A tibble: 3 × 14
  AT07   DT01_1 DT01_2 DT01_3 DT01_4 DT01_5 DT01_6 DT01_7 DT01_8 DT01_9 DT01_10 DT01_11 DT01_12 DT01_12.1
  <chr>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>   <dbl>   <dbl>     <dbl>
1 AT07_1    0.2  0.2    0.2    0.2      0.4  0.2    0.6    0.4    0.2     0.8     0.4     0.2       0    
2 AT07_2    0.5  0.5    0.5    0.5      1    0.5    0.5    0.5    1       0.5     0.5     0         0.5  
3 AT07_3    1    0.667  0.333  0.667    0    0.333  0.667  0.333  0.333   0.667   0.333   0.333     0.667

Toy data:

library(dplyr)

# Toy data ----------------------------------------------------------------
aux_in <- tibble::tribble(
  ~ID,    ~AT07, ~DT01_1, ~DT01_2, ~DT01_3, ~DT01_4, ~DT01_5, ~DT01_6, ~DT01_7, ~DT01_8, ~DT01_9, ~DT01_10, ~DT01_11, ~DT01_12, ~DT01_12.1,
  "vD0FPVt", "AT07_1",       1,       1,       1,       1,       1,       1,       1,       1,       1,        1,        1,        0,          0,
  "UwwHYWL", "AT07_1",       0,       0,       0,       0,       1,       0,       0,       0,       0,        0,        0,        0,          0,
  "Lw7HM6J", "AT07_1",       0,       0,       0,       0,       0,       0,       1,       1,       0,        1,        0,        0,          0,
  "Llpww1k", "AT07_1",       0,       0,       0,       0,       0,       0,       0,       0,       0,        1,        0,        0,          0,
  "OwXwA9j", "AT07_1",       0,       0,       0,       0,       0,       0,       1,       0,       0,        1,        1,        1,          0,
  "_D0FPVt", "AT07_2",       1,       1,       1,       1,       1,       1,       1,       1,       1,        1,        1,        0,          1,
  "_wwHYWL", "AT07_2",       0,       0,       0,       0,       1,       0,       0,       0,       1,        0,        0,        0,          0,
  "_w7HM6J", "AT07_3",       1,       1,       0,       1,       0,       0,       1,       1,       0,        1,        0,        0,          0,
  "_lpww1k", "AT07_3",       1,       1,       1,       1,       0,       1,       0,       0,       1,        0,        0,        1,          1,
  "_wXwA9j", "AT07_3",       1,       0,       0,       0,       0,       0,       1,       0,       0,        1,        1,        0,          1)

Created on 2024-05-10 with reprex v2.1.0

Upvotes: 1

Related Questions