Geomicro
Geomicro

Reputation: 474

calculate percentages using tidyverse

I'm looking to divide the counts of each group from Type and Stream_Order by the total counts of all groups. I have the dataframe below. For example, for the Sediment Sample with Stream Order "1" in group "stoich", I'd like to divide the "n" value 1445 by it's counterpart in group "all": (1445/6312)*100 = 22.89 to calculate a percentage. So far I have

perc %>%
  group_by(Type,Stream_Order) %>%
  mutate(percentage = n/total*100)

But I realize I'm missing a lot. Sure I can do these by hand, but I would like to be prepared in the future for larger datasets.

> dput(perc)
structure(list(Type = c("Sediment", "Sediment", "Surface Water", 
"Surface Water", "Sediment", "Sediment", "Surface Water", "Surface Water", 
"Sediment", "Sediment", "Surface Water", "Surface Water", "Sediment", 
"Sediment", "Surface Water", "Surface Water"), Stream_Order = structure(c(1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "Not_Provided"), class = "factor"), 
    n = c(1445L, 639L, 1736L, 815L, 4580L, 2456L, 3999L, 1743L, 
    287L, 109L, 656L, 450L, 6312L, 3204L, 6391L, 3008L), meancorr = c(0.292071913949931, 
    0.262339039457943, -0.089326777121153, -0.259460896530476, 
    7.82761635773663, 8.66618572293474, 9.85787757997926, 8.06533374329178, 
    -3.04777412402461, -3.0807907346078, -3.07173828840263, -3.17877354374679, 
    5.60801895761982, 6.59050580264329, 5.82874680645636, 4.12764893257951
    ), max = c(1.99996435865152, 1.98765952360616, 1.99718852523326, 
    1.99609913106005, 32.9381587597398, 33.1286964665043, 38.3287173847001, 
    28.8178499883634, -2.00562491853763, -2.01247690847229, -2.00017475758744, 
    -2.00391643807592, 32.9381587597398, 33.1286964665043, 38.3287173847001, 
    28.8178499883634), min = c(-1.99674391114338, -1.99895657727752, 
    -1.99815485165027, -1.9986527287077, 2.00180602762732, 2.00187366008464, 
    2.01245579505433, 2.0066032552998, -7.194006200101, -6.68838345991404, 
    -6.69361085662866, -6.27066223419871, -7.194006200101, -6.68838345991404, 
    -6.69361085662866, -6.27066223419871), Group = c("stoch", 
    "stoch", "stoch", "stoch", "hetero", "hetero", "hetero", 
    "hetero", "homo", "homo", "homo", "homo", "all", "all", "all", 
    "all")), class = "data.frame", row.names = c(NA, -16L))

Upvotes: 2

Views: 1431

Answers (1)

akrun
akrun

Reputation: 887981

We can subset 'n' using a logical expression on the 'Group' column (Group == 'all')

library(dplyr)
perc <- perc %>%
  group_by(Type, Stream_Order) %>%
  mutate(percentage = n/n[Group == 'all']*100) %>%
  ungroup

-output

perc
# A tibble: 16 × 8
   Type          Stream_Order     n meancorr   max   min Group  percentage
   <chr>         <fct>        <int>    <dbl> <dbl> <dbl> <chr>       <dbl>
 1 Sediment      1             1445   0.292   2.00 -2.00 stoch       22.9 
 2 Sediment      2              639   0.262   1.99 -2.00 stoch       19.9 
 3 Surface Water 1             1736  -0.0893  2.00 -2.00 stoch       27.2 
 4 Surface Water 2              815  -0.259   2.00 -2.00 stoch       27.1 
 5 Sediment      1             4580   7.83   32.9   2.00 hetero      72.6 
 6 Sediment      2             2456   8.67   33.1   2.00 hetero      76.7 
 7 Surface Water 1             3999   9.86   38.3   2.01 hetero      62.6 
 8 Surface Water 2             1743   8.07   28.8   2.01 hetero      57.9 
 9 Sediment      1              287  -3.05   -2.01 -7.19 homo         4.55
10 Sediment      2              109  -3.08   -2.01 -6.69 homo         3.40
11 Surface Water 1              656  -3.07   -2.00 -6.69 homo        10.3 
12 Surface Water 2              450  -3.18   -2.00 -6.27 homo        15.0 
13 Sediment      1             6312   5.61   32.9  -7.19 all        100   
14 Sediment      2             3204   6.59   33.1  -6.69 all        100   
15 Surface Water 1             6391   5.83   38.3  -6.69 all        100   
16 Surface Water 2             3008   4.13   28.8  -6.27 all        100   

Upvotes: 1

Related Questions