Reputation: 474
I'm looking to divide the counts of each group from Type and Stream_Order by the total counts of all groups. I have the dataframe below. For example, for the Sediment Sample with Stream Order "1" in group "stoich", I'd like to divide the "n" value 1445 by it's counterpart in group "all": (1445/6312)*100 = 22.89 to calculate a percentage. So far I have
perc %>%
group_by(Type,Stream_Order) %>%
mutate(percentage = n/total*100)
But I realize I'm missing a lot. Sure I can do these by hand, but I would like to be prepared in the future for larger datasets.
> dput(perc)
structure(list(Type = c("Sediment", "Sediment", "Surface Water",
"Surface Water", "Sediment", "Sediment", "Surface Water", "Surface Water",
"Sediment", "Sediment", "Surface Water", "Surface Water", "Sediment",
"Sediment", "Surface Water", "Surface Water"), Stream_Order = structure(c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "Not_Provided"), class = "factor"),
n = c(1445L, 639L, 1736L, 815L, 4580L, 2456L, 3999L, 1743L,
287L, 109L, 656L, 450L, 6312L, 3204L, 6391L, 3008L), meancorr = c(0.292071913949931,
0.262339039457943, -0.089326777121153, -0.259460896530476,
7.82761635773663, 8.66618572293474, 9.85787757997926, 8.06533374329178,
-3.04777412402461, -3.0807907346078, -3.07173828840263, -3.17877354374679,
5.60801895761982, 6.59050580264329, 5.82874680645636, 4.12764893257951
), max = c(1.99996435865152, 1.98765952360616, 1.99718852523326,
1.99609913106005, 32.9381587597398, 33.1286964665043, 38.3287173847001,
28.8178499883634, -2.00562491853763, -2.01247690847229, -2.00017475758744,
-2.00391643807592, 32.9381587597398, 33.1286964665043, 38.3287173847001,
28.8178499883634), min = c(-1.99674391114338, -1.99895657727752,
-1.99815485165027, -1.9986527287077, 2.00180602762732, 2.00187366008464,
2.01245579505433, 2.0066032552998, -7.194006200101, -6.68838345991404,
-6.69361085662866, -6.27066223419871, -7.194006200101, -6.68838345991404,
-6.69361085662866, -6.27066223419871), Group = c("stoch",
"stoch", "stoch", "stoch", "hetero", "hetero", "hetero",
"hetero", "homo", "homo", "homo", "homo", "all", "all", "all",
"all")), class = "data.frame", row.names = c(NA, -16L))
Upvotes: 2
Views: 1431
Reputation: 887981
We can subset 'n' using a logical expression on the 'Group' column (Group == 'all'
)
library(dplyr)
perc <- perc %>%
group_by(Type, Stream_Order) %>%
mutate(percentage = n/n[Group == 'all']*100) %>%
ungroup
-output
perc
# A tibble: 16 × 8
Type Stream_Order n meancorr max min Group percentage
<chr> <fct> <int> <dbl> <dbl> <dbl> <chr> <dbl>
1 Sediment 1 1445 0.292 2.00 -2.00 stoch 22.9
2 Sediment 2 639 0.262 1.99 -2.00 stoch 19.9
3 Surface Water 1 1736 -0.0893 2.00 -2.00 stoch 27.2
4 Surface Water 2 815 -0.259 2.00 -2.00 stoch 27.1
5 Sediment 1 4580 7.83 32.9 2.00 hetero 72.6
6 Sediment 2 2456 8.67 33.1 2.00 hetero 76.7
7 Surface Water 1 3999 9.86 38.3 2.01 hetero 62.6
8 Surface Water 2 1743 8.07 28.8 2.01 hetero 57.9
9 Sediment 1 287 -3.05 -2.01 -7.19 homo 4.55
10 Sediment 2 109 -3.08 -2.01 -6.69 homo 3.40
11 Surface Water 1 656 -3.07 -2.00 -6.69 homo 10.3
12 Surface Water 2 450 -3.18 -2.00 -6.27 homo 15.0
13 Sediment 1 6312 5.61 32.9 -7.19 all 100
14 Sediment 2 3204 6.59 33.1 -6.69 all 100
15 Surface Water 1 6391 5.83 38.3 -6.69 all 100
16 Surface Water 2 3008 4.13 28.8 -6.27 all 100
Upvotes: 1