Reputation: 321
I want to calculate the percentage ratio between 2 rows and in a repeative manner for every new 2 rows. For example using my current dataset: I have 2 values in rows in the column Aantal
, 116 and 122. Calculations would be 116/total = 0.48 and 122/total = 0.51. I want to use some code to provide this for every 2 corresponding rows. I added a dataset with the desired outcome (this dataset show only the value 'Molenpolder', however I want both values in the column Locatie
).
My current dataset:
structure(list(Datum = structure(c(18748, 18748, 18779, 18779,
18809, 18809, 18809, 18809, 18840, 18840, 18840, 18840, 18871,
18871, 18871, 18871, 18901, 18901, 18901, 18901, 18932, 18932,
18932, 18932, 18962, 18962, 18993, 18993, 18993, 18993, 19024,
19024, 19052, 19052), class = "Date"), Locatie = c("Molenpolder",
"Molenpolder", "Molenpolder", "Molenpolder", "Berkenwoude", "Berkenwoude",
"Molenpolder", "Molenpolder", "Berkenwoude", "Berkenwoude", "Molenpolder",
"Molenpolder", "Berkenwoude", "Berkenwoude", "Molenpolder", "Molenpolder",
"Berkenwoude", "Berkenwoude", "Molenpolder", "Molenpolder", "Berkenwoude",
"Berkenwoude", "Molenpolder", "Molenpolder", "Berkenwoude", "Berkenwoude",
"Berkenwoude", "Berkenwoude", "Molenpolder", "Molenpolder", "Berkenwoude",
"Berkenwoude", "Molenpolder", "Molenpolder"), Geslacht = c("Man",
"Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man",
"Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man",
"Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man",
"Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man",
"Vrouw"), Aantal = c(116, 122, 4103.26419354839, 6953.40967741935,
9423, 9341.29, 14824.8675932241, 16933.0370786706, 17820.274105332,
15154.2391914913, 31680.8030521092, 22602.5941191067, 13258.542809897,
12760.9674586383, 28192.59, 23135.1, 3000.58, 2670.27, 8948.49,
9425.45, 67, 78, 265.07, 285.83, 89, 125, 30, 41, 34, 55, 154.86,
254.78, 113.61, 167.15)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -34L), groups = structure(list(
Datum = structure(c(18748, 18779, 18809, 18809, 18840, 18840,
18871, 18871, 18901, 18901, 18932, 18932, 18962, 18993, 18993,
19024, 19052), class = "Date"), Locatie = c("Molenpolder",
"Molenpolder", "Berkenwoude", "Molenpolder", "Berkenwoude",
"Molenpolder", "Berkenwoude", "Molenpolder", "Berkenwoude",
"Molenpolder", "Berkenwoude", "Molenpolder", "Berkenwoude",
"Berkenwoude", "Molenpolder", "Berkenwoude", "Molenpolder"
), .rows = structure(list(1:2, 3:4, 5:6, 7:8, 9:10, 11:12,
13:14, 15:16, 17:18, 19:20, 21:22, 23:24, 25:26, 27:28,
29:30, 31:32, 33:34), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -17L), .drop = TRUE))
Dataset of desired outcome:
structure(list(Datum = structure(c(18748, 18748, 18779, 18779,
18809, 18809, 18840, 18840, 18871, 18871, 18901, 18901, 18932,
18932, 18993, 18993, 19052, 19052), class = "Date"), Locatie = c("Molenpolder",
"Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder",
"Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder",
"Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder",
"Molenpolder", "Molenpolder"), Geslacht = c("Man", "Vrouw", "Man",
"Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man",
"Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", "Vrouw"), Aantal = c(116,
122, 4103.26419354839, 6953.40967741935, 14824.8675932241, 16933.0370786706,
31680.8030521092, 22602.5941191067, 28192.59, 23135.1, 8948.49,
9425.45, 265.07, 285.83, 34, 55, 113.61, 167.15), Percentages = c(0.487394957983193,
0.512605042016807, 0.362924631908121, 0.637075368091879, 0.459214273485728,
0.540785726514272, 0.565716319029836, 0.434283680970164, 0.576064260390234,
0.423935739609766, 0.47624478865361, 0.52375521134639, 0.46601956825594,
0.53398043174406, 0.388888888888889, 0.611111111111111, 0.46919657033979,
0.53080342966021)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -18L), groups = structure(list(
Datum = structure(c(18748, 18779, 18809, 18840, 18871, 18901,
18932, 18993, 19052), class = "Date"), Locatie = c("Molenpolder",
"Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder",
"Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder"
), .rows = structure(list(1:2, 3:4, 5:6, 7:8, 9:10, 11:12,
13:14, 15:16, 17:18), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -9L), .drop = TRUE))
Upvotes: 2
Views: 163
Reputation: 27732
data.table
approach
library(data.table)
setDT(mydata)
mydata[, Percentages := Aantal / sum(Aantal), by = .(Datum, Locatie)][]
Upvotes: 3
Reputation: 24722
df %>%
group_by(Datum,Locatie) %>%
mutate(Percentages = Aantal/sum(Aantal))
Datum Locatie Geslacht Aantal Percentages
<date> <chr> <chr> <dbl> <dbl>
1 2021-05-01 Molenpolder Man 116 0.487
2 2021-05-01 Molenpolder Vrouw 122 0.513
3 2021-06-01 Molenpolder Man 4103. 0.371
4 2021-06-01 Molenpolder Vrouw 6953. 0.629
5 2021-07-01 Berkenwoude Man 9423 0.502
6 2021-07-01 Berkenwoude Vrouw 9341. 0.498
7 2021-07-01 Molenpolder Man 14825. 0.467
8 2021-07-01 Molenpolder Vrouw 16933. 0.533
9 2021-08-01 Berkenwoude Man 17820. 0.540
10 2021-08-01 Berkenwoude Vrouw 15154. 0.460
# ... with 24 more rows
Upvotes: 1