Pepijn95
Pepijn95

Reputation: 321

How to repeatedly calculate percentage ratio between two rows?

I want to calculate the percentage ratio between 2 rows and in a repeative manner for every new 2 rows. For example using my current dataset: I have 2 values in rows in the column Aantal, 116 and 122. Calculations would be 116/total = 0.48 and 122/total = 0.51. I want to use some code to provide this for every 2 corresponding rows. I added a dataset with the desired outcome (this dataset show only the value 'Molenpolder', however I want both values in the column Locatie).

My current dataset:

structure(list(Datum = structure(c(18748, 18748, 18779, 18779, 
18809, 18809, 18809, 18809, 18840, 18840, 18840, 18840, 18871, 
18871, 18871, 18871, 18901, 18901, 18901, 18901, 18932, 18932, 
18932, 18932, 18962, 18962, 18993, 18993, 18993, 18993, 19024, 
19024, 19052, 19052), class = "Date"), Locatie = c("Molenpolder", 
"Molenpolder", "Molenpolder", "Molenpolder", "Berkenwoude", "Berkenwoude", 
"Molenpolder", "Molenpolder", "Berkenwoude", "Berkenwoude", "Molenpolder", 
"Molenpolder", "Berkenwoude", "Berkenwoude", "Molenpolder", "Molenpolder", 
"Berkenwoude", "Berkenwoude", "Molenpolder", "Molenpolder", "Berkenwoude", 
"Berkenwoude", "Molenpolder", "Molenpolder", "Berkenwoude", "Berkenwoude", 
"Berkenwoude", "Berkenwoude", "Molenpolder", "Molenpolder", "Berkenwoude", 
"Berkenwoude", "Molenpolder", "Molenpolder"), Geslacht = c("Man", 
"Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", 
"Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", 
"Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", 
"Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", 
"Vrouw"), Aantal = c(116, 122, 4103.26419354839, 6953.40967741935, 
9423, 9341.29, 14824.8675932241, 16933.0370786706, 17820.274105332, 
15154.2391914913, 31680.8030521092, 22602.5941191067, 13258.542809897, 
12760.9674586383, 28192.59, 23135.1, 3000.58, 2670.27, 8948.49, 
9425.45, 67, 78, 265.07, 285.83, 89, 125, 30, 41, 34, 55, 154.86, 
254.78, 113.61, 167.15)), class = c("grouped_df", "tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -34L), groups = structure(list(
    Datum = structure(c(18748, 18779, 18809, 18809, 18840, 18840, 
    18871, 18871, 18901, 18901, 18932, 18932, 18962, 18993, 18993, 
    19024, 19052), class = "Date"), Locatie = c("Molenpolder", 
    "Molenpolder", "Berkenwoude", "Molenpolder", "Berkenwoude", 
    "Molenpolder", "Berkenwoude", "Molenpolder", "Berkenwoude", 
    "Molenpolder", "Berkenwoude", "Molenpolder", "Berkenwoude", 
    "Berkenwoude", "Molenpolder", "Berkenwoude", "Molenpolder"
    ), .rows = structure(list(1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 
        13:14, 15:16, 17:18, 19:20, 21:22, 23:24, 25:26, 27:28, 
        29:30, 31:32, 33:34), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -17L), .drop = TRUE))

Dataset of desired outcome:

structure(list(Datum = structure(c(18748, 18748, 18779, 18779, 
18809, 18809, 18840, 18840, 18871, 18871, 18901, 18901, 18932, 
18932, 18993, 18993, 19052, 19052), class = "Date"), Locatie = c("Molenpolder", 
"Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder", 
"Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder", 
"Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder", 
"Molenpolder", "Molenpolder"), Geslacht = c("Man", "Vrouw", "Man", 
"Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", 
"Vrouw", "Man", "Vrouw", "Man", "Vrouw", "Man", "Vrouw"), Aantal = c(116, 
122, 4103.26419354839, 6953.40967741935, 14824.8675932241, 16933.0370786706, 
31680.8030521092, 22602.5941191067, 28192.59, 23135.1, 8948.49, 
9425.45, 265.07, 285.83, 34, 55, 113.61, 167.15), Percentages = c(0.487394957983193, 
0.512605042016807, 0.362924631908121, 0.637075368091879, 0.459214273485728, 
0.540785726514272, 0.565716319029836, 0.434283680970164, 0.576064260390234, 
0.423935739609766, 0.47624478865361, 0.52375521134639, 0.46601956825594, 
0.53398043174406, 0.388888888888889, 0.611111111111111, 0.46919657033979, 
0.53080342966021)), class = c("grouped_df", "tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -18L), groups = structure(list(
    Datum = structure(c(18748, 18779, 18809, 18840, 18871, 18901, 
    18932, 18993, 19052), class = "Date"), Locatie = c("Molenpolder", 
    "Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder", 
    "Molenpolder", "Molenpolder", "Molenpolder", "Molenpolder"
    ), .rows = structure(list(1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 
        13:14, 15:16, 17:18), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -9L), .drop = TRUE))

Upvotes: 2

Views: 163

Answers (2)

Wimpel
Wimpel

Reputation: 27732

data.table approach

library(data.table)
setDT(mydata)
mydata[, Percentages := Aantal / sum(Aantal), by = .(Datum, Locatie)][]

Upvotes: 3

langtang
langtang

Reputation: 24722

df %>%
  group_by(Datum,Locatie) %>%
  mutate(Percentages = Aantal/sum(Aantal))

   Datum      Locatie     Geslacht Aantal Percentages
   <date>     <chr>       <chr>     <dbl>       <dbl>
 1 2021-05-01 Molenpolder Man        116        0.487
 2 2021-05-01 Molenpolder Vrouw      122        0.513
 3 2021-06-01 Molenpolder Man       4103.       0.371
 4 2021-06-01 Molenpolder Vrouw     6953.       0.629
 5 2021-07-01 Berkenwoude Man       9423        0.502
 6 2021-07-01 Berkenwoude Vrouw     9341.       0.498
 7 2021-07-01 Molenpolder Man      14825.       0.467
 8 2021-07-01 Molenpolder Vrouw    16933.       0.533
 9 2021-08-01 Berkenwoude Man      17820.       0.540
10 2021-08-01 Berkenwoude Vrouw    15154.       0.460
# ... with 24 more rows

Upvotes: 1

Related Questions