Sulz
Sulz

Reputation: 425

Calulating percentage differences within groups and time

I do have dataframe which looks like:

A <- data.frame(GroupName = c(rep(c("A", "B", "C"), each = 6)),
           ObservationName = c("alpha", "beta", "gamma", "alpha", "beta", "gamma", rep(c("delta", "epsilon"),3), rep(c("zeta", "eta", "theta"),2)),
           Date = rep(rep(seq(as.Date("2010-1-1"), as.Date("2010-3-1"), by = "month"), each =3), 2),
           Value = runif(n = 18, min = 1, max = 10))

Now I want to calculate the percentage change for the values always within a Group (Groupname) and the same Month (Date) and between all observations within this group if the measurment is in the same month (and Group). The result of my output should look something like (only first four cases for Group A, the rest should proceed like this) :

B <- data.frame(GroupName = c("A", "A", "A", "A"),
           ObservationName1 = c("alpha", "alpha", "beta", "alpha"),
           ObservationName2 = c("beta", "gamma", "gamma", "beta"),
           Date = c(as.Date("2010-1-1"), as.Date("2010-1-1"), as.Date("2010-1-1"), as.Date("2010-2-1")),
           Diff = c(abs(A[1,4]-A[2,4])/ ((A[1,4]+A[2,4])/2), #alpha beta Jan
                    abs(A[1,4]-A[3,4])/ ((A[1,4]+A[3,4])/2), #alpha gamma Jan
                    abs(A[2,4]-A[4,4])/ ((A[2,4]+A[4,4])/2), #beta gamma Jan
                    abs(A[4,4]-A[5,4])/ ((A[4,4]+A[5,4])/2))) #alpha gamma Feb

Upvotes: 2

Views: 60

Answers (1)

Ma&#235;l
Ma&#235;l

Reputation: 52059

With combn:

library(dplyr) #1.1.0
A %>% 
  reframe(result = combn(seq_along(Value), 2, function(i)
    list(ObservationName1 = ObservationName[i][1],
         ObservationName2 = ObservationName[i][2],
         Diff = abs(diff(Value[i])) / (sum(Value[i]) / 2)), # The pairs
    simplify = FALSE),
    .by = c(GroupName, Date)) %>% 
  unnest_wider(result)

output

# A tibble: 18 × 5
   GroupName Date       ObservationName1 ObservationName2     Diff
   <chr>     <date>     <chr>            <chr>               <dbl>
 1 A         2010-01-01 alpha            beta             0.414   
 2 A         2010-01-01 alpha            gamma            0.250   
 3 A         2010-01-01 beta             gamma            0.168   
 4 A         2010-02-01 alpha            beta             0.675   
 5 A         2010-02-01 alpha            gamma            0.480   
 6 A         2010-02-01 beta             gamma            1.07    
 7 B         2010-03-01 delta            epsilon          1.55    
 8 B         2010-03-01 delta            delta            1.11    
 9 B         2010-03-01 epsilon          delta            0.779   
10 B         2010-01-01 epsilon          delta            0.841   
11 B         2010-01-01 epsilon          epsilon          0.559   
12 B         2010-01-01 delta            epsilon          0.321   
13 C         2010-02-01 zeta             eta              0.770   
14 C         2010-02-01 zeta             theta            0.000664
15 C         2010-02-01 eta              theta            0.770   
16 C         2010-03-01 zeta             eta              0.799   
17 C         2010-03-01 zeta             theta            0.350   
18 C         2010-03-01 eta              theta            0.483    

Before dplyr 1.1.0:

A %>% 
  group_by(GroupName, Date) %>% 
  summarise(result = combn(seq_along(Value), 2, function(i)
    list(ObservationName1 = ObservationName[i][1],
         ObservationName2 = ObservationName[i][2],
         Diff = abs(diff(Value[i])) / (sum(Value[i]) / 2)), # The pairs
    simplify = FALSE)) %>% 
  ungroup() %>% 
  unnest_wider(result)

Upvotes: 3

Related Questions