dobiii
dobiii

Reputation: 33

Yearly rate change based on one or two columns in r

My data looks like this

structure(list(year = c(2017, 2017, 2017, 2017, 2018, 2018, 2018, 
2018, 2019, 2019, 2019, 2019, 2017, 2017, 2018, 2018, 2019, 2019, 
2017, 2017, 2017, 2017, 2018, 2018, 2018, 2018, 2019, 2019, 2019, 
2019), College = c("College1", "College1", "College1", "College1", 
"College1", "College1", "College1", "College1", "College1", "College1", 
"College1", "College1", "College2", "College2", "College2", "College2", 
"College2", "College2", "College3", "College3", "College3", "College3", 
"College3", "College3", "College3", "College3", "College3", "College3", 
"College3", "College3"), Department = c("Department 1", "Department 2", 
"Department 3", "Department 4", "Department 1", "Department 2", 
"Department 3", "Department 4", "Department 1", "Department 2", 
"Department 3", "Department 4", "Department 1", "Department 2", 
"Department 1", "Department 2", "Department 1", "Department 2", 
"Department 1", "Department 2", "Department 3", "Department 4", 
"Department 1", "Department 2", "Department 3", "Department 4", 
"Department 1", "Department 2", "Department 3", "Department 4"
), Enrollment = c(51L, 322L, 251L, 106L, 468L, 205L, 718L, 200L, 
344L, 256L, 434L, 38L, 487L, 503L, 14L, 448L, 489L, 437L, 695L, 
833L, 941L, 299L, 864L, 888L, 531L, 335L, 47L, 753L, 319L, 986L
)), row.names = c(NA, -30L), class = c("tbl_df", "tbl", "data.frame"
))

I am having trouble calculating the enrollment change throughout the years for each College and then for each Department. So technique I want one new column that tells me at the College level what is the enrollment change, and one new column that tells me at the Department level.

I have tried many different things and this is the closest i got

growth_rate = df %>%
arrange(College, Department)%>%
group_by(College) %>%
mutate(Diff_growth = Enrollment - lag(Enrollment), 
     Rate_percent = Diff_growth/Enrollment) 

But this code doesn't stop at the beginning of each cycle, which is the year 2017, it just keeps going down the rows. Any help would be greatly appreciated.

The desired output would look like this

structure(list(X = 1:30, year = c(2017L, 2017L, 2017L, 2017L, 
2018L, 2018L, 2018L, 2018L, 2019L, 2019L, 2019L, 2019L, 2017L, 
2017L, 2018L, 2018L, 2019L, 2019L, 2017L, 2017L, 2017L, 2017L, 
2018L, 2018L, 2018L, 2018L, 2019L, 2019L, 2019L, 2019L), College = 
c("College1", 
"College1", "College1", "College1", "College1", "College1", 
"College1", 
"College1", "College1", "College1", "College1", "College1", 
"College2", 
"College2", "College2", "College2", "College2", "College2", 
"College3", 
"College3", "College3", "College3", "College3", "College3", 
"College3", 
"College3", "College3", "College3", "College3", "College3"), 
Department = c("Department 1", "Department 2", "Department 3", 
"Department 4", "Department 1", "Department 2", "Department 3", 
"Department 4", "Department 1", "Department 2", "Department 3", 
"Department 4", "Department 1", "Department 2", "Department 1", 
"Department 2", "Department 1", "Department 2", "Department 1", 
"Department 2", "Department 3", "Department 4", "Department 1", 
"Department 2", "Department 3", "Department 4", "Department 1", 
"Department 2", "Department 3", "Department 4"), Enrollment = c(51L, 
322L, 251L, 106L, 468L, 205L, 718L, 200L, 344L, 256L, 434L, 
38L, 487L, 503L, 14L, 448L, 489L, 437L, 695L, 833L, 941L, 
299L, 864L, 888L, 531L, 335L, 47L, 753L, 319L, 986L), RateChange_College = c(NA, 
NA, NA, NA, 1.179452055, 1.179452055, 1.179452055, 1.179452055, 
-0.326209931, -0.326209931, -0.326209931, -0.326209931, NA, 
NA, -0.533333333, -0.533333333, 1.004329004, 1.004329004, 
NA, NA, NA, NA, -0.054190751, -0.054190751, -0.054190751, 
-0.054190751, -0.195951108, -0.195951108, -0.195951108, -0.195951108
), RateChange_Department = c(NA, NA, NA, NA, 8.176470588, 
-0.363354037, 1.860557769, 0.886792453, -0.264957265, 0.248780488, 
-0.395543175, -0.81, NA, NA, -0.971252567, -0.109343936, 
33.92857143, -0.024553571, NA, NA, NA, NA, 0.243165468, 0.066026411, 
-0.435706695, 0.120401338, -0.945601852, -0.152027027, -0.399246704, 
1.943283582)), class = "data.frame", row.names = c(NA, -30L
))

Upvotes: 0

Views: 84

Answers (2)

deschen
deschen

Reputation: 11006

Here's a solution that should match your desired output:

library(tidyverse)
df |> 
  arrange(College, year) |> 
  mutate(Enrollment_Year = sum(Enrollment), .by = c(College, year)) |> 
  mutate(RateChange_College = Enrollment_Year / lag(Enrollment_Year) - 1, .by = c(College, Department)) |> 
  arrange(College, Department, year) |> 
  mutate(RateChange_Department = Enrollment / lag(Enrollment) - 1, .by = c(College, Department)) |> 
  arrange(College, year, Department)

# A tibble: 30 × 7
    year College  Department   Enrollment Enrollment_Year RateChange_College RateChange_Department
   <dbl> <chr>    <chr>             <int>           <int>              <dbl>                 <dbl>
 1  2017 College1 Department 1         51             730         NA                    NA        
 2  2017 College1 Department 2        322             730         NA                    NA        
 3  2017 College1 Department 3        251             730         NA                    NA        
 4  2017 College1 Department 4        106             730         NA                    NA        
 5  2018 College1 Department 1        468            1591          1.179452              8.176471 
 6  2018 College1 Department 2        205            1591          1.179452             -0.3633540
 7  2018 College1 Department 3        718            1591          1.179452              1.860558 
 8  2018 College1 Department 4        200            1591          1.179452              0.8867925
 9  2019 College1 Department 1        344            1072         -0.3262099            -0.2649573
10  2019 College1 Department 2        256            1072         -0.3262099             0.2487805
# ℹ 20 more rows
# ℹ Use `print(n = ...)` to see more rows

We first calculate the Enrollment sum per year and college in order to be able to calculate the Rate Change. We then rearrange the data by College/Department/Year and do the same Rate Change calculation (however, this time directly with the original Enrollment column).

Upvotes: 1

Seth
Seth

Reputation: 3864

You can calculate the rates for colleges and college-department combinations separately, and join the results.

Rates per college

library(tidyverse)

college_rates <- df |>
  arrange(year, College, Department) |>
  summarize(college_yearly_enrollment = sum(Enrollment),
            .by = c(year, College)) |>
  mutate(RateChange_College = (college_yearly_enrollment - lag(college_yearly_enrollment))/
           lag(college_yearly_enrollment),
         .by = College) |>
  select(-college_yearly_enrollment)

glimpse(college_rates)
#> Rows: 9
#> Columns: 3
#> $ year               <dbl> 2017, 2017, 2017, 2018, 2018, 2018, 2019, 2019, 2019
#> $ College            <chr> "College1", "College2", "College3", "College1", "Co…
#> $ RateChange_College <dbl> NA, NA, NA, 1.17945205, -0.53333333, -0.05419075, -…

Join to rates per college-department

df |>
  arrange(year, College, Department) |>
  left_join(college_rates, by = join_by(year, College)) |> 
  mutate(RateChange_Department = (Enrollment - lag(Enrollment))/lag(Enrollment),
         .by = c(College, Department)) |> 
  # filter for reprex only
  filter(year > 2017)
#> # A tibble: 20 × 6
#>     year College  Department Enrollment RateChange_College RateChange_Department
#>    <dbl> <chr>    <chr>           <int>              <dbl>                 <dbl>
#>  1  2018 College1 Departmen…        468             1.18                  8.18  
#>  2  2018 College1 Departmen…        205             1.18                 -0.363 
#>  3  2018 College1 Departmen…        718             1.18                  1.86  
#>  4  2018 College1 Departmen…        200             1.18                  0.887 
#>  5  2018 College2 Departmen…         14            -0.533                -0.971 
#>  6  2018 College2 Departmen…        448            -0.533                -0.109 
#>  7  2018 College3 Departmen…        864            -0.0542                0.243 
#>  8  2018 College3 Departmen…        888            -0.0542                0.0660
#>  9  2018 College3 Departmen…        531            -0.0542               -0.436 
#> 10  2018 College3 Departmen…        335            -0.0542                0.120 
#> 11  2019 College1 Departmen…        344            -0.326                -0.265 
#> 12  2019 College1 Departmen…        256            -0.326                 0.249 
#> 13  2019 College1 Departmen…        434            -0.326                -0.396 
#> 14  2019 College1 Departmen…         38            -0.326                -0.81  
#> 15  2019 College2 Departmen…        489             1.00                 33.9   
#> 16  2019 College2 Departmen…        437             1.00                 -0.0246
#> 17  2019 College3 Departmen…         47            -0.196                -0.946 
#> 18  2019 College3 Departmen…        753            -0.196                -0.152 
#> 19  2019 College3 Departmen…        319            -0.196                -0.399 
#> 20  2019 College3 Departmen…        986            -0.196                 1.94

Upvotes: 2

Related Questions