Reputation: 8484
For a sample dataframe:
# Sample data: a 78-row tibble (class "tbl_df") with four columns:
#   name   - character label, "a" to "z"
#   amount - double, the value whose per-decile means are compared
#   decile - integer decile group, 1 to 10
#   time   - integer year, 2016 to 2018
# The trailing `spec` attribute has class "col_spec"; presumably it is left
# over from reading the data with readr -- confirm before relying on it.
df1 <- structure(list(name = c("a", "b", "c", "d", "e", "f", "g", "h",
"i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u",
"v", "w", "x", "y", "z", "a", "b", "c", "d", "e", "f", "g", "h",
"i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u",
"v", "w", "x", "y", "z", "a", "b", "c", "d", "e", "f", "g", "h",
"i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u",
"v", "w", "x", "y", "z"), amount = c(5.5, 5.4, 5.2, 5.3, 5.1,
5.1, 5, 5, 4.9, 4.5, 6, 5.9, 5.7, 5.4, 5.3, 5.1, 5.6, 5.4, 5.3,
5.6, 4.6, 4.2, 4.5, 4.2, 4, 3.8, 6, 5.8, 5.7, 5.6, 5.3, 5.6,
5.4, 5.5, 5.4, 5.1, 9, 8.8, 8.6, 8.4, 8.2, 8, 7.8, 7.6, 7.4,
7.2, 6, 5.75, 5.5, 5.25, 5, 4.75, 10, 8.9, 7.8, 6.7, 5.6, 4.5,
3.4, 2.3, 1.2, 0.1, 6, 5.8, 5.7, 5.6, 5.5, 5.5, 5.4, 5.6, 5.8,
5.1, 6, 5.5, 5.4, 5.3, 5.2, 5.1), decile = c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L), time = c(2016L,
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L,
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L,
2016L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2018L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L)), .Names = c("name", "amount",
"decile", "time"), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-78L), spec = structure(list(cols = structure(list(name = structure(list(), class = c("collector_character",
"collector")), amount = structure(list(), class = c("collector_double",
"collector")), decile = structure(list(), class = c("collector_integer",
"collector")), time = structure(list(), class = c("collector_integer",
"collector"))), .Names = c("name", "amount", "decile", "time"
)), default = structure(list(), class = c("collector_guess",
"collector"))), .Names = c("cols", "default"), class = "col_spec"))
I want to produce a summary table detailing, for each year, the difference in mean amount between adjacent decile groups: the mean for decile 1 minus the mean for decile 2, then likewise for deciles 2 and 3, 3 and 4, 4 and 5, 5 and 6, 6 and 7, 7 and 8, 8 and 9, and 9 and 10.
Does anyone have any suggestions?
Upvotes: 1
Views: 145
Reputation: 14764
You could also do:
library(tidyverse)

# For each pair of adjacent deciles (1 vs 2, 2 vs 3, ..., 9 vs 10), add a
# column `mean_<i>_<i+1>` to df1 holding, within each year (`time`),
#   mean(amount in decile i) - mean(amount in decile i + 1).
# The value is constant within a year, so it is repeated on every row of that
# year. df1 is left grouped by `time` after the loop.
#
# Uses mutate() with tidy evaluation (`!!` and `:=`) instead of the deprecated
# mutate_()/.dots interface, so no R code has to be assembled from strings.
for (i in seq_len(9)) {
  # Name of the column created in this iteration, e.g. "mean_1_2".
  diff_col <- paste0("mean_", i, "_", i + 1)

  df1 <- df1 %>%
    group_by(time) %>%
    mutate(
      # `!!i` inlines the loop value so it can never be shadowed by a column.
      !!diff_col := mean(amount[decile == !!i], na.rm = TRUE) -
        mean(amount[decile == !!i + 1], na.rm = TRUE)
    )
}
Where the output is:
# A tibble: 78 x 13
# Groups: time [3]
name amount decile time mean_1_2 mean_2_3 mean_3_4 mean_4_5 mean_5_6 mean_6_7 mean_7_8 mean_8_9 mean_9_10
<chr> <dbl> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a 5.5 1 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
2 b 5.4 2 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
3 c 5.2 3 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
4 d 5.3 4 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
5 e 5.1 5 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
6 f 5.1 6 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
7 g 5 7 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
8 h 5 8 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
9 i 4.9 9 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
10 j 4.5 10 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
# ... with 68 more rows
You can then get a full per-year summary like this:
# Reduce df1 to one row per year: keep only the year and the columns whose
# names start with "mean", drop any grouping, then remove the duplicate rows
# left over from the per-row repetition of each year's values.
df1 <- df1 %>%
  select(time, starts_with("mean")) %>%
  ungroup() %>%
  distinct()
Output:
# A tibble: 3 x 10
time mean_1_2 mean_2_3 mean_3_4 mean_4_5 mean_5_6 mean_6_7 mean_7_8 mean_8_9 mean_9_10
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2016 0.1000 0.2 0.1 0.15 0.1000 -0.2 0.1000 0.1 0.0500
2 2017 0.263 0.0625 0.213 0.237 0.0875 -1.06 0.0500 0.150 0.25
3 2018 0.600 0.433 0.433 0.433 0.4 0.633 0.45 0.450 0.9
Upvotes: 1
Reputation: 39858
Using dplyr:
# Per year, compute the mean amount of each decile and then the difference
# between each decile's mean and the next decile's mean (decile i minus
# decile i + 1). Returns one row per decile/year with the difference in `res`;
# the last decile present in a year has no successor and gets NA.
df1 %>%
  group_by(decile, time) %>% # One group per decile within each year
  summarise(res = mean(amount, na.rm = TRUE)) %>% # Mean amount per group
  ungroup() %>%
  arrange(time, decile) %>% # Order by year, then decile, so lead() is the next decile
  group_by(time) %>% # Differences must not cross year boundaries
  mutate(
    # Only subtract when the following row really is the adjacent decile; if a
    # decile is missing in a year, report NA instead of a misleading
    # difference against a non-adjacent decile.
    res = if_else(lead(decile) == decile + 1L, res - lead(res), NA_real_)
  ) %>%
  ungroup() # Do not leave the result grouped by year
decile time res
<int> <int> <dbl>
1 1 2016 0.1000
2 2 2016 0.200
3 3 2016 0.100
4 4 2016 0.150
5 5 2016 0.1000
6 6 2016 -0.200
7 7 2016 0.1000
8 8 2016 0.100
9 9 2016 0.0500
10 10 2016 NA
11 1 2017 0.263
Upvotes: 5