Reputation: 7107
I am trying to calculate the geometric mean using dplyr
. I am using some stock tickers and dividend payments. I am trying to group_by()
each stock symbol and take the last dividend
value (for example F
- Ford) in 2018 (0.45000), divide this by the first dividend
value in 1990 (4.71300), raise the result to the power of 1/(number of years), and then subtract 1:
(0.450 / 4.713)^(1/28) - 1
for Ford company with ticker F.
I am getting a little stuck with coding the first and last years, since the number of years varies for different firms.
Data:
# Example data as a structure() literal: yearly dividends for the tickers
# F, GE and MSFT (69 rows; columns symbol, year, dividends, growth), stored as
# a grouped_df with grouping variable `symbol`.
# NOTE(review): `growth` is itself a nested grouped_df (with its own symbol,
# year and dividends columns) rather than a plain numeric vector; presumably
# unintended, so confirm before relying on this column.
divs_yearly <- structure(list(symbol = c("F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "GE", "GE", "GE", "GE", "GE", "GE", "GE",
"GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE",
"GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE",
"MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT",
"MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT"
), year = c(1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2012, 2013, 2014,
2015, 2016, 2017, 2018, 1990, 1991, 1992, 1993, 1994, 1995, 1996,
1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018,
2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
2014, 2015, 2016, 2017, 2018), dividends = c(4.713, 3.06345,
2.5136, 2.5136, 2.09682, 1.22994, 1.59787, 1.64484, 1.71978,
1.88002, 1.80015, 1.05, 0.4, 0.4, 0.4, 0.4, 0.25, 0.2, 0.4, 0.5,
0.6, 0.6, 0.6, 0.45, 1.9176, 2.0796, 2.3172, 2.6052, 1.8474,
1.6896, 1.9008, 1.3404, 1.2501, 1.4604, 0.8435, 0.66, 0.73, 0.77,
0.82, 0.91, 1.03, 1.15, 1.24, 0.61, 0.46, 0.61, 0.755, 0.79,
0.89, 0.92, 0.93, 1.2, 0.24, 0.24, 0.16, 0.32, 0.37, 0.41, 0.46,
0.52, 0.55, 0.68, 0.83, 0.97, 1.15, 1.29, 1.47, 1.59, 1.26),
growth = structure(list(symbol = c("F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "GE", "GE", "GE", "GE",
"GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE",
"GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE",
"GE", "GE", "GE", "GE", "GE", "MSFT", "MSFT", "MSFT", "MSFT",
"MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT",
"MSFT", "MSFT", "MSFT", "MSFT"), year = c(1990, 1991, 1992,
1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
2003, 2004, 2005, 2006, 2012, 2013, 2014, 2015, 2016, 2017,
2018, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018,
2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
2013, 2014, 2015, 2016, 2017, 2018), dividends = c(NA, -0.35,
-0.17948717948718, 0, -0.165809993634628, -0.413426045154091,
0.299144673723921, 0.0293953826030904, 0.045560662435252,
0.093174708392934, -0.0424835906001001, -0.416715273727189,
-0.619047619047619, 0, 0, 0, -0.375, -0.2, 1, 0.25, 0.2,
0, 0, -0.25, NA, 0.0844806007509387, 0.114252740911714, 0.124287933713102,
-0.290879778903731, -0.085417343293277, 0.125, -0.294823232323232,
-0.0673679498657117, 0.16822654187665, -0.422418515475212,
-0.217545939537641, 0.106060606060606, 0.0547945205479452,
0.0649350649350651, 0.109756097560975, 0.131868131868132,
0.116504854368932, 0.0782608695652172, -0.508064516129032,
-0.245901639344262, 0.326086956521739, 0.237704918032787,
0.0463576158940397, 0.126582278481013, 0.0337078651685394,
0.0108695652173914, 0.290322580645161, -0.8, NA, -0.333333333333333,
1, 0.15625, 0.108108108108108, 0.121951219512195, 0.130434782608696,
0.0576923076923077, 0.236363636363636, 0.220588235294118,
0.168674698795181, 0.185567010309278, 0.121739130434783,
0.13953488372093, 0.0816326530612246, -0.207547169811321)), .Names = c("symbol",
"year", "dividends"), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -69L), vars = "symbol", labels = structure(list(
symbol = c("F", "GE", "MSFT")), row.names = c(NA, -3L
), class = "data.frame", vars = "symbol", drop = TRUE, .Names = "symbol"), indices = list(
0:23, 24:52, 53:68), drop = TRUE, group_sizes = c(24L,
29L, 16L), biggest_group_size = 29L)), .Names = c("symbol",
"year", "dividends", "growth"), row.names = c(NA, -69L), vars = "symbol", drop = TRUE, class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Upvotes: 2
Views: 744
Reputation: 23608
A combination of the `last()` and `first()` functions will get you there. I only took the first 3 columns of your data set, as I got an error trying to use your complete set.
# Geometric-mean (compound annual) dividend growth rate per ticker:
#   (last dividend / first dividend)^(1 / years elapsed) - 1
# `order_by = year` selects the dividends of the earliest and latest year even
# if the rows are not already sorted by year within each symbol. The exponent
# uses the elapsed calendar years (max - min), so gaps in a series (e.g. F has
# no rows for 2007-2011) do not shorten the time span.
divs_yearly %>%
  group_by(symbol) %>%
  summarise(
    gm = (last(dividends, order_by = year) /
      first(dividends, order_by = year))^(1 / (max(year) - min(year))) - 1
  )
# A tibble: 3 x 2
symbol gm
<chr> <dbl>
1 F -0.0805
2 GE -0.0715
3 MSFT 0.117
data:
# Sample data used for the result above: yearly dividends for the tickers
# F, GE and MSFT as a plain (ungrouped) tibble with 69 rows and the columns
# symbol, year and dividends.
divs_yearly <- structure(list(symbol = c("F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "GE", "GE", "GE", "GE", "GE", "GE", "GE",
"GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE",
"GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE", "GE",
"MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT",
"MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT"
), year = c(1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2012, 2013, 2014,
2015, 2016, 2017, 2018, 1990, 1991, 1992, 1993, 1994, 1995, 1996,
1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018,
2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
2014, 2015, 2016, 2017, 2018), dividends = c(4.713, 3.06345,
2.5136, 2.5136, 2.09682, 1.22994, 1.59787, 1.64484, 1.71978,
1.88002, 1.80015, 1.05, 0.4, 0.4, 0.4, 0.4, 0.25, 0.2, 0.4, 0.5,
0.6, 0.6, 0.6, 0.45, 1.9176, 2.0796, 2.3172, 2.6052, 1.8474,
1.6896, 1.9008, 1.3404, 1.2501, 1.4604, 0.8435, 0.66, 0.73, 0.77,
0.82, 0.91, 1.03, 1.15, 1.24, 0.61, 0.46, 0.61, 0.755, 0.79,
0.89, 0.92, 0.93, 1.2, 0.24, 0.24, 0.16, 0.32, 0.37, 0.41, 0.46,
0.52, 0.55, 0.68, 0.83, 0.97, 1.15, 1.29, 1.47, 1.59, 1.26)), row.names = c(NA,
-69L), class = c("tbl_df", "tbl", "data.frame"))
Upvotes: 4