Ess
Ess

Reputation: 97

Applying a function through mutate after group_by

I'm trying to apply a few functions to a tibble with data grouped by Symbol. All runs smoothly except the volatility function. Is there a way to get this to run, or do I have to split the tibble into multiple tibbles and then run the code on each one of those? I have about 500 symbols, only listing 2 here.

Here's my code:

library(tidyquant)
library(tidyverse)

tikrlist <- c('^NSEI', '^NSEBANK')
data <- tq_get(tikrlist, from = '2001-01-01', get = 'stock.prices')

data2 <- data %>%
  group_by(symbol) %>%
  select(symbol, date, open, high, low, adjusted) %>%
  na.omit()

# Make dataframe with all the required columns
ndf <- data2 %>%
mutate(dow = WEEKDAY(date, label = T), # Day Of the Week
     month = month(date, abbr = T), # Month of the year
     year = year(date), #Year
     t1ret = log(adjusted / lag(adjusted, n = 1)) * 100, # Daily Log Returns
     vol10 = volatility(data2[c('open', 'high', 'low', 'adjusted')], n = 10,
                        calc = 'yang.zhang'))

Here's the error:

Error in `mutate()`:
! Problem while computing `vol10 = volatility(...)`.
✖ `vol10` must be size 3340 or 1, not 6950.
ℹ The error occurred in group 1: symbol = "^NSEBANK".

dput:

structure(list(symbol = c("^NSEI", "^NSEI", "^NSEI", "^NSEI", 
"^NSEI", "^NSEI", "^NSEI", "^NSEI", "^NSEI", "^NSEI", "^NSEI", 
"^NSEI", "^NSEI", "^NSEI", "^NSEI", "^NSEI", "^NSEI", "^NSEI", 
"^NSEI", "^NSEI", "^NSEI", "^NSEI", "^NSEI", "^NSEI", "^NSEI", 
"^NSEI", "^NSEI", "^NSEI", "^NSEI", "^NSEI", "^NSEI", "^NSEI", 
"^NSEBANK", "^NSEBANK", "^NSEBANK", "^NSEBANK", "^NSEBANK", "^NSEBANK", 
"^NSEBANK", "^NSEBANK", "^NSEBANK", "^NSEBANK", "^NSEBANK", "^NSEBANK", 
"^NSEBANK", "^NSEBANK", "^NSEBANK", "^NSEBANK", "^NSEBANK", "^NSEBANK", 
"^NSEBANK", "^NSEBANK", "^NSEBANK", "^NSEBANK", "^NSEBANK", "^NSEBANK", 
"^NSEBANK", "^NSEBANK", "^NSEBANK", "^NSEBANK", "^NSEBANK", "^NSEBANK", 
"^NSEBANK", "^NSEBANK"), date = structure(c(19114, 19116, 19117, 
19118, 19121, 19122, 19123, 19124, 19125, 19128, 19129, 19130, 
19131, 19132, 19135, 19136, 19137, 19138, 19139, 19142, 19143, 
19144, 19145, 19146, 19149, 19150, 19151, 19152, 19153, 19156, 
19157, 19158, 19114, 19116, 19117, 19118, 19121, 19122, 19123, 
19124, 19125, 19128, 19129, 19130, 19131, 19132, 19135, 19136, 
19137, 19138, 19139, 19142, 19143, 19144, 19145, 19146, 19149, 
19150, 19151, 19152, 19153, 19156, 19157, 19158), class = "Date"), 
    open = c(16924.449219, 17096.599609, 16854.75, 16415.550781, 
    16227.700195, 16248.900391, 16270.049805, 16021.099609, 15977, 
    15845.099609, 15912.599609, 16318.150391, 15917.400391, 16043.799805, 
    16290.950195, 16225.549805, 16196.349609, 16105, 16296.599609, 
    16527.900391, 16578.449219, 16594.400391, 16481.650391, 16761.650391, 
    16530.699219, 16469.599609, 16474.949219, 16263.849609, 16283.950195, 
    15877.549805, 15674.25, 15729.25, 35627.601563, 36266.148438, 
    35705.351563, 34569.101563, 34091.75, 34181.199219, 34686.300781, 
    34289.101563, 33925.949219, 33256, 33796.199219, 34448.199219, 
    33461, 33765.148438, 34321.398438, 34223.199219, 34491.800781, 
    34670.601563, 35326.601563, 35958.800781, 35615.300781, 35358.949219, 
    35470.949219, 35929.550781, 35159.148438, 35080.800781, 35165.648438, 
    34802.25, 34686.949219, 33728.648438, 33180.601563, 33317.800781
    ), high = c(17092.25, 17132.849609, 16945.699219, 16484.199219, 
    16403.699219, 16404.550781, 16318.75, 16041.950195, 16083.599609, 
    15977.950195, 16284.25, 16399.800781, 15984.75, 16283.049805, 
    16414.699219, 16262.799805, 16223.349609, 16204.450195, 16370.599609, 
    16695.5, 16690.75, 16649.199219, 16646.400391, 16793.849609, 
    16610.949219, 16487.25, 16514.300781, 16492.800781, 16324.700195, 
    15886.150391, 15858, 15783.650391, 36235.25, 36309.300781, 
    35934.851563, 34797.949219, 34554.351563, 34781.800781, 34793.199219, 
    34341.199219, 34002.199219, 33821.648438, 34366.050781, 34655.800781, 
    33633.949219, 34358.050781, 34819.949219, 34586.449219, 34722.601563, 
    35222.050781, 35694.851563, 36083.699219, 35881.949219, 35768.699219, 
    35676.199219, 35958.449219, 35424.5, 35154.300781, 35449.898438, 
    35149.101563, 34752.449219, 33774.050781, 33618.148438, 33554.550781
    ), low = c(16917.25, 16623.949219, 16651.849609, 16340.900391, 
    16142.099609, 16197.299805, 15992.599609, 15735.75, 15740.849609, 
    15739.650391, 15900.799805, 16211.200195, 15775.200195, 16003.849609, 
    16185.75, 16078.599609, 16006.950195, 15903.700195, 16221.950195, 
    16506.150391, 16521.900391, 16438.849609, 16443.050781, 16567.900391, 
    16444.550781, 16347.099609, 16293.349609, 16243.849609, 16172.599609, 
    15684, 15659.450195, 15678.900391, 35618.199219, 35127.851563, 
    35133.800781, 34353.199219, 33927.199219, 34176.800781, 34143.199219, 
    33297.648438, 33007.898438, 33001.75, 33680.601563, 34134.699219, 
    33180.601563, 33658.050781, 34117.351563, 34115.800781, 34285.800781, 
    34424.75, 35291.949219, 35748.449219, 35288.949219, 35285.449219, 
    35385, 35175.101563, 35072.300781, 34834.398438, 34831.75, 
    34659.101563, 34346.648438, 33210.351563, 33123.898438, 33249.550781
    ), close = c(17069.099609, 16677.599609, 16682.650391, 16411.25, 
    16301.849609, 16240.049805, 16167.099609, 15808, 15782.150391, 
    15842.299805, 16259.299805, 16240.299805, 15809.400391, 16266.150391, 
    16214.700195, 16125.150391, 16025.799805, 16170.150391, 16352.450195, 
    16661.400391, 16584.550781, 16522.75, 16628, 16584.300781, 
    16569.550781, 16416.349609, 16356.25, 16478.099609, 16201.799805, 
    15774.400391, 15732.099609, 15692.150391, 36163.75, 35264.550781, 
    35232.851563, 34591.199219, 34275.398438, 34482.648438, 34693.148438, 
    33532.148438, 33121.351563, 33597.601563, 34301.898438, 34163.699219, 
    33315.648438, 34276.398438, 34247.601563, 34290.148438, 34339.5, 
    35094.898438, 35613.300781, 35826.949219, 35487.398438, 35620.800781, 
    35613.648438, 35275.050781, 35310.199219, 34996, 34946.148438, 
    35085.449219, 34483.800781, 33405.851563, 33311.351563, 33339
    ), volume = c(278200, 310600, 265800, 300500, 288400, 283100, 
    284300, 314900, 369100, 217600, 295700, 290400, 313900, 252400, 
    293800, 249800, 243300, 314300, 274100, 251400, 651600, 249600, 
    236000, 245500, 233600, 233800, 243500, 205000, 189700, 225500, 
    225400, 0, 170200, 214600, 167300, 199200, 179900, 138000, 
    179500, 382100, 272700, 195100, 161500, 146800, 150400, 129400, 
    156000, 163700, 153600, 191500, 132600, 127000, 149300, 100800, 
    140700, 104800, 100800, 91600, 124600, 100900, 90000, 131900, 
    113200, 0), adjusted = c(17069.099609, 16677.599609, 16682.650391, 
    16411.25, 16301.849609, 16240.049805, 16167.099609, 15808, 
    15782.150391, 15842.299805, 16259.299805, 16240.299805, 15809.400391, 
    16266.150391, 16214.700195, 16125.150391, 16025.799805, 16170.150391, 
    16352.450195, 16661.400391, 16584.550781, 16522.75, 16628, 
    16584.300781, 16569.550781, 16416.349609, 16356.25, 16478.099609, 
    16201.799805, 15774.400391, 15732.099609, 15692.150391, 36163.75, 
    35264.550781, 35232.851563, 34591.199219, 34275.398438, 34482.648438, 
    34693.148438, 33532.148438, 33121.351563, 33597.601563, 34301.898438, 
    34163.699219, 33315.648438, 34276.398438, 34247.601563, 34290.148438, 
    34339.5, 35094.898438, 35613.300781, 35826.949219, 35487.398438, 
    35620.800781, 35613.648438, 35275.050781, 35310.199219, 34996, 
    34946.148438, 35085.449219, 34483.800781, 33405.851563, 33311.351563, 
    33339)), row.names = c(NA, -64L), class = c("tbl_df", "tbl", 
"data.frame"))

Upvotes: 2

Views: 88

Answers (1)

Jonathan
Jonathan

Reputation: 1088

You need to access the data for each group using cur_data(), not the entire data data2:

ndf <- data2 %>%
  mutate(dow = WEEKDAY(date, label = T),
         month = month(date, abbr = T),
         year = year(date),
         t1ret = log(adjusted / lag(adjusted, n = 1)) * 100,
         vol10 = volatility(cur_data()[c('open', 'high', 'low', 'adjusted')], n = 10,
                            calc = 'yang.zhang'))

One other thing I'd like to mention is that you should make sure that both lag and volatility are applied to the ordered data. lag has an additional argument order_by = ..., for volatility you can use the function-wrapper with_order

data %>% select(symbol, date, open, high, low, adjusted) %>% 
  drop_na() %>% 
  group_by(symbol) %>% 
  mutate(dow = WEEKDAY(date, label = T),
         month = month(date, abbr = T),
         year = year(date),
         t1ret = log(adjusted / lag(adjusted, n = 1, order_by = date)) * 100,
         vol10 = with_order(date, volatility, cur_data()[c('open', 'high', 'low', 'adjusted')], n = 10,
                            calc = 'yang.zhang'))

Upvotes: 4

Related Questions