Reputation: 7107
I am trying to apply a series of functions to my data, but I am going wrong somewhere.
The functions can create more than one column, so I use `bind_cols` to add them to the original data.
What I am trying to do is take the two functions in `functions` and `map` them over a list to create new columns in each list element; I would like to use `mutate` or `summarise`.
library(tsfeatures)
library(dplyr)
library(purrr)
# Names of the tsfeatures feature functions to compute for each series.
functions <- c("stl_features", "max_kl_shift")
# Outer map(): visits each element of the list `Data` (one data frame each).
Data %>%
# Inner map(): iterates over that element again, so `.x` below is one piece
# of the data frame (presumably a single column), not the data frame itself.
map(., ~ map(., ~ data.frame(
bind_cols(
# `[` with a name keeps the container type instead of extracting the
# "Value" vector. NOTE(review): on an unnamed column vector this likely
# yields NA, which would explain the approx() error reported below — confirm.
tsfeatures(.x["Value"], functions)
)
)
)
)
Error:
Error in approx(idx, x[idx], tt, rule = 2) :
  need at least two non-NA values to interpolate
In addition: Warning messages:
1: In min(x) : no non-missing arguments to min; returning Inf
2: In max(x) : no non-missing arguments to max; returning -Inf
Data:
Data <- list(structure(list(time = structure(c(17045, 17046, 17050, 17051,
17052, 17053, 17056, 17057, 17058, 17059, 17060, 17063, 17064,
17065, 17066, 17067, 17070, 17071, 17072, 17073), class = "Date"),
ID = c("CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1"), Value = c(0, 0.00348603358425681,
0.011173612052706, 0.000346065780582494, -0.00644578606355972,
-0.0201981554179086, 0.0123213639426545, -0.0121323473477323,
0.00368569810400099, 0.0121575628815795, -0.00373173650186931,
-0.00413587683295258, 0.00745717762898512, 0.00623533292069589,
0.0141584233987713, -0.000393793258897213, -0.016126574676531,
0.0113664093074735, -0.00185184350325229, -0.00838065921587761
), out = c(0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,
0, 1, 1, 0)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(time = structure(c(17056,
17057, 17058, 17059, 17060, 17063, 17064, 17065, 17066, 17067,
17070, 17071, 17072, 17073, 17074, 17077, 17078, 17079, 17080,
17081), class = "Date"), ID = c("CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1"
), Value = c(0.0123213639426545, -0.0121323473477323, 0.00368569810400099,
0.0121575628815795, -0.00373173650186931, -0.00413587683295258,
0.00745717762898512, 0.00623533292069589, 0.0141584233987713,
-0.000393793258897213, -0.016126574676531, 0.0113664093074735,
-0.00185184350325229, -0.00838065921587761, 0.00294185619615428,
-0.0060852193311054, 0.00500931320547093, 0.0000514895101431101,
0.000502291156859291, -0.00229123398600595), out = c(1, 0, 1,
1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
time = structure(c(17064, 17065, 17066, 17067, 17070, 17071,
17072, 17073, 17074, 17077, 17078, 17079, 17080, 17081, 17084,
17085, 17086, 17087, 17088, 17091), class = "Date"), ID = c("CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1"), Value = c(0.00745717762898512, 0.00623533292069589,
0.0141584233987713, -0.000393793258897213, -0.016126574676531,
0.0113664093074735, -0.00185184350325229, -0.00838065921587761,
0.00294185619615428, -0.0060852193311054, 0.00500931320547093,
0.0000514895101431101, 0.000502291156859291, -0.00229123398600595,
0.0140114372217135, -0.00365167187405735, 0.00392047706151,
-0.0101127189155992, 0.000436945988930848, 0.00183678592569736
), out = c(1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0,
1, 0, 0, 1)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(time = structure(c(17072,
17073, 17074, 17077, 17078, 17079, 17080, 17081, 17084, 17085,
17086, 17087, 17088, 17091, 17092, 17093, 17094, 17095, 17098,
17099), class = "Date"), ID = c("CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1"
), Value = c(-0.00185184350325229, -0.00838065921587761, 0.00294185619615428,
-0.0060852193311054, 0.00500931320547093, 0.0000514895101431101,
0.000502291156859291, -0.00229123398600595, 0.0140114372217135,
-0.00365167187405735, 0.00392047706151, -0.0101127189155992,
0.000436945988930848, 0.00183678592569736, 0.0196163746454174,
0.00784647778278202, -0.00565193886462889, 0.00301143592272179,
0.0171885235697395, -0.00669036428079295), out = c(1, 0, 1, 0,
1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
time = structure(c(17080, 17081, 17084, 17085, 17086, 17087,
17088, 17091, 17092, 17093, 17094, 17095, 17098, 17099, 17100,
17101, 17102, 17105, 17106, 17107), class = "Date"), ID = c("CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1"), Value = c(0.000502291156859291,
-0.00229123398600595, 0.0140114372217135, -0.00365167187405735,
0.00392047706151, -0.0101127189155992, 0.000436945988930848,
0.00183678592569736, 0.0196163746454174, 0.00784647778278202,
-0.00565193886462889, 0.00301143592272179, 0.0171885235697395,
-0.00669036428079295, -0.0106478836418512, -0.00465545067066953,
0.0000251700516804565, -0.0136163258207899, -0.00118539912060411,
-0.0190272881732103), out = c(0, 0, 1, 0, 1, 0, 0, 1, 1,
1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0)), row.names = c(NA, -20L
), class = c("tbl_df", "tbl", "data.frame")), structure(list(
time = structure(c(17088, 17091, 17092, 17093, 17094, 17095,
17098, 17099, 17100, 17101, 17102, 17105, 17106, 17107, 17108,
17109, 17112, 17113, 17114, 17115), class = "Date"), ID = c("CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1"), Value = c(0.000436945988930848,
0.00183678592569736, 0.0196163746454174, 0.00784647778278202,
-0.00565193886462889, 0.00301143592272179, 0.0171885235697395,
-0.00669036428079295, -0.0106478836418512, -0.00465545067066953,
0.0000251700516804565, -0.0136163258207899, -0.00118539912060411,
-0.0190272881732103, -0.00854690633203736, -0.000144312649125955,
0.0269021803390415, 0.0102105886057713, -0.00657804700031572,
-0.0289694516279417), out = c(0, 1, 1, 1, 0, 1, 1, 0, 0,
0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0)), row.names = c(NA, -20L
), class = c("tbl_df", "tbl", "data.frame")), structure(list(
time = structure(c(17098, 17099, 17100, 17101, 17102, 17105,
17106, 17107, 17108, 17109, 17112, 17113, 17114, 17115, 17116,
17119, 17120, 17121, 17122, 17123), class = "Date"), ID = c("CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1"), Value = c(0.0171885235697395, -0.00669036428079295,
-0.0106478836418512, -0.00465545067066953, 0.0000251700516804565,
-0.0136163258207899, -0.00118539912060411, -0.0190272881732103,
-0.00854690633203736, -0.000144312649125955, 0.0269021803390415,
0.0102105886057713, -0.00657804700031572, -0.0289694516279417,
-0.0111990899370517, -0.0237924756958046, 0.0304450229355975,
0.00789725649510542, 0.0088295314155904, -0.0138609782778413
), out = c(1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0,
1, 1, 1, 0)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(time = structure(c(17106,
17107, 17108, 17109, 17112, 17113, 17114, 17115, 17116, 17119,
17120, 17121, 17122, 17123, 17126, 17127, 17128, 17130, 17133,
17134), class = "Date"), ID = c("CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1"
), Value = c(-0.00118539912060411, -0.0190272881732103, -0.00854690633203736,
-0.000144312649125955, 0.0269021803390415, 0.0102105886057713,
-0.00657804700031572, -0.0289694516279417, -0.0111990899370517,
-0.0237924756958046, 0.0304450229355975, 0.00789725649510542,
0.0088295314155904, -0.0138609782778413, 0.0113866913646978,
-0.0012090379426567, -0.00947587412040363, 0.00090671757719174,
0.00861253683999563, 0.00338440726054889), out = c(1, 0, 0, 1,
1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
time = structure(c(17114, 17115, 17116, 17119, 17120, 17121,
17122, 17123, 17126, 17127, 17128, 17130, 17133, 17134, 17135,
17136, 17137, 17140, 17141, 17142), class = "Date"), ID = c("CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1"), Value = c(-0.00657804700031572,
-0.0289694516279417, -0.0111990899370517, -0.0237924756958046,
0.0304450229355975, 0.00789725649510542, 0.0088295314155904,
-0.0138609782778413, 0.0113866913646978, -0.0012090379426567,
-0.00947587412040363, 0.00090671757719174, 0.00861253683999563,
0.00338440726054889, -0.016605324777718, -0.0133502127773003,
0.00344958960669994, 0.0160160159893405, -0.00447205963195563,
0.0159133949476373), out = c(1, 0, 0, 0, 1, 1, 1, 0, 1, 0,
0, 0, 1, 0, 0, 0, 1, 1, 0, 1)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(time = structure(c(17122,
17123, 17126, 17127, 17128, 17130, 17133, 17134, 17135, 17136,
17137, 17140, 17141, 17142, 17143, 17144, 17147, 17148, 17149,
17150), class = "Date"), ID = c("CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1",
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1"
), Value = c(0.0088295314155904, -0.0138609782778413, 0.0113866913646978,
-0.0012090379426567, -0.00947587412040363, 0.00090671757719174,
0.00861253683999563, 0.00338440726054889, -0.016605324777718,
-0.0133502127773003, 0.00344958960669994, 0.0160160159893405,
-0.00447205963195563, 0.0159133949476373, 0.00678170228664343,
0.0165760738798502, -0.0000252860172512692, 0.00865350998635406,
0.00121847887105075, 0.000978545163097477), out = c(1, 0, 1,
0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame")))
Upvotes: 1
Views: 81
Reputation: 39154
The data frames in your example list `Data` only contain two rows, which is too few for the `tsfeatures` function. Another issue is that `.x["Value"]` returns a data frame, but the documentation of the `tsfeatures` function says the first argument should be a list of univariate time series objects or a vector. Therefore, I assume the code you should use is `.x[["Value"]]`, which returns a vector.
I tried your example as follows, by combining all the data frames in `Data` into a single data frame.
# Stack every data frame in `Data` into one, extract the "Value" column as a
# plain vector with `[[`, and compute the selected features on that series.
tsfeatures(bind_rows(Data)[["Value"]], functions)
which leads to the following output.
# # A tibble: 1 x 10
# nperiods seasonal_period trend spike linearity curvature e_acf1 e_acf10 max_kl_shift time_kl_shift
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 0 1 0.965 0.00000631 2.99 -2.64 -0.0723 0.381 NA NA
Therefore, assuming the data frames in your `Data` list have more than two rows, we can apply the above code to each of your data frames. We can wrap the above code in a `map` call as follows.
# For each data frame in `Data`, pass its "Value" column (extracted as a
# vector with `[[`) to tsfeatures(); the result is a list with one element
# per data frame.
Data %>% map(., ~tsfeatures(.x[["Value"]], functions))
I think this will probably work.
Upvotes: 1