mapping functions over a list of data frames

Question

I am trying to apply a series of functions to my data; however, I am going wrong somewhere.

The functions can create more than one column so I use bind_rows to add them to the original data.

What I am trying to do is take the two functions listed in functions and map them over a list of data frames to create new columns in each data frame. I would like to use mutate or summarise.

library(tsfeatures)
library(dplyr)
library(purrr)

# Names of the tsfeatures feature functions to compute; passed below as the
# second argument to tsfeatures().
functions <- c("stl_features", "max_kl_shift")

# Attempt to compute the features for every data frame in the list Data.
# This is the code that produces the error quoted in the question.
# NOTE(review): the maps are nested, so the inner map() iterates over the
# elements of each list element -- for a data frame these are presumably its
# columns -- which would make .x a single column rather than a data frame
# when .x["Value"] is evaluated. Confirm against the error message.
# NOTE(review): single-bracket [ keeps the container type, whereas
# [["Value"]] would extract the column as a plain vector.
Data %>% 
  map(., ~ map(., ~ data.frame(
    bind_cols(
      tsfeatures(.x["Value"], functions)
    )
  )
  )
  )

Error:

Error in approx(idx, x[idx], tt, rule = 2) : need at least two non-NA values to interpolate In addition: Warning messages: 1: In min(x) : no non-missing arguments to min; returning Inf 2: In max(x) : no non-missing arguments to max; returning -Inf

Data:

Data <- list(structure(list(time = structure(c(17045, 17046, 17050, 17051, 
17052, 17053, 17056, 17057, 17058, 17059, 17060, 17063, 17064, 
17065, 17066, 17067, 17070, 17071, 17072, 17073), class = "Date"), 
    ID = c("CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
    "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
    "CAT1", "CAT1", "CAT1", "CAT1", "CAT1"), Value = c(0, 0.00348603358425681, 
    0.011173612052706, 0.000346065780582494, -0.00644578606355972, 
    -0.0201981554179086, 0.0123213639426545, -0.0121323473477323, 
    0.00368569810400099, 0.0121575628815795, -0.00373173650186931, 
    -0.00413587683295258, 0.00745717762898512, 0.00623533292069589, 
    0.0141584233987713, -0.000393793258897213, -0.016126574676531, 
    0.0113664093074735, -0.00185184350325229, -0.00838065921587761
    ), out = c(0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 
    0, 1, 1, 0)), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame")), structure(list(time = structure(c(17056, 
17057, 17058, 17059, 17060, 17063, 17064, 17065, 17066, 17067, 
17070, 17071, 17072, 17073, 17074, 17077, 17078, 17079, 17080, 
17081), class = "Date"), ID = c("CAT1", "CAT1", "CAT1", "CAT1", 
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1"
), Value = c(0.0123213639426545, -0.0121323473477323, 0.00368569810400099, 
0.0121575628815795, -0.00373173650186931, -0.00413587683295258, 
0.00745717762898512, 0.00623533292069589, 0.0141584233987713, 
-0.000393793258897213, -0.016126574676531, 0.0113664093074735, 
-0.00185184350325229, -0.00838065921587761, 0.00294185619615428, 
-0.0060852193311054, 0.00500931320547093, 0.0000514895101431101, 
0.000502291156859291, -0.00229123398600595), out = c(1, 0, 1, 
1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0)), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
    time = structure(c(17064, 17065, 17066, 17067, 17070, 17071, 
    17072, 17073, 17074, 17077, 17078, 17079, 17080, 17081, 17084, 
    17085, 17086, 17087, 17088, 17091), class = "Date"), ID = c("CAT1", 
    "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
    "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
    "CAT1", "CAT1", "CAT1"), Value = c(0.00745717762898512, 0.00623533292069589, 
    0.0141584233987713, -0.000393793258897213, -0.016126574676531, 
    0.0113664093074735, -0.00185184350325229, -0.00838065921587761, 
    0.00294185619615428, -0.0060852193311054, 0.00500931320547093, 
    0.0000514895101431101, 0.000502291156859291, -0.00229123398600595, 
    0.0140114372217135, -0.00365167187405735, 0.00392047706151, 
    -0.0101127189155992, 0.000436945988930848, 0.00183678592569736
    ), out = c(1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 
    1, 0, 0, 1)), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame")), structure(list(time = structure(c(17072, 
17073, 17074, 17077, 17078, 17079, 17080, 17081, 17084, 17085, 
17086, 17087, 17088, 17091, 17092, 17093, 17094, 17095, 17098, 
17099), class = "Date"), ID = c("CAT1", "CAT1", "CAT1", "CAT1", 
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1"
), Value = c(-0.00185184350325229, -0.00838065921587761, 0.00294185619615428, 
-0.0060852193311054, 0.00500931320547093, 0.0000514895101431101, 
0.000502291156859291, -0.00229123398600595, 0.0140114372217135, 
-0.00365167187405735, 0.00392047706151, -0.0101127189155992, 
0.000436945988930848, 0.00183678592569736, 0.0196163746454174, 
0.00784647778278202, -0.00565193886462889, 0.00301143592272179, 
0.0171885235697395, -0.00669036428079295), out = c(1, 0, 1, 0, 
1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0)), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
    time = structure(c(17080, 17081, 17084, 17085, 17086, 17087, 
    17088, 17091, 17092, 17093, 17094, 17095, 17098, 17099, 17100, 
    17101, 17102, 17105, 17106, 17107), class = "Date"), ID = c("CAT1", 
    "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
    "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
    "CAT1", "CAT1", "CAT1"), Value = c(0.000502291156859291, 
    -0.00229123398600595, 0.0140114372217135, -0.00365167187405735, 
    0.00392047706151, -0.0101127189155992, 0.000436945988930848, 
    0.00183678592569736, 0.0196163746454174, 0.00784647778278202, 
    -0.00565193886462889, 0.00301143592272179, 0.0171885235697395, 
    -0.00669036428079295, -0.0106478836418512, -0.00465545067066953, 
    0.0000251700516804565, -0.0136163258207899, -0.00118539912060411, 
    -0.0190272881732103), out = c(0, 0, 1, 0, 1, 0, 0, 1, 1, 
    1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0)), row.names = c(NA, -20L
), class = c("tbl_df", "tbl", "data.frame")), structure(list(
    time = structure(c(17088, 17091, 17092, 17093, 17094, 17095, 
    17098, 17099, 17100, 17101, 17102, 17105, 17106, 17107, 17108, 
    17109, 17112, 17113, 17114, 17115), class = "Date"), ID = c("CAT1", 
    "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
    "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
    "CAT1", "CAT1", "CAT1"), Value = c(0.000436945988930848, 
    0.00183678592569736, 0.0196163746454174, 0.00784647778278202, 
    -0.00565193886462889, 0.00301143592272179, 0.0171885235697395, 
    -0.00669036428079295, -0.0106478836418512, -0.00465545067066953, 
    0.0000251700516804565, -0.0136163258207899, -0.00118539912060411, 
    -0.0190272881732103, -0.00854690633203736, -0.000144312649125955, 
    0.0269021803390415, 0.0102105886057713, -0.00657804700031572, 
    -0.0289694516279417), out = c(0, 1, 1, 1, 0, 1, 1, 0, 0, 
    0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0)), row.names = c(NA, -20L
), class = c("tbl_df", "tbl", "data.frame")), structure(list(
    time = structure(c(17098, 17099, 17100, 17101, 17102, 17105, 
    17106, 17107, 17108, 17109, 17112, 17113, 17114, 17115, 17116, 
    17119, 17120, 17121, 17122, 17123), class = "Date"), ID = c("CAT1", 
    "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
    "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
    "CAT1", "CAT1", "CAT1"), Value = c(0.0171885235697395, -0.00669036428079295, 
    -0.0106478836418512, -0.00465545067066953, 0.0000251700516804565, 
    -0.0136163258207899, -0.00118539912060411, -0.0190272881732103, 
    -0.00854690633203736, -0.000144312649125955, 0.0269021803390415, 
    0.0102105886057713, -0.00657804700031572, -0.0289694516279417, 
    -0.0111990899370517, -0.0237924756958046, 0.0304450229355975, 
    0.00789725649510542, 0.0088295314155904, -0.0138609782778413
    ), out = c(1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 
    1, 1, 1, 0)), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame")), structure(list(time = structure(c(17106, 
17107, 17108, 17109, 17112, 17113, 17114, 17115, 17116, 17119, 
17120, 17121, 17122, 17123, 17126, 17127, 17128, 17130, 17133, 
17134), class = "Date"), ID = c("CAT1", "CAT1", "CAT1", "CAT1", 
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1"
), Value = c(-0.00118539912060411, -0.0190272881732103, -0.00854690633203736, 
-0.000144312649125955, 0.0269021803390415, 0.0102105886057713, 
-0.00657804700031572, -0.0289694516279417, -0.0111990899370517, 
-0.0237924756958046, 0.0304450229355975, 0.00789725649510542, 
0.0088295314155904, -0.0138609782778413, 0.0113866913646978, 
-0.0012090379426567, -0.00947587412040363, 0.00090671757719174, 
0.00861253683999563, 0.00338440726054889), out = c(1, 0, 0, 1, 
1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0)), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
    time = structure(c(17114, 17115, 17116, 17119, 17120, 17121, 
    17122, 17123, 17126, 17127, 17128, 17130, 17133, 17134, 17135, 
    17136, 17137, 17140, 17141, 17142), class = "Date"), ID = c("CAT1", 
    "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
    "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
    "CAT1", "CAT1", "CAT1"), Value = c(-0.00657804700031572, 
    -0.0289694516279417, -0.0111990899370517, -0.0237924756958046, 
    0.0304450229355975, 0.00789725649510542, 0.0088295314155904, 
    -0.0138609782778413, 0.0113866913646978, -0.0012090379426567, 
    -0.00947587412040363, 0.00090671757719174, 0.00861253683999563, 
    0.00338440726054889, -0.016605324777718, -0.0133502127773003, 
    0.00344958960669994, 0.0160160159893405, -0.00447205963195563, 
    0.0159133949476373), out = c(1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 
    0, 0, 1, 0, 0, 0, 1, 1, 0, 1)), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame")), structure(list(time = structure(c(17122, 
17123, 17126, 17127, 17128, 17130, 17133, 17134, 17135, 17136, 
17137, 17140, 17141, 17142, 17143, 17144, 17147, 17148, 17149, 
17150), class = "Date"), ID = c("CAT1", "CAT1", "CAT1", "CAT1", 
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", 
"CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1", "CAT1"
), Value = c(0.0088295314155904, -0.0138609782778413, 0.0113866913646978, 
-0.0012090379426567, -0.00947587412040363, 0.00090671757719174, 
0.00861253683999563, 0.00338440726054889, -0.016605324777718, 
-0.0133502127773003, 0.00344958960669994, 0.0160160159893405, 
-0.00447205963195563, 0.0159133949476373, 0.00678170228664343, 
0.0165760738798502, -0.0000252860172512692, 0.00865350998635406, 
0.00121847887105075, 0.000978545163097477), out = c(1, 0, 1, 
0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1)), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame")))

www · Accepted Answer

The data frames in your example list Data only contain two rows, which is too few for the tsfeatures function. Another issue is that .x["Value"] returns a data frame, but the documentation of the tsfeatures function says the first argument should be a list of univariate time series objects or a vector. Therefore, I assume the code you should use is .x[["Value"]], which returns a vector.

I tried your example as follows, combining all the data frames in Data into a single data frame.

# Stack every data frame in Data into one, extract the Value column as a
# plain vector (pull(Value) is equivalent to [["Value"]]), and compute the
# requested features on that single pooled series.
Data %>%
  bind_rows() %>%
  pull(Value) %>%
  tsfeatures(functions)

which leads to the following output.

# # A tibble: 1 x 10
#     nperiods seasonal_period trend      spike linearity curvature  e_acf1 e_acf10 max_kl_shift time_kl_shift
#                                                           
#   1        0               1 0.965 0.00000631      2.99     -2.64 -0.0723   0.381           NA            NA

Therefore, assuming the data frames in your Data list have more than two rows, we can apply the above code to each of them by wrapping it in a map call as follows.

# Compute the features once per data frame: a single map() over Data, with
# [["Value"]] extracting the column as a vector for tsfeatures().
map(Data, function(df) tsfeatures(df[["Value"]], functions))

I think this probably will work.

mapping functions over a list of data frames

Answers (1)

Related Questions