Reputation: 1443
I'm trying to understand the difference between `feasts::CCF()` and base R's `ccf()` (i.e. `stats::ccf()`), and why they produce different results in the reprex below. Do the `NA`s have something to do with it?
## Data
# Reprex data: 24 consecutive hourly observations (UTC) of two series.
# `x` is missing for the last six hours; `y` is complete.
# Built with data.frame() rather than the raw dput() output so that the stale
# readr `spec` attribute (which described columns not present here) is dropped.
df <- data.frame(
  # 1590919200 is the first timestamp in epoch seconds; each step is 3600 s.
  date = as.POSIXct(
    1590919200 + 3600 * (0:23),
    origin = "1970-01-01",
    tz = "UTC"
  ),
  x = c(
    12.61, 14.2, 13.37, 16.68, 13.35, 11.42, 16.51, 11.78,
    12.18, 13.67, 14.12, 13.2, 11.24, 10.76, 12.93, 16.48,
    20.65, 14.55, NA, NA, NA, NA, NA, NA
  ),
  y = c(
    459.07, 496.83, 511.17, 510.99, 511.22, 511.16, 511.22, 511.08,
    511.14, 511.24, 511.21, 511.03, 511.13, 511.23, 511.1, 511.11,
    511.34, 510.98, 511.18, 509.62, 511.09, 510.89, 505.53, 497.52
  )
)
## Base R
# Cross-correlation of x and y for lags -12..12, plotted immediately.
# na.pass hands the trailing NAs in `x` through to the estimator instead of
# dropping those rows first.
ccf(
  x = df$x,
  y = df$y,
  lag.max = 12,
  na.action = na.pass,
  plot = TRUE
)
## Feasts
library(tsibble)
library(feasts)
# Convert to a tsibble indexed by `date`, make any implicit time gaps explicit,
# then compute and plot the cross-correlation of x and y up to lag 12.
df_tsbl <- as_tsibble(df, index = date)
df_tsbl <- fill_gaps(df_tsbl)
xy_ccf <- CCF(df_tsbl, x, y, lag_max = 12)
autoplot(xy_ccf)
Upvotes: 0
Views: 501
Reputation: 33812
Yes, the difference is due to how `NA` values are handled. `ccf()` gives the same result as `CCF()` if `na.action = na.omit` is used instead of `na.action = na.pass`.
EDIT
However, if you look at the source code for `CCF()` (e.g. with `View(CCF)` in RStudio), you'll see that it calls base R's `ccf()` internally:
# Helper shown inside CCF(): evaluates the two column expressions against
# `.data`, passes the results to ccf() with plotting disabled, and returns the
# lags and correlation values as a two-column tibble (`lag`, `ccf`).
# Anything supplied through `...` is forwarded unchanged to ccf().
# NOTE(review): enexpr()/eval_tidy() are presumably rlang's and tibble() is
# presumably tibble's; their imports are not visible in this excerpt.
compute_ccf <- function(.data, value1, value2, ...) {
  # Capture the column expressions unevaluated so they resolve inside `.data`.
  value1 <- enexpr(value1)
  value2 <- enexpr(value2)

  ccf_out <- ccf(
    x = eval_tidy(value1, data = .data),
    y = eval_tidy(value2, data = .data),
    plot = FALSE,
    ...
  )

  # Convert the lag and acf components to plain numeric columns.
  tibble(
    lag = as.numeric(ccf_out$lag),
    ccf = as.numeric(ccf_out$acf)
  )
}
So you may be able to save a copy of `CCF()` under a new name and edit it to add `na.action = na.pass` to the `ccf()` call.
# Base R cross-correlations (lags -12..12) under the two NA policies compared.
ccf_napass <- ccf(
  x = df$x, y = df$y,
  lag.max = 12, plot = FALSE, na.action = na.pass
)
ccf_naomit <- ccf(
  x = df$x, y = df$y,
  lag.max = 12, plot = FALSE, na.action = na.omit
)

# The feasts equivalent on the gap-filled tsibble, reduced to its `ccf` column.
df_tsbl <- tsibble::fill_gaps(tsibble::as_tsibble(df, index = date))
CCF_feasts <- dplyr::pull(feasts::CCF(df_tsbl, x, y, lag_max = 12), ccf)

# Side-by-side comparison, one row per lag.
data.frame(
  ccf_napass = ccf_napass$acf,
  ccf_naomit = ccf_naomit$acf,
  CCF_feasts
)
ccf_napass ccf_naomit CCF_feasts
1 -0.001339325 -0.010819818 -0.010819818
2 0.031219857 0.005546548 0.005546548
3 0.041227368 -0.005382620 -0.005382620
4 -0.010115336 -0.019916634 -0.019916634
5 -0.091096987 -0.022542801 -0.022542801
6 -0.169062715 -0.017967752 -0.017967752
7 -0.069318178 -0.023075644 -0.023075644
8 -0.007847907 -0.043090132 -0.043090132
9 -0.008233668 -0.064749235 -0.064749235
10 -0.018144372 -0.070301161 -0.070301161
11 -0.002914065 -0.054108236 -0.054108236
12 0.035133485 0.028140497 0.028140497
13 0.133520812 0.118747777 0.118747777
14 -0.011428760 -0.010055235 -0.010055235
15 -0.021989205 -0.019475560 -0.019475560
16 -0.289253595 -0.257125853 -0.257125853
17 0.122887924 0.109172907 0.109172907
18 0.186626990 0.165904850 0.165904850
19 -0.217882808 -0.193637291 -0.193637291
20 0.280088738 0.249009757 0.249009757
21 0.204005890 0.181525595 0.181525595
22 0.036459851 0.032663381 0.032663381
23 0.017956562 0.016224802 0.016224802
24 0.179025309 0.159451149 0.159451149
25 0.418716273 0.372680241 0.372680241
Upvotes: 3