Daniel James
Daniel James

Reputation: 1433

Make my Two Different R Functions to be Just One Function

I want to use MonteCarlo function in MonteCarlo package in R which has one requirement among others as supply just one single function into the MonteCarlo package.

To run a simulation study, the user has to nest both - the generation of a sample and the calculation of the desired statistics from this sample - in a single function. This function is passed to MonteCarlo(). No additional programming is required (Vignette: The MonteCarlo Package).

Contrary to this vital condition, I have two different functions that suit my algorithm. I have used the MonteCarlo function as provided by the correct answer in this question for a method.

I want to use a different method thus, I write the following functions (function1 and function2) for it to be passed to MonteCarlo function as demonstrated below:

Here is the algorithm of what I want to do with R:

  1. Simulate 10 time series data set from ARIMA model through arima.sim() function
  2. Split the series into overlapping sub-series of possible 2s, 3s, 4s, 5s, 6s, 7s, 8s, and 9s.
  3. For each size take a resample the blocks with replacement, for new series and obtain the best ARIMA model from the subseries from each block size through auto.arima() function.
  4. Obtain for each subseries of each block sizes RMSE.

The below R functions get that done.

library(MonteCarlo)
library(forecast)
library(Metrics)
############################################
function1 <- function(lb, ov, n) {

  starts <- unique(sort(c(seq(1, n, lb), seq(lb-ov+1, n, lb))))
  ends <- pmin(starts + lb - 1, n)

  # truncate starts and ends to the first num elements
  num <- match(n, ends)
  head(data.frame(starts, ends), num)
}
#############################################
# parameter grids
n <- 10 # length of time series
lb <- seq(n-2) + 1 # vector of block sizes
phi <- 0.6 # autoregressive parameter
reps <- 3 # monte carlo replications

# simulation function  
function2 <- function(n, lb, phi) {

  #### simulate ####
  ov <- ceiling(lb/2)
  vblocks <- Vectorize(function1, c("lb", "ov"), SIMPLIFY = FALSE)
  d <- vblocks(lb = lb, ov = ov, n = n)
  ts <- arima.sim(n, model = list(ar = phi, order = c(1, 0, 0)), sd = 1)

  #### devide ####
  blk <- lapply(d, function(x) with(x, Map(function(i, j) ts[i:j], starts, ends)))
  #### resample ####
  res <- sample(blk, replace = TRUE, 10)        # resamples the blocks
  res.unlist <- unlist(res, use.names = FALSE)   # unlist the bootstrap series
  #### train, forecast ####
  train <- head(res.unlist, round(length(res.unlist) - 10)) # train set
  test <- tail(res.unlist, length(res.unlist) - length(train)) # test set
  nfuture <- forecast(train, # forecast
                      model = auto.arima(train), 
                      lambda = 0, biasadj = TRUE, h = length(test))$mean    
  ### metric ####
  RMSE <- rmse(test, nfuture) # return RMSE
  return(
    list("RMSE" = RMSE)
  )
}

param_list = list("n" = n, "lb" = lb, "phi" = phi)

set.seed(123, kind = "L'Ecuyer-CMRG")
MC_result <- MonteCarlo(func = bootstrap4, 
                            nrep = reps,
                            ncpus = parallel::detectCores() - 1,
                            param_list = param_list,
                            export_also = list(
                              "packages" = c("forecast", "Metrics")
                            ),
                            raw = T)

I got this error as I run the above:

in snowfall::sfExport("func2", "func", "libloc_strings", "function1", : Unknown/unfound variable ends in export. (local=TRUE)

I want to integrate function1 into function2 in a way that function1 will not be a function in function2.

here is my trial

function2 <- function(n, lb, phi) {

  #### simulate ####
  ov <- ceiling(lb/2)
  function1 <- head(data.frame(unique(sort(c(seq(1, n, lb), seq(lb-ov+1, n, lb)))), pmin(unique(sort(c(seq(1, n, lb), seq(lb-ov+1, n, lb)))) + lb - 1, n)), match(n, pmin(unique(sort(c(seq(1, n, lb), seq(lb-ov+1, n, lb)))) + lb - 1, n)))
  vblocks <- Vectorize(function1, c("lb", "ov"), SIMPLIFY = FALSE)
  d <- vblocks(lb = lb, ov = ov, n = n)
  ts <- arima.sim(n, model = list(ar = phi, order = c(1, 0, 0)), sd = 1)

  #### devide ####
    blk <- lapply(d, function(x) with(x, Map(function(i, j) ts[i:j], unique(sort(c(seq(1, n, lb), seq(lb-ov+1, n, lb)))), pmin(unique(sort(c(seq(1, n, lb), seq(lb-ov+1, n, lb)))) + lb - 1, n))))

  #### resample ####
  res <- sample(blk, replace = TRUE, 10)        # resamples the blocks
  res.unlist <- unlist(res, use.names = FALSE)   # unlist the bootstrap series
  #### train, forecast ####
  train <- head(res.unlist, round(length(res.unlist) - 10)) # train set
  test <- tail(res.unlist, length(res.unlist) - length(train)) # test set
  nfuture <- forecast(train, # forecast
                      model = auto.arima(train), 
                      lambda = 0, biasadj = TRUE, h = length(test))$mean    
  ### metric ####
  RMSE <- rmse(test, nfuture) # return RMSE
  return(
    list("RMSE" = RMSE)
  )
}

when I passed it to this:

set.seed(123, kind = "L'Ecuyer-CMRG")
MC_result <- MonteCarlo(func = function2, 
                            nrep = reps,
                            ncpus = parallel::detectCores() - 1,
                            param_list = param_list,
                            export_also = list(
                              "packages" = c("forecast", "Metrics")
                            ),
                            raw = T)

I got this error message:

3 nodes produced errors; first error: could not find function "vblocks"

What I did in my trial is just to put the whole of function1 as a single statement in function2

Upvotes: 0

Views: 114

Answers (1)

Paul
Paul

Reputation: 9087

You can put the contents of function1 into the body of function2 - including the variable assignments etc.

library(MonteCarlo)
library(forecast)
library(ModelMetrics)

mc_f <- function(n, lb, phi) {
  # Generate data
  ov <- ceiling(lb / 2)
  starts <- unique(sort(c(seq(1, n, lb), seq(lb - ov + 1, n, lb))))
  ends <- pmin(starts + lb - 1, n)
  num <- match(n, ends)
  d <- head(data.frame(starts, ends), num)
  
  ts <- arima.sim(n, model = list(ar = phi, order = c(1, 0, 0)), sd = 1)
  
  blk <- mapply(
    function(start, end) ts[start:end],
    d$starts, 
    d$ends, 
    SIMPLIFY = FALSE
  )
  
  # Resample
  res <- sample(blk, replace = TRUE, 10)
  res.unlist <- unlist(res, use.names = FALSE)
  
  # Train and forecast
  train <- head(res.unlist, round(length(res.unlist) - 10))
  test <- tail(res.unlist, length(res.unlist) - length(train))
  nfuture <- forecast(train,
                      model = auto.arima(train),
                      lambda = 0, biasadj = TRUE, h = length(test))$mean
  
  # Extract metric
  RMSE <- rmse(test, nfuture)
  list("RMSE" = RMSE)
}
reps <- 3
param_list <- list(n = 10, lb = seq(n - 2) + 1, phi = 0.6)

mc_result <- MonteCarlo(
  func = mc_f,
  nrep = reps,
  ncpus = parallel::detectCores() - 1,
  param_list = param_list
)
#> Grid of  8  parameter constellations to be evaluated. 
#>  
#> Simulation parallelized using 3 cpus. 
#>  
#> Progress: 
#>  
#>   |==================================================================================| 100%

Upvotes: 1

Related Questions