Leo
Leo

Reputation: 11

How can I run auto.arima for multiple products in R?

I'm building an auto.arima model in R to forecast my demand. I did it for one product and it works. I export the result in xlsx format, with these columns:

My code is:

PS: the variable names are in Portuguese because I'm from Brazil.

# Load the raw order-item export: a semicolon-separated file read with a header row.
bdvendas <- read.csv("Pedidos+PedidosItem.csv", header = T, sep = ";")

# Keep only the order date, the product key (SkuRaiz) and the ordered quantity.
vendas <- bdvendas %>% 
  dplyr::select(dataPedido,SkuRaiz,quantidadeItemReal)

# Parse the order date column.
# NOTE(review): dmy() is presumably lubridate's day-month-year parser -- confirm.
vendas$dataPedido <- dmy(vendas$dataPedido)

# Keep rows dated after 2018-12-31, drop rows with an empty SkuRaiz,
# then drop any row that still contains an NA.
vendas <- subset(vendas, vendas$dataPedido > "2018-12-31")
vendas <- subset(vendas, vendas$SkuRaiz!="")
vendas <- na.omit(vendas)

# Derive a year-month value from each order date and append it as a 4th column.
# NOTE(review): as.yearmon() is presumably zoo's -- confirm.
teste <-  data.frame(as.yearmon(vendas$dataPedido))
teste <- cbind(vendas,teste)
# Rename all columns; quantidadeItemReal becomes Pedidos, the new column MesPedido.
names(teste)[1:length(teste)] <- c("dataPedido","SkuRaiz","Pedidos","MesPedido")

# Total ordered quantity per month and product.
vendas <- teste %>% 
  group_by(MesPedido,SkuRaiz) %>% 
  summarise(Pedidos = sum(Pedidos))

# Restrict the monthly totals to a single product.
analisesku <- vendas %>% 
  filter(SkuRaiz == 1081) ## <- HERE I SELECT MY PRODUCT

# Drop the last row of the selected product's monthly totals.
# NOTE(review): presumably this removes the current, still incomplete month -- confirm.
analisesku <- analisesku[-length(analisesku$Pedidos),]

# Year and month of the first remaining observation (start of the series).
ano_inicial <- as.numeric(format(analisesku$MesPedido,'%Y'))[1]
mes_inicial <- as.numeric(format(analisesku$MesPedido,'%m'))[1]

# Year and month of the last remaining observation (end of the series).
ano_final <- as.numeric(format(analisesku$MesPedido,'%Y'))[length(analisesku$MesPedido)]
mes_final <- as.numeric(format(analisesku$MesPedido,'%m'))[length(analisesku$MesPedido)]

# Build a monthly (frequency = 12) time series from the totals and plot it.
# NOTE(review): values are placed by position between start and end; a product
# with months that have no orders presumably needs explicit zero rows for the
# dates to line up -- verify before running this over many products.
tsbanco <- ts(analisesku$Pedidos, start = c(ano_inicial,mes_inicial), end = c(ano_final,mes_final), frequency = 12)
autoplot(tsbanco)

# Select an ARIMA model with stepwise search and approximation both disabled,
# tracing the candidates, then forecast 2 periods ahead with a 95% interval.
modelo <- auto.arima(tsbanco, stepwise = FALSE, approximation = FALSE, trace = TRUE)
previsao <- forecast(modelo, h=2, level = c(95))
print(previsao)
autoplot(previsao)
accuracy(previsao)
# Use whatever print(summary(previsao)) returns as the forecast table.
# NOTE(review): this relies on the print method returning the forecast values
# as a data frame -- confirm for the installed forecast version.
output <- print(summary(previsao))
# Prepend the product key and give the four columns export-friendly names.
output <- cbind(analisesku$SkuRaiz[1],output)
names(output) <- c("SkuRaiz","pointForecast","low95","high95")
# Monthly dates starting at today's date, added as the first column.
# NOTE(review): length = 3 here while the forecast horizon is h = 2 -- confirm
# the row counts agree before the cbind() below.
mesprevisao <- data.frame(seq(as.Date(Sys.Date()), by = "month", length = 3))
names(mesprevisao) <- "mesPrevisao"
output <- cbind(mesprevisao,output)

# Export with ";" as field separator and "," as decimal mark, header included.
write.table(output, file = "previsao.csv", sep = ";", dec = ',', row.names = F, col.names = T)

That works well.

But my problem is that I need to do this automatically for multiple products (around 3000).

PS: each product has its own series; the series are independent of each other.

How can I do that? Do I need to use a loop or something similar?

Upvotes: 1

Views: 1008

Answers (1)

Econ_matrix
Econ_matrix

Reputation: 405

You did not provide any data so I will simulate some data and demonstrate step by step how you can forecast multiple time series.

Load forecast library

library(forecast)

Let's simulate 5 time series from an ARIMA model

# Simulate five AR(1) series (coefficient 0.5, 100 observations each) and bind
# them into one monthly multivariate ts starting in January 2000.
# NOTE(review): `mean` is forwarded through arima.sim()'s `...`; presumably it
# sets the mean of the innovations rather than of the series itself -- confirm.
bts <- local({
  ar1_spec <- list(order = c(1, 0, 0), ar = .5)
  simulate_series <- function(mu) {
    arima.sim(ar1_spec, n = 100, mean = mu)
  }
  # Columns are generated in this order, so the random draws are consumed in
  # the same sequence as when each call is written out by hand.
  simulated <- dplyr::tibble(
    AA = simulate_series(12),
    AB = simulate_series(12),
    AC = simulate_series(11),
    BA = simulate_series(10),
    BB = simulate_series(14)
  )
  ts(simulated, start = c(2000, 1), frequency = 12)
})

Plot all ts

# Plot all simulated series at once.
# NOTE(review): autoplot() is not defined in this snippet; presumably the
# ts method provided via the forecast/ggplot2 packages -- confirm.
autoplot(bts)

Fit the model to all ts

# Run auto.arima() on every element of `bts` and keep the fitted models in a
# list (no simplification). The order limits below are forwarded unchanged to
# each auto.arima() call; further auto.arima arguments can be added alongside.
fit <- lapply(
  bts,
  auto.arima,
  max.p = 5,
  max.q = 5,
  max.P = 2,
  max.Q = 2
)

Forecast all models

# Forecast every fitted model 12 periods ahead; the result is a list with one
# forecast per model. Extra forecast() arguments can be added after `h`.
fc <- lapply(fit, forecast, h = 12)

This simple function helps us extract the mean forecast, or the lower or upper interval bound, from each forecast in the list

#' Extract one component of a forecast object
#'
#' @param x A forecast-like list with elements `mean`, `lower` and `upper`.
#'   `lower` and `upper` are indexed by column, one column per interval level.
#'   When `x` has a `level` element it is used to find the right column;
#'   otherwise the columns are assumed to be the 80 and 95 levels, in that
#'   order.
#' @param type Component to return: "mean" (the default), "lower" or "upper".
#' @param level Interval level wanted for "lower"/"upper". Only the first
#'   value is used, so the default resolves to 80. Ignored for "mean".
#' @return `x[["mean"]]`, or the column of `x[["lower"]]` / `x[["upper"]]`
#'   that belongs to `level`.
get_value <- function(x, type = c("mean", "lower", "upper"),
                      level = c(80, 95)) {
  # Resolve the vector default to a single choice; comparing the raw default
  # inside `if ()` would be a length > 1 condition.
  type <- match.arg(type)
  if (type == "mean") {
    return(x[["mean"]])
  }

  level <- level[1]
  available <- x[["level"]]
  if (is.null(available)) {
    available <- c(80, 95)
  }

  # Look the column up by level instead of assuming fixed positions, so
  # forecasts built with other `level` settings work as well.
  column <- match(level, available)
  if (is.na(column)) {
    stop(
      "level ", level, " is not available; choose one of: ",
      paste(available, collapse = ", "),
      call. = FALSE
    )
  }

  x[[type]][, column]
}

Get the mean forecast

# Collect the mean forecast of every entry in `fc`; sapply() simplifies the
# per-entry results and keeps the names of `fc`.
point_forecast <- sapply(fc, function(one_fc) get_value(one_fc, type = "mean"))

Get upper value with 95 % confidence interval

# Collect the upper bound at level 95 of every entry in `fc`; sapply()
# simplifies the per-entry results and keeps the names of `fc`.
fc_upper_95 <- sapply(
  fc,
  function(one_fc) get_value(one_fc, type = "upper", level = 95)
)

Get upper value with 80 % confidence interval

# Collect the upper bound at level 80 of every entry in `fc`; sapply()
# simplifies the per-entry results and keeps the names of `fc`.
fc_upper_80 <- sapply(
  fc,
  function(one_fc) get_value(one_fc, type = "upper", level = 80)
)
                     

Since you have many time series it is a good idea to fit models in parallel to use computing resources efficiently

library(parallel)

# Leave one core free, but always start at least one worker: detectCores()
# can report a single core or NA, which would otherwise give 0 or NA workers.
n_cores <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)

cl <- parallel::makeCluster(n_cores)

# Fit the models on the workers; simplify = FALSE keeps the result a list.
fit_par <- parallel::parSapply(cl, bts, FUN = auto.arima, 
                               simplify = FALSE, USE.NAMES = TRUE,
                               # auto.arima arguments
                               max.p = 5,
                               max.q = 5,
                               max.P = 2,
                               max.Q = 2)

# Forecast every fitted model 12 periods ahead on the workers.
fc_par <- parallel::parSapply(cl, fit_par, FUN = forecast, simplify = FALSE, 
                              USE.NAMES = TRUE,
                              h = 12
                              # other arguments passed to forecast
                              )

# Extract the mean forecasts; this overwrites any earlier `point_forecast`.
point_forecast <- parallel::parSapply(cl, fc_par, FUN = get_value, 
                                      simplify = TRUE, USE.NAMES = TRUE, 
                                      type = "mean")

# Shut the worker processes down once the parallel work is done; without this
# they keep running until the R session ends.
parallel::stopCluster(cl)

Upvotes: 1

Related Questions