forecast multiple ARIMA models after train and test R

I need to forecast over 100k time series with ARIMA. This is a sample of my data:

library(yardstick)
library(forecast)
library(tsibble)
# Sample data in long format: three monthly series (ID 1 to 3), each with 38
# observations covering Dec 2016 through Jan 2020. All three columns are
# integer; `Period` encodes the month as YYYYMM.
df <- tibble::tibble(
  ID = rep(1:3, each = 38L),
  Period = rep(
    as.integer(format(
      seq(as.Date("2016-12-01"), by = "month", length.out = 38L),
      "%Y%m"
    )),
    times = 3L
  ),
  Value = c(
    # ID 1: Dec 2016
    -19188L,
    # ID 1: 2017
    -8805L, 5092L, 4587L, -6083L, -15308L, -12004L,
    -791L, -5151L, 3312L, 7728L, -20823L, 901L,
    # ID 1: 2018
    7713L, 4506L, 24475L, -12418L, 14545L, -14233L,
    1271L, -19064L, -3018L, 13291L, 7111L, -16961L,
    # ID 1: 2019
    -2442L, -6861L, 1819L, 8759L, -9220L, -9786L,
    -8620L, -47736L, -2586L, 12347L, 19758L, 4669L,
    # ID 1: Jan 2020
    1499L,
    # ID 2: Dec 2016
    -6146L,
    # ID 2: 2017
    321L, 20859L, -7533L, 72L, 17915L, -985L,
    -832L, -1773L, -2532L, 2280L, -18821L, 16445L,
    # ID 2: 2018
    1660L, -1857L, 3221L, -11009L, -11945L, -7152L,
    -3201L, -13226L, -13568L, -11952L, 1276L, -20049L,
    # ID 2: 2019
    -7576L, -10370L, 47760L, -37809L, -9232L, -18635L,
    -6548L, -29065L, -2225L, 3613L, -11113L, 4626L,
    # ID 2: Jan 2020
    -12083L,
    # ID 3: Dec 2016
    -5602L,
    # ID 3: 2017
    -692L, 1152L, -378L, -2342L, 1059L, -11490L,
    -261L, 1703L, -6968L, 6915L, -6320L, -19468L,
    # ID 3: 2018
    -16850L, -9559L, -6727L, -29877L, 7453L, -11100L,
    14289L, -16686L, -17925L, -2381L, -25015L, -20258L,
    # ID 3: 2019
    -12875L, -8534L, -3880L, -27034L, -13624L, -29521L,
    -4933L, -5963L, -15193L, -2960L, 6150L, 18957L,
    # ID 3: Jan 2020
    -10326L
  )
)

Some preprocessing so the data can be used with ARIMA:

# Derive a monthly index from the integer YYYYMM `Period` column and convert
# the data to a tsibble keyed by series ID.
# Calls are namespace-qualified because only yardstick, forecast and tsibble
# are attached by the library() calls in this script; dplyr and lubridate are
# not.
df <- df %>%
  dplyr::mutate(
    year = Period %/% 100L, # e.g. 201612 -> 2016
    month = Period %% 100L, # e.g. 201612 -> 12
    day = 1L, # anchor every period on the first of the month
    date = as.character(lubridate::make_date(year, month, day)),
    # Build the index straight from the Date rather than re-parsing `date`.
    YearMonth = tsibble::yearmonth(lubridate::make_date(year, month, day))
  ) %>%
  tsibble::as_tsibble(key = ID, index = YearMonth)

Now I split the data into training and test sets. I use everything up to December 2018 for training, and 2019 (plus January 2020) for testing.

 # Fit one non-seasonal ARIMA per ID on the training window (up to Dec 2018),
 # then score 13-step-ahead forecasts (Jan 2019 to Jan 2020) against the full
 # data set.
 # Calls are namespace-qualified: with only yardstick, forecast and tsibble
 # attached, `filter`, `forecast` and `accuracy` would not resolve to the
 # dplyr/fabletools functions this pipeline needs, and `model`/`ARIMA` would
 # not be found at all.
 df_train <- df %>%
   dplyr::filter(YearMonth <= tsibble::yearmonth("2018 Dec")) %>%
   fabletools::model(
     # PDQ(0, 0, 0) switches off the seasonal part of the model.
     arima = fable::ARIMA(
       Value ~ PDQ(0, 0, 0),
       stepwise = FALSE,
       approximation = FALSE
     )
   )

 # Despite its name, `df_test` holds the accuracy measures of the forecasts,
 # one row per model and ID.
 df_test <- df_train %>%
   fabletools::forecast(h = 13) %>%
   fabletools::accuracy(df)

Now I need to forecast each time series from Feb 2020 to Feb 2021. I don't know how to apply forecast or Arima from the forecast package, because their arguments expect a univariate time series of class ts. Is there another solution? Does anyone know how to do it for each of them? Thanks for the help!

Have you had a look at the tidyverts packages fable, tsibble, and feasts?

https://tidyverts.org/

When using the fable package you don't need to load the forecast package to produce forecasts.

To forecast 1 year beyond the df dataset with a non-seasonal ARIMA() model, you could use:

library(fable)
# Fit a non-seasonal ARIMA to every series in `df` using all available data,
# then forecast one year past the end of each series.
# ARIMA() only defines the model; it has to be wrapped in model() to be
# estimated for each key of the tsibble.
df %>%
  model(
    ARIMA(Value ~ PDQ(0, 0, 0), stepwise = FALSE, approximation = FALSE)
  ) %>%
  forecast(h = "1 year")

But if I fit the model on all the data, will I get the same results as when I fit it on the training set only?

The produced forecasts are based on the provided data. You will get different results as the forecasts are now using all available data, rather than only the data in the training set.

You are trying to use time series data on a machine learning problem. They are two different things and probably not something you should mix.

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.