I need to forecast over 100k time series with ARIMA. This is a sample of my data:
library(yardstick)
library(forecast)
library(tsibble)
library(dplyr)
library(lubridate)
# Attached last so that fabletools' forecast()/accuracy() take precedence
# over the same-named functions in forecast and yardstick.
library(fable)
# Sample data in long format: one row per series (`ID`) and month.
# `Period` is an integer encoded as YYYYMM and `Value` is the observation.
# Three series (ID 1-3), each running from 201612 through 202001.
df<-tibble::tribble(
~ID, ~Period, ~Value,
1L, 201612L, -19188L,
1L, 201701L, -8805L,
1L, 201702L, 5092L,
1L, 201703L, 4587L,
1L, 201704L, -6083L,
1L, 201705L, -15308L,
1L, 201706L, -12004L,
1L, 201707L, -791L,
1L, 201708L, -5151L,
1L, 201709L, 3312L,
1L, 201710L, 7728L,
1L, 201711L, -20823L,
1L, 201712L, 901L,
1L, 201801L, 7713L,
1L, 201802L, 4506L,
1L, 201803L, 24475L,
1L, 201804L, -12418L,
1L, 201805L, 14545L,
1L, 201806L, -14233L,
1L, 201807L, 1271L,
1L, 201808L, -19064L,
1L, 201809L, -3018L,
1L, 201810L, 13291L,
1L, 201811L, 7111L,
1L, 201812L, -16961L,
1L, 201901L, -2442L,
1L, 201902L, -6861L,
1L, 201903L, 1819L,
1L, 201904L, 8759L,
1L, 201905L, -9220L,
1L, 201906L, -9786L,
1L, 201907L, -8620L,
1L, 201908L, -47736L,
1L, 201909L, -2586L,
1L, 201910L, 12347L,
1L, 201911L, 19758L,
1L, 201912L, 4669L,
1L, 202001L, 1499L,
2L, 201612L, -6146L,
2L, 201701L, 321L,
2L, 201702L, 20859L,
2L, 201703L, -7533L,
2L, 201704L, 72L,
2L, 201705L, 17915L,
2L, 201706L, -985L,
2L, 201707L, -832L,
2L, 201708L, -1773L,
2L, 201709L, -2532L,
2L, 201710L, 2280L,
2L, 201711L, -18821L,
2L, 201712L, 16445L,
2L, 201801L, 1660L,
2L, 201802L, -1857L,
2L, 201803L, 3221L,
2L, 201804L, -11009L,
2L, 201805L, -11945L,
2L, 201806L, -7152L,
2L, 201807L, -3201L,
2L, 201808L, -13226L,
2L, 201809L, -13568L,
2L, 201810L, -11952L,
2L, 201811L, 1276L,
2L, 201812L, -20049L,
2L, 201901L, -7576L,
2L, 201902L, -10370L,
2L, 201903L, 47760L,
2L, 201904L, -37809L,
2L, 201905L, -9232L,
2L, 201906L, -18635L,
2L, 201907L, -6548L,
2L, 201908L, -29065L,
2L, 201909L, -2225L,
2L, 201910L, 3613L,
2L, 201911L, -11113L,
2L, 201912L, 4626L,
2L, 202001L, -12083L,
3L, 201612L, -5602L,
3L, 201701L, -692L,
3L, 201702L, 1152L,
3L, 201703L, -378L,
3L, 201704L, -2342L,
3L, 201705L, 1059L,
3L, 201706L, -11490L,
3L, 201707L, -261L,
3L, 201708L, 1703L,
3L, 201709L, -6968L,
3L, 201710L, 6915L,
3L, 201711L, -6320L,
3L, 201712L, -19468L,
3L, 201801L, -16850L,
3L, 201802L, -9559L,
3L, 201803L, -6727L,
3L, 201804L, -29877L,
3L, 201805L, 7453L,
3L, 201806L, -11100L,
3L, 201807L, 14289L,
3L, 201808L, -16686L,
3L, 201809L, -17925L,
3L, 201810L, -2381L,
3L, 201811L, -25015L,
3L, 201812L, -20258L,
3L, 201901L, -12875L,
3L, 201902L, -8534L,
3L, 201903L, -3880L,
3L, 201904L, -27034L,
3L, 201905L, -13624L,
3L, 201906L, -29521L,
3L, 201907L, -4933L,
3L, 201908L, -5963L,
3L, 201909L, -15193L,
3L, 201910L, -2960L,
3L, 201911L, 6150L,
3L, 201912L, 18957L,
3L, 202001L, -10326L
)
Some preprocessing so that the data can be used with ARIMA:
# Derive calendar columns from the integer YYYYMM `Period` and convert the
# data to a tsibble holding one monthly series per `ID`.
# dplyr and lubridate functions are namespace-qualified so this step does not
# depend on which packages happen to be attached.
df <- df %>%
  dplyr::mutate(
    year = as.numeric(substr(Period, start = 1, stop = 4)),
    month = as.numeric(substr(Period, start = 5, stop = 6)),
    day = 1,
    # First day of the month, stored as a character string ("YYYY-MM-DD").
    date = as.character(lubridate::make_date(year, month, day)),
    # Build the monthly index straight from the Date; parsing the character
    # `date` back into a Date first would give the same result.
    YearMonth = tsibble::yearmonth(lubridate::make_date(year, month, day))
  ) %>%
  tsibble::as_tsibble(key = ID, index = YearMonth)
Now I separate the training and test data. I use everything up to December 2018 for training, and 2019 (plus January 2020) for testing:
# Fit one non-seasonal ARIMA model per series (per `ID`) on the observations
# up to and including December 2018.
# NOTE: despite its name, `df_train` holds the fitted models, not data.
# `filter` is qualified because a bare `filter` resolves to stats::filter()
# unless dplyr is attached.
df_train <- df %>%
  dplyr::filter(YearMonth <= yearmonth("2018 Dec")) %>%
  model(ARIMA(Value ~ PDQ(0, 0, 0), stepwise = FALSE, approximation = FALSE))

# Forecast the 13 months after the training window (Jan 2019 - Jan 2020) and
# score the forecasts against the observed values in `df`.
# NOTE: despite its name, `df_test` holds accuracy metrics, not data.
# `forecast` and `accuracy` are qualified because the forecast and yardstick
# packages export functions with the same names.
df_test <- df_train %>%
  fabletools::forecast(h = 13) %>%
  fabletools::accuracy(df)
Now I need to forecast each time series from Feb 2020 until Feb 2021. I don't know how to apply `forecast` or `Arima` from the forecast package, because their arguments expect a univariate time series of class `ts`. Is there another solution? Does anyone know how to do it for each series? Thanks for the help!