ARIMA model accuracy scores show NaN

mlsops · June 6, 2023, 6:27pm

Hello,

I created some ARIMA models for my financial forecasting work. When I checked their accuracy, I got NaN results for some reason. If someone could please help, that would be much appreciated.

Here is what I did:

library(fpp3)

# Reduce the raw data to the three columns used for modelling, then convert
# it to a tsibble keyed by location and indexed by start_month.
file <- as_tsibble(
  select(file, location, start_month, paid),
  key = location,
  index = start_month
)

# Build the expanding-window training folds for the Central location.
# The location match is case-insensitive: the sample data in this post stores
# the value as "CENTRAL", so an exact match on "Central" would silently
# return zero rows.
cn <- file %>%
  filter(
    toupper(location) == "CENTRAL",
    # Compare Date with Date instead of relying on implicit string coercion.
    as.Date(start_month) >= as.Date("2012-01-01")
  ) %>%
  # Keep observations up to the end of 2020.
  filter_index(~ "2020-12-31") %>%
  # The first fold holds 12 observations; each later fold adds one more.
  stretch_tsibble(.init = 12, .step = 1)

# Fit a set of candidate ARIMA specifications to every training fold in cn.
# Formulas that start with `0 +` exclude the constant term; `arm` leaves the
# whole specification to ARIMA()'s automatic selection.
fit <- model(
  cn,
  ar112 = ARIMA(paid ~ 0 + pdq(1, 1, 2)),
  ar110 = ARIMA(paid ~ pdq(1, 1, 0)),
  ar011 = ARIMA(paid ~ pdq(0, 1, 1)),
  ar012 = ARIMA(paid ~ pdq(0, 1, 2)),
  ar010 = ARIMA(paid ~ pdq(0, 1, 0)),
  ar211 = ARIMA(paid ~ 0 + pdq(2, 1, 1)),
  ar311 = ARIMA(paid ~ 0 + pdq(3, 1, 1)),
  ar011010 = ARIMA(paid ~ 0 + pdq(0, 1, 1) + PDQ(0, 1, 0)),
  ar001010 = ARIMA(paid ~ 0 + pdq(0, 1, 0) + PDQ(0, 1, 0)),
  arm = ARIMA(paid)
)

# Forecast one year ahead from every fitted model on every fold.
fc <- fabletools::forecast(fit, h = "1 years")

# training set: accuracy of the fitted models on the data they were fit to
accuracy(fit)

# testing set: accuracy of the forecasts in fc measured against the data in cn.
# NOTE(review): cn is the filtered, stretched data the models were fit on;
# the follow-up reply in this thread reports that passing the full `file`
# tsibble here instead resolved the NaN scores.
accuracy(fc, cn)

structure(list(location = c("CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", 
"CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", 
"CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", 
"CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", 
"CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", 
"CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", 
"CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", 
"CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", 
"CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", 
"CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", 
"CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", 
"CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", 
"CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", 
"CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", 
"CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL", 
"CENTRAL", "CENTRAL", "CENTRAL", "CENTRAL"), start_month = structure(c(15340, 
15371, 15400, 15431, 15461, 15492, 15522, 15553, 15584, 15614, 
15645, 15675, 15706, 15737, 15765, 15796, 15826, 15857, 15887, 
15918, 15949, 15979, 16010, 16040, 16071, 16102, 16130, 16161, 
16191, 16222, 16252, 16283, 16314, 16344, 16375, 16405, 16436, 
16467, 16495, 16526, 16556, 16587, 16617, 16648, 16679, 16709, 
16740, 16770, 16801, 16832, 16861, 16892, 16922, 16953, 16983, 
17014, 17045, 17075, 17106, 17136, 17167, 17198, 17226, 17257, 
17287, 17318, 17348, 17379, 17410, 17440, 17471, 17501, 17532, 
17563, 17591, 17622, 17652, 17683, 17713, 17744, 17775, 17805, 
17836, 17866, 17897, 17928, 17956, 17987, 18017, 18048, 18078, 
18109, 18140, 18170, 18201, 18231, 18262, 18293, 18322, 18353, 
18383, 18414, 18444, 18475, 18506, 18536, 18567, 18597, 18628, 
18659, 18687, 18718, 18748, 18779, 18809, 18840, 18871, 18901, 
18932, 18962, 18993, 19024, 19052, 19083, 19113, 19144, 19174, 
19205, 19236, 19266, 19297, 19327, 19358, 19389, 19417), class = c("yearmonth", 
"vctrs_vctr")), paid = c(21869876.18, 14415826.92, 22239821.54, 
15158099.2, 12536509.73, 14199695.08, 10686436.8, 15802861.19, 
19167855.63, 11439066.45, 13795489.86, 10833894.98, 20800566.93, 
12080691.96, 17194109.89, 11492454.31, 13462967.08, 10916737.32, 
10214705.89, 14922259.06, 17663526.44, 11108795.4, 14000760.71, 
9898798.85, 21442951.96, 11646791.67, 18295976.73, 10711057.45, 
13610743.68, 10438025.42, 11259793.06, 11041268.72, 16489276.77, 
12387899.2, 9267420.39, 8929222.48, 18728682.26, 10330538.71, 
14185652.77, 9518612.49, 12376634.95, 9302419.03, 11083148.54, 
9281051.38, 15451978.06, 11682611.61, 8513565.27, 9887395.28, 
16104970.05, 9267550.84, 11259513.86, 11021625.15, 8580042.72, 
9518362.28, 7171984.56, 8846477.89, 15383245.64, 7835016.92, 
6670793.59, 9268426.35, 14005650.2, 7851410.92, 9399812.27, 9286297.55, 
7230212.19, 7766674.85, 5721101.73, 9410132.81, 10520164.47, 
5827495.69, 5462011.19, 7029263.8, 10569524.68, 6289294.79, 7373754.88, 
6682247.13, 5256281.49, 5228528.98, 3926196.84, 5971231.33, 7626016.49, 
4233507.85, 4959751.12, 4027010.37, 7704476.58, 4510907.69, 5339528.03, 
4935419.53, 4879797.1, 3152441.89, 2958364.98, 5117842.01, 6332245.99, 
3628638.28, 4013896.01, 3238529.82, 6764505.99, 3720157.76, 3582348.1, 
2492669.48, 2332501.76, 1359651.78, 1599968.91, 2500401.46, 5364534.34, 
4454542.05, 3044066.89, 4060277.25, 6830080.73, 3693674.77, 4621996.04, 
4929497.19, 4129253.78, 4071349.83, 2745622.27, 4565434.32, 7611543.48, 
5134036.26, 5238043.47, 6427305.72, 7390503.46, 5637679.61, 6714924.11, 
6584376.55, 4981947.9, 5729639.12, 4552623, 4389053.46, 7168323.72, 
4840703, 4718361.3, 5381815.92, 5940947.47, 5240067.78, 6306831.11
)), class = c("tbl_ts", "tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-135L), key = structure(list(location = "CENTRAL", .rows = structure(list(
    1:135), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", 
"list"))), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-1L), .drop = TRUE), index = structure("start_month", ordered = TRUE), index2 = "start_month", interval = structure(list(
    year = 0, quarter = 0, month = 1, week = 0, day = 0, hour = 0, 
    minute = 0, second = 0, millisecond = 0, microsecond = 0, 
    nanosecond = 0, unit = 0), .regular = TRUE, class = c("interval", 
"vctrs_rcrd", "vctrs_vctr")))

Also, I am not sure why I get this message when I check accuracy on my test data:

Warning message:
The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
12 observations are missing between 2023 Apr and 2024 Mar

mlsops · June 7, 2023, 10:11pm

I figured out the solution. Instead of:

#testing set
accuracy(fc, cn)

It should be:

#testing set
accuracy(fc, file)

This is because cn is the filtered file and does not contain data past December 2022, so the forecasts had no actual values to be compared against, which produced the NaN scores.

system · June 14, 2023, 10:11pm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.