Correctly Using the Window() Functions in R

I have the following problem:

  • I am trying to fit two time series models (arima, ets)
  • Perform "rolling window cross validation" on these models
  • Record the average errors (mae, rmse) on different lengths of time (e.g. average mae for 1 time period ahead, average rmse for 1 time period ahead....average mae for 12 time period ahead, average rmse for 12 time period ahead).
  • Plot the results

First I generated some random data:

library(forecast)
library(lubridate)

set.seed(123)

weeks <- rep(seq(as.Date("2010-01-01"), as.Date("2023-01-01"), by = "week"), each = 1)
counts <- rpois(length(weeks), lambda = 50)
df <- data.frame(Week = as.character(weeks), Count = counts)

# Convert Week column to Date format
df$Week <- as.Date(df$Week)

# Create a time series object
ts_data <- ts(df$Count, frequency = 52, start = c(year(min(df$Week)), 1))

Next, I set up different objects required for the loop:

# Set the length of data for fitting models
k <- 60

# Initialize matrices to store the MAE and RMSE values (I was not sure if they should be initialized to 0 or NA?)
#mae_arima <- matrix(NA, nrow = length(ts_data) - k, ncol = 12)
#rmse_arima <- matrix(NA, nrow = length(ts_data) - k, ncol = 12)
#mae_ets <- matrix(NA, nrow = length(ts_data) - k, ncol = 12)
#rmse_ets <- matrix(NA, nrow = length(ts_data) - k, ncol = 12)

mae_arima <- matrix(0, nrow = length(ts_data) - k, ncol = 12)
rmse_arima <- matrix(0, nrow = length(ts_data) - k, ncol = 12)
mae_ets <- matrix(0, nrow = length(ts_data) - k, ncol = 12)
rmse_ets <- matrix(0, nrow = length(ts_data) - k, ncol = 12)

Finally, I tried to write the cross validation loop:

for(i in 1:(length(ts_data)-k)) {
    tryCatch({
        # Define the training and testing sets
        train_data <- window(ts_data, end = c(year(min(df$Week)) + floor((i+k-2)/52), (i+k-2)%%52+1))
        test_data <- window(ts_data, start = c(year(min(df$Week)) + floor((i+k-1)/52), (i+k-1)%%52+1),
                            end = c(year(min(df$Week)) + floor((i+11+k-1)/52), (i+11+k-1)%%52+1))
        
        # Fit and forecast using ARIMA model
        fit_arima <- auto.arima(train_data, seasonal = TRUE, lambda = "auto")
        fcast_arima <- forecast(fit_arima, h = 12)
        
        # Calculate MAE and RMSE for ARIMA model's forecast
        mae_arima[i, ] <- abs(fcast_arima[['mean']] - test_data)
        rmse_arima[i, ] <- sqrt(mean((fcast_arima[['mean']] - test_data)^2))
        
        # Fit and forecast using ETS model
        fit_ets <- ets(train_data)
        fcast_ets <- forecast(fit_ets, h = 12)
        
        # Calculate MAE and RMSE for ETS model's forecast
        mae_ets[i, ] <- abs(fcast_ets[['mean']] - test_data)
        rmse_ets[i, ] <- sqrt(mean((fcast_ets[['mean']] - test_data)^2))
        
        # Print model results, MAE and RMSE
        cat("ARIMA model results for iteration", i, ":\n")
        print(fit_arima)
        print(fcast_arima)
        cat("MAE (ARIMA):", round(mean(mae_arima[i, ]), 2), " RMSE (ARIMA):", round(mean(rmse_arima[i, ]), 2), "\n\n")
    })}

And here is the code for the plots:

# Create the plot
par(mfrow = c(2, 2))

# Plot for MAE (ARIMA)
plot(1:12, colMeans(mae_arima, na.rm = TRUE), type = "b", xlab = "Forecast Horizon (in months)", ylab = "MAE",
     main = "ARIMA Model", col = "blue", lty = 1)

# Plot for MAE (ETS)
plot(1:12, colMeans(mae_ets, na.rm = TRUE), type = "b", xlab = "Forecast Horizon (in months)", ylab = "MAE",
     main = "ETS Model", col = "green", lty = 2)

# Plot for RMSE (ARIMA)
plot(1:12, colMeans(rmse_arima, na.rm = TRUE), type = "b", xlab = "Forecast Horizon (in months)", ylab = "RMSE",
     main = "ARIMA Model", col = "red", lty = 3)

# Plot for RMSE (ETS)
plot(1:12, colMeans(rmse_ets, na.rm = TRUE), type = "b", xlab = "Forecast Horizon (in months)", ylab = "RMSE",
     main = "ETS Model", col = "orange", lty = 4)

The code seems to have run, but I am not sure if I am doing all of this correctly.

Can someone please tell me if I am doing this correctly?

Thanks!

References:

This topic was automatically closed 42 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.