Data forecasting in R Studio

Hi Everyone,

I am trying to do forecasting in R Studio however, I ended up getting errors. Could someone help me with the forecasting model in R Studio?

Below is the R code:

# Step 1: Install Required Packages
install.packages("DBI")
install.packages("odbc")
install.packages("tidyverse")
install.packages("forecast")

# Step 2: Load Required Packages
library(DBI)
library(odbc)
library(tidyverse)
library(forecast)

# Step 3: Establish a Connection to SQL Server with Windows Authentication
con <- dbConnect(odbc(),
                 driver = "SQL Server",
                 server = "PRD1",
                 database = "DUMMY",
                 trusted_connection = "yes")

# Step 4: Query Data
query <- "SELECT Cast([source_system_link_2] as varchar) as [source_system_link_2]
      ,isnull([financial_scenario],'IC') as [financial_scenario]
      ,[account_description_1]
      ,[account_description_2]
      ,[balance_dt]
      ,format([balance_dt],'MMM-yy') as [month_year]
      ,sum([balance_amt]) as [balance_amt]
      ,Cast([acct_cd_1] as varchar) as [acct_cd_1]
      ,Cast([acct_cd_2] as varchar) as [acct_cd_2]
      ,[supplier_name]
      ,isnull([WO#],[Work Order ID]) as [WO#]
      ,[WO_ID]
      ,[Work Order ID]
      ,avg([Total Labor Hours]) as [Total Labor Hours]
      ,avg([Labor_Code]) as [Labor_Code]
      ,avg([Hourly Rate]) as [Hourly Rate]
  FROM [DUMMY].[SSDI].[CPWO_New]
group by Cast([source_system_link_2] as varchar)
      ,isnull([financial_scenario],'IC')
      ,[account_description_1]
      ,[account_description_2]
      ,[balance_dt]
      ,Cast([acct_cd_1] as varchar) 
      ,Cast([acct_cd_2] as varchar) 
      ,[supplier_name]
      ,isnull([WO#],[Work Order ID])
      ,[WO_ID]
      ,[Work Order ID]"
data <- dbGetQuery(con, query)

# Step 5: Prepare Time Series Data for Multiple Dimension and Value Columns

# Define the dimension columns and value columns from your SQL Server table
dimension_columns <- c("[source_system_link_2]", "[financial_scenario]", "[account_description_1]", "[account_description_2]", "[balance_dt]", "[acct_cd_1]", "[acct_cd_2]", "[supplier_name]", "[WO#]", "[Work Order ID]")
dimension_columns <- as.character(dimension_columns)
value_columns <- c("[balance_amt]", "[Total Labor Hours]", "[Labor_Code]", "[Hourly Rate]")

# Initialize an empty list to store the time series data for each value column and dimension combination
ts_data <- list()

# Iterate through each value column
for (value_column in value_columns) {
  
# Iterate through each dimension combination
for (i in 1:length(dimension_columns)) {
  
# Construct the SQL query dynamically based on dimension and value column
#query <- glue::glue("SELECT {dimension_columns[i]}, {value_column} FROM [DUMMY].[SSDI].[CPWO_New]")
query <- glue::glue("SELECT {as.character(dimension_columns[i])}, {value_column} FROM [DUMMY].[SSDI].[CPWO_New]")

# Fetch the data from the SQL Server table
data <- dbGetQuery(con, query)  
  
# Group the data by dimension columns and aggregate the values using appropriate functions (e.g., sum, average)
#data_agg <- data %>% group_by(across(all_of(dimension_columns[i]))) 
group_columns <- intersect(dimension_columns[i], colnames(data))
group_vars <- syms(group_columns)
data_agg <- data %>% dplyr::group_by(!!!group_vars)
  
# Create a time series object for each dimension and value column combination
ts_data[[paste(dimension_columns[i], value_column, sep = "_")]] <- ts_object <- ts(data_agg[[value_column]], frequency = 100) 
#<- ts(data_agg$value_column, frequency = 100)
}
} 
# The ts_data list will contain time series objects for each dimension and value column combination  
  
# Step 6: Perform Forecasting for Each Dimension and Value Column Combination

# Initialize an empty list to store the forecasted results for each combination
forecast_results <- list()

# Iterate through each dimension and value column combination
for (dimension_column in dimension_columns) {
  for (value_column in value_columns) {
    # Get the time series data for the current combination
    ts_data_current <- ts_data[[paste(dimension_column, value_column, sep = "_")]]
    
    # Perform forecasting using the desired method (e.g., ARIMA)
    forecast_model <- auto.arima(ts_data_current)
    
    # Add the forecasted results to the forecast_results list
    forecast_results[[paste(dimension_column, value_column, sep = "_")]] <- forecast_model
  }
}
# The forecast_results list contains the forecasted models for each dimension and value column combination 
  
#Step 7: Analyze and Extract Forecasted Results

# Initialize an empty list to store the extracted forecasted results
extracted_forecasts <- list()

# Iterate through each dimension and value column combination
for (dimension_column in dimension_columns) {
  for (value_column in value_columns) {
    # Get the forecasted model for the current combination
    forecast_model <- forecast_results[[paste(dimension_column, value_column, sep = "_")]]
    
    # Extract the point forecasts from the forecasted model
    point_forecasts <- forecast_model$mean
    
    # Store the extracted forecasts in the extracted_forecasts list
    extracted_forecasts[[paste(dimension_column, value_column, sep = "_")]] <- point_forecasts
  }
}  

# Step 8: Move Forecasted Results Back to SQL Server

# Establish a connection to SQL Server
con <- dbConnect(odbc(),
                 driver = "SQL Server",
                 server = "PRD1",
                 database = "DUMMY",
                 trusted_connection = "yes")

# Iterate through each dimension and value column combination
for (dimension_column in dimension_columns) {
  for (value_column in value_columns) {
    # Get the extracted forecasts for the current combination
    extracted_forecasts_current <- extracted_forecasts[[paste(dimension_column, value_column, sep = "_")]]
    
    # Create a table (if needed) to store the forecasted results
    create_table_query <- glue::glue("CREATE TABLE forecast_output_{dimension_column}_{value_column} (date_column DATE, forecast_value FLOAT)")
    dbExecute(con_output, create_table_query)
    
    # Insert the forecasted results into the table
    insert_query <- glue::glue("INSERT INTO forecast_output_{dimension_column}_{value_column} (date_column, forecast_value) VALUES (?, ?)")
    dbExecute(con_output, insert_query, params = list(index(extracted_forecasts_current), extracted_forecasts_current))
  }
}

# Close the connection
dbDisconnect(con)

Thanks
Nakul

Hi, we don't have your dataset. Can you provide a reproducible example?

What errors do you get?

Hi William,

Below is the sample data. I am unable to attach the full dataset.

source_system_link_2 financial_scenario balance_dt acct_cd_1 acct_cd_2 balance_amt Count
4863400000 AA 2021-01-09 0:00 525450 701 6 1
4863400000 AA 2021-02-11 0:00 522300 701 216.6 2
4863400000 AA 2021-03-23 0:00 525410 701 206.39 2
4863400000 AA 2021-04-10 0:00 511200 701 8.65 1
4863400000 AA 2021-04-10 0:00 525320 701 48.5 1
4863400000 AA 2021-04-10 0:00 525410 701 11 1
4863400000 AA 2021-05-11 0:00 525410 701 2217.52 2
4863400000 AA 2021-07-11 0:00 551200 701 224.5 1
4863400000 AA 2021-08-11 0:00 525410 701 2684.08 2
4863400000 AA 2021-09-09 0:00 521300 701 97.14 1
4863400000 AA 2021-10-06 0:00 525410 701 3698.57 2
4863400000 AA 2021-10-19 0:00 552205 701 631.81 1
4863400000 AA 2021-12-10 0:00 525370 701 2475 1
4863400000 AA 2021-12-17 0:00 525320 701 324 1
4863400000 AA 2021-12-23 0:00 524300 701 3559.13 1
4863400000 AA 2022-01-08 0:00 525300 701 6.25 1
4863400000 AA 2022-01-08 0:00 525530 701 50.52 1
4863400000 AA 2022-01-22 0:00 525370 701 475.2 1
4863400000 AA 2022-02-10 0:00 525320 701 12954.73 1
4863400000 AA 2022-02-11 0:00 525350 701 6.25 1
4863400000 AA 2022-04-10 0:00 521300 701 40.82 1

Thanks
Nakul

And what were the errors?

Have another read of the article on making the dataset properly reproducible too.

This topic was automatically closed 42 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.