Linear Regression & Forecasting

I need help with a course project. I've been working on this for 5 days and have gotten no where. I'm using the AdventureWorks database. This part of the project is creating a linear regression, plotting the actual data for Jan 2013 - Dec 2013, and then forecast Jan 2014-Jun 2014. The forecasted piece needs to be in a different line type and color. It also needs to include the line-of-best-fit (abline). Sadly, my instructor will not provide any help other than "check these websites". Below is what I have tried (and failed) in R so far. Any guidance is appreciated!

#load DBI package
library(DBI)

#create connection
con <- DBI::dbConnect(odbc::odbc(),
                      Driver="SQL Server",
                      Server = ".",
                      Database = "AdventureWorks2017",
                      Trusted_Connection="True")


###create query 

library(dplyr)

qry1<-dbGetQuery (con,
                     "select
p.Name as 'Product'
,Case 
	when MONTH(OrderDate) = 1 then 'Jan-13'
	when MONTH(OrderDate) = 2 then 'Feb-13'
	when MONTH(OrderDate) = 3 then 'Mar-13'
	when MONTH(OrderDate) = 4 then 'Apr-13'
	when MONTH(OrderDate) = 5 then 'May-13'
	when MONTH(OrderDate) = 6 then 'Jun-13'
	when MONTH(OrderDate) = 7 then 'Jul-13'
	when MONTH(OrderDate) = 8 then 'Aug-13'
	when MONTH(OrderDate) = 9 then 'Sep-13'
	when MONTH(OrderDate) = 10 then 'Oct-13'
	when MONTH(OrderDate) = 11 then 'Nov-13'
	when MONTH(OrderDate) = 12 then 'Dec-13'
End as 'Month1'
,MONTH(OrderDate) as 'MonthOrder'
	,sum(sod.LineTotal) RevenueByDay
    FROM Sales.SalesOrderHeader soh
    left join Sales.SalesOrderDetail sod on soh.SalesOrderID=sod.SalesOrderID
    left join Production.Product p on sod.ProductID=p.ProductID
    where year(OrderDate)='2013'
    and p.name in ('Mountain-200 Black, 38')
	group by 
	p.Name
	,Case 
	when MONTH(OrderDate) = 1 then 'Jan-13'
	when MONTH(OrderDate) = 2 then 'Feb-13'
	when MONTH(OrderDate) = 3 then 'Mar-13'
	when MONTH(OrderDate) = 4 then 'Apr-13'
	when MONTH(OrderDate) = 5 then 'May-13'
	when MONTH(OrderDate) = 6 then 'Jun-13'
	when MONTH(OrderDate) = 7 then 'Jul-13'
	when MONTH(OrderDate) = 8 then 'Aug-13'
	when MONTH(OrderDate) = 9 then 'Sep-13'
	when MONTH(OrderDate) = 10 then 'Oct-13'
	when MONTH(OrderDate) = 11 then 'Nov-13'
	when MONTH(OrderDate) = 12 then 'Dec-13'
End
,MONTH(OrderDate) 
order by MONTH(OrderDate)
    ")

#created data frame
lndat<-data.frame(qry1)


class(lndat$Month1)

#make month a factor (because it wouldn't work otherwise)
lndat$Month1<-factor(c("Jan-13","Feb-13","Mar-13","Apr-13","May-13","Jun-13","Jul-13"
                         ,"Aug-13","Sep-13","Oct-13","Nov-13","Dec-13")
                       ,levels = c("Jan-13","Feb-13","Mar-13","Apr-13","May-13"
                                  ,"Jun-13","Jul-13","Aug-13","Sep-13","Oct-13","Nov-13","Dec-13"))

#Change column names
colnames(lndat)<-c("Product","Month","MonthOrder","Revenue")

#sumamry
summary(lndat)


class(lndat$MonthOrder)
lndat$MonthOrder<-as.numeric(lndat$MonthOrder)



plot(lndat$MonthOrder,lndat$Revenue)

prodLM<-lm(formula=MonthOrder~Revenue, data = lndat)

abline(lm(MonthOrder~Revenue, data = lndat)) ##DID NOT PRODUCT LINE

#open ggplot & scales library
library(ggplot2)
library(scales)

ggplot(prodLM,aes(MonthOrder,Revenue)) +
  geom_line() +
  geom_smooth(lndat$Revenue ~ lndat$MonthOrder) +
  scale_x_continuous("Month",limits = c(1,12)) 

if you want MonthOrder as x and Revenue as Y , then your lm formula should be reversed. i.e.

prodLM<-lm(formula=Revenue ~ MonthOrder, data = lndat)

to do the abline then would be just

abline(coef=coef(prodLM))

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.