#### Data Modelling
# Share of each Satisfaction value in percent, rounded to one decimal place.
# `digits` is spelled out in full instead of relying on partial matching.
round(prop.table(table(AVdata$Satisfaction)) * 100, digits = 1)
# Replace every factor column except the first with its integer level codes.
# is.factor() also recognises ordered factors, whose class vector has two
# entries and would make a `class(x) == "factor"` test fail inside if().
factor_cols <- which(vapply(AVdata, is.factor, logical(1)))
# Column 1 is deliberately left untouched, as in the original 2:ncol() range.
factor_cols <- factor_cols[factor_cols >= 2L]
if (length(factor_cols) > 0L) {
  AVdata[factor_cols] <- lapply(AVdata[factor_cols], as.integer)
}
# Min-max rescale a numeric vector to the interval [0, 1].
#
# x:     numeric vector to rescale.
# na.rm: if TRUE, missing values are ignored when finding the minimum and
#        maximum and stay NA in the result; if FALSE (the default) any NA
#        in `x` makes the whole result NA.
#
# Returns a numeric vector of the same length as `x`. A vector whose values
# are all equal has no range to scale by and is mapped to zeros rather than
# the NaN that 0 / 0 would give.
normalize <- function(x, na.rm = FALSE) {
  if (length(x) == 0L) {
    return(numeric(0))
  }
  lo <- min(x, na.rm = na.rm)
  hi <- max(x, na.rm = na.rm)
  span <- hi - lo
  if (!is.na(span) && span == 0) {
    # Multiplying by 0 keeps NA positions and yields a double result.
    return(0 * (x - lo))
  }
  (x - lo) / span
}
# Round-trip the data through disk. write.csv() stores the row names as an
# extra leading column, so that column is dropped again right after reading.
write.csv(AVdata, file = "AVdata.csv")
AVdata <- read.csv("AVdata.csv", header = TRUE)[, -1]
# Show the column types of the reloaded data.
str(AVdata)
#### Let's convert categorical columns to integer codes

# Encode `values` as 1-based integer codes following `level_order`.
#
# values:      character or factor vector holding the category labels.
# level_order: labels in the order that should receive codes 1, 2, ...
#              (levels that never occur are dropped before coding).
# column:      column name, used only in the warning message.
#
# If `values` is already numeric (for example because the factor columns
# were converted to integer codes earlier in this script) it is returned
# as integers unchanged: matching numbers against the text labels would
# silently turn every entry into NA.
encode_levels <- function(values, level_order, column) {
  if (is.numeric(values)) {
    warning(
      column, " is already numeric; keeping its existing codes",
      call. = FALSE
    )
    return(as.integer(values))
  }
  as.integer(factor(factor(values, levels = level_order)))
}

## Gender
levels(AVdata$Gender)
AVdata$Gender <- encode_levels(
  AVdata$Gender, c("Female", "Male"), "Gender"
)
## CustomerType
levels(AVdata$CustomerType)
AVdata$CustomerType <- encode_levels(
  AVdata$CustomerType, c("disloyal Customer", "Loyal Customer"), "CustomerType"
)
## TypeTravel
levels(AVdata$TypeTravel)
AVdata$TypeTravel <- encode_levels(
  AVdata$TypeTravel, c("Business travel", "Personal Travel"), "TypeTravel"
)
## Class
levels(AVdata$Class)
AVdata$Class <- encode_levels(
  AVdata$Class, c("Eco", "Eco Plus", "Business"), "Class"
)
# Min-max scale the 21 predictor columns (positions 2 to 22) and attach the
# Satisfaction column as the last column of the scaled data.
AVdata_n <- cbind(
  as.data.frame(lapply(AVdata[2:22], normalize)),
  Satisfaction = AVdata$Satisfaction
)

# Reproducible 70/30 split on Satisfaction; list = FALSE returns the selected
# row indices as a matrix instead of a list.
set.seed(5)
partition <- createDataPartition(
  y = AVdata_n$Satisfaction,
  p = 0.7,
  list = FALSE
)
AVdata_test <- AVdata_n[-partition, ]
AVdata_train <- AVdata_n[partition, ]

# Persist both subsets.
write.csv(AVdata_train, file = "AVdata_train.csv")
write.csv(AVdata_test, file = "AVdata_test.csv")
#### Logistic regression
# Fit a binomial GLM of Satisfaction on every other column of the training set.
AVlog_model <- glm(Satisfaction ~ ., data = AVdata_train, family = "binomial")
summary(AVlog_model)

# Predicted probabilities for the test rows.
AVlog_preds <- predict(AVlog_model, AVdata_test[, 1:22], type = "response")
head(AVlog_preds)

# Threshold the probabilities at 0.5 in one vectorised step instead of growing
# a vector inside a 1:length() loop. unname() drops the row names that
# predict() attaches, so the result is a plain character vector.
AVlog_class <- unname(
  ifelse(AVlog_preds > 0.5, "satisfied", "neutral or dissatisfied")
)

## Creating a new data frame containing the actual and predicted values.
AVlog_result <- data.frame(
  Actual = AVdata_test$Satisfaction,
  Prediction = AVlog_class
)

# confusionMatrix(data, reference): predictions first, observed values second.
# The prediction levels are fixed explicitly so both classes exist even when
# the model predicts only one of them; as.factor() is a no-op when
# Satisfaction is already a factor.
mr1 <- confusionMatrix(
  factor(AVlog_class, levels = c("neutral or dissatisfied", "satisfied")),
  as.factor(AVdata_test$Satisfaction),
  positive = "satisfied"
)
mr1
#### Linear Discriminant Analysis
# Scatterplot matrix of the first five training columns, with points filled
# red or blue according to the Satisfaction level code.
pairs(
  AVdata_train[, 1:5],
  main = "Predict ",
  pch = 22,
  bg = c("red", "blue")[as.integer(as.factor(AVdata_train$Satisfaction))]
)

# Fit the LDA model of Satisfaction on all other training columns.
AVlda_model <- lda(Satisfaction ~ ., AVdata_train)
AVlda_model

## Let's predict the model
AVlda_preds <- predict(AVlda_model, AVdata_test)

# confusionMatrix(data, reference) expects the predictions first and the
# observed values second, matching the logistic-regression evaluation (mr1).
# With the two swapped, sensitivity/specificity and the predictive values
# would be reported under the wrong names.
mr2 <- confusionMatrix(
  AVlda_preds$class,
  as.factor(AVdata_test$Satisfaction),
  positive = "satisfied"
)
mr2