Hi
In your dataset the target variable (pass) is binary, so logistic regression is the appropriate technique. Using the glm function we can build multiple models, make predictions, and produce a confusion matrix for each model. The dataset I used is from the link that you provided above. Just copy this code into RStudio, run it, and see the results. I hope it is useful.
For a more detailed analysis, you may also refer to my channel "Happy Learning-GP" and its video "Shiny App Logistic Regression multiple model".
Here is the code:
library(dplyr)
library(caret) # logistic regression related package,
# Read the data set, chosen interactively by the user, into a data frame.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
df <- read.csv(file.choose(), header = TRUE)

# Keep only the outcome (`pass`) and the candidate predictors. Fail early with
# a readable message if the chosen file lacks any of them, instead of the
# generic "undefined columns selected" error.
needed_cols <- c('pass', 'sex', 'studytime', 'internet', 'freetime', 'goout',
                 'absences', 'G1', 'G2', 'G3')
missing_cols <- setdiff(needed_cols, names(df))
if (length(missing_cols) > 0) {
  stop("Input file is missing required column(s): ",
       paste(missing_cols, collapse = ", "), call. = FALSE)
}
data <- df[, needed_cols]
#' Print the confusion matrix of a fitted binary classifier on test data
#'
#' Predicted probabilities above `threshold` are labelled 1 and all others 0;
#' the labels are then cross-tabulated against the observed `pass` column with
#' `caret::confusionMatrix()`.
#'
#' @param model A fitted model for which
#'   `predict(model, newdata = testdata, type = "response")` returns
#'   probabilities (e.g. a binomial `glm`).
#' @param testdata A data frame (or tibble) holding the predictors used by
#'   `model` plus a `pass` column coded 0/1, either numeric or factor.
#' @param threshold Probability cut-off above which an observation is
#'   predicted as class 1. Defaults to 0.5.
#' @return The confusion table (predictions against reference), invisibly.
#'   The table is also printed.
fnsummarystat <- function(model, testdata, threshold = 0.5) {
  prob <- predict(model, newdata = testdata, type = "response")

  # Both factors share the same explicit levels so that caret accepts them
  # even when one class never occurs among the predictions.
  class_levels <- c(0, 1)
  y_pred1 <- factor(ifelse(prob > threshold, 1, 0), levels = class_levels)

  # `[[` yields a plain vector for data frames and tibbles alike, whereas
  # `testdata[, "pass"]` would return a one-column tibble for tibble input.
  actual <- testdata[["pass"]]
  if (is.null(actual)) {
    stop("`testdata` has no `pass` column", call. = FALSE)
  }
  y_act1 <- factor(actual, levels = class_levels)
  # Refuse to silently turn unexpected codes (e.g. "yes"/"no") into NA.
  if (any(is.na(y_act1) & !is.na(actual))) {
    stop("`pass` must be coded 0/1", call. = FALSE)
  }

  results <- caret::confusionMatrix(data = y_pred1, reference = y_act1)
  print(results$table)
  invisible(results$table)
}
# Delete rows that contain NA in any of the selected columns
data <- na.omit(data)

# Declare the dependent variable and move it to the last column.
# all_of() marks `mdependvar` as a character vector of column names; passing a
# bare external vector to select() is deprecated by tidyselect.
mdependvar <- 'pass'
data <- data %>%
  dplyr::select(-dplyr::all_of(mdependvar), dplyr::everything())

# Recode the outcome as a two-level factor: 1 stays 1, any other value becomes 0
data$pass <- factor(ifelse(data$pass == 1, 1, 0), levels = c(0, 1))

# Partition the rows at random into training and test sets. Each row is
# assigned to group 1 (training) with probability 0.8 or group 2 (test) with
# probability 0.2; the fixed seed makes the split reproducible.
set.seed(1234)
pd <- sample(2, nrow(data), replace = TRUE, prob = c(0.8, 0.2))
traindata <- data[pd == 1, ]
testdata <- data[pd == 2, ]
# Fit the logistic regression models on the training set.
# NOTE(review): if `pass` was derived from the grade columns (G1-G3), models
# that include them may separate the classes almost perfectly - confirm
# against the data description.

# Full model: `pass` against every other column of `traindata`
fullmodel <- glm(formula = pass ~ ., family = "binomial", data = traindata)

# Single-predictor models, one per candidate variable
model1 <- glm(formula = pass ~ sex, family = "binomial", data = traindata)
model2 <- glm(formula = pass ~ studytime, family = "binomial", data = traindata)
model3 <- glm(formula = pass ~ internet, family = "binomial", data = traindata)
model4 <- glm(formula = pass ~ freetime, family = "binomial", data = traindata)
model5 <- glm(formula = pass ~ goout, family = "binomial", data = traindata)
model6 <- glm(formula = pass ~ absences, family = "binomial", data = traindata)
model7 <- glm(formula = pass ~ G1, family = "binomial", data = traindata)
model8 <- glm(formula = pass ~ G2, family = "binomial", data = traindata)
model9 <- glm(formula = pass ~ G3, family = "binomial", data = traindata)
# Print the test-set confusion matrix of every model: the full model first,
# then the single-predictor models in order. invisible() keeps the list that
# lapply() returns from being echoed at top level.
invisible(lapply(
  list(
    fullmodel,
    model1, model2, model3,
    model4, model5, model6,
    model7, model8, model9
  ),
  fnsummarystat,
  testdata = testdata
))