Hi. I am trying to understand how to use gradient boosting. Here is a small dataset and an admittedly poor-fitting logistic regression model, whose AUC on the test data equals zero. An untuned gradient boosting model then produces an AUC of 0.417. Now, 0.417 on its own is pretty poor, but compared to zero, I think the improvement is huge. Does such a large improvement make sense? Is gradient boosting really that good? Please let me know if my code is incorrect. Thank you.

library(ROCR)

library(mlr)

library(xgboost)

# Load the Berkeley admissions contingency table and flatten it to a data
# frame with one row per Admit x Gender x Dept cell; `Freq` is the cell count.
data("UCBAdmissions")
df <- data.frame(UCBAdmissions)

# Recode the outcome as a two-level factor: "0" = Admitted, "1" = otherwise.
df$Admit <- factor(as.integer(df$Admit != "Admitted"))

# Reproducible 80/20 split of the rows into training and test sets.
set.seed(1234)
samp.size <- floor(0.80 * nrow(df))
train_ind <- sample(seq_len(nrow(df)), size = samp.size)
train <- df[train_ind, ]
test <- df[-train_ind, ]

# 1 poor fitting logistic regression
# NOTE(review): `Freq` is the cell count of the flattened table, so it enters
# here as an ordinary predictor; `weights = Freq` may be what is intended --
# confirm before trusting the resulting AUC.
model <- glm(
  Admit ~ Gender + Dept + Freq,
  family = "binomial",
  data = train
)

# Scores on the link (log-odds) scale, which is predict()'s default for glm;
# AUC depends only on the ranking of the scores.
pred <- predict(model, newdata = test, type = "link")

# ROC curve points and the area under the curve on the held-out rows.
ROCRpred <- ROCR::prediction(pred, test$Admit)
ROCRperf <- ROCR::performance(ROCRpred, "tpr", "fpr")
auc <- ROCR::performance(ROCRpred, measure = "auc")@y.values[[1]]

paste0("Logistic auc: ", round(auc, 3))

# Replace the factor predictors with dummy (indicator) columns; the target
# column `Admit` is left untouched.
df <- createDummyFeatures(df, target = "Admit", cols = c("Gender", "Dept"))

# Redo the 80/20 split on the encoded data. The seed and the sampling call
# are the same as for the first split earlier in the script.
set.seed(1234)
samp.size <- floor(0.80 * nrow(df))
train_ind <- sample(seq_len(nrow(df)), size = samp.size)
train <- df[train_ind, ]
test <- df[-train_ind, ]

# Wrap both partitions as mlr classification tasks. `positive` is documented
# as a character class label, so pass "1" rather than the number 1.
trainTask <- makeClassifTask(data = train, target = "Admit", positive = "1")
testTask <- makeClassifTask(data = test, target = "Admit", positive = "1")

# Fix the RNG state before the learner is created.
set.seed(1)

# xgboost classifier that returns class probabilities: logistic objective,
# AUC as the evaluation metric, 200 boosting rounds, everything else default.
xgb_learner <- makeLearner(
  "classif.xgboost",
  predict.type = "prob",
  par.vals = list(objective = "binary:logistic", eval_metric = "auc", nrounds = 200)
)

# 2 gradient boosting model
# `train` also names a data frame in this script; R still resolves the call to
# the function, but the explicit namespace removes the ambiguity for readers.
xgb_model <- mlr::train(xgb_learner, task = trainTask)
result <- predict(xgb_model, testTask)
# head(result$data) # contains predictions

# Score each test row with the predicted probability of class "1", selected by
# class name rather than by column position. In the usual
# id / truth / prob.0 / prob.1 layout, column 3 is the class "0" probability;
# pairing it with ROCR's default positive class (the upper level, "1")
# reports 1 - AUC instead of the AUC.
ROCRpred <- ROCR::prediction(
  getPredictionProbabilities(result, cl = "1"),
  getPredictionTruth(result),
  label.ordering = c("0", "1")  # negative class first, positive class second
)
ROCRperf <- ROCR::performance(ROCRpred, "tpr", "fpr")

auc <- ROCR::performance(ROCRpred, measure = "auc")
auc <- auc@y.values[[1]]

paste0("XGB auc: ", round(auc, 3))