Hi technocrat, thank you so much for your reply!
I'm sorry for forgetting to upload a reproducible example with my question. (I tried to use the reprex package, but I didn't get quite the outcome that I saw in screenshots. I'm sorry, I'm quite a newbie in R)
(Additional information about my dataset: I have 44 observations (rows), 4 Features and 1 column for my labels. With this formation, I'm able to get a constant Accuracy of 80%. And while I would be happy if that's really the case, I'm not quite sure if that's legit. I expected some decreases/increases during multiple iterations around 5% or something at least. That's why I'm quite sceptical if my seed setup is ok. But the example from help(trainControl) is kind of similar.....)
yourdata_neu <- data.frame(df_test)
#> Error in data.frame(df_test): Objekt 'df_test' nicht gefunden
rownames(yourdata_neu) <- NULL
#> Error in rownames(yourdata_neu) <- NULL: Objekt 'yourdata_neu' nicht gefunden
set.seed(123)
###############################Random Forest Round 1
#Training/Test-Split und Model trainieren
for (t in 1:5) {
set.seed(123)
for(i in 1:50){
seeds[[i]] <- sample.int(1000, 12)}
#For the last model:
seeds[[50]] <- sample.int(1000, 1)
yourdata_neu$Depressiv <- as.factor(yourdata_neu$Depressiv)
inTraining <- createDataPartition(yourdata_neu$Depressiv[1:nrow(yourdata_neu)], p = 0.75, list = FALSE) #75% der Probanden in Training, 25 in Test
training <- yourdata_neu[inTraining,]
testing <- yourdata_neu[-inTraining,]
#Crossvalidation werden K-Vali mit den 10mal wiederholen
train_control <- trainControl(method="cv", number=10, verboseIter = TRUE, seeds = seeds, search = "grid")
model <- train(training[,1:ncol(yourdata_neu)-1],as.factor(training[,ncol(yourdata_neu)]), method = "rf", type="classification", metric= "Accuracy", maximize= TRUE, trControl = train_control, importance = TRUE)
model1 <- randomForest(training[,1:ncol(yourdata_neu)-1],as.factor(training[,ncol(yourdata_neu)]), type="classification", importance = TRUE, proximity = TRUE) #Macht im Prinzip dasselbe, nur Randomforest
prediction1 <- predict(model1, testing[,1:ncol(yourdata_neu)-1])
prediction2 <- predict(model, testing[,1:ncol(yourdata_neu)-1])
print(confusionMatrix(prediction2, as.factor(testing[,ncol(yourdata_neu)]), positive = "1"))
importance <- importance(model1)
varImportance <- data.frame(Variables = row.names(importance),
Importance = round(importance[ ,'MeanDecreaseGini'],2))
#Create a rank variable based on importance
rankImportance <- varImportance %>%
mutate(Rank = paste0(dense_rank(desc(Importance))))
if(min(rankImportance$Importance) < 1.0){
RankImportance_Filter <- rankImportance[rankImportance$Importance == min(rankImportance$Importance),]
Importance_Table_Filter <- RankImportance_Filter$Variables
Excluding_Channels <- names(yourdata_neu) %in% Importance_Table_Filter
yourdata_neu <- yourdata_neu[!Excluding_Channels]
}
}
#> Error in eval(expr, envir, enclos): Objekt 'seeds' nicht gefunden