Issue with randomforest analysis

Hello,

I am struggling with a random forest analysis in R (using the randomForest and caret packages). Could you please review my code and give me some advice?
My dataset consists of two groups (HTP and OTP) and 36 metabolites.
My concern is that the model is too accurate and the results seem exaggerated: the confusion matrix reports an accuracy and a kappa value of 1. Additionally, varImpPlot gives different results each time I run it, which is why I used set.seed. As a first-time user of this package, please let me know if you find any mistakes.

Thank you in advance for your help.

Sorry, I could not get "reprex" to work with my R script.

library(randomForest)
library(caret)
library(ROCR)
library(tidyverse)
library(rfUtilities)
library(reprex)

# Example data: 20 samples (10 "OTP", 10 "HTP"), one row per sample, with the
# group label in `Group` and one numeric column per metabolite.
#
# Column names must be syntactic R names: `~N-Acetylaspartate` is parsed as the
# formula `~ N - Acetylaspartate`, which tribble() rejects as a column spec, and
# non-syntactic names also tend to break formula interfaces such as
# `Group ~ .`. The two hyphenated metabolites therefore use a dot, matching the
# existing `Dimethyl.sulfone` column.
RF <- tibble::tribble(
  ~Group, ~Acetate, ~Alanine, ~Aspartate, ~Benzoate, ~Butyrate, ~Carnitine,
  ~Creatine, ~Dimethyl.sulfone, ~Dimethylamine, ~Erythritol, ~Formate,
  ~Fumarate, ~Glucarate, ~Glycine, ~Glycylproline, ~Guanidoacetate,
  ~Isobutyrate, ~Isocitrate, ~Isoleucine, ~Isopropanol, ~Leucine, ~Methanol,
  ~Methionine, ~Methylsuccinate, ~N.Acetylaspartate, ~N.Methylhydantoin,
  ~Phenylacetate, ~Phenylalanine, ~Proline, ~Propionate, ~Pyroglutamate,
  ~Ribose, ~Syringate, ~Urea, ~Valine,
  "OTP", 19490.9, 648.2, 266.6, 2.52, 7500.5, 1, 26.2, 1.8, 2.6, 99, 73.4, 42.8, 51, 91.8, 100, 13, 1384.6, 280.2, 255.8, 2.64, 278.8, 48.2, 123.2, 79.4, 22.6, 4.8, 639.8, 223.2, 480.8, 8939.1, 143.8, 952.6, 0.24, 24.88, 227.6,
  "OTP", 33816.07, 264.2, 171.6, 2.52, 11174.5, 1, 5.2, 0.12, 2.2, 50.4, 54.4, 12.6, 2.68, 200, 59.6, 5.2, 740.8, 180.8, 109, 2.64, 151.2, 35.8, 52.6, 33.6, 19.6, 4.4, 533.4, 91, 219, 17597.8, 127.4, 104, 2.8, 24.88, 95.4,
  "OTP", 22190.11, 609.2, 313.2, 2.52, 7980.7, 0.8, 0.32, 2, 1.4, 85.4, 55.4, 29.2, 2.68, 479.4, 116.6, 12.4, 1400, 124.8, 172.6, 2.64, 167.4, 15.8, 111.8, 40, 17.4, 0.24, 644, 5.12, 278.6, 10879, 24.44, 601, 6.8, 24.88, 340.4,
  "OTP", 26250.8, 594, 584.4, 2.52, 10755.6, 8.2, 12.8, 0.12, 1.6, 60.8, 61.8, 17.2, 31.8, 621.6, 95.8, 16.4, 1506.8, 268.2, 272, 2.64, 347.8, 66.2, 152.4, 75, 27.6, 12.2, 835, 284, 622.4, 10728.2, 373, 476, 2.2, 24.88, 96.8,
  "OTP", 25245.01, 57.6, 75.2, 2.52, 9995.9, 2.8, 17.4, 0.6, 1.2, 47.6, 50.6, 16.8, 2.68, 187.2, 65, 10.4, 1142, 85.8, 92.8, 16, 137, 33.6, 42.2, 14.8, 6, 1.6, 549.6, 73.4, 205, 12694.6, 24.44, 599.2, 1.6, 24.88, 65.8,
  "OTP", 24234.8, 485.4, 220, 2.52, 9250.1, 0.6, 29, 0.6, 0.12, 54, 49.6, 22.8, 29.2, 284.6, 14.2, 93, 1065.4, 97.2, 159.8, 2.64, 196, 30.6, 71.4, 38.6, 3.4, 2.2, 557.2, 80.4, 299.4, 13446.6, 122.2, 289.6, 3.8, 24.88, 221.4,
  "OTP", 23107.37, 527.4, 281, 2.52, 9068.6, 1.2, 2.8, 0.6, 6.8, 9.52, 60.8, 18.2, 24, 336.6, 7, 100.8, 1065, 113.2, 111.4, 2.64, 149.8, 35, 4.8, 66, 30.8, 1.2, 439, 104.4, 358.8, 11270.4, 145.6, 636, 5, 24.88, 239.8,
  "OTP", 18462.74, 739, 292.8, 2.52, 6214.6, 5.2, 12.2, 1, 0.8, 102.2, 49.4, 19, 13.4, 521.8, 147.8, 39.2, 857.8, 190.6, 240.2, 2.64, 269.6, 33.8, 151.2, 134.2, 61.8, 0.24, 383, 197.6, 542.6, 8173.9, 321.6, 526.4, 13.4, 24.88, 338.2,
  "OTP", 29897.34, 403.2, 291, 2.52, 9559.1, 0.8, 17.4, 0.12, 0.12, 49.6, 48, 8.4, 79, 30.2, 1.12, 7, 1020.8, 185.6, 164, 2.64, 215.4, 29.6, 79.8, 2.96, 25.2, 3.8, 504.6, 117.6, 321.8, 14609.3, 24.44, 442.6, 4.6, 24.88, 178,
  "OTP", 32100.3, 47, 96.2, 2.52, 8477.4, 0.12, 22.6, 0.8, 3.6, 48.4, 5.56, 28, 97.4, 365.8, 11, 96, 1048.8, 137.4, 150.8, 2.64, 166.8, 3.6, 86.8, 77.6, 0.56, 2.4, 454.8, 149.4, 413, 17041, 202.8, 951, 6.6, 24.88, 197.4,
  "HTP", 20776.8, 93.4, 72.6, 18.4, 3435.6, 0.12, 1.6, 7.4, 0.6, 9.52, 30, 12.8, 2.68, 16, 1.12, 37.8, 37.36, 10.08, 30, 28.4, 13.8, 12.8, 16.6, 2.96, 2.8, 0.24, 134.8, 5.12, 81.8, 4846.6, 24.44, 58, 0.24, 24.88, 30.6,
  "HTP", 27524.4, 93, 80.2, 23.6, 4499.6, 0.12, 0.32, 0.12, 0.12, 9.52, 27.8, 0.72, 2.68, 62.2, 5.6, 1.8, 186.8, 79.8, 27.6, 28.6, 18, 6.4, 18, 2.96, 0.56, 0.24, 176, 5.12, 95.8, 6567, 24.44, 27.6, 0.24, 147.4, 5,
  "HTP", 31638, 54.8, 68, 12.6, 4770, 0.12, 7.4, 7.4, 0.12, 9.52, 5.56, 0.72, 2.68, 61.6, 9.6, 8, 37.36, 10.08, 38, 13.2, 1.8, 13.4, 24.2, 2.96, 0.56, 1.6, 185, 25.6, 7.12, 6720.6, 24.44, 174.2, 0.24, 24.88, 0.92,
  "HTP", 37312, 156, 105.4, 2.52, 7100.8, 0.6, 1.8, 12.4, 0.12, 9.52, 5.56, 8, 2.68, 3.6, 1.12, 2.4, 37.36, 10.08, 69, 37.8, 95.2, 16.8, 34.2, 2.96, 12.4, 0.24, 261.6, 59.8, 181, 8013, 24.44, 516, 10.2, 24.88, 0.92,
  "HTP", 24217.6, 64.6, 11.36, 63.4, 4677.8, 0.12, 3, 5, 0.12, 9.52, 5.56, 5.6, 2.68, 39.2, 1.12, 2.6, 37.36, 10.08, 15.6, 45.6, 9, 13, 4.6, 2.96, 3.2, 0.24, 230.8, 5.12, 35.6, 5883, 24.44, 179.6, 1.2, 299, 0.92,
  "HTP", 33942.4, 76, 56.8, 2.52, 5283, 0.12, 0.32, 2.8, 0.12, 9.52, 5.56, 7.6, 2.68, 44.4, 12.4, 1.6, 37.36, 10.08, 21.8, 31.8, 12.2, 0.72, 11.2, 2.96, 0.56, 0.24, 273.2, 5.12, 71.2, 8354.4, 24.44, 170.8, 0.24, 124.4, 4.6,
  "HTP", 34943.2, 118.8, 87.4, 2.52, 6402, 0.12, 2.6, 3.4, 0.12, 9.52, 45.4, 9.8, 2.68, 73.8, 1.12, 0.32, 401.2, 51.4, 45.6, 34.8, 32.2, 9.6, 23.2, 2.96, 0.56, 0.24, 311.2, 5.12, 128.4, 8767.2, 24.44, 259, 0.24, 338.4, 21,
  "HTP", 28161, 102, 11.36, 22.6, 4824.6, 0.12, 0.32, 8.8, 0.12, 9.52, 5.56, 6.4, 2.68, 61.4, 1.12, 6.8, 522.6, 10.08, 29.6, 28.8, 16.2, 4.4, 20.8, 2.96, 0.56, 2.2, 282.4, 5.12, 135.4, 4878.6, 24.44, 123, 0.24, 24.88, 11,
  "HTP", 41891, 44.6, 11.36, 28.4, 6865.2, 0.12, 1.8, 1, 0.12, 47.8, 32.6, 0.72, 2.68, 47.4, 1.12, 0.32, 37.36, 112.8, 3.12, 21.4, 1.8, 3.8, 4.6, 2.96, 0.56, 0.24, 284.6, 5.12, 55, 10465.6, 24.44, 114, 0.24, 24.88, 11.6,
  "HTP", 58430.8, 135.8, 11.36, 27.4, 9009.4, 5.4, 0.32, 2.2, 0.12, 52.2, 5.56, 3.6, 2.68, 54, 15, 7, 37.36, 50.4, 39.6, 39.6, 16.4, 0.72, 17.6, 2.96, 0.56, 0.24, 176.8, 5.12, 77.2, 16718, 24.44, 209.8, 0.24, 391.4, 18.2
)

# Random forest classification of Group from all other columns of RF:
# split into train/test, tune mtry by resampling on the training rows,
# refit with the selected mtry, then evaluate on the held-out rows.

# The outcome must be a factor for classification.
RF$Group <- as.factor(RF$Group)

# Stratified 70/30 split on Group.
set.seed(123) # for reproducibility
trainIndex <- createDataPartition(RF$Group, p = 0.7, list = FALSE)
RF_train <- RF[trainIndex, ]
RF_test <- RF[-trainIndex, ]

# Resampling scheme used to tune mtry.
# The original `method = "cv", number = 99` asked for 99 folds, which is more
# than the number of training rows (RF has 20 rows in total), so it is not a
# meaningful k-fold setup. Leave-one-out CV is the explicit choice for a data
# set this small: every training row is held out exactly once.
ctrl <- trainControl(method = "LOOCV", savePredictions = TRUE)

set.seed(123)
RF_m_cv <- train(
  Group ~ .,
  data = RF_train,
  method = "rf",
  trControl = ctrl,
  tuneLength = 10,
  ntree = 500
)

# Refit a plain randomForest with the tuned mtry; importance = TRUE is needed
# for the permutation-based importance shown by varImpPlot().
set.seed(123)
RF_m <- randomForest(
  Group ~ .,
  data = RF_train,
  ntree = 500,
  mtry = RF_m_cv$bestTune$mtry,
  importance = TRUE
)

# The printed model includes the out-of-bag (OOB) error estimate, which is
# computed from the training rows only.
print(RF_m)

# Evaluate on the held-out rows.
# NOTE(review): the test set contains only a handful of samples, so an
# accuracy/kappa of exactly 1 is easy to reach when the groups are well
# separated and carries a wide confidence interval; read it together with the
# OOB error above rather than as proof of a perfect model.
pred <- predict(RF_m, RF_test)

cm <- confusionMatrix(pred, RF_test$Group)
print(cm)

# Importance rankings depend on the random seed when there are few samples and
# many predictors that separate the groups about equally well; set.seed() makes
# a single run reproducible but does not make the ranking itself stable.
varImpPlot(RF_m)

This topic was automatically closed 42 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.