ConfusionMatrix

Hi, I need to compute a confusion matrix for a random forest model. This is my code; how can I do it?

library(randomForestExplainer)
library(randomForest)

# Quick look at the data: first row, the class labels, and the class counts.
iris[1, ]
unique(iris$Species)
table(iris$Species)

db_class <- iris
# Scatterplot matrix of every column, coloured by species.
plot(db_class, col = as.factor(db_class$Species))

# Fix the RNG seed so the train/test split and the forest are reproducible.
set.seed(1)

# Randomly pick 80% of the row indices for training; the rest form the test set.
righe_train <- sample(nrow(db_class), nrow(db_class) * 0.8)
db_class_dc_train <- db_class[righe_train, ]
db_class_dc_test <- db_class[-righe_train, ]

# Same scatterplot matrix, this time without the last column (Species).
plot(db_class[, -ncol(db_class)], col = as.factor(db_class$Species))

# Classification forest predicting Species from all other columns.
model_rf <- randomForest(
  Species ~ .,
  data = db_class_dc_train,
  ntree = 10
)

# Variable-importance and minimal-depth diagnostics.
varImpPlot(model_rf)
plot_min_depth_distribution(model_rf)

# Structure of the 10th tree, with variable names instead of column indices.
getTree(model_rf, 10, labelVar = TRUE)
# Error rates after each successive tree.
model_rf$err.rate
measure_importance(model_rf)

# Predicted classes for the held-out rows.
pred_rf_test <- predict(model_rf, db_class_dc_test, type = "class")

# Confusion matrix on the test set: rows are the true species, columns the
# predicted species. Base `table()` is used because `CrossTable()` belongs to
# the gmodels package, which this script never loads.
table(Actual = db_class_dc_test$Species, Predicted = pred_rf_test)

Printing the fitted model object (`model_rf`) gives you the out-of-bag confusion matrix. If you want more statistics, or a confusion matrix for the test set, I suggest `caret::confusionMatrix()`:

library(randomForestExplainer)
#> Registered S3 method overwritten by 'GGally':
#>   method from   
#>   +.gg   ggplot2
library(randomForest)
#> randomForest 4.6-14
#> Type rfNews() to see new features/changes/bug fixes.

db_class <- iris

# Seed the RNG so the split and the forest below are reproducible.
set.seed(1)
# Randomly pick 80% of the row indices for training; the rest form the test set.
righe_train <- sample(nrow(db_class), nrow(db_class)*0.8)
db_class_dc_train <- db_class[righe_train,]
db_class_dc_test <- db_class[-righe_train,]

# Classification forest predicting Species from all other columns.
model_rf <- randomForest(Species~., 
                         db_class_dc_train,
                         ntree = 10)

# Printing the model shows the out-of-bag (training-side) confusion matrix.
model_rf
#> 
#> Call:
#>  randomForest(formula = Species ~ ., data = db_class_dc_train,      ntree = 10) 
#>                Type of random forest: classification
#>                      Number of trees: 10
#> No. of variables tried at each split: 2
#> 
#>         OOB estimate of  error rate: 4.2%
#> Confusion matrix:
#>            setosa versicolor virginica class.error
#> setosa         39          0         0  0.00000000
#> versicolor      0         36         2  0.05263158
#> virginica       0          3        39  0.07142857

# Predicted classes for the held-out rows.
pred_rf_test <- predict(model_rf, db_class_dc_test, type = "class")

# confusionMatrix() expects the predictions as `data` and the true labels as
# `reference`. Passing them the other way round transposes the table and swaps
# Sensitivity with Pos Pred Value (and Specificity with Neg Pred Value).
# NOTE(review): the output below was re-derived from the originally captured
# counts after correcting the argument order; re-run to confirm exact formatting.
caret::confusionMatrix(
   data = pred_rf_test,
   reference = db_class_dc_test$Species
)
#> Confusion Matrix and Statistics
#> 
#>             Reference
#> Prediction   setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         12         2
#>   virginica       0          0         5
#> 
#> Overall Statistics
#>                                           
#>                Accuracy : 0.9333          
#>                  95% CI : (0.7793, 0.9918)
#>     No Information Rate : 0.4             
#>     P-Value [Acc > NIR] : 1.181e-09       
#>                                           
#>                   Kappa : 0.8958          
#>                                           
#>  Mcnemar's Test P-Value : NA              
#> 
#> Statistics by Class:
#> 
#>                      Class: setosa Class: versicolor Class: virginica
#> Sensitivity                 1.0000            1.0000           0.7143
#> Specificity                 1.0000            0.8889           1.0000
#> Pos Pred Value              1.0000            0.8571           1.0000
#> Neg Pred Value              1.0000            1.0000           0.9200
#> Prevalence                  0.3667            0.4000           0.2333
#> Detection Rate              0.3667            0.4000           0.1667
#> Detection Prevalence        0.3667            0.4667           0.1667
#> Balanced Accuracy           1.0000            0.9444           0.8571

Created on 2020-12-08 by the reprex package (v0.3.0)

2 Likes

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.