Hi,
I cleaned up your code a bit and found the solution to your question:
library(dplyr)
library(pROC)
options(java.parameters = "-Xmx15g") # must be set before loading xlsx so the JVM starts with enough heap
library(xlsx)
## libraries needed for training
library(nnet)
library(caret)
library(NeuralNetTools)
## parallel backend
library(doParallel)
xl_data_tmp = read.csv("train.csv", header = TRUE, sep=",", dec=",")
xl_data_tmp$y <- as.factor(xl_data_tmp$y)
xl_data_tmp$x3 <- as.factor(xl_data_tmp$x3)
#Create normalize function
normalize <- function(data){
  (data - min(data)) / (max(data) - min(data))
}
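# quick illustration of the helper (made-up values): normalize(c(10, 20, 30)) returns 0.0 0.5 1.0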
# normalize the numeric columns to the [0, 1] range where needed
xl_data_tmp = xl_data_tmp %>% mutate_at(c("x1", "x2", "x4", "x5", "x6", "x7", "x8", "x10", "x11", "x12"), normalize)
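# note: mutate_at() still works but is superseded in newer dplyr; the same step could be written
# with across() (equivalent sketch, same columns as above):
# xl_data_tmp <- xl_data_tmp %>%
#   mutate(across(c(x1, x2, x4, x5, x6, x7, x8, x10, x11, x12), normalize))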
summary(xl_data_tmp) # show a summary of the training data
set.seed(1)
# split indices: 519 rows sampled from each half of the data go to the training set
sampidx <- c(sample(1:650, 519), sample(651:1299, 519))
print(sampidx)
train_subset<-xl_data_tmp[sampidx,]
test_subset<-xl_data_tmp[-sampidx,]
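# quick check (added, not in the original script) that both classes appear in each split
table(train_subset$y)
table(test_subset$y)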
numCores <- detectCores() # number of cores
cl = makeCluster(numCores)
registerDoParallel(cl)
## tuning parameters
nn.Grid <- expand.grid(.size=c(1,2,3), .decay=c(0.01,0.1,1))
# create the list of seeds that makes each resampling iteration of the nnet training reproducible
set.seed(1)
nn.seeds <- vector(mode = "list", length = 11) # number of resamples + 1 for final model
for(i in 1:10) nn.seeds[[i]] <- sample.int(n=1000, 9) # 9 is the # of tuning parameter combinations
nn.seeds[[11]] <- 1 # for the last model
remove(i)
nn.seeds
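# sanity check (not part of the original script): caret expects number * repeats = 10 resample
# seed vectors, each as long as the 9 tuning combinations, plus one extra seed for the final fit
stopifnot(length(nn.seeds) == 11, all(lengths(nn.seeds[1:10]) == 9))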
# training control: the cross-validation cycles for nnet and the performance summary that gets returned
nn.Control <- trainControl(method = "repeatedcv", # repeated k-fold cross validation
                           number = 5,            # number of folds
                           repeats = 2,           # number of repeats
                           classProbs = TRUE,
                           summaryFunction = twoClassSummary,
                           seeds = nn.seeds,
                           allowParallel = TRUE)  # allowParallel is a trainControl() argument
# Fit the model
model.nn <- train(y ~ .,
                  data = train_subset,
                  method = 'nnet',
                  maxit = 500,
                  linout = FALSE,   # classification, so logistic rather than linear output
                  trControl = nn.Control,
                  tuneGrid = nn.Grid,
                  metric = "ROC",
                  MaxNWts = 1000000,
                  na.action = na.exclude)
stopCluster(cl)
remove(cl)
registerDoSEQ()
varImp(model.nn) # importance of each variable in the trained model
print(model.nn)
plot(model.nn, metric = "ROC") # ROC versus hidden-layer size and weight decay
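# the size/decay combination that caret selected (added for inspection, not in the original script)
model.nn$bestTune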
remove(nn.Control, nn.Grid, nn.seeds)
garson(model.nn)
olden(model.nn)
plot_data2<-garson(model.nn, bar_plot = FALSE)
plot_data3<-olden(model.nn, bar_plot = FALSE)
print(plot_data2)
plot_data3
# write the results to a file
write.xlsx(plot_data2, file = "garson.xlsx", sheetName = "resultado", append = FALSE)
write.xlsx(plot_data3, file = "olden.xlsx", sheetName = "resultado", append = FALSE)
## predict on external data
# read the data to predict from a file
xl_data_test = read.csv("predecir.csv", header = TRUE, sep=",", dec=",")
# convert the categorical columns to factors
xl_data_test$y <- as.factor(xl_data_test$y)
xl_data_test$x3 <- as.factor(xl_data_test$x3)
# normalize the numeric values
xl_data_test = xl_data_test %>% mutate_at(c("x1", "x2", "x4", "x5", "x6", "x7", "x8", "x10", "x11", "x12"), normalize)
xl_data_test1 = xl_data_test
# predict
preds.nn <- predict.train(model.nn, newdata=xl_data_test1, type="prob") # Neural network
percentages = preds.nn %>% transmute(Hpercent = round(H*100, 2), NHpercent = round(NH*100, 2))
head(percentages)
## write the prediction results to an Excel file
write.xlsx(cbind(preds.nn,xl_data_test1), file = "salida_prediccion.xlsx", sheetName = "nnet", append = FALSE)
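# optional alternative (not in the original script): if the Java-based xlsx package runs into
# memory problems, the Java-free openxlsx package can write the same file, e.g.
# openxlsx::write.xlsx(cbind(preds.nn, xl_data_test1), file = "salida_prediccion.xlsx")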
Result:
> head(percentages)
Hpercent NHpercent
1 98.99 1.01
2 7.88 92.12
3 9.06 90.94
4 2.26 97.74
5 99.28 0.72
6 90.04 9.96
All you had to do was change type="raw" to type="prob" in the predict.train call. You then get one column per class with its predicted probability (the two columns sum to 1). I rounded the values and converted them to percentages for readability.
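If you also want the hard class labels next to the probabilities, the same call with type="raw" still gives you those, and you can bind the two together (a small sketch using the objects above):

preds.class <- predict.train(model.nn, newdata = xl_data_test1, type = "raw") # predicted class labels (H / NH)
head(cbind(class = preds.class, percentages))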
Hope this is what you wanted
PJ