Hi,
1) How can I improve my KNN algorithm?
2) How can I identify the best input features for my prediction?
3) How can I evaluate my model?
Dataset: Airline Passenger Satisfaction (Kaggle)
library(class)
library(dplyr)
# ---- Import data and stack the test and train files --------------------------
# Both files are picked interactively and are expected to share the same
# columns (presumably the Kaggle train/test CSV pair -- confirm).
d.train <- read.csv(file.choose(), header = TRUE, sep = ",")
d.test <- read.csv(file.choose(), header = TRUE, sep = ",")

# rbind() stacks the rows as they are. merge(..., all = TRUE) on every column
# is a full outer join: it re-sorts the rows and collapses (or multiplies) rows
# that are identical in both files, which is not a plain "combine the files".
# d.test is re-ordered to d.train's column layout so the positional drop below
# hits the same columns.
data <- rbind(d.test[names(d.train)], d.train)
summary(data)
data <- na.omit(data)

# ---- Cleaning ----------------------------------------------------------------
# Drop the first two columns (presumably a row index and an id that carry no
# predictive information -- confirm against the CSV header).
data <- data[, -c(1:2)]

# Address the label by name rather than by position, so a different column
# layout cannot silently turn another column into the label.
label_col <- "satisfaction"
stopifnot(label_col %in% names(data))
feature_cols <- setdiff(names(data), label_col)

# One factor holds the class labels for every row; train and test labels are
# both taken from it, so predictions and truth share the same levels.
row_labels <- as.factor(data[[label_col]])

# Keep a numeric code of the label inside `data`, so the whole data frame stays
# numeric (needed by any later cor(data) call).
data[[label_col]] <- as.numeric(row_labels)

# Encode every non-numeric feature (e.g. Gender) as integer codes; scale() and
# knn() need numeric input. NOTE(review): integer codes impose an arbitrary
# order on categories with more than two levels; dummy variables would avoid it.
data[feature_cols] <- lapply(data[feature_cols], function(column) {
  if (is.numeric(column)) column else as.numeric(as.factor(column))
})
str(data)

# ---- Training and testing split for KNN --------------------------------------
set.seed(1234)
size <- floor(0.8 * nrow(data))
stopifnot(size >= 1, size < nrow(data))
train_ind <- sample(seq_len(nrow(data)), size = size)

# Standardise with statistics from the training rows only, then apply the same
# transformation to every row; using all rows would leak test information.
train_center <- colMeans(data[train_ind, feature_cols, drop = FALSE])
train_sd <- vapply(data[train_ind, feature_cols, drop = FALSE], sd, numeric(1))
# A constant (or single-row) column has sd 0/NA; dividing by it would give
# NaN/NA, which knn() rejects, so leave such columns merely centred.
train_sd[is.na(train_sd) | train_sd == 0] <- 1
data[, feature_cols] <- scale(
  as.matrix(data[, feature_cols, drop = FALSE]),
  center = train_center,
  scale = train_sd
)

train_labels <- row_labels[train_ind]
test_labels <- row_labels[-train_ind]
data_train <- data[train_ind, feature_cols, drop = FALSE]
data_test <- data[-train_ind, feature_cols, drop = FALSE]

# Rule-of-thumb k = sqrt(n), forced to be odd so a two-class vote cannot tie.
# NOTE(review): tuning k by cross-validation would be a more reliable choice.
k <- max(1, round(sqrt(nrow(data_train))))
if (k %% 2 == 0) {
  k <- k + 1
}
predictions <- knn(
  train = data_train,
  test = data_test,
  cl = train_labels,
  k = k
)

# ---- Confusion matrix --------------------------------------------------------
# Rows are predicted classes, columns are true classes; both are factors with
# identical levels, so the diagonal holds the correct predictions.
tb <- table(predictions, test_labels)
# Overall accuracy of a confusion table, expressed as a percentage.
#
# x: a two-dimensional confusion matrix/table whose diagonal cells hold the
#    correctly classified counts.
# Returns the diagonal's share of the grand total, multiplied by 100.
accuracy <- function(x) {
  total_count <- sum(rowSums(x))
  diagonal_share <- diag(x) / total_count
  sum(diagonal_share) * 100
}
# Percentage of test rows on the diagonal of the confusion table.
accuracy(x = tb)

# Pairwise correlations between all columns of `data`, shown to two decimals;
# the `satisfaction` row/column gives each column's linear association with
# the label.
res <- cor(x = data)
round(x = res, digits = 2)