Hello,
I am building a logistic regression model using the glmnet package:
> # Prep Training and Test data.
> # Seed the RNG *before* sampling so the 70/30 split can be reproduced;
> # the RNG state that follows is then deterministic as well.
> set.seed(100)
> # floor() makes the truncation of the fractional sample size explicit;
> # seq_len() stays correct even when df has zero rows.
> trainDataIndex <- sample(seq_len(nrow(df)), floor(0.7 * nrow(df))) # 70% training data
> trainData <- df[trainDataIndex, ]
> testData <- df[-trainDataIndex, ] # remaining ~30% held out for testing
> trainData <-
+ trainData %>%
+ dplyr::mutate(CUST_REGION_DESCR =
+ forcats::fct_relabel(CUST_REGION_DESCR, ~ trimws(.x))) # strip leading/trailing whitespace from the region level names
> testData <-
+ testData %>%
+ dplyr::mutate(CUST_REGION_DESCR =
+ forcats::fct_relabel(CUST_REGION_DESCR, ~ trimws(.x))) # same whitespace clean-up applied to the test split
> str(trainData) # inspect the structure of the training split
'data.frame': 693843 obs. of 4 variables:
$ cust_prog_level : Factor w/ 14 levels "B","C","D","E",..: 9 7 10 9 10 9 10 5 10 5 ...
$ CUST_REGION_DESCR: Factor w/ 8 levels "CORPORATE REGION",..: 2 6 7 6 8 8 4 7 7 6 ...
$ Sales : num 92.7 2356 39 239.6 26 ...
$ New_Product_Type : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
> str(testData) # inspect the structure of the held-out test split
'data.frame': 297362 obs. of 4 variables:
$ cust_prog_level : Factor w/ 14 levels "B","C","D","E",..: 9 5 9 9 9 9 3 3 5 3 ...
$ CUST_REGION_DESCR: Factor w/ 8 levels "CORPORATE REGION",..: 3 3 6 6 7 6 7 2 2 4 ...
$ Sales : num 150.2 68.5 68.1 72.1 60.1 ...
$ New_Product_Type : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
> x = model.matrix(New_Product_Type ~.,data=trainData) # dummy-coded predictor matrix; NOTE(review): model.matrix adds its own "(Intercept)" column, hence the second "(Intercept)" row in the coefficients
> cvfit = cv.glmnet(x, y=as.factor(trainData$New_Product_Type), alpha=1, family="binomial",type.measure = "mse") # cross-validated lasso (alpha=1) logistic fit, lambda scored by mean squared error
> lambda_1se <- cvfit$lambda.1se # the lambda.1se value stored on the cross-validation fit
> coef(cvfit,s=lambda_1se) # coefficients at that lambda; "." in the sparse printout is a coefficient shrunk to zero
23 x 1 sparse Matrix of class "dgCMatrix"
1
(Intercept) 0.02946581
(Intercept) .
cust_prog_levelC 0.14012975
cust_prog_levelD .
cust_prog_levelE 0.13339906
cust_prog_levelG -0.05325043
cust_prog_levelI 0.21440592
cust_prog_levelL 0.26273503
cust_prog_levelM .
cust_prog_levelN 0.26620261
cust_prog_levelP -0.05166799
cust_prog_levelR -0.33054803
cust_prog_levelS .
cust_prog_levelX 0.57508875
cust_prog_levelZ 1.20748454
CUST_REGION_DESCRMOUNTAIN WEST REGION -0.20993854
CUST_REGION_DESCRNORTH CENTRAL REGION -0.04035331
CUST_REGION_DESCRNORTH EAST REGION 0.01082858
CUST_REGION_DESCROHIO VALLEY REGION 0.03077584
CUST_REGION_DESCRSOUTH CENTRAL REGION .
CUST_REGION_DESCRSOUTH EAST REGION 0.10606213
CUST_REGION_DESCRWESTERN REGION -0.17587036
Sales -0.01223843
> # build the test design matrix with the same formula used for training
> x_test <- model.matrix(New_Product_Type~.,data = testData)
> # predict fitted probabilities for the test rows (type = "response") at lambda_1se
> lasso_prob <- predict(cvfit,newx = x_test,s=lambda_1se,type="response")
> # translate probabilities to labels: start with "neg", set "pos" where the probability exceeds 0.5
> lasso_predict <- rep("neg",nrow(testData))
> lasso_predict[lasso_prob>.5] <- "pos"
> # confusion matrix: predicted label (rows) vs. true New_Product_Type (columns)
> table(pred=lasso_predict,true=testData$New_Product_Type)
true
pred 0 1
neg 207840 60865
pos 8697 19960
> # re-label at a 0.8 cutoff; NOTE(review): lasso_predict is not reset to "neg" first, so the "pos" labels from the 0.5 cutoff remain and this assignment changes nothing
> lasso_predict[lasso_prob>.8] <- "pos"
> # confusion matrix (predictions are unchanged, so the counts match the previous table)
> table(pred=lasso_predict,true=testData$New_Product_Type)
true
pred 0 1
neg 207840 60865
pos 8697 19960
When I compute the accuracy, the result is 0:
> # accuracy; NOTE(review): lasso_predict holds "neg"/"pos" while New_Product_Type has levels "0"/"1", so this == is never TRUE and the mean is 0
> mean(lasso_predict==testData$New_Product_Type)
[1] 0
Does this mean my model has zero accuracy?