Error in tidymodels tune_grid function call

Hi, I'm trying to replicate this tutorial on my computer - https://www.business-science.io/code-tools/2020/01/21/hyperparamater-tune-product-price-prediction.html

My R version is 3.6, and I have the latest versions of all the tidymodels packages the tutorial uses: rsample, tune, dials, parsnip, recipes, yardstick, and textrecipes.

I grabbed the data from this link and ran the below code as per the tutorial

library(tune)
library(dials)
library(parsnip)
library(rsample)
library(recipes)
library(yardstick)
library(textrecipes)
library(janitor)
library(tidyverse)

## Read file
# Load the CSV, normalise the column names with clean_names(), and move the
# outcome column (msrp) to the front, keeping every other column after it.
car_prices_tbl <- select(
  clean_names(read_csv("data.csv")),
  msrp,
  everything()
)

## Split data
# Seed the RNG so the random train/test partition is reproducible, then keep
# 80% of the rows for training and hold out the rest for testing.
set.seed(123)
car_initial_split <- initial_split(car_prices_tbl, prop = 0.80)
# Auto-print the split object to inspect it.
car_initial_split

## Preprocessing Step
# Define a recipe that models msrp from every other column and estimate it
# (prep) on the training portion of the split only.
preprocessing_recipe <- recipe(msrp ~ ., data = training(car_initial_split)) %>%
  
  # Encode Categorical Data Types
  # Convert all nominal (string) columns to factors, then also convert
  # engine_cylinders and number_of_doors to factors.
  step_string2factor(all_nominal()) %>%
  step_mutate(
    engine_cylinders = as.factor(engine_cylinders),
    number_of_doors  = as.factor(number_of_doors)
  ) %>%
  
  # Feature Engineering - Market Category
  # Remove underscores from market_category, split it into tokens, filter the
  # tokens by frequency (min_times/max_times are read as proportions rather
  # than counts because percentage = TRUE), and create binary term-frequency
  # columns for the tokens that remain.
  step_mutate(market_category = market_category %>% str_replace_all("_", "")) %>%
  step_tokenize(market_category) %>%
  step_tokenfilter(market_category, min_times = 0.05, max_times = 1, percentage = TRUE) %>%
  step_tf(market_category, weight_scheme = "binary") %>%
  
  # Combine low-frequency categories
  # Factor levels below the 0.02 threshold are pooled into a level named "other".
  step_other(all_nominal(), threshold = 0.02, other = "other") %>%
  
  # Impute missing
  # Fill missing values in the four listed columns from the 5 nearest neighbors.
  step_knnimpute(engine_fuel_type, engine_hp, engine_cylinders, number_of_doors,
                 neighbors = 5) %>%
  
  # Remove unnecessary columns
  # Drop the model column, then estimate every step above on the recipe's data.
  step_rm(model) %>%
  prep()

# Apply the prepped recipe to the training rows to get the preprocessed table.
car_training_preprocessed_tbl <- preprocessing_recipe %>% bake(training(car_initial_split))

## Cross Validation
# Bake the training rows with the prepped recipe and partition the result into
# 5 folds; the seed keeps the fold assignment reproducible.
set.seed(123)
car_cv_folds <- vfold_cv(
  bake(preprocessing_recipe, new_data = training(car_initial_split)),
  v = 5
)

## GLM model
# Linear regression specification run through the glmnet engine. penalty and
# mixture are left as tune() placeholders so the grid search supplies them.
glmnet_model <- set_engine(
  linear_reg(mode = "regression", penalty = tune(), mixture = tune()),
  "glmnet"
)

## Params Set
# Collect the two tunable parameters (penalty and mixture) into a parameter
# set and auto-print it for inspection.
glmnet_params <- parameters(penalty(), mixture())
glmnet_params

# Seed the RNG, then generate a 20-row grid of candidate (penalty, mixture)
# combinations with grid_max_entropy() and auto-print it.
set.seed(123)
glmnet_grid <- grid_max_entropy(glmnet_params, size = 20)
glmnet_grid

## Tune Model
# Evaluate glmnet_model for each row of glmnet_grid on each fold in
# car_cv_folds, scoring every candidate with MAE, MAPE, RMSE and R-squared.
# NOTE(review): the formula = / model = argument names are the ones the
# tutorial uses; later tune releases changed tune_grid()'s signature, so
# confirm them against the installed tune version.
glmnet_stage_1_cv_results_tbl <- tune_grid(
  formula   = msrp ~ .,
  model     = glmnet_model,
  resamples = car_cv_folds,
  grid      = glmnet_grid,
  metrics   = metric_set(mae, mape, rmse, rsq),
  control   = control_grid(verbose = TRUE)  # log progress per fold and model
)

However, at every iteration of model tuning I receive the following error:

Fold5: model 17/20: Error: Result 1 must be a single string, not NULL of length 0

I tried building an individual model with glmnet and with xgboost (the other half of the tutorial) on the entire feature set, and both run without errors. So it has to be something in how the tune_grid function interacts with the CV object from rsample, but I'm unable to find out why, as this is my first time playing around with this framework.

Can anyone please help?

Thanks!

Hi!

Try cutting and pasting your reprex. The only change I made was giving the full pathname to the read_csv function, and I got:

library(tune)
library(dials)
#> Loading required package: scales
library(parsnip)
library(rsample)
#> Loading required package: tidyr
library(recipes)
#> Loading required package: dplyr
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> 
#> Attaching package: 'recipes'
#> The following object is masked from 'package:scales':
#> 
#>     yj_trans
#> The following object is masked from 'package:stats':
#> 
#>     step
library(yardstick)
#> For binary classification, the first factor level is assumed to be the event.
#> Set the global option `yardstick.event_first` to `FALSE` to change this.
library(textrecipes)
library(janitor)
#> 
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#> 
#>     chisq.test, fisher.test
library(tidyverse)

## Read file
car_prices_tbl <- read_csv("~/projects/demo/carsdata.csv") %>%
  clean_names() %>%
  select(msrp, everything())
#> Parsed with column specification:
#> cols(
#>   Make = col_character(),
#>   Model = col_character(),
#>   Year = col_double(),
#>   `Engine Fuel Type` = col_character(),
#>   `Engine HP` = col_double(),
#>   `Engine Cylinders` = col_double(),
#>   `Transmission Type` = col_character(),
#>   Driven_Wheels = col_character(),
#>   `Number of Doors` = col_double(),
#>   `Market Category` = col_character(),
#>   `Vehicle Size` = col_character(),
#>   `Vehicle Style` = col_character(),
#>   `highway MPG` = col_double(),
#>   `city mpg` = col_double(),
#>   Popularity = col_double(),
#>   MSRP = col_double()
#> )

## Split data
set.seed(123)
car_initial_split <- initial_split(car_prices_tbl, prop = 0.80)
car_initial_split
#> <9532/2382/11914>

## Preprocessing Step
preprocessing_recipe <- recipe(msrp ~ ., data = training(car_initial_split)) %>%
  
  # Encode Categorical Data Types
  step_string2factor(all_nominal()) %>%
  step_mutate(
    engine_cylinders = as.factor(engine_cylinders),
    number_of_doors  = as.factor(number_of_doors)
  ) %>%
  
  # Feature Engineering - Market Category
  step_mutate(market_category = market_category %>% str_replace_all("_", "")) %>%
  step_tokenize(market_category) %>%
  step_tokenfilter(market_category, min_times = 0.05, max_times = 1, percentage = TRUE) %>%
  step_tf(market_category, weight_scheme = "binary") %>%
  
  # Combine low-frequency categories
  step_other(all_nominal(), threshold = 0.02, other = "other") %>%
  
  # Impute missing
  step_knnimpute(engine_fuel_type, engine_hp, engine_cylinders, number_of_doors,
                 neighbors = 5) %>%
  
  # Remove unnecessary columns
  step_rm(model) %>%
  prep()
#> Warning in tokenfilter_fun(training[, col_names[i], drop = TRUE], x$max_times, :
#> max_features was set to '100', but only 7 was available and selected.

car_training_preprocessed_tbl <- preprocessing_recipe %>% bake(training(car_initial_split))

## Cross Validation
set.seed(123)
car_cv_folds <- training(car_initial_split) %>% 
  bake(preprocessing_recipe, new_data = .) %>%
  vfold_cv(v = 5)

## GLM model
glmnet_model <- linear_reg(
  mode    = "regression", 
  penalty = tune(), 
  mixture = tune()
) %>%
  set_engine("glmnet")

## Params Set
glmnet_params <- parameters(penalty(), mixture())
glmnet_params
#> Collection of 2 parameters for tuning
#> 
#>       id parameter type object class
#>  penalty        penalty    nparam[+]
#>  mixture        mixture    nparam[+]

set.seed(123)
glmnet_grid <- grid_max_entropy(glmnet_params, size = 20)
glmnet_grid
#> # A tibble: 20 x 2
#>     penalty mixture
#>       <dbl>   <dbl>
#>  1 2.94e- 1  0.702 
#>  2 1.48e- 4  0.996 
#>  3 1.60e- 1  0.444 
#>  4 5.86e- 1  0.975 
#>  5 1.69e- 9  0.0491
#>  6 1.10e- 5  0.699 
#>  7 2.76e- 2  0.988 
#>  8 4.95e- 8  0.753 
#>  9 1.07e- 5  0.382 
#> 10 7.87e- 8  0.331 
#> 11 4.07e- 1  0.180 
#> 12 1.70e- 3  0.590 
#> 13 2.52e-10  0.382 
#> 14 2.47e-10  0.666 
#> 15 2.31e- 9  0.921 
#> 16 1.31e- 7  0.546 
#> 17 1.49e- 6  0.973 
#> 18 1.28e- 3  0.0224
#> 19 7.49e- 7  0.0747
#> 20 2.37e- 3  0.351

## Tune Model
glmnet_stage_1_cv_results_tbl <- tune_grid(
  formula   = msrp ~ .,
  model     = glmnet_model,
  resamples = car_cv_folds,
  grid      = glmnet_grid,
  metrics   = metric_set(mae, mape, rmse, rsq),
  control   = control_grid(verbose = TRUE)
)
#> i Fold1: formula
#> ✓ Fold1: formula
#> i Fold1: model  1/20
#> ✓ Fold1: model  1/20
#> i Fold1: model  1/20
#> ✓ Fold1: model  1/20
#> i Fold1: model  2/20
#> ✓ Fold1: model  2/20
#> i Fold1: model  2/20
#> ✓ Fold1: model  2/20
#> i Fold1: model  3/20
#> ✓ Fold1: model  3/20
#> i Fold1: model  3/20
#> ✓ Fold1: model  3/20
#> i Fold1: model  4/20
#> ✓ Fold1: model  4/20
#> i Fold1: model  4/20
#> ✓ Fold1: model  4/20
#> i Fold1: model  5/20
#> ✓ Fold1: model  5/20
#> i Fold1: model  5/20
#> ✓ Fold1: model  5/20
#> i Fold1: model  6/20
#> ✓ Fold1: model  6/20
#> i Fold1: model  6/20
#> ✓ Fold1: model  6/20
#> i Fold1: model  7/20
#> ✓ Fold1: model  7/20
#> i Fold1: model  7/20
#> ✓ Fold1: model  7/20
#> i Fold1: model  8/20
#> ✓ Fold1: model  8/20
#> i Fold1: model  8/20
#> ✓ Fold1: model  8/20
#> i Fold1: model  9/20
#> ✓ Fold1: model  9/20
#> i Fold1: model  9/20
#> ✓ Fold1: model  9/20
#> i Fold1: model 10/20
#> ✓ Fold1: model 10/20
#> i Fold1: model 10/20
#> ✓ Fold1: model 10/20
#> i Fold1: model 11/20
#> ✓ Fold1: model 11/20
#> i Fold1: model 11/20
#> ✓ Fold1: model 11/20
#> i Fold1: model 12/20
#> ✓ Fold1: model 12/20
#> i Fold1: model 12/20
#> ✓ Fold1: model 12/20
#> i Fold1: model 13/20
#> ✓ Fold1: model 13/20
#> i Fold1: model 13/20
#> ✓ Fold1: model 13/20
#> i Fold1: model 14/20
#> ✓ Fold1: model 14/20
#> i Fold1: model 14/20
#> ✓ Fold1: model 14/20
#> i Fold1: model 15/20
#> ✓ Fold1: model 15/20
#> i Fold1: model 15/20
#> ✓ Fold1: model 15/20
#> i Fold1: model 16/20
#> ✓ Fold1: model 16/20
#> i Fold1: model 16/20
#> ✓ Fold1: model 16/20
#> i Fold1: model 17/20
#> ✓ Fold1: model 17/20
#> i Fold1: model 17/20
#> ✓ Fold1: model 17/20
#> i Fold1: model 18/20
#> ✓ Fold1: model 18/20
#> i Fold1: model 18/20
#> ✓ Fold1: model 18/20
#> i Fold1: model 19/20
#> ✓ Fold1: model 19/20
#> i Fold1: model 19/20
#> ✓ Fold1: model 19/20
#> i Fold1: model 20/20
#> ✓ Fold1: model 20/20
#> i Fold1: model 20/20
#> ✓ Fold1: model 20/20
#> i Fold2: formula
#> ✓ Fold2: formula
#> i Fold2: model  1/20
#> ✓ Fold2: model  1/20
#> i Fold2: model  1/20
#> ✓ Fold2: model  1/20
#> i Fold2: model  2/20
#> ✓ Fold2: model  2/20
#> i Fold2: model  2/20
#> ✓ Fold2: model  2/20
#> i Fold2: model  3/20
#> ✓ Fold2: model  3/20
#> i Fold2: model  3/20
#> ✓ Fold2: model  3/20
#> i Fold2: model  4/20
#> ✓ Fold2: model  4/20
#> i Fold2: model  4/20
#> ✓ Fold2: model  4/20
#> i Fold2: model  5/20
#> ✓ Fold2: model  5/20
#> i Fold2: model  5/20
#> ✓ Fold2: model  5/20
#> i Fold2: model  6/20
#> ✓ Fold2: model  6/20
#> i Fold2: model  6/20
#> ✓ Fold2: model  6/20
#> i Fold2: model  7/20
#> ✓ Fold2: model  7/20
#> i Fold2: model  7/20
#> ✓ Fold2: model  7/20
#> i Fold2: model  8/20
#> ✓ Fold2: model  8/20
#> i Fold2: model  8/20
#> ✓ Fold2: model  8/20
#> i Fold2: model  9/20
#> ✓ Fold2: model  9/20
#> i Fold2: model  9/20
#> ✓ Fold2: model  9/20
#> i Fold2: model 10/20
#> ✓ Fold2: model 10/20
#> i Fold2: model 10/20
#> ✓ Fold2: model 10/20
#> i Fold2: model 11/20
#> ✓ Fold2: model 11/20
#> i Fold2: model 11/20
#> ✓ Fold2: model 11/20
#> i Fold2: model 12/20
#> ✓ Fold2: model 12/20
#> i Fold2: model 12/20
#> ✓ Fold2: model 12/20
#> i Fold2: model 13/20
#> ✓ Fold2: model 13/20
#> i Fold2: model 13/20
#> ✓ Fold2: model 13/20
#> i Fold2: model 14/20
#> ✓ Fold2: model 14/20
#> i Fold2: model 14/20
#> ✓ Fold2: model 14/20
#> i Fold2: model 15/20
#> ✓ Fold2: model 15/20
#> i Fold2: model 15/20
#> ✓ Fold2: model 15/20
#> i Fold2: model 16/20
#> ✓ Fold2: model 16/20
#> i Fold2: model 16/20
#> ✓ Fold2: model 16/20
#> i Fold2: model 17/20
#> ✓ Fold2: model 17/20
#> i Fold2: model 17/20
#> ✓ Fold2: model 17/20
#> i Fold2: model 18/20
#> ✓ Fold2: model 18/20
#> i Fold2: model 18/20
#> ✓ Fold2: model 18/20
#> i Fold2: model 19/20
#> ✓ Fold2: model 19/20
#> i Fold2: model 19/20
#> ✓ Fold2: model 19/20
#> i Fold2: model 20/20
#> ✓ Fold2: model 20/20
#> i Fold2: model 20/20
#> ✓ Fold2: model 20/20
#> i Fold3: formula
#> ✓ Fold3: formula
#> i Fold3: model  1/20
#> ✓ Fold3: model  1/20
#> i Fold3: model  1/20
#> ✓ Fold3: model  1/20
#> i Fold3: model  2/20
#> ✓ Fold3: model  2/20
#> i Fold3: model  2/20
#> ✓ Fold3: model  2/20
#> i Fold3: model  3/20
#> ✓ Fold3: model  3/20
#> i Fold3: model  3/20
#> ✓ Fold3: model  3/20
#> i Fold3: model  4/20
#> ✓ Fold3: model  4/20
#> i Fold3: model  4/20
#> ✓ Fold3: model  4/20
#> i Fold3: model  5/20
#> ✓ Fold3: model  5/20
#> i Fold3: model  5/20
#> ✓ Fold3: model  5/20
#> i Fold3: model  6/20
#> ✓ Fold3: model  6/20
#> i Fold3: model  6/20
#> ✓ Fold3: model  6/20
#> i Fold3: model  7/20
#> ✓ Fold3: model  7/20
#> i Fold3: model  7/20
#> ✓ Fold3: model  7/20
#> i Fold3: model  8/20
#> ✓ Fold3: model  8/20
#> i Fold3: model  8/20
#> ✓ Fold3: model  8/20
#> i Fold3: model  9/20
#> ✓ Fold3: model  9/20
#> i Fold3: model  9/20
#> ✓ Fold3: model  9/20
#> i Fold3: model 10/20
#> ✓ Fold3: model 10/20
#> i Fold3: model 10/20
#> ✓ Fold3: model 10/20
#> i Fold3: model 11/20
#> ✓ Fold3: model 11/20
#> i Fold3: model 11/20
#> ✓ Fold3: model 11/20
#> i Fold3: model 12/20
#> ✓ Fold3: model 12/20
#> i Fold3: model 12/20
#> ✓ Fold3: model 12/20
#> i Fold3: model 13/20
#> ✓ Fold3: model 13/20
#> i Fold3: model 13/20
#> ✓ Fold3: model 13/20
#> i Fold3: model 14/20
#> ✓ Fold3: model 14/20
#> i Fold3: model 14/20
#> ✓ Fold3: model 14/20
#> i Fold3: model 15/20
#> ✓ Fold3: model 15/20
#> i Fold3: model 15/20
#> ✓ Fold3: model 15/20
#> i Fold3: model 16/20
#> ✓ Fold3: model 16/20
#> i Fold3: model 16/20
#> ✓ Fold3: model 16/20
#> i Fold3: model 17/20
#> ✓ Fold3: model 17/20
#> i Fold3: model 17/20
#> ✓ Fold3: model 17/20
#> i Fold3: model 18/20
#> ✓ Fold3: model 18/20
#> i Fold3: model 18/20
#> ✓ Fold3: model 18/20
#> i Fold3: model 19/20
#> ✓ Fold3: model 19/20
#> i Fold3: model 19/20
#> ✓ Fold3: model 19/20
#> i Fold3: model 20/20
#> ✓ Fold3: model 20/20
#> i Fold3: model 20/20
#> ✓ Fold3: model 20/20
#> i Fold4: formula
#> ✓ Fold4: formula
#> i Fold4: model  1/20
#> ✓ Fold4: model  1/20
#> i Fold4: model  1/20
#> ✓ Fold4: model  1/20
#> i Fold4: model  2/20
#> ✓ Fold4: model  2/20
#> i Fold4: model  2/20
#> ✓ Fold4: model  2/20
#> i Fold4: model  3/20
#> ✓ Fold4: model  3/20
#> i Fold4: model  3/20
#> ✓ Fold4: model  3/20
#> i Fold4: model  4/20
#> ✓ Fold4: model  4/20
#> i Fold4: model  4/20
#> ✓ Fold4: model  4/20
#> i Fold4: model  5/20
#> ✓ Fold4: model  5/20
#> i Fold4: model  5/20
#> ✓ Fold4: model  5/20
#> i Fold4: model  6/20
#> ✓ Fold4: model  6/20
#> i Fold4: model  6/20
#> ✓ Fold4: model  6/20
#> i Fold4: model  7/20
#> ✓ Fold4: model  7/20
#> i Fold4: model  7/20
#> ✓ Fold4: model  7/20
#> i Fold4: model  8/20
#> ✓ Fold4: model  8/20
#> i Fold4: model  8/20
#> ✓ Fold4: model  8/20
#> i Fold4: model  9/20
#> ✓ Fold4: model  9/20
#> i Fold4: model  9/20
#> ✓ Fold4: model  9/20
#> i Fold4: model 10/20
#> ✓ Fold4: model 10/20
#> i Fold4: model 10/20
#> ✓ Fold4: model 10/20
#> i Fold4: model 11/20
#> ✓ Fold4: model 11/20
#> i Fold4: model 11/20
#> ✓ Fold4: model 11/20
#> i Fold4: model 12/20
#> ✓ Fold4: model 12/20
#> i Fold4: model 12/20
#> ✓ Fold4: model 12/20
#> i Fold4: model 13/20
#> ✓ Fold4: model 13/20
#> i Fold4: model 13/20
#> ✓ Fold4: model 13/20
#> i Fold4: model 14/20
#> ✓ Fold4: model 14/20
#> i Fold4: model 14/20
#> ✓ Fold4: model 14/20
#> i Fold4: model 15/20
#> ✓ Fold4: model 15/20
#> i Fold4: model 15/20
#> ✓ Fold4: model 15/20
#> i Fold4: model 16/20
#> ✓ Fold4: model 16/20
#> i Fold4: model 16/20
#> ✓ Fold4: model 16/20
#> i Fold4: model 17/20
#> ✓ Fold4: model 17/20
#> i Fold4: model 17/20
#> ✓ Fold4: model 17/20
#> i Fold4: model 18/20
#> ✓ Fold4: model 18/20
#> i Fold4: model 18/20
#> ✓ Fold4: model 18/20
#> i Fold4: model 19/20
#> ✓ Fold4: model 19/20
#> i Fold4: model 19/20
#> ✓ Fold4: model 19/20
#> i Fold4: model 20/20
#> ✓ Fold4: model 20/20
#> i Fold4: model 20/20
#> ✓ Fold4: model 20/20
#> i Fold5: formula
#> ✓ Fold5: formula
#> i Fold5: model  1/20
#> ✓ Fold5: model  1/20
#> i Fold5: model  1/20
#> ✓ Fold5: model  1/20
#> i Fold5: model  2/20
#> ✓ Fold5: model  2/20
#> i Fold5: model  2/20
#> ✓ Fold5: model  2/20
#> i Fold5: model  3/20
#> ✓ Fold5: model  3/20
#> i Fold5: model  3/20
#> ✓ Fold5: model  3/20
#> i Fold5: model  4/20
#> ✓ Fold5: model  4/20
#> i Fold5: model  4/20
#> ✓ Fold5: model  4/20
#> i Fold5: model  5/20
#> ✓ Fold5: model  5/20
#> i Fold5: model  5/20
#> ✓ Fold5: model  5/20
#> i Fold5: model  6/20
#> ✓ Fold5: model  6/20
#> i Fold5: model  6/20
#> ✓ Fold5: model  6/20
#> i Fold5: model  7/20
#> ✓ Fold5: model  7/20
#> i Fold5: model  7/20
#> ✓ Fold5: model  7/20
#> i Fold5: model  8/20
#> ✓ Fold5: model  8/20
#> i Fold5: model  8/20
#> ✓ Fold5: model  8/20
#> i Fold5: model  9/20
#> ✓ Fold5: model  9/20
#> i Fold5: model  9/20
#> ✓ Fold5: model  9/20
#> i Fold5: model 10/20
#> ✓ Fold5: model 10/20
#> i Fold5: model 10/20
#> ✓ Fold5: model 10/20
#> i Fold5: model 11/20
#> ✓ Fold5: model 11/20
#> i Fold5: model 11/20
#> ✓ Fold5: model 11/20
#> i Fold5: model 12/20
#> ✓ Fold5: model 12/20
#> i Fold5: model 12/20
#> ✓ Fold5: model 12/20
#> i Fold5: model 13/20
#> ✓ Fold5: model 13/20
#> i Fold5: model 13/20
#> ✓ Fold5: model 13/20
#> i Fold5: model 14/20
#> ✓ Fold5: model 14/20
#> i Fold5: model 14/20
#> ✓ Fold5: model 14/20
#> i Fold5: model 15/20
#> ✓ Fold5: model 15/20
#> i Fold5: model 15/20
#> ✓ Fold5: model 15/20
#> i Fold5: model 16/20
#> ✓ Fold5: model 16/20
#> i Fold5: model 16/20
#> ✓ Fold5: model 16/20
#> i Fold5: model 17/20
#> ✓ Fold5: model 17/20
#> i Fold5: model 17/20
#> ✓ Fold5: model 17/20
#> i Fold5: model 18/20
#> ✓ Fold5: model 18/20
#> i Fold5: model 18/20
#> ✓ Fold5: model 18/20
#> i Fold5: model 19/20
#> ✓ Fold5: model 19/20
#> i Fold5: model 19/20
#> ✓ Fold5: model 19/20
#> i Fold5: model 20/20
#> ✓ Fold5: model 20/20
#> i Fold5: model 20/20
#> ✓ Fold5: model 20/20
glmnet_stage_1_cv_results_tbl
#> #  5-fold cross-validation 
#> # A tibble: 5 x 4
#>   splits              id    .metrics          .notes          
#> * <list>              <chr> <list>            <list>          
#> 1 <split [7.6K/1.9K]> Fold1 <tibble [80 × 5]> <tibble [0 × 1]>
#> 2 <split [7.6K/1.9K]> Fold2 <tibble [80 × 5]> <tibble [0 × 1]>
#> 3 <split [7.6K/1.9K]> Fold3 <tibble [80 × 5]> <tibble [0 × 1]>
#> 4 <split [7.6K/1.9K]> Fold4 <tibble [80 × 5]> <tibble [0 × 1]>
#> 5 <split [7.6K/1.9K]> Fold5 <tibble [80 × 5]> <tibble [0 × 1]>

Created on 2020-02-23 by the reprex package (v0.3.0)

1 Like

Hi, thanks for letting me know about the reprex package; it sounds amazingly useful for community posts like this. I've run it, and the output is below.

library(tune)
library(dials)
#> Loading required package: scales
library(parsnip)
library(rsample)
#> Loading required package: tidyr
library(recipes)
#> Loading required package: dplyr
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> 
#> Attaching package: 'recipes'
#> The following object is masked from 'package:stats':
#> 
#>     step

library(yardstick)
#> For binary classification, the first factor level is assumed to be the event.
#> Set the global option `yardstick.event_first` to `FALSE` to change this.
library(textrecipes)
library(janitor)
#> 
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#> 
#>     chisq.test, fisher.test

library(tidyverse)

## Read file
car_prices_tbl <- read_csv("C:/Workspace/cars/data.csv") %>%
  clean_names() %>%
  select(msrp, everything())
#> Parsed with column specification:
#> cols(
#>   Make = col_character(),
#>   Model = col_character(),
#>   Year = col_double(),
#>   `Engine Fuel Type` = col_character(),
#>   `Engine HP` = col_double(),
#>   `Engine Cylinders` = col_double(),
#>   `Transmission Type` = col_character(),
#>   Driven_Wheels = col_character(),
#>   `Number of Doors` = col_double(),
#>   `Market Category` = col_character(),
#>   `Vehicle Size` = col_character(),
#>   `Vehicle Style` = col_character(),
#>   `highway MPG` = col_double(),
#>   `city mpg` = col_double(),
#>   Popularity = col_double(),
#>   MSRP = col_double()
#> )


## Split data
set.seed(123)
car_initial_split <- initial_split(car_prices_tbl, prop = 0.80)
car_initial_split
#> <9532/2382/11914>


## Preprocessing Step
preprocessing_recipe <- recipe(msrp ~ ., data = training(car_initial_split)) %>%
  
  # Encode Categorical Data Types
  step_string2factor(all_nominal()) %>%
  step_mutate(
    engine_cylinders = as.factor(engine_cylinders),
    number_of_doors  = as.factor(number_of_doors)
  ) %>%
  
  # Feature Engineering - Market Category
  step_mutate(market_category = market_category %>% str_replace_all("_", "")) %>%
  step_tokenize(market_category) %>%
  step_tokenfilter(market_category, min_times = 0.05, max_times = 1, percentage = TRUE) %>%
  step_tf(market_category, weight_scheme = "binary") %>%
  
  # Combine low-frequency categories
  step_other(all_nominal(), threshold = 0.02, other = "other") %>%
  
  # Impute missing
  step_knnimpute(engine_fuel_type, engine_hp, engine_cylinders, number_of_doors,
                 neighbors = 5) %>%
  
  # Remove unnecessary columns
  step_rm(model) %>%
  prep()
#> Warning in tokenfilter_fun(training[, col_names[i], drop = TRUE], x$max_times, :
#> max_features was set to '100', but only 7 was available and selected.


car_training_preprocessed_tbl <- preprocessing_recipe %>% bake(training(car_initial_split))

## Cross Validation
set.seed(123)
car_cv_folds <- training(car_initial_split) %>% 
  bake(preprocessing_recipe, new_data = .) %>%
  vfold_cv(v = 5)

## GLM model
glmnet_model <- linear_reg(
  mode    = "regression", 
  penalty = tune(), 
  mixture = tune()
) %>%
  set_engine("glmnet")

## Params Set
glmnet_params <- parameters(penalty(), mixture())
glmnet_params
#> Collection of 2 parameters for tuning
#> 
#>       id parameter type object class
#>  penalty        penalty    nparam[+]
#>  mixture        mixture    nparam[+]

set.seed(123)
glmnet_grid <- grid_max_entropy(glmnet_params, size = 20)
glmnet_grid
#> # A tibble: 20 x 2
#>     penalty mixture
#>       <dbl>   <dbl>
#>  1 2.94e- 1  0.702 
#>  2 1.48e- 4  0.996 
#>  3 1.60e- 1  0.444 
#>  4 5.86e- 1  0.975 
#>  5 1.69e- 9  0.0491
#>  6 1.10e- 5  0.699 
#>  7 2.76e- 2  0.988 
#>  8 4.95e- 8  0.753 
#>  9 1.07e- 5  0.382 
#> 10 7.87e- 8  0.331 
#> 11 4.07e- 1  0.180 
#> 12 1.70e- 3  0.590 
#> 13 2.52e-10  0.382 
#> 14 2.47e-10  0.666 
#> 15 2.31e- 9  0.921 
#> 16 1.31e- 7  0.546 
#> 17 1.49e- 6  0.973 
#> 18 1.28e- 3  0.0224
#> 19 7.49e- 7  0.0747
#> 20 2.37e- 3  0.351


## Tune Model
glmnet_stage_1_cv_results_tbl <- tune_grid(
  formula   = msrp ~ .,
  model     = glmnet_model,
  resamples = car_cv_folds,
  grid      = glmnet_grid,
  metrics   = metric_set(mae, mape, rmse, rsq),
  control   = control_grid(verbose = TRUE)
)
#> i Fold1: formula
#> v Fold1: formula
#> i Fold1: model  1/20
#> v Fold1: model  1/20
#> i Fold1: model  1/20
#> x Fold1: model  1/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model  2/20
#> v Fold1: model  2/20
#> i Fold1: model  2/20
#> x Fold1: model  2/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model  3/20
#> v Fold1: model  3/20
#> i Fold1: model  3/20
#> x Fold1: model  3/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model  4/20
#> v Fold1: model  4/20
#> i Fold1: model  4/20
#> x Fold1: model  4/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model  5/20
#> v Fold1: model  5/20
#> i Fold1: model  5/20
#> x Fold1: model  5/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model  6/20
#> v Fold1: model  6/20
#> i Fold1: model  6/20
#> x Fold1: model  6/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model  7/20
#> v Fold1: model  7/20
#> i Fold1: model  7/20
#> x Fold1: model  7/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model  8/20
#> v Fold1: model  8/20
#> i Fold1: model  8/20
#> x Fold1: model  8/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model  9/20
#> v Fold1: model  9/20
#> i Fold1: model  9/20
#> x Fold1: model  9/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model 10/20
#> v Fold1: model 10/20
#> i Fold1: model 10/20
#> x Fold1: model 10/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model 11/20
#> v Fold1: model 11/20
#> i Fold1: model 11/20
#> x Fold1: model 11/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model 12/20
#> v Fold1: model 12/20
#> i Fold1: model 12/20
#> x Fold1: model 12/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model 13/20
#> v Fold1: model 13/20
#> i Fold1: model 13/20
#> x Fold1: model 13/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model 14/20
#> v Fold1: model 14/20
#> i Fold1: model 14/20
#> x Fold1: model 14/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model 15/20
#> v Fold1: model 15/20
#> i Fold1: model 15/20
#> x Fold1: model 15/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model 16/20
#> v Fold1: model 16/20
#> i Fold1: model 16/20
#> x Fold1: model 16/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model 17/20
#> v Fold1: model 17/20
#> i Fold1: model 17/20
#> x Fold1: model 17/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model 18/20
#> v Fold1: model 18/20
#> i Fold1: model 18/20
#> x Fold1: model 18/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model 19/20
#> v Fold1: model 19/20
#> i Fold1: model 19/20
#> x Fold1: model 19/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold1: model 20/20
#> v Fold1: model 20/20
#> i Fold1: model 20/20
#> x Fold1: model 20/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: formula
#> v Fold2: formula
#> i Fold2: model  1/20
#> v Fold2: model  1/20
#> i Fold2: model  1/20
#> x Fold2: model  1/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model  2/20
#> v Fold2: model  2/20
#> i Fold2: model  2/20
#> x Fold2: model  2/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model  3/20
#> v Fold2: model  3/20
#> i Fold2: model  3/20
#> x Fold2: model  3/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model  4/20
#> v Fold2: model  4/20
#> i Fold2: model  4/20
#> x Fold2: model  4/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model  5/20
#> v Fold2: model  5/20
#> i Fold2: model  5/20
#> x Fold2: model  5/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model  6/20
#> v Fold2: model  6/20
#> i Fold2: model  6/20
#> x Fold2: model  6/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model  7/20
#> v Fold2: model  7/20
#> i Fold2: model  7/20
#> x Fold2: model  7/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model  8/20
#> v Fold2: model  8/20
#> i Fold2: model  8/20
#> x Fold2: model  8/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model  9/20
#> v Fold2: model  9/20
#> i Fold2: model  9/20
#> x Fold2: model  9/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model 10/20
#> v Fold2: model 10/20
#> i Fold2: model 10/20
#> x Fold2: model 10/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model 11/20
#> v Fold2: model 11/20
#> i Fold2: model 11/20
#> x Fold2: model 11/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model 12/20
#> v Fold2: model 12/20
#> i Fold2: model 12/20
#> x Fold2: model 12/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model 13/20
#> v Fold2: model 13/20
#> i Fold2: model 13/20
#> x Fold2: model 13/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model 14/20
#> v Fold2: model 14/20
#> i Fold2: model 14/20
#> x Fold2: model 14/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model 15/20
#> v Fold2: model 15/20
#> i Fold2: model 15/20
#> x Fold2: model 15/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model 16/20
#> v Fold2: model 16/20
#> i Fold2: model 16/20
#> x Fold2: model 16/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model 17/20
#> v Fold2: model 17/20
#> i Fold2: model 17/20
#> x Fold2: model 17/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model 18/20
#> v Fold2: model 18/20
#> i Fold2: model 18/20
#> x Fold2: model 18/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model 19/20
#> v Fold2: model 19/20
#> i Fold2: model 19/20
#> x Fold2: model 19/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold2: model 20/20
#> v Fold2: model 20/20
#> i Fold2: model 20/20
#> x Fold2: model 20/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: formula
#> v Fold3: formula
#> i Fold3: model  1/20
#> v Fold3: model  1/20
#> i Fold3: model  1/20
#> x Fold3: model  1/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model  2/20
#> v Fold3: model  2/20
#> i Fold3: model  2/20
#> x Fold3: model  2/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model  3/20
#> v Fold3: model  3/20
#> i Fold3: model  3/20
#> x Fold3: model  3/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model  4/20
#> v Fold3: model  4/20
#> i Fold3: model  4/20
#> x Fold3: model  4/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model  5/20
#> v Fold3: model  5/20
#> i Fold3: model  5/20
#> x Fold3: model  5/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model  6/20
#> v Fold3: model  6/20
#> i Fold3: model  6/20
#> x Fold3: model  6/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model  7/20
#> v Fold3: model  7/20
#> i Fold3: model  7/20
#> x Fold3: model  7/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model  8/20
#> v Fold3: model  8/20
#> i Fold3: model  8/20
#> x Fold3: model  8/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model  9/20
#> v Fold3: model  9/20
#> i Fold3: model  9/20
#> x Fold3: model  9/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model 10/20
#> v Fold3: model 10/20
#> i Fold3: model 10/20
#> x Fold3: model 10/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model 11/20
#> v Fold3: model 11/20
#> i Fold3: model 11/20
#> x Fold3: model 11/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model 12/20
#> v Fold3: model 12/20
#> i Fold3: model 12/20
#> x Fold3: model 12/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model 13/20
#> v Fold3: model 13/20
#> i Fold3: model 13/20
#> x Fold3: model 13/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model 14/20
#> v Fold3: model 14/20
#> i Fold3: model 14/20
#> x Fold3: model 14/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model 15/20
#> v Fold3: model 15/20
#> i Fold3: model 15/20
#> x Fold3: model 15/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model 16/20
#> v Fold3: model 16/20
#> i Fold3: model 16/20
#> x Fold3: model 16/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model 17/20
#> v Fold3: model 17/20
#> i Fold3: model 17/20
#> x Fold3: model 17/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model 18/20
#> v Fold3: model 18/20
#> i Fold3: model 18/20
#> x Fold3: model 18/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model 19/20
#> v Fold3: model 19/20
#> i Fold3: model 19/20
#> x Fold3: model 19/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold3: model 20/20
#> v Fold3: model 20/20
#> i Fold3: model 20/20
#> x Fold3: model 20/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: formula
#> v Fold4: formula
#> i Fold4: model  1/20
#> v Fold4: model  1/20
#> i Fold4: model  1/20
#> x Fold4: model  1/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model  2/20
#> v Fold4: model  2/20
#> i Fold4: model  2/20
#> x Fold4: model  2/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model  3/20
#> v Fold4: model  3/20
#> i Fold4: model  3/20
#> x Fold4: model  3/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model  4/20
#> v Fold4: model  4/20
#> i Fold4: model  4/20
#> x Fold4: model  4/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model  5/20
#> v Fold4: model  5/20
#> i Fold4: model  5/20
#> x Fold4: model  5/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model  6/20
#> v Fold4: model  6/20
#> i Fold4: model  6/20
#> x Fold4: model  6/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model  7/20
#> v Fold4: model  7/20
#> i Fold4: model  7/20
#> x Fold4: model  7/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model  8/20
#> v Fold4: model  8/20
#> i Fold4: model  8/20
#> x Fold4: model  8/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model  9/20
#> v Fold4: model  9/20
#> i Fold4: model  9/20
#> x Fold4: model  9/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model 10/20
#> v Fold4: model 10/20
#> i Fold4: model 10/20
#> x Fold4: model 10/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model 11/20
#> v Fold4: model 11/20
#> i Fold4: model 11/20
#> x Fold4: model 11/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model 12/20
#> v Fold4: model 12/20
#> i Fold4: model 12/20
#> x Fold4: model 12/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model 13/20
#> v Fold4: model 13/20
#> i Fold4: model 13/20
#> x Fold4: model 13/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model 14/20
#> v Fold4: model 14/20
#> i Fold4: model 14/20
#> x Fold4: model 14/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model 15/20
#> v Fold4: model 15/20
#> i Fold4: model 15/20
#> x Fold4: model 15/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model 16/20
#> v Fold4: model 16/20
#> i Fold4: model 16/20
#> x Fold4: model 16/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model 17/20
#> v Fold4: model 17/20
#> i Fold4: model 17/20
#> x Fold4: model 17/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model 18/20
#> v Fold4: model 18/20
#> i Fold4: model 18/20
#> x Fold4: model 18/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model 19/20
#> v Fold4: model 19/20
#> i Fold4: model 19/20
#> x Fold4: model 19/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold4: model 20/20
#> v Fold4: model 20/20
#> i Fold4: model 20/20
#> x Fold4: model 20/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: formula
#> v Fold5: formula
#> i Fold5: model  1/20
#> v Fold5: model  1/20
#> i Fold5: model  1/20
#> x Fold5: model  1/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model  2/20
#> v Fold5: model  2/20
#> i Fold5: model  2/20
#> x Fold5: model  2/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model  3/20
#> v Fold5: model  3/20
#> i Fold5: model  3/20
#> x Fold5: model  3/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model  4/20
#> v Fold5: model  4/20
#> i Fold5: model  4/20
#> x Fold5: model  4/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model  5/20
#> v Fold5: model  5/20
#> i Fold5: model  5/20
#> x Fold5: model  5/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model  6/20
#> v Fold5: model  6/20
#> i Fold5: model  6/20
#> x Fold5: model  6/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model  7/20
#> v Fold5: model  7/20
#> i Fold5: model  7/20
#> x Fold5: model  7/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model  8/20
#> v Fold5: model  8/20
#> i Fold5: model  8/20
#> x Fold5: model  8/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model  9/20
#> v Fold5: model  9/20
#> i Fold5: model  9/20
#> x Fold5: model  9/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model 10/20
#> v Fold5: model 10/20
#> i Fold5: model 10/20
#> x Fold5: model 10/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model 11/20
#> v Fold5: model 11/20
#> i Fold5: model 11/20
#> x Fold5: model 11/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model 12/20
#> v Fold5: model 12/20
#> i Fold5: model 12/20
#> x Fold5: model 12/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model 13/20
#> v Fold5: model 13/20
#> i Fold5: model 13/20
#> x Fold5: model 13/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model 14/20
#> v Fold5: model 14/20
#> i Fold5: model 14/20
#> x Fold5: model 14/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model 15/20
#> v Fold5: model 15/20
#> i Fold5: model 15/20
#> x Fold5: model 15/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model 16/20
#> v Fold5: model 16/20
#> i Fold5: model 16/20
#> x Fold5: model 16/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model 17/20
#> v Fold5: model 17/20
#> i Fold5: model 17/20
#> x Fold5: model 17/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model 18/20
#> v Fold5: model 18/20
#> i Fold5: model 18/20
#> x Fold5: model 18/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model 19/20
#> v Fold5: model 19/20
#> i Fold5: model 19/20
#> x Fold5: model 19/20: Error: Result 1 must be a single string, not NULL of length 0
#> i Fold5: model 20/20
#> v Fold5: model 20/20
#> i Fold5: model 20/20
#> x Fold5: model 20/20: Error: Result 1 must be a single string, not NULL of length 0
#> Warning: All models failed in tune_grid(). See the `.notes` column.

glmnet_stage_1_cv_results_tbl
#> #  5-fold cross-validation 
#> # A tibble: 5 x 4
#>   splits              id    .metrics .notes           
#> * <list>              <chr> <list>   <list>           
#> 1 <split [7.6K/1.9K]> Fold1 <NULL>   <tibble [20 x 1]>
#> 2 <split [7.6K/1.9K]> Fold2 <NULL>   <tibble [20 x 1]>
#> 3 <split [7.6K/1.9K]> Fold3 <NULL>   <tibble [20 x 1]>
#> 4 <split [7.6K/1.9K]> Fold4 <NULL>   <tibble [20 x 1]>
#> 5 <split [7.6K/1.9K]> Fold5 <NULL>   <tibble [20 x 1]>

Created on 2020-02-25 by the reprex package (v0.3.0)

They look identical except for the error during the tune_grid step :cry:

1 Like

Changing only the name of the csv file (because I'm a stickler for not naming objects after reserved names) and running your reprex I get

#  5-fold cross-validation 
# A tibble: 5 x 4
  splits              id    .metrics          .notes          
* <list>              <chr> <list>            <list>          
1 <split [7.6K/1.9K]> Fold1 <tibble [80 × 5]> <tibble [0 × 1]>
2 <split [7.6K/1.9K]> Fold2 <tibble [80 × 5]> <tibble [0 × 1]>
3 <split [7.6K/1.9K]> Fold3 <tibble [80 × 5]> <tibble [0 × 1]>
4 <split [7.6K/1.9K]> Fold4 <tibble [80 × 5]> <tibble [0 × 1]>
5 <split [7.6K/1.9K]> Fold5 <tibble [80 × 5]> <tibble [0 × 1]>

with no warnings or error messages.

Two possibilities come to mind: 1) it's Microsoft's fault (actually, I have no idea; I only have macOS to test this on), or 2) the packages or R need to be updated to stay in sync with each other.

Thanks @technocrat!

I double-checked the R and package versions; they are all the latest versions available to install :cry:

Unfortunately, I don't think this is Microsoft's problem either. It works on a colleague's computer, which has the same setup as mine. :cry: :cry:

FWIW, this is my sessionInfo() output:

R version 3.6.2 (2019-12-12)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18363)

Matrix products: default

locale:
[1] LC_COLLATE=English_Australia.1252  LC_CTYPE=English_Australia.1252    LC_MONETARY=English_Australia.1252 LC_NUMERIC=C                      
[5] LC_TIME=English_Australia.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] forcats_0.4.0     stringr_1.4.0     purrr_0.3.3       readr_1.3.1       tibble_2.1.3      ggplot2_3.2.1     tidyverse_1.2.1   janitor_1.2.0    
 [9] textrecipes_0.0.2 yardstick_0.0.3   recipes_0.1.9     dplyr_0.8.4       rsample_0.0.5     tidyr_1.0.2       parsnip_0.0.5     dials_0.0.4      
[17] scales_1.0.0      tune_0.0.1       

loaded via a namespace (and not attached):
 [1] nlme_3.1-142        lubridate_1.7.4     DiceDesign_1.8-1    httr_1.4.1          SnowballC_0.6.0     tools_3.6.2         backports_1.1.5    
 [8] utf8_1.1.4          R6_2.4.1            rpart_4.1-15        lazyeval_0.2.2      colorspace_1.4-1    nnet_7.3-12         withr_2.1.2        
[15] tidyselect_1.0.0    compiler_3.6.2      glmnet_3.0-2        cli_2.0.1           rvest_0.3.5         lgr_0.3.3           xml2_1.2.2         
[22] rsparse_0.3.3.4     digest_0.6.25       RhpcBLASctl_0.20-17 pkgconfig_2.0.3     lhs_1.0.1           textfeatures_0.3.3  rlang_0.4.4        
[29] readxl_1.3.1        rstudioapi_0.10     shape_1.4.4         generics_0.0.2      jsonlite_1.6        tokenizers_0.2.1    magrittr_1.5       
[36] text2vec_0.6        Matrix_1.2-18       Rcpp_1.0.3          munsell_0.5.0       fansi_0.4.1         GPfit_1.0-8         lifecycle_0.1.0    
[43] furrr_0.1.0         stringi_1.4.6       pROC_1.15.3         snakecase_0.11.0    MASS_7.3-51.4       plyr_1.8.5          grid_3.6.2         
[50] parallel_3.6.2      listenv_0.8.0       mlapi_0.1.0         crayon_1.3.4        lattice_0.20-38     haven_2.1.1         splines_3.6.2      
[57] hms_0.5.3           pillar_1.4.3        codetools_0.2-16    stopwords_1.0       glue_1.3.1          data.table_1.12.2   modelr_0.1.5       
[64] float_0.2-3         vctrs_0.2.3         foreach_1.4.7       cellranger_1.1.0    gtable_0.3.0        future_1.16.0       assertthat_0.2.1   
[71] gower_0.2.1         prodlim_2018.04.18  broom_0.5.2         class_7.3-15        survival_3.1-8      timeDate_3043.102   iterators_1.0.12   
[78] hardhat_0.1.1       lava_1.6.6          workflows_0.1.0     globals_0.12.5      ellipsis_0.3.0      ipred_0.9-9

Passing strange. Not the environment, the code or the data. What happens with

glmnet_grid <- grid_max_entropy(glmnet_params, size = 16)

Seems to work fine

    penalty mixture
      <dbl>   <dbl>
 1 1.86e- 1  0.521 
 2 7.21e- 9  0.380 
 3 1.80e- 4  0.0218
 4 2.36e-10  0.0814
 5 1.26e- 4  0.652 
 6 2.77e- 1  0.725 
 7 2.29e- 1  0.978 
 8 1.20e-10  0.564 
 9 1.53e- 9  0.940 
10 1.45e- 7  0.788 
11 1.94e- 4  0.275 
12 1.45e- 5  0.971 
13 1.48e- 3  0.848 
14 3.51e- 6  0.483 
15 5.70e- 1  0.0622
16 6.42e- 7  0.224 

I've fixed it, @technocrat! It was hard to catch, but I noticed that the only difference between my colleague's computer (where it worked) and mine was the version of the yardstick package. I definitely thought I had reinstalled all the packages, but I probably missed that one. Upgrading yardstick from 0.0.3 to 0.0.5 solved it. Thanks for your help and patience; I really appreciate it.

Great! Glad you got there.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.