tidymodels logistic_reg() and fit_resamples() error

Any idea why I am getting an error when using logistic_reg() with the "glm" engine together with fit_resamples()?

library(reprex)
#> Warning: package 'reprex' was built under R version 3.6.3
library(tidyverse)
library(tidymodels)
#> -- Attaching packages --------------------------------------------------------------- tidymodels 0.1.0 --
#> v broom     0.5.4          v rsample   0.0.5     
#> v dials     0.0.4          v tune      0.0.1.9000
#> v infer     0.5.1          v workflows 0.1.0     
#> v parsnip   0.0.5          v yardstick 0.0.5     
#> v recipes   0.1.9
#> -- Conflicts ------------------------------------------------------------------ tidymodels_conflicts() --
#> x scales::discard()   masks purrr::discard()
#> x dplyr::filter()     masks stats::filter()
#> x recipes::fixed()    masks stringr::fixed()
#> x dplyr::lag()        masks stats::lag()
#> x dials::margin()     masks ggplot2::margin()
#> x yardstick::spec()   masks readr::spec()
#> x recipes::step()     masks stats::step()
#> x recipes::yj_trans() masks scales::yj_trans()

# Fix the RNG seed so the random fold assignment is reproducible.
set.seed(1234)
# Build the cross-validation folds (reported as 10-fold in the output below).
# NOTE: `vs` is left as it ships in `mtcars` (not a factor); per the reply
# further down, this is what makes every fold fail.
cv_fold_mtc <- vfold_cv(mtcars)

# Model specification: logistic regression fitted with the "glm" engine.
glm_mod <- 
  logistic_reg() %>%        
  set_engine("glm") 

# Fit `vs ~ mpg` on each resample.
# verbose = TRUE prints the per-fold progress lines shown in the output;
# save_pred = TRUE asks for the predictions to be kept (`.predictions`).
fit_resamples(
  glm_mod,
  vs ~ mpg,
  resamples = cv_fold_mtc,
  control = tune::control_resamples(verbose = TRUE,
                                    save_pred = TRUE)
)
#> i Fold01: formula
#> v Fold01: formula
#> i Fold01: model
#> x Fold01: internal: Error: $ operator is invalid for atomic vectors
#> i Fold02: formula
#> v Fold02: formula
#> i Fold02: model
#> x Fold02: internal: Error: $ operator is invalid for atomic vectors
#> i Fold03: formula
#> v Fold03: formula
#> i Fold03: model
#> x Fold03: internal: Error: $ operator is invalid for atomic vectors
#> i Fold04: formula
#> v Fold04: formula
#> i Fold04: model
#> x Fold04: internal: Error: $ operator is invalid for atomic vectors
#> i Fold05: formula
#> v Fold05: formula
#> i Fold05: model
#> x Fold05: internal: Error: $ operator is invalid for atomic vectors
#> i Fold06: formula
#> v Fold06: formula
#> i Fold06: model
#> x Fold06: internal: Error: $ operator is invalid for atomic vectors
#> i Fold07: formula
#> v Fold07: formula
#> i Fold07: model
#> x Fold07: internal: Error: $ operator is invalid for atomic vectors
#> i Fold08: formula
#> v Fold08: formula
#> i Fold08: model
#> x Fold08: internal: Error: $ operator is invalid for atomic vectors
#> i Fold09: formula
#> v Fold09: formula
#> i Fold09: model
#> x Fold09: internal: Error: $ operator is invalid for atomic vectors
#> i Fold10: formula
#> v Fold10: formula
#> i Fold10: model
#> x Fold10: internal: Error: $ operator is invalid for atomic vectors
#> Warning: All models failed in [fit_resamples()]. See the `.notes` column.
#> #  10-fold cross-validation 
#> # A tibble: 10 x 5
#>    splits         id     .metrics .notes           .predictions
#>  * <list>         <chr>  <list>   <list>           <list>      
#>  1 <split [28/4]> Fold01 <NULL>   <tibble [1 x 1]> <NULL>      
#>  2 <split [28/4]> Fold02 <NULL>   <tibble [1 x 1]> <NULL>      
#>  3 <split [29/3]> Fold03 <NULL>   <tibble [1 x 1]> <NULL>      
#>  4 <split [29/3]> Fold04 <NULL>   <tibble [1 x 1]> <NULL>      
#>  5 <split [29/3]> Fold05 <NULL>   <tibble [1 x 1]> <NULL>      
#>  6 <split [29/3]> Fold06 <NULL>   <tibble [1 x 1]> <NULL>      
#>  7 <split [29/3]> Fold07 <NULL>   <tibble [1 x 1]> <NULL>      
#>  8 <split [29/3]> Fold08 <NULL>   <tibble [1 x 1]> <NULL>      
#>  9 <split [29/3]> Fold09 <NULL>   <tibble [1 x 1]> <NULL>      
#> 10 <split [29/3]> Fold10 <NULL>   <tibble [1 x 1]> <NULL>

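For logistic regression (a classification model), the outcome is required to be a factor. In `mtcars`, `vs` is numeric, so convert it to a factor before creating the resamples: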

library(tidyverse)
library(tidymodels)
#> ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────── tidymodels 0.1.0 ──
#> ✓ broom     0.5.4          ✓ rsample   0.0.5     
#> ✓ dials     0.0.4          ✓ tune      0.0.1.9000
#> ✓ infer     0.5.1          ✓ workflows 0.1.0     
#> ✓ parsnip   0.0.5          ✓ yardstick 0.0.5     
#> ✓ recipes   0.1.9.9000
#> ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────── tidymodels_conflicts() ──
#> x scales::discard() masks purrr::discard()
#> x dplyr::filter()   masks stats::filter()
#> x recipes::fixed()  masks stringr::fixed()
#> x dplyr::lag()      masks stats::lag()
#> x dials::margin()   masks ggplot2::margin()
#> x yardstick::spec() masks readr::spec()
#> x recipes::step()   masks stats::step()

# Convert the outcome `vs` to a factor; classification models need a factor
# outcome. This assigns a modified copy to the name `mtcars` in the session.
mtcars <- 
  mtcars %>% 
  mutate(vs = factor(vs))

# Fix the RNG seed so the random fold assignment is reproducible.
set.seed(1234)
# Build the cross-validation folds (reported as 10-fold in the output below).
cv_fold_mtc <- vfold_cv(mtcars)

# Model specification: logistic regression fitted with the "glm" engine.
glm_mod <- 
  logistic_reg() %>%        
  set_engine("glm") 

# Fit `vs ~ mpg` on each resample.
# verbose = TRUE prints the per-fold progress lines shown in the output;
# save_pred = TRUE asks for the predictions to be kept (`.predictions`).
# NOTE(review): the models now fit, but some folds still report a `roc_auc`
# error ("No control observation." / "No case observation."). Presumably the
# 3-4 row assessment sets of those folds contain only one class of `vs` --
# confirm against the fold contents.
fit_resamples(
  glm_mod,
  vs ~ mpg,
  resamples = cv_fold_mtc,
  control = tune::control_resamples(verbose = TRUE,
                                    save_pred = TRUE))
#> i Fold01: formula
#> ✓ Fold01: formula
#> i Fold01: model
#> ✓ Fold01: model
#> i Fold01: model (predictions)
#> i Fold02: formula
#> ✓ Fold02: formula
#> i Fold02: model
#> ✓ Fold02: model
#> i Fold02: model (predictions)
#> i Fold03: formula
#> ✓ Fold03: formula
#> i Fold03: model
#> ✓ Fold03: model
#> i Fold03: model (predictions)
#> i Fold04: formula
#> ✓ Fold04: formula
#> i Fold04: model
#> ✓ Fold04: model
#> i Fold04: model (predictions)
#> x Fold04: internal: Error: In metric: `roc_auc`
#> No control observation.
#> i Fold05: formula
#> ✓ Fold05: formula
#> i Fold05: model
#> ✓ Fold05: model
#> i Fold05: model (predictions)
#> x Fold05: internal: Error: In metric: `roc_auc`
#> No control observation.
#> i Fold06: formula
#> ✓ Fold06: formula
#> i Fold06: model
#> ✓ Fold06: model
#> i Fold06: model (predictions)
#> i Fold07: formula
#> ✓ Fold07: formula
#> i Fold07: model
#> ✓ Fold07: model
#> i Fold07: model (predictions)
#> i Fold08: formula
#> ✓ Fold08: formula
#> i Fold08: model
#> ✓ Fold08: model
#> i Fold08: model (predictions)
#> i Fold09: formula
#> ✓ Fold09: formula
#> i Fold09: model
#> ✓ Fold09: model
#> i Fold09: model (predictions)
#> x Fold09: internal: Error: In metric: `roc_auc`
#> No case observation.
#> i Fold10: formula
#> ✓ Fold10: formula
#> i Fold10: model
#> ✓ Fold10: model
#> i Fold10: model (predictions)
#> #  10-fold cross-validation 
#> # A tibble: 10 x 5
#>    splits         id     .metrics         .notes           .predictions    
#>  * <list>         <chr>  <list>           <list>           <list>          
#>  1 <split [28/4]> Fold01 <tibble [2 × 3]> <tibble [0 × 1]> <tibble [4 × 5]>
#>  2 <split [28/4]> Fold02 <tibble [2 × 3]> <tibble [0 × 1]> <tibble [4 × 5]>
#>  3 <split [29/3]> Fold03 <tibble [2 × 3]> <tibble [0 × 1]> <tibble [3 × 5]>
#>  4 <split [29/3]> Fold04 <tibble [0 × 3]> <tibble [1 × 1]> <tibble [0 × 5]>
#>  5 <split [29/3]> Fold05 <tibble [0 × 3]> <tibble [1 × 1]> <tibble [0 × 5]>
#>  6 <split [29/3]> Fold06 <tibble [2 × 3]> <tibble [0 × 1]> <tibble [3 × 5]>
#>  7 <split [29/3]> Fold07 <tibble [2 × 3]> <tibble [0 × 1]> <tibble [3 × 5]>
#>  8 <split [29/3]> Fold08 <tibble [2 × 3]> <tibble [0 × 1]> <tibble [3 × 5]>
#>  9 <split [29/3]> Fold09 <tibble [0 × 3]> <tibble [1 × 1]> <tibble [0 × 5]>
#> 10 <split [29/3]> Fold10 <tibble [2 × 3]> <tibble [0 × 1]> <tibble [3 × 5]>

Created on 2020-03-17 by the reprex package (v0.3.0)

The original error message (`$ operator is invalid for atomic vectors`) is not helpful (parsnip has a better one), and we were already in the process of fixing it (I just accepted the PR - thanks @julia).

With that fix, the error message is more informative:

library(tidyverse)
library(tidymodels)
#> ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────── tidymodels 0.1.0 ──
#> ✓ broom     0.5.4          ✓ rsample   0.0.5     
#> ✓ dials     0.0.4          ✓ tune      0.0.1.9000
#> ✓ infer     0.5.1          ✓ workflows 0.1.0     
#> ✓ parsnip   0.0.5          ✓ yardstick 0.0.5     
#> ✓ recipes   0.1.9.9000
#> ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────── tidymodels_conflicts() ──
#> x scales::discard() masks purrr::discard()
#> x dplyr::filter()   masks stats::filter()
#> x recipes::fixed()  masks stringr::fixed()
#> x dplyr::lag()      masks stats::lag()
#> x dials::margin()   masks ggplot2::margin()
#> x yardstick::spec() masks readr::spec()
#> x recipes::step()   masks stats::step()

# Same script as in the question, re-run to show the improved error message.
# NOTE(review): `vs` is not converted to a factor in this snippet, so the
# failure is expected here -- presumably this ran in a fresh session with the
# unmodified `mtcars`.

# Fix the RNG seed so the random fold assignment is reproducible.
set.seed(1234)
# Build the cross-validation folds (reported as 10-fold in the output below).
cv_fold_mtc <- vfold_cv(mtcars)

# Model specification: logistic regression fitted with the "glm" engine.
glm_mod <- 
  logistic_reg() %>%        
  set_engine("glm") 

# Fit `vs ~ mpg` on each resample.
# verbose = TRUE prints the per-fold progress lines shown in the output;
# save_pred = TRUE asks for the predictions to be kept (`.predictions`).
fit_resamples(
  glm_mod,
  vs ~ mpg,
  resamples = cv_fold_mtc,
  control = tune::control_resamples(verbose = TRUE,
                                    save_pred = TRUE))
#> i Fold01: formula
#> ✓ Fold01: formula
#> i Fold01: model
#> x Fold01: model: Error: For classification models, the outcome should be a factor.
#> i Fold02: formula
#> ✓ Fold02: formula
#> i Fold02: model
#> x Fold02: model: Error: For classification models, the outcome should be a factor.
#> i Fold03: formula
#> ✓ Fold03: formula
#> i Fold03: model
#> x Fold03: model: Error: For classification models, the outcome should be a factor.
#> i Fold04: formula
#> ✓ Fold04: formula
#> i Fold04: model
#> x Fold04: model: Error: For classification models, the outcome should be a factor.
#> i Fold05: formula
#> ✓ Fold05: formula
#> i Fold05: model
#> x Fold05: model: Error: For classification models, the outcome should be a factor.
#> i Fold06: formula
#> ✓ Fold06: formula
#> i Fold06: model
#> x Fold06: model: Error: For classification models, the outcome should be a factor.
#> i Fold07: formula
#> ✓ Fold07: formula
#> i Fold07: model
#> x Fold07: model: Error: For classification models, the outcome should be a factor.
#> i Fold08: formula
#> ✓ Fold08: formula
#> i Fold08: model
#> x Fold08: model: Error: For classification models, the outcome should be a factor.
#> i Fold09: formula
#> ✓ Fold09: formula
#> i Fold09: model
#> x Fold09: model: Error: For classification models, the outcome should be a factor.
#> i Fold10: formula
#> ✓ Fold10: formula
#> i Fold10: model
#> x Fold10: model: Error: For classification models, the outcome should be a factor.
#> Warning: All models failed in [fit_resamples()]. See the `.notes` column.
#> #  10-fold cross-validation 
#> # A tibble: 10 x 5
#>    splits         id     .metrics .notes           .predictions
#>  * <list>         <chr>  <list>   <list>           <list>      
#>  1 <split [28/4]> Fold01 <NULL>   <tibble [1 × 1]> <NULL>      
#>  2 <split [28/4]> Fold02 <NULL>   <tibble [1 × 1]> <NULL>      
#>  3 <split [29/3]> Fold03 <NULL>   <tibble [1 × 1]> <NULL>      
#>  4 <split [29/3]> Fold04 <NULL>   <tibble [1 × 1]> <NULL>      
#>  5 <split [29/3]> Fold05 <NULL>   <tibble [1 × 1]> <NULL>      
#>  6 <split [29/3]> Fold06 <NULL>   <tibble [1 × 1]> <NULL>      
#>  7 <split [29/3]> Fold07 <NULL>   <tibble [1 × 1]> <NULL>      
#>  8 <split [29/3]> Fold08 <NULL>   <tibble [1 × 1]> <NULL>      
#>  9 <split [29/3]> Fold09 <NULL>   <tibble [1 × 1]> <NULL>      
#> 10 <split [29/3]> Fold10 <NULL>   <tibble [1 × 1]> <NULL>

Created on 2020-03-17 by the reprex package (v0.3.0)

2 Likes

Thank you! One follow-up question: is there a recipes step (a `step_*()` function) that will handle this conversion, without using {dplyr} directly?

step_mutate()? For example, `step_mutate(vs = factor(vs))` inside the recipe.

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.