lme4 and tidymodels

Using lme4 package for tuning and recipe creation of variables:

  • I haven't been able to figure out how to add:
    • Recipe steps (step_other, step_dummy, etc.)
    • Tuning parameters (df in splines, etc.)
library(multilevelmod)
#> Loading required package: parsnip
# multilevelmod only attaches parsnip. vfold_cv(), recipe(), workflow(),
# fit_resamples() and metric_set() live in other tidymodels packages, so
# attach the meta-package as well.
library(tidymodels)

data(sleepstudy, package = "lme4")
sleepstudy

set.seed(9599)
folds <- vfold_cv(sleepstudy)

# Linear regression specification fit with the "lmer" engine.
lme_spec <-
  linear_reg() %>%
  set_mode("regression") %>%
  set_engine("lmer")

lme_wflow <-
  workflow() %>%
  add_recipe(recipe(Reaction ~ Days + Subject, data = sleepstudy)) %>%
  ## How do I add recipe steps and/or tuning parameters
  ## (e.g. splines df in Days, etc.) to the model?
  add_model(lme_spec, formula = Reaction ~ Days + (1 | Subject))

# The metric set and the control object must exist before fit_resamples() is
# called; previously `ctrl` was only defined in the commented-out tuning code
# and `multi_metric` was created after resampling and never used.
multi_metric <- metric_set(rmse, rsq, mae, ccc)
ctrl <- control_resamples(save_pred = TRUE, verbose = FALSE)

out <- fit_resamples(
  lme_wflow,
  resamples = folds,
  metrics = multi_metric,
  control = ctrl
)

## If I could do tuning I would do this instead!
# tune_parameters <- parameters(lme_wflow)
## space filling
# grid <- grid_max_entropy(tune_parameters, size = sizeoftrainmodels)
# grid_ctrl <- control_grid(save_pred = TRUE, verbose = FALSE)
#
# glmn_tune <- lme_wflow %>%
#   tune_grid(resamples = folds,
#             grid = grid,
#             control = grid_ctrl)

# Keep only the metric name, its resampled mean and its standard error.
collect_metrics(out) %>%
  dplyr::select(.metric, mean, std_err)

This is a little tricky but can be done with some modifications that I just added to multilevelmod.

One thing first: I would use grouped cross-validation (see below).

If you just want to resample (not tune), there are two approaches to using something like splines. You can:

  • Use a model formula with the spline call directly embedded (approach 1 below)

  • Use a recipe and a model formula with a slightly tricky model formula (approach 2)

Here's your example:

library(tidymodels)
#> ── Attaching packages ────────────────────────────────────── tidymodels 0.1.2 ──
#> ✓ broom     0.7.2           ✓ recipes   0.1.15.9000
#> ✓ dials     0.0.9.9000      ✓ rsample   0.0.8      
#> ✓ dplyr     1.0.2           ✓ tibble    3.0.4      
#> ✓ ggplot2   3.3.2           ✓ tidyr     1.1.2      
#> ✓ infer     0.5.3           ✓ tune      0.1.2      
#> ✓ modeldata 0.1.0           ✓ workflows 0.2.1      
#> ✓ parsnip   0.1.4           ✓ yardstick 0.0.7      
#> ✓ purrr     0.3.4
#> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
#> x purrr::discard() masks scales::discard()
#> x dplyr::filter()  masks stats::filter()
#> x dplyr::lag()     masks stats::lag()
#> x recipes::step()  masks stats::step()
library(multilevelmod) # Install current devel version

data(sleepstudy, package = "lme4")

# Grouped cross-validation: the resamples are formed by Subject, not by row.
set.seed(9599)
folds <- group_vfold_cv(sleepstudy, group = Subject)

# Linear regression specification fit with the "lmer" engine.
lme_spec <- linear_reg() %>% set_mode("regression") %>% set_engine("lmer")

# Approach 1: put the spline call directly in the model formula.

# The recipe has no steps; it only declares the outcome and the predictors.
# add_variables(outcomes = Reaction, predictors = c(Days, Subject)) could be
# used in place of the recipe.
plain_recipe <- recipe(Reaction ~ Days + Subject, data = sleepstudy)

# Spline term in Days as the fixed effect plus a random intercept per Subject.
# NOTE(review): ns() is called without `df`, so presumably it falls back to its
# default basis -- confirm against the splines::ns documentation.
formula_1 <- Reaction ~ splines::ns(Days) + (1 | Subject)

lme_wflow_1 <-
  workflow() %>%
  add_recipe(plain_recipe) %>%
  add_model(lme_spec, formula = formula_1)

out_1 <- fit_resamples(lme_wflow_1, resamples = folds)
#> 
#> Attaching package: 'rlang'
#> The following objects are masked from 'package:purrr':
#> 
#>     %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
#>     flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
#>     splice
#> 
#> Attaching package: 'vctrs'
#> The following object is masked from 'package:tibble':
#> 
#>     data_frame
#> The following object is masked from 'package:dplyr':
#> 
#>     data_frame
#> Loading required package: Matrix
#> 
#> Attaching package: 'Matrix'
#> The following objects are masked from 'package:tidyr':
#> 
#>     expand, pack, unpack
# Summarise the resampling results for approach 1 (one row per metric).
collect_metrics(out_1)
#> # A tibble: 2 x 6
#>   .metric .estimator   mean     n std_err .config             
#>   <chr>   <chr>       <dbl> <int>   <dbl> <chr>               
#> 1 rmse    standard   43.5      18  5.80   Preprocessor1_Model1
#> 2 rsq     standard    0.646    18  0.0604 Preprocessor1_Model1

# Approach 2: build the spline columns in a recipe, then use a model formula
# that refers to the columns you know the recipe will produce.
spline_recipe <-
  recipe(Reaction ~ Days + Subject, data = sleepstudy) %>%
  step_ns(Days, deg_free = 3)

# `.` stands for every column other than Reaction, `-Subject` takes Subject
# back out of that set, and `(1 | Subject)` adds a random intercept per subject.
formula_2 <- Reaction ~ . -Subject + (1 | Subject)

lme_wflow_2 <-
  workflow() %>%
  add_recipe(spline_recipe) %>%
  add_model(lme_spec, formula = formula_2)

out_2 <- fit_resamples(lme_wflow_2, resamples = folds)
collect_metrics(out_2)
#> # A tibble: 2 x 6
#>   .metric .estimator   mean     n std_err .config             
#>   <chr>   <chr>       <dbl> <int>   <dbl> <chr>               
#> 1 rmse    standard   43.6      18  5.89   Preprocessor1_Model1
#> 2 rsq     standard    0.646    18  0.0614 Preprocessor1_Model1

Created on 2020-12-18 by the reprex package (v0.3.0)

The model formula says "use all of the columns that are not Reaction" (symbolized by the .), but leave Subject out of the fixed effects so that no dummy variables are made from it (the -Subject), and instead add a random intercept for the subjects ((1 | Subject)).

For tuning, I thought that there was an lme4 bug but it was an issue with my code.

You can use the same model formula approach:

library(tidymodels)
#> ── Attaching packages ────────────────────────────────────── tidymodels 0.1.2 ──
#> ✓ broom     0.7.2           ✓ recipes   0.1.15.9000
#> ✓ dials     0.0.9.9000      ✓ rsample   0.0.8      
#> ✓ dplyr     1.0.2           ✓ tibble    3.0.4      
#> ✓ ggplot2   3.3.2           ✓ tidyr     1.1.2      
#> ✓ infer     0.5.3           ✓ tune      0.1.2      
#> ✓ modeldata 0.1.0           ✓ workflows 0.2.1      
#> ✓ parsnip   0.1.4           ✓ yardstick 0.0.7      
#> ✓ purrr     0.3.4
#> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
#> x purrr::discard() masks scales::discard()
#> x dplyr::filter()  masks stats::filter()
#> x dplyr::lag()     masks stats::lag()
#> x recipes::step()  masks stats::step()
library(multilevelmod) # Install current devel version

data(sleepstudy, package = "lme4")

# Grouped cross-validation: the resamples are formed by Subject, not by row.
set.seed(9599)
folds <- group_vfold_cv(sleepstudy, group = Subject)

# Linear regression specification fit with the "lmer" engine.
lme_spec <- linear_reg() %>% set_mode("regression") %>% set_engine("lmer")

# The spline degrees of freedom are marked with tune() so that tune_grid()
# can vary them; step_novel() is applied to Subject afterwards.
tunable_recipe <-
  recipe(Reaction ~ Days + Subject, data = sleepstudy) %>%
  step_ns(Days, deg_free = tune()) %>%
  step_novel(Subject)

# All recipe columns except Subject as fixed effects, plus a random intercept
# per Subject.
formula_3 <- Reaction ~ . -Subject + (1 | Subject)

lme_wflow_3 <-
  workflow() %>%
  add_recipe(tunable_recipe) %>%
  add_model(lme_spec, formula = formula_3)

# Candidate values of deg_free to evaluate: 1 through 8.
deg_free_grid <- tibble(deg_free = 1:8)

out_3 <- tune_grid(lme_wflow_3, resamples = folds, grid = deg_free_grid)
#> 
#> Attaching package: 'rlang'
#> The following objects are masked from 'package:purrr':
#> 
#>     %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
#>     flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
#>     splice
#> 
#> Attaching package: 'vctrs'
#> The following object is masked from 'package:tibble':
#> 
#>     data_frame
#> The following object is masked from 'package:dplyr':
#> 
#>     data_frame
#> Loading required package: Matrix
#> 
#> Attaching package: 'Matrix'
#> The following objects are masked from 'package:tidyr':
#> 
#>     expand, pack, unpack

# Summarise the tuning results: one row per deg_free value and metric.
collect_metrics(out_3)
#> # A tibble: 16 x 7
#>    deg_free .metric .estimator   mean     n std_err .config             
#>       <int> <chr>   <chr>       <dbl> <int>   <dbl> <chr>               
#>  1        1 rmse    standard   43.5      18  5.80   Preprocessor1_Model1
#>  2        1 rsq     standard    0.646    18  0.0604 Preprocessor1_Model1
#>  3        2 rmse    standard   43.4      18  5.89   Preprocessor2_Model1
#>  4        2 rsq     standard    0.651    18  0.0609 Preprocessor2_Model1
#>  5        3 rmse    standard   43.6      18  5.89   Preprocessor3_Model1
#>  6        3 rsq     standard    0.646    18  0.0614 Preprocessor3_Model1
#>  7        4 rmse    standard   43.5      18  5.91   Preprocessor4_Model1
#>  8        4 rsq     standard    0.647    18  0.0607 Preprocessor4_Model1
#>  9        5 rmse    standard   43.6      18  5.90   Preprocessor5_Model1
#> 10        5 rsq     standard    0.644    18  0.0606 Preprocessor5_Model1
#> 11        6 rmse    standard   43.6      18  5.91   Preprocessor6_Model1
#> 12        6 rsq     standard    0.643    18  0.0610 Preprocessor6_Model1
#> 13        7 rmse    standard   43.7      18  5.88   Preprocessor7_Model1
#> 14        7 rsq     standard    0.640    18  0.0617 Preprocessor7_Model1
#> 15        8 rmse    standard   43.8      18  5.86   Preprocessor8_Model1
#> 16        8 rsq     standard    0.634    18  0.0616 Preprocessor8_Model1

Created on 2020-12-18 by the reprex package (v0.3.0)

So let us know if there are any other catches that you can think of or find. We'd like to test more scenarios.

Hey Max,

Thanks for your feedback. When I ran your code it wasn't working. The error I am getting is:

$.notes

[1] "preprocessor 1/1, model 1/1 (predictions): Error in terms.formula(formula(x, fixed.only = TRUE)): '.' in formula and no 'data' argument"

# Reproduction script for the "'.' in formula and no 'data' argument" error
# reported above. It repeats approach 2 from the answer unchanged.
library(tidymodels)
library(multilevelmod) # Installed current devel version
library(lme4)
# Print the R session details so package versions can be compared.
sessionInfo()

data(sleepstudy, package = "lme4")

# Grouped cross-validation by Subject, with the same seed as the answer.
set.seed(9599)
folds <- group_vfold_cv(sleepstudy, group = Subject)

# Linear regression specification fit with the "lmer" engine.
lme_spec <-
  linear_reg() %>%
  set_mode("regression") %>%
  set_engine("lmer")

# Recipe with a natural-spline step on Days (deg_free = 3) and a model formula
# that uses `.` for the recipe's columns, removes Subject from them, and adds
# a random intercept per Subject.
lme_wflow_2 <-
  workflow() %>%
  add_recipe(
    recipe(Reaction ~ Days + Subject, data = sleepstudy) %>%
      step_ns(Days, deg_free = 3)
  ) %>%
  add_model(lme_spec, formula = Reaction ~ . -Subject + (1 | Subject))

out_2 <- fit_resamples(lme_wflow_2, resamples = folds)

# Look at the notes stored for the 18th resample.
# NOTE(review): presumably this is where the quoted error message was read
# from -- confirm whether the other resamples carry the same note.
c(out_2$.notes[[18]])

My session info is below. Am I missing something here?

Can you run sessioninfo::session_info()?

> sessioninfo::session_info()
─ Session info ─────────────────────────────────────────────────────────────────────────────────────────────
 setting  value                       
 version  R version 3.6.2 (2019-12-12)
 os       macOS Mojave 10.14.6        
 system   x86_64, darwin15.6.0        
 ui       RStudio                     
 language (EN)                        
 collate  en_US.UTF-8                 
 ctype    en_US.UTF-8                 
 tz       America/New_York            
 date     2020-12-21                  

─ Packages ─────────────────────────────────────────────────────────────────────────────────────────────────
 package       * version    date       lib source                                   
 assertthat      0.2.1      2019-03-21 [1] CRAN (R 3.6.0)                           
 backports       1.2.1      2020-12-09 [1] CRAN (R 3.6.2)                           
 boot            1.3-25     2020-04-26 [1] CRAN (R 3.6.2)                           
 broom         * 0.7.2      2020-10-20 [1] CRAN (R 3.6.2)                           
 class           7.3-17     2020-04-26 [1] CRAN (R 3.6.2)                           
 cli             2.2.0      2020-11-20 [1] CRAN (R 3.6.2)                           
 codetools       0.2-18     2020-11-04 [1] CRAN (R 3.6.2)                           
 colorspace      2.0-0      2020-11-11 [1] CRAN (R 3.6.2)                           
 crayon          1.3.4      2017-09-16 [1] CRAN (R 3.6.0)                           
 dials         * 0.0.9      2020-09-16 [1] CRAN (R 3.6.2)                           
 DiceDesign      1.8-1      2019-07-31 [1] CRAN (R 3.6.0)                           
 digest          0.6.27     2020-10-24 [1] CRAN (R 3.6.2)                           
 dplyr         * 1.0.2      2020-08-18 [1] CRAN (R 3.6.2)                           
 ellipsis        0.3.1      2020-05-15 [1] CRAN (R 3.6.2)                           
 evaluate        0.14       2019-05-28 [1] CRAN (R 3.6.0)                           
 fansi           0.4.1      2020-01-08 [1] CRAN (R 3.6.0)                           
 foreach         1.5.1      2020-10-15 [1] CRAN (R 3.6.2)                           
 furrr           0.2.1      2020-10-21 [1] CRAN (R 3.6.2)                           
 future          1.21.0     2020-12-10 [1] CRAN (R 3.6.2)                           
 generics        0.1.0      2020-10-31 [1] CRAN (R 3.6.2)                           
 ggplot2       * 3.3.2      2020-06-19 [1] CRAN (R 3.6.2)                           
 globals         0.14.0     2020-11-22 [1] CRAN (R 3.6.2)                           
 glue            1.4.2      2020-08-27 [1] CRAN (R 3.6.2)                           
 gower           0.2.2      2020-06-23 [1] CRAN (R 3.6.2)                           
 GPfit           1.0-8      2019-02-08 [1] CRAN (R 3.6.0)                           
 gtable          0.3.0      2019-03-25 [1] CRAN (R 3.6.0)                           
 htmltools       0.5.0      2020-06-16 [1] CRAN (R 3.6.2)                           
 infer         * 0.5.3      2020-07-14 [1] CRAN (R 3.6.2)                           
 ipred           0.9-9      2019-04-28 [1] CRAN (R 3.6.0)                           
 iterators       1.0.13     2020-10-15 [1] CRAN (R 3.6.2)                           
 knitr           1.30       2020-09-22 [1] CRAN (R 3.6.2)                           
 lattice         0.20-41    2020-04-02 [1] CRAN (R 3.6.2)                           
 lava            1.6.8.1    2020-11-04 [1] CRAN (R 3.6.2)                           
 lhs             1.1.1      2020-10-05 [1] CRAN (R 3.6.2)                           
 lifecycle       0.2.0      2020-03-06 [1] CRAN (R 3.6.0)                           
 listenv         0.8.0      2019-12-05 [1] CRAN (R 3.6.0)                           
 lme4          * 1.1-26     2020-12-01 [1] CRAN (R 3.6.2)                           
 lubridate       1.7.9.2    2020-11-13 [1] CRAN (R 3.6.2)                           
 magrittr        2.0.1      2020-11-17 [1] CRAN (R 3.6.2)                           
 MASS            7.3-53     2020-09-09 [1] CRAN (R 3.6.2)                           
 Matrix        * 1.2-18     2019-11-27 [1] CRAN (R 3.6.2)                           
 minqa           1.2.4      2014-10-09 [1] CRAN (R 3.6.0)                           
 modeldata     * 0.1.0      2020-10-22 [1] CRAN (R 3.6.2)                           
 multilevelmod * 0.0.0.9000 2020-12-21 [1] Github (tidymodels/multilevelmod@8b14f10)
 munsell         0.5.0      2018-06-12 [1] CRAN (R 3.6.0)                           
 nlme            3.1-151    2020-12-10 [1] CRAN (R 3.6.2)                           
 nloptr          1.2.2.2    2020-07-02 [1] CRAN (R 3.6.2)                           
 nnet            7.3-14     2020-04-26 [1] CRAN (R 3.6.2)                           
 parallelly      1.22.0     2020-12-13 [1] CRAN (R 3.6.2)                           
 parsnip       * 0.1.4.9000 2020-12-21 [1] Github (tidymodels/parsnip@a82ed40)      
 pillar          1.4.7      2020-11-20 [1] CRAN (R 3.6.2)                           
 pkgconfig       2.0.3      2019-09-22 [1] CRAN (R 3.6.0)                           
 plyr            1.8.6      2020-03-03 [1] CRAN (R 3.6.0)                           
 pROC            1.16.2     2020-03-19 [1] CRAN (R 3.6.0)                           
 prodlim         2019.11.13 2019-11-17 [1] CRAN (R 3.6.0)                           
 purrr         * 0.3.4      2020-04-17 [1] CRAN (R 3.6.2)                           
 R6              2.5.0      2020-10-28 [1] CRAN (R 3.6.2)                           
 Rcpp            1.0.5      2020-07-06 [1] CRAN (R 3.6.2)                           
 recipes       * 0.1.15     2020-11-11 [1] CRAN (R 3.6.2)                           
 rlang           0.4.9      2020-11-26 [1] CRAN (R 3.6.2)                           
 rmarkdown       2.5        2020-10-21 [1] CRAN (R 3.6.2)                           
 rpart           4.1-15     2019-04-12 [1] CRAN (R 3.6.0)                           
 rsample       * 0.0.8      2020-09-23 [1] CRAN (R 3.6.2)                           
 rstudioapi      0.13       2020-11-12 [1] CRAN (R 3.6.2)                           
 scales        * 1.1.1      2020-05-11 [1] CRAN (R 3.6.2)                           
 sessioninfo     1.1.1      2018-11-05 [1] CRAN (R 3.6.0)                           
 statmod         1.4.35     2020-10-19 [1] CRAN (R 3.6.2)                           
 survival        3.2-7      2020-09-28 [1] CRAN (R 3.6.2)                           
 tibble        * 3.0.4      2020-10-12 [1] CRAN (R 3.6.2)                           
 tidymodels    * 0.1.2      2020-11-22 [1] CRAN (R 3.6.2)                           
 tidyr         * 1.1.2      2020-08-27 [1] CRAN (R 3.6.2)                           
 tidyselect      1.1.0      2020-05-11 [1] CRAN (R 3.6.2)                           
 timeDate        3043.102   2018-02-21 [1] CRAN (R 3.6.0)                           
 tune          * 0.1.2      2020-11-17 [1] CRAN (R 3.6.2)                           
 vctrs           0.3.6      2020-12-17 [1] CRAN (R 3.6.2)                           
 withr           2.3.0      2020-09-22 [1] CRAN (R 3.6.2)                           
 workflows     * 0.2.1      2020-10-08 [1] CRAN (R 3.6.2)                           
 xfun            0.19       2020-10-30 [1] CRAN (R 3.6.2)                           
 yaml            2.2.1      2020-02-01 [1] CRAN (R 3.6.0)                           
 yardstick     * 0.0.7      2020-07-13 [1] CRAN (R 3.6.2)                           

[1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.