This is a little tricky but can be done with some modifications that I just added to `multilevelmod`.
One thing first: I would use grouped cross-validation (see below).
If you just want to resample (not tune), there are two approaches to using something like splines; both are shown below (as "Approach 1" and "Approach 2" in the code comments).
Here's your example:
library(tidymodels)
#> ── Attaching packages ────────────────────────────────────── tidymodels 0.1.2 ──
#> ✓ broom 0.7.2 ✓ recipes 0.1.15.9000
#> ✓ dials 0.0.9.9000 ✓ rsample 0.0.8
#> ✓ dplyr 1.0.2 ✓ tibble 3.0.4
#> ✓ ggplot2 3.3.2 ✓ tidyr 1.1.2
#> ✓ infer 0.5.3 ✓ tune 0.1.2
#> ✓ modeldata 0.1.0 ✓ workflows 0.2.1
#> ✓ parsnip 0.1.4 ✓ yardstick 0.0.7
#> ✓ purrr 0.3.4
#> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
#> x purrr::discard() masks scales::discard()
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag() masks stats::lag()
#> x recipes::step() masks stats::step()
library(multilevelmod) # Install current devel version
# Repeated-measures data: Reaction measured on each Subject across Days.
data(sleepstudy, package = "lme4")
set.seed(9599)
# Grouped CV: each resample holds out entire subjects, so no subject's rows
# appear in both the analysis and assessment sets of the same fold.
folds <- group_vfold_cv(sleepstudy, group = Subject)
# parsnip spec that fits the model with lme4::lmer() under the hood.
lme_spec <-
linear_reg() %>%
set_mode("regression") %>%
set_engine("lmer")
# Approach 1: use splines functions in the model formula:
# The `formula` argument to add_model() overrides the preprocessor formula at
# fit time, so lmer-specific syntax (random effects, inline spline terms) can
# be used even though the recipe only declares the roles of the columns.
lme_wflow_1 <-
workflow() %>%
add_recipe(recipe(Reaction ~ Days + Subject, data = sleepstudy)) %>%
# or use
# add_variables(outcomes = Reaction, predictors = c(Days, Subject))
add_model(lme_spec, formula = Reaction ~ splines::ns(Days) + (1 | Subject))
out_1 <- fit_resamples(lme_wflow_1, resamples = folds)
#>
#> Attaching package: 'rlang'
#> The following objects are masked from 'package:purrr':
#>
#> %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
#> flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
#> splice
#>
#> Attaching package: 'vctrs'
#> The following object is masked from 'package:tibble':
#>
#> data_frame
#> The following object is masked from 'package:dplyr':
#>
#> data_frame
#> Loading required package: Matrix
#>
#> Attaching package: 'Matrix'
#> The following objects are masked from 'package:tidyr':
#>
#> expand, pack, unpack
collect_metrics(out_1)
#> # A tibble: 2 x 6
#> .metric .estimator mean n std_err .config
#> <chr> <chr> <dbl> <int> <dbl> <chr>
#> 1 rmse standard 43.5 18 5.80 Preprocessor1_Model1
#> 2 rsq standard 0.646 18 0.0604 Preprocessor1_Model1
# Approach 2: use a recipe and put in the specific columns that you know will
# be there
# Here the recipe creates the spline basis columns (step_ns), and the model
# formula uses `.` to pick up whatever columns the recipe produced.
lme_wflow_2 <-
workflow() %>%
add_recipe(
recipe(Reaction ~ Days + Subject, data = sleepstudy) %>%
step_ns(Days, deg_free = 3)
) %>%
add_model(lme_spec, formula = Reaction ~ . -Subject + (1 | Subject))
out_2 <- fit_resamples(lme_wflow_2, resamples = folds)
collect_metrics(out_2)
#> # A tibble: 2 x 6
#> .metric .estimator mean n std_err .config
#> <chr> <chr> <dbl> <int> <dbl> <chr>
#> 1 rmse standard 43.6 18 5.89 Preprocessor1_Model1
#> 2 rsq standard 0.646 18 0.0614 Preprocessor1_Model1
Created on 2020-12-18 by the reprex package (v0.3.0)
The model formula says "use all of the columns that are not `Reaction`" (symbolized by the `.`), but don't make dummy variables out of subject (the `-Subject`), and add a random intercept for the subjects (the `(1 | Subject)`).
For tuning, I thought that there was an `lme4` bug but it was an issue with my code. You can use the same model formula approach:
library(tidymodels)
#> ── Attaching packages ────────────────────────────────────── tidymodels 0.1.2 ──
#> ✓ broom 0.7.2 ✓ recipes 0.1.15.9000
#> ✓ dials 0.0.9.9000 ✓ rsample 0.0.8
#> ✓ dplyr 1.0.2 ✓ tibble 3.0.4
#> ✓ ggplot2 3.3.2 ✓ tidyr 1.1.2
#> ✓ infer 0.5.3 ✓ tune 0.1.2
#> ✓ modeldata 0.1.0 ✓ workflows 0.2.1
#> ✓ parsnip 0.1.4 ✓ yardstick 0.0.7
#> ✓ purrr 0.3.4
#> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
#> x purrr::discard() masks scales::discard()
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag() masks stats::lag()
#> x recipes::step() masks stats::step()
library(multilevelmod) # Install current devel version
data(sleepstudy, package = "lme4")
set.seed(9599)
# Same grouped resamples as before: whole subjects are held out per fold.
folds <- group_vfold_cv(sleepstudy, group = Subject)
lme_spec <-
linear_reg() %>%
set_mode("regression") %>%
set_engine("lmer")
# Tuning version: deg_free = tune() marks the spline degrees of freedom as a
# tuning parameter; step_novel() guards against factor levels in the holdout
# set that were not seen during training.
lme_wflow_3 <-
workflow() %>%
add_recipe(
recipe(Reaction ~ Days + Subject, data = sleepstudy) %>%
step_ns(Days, deg_free = tune()) %>%
step_novel(Subject)
) %>%
add_model(lme_spec, formula = Reaction ~ . -Subject + (1 | Subject))
# Evaluate deg_free = 1 through 8 over the grouped resamples.
out_3 <- tune_grid(lme_wflow_3, resamples = folds, grid = tibble(deg_free = 1:8))
#>
#> Attaching package: 'rlang'
#> The following objects are masked from 'package:purrr':
#>
#> %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
#> flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
#> splice
#>
#> Attaching package: 'vctrs'
#> The following object is masked from 'package:tibble':
#>
#> data_frame
#> The following object is masked from 'package:dplyr':
#>
#> data_frame
#> Loading required package: Matrix
#>
#> Attaching package: 'Matrix'
#> The following objects are masked from 'package:tidyr':
#>
#> expand, pack, unpack
collect_metrics(out_3)
#> # A tibble: 16 x 7
#> deg_free .metric .estimator mean n std_err .config
#> <int> <chr> <chr> <dbl> <int> <dbl> <chr>
#> 1 1 rmse standard 43.5 18 5.80 Preprocessor1_Model1
#> 2 1 rsq standard 0.646 18 0.0604 Preprocessor1_Model1
#> 3 2 rmse standard 43.4 18 5.89 Preprocessor2_Model1
#> 4 2 rsq standard 0.651 18 0.0609 Preprocessor2_Model1
#> 5 3 rmse standard 43.6 18 5.89 Preprocessor3_Model1
#> 6 3 rsq standard 0.646 18 0.0614 Preprocessor3_Model1
#> 7 4 rmse standard 43.5 18 5.91 Preprocessor4_Model1
#> 8 4 rsq standard 0.647 18 0.0607 Preprocessor4_Model1
#> 9 5 rmse standard 43.6 18 5.90 Preprocessor5_Model1
#> 10 5 rsq standard 0.644 18 0.0606 Preprocessor5_Model1
#> 11 6 rmse standard 43.6 18 5.91 Preprocessor6_Model1
#> 12 6 rsq standard 0.643 18 0.0610 Preprocessor6_Model1
#> 13 7 rmse standard 43.7 18 5.88 Preprocessor7_Model1
#> 14 7 rsq standard 0.640 18 0.0617 Preprocessor7_Model1
#> 15 8 rmse standard 43.8 18 5.86 Preprocessor8_Model1
#> 16 8 rsq standard 0.634 18 0.0616 Preprocessor8_Model1
Created on 2020-12-18 by the reprex package (v0.3.0)
So let us know if there are any other edge cases that you can think of or run into — we'd like to test more scenarios.