Help manipulating and working with a workflow_set object

I have a use case where I run a tune-models.R script as an RStudio Job overnight, which at the end writes the results to a folder on my laptop. The script structure is something like:

prep data -> create recipe -> create workflow set -> tune using workflow_map() -> package the results together with the splits -> use saveRDS() to save the results

by "package the results together with the splits ", i mean create a nested dataframe to save, like:

tune_results <- workflow_map(...)

workflowset_experiment <-
  tibble(workflows = list(tune_results),
         splits = list(splits),
         date_created = Sys.time())

saveRDS(workflowset_experiment, path)

I do this so I can have everything I need to compare, evaluate, and finalize the best model later on in a new R session.
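In the later session, the read-back looks roughly like this (just a sketch; `path` is whatever file path was passed to saveRDS() above):

# in a new R session: read the saved experiment back in and pull out the pieces
workflowset_experiment <- readRDS(path)

tune_results <- workflowset_experiment$workflows[[1]]
splits       <- workflowset_experiment$splits[[1]]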

Since I usually only want to compare the best version of each model type to the others (e.g. plot the accuracy of the best XGB, the best RF, the best KNN, ...), there are a lot of model/preprocessor combos in tune_results that I don't need. I have been trying to figure out how to remove these while still keeping a workflow_set object, since all the nice helper functions only work with a workflow_set object.
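For example, these are the kinds of helpers I mean; all of them expect a workflow_set (the workflow id here is just the one from the reprex below):

# helpers that dispatch on a workflow_set object (examples, not exhaustive)
tune_results %>% collect_metrics()
tune_results %>% rank_results(rank_metric = "accuracy")
tune_results %>% autoplot(metric = "accuracy")
tune_results %>% extract_workflow_set_result(id = "rec_lm")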

My approach so far has been to search for an existing way to do this, but I haven't found one, so now I am trying to hack something together, with no luck. It's very likely I am missing something that already does this, and/or taking the wrong approach, but I have a reprex below.

Is there tidymodels functionality that already does this? Does anyone have advice on whether this is possible and how to approach it? Thanks!

reprex:

library(tidymodels)
library(stringr)
library(tidyverse)

data(parabolic)

# prep 
set.seed(1)
split <- initial_split(parabolic)
train_set <- training(split)
test_set <- testing(split)

set.seed(2)
train_resamples <- vfold_cv(train_set, v = 10)

logistic_reg_spec <- 
  logistic_reg(penalty = tune(),
               mixture = tune()) %>% 
  set_engine("glmnet")

rec <-
  recipe(class ~ ., data = train_set)

rec_norm <-
  recipe(class ~ ., data = train_set) %>%
  step_normalize(all_numeric_predictors())

# note: avoiding the name `workflow` here so it doesn't mask workflows::workflow()
wflow_set <- 
  workflow_set(
    preproc = list(rec = rec, rec_norm = rec_norm),
    models = list(lm = logistic_reg_spec), 
    cross = TRUE
  )

grid_ctrl <-
  control_grid(
    save_pred = TRUE,
    parallel_over = "everything",
    save_workflow = TRUE
  )

tune_results <-
  wflow_set %>%
  workflow_map(
    seed = 3566,
    verbose = FALSE,
    resamples = train_resamples,
    control = grid_ctrl,
    fn = "tune_grid", 
    grid = 10,
    metrics = yardstick::metric_set(accuracy)
  )

tune_results
#> # A workflow set/tibble: 2 × 4
#>   wflow_id    info             option    result   
#>   <chr>       <list>           <list>    <list>   
#> 1 rec_lm      <tibble [1 × 4]> <opts[4]> <tune[+]>
#> 2 rec_norm_lm <tibble [1 × 4]> <opts[4]> <tune[+]>

# these helper functions all work with the output of workflow_map():
tune_results %>% collect_metrics() # 20 rows (2 workflows * 10 tuning candidates * 1 metric):
#> # A tibble: 20 × 9
#>    wflow_id    .config      preproc model .metric .estimator  mean     n std_err
#>    <chr>       <chr>        <chr>   <chr> <chr>   <chr>      <dbl> <int>   <dbl>
#>  1 rec_lm      Preprocesso… recipe  logi… accura… binary     0.731    10  0.0188
#>  2 rec_lm      Preprocesso… recipe  logi… accura… binary     0.736    10  0.0153
#>  3 rec_lm      Preprocesso… recipe  logi… accura… binary     0.440    10  0.0211
#>  4 rec_lm      Preprocesso… recipe  logi… accura… binary     0.736    10  0.0153
#>  5 rec_lm      Preprocesso… recipe  logi… accura… binary     0.736    10  0.0153
#>  6 rec_lm      Preprocesso… recipe  logi… accura… binary     0.736    10  0.0153
#>  7 rec_lm      Preprocesso… recipe  logi… accura… binary     0.736    10  0.0153
#>  8 rec_lm      Preprocesso… recipe  logi… accura… binary     0.736    10  0.0153
#>  9 rec_lm      Preprocesso… recipe  logi… accura… binary     0.736    10  0.0171
#> 10 rec_lm      Preprocesso… recipe  logi… accura… binary     0.736    10  0.0153
#> 11 rec_norm_lm Preprocesso… recipe  logi… accura… binary     0.731    10  0.0188
#> 12 rec_norm_lm Preprocesso… recipe  logi… accura… binary     0.736    10  0.0153
#> 13 rec_norm_lm Preprocesso… recipe  logi… accura… binary     0.440    10  0.0211
#> 14 rec_norm_lm Preprocesso… recipe  logi… accura… binary     0.736    10  0.0153
#> 15 rec_norm_lm Preprocesso… recipe  logi… accura… binary     0.736    10  0.0153
#> 16 rec_norm_lm Preprocesso… recipe  logi… accura… binary     0.736    10  0.0153
#> 17 rec_norm_lm Preprocesso… recipe  logi… accura… binary     0.736    10  0.0153
#> 18 rec_norm_lm Preprocesso… recipe  logi… accura… binary     0.736    10  0.0153
#> 19 rec_norm_lm Preprocesso… recipe  logi… accura… binary     0.736    10  0.0171
#> 20 rec_norm_lm Preprocesso… recipe  logi… accura… binary     0.736    10  0.0153


tune_results %>% rank_results(rank_metric = "accuracy", select_best = TRUE)
#> # A tibble: 2 × 9
#>   wflow_id    .config       .metric  mean std_err     n preprocessor model  rank
#>   <chr>       <chr>         <chr>   <dbl>   <dbl> <int> <chr>        <chr> <int>
#> 1 rec_lm      Preprocessor… accura… 0.736  0.0171    10 recipe       logi…     1
#> 2 rec_norm_lm Preprocessor… accura… 0.736  0.0171    10 recipe       logi…     2


# trying to keep only the best instance of each model: 
# use `rank_results()` to identify the best-performing configuration for each preprocessor/model combo
top_performers <- 
  tune_results %>% 
  rank_results(rank_metric = "accuracy", select_best = TRUE) %>% 
  select(wflow_id, .config) %>% 
  distinct() 

# unnest tune_results, drop the 18 worse-performing configurations, then re-nest
best_models <- 
  tune_results %>% 
  unnest(result) %>%
  unnest(.metrics) %>%
  inner_join(top_performers, by = c("wflow_id", ".config")) %>% 
  nest(metrics = id:.config) %>%
  nest(result = splits:metrics)

# looks good?
best_models
#> # A tibble: 2 × 4
#>   wflow_id    info             option    result           
#>   <chr>       <list>           <list>    <list>           
#> 1 rec_lm      <tibble [1 × 4]> <opts[4]> <tibble [10 × 4]>
#> 2 rec_norm_lm <tibble [1 × 4]> <opts[4]> <tibble [10 × 4]>

# but these helper functions no longer work
best_models %>% collect_metrics() 
#> Error in `collect_metrics()`:
#> ! No `collect_metric()` exists for this type of object.
#> Backtrace:
#>     ▆
#>  1. ├─best_models %>% collect_metrics()
#>  2. ├─tune::collect_metrics(.)
#>  3. └─tune:::collect_metrics.default(.)
#>  4.   └─rlang::abort("No `collect_metric()` exists for this type of object.")


best_models %>% rank_results(rank_metric = "accuracy", select_best = TRUE)
#> Error in `collect_metrics()`:
#> ! No `collect_metric()` exists for this type of object.
#> Backtrace:
#>     ▆
#>  1. ├─best_models %>% ...
#>  2. └─workflowsets::rank_results(., rank_metric = "accuracy", select_best = TRUE)
#>  3.   └─workflowsets:::pick_metric(x, rank_metric)
#>  4.     ├─tune::collect_metrics(x)
#>  5.     └─tune:::collect_metrics.default(x)
#>  6.       └─rlang::abort("No `collect_metric()` exists for this type of object.")

# best_models is a regular old tibble instead of a workflow_set object
tune_results
#> # A workflow set/tibble: 2 × 4
#>   wflow_id    info             option    result   
#>   <chr>       <list>           <list>    <list>   
#> 1 rec_lm      <tibble [1 × 4]> <opts[4]> <tune[+]>
#> 2 rec_norm_lm <tibble [1 × 4]> <opts[4]> <tune[+]>

best_models
#> # A tibble: 2 × 4
#>   wflow_id    info             option    result           
#>   <chr>       <list>           <list>    <list>           
#> 1 rec_lm      <tibble [1 × 4]> <opts[4]> <tibble [10 × 4]>
#> 2 rec_norm_lm <tibble [1 × 4]> <opts[4]> <tibble [10 × 4]>
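
A quick class() check (a sketch, not part of the rendered output above) shows what gets lost: the helper generics dispatch on the "workflow_set" class, which the unnest/nest round trip drops.

class(tune_results)  # includes "workflow_set" in addition to the tibble classes
class(best_models)   # just "tbl_df" "tbl" "data.frame"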
Created on 2023-11-17 with reprex v2.0.2