I'm confused about when (if at all) I need to use `prep()`, `bake()`, and `juice()`. The case study currently on the main tidymodels page does not use them, but these functions appear in older tidymodels tutorials you can find online.
I'm following the tidymodels case study (condensed here), and it does not use these functions:
# Split `hotels` into training and testing partitions, stratified on the
# outcome column `children`.
# NOTE(review): no set.seed() call precedes these random splits in this
# script, so the partitions may differ between runs -- confirm intended.
splits <- initial_split(hotels, strata = children)

hotel_other <- training(splits)
hotel_test <- testing(splits)

# Build a validation split (prop = 0.80) from the non-test rows, again
# stratified on `children`.
val_set <- validation_split(
  hotel_other,
  strata = children,
  prop = 0.80
)
# Core count reported for this machine; handed to ranger as `num.threads`.
cores <- parallel::detectCores()

# Random forest specification: `mtry` and `min_n` are marked for tuning,
# the number of trees is fixed at 1000.
rf_mod <- rand_forest(
  mtry = tune(),
  min_n = tune(),
  trees = 1000
) %>%
  set_engine("ranger", num.threads = cores) %>%
  set_mode("classification")
# Preprocessing recipe: model `children` on every other column of
# `hotel_other`, apply step_date() and step_holiday() to `arrival_date`,
# then drop the raw `arrival_date` column with step_rm().
rf_recipe <- recipe(children ~ ., data = hotel_other) %>%
  step_date(arrival_date) %>%
  step_holiday(arrival_date) %>%
  step_rm(arrival_date)
# Bundle the model specification and the recipe into a single workflow.
rf_workflow <- workflow() %>%
  add_model(rf_mod) %>%
  add_recipe(rf_recipe)
# Seed the RNG immediately before tuning.
set.seed(345)

# Tune the workflow against the validation set with `grid = 25`, saving
# predictions and scoring with ROC AUC only.
rf_res <- tune_grid(
  rf_workflow,
  val_set,
  grid = 25,
  control = control_grid(save_pred = TRUE),
  metrics = metric_set(roc_auc)
)

# Select the best candidate according to ROC AUC.
rf_best <- select_best(rf_res, metric = "roc_auc")
# Final model specification. The hyperparameters are taken from `rf_best`
# (the row returned by select_best()) rather than typed in by hand, so the
# final fit always matches the tuning run that was just performed.
# `importance = "impurity"` is passed to the ranger engine in addition to
# the thread count.
last_rf_mod <- rand_forest(
  mtry = rf_best$mtry,
  min_n = rf_best$min_n,
  trees = 1000
) %>%
  set_engine("ranger", num.threads = cores, importance = "impurity") %>%
  set_mode("classification")

# Reuse the tuning workflow (same recipe), swapping in the final model.
last_rf_workflow <- rf_workflow %>%
  update_model(last_rf_mod)

# last_fit() receives the original `splits` object from initial_split().
last_rf_fit <- last_rf_workflow %>%
  last_fit(splits)