Hi,
I have been looking at the finetune
package to try out tune_race_anova(),
which I saw in the presentation given at rstudio::global 2021.
I am trying to get it to work with a workflow, but I keep confusing myself.
Below is some pseudocode.
I'm getting stuck on the portion around tune_res_rf,
and on where to actually integrate the grid.
Does anyone have any ideas?
# Reproducible train/test split: 75% of explore_data goes to training,
# stratified on the outcome column tgt.
set.seed(4595)
data_split <- explore_data %>%
  initial_split(prop = 0.75, strata = "tgt")
test_explore <- testing(data_split)
train_explore <- training(data_split)
# 10-fold cross-validation (a single repeat) on the training set only,
# again stratified on tgt.
report_resamples <- train_explore %>%
  vfold_cv(v = 10, repeats = 1, strata = tgt)
# Preprocessing recipe: tgt is the outcome, every other column is a predictor,
# and the classes of tgt are down-sampled via themis.
preprocess <- themis::step_downsample(
  recipe(tgt ~ ., data = train_explore),
  tgt
)
# BUILD A RANDOM FOREST MODEL ---------------------------------------------
# Classification forest fitted with ranger; mtry, trees and min_n are all
# left as tune() placeholders so the tuning step can fill them in.
rf_spec <- rand_forest(
  mode = "classification",
  mtry = tune(),
  trees = tune(),
  min_n = tune()
)
rf_mod <- set_engine(rf_spec, "ranger")
# Parameter set describing the ranges to search. Despite its name, rf_grid is
# a dials parameter set, not a grid of candidate values. mtry has no default
# upper bound, so it is finalized against the predictor columns
# (everything except tgt).
rf_grid <- explore_data %>%
  select(-tgt) %>%
  finalize(mtry(), .) %>%
  dials::parameters(trees(), min_n())
# Bundle the preprocessing recipe and the model specification into a single
# workflow object that can be handed to the tuning function.
tune_wf <- workflow()
tune_wf <- add_recipe(tune_wf, preprocess)
tune_wf <- add_model(tune_wf, rf_mod)
# Tune the models
library(doParallel)
library(finetune)
# Register a parallel backend, leaving one core free. detectCores() can
# return 1 (which would give 0 workers here) or NA when the count cannot be
# determined, so clamp the result to at least one worker.
no_cores <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
registerDoParallel(cores = no_cores)
# Racing search over the random-forest hyperparameters.
#
# Two things differ from a naive call:
#   * The metric set goes in `metrics`; there is no `perf` argument. The first
#     metric listed (roc_auc here) is the one the race uses to eliminate
#     candidates; the others are only computed and reported.
#   * `grid` must be either a data frame of candidate values or a single
#     integer. rf_grid is a dials parameter set (ranges, not candidates), so
#     it is supplied through `param_info`, and `grid` gives the number of
#     candidate combinations to generate from those ranges. Adjust 20 to taste.
set.seed(345)
tune_res_rf <- tune_race_anova(
  tune_wf,
  resamples = report_resamples,
  param_info = rf_grid,
  grid = 20,
  metrics = metric_set(roc_auc, sens, spec, kap, accuracy)
)
# Release the workers registered by registerDoParallel().
doParallel::stopImplicitCluster()
Thank you for your time