rule_fit Using tidymodels: Error

Hi,

I am trying out the rule_fit() model and keep getting the following error:

Error: At least one parameter does not match any id's in the set: 'mtry'
Run rlang::last_error() to see where the error occurred.

I was wondering if anyone could see where I am going wrong?

library(tidymodels)
library(tidyverse)
library(janitor)

mydf <- iris %>% 
  clean_names() %>% 
  mutate(flower_tgt = case_when(species == 'setosa' ~ 'Y',
                                TRUE ~ 'N')) %>% 
  select(-species)

# Set up the recipe and upsample based on the label
set_rec <- recipe(flower_tgt ~ ., data = mydf) %>% 
  themis::step_upsample(flower_tgt)

# Just check the recipe does what I think
set_rec %>% prep() %>% juice() %>% glimpse()

xrf_mod <- rule_fit(mtry = tune(),
                    trees = tune(),
                    min_n = tune(),
                    tree_depth = tune(),
                    learn_rate = tune(),
                    loss_reduction = tune(),
                    sample_size = tune(),
                    penalty = tune()) %>%
  set_engine("xrf") %>%
  set_mode("classification")


xrf_wf <- workflow() %>%
  add_model(xrf_mod) %>%
  add_recipe(set_rec, blueprint = hardhat::default_recipe_blueprint(allow_novel_levels = TRUE))

flwr_split <- initial_time_split(mydf)
train_data <- training(flwr_split)
test_data <- testing(flwr_split)

# Set up the metrics, the control grid, and the cross-validation folds
mset <- metric_set(recall, precision, f_meas, j_index)

grid_control <- control_grid(save_workflow = TRUE,
                             save_pred = TRUE,
                             extract = extract_model)

folds <- rsample::vfold_cv(train_data, v = 10, 
                           strata = flower_tgt)

xrf_grid <- xrf_wf %>% 
  parameters() %>% 
  update(mtry = mtry(range = c(2L, 3L))) %>% 
  grid_random(size = 2)

# ERROR HERE

Thanks

Thanks for the post and reproducible example, @john.smith!

You're running into this issue because you haven't loaded the rules package, which implements support for your rule_fit() model specification. Loading that package and running your code as-is (I switched out the deprecated parameters() for extract_parameter_set_dials()) will do the trick:

library(tidymodels)
library(tidyverse)
library(janitor)
#> 
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#> 
#>     chisq.test, fisher.test
library(rules)
#> 
#> Attaching package: 'rules'
#> The following object is masked from 'package:dials':
#> 
#>     max_rules

mydf <- iris %>% 
  clean_names() %>% 
  mutate(flower_tgt = case_when(species == 'setosa' ~ 'Y',
                                TRUE ~ 'N')) %>% 
  select(-species)

# Set up the recipe and upsample based on the label
set_rec <- recipe(flower_tgt ~ ., data = mydf) %>% 
  themis::step_upsample(flower_tgt)

# Just check the recipe does what I think
set_rec %>% prep() %>% juice() %>% glimpse()
#> Rows: 200
#> Columns: 5
#> $ sepal_length <dbl> 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.…
#> $ sepal_width  <dbl> 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.…
#> $ petal_length <dbl> 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.…
#> $ petal_width  <dbl> 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.…
#> $ flower_tgt   <fct> N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, …

xrf_mod <- rule_fit(mtry = tune(),
                    trees = tune(),
                    min_n = tune(),
                    tree_depth = tune(),
                    learn_rate = tune(),
                    loss_reduction = tune(),
                    sample_size = tune(),
                    penalty = tune()) %>%
  set_engine("xrf") %>%
  set_mode("classification")


xrf_wf <- workflow() %>%
  add_model(xrf_mod) %>%
  add_recipe(set_rec, blueprint = hardhat::default_recipe_blueprint(allow_novel_levels = TRUE))

flwr_split <- initial_time_split(mydf)
train_data <- training(flwr_split)
test_data <- testing(flwr_split)

# Set up the metrics, the control grid, and the cross-validation folds
mset <- metric_set(recall, precision, f_meas, j_index)

grid_control <- control_grid(save_workflow = TRUE,
                             save_pred = TRUE,
                             extract = extract_model)

folds <- rsample::vfold_cv(train_data, v = 10, 
                           strata = flower_tgt)

xrf_grid <- xrf_wf %>% 
  extract_parameter_set_dials() %>% 
  update(mtry = mtry(range = c(2L, 3L))) %>% 
  grid_random(size = 2)

xrf_grid
#> # A tibble: 2 × 8
#>    mtry trees min_n tree_depth  learn_rate loss_reduction sample_size    penalty
#>   <int> <int> <int>      <int>       <dbl>          <dbl>       <dbl>      <dbl>
#> 1     2    14    39          6 0.000000534        0.00709       0.857    6.52e-1
#> 2     3    24    11          7 0.0000795          0.531         0.633    2.21e-9

Created on 2022-08-29 by the reprex package (v2.0.1)

We're currently working on better machinery to warn when folks specify a model that requires a parsnip extension package to be loaded. Hopefully this will help you in troubleshooting errors like this in the future. :slight_smile:
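
In the meantime, one quick way to check whether an extension package's engines are registered is parsnip's show_engines(); it's a small check rather than part of the fix, but without rules loaded the "xrf" engine won't appear in the listing:

# Lists the engines currently registered for rule_fit();
# "xrf" only shows up once the rules package is attached.
show_engines("rule_fit")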


Hi @simoncouch

Thanks very much. I tried this portion and the grid comes out as expected, but when I actually try to tune it, it crashes. I have not seen any blog posts that demonstrate how to use the algorithm, but it's such a clever idea that I can think of half a dozen uses within our organization :slight_smile:

I am not sure if I should open a new question or change the dataset for this, but I updated the code to sample 8000 rows with replacement and then tried the tuning. Below is a cut-down version of the code. This error might actually come from the underlying rules/xrf packages, so if you don't know the answer off the top of your head, please don't invest too much time in it.

unique notes:
--------------------------------------------------------------------------------------
Error in rules::xrf_fit(object = object, data = data, colsample_bytree = ~2L, : argument "formula" is missing, with no default
--------------------------------------------------------------------------------------
Error in rules::xrf_fit(object = object, data = data, colsample_bytree = ~3L, : argument "formula" is missing, with no default
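
In case it is useful, translating the spec shows the underlying engine call (I believe this rules::xrf_fit() call is what the notes above refer to); this uses the xrf_mod specification defined in the code below:

# Print the call parsnip builds for the "xrf" engine
xrf_mod %>% translate()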


library(tidymodels)
library(tidyverse)
library(janitor)
library(rules)

mydf <- iris %>% 
  clean_names() %>% 
  mutate(flower_tgt = case_when(species == 'setosa' ~ 'Y', TRUE ~ 'N')) %>% 
  select(-species) %>% 
  dplyr::sample_n(8000, replace = TRUE) # requires a bigger dataset

# SET UP DATA SPLIT -------------------------------------------------------
# Set up the recipe and upsample based on the label
set_rec <- recipe(flower_tgt ~ ., data = mydf) %>% 
  themis::step_upsample(flower_tgt)

xrf_mod <- rule_fit(mtry = tune(),
                    trees = tune(),
                    min_n = tune(),
                    tree_depth = tune(),
                    learn_rate = tune(),
                    loss_reduction = tune(),
                    sample_size = tune(),
                    penalty = tune()) %>%
  set_engine("xrf") %>%
  set_mode("classification")

xrf_wf <- workflow() %>%
  add_model(xrf_mod) %>%
  add_recipe(set_rec, blueprint = hardhat::default_recipe_blueprint(allow_novel_levels = TRUE))

flwr_split <- initial_time_split(mydf)
train_data <- training(flwr_split)
test_data <- testing(flwr_split)

folds <- rsample::vfold_cv(train_data, v = 10, 
                           strata = flower_tgt)

# SET UP METRICS AND CONTROL GRID -----------------------------------------
mset <- metric_set(recall, precision, f_meas, j_index)

grid_control <- control_grid(save_workflow = TRUE,
                             save_pred = TRUE,
                             extract = extract_model)

xrf_grid <- xrf_wf %>% 
  extract_parameter_set_dials() %>% 
  update(mtry = mtry(range = c(2L, 3L))) %>% 
  grid_random(size = 2)

# TUNE MODEL --------------------------------------------------------------
my_res <- xrf_wf %>% 
  tune_grid(resamples = folds,
            grid = xrf_grid,
            control = grid_control,
            metrics = metric_set(j_index))
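
To dig into the failures, something like this should pull the notes recorded during tuning (assuming a fairly recent version of tune that captures per-resample notes):

# Per-resample warnings and errors captured during tuning;
# this is where the "formula is missing" messages surface.
collect_notes(my_res)

# Condensed summary of the unique notes
show_notes(my_res)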

Thank you very much for your time
