You can do this using the embed package. You definitely should not do it prior to resampling; instead, let one of the step_lencode_*() functions do it for you (see the example below).
There is a vignette on these methods too.
library(tidymodels)
library(embed)
# Simulate the example data: two random letter columns, one standard-normal
# numeric column, and a two-level factor outcome.
set.seed(1)
n_obs <- 10000
dat <- data.frame(
  col_a = sample(letters, size = n_obs, replace = TRUE),
  col_b = sample(letters, size = n_obs, replace = TRUE),
  col_c = rnorm(n_obs),
  # NOTE(review): only 1000 outcome values are drawn here; data.frame()
  # recycles them a whole number of times to fill all 10000 rows. Possibly a
  # typo for 10000 -- left unchanged so the printed metrics stay reproducible.
  y = factor(sample(LETTERS[1:2], 1000, replace = TRUE))
)
# Paste the two letter columns together into a single key such as "a-b".
dat$concat <- paste(dat$col_a, dat$col_b, sep = "-")
# Start from a recipe that models y from the concatenated key and col_c.
base_rec <- recipe(y ~ concat + col_c, data = dat)
# Encode `concat` against the outcome as a recipe step; the embed package
# offers several variants, all named step_lencode_*.
rec <- step_lencode_mixed(base_rec, concat, outcome = vars(y))
# Model specification: logistic regression with parsnip's default settings.
lr_spec <- logistic_reg()

# Seed the RNG so the fold assignment is reproducible, then build the
# cross-validation folds (v = 10 is the vfold_cv() default, spelled out here).
set.seed(2)
resamples <- vfold_cv(dat, v = 10)
# Fit the model spec with the recipe across the resamples. Passing the
# unprepped recipe means the encoding step is handled by fit_resamples()
# rather than being applied to `dat` up front.
lr_res <- fit_resamples(lr_spec, rec, resamples = resamples)

# Summarise the performance metrics across resamples.
collect_metrics(lr_res)
#> # A tibble: 2 × 6
#> .metric .estimator mean n std_err .config
#> <chr> <chr> <dbl> <int> <dbl> <chr>
#> 1 accuracy binary 0.516 10 0.00479 Preprocessor1_Model1
#> 2 roc_auc binary 0.517 10 0.00636 Preprocessor1_Model1
Created on 2022-01-03 by the reprex package (v2.0.0)