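Try this: convert the binary predictors to factors, impute the missing values with the mode, then convert them back to numeric.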
# Simulate three binary (0/1) variables of length 100. Each one draws its own
# success probability from runif(1) immediately before its rbinom() draw, so
# the sequence of random numbers consumed after set.seed() is fixed.
set.seed(123)
x1 <- rbinom(n = 100, size = 1, prob = runif(1))
x2 <- rbinom(n = 100, size = 1, prob = runif(1))
y <- rbinom(n = 100, size = 1, prob = runif(1))
my_df <- data.frame(y = y, x1 = x1, x2 = x2)
# sprinkle some NAs: blank out a random 25% of the entries in each predictor
my_df[c("x1", "x2")] <- lapply(
  my_df[c("x1", "x2")],
  function(col) replace(col, sample(seq_along(col), 0.25 * length(col)), NA)
)
head(my_df)
#> y x1 x2
#> 1 1 1 0
#> 2 1 0 NA
#> 3 0 1 0
#> 4 1 NA 1
#> 5 1 NA 1
#> 6 1 NA NA
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#> method from
#> required_pkgs.model_spec parsnip
# Build the preprocessing recipe one step at a time: turn the 0/1 predictors
# into two-level factors, impute their missing values with the mode, and then
# map them back to numeric 0/1.
imp <- recipe(y ~ ., data = my_df)
imp <- step_num2factor(
  imp,
  all_numeric_predictors(),
  # shift 0/1 to 1/2 so the values can be used as positions in `levels`
  transform = function(v) v + 1,
  levels = c("0", "1")
)
imp <- step_impute_mode(imp, all_nominal_predictors())
imp <- step_mutate_at(
  imp,
  starts_with("x"),
  # as.numeric() on a factor gives its integer codes (1/2); subtract 1 for 0/1
  fn = ~ as.numeric(.) - 1
)
# Estimate the recipe on my_df and return the processed training data.
bake(prep(imp), new_data = NULL)
#> # A tibble: 100 x 3
#> x1 x2 y
#> <dbl> <dbl> <int>
#> 1 1 0 1
#> 2 0 0 1
#> 3 1 0 0
#> 4 0 1 1
#> 5 0 1 1
#> 6 0 0 1
#> 7 0 0 1
#> 8 0 0 1
#> 9 0 1 1
#> 10 1 0 1
#> # ... with 90 more rows
Created on 2021-12-24 by the reprex package (v2.0.1)