When building a recipe, how can I replace missing values in numeric variables that contain only 0 or 1 by imputing the mode?

Try this: convert the 0/1 columns to factors, impute the mode, and then convert them back to numeric.

# Fix the RNG state so the simulated data are reproducible.
set.seed(123)

# Simulate three binary (0/1) vectors of length 100. Each vector gets its own
# success probability, drawn immediately before the vector itself.
p_x1 <- runif(1)
x1 <- rbinom(100, 1, p_x1)
p_x2 <- runif(1)
x2 <- rbinom(100, 1, p_x2)
p_y <- runif(1)
y <- rbinom(100, 1, p_y)

my_df <- data.frame(y, x1, x2)

# Blank out a random 25% of the entries in each predictor column.
na_cols <- c("x1", "x2")
my_df[na_cols] <- lapply(
    my_df[na_cols],
    function(v) replace(v, sample(seq_along(v), 0.25 * length(v)), NA)
)

head(my_df)
#>   y x1 x2
#> 1 1  1  0
#> 2 1  0 NA
#> 3 0  1  0
#> 4 1 NA  1
#> 5 1 NA  1
#> 6 1 NA NA

library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#>   method                   from   
#>   required_pkgs.model_spec parsnip

# Mode-impute the binary (0/1) numeric predictors:
#   1. step_num2factor() indexes `levels` by position, so the 0/1 values are
#      shifted to 1/2 with `transform` before being mapped to "0"/"1".
#   2. step_impute_mode() fills each factor's NAs with its most frequent level.
#   3. step_mutate() + across() turns the factors back into numeric 0/1 by
#      parsing the level labels, which does not depend on the internal integer
#      codes. It replaces the superseded step_mutate_at().
imp <- recipe(y ~ ., data = my_df) %>%
    step_num2factor(all_numeric_predictors(),
                    transform = function(x) x + 1,
                    levels = c("0", "1")) %>%
    step_impute_mode(all_nominal_predictors()) %>%
    step_mutate(across(starts_with("x"), ~ as.numeric(as.character(.x))))

# Estimate the steps on my_df and return the processed training data.
imp %>% prep() %>% bake(new_data = NULL)
#> # A tibble: 100 x 3
#>       x1    x2     y
#>    <dbl> <dbl> <int>
#>  1     1     0     1
#>  2     0     0     1
#>  3     1     0     0
#>  4     0     1     1
#>  5     0     1     1
#>  6     0     0     1
#>  7     0     0     1
#>  8     0     0     1
#>  9     0     1     1
#> 10     1     0     1
#> # ... with 90 more rows

Created on 2021-12-24 by the reprex package (v2.0.1)

1 Like