event_level option for metrics in fails when using set_metrics

Hi,
Im trying to model a binary classification problem where I need the f_measure (with beta = 0.5) as metric. However in my problem I need to use the second label as the relevant class. This can be done by specifying the event_level option in the f_meas function. However when using the metric_set function to set the metrics ( I need cv in my real case) I get the following Error:

"formal argument "event_level" matched by multiple actual arguments"

However, if I leave out the event_level option, the code runs through, but of course the metric is now on the first class. I know that i could change the levels, but I need the metric for both classes.

Here is my reprex, what is wrong?

library(tidyverse)
library(tidymodels)
library(DALEX)
set.seed(123)

data("titanic_imputed", package = "DALEX")

f_meas_first <- function(data,
                        truth,
                        estimate,
                        na_rm = TRUE,
                        ...) {
  f_meas(
    data,
    truth = !! rlang::enquo(truth),
    estimate = !! rlang::enquo(estimate),
    beta = 0.5,
    na_rm = na_rm,
    #event_level = "second",
    ...
  )
  
  
}

f_meas_first <- new_class_metric(f_meas_first, "maximize")
metrics_first  <-  metric_set(f_meas_first)


f_meas_second <- function(data,
                          truth,
                          estimate,
                          na_rm = TRUE,
                          ...) {
  f_meas(
    data,
    truth = !! rlang::enquo(truth),
    estimate = !! rlang::enquo(estimate),
    beta = 0.5,
    na_rm = na_rm,
    event_level = "second",
    ...
  )
  
  
}

f_meas_second <- new_class_metric(f_meas_second, "maximize")
metrics_second  <-  metric_set(f_meas_second)


data <- titanic_imputed %>%
  as_tibble() %>%
  mutate(label = as.factor(survived)) %>% select(-survived)
#%>% select(age,fare,sibsp,parch,survivedlabel)

model.glm <-
  logistic_reg() %>%
  set_engine("glm")

workflow.glm <-
  workflow() %>%
  add_formula(label ~ .) %>%
  add_model(model.glm)

fit.glm <-
  workflow.glm %>%
  fit(data = data)

prediction <- bind_cols(
  data %>% select(label),
  predict(fit.glm, new_data = data, type = "class"))


metrics_first(prediction, truth = label, estimate =.pred_class )
f_meas_first(prediction, truth = label, estimate = .pred_class )

metrics_second(prediction, truth = label, estimate =.pred_class )
f_meas_second(prediction, truth = label, estimate = .pred_class )

I should probably document this better, but the "correct" way to wrap existing metrics with a tweaked default is to first wrap the lower level _vec() version of it, and then create a new metric that calls metric_summarizer() and uses the new _vec() wrapper you created. This lets you set the metric_nm correctly, which will be required if you want to tune with this custom metric using the {tune} package.

The other thing that you should do is to let event_level be an argument to your new metric function. When you wrap it in metric_set(), the returned function that metric_set() generates has an event_level argument that it will try to pass on to the wrapped metric function. This is why you got the error about multiple actual arguments.

suppressPackageStartupMessages({
  library(tidyverse)
  library(tidymodels)
  library(DALEX)
})

set.seed(123)

data("titanic_imputed", package = "DALEX")

f_meas_beta_vec <- function(truth,
                            estimate,
                            estimator = NULL,
                            na_rm = TRUE,
                            event_level = "first",
                            ...) {
  f_meas_vec(
    truth = truth, 
    estimate = estimate, 
    beta = 0.5, 
    estimator = estimator,
    na_rm = na_rm,
    event_level = event_level,
    ...
  )
}

f_meas_beta <- function(data,
                        truth,
                        estimate,
                        estimator = NULL,
                        na_rm = TRUE,
                        event_level = "first",
                        ...) {
  metric_summarizer(
    metric_nm = "f_meas_beta", 
    metric_fn = f_meas_beta_vec, 
    data = data, 
    truth = !!enquo(truth), 
    estimate = !!enquo(estimate), 
    estimator = estimator, 
    na_rm = na_rm, 
    event_level = event_level, 
    ...
  )
}

f_meas_beta <- new_class_metric(f_meas_beta, "maximize")
metrics_beta  <-  metric_set(f_meas_beta)

data <- titanic_imputed %>%
  as_tibble() %>%
  mutate(label = as.factor(survived)) %>%
  select(-survived)

model.glm <-
  logistic_reg() %>%
  set_engine("glm")

workflow.glm <-
  workflow() %>%
  add_formula(label ~ .) %>%
  add_model(model.glm)

fit.glm <-
  workflow.glm %>%
  fit(data = data)

prediction <- bind_cols(
  data %>% select(label),
  predict(fit.glm, new_data = data, type = "class"))

f_meas_beta(prediction, truth = label, estimate = .pred_class, event_level = "first")
#> # A tibble: 1 x 3
#>   .metric     .estimator .estimate
#>   <chr>       <chr>          <dbl>
#> 1 f_meas_beta binary         0.834
metrics_beta(prediction, truth = label, estimate = .pred_class, event_level = "first")
#> # A tibble: 1 x 3
#>   .metric     .estimator .estimate
#>   <chr>       <chr>          <dbl>
#> 1 f_meas_beta binary         0.834

f_meas_beta(prediction, truth = label, estimate = .pred_class, event_level = "second")
#> # A tibble: 1 x 3
#>   .metric     .estimator .estimate
#>   <chr>       <chr>          <dbl>
#> 1 f_meas_beta binary         0.705
metrics_beta(prediction, truth = label, estimate = .pred_class, event_level = "second")
#> # A tibble: 1 x 3
#>   .metric     .estimator .estimate
#>   <chr>       <chr>          <dbl>
#> 1 f_meas_beta binary         0.705

Created on 2020-10-16 by the reprex package (v0.3.0.9001)

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.