Mutate only if a column exists in a dataframe

My goal is to write a pipeable function which applies a (custom) function to a specified column; when the column is missing the original dataframe is returned (without any error/warning). In this example the floor() function is used as simple example.

library(tidyverse)

floor_if_not_missing <- function(df, col_name){

  col_exists <- df %>%
    dplyr::select(dplyr::any_of(rlang::as_label(rlang::enquo(col_name)))) %>%
    ncol()

  if(col_exists== 0){
    return(df)
  } else {
    df %>%
      mutate({{col_name}} := floor({{col_name}}))
  }
}

mtcars %>% head(5)
#>                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
#> Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
#> Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
#> Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
#> Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2

mtcars %>% head(5) %>% floor_if_not_missing(mpg)
#>                   mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Mazda RX4          21   6  160 110 3.90 2.620 16.46  0  1    4    4
#> Mazda RX4 Wag      21   6  160 110 3.90 2.875 17.02  0  1    4    4
#> Datsun 710         22   4  108  93 3.85 2.320 18.61  1  1    4    1
#> Hornet 4 Drive     21   6  258 110 3.08 3.215 19.44  1  0    3    1
#> Hornet Sportabout  18   8  360 175 3.15 3.440 17.02  0  0    3    2

mtcars %>% head(5) %>% floor_if_not_missing(miles_per_gallon)
#>                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
#> Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
#> Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
#> Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
#> Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2

This solution (and especially the verification part if a column exists) is a bit clumpsy.
Can you suggest any cleaner/tidyer approach?

Here is a solution with across(), it allows the variable name to be quoted or unquoted

library(tidyverse)

floor_if_not_missing <- function(df, col_name) {
  df %>%
    mutate(across({{ col_name }},
                  ~ floor(.)))
}

mtcars %>% head(5)
mtcars %>% head(5) %>% floor_if_not_missing(mpg)
mtcars %>% head(5) %>% floor_if_not_missing("mpg")

The suggested solution is not handling the case when a column is missing.

> mtcars %>% head(5) %>% floor_if_not_missing(miles_per_gallon)
Error: Problem with `mutate()` input `..1`.
x Can't subset columns that don't exist.
x Column `miles_per_gallon` doesn't exist.

my apologies !
I think this is an elegant approach

library(tidyverse)

floor_if_not_missing <- function(df, col_name) {
  tryCatch(
    expr =
      df %>%
        mutate(across(
          {{ col_name }},
          ~ floor(.)
        )),
    error = function(e) df
  )
}

head(mtcars) %>% floor_if_not_missing(mpgx)
head(mtcars) %>% floor_if_not_missing(mpg)
1 Like

You can also use the tidyselect, it works even if the variable doesn't exist, without the need to create a new function :

mtcars %>% head(5) %>% mutate(across(matches(c("mpgcd", "mpg", "wtw")), floor))
mtcars %>% head(5) %>% mutate(across(matches("miles_per_galon"), floor))
1 Like

The suggested solution is working general, but doesn't solve one of the goals: to be able to select dynamically a column, because the matches() expects a character vector.

Continuing on your idea, the following can be a solution:

library(tidyverse)

floor_if_not_missing <- function(df, col_name){

  col_name <- rlang::as_label(rlang::enquo(col_name))

  df %>% mutate(across(matches(col_name), floor))
}

mtcars %>% head(3) %>% floor_if_not_missing(mpg)
#>               mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Mazda RX4      21   6  160 110 3.90 2.620 16.46  0  1    4    4
#> Mazda RX4 Wag  21   6  160 110 3.90 2.875 17.02  0  1    4    4
#> Datsun 710     22   4  108  93 3.85 2.320 18.61  1  1    4    1

mtcars %>% head(3) %>% floor_if_not_missing(miles_per_gallon)
#>                mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Mazda RX4     21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
#> Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
#> Datsun 710    22.8   4  108  93 3.85 2.320 18.61  1  1    4    1

Any other improvement (simplification) idea?

I'd use any_of() instead of matches() as matches() looks for a regular expression. So if someone had a column name that was a regular expression you might get unexpected results. E.g.

library(tidyverse)

floor_if_not_missing_matches <- function(df, col_name){
  
  col_name <- rlang::as_label(rlang::enquo(col_name))
  
  df %>% mutate(across(matches(col_name), floor))
}

floor_if_not_missing_any_of <- function(df, col_name){
  
  col_name <- rlang::as_label(rlang::enquo(col_name))
  
  df %>% mutate(across(any_of(col_name), floor))
}

# this floors both mpg and m.
mtcars %>%
  select(mpg) %>%
  mutate(m. = mpg) %>%
  head(3) %>%
  floor_if_not_missing_matches(m.)
#>               mpg m.
#> Mazda RX4      21 21
#> Mazda RX4 Wag  21 21
#> Datsun 710     22 22

# this only floors m.
mtcars %>%
  select(mpg) %>%
  mutate(m. = mpg) %>%
  head(3) %>%
  floor_if_not_missing_any_of(m.)
#>                mpg m.
#> Mazda RX4     21.0 21
#> Mazda RX4 Wag 21.0 21
#> Datsun 710    22.8 22

# and to confirm original behaviour still works
mtcars %>%
  select(mpg) %>%
  mutate(m. = mpg) %>%
  head(3) %>%
  floor_if_not_missing_any_of(h.)
#>                mpg   m.
#> Mazda RX4     21.0 21.0
#> Mazda RX4 Wag 21.0 21.0
#> Datsun 710    22.8 22.8

Created on 2021-04-01 by the reprex package (v0.3.0)

2 Likes

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.