How to mutate_at/mutate_if multiple columns using condition on other column outside .vars ? [dplyr]

Is there a single-call way to assign several specific columns to a value using dplyr, based on a condition from a column outside that group of columns?

My issue is that mutate_if checks for conditions on the specific columns themselves, and mutate_at seems to limit all references to just those same specific columns. Whereas I want to mutate based on a corresponding value in a column outside the ones I specify with .vars.

library(dplyr)

df <- structure(list(`1997` = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0), 
               `1998` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
               `1999` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
               `2000` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
               allyrs = c(FALSE, FALSE, TRUE, FALSE, 
                             FALSE, TRUE, FALSE, TRUE, TRUE, FALSE)), 
          class = c("tbl_df", "tbl", "data.frame"), 
          row.names = c(NA, -10L))

# What is the condensed, multi-column mutate, dplyr equivalent to 
df[df$allyrs == T,]$`1997` <- 1
df[df$allyrs == T,]$`1998` <- 1
df[df$allyrs == T,]$`1999` <- 1
df[df$allyrs == T,]$`2000` <- 1

# In theory I just want to use mutate_if or mutate_at,
# but dplyr::mutate_if() checks conditions on the column itself, not a neighboring column,
# like checking if a column is a numeric or character'

# In theory I would use mutate_at(), but specifying which columns I want to modify makes it such
# that dplyr can no longer find the column I want to check conditions on
df %>% mutate(`1997` = ifelse(allyrs == T, 1, `1997`))
#> # A tibble: 10 x 5
#>    `1997` `1998` `1999` `2000` allyrs
#>     <dbl>  <dbl>  <dbl>  <dbl> <lgl> 
#>  1      0      0      0      0 FALSE 
#>  2      0      0      0      0 FALSE 
#>  3      1      0      0      0 TRUE  
#>  4      0      0      0      0 FALSE 
#>  5      0      0      0      0 FALSE 
#>  6      1      0      0      0 TRUE  
#> ....
  # repeat for all other years....
df %>% mutate_at(paste(1997:2000), function(x) 1) 
#> # A tibble: 10 x 5
#>    `1997` `1998` `1999` `2000` allyrs
#>     <dbl>  <dbl>  <dbl>  <dbl> <lgl> 
#>  1      1      1      1      1 FALSE 
#>  2      1      1      1      1 FALSE 
#> ....
  # mutate_at works insofar as you can select multiple columns but...
df %>% mutate_at(paste(1997:2000), function(x) ifelse(allyrs == T, 1, 0))
#> Error in ifelse(allyrs == T, 1, 0): object 'allyrs' not found
  # its usage constrains available variables to check
  #> Error in ifelse(allyrs == T, 1, 0) : object 'allyrs' not found 

# Is there just not a way to do this in one call using dplyr?

Hi, @shib, I think you are approaching. Just specify the data for this variable allyrs.
For here, in lazy way, I use .$allyrs but you can also use df$allyrs, I show it in the fold.

library(dplyr)
#> 
#> 载入程辑包:'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

df <- structure(list(`1997` = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0), 
               `1998` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
               `1999` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
               `2000` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
               allyrs = c(FALSE, FALSE, TRUE, FALSE, 
                             FALSE, TRUE, FALSE, TRUE, TRUE, FALSE)), 
          class = c("tbl_df", "tbl", "data.frame"), 
          row.names = c(NA, -10L))
df %>% 
    mutate_at(
        paste(1997:2000)
        ,function(x) ifelse(.$allyrs == T, 1, 0)
        )
#> # A tibble: 10 x 5
#>    `1997` `1998` `1999` `2000` allyrs
#>     <dbl>  <dbl>  <dbl>  <dbl> <lgl> 
#>  1      0      0      0      0 FALSE 
#>  2      0      0      0      0 FALSE 
#>  3      1      1      1      1 TRUE  
#>  4      0      0      0      0 FALSE 
#>  5      0      0      0      0 FALSE 
#>  6      1      1      1      1 TRUE  
#>  7      0      0      0      0 FALSE 
#>  8      1      1      1      1 TRUE  
#>  9      1      1      1      1 TRUE  
#> 10      0      0      0      0 FALSE

Created on 2018-11-03 by the reprex package (v0.2.1)

`The df$allyrs` way
library(dplyr)
#> 
#> 载入程辑包:'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

df <- structure(list(`1997` = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0), 
               `1998` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
               `1999` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
               `2000` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
               allyrs = c(FALSE, FALSE, TRUE, FALSE, 
                             FALSE, TRUE, FALSE, TRUE, TRUE, FALSE)), 
          class = c("tbl_df", "tbl", "data.frame"), 
          row.names = c(NA, -10L))
df %>% 
    mutate_at(
        paste(1997:2000)
        ,function(x) ifelse(df$allyrs == T, 1, 0)
        )
#> # A tibble: 10 x 5
#>    `1997` `1998` `1999` `2000` allyrs
#>     <dbl>  <dbl>  <dbl>  <dbl> <lgl> 
#>  1      0      0      0      0 FALSE 
#>  2      0      0      0      0 FALSE 
#>  3      1      1      1      1 TRUE  
#>  4      0      0      0      0 FALSE 
#>  5      0      0      0      0 FALSE 
#>  6      1      1      1      1 TRUE  
#>  7      0      0      0      0 FALSE 
#>  8      1      1      1      1 TRUE  
#>  9      1      1      1      1 TRUE  
#> 10      0      0      0      0 FALSE

Created on 2018-11-03 by the reprex package (v0.2.1)

3 Likes

You can achieve this also with gather/spread approach:

library(dplyr)

df <- structure(list(`1997` = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0), 
                     `1998` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                     `1999` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                     `2000` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                     allyrs = c(FALSE, FALSE, TRUE, FALSE, 
                                FALSE, TRUE, FALSE, TRUE, TRUE, FALSE)), 
                class = c("tbl_df", "tbl", "data.frame"), 
                row.names = c(NA, -10L))

df %>%
  tidyr::gather(key = "year", value = "value", -allyrs) %>% 
  dplyr::group_by(year) %>%
  dplyr::mutate(row = dplyr::row_number()) %>%
  dplyr::mutate(value = ifelse(allyrs == TRUE, 1, 0)) %>% 
  tidyr::spread(key = year, value = value) %>%
  dplyr::select(-row)
#> # A tibble: 10 x 5
#>    allyrs `1997` `1998` `1999` `2000`
#>    <lgl>   <dbl>  <dbl>  <dbl>  <dbl>
#>  1 FALSE       0      0      0      0
#>  2 FALSE       0      0      0      0
#>  3 FALSE       0      0      0      0
#>  4 FALSE       0      0      0      0
#>  5 FALSE       0      0      0      0
#>  6 FALSE       0      0      0      0
#>  7 TRUE        1      1      1      1
#>  8 TRUE        1      1      1      1
#>  9 TRUE        1      1      1      1
#> 10 TRUE        1      1      1      1

Created on 2018-11-02 by the reprex package (v0.2.1)

Thanks so much! Instant fix : )

How would you refer to the unchanged value within the anonymous function? I.e. if instead of changing it to 0 you wanted to keep the same value? function(x) ifelse(.$allyears == T, 1, <?>)

My first thought was the dot . but that refers to the entire dataframe...

@shib, Yes, it returns a data frame. Thus why I don't like ifelse function because it forces all output as a vector. Try if ... else ....

I am still working on it, when I finish it, I update my answer.

@shib But ... I successfully convert 1 to .$... .

library(dplyr)
#> 
#> 载入程辑包:'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

df <- structure(list(`1997` = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0), 
               `1998` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
               `1999` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
               `2000` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
               allyrs = c(FALSE, FALSE, TRUE, FALSE, 
                             FALSE, TRUE, FALSE, TRUE, TRUE, FALSE)), 
          class = c("tbl_df", "tbl", "data.frame"), 
          row.names = c(NA, -10L))
df %>% 
    mutate_at(
        paste(1997:2000)
        ,function(x) ifelse(.$allyrs == T, 1, .$allyrs+5)
        # FALSE value in numeric column is 0, I add 5 to show it.
        )
#> # A tibble: 10 x 5
#>    `1997` `1998` `1999` `2000` allyrs
#>     <dbl>  <dbl>  <dbl>  <dbl> <lgl> 
#>  1      5      5      5      5 FALSE 
#>  2      5      5      5      5 FALSE 
#>  3      1      1      1      1 TRUE  
#>  4      5      5      5      5 FALSE 
#>  5      5      5      5      5 FALSE 
#>  6      1      1      1      1 TRUE  
#>  7      5      5      5      5 FALSE 
#>  8      1      1      1      1 TRUE  
#>  9      1      1      1      1 TRUE  
#> 10      5      5      5      5 FALSE

Created on 2018-11-03 by the reprex package (v0.2.1)

I'm not entirely sure why you implemented it with .$allyrs + 5, but I think I get what you're saying. You need something like .$... where ... is a placeholder for any of 1997 - 2017, right?

You can use purrr::modify_at() and dplyr::if_else() to solve this problem:

library(tidyverse)

df <- structure(list(`1997` = c(5, 5, 5, 5, 5, 5, 5, 5, 5, 5), 
               `1998` = c(6, 6, 6, 6, 6, 6, 6, 6, 6, 6), 
               `1999` = c(7, 7, 7, 7, 7, 7, 7, 7, 7, 7), 
               `2000` = c(8, 8, 8, 8, 8, 8, 8, 8, 8, 8), 
               allyrs = c(FALSE, FALSE, TRUE, FALSE, 
                             FALSE, TRUE, FALSE, TRUE, TRUE, FALSE)), 
          class = c("tbl_df", "tbl", "data.frame"), 
          row.names = c(NA, -10L))

df %>% 
  modify_at(
    paste(1997:2000),
    ~ if_else(df$allyrs, 1, .)
  )
#> # A tibble: 10 x 5
#>    `1997` `1998` `1999` `2000` allyrs
#>     <dbl>  <dbl>  <dbl>  <dbl> <lgl> 
#>  1      5      6      7      8 FALSE 
#>  2      5      6      7      8 FALSE 
#>  3      1      1      1      1 TRUE  
#>  4      5      6      7      8 FALSE 
#>  5      5      6      7      8 FALSE 
#>  6      1      1      1      1 TRUE  
#>  7      5      6      7      8 FALSE 
#>  8      1      1      1      1 TRUE  
#>  9      1      1      1      1 TRUE  
#> 10      5      6      7      8 FALSE

Created on 2018-11-02 by the reprex package (v0.2.1)

6 Likes

Hi @jcblum,
I find your solution very elegant.
Yet, when I tried it I got this error, "Error in df$allyrs : object of type 'closure' is not subsettable"...
Any ideas why or pointer for me where to look?
Thanks in advance for any help you can give me.