generate multiple variables based on other variables with dplyr

Hi there!

I would like to know how to generate multiple variables in one go instead of repeating almost the same code for every variable.

Let's say I have this dataset:

# Toy data: 25 rows. `pid` holds each id 1..5 five times in a row, while
# `year` (2015..2019) and `group` (1..5) both cycle in step with each other.
pid <- rep(1:5, each = 5)
year <- rep(2015:2019, times = 5)
group <- rep(1:5, times = 5)

df <- data.frame(pid = pid, year = year, group = group)

I want to generate these variables:

# One 0/1 indicator per (group, year) pair: `groupG_YYYY` is 1 on rows where
# group == G and year == YYYY, and 0 on every other row.
# Written out by hand on purpose -- this repetition is what should be automated.
df1 <- df %>%
  mutate(
    group1_2015 = case_when(group == 1 & year == 2015 ~ 1, TRUE ~ 0),
    group1_2016 = case_when(group == 1 & year == 2016 ~ 1, TRUE ~ 0),
    group1_2017 = case_when(group == 1 & year == 2017 ~ 1, TRUE ~ 0),
    group1_2018 = case_when(group == 1 & year == 2018 ~ 1, TRUE ~ 0),
    group1_2019 = case_when(group == 1 & year == 2019 ~ 1, TRUE ~ 0),
    group2_2015 = case_when(group == 2 & year == 2015 ~ 1, TRUE ~ 0),
    group2_2016 = case_when(group == 2 & year == 2016 ~ 1, TRUE ~ 0),
    group2_2017 = case_when(group == 2 & year == 2017 ~ 1, TRUE ~ 0),
    group2_2018 = case_when(group == 2 & year == 2018 ~ 1, TRUE ~ 0),
    group2_2019 = case_when(group == 2 & year == 2019 ~ 1, TRUE ~ 0),
    group3_2015 = case_when(group == 3 & year == 2015 ~ 1, TRUE ~ 0),
    group3_2016 = case_when(group == 3 & year == 2016 ~ 1, TRUE ~ 0),
    group3_2017 = case_when(group == 3 & year == 2017 ~ 1, TRUE ~ 0),
    group3_2018 = case_when(group == 3 & year == 2018 ~ 1, TRUE ~ 0),
    group3_2019 = case_when(group == 3 & year == 2019 ~ 1, TRUE ~ 0),
    group4_2015 = case_when(group == 4 & year == 2015 ~ 1, TRUE ~ 0),
    group4_2016 = case_when(group == 4 & year == 2016 ~ 1, TRUE ~ 0),
    group4_2017 = case_when(group == 4 & year == 2017 ~ 1, TRUE ~ 0),
    group4_2018 = case_when(group == 4 & year == 2018 ~ 1, TRUE ~ 0),
    group4_2019 = case_when(group == 4 & year == 2019 ~ 1, TRUE ~ 0),
    group5_2015 = case_when(group == 5 & year == 2015 ~ 1, TRUE ~ 0),
    group5_2016 = case_when(group == 5 & year == 2016 ~ 1, TRUE ~ 0),
    group5_2017 = case_when(group == 5 & year == 2017 ~ 1, TRUE ~ 0),
    group5_2018 = case_when(group == 5 & year == 2018 ~ 1, TRUE ~ 0),
    group5_2019 = case_when(group == 5 & year == 2019 ~ 1, TRUE ~ 0)
  )

I am looking for a dplyr-based approach that generates these variables automatically. Ideally it would also let me choose which groups and years to generate variables for (in case I do not want all of the variables generated above).

Thank you!

here is one way it could be done

library(tidyverse)
library(rlang)
library(glue)

# Same toy data as in the question: every pid appears five times, and
# `year` / `group` cycle through 2015..2019 / 1..5 within each pid.
# `year` must cycle (`times = 5`), not repeat in runs (`each = 5`); otherwise
# every pid would be stuck in a single year and the data would no longer match
# the dataset the question was asked about.
df <- data.frame(
  pid = rep(1:5, each = 5),
  year = rep(2015:2019, times = 5),
  group = rep(1:5, times = 5)
)

#' Add 0/1 indicator columns for group-by-year combinations
#'
#' For every combination of a value in `groups` and a value in `year`, adds a
#' column named `group{g}_{y}` that is 1 on rows where the `group` column
#' equals `g` and the `year` column equals `y`, and 0 on all other rows.
#'
#' The indicator expressions are built as quoted calls with the values
#' injected via `!!`, rather than pasted together as text and parsed. This
#' keeps non-numeric values intact (a character group `"a"` is compared as the
#' string `"a"`, not looked up as a variable called `a`).
#'
#' @param df_ A data frame containing columns named `group` and `year`.
#' @param groups Vector of group values to create indicators for. Required.
#' @param year Vector of year values to create indicators for. Required.
#' @return `df_` with one additional column per groups-by-year combination.
adder <- function(df_,
                  groups,
                  year) {
  if (missing(groups)) {
    stop("Must set groups")
  }
  if (missing(year)) {
    stop("Must set year")
  }

  # One row per requested (group, year) pair.
  to_make <- expand_grid(groups = groups, year = year)

  # `.data$` pins `group` / `year` to the columns of `df_`, so the `year`
  # argument of this function can never be picked up by mistake.
  parsed_mutators <- map2(
    to_make$groups,
    to_make$year,
    \(g, y) expr(
      case_when(.data$group == !!g & .data$year == !!y ~ 1, TRUE ~ 0)
    )
  )
  names(parsed_mutators) <- as.character(
    glue("group{to_make$groups}_{to_make$year}")
  )

  # Splice the named expressions in as if each had been typed as
  # `name = case_when(...)` inside a single mutate() call.
  mutate(df_, !!!parsed_mutators)
}


# Example: indicators for all five groups across all five years.
# Pass shorter vectors to generate only a subset of the columns.
df |>
  adder(groups = 1:5, year = 2015:2019)

Thank you very much @nirgrahamuk !! I would also appreciate it if you could explain the intuition behind this code a bit.

it programmatically creates your case_when instructions as text and then evaluates them as code

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.