Create multiple variables in one go

Dear all,

I have the following data and would like to create a new set of variables for each existing variable. At the moment the code is quite long. Could someone help me convert it into a function so that the code is shorter? I have many more variables for which I need to create the same set of new variables.

Thank you!
Polly

# Sample data: five rows of free-text consumption-frequency answers,
# one column per fruit.
want <- data.frame(
  apple_etc = c(
    "2-4 per week", "1 per week", "2-4 per week", "2-4 per week",
    "Less than 1 per month"
  ),
  orange_etc = c(
    "2-4 per week", "1 per week", "2-4 per week", "2-4 per week",
    "Less than 1 per month"
  ),
  banana = c(
    "Never", "2-4 per week", "2-4 per week", "2-4 per week",
    "5-6 per week"
  )
)

# Derive a servings-per-day estimate for each fruit column from its free-text
# answer. The same five columns are created per fruit:
#   <fruit>_unit      - last word of the answer (e.g. "week", "month", "Never")
#   <fruit>_serve_1   - character 1 converted to a number (NA if not numeric)
#   <fruit>_serve_2   - characters 3-4 converted to a number (NA if not numeric)
#   <fruit>_serve     - servings per period (see the branch comments below)
#   <fruit>_serve_day - servings per day
# NOTE(review): relies on `%>%`, mutate() and word() being available -
# presumably via library(tidyverse), which this snippet does not load itself.
want <- want %>%
  # ---- apple ----
  mutate(apple_unit=word(apple_etc, -1),
         apple_serve_1 = as.numeric(substr(apple_etc,1,1)), 
         apple_serve_2 = as.numeric(substr(apple_etc,3,4)),
         # serve: only the first number parsed -> use it; otherwise "Never" -> 0;
         # otherwise nothing parsed (e.g. "Less than 1 per month") -> 1;
         # otherwise both parsed (a range "a-b") -> midpoint (a + b) / 2.
         apple_serve = ifelse(!is.na(apple_serve_1) & is.na(apple_serve_2) , apple_serve_1, 
                       ifelse(apple_unit=="Never", 0, 
                       ifelse(is.na(apple_serve_1) & is.na(apple_serve_2), 1, (apple_serve_1 + apple_serve_2)/2))),
         # per day: weekly counts / 7, monthly counts / 30, anything else unchanged.
         apple_serve_day = ifelse(apple_unit=="week", apple_serve/7,
                           ifelse(apple_unit=="month", apple_serve/30, apple_serve))) %>%
  # ---- orange (same steps as apple) ----
  mutate(orange_unit=word(orange_etc, -1),
         orange_serve_1 = as.numeric(substr(orange_etc,1,1)), 
         orange_serve_2 = as.numeric(substr(orange_etc,3,4)),
         orange_serve = ifelse(!is.na(orange_serve_1) & is.na(orange_serve_2), orange_serve_1, 
                       ifelse(orange_unit=="Never", 0, 
                       ifelse(is.na(orange_serve_1) & is.na(orange_serve_2), 1, (orange_serve_1 + orange_serve_2)/2))),
         orange_serve_day = ifelse(orange_unit=="week", orange_serve/7,
                           ifelse(orange_unit=="month", orange_serve/30, orange_serve))) %>%
  # ---- banana (same steps; the source column has no "_etc" suffix) ----
  mutate(banana_unit=word(banana, -1),
         banana_serve_1 = as.numeric(substr(banana,1,1)), 
         banana_serve_2 = as.numeric(substr(banana,3,4)),
         banana_serve = ifelse(!is.na(banana_serve_1) & is.na(banana_serve_2), banana_serve_1, 
                        ifelse(banana_unit=="Never", 0, 
                        ifelse(is.na(banana_serve_1) & is.na(banana_serve_2), 1, (banana_serve_1 + banana_serve_2)/2))),
         banana_serve_day = ifelse(banana_unit=="week", banana_serve/7,
                           ifelse(banana_unit=="month", banana_serve/30, banana_serve)))

I think something along these lines:

# Raw input for the function-based approach: the three free-text frequency
# columns, before any derived columns are added.
want0 <- data.frame(
  apple_etc = c(
    "2-4 per week", "1 per week", "2-4 per week", "2-4 per week",
    "Less than 1 per month"
  ),
  orange_etc = c(
    "2-4 per week", "1 per week", "2-4 per week", "2-4 per week",
    "Less than 1 per month"
  ),
  banana = c(
    "Never", "2-4 per week", "2-4 per week", "2-4 per week",
    "5-6 per week"
  )
)
library(tidyverse)
library(rlang)
#' Extract a substring and convert it to a number.
#'
#' @param a Character vector to slice.
#' @param b Start position (1-based) passed to `substr()`.
#' @param c Stop position (inclusive) passed to `substr()`.
#' @return Numeric vector; elements whose slice is not a number become `NA`
#'   (`as.numeric()` warns about the coercion).
asnumsub <- function(a, b, c) {
  piece <- substr(a, start = b, stop = c)
  as.numeric(piece)
}

#' Servings per period from the two parsed numbers and the unit word.
#'
#' Branches are tried in this order for each element:
#' 1. only `serve_1` parsed        -> `serve_1`
#' 2. `unit` equals "Never"        -> 0
#' 3. neither number parsed        -> 1
#' 4. both parsed (a range "a-b")  -> midpoint `(serve_1 + serve_2) / 2`
#'
#' @param serve_1 Numeric vector, first parsed number (may be `NA`).
#' @param serve_2 Numeric vector, second parsed number (may be `NA`).
#' @param unit Character vector compared against "Never".
#' @return Numeric vector of servings per period.
srv <- function(serve_1, serve_2, unit) {
  first_missing <- is.na(serve_1)
  second_missing <- is.na(serve_2)

  ifelse(
    !first_missing & second_missing,
    serve_1,
    ifelse(
      unit == "Never",
      0,
      ifelse(
        first_missing & second_missing,
        1,
        (serve_1 + serve_2) / 2
      )
    )
  )
}

#' Convert servings per period into servings per day.
#'
#' @param unit Character vector; "week" divides by 7, "month" divides by 30,
#'   any other value leaves `serve` unchanged.
#' @param serve Numeric vector of servings per period.
#' @return Numeric vector of servings per day.
sday <- function(unit, serve) {
  weekly <- unit == "week"
  monthly <- unit == "month"

  ifelse(
    weekly,
    serve / 7,
    ifelse(monthly, serve / 30, serve)
  )
}

#' Add a servings-per-day column derived from one free-text frequency column.
#'
#' @param df A data frame containing the column `invar`.
#' @param invar Unquoted name of a character column holding answers such as
#'   "2-4 per week", "1 per week", "Less than 1 per month" or "Never".
#' @return `df` with one extra column named `<prefix>_serve_day`, where
#'   `<prefix>` is the part of the column name before the first "_"
#'   (`apple_etc` -> `apple_serve_day`, `banana` -> `banana_serve_day`).
mutator <- function(df, invar) {
  # Prefix for the new column: first "_"-separated piece of the column name.
  w <- word(as_name(enquo(invar)), sep = "_")

  df %>% mutate("{w}_serve_day" := sday(
    word({{ invar }}, -1),
    srv(
      asnumsub({{ invar }}, 1, 1),
      asnumsub({{ invar }}, 3, 4),
      # Use the LAST word here as well, so the "Never" check in srv() sees the
      # same unit that sday() receives (word() without a position returns the
      # first word, which only matches for single-word answers).
      word({{ invar }}, -1)
    )
  ))
}
# Apply mutator() once per source column, carrying the growing result forward;
# each call appends that fruit's `<prefix>_serve_day` column.
want <- mutator(want0, apple_etc)
want <- mutator(want, orange_etc)
want <- mutator(want, banana)

Dear nirgrahamuk,

Thank you for your reply. The code works perfectly, but the last step is still verbose. Is there a way to avoid naming each variable in its own mutator() call, or to select all the variables I want at once? The data I presented here has only three variables, but my actual dataset has many more, so writing one line per variable in this final step would still be quite lengthy. Thank you!

# Illustration only: `peach_etc` is not a column of the sample `want0`; it
# stands in for the additional variables in the full dataset.
want <- want0 %>%
  mutator(apple_etc) %>%
  mutator(orange_etc) %>%
  mutator(banana) %>%
  mutator(peach_etc)
there are a lot more variables to be spelled out.

Polly

You could convert your data to long format, which would give you more flexibility on how to summarize the data and would reduce the amount of code needed. Here's a start:

library(tidyverse)

# Reshape to long format (one row per fruit/answer pair) and derive servings
# per day from the free-text answer.
# NOTE(review): assumes `want` still holds only the raw character answer
# columns (as in the original question); pivot_longer(everything()) cannot
# combine them with numeric columns added by earlier snippets - confirm.
want %>%
  pivot_longer(cols = everything()) %>%
  mutate(
    # Keep only the fruit name: "apple_etc" -> "apple".
    name = str_extract(name, "[a-z]+"),
    # Amount as text: "0" for Never, the single number for "Less than N",
    # otherwise the leading number or range ("1", "2-4").
    amount = case_when(
      grepl("Never", value) ~ "0",
      grepl("Less than [0-9]", value) ~ str_extract(value, "[0-9]+"),
      TRUE ~ str_extract(value, "[0-9-]+")
    ),
    # Length of the reporting period in days; NA when no period is stated.
    period = case_when(
      grepl("per week", value) ~ 7,
      grepl("per month", value) ~ 30,
      TRUE ~ NA_real_
    )
  ) %>%
  # fill = "right": single numbers have no upper bound, so `max` is NA for
  # them by design - no "missing pieces" warning needed.
  separate(amount, into = c("min", "max"), sep = "-", fill = "right") %>%
  mutate(across(c(min, max), as.numeric)) %>%
  mutate(
    # Midpoint of the range; a single number is its own midpoint.
    serve.per.period = (min + coalesce(max, min)) / 2,
    # Answers without a period (e.g. "Never") are left undivided, so "Never"
    # yields 0 servings per day rather than NA.
    serve.per.day = serve.per.period / coalesce(period, 1)
  ) %>%
  arrange(name)
#>    name   value                  min   max period serve.per.period serve.per.day
#>    <chr>  <chr>                <dbl> <dbl>  <dbl>            <dbl>         <dbl>
#>  1 apple  2-4 per week             2     4      7              3          0.429 
#>  2 apple  1 per week               1    NA      7              1          0.143 
#>  3 apple  2-4 per week             2     4      7              3          0.429 
#>  4 apple  2-4 per week             2     4      7              3          0.429 
#>  5 apple  Less than 1 per mon…     1    NA     30              1          0.0333
#>  6 banana Never                    0    NA     NA              0          0     
#>  7 banana 2-4 per week             2     4      7              3          0.429 
#>  8 banana 2-4 per week             2     4      7              3          0.429 
#>  9 banana 2-4 per week             2     4      7              3          0.429 
#> 10 banana 5-6 per week             5     6      7              5.5        0.786 
#> 11 orange 2-4 per week             2     4      7              3          0.429 
#> 12 orange 1 per week               1    NA      7              1          0.143 
#> 13 orange 2-4 per week             2     4      7              3          0.429 
#> 14 orange 2-4 per week             2     4      7              3          0.429 
#> 15 orange Less than 1 per mon…     1    NA     30              1          0.0333
1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.