Create multiple variables in one go

Dear all,

I have the following data and would like to create a new set of variables for each existing variable. At the moment the code is quite long. Could someone help me convert it into a function so that it is shorter? I have many more variables for which I need to create the same set of new variables.

Thank you!
Polly

# Example frequency answers, one column per fruit.
want <- data.frame(
  apple_etc = c(
    "2-4 per week", "1 per week", "2-4 per week", "2-4 per week",
    "Less than 1 per month"
  ),
  orange_etc = c(
    "2-4 per week", "1 per week", "2-4 per week", "2-4 per week",
    "Less than 1 per month"
  ),
  banana = c(
    "Never", "2-4 per week", "2-4 per week", "2-4 per week",
    "5-6 per week"
  )
)

# For every fruit column derive, in order:
#   <fruit>_unit      - last word of the answer ("week", "month", "Never")
#   <fruit>_serve_1   - character 1 as a number (low end of the range)
#   <fruit>_serve_2   - characters 3-4 as a number (high end of the range)
#   <fruit>_serve     - a single servings figure for the stated period
#   <fruit>_serve_day - that figure converted to servings per day
want <- want %>%
  mutate(
    # ---- apple ----
    apple_unit = word(apple_etc, start = -1),
    apple_serve_1 = as.numeric(substring(apple_etc, 1, 1)),
    apple_serve_2 = as.numeric(substring(apple_etc, 3, 4)),
    apple_serve = ifelse(
      !is.na(apple_serve_1) & is.na(apple_serve_2),
      apple_serve_1, # only one number given, e.g. "1 per week"
      ifelse(
        apple_unit == "Never",
        0,
        ifelse(
          is.na(apple_serve_1) & is.na(apple_serve_2),
          1, # no leading number, e.g. "Less than 1 per month"
          (apple_serve_1 + apple_serve_2) / 2 # midpoint of the range
        )
      )
    ),
    apple_serve_day = ifelse(
      apple_unit == "week",
      apple_serve / 7,
      ifelse(apple_unit == "month", apple_serve / 30, apple_serve)
    ),

    # ---- orange ----
    orange_unit = word(orange_etc, start = -1),
    orange_serve_1 = as.numeric(substring(orange_etc, 1, 1)),
    orange_serve_2 = as.numeric(substring(orange_etc, 3, 4)),
    orange_serve = ifelse(
      !is.na(orange_serve_1) & is.na(orange_serve_2),
      orange_serve_1,
      ifelse(
        orange_unit == "Never",
        0,
        ifelse(
          is.na(orange_serve_1) & is.na(orange_serve_2),
          1,
          (orange_serve_1 + orange_serve_2) / 2
        )
      )
    ),
    orange_serve_day = ifelse(
      orange_unit == "week",
      orange_serve / 7,
      ifelse(orange_unit == "month", orange_serve / 30, orange_serve)
    ),

    # ---- banana ----
    banana_unit = word(banana, start = -1),
    banana_serve_1 = as.numeric(substring(banana, 1, 1)),
    banana_serve_2 = as.numeric(substring(banana, 3, 4)),
    banana_serve = ifelse(
      !is.na(banana_serve_1) & is.na(banana_serve_2),
      banana_serve_1,
      ifelse(
        banana_unit == "Never",
        0,
        ifelse(
          is.na(banana_serve_1) & is.na(banana_serve_2),
          1,
          (banana_serve_1 + banana_serve_2) / 2
        )
      )
    ),
    banana_serve_day = ifelse(
      banana_unit == "week",
      banana_serve / 7,
      ifelse(banana_unit == "month", banana_serve / 30, banana_serve)
    )
  )

I think something along these lines:

# Raw example data: one column of frequency answers per fruit.
want0 <- data.frame(
  apple_etc = c(
    "2-4 per week", "1 per week", rep("2-4 per week", 2),
    "Less than 1 per month"
  ),
  orange_etc = c(
    "2-4 per week", "1 per week", rep("2-4 per week", 2),
    "Less than 1 per month"
  ),
  banana = c("Never", rep("2-4 per week", 3), "5-6 per week")
)
library(tidyverse)
library(rlang)
# Take characters b through c of a (as substr() does) and coerce them to
# numeric. Pieces that are not valid numbers become NA.
asnumsub <- function(a, b, c) {
  piece <- substr(a, start = b, stop = c)
  as.numeric(piece)
}

# Collapse the two parsed numbers into one servings figure.
#
# serve_1, serve_2: numeric vectors (NA where no number was parsed).
# unit:             character vector compared against "Never".
#
# Priority of the rules, element by element:
#   1. only serve_1 present      -> serve_1
#   2. unit is "Never"           -> 0
#   3. neither number present    -> 1
#   4. otherwise                 -> mean of serve_1 and serve_2
srv <- function(serve_1, serve_2, unit) {
  first_missing <- is.na(serve_1)
  second_missing <- is.na(serve_2)

  midpoint <- (serve_1 + serve_2) / 2
  one_or_midpoint <- ifelse(first_missing & second_missing, 1, midpoint)
  never_or_rest <- ifelse(unit == "Never", 0, one_or_midpoint)

  ifelse(!first_missing & second_missing, serve_1, never_or_rest)
}

# Convert a servings figure to servings per day: divide by 7 when unit is
# "week", by 30 when unit is "month", and leave it unchanged otherwise.
sday <- function(unit, serve) {
  monthly_or_unchanged <- ifelse(unit == "month", serve / 30, serve)
  ifelse(unit == "week", serve / 7, monthly_or_unchanged)
}

# Add a "<prefix>_serve_day" column (servings per day) for each selected
# frequency column, where <prefix> is the column name up to its first "_"
# (e.g. "apple_etc" -> "apple_serve_day", "banana" -> "banana_serve_day").
#
# df:    data frame holding the frequency text columns.
# invar: column(s) to convert. A single bare column name works as before;
#        any tidyselect expression, e.g. c(apple_etc, banana) or
#        everything(), converts several columns in one call.
#
# Returns df with one new column appended per selected column.
#
# Note: amounts are read from fixed character positions (1 and 3-4), so
# answers are assumed to start with single-digit numbers such as "2-4".
mutator <- function(df, invar) {
  to_serve_day <- function(x) {
    # word(x) is the first word; it only matters for the "Never" check in srv().
    amount <- srv(asnumsub(x, 1, 1), asnumsub(x, 3, 4), word(x))
    # word(x, -1) is the last word, i.e. the period ("week" / "month").
    sday(word(x, -1), amount)
  }

  df %>%
    mutate(across(
      {{ invar }},
      to_serve_day,
      .names = "{word(.col, sep = '_')}_serve_day"
    ))
}
# Apply mutator() to each listed column in turn, starting from want0.
# The column names are turned into symbols and injected with `!!` so that
# mutator() sees them exactly as if they had been typed as bare names.
want <- reduce(
  syms(c("apple_etc", "orange_etc", "banana")),
  function(acc, col) mutator(acc, !!col),
  .init = want0
)

You could convert your data to long format, which would give you more flexibility on how to summarize the data and would reduce the amount of code needed. Here's a start:

library(tidyverse)

# Reshape to one row per (fruit, answer), then parse the answer text into a
# min/max amount and a period length in days.
want %>%
  pivot_longer(cols = everything()) %>%
  mutate(
    # Keep the leading run of lowercase letters: "apple_etc" -> "apple".
    name = str_extract(name, "[a-z]+"),
    amount = case_when(
      str_detect(value, "Never") ~ "0",
      str_detect(value, "Less than [0-9]") ~ str_extract(value, "[0-9]+"),
      TRUE ~ str_extract(value, "[0-9-]+")
    ),
    period = case_when(
      str_detect(value, "per week") ~ 7,
      str_detect(value, "per month") ~ 30,
      TRUE ~ NA_real_
    )
  ) %>%
  # "2-4" -> min = "2", max = "4"; a single number leaves max as NA.
  separate(amount, into = c("min", "max"), sep = "-") %>%
  mutate(
    across(c(min, max), as.numeric),
    # When there is no upper bound, use min for both ends of the range.
    serve.per.period = (min + coalesce(max, min)) / 2,
    serve.per.day = serve.per.period / period
  ) %>%
  arrange(name)
#>    name   value                  min   max period serve.per.period serve.per.day
#>    <chr>  <chr>                <dbl> <dbl>  <dbl>            <dbl>         <dbl>
#>  1 apple  2-4 per week             2     4      7              3          0.429 
#>  2 apple  1 per week               1    NA      7              1          0.143 
#>  3 apple  2-4 per week             2     4      7              3          0.429 
#>  4 apple  2-4 per week             2     4      7              3          0.429 
#>  5 apple  Less than 1 per mon…     1    NA     30              1          0.0333
#>  6 banana Never                    0    NA     NA              0         NA     
#>  7 banana 2-4 per week             2     4      7              3          0.429 
#>  8 banana 2-4 per week             2     4      7              3          0.429 
#>  9 banana 2-4 per week             2     4      7              3          0.429 
#> 10 banana 5-6 per week             5     6      7              5.5        0.786 
#> 11 orange 2-4 per week             2     4      7              3          0.429 
#> 12 orange 1 per week               1    NA      7              1          0.143 
#> 13 orange 2-4 per week             2     4      7              3          0.429 
#> 14 orange 2-4 per week             2     4      7              3          0.429 
#> 15 orange Less than 1 per mon…     1    NA     30              1          0.0333
1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.

Dear nirgrahamuk,

Thank you for your reply. The code works perfectly except for the last step. I wonder if there is a way to avoid spelling out each variable name in a separate mutator() call, or to select all the variables I want at once. The data I presented here has only three variables, but my actual dataset has many more. If I have to write a line for each variable in this final step, that becomes quite lengthy as well. Thank you!

# One mutator() call per variable: every extra column needs its own line.
# NOTE: peach_etc is not a column of the example want0 shown earlier;
# presumably it stands in for the additional columns of the real dataset.
want <- want0 %>%
mutator(apple_etc) %>%
mutator(orange_etc) %>%
mutator(banana) %>%
mutator(peach_etc)
There are a lot more variables to be spelled out.

Polly