dplyr recode variables if a condition met

Hi,
Maybe the question is really basic but I don't know why my full code does not work.
I have this simple data file:

data.frame(stringsAsFactors=FALSE,
                       URN = c("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg", "hhh", "iii"),
                        E1 = c(1, 2, 3, NA, NA, NA, NA, NA, NA),
                     S1 = c("ddd", "s", NA, "fr", "rf", "f", "aa", "v", "bg"),
                        A2 = c(1, 2, 3, 4, 5, 6, 7, 8, 9),
                        D1 = c(-1, 10, 6, -1, 8, 9, 7, -1, 99)
              )

Now, I want to multiply all numeric variables including any value between 5 and 10 by 10 (I want to ignore variables like E1 and S1 in my example). I am using this code but I don't know how to modify that to exclude numerical variables without values 5-10:

step1 <- source %>% mutate_at(vars(matches("[A-Z]\\d")), ~.x * 10)

Then I want to create 5 new variables. I use this code:

step2 <- step1 %>% mutate(
  Dealer.Rec.Score = case_when(
    A2 >= 10 & A2 <= 80 ~ 0,
    A2 >= 90 & A2 <= 100 ~ 100),
  A1.16 = case_when(
    A2 >= 10 & A2 <= 60 ~ "1-6",
    A2 >= 70 & A2 <= 80 ~ "7-8",
    A2 >= 90 & A2 <= 100 ~ "9-10"),
  D.16 = case_when(
    D1 >= 10 & D1 <= 60 ~ "1-6",
    D1 >= 70 & D1 <= 80 ~ "7-8",
    D1 >= 90 & D1 <= 100 ~ "9-10"),
  TDO = case_when(
    D1 == 990 ~ "No",
    D1 == -10 ~ "Not Required",
    D1 >= 10 & D1 <= 100 ~ "Yes"),
  TDY = case_when(
    D1 == 990 ~ 0,
    D1 >= 10 & D1 <= 100 ~ 100))

Maybe there is a way of running everything in one step (it did not work for me so I've decided to use two steps).

I am also hoping there is much clever way of recoding all variables including values from 1 to 10 to their "16" version instead of using this multiplied code for each variable I want to recode:

var.16 = case_when(
    var1 >= 10 & var1 <= 60 ~ "1-6",
    var1 >= 70 & var1 <= 80 ~ "7-8",
    var1 >= 90 & D1 <= var ~ "9-10"),

Any thoughts?

You can do something like this

library(dplyr)

df <- data.frame(stringsAsFactors=FALSE,
                 URN = c("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg", "hhh", "iii"),
                 E1 = c(1, 2, 3, NA, NA, NA, NA, NA, NA),
                 S1 = c("ddd", "s", NA, "fr", "rf", "f", "aa", "v", "bg"),
                 A2 = c(1, 2, 3, 4, 5, 6, 7, 8, 9),
                 D1 = c(-1, 10, 6, -1, 8, 9, 7, -1, 99)
)

df %>% 
    mutate_if(~is.numeric(.) && any(. > 5, na.rm = TRUE),
              list(`16`= ~ case_when( . >= 1 & . <= 6 ~ "1-6",
                                      . >= 7 & . <= 8 ~ "7-8",
                                      . >= 9 & . <= 10 ~ "9-10")))
#>   URN E1   S1 A2 D1 A2_16 D1_16
#> 1 aaa  1  ddd  1 -1   1-6  <NA>
#> 2 bbb  2    s  2 10   1-6  9-10
#> 3 ccc  3 <NA>  3  6   1-6   1-6
#> 4 ddd NA   fr  4 -1   1-6  <NA>
#> 5 eee NA   rf  5  8   1-6   7-8
#> 6 fff NA    f  6  9   1-6  9-10
#> 7 ggg NA   aa  7  7   7-8   7-8
#> 8 hhh NA    v  8 -1   7-8  <NA>
#> 9 iii NA   bg  9 99  9-10  <NA>
2 Likes

Very clever, thank you.
Can I now modify that to add step 1 and other two conversions (TDO and TDY) in one code? I have some issues with that :frowning:

From your example it seems those new variables depend on specific columns (not dynamic) so you can simply use the code you were using before, if that is not the case then you would have to better explain the logic behind them.

Ok, thank you. Final question then.
I have some issues with merging step1 and step 2...

both.steps <- source 
%>% mutate_at(vars(matches("[A-Z]\\d")), ~.x * 10)
%>% mutate_if(~is.numeric(.) && any(. > 50, na.rm = TRUE),
            list(`16`= ~ case_when( . >= 10 & . <= 60 ~ "1-6",
                                    . >= 70 & . <= 80 ~ "7-8",
                                    . >= 90 & . <= 100 ~ "9-10")))
%>% mutate(
    Dealer.Rec.Score = case_when(
      A2 >= 10 & A2 <= 80 ~ 0,
      A2 >= 90 & A2 <= 100 ~ 100),
    TDO = case_when(
      D1 == 990 ~ "No",
      D1 == -10 ~ "Not Required",
      D1 >= 10 & D1 <= 100 ~ "Yes"),
    TDY = case_when(
      D1 == 990 ~ 0,
      D1 >= 10 & D1 <= 100 ~ 100))

...as the above is not working :frowning:

Can you please turn this into a self-contained reprex? It's harder to ensure that we're faithfully reproducing your problem if we have to copy, paste, and add names for code from different code chunks.

Also, n.b. that source() is also the name of a function in base R, which makes things just a tad more difficult to suss out.

This also matches non numeric variables so you are going to get an error. Have you noticed that you don't actually need the "times 10" step? you can just do something like this

df %>%
    mutate_if(~is.numeric(.) && any(. > 5, na.rm = TRUE),
              list(`16`= ~ case_when( . >= 1 & . <= 6 ~ "1-6",
                                      . >= 7 & . <= 8 ~ "7-8",
                                      . >= 9 & . <= 10 ~ "9-10"))) %>%
    mutate( Dealer.Rec.Score = case_when(
        A2 >= 1 & A2 <= 8 ~ 0,
        A2 >= 9 & A2 <= 10 ~ 100),
        TDO = case_when(
            D1 == 99 ~ "No",
            D1 == -1 ~ "Not Required",
            D1 >= 1 & D1 <= 10 ~ "Yes"),
        TDY = case_when(
            D1 == 99 ~ 0,
            D1 >= 1 & D1 <= 10 ~ 100))

Thank you very much!
The thing is that I want to keep original variables as well (but without 99 and -1) so perhaps I could do something like that?

source %>%
  mutate_if(~is.numeric(.) && any(. > 5, na.rm = TRUE),
            list(`16`= ~ case_when( . >= 1 & . <= 6 ~ "1-6",
                                    . >= 7 & . <= 8 ~ "7-8",
                                    . >= 9 & . <= 10 ~ "9-10")),
            list(`10`= ~ case_when( . == 1 ~ 10,
                                    . == 2 ~ 20,
                                    . == 3 ~ 30,
                                    . == 4 ~ 40,
                                    . == 5 ~ 50,
                                    . == 6 ~ 60,
                                    . == 7 ~ 70,
                                    . == 8 ~ 80,
                                    . == 9 ~ 90,
                                    . == 10 ~ 100))) %>%
  mutate( Dealer.Rec.Score = case_when(
    A2 >= 1 & A2 <= 8 ~ 0,
    A2 >= 9 & A2 <= 10 ~ 100),
    TDO = case_when(
      D1 == 99 ~ "No",
      D1 == -1 ~ "Not Required",
      D1 >= 1 & D1 <= 10 ~ "Yes"),
    TDY = case_when(
      D1 == 99 ~ 0,
      D1 >= 1 & D1 <= 10 ~ 100))

Unfortunately, it contains an error as 10 variables have not been created....

You are just using the wrong syntax, this should work

df %>%
    mutate_if(~is.numeric(.) && any(. > 5, na.rm = TRUE),
              list(`16`= ~ case_when( . >= 1 & . <= 6 ~ "1-6",
                                      . >= 7 & . <= 8 ~ "7-8",
                                      . >= 9 & . <= 10 ~ "9-10"),
                   `10`= ~ case_when( . == 1 ~ 10,
                                      . == 2 ~ 20,
                                      . == 3 ~ 30,
                                      . == 4 ~ 40,
                                      . == 5 ~ 50,
                                      . == 6 ~ 60,
                                      . == 7 ~ 70,
                                      . == 8 ~ 80,
                                      . == 9 ~ 90,
                                      . == 10 ~ 100))) %>%
    mutate( Dealer.Rec.Score = case_when(
        A2 >= 1 & A2 <= 8 ~ 0,
        A2 >= 9 & A2 <= 10 ~ 100),
        TDO = case_when(
            D1 == 99 ~ "No",
            D1 == -1 ~ "Not Required",
            D1 >= 1 & D1 <= 10 ~ "Yes"),
        TDY = case_when(
            D1 == 99 ~ 0,
            D1 >= 1 & D1 <= 10 ~ 100))

As a side comment, I think you should check the function's documentation before jumping into making follow up questions

1 Like

Sorry, It's not about documentation but more about making typing errors (brackets etc.).
Thank you so much!!!

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.