Adapt values in dataframe based on row value and corresponding column name

GreyMerchant · September 7, 2020, 2:48pm

Hello,

I am looking for a simple way to solve the following problem with dplyr syntax (mutate probably?). Essentially, I want the Origin column compared to the other Destination_x columns and when Origin is 5 then the corresponding value on that row in Destination_5 should be changed to 0. The same should happen in the second example, if it sees 7 for the second row then the second value in Destination_7 should be changed to 0.

df <- data.frame(
          Origin = c(5,7,3,12,2,6,8,5,8,
                     9,12,3,12,9,4,4,2,4,4),
   Destination_1 = c(1,1,1,1,1,1,1,1,1,
                     1,1,1,1,1,1,1,1,1,1),
   Destination_2 = c(1,1,1,1,1,1,1,1,1,
                     1,1,1,1,1,1,1,1,1,1),
   Destination_3 = c(1,1,1,1,1,1,1,1,1,
                     1,1,1,1,1,1,1,1,1,1),
   Destination_4 = c(1,1,1,1,1,1,1,1,1,
                     1,1,1,1,1,1,1,1,1,1),
   Destination_5 = c(1,1,1,1,1,1,1,1,1,
                     1,1,1,1,1,1,1,1,1,1),
   Destination_6 = c(1,1,1,1,1,1,1,1,1,
                     1,1,1,1,1,1,1,1,1,1),
   Destination_7 = c(1,1,1,1,1,1,1,1,1,
                     1,1,1,1,1,1,1,1,1,1),
   Destination_8 = c(1,1,1,1,1,1,1,1,1,
                     1,1,1,1,1,1,1,1,1,1),
   Destination_9 = c(1,1,1,1,1,1,1,1,1,
                     1,1,1,1,1,1,1,1,1,1),
  Destination_10 = c(1,1,1,1,1,1,1,1,1,
                     1,1,1,1,1,1,1,1,1,1),
  Destination_11 = c(1,1,1,1,1,1,1,1,1,
                     1,1,1,1,1,1,1,1,1,1),
  Destination_12 = c(1,1,1,1,1,1,1,1,1,
                     1,1,1,1,1,1,1,1,1,1),
  Destination_13 = c(1,1,1,1,1,1,1,1,1,
                     1,1,1,1,1,1,1,1,1,1)
)

siddharthprabhu · September 7, 2020, 6:22pm

These sorts of problems are much easier to tackle if you first pivot the data into long form.

library(dplyr, warn.conflicts = FALSE)
library(tidyr)

df <- tibble(
  Origin = c(5, 7, 3, 12, 2, 6, 8, 5, 8, 9, 12, 3, 12, 9, 4, 4, 2, 4, 4),
  Destination_1 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_2 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_3 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_4 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_5 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_6 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_7 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_8 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_9 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_10 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_11 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_12 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_13 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
)

df %>%
  pivot_longer(starts_with("Destination"), names_prefix = "Destination_") %>%
  mutate(value = if_else(Origin == as.numeric(name), 0, value)) %>%
  pivot_wider(names_prefix = "Destination_", values_fn = list) %>%
  unnest(starts_with("Destination")) %>% 
  print(n = 10, width = 70)
#> # A tibble: 19 x 14
#>    Origin Destination_1 Destination_2 Destination_3 Destination_4
#>     <dbl>         <dbl>         <dbl>         <dbl>         <dbl>
#>  1      5             1             1             1             1
#>  2      5             1             1             1             1
#>  3      7             1             1             1             1
#>  4      3             1             1             0             1
#>  5      3             1             1             0             1
#>  6     12             1             1             1             1
#>  7     12             1             1             1             1
#>  8     12             1             1             1             1
#>  9      2             1             0             1             1
#> 10      2             1             0             1             1
#> # ... with 9 more rows, and 9 more variables: Destination_5 <dbl>,
#> #   Destination_6 <dbl>, Destination_7 <dbl>, Destination_8 <dbl>,
#> #   Destination_9 <dbl>, Destination_10 <dbl>, Destination_11 <dbl>,
#> #   Destination_12 <dbl>, Destination_13 <dbl>

^{Created on 2020-09-08 by the reprex package (v0.3.0)}

The tricky bit here is that the values in Origin aren't unique so pivot_wider() generates list-cols which then need to be unnested. A consequence is that the row order of the input isn't preserved but I don't think there is a way around it.

GreyMerchant · September 7, 2020, 6:41pm

Hello,

Your solution definitely works! I would like to preserve row order as some other calcs will take place based on row position afterwards. How best can you add an ID and pivot with your example code?

I was thinking of doing something as the below over an apply where x would be the column and y the Origin. I am not sure how good this would work at all?

replace_destination <- function(x,y){x = case_when(y == readr::parse_number(as.character(x)) ~ 0,
                                                          TRUE ~ x) 
}

nirgrahamuk · September 7, 2020, 6:57pm

I would place a rowid early in the flow, and then just add it as an ID column for the pivot wider.


dfl <- pivot_longer(df %>% mutate(rowid = row_number()),
                    cols = c(Destination_1:Destination_13),
                    names_sep="_",
                    names_to=c("l1","l2")) %>% mutate(
                                                      l2=as.numeric(l2),
                                                      value=case_when(Origin==l2~Origin,
                                                                      TRUE~value)) 

df_2 <- pivot_wider(dfl,
                    id_cols = c(rowid,Origin),
                    names_prefix = "Destination_",
                    names_from=l2)

siddharthprabhu · September 7, 2020, 6:59pm

If adding an ID column is an option, then I would do it like this. This approach preserves row order and eliminates the need for unnest().

library(dplyr, warn.conflicts = FALSE)
library(tidyr)

df <- tibble(
  Origin = c(5, 7, 3, 12, 2, 6, 8, 5, 8, 9, 12, 3, 12, 9, 4, 4, 2, 4, 4),
  Destination_1 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_2 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_3 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_4 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_5 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_6 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_7 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_8 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_9 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_10 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_11 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_12 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  Destination_13 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
)

df %>%
  tibble::rowid_to_column() %>% 
  pivot_longer(starts_with("Destination"), names_prefix = "Destination_") %>%
  mutate(value = if_else(Origin == as.numeric(name), 0, value)) %>%
  pivot_wider(id_cols = c(rowid, Origin), names_prefix = "Destination_") %>%
  select(-rowid) %>% 
  print(n = 10, width = 70)
#> # A tibble: 19 x 14
#>    Origin Destination_1 Destination_2 Destination_3 Destination_4
#>     <dbl>         <dbl>         <dbl>         <dbl>         <dbl>
#>  1      5             1             1             1             1
#>  2      7             1             1             1             1
#>  3      3             1             1             0             1
#>  4     12             1             1             1             1
#>  5      2             1             0             1             1
#>  6      6             1             1             1             1
#>  7      8             1             1             1             1
#>  8      5             1             1             1             1
#>  9      8             1             1             1             1
#> 10      9             1             1             1             1
#> # ... with 9 more rows, and 9 more variables: Destination_5 <dbl>,
#> #   Destination_6 <dbl>, Destination_7 <dbl>, Destination_8 <dbl>,
#> #   Destination_9 <dbl>, Destination_10 <dbl>, Destination_11 <dbl>,
#> #   Destination_12 <dbl>, Destination_13 <dbl>

^{Created on 2020-09-08 by the reprex package (v0.3.0)}

GreyMerchant · September 7, 2020, 7:34pm

Thank you! This is a great solution. I don't mind the temporary rowid to maintain the order and put it back to where it was. This works really well.

system · September 14, 2020, 7:34pm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.