pivot_longer instead of split dataframe and bind_rows

I have the following messy data:

df<- tribble(
  ~food,           ~group,     ~group_p,     ~group2,          ~group2_p,  ~group3,      ~group3_p,
  "banana",       "fruits",      1,           NA,               NA,         NA,           NA,   
  "rice",         "grains",      1,           NA,               NA,         NA,           NA,   
  "mixed_dish1",  "nuts",        0.5,         "vegetables",     0.25,       "fruits",     0.25,
  "mixed_dish2",  "vegetables",  0.3,         "fruits",         0.4,        "grains",     0.3, 
  "tomato",       "vegetables",  1,           NA,               NA,          NA,          NA,   
  "apple",        "fruits",      1,           NA,               NA,          NA,          NA,   
  "mixed_dish3",  "nuts",        0.9,         "fruits",         0.1,         NA,           NA 
)

and I would like to convert it to this long format:

df_long <- tribble(
  ~food,           ~group,     ~group_p,  
  "banana",       "fruits",      1,              
  "rice",         "grains",      1,              
  "mixed_dish1",  "nuts",        0.5,               
  "mixed_dish1",   "vegetables", 0.25,
  "mixed_dish1",  "fruits",      0.25,
  "mixed_dish2",  "vegetables",  0.3,                 
  "mixed_dish2",   "fruits",     0.4,
  "mixed_dish2",   "grains",     0.3, 
  "tomato",       "vegetables",  1,              
  "apple",        "fruits",      1,              
  "mixed_dish3",  "nuts",        0.9,    
  "mixed_dish3",  "fruits",      0.1
)

By splitting the dataframe and binding the rows, I was able to achieve my goal:

df_part1 <- df %>% 
  select(food,group,group_p)

df_part2 <- df %>% 
select(food,group2,group2_p) %>% 
  rename("group"=group2,
         "group_p"=group2_p) %>% 
  filter(!is.na(group))

df_part3 <- df %>% 
select(food,group3,group3_p) %>% 
  rename(group=group3,group_p=group3_p)%>% 
  filter(!is.na(group))

df_binded <- bind_rows(df_part1,df_part2,df_part3) %>% 
arrange(food)

I wonder if there might be a simpler way to do this by pivot_longer, But didn't managed to find it on my own.

I could not find a way to do this without renaming some of the columns.

library(tibble)
library(tidyr)
DF <- df<- tribble(
  ~food,           ~group,     ~group_p,     ~group2,          ~group2_p,  ~group3,      ~group3_p,
  "banana",       "fruits",      1,           NA,               NA,         NA,           NA,   
  "rice",         "grains",      1,           NA,               NA,         NA,           NA,   
  "mixed_dish1",  "nuts",        0.5,         "vegetables",     0.25,       "fruits",     0.25,
  "mixed_dish2",  "vegetables",  0.3,         "fruits",         0.4,        "grains",     0.3, 
  "tomato",       "vegetables",  1,           NA,               NA,          NA,          NA,   
  "apple",        "fruits",      1,           NA,               NA,          NA,          NA,   
  "mixed_dish3",  "nuts",        0.9,         "fruits",         0.1,         NA,           NA 
)
colnames(DF)[seq(2,6,2)] <- paste(colnames(DF)[seq(2,6,2)], "g", sep = "_")
DF
#> # A tibble: 7 x 7
#>   food        group_g    group_p group2_g   group2_p group3_g group3_p
#>   <chr>       <chr>        <dbl> <chr>         <dbl> <chr>       <dbl>
#> 1 banana      fruits         1   <NA>          NA    <NA>        NA   
#> 2 rice        grains         1   <NA>          NA    <NA>        NA   
#> 3 mixed_dish1 nuts           0.5 vegetables     0.25 fruits       0.25
#> 4 mixed_dish2 vegetables     0.3 fruits         0.4  grains       0.3 
#> 5 tomato      vegetables     1   <NA>          NA    <NA>        NA   
#> 6 apple       fruits         1   <NA>          NA    <NA>        NA   
#> 7 mixed_dish3 nuts           0.9 fruits         0.1  <NA>        NA
DF %>% pivot_longer(cols = -food, names_to =c(".value"), names_prefix = "group\\d?_", 
                    values_drop_na=TRUE)
#> # A tibble: 12 x 3
#>    food        g              p
#>    <chr>       <chr>      <dbl>
#>  1 banana      fruits      1   
#>  2 rice        grains      1   
#>  3 mixed_dish1 nuts        0.5 
#>  4 mixed_dish1 vegetables  0.25
#>  5 mixed_dish1 fruits      0.25
#>  6 mixed_dish2 vegetables  0.3 
#>  7 mixed_dish2 fruits      0.4 
#>  8 mixed_dish2 grains      0.3 
#>  9 tomato      vegetables  1   
#> 10 apple       fruits      1   
#> 11 mixed_dish3 nuts        0.9 
#> 12 mixed_dish3 fruits      0.1

Created on 2021-09-23 by the reprex package (v0.3.0)

2 Likes

This is probably more inefficient!

df_longer = df %>%
  pivot_longer(
    cols = matches("group[0-9]?_"),
    names_to = "p",
    values_to = "p_val",
    values_drop_na = TRUE
  ) %>%
  pivot_longer(
    cols = matches("group[0-9]?"),
    names_to = "g",
    values_to = "g_val",
    values_drop_na = TRUE
  ) %>%
  mutate(p_num = str_extract(p, "[0-9]+"),
         g_num = str_extract(g, "[0-9]+")) %>%
  rowwise() %>%
  filter(identical(p_num, g_num)) %>%
  ungroup() %>%
  transmute(food, group = g_val, group_p = p_val)
1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.